diff --git a/1.4.0/404.html b/1.4.0/404.html deleted file mode 100644 index cfb7d8a..0000000 --- a/1.4.0/404.html +++ /dev/null @@ -1,554 +0,0 @@ - - - -
- - - - - - - - - - - - - - - -medchem.alerts
¶medchem.alerts
-
-
-¶AlertFilters
-
-
-¶Filtering class for building a library based on a list of structural alerts
-To list the available alerts, use the list_default_available_alerts
method.
__call__(mols, n_jobs=None, progress=False, include_all_alerts=False)
-
-¶Run alert evaluation on this list of molecules and return the full dataframe
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mols |
-
- Iterable[Union[str, rdchem.Mol]]
- |
-
-
-
- input list of molecules - |
- - required - | -
n_jobs |
-
- Optional[int]
- |
-
-
-
- number of jobs - |
-
- None
- |
-
progress |
-
- bool
- |
-
-
-
- whether to show progress or not - |
-
- False
- |
-
include_all_alerts |
-
- bool
- |
-
-
-
- whether to include all of the alerts that match as columns - |
-
- False
- |
-
__init__(alerts_set=None, alerts_db=None)
-
-¶Filtering molecules based on chemical alerts
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
alerts_set |
-
- Union[str, List[str]]
- |
-
-
-
- Alerts catalog to use. Default is BMS+Dundee+Glaxo - |
-
- None
- |
-
alerts_db |
-
- Optional[os.PathLike]
- |
-
-
-
- Alerts file to use. Default is internal - |
-
- None
- |
-
evaluate(mol)
-
-¶Evaluate structure alerts on a molecule
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
-
- Union[str, rdchem.Mol]
- |
-
-
-
- input molecule - |
- - required - | -
Returns:
-Type | -Description | -
---|---|
- | -
-
-
- list of alerts matched - |
-
list_default_available_alerts()
-
-
- cached
- staticmethod
-
-
-¶Return a list of unique rule set names
- -NovartisFilters
-
-
-¶Filtering class for building a screening deck following the novartis filtering process -published in https://dx.doi.org/10.1021/acs.jmedchem.0c01332.
-The output of the filter are explained below:
-- status: one of ["Exclude", "Flag", "Annotations", "Ok"]
(ordered by quality).
- Generally, you can keep anything without the "Exclude" label, as long as you also apply
- a maximum severity score for compounds that collect too many flags.
-- covalent: number of potentially covalent motifs contained in the compound
-- severity: how severe are the issues with the molecules:
- - 0
: compound has no flags, might have annotations;
- - 1-9
: number of flags the compound raises;
- - >= 10
: default exclusion criterion used in the paper
-- special_mol: whether the compound/parts of the compound belongs to a special class of molecules
- (e.g peptides, glycosides, fatty acid). In that case, you should review the rejection reasons.
__call__(mols, n_jobs=None, progress=False)
-
-¶Run alert evaluation on this list of molecules and return the full dataframe
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mols |
-
- Iterable[Union[str, rdchem.Mol]]
- |
-
-
-
- input list of molecules - |
- - required - | -
n_jobs |
-
- Optional[int]
- |
-
-
-
- number of jobs - |
-
- None
- |
-
progress |
-
- bool
- |
-
-
-
- whether to show progress or not - |
-
- False
- |
-
medchem.catalog
¶medchem.catalog
-
-
-¶NamedCatalogs
-
-
-¶Holder for substructure matching catalogs
- - - - - -alerts(subset=None)
-
-
- staticmethod
-
-
-¶Alerts filter catalogs commonly used in molecule filtering
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
subset |
-
- Optional[Union[List[str], str]]
- |
-
-
-
- subset of providers to consider - |
-
- None
- |
-
Returns:
-Name | Type | -Description | -
---|---|---|
catalog |
- FilterCatalog
- |
-
-
-
- filter catalog - |
-
bredt()
-
-
- cached
- staticmethod
-
-
-¶Bredt filter rules -Also see example of usage by surge's -https://github.com/StructureGenerator/SURGE/blob/main/doc/surge1_0.pdf
- -chemical_groups(filters='medicinal')
-
-
- cached
- staticmethod
-
-
-¶Chemical group filter catalogs
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
filters |
-
- Union[str, List[str]]
- |
-
-
-
- list of tag to filter the catalog on. - |
-
- 'medicinal'
- |
-
nibr()
-
-
- cached
- staticmethod
-
-
-¶Catalog from NIBR
-Warning
-This includes all the compounds in the catalog, regardless of severity (FLAG, EXCLUDE, ANNOTATION) -You likely don't want to use this for blind prioritization
-tox(pains_a=True, pains_b=True, pains_c=False, brenk=True, nih=False, zinc=False)
-
-
- cached
- staticmethod
-
-
-¶Common toxicity and interference catalog
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
pains_a |
-
- bool
- |
-
-
-
- whether to include PAINS filters from assay A - |
-
- True
- |
-
pains_b |
-
- bool
- |
-
-
-
- whether to include PAINS filters from assay B - |
-
- True
- |
-
pains_c |
-
- bool
- |
-
-
-
- whether to include PAINS filters from assay C - |
-
- False
- |
-
brenk |
-
- bool
- |
-
-
-
- whether to include BRENK filters - |
-
- True
- |
-
nih |
-
- bool
- |
-
-
-
- whether to include NIH filters - |
-
- False
- |
-
zinc |
-
- bool
- |
-
-
-
- whether to include ZINC filters - |
-
- False
- |
-
unstable_graph(max_severity=5)
-
-
- cached
- staticmethod
-
-
-¶Unstable molecular graph to filter out especially for generative models
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
max_severity |
-
- int
- |
-
-
-
- maximum severity to consider for graph rules to be acceptable - |
-
- 5
- |
-
from_smarts(smarts, labels=None, mincounts=None, maxcounts=None, entry_as_inds=False)
-
-¶Load catalog from a list of smarts
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
smarts |
-
- List[str]
- |
-
-
-
- list of input smarts to add to the catalog - |
- - required - | -
labels |
-
- Optional[List[str]]
- |
-
-
-
- list of label for each smarts - |
-
- None
- |
-
mincounts |
-
- Optional[List[int]]
- |
-
-
-
- minimum count before a match is recognized - |
-
- None
- |
-
maxcounts |
-
- Optional[List[int]]
- |
-
-
-
- maximum count for a match to be valid - |
-
- None
- |
-
entry_as_inds |
-
- bool
- |
-
-
-
- whether to use index for entry id or the label - |
-
- False
- |
-
Returns:
-Name | Type | -Description | -
---|---|---|
catalog |
- FilterCatalog
- |
-
-
-
- merged catalogs - |
-
list_named_catalogs()
-
-¶List all available named catalogs. This list will ignore all chemical groups
-For a list of chemical groups that can be queried using NamedCatalogs.chemical_groups, use medchem.group.list_default_chemical_groups
merge_catalogs(*catalogs)
-
-¶Merge several catalogs into a single one
- -Returns:
-Name | Type | -Description | -
---|---|---|
catalog |
- FilterCatalog
- |
-
-
-
- merged catalog - |
-
medchem.rules
¶medchem.complexity.complexity_filter
-
-
-¶ComplexityFilter
-
-
-¶Complexity filters derived from nonpher: -https://github.com/lich-uct/nonpher/blob/master/nonpher/nonpher.py
-To recover the original complexity score, use threshold_stats_file = "zinc_12"
.
-The threshold have been re-calculated using the original new zinc-15 and focusing only on
-commercially available compounds.
__call__(mol)
-
-¶Check whether the input structure is too complex given this instance of the complexity filter -Return False if the molecule is too complex, else True
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
-
- dm.Mol
- |
-
-
-
- input molecule - |
- - required - | -
__init__(limit='99', complexity_metric='bertz', threshold_stats_file='zinc_15_available')
-
-¶Default complexity limit is set on at least 1 exceeding metric on the 999th permille level
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
limit |
-
- str
- |
-
-
-
- The complexity percentile outlier limit to be used (should be expressed as an integer) - |
-
- '99'
- |
-
complexity_metric |
-
- str
- |
-
-
-
- The complexity filter name to be used.
-Use |
-
- 'bertz'
- |
-
threshold_stats_file |
-
- Optional[str]
- |
-
-
-
- The path to or type the threshold file to be used. -The default available threshold stats files are -* "zinc_12" -* "zinc_15_available" - |
-
- 'zinc_15_available'
- |
-
list_default_available_filters()
-
-
- classmethod
-
-
-¶Return a list of unique filter names
- -list_default_percentile(threshold_stats_file=None)
-
-
- cached
- classmethod
-
-
-¶Return the default percentile list for the threshold file
- -load_threshold_stats_file(path=None)
-
-
- classmethod
-
-
-¶Load threshold file to compute the percentile depending on the MW for each complexity_metric
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
path |
-
- Optional[str]
- |
-
-
-
- path to the threshold file - |
-
- None
- |
-
medchem.complexity._complexity_calc
-
-
-¶Complexity filters as implemented in nonpher -https://github.com/lich-uct/nonpher/blob/master/nonpher/complex_lib.py
- - - -BaroneCT(mol, chiral=False)
-
-¶Compute a Barone complexity measure for a molecule as described in:
-R. Barone and M. Chanon, J. Chem. Inf. Comput. Sci., 2001, 41 (2), pp 269–272 -Qi Huang, Lin-LiLi, Sheng-Yong Yang, J. Mol. Graph. Model. 2010, 28 (8), pp 775–787
-Parameter values are hardcoded as in the articles. -On zinc 15 commercially available dataset, the range of this score is [30, 4266] with a median of 538
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
-
- dm.Mol
- |
-
-
-
- The input molecule. - |
- - required - | -
chiral |
-
- bool
- |
-
-
-
- Whether to include chirality in the calculation. - |
-
- False
- |
-
SMCM(mol)
-
-¶Compute synthetic and molecular complexity as described in:
-TK Allu, TI Oprea, J. Chem. Inf. Model. 2005, 45(5), pp. 1237-1243. -https://sci-hub.ee/10.1021/ci0501387
-On zinc 15 commercially available dataset, the range of this score is [1.93, 192.00] with a median of 42.23
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
-
- dm.Mol
- |
-
-
-
- the input molecule - |
- - required - | -
TWC(mol, log10=True)
-
-¶Compute the total walk count in a molecule as a proxy for complexity. This score is described in:
-twc = 1/2 sum(k=1..n-1,sum(i=atoms,awc(k,i)))
-Gerta Rucker and Christoph Rucker, J. Chem. Inf. Comput. Sci. 1993, 33, 683-695
On zinc 15 commercially available dataset, the range of this score is [1.20, 39.08] with a median of 10.65
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
- - | -
-
-
- the input molecule - |
- - required - | -
log10 |
-
- bool
- |
-
-
-
- whether to return the log10 of the values - |
-
- True
- |
-
WhitlockCT(mol, ringval=4, unsatval=2, heteroval=1, chiralval=2)
-
-¶A chemically intuitive measure for molecular complexity. This complexity measure -has been described in : H. W. Whitlock, J. Org. Chem., 1998, 63, 7982-7989. -Benzyls, phenyls, etc. are not treated at all.
-On zinc 15 commercially available dataset, the range of this score is [0, 172] with a median of 25
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
- - | -
-
-
- The input molecule. - |
- - required - | -
ringval |
-
- float
- |
-
-
-
- The contribution of rings - |
-
- 4
- |
-
unsatval |
-
- float
- |
-
-
-
- The contribution of the unsaturated bond. - |
-
- 2
- |
-
heteroval |
-
- float
- |
-
-
-
- The contribution of the heteroatom. - |
-
- 1
- |
-
chiralval |
-
- float
- |
-
-
-
- The contribution of the chiral center. - |
-
- 2
- |
-
medchem.demerits
¶medchem.demerits
-
-
-¶batch_score(smiles_list, n_jobs=None, batch_size=5000, progress=False, **run_options)
-
-¶Run scorer on input smile list in batch
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
smiles_list |
-
- List
- |
-
-
-
- list of smiles - |
- - required - | -
n_jobs |
-
- Optional[int]
- |
-
-
-
- Number of jobs to run in parallel. - |
-
- None
- |
-
batch_size |
-
- Optional[int]
- |
-
-
-
- Optional batch_size to run the scoring in parallel. - |
-
- 5000
- |
-
progress |
-
- bool
- |
-
-
-
- Whether to show progress bar. - |
-
- False
- |
-
run_options |
- - | -
-
-
- Run options to pass to the underlying score function - |
-
- {}
- |
-
Returns:
-Name | Type | -Description | -
---|---|---|
out_df |
- pd.DataFrame
- |
-
-
-
- Dataframe containing the smiles and computed properties: -(rejected, demerit_score, reason, step) - |
-
run_cmd(cmd, shell=False)
-
-¶Run command
- -score(smiles_list, mc_first_pass_options='', iwd_options='', stop_after_step=3, **run_options)
-
-¶Run scorer on input smile list:
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
smiles_list |
-
- List
- |
-
-
-
- list of smiles - |
- - required - | -
mc_first_pass_options |
-
- Optional[str]
- |
-
-
-
- Initial options to pass to mc_first_pass - |
-
- ''
- |
-
iwd_options |
-
- Optional[str]
- |
-
-
-
- Initial options to pass to iwdemerit - |
-
- ''
- |
-
stop_after_step |
-
- Optional[int]
- |
-
-
-
- Where to stop in the pipeline. Don't change this if you don't know. - |
-
- 3
- |
-
run_options |
- - | -
-
-
- Additional option to run the pipeline - |
-
- {}
- |
-
Returns:
-Name | Type | -Description | -
---|---|---|
out_df |
- pd.DataFrame
- |
-
-
-
- Dataframe containing the smiles and computed properties: -(rejected, demerit_score, reason, step) - |
-
medchem.filter
¶medchem.filter.lead
-
-
-¶alert_filter(mols, alerts, alerts_db=None, n_jobs=1, rule_dict=None, return_idx=False)
-
-¶Filter a dataset of molecules, based on common structural alerts and specific rules.
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mols |
-
- Iterable[Union[str, dm.Mol]]
- |
-
-
-
- List of molecules to filter - |
- - required - | -
alerts |
-
- List[str]
- |
-
-
-
- List of alert collections to screen for. See AlertFilters.list_default_available_alerts() - |
- - required - | -
alerts_db |
-
- Optional[os.PathLike]
- |
-
-
-
- Path to the alert file name. -The internal default file (alerts.csv) will be used if not provided - |
-
- None
- |
-
n_jobs |
-
- Optional[int]
- |
-
-
-
- Number of cpu to use - |
-
- 1
- |
-
rule_dict |
-
- Dict
- |
-
-
-
- Dictionary with additional rules to apply during the filtering. -For example, such dictionary for drug-like compounds would look like this: ------- |
-
- None
- |
-
return_idx |
-
- bool
- |
-
-
-
- Whether to return the filtered index - |
-
- False
- |
-
Returns:
-Name | Type | -Description | -
---|---|---|
filtered_mask | - | -
-
-
- boolean array (or index array) where true means -the molecule IS OK (not found in the alert catalog). - |
-
bredt_filter(mols, return_idx=False, n_jobs=None, progress=False, scheduler='threads', batch_size=100)
-
-¶Filter a list of compounds according to Bredt's rules -https://en.wikipedia.org/wiki/Bredt%27s_rule
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mols |
-
- Sequence[Union[str, dm.Mol]]
- |
-
-
-
- list of input molecules - |
- - required - | -
return_idx |
-
- bool
- |
-
-
-
- whether to return index or a boolean mask - |
-
- False
- |
-
n_jobs |
-
- Optional[int]
- |
-
-
-
- number of parallel job to run. Sequential by default - |
-
- None
- |
-
progress |
-
- bool
- |
-
-
-
- whether to show progress bar - |
-
- False
- |
-
scheduler |
-
- str
- |
-
-
-
- joblib scheduler to use - |
-
- 'threads'
- |
-
batch_size |
-
- int
- |
-
-
-
- batch size for parallel processing. Note that |
-
- 100
- |
-
Returns:
-Name | Type | -Description | -
---|---|---|
filtered_mask | - | -
-
-
- boolean array (or index array) where true means the molecule is not toxic. - |
-
catalog_filter(mols, catalogs, return_idx=False, n_jobs=None, progress=False, scheduler='processes', batch_size=100)
-
-¶Filter a list of compounds according to catalog of structures alerts and patterns
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mols |
-
- Sequence[Union[str, dm.Mol]]
- |
-
-
-
- list of input molecules - |
- - required - | -
catalogs |
-
- List[Union[str, FilterCatalog]]
- |
-
-
-
- list of catalogs (name or FilterCatalog) - |
- - required - | -
return_idx |
-
- bool
- |
-
-
-
- whether to return index or a boolean mask - |
-
- False
- |
-
n_jobs |
-
- Optional[int]
- |
-
-
-
- number of parallel job to run. Sequential by default - |
-
- None
- |
-
progress |
-
- bool
- |
-
-
-
- whether to show progress bar - |
-
- False
- |
-
scheduler |
-
- str
- |
-
-
-
- joblib scheduler to use - |
-
- 'processes'
- |
-
batch_size |
-
- int
- |
-
-
-
- batch size for parallel processing. Note that |
-
- 100
- |
-
Returns:
-Name | Type | -Description | -
---|---|---|
filtered_mask | - | -
-
-
- boolean array (or index array) where true means the molecule is not found in the catalog. - |
-
chemical_group_filter(mols, chemical_group, return_idx=False, n_jobs=None, progress=False, scheduler='threads')
-
-¶Filter a list of compounds according to a chemical group instance.
-Note
-This function will return the list of molecules that DO NOT match the chemical group
-Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mols |
-
- Iterable[Union[str, dm.Mol]]
- |
-
-
-
- list of input molecules - |
- - required - | -
chemical_group |
-
- ChemicalGroup
- |
-
-
-
- a chemical group instance with the required functional groups to use. - |
- - required - | -
return_idx |
-
- bool
- |
-
-
-
- whether to return index or a boolean mask - |
-
- False
- |
-
n_jobs |
-
- Optional[int]
- |
-
-
-
- number of parallel job to run. Sequential by default - |
-
- None
- |
-
progress |
-
- bool
- |
-
-
-
- whether to show progress bar - |
-
- False
- |
-
scheduler |
-
- str
- |
-
-
-
- joblib scheduler to use - |
-
- 'threads'
- |
-
Returns:
-Name | Type | -Description | -
---|---|---|
filtered_mask | - | -
-
-
- boolean array (or index array) where true means the molecule DOES NOT MATCH the groups. - |
-
complexity_filter(mols, complexity_metric='bertz', threshold_stats_file='zinc_15_available', limit='99', return_idx=False, n_jobs=None, progress=False, scheduler='processes')
-
-¶Filter a list of compounds according to a molecular complexity metric and its outlier threshold
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mols |
-
- Iterable[Union[str, dm.Mol]]
- |
-
-
-
- list of input molecules - |
- - required - | -
complexity_metric |
-
- str
- |
-
-
-
- complexity metric to use
-Use |
-
- 'bertz'
- |
-
threshold_stats_file |
-
- str
- |
-
-
-
- complexity threshold statistic origin to use - |
-
- 'zinc_15_available'
- |
-
limit |
-
- str
- |
-
-
-
- complexity outlier percentile to use - |
-
- '99'
- |
-
return_idx |
-
- bool
- |
-
-
-
- whether to return index or a boolean mask - |
-
- False
- |
-
n_jobs |
-
- Optional[int]
- |
-
-
-
- number of parallel job to run. Sequential by default - |
-
- None
- |
-
progress |
-
- bool
- |
-
-
-
- whether to show progress bar - |
-
- False
- |
-
scheduler |
-
- str
- |
-
-
-
- joblib scheduler to use - |
-
- 'processes'
- |
-
medchem.complexity.ComplexityFilter
-Returns:
-Name | Type | -Description | -
---|---|---|
filtered_mask | - | -
-
-
- boolean array (or index array) where true means the molecule MATCH the rules. - |
-
lilly_demerit_filter(smiles, max_demerits=160, return_idx=False, n_jobs=None, progress=False, **kwargs)
-
-¶Run Lilly demerit filtering on current list of molecules
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
smiles |
-
- Iterable[str]
- |
-
-
-
- list of input molecules as smiles preferably - |
- - required - | -
max_demerits |
-
- Optional[int]
- |
-
-
-
- Cutoff to reject molecules Defaults to 160. - |
-
- 160
- |
-
return_idx |
-
- bool
- |
-
-
-
- whether to return a mask or a list of valid indexes - |
-
- False
- |
-
progress |
-
- bool
- |
-
-
-
- whether to show progress bar - |
-
- False
- |
-
kwargs |
- - | -
-
-
- parameters specific to the |
-
- {}
- |
-
Returns:
-Name | Type | -Description | -
---|---|---|
filtered_mask | - | -
-
-
- boolean array (or index array) where true means the molecule is ok. - |
-
molecular_graph_filter(mols, max_severity=5, return_idx=False, n_jobs=None, progress=False, scheduler='threads')
-
-¶Filter a list of compounds according to unstable molecular graph filter list.
-This list was obtained from observation around The disallowed graphs are:
-Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mols |
-
- Iterable[Union[str, dm.Mol]]
- |
-
-
-
- list of input molecules - |
- - required - | -
max_severity |
-
- int
- |
-
-
-
- maximum acceptable severity (1-10). Default is <5 - |
-
- 5
- |
-
return_idx |
-
- bool
- |
-
-
-
- whether to return index or a boolean mask - |
-
- False
- |
-
n_jobs |
-
- Optional[int]
- |
-
-
-
- number of parallel job to run. Sequential by default - |
-
- None
- |
-
progress |
-
- bool
- |
-
-
-
- whether to show progress bar - |
-
- False
- |
-
scheduler |
-
- str
- |
-
-
-
- joblib scheduler to use - |
-
- 'threads'
- |
-
Returns:
-Name | Type | -Description | -
---|---|---|
filtered_mask | - | -
-
-
- boolean array (or index array) where true means the molecule is not toxic. - |
-
protecting_groups_filter(mols, return_idx=False, protecting_groups=['fmoc', 'tert-butoxymethyl', 'tert-butyl carbamate', 'tert-butyloxycarbonyl'], n_jobs=None, progress=False, scheduler='threads')
-
-¶Filter a list of compounds according to match to known protecting groups. -Note that this is syntactic sugar for calling chemical_group_filter with the protecting groups subset
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mols |
-
- Iterable[Union[str, dm.Mol]]
- |
-
-
-
- list of input molecules - |
- - required - | -
protecting_groups |
-
- str
- |
-
-
-
- type of protection group to consider if not provided, will use all (not advised) - |
-
- ['fmoc', 'tert-butoxymethyl', 'tert-butyl carbamate', 'tert-butyloxycarbonyl']
- |
-
return_idx |
-
- bool
- |
-
-
-
- whether to return index or a boolean mask - |
-
- False
- |
-
n_jobs |
-
- Optional[int]
- |
-
-
-
- number of parallel job to run. Sequential by default - |
-
- None
- |
-
progress |
-
- bool
- |
-
-
-
- whether to show progress bar - |
-
- False
- |
-
scheduler |
-
- str
- |
-
-
-
- joblib scheduler to use - |
-
- 'threads'
- |
-
Returns:
-Name | Type | -Description | -
---|---|---|
filtered_mask | - | -
-
-
- boolean array (or index array) where true means the molecule DOES NOT MATCH the groups. - |
-
rules_filter(mols, rules, return_idx=False, n_jobs=None, progress=False, scheduler='processes')
-
-¶Filter a list of compounds according to a predefined set of rules
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mols |
-
- Iterable[Union[str, dm.Mol]]
- |
-
-
-
- list of input molecules - |
- - required - | -
rules |
-
- Union[List[Any], RuleFilters]
- |
-
-
-
- list of rules to apply to the input molecules. - |
- - required - | -
return_idx |
-
- bool
- |
-
-
-
- whether to return index or a boolean mask - |
-
- False
- |
-
n_jobs |
-
- Optional[int]
- |
-
-
-
- number of parallel job to run. Sequential by default - |
-
- None
- |
-
progress |
-
- bool
- |
-
-
-
- whether to show progress bar - |
-
- False
- |
-
scheduler |
-
- str
- |
-
-
-
- joblib scheduler to use - |
-
- 'processes'
- |
-
Returns:
-Name | Type | -Description | -
---|---|---|
filtered_mask | - | -
-
-
- boolean array (or index array) where true means the molecule MATCH the rules. - |
-
screening_filter(mols, n_jobs=None, max_severity=10, return_idx=False)
-
-¶Filter a set of molecules based on novartis screening deck curation process -Schuffenhauer, A. et al. Evolution of Novartis' small molecule screening deck design, J. Med. Chem. (2020) -DOI. https://dx.doi.org/10.1021/acs.jmedchem.0c01332
-Note
-The severity argument corresponds to the accumulated severity for a compound across all patterns in the -catalog.
-Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mols |
-
- Iterable[Union[str, dm.Mol]]
- |
-
-
-
- list of input molecules - |
- - required - | -
n_jobs |
-
- Optional[int]
- |
-
-
-
- number of parallel job to run. Sequential by default - |
-
- None
- |
-
max_severity |
-
- int
- |
-
-
-
- maximum severity allowed. Default is <10 - |
-
- 10
- |
-
return_idx |
-
- bool
- |
-
-
-
- Whether to return the filtered index - |
-
- False
- |
-
Returns:
-Name | Type | -Description | -
---|---|---|
filtered_mask | - | -
-
-
- boolean array (or index array) where true means the molecule -IS NOT REJECTED (i.e not found in the alert catalog). - |
-
medchem.filter.generic
-
-
-¶atom_list_filter(mols, unwanted_atom_list=None, wanted_atom_list=None, return_idx=False, n_jobs=None, progress=False, scheduler=None)
-
-¶Find molecule without any atom from a set of unwanted atom symbols -and with all atoms in the set of desirable atom list
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mols |
-
- Iterable[Union[str, dm.Mol]]
- |
-
-
-
- list of input molecules - |
- - required - | -
unwanted_atom_list |
-
- Optional[Iterable]
- |
-
-
-
- list of undesirable atom symbol - |
-
- None
- |
-
wanted_atom_list |
-
- Optional[Iterable]
- |
-
-
-
- list of desirable atom symbol - |
-
- None
- |
-
return_idx |
-
- bool
- |
-
-
-
- whether to return index or a boolean mask - |
-
- False
- |
-
n_jobs |
-
- Optional[int]
- |
-
-
-
- number of parallel job to run. Sequential by default - |
-
- None
- |
-
progress |
-
- bool
- |
-
-
-
- whether to show progress bar - |
-
- False
- |
-
scheduler |
-
- Optional[str]
- |
-
-
-
- joblib scheduler to use - |
-
- None
- |
-
Returns:
-Name | Type | -Description | -
---|---|---|
filtered_mask | - | -
-
-
- boolean array (or index array) where true means the molecule is ok. - |
-
halogenicity_filter(mols, thresh_F=6, thresh_Br=3, thresh_Cl=3, return_idx=False, n_jobs=None, progress=False, scheduler=None)
-
-¶Find molecule that do not exceed halogen threshold. These thresholds are:
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mols |
-
- Iterable[Union[str, dm.Mol]]
- |
-
-
-
- list of input molecules - |
- - required - | -
thresh_F |
-
- int
- |
-
-
-
- maximum number of fluorine - |
-
- 6
- |
-
thresh_Br |
-
- int
- |
-
-
-
- maximum number of bromine - |
-
- 3
- |
-
thresh_Cl |
-
- int
- |
-
-
-
- maximum number of chlorine - |
-
- 3
- |
-
return_idx |
-
- bool
- |
-
-
-
- whether to return index or a boolean mask - |
-
- False
- |
-
n_jobs |
-
- Optional[int]
- |
-
-
-
- number of parallel job to run. Sequential by default - |
-
- None
- |
-
progress |
-
- bool
- |
-
-
-
- whether to show progress bar - |
-
- False
- |
-
scheduler |
-
- Optional[str]
- |
-
-
-
- joblib scheduler to use - |
-
- None
- |
-
Returns:
-Name | Type | -Description | -
---|---|---|
filtered_mask | - | -
-
-
- boolean array (or index array) where true means the molecule is ok. - |
-
macrocycle_filter(mols, max_cycle_size=10, return_idx=False, n_jobs=None, progress=False, scheduler=None)
-
-¶Find molecules that do not infringe the strict maximum cycle size.
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mols |
-
- Iterable[Union[str, dm.Mol]]
- |
-
-
-
- list of input molecules - |
- - required - | -
max_cycle_size |
-
- int
- |
-
-
-
- strict maximum macrocycle size - |
-
- 10
- |
-
return_idx |
-
- bool
- |
-
-
-
- whether to return index or a boolean mask - |
-
- False
- |
-
n_jobs |
-
- Optional[int]
- |
-
-
-
- number of parallel job to run. Sequential by default - |
-
- None
- |
-
progress |
-
- bool
- |
-
-
-
- whether to show progress bar - |
-
- False
- |
-
scheduler |
-
- Optional[str]
- |
-
-
-
- joblib scheduler to use - |
-
- None
- |
-
Returns:
-Name | Type | -Description | -
---|---|---|
filtered_mask | - | -
-
-
- boolean array (or index array) where true means the molecule is ok. - |
-
num_atom_filter(mols, min_atoms=None, max_atoms=None, return_idx=False, n_jobs=None, progress=False, scheduler=None)
-
-¶Find a molecule that match the atom number constraints -Returning True means the molecule is fine
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mols |
-
- Iterable[Union[str, dm.Mol]]
- |
-
-
-
- list of input molecules - |
- - required - | -
min_atoms |
-
- Optional[int]
- |
-
-
-
- strict minimum number of atoms (atoms > min_atoms) - |
-
- None
- |
-
max_atoms |
-
- Optional[int]
- |
-
-
-
- strict maximum number of atoms (atoms < max_atoms) - |
-
- None
- |
-
return_idx |
-
- bool
- |
-
-
-
- whether to return index or a boolean mask - |
-
- False
- |
-
n_jobs |
-
- Optional[int]
- |
-
-
-
- number of parallel job to run. Sequential by default - |
-
- None
- |
-
progress |
-
- bool
- |
-
-
-
- whether to show progress bar - |
-
- False
- |
-
scheduler |
-
- Optional[str]
- |
-
-
-
- joblib scheduler to use - |
-
- None
- |
-
Returns:
-Name | Type | -Description | -
---|---|---|
filtered_mask | - | -
-
-
- boolean array (or index array) where true means the molecule is ok. - |
-
num_stereo_center_filter(mols, max_stereo_centers=4, max_undefined_stereo_centers=2, return_idx=False, n_jobs=None, progress=False, scheduler=None)
-
-¶Find a molecule that match the number of stereo center constraints. -Returning True means the molecule is fine
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mols |
-
- Iterable[Union[str, dm.Mol]]
- |
-
-
-
- list of input molecules - |
- - required - | -
max_stereo_centers |
- - | -
-
-
- strict maximum number of stereo centers (<). Default is 4 - |
- - required - | -
max_undefined_stereo_centers |
-
- Optional[int]
- |
-
-
-
- strict maximum number of undefined stereo centers (<). Default is 2 - |
-
- 2
- |
-
return_idx |
-
- bool
- |
-
-
-
- whether to return index or a boolean mask - |
-
- False
- |
-
n_jobs |
-
- Optional[int]
- |
-
-
-
- number of parallel job to run. Sequential by default - |
-
- None
- |
-
progress |
-
- bool
- |
-
-
-
- whether to show progress bar - |
-
- False
- |
-
scheduler |
-
- Optional[str]
- |
-
-
-
- joblib scheduler to use - |
-
- None
- |
-
Returns:
-Name | Type | -Description | -
---|---|---|
filtered_mask | - | -
-
-
- boolean array (or index array) where true means the molecule is ok. - |
-
ring_infraction_filter(mols, hetcycle_min_size=4, return_idx=False, n_jobs=None, progress=False, scheduler=None)
-
-¶Find molecules that have a ring infraction filter. -Returning True means the molecule is fine
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mols |
-
- Iterable[Union[str, dm.Mol]]
- |
-
-
-
- list of input molecules - |
- - required - | -
hetcycle_min_size |
-
- int
- |
-
-
-
- Minimum ring size before more than 1 hetero atom or any non single bond is allowed. -This is a strict threshold (>) - |
-
- 4
- |
-
return_idx |
-
- bool
- |
-
-
-
- whether to return index or a boolean mask - |
-
- False
- |
-
n_jobs |
-
- Optional[int]
- |
-
-
-
- number of parallel job to run. Sequential by default - |
-
- None
- |
-
progress |
-
- bool
- |
-
-
-
- whether to show progress bar - |
-
- False
- |
-
scheduler |
-
- Optional[str]
- |
-
-
-
- joblib scheduler to use - |
-
- None
- |
-
Returns:
-Name | Type | -Description | -
---|---|---|
filtered_mask | - | -
-
-
- boolean array (or index array) where true means the molecule is ok. - |
-
symmetry_filter(mols, symmetry_threshold=0.8, return_idx=False, n_jobs=None, progress=False, scheduler=None)
-
-¶Find molecules that are not symmetrical, given a symmetry threshold
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mols |
-
- Iterable[Union[str, dm.Mol]]
- |
-
-
-
- list of input molecules - |
- - required - | -
symmetry_threshold |
-
- float
- |
-
-
-
- threshold to consider a molecule highly symmetrical - |
-
- 0.8
- |
-
return_idx |
-
- bool
- |
-
-
-
- whether to return index or a boolean mask - |
-
- False
- |
-
n_jobs |
-
- Optional[int]
- |
-
-
-
- number of parallel job to run. Sequential by default - |
-
- None
- |
-
progress |
-
- bool
- |
-
-
-
- whether to show progress bar - |
-
- False
- |
-
scheduler |
-
- Optional[str]
- |
-
-
-
- joblib scheduler to use - |
-
- None
- |
-
Returns:
-Name | Type | -Description | -
---|---|---|
filtered_mask | - | -
-
-
- boolean array (or index array) where true means the molecule is ok. - |
-
medchem.groups
¶medchem.groups
-
-
-¶ChemicalGroup
-
-
-¶Build a library of chemical groups using a list of structures parsed from a file
-The default library of structures has been curated from https://github.com/Sulstice/global-chem and additional open source data.
-Note
-For new chemical groups, please minimally provide the 'smiles'/'smarts', 'name' and "group" and optional 'hierarchy' columns
-Warning
-The SMILES and SMARTS used in the default list of substructures do not result in the same matches. -Unless specified otherwise, the SMILES will be used in the matching done by this class, -whereas due to RDKit's limitation, the SMARTS will be used in the matching done by the generated catalog. -For more information see this discussion: https://github.com/valence-platform/medchem/pull/19.
-dataframe
-
-
- property
-
-
-¶Get the dataframe of the chemical groups
-mol_smarts
-
-
- property
-
-
-¶Get the SMARTS of the chemical groups in this instance
-mols
-
-
- property
-
-
-¶Get the Molecule object of the SMILES for the chemical groups in this instance
-name
-
-
- property
-
-
-¶Get the Name of the chemical groups in this instance
-smarts
-
-
- property
-
-
-¶Get the SMARTS of the chemical groups in this instance
-smiles
-
-
- property
-
-
-¶Get the SMILES of the chemical groups in this instance
-__init__(groups=None, n_jobs=None, groups_db=None)
-
-¶Build a chemical group library
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
groups |
-
- Union[str, List[str]]
- |
-
-
-
- List of groups to use. Defaults to None where all functional groups are used - |
-
- None
- |
-
n_jobs |
-
- Optional[int]
- |
-
-
-
- Optional number of jobs to run in parallel for internally building the data. Defaults to None. - |
-
- None
- |
-
groups_db |
-
- Optional[os.PathLike]
- |
-
-
-
- Path to a file containing the dump of the chemical groups. Default is the internal dataset - |
-
- None
- |
-
filter(names, fuzzy=False)
-
-¶Filter the group to restrict to only the name in input
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
names |
-
- List[str]
- |
-
-
-
- list of names to use for filters - |
- - required - | -
fuzzy |
-
- bool
- |
-
-
-
- whether to use exact or fuzzy matching - |
-
- False
- |
-
get_catalog()
-
-
- cached
-
-
-¶Build an rdkit catalog from the current chemical group data
- -get_matches(mol, use_smiles=True)
-
-¶Get all the functional groups in this instance that matches the input molecule
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
-
- Union[dm.Mol, str]
- |
-
-
-
- input molecule - |
- - required - | -
use_smiles |
-
- bool
- |
-
-
-
- whether to use the smiles representation of the catalog or the smarts - |
-
- True
- |
-
has_match(mol)
-
-¶Check whether the input molecule has any functional group in this instance
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
-
- Union[dm.Mol, str]
- |
-
-
-
- input molecule - |
- - required - | -
list_groups()
-
-¶List all the chemical groups available
- -list_hierarchy_groups()
-
-¶List all the hierarchy in chemical groups available.
-To get the full hierarchy on each path, split by the .
character.
list_default_chemical_groups(hierachy=False)
-
-¶List all the chemical groups available.
-Note
-Chemical groups define how a collection of patterns is organized. -They do not correspond to individual pattern names.
-Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
hierarchy |
- - | -
-
-
- whether to return the full hierarchy or the group name only - |
- - required - | -
Returns:
-Type | -Description | -
---|---|
- | -
-
-
- List of chemical groups - |
-
list_functional_group_names(exclude_basic=True)
-
-¶List common functional group names
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
exclude_basic |
-
- bool
- |
-
-
-
- whether to exclude the basic functional groups - |
-
- True
- |
-
Returns:
-Type | -Description | -
---|---|
- | -
-
-
- List of functional group names - |
-
medchem.query
¶This module helps build a filter based on a query language that can be parsed.
-By default, the default query parser will be used, which contains the following instructions that can be orchestrated using boolean operation (or
, and
, not
and parenthesis)
import datamol as dm
-from medchem.query.eval import QueryFilter
-
-query = """HASPROP("tpsa" < 120) AND HASSUBSTRUCTURE("[OH]", True)"""
-chemical_filter = QueryFilter(query, parser="lalr")
-mols = dm.data.cdk2().mol[:10]
-chemical_filter(mols, n_jobs=-1) # [False, False, False, False, False, True, True, True, False, False]
-
Any string provided as query
argument needs to be quoted (similar to json) to avoid ambiguity in parsing.
-* An example of valid query is """(HASPROP("tpsa" > 120 ) | HASSUBSTRUCTURE("c1ccccc1")) AND NOT HASALERT("pains") OR HASSUBSTRUCTURE("[OH]", max, 2)"""
.
-* Examples of invalid queries are
- * """HASPROP("tpsa" > 120) OR HASSUBSTRUCTURE("[OH]", True, >, 3)"""
: unexpected wrong operator >
- * """HASPROP(tpsa > 120)"""
: tpsa is not quoted
- * """HASPROP("tpsa") > 120"""
: this is not part of the language specification
- * """(HASPROP("tpsa" > 120) AND HASSUBSTRUCTURE("[OH]", True, max, 3 )"""
: mismatching parenthesis (
"""HASPROP("tpsa" > 120) OR HASSUBSTRUCTURE("CO")"""
, """(HASPROP("tpsa" > 120)) OR (HASSUBSTRUCTURE("CO"))"""
and """(HASPROP("tpsa" > 120) OR HASSUBSTRUCTURE("CO"))"""
are equivalent
check whether a molecule has an alert
from a catalog
-
# alert is one supported alert catalog by `medchem`. For example `pains`
-HASALERT(alert:str)
-
check whether a molecule has a specific functional group from a catalog
-# group is one supported functional group provided by `medchem`
-HASGROUP(group:str)
-
check whether a molecule matches a predefined druglikeness rule
from a catalog
-
# rule is one supported rule provided by `medchem`. For example `rule_of_five`
-MATCHRULE(rule:str)
-
check whether a molecule has query
as superstructure
-
# query is a SMILES
-HASSUPERSTRUCTURE(query:str)
-
Check whether a molecule has query
as substructure.
-Note that providing the comma separator ,
is mandatory here as each variable is an argument.
# query is a SMILES or a SMARTS, operator is defined below, is_smarts is a boolean
-
-HASSUBSTRUCTURE(query:str, is_smarts:Optional[bool], operator:Optional[str], limit:Optional[int])
-
-# which corresponds to setting these default values
-HASSUBSTRUCTURE(query:str, is_smarts=False, operator="min", limit=1)
-# same as
-HASSUBSTRUCTURE(query:str, is_smarts=None, operator=None, limit=None)
-
Not providing optional arguments is allowed, but they need to be provided in the exact same order shown above. Thus:
-HASSUBSTRUCTURE("CO")
HASSUBSTRUCTURE("CO", False)
HASSUBSTRUCTURE("CO", False, min)
HASSUBSTRUCTURE("CO", False, min, 1)
are all valid
and equivalent
(given their default values)
Furthermore, since the correct argument map can be inferred when no ambiguity arises, the following are valid but discouraged
HASSUBSTRUCTURE("CO", False, 1)
HASSUBSTRUCTURE("CO", min, 1)
Whereas, this is invalid:
-* HASSUBSTRUCTURE("CO", min, False, 1)
Check whether a molecule has prop
as property within a defined limit.
-Any comma ,
provided between arguments will be ignored
# prop is a valid datamol.descriptors property, comparator is a required comparator operator and defined below
-HASPROP(prop:str comparator:str limit:float)
-
Check whether a molecule is similar enough to another molecule.
-Any comma ,
provided between arguments will be ignored
# query is a SMILES
-LIKE(query:str comparator:str limit:float)
-
=
==
, !=
, <
, >
, <=
, >=
true
, false
, True
, False
, TRUE
, FALSE
min
, MIN
max
, MAX
AND
or &
or &&
or and
OR
or |
or ||
or or
NOT
or !
or ~
or not
medchem.query.parser
-
-
-¶QueryParser
-
-
-¶
- Bases: Transformer
Query parser for the custom query language for molecules. This parses the input language and builds a parseable and evaluable representation.
-The trick for lazy evaluation is to define a custom guard with 'fn(*)
' around expressions that need to be evaluated.
Note that you SHOULD NOT HAVE TO INTERACT WITH THIS CLASS DIRECTLY.
- -------import medchem -import lark -QUERY_GRAMMAR = medchem.utils.loader.get_grammar(as_string=True) -QUERY_PARSER = Lark(QUERY_GRAMMAR, parser="lalr", transformer=QueryParser())
-see how the string needs to be "quoted". This builds on the json quote requirements to avoid dealing with unwanted outcomes¶
-example = """(HASPROP("tpsa" > 120 ) | HASSUBSTRUCTURE("c1ccccc1")) AND NOT HASALERT("pains") OR HASSUBSTRUCTURE("[OH]", max)""" -t = QUERY_PARSER.parse(example) -print(t) -((((
-fn(getprop, prop='tpsa')
> 120.0) orfn(hassubstructure, query='c1ccccc1', operator='None', limit=None, is_smarts=None)
) and notfn(hasalert, alert='pains')
) orfn(hassubstructure, query='[OH]', operator='max', limit=None, is_smarts=None)
)
bool_expr(bool_term, *others)
-
-¶Define how boolean expressions should be parsed
- -bool_term(bool_factor, *others)
-
-¶Define how boolean terms should be parsed
- -hasalert(value)
-
-¶Format the hasalert node in the query
-Note
-The parser does not enforce any validity on the argument and -the underlying function is supposed to handle it.
-hasgroup(value)
-
-¶Format the hasgroup node in the query
-Note
-The parser does not enforce any validity on the argument and -the underlying function is supposed to handle it.
-hasprop(value, comparator, limit)
-
-¶Format the hasprop node in the query
-Note
-The parser does not enforce any validity on the argument and -the underlying function is supposed to handle it.
-hassubstructure(value, is_smarts, operator, limit)
-
-¶Format the substructure node in the query
-Note
-The parser does not enforce any validity on the argument and -the underlying function is supposed to handle it.
-hassuperstructure(value)
-
-¶Format the superstructure node in the query
-Note
-The parser does not enforce any validity on the argument and -the underlying function is supposed to handle it.
-like(value, comparator, limit)
-
-¶Format the like node in the query
-Note
-The parser does not enforce any validity on the argument and -the underlying function is supposed to handle it.
-matchrule(value)
-
-¶Format the matchrule node in the query
-Note
-The parser does not enforce any validity on the argument and -the underlying function is supposed to handle it.
-not_bool_factor(*args)
-
-¶Define representation of a negation
- -medchem.query.eval
-
-
-¶QueryFilter
-
-
-¶Query filtering system based on a custom query grammar
- - - - - -__call__(mols, scheduler='processes', n_jobs=-1, progress=True)
-
-¶Call the internal chemical filter that has been built
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mols |
-
- List[Union[str, dm.Mol]]
- |
-
-
-
- list of input molecules to filter - |
- - required - | -
n_jobs |
-
- int
- |
-
-
-
- whether to run job in parallel and number of jobs to consider. Defaults to -1. - |
-
- -1
- |
-
scheduler |
- - | -
-
-
- scheduler to use. Defaults to 'processes'. - |
-
- 'processes'
- |
-
progress |
-
- bool
- |
-
-
-
- whether to show job progress. Defaults to True. - |
-
- True
- |
-
__init__(query, grammar=None, parser='lalr')
-
-¶Constructor for query filtering system
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
query |
-
- str
- |
-
-
-
- input unparsed query - |
- - required - | -
grammar |
-
- Optional[str]
- |
-
-
-
- path to grammar language to use. Defaults to None, which will use the default grammar. - |
-
- None
- |
-
parser |
-
- str
- |
-
-
-
- which Lark language parser to use. Defaults to "lalr". - |
-
- 'lalr'
- |
-
QueryOperator
-
-
-¶A class to hold all the operators that can be used in queries
- - - - - -getprop(mol, prop)
-
-
- staticmethod
-
-
-¶Compute the molecular property of a molecule. -This is an alternative to the hasprop function, that does not enforce any comparison.
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
-
- Union[dm.Mol, str]
- |
-
-
-
- input molecule - |
- - required - | -
prop |
-
- str
- |
-
-
-
- molecular property to apply as filter on the molecule - |
- - required - | -
Returns:
-Name | Type | -Description | -
---|---|---|
property |
- float
- |
-
-
-
- computed property value - |
-
hasalert(mol, alert)
-
-
- staticmethod
-
-
-¶Check if a molecule matches a named alert catalog. -The alert catalog needs to be one supported by the medchem package.
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
-
- Union[dm.Mol, str]
- |
-
-
-
- input molecule - |
- - required - | -
alert |
-
- str
- |
-
-
-
- named catalog to apply as filter on the molecule - |
- - required - | -
Returns:
-Name | Type | -Description | -
---|---|---|
has_alert |
- bool
- |
-
-
-
- whether the molecule has a given alert - |
-
hasgroup(mol, group)
-
-
- staticmethod
-
-
-¶Check if a molecule has a specific functional group.
-Internally, this is done fetching the smarts corresponding to the group
-then calling QueryOperator.hassubstructure
Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
-
- Union[dm.Mol, str]
- |
-
-
-
- input molecule - |
- - required - | -
group |
-
- str
- |
-
-
-
- functional group to check on the molecule. - |
- - required - | -
Returns:
-Name | Type | -Description | -
---|---|---|
has_group |
- bool
- |
-
-
-
- whether the molecule has the given functional group - |
-
hasprop(mol, prop, comparator, limit)
-
-
- staticmethod
-
-
-¶Check if a molecule has a molecular property within the desired range
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
-
- Union[dm.Mol, str]
- |
-
-
-
- input molecule - |
- - required - | -
prop |
-
- str
- |
-
-
-
- molecular property to apply as filter on the molecule - |
- - required - | -
comparator |
-
- Callable
- |
-
-
-
- operator function to apply to check whether the molecule property matches the expected value - |
- - required - | -
limit |
-
- float
- |
-
-
-
- limit value for determining whether the molecule property is within desired range - |
- - required - | -
Returns:
-Name | Type | -Description | -
---|---|---|
has_property |
- bool
- |
-
-
-
- whether the molecule has a given property within a desired range - |
-
hassubstructure(mol, query, is_smarts=False, operator='min', limit=1)
-
-
- staticmethod
-
-
-¶Check if a molecule has substructure provided by a query
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
-
- Union[dm.Mol, str]
- |
-
-
-
- input molecule - |
- - required - | -
query |
-
- str
- |
-
-
-
- input smarts query - |
- - required - | -
is_smarts |
-
- bool
- |
-
-
-
- whether this is a smarts query or not - |
-
- False
- |
-
operator |
-
- str
- |
-
-
-
- one of min or max to specify the min or max limit - |
-
- 'min'
- |
-
limit |
-
- int
- |
-
-
-
- limit of substructures to be found - |
-
- 1
- |
-
Returns:
-Name | Type | -Description | -
---|---|---|
has_substructure |
- bool
- |
-
-
-
- whether the query is a subgraph of the molecule - |
-
hassuperstructure(mol, query)
-
-
- staticmethod
-
-
-¶Check if a molecule has a superstructure defined by a query. -Note that a superstructure cannot be a query (smarts)
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
-
- Union[dm.Mol, str]
- |
-
-
-
- input molecule - |
- - required - | -
query |
-
- str
- |
-
-
-
- input SMILES query - |
- - required - | -
Returns:
-Name | Type | -Description | -
---|---|---|
has_superstructure |
- bool
- |
-
-
-
- whether the molecule is a subgraph of the query - |
-
like(mol, query, comparator, limit)
-
-
- staticmethod
-
-
-¶Check if a molecule is similar or distant enough from another molecule using tanimoto ECFP distance. -and is useful for letting python handles the binary comparison operators.
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
-
- Union[dm.Mol, str]
- |
-
-
-
- input molecule - |
- - required - | -
query |
-
- Union[dm.Mol, str]
- |
-
-
-
- input molecule to compare with - |
- - required - | -
comparator |
-
- Callable[[float, float], bool]
- |
-
-
-
- operator function to apply to check whether the molecule property matches the expected value.
-Takes computed_similarity and |
- - required - | -
limit |
-
- float
- |
-
-
-
- limit value for determining whether the molecule property is within desired range - |
- - required - | -
Returns:
-Name | Type | -Description | -
---|---|---|
is_similar |
- bool
- |
-
-
-
- whether the molecule is similar or distant enough from the query - |
-
matchrule(mol, rule)
-
-
- staticmethod
-
-
-¶Check if a molecule matches a druglikeness rule
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
-
- Union[dm.Mol, str]
- |
-
-
-
- input molecule - |
- - required - | -
rule |
-
- str
- |
-
-
-
- druglikeness rule to check on the molecule. - |
- - required - | -
Returns:
-Name | Type | -Description | -
---|---|---|
match_rule |
- bool
- |
-
-
-
- whether the molecule match the given rule - |
-
similarity(mol, query)
-
-
- staticmethod
-
-
-¶Compute the ECFP tanimoto similarity between two molecules. -This is an alternative to the like function, that does not enforce any comparison, -and is useful for letting python handle the binary comparison operators.
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
-
- Union[dm.Mol, str]
- |
-
-
-
- input molecule - |
- - required - | -
query |
-
- Union[dm.Mol, str]
- |
-
-
-
- input query molecule to compute similarity against - |
- - required - | -
Returns:
-Name | Type | -Description | -
---|---|---|
similarity |
- float
- |
-
-
-
- computed similarity value between mol and query - |
-
medchem.rules
¶medchem.rules.basic_rules
-
-
-¶rule_of_chemaxon_druglikeness(mol, mw=None, clogp=None, n_hba=None, n_hbd=None, n_rotatable_bonds=None, n_rings=None, **kwargs)
-
-¶Compute the drug likeness filter according to chemaxon:
-It computes: MW < 400 & logP < 5 & HBA <= 10 & HBD <= 5 & ROTBONDS < 5 & ring > 0
Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
-
- Union[dm.Mol, str]
- |
-
-
-
- input molecule - |
- - required - | -
mw |
-
- Optional[float]
- |
-
-
-
- precomputed molecular weight. Defaults to None. - |
-
- None
- |
-
clogp |
-
- Optional[float]
- |
-
-
-
- precomputed cLogP. Defaults to None. - |
-
- None
- |
-
n_hba |
-
- Optional[float]
- |
-
-
-
- precomputed number of HBA. Defaults to None. - |
-
- None
- |
-
n_hbd |
-
- Optional[float]
- |
-
-
-
- precomputed number of HBD. Defaults to None. - |
-
- None
- |
-
n_rotatable_bonds |
-
- Optional[int]
- |
-
-
-
- precomputed number of rotatable bonds in the molecule. Defaults to None. - |
-
- None
- |
-
n_rings |
-
- Optional[int]
- |
-
-
-
- precomputed number of rings in the molecule. Defaults to None. - |
-
- None
- |
-
Returns:
-Name | Type | -Description | -
---|---|---|
roc | - | -
-
-
- True if molecule is compliant, False otherwise - |
-
rule_of_cns(mol, mw=None, clogp=None, n_hba=None, n_hbd=None, tpsa=None, **kwargs)
-
-¶Computes drug likeness rule for CNS penetrant molecules as described in: -Jeffrey & Summerfield (2010) Assessment of the blood-brain barrier in CNS drug discovery.
-It computes: MW in [135, 582] & logP in [-0.2, 6.1] & TPSA in [3, 118] & HBD <= 3 & HBA <= 5
Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
-
- Union[dm.Mol, str]
- |
-
-
-
- input molecule - |
- - required - | -
mw |
-
- Optional[float]
- |
-
-
-
- precomputed molecular weight. Defaults to None. - |
-
- None
- |
-
clogp |
-
- Optional[float]
- |
-
-
-
- precomputed logP. Defaults to None. - |
-
- None
- |
-
n_hba |
-
- Optional[float]
- |
-
-
-
- precomputed number of HBA. Defaults to None. - |
-
- None
- |
-
n_hbd |
-
- Optional[float]
- |
-
-
-
- precomputed number of HBD. Defaults to None. - |
-
- None
- |
-
tpsa |
-
- Optional[int]
- |
-
-
-
- precomputed TPSA. Defaults to None. - |
-
- None
- |
-
Returns:
-Name | Type | -Description | -
---|---|---|
roc | - | -
-
-
- True if molecule is compliant, False otherwise - |
-
rule_of_druglike_soft(mol, mw=None, clogp=None, n_hba=None, n_hbd=None, tpsa=None, n_rotatable_bonds=None, n_rings=None, n_hetero_atoms=None, charge=None, **kwargs)
-
-¶Compute the DrugLike Soft rule available in FAF-Drugs4. -The rules are described at https://fafdrugs4.rpbs.univ-paris-diderot.fr/filters.html
-It computes: -
MW in [100, 600] & logP in [-3, 6] & HBD <= 7 & HBA <= 12 & TPSA <=180 & ROTBONDS <= 11 &
-RIGBONDS <= 30 & N_RINGS <= 6 & MAX_SIZE_RING <= 18 & N_CARBONS in [3, 35] & N_HETEROATOMS in [1, 15] &
-HC_RATIO in [0.1, 1.1] & CHARGE in [-4, 4] & N_ATOM_CHARGE <= 4
-
Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
-
- Union[dm.Mol, str]
- |
-
-
-
- input molecule - |
- - required - | -
mw |
-
- Optional[float]
- |
-
-
-
- precomputed molecular weight. Defaults to None. - |
-
- None
- |
-
clogp |
-
- Optional[float]
- |
-
-
-
- precomputed cLogP. Defaults to None. - |
-
- None
- |
-
n_hba |
-
- Optional[float]
- |
-
-
-
- precomputed number of HBA. Defaults to None. - |
-
- None
- |
-
n_hbd |
-
- Optional[float]
- |
-
-
-
- precomputed number of HBD. Defaults to None. - |
-
- None
- |
-
tpsa |
-
- Optional[float]
- |
-
-
-
- precomputed TPSA. Defaults to None. - |
-
- None
- |
-
n_rotatable_bonds |
-
- Optional[int]
- |
-
-
-
- precomputed number of rotatable bonds. Defaults to None. - |
-
- None
- |
-
n_rings |
-
- Optional[int]
- |
-
-
-
- precomputed number of rings in the molecules. Defaults to None. - |
-
- None
- |
-
n_hetero_atoms |
-
- Optional[int]
- |
-
-
-
- precomputed number of heteroatoms. Defaults to None. - |
-
- None
- |
-
charge |
-
- Optional[float]
- |
-
-
-
- precomputed charge. Defaults to None. - |
-
- None
- |
-
rule_of_egan(mol, clogp=None, tpsa=None, **kwargs)
-
-¶Compute passive intestinal absorption according to Egan Rules as described in: -Egan, William J., Kenneth M. Merz, and John J. Baldwin (2000) Prediction of drug absorption using multivariate statistics
-It computes: TPSA in [0, 132] & logP in [-1, 6]
Note
-The author built a multivariate statistics model of passive intestinal absorption with robust outlier detection. -Outliers were identified as being actively transported. They chose PSA and AlogP98 (cLogP), based on consideration of the physical processes -involved in membrane permeability and the interrelationships and redundancies between other available descriptors. -Compounds, which had been assayed for Caco-2 cell permeability, demonstrated a good rate of successful predictions (74−92%)
-Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
-
- Union[dm.Mol, str]
- |
-
-
-
- input molecule - |
- - required - | -
clogp |
-
- Optional[float]
- |
-
-
-
- precomputed cLogP. Defaults to None. - |
-
- None
- |
-
tpsa |
-
- Optional[float]
- |
-
-
-
- precomputed TPSA. Defaults to None. - |
-
- None
- |
-
Returns:
-Name | Type | -Description | -
---|---|---|
roe | - | -
-
-
- True if molecule is compliant, False otherwise - |
-
rule_of_five(mol, mw=None, clogp=None, n_lipinski_hbd=None, n_lipinski_hba=None, **kwargs)
-
-¶Compute the Lipinski's rule-of-5 for a molecule. Also known as Pfizer's rule of five or RO5, -this rule is a rule of thumb to evaluate the druglikeness of a chemical compound
-It computes: MW <= 500 & logP <= 5 & HBD <= 5 & HBA <= 10
Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
-
- Union[dm.Mol, str]
- |
-
-
-
- input molecule - |
- - required - | -
mw |
-
- Optional[float]
- |
-
-
-
- precomputed molecular weight. Defaults to None. - |
-
- None
- |
-
clogp |
-
- Optional[float]
- |
-
-
-
- precomputed cLogP. Defaults to None. - |
-
- None
- |
-
n_lipinski_hbd |
-
- Optional[float]
- |
-
-
-
- precomputed number of HBD. Defaults to None. - |
-
- None
- |
-
n_lipinski_hba |
-
- Optional[float]
- |
-
-
-
- precomputed number of HBA. Defaults to None. - |
-
- None
- |
-
Returns:
-Name | Type | -Description | -
---|---|---|
ro5 | - | -
-
-
- True if molecule is compliant, False otherwise - |
-
rule_of_five_beyond(mol, mw=None, clogp=None, n_hbd=None, n_hba=None, tpsa=None, n_rotatable_bonds=None, **kwargs)
-
-¶Compute the Beyond rule-of-5 rule for a molecule. This rule illustrates the potential of compounds far beyond rule of 5 space to -modulate novel and difficult target classes that have large, flat, and groove-shaped binding sites and has been described in:
-Doak, Bradley C., et al. (2015) How Beyond Rule of 5 Drugs and Clinical Candidates Bind to Their Targets.
-It computes: MW <= 1000 & logP in [-2, 10] & HBD <= 6 & HBA <= 15 & TPSA <=250 & ROTBONDS <= 20
Note
-This is a very permissive rule and is likely to not be a good predictor for druglikeness as known for small molecules.
-Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
-
- Union[dm.Mol, str]
- |
-
-
-
- input molecule - |
- - required - | -
mw |
-
- Optional[float]
- |
-
-
-
- precomputed molecular weight. Defaults to None. - |
-
- None
- |
-
clogp |
-
- Optional[float]
- |
-
-
-
- precomputed cLogP. Defaults to None. - |
-
- None
- |
-
n_hbd |
-
- Optional[float]
- |
-
-
-
- precomputed number of HBD. Defaults to None. - |
-
- None
- |
-
n_hba |
-
- Optional[float]
- |
-
-
-
- precomputed number of HBA. Defaults to None. - |
-
- None
- |
-
tpsa |
-
- Optional[float]
- |
-
-
-
- precomputed TPSA. Defaults to None. - |
-
- None
- |
-
n_rotatable_bonds |
-
- Optional[int]
- |
-
-
-
- precomputed number of rotatable bonds. Defaults to None. - |
-
- None
- |
-
Returns:
-Name | Type | -Description | -
---|---|---|
ro5 | - | -
-
-
- True if molecule is compliant, False otherwise - |
-
rule_of_four(mol, mw=None, clogp=None, n_hba=None, n_rings=None, **kwargs)
-
-¶Compute the rule-of-4 for a molecule. The rule-of-4 defines a rule of thumb for PPI inhibitors, -which are typically larger and more lipophilic than inhibitors of more standard binding sites. It has been published in:
-Morelli X, Bourgeas R, Roche P. (2011) Chemical and structural lessons from recent successes in protein–protein interaction inhibition. -Also see: Shin et al. (2020) Current Challenges and Opportunities in Designing Protein–Protein Interaction Targeted Drugs. doi:10.2147/AABC.S235542
-It computes: MW >= 400 & logP >= 4 & RINGS >=4 & HBA >= 4
Warning
-Do not use this for small molecules that are not PPI inhibitors
-Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
-
- Union[dm.Mol, str]
- |
-
-
-
- input molecule - |
- - required - | -
mw |
-
- Optional[float]
- |
-
-
-
- precomputed molecular weight. Defaults to None. - |
-
- None
- |
-
clogp |
-
- Optional[float]
- |
-
-
-
- precomputed cLogP. Defaults to None. - |
-
- None
- |
-
n_hba |
-
- Optional[float]
- |
-
-
-
- precomputed number of HBA. Defaults to None. - |
-
- None
- |
-
n_rings |
-
- Optional[int]
- |
-
-
-
- precomputed number of rings in the molecules. Defaults to None. - |
-
- None
- |
-
Returns:
-Name | Type | -Description | -
---|---|---|
ro4 | - | -
-
-
- True if molecule is compliant, False otherwise - |
-
rule_of_generative_design(mol, mw=None, clogp=None, n_lipinski_hba=None, n_lipinski_hbd=None, tpsa=None, n_rotatable_bonds=None, n_hetero_atoms=None, charge=None, **kwargs)
-
-¶Compute druglikeness rule of generative design.
-This set of rules is proprietary to Valence Discovery and has been curated to better filter molecules -suggested by generative models for small molecules
-It computes:
-MW in [200, 600] & logP in [-3, 6] & HBD <= 7 & HBA <= 12 & TPSA in [40, 180] &
-ROTBONDS <= 15 & RIGID BONDS <= 30 & N_AROMATIC_RINGS <= 5 & N_FUSED_AROMATIC_RINGS_TOGETHER <= 2 &
-MAX_SIZE_RING_SYSTEM <= 18 & N_CARBONS in [3, 40] & N_HETEROATOMS in [1, 15] & CHARGE in [-2, 2] &
-N_ATOM_CHARGE <= 2 & N_TOTAL_ATOMS < 70 & N_HEAVY_METALS < 1
-
Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
-
- Union[dm.Mol, str]
- |
-
-
-
- input molecule - |
- - required - | -
mw |
-
- Optional[float]
- |
-
-
-
- precomputed molecular weight. Defaults to None. - |
-
- None
- |
-
clogp |
-
- Optional[float]
- |
-
-
-
- precomputed cLogP. Defaults to None. - |
-
- None
- |
-
n_lipinski_hba |
-
- Optional[float]
- |
-
-
-
- precomputed number of HBA. Defaults to None. - |
-
- None
- |
-
n_lipinski_hbd |
-
- Optional[float]
- |
-
-
-
- precomputed number of HBD. Defaults to None. - |
-
- None
- |
-
tpsa |
-
- Optional[float]
- |
-
-
-
- precomputed TPSA. Defaults to None. - |
-
- None
- |
-
n_rotatable_bonds |
-
- Optional[int]
- |
-
-
-
- precomputed number of rotatable bonds. Defaults to None. - |
-
- None
- |
-
n_hetero_atoms |
-
- Optional[int]
- |
-
-
-
- precomputed number of heteroatoms. Defaults to None. - |
-
- None
- |
-
charge |
-
- Optional[float]
- |
-
-
-
- precomputed charge. Defaults to None. - |
-
- None
- |
-
rule_of_generative_design_strict(mol, mw=None, clogp=None, n_lipinski_hba=None, n_lipinski_hbd=None, tpsa=None, n_rotatable_bonds=None, n_hetero_atoms=None, charge=None, **kwargs)
-
-¶Compute druglikeness rule of generative design.
-This set of rules is proprietary to Valence Discovery and has been curated to better filter molecules -suggested by generative models
-It computes:
-MW in [200, 600] & logP in [-3, 6] & HBD <= 7 & HBA <= 12 & TPSA in [40, 180] &
-ROTBONDS <= 15 & RIGID BONDS <= 30 & N_AROMATIC_RINGS <= 5 & N_FUSED_AROMATIC_RINGS_TOGETHER <= 2 &
-MAX_SIZE_RING_SYSTEM <= 18 & N_CARBONS in [3, 40] & N_HETEROATOMS in [1, 15] & CHARGE in [-2, 2] &
-N_ATOM_CHARGE <= 2 & N_TOTAL_ATOMS < 70 & N_HEAVY_METALS < 1 & N_STEREO_CENTER <= 3 &
-HAS_NO_SPIDER_SIDE_CHAINS & FRACTION_RING_SYSTEM >= 0.25
-
By default SPIDER_SIDE_CHAINS are defined as having at least 2 'chains' of >=4 consecutive atoms in side chains (not part of any ring system)
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
-
- Union[dm.Mol, str]
- |
-
-
-
- input molecule - |
- - required - | -
mw |
-
- Optional[float]
- |
-
-
-
- precomputed molecular weight. Defaults to None. - |
-
- None
- |
-
clogp |
-
- Optional[float]
- |
-
-
-
- precomputed cLogP. Defaults to None. - |
-
- None
- |
-
n_lipinski_hba |
-
- Optional[float]
- |
-
-
-
- precomputed number of HBA. Defaults to None. - |
-
- None
- |
-
n_lipinski_hbd |
-
- Optional[float]
- |
-
-
-
- precomputed number of HBD. Defaults to None. - |
-
- None
- |
-
tpsa |
-
- Optional[float]
- |
-
-
-
- precomputed TPSA. Defaults to None. - |
-
- None
- |
-
n_rotatable_bonds |
-
- Optional[int]
- |
-
-
-
- precomputed number of rotatable bonds. Defaults to None. - |
-
- None
- |
-
n_hetero_atoms |
-
- Optional[int]
- |
-
-
-
- precomputed number of heteroatoms. Defaults to None. - |
-
- None
- |
-
charge |
-
- Optional[float]
- |
-
-
-
- precomputed charge. Defaults to None. - |
-
- None
- |
-
rule_of_ghose(mol, mw=None, clogp=None, mr=None, **kwargs)
-
-¶Compute the Ghose filter. The Ghose filter is a drug-like filter described in: -Ghose, AK.; Viswanadhan, VN.; Wendoloski JJ. (1999) A knowledge-based approach in designing combinatorial or medicinal -chemistry libraries for drug discovery.1. A qualitative and quantitative characterization of known drug databases.
-It computes: MW in [160, 480] & logP in [-0.4, 5.6] & Natoms in [20, 70] & refractivity in [40, 130]
Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
-
- Union[dm.Mol, str]
- |
-
-
-
- input molecule - |
- - required - | -
mw |
-
- Optional[float]
- |
-
-
-
- precomputed molecular weight. Defaults to None. - |
-
- None
- |
-
clogp |
-
- Optional[float]
- |
-
-
-
- precomputed cLogP. Defaults to None. - |
-
- None
- |
-
mr |
-
- Optional[float]
- |
-
-
-
- precomputed molecule refractivity. Defaults to None. - |
-
- None
- |
-
Returns:
-Name | Type | -Description | -
---|---|---|
rog | - | -
-
-
- True if molecule is compliant, False otherwise - |
-
rule_of_gsk_4_400(mol, mw=None, clogp=None, **kwargs)
-
-¶Compute GSK Rule (4/400) for druglikeness using interpretable ADMET rule of thumb based on -Gleeson, M. Paul (2008). Generation of a set of simple, interpretable ADMET rules of thumb.
-It computes: MW <= 400 & logP <= 4
.
Note
-Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
-
- Union[dm.Mol, str]
- |
-
-
-
- input molecule - |
- - required - | -
clogp |
-
- Optional[float]
- |
-
-
-
- precomputed cLogP. Defaults to None. - |
-
- None
- |
-
Returns:
-Name | Type | -Description | -
---|---|---|
rog | - | -
-
-
- True if molecule is compliant, False otherwise - |
-
rule_of_leadlike_soft(mol, mw=None, clogp=None, n_hba=None, n_hbd=None, tpsa=None, n_rotatable_bonds=None, n_rings=None, n_hetero_atoms=None, charge=None, **kwargs)
-
-¶Compute the Lead-Like Soft rule available in FAF-Drugs4. -The rules are described at https://fafdrugs4.rpbs.univ-paris-diderot.fr/filters.html
-It computes: -
MW in [150, 400] & logP in [-3, 4] & HBD <= 4 & HBA <= 7 & TPSA <=160 & ROTBONDS <= 9 &
-RIGBONDS <= 30 & N_RINGS <= 4 & MAX_SIZE_RING <= 18 & N_CARBONS in [3, 35] & N_HETEROATOMS in [1, 15] &
-HC_RATIO in [0.1, 1.1] & CHARGE in [-4, 4] & N_ATOM_CHARGE <= 4 & N_STEREO_CENTER <= 2
-
Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
-
- Union[dm.Mol, str]
- |
-
-
-
- input molecule - |
- - required - | -
mw |
-
- Optional[float]
- |
-
-
-
- precomputed molecular weight. Defaults to None. - |
-
- None
- |
-
clogp |
-
- Optional[float]
- |
-
-
-
- precomputed cLogP. Defaults to None. - |
-
- None
- |
-
n_hba |
-
- Optional[float]
- |
-
-
-
- precomputed number of HBA. Defaults to None. - |
-
- None
- |
-
n_hbd |
-
- Optional[float]
- |
-
-
-
- precomputed number of HBD. Defaults to None. - |
-
- None
- |
-
tpsa |
-
- Optional[float]
- |
-
-
-
- precomputed TPSA. Defaults to None. - |
-
- None
- |
-
n_rotatable_bonds |
-
- Optional[int]
- |
-
-
-
- precomputed number of rotatable bonds. Defaults to None. - |
-
- None
- |
-
n_rings |
-
- Optional[int]
- |
-
-
-
- precomputed number of rings in the molecules. Defaults to None. - |
-
- None
- |
-
n_hetero_atoms |
-
- Optional[int]
- |
-
-
-
- precomputed number of heteroatoms. Defaults to None. - |
-
- None
- |
-
charge |
-
- Optional[float]
- |
-
-
-
- precomputed charge. Defaults to None. - |
-
- None
- |
-
rule_of_oprea(mol, n_hba=None, n_hbd=None, n_rotatable_bonds=None, n_rings=None, **kwargs)
-
-¶Computes Oprea's rule of drug likeness obtained by comparing drug vs non drug compounds across multiple datasets. -The rules have been described in: Oprea (2000) Property distribution of drug-related chemical databases*
-It computes: HBD in [0, 2] & HBA in [2, 9] & ROTBONDS in [2,8] and RINGS in [1, 4]
Note
-Seventy percent of the `drug-like' compounds were found between the following limits: 0 ≤ HDO ≤ 2, 2 ≤ HAC ≤ 9, 2 ≤ RTB ≤ 8, and 1 ≤ RNG ≤ 4
-Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
-
- Union[dm.Mol, str]
- |
-
-
-
- input molecule - |
- - required - | -
n_hba |
-
- Optional[float]
- |
-
-
-
- precomputed number of HBA. Defaults to None. - |
-
- None
- |
-
n_hbd |
-
- Optional[float]
- |
-
-
-
- precomputed number of HBD. Defaults to None. - |
-
- None
- |
-
n_rotatable_bonds |
-
- Optional[int]
- |
-
-
-
- precomputed number of rotatable bonds in the molecule. Defaults to None. - |
-
- None
- |
-
n_rings |
-
- Optional[int]
- |
-
-
-
- precomputed number of rings in the molecule. Defaults to None. - |
-
- None
- |
-
Returns - roo: True if molecule is compliant, False otherwise
- -rule_of_pfizer_3_75(mol, clogp=None, tpsa=None, **kwargs)
-
-¶Compute Pfizer Rule(3/75 Rule) for invivo toxicity. It has been described in: -* Hughes, et al. (2008) Physiochemical drug properties associated with in vivo toxicological outcomes. -* Price et al. (2009) Physicochemical drug properties associated with in vivo toxicological outcomes: a review
-It computes: ! (TPSA < 75 & logP > 3)
Note
-Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
-
- Union[dm.Mol, str]
- |
-
-
-
- input molecule - |
- - required - | -
clogp |
-
- Optional[float]
- |
-
-
-
- precomputed cLogP. Defaults to None. - |
-
- None
- |
-
tpsa |
-
- Optional[float]
- |
-
-
-
- precomputed TPSA. Defaults to None. - |
-
- None
- |
-
Returns:
-Name | Type | -Description | -
---|---|---|
rop | - | -
-
-
- True if molecule is compliant, False otherwise - |
-
rule_of_reos(mol, mw=None, clogp=None, n_hba=None, n_hbd=None, charge=None, n_rotatable_bonds=None, n_heavy_atoms=None, **kwargs)
-
-¶Compute the REOS filter. The REOS filter is a filter designed to filter out unuseful compounds from HTS screening results. -The filter is described in: Waters & Namchuk (2003) Designing screens: how to make your hits a hit.
-It computes: MW in [200, 500] & logP in [-5, 5] & HBA in [0, 10] & HBD in [0, 5] & charge in [-2, 2] & ROTBONDS in [0, 8] & NHeavyAtoms in [15, 50]
Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
-
- Union[dm.Mol, str]
- |
-
-
-
- input molecule - |
- - required - | -
mw |
-
- Optional[float]
- |
-
-
-
- precomputed molecular weight. Defaults to None. - |
-
- None
- |
-
clogp |
-
- Optional[float]
- |
-
-
-
- precomputed cLogP. Defaults to None. - |
-
- None
- |
-
n_hba |
-
- Optional[float]
- |
-
-
-
- precomputed number of HBA. Defaults to None. - |
-
- None
- |
-
n_hbd |
-
- Optional[float]
- |
-
-
-
- precomputed number of HBD. Defaults to None. - |
-
- None
- |
-
charge |
-
- Optional[int]
- |
-
-
-
- precomputed formal charge. Defaults to None. - |
-
- None
- |
-
n_rotatable_bonds |
-
- Optional[int]
- |
-
-
-
- precomputed number of rotatable bonds in the molecule. Defaults to None. - |
-
- None
- |
-
n_heavy_atoms |
-
- Optional[int]
- |
-
-
-
- precomputed number of heavy atoms in the molecule. Defaults to None. - |
-
- None
- |
-
Returns:
-Name | Type | -Description | -
---|---|---|
ror | - | -
-
-
- True if molecule is compliant, False otherwise - |
-
rule_of_respiratory(mol, mw=None, clogp=None, n_hba=None, n_hbd=None, tpsa=None, n_rotatable_bonds=None, n_rings=None, **kwargs)
-
-¶Computes drug likeness rule for Respiratory (nasal/inhalatory) molecules as described in -Ritchie et al. (2009) Analysis of the Calculated Physicochemical Properties of Respiratory Drugs: Can We Design for Inhaled Drugs Yet?
-It computes: MW in [240, 520] & logP in [-2, 4.7] & HBONDS in [6, 12] & TPSA in [51, 135] & ROTBONDS in [3,8] & RINGS in [1,5]
Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
-
- Union[dm.Mol, str]
- |
-
-
-
- input molecule - |
- - required - | -
mw |
-
- Optional[float]
- |
-
-
-
- precomputed molecular weight. Defaults to None. - |
-
- None
- |
-
clogp |
-
- Optional[float]
- |
-
-
-
- precomputed logP. Defaults to None. - |
-
- None
- |
-
n_hba |
-
- Optional[float]
- |
-
-
-
- precomputed number of HBA. Defaults to None. - |
-
- None
- |
-
n_hbd |
-
- Optional[float]
- |
-
-
-
- precomputed number of HBD. Defaults to None. - |
-
- None
- |
-
tpsa |
-
- Optional[int]
- |
-
-
-
- precomputed TPSA. Defaults to None. - |
-
- None
- |
-
n_rotatable_bonds |
-
- Optional[int]
- |
-
-
-
- precomputed number of rotatable bonds in the molecule. Defaults to None. - |
-
- None
- |
-
n_rings |
-
- Optional[int]
- |
-
-
-
- precomputed number of rings. Defaults to None - |
-
- None
- |
-
Returns:
-Name | Type | -Description | -
---|---|---|
roc | - | -
-
-
- True if molecule is compliant, False otherwise - |
-
rule_of_three(mol, mw=None, clogp=None, n_hba=None, n_hbd=None, n_rotatable_bonds=None, **kwargs)
-
-¶Compute the rule-of-3. The rule-of-three is a rule of thumb for molecular fragments (and not small molecules) published in:
-Congreve M, Carr R, Murray C, Jhoti H. (2003) A "rule of three" for fragment-based lead discovery?
.
It computes: MW <= 300 & logP <= 3 & HBA <= 3 & HBD <= 3 & ROTBONDS <= 3
Note
-TPSA is not used in this version of the rule of three. Other version uses TPSA <= 60 AND logP in [-3, 3]
in addition
Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
-
- Union[dm.Mol, str]
- |
-
-
-
- input molecule - |
- - required - | -
mw |
-
- Optional[float]
- |
-
-
-
- precomputed molecular weight. Defaults to None. - |
-
- None
- |
-
clogp |
-
- Optional[float]
- |
-
-
-
- precomputed cLogP. Defaults to None. - |
-
- None
- |
-
n_hba |
-
- Optional[float]
- |
-
-
-
- precomputed number of HBA. Defaults to None. - |
-
- None
- |
-
n_hbd |
-
- Optional[float]
- |
-
-
-
- precomputed number of HBD. Defaults to None. - |
-
- None
- |
-
n_rotatable_bonds |
-
- Optional[int]
- |
-
-
-
- precomputed number of rotatable bonds in the molecule. Defaults to None. - |
-
- None
- |
-
Returns:
-Name | Type | -Description | -
---|---|---|
ro3 | - | -
-
-
- True if molecule is compliant, False otherwise - |
-
rule_of_three_extended(mol, mw=None, clogp=None, n_hba=None, n_hbd=None, tpsa=None, n_rotatable_bonds=None, **kwargs)
-
-¶Compute the extended rule-of-3. This is an extension of the rule of three that computes:
-It computes: MW <= 300 & logP in [-3, 3] & HBA <= 6 & HBD <= 3 & ROTBONDS <= 3 & TPSA <= 60
Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
-
- Union[dm.Mol, str]
- |
-
-
-
- input molecule - |
- - required - | -
mw |
-
- Optional[float]
- |
-
-
-
- precomputed molecular weight. Defaults to None. - |
-
- None
- |
-
clogp |
-
- Optional[float]
- |
-
-
-
- precomputed cLogP. Defaults to None. - |
-
- None
- |
-
n_hba |
-
- Optional[float]
- |
-
-
-
- precomputed number of HBA. Defaults to None. - |
-
- None
- |
-
n_hbd |
-
- Optional[float]
- |
-
-
-
- precomputed number of HBD. Defaults to None. - |
-
- None
- |
-
tpsa |
-
- Optional[float]
- |
-
-
-
- precomputed TPSA. Defaults to None. - |
-
- None
- |
-
n_rotatable_bonds |
-
- Optional[int]
- |
-
-
-
- precomputed number of rotatable bonds in the molecule. Defaults to None. - |
-
- None
- |
-
Returns:
-Name | Type | -Description | -
---|---|---|
ro3 | - | -
-
-
- True if molecule is compliant, False otherwise - |
-
rule_of_two(mol, mw=None, clogp=None, n_hba=None, n_hbd=None, **kwargs)
-
-¶Computes rules-of-2 for reagent (building block design). It aims for prioritization of reagents that typically -do not add more than 200 Da in MW or 2 units of clogP. The rule of two has been described in:
-Goldberg et al. (2015) Designing novel building blocks is an overlooked strategy to improve compound quality -see: http://csmres.co.uk/cs.public.upd/article-downloads/Designing-novel-building-blocks.pdf
-Note
-Their analysis showed that molecular weight (MW) and clogP were important factors in the frequency of use of reagents. -Other parameters, such as TPSA, HBA, HBD and ROTBONDS count, were less important.
-It computes MW <= 200 & logP <= 2 & HBA <= 4 & HBD <= 2
Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
-
- Union[dm.Mol, str]
- |
-
-
-
- input molecule - |
- - required - | -
mw |
-
- Optional[float]
- |
-
-
-
- precomputed molecular weight. Defaults to None. - |
-
- None
- |
-
clogp |
-
- Optional[float]
- |
-
-
-
- precomputed cLogP. Defaults to None. - |
-
- None
- |
-
n_hba |
-
- Optional[float]
- |
-
-
-
- precomputed number of HBA. Defaults to None. - |
-
- None
- |
-
n_hbd |
-
- Optional[float]
- |
-
-
-
- precomputed number of HBD. Defaults to None. - |
-
- None
- |
-
Returns:
-Name | Type | -Description | -
---|---|---|
ro2 | - | -
-
-
- True if molecule is compliant, False otherwise - |
-
rule_of_veber(mol, tpsa=None, n_rotatable_bonds=None, **kwargs)
-
-¶Compute the Veber filter. The Veber filter is a druglike filter for orally active drugs described in:
-Veber et. al. (2002) Molecular Properties That Influence the Oral Bioavailability of Drug Candidates.
-It computes: ROTBONDS <= 10 & TPSA < 140
Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
-
- Union[dm.Mol, str]
- |
-
-
-
- input molecule - |
- - required - | -
tpsa |
-
- Optional[float]
- |
-
-
-
- precomputed TPSA. Defaults to None. - |
-
- None
- |
-
n_rotatable_bonds |
-
- Optional[int]
- |
-
-
-
- precomputed number of rotatable bonds. Defaults to None. - |
-
- None
- |
-
Returns:
-Name | Type | -Description | -
---|---|---|
rov | - | -
-
-
- True if molecule is compliant, False otherwise - |
-
rule_of_xu(mol, n_hba=None, n_hbd=None, n_rotatable_bonds=None, n_rings=None, n_heavy_atoms=None, **kwargs)
-
-¶Computes Xu's rule of drug likeness as described in: -Xu & Stevenson (2000), Drug-like Index: A New Approach To Measure Drug-like Compounds and Their Diversity
-It computes HBD <= 5 & HBA <= 10 & ROTBONDS in [2, 35] & RINGS in [1, 7] & NHeavyAtoms in [10, 50]
.
Note
-A compound's Drug Likeness Index is calculated based upon the knowledge derived from known drugs selected from Comprehensive Medicinal Chemistry (CMC) database.
-Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
-
- Union[dm.Mol, str]
- |
-
-
-
- input molecule - |
- - required - | -
n_hba |
-
- Optional[float]
- |
-
-
-
- precomputed number of HBA. Defaults to None. - |
-
- None
- |
-
n_hbd |
-
- Optional[float]
- |
-
-
-
- precomputed number of HBD. Defaults to None. - |
-
- None
- |
-
n_rotatable_bonds |
-
- Optional[int]
- |
-
-
-
- precomputed number of rotatable bonds in the molecule. Defaults to None. - |
-
- None
- |
-
n_rings |
-
- Optional[int]
- |
-
-
-
- precomputed number of rings in the molecule. Defaults to None. - |
-
- None
- |
-
n_heavy_atoms |
-
- Optional[int]
- |
-
-
-
- precomputed number of heavy atoms in the molecule. Defaults to None. - |
-
- None
- |
-
Returns - rox: True if molecule is compliant, False otherwise
- -rule_of_zinc(mol, mw=None, clogp=None, n_hba=None, n_hbd=None, tpsa=None, n_rotatable_bonds=None, n_rings=None, charge=None, **kwargs)
-
-¶Compute the Zinc rule for a molecule. This rule is a rule of thumb to evaluate the druglikeness of chemical compounds, based on:
-Irwin & Shoichet (2005) ZINC - A Free Database of Commercially Available Compounds for Virtual Screening.
-Also see: https://fafdrugs4.rpbs.univ-paris-diderot.fr/filters.html
-It computes: MW in [60, 600] & logP in [-4, 6] & HBD <= 6 & HBA <= 11 & TPSA <=150 & ROTBONDS <= 12 & RIGBONDS <= 50 & N_RINGS <= 7 & MAX_SIZE_RING <= 12 & N_CARBONS >=3 & HC_RATIO <= 2.0 & CHARGE in [-4, 4]
Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
-
- Union[dm.Mol, str]
- |
-
-
-
- input molecule - |
- - required - | -
mw |
-
- Optional[float]
- |
-
-
-
- precomputed molecular weight. Defaults to None. - |
-
- None
- |
-
clogp |
-
- Optional[float]
- |
-
-
-
- precomputed cLogP. Defaults to None. - |
-
- None
- |
-
n_hba |
-
- Optional[float]
- |
-
-
-
- precomputed number of HBA. Defaults to None. - |
-
- None
- |
-
n_hbd |
-
- Optional[float]
- |
-
-
-
- precomputed number of HBD. Defaults to None. - |
-
- None
- |
-
tpsa |
-
- Optional[float]
- |
-
-
-
- precomputed TPSA. Defaults to None. - |
-
- None
- |
-
n_rotatable_bonds |
-
- Optional[int]
- |
-
-
-
- precomputed number of rotatable bonds. Defaults to None. - |
-
- None
- |
-
n_rings |
-
- Optional[int]
- |
-
-
-
- precomputed number of rings in the molecules. Defaults to None. - |
-
- None
- |
-
charge |
-
- Optional[float]
- |
-
-
-
- precomputed charge. Defaults to None. - |
-
- None
- |
-
medchem.rules.rule_filter
-
-
-¶RuleFilters
-
-
-¶Build a filter based on a compound's physicochemical properties. For a list of default rules, use RuleFilters.list_available_rules()
.
-Most of these rules have been collected from the literature including https://fafdrugs4.rpbs.univ-paris-diderot.fr/descriptors.html
__call__(mols, n_jobs=None, progress=False, scheduler='processes')
-
-¶Compute the rules for a list of molecules
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mols |
-
- List[Union[str, dm.Mol]]
- |
-
-
-
- list of input molecule object. - |
- - required - | -
n_jobs |
-
- Optional[int]
- |
-
-
-
- number of jobs to run in parallel. Defaults to None. - |
-
- None
- |
-
progress |
-
- bool
- |
-
-
-
- whether to show progress or not. Defaults to False. - |
-
- False
- |
-
scheduler |
-
- str
- |
-
-
-
- which scheduler to use. Defaults to "processes". - |
-
- 'processes'
- |
-
Returns:
-Name | Type | -Description | -
---|---|---|
df | - | -
-
-
- Dataframe where each row is a molecule and each column is the outcome of applying self.rules[column]. - |
-
__getitems__(ind)
-
-¶Return a specific rule
- -__init__(rule_list, rule_list_names=None, precompute_props=True)
-
-¶Build a rule filtering object
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
rule_list |
-
- List[Union[str, Callable]]
- |
-
-
-
- list of rules to apply. Either a callable that takes a molecule as input (with kwargs) or a string -of the name of a pre-defined rule as defined in the basic_rules module - |
- - required - | -
rule_list_names |
-
- Optional[List[str]]
- |
-
-
-
- Name of the rules passed as inputs. Defaults to None. - |
-
- None
- |
-
precompute_props |
-
- bool
- |
-
-
-
- Whether to precompute the properties for all molecules to speed up redundant calculation. Defaults to True. - |
-
- True
- |
-
__len__()
-
-¶Return the number of rules inside this filter
- -list_available_rules(query=None)
-
-
- cached
- staticmethod
-
-
-¶List all the available rules and their properties
- -list_available_rules_names(query=None)
-
-
- cached
- staticmethod
-
-
-¶List only the names of the available rules
- -medchem.utils
¶medchem.utils.smarts
-
-
-¶SMARTSUtils
-
-
-¶Collection of utilities to build complex SMARTS queries more efficiently for non-experienced users
- - - - - -aliphatic_chain(min_size=6, unbranched=False, unsaturated_bondtype=None, allow_hetero_atoms=True)
-
-
- classmethod
-
-
-¶Returns a query that can match a long aliphatic chain
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
min_size |
-
- int
- |
-
-
-
- minimum size of the long chain - |
-
- 6
- |
-
unbranched |
-
- bool
- |
-
-
-
- whether the chain should be unbranched - |
-
- False
- |
-
unsaturated_bondtype |
-
- Optional[str]
- |
-
-
-
- additional unsaturated bond type to use for the query. By default, Any bond type (~) is used. -Single bonds ARE always allowed and bondtype cannot be aromatic - |
-
- None
- |
-
allow_hetero_atoms |
-
- bool
- |
-
-
-
- whether the chain can contain hetero atoms - |
-
- True
- |
-
to build a query for a long aliphatic chain of at least 5 atoms (e.g: 'CCC(C)CCC')
-------SMARTSUtils.aliphatic_chain(min_size=5)
-
Returns:
-Name | Type | -Description | -
---|---|---|
smarts | - | -
-
-
- smarts pattern matching a long aliphatic chain - |
-
atom_in_env(*smarts_strs, include_atoms=False, union=False)
-
-
- classmethod
-
-
-¶Returns a recursive/group smarts to find an atom that fits in the environments as defined by all the input smarts
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
smarts_strs |
- - | -
-
-
- list of input patterns defining the environment the atom must fit in. The first atom of each pattern -should be the atom we want to match to, unless include_atoms is set to True, then [*:99] will be added at the start of each pattern - |
-
- ()
- |
-
include_atoms |
-
- bool
- |
-
-
-
- whether to include an additional first atom that needs to be in the required environment or not - |
-
- False
- |
-
union |
-
- bool
- |
-
-
-
- whether to use the union of the environments or the intersection - |
-
- False
- |
-
you can use this function to construct a complex query if you are not sure about how to write the smarts -for example, to find a carbon atom that is both in a ring of size 6, bonded to an ethoxy and has a Fluorine in meta
-------SMARTSUtils.atom_in_env("[#6;r6][OD2][C&D1]", "[c]aa[F]", union=False) # there are alternative way to write this
-
Returns:
-Name | Type | -Description | -
---|---|---|
smarts | - | -
-
-
- smarts pattern matching the group/environment - |
-
different_fragment(*smarts_strs)
-
-
- classmethod
-
-
-¶Returns a new query that match patterns that are in different fragments.
-Warning
-This feature is not supported yet by RDKit. See https://github.com/rdkit/rdkit/issues/1261
-Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
smarts_strs |
- - | -
-
-
- list of input patterns defining the fragments - |
-
- ()
- |
-
matching two oxygens in a molecule will work with '[#8].[#8]', but if you want the -oxygens to be in DIFFERENT fragments, then build the query with:
-------SMARTSUtils.different_fragment('[#8]', '[#8]')
-
Returns:
-Name | Type | -Description | -
---|---|---|
smarts | - | -
-
-
- smarts pattern matching patterns that are in different fragments - |
-
meta(smarts_str1, smarts_str2, aromatic_only=False)
-
-
- classmethod
-
-
-¶Returns a recursive smarts string connecting the two input smarts in meta
of each other.
-Connection points need to be through single or double bonds
Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
smarts_str1 |
-
- str
- |
-
-
-
- first smarts pattern defining the first functional group - |
- - required - | -
smarts_str2 |
-
- str
- |
-
-
-
- second smarts pattern defining the second functional group - |
- - required - | -
aromatic_only |
-
- bool
- |
-
-
-
- whether the ring needs to be aromatic or not - |
-
- False
- |
-
to build a smarts for a methyl group in meta to an oxygen (e.g: 'c1c(C)cc(O)cc1')
-------SMARTSUtils.meta('[#6;!R]', '[#8]')
-
Returns:
-Name | Type | -Description | -
---|---|---|
smarts | - | -
-
-
- smarts pattern connecting the two input smarts in |
-
ortho(smarts_str1, smarts_str2, aromatic_only=False)
-
-
- classmethod
-
-
-¶Returns a recursive smarts string connecting the two input smarts in ortho
of each other.
-Connection points need to be through single or double bonds
Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
smarts_str1 |
-
- str
- |
-
-
-
- first smarts pattern defining the first functional group - |
- - required - | -
smarts_str2 |
-
- str
- |
-
-
-
- second smarts pattern defining the second functional group - |
- - required - | -
aromatic_only |
-
- bool
- |
-
-
-
- whether the ring needs to be aromatic or not - |
-
- False
- |
-
to build a smarts for a methyl group in ortho to an oxygen (e.g: 'C1CC(C)C(O)CC1')
-------SMARTSUtils.ortho('[#6;!R]', '[#8]')
-
Returns:
-Name | Type | -Description | -
---|---|---|
smarts | - | -
-
-
- smarts pattern connecting the two input smarts in |
-
para(smarts_str1, smarts_str2, aromatic_only=False)
-
-
- classmethod
-
-
-¶Returns a recursive smarts string connecting the two input smarts in para
of each other.
-Connection points need to be through single or double bonds
Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
smarts_str1 |
-
- str
- |
-
-
-
- first smarts pattern defining the first functional group - |
- - required - | -
smarts_str2 |
-
- str
- |
-
-
-
- second smarts pattern defining the second functional group - |
- - required - | -
aromatic_only |
-
- bool
- |
-
-
-
- whether the ring needs to be aromatic or not - |
-
- False
- |
-
to build a smarts for a methyl group in para to an oxygen (e.g: 'c1(C)ccc(O)cc1')
-------SMARTSUtils.para('[#6;!R]', '[#8]')
-
Returns:
-Name | Type | -Description | -
---|---|---|
smarts | - | -
-
-
- smarts pattern connecting the two input smarts in |
-
same_fragment(*smarts_strs)
-
-
- classmethod
-
-
-¶Returns a new query that match patterns that are in THE SAME fragment (component)
-Warning
-This feature is not supported yet by RDKit. See https://github.com/rdkit/rdkit/issues/1261
-Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
smarts_strs |
- - | -
-
-
- list of input patterns defining the fragments - |
-
- ()
- |
-
matching two oxygens in a molecule will work with '[#8].[#8]', but if you want the -oxygens to be in the SAME fragment, then build the query with:
-------SMARTSUtils.same_fragment('[#8]', '[#8]')
-
Returns:
-Name | Type | -Description | -
---|---|---|
smarts | - | -
-
-
- smarts pattern matching patterns that are in the same component - |
-
standardize_attachment(smiles, attach_tokens='[*:1]')
-
-
- classmethod
-
-
-¶Standardize an attachment point in a smiles
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
smiles |
-
- str
- |
-
-
-
- SMILES string - |
- - required - | -
attach_tokens |
-
- str
- |
-
-
-
- Attachment point token to use as standard token - |
-
- '[*:1]'
- |
-
medchem.utils.matches
-
-
-¶Constraints
-
-
-¶Complex query system for matches with additional constraints
-Example
-------mol1 = dm.to_mol("CN(C)C(=O)c1cncc(C)c1") -mol2 = dm.to_mol("c1ccc(cc1)-c1cccnc1") -core = dm.from_smarts("c1cncc([*:1])c1") -[atom.SetProp("query", "my_constraints") for atom in core.GetAtoms() if atom.GetAtomMapNum() == 1] -constraint_fns = dict(my_constraints=lambda x: dm.descriptors.n_aromatic_atoms(x) > 0) -constraint = Constraints(core, constraint_fns) -matches = [constraint(mol1), constraint(mol2)] # False, True
-
__call__(mol)
-
-¶Check if input molecule respects the constraints
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
- - | -
-
-
- input molecule - |
- - required - | -
__init__(core, constraint_fns, prop_name='query')
-
-¶Initialize the constraint matcher
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
core |
-
- dm.Mol
- |
-
-
-
- the scaffold/query molecule to match against. Needs to be a molecule - |
- - required - | -
constraint_fns |
-
- Dict[Callable]
- |
-
-
-
- a dictionary of constraints functions - |
- - required - | -
prop_name |
-
- str
- |
-
-
-
- the property name to use in the match at each atom defined by the core -for further matches against the constraints functions - |
-
- 'query'
- |
-
get_matches(mol, multiple=True)
-
-¶Get matches that respect the constraints in the molecules
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
-
- dm.Mol
- |
-
-
-
- input molecule - |
- - required - | -
multiple |
-
- bool
- |
-
-
-
- if True, return all the matches, if False, return the first match - |
-
- True
- |
-
has_match(mol)
-
-¶Check if input molecule respects the constraints
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
-
- dm.Mol
- |
-
-
-
- input molecule - |
- - required - | -
validate(mol, constraints)
-
-
- staticmethod
-
-
-¶Validate a list of constraint object against a molecule
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
- - | -
-
-
- the molecule object - |
- - required - | -
constraints |
-
- List[Constraints]
- |
-
-
-
- list of Constraints objects to validate against the molecule - |
- - required - | -
medchem.utils.loader
-
-
-¶get_data(file=None)
-
-¶Return the folder that contains the package specific data
- -get_grammar(grammar=None, as_string=False)
-
-¶Return the default lark grammar file for queries
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
grammar |
-
- Optional[os.PathLike]
- |
-
-
-
- The path to the grammar file. If None, the default grammar - |
-
- None
- |
-
as_string |
-
- bool
- |
-
-
-
- If True, return the grammar as a string. Defaults to False. - |
-
- False
- |
-
medchem.utils.graph
-
-
-¶automorphism(mol, standardize=True, node_attrs=DEFAULT_NODE_ATTR, edge_attrs=DEFAULT_EDGE_ATTR)
-
-¶Compute automorphism in a molecular graph
- -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
-
- Union[str, dm.Mol]
- |
-
-
-
- input molecular graph - |
- - required - | -
standardize |
-
- bool
- |
-
-
-
- whether to standardize the compound or not - |
-
- True
- |
-
node_attrs |
-
- List[str]
- |
-
-
-
- list of categorical atom attributes/properties to consider for node matching - |
-
- DEFAULT_NODE_ATTR
- |
-
edge_attrs |
-
- List[str]
- |
-
-
-
- list of categorical bond attributes/properties to consider for edge matching - |
-
- DEFAULT_EDGE_ATTR
- |
-
score_symmetry(mol, exclude_self_mapped_edged=False, **automorphism_kwargs)
-
-¶Provide a symmetry score for a given input molecule
-Note
-This is a heuristic and our definition of symmetry is pretty loose. -We define symmetry according to any (set of) planes dividing the molecule into two very similar subgraphs. -We include both edge and vertex transitivity. For example the star-molecular graph -(e.g neopentane) is symmetrical here, although it's not vertex-transitive. -For more information see https://github.com/valence-platform/medchem/pull/41
-Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
mol |
-
- Union[dm.Mol, str]
- |
-
-
-
- input molecule - |
- - required - | -
exclude_self_mapped_edged |
-
- bool
- |
-
-
-
- Whether to exclude edges that matches to themselves in automorphism. - |
-
- False
- |
-
automorphism_kwargs |
- - | -
-
-
- keyword for determining automorphism - |
-
- {}
- |
-
Package for applying common medchem filters to a dataset of molecules.
-This package contains various implementation of medchem rules collected from various sources that may be applied as filters on generated or screened molecules. It centralizes all common filters used at Valence Discovery.
-Although the list is as exhaustive as possible, filtering rules mainly depend on the drug discovery programs.
-It should be noted that systematically applying all filters is to be avoided. For example, "PAINS C" filters are usually not very relevant. Another example is filters that are very strict and could flag substructures that are important for a project (for example, some ZBGs).
-micromamba install -c conda-forge medchem
-
The following filters are available:
-These are Python bindings of the implementation of Eli Lilly Medchem Rules published under "Rules for Identifying Potentially Reactive or Promiscuous Compounds" by Robert F. Bruns and Ian W. Watson, J. Med. Chem. 2012, 55, 9763--9772 as ACS Author choice, i.e. open access at doi 10.1021/jm301008n.
-These rules are used in medchem.filter.lilly_demerit_filter
function and are the main offering of this package.
Rules used by Novartis to build their new screening deck. The rules are published under "Evolution of Novartis' small molecule screening deck design" by Schuffenhauer, A. et al. J. Med. Chem. (2020), https://dx.doi.org/10.1021/acs.jmedchem.0c01332.
-These rules are used in lead filtering as medchem.filter.lead.screening_filter
These are filters based on the Bredt's rules for unstable chemistry. They are used in lead filtering as medchem.filter.lead.bredt_filter
.
These are alert rules from the ChEMBL database curation scheme and public literature on promiscuous compounds in common assays. The rule sets are:
-name | -# alerts | -source | -
---|---|---|
Glaxo | -55 | -ChEMBL | -
Dundee | -105 | -ChEMBL | -
BMS | -180 | -ChEMBL | -
PAINS | -481 | -ChEMBL | -
SureChEMBL | -166 | -ChEMBL | -
MLSMR | -116 | -ChEMBL | -
Inpharmatica | -91 | -ChEMBL | -
LINT | -57 | -ChEMBL | -
Alarm-NMR | -75 | -Literature | -
AlphaScreen-Hitters | -6 | -Literature | -
GST-Hitters | -34 | -Literature | -
HIS-Hitters | -19 | -Literature | -
LuciferaseInhibitor | -3 | -Literature | -
DNABinder | -78 | -Literature | -
Chelator | -55 | -Literature | -
Frequent-Hitter | -15 | -Literature | -
Electrophilic | -119 | -Literature | -
Genotoxic-Carcinogenicity | -117 | -Literature | -
LD50-Oral | -20 | -Literature | -
Non-Genotoxic-Carcinogenicity | -22 | -Literature | -
Reactive-Unstable-Toxic | -335 | -Literature | -
Skin | -155 | -Literature | -
Toxicophore | -154 | -Literature | -
They are used in lead filtering through medchem.filter.lead.alert_filter
These are generic filters based on specific molecular property such as number of atoms, size of macrocycles, etc. They are available at medchem.filter.generic
Package for applying common medchem filters to a dataset of molecules.
"},{"location":"index.html#summary","title":"Summary","text":"This package contains various implementation of medchem rules collected from various sources that may be applied as filters on generated or screened molecules. It centralizes all common filters used at Valence Discovery.
Although the list is as exhaustive as possible, filtering rules mainly depends on the drug discovery programs.
It should be noted that systematically applying all filters is to be avoided. For example, \"PAINS C\" filters are usually not very relevant, another example is the filtering are very strict and could flag important substructure for a project (example some ZBGs).
"},{"location":"index.html#installation","title":"Installation","text":"micromamba install -c conda-forge medchem\n
"},{"location":"index.html#available-filters","title":"Available Filters","text":"The following filters are available:
"},{"location":"index.html#eli-lilly-medchem-rules","title":"Eli Lilly Medchem Rules","text":"These are python binding of the implementation of Eli Lilly Medchem Rules published under \"Rules for Identifying Potentially Reactive or Promiscuous Compounds\" by Robert F. Bruns and Ian W. Watson, J. Med. Chem. 2012, 55, 9763--9772 as ACS Author choice, i.e. open access at doi 10.1021/jm301008n.
These rules are used in medchem.filter.lilly_demerit_filter
function and are the main offering of this package.
Rules used by Novartis to build their new screening deck. The rules are published under \"Evolution of Novartis' small molecule screening deck design\" by Schuffenhauer, A. et al. J. Med. Chem. (2020), https://dx.doi.org/10.1021/acs.jmedchem.0c01332.
These rules are used in lead filtering as medchem.filter.lead.screening_filter
These are filters based on Bredt's rules for unstable chemistry. They are used in lead filtering as medchem.filter.lead.bredt_filter
.
These are alert rules from the ChEMBL database curation scheme and public literature on promiscuous compounds in common assays. The rule sets are:
name # alerts source Glaxo 55 ChEMBL Dundee 105 ChEMBL BMS 180 ChEMBL PAINS 481 ChEMBL SureChEMBL 166 ChEMBL MLSMR 116 ChEMBL Inpharmatica 91 ChEMBL LINT 57 ChEMBL Alarm-NMR 75 Litterature AlphaScreen-Hitters 6 Litterature GST-Hitters 34 Litterature HIS-Hitters 19 Litterature LuciferaseInhibitor 3 Litterature DNABinder 78 Litterature Chelator 55 Litterature Frequent-Hitter 15 Litterature Electrophilic 119 Litterature Genotoxic-Carcinogenicity 117 Litterature LD50-Oral 20 Litterature Non-Genotoxic-Carcinogenicity 22 Litterature Reactive-Unstable-Toxic 335 Litterature Skin 155 Litterature Toxicophore 154 LitteratureThere are used in lead filtering through medchem.filter.lead.alert_filter
These are generic filters based on specific molecular property such as number of atoms, size of macrocycles, etc. They are available at medchem.filter.generic
medchem.alerts
","text":""},{"location":"api/medchem.alerts.html#medchem.alerts","title":"medchem.alerts
","text":""},{"location":"api/medchem.alerts.html#medchem.alerts.AlertFilters","title":"AlertFilters
","text":"Filtering class for building a library based on a list of structural alerts
To list the available alerts, use the list_default_available_alerts
method.
__call__(mols, n_jobs=None, progress=False, include_all_alerts=False)
","text":"Run alert evaluation on this list of molecule and return the full dataframe
Parameters:
Name Type Description Defaultmols
Iterable[Union[str, rdchem.Mol]]
input list of molecules
requiredn_jobs
Optional[int]
number of jobs
None
progress
bool
whether to show progress or not
False
include_all_alerts
bool
whether to include all of the alerts that match as columns
False
"},{"location":"api/medchem.alerts.html#medchem.alerts.AlertFilters.__init__","title":"__init__(alerts_set=None, alerts_db=None)
","text":"Filtering molecules based on chemical alerts
Parameters:
Name Type Description Defaultalerts_set
Union[str, List[str]]
Alerts catalog to use. Default is BMS+Dundee+Glaxo
None
alerts_db
Optional[os.PathLike]
Alerts file to use. Default is internal
None
"},{"location":"api/medchem.alerts.html#medchem.alerts.AlertFilters.evaluate","title":"evaluate(mol)
","text":"Evaluate structure alerts on a molecule
Parameters:
Name Type Description Defaultmol
Union[str, rdchem.Mol]
input molecule
requiredReturns:
Type Descriptionlist of alerts matched
"},{"location":"api/medchem.alerts.html#medchem.alerts.AlertFilters.list_default_available_alerts","title":"list_default_available_alerts()
cached
staticmethod
","text":"Return a list of unique rule set names
"},{"location":"api/medchem.alerts.html#medchem.alerts.NovartisFilters","title":"NovartisFilters
","text":"Filtering class for building a screening deck following the novartis filtering process published in https://dx.doi.org/10.1021/acs.jmedchem.0c01332.
The output of the filter are explained below: - status: one of [\"Exclude\", \"Flag\", \"Annotations\", \"Ok\"]
(ordered by quality). Generally, you can keep anything without the \"Exclude\" label, as long as you also apply a maximum severity score for compounds that collects too many flags. - covalent: number of potentially covalent motifs contained in the compound - severity: how severe are the issues with the molecules: - 0
: compound has no flags, might have annotations; - 1-9
: number of flags the compound raises; - >= 10
: default exclusion criterion used in the paper - special_mol: whether the compound/parts of the compound belongs to a special class of molecules (e.g peptides, glycosides, fatty acid). In that case, you should review the rejection reasons.
__call__(mols, n_jobs=None, progress=False)
","text":"Run alert evaluation on this list of molecule and return the full dataframe
Parameters:
Name Type Description Defaultmols
Iterable[Union[str, rdchem.Mol]]
input list of molecules
requiredn_jobs
Optional[int]
number of jobs
None
progress
bool
whether to show progress or not
False
"},{"location":"api/medchem.catalog.html","title":"medchem.catalog
","text":""},{"location":"api/medchem.catalog.html#medchem.catalog","title":"medchem.catalog
","text":""},{"location":"api/medchem.catalog.html#medchem.catalog.NamedCatalogs","title":"NamedCatalogs
","text":"Holder for substructure matching catalogs
"},{"location":"api/medchem.catalog.html#medchem.catalog.NamedCatalogs.alerts","title":"alerts(subset=None)
staticmethod
","text":"Alerts filter catalogs commonly used in molecule filtering
Parameters:
Name Type Description Defaultsubset
Optional[Union[List[str], str]]
subset of providers to consider
None
Returns:
Name Type Descriptioncatalog
FilterCatalog
filter catalog
"},{"location":"api/medchem.catalog.html#medchem.catalog.NamedCatalogs.bredt","title":"bredt()
cached
staticmethod
","text":"Bredt filter rules. Also see the example of usage in SURGE's documentation: https://github.com/StructureGenerator/SURGE/blob/main/doc/surge1_0.pdf
"},{"location":"api/medchem.catalog.html#medchem.catalog.NamedCatalogs.chemical_groups","title":"chemical_groups(filters='medicinal')
cached
staticmethod
","text":"Chemical group filter catalogs
Parameters:
Name Type Description Defaultfilters
Union[str, List[str]]
list of tag to filter the catalog on.
'medicinal'
"},{"location":"api/medchem.catalog.html#medchem.catalog.NamedCatalogs.nibr","title":"nibr()
cached
staticmethod
","text":"Catalog from NIBR
Warning
This includes all the compounds in the catalog, regardless of severity (FLAG, EXCLUDE, ANNOTATION). You likely don't want to use this for blind prioritization.
"},{"location":"api/medchem.catalog.html#medchem.catalog.NamedCatalogs.tox","title":"tox(pains_a=True, pains_b=True, pains_c=False, brenk=True, nih=False, zinc=False)
cached
staticmethod
","text":"Common toxicity and interference catalog
Parameters:
Name Type Description Defaultpains_a
bool
whether to include PAINS filters from assay A
True
pains_b
bool
whether to include PAINS filters from assay B
True
pains_c
bool
whether to include PAINS filters from assay C
False
brenk
bool
whether to include BRENK filters
True
nih
bool
whether to include NIH filters
False
zinc
bool
whether to include ZINC filters
False
"},{"location":"api/medchem.catalog.html#medchem.catalog.NamedCatalogs.unstable_graph","title":"unstable_graph(max_severity=5)
cached
staticmethod
","text":"Unstable molecular graph to filter out especially for generative models
Parameters:
Name Type Description Defaultmax_severity
int
maximum severity to consider for graph rules to be acceptable
5
"},{"location":"api/medchem.catalog.html#medchem.catalog.from_smarts","title":"from_smarts(smarts, labels=None, mincounts=None, maxcounts=None, entry_as_inds=False)
","text":"Load catalog from a list of smarts
Parameters:
Name Type Description Defaultsmarts
List[str]
list of input smarts to add to the catalog
requiredlabels
Optional[List[str]]
list of label for each smarts
None
mincounts
Optional[List[int]]
minimum count before a match is recognized
None
maxcounts
Optional[List[int]]
maximum count for a match to be valid
None
entry_as_inds
bool
whether to use index for entry id or the label
False
Returns:
Name Type Descriptioncatalog
FilterCatalog
merged catalogs
"},{"location":"api/medchem.catalog.html#medchem.catalog.list_named_catalogs","title":"list_named_catalogs()
","text":"List all available named catalogs. This list will ignore all chemical groups For a list of chemical group to be queried using NamedCatalog.chemical_groups, use medchem.group.list_default_chemical_groups
merge_catalogs(*catalogs)
","text":"Merge several catalogs into a single one
Returns:
Name Type Descriptioncatalog
FilterCatalog
merged catalog
"},{"location":"api/medchem.complexity.html","title":"medchem.rules
","text":""},{"location":"api/medchem.complexity.html#medchem.complexity.complexity_filter","title":"medchem.complexity.complexity_filter
","text":""},{"location":"api/medchem.complexity.html#medchem.complexity.complexity_filter.ComplexityFilter","title":"ComplexityFilter
","text":"Complexity filters derived from nonpher: https://github.com/lich-uct/nonpher/blob/master/nonpher/nonpher.py
To recover the original complexity score, use threshold_stats_file = \"zinc_12\"
. The thresholds have been re-calculated using the newer zinc-15 and focusing only on commercially available compounds.
__call__(mol)
","text":"Check whether the input structure is too complex given this instance of the complexity filter. Return False if the molecule is too complex, else True.
Parameters:
Name Type Description Defaultmol
dm.Mol
input molecule
required"},{"location":"api/medchem.complexity.html#medchem.complexity.complexity_filter.ComplexityFilter.__init__","title":"__init__(limit='99', complexity_metric='bertz', threshold_stats_file='zinc_15_available')
","text":"Default complexity limit is set on at least 1 exceeding metric on the 999th permille level
Parameters:
Name Type Description Defaultlimit
str
The complexity percentile outlier limit to be used (should be expressed as an integer)
'99'
complexity_metric
str
The complexity filter name to be used. Use ComplexityFilter.list_default_available_filters
to list default filters. The following complexity metrics are supported by default * \"bertz\": bertz complexity index * \"sas\": synthetic accessibility score (zinc_15_available
only) * \"qed\": qed score (zinc_15_available
only) * \"clogp\": clogp for how greasy a molecule is compared to other in the same mw range (zinc_15_available
only) * \"whitlock\": whitlock complexity index * \"barone\": barone complexity index * \"smcm\": synthetic and molecular complexity * \"twc\": total walk count complexity (zinc_15_available
only)
'bertz'
threshold_stats_file
Optional[str]
The path to, or the name of, the threshold file to be used. The default available threshold stats files are * \"zinc_12\" * \"zinc_15_available\"
'zinc_15_available'
"},{"location":"api/medchem.complexity.html#medchem.complexity.complexity_filter.ComplexityFilter.list_default_available_filters","title":"list_default_available_filters()
classmethod
","text":"Return a list of unique filter names
"},{"location":"api/medchem.complexity.html#medchem.complexity.complexity_filter.ComplexityFilter.list_default_percentile","title":"list_default_percentile(threshold_stats_file=None)
cached
classmethod
","text":"Return the default percentile list for the threshold file
"},{"location":"api/medchem.complexity.html#medchem.complexity.complexity_filter.ComplexityFilter.load_threshold_stats_file","title":"load_threshold_stats_file(path=None)
classmethod
","text":"Load threshold file to compute the percentile depending on the MW for each complexity_metric
Parameters:
Name Type Description Defaultpath
Optional[str]
path to the threshold file
None
"},{"location":"api/medchem.complexity.html#medchem.complexity._complexity_calc","title":"medchem.complexity._complexity_calc
","text":"Complexity filters as implemented in nonpher https://github.com/lich-uct/nonpher/blob/master/nonpher/complex_lib.py
"},{"location":"api/medchem.complexity.html#medchem.complexity._complexity_calc.BaroneCT","title":"BaroneCT(mol, chiral=False)
","text":"Compute a Barone complexity measure for a molecule as described in:
R. Barone and M. Chanon, J. Chem. Inf. Comput. Sci., 2001, 41 (2), pp 269\u2013272 Qi Huang, Lin-LiLi, Sheng-Yong Yang, J. Mol. Graph. Model. 2010, 28 (8), pp 775\u2013787
Parameter values are hardcoded as in the articles. On zinc 15 commercially available dataset, the range of this score is [30, 4266] with a median of 538
Parameters:
Name Type Description Defaultmol
dm.Mol
The input molecule.
requiredchiral
bool
Whether to include chirality in the calculation.
False
"},{"location":"api/medchem.complexity.html#medchem.complexity._complexity_calc.SMCM","title":"SMCM(mol)
","text":"Compute synthetic and molecular complexity as described in:
TK Allu, TI Oprea, J. Chem. Inf. Model. 2005, 45(5), pp. 1237-1243. https://sci-hub.ee/10.1021/ci0501387
On zinc 15 commercially available dataset, the range of this score is [1.93, 192.00] with a median of 42.23
Parameters:
Name Type Description Defaultmol
dm.Mol
the input molecule
required"},{"location":"api/medchem.complexity.html#medchem.complexity._complexity_calc.TWC","title":"TWC(mol, log10=True)
","text":"Compute total walk count in a molecules as proxy for complexity. This score is described in: twc = 1/2 sum(k=1..n-1,sum(i=atoms,awc(k,i)))
Gerta Rucker and Christoph Rucker, J. Chem. Inf. Comput. Sci. 1993, 33, 683-695
On zinc 15 commercially available dataset, the range of this score is [1.20, 39.08] with a median of 10.65
Parameters:
Name Type Description Defaultmol
the input molecule
requiredlog10
bool
whether to return the log10 of the values
True
"},{"location":"api/medchem.complexity.html#medchem.complexity._complexity_calc.WhitlockCT","title":"WhitlockCT(mol, ringval=4, unsatval=2, heteroval=1, chiralval=2)
","text":"A chemically intuitive measure for molecular complexity. This complexity measure has been described in: H. W. Whitlock, J. Org. Chem., 1998, 63, 7982-7989. Benzyls, phenyls, etc. are not treated at all.
On zinc 15 commercially available dataset, the range of this score is [0, 172] with a median of 25
Parameters:
Name Type Description Defaultmol
The input molecule.
requiredringval
float
The contribution of rings
4
unsatval
float
The contribution of the unsaturated bond.
2
heteroval
float
The contribution of the heteroatom.
1
chiralval
float
The contribution of the chiral center.
2
"},{"location":"api/medchem.demerits.html","title":"medchem.demerits
","text":""},{"location":"api/medchem.demerits.html#medchem.demerits","title":"medchem.demerits
","text":""},{"location":"api/medchem.demerits.html#medchem.demerits.batch_score","title":"batch_score(smiles_list, n_jobs=None, batch_size=5000, progress=False, **run_options)
","text":"Run scorer on input smile list in batch
Parameters:
Name Type Description Defaultsmiles_list
List
list of smiles
requiredn_jobs
Optional[int]
Number of jobs to run in parallel.
None
batch_size
Optional[int]
Optional batch size used to run the scoring in parallel.
5000
progress
bool
Whether to show progress bar.
False
run_options
Run options to pass to the underlying score function
{}
Returns:
Name Type Descriptionout_df
pd.DataFrame
Dataframe containing the smiles and computed properties: (rejected, demerit_score, reason, step)
"},{"location":"api/medchem.demerits.html#medchem.demerits.run_cmd","title":"run_cmd(cmd, shell=False)
","text":"Run command
"},{"location":"api/medchem.demerits.html#medchem.demerits.score","title":"score(smiles_list, mc_first_pass_options='', iwd_options='', stop_after_step=3, **run_options)
","text":"Run scorer on input smile list:
Parameters:
Name Type Description Defaultsmiles_list
List
list of smiles
requiredmc_first_pass_options
Optional[str]
Initial options to pass to mc_first_pass
''
iwd_options
Optional[str]
Initial options to pass to iwdemerit
''
stop_after_step
Optional[int]
Where to stop in the pipeline. Don't change this if you don't know.
3
run_options
Additional option to run the pipeline
{}
Returns:
Name Type Descriptionout_df
pd.DataFrame
Dataframe containing the smiles and computed properties: (rejected, demerit_score, reason, step)
"},{"location":"api/medchem.filter.html","title":"medchem.filter
","text":""},{"location":"api/medchem.filter.html#medchem.filter.lead","title":"medchem.filter.lead
","text":""},{"location":"api/medchem.filter.html#medchem.filter.lead.alert_filter","title":"alert_filter(mols, alerts, alerts_db=None, n_jobs=1, rule_dict=None, return_idx=False)
","text":"Filter a dataset of molecules, based on common structural alerts and specific rules.
Parameters:
Name Type Description Defaultmols
Iterable[Union[str, dm.Mol]]
List of molecules to filter
requiredalerts
List[str]
List of alert collections to screen for. See AlertFilters.list_default_available_alerts()
requiredalerts_db
Optional[os.PathLike]
Path to the alert file name. The internal default file (alerts.csv) will be used if not provided
None
n_jobs
Optional[int]
Number of cpu to use
1
rule_dict
Dict
Dictionary with additional rules to apply during the filtering. For example, such dictionary for drug-like compounds would look like this:
rule_dict {\"MW\": [0, 500], \"LogP\": [-0.5, 5], \"HBD\": [0, 5], \"HBA\": [0, 10], \"TPSA\": [0, 150]}
None
return_idx
bool
Whether to return the filtered index
False
Returns:
Name Type Descriptionfiltered_mask
boolean array (or index array) where true means the molecule IS OK (not found in the alert catalog).
"},{"location":"api/medchem.filter.html#medchem.filter.lead.bredt_filter","title":"bredt_filter(mols, return_idx=False, n_jobs=None, progress=False, scheduler='threads', batch_size=100)
","text":"Filter a list of compounds according to Bredt's rules https://en.wikipedia.org/wiki/Bredt%27s_rule
Parameters:
Name Type Description Defaultmols
Sequence[Union[str, dm.Mol]]
list of input molecules
requiredreturn_idx
bool
whether to return index or a boolean mask
False
n_jobs
Optional[int]
number of parallel job to run. Sequential by default
None
progress
bool
whether to show progress bar
False
scheduler
str
joblib scheduler to use
'threads'
batch_size
int
batch size for parallel processing. Note that batch_size
should be increased if the number of used CPUs gets very large.
100
Returns:
Name Type Descriptionfiltered_mask
boolean array (or index array) where true means the molecule is not toxic.
"},{"location":"api/medchem.filter.html#medchem.filter.lead.catalog_filter","title":"catalog_filter(mols, catalogs, return_idx=False, n_jobs=None, progress=False, scheduler='processes', batch_size=100)
","text":"Filter a list of compounds according to catalog of structures alerts and patterns
Parameters:
Name Type Description Defaultmols
Sequence[Union[str, dm.Mol]]
list of input molecules
requiredcatalogs
List[Union[str, FilterCatalog]]
list of catalogs (name or FilterCatalog)
requiredreturn_idx
bool
whether to return index or a boolean mask
False
n_jobs
Optional[int]
number of parallel job to run. Sequential by default
None
progress
bool
whether to show progress bar
False
scheduler
str
joblib scheduler to use
'processes'
batch_size
int
batch size for parallel processing. Note that batch_size
should be increased if the number of used CPUs gets very large.
100
Returns:
Name Type Descriptionfiltered_mask
boolean array (or index array) where true means the molecule is not found in the catalog.
"},{"location":"api/medchem.filter.html#medchem.filter.lead.chemical_group_filter","title":"chemical_group_filter(mols, chemical_group, return_idx=False, n_jobs=None, progress=False, scheduler='threads')
","text":"Filter a list of compounds according to a chemical group instance.
Note
This function will return the list of molecules that DO NOT match the chemical group
Parameters:
Name Type Description Defaultmols
Iterable[Union[str, dm.Mol]]
list of input molecules
requiredchemical_group
ChemicalGroup
a chemical group instance with the required functional groups to use.
requiredreturn_idx
bool
whether to return index or a boolean mask
False
n_jobs
Optional[int]
number of parallel job to run. Sequential by default
None
progress
bool
whether to show progress bar
False
scheduler
str
joblib scheduler to use
'threads'
Returns:
Name Type Descriptionfiltered_mask
boolean array (or index array) where true means the molecule DOES NOT MATCH the groups.
"},{"location":"api/medchem.filter.html#medchem.filter.lead.complexity_filter","title":"complexity_filter(mols, complexity_metric='bertz', threshold_stats_file='zinc_15_available', limit='99', return_idx=False, n_jobs=None, progress=False, scheduler='processes')
","text":"Filter a list of compounds according to a complexity metric and an outlier percentile limit
Parameters:
Name Type Description Defaultmols
Iterable[Union[str, dm.Mol]]
list of input molecules
requiredcomplexity_metric
str
complexity metric to use Use ComplexityFilter.list_default_available_filters
to list default filters. The following complexity metrics are supported by default * \"bertz\": bertz complexity index * \"sas\": synthetic accessibility score (zinc_15_available
only) * \"qed\": qed score (zinc_15_available
only) * \"clogp\": clogp for how greasy a molecule is compared to other in the same mw range (zinc_15_available
only) * \"whitlock\": whitlock complexity index * \"barone\": barone complexity index * \"smcm\": synthetic and molecular complexity * \"twc\": total walk count complexity (zinc_15_available
only)
'bertz'
threshold_stats_file
str
complexity threshold statistic origin to use
'zinc_15_available'
limit
str
complexity outlier percentile to use
'99'
return_idx
bool
whether to return index or a boolean mask
False
n_jobs
Optional[int]
number of parallel job to run. Sequential by default
None
progress
bool
whether to show progress bar
False
scheduler
str
joblib scheduler to use
'processes'
Also see medchem.complexity.ComplexityFilter
Returns:
Name Type Descriptionfiltered_mask
boolean array (or index array) where true means the molecule MATCH the rules.
"},{"location":"api/medchem.filter.html#medchem.filter.lead.lilly_demerit_filter","title":"lilly_demerit_filter(smiles, max_demerits=160, return_idx=False, n_jobs=None, progress=False, **kwargs)
","text":"Run Lilly demerit filtering on current list of molecules
Parameters:
Name Type Description Defaultsmiles
Iterable[str]
list of input molecules as smiles preferably
requiredmax_demerits
Optional[int]
Cutoff to reject molecules Defaults to 160.
160
return_idx
bool
whether to return a mask or a list of valid indexes
False
progress
bool
whether to show progress bar
False
kwargs
parameters specific to the demerits.score
function
{}
Returns:
Name Type Descriptionfiltered_mask
boolean array (or index array) where true means the molecule is ok.
"},{"location":"api/medchem.filter.html#medchem.filter.lead.molecular_graph_filter","title":"molecular_graph_filter(mols, max_severity=5, return_idx=False, n_jobs=None, progress=False, scheduler='threads')
","text":"Filter a list of compounds according to unstable molecular graph filter list.
This list was obtained from observation around The disallowed graphs are:
Parameters:
Name Type Description Defaultmols
Iterable[Union[str, dm.Mol]]
list of input molecules
requiredmax_severity
int
maximum acceptable severity (1-10). Default is <5
5
return_idx
bool
whether to return index or a boolean mask
False
n_jobs
Optional[int]
number of parallel job to run. Sequential by default
None
progress
bool
whether to show progress bar
False
scheduler
str
joblib scheduler to use
'threads'
Returns:
Name Type Descriptionfiltered_mask
boolean array (or index array) where true means the molecule is not toxic.
"},{"location":"api/medchem.filter.html#medchem.filter.lead.protecting_groups_filter","title":"protecting_groups_filter(mols, return_idx=False, protecting_groups=['fmoc', 'tert-butoxymethyl', 'tert-butyl carbamate', 'tert-butyloxycarbonyl'], n_jobs=None, progress=False, scheduler='threads')
","text":"Filter a list of compounds according to match to known protecting groups. Note that is a syntaxic sugar for calling chemical_group_filter with the protecting groups subset
Parameters:
Name Type Description Defaultmols
Iterable[Union[str, dm.Mol]]
list of input molecules
requiredprotecting_groups
str
type of protecting group to consider. If not provided, all will be used (not advised)
['fmoc', 'tert-butoxymethyl', 'tert-butyl carbamate', 'tert-butyloxycarbonyl']
return_idx
bool
whether to return index or a boolean mask
False
n_jobs
Optional[int]
number of parallel job to run. Sequential by default
None
progress
bool
whether to show progress bar
False
scheduler
str
joblib scheduler to use
'threads'
Returns:
Name Type Descriptionfiltered_mask
boolean array (or index array) where true means the molecule DOES NOT MATCH the groups.
"},{"location":"api/medchem.filter.html#medchem.filter.lead.rules_filter","title":"rules_filter(mols, rules, return_idx=False, n_jobs=None, progress=False, scheduler='processes')
","text":"Filter a list of compounds according to a predefined set of rules
Parameters:
Name Type Description Defaultmols
Iterable[Union[str, dm.Mol]]
list of input molecules
requiredrules
Union[List[Any], RuleFilters]
list of rules to apply to the input molecules.
requiredreturn_idx
bool
whether to return index or a boolean mask
False
n_jobs
Optional[int]
number of parallel job to run. Sequential by default
None
progress
bool
whether to show progress bar
False
scheduler
str
joblib scheduler to use
'processes'
Returns:
Name Type Descriptionfiltered_mask
boolean array (or index array) where true means the molecule MATCH the rules.
"},{"location":"api/medchem.filter.html#medchem.filter.lead.screening_filter","title":"screening_filter(mols, n_jobs=None, max_severity=10, return_idx=False)
","text":"Filter a set of molecules based on novartis screening deck curation process Schuffenhauer, A. et al. Evolution of Novartis' small molecule screening deck design, J. Med. Chem. (2020) DOI. https://dx.doi.org/10.1021/acs.jmedchem.0c01332
Note
The severity argument corresponds to the accumulated severity for a compound across all patterns in the catalog.
Parameters:
Name Type Description Defaultmols
Iterable[Union[str, dm.Mol]]
list of input molecules
requiredn_jobs
Optional[int]
number of parallel job to run. Sequential by default
None
max_severity
int
maximum severity allowed. Default is <10
10
return_idx
bool
Whether to return the filtered index
False
Returns:
Name Type Descriptionfiltered_mask
boolean array (or index array) where true means the molecule IS NOT REJECTED (i.e not found in the alert catalog).
"},{"location":"api/medchem.filter.html#medchem.filter.generic","title":"medchem.filter.generic
","text":""},{"location":"api/medchem.filter.html#medchem.filter.generic.atom_list_filter","title":"atom_list_filter(mols, unwanted_atom_list=None, wanted_atom_list=None, return_idx=False, n_jobs=None, progress=False, scheduler=None)
","text":"Find molecule without any atom from a set of unwanted atom symbols and with all atoms in the set of desirable atom list
Parameters:
Name Type Description Defaultmols
Iterable[Union[str, dm.Mol]]
list of input molecules
requiredunwanted_atom_list
Optional[Iterable]
list of undesirable atom symbol
None
wanted_atom_list
Optional[Iterable]
list of desirable atom symbol
None
return_idx
bool
whether to return index or a boolean mask
False
n_jobs
Optional[int]
number of parallel job to run. Sequential by default
None
progress
bool
whether to show progress bar
False
scheduler
Optional[str]
joblib scheduler to use
None
Returns:
Name Type Descriptionfiltered_mask
boolean array (or index array) where true means the molecule is ok.
"},{"location":"api/medchem.filter.html#medchem.filter.generic.halogenicity_filter","title":"halogenicity_filter(mols, thresh_F=6, thresh_Br=3, thresh_Cl=3, return_idx=False, n_jobs=None, progress=False, scheduler=None)
","text":"Find molecule that do not exceed halogen threshold. These thresholds are:
Parameters:
Name Type Description Defaultmols
Iterable[Union[str, dm.Mol]]
list of input molecules
requiredthresh_F
int
maximum number of fluorine
6
thresh_Br
int
maximum number of bromine
3
thresh_Cl
int
maximum number of chlorine
3
return_idx
bool
whether to return index or a boolean mask
False
n_jobs
Optional[int]
number of parallel job to run. Sequential by default
None
progress
bool
whether to show progress bar
False
scheduler
Optional[str]
joblib scheduler to use
None
Returns:
Name Type Descriptionfiltered_mask
boolean array (or index array) where true means the molecule is ok.
"},{"location":"api/medchem.filter.html#medchem.filter.generic.macrocycle_filter","title":"macrocycle_filter(mols, max_cycle_size=10, return_idx=False, n_jobs=None, progress=False, scheduler=None)
","text":"Find molecules that do not infringe the strict maximum cycle size.
Parameters:
Name Type Description Defaultmols
Iterable[Union[str, dm.Mol]]
list of input molecules
requiredmax_cycle_size
int
strict maximum macrocycle size
10
return_idx
bool
whether to return index or a boolean mask
False
n_jobs
Optional[int]
number of parallel job to run. Sequential by default
None
progress
bool
whether to show progress bar
False
scheduler
Optional[str]
joblib scheduler to use
None
Returns:
Name Type Descriptionfiltered_mask
boolean array (or index array) where true means the molecule is ok.
"},{"location":"api/medchem.filter.html#medchem.filter.generic.num_atom_filter","title":"num_atom_filter(mols, min_atoms=None, max_atoms=None, return_idx=False, n_jobs=None, progress=False, scheduler=None)
","text":"Find a molecule that match the atom number constraints Returning True means the molecule is fine
Parameters:
Name Type Description Defaultmols
Iterable[Union[str, dm.Mol]]
list of input molecules
requiredmin_atoms
Optional[int]
strict minimum number of atoms (atoms > min_atoms)
None
max_atoms
Optional[int]
strict maximum number of atoms (atoms < max_atoms)
None
return_idx
bool
whether to return index or a boolean mask
False
n_jobs
Optional[int]
number of parallel job to run. Sequential by default
None
progress
bool
whether to show progress bar
False
scheduler
Optional[str]
joblib scheduler to use
None
Returns:
Name Type Descriptionfiltered_mask
boolean array (or index array) where true means the molecule is ok.
"},{"location":"api/medchem.filter.html#medchem.filter.generic.num_stereo_center_filter","title":"num_stereo_center_filter(mols, max_stereo_centers=4, max_undefined_stereo_centers=2, return_idx=False, n_jobs=None, progress=False, scheduler=None)
","text":"Find a molecule that match the number of stereo center constraints. Returning True means the molecule is fine
Parameters:
Name Type Description Defaultmols
Iterable[Union[str, dm.Mol]]
list of input molecules
requiredmax_stereo_center
strict maximum number of stereo centers (<). Default is 4
requiredmax_undefined_stereo_centers
Optional[int]
strict maximum number of undefined stereo centers (<). Default is 2
2
return_idx
bool
whether to return index or a boolean mask
False
n_jobs
Optional[int]
number of parallel job to run. Sequential by default
None
progress
bool
whether to show progress bar
False
scheduler
Optional[str]
joblib scheduler to use
None
Returns:
Name Type Descriptionfiltered_mask
boolean array (or index array) where true means the molecule is ok.
"},{"location":"api/medchem.filter.html#medchem.filter.generic.ring_infraction_filter","title":"ring_infraction_filter(mols, hetcycle_min_size=4, return_idx=False, n_jobs=None, progress=False, scheduler=None)
","text":"Find molecules that have a ring infraction filter. Returning True means the molecule is fine
Parameters:
Name Type Description Defaultmols
Iterable[Union[str, dm.Mol]]
list of input molecules
requiredhetcycle_min_size
int
Minimum ring size before more than 1 hetero atom or any non single bond is allowed. This is a strict threshold (>)
4
return_idx
bool
whether to return index or a boolean mask
False
n_jobs
Optional[int]
number of parallel job to run. Sequential by default
None
progress
bool
whether to show progress bar
False
scheduler
Optional[str]
joblib scheduler to use
None
Returns:
Name Type Descriptionfiltered_mask
boolean array (or index array) where true means the molecule is ok.
"},{"location":"api/medchem.filter.html#medchem.filter.generic.symmetry_filter","title":"symmetry_filter(mols, symmetry_threshold=0.8, return_idx=False, n_jobs=None, progress=False, scheduler=None)
","text":"Find molecules that are not symmetrical, given a symmetry threshold
Parameters:
Name Type Description Defaultmols
Iterable[Union[str, dm.Mol]]
list of input molecules
requiredsymmetry_threshold
float
threshold to consider a molecule highly symmetrical
0.8
return_idx
bool
whether to return index or a boolean mask
False
n_jobs
Optional[int]
number of parallel jobs to run. Sequential by default
None
progress
bool
whether to show progress bar
False
scheduler
Optional[str]
joblib scheduler to use
None
Returns:
Name Type Descriptionfiltered_mask
boolean array (or index array) where true means the molecule is ok.
"},{"location":"api/medchem.groups.html","title":"medchem.groups
","text":""},{"location":"api/medchem.groups.html#medchem.groups","title":"medchem.groups
","text":""},{"location":"api/medchem.groups.html#medchem.groups.ChemicalGroup","title":"ChemicalGroup
","text":"Build a library of chemical groups using a list of structures parsed from a file
The default library of structures has been curated from https://github.com/Sulstice/global-chem and additional open source data.
Note
For new chemical groups, please minimally provide the 'smiles'/'smarts', 'name' and \"group\" and optional 'hierarchy' columns
Warning
The SMILES and SMARTS used in the default list of substructures do not result in the same matches. Unless specified otherwise, the SMILES will be used in the matching done by this class, whereas due to RDKit's limitation, the SMARTS will be used in the matching done by the generated catalog. For more information, see this discussion: https://github.com/valence-platform/medchem/pull/19
"},{"location":"api/medchem.groups.html#medchem.groups.ChemicalGroup.dataframe","title":"dataframe
property
","text":"Get the dataframe of the chemical groups
"},{"location":"api/medchem.groups.html#medchem.groups.ChemicalGroup.mol_smarts","title":"mol_smarts
property
","text":"Get the SMARTS of the chemical groups in this instance
"},{"location":"api/medchem.groups.html#medchem.groups.ChemicalGroup.mols","title":"mols
property
","text":"Get the Molecule object of the SMILES for the chemical groups in this instance
"},{"location":"api/medchem.groups.html#medchem.groups.ChemicalGroup.name","title":"name
property
","text":"Get the Name of the chemical groups in this instance
"},{"location":"api/medchem.groups.html#medchem.groups.ChemicalGroup.smarts","title":"smarts
property
","text":"Get the SMARTS of the chemical groups in this instance
"},{"location":"api/medchem.groups.html#medchem.groups.ChemicalGroup.smiles","title":"smiles
property
","text":"Get the SMILES of the chemical groups in this instance
"},{"location":"api/medchem.groups.html#medchem.groups.ChemicalGroup.__init__","title":"__init__(groups=None, n_jobs=None, groups_db=None)
","text":"Build a chemical group library
Parameters:
Name Type Description Defaultgroups
Union[str, List[str]]
List of groups to use. Defaults to None where all functional groups are used
None
n_jobs
Optional[int]
Optional number of jobs to run in parallel for internally building the data. Defaults to None.
None
groups_db
Optional[os.PathLike]
Path to a file containing the dump of the chemical groups. Defaults to the internal dataset
None
"},{"location":"api/medchem.groups.html#medchem.groups.ChemicalGroup.filter","title":"filter(names, fuzzy=False)
","text":"Filter the group to restrict to only the name in input
Parameters:
Name Type Description Defaultnames
List[str]
list of names to use for filters
requiredfuzzy
bool
whether to use exact or fuzzy matching
False
"},{"location":"api/medchem.groups.html#medchem.groups.ChemicalGroup.get_catalog","title":"get_catalog()
cached
","text":"Build an rdkit catalog from the current chemical group data
"},{"location":"api/medchem.groups.html#medchem.groups.ChemicalGroup.get_matches","title":"get_matches(mol, use_smiles=True)
","text":"Get all the functional groups in this instance that match the input molecule
Parameters:
Name Type Description Defaultmol
Union[dm.Mol, str]
input molecule
requireduse_smiles
bool
whether to use the smiles representation of the catalog or the smarts
True
"},{"location":"api/medchem.groups.html#medchem.groups.ChemicalGroup.has_match","title":"has_match(mol)
","text":"Check whether the input molecule has any functional group in this instance
Parameters:
Name Type Description Defaultmol
Union[dm.Mol, str]
input molecule
required"},{"location":"api/medchem.groups.html#medchem.groups.ChemicalGroup.list_groups","title":"list_groups()
","text":"List all the chemical groups available
"},{"location":"api/medchem.groups.html#medchem.groups.ChemicalGroup.list_hierarchy_groups","title":"list_hierarchy_groups()
","text":"List all the hierarchy in chemical groups available. To get the full hierarchy on each path, split by the .
character.
list_default_chemical_groups(hierachy=False)
","text":"List all the chemical groups available.
Note
chemical groups defines how a collection of patterns are organized. They do not correspond to individual pattern name.
Parameters:
Name Type Description Defaulthierarchy
whether to return the full hierarchy or the group name only
requiredReturns:
Type DescriptionList of chemical groups
"},{"location":"api/medchem.groups.html#medchem.groups.list_functional_group_names","title":"list_functional_group_names(exclude_basic=True)
","text":"List common functional group names
Parameters:
Name Type Description Defaultexclude_basic
bool
whether to exclude the basic functional groups
True
Returns:
Type DescriptionList of functional group names
"},{"location":"api/medchem.query.html","title":"medchem.query
","text":"This module helps build a filter based on a query language that can be parsed. By default, the default query parser will be used, which contains the following instructions that can be orchestrated using boolean operation (or
, and
, not
and parenthesis)
import datamol as dm\nfrom medchem.query.eval import QueryFilter\n\nquery = \"\"\"HASPROP(\"tpsa\" < 120) AND HASSUBSTRUCTURE(\"[OH]\", True)\"\"\"\nchemical_filter = QueryFilter(query, parser=\"lalr\")\nmols = dm.data.cdk2().mol[:10]\nchemical_filter(mols, n_jobs=-1) # [False, False, False, False, False, True, True, True, False, False]\n
"},{"location":"api/medchem.query.html#syntax","title":"Syntax","text":"Any string provided as query
argument needs to be quoted (similar to json) to avoid ambiguity in parsing. * An example of valid query is \"\"\"(HASPROP(\"tpsa\" > 120 ) | HASSUBSTRUCTURE(\"c1ccccc1\")) AND NOT HASALERT(\"pains\") OR HASSUBSTRUCTURE(\"[OH]\", max, 2)\"\"\"
. * Examples of invalid queries are * \"\"\"HASPROP(\"tpsa\" > 120) OR HASSUBSTRUCTURE(\"[OH]\", True, >, 3)\"\"\"
: unexpected wrong operator >
* \"\"\"HASPROP(tpsa > 120)\"\"\"
: tpsa is not quoted * \"\"\"HASPROP(\"tpsa\") > 120\"\"\"
: this is not part of the language specification * \"\"\"(HASPROP(\"tpsa\" > 120) AND HASSUBSTRUCTURE(\"[OH]\", True, max, 3 )\"\"\"
: mismatching parenthesis (
\"\"\"HASPROP(\"tpsa\" > 120) OR HASSUBSTRUCTURE(\"CO\")\"\"\"
, \"\"\"(HASPROP(\"tpsa\" > 120)) OR (HASSUBSTRUCTURE(\"CO\"))\"\"\"
and \"\"\"(HASPROP(\"tpsa\" > 120) OR HASSUBSTRUCTURE(\"CO\"))\"\"\"
are equivalentcheck whether a molecule has an alert
from a catalog
# alert is one supported alert catalog by `medchem`. For example `pains`\nHASALERT(alert:str) \n
"},{"location":"api/medchem.query.html#hasgroup","title":"HASGROUP","text":"check whether a molecule has a specific functional group from a catalog
# group is one supported functional group provided by `medchem`\nHASGROUP(group:str) \n
"},{"location":"api/medchem.query.html#matchrule","title":"MATCHRULE","text":"check whether a molecule match a predefined druglikeness rule
from a catalog
# rule is one supported rule provided by `medchem`. For example `rule_of_five`\nMATCHRULE(rule:str) \n
"},{"location":"api/medchem.query.html#hassuperstructure","title":"HASSUPERSTRUCTURE","text":"check whether a molecule has query
as superstructure
# query is a SMILES\nHASSUPERSTRUCTURE(query:str) \n
"},{"location":"api/medchem.query.html#hassubstructure","title":"HASSUBSTRUCTURE","text":"Check whether a molecule has query
as substructure. Note that providing the comma separator ,
is mandatory here as each variable is an argument.
# query is a SMILES or a SMARTS, operator is defined below, is_smarts is a boolean\n\nHASSUBSTRUCTURE(query:str, is_smarts:Optional[bool], operator:Optional[str], limit:Optional[int])\n\n# which corresponds to setting these default values\nHASSUBSTRUCTURE(query:str, is_smarts=False, operator=\"min\", limit=1)\n# same as\nHASSUBSTRUCTURE(query:str, is_smarts=None, operator=None, limit=None)\n
Not providing optional arguments is allowed, but they need to be provided in the exact same order shown above. Thus:
HASSUBSTRUCTURE(\"CO\")
HASSUBSTRUCTURE(\"CO\", False)
HASSUBSTRUCTURE(\"CO\", False, min)
HASSUBSTRUCTURE(\"CO\", False, min, 1)
are all valid
and equivalent
(given their default values)
Furthermore, since the correct argument map can be inferred when no ambiguity arises, the following are valid but discouraged
HASSUBSTRUCTURE(\"CO\", False, 1)
HASSUBSTRUCTURE(\"CO\", min, 1)
Whereas, this is invalid: * HASSUBSTRUCTURE(\"CO\", min, False, 1)
Check whether a molecule has prop
as property within a defined limit. Any comma ,
provided between arguments will be ignored
# prop is a valid datamol.descriptors property, comparator is a required comparator operator and defined below\nHASPROP(prop:str comparator:str limit:float)\n
"},{"location":"api/medchem.query.html#like","title":"LIKE","text":"Check whether a molecule is similar enough to another molecule. Any comma ,
provided between arguments will be ignored
# query is a SMILES\nLIKE(query:str comparator:str limit:float)\n
"},{"location":"api/medchem.query.html#basic-operators","title":"Basic operators:","text":"=
==
, !=
, <
, >
, <=
, >=
true
, false
, True
, False
, TRUE
, FALSE
min
, MIN
max
, MAX
AND
or &
or &&
or and
OR
or |
or ||
or or
NOT
or !
or ~
or not
medchem.query.parser
","text":""},{"location":"api/medchem.query.html#medchem.query.parser.QueryParser","title":"QueryParser
","text":" Bases: Transformer
Query parser for the custom molecule query language. It parses the input query and builds a parseable and evaluable representation. The trick for lazy evaluation is to define a custom guard with 'fn(*)
' around expression that needs to be evaluated.
Note that you SHOULD NOT HAVE TO INTERACT WITH THIS CLASS DIRECTLY.
Exampleimport medchem import lark QUERY_GRAMMAR = medchem.utils.loader.get_grammar(as_string=True) QUERY_PARSER = Lark(QUERY_GRAMMAR, parser=\"lalr\", transformer=QueryParser())
"},{"location":"api/medchem.query.html#medchem.query.parser.QueryParser--see-how-the-string-needs-to-be-quoted-this-builds-on-the-json-quote-requirements-to-avoid-dealing-with-unwanted-outcomes","title":"see how the string needs to be \"quoted\". This builds on the json quote requirements to avoid dealing with unwanted outcomes","text":"example = \"\"\"(HASPROP(\"tpsa\" > 120 ) | HASSUBSTRUCTURE(\"c1ccccc1\")) AND NOT HASALERT(\"pains\") OR HASSUBSTRUCTURE(\"[OH]\", max)\"\"\" t = QUERY_PARSER.parse(example) print(t) ((((fn(getprop, prop='tpsa')
> 120.0) or fn(hassubstructure, query='c1ccccc1', operator='None', limit=None, is_smarts=None)
) and not fn(hasalert, alert='pains')
) or fn(hassubstructure, query='[OH]', operator='max', limit=None, is_smarts=None)
)
bool_expr(bool_term, *others)
","text":"Define how boolean expressions should be parsed
"},{"location":"api/medchem.query.html#medchem.query.parser.QueryParser.bool_term","title":"bool_term(bool_factor, *others)
","text":"Define how boolean terms should be parsed
"},{"location":"api/medchem.query.html#medchem.query.parser.QueryParser.hasalert","title":"hasalert(value)
","text":"Format the hasalert node in the query
Note
The parser does not enforce any validity on the argument and the underlying function is supposed to handle it.
"},{"location":"api/medchem.query.html#medchem.query.parser.QueryParser.hasgroup","title":"hasgroup(value)
","text":"Format the hasgroup node in the query
Note
The parser does not enforce any validity on the argument and the underlying function is supposed to handle it.
"},{"location":"api/medchem.query.html#medchem.query.parser.QueryParser.hasprop","title":"hasprop(value, comparator, limit)
","text":"Format the hasprop node in the query
Note
The parser does not enforce any validity on the argument and the underlying function is supposed to handle it.
"},{"location":"api/medchem.query.html#medchem.query.parser.QueryParser.hassubstructure","title":"hassubstructure(value, is_smarts, operator, limit)
","text":"Format the substructure node in the query
Note
The parser does not enforce any validity on the argument and the underlying function is supposed to handle it.
"},{"location":"api/medchem.query.html#medchem.query.parser.QueryParser.hassuperstructure","title":"hassuperstructure(value)
","text":"Format the superstructure node in the query
Note
The parser does not enforce any validity on the argument and the underlying function is supposed to handle it.
"},{"location":"api/medchem.query.html#medchem.query.parser.QueryParser.like","title":"like(value, comparator, limit)
","text":"Format the like node in the query
Note
The parser does not enforce any validity on the argument and the underlying function is supposed to handle it.
"},{"location":"api/medchem.query.html#medchem.query.parser.QueryParser.matchrule","title":"matchrule(value)
","text":"Format the matchrule node in the query
Note
The parser does not enforce any validity on the argument and the underlying function is supposed to handle it.
"},{"location":"api/medchem.query.html#medchem.query.parser.QueryParser.not_bool_factor","title":"not_bool_factor(*args)
","text":"Define representation of a negation
"},{"location":"api/medchem.query.html#medchem.query.eval","title":"medchem.query.eval
","text":""},{"location":"api/medchem.query.html#medchem.query.eval.QueryFilter","title":"QueryFilter
","text":"Query filtering system based on a custom query grammar
"},{"location":"api/medchem.query.html#medchem.query.eval.QueryFilter.__call__","title":"__call__(mols, scheduler='processes', n_jobs=-1, progress=True)
","text":"Call the internal chemical filter that has been built
Parameters:
Name Type Description Defaultmols
List[Union[str, dm.Mol]]
list of input molecules to filter
requiredn_jobs
int
whether to run job in parallel and number of jobs to consider. Defaults to -1.
-1
scheduler
scheduler to use. Defaults to 'processes'.
'processes'
progress
bool
whether to show job progress. Defaults to True.
True
"},{"location":"api/medchem.query.html#medchem.query.eval.QueryFilter.__init__","title":"__init__(query, grammar=None, parser='lalr')
","text":"Constructor for query filtering system
Parameters:
Name Type Description Defaultquery
str
input unparsed query
requiredgrammar
Optional[str]
path to grammar language to use. Defaults to None, which will use the default grammar.
None
parser
str
which Lark language parser to use. Defaults to \"lalr\".
'lalr'
"},{"location":"api/medchem.query.html#medchem.query.eval.QueryOperator","title":"QueryOperator
","text":"A class to hold all the operators that can be used in queries
"},{"location":"api/medchem.query.html#medchem.query.eval.QueryOperator.getprop","title":"getprop(mol, prop)
staticmethod
","text":"Compute the molecular property of a molecule. This is an alternative to the hasprop function, that does not enforce any comparison.
Parameters:
Name Type Description Defaultmol
Union[dm.Mol, str]
input molecule
requiredprop
str
molecular property to apply as filter on the molecule
requiredReturns:
Name Type Descriptionproperty
float
computed property value
"},{"location":"api/medchem.query.html#medchem.query.eval.QueryOperator.hasalert","title":"hasalert(mol, alert)
staticmethod
","text":"Check if a molecule matches a named alert catalog. The alert catalog needs to be one supported by the medchem package.
Parameters:
Name Type Description Defaultmol
Union[dm.Mol, str]
input molecule
requiredalert
str
named catalog to apply as filter on the molecule
requiredReturns:
Name Type Descriptionhas_alert
bool
whether the molecule has a given alert
"},{"location":"api/medchem.query.html#medchem.query.eval.QueryOperator.hasgroup","title":"hasgroup(mol, group)
staticmethod
","text":"Check if a molecule has a specific functional group. Internally, this is done fetching the smarts corresponding to the group then calling QueryOperator.hassubstructure
Parameters:
Name Type Description Defaultmol
Union[dm.Mol, str]
input molecule
requiredgroup
str
functional group to check on the molecule.
requiredReturns:
Name Type Descriptionhas_group
bool
whether the molecule has the given functional group
"},{"location":"api/medchem.query.html#medchem.query.eval.QueryOperator.hasprop","title":"hasprop(mol, prop, comparator, limit)
staticmethod
","text":"Check if a molecule has a molecular property within the desired range
Parameters:
Name Type Description Defaultmol
Union[dm.Mol, str]
input molecule
requiredprop
str
molecular property to apply as filter on the molecule
requiredcomparator
Callable
operator function to apply to check whether the molecule property matches the expected value
requiredlimit
float
limit value for determining whether the molecule property is within desired range
requiredReturns:
Name Type Descriptionhas_property
bool
whether the molecule has a given property within a desired range
"},{"location":"api/medchem.query.html#medchem.query.eval.QueryOperator.hassubstructure","title":"hassubstructure(mol, query, is_smarts=False, operator='min', limit=1)
staticmethod
","text":"Check if a molecule has substructure provided by a query
Parameters:
Name Type Description Defaultmol
Union[dm.Mol, str]
input molecule
requiredquery
str
input smarts query
requiredis_smarts
bool
whether this is a smarts query or not
False
operator
str
one of min or max to specify the min or max limit
'min'
limit
int
limit of substructures to be found
1
Returns:
Name Type Descriptionhas_substructure
bool
whether the query is a subgraph of the molecule
"},{"location":"api/medchem.query.html#medchem.query.eval.QueryOperator.hassuperstructure","title":"hassuperstructure(mol, query)
staticmethod
","text":"Check if a molecule has a superstructure defined by a query. Note that a superstructure cannot be a query (smarts)
Parameters:
Name Type Description Defaultmol
Union[dm.Mol, str]
input molecule
requiredquery
str
input SMILES query
requiredReturns:
Name Type Descriptionhas_superstructure
bool
whether the molecule is a subgraph of the query
"},{"location":"api/medchem.query.html#medchem.query.eval.QueryOperator.like","title":"like(mol, query, comparator, limit)
staticmethod
","text":"Check if a molecule is similar or distant enough from another molecule using tanimoto ECFP distance. The computed similarity is compared against limit using comparator.
Parameters:
Name Type Description Defaultmol
Union[dm.Mol, str]
input molecule
requiredquery
Union[dm.Mol, str]
input molecule to compare with
requiredcomparator
Callable[[float, float], bool]
operator function to apply to check whether the molecule property matches the expected value. Takes computed_similarity and limit
as arguments and returns a boolean.
limit
float
limit value for determining whether the molecule property is within desired range
requiredReturns:
Name Type Descriptionis_similar
bool
whether the molecule is similar or distant enough from the query
"},{"location":"api/medchem.query.html#medchem.query.eval.QueryOperator.matchrule","title":"matchrule(mol, rule)
staticmethod
","text":"Check if a molecule matches a druglikeness rule
Parameters:
Name Type Description Defaultmol
Union[dm.Mol, str]
input molecule
requiredrule
str
druglikeness rule to check on the molecule.
requiredReturns:
Name Type Descriptionmatch_rule
bool
whether the molecule match the given rule
"},{"location":"api/medchem.query.html#medchem.query.eval.QueryOperator.similarity","title":"similarity(mol, query)
staticmethod
","text":"Compute the ECFP tanimoto similarity between two molecules. This is an alternative to the like function, that does not enforce any comparison, and is useful for letting Python handle the binary comparison operators.
Parameters:
Name Type Description Defaultmol
Union[dm.Mol, str]
input molecule
requiredquery
Union[dm.Mol, str]
input query molecule to compute similarity against
requiredReturns:
Name Type Descriptionsimilarity
float
computed similarity value between mol and query
"},{"location":"api/medchem.rules.html","title":"medchem.rules
","text":""},{"location":"api/medchem.rules.html#medchem.rules.basic_rules","title":"medchem.rules.basic_rules
","text":""},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_chemaxon_druglikeness","title":"rule_of_chemaxon_druglikeness(mol, mw=None, clogp=None, n_hba=None, n_hbd=None, n_rotatable_bonds=None, n_rings=None, **kwargs)
","text":"Compute the drug likeness filter according to chemaxon:
It computes: MW < 400 & logP < 5 & HBA <= 10 & HBD <= 5 & ROTBONDS < 5 & ring > 0
Parameters:
Name Type Description Defaultmol
Union[dm.Mol, str]
input molecule
requiredmw
Optional[float]
precomputed molecular weight. Defaults to None.
None
clogp
Optional[float]
precomputed cLogP. Defaults to None.
None
n_hba
Optional[float]
precomputed number of HBA. Defaults to None.
None
n_hbd
Optional[float]
precomputed number of HBD. Defaults to None.
None
n_rotatable_bonds
Optional[int]
precomputed number of rotatable bonds in the molecule. Defaults to None.
None
n_rings
Optional[int]
precomputed number of rings in the molecule. Defaults to None.
None
Returns:
Name Type Descriptionroc
True if molecule is compliant, False otherwise
"},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_cns","title":"rule_of_cns(mol, mw=None, clogp=None, n_hba=None, n_hbd=None, tpsa=None, **kwargs)
","text":"Computes drug likeness rule for CNS penetrant molecules as described in: Jeffrey & Summerfield (2010) Assessment of the blood-brain barrier in CNS drug discovery.
It computes: MW in [135, 582] & logP in [-0.2, 6.1] & TPSA in [3, 118] & HBD <= 3 & HBA <= 5
Parameters:
Name Type Description Defaultmol
Union[dm.Mol, str]
input molecule
requiredmw
Optional[float]
precomputed molecular weight. Defaults to None.
None
clogp
Optional[float]
precomputed logP. Defaults to None.
None
n_hba
Optional[float]
precomputed number of HBA. Defaults to None.
None
n_hbd
Optional[float]
precomputed number of HBD. Defaults to None.
None
tpsa
Optional[int]
precomputed TPSA. Defaults to None.
None
Returns:
Name Type Descriptionroc
True if molecule is compliant, False otherwise
"},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_druglike_soft","title":"rule_of_druglike_soft(mol, mw=None, clogp=None, n_hba=None, n_hbd=None, tpsa=None, n_rotatable_bonds=None, n_rings=None, n_hetero_atoms=None, charge=None, **kwargs)
","text":"Compute the DrugLike Soft rule available in FAF-Drugs4. The rules are described at https://fafdrugs4.rpbs.univ-paris-diderot.fr/filters.html
It computes:
MW in [100, 600] & logP in [-3, 6] & HBD <= 7 & HBA <= 12 & TPSA <=180 & ROTBONDS <= 11 &\nRIGBONDS <= 30 & N_RINGS <= 6 & MAX_SIZE_RING <= 18 & N_CARBONS in [3, 35] & N_HETEROATOMS in [1, 15] &\nHC_RATIO in [0.1, 1.1] & CHARGE in [-4, 4] & N_ATOM_CHARGE <= 4\n
Parameters:
Name Type Description Defaultmol
Union[dm.Mol, str]
input molecule
requiredmw
Optional[float]
precomputed molecular weight. Defaults to None.
None
clogp
Optional[float]
precomputed cLogP. Defaults to None.
None
n_hba
Optional[float]
precomputed number of HBA. Defaults to None.
None
n_hbd
Optional[float]
precomputed number of HBD. Defaults to None.
None
tpsa
Optional[float]
precomputed TPSA. Defaults to None.
None
n_rotatable_bonds
Optional[int]
precomputed number of rotatable bonds. Defaults to None.
None
n_rings
Optional[int]
precomputed number of rings in the molecules. Defaults to None.
None
n_hetero_atoms
Optional[int]
precomputed number of heteroatoms. Defaults to None.
None
charge
Optional[float]
precomputed charge. Defaults to None.
None
"},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_egan","title":"rule_of_egan(mol, clogp=None, tpsa=None, **kwargs)
","text":"Compute passive intestinal absorption according to Egan Rules as described in: Egan, William J., Kenneth M. Merz, and John J. Baldwin (2000) Prediction of drug absorption using multivariate statistics
It computes: TPSA in [0, 132] & logP in [-1, 6]
Note
The author built a multivariate statistics model of passive intestinal absorption with robust outlier detection. Outliers were identified as being actively transported. They chose PSA and AlogP98 (cLogP), based on consideration of the physical processes involved in membrane permeability and the interrelationships and redundancies between other available descriptors. Compounds, which had been assayed for Caco-2 cell permeability, demonstrated a good rate of successful predictions (74\u221292%)
Parameters:
Name Type Description Defaultmol
Union[dm.Mol, str]
input molecule
requiredclogp
Optional[float]
precomputed cLogP. Defaults to None.
None
tpsa
Optional[float]
precomputed TPSA. Defaults to None.
None
Returns:
Name Type Descriptionroe
True if molecule is compliant, False otherwise
"},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_five","title":"rule_of_five(mol, mw=None, clogp=None, n_lipinski_hbd=None, n_lipinski_hba=None, **kwargs)
","text":"Compute the Lipinski's rule-of-5 for a molecule. Also known as Pfizer's rule of five or RO5, this rule is a rule of thumb to evaluate the druglikeness of a chemical compound
It computes: MW <= 500 & logP <= 5 & HBD <= 5 & HBA <= 10
Parameters:
Name Type Description Defaultmol
Union[dm.Mol, str]
input molecule
requiredmw
Optional[float]
precomputed molecular weight. Defaults to None.
None
clogp
Optional[float]
precomputed cLogP. Defaults to None.
None
n_lipinski_hbd
Optional[float]
precomputed number of HBD. Defaults to None.
None
n_lipinski_hba
Optional[float]
precomputed number of HBA. Defaults to None.
None
Returns:
Name Type Descriptionro5
True if molecule is compliant, False otherwise
"},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_five_beyond","title":"rule_of_five_beyond(mol, mw=None, clogp=None, n_hbd=None, n_hba=None, tpsa=None, n_rotatable_bonds=None, **kwargs)
","text":"Compute the Beyond rule-of-5 rule for a molecule. This rule illustrates the potential of compounds far beyond rule of 5 space to modulate novel and difficult target classes that have large, flat, and groove-shaped binding sites and has been described in:
Doak, Bradley C., et al. (2015) How Beyond Rule of 5 Drugs and Clinical Candidates Bind to Their Targets.
It computes: MW <= 1000 & logP in [-2, 10] & HBD <= 6 & HBA <= 15 & TPSA <=250 & ROTBONDS <= 20
Note
This is a very permissive rule and is likely to not be a good predictor for druglikeness as known for small molecules.
Parameters:
Name Type Description Defaultmol
Union[dm.Mol, str]
input molecule
requiredmw
Optional[float]
precomputed molecular weight. Defaults to None.
None
clogp
Optional[float]
precomputed cLogP. Defaults to None.
None
n_hbd
Optional[float]
precomputed number of HBD. Defaults to None.
None
n_hba
Optional[float]
precomputed number of HBA. Defaults to None.
None
tpsa
Optional[float]
precomputed TPSA. Defaults to None.
None
n_rotatable_bonds
Optional[int]
precomputed number of rotatable bonds. Defaults to None.
None
Returns:
Name Type Descriptionro5
True if molecule is compliant, False otherwise
"},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_four","title":"rule_of_four(mol, mw=None, clogp=None, n_hba=None, n_rings=None, **kwargs)
","text":"Compute the rule-of-4 for a molecule. The rule-of-4 defines a rule of thumb for PPI inhibitors, which are typically larger and more lipophilic than inhibitors of more standard binding sites. It has been published in:
Morelli X, Bourgeas R, Roche P. (2011) Chemical and structural lessons from recent successes in protein\u2013protein interaction inhibition. Also see: Shin et al. (2020) Current Challenges and Opportunities in Designing Protein\u2013Protein Interaction Targeted Drugs. doi:10.2147/AABC.S235542
It computes: MW >= 400 & logP >= 4 & RINGS >=4 & HBA >= 4
Warning
Do not use this for small molecules that are not PPI inhibitors
Parameters:
Name Type Description Defaultmol
Union[dm.Mol, str]
input molecule
requiredmw
Optional[float]
precomputed molecular weight. Defaults to None.
None
clogp
Optional[float]
precomputed cLogP. Defaults to None.
None
n_hba
Optional[float]
precomputed number of HBA. Defaults to None.
None
n_rings
Optional[int]
precomputed number of rings in the molecules. Defaults to None.
None
Returns:
Name Type Descriptionro4
True if molecule is compliant, False otherwise
"},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_generative_design","title":"rule_of_generative_design(mol, mw=None, clogp=None, n_lipinski_hba=None, n_lipinski_hbd=None, tpsa=None, n_rotatable_bonds=None, n_hetero_atoms=None, charge=None, **kwargs)
","text":"Compute druglikeness rule of generative design.
This set of rules is proprietary to Valence Discovery and has been curated to better filter molecules suggested by generative models for small molecules
It computes:
MW in [200, 600] & logP in [-3, 6] & HBD <= 7 & HBA <= 12 & TPSA in [40, 180] &\nROTBONDS <= 15 & RIGID BONDS <= 30 & N_AROMATIC_RINGS <= 5 & N_FUSED_AROMATIC_RINGS_TOGETHER <= 2 &\nMAX_SIZE_RING_SYSTEM <= 18 & N_CARBONS in [3, 40] & N_HETEROATOMS in [1, 15] & CHARGE in [-2, 2] &\nN_ATOM_CHARGE <= 2 & N_TOTAL_ATOMS < 70 & N_HEAVY_METALS < 1\n
Parameters:
Name Type Description Defaultmol
Union[dm.Mol, str]
input molecule
requiredmw
Optional[float]
precomputed molecular weight. Defaults to None.
None
clogp
Optional[float]
precomputed cLogP. Defaults to None.
None
n_lipinski_hba
Optional[float]
precomputed number of HBA. Defaults to None.
None
n_lipinski_hbd
Optional[float]
precomputed number of HBD. Defaults to None.
None
tpsa
Optional[float]
precomputed TPSA. Defaults to None.
None
n_rotatable_bonds
Optional[int]
precomputed number of rotatable bonds. Defaults to None.
None
n_hetero_atoms
Optional[int]
precomputed number of heteroatoms. Defaults to None.
None
charge
Optional[float]
precomputed charge. Defaults to None.
None
"},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_generative_design_strict","title":"rule_of_generative_design_strict(mol, mw=None, clogp=None, n_lipinski_hba=None, n_lipinski_hbd=None, tpsa=None, n_rotatable_bonds=None, n_hetero_atoms=None, charge=None, **kwargs)
","text":"Compute druglikeness rule of generative design.
This set of rules is proprietary to Valence Discovery and has been curated to better filter molecules suggested by generative models
It computes:
MW in [200, 600] & logP in [-3, 6] & HBD <= 7 & HBA <= 12 & TPSA in [40, 180] &\nROTBONDS <= 15 & RIGID BONDS <= 30 & N_AROMATIC_RINGS <= 5 & N_FUSED_AROMATIC_RINGS_TOGETHER <= 2 &\nMAX_SIZE_RING_SYSTEM <= 18 & N_CARBONS in [3, 40] & N_HETEROATOMS in [1, 15] & CHARGE in [-2, 2] &\nN_ATOM_CHARGE <= 2 & N_TOTAL_ATOMS < 70 & N_HEAVY_METALS < 1 & N_STEREO_CENTER <= 3 &\nHAS_NO_SPIDER_SIDE_CHAINS & FRACTION_RING_SYSTEM >= 0.25\n
By default SPIDER_SIDE_CHAINS are defined as having at least 2 'chains' of >=4 consecutive atoms in side chains (not part of any ring system)
Parameters:
Name Type Description Defaultmol
Union[dm.Mol, str]
input molecule
requiredmw
Optional[float]
precomputed molecular weight. Defaults to None.
None
clogp
Optional[float]
precomputed cLogP. Defaults to None.
None
n_lipinski_hba
Optional[float]
precomputed number of HBA. Defaults to None.
None
n_lipinski_hbd
Optional[float]
precomputed number of HBD. Defaults to None.
None
tpsa
Optional[float]
precomputed TPSA. Defaults to None.
None
n_rotatable_bonds
Optional[int]
precomputed number of rotatable bonds. Defaults to None.
None
n_hetero_atoms
Optional[int]
precomputed number of heteroatoms. Defaults to None.
None
charge
Optional[float]
precomputed charge. Defaults to None.
None
"},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_ghose","title":"rule_of_ghose(mol, mw=None, clogp=None, mr=None, **kwargs)
","text":"Compute the Ghose filter. The Ghose filter is a drug-like filter described in: Ghose, AK.; Viswanadhan, VN.; Wendoloski JJ. (1999) A knowledge-based approach in designing combinatorial or medicinal chemistry libraries for drug discovery.1. A qualitative and quantitative characterization of known drug databases.
It computes: MW in [160, 480] & logP in [-0.4, 5.6] & Natoms in [20, 70] & refractivity in [40, 130]
Parameters:
Name Type Description Defaultmol
Union[dm.Mol, str]
input molecule
requiredmw
Optional[float]
precomputed molecular weight. Defaults to None.
None
clogp
Optional[float]
precomputed cLogP. Defaults to None.
None
mr
Optional[float]
precomputed molecule refractivity. Defaults to None.
None
Returns:
Name Type Descriptionrog
True if molecule is compliant, False otherwise
"},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_gsk_4_400","title":"rule_of_gsk_4_400(mol, mw=None, clogp=None, **kwargs)
","text":"Compute GSK Rule (4/400) for druglikeness using interpretable ADMET rule of thumb based on Gleeson, M. Paul (2008). Generation of a set of simple, interpretable ADMET rules of thumb.
It computes: MW <= 400 & logP <= 4
.
Note
Parameters:
Name Type Description Defaultmol
Union[dm.Mol, str]
input molecule
requiredclogp
Optional[float]
precomputed cLogP. Defaults to None.
None
Returns:
Name Type Descriptionrog
True if molecule is compliant, False otherwise
"},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_leadlike_soft","title":"rule_of_leadlike_soft(mol, mw=None, clogp=None, n_hba=None, n_hbd=None, tpsa=None, n_rotatable_bonds=None, n_rings=None, n_hetero_atoms=None, charge=None, **kwargs)
","text":"Compute the Lead-Like Soft rule available in FAF-Drugs4. The rules are described at https://fafdrugs4.rpbs.univ-paris-diderot.fr/filters.html
It computes:
MW in [150, 400] & logP in [-3, 4] & HBD <= 4 & HBA <= 7 & TPSA <=160 & ROTBONDS <= 9 &\nRIGBONDS <= 30 & N_RINGS <= 4 & MAX_SIZE_RING <= 18 & N_CARBONS in [3, 35] & N_HETEROATOMS in [1, 15] &\nHC_RATIO in [0.1, 1.1] & CHARGE in [-4, 4] & N_ATOM_CHARGE <= 4 & N_STEREO_CENTER <= 2\n
Parameters:
Name Type Description Defaultmol
Union[dm.Mol, str]
input molecule
requiredmw
Optional[float]
precomputed molecular weight. Defaults to None.
None
clogp
Optional[float]
precomputed cLogP. Defaults to None.
None
n_hba
Optional[float]
precomputed number of HBA. Defaults to None.
None
n_hbd
Optional[float]
precomputed number of HBD. Defaults to None.
None
tpsa
Optional[float]
precomputed TPSA. Defaults to None.
None
n_rotatable_bonds
Optional[int]
precomputed number of rotatable bonds. Defaults to None.
None
n_rings
Optional[int]
precomputed number of rings in the molecules. Defaults to None.
None
n_hetero_atoms
Optional[int]
precomputed number of heteroatoms. Defaults to None.
None
charge
Optional[float]
precomputed charge. Defaults to None.
None
"},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_oprea","title":"rule_of_oprea(mol, n_hba=None, n_hbd=None, n_rotatable_bonds=None, n_rings=None, **kwargs)
","text":"Computes Oprea's rule of drug likeness obtained by comparing drug vs non drug compounds across multiple datasets. The rules have been described in: Oprea (2000) Property distribution of drug-related chemical databases*
It computes: HBD in [0, 2] & HBA in [2, 9] & ROTBONDS in [2,8] and RINGS in [1, 4]
Note
Seventy percent of the `drug-like' compounds were found between the following limits: 0 \u2264 HDO \u2264 2, 2 \u2264 HAC \u2264 9, 2 \u2264 RTB \u2264 8, and 1 \u2264 RNG \u2264 4
Parameters:
Name Type Description Defaultmol
Union[dm.Mol, str]
input molecule
requiredn_hba
Optional[float]
precomputed number of HBA. Defaults to None.
None
n_hbd
Optional[float]
precomputed number of HBD. Defaults to None.
None
n_rotatable_bonds
Optional[int]
precomputed number of rotatable bonds in the molecule. Defaults to None.
None
n_rings
Optional[int]
precomputed number of rings in the molecule. Defaults to None.
None
Returns roo: True if molecule is compliant, False otherwise
"},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_pfizer_3_75","title":"rule_of_pfizer_3_75(mol, clogp=None, tpsa=None, **kwargs)
","text":"Compute the Pfizer Rule (3/75 Rule) for in vivo toxicity. It has been described in: * Hughes, et al. (2008) Physiochemical drug properties associated with in vivo toxicological outcomes. * Price et al. (2009) Physicochemical drug properties associated with in vivo toxicological outcomes: a review
It computes: ! (TPSA < 75 & logP > 3)
Note
Parameters:
Name Type Description Defaultmol
Union[dm.Mol, str]
input molecule
requiredclogp
Optional[float]
precomputed cLogP. Defaults to None.
None
tpsa
Optional[float]
precomputed TPSA. Defaults to None.
None
Returns:
Name Type Descriptionrop
True if molecule is compliant, False otherwise
"},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_reos","title":"rule_of_reos(mol, mw=None, clogp=None, n_hba=None, n_hbd=None, charge=None, n_rotatable_bonds=None, n_heavy_atoms=None, **kwargs)
","text":"Compute the REOS filter. The REOS filter is a filter designed to filter out unuseful compounds from HTS screening results. The filter is described in: Waters & Namchuk (2003) Designing screens: how to make your hits a hit.
It computes: MW in [200, 500] & logP in [-5, 5] & HBA in [0, 10] & HBD in [0, 5] & charge in [-2, 2] & ROTBONDS in [0, 8] & NHeavyAtoms in [15, 50]
Parameters:
Name Type Description Defaultmol
Union[dm.Mol, str]
input molecule
requiredmw
Optional[float]
precomputed molecular weight. Defaults to None.
None
clogp
Optional[float]
precomputed cLogP. Defaults to None.
None
n_hba
Optional[float]
precomputed number of HBA. Defaults to None.
None
n_hbd
Optional[float]
precomputed number of HBD. Defaults to None.
None
charge
Optional[int]
precomputed formal charge. Defaults to None.
None
n_rotatable_bonds
Optional[int]
precomputed number of rotatable bonds in the molecule. Defaults to None.
None
n_heavy_atoms
Optional[int]
precomputed number of heavy atoms in the molecule. Defaults to None.
None
Returns:
Name Type Descriptionror
True if molecule is compliant, False otherwise
"},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_respiratory","title":"rule_of_respiratory(mol, mw=None, clogp=None, n_hba=None, n_hbd=None, tpsa=None, n_rotatable_bonds=None, n_rings=None, **kwargs)
","text":"Computes drug likeness rule for Respiratory (nasal/inhalatory) molecules as described in Ritchie et al. (2009) Analysis of the Calculated Physicochemical Properties of Respiratory Drugs: Can We Design for Inhaled Drugs Yet?
It computes: MW in [240, 520] & logP in [-2, 4.7] & HBONDS in [6, 12] & TPSA in [51, 135] & ROTBONDS in [3,8] & RINGS in [1,5]
Parameters:
Name Type Description Defaultmol
Union[dm.Mol, str]
input molecule
requiredmw
Optional[float]
precomputed molecular weight. Defaults to None.
None
clogp
Optional[float]
precomputed logP. Defaults to None.
None
n_hba
Optional[float]
precomputed number of HBA. Defaults to None.
None
n_hbd
Optional[float]
precomputed number of HBD. Defaults to None.
None
tpsa
Optional[int]
precomputed TPSA. Defaults to None.
None
n_rotatable_bonds
Optional[int]
precomputed number of rotatable bonds in the molecule. Defaults to None.
None
n_rings
Optional[int]
precomputed number of rings. Defaults to None
None
Returns:
Name Type Descriptionroc
True if molecule is compliant, False otherwise
"},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_three","title":"rule_of_three(mol, mw=None, clogp=None, n_hba=None, n_hbd=None, n_rotatable_bonds=None, **kwargs)
","text":"Compute the rule-of-3. The rule-of-three is a rule of thumb for molecular fragments (and not small molecules) published in:
Congreve M, Carr R, Murray C, Jhoti H. (2003) A \"rule of three\" for fragment-based lead discovery?
.
It computes: MW <= 300 & logP <= 3 & HBA <= 3 & HBD <= 3 & ROTBONDS <= 3
Note
TPSA is not used in this version of the rule of three. Other version uses TPSA <= 60 AND logP in [-3, 3]
in addition
Parameters:
Name Type Description Defaultmol
Union[dm.Mol, str]
input molecule
requiredmw
Optional[float]
precomputed molecular weight. Defaults to None.
None
clogp
Optional[float]
precomputed cLogP. Defaults to None.
None
n_hba
Optional[float]
precomputed number of HBA. Defaults to None.
None
n_hbd
Optional[float]
precomputed number of HBD. Defaults to None.
None
n_rotatable_bonds
Optional[int]
precomputed number of rotatable bonds in the molecule. Defaults to None.
None
Returns:
Name Type Descriptionro3
True if molecule is compliant, False otherwise
"},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_three_extended","title":"rule_of_three_extended(mol, mw=None, clogp=None, n_hba=None, n_hbd=None, tpsa=None, n_rotatable_bonds=None, **kwargs)
","text":"Compute the extended rule-of-3. This is an extension of the rule of three.
It computes: MW <= 300 & logP in [-3, 3] & HBA <= 6 & HBD <= 3 & ROTBONDS <= 3 & TPSA <= 60
Parameters:
Name Type Description Defaultmol
Union[dm.Mol, str]
input molecule
requiredmw
Optional[float]
precomputed molecular weight. Defaults to None.
None
clogp
Optional[float]
precomputed cLogP. Defaults to None.
None
n_hba
Optional[float]
precomputed number of HBA. Defaults to None.
None
n_hbd
Optional[float]
precomputed number of HBD. Defaults to None.
None
tpsa
Optional[float]
precomputed TPSA. Defaults to None.
None
n_rotatable_bonds
Optional[int]
precomputed number of rotatable bonds in the molecule. Defaults to None.
None
Returns:
Name Type Descriptionro3
True if molecule is compliant, False otherwise
"},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_two","title":"rule_of_two(mol, mw=None, clogp=None, n_hba=None, n_hbd=None, **kwargs)
","text":"Computes rules-of-2 for reagent (building block design). It aims for prioritization of reagents that typically do not add more than 200 Da in MW or 2 units of clogP. The rule of two has been described in:
Goldberg et al. (2015) Designing novel building blocks is an overlooked strategy to improve compound quality see: http://csmres.co.uk/cs.public.upd/article-downloads/Designing-novel-building-blocks.pdf
Note
Their analysis showed that molecular weight (MW) and clogP were important factors in the frequency of use of reagents. Other parameters, such as TPSA, HBA, HBD and ROTBONDS count, were less important.
It computes MW <= 200 & logP <= 2 & HBA <= 4 & HBD <= 2
Parameters:
Name Type Description Defaultmol
Union[dm.Mol, str]
input molecule
requiredmw
Optional[float]
precomputed molecular weight. Defaults to None.
None
clogp
Optional[float]
precomputed cLogP. Defaults to None.
None
n_hba
Optional[float]
precomputed number of HBA. Defaults to None.
None
n_hbd
Optional[float]
precomputed number of HBD. Defaults to None.
None
Returns:
Name Type Descriptionro2
True if molecule is compliant, False otherwise
"},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_veber","title":"rule_of_veber(mol, tpsa=None, n_rotatable_bonds=None, **kwargs)
","text":"Compute the Veber filter. The Veber filter is a druglike filter for orally active drugs described in:
Veber et. al. (2002) Molecular Properties That Influence the Oral Bioavailability of Drug Candidates.
It computes: ROTBONDS <= 10 & TPSA < 140
Parameters:
Name Type Description Defaultmol
Union[dm.Mol, str]
input molecule
requiredtpsa
Optional[float]
precomputed TPSA. Defaults to None.
None
n_rotatable_bonds
Optional[int]
precomputed number of rotatable bonds. Defaults to None.
None
Returns:
Name Type Descriptionrov
True if molecule is compliant, False otherwise
"},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_xu","title":"rule_of_xu(mol, n_hba=None, n_hbd=None, n_rotatable_bonds=None, n_rings=None, n_heavy_atoms=None, **kwargs)
","text":"Computes Xu's rule of drug likeness as described in: Xu & Stevenson (2000), Drug-like Index: A New Approach To Measure Drug-like Compounds and Their Diversity
It computes HBD <= 5 & HBA <= 10 & ROTBONDS in [2, 35] & RINGS in [1, 7] & NHeavyAtoms in [10, 50]
.
Note
A compound's Drug Likeness Index is calculated based upon the knowledge derived from known drugs selected from Comprehensive Medicinal Chemistry (CMC) database.
Parameters:
Name Type Description Defaultmol
Union[dm.Mol, str]
input molecule
requiredn_hba
Optional[float]
precomputed number of HBA. Defaults to None.
None
n_hbd
Optional[float]
precomputed number of HBD. Defaults to None.
None
n_rotatable_bonds
Optional[int]
precomputed number of rotatable bonds in the molecule. Defaults to None.
None
n_rings
Optional[int]
precomputed number of rings in the molecule. Defaults to None.
None
n_heavy_atoms
Optional[int]
precomputed number of heavy atoms in the molecule. Defaults to None.
None
Returns rox: True if molecule is compliant, False otherwise
"},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_zinc","title":"rule_of_zinc(mol, mw=None, clogp=None, n_hba=None, n_hbd=None, tpsa=None, n_rotatable_bonds=None, n_rings=None, charge=None, **kwargs)
","text":"Compute the Zinc rule for a molecule. This rule is a rule of thumb to evaluate the druglikeness of a chemical compounds, based on:
Irwin & Shoichet (2005) ZINC - A Free Database of Commercially Available Compounds for Virtual Screening.
Also see: https://fafdrugs4.rpbs.univ-paris-diderot.fr/filters.html
It computes: MW in [60, 600] & logP in [-4, 6] & HBD <= 6 & HBA <= 11 & TPSA <=150 & ROTBONDS <= 12 & RIGBONDS <= 50 & N_RINGS <= 7 & MAX_SIZE_RING <= 12 & N_CARBONS >=3 & HC_RATIO <= 2.0 & CHARGE in [-4, 4]
Parameters:
Name Type Description Defaultmol
Union[dm.Mol, str]
input molecule
requiredmw
Optional[float]
precomputed molecular weight. Defaults to None.
None
clogp
Optional[float]
precomputed cLogP. Defaults to None.
None
n_hba
Optional[float]
precomputed number of HBA. Defaults to None.
None
n_hbd
Optional[float]
precomputed number of HBD. Defaults to None.
None
tpsa
Optional[float]
precomputed TPSA. Defaults to None.
None
n_rotatable_bonds
Optional[int]
precomputed number of rotatable bonds. Defaults to None.
None
n_rings
Optional[int]
precomputed number of rings in the molecules. Defaults to None.
None
charge
Optional[float]
precomputed charge. Defaults to None.
None
"},{"location":"api/medchem.rules.html#medchem.rules.rule_filter","title":"medchem.rules.rule_filter
","text":""},{"location":"api/medchem.rules.html#medchem.rules.rule_filter.RuleFilters","title":"RuleFilters
","text":"Build a filter based on a compound phychem properties. For a list of default rules, use RuleFilters.list_available_rules()
. Most of these rules have been collected from the literature, including https://fafdrugs4.rpbs.univ-paris-diderot.fr/descriptors.html
__call__(mols, n_jobs=None, progress=False, scheduler='processes')
","text":"Compute the rules for a list of molecules
Parameters:
Name Type Description Defaultmols
List[Union[str, dm.Mol]]
list of input molecule object.
requiredn_jobs
Optional[int]
number of jobs to run in parallel. Defaults to None.
None
progress
bool
whether to show progress or not. Defaults to False.
False
scheduler
str
which scheduler to use. Defaults to \"processes\".
'processes'
Returns:
Name Type Descriptiondf
Dataframe where each row is a molecule and each column is the outcome of applying self.rules[column].
"},{"location":"api/medchem.rules.html#medchem.rules.rule_filter.RuleFilters.__getitems__","title":"__getitems__(ind)
","text":"Return a specific rule
"},{"location":"api/medchem.rules.html#medchem.rules.rule_filter.RuleFilters.__init__","title":"__init__(rule_list, rule_list_names=None, precompute_props=True)
","text":"Build a rule filtering object
Parameters:
Name Type Description Defaultrule_list
List[Union[str, Callable]]
list of rules to apply. Either a callable that takes a molecule as input (with kwargs) or a string of the name of a pre-defined rule as defined in the basic_rules module
requiredrule_list_names
Optional[List[str]]
Name of the rules passed as inputs. Defaults to None.
None
precompute_props
bool
Whether to precompute the properties for all molecules to speed up redundant calculation. Defaults to True.
True
"},{"location":"api/medchem.rules.html#medchem.rules.rule_filter.RuleFilters.__len__","title":"__len__()
","text":"Return the number of rules inside this filter
"},{"location":"api/medchem.rules.html#medchem.rules.rule_filter.RuleFilters.list_available_rules","title":"list_available_rules(query=None)
cached
staticmethod
","text":"List all the available rules and their properties
"},{"location":"api/medchem.rules.html#medchem.rules.rule_filter.RuleFilters.list_available_rules_names","title":"list_available_rules_names(query=None)
cached
staticmethod
","text":"List only the names of the available rules
"},{"location":"api/medchem.utils.html","title":"medchem.utils
","text":""},{"location":"api/medchem.utils.html#medchem.utils.smarts","title":"medchem.utils.smarts
","text":""},{"location":"api/medchem.utils.html#medchem.utils.smarts.SMARTSUtils","title":"SMARTSUtils
","text":"Collections of utils to build complex SMARTS query more efficiently for non experienced user
"},{"location":"api/medchem.utils.html#medchem.utils.smarts.SMARTSUtils.aliphatic_chain","title":"aliphatic_chain(min_size=6, unbranched=False, unsaturated_bondtype=None, allow_hetero_atoms=True)
classmethod
","text":"Returns a query that can match a long aliphatic chain
Parameters:
Name Type Description Defaultmin_size
int
minimum size of the long chain
6
unbranched
bool
whether the chain should be unbranched
False
unsaturated_bondtype
Optional[str]
additional unsaturated bond type to use for the query. By default, Any bond type (~) is used. Single bonds ARE always allowed and bondtype cannot be aromatic
None
allow_hetero_atoms
bool
whether the chain can contain hetero atoms
True
Example to build a query for a long aliphatic chain of at least 5 atoms (e.g: 'CCC(C)CCC')
SMARTSUtils.aliphatic_chain(min_size=5)
Returns:
Name Type Descriptionsmarts
smarts pattern matching a long aliphatic chain
"},{"location":"api/medchem.utils.html#medchem.utils.smarts.SMARTSUtils.atom_in_env","title":"atom_in_env(*smarts_strs, include_atoms=False, union=False)
classmethod
","text":"Returns a recursive/group smarts to find an atom that fits in the environments as defined by all the input smarts
Parameters:
Name Type Description Defaultsmarts_strs
list of input patterns defining the environment the atom must fit in. The first atom of each pattern should be the atom we want to match to, unless include_atoms is set to True, then [*:99] will be added at the start of each pattern
()
include_atoms
bool
whether to include an additional first atom that needs to be in the required environment or not
False
union
bool
whether to use the union of the environments or the intersection
False
Example: you can use this function to construct a complex query if you are not sure how to write the SMARTS. For example, to find a carbon atom that is in a ring of size 6, bonded to an ethoxy, and has a fluorine in meta
SMARTSUtils.atom_in_env(\"#6;r6[C&D1]\", \"[c]aa[F]\", union=False) # there are alternative way to write this
Returns:
Name Type Descriptionsmarts
smarts pattern matching the group/environment
"},{"location":"api/medchem.utils.html#medchem.utils.smarts.SMARTSUtils.different_fragment","title":"different_fragment(*smarts_strs)
classmethod
","text":"Returns a new query that match patterns that are in different fragments.
Warning
This feature is not supported yet by RDKit. See https://github.com/rdkit/rdkit/issues/1261
Parameters:
Name Type Description Defaultsmarts_strs
list of input patterns defining the fragments
()
Example matching two oxygens in a molecule will work with '[#8].[#8]', but if you want the oxygens to be in DIFFERENT fragments, then build the query with:
SMARTSUtils.different_fragment('[#8]', '[#8]')
Returns:
Name Type Descriptionsmarts
smarts pattern matching patterns that are in different fragments
"},{"location":"api/medchem.utils.html#medchem.utils.smarts.SMARTSUtils.meta","title":"meta(smarts_str1, smarts_str2, aromatic_only=False)
classmethod
","text":"Returns a recursive smarts string connecting the two input smarts in meta
of each other. Connection points need to be through single or double bonds
Parameters:
Name Type Description Defaultsmarts_str1
str
first smarts pattern defining the first functional group
requiredsmarts_str2
str
second smarts pattern defining the second functional group
requiredaromatic_only
bool
whether the ring needs to be aromatic or not
False
Example to build a smarts for a methyl group in meta to an oxygen (e.g: 'c1c(C)cc(O)cc1')
SMARTSUtils.meta('[#6;!R]', '[#8]')
Returns:
Name Type Descriptionsmarts
smarts pattern connecting the two input smarts in meta
of each other
ortho(smarts_str1, smarts_str2, aromatic_only=False)
classmethod
","text":"Returns a recursive smarts string connecting the two input smarts in ortho
of each other. Connection points need to be through single or double bonds
Parameters:
Name Type Description Defaultsmarts_str1
str
first smarts pattern defining the first functional group
requiredsmarts_str2
str
second smarts pattern defining the second functional group
requiredaromatic_only
bool
whether the ring needs to be aromatic or not
False
Example to build a smarts for a methyl group in ortho to an oxygen (e.g: 'C1CC(C)C(O)CC1')
SMARTSUtils.ortho('[#6;!R]', '[#8]')
Returns:
Name Type Descriptionsmarts
smarts pattern connecting the two input smarts in ortho
of each other
para(smarts_str1, smarts_str2, aromatic_only=False)
classmethod
","text":"Returns a recursive smarts string connecting the two input smarts in para
of each other. Connection points need to be through single or double bonds
Parameters:
Name Type Description Defaultsmarts_str1
str
first smarts pattern defining the first functional group
requiredsmarts_str2
str
second smarts pattern defining the second functional group
requiredaromatic_only
bool
whether the ring needs to be aromatic or not
False
Example to build a smarts for a methyl group in para to an oxygen (e.g: 'c1(C)ccc(O)cc1')
SMARTSUtils.para('[#6;!R]', '[#8]')
Returns:
Name Type Descriptionsmarts
smarts pattern connecting the two input smarts in para
of each other
same_fragment(*smarts_strs)
classmethod
","text":"Returns a new query that match patterns that are in THE SAME fragment (component)
Warning
This feature is not supported yet by RDKit. See https://github.com/rdkit/rdkit/issues/1261
Parameters:
Name Type Description Defaultsmarts_strs
list of input patterns defining the fragments
()
Example matching two oxygens in a molecule will work with '[#8].[#8]', but if you want the oxygens to be in the SAME fragment, then build the query with:
SMARTSUtils.same_fragment('[#8]', '[#8]')
Returns:
Name Type Descriptionsmarts
smarts pattern matching patterns that are in the same component
"},{"location":"api/medchem.utils.html#medchem.utils.smarts.SMARTSUtils.standardize_attachment","title":"standardize_attachment(smiles, attach_tokens='[*:1]')
classmethod
","text":"Standardize an attachment point in a smiles
Parameters:
Name Type Description Defaultsmiles
str
SMILES string
requiredattach_tokens
str
Attachment point token to use as standard token
'[*:1]'
"},{"location":"api/medchem.utils.html#medchem.utils.matches","title":"medchem.utils.matches
","text":""},{"location":"api/medchem.utils.html#medchem.utils.matches.Constraints","title":"Constraints
","text":"Complex query system for matches with additional constraints
Example
mol1 = dm.to_mol(\"CN(C)C(=O)c1cncc(C)c1\") mol2 = dm.to_mol(\"c1ccc(cc1)-c1cccnc1\") core = dm.from_smarts(\"c1cncc([*:1])c1\") [atom.SetProp(\"query\", \"my_constraints\") for atom in core.GetAtoms() if atom.GetAtomMapNum() == 1] constraint_fns = dict(my_constraints=lambda x: dm.descriptors.n_aromatic_atoms(x) > 0) constraint = Constraints(core, constraint_fns) matches = [constraint(mol1), constraint(mol2)] # False, True
"},{"location":"api/medchem.utils.html#medchem.utils.matches.Constraints.__call__","title":"__call__(mol)
","text":"Check if input molecule respect the constraints
Parameters:
Name Type Description Defaultmol
input molecule
required"},{"location":"api/medchem.utils.html#medchem.utils.matches.Constraints.__init__","title":"__init__(core, constraint_fns, prop_name='query')
","text":"Initialize the constraint matcher
Parameters:
Name Type Description Defaultcore
dm.Mol
the scaffold/query molecule to match against. Needs to be a molecule
requiredconstraint_fns
Dict[Callable]
a dictionary of constraints functions
requiredprop_name
str
the property name to use in the match at each atom defined by the core for further matches against the constraints functions
'query'
"},{"location":"api/medchem.utils.html#medchem.utils.matches.Constraints.get_matches","title":"get_matches(mol, multiple=True)
","text":"Get matches that respect the constraints in the molecules
Parameters:
Name Type Description Defaultmol
dm.Mol
input molecule
requiredmultiple
bool
if True, return all the matches, if False, return the first match
True
"},{"location":"api/medchem.utils.html#medchem.utils.matches.Constraints.has_match","title":"has_match(mol)
","text":"Check if input molecule respect the constraints
Parameters:
Name Type Description Defaultmol
dm.Mol
input molecule
required"},{"location":"api/medchem.utils.html#medchem.utils.matches.Constraints.validate","title":"validate(mol, constraints)
staticmethod
","text":"Validate a list of constraint object against a molecule
Parameters:
Name Type Description Defaultmol
the molecule object
requiredconstraints
List[Constraints]
list of Constraints objects to validate against the molecule
required"},{"location":"api/medchem.utils.html#medchem.utils.loader","title":"medchem.utils.loader
","text":""},{"location":"api/medchem.utils.html#medchem.utils.loader.get_data","title":"get_data(file=None)
","text":"Return the folder that contains the package specific data
"},{"location":"api/medchem.utils.html#medchem.utils.loader.get_grammar","title":"get_grammar(grammar=None, as_string=False)
","text":"Return the default lark grammar file for queries
Parameters:
Name Type Description Defaultgrammar
Optional[os.PathLike]
The path to the grammar file. If None, the default grammar is used.
None
as_string
bool
If True, return the grammar as a string. Defaults to False.
False
"},{"location":"api/medchem.utils.html#medchem.utils.graph","title":"medchem.utils.graph
","text":""},{"location":"api/medchem.utils.html#medchem.utils.graph.automorphism","title":"automorphism(mol, standardize=True, node_attrs=DEFAULT_NODE_ATTR, edge_attrs=DEFAULT_EDGE_ATTR)
","text":"Compute automorphism in a molecular graph
Parameters:
Name Type Description Defaultmol
Union[str, dm.Mol]
input molecular graph
requiredstandardize
bool
whether to standardize the compound or not
True
node_attrs
List[str]
list of categorical atom attributes/properties to consider for node matching
DEFAULT_NODE_ATTR
edge_attrs
List[str]
list of categorical bond attributes/properties to consider for edge matching
DEFAULT_EDGE_ATTR
"},{"location":"api/medchem.utils.html#medchem.utils.graph.score_symmetry","title":"score_symmetry(mol, exclude_self_mapped_edged=False, **automorphism_kwargs)
","text":"Provide a symmetry score for a given input molecule
Note
This is a heuristic and our definition of symmetry is pretty loose. We define symmetry according to any (set of) planes dividing the molecule into two very similar subgraphs. We include both edge and vertex transitivity. For example, the star-molecular graph (e.g. neopentane) is symmetrical here, although it's not vertex-transitive. For more information see https://github.com/valence-platform/medchem/pull/41
Parameters:
Name Type Description Defaultmol
Union[dm.Mol, str]
input molecule
requiredexclude_self_mapped_edged
bool
Whether to exclude edges that match to themselves in automorphism.
False
automorphism_kwargs
keyword for determining automorphism
{}
"},{"location":"tutorials/getting-started.html","title":"Getting Started","text":"In\u00a0[1]: Copied! %load_ext autoreload\n%autoreload 2\n%load_ext autoreload %autoreload 2 In\u00a0[2]: Copied!
import datamol as dm\nimport numpy as np\nfrom loguru import logger\n\ndata = dm.data.freesolv().sample(500)\nsmiles_list = data.smiles.values\nimport datamol as dm import numpy as np from loguru import logger data = dm.data.freesolv().sample(500) smiles_list = data.smiles.values In\u00a0[3]: Copied!
from medchem.filter import lead\nfrom medchem.demerits import score, batch_score\nfrom medchem.alerts import NovartisFilters\nfrom medchem.alerts import AlertFilters\nfrom medchem.catalog import NamedCatalogs\nfrom medchem.utils import get_data\nfrom rdkit.Chem import rdfiltercatalog\nfrom medchem.filter import lead from medchem.demerits import score, batch_score from medchem.alerts import NovartisFilters from medchem.alerts import AlertFilters from medchem.catalog import NamedCatalogs from medchem.utils import get_data from rdkit.Chem import rdfiltercatalog In\u00a0[4]: Copied!
# common filters including pains, brenk, nih, zinc\npains_a = rdfiltercatalog.FilterCatalogParams.FilterCatalogs.PAINS_A\nlead.catalog_filter(smiles_list, [\"nih\", pains_a, NamedCatalogs.dundee()])\n# common filters including pains, brenk, nih, zinc pains_a = rdfiltercatalog.FilterCatalogParams.FilterCatalogs.PAINS_A lead.catalog_filter(smiles_list, [\"nih\", pains_a, NamedCatalogs.dundee()]) Out[4]:
array([ True, True, True, True, False, False, True, False, False,\n True, False, True, True, True, False, True, False, False,\n False, True, True, False, True, False, False, False, True,\n True, True, True, True, True, False, True, False, False,\n False, True, False, False, False, True, False, False, False,\n True, False, True, False, True, True, False, True, True,\n True, False, True, False, False, False, True, False, False,\n True, False, True, True, False, False, False, False, True,\n True, True, False, True, False, True, True, True, False,\n True, True, False, True, True, True, True, True, False,\n False, True, True, False, True, False, False, False, True,\n False, False, False, False, True, False, False, True, True,\n True, True, True, False, True, True, True, False, True,\n False, False, True, True, True, True, False, False, True,\n False, False, True, True, False, True, True, True, True,\n False, False, False, True, False, False, False, True, False,\n False, True, False, False, True, True, True, False, True,\n False, False, False, True, True, True, False, True, True,\n False, False, False, False, False, True, False, False, False,\n False, False, True, False, True, False, True, False, False,\n True, False, False, True, False, True, True, True, False,\n True, True, True, True, False, True, True, True, False,\n False, True, True, False, True, False, True, False, True,\n True, True, True, False, False, True, False, True, False,\n False, True, False, True, False, True, False, False, False,\n False, True, True, False, False, True, True, False, True,\n True, False, True, True, True, False, False, False, True,\n False, False, False, True, True, False, True, True, False,\n True, True, True, True, False, True, False, True, True,\n True, True, False, True, False, True, False, False, True,\n True, True, False, False, False, True, True, True, False,\n True, False, False, False, False, False, True, False, True,\n True, False, True, True, True, False, True, True, False,\n 
False, False, True, False, True, False, False, False, True,\n True, False, True, False, False, True, False, False, True,\n True, False, False, False, True, True, True, False, True,\n True, True, True, False, True, False, False, True, True,\n True, False, False, True, False, False, False, True, False,\n False, False, False, True, False, True, True, True, False,\n True, False, False, False, True, False, True, False, False,\n False, False, False, False, True, True, True, False, True,\n False, False, False, False, False, True, True, True, True,\n False, True, True, True, False, True, False, True, False,\n False, False, True, False, False, True, True, False, False,\n True, True, False, True, True, False, True, False, False,\n False, True, True, False, True, True, True, False, False,\n False, False, False, False, False, True, False, False, True,\n True, True, True, True, True, True, True, False, True,\n True, False, False, True, True, True, True, False, False,\n False, True, False, True, False, True, True, True, False,\n False, True, True, True, False, True, False, True, False,\n True, True, True, True, True, True, True, True, False,\n False, True, True, True, False, True, True, False, True,\n False, True, False, True, True, True, False, True, False,\n True, True, True, False, False, True, True, False, True,\n True, False, True, False, False])In\u00a0[5]: Copied!
# filtering based on some commons alerts + additional lead like rules\nlead.alert_filter(smiles_list, alerts=[\"Glaxo\", \"BMS\"], rule_dict=dict(MW=[0, 100]))\n# filtering based on some commons alerts + additional lead like rules lead.alert_filter(smiles_list, alerts=[\"Glaxo\", \"BMS\"], rule_dict=dict(MW=[0, 100])) Out[5]:
array([False, False, False, True, False, False, True, False, False,\n False, True, True, False, False, False, False, False, False,\n True, True, False, False, False, False, False, True, True,\n True, False, False, True, False, False, False, False, False,\n False, False, False, False, True, True, False, False, False,\n False, False, False, False, False, False, False, False, False,\n False, False, True, True, False, False, True, False, False,\n False, True, False, False, False, False, True, False, False,\n False, False, False, False, True, True, True, False, False,\n True, False, False, False, False, True, False, False, False,\n False, False, False, False, True, True, False, False, False,\n False, False, False, False, True, True, False, False, True,\n False, True, True, False, False, False, False, False, False,\n False, False, True, True, False, False, False, False, False,\n True, False, True, True, False, True, False, False, False,\n False, False, False, False, False, False, False, True, True,\n False, False, False, True, False, False, True, False, True,\n False, False, False, False, True, True, False, False, True,\n False, False, False, False, False, True, False, False, False,\n False, False, True, False, True, False, True, False, True,\n False, False, False, False, False, False, False, False, False,\n True, True, False, False, False, False, False, False, False,\n False, True, True, False, False, False, False, False, False,\n True, False, False, True, True, True, False, False, False,\n False, False, False, False, False, True, False, False, True,\n False, False, False, True, False, False, False, False, False,\n False, False, False, False, True, False, False, False, False,\n False, False, False, True, False, False, False, True, False,\n False, False, False, False, False, False, False, False, False,\n True, True, False, False, False, True, False, True, True,\n False, True, False, False, False, False, True, False, False,\n True, False, False, False, False, False, True, 
False, False,\n False, False, True, True, False, False, True, False, False,\n False, False, True, False, False, False, False, False, False,\n True, True, False, False, False, True, False, False, True,\n False, False, False, False, False, True, False, False, False,\n False, True, False, False, False, False, True, False, False,\n False, True, False, False, False, False, False, False, False,\n False, False, False, False, False, True, True, True, False,\n False, False, False, False, False, False, True, False, False,\n True, False, False, False, False, True, False, False, False,\n False, False, False, True, False, False, False, True, False,\n False, True, False, False, False, False, False, False, True,\n False, True, True, False, True, False, True, False, True,\n False, True, False, False, False, False, True, False, False,\n False, True, True, False, True, True, True, False, False,\n False, False, False, False, True, False, True, False, True,\n True, False, False, False, False, True, True, False, False,\n True, False, False, False, True, True, False, False, False,\n False, False, False, True, False, False, False, False, False,\n False, False, False, True, False, False, False, True, False,\n False, False, True, True, False, False, False, True, False,\n False, False, False, True, False, False, True, False, True,\n False, False, False, True, True, False, False, True, False,\n False, False, True, False, False, False, False, True, False,\n False, True, False, False, False])In\u00a0[6]: Copied!
# filtering based on NIBR screening deck process described in\n# \"Evolution of Novartis' small molecule screening deck design\" by Schuffenhauer, A. et al. J. Med. Chem. (2020),\n# https://dx.doi.org/10.1021/acs.jmedchem.0c01332.\nlead.screening_filter(smiles_list, return_idx=True)\n# filtering based on NIBR screening deck process described in # \"Evolution of Novartis' small molecule screening deck design\" by Schuffenhauer, A. et al. J. Med. Chem. (2020), # https://dx.doi.org/10.1021/acs.jmedchem.0c01332. lead.screening_filter(smiles_list, return_idx=True) Out[6]:
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,\n 13, 14, 15, 17, 18, 19, 20, 21, 22, 25, 26, 27, 28,\n 29, 30, 31, 32, 33, 36, 37, 38, 40, 41, 43, 45, 47,\n 48, 49, 50, 52, 53, 54, 56, 57, 58, 59, 60, 61, 62,\n 63, 64, 65, 66, 67, 69, 71, 72, 73, 74, 75, 77, 78,\n 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90, 91, 92,\n 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106,\n 107, 108, 109, 110, 111, 112, 113, 114, 115, 117, 118, 119, 120,\n 121, 122, 123, 125, 127, 128, 129, 131, 132, 133, 134, 135, 136,\n 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,\n 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 163,\n 164, 165, 166, 167, 169, 170, 171, 173, 174, 175, 176, 177, 178,\n 179, 180, 181, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192,\n 194, 195, 196, 197, 198, 199, 200, 202, 203, 204, 206, 207, 208,\n 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221,\n 222, 223, 224, 225, 226, 227, 228, 230, 231, 233, 234, 235, 236,\n 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249,\n 250, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263,\n 264, 266, 268, 269, 270, 271, 273, 274, 275, 276, 277, 278, 279,\n 280, 281, 282, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293,\n 294, 295, 296, 297, 298, 299, 300, 303, 304, 305, 306, 307, 308,\n 309, 310, 311, 313, 314, 315, 317, 318, 319, 320, 321, 322, 323,\n 324, 325, 326, 328, 329, 331, 332, 333, 334, 336, 337, 338, 339,\n 340, 341, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 355,\n 357, 358, 359, 360, 364, 365, 366, 367, 368, 369, 370, 371, 372,\n 373, 374, 375, 376, 377, 379, 380, 381, 382, 383, 384, 385, 387,\n 388, 389, 391, 392, 393, 394, 395, 396, 397, 399, 400, 402, 403,\n 404, 406, 407, 408, 409, 410, 411, 414, 415, 416, 417, 418, 419,\n 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432,\n 433, 434, 435, 436, 437, 438, 439, 440, 442, 443, 444, 445, 446,\n 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459,\n 460, 
461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472,\n 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485,\n 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 499])In\u00a0[7]: Copied!
# Filter based on the demerit scoring of Eli Lilly\ntest_config = {\n \"min_atoms\": 10, # default is 1\n \"soft_max_atoms\": 30, # default is 30\n \"hard_max_atoms\": 50, # default is 30\n \"nodemerit\": False, # default is False\n \"dthresh\": 160, # default is None with no threshold\n}\nlead.lilly_demerit_filter(smiles_list, max_demerits=160, return_idx=True, **test_config)\n# Filter based on the demerit scoring of Eli Lilly test_config = { \"min_atoms\": 10, # default is 1 \"soft_max_atoms\": 30, # default is 30 \"hard_max_atoms\": 50, # default is 30 \"nodemerit\": False, # default is False \"dthresh\": 160, # default is None with no threshold } lead.lilly_demerit_filter(smiles_list, max_demerits=160, return_idx=True, **test_config) Out[7]:
array([ 0, 4, 37, 61, 62, 72, 73, 84, 89, 98, 106, 113, 132,\n 140, 148, 178, 180, 187, 192, 203, 209, 214, 226, 240, 244, 247,\n 249, 253, 254, 259, 273, 282, 287, 293, 310, 317, 319, 324, 345,\n 374, 377, 383, 384, 385, 399, 400, 417, 419, 434, 442, 446, 448,\n 454, 455, 459, 464, 470, 473, 475, 478, 482, 485, 487, 492, 494])In\u00a0[8]: Copied!
filter_obj = AlertFilters(alerts_set=[\"inpharmatica\", \"SureChEMBL\"])\nfilter_obj.list_default_available_alerts()\nfilter_obj = AlertFilters(alerts_set=[\"inpharmatica\", \"SureChEMBL\"]) filter_obj.list_default_available_alerts() Out[8]: rule_set_name smarts catalog_description rule_set source 10 Glaxo 55 Glaxo Wellcome Hard filters 1 ChEMBL 5 Dundee 105 University of Dundee NTD Screening Library Fil... 2 ChEMBL 2 BMS 180 Bristol-Myers Squibb HTS Deck filters 3 ChEMBL 18 PAINS 481 PAINS filters 4 ChEMBL 21 SureChEMBL 166 SureChEMBL Non-MedChem Friendly SMARTS 5 ChEMBL 16 MLSMR 116 NIH MLSMR Excluded Functionality filters (MLSMR) 6 ChEMBL 12 Inpharmatica 91 Unwanted fragments derived by Inpharmatica Ltd. 7 ChEMBL 14 LINT 57 Pfizer lint filters (lint) 8 ChEMBL 0 Alarm-NMR 75 Reactive False Positives in Biochemical Screen... 9 Litterature 1 AlphaScreen-Hitters 6 Structural filters for compounds that may be a... 10 Litterature 8 GST-Hitters 34 Structural filters for compounds may prevent G... 11 Litterature 11 HIS-Hitters 19 Structural filters for compounds prevents the ... 12 Litterature 15 LuciferaseInhibitor 3 Structural filters for compounds that may inhi... 13 Litterature 4 DNABinder 78 Structural filters for compounds that may bind... 14 Litterature 3 Chelator 55 Structural filters for compounds that may inhi... 15 Litterature 7 Frequent-Hitter 15 Structural filters for compounds that are freq... 16 Litterature 6 Electrophilic 119 Structural filters for compounds that could ta... 17 Litterature 9 Genotoxic-Carcinogenicity 117 Structural filters for compounds that may caus... 18 Litterature 13 LD50-Oral 20 Structural filters for compounds that may caus... 19 Litterature 17 Non-Genotoxic-Carcinogenicity 22 Structural filters for compounds that may caus... 
20 Litterature 19 Reactive-Unstable-Toxic 335 General very reactive/unstable or Toxic compounds 21 Litterature 20 Skin 155 Skin Sensitization filters (irritables) 22 Litterature 22 Toxicophore 154 General Toxicophores 23 Litterature In\u00a0[9]: Copied!
out = filter_obj(smiles_list)\nout\nout = filter_obj(smiles_list) out Out[9]: _smiles status reasons MW LogP HBD HBA TPSA 0 c1ccc(Cn2ccnc2)cc1 Ok None 158.204 1.93140 0 2 17.82 1 NC(=O)c1ccccc1 Ok None 121.139 0.78550 1 1 43.09 2 c1cc2c3c(cccc3c1)CC2 Ok None 154.212 2.93840 0 0 0.00 3 Cc1cnccn1 Ok None 94.117 0.78502 0 2 25.78 4 CN(C)C(=O)c1ccc([N+](=O)[O-])cc1 Ok None 194.190 1.29660 0 3 63.45 ... ... ... ... ... ... ... ... ... 495 Cc1ccc(O)c(C)c1 Ok None 122.167 2.00904 1 1 20.23 496 C/C=C/CCCC Ok None 98.189 2.75270 0 0 0.00 497 CNc1ccccc1 Ok None 107.156 1.72830 1 1 12.03 498 ClCCCl Exclude alkyl_halides; Filter1_2_halo_ether; Filter26_... 98.960 1.46400 0 0 0.00 499 CCCCOC(C)=O Ok None 116.160 1.34960 0 2 26.30
500 rows \u00d7 8 columns
In\u00a0[10]: Copied!filter_obj = NovartisFilters()\nout = filter_obj(smiles_list)\nout\nfilter_obj = NovartisFilters() out = filter_obj(smiles_list) out Out[10]: _smiles status reasons severity covalent special_mol 0 c1ccc(Cn2ccnc2)cc1 Ok None 0 NaN NaN 1 NC(=O)c1ccccc1 Ok None 0 NaN NaN 2 c1cc2c3c(cccc3c1)CC2 Ok None 0 NaN NaN 3 Cc1cnccn1 Ok None 0 NaN NaN 4 CN(C)C(=O)c1ccc([N+](=O)[O-])cc1 Annotations nitro_count_1_min(1) 0 0.0 0.0 ... ... ... ... ... ... ... 495 Cc1ccc(O)c(C)c1 Ok None 0 NaN NaN 496 C/C=C/CCCC Ok None 0 NaN NaN 497 CNc1ccccc1 Ok None 0 NaN NaN 498 ClCCCl Exclude halo_ether_min(1); halogen_alkyl_min(1); halog... 10 2.0 0.0 499 CCCCOC(C)=O Ok None 0 NaN NaN
500 rows \u00d7 6 columns
In\u00a0[11]: Copied!out = score(smiles_list, **test_config)\nout\nout = score(smiles_list, **test_config) out Out[11]: _smiles ID reasons step rejected demerit_score status 0 C1=CC=C(C=C1)CN1C=CN=C1 0 NaN 4 False 0.0 Ok 1 C1=CC=C(C=C1)C(=O)N 1 not_enough_atoms 1 True NaN Exclude 2 C1=CC2=CC=CC3=C2C(=C1)CC3 2 no_interesting_atoms 1 True NaN Exclude 3 CC1=CN=CC=N1 3 not_enough_atoms 1 True NaN Exclude 4 CN(C)C(=O)C1=CC=C(C=C1)N(=O)=O 4 nitro:D60 4 False 60.0 Flag ... ... ... ... ... ... ... ... 495 CC1=CC=C(O)C(=C1)C 495 not_enough_atoms 1 True NaN Exclude 496 CCCC/C=C/C 496 not_enough_atoms 1 True NaN Exclude 497 CNC1=CC=CC=C1 497 not_enough_atoms 1 True NaN Exclude 498 C(Cl)CCl 498 not_enough_atoms 1 True NaN Exclude 499 CCCCOC(=O)C 499 not_enough_atoms 1 True NaN Exclude
500 rows \u00d7 7 columns
In\u00a0[12]: Copied!# Although the demirits.score is already quite fast, you can also call the parallelized version of it using the `batch_score` function\n# Although the demirits.score is already quite fast, you can also call the parallelized version of it using the `batch_score` function In\u00a0[13]: Copied!
out2 = batch_score(smiles_list, n_jobs=2, batch_size=100, progress=True, **test_config)\nout2\nout2 = batch_score(smiles_list, n_jobs=2, batch_size=100, progress=True, **test_config) out2
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 5/5 [00:00<00:00, 8.17it/s]\nOut[13]: _smiles ID reasons step rejected demerit_score status 0 C1=CC=C(C=C1)CN1C=CN=C1 0 NaN 4 False 0.0 Ok 1 C1=CC=C(C=C1)C(=O)N 1 not_enough_atoms 1 True NaN Exclude 2 C1=CC2=CC=CC3=C2C(=C1)CC3 2 no_interesting_atoms 1 True NaN Exclude 3 CC1=CN=CC=N1 3 not_enough_atoms 1 True NaN Exclude 4 CN(C)C(=O)C1=CC=C(C=C1)N(=O)=O 4 nitro:D60 4 False 60.0 Flag ... ... ... ... ... ... ... ... 495 CC1=CC=C(O)C(=C1)C 495 not_enough_atoms 1 True NaN Exclude 496 CCCC/C=C/C 496 not_enough_atoms 1 True NaN Exclude 497 CNC1=CC=CC=C1 497 not_enough_atoms 1 True NaN Exclude 498 C(Cl)CCl 498 not_enough_atoms 1 True NaN Exclude 499 CCCCOC(=O)C 499 not_enough_atoms 1 True NaN Exclude
500 rows \u00d7 7 columns
In\u00a0[14]: Copied!from medchem.groups import ChemicalGroup\nc_group = ChemicalGroup(groups=\"rings_in_drugs\")\nfrom medchem.groups import ChemicalGroup c_group = ChemicalGroup(groups=\"rings_in_drugs\") In\u00a0[15]: Copied!
mol = dm.to_mol(\"CCS(=O)(=O)N1CC(C1)(CC#N)N2C=C(C=N2)C3=C4C=CNC4=NC=N3\")\nc_group.get_matches(mol, use_smiles=True)\nmol = dm.to_mol(\"CCS(=O)(=O)N1CC(C1)(CC#N)N2C=C(C=N2)C3=C4C=CNC4=NC=N3\") c_group.get_matches(mol, use_smiles=True) Out[15]: name smiles smarts group matches 204 diazine C1=NC=CC=N1 [#6]1:[#7]:[#6]:[#6]:[#6]:[#7]:1 rings_in_drugs ((24, 23, 22, 18, 17, 25),) 234 1H-pyrazole N1=CC=CN1 [#7]1:[#6]:[#6]:[#6]:[#7H]:1 rings_in_drugs ((12, 13, 14, 15, 16),) 257 1H-pyrrole C1=CC=CN1 [#6]1:[#6]:[#6]:[#6]:[#7H]:1 rings_in_drugs ((20, 19, 18, 22, 21),)
You can also load a custom library of queries. Your custom df needs to provide the following columns: 'smiles'/'smarts'
, 'name'
and \"group\"
and optionally 'hierarchy'
c_group = ChemicalGroup(groups_db=get_data(\"smarts_bank.csv\"))\nc_group.get_matches(mol, use_smiles=False)\nc_group = ChemicalGroup(groups_db=get_data(\"smarts_bank.csv\")) c_group.get_matches(mol, use_smiles=False) Out[16]: name smiles smarts group matches 0 HBA [!$([#6,F,Cl,Br,I,o,s,nX3,#7v5,#15v5,#16v4,#16... custom_queries ((3,), (4,), (5,), (11,), (16,), (23,), (25,)) 2 HBD [!$([#6,H0,-,-2,-3])] custom_queries ((21,),) 3 HBD [!H0;#7,#8,#9] custom_queries ((21,),) 12 Hydrogen [*!H0,#1] custom_queries ((0,), (1,), (6,), (8,), (9,), (13,), (15,), (... 13 Hydrogen [#6!H0,#1] custom_queries ((0,), (1,), (6,), (8,), (9,), (13,), (15,), (... 14 Hydrogen [H,#1] custom_queries ((13,), (15,), (19,), (20,), (21,), (24,)) 16 Acyclic Bonds *!@* custom_queries ((0, 1), (1, 2), (2, 3), (2, 4), (2, 5), (7, 9... 18 Rotable Bond [!$(*#*)&!D1]-!@[!$(*#*)&!D1] custom_queries ((1, 2), (2, 5), (7, 9), (7, 12), (14, 17)) 20 SP3 Nitrogen [$([NX4+]),$([NX3]);!$(*=*)&!$(*:*)] custom_queries ((5,),) 21 SP2 Nitrogen [$([nX3](:*):*),$([nX2](:*):*),$([#7X2]=*),$([... custom_queries ((12,), (16,), (21,), (23,), (25,)) 22 SP2 Carbon [$([cX3](:*):*),$([cX2+](:*):*),$([CX3]=*),$([... custom_queries ((13,), (14,), (15,), (17,), (18,), (19,), (20... 23 Aromatic SP2 Carbon [$([cX3](:*):*),$([cX2+](:*):*)] custom_queries ((13,), (14,), (15,), (17,), (18,), (19,), (20... 24 Chiral Carbon [$([#6X4@](*)(*)(*)*),$([#6X4@H](*)(*)*)] custom_queries ((7,),)
You can apply the basic rules independently
In\u00a0[17]: Copied!from medchem.rules.basic_rules import rule_of_five, rule_of_three, rule_of_leadlike_soft\nfrom medchem.rules.basic_rules import rule_of_five, rule_of_three, rule_of_leadlike_soft In\u00a0[18]: Copied!
print(\"RO5\", rule_of_five(mol))\nprint(\"RO3\", rule_of_three(mol))\nprint(\"ROLS\", rule_of_leadlike_soft(mol))\nprint(\"RO5\", rule_of_five(mol)) print(\"RO3\", rule_of_three(mol)) print(\"ROLS\", rule_of_leadlike_soft(mol))
RO5 True\nRO3 False\nROLS True\n
To list all available rules and what they are good for, use :
In\u00a0[19]: Copied!from medchem.rules import RuleFilters\nfrom medchem.rules import RuleFilters In\u00a0[20]: Copied!
RuleFilters.list_available_rules()\nRuleFilters.list_available_rules() Out[20]: name rules description 0 rule_of_five MW <= 500 & logP <= 5 & HBD <= 5 & HBA <= 10 leadlike;druglike;small molecule;library design 1 rule_of_five_beyond MW <= 1000 & logP in [-2, 10] & HBD <= 6 & HBA... leadlike;druglike;small molecule;library design 2 rule_of_four MW >= 400 & logP >= 4 & RINGS >=4 & HBA >= 4 PPI inhibitor;druglike 3 rule_of_three MW <= 300 & logP <= 3 & HBA <= 3 & HBD <= 3 & ... fragment;building block 4 rule_of_three_extended MW <= 300 & logP in [-3, 3] & HBA <= 6 & HBD <... fragment;building block 5 rule_of_two MW <= 200 & logP <= 2 & HBA <= 4 & HBD <= 2 fragment;reagent;building block 6 rule_of_ghose MW in [160, 480] & logP in [-0.4, 5.6] & Natom... leadlike;druglike;small molecule;library design 7 rule_of_veber rotatable bond <= 10 & TPSA < 140 druglike;leadlike;small molecule;oral 8 rule_of_reos MW in [200, 500] & logP in [-5, 5] & HBA in [0... druglike;small molecule;library design;HTS 9 rule_of_chemaxon_druglikeness MW < 400 & logP < 5 & HBA <= 10 & HBD <= 5 & r... leadlike;druglike;small molecule 10 rule_of_egan TPSA in [0, 132] & logP in [-1, 6] druglike;small molecule;admet;absorption;perme... 11 rule_of_pfizer_3_75 not (TPSA < 75 & logP > 3) druglike;toxicity;invivo;small molecule 12 rule_of_gsk_4_400 MW <= 400 & logP <= 4 druglike;admet;small molecule 13 rule_of_oprea HBD in [0, 2] & HBA in [2, 9] & ROTBONDS in [2... druglike;small molecule 14 rule_of_xu HBD <= 5 & HBA <= 10 & ROTBONDS in [2, 35] & R... druglike;small molecule;library design 15 rule_of_cns MW in [135, 582] & logP in [-0.2, 6.1] & TPSA ... druglike;CNS;BBB;small molecule 16 rule_of_respiratory MW in [240, 520] & logP in [-2, 4.7] & HBONDS... druglike;respiratory;small molecule;nasal;inha... 17 rule_of_zinc MW in [60, 600] & logP < in [-4, 6] & HBD <= 6... druglike;small molecule;library design;zinc 18 rule_of_leadlike_soft MW in [150, 400] & logP < in [-3, 4] & HBD <= ... 
leadlike;small molecule;library design;admet 19 rule_of_druglike_soft MW in [100, 600] & logP < in [-3, 6] & HBD <= ... druglike;small molecule;library design
To list the available rules for small molecules only, you can use the list_available_rules(\"small molecule\")
RuleFilters.list_available_rules(\"small molecule\")\nRuleFilters.list_available_rules(\"small molecule\") Out[21]: name rules description 0 rule_of_five MW <= 500 & logP <= 5 & HBD <= 5 & HBA <= 10 leadlike;druglike;small molecule;library design 1 rule_of_five_beyond MW <= 1000 & logP in [-2, 10] & HBD <= 6 & HBA... leadlike;druglike;small molecule;library design 6 rule_of_ghose MW in [160, 480] & logP in [-0.4, 5.6] & Natom... leadlike;druglike;small molecule;library design 7 rule_of_veber rotatable bond <= 10 & TPSA < 140 druglike;leadlike;small molecule;oral 8 rule_of_reos MW in [200, 500] & logP in [-5, 5] & HBA in [0... druglike;small molecule;library design;HTS 9 rule_of_chemaxon_druglikeness MW < 400 & logP < 5 & HBA <= 10 & HBD <= 5 & r... leadlike;druglike;small molecule 10 rule_of_egan TPSA in [0, 132] & logP in [-1, 6] druglike;small molecule;admet;absorption;perme... 11 rule_of_pfizer_3_75 not (TPSA < 75 & logP > 3) druglike;toxicity;invivo;small molecule 12 rule_of_gsk_4_400 MW <= 400 & logP <= 4 druglike;admet;small molecule 13 rule_of_oprea HBD in [0, 2] & HBA in [2, 9] & ROTBONDS in [2... druglike;small molecule 14 rule_of_xu HBD <= 5 & HBA <= 10 & ROTBONDS in [2, 35] & R... druglike;small molecule;library design 15 rule_of_cns MW in [135, 582] & logP in [-0.2, 6.1] & TPSA ... druglike;CNS;BBB;small molecule 16 rule_of_respiratory MW in [240, 520] & logP in [-2, 4.7] & HBONDS... druglike;respiratory;small molecule;nasal;inha... 17 rule_of_zinc MW in [60, 600] & logP < in [-4, 6] & HBD <= 6... druglike;small molecule;library design;zinc 18 rule_of_leadlike_soft MW in [150, 400] & logP < in [-3, 4] & HBD <= ... leadlike;small molecule;library design;admet 19 rule_of_druglike_soft MW in [100, 600] & logP < in [-3, 6] & HBD <= ... druglike;small molecule;library design
You can also apply the rule filter to a batch of molecules.
In\u00a0[22]: Copied!rule_obj = RuleFilters(rule_list=[\"rule_of_five\", \"rule_of_oprea\", \"rule_of_cns\", \"rule_of_leadlike_soft\"], rule_list_names=[\"rule_of_five\", \"rule_of_oprea\", \"rule_of_cns\", \"rule_of_leadlike_soft\"], precompute_props=True)\nrule_obj = RuleFilters(rule_list=[\"rule_of_five\", \"rule_of_oprea\", \"rule_of_cns\", \"rule_of_leadlike_soft\"], rule_list_names=[\"rule_of_five\", \"rule_of_oprea\", \"rule_of_cns\", \"rule_of_leadlike_soft\"], precompute_props=True) In\u00a0[23]: Copied!
out = rule_obj(smiles_list, n_jobs=-1, progress=True)\n# you need to reset the columns because the input rule list can be columns\nout\nout = rule_obj(smiles_list, n_jobs=-1, progress=True) # you need to reset the columns because the input rule list can be columns out
Props: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 500/500 [00:00<00:00, 5903.89it/s] \nProps: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 500/500 [00:00<00:00, 7238.20it/s]\nProps: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 500/500 [00:00<00:00, 8098.33it/s]\nProps: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 500/500 [00:00<00:00, 1102.02it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 4/4 [00:00<00:00, 5.86it/s]\nOut[23]: rule_of_five rule_of_oprea rule_of_cns rule_of_leadlike_soft 0 True True True True 1 True False False False 2 True False False False 3 True False False False 4 True True True True ... ... ... ... ... 495 True False False False 496 True False False False 497 True False False False 498 True False False False 499 True False False False
500 rows \u00d7 4 columns
In\u00a0[24]: Copied!from medchem import catalog\nprint(catalog.list_named_catalogs())\nfrom medchem import catalog print(catalog.list_named_catalogs())
['tox', 'pains', 'pains_a', 'pains_b', 'pains_c', 'nih', 'zinc', 'brenk', 'dundee', 'bms', 'glaxo', 'schembl', 'mlsmr', 'inpharmatica', 'lint', 'alarm_nmr', 'alphascreen', 'gst_hitters', 'his_hitters', 'luciferase', 'dnabinder', 'chelator', 'hitters', 'electrophilic', 'carcinogen', 'ld50_oral', 'reactive_unstable_toxic', 'skin', 'toxicophore', 'nibr', 'bredt', 'unstable_graph']\nIn\u00a0[25]: Copied!
from medchem.catalog import from_smarts\nimport pandas as pd\nfrom medchem.catalog import from_smarts import pandas as pd In\u00a0[26]: Copied!
smarts_bank = pd.read_csv(get_data(\"smarts_bank.csv\"))\nsmarts_bank = pd.read_csv(get_data(\"smarts_bank.csv\")) In\u00a0[27]: Copied!
custom_catalog = from_smarts(smarts_bank.smarts.values, smarts_bank.name.values, entry_as_inds=False)\ncustom_catalog = from_smarts(smarts_bank.smarts.values, smarts_bank.name.values, entry_as_inds=False) In\u00a0[28]: Copied!
mol = \"CCS(=O)(=O)N1CC(C1)N2C=C(C=N2)C3=C4C=CNC4=NC=N3\"\nmol = dm.to_mol(mol)\nmol\nmol = \"CCS(=O)(=O)N1CC(C1)N2C=C(C=N2)C3=C4C=CNC4=NC=N3\" mol = dm.to_mol(mol) mol Out[28]: In\u00a0[29]: Copied!
matches = custom_catalog.GetMatches(mol)\n[m.GetDescription() for m in matches]\nmatches = custom_catalog.GetMatches(mol) [m.GetDescription() for m in matches] Out[29]:
['HBA',\n 'HBD',\n 'HBD',\n 'Hydrogen',\n 'Hydrogen',\n 'Hydrogen',\n 'Acyclic Bonds',\n 'Rotable Bond',\n 'SP3 Nitrogen',\n 'SP2 Nitrogen',\n 'SP2 Carbon',\n 'Aromatic SP2 Carbon',\n 'Chiral Carbon']In\u00a0[30]: Copied!
from medchem.query import QueryFilter\nfrom medchem.query import QueryFilter In\u00a0[31]: Copied!
# note that whitespace and newlines are ignored in the query\nquery = \"\"\"\n(\n HASPROP(\"tpsa\" < 80) AND HASPROP(\"clogp\" < 3) AND ! HASALERT(\"pains\")\n) \n OR \n(\n HASPROP(\"n_heavy_atoms\" >= 10) \n AND \n (\n HASGROUP(\"Alcohols\") \n OR \n HASSUBSTRUCTURE(\"[CX3](=[OX1])O\", True, 1)\n )\n)\n\"\"\"\n# note that whitespace and newlines are ignored in the query query = \"\"\" ( HASPROP(\"tpsa\" < 80) AND HASPROP(\"clogp\" < 3) AND ! HASALERT(\"pains\") ) OR ( HASPROP(\"n_heavy_atoms\" >= 10) AND ( HASGROUP(\"Alcohols\") OR HASSUBSTRUCTURE(\"[CX3](=[OX1])O\", True, 1) ) ) \"\"\" In\u00a0[32]: Copied!
data = dm.freesolv()\nquery_filter = QueryFilter(query)\nout = query_filter(data.smiles, n_jobs=-1, progress=True)\ndata = dm.freesolv() query_filter = QueryFilter(query) out = query_filter(data.smiles, n_jobs=-1, progress=True)
\rIn\u00a0[33]: Copied!
any(out)\nany(out) Out[33]:
True"},{"location":"tutorials/getting-started.html#medchem","title":"Medchem\u00b6","text":"
Medchem is a package for applying general filtering rules on a set of molecules to ensure they have drug-like properties.
In this tutorial, we will apply various filters to an example dataset to highlight the package API
"},{"location":"tutorials/getting-started.html#setup","title":"Setup\u00b6","text":""},{"location":"tutorials/getting-started.html#using-the-filter-module","title":"Using the filter module\u00b6","text":"The filter module provides a variety of two types of filters:
generic
: custom filtering based on some given molecule properties such as number of atoms, presence of specific atom type, etclead
: filtering based on structural motifs that are known to either be toxic, reactive, unstable or frequent false positiveThe advanced options allow a better control over the filtering process. They also provide more information on the issues with the molecules.
"},{"location":"tutorials/getting-started.html#alertfilters","title":"AlertFilters\u00b6","text":"These are the underlying filters called by lead.alert_filter
. In the output, the compound status is indicated as either \"Exclude\"
or \"Ok\"
.
These are the underlying filters called by lead.screening_filter
.
Here is an explanation of the output:
[\"Exclude\", \"Flag\", \"Annotations\", \"Ok\"]
(ordered by quality). Generally, you can keep anything without the \"Exclude\" label, as long as you also apply a maximum severity score for compounds that collects too many flags.0
: compound has no flags, might have annotations; - 1-9
: number of flags the compound raises; - >= 10
: default exclusion criterion used in the paperDemerit scoring uses the Eli Lilly filter rules. Those are complex rules, that can be customized in any way you wish.
The following \"information\" will be computed and added as columns to a DataFrame for each run:
\"Exclude\"
, \"Flag\"
or \"Ok\"
.It is also possible to initialize a list of functional groups to use for molecule matching
"},{"location":"tutorials/getting-started.html#physchem-rule-application","title":"PhysChem Rule application\u00b6","text":"You can also apply a set of physchem rules to a list of compounds
"},{"location":"tutorials/getting-started.html#available-catalogs","title":"Available Catalogs\u00b6","text":"For a list of all available named catalogs, you can use catalog.list_named_catalogs
You can build a custom catalog based on smarts you have defined. For example, using an internal smarts bank.
"},{"location":"tutorials/getting-started.html#query-system","title":"Query system\u00b6","text":"Medchem provides a query system that can be used for filtering molecules based on a constructed query made of statement compliant with the provided grammar.
Detailed information about the query system can be seen in the documentation
"}]} \ No newline at end of file diff --git a/1.4.0/sitemap.xml b/1.4.0/sitemap.xml deleted file mode 100644 index 7227c66..0000000 --- a/1.4.0/sitemap.xml +++ /dev/null @@ -1,58 +0,0 @@ - -%load_ext autoreload
-%autoreload 2
-
import datamol as dm
-import numpy as np
-from loguru import logger
-
-data = dm.data.freesolv().sample(500)
-smiles_list = data.smiles.values
-
from medchem.filter import lead
-from medchem.demerits import score, batch_score
-from medchem.alerts import NovartisFilters
-from medchem.alerts import AlertFilters
-from medchem.catalog import NamedCatalogs
-from medchem.utils import get_data
-from rdkit.Chem import rdfiltercatalog
-
The filter module provides two types of filters:
-generic
: custom filtering based on some given molecule properties such as number of atoms, presence of specific atom type, etc.
lead
: filtering based on structural motifs that are known to be toxic, reactive, unstable, or frequent false positives
# common filters including pains, brenk, nih, zinc
-pains_a = rdfiltercatalog.FilterCatalogParams.FilterCatalogs.PAINS_A
-lead.catalog_filter(smiles_list, ["nih", pains_a, NamedCatalogs.dundee()])
-
array([ True, True, True, True, False, False, True, False, False, - True, False, True, True, True, False, True, False, False, - False, True, True, False, True, False, False, False, True, - True, True, True, True, True, False, True, False, False, - False, True, False, False, False, True, False, False, False, - True, False, True, False, True, True, False, True, True, - True, False, True, False, False, False, True, False, False, - True, False, True, True, False, False, False, False, True, - True, True, False, True, False, True, True, True, False, - True, True, False, True, True, True, True, True, False, - False, True, True, False, True, False, False, False, True, - False, False, False, False, True, False, False, True, True, - True, True, True, False, True, True, True, False, True, - False, False, True, True, True, True, False, False, True, - False, False, True, True, False, True, True, True, True, - False, False, False, True, False, False, False, True, False, - False, True, False, False, True, True, True, False, True, - False, False, False, True, True, True, False, True, True, - False, False, False, False, False, True, False, False, False, - False, False, True, False, True, False, True, False, False, - True, False, False, True, False, True, True, True, False, - True, True, True, True, False, True, True, True, False, - False, True, True, False, True, False, True, False, True, - True, True, True, False, False, True, False, True, False, - False, True, False, True, False, True, False, False, False, - False, True, True, False, False, True, True, False, True, - True, False, True, True, True, False, False, False, True, - False, False, False, True, True, False, True, True, False, - True, True, True, True, False, True, False, True, True, - True, True, False, True, False, True, False, False, True, - True, True, False, False, False, True, True, True, False, - True, False, False, False, False, False, True, False, True, - True, False, True, True, True, False, True, True, False, - 
False, False, True, False, True, False, False, False, True, - True, False, True, False, False, True, False, False, True, - True, False, False, False, True, True, True, False, True, - True, True, True, False, True, False, False, True, True, - True, False, False, True, False, False, False, True, False, - False, False, False, True, False, True, True, True, False, - True, False, False, False, True, False, True, False, False, - False, False, False, False, True, True, True, False, True, - False, False, False, False, False, True, True, True, True, - False, True, True, True, False, True, False, True, False, - False, False, True, False, False, True, True, False, False, - True, True, False, True, True, False, True, False, False, - False, True, True, False, True, True, True, False, False, - False, False, False, False, False, True, False, False, True, - True, True, True, True, True, True, True, False, True, - True, False, False, True, True, True, True, False, False, - False, True, False, True, False, True, True, True, False, - False, True, True, True, False, True, False, True, False, - True, True, True, True, True, True, True, True, False, - False, True, True, True, False, True, True, False, True, - False, True, False, True, True, True, False, True, False, - True, True, True, False, False, True, True, False, True, - True, False, True, False, False])-
# filtering based on some common alerts + additional lead-like rules
-lead.alert_filter(smiles_list, alerts=["Glaxo", "BMS"], rule_dict=dict(MW=[0, 100]))
-
array([False, False, False, True, False, False, True, False, False, - False, True, True, False, False, False, False, False, False, - True, True, False, False, False, False, False, True, True, - True, False, False, True, False, False, False, False, False, - False, False, False, False, True, True, False, False, False, - False, False, False, False, False, False, False, False, False, - False, False, True, True, False, False, True, False, False, - False, True, False, False, False, False, True, False, False, - False, False, False, False, True, True, True, False, False, - True, False, False, False, False, True, False, False, False, - False, False, False, False, True, True, False, False, False, - False, False, False, False, True, True, False, False, True, - False, True, True, False, False, False, False, False, False, - False, False, True, True, False, False, False, False, False, - True, False, True, True, False, True, False, False, False, - False, False, False, False, False, False, False, True, True, - False, False, False, True, False, False, True, False, True, - False, False, False, False, True, True, False, False, True, - False, False, False, False, False, True, False, False, False, - False, False, True, False, True, False, True, False, True, - False, False, False, False, False, False, False, False, False, - True, True, False, False, False, False, False, False, False, - False, True, True, False, False, False, False, False, False, - True, False, False, True, True, True, False, False, False, - False, False, False, False, False, True, False, False, True, - False, False, False, True, False, False, False, False, False, - False, False, False, False, True, False, False, False, False, - False, False, False, True, False, False, False, True, False, - False, False, False, False, False, False, False, False, False, - True, True, False, False, False, True, False, True, True, - False, True, False, False, False, False, True, False, False, - True, False, False, False, False, False, True, 
False, False, - False, False, True, True, False, False, True, False, False, - False, False, True, False, False, False, False, False, False, - True, True, False, False, False, True, False, False, True, - False, False, False, False, False, True, False, False, False, - False, True, False, False, False, False, True, False, False, - False, True, False, False, False, False, False, False, False, - False, False, False, False, False, True, True, True, False, - False, False, False, False, False, False, True, False, False, - True, False, False, False, False, True, False, False, False, - False, False, False, True, False, False, False, True, False, - False, True, False, False, False, False, False, False, True, - False, True, True, False, True, False, True, False, True, - False, True, False, False, False, False, True, False, False, - False, True, True, False, True, True, True, False, False, - False, False, False, False, True, False, True, False, True, - True, False, False, False, False, True, True, False, False, - True, False, False, False, True, True, False, False, False, - False, False, False, True, False, False, False, False, False, - False, False, False, True, False, False, False, True, False, - False, False, True, True, False, False, False, True, False, - False, False, False, True, False, False, True, False, True, - False, False, False, True, True, False, False, True, False, - False, False, True, False, False, False, False, True, False, - False, True, False, False, False])-
# filtering based on NIBR screening deck process described in
-# "Evolution of Novartis' small molecule screening deck design" by Schuffenhauer, A. et al. J. Med. Chem. (2020),
-# https://dx.doi.org/10.1021/acs.jmedchem.0c01332.
-lead.screening_filter(smiles_list, return_idx=True)
-
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, - 13, 14, 15, 17, 18, 19, 20, 21, 22, 25, 26, 27, 28, - 29, 30, 31, 32, 33, 36, 37, 38, 40, 41, 43, 45, 47, - 48, 49, 50, 52, 53, 54, 56, 57, 58, 59, 60, 61, 62, - 63, 64, 65, 66, 67, 69, 71, 72, 73, 74, 75, 77, 78, - 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90, 91, 92, - 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, - 107, 108, 109, 110, 111, 112, 113, 114, 115, 117, 118, 119, 120, - 121, 122, 123, 125, 127, 128, 129, 131, 132, 133, 134, 135, 136, - 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, - 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 163, - 164, 165, 166, 167, 169, 170, 171, 173, 174, 175, 176, 177, 178, - 179, 180, 181, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, - 194, 195, 196, 197, 198, 199, 200, 202, 203, 204, 206, 207, 208, - 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, - 222, 223, 224, 225, 226, 227, 228, 230, 231, 233, 234, 235, 236, - 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, - 250, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, - 264, 266, 268, 269, 270, 271, 273, 274, 275, 276, 277, 278, 279, - 280, 281, 282, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, - 294, 295, 296, 297, 298, 299, 300, 303, 304, 305, 306, 307, 308, - 309, 310, 311, 313, 314, 315, 317, 318, 319, 320, 321, 322, 323, - 324, 325, 326, 328, 329, 331, 332, 333, 334, 336, 337, 338, 339, - 340, 341, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 355, - 357, 358, 359, 360, 364, 365, 366, 367, 368, 369, 370, 371, 372, - 373, 374, 375, 376, 377, 379, 380, 381, 382, 383, 384, 385, 387, - 388, 389, 391, 392, 393, 394, 395, 396, 397, 399, 400, 402, 403, - 404, 406, 407, 408, 409, 410, 411, 414, 415, 416, 417, 418, 419, - 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, - 433, 434, 435, 436, 437, 438, 439, 440, 442, 443, 444, 445, 446, - 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, - 460, 
461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, - 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, - 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 499])-
# Filter based on the demerit scoring of Eli Lilly
-test_config = {
- "min_atoms": 10, # default is 1
- "soft_max_atoms": 30, # default is 30
- "hard_max_atoms": 50, # default is 30
- "nodemerit": False, # default is False
- "dthresh": 160, # default is None with no threshold
-}
-lead.lilly_demerit_filter(smiles_list, max_demerits=160, return_idx=True, **test_config)
-
array([ 0, 4, 37, 61, 62, 72, 73, 84, 89, 98, 106, 113, 132, - 140, 148, 178, 180, 187, 192, 203, 209, 214, 226, 240, 244, 247, - 249, 253, 254, 259, 273, 282, 287, 293, 310, 317, 319, 324, 345, - 374, 377, 383, 384, 385, 399, 400, 417, 419, 434, 442, 446, 448, - 454, 455, 459, 464, 470, 473, 475, 478, 482, 485, 487, 492, 494])-
The advanced options allow better control over the filtering process. They also provide more information on the issues with the molecules.
- -These are the underlying filters called by lead.alert_filter
. In the output, the compound status is indicated as either "Exclude"
or "Ok"
.
filter_obj = AlertFilters(alerts_set=["inpharmatica", "SureChEMBL"])
-filter_obj.list_default_available_alerts()
-
- | rule_set_name | -smarts | -catalog_description | -rule_set | -source | -
---|---|---|---|---|---|
10 | -Glaxo | -55 | -Glaxo Wellcome Hard filters | -1 | -ChEMBL | -
5 | -Dundee | -105 | -University of Dundee NTD Screening Library Fil... | -2 | -ChEMBL | -
2 | -BMS | -180 | -Bristol-Myers Squibb HTS Deck filters | -3 | -ChEMBL | -
18 | -PAINS | -481 | -PAINS filters | -4 | -ChEMBL | -
21 | -SureChEMBL | -166 | -SureChEMBL Non-MedChem Friendly SMARTS | -5 | -ChEMBL | -
16 | -MLSMR | -116 | -NIH MLSMR Excluded Functionality filters (MLSMR) | -6 | -ChEMBL | -
12 | -Inpharmatica | -91 | -Unwanted fragments derived by Inpharmatica Ltd. | -7 | -ChEMBL | -
14 | -LINT | -57 | -Pfizer lint filters (lint) | -8 | -ChEMBL | -
0 | -Alarm-NMR | -75 | -Reactive False Positives in Biochemical Screen... | -9 | -Litterature | -
1 | -AlphaScreen-Hitters | -6 | -Structural filters for compounds that may be a... | -10 | -Litterature | -
8 | -GST-Hitters | -34 | -Structural filters for compounds may prevent G... | -11 | -Litterature | -
11 | -HIS-Hitters | -19 | -Structural filters for compounds prevents the ... | -12 | -Litterature | -
15 | -LuciferaseInhibitor | -3 | -Structural filters for compounds that may inhi... | -13 | -Litterature | -
4 | -DNABinder | -78 | -Structural filters for compounds that may bind... | -14 | -Litterature | -
3 | -Chelator | -55 | -Structural filters for compounds that may inhi... | -15 | -Litterature | -
7 | -Frequent-Hitter | -15 | -Structural filters for compounds that are freq... | -16 | -Litterature | -
6 | -Electrophilic | -119 | -Structural filters for compounds that could ta... | -17 | -Litterature | -
9 | -Genotoxic-Carcinogenicity | -117 | -Structural filters for compounds that may caus... | -18 | -Litterature | -
13 | -LD50-Oral | -20 | -Structural filters for compounds that may caus... | -19 | -Litterature | -
17 | -Non-Genotoxic-Carcinogenicity | -22 | -Structural filters for compounds that may caus... | -20 | -Litterature | -
19 | -Reactive-Unstable-Toxic | -335 | -General very reactive/unstable or Toxic compounds | -21 | -Litterature | -
20 | -Skin | -155 | -Skin Sensitization filters (irritables) | -22 | -Litterature | -
22 | -Toxicophore | -154 | -General Toxicophores | -23 | -Litterature | -
out = filter_obj(smiles_list)
-out
-
- | _smiles | -status | -reasons | -MW | -LogP | -HBD | -HBA | -TPSA | -
---|---|---|---|---|---|---|---|---|
0 | -c1ccc(Cn2ccnc2)cc1 | -Ok | -None | -158.204 | -1.93140 | -0 | -2 | -17.82 | -
1 | -NC(=O)c1ccccc1 | -Ok | -None | -121.139 | -0.78550 | -1 | -1 | -43.09 | -
2 | -c1cc2c3c(cccc3c1)CC2 | -Ok | -None | -154.212 | -2.93840 | -0 | -0 | -0.00 | -
3 | -Cc1cnccn1 | -Ok | -None | -94.117 | -0.78502 | -0 | -2 | -25.78 | -
4 | -CN(C)C(=O)c1ccc([N+](=O)[O-])cc1 | -Ok | -None | -194.190 | -1.29660 | -0 | -3 | -63.45 | -
... | -... | -... | -... | -... | -... | -... | -... | -... | -
495 | -Cc1ccc(O)c(C)c1 | -Ok | -None | -122.167 | -2.00904 | -1 | -1 | -20.23 | -
496 | -C/C=C/CCCC | -Ok | -None | -98.189 | -2.75270 | -0 | -0 | -0.00 | -
497 | -CNc1ccccc1 | -Ok | -None | -107.156 | -1.72830 | -1 | -1 | -12.03 | -
498 | -ClCCCl | -Exclude | -alkyl_halides; Filter1_2_halo_ether; Filter26_... | -98.960 | -1.46400 | -0 | -0 | -0.00 | -
499 | -CCCCOC(C)=O | -Ok | -None | -116.160 | -1.34960 | -0 | -2 | -26.30 | -
500 rows × 8 columns
-These are the underlying filters called by lead.screening_filter
.
Here is an explanation of the output:
-["Exclude", "Flag", "Annotations", "Ok"]
(ordered by quality). Generally, you can keep anything without the "Exclude" label, as long as you also apply a maximum severity score for compounds that collect too many flags.
0
: compound has no flags, might have annotations;
- - 1-9
: number of flags the compound raises;
- - >= 10
: default exclusion criterion used in the paper
filter_obj = NovartisFilters()
-out = filter_obj(smiles_list)
-out
-
- | _smiles | -status | -reasons | -severity | -covalent | -special_mol | -
---|---|---|---|---|---|---|
0 | -c1ccc(Cn2ccnc2)cc1 | -Ok | -None | -0 | -NaN | -NaN | -
1 | -NC(=O)c1ccccc1 | -Ok | -None | -0 | -NaN | -NaN | -
2 | -c1cc2c3c(cccc3c1)CC2 | -Ok | -None | -0 | -NaN | -NaN | -
3 | -Cc1cnccn1 | -Ok | -None | -0 | -NaN | -NaN | -
4 | -CN(C)C(=O)c1ccc([N+](=O)[O-])cc1 | -Annotations | -nitro_count_1_min(1) | -0 | -0.0 | -0.0 | -
... | -... | -... | -... | -... | -... | -... | -
495 | -Cc1ccc(O)c(C)c1 | -Ok | -None | -0 | -NaN | -NaN | -
496 | -C/C=C/CCCC | -Ok | -None | -0 | -NaN | -NaN | -
497 | -CNc1ccccc1 | -Ok | -None | -0 | -NaN | -NaN | -
498 | -ClCCCl | -Exclude | -halo_ether_min(1); halogen_alkyl_min(1); halog... | -10 | -2.0 | -0.0 | -
499 | -CCCCOC(C)=O | -Ok | -None | -0 | -NaN | -NaN | -
500 rows × 6 columns
-Demerit scoring uses the Eli Lilly filter rules. Those are complex rules that can be customized in any way you wish.
-The following "information" will be computed and added as columns to a DataFrame for each run:
-"Exclude"
, "Flag"
or "Ok"
.out = score(smiles_list, **test_config)
-out
-
- | _smiles | -ID | -reasons | -step | -rejected | -demerit_score | -status | -
---|---|---|---|---|---|---|---|
0 | -C1=CC=C(C=C1)CN1C=CN=C1 | -0 | -NaN | -4 | -False | -0.0 | -Ok | -
1 | -C1=CC=C(C=C1)C(=O)N | -1 | -not_enough_atoms | -1 | -True | -NaN | -Exclude | -
2 | -C1=CC2=CC=CC3=C2C(=C1)CC3 | -2 | -no_interesting_atoms | -1 | -True | -NaN | -Exclude | -
3 | -CC1=CN=CC=N1 | -3 | -not_enough_atoms | -1 | -True | -NaN | -Exclude | -
4 | -CN(C)C(=O)C1=CC=C(C=C1)N(=O)=O | -4 | -nitro:D60 | -4 | -False | -60.0 | -Flag | -
... | -... | -... | -... | -... | -... | -... | -... | -
495 | -CC1=CC=C(O)C(=C1)C | -495 | -not_enough_atoms | -1 | -True | -NaN | -Exclude | -
496 | -CCCC/C=C/C | -496 | -not_enough_atoms | -1 | -True | -NaN | -Exclude | -
497 | -CNC1=CC=CC=C1 | -497 | -not_enough_atoms | -1 | -True | -NaN | -Exclude | -
498 | -C(Cl)CCl | -498 | -not_enough_atoms | -1 | -True | -NaN | -Exclude | -
499 | -CCCCOC(=O)C | -499 | -not_enough_atoms | -1 | -True | -NaN | -Exclude | -
500 rows × 7 columns
-# Although `demerits.score` is already quite fast, you can also call its parallelized version using the `batch_score` function
-
out2 = batch_score(smiles_list, n_jobs=2, batch_size=100, progress=True, **test_config)
-out2
-
100%|██████████| 5/5 [00:00<00:00, 8.17it/s] --
- | _smiles | -ID | -reasons | -step | -rejected | -demerit_score | -status | -
---|---|---|---|---|---|---|---|
0 | -C1=CC=C(C=C1)CN1C=CN=C1 | -0 | -NaN | -4 | -False | -0.0 | -Ok | -
1 | -C1=CC=C(C=C1)C(=O)N | -1 | -not_enough_atoms | -1 | -True | -NaN | -Exclude | -
2 | -C1=CC2=CC=CC3=C2C(=C1)CC3 | -2 | -no_interesting_atoms | -1 | -True | -NaN | -Exclude | -
3 | -CC1=CN=CC=N1 | -3 | -not_enough_atoms | -1 | -True | -NaN | -Exclude | -
4 | -CN(C)C(=O)C1=CC=C(C=C1)N(=O)=O | -4 | -nitro:D60 | -4 | -False | -60.0 | -Flag | -
... | -... | -... | -... | -... | -... | -... | -... | -
495 | -CC1=CC=C(O)C(=C1)C | -495 | -not_enough_atoms | -1 | -True | -NaN | -Exclude | -
496 | -CCCC/C=C/C | -496 | -not_enough_atoms | -1 | -True | -NaN | -Exclude | -
497 | -CNC1=CC=CC=C1 | -497 | -not_enough_atoms | -1 | -True | -NaN | -Exclude | -
498 | -C(Cl)CCl | -498 | -not_enough_atoms | -1 | -True | -NaN | -Exclude | -
499 | -CCCCOC(=O)C | -499 | -not_enough_atoms | -1 | -True | -NaN | -Exclude | -
500 rows × 7 columns
-It is also possible to initialize a list of functional groups to use for matching molecules
- -from medchem.groups import ChemicalGroup
-c_group = ChemicalGroup(groups="rings_in_drugs")
-
mol = dm.to_mol("CCS(=O)(=O)N1CC(C1)(CC#N)N2C=C(C=N2)C3=C4C=CNC4=NC=N3")
-c_group.get_matches(mol, use_smiles=True)
-
- | name | -smiles | -smarts | -group | -matches | -
---|---|---|---|---|---|
204 | -diazine | -C1=NC=CC=N1 | -[#6]1:[#7]:[#6]:[#6]:[#6]:[#7]:1 | -rings_in_drugs | -((24, 23, 22, 18, 17, 25),) | -
234 | -1H-pyrazole | -N1=CC=CN1 | -[#7]1:[#6]:[#6]:[#6]:[#7H]:1 | -rings_in_drugs | -((12, 13, 14, 15, 16),) | -
257 | -1H-pyrrole | -C1=CC=CN1 | -[#6]1:[#6]:[#6]:[#6]:[#7H]:1 | -rings_in_drugs | -((20, 19, 18, 22, 21),) | -
You can also load a custom library of queries. Your custom DataFrame needs to provide the following columns:
-'smiles'/'smarts'
, 'name'
and "group"
and optionally 'hierarchy'
c_group = ChemicalGroup(groups_db=get_data("smarts_bank.csv"))
-c_group.get_matches(mol, use_smiles=False)
-
- | name | -smiles | -smarts | -group | -matches | -
---|---|---|---|---|---|
0 | -HBA | -- | [!$([#6,F,Cl,Br,I,o,s,nX3,#7v5,#15v5,#16v4,#16... | -custom_queries | -((3,), (4,), (5,), (11,), (16,), (23,), (25,)) | -
2 | -HBD | -- | [!$([#6,H0,-,-2,-3])] | -custom_queries | -((21,),) | -
3 | -HBD | -- | [!H0;#7,#8,#9] | -custom_queries | -((21,),) | -
12 | -Hydrogen | -- | [*!H0,#1] | -custom_queries | -((0,), (1,), (6,), (8,), (9,), (13,), (15,), (... | -
13 | -Hydrogen | -- | [#6!H0,#1] | -custom_queries | -((0,), (1,), (6,), (8,), (9,), (13,), (15,), (... | -
14 | -Hydrogen | -- | [H,#1] | -custom_queries | -((13,), (15,), (19,), (20,), (21,), (24,)) | -
16 | -Acyclic Bonds | -- | *!@* | -custom_queries | -((0, 1), (1, 2), (2, 3), (2, 4), (2, 5), (7, 9... | -
18 | -Rotable Bond | -- | [!$(*#*)&!D1]-!@[!$(*#*)&!D1] | -custom_queries | -((1, 2), (2, 5), (7, 9), (7, 12), (14, 17)) | -
20 | -SP3 Nitrogen | -- | [$([NX4+]),$([NX3]);!$(*=*)&!$(*:*)] | -custom_queries | -((5,),) | -
21 | -SP2 Nitrogen | -- | [$([nX3](:*):*),$([nX2](:*):*),$([#7X2]=*),$([... | -custom_queries | -((12,), (16,), (21,), (23,), (25,)) | -
22 | -SP2 Carbon | -- | [$([cX3](:*):*),$([cX2+](:*):*),$([CX3]=*),$([... | -custom_queries | -((13,), (14,), (15,), (17,), (18,), (19,), (20... | -
23 | -Aromatic SP2 Carbon | -- | [$([cX3](:*):*),$([cX2+](:*):*)] | -custom_queries | -((13,), (14,), (15,), (17,), (18,), (19,), (20... | -
24 | -Chiral Carbon | -- | [$([#6X4@](*)(*)(*)*),$([#6X4@H](*)(*)*)] | -custom_queries | -((7,),) | -
You can also apply a set of physchem rules to a list of compounds
- -You can apply the basic rules independently
- -from medchem.rules.basic_rules import rule_of_five, rule_of_three, rule_of_leadlike_soft
-
print("RO5", rule_of_five(mol))
-print("RO3", rule_of_three(mol))
-print("ROLS", rule_of_leadlike_soft(mol))
-
RO5 True -RO3 False -ROLS True --
To list all available rules and what they are good for, use:
- -from medchem.rules import RuleFilters
-
RuleFilters.list_available_rules()
-
- | name | -rules | -description | -
---|---|---|---|
0 | -rule_of_five | -MW <= 500 & logP <= 5 & HBD <= 5 & HBA <= 10 | -leadlike;druglike;small molecule;library design | -
1 | -rule_of_five_beyond | -MW <= 1000 & logP in [-2, 10] & HBD <= 6 & HBA... | -leadlike;druglike;small molecule;library design | -
2 | -rule_of_four | -MW >= 400 & logP >= 4 & RINGS >=4 & HBA >= 4 | -PPI inhibitor;druglike | -
3 | -rule_of_three | -MW <= 300 & logP <= 3 & HBA <= 3 & HBD <= 3 & ... | -fragment;building block | -
4 | -rule_of_three_extended | -MW <= 300 & logP in [-3, 3] & HBA <= 6 & HBD <... | -fragment;building block | -
5 | -rule_of_two | -MW <= 200 & logP <= 2 & HBA <= 4 & HBD <= 2 | -fragment;reagent;building block | -
6 | -rule_of_ghose | -MW in [160, 480] & logP in [-0.4, 5.6] & Natom... | -leadlike;druglike;small molecule;library design | -
7 | -rule_of_veber | -rotatable bond <= 10 & TPSA < 140 | -druglike;leadlike;small molecule;oral | -
8 | -rule_of_reos | -MW in [200, 500] & logP in [-5, 5] & HBA in [0... | -druglike;small molecule;library design;HTS | -
9 | -rule_of_chemaxon_druglikeness | -MW < 400 & logP < 5 & HBA <= 10 & HBD <= 5 & r... | -leadlike;druglike;small molecule | -
10 | -rule_of_egan | -TPSA in [0, 132] & logP in [-1, 6] | -druglike;small molecule;admet;absorption;perme... | -
11 | -rule_of_pfizer_3_75 | -not (TPSA < 75 & logP > 3) | -druglike;toxicity;invivo;small molecule | -
12 | -rule_of_gsk_4_400 | -MW <= 400 & logP <= 4 | -druglike;admet;small molecule | -
13 | -rule_of_oprea | -HBD in [0, 2] & HBA in [2, 9] & ROTBONDS in [2... | -druglike;small molecule | -
14 | -rule_of_xu | -HBD <= 5 & HBA <= 10 & ROTBONDS in [2, 35] & R... | -druglike;small molecule;library design | -
15 | -rule_of_cns | -MW in [135, 582] & logP in [-0.2, 6.1] & TPSA ... | -druglike;CNS;BBB;small molecule | -
16 | -rule_of_respiratory | -MW in [240, 520] & logP in [-2, 4.7] & HBONDS... | -druglike;respiratory;small molecule;nasal;inha... | -
17 | -rule_of_zinc | -MW in [60, 600] & logP < in [-4, 6] & HBD <= 6... | -druglike;small molecule;library design;zinc | -
18 | -rule_of_leadlike_soft | -MW in [150, 400] & logP < in [-3, 4] & HBD <= ... | -leadlike;small molecule;library design;admet | -
19 | -rule_of_druglike_soft | -MW in [100, 600] & logP < in [-3, 6] & HBD <= ... | -druglike;small molecule;library design | -
To list the available rules for small molecules only, you can use the list_available_rules("small molecule")
RuleFilters.list_available_rules("small molecule")
-
- | name | -rules | -description | -
---|---|---|---|
0 | -rule_of_five | -MW <= 500 & logP <= 5 & HBD <= 5 & HBA <= 10 | -leadlike;druglike;small molecule;library design | -
1 | -rule_of_five_beyond | -MW <= 1000 & logP in [-2, 10] & HBD <= 6 & HBA... | -leadlike;druglike;small molecule;library design | -
6 | -rule_of_ghose | -MW in [160, 480] & logP in [-0.4, 5.6] & Natom... | -leadlike;druglike;small molecule;library design | -
7 | -rule_of_veber | -rotatable bond <= 10 & TPSA < 140 | -druglike;leadlike;small molecule;oral | -
8 | -rule_of_reos | -MW in [200, 500] & logP in [-5, 5] & HBA in [0... | -druglike;small molecule;library design;HTS | -
9 | -rule_of_chemaxon_druglikeness | -MW < 400 & logP < 5 & HBA <= 10 & HBD <= 5 & r... | -leadlike;druglike;small molecule | -
10 | -rule_of_egan | -TPSA in [0, 132] & logP in [-1, 6] | -druglike;small molecule;admet;absorption;perme... | -
11 | -rule_of_pfizer_3_75 | -not (TPSA < 75 & logP > 3) | -druglike;toxicity;invivo;small molecule | -
12 | -rule_of_gsk_4_400 | -MW <= 400 & logP <= 4 | -druglike;admet;small molecule | -
13 | -rule_of_oprea | -HBD in [0, 2] & HBA in [2, 9] & ROTBONDS in [2... | -druglike;small molecule | -
14 | -rule_of_xu | -HBD <= 5 & HBA <= 10 & ROTBONDS in [2, 35] & R... | -druglike;small molecule;library design | -
15 | -rule_of_cns | -MW in [135, 582] & logP in [-0.2, 6.1] & TPSA ... | -druglike;CNS;BBB;small molecule | -
16 | -rule_of_respiratory | -MW in [240, 520] & logP in [-2, 4.7] & HBONDS... | -druglike;respiratory;small molecule;nasal;inha... | -
17 | -rule_of_zinc | -MW in [60, 600] & logP < in [-4, 6] & HBD <= 6... | -druglike;small molecule;library design;zinc | -
18 | -rule_of_leadlike_soft | -MW in [150, 400] & logP < in [-3, 4] & HBD <= ... | -leadlike;small molecule;library design;admet | -
19 | -rule_of_druglike_soft | -MW in [100, 600] & logP < in [-3, 6] & HBD <= ... | -druglike;small molecule;library design | -
You can also apply the rule filter to a batch of molecules.
- -rule_obj = RuleFilters(rule_list=["rule_of_five", "rule_of_oprea", "rule_of_cns", "rule_of_leadlike_soft"], rule_list_names=["rule_of_five", "rule_of_oprea", "rule_of_cns", "rule_of_leadlike_soft"], precompute_props=True)
-
out = rule_obj(smiles_list, n_jobs=-1, progress=True)
-# you need to reset the columns because the input rule list can be columns
-out
-
Props: 100%|██████████| 500/500 [00:00<00:00, 5903.89it/s] -Props: 100%|██████████| 500/500 [00:00<00:00, 7238.20it/s] -Props: 100%|██████████| 500/500 [00:00<00:00, 8098.33it/s] -Props: 100%|██████████| 500/500 [00:00<00:00, 1102.02it/s] -100%|██████████| 4/4 [00:00<00:00, 5.86it/s] --
- | rule_of_five | -rule_of_oprea | -rule_of_cns | -rule_of_leadlike_soft | -
---|---|---|---|---|
0 | -True | -True | -True | -True | -
1 | -True | -False | -False | -False | -
2 | -True | -False | -False | -False | -
3 | -True | -False | -False | -False | -
4 | -True | -True | -True | -True | -
... | -... | -... | -... | -... | -
495 | -True | -False | -False | -False | -
496 | -True | -False | -False | -False | -
497 | -True | -False | -False | -False | -
498 | -True | -False | -False | -False | -
499 | -True | -False | -False | -False | -
500 rows × 4 columns
-For a list of all available named catalogs, you can use catalog.list_named_catalogs
from medchem import catalog
-print(catalog.list_named_catalogs())
-
['tox', 'pains', 'pains_a', 'pains_b', 'pains_c', 'nih', 'zinc', 'brenk', 'dundee', 'bms', 'glaxo', 'schembl', 'mlsmr', 'inpharmatica', 'lint', 'alarm_nmr', 'alphascreen', 'gst_hitters', 'his_hitters', 'luciferase', 'dnabinder', 'chelator', 'hitters', 'electrophilic', 'carcinogen', 'ld50_oral', 'reactive_unstable_toxic', 'skin', 'toxicophore', 'nibr', 'bredt', 'unstable_graph'] --
You can build a custom catalog based on smarts you have defined. For example, using an internal smarts bank.
- -from medchem.catalog import from_smarts
-import pandas as pd
-
smarts_bank = pd.read_csv(get_data("smarts_bank.csv"))
-
custom_catalog = from_smarts(smarts_bank.smarts.values, smarts_bank.name.values, entry_as_inds=False)
-
mol = "CCS(=O)(=O)N1CC(C1)N2C=C(C=N2)C3=C4C=CNC4=NC=N3"
-mol = dm.to_mol(mol)
-mol
-
matches = custom_catalog.GetMatches(mol)
-[m.GetDescription() for m in matches]
-
['HBA', - 'HBD', - 'HBD', - 'Hydrogen', - 'Hydrogen', - 'Hydrogen', - 'Acyclic Bonds', - 'Rotable Bond', - 'SP3 Nitrogen', - 'SP2 Nitrogen', - 'SP2 Carbon', - 'Aromatic SP2 Carbon', - 'Chiral Carbon']-
Medchem provides a query system that can be used for filtering molecules based on a constructed query made of statements compliant with the provided grammar.
-Detailed information about the query system can be found in the documentation
- -from medchem.query import QueryFilter
-
# note that whitespace and newlines are ignored in the query
-query = """
-(
- HASPROP("tpsa" < 80) AND HASPROP("clogp" < 3) AND ! HASALERT("pains")
-)
- OR
-(
- HASPROP("n_heavy_atoms" >= 10)
- AND
- (
- HASGROUP("Alcohols")
- OR
- HASSUBSTRUCTURE("[CX3](=[OX1])O", True, 1)
- )
-)
-"""
-
data = dm.freesolv()
-query_filter = QueryFilter(query)
-out = query_filter(data.smiles, n_jobs=-1, progress=True)
-
-
any(out)
-
True-