diff --git a/1.4.0/404.html b/1.4.0/404.html deleted file mode 100644 index cfb7d8a..0000000 --- a/1.4.0/404.html +++ /dev/null @@ -1,554 +0,0 @@ - - - - - - - - - - - - - - - - - - - - medchem - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
-
- -
- - - - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - -
-
-
- - - -
-
-
- - - -
-
-
- - - -
-
- -

404 - Not found

- -
-
- - -
- -
- - - -
-
-
-
- - - - - - - - - \ No newline at end of file diff --git a/1.4.0/_assets/css/custom.css b/1.4.0/_assets/css/custom.css deleted file mode 100644 index 443bf62..0000000 --- a/1.4.0/_assets/css/custom.css +++ /dev/null @@ -1,126 +0,0 @@ -/* Indentation. */ -div.doc-contents:not(.first) { - padding-left: 25px; - border-left: 4px solid rgba(230, 230, 230); - margin-bottom: 80px; -} - -/* Don't capitalize names. */ -h5.doc-heading { - text-transform: none !important; -} - -/* Don't use vertical space on hidden ToC entries. */ -.hidden-toc::before { - margin-top: 0 !important; - padding-top: 0 !important; -} - -/* Don't show permalink of hidden ToC entries. */ -.hidden-toc a.headerlink { - display: none; -} - -/* Avoid breaking parameters name, etc. in table cells. */ -td code { - word-break: normal !important; -} - -/* For pieces of Markdown rendered in table cells. */ -td p { - margin-top: 0 !important; - margin-bottom: 0 !important; -} - -:root { - --custom-primary: rgb(255, 153, 0); - --custom-secondary: #343a40; - - /* Primary color shades */ - --md-primary-fg-color: var(--custom-primary); - --md-primary-fg-color--light: var(--custom-primary); - --md-primary-fg-color--dark: var(--custom-primary); - --md-primary-bg-color: var(--custom-secondary); - --md-primary-bg-color--light: var(--custom-secondary); - --md-text-link-color: var(--custom-secondary); - - /* Accent color shades */ - --md-accent-fg-color: var(--custom-secondary); - --md-accent-fg-color--transparent: var(--custom-secondary); - --md-accent-bg-color: var(--custom-secondary); - --md-accent-bg-color--light: var(--custom-secondary); -} - -:root>* { - /* Code block color shades */ - --md-code-bg-color: hsla(0, 0%, 96%, 1); - --md-code-fg-color: hsla(200, 18%, 26%, 1); - - /* Footer */ - --md-footer-bg-color: var(--custom-primary); - /* --md-footer-bg-color--dark: hsla(0, 0%, 0%, 0.32); */ - --md-footer-fg-color: var(--custom-secondary); - --md-footer-fg-color--light: var(--custom-secondary); - 
--md-footer-fg-color--lighter: var(--custom-secondary); - -} - -.md-header { - background-image: linear-gradient(to right, #70ad47ff, #70ad47ff); -} - -.md-footer { - background-image: linear-gradient(to right, #70ad47ff, #70ad47ff); -} - -.md-tabs { - background-image: linear-gradient(to right, #F4F6F9, #E2CEC3); -} - -.md-header__topic { - color: rgb(255, 255, 255); -} - -.md-source__repository, -.md-source__icon, -.md-search__input, -.md-search__input::placeholder, -.md-search__input~.md-search__icon, -.md-footer__inner.md-grid, -.md-copyright__highlight, -.md-copyright, -.md-footer-meta.md-typeset a, -.md-version { - color: rgb(255, 255, 255) !important; -} - -.md-search__form { - background-color: rgba(255, 255, 255, 0.2); -} - -.md-search__input { - color: #222222 !important; -} - -.md-header__topic { - color: rgb(255, 255, 255); - font-size: 1.4em; -} - -/* Increase the size of the logo */ -.md-header__button.md-logo img, -.md-header__button.md-logo svg { - height: 2rem !important; -} - -/* Reduce the margin around the logo */ -.md-header__button.md-logo { - margin: 0.4em; - padding: 0.4em; -} - -/* Remove the `In` and `Out` block in rendered Jupyter notebooks */ -.md-container .jp-Cell-outputWrapper .jp-OutputPrompt.jp-OutputArea-prompt, -.md-container .jp-Cell-inputWrapper .jp-InputPrompt.jp-InputArea-prompt { - display: none !important; -} diff --git a/1.4.0/api/medchem.alerts.html b/1.4.0/api/medchem.alerts.html deleted file mode 100644 index 8ec8119..0000000 --- a/1.4.0/api/medchem.alerts.html +++ /dev/null @@ -1,1204 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - medchem.alerts - medchem - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - Skip to content - - -
-
- -
- - - - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - -
-
-
- - - -
-
-
- - - -
-
-
- - - -
-
- - - - - - - -

medchem.alerts

- - -
- - - -

- medchem.alerts - - -

- -
- - - -
- - - - - - - - -
- - - -

- AlertFilters - - -

- - -
- - -

Filtering class for building a library based on a list of structural alerts

-

To list the available alerts, use the list_default_available_alerts method.

- - - - - -
- - - - - - - - - -
- - - -
-__call__(mols, n_jobs=None, progress=False, include_all_alerts=False) - -
- - -
- -

Run alert evaluation on this list of molecule and return the full dataframe

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mols - Iterable[Union[str, rdchem.Mol]] - -
-

input list of molecules

-
-
- required -
n_jobs - Optional[int] - -
-

number of jobs

-
-
- None -
progress - bool - -
-

whether to show progress or not

-
-
- False -
include_all_alerts - bool - -
-

whether to include all of the alerts that match as columns

-
-
- False -
- -
- -
- -
- - - -
-__init__(alerts_set=None, alerts_db=None) - -
- - -
- -

Filtering molecules based on chemical alerts

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
alerts_set - Union[str, List[str]] - -
-

Alerts catalog to use. Default is BMS+Dundee+Glaxo

-
-
- None -
alerts_db - Optional[os.PathLike] - -
-

Alerts file to use. Default is internal

-
-
- None -
- -
- -
- -
- - - -
-evaluate(mol) - -
- - -
- -

Evaluate structure alerts on a molecule

- -

Parameters:

- - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - Union[str, rdchem.Mol] - -
-

input molecule

-
-
- required -
- -

Returns:

- - - - - - - - - - - - - -
TypeDescription
- -
-

list of alerts matched

-
-
- -
- -
- -
- - - -
-list_default_available_alerts() - - - cached - staticmethod - - -
- - -
- -

Return a list of unique rule set names

- -
- -
- - - -
- -
- -
- -
- - - -

- NovartisFilters - - -

- - -
- - -

Filtering class for building a screening deck following the novartis filtering process -published in https://dx.doi.org/10.1021/acs.jmedchem.0c01332.

-

The output of the filter are explained below: -- status: one of ["Exclude", "Flag", "Annotations", "Ok"] (ordered by quality). - Generally, you can keep anything without the "Exclude" label, as long as you also apply - a maximum severity score for compounds that collects too many flags. -- covalent: number of potentially covalent motifs contained in the compound -- severity: how severe are the issues with the molecules: - - 0: compound has no flags, might have annotations; - - 1-9: number of flags the compound raises; - - >= 10: default exclusion criterion used in the paper -- special_mol: whether the compound/parts of the compound belongs to a special class of molecules - (e.g peptides, glycosides, fatty acid). In that case, you should review the rejection reasons.

- - - - - -
- - - - - - - - - -
- - - -
-__call__(mols, n_jobs=None, progress=False) - -
- - -
- -

Run alert evaluation on this list of molecule and return the full dataframe

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mols - Iterable[Union[str, rdchem.Mol]] - -
-

input list of molecules

-
-
- required -
n_jobs - Optional[int] - -
-

number of jobs

-
-
- None -
progress - bool - -
-

whether to show progress or not

-
-
- False -
- -
- -
- - - -
- -
- -
- - - - -
- -
- -
- - - - - - -
-
- - -
- -
- - - -
-
-
-
- - - - - - - - - \ No newline at end of file diff --git a/1.4.0/api/medchem.catalog.html b/1.4.0/api/medchem.catalog.html deleted file mode 100644 index 93bf1bf..0000000 --- a/1.4.0/api/medchem.catalog.html +++ /dev/null @@ -1,1402 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - medchem.catalog - medchem - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - Skip to content - - -
-
- -
- - - - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - -
-
-
- - - -
-
-
- - - -
-
-
- - - -
-
- - - - - - - -

medchem.catalog

- - -
- - - -

- medchem.catalog - - -

- -
- - - -
- - - - - - - - -
- - - -

- NamedCatalogs - - -

- - -
- - -

Holder for substructure matching catalogs

- - - - - -
- - - - - - - - - -
- - - -
-alerts(subset=None) - - - staticmethod - - -
- - -
- -

Alerts filter catalogs commonly used in molecule filtering

- -

Parameters:

- - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
subset - Optional[Union[List[str], str]] - -
-

subset of providers to consider

-
-
- None -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
catalog - FilterCatalog - -
-

filter catalog

-
-
- -
- -
- -
- - - -
-bredt() - - - cached - staticmethod - - -
- - -
- -

Bredt fitler rules -Also see example of usage by surge's -https://github.com/StructureGenerator/SURGE/blob/main/doc/surge1_0.pdf

- -
- -
- -
- - - -
-chemical_groups(filters='medicinal') - - - cached - staticmethod - - -
- - -
- -

Chemical group filter catalogs

- -

Parameters:

- - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
filters - Union[str, List[str]] - -
-

list of tag to filter the catalog on.

-
-
- 'medicinal' -
- -
- -
- -
- - - -
-nibr() - - - cached - staticmethod - - -
- - -
- -

Catalog from NIBR

-
-

Warning

-

This includes all the compounds in the catalog, regardless of severity (FLAG, EXCLUDE, ANNOTATION) -You likely don't want to use this for blind prioritization

-
- -
- -
- -
- - - -
-tox(pains_a=True, pains_b=True, pains_c=False, brenk=True, nih=False, zinc=False) - - - cached - staticmethod - - -
- - -
- -

Common toxicity and interference catalog

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
pains_a - bool - -
-

whether to include PAINS filters from assay A

-
-
- True -
pains_b - bool - -
-

whether to include PAINS filters from assay B

-
-
- True -
pains_c - bool - -
-

whether to include PAINS filters from assay C

-
-
- False -
brenk - bool - -
-

whether to include BRENK filters

-
-
- True -
nih - bool - -
-

whether to include NIH filters

-
-
- False -
zinc - bool - -
-

whether to include ZINC filters

-
-
- False -
- -
- -
- -
- - - -
-unstable_graph(max_severity=5) - - - cached - staticmethod - - -
- - -
- -

Unstable molecular graph to filter out especially for generative models

- -

Parameters:

- - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
max_severity - int - -
-

maximum severity to consider for graph rules to be acceptable

-
-
- 5 -
- -
- -
- - - -
- -
- -
- - -
- - - -

-from_smarts(smarts, labels=None, mincounts=None, maxcounts=None, entry_as_inds=False) - -

- - -
- -

Load catalog from a list of smarts

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
smarts - List[str] - -
-

list of input smarts to add to the catalog

-
-
- required -
labels - Optional[List[str]] - -
-

list of label for each smarts

-
-
- None -
mincounts - Optional[List[int]] - -
-

minimum count before a match is recognized

-
-
- None -
maxcounts - Optional[List[int]] - -
-

maximum count for a match to be valid

-
-
- None -
entry_as_inds - bool - -
-

whether to use index for entry id or the label

-
-
- False -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
catalog - FilterCatalog - -
-

merged catalogs

-
-
- -
- -
- -
- - - -

-list_named_catalogs() - -

- - -
- -

List all available named catalogs. This list will ignore all chemical groups -For a list of chemical group to be queried using NamedCatalog.chemical_groups, use medchem.group.list_default_chemical_groups

- -
- -
- -
- - - -

-merge_catalogs(*catalogs) - -

- - -
- -

Merge several catalogs into a single one

- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
catalog - FilterCatalog - -
-

merged catalog

-
-
- -
- -
- - - -
- -
- -
- - - - - - -
-
- - -
- -
- - - -
-
-
-
- - - - - - - - - \ No newline at end of file diff --git a/1.4.0/api/medchem.complexity.html b/1.4.0/api/medchem.complexity.html deleted file mode 100644 index 70abca0..0000000 --- a/1.4.0/api/medchem.complexity.html +++ /dev/null @@ -1,1443 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - medchem.complexity - medchem - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - Skip to content - - -
-
- -
- - - - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - -
-
-
- - - -
-
-
- - - -
-
-
- - - -
-
- - - - - - - -

medchem.rules

- - -
- - - -

- medchem.complexity.complexity_filter - - -

- -
- - - -
- - - - - - - - -
- - - -

- ComplexityFilter - - -

- - -
- - -

Complexity filters derived from nonpher: -https://github.com/lich-uct/nonpher/blob/master/nonpher/nonpher.py

-

To recover the original complexity score, use threshold_stats_file = "zinc_12". -The threshold have been re-calculated using the original new zinc-15 and focusing only on -commercially available compounds.

- - - - - -
- - - - - - - - - -
- - - -
-__call__(mol) - -
- - -
- -

Check whether the input structure is too complex given this instance of the complexity filter -Return False is the molecule is too complex, else True

- -

Parameters:

- - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - dm.Mol - -
-

input molecule

-
-
- required -
- -
- -
- -
- - - -
-__init__(limit='99', complexity_metric='bertz', threshold_stats_file='zinc_15_available') - -
- - -
- -

Default complexity limit is set on at least 1 exceeding metric on the 999th permille level

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
limit - str - -
-

The complexity percentile outlier limit to be used (should be expressed as an integer)

-
-
- '99' -
complexity_metric - str - -
-

The complexity filter name to be used. -Use ComplexityFilter.list_default_available_filters to list default filters. -The following complexity metrics are supported by default -* "bertz": bertz complexity index -* "sas": synthetic accessibility score (zinc_15_available only) -* "qed": qed score (zinc_15_available only) -* "clogp": clogp for how greasy a molecule is compared to other in the same mw range (zinc_15_available only) -* "whitlock": whitlock complexity index -* "barone": barone complexity index -* "smcm": synthetic and molecular complexity -* "twc": total walk count complexity (zinc_15_available only)

-
-
- 'bertz' -
threshold_stats_file - Optional[str] - -
-

The path to or type the threshold file to be used. -The default available threshold stats files are -* "zinc_12" -* "zinc_15_available"

-
-
- 'zinc_15_available' -
- -
- -
- -
- - - -
-list_default_available_filters() - - - classmethod - - -
- - -
- -

Return a list of unique filter names

- -
- -
- -
- - - -
-list_default_percentile(threshold_stats_file=None) - - - cached - classmethod - - -
- - -
- -

Return the default percentile list for the threshold file

- -
- -
- -
- - - -
-load_threshold_stats_file(path=None) - - - classmethod - - -
- - -
- -

Load threshold file to compute the percentille depending on the MW for each complexity_metric

- -

Parameters:

- - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
path - Optional[str] - -
-

path to the threshold file

-
-
- None -
- -
- -
- - - -
- -
- -
- - - - -
- -
- -

- - -
- - - -

- medchem.complexity._complexity_calc - - -

- -
- -

Complexity filters as implemented in nonpher -https://github.com/lich-uct/nonpher/blob/master/nonpher/complex_lib.py

- - - -
- - - - - - - - - -
- - - -

-BaroneCT(mol, chiral=False) - -

- - -
- -

Compute a Barone complexity measure for a molecule as described in:

-

R. Barone and M. Chanon, J. Chem. Inf. Comput. Sci., 2001, 41 (2), pp 269–272 -Qi Huang, Lin-LiLi, Sheng-Yong Yang, J. Mol. Graph. Model. 2010, 28 (8), pp 775–787

-

Parameter values are hardcoded as in the articles. -On zinc 15 commercially available dataset, the range of this score is [30, 4266] with a median of 538

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - dm.Mol - -
-

The input molecule.

-
-
- required -
chiral - bool - -
-

Whether to include chirality in the calculation.

-
-
- False -
- -
- -
- -
- - - -

-SMCM(mol) - -

- - -
- -

Compute synthetic and molecular complexity as described in:

-

TK Allu, TI Oprea, J. Chem. Inf. Model. 2005, 45(5), pp. 1237-1243. -https://sci-hub.ee/10.1021/ci0501387

-

On zinc 15 commercially available dataset, the range of this score is [1.93, 192.00] with a median of 42.23

- -

Parameters:

- - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - dm.Mol - -
-

the input molecule

-
-
- required -
- -
- -
- -
- - - -

-TWC(mol, log10=True) - -

- - -
- -

Compute total walk count in a molecules as proxy for complexity. This score is described in: -twc = 1/2 sum(k=1..n-1,sum(i=atoms,awc(k,i))) -Gerta Rucker and Christoph Rucker, J. Chem. Inf. Comput. Sci. 1993, 33, 683-695

-

On zinc 15 commercially available dataset, the range of this score is [1.20, 39.08] with a median of 10.65

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - -
-

the input molecule

-
-
- required -
log10 - bool - -
-

whether to return the log10 of the values

-
-
- True -
- -
- -
- -
- - - -

-WhitlockCT(mol, ringval=4, unsatval=2, heteroval=1, chiralval=2) - -

- - -
- -

A chemically intuitive measure for molecular complexity. This complexity measure -has been described in : H. W. Whitlock, J. Org. Chem., 1998, 63, 7982-7989. -Benzyls, fenyls, etc. are not treated at all.

-

On zinc 15 commercially available dataset, the range of this score is [0, 172] with a median of 25

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - -
-

The input molecule.

-
-
- required -
ringval - float - -
-

The contribution of rings

-
-
- 4 -
unsatval - float - -
-

The contribution of the unsaturated bond.

-
-
- 2 -
heteroval - float - -
-

The contribution of the heteroatom.

-
-
- 1 -
chiralval - float - -
-

The contribution of the chiral center.

-
-
- 2 -
- -
- -
- - - -
- -
- -
- - - - - - -
-
- - -
- -
- - - -
-
-
-
- - - - - - - - - \ No newline at end of file diff --git a/1.4.0/api/medchem.demerits.html b/1.4.0/api/medchem.demerits.html deleted file mode 100644 index 82c12c4..0000000 --- a/1.4.0/api/medchem.demerits.html +++ /dev/null @@ -1,989 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - medchem.demerits - medchem - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - Skip to content - - -
-
- -
- - - - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - -
-
-
- - - -
-
-
- - - -
-
-
- - - -
-
- - - - - - - -

medchem.demerits

- - -
- - - -

- medchem.demerits - - -

- -
- - - -
- - - - - - - - - -
- - - -

-batch_score(smiles_list, n_jobs=None, batch_size=5000, progress=False, **run_options) - -

- - -
- -

Run scorer on input smile list in batch

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
smiles_list - List - -
-

list of smiles

-
-
- required -
n_jobs - Optional[int] - -
-

Number of jobs to run in parallel.

-
-
- None -
batch_size - Optional[int] - -
-

Optional batch_size to run the the scoring in parallels.

-
-
- 5000 -
progress - bool - -
-

Whether to show progress bar.

-
-
- False -
run_options - -
-

Run options to pass to the underlining score function

-
-
- {} -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
out_df - pd.DataFrame - -
-

Dataframe containing the smiles and computed properties: -(rejected, demerit_score, reason, step)

-
-
- -
- -
- -
- - - -

-run_cmd(cmd, shell=False) - -

- - -
- -

Run command

- -
- -
- -
- - - -

-score(smiles_list, mc_first_pass_options='', iwd_options='', stop_after_step=3, **run_options) - -

- - -
- -

Run scorer on input smile list:

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
smiles_list - List - -
-

list of smiles

-
-
- required -
mc_first_pass_options - Optional[str] - -
-

Initial options to pass to mc_first_pass

-
-
- '' -
iwd_options - Optional[str] - -
-

Initial options to pass to iwdemerit

-
-
- '' -
stop_after_step - Optional[int] - -
-

Where to stop in the pipeline. Don't change this if you don't know.

-
-
- 3 -
run_options - -
-

Additional option to run the pipeline

-
-
- {} -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
out_df - pd.DataFrame - -
-

Dataframe containing the smiles and computed properties: -(rejected, demerit_score, reason, step)

-
-
- -
- -
- - - -
- -
- -
- - - - - - -
-
- - -
- -
- - - -
-
-
-
- - - - - - - - - \ No newline at end of file diff --git a/1.4.0/api/medchem.filter.html b/1.4.0/api/medchem.filter.html deleted file mode 100644 index cb389e2..0000000 --- a/1.4.0/api/medchem.filter.html +++ /dev/null @@ -1,3427 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - medchem.filter - medchem - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - Skip to content - - -
-
- -
- - - - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - -
-
-
- - - - - - - -
-
- - - - - - - -

medchem.filter

- - -
- - - -

- medchem.filter.lead - - -

- -
- - - -
- - - - - - - - - -
- - - -

-alert_filter(mols, alerts, alerts_db=None, n_jobs=1, rule_dict=None, return_idx=False) - -

- - -
- -

Filter a dataset of molecules, based on common structural alerts and specific rules.

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mols - Iterable[Union[str, dm.Mol]] - -
-

List of molecules to filter

-
-
- required -
alerts - List[str] - -
-

List of alert collections to screen for. See AlertFilters.list_default_available_alerts()

-
-
- required -
alerts_db - Optional[os.PathLike] - -
-

Path to the alert file name. -The internal default file (alerts.csv) will be used if not provided

-
-
- None -
n_jobs - Optional[int] - -
-

Number of cpu to use

-
-
- 1 -
rule_dict - Dict - -
-

Dictionary with additional rules to apply during the filtering. -For example, such dictionary for drug-like compounds would look like this:

-
-
-
-

rule_dict - {"MW": [0, 500], "LogP": [-0.5, 5], "HBD": [0, 5], "HBA": [0, 10], "TPSA": [0, 150]}

-
-
-
-
-
- None -
return_idx - bool - -
-

Whether to return the filtered index

-
-
- False -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
filtered_mask - -
-

boolean array (or index array) where true means -the molecule IS OK (not found in the alert catalog).

-
-
- -
- -
- -
- - - -

-bredt_filter(mols, return_idx=False, n_jobs=None, progress=False, scheduler='threads', batch_size=100) - -

- - -
- -

Filter a list of compounds according to Bredt's rules -https://en.wikipedia.org/wiki/Bredt%27s_rule

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mols - Sequence[Union[str, dm.Mol]] - -
-

list of input molecules

-
-
- required -
return_idx - bool - -
-

whether to return index or a boolean mask

-
-
- False -
n_jobs - Optional[int] - -
-

number of parallel job to run. Sequential by default

-
-
- None -
progress - bool - -
-

whether to show progress bar

-
-
- False -
scheduler - str - -
-

joblib scheduler to use

-
-
- 'threads' -
batch_size - int - -
-

batch size for parallel processing. Note that batch_size should be -increased if the number of used CPUs gets very large.

-
-
- 100 -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
filtered_mask - -
-

boolean array (or index array) where true means the molecule is not toxic.

-
-
- -
- -
- -
- - - -

-catalog_filter(mols, catalogs, return_idx=False, n_jobs=None, progress=False, scheduler='processes', batch_size=100) - -

- - -
- -

Filter a list of compounds according to catalog of structures alerts and patterns

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mols - Sequence[Union[str, dm.Mol]] - -
-

list of input molecules

-
-
- required -
catalogs - List[Union[str, FilterCatalog]] - -
-

list of catalogs (name or FilterCatalog)

-
-
- required -
return_idx - bool - -
-

whether to return index or a boolean mask

-
-
- False -
n_jobs - Optional[int] - -
-

number of parallel job to run. Sequential by default

-
-
- None -
progress - bool - -
-

whether to show progress bar

-
-
- False -
scheduler - str - -
-

joblib scheduler to use

-
-
- 'processes' -
batch_size - int - -
-

batch size for parallel processing. Note that batch_size should be -increased if the number of used CPUs gets very large.

-
-
- 100 -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
filtered_mask - -
-

boolean array (or index array) where true means the molecule is not found in the catalog.

-
-
- -
- -
- -
- - - -

-chemical_group_filter(mols, chemical_group, return_idx=False, n_jobs=None, progress=False, scheduler='threads') - -

- - -
- -

Filter a list of compounds according to a chemical group instance.

-
-

Note

-

This function will return the list of molecules that DO NOT match the chemical group

-
- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mols - Iterable[Union[str, dm.Mol]] - -
-

list of input molecules

-
-
- required -
chemical_group - ChemicalGroup - -
-

a chemical group instance with the required functional groups to use.

-
-
- required -
return_idx - bool - -
-

whether to return index or a boolean mask

-
-
- False -
n_jobs - Optional[int] - -
-

number of parallel job to run. Sequential by default

-
-
- None -
progress - bool - -
-

whether to show progress bar

-
-
- False -
scheduler - str - -
-

joblib scheduler to use

-
-
- 'threads' -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
filtered_mask - -
-

boolean array (or index array) where true means the molecule DOES NOT MATCH the groups.

-
-
- -
- -
- -
- - - -

-complexity_filter(mols, complexity_metric='bertz', threshold_stats_file='zinc_15_available', limit='99', return_idx=False, n_jobs=None, progress=False, scheduler='processes') - -

- - -
- -

Filter a list of compounds according to a chemical group instance

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mols - Iterable[Union[str, dm.Mol]] - -
-

list of input molecules

-
-
- required -
complexity_metric - str - -
-

complexity metric to use -Use ComplexityFilter.list_default_available_filters to list default filters. -The following complexity metrics are supported by default -* "bertz": bertz complexity index -* "sas": synthetic accessibility score (zinc_15_available only) -* "qed": qed score (zinc_15_available only) -* "clogp": clogp for how greasy a molecule is compared to other in the same mw range (zinc_15_available only) -* "whitlock": whitlock complexity index -* "barone": barone complexity index -* "smcm": synthetic and molecular complexity -* "twc": total walk count complexity (zinc_15_available only)

-
-
- 'bertz' -
threshold_stats_file - str - -
-

complexity threshold statistic origin to use

-
-
- 'zinc_15_available' -
limit - str - -
-

complexity outlier percentile to use

-
-
- '99' -
return_idx - bool - -
-

whether to return index or a boolean mask

-
-
- False -
n_jobs - Optional[int] - -
-

number of parallel job to run. Sequential by default

-
-
- None -
progress - bool - -
-

whether to show progress bar

-
-
- False -
scheduler - str - -
-

joblib scheduler to use

-
-
- 'processes' -
- -
- Also see -

medchem.complexity.ComplexityFilter

-
-

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
filtered_mask - -
-

boolean array (or index array) where true means the molecule MATCH the rules.

-
-
- -
- -
- -
- - - -

-lilly_demerit_filter(smiles, max_demerits=160, return_idx=False, n_jobs=None, progress=False, **kwargs) - -

- - -
- -

Run Lilly demerit filtering on current list of molecules

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
smiles - Iterable[str] - -
-

list of input molecules as smiles preferably

-
-
- required -
max_demerits - Optional[int] - -
-

Cutoff to reject molecules Defaults to 160.

-
-
- 160 -
return_idx - bool - -
-

whether to return a mask or a list of valid indexes

-
-
- False -
progress - bool - -
-

whether to show progress bar

-
-
- False -
kwargs - -
-

parameters specific to the demerits.score function

-
-
- {} -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
filtered_mask - -
-

boolean array (or index array) where true means the molecule is ok.

-
-
- -
- -
- -
- - - -

-molecular_graph_filter(mols, max_severity=5, return_idx=False, n_jobs=None, progress=False, scheduler='threads') - -

- - -
- -

Filter a list of compounds according to unstable molecular graph filter list.

-

This list was obtained from observation around The disallowed graphs are:

-
    -
  • K3,3 or K2,4 structure
  • -
  • Cone of P4 or K4 with 3-ear
  • -
  • Node in more than one ring of length 3 or 4
  • -
- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mols - Iterable[Union[str, dm.Mol]] - -
-

list of input molecules

-
-
- required -
max_severity - int - -
-

maximum acceptable severity (1-10). Default is <5

-
-
- 5 -
return_idx - bool - -
-

whether to return index or a boolean mask

-
-
- False -
n_jobs - Optional[int] - -
-

number of parallel job to run. Sequential by default

-
-
- None -
progress - bool - -
-

whether to show progress bar

-
-
- False -
scheduler - str - -
-

joblib scheduler to use

-
-
- 'threads' -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
filtered_mask - -
-

boolean array (or index array) where true means the molecule is not toxic.

-
-
- -
- -
- -
- - - -

-protecting_groups_filter(mols, return_idx=False, protecting_groups=['fmoc', 'tert-butoxymethyl', 'tert-butyl carbamate', 'tert-butyloxycarbonyl'], n_jobs=None, progress=False, scheduler='threads') - -

- - -
- -

Filter a list of compounds according to match to known protecting groups. -Note that is a syntaxic sugar for calling chemical_group_filter with the protecting groups subset

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mols - Iterable[Union[str, dm.Mol]] - -
-

list of input molecules

-
-
- required -
protecting_groups - str - -
-

type of protection group to consider if not provided, will use all (not advised)

-
-
- ['fmoc', 'tert-butoxymethyl', 'tert-butyl carbamate', 'tert-butyloxycarbonyl'] -
return_idx - bool - -
-

whether to return index or a boolean mask

-
-
- False -
n_jobs - Optional[int] - -
-

number of parallel job to run. Sequential by default

-
-
- None -
progress - bool - -
-

whether to show progress bar

-
-
- False -
scheduler - str - -
-

joblib scheduler to use

-
-
- 'threads' -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
filtered_mask - -
-

boolean array (or index array) where true means the molecule DOES NOT MATCH the groups.

-
-
- -
- -
- -
- - - -

-rules_filter(mols, rules, return_idx=False, n_jobs=None, progress=False, scheduler='processes') - -

- - -
- -

Filter a list of compounds according to a predefined set of rules

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mols - Iterable[Union[str, dm.Mol]] - -
-

list of input molecules

-
-
- required -
rules - Union[List[Any], RuleFilters] - -
-

list of rules to apply to the input molecules.

-
-
- required -
return_idx - bool - -
-

whether to return index or a boolean mask

-
-
- False -
n_jobs - Optional[int] - -
-

number of parallel job to run. Sequential by default

-
-
- None -
progress - bool - -
-

whether to show progress bar

-
-
- False -
scheduler - str - -
-

joblib scheduler to use

-
-
- 'processes' -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
filtered_mask - -
-

boolean array (or index array) where true means the molecule MATCHES the rules.

-
-
- -
- -
- -
- - - -

-screening_filter(mols, n_jobs=None, max_severity=10, return_idx=False) - -

- - -
- -

Filter a set of molecules based on novartis screening deck curation process -Schuffenhauer, A. et al. Evolution of Novartis' small molecule screening deck design, J. Med. Chem. (2020) -DOI. https://dx.doi.org/10.1021/acs.jmedchem.0c01332

-
-

Note

-

The severity argument corresponds to the accumulated severity for a compound across all patterns in the -catalog.

-
- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mols - Iterable[Union[str, dm.Mol]] - -
-

list of input molecules

-
-
- required -
n_jobs - Optional[int] - -
-

number of parallel job to run. Sequential by default

-
-
- None -
max_severity - int - -
-

maximum severity allowed. Default is <10

-
-
- 10 -
return_idx - bool - -
-

Whether to return the filtered index

-
-
- False -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
filtered_mask - -
-

boolean array (or index array) where true means the molecule -IS NOT REJECTED (i.e not found in the alert catalog).

-
-
- -
- -
- - - -
- -
- -

- - -
- - - -

- medchem.filter.generic - - -

- -
- - - -
- - - - - - - - - -
- - - -

-atom_list_filter(mols, unwanted_atom_list=None, wanted_atom_list=None, return_idx=False, n_jobs=None, progress=False, scheduler=None) - -

- - -
- -

Find molecule without any atom from a set of unwanted atom symbols -and with all atoms in the set of desirable atom list

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mols - Iterable[Union[str, dm.Mol]] - -
-

list of input molecules

-
-
- required -
unwanted_atom_list - Optional[Iterable] - -
-

list of undesirable atom symbol

-
-
- None -
wanted_atom_list - Optional[Iterable] - -
-

list of desirable atom symbol

-
-
- None -
return_idx - bool - -
-

whether to return index or a boolean mask

-
-
- False -
n_jobs - Optional[int] - -
-

number of parallel job to run. Sequential by default

-
-
- None -
progress - bool - -
-

whether to show progress bar

-
-
- False -
scheduler - Optional[str] - -
-

joblib scheduler to use

-
-
- None -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
filtered_mask - -
-

boolean array (or index array) where true means the molecule is ok.

-
-
- -
- -
- -
- - - -

-halogenicity_filter(mols, thresh_F=6, thresh_Br=3, thresh_Cl=3, return_idx=False, n_jobs=None, progress=False, scheduler=None) - -

- - -
- -

Find molecule that do not exceed halogen threshold. These thresholds are:

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mols - Iterable[Union[str, dm.Mol]] - -
-

list of input molecules

-
-
- required -
thresh_F - int - -
-

maximum number of fluorine

-
-
- 6 -
thresh_Br - int - -
-

maximum number of bromine

-
-
- 3 -
thresh_Cl - int - -
-

maximum number of chlorine

-
-
- 3 -
return_idx - bool - -
-

whether to return index or a boolean mask

-
-
- False -
n_jobs - Optional[int] - -
-

number of parallel job to run. Sequential by default

-
-
- None -
progress - bool - -
-

whether to show progress bar

-
-
- False -
scheduler - Optional[str] - -
-

joblib scheduler to use

-
-
- None -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
filtered_mask - -
-

boolean array (or index array) where true means the molecule is ok.

-
-
- -
- -
- -
- - - -

-macrocycle_filter(mols, max_cycle_size=10, return_idx=False, n_jobs=None, progress=False, scheduler=None) - -

- - -
- -

Find molecules that do not infringe the strict maximum cycle size.

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mols - Iterable[Union[str, dm.Mol]] - -
-

list of input molecules

-
-
- required -
max_cycle_size - int - -
-

strict maximum macrocycle size

-
-
- 10 -
return_idx - bool - -
-

whether to return index or a boolean mask

-
-
- False -
n_jobs - Optional[int] - -
-

number of parallel job to run. Sequential by default

-
-
- None -
progress - bool - -
-

whether to show progress bar

-
-
- False -
scheduler - Optional[str] - -
-

joblib scheduler to use

-
-
- None -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
filtered_mask - -
-

boolean array (or index array) where true means the molecule is ok.

-
-
- -
- -
- -
- - - -

-num_atom_filter(mols, min_atoms=None, max_atoms=None, return_idx=False, n_jobs=None, progress=False, scheduler=None) - -

- - -
- -

Find a molecule that matches the atom number constraints. -Returning True means the molecule is fine

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mols - Iterable[Union[str, dm.Mol]] - -
-

list of input molecules

-
-
- required -
min_atoms - Optional[int] - -
-

strict minimum number of atoms (atoms > min_atoms)

-
-
- None -
max_atoms - Optional[int] - -
-

strict maximum number of atoms (atoms < max_atoms)

-
-
- None -
return_idx - bool - -
-

whether to return index or a boolean mask

-
-
- False -
n_jobs - Optional[int] - -
-

number of parallel job to run. Sequential by default

-
-
- None -
progress - bool - -
-

whether to show progress bar

-
-
- False -
scheduler - Optional[str] - -
-

joblib scheduler to use

-
-
- None -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
filtered_mask - -
-

boolean array (or index array) where true means the molecule is ok.

-
-
- -
- -
- -
- - - -

-num_stereo_center_filter(mols, max_stereo_centers=4, max_undefined_stereo_centers=2, return_idx=False, n_jobs=None, progress=False, scheduler=None) - -

- - -
- -

Find a molecule that matches the number of stereo center constraints. -Returning True means the molecule is fine

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mols - Iterable[Union[str, dm.Mol]] - -
-

list of input molecules

-
-
- required -
max_stereo_centers - int - -
-

strict maximum number of stereo centers (<). Default is 4

-
-
- 4 -
max_undefined_stereo_centers - Optional[int] - -
-

strict maximum number of undefined stereo centers (<). Default is 2

-
-
- 2 -
return_idx - bool - -
-

whether to return index or a boolean mask

-
-
- False -
n_jobs - Optional[int] - -
-

number of parallel job to run. Sequential by default

-
-
- None -
progress - bool - -
-

whether to show progress bar

-
-
- False -
scheduler - Optional[str] - -
-

joblib scheduler to use

-
-
- None -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
filtered_mask - -
-

boolean array (or index array) where true means the molecule is ok.

-
-
- -
- -
- -
- - - -

-ring_infraction_filter(mols, hetcycle_min_size=4, return_idx=False, n_jobs=None, progress=False, scheduler=None) - -

- - -
- -

Find molecules that have a ring infraction filter. -Returning True means the molecule is fine

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mols - Iterable[Union[str, dm.Mol]] - -
-

list of input molecules

-
-
- required -
hetcycle_min_size - int - -
-

Minimum ring size before more than 1 hetero atom or any non single bond is allowed. -This is a strict threshold (>)

-
-
- 4 -
return_idx - bool - -
-

whether to return index or a boolean mask

-
-
- False -
n_jobs - Optional[int] - -
-

number of parallel job to run. Sequential by default

-
-
- None -
progress - bool - -
-

whether to show progress bar

-
-
- False -
scheduler - Optional[str] - -
-

joblib scheduler to use

-
-
- None -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
filtered_mask - -
-

boolean array (or index array) where true means the molecule is ok.

-
-
- -
- -
- -
- - - -

-symmetry_filter(mols, symmetry_threshold=0.8, return_idx=False, n_jobs=None, progress=False, scheduler=None) - -

- - -
- -

Find molecules that are not symmetrical, given a symmetry threshold

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mols - Iterable[Union[str, dm.Mol]] - -
-

list of input molecules

-
-
- required -
symmetry_threshold - float - -
-

threshold to consider a molecule highly symmetrical

-
-
- 0.8 -
return_idx - bool - -
-

whether to return index or a boolean mask

-
-
- False -
n_jobs - Optional[int] - -
-

number of parallel job to run. Sequential by default

-
-
- None -
progress - bool - -
-

whether to show progress bar

-
-
- False -
scheduler - Optional[str] - -
-

joblib scheduler to use

-
-
- None -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
filtered_mask - -
-

boolean array (or index array) where true means the molecule is ok.

-
-
- -
- -
- - - -
- -
- -
- - - - - - -
-
- - -
- -
- - - -
-
-
-
- - - - - - - - - \ No newline at end of file diff --git a/1.4.0/api/medchem.groups.html b/1.4.0/api/medchem.groups.html deleted file mode 100644 index 4de67a7..0000000 --- a/1.4.0/api/medchem.groups.html +++ /dev/null @@ -1,1530 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - medchem.groups - medchem - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - Skip to content - - -
-
- -
- - - - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - -
-
-
- - - -
-
-
- - - -
-
-
- - - -
-
- - - - - - - -

medchem.groups

- - -
- - - -

- medchem.groups - - -

- -
- - - -
- - - - - - - - -
- - - -

- ChemicalGroup - - -

- - -
- - -

Build a library of chemical groups using a list of structures parsed from a file

-

The default library of structure has been curated from https://github.com/Sulstice/global-chem and additional open source data.

-
-

Note

-

For new chemical groups, please minimally provide the 'smiles'/'smarts', 'name' and "group" and optional 'hierarchy' columns

-
-
-

Warning

-

The SMILES and SMARTS used in the default list of substructures do not result in the same matches. -Unless specified otherwise, the SMILES will be used in the matching done by this class, -whereas due to RDKit's limitation, the SMARTS will be used in the matching done by the generated catalog. -For more information see this discussion: https://github.com/valence-platform/medchem/pull/19,

-
- - - - - -
- - - - - - - -
- - - -
-dataframe - - - property - - -
- - -
- -

Get the dataframe of the chemical groups

-
- -
- -
- - - -
-mol_smarts - - - property - - -
- - -
- -

Get the SMARTS of the chemical groups in this instance

-
- -
- -
- - - -
-mols - - - property - - -
- - -
- -

Get the Molecule object of the SMILES for the chemical groups in this instance

-
- -
- -
- - - -
-name - - - property - - -
- - -
- -

Get the Name of the chemical groups in this instance

-
- -
- -
- - - -
-smarts - - - property - - -
- - -
- -

Get the SMARTS of the chemical groups in this instance

-
- -
- -
- - - -
-smiles - - - property - - -
- - -
- -

Get the SMILES of the chemical groups in this instance

-
- -
- - - -
- - - -
-__init__(groups=None, n_jobs=None, groups_db=None) - -
- - -
- -

Build a chemical group library

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
groups - Union[str, List[str]] - -
-

List of groups to use. Defaults to None where all functional groups are used

-
-
- None -
n_jobs - Optional[int] - -
-

Optional number of jobs to run in parallel for internally building the data. Defaults to None.

-
-
- None -
groups_db - Optional[os.PathLike] - -
-

Path to a file containing the dump of the chemical groups. Default is the internal dataset

-
-
- None -
- -
- -
- -
- - - -
-filter(names, fuzzy=False) - -
- - -
- -

Filter the group to restrict to only the name in input

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
names - List[str] - -
-

list of names to use for filters

-
-
- required -
fuzzy - bool - -
-

whether to use exact or fuzzy matching

-
-
- False -
- -
- -
- -
- - - -
-get_catalog() - - - cached - - -
- - -
- -

Build an rdkit catalog from the current chemical group data

- -
- -
- -
- - - -
-get_matches(mol, use_smiles=True) - -
- - -
- -

Get all the functional groups in this instance that matches the input molecule

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - Union[dm.Mol, str] - -
-

input molecule

-
-
- required -
use_smiles - bool - -
-

whether to use the smiles representation of the catalog or the smarts

-
-
- True -
- -
- -
- -
- - - -
-has_match(mol) - -
- - -
- -

Check whether the input molecule has any functional group in this instance

- -

Parameters:

- - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - Union[dm.Mol, str] - -
-

input molecule

-
-
- required -
- -
- -
- -
- - - -
-list_groups() - -
- - -
- -

List all the chemical groups available

- -
- -
- -
- - - -
-list_hierarchy_groups() - -
- - -
- -

List all the hierarchy in chemical groups available. -To get the full hierarchy on each path, split by the . character.

- -
- -
- - - -
- -
- -
- - -
- - - -

-list_default_chemical_groups(hierachy=False) - -

- - -
- -

List all the chemical groups available.

-
-

Note

-

chemical groups defines how a collection of patterns are organized. -They do not correspond to individual pattern name.

-
- -

Parameters:

- - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
hierarchy - -
-

whether to return the full hierarchy or the group name only

-
-
- required -
- -

Returns:

- - - - - - - - - - - - - -
TypeDescription
- -
-

List of chemical groups

-
-
- -
- -
- -
- - - -

-list_functional_group_names(exclude_basic=True) - -

- - -
- -

List common functional group names

- -

Parameters:

- - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
exclude_basic - bool - -
-

whether to exclude the basic functional groups

-
-
- True -
- -

Returns:

- - - - - - - - - - - - - -
TypeDescription
- -
-

List of functional group names

-
-
- -
- -
- - - -
- -
- -
- - - - - - -
-
- - -
- -
- - - -
-
-
-
- - - - - - - - - \ No newline at end of file diff --git a/1.4.0/api/medchem.query.html b/1.4.0/api/medchem.query.html deleted file mode 100644 index d5d609f..0000000 --- a/1.4.0/api/medchem.query.html +++ /dev/null @@ -1,2791 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - medchem.query - medchem - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - Skip to content - - -
-
- -
- - - - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - -
-
-
- - - - - - - -
-
- - - - - - - -

medchem.query

-

This module helps build a filter based on a query language that can be parsed. -By default, the default query parser will be used, which contains the following instructions that can be orchestrated using boolean operation (or, and, not and parenthesis)

-

Example

-
import datamol as dm
-from medchem.query.eval import QueryFilter
-
-query = """HASPROP("tpsa" < 120) AND HASSUBSTRUCTURE("[OH]", True)"""
-chemical_filter = QueryFilter(query, parser="lalr")
-mols = dm.data.cdk2().mol[:10]
-chemical_filter(mols, n_jobs=-1) # [False, False, False, False, False, True, True, True, False, False]
-
-

Syntax

-

Any string provided as query argument needs to be quoted (similar to json) to avoid ambiguity in parsing. -* An example of valid query is """(HASPROP("tpsa" > 120 ) | HASSUBSTRUCTURE("c1ccccc1")) AND NOT HASALERT("pains") OR HASSUBSTRUCTURE("[OH]", max, 2)""". -* Examples of invalid queries are - * """HASPROP("tpsa" > 120) OR HASSUBSTRUCTURE("[OH]", True, >, 3)""" : unexpected wrong operator > - * """HASPROP(tpsa > 120)""" : tpsa is not quoted - * """HASPROP("tpsa") > 120""" : this is not part of the language specification - * """(HASPROP("tpsa" > 120) AND HASSUBSTRUCTURE("[OH]", True, max, 3 )""": mismatching parenthesis (

-
    -
  • """HASPROP("tpsa" > 120) OR HASSUBSTRUCTURE("CO")""", """(HASPROP("tpsa" > 120)) OR (HASSUBSTRUCTURE("CO"))""" and """(HASPROP("tpsa" > 120) OR HASSUBSTRUCTURE("CO"))""" are equivalent
  • -
-

HASALERT

-

check whether a molecule has an alert from a catalog -

# alert is one supported alert catalog by `medchem`. For example `pains`
-HASALERT(alert:str) 
-

-

HASGROUP

-

check whether a molecule has a specific functional group from a catalog

-
# group is one supported functional group provided by `medchem`
-HASGROUP(group:str) 
-
-

MATCHRULE

-

check whether a molecule matches a predefined druglikeness rule from a catalog -

# rule is one supported rule provided by `medchem`. For example `rule_of_five`
-MATCHRULE(rule:str) 
-

-

HASSUPERSTRUCTURE

-

check whether a molecule has query as superstructure -

# query is a SMILES
-HASSUPERSTRUCTURE(query:str) 
-

-

HASSUBSTRUCTURE

-

Check whether a molecule has query as substructure. -Note that providing the comma separator , is mandatory here as each variable is an argument.

-
# query is a SMILES or a SMARTS, operator is defined below, is_smarts is a boolean
-
-HASSUBSTRUCTURE(query:str, is_smarts:Optional[bool], operator:Optional[str], limit:Optional[int])
-
-# which correspond to setting this default values
-HASSUBSTRUCTURE(query:str, is_smarts=False, operator="min", limit=1)
-# same as
-HASSUBSTRUCTURE(query:str, is_smarts=None, operator=None, limit=None)
-
-

Not providing optional arguments is allowed, but they need to be provided in the exact same order shown above. Thus:

-
    -
  • HASSUBSTRUCTURE("CO")
  • -
  • HASSUBSTRUCTURE("CO", False)
  • -
  • HASSUBSTRUCTURE("CO", False, min)
  • -
  • HASSUBSTRUCTURE("CO", False, min, 1)
  • -
-

are all valid and equivalent (given their default values)

-

Furthermore, since the correct argument map can be inferred when no ambiguity arises, the following are valid but discouraged

-
    -
  • HASSUBSTRUCTURE("CO", False, 1)
  • -
  • HASSUBSTRUCTURE("CO", min, 1)
  • -
-

Whereas, this is invalid: -* HASSUBSTRUCTURE("CO", min, False, 1)

-

HASPROP

-

Check whether a molecule has prop as property within a defined limit. -Any comma , provided between arguments will be ignored

-
# prop is a valid datamol.descriptors property, comparator is a required comparator operator and defined below
-HASPROP(prop:str comparator:str limit:float)
-
-

LIKE

-

Check whether a molecule is similar enough to another molecule. -Any comma , provided between arguments will be ignored

-
# query is a SMILES
-LIKE(query:str  comparator:str limit:float)
-
-

Basic operators:

-
    -
  • comparator: one of = ==, !=, <, >, <=, >=
  • -
  • misc: the following misc values are accepted and parsed true, false, True, False, TRUE, FALSE
  • -
  • operator (can be quoted or unquoted):
  • -
  • MIN: min, MIN
  • -
  • MAX: max, MAX
  • -
  • boolean operator:
  • -
  • AND operator : AND or & or && or and
  • -
  • OR operator : OR or | or || or or
  • -
  • NOT operator : NOT or ! or ~ or not
  • -
-

API

- - -
- - - -

- medchem.query.parser - - -

- -
- - - -
- - - - - - - - -
- - - -

- QueryParser - - -

- - -
-

- Bases: Transformer

- - -

Query parser for the custom query language for molecules. This parses the input language and builds a parseable and evaluable representation. -The trick for lazy evaluation is to define a custom guard with 'fn(*)' around each expression that needs to be evaluated.

-

Note that you SHOULD NOT HAVE TO INTERACT WITH THIS CLASS DIRECTLY.

- -
- Example -
-
-
-

import medchem -import lark -QUERY_GRAMMAR = medchem.utils.loader.get_grammar(as_string=True) -QUERY_PARSER = Lark(QUERY_GRAMMAR, parser="lalr", transformer=QueryParser())

-
see how the string needs to be "quoted". This builds on the json quote requirements to avoid dealing with unwanted outcomes
-

example = """(HASPROP("tpsa" > 120 ) | HASSUBSTRUCTURE("c1ccccc1")) AND NOT HASALERT("pains") OR HASSUBSTRUCTURE("[OH]", max)""" -t = QUERY_PARSER.parse(example) -print(t) -((((fn(getprop, prop='tpsa') > 120.0) or fn(hassubstructure, query='c1ccccc1', operator='None', limit=None, is_smarts=None)) and not fn(hasalert, alert='pains')) or fn(hassubstructure, query='[OH]', operator='max', limit=None, is_smarts=None))

-
-
-
-
- - - - -
- - - - - - - - - -
- - - -
-bool_expr(bool_term, *others) - -
- - -
- -

Define how boolean expressions should be parsed

- -
- -
- -
- - - -
-bool_term(bool_factor, *others) - -
- - -
- -

Define how boolean terms should be parsed

- -
- -
- -
- - - -
-hasalert(value) - -
- - -
- -

Format the hasalert node in the query

-
-

Note

-

The parser does not enforce any validity on the argument and -the underlying function is supposed to handle it.

-
- -
- -
- -
- - - -
-hasgroup(value) - -
- - -
- -

Format the hasgroup node in the query

-
-

Note

-

The parser does not enforce any validity on the argument and -the underlying function is supposed to handle it.

-
- -
- -
- -
- - - -
-hasprop(value, comparator, limit) - -
- - -
- -

Format the hasprop node in the query

-
-

Note

-

The parser does not enforce any validity on the argument and -the underlying function is supposed to handle it.

-
- -
- -
- -
- - - -
-hassubstructure(value, is_smarts, operator, limit) - -
- - -
- -

Format the substructure node in the query

-
-

Note

-

The parser does not enforce any validity on the argument and -the underlying function is supposed to handle it.

-
- -
- -
- -
- - - -
-hassuperstructure(value) - -
- - -
- -

Format the superstructure node in the query

-
-

Note

-

The parser does not enforce any validity on the argument and -the underlying function is supposed to handle it.

-
- -
- -
- -
- - - -
-like(value, comparator, limit) - -
- - -
- -

Format the like node in the query

-
-

Note

-

The parser does not enforce any validity on the argument and -the underlying function is supposed to handle it.

-
- -
- -
- -
- - - -
-matchrule(value) - -
- - -
- -

Format the matchrule node in the query

-
-

Note

-

The parser does not enforce any validity on the argument and -the underlying function is supposed to handle it.

-
- -
- -
- -
- - - -
-not_bool_factor(*args) - -
- - -
- -

Define representation of a negation

- -
- -
- - - -
- -
- -
- - - - -
- -
- -

- - -
- - - -

- medchem.query.eval - - -

- -
- - - -
- - - - - - - - -
- - - -

- QueryFilter - - -

- - -
- - -

Query filtering system based on a custom query grammar

- - - - - -
- - - - - - - - - -
- - - -
-__call__(mols, scheduler='processes', n_jobs=-1, progress=True) - -
- - -
- -

Call the internal chemical filter that has been built

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mols - List[Union[str, dm.Mol]] - -
-

list of input molecules to filter

-
-
- required -
n_jobs - int - -
-

whether to run job in parallel and number of jobs to consider. Defaults to -1.

-
-
- -1 -
scheduler - -
-

scheduler to use. Defaults to 'processes'.

-
-
- 'processes' -
progress - bool - -
-

whether to show job progress. Defaults to True.

-
-
- True -
- -
- -
- -
- - - -
-__init__(query, grammar=None, parser='lalr') - -
- - -
- -

Constructor for query filtering system

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
query - str - -
-

input unparsed query

-
-
- required -
grammar - Optional[str] - -
-

path to grammar language to use. Defaults to None, which will use the default grammar.

-
-
- None -
parser - str - -
-

which Lark language parser to use. Defaults to "lalr".

-
-
- 'lalr' -
- -
- -
- - - -
- -
- -
- -
- - - -

- QueryOperator - - -

- - -
- - -

A class to hold all the operators that can be used in queries

- - - - - -
- - - - - - - - - -
- - - -
-getprop(mol, prop) - - - staticmethod - - -
- - -
- -

Compute the molecular property of a molecule. -This is an alternative to the hasprop function, that does not enforce any comparison.

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - Union[dm.Mol, str] - -
-

input molecule

-
-
- required -
prop - str - -
-

molecular property to apply as filter on the molecule

-
-
- required -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
property - float - -
-

computed property value

-
-
- -
- -
- -
- - - -
-hasalert(mol, alert) - - - staticmethod - - -
- - -
- -

Check if a molecule matches a named alert catalog. -The alert catalog needs to be one supported by the medchem package.

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - Union[dm.Mol, str] - -
-

input molecule

-
-
- required -
alert - str - -
-

named catalog to apply as filter on the molecule

-
-
- required -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
has_alert - bool - -
-

whether the molecule has a given alert

-
-
- -
- -
- -
- - - -
-hasgroup(mol, group) - - - staticmethod - - -
- - -
- -

Check if a molecule has a specific functional group. -Internally, this is done fetching the smarts corresponding to the group -then calling QueryOperator.hassubstructure

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - Union[dm.Mol, str] - -
-

input molecule

-
-
- required -
group - str - -
-

functional group to check on the molecule.

-
-
- required -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
has_group - bool - -
-

whether the molecule has the given functional group

-
-
- -
- -
- -
- - - -
-hasprop(mol, prop, comparator, limit) - - - staticmethod - - -
- - -
- -

Check if a molecule has a molecular property within desired range

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - Union[dm.Mol, str] - -
-

input molecule

-
-
- required -
prop - str - -
-

molecular property to apply as filter on the molecule

-
-
- required -
comparator - Callable - -
-

operator function to apply to check whether the molecule property matches the expected value

-
-
- required -
limit - float - -
-

limit value for determining whether the molecule property is within desired range

-
-
- required -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
has_property - bool - -
-

whether the molecule has a given property within a desired range

-
-
- -
- -
- -
- - - -
-hassubstructure(mol, query, is_smarts=False, operator='min', limit=1) - - - staticmethod - - -
- - -
- -

Check if a molecule has substructure provided by a query

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - Union[dm.Mol, str] - -
-

input molecule

-
-
- required -
query - str - -
-

input smarts query

-
-
- required -
is_smarts - bool - -
-

whether this is a smarts query or not

-
-
- False -
operator - str - -
-

one of min or max to specify the min or max limit

-
-
- 'min' -
limit - int - -
-

limit of substructures to be found

-
-
- 1 -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
has_substructure - bool - -
-

whether the query is a subgraph of the molecule

-
-
- -
- -
- -
- - - -
-hassuperstructure(mol, query) - - - staticmethod - - -
- - -
- -

Check if a molecule has a superstructure defined by a query. -Note that a superstructure cannot be a query (smarts)

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - Union[dm.Mol, str] - -
-

input molecule

-
-
- required -
query - str - -
-

input SMILES query (a superstructure cannot be a SMARTS)

-
-
- required -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
has_superstructure - bool - -
-

whether the molecule is a subgraph of the query

-
-
- -
- -
- -
- - - -
-like(mol, query, comparator, limit) - - - staticmethod - - -
- - -
- -

Check if a molecule is similar or distant enough from another molecule using tanimoto ECFP distance. -and is useful for letting python handles the binary comparison operators.

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - Union[dm.Mol, str] - -
-

input molecule

-
-
- required -
query - Union[dm.Mol, str] - -
-

input molecule to compare with

-
-
- required -
comparator - Callable[[float, float], bool] - -
-

operator function to apply to check whether the molecule property matches the expected value. -Takes computed_similarity and limit as arguments and returns a boolean.

-
-
- required -
limit - float - -
-

limit value for determining whether the molecule property is within desired range

-
-
- required -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
is_similar - bool - -
-

whether the molecule is similar or distant enough from the query

-
-
- -
- -
- -
- - - -
-matchrule(mol, rule) - - - staticmethod - - -
- - -
- -

Check if a molecule matches a druglikeness rule

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - Union[dm.Mol, str] - -
-

input molecule

-
-
- required -
rule - str - -
-

druglikeness rule check on the molecule.

-
-
- required -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
match_rule - bool - -
-

whether the molecule matches the given rule

-
-
- -
- -
- -
- - - -
-similarity(mol, query) - - - staticmethod - - -
- - -
- -

Compute the ECFP tanimoto similarity between two molecules. -This is an alternative to the like function, that does not enforce any comparison, -and is useful for letting Python handle the binary comparison operators.

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - Union[dm.Mol, str] - -
-

input molecule

-
-
- required -
query - Union[dm.Mol, str] - -
-

input query molecule to compute similarity against

-
-
- required -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
similarity - float - -
-

computed similarity value between mol and query

-
-
- -
- -
- - - -
- -
- -
- - - - -
- -
- -
- - - - - - -
-
- - -
- -
- - - -
-
-
-
- - - - - - - - - \ No newline at end of file diff --git a/1.4.0/api/medchem.rules.html b/1.4.0/api/medchem.rules.html deleted file mode 100644 index 47a7185..0000000 --- a/1.4.0/api/medchem.rules.html +++ /dev/null @@ -1,4526 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - medchem.rules - medchem - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - Skip to content - - -
-
- -
- - - - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - -
-
-
- - - - - - - -
-
- - - - - - - -

medchem.rules

- - -
- - - -

- medchem.rules.basic_rules - - -

- -
- - - -
- - - - - - - - - -
- - - -

-rule_of_chemaxon_druglikeness(mol, mw=None, clogp=None, n_hba=None, n_hbd=None, n_rotatable_bonds=None, n_rings=None, **kwargs) - -

- - -
- -

Compute the drug likeness filter according to chemaxon:

-

It computes: MW < 400 & logP < 5 & HBA <= 10 & HBD <= 5 & ROTBONDS < 5 & ring > 0

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - Union[dm.Mol, str] - -
-

input molecule

-
-
- required -
mw - Optional[float] - -
-

precomputed molecular weight. Defaults to None.

-
-
- None -
clogp - Optional[float] - -
-

precomputed cLogP. Defaults to None.

-
-
- None -
n_hba - Optional[float] - -
-

precomputed number of HBA. Defaults to None.

-
-
- None -
n_hbd - Optional[float] - -
-

precomputed number of HBD. Defaults to None.

-
-
- None -
n_rotatable_bonds - Optional[int] - -
-

precomputed number of rotatable bonds in the molecule. Defaults to None.

-
-
- None -
n_rings - Optional[int] - -
-

precomputed number of rings in the molecule. Defaults to None.

-
-
- None -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
roc - -
-

True if molecule is compliant, False otherwise

-
-
- -
- -
- -
- - - -

-rule_of_cns(mol, mw=None, clogp=None, n_hba=None, n_hbd=None, tpsa=None, **kwargs) - -

- - -
- -

Computes drug likeness rule for CNS penetrant molecules as described in: -Jeffrey & Summerfield (2010) Assessment of the blood-brain barrier in CNS drug discovery.

-

It computes: MW in [135, 582] & logP in [-0.2, 6.1] & TPSA in [3, 118] & HBD <= 3 & HBA <= 5

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - Union[dm.Mol, str] - -
-

input molecule

-
-
- required -
mw - Optional[float] - -
-

precomputed molecular weight. Defaults to None.

-
-
- None -
clogp - Optional[float] - -
-

precomputed logP. Defaults to None.

-
-
- None -
n_hba - Optional[float] - -
-

precomputed number of HBA. Defaults to None.

-
-
- None -
n_hbd - Optional[float] - -
-

precomputed number of HBD. Defaults to None.

-
-
- None -
tpsa - Optional[int] - -
-

precomputed TPSA. Defaults to None.

-
-
- None -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
roc - -
-

True if molecule is compliant, False otherwise

-
-
- -
- -
- -
- - - -

-rule_of_druglike_soft(mol, mw=None, clogp=None, n_hba=None, n_hbd=None, tpsa=None, n_rotatable_bonds=None, n_rings=None, n_hetero_atoms=None, charge=None, **kwargs) - -

- - -
- -

Compute the DrugLike Soft rule available in FAF-Drugs4. -The rules are described at https://fafdrugs4.rpbs.univ-paris-diderot.fr/filters.html

-

It computes: -

MW in [100, 600] & logP in [-3, 6] & HBD <= 7 & HBA <= 12 & TPSA <=180 & ROTBONDS <= 11 &
-RIGBONDS <= 30 & N_RINGS <= 6 & MAX_SIZE_RING <= 18 & N_CARBONS in [3, 35] &  N_HETEROATOMS in [1, 15] &
-HC_RATIO in [0.1, 1.1] & CHARGE in [-4, 4] & N_ATOM_CHARGE <= 4
-

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - Union[dm.Mol, str] - -
-

input molecule

-
-
- required -
mw - Optional[float] - -
-

precomputed molecular weight. Defaults to None.

-
-
- None -
clogp - Optional[float] - -
-

precomputed cLogP. Defaults to None.

-
-
- None -
n_hba - Optional[float] - -
-

precomputed number of HBA. Defaults to None.

-
-
- None -
n_hbd - Optional[float] - -
-

precomputed number of HBD. Defaults to None.

-
-
- None -
tpsa - Optional[float] - -
-

precomputed TPSA. Defaults to None.

-
-
- None -
n_rotatable_bonds - Optional[int] - -
-

precomputed number of rotatable bonds. Defaults to None.

-
-
- None -
n_rings - Optional[int] - -
-

precomputed number of rings in the molecules. Defaults to None.

-
-
- None -
n_hetero_atoms - Optional[int] - -
-

precomputed number of heteroatoms. Defaults to None.

-
-
- None -
charge - Optional[float] - -
-

precomputed charge. Defaults to None.

-
-
- None -
- -
- -
- -
- - - -

-rule_of_egan(mol, clogp=None, tpsa=None, **kwargs) - -

- - -
- -

Compute passive intestinal absorption according to Egan Rules as described in: -Egan, William J., Kenneth M. Merz, and John J. Baldwin (2000) Prediction of drug absorption using multivariate statistics

-

It computes: TPSA in [0, 132] & logP in [-1, 6]

-
-

Note

-

The author built a multivariate statistics model of passive intestinal absorption with robust outlier detection. -Outliers were identified as being actively transported. They chose PSA and AlogP98 (cLogP), based on consideration of the physical processes -involved in membrane permeability and the interrelationships and redundancies between other available descriptors. -Compounds, which had been assayed for Caco-2 cell permeability, demonstrated a good rate of successful predictions (74−92%)

-
- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - Union[dm.Mol, str] - -
-

input molecule

-
-
- required -
clogp - Optional[float] - -
-

precomputed cLogP. Defaults to None.

-
-
- None -
tpsa - Optional[float] - -
-

precomputed TPSA. Defaults to None.

-
-
- None -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
roe - -
-

True if molecule is compliant, False otherwise

-
-
- -
- -
- -
- - - -

-rule_of_five(mol, mw=None, clogp=None, n_lipinski_hbd=None, n_lipinski_hba=None, **kwargs) - -

- - -
- -

Compute Lipinski's rule-of-5 for a molecule. Also known as Pfizer's rule of five or RO5, -this rule is a rule of thumb to evaluate the druglikeness of a chemical compound

-

It computes: MW <= 500 & logP <= 5 & HBD <= 5 & HBA <= 10

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - Union[dm.Mol, str] - -
-

input molecule

-
-
- required -
mw - Optional[float] - -
-

precomputed molecular weight. Defaults to None.

-
-
- None -
clogp - Optional[float] - -
-

precomputed cLogP. Defaults to None.

-
-
- None -
n_lipinski_hbd - Optional[float] - -
-

precomputed number of HBD. Defaults to None.

-
-
- None -
n_lipinski_hba - Optional[float] - -
-

precomputed number of HBA. Defaults to None.

-
-
- None -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
ro5 - -
-

True if molecule is compliant, False otherwise

-
-
- -
- -
- -
- - - -

-rule_of_five_beyond(mol, mw=None, clogp=None, n_hbd=None, n_hba=None, tpsa=None, n_rotatable_bonds=None, **kwargs) - -

- - -
- -

Compute the Beyond rule-of-5 rule for a molecule. This rule illustrates the potential of compounds far beyond rule of 5 space to -modulate novel and difficult target classes that have large, flat, and groove-shaped binding sites and has been described in:

-

Doak, Bradley C., et al. (2015) How Beyond Rule of 5 Drugs and Clinical Candidates Bind to Their Targets.

-

It computes: MW <= 1000 & logP in [-2, 10] & HBD <= 6 & HBA <= 15 & TPSA <=250 & ROTBONDS <= 20

-
-

Note

-

This is a very permissive rule and is likely to not be a good predictor for druglikeness as known for small molecules.

-
- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - Union[dm.Mol, str] - -
-

input molecule

-
-
- required -
mw - Optional[float] - -
-

precomputed molecular weight. Defaults to None.

-
-
- None -
clogp - Optional[float] - -
-

precomputed cLogP. Defaults to None.

-
-
- None -
n_hbd - Optional[float] - -
-

precomputed number of HBD. Defaults to None.

-
-
- None -
n_hba - Optional[float] - -
-

precomputed number of HBA. Defaults to None.

-
-
- None -
tpsa - Optional[float] - -
-

precomputed TPSA. Defaults to None.

-
-
- None -
n_rotatable_bonds - Optional[int] - -
-

precomputed number of rotatable bonds. Defaults to None.

-
-
- None -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
ro5 - -
-

True if molecule is compliant, False otherwise

-
-
- -
- -
- -
- - - -

-rule_of_four(mol, mw=None, clogp=None, n_hba=None, n_rings=None, **kwargs) - -

- - -
- -

Compute the rule-of-4 for a molecule. The rule-of-4 defines a rule of thumb for PPI inhibitors, -which are typically larger and more lipophilic than inhibitors of more standard binding sites. It has been published in:

-

Morelli X, Bourgeas R, Roche P. (2011) Chemical and structural lessons from recent successes in protein–protein interaction inhibition. -Also see: Shin et al. (2020) Current Challenges and Opportunities in Designing Protein–Protein Interaction Targeted Drugs. doi:10.2147/AABC.S235542

-

It computes: MW >= 400 & logP >= 4 & RINGS >=4 & HBA >= 4

-
-

Warning

-

Do not use this for small molecules that are not PPI inhibitors

-
- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - Union[dm.Mol, str] - -
-

input molecule

-
-
- required -
mw - Optional[float] - -
-

precomputed molecular weight. Defaults to None.

-
-
- None -
clogp - Optional[float] - -
-

precomputed cLogP. Defaults to None.

-
-
- None -
n_hba - Optional[float] - -
-

precomputed number of HBA. Defaults to None.

-
-
- None -
n_rings - Optional[int] - -
-

precomputed number of rings in the molecules. Defaults to None.

-
-
- None -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
ro4 - -
-

True if molecule is compliant, False otherwise

-
-
- -
- -
- -
- - - -

-rule_of_generative_design(mol, mw=None, clogp=None, n_lipinski_hba=None, n_lipinski_hbd=None, tpsa=None, n_rotatable_bonds=None, n_hetero_atoms=None, charge=None, **kwargs) - -

- - -
- -

Compute druglikeness rule of generative design.

-

This set of rules is proprietary to Valence Discovery and has been curated to better filter molecules -suggested by generative models for small molecules

-

It computes:

-
MW in [200, 600] & logP < in [-3, 6] & HBD <= 7  & HBA <= 12 & TPSA in [40, 180] &
-ROTBONDS <= 15 & RIGID BONDS <= 30 & N_AROMATIC_RINGS <= 5 & N_FUSED_AROMATIC_RINGS_TOGETHER <= 2 &
-MAX_SIZE_RING_SYSTEM <= 18  & N_CARBONS in [3, 40] & N_HETEROATOMS in [1, 15] & CHARGE in [-2, 2] &
-N_ATOM_CHARGE <= 2 & N_TOTAL_ATOMS < 70 & N_HEAVY_METALS < 1
-
- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - Union[dm.Mol, str] - -
-

input molecule

-
-
- required -
mw - Optional[float] - -
-

precomputed molecular weight. Defaults to None.

-
-
- None -
clogp - Optional[float] - -
-

precomputed cLogP. Defaults to None.

-
-
- None -
n_lipinski_hba - Optional[float] - -
-

precomputed number of HBA. Defaults to None.

-
-
- None -
n_lipinski_hbd - Optional[float] - -
-

precomputed number of HBD. Defaults to None.

-
-
- None -
tpsa - Optional[float] - -
-

precomputed TPSA. Defaults to None.

-
-
- None -
n_rotatable_bonds - Optional[int] - -
-

precomputed number of rotatable bonds. Defaults to None.

-
-
- None -
n_hetero_atoms - Optional[int] - -
-

precomputed number of heteroatoms. Defaults to None.

-
-
- None -
charge - Optional[float] - -
-

precomputed charge. Defaults to None.

-
-
- None -
- -
- -
- -
- - - -

-rule_of_generative_design_strict(mol, mw=None, clogp=None, n_lipinski_hba=None, n_lipinski_hbd=None, tpsa=None, n_rotatable_bonds=None, n_hetero_atoms=None, charge=None, **kwargs) - -

- - -
- -

Compute druglikeness rule of generative design.

-

This set of rules is proprietary to Valence Discovery and has been curated to better filter molecules -suggested by generative models

-

It computes:

-
MW in [200, 600] & logP < in [-3, 6] & HBD <= 7  & HBA <= 12 & TPSA in [40, 180] &
-ROTBONDS <= 15 & RIGID BONDS <= 30 & N_AROMATIC_RINGS <= 5 & N_FUSED_AROMATIC_RINGS_TOGETHER <= 2 &
-MAX_SIZE_RING_SYSTEM <= 18  & N_CARBONS in [3, 40] & N_HETEROATOMS in [1, 15] & CHARGE in [-2, 2] &
-N_ATOM_CHARGE <= 2 & N_TOTAL_ATOMS < 70 & N_HEAVY_METALS < 1 & N_STEREO_CENTER <= 3 &
-HAS_NO_SPIDER_SIDE_CHAINS & FRACTION_RING_SYSTEM >= 0.25
-
-

By default SPIDER_SIDE_CHAINS are defined as having at least 2 'chains' of >=4 consecutive atoms in side chains (not part of any ring system)

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - Union[dm.Mol, str] - -
-

input molecule

-
-
- required -
mw - Optional[float] - -
-

precomputed molecular weight. Defaults to None.

-
-
- None -
clogp - Optional[float] - -
-

precomputed cLogP. Defaults to None.

-
-
- None -
n_lipinski_hba - Optional[float] - -
-

precomputed number of HBA. Defaults to None.

-
-
- None -
n_lipinski_hbd - Optional[float] - -
-

precomputed number of HBD. Defaults to None.

-
-
- None -
tpsa - Optional[float] - -
-

precomputed TPSA. Defaults to None.

-
-
- None -
n_rotatable_bonds - Optional[int] - -
-

precomputed number of rotatable bonds. Defaults to None.

-
-
- None -
n_hetero_atoms - Optional[int] - -
-

precomputed number of heteroatoms. Defaults to None.

-
-
- None -
charge - Optional[float] - -
-

precomputed charge. Defaults to None.

-
-
- None -
- -
- -
- -
- - - -

-rule_of_ghose(mol, mw=None, clogp=None, mr=None, **kwargs) - -

- - -
- -

Compute the Ghose filter. The Ghose filter is a drug-like filter described in: -Ghose, AK.; Viswanadhan, VN.; Wendoloski JJ. (1999) A knowledge-based approach in designing combinatorial or medicinal -chemistry libraries for drug discovery.1. A qualitative and quantitative characterization of known drug databases.

-

It computes: MW in [160, 480] & logP in [-0.4, 5.6] & Natoms in [20, 70] & refractivity in [40, 130]

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - Union[dm.Mol, str] - -
-

input molecule

-
-
- required -
mw - Optional[float] - -
-

precomputed molecular weight. Defaults to None.

-
-
- None -
clogp - Optional[float] - -
-

precomputed cLogP. Defaults to None.

-
-
- None -
mr - Optional[float] - -
-

precomputed molecule refractivity. Defaults to None.

-
-
- None -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
rog - -
-

True if molecule is compliant, False otherwise

-
-
- -
- -
- -
- - - -

-rule_of_gsk_4_400(mol, mw=None, clogp=None, **kwargs) - -

- - -
- -

Compute GSK Rule (4/400) for druglikeness using interpretable ADMET rule of thumb based on -Gleeson, M. Paul (2008). Generation of a set of simple, interpretable ADMET rules of thumb.

-

It computes: MW <= 400 & logP <= 4.

-
-

Note

-
    -
  • The rule are based on a set of consistent structure-property guides determined from an analysis of a number of key - ADMET assays run within GSK: solubility, permeability, bioavailability, volume of distribution, plasma protein binding, - CNS penetration, brain tissue binding, P-gp efflux, hERG inhibition, and cytochrome P450 1A2/2C9/2C19/2D6/3A4 inhibition.
  • -
  • Conclusion: It is clear from the analyses reported herein that almost all ADMET parameters deteriorate with either increasing molecular weight, - logP, or both, with ionization state playing either a beneficial or detrimental effect depending on the parameter in question.
  • -
-
- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - Union[dm.Mol, str] - -
-

input molecule

-
-
- required -
clogp - Optional[float] - -
-

precomputed cLogP. Defaults to None.

-
-
- None -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
rog - -
-

True if molecule is compliant, False otherwise

-
-
- -
- -
- -
- - - -

-rule_of_leadlike_soft(mol, mw=None, clogp=None, n_hba=None, n_hbd=None, tpsa=None, n_rotatable_bonds=None, n_rings=None, n_hetero_atoms=None, charge=None, **kwargs) - -

- - -
- -

Compute the Lead-Like Soft rule available in FAF-Drugs4. -The rules are described at https://fafdrugs4.rpbs.univ-paris-diderot.fr/filters.html

-

It computes: -

MW in [150, 400] & logP in [-3, 4] & HBD <= 4 & HBA <= 7 & TPSA <=160 & ROTBONDS <= 9 &
-RIGBONDS <= 30 & N_RINGS <= 4 & MAX_SIZE_RING <= 18 & N_CARBONS in [3, 35] &  N_HETEROATOMS in [1, 15] &
-HC_RATIO in [0.1, 1.1] & CHARGE in [-4, 4] & N_ATOM_CHARGE <= 4 & N_STEREO_CENTER <= 2
-

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - Union[dm.Mol, str] - -
-

input molecule

-
-
- required -
mw - Optional[float] - -
-

precomputed molecular weight. Defaults to None.

-
-
- None -
clogp - Optional[float] - -
-

precomputed cLogP. Defaults to None.

-
-
- None -
n_hba - Optional[float] - -
-

precomputed number of HBA. Defaults to None.

-
-
- None -
n_hbd - Optional[float] - -
-

precomputed number of HBD. Defaults to None.

-
-
- None -
tpsa - Optional[float] - -
-

precomputed TPSA. Defaults to None.

-
-
- None -
n_rotatable_bonds - Optional[int] - -
-

precomputed number of rotatable bonds. Defaults to None.

-
-
- None -
n_rings - Optional[int] - -
-

precomputed number of rings in the molecules. Defaults to None.

-
-
- None -
n_hetero_atoms - Optional[int] - -
-

precomputed number of heteroatoms. Defaults to None.

-
-
- None -
charge - Optional[float] - -
-

precomputed charge. Defaults to None.

-
-
- None -
- -
- -
- -
- - - -

-rule_of_oprea(mol, n_hba=None, n_hbd=None, n_rotatable_bonds=None, n_rings=None, **kwargs) - -

- - -
- -

Computes Oprea's rule of drug likeness obtained by comparing drug vs non drug compounds across multiple datasets. -The rules have been described in: Oprea (2000) Property distribution of drug-related chemical databases*

-

It computes: HBD in [0, 2] & HBA in [2, 9] & ROTBONDS in [2,8] and RINGS in [1, 4]

-
-

Note

-

Seventy percent of the `drug-like' compounds were found between the following limits: 0 ≤ HDO ≤ 2, 2 ≤ HAC ≤ 9, 2 ≤ RTB ≤ 8, and 1 ≤ RNG ≤ 4

-
- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - Union[dm.Mol, str] - -
-

input molecule

-
-
- required -
n_hba - Optional[float] - -
-

precomputed number of HBA. Defaults to None.

-
-
- None -
n_hbd - Optional[float] - -
-

precomputed number of HBD. Defaults to None.

-
-
- None -
n_rotatable_bonds - Optional[int] - -
-

precomputed number of rotatable bonds in the molecule. Defaults to None.

-
-
- None -
n_rings - Optional[int] - -
-

precomputed number of rings in the molecule. Defaults to None.

-
-
- None -
-

Returns - roo: True if molecule is compliant, False otherwise

- -
- -
- -
- - - -

-rule_of_pfizer_3_75(mol, clogp=None, tpsa=None, **kwargs) - -

- - -
- -

Compute Pfizer Rule (3/75 Rule) for in vivo toxicity. It has been described in: -* Hughes, et al. (2008) Physiochemical drug properties associated with in vivo toxicological outcomes. -* Price et al. (2009) Physicochemical drug properties associated with in vivo toxicological outcomes: a review

-

It computes: ! (TPSA < 75 & logP > 3)

-
-

Note

-
    -
  • In vivo toleration (IVT) studies on 245 preclinical Pfizer compounds found an increased likelihood of toxic events for less polar, more lipophilic compounds.
  • -
  • Compounds with low clogP / high TPSA are ∼ 2.5 times more likely not to have any toxicity issue at a fixed concentration of 10 uM (total) or 1 uM (free);
  • -
  • Compounds with high clogP / low TPSA are ∼ 2.5 times more likely to have a toxicity finding; this represents an overall odds >= 6.
  • -
-
- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - Union[dm.Mol, str] - -
-

input molecule

-
-
- required -
clogp - Optional[float] - -
-

precomputed cLogP. Defaults to None.

-
-
- None -
tpsa - Optional[float] - -
-

precomputed TPSA. Defaults to None.

-
-
- None -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
rop - -
-

True if molecule is compliant, False otherwise

-
-
- -
- -
- -
- - - -

-rule_of_reos(mol, mw=None, clogp=None, n_hba=None, n_hbd=None, charge=None, n_rotatable_bonds=None, n_heavy_atoms=None, **kwargs) - -

- - -
- -

Compute the REOS filter. The REOS filter is a filter designed to filter out unuseful compounds from HTS screening results. -The filter is described in: Waters & Namchuk (2003) Designing screens: how to make your hits a hit.

-

It computes: MW in [200, 500] & logP in [-5, 5] & HBA in [0, 10] & HBD in [0, 5] & charge in [-2, 2] & ROTBONDS in [0, 8] & NHeavyAtoms in [15, 50]

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - Union[dm.Mol, str] - -
-

input molecule

-
-
- required -
mw - Optional[float] - -
-

precomputed molecular weight. Defaults to None.

-
-
- None -
clogp - Optional[float] - -
-

precomputed cLogP. Defaults to None.

-
-
- None -
n_hba - Optional[float] - -
-

precomputed number of HBA. Defaults to None.

-
-
- None -
n_hbd - Optional[float] - -
-

precomputed number of HBD. Defaults to None.

-
-
- None -
charge - Optional[int] - -
-

precomputed formal charge. Defaults to None.

-
-
- None -
n_rotatable_bonds - Optional[int] - -
-

precomputed number of rotatable bonds in the molecule. Defaults to None.

-
-
- None -
n_heavy_atoms - Optional[int] - -
-

precomputed number of heavy atoms in the molecule. Defaults to None.

-
-
- None -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
ror - -
-

True if molecule is compliant, False otherwise

-
-
- -
- -
- -
- - - -

-rule_of_respiratory(mol, mw=None, clogp=None, n_hba=None, n_hbd=None, tpsa=None, n_rotatable_bonds=None, n_rings=None, **kwargs) - -

- - -
- -

Computes drug likeness rule for Respiratory (nasal/inhalatory) molecules as described in -Ritchie et al. (2009) Analysis of the Calculated Physicochemical Properties of Respiratory Drugs: Can We Design for Inhaled Drugs Yet?

-

It computes: MW in [240, 520] & logP in [-2, 4.7] & HBONDS in [6, 12] & TPSA in [51, 135] & ROTBONDS in [3,8] & RINGS in [1,5]

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - Union[dm.Mol, str] - -
-

input molecule

-
-
- required -
mw - Optional[float] - -
-

precomputed molecular weight. Defaults to None.

-
-
- None -
clogp - Optional[float] - -
-

precomputed logP. Defaults to None.

-
-
- None -
n_hba - Optional[float] - -
-

precomputed number of HBA. Defaults to None.

-
-
- None -
n_hbd - Optional[float] - -
-

precomputed number of HBD. Defaults to None.

-
-
- None -
tpsa - Optional[int] - -
-

precomputed TPSA. Defaults to None.

-
-
- None -
n_rotatable_bonds - Optional[int] - -
-

precomputed number of rotatable bonds in the molecule. Defaults to None.

-
-
- None -
n_rings - Optional[int] - -
-

precomputed number of rings. Defaults to None

-
-
- None -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
roc - -
-

True if molecule is compliant, False otherwise

-
-
- -
- -
- -
- - - -

-rule_of_three(mol, mw=None, clogp=None, n_hba=None, n_hbd=None, n_rotatable_bonds=None, **kwargs) - -

- - -
- -

Compute the rule-of-3. The rule-of-three is a rule of thumb for molecular fragments (and not small molecules) published in:

-

Congreve M, Carr R, Murray C, Jhoti H. (2003) A "rule of three" for fragment-based lead discovery?.

-

It computes: MW <= 300 & logP <= 3 & HBA <= 3 & HBD <= 3 & ROTBONDS <= 3

-
-

Note

-

TPSA is not used in this version of the rule of three. Other version uses TPSA <= 60 AND logP in [-3, 3] in addition

-
- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - Union[dm.Mol, str] - -
-

input molecule

-
-
- required -
mw - Optional[float] - -
-

precomputed molecular weight. Defaults to None.

-
-
- None -
clogp - Optional[float] - -
-

precomputed cLogP. Defaults to None.

-
-
- None -
n_hba - Optional[float] - -
-

precomputed number of HBA. Defaults to None.

-
-
- None -
n_hbd - Optional[float] - -
-

precomputed number of HBD. Defaults to None.

-
-
- None -
n_rotatable_bonds - Optional[int] - -
-

precomputed number of rotatable bonds in the molecule. Defaults to None.

-
-
- None -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
ro3 - -
-

True if molecule is compliant, False otherwise

-
-
- -
- -
- -
- - - -

-rule_of_three_extended(mol, mw=None, clogp=None, n_hba=None, n_hbd=None, tpsa=None, n_rotatable_bonds=None, **kwargs) - -

- - -
- -

Compute the extended rule-of-3. This is an extension of the rule of three.

-

It computes: MW <= 300 & logP in [-3, 3] & HBA <= 6 & HBD <= 3 & ROTBONDS <= 3 & TPSA <= 60

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - Union[dm.Mol, str] - -
-

input molecule

-
-
- required -
mw - Optional[float] - -
-

precomputed molecular weight. Defaults to None.

-
-
- None -
clogp - Optional[float] - -
-

precomputed cLogP. Defaults to None.

-
-
- None -
n_hba - Optional[float] - -
-

precomputed number of HBA. Defaults to None.

-
-
- None -
n_hbd - Optional[float] - -
-

precomputed number of HBD. Defaults to None.

-
-
- None -
tpsa - Optional[float] - -
-

precomputed TPSA. Defaults to None.

-
-
- None -
n_rotatable_bonds - Optional[int] - -
-

precomputed number of rotatable bonds in the molecule. Defaults to None.

-
-
- None -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
ro3 - -
-

True if molecule is compliant, False otherwise

-
-
- -
- -
- -
- - - -

-rule_of_two(mol, mw=None, clogp=None, n_hba=None, n_hbd=None, **kwargs) - -

- - -
- -

Computes rules-of-2 for reagent (building block design). It aims for prioritization of reagents that typically -do not add more than 200 Da in MW or 2 units of clogP. The rule of two has been described in:

-

Goldberg et al. (2015) Designing novel building blocks is an overlooked strategy to improve compound quality -see: http://csmres.co.uk/cs.public.upd/article-downloads/Designing-novel-building-blocks.pdf

-
-

Note

-

Their analysis showed that molecular weight (MW) and clogP were important factors in the frequency of use of reagents. -Other parameters, such as TPSA, HBA, HBD and ROTBONDS count, were less important.

-
-

It computes MW <= 200 & logP <= 2 & HBA <= 4 & HBD <= 2

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - Union[dm.Mol, str] - -
-

input molecule

-
-
- required -
mw - Optional[float] - -
-

precomputed molecular weight. Defaults to None.

-
-
- None -
clogp - Optional[float] - -
-

precomputed cLogP. Defaults to None.

-
-
- None -
n_hba - Optional[float] - -
-

precomputed number of HBA. Defaults to None.

-
-
- None -
n_hbd - Optional[float] - -
-

precomputed number of HBD. Defaults to None.

-
-
- None -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
ro2 - -
-

True if molecule is compliant, False otherwise

-
-
- -
- -
- -
- - - -

-rule_of_veber(mol, tpsa=None, n_rotatable_bonds=None, **kwargs) - -

- - -
- -

Compute the Veber filter. The Veber filter is a druglike filter for orally active drugs described in:

-

Veber et al. (2002) Molecular Properties That Influence the Oral Bioavailability of Drug Candidates.

-

It computes: ROTBONDS <= 10 & TPSA < 140

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - Union[dm.Mol, str] - -
-

input molecule

-
-
- required -
tpsa - Optional[float] - -
-

precomputed TPSA. Defaults to None.

-
-
- None -
n_rotatable_bonds - Optional[int] - -
-

precomputed number of rotatable bonds. Defaults to None.

-
-
- None -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
rov - -
-

True if molecule is compliant, False otherwise

-
-
- -
- -
- -
- - - -

-rule_of_xu(mol, n_hba=None, n_hbd=None, n_rotatable_bonds=None, n_rings=None, n_heavy_atoms=None, **kwargs) - -

- - -
- -

Computes Xu's rule of drug likeness as described in: -Xu & Stevenson (2000), Drug-like Index: A New Approach To Measure Drug-like Compounds and Their Diversity

-

It computes HBD <= 5 & HBA <= 10 & ROTBONDS in [2, 35] & RINGS in [1, 7] & NHeavyAtoms in [10, 50].

-
-

Note

-

A compound's Drug Likeness Index is calculated based upon the knowledge derived from known drugs selected from Comprehensive Medicinal Chemistry (CMC) database.

-
- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - Union[dm.Mol, str] - -
-

input molecule

-
-
- required -
n_hba - Optional[float] - -
-

precomputed number of HBA. Defaults to None.

-
-
- None -
n_hbd - Optional[float] - -
-

precomputed number of HBD. Defaults to None.

-
-
- None -
n_rotatable_bonds - Optional[int] - -
-

precomputed number of rotatable bonds in the molecule. Defaults to None.

-
-
- None -
n_rings - Optional[int] - -
-

precomputed number of rings in the molecule. Defaults to None.

-
-
- None -
n_heavy_atoms - Optional[int] - -
-

precomputed number of heavy atoms in the molecule. Defaults to None.

-
-
- None -
-

Returns - rox: True if molecule is compliant, False otherwise

- -
- -
- -
- - - -

-rule_of_zinc(mol, mw=None, clogp=None, n_hba=None, n_hbd=None, tpsa=None, n_rotatable_bonds=None, n_rings=None, charge=None, **kwargs) - -

- - -
- -

Compute the Zinc rule for a molecule. This rule is a rule of thumb to evaluate the druglikeness of a chemical compound, based on:

-

Irwin & Schoichet (2005) ZINC - A Free Database of Commercially Available Compounds for Virtual Screening.

-

Also see: https://fafdrugs4.rpbs.univ-paris-diderot.fr/filters.html

-

It computes: MW in [60, 600] & logP in [-4, 6] & HBD <= 6 & HBA <= 11 & TPSA <=150 & ROTBONDS <= 12 & RIGBONDS <= 50 & N_RINGS <= 7 & MAX_SIZE_RING <= 12 & N_CARBONS >=3 & HC_RATIO <= 2.0 & CHARGE in [-4, 4]

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - Union[dm.Mol, str] - -
-

input molecule

-
-
- required -
mw - Optional[float] - -
-

precomputed molecular weight. Defaults to None.

-
-
- None -
clogp - Optional[float] - -
-

precomputed cLogP. Defaults to None.

-
-
- None -
n_hba - Optional[float] - -
-

precomputed number of HBA. Defaults to None.

-
-
- None -
n_hbd - Optional[float] - -
-

precomputed number of HBD. Defaults to None.

-
-
- None -
tpsa - Optional[float] - -
-

precomputed TPSA. Defaults to None.

-
-
- None -
n_rotatable_bonds - Optional[int] - -
-

precomputed number of rotatable bonds. Defaults to None.

-
-
- None -
n_rings - Optional[int] - -
-

precomputed number of rings in the molecules. Defaults to None.

-
-
- None -
charge - Optional[float] - -
-

precomputed charge. Defaults to None.

-
-
- None -
- -
- -
- - - -
- -
- -

- - -
- - - -

- medchem.rules.rule_filter - - -

- -
- - - -
- - - - - - - - -
- - - -

- RuleFilters - - -

- - -
- - -

Build a filter based on a compound's physicochemical properties. For a list of default rules, use RuleFilters.list_available_rules(). -Most of these rules have been collected from the literature including https://fafdrugs4.rpbs.univ-paris-diderot.fr/descriptors.html

- - - - - -
- - - - - - - - - -
- - - -
-__call__(mols, n_jobs=None, progress=False, scheduler='processes') - -
- - -
- -

Compute the rules for a list of molecules

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mols - List[Union[str, dm.Mol]] - -
-

list of input molecule objects.

-
-
- required -
n_jobs - Optional[int] - -
-

number of jobs to run in parallel. Defaults to None.

-
-
- None -
progress - bool - -
-

whether to show progress or not. Defaults to False.

-
-
- False -
scheduler - str - -
-

which scheduler to use. Defaults to "processes".

-
-
- 'processes' -
- -

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
df - -
-

Dataframe where each row is a molecule and each column is the outcome of applying self.rules[column].

-
-
- -
- -
- -
- - - -
-__getitems__(ind) - -
- - -
- -

Return a specific rule

- -
- -
- -
- - - -
-__init__(rule_list, rule_list_names=None, precompute_props=True) - -
- - -
- -

Build a rule filtering object

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
rule_list - List[Union[str, Callable]] - -
-

list of rules to apply. Either a callable that takes a molecule as input (with kwargs) or a string -of the name of a pre-defined rule as defined in the basic_rules module

-
-
- required -
rule_list_names - Optional[List[str]] - -
-

Name of the rules passed as inputs. Defaults to None.

-
-
- None -
precompute_props - bool - -
-

Whether to precompute the properties for all molecules to speed up redundant calculation. Defaults to True.

-
-
- True -
- -
- -
- -
- - - -
-__len__() - -
- - -
- -

Return the number of rules inside this filter

- -
- -
- -
- - - -
-list_available_rules(query=None) - - - cached - staticmethod - - -
- - -
- -

List all the available rules and their properties

- -
- -
- -
- - - -
-list_available_rules_names(query=None) - - - cached - staticmethod - - -
- - -
- -

List only the names of the available rules

- -
- -
- - - -
- -
- -
- - - - -
- -
- -
- - - - - - -
-
- - -
- -
- - - -
-
-
-
- - - - - - - - - \ No newline at end of file diff --git a/1.4.0/api/medchem.utils.html b/1.4.0/api/medchem.utils.html deleted file mode 100644 index b5958d8..0000000 --- a/1.4.0/api/medchem.utils.html +++ /dev/null @@ -1,2581 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - medchem.utils - medchem - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - Skip to content - - -
-
- -
- - - - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - -
-
-
- - - -
-
-
- - - -
-
-
- - - -
-
- - - - - - - -

medchem.utils

- - -
- - - -

- medchem.utils.smarts - - -

- -
- - - -
- - - - - - - - -
- - - -

- SMARTSUtils - - -

- - -
- - -

Collection of utilities to build complex SMARTS queries more efficiently for inexperienced users

- - - - - -
- - - - - - - - - -
- - - -
-aliphatic_chain(min_size=6, unbranched=False, unsaturated_bondtype=None, allow_hetero_atoms=True) - - - classmethod - - -
- - -
- -

Returns a query that can match a long aliphatic chain

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
min_size - int - -
-

minimum size of the long chain

-
-
- 6 -
unbranched - bool - -
-

whether the chain should be unbranched

-
-
- False -
unsaturated_bondtype - Optional[str] - -
-

additional unsaturated bond type to use for the query. By default, Any bond type (~) is used. -Single bonds ARE always allowed and bondtype cannot be aromatic

-
-
- None -
allow_hetero_atoms - bool - -
-

whether the chain can contain hetero atoms

-
-
- True -
- -
- Example -

to build a query for a long aliphatic chain of at least 5 atoms (e.g: 'CCC(C)CCC')

-
-
-
-

SMARTSUtils.aliphatic_chain(min_size=5)

-
-
-
-
-

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
smarts - -
-

smarts pattern matching a long aliphatic chain

-
-
- -
- -
- -
- - - -
-atom_in_env(*smarts_strs, include_atoms=False, union=False) - - - classmethod - - -
- - -
- -

Returns a recursive/group smarts to find an atom that fits in the environments as defined by all the input smarts

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
smarts_strs - -
-

list of input patterns defining the environment the atom must fit in. The first atom of each pattern -should be the atom we want to match to, unless include_atoms is set to True, then [*:99] will be added at the start of each pattern

-
-
- () -
include_atoms - bool - -
-

whether to include an additional first atom that needs to be in the required environment or not

-
-
- False -
union - bool - -
-

whether to use the union of the environments or the intersection

-
-
- False -
- -
- Example -

you can use this function to construct a complex query if you are not sure about how to write the smarts -for example, to find a carbon atom that is both in a ring of size 6, bonded to an ethoxy and has a fluorine in meta

-
-
-
-

SMARTSUtils.atom_in_env("[#6;r6][OD2][C&D1]", "[c]aa[F]", union=False) # there are alternative way to write this

-
-
-
-
-

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
smarts - -
-

smarts pattern matching the group/environment

-
-
- -
- -
- -
- - - -
-different_fragment(*smarts_strs) - - - classmethod - - -
- - -
- -

Returns a new query that matches patterns that are in different fragments.

-
-

Warning

-

This feature is not supported yet by RDKit. See https://github.com/rdkit/rdkit/issues/1261

-
- -

Parameters:

- - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
smarts_strs - -
-

list of input patterns defining the fragments

-
-
- () -
- -
- Example -

matching two oxygens in a molecule will work with '[#8].[#8]', but if you want the -oxygens to be in DIFFERENT fragments, then build the query with:

-
-
-
-

SMARTSUtils.different_fragment('[#8]', '[#8]')

-
-
-
-
-

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
smarts - -
-

smarts pattern matching patterns that are in different fragments

-
-
- -
- -
- -
- - - -
-meta(smarts_str1, smarts_str2, aromatic_only=False) - - - classmethod - - -
- - -
- -

Returns a recursive smarts string connecting the two input smarts in meta of each other. -Connection points need to be through single or double bonds

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
smarts_str1 - str - -
-

first smarts pattern defining the first functional group

-
-
- required -
smarts_str2 - str - -
-

second smarts pattern defining the second functional group

-
-
- required -
aromatic_only - bool - -
-

whether the ring needs to be aromatic or not

-
-
- False -
- -
- Example -

to build a smarts for a methyl group in meta to an oxygen (e.g: 'c1c(C)cc(O)cc1')

-
-
-
-

SMARTSUtils.meta('[#6;!R]', '[#8]')

-
-
-
-
-

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
smarts - -
-

smarts pattern connecting the two input smarts in meta of each other

-
-
- -
- -
- -
- - - -
-ortho(smarts_str1, smarts_str2, aromatic_only=False) - - - classmethod - - -
- - -
- -

Returns a recursive smarts string connecting the two input smarts in ortho of each other. -Connection points need to be through single or double bonds

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
smarts_str1 - str - -
-

first smarts pattern defining the first functional group

-
-
- required -
smarts_str2 - str - -
-

second smarts pattern defining the second functional group

-
-
- required -
aromatic_only - bool - -
-

whether the ring needs to be aromatic or not

-
-
- False -
- -
- Example -

to build a smarts for a methyl group in ortho to an oxygen (e.g: 'C1CC(C)C(O)CC1')

-
-
-
-

SMARTSUtils.ortho('[#6;!R]', '[#8]')

-
-
-
-
-

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
smarts - -
-

smarts pattern connecting the two input smarts in ortho of each other

-
-
- -
- -
- -
- - - -
-para(smarts_str1, smarts_str2, aromatic_only=False) - - - classmethod - - -
- - -
- -

Returns a recursive smarts string connecting the two input smarts in para of each other. -Connection points need to be through single or double bonds

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
smarts_str1 - str - -
-

first smarts pattern defining the first functional group

-
-
- required -
smarts_str2 - str - -
-

second smarts pattern defining the second functional group

-
-
- required -
aromatic_only - bool - -
-

whether the ring needs to be aromatic or not

-
-
- False -
- -
- Example -

to build a smarts for a methyl group in para to an oxygen (e.g: 'c1(C)ccc(O)cc1')

-
-
-
-

SMARTSUtils.para('[#6;!R]', '[#8]')

-
-
-
-
-

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
smarts - -
-

smarts pattern connecting the two input smarts in para of each other

-
-
- -
- -
- -
- - - -
-same_fragment(*smarts_strs) - - - classmethod - - -
- - -
- -

Returns a new query that matches patterns that are in THE SAME fragment (component)

-
-

Warning

-

This feature is not supported yet by RDKit. See https://github.com/rdkit/rdkit/issues/1261

-
- -

Parameters:

- - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
smarts_strs - -
-

list of input patterns defining the fragments

-
-
- () -
- -
- Example -

matching two oxygens in a molecule will work with '[#8].[#8]', but if you want the -oxygens to be in the SAME fragment, then build the query with:

-
-
-
-

SMARTSUtils.same_fragment('[#8]', '[#8]')

-
-
-
-
-

Returns:

- - - - - - - - - - - - - -
Name TypeDescription
smarts - -
-

smarts pattern matching patterns that are in the same component

-
-
- -
- -
- -
- - - -
-standardize_attachment(smiles, attach_tokens='[*:1]') - - - classmethod - - -
- - -
- -

Standardize an attachment point in a smiles

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
smiles - str - -
-

SMILES string

-
-
- required -
attach_tokens - str - -
-

Attachment point token to use as standard token

-
-
- '[*:1]' -
- -
- -
- - - -
- -
- -
- - - - -
- -
- -

- - -
- - - -

- medchem.utils.matches - - -

- -
- - - -
- - - - - - - - -
- - - -

- Constraints - - -

- - -
- - -

Complex query system for matches with additional constraints

-
-

Example

-
-
-
-

mol1 = dm.to_mol("CN(C)C(=O)c1cncc(C)c1") -mol2 = dm.to_mol("c1ccc(cc1)-c1cccnc1") -core = dm.from_smarts("c1cncc([*:1])c1") -[atom.SetProp("query", "my_constraints") for atom in core.GetAtoms() if atom.GetAtomMapNum() == 1] -constraint_fns = dict(my_constraints=lambda x: dm.descriptors.n_aromatic_atoms(x) > 0) -constraint = Constraints(core, constraint_fns) -matches = [constraint(mol1), constraint(mol2)] # False, True

-
-
-
-
- - - - - -
- - - - - - - - - -
- - - -
-__call__(mol) - -
- - -
- -

Check if the input molecule respects the constraints

- -

Parameters:

- - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - -
-

input molecule

-
-
- required -
- -
- -
- -
- - - -
-__init__(core, constraint_fns, prop_name='query') - -
- - -
- -

Initialize the constraint matcher

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
core - dm.Mol - -
-

the scaffold/query molecule to match against. Needs to be a molecule

-
-
- required -
constraint_fns - Dict[Callable] - -
-

a dictionary of constraints functions

-
-
- required -
prop_name - str - -
-

the property name to use in the match at each atom defined by the core -for further matches against the constraints functions

-
-
- 'query' -
- -
- -
- -
- - - -
-get_matches(mol, multiple=True) - -
- - -
- -

Get matches that respect the constraints in the molecules

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - dm.Mol - -
-

input molecule

-
-
- required -
multiple - bool - -
-

if True, return all the matches, if False, return the first match

-
-
- True -
- -
- -
- -
- - - -
-has_match(mol) - -
- - -
- -

Check if the input molecule respects the constraints

- -

Parameters:

- - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - dm.Mol - -
-

input molecule

-
-
- required -
- -
- -
- -
- - - -
-validate(mol, constraints) - - - staticmethod - - -
- - -
- -

Validate a list of constraint objects against a molecule

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - -
-

the molecule object

-
-
- required -
constraints - List[Constraints] - -
-

list of Constraints objects to validate against the molecule

-
-
- required -
- -
- -
- - - -
- -
- -
- - - - -
- -
- -

- - -
- - - -

- medchem.utils.loader - - -

- -
- - - -
- - - - - - - - - -
- - - -

-get_data(file=None) - -

- - -
- -

Return the folder that contains the package specific data

- -
- -
- -
- - - -

-get_grammar(grammar=None, as_string=False) - -

- - -
- -

Return the default lark grammar file for queries

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
grammar - Optional[os.PathLike] - -
-

The path to the grammar file. If None, the default grammar is used.

-
-
- None -
as_string - bool - -
-

If True, return the grammar as a string. Defaults to False.

-
-
- False -
- -
- -
- - - -
- -
- -

- - -
- - - -

- medchem.utils.graph - - -

- -
- - - -
- - - - - - - - - -
- - - -

-automorphism(mol, standardize=True, node_attrs=DEFAULT_NODE_ATTR, edge_attrs=DEFAULT_EDGE_ATTR) - -

- - -
- -

Compute automorphism in a molecular graph

- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - Union[str, dm.Mol] - -
-

input molecular graph

-
-
- required -
standardize - bool - -
-

whether to standardize the compound or not

-
-
- True -
node_attrs - List[str] - -
-

list of categorical atom attributes/properties to consider for node matching

-
-
- DEFAULT_NODE_ATTR -
edge_attrs - List[str] - -
-

list of categorical bond attributes/properties to consider for edge matching

-
-
- DEFAULT_EDGE_ATTR -
- -
- -
- -
- - - -

-score_symmetry(mol, exclude_self_mapped_edged=False, **automorphism_kwargs) - -

- - -
- -

Provide a symmetry score for a given input molecule

-
-

Note

-

This is a heuristic and our definition of symmetry is pretty loose. -We define symmetry according to any (set of) planes dividing the molecule into two very similar subgraphs. -We include both edge and vertex transitivity. For example the star-molecular graph -(e.g neopentane) is symmetrical here, although it's not vertex-transitive. -For more information see https://github.com/valence-platform/medchem/pull/41

-
- -

Parameters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionDefault
mol - Union[dm.Mol, str] - -
-

input molecule

-
-
- required -
exclude_self_mapped_edged - bool - -
-

Whether to exclude edges that map to themselves in the automorphism.

-
-
- False -
automorphism_kwargs - -
-

keyword arguments for determining automorphism

-
-
- {} -
- -
- -
- - - -
- -
- -
- - - - - - -
-
- - -
- -
- - - -
-
-
-
- - - - - - - - - \ No newline at end of file diff --git a/1.4.0/assets/_mkdocstrings.css b/1.4.0/assets/_mkdocstrings.css deleted file mode 100644 index 049a254..0000000 --- a/1.4.0/assets/_mkdocstrings.css +++ /dev/null @@ -1,64 +0,0 @@ - -/* Avoid breaking parameter names, etc. in table cells. */ -.doc-contents td code { - word-break: normal !important; -} - -/* No line break before first paragraph of descriptions. */ -.doc-md-description, -.doc-md-description>p:first-child { - display: inline; -} - -/* Max width for docstring sections tables. */ -.doc .md-typeset__table, -.doc .md-typeset__table table { - display: table !important; - width: 100%; -} - -.doc .md-typeset__table tr { - display: table-row; -} - -/* Defaults in Spacy table style. */ -.doc-param-default { - float: right; -} - -/* Keep headings consistent. */ -h1.doc-heading, -h2.doc-heading, -h3.doc-heading, -h4.doc-heading, -h5.doc-heading, -h6.doc-heading { - font-weight: 400; - line-height: 1.5; - color: inherit; - text-transform: none; -} - -h1.doc-heading { - font-size: 1.6rem; -} - -h2.doc-heading { - font-size: 1.2rem; -} - -h3.doc-heading { - font-size: 1.15rem; -} - -h4.doc-heading { - font-size: 1.10rem; -} - -h5.doc-heading { - font-size: 1.05rem; -} - -h6.doc-heading { - font-size: 1rem; -} \ No newline at end of file diff --git a/1.4.0/assets/images/favicon.png b/1.4.0/assets/images/favicon.png deleted file mode 100644 index 1cf13b9..0000000 Binary files a/1.4.0/assets/images/favicon.png and /dev/null differ diff --git a/1.4.0/assets/javascripts/bundle.b4d07000.min.js b/1.4.0/assets/javascripts/bundle.b4d07000.min.js deleted file mode 100644 index 3c0bdad..0000000 --- a/1.4.0/assets/javascripts/bundle.b4d07000.min.js +++ /dev/null @@ -1,29 +0,0 @@ -"use strict";(()=>{var Ci=Object.create;var gr=Object.defineProperty;var Ri=Object.getOwnPropertyDescriptor;var 
ki=Object.getOwnPropertyNames,Ht=Object.getOwnPropertySymbols,Hi=Object.getPrototypeOf,yr=Object.prototype.hasOwnProperty,nn=Object.prototype.propertyIsEnumerable;var rn=(e,t,r)=>t in e?gr(e,t,{enumerable:!0,configurable:!0,writable:!0,value:r}):e[t]=r,P=(e,t)=>{for(var r in t||(t={}))yr.call(t,r)&&rn(e,r,t[r]);if(Ht)for(var r of Ht(t))nn.call(t,r)&&rn(e,r,t[r]);return e};var on=(e,t)=>{var r={};for(var n in e)yr.call(e,n)&&t.indexOf(n)<0&&(r[n]=e[n]);if(e!=null&&Ht)for(var n of Ht(e))t.indexOf(n)<0&&nn.call(e,n)&&(r[n]=e[n]);return r};var Pt=(e,t)=>()=>(t||e((t={exports:{}}).exports,t),t.exports);var Pi=(e,t,r,n)=>{if(t&&typeof t=="object"||typeof t=="function")for(let o of ki(t))!yr.call(e,o)&&o!==r&&gr(e,o,{get:()=>t[o],enumerable:!(n=Ri(t,o))||n.enumerable});return e};var yt=(e,t,r)=>(r=e!=null?Ci(Hi(e)):{},Pi(t||!e||!e.__esModule?gr(r,"default",{value:e,enumerable:!0}):r,e));var sn=Pt((xr,an)=>{(function(e,t){typeof xr=="object"&&typeof an!="undefined"?t():typeof define=="function"&&define.amd?define(t):t()})(xr,function(){"use strict";function e(r){var n=!0,o=!1,i=null,s={text:!0,search:!0,url:!0,tel:!0,email:!0,password:!0,number:!0,date:!0,month:!0,week:!0,time:!0,datetime:!0,"datetime-local":!0};function a(O){return!!(O&&O!==document&&O.nodeName!=="HTML"&&O.nodeName!=="BODY"&&"classList"in O&&"contains"in O.classList)}function f(O){var Qe=O.type,De=O.tagName;return!!(De==="INPUT"&&s[Qe]&&!O.readOnly||De==="TEXTAREA"&&!O.readOnly||O.isContentEditable)}function c(O){O.classList.contains("focus-visible")||(O.classList.add("focus-visible"),O.setAttribute("data-focus-visible-added",""))}function u(O){O.hasAttribute("data-focus-visible-added")&&(O.classList.remove("focus-visible"),O.removeAttribute("data-focus-visible-added"))}function p(O){O.metaKey||O.altKey||O.ctrlKey||(a(r.activeElement)&&c(r.activeElement),n=!0)}function m(O){n=!1}function d(O){a(O.target)&&(n||f(O.target))&&c(O.target)}function 
h(O){a(O.target)&&(O.target.classList.contains("focus-visible")||O.target.hasAttribute("data-focus-visible-added"))&&(o=!0,window.clearTimeout(i),i=window.setTimeout(function(){o=!1},100),u(O.target))}function v(O){document.visibilityState==="hidden"&&(o&&(n=!0),Y())}function Y(){document.addEventListener("mousemove",N),document.addEventListener("mousedown",N),document.addEventListener("mouseup",N),document.addEventListener("pointermove",N),document.addEventListener("pointerdown",N),document.addEventListener("pointerup",N),document.addEventListener("touchmove",N),document.addEventListener("touchstart",N),document.addEventListener("touchend",N)}function B(){document.removeEventListener("mousemove",N),document.removeEventListener("mousedown",N),document.removeEventListener("mouseup",N),document.removeEventListener("pointermove",N),document.removeEventListener("pointerdown",N),document.removeEventListener("pointerup",N),document.removeEventListener("touchmove",N),document.removeEventListener("touchstart",N),document.removeEventListener("touchend",N)}function N(O){O.target.nodeName&&O.target.nodeName.toLowerCase()==="html"||(n=!1,B())}document.addEventListener("keydown",p,!0),document.addEventListener("mousedown",m,!0),document.addEventListener("pointerdown",m,!0),document.addEventListener("touchstart",m,!0),document.addEventListener("visibilitychange",v,!0),Y(),r.addEventListener("focus",d,!0),r.addEventListener("blur",h,!0),r.nodeType===Node.DOCUMENT_FRAGMENT_NODE&&r.host?r.host.setAttribute("data-js-focus-visible",""):r.nodeType===Node.DOCUMENT_NODE&&(document.documentElement.classList.add("js-focus-visible"),document.documentElement.setAttribute("data-js-focus-visible",""))}if(typeof window!="undefined"&&typeof document!="undefined"){window.applyFocusVisiblePolyfill=e;var t;try{t=new CustomEvent("focus-visible-polyfill-ready")}catch(r){t=document.createEvent("CustomEvent"),t.initCustomEvent("focus-visible-polyfill-ready",!1,!1,{})}window.dispatchEvent(t)}typeof 
document!="undefined"&&e(document)})});var cn=Pt(Er=>{(function(e){var t=function(){try{return!!Symbol.iterator}catch(c){return!1}},r=t(),n=function(c){var u={next:function(){var p=c.shift();return{done:p===void 0,value:p}}};return r&&(u[Symbol.iterator]=function(){return u}),u},o=function(c){return encodeURIComponent(c).replace(/%20/g,"+")},i=function(c){return decodeURIComponent(String(c).replace(/\+/g," "))},s=function(){var c=function(p){Object.defineProperty(this,"_entries",{writable:!0,value:{}});var m=typeof p;if(m!=="undefined")if(m==="string")p!==""&&this._fromString(p);else if(p instanceof c){var d=this;p.forEach(function(B,N){d.append(N,B)})}else if(p!==null&&m==="object")if(Object.prototype.toString.call(p)==="[object Array]")for(var h=0;hd[0]?1:0}),c._entries&&(c._entries={});for(var p=0;p1?i(d[1]):"")}})})(typeof global!="undefined"?global:typeof window!="undefined"?window:typeof self!="undefined"?self:Er);(function(e){var t=function(){try{var o=new e.URL("b","http://a");return o.pathname="c d",o.href==="http://a/c%20d"&&o.searchParams}catch(i){return!1}},r=function(){var o=e.URL,i=function(f,c){typeof f!="string"&&(f=String(f)),c&&typeof c!="string"&&(c=String(c));var u=document,p;if(c&&(e.location===void 0||c!==e.location.href)){c=c.toLowerCase(),u=document.implementation.createHTMLDocument(""),p=u.createElement("base"),p.href=c,u.head.appendChild(p);try{if(p.href.indexOf(c)!==0)throw new Error(p.href)}catch(O){throw new Error("URL unable to set base "+c+" due to "+O)}}var m=u.createElement("a");m.href=f,p&&(u.body.appendChild(m),m.href=m.href);var d=u.createElement("input");if(d.type="url",d.value=f,m.protocol===":"||!/:/.test(m.href)||!d.checkValidity()&&!c)throw new TypeError("Invalid URL");Object.defineProperty(this,"_anchorElement",{value:m});var h=new e.URLSearchParams(this.search),v=!0,Y=!0,B=this;["append","delete","set"].forEach(function(O){var 
Qe=h[O];h[O]=function(){Qe.apply(h,arguments),v&&(Y=!1,B.search=h.toString(),Y=!0)}}),Object.defineProperty(this,"searchParams",{value:h,enumerable:!0});var N=void 0;Object.defineProperty(this,"_updateSearchParams",{enumerable:!1,configurable:!1,writable:!1,value:function(){this.search!==N&&(N=this.search,Y&&(v=!1,this.searchParams._fromString(this.search),v=!0))}})},s=i.prototype,a=function(f){Object.defineProperty(s,f,{get:function(){return this._anchorElement[f]},set:function(c){this._anchorElement[f]=c},enumerable:!0})};["hash","host","hostname","port","protocol"].forEach(function(f){a(f)}),Object.defineProperty(s,"search",{get:function(){return this._anchorElement.search},set:function(f){this._anchorElement.search=f,this._updateSearchParams()},enumerable:!0}),Object.defineProperties(s,{toString:{get:function(){var f=this;return function(){return f.href}}},href:{get:function(){return this._anchorElement.href.replace(/\?$/,"")},set:function(f){this._anchorElement.href=f,this._updateSearchParams()},enumerable:!0},pathname:{get:function(){return this._anchorElement.pathname.replace(/(^\/?)/,"/")},set:function(f){this._anchorElement.pathname=f},enumerable:!0},origin:{get:function(){var f={"http:":80,"https:":443,"ftp:":21}[this._anchorElement.protocol],c=this._anchorElement.port!=f&&this._anchorElement.port!=="";return this._anchorElement.protocol+"//"+this._anchorElement.hostname+(c?":"+this._anchorElement.port:"")},enumerable:!0},password:{get:function(){return""},set:function(f){},enumerable:!0},username:{get:function(){return""},set:function(f){},enumerable:!0}}),i.createObjectURL=function(f){return o.createObjectURL.apply(o,arguments)},i.revokeObjectURL=function(f){return o.revokeObjectURL.apply(o,arguments)},e.URL=i};if(t()||r(),e.location!==void 0&&!("origin"in e.location)){var n=function(){return 
e.location.protocol+"//"+e.location.hostname+(e.location.port?":"+e.location.port:"")};try{Object.defineProperty(e.location,"origin",{get:n,enumerable:!0})}catch(o){setInterval(function(){e.location.origin=n()},100)}}})(typeof global!="undefined"?global:typeof window!="undefined"?window:typeof self!="undefined"?self:Er)});var qr=Pt((Mt,Nr)=>{/*! - * clipboard.js v2.0.11 - * https://clipboardjs.com/ - * - * Licensed MIT © Zeno Rocha - */(function(t,r){typeof Mt=="object"&&typeof Nr=="object"?Nr.exports=r():typeof define=="function"&&define.amd?define([],r):typeof Mt=="object"?Mt.ClipboardJS=r():t.ClipboardJS=r()})(Mt,function(){return function(){var e={686:function(n,o,i){"use strict";i.d(o,{default:function(){return Ai}});var s=i(279),a=i.n(s),f=i(370),c=i.n(f),u=i(817),p=i.n(u);function m(j){try{return document.execCommand(j)}catch(T){return!1}}var d=function(T){var E=p()(T);return m("cut"),E},h=d;function v(j){var T=document.documentElement.getAttribute("dir")==="rtl",E=document.createElement("textarea");E.style.fontSize="12pt",E.style.border="0",E.style.padding="0",E.style.margin="0",E.style.position="absolute",E.style[T?"right":"left"]="-9999px";var H=window.pageYOffset||document.documentElement.scrollTop;return E.style.top="".concat(H,"px"),E.setAttribute("readonly",""),E.value=j,E}var Y=function(T,E){var H=v(T);E.container.appendChild(H);var I=p()(H);return m("copy"),H.remove(),I},B=function(T){var E=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{container:document.body},H="";return typeof T=="string"?H=Y(T,E):T instanceof HTMLInputElement&&!["text","search","url","tel","password"].includes(T==null?void 0:T.type)?H=Y(T.value,E):(H=p()(T),m("copy")),H},N=B;function O(j){"@babel/helpers - typeof";return typeof Symbol=="function"&&typeof Symbol.iterator=="symbol"?O=function(E){return typeof E}:O=function(E){return E&&typeof Symbol=="function"&&E.constructor===Symbol&&E!==Symbol.prototype?"symbol":typeof E},O(j)}var Qe=function(){var 
T=arguments.length>0&&arguments[0]!==void 0?arguments[0]:{},E=T.action,H=E===void 0?"copy":E,I=T.container,q=T.target,Me=T.text;if(H!=="copy"&&H!=="cut")throw new Error('Invalid "action" value, use either "copy" or "cut"');if(q!==void 0)if(q&&O(q)==="object"&&q.nodeType===1){if(H==="copy"&&q.hasAttribute("disabled"))throw new Error('Invalid "target" attribute. Please use "readonly" instead of "disabled" attribute');if(H==="cut"&&(q.hasAttribute("readonly")||q.hasAttribute("disabled")))throw new Error(`Invalid "target" attribute. You can't cut text from elements with "readonly" or "disabled" attributes`)}else throw new Error('Invalid "target" value, use a valid Element');if(Me)return N(Me,{container:I});if(q)return H==="cut"?h(q):N(q,{container:I})},De=Qe;function $e(j){"@babel/helpers - typeof";return typeof Symbol=="function"&&typeof Symbol.iterator=="symbol"?$e=function(E){return typeof E}:$e=function(E){return E&&typeof Symbol=="function"&&E.constructor===Symbol&&E!==Symbol.prototype?"symbol":typeof E},$e(j)}function Ei(j,T){if(!(j instanceof T))throw new TypeError("Cannot call a class as a function")}function tn(j,T){for(var E=0;E0&&arguments[0]!==void 0?arguments[0]:{};this.action=typeof I.action=="function"?I.action:this.defaultAction,this.target=typeof I.target=="function"?I.target:this.defaultTarget,this.text=typeof I.text=="function"?I.text:this.defaultText,this.container=$e(I.container)==="object"?I.container:document.body}},{key:"listenClick",value:function(I){var q=this;this.listener=c()(I,"click",function(Me){return q.onClick(Me)})}},{key:"onClick",value:function(I){var q=I.delegateTarget||I.currentTarget,Me=this.action(q)||"copy",kt=De({action:Me,container:this.container,target:this.target(q),text:this.text(q)});this.emit(kt?"success":"error",{action:Me,text:kt,trigger:q,clearSelection:function(){q&&q.focus(),window.getSelection().removeAllRanges()}})}},{key:"defaultAction",value:function(I){return 
vr("action",I)}},{key:"defaultTarget",value:function(I){var q=vr("target",I);if(q)return document.querySelector(q)}},{key:"defaultText",value:function(I){return vr("text",I)}},{key:"destroy",value:function(){this.listener.destroy()}}],[{key:"copy",value:function(I){var q=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{container:document.body};return N(I,q)}},{key:"cut",value:function(I){return h(I)}},{key:"isSupported",value:function(){var I=arguments.length>0&&arguments[0]!==void 0?arguments[0]:["copy","cut"],q=typeof I=="string"?[I]:I,Me=!!document.queryCommandSupported;return q.forEach(function(kt){Me=Me&&!!document.queryCommandSupported(kt)}),Me}}]),E}(a()),Ai=Li},828:function(n){var o=9;if(typeof Element!="undefined"&&!Element.prototype.matches){var i=Element.prototype;i.matches=i.matchesSelector||i.mozMatchesSelector||i.msMatchesSelector||i.oMatchesSelector||i.webkitMatchesSelector}function s(a,f){for(;a&&a.nodeType!==o;){if(typeof a.matches=="function"&&a.matches(f))return a;a=a.parentNode}}n.exports=s},438:function(n,o,i){var s=i(828);function a(u,p,m,d,h){var v=c.apply(this,arguments);return u.addEventListener(m,v,h),{destroy:function(){u.removeEventListener(m,v,h)}}}function f(u,p,m,d,h){return typeof u.addEventListener=="function"?a.apply(null,arguments):typeof m=="function"?a.bind(null,document).apply(null,arguments):(typeof u=="string"&&(u=document.querySelectorAll(u)),Array.prototype.map.call(u,function(v){return a(v,p,m,d,h)}))}function c(u,p,m,d){return function(h){h.delegateTarget=s(h.target,p),h.delegateTarget&&d.call(u,h)}}n.exports=f},879:function(n,o){o.node=function(i){return i!==void 0&&i instanceof HTMLElement&&i.nodeType===1},o.nodeList=function(i){var s=Object.prototype.toString.call(i);return i!==void 0&&(s==="[object NodeList]"||s==="[object HTMLCollection]")&&"length"in i&&(i.length===0||o.node(i[0]))},o.string=function(i){return typeof i=="string"||i instanceof String},o.fn=function(i){var 
s=Object.prototype.toString.call(i);return s==="[object Function]"}},370:function(n,o,i){var s=i(879),a=i(438);function f(m,d,h){if(!m&&!d&&!h)throw new Error("Missing required arguments");if(!s.string(d))throw new TypeError("Second argument must be a String");if(!s.fn(h))throw new TypeError("Third argument must be a Function");if(s.node(m))return c(m,d,h);if(s.nodeList(m))return u(m,d,h);if(s.string(m))return p(m,d,h);throw new TypeError("First argument must be a String, HTMLElement, HTMLCollection, or NodeList")}function c(m,d,h){return m.addEventListener(d,h),{destroy:function(){m.removeEventListener(d,h)}}}function u(m,d,h){return Array.prototype.forEach.call(m,function(v){v.addEventListener(d,h)}),{destroy:function(){Array.prototype.forEach.call(m,function(v){v.removeEventListener(d,h)})}}}function p(m,d,h){return a(document.body,m,d,h)}n.exports=f},817:function(n){function o(i){var s;if(i.nodeName==="SELECT")i.focus(),s=i.value;else if(i.nodeName==="INPUT"||i.nodeName==="TEXTAREA"){var a=i.hasAttribute("readonly");a||i.setAttribute("readonly",""),i.select(),i.setSelectionRange(0,i.value.length),a||i.removeAttribute("readonly"),s=i.value}else{i.hasAttribute("contenteditable")&&i.focus();var f=window.getSelection(),c=document.createRange();c.selectNodeContents(i),f.removeAllRanges(),f.addRange(c),s=f.toString()}return s}n.exports=o},279:function(n){function o(){}o.prototype={on:function(i,s,a){var f=this.e||(this.e={});return(f[i]||(f[i]=[])).push({fn:s,ctx:a}),this},once:function(i,s,a){var f=this;function c(){f.off(i,c),s.apply(a,arguments)}return c._=s,this.on(i,c,a)},emit:function(i){var s=[].slice.call(arguments,1),a=((this.e||(this.e={}))[i]||[]).slice(),f=0,c=a.length;for(f;f{"use strict";/*! 
- * escape-html - * Copyright(c) 2012-2013 TJ Holowaychuk - * Copyright(c) 2015 Andreas Lubbe - * Copyright(c) 2015 Tiancheng "Timothy" Gu - * MIT Licensed - */var rs=/["'&<>]/;Yo.exports=ns;function ns(e){var t=""+e,r=rs.exec(t);if(!r)return t;var n,o="",i=0,s=0;for(i=r.index;i0&&i[i.length-1])&&(c[0]===6||c[0]===2)){r=0;continue}if(c[0]===3&&(!i||c[1]>i[0]&&c[1]=e.length&&(e=void 0),{value:e&&e[n++],done:!e}}};throw new TypeError(t?"Object is not iterable.":"Symbol.iterator is not defined.")}function W(e,t){var r=typeof Symbol=="function"&&e[Symbol.iterator];if(!r)return e;var n=r.call(e),o,i=[],s;try{for(;(t===void 0||t-- >0)&&!(o=n.next()).done;)i.push(o.value)}catch(a){s={error:a}}finally{try{o&&!o.done&&(r=n.return)&&r.call(n)}finally{if(s)throw s.error}}return i}function D(e,t,r){if(r||arguments.length===2)for(var n=0,o=t.length,i;n1||a(m,d)})})}function a(m,d){try{f(n[m](d))}catch(h){p(i[0][3],h)}}function f(m){m.value instanceof et?Promise.resolve(m.value.v).then(c,u):p(i[0][2],m)}function c(m){a("next",m)}function u(m){a("throw",m)}function p(m,d){m(d),i.shift(),i.length&&a(i[0][0],i[0][1])}}function pn(e){if(!Symbol.asyncIterator)throw new TypeError("Symbol.asyncIterator is not defined.");var t=e[Symbol.asyncIterator],r;return t?t.call(e):(e=typeof Ee=="function"?Ee(e):e[Symbol.iterator](),r={},n("next"),n("throw"),n("return"),r[Symbol.asyncIterator]=function(){return this},r);function n(i){r[i]=e[i]&&function(s){return new Promise(function(a,f){s=e[i](s),o(a,f,s.done,s.value)})}}function o(i,s,a,f){Promise.resolve(f).then(function(c){i({value:c,done:a})},s)}}function C(e){return typeof e=="function"}function at(e){var t=function(n){Error.call(n),n.stack=new Error().stack},r=e(t);return r.prototype=Object.create(Error.prototype),r.prototype.constructor=r,r}var It=at(function(e){return function(r){e(this),this.message=r?r.length+` errors occurred during unsubscription: -`+r.map(function(n,o){return o+1+") "+n.toString()}).join(` - 
`):"",this.name="UnsubscriptionError",this.errors=r}});function Ve(e,t){if(e){var r=e.indexOf(t);0<=r&&e.splice(r,1)}}var Ie=function(){function e(t){this.initialTeardown=t,this.closed=!1,this._parentage=null,this._finalizers=null}return e.prototype.unsubscribe=function(){var t,r,n,o,i;if(!this.closed){this.closed=!0;var s=this._parentage;if(s)if(this._parentage=null,Array.isArray(s))try{for(var a=Ee(s),f=a.next();!f.done;f=a.next()){var c=f.value;c.remove(this)}}catch(v){t={error:v}}finally{try{f&&!f.done&&(r=a.return)&&r.call(a)}finally{if(t)throw t.error}}else s.remove(this);var u=this.initialTeardown;if(C(u))try{u()}catch(v){i=v instanceof It?v.errors:[v]}var p=this._finalizers;if(p){this._finalizers=null;try{for(var m=Ee(p),d=m.next();!d.done;d=m.next()){var h=d.value;try{ln(h)}catch(v){i=i!=null?i:[],v instanceof It?i=D(D([],W(i)),W(v.errors)):i.push(v)}}}catch(v){n={error:v}}finally{try{d&&!d.done&&(o=m.return)&&o.call(m)}finally{if(n)throw n.error}}}if(i)throw new It(i)}},e.prototype.add=function(t){var r;if(t&&t!==this)if(this.closed)ln(t);else{if(t instanceof e){if(t.closed||t._hasParent(this))return;t._addParent(this)}(this._finalizers=(r=this._finalizers)!==null&&r!==void 0?r:[]).push(t)}},e.prototype._hasParent=function(t){var r=this._parentage;return r===t||Array.isArray(r)&&r.includes(t)},e.prototype._addParent=function(t){var r=this._parentage;this._parentage=Array.isArray(r)?(r.push(t),r):r?[r,t]:t},e.prototype._removeParent=function(t){var r=this._parentage;r===t?this._parentage=null:Array.isArray(r)&&Ve(r,t)},e.prototype.remove=function(t){var r=this._finalizers;r&&Ve(r,t),t instanceof e&&t._removeParent(this)},e.EMPTY=function(){var t=new e;return t.closed=!0,t}(),e}();var Sr=Ie.EMPTY;function jt(e){return e instanceof Ie||e&&"closed"in e&&C(e.remove)&&C(e.add)&&C(e.unsubscribe)}function ln(e){C(e)?e():e.unsubscribe()}var Le={onUnhandledError:null,onStoppedNotification:null,Promise:void 
0,useDeprecatedSynchronousErrorHandling:!1,useDeprecatedNextContext:!1};var st={setTimeout:function(e,t){for(var r=[],n=2;n0},enumerable:!1,configurable:!0}),t.prototype._trySubscribe=function(r){return this._throwIfClosed(),e.prototype._trySubscribe.call(this,r)},t.prototype._subscribe=function(r){return this._throwIfClosed(),this._checkFinalizedStatuses(r),this._innerSubscribe(r)},t.prototype._innerSubscribe=function(r){var n=this,o=this,i=o.hasError,s=o.isStopped,a=o.observers;return i||s?Sr:(this.currentObservers=null,a.push(r),new Ie(function(){n.currentObservers=null,Ve(a,r)}))},t.prototype._checkFinalizedStatuses=function(r){var n=this,o=n.hasError,i=n.thrownError,s=n.isStopped;o?r.error(i):s&&r.complete()},t.prototype.asObservable=function(){var r=new F;return r.source=this,r},t.create=function(r,n){return new xn(r,n)},t}(F);var xn=function(e){ie(t,e);function t(r,n){var o=e.call(this)||this;return o.destination=r,o.source=n,o}return t.prototype.next=function(r){var n,o;(o=(n=this.destination)===null||n===void 0?void 0:n.next)===null||o===void 0||o.call(n,r)},t.prototype.error=function(r){var n,o;(o=(n=this.destination)===null||n===void 0?void 0:n.error)===null||o===void 0||o.call(n,r)},t.prototype.complete=function(){var r,n;(n=(r=this.destination)===null||r===void 0?void 0:r.complete)===null||n===void 0||n.call(r)},t.prototype._subscribe=function(r){var n,o;return(o=(n=this.source)===null||n===void 0?void 0:n.subscribe(r))!==null&&o!==void 0?o:Sr},t}(x);var Et={now:function(){return(Et.delegate||Date).now()},delegate:void 0};var wt=function(e){ie(t,e);function t(r,n,o){r===void 0&&(r=1/0),n===void 0&&(n=1/0),o===void 0&&(o=Et);var i=e.call(this)||this;return i._bufferSize=r,i._windowTime=n,i._timestampProvider=o,i._buffer=[],i._infiniteTimeWindow=!0,i._infiniteTimeWindow=n===1/0,i._bufferSize=Math.max(1,r),i._windowTime=Math.max(1,n),i}return t.prototype.next=function(r){var 
n=this,o=n.isStopped,i=n._buffer,s=n._infiniteTimeWindow,a=n._timestampProvider,f=n._windowTime;o||(i.push(r),!s&&i.push(a.now()+f)),this._trimBuffer(),e.prototype.next.call(this,r)},t.prototype._subscribe=function(r){this._throwIfClosed(),this._trimBuffer();for(var n=this._innerSubscribe(r),o=this,i=o._infiniteTimeWindow,s=o._buffer,a=s.slice(),f=0;f0?e.prototype.requestAsyncId.call(this,r,n,o):(r.actions.push(this),r._scheduled||(r._scheduled=ut.requestAnimationFrame(function(){return r.flush(void 0)})))},t.prototype.recycleAsyncId=function(r,n,o){var i;if(o===void 0&&(o=0),o!=null?o>0:this.delay>0)return e.prototype.recycleAsyncId.call(this,r,n,o);var s=r.actions;n!=null&&((i=s[s.length-1])===null||i===void 0?void 0:i.id)!==n&&(ut.cancelAnimationFrame(n),r._scheduled=void 0)},t}(Wt);var Sn=function(e){ie(t,e);function t(){return e!==null&&e.apply(this,arguments)||this}return t.prototype.flush=function(r){this._active=!0;var n=this._scheduled;this._scheduled=void 0;var o=this.actions,i;r=r||o.shift();do if(i=r.execute(r.state,r.delay))break;while((r=o[0])&&r.id===n&&o.shift());if(this._active=!1,i){for(;(r=o[0])&&r.id===n&&o.shift();)r.unsubscribe();throw i}},t}(Dt);var Oe=new Sn(wn);var _=new F(function(e){return e.complete()});function Vt(e){return e&&C(e.schedule)}function Cr(e){return e[e.length-1]}function Ye(e){return C(Cr(e))?e.pop():void 0}function Te(e){return Vt(Cr(e))?e.pop():void 0}function zt(e,t){return typeof Cr(e)=="number"?e.pop():t}var pt=function(e){return e&&typeof e.length=="number"&&typeof e!="function"};function Nt(e){return C(e==null?void 0:e.then)}function qt(e){return C(e[ft])}function Kt(e){return Symbol.asyncIterator&&C(e==null?void 0:e[Symbol.asyncIterator])}function Qt(e){return new TypeError("You provided "+(e!==null&&typeof e=="object"?"an invalid object":"'"+e+"'")+" where a stream was expected. 
You can provide an Observable, Promise, ReadableStream, Array, AsyncIterable, or Iterable.")}function zi(){return typeof Symbol!="function"||!Symbol.iterator?"@@iterator":Symbol.iterator}var Yt=zi();function Gt(e){return C(e==null?void 0:e[Yt])}function Bt(e){return un(this,arguments,function(){var r,n,o,i;return $t(this,function(s){switch(s.label){case 0:r=e.getReader(),s.label=1;case 1:s.trys.push([1,,9,10]),s.label=2;case 2:return[4,et(r.read())];case 3:return n=s.sent(),o=n.value,i=n.done,i?[4,et(void 0)]:[3,5];case 4:return[2,s.sent()];case 5:return[4,et(o)];case 6:return[4,s.sent()];case 7:return s.sent(),[3,2];case 8:return[3,10];case 9:return r.releaseLock(),[7];case 10:return[2]}})})}function Jt(e){return C(e==null?void 0:e.getReader)}function U(e){if(e instanceof F)return e;if(e!=null){if(qt(e))return Ni(e);if(pt(e))return qi(e);if(Nt(e))return Ki(e);if(Kt(e))return On(e);if(Gt(e))return Qi(e);if(Jt(e))return Yi(e)}throw Qt(e)}function Ni(e){return new F(function(t){var r=e[ft]();if(C(r.subscribe))return r.subscribe(t);throw new TypeError("Provided object does not correctly implement Symbol.observable")})}function qi(e){return new F(function(t){for(var r=0;r=2;return function(n){return n.pipe(e?A(function(o,i){return e(o,i,n)}):de,ge(1),r?He(t):Dn(function(){return new Zt}))}}function Vn(){for(var e=[],t=0;t=2,!0))}function pe(e){e===void 0&&(e={});var t=e.connector,r=t===void 0?function(){return new x}:t,n=e.resetOnError,o=n===void 0?!0:n,i=e.resetOnComplete,s=i===void 0?!0:i,a=e.resetOnRefCountZero,f=a===void 0?!0:a;return function(c){var u,p,m,d=0,h=!1,v=!1,Y=function(){p==null||p.unsubscribe(),p=void 0},B=function(){Y(),u=m=void 0,h=v=!1},N=function(){var O=u;B(),O==null||O.unsubscribe()};return y(function(O,Qe){d++,!v&&!h&&Y();var De=m=m!=null?m:r();Qe.add(function(){d--,d===0&&!v&&!h&&(p=$r(N,f))}),De.subscribe(Qe),!u&&d>0&&(u=new rt({next:function($e){return 
De.next($e)},error:function($e){v=!0,Y(),p=$r(B,o,$e),De.error($e)},complete:function(){h=!0,Y(),p=$r(B,s),De.complete()}}),U(O).subscribe(u))})(c)}}function $r(e,t){for(var r=[],n=2;ne.next(document)),e}function K(e,t=document){return Array.from(t.querySelectorAll(e))}function z(e,t=document){let r=ce(e,t);if(typeof r=="undefined")throw new ReferenceError(`Missing element: expected "${e}" to be present`);return r}function ce(e,t=document){return t.querySelector(e)||void 0}function _e(){return document.activeElement instanceof HTMLElement&&document.activeElement||void 0}function tr(e){return L(b(document.body,"focusin"),b(document.body,"focusout")).pipe(ke(1),l(()=>{let t=_e();return typeof t!="undefined"?e.contains(t):!1}),V(e===_e()),J())}function Xe(e){return{x:e.offsetLeft,y:e.offsetTop}}function Kn(e){return L(b(window,"load"),b(window,"resize")).pipe(Ce(0,Oe),l(()=>Xe(e)),V(Xe(e)))}function rr(e){return{x:e.scrollLeft,y:e.scrollTop}}function dt(e){return L(b(e,"scroll"),b(window,"resize")).pipe(Ce(0,Oe),l(()=>rr(e)),V(rr(e)))}var Yn=function(){if(typeof Map!="undefined")return Map;function e(t,r){var n=-1;return t.some(function(o,i){return o[0]===r?(n=i,!0):!1}),n}return function(){function t(){this.__entries__=[]}return Object.defineProperty(t.prototype,"size",{get:function(){return this.__entries__.length},enumerable:!0,configurable:!0}),t.prototype.get=function(r){var n=e(this.__entries__,r),o=this.__entries__[n];return o&&o[1]},t.prototype.set=function(r,n){var o=e(this.__entries__,r);~o?this.__entries__[o][1]=n:this.__entries__.push([r,n])},t.prototype.delete=function(r){var n=this.__entries__,o=e(n,r);~o&&n.splice(o,1)},t.prototype.has=function(r){return!!~e(this.__entries__,r)},t.prototype.clear=function(){this.__entries__.splice(0)},t.prototype.forEach=function(r,n){n===void 0&&(n=null);for(var 
o=0,i=this.__entries__;o0},e.prototype.connect_=function(){!Wr||this.connected_||(document.addEventListener("transitionend",this.onTransitionEnd_),window.addEventListener("resize",this.refresh),va?(this.mutationsObserver_=new MutationObserver(this.refresh),this.mutationsObserver_.observe(document,{attributes:!0,childList:!0,characterData:!0,subtree:!0})):(document.addEventListener("DOMSubtreeModified",this.refresh),this.mutationEventsAdded_=!0),this.connected_=!0)},e.prototype.disconnect_=function(){!Wr||!this.connected_||(document.removeEventListener("transitionend",this.onTransitionEnd_),window.removeEventListener("resize",this.refresh),this.mutationsObserver_&&this.mutationsObserver_.disconnect(),this.mutationEventsAdded_&&document.removeEventListener("DOMSubtreeModified",this.refresh),this.mutationsObserver_=null,this.mutationEventsAdded_=!1,this.connected_=!1)},e.prototype.onTransitionEnd_=function(t){var r=t.propertyName,n=r===void 0?"":r,o=ba.some(function(i){return!!~n.indexOf(i)});o&&this.refresh()},e.getInstance=function(){return this.instance_||(this.instance_=new e),this.instance_},e.instance_=null,e}(),Gn=function(e,t){for(var r=0,n=Object.keys(t);r0},e}(),Jn=typeof WeakMap!="undefined"?new WeakMap:new Yn,Xn=function(){function e(t){if(!(this instanceof e))throw new TypeError("Cannot call a class as a function.");if(!arguments.length)throw new TypeError("1 argument required, but only 0 present.");var r=ga.getInstance(),n=new La(t,r,this);Jn.set(this,n)}return e}();["observe","unobserve","disconnect"].forEach(function(e){Xn.prototype[e]=function(){var t;return(t=Jn.get(this))[e].apply(t,arguments)}});var Aa=function(){return typeof nr.ResizeObserver!="undefined"?nr.ResizeObserver:Xn}(),Zn=Aa;var eo=new x,Ca=$(()=>k(new Zn(e=>{for(let t of e)eo.next(t)}))).pipe(g(e=>L(ze,k(e)).pipe(R(()=>e.disconnect()))),X(1));function he(e){return{width:e.offsetWidth,height:e.offsetHeight}}function ye(e){return 
Ca.pipe(S(t=>t.observe(e)),g(t=>eo.pipe(A(({target:r})=>r===e),R(()=>t.unobserve(e)),l(()=>he(e)))),V(he(e)))}function bt(e){return{width:e.scrollWidth,height:e.scrollHeight}}function ar(e){let t=e.parentElement;for(;t&&(e.scrollWidth<=t.scrollWidth&&e.scrollHeight<=t.scrollHeight);)t=(e=t).parentElement;return t?e:void 0}var to=new x,Ra=$(()=>k(new IntersectionObserver(e=>{for(let t of e)to.next(t)},{threshold:0}))).pipe(g(e=>L(ze,k(e)).pipe(R(()=>e.disconnect()))),X(1));function sr(e){return Ra.pipe(S(t=>t.observe(e)),g(t=>to.pipe(A(({target:r})=>r===e),R(()=>t.unobserve(e)),l(({isIntersecting:r})=>r))))}function ro(e,t=16){return dt(e).pipe(l(({y:r})=>{let n=he(e),o=bt(e);return r>=o.height-n.height-t}),J())}var cr={drawer:z("[data-md-toggle=drawer]"),search:z("[data-md-toggle=search]")};function no(e){return cr[e].checked}function Ke(e,t){cr[e].checked!==t&&cr[e].click()}function Ue(e){let t=cr[e];return b(t,"change").pipe(l(()=>t.checked),V(t.checked))}function ka(e,t){switch(e.constructor){case HTMLInputElement:return e.type==="radio"?/^Arrow/.test(t):!0;case HTMLSelectElement:case HTMLTextAreaElement:return!0;default:return e.isContentEditable}}function Ha(){return L(b(window,"compositionstart").pipe(l(()=>!0)),b(window,"compositionend").pipe(l(()=>!1))).pipe(V(!1))}function oo(){let e=b(window,"keydown").pipe(A(t=>!(t.metaKey||t.ctrlKey)),l(t=>({mode:no("search")?"search":"global",type:t.key,claim(){t.preventDefault(),t.stopPropagation()}})),A(({mode:t,type:r})=>{if(t==="global"){let n=_e();if(typeof n!="undefined")return!ka(n,r)}return!0}),pe());return Ha().pipe(g(t=>t?_:e))}function le(){return new URL(location.href)}function ot(e){location.href=e.href}function io(){return new x}function ao(e,t){if(typeof t=="string"||typeof t=="number")e.innerHTML+=t.toString();else if(t instanceof Node)e.appendChild(t);else if(Array.isArray(t))for(let r of t)ao(e,r)}function M(e,t,...r){let n=document.createElement(e);if(t)for(let o of Object.keys(t))typeof 
t[o]!="undefined"&&(typeof t[o]!="boolean"?n.setAttribute(o,t[o]):n.setAttribute(o,""));for(let o of r)ao(n,o);return n}function fr(e){if(e>999){let t=+((e-950)%1e3>99);return`${((e+1e-6)/1e3).toFixed(t)}k`}else return e.toString()}function so(){return location.hash.substring(1)}function Dr(e){let t=M("a",{href:e});t.addEventListener("click",r=>r.stopPropagation()),t.click()}function Pa(e){return L(b(window,"hashchange"),e).pipe(l(so),V(so()),A(t=>t.length>0),X(1))}function co(e){return Pa(e).pipe(l(t=>ce(`[id="${t}"]`)),A(t=>typeof t!="undefined"))}function Vr(e){let t=matchMedia(e);return er(r=>t.addListener(()=>r(t.matches))).pipe(V(t.matches))}function fo(){let e=matchMedia("print");return L(b(window,"beforeprint").pipe(l(()=>!0)),b(window,"afterprint").pipe(l(()=>!1))).pipe(V(e.matches))}function zr(e,t){return e.pipe(g(r=>r?t():_))}function ur(e,t={credentials:"same-origin"}){return ue(fetch(`${e}`,t)).pipe(fe(()=>_),g(r=>r.status!==200?Ot(()=>new Error(r.statusText)):k(r)))}function We(e,t){return ur(e,t).pipe(g(r=>r.json()),X(1))}function uo(e,t){let r=new DOMParser;return ur(e,t).pipe(g(n=>n.text()),l(n=>r.parseFromString(n,"text/xml")),X(1))}function pr(e){let t=M("script",{src:e});return $(()=>(document.head.appendChild(t),L(b(t,"load"),b(t,"error").pipe(g(()=>Ot(()=>new ReferenceError(`Invalid script: ${e}`))))).pipe(l(()=>{}),R(()=>document.head.removeChild(t)),ge(1))))}function po(){return{x:Math.max(0,scrollX),y:Math.max(0,scrollY)}}function lo(){return L(b(window,"scroll",{passive:!0}),b(window,"resize",{passive:!0})).pipe(l(po),V(po()))}function mo(){return{width:innerWidth,height:innerHeight}}function ho(){return b(window,"resize",{passive:!0}).pipe(l(mo),V(mo()))}function bo(){return G([lo(),ho()]).pipe(l(([e,t])=>({offset:e,size:t})),X(1))}function lr(e,{viewport$:t,header$:r}){let n=t.pipe(ee("size")),o=G([n,r]).pipe(l(()=>Xe(e)));return 
G([r,t,o]).pipe(l(([{height:i},{offset:s,size:a},{x:f,y:c}])=>({offset:{x:s.x-f,y:s.y-c+i},size:a})))}(()=>{function e(n,o){parent.postMessage(n,o||"*")}function t(...n){return n.reduce((o,i)=>o.then(()=>new Promise(s=>{let a=document.createElement("script");a.src=i,a.onload=s,document.body.appendChild(a)})),Promise.resolve())}var r=class extends EventTarget{constructor(n){super(),this.url=n,this.m=i=>{i.source===this.w&&(this.dispatchEvent(new MessageEvent("message",{data:i.data})),this.onmessage&&this.onmessage(i))},this.e=(i,s,a,f,c)=>{if(s===`${this.url}`){let u=new ErrorEvent("error",{message:i,filename:s,lineno:a,colno:f,error:c});this.dispatchEvent(u),this.onerror&&this.onerror(u)}};let o=document.createElement("iframe");o.hidden=!0,document.body.appendChild(this.iframe=o),this.w.document.open(),this.w.document.write(` - - - - - - - - - - - - - - - - - - - -
- -
- - - - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - -
-
-
- - - -
-
-
- - - -
-
-
- - - -
-
- - - - - - - -

Medchem

-

Package for applying common medchem filters to a dataset of molecules.

-

Summary

-

This package contains various implementations of medchem rules collected from various sources that may be applied as filters on generated or screened molecules. It centralizes all common filters used at Valence Discovery.

-

Although the list is as exhaustive as possible, filtering rules mainly depend on the drug discovery program.

-

It should be noted that systematically applying all filters is to be avoided. For example, "PAINS C" filters are usually not very relevant. As another example, some filters are very strict and could flag substructures that are important for a project (for example, some ZBGs).

-

Installation

-
micromamba install -c conda-forge medchem
-
-

Available Filters

-

The following filters are available:

-

Eli Lilly Medchem Rules

-

These are Python bindings of the implementation of the Eli Lilly Medchem Rules published under "Rules for Identifying Potentially Reactive or Promiscuous Compounds" by Robert F. Bruns and Ian W. Watson, J. Med. Chem. 2012, 55, 9763--9772 as ACS Author choice, i.e. open access at doi 10.1021/jm301008n.

-

These rules are used in medchem.filter.lilly_demerit_filter function and are the main offering of this package.

-

NIBR filters

-

Rules used by Novartis to build their new screening deck. The rules are published under "Evolution of Novartis' small molecule screening deck design" by Schuffenhauer, A. et al. J. Med. Chem. (2020), https://dx.doi.org/10.1021/acs.jmedchem.0c01332.

-

These rules are used in lead filtering as medchem.filter.lead.screening_filter

-

Bredt filters

-

These are filters based on Bredt's rules for unstable chemistry. They are used in lead filtering as medchem.filter.lead.bredt_filter.

-

Alerts filters

-

These are alert rules from the ChEMBL database curation scheme and public literature on promiscuous compounds in common assays. The rule sets are:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
name# alertssource
Glaxo55ChEMBL
Dundee105ChEMBL
BMS180ChEMBL
PAINS481ChEMBL
SureChEMBL166ChEMBL
MLSMR116ChEMBL
Inpharmatica91ChEMBL
LINT57ChEMBL
Alarm-NMR75Litterature
AlphaScreen-Hitters6Litterature
GST-Hitters34Litterature
HIS-Hitters19Litterature
LuciferaseInhibitor3Litterature
DNABinder78Litterature
Chelator55Litterature
Frequent-Hitter15Litterature
Electrophilic119Litterature
Genotoxic-Carcinogenicity117Litterature
LD50-Oral20Litterature
Non-Genotoxic-Carcinogenicity22Litterature
Reactive-Unstable-Toxic335Litterature
Skin155Litterature
Toxicophore154Litterature
-

They are used in lead filtering through medchem.filter.lead.alert_filter.

-

Generic filters

-

These are generic filters based on specific molecular properties such as the number of atoms, the size of macrocycles, etc. They are available at medchem.filter.generic.

- - - - - - -
-
- - -
- -
- - - -
-
-
-
- - - - - - - - - \ No newline at end of file diff --git a/1.4.0/objects.inv b/1.4.0/objects.inv deleted file mode 100644 index 7f95387..0000000 --- a/1.4.0/objects.inv +++ /dev/null @@ -1,10 +0,0 @@ -# Sphinx inventory version 2 -# Project: medchem -# Version: 0.0.0 -# The remainder of this file is compressed using zlib. -xڽK6 ײ)@oiM#1F|$Kҏ.$Y9Y4pX5.nU;Ok^K? -2C(~=q.@)Қ"42pD8%}ЫaRV!uu|;pAayE)mFm~T}X9m2 - y[UåW28o;B=::ZOt=qе%Yͽj\\XZF`N G^gwgE@9|80yT 2]*s*t ~-T<}kUţ>&_TGk{|W=ooesVeꞨ0F@9l>e[^X7e:3@7\ ^(j;LҘ`D9РE*D= xJ*AjP -!v¦9&4XC4IR\&X^Gq!y$R7bb/IE9S},@S򁎌HS;2k mH% VIi Bp%.Y4!j]˼. iƢe]/5nR=@Rpuf9v_ 4z='h@YOlf>e?9C!s!mmtMy*ft>MȌW4!c"JL׋O.V;t`+H1-LcφZʮ6trΧAME9KW? z¤ AfYd2cd 3. 2 32rx94o=-x)x~ 3/r uΓ)&+7 fywR{[J06`(y!5oqoMEMI -lM<)Hi'?7дx7CvzZd{Jl:Dr`\4Fp婧}Feƕ sS|ri kNmP[.0Vrzlzpn]ݧ 4Gݧ1]adƩ]ČTְNwhvJ5:4 %u(њoы>U1~sfkb;sΜ@y5/f;Xsбﲚ=LW&9f -8=֒7U%2ckEJ"Ӂ| \ No newline at end of file diff --git a/1.4.0/search/search_index.json b/1.4.0/search/search_index.json deleted file mode 100644 index a3c86c5..0000000 --- a/1.4.0/search/search_index.json +++ /dev/null @@ -1 +0,0 @@ -{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"index.html","title":"Medchem","text":"

Package for applying common medchem filters to a dataset of molecules.

"},{"location":"index.html#summary","title":"Summary","text":"

This package contains various implementations of medchem rules collected from various sources that may be applied as filters on generated or screened molecules. It centralizes all common filters used at Valence Discovery.

Although the list is as exhaustive as possible, filtering rules mainly depend on the drug discovery program.

It should be noted that systematically applying all filters is to be avoided. For example, \"PAINS C\" filters are usually not very relevant. As another example, some filters are very strict and could flag substructures that are important for a project (for example, some ZBGs).

"},{"location":"index.html#installation","title":"Installation","text":"
micromamba install -c conda-forge medchem\n
"},{"location":"index.html#available-filters","title":"Available Filters","text":"

The following filters are available:

"},{"location":"index.html#eli-lilly-medchem-rules","title":"Eli Lilly Medchem Rules","text":"

These are Python bindings of the implementation of the Eli Lilly Medchem Rules published under \"Rules for Identifying Potentially Reactive or Promiscuous Compounds\" by Robert F. Bruns and Ian W. Watson, J. Med. Chem. 2012, 55, 9763--9772 as ACS Author choice, i.e. open access at doi 10.1021/jm301008n.

These rules are used in medchem.filter.lilly_demerit_filter function and are the main offering of this package.

"},{"location":"index.html#nibr-filters","title":"NIBR filters","text":"

Rules used by Novartis to build their new screening deck. The rules are published under \"Evolution of Novartis' small molecule screening deck design\" by Schuffenhauer, A. et al. J. Med. Chem. (2020), https://dx.doi.org/10.1021/acs.jmedchem.0c01332.

These rules are used in lead filtering as medchem.filter.lead.screening_filter

"},{"location":"index.html#bredt-filters","title":"Bredt filters","text":"

These are filters based on Bredt's rules for unstable chemistry. They are used in lead filtering as medchem.filter.lead.bredt_filter.

"},{"location":"index.html#alerts-filters","title":"Alerts filters","text":"

These are alert rules from the ChEMBL database curation scheme and public literature on promiscuous compounds in common assays. The rule sets are:

name # alerts source Glaxo 55 ChEMBL Dundee 105 ChEMBL BMS 180 ChEMBL PAINS 481 ChEMBL SureChEMBL 166 ChEMBL MLSMR 116 ChEMBL Inpharmatica 91 ChEMBL LINT 57 ChEMBL Alarm-NMR 75 Litterature AlphaScreen-Hitters 6 Litterature GST-Hitters 34 Litterature HIS-Hitters 19 Litterature LuciferaseInhibitor 3 Litterature DNABinder 78 Litterature Chelator 55 Litterature Frequent-Hitter 15 Litterature Electrophilic 119 Litterature Genotoxic-Carcinogenicity 117 Litterature LD50-Oral 20 Litterature Non-Genotoxic-Carcinogenicity 22 Litterature Reactive-Unstable-Toxic 335 Litterature Skin 155 Litterature Toxicophore 154 Litterature

They are used in lead filtering through medchem.filter.lead.alert_filter.

"},{"location":"index.html#generic-filters","title":"Generic filters","text":"

These are generic filters based on specific molecular properties such as the number of atoms, the size of macrocycles, etc. They are available at medchem.filter.generic.

"},{"location":"api/medchem.alerts.html","title":"medchem.alerts","text":""},{"location":"api/medchem.alerts.html#medchem.alerts","title":"medchem.alerts","text":""},{"location":"api/medchem.alerts.html#medchem.alerts.AlertFilters","title":"AlertFilters","text":"

Filtering class for building a library based on a list of structural alerts

To list the available alerts, use the list_default_available_alerts method.

"},{"location":"api/medchem.alerts.html#medchem.alerts.AlertFilters.__call__","title":"__call__(mols, n_jobs=None, progress=False, include_all_alerts=False)","text":"

Run alert evaluation on this list of molecules and return the full dataframe

Parameters:

Name Type Description Default mols Iterable[Union[str, rdchem.Mol]]

input list of molecules

required n_jobs Optional[int]

number of jobs

None progress bool

whether to show progress or not

False include_all_alerts bool

whether to include all of the alerts that match as columns

False"},{"location":"api/medchem.alerts.html#medchem.alerts.AlertFilters.__init__","title":"__init__(alerts_set=None, alerts_db=None)","text":"

Filtering molecules based on chemical alerts

Parameters:

Name Type Description Default alerts_set Union[str, List[str]]

Alerts catalog to use. Default is BMS+Dundee+Glaxo

None alerts_db Optional[os.PathLike]

Alerts file to use. Default is internal

None"},{"location":"api/medchem.alerts.html#medchem.alerts.AlertFilters.evaluate","title":"evaluate(mol)","text":"

Evaluate structure alerts on a molecule

Parameters:

Name Type Description Default mol Union[str, rdchem.Mol]

input molecule

required

Returns:

Type Description

list of alerts matched

"},{"location":"api/medchem.alerts.html#medchem.alerts.AlertFilters.list_default_available_alerts","title":"list_default_available_alerts() cached staticmethod","text":"

Return a list of unique rule set names

"},{"location":"api/medchem.alerts.html#medchem.alerts.NovartisFilters","title":"NovartisFilters","text":"

Filtering class for building a screening deck following the novartis filtering process published in https://dx.doi.org/10.1021/acs.jmedchem.0c01332.

The outputs of the filter are explained below: - status: one of [\"Exclude\", \"Flag\", \"Annotations\", \"Ok\"] (ordered by quality). Generally, you can keep anything without the \"Exclude\" label, as long as you also apply a maximum severity score for compounds that collect too many flags. - covalent: number of potentially covalent motifs contained in the compound - severity: how severe are the issues with the molecules: - 0: compound has no flags, might have annotations; - 1-9: number of flags the compound raises; - >= 10: default exclusion criterion used in the paper - special_mol: whether the compound/parts of the compound belong to a special class of molecules (e.g. peptides, glycosides, fatty acids). In that case, you should review the rejection reasons.

"},{"location":"api/medchem.alerts.html#medchem.alerts.NovartisFilters.__call__","title":"__call__(mols, n_jobs=None, progress=False)","text":"

Run alert evaluation on this list of molecules and return the full dataframe

Parameters:

Name Type Description Default mols Iterable[Union[str, rdchem.Mol]]

input list of molecules

required n_jobs Optional[int]

number of jobs

None progress bool

whether to show progress or not

False"},{"location":"api/medchem.catalog.html","title":"medchem.catalog","text":""},{"location":"api/medchem.catalog.html#medchem.catalog","title":"medchem.catalog","text":""},{"location":"api/medchem.catalog.html#medchem.catalog.NamedCatalogs","title":"NamedCatalogs","text":"

Holder for substructure matching catalogs

"},{"location":"api/medchem.catalog.html#medchem.catalog.NamedCatalogs.alerts","title":"alerts(subset=None) staticmethod","text":"

Alerts filter catalogs commonly used in molecule filtering

Parameters:

Name Type Description Default subset Optional[Union[List[str], str]]

subset of providers to consider

None

Returns:

Name Type Description catalog FilterCatalog

filter catalog

"},{"location":"api/medchem.catalog.html#medchem.catalog.NamedCatalogs.bredt","title":"bredt() cached staticmethod","text":"

Bredt filter rules. Also see the example of usage in SURGE: https://github.com/StructureGenerator/SURGE/blob/main/doc/surge1_0.pdf

"},{"location":"api/medchem.catalog.html#medchem.catalog.NamedCatalogs.chemical_groups","title":"chemical_groups(filters='medicinal') cached staticmethod","text":"

Chemical group filter catalogs

Parameters:

Name Type Description Default filters Union[str, List[str]]

list of tags to filter the catalog on.

'medicinal'"},{"location":"api/medchem.catalog.html#medchem.catalog.NamedCatalogs.nibr","title":"nibr() cached staticmethod","text":"

Catalog from NIBR

Warning

This includes all the compounds in the catalog, regardless of severity (FLAG, EXCLUDE, ANNOTATION) You likely don't want to use this for blind prioritization

"},{"location":"api/medchem.catalog.html#medchem.catalog.NamedCatalogs.tox","title":"tox(pains_a=True, pains_b=True, pains_c=False, brenk=True, nih=False, zinc=False) cached staticmethod","text":"

Common toxicity and interference catalog

Parameters:

Name Type Description Default pains_a bool

whether to include PAINS filters from assay A

True pains_b bool

whether to include PAINS filters from assay B

True pains_c bool

whether to include PAINS filters from assay C

False brenk bool

whether to include BRENK filters

True nih bool

whether to include NIH filters

False zinc bool

whether to include ZINC filters

False"},{"location":"api/medchem.catalog.html#medchem.catalog.NamedCatalogs.unstable_graph","title":"unstable_graph(max_severity=5) cached staticmethod","text":"

Unstable molecular graph to filter out especially for generative models

Parameters:

Name Type Description Default max_severity int

maximum severity to consider for graph rules to be acceptable

5"},{"location":"api/medchem.catalog.html#medchem.catalog.from_smarts","title":"from_smarts(smarts, labels=None, mincounts=None, maxcounts=None, entry_as_inds=False)","text":"

Load catalog from a list of smarts

Parameters:

Name Type Description Default smarts List[str]

list of input smarts to add to the catalog

required labels Optional[List[str]]

list of label for each smarts

None mincounts Optional[List[int]]

minimum count before a match is recognized

None maxcounts Optional[List[int]]

maximum count for a match to be valid

None entry_as_inds bool

whether to use index for entry id or the label

False

Returns:

Name Type Description catalog FilterCatalog

merged catalogs

"},{"location":"api/medchem.catalog.html#medchem.catalog.list_named_catalogs","title":"list_named_catalogs()","text":"

List all available named catalogs. This list will ignore all chemical groups. For a list of chemical groups that can be queried using NamedCatalogs.chemical_groups, use medchem.groups.list_default_chemical_groups

"},{"location":"api/medchem.catalog.html#medchem.catalog.merge_catalogs","title":"merge_catalogs(*catalogs)","text":"

Merge several catalogs into a single one

Returns:

Name Type Description catalog FilterCatalog

merged catalog

"},{"location":"api/medchem.complexity.html","title":"medchem.complexity","text":""},{"location":"api/medchem.complexity.html#medchem.complexity.complexity_filter","title":"medchem.complexity.complexity_filter","text":""},{"location":"api/medchem.complexity.html#medchem.complexity.complexity_filter.ComplexityFilter","title":"ComplexityFilter","text":"

Complexity filters derived from nonpher: https://github.com/lich-uct/nonpher/blob/master/nonpher/nonpher.py

To recover the original complexity score, use threshold_stats_file = \"zinc_12\". The thresholds have been re-calculated using the new zinc-15 dataset, focusing only on commercially available compounds.

"},{"location":"api/medchem.complexity.html#medchem.complexity.complexity_filter.ComplexityFilter.__call__","title":"__call__(mol)","text":"

Check whether the input structure is too complex given this instance of the complexity filter. Returns False if the molecule is too complex, else True.

Parameters:

Name Type Description Default mol dm.Mol

input molecule

required"},{"location":"api/medchem.complexity.html#medchem.complexity.complexity_filter.ComplexityFilter.__init__","title":"__init__(limit='99', complexity_metric='bertz', threshold_stats_file='zinc_15_available')","text":"

Default complexity limit is set on at least 1 exceeding metric on the 999th permille level

Parameters:

Name Type Description Default limit str

The complexity percentile outlier limit to be used (should be expressed as an integer)

'99' complexity_metric str

The complexity filter name to be used. Use ComplexityFilter.list_default_available_filters to list default filters. The following complexity metrics are supported by default * \"bertz\": bertz complexity index * \"sas\": synthetic accessibility score (zinc_15_available only) * \"qed\": qed score (zinc_15_available only) * \"clogp\": clogp for how greasy a molecule is compared to other in the same mw range (zinc_15_available only) * \"whitlock\": whitlock complexity index * \"barone\": barone complexity index * \"smcm\": synthetic and molecular complexity * \"twc\": total walk count complexity (zinc_15_available only)

'bertz' threshold_stats_file Optional[str]

The path to, or the type of, the threshold file to be used. The default available threshold stats files are * \"zinc_12\" * \"zinc_15_available\"

'zinc_15_available'"},{"location":"api/medchem.complexity.html#medchem.complexity.complexity_filter.ComplexityFilter.list_default_available_filters","title":"list_default_available_filters() classmethod","text":"

Return a list of unique filter names

"},{"location":"api/medchem.complexity.html#medchem.complexity.complexity_filter.ComplexityFilter.list_default_percentile","title":"list_default_percentile(threshold_stats_file=None) cached classmethod","text":"

Return the default percentile list for the threshold file

"},{"location":"api/medchem.complexity.html#medchem.complexity.complexity_filter.ComplexityFilter.load_threshold_stats_file","title":"load_threshold_stats_file(path=None) classmethod","text":"

Load the threshold file used to compute the percentile depending on the MW for each complexity_metric

Parameters:

Name Type Description Default path Optional[str]

path to the threshold file

None"},{"location":"api/medchem.complexity.html#medchem.complexity._complexity_calc","title":"medchem.complexity._complexity_calc","text":"

Complexity filters as implemented in nonpher https://github.com/lich-uct/nonpher/blob/master/nonpher/complex_lib.py

"},{"location":"api/medchem.complexity.html#medchem.complexity._complexity_calc.BaroneCT","title":"BaroneCT(mol, chiral=False)","text":"

Compute a Barone complexity measure for a molecule as described in:

R. Barone and M. Chanon, J. Chem. Inf. Comput. Sci., 2001, 41 (2), pp 269\u2013272 Qi Huang, Lin-LiLi, Sheng-Yong Yang, J. Mol. Graph. Model. 2010, 28 (8), pp 775\u2013787

Parameter values are hardcoded as in the articles. On zinc 15 commercially available dataset, the range of this score is [30, 4266] with a median of 538

Parameters:

Name Type Description Default mol dm.Mol

The input molecule.

required chiral bool

Whether to include chirality in the calculation.

False"},{"location":"api/medchem.complexity.html#medchem.complexity._complexity_calc.SMCM","title":"SMCM(mol)","text":"

Compute synthetic and molecular complexity as described in:

TK Allu, TI Oprea, J. Chem. Inf. Model. 2005, 45(5), pp. 1237-1243. https://sci-hub.ee/10.1021/ci0501387

On zinc 15 commercially available dataset, the range of this score is [1.93, 192.00] with a median of 42.23

Parameters:

Name Type Description Default mol dm.Mol

the input molecule

required"},{"location":"api/medchem.complexity.html#medchem.complexity._complexity_calc.TWC","title":"TWC(mol, log10=True)","text":"

Compute the total walk count of a molecule as a proxy for complexity. This score is defined as twc = 1/2 sum(k=1..n-1,sum(i=atoms,awc(k,i))) and is described in: Gerta Rucker and Christoph Rucker, J. Chem. Inf. Comput. Sci. 1993, 33, 683-695

On zinc 15 commercially available dataset, the range of this score is [1.20, 39.08] with a median of 10.65

Parameters:

Name Type Description Default mol

the input molecule

required log10 bool

whether to return the log10 of the values

True"},{"location":"api/medchem.complexity.html#medchem.complexity._complexity_calc.WhitlockCT","title":"WhitlockCT(mol, ringval=4, unsatval=2, heteroval=1, chiralval=2)","text":"

A chemically intuitive measure for molecular complexity. This complexity measure has been described in: H. W. Whitlock, J. Org. Chem., 1998, 63, 7982-7989. Benzyls, phenyls, etc. are not treated at all.

On zinc 15 commercially available dataset, the range of this score is [0, 172] with a median of 25

Parameters:

Name Type Description Default mol

The input molecule.

required ringval float

The contribution of rings

4 unsatval float

The contribution of the unsaturated bond.

2 heteroval float

The contribution of the heteroatom.

1 chiralval float

The contribution of the chiral center.

2"},{"location":"api/medchem.demerits.html","title":"medchem.demerits","text":""},{"location":"api/medchem.demerits.html#medchem.demerits","title":"medchem.demerits","text":""},{"location":"api/medchem.demerits.html#medchem.demerits.batch_score","title":"batch_score(smiles_list, n_jobs=None, batch_size=5000, progress=False, **run_options)","text":"

Run scorer on input smile list in batch

Parameters:

Name Type Description Default smiles_list List

list of smiles

required n_jobs Optional[int]

Number of jobs to run in parallel.

None batch_size Optional[int]

Optional batch size used to run the scoring in parallel.

5000 progress bool

Whether to show progress bar.

False run_options

Run options to pass to the underlying score function

{}

Returns:

Name Type Description out_df pd.DataFrame

Dataframe containing the smiles and computed properties: (rejected, demerit_score, reason, step)

"},{"location":"api/medchem.demerits.html#medchem.demerits.run_cmd","title":"run_cmd(cmd, shell=False)","text":"

Run command

"},{"location":"api/medchem.demerits.html#medchem.demerits.score","title":"score(smiles_list, mc_first_pass_options='', iwd_options='', stop_after_step=3, **run_options)","text":"

Run scorer on input smile list:

Parameters:

Name Type Description Default smiles_list List

list of smiles

required mc_first_pass_options Optional[str]

Initial options to pass to mc_first_pass

'' iwd_options Optional[str]

Initial options to pass to iwdemerit

'' stop_after_step Optional[int]

Where to stop in the pipeline. Don't change this if you don't know.

3 run_options

Additional option to run the pipeline

{}

Returns:

Name Type Description out_df pd.DataFrame

Dataframe containing the smiles and computed properties: (rejected, demerit_score, reason, step)

"},{"location":"api/medchem.filter.html","title":"medchem.filter","text":""},{"location":"api/medchem.filter.html#medchem.filter.lead","title":"medchem.filter.lead","text":""},{"location":"api/medchem.filter.html#medchem.filter.lead.alert_filter","title":"alert_filter(mols, alerts, alerts_db=None, n_jobs=1, rule_dict=None, return_idx=False)","text":"

Filter a dataset of molecules, based on common structural alerts and specific rules.

Parameters:

Name Type Description Default mols Iterable[Union[str, dm.Mol]]

List of molecules to filter

required alerts List[str]

List of alert collections to screen for. See AlertFilters.list_default_available_alerts()

required alerts_db Optional[os.PathLike]

Path to the alert file name. The internal default file (alerts.csv) will be used if not provided

None n_jobs Optional[int]

Number of cpu to use

1 rule_dict Dict

Dictionary with additional rules to apply during the filtering. For example, such dictionary for drug-like compounds would look like this:

rule_dict {\"MW\": [0, 500], \"LogP\": [-0.5, 5], \"HBD\": [0, 5], \"HBA\": [0, 10], \"TPSA\": [0, 150]}

None return_idx bool

Whether to return the filtered index

False

Returns:

Name Type Description filtered_mask

boolean array (or index array) where true means the molecule IS OK (not found in the alert catalog).

"},{"location":"api/medchem.filter.html#medchem.filter.lead.bredt_filter","title":"bredt_filter(mols, return_idx=False, n_jobs=None, progress=False, scheduler='threads', batch_size=100)","text":"

Filter a list of compounds according to Bredt's rules https://en.wikipedia.org/wiki/Bredt%27s_rule

Parameters:

Name Type Description Default mols Sequence[Union[str, dm.Mol]]

list of input molecules

required return_idx bool

whether to return index or a boolean mask

False n_jobs Optional[int]

number of parallel job to run. Sequential by default

None progress bool

whether to show progress bar

False scheduler str

joblib scheduler to use

'threads' batch_size int

batch size for parallel processing. Note that batch_size should be increased if the number of used CPUs gets very large.

100

Returns:

Name Type Description filtered_mask

boolean array (or index array) where true means the molecule does not violate Bredt's rule.

"},{"location":"api/medchem.filter.html#medchem.filter.lead.catalog_filter","title":"catalog_filter(mols, catalogs, return_idx=False, n_jobs=None, progress=False, scheduler='processes', batch_size=100)","text":"

Filter a list of compounds according to catalog of structures alerts and patterns

Parameters:

Name Type Description Default mols Sequence[Union[str, dm.Mol]]

list of input molecules

required catalogs List[Union[str, FilterCatalog]]

list of catalogs (name or FilterCatalog)

required return_idx bool

whether to return index or a boolean mask

False n_jobs Optional[int]

number of parallel job to run. Sequential by default

None progress bool

whether to show progress bar

False scheduler str

joblib scheduler to use

'processes' batch_size int

batch size for parallel processing. Note that batch_size should be increased if the number of used CPUs gets very large.

100

Returns:

Name Type Description filtered_mask

boolean array (or index array) where true means the molecule is not found in the catalog.

"},{"location":"api/medchem.filter.html#medchem.filter.lead.chemical_group_filter","title":"chemical_group_filter(mols, chemical_group, return_idx=False, n_jobs=None, progress=False, scheduler='threads')","text":"

Filter a list of compounds according to a chemical group instance.

Note

This function will return the list of molecules that DO NOT match the chemical group

Parameters:

Name Type Description Default mols Iterable[Union[str, dm.Mol]]

list of input molecules

required chemical_group ChemicalGroup

a chemical group instance with the required functional groups to use.

required return_idx bool

whether to return index or a boolean mask

False n_jobs Optional[int]

number of parallel job to run. Sequential by default

None progress bool

whether to show progress bar

False scheduler str

joblib scheduler to use

'threads'

Returns:

Name Type Description filtered_mask

boolean array (or index array) where true means the molecule DOES NOT MATCH the groups.

"},{"location":"api/medchem.filter.html#medchem.filter.lead.complexity_filter","title":"complexity_filter(mols, complexity_metric='bertz', threshold_stats_file='zinc_15_available', limit='99', return_idx=False, n_jobs=None, progress=False, scheduler='processes')","text":"

Filter a list of compounds according to a complexity metric

Parameters:

Name Type Description Default mols Iterable[Union[str, dm.Mol]]

list of input molecules

required complexity_metric str

complexity metric to use Use ComplexityFilter.list_default_available_filters to list default filters. The following complexity metrics are supported by default * \"bertz\": bertz complexity index * \"sas\": synthetic accessibility score (zinc_15_available only) * \"qed\": qed score (zinc_15_available only) * \"clogp\": clogp for how greasy a molecule is compared to other in the same mw range (zinc_15_available only) * \"whitlock\": whitlock complexity index * \"barone\": barone complexity index * \"smcm\": synthetic and molecular complexity * \"twc\": total walk count complexity (zinc_15_available only)

'bertz' threshold_stats_file str

complexity threshold statistic origin to use

'zinc_15_available' limit str

complexity outlier percentile to use

'99' return_idx bool

whether to return index or a boolean mask

False n_jobs Optional[int]

number of parallel job to run. Sequential by default

None progress bool

whether to show progress bar

False scheduler str

joblib scheduler to use

'processes' Also see

medchem.complexity.ComplexityFilter

Returns:

Name Type Description filtered_mask

boolean array (or index array) where true means the molecule MATCH the rules.

"},{"location":"api/medchem.filter.html#medchem.filter.lead.lilly_demerit_filter","title":"lilly_demerit_filter(smiles, max_demerits=160, return_idx=False, n_jobs=None, progress=False, **kwargs)","text":"

Run Lilly demerit filtering on current list of molecules

Parameters:

Name Type Description Default smiles Iterable[str]

list of input molecules as smiles preferably

required max_demerits Optional[int]

Cutoff to reject molecules Defaults to 160.

160 return_idx bool

whether to return a mask or a list of valid indexes

False progress bool

whether to show progress bar

False kwargs

parameters specific to the demerits.score function

{}

Returns:

Name Type Description filtered_mask

boolean array (or index array) where true means the molecule is ok.

"},{"location":"api/medchem.filter.html#medchem.filter.lead.molecular_graph_filter","title":"molecular_graph_filter(mols, max_severity=5, return_idx=False, n_jobs=None, progress=False, scheduler='threads')","text":"

Filter a list of compounds according to unstable molecular graph filter list.

This list was obtained from observation. The disallowed graphs are:

  • K3,3 or K2,4 structure
  • Cone of P4 or K4 with 3-ear
  • Node in more than one ring of length 3 or 4

Parameters:

Name Type Description Default mols Iterable[Union[str, dm.Mol]]

list of input molecules

required max_severity int

maximum acceptable severity (1-10). Default is <5

5 return_idx bool

whether to return index or a boolean mask

False n_jobs Optional[int]

number of parallel job to run. Sequential by default

None progress bool

whether to show progress bar

False scheduler str

joblib scheduler to use

'threads'

Returns:

Name Type Description filtered_mask

boolean array (or index array) where true means the molecule is not flagged by the unstable molecular graph rules.

"},{"location":"api/medchem.filter.html#medchem.filter.lead.protecting_groups_filter","title":"protecting_groups_filter(mols, return_idx=False, protecting_groups=['fmoc', 'tert-butoxymethyl', 'tert-butyl carbamate', 'tert-butyloxycarbonyl'], n_jobs=None, progress=False, scheduler='threads')","text":"

Filter a list of compounds according to matches to known protecting groups. Note that this is syntactic sugar for calling chemical_group_filter with the protecting groups subset

Parameters:

Name Type Description Default mols Iterable[Union[str, dm.Mol]]

list of input molecules

required protecting_groups str

type of protecting group to consider. If not provided, all will be used (not advised)

['fmoc', 'tert-butoxymethyl', 'tert-butyl carbamate', 'tert-butyloxycarbonyl'] return_idx bool

whether to return index or a boolean mask

False n_jobs Optional[int]

number of parallel job to run. Sequential by default

None progress bool

whether to show progress bar

False scheduler str

joblib scheduler to use

'threads'

Returns:

Name Type Description filtered_mask

boolean array (or index array) where true means the molecule DOES NOT MATCH the groups.

"},{"location":"api/medchem.filter.html#medchem.filter.lead.rules_filter","title":"rules_filter(mols, rules, return_idx=False, n_jobs=None, progress=False, scheduler='processes')","text":"

Filter a list of compounds according to a predefined set of rules

Parameters:

Name Type Description Default mols Iterable[Union[str, dm.Mol]]

list of input molecules

required rules Union[List[Any], RuleFilters]

list of rules to apply to the input molecules.

required return_idx bool

whether to return index or a boolean mask

False n_jobs Optional[int]

number of parallel job to run. Sequential by default

None progress bool

whether to show progress bar

False scheduler str

joblib scheduler to use

'processes'

Returns:

Name Type Description filtered_mask

boolean array (or index array) where true means the molecule MATCH the rules.

"},{"location":"api/medchem.filter.html#medchem.filter.lead.screening_filter","title":"screening_filter(mols, n_jobs=None, max_severity=10, return_idx=False)","text":"

Filter a set of molecules based on novartis screening deck curation process Schuffenhauer, A. et al. Evolution of Novartis' small molecule screening deck design, J. Med. Chem. (2020) DOI. https://dx.doi.org/10.1021/acs.jmedchem.0c01332

Note

The severity argument corresponds to the accumulated severity for a compound across all patterns in the catalog.

Parameters:

Name Type Description Default mols Iterable[Union[str, dm.Mol]]

list of input molecules

required n_jobs Optional[int]

number of parallel job to run. Sequential by default

None max_severity int

maximum severity allowed. Default is <10

10 return_idx bool

Whether to return the filtered index

False

Returns:

Name Type Description filtered_mask

boolean array (or index array) where true means the molecule IS NOT REJECTED (i.e not found in the alert catalog).

"},{"location":"api/medchem.filter.html#medchem.filter.generic","title":"medchem.filter.generic","text":""},{"location":"api/medchem.filter.html#medchem.filter.generic.atom_list_filter","title":"atom_list_filter(mols, unwanted_atom_list=None, wanted_atom_list=None, return_idx=False, n_jobs=None, progress=False, scheduler=None)","text":"

Find molecule without any atom from a set of unwanted atom symbols and with all atoms in the set of desirable atom list

Parameters:

Name Type Description Default mols Iterable[Union[str, dm.Mol]]

list of input molecules

required unwanted_atom_list Optional[Iterable]

list of undesirable atom symbol

None wanted_atom_list Optional[Iterable]

list of desirable atom symbol

None return_idx bool

whether to return index or a boolean mask

False n_jobs Optional[int]

number of parallel job to run. Sequential by default

None progress bool

whether to show progress bar

False scheduler Optional[str]

joblib scheduler to use

None

Returns:

Name Type Description filtered_mask

boolean array (or index array) where true means the molecule is ok.

"},{"location":"api/medchem.filter.html#medchem.filter.generic.halogenicity_filter","title":"halogenicity_filter(mols, thresh_F=6, thresh_Br=3, thresh_Cl=3, return_idx=False, n_jobs=None, progress=False, scheduler=None)","text":"

Find molecules that do not exceed the halogen thresholds. These thresholds are:

Parameters:

Name Type Description Default mols Iterable[Union[str, dm.Mol]]

list of input molecules

required thresh_F int

maximum number of fluorine

6 thresh_Br int

maximum number of bromine

3 thresh_Cl int

maximum number of chlorine

3 return_idx bool

whether to return index or a boolean mask

False n_jobs Optional[int]

number of parallel job to run. Sequential by default

None progress bool

whether to show progress bar

False scheduler Optional[str]

joblib scheduler to use

None

Returns:

Name Type Description filtered_mask

boolean array (or index array) where true means the molecule is ok.

"},{"location":"api/medchem.filter.html#medchem.filter.generic.macrocycle_filter","title":"macrocycle_filter(mols, max_cycle_size=10, return_idx=False, n_jobs=None, progress=False, scheduler=None)","text":"

Find molecules that do not infringe the strict maximum cycle size.

Parameters:

Name Type Description Default mols Iterable[Union[str, dm.Mol]]

list of input molecules

required max_cycle_size int

strict maximum macrocycle size

10 return_idx bool

whether to return index or a boolean mask

False n_jobs Optional[int]

number of parallel job to run. Sequential by default

None progress bool

whether to show progress bar

False scheduler Optional[str]

joblib scheduler to use

None

Returns:

Name Type Description filtered_mask

boolean array (or index array) where true means the molecule is ok.

"},{"location":"api/medchem.filter.html#medchem.filter.generic.num_atom_filter","title":"num_atom_filter(mols, min_atoms=None, max_atoms=None, return_idx=False, n_jobs=None, progress=False, scheduler=None)","text":"

Find molecules that match the atom number constraints. Returning True means the molecule is fine

Parameters:

Name Type Description Default mols Iterable[Union[str, dm.Mol]]

list of input molecules

required min_atoms Optional[int]

strict minimum number of atoms (atoms > min_atoms)

None max_atoms Optional[int]

strict maximum number of atoms (atoms < max_atoms)

None return_idx bool

whether to return index or a boolean mask

False n_jobs Optional[int]

number of parallel job to run. Sequential by default

None progress bool

whether to show progress bar

False scheduler Optional[str]

joblib scheduler to use

None

Returns:

Name Type Description filtered_mask

boolean array (or index array) where true means the molecule is ok.

"},{"location":"api/medchem.filter.html#medchem.filter.generic.num_stereo_center_filter","title":"num_stereo_center_filter(mols, max_stereo_centers=4, max_undefined_stereo_centers=2, return_idx=False, n_jobs=None, progress=False, scheduler=None)","text":"

Find molecules that match the number of stereo center constraints. Returning True means the molecule is fine

Parameters:

Name Type Description Default mols Iterable[Union[str, dm.Mol]]

list of input molecules

required max_stereo_centers

strict maximum number of stereo centers (<). Default is 4

4 max_undefined_stereo_centers Optional[int]

strict maximum number of undefined stereo centers (<). Default is 2

2 return_idx bool

whether to return index or a boolean mask

False n_jobs Optional[int]

number of parallel job to run. Sequential by default

None progress bool

whether to show progress bar

False scheduler Optional[str]

joblib scheduler to use

None

Returns:

Name Type Description filtered_mask

boolean array (or index array) where true means the molecule is ok.

"},{"location":"api/medchem.filter.html#medchem.filter.generic.ring_infraction_filter","title":"ring_infraction_filter(mols, hetcycle_min_size=4, return_idx=False, n_jobs=None, progress=False, scheduler=None)","text":"

Find molecules that have a ring infraction filter. Returning True means the molecule is fine

Parameters:

Name Type Description Default mols Iterable[Union[str, dm.Mol]]

list of input molecules

required hetcycle_min_size int

Minimum ring size before more than 1 hetero atom or any non single bond is allowed. This is a strict threshold (>)

4 return_idx bool

whether to return index or a boolean mask

False n_jobs Optional[int]

number of parallel job to run. Sequential by default

None progress bool

whether to show progress bar

False scheduler Optional[str]

joblib scheduler to use

None

Returns:

Name Type Description filtered_mask

boolean array (or index array) where true means the molecule is ok.

"},{"location":"api/medchem.filter.html#medchem.filter.generic.symmetry_filter","title":"symmetry_filter(mols, symmetry_threshold=0.8, return_idx=False, n_jobs=None, progress=False, scheduler=None)","text":"

Find molecules that are not symmetrical, given a symmetry threshold

Parameters:

Name Type Description Default mols Iterable[Union[str, dm.Mol]]

list of input molecules

required symmetry_threshold float

threshold to consider a molecule highly symmetrical

0.8 return_idx bool

whether to return index or a boolean mask

False n_jobs Optional[int]

number of parallel job to run. Sequential by default

None progress bool

whether to show progress bar

False scheduler Optional[str]

joblib scheduler to use

None

Returns:

Name Type Description filtered_mask

boolean array (or index array) where true means the molecule is ok.

"},{"location":"api/medchem.groups.html","title":"medchem.groups","text":""},{"location":"api/medchem.groups.html#medchem.groups","title":"medchem.groups","text":""},{"location":"api/medchem.groups.html#medchem.groups.ChemicalGroup","title":"ChemicalGroup","text":"

Build a library of chemical groups using a list of structures parsed from a file

The default library of structure has been curated from https://github.com/Sulstice/global-chem and additional open source data.

Note

For new chemical groups, please minimally provide the 'smiles'/'smarts', 'name' and \"group\" and optional 'hierarchy' columns

Warning

The SMILES and SMARTS used in the default list of substructures do not result in the same matches. Unless specified otherwise, the SMILES will be used in the matching done by this class, whereas due to RDKit's limitation, the SMARTS will be used in the matching done by the generated catalog. For more information see this discussion: https://github.com/valence-platform/medchem/pull/19,

"},{"location":"api/medchem.groups.html#medchem.groups.ChemicalGroup.dataframe","title":"dataframe property","text":"

Get the dataframe of the chemical groups

"},{"location":"api/medchem.groups.html#medchem.groups.ChemicalGroup.mol_smarts","title":"mol_smarts property","text":"

Get the SMARTS of the chemical groups in this instance

"},{"location":"api/medchem.groups.html#medchem.groups.ChemicalGroup.mols","title":"mols property","text":"

Get the Molecule object of the SMILES for the chemical groups in this instance

"},{"location":"api/medchem.groups.html#medchem.groups.ChemicalGroup.name","title":"name property","text":"

Get the Name of the chemical groups in this instance

"},{"location":"api/medchem.groups.html#medchem.groups.ChemicalGroup.smarts","title":"smarts property","text":"

Get the SMARTS of the chemical groups in this instance

"},{"location":"api/medchem.groups.html#medchem.groups.ChemicalGroup.smiles","title":"smiles property","text":"

Get the SMILES of the chemical groups in this instance

"},{"location":"api/medchem.groups.html#medchem.groups.ChemicalGroup.__init__","title":"__init__(groups=None, n_jobs=None, groups_db=None)","text":"

Build a chemical group library

Parameters:

Name Type Description Default groups Union[str, List[str]]

List of groups to use. Defaults to None where all functional groups are used

None n_jobs Optional[int]

Optional number of jobs to run in parallel for internally building the data. Defaults to None.

None groups_db Optional[os.PathLike]

Path to a file containing the dump of the chemical groups. Defaults to the internal dataset

None"},{"location":"api/medchem.groups.html#medchem.groups.ChemicalGroup.filter","title":"filter(names, fuzzy=False)","text":"

Filter the group to restrict to only the name in input

Parameters:

Name Type Description Default names List[str]

list of names to use for filters

required fuzzy bool

whether to use exact or fuzzy matching

False"},{"location":"api/medchem.groups.html#medchem.groups.ChemicalGroup.get_catalog","title":"get_catalog() cached","text":"

Build an rdkit catalog from the current chemical group data

"},{"location":"api/medchem.groups.html#medchem.groups.ChemicalGroup.get_matches","title":"get_matches(mol, use_smiles=True)","text":"

Get all the functional groups in this instance that matches the input molecule

Parameters:

Name Type Description Default mol Union[dm.Mol, str]

input molecule

required use_smiles bool

whether to use the smiles representation of the catalog or the smarts

True"},{"location":"api/medchem.groups.html#medchem.groups.ChemicalGroup.has_match","title":"has_match(mol)","text":"

Check whether the input molecule has any functional group in this instance

Parameters:

Name Type Description Default mol Union[dm.Mol, str]

input molecule

required"},{"location":"api/medchem.groups.html#medchem.groups.ChemicalGroup.list_groups","title":"list_groups()","text":"

List all the chemical groups available

"},{"location":"api/medchem.groups.html#medchem.groups.ChemicalGroup.list_hierarchy_groups","title":"list_hierarchy_groups()","text":"

List all the hierarchy in chemical groups available. To get the full hierarchy on each path, split by the . character.

"},{"location":"api/medchem.groups.html#medchem.groups.list_default_chemical_groups","title":"list_default_chemical_groups(hierachy=False)","text":"

List all the chemical groups available.

Note

chemical groups defines how a collection of patterns are organized. They do not correspond to individual pattern name.

Parameters:

Name Type Description Default hierarchy

whether to return the full hierarchy or the group name only

required

Returns:

Type Description

List of chemical groups

"},{"location":"api/medchem.groups.html#medchem.groups.list_functional_group_names","title":"list_functional_group_names(exclude_basic=True)","text":"

List common functional group names

Parameters:

Name Type Description Default exclude_basic bool

whether to exclude the basic functional groups

True

Returns:

Type Description

List of functional group names

"},{"location":"api/medchem.query.html","title":"medchem.query","text":"

This module helps build a filter based on a query language that can be parsed. By default, the default query parser will be used, which contains the following instructions that can be orchestrated using boolean operations (or, and, not, and parentheses)

"},{"location":"api/medchem.query.html#example","title":"Example","text":"
import datamol as dm\nfrom medchem.query.eval import QueryFilter\n\nquery = \"\"\"HASPROP(\"tpsa\" < 120) AND HASSUBSTRUCTURE(\"[OH]\", True)\"\"\"\nchemical_filter = QueryFilter(query, parser=\"lalr\")\nmols = dm.data.cdk2().mol[:10]\nchemical_filter(mols, n_jobs=-1) # [False, False, False, False, False, True, True, True, False, False]\n
"},{"location":"api/medchem.query.html#syntax","title":"Syntax","text":"

Any string provided as query argument needs to be quoted (similar to json) to avoid ambiguity in parsing. * An example of valid query is \"\"\"(HASPROP(\"tpsa\" > 120 ) | HASSUBSTRUCTURE(\"c1ccccc1\")) AND NOT HASALERT(\"pains\") OR HASSUBSTRUCTURE(\"[OH]\", max, 2)\"\"\". * Examples of invalid queries are * \"\"\"HASPROP(\"tpsa\" > 120) OR HASSUBSTRUCTURE(\"[OH]\", True, >, 3)\"\"\" : unexpected wrong operator > * \"\"\"HASPROP(tpsa > 120)\"\"\" : tpsa is not quoted * \"\"\"HASPROP(\"tpsa\") > 120\"\"\" : this is not part of the language specification * \"\"\"(HASPROP(\"tpsa\" > 120) AND HASSUBSTRUCTURE(\"[OH]\", True, max, 3 )\"\"\": mismatching parenthesis (

  • \"\"\"HASPROP(\"tpsa\" > 120) OR HASSUBSTRUCTURE(\"CO\")\"\"\", \"\"\"(HASPROP(\"tpsa\" > 120)) OR (HASSUBSTRUCTURE(\"CO\"))\"\"\" and \"\"\"(HASPROP(\"tpsa\" > 120) OR HASSUBSTRUCTURE(\"CO\"))\"\"\" are equivalent
"},{"location":"api/medchem.query.html#hasalert","title":"HASALERT","text":"

check whether a molecule has an alert from a catalog

# alert is one supported alert catalog by `medchem`. For example `pains`\nHASALERT(alert:str) \n

"},{"location":"api/medchem.query.html#hasgroup","title":"HASGROUP","text":"

check whether a molecule has a specific functional group from a catalog

# group is one supported functional group provided by `medchem`\nHASGROUP(group:str) \n
"},{"location":"api/medchem.query.html#matchrule","title":"MATCHRULE","text":"

check whether a molecule matches a predefined druglikeness rule from a catalog

# rule is one supported rule provided by `medchem`. For example `rule_of_five`\nMATCHRULE(rule:str) \n

"},{"location":"api/medchem.query.html#hassuperstructure","title":"HASSUPERSTRUCTURE","text":"

check whether a molecule has query as superstructure

# query is a SMILES\nHASSUPERSTRUCTURE(query:str) \n

"},{"location":"api/medchem.query.html#hassubstructure","title":"HASSUBSTRUCTURE","text":"

Check whether a molecule has query as substructure. Note that providing the comma separator , is mandatory here as each variable is an argument.

# query is a SMILES or a SMARTS, operator is defined below, is_smarts is a boolean\n\nHASSUBSTRUCTURE(query:str, is_smarts:Optional[bool], operator:Optional[str], limit:Optional[int])\n\n# which corresponds to setting these default values\nHASSUBSTRUCTURE(query:str, is_smarts=False, operator=\"min\", limit=1)\n# same as\nHASSUBSTRUCTURE(query:str, is_smarts=None, operator=None, limit=None)\n

Not providing optional arguments is allowed, but they need to be provided in the exact same order shown above. Thus:

  • HASSUBSTRUCTURE(\"CO\")
  • HASSUBSTRUCTURE(\"CO\", False)
  • HASSUBSTRUCTURE(\"CO\", False, min)
  • HASSUBSTRUCTURE(\"CO\", False, min, 1)

are all valid and equivalent (given their default values)

Furthermore, since the correct argument map can be inferred when no ambiguity arises, the following are valid but discouraged

  • HASSUBSTRUCTURE(\"CO\", False, 1)
  • HASSUBSTRUCTURE(\"CO\", min, 1)

Whereas, this is invalid: * HASSUBSTRUCTURE(\"CO\", min, False, 1)

"},{"location":"api/medchem.query.html#hasprop","title":"HASPROP","text":"

Check whether a molecule has prop as property within a defined limit. Any comma , provided between arguments will be ignored

# prop is a valid datamol.descriptors property, comparator is a required comparator operator and defined below\nHASPROP(prop:str comparator:str limit:float)\n
"},{"location":"api/medchem.query.html#like","title":"LIKE","text":"

Check whether a molecule is similar enough to another molecule. Any comma , provided between arguments will be ignored

# query is a SMILES\nLIKE(query:str  comparator:str limit:float)\n
"},{"location":"api/medchem.query.html#basic-operators","title":"Basic operators:","text":"
  • comparator: one of =, ==, !=, <, >, <=, >=
  • misc: the following misc values are accepted and parsed true, false, True, False, TRUE, FALSE
  • operator (can be quoted or unquoted):
  • MIN: min, MIN
  • MAX: max, MAX
  • boolean operator:
  • AND operator : AND or & or && or and
  • OR operator : OR or | or || or or
  • NOT operator : NOT or ! or ~ or not
"},{"location":"api/medchem.query.html#api","title":"API","text":""},{"location":"api/medchem.query.html#medchem.query.parser","title":"medchem.query.parser","text":""},{"location":"api/medchem.query.html#medchem.query.parser.QueryParser","title":"QueryParser","text":"

Bases: Transformer

Query parser for the custom query language for molecules. This parses the input language and builds a parseable and evaluable representation. The trick for lazy evaluation is to define a custom guard with 'fn(*)' around expressions that need to be evaluated.

Note that you SHOULD NOT HAVE TO INTERACT WITH THIS CLASS DIRECTLY.

Example

import medchem import lark QUERY_GRAMMAR = medchem.utils.loader.get_grammar(as_string=True) QUERY_PARSER = Lark(QUERY_GRAMMAR, parser=\"lalr\", transformer=QueryParser())

"},{"location":"api/medchem.query.html#medchem.query.parser.QueryParser--see-how-the-string-needs-to-be-quoted-this-builds-on-the-json-quote-requirements-to-avoid-dealing-with-unwanted-outcomes","title":"see how the string needs to be \"quoted\". This builds on the json quote requirements to avoid dealing with unwanted outcomes","text":"

example = \"\"\"(HASPROP(\"tpsa\" > 120 ) | HASSUBSTRUCTURE(\"c1ccccc1\")) AND NOT HASALERT(\"pains\") OR HASSUBSTRUCTURE(\"[OH]\", max)\"\"\" t = QUERY_PARSER.parse(example) print(t) ((((fn(getprop, prop='tpsa') > 120.0) or fn(hassubstructure, query='c1ccccc1', operator='None', limit=None, is_smarts=None)) and not fn(hasalert, alert='pains')) or fn(hassubstructure, query='[OH]', operator='max', limit=None, is_smarts=None))

"},{"location":"api/medchem.query.html#medchem.query.parser.QueryParser.bool_expr","title":"bool_expr(bool_term, *others)","text":"

Define how boolean expressions should be parsed

"},{"location":"api/medchem.query.html#medchem.query.parser.QueryParser.bool_term","title":"bool_term(bool_factor, *others)","text":"

Define how boolean terms should be parsed

"},{"location":"api/medchem.query.html#medchem.query.parser.QueryParser.hasalert","title":"hasalert(value)","text":"

Format the hasalert node in the query

Note

The parser does not enforce any validity on the argument and the underlying function is supposed to handle it.

"},{"location":"api/medchem.query.html#medchem.query.parser.QueryParser.hasgroup","title":"hasgroup(value)","text":"

Format the hasgroup node in the query

Note

The parser does not enforce any validity on the argument and the underlying function is supposed to handle it.

"},{"location":"api/medchem.query.html#medchem.query.parser.QueryParser.hasprop","title":"hasprop(value, comparator, limit)","text":"

Format the hasprop node in the query

Note

The parser does not enforce any validity on the argument and the underlying function is supposed to handle it.

"},{"location":"api/medchem.query.html#medchem.query.parser.QueryParser.hassubstructure","title":"hassubstructure(value, is_smarts, operator, limit)","text":"

Format the substructure node in the query

Note

The parser does not enforce any validity on the argument and the underlying function is supposed to handle it.

"},{"location":"api/medchem.query.html#medchem.query.parser.QueryParser.hassuperstructure","title":"hassuperstructure(value)","text":"

Format the superstructure node in the query

Note

The parser does not enforce any validity on the argument and the underlying function is supposed to handle it.

"},{"location":"api/medchem.query.html#medchem.query.parser.QueryParser.like","title":"like(value, comparator, limit)","text":"

Format the like node in the query

Note

The parser does not enforce any validity on the argument and the underlying function is supposed to handle it.

"},{"location":"api/medchem.query.html#medchem.query.parser.QueryParser.matchrule","title":"matchrule(value)","text":"

Format the matchrule node in the query

Note

The parser does not enforce any validity on the argument and the underlying function is supposed to handle it.

"},{"location":"api/medchem.query.html#medchem.query.parser.QueryParser.not_bool_factor","title":"not_bool_factor(*args)","text":"

Define representation of a negation

"},{"location":"api/medchem.query.html#medchem.query.eval","title":"medchem.query.eval","text":""},{"location":"api/medchem.query.html#medchem.query.eval.QueryFilter","title":"QueryFilter","text":"

Query filtering system based on a custom query grammar

"},{"location":"api/medchem.query.html#medchem.query.eval.QueryFilter.__call__","title":"__call__(mols, scheduler='processes', n_jobs=-1, progress=True)","text":"

Call the internal chemical filter that has been built

Parameters:

Name Type Description Default mols List[Union[str, dm.Mol]]

list of input molecules to filter

required n_jobs int

whether to run job in parallel and number of jobs to consider. Defaults to -1.

-1 scheduler

scheduler to use. Defaults to 'processes'.

'processes' progress bool

whether to show job progress. Defaults to True.

True"},{"location":"api/medchem.query.html#medchem.query.eval.QueryFilter.__init__","title":"__init__(query, grammar=None, parser='lalr')","text":"

Constructor for query filtering system

Parameters:

Name Type Description Default query str

input unparsed query

required grammar Optional[str]

path to grammar language to use. Defaults to None, which will use the default grammar.

None parser str

which Lark language parser to use. Defaults to \"lalr\".

'lalr'"},{"location":"api/medchem.query.html#medchem.query.eval.QueryOperator","title":"QueryOperator","text":"

A class to hold all the operators that can be used in queries

"},{"location":"api/medchem.query.html#medchem.query.eval.QueryOperator.getprop","title":"getprop(mol, prop) staticmethod","text":"

Compute the molecular property of a molecule. This is an alternative to the hasprop function, that does not enforce any comparison.

Parameters:

Name Type Description Default mol Union[dm.Mol, str]

input molecule

required prop str

molecular property to apply as filter on the molecule

required

Returns:

Name Type Description property float

computed property value

"},{"location":"api/medchem.query.html#medchem.query.eval.QueryOperator.hasalert","title":"hasalert(mol, alert) staticmethod","text":"

Check if a molecule matches a named alert catalog. The alert catalog needs to be one supported by the medchem package.

Parameters:

Name Type Description Default mol Union[dm.Mol, str]

input molecule

required alert str

named catalog to apply as filter on the molecule

required

Returns:

Name Type Description has_alert bool

whether the molecule has a given alert

"},{"location":"api/medchem.query.html#medchem.query.eval.QueryOperator.hasgroup","title":"hasgroup(mol, group) staticmethod","text":"

Check if a molecule has a specific functional group. Internally, this is done fetching the smarts corresponding to the group then calling QueryOperator.hassubstructure

Parameters:

Name Type Description Default mol Union[dm.Mol, str]

input molecule

required group str

functional group to check on the molecule.

required

Returns:

Name Type Description has_group bool

whether the molecule has the given functional group

"},{"location":"api/medchem.query.html#medchem.query.eval.QueryOperator.hasprop","title":"hasprop(mol, prop, comparator, limit) staticmethod","text":"

Check if a molecule has a molecular property within the desired range

Parameters:

Name Type Description Default mol Union[dm.Mol, str]

input molecule

required prop str

molecular property to apply as filter on the molecule

required comparator Callable

operator function to apply to check whether the molecule property matches the expected value

required limit float

limit value for determining whether the molecule property is within desired range

required

Returns:

Name Type Description has_property bool

whether the molecule has a given property within a desired range

"},{"location":"api/medchem.query.html#medchem.query.eval.QueryOperator.hassubstructure","title":"hassubstructure(mol, query, is_smarts=False, operator='min', limit=1) staticmethod","text":"

Check if a molecule has substructure provided by a query

Parameters:

Name Type Description Default mol Union[dm.Mol, str]

input molecule

required query str

input smarts query

required is_smarts bool

whether this is a smarts query or not

False operator str

one of min or max to specify the min or max limit

'min' limit int

limit of substructures to be found

1

Returns:

Name Type Description has_substructure bool

whether the query is a subgraph of the molecule

"},{"location":"api/medchem.query.html#medchem.query.eval.QueryOperator.hassuperstructure","title":"hassuperstructure(mol, query) staticmethod","text":"

Check if a molecule has a superstructure defined by a query. Note that a superstructure cannot be a query (smarts)

Parameters:

Name Type Description Default mol Union[dm.Mol, str]

input molecule

required query str

input SMILES query

required

Returns:

Name Type Description has_superstructure bool

whether the molecule is a subgraph of the query

"},{"location":"api/medchem.query.html#medchem.query.eval.QueryOperator.like","title":"like(mol, query, comparator, limit) staticmethod","text":"

Check if a molecule is similar or distant enough from another molecule using the Tanimoto ECFP distance. The computed similarity is compared against limit using the supplied comparator.

Parameters:

Name Type Description Default mol Union[dm.Mol, str]

input molecule

required query Union[dm.Mol, str]

input molecule to compare with

required comparator Callable[[float, float], bool]

operator function to apply to check whether the molecule property matches the expected value. Takes computed_similarity and limit as arguments and returns a boolean.

required limit float

limit value for determining whether the molecule property is within desired range

required

Returns:

Name Type Description is_similar bool

whether the molecule is similar or distant enough from the query

"},{"location":"api/medchem.query.html#medchem.query.eval.QueryOperator.matchrule","title":"matchrule(mol, rule) staticmethod","text":"

Check if a molecule matches a druglikeness rule

Parameters:

Name Type Description Default mol Union[dm.Mol, str]

input molecule

required rule str

druglikeness rule to check on the molecule.

required

Returns:

Name Type Description match_rule bool

whether the molecule matches the given rule

"},{"location":"api/medchem.query.html#medchem.query.eval.QueryOperator.similarity","title":"similarity(mol, query) staticmethod","text":"

Compute the ECFP Tanimoto similarity between two molecules. This is an alternative to the like function that does not enforce any comparison, and is useful for letting Python handle the binary comparison operators.

Parameters:

Name Type Description Default mol Union[dm.Mol, str]

input molecule

required query Union[dm.Mol, str]

input query molecule to compute similarity against

required

Returns:

Name Type Description similarity float

computed similarity value between mol and query

"},{"location":"api/medchem.rules.html","title":"medchem.rules","text":""},{"location":"api/medchem.rules.html#medchem.rules.basic_rules","title":"medchem.rules.basic_rules","text":""},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_chemaxon_druglikeness","title":"rule_of_chemaxon_druglikeness(mol, mw=None, clogp=None, n_hba=None, n_hbd=None, n_rotatable_bonds=None, n_rings=None, **kwargs)","text":"

Compute the drug likeness filter according to chemaxon:

It computes: MW < 400 & logP < 5 & HBA <= 10 & HBD <= 5 & ROTBONDS < 5 & ring > 0

Parameters:

Name Type Description Default mol Union[dm.Mol, str]

input molecule

required mw Optional[float]

precomputed molecular weight. Defaults to None.

None clogp Optional[float]

precomputed cLogP. Defaults to None.

None n_hba Optional[float]

precomputed number of HBA. Defaults to None.

None n_hbd Optional[float]

precomputed number of HBD. Defaults to None.

None n_rotatable_bonds Optional[int]

precomputed number of rotatable bonds in the molecule. Defaults to None.

None n_rings Optional[int]

precomputed number of rings in the molecule. Defaults to None.

None

Returns:

Name Type Description roc

True if molecule is compliant, False otherwise

"},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_cns","title":"rule_of_cns(mol, mw=None, clogp=None, n_hba=None, n_hbd=None, tpsa=None, **kwargs)","text":"

Computes drug likeness rule for CNS penetrant molecules as described in: Jeffrey & Summerfield (2010) Assessment of the blood-brain barrier in CNS drug discovery.

It computes: MW in [135, 582] & logP in [-0.2, 6.1] & TPSA in [3, 118] & HBD <= 3 & HBA <= 5

Parameters:

Name Type Description Default mol Union[dm.Mol, str]

input molecule

required mw Optional[float]

precomputed molecular weight. Defaults to None.

None clogp Optional[float]

precomputed logP. Defaults to None.

None n_hba Optional[float]

precomputed number of HBA. Defaults to None.

None n_hbd Optional[float]

precomputed number of HBD. Defaults to None.

None tpsa Optional[int]

precomputed TPSA. Defaults to None.

None

Returns:

Name Type Description roc

True if molecule is compliant, False otherwise

"},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_druglike_soft","title":"rule_of_druglike_soft(mol, mw=None, clogp=None, n_hba=None, n_hbd=None, tpsa=None, n_rotatable_bonds=None, n_rings=None, n_hetero_atoms=None, charge=None, **kwargs)","text":"

Compute the DrugLike Soft rule available in FAF-Drugs4. The rules are described at https://fafdrugs4.rpbs.univ-paris-diderot.fr/filters.html

It computes:

MW in [100, 600] & logP in [-3, 6] & HBD <= 7 & HBA <= 12 & TPSA <=180 & ROTBONDS <= 11 &\nRIGBONDS <= 30 & N_RINGS <= 6 & MAX_SIZE_RING <= 18 & N_CARBONS in [3, 35] &  N_HETEROATOMS in [1, 15] &\nHC_RATIO in [0.1, 1.1] & CHARGE in [-4, 4] & N_ATOM_CHARGE <= 4\n

Parameters:

Name Type Description Default mol Union[dm.Mol, str]

input molecule

required mw Optional[float]

precomputed molecular weight. Defaults to None.

None clogp Optional[float]

precomputed cLogP. Defaults to None.

None n_hba Optional[float]

precomputed number of HBA. Defaults to None.

None n_hbd Optional[float]

precomputed number of HBD. Defaults to None.

None tpsa Optional[float]

precomputed TPSA. Defaults to None.

None n_rotatable_bonds Optional[int]

precomputed number of rotatable bonds. Defaults to None.

None n_rings Optional[int]

precomputed number of rings in the molecules. Defaults to None.

None n_hetero_atoms Optional[int]

precomputed number of heteroatoms. Defaults to None.

None charge Optional[float]

precomputed charge. Defaults to None.

None"},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_egan","title":"rule_of_egan(mol, clogp=None, tpsa=None, **kwargs)","text":"

Compute passive intestinal absorption according to Egan Rules as described in: Egan, William J., Kenneth M. Merz, and John J. Baldwin (2000) Prediction of drug absorption using multivariate statistics

It computes: TPSA in [0, 132] & logP in [-1, 6]

Note

The author built a multivariate statistics model of passive intestinal absorption with robust outlier detection. Outliers were identified as being actively transported. They chose PSA and AlogP98 (cLogP), based on consideration of the physical processes involved in membrane permeability and the interrelationships and redundancies between other available descriptors. Compounds, which had been assayed for Caco-2 cell permeability, demonstrated a good rate of successful predictions (74\u221292%)

Parameters:

Name Type Description Default mol Union[dm.Mol, str]

input molecule

required clogp Optional[float]

precomputed cLogP. Defaults to None.

None tpsa Optional[float]

precomputed TPSA. Defaults to None.

None

Returns:

Name Type Description roe

True if molecule is compliant, False otherwise

"},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_five","title":"rule_of_five(mol, mw=None, clogp=None, n_lipinski_hbd=None, n_lipinski_hba=None, **kwargs)","text":"

Compute the Lipinski's rule-of-5 for a molecule. Also known as Pfizer's rule of five or RO5, this rule is a rule of thumb to evaluate the druglikeness of a chemical compound

It computes: MW <= 500 & logP <= 5 & HBD <= 5 & HBA <= 10

Parameters:

Name Type Description Default mol Union[dm.Mol, str]

input molecule

required mw Optional[float]

precomputed molecular weight. Defaults to None.

None clogp Optional[float]

precomputed cLogP. Defaults to None.

None n_lipinski_hbd Optional[float]

precomputed number of HBD. Defaults to None.

None n_lipinski_hba Optional[float]

precomputed number of HBA. Defaults to None.

None

Returns:

Name Type Description ro5

True if molecule is compliant, False otherwise

"},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_five_beyond","title":"rule_of_five_beyond(mol, mw=None, clogp=None, n_hbd=None, n_hba=None, tpsa=None, n_rotatable_bonds=None, **kwargs)","text":"

Compute the Beyond rule-of-5 rule for a molecule. This rule illustrates the potential of compounds far beyond rule of 5 space to modulate novel and difficult target classes that have large, flat, and groove-shaped binding sites and has been described in:

Doak, Bradley C., et al. (2015) How Beyond Rule of 5 Drugs and Clinical Candidates Bind to Their Targets.

It computes: MW <= 1000 & logP in [-2, 10] & HBD <= 6 & HBA <= 15 & TPSA <=250 & ROTBONDS <= 20

Note

This is a very permissive rule and is likely to not be a good predictor for druglikeness as known for small molecules.

Parameters:

Name Type Description Default mol Union[dm.Mol, str]

input molecule

required mw Optional[float]

precomputed molecular weight. Defaults to None.

None clogp Optional[float]

precomputed cLogP. Defaults to None.

None n_hbd Optional[float]

precomputed number of HBD. Defaults to None.

None n_hba Optional[float]

precomputed number of HBA. Defaults to None.

None tpsa Optional[float]

precomputed TPSA. Defaults to None.

None n_rotatable_bonds Optional[int]

precomputed number of rotatable bonds. Defaults to None.

None

Returns:

Name Type Description ro5

True if molecule is compliant, False otherwise

"},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_four","title":"rule_of_four(mol, mw=None, clogp=None, n_hba=None, n_rings=None, **kwargs)","text":"

Compute the rule-of-4 for a molecule. The rule-of-4 defines a rule of thumb for PPI inhibitors, which are typically larger and more lipophilic than inhibitors of more standard binding sites. It has been published in:

Morelli X, Bourgeas R, Roche P. (2011) Chemical and structural lessons from recent successes in protein\u2013protein interaction inhibition. Also see: Shin et al. (2020) Current Challenges and Opportunities in Designing Protein\u2013Protein Interaction Targeted Drugs. doi:10.2147/AABC.S235542

It computes: MW >= 400 & logP >= 4 & RINGS >=4 & HBA >= 4

Warning

Do not use this for small molecules that are not PPI inhibitors

Parameters:

Name Type Description Default mol Union[dm.Mol, str]

input molecule

required mw Optional[float]

precomputed molecular weight. Defaults to None.

None clogp Optional[float]

precomputed cLogP. Defaults to None.

None n_hba Optional[float]

precomputed number of HBA. Defaults to None.

None n_rings Optional[int]

precomputed number of rings in the molecules. Defaults to None.

None

Returns:

Name Type Description ro4

True if molecule is compliant, False otherwise

"},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_generative_design","title":"rule_of_generative_design(mol, mw=None, clogp=None, n_lipinski_hba=None, n_lipinski_hbd=None, tpsa=None, n_rotatable_bonds=None, n_hetero_atoms=None, charge=None, **kwargs)","text":"

Compute druglikeness rule of generative design.

This set of rules is proprietary to Valence Discovery and has been curated to better filter molecules suggested by generative models for small molecules

It computes:

MW in [200, 600] & logP in [-3, 6] & HBD <= 7  & HBA <= 12 & TPSA in [40, 180] &\nROTBONDS <= 15 & RIGID BONDS <= 30 & N_AROMATIC_RINGS <= 5 & N_FUSED_AROMATIC_RINGS_TOGETHER <= 2 &\nMAX_SIZE_RING_SYSTEM <= 18  & N_CARBONS in [3, 40] & N_HETEROATOMS in [1, 15] & CHARGE in [-2, 2] &\nN_ATOM_CHARGE <= 2 & N_TOTAL_ATOMS < 70 & N_HEAVY_METALS < 1\n

Parameters:

Name Type Description Default mol Union[dm.Mol, str]

input molecule

required mw Optional[float]

precomputed molecular weight. Defaults to None.

None clogp Optional[float]

precomputed cLogP. Defaults to None.

None n_lipinski_hba Optional[float]

precomputed number of HBA. Defaults to None.

None n_lipinski_hbd Optional[float]

precomputed number of HBD. Defaults to None.

None tpsa Optional[float]

precomputed TPSA. Defaults to None.

None n_rotatable_bonds Optional[int]

precomputed number of rotatable bonds. Defaults to None.

None n_hetero_atoms Optional[int]

precomputed number of heteroatoms. Defaults to None.

None charge Optional[float]

precomputed charge. Defaults to None.

None"},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_generative_design_strict","title":"rule_of_generative_design_strict(mol, mw=None, clogp=None, n_lipinski_hba=None, n_lipinski_hbd=None, tpsa=None, n_rotatable_bonds=None, n_hetero_atoms=None, charge=None, **kwargs)","text":"

Compute druglikeness rule of generative design.

This set of rules is proprietary to Valence Discovery and has been curated to better filter molecules suggested by generative models

It computes:

MW in [200, 600] & logP in [-3, 6] & HBD <= 7  & HBA <= 12 & TPSA in [40, 180] &\nROTBONDS <= 15 & RIGID BONDS <= 30 & N_AROMATIC_RINGS <= 5 & N_FUSED_AROMATIC_RINGS_TOGETHER <= 2 &\nMAX_SIZE_RING_SYSTEM <= 18  & N_CARBONS in [3, 40] & N_HETEROATOMS in [1, 15] & CHARGE in [-2, 2] &\nN_ATOM_CHARGE <= 2 & N_TOTAL_ATOMS < 70 & N_HEAVY_METALS < 1 & N_STEREO_CENTER <= 3 &\nHAS_NO_SPIDER_SIDE_CHAINS & FRACTION_RING_SYSTEM >= 0.25\n

By default SPIDER_SIDE_CHAINS are defined as having at least 2 'chains' of >=4 consecutive atoms in side chains (not part of any ring system)

Parameters:

Name Type Description Default mol Union[dm.Mol, str]

input molecule

required mw Optional[float]

precomputed molecular weight. Defaults to None.

None clogp Optional[float]

precomputed cLogP. Defaults to None.

None n_lipinski_hba Optional[float]

precomputed number of HBA. Defaults to None.

None n_lipinski_hbd Optional[float]

precomputed number of HBD. Defaults to None.

None tpsa Optional[float]

precomputed TPSA. Defaults to None.

None n_rotatable_bonds Optional[int]

precomputed number of rotatable bonds. Defaults to None.

None n_hetero_atoms Optional[int]

precomputed number of heteroatoms. Defaults to None.

None charge Optional[float]

precomputed charge. Defaults to None.

None"},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_ghose","title":"rule_of_ghose(mol, mw=None, clogp=None, mr=None, **kwargs)","text":"

Compute the Ghose filter. The Ghose filter is a drug-like filter described in: Ghose, AK.; Viswanadhan, VN.; Wendoloski JJ. (1999) A knowledge-based approach in designing combinatorial or medicinal chemistry libraries for drug discovery.1. A qualitative and quantitative characterization of known drug databases.

It computes: MW in [160, 480] & logP in [-0.4, 5.6] & Natoms in [20, 70] & refractivity in [40, 130]

Parameters:

Name Type Description Default mol Union[dm.Mol, str]

input molecule

required mw Optional[float]

precomputed molecular weight. Defaults to None.

None clogp Optional[float]

precomputed cLogP. Defaults to None.

None mr Optional[float]

precomputed molecule refractivity. Defaults to None.

None

Returns:

Name Type Description rog

True if molecule is compliant, False otherwise

"},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_gsk_4_400","title":"rule_of_gsk_4_400(mol, mw=None, clogp=None, **kwargs)","text":"

Compute GSK Rule (4/400) for druglikeness using interpretable ADMET rule of thumb based on Gleeson, M. Paul (2008). Generation of a set of simple, interpretable ADMET rules of thumb.

It computes: MW <= 400 & logP <= 4.

Note

  • The rules are based on a set of consistent structure-property guides determined from an analysis of a number of key ADMET assays run within GSK: solubility, permeability, bioavailability, volume of distribution, plasma protein binding, CNS penetration, brain tissue binding, P-gp efflux, hERG inhibition, and cytochrome P450 1A2/2C9/2C19/2D6/3A4 inhibition.
  • Conclusion: It is clear from the analyses reported herein that almost all ADMET parameters deteriorate with either increasing molecular weight, logP, or both, with ionization state playing either a beneficial or detrimental effect depending on the parameter in question.

Parameters:

Name Type Description Default mol Union[dm.Mol, str]

input molecule

required clogp Optional[float]

precomputed cLogP. Defaults to None.

None

Returns:

Name Type Description rog

True if molecule is compliant, False otherwise

"},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_leadlike_soft","title":"rule_of_leadlike_soft(mol, mw=None, clogp=None, n_hba=None, n_hbd=None, tpsa=None, n_rotatable_bonds=None, n_rings=None, n_hetero_atoms=None, charge=None, **kwargs)","text":"

Compute the Lead-Like Soft rule available in FAF-Drugs4. The rules are described at https://fafdrugs4.rpbs.univ-paris-diderot.fr/filters.html

It computes:

MW in [150, 400] & logP in [-3, 4] & HBD <= 4 & HBA <= 7 & TPSA <=160 & ROTBONDS <= 9 &\nRIGBONDS <= 30 & N_RINGS <= 4 & MAX_SIZE_RING <= 18 & N_CARBONS in [3, 35] &  N_HETEROATOMS in [1, 15] &\nHC_RATIO in [0.1, 1.1] & CHARGE in [-4, 4] & N_ATOM_CHARGE <= 4 & N_STEREO_CENTER <= 2\n

Parameters:

Name Type Description Default mol Union[dm.Mol, str]

input molecule

required mw Optional[float]

precomputed molecular weight. Defaults to None.

None clogp Optional[float]

precomputed cLogP. Defaults to None.

None n_hba Optional[float]

precomputed number of HBA. Defaults to None.

None n_hbd Optional[float]

precomputed number of HBD. Defaults to None.

None tpsa Optional[float]

precomputed TPSA. Defaults to None.

None n_rotatable_bonds Optional[int]

precomputed number of rotatable bonds. Defaults to None.

None n_rings Optional[int]

precomputed number of rings in the molecules. Defaults to None.

None n_hetero_atoms Optional[int]

precomputed number of heteroatoms. Defaults to None.

None charge Optional[float]

precomputed charge. Defaults to None.

None"},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_oprea","title":"rule_of_oprea(mol, n_hba=None, n_hbd=None, n_rotatable_bonds=None, n_rings=None, **kwargs)","text":"

Computes Oprea's rule of drug likeness obtained by comparing drug vs non drug compounds across multiple datasets. The rules have been described in: Oprea (2000) Property distribution of drug-related chemical databases

It computes: HBD in [0, 2] & HBA in [2, 9] & ROTBONDS in [2,8] and RINGS in [1, 4]

Note

Seventy percent of the `drug-like' compounds were found between the following limits: 0 \u2264 HDO \u2264 2, 2 \u2264 HAC \u2264 9, 2 \u2264 RTB \u2264 8, and 1 \u2264 RNG \u2264 4

Parameters:

Name Type Description Default mol Union[dm.Mol, str]

input molecule

required n_hba Optional[float]

precomputed number of HBA. Defaults to None.

None n_hbd Optional[float]

precomputed number of HBD. Defaults to None.

None n_rotatable_bonds Optional[int]

precomputed number of rotatable bonds in the molecule. Defaults to None.

None n_rings Optional[int]

precomputed number of rings in the molecule. Defaults to None.

None

Returns roo: True if molecule is compliant, False otherwise

"},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_pfizer_3_75","title":"rule_of_pfizer_3_75(mol, clogp=None, tpsa=None, **kwargs)","text":"

Compute Pfizer Rule (3/75 Rule) for in vivo toxicity. It has been described in: * Hughes, et al. (2008) Physiochemical drug properties associated with in vivo toxicological outcomes. * Price et al. (2009) Physicochemical drug properties associated with in vivo toxicological outcomes: a review

It computes: ! (TPSA < 75 & logP > 3)

Note

  • In vivo toleration (IVT) studies on 245 preclinical Pfizer compounds found an increased likelihood of toxic events for less polar, more lipophilic compounds.
  • Compounds with low clogP / high TPSA are \u223c 2.5 times more likely not to have any toxicity issue at a fixed concentration of 10 uM (total) or 1 uM (free);
  • Compounds with high clogP / low TPSA are \u223c 2.5 times more likely to have a toxicity finding; this represents an overall odds >= 6.

Parameters:

Name Type Description Default mol Union[dm.Mol, str]

input molecule

required clogp Optional[float]

precomputed cLogP. Defaults to None.

None tpsa Optional[float]

precomputed TPSA. Defaults to None.

None

Returns:

Name Type Description rop

True if molecule is compliant, False otherwise

"},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_reos","title":"rule_of_reos(mol, mw=None, clogp=None, n_hba=None, n_hbd=None, charge=None, n_rotatable_bonds=None, n_heavy_atoms=None, **kwargs)","text":"

Compute the REOS filter. The REOS filter is a filter designed to filter out unuseful compounds from HTS screening results. The filter is described in: Waters & Namchuk (2003) Designing screens: how to make your hits a hit.

It computes: MW in [200, 500] & logP in [-5, 5] & HBA in [0, 10] & HBD in [0, 5] & charge in [-2, 2] & ROTBONDS in [0, 8] & NHeavyAtoms in [15, 50]

Parameters:

Name Type Description Default mol Union[dm.Mol, str]

input molecule

required mw Optional[float]

precomputed molecular weight. Defaults to None.

None clogp Optional[float]

precomputed cLogP. Defaults to None.

None n_hba Optional[float]

precomputed number of HBA. Defaults to None.

None n_hbd Optional[float]

precomputed number of HBD. Defaults to None.

None charge Optional[int]

precomputed formal charge. Defaults to None.

None n_rotatable_bonds Optional[int]

precomputed number of rotatable bonds in the molecule. Defaults to None.

None n_heavy_atoms Optional[int]

precomputed number of heavy atoms in the molecule. Defaults to None.

None

Returns:

Name Type Description ror

True if molecule is compliant, False otherwise

"},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_respiratory","title":"rule_of_respiratory(mol, mw=None, clogp=None, n_hba=None, n_hbd=None, tpsa=None, n_rotatable_bonds=None, n_rings=None, **kwargs)","text":"

Computes drug likeness rule for Respiratory (nasal/inhalatory) molecules as described in Ritchie et al. (2009) Analysis of the Calculated Physicochemical Properties of Respiratory Drugs: Can We Design for Inhaled Drugs Yet?

It computes: MW in [240, 520] & logP in [-2, 4.7] & HBONDS in [6, 12] & TPSA in [51, 135] & ROTBONDS in [3,8] & RINGS in [1,5]

Parameters:

Name Type Description Default mol Union[dm.Mol, str]

input molecule

required mw Optional[float]

precomputed molecular weight. Defaults to None.

None clogp Optional[float]

precomputed logP. Defaults to None.

None n_hba Optional[float]

precomputed number of HBA. Defaults to None.

None n_hbd Optional[float]

precomputed number of HBD. Defaults to None.

None tpsa Optional[int]

precomputed TPSA. Defaults to None.

None n_rotatable_bonds Optional[int]

precomputed number of rotatable bonds in the molecule. Defaults to None.

None n_rings Optional[int]

precomputed number of rings. Defaults to None

None

Returns:

Name Type Description roc

True if molecule is compliant, False otherwise

"},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_three","title":"rule_of_three(mol, mw=None, clogp=None, n_hba=None, n_hbd=None, n_rotatable_bonds=None, **kwargs)","text":"

Compute the rule-of-3. The rule-of-three is a rule of thumb for molecular fragments (and not small molecules) published in:

Congreve M, Carr R, Murray C, Jhoti H. (2003) A \"rule of three\" for fragment-based lead discovery?.

It computes: MW <= 300 & logP <= 3 & HBA <= 3 & HBD <= 3 & ROTBONDS <= 3

Note

TPSA is not used in this version of the rule of three. Other version uses TPSA <= 60 AND logP in [-3, 3] in addition

Parameters:

Name Type Description Default mol Union[dm.Mol, str]

input molecule

required mw Optional[float]

precomputed molecular weight. Defaults to None.

None clogp Optional[float]

precomputed cLogP. Defaults to None.

None n_hba Optional[float]

precomputed number of HBA. Defaults to None.

None n_hbd Optional[float]

precomputed number of HBD. Defaults to None.

None n_rotatable_bonds Optional[int]

precomputed number of rotatable bonds in the molecule. Defaults to None.

None

Returns:

Name Type Description ro3

True if molecule is compliant, False otherwise

"},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_three_extended","title":"rule_of_three_extended(mol, mw=None, clogp=None, n_hba=None, n_hbd=None, tpsa=None, n_rotatable_bonds=None, **kwargs)","text":"

Compute the extended rule-of-3. This is an extension of the rule of three that computes:

It computes: MW <= 300 & logP in [-3, 3] & HBA <= 6 & HBD <= 3 & ROTBONDS <= 3 & TPSA <= 60

Parameters:

Name Type Description Default mol Union[dm.Mol, str]

input molecule

required mw Optional[float]

precomputed molecular weight. Defaults to None.

None clogp Optional[float]

precomputed cLogP. Defaults to None.

None n_hba Optional[float]

precomputed number of HBA. Defaults to None.

None n_hbd Optional[float]

precomputed number of HBD. Defaults to None.

None tpsa Optional[float]

precomputed TPSA. Defaults to None.

None n_rotatable_bonds Optional[int]

precomputed number of rotatable bonds in the molecule. Defaults to None.

None

Returns:

Name Type Description ro3

True if molecule is compliant, False otherwise

"},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_two","title":"rule_of_two(mol, mw=None, clogp=None, n_hba=None, n_hbd=None, **kwargs)","text":"

Computes rules-of-2 for reagent (building block design). It aims for prioritization of reagents that typically do not add more than 200 Da in MW or 2 units of clogP. The rule of two has been described in:

Goldberg et al. (2015) Designing novel building blocks is an overlooked strategy to improve compound quality see: http://csmres.co.uk/cs.public.upd/article-downloads/Designing-novel-building-blocks.pdf

Note

Their analysis showed that molecular weight (MW) and clogP were important factors in the frequency of use of reagents. Other parameters, such as TPSA, HBA, HBD and ROTBONDS count, were less important.

It computes MW <= 200 & logP <= 2 & HBA <= 4 & HBD <= 2

Parameters:

Name Type Description Default mol Union[dm.Mol, str]

input molecule

required mw Optional[float]

precomputed molecular weight. Defaults to None.

None clogp Optional[float]

precomputed cLogP. Defaults to None.

None n_hba Optional[float]

precomputed number of HBA. Defaults to None.

None n_hbd Optional[float]

precomputed number of HBD. Defaults to None.

None

Returns:

Name Type Description ro2

True if molecule is compliant, False otherwise

"},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_veber","title":"rule_of_veber(mol, tpsa=None, n_rotatable_bonds=None, **kwargs)","text":"

Compute the Veber filter. The Veber filter is a druglike filter for orally active drugs described in:

Veber et al. (2002) Molecular Properties That Influence the Oral Bioavailability of Drug Candidates.

It computes: ROTBONDS <= 10 & TPSA < 140

Parameters:

Name Type Description Default mol Union[dm.Mol, str]

input molecule

required tpsa Optional[float]

precomputed TPSA. Defaults to None.

None n_rotatable_bonds Optional[int]

precomputed number of rotatable bonds. Defaults to None.

None

Returns:

Name Type Description rov

True if molecule is compliant, False otherwise

"},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_xu","title":"rule_of_xu(mol, n_hba=None, n_hbd=None, n_rotatable_bonds=None, n_rings=None, n_heavy_atoms=None, **kwargs)","text":"

Computes Xu's rule of drug likeness as described in: Xu & Stevenson (2000), Drug-like Index: A New Approach To Measure Drug-like Compounds and Their Diversity

It computes HBD <= 5 & HBA <= 10 & ROTBONDS in [2, 35] & RINGS in [1, 7] & NHeavyAtoms in [10, 50].

Note

A compound's Drug Likeness Index is calculated based upon the knowledge derived from known drugs selected from Comprehensive Medicinal Chemistry (CMC) database.

Parameters:

Name Type Description Default mol Union[dm.Mol, str]

input molecule

required n_hba Optional[float]

precomputed number of HBA. Defaults to None.

None n_hbd Optional[float]

precomputed number of HBD. Defaults to None.

None n_rotatable_bonds Optional[int]

precomputed number of rotatable bonds in the molecule. Defaults to None.

None n_rings Optional[int]

precomputed number of rings in the molecule. Defaults to None.

None n_heavy_atoms Optional[int]

precomputed number of heavy atoms in the molecule. Defaults to None.

None

Returns rox: True if molecule is compliant, False otherwise

"},{"location":"api/medchem.rules.html#medchem.rules.basic_rules.rule_of_zinc","title":"rule_of_zinc(mol, mw=None, clogp=None, n_hba=None, n_hbd=None, tpsa=None, n_rotatable_bonds=None, n_rings=None, charge=None, **kwargs)","text":"

Compute the Zinc rule for a molecule. This rule is a rule of thumb to evaluate the druglikeness of chemical compounds, based on:

Irwin & Shoichet (2005) ZINC - A Free Database of Commercially Available Compounds for Virtual Screening.

Also see: https://fafdrugs4.rpbs.univ-paris-diderot.fr/filters.html

It computes: MW in [60, 600] & logP in [-4, 6] & HBD <= 6 & HBA <= 11 & TPSA <=150 & ROTBONDS <= 12 & RIGBONDS <= 50 & N_RINGS <= 7 & MAX_SIZE_RING <= 12 & N_CARBONS >=3 & HC_RATIO <= 2.0 & CHARGE in [-4, 4]

Parameters:

Name Type Description Default mol Union[dm.Mol, str]

input molecule

required mw Optional[float]

precomputed molecular weight. Defaults to None.

None clogp Optional[float]

precomputed cLogP. Defaults to None.

None n_hba Optional[float]

precomputed number of HBA. Defaults to None.

None n_hbd Optional[float]

precomputed number of HBD. Defaults to None.

None tpsa Optional[float]

precomputed TPSA. Defaults to None.

None n_rotatable_bonds Optional[int]

precomputed number of rotatable bonds. Defaults to None.

None n_rings Optional[int]

precomputed number of rings in the molecules. Defaults to None.

None charge Optional[float]

precomputed charge. Defaults to None.

None"},{"location":"api/medchem.rules.html#medchem.rules.rule_filter","title":"medchem.rules.rule_filter","text":""},{"location":"api/medchem.rules.html#medchem.rules.rule_filter.RuleFilters","title":"RuleFilters","text":"

Build a filter based on a compound's physicochemical properties. For a list of default rules, use RuleFilters.list_available_rules(). Most of these rules have been collected from the literature, including https://fafdrugs4.rpbs.univ-paris-diderot.fr/descriptors.html

"},{"location":"api/medchem.rules.html#medchem.rules.rule_filter.RuleFilters.__call__","title":"__call__(mols, n_jobs=None, progress=False, scheduler='processes')","text":"

Compute the rules for a list of molecules

Parameters:

Name Type Description Default mols List[Union[str, dm.Mol]]

list of input molecule object.

required n_jobs Optional[int]

number of jobs to run in parallel. Defaults to None.

None progress bool

whether to show progress or not. Defaults to False.

False scheduler str

which scheduler to use. Defaults to \"processes\".

'processes'

Returns:

Name Type Description df

Dataframe where each row is a molecule and each column is the outcome of applying self.rules[column].

"},{"location":"api/medchem.rules.html#medchem.rules.rule_filter.RuleFilters.__getitems__","title":"__getitems__(ind)","text":"

Return a specific rule

"},{"location":"api/medchem.rules.html#medchem.rules.rule_filter.RuleFilters.__init__","title":"__init__(rule_list, rule_list_names=None, precompute_props=True)","text":"

Build a rule filtering object

Parameters:

Name Type Description Default rule_list List[Union[str, Callable]]

list of rules to apply. Either a callable that takes a molecule as input (with kwargs) or a string of the name of a pre-defined rule as defined in the basic_rules module

required rule_list_names Optional[List[str]]

Name of the rules passed as inputs. Defaults to None.

None precompute_props bool

Whether to precompute the properties for all molecules to speed up redundant calculation. Defaults to True.

True"},{"location":"api/medchem.rules.html#medchem.rules.rule_filter.RuleFilters.__len__","title":"__len__()","text":"

Return the number of rules inside this filter

"},{"location":"api/medchem.rules.html#medchem.rules.rule_filter.RuleFilters.list_available_rules","title":"list_available_rules(query=None) cached staticmethod","text":"

List all the available rules and their properties

"},{"location":"api/medchem.rules.html#medchem.rules.rule_filter.RuleFilters.list_available_rules_names","title":"list_available_rules_names(query=None) cached staticmethod","text":"

List only the names of the available rules

"},{"location":"api/medchem.utils.html","title":"medchem.utils","text":""},{"location":"api/medchem.utils.html#medchem.utils.smarts","title":"medchem.utils.smarts","text":""},{"location":"api/medchem.utils.html#medchem.utils.smarts.SMARTSUtils","title":"SMARTSUtils","text":"

Collection of utilities to help non-experienced users build complex SMARTS queries more efficiently

"},{"location":"api/medchem.utils.html#medchem.utils.smarts.SMARTSUtils.aliphatic_chain","title":"aliphatic_chain(min_size=6, unbranched=False, unsaturated_bondtype=None, allow_hetero_atoms=True) classmethod","text":"

Returns a query that can match a long aliphatic chain

Parameters:

Name Type Description Default min_size int

minimum size of the long chain

6 unbranched bool

whether the chain should be unbranched

False unsaturated_bondtype Optional[str]

additional unsaturated bond type to use for the query. By default, Any bond type (~) is used. Single bonds ARE always allowed and bondtype cannot be aromatic

None allow_hetero_atoms bool

whether the chain can contain hetero atoms

True Example

to build a query for a long aliphatic chain of at least 5 atoms (e.g: 'CCC(C)CCC')

SMARTSUtils.aliphatic_chain(min_size=5)

Returns:

Name Type Description smarts

smarts pattern matching a long aliphatic chain

"},{"location":"api/medchem.utils.html#medchem.utils.smarts.SMARTSUtils.atom_in_env","title":"atom_in_env(*smarts_strs, include_atoms=False, union=False) classmethod","text":"

Returns a recursive/group smarts to find an atom that fits in the environments as defined by all the input smarts

Parameters:

Name Type Description Default smarts_strs

list of input patterns defining the environment the atom must fit in. The first atom of each pattern should be the atom we want to match to, unless include_atoms is set to True, then [*:99] will be added at the start of each pattern

() include_atoms bool

whether to include an additional first atom that needs to be in the required environment or not

False union bool

whether to use the union of the environments or the intersection

False Example

You can use this function to construct a complex query if you are not sure how to write the SMARTS. For example, to find a carbon atom that is in a ring of size 6, bonded to an ethoxy, and has a fluorine in meta:

SMARTSUtils.atom_in_env(\"#6;r6[C&D1]\", \"[c]aa[F]\", union=False) # there are alternative way to write this

Returns:

Name Type Description smarts

smarts pattern matching the group/environment

"},{"location":"api/medchem.utils.html#medchem.utils.smarts.SMARTSUtils.different_fragment","title":"different_fragment(*smarts_strs) classmethod","text":"

Returns a new query that match patterns that are in different fragments.

Warning

This feature is not supported yet by RDKit. See https://github.com/rdkit/rdkit/issues/1261

Parameters:

Name Type Description Default smarts_strs

list of input patterns defining the fragments

() Example

matching two oxygens in a molecule will work with '[#8].[#8]', but if you want the oxygens to be in DIFFERENT fragments, then build the query with:

SMARTSUtils.different_fragment('[#8]', '[#8]')

Returns:

Name Type Description smarts

smarts pattern matching patterns that are in different fragments

"},{"location":"api/medchem.utils.html#medchem.utils.smarts.SMARTSUtils.meta","title":"meta(smarts_str1, smarts_str2, aromatic_only=False) classmethod","text":"

Returns a recursive smarts string connecting the two input smarts in meta of each other. Connection points need to be through single or double bonds

Parameters:

Name Type Description Default smarts_str1 str

first smarts pattern defining the first functional group

required smarts_str2 str

second smarts pattern defining the second functional group

required aromatic_only bool

whether the ring needs to be aromatic or not

False Example

to build a smarts for a methyl group in meta to an oxygen (e.g: 'c1c(C)cc(O)cc1')

SMARTSUtils.meta('[#6;!R]', '[#8]')

Returns:

Name Type Description smarts

smarts pattern connecting the two input smarts in meta of each other

"},{"location":"api/medchem.utils.html#medchem.utils.smarts.SMARTSUtils.ortho","title":"ortho(smarts_str1, smarts_str2, aromatic_only=False) classmethod","text":"

Returns a recursive smarts string connecting the two input smarts in ortho of each other. Connection points need to be through single or double bonds

Parameters:

Name Type Description Default smarts_str1 str

first smarts pattern defining the first functional group

required smarts_str2 str

second smarts pattern defining the second functional group

required aromatic_only bool

whether the ring needs to be aromatic or not

False Example

to build a smarts for a methyl group in ortho to an oxygen (e.g: 'C1CC(C)C(O)CC1')

SMARTSUtils.ortho('[#6;!R]', '[#8]')

Returns:

Name Type Description smarts

smarts pattern connecting the two input smarts in ortho of each other

"},{"location":"api/medchem.utils.html#medchem.utils.smarts.SMARTSUtils.para","title":"para(smarts_str1, smarts_str2, aromatic_only=False) classmethod","text":"

Returns a recursive smarts string connecting the two input smarts in para of each other. Connection points need to be through single or double bonds

Parameters:

Name Type Description Default smarts_str1 str

first smarts pattern defining the first functional group

required smarts_str2 str

second smarts pattern defining the second functional group

required aromatic_only bool

whether the ring needs to be aromatic or not

False Example

to build a smarts for a methyl group in para to an oxygen (e.g: 'c1(C)ccc(O)cc1')

SMARTSUtils.para('[#6;!R]', '[#8]')

Returns:

Name Type Description smarts

smarts pattern connecting the two input smarts in para of each other

"},{"location":"api/medchem.utils.html#medchem.utils.smarts.SMARTSUtils.same_fragment","title":"same_fragment(*smarts_strs) classmethod","text":"

Returns a new query that match patterns that are in THE SAME fragment (component)

Warning

This feature is not supported yet by RDKit. See https://github.com/rdkit/rdkit/issues/1261

Parameters:

Name Type Description Default smarts_strs

list of input patterns defining the fragments

() Example

matching two oxygens in a molecule will work with '[#8].[#8]', but if you want the oxygens to be in the SAME fragment, then build the query with:

SMARTSUtils.same_fragment('[#8]', '[#8]')

Returns:

Name Type Description smarts

smarts pattern matching patterns that are in the same component

"},{"location":"api/medchem.utils.html#medchem.utils.smarts.SMARTSUtils.standardize_attachment","title":"standardize_attachment(smiles, attach_tokens='[*:1]') classmethod","text":"

Standardize an attachment point in a smiles

Parameters:

Name Type Description Default smiles str

SMILES string

required attach_tokens str

Attachment point token to use as standard token

'[*:1]'"},{"location":"api/medchem.utils.html#medchem.utils.matches","title":"medchem.utils.matches","text":""},{"location":"api/medchem.utils.html#medchem.utils.matches.Constraints","title":"Constraints","text":"

Complex query system for matches with additional constraints

Example

mol1 = dm.to_mol(\"CN(C)C(=O)c1cncc(C)c1\") mol2 = dm.to_mol(\"c1ccc(cc1)-c1cccnc1\") core = dm.from_smarts(\"c1cncc([*:1])c1\") [atom.SetProp(\"query\", \"my_constraints\") for atom in core.GetAtoms() if atom.GetAtomMapNum() == 1] constraint_fns = dict(my_constraints=lambda x: dm.descriptors.n_aromatic_atoms(x) > 0) constraint = Constraints(core, constraint_fns) matches = [constraint(mol1), constraint(mol2)] # False, True

"},{"location":"api/medchem.utils.html#medchem.utils.matches.Constraints.__call__","title":"__call__(mol)","text":"

Check if the input molecule respects the constraints

Parameters:

Name Type Description Default mol

input molecule

required"},{"location":"api/medchem.utils.html#medchem.utils.matches.Constraints.__init__","title":"__init__(core, constraint_fns, prop_name='query')","text":"

Initialize the constraint matcher

Parameters:

Name Type Description Default core dm.Mol

the scaffold/query molecule to match against. Needs to be a molecule

required constraint_fns Dict[Callable]

a dictionary of constraints functions

required prop_name str

the property name to use in the match at each atom defined by the core for further matches against the constraints functions

'query'"},{"location":"api/medchem.utils.html#medchem.utils.matches.Constraints.get_matches","title":"get_matches(mol, multiple=True)","text":"

Get matches that respect the constraints in the molecules

Parameters:

Name Type Description Default mol dm.Mol

input molecule

required multiple bool

if True, return all the matches, if False, return the first match

True"},{"location":"api/medchem.utils.html#medchem.utils.matches.Constraints.has_match","title":"has_match(mol)","text":"

Check if the input molecule respects the constraints

Parameters:

Name Type Description Default mol dm.Mol

input molecule

required"},{"location":"api/medchem.utils.html#medchem.utils.matches.Constraints.validate","title":"validate(mol, constraints) staticmethod","text":"

Validate a list of constraint object against a molecule

Parameters:

Name Type Description Default mol

the molecule object

required constraints List[Constraints]

list of Constraints objects to validate against the molecule

required"},{"location":"api/medchem.utils.html#medchem.utils.loader","title":"medchem.utils.loader","text":""},{"location":"api/medchem.utils.html#medchem.utils.loader.get_data","title":"get_data(file=None)","text":"

Return the folder that contains the package specific data

"},{"location":"api/medchem.utils.html#medchem.utils.loader.get_grammar","title":"get_grammar(grammar=None, as_string=False)","text":"

Return the default lark grammar file for queries

Parameters:

Name Type Description Default grammar Optional[os.PathLike]

The path to the grammar file. If None, the default grammar is used.

None as_string bool

If True, return the grammar as a string. Defaults to False.

False"},{"location":"api/medchem.utils.html#medchem.utils.graph","title":"medchem.utils.graph","text":""},{"location":"api/medchem.utils.html#medchem.utils.graph.automorphism","title":"automorphism(mol, standardize=True, node_attrs=DEFAULT_NODE_ATTR, edge_attrs=DEFAULT_EDGE_ATTR)","text":"

Compute automorphism in a molecular graph

Parameters:

Name Type Description Default mol Union[str, dm.Mol]

input molecular graph

required standardize bool

whether to standardize the compound or not

True node_attrs List[str]

list of categorical atom attributes/properties to consider for node matching

DEFAULT_NODE_ATTR edge_attrs List[str]

list of categorical bond attributes/properties to consider for edge matching

DEFAULT_EDGE_ATTR"},{"location":"api/medchem.utils.html#medchem.utils.graph.score_symmetry","title":"score_symmetry(mol, exclude_self_mapped_edged=False, **automorphism_kwargs)","text":"

Provide a symmetry score for a given input molecule

Note

This is a heuristic and our definition of symmetry is pretty loose. We define symmetry according to any (set of) planes dividing the molecule into two very similar subgraphs. We include both edge and vertex transitivity. For example, the star-molecular graph (e.g. neopentane) is symmetrical here, although it's not vertex-transitive. For more information see https://github.com/valence-platform/medchem/pull/41

Parameters:

Name Type Description Default mol Union[dm.Mol, str]

inputs molecules

required exclude_self_mapped_edged bool

Whether to exclude edges that match themselves in automorphism.

False automorphism_kwargs

keyword for determining automorphism

{}"},{"location":"tutorials/getting-started.html","title":"Getting Started","text":"In\u00a0[1]: Copied!
%load_ext autoreload\n%autoreload 2\n
%load_ext autoreload %autoreload 2 In\u00a0[2]: Copied!
import datamol as dm\nimport numpy as np\nfrom loguru import logger\n\ndata = dm.data.freesolv().sample(500)\nsmiles_list = data.smiles.values\n
import datamol as dm import numpy as np from loguru import logger data = dm.data.freesolv().sample(500) smiles_list = data.smiles.values In\u00a0[3]: Copied!
from medchem.filter import lead\nfrom medchem.demerits import score, batch_score\nfrom medchem.alerts import NovartisFilters\nfrom medchem.alerts import AlertFilters\nfrom medchem.catalog import NamedCatalogs\nfrom medchem.utils import get_data\nfrom rdkit.Chem import rdfiltercatalog\n
from medchem.filter import lead from medchem.demerits import score, batch_score from medchem.alerts import NovartisFilters from medchem.alerts import AlertFilters from medchem.catalog import NamedCatalogs from medchem.utils import get_data from rdkit.Chem import rdfiltercatalog In\u00a0[4]: Copied!
# common filters including pains, brenk, nih, zinc\npains_a = rdfiltercatalog.FilterCatalogParams.FilterCatalogs.PAINS_A\nlead.catalog_filter(smiles_list, [\"nih\", pains_a, NamedCatalogs.dundee()])\n
# common filters including pains, brenk, nih, zinc pains_a = rdfiltercatalog.FilterCatalogParams.FilterCatalogs.PAINS_A lead.catalog_filter(smiles_list, [\"nih\", pains_a, NamedCatalogs.dundee()]) Out[4]:
array([ True,  True,  True,  True, False, False,  True, False, False,\n        True, False,  True,  True,  True, False,  True, False, False,\n       False,  True,  True, False,  True, False, False, False,  True,\n        True,  True,  True,  True,  True, False,  True, False, False,\n       False,  True, False, False, False,  True, False, False, False,\n        True, False,  True, False,  True,  True, False,  True,  True,\n        True, False,  True, False, False, False,  True, False, False,\n        True, False,  True,  True, False, False, False, False,  True,\n        True,  True, False,  True, False,  True,  True,  True, False,\n        True,  True, False,  True,  True,  True,  True,  True, False,\n       False,  True,  True, False,  True, False, False, False,  True,\n       False, False, False, False,  True, False, False,  True,  True,\n        True,  True,  True, False,  True,  True,  True, False,  True,\n       False, False,  True,  True,  True,  True, False, False,  True,\n       False, False,  True,  True, False,  True,  True,  True,  True,\n       False, False, False,  True, False, False, False,  True, False,\n       False,  True, False, False,  True,  True,  True, False,  True,\n       False, False, False,  True,  True,  True, False,  True,  True,\n       False, False, False, False, False,  True, False, False, False,\n       False, False,  True, False,  True, False,  True, False, False,\n        True, False, False,  True, False,  True,  True,  True, False,\n        True,  True,  True,  True, False,  True,  True,  True, False,\n       False,  True,  True, False,  True, False,  True, False,  True,\n        True,  True,  True, False, False,  True, False,  True, False,\n       False,  True, False,  True, False,  True, False, False, False,\n       False,  True,  True, False, False,  True,  True, False,  True,\n        True, False,  True,  True,  True, False, False, False,  True,\n       False, False, False,  True,  True, False,  True,  True, False,\n        
True,  True,  True,  True, False,  True, False,  True,  True,\n        True,  True, False,  True, False,  True, False, False,  True,\n        True,  True, False, False, False,  True,  True,  True, False,\n        True, False, False, False, False, False,  True, False,  True,\n        True, False,  True,  True,  True, False,  True,  True, False,\n       False, False,  True, False,  True, False, False, False,  True,\n        True, False,  True, False, False,  True, False, False,  True,\n        True, False, False, False,  True,  True,  True, False,  True,\n        True,  True,  True, False,  True, False, False,  True,  True,\n        True, False, False,  True, False, False, False,  True, False,\n       False, False, False,  True, False,  True,  True,  True, False,\n        True, False, False, False,  True, False,  True, False, False,\n       False, False, False, False,  True,  True,  True, False,  True,\n       False, False, False, False, False,  True,  True,  True,  True,\n       False,  True,  True,  True, False,  True, False,  True, False,\n       False, False,  True, False, False,  True,  True, False, False,\n        True,  True, False,  True,  True, False,  True, False, False,\n       False,  True,  True, False,  True,  True,  True, False, False,\n       False, False, False, False, False,  True, False, False,  True,\n        True,  True,  True,  True,  True,  True,  True, False,  True,\n        True, False, False,  True,  True,  True,  True, False, False,\n       False,  True, False,  True, False,  True,  True,  True, False,\n       False,  True,  True,  True, False,  True, False,  True, False,\n        True,  True,  True,  True,  True,  True,  True,  True, False,\n       False,  True,  True,  True, False,  True,  True, False,  True,\n       False,  True, False,  True,  True,  True, False,  True, False,\n        True,  True,  True, False, False,  True,  True, False,  True,\n        True, False,  True, False, False])
In\u00a0[5]: Copied!
# filtering based on some commons alerts + additional lead like rules\nlead.alert_filter(smiles_list, alerts=[\"Glaxo\", \"BMS\"], rule_dict=dict(MW=[0, 100]))\n
# filtering based on some commons alerts + additional lead like rules lead.alert_filter(smiles_list, alerts=[\"Glaxo\", \"BMS\"], rule_dict=dict(MW=[0, 100])) Out[5]:
array([False, False, False,  True, False, False,  True, False, False,\n       False,  True,  True, False, False, False, False, False, False,\n        True,  True, False, False, False, False, False,  True,  True,\n        True, False, False,  True, False, False, False, False, False,\n       False, False, False, False,  True,  True, False, False, False,\n       False, False, False, False, False, False, False, False, False,\n       False, False,  True,  True, False, False,  True, False, False,\n       False,  True, False, False, False, False,  True, False, False,\n       False, False, False, False,  True,  True,  True, False, False,\n        True, False, False, False, False,  True, False, False, False,\n       False, False, False, False,  True,  True, False, False, False,\n       False, False, False, False,  True,  True, False, False,  True,\n       False,  True,  True, False, False, False, False, False, False,\n       False, False,  True,  True, False, False, False, False, False,\n        True, False,  True,  True, False,  True, False, False, False,\n       False, False, False, False, False, False, False,  True,  True,\n       False, False, False,  True, False, False,  True, False,  True,\n       False, False, False, False,  True,  True, False, False,  True,\n       False, False, False, False, False,  True, False, False, False,\n       False, False,  True, False,  True, False,  True, False,  True,\n       False, False, False, False, False, False, False, False, False,\n        True,  True, False, False, False, False, False, False, False,\n       False,  True,  True, False, False, False, False, False, False,\n        True, False, False,  True,  True,  True, False, False, False,\n       False, False, False, False, False,  True, False, False,  True,\n       False, False, False,  True, False, False, False, False, False,\n       False, False, False, False,  True, False, False, False, False,\n       False, False, False,  True, False, False, False,  True, False,\n       
False, False, False, False, False, False, False, False, False,\n        True,  True, False, False, False,  True, False,  True,  True,\n       False,  True, False, False, False, False,  True, False, False,\n        True, False, False, False, False, False,  True, False, False,\n       False, False,  True,  True, False, False,  True, False, False,\n       False, False,  True, False, False, False, False, False, False,\n        True,  True, False, False, False,  True, False, False,  True,\n       False, False, False, False, False,  True, False, False, False,\n       False,  True, False, False, False, False,  True, False, False,\n       False,  True, False, False, False, False, False, False, False,\n       False, False, False, False, False,  True,  True,  True, False,\n       False, False, False, False, False, False,  True, False, False,\n        True, False, False, False, False,  True, False, False, False,\n       False, False, False,  True, False, False, False,  True, False,\n       False,  True, False, False, False, False, False, False,  True,\n       False,  True,  True, False,  True, False,  True, False,  True,\n       False,  True, False, False, False, False,  True, False, False,\n       False,  True,  True, False,  True,  True,  True, False, False,\n       False, False, False, False,  True, False,  True, False,  True,\n        True, False, False, False, False,  True,  True, False, False,\n        True, False, False, False,  True,  True, False, False, False,\n       False, False, False,  True, False, False, False, False, False,\n       False, False, False,  True, False, False, False,  True, False,\n       False, False,  True,  True, False, False, False,  True, False,\n       False, False, False,  True, False, False,  True, False,  True,\n       False, False, False,  True,  True, False, False,  True, False,\n       False, False,  True, False, False, False, False,  True, False,\n       False,  True, False, False, False])
In\u00a0[6]: Copied!
# filtering based on NIBR screening deck process described in\n# \"Evolution of Novartis' small molecule screening deck design\" by Schuffenhauer, A. et al. J. Med. Chem. (2020),\n# https://dx.doi.org/10.1021/acs.jmedchem.0c01332.\nlead.screening_filter(smiles_list, return_idx=True)\n
# filtering based on NIBR screening deck process described in # \"Evolution of Novartis' small molecule screening deck design\" by Schuffenhauer, A. et al. J. Med. Chem. (2020), # https://dx.doi.org/10.1021/acs.jmedchem.0c01332. lead.screening_filter(smiles_list, return_idx=True) Out[6]:
array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,\n        13,  14,  15,  17,  18,  19,  20,  21,  22,  25,  26,  27,  28,\n        29,  30,  31,  32,  33,  36,  37,  38,  40,  41,  43,  45,  47,\n        48,  49,  50,  52,  53,  54,  56,  57,  58,  59,  60,  61,  62,\n        63,  64,  65,  66,  67,  69,  71,  72,  73,  74,  75,  77,  78,\n        79,  80,  81,  82,  84,  85,  86,  87,  88,  89,  90,  91,  92,\n        93,  94,  95,  96,  97,  99, 100, 101, 102, 103, 104, 105, 106,\n       107, 108, 109, 110, 111, 112, 113, 114, 115, 117, 118, 119, 120,\n       121, 122, 123, 125, 127, 128, 129, 131, 132, 133, 134, 135, 136,\n       137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,\n       150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 163,\n       164, 165, 166, 167, 169, 170, 171, 173, 174, 175, 176, 177, 178,\n       179, 180, 181, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192,\n       194, 195, 196, 197, 198, 199, 200, 202, 203, 204, 206, 207, 208,\n       209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221,\n       222, 223, 224, 225, 226, 227, 228, 230, 231, 233, 234, 235, 236,\n       237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249,\n       250, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263,\n       264, 266, 268, 269, 270, 271, 273, 274, 275, 276, 277, 278, 279,\n       280, 281, 282, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293,\n       294, 295, 296, 297, 298, 299, 300, 303, 304, 305, 306, 307, 308,\n       309, 310, 311, 313, 314, 315, 317, 318, 319, 320, 321, 322, 323,\n       324, 325, 326, 328, 329, 331, 332, 333, 334, 336, 337, 338, 339,\n       340, 341, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 355,\n       357, 358, 359, 360, 364, 365, 366, 367, 368, 369, 370, 371, 372,\n       373, 374, 375, 376, 377, 379, 380, 381, 382, 383, 384, 385, 387,\n       388, 389, 391, 392, 393, 394, 395, 396, 397, 399, 400, 402, 403,\n       404, 406, 407, 408, 
409, 410, 411, 414, 415, 416, 417, 418, 419,\n       420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432,\n       433, 434, 435, 436, 437, 438, 439, 440, 442, 443, 444, 445, 446,\n       447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459,\n       460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472,\n       473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485,\n       486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 499])
In\u00a0[7]: Copied!
# Filter based on the demerit scoring of Eli Lilly\ntest_config = {\n    \"min_atoms\": 10, # default is 1\n    \"soft_max_atoms\": 30, # default is 30\n    \"hard_max_atoms\": 50, # default is 30\n    \"nodemerit\": False, # default is False\n    \"dthresh\": 160, # default is None with no threshold\n}\nlead.lilly_demerit_filter(smiles_list, max_demerits=160, return_idx=True, **test_config)\n
# Filter based on the demerit scoring of Eli Lilly test_config = { \"min_atoms\": 10, # default is 1 \"soft_max_atoms\": 30, # default is 30 \"hard_max_atoms\": 50, # default is 30 \"nodemerit\": False, # default is False \"dthresh\": 160, # default is None with no threshold } lead.lilly_demerit_filter(smiles_list, max_demerits=160, return_idx=True, **test_config) Out[7]:
array([  0,   4,  37,  61,  62,  72,  73,  84,  89,  98, 106, 113, 132,\n       140, 148, 178, 180, 187, 192, 203, 209, 214, 226, 240, 244, 247,\n       249, 253, 254, 259, 273, 282, 287, 293, 310, 317, 319, 324, 345,\n       374, 377, 383, 384, 385, 399, 400, 417, 419, 434, 442, 446, 448,\n       454, 455, 459, 464, 470, 473, 475, 478, 482, 485, 487, 492, 494])
In\u00a0[8]: Copied!
filter_obj = AlertFilters(alerts_set=[\"inpharmatica\", \"SureChEMBL\"])\nfilter_obj.list_default_available_alerts()\n
filter_obj = AlertFilters(alerts_set=[\"inpharmatica\", \"SureChEMBL\"]) filter_obj.list_default_available_alerts() Out[8]: rule_set_name smarts catalog_description rule_set source 10 Glaxo 55 Glaxo Wellcome Hard filters 1 ChEMBL 5 Dundee 105 University of Dundee NTD Screening Library Fil... 2 ChEMBL 2 BMS 180 Bristol-Myers Squibb HTS Deck filters 3 ChEMBL 18 PAINS 481 PAINS filters 4 ChEMBL 21 SureChEMBL 166 SureChEMBL Non-MedChem Friendly SMARTS 5 ChEMBL 16 MLSMR 116 NIH MLSMR Excluded Functionality filters (MLSMR) 6 ChEMBL 12 Inpharmatica 91 Unwanted fragments derived by Inpharmatica Ltd. 7 ChEMBL 14 LINT 57 Pfizer lint filters (lint) 8 ChEMBL 0 Alarm-NMR 75 Reactive False Positives in Biochemical Screen... 9 Litterature 1 AlphaScreen-Hitters 6 Structural filters for compounds that may be a... 10 Litterature 8 GST-Hitters 34 Structural filters for compounds may prevent G... 11 Litterature 11 HIS-Hitters 19 Structural filters for compounds prevents the ... 12 Litterature 15 LuciferaseInhibitor 3 Structural filters for compounds that may inhi... 13 Litterature 4 DNABinder 78 Structural filters for compounds that may bind... 14 Litterature 3 Chelator 55 Structural filters for compounds that may inhi... 15 Litterature 7 Frequent-Hitter 15 Structural filters for compounds that are freq... 16 Litterature 6 Electrophilic 119 Structural filters for compounds that could ta... 17 Litterature 9 Genotoxic-Carcinogenicity 117 Structural filters for compounds that may caus... 18 Litterature 13 LD50-Oral 20 Structural filters for compounds that may caus... 19 Litterature 17 Non-Genotoxic-Carcinogenicity 22 Structural filters for compounds that may caus... 20 Litterature 19 Reactive-Unstable-Toxic 335 General very reactive/unstable or Toxic compounds 21 Litterature 20 Skin 155 Skin Sensitization filters (irritables) 22 Litterature 22 Toxicophore 154 General Toxicophores 23 Litterature In\u00a0[9]: Copied!
out = filter_obj(smiles_list)\nout\n
out = filter_obj(smiles_list) out Out[9]: _smiles status reasons MW LogP HBD HBA TPSA 0 c1ccc(Cn2ccnc2)cc1 Ok None 158.204 1.93140 0 2 17.82 1 NC(=O)c1ccccc1 Ok None 121.139 0.78550 1 1 43.09 2 c1cc2c3c(cccc3c1)CC2 Ok None 154.212 2.93840 0 0 0.00 3 Cc1cnccn1 Ok None 94.117 0.78502 0 2 25.78 4 CN(C)C(=O)c1ccc([N+](=O)[O-])cc1 Ok None 194.190 1.29660 0 3 63.45 ... ... ... ... ... ... ... ... ... 495 Cc1ccc(O)c(C)c1 Ok None 122.167 2.00904 1 1 20.23 496 C/C=C/CCCC Ok None 98.189 2.75270 0 0 0.00 497 CNc1ccccc1 Ok None 107.156 1.72830 1 1 12.03 498 ClCCCl Exclude alkyl_halides; Filter1_2_halo_ether; Filter26_... 98.960 1.46400 0 0 0.00 499 CCCCOC(C)=O Ok None 116.160 1.34960 0 2 26.30

500 rows \u00d7 8 columns

In\u00a0[10]: Copied!
filter_obj = NovartisFilters()\nout = filter_obj(smiles_list)\nout\n
filter_obj = NovartisFilters() out = filter_obj(smiles_list) out Out[10]: _smiles status reasons severity covalent special_mol 0 c1ccc(Cn2ccnc2)cc1 Ok None 0 NaN NaN 1 NC(=O)c1ccccc1 Ok None 0 NaN NaN 2 c1cc2c3c(cccc3c1)CC2 Ok None 0 NaN NaN 3 Cc1cnccn1 Ok None 0 NaN NaN 4 CN(C)C(=O)c1ccc([N+](=O)[O-])cc1 Annotations nitro_count_1_min(1) 0 0.0 0.0 ... ... ... ... ... ... ... 495 Cc1ccc(O)c(C)c1 Ok None 0 NaN NaN 496 C/C=C/CCCC Ok None 0 NaN NaN 497 CNc1ccccc1 Ok None 0 NaN NaN 498 ClCCCl Exclude halo_ether_min(1); halogen_alkyl_min(1); halog... 10 2.0 0.0 499 CCCCOC(C)=O Ok None 0 NaN NaN

500 rows \u00d7 6 columns

In\u00a0[11]: Copied!
out = score(smiles_list, **test_config)\nout\n
out = score(smiles_list, **test_config) out Out[11]: _smiles ID reasons step rejected demerit_score status 0 C1=CC=C(C=C1)CN1C=CN=C1 0 NaN 4 False 0.0 Ok 1 C1=CC=C(C=C1)C(=O)N 1 not_enough_atoms 1 True NaN Exclude 2 C1=CC2=CC=CC3=C2C(=C1)CC3 2 no_interesting_atoms 1 True NaN Exclude 3 CC1=CN=CC=N1 3 not_enough_atoms 1 True NaN Exclude 4 CN(C)C(=O)C1=CC=C(C=C1)N(=O)=O 4 nitro:D60 4 False 60.0 Flag ... ... ... ... ... ... ... ... 495 CC1=CC=C(O)C(=C1)C 495 not_enough_atoms 1 True NaN Exclude 496 CCCC/C=C/C 496 not_enough_atoms 1 True NaN Exclude 497 CNC1=CC=CC=C1 497 not_enough_atoms 1 True NaN Exclude 498 C(Cl)CCl 498 not_enough_atoms 1 True NaN Exclude 499 CCCCOC(=O)C 499 not_enough_atoms 1 True NaN Exclude

500 rows \u00d7 7 columns

In\u00a0[12]: Copied!
# Although the demirits.score is already quite fast, you can also call the parallelized version of it using the `batch_score` function\n
# Although the demirits.score is already quite fast, you can also call the parallelized version of it using the `batch_score` function In\u00a0[13]: Copied!
out2 = batch_score(smiles_list, n_jobs=2,  batch_size=100,  progress=True, **test_config)\nout2\n
out2 = batch_score(smiles_list, n_jobs=2, batch_size=100, progress=True, **test_config) out2
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 5/5 [00:00<00:00,  8.17it/s]\n
Out[13]: _smiles ID reasons step rejected demerit_score status 0 C1=CC=C(C=C1)CN1C=CN=C1 0 NaN 4 False 0.0 Ok 1 C1=CC=C(C=C1)C(=O)N 1 not_enough_atoms 1 True NaN Exclude 2 C1=CC2=CC=CC3=C2C(=C1)CC3 2 no_interesting_atoms 1 True NaN Exclude 3 CC1=CN=CC=N1 3 not_enough_atoms 1 True NaN Exclude 4 CN(C)C(=O)C1=CC=C(C=C1)N(=O)=O 4 nitro:D60 4 False 60.0 Flag ... ... ... ... ... ... ... ... 495 CC1=CC=C(O)C(=C1)C 495 not_enough_atoms 1 True NaN Exclude 496 CCCC/C=C/C 496 not_enough_atoms 1 True NaN Exclude 497 CNC1=CC=CC=C1 497 not_enough_atoms 1 True NaN Exclude 498 C(Cl)CCl 498 not_enough_atoms 1 True NaN Exclude 499 CCCCOC(=O)C 499 not_enough_atoms 1 True NaN Exclude

500 rows \u00d7 7 columns

In\u00a0[14]: Copied!
from medchem.groups import ChemicalGroup\nc_group = ChemicalGroup(groups=\"rings_in_drugs\")\n
from medchem.groups import ChemicalGroup c_group = ChemicalGroup(groups=\"rings_in_drugs\") In\u00a0[15]: Copied!
mol = dm.to_mol(\"CCS(=O)(=O)N1CC(C1)(CC#N)N2C=C(C=N2)C3=C4C=CNC4=NC=N3\")\nc_group.get_matches(mol, use_smiles=True)\n
mol = dm.to_mol(\"CCS(=O)(=O)N1CC(C1)(CC#N)N2C=C(C=N2)C3=C4C=CNC4=NC=N3\") c_group.get_matches(mol, use_smiles=True) Out[15]: name smiles smarts group matches 204 diazine C1=NC=CC=N1 [#6]1:[#7]:[#6]:[#6]:[#6]:[#7]:1 rings_in_drugs ((24, 23, 22, 18, 17, 25),) 234 1H-pyrazole N1=CC=CN1 [#7]1:[#6]:[#6]:[#6]:[#7H]:1 rings_in_drugs ((12, 13, 14, 15, 16),) 257 1H-pyrrole C1=CC=CN1 [#6]1:[#6]:[#6]:[#6]:[#7H]:1 rings_in_drugs ((20, 19, 18, 22, 21),)

You can also load a custom library of queries. Your custom DataFrame needs to provide the following columns: 'smiles'/'smarts', 'name' and \"group\", and optionally 'hierarchy'.

In\u00a0[16]: Copied!
c_group = ChemicalGroup(groups_db=get_data(\"smarts_bank.csv\"))\nc_group.get_matches(mol, use_smiles=False)\n
c_group = ChemicalGroup(groups_db=get_data(\"smarts_bank.csv\")) c_group.get_matches(mol, use_smiles=False) Out[16]: name smiles smarts group matches 0 HBA [!$([#6,F,Cl,Br,I,o,s,nX3,#7v5,#15v5,#16v4,#16... custom_queries ((3,), (4,), (5,), (11,), (16,), (23,), (25,)) 2 HBD [!$([#6,H0,-,-2,-3])] custom_queries ((21,),) 3 HBD [!H0;#7,#8,#9] custom_queries ((21,),) 12 Hydrogen [*!H0,#1] custom_queries ((0,), (1,), (6,), (8,), (9,), (13,), (15,), (... 13 Hydrogen [#6!H0,#1] custom_queries ((0,), (1,), (6,), (8,), (9,), (13,), (15,), (... 14 Hydrogen [H,#1] custom_queries ((13,), (15,), (19,), (20,), (21,), (24,)) 16 Acyclic Bonds *!@* custom_queries ((0, 1), (1, 2), (2, 3), (2, 4), (2, 5), (7, 9... 18 Rotable Bond [!$(*#*)&!D1]-!@[!$(*#*)&!D1] custom_queries ((1, 2), (2, 5), (7, 9), (7, 12), (14, 17)) 20 SP3 Nitrogen [$([NX4+]),$([NX3]);!$(*=*)&!$(*:*)] custom_queries ((5,),) 21 SP2 Nitrogen [$([nX3](:*):*),$([nX2](:*):*),$([#7X2]=*),$([... custom_queries ((12,), (16,), (21,), (23,), (25,)) 22 SP2 Carbon [$([cX3](:*):*),$([cX2+](:*):*),$([CX3]=*),$([... custom_queries ((13,), (14,), (15,), (17,), (18,), (19,), (20... 23 Aromatic SP2 Carbon [$([cX3](:*):*),$([cX2+](:*):*)] custom_queries ((13,), (14,), (15,), (17,), (18,), (19,), (20... 24 Chiral Carbon [$([#6X4@](*)(*)(*)*),$([#6X4@H](*)(*)*)] custom_queries ((7,),)

You can apply the basic rules independently

In\u00a0[17]: Copied!
from medchem.rules.basic_rules import rule_of_five, rule_of_three, rule_of_leadlike_soft\n
from medchem.rules.basic_rules import rule_of_five, rule_of_three, rule_of_leadlike_soft In\u00a0[18]: Copied!
print(\"RO5\", rule_of_five(mol))\nprint(\"RO3\", rule_of_three(mol))\nprint(\"ROLS\", rule_of_leadlike_soft(mol))\n
print(\"RO5\", rule_of_five(mol)) print(\"RO3\", rule_of_three(mol)) print(\"ROLS\", rule_of_leadlike_soft(mol))
RO5 True\nRO3 False\nROLS True\n

To list all available rules and what they are good for, use:

In\u00a0[19]: Copied!
from medchem.rules import RuleFilters\n
from medchem.rules import RuleFilters In\u00a0[20]: Copied!
RuleFilters.list_available_rules()\n
RuleFilters.list_available_rules() Out[20]: name rules description 0 rule_of_five MW <= 500 & logP <= 5 & HBD <= 5 & HBA <= 10 leadlike;druglike;small molecule;library design 1 rule_of_five_beyond MW <= 1000 & logP in [-2, 10] & HBD <= 6 & HBA... leadlike;druglike;small molecule;library design 2 rule_of_four MW >= 400 & logP >= 4 & RINGS >=4 & HBA >= 4 PPI inhibitor;druglike 3 rule_of_three MW <= 300 & logP <= 3 & HBA <= 3 & HBD <= 3 & ... fragment;building block 4 rule_of_three_extended MW <= 300 & logP in [-3, 3] & HBA <= 6 & HBD <... fragment;building block 5 rule_of_two MW <= 200 & logP <= 2 & HBA <= 4 & HBD <= 2 fragment;reagent;building block 6 rule_of_ghose MW in [160, 480] & logP in [-0.4, 5.6] & Natom... leadlike;druglike;small molecule;library design 7 rule_of_veber rotatable bond <= 10 & TPSA < 140 druglike;leadlike;small molecule;oral 8 rule_of_reos MW in [200, 500] & logP in [-5, 5] & HBA in [0... druglike;small molecule;library design;HTS 9 rule_of_chemaxon_druglikeness MW < 400 & logP < 5 & HBA <= 10 & HBD <= 5 & r... leadlike;druglike;small molecule 10 rule_of_egan TPSA in [0, 132] & logP in [-1, 6] druglike;small molecule;admet;absorption;perme... 11 rule_of_pfizer_3_75 not (TPSA < 75 & logP > 3) druglike;toxicity;invivo;small molecule 12 rule_of_gsk_4_400 MW <= 400 & logP <= 4 druglike;admet;small molecule 13 rule_of_oprea HBD in [0, 2] & HBA in [2, 9] & ROTBONDS in [2... druglike;small molecule 14 rule_of_xu HBD <= 5 & HBA <= 10 & ROTBONDS in [2, 35] & R... druglike;small molecule;library design 15 rule_of_cns MW in [135, 582] & logP in [-0.2, 6.1] & TPSA ... druglike;CNS;BBB;small molecule 16 rule_of_respiratory MW in [240, 520] & logP in [-2, 4.7] & HBONDS... druglike;respiratory;small molecule;nasal;inha... 17 rule_of_zinc MW in [60, 600] & logP < in [-4, 6] & HBD <= 6... druglike;small molecule;library design;zinc 18 rule_of_leadlike_soft MW in [150, 400] & logP < in [-3, 4] & HBD <= ... 
leadlike;small molecule;library design;admet 19 rule_of_druglike_soft MW in [100, 600] & logP < in [-3, 6] & HBD <= ... druglike;small molecule;library design

To list the available rules for small molecules only, you can use list_available_rules(\"small molecule\")

In\u00a0[21]: Copied!
RuleFilters.list_available_rules(\"small molecule\")\n
RuleFilters.list_available_rules(\"small molecule\") Out[21]: name rules description 0 rule_of_five MW <= 500 & logP <= 5 & HBD <= 5 & HBA <= 10 leadlike;druglike;small molecule;library design 1 rule_of_five_beyond MW <= 1000 & logP in [-2, 10] & HBD <= 6 & HBA... leadlike;druglike;small molecule;library design 6 rule_of_ghose MW in [160, 480] & logP in [-0.4, 5.6] & Natom... leadlike;druglike;small molecule;library design 7 rule_of_veber rotatable bond <= 10 & TPSA < 140 druglike;leadlike;small molecule;oral 8 rule_of_reos MW in [200, 500] & logP in [-5, 5] & HBA in [0... druglike;small molecule;library design;HTS 9 rule_of_chemaxon_druglikeness MW < 400 & logP < 5 & HBA <= 10 & HBD <= 5 & r... leadlike;druglike;small molecule 10 rule_of_egan TPSA in [0, 132] & logP in [-1, 6] druglike;small molecule;admet;absorption;perme... 11 rule_of_pfizer_3_75 not (TPSA < 75 & logP > 3) druglike;toxicity;invivo;small molecule 12 rule_of_gsk_4_400 MW <= 400 & logP <= 4 druglike;admet;small molecule 13 rule_of_oprea HBD in [0, 2] & HBA in [2, 9] & ROTBONDS in [2... druglike;small molecule 14 rule_of_xu HBD <= 5 & HBA <= 10 & ROTBONDS in [2, 35] & R... druglike;small molecule;library design 15 rule_of_cns MW in [135, 582] & logP in [-0.2, 6.1] & TPSA ... druglike;CNS;BBB;small molecule 16 rule_of_respiratory MW in [240, 520] & logP in [-2, 4.7] & HBONDS... druglike;respiratory;small molecule;nasal;inha... 17 rule_of_zinc MW in [60, 600] & logP < in [-4, 6] & HBD <= 6... druglike;small molecule;library design;zinc 18 rule_of_leadlike_soft MW in [150, 400] & logP < in [-3, 4] & HBD <= ... leadlike;small molecule;library design;admet 19 rule_of_druglike_soft MW in [100, 600] & logP < in [-3, 6] & HBD <= ... druglike;small molecule;library design

You can also apply the rule filter to a batch of molecules.

In\u00a0[22]: Copied!
rule_obj = RuleFilters(rule_list=[\"rule_of_five\", \"rule_of_oprea\", \"rule_of_cns\", \"rule_of_leadlike_soft\"], rule_list_names=[\"rule_of_five\", \"rule_of_oprea\", \"rule_of_cns\", \"rule_of_leadlike_soft\"], precompute_props=True)\n
rule_obj = RuleFilters(rule_list=[\"rule_of_five\", \"rule_of_oprea\", \"rule_of_cns\", \"rule_of_leadlike_soft\"], rule_list_names=[\"rule_of_five\", \"rule_of_oprea\", \"rule_of_cns\", \"rule_of_leadlike_soft\"], precompute_props=True) In\u00a0[23]: Copied!
out = rule_obj(smiles_list, n_jobs=-1, progress=True)\n# you need to reset the columns because the input rule list can be columns\nout\n
out = rule_obj(smiles_list, n_jobs=-1, progress=True) # you need to reset the columns because the input rule list can be columns out
Props: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 500/500 [00:00<00:00, 5903.89it/s]     \nProps: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 500/500 [00:00<00:00, 7238.20it/s]\nProps: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 500/500 [00:00<00:00, 8098.33it/s]\nProps: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 500/500 [00:00<00:00, 1102.02it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 4/4 [00:00<00:00,  5.86it/s]\n
Out[23]: rule_of_five rule_of_oprea rule_of_cns rule_of_leadlike_soft 0 True True True True 1 True False False False 2 True False False False 3 True False False False 4 True True True True ... ... ... ... ... 495 True False False False 496 True False False False 497 True False False False 498 True False False False 499 True False False False

500 rows \u00d7 4 columns

In\u00a0[24]: Copied!
from medchem import catalog\nprint(catalog.list_named_catalogs())\n
from medchem import catalog print(catalog.list_named_catalogs())
['tox', 'pains', 'pains_a', 'pains_b', 'pains_c', 'nih', 'zinc', 'brenk', 'dundee', 'bms', 'glaxo', 'schembl', 'mlsmr', 'inpharmatica', 'lint', 'alarm_nmr', 'alphascreen', 'gst_hitters', 'his_hitters', 'luciferase', 'dnabinder', 'chelator', 'hitters', 'electrophilic', 'carcinogen', 'ld50_oral', 'reactive_unstable_toxic', 'skin', 'toxicophore', 'nibr', 'bredt', 'unstable_graph']\n
In\u00a0[25]: Copied!
from medchem.catalog import from_smarts\nimport pandas as pd\n
from medchem.catalog import from_smarts import pandas as pd In\u00a0[26]: Copied!
smarts_bank = pd.read_csv(get_data(\"smarts_bank.csv\"))\n
smarts_bank = pd.read_csv(get_data(\"smarts_bank.csv\")) In\u00a0[27]: Copied!
custom_catalog = from_smarts(smarts_bank.smarts.values, smarts_bank.name.values, entry_as_inds=False)\n
custom_catalog = from_smarts(smarts_bank.smarts.values, smarts_bank.name.values, entry_as_inds=False) In\u00a0[28]: Copied!
mol = \"CCS(=O)(=O)N1CC(C1)N2C=C(C=N2)C3=C4C=CNC4=NC=N3\"\nmol = dm.to_mol(mol)\nmol\n
mol = \"CCS(=O)(=O)N1CC(C1)N2C=C(C=N2)C3=C4C=CNC4=NC=N3\" mol = dm.to_mol(mol) mol Out[28]: In\u00a0[29]: Copied!
matches = custom_catalog.GetMatches(mol)\n[m.GetDescription() for m in matches]\n
matches = custom_catalog.GetMatches(mol) [m.GetDescription() for m in matches] Out[29]:
['HBA',\n 'HBD',\n 'HBD',\n 'Hydrogen',\n 'Hydrogen',\n 'Hydrogen',\n 'Acyclic Bonds',\n 'Rotable Bond',\n 'SP3 Nitrogen',\n 'SP2 Nitrogen',\n 'SP2 Carbon',\n 'Aromatic SP2 Carbon',\n 'Chiral Carbon']
In\u00a0[30]: Copied!
from medchem.query import QueryFilter\n
from medchem.query import QueryFilter In\u00a0[31]: Copied!
# note that whitespace and newlines are ignored in the query\nquery = \"\"\"\n(\n    HASPROP(\"tpsa\" < 80) AND HASPROP(\"clogp\" < 3) AND ! HASALERT(\"pains\")\n) \n    OR \n(\n    HASPROP(\"n_heavy_atoms\" >= 10) \n    AND \n    (\n        HASGROUP(\"Alcohols\") \n        OR \n        HASSUBSTRUCTURE(\"[CX3](=[OX1])O\", True, 1)\n    )\n)\n\"\"\"\n
# note that whitespace and newlines are ignored in the query query = \"\"\" ( HASPROP(\"tpsa\" < 80) AND HASPROP(\"clogp\" < 3) AND ! HASALERT(\"pains\") ) OR ( HASPROP(\"n_heavy_atoms\" >= 10) AND ( HASGROUP(\"Alcohols\") OR HASSUBSTRUCTURE(\"[CX3](=[OX1])O\", True, 1) ) ) \"\"\" In\u00a0[32]: Copied!
data = dm.freesolv()\nquery_filter = QueryFilter(query)\nout = query_filter(data.smiles, n_jobs=-1, progress=True)\n
data = dm.freesolv() query_filter = QueryFilter(query) out = query_filter(data.smiles, n_jobs=-1, progress=True)
                                                                \r
In\u00a0[33]: Copied!
any(out)\n
any(out) Out[33]:
True
"},{"location":"tutorials/getting-started.html#medchem","title":"Medchem\u00b6","text":"

Medchem is a package for applying general filtering rules on a set of molecules to ensure they have drug-like properties.

In this tutorial, we will apply various filters to an example dataset to highlight the package API

"},{"location":"tutorials/getting-started.html#setup","title":"Setup\u00b6","text":""},{"location":"tutorials/getting-started.html#using-the-filter-module","title":"Using the filter module\u00b6","text":"

The filter module provides two types of filters:

  • generic: custom filtering based on some given molecule properties such as number of atoms, presence of specific atom type, etc
  • lead: filtering based on structural motifs that are known to be toxic, reactive, unstable or frequent false positives
"},{"location":"tutorials/getting-started.html#advanced-options","title":"Advanced options\u00b6","text":"

The advanced options allow a better control over the filtering process. They also provide more information on the issues with the molecules.

"},{"location":"tutorials/getting-started.html#alertfilters","title":"AlertFilters\u00b6","text":"

These are the underlying filters called by lead.alert_filter. In the output, the compound status is indicated as either \"Exclude\" or \"Ok\".

"},{"location":"tutorials/getting-started.html#novartisfilter","title":"NovartisFilter\u00b6","text":"

These are the underlying filters called by lead.screening_filter.

Here is an explanation of the output:

  • status: one of [\"Exclude\", \"Flag\", \"Annotations\", \"Ok\"] (ordered by quality). Generally, you can keep anything without the \"Exclude\" label, as long as you also apply a maximum severity score for compounds that collect too many flags.
  • covalent: number of potentially covalent motifs contained in the compound
  • severity: how severe are the issues with the molecules: - 0: compound has no flags, might have annotations; - 1-9: number of flags the compound raises; - >= 10: default exclusion criterion used in the paper
  • special_mol: whether the compound/parts of the compound belongs to a special class of molecules (e.g peptides, glycosides, fatty acid). In that case, you should review the rejection reasons.
"},{"location":"tutorials/getting-started.html#demerits-scoring","title":"Demerits scoring\u00b6","text":"

Demerit scoring uses the Eli Lilly filter rules. Those are complex rules, that can be customized in any way you wish.

The following \"information\" will be computed and added as columns to a DataFrame for each run:

  • status: this was added for compatibility and has values \"Exclude\", \"Flag\" or \"Ok\".
  • rejected: whether the molecule passed the filter or was rejected
  • reasons: the reasons why the molecule was rejected if available
  • demerit_score: a demerit score for molecules. The lower the better. A cutoff is used to reject molecules with too many demerits, which you can refilter afterwards.
  • step: step of the pipeline where molecule was filtered out, if available
"},{"location":"tutorials/getting-started.html#functional-group-filters","title":"Functional group filters\u00b6","text":"

It is also possible to initialize a list of functional groups to use for molecule matching

"},{"location":"tutorials/getting-started.html#physchem-rule-application","title":"PhysChem Rule application\u00b6","text":"

You can also apply a set of physchem rules to a list of compounds

"},{"location":"tutorials/getting-started.html#available-catalogs","title":"Available Catalogs\u00b6","text":"

For a list of all available named catalogs, you can use catalog.list_named_catalogs

"},{"location":"tutorials/getting-started.html#build-custom-catalogs","title":"Build custom catalogs\u00b6","text":"

You can build a custom catalog based on smarts you have defined. For example, using an internal smarts bank.

"},{"location":"tutorials/getting-started.html#query-system","title":"Query system\u00b6","text":"

Medchem provides a query system that can be used for filtering molecules based on a constructed query made of statements compliant with the provided grammar.

Detailed information about the query system can be seen in the documentation

"}]} \ No newline at end of file diff --git a/1.4.0/sitemap.xml b/1.4.0/sitemap.xml deleted file mode 100644 index 7227c66..0000000 --- a/1.4.0/sitemap.xml +++ /dev/null @@ -1,58 +0,0 @@ - - - - https://github.com/datamol-io/medchem/1.4.0/index.html - 2023-06-15 - daily - - - https://github.com/datamol-io/medchem/1.4.0/api/medchem.alerts.html - 2023-06-15 - daily - - - https://github.com/datamol-io/medchem/1.4.0/api/medchem.catalog.html - 2023-06-15 - daily - - - https://github.com/datamol-io/medchem/1.4.0/api/medchem.complexity.html - 2023-06-15 - daily - - - https://github.com/datamol-io/medchem/1.4.0/api/medchem.demerits.html - 2023-06-15 - daily - - - https://github.com/datamol-io/medchem/1.4.0/api/medchem.filter.html - 2023-06-15 - daily - - - https://github.com/datamol-io/medchem/1.4.0/api/medchem.groups.html - 2023-06-15 - daily - - - https://github.com/datamol-io/medchem/1.4.0/api/medchem.query.html - 2023-06-15 - daily - - - https://github.com/datamol-io/medchem/1.4.0/api/medchem.rules.html - 2023-06-15 - daily - - - https://github.com/datamol-io/medchem/1.4.0/api/medchem.utils.html - 2023-06-15 - daily - - - https://github.com/datamol-io/medchem/1.4.0/tutorials/getting-started.html - 2023-06-15 - daily - - \ No newline at end of file diff --git a/1.4.0/sitemap.xml.gz b/1.4.0/sitemap.xml.gz deleted file mode 100644 index 5db1b60..0000000 Binary files a/1.4.0/sitemap.xml.gz and /dev/null differ diff --git a/1.4.0/tutorials/getting-started.html b/1.4.0/tutorials/getting-started.html deleted file mode 100644 index 80cf39f..0000000 --- a/1.4.0/tutorials/getting-started.html +++ /dev/null @@ -1,5225 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - Getting Started - medchem - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - -
-
-
- - - -
-
-
- - - -
-
-
- - - -
-
- - - - - - - -

Getting Started

- - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - -
- - - - - - - - -
-
- - -
- -
- - - -
-
-
-
- - - - - - - - - \ No newline at end of file