Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Release 1.3.0 #15

Merged
merged 3 commits into from
Jan 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,20 @@
# CHANGELOG

## Version 1.3.0 - 2024-01-09

### Features

- Adding support for Condensed Graph of Reaction
- Adding support for flagging stereocontrolled reactions
- Adding routines for desalting reactions
- Adding routines for identifying reaction centers
- Adding several dataframe modifying actions
- Adding several reaction modifying actions

### Trival changes

- Unifying routines to make and concatenate batches

## Version 1.2.0 - 2022-11-21

### Features
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,8 @@ Please use ``black`` package for formatting, and follow ``pep8`` style guide.

* [@ckannas](https://github.com/ckannas)
* [@SGenheden](https://www.github.com/SGenheden)
* [@anniewesterlund](https://www.github.com/anniewesterlund)
* [@CBA087](https://www.github.com/CBA087)
* [@EBjerrum](https://www.github.com/EBjerrum)
* [@A-Thakkar](https://www.github.com/A-Thakkar)

Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
project = "ReactionUtils"
copyright = "2022, Molecular AI group"
author = "Molecular AI group"
release = "1.2.0"
release = "1.3.0"

extensions = [
"sphinx.ext.autodoc",
Expand Down
2 changes: 1 addition & 1 deletion env-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ channels:
- https://conda.anaconda.org/conda-forge
- defaults
dependencies:
- python>=3.8,<3.10
- python>=3.9,<3.11
- poetry>=1.1.4,<2.0
3,628 changes: 1,852 additions & 1,776 deletions poetry.lock

Large diffs are not rendered by default.

11 changes: 7 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "reaction_utils"
version = "1.2.0"
version = "1.3.0"
description = "Utilities for working with reactions, reaction templates and template extraction"
authors = ["Genheden, Samuel <[email protected]>", "Kannas, Christos <[email protected]>"]
license = "Apache-2.0"
Expand All @@ -13,18 +13,20 @@ packages = [
]

[tool.poetry.dependencies]
python = "^3.8"
python = ">=3.9,<3.11"
urllib3 = "<2.0"
pandas = "^1.0.0"
xxhash = "^2.0.0"
rdchiral = "^1.1.0"
PyYAML = "^5.4.1"
PyYAML = "^6.0.1"
swifter = "^1.0.9"
metaflow = "^2.6.3"
py7zr = "^0.18.7"
Deprecated = "^1.2.13"
Sphinx = "^5.0.1"
wrapt-timeout-decorator = "^1.3.12"
rdkit = "^2022.3.3"
cgrtools = "^4.1.35"
scipy = "^1.11.4"

[tool.poetry.dev-dependencies]
pytest = "^6.2.2"
Expand All @@ -39,6 +41,7 @@ pre-commit = "^2.10.1"
ipython = "^7.21.0"
pylint = "^2.14.1"
invoke = "^1.7.1"
sphinx = "^4.0.0"

[build-system]
requires = ["poetry-core>=1.0.0"]
Expand Down
141 changes: 141 additions & 0 deletions rxnutils/chem/cgr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
""" Wrapper class for the CGRTools library
"""
import io
import warnings
from typing import List

from CGRtools.files.SDFrw import SDFRead
from CGRtools.containers.reaction import ReactionContainer
from CGRtools.containers.molecule import MoleculeContainer
from rdkit import Chem

from rxnutils.chem.reaction import ChemicalReaction
from rxnutils.chem.utils import atom_mapping_numbers


class CondensedGraphReaction:
"""
The Condensed Graph of Reaction (CGR) representation of a reaction

:ivar reaction_container: the CGRTools container of the reaction
:ivar cgr_container: the CGRTools container of the CGR
:param reaction: the reaction composed of RDKit molecule to start from
:raises ValueError: if it is not possible to create the CGR from the reaction
"""

def __init__(self, reaction: ChemicalReaction) -> None:
self.reaction = reaction
self._cgr_reactants = []
self._cgr_products = []
self._make_cgr_containers()
self.reaction_container = ReactionContainer(
reactants=self._cgr_reactants, products=self._cgr_products
)
try:
self.cgr_container = self.reaction_container.compose()
except ValueError as err:
if str(err) == "mapping of graphs is not disjoint":
raise ValueError(
"Reaction contains inconsistent atom-mapping, perhaps duplicates"
)
elif str(err).endswith("} not equal"):
raise ValueError(
"Atom with the same atom-mapping in reactant and product is not equal"
)
else:
raise ValueError(f"Unknown problem with generating CGR: {err}")

def __eq__(self, obj: object) -> bool:
if not isinstance(obj, CondensedGraphReaction):
return False
return self.cgr_container == obj.cgr_container

@property
def bonds_broken(self) -> int:
"""Returns the number of broken bonds in the reaction"""
return sum(bond.p_order is None for _, _, bond in self.cgr_container.bonds())

@property
def bonds_changed(self) -> int:
"""Returns the number of broken or formed bonds in the reaction"""
return sum(
bond.p_order is None or bond.order is None
for _, _, bond in self.cgr_container.bonds()
)

@property
def bonds_formed(self) -> int:
"""Returns the number of formed bonds in the reaction"""
return sum(bond.order is None for _, _, bond in self.cgr_container.bonds())

@property
def total_centers(self) -> int:
"""Returns the number of atom and bond centers in the reaction"""
return len(self.cgr_container.center_atoms) + len(
self.cgr_container.center_bonds
)

def distance_to(self, other: "CondensedGraphReaction") -> int:
"""
Returns the chemical distance between two reactions, i.e. the absolute difference
between the total number of centers.

Used for some atom-mapping comparison statistics

:param other: the reaction to compare to
:returns: the computed distance
"""
return abs(self.total_centers - other.total_centers)

def _make_cgr_containers(self):
nreactants = len(self.reaction.reactants)
renumbered_mols = self._make_renumbered_mols()
self._cgr_reactants = _convert_rdkit_molecules(renumbered_mols[:nreactants])
self._cgr_products = _convert_rdkit_molecules(renumbered_mols[nreactants:])

if len(self._cgr_reactants) != nreactants:
warnings.warn("Warning not all reactants could be converted to CGRtools")
if len(self._cgr_products) != len(self.reaction.products):
warnings.warn("Warning not all products could be converted to CGRtools")

def _make_renumbered_mols(self):
# It is necessary that all atoms have atom-mapping
# otherwise CGRTools will start adding bad atom-mappings
# so this adds safe atom-mapping to un-mapped atoms
renumbered_mols = []
max_atom_map_numb = max(
max(atom_mapping_numbers(smi) or [0])
for smi in self.reaction.reactants_list + self.reaction.products_list
)
for mol0 in self.reaction.reactants + self.reaction.products:
if mol0 is None:
raise ValueError(
"Cannot create CGR for this reaction, some molecules are None"
)
mol = Chem.rdchem.Mol(mol0)
for atom in mol.GetAtoms():
if not atom.GetAtomMapNum():
max_atom_map_numb += 1
atom.SetAtomMapNum(max_atom_map_numb)
renumbered_mols.append(mol)
return renumbered_mols


def _convert_rdkit_molecules(
mols: List[Chem.rdchem.Mol],
) -> List[MoleculeContainer]:
# This connects the SDWriter of RDKit with the SDFRead of CGRTools
with io.StringIO() as sd_buffer:
with Chem.SDWriter(sd_buffer) as sd_writer_obj:
for mol in mols:
sd_writer_obj.write(mol)
sd_buffer.seek(0)
with SDFRead(sd_buffer, remap=False) as sd_reader_obj:
cgr_molecules = sd_reader_obj.read()

# This make the aromatization of the molecules, which were
# lost when writing to SD format. This is essential to keep
# the reactant and product structures as equal as possible
for mol in cgr_molecules:
mol.thiele()
return cgr_molecules
117 changes: 115 additions & 2 deletions rxnutils/chem/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem.MolStandardize import rdMolStandardize
from rdkit.Chem.SaltRemover import SaltRemover


def get_special_groups(mol) -> List[Tuple[Tuple[int, ...], Tuple[int, ...]]]:
Expand Down Expand Up @@ -336,14 +337,15 @@ def has_atom_mapping(


def remove_atom_mapping(
smiles: str, is_smarts: bool = False, sanitize: bool = True
smiles: str, is_smarts: bool = False, sanitize: bool = True, canonical: bool = True
) -> str:
"""
Returns a molecule without atom mapping

:param smiles: the SMILES/SMARTS representing the molecule
:param is_smarts: if True, will interpret the SMILES as a SMARTS
:param sanitize: if True, will sanitize the molecule
:param canonical: if False, will not canonicalize (applies to SMILES)
:return: the molecule without atom-mapping
"""
if not smiles:
Expand All @@ -358,7 +360,7 @@ def remove_atom_mapping(
# is_smarts=True: Chem.MolToSmarts
# is_smarts=False: Chem.MolToSmiles
to_func = {
False: Chem.MolToSmiles,
False: functools.partial(Chem.MolToSmiles, canonical=canonical),
True: Chem.MolToSmarts,
}
mol = from_func[is_smarts](smiles)
Expand All @@ -368,6 +370,7 @@ def remove_atom_mapping(

for atom in mol.GetAtoms():
atom.SetAtomMapNum(0)

return to_func[is_smarts](mol)


Expand Down Expand Up @@ -403,6 +406,35 @@ def neutralize_molecules(smiles_list: List[str]) -> List[str]:
return [Chem.MolToSmiles(mol) for mol in neutral_mols]


def desalt_molecules(smiles_list: List[str], keep_something: bool = False) -> List[str]:
"""
Remove salts from a set of molecules using RDKit routines

:param smiles_list: the molecules as SMILES
:param keep_something: if True will keep at least one salt
:return: the desalted molecules
"""
remover = SaltRemover() # use default saltremover
mols = [Chem.MolFromSmiles(smi) for smi in smiles_list]
desalted_mols = [
remover.StripMol(mol, dontRemoveEverything=keep_something)
for mol in mols
if mol
]
if len(mols) > len(desalted_mols):
logging.warning(
f"No. of desalted molecules ({len(desalted_mols)}) are less than No. of input molecules ({len(mols)})."
)

smiles_list_new = [Chem.MolToSmiles(mol) for mol in desalted_mols]
smiles_list_new = [smi for smi in smiles_list_new if smi]
if len(smiles_list) > len(smiles_list_new):
logging.warning(
f"No. of desalted SMILES ({len(smiles_list_new)}) are less than No. of input SMILES ({len(smiles_list)})."
)
return smiles_list_new


def same_molecule(mol1, mol2) -> bool:
"""Test if two molecules are the same.
First number of atoms and bonds are compared to guard the potentially more expensive
Expand Down Expand Up @@ -501,6 +533,17 @@ def reassign_rsmi_atom_mapping(rsmi: str, as_smiles: bool = False) -> str:
return updated_rsmi


def join_smiles_from_reaction(smiles_list: List[str]) -> str:
"""
Join a part of reaction SMILES, e.g. reactants and products into components.
Intra-molecular complexes are bracketed with parenthesis

:param smiles_list: the SMILES components
:return: the joined list
"""
return ".".join([f"({item})" if "." in item else item for item in smiles_list])


def split_smiles_from_reaction(smiles: str) -> List[str]:
"""
Split a part of reaction SMILES, e.g. reactants or products
Expand Down Expand Up @@ -540,3 +583,73 @@ def split_smiles_from_reaction(smiles: str) -> List[str]:
else:
components.append(smiles[block_start:pos])
return components


def reaction_centres(rxn: AllChem.ChemicalReaction) -> Tuple[List[int], ...]:
"""
Return reaction centre atoms, provided that the bonding partners
actually change when comparing the environment in the reactant and the product

inspired by code from Greg Landrum's tutorial
set up array to remove atoms from the reaction centers
by comparing the atom mapping in the reactant vs the products

Original implementation by Christoph Bauer

:param rxn: the initialized RDKit reaction
:return: tuple of reaction centre atoms, filtered by connectivity criterion
"""
rxncenters = rxn.GetReactingAtoms(mappedAtomsOnly=True)

remove_array = []
nreactants = len(rxncenters)
for ridx, reacting in enumerate(rxncenters):
reactant = rxn.GetReactantTemplate(ridx)
for raidx in reacting:
ratm = reactant.GetAtomWithIdx(raidx)
mapnum = ratm.GetAtomMapNum()
numexplicitH_reactant = ratm.GetNumExplicitHs()
neighbours = ratm.GetNeighbors()
map_neighbours = sorted(
[neighbour_atom.GetAtomMapNum() for neighbour_atom in neighbours]
)
foundit = False
for product in rxn.GetProducts():
for patom in product.GetAtoms():
if patom.GetAtomMapNum() == mapnum:
neighbours_product = patom.GetNeighbors()
numexplicitH_product = patom.GetNumExplicitHs()
map_neighbours_product = sorted(
[
neighbour_atom.GetAtomMapNum()
for neighbour_atom in neighbours_product
]
)
# criterion: check if the map numbers of neighbours are the same in the reactant and the product
if map_neighbours == map_neighbours_product:
if numexplicitH_reactant == numexplicitH_product:
# if yes --> then the environment doesn't change, so set remove to True
remove_array.append(True)
else:
remove_array.append(False)
else:
# False means that the environment does change, so set remove to False
remove_array.append(False)
foundit = True
break
if foundit:
break

# actual removal of reaction centres by generating new tuple
rxncenters_filtered = []
counter = 0
for reactant_idx in range(nreactants):
reactantcenters = []
for index in rxncenters[reactant_idx]:
if not remove_array[counter]:
reactantcenters.append(index)
counter += 1
reactantcenters = tuple(reactantcenters)
rxncenters_filtered.append(reactantcenters)
rxncenters_filtered = tuple(rxncenters_filtered)
return rxncenters_filtered
Loading
Loading