Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SmilesWidget: Canonicalize SMILES code #507

Merged
merged 1 commit into from
Sep 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 31 additions & 2 deletions aiidalab_widgets_base/structures.py
Original file line number Diff line number Diff line change
Expand Up @@ -772,7 +772,10 @@ def _rdkit_opt(self, smiles, steps):
return None
mol = Chem.AddHs(mol)

AllChem.EmbedMolecule(mol, maxAttempts=20, randomSeed=42)
conf_id = AllChem.EmbedMolecule(mol, maxAttempts=20, randomSeed=42)
if conf_id < 0:
self.output.value = "RDKit ERROR: Could not generate conformer"
return None
if AllChem.UFFHasAllMoleculeParams(mol):
AllChem.UFFOptimizeMolecule(mol, maxIters=steps)
else:
Expand All @@ -785,8 +788,18 @@ def _rdkit_opt(self, smiles, steps):

def _mol_from_smiles(self, smiles, steps=1000):
"""Convert SMILES to ase structure try rdkit then pybel"""

# Canonicalize the SMILES code
# https://en.wikipedia.org/wiki/Simplified_molecular-input_line-entry_system#Terminology
canonical_smiles = self.canonicalize_smiles(smiles)
if not canonical_smiles:
return None

if canonical_smiles != smiles:
self.output.value = f"Canonical SMILES: {canonical_smiles}"

try:
return self._rdkit_opt(smiles, steps)
return self._rdkit_opt(canonical_smiles, steps)
except ValueError as e:
self.output.value = str(e)
if self.disable_openbabel:
Expand All @@ -802,11 +815,27 @@ def _on_button_pressed(self, change=None):
return
spinner = f"Screening possible conformers {self.SPINNER}" # font-size:20em;
self.output.value = spinner

self.structure = self._mol_from_smiles(self.smiles.value)
# Don't overwrite possible error/warning messages
if self.output.value == spinner:
self.output.value = ""

def canonicalize_smiles(self, smiles):
    """Return the canonical form of a SMILES code, or None on failure.

    The input is parsed by RDKit (with sanitization enabled) and written
    back as a canonical, isomeric SMILES string. If either step fails, an
    error message is stored in ``self.output.value`` and None is returned.
    """
    from rdkit import Chem

    parsed = Chem.MolFromSmiles(smiles, sanitize=True)
    if parsed is not None:
        result = Chem.MolToSmiles(parsed, isomericSmiles=True, canonical=True)
        if result:
            return result
        # Parsing worked but RDKit produced an empty string.
        self.output.value = "RDkit ERROR: Could not canonicalize SMILES"
        return None

    # RDKit could not parse the input at all, so report the problem
    # and do not attempt anything else.
    self.output.value = "RDkit ERROR: Invalid SMILES string"
    return None

@tl.default("structure")
def _default_structure(self):
    """Default-value hook registered for ``structure``: starts out as None."""
    return None
Expand Down
21 changes: 21 additions & 0 deletions tests/test_structures.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,27 @@ def test_smiles_widget():
assert widget.structure.get_chemical_formula() == "N2"


@pytest.mark.usefixtures("aiida_profile_clean")
def test_smiles_canonicalization():
    """Check `SmilesWidget.canonicalize_smiles` and the issue #505 regression."""
    smiles_widget = awb.SmilesWidget()

    # A code that is already canonical must come back unchanged.
    assert smiles_widget.canonicalize_smiles("C") == "C"

    # A non-canonical code is rewritten into its canonical form.
    canonicalized = smiles_widget.canonicalize_smiles("O=CC=C")
    assert canonicalized == "C=CC=O"

    # Canonicalizing a canonical code a second time changes nothing.
    assert smiles_widget.canonicalize_smiles(canonicalized) == canonicalized

    # Regression test for https://github.com/aiidalab/aiidalab-widgets-base/issues/505
    # Passing this non-canonical string straight to `_rdkit_opt` must not raise.
    nasty_smiles = "C=CC1=C(C2=CC=C(C3=CC=CC=C3)C=C2)C=C(C=C)C(C4=CC=C(C(C=C5)=CC=C5C(C=C6C=C)=C(C=C)C=C6C7=CC=C(C(C=C8)=CC=C8C(C=C9C=C)=C(C=C)C=C9C%10=CC=CC=C%10)C=C7)C=C4)=C1"
    smiles_widget._rdkit_opt(nasty_smiles, steps=1)


@pytest.mark.usefixtures("aiida_profile_clean")
def test_basic_cell_editor_widget(structure_data_object):
"""Test the `BasicCellEditor`."""
Expand Down
Loading