From 9db2e6e7c36cb54bdc191b4af25f8549d95028c6 Mon Sep 17 00:00:00 2001 From: fabioacl <33908066+fabioacl@users.noreply.github.com> Date: Wed, 1 Nov 2023 15:04:18 +0100 Subject: [PATCH] Cdxml importer: rely only on rdkit library and improve the widget (#198) * Store ase structures in the self._structure_selector widget directly. * FileUpload widget: only accepts CDXML. Co-authored-by: Aliaksandr Yakutovich --- setup.cfg | 1 + surfaces_tools/widgets/cdxml.py | 102 ++++++++++++++------------------ 2 files changed, 46 insertions(+), 57 deletions(-) diff --git a/setup.cfg b/setup.cfg index 958d25d..075e67a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -27,6 +27,7 @@ install_requires = aiida-nanotech-empa>=v1.0.0b4 aiidalab-widgets-base[smiles]~=2.0 ase~=3.21 + rdkit>=2023.03.1 python_requires = >=3.8 [options.extras_require] diff --git a/surfaces_tools/widgets/cdxml.py b/surfaces_tools/widgets/cdxml.py index e34275e..7e77e48 100644 --- a/surfaces_tools/widgets/cdxml.py +++ b/surfaces_tools/widgets/cdxml.py @@ -1,8 +1,7 @@ -import re - import ase import ipywidgets as ipw import numpy as np +import rdkit import scipy import sklearn.decomposition import traitlets as tr @@ -16,24 +15,11 @@ class CdxmlUpload2GnrWidget(ipw.VBox): def __init__(self, title="CDXML to GNR", description="Upload Structure"): self.title = title - try: - import openbabel # noqa: F401 - except ImportError: - super().__init__( - [ - ipw.HTML( - "The CdxmlUpload2GnrWidget requires the OpenBabel library, " - "but the library was not found." - ) - ] - ) - return - - self.mols = None - self.original_structure = None - self.selection = set() self.file_upload = ipw.FileUpload( - description=description, multiple=False, layout={"width": "initial"} + description=description, + multiple=False, + layout={"width": "initial"}, + accept=".cdxml", ) supported_formats = ipw.HTML( """ @@ -43,12 +29,20 @@ def __init__(self, title="CDXML to GNR", description="Upload Structure"): self.file_upload.observe(self._on_file_upload, names="value") - self.allmols = ipw.Dropdown( + self._structure_selector = ipw.Dropdown( options=[None], description="Select mol", value=None, disabled=True ) - self.allmols.observe(self._on_sketch_selected, names="value") - - super().__init__(children=[self.file_upload, supported_formats, self.allmols]) + self._structure_selector.observe(self._on_sketch_selected, names="value") + self.output_message = ipw.HTML(value="") + + super().__init__( + children=[ + self.file_upload, + supported_formats, + self._structure_selector, + self.output_message, + ] + ) @staticmethod def guess_scaling_factor(atoms): @@ -87,10 +81,12 @@ def scale(atoms, s): return atoms @staticmethod - def pybel2ase(mol): - """Converts pybel molecule into ase Atoms""" - species = [ase.data.chemical_symbols[atm.atomicnum] for atm in mol.atoms] - pos = np.asarray([atm.coords for atm in mol.atoms]) + def rdkit2ase(mol): + """Converts rdkit molecule into ase Atoms""" + species = [ + ase.data.chemical_symbols[atm.GetAtomicNum()] for atm in mol.GetAtoms() + ] + pos = np.asarray(list(mol.GetConformer().GetPositions())) pca = sklearn.decomposition.PCA(n_components=3) posnew = pca.fit_transform(pos) atoms = ase.Atoms(species, positions=posnew) @@ -105,6 +101,7 @@ def pybel2ase(mol): @staticmethod def add_h(atoms): """Add missing hydrogen atoms.""" + message = "" n_l = neighborlist.NeighborList( [ase.data.covalent_radii[a.number] for a in atoms], @@ -119,7 +116,7 @@ def add_h(atoms): if atm.symbol == "C" or atm.symbol == "N": need_hydrogen.append(atm.index) - print("Added missing Hydrogen atoms: ", need_hydrogen) + message = f"Added missing Hydrogen atoms: {need_hydrogen}." for atm in need_hydrogen: vec = np.zeros(3) @@ -131,44 +128,35 @@ def add_h(atoms): vec = -vec / np.linalg.norm(vec) * 1.1 + atoms[atm].position atoms.append(ase.Atom("H", vec)) - return atoms + return message, atoms def _on_file_upload(self, change=None): """When file upload button is pressed.""" - from openbabel import pybel as pb - - self.mols = None - listmols = [] - molid = 0 - for fname, _item in change["new"].items(): - frmt = fname.split(".")[-1] - if frmt == "cdxml": - cdxml_file_string = self.file_upload.value[fname]["content"].decode( - "ascii" - ) - self.mols = re.findall( - "") - self.mols[molid] = m - listmols.append( - (str(molid) + ": " + m.formula, molid) - ) # m MUST BE a pb object!!! - molid += 1 - self.allmols.options = listmols + self._structure_selector.options = [None] + self._structure_selector.disabled = True + fname, item = next(iter(change["new"].items())) - self.allmols.disabled = False + try: + options = [ + self.rdkit2ase(mol) + for mol in rdkit.Chem.MolsFromCDXML(item["content"].decode("ascii")) + ] + self._structure_selector.options = [ + (f"{i}: " + mol.get_chemical_formula(), mol) + for i, mol in enumerate(options) + ] + self._structure_selector.disabled = False + except Exception as exc: + self.output_message.value = f"Error reading file: {exc}" - break + self.file_upload.value.clear() def _on_sketch_selected(self, change=None): self.structure = None # needed to empty view in second viewer - if self.mols is None or self.allmols.value is None: + if self._structure_selector.value is None: return - atoms = self.pybel2ase(self.mols[self.allmols.value]) + atoms = self._structure_selector.value factor = self.guess_scaling_factor(atoms) atoms = self.scale(atoms, factor) - atoms = self.add_h(atoms) + self.output_message.value, atoms = self.add_h(atoms) self.structure = atoms - self.file_upload.value.clear()