Skip to content

Commit

Permalink
Cdxml importer: rely only on rdkit library and improve the widget (#198)
Browse files Browse the repository at this point in the history
* Store ase structures in the self._structure_selector widget directly.

* FileUpload widget: only accepts CDXML.

Co-authored-by: Aliaksandr Yakutovich <[email protected]>
  • Loading branch information
fabioacl and yakutovicha authored Nov 1, 2023
1 parent e6b073b commit 9db2e6e
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 57 deletions.
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ install_requires =
aiida-nanotech-empa>=v1.0.0b4
aiidalab-widgets-base[smiles]~=2.0
ase~=3.21
rdkit>=2023.03.1
python_requires = >=3.8

[options.extras_require]
Expand Down
102 changes: 45 additions & 57 deletions surfaces_tools/widgets/cdxml.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import re

import ase
import ipywidgets as ipw
import numpy as np
import rdkit
import scipy
import sklearn.decomposition
import traitlets as tr
Expand All @@ -16,24 +15,11 @@ class CdxmlUpload2GnrWidget(ipw.VBox):

def __init__(self, title="CDXML to GNR", description="Upload Structure"):
self.title = title
try:
import openbabel # noqa: F401
except ImportError:
super().__init__(
[
ipw.HTML(
"The CdxmlUpload2GnrWidget requires the OpenBabel library, "
"but the library was not found."
)
]
)
return

self.mols = None
self.original_structure = None
self.selection = set()
self.file_upload = ipw.FileUpload(
description=description, multiple=False, layout={"width": "initial"}
description=description,
multiple=False,
layout={"width": "initial"},
accept=".cdxml",
)
supported_formats = ipw.HTML(
"""<a href="https://pubs.acs.org/doi/10.1021/ja0697875" target="_blank">
Expand All @@ -43,12 +29,20 @@ def __init__(self, title="CDXML to GNR", description="Upload Structure"):

self.file_upload.observe(self._on_file_upload, names="value")

self.allmols = ipw.Dropdown(
self._structure_selector = ipw.Dropdown(
options=[None], description="Select mol", value=None, disabled=True
)
self.allmols.observe(self._on_sketch_selected, names="value")

super().__init__(children=[self.file_upload, supported_formats, self.allmols])
self._structure_selector.observe(self._on_sketch_selected, names="value")
self.output_message = ipw.HTML(value="")

super().__init__(
children=[
self.file_upload,
supported_formats,
self._structure_selector,
self.output_message,
]
)

@staticmethod
def guess_scaling_factor(atoms):
Expand Down Expand Up @@ -87,10 +81,12 @@ def scale(atoms, s):
return atoms

@staticmethod
def pybel2ase(mol):
"""Converts pybel molecule into ase Atoms"""
species = [ase.data.chemical_symbols[atm.atomicnum] for atm in mol.atoms]
pos = np.asarray([atm.coords for atm in mol.atoms])
def rdkit2ase(mol):
"""Converts rdkit molecule into ase Atoms"""
species = [
ase.data.chemical_symbols[atm.GetAtomicNum()] for atm in mol.GetAtoms()
]
pos = np.asarray(list(mol.GetConformer().GetPositions()))
pca = sklearn.decomposition.PCA(n_components=3)
posnew = pca.fit_transform(pos)
atoms = ase.Atoms(species, positions=posnew)
Expand All @@ -105,6 +101,7 @@ def pybel2ase(mol):
@staticmethod
def add_h(atoms):
"""Add missing hydrogen atoms."""
message = ""

n_l = neighborlist.NeighborList(
[ase.data.covalent_radii[a.number] for a in atoms],
Expand All @@ -119,7 +116,7 @@ def add_h(atoms):
if atm.symbol == "C" or atm.symbol == "N":
need_hydrogen.append(atm.index)

print("Added missing Hydrogen atoms: ", need_hydrogen)
message = f"Added missing Hydrogen atoms: {need_hydrogen}."

for atm in need_hydrogen:
vec = np.zeros(3)
Expand All @@ -131,44 +128,35 @@ def add_h(atoms):
vec = -vec / np.linalg.norm(vec) * 1.1 + atoms[atm].position
atoms.append(ase.Atom("H", vec))

return atoms
return message, atoms

def _on_file_upload(self, change=None):
"""When file upload button is pressed."""
from openbabel import pybel as pb

self.mols = None
listmols = []
molid = 0
for fname, _item in change["new"].items():
frmt = fname.split(".")[-1]
if frmt == "cdxml":
cdxml_file_string = self.file_upload.value[fname]["content"].decode(
"ascii"
)
self.mols = re.findall(
"<fragment(.*?)/fragment", cdxml_file_string, re.DOTALL
)
for m in self.mols:
m = pb.readstring("cdxml", "<fragment" + m + "/fragment>")
self.mols[molid] = m
listmols.append(
(str(molid) + ": " + m.formula, molid)
) # m MUST BE a pb object!!!
molid += 1
self.allmols.options = listmols
self._structure_selector.options = [None]
self._structure_selector.disabled = True
fname, item = next(iter(change["new"].items()))

self.allmols.disabled = False
try:
options = [
self.rdkit2ase(mol)
for mol in rdkit.Chem.MolsFromCDXML(item["content"].decode("ascii"))
]
self._structure_selector.options = [
(f"{i}: " + mol.get_chemical_formula(), mol)
for i, mol in enumerate(options)
]
self._structure_selector.disabled = False
except Exception as exc:
self.output_message.value = f"Error reading file: {exc}"

break
self.file_upload.value.clear()

def _on_sketch_selected(self, change=None):
self.structure = None # needed to empty view in second viewer
if self.mols is None or self.allmols.value is None:
if self._structure_selector.value is None:
return
atoms = self.pybel2ase(self.mols[self.allmols.value])
atoms = self._structure_selector.value
factor = self.guess_scaling_factor(atoms)
atoms = self.scale(atoms, factor)
atoms = self.add_h(atoms)
self.output_message.value, atoms = self.add_h(atoms)
self.structure = atoms
self.file_upload.value.clear()

0 comments on commit 9db2e6e

Please sign in to comment.