Update argument from nBits to fpSize to accommodate new datamol version (#17)

* Update argument from nBits to fpSize to accommodate new datamol version
* One more update argument from nBits to fpSize
* One more update argument from nBits to fpSize
* Update datamol version in pyproject.toml
* Update tests for argument change

---------

Co-authored-by: Cas Wognum <[email protected]>
datamol-io · Jun 27, 2024 · 00a07ed · 00a07ed
1 parent 3cf21a0
commit 00a07ed
Show file tree

Hide file tree

Showing 5 changed files with 5 additions and 5 deletions.
diff --git a/env.yml b/env.yml
@@ -18,7 +18,7 @@ dependencies:
   - jenkspy
 
   # Chemistry
-  - datamol >=0.11.1
+  - datamol >=0.12.5
 
   # Optional: SIMPD splitter
   - pymoo >=0.6

diff --git a/pyproject.toml b/pyproject.toml
@@ -40,7 +40,7 @@ dependencies = [
     "matplotlib",
     "seaborn",
     "jenkspy",
-    "datamol",
+    "datamol>=0.12.5",
     "rdkit",
 ]
 

diff --git a/splito/_distance_split_base.py b/splito/_distance_split_base.py
@@ -13,7 +13,7 @@
 from .utils import get_kmeans_clusters
 
 # In case users provide a list of SMILES instead of features, we rely on ECFP4 and the tanimoto distance by default
-MOLECULE_DEFAULT_FEATURIZER = dict(name="ecfp", kwargs=dict(radius=2, nBits=2048))
+MOLECULE_DEFAULT_FEATURIZER = dict(name="ecfp", kwargs=dict(radius=2, fpSize=2048))
 MOLECULE_DEFAULT_DISTANCE_METRIC = "jaccard"
 
 

diff --git a/splito/lohi/_lo.py b/splito/lohi/_lo.py
@@ -78,7 +78,7 @@ def _select_distinct_clusters(self, smiles, values, n_jobs, verbose):
         train_nodes = np.array(range(len(smiles)))
 
         train_fps = dm.parallelized(
-            functools.partial(dm.to_fp, as_array=False, radius=2, nBits=1024),
+            functools.partial(dm.to_fp, as_array=False, radius=2, fpSize=1024),
             smiles,
             n_jobs=n_jobs,
         )

diff --git a/tests/test_lo.py b/tests/test_lo.py
@@ -26,7 +26,7 @@ def one_cluster_check(train_idx, cluster_idx, smiles, threshold, min_cluster_siz
     # Ensure there is only one similar molecule in the train
     train_smiles = smiles[train_idx]
     cluster_smiles = smiles[cluster_idx]
-    distance_matrix = dm.similarity.cdist(cluster_smiles, train_smiles, radius=2, nBits=1024)
+    distance_matrix = dm.similarity.cdist(cluster_smiles, train_smiles, radius=2, fpSize=1024)
     similarity_matrix = 1.0 - distance_matrix
     is_too_similar = similarity_matrix > threshold
     no_hits_per_mol = np.sum(is_too_similar, axis=1)