Skip to content

Commit

Permalink
Merge pull request #28 from kkovary/add-batch-parallelized-to-CommonA…
Browse files Browse the repository at this point in the history
…lertsFilters

Enable batch parallelization for `CommonAlertsFilters`
  • Loading branch information
maclandrol authored Nov 18, 2024
2 parents 18e0d97 + 3a920fd commit a17bc12
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 17 deletions.
41 changes: 28 additions & 13 deletions medchem/structural/_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,8 +165,9 @@ def __call__(
progress: bool = False,
progress_leave: bool = False,
scheduler: str = "auto",
batch_size: Optional[int] = None,
keep_details: bool = False,
):
) -> pd.DataFrame:
"""Run alert evaluation on this list of molecule and return the full dataframe
Args:
Expand All @@ -175,6 +176,7 @@ def __call__(
progress: whether to show progress or not.
progress_leave: whether to leave the progress bar or not.
scheduler: which scheduler to use. If "auto", will use "processes" if `len(mols) > 500` else "threads".
batch_size: batch size to use for parallelization.
keep_details: whether to keep the details of the evaluation or not.
"""

Expand All @@ -183,18 +185,31 @@ def __call__(
scheduler = "processes" # pragma: no cover
else:
scheduler = "threads"

results = dm.parallelized(
functools.partial(self._evaluate, keep_details=keep_details),
mols,
progress=progress,
n_jobs=n_jobs,
scheduler=scheduler,
tqdm_kwargs=dict(
desc="Common alerts filtering",
leave=progress_leave,
),
)
if batch_size:
results = dm.parallelized_with_batches(
lambda batch: [functools.partial(self._evaluate, keep_details=keep_details)(mol) for mol in batch],
mols,
progress=progress,
n_jobs=n_jobs,
scheduler=scheduler,
batch_size=batch_size,
tqdm_kwargs=dict(
desc="Common alerts filtering",
leave=progress_leave,
),
)
else:
results = dm.parallelized(
functools.partial(self._evaluate, keep_details=keep_details),
mols,
progress=progress,
n_jobs=n_jobs,
scheduler=scheduler,
tqdm_kwargs=dict(
desc="Common alerts filtering",
leave=progress_leave,
),
)
results = pd.DataFrame(results)

return results
11 changes: 7 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -87,17 +87,20 @@ omit = ["medchem/__init__.py", "medchem/_version.py"]
output = "coverage.xml"

[tool.ruff]
line-length = 110
target-version = "py311"
extend-exclude = ["*.ipynb"] # Exclude Jupyter notebooks

[tool.ruff.lint]
ignore = [
"E501", # Never enforce `E501` (line length violations).
]
line-length = 110
target-version = "py311"

[tool.ruff.per-file-ignores]
[tool.ruff.lint.per-file-ignores]
"__init__.py" = [
"F401", # imported but unused
"E402", # Module level import not at top of file
]

[tool.ruff.pycodestyle]
[tool.ruff.lint.pycodestyle]
max-doc-length = 150

0 comments on commit a17bc12

Please sign in to comment.