Skip to content

Commit

Permalink
fix(runtime): filelock issues (#161)
Browse files Browse the repository at this point in the history
  • Loading branch information
eddiebergman authored Jan 8, 2025
1 parent 79628d0 commit 72a9315
Show file tree
Hide file tree
Showing 76 changed files with 1,853 additions and 4,641 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
__pycache__
dist
**/*.egg-info
uv.lock

# Log files
*.out
Expand Down
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ repos:
files: '^src/.*\.py$'

- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.11.2
rev: v1.14.1
hooks:
- id: mypy
files: |
Expand All @@ -43,7 +43,7 @@ repos:
- "--show-traceback"

- repo: https://github.com/python-jsonschema/check-jsonschema
rev: 0.29.3
rev: 0.31.0
hooks:
- id: check-github-workflows
files: '^github/workflows/.*\.ya?ml$'
Expand All @@ -52,7 +52,7 @@ repos:
files: '^\.github/dependabot\.ya?ml$'

- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: v0.6.9
rev: v0.8.6
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix, --no-cache]
Expand Down
20 changes: 10 additions & 10 deletions neps/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,23 +20,23 @@

__all__ = [
"Architecture",
"Integer",
"Float",
"Categorical",
"Constant",
"Function",
"ArchitectureParameter",
"Categorical",
"CategoricalParameter",
"Constant",
"ConstantParameter",
"Float",
"FloatParameter",
"IntegerParameter",
"Function",
"FunctionParameter",
"run",
"plot",
"get_summary_dict",
"status",
"GraphGrammar",
"GraphGrammarCell",
"GraphGrammarRepetitive",
"Integer",
"IntegerParameter",
"get_summary_dict",
"plot",
"run",
"status",
"tblogger",
]
7 changes: 6 additions & 1 deletion neps/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ def run(
objective_to_minimize_value_on_error: None | float = Default(None),
cost_value_on_error: None | float = Default(None),
pre_load_hooks: Iterable | None = Default(None),
sample_batch_size: int | None = Default(None),
searcher: (
Literal[
"default",
Expand Down Expand Up @@ -98,6 +99,8 @@ def run(
cost_value_on_error: Setting this and objective_to_minimize_value_on_error to any float will
suppress any error and will use given cost value instead. default: None
pre_load_hooks: List of functions that will be called before load_results().
sample_batch_size: The number of samples to ask for in a single call to the
optimizer.
searcher: Which optimizer to use. Can be a string identifier, an
instance of BaseOptimizer, or a Path to a custom optimizer.
**searcher_kwargs: Will be passed to the searcher. This is usually only needed by
Expand Down Expand Up @@ -236,6 +239,7 @@ def run(
ignore_errors=settings.ignore_errors,
overwrite_optimization_dir=settings.overwrite_working_directory,
pre_load_hooks=settings.pre_load_hooks,
sample_batch_size=settings.sample_batch_size,
)

if settings.post_run_summary:
Expand Down Expand Up @@ -278,7 +282,8 @@ def _run_args(
"mobster",
"asha",
]
| BaseOptimizer | dict
| BaseOptimizer
| dict
) = "default",
**searcher_kwargs,
) -> tuple[BaseOptimizer, dict]:
Expand Down
83 changes: 58 additions & 25 deletions neps/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import os
from collections.abc import Callable
from typing import Any, TypeVar
from typing import Any, Literal, TypeVar

T = TypeVar("T")
V = TypeVar("V")
Expand All @@ -28,6 +28,38 @@ def is_nullable(e: str) -> bool:
return e.lower() in ("none", "n", "null")


def yaml_or_json(e: str) -> Literal["yaml", "json"]:
    """Parse a string naming a serialization format.

    The comparison is case-insensitive and the returned value is always
    lowercase.

    Args:
        e: The raw string value to parse.

    Returns:
        Either `"yaml"` or `"json"`.

    Raises:
        ValueError: If `e` is neither `"yaml"` nor `"json"` (ignoring case).
    """
    # Lowercase once and return a literal constant from each branch, so the
    # declared `Literal` return type holds without a blanket `# type: ignore`.
    normalized = e.lower()
    if normalized == "yaml":
        return "yaml"
    if normalized == "json":
        return "json"
    raise ValueError(f"Expected 'yaml' or 'json', got '{e}'.")


LINUX_FILELOCK_FUNCTION = get_env(
"NEPS_LINUX_FILELOCK_FUNCTION",
parse=str,
default="lockf",
)
MAX_RETRIES_GET_NEXT_TRIAL = get_env(
"NEPS_MAX_RETRIES_GET_NEXT_TRIAL",
parse=int,
default=10,
)
MAX_RETRIES_SET_EVALUATING = get_env(
"NEPS_MAX_RETRIES_SET_EVALUATING",
parse=int,
default=10,
)
MAX_RETRIES_CREATE_LOAD_STATE = get_env(
"NEPS_MAX_RETRIES_CREATE_LOAD_STATE",
parse=int,
default=10,
)
MAX_RETRIES_WORKER_CHECK_SHOULD_STOP = get_env(
"NEPS_MAX_RETRIES_WORKER_CHECK_SHOULD_STOP",
parse=int,
default=3,
)
TRIAL_FILELOCK_POLL = get_env(
"NEPS_TRIAL_FILELOCK_POLL",
parse=float,
Expand All @@ -38,40 +70,31 @@ def is_nullable(e: str) -> bool:
parse=lambda e: None if is_nullable(e) else float(e),
default=120,
)

SEED_SNAPSHOT_FILELOCK_POLL = get_env(
"NEPS_SEED_SNAPSHOT_FILELOCK_POLL",
FS_SYNC_GRACE_BASE = get_env(
"NEPS_FS_SYNC_GRACE_BASE",
parse=float,
default=0.05,
default=0.00, # Keep it low initially to not punish synced os
)
SEED_SNAPSHOT_FILELOCK_TIMEOUT = get_env(
"NEPS_SEED_SNAPSHOT_FILELOCK_TIMEOUT",
parse=lambda e: None if is_nullable(e) else float(e),
default=120,
)

OPTIMIZER_INFO_FILELOCK_POLL = get_env(
"NEPS_OPTIMIZER_INFO_FILELOCK_POLL",
FS_SYNC_GRACE_INC = get_env(
"NEPS_FS_SYNC_GRACE_INC",
parse=float,
default=0.05,
)
OPTIMIZER_INFO_FILELOCK_TIMEOUT = get_env(
"NEPS_OPTIMIZER_INFO_FILELOCK_TIMEOUT",
parse=lambda e: None if is_nullable(e) else float(e),
default=120,
default=0.1,
)

OPTIMIZER_STATE_FILELOCK_POLL = get_env(
"NEPS_OPTIMIZER_STATE_FILELOCK_POLL",
# NOTE: We want this to be greater than the trials filelock, so that
# anything requesting to just update the trials is more likely to obtain it
# as those operations tend to be faster than something that requires optimizer
# state.
STATE_FILELOCK_POLL = get_env(
"NEPS_STATE_FILELOCK_POLL",
parse=float,
default=0.05,
default=0.20,
)
OPTIMIZER_STATE_FILELOCK_TIMEOUT = get_env(
"NEPS_OPTIMIZER_STATE_FILELOCK_TIMEOUT",
STATE_FILELOCK_TIMEOUT = get_env(
"NEPS_STATE_FILELOCK_TIMEOUT",
parse=lambda e: None if is_nullable(e) else float(e),
default=120,
)

GLOBAL_ERR_FILELOCK_POLL = get_env(
"NEPS_GLOBAL_ERR_FILELOCK_POLL",
parse=float,
Expand All @@ -82,3 +105,13 @@ def is_nullable(e: str) -> bool:
parse=lambda e: None if is_nullable(e) else float(e),
default=120,
)
TRIAL_CACHE_MAX_UPDATES_BEFORE_CONSOLIDATION = get_env(
"NEPS_TRIAL_CACHE_MAX_UPDATES_BEFORE_CONSOLIDATION",
parse=int,
default=10,
)
CONFIG_SERIALIZE_FORMAT: Literal["yaml", "json"] = get_env( # type: ignore
"NEPS_CONFIG_SERIALIZE_FORMAT",
parse=yaml_or_json,
default="yaml",
)
34 changes: 12 additions & 22 deletions neps/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

from __future__ import annotations

from typing import Any


class NePSError(Exception):
"""Base class for all NePS exceptions.
Expand All @@ -11,35 +13,23 @@ class NePSError(Exception):
"""


class VersionMismatchError(NePSError):
"""Raised when the version of a resource does not match the expected version."""


class VersionedResourceAlreadyExistsError(NePSError):
"""Raised when a version already exists when trying to create a new versioned
data.
"""


class VersionedResourceRemovedError(NePSError):
"""Raised when a version already exists when trying to create a new versioned
data.
"""


class VersionedResourceDoesNotExistsError(NePSError):
"""Raised when a versioned resource does not exist at a location."""


class LockFailedError(NePSError):
"""Raised when a lock cannot be acquired."""


class TrialAlreadyExistsError(VersionedResourceAlreadyExistsError):
class TrialAlreadyExistsError(NePSError):
    """Error signalling that the store already holds a trial with this id."""

    def __init__(self, trial_id: str, *args: Any) -> None:
        """Remember the offending trial id and forward all arguments to the base.

        Args:
            trial_id: Identifier of the trial that already exists.
            *args: Extra positional arguments passed on to the base exception.
        """
        self.trial_id = trial_id
        super().__init__(trial_id, *args)

    def __str__(self) -> str:
        """Return a message naming the trial id that already exists."""
        message = f"Trial with id {self.trial_id} already exists!"
        return message


class TrialNotFoundError(VersionedResourceDoesNotExistsError):
class TrialNotFoundError(NePSError):
    """Raised when a trial cannot be found in the store."""


Expand Down
10 changes: 5 additions & 5 deletions neps/optimizers/base_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,17 +111,17 @@ def __init__(
def ask(
self,
trials: Mapping[str, Trial],
max_cost_total_info: BudgetInfo | None,
) -> SampledConfig:
budget_info: BudgetInfo | None,
n: int | None = None,
) -> SampledConfig | list[SampledConfig]:
"""Sample a new configuration.
Args:
trials: All of the trials that are known about.
max_cost_total_info: information about the max_cost_total
budget_info: information about the budget constraints.
Returns:
SampledConfig: a sampled configuration
dict: state the optimizer would like to keep between calls
The sampled configuration(s)
"""
...

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@

__all__ = [
"AcquisitionMapping",
"BaseAcquisition",
"ComprehensiveExpectedImprovement",
"UpperConfidenceBound",
"BaseAcquisition",
]
Loading

0 comments on commit 72a9315

Please sign in to comment.