Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow ability to extend a sweep with a prior default value #5

Merged
merged 3 commits into from
Sep 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 45 additions & 9 deletions ml_experiment/DefinitionPart.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,30 @@ def __init__(self, name: str, base: str | None = None):
self.base_path = base or os.getcwd()

self._properties: Dict[str, Set[ValueType]] = defaultdict(set)

def add_property(self, key: str, value: ValueType):
self._prior_values: Dict[str, ValueType] = {}

def add_property(
self,
key: str,
value: ValueType,
assume_prior_value: ValueType | None = None,
):
self._properties[key].add(value)

def add_sweepable_property(self, key: str, values: Iterable[ValueType]):
if assume_prior_value is not None:
self._prior_values[key] = assume_prior_value

def add_sweepable_property(
self,
key: str,
values: Iterable[ValueType],
assume_prior_value: ValueType | None = None,
):
self._properties[key] |= set(values)

if assume_prior_value is not None:
self._prior_values[key] = assume_prior_value

def get_results_path(self) -> str:
import __main__
experiment_name = __main__.__file__.split('/')[-2]
Expand All @@ -41,16 +58,17 @@ def commit(self):
# grabbing from prior tables where possible, or generating a unique id for new configs
next_config_id = table_registry.get_max_configuration_id(cur, self.name) + 1
for configuration in configurations:
existing_id = table_registry.get_configuration_id(cur, self.name, configuration)
config_query = self._get_configuration_without_priors(configuration)

if existing_id is not None:
configuration['id'] = existing_id
configuration['id'] = (
table_registry.get_configuration_id(cur, self.name, configuration)
.flat_otherwise(lambda: table_registry.get_configuration_id(cur, self.name, config_query))
.or_else(next_config_id)
)

else:
configuration['id'] = next_config_id
if configuration['id'] == next_config_id:
next_config_id += 1


# determine whether we should build a new table
# and what version to call that table
latest_table = table_registry.get_latest_version(cur, self.name)
Expand All @@ -73,6 +91,24 @@ def commit(self):
con.commit()
con.close()

def _get_configuration_without_priors(self, configuration: Dict[str, ValueType]):
"""
When a new property is introduced that has an assumed prior value,
then we need to search for configuration ids without the new
property and associated those with the new config.

This function gives back the configuration to search for to
obtain an id.
"""
out = {}
for k, v in configuration.items():
if k in self._prior_values and v == self._prior_values[k]:
continue

out[k] = v

return out


def generate_configurations(properties: Dict[str, Set[ValueType]]):
for configuration in product(*properties.values()):
Expand Down
54 changes: 54 additions & 0 deletions ml_experiment/_utils/maybe.py
Copy link
Collaborator Author

@andnp andnp Sep 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a Maybe monad. It's an algebraic type, meaning that transformations (e.g. via map and otherwise) are type-preserving --- they also return a Maybe. The transformation may alter the interior type, i.e. Maybe[str] might become a Maybe[int] in the following:

maybe_str = get_user_input()
maybe_int = maybe_str.map(parseInt)

Maybe monads are quite helpful when you have complex "nullable" logic where you might go down one of many code paths depending on if any of those code paths return a None.


Ideally, we would refactor a lot of the internal code to use Maybe[T] instead of T | None for consistency and to reduce several if x is None: ... guards.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks! I will work on adding maybe monad's to other parts

Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from __future__ import annotations
from typing import Callable, Generic, TypeVar


T = TypeVar('T')
U = TypeVar('U')

class Maybe(Generic[T]):
def __init__(self, v: T | None):
self._v: T | None = v


def map(self, f: Callable[[T], U | None]) -> Maybe[U]:
if self._v is None:
return Maybe[U](None)

u = f(self._v)
return Maybe(u)


def flat_map(self, f: Callable[[T], Maybe[U]]) -> Maybe[U]:
if self._v is None:
return Maybe[U](None)

return f(self._v)


def flat_otherwise(self, f: Callable[[], Maybe[T]]) -> Maybe[T]:
if self._v is None:
return f()

return self


def or_else(self, t: T) -> T:
if self._v is None:
return t

return self._v


def expect(self, msg: str = '') -> T:
if self._v is None:
raise Exception(msg)

return self._v


def is_none(self):
return self._v is None


def is_some(self):
return self._v is not None
10 changes: 5 additions & 5 deletions ml_experiment/metadata/MetadataTableRegistry.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
import ml_experiment._utils.sqlite as sqlu

from typing import Dict, Iterable
from ml_experiment._utils.maybe import Maybe
from ml_experiment.metadata.MetadataTable import MetadataTable, ValueType


class MetadataTableRegistry:
def __init__(self):
# cached results
Expand Down Expand Up @@ -73,11 +73,11 @@ def get_max_configuration_id(self, cur: sqlite3.Cursor, part_name: str) -> int:
return max(all_ids)


def get_configuration_id(self, cur: sqlite3.Cursor, part_name: str, configuration: Dict[str, ValueType]) -> int | None:
def get_configuration_id(self, cur: sqlite3.Cursor, part_name: str, configuration: Dict[str, ValueType]) -> Maybe[int]:
latest = self.get_latest_version(cur, part_name)

if latest is None:
return None
return Maybe(None)

# walk backwards starting from the latest version
# if any table contains an id, then stop
Expand All @@ -89,9 +89,9 @@ def get_configuration_id(self, cur: sqlite3.Cursor, part_name: str, configuratio

conf_id = table.get_configuration_id(cur, configuration)
if conf_id is not None:
return conf_id
return Maybe(conf_id)

return None
return Maybe(None)


def create_new_table(self, cur: sqlite3.Cursor, part_name: str, version: int, config_params: Iterable[str]) -> MetadataTable:
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ version_files = ["pyproject.toml"]

[tool.ruff.lint]
select = ['F', 'E', 'W', 'B']
ignore = ['E501', 'E701']
ignore = ['E501', 'E701', 'B023']
Copy link
Collaborator Author

@andnp andnp Sep 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This new warning is for unbound variable captures in a for loop. Python really isn't a great language for functional programming.

Something like this:

obj = {}
for i in range(10):
  call_later(lambda: obj['a'] = i)

depending on when that call_later function runs (e.g. immediately, or some other time in another thread) the result of the loop might be different.

With the Maybe monad, we use exactly this pattern. However, we know that function executions always happen synchronously and are blocking, so this is a safe pattern. So we'll just tell ruff to ignore it.


[tool.pyright]
include = ['ml_experiment']
Expand Down
Loading