Skip to content

Commit

Permalink
Merging updates
Browse files Browse the repository at this point in the history
  • Loading branch information
Neeratyoy committed Nov 6, 2023
2 parents c3aa03d + 46c6128 commit 41df811
Show file tree
Hide file tree
Showing 6 changed files with 153 additions and 208 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,5 +40,5 @@ jobs:
run: poetry install

- name: Run pytest
timeout-minutes: 10
timeout-minutes: 15
run: poetry run pytest -m "all_examples or metahyper"
Original file line number Diff line number Diff line change
Expand Up @@ -32,30 +32,6 @@ def __init__(
def get_budget_level(self, config) -> int:
    """Return the discrete budget level of ``config``'s current fidelity.

    The level is the number of ``self.b_step`` increments that separate the
    fidelity's current value from its lower bound.

    Args:
        config: A configuration exposing ``fidelity.value`` and
            ``fidelity.lower``.

    Returns:
        The budget level. A ratio that is within floating-point noise of an
        integer is snapped to that integer; any other ratio is truncated
        towards zero, as ``int()`` does.
    """
    import math  # local import: the file's import block is not visible here

    ratio = (config.fidelity.value - config.fidelity.lower) / self.b_step
    nearest = round(ratio)
    # Float division of on-grid fidelities can land just below the exact
    # level (e.g. 0.3 / 0.1 == 2.9999999999999996); plain truncation would
    # then report one level too low.
    if math.isclose(ratio, nearest, rel_tol=1e-9, abs_tol=1e-9):
        return int(nearest)
    return int(ratio)

# def _preprocess_tabular(self, x: pd.Series) -> pd.Series:
# if len(x) == 0:
# return x
# # extract fid name
# _x = x.loc[0].hp_values()
# _x.pop("id")
# fid_name = list(_x.keys())[0]
# for i in x.index.values:
# # extracting actual HPs from the tabular space
# _config = self.pipeline_space.custom_grid_table.loc[x.loc[i]["id"].value].to_dict()
# # updating fidelities as per the candidate set passed
# _config.update({fid_name: x.loc[i][fid_name].value})
# # placeholder config from the raw tabular space
# config = self.pipeline_space.raw_tabular_space.sample(
# patience=100,
# user_priors=True,
# ignore_fidelity=True # True allows fidelity to appear in the sample
# )
# # copying values from table to placeholder config of type SearchSpace
# config.load_from(_config)
# # replacing the ID in the candidate set with the actual HPs of the config
# x.loc[i] = config
# return x

def preprocess(self, x: pd.Series) -> Tuple[Iterable, Iterable]:
"""Prepares the configurations for appropriate EI calculation.
Expand All @@ -68,7 +44,6 @@ def preprocess(self, x: pd.Series) -> Tuple[Iterable, Iterable]:
# preprocess tabular space differently
# expected input: IDs pertaining to the tabular data
# expected output: IDs pertaining to current observations and set of HPs
# x = self._preprocess_tabular(x)
x = map_real_hyperparameters_from_tabular_ids(x, self.pipeline_space)
indices_to_drop = []
for i, config in x.items():
Expand All @@ -77,18 +52,17 @@ def preprocess(self, x: pd.Series) -> Tuple[Iterable, Iterable]:
# IMPORTANT to set the fidelity at which EI will be calculated only for
# the partial configs that have been observed already
target_fidelity = config.fidelity.value + self.b_step
config.fidelity.value = min(
target_fidelity, config.fidelity.upper
) # to respect the bounded fidelity

if np.less_equal(target_fidelity, config.fidelity.upper):
# only consider the configs with fidelity lower than the max fidelity
config.fidelity.value = target_fidelity
budget_list.append(self.get_budget_level(config))
else:
# if the target_fidelity is higher than the max, drop the configuration
indices_to_drop.append(i)
else:
config.fidelity.value = target_fidelity

if np.isclose(target_fidelity, config.fidelity.value):
# the fidelity was set, so the configuration will be considered
budget_list.append(self.get_budget_level(config))
else:
# the fidelity was not set, the configuration will be dropped
indices_to_drop.append(i)

# Drop unused configs
x.drop(labels=indices_to_drop, inplace=True)
Expand All @@ -103,22 +77,19 @@ def preprocess(self, x: pd.Series) -> Tuple[Iterable, Iterable]:
inc_list.append(inc)

return x, torch.Tensor(inc_list)

def preprocess_gp(self, x: Iterable) -> Tuple[Iterable, Iterable]:
    """Preprocess ``x`` and return the configurations as a plain list.

    Delegates to ``self.preprocess`` and converts the configurations it
    returns with ``.values.tolist()``.

    Args:
        x: Candidate configurations, forwarded unchanged to
            ``self.preprocess``.
            NOTE(review): ``preprocess`` is annotated as taking a
            ``pd.Series``, so ``Iterable`` here looks too loose - confirm.

    Returns:
        A tuple ``(configs, inc_list)``: the preprocessed configurations as
        a list, and the second value returned by ``self.preprocess``,
        passed through untouched.
    """
    x, inc_list = self.preprocess(x)
    # The result of ``preprocess`` must expose ``.values`` (presumably a
    # pd.Series - verify); callers receive a list instead.
    return x.values.tolist(), inc_list

def preprocess_deep_gp(self, x: Iterable) -> Tuple[Iterable, Iterable]:
x, inc_list = self.preprocess(x)
x_lcs = []
for idx in x.index:
if idx in self.observations.df.index.levels[0]:
budget_level = max(0, self.get_budget_level(x[idx]) - 1)
lc = self.observations.extract_learning_curve(
idx, budget_level
)
budget_level = self.get_budget_level(x[idx])
lc = self.observations.extract_learning_curve(idx, budget_level)
else:
# TODO: comment to explain why this is needed (karibbov)
# initialize a learning curve with a place holder
# This is later padded accordingly for the Conv1D layer
lc = [0.0]
Expand All @@ -137,26 +108,32 @@ def preprocess_pfn(self, x: Iterable) -> Tuple[Iterable, Iterable, Iterable]:
len_partial = len(self.observations.seen_config_ids)
z_min = x[0].fidelity.lower
# converting fidelity to the discrete budget level
# STRICT ASSUMPTION: fidelity is the second dimension
_x_tok[:len_partial, 1] = (_x_tok[:len_partial, 1] + self.b_step - z_min) / self.b_step
# STRICT ASSUMPTION: fidelity is the first dimension
_x_tok[:len_partial, 0] = (
_x_tok[:len_partial, 0] + self.b_step - z_min
) / self.b_step
return _x_tok, _x, inc_list

def eval(
self, x: pd.Series, asscalar: bool = False
) -> Tuple[np.ndarray, pd.Series]:
def eval(self, x: pd.Series, asscalar: bool = False) -> Tuple[np.ndarray, pd.Series]:
# _x = x.copy() # preprocessing needs to change the reference x Series so we don't copy here
if self.surrogate_model_name == "pfn":
_x_tok, _x, inc_list = self.preprocess_pfn(x.copy()) # IMPORTANT change from vanilla-EI
_x_tok, _x, inc_list = self.preprocess_pfn(
x.copy()
) # IMPORTANT change from vanilla-EI
ei = self.eval_pfn_ei(_x_tok, inc_list)
elif self.surrogate_model_name == "deep_gp":
_x, inc_list = self.preprocess_deep_gp(x.copy()) # IMPORTANT change from vanilla-EI
_x, inc_list = self.preprocess_deep_gp(
x.copy()
) # IMPORTANT change from vanilla-EI
ei = self.eval_gp_ei(_x, inc_list)
_x = pd.Series(_x, index=np.arange(len(_x)))
else:
_x, inc_list = self.preprocess_gp(x.copy()) # IMPORTANT change from vanilla-EI
_x, inc_list = self.preprocess_gp(
x.copy()
) # IMPORTANT change from vanilla-EI
ei = self.eval_gp_ei(_x, inc_list)
_x = pd.Series(_x, index=np.arange(len(_x)))

if ei.is_cuda:
ei = ei.cpu()
if len(x) > 1 and asscalar:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import pandas as pd

from ....search_spaces.search_space import SearchSpace
from ...multi_fidelity.utils import MFObservedData, continuous_to_tabular
from ...multi_fidelity.utils import MFObservedData
from .base_acq_sampler import AcquisitionSampler


Expand All @@ -23,29 +23,27 @@ def __init__(self, **kwargs):
self.pipeline_space = None
self.is_tabular = False


def _sample_new(
self, index_from: int, n: int = None, ignore_fidelity: bool = False
) -> pd.Series:

n = n if n is not None else self.SAMPLES_TO_DRAW
new_configs = [self.pipeline_space.sample(
patience=self.patience, user_priors=False, ignore_fidelity=ignore_fidelity
) for _ in range(n)]

# if self.tabular_space is not None:
# # This function has 3 possible return options:
# # 1. Tabular data is provided then, n configs are sampled from the table
# # 2. Tabular data is not provided and a list of configs is provided then, same list of configs is returned
# # 3. Tabular data is not provided and a single config is provided then, n configs will be sampled randomly
# new_configs=self.tabular_space.sample(index_from=index_from, config=new_configs, n=n)

new_configs = [
self.pipeline_space.sample(
patience=self.patience, user_priors=False, ignore_fidelity=ignore_fidelity
)
for _ in range(n)
]

return pd.Series(
new_configs, index=range(index_from, index_from + len(new_configs))
)

def _sample_new_unique(
self, index_from: int, n: int = None, patience: int = 10, ignore_fidelity: bool=False
self,
index_from: int,
n: int = None,
patience: int = 10,
ignore_fidelity: bool = False,
) -> pd.Series:
n = n if n is not None else self.SAMPLES_TO_DRAW
assert (
Expand All @@ -58,13 +56,17 @@ def _sample_new_unique(
# Sample patience times for an unobserved configuration
for _ in range(patience):
_config = self.pipeline_space.sample(
patience=self.patience, user_priors=False, ignore_fidelity=ignore_fidelity
patience=self.patience,
user_priors=False,
ignore_fidelity=ignore_fidelity,
)
# # Convert continuous into tabular if the space is tabular
# _config = continuous_to_tabular(_config, self.tabular_space)
# Iterate over all observed configs
for config in existing_configs:
if _config.is_equal_value(config, include_fidelity=not ignore_fidelity):
if _config.is_equal_value(
config, include_fidelity=not ignore_fidelity
):
# if the sampled config already exists
# do the next iteration of patience
break
Expand All @@ -90,36 +92,47 @@ def _sample_new_unique(
)

def sample(
self,
acquisition_function=None,
n: int = None,
set_new_sample_fidelity: int | float=None
) -> list():
self,
acquisition_function=None,
n: int = None,
set_new_sample_fidelity: int | float = None,
) -> list():
"""Samples a new set and returns the total set of observed + new configs."""
partial_configs = self.observations.get_partial_configs_at_max_seen()
new_configs = self._sample_new(
index_from=self.observations.next_config_id(), n=n, ignore_fidelity=False
)

def __sample_single_new_tabular(index: int):
    """Sample one placeholder config and bind it to an unseen table ID.

    Meant to be used in a list comprehension, to slightly speed up the
    sampling process when ``self.SAMPLES_TO_DRAW`` is large.

    Args:
        index: Position in ``_new_configs`` of the table ID to assign.

    Returns:
        The sampled config with its ``"id"`` and fidelity values set.
    """
    config = self.pipeline_space.sample(
        patience=self.patience, user_priors=False, ignore_fidelity=False
    )
    config["id"].value = _new_configs[index]
    # ``set_new_sample_fidelity`` defaults to None; fall back to the lowest
    # fidelity in that case so the fidelity is never overwritten with None.
    config.fidelity.value = (
        set_new_sample_fidelity
        if set_new_sample_fidelity is not None
        else config.fidelity.lower
    )
    return config

if self.is_tabular:
_n = n if n is not None else self.SAMPLES_TO_DRAW
_partial_ids = set([conf["id"].value for conf in partial_configs])
_partial_ids = {conf["id"].value for conf in partial_configs}
_all_ids = set(self.pipeline_space.custom_grid_table.index.values)
# accounting for unseen configs only
_n = min(_n, len(_all_ids - _partial_ids))
_new_configs = np.random.choice(list(_all_ids - _partial_ids), size=_n, replace=False)
new_configs = [self.pipeline_space.sample(
patience=self.patience, user_priors=False, ignore_fidelity=False
) for _ in range(_n)]
for i, config in enumerate(new_configs):
config["id"].value = _new_configs[i]
config.fidelity.value = self.pipeline_space.fidelity.lower
_new_configs = np.random.choice(
list(_all_ids - _partial_ids), size=_n, replace=False
)
new_configs = [__sample_single_new_tabular(i) for i in range(_n)]
new_configs = pd.Series(
new_configs,
index=np.arange(len(partial_configs), len(partial_configs) + len(new_configs))
index=np.arange(
len(partial_configs), len(partial_configs) + len(new_configs)
),
)

if set_new_sample_fidelity is not None:
elif set_new_sample_fidelity is not None:
for config in new_configs:
config.fidelity.value = set_new_sample_fidelity

Expand All @@ -135,12 +148,8 @@ def sample(
# incrementing fidelities multiple times due to pass-by-reference
partial_configs = pd.Series(partial_configs_list, index=index_list)

# Set fidelity for new configs
for _, config in new_configs.items():
config.fidelity.value = config.fidelity.lower

configs = pd.concat([partial_configs, new_configs])

return configs

def set_state(
Expand All @@ -155,6 +164,8 @@ def set_state(
self.observations = observations
self.b_step = b_step
self.n = n if n is not None else self.SAMPLES_TO_DRAW
if hasattr(self.pipeline_space, "custom_grid_table") and self.pipeline_space.custom_grid_table is not None:
if (
hasattr(self.pipeline_space, "custom_grid_table")
and self.pipeline_space.custom_grid_table is not None
):
self.is_tabular = True

Loading

0 comments on commit 41df811

Please sign in to comment.