Skip to content

Commit

Permalink
Refactor the contender spec pattern
Browse files Browse the repository at this point in the history
First draft sent to Ville
  • Loading branch information
fwhigh committed May 16, 2021
1 parent 0679bda commit 6aa0775
Show file tree
Hide file tree
Showing 10 changed files with 874 additions and 630 deletions.
5 changes: 5 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@ dev:
pip install --upgrade --upgrade-strategy eager -r requirements.txt
pip install -e .

# Install what the example needs: upgrade pip, install the example
# requirements, then add the plotly JupyterLab extension.
.PHONY: example
example:
	pip install --upgrade pip
	pip install -r example-requirements.txt
	jupyter labextension install jupyterlab-plotly

# docs:
# $(MAKE) -C docs html

Expand Down
3 changes: 3 additions & 0 deletions example-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
metaflow
scikit-learn
plotly-express
kaleido
pandas
numpy
scipy
jupyterlab
71 changes: 56 additions & 15 deletions examples/model-tournament/common.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,27 @@
import os, errno
from importlib import import_module
import subprocess
import time
import re
from pathlib import Path
import pickle
import pandas as pd
from sklearn.datasets import make_regression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from tensorflow.python.keras import Sequential
from tensorflow.python.keras.layers import Dense, Dropout
import plotly.graph_objects as go
import plotly

from metaflow_helper.constants import RunMode


def import_object_from_string(path):
    """Import and return the object named by a dotted path.

    Everything before the last dot is imported as a module; the text after
    the last dot is fetched from that module as an attribute.

    Args:
        path: Dotted path such as ``'package.module.attribute'``.

    Returns:
        The attribute looked up on the imported module.

    Raises:
        ValueError: If ``path`` has no module part (no dot, or a leading dot
            only), so there is nothing to import.
        ImportError: If the module part cannot be imported.
        AttributeError: If the module has no attribute with that name.
    """
    module_path, _, obj_str = path.rpartition('.')
    if not module_path:
        # Without a module part there is nothing to import; fail with a
        # message that names the offending path.
        raise ValueError(
            f"expected a dotted path like 'package.module.attribute', got {path!r}"
        )
    module_ = import_module(module_path)
    return getattr(module_, obj_str)


def silent_rm_file(filename):
    """Delete ``filename``, doing nothing if it does not exist.

    Args:
        filename: Path of the file to remove.

    Raises:
        OSError: For any failure other than the file being absent (for
            example a permission problem or the path being a directory).
    """
    try:
        os.remove(filename)
    except FileNotFoundError:
        # FileNotFoundError is the OSError subclass raised for
        # errno.ENOENT; every other OSError still propagates.
        pass


def system_command_with_retry(cmd: list):
Expand Down Expand Up @@ -86,22 +91,58 @@ def build_model_pipeline(model, step_name='model'):


def parse_contender_model_init(contender):
    """Return the model initializer kwargs stored in a contender.

    Selects the entries whose key starts with ``__init_kwargs__model__`` and
    strips that whole prefix, so ``'__init_kwargs__model__max_depth'``
    becomes ``'max_depth'``.

    Args:
        contender: Mapping of contender parameter names to values.

    Returns:
        A new dict holding only the initializer kwargs, keyed by their
        unprefixed names.
    """
    return parse_contender(
        contender,
        prefix_filter='__init_kwargs__model__',
        prefix_to_remove='__init_kwargs__model__',
    )


def parse_contender_model_fit(contender):
    """Return the model fit kwargs stored in a contender.

    Selects the entries whose key starts with ``__fit_kwargs__model__`` but
    strips only ``__fit_kwargs__``, so ``'__fit_kwargs__model__verbose'``
    becomes ``'model__verbose'``.

    Args:
        contender: Mapping of contender parameter names to values.

    Returns:
        A new dict holding only the fit kwargs, keyed as ``model__<name>``.
    """
    # NOTE(review): the surviving 'model__' prefix presumably routes each
    # kwarg to the pipeline step named 'model' -- confirm against the caller.
    return parse_contender(
        contender,
        prefix_filter='__fit_kwargs__model__',
        prefix_to_remove='__fit_kwargs__',
    )


def parse_contender(contender, prefix_filter, prefix_to_remove):
    """Select contender entries by key prefix and strip a prefix from the keys.

    Args:
        contender: Mapping with string keys.
        prefix_filter: Only entries whose key starts with this string are kept.
        prefix_to_remove: Literal string removed from the start of each kept
            key. A kept key that does not start with it is left unchanged.

    Returns:
        A new dict with the selected entries under their renamed keys.
    """
    strip_len = len(prefix_to_remove)
    return {
        # A plain slice removes the literal prefix without building a regex
        # for every key.
        (key[strip_len:] if key.startswith(prefix_to_remove) else key): value
        for key, value in contender.items()
        if key.startswith(prefix_filter)
    }


def update_contender(contender, mode: RunMode, input_dim=None, best_iterations=None):
    """Add run-specific settings to a contender in place and return it.

    Each setting is written under both its bare key and its
    ``__init_kwargs__model__``-prefixed key.

    Args:
        contender: Dict of contender parameters; mutated in place.
        mode: Run mode. ``best_iterations`` is only recorded when this is
            ``RunMode.TRAIN``.
        input_dim: Value stored as the input dimension.
        best_iterations: Value stored as the iteration count in TRAIN mode.

    Returns:
        The same ``contender`` object that was passed in.
    """
    # NOTE(review): the bare keys ('iterations', 'input_dim', 'mode',
    # 'build_model') look like leftovers from the pre-prefix spec format --
    # confirm nothing still reads them before dropping them.
    if mode is RunMode.TRAIN:
        contender.update({
            'iterations': best_iterations,
            '__init_kwargs__model__iterations': best_iterations,
        })
    contender.update({
        'input_dim': input_dim,
        'mode': mode,
        '__init_kwargs__model__input_dim': input_dim,
        '__init_kwargs__model__mode': mode,
    })
    if '__build_model' in contender:
        # Resolve the dotted path stored under '__build_model' to the object
        # it names.
        contender.update({
            'build_model': import_object_from_string(contender['__build_model']),
        })
    return contender


def plot_all_scores(contender_results, dir, auto_open=True):
    """Draw one box of scores per contender and save the figure as PNG and HTML.

    Args:
        contender_results: Mapping whose keys are pickled contender dicts
            (each providing a ``'__model'`` entry) and whose values are
            dicts providing a ``'scores'`` entry.
        dir: Output directory; created, including parents, if missing. The
            name shadows the builtin but is kept because it is part of the
            signature.
        auto_open: Forwarded to ``plotly.offline.plot``.

    Returns:
        The plotly figure that was written.

    Raises:
        FileExistsError: If the PNG file is still present after the attempt
            to remove it.
    """
    Path(dir).mkdir(parents=True, exist_ok=True)
    png_path = f"{dir}/all-scores.png"
    html_path = f"{dir}/all-scores.html"

    # NOTE(review): pickle.loads can execute arbitrary code; the keys are
    # assumed to have been pickled by this project -- confirm they never
    # come from an untrusted source.
    df = pd.DataFrame.from_records([
        {**pickle.loads(key), **result}
        for key, result in contender_results.items()
    ])

    fig = go.Figure()
    for index, row in df.iterrows():
        # Keep only the text after the last dot of the model path; [-1]
        # also copes with a name that contains no dot.
        model_name = str(row['__model']).rsplit('.', 1)[-1]
        fig.add_trace(
            go.Box(
                name=f"{index} {model_name}",
                x=(f"{index}",),
                y=row['scores'],
            ),
        )
    fig.update_layout(
        xaxis_title='Model',
        yaxis_title='Score',
        template='none',
    )

    print(f'writing {png_path}')
    silent_rm_file(png_path)
    if os.path.isfile(png_path):
        raise FileExistsError(png_path)
    fig.write_image(png_path)

    print(f'writing {html_path}')
    silent_rm_file(html_path)
    plotly.offline.plot(fig, filename=html_path, auto_open=auto_open)
    return fig
56 changes: 24 additions & 32 deletions examples/model-tournament/config.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# Passed as 'n_features' in every make_regression_init_kwargs entry.
n_numeric_features = 10
# Passed as 'n_informative' in every make_regression_init_kwargs entry.
n_informative_numeric_features = 5
# One 'type_<i>' entry is generated for each i in range(n_categorical_features).
n_categorical_features = 2
make_regression_init_kwargs = {
f'type_{i}': {
'n_samples': 10_000,
'noise': 10,
'n_samples': round(10_000/n_categorical_features),
'noise': 100,
'n_features': n_numeric_features,
'n_informative': n_informative_numeric_features,
'coef': True,
Expand All @@ -13,43 +13,35 @@
for i in range(n_categorical_features)
}
# NOTE(review): presumably the fraction of samples held out for testing -- confirm.
test_size = 0.2
# NOTE(review): presumably the number of cross-validation splits -- confirm.
n_splits = 5
# Each spec maps a parameter name to the list of values to try for it.
# Keys starting with a double underscore are specially handled parameters:
#   '__model'                        dotted path of the model handler
#   '__init_kwargs__model__<name>'   goes to the model initializer as <name>
#   '__fit_kwargs__model__<name>'    goes to the model fitter
contenders_spec = [
    {
        # This is the algo
        '__model': ['metaflow_helper.model_handlers.LightGBMRegressorHandler'],
        # These go to the model initializer
        '__init_kwargs__model__learning_rate': [0.1],
        '__init_kwargs__model__max_depth': [1, 2, 3],
        '__init_kwargs__model__n_estimators': [10_000],
        # These go to the model fitter
        '__fit_kwargs__model__eval_metric': ['mse'],
        '__fit_kwargs__model__early_stopping_rounds': [10],
        '__fit_kwargs__model__verbose': [0],
    },
    {
        # This is the algo
        '__model': ['metaflow_helper.model_handlers.KerasRegressorHandler'],
        # These go to the model initializer
        '__init_kwargs__model__build_model': ['metaflow_helper.model_handlers.build_keras_regression_model'],
        '__init_kwargs__model__metric': ['mse'],
        '__init_kwargs__model__dense_layer_widths': [(), (15,), (15, 15,), (15*15,)],
        # These go to the model fitter
        '__fit_kwargs__model__batch_size': [None],
        '__fit_kwargs__model__epochs': [10_000],
        '__fit_kwargs__model__validation_split': [0.2],
        '__fit_kwargs__model__monitor': ['val_mse'],
        '__fit_kwargs__model__verbose': [0],
        '__fit_kwargs__model__patience': [10],
        '__fit_kwargs__model__min_delta': [0.1],
    },
]
dependencies = [
Expand Down
Loading

0 comments on commit 6aa0775

Please sign in to comment.