Skip to content

Commit

Permalink
Make more results artifacts and clean up config
Browse files Browse the repository at this point in the history
  • Loading branch information
fwhigh committed May 15, 2021
1 parent 45c9b0c commit 0679bda
Show file tree
Hide file tree
Showing 6 changed files with 94 additions and 71 deletions.
3 changes: 3 additions & 0 deletions example-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
metaflow
plotly-express
kaleido
pandas
numpy
68 changes: 2 additions & 66 deletions examples/model-tournament/config.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
n_numeric_features = 10
n_informative_numeric_features = 5
n_categorical_features = 2
n_categorical_features = 1
make_regression_init_kwargs = {
f'type_{i}': {
'n_samples': 10_000,
Expand Down Expand Up @@ -37,71 +37,7 @@
'__build_model': ['metaflow_helper.model_handlers.build_keras_regression_model'],
# These go to the model initializer
'metric': ['mse'],
'dense_layer_widths': [()],
'dropout_probabilities': [()],
# This goes to the pipeline elements' fitters by pipeline step stepname, where f'{stepname}__parameter' gets
# renamed to parameter and then passed to the fitter for step stepname. The model stepname = 'model'
# and the preprocessing stepname = 'preprocessor'. See utilities.build_pipeline.
'__fit_kwargs': [{
'model__batch_size': None,
'model__epochs': 10_000,
'model__validation_split': 0.2,
'model__eval_metric': 'val_mse', # monitor. Examples: 'mse' or 'val_mse'
'model__verbose': 0,
'model__patience': 10,
'model__min_delta': 0.1,
}],
},
{
# Anything with an underscore is a specially handled parameter
'__model': ['metaflow_helper.model_handlers.KerasRegressorHandler'],
'__build_model': ['metaflow_helper.model_handlers.build_keras_regression_model'],
# These go to the model initializer
'metric': ['mse'],
'dense_layer_widths': [(15,)],
'dropout_probabilities': [(0,)],
# This goes to the pipeline elements' fitters by pipeline step stepname, where f'{stepname}__parameter' gets
# renamed to parameter and then passed to the fitter for step stepname. The model stepname = 'model'
# and the preprocessing stepname = 'preprocessor'. See utilities.build_pipeline.
'__fit_kwargs': [{
'model__batch_size': None,
'model__epochs': 10_000,
'model__validation_split': 0.2,
'model__eval_metric': 'val_mse', # monitor. Examples: 'mse' or 'val_mse'
'model__verbose': 0,
'model__patience': 10,
'model__min_delta': 0.1,
}],
},
{
# Anything with an underscore is a specially handled parameter
'__model': ['metaflow_helper.model_handlers.KerasRegressorHandler'],
'__build_model': ['metaflow_helper.model_handlers.build_keras_regression_model'],
# These go to the model initializer
'metric': ['mse'],
'dense_layer_widths': [(15, 15,)],
'dropout_probabilities': [(0, 0,)],
# This goes to the pipeline elements' fitters by pipeline step stepname, where f'{stepname}__parameter' gets
# renamed to parameter and then passed to the fitter for step stepname. The model stepname = 'model'
# and the preprocessing stepname = 'preprocessor'. See utilities.build_pipeline.
'__fit_kwargs': [{
'model__batch_size': None,
'model__epochs': 10_000,
'model__validation_split': 0.2,
'model__eval_metric': 'val_mse', # monitor. Examples: 'mse' or 'val_mse'
'model__verbose': 0,
'model__patience': 10,
'model__min_delta': 0.1,
}],
},
{
# Anything with an underscore is a specially handled parameter
'__model': ['metaflow_helper.model_handlers.KerasRegressorHandler'],
'__build_model': ['metaflow_helper.model_handlers.build_keras_regression_model'],
# These go to the model initializer
'metric': ['mse'],
'dense_layer_widths': [(15*15,)],
'dropout_probabilities': [(0,)],
'dense_layer_widths': [(), (15,), (15, 15,), (15*15,)],
# This goes to the pipeline elements' fitters by pipeline step stepname, where f'{stepname}__parameter' gets
# renamed to parameter and then passed to the fitter for step stepname. The model stepname = 'model'
# and the preprocessing stepname = 'preprocessor'. See utilities.build_pipeline.
Expand Down
13 changes: 10 additions & 3 deletions examples/model-tournament/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,13 @@ def train_test(self):
model__validation_data=(X_test_transformed, y_test),
**fit_kwargs,
)
self.score = r2_score(y_test, model_pipeline.predict(X_test_transformed))
y_test_pred = model_pipeline.predict(X_test_transformed)
model_pipeline.named_steps['model'].plot(
dir=f"results/{current.run_id}",
y_true=y_test,
y_pred=y_test_pred,
)
self.score = r2_score(y_test, y_test_pred)
print(f'score {self.score}, contender {contender}')

self.next(self.train)
Expand Down Expand Up @@ -188,8 +194,9 @@ def train(self):
@step
def end(self):
indent = 4
Path("results").mkdir(parents=True, exist_ok=True)
with open(f'results/results-{current.run_id}.txt', 'w') as f:
results_dir = f"results/{current.run_id}"
Path(results_dir).mkdir(parents=True, exist_ok=True)
with open(f'{results_dir}/summary.txt', 'w') as f:
print(f'data set:\n{json.dumps(self.make_regression_init_kwargs, indent=indent)}', file=f)
print('\n', file=f)
for i, k in enumerate(sorted(self.contender_results.keys(), key=lambda k: -1 * self.contender_results[k]['mean_score'])):
Expand Down
64 changes: 64 additions & 0 deletions metaflow_helper/model_handlers/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import random
from pathlib import Path
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly


class BaseModelHandler:
    """Base class for model handlers.

    Provides validation hooks that concrete handlers call from their
    ``__init__`` / ``fit`` methods, plus a predicted-vs-true scatter plot
    that is written to disk as both a PNG and an HTML file.
    """

    def __init__(self):
        pass

    def _validate_init_kwargs(self):
        """Check that the handler has set ``mode``, ``iterations`` and ``input_dim``.

        Intended to be called at the end of a subclass ``__init__`` after it
        has assigned its attributes.

        Raises:
            AttributeError: if any of the required attributes is missing. A
                hint naming the missing init kwarg is printed before the
                exception is re-raised.
        """
        for name in ('mode', 'iterations', 'input_dim'):
            try:
                getattr(self, name)
            # A missing instance attribute raises AttributeError, not
            # NameError; NameError is kept only for backward compatibility.
            except (AttributeError, NameError):
                print(f'You must make {name} an init kwarg')
                raise

    def _validate_fit_kwargs(self):
        """Hook for validating fit kwargs; the base implementation does nothing."""
        pass

    def plot(self, y_true, y_pred, dir='.', auto_open=True):
        """Write a predicted-vs-true scatter plot with a y = x reference line.

        At most 1,000 randomly sampled points are plotted. Two files are
        written into ``dir``: ``predicted-vs-true.png`` and
        ``predicted-vs-true.html``.

        Args:
            y_true: true target values; a ``pandas.Series`` or an object that
                supports indexing with a list of integer positions.
            y_pred: predicted values, same kinds of container as ``y_true``.
            dir: output directory; created (with parents) if it does not exist.
            auto_open: forwarded to ``plotly.offline.plot``.
        """
        Path(dir).mkdir(parents=True, exist_ok=True)
        # Subsample large data sets so the figure stays small.
        if len(y_true) > 1_000:
            idx = random.sample(range(len(y_true)), 1_000)
        else:
            idx = list(range(len(y_true)))
        # Series need positional (iloc) indexing; other containers are
        # indexed directly with the list of positions.
        x = y_pred.iloc[idx] if isinstance(y_pred, pd.Series) else y_pred[idx]
        y = y_true.iloc[idx] if isinstance(y_true, pd.Series) else y_true[idx]
        # Take the extrema per array rather than stacking (x, y): stacking
        # fails when predictions and targets differ in shape.
        plot_range = [
            min(np.min(x), np.min(y)),
            max(np.max(x), np.max(y)),
        ]
        fig = go.Figure()
        fig.add_trace(
            go.Scatter(
                x=x,
                y=y,
                mode='markers',
            ),
        )
        # Diagonal reference line: points on it are perfect predictions.
        fig.add_shape(
            type="line",
            x0=plot_range[0], y0=plot_range[0], x1=plot_range[1], y1=plot_range[1],
            line=dict(
                color="Black",
                width=2,
            )
        )
        fig.update_layout(
            xaxis_title='Predicted',
            yaxis_title='True',
            template='none',
        )
        # NOTE(review): static image export presumably relies on the kaleido
        # package added to example-requirements.txt; confirm.
        fig.write_image(f"{dir}/predicted-vs-true.png")
        plotly.offline.plot(fig, filename=f"{dir}/predicted-vs-true.html", auto_open=auto_open)
11 changes: 10 additions & 1 deletion metaflow_helper/model_handlers/keras.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@
from tensorflow.python.keras import regularizers

from ..constants import RunMode
from .base import BaseModelHandler


class KerasRegressorHandler(BaseEstimator, RegressorMixin):
class KerasRegressorHandler(BaseModelHandler, BaseEstimator, RegressorMixin):

def __init__(self, build_model=None, input_dim=None, mode=RunMode, iterations=None, eval_metric=None, **kwargs):
self.build_model = build_model
Expand All @@ -20,6 +21,7 @@ def __init__(self, build_model=None, input_dim=None, mode=RunMode, iterations=No
self.history = []
self.iterations = iterations

self._validate_init_kwargs()
self.model = self.build_model(input_dim=self.input_dim, **kwargs)

def fit(self, X, y, validation_data=None, patience=None, min_delta=0, eval_metric=None, **kwargs):
Expand Down Expand Up @@ -47,6 +49,7 @@ def fit(self, X, y, validation_data=None, patience=None, min_delta=0, eval_metri
kwargs.pop(k)
except KeyError:
pass
self._validate_fit_kwargs()
if kwargs is not None and 'validation_split' in kwargs:
result = self.model.fit(X, y, callbacks=self.callbacks, **kwargs)
else:
Expand All @@ -67,6 +70,12 @@ def build_keras_regression_model(input_dim=None, dense_layer_widths=(10,), dropo
l1_lambda_final=0, l2_lambda_final=0):
if input_dim is None:
raise ValueError(input_dim)
# When fewer per-layer values than dense layers are given, repeat the first
# value for every layer. Each sequence must be broadcast into ITSELF: the
# later zip() stops at the shortest sequence, so a short l1/l2 tuple would
# otherwise silently drop layers.
if len(dense_layer_widths) > len(dropout_probabilities):
    dropout_probabilities = tuple([dropout_probabilities[0]]*len(dense_layer_widths))
if len(dense_layer_widths) > len(l1_lambdas):
    l1_lambdas = tuple([l1_lambdas[0]]*len(dense_layer_widths))
if len(dense_layer_widths) > len(l2_lambdas):
    l2_lambdas = tuple([l2_lambdas[0]]*len(dense_layer_widths))
model = Sequential()
model.add(Input(shape=(input_dim, )))
for i, params in enumerate(zip(dense_layer_widths, dropout_probabilities, l1_lambdas, l2_lambdas)):
Expand Down
6 changes: 5 additions & 1 deletion metaflow_helper/model_handlers/lightgbm.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,20 @@
from sklearn.base import BaseEstimator, RegressorMixin

from ..constants import RunMode
from .base import BaseModelHandler


class LightGBMRegressorHandler(BaseEstimator, RegressorMixin):
class LightGBMRegressorHandler(BaseModelHandler, BaseEstimator, RegressorMixin):

def __init__(self, mode: RunMode, iterations=None, input_dim=None, **kwargs):
self.mode = mode
self.iterations = iterations
self.input_dim = input_dim

if self.iterations is not None:
kwargs['n_estimators'] = self.iterations

self._validate_init_kwargs()
self.model = lgb.LGBMRegressor(**kwargs)

def fit(self, X, y, validation_data=None, **kwargs):
Expand All @@ -23,6 +26,7 @@ def fit(self, X, y, validation_data=None, **kwargs):
kwargs.pop(k)
except KeyError:
pass
self._validate_fit_kwargs()
if validation_data is not None:
self.model.fit(X, y, eval_set=validation_data, **kwargs)
else:
Expand Down

0 comments on commit 0679bda

Please sign in to comment.