Refactor the contender spec pattern

First draft sent to Ville
fwhigh · May 16, 2021 · 6aa0775
1 parent 0679bda
commit 6aa0775
Show file tree

Hide file tree

Showing 10 changed files with 874 additions and 630 deletions.
diff --git a/Makefile b/Makefile
@@ -15,6 +15,11 @@ dev:
 	pip install --upgrade --upgrade-strategy eager -r requirements.txt
 	pip install -e .
 
+example:
+	pip install --upgrade pip
+	pip install -r example-requirements.txt
+	jupyter labextension install jupyterlab-plotly
+
 # docs:
 # 	$(MAKE) -C docs html
 

diff --git a/example-requirements.txt b/example-requirements.txt
@@ -1,5 +1,8 @@
 metaflow
+scikit-learn
 plotly-express
 kaleido
 pandas
 numpy
+scipy
+jupyterlab
diff --git a/examples/model-tournament/common.py b/examples/model-tournament/common.py
@@ -1,22 +1,27 @@
+import os, errno
 from importlib import import_module
 import subprocess
 import time
+import re
+from pathlib import Path
+import pickle
 import pandas as pd
 from sklearn.datasets import make_regression
 from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import OneHotEncoder
 from sklearn.compose import ColumnTransformer
-from tensorflow.python.keras import Sequential
-from tensorflow.python.keras.layers import Dense, Dropout
+import plotly.graph_objects as go
+import plotly
 
 from metaflow_helper.constants import RunMode
 
 
-def import_object_from_string(path):
-    path, obj_str = path.rsplit('.', 1)
-    module_ = import_module(path)
-    obj = getattr(module_, obj_str)
-    return obj
+def silent_rm_file(filename):
+    try:
+        os.remove(filename)
+    except OSError as e: # this would be "except OSError, e:" before Python 2.6
+        if e.errno != errno.ENOENT: # errno.ENOENT = no such file or directory
+            raise # re-raise exception if a different error occurred
 
 
 def system_command_with_retry(cmd: list):
@@ -86,22 +91,58 @@ def build_model_pipeline(model, step_name='model'):
 
 
 def parse_contender_model_init(contender):
-    return {k: v for k, v in contender.items() if not k.startswith('__')}
+    return parse_contender(contender, prefix_filter='__init_kwargs__model__', prefix_to_remove='__init_kwargs__model__')
+
+
+def parse_contender_model_fit(contender):
+    return parse_contender(contender, prefix_filter='__fit_kwargs__model__', prefix_to_remove='__fit_kwargs__')
+
+
+def parse_contender(contender, prefix_filter, prefix_to_remove):
+    return {re.sub(r'^' + re.escape(prefix_to_remove), '', k): v for k, v in contender.items() if k.startswith(prefix_filter)}
 
 
 def update_contender(contender, mode: RunMode, input_dim=None, best_iterations=None):
     if mode is RunMode.TEST:
         pass
     elif mode is RunMode.TRAIN:
         contender.update({
-            'iterations': best_iterations,
+            '__init_kwargs__model__iterations': best_iterations,
         })
     contender.update({
-        'input_dim': input_dim,
-        'mode': mode,
+        '__init_kwargs__model__input_dim': input_dim,
+        '__init_kwargs__model__mode': mode,
     })
-    if '__build_model' in contender:
-        contender.update({
-            'build_model': import_object_from_string(contender['__build_model']),
-        })
     return contender
+
+
+def plot_all_scores(contender_results, dir, auto_open=True):
+    Path(dir).mkdir(parents=True, exist_ok=True)
+    df = pd.DataFrame().from_records([{
+        **pickle.loads(k),
+        **contender_results[k]}
+        for k in contender_results
+    ])
+    fig = go.Figure()
+    for index, row in df.iterrows():
+        fig.add_trace(
+            go.Box(
+                name=f"{row.name} {str(row['__model']).rsplit('.', 1)[1]}",
+                x=(f"{row.name}",),
+                y=row['scores'],
+            ),
+        )
+    fig.update_layout(
+        xaxis_title='Model',
+        yaxis_title='Score',
+        template='none',
+    )
+    print(f'ADFDAFDASFDAS writing ' + f"{dir}/all-scores.png")
+    silent_rm_file(f"{dir}/all-scores.png")
+    if os.path.isfile(f"{dir}/all-scores.png"):
+        raise FileExistsError(f"{dir}/all-scores.png")
+    fig.write_image(f"{dir}/all-scores.png")
+    print(f'writing ' + f"{dir}/all-scores.html")
+    silent_rm_file(f"{dir}/all-scores.html")
+    plotly.offline.plot(fig, filename=f"{dir}/all-scores.html", auto_open=auto_open)
+    return fig
diff --git a/examples/model-tournament/config.py b/examples/model-tournament/config.py
@@ -1,10 +1,10 @@
 n_numeric_features = 10
 n_informative_numeric_features = 5
-n_categorical_features = 1
+n_categorical_features = 2
 make_regression_init_kwargs = {
     f'type_{i}': {
-        'n_samples': 10_000,
-        'noise': 10,
+        'n_samples': round(10_000/n_categorical_features),
+        'noise': 100,
         'n_features': n_numeric_features,
         'n_informative': n_informative_numeric_features,
         'coef': True,
@@ -13,43 +13,35 @@
     for i in range(n_categorical_features)
 }
 test_size = 0.2
-n_splits = 1
+n_splits = 5
 contenders_spec = [
     {
-        # Anything with an underscore is a specially handled parameter
+        # This is the algo
         '__model': ['metaflow_helper.model_handlers.LightGBMRegressorHandler'],
         # These go to the model initializer
-        'learning_rate': [0.1],
-        'max_depth': [1, 2, 3],
-        'n_estimators': [10_000],
-        # This goes to the pipeline elements' fitters by pipeline step stepname, where f'{stepname}__parameter' gets
-        # renamed to parameter and then passed to the fitter for step stepname. The model stepname = 'model'
-        # and the preprocessing stepname = 'preprocessor'. See utilities.build_pipeline.
-        '__fit_kwargs': [{
-            'model__eval_metric': 'mse',
-            'model__early_stopping_rounds': 10,
-            'model__verbose': 0,
-        }],
+        '__init_kwargs__model__learning_rate': [0.1],
+        '__init_kwargs__model__max_depth': [1, 2, 3],
+        '__init_kwargs__model__n_estimators': [10_000],
+        # These go to the model fitter
+        '__fit_kwargs__model__eval_metric': ['mse'],
+        '__fit_kwargs__model__early_stopping_rounds': [10],
+        '__fit_kwargs__model__verbose': [0],
     },
     {
-        # Anything with an underscore is a specially handled parameter
+        # This is the algo
         '__model': ['metaflow_helper.model_handlers.KerasRegressorHandler'],
-        '__build_model': ['metaflow_helper.model_handlers.build_keras_regression_model'],
         # These go to the model initializer
-        'metric': ['mse'],
-        'dense_layer_widths': [(), (15,), (15, 15,), (15*15,)],
-        # This goes to the pipeline elements' fitters by pipeline step stepname, where f'{stepname}__parameter' gets
-        # renamed to parameter and then passed to the fitter for step stepname. The model stepname = 'model'
-        # and the preprocessing stepname = 'preprocessor'. See utilities.build_pipeline.
-        '__fit_kwargs': [{
-            'model__batch_size': None,
-            'model__epochs': 10_000,
-            'model__validation_split': 0.2,
-            'model__eval_metric': 'val_mse',  # monitor. Examples: 'mse' or 'val_mse'
-            'model__verbose': 0,
-            'model__patience': 10,
-            'model__min_delta': 0.1,
-        }],
+        '__init_kwargs__model__build_model': ['metaflow_helper.model_handlers.build_keras_regression_model'],
+        '__init_kwargs__model__metric': ['mse'],
+        '__init_kwargs__model__dense_layer_widths': [(), (15,), (15, 15,), (15*15,)],
+        # These go to the model fitter
+        '__fit_kwargs__model__batch_size': [None],
+        '__fit_kwargs__model__epochs': [10_000],
+        '__fit_kwargs__model__validation_split': [0.2],
+        '__fit_kwargs__model__monitor': ['val_mse'],
+        '__fit_kwargs__model__verbose': [0],
+        '__fit_kwargs__model__patience': [10],
+        '__fit_kwargs__model__min_delta': [0.1],
     },
 ]
 dependencies = [