Make more results artifacts and clean up config

fwhigh · May 15, 2021 · 0679bda · 0679bda
1 parent 45c9b0c
commit 0679bda
Show file tree

Hide file tree

Showing 6 changed files with 94 additions and 71 deletions.
diff --git a/example-requirements.txt b/example-requirements.txt
@@ -1,2 +1,5 @@
 metaflow
+plotly-express
+kaleido
 pandas
+numpy
diff --git a/examples/model-tournament/config.py b/examples/model-tournament/config.py
@@ -1,6 +1,6 @@
 n_numeric_features = 10
 n_informative_numeric_features = 5
-n_categorical_features = 2
+n_categorical_features = 1
 make_regression_init_kwargs = {
     f'type_{i}': {
         'n_samples': 10_000,
@@ -37,71 +37,7 @@
         '__build_model': ['metaflow_helper.model_handlers.build_keras_regression_model'],
         # These go to the model initializer
         'metric': ['mse'],
-        'dense_layer_widths': [()],
-        'dropout_probabilities': [()],
-        # This goes to the pipeline elements' fitters by pipeline step stepname, where f'{stepname}__parameter' gets
-        # renamed to parameter and then passed to the fitter for step stepname. The model stepname = 'model'
-        # and the preprocessing stepname = 'preprocessor'. See utilities.build_pipeline.
-        '__fit_kwargs': [{
-            'model__batch_size': None,
-            'model__epochs': 10_000,
-            'model__validation_split': 0.2,
-            'model__eval_metric': 'val_mse',  # monitor. Examples: 'mse' or 'val_mse'
-            'model__verbose': 0,
-            'model__patience': 10,
-            'model__min_delta': 0.1,
-        }],
-    },
-    {
-        # Anything with an underscore is a specially handled parameter
-        '__model': ['metaflow_helper.model_handlers.KerasRegressorHandler'],
-        '__build_model': ['metaflow_helper.model_handlers.build_keras_regression_model'],
-        # These go to the model initializer
-        'metric': ['mse'],
-        'dense_layer_widths': [(15,)],
-        'dropout_probabilities': [(0,)],
-        # This goes to the pipeline elements' fitters by pipeline step stepname, where f'{stepname}__parameter' gets
-        # renamed to parameter and then passed to the fitter for step stepname. The model stepname = 'model'
-        # and the preprocessing stepname = 'preprocessor'. See utilities.build_pipeline.
-        '__fit_kwargs': [{
-            'model__batch_size': None,
-            'model__epochs': 10_000,
-            'model__validation_split': 0.2,
-            'model__eval_metric': 'val_mse',  # monitor. Examples: 'mse' or 'val_mse'
-            'model__verbose': 0,
-            'model__patience': 10,
-            'model__min_delta': 0.1,
-        }],
-    },
-    {
-        # Anything with an underscore is a specially handled parameter
-        '__model': ['metaflow_helper.model_handlers.KerasRegressorHandler'],
-        '__build_model': ['metaflow_helper.model_handlers.build_keras_regression_model'],
-        # These go to the model initializer
-        'metric': ['mse'],
-        'dense_layer_widths': [(15, 15,)],
-        'dropout_probabilities': [(0, 0,)],
-        # This goes to the pipeline elements' fitters by pipeline step stepname, where f'{stepname}__parameter' gets
-        # renamed to parameter and then passed to the fitter for step stepname. The model stepname = 'model'
-        # and the preprocessing stepname = 'preprocessor'. See utilities.build_pipeline.
-        '__fit_kwargs': [{
-            'model__batch_size': None,
-            'model__epochs': 10_000,
-            'model__validation_split': 0.2,
-            'model__eval_metric': 'val_mse',  # monitor. Examples: 'mse' or 'val_mse'
-            'model__verbose': 0,
-            'model__patience': 10,
-            'model__min_delta': 0.1,
-        }],
-    },
-    {
-        # Anything with an underscore is a specially handled parameter
-        '__model': ['metaflow_helper.model_handlers.KerasRegressorHandler'],
-        '__build_model': ['metaflow_helper.model_handlers.build_keras_regression_model'],
-        # These go to the model initializer
-        'metric': ['mse'],
-        'dense_layer_widths': [(15*15,)],
-        'dropout_probabilities': [(0,)],
+        'dense_layer_widths': [(), (15,), (15, 15,), (15*15,)],
         # This goes to the pipeline elements' fitters by pipeline step stepname, where f'{stepname}__parameter' gets
         # renamed to parameter and then passed to the fitter for step stepname. The model stepname = 'model'
         # and the preprocessing stepname = 'preprocessor'. See utilities.build_pipeline.

diff --git a/examples/model-tournament/train.py b/examples/model-tournament/train.py
@@ -151,7 +151,13 @@ def train_test(self):
             model__validation_data=(X_test_transformed, y_test),
             **fit_kwargs,
         )
-        self.score = r2_score(y_test, model_pipeline.predict(X_test_transformed))
+        y_test_pred = model_pipeline.predict(X_test_transformed)
+        model_pipeline.named_steps['model'].plot(
+            dir=f"results/{current.run_id}",
+            y_true=y_test,
+            y_pred=y_test_pred,
+        )
+        self.score = r2_score(y_test, y_test_pred)
         print(f'score {self.score}, contender {contender}')
 
         self.next(self.train)
@@ -188,8 +194,9 @@ def train(self):
     @step
     def end(self):
         indent = 4
-        Path("results").mkdir(parents=True, exist_ok=True)
-        with open(f'results/results-{current.run_id}.txt', 'w') as f:
+        results_dir = f"results/{current.run_id}"
+        Path(results_dir).mkdir(parents=True, exist_ok=True)
+        with open(f'{results_dir}/summary.txt', 'w') as f:
             print(f'data set:\n{json.dumps(self.make_regression_init_kwargs, indent=indent)}', file=f)
             print('\n', file=f)
             for i, k in enumerate(sorted(self.contender_results.keys(), key=lambda k: -1 * self.contender_results[k]['mean_score'])):

diff --git a/metaflow_helper/model_handlers/base.py b/metaflow_helper/model_handlers/base.py
@@ -0,0 +1,64 @@
+import random
+from pathlib import Path
+import numpy as np
+import pandas as pd
+import plotly.graph_objects as go
+import plotly
+
+
+class BaseModelHandler:
+    """Common base for the model handlers: init validation and result plots.
+
+    Subclasses are expected to assign ``mode``, ``iterations`` and
+    ``input_dim`` on the instance before calling ``_validate_init_kwargs``.
+    """
+
+    def __init__(self):
+        pass
+
+    def _validate_init_kwargs(self):
+        """Check that the attributes every handler must expose are set.
+
+        Raises:
+            AttributeError: if ``mode``, ``iterations`` or ``input_dim`` has
+                not been assigned on the instance. A hint naming the missing
+                attribute is printed before the error propagates.
+        """
+        for name in ('mode', 'iterations', 'input_dim'):
+            try:
+                getattr(self, name)
+            except AttributeError:
+                # A missing instance attribute raises AttributeError, not
+                # NameError, so this is the exception that has to be caught
+                # for the hint to ever be shown.
+                print(f'You must make {name} an init kwarg')
+                raise
+
+    def _validate_fit_kwargs(self):
+        """Hook for subclasses to validate fit-time kwargs; no-op here."""
+        pass
+
+    def plot(self, y_true, y_pred, dir='.', auto_open=True):
+        """Write a predicted-vs-true scatter plot into ``dir``.
+
+        Two files are produced: ``predicted-vs-true.png`` and
+        ``predicted-vs-true.html``. When there are more than 1,000
+        observations, a random sample of 1,000 is plotted instead.
+
+        Args:
+            y_true: observed targets; a ``pandas.Series`` or any sequence
+                that supports ``len`` and indexing with a list of positions
+                (e.g. a NumPy array).
+            y_pred: predictions aligned position-by-position with ``y_true``.
+            dir: output directory; created (with parents) if missing.
+            auto_open: forwarded to ``plotly.offline.plot``.
+        """
+        Path(dir).mkdir(parents=True, exist_ok=True)
+        n_points = len(y_true)
+        if n_points > 1_000:
+            idx = random.sample(range(n_points), 1_000)
+        else:
+            idx = list(range(n_points))
+        x = y_pred.iloc[idx] if isinstance(y_pred, pd.Series) else y_pred[idx]
+        y = y_true.iloc[idx] if isinstance(y_true, pd.Series) else y_true[idx]
+        # Flatten both series so that predictions delivered as a column
+        # (shape (n, 1)) still pair up with 1-D targets.
+        x = np.asarray(x).ravel()
+        y = np.asarray(y).ravel()
+        # Shared axis extent, used for the y = x reference line.
+        plot_range = [min(x.min(), y.min()), max(x.max(), y.max())]
+        fig = go.Figure()
+        fig.add_trace(
+            go.Scatter(
+                x=x,
+                y=y,
+                mode='markers',
+            ),
+        )
+        # Diagonal on which a perfect prediction would fall.
+        fig.add_shape(
+            type="line",
+            x0=plot_range[0], y0=plot_range[0], x1=plot_range[1], y1=plot_range[1],
+            line=dict(
+                color="Black",
+                width=2,
+            )
+        )
+        fig.update_layout(
+            xaxis_title='Predicted',
+            yaxis_title='True',
+            template='none',
+        )
+        fig.write_image(f"{dir}/predicted-vs-true.png")
+        plotly.offline.plot(fig, filename=f"{dir}/predicted-vs-true.html", auto_open=auto_open)
diff --git a/metaflow_helper/model_handlers/keras.py b/metaflow_helper/model_handlers/keras.py
@@ -5,9 +5,10 @@
 from tensorflow.python.keras import regularizers
 
 from ..constants import RunMode
+from .base import BaseModelHandler
 
 
-class KerasRegressorHandler(BaseEstimator, RegressorMixin):
+class KerasRegressorHandler(BaseModelHandler, BaseEstimator, RegressorMixin):
 
     def __init__(self, build_model=None, input_dim=None, mode=RunMode, iterations=None, eval_metric=None, **kwargs):
         self.build_model = build_model
@@ -20,6 +21,7 @@ def __init__(self, build_model=None, input_dim=None, mode=RunMode, iterations=No
         self.history = []
         self.iterations = iterations
 
+        self._validate_init_kwargs()
         self.model = self.build_model(input_dim=self.input_dim, **kwargs)
 
     def fit(self, X, y, validation_data=None, patience=None, min_delta=0, eval_metric=None, **kwargs):
@@ -47,6 +49,7 @@ def fit(self, X, y, validation_data=None, patience=None, min_delta=0, eval_metri
                     kwargs.pop(k)
                 except KeyError:
                     pass
+        self._validate_fit_kwargs()
         if kwargs is not None and 'validation_split' in kwargs:
             result = self.model.fit(X, y, callbacks=self.callbacks, **kwargs)
         else:
@@ -67,6 +70,12 @@ def build_keras_regression_model(input_dim=None, dense_layer_widths=(10,), dropo
                                  l1_lambda_final=0, l2_lambda_final=0):
     if input_dim is None:
         raise ValueError(input_dim)
+    if len(dense_layer_widths) > len(dropout_probabilities):
+        dropout_probabilities = tuple([dropout_probabilities[0]]*len(dense_layer_widths))
+    if len(dense_layer_widths) > len(l1_lambdas):
+        dropout_probabilities = tuple([l1_lambdas[0]]*len(dense_layer_widths))
+    if len(dense_layer_widths) > len(l2_lambdas):
+        dropout_probabilities = tuple([l2_lambdas[0]]*len(dense_layer_widths))
     model = Sequential()
     model.add(Input(shape=(input_dim, )))
     for i, params in enumerate(zip(dense_layer_widths, dropout_probabilities, l1_lambdas, l2_lambdas)):

diff --git a/metaflow_helper/model_handlers/lightgbm.py b/metaflow_helper/model_handlers/lightgbm.py
@@ -2,17 +2,20 @@
 from sklearn.base import BaseEstimator, RegressorMixin
 
 from ..constants import RunMode
+from .base import BaseModelHandler
 
 
-class LightGBMRegressorHandler(BaseEstimator, RegressorMixin):
+class LightGBMRegressorHandler(BaseModelHandler, BaseEstimator, RegressorMixin):
 
     def __init__(self, mode: RunMode, iterations=None, input_dim=None, **kwargs):
         self.mode = mode
         self.iterations = iterations
         self.input_dim = input_dim
+
         if self.iterations is not None:
             kwargs['n_estimators'] = self.iterations
 
+        self._validate_init_kwargs()
         self.model = lgb.LGBMRegressor(**kwargs)
 
     def fit(self, X, y, validation_data=None, **kwargs):
@@ -23,6 +26,7 @@ def fit(self, X, y, validation_data=None, **kwargs):
                     kwargs.pop(k)
                 except KeyError:
                     pass
+        self._validate_fit_kwargs()
         if validation_data is not None:
             self.model.fit(X, y, eval_set=validation_data, **kwargs)
         else: