Skip to content

Commit

Permalink
Add examples integration tests
Browse files Browse the repository at this point in the history
  • Loading branch information
fwhigh committed May 17, 2021
1 parent 5dada6c commit 91f7165
Show file tree
Hide file tree
Showing 8 changed files with 132 additions and 23 deletions.
34 changes: 34 additions & 0 deletions .github/workflows/examples36.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Integration workflow: on every push, install the package in a Python 3.6
# environment and run the repository examples via `make test_examples`.
name: Python 3.6 examples

on: [push]

jobs:
  build:

    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: [3.6]

    steps:
    - uses: actions/checkout@v2
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v2
      with:
        python-version: ${{ matrix.python-version }}
    # Cache the whole interpreter location so installed packages are reused
    # between runs.
    - uses: actions/cache@v2
      with:
        path: ${{ env.pythonLocation }}
        # example-requirements.txt is installed by `make test_examples`, so it
        # is part of the key: a change to it must invalidate the cached
        # environment just like a change to setup.py or requirements.txt.
        key: ${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}-${{ hashFiles('requirements.txt') }}-${{ hashFiles('example-requirements.txt') }}
    - name: Install dependencies
      run: |
        make dev
    # - name: Lint with flake8
    #   run: |
    #     # stop the build if there are Python syntax errors or undefined names
    #     flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
    #     # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
    #     flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
    - name: Run examples
      run: |
        make test_examples
2 changes: 1 addition & 1 deletion .github/workflows/python36.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Python 3.6
name: Python 3.6 unit tests

on: [push]

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/python37.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Python 3.7
name: Python 3.7 unit tests

on: [push]

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/python38.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Python 3.8
name: Python 3.8 unit tests

on: [push]

Expand Down
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,7 @@ test:
pip install -r test-requirements.txt
coverage run --omit 'venv/*' -m pytest
coverage html -i

# Integration check for the examples: install the example requirements, then
# run the model-selection flow with --test_mode enabled (train.py then reads
# its settings from debug_config instead of config).
test_examples:
	pip install -r example-requirements.txt
	python examples/model-selection/train.py run --test_mode 1
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ Convenience utilities for common machine learning tasks on Metaflow
![Build](https://github.com/fwhigh/metaflow-helper/actions/workflows/python37.yml/badge.svg)
![Build](https://github.com/fwhigh/metaflow-helper/actions/workflows/python38.yml/badge.svg)

![Build](https://github.com/fwhigh/metaflow-helper/actions/workflows/examples36.yml/badge.svg)

## Quickstart

You can run the tournament immediately like this.
Expand Down
50 changes: 50 additions & 0 deletions examples/model-selection/debug_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
"""Small, fast configuration for the model-selection example.

``train.py`` reads this module instead of ``config`` when the flow is started
with ``--test_mode``. It exposes the same attribute names as ``config`` but
with a tiny data set, a single split and one parameter combination per
contender, so the whole flow finishes quickly.
"""

# --- Synthetic data -------------------------------------------------------
n_numeric_features = 10
n_informative_numeric_features = 5
n_categorical_features = 1

# One keyword-argument dict per categorical level, keyed 'type_<i>'. The flow
# hands this mapping to common.generate_data as ``init_kwargs``.
# NOTE(review): the key names look like sklearn.datasets.make_regression
# arguments -- confirm against common.generate_data.
make_regression_init_kwargs = {
    f'type_{i}': {
        # Split a fixed budget of 100 rows evenly across the levels.
        'n_samples': round(100 / n_categorical_features),
        'noise': 100,
        'n_features': n_numeric_features,
        'n_informative': n_informative_numeric_features,
        'coef': True,
        # A distinct seed per level keeps each level reproducible.
        'random_state': i,
    }
    for i in range(n_categorical_features)
}

# --- Splitting ------------------------------------------------------------
# Fraction held out by train_test_split in the flow.
test_size = 0.2
# The flow only builds a KFold when n_splits > 1; with 1 it falls back to a
# single train_test_split per contender.
n_splits = 1

# --- Contenders -----------------------------------------------------------
# Expanded with sklearn's ParameterGrid in the flow; every value is a list of
# candidates, and each list here has one entry so the grid stays minimal.
contenders_spec = [
    {
        # This is the algo
        '__model': ['metaflow_helper.model_handlers.LightGBMRegressorHandler'],
        # These go to the model initializer
        '__init_kwargs__model__learning_rate': [0.1],
        '__init_kwargs__model__max_depth': [1],
        '__init_kwargs__model__n_estimators': [10],
        # These go to the model fitter
        '__fit_kwargs__model__eval_metric': ['mse'],
        '__fit_kwargs__model__early_stopping_rounds': [2],
        '__fit_kwargs__model__verbose': [0],
    },
    {
        # This is the algo
        '__model': ['metaflow_helper.model_handlers.KerasRegressorHandler'],
        # These go to the model initializer
        '__init_kwargs__model__build_model': ['metaflow_helper.model_handlers.build_keras_regression_model'],
        '__init_kwargs__model__metric': ['mse'],
        # A single candidate: the empty tuple of layer widths.
        '__init_kwargs__model__dense_layer_widths': [()],
        # These go to the model fitter
        '__fit_kwargs__model__batch_size': [None],
        '__fit_kwargs__model__epochs': [100],
        '__fit_kwargs__model__validation_split': [0.2],
        '__fit_kwargs__model__monitor': ['val_mse'],
        '__fit_kwargs__model__verbose': [0],
        '__fit_kwargs__model__patience': [2],
        '__fit_kwargs__model__min_delta': [0.1],
    },
]

# --- Runtime --------------------------------------------------------------
# Passed to common.install_dependencies at the start of each step.
# The host part must be the literal SSH login 'git@github.com'; an obfuscated
# placeholder in its place is not a valid VCS URL.
dependencies = [
    {'metaflow_helper': 'git+ssh://git@github.com/fwhigh/metaflow-helper.git'},
]

# Forwarded as ``auto_open`` to the plotting helpers.
# NOTE(review): presumably opens generated figures in a viewer; consider False
# for headless CI runs -- confirm what the plotting helpers do with it.
auto_open_figures = True
59 changes: 39 additions & 20 deletions examples/model-selection/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from pathlib import Path
from scipy import stats
import numpy as np
from metaflow import FlowSpec, step, current
from metaflow import FlowSpec, step, current, Parameter
from sklearn.model_selection import train_test_split, ParameterGrid, KFold
from sklearn.metrics import r2_score

Expand All @@ -13,54 +13,68 @@
from metaflow_helper.plot import plot_predicted_vs_true

import config
import debug_config
import common



class Train(FlowSpec):

test_mode = Parameter(
'test_mode',
help="Run in test mode?",
type=bool,
default=False,
)

@step
def start(self):
common.install_dependencies(config.dependencies)
this_config = debug_config if self.test_mode else config
if self.test_mode:
print('Running in test mode')
common.install_dependencies(this_config.dependencies)
self.df, self.numeric_features, self.categorical_features = common.generate_data(
n_numeric_features=config.n_numeric_features,
init_kwargs=config.make_regression_init_kwargs,
n_numeric_features=this_config.n_numeric_features,
init_kwargs=this_config.make_regression_init_kwargs,
)
self.make_regression_init_kwargs = config.make_regression_init_kwargs
self.make_regression_init_kwargs = this_config.make_regression_init_kwargs
print(f'generated {len(self.df)} rows and {len(self.df.columns)} columns')

self.train_validation_index, self.test_index = train_test_split(
self.df.index, test_size=config.test_size,
self.df.index, test_size=this_config.test_size,
)

self.contenders = ParameterGrid(config.contenders_spec)
if config.n_splits > 1:
self.k_fold = KFold(n_splits=config.n_splits)
self.contenders = ParameterGrid(this_config.contenders_spec)
if this_config.n_splits > 1:
self.k_fold = KFold(n_splits=this_config.n_splits)
else:
self.k_fold = None
self.folds = list(range(config.n_splits))
self.folds = list(range(this_config.n_splits))

self.next(self.foreach_contender, foreach='contenders')

@step
def foreach_contender(self):
common.install_dependencies(config.dependencies)
this_config = debug_config if self.test_mode else config
common.install_dependencies(this_config.dependencies)
self.contender = self.input

self.next(self.foreach_fold, foreach='folds')

@step
def foreach_fold(self):
common.install_dependencies(config.dependencies)
this_config = debug_config if self.test_mode else config
common.install_dependencies(this_config.dependencies)
self.fold = self.input
contender = self.contender
model_fit_kwargs = common.parse_contender_model_fit(contender)

X = self.df.loc[self.train_validation_index, :]
y = self.df.loc[self.train_validation_index, 'target']
if config.n_splits > 1:
if this_config.n_splits > 1:
train, test = list(self.k_fold.split(X))[self.fold]
else:
train, test = train_test_split(list(range(X.shape[0])), test_size=config.test_size)
train, test = train_test_split(list(range(X.shape[0])), test_size=this_config.test_size)
X_train = X.iloc[train, :]
y_train = y.iloc[train]
X_test = X.iloc[test, :]
Expand Down Expand Up @@ -94,7 +108,8 @@ def foreach_fold(self):

@step
def end_foreach_fold(self, inputs):
common.install_dependencies(config.dependencies)
this_config = debug_config if self.test_mode else config
common.install_dependencies(this_config.dependencies)
self.merge_artifacts(inputs, exclude=['fold', 'score', 'iterations'])
self.contender_results = {
'scores': [ii.score for ii in inputs],
Expand All @@ -109,7 +124,8 @@ def end_foreach_fold(self, inputs):

@step
def end_foreach_contender(self, inputs):
common.install_dependencies(config.dependencies)
this_config = debug_config if self.test_mode else config
common.install_dependencies(this_config.dependencies)
self.merge_artifacts(inputs, exclude=['contender', 'contender_results'])
self.contender_results = {
pickle.dumps(ii.contender): ii.contender_results
Expand All @@ -119,7 +135,8 @@ def end_foreach_contender(self, inputs):

@step
def train_test(self):
common.install_dependencies(config.dependencies)
this_config = debug_config if self.test_mode else config
common.install_dependencies(this_config.dependencies)
self.best_contender_ser = max(self.contender_results.keys(), key=lambda k: self.contender_results[k]['mean_score'])
self.best_contender = pickle.loads(self.best_contender_ser)
print(f'best_contender {self.best_contender}, contender_results {self.contender_results[self.best_contender_ser]}')
Expand Down Expand Up @@ -158,7 +175,7 @@ def train_test(self):
dir=f"results/{current.run_id}",
y_true=y_test,
y_pred=y_test_pred,
auto_open=config.auto_open_figures,
auto_open=this_config.auto_open_figures,
)
self.score = r2_score(y_test, y_test_pred)
print(f'score {self.score}, contender {contender}')
Expand All @@ -167,7 +184,8 @@ def train_test(self):

@step
def train(self):
common.install_dependencies(config.dependencies)
this_config = debug_config if self.test_mode else config
common.install_dependencies(this_config.dependencies)
contender = self.best_contender
model_fit_kwargs = common.parse_contender_model_fit(contender)

Expand All @@ -194,6 +212,7 @@ def train(self):

@step
def end(self):
this_config = debug_config if self.test_mode else config
indent = 4
results_dir = f"results/{current.run_id}"
Path(results_dir).mkdir(parents=True, exist_ok=True)
Expand All @@ -212,7 +231,7 @@ def end(self):
common.plot_all_scores(
contender_results=self.contender_results,
dir=results_dir,
auto_open=config.auto_open_figures,
auto_open=this_config.auto_open_figures,
)


Expand Down

0 comments on commit 91f7165

Please sign in to comment.