From 0c1bc9b9e45df814c4acd52a6d5b074c47c01ecc Mon Sep 17 00:00:00 2001 From: Erik Duane Huckvale Date: Wed, 4 Sep 2019 19:11:20 -0600 Subject: [PATCH 1/7] Added an MLP baseline --- dna/models/__init__.py | 5 +++-- dna/models/baselines.py | 10 +++++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/dna/models/__init__.py b/dna/models/__init__.py index 1c476ae..e49990f 100644 --- a/dna/models/__init__.py +++ b/dna/models/__init__.py @@ -2,7 +2,7 @@ from .baselines import ( AutoSklearnMetalearner, LinearRegressionBaseline, MeanBaseline, MedianBaseline, MetaAutoSklearn, - PerPrimitiveBaseline, RandomBaseline, RandomForestBaseline, MLPRegressionModel + PerPrimitiveBaseline, RandomBaseline, RandomForestBaseline, MLPBaseline, MLPAblationModel ) from .dna_regression_model import DNARegressionModel from .lstm_model import LSTMModel @@ -31,7 +31,8 @@ def get_model(model_name: str, model_config: typing.Dict, seed: int): 'dag_attention_regression': DAGAttentionRegressionModel, 'linear_regression': LinearRegressionBaseline, 'random_forest': RandomForestBaseline, - 'mlp_regression': MLPRegressionModel, + 'mlp_regression': MLPBaseline, + 'mlp_ablation': MLPAblationModel, 'random': RandomBaseline, 'meta_autosklearn': MetaAutoSklearn, 'probabilistic_matrix_factorization': ProbabilisticMatrixFactorization, diff --git a/dna/models/baselines.py b/dna/models/baselines.py index 0e0231e..817e687 100644 --- a/dna/models/baselines.py +++ b/dna/models/baselines.py @@ -6,6 +6,7 @@ import pandas as pd from sklearn import linear_model from sklearn.ensemble import RandomForestRegressor +from sklearn.neural_network import MLPRegressor import torch from .torch_modules.mlp import MLP @@ -134,6 +135,13 @@ def __init__(self, seed=0): self.fitted = False +class MLPBaseline(SklearnBase): + def __init__(self, seed=0): + super().__init__(seed=seed) + self.regressor = MLPRegressor(random_state=seed) + self.fitted = False + + class MetaAutoSklearn(SklearnBase): def __init__(self, seed=0, 
**kwargs): @@ -224,7 +232,7 @@ def _process_metafeatures(data): metafeatures.drop_duplicates(inplace=True) return metafeatures -class MLPRegressionModel(PyTorchRegressionRankModelBase): +class MLPAblationModel(PyTorchRegressionRankModelBase): def __init__( self, n_hidden_layers: int, hidden_layer_size: int, activation_name: str, use_batch_norm: bool, From accd04d095ade984acf946ea0a216ad9a505f1be Mon Sep 17 00:00:00 2001 From: Erik Duane Huckvale Date: Wed, 4 Sep 2019 19:18:29 -0600 Subject: [PATCH 2/7] Renamed the mlp config to the ablation config --- main.sh | 2 +- .../{mlp_regression_config.json => mlp_ablation_config.json} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename model_configs/{mlp_regression_config.json => mlp_ablation_config.json} (100%) diff --git a/main.sh b/main.sh index c5479b7..f474dce 100755 --- a/main.sh +++ b/main.sh @@ -101,7 +101,7 @@ python3 -m dna evaluate \ # mlp outputs a constant value for a dataset, so it cannot rank python3 -m dna evaluate \ --model mlp_regression \ - --model-config-path ./model_configs/mlp_regression_config.json \ + --model-config-path ./model_configs/mlp_ablation_config.json \ --problem regression \ --metafeature-subset $metafeature_subset \ --train-path $train_path \ diff --git a/model_configs/mlp_regression_config.json b/model_configs/mlp_ablation_config.json similarity index 100% rename from model_configs/mlp_regression_config.json rename to model_configs/mlp_ablation_config.json From 63054da625fd5ae46c5284b1ce451b7fe2528806 Mon Sep 17 00:00:00 2001 From: Erik Duane Huckvale Date: Wed, 4 Sep 2019 20:09:58 -0600 Subject: [PATCH 3/7] Added a config file for the mlp baseline --- dna/models/baselines.py | 15 +++++++++++++-- model_configs/mlp_baseline_config.json | 12 ++++++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) create mode 100644 model_configs/mlp_baseline_config.json diff --git a/dna/models/baselines.py b/dna/models/baselines.py index 817e687..711eb36 100644 --- a/dna/models/baselines.py 
+++ b/dna/models/baselines.py @@ -136,9 +136,20 @@ def __init__(self, seed=0): class MLPBaseline(SklearnBase): - def __init__(self, seed=0): + """ + Takes in a vector of metafeatures concatenated to binary nominal features that represent which primitives are in + a pipeline. This is passed into sklearn's MLPRegressor. + See https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPRegressor.html for a list of all + the constructor inputs, most of which can be used as tunable hyper-parameters + """ + + def __init__(self, seed=0, *, hidden_layer_size, n_hidden_layers, **kwargs): super().__init__(seed=seed) - self.regressor = MLPRegressor(random_state=seed) + hidden_layer_sizes = [hidden_layer_size] * n_hidden_layers + self.regressor = MLPRegressor( + random_state=seed, early_stopping=True, hidden_layer_sizes=hidden_layer_sizes, + **kwargs + ) self.fitted = False diff --git a/model_configs/mlp_baseline_config.json b/model_configs/mlp_baseline_config.json new file mode 100644 index 0000000..013991b --- /dev/null +++ b/model_configs/mlp_baseline_config.json @@ -0,0 +1,12 @@ +{ + "__init__": { + "activation": "relu", + "hidden_layer_size": 100, + "n_hidden_layers": 1, + + "batch_size": 25, + "momentum": 0.9, + "learning_rate": "constant", + "learning_rate_init": 0.001 + } +} From 3416d0e42c0c22910d83677ecf888283916c3513 Mon Sep 17 00:00:00 2001 From: Erik Duane Huckvale Date: Wed, 4 Sep 2019 20:19:05 -0600 Subject: [PATCH 4/7] Fixed key error; added mlp baseline to main.sh --- dna/models/__init__.py | 2 +- main.sh | 14 +++++++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/dna/models/__init__.py b/dna/models/__init__.py index e49990f..9b1126c 100644 --- a/dna/models/__init__.py +++ b/dna/models/__init__.py @@ -31,7 +31,7 @@ def get_model(model_name: str, model_config: typing.Dict, seed: int): 'dag_attention_regression': DAGAttentionRegressionModel, 'linear_regression': LinearRegressionBaseline, 'random_forest': 
RandomForestBaseline, - 'mlp_regression': MLPBaseline, + 'mlp_baseline': MLPBaseline, 'mlp_ablation': MLPAblationModel, 'random': RandomBaseline, 'meta_autosklearn': MetaAutoSklearn, diff --git a/main.sh b/main.sh index f474dce..a432e22 100755 --- a/main.sh +++ b/main.sh @@ -100,7 +100,7 @@ python3 -m dna evaluate \ # mlp outputs a constant value for a dataset, so it cannot rank python3 -m dna evaluate \ - --model mlp_regression \ + --model mlp_ablation \ --model-config-path ./model_configs/mlp_ablation_config.json \ --problem regression \ --metafeature-subset $metafeature_subset \ @@ -111,6 +111,18 @@ python3 -m dna evaluate \ $use_ootsp +python3 -m dna evaluate \ + --model mlp_baseline \ + --model-config-path ./model_configs/mlp_baseline_config.json \ + --problem regression rank\ + --metafeature-subset $metafeature_subset \ + --train-path $train_path \ + --test-path $test_path \ + --output-dir $results_dir \ + --verbose \ + $use_ootsp + + python3 -m dna evaluate \ --model meta_autosklearn \ --model-config-path ./model_configs/meta_autosklearn_config.json \ From 78d4df857ab4286d323618059302309e4243bc1e Mon Sep 17 00:00:00 2001 From: Erik Duane Huckvale Date: Mon, 9 Sep 2019 17:28:44 -0600 Subject: [PATCH 5/7] added a determinism test for the mlp baseline --- test/model_configs/mlp_baseline_config.json | 12 ++++++++++++ test/test_models.py | 5 +++++ 2 files changed, 17 insertions(+) create mode 100644 test/model_configs/mlp_baseline_config.json diff --git a/test/model_configs/mlp_baseline_config.json b/test/model_configs/mlp_baseline_config.json new file mode 100644 index 0000000..47ef1d6 --- /dev/null +++ b/test/model_configs/mlp_baseline_config.json @@ -0,0 +1,12 @@ +{ + "__init__": { + "activation": "relu", + "hidden_layer_size": 2, + "n_hidden_layers": 1, + + "batch_size": 25, + "momentum": 0.9, + "learning_rate": "constant", + "learning_rate_init": 0.001 + } +} diff --git a/test/test_models.py b/test/test_models.py index 6f3776f..476f8dd 100644 --- 
a/test/test_models.py +++ b/test/test_models.py @@ -35,6 +35,11 @@ def test_dna_regression_determinism(self): model='dna_regression', model_config_path='./test/model_configs/dna_regression_config.json' ) + def test_mlp_regression_determinism(self): + self._test_determinism( + model='mlp_baseline', model_config_path='./test/model_configs/mlp_baseline_config.json' + ) + def test_daglstm_regression_determinism(self): # TODO: fix this test on the CPU if torch.cuda.is_available(): From 2649e7216260638e732103acd2ea029abdb6253c Mon Sep 17 00:00:00 2001 From: Erik Duane Huckvale Date: Mon, 16 Sep 2019 15:16:16 -0600 Subject: [PATCH 6/7] Print statement for debugging travis --- dna/models/baselines.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dna/models/baselines.py b/dna/models/baselines.py index b92267f..d09a9a1 100644 --- a/dna/models/baselines.py +++ b/dna/models/baselines.py @@ -163,6 +163,7 @@ class MLPBaseline(SklearnBase): def __init__(self, seed=0, *, hidden_layer_size, n_hidden_layers, **kwargs): super().__init__(seed=seed) hidden_layer_sizes = [hidden_layer_size] * n_hidden_layers + print('KWARGS:', kwargs) self.regressor = MLPRegressor( random_state=seed, early_stopping=True, hidden_layer_sizes=hidden_layer_sizes, **kwargs From c28be61f437ec76451c53f253629d3888dac9c9f Mon Sep 17 00:00:00 2001 From: Erik Duane Huckvale Date: Mon, 16 Sep 2019 15:56:59 -0600 Subject: [PATCH 7/7] Fixed travis bug; removed debug print statement --- dna/models/baselines.py | 1 - test/test_models.py | 12 ++++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/dna/models/baselines.py b/dna/models/baselines.py index d09a9a1..b92267f 100644 --- a/dna/models/baselines.py +++ b/dna/models/baselines.py @@ -163,7 +163,6 @@ class MLPBaseline(SklearnBase): def __init__(self, seed=0, *, hidden_layer_size, n_hidden_layers, **kwargs): super().__init__(seed=seed) hidden_layer_sizes = [hidden_layer_size] * n_hidden_layers - print('KWARGS:', kwargs) self.regressor = 
MLPRegressor( random_state=seed, early_stopping=True, hidden_layer_sizes=hidden_layer_sizes, **kwargs diff --git a/test/test_models.py b/test/test_models.py index 476f8dd..947af8e 100644 --- a/test/test_models.py +++ b/test/test_models.py @@ -37,7 +37,7 @@ def test_dna_regression_determinism(self): def test_mlp_regression_determinism(self): self._test_determinism( - model='mlp_baseline', model_config_path='./test/model_configs/mlp_baseline_config.json' + model='mlp_baseline', model_config_path='./test/model_configs/mlp_baseline_config.json', pytorch_model=False ) def test_daglstm_regression_determinism(self): @@ -47,7 +47,7 @@ def test_daglstm_regression_determinism(self): model='daglstm_regression', model_config_path='./test/model_configs/daglstm_regression_config.json' ) - def _test_determinism(self, model: str, model_config_path: str): + def _test_determinism(self, model: str, model_config_path: str, pytorch_model: bool = True): # Set the arguments for this test parser = argparse.ArgumentParser() configure_evaluate_parser(parser) @@ -64,19 +64,19 @@ def _test_determinism(self, model: str, model_config_path: str): ] arguments = parser.parse_args(argv) - results1 = self._evaluate_model(arguments) - results2 = self._evaluate_model(arguments) + results1 = self._evaluate_model(arguments, pytorch_model) + results2 = self._evaluate_model(arguments, pytorch_model) self.assertEqual(results1, results2) @staticmethod - def _evaluate_model(arguments): + def _evaluate_model(arguments, pytorch_model: bool): model_config_path = getattr(arguments, 'model_config_path', None) if model_config_path is None: model_config = {} else: with open(model_config_path) as f: model_config = json.load(f) - if not torch.cuda.is_available(): + if not torch.cuda.is_available() and pytorch_model: if '__init__' not in model_config: model_config['__init__'] = {} model_config['__init__']['device'] = 'cpu'