Skip to content

Commit

Permalink
Merge pull request #81 from Project-Resilience/refactor_nnp
Browse files Browse the repository at this point in the history
Refactor predictor loading and saving
  • Loading branch information
danyoungday authored May 14, 2024
2 parents f345385 + c4bde78 commit 29234c8
Show file tree
Hide file tree
Showing 7 changed files with 85 additions and 69 deletions.
13 changes: 7 additions & 6 deletions use_cases/eluc/experiments/predictor_experiments.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@
" \"train_pct\": 1,\n",
" \"step_lr_params\": {\"step_size\": 1, \"gamma\": 0.1},\n",
"}\n",
"nnp = NeuralNetPredictor(**nn_config)"
"nnp = NeuralNetPredictor(nn_config)"
]
},
{
Expand All @@ -109,7 +109,7 @@
}
],
"source": [
"nnp.load(\"predictors/neural_network/trained_models/experiment_nn\")\n",
"nnp = NeuralNetPredictor.load(\"predictors/neural_network/trained_models/experiment_nn\")\n",
"print(f\"MAE Neural Net: {mean_absolute_error(dataset.test_df[nn_config['label']], nnp.predict(dataset.test_df[nn_config['features']]))}\")"
]
},
Expand All @@ -130,7 +130,7 @@
" \"features\": constants.DIFF_LAND_USE_COLS,\n",
" \"n_jobs\": -1,\n",
"}\n",
"linreg = LinearRegressionPredictor(**linreg_config)"
"linreg = LinearRegressionPredictor(linreg_config)"
]
},
{
Expand All @@ -157,7 +157,7 @@
}
],
"source": [
"linreg.load(\"predictors/sklearn/trained_models/experiment_linreg\")\n",
"linreg = LinearRegressionPredictor.load(\"predictors/sklearn/trained_models/experiment_linreg\")\n",
"print(f\"MAE Linear Regression: {mean_absolute_error(dataset.test_df['ELUC'], linreg.predict(dataset.test_df[constants.DIFF_LAND_USE_COLS]))}\")"
]
},
Expand All @@ -175,11 +175,12 @@
"outputs": [],
"source": [
"forest_config = {\n",
" \"features\": constants.NN_FEATS,\n",
" \"n_jobs\": -1,\n",
" \"max_features\": \"sqrt\",\n",
" \"random_state\": 42\n",
"}\n",
"forest = RandomForestPredictor(features=constants.NN_FEATS, **forest_config)"
"forest = RandomForestPredictor(forest_config)"
]
},
{
Expand Down Expand Up @@ -208,7 +209,7 @@
}
],
"source": [
"forest.load(\"predictors/sklearn/trained_models/experiment_rf\")\n",
"forest = RandomForestPredictor.load(\"predictors/sklearn/trained_models/experiment_rf\")\n",
"print(f\"MAE Random Forest: {mean_absolute_error(dataset.test_df['ELUC'], forest.predict(dataset.test_df[constants.NN_FEATS]))}\")"
]
},
Expand Down
78 changes: 41 additions & 37 deletions use_cases/eluc/predictors/neural_network/neural_net_predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
Implementation of predictor.py using a simple feed-forward NeuralNetwork
implemented in PyTorch.
"""

import copy
import json
import time
Expand Down Expand Up @@ -79,57 +78,62 @@ class NeuralNetPredictor(Predictor):
in order to take advantage of the linear relationship in the data.
Data is automatically standardized and the scaler is saved with the model.
"""
def __init__(self, features=None, label=None, hidden_sizes=[4096], linear_skip=True,
dropout=0, device="mps", epochs=3, batch_size=2048, optim_params={},
train_pct=1, step_lr_params={"step_size": 1, "gamma": 0.1}):

self.features=None
self.label=None

self.set_params(features, label, hidden_sizes, linear_skip,
dropout, device, epochs, batch_size, optim_params,
train_pct, step_lr_params)
def __init__(self, model_config: dict):
    """
    Builds an untrained predictor from a configuration dictionary.
    Every key is optional; a missing key falls back to the default listed here:
        features: list of features to use in the model (default None, meaning all features)
        label: name of the label column (default None, meaning the label passed in fit)
        hidden_sizes: list of hidden layer sizes (default [4096])
        linear_skip: whether to concatenate input to hidden layer output (default True)
        dropout: dropout probability (default 0)
        device: device to run the model on (default "cpu")
        epochs: number of epochs to train for (default 3)
        batch_size: batch size for training (default 2048)
        optim_params: dictionary of parameters to pass to the optimizer (default {})
        train_pct: percentage of training data to use (default 1)
        step_lr_params: dictionary of parameters to pass to the step learning rate scheduler
            (default {"step_size": 1, "gamma": 0.1})
    :param model_config: dictionary holding any subset of the keys above.
    """
    # Rebuilt on every call so the mutable fallbacks are never shared between instances
    fallbacks = {
        "features": None,
        "label": None,
        "hidden_sizes": [4096],
        "linear_skip": True,
        "dropout": 0,
        "device": "cpu",
        "epochs": 3,
        "batch_size": 2048,
        "optim_params": {},
        "train_pct": 1,
        "step_lr_params": {"step_size": 1, "gamma": 0.1},
    }
    for name, fallback in fallbacks.items():
        setattr(self, name, model_config.get(name, fallback))

    # No network exists until the predictor is fit or loaded; the scaler starts unfitted
    self.model = None
    self.scaler = StandardScaler()

def set_params(self, features, label, hidden_sizes, linear_skip,
dropout, device, epochs, batch_size, optim_params,
train_pct, step_lr_params):
"""
Set all the parameters for the neural network.
"""
self.features = features
self.label = label
self.hidden_sizes = hidden_sizes
self.linear_skip = linear_skip
self.dropout = dropout
self.device = device
self.epochs = epochs
self.batch_size = batch_size
self.optim_params = optim_params
self.train_pct = train_pct
self.step_lr_params = step_lr_params

def load(self, path: str):
@classmethod
def load(cls, path: str) -> "NeuralNetPredictor":
    """
    Loads a model from a given folder containing a config.json, model.pt, and scaler.joblib.
    :param path: path to folder containing model files. A pathlib.Path is accepted as well as a string.
    :return: a new predictor built from config.json with the saved weights and scaler attached.
        The network is moved to the configured device and put in eval mode.
    :raises FileNotFoundError: if the folder does not exist.
    """
    # Path() accepts both str and Path objects, so no isinstance branching is required
    load_path = Path(path)
    if not load_path.exists():
        raise FileNotFoundError(f"Path {path} does not exist.")

    # Initialize model with config
    with open(load_path / "config.json", "r", encoding="utf-8") as file:
        config = json.load(file)
    nnp = cls(config)

    # Read the architecture from the instance rather than from the raw config so that
    # keys absent from config.json use the constructor defaults instead of raising KeyError
    nnp.model = ELUCNeuralNet(len(nnp.features), nnp.hidden_sizes, nnp.linear_skip, nnp.dropout)
    # map_location remaps weights that were saved on a different device onto the configured one
    state_dict = torch.load(load_path / "model.pt", map_location=nnp.device)
    nnp.model.load_state_dict(state_dict)
    nnp.model.to(nnp.device)
    nnp.model.eval()
    # NOTE(review): joblib.load unpickles the file; only load from folders you trust
    nnp.scaler = joblib.load(load_path / "scaler.joblib")
    return nnp


def save(self, path: str):
Expand Down
4 changes: 2 additions & 2 deletions use_cases/eluc/predictors/predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,9 @@ def save(self, path: str):
:param path: path to save the model
"""


@classmethod
@abstractmethod
def load(self, path: str):
def load(cls, path: str) -> "Predictor":
"""
Loads a model from a path.
:param path: path to the model
Expand Down
42 changes: 28 additions & 14 deletions use_cases/eluc/predictors/sklearn/sklearn_predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,15 @@ class SKLearnPredictor(Predictor, ABC):
Simple abstract class for sklearn predictors.
Keeps track of features fit on and label to predict.
"""
def __init__(self, features=None, label=None):
self.features = features
self.label = label
def __init__(self, model_config: dict):
    """
    Stores which columns the predictor works with. Both config entries are optional:
        features: list of features to use for prediction (default None, meaning all features)
        label: name of the label to predict (default None, meaning the label passed during fit)
    :param model_config: dictionary that may contain "features" and/or "label".
    """
    # The base class creates no estimator
    self.model = None

    chosen_features, chosen_label = (model_config.get(key, None) for key in ("features", "label"))
    self.features = chosen_features
    self.label = chosen_label

def save(self, path: str):
Expand All @@ -30,7 +36,10 @@ def save(self, path: str):
Generates path to folder if it does not exist.
:param path: path to folder to save model files.
"""
save_path = Path(path)
if isinstance(path, str):
save_path = Path(path)
else:
save_path = path
save_path.mkdir(parents=True, exist_ok=True)
config = {
"features": self.features,
Expand All @@ -40,17 +49,18 @@ def save(self, path: str):
json.dump(config, file)
joblib.dump(self.model, save_path / "model.joblib")

def load(self, path):
@classmethod
def load(cls, path) -> "SKLearnPredictor":
    """
    Loads saved model and features from a folder.
    :param path: path to folder to load model files from (string or pathlib.Path).
    :return: a new instance of the class load was called on, constructed from config.json
        and holding the model stored in model.joblib.
    :raises FileNotFoundError: if the folder does not exist.
    """
    load_path = Path(path)
    # Same explicit missing-folder error as NeuralNetPredictor.load
    if not load_path.exists():
        raise FileNotFoundError(f"Path {path} does not exist.")

    with open(load_path / "config.json", "r", encoding="utf-8") as file:
        config = json.load(file)

    sklearn_predictor = cls(config)
    # Replace whatever model the constructor set with the one stored on disk.
    # NOTE(review): joblib.load unpickles the file; only load from folders you trust
    sklearn_predictor.model = joblib.load(load_path / "model.joblib")
    return sklearn_predictor

def fit(self, X_train: pd.DataFrame, y_train: pd.Series):
"""
Expand Down Expand Up @@ -83,19 +93,23 @@ class LinearRegressionPredictor(SKLearnPredictor):
Simple linear regression predictor.
See SKLearnPredictor for more details.
"""
def __init__(self, features=None, **kwargs):
super().__init__(features)
self.model = LinearRegression(**kwargs)
def __init__(self, model_config: dict):
    """
    Creates the predictor with an unfitted LinearRegression model.
    The optional "features" and "label" entries are handled by SKLearnPredictor;
    every other entry is passed to LinearRegression as a keyword argument.
    :param model_config: configuration dictionary. It is left unmodified.
    """
    super().__init__(model_config)
    # Filter into a new dict instead of pop(): popping would strip "features" and "label"
    # from the caller's dictionary, so reusing that config afterwards would silently lose them
    sklearn_params = {
        key: value for key, value in model_config.items() if key not in ("features", "label")
    }
    self.model = LinearRegression(**sklearn_params)

class RandomForestPredictor(SKLearnPredictor):
"""
Simple random forest predictor.
See SKLearnPredictor for more details.
Overrides save method in order to compress it.
"""
def __init__(self, features=None, **kwargs):
super().__init__(features)
self.model = RandomForestRegressor(**kwargs)
def __init__(self, model_config: dict):
    """
    Creates the predictor with an unfitted RandomForestRegressor model.
    The optional "features" and "label" entries are handled by SKLearnPredictor;
    every other entry is passed to RandomForestRegressor as a keyword argument.
    :param model_config: configuration dictionary. It is left unmodified.
    """
    super().__init__(model_config)
    # Filter into a new dict instead of pop(): popping would strip "features" and "label"
    # from the caller's dictionary, so reusing that config afterwards would silently lose them
    sklearn_params = {
        key: value for key, value in model_config.items() if key not in ("features", "label")
    }
    self.model = RandomForestRegressor(**sklearn_params)

def save(self, path: str, compression=0):
"""
Expand Down
4 changes: 1 addition & 3 deletions use_cases/eluc/prescriptors/nsga2/train_prescriptors.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,7 @@

print("Loading predictor...")
# TODO: We need to make it so you can load any predictor here
nnp = NeuralNetPredictor()
nnp_path = Path(config["predictor_path"])
nnp.load(nnp_path)
nnp = NeuralNetPredictor.load(Path(config["predictor_path"]))

print("Initializing prescription...")
if "seed_dir" in config["evolution_params"].keys():
Expand Down
2 changes: 1 addition & 1 deletion use_cases/eluc/tests/test_nsga2.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def setUpClass(cls):
fields = get_fields(cls.dummy_data)
encoder = ELUCEncoder(fields)

predictor = LinearRegressionPredictor(features=constants.DIFF_LAND_USE_COLS, n_jobs=-1)
predictor = LinearRegressionPredictor(dict(features=constants.DIFF_LAND_USE_COLS, n_jobs=-1))
predictor.fit(cls.dummy_data[constants.DIFF_LAND_USE_COLS], cls.dummy_data["ELUC"])
cls.prescriptor = TorchPrescriptor(
100,
Expand Down
11 changes: 5 additions & 6 deletions use_cases/eluc/tests/test_predictors.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def test_save_file_names(self):
]
for model, config, test_names in zip(self.models, self.configs, save_file_names):
with self.subTest(model=model):
predictor = model(**config)
predictor = model(config)
predictor.fit(self.dummy_data, self.dummy_target)
predictor.save(self.temp_path)
files = [f.name for f in self.temp_path.glob("**/*") if f.is_file()]
Expand All @@ -61,13 +61,12 @@ def test_loaded_same(self):

for model, config in zip(self.models, self.configs):
with self.subTest(model=model):
predictor = model(**config)
predictor = model(config)
predictor.fit(self.dummy_data.iloc[:2], self.dummy_target.iloc[:2])
output = predictor.predict(self.dummy_data.iloc[2:])
predictor.save(self.temp_path)

loaded = model(**config)
loaded.load(self.temp_path)
loaded = model.load(self.temp_path)
loaded_output = loaded.predict(self.dummy_data.iloc[2:])

self.assertTrue((output == loaded_output).all().all()) # Pandas is so annoying why is this necessary?
Expand All @@ -91,7 +90,7 @@ def test_single_input(self):
"""
Tests the neural net with a single input.
"""
predictor = NeuralNetPredictor(hidden_sizes=[4], epochs=1, batch_size=1, device="cpu")
predictor = NeuralNetPredictor(dict(hidden_sizes=[4], epochs=1, batch_size=1, device="cpu"))

train_data = pd.DataFrame({"a": [1], "b": [2], "c": [3], "label": [4]})
test_data = pd.DataFrame({"a": [4], "b": [5], "c": [6]})
Expand All @@ -104,7 +103,7 @@ def test_multi_input(self):
"""
Tests the neural net with multiple inputs.
"""
predictor = NeuralNetPredictor(hidden_sizes=[4], epochs=1, batch_size=1, device="cpu")
predictor = NeuralNetPredictor(dict(hidden_sizes=[4], epochs=1, batch_size=1, device="cpu"))

train_data = pd.DataFrame({"a": [1, 2], "b": [2, 3], "c": [3, 4], "label": [4, 5]})
test_data = pd.DataFrame({"a": [4, 5], "b": [5, 6], "c": [6, 7]})
Expand Down

0 comments on commit 29234c8

Please sign in to comment.