From 8afe5e9744113ece12e42aa1d07f7f6e3d8e66ec Mon Sep 17 00:00:00 2001 From: Daniel Young Date: Thu, 6 Jun 2024 15:40:32 -0700 Subject: [PATCH] Updated data processing to be more clear with comments --- use_cases/eluc/app/process_data.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/use_cases/eluc/app/process_data.py b/use_cases/eluc/app/process_data.py index 191957a..d715caf 100644 --- a/use_cases/eluc/app/process_data.py +++ b/use_cases/eluc/app/process_data.py @@ -11,7 +11,9 @@ def main(): """ Main function that loads the data and saves it. """ - dataset = ELUCData(APP_START_YEAR-1, APP_START_YEAR, 2022) + # Subsets the dataset so train_df is from start_year-1 to test year which we discard. + # Then we take the app data as the test df which is from the app start year to the end of the dataset. + dataset = ELUCData(start_year=APP_START_YEAR-1, test_year=APP_START_YEAR) test_df = dataset.test_df save_dir = Path("data/processed") save_dir.mkdir(exist_ok=True)