Merge pull request #2 from fialhocoelho/develop
Develop
Showing 10 changed files with 2,114 additions and 1,276 deletions.
.gitignore:

```diff
@@ -87,3 +87,5 @@ target/
 
 # Mypy cache
 .mypy_cache/
+src/data/nixtla_api.key
+src/models/nixtla_api.key
```
config/config.yaml:
```diff
@@ -1,19 +1,62 @@
 data:
   processed_path: "data/processed/"
-  intermediate_path: "data/interim/"
   raw_path: "data/raw/"
   forecasted_path: "data/forecasted/"
   train_folder: "santos_dataset/train/"
   test_folder: "santos_dataset/test/"
+  train_start_date: "2021-01-01 00:00:00"
+  train_end_date: "2021-12-31 23:55:00"
+  test_start_date: "2022-01-01 00:00:00"
+  test_end_date: "2022-12-31 23:55:00"
+  processed_train_df: "train_df_praticagem.csv"
+  processed_test_df: "test_df_praticagem.csv"
+  intermediate_path: "data/interim/"
+  target_freq: "1h"
+  interp_method: "linear"
+  datetime_col: "datetime"
+  round_freq: "5min"
-  timegpt_fcst_file: "forecast_cache_fcst_y_validated_20240513_212354.pkl"
-  chronos_fcst_file: "chronos_forecast_cache_20240518_160948.pkl"
+  timegpt_cache_prefix: "timegpt_forecast_cache"
+  chronos_fcst_file: "chronos_forecast_cache_20240520_000009.pkl"
+  chronos_cache_prefix: "chronos_forecast_cache"
+  crop_target_datetime: "2021-01-01 00:00:00"
+  default_seed: 42
+features:
+  waves_palmas:
+    name: "waves_palmas"
+    list_features: ["hs", "tp", "ws"]
+    train_filename: "waves_palmas.parquet"
+    test_filename: "waves_palmas.parquet"
+    freq: "20min"
+    train_start_date: "2021-01-01 00:00:00"
+    train_end_date: "2021-12-31 23:55:00"
+    test_start_date: "2022-01-01 00:00:00"
+    test_end_date: "2022-12-31 23:55:00"
+  current_praticagem:
+    name: "current_praticagem"
+    list_features: ["cross_shore_current"]
+    train_filename: "current_praticagem.parquet"
+    test_filename: "current_praticagem.parquet"
+    freq: "5min"
+    train_start_date: "2021-01-01 00:00:00"
+    train_end_date: "2021-12-31 23:55:00"
+    test_start_date: "2022-01-01 00:00:00"
+    test_end_date: "2022-12-31 23:55:00"
 model:
   context_window_len: 168
-  forecast_len: 24
+  forecast_len: 48
+  shift: 48
   batch_size: 32
+  windowing_mode: "fixed"
   epochs: 500
   lr: 0.001
   beta: 0.5
   input_size: 1
   hidden_size: 64
   train_shuffle: true
+  timegpt_finetune_steps: 100
+  nixtla_api_key_path: "nixtla_api.key"
+  chronos_finetune_steps: 0
+  attempts_after_failure: 30
+  device: "cuda"
+  chronos_t5_model: "amazon/chronos-t5-large"
```
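
For orientation, here is a minimal sketch of how a script outside the repo might read this config. It assumes PyYAML; the project itself loads the file through `utils.nexdata.NexData`, whose internals are not part of this diff.

```python
# Illustrative only: the repo loads config via utils.nexdata.NexData,
# not this snippet. Assumes PyYAML is installed.
import yaml

with open("config/config.yaml") as f:
    cfg = yaml.safe_load(f)

# The nested sections mirror the YAML above.
print(cfg["model"]["context_window_len"])                # 168
print(cfg["model"]["forecast_len"])                      # 48
print(cfg["features"]["waves_palmas"]["list_features"])  # ['hs', 'tp', 'ws']
```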
New file (Jupyter notebook, `@@ -0,0 +1,214 @@` — all lines added):
```json
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Run `chronos` for the Santos offshore dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:utils.nexdata:Loading config file ../config/config.yaml\n",
      "INFO:utils.nexdata:waves_palmas train path: ../data/raw/santos_dataset/train/waves_palmas.parquet\n",
      "INFO:utils.nexdata:waves_palmas test path: ../data/raw/santos_dataset/test/waves_palmas.parquet\n",
      "INFO:utils.nexdata:current_praticagem train path: ../data/raw/santos_dataset/train/current_praticagem.parquet\n",
      "INFO:utils.nexdata:current_praticagem test path: ../data/raw/santos_dataset/test/current_praticagem.parquet\n",
      "INFO:utils.nexdata:Random seed: 42\n",
      "INFO:utils.nexdata:Default device: cuda\n",
      "INFO:utils.nexdata:Defining paths...\n"
     ]
    }
   ],
   "source": [
    "import sys\n",
    "import os\n",
    "import pandas as pd\n",
    "from tqdm import tqdm\n",
    "import matplotlib.pyplot as plt\n",
    "import torch\n",
    "from chronos import ChronosPipeline\n",
    "import numpy as np\n",
    "\n",
    "sys.path.append('../src/')\n",
    "from utils.nexdata import *\n",
    "from utils.nexutil import *\n",
    "\n",
    "params = NexData(nexus_folder='../')\n",
    "set_random_seeds(params.data_params['default_seed'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Configure the model, then predict and save outputs for the `student` model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the pretrained 'amazon/chronos-t5-large' Chronos pipeline\n",
    "chronos_pipeline = ChronosPipeline.from_pretrained(\n",
    "    'amazon/chronos-t5-large',\n",
    "    device_map='cuda',\n",
    "    torch_dtype=torch.bfloat16,\n",
    ")\n",
    "\n",
    "# Iterate over each ocean variable defined in the parameters\n",
    "for ocean_variable in params.features.keys():\n",
    "    print(f'Ocean variable: {ocean_variable}')\n",
    "\n",
    "    # Retrieve target features and experiment IDs\n",
    "    target_features = params.features[ocean_variable]\n",
    "    id_experiment = 'chronos_forecast_composed'\n",
    "    id_experiment_ioa = 'chronos_ioa_composed'\n",
    "\n",
    "    # Load train and test data for the target feature\n",
    "    df_train_target = pd.read_parquet(\n",
    "        target_features['train_filepath'])\n",
    "    df_test_target = pd.read_parquet(\n",
    "        target_features['test_filepath'])\n",
    "\n",
    "    # Process the training dataframe with the specified parameters\n",
    "    df_train_processed_target = process_dataframe(\n",
    "        df_train_target,\n",
    "        target_features['train_start_date'],\n",
    "        target_features['train_end_date'],\n",
    "        params.data_params['target_freq'],\n",
    "        params.data_params['interp_method'],\n",
    "        params.data_params['datetime_col'],\n",
    "        params.data_params['round_freq'])\n",
    "\n",
    "    # Process the test dataframe with the specified parameters\n",
    "    df_test_processed_target = process_dataframe(\n",
    "        df_test_target,\n",
    "        target_features['test_start_date'],\n",
    "        target_features['test_end_date'],\n",
    "        params.data_params['target_freq'],\n",
    "        params.data_params['interp_method'],\n",
    "        params.data_params['datetime_col'],\n",
    "        params.data_params['round_freq'])\n",
    "\n",
    "    # Define the context and forecast window lengths and shift\n",
    "    context_len = params.model_params['context_window_len']\n",
    "    forecast_len = params.model_params['forecast_len']\n",
    "    shift = params.model_params['shift']\n",
    "    mode = params.model_params['windowing_mode']\n",
    "\n",
    "    # Generate indices for the test set using the context and forecast lengths\n",
    "    X_test_index, y_test_index = generate_indices(\n",
    "        df_test_processed_target, context_len, forecast_len,\n",
    "        shift, mode)\n",
    "\n",
    "    # Initialize DataFrames for predictions and index of agreement (IOA) values\n",
    "    df_y_hat = pd.DataFrame()\n",
    "    df_ioa = pd.DataFrame()\n",
    "\n",
    "    # Set the index for the y_hat DataFrame\n",
    "    df_y_hat.index = np.concatenate(y_test_index)\n",
    "    df_y_hat[params.data_params['datetime_col']] = (\n",
    "        df_test_processed_target.loc[\n",
    "            df_y_hat.index, params.data_params['datetime_col']\n",
    "        ])\n",
    "\n",
    "    # Iterate over each target feature for prediction\n",
    "    for target_feature in target_features['list_features']:\n",
    "        y_hat = []\n",
    "        ioa_list = []\n",
    "\n",
    "        # Prepend training data to enlarge the inference context\n",
    "        train_signal = df_train_processed_target.loc[:,\n",
    "            target_feature].values\n",
    "        len_X_test_index = len(X_test_index)\n",
    "\n",
    "        # Iterate over each test window to generate predictions\n",
    "        for idx in range(len_X_test_index):\n",
    "            # Extract the test signal for the current window\n",
    "            test_signal = df_test_processed_target.loc[\n",
    "                X_test_index[idx], target_feature].values\n",
    "            y_test_signal = df_test_processed_target.loc[\n",
    "                y_test_index[idx], target_feature].values\n",
    "\n",
    "            # Concatenate training and test signals\n",
    "            composed_signal = np.concatenate(\n",
    "                (train_signal, test_signal))\n",
    "\n",
    "            # Convert the composed signal to a tensor\n",
    "            batch_context = torch.tensor(composed_signal)\n",
    "\n",
    "            # Generate the forecast using the Chronos pipeline\n",
    "            forecast = chronos_pipeline.predict(\n",
    "                batch_context, forecast_len)\n",
    "            predictions = np.quantile(\n",
    "                forecast.numpy(), 0.5, axis=1)\n",
    "\n",
    "            # Append predictions to the y_hat list\n",
    "            y_hat.extend(np.array(predictions[0]))\n",
    "\n",
    "            # Calculate the index of agreement (IOA) for the predictions\n",
    "            ioa = calculate_ioa(\n",
    "                y_test_signal, np.array(predictions[0]))\n",
    "            ioa_list.append(ioa)\n",
    "\n",
    "            # Print the progress and IOA value for the current window\n",
    "            print(f'Window {idx+1} of {len_X_test_index} | '\n",
    "                  f'target feature: {target_feature} | ioa: {round(ioa,3)}')\n",
    "\n",
    "        # Store the predictions and IOA values in the DataFrames\n",
    "        df_y_hat[target_feature] = y_hat\n",
    "        df_ioa[target_feature] = ioa_list\n",
    "\n",
    "    # Save the predictions DataFrame to a pickle file\n",
    "    filename = os.path.join(\n",
    "        params.forecasted_dir,\n",
    "        f\"{target_features['name']}_{id_experiment}_\"\n",
    "        f\"{params.timestamp}.pkl\")\n",
    "    df_y_hat.to_pickle(filename)\n",
    "\n",
    "    # Save the IOA DataFrame to a pickle file\n",
    "    filename_ioa = os.path.join(\n",
    "        params.forecasted_dir,\n",
    "        f\"{ocean_variable}_{id_experiment_ioa}_\"\n",
    "        f\"{params.timestamp}.pkl\")\n",
    "    df_ioa.to_pickle(filename_ioa)\n",
    "\n",
    "    # Print the file paths of the saved files\n",
    "    print(filename)\n",
    "    print(filename_ioa)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "chronos",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
```
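
`generate_indices` is imported from `utils.nexutil` and its implementation is not part of this diff. As a rough sketch of what a fixed-mode windowing helper could look like, given `context_window_len: 168`, `forecast_len: 48`, and `shift: 48` from the config (the name and behavior below are assumptions, not the repo's actual code):

```python
import numpy as np

def generate_indices_sketch(df, context_len, forecast_len, shift, mode="fixed"):
    """Hypothetical stand-in for utils.nexutil.generate_indices (not shown in
    this diff). Slides a context/forecast window over the frame's positional
    index, advancing `shift` rows per step; assumes a RangeIndex."""
    X_idx, y_idx = [], []
    start = 0
    while start + context_len + forecast_len <= len(df):
        X_idx.append(np.arange(start, start + context_len))
        y_idx.append(np.arange(start + context_len,
                               start + context_len + forecast_len))
        start += shift
    return X_idx, y_idx

# e.g. with the config values: 168-step context, 48-step horizon, stride 48
# X_test_index, y_test_index = generate_indices_sketch(df_test, 168, 48, 48)
```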
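Likewise, `calculate_ioa` comes from the repo's utilities and is not shown here. Assuming it computes Willmott's index of agreement (a common forecast-skill measure where 1.0 is a perfect match), a compact version would be:

```python
import numpy as np

def calculate_ioa_sketch(obs, pred):
    """Hypothetical version of the repo's calculate_ioa, assuming Willmott's
    index of agreement: 1 minus squared error over potential error."""
    obs = np.asarray(obs, dtype=float)
    pred = np.asarray(pred, dtype=float)
    o_bar = obs.mean()
    sse = np.sum((obs - pred) ** 2)
    potential = np.sum((np.abs(pred - o_bar) + np.abs(obs - o_bar)) ** 2)
    return 1.0 - sse / potential
```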