Merge pull request #2 from fialhocoelho/develop
Develop
fialhocoelho authored Jun 11, 2024
2 parents 0355df9 + f666016 commit 2aaebaa
Showing 10 changed files with 2,114 additions and 1,276 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -87,3 +87,5 @@ target/
 
 # Mypy cache
 .mypy_cache/
+src/data/nixtla_api.key
+src/models/nixtla_api.key
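
The two new ignore rules keep the Nixtla API key files out of version control. At runtime the key is read from the path given by `model.nixtla_api_key_path` in `config.yaml`; a minimal sketch of such a loader (hypothetical helper, not code from this diff) might look like:

```python
from pathlib import Path

def load_nixtla_api_key(key_path: str = "nixtla_api.key") -> str:
    """Read the git-ignored API key file and return the key string."""
    # Assumes the file holds a single line containing only the key.
    return Path(key_path).read_text(encoding="utf-8").strip()
```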
51 changes: 47 additions & 4 deletions config/config.yaml
@@ -1,19 +1,62 @@
 data:
   processed_path: "data/processed/"
+  intermediate_path: "data/interim/"
   raw_path: "data/raw/"
+  forecasted_path: "data/forecasted/"
+  train_folder: "santos_dataset/train/"
+  test_folder: "santos_dataset/test/"
   train_start_date: "2021-01-01 00:00:00"
   train_end_date: "2021-12-31 23:55:00"
   test_start_date: "2022-01-01 00:00:00"
   test_end_date: "2022-12-31 23:55:00"
   processed_train_df: "train_df_praticagem.csv"
   processed_test_df: "test_df_praticagem.csv"
-  intermediate_path: "data/interim/"
   target_freq: "1h"
   interp_method: "linear"
   datetime_col: "datetime"
   round_freq: "5min"
-  timegpt_fcst_file: "forecast_cache_fcst_y_validated_20240513_212354.pkl"
-  chronos_fcst_file: "chronos_forecast_cache_20240518_160948.pkl"
+  timegpt_cache_prefix: "timegpt_forecast_cache"
+  chronos_fcst_file: "chronos_forecast_cache_20240520_000009.pkl"
+  chronos_cache_prefix: "chronos_forecast_cache"
+  crop_target_datetime: "2021-01-01 00:00:00"
+  default_seed: 42
+features:
+  waves_palmas:
+    name: "waves_palmas"
+    list_features: ["hs","tp","ws"]
+    train_filename: "waves_palmas.parquet"
+    test_filename: "waves_palmas.parquet"
+    freq: "20min"
+    train_start_date: "2021-01-01 00:00:00"
+    train_end_date: "2021-12-31 23:55:00"
+    test_start_date: "2022-01-01 00:00:00"
+    test_end_date: "2022-12-31 23:55:00"
+  current_praticagem:
+    name: "current_praticagem"
+    list_features: ["cross_shore_current"]
+    train_filename: "current_praticagem.parquet"
+    test_filename: "current_praticagem.parquet"
+    freq: "5min"
+    train_start_date: "2021-01-01 00:00:00"
+    train_end_date: "2021-12-31 23:55:00"
+    test_start_date: "2022-01-01 00:00:00"
+    test_end_date: "2022-12-31 23:55:00"
 model:
   context_window_len: 168
-  forecast_len: 24
+  forecast_len: 48
+  shift: 48
+  batch_size: 32
+  windowing_mode: "fixed"
+  epochs: 500
+  lr: 0.001
+  beta: 0.5
+  input_size: 1
+  hidden_size: 64
+  train_shuffle: true
+  train_shuffle: true
+  timegpt_finetune_steps: 100
+  nixtla_api_key_path: "nixtla_api.key"
+  chronos_finetune_steps: 0
+  attempts_after_failure: 30
+  device: "cuda"
+  chronos_t5_model: "amazon/chronos-t5-large"
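
The updated config doubles `forecast_len` to 48, adds per-feature blocks under `features:`, and introduces cache prefixes plus Chronos settings under `model:`. A quick way to inspect the merged result (a sketch using PyYAML; note that for duplicated keys such as `train_shuffle`, `yaml.safe_load` silently keeps the last occurrence):

```python
import yaml

with open("config/config.yaml") as f:
    config = yaml.safe_load(f)

print(config["model"]["forecast_len"])       # 48
print(config["model"]["chronos_t5_model"])   # amazon/chronos-t5-large
print(config["features"]["waves_palmas"]["list_features"])  # ['hs', 'tp', 'ws']
```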
214 changes: 214 additions & 0 deletions notebooks/chronos_usage.ipynb
@@ -0,0 +1,214 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Run `chronos` for Santos off-shore dataset"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:utils.nexdata:Loading config file ../config/config.yaml\n",
"INFO:utils.nexdata:waves_palmas train path: ../data/raw/santos_dataset/train/waves_palmas.parquet\n",
"INFO:utils.nexdata:waves_palmas test path: ../data/raw/santos_dataset/test/waves_palmas.parquet\n",
"INFO:utils.nexdata:current_praticagem train path: ../data/raw/santos_dataset/train/current_praticagem.parquet\n",
"INFO:utils.nexdata:current_praticagem test path: ../data/raw/santos_dataset/test/current_praticagem.parquet\n",
"INFO:utils.nexdata:Random seed: 42\n",
"INFO:utils.nexdata:Default device: cuda\n",
"INFO:utils.nexdata:Defining paths...\n"
]
}
],
"source": [
"import sys\n",
"import os\n",
"import pandas as pd\n",
"from tqdm import tqdm\n",
"import matplotlib.pyplot as plt\n",
"import torch\n",
"from chronos import ChronosPipeline\n",
"import numpy as np\n",
"\n",
"sys.path.append('../src/')\n",
"from utils.nexdata import *\n",
"from utils.nexutil import *\n",
"\n",
"params = NexData(nexus_folder='../')\n",
"set_random_seeds(params.data_params['default_seed'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Configuring models, predict and save outputs to be used to `student` model"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Load the ChronosPipeline model from the pretrained\n",
"# 'amazon/chronos-t5-large' model\n",
"chronos_pipeline = ChronosPipeline.from_pretrained(\n",
" 'amazon/chronos-t5-large',\n",
" device_map='cuda',\n",
" torch_dtype=torch.bfloat16,\n",
")\n",
"\n",
"# Iterate over each ocean variable defined in the parameters\n",
"for ocean_variable in params.features.keys():\n",
" print(f'Ocean variable: {ocean_variable}')\n",
"\n",
" # Retrieve target features and experiment IDs\n",
" target_features = params.features[ocean_variable]\n",
" id_experiment = 'chronos_forecast_composed'\n",
" id_experiment_ioa = 'chronos_ioa_composed'\n",
"\n",
" # Load train and test data for the target feature\n",
" df_train_target = pd.read_parquet(\n",
" target_features['train_filepath'])\n",
" df_test_target = pd.read_parquet(\n",
" target_features['test_filepath'])\n",
"\n",
" # Process the training dataframe with specified parameters\n",
" df_train_processed_target = process_dataframe(\n",
" df_train_target,\n",
" target_features['train_start_date'],\n",
" target_features['train_end_date'],\n",
" params.data_params['target_freq'],\n",
" params.data_params['interp_method'],\n",
" params.data_params['datetime_col'],\n",
" params.data_params['round_freq'])\n",
"\n",
" # Process the test dataframe with specified parameters\n",
" df_test_processed_target = process_dataframe(\n",
" df_test_target,\n",
" target_features['test_start_date'],\n",
" target_features['test_end_date'],\n",
" params.data_params['target_freq'],\n",
" params.data_params['interp_method'],\n",
" params.data_params['datetime_col'],\n",
" params.data_params['round_freq'])\n",
"\n",
" # Define the context and forecast window lengths and shift\n",
" context_len = params.model_params['context_window_len']\n",
" forecast_len = params.model_params['forecast_len']\n",
" shift = params.model_params['shift']\n",
" mode = params.model_params['windowing_mode']\n",
"\n",
" # Generate indices for the test set using the context and forecast lengths\n",
" X_test_index, y_test_index = generate_indices(\n",
" df_test_processed_target, context_len, forecast_len,\n",
" shift, mode)\n",
"\n",
" # Initialize DataFrames for predictions and index of agreement (IOA) values\n",
" df_y_hat = pd.DataFrame()\n",
" df_ioa = pd.DataFrame()\n",
"\n",
" # Set the index for the y_hat DataFrame\n",
" df_y_hat.index = np.concatenate(y_test_index)\n",
" df_y_hat[params.data_params['datetime_col']] = (\n",
" df_test_processed_target.loc[\n",
" df_y_hat.index, params.data_params['datetime_col']\n",
" ])\n",
"\n",
" # Iterate over each target feature for prediction\n",
" for target_feature in target_features['list_features']:\n",
" y_hat = []\n",
" ioa_list = []\n",
"\n",
" # Add training data to improve the size of the inference data\n",
" train_signal = df_train_processed_target.loc[:, \n",
" target_feature].values\n",
" len_X_test_index = len(X_test_index)\n",
"\n",
" # Iterate over each test window to generate predictions\n",
" for idx in range(len_X_test_index):\n",
" # Extract test signal for the current window\n",
" test_signal = df_test_processed_target.loc[\n",
" X_test_index[idx], target_feature].values\n",
" y_test_signal = df_test_processed_target.loc[\n",
" y_test_index[idx], target_feature].values\n",
"\n",
" # Concatenate training and test signals\n",
" composed_signal = np.concatenate(\n",
" (train_signal, test_signal))\n",
" \n",
" # Convert the composed signal to a tensor\n",
" batch_context = torch.tensor(composed_signal)\n",
" \n",
" # Generate forecast using the Chronos pipeline\n",
" forecast = chronos_pipeline.predict(\n",
" batch_context, forecast_len)\n",
" predictions = np.quantile(\n",
" forecast.numpy(), 0.5, axis=1)\n",
" \n",
" # Append predictions to the y_hat list\n",
" y_hat.extend(np.array(predictions[0]))\n",
"\n",
" # Calculate the index of agreement (IOA) for the predictions\n",
" ioa = calculate_ioa(\n",
" y_test_signal, np.array(predictions[0]))\n",
" ioa_list.append(ioa)\n",
"\n",
" # Print the progress and IOA value for the current window\n",
" print(f'Window {idx+1} from {len_X_test_index} | '\n",
" f'target feature: {target_feature} | ioa: {round(ioa,3)}')\n",
"\n",
" # Store the predictions and IOA values in the DataFrames\n",
" df_y_hat[target_feature] = y_hat\n",
" df_ioa[target_feature] = ioa_list\n",
"\n",
" # Save the predictions DataFrame to a parquet file\n",
" filename = os.path.join(\n",
" params.forecasted_dir,\n",
" f\"{target_features['name']}_{id_experiment}_\"\n",
" f\"{params.timestamp}.pkl\")\n",
" df_y_hat.to_parquet(filename)\n",
"\n",
" # Save the IOA DataFrame to a parquet file\n",
" filename_ioa = os.path.join(\n",
" params.forecasted_dir,\n",
" f\"{ocean_variable}_{id_experiment_ioa}_\"\n",
" f\"{params.timestamp}.pkl\")\n",
" df_ioa.to_parquet(filename_ioa)\n",
"\n",
" # Print the file paths of the saved files\n",
" print(filename)\n",
" print(filename_ioa)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "chronos",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
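
The notebook's windowing comes from `generate_indices` in `src/utils`, which is not part of this diff. Under the `"fixed"` windowing mode with `context_window_len: 168`, `forecast_len: 48`, and `shift: 48`, a plausible reimplementation (hypothetical sketch, not the repository's code) is:

```python
import numpy as np

def generate_windows(n_rows: int, context_len: int,
                     forecast_len: int, shift: int):
    """Fixed-shift windowing: return lists of (context, target) index arrays."""
    X_idx, y_idx = [], []
    start = 0
    while start + context_len + forecast_len <= n_rows:
        X_idx.append(np.arange(start, start + context_len))
        y_idx.append(np.arange(start + context_len,
                               start + context_len + forecast_len))
        start += shift
    return X_idx, y_idx

# Example: one year of hourly test data, 168 h context, 48 h horizon, 48 h stride
X_test_index, y_test_index = generate_windows(8760, 168, 48, 48)
```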