# .env (forked from IBM/Grapher)
####################################################
# Primary parameters
####################################################
# Set the storage drive for the model outputs (taken here from the $DATA environment variable)
export STORAGE_DRIVE="$DATA"
# Set the dataset name
# Currently, only the webnlg dataset is supported
export DATASET=webnlg
# Set the model variant you want to train, test, or use for generation
# 0 ... edge generation head; 1 ... edge classification head
export MODEL_VARIANT=0
# -2 ... training starts from scratch
# -1 ... training resumes from (or the test uses) the last checkpoint of the last run of MODEL_VARIANT
# >= 0 ... training resumes from (or the test uses) the checkpoint of a specific epoch from the last run of MODEL_VARIANT
export CHECKPOINT_MODEL_ID=-1
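# Illustrative values (the epoch number below is hypothetical):
#   export CHECKPOINT_MODEL_ID=-2   # start a fresh run in a new timestamped directory
#   export CHECKPOINT_MODEL_ID=40   # resume from the epoch-40 checkpoint of the last run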
# Select the hardware setting for training, test, or generation
# Available hardware settings (defined below):
# No SLURM; one node; two GPUs per node; Nvidia A40:  1_2_a40
# No SLURM; one node; one GPU per node;  Nvidia A40:  1_1_a40
# No SLURM; one node; two GPUs per node; Nvidia A100: 1_2_a100
# No SLURM; one node; two GPUs per node; Nvidia TI:   1_2_ti
# SLURM; two nodes; two GPUs per node; Nvidia A40: s2_2_a40
# SLURM; one node;  two GPUs per node; Nvidia A40: s1_2_a40
# SLURM; one node;  one GPU per node;  Nvidia A40: s1_1_a40
export HARDWARE_SETTING=s1_1_a40
# Email address to which SLURM sends a notification when the job starts
export [email protected]
####################################################
# Directories
####################################################
# Timestamp used for new output directories
CURRENT_DATETIME=$(date +"%Y-%m-%dT%H:%M:%S")
# The directories below should only be changed with care
export ROOT_DIR="${STORAGE_DRIVE}/data/core/grapher/output"
if [[ $MODEL_VARIANT -eq 0 ]]; then
export EVAL_DIR="${ROOT_DIR}/${DATASET}_model_variant=gen"
elif [[ $MODEL_VARIANT -eq 1 ]]; then
export EVAL_DIR="${ROOT_DIR}/${DATASET}_model_variant=class"
else
echo "Non-existing model variant. Choose either 0 for edge generation head or 1 for edge classification head"
exit 1
fi
if [ ! -d "$EVAL_DIR" ]; then
mkdir -p "$EVAL_DIR"
fi
valid_dirs=()
for directory in "$EVAL_DIR"/*; do
timestamp=$(basename "$directory")
if date -d "$timestamp" "+%Y-%m-%dT%H:%M:%S" >/dev/null 2>&1; then
valid_dirs+=("$timestamp")
fi
done
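# Note: the glob above returns entries in sorted order, so ISO-8601 timestamps come out
# chronologically and ${valid_dirs[-1]} below is the most recent run (negative array
# indices require bash >= 4.3).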
if [ $CHECKPOINT_MODEL_ID -eq -2 ] || [ ${#valid_dirs[@]} -eq 0 ]; then
export EXEC_DIR="${EVAL_DIR}/${CURRENT_DATETIME}"
mkdir -p "${EXEC_DIR}/checkpoints" "${EXEC_DIR}/valid" "${EXEC_DIR}/test"
echo "Created new directory: ${EXEC_DIR} with three sub directories"
elif [ $CHECKPOINT_MODEL_ID -gt -2 ] && [ ${#valid_dirs[@]} -gt 0 ]; then
export EXEC_DIR="${EVAL_DIR}/${valid_dirs[-1]}"
else
echo "The CHECKPOINT_MODEL_ID must be either -2 (new training), -1 (resume training from last epoch checkpoint) or >= 0 (resume trianing from a specific epoch checkpoint)"
fi
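# Resulting layout sketch (the timestamp is an example value):
#   ${EVAL_DIR}/2024-01-01T12:00:00/{checkpoints,valid,test}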
export CACHE_DIR=${STORAGE_DRIVE}/data/core/cache/grapher
export DATA_DIR=${STORAGE_DRIVE}/data/core/webnlg-dataset/release_v3.0/en
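# Assumption: DATA_DIR points at the English part of the WebNLG 3.0 release
# (release_v3.0/en from the webnlg-dataset repository, typically containing the
# train/dev/test splits); adjust the path if your checkout lives elsewhere.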
####################################################
# Server, HPC, and GPU settings
####################################################
# CUDA-index-to-physical-GPU mapping on the troubadix server at TU Wien
# CUDA 0 = GPU 4
# CUDA 1 = GPU 5
# CUDA 2 = GPU 0
# CUDA 3 = GPU 1
# CUDA 4 = GPU 2
# CUDA 5 = GPU 3
export CUDA_VISIBLE_DEVICES=0,1
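# max_split_size_mb caps the size of blocks the PyTorch CUDA caching allocator will
# split (here 1024 MB), which can help avoid fragmentation-related out-of-memory errors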
export PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:1024
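# Presumably passed to SLURM as the walltime limit; 2-0 means 2 days, 0 hours in
# SLURM's days-hours time format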
export TIME=2-0
# Troubadix: Interactive terminal (implies one node)
# --------------------------------------------------
if [[ $HARDWARE_SETTING == "1_2_ti" ]]; then
export BATCH_SIZE=8
export NUM_NODES=1
export NUM_GPUS_PER_NODE=2
export NUM_DATAWORKERS=15
# GPUs are selected via CUDA_VISIBLE_DEVICES in this file
# VSC5: Interactive terminal (implies one node)
# --------------------------------------------------
elif [[ $HARDWARE_SETTING == "1_2_a40" ]]; then
export BATCH_SIZE=20
export NUM_NODES=1
export NUM_GPUS_PER_NODE=2
export NUM_DATAWORKERS=2
elif [[ $HARDWARE_SETTING == "1_1_a40" ]]; then
export CUDA_VISIBLE_DEVICES=0
export BATCH_SIZE=20
export NUM_NODES=1
export NUM_GPUS_PER_NODE=1
export NUM_DATAWORKERS=2
# GPUs are selected via CUDA_VISIBLE_DEVICES in this file
elif [[ $HARDWARE_SETTING == "1_2_a100" ]]; then
export BATCH_SIZE=16
export NUM_NODES=1
export NUM_GPUS_PER_NODE=2
export NUM_DATAWORKERS=2
# GPUs are selected via CUDA_VISIBLE_DEVICES in this file
# VSC5: SLURM
# --------------------------------------------------
elif [[ $HARDWARE_SETTING == "s2_2_a40" ]]; then
export BATCH_SIZE=20
export NUM_NODES=2
export NUM_GPUS_PER_NODE=2
export PARTITION=zen2_0256_a40x2
export QOS=zen2_0256_a40x2
export NUM_DATAWORKERS=2
# GPUs are selected via CUDA_VISIBLE_DEVICES in this file
elif [[ $HARDWARE_SETTING == "s1_2_a40" ]]; then
export BATCH_SIZE=20
export NUM_NODES=1
export NUM_GPUS_PER_NODE=2
export PARTITION=zen2_0256_a40x2
export QOS=zen2_0256_a40x2
export NUM_DATAWORKERS=2
# GPUs are selected via CUDA_VISIBLE_DEVICES in this file
elif [[ $HARDWARE_SETTING == "s1_1_a40" ]]; then
export CUDA_VISIBLE_DEVICES=0
export BATCH_SIZE=20
export NUM_NODES=1
export NUM_GPUS_PER_NODE=1
export PARTITION=zen2_0256_a40x2
export QOS=zen2_0256_a40x2
export NUM_DATAWORKERS=2
# GPUs are selected via CUDA_VISIBLE_DEVICES in this file
else
echo "Hardware setting not available. "
fi
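####################################################
# Usage sketch (assumed workflow; script names are placeholders, not part of this repository)
####################################################
#   source .env                       # export all variables into the current shell
#   sbatch <your_slurm_script>.sh     # for the SLURM settings (s2_2_a40, s1_2_a40, s1_1_a40)
#   bash <your_training_script>.sh    # for the interactive (non-SLURM) settings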