From 5ef16b8aa3467ec6dd9a178a946cbff7933e496c Mon Sep 17 00:00:00 2001
From: Joel Lamy-Poirier <joel.lamy-poirier@servicenow.com>
Date: Tue, 22 Oct 2024 09:39:35 -0400
Subject: [PATCH] Remove docs workflows, examples, and requirements-doc.txt

---
 .github/workflows/build_documentation.yaml  |  18 ---
 .github/workflows/deploy_documentation.yaml |  28 -----
 examples/example_config.yaml                |  54 ---------
 examples/train_mistral.sh                   | 128 --------------------
 requirements-doc.txt                        |   7 --
 5 files changed, 235 deletions(-)
 delete mode 100644 .github/workflows/build_documentation.yaml
 delete mode 100644 .github/workflows/deploy_documentation.yaml
 delete mode 100644 examples/example_config.yaml
 delete mode 100644 examples/train_mistral.sh
 delete mode 100644 requirements-doc.txt

diff --git a/.github/workflows/build_documentation.yaml b/.github/workflows/build_documentation.yaml
deleted file mode 100644
index 6925489a..00000000
--- a/.github/workflows/build_documentation.yaml
+++ /dev/null
@@ -1,18 +0,0 @@
-name: Build the documentation
-
-on:
-  pull_request:
-    branches: [main]
-
-jobs:
-  build:
-    name: Build
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v4
-        with:
-          python-version: "3.10"
-      - run: pip install -r requirements-doc.txt
-      - name: Build the documentation
-        run: mkdocs build
diff --git a/.github/workflows/deploy_documentation.yaml b/.github/workflows/deploy_documentation.yaml
deleted file mode 100644
index 92bc2752..00000000
--- a/.github/workflows/deploy_documentation.yaml
+++ /dev/null
@@ -1,28 +0,0 @@
-name: Publish the documentation
-
-on:
-  push:
-    branches:
-      - main
-
-permissions:
-  contents: write
-
-jobs:
-  deploy:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v4
-        with:
-          python-version: "3.10"
-      - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV
-      - uses: actions/cache@v3
-        with:
-          key: mkdocs-material-${{ env.cache_id }}
-          path: .cache
-          restore-keys: |
-            mkdocs-material-
-      - run: pip install -r requirements-doc.txt
-      - name: Publish the documentation
-        run: mkdocs gh-deploy --force
diff --git a/examples/example_config.yaml b/examples/example_config.yaml
deleted file mode 100644
index c23d7c7b..00000000
--- a/examples/example_config.yaml
+++ /dev/null
@@ -1,54 +0,0 @@
-run:
-  experiment_dir: null
-  wandb_post_alerts: false
-model:
-  base_model:
-    transformer:
-      num_layers: 12
-      hidden_size: 1024
-      num_attention_heads: 8
-      head_groups: 1
-      add_linear_biases: true
-      ffn_hidden_size: 4096
-      kv_channels: 128
-      activation_type: gelu
-      init_method_std: 0.03125
-      init_method_std_qkv: 0.03125
-      init_method_std_attn_proj: 0.0063788795384978605
-      init_method_std_mlp_1: 0.03125
-      init_method_std_mlp_2: 0.0063788795384978605
-      mlp_lr_scale:
-      - null
-    vocab_size: 49152
-    use_position_embeddings: true
-    tie_word_embeddings: true
-    init_method_std_embed: 0.03125
-  distributed:
-    distributed_timeout: 60.0
-    training_dtype: float32
-pretrained:
-  pretrained_checkpoint_path: null
-  pretrained_checkpoint_type: distributed
-batch:
-  micro_batch_size: 1
-  depth_first_micro_batches: 1
-  breadth_first_micro_batches: 1
-  sequential_micro_batches: 1
-  batch_size: 1
-  sequence_length: 2048
-  micro_sequence_length: 2048
-data:
-  split:
-  - 969.0
-  - 30.0
-  - 1.0
-  dataset_source: list
-  data_path:
-  - fkgtiu
-  data_sample_warn_time_ms: 1000.0
-profiling:
-  profile_cuda: false
-  profile_ranks: []
-optimizer:
-  weight_decay: 0.01
-  initial_loss_scale: 65536.0
diff --git a/examples/train_mistral.sh b/examples/train_mistral.sh
deleted file mode 100644
index 5745e38c..00000000
--- a/examples/train_mistral.sh
+++ /dev/null
@@ -1,128 +0,0 @@
-# Required or optional environment variables
-# export PROJECT_DIR=
-# export PROJECT_NAME=
-# export PROJECT_VERSION=
-# export DATA_PATH_LIST=
-# export DATA_PATH_JSON=
-# export PRETRAINED_MISTRAL_PATH=
-# export PRETRAINED_MIXTRAL_PATH=
-
-export CMD_ARGS="fast-llm train gpt"
-
-export MODEL_ARGS_PRETRAINED="\
---pretrained_checkpoint_type=huggingface \
---pretrained_checkpoint_path=$PRETRAINED_MISTRAL_PATH \
---use_pretrained_config=1 \
-"
-
-export MODEL_ARGS_ARCHITECTURE="\
---num_layers=32 \
---hidden_size=4096 \
---vocab_size=32000 \
---num_attention_heads=32 \
---head_groups=8 \
---add_linear_biases=0 \
---ffn_hidden_size=14336 \
---kv_channels=128 \
---use_rotary_embeddings=1 \
---rotary_embedding_scale=-9.210340371976184 \
---gated=1 \
---activation_type=silu \
---normalization_type=rms_norm \
---tie_word_embeddings=0 \
---window_size=4096 \
-"
-
-export DATA_ARGS_JSON="\
---split=9998,2,0 \
---dataset_source=file \
---data_path=$DATA_PATH_JSON \
-"
-
-export DATA_ARGS_LIST="\
---split=9998,2,0 \
---dataset_source=list \
---data_path=$DATA_PATH_DATA_ARGS_LIST \
-"
-
-export TRAINING_ARGS="\
---batch_size=128 \
---sequence_length=8192 \
---train_iters=500000 \
---weight_decay=0.1 \
---adam_beta1=0.9 \
---adam_beta2=0.95 \
---clip_grad=1.0 \
---lr=0.0001 \
---lr_warmup_iters=1000 \
---lr_decay_style=cosine \
---lr_decay_iters=500000 \
---min_lr=0.000003 \
-"
-
-export PERFORMANCE_ARGS="\
---micro_batch_size=1 \
---training_dtype=bf16 \
---zero_stage=2 \
---num_workers=8 \
-"
-
-export MONITORING_ARGS="\
---validation_iters=25 \
---validation_interval=1000 \
---log_interval=10 \
---log_offset=0 \
---checkpoint_interval=500 \
---max_checkpoints=5 \
---export_interval=25000 \
---wandb_status_interval=25000 \
---wandb_entity_name=$WANDB_ENTITY_NAME \
---wandb_project_name=$PROJECT_NAME \
---wandb_group_name=$PROJECT_VERSION \
-"
-
-export ALL_ARGS="\
-$CMD_ARGS \
-$MODEL_ARGS_PRETRAINED \
-$DATA_ARGS_LIST \
-$TRAINING_ARGS \
-$PERFORMANCE_ARGS \
-$MONITORING_ARGS \
-"
-
-export MODEL_ARGS_MIXTRAL_ARCHITECTURE="\
-$MODEL_ARGS_ARCHITECTURE \
---num_experts=8 \
---num_experts_per_token=2 \
-"
-
-export MIXTRAL_ARGS="\
---pretrained_checkpoint_path=$PRETRAINED_MIXTRAL_PATH \
---zero_stage=3 \
---mlp_recompute_level=activation \
-"
-
-export PROFILE_ARGS="\
---profile_cuda=1 \
---profile_skip=10 \
---profile_wait=95 \
---profile_warmup=2 \
---profile_cycles=3 \
---profile_export=1 \
-"
-
-
-run_local () { # run(name, num_gpus, base_cmd)
-  echo $1 $2 $3
-  export TORCHRUN="torchrun --nproc-per-node=$2 --nnodes=1 --no-python"
-  $TORCHRUN $3 --experiment_dir=$PROJECT_DIR/$PROJECT_NAME_$PROJECT_VERSION/$1
-}
-
-run_c10d () { # run(name, num_nodes, base_cmd)
-  echo $1 $2 $3
-  export TORCHRUN="torchrun --nproc-per-node=8 --nnodes=$2 --no-python --rdzv-backend=c10d --rdzv-endpoint=$HOST_NODE_ADDR"
-  $TORCHRUN $3 --experiment_dir=$PROJECT_DIR/$PROJECT_NAME_$PROJECT_VERSION/$1
-}
-
-run_c10d mistral_example 16 "$ALL_ARGS"
-# run_c10d mixtral_example 16 "$ALL_ARGS $MIXTRAL_ARGS --train_iters=50"
diff --git a/requirements-doc.txt b/requirements-doc.txt
deleted file mode 100644
index b7bd2efb..00000000
--- a/requirements-doc.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-mkdocs
-mkdocs-material
-mkdocs-material[imaging]
-mkdocs-section-index
-mkdocstrings[python]
-mkdocs-git-committers-plugin-2
-mkdocs-git-revision-date-localized-plugin