From 5ef16b8aa3467ec6dd9a178a946cbff7933e496c Mon Sep 17 00:00:00 2001 From: Joel Lamy-Poirier Date: Tue, 22 Oct 2024 09:39:35 -0400 Subject: [PATCH] fix --- .github/workflows/build_documentation.yaml | 18 --- .github/workflows/deploy_documentation.yaml | 28 ----- examples/example_config.yaml | 54 --------- examples/train_mistral.sh | 128 -------------------- requirements-doc.txt | 7 -- 5 files changed, 235 deletions(-) delete mode 100644 .github/workflows/build_documentation.yaml delete mode 100644 .github/workflows/deploy_documentation.yaml delete mode 100644 examples/example_config.yaml delete mode 100644 examples/train_mistral.sh delete mode 100644 requirements-doc.txt diff --git a/.github/workflows/build_documentation.yaml b/.github/workflows/build_documentation.yaml deleted file mode 100644 index 6925489a..00000000 --- a/.github/workflows/build_documentation.yaml +++ /dev/null @@ -1,18 +0,0 @@ -name: Build the documentation - -on: - pull_request: - branches: [main] - -jobs: - build: - name: Build - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v4 - with: - python-version: "3.10" - - run: pip install -r requirements-doc.txt - - name: Build the documentation - run: mkdocs build diff --git a/.github/workflows/deploy_documentation.yaml b/.github/workflows/deploy_documentation.yaml deleted file mode 100644 index 92bc2752..00000000 --- a/.github/workflows/deploy_documentation.yaml +++ /dev/null @@ -1,28 +0,0 @@ -name: Publish the documentation - -on: - push: - branches: - - main - -permissions: - contents: write - -jobs: - deploy: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v4 - with: - python-version: "3.10" - - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV - - uses: actions/cache@v3 - with: - key: mkdocs-material-${{ env.cache_id }} - path: .cache - restore-keys: | - mkdocs-material- - - run: pip install -r requirements-doc.txt - - name: Publish the documentation - run: mkdocs gh-deploy --force diff --git a/examples/example_config.yaml b/examples/example_config.yaml deleted file mode 100644 index c23d7c7b..00000000 --- a/examples/example_config.yaml +++ /dev/null @@ -1,54 +0,0 @@ -run: - experiment_dir: null - wandb_post_alerts: false -model: - base_model: - transformer: - num_layers: 12 - hidden_size: 1024 - num_attention_heads: 8 - head_groups: 1 - add_linear_biases: true - ffn_hidden_size: 4096 - kv_channels: 128 - activation_type: gelu - init_method_std: 0.03125 - init_method_std_qkv: 0.03125 - init_method_std_attn_proj: 0.0063788795384978605 - init_method_std_mlp_1: 0.03125 - init_method_std_mlp_2: 0.0063788795384978605 - mlp_lr_scale: - - null - vocab_size: 49152 - use_position_embeddings: true - tie_word_embeddings: true - init_method_std_embed: 0.03125 - distributed: - distributed_timeout: 60.0 - training_dtype: float32 -pretrained: - pretrained_checkpoint_path: null - pretrained_checkpoint_type: distributed -batch: - micro_batch_size: 1 - depth_first_micro_batches: 1 - breadth_first_micro_batches: 1 - sequential_micro_batches: 1 - batch_size: 1 - sequence_length: 2048 - micro_sequence_length: 2048 -data: - split: - - 969.0 - - 30.0 - - 1.0 - dataset_source: list - data_path: - - fkgtiu - data_sample_warn_time_ms: 1000.0 -profiling: - profile_cuda: false - profile_ranks: [] -optimizer: - weight_decay: 0.01 - initial_loss_scale: 65536.0 diff --git a/examples/train_mistral.sh b/examples/train_mistral.sh deleted file mode 100644 index 5745e38c..00000000 --- a/examples/train_mistral.sh +++ /dev/null @@ -1,128 +0,0 @@ -# Required or optional environment variables -# export PROJECT_DIR= -# export PROJECT_NAME= -# export PROJECT_VERSION= -# export DATA_PATH_LIST= -# export DATA_PATH_JSON= -# export PRETRAINED_MISTRAL_PATH= -# export PRETRAINED_MIXTRAL_PATH= - -export CMD_ARGS="fast-llm train gpt" - -export MODEL_ARGS_PRETRAINED="\ ---pretrained_checkpoint_type=huggingface \ ---pretrained_checkpoint_path=$PRETRAINED_MISTRAL_PATH \ ---use_pretrained_config=1 \ -" - -export MODEL_ARGS_ARCHITECTURE="\ ---num_layers=32 \ ---hidden_size=4096 \ ---vocab_size=32000 \ ---num_attention_heads=32 \ ---head_groups=8 \ ---add_linear_biases=0 \ ---ffn_hidden_size=14336 \ ---kv_channels=128 \ ---use_rotary_embeddings=1 \ ---rotary_embedding_scale=-9.210340371976184 \ ---gated=1 \ ---activation_type=silu \ ---normalization_type=rms_norm \ ---tie_word_embeddings=0 \ ---window_size=4096 \ -" - -export DATA_ARGS_JSON="\ ---split=9998,2,0 \ ---dataset_source=file \ ---data_path=$DATA_PATH_JSON \ -" - -export DATA_ARGS_LIST="\ ---split=9998,2,0 \ ---dataset_source=list \ ---data_path=$DATA_PATH_DATA_ARGS_LIST \ -" - -export TRAINING_ARGS="\ ---batch_size=128 \ ---sequence_length=8192 \ ---train_iters=500000 \ ---weight_decay=0.1 \ ---adam_beta1=0.9 \ ---adam_beta2=0.95 \ ---clip_grad=1.0 \ ---lr=0.0001 \ ---lr_warmup_iters=1000 \ ---lr_decay_style=cosine \ ---lr_decay_iters=500000 \ ---min_lr=0.000003 \ -" - -export PERFORMANCE_ARGS="\ ---micro_batch_size=1 \ ---training_dtype=bf16 \ ---zero_stage=2 \ ---num_workers=8 \ -" - -export MONITORING_ARGS="\ ---validation_iters=25 \ ---validation_interval=1000 \ ---log_interval=10 \ ---log_offset=0 \ ---checkpoint_interval=500 \ ---max_checkpoints=5 \ ---export_interval=25000 \ ---wandb_status_interval=25000 \ ---wandb_entity_name=$WANDB_ENTITY_NAME \ ---wandb_project_name=$PROJECT_NAME \ ---wandb_group_name=$PROJECT_VERSION \ -" - -export ALL_ARGS="\ -$CMD_ARGS \ -$MODEL_ARGS_PRETRAINED \ -$DATA_ARGS_LIST \ -$TRAINING_ARGS \ -$PERFORMANCE_ARGS \ -$MONITORING_ARGS \ -" - -export MODEL_ARGS_MIXTRAL_ARCHITECTURE="\ -$MODEL_ARGS_ARCHITECTURE \ ---num_experts=8 \ ---num_experts_per_token=2 \ -" - -export MIXTRAL_ARGS="\ ---pretrained_checkpoint_path=$PRETRAINED_MIXTRAL_PATH \ ---zero_stage=3 \ ---mlp_recompute_level=activation \ -" - -export PROFILE_ARGS="\ ---profile_cuda=1 \ ---profile_skip=10 \ ---profile_wait=95 \ ---profile_warmup=2 \ ---profile_cycles=3 \ ---profile_export=1 \ -" - - -run_local () { # run(name, num_gpus, base_cmd) - echo $1 $2 $3 - export TORCHRUN="torchrun --nproc-per-node=$2 --nnodes=1 --no-python" - $TORCHRUN $3 --experiment_dir=$PROJECT_DIR/$PROJECT_NAME_$PROJECT_VERSION/$1 -} - -run_c10d () { # run(name, num_nodes, base_cmd) - echo $1 $2 $3 - export TORCHRUN="torchrun --nproc-per-node=8 --nnodes=$2 --no-python --rdzv-backend=c10d --rdzv-endpoint=$HOST_NODE_ADDR" - $TORCHRUN $3 --experiment_dir=$PROJECT_DIR/$PROJECT_NAME_$PROJECT_VERSION/$1 -} - -run_c10d mistral_example 16 "$ALL_ARGS" -# run_c10d mixtral_example 16 "$ALL_ARGS $MIXTRAL_ARGS --train_iters=50" diff --git a/requirements-doc.txt b/requirements-doc.txt deleted file mode 100644 index b7bd2efb..00000000 --- a/requirements-doc.txt +++ /dev/null @@ -1,7 +0,0 @@ -mkdocs -mkdocs-material -mkdocs-material[imaging] -mkdocs-section-index -mkdocstrings[python] -mkdocs-git-committers-plugin-2 -mkdocs-git-revision-date-localized-plugin