From 6ef1996afbd3b2970837f3d592a5ba39ed534835 Mon Sep 17 00:00:00 2001
From: Hoyeon Chang <74127423+Duemoo@users.noreply.github.com>
Date: Sat, 5 Oct 2024 17:08:55 +0900
Subject: [PATCH] Delete configs/official/OLMo-1B-105.yaml

---
 configs/official/OLMo-1B-105.yaml | 101 ------------------------------
 1 file changed, 101 deletions(-)
 delete mode 100644 configs/official/OLMo-1B-105.yaml

diff --git a/configs/official/OLMo-1B-105.yaml b/configs/official/OLMo-1B-105.yaml
deleted file mode 100644
index 4ba381c..0000000
--- a/configs/official/OLMo-1B-105.yaml
+++ /dev/null
@@ -1,101 +0,0 @@
-run_name: OLMo-1B-dev
-seed: 6198
-dry_run: false
-
-# wandb:
-#   name: ${run_name}
-#   project: olmo
-
-model:
-  d_model: 2048
-  n_heads: 16
-  n_layers: 16
-  mlp_ratio: 8
-  weight_tying: true
-  alibi: false
-  rope: true
-  flash_attention: true  # not available on AMD
-  attention_dropout: 0.0
-  attention_layer_norm: false
-  multi_query_attention: false
-  include_bias: false
-  block_type: sequential
-  layer_norm_type: default
-  layer_norm_with_affine: false
-  bias_for_layer_norm: false
-  attention_layer_norm_with_affine: false
-  activation_type: swiglu
-  residual_dropout: 0.0
-  embedding_dropout: 0.0
-  max_sequence_length: 2048
-  vocab_size: 50280
-  embedding_size: 50304
-  eos_token_id: 50279
-  pad_token_id: 1
-  init_device: meta
-  init_fn: mitchell
-
-compile: null  # causes instability on AMD GPUs
-
-optimizer:
-  name: adamw
-  learning_rate: 4.0e-4
-  weight_decay: 0.1
-  betas:
-  - 0.9
-  - 0.95
-  metrics_log_interval: 10
-
-scheduler:
-  name: cosine_with_warmup
-  t_warmup: 2000
-  alpha_f: 0.1
-
-tokenizer:
-  identifier: tokenizers/allenai_eleuther-ai-gpt-neox-20b-pii-special.json
-  truncate_direction: right
-
-save_folder: ${path.choose:${oc.env:SCRATCH_DIR,no_exist}/checkpoints,/results}/${oc.env:SLURM_JOB_ID,${run_name}}
-save_overwrite: false
-# Sharded checkpoints (best for restarts)
-save_interval: 100
-save_num_checkpoints_to_keep: 11
-# Unsharded checkpoints (for final storage)
-save_interval_unsharded: 1000
-save_num_unsharded_checkpoints_to_keep: -1
-
-load_path: official_checkpoints/1b/step50000-unsharded
-
-max_duration: 739_328  # 3.1T tokens
-global_train_batch_size: 2048
-device_train_microbatch_size: 4
-
-precision: amp_bf16
-
-fsdp:
-  wrapping_strategy: null
-  precision: mixed
-
-max_grad_norm: 1.0
-max_grad_norm_ratio: null
-
-speed_monitor:
-  window_size: 20
-
-eval_interval: 999999999999
-eval_subset_num_batches: -1
-device_eval_batch_size: ${device_train_microbatch_size}
-evaluators: []
-
-inject_indices_map: analysis/inject_indices_map/50000-50005.pkl
-data_shuffling: true
-
-data:
-  pad_direction: right
-  num_workers: 0
-  drop_last: true
-  pin_memory: true
-  prefetch_factor: 16
-  persistent_workers: true
-  timeout: 0
-  paths: []
\ No newline at end of file
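
Note on the deleted config: the ${oc.env:...} and ${run_name} interpolations above are
omegaconf syntax, so the file can be inspected outside the OLMo trainer. Below is a minimal
sketch of doing so, assuming omegaconf >= 2.1 is installed; "path.choose" is a custom
resolver registered by the OLMo codebase, and the lambda here is a hypothetical stand-in
for illustration, not OLMo's actual implementation.

    # load_config_sketch.py -- minimal sketch, not OLMo's trainer code
    import os
    from omegaconf import OmegaConf

    # Hypothetical stand-in for OLMo's custom "path.choose" resolver:
    # return the first argument that is an existing directory, else the last.
    OmegaConf.register_new_resolver(
        "path.choose",
        lambda *paths: next((p for p in paths if os.path.isdir(str(p))), paths[-1]),
    )

    cfg = OmegaConf.load("configs/official/OLMo-1B-105.yaml")
    print(cfg.model.d_model)   # 2048
    print(cfg.save_folder)     # resolved lazily; uses SCRATCH_DIR / SLURM_JOB_ID if set

Interpolations resolve lazily on access, so save_folder only consults the environment
when it is read; built-in resolvers such as oc.env need no registration.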