From c097d0e9631a1d2877b213578f946c07a8bd18c8 Mon Sep 17 00:00:00 2001 From: Hoyeon Chang <74127423+Duemoo@users.noreply.github.com> Date: Sat, 5 Oct 2024 17:09:04 +0900 Subject: [PATCH] Delete configs/official/OLMo-7B-bsize-40000.yaml --- configs/official/OLMo-7B-bsize-40000.yaml | 104 ---------------------- 1 file changed, 104 deletions(-) delete mode 100644 configs/official/OLMo-7B-bsize-40000.yaml diff --git a/configs/official/OLMo-7B-bsize-40000.yaml b/configs/official/OLMo-7B-bsize-40000.yaml deleted file mode 100644 index 6dc3990..0000000 --- a/configs/official/OLMo-7B-bsize-40000.yaml +++ /dev/null @@ -1,104 +0,0 @@ -run_name: OLMo-7B-113000-128-lr-adj -seed: 6198 -dry_run: false - -model: - d_model: 4096 - n_heads: 32 - n_layers: 32 - mlp_hidden_size: 22016 - weight_tying: false - alibi: false - rope: true - flash_attention: true - attention_dropout: 0.0 - attention_layer_norm: false - multi_query_attention: false - include_bias: false - block_type: sequential - layer_norm_type: default - layer_norm_with_affine: false - bias_for_layer_norm: false - attention_layer_norm_with_affine: false - activation_type: swiglu - residual_dropout: 0.0 - embedding_dropout: 0.0 - max_sequence_length: 2048 - vocab_size: 50280 - embedding_size: 50304 - eos_token_id: 50279 - pad_token_id: 1 - init_device: meta - init_fn: mitchell - -compile: null - -optimizer: - name: adamw - learning_rate: 0.00027987859018117786 - weight_decay: 0.1 - betas: - - 0.9 - - 0.95 - metrics_log_interval: 10 - -scheduler: - name: constant - t_warmup: 0 - alpha_f: 0.1 - grad_clip_warmup_steps: 0 - grad_clip_warmup_factor: 10.0 - -tokenizer: - identifier: tokenizers/allenai_eleuther-ai-gpt-neox-20b-pii-special.json - truncate_direction: right - -save_folder: ${oc.env:SCRATCH_DIR,no_exist}/checkpoints/${run_name} -remote_save_folder: null -save_overwrite: false -# Sharded checkpoints (best for restarts) -save_interval: 99999 -save_num_checkpoints_to_keep: 1 -# Unsharded checkpoints (for final storage) -save_interval_unsharded: null -save_num_unsharded_checkpoints_to_keep: 1 - -load_path: LOAD_PATH -inject_indices_map: ${oc.env:SCRATCH_DIR,no_exist}/analysis/inject_indices_map/7b-0.pkl -probe_dataset: ${oc.env:SCRATCH_DIR,no_exist}/fictional_knowledge/fictional_knowledge_paraphrased.json -eval_on_load: true -reset_optimizer_state: true - -max_duration: 2e12T # 2T tokens -global_train_batch_size: 128 -device_train_microbatch_size: 1 -time_limit: null - -precision: amp_bf16 - -fsdp: - wrapping_strategy: by_block - precision: mixed - -max_grad_norm: 1.0 -max_grad_norm_ratio: null - -speed_monitor: - window_size: 20 - -eval_interval: 1 -eval_subset_num_batches: -1 -device_eval_batch_size: 16 -evaluators: [] - -data_shuffling: false - -data: - pad_direction: right - num_workers: 16 - drop_last: true - pin_memory: true - prefetch_factor: 1 - persistent_workers: true - timeout: 0 - paths: [DATASET_PATH] \ No newline at end of file