From a60980573f50f4dcd502232708c9d20af8cfbb67 Mon Sep 17 00:00:00 2001 From: Jesse Cai Date: Wed, 4 Dec 2024 16:20:47 -0800 Subject: [PATCH 1/9] wip --- benchmark_requirements.txt | 59 ++++++++++ scripts/convert_hf_checkpoint.py | 4 +- scripts/prepare.sh | 12 +- torchao/_models/llama/benchmark_results.txt | 14 +++ torchao/_models/llama/benchmarks.sh | 118 ++++++++++---------- torchao/_models/llama/generate.py | 4 +- 6 files changed, 142 insertions(+), 69 deletions(-) create mode 100644 benchmark_requirements.txt diff --git a/benchmark_requirements.txt b/benchmark_requirements.txt new file mode 100644 index 0000000000..20c2c964ee --- /dev/null +++ b/benchmark_requirements.txt @@ -0,0 +1,59 @@ +Package Version Editable project location +------------------- -------------------------- ------------------------- +attrs 24.2.0 +black 24.8.0 +blobfile 3.0.0 +certifi 2024.8.30 +cffi 1.17.1 +charset-normalizer 3.4.0 +click 8.1.7 +expecttest 0.2.1 +filelock 3.16.1 +fsspec 2024.10.0 +huggingface-hub 0.26.2 +hypothesis 6.115.6 +idna 3.10 +Jinja2 3.1.4 +lxml 5.3.0 +markdown-it-py 2.2.0 +MarkupSafe 2.1.5 +mdurl 0.1.0 +mkl_fft 1.3.11 +mkl_random 1.2.8 +mkl-service 2.4.0 +mpmath 1.3.0 +mypy 1.11.2 +mypy-extensions 1.0.0 +networkx 3.4.2 +numpy 2.0.1 +packaging 24.1 +pathspec 0.10.3 +pillow 11.0.0 +pip 24.3.1 +platformdirs 3.10.0 +protobuf 4.25.3 +psutil 5.9.0 +pycparser 2.21 +pycryptodomex 3.21.0 +Pygments 2.15.1 +pytorch-triton-rocm 3.1.0+cf34004b8a +PyYAML 6.0.2 +regex 2024.11.6 +requests 2.32.3 +rich 13.7.1 +safetensors 0.4.5 +setuptools 75.1.0 +six 1.16.0 +sortedcontainers 2.4.0 +sympy 1.13.1 +tiktoken 0.8.0 +tokenizers 0.20.3 +torch 2.6.0.dev20241122+rocm6.2 +torchao 0.7.0+git9bb1b230 /data/users/jessecai/ao +torchaudio 2.5.0.dev20241125+rocm6.2 +torchvision 0.20.0.dev20241125+rocm6.2 +tqdm 4.67.1 +transformers 4.46.3 +typing_extensions 4.11.0 +urllib3 2.2.3 +wheel 0.44.0 diff --git a/scripts/convert_hf_checkpoint.py b/scripts/convert_hf_checkpoint.py index 11d425ceb2..7cf018b5fb 100644 --- a/scripts/convert_hf_checkpoint.py +++ b/scripts/convert_hf_checkpoint.py @@ -86,8 +86,8 @@ def permute(w, n_head): state_dict = torch.load(str(file), map_location="cpu", mmap=True, weights_only=True) merged_result.update(state_dict) - if config.tie_word_embeddings: - merged_result["lm_head.weight"] = merged_result["model.embed_tokens.weight"].clone() + # if config.tie_word_embeddings: + # merged_result["lm_head.weight"] = merged_result["model.embed_tokens.weight"].clone() final_result = {} for key, value in merged_result.items(): diff --git a/scripts/prepare.sh b/scripts/prepare.sh index db426e3b11..04c0558861 100644 --- a/scripts/prepare.sh +++ b/scripts/prepare.sh @@ -1,8 +1,8 @@ -python scripts/download.py --repo_id meta-llama/Llama-2-7b-chat-hf -python scripts/download.py --repo_id meta-llama/Meta-Llama-3-8B +#python scripts/download.py --repo_id meta-llama/Llama-2-7b-chat-hf +#python scripts/download.py --repo_id meta-llama/Meta-Llama-3-8B python scripts/download.py --repo_id meta-llama/Meta-Llama-3.1-8B -python scripts/download.py --repo_id meta-llama/Llama-3.2-3B -python scripts/convert_hf_checkpoint.py --checkpoint_dir checkpoints/meta-llama/Llama-2-7b-chat-hf -python scripts/convert_hf_checkpoint.py --checkpoint_dir checkpoints/meta-llama/Meta-Llama-3-8B +#python scripts/download.py --repo_id meta-llama/Llama-3.2-3B +#python scripts/convert_hf_checkpoint.py --checkpoint_dir checkpoints/meta-llama/Llama-2-7b-chat-hf +#python scripts/convert_hf_checkpoint.py --checkpoint_dir checkpoints/meta-llama/Meta-Llama-3-8B python scripts/convert_hf_checkpoint.py --checkpoint_dir checkpoints/meta-llama/Meta-Llama-3.1-8B -python scripts/convert_hf_checkpoint.py --checkpoint_dir checkpoints/meta-llama/Llama-3.2-3B +#python scripts/convert_hf_checkpoint.py --checkpoint_dir checkpoints/meta-llama/Llama-3.2-3B diff --git a/torchao/_models/llama/benchmark_results.txt b/torchao/_models/llama/benchmark_results.txt index d59c5f552e..1b85545d38 100644 --- a/torchao/_models/llama/benchmark_results.txt +++ b/torchao/_models/llama/benchmark_results.txt @@ -50,3 +50,17 @@ OTHER BENCHMARKS 20240910010056, tok/s= 47.85, mem/s= 213.24 GB/s, peak_mem=11.85 GB, model_size= 4.46 GB quant: uintx-4-64, mod: Meta-Llama-3-8B, kv_quant: False, compile: True, compile_prefill: False, dtype: torch.bfloat16, device: cuda repro: python generate.py --quantization uintx-4-64 --checkpoint_path ../../../checkpoints/meta-llama/Meta-Llama-3-8B/model.pth --device cuda --precision torch.bfloat16 --compile --num_samples 5 --max_new_tokens 200 --top_k 200 --temperature 0.8 20240910010647, tok/s= 34.83, mem/s= 261.42 GB/s, peak_mem=14.99 GB, model_size= 7.51 GB quant: uintx-2-8, mod: Meta-Llama-3-8B, kv_quant: False, compile: True, compile_prefill: False, dtype: torch.bfloat16, device: cuda repro: python generate.py --quantization uintx-2-8 --checkpoint_path ../../../checkpoints/meta-llama/Meta-Llama-3-8B/model.pth --device cuda --precision torch.bfloat16 --compile --num_samples 5 --max_new_tokens 200 --top_k 200 --temperature 0.8 20240910110958, tok/s=223.95, mem/s= 682.88 GB/s, peak_mem= 5.59 GB, model_size= 3.05 GB quant: sparse-marlin, mod: Meta-Llama-3-8B, kv_quant: False, compile: True, compile_prefill: False, dtype: torch.float16, device: cuda repro: python generate.py --quantization sparse-marlin --checkpoint_path ../../../checkpoints/meta-llama/Meta-Llama-3-8B/model.pth --device cuda --precision torch.float16 --compile --num_samples 5 --max_new_tokens 200 --top_k 200 --temperature 0.8 + +20241125122729, tok/s=160.77, mem/s=2413.14 GB/s, peak_mem=16.60 GB, model_size=15.01 GB quant: None, mod: Meta-Llama-3.1-8B, kv_quant: False, compile: True, compile_prefill: False, dtype: torch.bfloat16, device: cuda repro: python generate.py --checkpoint_path ../../../checkpoints/meta-llama/Meta-Llama-3.1-8B/model.pth --device cuda --precision torch.bfloat16 --compile --num_samples 5 --max_new_tokens 200 --batch_size 1 --top_k 200 --temperature 0.8 +20241125123143, tok/s=213.99, mem/s=1609.22 GB/s, peak_mem=10.80 GB, model_size= 7.52 GB quant: int8wo, mod: Meta-Llama-3.1-8B, kv_quant: False, compile: True, compile_prefill: False, dtype: torch.bfloat16, device: cuda repro: python generate.py --quantization int8wo --checkpoint_path ../../../checkpoints/meta-llama/Meta-Llama-3.1-8B/model.pth --device cuda --precision torch.bfloat16 --compile --num_samples 5 --max_new_tokens 200 --batch_size 1 --top_k 200 --temperature 0.8 +20241125123322, tok/s=131.13, mem/s= 553.61 GB/s, peak_mem= 6.81 GB, model_size= 4.22 GB quant: int4wo-64, mod: Meta-Llama-3.1-8B, kv_quant: False, compile: True, compile_prefill: False, dtype: torch.bfloat16, device: cuda repro: python generate.py --quantization int4wo-64 --checkpoint_path ../../../checkpoints/meta-llama/Meta-Llama-3.1-8B/model.pth --device cuda --precision torch.bfloat16 --compile --num_samples 5 --max_new_tokens 200 --batch_size 1 --top_k 200 --temperature 0.8 +20241125123409, tok/s=168.82, mem/s=1267.56 GB/s, peak_mem=11.83 GB, model_size= 7.51 GB quant: float8wo, mod: Meta-Llama-3.1-8B, kv_quant: False, compile: True, compile_prefill: False, dtype: torch.bfloat16, device: cuda repro: python generate.py --quantization float8wo --checkpoint_path ../../../checkpoints/meta-llama/Meta-Llama-3.1-8B/model.pth --device cuda --precision torch.bfloat16 --compile --num_samples 5 --max_new_tokens 200 --batch_size 1 --top_k 200 --temperature 0.8 +20241125123544, tok/s=141.95, mem/s=1065.36 GB/s, peak_mem=12.98 GB, model_size= 7.51 GB quant: float8dq-tensor, mod: Meta-Llama-3.1-8B, kv_quant: False, compile: True, compile_prefill: False, dtype: torch.bfloat16, device: cuda repro: python generate.py --quantization float8dq-tensor --checkpoint_path ../../../checkpoints/meta-llama/Meta-Llama-3.1-8B/model.pth --device cuda --precision torch.bfloat16 --compile --num_samples 5 --max_new_tokens 200 --batch_size 1 --top_k 200 --temperature 0.8 +20241125123640, tok/s=142.23, mem/s=1067.49 GB/s, peak_mem=12.98 GB, model_size= 7.51 GB quant: float8dq-wo, mod: Meta-Llama-3.1-8B, kv_quant: False, compile: True, compile_prefill: False, dtype: torch.bfloat16, device: cuda repro: python generate.py --quantization float8dq-wo --checkpoint_path ../../../checkpoints/meta-llama/Meta-Llama-3.1-8B/model.pth --device cuda --precision torch.bfloat16 --compile --num_samples 5 --max_new_tokens 200 --batch_size 1 --top_k 200 --temperature 0.8 + +20241125130306, tok/s=163.37, mem/s=2452.20 GB/s, peak_mem=16.60 GB, model_size=15.01 GB quant: None, mod: Meta-Llama-3.1-8B, kv_quant: False, compile: True, compile_prefill: False, dtype: torch.bfloat16, device: cuda repro: python generate.py --checkpoint_path ../../../checkpoints/meta-llama/Meta-Llama-3.1-8B/model.pth --device cuda --precision torch.bfloat16 --compile --num_samples 5 --max_new_tokens 200 --batch_size 1 --top_k 200 --temperature 0.8 +20241125130348, tok/s=214.74, mem/s=1614.87 GB/s, peak_mem=10.80 GB, model_size= 7.52 GB quant: int8wo, mod: Meta-Llama-3.1-8B, kv_quant: False, compile: True, compile_prefill: False, dtype: torch.bfloat16, device: cuda repro: python generate.py --quantization int8wo --checkpoint_path ../../../checkpoints/meta-llama/Meta-Llama-3.1-8B/model.pth --device cuda --precision torch.bfloat16 --compile --num_samples 5 --max_new_tokens 200 --batch_size 1 --top_k 200 --temperature 0.8 +20241125130423, tok/s=131.46, mem/s= 555.03 GB/s, peak_mem= 6.57 GB, model_size= 4.22 GB quant: int4wo-64, mod: Meta-Llama-3.1-8B, kv_quant: False, compile: True, compile_prefill: False, dtype: torch.bfloat16, device: cuda repro: python generate.py --quantization int4wo-64 --checkpoint_path ../../../checkpoints/meta-llama/Meta-Llama-3.1-8B/model.pth --device cuda --precision torch.bfloat16 --compile --num_samples 5 --max_new_tokens 200 --batch_size 1 --top_k 200 --temperature 0.8 +20241125130601, tok/s=173.17, mem/s=1300.17 GB/s, peak_mem=11.83 GB, model_size= 7.51 GB quant: float8wo, mod: Meta-Llama-3.1-8B, kv_quant: False, compile: True, compile_prefill: False, dtype: torch.bfloat16, device: cuda repro: python generate.py --quantization float8wo --checkpoint_path ../../../checkpoints/meta-llama/Meta-Llama-3.1-8B/model.pth --device cuda --precision torch.bfloat16 --compile --num_samples 5 --max_new_tokens 200 --batch_size 1 --top_k 200 --temperature 0.8 +20241125130727, tok/s=138.31, mem/s=1038.01 GB/s, peak_mem=12.98 GB, model_size= 7.51 GB quant: float8dq-tensor, mod: Meta-Llama-3.1-8B, kv_quant: False, compile: True, compile_prefill: False, dtype: torch.bfloat16, device: cuda repro: python generate.py --quantization float8dq-tensor --checkpoint_path ../../../checkpoints/meta-llama/Meta-Llama-3.1-8B/model.pth --device cuda --precision torch.bfloat16 --compile --num_samples 5 --max_new_tokens 200 --batch_size 1 --top_k 200 --temperature 0.8 +20241125130822, tok/s=140.46, mem/s=1054.20 GB/s, peak_mem=12.98 GB, model_size= 7.51 GB quant: float8dq-wo, mod: Meta-Llama-3.1-8B, kv_quant: False, compile: True, compile_prefill: False, dtype: torch.bfloat16, device: cuda repro: python generate.py --quantization float8dq-wo --checkpoint_path ../../../checkpoints/meta-llama/Meta-Llama-3.1-8B/model.pth --device cuda --precision torch.bfloat16 --compile --num_samples 5 --max_new_tokens 200 --batch_size 1 --top_k 200 --temperature 0.8 \ No newline at end of file diff --git a/torchao/_models/llama/benchmarks.sh b/torchao/_models/llama/benchmarks.sh index 63733c736d..c1a5545121 100644 --- a/torchao/_models/llama/benchmarks.sh +++ b/torchao/_models/llama/benchmarks.sh @@ -1,21 +1,21 @@ export CHECKPOINT_PATH=../../../checkpoints # path to checkpoints folder # README BENCHMARKS -export MODEL_REPO=meta-llama/Llama-2-7b-chat-hf -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --write_result benchmark_results.txt -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization int8dq --write_result benchmark_results.txt -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization int8wo --write_result benchmark_results.txt -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization fp6 --write_result benchmark_results.txt --precision float16 -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization int4wo-64 --write_result benchmark_results.txt -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --compile_prefill --quantization autoquant-int4 --write_result benchmark_results.txt +#export MODEL_REPO=meta-llama/Llama-2-7b-chat-hf +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --write_result benchmark_results.txt +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization int8dq --write_result benchmark_results.txt +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization int8wo --write_result benchmark_results.txt +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization fp6 --write_result benchmark_results.txt --precision float16 +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization int4wo-64 --write_result benchmark_results.txt +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --compile_prefill --quantization autoquant-int4 --write_result benchmark_results.txt -export MODEL_REPO=meta-llama/Meta-Llama-3-8B -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --write_result benchmark_results.txt -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization int8dq --write_result benchmark_results.txt -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization int8wo --write_result benchmark_results.txt -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization fp6 --write_result benchmark_results.txt --precision float16 -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization int4wo-64 --write_result benchmark_results.txt -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --compile_prefill --quantization autoquant-int4 --write_result benchmark_results.txt +#export MODEL_REPO=meta-llama/Meta-Llama-3-8B +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --write_result benchmark_results.txt +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization int8dq --write_result benchmark_results.txt +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization int8wo --write_result benchmark_results.txt +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization fp6 --write_result benchmark_results.txt --precision float16 +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization int4wo-64 --write_result benchmark_results.txt +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --compile_prefill --quantization autoquant-int4 --write_result benchmark_results.txt export MODEL_REPO=meta-llama/Meta-Llama-3.1-8B python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --write_result benchmark_results.txt @@ -29,53 +29,53 @@ python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --co # OTHER BENCHMARKS # kv cache quantization -export MODEL_REPO=meta-llama/Meta-Llama-3.1-8B -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt --cache_size 8192 -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt --cache_size 8192 --kv_cache_quantization -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt --cache_size 8192 --kv_cache_quantization --linear_causal_mask -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt --cache_size 16384 -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt --cache_size 16384 --kv_cache_quantization -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt --cache_size 16384 --kv_cache_quantization --linear_causal_mask -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt --cache_size 32768 -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt --cache_size 32768 --kv_cache_quantization -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt --cache_size 32768 --kv_cache_quantization --linear_causal_mask -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt --cache_size 65536 -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt --cache_size 65536 --kv_cache_quantization -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt --cache_size 65536 --kv_cache_quantization --linear_causal_mask -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt --cache_size 131072 -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt --cache_size 131072 --kv_cache_quantization -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt --cache_size 131072 --kv_cache_quantization --linear_causal_mask +#export MODEL_REPO=meta-llama/Meta-Llama-3.1-8B +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt --cache_size 8192 +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt --cache_size 8192 --kv_cache_quantization +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt --cache_size 8192 --kv_cache_quantization --linear_causal_mask +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt --cache_size 16384 +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt --cache_size 16384 --kv_cache_quantization +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt --cache_size 16384 --kv_cache_quantization --linear_causal_mask +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt --cache_size 32768 +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt --cache_size 32768 --kv_cache_quantization +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt --cache_size 32768 --kv_cache_quantization --linear_causal_mask +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt --cache_size 65536 +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt --cache_size 65536 --kv_cache_quantization +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt --cache_size 65536 --kv_cache_quantization --linear_causal_mask +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt --cache_size 131072 +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt --cache_size 131072 --kv_cache_quantization +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt --cache_size 131072 --kv_cache_quantization --linear_causal_mask -export MODEL_REPO=meta-llama/Llama-2-7b-chat-hf -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision torch.float32 --write_result benchmark_results.txt -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --compile_prefill --write_result benchmark_results.txt -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --compile_prefill --quantization autoquant --write_result benchmark_results.txt -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization fp6 --write_result benchmark_results.txt -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization sparse-marlin --precision float16 --write_result benchmark_results.txt -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization uintx-4-64 --write_result benchmark_results.txt -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization uintx-2-8 --write_result benchmark_results.txt +#export MODEL_REPO=meta-llama/Llama-2-7b-chat-hf +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision torch.float32 --write_result benchmark_results.txt +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --compile_prefill --write_result benchmark_results.txt +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --compile_prefill --quantization autoquant --write_result benchmark_results.txt +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization fp6 --write_result benchmark_results.txt +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization sparse-marlin --precision float16 --write_result benchmark_results.txt +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization uintx-4-64 --write_result benchmark_results.txt +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization uintx-2-8 --write_result benchmark_results.txt -export MODEL_REPO=meta-llama/Meta-Llama-3-8B -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision torch.float32 --write_result benchmark_results.txt -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --compile_prefill --write_result benchmark_results.txt -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --compile_prefill --quantization autoquant --write_result benchmark_results.txt -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization fp6 --write_result benchmark_results.txt --precision float16 -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization sparse-marlin --precision float16 --write_result benchmark_results.txt -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization uintx-4-64 --write_result benchmark_results.txt -# python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization uintx-2-8 --write_result benchmark_results.txt +#export MODEL_REPO=meta-llama/Meta-Llama-3-8B +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision torch.float32 --write_result benchmark_results.txt +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --compile_prefill --write_result benchmark_results.txt +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --compile_prefill --quantization autoquant --write_result benchmark_results.txt +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization fp6 --write_result benchmark_results.txt --precision float16 +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization sparse-marlin --precision float16 --write_result benchmark_results.txt +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization uintx-4-64 --write_result benchmark_results.txt +## python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization uintx-2-8 --write_result benchmark_results.txt -# Different Batch Size Benchmarks -export MODEL_REPO=meta-llama/Meta-Llama-3-8B -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization int8dq --write_result benchmark_results.txt --batch_size 1 -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization int8dq --write_result benchmark_results.txt --batch_size 32 -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization int8dq --write_result benchmark_results.txt --batch_size 128 +## Different Batch Size Benchmarks +#export MODEL_REPO=meta-llama/Meta-Llama-3-8B +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization int8dq --write_result benchmark_results.txt --batch_size 1 +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization int8dq --write_result benchmark_results.txt --batch_size 32 +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization int8dq --write_result benchmark_results.txt --batch_size 128 -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization int8wo --write_result benchmark_results.txt --batch_size 1 -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization int8wo --write_result benchmark_results.txt --batch_size 32 -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization int8wo --write_result benchmark_results.txt --batch_size 128 +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization int8wo --write_result benchmark_results.txt --batch_size 1 +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization int8wo --write_result benchmark_results.txt --batch_size 32 +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization int8wo --write_result benchmark_results.txt --batch_size 128 -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization autoquant --write_result benchmark_results.txt --batch_size 1 -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization autoquant --write_result benchmark_results.txt --batch_size 32 -python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization autoquant --write_result benchmark_results.txt --batch_size 128 +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization autoquant --write_result benchmark_results.txt --batch_size 1 +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization autoquant --write_result benchmark_results.txt --batch_size 32 +#python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization autoquant --write_result benchmark_results.txt --batch_size 128 diff --git a/torchao/_models/llama/generate.py b/torchao/_models/llama/generate.py index 862f5d186d..11d19789bd 100644 --- a/torchao/_models/llama/generate.py +++ b/torchao/_models/llama/generate.py @@ -298,7 +298,7 @@ def main( group_size = int(_quant_args[2]) quantize_(model, uintx_weight_only(dtype, group_size, use_hqq=use_hqq)) elif "float8wo" in quantization: - quantize_(model, float8_weight_only()) + quantize_(model, float8_weight_only(weight_dtype=torch.float8_e4m3fnuz)) elif "float8dq" in quantization: granularity = str(quantization.split("-")[-1]) if granularity=="tensor": @@ -307,7 +307,7 @@ def main( granularity = PerRow() else: granularity = PerTensor() - quantize_(model, float8_dynamic_activation_float8_weight(granularity=granularity)) + quantize_(model, float8_dynamic_activation_float8_weight(granularity=granularity, weight_dtype=torch.float8_e4m3fnuz, activation_dtype=torch.float8_e4m3fnuz)) elif "autoquant_v2" in quantization: from torchao._models._eval import InputRecorder from torchao._models.llama.model import prepare_inputs_for_model From aec9414f9ca5d36a1106e66496c895cfe9b1e7ef Mon Sep 17 00:00:00 2001 From: Jesse Cai Date: Wed, 4 Dec 2024 16:35:12 -0800 Subject: [PATCH 2/9] update requirements --- benchmark_requirements.txt | 115 ++++++++++++++++++------------------- 1 file changed, 56 insertions(+), 59 deletions(-) diff --git a/benchmark_requirements.txt b/benchmark_requirements.txt index 20c2c964ee..208b5c89ed 100644 --- a/benchmark_requirements.txt +++ b/benchmark_requirements.txt @@ -1,59 +1,56 @@ -Package Version Editable project location -------------------- -------------------------- ------------------------- -attrs 24.2.0 -black 24.8.0 -blobfile 3.0.0 -certifi 2024.8.30 -cffi 1.17.1 -charset-normalizer 3.4.0 -click 8.1.7 -expecttest 0.2.1 -filelock 3.16.1 -fsspec 2024.10.0 -huggingface-hub 0.26.2 -hypothesis 6.115.6 -idna 3.10 -Jinja2 3.1.4 -lxml 5.3.0 -markdown-it-py 2.2.0 -MarkupSafe 2.1.5 -mdurl 0.1.0 -mkl_fft 1.3.11 -mkl_random 1.2.8 -mkl-service 2.4.0 -mpmath 1.3.0 -mypy 1.11.2 -mypy-extensions 1.0.0 -networkx 3.4.2 -numpy 2.0.1 -packaging 24.1 -pathspec 0.10.3 -pillow 11.0.0 -pip 24.3.1 -platformdirs 3.10.0 -protobuf 4.25.3 -psutil 5.9.0 -pycparser 2.21 -pycryptodomex 3.21.0 -Pygments 2.15.1 -pytorch-triton-rocm 3.1.0+cf34004b8a -PyYAML 6.0.2 -regex 2024.11.6 -requests 2.32.3 -rich 13.7.1 -safetensors 0.4.5 -setuptools 75.1.0 -six 1.16.0 -sortedcontainers 2.4.0 -sympy 1.13.1 -tiktoken 0.8.0 -tokenizers 0.20.3 -torch 2.6.0.dev20241122+rocm6.2 -torchao 0.7.0+git9bb1b230 /data/users/jessecai/ao -torchaudio 2.5.0.dev20241125+rocm6.2 -torchvision 0.20.0.dev20241125+rocm6.2 -tqdm 4.67.1 -transformers 4.46.3 -typing_extensions 4.11.0 -urllib3 2.2.3 -wheel 0.44.0 +attrs @ file:///croot/attrs_1729089401488/work +black @ file:///croot/black_1725573853246/work +blobfile==3.0.0 +certifi==2024.8.30 +cffi @ file:///croot/cffi_1726856441404/work +charset-normalizer==3.4.0 +click @ file:///work/perseverance-python-buildout/croot/click_1698845879718/work +expecttest==0.2.1 +filelock==3.16.1 +fsspec==2024.10.0 +huggingface-hub==0.26.2 +hypothesis @ file:///croot/hypothesis_1730479536060/work +idna==3.10 +Jinja2==3.1.4 +lxml==5.3.0 +markdown-it-py @ file:///work/perseverance-python-buildout/croot/markdown-it-py_1698846045803/work +MarkupSafe==2.1.5 +mdurl @ file:///work/perseverance-python-buildout/croot/mdurl_1698845653285/work +mkl-service==2.4.0 +mkl_fft @ file:///io/mkl313/mkl_fft_1730824109137/work +mkl_random @ file:///io/mkl313/mkl_random_1730823916628/work +mpmath==1.3.0 +mypy @ file:///croot/mypy-split_1725573876658/work +mypy-extensions @ file:///work/perseverance-python-buildout/croot/mypy_extensions_1698863276135/work +networkx==3.4.2 +numpy @ file:///croot/numpy_and_numpy_base_1725470312869/work/dist/numpy-2.0.1-cp312-cp312-linux_x86_64.whl#sha256=e2374991344fa2241a2153ef3d550d3a1cd2d50cb1f1d51eddc82870abc88021 +packaging @ file:///croot/packaging_1720101850331/work +pathspec @ file:///work/perseverance-python-buildout/croot/pathspec_1698805478393/work +pillow==11.0.0 +platformdirs @ file:///work/perseverance-python-buildout/croot/platformdirs_1701732573265/work +protobuf==4.25.3 +psutil @ file:///work/perseverance-python-buildout/croot/psutil_1698863411559/work +pycparser @ file:///tmp/build/80754af9/pycparser_1636541352034/work +pycryptodomex==3.21.0 +Pygments @ file:///work/perseverance-python-buildout/croot/pygments_1698846270603/work +pytorch-triton-rocm==3.1.0+cf34004b8a +PyYAML @ file:///croot/pyyaml_1728657952215/work +regex==2024.11.6 +requests==2.32.3 +rich @ file:///croot/rich_1720637495510/work +safetensors==0.4.5 +setuptools==75.1.0 +six==1.16.0 +sortedcontainers @ file:///tmp/build/80754af9/sortedcontainers_1623949099177/work +sympy==1.13.1 +tiktoken==0.8.0 +tokenizers==0.20.3 +torch==2.6.0.dev20241122+rocm6.2 +-e git+ssh://git@github.com/pytorch/ao.git@a60980573f50f4dcd502232708c9d20af8cfbb67#egg=torchao +torchaudio==2.5.0.dev20241125+rocm6.2 +torchvision==0.20.0.dev20241125+rocm6.2 +tqdm==4.67.1 +transformers==4.46.3 +typing_extensions @ file:///croot/typing_extensions_1715268824938/work +urllib3==2.2.3 +wheel==0.44.0 From 24ec5977a2d6db4a1b86e5424b7d276fc28be4d1 Mon Sep 17 00:00:00 2001 From: Jesse Cai Date: Wed, 4 Dec 2024 16:39:25 -0800 Subject: [PATCH 3/9] new --- benchmark_requirements.txt | 47 +++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/benchmark_requirements.txt b/benchmark_requirements.txt index 208b5c89ed..99c44c76c7 100644 --- a/benchmark_requirements.txt +++ b/benchmark_requirements.txt @@ -1,56 +1,57 @@ -attrs @ file:///croot/attrs_1729089401488/work -black @ file:///croot/black_1725573853246/work +attrs==24.2.0 +black==24.8.0 blobfile==3.0.0 certifi==2024.8.30 -cffi @ file:///croot/cffi_1726856441404/work +cffi==1.17.1 charset-normalizer==3.4.0 -click @ file:///work/perseverance-python-buildout/croot/click_1698845879718/work +click==8.1.7 expecttest==0.2.1 filelock==3.16.1 fsspec==2024.10.0 huggingface-hub==0.26.2 -hypothesis @ file:///croot/hypothesis_1730479536060/work +hypothesis==6.115.6 idna==3.10 Jinja2==3.1.4 lxml==5.3.0 -markdown-it-py @ file:///work/perseverance-python-buildout/croot/markdown-it-py_1698846045803/work +markdown-it-py==2.2.0 MarkupSafe==2.1.5 -mdurl @ file:///work/perseverance-python-buildout/croot/mdurl_1698845653285/work +mdurl==0.1.0 +mkl_fft==1.3.11 +mkl_random==1.2.8 mkl-service==2.4.0 -mkl_fft @ file:///io/mkl313/mkl_fft_1730824109137/work -mkl_random @ file:///io/mkl313/mkl_random_1730823916628/work mpmath==1.3.0 -mypy @ file:///croot/mypy-split_1725573876658/work -mypy-extensions @ file:///work/perseverance-python-buildout/croot/mypy_extensions_1698863276135/work +mypy==1.11.2 +mypy-extensions==1.0.0 networkx==3.4.2 -numpy @ file:///croot/numpy_and_numpy_base_1725470312869/work/dist/numpy-2.0.1-cp312-cp312-linux_x86_64.whl#sha256=e2374991344fa2241a2153ef3d550d3a1cd2d50cb1f1d51eddc82870abc88021 -packaging @ file:///croot/packaging_1720101850331/work -pathspec @ file:///work/perseverance-python-buildout/croot/pathspec_1698805478393/work +numpy==2.0.1 +packaging==24.1 +pathspec==0.10.3 pillow==11.0.0 -platformdirs @ file:///work/perseverance-python-buildout/croot/platformdirs_1701732573265/work +pip==24.3.1 +platformdirs==3.10.0 protobuf==4.25.3 -psutil @ file:///work/perseverance-python-buildout/croot/psutil_1698863411559/work -pycparser @ file:///tmp/build/80754af9/pycparser_1636541352034/work +psutil==5.9.0 +pycparser==2.21 pycryptodomex==3.21.0 -Pygments @ file:///work/perseverance-python-buildout/croot/pygments_1698846270603/work +Pygments==2.15.1 pytorch-triton-rocm==3.1.0+cf34004b8a -PyYAML @ file:///croot/pyyaml_1728657952215/work +PyYAML==6.0.2 regex==2024.11.6 requests==2.32.3 -rich @ file:///croot/rich_1720637495510/work +rich==13.7.1 safetensors==0.4.5 setuptools==75.1.0 six==1.16.0 -sortedcontainers @ file:///tmp/build/80754af9/sortedcontainers_1623949099177/work +sortedcontainers==2.4.0 sympy==1.13.1 tiktoken==0.8.0 tokenizers==0.20.3 torch==2.6.0.dev20241122+rocm6.2 --e git+ssh://git@github.com/pytorch/ao.git@a60980573f50f4dcd502232708c9d20af8cfbb67#egg=torchao +torchao==0.7.0+git9bb1b230 torchaudio==2.5.0.dev20241125+rocm6.2 torchvision==0.20.0.dev20241125+rocm6.2 tqdm==4.67.1 transformers==4.46.3 -typing_extensions @ file:///croot/typing_extensions_1715268824938/work +typing_extensions==4.11.0 urllib3==2.2.3 wheel==0.44.0 From edad9f953c57d223da41ededace1cc03e4198c81 Mon Sep 17 00:00:00 2001 From: Jesse Cai Date: Wed, 4 Dec 2024 16:46:31 -0800 Subject: [PATCH 4/9] updated --- benchmark_requirements.txt | 5 ----- pytorch_requirements.txt | 10 ++++++++++ 2 files changed, 10 insertions(+), 5 deletions(-) create mode 100644 pytorch_requirements.txt diff --git a/benchmark_requirements.txt b/benchmark_requirements.txt index 99c44c76c7..9cfd06052f 100644 --- a/benchmark_requirements.txt +++ b/benchmark_requirements.txt @@ -34,7 +34,6 @@ psutil==5.9.0 pycparser==2.21 pycryptodomex==3.21.0 Pygments==2.15.1 -pytorch-triton-rocm==3.1.0+cf34004b8a PyYAML==6.0.2 regex==2024.11.6 requests==2.32.3 @@ -46,10 +45,6 @@ sortedcontainers==2.4.0 sympy==1.13.1 tiktoken==0.8.0 tokenizers==0.20.3 -torch==2.6.0.dev20241122+rocm6.2 -torchao==0.7.0+git9bb1b230 -torchaudio==2.5.0.dev20241125+rocm6.2 -torchvision==0.20.0.dev20241125+rocm6.2 tqdm==4.67.1 transformers==4.46.3 typing_extensions==4.11.0 diff --git a/pytorch_requirements.txt b/pytorch_requirements.txt new file mode 100644 index 0000000000..094ed0ebca --- /dev/null +++ b/pytorch_requirements.txt @@ -0,0 +1,10 @@ +transformers==4.46.3 +typing_extensions==4.11.0 +urllib3==2.2.3 +wheel==0.44.0 + +--index-url https://download.pytorch.org/whl/nightly/rocm6.2 +torch==2.6.0.dev20241122+rocm6.2 +pytorch-triton-rocm==3.1.0+cf34004b8a +torchaudio==2.5.0.dev20241125+rocm6.2 +torchvision==0.20.0.dev20241125+rocm6.2 From 421709d5c3f5b14cfb01847139709b25a6fb30f3 Mon Sep 17 00:00:00 2001 From: Jesse Cai Date: Wed, 4 Dec 2024 16:51:16 -0800 Subject: [PATCH 5/9] fix typo --- pytorch_requirements.txt | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pytorch_requirements.txt b/pytorch_requirements.txt index 094ed0ebca..f06e208742 100644 --- a/pytorch_requirements.txt +++ b/pytorch_requirements.txt @@ -1,8 +1,3 @@ -transformers==4.46.3 -typing_extensions==4.11.0 -urllib3==2.2.3 -wheel==0.44.0 - --index-url https://download.pytorch.org/whl/nightly/rocm6.2 torch==2.6.0.dev20241122+rocm6.2 pytorch-triton-rocm==3.1.0+cf34004b8a From e374ac04f61ea6cccb324edd5249dce29e6befac Mon Sep 17 00:00:00 2001 From: Jesse Cai Date: Wed, 4 Dec 2024 16:54:07 -0800 Subject: [PATCH 6/9] wip --- benchmark_requirements.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/benchmark_requirements.txt b/benchmark_requirements.txt index 9cfd06052f..1841f561cd 100644 --- a/benchmark_requirements.txt +++ b/benchmark_requirements.txt @@ -50,3 +50,6 @@ transformers==4.46.3 typing_extensions==4.11.0 urllib3==2.2.3 wheel==0.44.0 + +safetensors +requests From f66b270798298f3f4b576f3c9d4bb154b9b0ddaf Mon Sep 17 00:00:00 2001 From: Jesse Cai Date: Wed, 4 Dec 2024 16:55:47 -0800 Subject: [PATCH 7/9] fix --- benchmark_requirements.txt | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/benchmark_requirements.txt b/benchmark_requirements.txt index 1841f561cd..cc2d8f1388 100644 --- a/benchmark_requirements.txt +++ b/benchmark_requirements.txt @@ -16,7 +16,7 @@ lxml==5.3.0 markdown-it-py==2.2.0 MarkupSafe==2.1.5 mdurl==0.1.0 -mkl_fft==1.3.11 +mkl_fft mkl_random==1.2.8 mkl-service==2.4.0 mpmath==1.3.0 @@ -50,6 +50,3 @@ transformers==4.46.3 typing_extensions==4.11.0 urllib3==2.2.3 wheel==0.44.0 - -safetensors -requests From e12bf4523dbf2d01c344c5c1df8f3367d03e51ba Mon Sep 17 00:00:00 2001 From: Jesse Cai Date: Wed, 4 Dec 2024 16:57:27 -0800 Subject: [PATCH 8/9] unpin mkl libs --- benchmark_requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmark_requirements.txt b/benchmark_requirements.txt index cc2d8f1388..9614ba29aa 100644 --- a/benchmark_requirements.txt +++ b/benchmark_requirements.txt @@ -17,8 +17,8 @@ markdown-it-py==2.2.0 MarkupSafe==2.1.5 mdurl==0.1.0 mkl_fft -mkl_random==1.2.8 -mkl-service==2.4.0 +mkl_random +mkl-service mpmath==1.3.0 mypy==1.11.2 mypy-extensions==1.0.0 From 6a30ecd05bd8bdc218b2fc8ca6a56a30337b05a9 Mon Sep 17 00:00:00 2001 From: Jesse Cai Date: Wed, 4 Dec 2024 16:58:20 -0800 Subject: [PATCH 9/9] remove mkl libs --- benchmark_requirements.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/benchmark_requirements.txt b/benchmark_requirements.txt index 9614ba29aa..956ea83123 100644 --- a/benchmark_requirements.txt +++ b/benchmark_requirements.txt @@ -16,9 +16,6 @@ lxml==5.3.0 markdown-it-py==2.2.0 MarkupSafe==2.1.5 mdurl==0.1.0 -mkl_fft -mkl_random -mkl-service mpmath==1.3.0 mypy==1.11.2 mypy-extensions==1.0.0