diff --git a/requirements.txt b/requirements.txt index 7b11f04b6..0f37048f8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,5 +13,4 @@ tiktoken py-cpuinfo gguf cmake -auto-gptq setuptools>=61 diff --git a/scripts/cal_diff.py b/scripts/cal_diff.py index ae47b133d..89bc637c7 100644 --- a/scripts/cal_diff.py +++ b/scripts/cal_diff.py @@ -35,10 +35,10 @@ def cmpData(numa, numb): args = parser.parse_args() woq_configs = { - "fp32": {"use_cache":True, "use_quant":False}, - # "ggml_int4": {"compute_dtype":"int8", "weight_dtype":"int4", "use_cache":True, "use_ggml":True}, - "jblas_int4": {"compute_dtype":"int8", "weight_dtype":"int4", "use_cache":True}, - # "jblas_int8": {"compute_dtype":"bf16", "weight_dtype":"int8", "use_cache":True}, + "fp32": {"use_quant":False}, + # "ggml_int4": {"compute_dtype":"int8", "weight_dtype":"int4", "use_ggml":True}, + "jblas_int4": {"compute_dtype":"int8", "weight_dtype":"int4"}, + # "jblas_int8": {"compute_dtype":"bf16", "weight_dtype":"int8"}, } prompt = "What is the meaning of life?" diff --git a/tests/model-test/cpp_graph_inference.sh b/tests/model-test/cpp_graph_inference.sh index f1e7b9d56..169a2573e 100644 --- a/tests/model-test/cpp_graph_inference.sh +++ b/tests/model-test/cpp_graph_inference.sh @@ -260,9 +260,9 @@ function main() { infer_cmd="./build/bin/run_whisper" precision_list+=("q4_0") elif [[ "${model}" == "phi2" ]]; then - quant_script="./build/bin/quant_phi2" + quant_script="./build/bin/quant_phi" convert_script="${convert_script}/convert_phi.py" - infer_cmd="./build/bin/run_phi2" + infer_cmd="./build/bin/run_phi" else echo "Error: Unexpedted model: $model" 1>&2 exit 1