From a03ca99a6dba39b3b0a8af81145970c88b9e6b93 Mon Sep 17 00:00:00 2001
From: Jerry Zhang
Date: Tue, 17 Dec 2024 18:35:36 -0800
Subject: [PATCH] [ez] Use noquant to make dashboard logic easier (#1430)

Summary:
A small fix to the output JSON results for the llama, sam, and sam2
benchmarks: when no quantization is applied, report the dtype as
"noquant". This gives the dashboard a single, consistent baseline label
when computing the geomean speedup of autoquant vs. noquant.

Test Plan:
local test

Reviewers:

Subscribers:

Tasks:

Tags:
---
 examples/sam2_amg_server/server.py | 2 +-
 torchao/_models/llama/generate.py  | 2 +-
 torchao/_models/sam/eval_combo.py  | 2 +-
 torchao/_models/sam2/build_sam.py  | 2 --
 4 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/examples/sam2_amg_server/server.py b/examples/sam2_amg_server/server.py
index 359f0d6129..8aa918f193 100644
--- a/examples/sam2_amg_server/server.py
+++ b/examples/sam2_amg_server/server.py
@@ -658,7 +658,7 @@ def main(checkpoint_path,
     headers = ["name", "dtype", "device", "arch", "metric", "actual", "target"]
     name = "sam2-" + model_type
     arch = get_arch_name()
-    dtype = "autoquant" if use_autoquant else ("compile" if fast else "base")
+    dtype = "autoquant" if use_autoquant else "noquant"
     avg_time_per_run, max_memory_allocated_bytes, max_memory_allocated_percentage = result
     memory_result = [name, dtype, device, arch, "memory(MiB)", max_memory_allocated_bytes, None]
     memory_percent_result = [name, dtype, device, arch, "memory(%)", max_memory_allocated_percentage, None]
diff --git a/torchao/_models/llama/generate.py b/torchao/_models/llama/generate.py
index 91bedd9d73..231133c2c9 100644
--- a/torchao/_models/llama/generate.py
+++ b/torchao/_models/llama/generate.py
@@ -940,7 +940,7 @@ def callback(x):
     headers = ["name", "dtype", "device", "arch", "metric", "actual", "target"]
     name = checkpoint_path.parent.name
     arch = get_arch_name()
-    dtype = quantization or str(precision)
+    dtype = quantization or "noquant"
     memory_result = [name, dtype, device, arch, "mem/s", bandwidth, None]
     performance_result = [name, dtype, device, arch, "tok/s", tokpersec, None]
     write_json_result = write_json_result_local if output_json_local else write_json_result_ossci
diff --git a/torchao/_models/sam/eval_combo.py b/torchao/_models/sam/eval_combo.py
index 95309fde85..928ce6285d 100644
--- a/torchao/_models/sam/eval_combo.py
+++ b/torchao/_models/sam/eval_combo.py
@@ -467,7 +467,7 @@ def mlp_only(mod, name):
     headers = ["name", "dtype", "device", "arch", "metric", "actual", "target"]
     name = sam_model_type
     arch = get_arch_name()
-    dtype = compress or str(use_half) or "torch.float32"
+    dtype = compress or "noquant"
     memory_result = [name, dtype, device, arch, "memory(MiB)", max_memory_allocated_bytes, None]
     performance_result = [name, dtype, device, arch, "img_s(avg)", img_s, None]
     write_json_result = write_json_result_local if output_json_local else write_json_result_ossci
diff --git a/torchao/_models/sam2/build_sam.py b/torchao/_models/sam2/build_sam.py
index 8bacc78233..d6847ede83 100644
--- a/torchao/_models/sam2/build_sam.py
+++ b/torchao/_models/sam2/build_sam.py
@@ -87,8 +87,6 @@ def build_sam2(
         "++model.sam_mask_decoder_extra_args.dynamic_multimask_stability_thresh=0.98",
     ]
     # Read config and init model
-    import os
-    print("cur path:", os.getcwd())
     cfg = compose(config_name=config_file, overrides=hydra_overrides_extra)
     OmegaConf.resolve(cfg)
     model = instantiate(cfg.model, _recursive_=True)
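
For reference, the dashboard-side computation this labeling simplifies could
look like the sketch below. It assumes result rows shaped like the `headers`
list in the patch ([name, dtype, device, arch, metric, actual, target]); the
`geomean_speedup` helper and the sample numbers are hypothetical, not part of
torchao.

import math

def geomean_speedup(rows, metric="tok/s"):
    """Geometric mean of per-model autoquant/noquant ratios for one metric."""
    # Index the "actual" value by (name, dtype) so the two variants of each
    # model can be matched up directly.
    actual = {(r[0], r[1]): r[5] for r in rows if r[4] == metric}
    ratios = [
        actual[(name, "autoquant")] / base
        for (name, dtype), base in actual.items()
        if dtype == "noquant" and (name, "autoquant") in actual
    ]
    if not ratios:
        return None
    return math.exp(sum(math.log(r) for r in ratios) / len(ratios))

# Example with made-up numbers:
rows = [
    ["llama-7b", "autoquant", "cuda", "A100", "tok/s", 120.0, None],
    ["llama-7b", "noquant", "cuda", "A100", "tok/s", 100.0, None],
]
print(geomean_speedup(rows))  # 1.2

With a single "noquant" label, the baseline lookup is one dictionary key per
model, instead of having to match a mix of "base", "compile", str(precision),
and str(use_half) values across the three benchmarks.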