This repository has been archived by the owner on Aug 30, 2024. It is now read-only.

Commit

update
Signed-off-by: zhenwei-intel <[email protected]>
zhenwei-intel committed Feb 4, 2024
1 parent 4aee532 commit cf5e06e
Showing 2 changed files with 7 additions and 6 deletions.
8 changes: 5 additions & 3 deletions neural_speed/__init__.py

```diff
@@ -83,7 +83,7 @@ def get_model_type(model_config):
             model_type = "chatglm2"
         return model_type
 
-    def init(self, model_name, use_quant=True, use_gptq=False, use_awq=False,
+    def init(self, model_name, use_quant=True, use_gptq=False, use_awq=False, use_autoround=False,
              weight_dtype="int4", alg="sym", group_size=32,
              scale_dtype="fp32", compute_dtype="int8", use_ggml=False):
         self.config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)
@@ -107,6 +107,8 @@ def init(self, model_name, use_quant=True, use_gptq=False, use_awq=False,
             quant_desc = "gptq"
         if use_awq:
             quant_desc = "awq"
+        if use_autoround:
+            quant_desc = "autoround"
         quant_bin = "{}/ne_{}_q_{}.bin".format(output_path, model_type, quant_desc)
 
         if not use_quant:
@@ -119,8 +121,8 @@ def init(self, model_name, use_quant=True, use_gptq=False, use_awq=False,
                   format(self.bin_file))
             return
 
-        if use_gptq or use_awq:
-            convert_model(model_name, quant_bin, "f32")
+        if use_gptq or use_awq or use_autoround:
+            convert_model(model_name, quant_bin, use_quantized_model=True)
             return
 
         if not os.path.exists(fp32_bin):
```
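Taken together, this file now threads a `use_autoround` flag through `init`: an AutoRound checkpoint gets an `autoround`-suffixed quantized cache file and is routed through the pre-quantized conversion path. Below is a minimal usage sketch, not part of the commit, assuming the `Model` wrapper that `neural_speed`'s `__init__.py` exports; the model id is a placeholder.

```python
# Hypothetical usage of the new flag (the model id is a placeholder).
from neural_speed import Model

model = Model()
model.init(
    "my-org/llama-7b-autoround",  # placeholder: an AutoRound-quantized checkpoint
    use_quant=True,               # keep quantized inference enabled
    use_autoround=True,           # new flag: cache file becomes ne_<type>_q_autoround.bin
)
```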
5 changes: 2 additions & 3 deletions neural_speed/convert/__init__.py

```diff
@@ -22,12 +22,11 @@
 model_maps = {"gpt_neox": "gptneox", "gpt_bigcode": "starcoder", "whisper": "whisper"}
 
 
-def convert_model(model, outfile, outtype, whisper_repo_path=None):
+def convert_model(model, outfile, outtype="f32", whisper_repo_path=None, use_quantized_model=False):
     config = AutoConfig.from_pretrained(model, trust_remote_code=True)
     model_type = model_maps.get(config.model_type, config.model_type)
 
-    quantized_model = 'gptq' in str(model).lower() or 'awq' in str(model).lower()
-    if quantized_model:
+    if use_quantized_model:
         path = Path(Path(__file__).parent.absolute(), "convert_quantized_{}.py".format(model_type))
     else:
         path = Path(Path(__file__).parent.absolute(), "convert_{}.py".format(model_type))
```
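The second change replaces `convert_model`'s old heuristic, which sniffed `'gptq'` or `'awq'` in the model path, with an explicit `use_quantized_model` argument, so a pre-quantized checkpoint is dispatched to the matching `convert_quantized_<model_type>.py` script even when its path names no quantization scheme (as AutoRound paths typically don't). A sketch of a direct call under that reading, with placeholder paths:

```python
# Hypothetical direct call (paths are placeholders): the caller now states that
# the checkpoint is pre-quantized instead of relying on its name.
from neural_speed.convert import convert_model

convert_model(
    "my-org/llama-7b-autoround",  # placeholder pre-quantized checkpoint
    "ne_llama_q_autoround.bin",   # output file consumed by Model.init
    use_quantized_model=True,     # dispatches to convert_quantized_llama.py
)
```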
