
Commit

update
Signed-off-by: zhenwei-intel <[email protected]>
zhenwei-intel committed Feb 4, 2024
1 parent cf5e06e commit 3c3f1cc
Showing 4 changed files with 7 additions and 6 deletions.
6 changes: 3 additions & 3 deletions neural_speed/convert/common.py
@@ -354,7 +354,7 @@ def convert_q4_tensor(src_name, dst_name, model, fout, q_config, n_head, n_head2
# gptq_scale = torch.cat([gptq_scale,gptq_scale,gptq_scale,gptq_scale], dim=1).view(-1,1)
pack_tensor = torch.cat((gptq_scale.half().view(torch.int8), tensor), dim=-1)
pack_tensor.numpy().tofile(fout)
print(f"converting {dst_name} qauntized tensor to ggml q4 block")
print(f"converting {dst_name} quantized tensor to ggml q4 block")

def convert_q4_1_tensor(src_name, dst_name, model, fout, q_config, n_head, n_head2=0, permute_func=None):
qzeros = model[f"{src_name}.qzeros"]
@@ -381,7 +381,7 @@ def convert_q4_1_tensor(src_name, dst_name, model, fout, q_config, n_head, n_hea
gptq_zeros = -gptq_scale*gptq_zeros
pack_tensor = torch.cat((gptq_scale.half().view(torch.int8), gptq_zeros.half().view(torch.int8), tensor), dim=-1)
pack_tensor.numpy().tofile(fout)
print(f"converting {dst_name} qauntized tensor to ggml q4 1 block")
print(f"converting {dst_name} quantized tensor to ggml q4 1 block")


def convert_q4_f32_tensor(src_name, dst_name, model, fout, q_config, n_head, n_head_kv=0, permute_func=None):
@@ -411,4 +411,4 @@ def convert_q4_f32_tensor(src_name, dst_name, model, fout, q_config, n_head, n_h
write_header(fout, shape, dst_name, 0)
weight.numpy().tofile(fout)

print(f"converting {dst_name} qauntized tensor to fp32 tensor")
print(f"converting {dst_name} quantized tensor to fp32 tensor")
2 changes: 1 addition & 1 deletion neural_speed/convert/convert_quantized_gptj.py
@@ -96,7 +96,7 @@ def convert_to_qx_bestla_tensor(src_name, dst_name, model, fout, q_config)
alg="sym" if q_config['sym'] else "asym",
compute_dtype="int8")
dst.flatten()[:byte_size].tofile(fout)
print(f"converting {dst_name} qauntized tensor to bestla q4 block")
print(f"converting {dst_name} quantized tensor to bestla q4 block")


def main(args_in: Optional[List[str]] = None) -> None:
2 changes: 1 addition & 1 deletion neural_speed/convert/convert_quantized_llama.py
@@ -92,7 +92,7 @@ def convert_q4_bestla_tensor(src_name, dst_name, model, fout, q_config, n_head,
alg="sym" if q_config['sym'] else "asym",
compute_dtype="int8")
dst.flatten()[:byte_size].tofile(fout)
print(f"converting {dst_name} qauntized tensor to bestla q4 block")
print(f"converting {dst_name} quantized tensor to bestla q4 block")

def main(args_in: Optional[List[str]] = None) -> None:
parser = argparse.ArgumentParser(description="Convert a model to a NE compatible file")
3 changes: 2 additions & 1 deletion scripts/convert.py
@@ -27,14 +27,15 @@ def main(args_in: Optional[List[str]] = None) -> None:
)
parser.add_argument("--outfile", type=Path, required=True, help="path to write to")
parser.add_argument("model", type=Path, help="directory containing model file or model id")
parser.add_argument("--use_quantized_model", action="store_true", help="use quantized model: awq/gptq/autoround")
args = parser.parse_args(args_in)

if args.model.exists():
dir_model = args.model.as_posix()
else:
dir_model = args.model

- convert_model(dir_model, args.outfile, args.outtype)
+ convert_model(dir_model, args.outfile, args.outtype, use_quantized_model=args.use_quantized_model)


if __name__ == "__main__":
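
For reference, a hypothetical invocation of the updated script with the new flag; the model path is a placeholder, and the parser's other arguments (such as the one backing args.outtype) are assumed to keep their defaults:

python scripts/convert.py --outfile ne-model.bin --use_quantized_model /path/to/gptq-model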
