Skip to content

Commit

Permalink
convert-hf : simplify BitNet pre-quantization
Browse the repository at this point in the history
This still results in the exact same tensor weights and scales,
but it reveals some weirdness in the current algorithm.
  • Loading branch information
compilade committed Jun 27, 2024
1 parent 89dc3b2 commit 961e293
Showing 1 changed file with 12 additions and 8 deletions.
20 changes: 12 additions & 8 deletions convert-hf-to-gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,10 @@ def write_tensors(self):
break

for new_name, data in ((n, d.squeeze().numpy()) for n, d in self.modify_tensors(data_torch, name, bid)):
data: np.ndarray = data # type hint
data: np.ndarray # type hint
if len(data.shape) == 0:
# otherwise single-value tensors get squeezed
data = data.reshape((1,))
n_dims = len(data.shape)
data_dtype = data.dtype
data_qtype: gguf.GGMLQuantizationType | None = None
Expand Down Expand Up @@ -336,7 +339,7 @@ def write_tensors(self):
shape = gguf.quant_shape_from_byte_shape(data.shape, data_qtype) if data.dtype == np.uint8 else data.shape

# reverse shape to make it similar to the internal ggml dimension order
shape_str = f"{{{', '.join(str(n) for n in reversed(shape)) or '1'}}}"
shape_str = f"{{{', '.join(str(n) for n in reversed(shape))}}}"

# n_dims is implicit in the shape
logger.info(f"{f'%-{max_name_len}s' % f'{new_name},'} {old_dtype} --> {data_qtype.name}, shape = {shape_str}")
Expand Down Expand Up @@ -1446,12 +1449,13 @@ def set_gguf_parameters(self):
def weight_quant(self, weight):
    """Ternarize a BitNet b1.58 weight tensor.

    Quantizes *weight* to values in {-1, 0, 1} using the mean of the
    absolute values as the (per-tensor) scale, as in the BitNet b1.58
    reference implementation.

    Returns a tuple ``(quantized_weight, scale)`` where
    ``quantized_weight`` has the same dtype as the input and ``scale``
    is a float32 scalar tensor.
    """
    # NOTE: the scrape contained both the pre- and post-commit bodies of
    # this method back to back; only the post-commit implementation is
    # kept here — the old body made everything after its `return`
    # unreachable.
    dtype = weight.dtype
    # Quantization math is done in float32 regardless of input dtype.
    weight = weight.float()
    # Per-tensor scale: mean absolute value, clamped to avoid division
    # by zero for all-zero tensors.
    scale = weight.abs().mean().clamp(min=1e-5)
    iscale = 1 / scale
    # Round-to-nearest ternary quantization: values land in {-1, 0, 1}.
    weight = (weight * iscale).round().clamp(-1, 1)
    # TODO: use the scale directly instead of inverting it twice
    # (this is also unnecessarily doubly inverted upstream)
    # ref: https://huggingface.co/1bitLLM/bitnet_b1_58-3B/blob/af89e318d78a70802061246bf037199d2fb97020/utils_quant.py#L10
    return weight.type(dtype), (1 / iscale).type(torch.float32)

def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
new_name = self.map_tensor_name(name)
Expand Down

0 comments on commit 961e293

Please sign in to comment.