convert-hf : allow converting the weird BitNet 1.3B
Its FFN size is 5460, which is not convenient.
The offending tensors are kept in F16,
which makes the final model 5.01 bpw.
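
As a quick check of why 5460 is awkward, a minimal sketch (the candidate block sizes below are illustrative; the actual Q1_3 block size comes from GGML_QUANT_SIZES and is an assumption here):

    # 5460 = 2**2 * 3 * 5 * 7 * 13, so its largest power-of-two factor is 4,
    # and no common quant block size divides it evenly.
    for block_size in (8, 16, 32, 64, 256):  # illustrative candidates
        print(block_size, 5460 % block_size)  # the remainder is never 0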
compilade committed Jun 27, 2024
1 parent 58b9064 commit 4522ed7
Showing 2 changed files with 14 additions and 6 deletions.
16 changes: 10 additions & 6 deletions convert-hf-to-gguf.py
@@ -299,12 +299,16 @@ def write_tensors(self):
         if self.ftype != gguf.LlamaFileType.ALL_F32 and extra_f16 and not extra_f32:
             # TODO: cleaner model-specific per-tensor types
             # NOTE: Q1_3 is only relevant for BitNet 1.58b
-            if self.ftype == gguf.LlamaFileType.MOSTLY_Q1_3 and not any(
-                self.match_model_tensor_name(new_name, key, None)
-                for key in [
-                    gguf.MODEL_TENSOR.TOKEN_EMBD,
-                    gguf.MODEL_TENSOR.OUTPUT,
-                ]
+            if (
+                self.ftype == gguf.LlamaFileType.MOSTLY_Q1_3
+                and gguf.can_quantize_to_q1_3(data)
+                and not any(
+                    self.match_model_tensor_name(new_name, key, None)
+                    for key in [
+                        gguf.MODEL_TENSOR.TOKEN_EMBD,
+                        gguf.MODEL_TENSOR.OUTPUT,
+                    ]
+                )
             ):
                 data = gguf.quantize_q1_3(data)
                 assert data.dtype == np.uint8
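
In effect, a tensor that fails the new gguf.can_quantize_to_q1_3(data) check skips Q1_3 and falls back to a higher-precision type (F16, per the commit message). A minimal sketch of that decision, with the block size as an assumed stand-in:

    import numpy as np

    Q1_3_BLOCK_SIZE = 64  # assumed stand-in for gguf-py's __q1_3_block_size

    def can_quantize_to_q1_3(n: np.ndarray) -> bool:
        # Every row must split into whole Q1_3 blocks.
        return n.shape[-1] % Q1_3_BLOCK_SIZE == 0

    def pick_type(data: np.ndarray, is_embd_or_output: bool) -> str:
        # Q1_3 only for block-aligned tensors that are neither the token
        # embedding nor the output tensor; everything else stays F16.
        if can_quantize_to_q1_3(data) and not is_embd_or_output:
            return "Q1_3"
        return "F16"

    print(pick_type(np.zeros((1, 5460)), False))  # F16: 5460 % 64 == 20
    print(pick_type(np.zeros((1, 4096)), False))  # Q1_3: 4096 % 64 == 0
    print(pick_type(np.zeros((1, 4096)), True))   # F16: embeddings/output stay high-precision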
4 changes: 4 additions & 0 deletions gguf-py/gguf/quants.py
@@ -126,6 +126,10 @@ def quantize_q8_0(data: np.ndarray):
 __q1_3_block_size, __q1_3_type_size = GGML_QUANT_SIZES[GGMLQuantizationType.Q1_3]
 
 
+def can_quantize_to_q1_3(n: np.ndarray) -> bool:
+    return n.shape[-1] % __q1_3_block_size == 0
+
+
 def __quantize_q1_3_shape_change(s: tuple[int, ...]) -> tuple[int, ...]:
     return (*s[:-1], s[-1] // __q1_3_block_size * __q1_3_type_size)
 
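For reference, the packed size follows from the two block constants used above; a small sketch with assumed values (64 elements and 13 bytes per block, i.e. 1.625 bpw; the real numbers come from GGML_QUANT_SIZES and are not shown in this diff):

    Q1_3_BLOCK_SIZE = 64  # assumed elements per block
    Q1_3_TYPE_SIZE = 13   # assumed bytes per block (1.625 bpw)

    def quantize_q1_3_shape_change(s: tuple[int, ...]) -> tuple[int, ...]:
        # Same arithmetic as __quantize_q1_3_shape_change: elements -> packed bytes.
        return (*s[:-1], s[-1] // Q1_3_BLOCK_SIZE * Q1_3_TYPE_SIZE)

    print(quantize_q1_3_shape_change((4096, 4096)))  # (4096, 832)
    print(5460 % Q1_3_BLOCK_SIZE)  # 20 -> can_quantize_to_q1_3 is False for such rows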
