Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for BitnetForCausalLM (new model / new datatype) #7931

Merged
merged 38 commits into from
Jun 23, 2024
Merged
Show file tree
Hide file tree
Changes from 34 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
076b4a1
hf bitnet v1
Eddie-Wang1120 Jun 5, 2024
57dfc3b
hf bitnet e2e v2
Eddie-Wang1120 Jun 5, 2024
1f2e0ee
finish bitnet e2e
Eddie-Wang1120 Jun 6, 2024
5e59660
finish f16 hf bitnet e2e
Eddie-Wang1120 Jun 7, 2024
2a01a7c
remove unsed
Eddie-Wang1120 Jun 7, 2024
4e1ab50
finish bitnet i2 e2e
Eddie-Wang1120 Jun 8, 2024
ca09085
move i2s to quantize v1
Eddie-Wang1120 Jun 9, 2024
dbee0a8
move i2 to quantize
Jun 9, 2024
1c5a8b7
clean code
Jun 9, 2024
3a0f8b0
clean code 2
Jun 9, 2024
97d22be
fix codestyle
Eddie-Wang1120 Jun 9, 2024
344467f
fix code
Eddie-Wang1120 Jun 9, 2024
65ac3a3
fix
Eddie-Wang1120 Jun 9, 2024
abd798d
fix code
Eddie-Wang1120 Jun 10, 2024
841c903
Merge branch 'ggerganov:master' into bitnet
Eddie-Wang1120 Jun 10, 2024
c0fd4df
fix merge
Eddie-Wang1120 Jun 10, 2024
de1d507
remove unused
Eddie-Wang1120 Jun 11, 2024
2322e9d
Merge branch 'ggerganov:master' into bitnet
Eddie-Wang1120 Jun 11, 2024
c0cd08d
Merge branch 'ggerganov:master' into bitnet
Eddie-Wang1120 Jun 12, 2024
f395dd9
change table name
Eddie-Wang1120 Jun 12, 2024
5e5eee7
fix whitespace
Eddie-Wang1120 Jun 12, 2024
7a8961f
delete redundant
Eddie-Wang1120 Jun 14, 2024
95dced0
i2_s to absmax
Eddie-Wang1120 Jun 15, 2024
569a03e
finish i2_s/i8_s vec_dot x86 simd
Eddie-Wang1120 Jun 15, 2024
a03eff3
i2s->q22
Eddie-Wang1120 Jun 17, 2024
4edc958
fix code
Eddie-Wang1120 Jun 18, 2024
89c7e4c
remove block scale
Eddie-Wang1120 Jun 18, 2024
fcf2da4
add dequantize
Eddie-Wang1120 Jun 19, 2024
fa9a742
fix seq
Eddie-Wang1120 Jun 19, 2024
230396b
update avx2
Eddie-Wang1120 Jun 19, 2024
2b09768
remove q2_2
Eddie-Wang1120 Jun 20, 2024
a58cf0d
remove q22_grid
Eddie-Wang1120 Jun 20, 2024
abcdc50
Merge branch 'ggerganov:master' into bitnet
Eddie-Wang1120 Jun 20, 2024
c6ddfa7
fix whitespace
Eddie-Wang1120 Jun 20, 2024
55a57a5
reuse llm_build_kv
Eddie-Wang1120 Jun 21, 2024
0520d88
Merge branch 'ggerganov:master' into bitnet
Eddie-Wang1120 Jun 21, 2024
16f0c30
Merge branch 'ggerganov:master' into bitnet
Eddie-Wang1120 Jun 23, 2024
226c5ee
fix bo
Eddie-Wang1120 Jun 23, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions convert-hf-to-gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -1400,6 +1400,48 @@ def write_tensors(self):
raise ValueError(f"Unprocessed experts: {experts}")


@Model.register("BitnetForCausalLM")
class BitnetModel(Model):
    """Converter for Hugging Face ``BitnetForCausalLM`` checkpoints.

    The attention and feed-forward projection weights are reduced to their
    ternary sign values (-1, 0, +1), and each of them is emitted together with
    a companion one-element ``.scale`` tensor. Every other tensor is passed
    through unchanged.
    """

    model_arch = gguf.MODEL_ARCH.BITNET

    def set_vocab(self):
        """Build the vocabulary by delegating to ``_set_vocab_sentencepiece()``."""
        self._set_vocab_sentencepiece()

    def set_gguf_parameters(self):
        """Write the base-class parameters, then linear RoPE scaling with factor 1.0."""
        super().set_gguf_parameters()
        self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
        self.gguf_writer.add_rope_scaling_factor(1.0)

    def weight_quant(self, weight):
        """Ternarize ``weight`` and compute its per-tensor scale.

        Args:
            weight: floating-point torch tensor to quantize.

        Returns:
            A ``(ternary, scale)`` tuple. ``ternary`` has the shape and dtype
            of ``weight`` and holds only -1, 0 and +1. ``scale`` is a float32
            tensor of shape ``(1,)`` holding the largest magnitude of the
            rounded-and-rescaled weights.
        """
        dtype = weight.dtype
        weight = weight.float()
        # Inverse of the mean absolute value; the clamp keeps an all-zero
        # tensor from causing a division by zero.
        s = 1 / weight.abs().mean().clamp(min=1e-5)
        # Snap every element to {-1, 0, +1} and map it back to the original
        # value range, so each entry is now 0 or +/- 1/s.
        weight = (weight * s).round().clamp(-1, 1) / s
        scale = weight.abs().max().unsqueeze(0)
        # Non-zero entries have magnitude 1/s (roughly >= 1e-5 because of the
        # clamp above), so sign() alone yields the ternary values; no separate
        # "zero out tiny values" pass is required.
        weight = torch.sign(weight).type(dtype)
        return weight, scale.type(torch.float32)

    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
        """Yield the output tensor(s) produced for one source tensor.

        Args:
            data_torch: tensor data read from the checkpoint.
            name: tensor name as it appears in the checkpoint.
            bid: block index passed to ``match_model_tensor_name``, or None.

        Yields:
            ``(name, tensor)`` pairs. A matched projection weight yields two
            pairs (the ternary weight, then its ``.scale`` companion); any
            other tensor yields itself under its mapped name.
        """
        new_name = self.map_tensor_name(name)

        ternary_keys = (
            gguf.MODEL_TENSOR.ATTN_Q,
            gguf.MODEL_TENSOR.ATTN_K,
            gguf.MODEL_TENSOR.ATTN_V,
            gguf.MODEL_TENSOR.ATTN_OUT,
            gguf.MODEL_TENSOR.FFN_UP,
            gguf.MODEL_TENSOR.FFN_DOWN,
            gguf.MODEL_TENSOR.FFN_GATE,
        )

        if any(self.match_model_tensor_name(new_name, key, bid) for key in ternary_keys):
            # Transform the weight into 1/0/-1, kept in the tensor's own
            # dtype; the accompanying scale is float32.
            weight_torch, scale_torch = self.weight_quant(data_torch)
            yield (new_name, weight_torch)
            yield (new_name.removesuffix(".weight") + ".scale", scale_torch)
        else:
            yield (new_name, data_torch)


@Model.register("GrokForCausalLM")
class GrokModel(Model):
model_arch = gguf.MODEL_ARCH.GROK
Expand Down
21 changes: 21 additions & 0 deletions gguf-py/gguf/constants.py
compilade marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ class MODEL_ARCH(IntEnum):
OLMO = auto()
ARCTIC = auto()
DEEPSEEK2 = auto()
BITNET = auto()


class MODEL_TENSOR(IntEnum):
Expand Down Expand Up @@ -200,6 +201,8 @@ class MODEL_TENSOR(IntEnum):
ATTN_KV_B = auto()
ATTN_Q_A_NORM = auto()
ATTN_KV_A_NORM = auto()
FFN_SUB_NORM = auto()
ATTN_SUB_NORM = auto()


MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
Expand Down Expand Up @@ -237,6 +240,7 @@ class MODEL_TENSOR(IntEnum):
MODEL_ARCH.OLMO: "olmo",
MODEL_ARCH.ARCTIC: "arctic",
MODEL_ARCH.DEEPSEEK2: "deepseek2",
MODEL_ARCH.BITNET: "bitnet",
}

TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
Expand Down Expand Up @@ -288,6 +292,8 @@ class MODEL_TENSOR(IntEnum):
MODEL_TENSOR.ATTN_KV_B: "blk.{bid}.attn_kv_b",
MODEL_TENSOR.ATTN_Q_A_NORM: "blk.{bid}.attn_q_a_norm",
MODEL_TENSOR.ATTN_KV_A_NORM: "blk.{bid}.attn_kv_a_norm",
MODEL_TENSOR.ATTN_SUB_NORM: "blk.{bid}.attn_sub_norm",
MODEL_TENSOR.FFN_SUB_NORM: "blk.{bid}.ffn_sub_norm",
}

MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
Expand Down Expand Up @@ -808,6 +814,21 @@ class MODEL_TENSOR(IntEnum):
MODEL_TENSOR.FFN_DOWN_SHEXP,
MODEL_TENSOR.FFN_UP_SHEXP,
],
MODEL_ARCH.BITNET: [
MODEL_TENSOR.ATTN_Q,
MODEL_TENSOR.ATTN_K,
MODEL_TENSOR.ATTN_V,
MODEL_TENSOR.TOKEN_EMBD,
MODEL_TENSOR.OUTPUT_NORM,
MODEL_TENSOR.ATTN_NORM,
MODEL_TENSOR.ATTN_OUT,
MODEL_TENSOR.FFN_NORM,
MODEL_TENSOR.FFN_GATE,
MODEL_TENSOR.FFN_DOWN,
MODEL_TENSOR.FFN_UP,
MODEL_TENSOR.ATTN_SUB_NORM,
MODEL_TENSOR.FFN_SUB_NORM,
],
# TODO
}

Expand Down
8 changes: 8 additions & 0 deletions gguf-py/gguf/tensor_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,14 @@ class TensorNameMap:
MODEL_TENSOR.ATTN_KV_A_NORM: (
"model.layers.{bid}.self_attn.kv_a_layernorm", # deepseek2
),

MODEL_TENSOR.ATTN_SUB_NORM: (
"model.layers.{bid}.self_attn.inner_attn_ln", # bitnet
),

MODEL_TENSOR.FFN_SUB_NORM: (
"model.layers.{bid}.mlp.ffn_layernorm", # bitnet
),
}

# architecture-specific block mappings
Expand Down
Loading
Loading