Skip to content

Commit

Permalink
Remove identical wte/etw logic for jais (ggerganov#10203)
Browse files Browse the repository at this point in the history
  • Loading branch information
fmz authored and arthw committed Nov 18, 2024
1 parent 918e8c9 commit c95fbb4
Showing 1 changed file with 0 additions and 6 deletions.
6 changes: 0 additions & 6 deletions convert_hf_to_gguf.py
Original file line number Diff line number Diff line change
@@ -3748,10 +3748,7 @@ def __init__(self, *args, **kwargs):

         # Embeddings scale
         self.embeddings_scale = 1.0
-        # note: For some JAIS flavors, output is tied to (same as) wte in original model
-        self.output_is_wte = False
         if 'mup_embeddings_scale' in self.hparams:
-            self.output_is_wte = True # Hack (?)
             self.embeddings_scale = self.hparams['mup_embeddings_scale']
         elif 'embeddings_scale' in self.hparams:
             self.embeddings_scale = self.hparams['embeddings_scale']
@@ -3808,10 +3805,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter

         if new_name == self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD):
             tensors.append((new_name, data_torch * self.embeddings_scale))
-            if self.output_is_wte:
-                tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch * self.width_scale))
         elif new_name == self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT):
-            assert not self.output_is_wte
             tensors.append((new_name, data_torch * self.width_scale))
         else:
             tensors.append((new_name, data_torch))
Expand Down

0 comments on commit c95fbb4

Please sign in to comment.