Skip to content

Commit

Permalink
Set prepend_bos to false by default for Qwen models (#815)
Browse files Browse the repository at this point in the history
* Set prepend_bos to false by default for Qwen

* Fix typo in warning for center_unembed when logit softcap is activated

---------

Co-authored-by: Fabian Degen <[email protected]>
  • Loading branch information
degenfabian and Fabian Degen authored Dec 28, 2024
1 parent 358eba7 commit 5e9a339
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 1 deletion.
2 changes: 1 addition & 1 deletion transformer_lens/HookedTransformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1311,7 +1311,7 @@ def from_pretrained(
center_writing_weights = False
if center_unembed and cfg.output_logits_soft_cap > 0.0:
logging.warning(
-                "You tried to specify center_unembed=True for a model using logit softcap, but this can't be done! Softcapping is not invariant upon adding a constant"
+                "You tried to specify center_unembed=True for a model using logit softcap, but this can't be done! Softcapping is not invariant upon adding a constant "
"Setting center_unembed=False instead."
)
center_unembed = False
Expand Down
2 changes: 2 additions & 0 deletions transformer_lens/loading_from_pretrained.py
Original file line number Diff line number Diff line change
Expand Up @@ -1241,6 +1241,7 @@ def convert_hf_model_config(model_name: str, **kwargs):
"trust_remote_code": True,
"final_rms": True,
            "gated_mlp": True,
+           "default_prepend_bos": False,
}
elif architecture == "Qwen2ForCausalLM":
# Note that Qwen1.5 models have architecture type Qwen2ForCausalLM.
Expand All @@ -1265,6 +1266,7 @@ def convert_hf_model_config(model_name: str, **kwargs):
"tokenizer_prepends_bos": True,
"final_rms": True,
            "gated_mlp": True,
+           "default_prepend_bos": False,
}
elif architecture == "PhiForCausalLM":
# Architecture for microsoft/phi models
Expand Down

0 comments on commit 5e9a339

Please sign in to comment.