Commit 9a8151c

fix

Signed-off-by: shunxing12345 <[email protected]>
shunxing12345 committed Dec 30, 2024
1 parent a11b9a7 commit 9a8151c

Showing 5 changed files with 53 additions and 22 deletions.
47 changes: 40 additions & 7 deletions docs/source/en/model_doc/telechat2.md
@@ -18,21 +18,54 @@ rendered properly in your Markdown viewer.

## Overview

-The TeleChat2 model was proposed in [<INSERT PAPER NAME HERE>](<INSERT PAPER LINK HERE>) by <INSERT AUTHORS HERE>.
-<INSERT SHORT SUMMARY HERE>
+The TeleChat2 model was proposed in [TELECHAT TECHNICAL REPORT](https://arxiv.org/pdf/2401.03804) by TeleAI.
+
+### Summary

The abstract from the paper is the following:

-*<INSERT PAPER ABSTRACT HERE>*
+*TeleChat is a series of large language models, offering decoder-based language models in various sizes (3B, 7B, and 12B). For each size, we provide both the base pretrained model and the fine-tuned chat model aligned with human preferences. TeleChat leverages a Transformer architecture with features such as SwiGLU activation, advanced attention mechanisms (QKV bias, group query attention), and support for sliding window attention. The models are optimized for bilingual proficiency (English and Chinese) and include an enhanced tokenizer adaptable to diverse natural languages and coding formats.*
+
+## Usage tips
+
+The original code for TeleChat2 can be found [here](https://huggingface.co/Tele-AI/TeleChat2-7B).
+
+In the following, we demonstrate how to use `TeleChat2-7B` for inference. Note that we use the ChatML format for dialog; this demo shows how to leverage `apply_chat_template` for that purpose.
+
+```python
+>>> from transformers import AutoModelForCausalLM, AutoTokenizer
+>>> device = "cuda"  # the device to load the model onto
+
+>>> model = AutoModelForCausalLM.from_pretrained("Tele-AI/TeleChat2-7B", device_map="auto")
+>>> tokenizer = AutoTokenizer.from_pretrained("Tele-AI/TeleChat2-7B")
+
+>>> prompt = "Give me a short introduction to large language model."
+
+>>> messages = [{"role": "user", "content": prompt}]
+
+>>> text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+
+>>> model_inputs = tokenizer([text], return_tensors="pt").to(device)
+
+>>> generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True)
+
+>>> generated_ids = [output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)]
+
+>>> response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+```
-
-Tips:
-
-<INSERT TIPS ABOUT MODEL HERE>
-
-This model was contributed by [INSERT YOUR HF USERNAME HERE](https://huggingface.co/<INSERT YOUR HF USERNAME HERE>).
-The original code can be found [here](<INSERT LINK TO GITHUB REPO HERE>).

## TeleChat2Config

[[autodoc]] TeleChat2Config


## TeleChat2Model

[[autodoc]] TeleChat2Model
- forward

## TeleChat2ForCausalLM

[[autodoc]] TeleChat2ForCausalLM
- forward
- generate
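As a follow-up to the usage snippet added above, here is a minimal sketch (not part of this commit) that streams the reply token by token using transformers' `TextStreamer`; the checkpoint name and chat-template usage are taken from the snippet, and the generation settings are illustrative:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

model = AutoModelForCausalLM.from_pretrained("Tele-AI/TeleChat2-7B", device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("Tele-AI/TeleChat2-7B")

messages = [{"role": "user", "content": "Give me a short introduction to large language model."}]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

# Print tokens to stdout as they are generated instead of waiting for the full reply.
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
_ = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, streamer=streamer)
```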
@@ -30,7 +30,9 @@

TOKENIZER_CLASSES = {
    # Phi3 uses Llama tokenizer
-    name: getattr(transformers, "LlamaTokenizerFast" if name in ["Phi3Tokenizer", "TeleChat2Tokenizer"] else name + "Fast")
+    name: getattr(
+        transformers, "LlamaTokenizerFast" if name in ["Phi3Tokenizer", "TeleChat2Tokenizer"] else name + "Fast"
+    )
    for name in SLOW_TO_FAST_CONVERTERS
}
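For readers skimming the hunk above: the comprehension maps each slow tokenizer name to its fast counterpart, special-casing models that reuse `LlamaTokenizerFast`. A minimal sketch of the same resolution logic (hypothetical helper, not part of the commit):

```python
import transformers

def resolve_fast_tokenizer_class(name: str) -> type:
    # Phi3 and TeleChat2 ship no "<Name>Fast" class of their own;
    # both reuse the Llama fast tokenizer.
    if name in ["Phi3Tokenizer", "TeleChat2Tokenizer"]:
        return transformers.LlamaTokenizerFast
    # Everything else follows the "<Name>Fast" naming convention.
    return getattr(transformers, name + "Fast")
```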

@@ -177,7 +177,7 @@ def __init__(
        mlp_bias=False,
        head_dim=None,
        sliding_window=None,
-        embed_layernorm = False,
+        embed_layernorm=False,
        use_sliding_window=False,
        max_window_layers=28,
        **kwargs,
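For context, a minimal sketch of how the configuration arguments touched by this hunk might be passed at construction time. This assumes `TeleChat2Config` is exported at the top level (as the `[[autodoc]]` section above suggests); the values and comment semantics are illustrative, not official defaults:

```python
from transformers import TeleChat2Config  # assumed top-level export from this PR

config = TeleChat2Config(
    sliding_window=4096,       # window size when sliding-window attention is enabled
    embed_layernorm=False,     # whether to layer-normalize the input embeddings (assumed semantics)
    use_sliding_window=False,  # sliding-window attention off by default
    max_window_layers=28,      # layers using full attention before SWA, by analogy with similar configs
)
```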
13 changes: 3 additions & 10 deletions src/transformers/models/telechat2/modeling_telechat2.py
@@ -261,12 +261,8 @@ def __init__(self, config: TeleChat2Config, layer_idx: int):
        self.attention_dropout = config.attention_dropout
        self.is_causal = True

-        self.query = nn.Linear(
-            config.hidden_size, config.num_attention_heads * self.head_dim, bias=False
-        )
-        self.key_value = nn.Linear(
-            config.hidden_size, self.head_dim * config.num_key_value_heads * 2, bias=False
-        )
+        self.query = nn.Linear(config.hidden_size, config.num_attention_heads * self.head_dim, bias=False)
+        self.key_value = nn.Linear(config.hidden_size, self.head_dim * config.num_key_value_heads * 2, bias=False)
        self.dense = nn.Linear(config.num_attention_heads * self.head_dim, config.hidden_size)

    def forward(
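The hunk above keeps TeleChat2's fused key/value projection: one `nn.Linear` producing `num_key_value_heads * 2 * head_dim` features, consistent with grouped-query attention. Below is a standalone sketch of how such a fused output can be split into key and value heads; the layout, shapes, and values are assumed for illustration, since the module's actual `forward` is not shown in this diff:

```python
import torch
import torch.nn as nn

batch, seq_len, hidden_size = 2, 8, 4096   # illustrative sizes
num_kv_heads, head_dim = 4, 64

key_value = nn.Linear(hidden_size, head_dim * num_kv_heads * 2, bias=False)

hidden_states = torch.randn(batch, seq_len, hidden_size)
kv = key_value(hidden_states)                    # (batch, seq_len, num_kv_heads * 2 * head_dim)
kv = kv.view(batch, seq_len, num_kv_heads, 2, head_dim)
key_states, value_states = kv.unbind(dim=3)      # each (batch, seq_len, num_kv_heads, head_dim)
```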
@@ -378,7 +374,6 @@ def forward(
        return outputs


-
TELECHAT2_START_DOCSTRING = r"""
    This model inherits from [`PreTrainedModel`]. Check the superclass documentation for the generic methods the
    library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
@@ -518,9 +513,7 @@ def __init__(self, config: TeleChat2Config):

self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, self.padding_idx)

self.h = nn.ModuleList(
[TeleChat2DecoderLayer(config, i) for i in range(config.num_hidden_layers)]
)
self.h = nn.ModuleList([TeleChat2DecoderLayer(config, i) for i in range(config.num_hidden_layers)])
self.ln_f = TeleChat2RMSNorm(config.hidden_size, eps=config.layer_norm_epsilon)
self.rotary_emb = TeleChat2RotaryEmbedding(config=config)
self.gradient_checkpointing = False
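The hunk above wires the decoder stack to `TeleChat2RMSNorm`. For reference, a generic RMSNorm sketch in the style transformers models typically use; this is assumed, since the actual `TeleChat2RMSNorm` definition lies outside the shown hunks:

```python
import torch
import torch.nn as nn

class RMSNorm(nn.Module):
    """Root-mean-square norm: rescale by the RMS of the features, with no mean-centering."""

    def __init__(self, hidden_size: int, eps: float = 1e-6):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(hidden_size))
        self.variance_epsilon = eps

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        input_dtype = hidden_states.dtype
        hidden_states = hidden_states.to(torch.float32)
        variance = hidden_states.pow(2).mean(-1, keepdim=True)
        hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon)
        return (self.weight * hidden_states).to(input_dtype)
```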
9 changes: 6 additions & 3 deletions tests/models/telechat2/test_modeling_telechat2.py
@@ -435,7 +435,9 @@ def test_model_450m_long_prompt_sdpa(self):
        EXPECTED_OUTPUT_TOKEN_IDS = [306, 338]
        # An input with 4097 tokens that is above the size of the sliding window
        input_ids = [1] + [306, 338] * 2048
-        model = TeleChat2ForCausalLM.from_pretrained("TeleAI/TeleChat2-3B", device_map="auto", attn_implementation="sdpa")
+        model = TeleChat2ForCausalLM.from_pretrained(
+            "TeleAI/TeleChat2-3B", device_map="auto", attn_implementation="sdpa"
+        )
        input_ids = torch.tensor([input_ids]).to(model.model.embed_tokens.weight.device)
        generated_ids = model.generate(input_ids, max_new_tokens=4, temperature=0)
        self.assertEqual(EXPECTED_OUTPUT_TOKEN_IDS, generated_ids[0][-2:].tolist())
@@ -472,7 +474,9 @@ def test_speculative_generation(self):
        )
        prompt = "My favourite condiment is "
        tokenizer = AutoTokenizer.from_pretrained("TeleAI/TeleChat2-7B", use_fast=False)
-        model = TeleChat2ForCausalLM.from_pretrained("TeleAI/TeleChat2-3B", device_map="auto", torch_dtype=torch.float16)
+        model = TeleChat2ForCausalLM.from_pretrained(
+            "TeleAI/TeleChat2-3B", device_map="auto", torch_dtype=torch.float16
+        )
        assistant_model = TeleChat2ForCausalLM.from_pretrained(
            "TeleAI/TeleChat2-3B", device_map="auto", torch_dtype=torch.float16
        )
@@ -544,4 +548,3 @@ def test_export_static_cache(self):
        )
        ep_generated_text = tokenizer.batch_decode(ep_generated_ids, skip_special_tokens=True)
        self.assertEqual(EXPECTED_TEXT_COMPLETION, ep_generated_text)
-