Add Cohere2 model (#35224)
alexrs-cohere authored Dec 13, 2024
1 parent e4e404f commit 64478c7
Showing 19 changed files with 2,508 additions and 9 deletions.
2 changes: 2 additions & 0 deletions docs/source/en/_toctree.yml
@@ -362,6 +362,8 @@
      title: CodeLlama
    - local: model_doc/cohere
      title: Cohere
    - local: model_doc/cohere2
      title: Cohere2
    - local: model_doc/convbert
      title: ConvBERT
    - local: model_doc/cpm
1 change: 1 addition & 0 deletions docs/source/en/index.md
@@ -99,6 +99,7 @@ Flax), PyTorch, and/or TensorFlow.
| [CodeGen](model_doc/codegen) | ✅ | ❌ | ❌ |
| [CodeLlama](model_doc/code_llama) | ✅ | ❌ | ✅ |
| [Cohere](model_doc/cohere) | ✅ | ❌ | ❌ |
| [Cohere2](model_doc/cohere2) | ✅ | ❌ | ❌ |
| [Conditional DETR](model_doc/conditional_detr) | ✅ | ❌ | ❌ |
| [ConvBERT](model_doc/convbert) | ✅ | ✅ | ❌ |
| [ConvNeXT](model_doc/convnext) | ✅ | ✅ | ❌ |
44 changes: 44 additions & 0 deletions docs/source/en/model_doc/cohere2.md
@@ -0,0 +1,44 @@
# Cohere2

## Usage tips
The model and tokenizer can be loaded via:

```python
# pip install transformers
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "CohereForAI/c4ai-command-r7b-12-2024"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# Format message with the command-r chat template
messages = [{"role": "user", "content": "Hello, how are you?"}]
input_ids = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt")

gen_tokens = model.generate(
    input_ids,
    max_new_tokens=100,
    do_sample=True,
    temperature=0.3,
)

gen_text = tokenizer.decode(gen_tokens[0])
print(gen_text)
```
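To print only the model's reply, slice off the prompt tokens before decoding (an optional addition to the snippet above):

```python
# Keep only the newly generated tokens, dropping the prompt and special tokens
new_tokens = gen_tokens[0, input_ids.shape[1]:]
print(tokenizer.decode(new_tokens, skip_special_tokens=True))
```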

## Cohere2Config

[[autodoc]] Cohere2Config

## Cohere2Model

[[autodoc]] Cohere2Model
- forward


## Cohere2ForCausalLM

[[autodoc]] Cohere2ForCausalLM
- forward


2 changes: 2 additions & 0 deletions docs/source/en/perf_infer_gpu_one.md
@@ -43,6 +43,7 @@ FlashAttention-2 is currently supported for the following architectures:
* [Chameleon](https://huggingface.co/docs/transformers/model_doc/chameleon#transformers.Chameleon)
* [CLIP](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPModel)
* [Cohere](https://huggingface.co/docs/transformers/model_doc/cohere#transformers.CohereModel)
* [Cohere2](https://huggingface.co/docs/transformers/model_doc/cohere2#transformers.Cohere2Model)
* [GLM](https://huggingface.co/docs/transformers/model_doc/glm#transformers.GLMModel)
* [Dbrx](https://huggingface.co/docs/transformers/model_doc/dbrx#transformers.DbrxModel)
* [DistilBert](https://huggingface.co/docs/transformers/model_doc/distilbert#transformers.DistilBertModel)
@@ -227,6 +228,7 @@ For now, Transformers supports SDPA inference and training for the following architectures:
* [CLIP](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPModel)
* [GLM](https://huggingface.co/docs/transformers/model_doc/glm#transformers.GLMModel)
* [Cohere](https://huggingface.co/docs/transformers/model_doc/cohere#transformers.CohereModel)
* [Cohere2](https://huggingface.co/docs/transformers/model_doc/cohere2#transformers.Cohere2Model)
* [data2vec_audio](https://huggingface.co/docs/transformers/main/en/model_doc/data2vec#transformers.Data2VecAudioModel)
* [Dbrx](https://huggingface.co/docs/transformers/model_doc/dbrx#transformers.DbrxModel)
* [DeiT](https://huggingface.co/docs/transformers/model_doc/deit#transformers.DeiTModel)
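Both backends are opt-in at load time via the `attn_implementation` argument. A minimal sketch for the new model, assuming a CUDA device (and the `flash-attn` package for the first option):

```python
import torch
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "CohereForAI/c4ai-command-r7b-12-2024",
    torch_dtype=torch.float16,
    attn_implementation="flash_attention_2",  # or "sdpa"
)
```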
8 changes: 8 additions & 0 deletions src/transformers/__init__.py
@@ -305,6 +305,7 @@
"CodeGenTokenizer",
],
"models.cohere": ["CohereConfig"],
"models.cohere2": ["Cohere2Config"],
"models.conditional_detr": ["ConditionalDetrConfig"],
"models.convbert": [
"ConvBertConfig",
@@ -1787,6 +1788,7 @@
]
)
_import_structure["models.cohere"].extend(["CohereForCausalLM", "CohereModel", "CoherePreTrainedModel"])
_import_structure["models.cohere2"].extend(["Cohere2ForCausalLM", "Cohere2Model", "Cohere2PreTrainedModel"])
_import_structure["models.conditional_detr"].extend(
[
"ConditionalDetrForObjectDetection",
@@ -5204,6 +5206,7 @@
CodeGenTokenizer,
)
from .models.cohere import CohereConfig
from .models.cohere2 import Cohere2Config
from .models.conditional_detr import (
ConditionalDetrConfig,
)
@@ -6681,6 +6684,11 @@
CohereModel,
CoherePreTrainedModel,
)
from .models.cohere2 import (
Cohere2ForCausalLM,
Cohere2Model,
Cohere2PreTrainedModel,
)
from .models.conditional_detr import (
ConditionalDetrForObjectDetection,
ConditionalDetrForSegmentation,
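With these export entries in place, the new classes are importable straight from the top level of the package. A quick sketch, using deliberately tiny (hypothetical) hyperparameters so the randomly initialized model stays small:

```python
from transformers import Cohere2Config, Cohere2ForCausalLM

# Tiny illustrative values, not any real checkpoint's configuration
config = Cohere2Config(
    num_hidden_layers=2,
    hidden_size=128,
    intermediate_size=256,
    num_attention_heads=4,
    num_key_value_heads=2,
)
model = Cohere2ForCausalLM(config)  # randomly initialized, no download
print(model.config.model_type)  # "cohere2"
```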
3 changes: 2 additions & 1 deletion src/transformers/cache_utils.py
@@ -1634,8 +1634,9 @@ def __init__(
         self.num_key_value_heads = (
             config.num_attention_heads if config.num_key_value_heads is None else config.num_key_value_heads
         )
+        layer_switch = config.sliding_window_pattern if hasattr(config, "sliding_window_pattern") else 2  # 2 is for BC
         self.is_sliding = torch.tensor(
-            [not bool(i % 2) for i in range(config.num_hidden_layers)], dtype=torch.bool, device=device
+            [bool((i + 1) % layer_switch) for i in range(config.num_hidden_layers)], dtype=torch.bool, device=device
         )
         self.key_cache: List[torch.Tensor] = []
         self.value_cache: List[torch.Tensor] = []
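In words: with a `sliding_window_pattern` of k, every k-th layer (those where `(i + 1) % k == 0`) keeps a full-length cache, while the remaining layers keep a sliding-window cache; the fallback of 2 reproduces the old strictly alternating behavior. A standalone sketch of the expression, using an illustrative pattern of 4:

```python
# Not library code: the is_sliding expression evaluated in isolation
layer_switch = 4  # stands in for config.sliding_window_pattern
num_hidden_layers = 8
is_sliding = [bool((i + 1) % layer_switch) for i in range(num_hidden_layers)]
print(is_sliding)  # [True, True, True, False, True, True, True, False]
```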
1 change: 1 addition & 0 deletions src/transformers/models/__init__.py
@@ -52,6 +52,7 @@
code_llama,
codegen,
cohere,
cohere2,
conditional_detr,
convbert,
convnext,
2 changes: 2 additions & 0 deletions src/transformers/models/auto/configuration_auto.py
@@ -69,6 +69,7 @@
("code_llama", "LlamaConfig"),
("codegen", "CodeGenConfig"),
("cohere", "CohereConfig"),
("cohere2", "Cohere2Config"),
("conditional_detr", "ConditionalDetrConfig"),
("convbert", "ConvBertConfig"),
("convnext", "ConvNextConfig"),
@@ -371,6 +372,7 @@
("code_llama", "CodeLlama"),
("codegen", "CodeGen"),
("cohere", "Cohere"),
("cohere2", "Cohere2"),
("conditional_detr", "Conditional DETR"),
("convbert", "ConvBERT"),
("convnext", "ConvNeXT"),
2 changes: 2 additions & 0 deletions src/transformers/models/auto/modeling_auto.py
@@ -69,6 +69,7 @@
("code_llama", "LlamaModel"),
("codegen", "CodeGenModel"),
("cohere", "CohereModel"),
("cohere2", "Cohere2Model"),
("conditional_detr", "ConditionalDetrModel"),
("convbert", "ConvBertModel"),
("convnext", "ConvNextModel"),
@@ -482,6 +483,7 @@
("code_llama", "LlamaForCausalLM"),
("codegen", "CodeGenForCausalLM"),
("cohere", "CohereForCausalLM"),
("cohere2", "Cohere2ForCausalLM"),
("cpmant", "CpmAntForCausalLM"),
("ctrl", "CTRLLMHeadModel"),
("data2vec-text", "Data2VecTextForCausalLM"),
1 change: 1 addition & 0 deletions src/transformers/models/auto/tokenization_auto.py
@@ -147,6 +147,7 @@
),
("codegen", ("CodeGenTokenizer", "CodeGenTokenizerFast" if is_tokenizers_available() else None)),
("cohere", (None, "CohereTokenizerFast" if is_tokenizers_available() else None)),
("cohere2", (None, "CohereTokenizerFast" if is_tokenizers_available() else None)),
("convbert", ("ConvBertTokenizer", "ConvBertTokenizerFast" if is_tokenizers_available() else None)),
(
"cpm",
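Once all three auto mappings know about `cohere2`, the generic entry points resolve the new architecture like any other. A short sketch, assuming access to the checkpoint used in the docs above:

```python
from transformers import AutoConfig, AutoTokenizer

model_id = "CohereForAI/c4ai-command-r7b-12-2024"
config = AutoConfig.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)
print(type(config).__name__, type(tokenizer).__name__)  # Cohere2Config CohereTokenizerFast
```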
27 changes: 27 additions & 0 deletions src/transformers/models/cohere2/__init__.py
@@ -0,0 +1,27 @@
# Copyright 2024 Cohere and The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING

from ...utils import _LazyModule
from ...utils.import_utils import define_import_structure


if TYPE_CHECKING:
    from .configuration_cohere2 import *
    from .modeling_cohere2 import *
else:
    import sys

    _file = globals()["__file__"]
    sys.modules[__name__] = _LazyModule(__name__, _file, define_import_structure(_file), module_spec=__spec__)
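Nothing here is imported eagerly: `_LazyModule` defers the submodule imports until an attribute is first accessed. A generic sketch of the same idea using plain module-level `__getattr__` (PEP 562), not the `_LazyModule` implementation itself:

```python
# Illustrative lazy-import pattern; _LazyModule does something richer
import importlib

_LAZY = {
    "Cohere2Config": ".configuration_cohere2",
    "Cohere2Model": ".modeling_cohere2",
    "Cohere2ForCausalLM": ".modeling_cohere2",
}

def __getattr__(name):
    if name in _LAZY:
        module = importlib.import_module(_LAZY[name], __package__)
        return getattr(module, name)
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
```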