first adding diffllama

huggingface · Oct 11, 2024 · 89077bc · 89077bc
1 parent 70b07d9
commit 89077bc
Show file tree

Hide file tree

Showing 12 changed files with 3,072 additions and 1 deletion.
diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml
@@ -376,6 +376,8 @@
         title: DeBERTa-v2
       - local: model_doc/dialogpt
         title: DialoGPT
+      - local: model_doc/diffllama
+        title: DiffLlama
       - local: model_doc/distilbert
         title: DistilBERT
       - local: model_doc/dpr
@@ -969,4 +971,4 @@
     - local: internal/time_series_utils
       title: Utilities for Time Series
     title: Internal Helpers
-  title: API
+  title: API
diff --git a/docs/source/en/model_doc/diffllama.md b/docs/source/en/model_doc/diffllama.md
@@ -0,0 +1,63 @@
+<!--Copyright 2024 The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+
+⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
+rendered properly in your Markdown viewer.
+
+-->
+
+# DiffLlama
+
+## Overview
+
+The DiffLlama model was proposed in [<INSERT PAPER NAME HERE>](<INSERT PAPER LINK HERE>) by <INSERT AUTHORS HERE>.
+<INSERT SHORT SUMMARY HERE>
+
+The abstract from the paper is the following:
+
+*<INSERT PAPER ABSTRACT HERE>*
+
+Tips:
+
+<INSERT TIPS ABOUT MODEL HERE>
+
+This model was contributed by [INSERT YOUR HF USERNAME HERE](https://huggingface.co/<INSERT YOUR HF USERNAME HERE>).
+The original code can be found [here](<INSERT LINK TO GITHUB REPO HERE>).
+
+
+## DiffLlamaConfig
+
+[[autodoc]] DiffLlamaConfig
+
+## DiffLlamaModel
+
+[[autodoc]] DiffLlamaModel
+    - forward
+
+## DiffLlamaForCausalLM
+
+[[autodoc]] DiffLlamaForCausalLM
+    - forward
+
+## DiffLlamaForSequenceClassification
+
+[[autodoc]] DiffLlamaForSequenceClassification
+    - forward
+
+## DiffLlamaForQuestionAnswering
+
+[[autodoc]] DiffLlamaForQuestionAnswering
+    - forward
+
+## DiffLlamaForTokenClassification
+
+[[autodoc]] DiffLlamaForTokenClassification
+    - forward
diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py
@@ -524,6 +524,7 @@
     "models.levit": ["LevitConfig"],
     "models.lilt": ["LiltConfig"],
     "models.llama": ["LlamaConfig"],
+    "models.diffllama": ["DiffLlamaConfig"],
     "models.llava": [
         "LlavaConfig",
         "LlavaProcessor",
@@ -1007,6 +1008,7 @@
     _import_structure["models.gpt_sw3"].append("GPTSw3Tokenizer")
     _import_structure["models.layoutxlm"].append("LayoutXLMTokenizer")
     _import_structure["models.llama"].append("LlamaTokenizer")
+    _import_structure["models.diffllama"].append("DiffLlamaTokenizer")
     _import_structure["models.m2m_100"].append("M2M100Tokenizer")
     _import_structure["models.marian"].append("MarianTokenizer")
     _import_structure["models.mbart"].append("MBartTokenizer")
@@ -2554,6 +2556,16 @@
             "LlamaPreTrainedModel",
         ]
     )
+    _import_structure["models.diffllama"].extend(
+        [
+            "DiffLlamaForCausalLM",
+            "DiffLlamaForQuestionAnswering",
+            "DiffLlamaForSequenceClassification",
+            "DiffLlamaForTokenClassification",
+            "DiffLlamaModel",
+            "DiffLlamaPreTrainedModel",
+        ]
+    )
     _import_structure["models.llava"].extend(
         [
             "LlavaForConditionalGeneration",
@@ -4728,6 +4740,7 @@
     )
     _import_structure["models.gptj"].extend(["FlaxGPTJForCausalLM", "FlaxGPTJModel", "FlaxGPTJPreTrainedModel"])
     _import_structure["models.llama"].extend(["FlaxLlamaForCausalLM", "FlaxLlamaModel", "FlaxLlamaPreTrainedModel"])
+    _import_structure["models.diffllama"].extend(["FlaxDiffLlamaForCausalLM", "FlaxDiffLlamaModel", "FlaxDiffLlamaPreTrainedModel"])
     _import_structure["models.gemma"].extend(["FlaxGemmaForCausalLM", "FlaxGemmaModel", "FlaxGemmaPreTrainedModel"])
     _import_structure["models.longt5"].extend(
         [
@@ -5364,6 +5377,7 @@
     from .models.levit import LevitConfig
     from .models.lilt import LiltConfig
     from .models.llama import LlamaConfig
+    from .models.diffllama import DiffLlamaConfig
     from .models.llava import (
         LlavaConfig,
         LlavaProcessor,
@@ -5903,6 +5917,7 @@
         from .models.gpt_sw3 import GPTSw3Tokenizer
         from .models.layoutxlm import LayoutXLMTokenizer
         from .models.llama import LlamaTokenizer
+        from .models.diffllama import DiffLlamaTokenizer
         from .models.m2m_100 import M2M100Tokenizer
         from .models.marian import MarianTokenizer
         from .models.mbart import MBartTokenizer
@@ -7207,6 +7222,14 @@
             LlamaModel,
             LlamaPreTrainedModel,
         )
+        from .models.diffllama import (
+            DiffLlamaForCausalLM,
+            DiffLlamaForQuestionAnswering,
+            DiffLlamaForSequenceClassification,
+            DiffLlamaForTokenClassification,
+            DiffLlamaModel,
+            DiffLlamaPreTrainedModel,
+        )
         from .models.llava import (
             LlavaForConditionalGeneration,
             LlavaPreTrainedModel,
@@ -8956,6 +8979,11 @@
             FlaxLlamaModel,
             FlaxLlamaPreTrainedModel,
         )
+        from .models.diffllama import (
+            FlaxDiffLlamaForCausalLM,
+            FlaxDiffLlamaModel,
+            FlaxDiffLlamaPreTrainedModel,
+        )
         from .models.longt5 import (
             FlaxLongT5ForConditionalGeneration,
             FlaxLongT5Model,

diff --git a/src/transformers/models/__init__.py b/src/transformers/models/__init__.py
@@ -131,6 +131,7 @@
     levit,
     lilt,
     llama,
+    diffllama,
     llava,
     llava_next,
     llava_next_video,

diff --git a/src/transformers/models/auto/configuration_auto.py b/src/transformers/models/auto/configuration_auto.py
@@ -149,6 +149,7 @@
         ("levit", "LevitConfig"),
         ("lilt", "LiltConfig"),
         ("llama", "LlamaConfig"),
+        ("diffllama", "DiffLlamaConfig"),
         ("llava", "LlavaConfig"),
         ("llava_next", "LlavaNextConfig"),
         ("llava_next_video", "LlavaNextVideoConfig"),
@@ -452,6 +453,7 @@
         ("levit", "LeViT"),
         ("lilt", "LiLT"),
         ("llama", "LLaMA"),
+        ("diffllama", "DiffDiffLlama"),
         ("llama2", "Llama2"),
         ("llama3", "Llama3"),
         ("llava", "LLaVa"),

diff --git a/src/transformers/models/auto/modeling_auto.py b/src/transformers/models/auto/modeling_auto.py
@@ -144,6 +144,7 @@
         ("levit", "LevitModel"),
         ("lilt", "LiltModel"),
         ("llama", "LlamaModel"),
+        ("diffllama", "DiffLlamaModel"),
         ("longformer", "LongformerModel"),
         ("longt5", "LongT5Model"),
         ("luke", "LukeModel"),
@@ -497,6 +498,7 @@
         ("jamba", "JambaForCausalLM"),
         ("jetmoe", "JetMoeForCausalLM"),
         ("llama", "LlamaForCausalLM"),
+        ("diffllama", "DiffLlamaForCausalLM"),
         ("mamba", "MambaForCausalLM"),
         ("mamba2", "Mamba2ForCausalLM"),
         ("marian", "MarianForCausalLM"),
@@ -954,6 +956,7 @@
         ("led", "LEDForSequenceClassification"),
         ("lilt", "LiltForSequenceClassification"),
         ("llama", "LlamaForSequenceClassification"),
+        ("diffllama", "DiffLlamaForSequenceClassification"),
         ("longformer", "LongformerForSequenceClassification"),
         ("luke", "LukeForSequenceClassification"),
         ("markuplm", "MarkupLMForSequenceClassification"),
@@ -1039,6 +1042,7 @@
         ("led", "LEDForQuestionAnswering"),
         ("lilt", "LiltForQuestionAnswering"),
         ("llama", "LlamaForQuestionAnswering"),
+        ("diffllama", "DiffLlamaForQuestionAnswering"),
         ("longformer", "LongformerForQuestionAnswering"),
         ("luke", "LukeForQuestionAnswering"),
         ("lxmert", "LxmertForQuestionAnswering"),
@@ -1136,6 +1140,7 @@
         ("layoutlmv3", "LayoutLMv3ForTokenClassification"),
         ("lilt", "LiltForTokenClassification"),
         ("llama", "LlamaForTokenClassification"),
+        ("diffllama", "DiffLlamaForTokenClassification"),
         ("longformer", "LongformerForTokenClassification"),
         ("luke", "LukeForTokenClassification"),
         ("markuplm", "MarkupLMForTokenClassification"),

diff --git a/src/transformers/models/auto/tokenization_auto.py b/src/transformers/models/auto/tokenization_auto.py
@@ -257,6 +257,12 @@
                     "LlamaTokenizerFast" if is_tokenizers_available() else None,
                 ),
             ),
+            (
+                "diffllama",
+                (
+                    "LlamaTokenizer" if is_sentencepiece_available() else None,
+                    "LlamaTokenizerFast" if is_tokenizers_available() else None,
+                ),
             ("llava", ("LlamaTokenizer", "LlamaTokenizerFast" if is_tokenizers_available() else None)),
             ("llava-onevision", ("LlamaTokenizer", "LlamaTokenizerFast" if is_tokenizers_available() else None)),
             ("llava_next", ("LlamaTokenizer", "LlamaTokenizerFast" if is_tokenizers_available() else None)),

diff --git a/src/transformers/models/diffllama/__init__.py b/src/transformers/models/diffllama/__init__.py
@@ -0,0 +1,63 @@
+# Copyright 2024 EleutherAI and The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import TYPE_CHECKING
+
+from ...utils import (
+    OptionalDependencyNotAvailable,
+    _LazyModule,
+    is_torch_available,
+)
+
+
+_import_structure = {
+    "configuration_diffllama": ["DiffLlamaConfig"],
+}
+
+try:
+    if not is_torch_available():
+        raise OptionalDependencyNotAvailable()
+except OptionalDependencyNotAvailable:
+    pass
+else:
+    _import_structure["modeling_diffllama"] = [
+        "DiffLlamaForCausalLM",
+        "DiffLlamaModel",
+        "DiffLlamaPreTrainedModel",
+        "DiffLlamaForSequenceClassification",
+        "DiffLlamaForQuestionAnswering",
+        "DiffLlamaForTokenClassification",
+    ]
+
+if TYPE_CHECKING:
+    from .configuration_diffllama import DiffLlamaConfig
+
+    try:
+        if not is_torch_available():
+            raise OptionalDependencyNotAvailable()
+    except OptionalDependencyNotAvailable:
+        pass
+    else:
+        from .modeling_diffllama import (
+            DiffLlamaForCausalLM,
+            DiffLlamaForQuestionAnswering,
+            DiffLlamaForSequenceClassification,
+            DiffLlamaForTokenClassification,
+            DiffLlamaModel,
+            DiffLlamaPreTrainedModel,
+        )
+
+else:
+    import sys
+
+    sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__)
-Original file line number
+Diff line change
@@ Expand Up / @@ -131,6 +131,7 @@ @@
         levit,
         lilt,
         llama,
+        diffllama,
         llava,
         llava_next,
         llava_next_video,
@@ Expand Down @@