make fix-copies
eustlb committed Dec 17, 2024
1 parent 407cd3e commit b3da73d
Showing 4 changed files with 31 additions and 25 deletions.
1 change: 1 addition & 0 deletions docs/source/en/index.md
@@ -224,6 +224,7 @@ Flax), PyTorch, and/or TensorFlow.
| [MobileNetV2](model_doc/mobilenet_v2) | ✅ | ❌ | ❌ |
| [MobileViT](model_doc/mobilevit) | ✅ | ✅ | ❌ |
| [MobileViTV2](model_doc/mobilevitv2) | ✅ | ❌ | ❌ |
| [Moonshine](model_doc/moonshine) | ✅ | ❌ | ❌ |
| [Moshi](model_doc/moshi) | ✅ | ❌ | ❌ |
| [MPNet](model_doc/mpnet) | ✅ | ✅ | ❌ |
| [MPT](model_doc/mpt) | ✅ | ❌ | ❌ |
20 changes: 2 additions & 18 deletions src/transformers/__init__.py
@@ -1082,7 +1082,6 @@
_import_structure["models.gemma"].append("GemmaTokenizerFast")
_import_structure["models.gpt2"].append("GPT2TokenizerFast")
_import_structure["models.gpt_neox"].append("GPTNeoXTokenizerFast")
_import_structure["models.moonshine"].append("MoonshineTokenizer")
_import_structure["models.gpt_neox_japanese"].append("GPTNeoXJapaneseTokenizer")
_import_structure["models.herbert"].append("HerbertTokenizerFast")
_import_structure["models.layoutlm"].append("LayoutLMTokenizerFast")
@@ -2364,10 +2363,7 @@
)
_import_structure["models.moonshine"].extend(
[
"MoonshineForCausalLM",
"MoonshineForQuestionAnswering",
"MoonshineForSequenceClassification",
"MoonshineForTokenClassification",
"MoonshineForConditionalGeneration",
"MoonshineModel",
"MoonshinePreTrainedModel",
]
@@ -3722,13 +3718,6 @@
"WhisperPreTrainedModel",
]
)
_import_structure["models.moonshine"].extend(
[
"MoonshineForConditionalGeneration",
"MoonshineModel",
"MoonshinePreTrainedModel",
]
)
_import_structure["models.x_clip"].extend(
[
"XCLIPModel",
@@ -6043,7 +6032,6 @@
from .models.mbart import MBartTokenizerFast
from .models.mbart50 import MBart50TokenizerFast
from .models.mobilebert import MobileBertTokenizerFast
from .models.moonshine import MoonshineTokenizer
from .models.mpnet import MPNetTokenizerFast
from .models.mt5 import MT5TokenizerFast
from .models.mvp import MvpTokenizerFast
@@ -7463,11 +7451,7 @@
MobileViTV2PreTrainedModel,
)
from .models.moonshine import (
MoonshineForCausalLM,
MoonshineForConditionalGeneration,
MoonshineForQuestionAnswering,
MoonshineForSequenceClassification,
MoonshineForTokenClassification,
MoonshineForConditionalGeneration,
MoonshineModel,
MoonshinePreTrainedModel,
)
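After this cleanup the lazy `_import_structure` registers only the three Moonshine classes that actually exist in the modeling file, and the stray duplicate `extend` block is gone. As a minimal sketch (illustration only, not part of the commit), the user-facing imports that remain would look like this, assuming a transformers build that contains the Moonshine PR and a PyTorch backend:

```python
# Minimal sketch (not part of this commit): the Moonshine names that remain
# importable from the top-level package after this cleanup. Assumes a
# transformers build that includes the Moonshine PR and an installed torch.
from transformers import (
    MoonshineForConditionalGeneration,  # speech-to-text generation head
    MoonshineModel,                     # bare encoder-decoder model
    MoonshinePreTrainedModel,           # base class handling weight init/loading
)

# The dropped names (MoonshineTokenizer, MoonshineForCausalLM and the
# *Classification / *QuestionAnswering heads) are no longer registered, so
# importing them from `transformers` now raises an ImportError.
```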
14 changes: 7 additions & 7 deletions src/transformers/models/moonshine/configuration_moonshine.py
@@ -43,28 +43,28 @@ class MoonshineConfig(PretrainedConfig):
The non-linear activation function (function or string) in the decoder.
initializer_range (`float`, *optional*, defaults to 0.02):
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
layer_norm_eps (`float`, *optional*, defaults to 1e-5):
layer_norm_eps (`float`, *optional*, defaults to 1e-05):
The epsilon used by the layer normalization layers.
decoder_start_token_id (`int`, *optional*, defaults to 1):
Corresponds to the "<|startoftranscript|>" token, which is automatically used when no `decoder_input_ids`
are provided to the `generate` function. It is used to guide the model's generation process depending on
the task.
use_cache (`bool`, *optional*, defaults to `True`):
Whether or not the model should return the last key/values attentions (not used by all models).
rope_theta (`float`, *optional*, defaults to 10000.0):
The base period of the RoPE embeddings.
is_encoder_decoder (`bool`, *optional*, defaults to `True`):
Whether the model is used as an encoder/decoder or not.
min_rotary_ndims (`int`, *optional*, defaults to 32):
The minimum number of dimensions of the RoPE.
rope_theta (`float`, *optional*, defaults to 10000.0):
The base period of the RoPE embeddings.
ff_mult (`int`, *optional*, defaults to 4):
Factor by which to scale the intermediate size.
attention_bias (`bool`, *optional*, defaults to `False`):
Whether to use a bias in the query, key, value and output projection layers during self-attention.
attention_dropout (`float`, *optional*, defaults to 0.0):
The dropout ratio for the attention probabilities.
qk_layernorm (`bool`, *optional*, defaults to `False`):
Whether or not to normalize the Queries and Keys after projecting the hidden states.
ff_mult (`int`, *optional*, defaults to 4):
Factor by which to scale the intermediate size.
bos_token_id (`int`, *optional*, defaults to 1):
Denotes the beginning-of-sequence token id.
eos_token_id (`int`, *optional*, defaults to 2):
Expand All @@ -81,10 +81,10 @@ class MoonshineConfig(PretrainedConfig):
actual percentage of masked vectors. This is only relevant if `apply_spec_augment == True`.
mask_time_length (`int`, *optional*, defaults to 10):
Length of vector span along the time axis.
mask_time_min_masks (`int`, *optional*, defaults to 2):
The minimum number of masks of length `mask_time_length` generated along the time axis, each time step,
irrespective of `mask_time_prob`. Only relevant if `mask_time_prob*len(time_axis)/mask_time_length <
mask_time_min_masks`.
mask_time_min_masks (`<fill_type>`, *optional*, defaults to 2): <fill_docstring>
mask_feature_prob (`float`, *optional*, defaults to 0.0):
Percentage (between 0 and 1) of all feature vectors along the feature axis which will be masked. The
masking procedure generates `mask_feature_prob*len(feature_axis)/mask_feature_length` independent masks over
@@ -94,10 +94,10 @@ class MoonshineConfig(PretrainedConfig):
True`.
mask_feature_length (`int`, *optional*, defaults to 10):
Length of vector span along the feature axis.
mask_feature_min_masks (`int`, *optional*, defaults to 0):
The minimum number of masks of length `mask_feature_length` generated along the feature axis, each time
step, irrespective of `mask_feature_prob`. Only relevant if
`mask_feature_prob*len(feature_axis)/mask_feature_length < mask_feature_min_masks`.
mask_feature_min_masks (`<fill_type>`, *optional*, defaults to 0): <fill_docstring>
Example:
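The docstring's own `Example:` block is collapsed in this view. As a rough stand-in, here is a hedged sketch of instantiating the config with a few of the arguments documented above; the argument names and defaults are taken from the docstring at this commit and may differ in the merged release:

```python
from transformers import MoonshineConfig, MoonshineModel

# Sketch only: override a few of the defaults documented above. The argument set
# mirrors the docstring at this commit and may not match the final merged API.
config = MoonshineConfig(
    layer_norm_eps=1e-5,
    rope_theta=10000.0,
    ff_mult=4,
    attention_bias=False,
    attention_dropout=0.0,
)

# Initializing a model from the config gives randomly initialized weights.
model = MoonshineModel(config)
print(config.is_encoder_decoder)  # True by default, per the docstring above
```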
21 changes: 21 additions & 0 deletions src/transformers/utils/dummy_pt_objects.py
@@ -6289,6 +6289,27 @@ def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])


class MoonshineForConditionalGeneration(metaclass=DummyObject):
_backends = ["torch"]

def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])


class MoonshineModel(metaclass=DummyObject):
_backends = ["torch"]

def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])


class MoonshinePreTrainedModel(metaclass=DummyObject):
_backends = ["torch"]

def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])


class MoshiForCausalLM(metaclass=DummyObject):
_backends = ["torch"]

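These dummies follow the usual transformers pattern: when PyTorch is not installed, the top-level package exposes these placeholders instead of the real classes, and instantiating one raises an error asking for the missing backend. A rough sketch of that behaviour (illustrative only; the exact error text comes from `requires_backends` and may differ):

```python
from transformers.utils import is_torch_available

if not is_torch_available():
    # Without torch, this resolves to the dummy class defined above.
    from transformers import MoonshineForConditionalGeneration

    try:
        MoonshineForConditionalGeneration()
    except ImportError as err:
        # requires_backends raises an ImportError telling the user to install
        # the PyTorch backend before using this class.
        print(err)
```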
