Refactor, cleanup, fix style

huggingface · Nov 14, 2024 · 93b1d19 · 93b1d19
1 parent 8c8c882
commit 93b1d19
Show file tree

Hide file tree

Showing 12 changed files with 13 additions and 737 deletions.
diff --git a/docs/source/en/index.md b/docs/source/en/index.md
@@ -152,7 +152,7 @@ Flax), PyTorch, and/or TensorFlow.
 |                           [GIT](model_doc/git)                           |       ✅        |         ❌         |      ❌      |
 |                           [GLM](model_doc/glm)                           |       ✅        |         ❌         |      ❌      |
 |                          [GLPN](model_doc/glpn)                          |       ✅        |         ❌         |      ❌      |
-|                      [GOT-OCR2](model_doc/got-ocr2)                      |       ✅        |         ❌         |      ❌      |
+|                      [GOT-OCR2](model_doc/got_ocr2)                      |       ✅        |         ❌         |      ❌      |
 |                       [GPT Neo](model_doc/gpt_neo)                       |       ✅        |         ❌         |      ✅      |
 |                      [GPT NeoX](model_doc/gpt_neox)                      |       ✅        |         ❌         |      ❌      |
 |             [GPT NeoX Japanese](model_doc/gpt_neox_japanese)             |       ✅        |         ❌         |      ❌      |

diff --git a/docs/source/en/model_doc/got-ocr2.md b/docs/source/en/model_doc/got-ocr2.md
@@ -37,14 +37,9 @@ The original code can be found [here](<INSERT LINK TO GITHUB REPO HERE>).
 
 [[autodoc]] GotOcr2Config
 
-## GotOcr2Tokenizer
+## GotOcr2Processor
 
-[[autodoc]] GotOcr2Tokenizer
-    - save_vocabulary
-
-## GotOcr2TokenizerFast
-
-[[autodoc]] GotOcr2TokenizerFast
+[[autodoc]] GotOcr2Processor
 
 ## GotOcr2Model
 

diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py
@@ -685,8 +685,8 @@
     ],
     "models.got_ocr2": [
         "GotOcr2Config",
+        "GotOcr2Processor",
         "GotOcr2VisionConfig",
-        "GotOcr2Tokenizer",
     ],
     "models.qwen2_audio": [
         "Qwen2AudioConfig",
@@ -1107,7 +1107,6 @@
     _import_structure["models.openai"].append("OpenAIGPTTokenizerFast")
     _import_structure["models.pegasus"].append("PegasusTokenizerFast")
     _import_structure["models.qwen2"].append("Qwen2TokenizerFast")
-    _import_structure["models.got_ocr2"].append("GotOcr2TokenizerFast")
     _import_structure["models.reformer"].append("ReformerTokenizerFast")
     _import_structure["models.rembert"].append("RemBertTokenizerFast")
     _import_structure["models.roberta"].append("RobertaTokenizerFast")
@@ -5335,7 +5334,7 @@
     )
     from .models.glm import GlmConfig
     from .models.glpn import GLPNConfig
-    from .models.got_ocr2 import GotOcr2Config, GotOcr2Tokenizer, GotOcr2VisionConfig
+    from .models.got_ocr2 import GotOcr2Config, GotOcr2Processor, GotOcr2VisionConfig
     from .models.gpt2 import (
         GPT2Config,
         GPT2Tokenizer,
@@ -6012,7 +6011,6 @@
         from .models.fnet import FNetTokenizerFast
         from .models.funnel import FunnelTokenizerFast
         from .models.gemma import GemmaTokenizerFast
-        from .models.got_ocr2 import GotOcr2TokenizerFast
         from .models.gpt2 import GPT2TokenizerFast
         from .models.gpt_neox import GPTNeoXTokenizerFast
         from .models.gpt_neox_japanese import GPTNeoXJapaneseTokenizer

diff --git a/src/transformers/models/auto/configuration_auto.py b/src/transformers/models/auto/configuration_auto.py
@@ -116,7 +116,7 @@
         ("git", "GitConfig"),
         ("glm", "GlmConfig"),
         ("glpn", "GLPNConfig"),
-        ("got-ocr2", "GotOcr2Config"),
+        ("got_ocr2", "GotOcr2Config"),
         ("gpt-sw3", "GPT2Config"),
         ("gpt2", "GPT2Config"),
         ("gpt_bigcode", "GPTBigCodeConfig"),
@@ -420,7 +420,7 @@
         ("git", "GIT"),
         ("glm", "GLM"),
         ("glpn", "GLPN"),
-        ("got-ocr2", "GOT-OCR2"),
+        ("got_ocr2", "GOT-OCR2"),
         ("gpt-sw3", "GPT-Sw3"),
         ("gpt2", "OpenAI GPT-2"),
         ("gpt_bigcode", "GPTBigCode"),

diff --git a/src/transformers/models/auto/modeling_auto.py b/src/transformers/models/auto/modeling_auto.py
@@ -113,7 +113,7 @@
         ("git", "GitModel"),
         ("glm", "GlmModel"),
         ("glpn", "GLPNModel"),
-        ("got-ocr2", "GotOcr2Model"),
+        ("got_ocr2", "GotOcr2Model"),
         ("gpt-sw3", "GPT2Model"),
         ("gpt2", "GPT2Model"),
         ("gpt_bigcode", "GPTBigCodeModel"),
@@ -489,7 +489,7 @@
         ("gemma2", "Gemma2ForCausalLM"),
         ("git", "GitForCausalLM"),
         ("glm", "GlmForCausalLM"),
-        ("got-ocr2", "GotOcr2ForConditionalGeneration"),
+        ("got_ocr2", "GotOcr2ForConditionalGeneration"),
         ("gpt-sw3", "GPT2LMHeadModel"),
         ("gpt2", "GPT2LMHeadModel"),
         ("gpt_bigcode", "GPTBigCodeForCausalLM"),
@@ -770,6 +770,7 @@
         ("chameleon", "ChameleonForConditionalGeneration"),
         ("fuyu", "FuyuForCausalLM"),
         ("git", "GitForCausalLM"),
+        ("got_ocr2", "GotOcr2ForConditionalGeneration"),
         ("idefics", "IdeficsForVisionText2Text"),
         ("idefics2", "Idefics2ForConditionalGeneration"),
         ("idefics3", "Idefics3ForConditionalGeneration"),

diff --git a/src/transformers/models/auto/processing_auto.py b/src/transformers/models/auto/processing_auto.py
@@ -60,6 +60,7 @@
         ("flava", "FlavaProcessor"),
         ("fuyu", "FuyuProcessor"),
         ("git", "GitProcessor"),
+        ("got_ocr2", "GotOcr2Processor"),
         ("grounding-dino", "GroundingDinoProcessor"),
         ("groupvit", "CLIPProcessor"),
         ("hubert", "Wav2Vec2Processor"),

diff --git a/src/transformers/models/auto/tokenization_auto.py b/src/transformers/models/auto/tokenization_auto.py
@@ -205,13 +205,6 @@
             ),
             ("git", ("BertTokenizer", "BertTokenizerFast" if is_tokenizers_available() else None)),
             ("glm", (None, "PreTrainedTokenizerFast" if is_tokenizers_available() else None)),
-            (
-                "got-ocr2",
-                (
-                    "GotOcr2Tokenizer",
-                    "GotOcr2TokenizerFast" if is_tokenizers_available() else None,
-                ),
-            ),
             ("gpt-sw3", ("GPTSw3Tokenizer" if is_sentencepiece_available() else None, None)),
             ("gpt2", ("GPT2Tokenizer", "GPT2TokenizerFast" if is_tokenizers_available() else None)),
             ("gpt_bigcode", ("GPT2Tokenizer", "GPT2TokenizerFast" if is_tokenizers_available() else None)),

diff --git a/src/transformers/models/got_ocr2/__init__.py b/src/transformers/models/got_ocr2/__init__.py
@@ -23,17 +23,9 @@
 
 _import_structure = {
     "configuration_got_ocr2": ["GotOcr2Config", "GotOcr2VisionConfig"],
-    "tokenization_got_ocr2": ["GotOcr2Tokenizer"],
+    "processing_got_ocr2": ["GotOcr2Processor"],
 }
 
-try:
-    if not is_tokenizers_available():
-        raise OptionalDependencyNotAvailable()
-except OptionalDependencyNotAvailable:
-    pass
-else:
-    _import_structure["tokenization_got_ocr2_fast"] = ["GotOcr2TokenizerFast"]
-
 try:
     if not is_torch_available():
         raise OptionalDependencyNotAvailable()
@@ -49,15 +41,7 @@
 
 if TYPE_CHECKING:
     from .configuration_got_ocr2 import GotOcr2Config, GotOcr2VisionConfig
-    from .tokenization_got_ocr2 import GotOcr2Tokenizer
-
-    try:
-        if not is_tokenizers_available():
-            raise OptionalDependencyNotAvailable()
-    except OptionalDependencyNotAvailable:
-        pass
-    else:
-        from .tokenization_got_ocr2_fast import GotOcr2TokenizerFast
+    from .processing_got_ocr2 import GotOcr2Processor
 
     try:
         if not is_torch_available():