From 20d74e51db597471e650ae400c3c4875538b2cb6 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Tue, 12 Mar 2024 10:03:11 +0000 Subject: [PATCH] Putting back the imports. --- bindings/python/Makefile | 2 +- bindings/python/pyproject.toml | 2 ++ bindings/python/tests/bindings/test_encoding.py | 2 ++ bindings/python/tests/bindings/test_models.py | 1 + bindings/python/tests/bindings/test_processors.py | 2 ++ bindings/python/tests/bindings/test_tokenizer.py | 2 +- bindings/python/tests/bindings/test_trainers.py | 2 ++ bindings/python/tests/documentation/test_pipeline.py | 1 + bindings/python/tests/documentation/test_quicktour.py | 1 + .../tests/documentation/test_tutorial_train_from_iterators.py | 2 ++ bindings/python/tests/implementations/test_bert_wordpiece.py | 2 +- bindings/python/tests/implementations/test_byte_level_bpe.py | 2 +- bindings/python/tests/implementations/test_char_bpe.py | 2 +- bindings/python/tests/test_serialization.py | 1 + 14 files changed, 19 insertions(+), 5 deletions(-) diff --git a/bindings/python/Makefile b/bindings/python/Makefile index f07cff584..8eeaf83a1 100644 --- a/bindings/python/Makefile +++ b/bindings/python/Makefile @@ -9,7 +9,7 @@ check_dirs := examples py_src/tokenizers tests style: python stub.py ruff check $(check_dirs) --fix - ruff format $(check_dirs) + ruff format $(check_dirs)t # Check the source code is formatted correctly check-style: diff --git a/bindings/python/pyproject.toml b/bindings/python/pyproject.toml index 8702fc37b..5cdf090fa 100644 --- a/bindings/python/pyproject.toml +++ b/bindings/python/pyproject.toml @@ -67,4 +67,6 @@ lint.ignore = [ "E721", # Import order "E402", + # Fixtures unused import + "F811", ] diff --git a/bindings/python/tests/bindings/test_encoding.py b/bindings/python/tests/bindings/test_encoding.py index e82fc6dda..80b8cc2bb 100644 --- a/bindings/python/tests/bindings/test_encoding.py +++ b/bindings/python/tests/bindings/test_encoding.py @@ -2,6 +2,8 @@ from tokenizers import BertWordPieceTokenizer +from ..utils import bert_files, data_dir + class TestEncoding: @pytest.fixture(scope="class") diff --git a/bindings/python/tests/bindings/test_models.py b/bindings/python/tests/bindings/test_models.py index 919b5043b..c6a50ce86 100644 --- a/bindings/python/tests/bindings/test_models.py +++ b/bindings/python/tests/bindings/test_models.py @@ -3,6 +3,7 @@ import pytest from tokenizers.models import BPE, Model, WordLevel, WordPiece +from ..utils import bert_files, data_dir, roberta_files class TestBPE: diff --git a/bindings/python/tests/bindings/test_processors.py b/bindings/python/tests/bindings/test_processors.py index f30c93d2f..842754a69 100644 --- a/bindings/python/tests/bindings/test_processors.py +++ b/bindings/python/tests/bindings/test_processors.py @@ -15,6 +15,8 @@ TemplateProcessing, ) +from ..utils import data_dir, roberta_files + class TestBertProcessing: def test_instantiate(self): diff --git a/bindings/python/tests/bindings/test_tokenizer.py b/bindings/python/tests/bindings/test_tokenizer.py index 833d96f66..01deb7a85 100644 --- a/bindings/python/tests/bindings/test_tokenizer.py +++ b/bindings/python/tests/bindings/test_tokenizer.py @@ -9,7 +9,7 @@ from tokenizers.pre_tokenizers import ByteLevel from tokenizers.processors import RobertaProcessing -from ..utils import multiprocessing_with_parallelism +from ..utils import bert_files, data_dir, multiprocessing_with_parallelism, roberta_files class TestAddedToken: diff --git a/bindings/python/tests/bindings/test_trainers.py b/bindings/python/tests/bindings/test_trainers.py index 0f406c288..87021533c 100644 --- a/bindings/python/tests/bindings/test_trainers.py +++ b/bindings/python/tests/bindings/test_trainers.py @@ -14,6 +14,8 @@ trainers, ) +from ..utils import data_dir, train_files + class TestBpeTrainer: def test_can_modify(self): diff --git a/bindings/python/tests/documentation/test_pipeline.py b/bindings/python/tests/documentation/test_pipeline.py index ae8127be6..25300ff64 100644 --- a/bindings/python/tests/documentation/test_pipeline.py +++ b/bindings/python/tests/documentation/test_pipeline.py @@ -1,5 +1,6 @@ from tokenizers import Tokenizer +from ..utils import data_dir, doc_pipeline_bert_tokenizer, doc_wiki_tokenizer disable_printing = True original_print = print diff --git a/bindings/python/tests/documentation/test_quicktour.py b/bindings/python/tests/documentation/test_quicktour.py index 8bca8d680..a98b0c12e 100644 --- a/bindings/python/tests/documentation/test_quicktour.py +++ b/bindings/python/tests/documentation/test_quicktour.py @@ -1,4 +1,5 @@ from tokenizers import Tokenizer +from ..utils import data_dir, doc_wiki_tokenizer disable_printing = True diff --git a/bindings/python/tests/documentation/test_tutorial_train_from_iterators.py b/bindings/python/tests/documentation/test_tutorial_train_from_iterators.py index 2ba51c4be..fc9ffce19 100644 --- a/bindings/python/tests/documentation/test_tutorial_train_from_iterators.py +++ b/bindings/python/tests/documentation/test_tutorial_train_from_iterators.py @@ -5,6 +5,8 @@ import datasets import pytest +from ..utils import data_dir, train_files + class TestTrainFromIterators: @staticmethod diff --git a/bindings/python/tests/implementations/test_bert_wordpiece.py b/bindings/python/tests/implementations/test_bert_wordpiece.py index 2c47beb68..4e7c29cf5 100644 --- a/bindings/python/tests/implementations/test_bert_wordpiece.py +++ b/bindings/python/tests/implementations/test_bert_wordpiece.py @@ -1,6 +1,6 @@ from tokenizers import BertWordPieceTokenizer -from ..utils import multiprocessing_with_parallelism +from ..utils import bert_files, data_dir, multiprocessing_with_parallelism class TestBertWordPieceTokenizer: diff --git a/bindings/python/tests/implementations/test_byte_level_bpe.py b/bindings/python/tests/implementations/test_byte_level_bpe.py index d6af6e31e..441aded7a 100644 --- a/bindings/python/tests/implementations/test_byte_level_bpe.py +++ b/bindings/python/tests/implementations/test_byte_level_bpe.py @@ -1,6 +1,6 @@ from tokenizers import ByteLevelBPETokenizer -from ..utils import multiprocessing_with_parallelism +from ..utils import data_dir, multiprocessing_with_parallelism, roberta_files class TestByteLevelBPE: diff --git a/bindings/python/tests/implementations/test_char_bpe.py b/bindings/python/tests/implementations/test_char_bpe.py index b786ca850..3ce5cf9a3 100644 --- a/bindings/python/tests/implementations/test_char_bpe.py +++ b/bindings/python/tests/implementations/test_char_bpe.py @@ -1,6 +1,6 @@ from tokenizers import CharBPETokenizer -from ..utils import multiprocessing_with_parallelism +from ..utils import data_dir, multiprocessing_with_parallelism, openai_files class TestCharBPETokenizer: diff --git a/bindings/python/tests/test_serialization.py b/bindings/python/tests/test_serialization.py index d0111a88e..a56c6bb33 100644 --- a/bindings/python/tests/test_serialization.py +++ b/bindings/python/tests/test_serialization.py @@ -6,6 +6,7 @@ from huggingface_hub import HfApi, cached_download, hf_hub_url from tokenizers import Tokenizer +from .utils import albert_base, data_dir class TestSerialization: