From 4208c428f6a42e6f58ab44014e696bdf49def855 Mon Sep 17 00:00:00 2001 From: Arthur <48595927+ArthurZucker@users.noreply.github.com> Date: Tue, 7 May 2024 13:56:56 +0200 Subject: [PATCH] Separate tokenizer tests (#30675) * nit * better filter * pipeline tests should only be models/xxx not anything else * nit to better see filtering of the files that are passed to test torch * oups --- .circleci/create_circleci_config.py | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/.circleci/create_circleci_config.py b/.circleci/create_circleci_config.py index f03506af52eb23..553f28eaa17e6d 100644 --- a/.circleci/create_circleci_config.py +++ b/.circleci/create_circleci_config.py @@ -132,7 +132,7 @@ def to_dict(self): if tests is None: folder = os.environ["test_preparation_dir"] test_file = os.path.join(folder, "filtered_test_list.txt") - if os.path.exists(test_file): + if os.path.exists(test_file): # We take this job's tests from the filtered test_list.txt with open(test_file) as f: tests = f.read().split(" ") @@ -144,9 +144,20 @@ def to_dict(self): if test.endswith(".py"): expanded_tests.append(test) elif test == "tests/models": - expanded_tests.extend(glob.glob("tests/models/**/test*.py", recursive=True)) + if "tokenization" in self.name: + expanded_tests.extend(glob.glob("tests/models/**/test_tokenization*.py", recursive=True)) + elif self.name in ["flax","torch","tf"]: + name = self.name if self.name != "torch" else "" + if self.name == "torch": + all_tests = glob.glob(f"tests/models/**/test_modeling_{name}*.py", recursive=True) + filtered = [k for k in all_tests if ("_tf_") not in k and "_flax_" not in k] + expanded_tests.extend(filtered) + else: + expanded_tests.extend(glob.glob(f"tests/models/**/test_modeling_{name}*.py", recursive=True)) + else: + expanded_tests.extend(glob.glob("tests/models/**/test_modeling*.py", recursive=True)) elif test == "tests/pipelines": - expanded_tests.extend([os.path.join(test, x) for x in os.listdir(test)]) + expanded_tests.extend(glob.glob("tests/models/**/test_modeling*.py", recursive=True)) else: expanded_tests.append(test) tests = " ".join(expanded_tests) @@ -234,6 +245,14 @@ def job_name(self): pytest_num_workers=16 ) +tokenization_job = CircleCIJob( + "tokenization", + docker_image=[{"image": "huggingface/transformers-torch-light"}], + install_steps=["uv venv && uv pip install ."], + parallelism=6, + pytest_num_workers=16 +) + tf_job = CircleCIJob( "tf", @@ -404,6 +423,7 @@ def job_name(self): hub_job, onnx_job, exotic_models_job, + tokenization_job ] EXAMPLES_TESTS = [ examples_torch_job,