Skip to content

Commit

Permalink
Fix flaky Hub CI (test_trainer.py) (#35062)
Browse files Browse the repository at this point in the history
* fix

* Update src/transformers/testing_utils.py

Co-authored-by: Lucain <[email protected]>

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* check

* check

* check

* check

* check

* check

* Update src/transformers/testing_utils.py

Co-authored-by: Lucain <[email protected]>

* Update src/transformers/testing_utils.py

Co-authored-by: Lucain <[email protected]>

* check

* check

* check

* Final space

* Final adjustment

---------

Co-authored-by: ydshieh <[email protected]>
Co-authored-by: Lucain <[email protected]>
  • Loading branch information
3 people authored Dec 5, 2024
1 parent a928d9c commit b0a51e5
Show file tree
Hide file tree
Showing 11 changed files with 701 additions and 953 deletions.
34 changes: 34 additions & 0 deletions src/transformers/testing_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,9 @@
from unittest import mock
from unittest.mock import patch

import huggingface_hub.utils
import urllib3
from huggingface_hub import delete_repo

from transformers import logging as transformers_logging

Expand Down Expand Up @@ -1570,6 +1572,38 @@ def LoggingLevel(level):
transformers_logging.set_verbosity(orig_level)


class TemporaryHubRepo:
"""Create a temporary Hub repository and return its `RepoUrl` object. This is similar to
`tempfile.TemporaryDirectory` and can be used as a context manager. For example:
with TemporaryHubRepo(token=self._token) as temp_repo:
...
Upon exiting the context, the repository and everything contained in it are removed.
Example:
```python
with TemporaryHubRepo(token=self._token) as temp_repo:
model.push_to_hub(tmp_repo.repo_id, token=self._token)
```
"""

def __init__(self, namespace: Optional[str] = None, token: Optional[str] = None) -> None:
self.token = token
with tempfile.TemporaryDirectory() as tmp_dir:
repo_id = Path(tmp_dir).name
if namespace is not None:
repo_id = f"{namespace}/{repo_id}"
self.repo_url = huggingface_hub.create_repo(repo_id, token=self.token)

def __enter__(self):
return self.repo_url

def __exit__(self, exc, value, tb):
delete_repo(repo_id=self.repo_url.repo_id, token=self.token, missing_ok=True)


@contextlib.contextmanager
# adapted from https://stackoverflow.com/a/64789046/9201239
def ExtendSysPath(path: Union[str, os.PathLike]) -> Iterator[None]:
Expand Down
177 changes: 72 additions & 105 deletions tests/generation/test_configuration_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,8 @@
import tempfile
import unittest
import warnings
from pathlib import Path

from huggingface_hub import HfFolder, create_pull_request, create_repo, delete_repo
from huggingface_hub import HfFolder, create_pull_request
from parameterized import parameterized

from transformers import AutoConfig, GenerationConfig, WatermarkingConfig, is_torch_available
Expand Down Expand Up @@ -57,7 +56,7 @@
UnbatchedClassifierFreeGuidanceLogitsProcessor,
WatermarkLogitsProcessor,
)
from transformers.testing_utils import TOKEN, USER, is_staging_test, torch_device
from transformers.testing_utils import TOKEN, TemporaryHubRepo, is_staging_test, torch_device


class GenerationConfigTest(unittest.TestCase):
Expand Down Expand Up @@ -679,114 +678,82 @@ def setUpClass(cls):
cls._token = TOKEN
HfFolder.save_token(TOKEN)

@staticmethod
def _try_delete_repo(repo_id, token):
try:
# Reset repo
delete_repo(repo_id=repo_id, token=token)
except: # noqa E722
pass

def test_push_to_hub(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"{USER}/test-generation-config-{Path(tmp_dir).name}"
config = GenerationConfig(
do_sample=True,
temperature=0.7,
length_penalty=1.0,
)
config.push_to_hub(tmp_repo, token=self._token)

new_config = GenerationConfig.from_pretrained(tmp_repo)
for k, v in config.to_dict().items():
if k != "transformers_version":
self.assertEqual(v, getattr(new_config, k))
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
with TemporaryHubRepo(token=self._token) as tmp_repo:
config = GenerationConfig(
do_sample=True,
temperature=0.7,
length_penalty=1.0,
)
config.push_to_hub(tmp_repo.repo_id, token=self._token)

new_config = GenerationConfig.from_pretrained(tmp_repo.repo_id)
for k, v in config.to_dict().items():
if k != "transformers_version":
self.assertEqual(v, getattr(new_config, k))

def test_push_to_hub_via_save_pretrained(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"{USER}/test-generation-config-{Path(tmp_dir).name}"
config = GenerationConfig(
do_sample=True,
temperature=0.7,
length_penalty=1.0,
)
# Push to hub via save_pretrained
config.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token)

new_config = GenerationConfig.from_pretrained(tmp_repo)
for k, v in config.to_dict().items():
if k != "transformers_version":
self.assertEqual(v, getattr(new_config, k))
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
with TemporaryHubRepo(token=self._token) as tmp_repo:
config = GenerationConfig(
do_sample=True,
temperature=0.7,
length_penalty=1.0,
)
# Push to hub via save_pretrained
with tempfile.TemporaryDirectory() as tmp_dir:
config.save_pretrained(tmp_dir, repo_id=tmp_repo.repo_id, push_to_hub=True, token=self._token)

new_config = GenerationConfig.from_pretrained(tmp_repo.repo_id)
for k, v in config.to_dict().items():
if k != "transformers_version":
self.assertEqual(v, getattr(new_config, k))

def test_push_to_hub_in_organization(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"valid_org/test-generation-config-org-{Path(tmp_dir).name}"
config = GenerationConfig(
do_sample=True,
temperature=0.7,
length_penalty=1.0,
)
config.push_to_hub(tmp_repo, token=self._token)

new_config = GenerationConfig.from_pretrained(tmp_repo)
for k, v in config.to_dict().items():
if k != "transformers_version":
self.assertEqual(v, getattr(new_config, k))
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo:
config = GenerationConfig(
do_sample=True,
temperature=0.7,
length_penalty=1.0,
)
config.push_to_hub(tmp_repo.repo_id, token=self._token)

new_config = GenerationConfig.from_pretrained(tmp_repo.repo_id)
for k, v in config.to_dict().items():
if k != "transformers_version":
self.assertEqual(v, getattr(new_config, k))

def test_push_to_hub_in_organization_via_save_pretrained(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"valid_org/test-generation-config-org-{Path(tmp_dir).name}"
config = GenerationConfig(
do_sample=True,
temperature=0.7,
length_penalty=1.0,
)
# Push to hub via save_pretrained
config.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token)

new_config = GenerationConfig.from_pretrained(tmp_repo)
for k, v in config.to_dict().items():
if k != "transformers_version":
self.assertEqual(v, getattr(new_config, k))
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo:
config = GenerationConfig(
do_sample=True,
temperature=0.7,
length_penalty=1.0,
)
# Push to hub via save_pretrained
with tempfile.TemporaryDirectory() as tmp_dir:
config.save_pretrained(tmp_dir, repo_id=tmp_repo.repo_id, push_to_hub=True, token=self._token)

new_config = GenerationConfig.from_pretrained(tmp_repo.repo_id)
for k, v in config.to_dict().items():
if k != "transformers_version":
self.assertEqual(v, getattr(new_config, k))

def test_push_to_hub_on_pr_revision(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
# create a repo and a PR
repo_id = f"{USER}/test-generation-config-{Path(tmp_dir).name}"
create_repo(repo_id=repo_id, token=self._token)
pr = create_pull_request(repo_id=repo_id, title="Test PR", token=self._token)
revision = f"refs/pr/{pr.num}"

# push to PR ref
config = GenerationConfig(
do_sample=True,
temperature=0.7,
length_penalty=1.0,
)
config.push_to_hub(repo_id, token=self._token, revision=revision)

# load from PR ref
new_config = GenerationConfig.from_pretrained(repo_id, revision=revision)
for k, v in config.to_dict().items():
if k != "transformers_version":
self.assertEqual(v, getattr(new_config, k))
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=repo_id, token=self._token)
with TemporaryHubRepo(token=self._token) as tmp_repo:
# create a PR
pr = create_pull_request(repo_id=tmp_repo.repo_id, title="Test PR", token=self._token)
revision = f"refs/pr/{pr.num}"

# push to PR ref
config = GenerationConfig(
do_sample=True,
temperature=0.7,
length_penalty=1.0,
)
config.push_to_hub(tmp_repo.repo_id, token=self._token, revision=revision)

# load from PR ref
new_config = GenerationConfig.from_pretrained(tmp_repo.repo_id, revision=revision)
for k, v in config.to_dict().items():
if k != "transformers_version":
self.assertEqual(v, getattr(new_config, k))
92 changes: 34 additions & 58 deletions tests/models/auto/test_processor_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from pathlib import Path
from shutil import copyfile

from huggingface_hub import HfFolder, Repository, create_repo, delete_repo
from huggingface_hub import HfFolder, Repository

import transformers
from transformers import (
Expand All @@ -39,7 +39,7 @@
Wav2Vec2FeatureExtractor,
Wav2Vec2Processor,
)
from transformers.testing_utils import TOKEN, USER, get_tests_dir, is_staging_test
from transformers.testing_utils import TOKEN, TemporaryHubRepo, get_tests_dir, is_staging_test
from transformers.tokenization_utils import TOKENIZER_CONFIG_FILE
from transformers.utils import FEATURE_EXTRACTOR_NAME, PROCESSOR_NAME, is_tokenizers_available

Expand Down Expand Up @@ -372,72 +372,52 @@ def setUpClass(cls):
cls._token = TOKEN
HfFolder.save_token(TOKEN)

@staticmethod
def _try_delete_repo(repo_id, token):
try:
# Reset repo
delete_repo(repo_id=repo_id, token=token)
except: # noqa E722
pass

def test_push_to_hub_via_save_pretrained(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"{USER}/test-processor-{Path(tmp_dir).name}"
processor = Wav2Vec2Processor.from_pretrained(SAMPLE_PROCESSOR_CONFIG_DIR)
# Push to hub via save_pretrained
processor.save_pretrained(tmp_repo, repo_id=tmp_repo, push_to_hub=True, token=self._token)

new_processor = Wav2Vec2Processor.from_pretrained(tmp_repo)
for k, v in processor.feature_extractor.__dict__.items():
self.assertEqual(v, getattr(new_processor.feature_extractor, k))
self.assertDictEqual(new_processor.tokenizer.get_vocab(), processor.tokenizer.get_vocab())
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
with TemporaryHubRepo(token=self._token) as tmp_repo:
processor = Wav2Vec2Processor.from_pretrained(SAMPLE_PROCESSOR_CONFIG_DIR)
# Push to hub via save_pretrained
with tempfile.TemporaryDirectory() as tmp_dir:
processor.save_pretrained(tmp_dir, repo_id=tmp_repo.repo_id, push_to_hub=True, token=self._token)

def test_push_to_hub_in_organization_via_save_pretrained(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"valid_org/test-processor-org-{Path(tmp_dir).name}"
processor = Wav2Vec2Processor.from_pretrained(SAMPLE_PROCESSOR_CONFIG_DIR)
new_processor = Wav2Vec2Processor.from_pretrained(tmp_repo.repo_id)
for k, v in processor.feature_extractor.__dict__.items():
self.assertEqual(v, getattr(new_processor.feature_extractor, k))
self.assertDictEqual(new_processor.tokenizer.get_vocab(), processor.tokenizer.get_vocab())

# Push to hub via save_pretrained
def test_push_to_hub_in_organization_via_save_pretrained(self):
with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo:
processor = Wav2Vec2Processor.from_pretrained(SAMPLE_PROCESSOR_CONFIG_DIR)
# Push to hub via save_pretrained
with tempfile.TemporaryDirectory() as tmp_dir:
processor.save_pretrained(
tmp_dir,
repo_id=tmp_repo,
repo_id=tmp_repo.repo_id,
push_to_hub=True,
token=self._token,
)

new_processor = Wav2Vec2Processor.from_pretrained(tmp_repo)
for k, v in processor.feature_extractor.__dict__.items():
self.assertEqual(v, getattr(new_processor.feature_extractor, k))
self.assertDictEqual(new_processor.tokenizer.get_vocab(), processor.tokenizer.get_vocab())
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
new_processor = Wav2Vec2Processor.from_pretrained(tmp_repo.repo_id)
for k, v in processor.feature_extractor.__dict__.items():
self.assertEqual(v, getattr(new_processor.feature_extractor, k))
self.assertDictEqual(new_processor.tokenizer.get_vocab(), processor.tokenizer.get_vocab())

def test_push_to_hub_dynamic_processor(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"{USER}/test-dynamic-processor-{Path(tmp_dir).name}"
with TemporaryHubRepo(token=self._token) as tmp_repo:
CustomFeatureExtractor.register_for_auto_class()
CustomTokenizer.register_for_auto_class()
CustomProcessor.register_for_auto_class()

CustomFeatureExtractor.register_for_auto_class()
CustomTokenizer.register_for_auto_class()
CustomProcessor.register_for_auto_class()

feature_extractor = CustomFeatureExtractor.from_pretrained(SAMPLE_PROCESSOR_CONFIG_DIR)
feature_extractor = CustomFeatureExtractor.from_pretrained(SAMPLE_PROCESSOR_CONFIG_DIR)

with tempfile.TemporaryDirectory() as tmp_dir:
vocab_file = os.path.join(tmp_dir, "vocab.txt")
with open(vocab_file, "w", encoding="utf-8") as vocab_writer:
vocab_writer.write("".join([x + "\n" for x in self.vocab_tokens]))
tokenizer = CustomTokenizer(vocab_file)
with tempfile.TemporaryDirectory() as tmp_dir:
vocab_file = os.path.join(tmp_dir, "vocab.txt")
with open(vocab_file, "w", encoding="utf-8") as vocab_writer:
vocab_writer.write("".join([x + "\n" for x in self.vocab_tokens]))
tokenizer = CustomTokenizer(vocab_file)

processor = CustomProcessor(feature_extractor, tokenizer)
processor = CustomProcessor(feature_extractor, tokenizer)

create_repo(tmp_repo, token=self._token)
with tempfile.TemporaryDirectory() as tmp_dir:
repo = Repository(tmp_dir, clone_from=tmp_repo, token=self._token)
processor.save_pretrained(tmp_dir)

Expand Down Expand Up @@ -468,10 +448,6 @@ def test_push_to_hub_dynamic_processor(self):

repo.push_to_hub()

new_processor = AutoProcessor.from_pretrained(tmp_repo, trust_remote_code=True)
new_processor = AutoProcessor.from_pretrained(tmp_repo.repo_id, trust_remote_code=True)
# Can't make an isinstance check because the new_processor is from the CustomProcessor class of a dynamic module
self.assertEqual(new_processor.__class__.__name__, "CustomProcessor")

finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
Loading

0 comments on commit b0a51e5

Please sign in to comment.