Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Deprecate legacy_cache_layout parameter in hf_hub_download #2317

Merged
merged 7 commits on Jun 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def get_version() -> str:
+ [
"jedi",
"Jinja2",
"pytest",  # removed by this change
"pytest>=8.1.1",  # added by this change
"pytest-cov",
"pytest-env",
"pytest-xdist",
Expand Down
12 changes: 12 additions & 0 deletions src/huggingface_hub/file_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@
tqdm,
validate_hf_hub_args,
)
from .utils._deprecation import _deprecate_arguments, _deprecate_method
from .utils._runtime import _PY_VERSION # noqa: F401 # for backward compatibility
from .utils._typing import HTTP_METHOD_T
from .utils.insecure_hashlib import sha256
Expand Down Expand Up @@ -273,6 +274,7 @@ def hf_hub_url(
return url


@_deprecate_method(version="0.26", message="Use `hf_hub_download` to benefit from the new cache layout.")
def url_to_filename(url: str, etag: Optional[str] = None) -> str:
"""Generate a local filename from a url.

Expand Down Expand Up @@ -304,6 +306,7 @@ def url_to_filename(url: str, etag: Optional[str] = None) -> str:
return filename


@_deprecate_method(version="0.26", message="Use `hf_hub_url` instead.")
def filename_to_url(
filename,
cache_dir: Optional[str] = None,
Expand Down Expand Up @@ -580,6 +583,7 @@ def http_get(


@validate_hf_hub_args
@_deprecate_method(version="0.26", message="Use `hf_hub_download` instead.")
def cached_download(
url: str,
*,
Expand Down Expand Up @@ -994,6 +998,14 @@ def _check_disk_space(expected_size: int, target_dir: Union[str, Path]) -> None:
pass


@_deprecate_arguments(
version="0.26.0",
deprecated_args=["legacy_cache_layout"],
custom_message=(
"Legacy cache layout has been deprecated since August 2022 and will soon be removed. "
"See https://huggingface.co/docs/huggingface_hub/guides/manage-cache for more details."
),
)
@validate_hf_hub_args
def hf_hub_download(
repo_id: str,
Expand Down
29 changes: 29 additions & 0 deletions tests/test_file_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,11 +190,15 @@ def test_download_regular_file_from_private_renamed_repo(self, repo_url: RepoUrl

@with_production_testing
class CachedDownloadTests(unittest.TestCase):
@expect_deprecation("cached_download")
@expect_deprecation("url_to_filename")
def test_bogus_url(self):
url = "https://bogus"
with self.assertRaisesRegex(ValueError, "Connection error"):
_ = cached_download(url, legacy_cache_layout=True)

@expect_deprecation("cached_download")
@expect_deprecation("url_to_filename")
def test_no_connection(self):
invalid_url = hf_hub_url(
DUMMY_MODEL_ID,
Expand All @@ -211,6 +215,7 @@ def test_no_connection(self):
_ = cached_download(valid_url, force_download=True, legacy_cache_layout=True)
self.assertIsNotNone(cached_download(valid_url, legacy_cache_layout=True))

@expect_deprecation("cached_download")
def test_file_not_found_on_repo(self):
# Valid revision (None) but missing file on repo.
url = hf_hub_url(DUMMY_MODEL_ID, filename="missing.bin")
Expand Down Expand Up @@ -243,6 +248,8 @@ def test_file_not_found_locally_and_network_disabled(self):
local_files_only=True,
)

@expect_deprecation("cached_download")
@expect_deprecation("url_to_filename")
def test_file_not_found_locally_and_network_disabled_legacy(self):
# Valid file but missing locally and network is disabled.
url = hf_hub_url(DUMMY_MODEL_ID, filename=CONFIG_NAME)
Expand Down Expand Up @@ -289,6 +296,7 @@ def test_file_cached_and_read_only_access(self):
# Set permission back for cleanup
_recursive_chmod(tmpdir, 0o777)

@expect_deprecation("cached_download")
def test_revision_not_found(self):
# Valid file but missing revision
url = hf_hub_url(
Expand All @@ -302,6 +310,7 @@ def test_revision_not_found(self):
):
_ = cached_download(url, legacy_cache_layout=True)

@expect_deprecation("cached_download")
def test_repo_not_found(self):
# Invalid model file.
url = hf_hub_url("bert-base", filename="pytorch_model.bin")
Expand All @@ -311,12 +320,18 @@ def test_repo_not_found(self):
):
_ = cached_download(url, legacy_cache_layout=True)

@expect_deprecation("cached_download")
@expect_deprecation("url_to_filename")
@expect_deprecation("filename_to_url")
def test_standard_object(self):
url = hf_hub_url(DUMMY_MODEL_ID, filename=CONFIG_NAME, revision=REVISION_ID_DEFAULT)
filepath = cached_download(url, force_download=True, legacy_cache_layout=True)
metadata = filename_to_url(filepath, legacy_cache_layout=True)
self.assertEqual(metadata, (url, f'"{DUMMY_MODEL_ID_PINNED_SHA1}"'))

@expect_deprecation("cached_download")
@expect_deprecation("url_to_filename")
@expect_deprecation("filename_to_url")
def test_standard_object_rev(self):
# Same object, but different revision
url = hf_hub_url(
Expand All @@ -329,12 +344,18 @@ def test_standard_object_rev(self):
self.assertNotEqual(metadata[1], f'"{DUMMY_MODEL_ID_PINNED_SHA1}"')
# Caution: check that the etag is *not* equal to the one from `test_standard_object`

@expect_deprecation("cached_download")
@expect_deprecation("url_to_filename")
@expect_deprecation("filename_to_url")
def test_lfs_object(self):
url = hf_hub_url(DUMMY_MODEL_ID, filename=PYTORCH_WEIGHTS_NAME, revision=REVISION_ID_DEFAULT)
filepath = cached_download(url, force_download=True, legacy_cache_layout=True)
metadata = filename_to_url(filepath, legacy_cache_layout=True)
self.assertEqual(metadata, (url, f'"{DUMMY_MODEL_ID_PINNED_SHA256}"'))

@expect_deprecation("cached_download")
@expect_deprecation("url_to_filename")
@expect_deprecation("filename_to_url")
def test_dataset_standard_object_rev(self):
url = hf_hub_url(
DATASET_ID,
Expand All @@ -347,6 +368,9 @@ def test_dataset_standard_object_rev(self):
metadata = filename_to_url(filepath, legacy_cache_layout=True)
self.assertNotEqual(metadata[1], f'"{DUMMY_MODEL_ID_PINNED_SHA1}"')

@expect_deprecation("cached_download")
@expect_deprecation("url_to_filename")
@expect_deprecation("filename_to_url")
def test_dataset_lfs_object(self):
url = hf_hub_url(
DATASET_ID,
Expand Down Expand Up @@ -523,6 +547,10 @@ def test_hf_hub_url_with_endpoint(self):
"https://hf-ci.co/julien-c/dummy-unknown/resolve/main/config.json",
)

@expect_deprecation("hf_hub_download")
@expect_deprecation("cached_download")
@expect_deprecation("filename_to_url")
@expect_deprecation("url_to_filename")
def test_hf_hub_download_legacy(self):
filepath = hf_hub_download(
DUMMY_MODEL_ID,
Expand Down Expand Up @@ -715,6 +743,7 @@ def _mocked_hf_file_metadata(*args, **kwargs):
hf_hub_download(DUMMY_MODEL_ID, filename="pytorch_model.bin", cache_dir=cache_dir)

@expect_deprecation("cached_download")
@expect_deprecation("url_to_filename")
def test_cached_download_from_github(self):
"""Regression test for #1449.

Expand Down
Loading