Skip to content

Commit

Permalink
Fix finding cached files for flavor in load() (#471)
Browse files (browse the repository at this point in the history)
* Fix finding cached files for flavor in load()

* Add test for missing files

* Fix implementation

* Improve test

* Improve code
  • Loading branch information
hagenw authored Dec 6, 2024
1 parent 0442f55 commit 675c485
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 13 deletions.
24 changes: 11 additions & 13 deletions audb/core/load.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -860,23 +860,21 @@ def _missing_files(
list of missing files or table IDs
"""
missing_files = []

for file in audeer.progress_bar(
files,
desc=f"Missing {files_type}",
disable=not verbose,
):
def is_cached(file):
if files_type == "table":
if not os.path.exists(
os.path.join(db_root, f"db.{file}.csv")
) and not os.path.exists(os.path.join(db_root, f"db.{file}.parquet")):
missing_files.append(file)
path1 = os.path.join(db_root, f"db.{file}.csv")
path2 = os.path.join(db_root, f"db.{file}.parquet")
return os.path.exists(path1) or os.path.exists(path2)
elif files_type == "media" and flavor.format is not None:
# https://github.com/audeering/audb/issues/324
cached_file = audeer.replace_file_extension(file, flavor.format)
return os.path.exists(os.path.join(db_root, cached_file))
else:
if not os.path.exists(os.path.join(db_root, file)):
missing_files.append(file)
return os.path.exists(os.path.join(db_root, file))

return missing_files
pbar = audeer.progress_bar(files, desc=f"Missing {files_type}", disable=not verbose)
return [file for file in pbar if not is_cached(file)]


def _remove_media(
Expand Down
15 changes: 15 additions & 0 deletions tests/test_load.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -430,6 +430,21 @@ def test_load_from_cache(dbs):
for file in db.files:
assert os.path.exists(os.path.join(db_root, file))

# Ensure no media files in flavor cache are marked as missing files,
# when flavor format is different from original format
# (https://github.com/audeering/audb/issues/324)
original_files = audformat.utils.replace_file_extension(db.files, "wav")
assert (
audb.core.load._missing_files(
original_files,
"media",
db_root,
audb.Flavor(format="flac"),
False,
)
== []
)

version = "2.0.0"
db = audb.load(
DB_NAME,
Expand Down

0 comments on commit 675c485

Please sign in to comment.