Skip to content

Commit

Permalink
fix: check md5 when fetching from cache
Browse files Browse the repository at this point in the history
  • Loading branch information
18alantom committed May 6, 2024
1 parent 8854551 commit efb5171
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 21 deletions.
82 changes: 63 additions & 19 deletions bench/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,49 +350,68 @@ def get_app_cache_hashed_name(self, temp_path: Path) -> Path:
assert self.cache_key is not None

ext = temp_path.suffix[1:]
md5 = get_file_md5(temp_path.as_posix())
md5 = get_file_md5(temp_path)
tarfile_name = f"{self.app_name}.{self.cache_key}.md5-{md5}.{ext}"

return temp_path.with_name(tarfile_name)

def get_app_cache_path(self, is_compressed=False) -> Path:
def get_app_cache_path(self) -> "Optional[Path]":
assert self.cache_key is not None

cache_path = get_bench_cache_path("apps")
tarfile_name = get_cache_filename(
self.app_name,
self.cache_key,
is_compressed,
)
return cache_path / tarfile_name
glob_pattern = f"{self.app_name}.{self.cache_key}.md5-*"

def get_cached(self) -> bool:
for app_cache_path in cache_path.glob(glob_pattern):
return app_cache_path

return None

def validate_cache_and_get_path(self) -> "Optional[Path]":
if not self.cache_key:
return False
return

cache_path = self.get_app_cache_path(False)
mode = "r"
if not (cache_path := self.get_app_cache_path()):
return

# Check if cache exists without gzip
if not cache_path.is_file():
cache_path = self.get_app_cache_path(True)
mode = "r:gz"
click.secho(
f"Bench app-cache: file check failed for {cache_path.as_posix()}, skipping cache",
fg="yellow",
)
unlink_no_throw(cache_path)
return

# Check if cache exists with gzip
if not cache_path.is_file():
if not is_cache_hash_valid(cache_path):
click.secho(
f"Bench app-cache: hash validation failed for {cache_path.as_posix()}, skipping cache",
fg="yellow",
)
unlink_no_throw(cache_path)
return

return cache_path

def get_cached(self) -> bool:
if not (cache_path := self.validate_cache_and_get_path()):
return False

app_path = self.get_app_path()
if app_path.is_dir():
shutil.rmtree(app_path)

click.secho(f"Bench app-cache: getting {self.app_name} from cache", fg="yellow")

mode = "r:gz" if cache_path.suffix.endswith(".tgz") else "r"
with tarfile.open(cache_path, mode) as tar:
extraction_filter = get_app_cache_extract_filter(count_threshold=150_000)
try:
tar.extractall(app_path.parent, filter=extraction_filter)
except Exception:
message = f"Bench app-cache: extraction failed for {self.app_name}, skipping cache"
click.secho(message, fg="yellow")
click.secho(
message,
fg="yellow",
)
logger.exception(message)
shutil.rmtree(app_path)
return False
Expand Down Expand Up @@ -423,7 +442,10 @@ def set_cache(self, compress_artifacts=False) -> bool:
try:
with tarfile.open(cache_path, mode) as tar:
tar.add(app_path.name)

hashed_path = self.get_app_cache_hashed_name(cache_path)
unlink_no_throw(hashed_path)

cache_path.rename(hashed_path)

success = True
Expand Down Expand Up @@ -501,7 +523,10 @@ def can_frappe_use_cached(app: App) -> bool:
"""
return sv.Version("15.12.0") not in sv.SimpleSpec(min_frappe)
except ValueError:
click.secho(f"Bench app-cache: invalid value found for frappe version '{min_frappe}'", fg="yellow")
click.secho(
f"Bench app-cache: invalid value found for frappe version '{min_frappe}'",
fg="yellow",
)
# Invalid expression
return False

Expand Down Expand Up @@ -1056,3 +1081,22 @@ def get_apps_json(path):

with open(path) as f:
return json.load(f)


def is_cache_hash_valid(cache_path: Path) -> bool:
parts = cache_path.name.split(".")
if len(parts) < 2 or not parts[-2].startswith("md5-"):
return False

md5 = parts[-2].split("-")[1]
return get_file_md5(cache_path) == md5


def unlink_no_throw(path: Path):
if not path.exists():
return

try:
path.unlink(True)
except Exception:
pass
4 changes: 2 additions & 2 deletions bench/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -613,8 +613,8 @@ def filter_function(member: TarInfo, dest_path: str) -> Optional[TarInfo]:

return filter_function

def get_file_md5(p: str) -> "str":
with open(p, "rb") as f:
def get_file_md5(p: Path) -> "str":
with open(p.as_posix(), "rb") as f:
file_md5 = hashlib.md5()
while chunk := f.read(2**16):
file_md5.update(chunk)
Expand Down

0 comments on commit efb5171

Please sign in to comment.