From 4a28faade347368d7c354ab3943fe8a7fa3356c6 Mon Sep 17 00:00:00 2001 From: Karl Rister Date: Thu, 11 Jul 2024 16:55:09 -0500 Subject: [PATCH 1/2] update the remotehosts.py and endpoints.py code to properly handle variable image expiration - when possible use the current expiration values from the quay repository by retrieving them using the quay API - when the quay repository cannot be accessed via the quay API, use the expiration settings from rickshaw-settings.json instead of it being hard coded at 2 weeks --- endpoints/endpoints.py | 31 +++++++++++++---- endpoints/remotehosts/remotehosts.py | 52 +++++++++++++++++++++++++--- 2 files changed, 73 insertions(+), 10 deletions(-) diff --git a/endpoints/endpoints.py b/endpoints/endpoints.py index 37257c64..01ffd5c3 100644 --- a/endpoints/endpoints.py +++ b/endpoints/endpoints.py @@ -797,24 +797,43 @@ def gmtimestamp_to_gmepoch(gmtimestamp): return gmepoch -def image_expiration_gmepoch(): +def image_created_expiration_gmepoch(weeks): """ Determine the UTC epoch timetamp that any image created before is expired Args: - None + weeks (int): The number of weeks to consider as the expiration time post creation Globals: None Returns: - gmepoch (int): A UTC epoch timestamp such as 1712949457 from 2 weeks ago. Any image with a creation + gmepoch (int): A UTC epoch timestamp such as 1712949457 from X weeks ago. Any image with a creation date older than this is expired """ - # seconds/min minutes/hour hours/day days - two_weeks = 60 * 60 * 24 * 14 + # seconds/min minutes/hour hours/day days/week weeks + delta = 60 * 60 * 24 * 7 * weeks + + gmepoch = calendar.timegm(time.gmtime()) - delta + + return gmepoch - gmepoch = calendar.timegm(time.gmtime()) - two_weeks +def image_expiration_gmepoch(): + """ + Return the UTC epoch timestamp that any image expiring before is expired + + Args: + None + + Globals: + None + + + Returns: + gmepoch (int): The current UTC epoch timestamp such as 1712949457. 
Any image with an + expiration date older than this is expired + """ + gmepoch = calendar.timegm(time.gmtime()) return gmepoch diff --git a/endpoints/remotehosts/remotehosts.py b/endpoints/remotehosts/remotehosts.py index 42955cf9..e6837f34 100755 --- a/endpoints/remotehosts/remotehosts.py +++ b/endpoints/remotehosts/remotehosts.py @@ -14,6 +14,7 @@ from pathlib import Path import queue import re +import requests import sys import tempfile import threading @@ -1931,7 +1932,7 @@ def remote_image_manager(thread_name, remote_name, connection, image_max_cache_s in order to cache a complete set of images for a run) Globals: - None + settings (dict): the one data structure to rule then all Returns: None @@ -1945,6 +1946,7 @@ def remote_image_manager(thread_name, remote_name, connection, image_max_cache_s images = dict() images["rickshaw"] = dict() images["podman"] = dict() + images["quay"] = dict() result = endpoints.run_remote(connection, "cat /var/lib/crucible/remotehosts-container-image-census") if result.exited != 0: @@ -1985,8 +1987,38 @@ def remote_image_manager(thread_name, remote_name, connection, image_max_cache_s } thread_logger(thread_name, "images[podman]:\n%s" % (endpoints.dump_json(images["podman"])), remote_name = remote_name, log_prefix = log_prefix) + if settings["rickshaw"]["quay"]["refresh-expiration"]["api-url"] is not None: + thread_logger(thread_name, "Found configuration information necessary to utilize the quay API to obtain image expiration", remote_name = remote_name, log_prefix = log_prefix) + thread_logger(thread_name, "Quay API URL: %s" % (settings["rickshaw"]["quay"]["refresh-expiration"]["api-url"]), remote_name = remote_name, log_prefix = log_prefix) + + for image in images["podman"].keys(): + image_parts = image.split(":") + + get_request = requests.get(settings["rickshaw"]["quay"]["refresh-expiration"]["api-url"] + "/tag", params = { "onlyActiveTags": True, "specificTag": image_parts[1] }) + + query_log_level = "info" + if 
get_request.status_code != requests.codes.ok: + query_log_level = "warning" + + thread_logger(thread_name, "Quay API query for %s returned %d" % (image, get_request.status_code), log_level = query_log_level, remote_name = remote_name, log_prefix = log_prefix) + + if get_request.status_code == requests.codes.ok: + image_json = get_request.json() + if len(image_json["tags"]) == 1: + images["quay"][image] = image_json["tags"][0] + else: + thread_logger(thread_name, "Quay API query for %s found %d tags" % (image, len(image_json["tags"])), log_level = "warning", remote_name = remote_name, log_prefix = log_prefix) + + thread_logger(thread_name, "images[quay]:\n%s" % (endpoints.dump_json(images["quay"])), remote_name = remote_name, log_prefix = log_prefix) + else: + thread_logger(thread_name, "Configuration information necessary to utilize the quay API to obtain image expiration timestamps is not available", remote_name = remote_name, log_prefix = log_prefix) + image_expiration = endpoints.image_expiration_gmepoch() - thread_logger(thread_name, "Images created before %d will be considered expired" % (image_expiration), remote_name = remote_name, log_prefix = log_prefix) + thread_logger(thread_name, "Images evaludated by their expiration data will be considered expired if it is before %d" % (image_expiration), remote_name = remote_name, log_prefix = log_prefix) + + expiration_weeks = int(settings["rickshaw"]["quay"]["image-expiration"].rstrip("w")) + image_created_expiration = endpoints.image_created_expiration_gmepoch(expiration_weeks) + thread_logger(thread_name, "Images evaludated by their creation data will be considered expired if it is before %d (%d weeks ago)" % (image_created_expiration, expiration_weeks), remote_name = remote_name, log_prefix = log_prefix) deletes = [] for image in images["rickshaw"].keys(): @@ -2020,12 +2052,24 @@ def remote_image_manager(thread_name, remote_name, connection, image_max_cache_s thread_logger(thread_name, "Podman image '%s' is not 
present in rickshaw container image census, removing it from the image cache" % (image), remote_name = remote_name, log_prefix = log_prefix) deletes["podman"].append(image) remove_image(thread_name, remote_name, log_prefix, connection, image) - elif images["podman"][image]["created"] < image_expiration: - thread_logger(thread_name, "Podman image '%s' has expired, removing it from the image cache" % (image), remote_name = remote_name, log_prefix = log_prefix) + + if image in images["quay"]: + if images["quay"][image]["end_ts"] < image_expiration: + thread_logger(thread_name, "Podman image '%s' has been evaluated based on it's expiration data and has expired, removing it from the image cache" % (image), remote_name = remote_name, log_prefix = log_prefix) + deletes["podman"].append(image) + deletes["rickshaw"].append(image) + remove_image(thread_name, remote_name, log_prefix, connection, image) + else: + thread_logger(thread_name, "Podman image '%s' has been evaluated based on it's expiration data and has not expired" % (image), remote_name = remote_name, log_prefix = log_prefix) + elif images["podman"][image]["created"] < image_created_expiration: + thread_logger(thread_name, "Podman image '%s' has been evaluated based on it's creation data and has expired, removing it from the image cache" % (image), remote_name = remote_name, log_prefix = log_prefix) deletes["podman"].append(image) deletes["rickshaw"].append(image) remove_image(thread_name, remote_name, log_prefix, connection, image) else: + thread_logger(thread_name, "Podman image '%s' has been evaluated based on it's creation data and has not expired" % (image), remote_name = remote_name, log_prefix = log_prefix) + + if not image in deletes["podman"]: thread_logger(thread_name, "Podman image '%s' is valid and remains under consideration" % (image), remote_name = remote_name, log_prefix = log_prefix) for kind in deletes.keys(): for image in deletes[kind]: From a26ba361de7156ac97ec9666e75c5776a4a9ba80 Mon Sep 17 
00:00:00 2001 From: Karl Rister Date: Fri, 12 Jul 2024 09:06:43 -0500 Subject: [PATCH 2/2] update a quay API call to use onlyActiveTags instead of limit - I think this is just a better way of running the query since it purposefully avoids the possibility of returning a stale (inactive) tag --- rickshaw-run | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rickshaw-run b/rickshaw-run index 5298e5f6..48314ab4 100755 --- a/rickshaw-run +++ b/rickshaw-run @@ -1073,7 +1073,7 @@ sub source_container_image { my $query_cmd = 'curl --silent' . ' -X GET -H "Authorization: Bearer ' . $quay_refresh_expiration_token . '"' . ' "' . $quay_refresh_expiration_api_url . - '/tag/?limit=1&specificTag=' . $workshop_args[$x]{'tag'} . '"'; + '/tag/?onlyActiveTags=true&specificTag=' . $workshop_args[$x]{'tag'} . '"'; ($query_cmd, my $query_status, my $query_rc) = run_cmd($query_cmd); chomp($query_status);