From 2d969488003a20350ac073c65e3879843db0c1d2 Mon Sep 17 00:00:00 2001 From: Gondermann Date: Thu, 10 Aug 2023 16:43:14 +0200 Subject: [PATCH] Force update script to download the image and calculate SHA512 Since we want to check the hash against the OpenStack backend, we need to have the SHA512. Sadly, most image creators do not provide us with that hash pre-computed. That means we will compute the SHA512 for every new image update in the CI worker by downloading the image. Signed-off-by: Gondermann --- openstack_image_manager/update.py | 109 +++++++++++++++++++++++------- 1 file changed, 83 insertions(+), 26 deletions(-) diff --git a/openstack_image_manager/update.py b/openstack_image_manager/update.py index 46469a1e..2bdc8dc7 100644 --- a/openstack_image_manager/update.py +++ b/openstack_image_manager/update.py @@ -2,6 +2,8 @@ # source of latest URLs: https://gitlab.com/libosinfo/osinfo-db from datetime import datetime +import hashlib +import math import os import re import shutil @@ -105,14 +107,14 @@ def get_latest_debubu(shortname, latest_checksum_url, latest_url, checksum_type= IMAGES = { "almalinux": get_latest_default, "centos": get_latest_default, - "debian": get_latest_debubu, - "rockylinux": get_latest_default, + "debian": get_latest_debubu, + "rockylinux": get_latest_default, "ubuntu": get_latest_debubu, } def mirror_image( - image, minio_server, minio_bucket, minio_access_key, minio_secret_key + image, extracted_file, minio_server, minio_bucket, minio_access_key, minio_secret_key ): client = Minio( minio_server, @@ -121,14 +123,7 @@ def mirror_image( ) version = image["versions"][0] - - path = urlparse(version["url"]) dirname = image["shortname"] - filename, fileextension = os.path.splitext(os.path.basename(path.path)) - - if fileextension not in [".bz2", ".zip", ".xz", ".gz"]: - filename += fileextension - shortname = image["shortname"] format = image["format"] new_version = version["version"] @@ -139,27 +134,75 @@ def mirror_image( logger.info("'%s' 
available in '%s'" % (new_filename, dirname)) except S3Error: logger.info("'%s' not yet available in '%s'" % (new_filename, dirname)) - logger.info("Downloading '%s'" % version["url"]) - response = requests.get(version["url"], stream=True) - with open(os.path.basename(path.path), "wb") as fp: - shutil.copyfileobj(response.raw, fp) - del response - - if fileextension in [".bz2", ".zip", ".xz", ".gz"]: - logger.info("Decompressing '%s'" % os.path.basename(path.path)) - patoolib.extract_archive(os.path.basename(path.path), outdir=".") - os.remove(os.path.basename(path.path)) logger.info( - "Uploading '%s' to '%s' as '%s'" % (filename, dirname, new_filename) + "Uploading '%s' to '%s' as '%s'" % (extracted_file, dirname, new_filename) ) - client.fput_object(minio_bucket, os.path.join(dirname, new_filename), filename) - os.remove(filename) + client.fput_object(minio_bucket, os.path.join(dirname, new_filename), extracted_file) + +def size_clean(size): + size_name = ("B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB") + i = int(math.floor(math.log(size, 1024))) + s = size / 1024 ** i + return f"{s:.2f} {size_name[i]}" + +def download_and_hash(download_url: str): + path = urlparse(download_url) + filename, fileextension = os.path.splitext(os.path.basename(path.path)) + is_archive = fileextension in [".bz2", ".zip", ".xz", ".gz"] + if not is_archive: + filename += fileextension + download_filename = os.path.basename(path.path) + http_headers = None + hash_obj = hashlib.new("sha512") + + with requests.get(url=download_url, stream=True, timeout=30) as response: + if response.status_code != 200: + logger.error(f"Downloading image '{download_url}' failed with error code {response.status_code}") + return None, None, None + + http_headers = response.headers + file_size = int(http_headers["Content-Length"]) + logger.info(f"Image size {size_clean(file_size)}") + + downloadedBytes = 0 + lastProgress = 0 + with open(download_filename, "wb") as fp: + for chunk in 
response.iter_content(chunk_size=8192): + downloadedBytes += 8192 + progressPercent = (downloadedBytes / file_size) * 100 + progress = round(min(max(progressPercent, 0), 100)) + if progress - lastProgress >= 5: + logger.info(f"Downloading image: {progress}%") + lastProgress = progress + + fp.write(chunk) + + if not is_archive: + hash_obj.update(chunk) + + if not is_archive: + sha512 = hash_obj.hexdigest() + return http_headers, f"sha512:{sha512}", download_filename + else: + assert download_filename not in ["", ".", " ", "/", ".."] + logger.info("Decompressing '%s'" % download_filename) + patoolib.extract_archive(download_filename, outdir=".") + os.remove(download_filename) + + with open(filename, 'rb') as fp: + chunk = fp.read(8192) + while chunk: + hash_obj.update(chunk) + chunk = fp.read(8192) + sha512 = hash_obj.hexdigest() + return http_headers, f"sha512:{sha512}", filename def update_image(image, getter, minio_server, minio_bucket, minio_access_key, minio_secret_key): name = image["name"] + logger.info("---") logger.info(f"Checking image {name}") latest_url = image["latest_url"] @@ -181,6 +224,7 @@ def update_image(image, getter, minio_server, minio_bucket, minio_access_key, mi "checksum": None, "url": None, "version": None, + "verify_checksum": None } ) @@ -191,12 +235,20 @@ def update_image(image, getter, minio_server, minio_bucket, minio_access_key, mi logger.info(f"Image {name} is up-to-date, nothing to do") return 0 + logger.info(f"Image {name} change detected. 
Downloading Image...") + + headers, verify_checksum, extracted_file = download_and_hash(current_url) + if verify_checksum == None or extracted_file in ["", ".", " ", "/", ".."]: + logger.error(f"Downloading and hashing {name} failed") + return 0 + + logger.info(f"Image {name} has the verification checksum {verify_checksum}") + if current_version is None: - logger.info(f"Checking {current_url}") + logger.info(f"Using HTTP 'last-modified' header as current version") - conn = urlopen(current_url, timeout=30) dt = datetime.strptime( - conn.headers["last-modified"], "%a, %d %b %Y %H:%M:%S %Z" + headers["last-modified"], "%a, %d %b %Y %H:%M:%S %Z" ) current_version = dt.strftime("%Y%m%d") @@ -205,6 +257,7 @@ def update_image(image, getter, minio_server, minio_bucket, minio_access_key, mi "build_date": datetime.strptime(current_version, "%Y%m%d").date(), "checksum": current_checksum, "url": current_url, + "verify_checksum": verify_checksum, } logger.info(f"New values are {new_values}") image["versions"][0].update(new_values) @@ -220,11 +273,15 @@ def update_image(image, getter, minio_server, minio_bucket, minio_access_key, mi mirror_image( image, + extracted_file, minio_server, minio_bucket, minio_access_key, minio_secret_key, ) + + os.remove(extracted_file) + return 1