Skip to content

Commit

Permalink
Force update script to download the image and calculate SHA512
Browse files Browse the repository at this point in the history
Since we want to check the hash against the OpenStack backend,
we need to have the SHA512. Sadly, most image creators do not
provide that hash pre-computed. That means we will
compute the SHA512 for every new image update in the CI worker
by downloading the image.

Signed-off-by: Gondermann <[email protected]>
  • Loading branch information
gndrmnn committed Sep 22, 2023
1 parent d3a7142 commit 2d96948
Showing 1 changed file with 83 additions and 26 deletions.
109 changes: 83 additions & 26 deletions openstack_image_manager/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
# source of latest URLs: https://gitlab.com/libosinfo/osinfo-db

from datetime import datetime
import hashlib
import math
import os
import re
import shutil
Expand Down Expand Up @@ -105,14 +107,14 @@ def get_latest_debubu(shortname, latest_checksum_url, latest_url, checksum_type=
IMAGES = {
"almalinux": get_latest_default,
"centos": get_latest_default,
"debian": get_latest_debubu,
"rockylinux": get_latest_default,
"debian": get_latest_debubu,
"rockylinux": get_latest_default,
"ubuntu": get_latest_debubu,
}


def mirror_image(
image, minio_server, minio_bucket, minio_access_key, minio_secret_key
image, extracted_file, minio_server, minio_bucket, minio_access_key, minio_secret_key
):
client = Minio(
minio_server,
Expand All @@ -121,14 +123,7 @@ def mirror_image(
)

version = image["versions"][0]

path = urlparse(version["url"])
dirname = image["shortname"]
filename, fileextension = os.path.splitext(os.path.basename(path.path))

if fileextension not in [".bz2", ".zip", ".xz", ".gz"]:
filename += fileextension

shortname = image["shortname"]
format = image["format"]
new_version = version["version"]
Expand All @@ -139,27 +134,75 @@ def mirror_image(
logger.info("'%s' available in '%s'" % (new_filename, dirname))
except S3Error:
logger.info("'%s' not yet available in '%s'" % (new_filename, dirname))
logger.info("Downloading '%s'" % version["url"])
response = requests.get(version["url"], stream=True)
with open(os.path.basename(path.path), "wb") as fp:
shutil.copyfileobj(response.raw, fp)
del response

if fileextension in [".bz2", ".zip", ".xz", ".gz"]:
logger.info("Decompressing '%s'" % os.path.basename(path.path))
patoolib.extract_archive(os.path.basename(path.path), outdir=".")
os.remove(os.path.basename(path.path))

logger.info(
"Uploading '%s' to '%s' as '%s'" % (filename, dirname, new_filename)
"Uploading '%s' to '%s' as '%s'" % (extracted_file, dirname, new_filename)
)

client.fput_object(minio_bucket, os.path.join(dirname, new_filename), filename)
os.remove(filename)
client.fput_object(minio_bucket, os.path.join(dirname, new_filename), extracted_file)

def size_clean(size):
    """Format a byte count as a human-readable string with binary units.

    The value is scaled by powers of 1024 and rendered with two decimals,
    e.g. ``1536`` -> ``"1.50 KiB"``.

    Args:
        size: Number of bytes (int or float).

    Returns:
        The formatted size, e.g. ``"0.00 B"`` or ``"1.00 GiB"``. Values
        below 1024 (including 0) are reported in bytes; values beyond the
        largest known unit are reported in that largest unit.
    """
    size_name = ("B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB")

    # Repeated division instead of math.log(): log(0) raises, values < 1
    # yield a negative index, and float log can land just below an integer
    # for exact powers of 1024. Dividing by 1024 is exact in binary floats.
    value = float(size)
    unit = 0
    while abs(value) >= 1024 and unit < len(size_name) - 1:
        value /= 1024
        unit += 1
    return f"{value:.2f} {size_name[unit]}"

def download_and_hash(download_url: str):
    """Download an image, decompress it if needed, and compute its SHA512.

    The file is streamed into the current working directory under the base
    name taken from the URL path. If the extension is one of ``.bz2``,
    ``.zip``, ``.xz`` or ``.gz`` the download is extracted with patoolib,
    the archive is removed, and the hash is computed over the extracted
    file; otherwise the hash is computed while downloading.

    Args:
        download_url: URL of the image to download.

    Returns:
        A tuple ``(http_headers, checksum, local_file)`` where
        ``http_headers`` are the response headers, ``checksum`` is the
        string ``"sha512:<hexdigest>"`` and ``local_file`` is the name of
        the (extracted) image file on disk. ``(None, None, None)`` is
        returned if no usable file name can be derived from the URL or the
        server does not answer with HTTP 200.
    """
    chunk_size = 8192
    archive_extensions = (".bz2", ".zip", ".xz", ".gz")

    download_filename = os.path.basename(urlparse(download_url).path)

    # Validate explicitly (asserts are stripped under ``python -O``) and do
    # it before any network traffic or file-system access happens.
    if download_filename in ("", ".", " ", "/", ".."):
        logger.error(f"Cannot derive a file name from '{download_url}'")
        return None, None, None

    filename, fileextension = os.path.splitext(download_filename)
    is_archive = fileextension in archive_extensions
    if not is_archive:
        # Not an archive: the extension belongs to the image file itself.
        filename = download_filename

    hash_obj = hashlib.new("sha512")

    with requests.get(url=download_url, stream=True, timeout=30) as response:
        if response.status_code != 200:
            logger.error(f"Downloading image '{download_url}' failed with error code {response.status_code}")
            return None, None, None

        http_headers = response.headers

        # Content-Length is optional (e.g. chunked transfer encoding), so a
        # missing or malformed value only disables progress reporting.
        try:
            file_size = int(http_headers.get("Content-Length", 0))
        except (TypeError, ValueError):
            file_size = 0

        if file_size > 0:
            logger.info(f"Image size {size_clean(file_size)}")
        else:
            logger.info("Image size unknown, download progress will not be reported")

        downloaded_bytes = 0
        last_progress = 0
        with open(download_filename, "wb") as fp:
            for chunk in response.iter_content(chunk_size=chunk_size):
                # Count what was really received; the final chunk is
                # usually shorter than chunk_size.
                downloaded_bytes += len(chunk)
                if file_size > 0:
                    progress = round(min(downloaded_bytes / file_size * 100, 100))
                    # Only log every 5 percentage points to keep CI logs short.
                    if progress - last_progress >= 5:
                        logger.info(f"Downloading image: {progress}%")
                        last_progress = progress

                fp.write(chunk)

                if not is_archive:
                    hash_obj.update(chunk)

    if not is_archive:
        return http_headers, f"sha512:{hash_obj.hexdigest()}", download_filename

    logger.info("Decompressing '%s'" % download_filename)
    patoolib.extract_archive(download_filename, outdir=".")
    os.remove(download_filename)

    # NOTE(review): assumes the archive unpacks to a file named like the
    # archive without its compression suffix - confirm for .zip sources.
    with open(filename, "rb") as fp:
        for chunk in iter(lambda: fp.read(chunk_size), b""):
            hash_obj.update(chunk)

    return http_headers, f"sha512:{hash_obj.hexdigest()}", filename

def update_image(image, getter, minio_server, minio_bucket, minio_access_key, minio_secret_key):
name = image["name"]
logger.info("---")
logger.info(f"Checking image {name}")

latest_url = image["latest_url"]
Expand All @@ -181,6 +224,7 @@ def update_image(image, getter, minio_server, minio_bucket, minio_access_key, mi
"checksum": None,
"url": None,
"version": None,
"verify_checksum": None
}
)

Expand All @@ -191,12 +235,20 @@ def update_image(image, getter, minio_server, minio_bucket, minio_access_key, mi
logger.info(f"Image {name} is up-to-date, nothing to do")
return 0

logger.info(f"Image {name} change detected. Downloading Image...")

headers, verify_checksum, extracted_file = download_and_hash(current_url)
if verify_checksum == None or extracted_file in ["", ".", " ", "/", ".."]:
logger.error(f"Downloading and hashing {name} failed")
return 0

logger.info(f"Image {name} has the verification checksum {verify_checksum}")

if current_version is None:
logger.info(f"Checking {current_url}")
logger.info(f"Using HTTP 'last-modified' header as current version")

conn = urlopen(current_url, timeout=30)
dt = datetime.strptime(
conn.headers["last-modified"], "%a, %d %b %Y %H:%M:%S %Z"
headers["last-modified"], "%a, %d %b %Y %H:%M:%S %Z"
)
current_version = dt.strftime("%Y%m%d")

Expand All @@ -205,6 +257,7 @@ def update_image(image, getter, minio_server, minio_bucket, minio_access_key, mi
"build_date": datetime.strptime(current_version, "%Y%m%d").date(),
"checksum": current_checksum,
"url": current_url,
"verify_checksum": verify_checksum,
}
logger.info(f"New values are {new_values}")
image["versions"][0].update(new_values)
Expand All @@ -220,11 +273,15 @@ def update_image(image, getter, minio_server, minio_bucket, minio_access_key, mi

mirror_image(
image,
extracted_file,
minio_server,
minio_bucket,
minio_access_key,
minio_secret_key,
)

os.remove(extracted_file)

return 1


Expand Down

0 comments on commit 2d96948

Please sign in to comment.