From 84502b9615b2407967637feee4c920cab9cb6040 Mon Sep 17 00:00:00 2001 From: Karl Rister Date: Thu, 20 Jun 2024 14:53:25 -0500 Subject: [PATCH 1/2] log the data used to calculate an image tag --- rickshaw-run | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/rickshaw-run b/rickshaw-run index 69ae812f..b14f4a41 100755 --- a/rickshaw-run +++ b/rickshaw-run @@ -417,6 +417,10 @@ sub calc_image_md5 { die "calc_image_md5(): \$userenv_arg must be defined" if (!defined $userenv_arg); my $req_args = shift; my $arch_suffix = shift; + my $userenv = shift; + my $benchmark_tool = shift; + my $stage = shift; + debug_log(sprintf "calc_image_md5(): userenv=%s benchmark/tool=%s stage=%d\n", $userenv, $benchmark_tool, $stage); my $workshop_sub_cmd; if (defined $req_args) { $workshop_sub_cmd = $workshop_base_cmd . " " . $userenv_arg . " " . $req_args; @@ -489,23 +493,46 @@ sub calc_image_md5 { } } + my $tag_calc_data = $workshop_build_dir . "tag-calc-data__" . $userenv . "__" . $benchmark_tool . "__stage-" . $stage . ".txt"; + debug_log(sprintf "calc_image_md5(): logging tag calculation data to %s\n", $tag_calc_data); + my $tag_fh = open_write_text_file($tag_calc_data) || die "Failed to open " . $tag_calc_data . " for writing\n"; + # compute an md5 hash of relevant information to identify the # userenv my $md5 = Digest::MD5->new; + my $item_header = "# Item #########################################################################\n"; + my $item; + # First is the Initial hash calc on workshop reqs - $md5->add(join("", @config_analysis_output)); + $item = $item_header . "Workshop Config Output:\n" . join("", @config_analysis_output) . "\n"; + $md5->add($item); + print $tag_fh $item; # Second is the hashing contents of files for my $file (sort @files) { debug_log(sprintf "calc_image_md5(): adding '%s' to hash\n", $file); + + $item = $item_header . "File: " . $file . "\nFile Contents:\n"; + $md5->add($item); + print $tag_fh $item; + open(my $fh, $file); + while(<$fh>) { + print $tag_fh $_; + } + print $tag_fh "\n"; + seek $fh, 0, 0; + binmode($fh); - $md5->add($file); $md5->addfile($fh); + $md5->add("\n"); + close($fh); } my $base_hash = $md5->hexdigest; + print $tag_fh $item_header . "Hash: " . $base_hash . "\n"; + close($tag_fh); my $full_hash = $base_hash . "_" . $arch_suffix; debug_log(sprintf "calc_image_md5(): returning '%s'\n", $full_hash); @@ -950,7 +977,7 @@ sub source_container_image { printf "put_json_file(): initial %s: failed\n", $cs_conf_file; exit 1; } - my $tag = calc_image_md5($workshop_base_cmd, $userenv_arg, $req_arg, $container_arch); + my $tag = calc_image_md5($workshop_base_cmd, $userenv_arg, $req_arg, $container_arch, $userenv, $benchmark, scalar(@workshop_args) + 1); $cs_conf{'config'}{'labels'} = [ 'quay.expires-after=' . $quay_image_expiration ]; if (put_json_file($cs_conf_file, \%cs_conf) > 0) { printf "put_json_file(): update %s: failed\n", $cs_conf_file; From 1bbdeda59961f6cff5f8ccb4c32360027d86313b Mon Sep 17 00:00:00 2001 From: Karl Rister Date: Fri, 21 Jun 2024 08:20:57 -0500 Subject: [PATCH 2/2] remove extraneous information (including filenames) from the image tag calculations - most of the extraneous information is harmless, but the filenames can actually vary from one installation to another if the pathing is not the same (installed somewhere besides /opt/crucible, CI environment, etc.) - since the paths are not important, but the file contents are, they are being removed from the hash calculation to avoid simple path differences from resulting in different image tags --- rickshaw-run | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/rickshaw-run b/rickshaw-run index b14f4a41..bcd20b01 100755 --- a/rickshaw-run +++ b/rickshaw-run @@ -505,17 +505,14 @@ sub calc_image_md5 { my $item; # First is the Initial hash calc on workshop reqs - $item = $item_header . "Workshop Config Output:\n" . join("", @config_analysis_output) . "\n"; - $md5->add($item); - print $tag_fh $item; + print $tag_fh $item_header . "Workshop Config Output:\n" . join("", @config_analysis_output) . "\n"; + $md5->add(join("", @config_analysis_output)); # Second is the hashing contents of files for my $file (sort @files) { debug_log(sprintf "calc_image_md5(): adding '%s' to hash\n", $file); - $item = $item_header . "File: " . $file . "\nFile Contents:\n"; - $md5->add($item); - print $tag_fh $item; + print $tag_fh $item_header . "File: " . $file . "\nFile Contents:\n"; open(my $fh, $file); while(<$fh>) { @@ -526,14 +523,13 @@ sub calc_image_md5 { binmode($fh); $md5->addfile($fh); - $md5->add("\n"); close($fh); } my $base_hash = $md5->hexdigest; - print $tag_fh $item_header . "Hash: " . $base_hash . "\n"; - close($tag_fh); my $full_hash = $base_hash . "_" . $arch_suffix; + print $tag_fh $item_header . "Hash: " . $full_hash . "\n"; + close($tag_fh); debug_log(sprintf "calc_image_md5(): returning '%s'\n", $full_hash); return $full_hash;