#!/usr/bin/env python3

"""
Batch compare UHDM DB outputs. Useful to compare outputs generated by two CI builds.
"""

import argparse
import multiprocessing
import os
import platform
import pprint
import re
import shutil
import subprocess
import sys
import time
import traceback

from collections import OrderedDict
from contextlib import redirect_stdout, redirect_stderr
from datetime import datetime
from enum import Enum, unique
from pathlib import Path
from threading import Lock


_this_filepath = os.path.realpath(__file__)
_default_dbname = 'surelog.uhdm'


_log_mutex = Lock()
def log(text, end='\n'):
    """Thread-safe print: serializes writes so concurrent lines don't interleave."""
    with _log_mutex:  # Lock is a context manager; replaces manual acquire/finally/release
        print(text, end=end, flush=True)


@unique
class Status(Enum):
    """Outcome of one database comparison."""
    PASS = 0
    FAIL = -1
    MISSING = -2  # one (or both) of the two DB files did not exist

    def __str__(self):
        return str(self.name)


def _mkdir(dirpath, retries=10):
    """Create dirpath (and parents), retrying to ride out transient FS races.

    Returns True if the directory exists when done.
    """
    count = 0
    while count < retries:
        os.makedirs(dirpath, exist_ok=True)

        if os.path.exists(dirpath):
            return True

        count += 1
        time.sleep(0.1)

    return os.path.exists(dirpath)


def _rmdir(dirpath, retries=10):
    """Recursively delete dirpath, retrying; the final attempt may raise.

    Returns True if the directory is gone when done.
    """
    count = 0
    while count < retries:
        shutil.rmtree(dirpath, ignore_errors=True)

        if not os.path.exists(dirpath):
            return True

        count += 1
        time.sleep(0.1)

    shutil.rmtree(dirpath)  # last attempt: let the exception propagate for diagnostics
    return not os.path.exists(dirpath)


def _find_files(dirpath, pattern):
    """Recursive glob under dirpath; returns a list of pathlib.Path objects."""
    return list(Path(dirpath).rglob(pattern))


def _is_filtered(name, filters):
    """Return True if name passes the filter set (an empty filter set admits all).

    Filters may be plain strings (case-insensitive exact match) or compiled
    regular expressions (searched anywhere in the name).
    """
    if not filters:
        return True

    for flt in filters:  # renamed: `filter` shadowed the builtin
        if isinstance(flt, str):
            if flt.lower() == name.lower():
                return True
        elif flt.search(name):  # Note: match() reports success only if the match is at index 0
            return True

    return False


def _compare_one(params):
    """Compare one test's LHS vs RHS DB with uhdm-cmp.

    params is (name, lhs_dirpath, rhs_dirpath, uhdm_cmp_filepath, output_dirpath);
    a tuple so it can be shipped through multiprocessing.Pool.map.
    Returns (name, Status). All diagnostic output goes to <output>/<name>.log.
    """
    start_dt = datetime.now()
    name, lhs_dirpath, rhs_dirpath, uhdm_cmp_filepath, output_dirpath = params

    log_filepath = os.path.join(output_dirpath, f'{name}.log')
    lhs_filepath = os.path.join(lhs_dirpath, name, _default_dbname)
    rhs_filepath = os.path.join(rhs_dirpath, name, _default_dbname)

    log(f'Comparing {name}')

    status = Status.FAIL
    with open(log_filepath, 'wt') as log_strm, redirect_stdout(log_strm), redirect_stderr(log_strm):
        lhs_exists = os.path.isfile(lhs_filepath)
        rhs_exists = os.path.isfile(rhs_filepath)

        print(f'start-time: {start_dt}')
        print( '')
        print( 'Environment:')
        print(f'             name: {name}')
        print(f'      lhs-dirpath: {lhs_dirpath}')
        print(f'      rhs-dirpath: {rhs_dirpath}')  # fixed label: was "rhs-filepath" for a dirpath
        print(f'uhdm_cmp-filepath: {uhdm_cmp_filepath}')
        print(f'   output-dirpath: {output_dirpath}')
        print(f'     lhs-filepath: {lhs_filepath}')
        print(f'     rhs-filepath: {rhs_filepath}')
        print(f'     log-filepath: {log_filepath}')
        print(f'       lhs-exists: {lhs_exists}')
        print(f'       rhs-exists: {rhs_exists}')
        print( '\n')

        if lhs_exists and rhs_exists:
            args = [uhdm_cmp_filepath, lhs_filepath, rhs_filepath]

            print('Launching uhdm-cmp with arguments:')
            pprint.pprint(args)
            print('\n', flush=True)

            try:
                result = subprocess.run(
                    args,
                    stdout=log_strm,
                    stderr=subprocess.STDOUT,
                    check=False,
                    cwd=os.path.dirname(uhdm_cmp_filepath))
                print(f'uhdm-cmp terminated with exit code: {result.returncode}')
                status = Status.PASS if result.returncode == 0 else Status.FAIL
            except Exception:
                # If subprocess.run() itself raised, `result` was never bound, so do
                # NOT reference result.returncode here (the original did -> NameError
                # inside the handler). Bare `except:` also narrowed to Exception so
                # KeyboardInterrupt/SystemExit still propagate.
                status = Status.FAIL
                print('uhdm-cmp threw an exception')
                traceback.print_exc()
        else:
            print(f'Existence mismatch: lhs={lhs_exists}, rhs={rhs_exists}')
            status = Status.MISSING

    end_dt = datetime.now()
    delta = end_dt - start_dt
    print(f'end-time: {str(end_dt)} {str(delta)}', flush=True)

    return (name, status)


def _compare(args):
    """Fan out _compare_one over every test found on either side.

    Returns {test_name: Status}. Tests present on only one side are still
    compared (and come back MISSING) because the name set is the union.
    """
    lhs_filepaths = _find_files(args.lhs_dirpath, _default_dbname)
    rhs_filepaths = _find_files(args.rhs_dirpath, _default_dbname)

    # Test name == parent directory name of the DB file.
    test_names = set(Path(Path(filepath).parent).stem for filepath in lhs_filepaths + rhs_filepaths)

    params = [(
        name,
        args.lhs_dirpath,
        args.rhs_dirpath,
        args.uhdm_cmp_filepath,
        args.output_dirpath
    ) for name in test_names if _is_filtered(name, args.filters)]

    jobs = min(args.jobs, len(params))

    if jobs <= 1:  # includes --jobs 0: run sequentially
        results = [_compare_one(param) for param in params]
    else:
        with multiprocessing.Pool(processes=jobs) as pool:
            results = pool.map(_compare_one, params)

    return dict(results)


def _print_report(results):
    """Print a per-test results table and a per-status summary table."""
    columns = ['TESTNAME', 'STATUS']

    rows = []
    summary = OrderedDict([(status.name, 0) for status in Status])
    # Sort failures first (most negative Status first), then by name.
    for name, status in sorted(results.items(), key=lambda r: (-r[1].value, r[0])):
        summary[status.name] += 1
        rows.append([name, status.name])

    widths = [max([len(row[index]) for row in [columns] + rows]) for index in range(len(columns))]
    row_format = '| ' + ' | '.join([f'{{:{widths[i]}}}' for i in range(len(widths))]) + ' |'
    separator = '+-' + '-+-'.join(['-' * width for width in widths]) + '-+'

    print('')
    print('Results: ')
    print(separator)
    print(row_format.format(*columns))
    print(separator)
    for row in rows:
        print(row_format.format(*row))
    print(separator, flush=True)

    rows = [[k, str(v)] for k, v in summary.items()]
    widths = [max([len(str(row[index])) for row in rows]) for index in range(2)]
    row_format = '| ' + ' | '.join([f'{{:{width}}}' for width in widths]) + ' |'
    separator = '+-' + '-+-'.join(['-' * width for width in widths]) + '-+'

    print('')
    print('Summary: ')
    print(separator)
    for row in rows:
        print(row_format.format(*row))
    print(separator, flush=True)


def _main():
    """Entry point: parse args, compare all DBs, report, return failure sum."""
    start_dt = datetime.now()
    print(f'Starting UHDM Database Compare Regression Tests @ {str(start_dt)}')

    parser = argparse.ArgumentParser()
    parser.add_argument('lhs_dirpath', type=str, help='LHS directory containing uhdm DBs.')
    parser.add_argument('rhs_dirpath', type=str, help='RHS directory containing uhdm DBs.')

    parser.add_argument(
        '--uhdm-cmp-filepath', dest='uhdm_cmp_filepath', required=True, type=str, help='Location of uhdm-cmp executable')
    parser.add_argument(
        '--output-dirpath', dest='output_dirpath', required=True, type=str, help='Output directory path')

    parser.add_argument(
        '--filters', nargs='+', required=False, default=[], type=str,
        help='Filter comparing only matching these regex inputs')
    parser.add_argument(
        '--jobs', nargs='?', required=False, default=multiprocessing.cpu_count(), type=int,
        help='Run tests in parallel, optionally providing max number of concurrent processes. Set 0 to run sequentially.')
    args = parser.parse_args()

    args.uhdm_cmp_filepath = os.path.abspath(args.uhdm_cmp_filepath)
    args.output_dirpath = os.path.abspath(args.output_dirpath)

    if args.filters:
        # A filter starting with '@' names a file containing one filter per line.
        filters = set()
        for entry in args.filters:
            if entry.startswith('@'):
                with open(entry[1:], 'rt') as strm:
                    for line in strm:
                        line = line.strip()
                        if line:
                            filters.add(line)
            else:
                filters.add(entry)
        args.filters = filters

    # Purely alphanumeric filters are exact-match strings; anything else is a regex.
    args.filters = [text if text.isalnum() else re.compile(text, re.IGNORECASE) for text in args.filters]

    if (args.jobs is None) or (args.jobs > multiprocessing.cpu_count()):
        args.jobs = multiprocessing.cpu_count()

    print( 'Environment:')
    print(f'      command-line: {" ".join(sys.argv)}')
    print(f'   current-dirpath: {os.getcwd()}')
    print(f'       lhs_dirpath: {args.lhs_dirpath}')
    print(f'       rhs_dirpath: {args.rhs_dirpath}')
    print(f' uhdm-cmp-filepath: {args.uhdm_cmp_filepath}')
    print(f'    output-dirpath: {args.output_dirpath}')
    print(f'           filters: {args.filters}')
    print(f'              jobs: {args.jobs}')
    print( '\n\n')

    _mkdir(args.output_dirpath)

    print('Comparing UHDM Databases ...')
    results = _compare(args)

    failed_count = len([_ for _ in results.values() if _ != Status.PASS])
    # Sum of negative Status values: 0 means everything passed.
    compare_result = sum(_.value for _ in results.values() if _ != Status.PASS)
    print(f'Successfully compared {len(results)} databases with {failed_count} failures.')

    _print_report(results)

    end_dt = datetime.now()
    delta = round((end_dt - start_dt).total_seconds())

    print('')
    print(f'Surelog UHDM Database Compare Regression Test Completed @ {str(end_dt)} in {str(delta)} seconds')

    return compare_result


if __name__ == '__main__':
    sys.exit(_main())
builds. +NOTE: CI artifacts are hierarhical tarballs i.e. tarball within tarball within tarball. +""" + +import argparse +import multiprocessing +import os +import pprint +import re +import shutil +import sys +import tarfile +import time +import zipfile + +from datetime import datetime +from enum import Enum, unique +from pathlib import Path +from threading import Lock + + +_default_dbname = 'surelog.uhdm' +_platform_ids = ['', '.linux', '.osx', '.msys', '.win'] + + +_log_mutex = Lock() +def log(text, end='\n'): + _log_mutex.acquire() + try: + print(text, end=end, flush=True) + finally: + _log_mutex.release() + + +@unique +class Status(Enum): + PASS = 0 + FAIL = -1 + NULL = -2 + + def __str__(self): + return str(self.name) + + +def _mkdir(dirpath, retries=10): + count = 0 + while count < retries: + os.makedirs(dirpath, exist_ok=True) + + if os.path.exists(dirpath): + return True + + count += 1 + time.sleep(0.1) + + return os.path.exists(dirpath) + + +def _rmdir(dirpath, retries=10): + count = 0 + while count < retries: + shutil.rmtree(dirpath, ignore_errors=True) + + if not os.path.exists(dirpath): + return True + + count += 1 + time.sleep(0.1) + + shutil.rmtree(dirpath) + return not os.path.exists(dirpath) + + +def _is_filtered(name, filters): + if not filters: + return True + + for filter in filters: + if isinstance(filter, str): + if filter.lower() == name.lower(): + return True + else: + if filter.search(name): # Note: match() reports success only if the match is at index 0 + return True + + return False + + +def _extract_worker(params): + zip_filepath, output_dirpath, modes, filters = params + archive_name = Path(zip_filepath).stem + + results = {} + with zipfile.ZipFile(zip_filepath, 'r') as zipfile_strm: + with zipfile_strm.open(f'{archive_name}.tar.gz') as tarfile_strm: + with tarfile.open(fileobj=tarfile_strm) as archive_strm: + for test_archive_path in archive_strm.getnames(): + test_archive_name = Path(Path(test_archive_path).stem).stem + + if not 
_is_filtered(test_archive_name, filters): + continue + + dst_dirpath = os.path.join(output_dirpath, test_archive_name) + _mkdir(dst_dirpath) + + results[test_archive_name] = {mode: Status.NULL for mode in modes} + + with tarfile.open(fileobj=archive_strm.extractfile(test_archive_path)) as test_archive_strm: + if 'db' in modes: + for slpp in ['slpp_all', 'slpp_unit']: + src_filepath = f'{test_archive_name}/{slpp}/{_default_dbname}' + + if src_filepath in test_archive_strm.getnames(): + dst_filepath = os.path.join(dst_dirpath, _default_dbname) + + try: + src_strm = test_archive_strm.extractfile(src_filepath) + + with open(dst_filepath, 'wb') as dst_strm: + dst_strm.write(src_strm.read()) + dst_strm.flush() + + if results[test_archive_name]['db'] == Status.NULL: + results[test_archive_name]['db'] = Status.PASS + except Exception: + results[test_archive_name]['db'] = Status.FAIL + + break + + if 'log' in modes: + for platform_id in _platform_ids: + src_filepath = f'{test_archive_name}/{test_archive_name}{platform_id}.log' + + if src_filepath in test_archive_strm.getnames(): + dst_filepath = os.path.join(dst_dirpath, f'{test_archive_name}{platform_id}.log') + + try: + src_strm = test_archive_strm.extractfile(src_filepath) + + with open(dst_filepath, 'wb') as dst_strm: + dst_strm.write(src_strm.read()) + dst_strm.flush() + + if results[test_archive_name]['log'] == Status.NULL: + results[test_archive_name]['log'] = Status.PASS + except Exception: + results[test_archive_name]['log'] = Status.FAIL + + return results + + +def _main(): + start_dt = datetime.now() + print(f'Starting CI artifact extraction @ {str(start_dt)}') + + parser = argparse.ArgumentParser() + parser.add_argument('modes', nargs='+', choices=['db', 'log'], type=str, help='Pick what to extract from available choices') + parser.add_argument('--build-no', dest='build_no', required=True, type=int, help='CI build no.') + parser.add_argument('--archive-filename-pattern', dest='archive_filename_pattern', 
required=False, type=str, + default='sl-{}-linux-gcc-release-regression-{}.zip', help='Archive filepath pattern (with extension).') + parser.add_argument('--shards', dest='shards', nargs='+', type=int, + default=[0, 1, 2, 3, 4, 5], help='List of shards to extract') + parser.add_argument( + '--input-dirpath', dest='input_dirpath', required=False, type=str, + help='Input directory path to find the artifacts.') + parser.add_argument( + '--output-dirpath', dest='output_dirpath', required=False, type=str, + help='Output directory path to extract to.') + parser.add_argument( + '--filters', nargs='+', required=False, default=[], type=str, help='Filter tests matching these regex inputs') + parser.add_argument( + '--jobs', nargs='?', required=False, default=multiprocessing.cpu_count(), type=int, + help='Run tests in parallel, optionally providing max number of concurrent processes. Set 0 to run sequentially.') + args = parser.parse_args() + + args.modes = sorted(set(args.modes)) + args.shards = sorted(set(args.shards)) + + if not os.path.isabs(args.input_dirpath): + args.input_dirpath = os.getcwd() + args.input_dirpath = os.path.abspath(args.input_dirpath) + + if not os.path.isabs(args.output_dirpath): + args.output_dirpath = os.getcwd() + args.output_dirpath = os.path.abspath(args.output_dirpath) + + if args.filters: + filters = set() + for filter in args.filters: + if filter.startswith('@'): + with open(filter[1:], 'rt') as strm: + for line in strm: + line = line.strip() + if line: + filters.add(line) + else: + filters.add(filter) + args.filters = filters + + args.filters = [text if text.isalnum() else re.compile(text, re.IGNORECASE) for text in args.filters] + + if (args.jobs == None) or (args.jobs > multiprocessing.cpu_count()): + args.jobs = multiprocessing.cpu_count() + + + print( 'Environment:') + print(f' command-line: {" ".join(sys.argv)}') + print(f' current-dirpath: {os.getcwd()}') + print(f' build-no: {args.build_no}') + print(f'archive-filename-pattern: 
{args.archive_filename_pattern}') + print(f' shards: {args.shards}') + print(f' input-dirpath: {args.input_dirpath}') + print(f' output-dirpath: {args.output_dirpath}') + print(f' filters: {args.filters}') + print(f' jobs: {args.jobs}') + print( '\n\n') + + _mkdir(args.output_dirpath) + + params = [( + os.path.join(args.input_dirpath, args.archive_filename_pattern.format(args.build_no, shard)), + args.output_dirpath, + args.modes, args.filters + ) for shard in args.shards] + + jobs = min(args.jobs, len(params)) + + if jobs <= 1: + results = [_extract_worker(param) for param in params] + else: + with multiprocessing.Pool(processes=jobs) as pool: + results = pool.map(_extract_worker, params) + + def _test_failed(v: dict): + return sum(0 if s == Status.PASS else 1 for s in v.values()) != 0 + + results = {k: v for result in results for k, v in result.items()} + failures = {k: v for k, v in results.items() if _test_failed(v)} + + end_dt = datetime.now() + delta = round((end_dt - start_dt).total_seconds()) + + if failures: + print(f'Following {len(failures)} failed:') + pprint.pprint(sorted(failures.keys())) + + print('') + print(f'Completed CI artifact extraction @ {str(end_dt)} in {str(delta)} seconds') + + return -1 if failures else 0 + + +if __name__ == '__main__': + sys.exit(_main())