From 0086cc07d73a6255fb20184fe1ec765e6474fc1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo?= Date: Wed, 20 Nov 2024 12:41:11 +0100 Subject: [PATCH 01/10] Use hash signatures to uniquely identify state results MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extend the idea of error hash signatures and use it to identify the parse results of states as well. Each state may define a set of 'signature fields' and, after the parsing is done and the data is collected, these fields will be used to identify the result set and generate a unique hash string for them. These signatures aren't necessarily related to errors, they simply represent a result. For now, the new states '_summary', '_signature_fields' and '_signature' are defined as hidden and are invisible unless the `--json-full` flag is used. This change also moves the hash generation to a common utility function. Signed-off-by: Ricardo Cañuelo --- logspec/errors/error.py | 11 +++++------ logspec/main.py | 25 +++++++++++++++++++++++-- logspec/utils/utils.py | 28 ++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+), 8 deletions(-) create mode 100644 logspec/utils/utils.py diff --git a/logspec/errors/error.py b/logspec/errors/error.py index 3a5cafc..290a362 100644 --- a/logspec/errors/error.py +++ b/logspec/errors/error.py @@ -3,8 +3,8 @@ # Copyright (C) 2024 Collabora Limited # Author: Ricardo Cañuelo -import hashlib -import json +from logspec.utils.utils import generate_signature + class Error(): def __init__(self): @@ -34,8 +34,8 @@ def parse(self, text): return parse_ret def _generate_signature(self): - """Generates a hash string to uniquely identify this error, - based on a custom set of error fields. + """Uses utils.generate_signature() to generate a unique hash + string for this error, based on a custom set of error fields. This method is meant to be called after the parsing has been done. @@ -48,5 +48,4 @@ def _generate_signature(self): signature_dict[field] = val except AttributeError: continue - signature_json = json.dumps(signature_dict, sort_keys=True, ensure_ascii=False) - self._signature = hashlib.sha1(signature_json.encode('utf-8')).hexdigest() + self._signature = generate_signature(signature_dict) diff --git a/logspec/main.py b/logspec/main.py index ff81534..52bbc5d 100644 --- a/logspec/main.py +++ b/logspec/main.py @@ -10,6 +10,7 @@ import logspec.version from logspec.parser_loader import parser_loader from logspec.utils.defs import JsonSerialize, JsonSerializeDebug +from logspec.utils.utils import update_dict, generate_signature def format_data_output(data, full=False): @@ -46,9 +47,28 @@ def parse_log(log, start_state): The FSM data (dict) after the parsing is done. """ state = start_state - data = {} + data = { + '_signature_fields': [], + '_summary': [], + } cumulative_errors = [] log_start = 0 + + def _generate_signature(data_dict): + """Uses utils.generate_signature() to generate and return a + unique hash for the list of '_signature_fields' found in + data_dict, if any. The returned signature can be used to + uniquely identify the conditions described by those fields. + + Returns None if data_dict doesn't define any signature fields. + """ + signature_dict = {} + if not data_dict.get('_signature_fields'): + return None + for field in data_dict['_signature_fields']: + signature_dict[field] = data_dict[field] + return generate_signature(signature_dict) + while state: # The log fragment to parse is adjusted after every state # transition if the state function sets a `match_end' field in @@ -65,12 +85,13 @@ def parse_log(log, start_state): state = state.transition() if 'errors' in state_data: cumulative_errors.extend(state_data['errors']) - data.update(state_data) + update_dict(data, state_data) if '_match_end' in data: log_start += data['_match_end'] log = log[data['_match_end']:] data['_match_end'] = log_start data['errors'] = cumulative_errors + data['_signature'] = _generate_signature(data) return data diff --git a/logspec/utils/utils.py b/logspec/utils/utils.py new file mode 100644 index 0000000..22dd090 --- /dev/null +++ b/logspec/utils/utils.py @@ -0,0 +1,28 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later +# +# Copyright (C) 2024 Collabora Limited +# Author: Ricardo Cañuelo + +import hashlib +import json + +def update_dict(dest_dict, new_data): + """Updates dest_dict in place with the contents of dict + new_data. This is equivalent to dest_dict.update(new_data) except + that the nested lists in dest_dict are extended/appended if found in + new_data rather than replaced. + """ + for k, v in new_data.items(): + if k in dest_dict and isinstance(dest_dict[k], list): + if isinstance(v, list): + dest_dict[k].extend(v) + else: + dest_dict[k].append(v) + else: + dest_dict[k] = v + + +def generate_signature(data_dict): + """Generates a hash string of the data_dict contents""" + signature_json = json.dumps(data_dict, sort_keys=True, ensure_ascii=False) + return hashlib.sha1(signature_json.encode('utf-8')).hexdigest() From 0dae9a09c486f309ee129b9c5b167595ee016428 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo?= Date: Wed, 20 Nov 2024 12:46:57 +0100 Subject: [PATCH 02/10] states/chromebook_boot: define summary messages and signature fields MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ricardo Cañuelo --- logspec/states/chromebook_boot.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/logspec/states/chromebook_boot.py b/logspec/states/chromebook_boot.py index 1295ede..56ae31a 100644 --- a/logspec/states/chromebook_boot.py +++ b/logspec/states/chromebook_boot.py @@ -39,16 +39,23 @@ def detect_bootloader_start(text, start=None, end=None): ] if start or end: text = text[start:end] - data = {} + data = { + '_signature_fields': [ + 'bootloader.start', + 'bootloader.id', + ], + } regex = '|'.join(tags) match = re.search(regex, text) if match: data['_match_end'] = match.end() data['bootloader.start'] = True data['bootloader.id'] = 'depthcharge' + data['_summary'] = "Depthcharge started" else: data['_match_end'] = end if end else len(text) data['bootloader.start'] = False + data['_summary'] = "Depthcharge start not found" return data From 7c0fcaca832f926fecc13771c989ae0e33927980 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo?= Date: Wed, 20 Nov 2024 12:47:35 +0100 Subject: [PATCH 03/10] states/generic_boot: define summary messages and signature fields MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ricardo Cañuelo --- logspec/states/generic_boot.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/logspec/states/generic_boot.py b/logspec/states/generic_boot.py index 05ed270..a0fac20 100644 --- a/logspec/states/generic_boot.py +++ b/logspec/states/generic_boot.py @@ -35,15 +35,22 @@ def detect_bootloader_end(text, start=None, end=None): ] if start or end: text = text[start:end] - data = {} + data = { + '_signature_fields': [ + 'bootloader.done', + ], + } regex = '|'.join(tags) match = re.search(regex, text) if match: data['_match_end'] = match.end() + start if start else match.end() data['bootloader.done'] = True + data['_summary'] = "Bootloader stage done, jump to kernel" else: data['_match_end'] = end if end else len(text) data['bootloader.done'] = False + data['_summary'] = ("Bootloader stage failed, inconclusive or " + "couldn't detect handover to kernel") return data From ace1a535e53e1e5d4db362c22e6ac34a8a165624 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo?= Date: Wed, 20 Nov 2024 12:47:56 +0100 Subject: [PATCH 04/10] states/linux_kernel: define summary messages and signature fields MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ricardo Cañuelo --- logspec/states/linux_kernel.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/logspec/states/linux_kernel.py b/logspec/states/linux_kernel.py index 8493dc3..6355edb 100644 --- a/logspec/states/linux_kernel.py +++ b/logspec/states/linux_kernel.py @@ -34,15 +34,21 @@ def detect_linux_prompt(text, start=None, end=None): ] if start or end: text = text[start:end] - data = {} + data = { + '_signature_fields': [ + 'linux.boot.prompt', + ], + } regex = '|'.join(tags) match = re.search(regex, text) if match: data['_match_end'] = match.end() + start if start else match.end() data['linux.boot.prompt'] = True + data['_summary'] = "Linux boot prompt found" else: data['_match_end'] = end if end else len(text) data['linux.boot.prompt'] = False + data['_summary'] = "Linux boot prompt not found" # Check for linux-specific errors in the log. If the `done' # condition was found, search only before it. Otherwise search in From 251642bb82177fd2a2400a0a423f24b30ddae7ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo?= Date: Wed, 20 Nov 2024 12:48:13 +0100 Subject: [PATCH 05/10] states/test_baseline: define summary messages and signature fields MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ricardo Cañuelo --- logspec/states/test_baseline.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/logspec/states/test_baseline.py b/logspec/states/test_baseline.py index d2ca320..15b109d 100644 --- a/logspec/states/test_baseline.py +++ b/logspec/states/test_baseline.py @@ -19,7 +19,11 @@ def detect_test_baseline(text, start=None, end=None): ] if start or end: text = text[start:end] - data = {} + data = { + '_signature_fields': [ + 'test.baseline.start', + ], + } regex = '|'.join(start_tags) # Check for test start @@ -27,10 +31,12 @@ def detect_test_baseline(text, start=None, end=None): if not match: data['test.baseline.start'] = False data['_match_end'] = end if end else len(text) + data['_summary'] = "Baseline test not detected" return data test_start = match.end() test_end = None data['test.baseline.start'] = True + data['_summary'] = "Baseline test started" # Check for test end end_tags = [ From 5c0a2a6076897ef47d11d23e22c5cf1577092076 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo?= Date: Wed, 20 Nov 2024 12:48:33 +0100 Subject: [PATCH 06/10] Generate signatures for test_baseline errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ricardo Cañuelo --- logspec/errors/test.py | 6 ++++++ logspec/utils/test_baseline_errors.py | 3 +++ 2 files changed, 9 insertions(+) diff --git a/logspec/errors/test.py b/logspec/errors/test.py index 2d5b07d..1964d1a 100644 --- a/logspec/errors/test.py +++ b/logspec/errors/test.py @@ -12,3 +12,9 @@ class TestError(Error): def __init__(self): super().__init__() self.error_type = "test" + + def _parse(self, text): + """Dummy parse function. The purpose of this is to keep the + caller code working if it calls parse() to generate the error + signature""" + pass diff --git a/logspec/utils/test_baseline_errors.py b/logspec/utils/test_baseline_errors.py index f84264d..c99a8a1 100644 --- a/logspec/utils/test_baseline_errors.py +++ b/logspec/utils/test_baseline_errors.py @@ -17,6 +17,9 @@ def find_test_baseline_dmesg_error(text): error = TestError() error.error_type += ".baseline.dmesg" error.error_summary = match.group('message') + # Parsing on a generic TestError object simply generates a + # signature, we already did the parsing above + error.parse(text) return { 'error': error, '_end': match.end(), From 96665692232194d3f28b06f761cb3a278ac50da2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo?= Date: Thu, 21 Nov 2024 09:44:19 +0100 Subject: [PATCH 07/10] logspec/main: collect states summaries into data['_states_summary'] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ricardo Cañuelo --- logspec/main.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/logspec/main.py b/logspec/main.py index 52bbc5d..34a8fd7 100644 --- a/logspec/main.py +++ b/logspec/main.py @@ -49,7 +49,7 @@ def parse_log(log, start_state): state = start_state data = { '_signature_fields': [], - '_summary': [], + '_states_summary': [], } cumulative_errors = [] log_start = 0 @@ -83,8 +83,13 @@ def _generate_signature(data_dict): logging.debug(f"State: {state}") state_data = state.run(log) state = state.transition() + + # Update collected data with the data generated in this state if 'errors' in state_data: cumulative_errors.extend(state_data['errors']) + state_summary = state_data.pop('_summary', None) + if state_summary: + data['_states_summary'].append(state_summary) update_dict(data, state_data) if '_match_end' in data: log_start += data['_match_end'] From e59e2163751e4b6dd406d583c7b343f1bd9d8938 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo?= Date: Thu, 21 Nov 2024 10:40:21 +0100 Subject: [PATCH 08/10] states/linux_kernel: detect kernel load start MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a state data field: linux.boot.kernel_started that will be True if the parser detects at least one line of kernel log and False if it doesn't. Signed-off-by: Ricardo Cañuelo --- logspec/states/linux_kernel.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/logspec/states/linux_kernel.py b/logspec/states/linux_kernel.py index 6355edb..8425c70 100644 --- a/logspec/states/linux_kernel.py +++ b/logspec/states/linux_kernel.py @@ -7,10 +7,22 @@ from logspec.parser_classes import State from logspec.utils.linux_kernel_errors import find_kernel_error from logspec.parser_loader import register_state +from logspec.utils.defs import * MODULE_NAME = 'linux_kernel' +# Utility functions +def _detect_kernel_start(text): + """Checks if the first line of text looks like the output of a Linux + kernel starting. Returns a Match object if it does, None if it + doesn't. + """ + first_line_end = text.index('\n') + return re.match(fr'{LINUX_TIMESTAMP} .*', + text[:first_line_end]) + + # State functions def detect_linux_prompt(text, start=None, end=None): @@ -37,15 +49,22 @@ def detect_linux_prompt(text, start=None, end=None): data = { '_signature_fields': [ 'linux.boot.prompt', + 'linux.boot.kernel_started', ], } regex = '|'.join(tags) match = re.search(regex, text) if match: data['_match_end'] = match.end() + start if start else match.end() + data['linux.boot.kernel_started'] = True data['linux.boot.prompt'] = True data['_summary'] = "Linux boot prompt found" else: + kernel_first_line_start = text.index('\n') + 1 + if _detect_kernel_start(text[kernel_first_line_start:]): + data['linux.boot.kernel_started'] = True + else: + data['linux.boot.kernel_started'] = False data['_match_end'] = end if end else len(text) data['linux.boot.prompt'] = False data['_summary'] = "Linux boot prompt not found" From cdd0aa867ab9464ff1a6c369d10a6167eaf16db9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo?= Date: Thu, 21 Nov 2024 10:41:49 +0100 Subject: [PATCH 09/10] tests: sync to latest code changes (linux.boot.kernel_started) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ricardo Cañuelo --- tests/test_baseline.py | 4 ++++ tests/test_linux_boot.py | 7 ++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/test_baseline.py b/tests/test_baseline.py index 7f26048..3370970 100644 --- a/tests/test_baseline.py +++ b/tests/test_baseline.py @@ -22,6 +22,7 @@ { "bootloader.done": True, "errors": [], + "linux.boot.kernel_started": True, "linux.boot.prompt": True, "test.baseline.start": False, }), @@ -32,6 +33,7 @@ { "bootloader.done": True, "errors": [], + "linux.boot.kernel_started": True, "linux.boot.prompt": True, "test.baseline.start": True, }), @@ -55,6 +57,7 @@ "error_type": "test.baseline.dmesg", } ], + "linux.boot.kernel_started": True, "linux.boot.prompt": True, "test.baseline.start": True, }), @@ -146,6 +149,7 @@ "error_type": "test.baseline.dmesg", } ], + "linux.boot.kernel_started": True, "linux.boot.prompt": True, "test.baseline.start": True, }), diff --git a/tests/test_linux_boot.py b/tests/test_linux_boot.py index 14f0632..37289b2 100644 --- a/tests/test_linux_boot.py +++ b/tests/test_linux_boot.py @@ -77,6 +77,7 @@ "hardware": "BCM2835" }, ], + "linux.boot.kernel_started": True, "linux.boot.prompt": False, }), @@ -86,6 +87,7 @@ { "bootloader.done": True, "errors": [], + "linux.boot.kernel_started": False, "linux.boot.prompt": False, }), @@ -95,7 +97,8 @@ { "bootloader.done": True, "errors": [], - "linux.boot.prompt": True, + "linux.boot.kernel_started": True, + "linux.boot.prompt": True, }), # Command-line prompt found, multiple errors found (WARNINGs and BUGs) @@ -274,6 +277,7 @@ "modules": [] } ], + "linux.boot.kernel_started": True, "linux.boot.prompt": True, }), @@ -290,6 +294,7 @@ "location": "./include/linux/log2.h:57:13" } ], + "linux.boot.kernel_started": True, "linux.boot.prompt": True, }), ]) From 6e2bdbb0f7fe658bfdb09f750b5e5321ba82c221 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo?= Date: Thu, 21 Nov 2024 12:02:49 +0100 Subject: [PATCH 10/10] states/linux_kernel: summary msg for "kernel didn't start" scenario MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a summary message for the scenario where the parser can't detect any kernel output. With this into place, the possible result cases for this state are: 1) Kernel booted ok and reached a prompt: _summary = Linux boot prompt found 2) Kernel produced no output at all: _summary = Kernel didn't start 3) Kernel started to load but didn't reach a prompt: _summary = Linux boot prompt not found Signed-off-by: Ricardo Cañuelo --- logspec/states/linux_kernel.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/logspec/states/linux_kernel.py b/logspec/states/linux_kernel.py index 8425c70..0d1bd2a 100644 --- a/logspec/states/linux_kernel.py +++ b/logspec/states/linux_kernel.py @@ -60,14 +60,15 @@ def detect_linux_prompt(text, start=None, end=None): data['linux.boot.prompt'] = True data['_summary'] = "Linux boot prompt found" else: + data['linux.boot.prompt'] = False kernel_first_line_start = text.index('\n') + 1 if _detect_kernel_start(text[kernel_first_line_start:]): data['linux.boot.kernel_started'] = True + data['_summary'] = "Linux boot prompt not found" else: data['linux.boot.kernel_started'] = False + data['_summary'] = "Kernel didn't start" data['_match_end'] = end if end else len(text) - data['linux.boot.prompt'] = False - data['_summary'] = "Linux boot prompt not found" # Check for linux-specific errors in the log. If the `done' # condition was found, search only before it. Otherwise search in