From 0086cc07d73a6255fb20184fe1ec765e6474fc1b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo?= <ricardo.canuelo@collabora.com>
Date: Wed, 20 Nov 2024 12:41:11 +0100
Subject: [PATCH 01/10] Use hash signatures to uniquely identify state results
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Extend the idea of error hash signatures and use it to identify the
parse results of states as well. Each state may define a set of
'signature fields' and, after the parsing is done and the data is
collected, these fields will be used to identify the result set and
generate a unique hash string for them.

These signatures aren't necessarily related to errors, they simply
represent a result.

For now, the new states '_summary', '_signature_fields' and '_signature'
are defined as hidden and are invisible unless the `--json-full` flag is
used.

This change also moves the hash generation to a common utility function.

Signed-off-by: Ricardo Cañuelo <ricardo.canuelo@collabora.com>
---
 logspec/errors/error.py | 11 +++++------
 logspec/main.py         | 25 +++++++++++++++++++++++--
 logspec/utils/utils.py  | 28 ++++++++++++++++++++++++++++
 3 files changed, 56 insertions(+), 8 deletions(-)
 create mode 100644 logspec/utils/utils.py

diff --git a/logspec/errors/error.py b/logspec/errors/error.py
index 3a5cafc..290a362 100644
--- a/logspec/errors/error.py
+++ b/logspec/errors/error.py
@@ -3,8 +3,8 @@
 # Copyright (C) 2024 Collabora Limited
 # Author: Ricardo Cañuelo <ricardo.canuelo@collabora.com>
 
-import hashlib
-import json
+from logspec.utils.utils import generate_signature
+
 
 class Error():
     def __init__(self):
@@ -34,8 +34,8 @@ def parse(self, text):
         return parse_ret
 
     def _generate_signature(self):
-        """Generates a hash string to uniquely identify this error,
-        based on a custom set of error fields.
+        """Uses utils.generate_signature() to generate a unique hash
+        string for this error, based on a custom set of error fields.
 
         This method is meant to be called after the parsing has been
         done.
@@ -48,5 +48,4 @@ def _generate_signature(self):
                     signature_dict[field] = val
             except AttributeError:
                 continue
-        signature_json = json.dumps(signature_dict, sort_keys=True, ensure_ascii=False)
-        self._signature = hashlib.sha1(signature_json.encode('utf-8')).hexdigest()
+        self._signature = generate_signature(signature_dict)
diff --git a/logspec/main.py b/logspec/main.py
index ff81534..52bbc5d 100644
--- a/logspec/main.py
+++ b/logspec/main.py
@@ -10,6 +10,7 @@
 import logspec.version
 from logspec.parser_loader import parser_loader
 from logspec.utils.defs import JsonSerialize, JsonSerializeDebug
+from logspec.utils.utils import update_dict, generate_signature
 
 
 def format_data_output(data, full=False):
@@ -46,9 +47,28 @@ def parse_log(log, start_state):
       The FSM data (dict) after the parsing is done.
     """
     state = start_state
-    data = {}
+    data = {
+        '_signature_fields': [],
+        '_summary': [],
+    }
     cumulative_errors = []
     log_start = 0
+
+    def _generate_signature(data_dict):
+        """Uses utils.generate_signature() to generate and return a
+        unique hash for the list of '_signature_fields' found in
+        data_dict, if any.  The returned signature can be used to
+        uniquely identify the conditions described by those fields.
+
+        Returns None if data_dict doesn't define any signature fields.
+        """
+        signature_dict = {}
+        if not data_dict.get('_signature_fields'):
+            return None
+        for field in data_dict['_signature_fields']:
+            signature_dict[field] = data_dict[field]
+        return generate_signature(signature_dict)
+
     while state:
         # The log fragment to parse is adjusted after every state
         # transition if the state function sets a `match_end' field in
@@ -65,12 +85,13 @@ def parse_log(log, start_state):
         state = state.transition()
         if 'errors' in state_data:
             cumulative_errors.extend(state_data['errors'])
-        data.update(state_data)
+        update_dict(data, state_data)
         if '_match_end' in data:
             log_start += data['_match_end']
             log = log[data['_match_end']:]
             data['_match_end'] = log_start
     data['errors'] = cumulative_errors
+    data['_signature'] = _generate_signature(data)
     return data
 
 
diff --git a/logspec/utils/utils.py b/logspec/utils/utils.py
new file mode 100644
index 0000000..22dd090
--- /dev/null
+++ b/logspec/utils/utils.py
@@ -0,0 +1,28 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+#
+# Copyright (C) 2024 Collabora Limited
+# Author: Ricardo Cañuelo <ricardo.canuelo@collabora.com>
+
+import hashlib
+import json
+
+def update_dict(dest_dict, new_data):
+    """Updates dest_dict in place with the contents of dict
+    new_data. This is equivalent to dest_dict.update(new_data) except
+    that the nested lists in dest_dict are extended/appended if found in
+    new_data rather than replaced.
+    """
+    for k, v in new_data.items():
+        if k in dest_dict and isinstance(dest_dict[k], list):
+            if isinstance(v, list):
+                dest_dict[k].extend(v)
+            else:
+                dest_dict[k].append(v)
+        else:
+            dest_dict[k] = v
+
+
+def generate_signature(data_dict):
+    """Generates a hash string of the data_dict contents"""
+    signature_json = json.dumps(data_dict, sort_keys=True, ensure_ascii=False)
+    return hashlib.sha1(signature_json.encode('utf-8')).hexdigest()

From 0dae9a09c486f309ee129b9c5b167595ee016428 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo?= <ricardo.canuelo@collabora.com>
Date: Wed, 20 Nov 2024 12:46:57 +0100
Subject: [PATCH 02/10] states/chromebook_boot: define summary messages and
 signature fields
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ricardo Cañuelo <ricardo.canuelo@collabora.com>
---
 logspec/states/chromebook_boot.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/logspec/states/chromebook_boot.py b/logspec/states/chromebook_boot.py
index 1295ede..56ae31a 100644
--- a/logspec/states/chromebook_boot.py
+++ b/logspec/states/chromebook_boot.py
@@ -39,16 +39,23 @@ def detect_bootloader_start(text, start=None, end=None):
     ]
     if start or end:
         text = text[start:end]
-    data = {}
+    data = {
+        '_signature_fields': [
+            'bootloader.start',
+            'bootloader.id',
+        ],
+    }
     regex = '|'.join(tags)
     match = re.search(regex, text)
     if match:
         data['_match_end'] = match.end()
         data['bootloader.start'] = True
         data['bootloader.id'] = 'depthcharge'
+        data['_summary'] = "Depthcharge started"
     else:
         data['_match_end'] = end if end else len(text)
         data['bootloader.start'] = False
+        data['_summary'] = "Depthcharge start not found"
     return data
 
 

From 7c0fcaca832f926fecc13771c989ae0e33927980 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo?= <ricardo.canuelo@collabora.com>
Date: Wed, 20 Nov 2024 12:47:35 +0100
Subject: [PATCH 03/10] states/generic_boot: define summary messages and
 signature fields
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ricardo Cañuelo <ricardo.canuelo@collabora.com>
---
 logspec/states/generic_boot.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/logspec/states/generic_boot.py b/logspec/states/generic_boot.py
index 05ed270..a0fac20 100644
--- a/logspec/states/generic_boot.py
+++ b/logspec/states/generic_boot.py
@@ -35,15 +35,22 @@ def detect_bootloader_end(text, start=None, end=None):
     ]
     if start or end:
         text = text[start:end]
-    data = {}
+    data = {
+        '_signature_fields': [
+            'bootloader.done',
+        ],
+    }
     regex = '|'.join(tags)
     match = re.search(regex, text)
     if match:
         data['_match_end'] = match.end() + start if start else match.end()
         data['bootloader.done'] = True
+        data['_summary'] = "Bootloader stage done, jump to kernel"
     else:
         data['_match_end'] = end if end else len(text)
         data['bootloader.done'] = False
+        data['_summary'] = ("Bootloader stage failed, inconclusive or "
+                            "couldn't detect handover to kernel")
     return data
 
 

From ace1a535e53e1e5d4db362c22e6ac34a8a165624 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo?= <ricardo.canuelo@collabora.com>
Date: Wed, 20 Nov 2024 12:47:56 +0100
Subject: [PATCH 04/10] states/linux_kernel: define summary messages and
 signature fields
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ricardo Cañuelo <ricardo.canuelo@collabora.com>
---
 logspec/states/linux_kernel.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/logspec/states/linux_kernel.py b/logspec/states/linux_kernel.py
index 8493dc3..6355edb 100644
--- a/logspec/states/linux_kernel.py
+++ b/logspec/states/linux_kernel.py
@@ -34,15 +34,21 @@ def detect_linux_prompt(text, start=None, end=None):
     ]
     if start or end:
         text = text[start:end]
-    data = {}
+    data = {
+        '_signature_fields': [
+            'linux.boot.prompt',
+        ],
+    }
     regex = '|'.join(tags)
     match = re.search(regex, text)
     if match:
         data['_match_end'] = match.end() + start if start else match.end()
         data['linux.boot.prompt'] = True
+        data['_summary'] = "Linux boot prompt found"
     else:
         data['_match_end'] = end if end else len(text)
         data['linux.boot.prompt'] = False
+        data['_summary'] = "Linux boot prompt not found"
 
     # Check for linux-specific errors in the log. If the `done'
     # condition was found, search only before it. Otherwise search in

From 251642bb82177fd2a2400a0a423f24b30ddae7ab Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo?= <ricardo.canuelo@collabora.com>
Date: Wed, 20 Nov 2024 12:48:13 +0100
Subject: [PATCH 05/10] states/test_baseline: define summary messages and
 signature fields
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ricardo Cañuelo <ricardo.canuelo@collabora.com>
---
 logspec/states/test_baseline.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/logspec/states/test_baseline.py b/logspec/states/test_baseline.py
index d2ca320..15b109d 100644
--- a/logspec/states/test_baseline.py
+++ b/logspec/states/test_baseline.py
@@ -19,7 +19,11 @@ def detect_test_baseline(text, start=None, end=None):
     ]
     if start or end:
         text = text[start:end]
-    data = {}
+    data = {
+        '_signature_fields': [
+            'test.baseline.start',
+        ],
+    }
     regex = '|'.join(start_tags)
 
     # Check for test start
@@ -27,10 +31,12 @@ def detect_test_baseline(text, start=None, end=None):
     if not match:
         data['test.baseline.start'] = False
         data['_match_end'] = end if end else len(text)
+        data['_summary'] = "Baseline test not detected"
         return data
     test_start = match.end()
     test_end = None
     data['test.baseline.start'] = True
+    data['_summary'] = "Baseline test started"
 
     # Check for test end
     end_tags = [

From 5c0a2a6076897ef47d11d23e22c5cf1577092076 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo?= <ricardo.canuelo@collabora.com>
Date: Wed, 20 Nov 2024 12:48:33 +0100
Subject: [PATCH 06/10] Generate signatures for test_baseline errors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ricardo Cañuelo <ricardo.canuelo@collabora.com>
---
 logspec/errors/test.py                | 6 ++++++
 logspec/utils/test_baseline_errors.py | 3 +++
 2 files changed, 9 insertions(+)

diff --git a/logspec/errors/test.py b/logspec/errors/test.py
index 2d5b07d..1964d1a 100644
--- a/logspec/errors/test.py
+++ b/logspec/errors/test.py
@@ -12,3 +12,9 @@ class TestError(Error):
     def __init__(self):
         super().__init__()
         self.error_type = "test"
+
+    def _parse(self, text):
+        """Dummy parse function. The purpose of this is to keep the
+        caller code working if it calls parse() to generate the error
+        signature"""
+        pass
diff --git a/logspec/utils/test_baseline_errors.py b/logspec/utils/test_baseline_errors.py
index f84264d..c99a8a1 100644
--- a/logspec/utils/test_baseline_errors.py
+++ b/logspec/utils/test_baseline_errors.py
@@ -17,6 +17,9 @@ def find_test_baseline_dmesg_error(text):
     error = TestError()
     error.error_type += ".baseline.dmesg"
     error.error_summary = match.group('message')
+    # Parsing on a generic TestError object simply generates a
+    # signature, we already did the parsing above
+    error.parse(text)
     return {
         'error': error,
         '_end': match.end(),

From 96665692232194d3f28b06f761cb3a278ac50da2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo?= <ricardo.canuelo@collabora.com>
Date: Thu, 21 Nov 2024 09:44:19 +0100
Subject: [PATCH 07/10] logspec/main: collect states summaries into
 data['_states_summary']
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ricardo Cañuelo <ricardo.canuelo@collabora.com>
---
 logspec/main.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/logspec/main.py b/logspec/main.py
index 52bbc5d..34a8fd7 100644
--- a/logspec/main.py
+++ b/logspec/main.py
@@ -49,7 +49,7 @@ def parse_log(log, start_state):
     state = start_state
     data = {
         '_signature_fields': [],
-        '_summary': [],
+        '_states_summary': [],
     }
     cumulative_errors = []
     log_start = 0
@@ -83,8 +83,13 @@ def _generate_signature(data_dict):
         logging.debug(f"State: {state}")
         state_data = state.run(log)
         state = state.transition()
+
+        # Update collected data with the data generated in this state
         if 'errors' in state_data:
             cumulative_errors.extend(state_data['errors'])
+        state_summary = state_data.pop('_summary', None)
+        if state_summary:
+            data['_states_summary'].append(state_summary)
         update_dict(data, state_data)
         if '_match_end' in data:
             log_start += data['_match_end']

From e59e2163751e4b6dd406d583c7b343f1bd9d8938 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo?= <ricardo.canuelo@collabora.com>
Date: Thu, 21 Nov 2024 10:40:21 +0100
Subject: [PATCH 08/10] states/linux_kernel: detect kernel load start
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a state data field: linux.boot.kernel_started that will be True if
the parser detects at least one line of kernel log and False if it
doesn't.

Signed-off-by: Ricardo Cañuelo <ricardo.canuelo@collabora.com>
---
 logspec/states/linux_kernel.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/logspec/states/linux_kernel.py b/logspec/states/linux_kernel.py
index 6355edb..8425c70 100644
--- a/logspec/states/linux_kernel.py
+++ b/logspec/states/linux_kernel.py
@@ -7,10 +7,22 @@
 from logspec.parser_classes import State
 from logspec.utils.linux_kernel_errors import find_kernel_error
 from logspec.parser_loader import register_state
+from logspec.utils.defs import *
 
 MODULE_NAME = 'linux_kernel'
 
 
+# Utility functions
+def _detect_kernel_start(text):
+    """Checks if the first line of text looks like the output of a Linux
+    kernel starting. Returns a Match object if it does, None if it
+    doesn't.
+    """
+    first_line_end = text.index('\n')
+    return re.match(fr'{LINUX_TIMESTAMP} .*',
+                    text[:first_line_end])
+
+
 # State functions
 
 def detect_linux_prompt(text, start=None, end=None):
@@ -37,15 +49,22 @@ def detect_linux_prompt(text, start=None, end=None):
     data = {
         '_signature_fields': [
             'linux.boot.prompt',
+            'linux.boot.kernel_started',
         ],
     }
     regex = '|'.join(tags)
     match = re.search(regex, text)
     if match:
         data['_match_end'] = match.end() + start if start else match.end()
+        data['linux.boot.kernel_started'] = True
         data['linux.boot.prompt'] = True
         data['_summary'] = "Linux boot prompt found"
     else:
+        kernel_first_line_start = text.index('\n') + 1
+        if _detect_kernel_start(text[kernel_first_line_start:]):
+            data['linux.boot.kernel_started'] = True
+        else:
+            data['linux.boot.kernel_started'] = False
         data['_match_end'] = end if end else len(text)
         data['linux.boot.prompt'] = False
         data['_summary'] = "Linux boot prompt not found"

From cdd0aa867ab9464ff1a6c369d10a6167eaf16db9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo?= <ricardo.canuelo@collabora.com>
Date: Thu, 21 Nov 2024 10:41:49 +0100
Subject: [PATCH 09/10] tests: sync to latest code changes
 (linux.boot.kernel_started)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ricardo Cañuelo <ricardo.canuelo@collabora.com>
---
 tests/test_baseline.py   | 4 ++++
 tests/test_linux_boot.py | 7 ++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/tests/test_baseline.py b/tests/test_baseline.py
index 7f26048..3370970 100644
--- a/tests/test_baseline.py
+++ b/tests/test_baseline.py
@@ -22,6 +22,7 @@
      {
          "bootloader.done": True,
          "errors": [],
+         "linux.boot.kernel_started": True,
          "linux.boot.prompt": True,
          "test.baseline.start": False,
      }),
@@ -32,6 +33,7 @@
      {
          "bootloader.done": True,
          "errors": [],
+         "linux.boot.kernel_started": True,
          "linux.boot.prompt": True,
          "test.baseline.start": True,
      }),
@@ -55,6 +57,7 @@
                  "error_type": "test.baseline.dmesg",
              }
          ],
+         "linux.boot.kernel_started": True,
          "linux.boot.prompt": True,
          "test.baseline.start": True,
      }),
@@ -146,6 +149,7 @@
                  "error_type": "test.baseline.dmesg",
              }
          ],
+         "linux.boot.kernel_started": True,
          "linux.boot.prompt": True,
          "test.baseline.start": True,
      }),
diff --git a/tests/test_linux_boot.py b/tests/test_linux_boot.py
index 14f0632..37289b2 100644
--- a/tests/test_linux_boot.py
+++ b/tests/test_linux_boot.py
@@ -77,6 +77,7 @@
                 "hardware": "BCM2835"
             },
         ],
+        "linux.boot.kernel_started": True,
         "linux.boot.prompt": False,
      }),
 
@@ -86,6 +87,7 @@
      {
          "bootloader.done": True,
          "errors": [],
+         "linux.boot.kernel_started": False,
          "linux.boot.prompt": False,
      }),
 
@@ -95,7 +97,8 @@
      {
         "bootloader.done": True,
         "errors": [],
-         "linux.boot.prompt": True,
+        "linux.boot.kernel_started": True,
+        "linux.boot.prompt": True,
      }),
 
     # Command-line prompt found, multiple errors found (WARNINGs and BUGs)
@@ -274,6 +277,7 @@
                 "modules": []
             }
         ],
+        "linux.boot.kernel_started": True,
         "linux.boot.prompt": True,
     }),
 
@@ -290,6 +294,7 @@
                  "location": "./include/linux/log2.h:57:13"
              }
          ],
+         "linux.boot.kernel_started": True,
          "linux.boot.prompt": True,
      }),
 ])

From 6e2bdbb0f7fe658bfdb09f750b5e5321ba82c221 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo?= <ricardo.canuelo@collabora.com>
Date: Thu, 21 Nov 2024 12:02:49 +0100
Subject: [PATCH 10/10] states/linux_kernel: summary msg for "kernel didn't
 start" scenario
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a summary message for the scenario where the parser can't detect any
kernel output. With this into place, the possible result cases for this
state are:

1) Kernel booted ok and reached a prompt:
   _summary = Linux boot prompt found
2) Kernel produced no output at all:
   _summary = Kernel didn't start
3) Kernel started to load but didn't reach a prompt:
   _summary = Linux boot prompt not found

Signed-off-by: Ricardo Cañuelo <ricardo.canuelo@collabora.com>
---
 logspec/states/linux_kernel.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/logspec/states/linux_kernel.py b/logspec/states/linux_kernel.py
index 8425c70..0d1bd2a 100644
--- a/logspec/states/linux_kernel.py
+++ b/logspec/states/linux_kernel.py
@@ -60,14 +60,15 @@ def detect_linux_prompt(text, start=None, end=None):
         data['linux.boot.prompt'] = True
         data['_summary'] = "Linux boot prompt found"
     else:
+        data['linux.boot.prompt'] = False
         kernel_first_line_start = text.index('\n') + 1
         if _detect_kernel_start(text[kernel_first_line_start:]):
             data['linux.boot.kernel_started'] = True
+            data['_summary'] = "Linux boot prompt not found"
         else:
             data['linux.boot.kernel_started'] = False
+            data['_summary'] = "Kernel didn't start"
         data['_match_end'] = end if end else len(text)
-        data['linux.boot.prompt'] = False
-        data['_summary'] = "Linux boot prompt not found"
 
     # Check for linux-specific errors in the log. If the `done'
     # condition was found, search only before it. Otherwise search in