From 2a632932e4ebc61ec9ef03d914c400cf9ac807e7 Mon Sep 17 00:00:00 2001 From: Edward Hope-Morley Date: Sun, 27 Aug 2023 13:38:49 +0100 Subject: [PATCH] Consistency cleanup --- searchkit/constraints.py | 292 +++++++++++++++++---------------------- 1 file changed, 123 insertions(+), 169 deletions(-) diff --git a/searchkit/constraints.py b/searchkit/constraints.py index e0cf055..6b823ac 100644 --- a/searchkit/constraints.py +++ b/searchkit/constraints.py @@ -153,23 +153,23 @@ def _line_date_is_valid(self, extracted_datetime): return True -class NoMatchingLogLineWithDate(Exception): +class ValidLinesNotFound(Exception): """Raised when a log file contains proper timestamps but no log lines after the since date.""" -class NoDateFoundInLogs(Exception): +class ValidFormattedDateNotFound(Exception): """Raised when a log file does not contain any line with date suitable to specified date format""" -class DateSearchFailedAtOffset(Exception): +class DateNotFoundInLine(Exception): """Raised when searcher has encountered a line with no date and performed forward-backward searches, but still yet, could not found a line with date.""" -class UncheckedAccess(Exception): +class InvalidSearchState(Exception): """Raised when a variable dependent on another variable (e.g. the variable x only has value when y is True) is accessed without checking the prerequisite variable.""" @@ -181,11 +181,15 @@ class FindTokenStatus(Enum): FAILED = 3 -class FindTokenResult(object): - def __init__(self, status: FindTokenStatus, found_offset=0, read_bytes=0): +class SearchState(object): + def __init__(self, status: FindTokenStatus, offset=0): + """ + @param status: current status of search + @param offset: current position in file from which next search will be + started. + """ self._status = status - self._found_offset = found_offset - self._read_bytes = read_bytes + self._offset = offset @property def status(self): @@ -194,21 +198,14 @@ def status(self): @property def offset(self): if self.status == FindTokenStatus.FAILED: - raise UncheckedAccess() - return self._found_offset + raise InvalidSearchState() - @property - def read_bytes(self): - if self.status == FindTokenStatus.FAILED: - raise UncheckedAccess() - return self._read_bytes + return self._offset -class PeekFile(): - """A context manager class that allows peeking the - file by keeping the initial read position and returning - to it back. - """ +class FileReadNonDestructive(object): + """ Context manager class that saves current position at start and restores + once finished. """ def __init__(self, file): self.file = file self.original_position = file.tell() @@ -221,11 +218,10 @@ def __exit__(self, exc_type, exc_value, exc_traceback): class LogLine(object): - """A class that represents a line in a log file. + """Class representing a line in a log file. - The class only keeps the start/end offsets of the line and - the line itself is lazily loaded on demand (i.e. by calling - the `text` function). + Keeps the start/end offsets of the line and line content is lazy-loaded on + demand (i.e. by calling the `text` function). 
""" def __init__(self, file, constraint, line_start_lf, line_end_lf): @@ -249,6 +245,7 @@ def start_offset(self): # (being the \n) if self.start_lf.status == FindTokenStatus.FOUND: return self.start_lf.offset + 1 + return self.start_lf.offset @property @@ -259,6 +256,7 @@ def end_offset(self): # the \n) if self.end_lf.status == FindTokenStatus.FOUND: return self.end_lf.offset - 1 + return self.end_lf.offset @property @@ -278,9 +276,7 @@ def date(self): The function will use extracted_datetime function to parse the date/time. - Returns: - datetime: if `text` contains a valid datetime - None: otherwise + @return: datetime: if `text` contains a valid datetime otherwise None. """ return self._constraint.extracted_datetime(self.text) @@ -292,105 +288,95 @@ def text(self): on demand. The function will revert the file offset back after reading to where it was before. - Returns: - str: The line text + @return: the line text string """ - with PeekFile(self._file) as f: + with FileReadNonDestructive(self._file) as f: f.seek(self.start_offset) line_text = f.read(len(self)) return line_text -class LogFileDateSinceOffsetSeeker: - """This class allows user to perform `since` date lookups with - file offsets. This is useful for performing line-based binary - date searches on a log file. +class LogFileDateSinceOffsetSeeker(object): + """ This class performs `since` date lookups with file offsets. This is + useful for performing line-based binary date searches on a log file. - The class implements __len__ and __getitem__ methods in order to - behave like a list. When __getitem__ is called with ane offset, - the algorithm locates the rightmost and leftmost line feed (`\n`) - to form a line. Assume the following file contents: + Implements __len__ and __getitem__ methods in order to behave like a list. + When __getitem__ is called with an offset the algorithm locates the + rightmost and leftmost line feed '\n' to form a line. For example with the + following file contents: 13:15 AAAAAA\n13:16 BBBBBBB\n13:17 CCCCCC - ... and let's assume that the __getitem__ function is called with - offset `19`: + and assuming __getitem__ is called with offset 19 i.e. 13:15 AAAAAA\n13:16 BBBBBBB\n13:17 CCCCCC ^19 - The algorithm first will read SEEK_HORIZON bytes forward, starting + The algorithm will first read SEEK_HORIZON bytes forward, starting from offset `19`, and then try to find the first line feed: 13:15 AAAAAA\n13:16 BBBBBBB\n13:17 CCCCCC ^19 ^r-lf - Consequently, the algorithm will seek SEEK_HORIZON bytes backward, - starting from offset `19`, read SEEK_HORIZON bytes and then try to - find the first line feed, scanning in reverse: + Then the algorithm will seek SEEK_HORIZON bytes backward, starting from + offset 19, read SEEK_HORIZON bytes and then try to find the first line feed + scanning in reverse: 13:15 AAAAAA\n13:16 BBBBBBB\n13:17 CCCCCC ^l-lf ^19 ^r-lf - Then, the algoritm will extract the characters between l-lf and r-lf + Then, the algorithm will extract the characters between l-lf and r-lf to form a line. The line will be checked against the date matcher - to extract the date. If the date matcher yields a valid date, the - __getitem__ function will return that date. Otherwise, the search will - be extended to other nearby lines, prioritizing the lines prior to the - current, until either: + to extract the date. If the date matcher yields a valid date, __getitem__ + will return that date. 
Otherwise, the search will be extended to other + nearby lines, prioritising the lines prior to the current, until either of + the following is true: - - a line with timestamp found, or - - MAX_*_FALLBACK_LINES has reached. + - a line with a timestamp is found + - MAX_*_FALLBACK_LINES has been reached """ - # Amount of characters to read while searching + # Number of characters to read while searching SEEK_HORIZON = 256 - # How many times we can expand the search - # horizon while trying to find a line feed. - # This means the search will read SEEK_HORIZON - # times MAX_SEEK_HORIZON_EXPAND bytes in total - # when a line feed character is not found. + # How many times we can expand the search horizon while trying to find a + # line feed. This means the search will read SEEK_HORIZON times + # MAX_SEEK_HORIZON_EXPAND bytes in total when a line feed character is not + # found. MAX_SEEK_HORIZON_EXPAND = 100 - # How many lines should we search forwards utmost - # when the algorithm encounters lines with no date. + # Number of lines to search forwards when the algorithm encounters lines + # with no date. MAX_FWD_FALLBACK_LINES = 500 - # How many lines should we search backwards utmost - # when the algorithm encounters lines with no date. + # Number of lines to search backwards when the algorithm encounters lines + # with no date. MAX_RWD_FALLBACK_LINES = 500 - def __init__(self, fd, c) -> None: + LF_TOKEN = b"\n" + + def __init__(self, fd, c): self.file = fd self.constraint = c self.line_info = None self.found_any_date = False - def find_token_reverse( - self, - file, - start_offset, - horizon, - token=b"\n", - max_iterations=MAX_SEEK_HORIZON_EXPAND, - ): - r"""Find `token` in `file` starting from `start_offset` and backing off + def find_token_reverse(self, file, start_offset, horizon, + max_iterations=MAX_SEEK_HORIZON_EXPAND): + r"""Find LF in `file` starting from `start_offset` and backing off `horizon`bytes on each iteration for maximum of `max_iterations` times. - Args: - file (file): File descriptor, open in read mode - start_offset (int): start offset of search - horizon (int): Amount of bytes to be processed on each step. - token (str, optional): Search token. Defaults to `\n`. - max_iterations (int, optional): Maximum amount of search - iterations. Defaults to 100. + @param file (file): File descriptor, open in read mode + @param start_offset (int): start offset of search + @param horizon (int): Number of bytes to be processed on each step. + @param max_iterations (int, optional): Maximum number of search + iterations. Defaults to 100. - Returns: - FindDelimiterResult(FindTokenStatus.FOUND, ...) if token is found - FindDelimiterResult(FindTokenStatus.REACHED_EOF, ...) if token is + @return: + SearchState(FindTokenStatus.FOUND, ...) if LF is found + SearchState(FindTokenStatus.REACHED_EOF, ...) if LF is not found because the scan reached the EOF - FindDelimiterResult(FindTokenStatus.FAILED, ...) if token is + SearchState(FindTokenStatus.FAILED, ...) if LF is not found because scan exhausted `max_iterations` """ @@ -405,58 +391,42 @@ def find_token_reverse( file.seek(read_offset) chunk = file.read(read_size) if not chunk or len(chunk) == 0: - # We've reached to start of the file and - # could not find the token. - return FindTokenResult( - status=FindTokenStatus.REACHED_EOF, - found_offset=0, - read_bytes=0) - - chunk_offset = chunk.rfind(token) + # We've reached the start of the file and could not find the + # token.
+ return SearchState(status=FindTokenStatus.REACHED_EOF, + offset=0) + chunk_offset = chunk.rfind(self.LF_TOKEN) if chunk_offset == -1: current_offset = current_offset - len(chunk) max_iterations -= 1 if (start_offset + current_offset) < 0: - return FindTokenResult( - status=FindTokenStatus.REACHED_EOF, - found_offset=0, - read_bytes=start_offset + current_offset) + return SearchState(status=FindTokenStatus.REACHED_EOF, + offset=0) continue - read_bytes = start_offset + current_offset - len(chunk) - return FindTokenResult( - status=FindTokenStatus.FOUND, - found_offset=read_offset + chunk_offset, - read_bytes=read_bytes if read_bytes > 0 else 0) - - return FindTokenResult(FindTokenStatus.FAILED) - - def find_token( - self, - file, - start_offset, - horizon, - token=b"\n", - max_iterations=MAX_SEEK_HORIZON_EXPAND, - ): + return SearchState(status=FindTokenStatus.FOUND, + offset=read_offset + chunk_offset) + + return SearchState(FindTokenStatus.FAILED) + + def find_token(self, file, start_offset, horizon, + max_iterations=MAX_SEEK_HORIZON_EXPAND): r"""Find `token` in `file` starting from `start_offset` and moving forward `horizon` bytes on each iteration for maximum of `max_iterations` times. - Args: - file (file): File descriptor, open in read mode - start_offset (int): start offset of search - horizon (int): Amount of bytes to be processed on each step. - token (str, optional): Search token. Defaults to "\n". - max_iterations (int, optional): Maximum amount of search - iterations. Defaults to 100. + @param file (file): File descriptor, open in read mode + @param start_offset (int): start offset of search + @param horizon (int): Number of bytes to be processed on each step. + @param max_iterations (int, optional): Maximum number of search + iterations. Defaults to 100. - Returns: - FindDelimiterResult(FindTokenStatus.FOUND, ...) if token is found - FindDelimiterResult(FindTokenStatus.REACHED_EOF, ...) if token is + @return: + SearchState(FindTokenStatus.FOUND, ...) if LF is found + SearchState(FindTokenStatus.REACHED_EOF, ...) if LF is not found because the scan reached the EOF - FindDelimiterResult(FindTokenStatus.FAILED, ...) if token is + SearchState(FindTokenStatus.FAILED, ...) if LF is not found because scan exhausted `max_iterations` """ @@ -469,33 +439,30 @@ def find_token( if not chunk or len(chunk) == 0: # Reached end of file - return FindTokenResult( - status=FindTokenStatus.REACHED_EOF, - found_offset=len(self), - read_bytes=len(self) - ) + return SearchState(status=FindTokenStatus.REACHED_EOF, + offset=len(self)) - chunk_offset = chunk.find(token) + chunk_offset = chunk.find(self.LF_TOKEN) if chunk_offset == -1: - # We failed to find the token in the chunk. + # We failed to find the LF in the chunk. # Progress the current offset forward by # chunk's length. current_offset = current_offset + len(chunk) max_iterations -= 1 continue - # We've found the token in the chunk. + + # We've found the LF in the chunk. # As the chunk_offset is a relative offset to the chunk # translate it to file offset while returning. - return FindTokenResult( - status=FindTokenStatus.FOUND, - found_offset=start_offset + current_offset + chunk_offset, - read_bytes=start_offset + current_offset + len(chunk) - ) + return SearchState(status=FindTokenStatus.FOUND, + offset=(start_offset + current_offset + + chunk_offset)) + # Reached max_iterations and found nothing.
- return FindTokenResult(FindTokenStatus.FAILED) + return SearchState(FindTokenStatus.FAILED) def try_find_line(self, epicenter, slf_off=None, elf_off=None): - r"""Try to find a line at `epicenter`. This function allows extracting + """ Try to find a line at `epicenter`. This function allows extracting the corresponding line from a file offset. "Line" is a string between two line feed characters i.e.; @@ -576,8 +543,7 @@ def try_find_line(self, epicenter, slf_off=None, elf_off=None): # ^epicenter ^line end lf line_end_lf = self.find_token( self.file, epicenter, LogFileDateSinceOffsetSeeker.SEEK_HORIZON - ) if elf_off is None else FindTokenResult( - FindTokenStatus.FOUND, elf_off, elf_off) + ) if elf_off is None else SearchState(FindTokenStatus.FOUND, elf_off) if line_end_lf.status == FindTokenStatus.FAILED: raise ValueError("Could not find ending line feed " @@ -588,8 +554,7 @@ def try_find_line(self, epicenter, slf_off=None, elf_off=None): # line start lf ^ ^epicenter line_start_lf = self.find_token_reverse( self.file, epicenter, LogFileDateSinceOffsetSeeker.SEEK_HORIZON - ) if slf_off is None else FindTokenResult( - FindTokenStatus.FOUND, slf_off, slf_off) + ) if slf_off is None else SearchState(FindTokenStatus.FOUND, slf_off) if line_start_lf.status == FindTokenStatus.FAILED: raise ValueError("Could not find start line feed " @@ -603,15 +568,11 @@ def try_find_line(self, epicenter, slf_off=None, elf_off=None): # Ensure that end lf offset is >= start lf offset assert line_end_lf.offset >= line_start_lf.offset - return LogLine( - file=self.file, - constraint=self.constraint, - line_start_lf=line_start_lf, - line_end_lf=line_end_lf) + return LogLine(file=self.file, constraint=self.constraint, + line_start_lf=line_start_lf, line_end_lf=line_end_lf) - def try_find_line_w_date_for( - self, how_many_lines, start_offset, prev_offset=None, forwards=True - ): + def try_find_line_w_date_for(self, how_many_lines, start_offset, + prev_offset=None, forwards=True): """Try to fetch a line with date, starting from `start_offset`. The algorithm will try to fetch a new line searching for a valid date @@ -638,11 +599,9 @@ def try_find_line_w_date_for( offset = start_offset log_line = None while how_many_lines > 0: - log_line = self.try_find_line( - offset, - (None, prev_offset)[forwards], - (None, prev_offset)[not forwards] - ) + log_line = self.try_find_line(offset, + (None, prev_offset)[forwards], + (None, prev_offset)[not forwards]) log.debug( " > TRY_FETCH, REMAINING_ATTEMPTS:%d, START_LF_OFFSET: %d, " @@ -668,7 +627,7 @@ def try_find_line_w_date_for( return None def __len__(self): - with PeekFile(self.file) as f: + with FileReadNonDestructive(self.file) as f: return f.seek(0, 2) def __getitem__(self, offset): @@ -750,15 +709,10 @@ def __getitem__(self, offset): functions, so therefore it only returns the `date` for the comparison. - Args: - offset (int): Lookup offset - - Raises: - DateSearchFailedAtOffset: When a line with a date could not - be found. - - Returns: - date: Date of the line at `offset` + @param offset (int): Lookup offset + @raise DateNotFoundInLine: When a line with a date could not + be found. + @return: Date of the line at `offset` """ log.debug("-------------------------------------------") @@ -799,7 +753,7 @@ def __getitem__(self, offset): log.debug("######### FORWARDS SEARCH END #########") if not result or result.date is None: - raise DateSearchFailedAtOffset( + raise DateNotFoundInLine( f"Date search failed at offset `{offset}`") # This is mostly for diagnostics. 
If we could not find @@ -826,9 +780,9 @@ def run(self): bisect.bisect_left(self, self.constraint._since_date) if not self.found_any_date: - raise NoDateFoundInLogs + raise ValidFormattedDateNotFound if not self.line_info: - raise NoMatchingLogLineWithDate + raise ValidLinesNotFound log.debug( "RUN END, FOUND LINE(START:%d, END:%d, CONTENT:%s)", @@ -979,15 +933,15 @@ def apply_to_file(self, fd, destructive=True): self._results[fd.name] = None else: self._results[fd.name] = result[0] - except NoDateFoundInLogs: + except ValidFormattedDateNotFound: log.debug("c:%s No timestamp found in file", self.id) fd.seek(0) return fd.tell() - except NoMatchingLogLineWithDate: + except ValidLinesNotFound: log.debug("c:%s No date after found in file", self.id) fd.seek(0, 2) return fd.tell() - except DateSearchFailedAtOffset as ed: + except DateNotFoundInLine as ed: log.debug("c:%s Expanded date search failed for a line: %s", self.id, ed) fd.seek(0)
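
Reviewer note: the idea behind LogFileDateSinceOffsetSeeker is that a chronologically ordered log file can be exposed as a bisect-able sequence, where __len__ reports the file size in bytes and __getitem__(offset) reports the timestamp of the line containing that byte offset, so bisect.bisect_left finds the first line at or after the since date. The sketch below is a minimal standalone illustration of that idea under stated assumptions, not searchkit code: the DateSeeker name, the fixed "%Y-%m-%d %H:%M:%S" timestamp format and the byte-by-byte backtracking are illustrative simplifications (the patched class scans in SEEK_HORIZON-sized chunks and falls back to neighbouring lines when a line carries no date).

import bisect
from datetime import datetime


class DateSeeker(object):
    """Expose a chronologically ordered log file as a bisect-able sequence.

    Each index is a byte offset; its value is the timestamp of the line
    containing that offset, parsed from the start of the line.
    """

    def __init__(self, path, fmt="%Y-%m-%d %H:%M:%S", date_len=19):
        self.path = path
        self.fmt = fmt
        self.date_len = date_len

    def __len__(self):
        # File size in bytes i.e. the range of offsets bisect may probe.
        with open(self.path, 'rb') as f:
            return f.seek(0, 2)

    def __getitem__(self, offset):
        with open(self.path, 'rb') as f:
            # Walk backwards from `offset` to the start of the current line.
            start = offset
            while start > 0:
                f.seek(start - 1)
                if f.read(1) == b'\n':
                    break

                start -= 1

            f.seek(start)
            line = f.readline().decode()

        # Assumes every line starts with a timestamp; the patched class
        # instead falls back to neighbouring lines when that is not the case.
        return datetime.strptime(line[:self.date_len], self.fmt)


# Byte offset of the first line whose timestamp is >= since:
#   seeker = DateSeeker('example.log')
#   offset = bisect.bisect_left(seeker, datetime(2023, 8, 27, 13, 16, 0))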
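
Similarly, find_token/find_token_reverse are bounded, chunked scans for the next/previous line feed around a probe offset. The sketch below shows the forward variant in isolation and is an illustration only: find_lf_forward is a hypothetical name, it expects a file opened in binary mode, and it returns a plain (status, offset) tuple rather than the SearchState class introduced by this patch.

from enum import Enum


class FindTokenStatus(Enum):
    FOUND = 1
    REACHED_EOF = 2
    FAILED = 3


def find_lf_forward(f, start_offset, horizon=256, max_iterations=100):
    r"""Scan forward from `start_offset` for the next b'\n', reading
    `horizon` bytes per iteration for at most `max_iterations` iterations."""
    current = 0
    while max_iterations > 0:
        f.seek(start_offset + current)
        chunk = f.read(horizon)
        if not chunk:
            # Ran off the end of the file without finding a line feed.
            return FindTokenStatus.REACHED_EOF, f.seek(0, 2)

        pos = chunk.find(b'\n')
        if pos == -1:
            # No line feed in this chunk; extend the search window.
            current += len(chunk)
            max_iterations -= 1
            continue

        # Translate the chunk-relative hit into an absolute file offset.
        return FindTokenStatus.FOUND, start_offset + current + pos

    return FindTokenStatus.FAILED, None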