From d8dd0cd9902b4cbec801970c54897aafce7e11aa Mon Sep 17 00:00:00 2001 From: Edward Hope-Morley Date: Sat, 16 Sep 2023 15:05:08 +0100 Subject: [PATCH] fixup logs and some code style --- searchkit/constraints.py | 151 +++++++++++++++++++-------------------- 1 file changed, 75 insertions(+), 76 deletions(-) diff --git a/searchkit/constraints.py b/searchkit/constraints.py index a067cd8..e96d4b7 100644 --- a/searchkit/constraints.py +++ b/searchkit/constraints.py @@ -163,12 +163,18 @@ class ValidFormattedDateNotFound(Exception): date suitable to specified date format""" -class DateNotFoundInLine(Exception): +class LineWithDateNotFound(Exception): """Raised when searcher has encountered a line with no date and performed forward-backward searches, but still yet, could not found a line with date.""" +class MaxSearchableLineLengthReached(Exception): + """ Raises when we exceed the number of characters we can search in a line + before finding a line feed. + """ + + class InvalidSearchState(Exception): """Raised when a variable dependent on another variable (e.g. the variable x only has value when y is True) is accessed without @@ -356,6 +362,7 @@ class LogFileDateSinceSeeker(object): # with no date. MAX_TRY_FIND_WITH_DATE_ATTEMPTS = 500 + MAX_SEARCHABLE_LINE_LENGTH = MAX_SEEK_HORIZON_EXPAND * SEEK_HORIZON LINE_FEED_TOKEN = b'\n' def __init__(self, fd, c): @@ -363,7 +370,7 @@ def __init__(self, fd, c): self.constraint = c self.line_info = None self.found_any_date = False - self.lookup_times = 0 + self.lookup_count = 0 with NonDestructiveFileRead(self.file) as f: self.length = f.seek(0, 2) @@ -407,7 +414,9 @@ def find_token_reverse(self, start_offset): if (start_offset + current_offset) < 0: return SearchState(status=FindTokenStatus.REACHED_EOF, offset=0) - log.debug("reached max line length search without finding a line feed") + + log.debug("reached max line length (%s) search without finding a line " + "feed", self.MAX_SEARCHABLE_LINE_LENGTH) return SearchState(FindTokenStatus.FAILED) def find_token(self, start_offset): @@ -446,8 +455,10 @@ def find_token(self, start_offset): # chunk's length. current_offset = current_offset + len(chunk) attempts -= 1 + # Exhausted all the attempts and found nothing. - log.debug("reached max line length search without finding a line feed") + log.debug("reached max line length (%s) search without finding a line " + "feed", self.MAX_SEARCHABLE_LINE_LENGTH) return SearchState(FindTokenStatus.FAILED) def try_find_line(self, epicenter, slf_off=None, elf_off=None): @@ -516,37 +527,36 @@ def try_find_line(self, epicenter, slf_off=None, elf_off=None): to None. @param elf_off: Optional ending line feed offset, if known. Defaults to None. - - @raise ValueError: when ending line feed offset could not be found or - when starting line feed offset could not be found. - + @raise MaxSearchableLineLengthReached @return: found logline """ - log.debug(" > EPICENTER: %d", epicenter) + log.debug("searching either side of pos=%d", epicenter) # Find the first LF token from the right of the epicenter # e.g. \nThis is a line\n # ^epicenter ^line end lf - line_end_lf = self.find_token( - epicenter - ) if elf_off is None else SearchState( - FindTokenStatus.FOUND, elf_off) + if elf_off is None: + line_end_lf = self.find_token(epicenter) + else: + line_end_lf = SearchState(FindTokenStatus.FOUND, elf_off) if line_end_lf.status == FindTokenStatus.FAILED: - raise ValueError("Could not find ending line feed " - f"offset at epicenter {epicenter}") + raise MaxSearchableLineLengthReached("Could not find ending line " + "feed offset at epicenter " + f"{epicenter}") # Find the first LF token from the left of the epicenter # e.g. \nThis is a line\n # line start lf ^ ^epicenter - line_start_lf = self.find_token_reverse( - epicenter - ) if slf_off is None else SearchState( - FindTokenStatus.FOUND, slf_off) + if slf_off is None: + line_start_lf = self.find_token_reverse(epicenter) + else: + line_start_lf = SearchState(FindTokenStatus.FOUND, slf_off) if line_start_lf.status == FindTokenStatus.FAILED: - raise ValueError("Could not find start line feed " - f"offset at epicenter {epicenter}") + raise MaxSearchableLineLengthReached("Could not find start line " + "feed offset at epicenter " + f"{epicenter}") # Ensure that found offsets are in file range assert line_start_lf.offset <= len(self) @@ -580,6 +590,13 @@ def try_find_line_with_date(self, start_offset, line_feed_offset=None, @return: line if found otherwise None. """ + if line_feed_offset is None: + direction = 'reverse' + else: + direction = 'forward' + + log.debug(f"starting {direction} search") + attempts = LogFileDateSinceSeeker.MAX_TRY_FIND_WITH_DATE_ATTEMPTS offset = start_offset log_line = None @@ -590,29 +607,27 @@ def try_find_line_with_date(self, start_offset, line_feed_offset=None, (None, line_feed_offset)[not forwards] ) - log.debug( - " > TRY_FETCH, REMAINING_ATTEMPTS:%d, START_LF_OFFSET: %d, " - "END_LF_OFFSET: %d >: on line -> %s", - attempts, - log_line.start_lf.offset, - log_line.end_lf.offset, - log_line.text, - ) - # If the line has a valid date, return it. if log_line.date: + log.debug(f"finished {direction} search - date found") return log_line + log.debug("looking further for date: attempts_remaining=%d, " + "start=%d, end=%d", attempts, log_line.start_lf.offset, + log_line.end_lf.offset) + # Set offset of the found line feed line_feed_offset = (log_line.start_lf, log_line.end_lf)[forwards].offset # Set the next search starting point offset = line_feed_offset + (-1, +1)[forwards] if offset < 0 or offset > len(self): - log.debug(" > TRY_FETCH EXIT EOF/SOF") + log.debug("search hit eof/sof - exiting") break attempts -= 1 + + log.debug(f"finished {direction} search - no date found") return None def __len__(self): @@ -699,40 +714,27 @@ def __getitem__(self, offset): the comparison. @param offset: integer lookup offset - @raise DateNotFoundInLine: When a line with a date could not + @raise LineWithDateNotFound: When a line with a date could not be found. @return: Date of the line at `offset` """ - self.lookup_times += 1 - log.debug("-------------------------------------------") - log.debug("-------------------------------------------") - log.debug("-------------------------------------------") - log.debug("-------------------------------------------") - log.debug("LOOKUP (#%d) AT OFFSET: %d", self.lookup_times, offset) + self.lookup_count += 1 + log.debug("timestamp lookup attempt=%d, offset=%d", self.lookup_count, + offset) result = None # Try to search backwards first. # First call will effectively be a regular forward search given # that we're not passing a line feed offset to the function. # Any subsequent search attempt will be made backwards. - log.debug("######### BACKWARDS SEARCH START #########") - result = self.try_find_line_with_date( - offset, - None, - False, - ) - log.debug("######### BACKWARDS SEARCH END #########") - + result = self.try_find_line_with_date(offset, None, False) # ... then, forwards. if not result or result.date is None: - log.debug("######### FORWARDS SEARCH START #########") result = self.try_find_line_with_date(offset + 1, offset, True) - log.debug("######### FORWARDS SEARCH END #########") - if not result or result.date is None: - raise DateNotFoundInLine( + raise LineWithDateNotFound( f"Date search failed at offset `{offset}`") # This is mostly for diagnostics. If we could not find @@ -746,24 +748,21 @@ def __getitem__(self, offset): # lookup. self.line_info = result - log.debug( - " > EXTRACTED_DATE: `%s` >= SINCE DATE: `%s` == %s", - result.date, - self.constraint._since_date, - (result.date >= self.constraint._since_date) - if result.date else False, - ) + log.debug("found date='%s' >= '%s' == %s", result.date, + self.constraint._since_date, + (result.date is not False and + result.date >= self.constraint._since_date)) return result.date def run(self): - # bisect_left will give us the first occurenct of the date - # that satisfies the constraint. - # Similarly, bisect_right would allow the last occurence of - # a date that satisfies the criteria. - + """ + Bisect_left will give us the first occurrence of the date + that satisfies the constraint. Similarly, bisect_right would allow the + last occurrence of a date that satisfies the criteria. + """ try: bisect.bisect_left(self, self.constraint._since_date) - except DateNotFoundInLine as exc: + except LineWithDateNotFound as exc: if not self.found_any_date: raise ValidFormattedDateNotFound from exc @@ -772,14 +771,10 @@ def run(self): if not self.line_info: raise ValidLinesNotFound - log.debug( - "RUN END, FOUND LINE(START:%d, END:%d, CONTENT:%s)" - " IN %d LOOKUP(S)", - self.line_info.start_offset, - self.line_info.end_offset, - self.line_info.text, - self.lookup_times - ) + log.debug("run end: found line start=%d, end=%d, content=%s in %d " + "lookup(s)", self.line_info.start_offset, + self.line_info.end_offset, self.line_info.text, + self.lookup_count) return (self.line_info.start_offset, self.line_info.end_offset) @@ -931,11 +926,16 @@ def apply_to_file(self, fd, destructive=True): log.debug("c:%s No date after found in file", self.id) fd.seek(0, 2) return fd.tell() - except DateNotFoundInLine as ed: - log.debug("c:%s Expanded date search failed for a line: %s", - self.id, ed) + except LineWithDateNotFound as exc: + log.warning("c:%s failed to find a line containing a date: %s", + self.id, exc) fd.seek(0) return fd.tell() + except MaxSearchableLineLengthReached as exc: + log.warning("c:%s exceeded allowed line length search limit " + "before finding line feed: %s", self.id, exc) + fd.seek(0) + return fd.tell() log.debug("c:%s: finished binary seek search in file %s, offset %d", self.id, fd.name, self._results[fd.name]) @@ -947,5 +947,4 @@ def stats(self): return _stats def __repr__(self): - return ("id={}, since={}, current={}". - format(self.id, self._since_date, self.current_date)) + return ("id={}, since={}".format(self.id, self._since_date))