Skip to content

Commit

Permalink
Update pylint, tox etc
Browse files Browse the repository at this point in the history
  • Loading branch information
dosaboy committed Jul 12, 2024
1 parent 146c650 commit b8501b7
Show file tree
Hide file tree
Showing 10 changed files with 320 additions and 246 deletions.
24 changes: 2 additions & 22 deletions pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -12,36 +12,16 @@ suggestion-mode=yes

[FORMAT]
max-line-length=79
# Allow docstrings containing long URLs
ignore-long-lines=^\s+.+<?https?://\S+>?$

[REPORTS]
#reports=yes
score=yes

[MESSAGES CONTROL]
disable=
invalid-name,
pointless-statement,
missing-module-docstring,
missing-class-docstring,
missing-function-docstring,
too-many-arguments,
too-many-locals,
too-many-branches,
too-many-instance-attributes,
too-many-ancestors,
too-many-public-methods,
too-many-lines,
too-many-nested-blocks,
too-many-statements,
protected-access,
super-init-not-called,
useless-object-inheritance,
inconsistent-return-statements,
too-few-public-methods,
abstract-method,
no-self-use,
broad-except,
unnecessary-lambda,
broad-exception-raised,
unspecified-encoding,
consider-using-f-string,
100 changes: 53 additions & 47 deletions searchkit/constraints.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from searchkit.log import log


class TimestampMatcherBase(object):
class TimestampMatcherBase():
"""
Match start of line timestamps in a standard way.
Expand Down Expand Up @@ -74,6 +74,7 @@ def strptime(self):


class ConstraintBase(abc.ABC):
""" Base class for all constraints. """

@cached_property
def id(self):
Expand Down Expand Up @@ -124,7 +125,7 @@ def extracted_datetime(self, line):

@property
@abc.abstractmethod
def _since_date(self):
def since_date(self):
""" A datetime.datetime object representing the "since" date/time """

def _line_date_is_valid(self, extracted_datetime):
Expand All @@ -140,12 +141,12 @@ def _line_date_is_valid(self, extracted_datetime):
# unique_search_id, ', '.join(self.exprs))
return False

if ts < self._since_date:
# log.debug("%s < %s at (%s) i.e. False", ts, self._since_date,
if ts < self.since_date:
# log.debug("%s < %s at (%s) i.e. False", ts, self.since_date,
# line[-3:].strip())
return False

# log.debug("%s >= %s at (%s) i.e. True", ts, self._since_date,
# log.debug("%s >= %s at (%s) i.e. True", ts, self.since_date,
# line[-3:].strip())

return True
Expand Down Expand Up @@ -178,15 +179,15 @@ class MaxSearchableLineLengthReached(Exception):


class FindTokenStatus(Enum):
""" Token Search Status Representation. """
FOUND = 1
REACHED_EOF = 2


class SearchState(object):
class SearchState():
""" Representation of binary search state. """
def __init__(self, status: FindTokenStatus, offset=0):
"""
Representation of binary search state.
@param status: current status of search
@param offset: current position in file from which next search will be
started.
Expand All @@ -203,7 +204,7 @@ def offset(self):
return self._offset


class SavedFilePosition(object):
class SavedFilePosition():
"""
Context manager class that saves current position at start and restores
once finished.
Expand All @@ -219,7 +220,7 @@ def __exit__(self, exc_type, exc_value, exc_traceback):
self.file.seek(self.original_position)


class LogLine(object):
class LogLine():
"""
Class representing a line extracted from a log file.
Expand Down Expand Up @@ -316,7 +317,7 @@ def _read_line(self, max_len):
return line_text


class LogFileDateSinceSeeker(object):
class LogFileDateSinceSeeker():
"""
Performs "since" date lookups with file offsets. This is
useful for performing line-based binary date searches on a log file.
Expand Down Expand Up @@ -425,9 +426,9 @@ def find_token_reverse(self, start_offset):
return SearchState(status=FindTokenStatus.REACHED_EOF,
offset=0)

msg = ("reached max line length ({}) search without finding a line "
"feed (epicenter={})".format(self.MAX_SEARCHABLE_LINE_LENGTH,
start_offset_saved))
msg = (f"reached max line length ({self.MAX_SEARCHABLE_LINE_LENGTH}) "
"search without finding a line "
f"feed (epicenter={start_offset_saved})")
raise MaxSearchableLineLengthReached(msg)

def find_token(self, start_offset):
Expand Down Expand Up @@ -469,9 +470,9 @@ def find_token(self, start_offset):
# chunk's length.
current_offset = current_offset + len(chunk)

msg = ("reached max line length ({}) search without finding a line "
"feed (epicenter={})".format(self.MAX_SEARCHABLE_LINE_LENGTH,
start_offset_saved))
msg = (f"reached max line length ({self.MAX_SEARCHABLE_LINE_LENGTH}) "
"search without finding a line feed "
f"(epicenter={start_offset_saved})")
raise MaxSearchableLineLengthReached(msg)

def try_find_line(self, epicenter, slf_off=None, elf_off=None):
Expand Down Expand Up @@ -746,16 +747,16 @@ def __getitem__(self, offset):
# to indicate that.

self.found_any_date = True
if result.date >= self.constraint._since_date:
if result.date >= self.constraint.since_date:
# Keep the matching line so we can access it
# after the bisect without having to perform another
# lookup.
self.line_info = result

constraint_met = ((result.date >= self.constraint._since_date)
constraint_met = ((result.date >= self.constraint.since_date)
if result.date else False)
log.debug("extracted_date='%s' >= since_date='%s' == %s", result.date,
self.constraint._since_date, constraint_met)
self.constraint.since_date, constraint_met)
return result.date

def run(self):
Expand Down Expand Up @@ -788,14 +789,14 @@ def run(self):
SearchState(FindTokenStatus.FOUND, -1),
SearchState(FindTokenStatus.FOUND, 100))
if result.date is not None:
if result.date >= self.constraint._since_date:
if result.date >= self.constraint.since_date:
log.debug("first line has date that is valid so assuming "
"rest of file is valid")
return current

log.debug("starting full binary search")
try:
bisect.bisect_left(self, self.constraint._since_date)
bisect.bisect_left(self, self.constraint.since_date)
except TooManyLinesWithoutDate as exc:
if not self.found_any_date:
raise NoTimestampsFoundInFile from exc
Expand All @@ -813,8 +814,12 @@ def run(self):
return self.line_info.start_offset


class SearchConstraintSearchSince(BinarySeekSearchBase):

class SearchConstraintSearchSince(BinarySeekSearchBase): # noqa, pylint: disable=too-many-instance-attributes
"""
Search constraints implementation to filter lines that are after a given
date/time. The constraint can be applied to a line or an entire file and
for the latter a binary search is performed.
"""
def __init__(self, current_date, ts_matcher_cls, days=0, hours=24,
**kwargs):
"""
Expand All @@ -833,13 +838,6 @@ def __init__(self, current_date, ts_matcher_cls, days=0, hours=24,
"""
super().__init__(**kwargs)
self.ts_matcher_cls = ts_matcher_cls
if ts_matcher_cls:
self.date_format = ts_matcher_cls.DEFAULT_DATETIME_FORMAT
else:
log.warning("using patterns to identify timestamp is deprecated - "
"use ts_matcher_cls instead")
self.date_format = TimestampMatcherBase.DEFAULT_DATETIME_FORMAT

self.current_date = datetime.strptime(current_date, self.date_format)
self._line_pass = 0
self._line_fail = 0
Expand All @@ -852,6 +850,15 @@ def __init__(self, current_date, ts_matcher_cls, days=0, hours=24,

self._results = {}

@property
def date_format(self):
if self.ts_matcher_cls:
return self.ts_matcher_cls.DEFAULT_DATETIME_FORMAT

log.warning("using patterns to identify timestamp is deprecated - "
"use ts_matcher_cls instead")
return TimestampMatcherBase.DEFAULT_DATETIME_FORMAT

def extracted_datetime(self, line):
if isinstance(line, bytes):
# need this for e.g. gzipped files
Expand All @@ -861,19 +868,19 @@ def extracted_datetime(self, line):
if timestamp.matched:
return timestamp.strptime

return
return None

@property
def _is_valid(self):
return self._since_date is not None
return self.since_date is not None

@cached_property
def _since_date(self): # pylint: disable=W0236
def since_date(self):
"""
Reflects the date from which we will start to apply searches.
"""
if not self.current_date:
return
return None

return self.current_date - timedelta(days=self.days,
hours=self.hours or 0)
Expand All @@ -882,17 +889,16 @@ def apply_to_line(self, line):
if not self._is_valid:
# The caller is expected to catch this and handle it appropriately,
# perhaps deciding to continue.
raise CouldNotApplyConstraint("c:{} unable to apply constraint to "
"line as since_date not valid".
format(self.id))
raise CouldNotApplyConstraint(f"c:{self.id} unable to apply "
"constraint to line as since_date "
"not valid")

extracted_datetime = self.extracted_datetime(line)
if not extracted_datetime:
raise CouldNotApplyConstraint("c:{} unable to apply constraint to "
"line since unable to extract "
"a datetime from the start of the "
"line to compare against".
format(self.id))
raise CouldNotApplyConstraint(f"c:{self.id} unable to apply "
"constraint to line since unable to "
"extract a datetime from the start "
"of the line to compare against")

if self._line_date_is_valid(extracted_datetime):
self._line_pass += 1
Expand All @@ -905,14 +911,14 @@ def apply_to_file(self, fd, destructive=True):
if not self._is_valid:
log.warning("c:%s unable to apply constraint to %s", self.id,
fd.name)
return
return None

if fd.name in self._results:
log.debug("using cached offset")
return self._results[fd.name]

log.debug("c:%s: starting binary seek search to %s in file %s "
"(destructive=True)", self.id, self._since_date, fd.name)
"(destructive=True)", self.id, self.since_date, fd.name)
try:
orig_offset = fd.tell()
seeker = LogFileDateSinceSeeker(fd, self)
Expand All @@ -933,7 +939,7 @@ def apply_to_file(self, fd, destructive=True):
return fd.tell()
except NoValidLinesFoundInFile:
log.debug("c:%s no date after %s found in file - seeking to end",
self._since_date, self.id)
self.since_date, self.id)
fd.seek(0, 2)
return fd.tell()
except TooManyLinesWithoutDate as exc:
Expand All @@ -957,4 +963,4 @@ def stats(self):
return _stats

def __repr__(self):
return "id={}, since={}".format(self.id, self._since_date)
return f"id={self.id}, since={self.since_date}"
4 changes: 2 additions & 2 deletions searchkit/log.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
import logging

log = logging.getLogger('searchkit')
logformat = ("%(asctime)s %(process)d %(levelname)s %(name)s [-] "
LOGFORMAT = ("%(asctime)s %(process)d %(levelname)s %(name)s [-] "
"%(message)s")


def configure_handler():
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter(logformat))
handler.setFormatter(logging.Formatter(LOGFORMAT))
log.addHandler(handler)


Expand Down
Loading

0 comments on commit b8501b7

Please sign in to comment.