From 2b7a4477bf5f45bd6160c96f4bbd5b88aa848876 Mon Sep 17 00:00:00 2001 From: Mustafa Kemal Gilor Date: Tue, 12 Sep 2023 14:10:23 +0300 Subject: [PATCH] LogFileDateSinceSeeker: bump MAX_SEEK_HORIZON_EXPAND to 4096 Signed-off-by: Mustafa Kemal Gilor --- searchkit/constraints.py | 50 +++++++++++++-------------- tests/unit/test_search_constraints.py | 10 +++--- 2 files changed, 31 insertions(+), 29 deletions(-) diff --git a/searchkit/constraints.py b/searchkit/constraints.py index e4a2a69..c17c035 100644 --- a/searchkit/constraints.py +++ b/searchkit/constraints.py @@ -343,7 +343,7 @@ class LogFileDateSinceSeeker(object): # line feed. This means the search will read SEEK_HORIZON times # MAX_SEEK_HORIZON_EXPAND bytes in total when a line feed character is not # found. - MAX_SEEK_HORIZON_EXPAND = 100 + MAX_SEEK_HORIZON_EXPAND = 4096 # Number of lines to search forwards when the algorithm encounters lines # with no date. @@ -379,7 +379,8 @@ def find_token_reverse(self, start_offset, horizon, """ current_offset = -horizon - while attempts > 0: + while True: + attempts -= 1 read_offset = start_offset + current_offset read_offset = read_offset if read_offset > 0 else 0 read_size = horizon @@ -396,16 +397,17 @@ def find_token_reverse(self, start_offset, horizon, chunk_offset = chunk.rfind(self.LINE_FEED_TOKEN) - if chunk_offset == -1: - current_offset = current_offset - len(chunk) - attempts -= 1 - if (start_offset + current_offset) < 0: - return SearchState(status=FindTokenStatus.REACHED_EOF, - offset=0) - continue + if chunk_offset != -1: + return SearchState(status=FindTokenStatus.FOUND, + offset=read_offset + chunk_offset) - return SearchState(status=FindTokenStatus.FOUND, - offset=read_offset + chunk_offset) + if attempts <= 0: + break + + current_offset = current_offset - len(chunk) + if (start_offset + current_offset) < 0: + return SearchState(status=FindTokenStatus.REACHED_EOF, + offset=0) return SearchState(FindTokenStatus.FAILED) @@ -441,20 +443,18 @@ def find_token(self, start_offset, horizon, offset=len(self)) chunk_offset = chunk.find(self.LINE_FEED_TOKEN) - if chunk_offset == -1: - # We failed to find the token in the chunk. - # Progress the current offset forward by - # chunk's length. - current_offset = current_offset + len(chunk) - attempts -= 1 - continue - - # We've found the token in the chunk. - # As the chunk_offset is a relative offset to the chunk - # translate it to file offset while returning. - return SearchState(status=FindTokenStatus.FOUND, - offset=(start_offset + current_offset + - chunk_offset)) + if chunk_offset != -1: + # We've found the token in the chunk. + # As the chunk_offset is a relative offset to the chunk + # translate it to file offset while returning. + return SearchState(status=FindTokenStatus.FOUND, + offset=(start_offset + current_offset + + chunk_offset)) + # We failed to find the token in the chunk. + # Progress the current offset forward by + # chunk's length. + current_offset = current_offset + len(chunk) + attempts -= 1 # Reached max_iterations and found nothing. return SearchState(FindTokenStatus.FAILED) diff --git a/tests/unit/test_search_constraints.py b/tests/unit/test_search_constraints.py index 4f8000d..67c30e2 100644 --- a/tests/unit/test_search_constraints.py +++ b/tests/unit/test_search_constraints.py @@ -263,6 +263,8 @@ def setUp(self): self.mock_constraint = mock.MagicMock() self.mock_constraint.extracted_datetime.return_value = datetime( 2019, 4, 4, 14, 47, 33) + self.max_line_length = LogFileDateSinceSeeker.MAX_SEEK_HORIZON_EXPAND + self.max_line_length *= LogFileDateSinceSeeker.SEEK_HORIZON def test_construct(self): uut = LogFileDateSinceSeeker( @@ -297,7 +299,7 @@ def test_find_token_reverse_fail(self): uut = LogFileDateSinceSeeker(self.mock_file, self.mock_constraint) # Expectation: find_token_reverse should give up the search and # status should be `failed` - result = uut.find_token_reverse(100000, 256) + result = uut.find_token_reverse(self.max_line_length + 257, 256) self.assertEqual(result.status, FindTokenStatus.FAILED) def test_find_token(self): @@ -389,12 +391,12 @@ def test_try_find_line_elf_failed(self): " feed offset at epicenter 83") def test_try_find_line_slf_failed(self): - self.sio = StringIO(('A' * 102399) + '\n') + self.sio = StringIO(('A' * ((self.max_line_length * 2) - 1)) + '\n') uut = LogFileDateSinceSeeker(self.mock_file, self.mock_constraint) with self.assertRaises(ValueError) as rexc: - uut.try_find_line(76800) + uut.try_find_line(self.max_line_length) self.assertEqual(str(rexc.exception), "Could not find start line feed " - "offset at epicenter 76800") + "offset at epicenter 1048576") def test_try_find_line_w_constraint(self): uut = LogFileDateSinceSeeker(self.mock_file, self.constraint)