Skip to content

Commit

Permalink
LogFileDateSinceSeeker: bump MAX_SEEK_HORIZON_EXPAND to 4096
Browse files Browse the repository at this point in the history
Signed-off-by: Mustafa Kemal Gilor <[email protected]>
  • Loading branch information
xmkg committed Sep 12, 2023
1 parent ecdd62e commit 2b7a447
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 29 deletions.
50 changes: 25 additions & 25 deletions searchkit/constraints.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,7 @@ class LogFileDateSinceSeeker(object):
# line feed. This means the search will read SEEK_HORIZON times
# MAX_SEEK_HORIZON_EXPAND bytes in total when a line feed character is not
# found.
MAX_SEEK_HORIZON_EXPAND = 100
MAX_SEEK_HORIZON_EXPAND = 4096

# Number of lines to search forwards when the algorithm encounters lines
# with no date.
Expand Down Expand Up @@ -379,7 +379,8 @@ def find_token_reverse(self, start_offset, horizon,
"""

current_offset = -horizon
while attempts > 0:
while True:
attempts -= 1
read_offset = start_offset + current_offset
read_offset = read_offset if read_offset > 0 else 0
read_size = horizon
Expand All @@ -396,16 +397,17 @@ def find_token_reverse(self, start_offset, horizon,

chunk_offset = chunk.rfind(self.LINE_FEED_TOKEN)

if chunk_offset == -1:
current_offset = current_offset - len(chunk)
attempts -= 1
if (start_offset + current_offset) < 0:
return SearchState(status=FindTokenStatus.REACHED_EOF,
offset=0)
continue
if chunk_offset != -1:
return SearchState(status=FindTokenStatus.FOUND,
offset=read_offset + chunk_offset)

return SearchState(status=FindTokenStatus.FOUND,
offset=read_offset + chunk_offset)
if attempts <= 0:
break

current_offset = current_offset - len(chunk)
if (start_offset + current_offset) < 0:
return SearchState(status=FindTokenStatus.REACHED_EOF,
offset=0)

return SearchState(FindTokenStatus.FAILED)

Expand Down Expand Up @@ -441,20 +443,18 @@ def find_token(self, start_offset, horizon,
offset=len(self))

chunk_offset = chunk.find(self.LINE_FEED_TOKEN)
if chunk_offset == -1:
# We failed to find the token in the chunk.
# Progress the current offset forward by
# chunk's length.
current_offset = current_offset + len(chunk)
attempts -= 1
continue

# We've found the token in the chunk.
# As the chunk_offset is a relative offset to the chunk
# translate it to file offset while returning.
return SearchState(status=FindTokenStatus.FOUND,
offset=(start_offset + current_offset +
chunk_offset))
if chunk_offset != -1:
# We've found the token in the chunk.
# As the chunk_offset is a relative offset to the chunk
# translate it to file offset while returning.
return SearchState(status=FindTokenStatus.FOUND,
offset=(start_offset + current_offset +
chunk_offset))
# We failed to find the token in the chunk.
# Progress the current offset forward by
# chunk's length.
current_offset = current_offset + len(chunk)
attempts -= 1

# Reached max_iterations and found nothing.
return SearchState(FindTokenStatus.FAILED)
Expand Down
10 changes: 6 additions & 4 deletions tests/unit/test_search_constraints.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,8 @@ def setUp(self):
self.mock_constraint = mock.MagicMock()
self.mock_constraint.extracted_datetime.return_value = datetime(
2019, 4, 4, 14, 47, 33)
self.max_line_length = LogFileDateSinceSeeker.MAX_SEEK_HORIZON_EXPAND
self.max_line_length *= LogFileDateSinceSeeker.SEEK_HORIZON

def test_construct(self):
uut = LogFileDateSinceSeeker(
Expand Down Expand Up @@ -297,7 +299,7 @@ def test_find_token_reverse_fail(self):
uut = LogFileDateSinceSeeker(self.mock_file, self.mock_constraint)
# Expectation: find_token_reverse should give up the search and
# status should be `failed`
result = uut.find_token_reverse(100000, 256)
result = uut.find_token_reverse(self.max_line_length + 257, 256)
self.assertEqual(result.status, FindTokenStatus.FAILED)

def test_find_token(self):
Expand Down Expand Up @@ -389,12 +391,12 @@ def test_try_find_line_elf_failed(self):
" feed offset at epicenter 83")

def test_try_find_line_slf_failed(self):
self.sio = StringIO(('A' * 102399) + '\n')
self.sio = StringIO(('A' * ((self.max_line_length * 2) - 1)) + '\n')
uut = LogFileDateSinceSeeker(self.mock_file, self.mock_constraint)
with self.assertRaises(ValueError) as rexc:
uut.try_find_line(76800)
uut.try_find_line(self.max_line_length)
self.assertEqual(str(rexc.exception), "Could not find start line feed "
"offset at epicenter 76800")
"offset at epicenter 1048576")

def test_try_find_line_w_constraint(self):
uut = LogFileDateSinceSeeker(self.mock_file, self.constraint)
Expand Down

0 comments on commit 2b7a447

Please sign in to comment.