refactor: log reading implementation
BreadGenie committed Nov 28, 2024
1 parent a5e3816 commit 20a565e
Showing 1 changed file with 91 additions and 27 deletions.
118 changes: 91 additions & 27 deletions agent/log_browser.py
@@ -213,21 +213,21 @@ def monitor_json_log_formatter(log_entries: list) -> list:
list: A list of dictionaries, where each dictionary represents a formatted log entry.
"""

import json

if not log_entries:
return [] # Return empty list if no log entries

formatted_logs = []
for entry in log_entries:
# parse the json log entry
# Extract the timestamp using string operations
try:
log_entry = json.loads(entry)
time = log_entry.get("timestamp")
timestamp_key = '"timestamp":"'
timestamp_start = entry.index(timestamp_key) + len(timestamp_key)
timestamp_end = entry.index('"', timestamp_start)
time = entry[timestamp_start:timestamp_end]
formatted_time = datetime.strptime(time, "%Y-%m-%d %H:%M:%S.%f%z").strftime("%Y-%m-%d %H:%M:%S")

formatted_logs.append({"time": formatted_time, "description": entry})
except json.JSONDecodeError:
except ValueError:
formatted_logs.append({"description": entry})

return formatted_logs
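
The hunk above swaps `json.loads` for plain string slicing when pulling the timestamp out of a `monitor.json.log` entry; both a missing key (`str.index`) and an unparseable timestamp (`strptime`) raise `ValueError`, which is why the `except` clause changes as well. A minimal sketch of the slicing technique, using a made-up entry whose field layout is only an assumption:

```python
from datetime import datetime

# Hypothetical monitor.json.log entry; the real field layout may differ.
entry = '{"timestamp":"2024-11-28 10:15:30.123456+00:00","method":"GET","status":200}'

# Slice the value of the "timestamp" key out of the raw line instead of parsing the JSON.
key = '"timestamp":"'
start = entry.index(key) + len(key)
end = entry.index('"', start)
raw = entry[start:end]  # '2024-11-28 10:15:30.123456+00:00'

formatted = datetime.strptime(raw, "%Y-%m-%d %H:%M:%S.%f%z").strftime("%Y-%m-%d %H:%M:%S")
print(formatted)  # 2024-11-28 10:15:30
```
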
@@ -298,8 +298,6 @@ class LogBrowser:
"monitor.json.log": monitor_json_log_formatter,
}

LOGS_WITH_MULTI_LINE_ENTRIES = ("database.log", "scheduler.log", "worker", "ipython")

def get_log_key(self, log_name: str) -> str:
# if the log file has a number at the end, it's a rotated log
# and we don't need to consider the number for formatter mapping
@@ -332,14 +330,17 @@ def retrieve_merged_log(
return ""

# Sort log files to ensure correct order of rotated logs
log_files.sort(key=self._sort_by_number_suffix)
log_entries = self._process_log_files(log_files)
log_entries = self._apply_filters(log_entries, log_level, search_query)
log_files.sort(key=self._sort_by_number_suffix, reverse=True)
# return log_files
log_entries = self._process_log_files(log_files, page_start, page_length, log_level, search_query)
# return list([*log_entries, page_start, page_length, log_level, search_query])
# log_entries = self._apply_filters(log_entries, log_level, search_query)
log_entries = list(log_entries)

if order_by and order_by == "desc":
log_entries.reverse()
# if order_by and order_by == "desc":
# log_entries.reverse()

return self.format_log(name, log_entries)
return self._paginate_entries(name, log_entries, page_start, page_length)

def _get_log_files(self, name):
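
Note that `retrieve_merged_log` now sorts the rotated files with `reverse=True`, so the highest-numbered rotation (typically the oldest) is read first. A small illustration of the resulting order, assuming the conventional `<name>.log.N` rotation names:

```python
def sort_by_number_suffix(log_file):
    # "worker.log.3" -> 3, "worker.log" -> 0 (mirrors _sort_by_number_suffix in the diff)
    suffix = log_file.split(".")[-1]
    return int(suffix) if suffix.isdigit() else 0

files = ["worker.log", "worker.log.1", "worker.log.3", "worker.log.2"]
files.sort(key=sort_by_number_suffix, reverse=True)
print(files)  # ['worker.log.3', 'worker.log.2', 'worker.log.1', 'worker.log']
```
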
@@ -350,25 +351,88 @@ def _sort_by_number_suffix(self, log_file):
suffix = log_file.split(".")[-1]
return int(suffix) if suffix.isdigit() else 0

def _process_log_files(self, log_files):
def _process_log_files(self, log_files, page_start, page_length, log_level=None, search_query=None):
entries_read = 0
for log in log_files:
yield from self._read_log_file(log)
for entry in self._read_log_file(log, page_start, page_length, log_level, search_query):
if entries_read >= page_length:
break
entries_read += 1
yield entry

page_start = max(0, page_start - entries_read)

def _read_log_file(self, log, page_start=0, page_length=10, log_level=None, search_query=None):
LOGS_WITH_MULTI_LINE_ENTRIES = ("database.log", "scheduler.log", "worker", "ipython")

def _read_log_file(self, log):
log_file = os.path.join(self.logs_directory, log)
with open(log_file) as lf:
if log.startswith(self.LOGS_WITH_MULTI_LINE_ENTRIES):
buffer = []
for line in lf:
if re.match(r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}", line) and buffer:
yield "".join(buffer)
buffer = []
buffer.append(line)
if buffer:
yield "".join(buffer)
if log.startswith(LOGS_WITH_MULTI_LINE_ENTRIES):
yield from self._read_multi_line_log(lf, page_start, page_length, log_level, search_query)
else:
yield from self._read_single_line_log(lf, page_start, page_length, log_level, search_query)

def apply_search_or_filter(self, entry, log_level=None, search_query=None):
if (log_level and log_level not in entry) or (search_query and search_query not in entry): # noqa: SIM103
return False
return True

def _read_multi_line_log(self, file, page_start, page_length, log_level, search_query):
"""
Read a log file with multi-line entries.
If an entry looks like this:
```
2021-09-15 14:48:46,608 ERROR [site] Exception on /api/method/frappe.desk.form.load.getdoc
Traceback (most recent call last):
File "/home/frappe/frappe-bench/apps/frappe/frappe/app.py", line 68, in application
...
```
This function will read the entire entry as a single log entry.
"""

entry_pattern = r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}"
entry_regex = re.compile(entry_pattern)

current_entry = []
line_count = -1 # start at -1 to account for the first line

for line in file:
if line_count >= page_start + page_length:
break

# if matches regex pattern, it's a new entry
# else it's a continuation of the previous entry
if entry_regex.match(line):
if current_entry and line_count >= page_start and line_count < page_start + page_length:
log_entry = " ".join(current_entry)
if self.apply_search_or_filter(log_entry, log_level, search_query):
yield log_entry

current_entry = []
line_count += 1

current_entry.append(line.strip())

# Handle the last entry if it exists
if current_entry and line_count >= page_start and line_count < page_start + page_length:
log_entry = " ".join(current_entry)
if self.apply_search_or_filter(log_entry, log_level, search_query):
yield log_entry

def _read_single_line_log(self, lf, page_start=0, page_length=10, log_level=None, search_query=None):
line_count = 0
for line in lf:
if (log_level and log_level not in line) or (search_query and search_query not in line):
continue
if line_count >= page_start:
line_count += 1
yield line
else:
for line in lf:
yield line.strip()
line_count += 1
if line_count >= page_start + page_length:
break

def _apply_filters(self, log_entries, log_level=None, search_query=None):
if log_level:
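
For logs whose entries span several lines (tracebacks in `database.log`, `scheduler.log`, worker and ipython logs), `_read_multi_line_log` folds continuation lines into the preceding timestamped line. A rough sketch of just that grouping rule, with pagination and filtering left out and fabricated log lines as input:

```python
import re

# Fabricated log lines: one entry with a traceback, then a second entry.
sample = """2021-09-15 14:48:46,608 ERROR [site] Exception on /api/method/frappe.desk.form.load.getdoc
Traceback (most recent call last):
  File "app.py", line 68, in application
2021-09-15 14:48:47,001 INFO [site] request finished"""

entry_regex = re.compile(r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}")

entries, current = [], []
for line in sample.splitlines():
    # A line starting with a timestamp begins a new entry;
    # anything else continues the current one.
    if entry_regex.match(line) and current:
        entries.append(" ".join(current))
        current = []
    current.append(line.strip())
if current:
    entries.append(" ".join(current))

print(len(entries))  # 2 -- the traceback lines are folded into the first entry
```
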
