Skip to content

Commit

Permalink
Fixed snapshot backward compatibility with v0.7.9
Browse files Browse the repository at this point in the history
  • Loading branch information
davidohana committed Oct 22, 2020
1 parent 2f1af3f commit a9b8f72
Show file tree
Hide file tree
Showing 5 changed files with 74 additions and 10 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,9 @@ Our project welcomes external contributions. Please refer to [CONTRIBUTING.md](C

## Change Log

##### v0.8.2
* Fixed snapshot backward compatibility with v0.7.9

##### v0.8.1
* Bugfix in profiling configuration read

Expand Down
19 changes: 12 additions & 7 deletions drain3/drain.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
Modified by : [email protected], [email protected]
License : MIT
"""
from drain3.simple_profiler import Profiler
from drain3.simple_profiler import Profiler, NullProfiler

param_str = '<*>'

Expand Down Expand Up @@ -32,7 +32,7 @@ def __init__(self, key, depth):

class Drain:

def __init__(self, depth=4, sim_th=0.4, max_children=100, profiler: Profiler = None):
def __init__(self, depth=4, sim_th=0.4, max_children=100, profiler: Profiler = NullProfiler()):
"""
Attributes
----------
Expand Down Expand Up @@ -233,13 +233,16 @@ def add_log_message(self, content: str):
content = content.strip()
content_tokens = content.split()

self.profiler.start_section("tree_search")
if self.profiler:
self.profiler.start_section("tree_search")
match_cluster = self.tree_search(self.root_node, content_tokens)
self.profiler.end_section()
if self.profiler:
self.profiler.end_section()

# Match no existing log cluster
if match_cluster is None:
self.profiler.start_section("create_cluster")
if self.profiler:
self.profiler.start_section("create_cluster")
cluster_num = len(self.clusters) + 1
cluster_id = self.num_to_cluster_id(cluster_num)
match_cluster = LogCluster(content_tokens, cluster_id)
Expand All @@ -249,7 +252,8 @@ def add_log_message(self, content: str):

# Add the new log message to the existing cluster
else:
self.profiler.start_section("cluster_exist")
if self.profiler:
self.profiler.start_section("cluster_exist")
new_template_tokens = self.get_template(content_tokens, match_cluster.log_template_tokens)
if ' '.join(new_template_tokens) != ' '.join(match_cluster.log_template_tokens):
match_cluster.log_template_tokens = new_template_tokens
Expand All @@ -258,7 +262,8 @@ def add_log_message(self, content: str):
update_type = "none"
match_cluster.size += 1

self.profiler.end_section()
if self.profiler:
self.profiler.end_section()

return match_cluster, update_type

Expand Down
4 changes: 2 additions & 2 deletions drain3/simple_profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def end_section(self):
pass

@abstractmethod
def report(self, report_internal_sec: int):
def report(self, report_internal_sec=30):
pass

@abstractmethod
Expand All @@ -35,7 +35,7 @@ def start_section(self, section_name: str):
def end_section(self):
pass

def report(self, report_internal_sec: int):
def report(self, report_internal_sec=30):
pass

def print_results(self):
Expand Down
56 changes: 56 additions & 0 deletions examples/drain_bigfile_demo2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
"""
Description : Example of using Drain3 to process a real world file
Author      : David Ohana
Author_email: [email protected]
License     : MIT

Streams a log file line-by-line through a Drain3 TemplateMiner, reporting
throughput every `batch_size` lines and printing the mined clusters,
prefix tree, and profiler results at the end.

Usage: python drain_bigfile_demo2.py [path/to/logfile]
"""
import json
import logging
import sys
import time

from drain3 import TemplateMiner

logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s')

# Input file: first CLI argument if given, otherwise the original demo path.
default_log_file = "/Volumes/machd/Users/davidoh/dev/repos/ad/decorus-online-log-parser/logs/LogParserApp/2020-10-21__13.46.23.707_P0000/mined_content_2020-10-21__13.46.25.552_P0000.log"
in_log_file = sys.argv[1] if len(sys.argv) > 1 else default_log_file

template_miner = TemplateMiner()
# NOTE(review): depth is mutated after construction; Drain's prefix tree root
# was already created by __init__ — confirm this override takes effect as intended.
template_miner.drain.depth = 3

line_count = 0
start_time = time.time()
batch_start_time = start_time
batch_size = 10000  # report throughput every N lines
with open(in_log_file) as f:
    for line in f:
        line = line.rstrip()
        result = template_miner.add_log_message(line)
        line_count += 1
        if line_count % batch_size == 0:
            # Per-batch rate, measured since the previous report.
            time_took = time.time() - batch_start_time
            rate = batch_size / time_took
            logger.info(f"Processing line: {line_count}, rate {rate:.1f} lines/sec, "
                        f"{len(template_miner.drain.clusters)} clusters so far.")
            batch_start_time = time.time()
        if result["change_type"] != "none":
            # Only log lines that created or updated a cluster template.
            result_json = json.dumps(result)
            logger.info(f"Input ({line_count}): " + line)
            logger.info("Result: " + result_json)

# Overall rate, measured across the whole file.
time_took = time.time() - start_time
rate = line_count / time_took
logger.info(f"--- Done processing file. Total of {line_count} lines, rate {rate:.1f} lines/sec, "
            f"{len(template_miner.drain.clusters)} clusters")
# Largest clusters first.
sorted_clusters = sorted(template_miner.drain.clusters, key=lambda it: it.size, reverse=True)
for cluster in sorted_clusters:
    logger.info(cluster)

print("Prefix Tree:")
template_miner.drain.print_tree()

template_miner.profiler.print_results()
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
setup(
name='drain3',
packages=['drain3'],
version="0.8.1",
version="0.8.2",
license='MIT',
description="Persistent & streaming log template miner",
long_description=long_description,
Expand Down

0 comments on commit a9b8f72

Please sign in to comment.