Skip to content

Commit

Permalink
Fixed snapshot backward compatibility with v0.7.9
Browse files Browse the repository at this point in the history
  • Loading branch information
davidohana committed Oct 22, 2020
1 parent 2f1af3f commit a9b8f72
Show file tree
Hide file tree
Showing 5 changed files with 74 additions and 10 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,9 @@ Our project welcomes external contributions. Please refer to [CONTRIBUTING.md](C

## Change Log

##### v0.8.2
* Fixed snapshot backward compatibility with v0.7.9

##### v0.8.1
* Bugfix in profiling configuration read

Expand Down
19 changes: 12 additions & 7 deletions drain3/drain.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
Modified by : [email protected], [email protected]
License : MIT
"""
from drain3.simple_profiler import Profiler
from drain3.simple_profiler import Profiler, NullProfiler

param_str = '<*>'

Expand Down Expand Up @@ -32,7 +32,7 @@ def __init__(self, key, depth):

class Drain:

def __init__(self, depth=4, sim_th=0.4, max_children=100, profiler: Profiler = None):
def __init__(self, depth=4, sim_th=0.4, max_children=100, profiler: Profiler = NullProfiler()):
"""
Attributes
----------
Expand Down Expand Up @@ -233,13 +233,16 @@ def add_log_message(self, content: str):
content = content.strip()
content_tokens = content.split()

self.profiler.start_section("tree_search")
if self.profiler:
self.profiler.start_section("tree_search")
match_cluster = self.tree_search(self.root_node, content_tokens)
self.profiler.end_section()
if self.profiler:
self.profiler.end_section()

# Match no existing log cluster
if match_cluster is None:
self.profiler.start_section("create_cluster")
if self.profiler:
self.profiler.start_section("create_cluster")
cluster_num = len(self.clusters) + 1
cluster_id = self.num_to_cluster_id(cluster_num)
match_cluster = LogCluster(content_tokens, cluster_id)
Expand All @@ -249,7 +252,8 @@ def add_log_message(self, content: str):

# Add the new log message to the existing cluster
else:
self.profiler.start_section("cluster_exist")
if self.profiler:
self.profiler.start_section("cluster_exist")
new_template_tokens = self.get_template(content_tokens, match_cluster.log_template_tokens)
if ' '.join(new_template_tokens) != ' '.join(match_cluster.log_template_tokens):
match_cluster.log_template_tokens = new_template_tokens
Expand All @@ -258,7 +262,8 @@ def add_log_message(self, content: str):
update_type = "none"
match_cluster.size += 1

self.profiler.end_section()
if self.profiler:
self.profiler.end_section()

return match_cluster, update_type

Expand Down
4 changes: 2 additions & 2 deletions drain3/simple_profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def end_section(self):
pass

@abstractmethod
def report(self, report_internal_sec: int):
def report(self, report_internal_sec=30):
pass

@abstractmethod
Expand All @@ -35,7 +35,7 @@ def start_section(self, section_name: str):
def end_section(self):
pass

def report(self, report_internal_sec: int):
def report(self, report_internal_sec=30):
pass

def print_results(self):
Expand Down
56 changes: 56 additions & 0 deletions examples/drain_bigfile_demo2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
"""
Description : Example of using Drain3 to process a real world file
Author      : David Ohana
Author_email: [email protected]
License     : MIT

Streams a log file line-by-line through a Drain3 TemplateMiner, reporting
throughput every `batch_size` lines and printing the mined clusters,
prefix tree, and profiler results at the end.

Usage: python drain_bigfile_demo2.py [path/to/logfile]
"""
import json
import logging
import sys
import time

from drain3 import TemplateMiner

logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s')

# Input file: first CLI argument if given, otherwise the original demo path.
default_log_file = "/Volumes/machd/Users/davidoh/dev/repos/ad/decorus-online-log-parser/logs/LogParserApp/2020-10-21__13.46.23.707_P0000/mined_content_2020-10-21__13.46.25.552_P0000.log"
in_log_file = sys.argv[1] if len(sys.argv) > 1 else default_log_file

template_miner = TemplateMiner()
# NOTE(review): depth is mutated after construction; Drain's prefix tree root
# was already created by __init__ — confirm this override takes effect as intended.
template_miner.drain.depth = 3

line_count = 0
start_time = time.time()
batch_start_time = start_time
batch_size = 10000  # report throughput every N lines
with open(in_log_file) as f:
    for line in f:
        line = line.rstrip()
        result = template_miner.add_log_message(line)
        line_count += 1
        if line_count % batch_size == 0:
            # Per-batch rate, measured since the previous report.
            time_took = time.time() - batch_start_time
            rate = batch_size / time_took
            logger.info(f"Processing line: {line_count}, rate {rate:.1f} lines/sec, "
                        f"{len(template_miner.drain.clusters)} clusters so far.")
            batch_start_time = time.time()
        if result["change_type"] != "none":
            # Only log lines that created or updated a cluster template.
            result_json = json.dumps(result)
            logger.info(f"Input ({line_count}): " + line)
            logger.info("Result: " + result_json)

# Overall rate, measured across the whole file.
time_took = time.time() - start_time
rate = line_count / time_took
logger.info(f"--- Done processing file. Total of {line_count} lines, rate {rate:.1f} lines/sec, "
            f"{len(template_miner.drain.clusters)} clusters")
# Largest clusters first.
sorted_clusters = sorted(template_miner.drain.clusters, key=lambda it: it.size, reverse=True)
for cluster in sorted_clusters:
    logger.info(cluster)

print("Prefix Tree:")
template_miner.drain.print_tree()

template_miner.profiler.print_results()
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
setup(
name='drain3',
packages=['drain3'],
version="0.8.1",
version="0.8.2",
license='MIT',
description="Persistent & streaming log template miner",
long_description=long_description,
Expand Down

0 comments on commit a9b8f72

Please sign in to comment.