From 13021990f1ea610a68b7b3dc4eb6e3ee0dc1016c Mon Sep 17 00:00:00 2001 From: Rohit kumar Date: Sun, 30 Jun 2024 21:13:06 +0530 Subject: [PATCH] Added import support and improved docs --- hawk_scanner/commands/couchdb.py | 22 +-- hawk_scanner/commands/firebase.py | 24 +-- hawk_scanner/commands/fs.py | 25 ++- hawk_scanner/commands/gcs.py | 24 +-- hawk_scanner/commands/gdrive.py | 16 +- hawk_scanner/commands/gdrive_workspace.py | 14 +- hawk_scanner/commands/mongodb.py | 24 +-- hawk_scanner/commands/mysql.py | 20 +-- hawk_scanner/commands/postgresql.py | 20 +-- hawk_scanner/commands/redis.py | 18 +-- hawk_scanner/commands/s3.py | 30 ++-- hawk_scanner/commands/slack.py | 30 ++-- hawk_scanner/commands/text.py | 12 +- hawk_scanner/internals/system.py | 189 +++++++++++----------- hawk_scanner/main.py | 34 ++-- readme.md | 24 ++- setup.py | 2 +- 17 files changed, 273 insertions(+), 255 deletions(-) diff --git a/hawk_scanner/commands/couchdb.py b/hawk_scanner/commands/couchdb.py index 888634a..d4d6592 100644 --- a/hawk_scanner/commands/couchdb.py +++ b/hawk_scanner/commands/couchdb.py @@ -9,13 +9,13 @@ def connect_couchdb(host, port, username, password, database): try: server = couchdb.Server(f"http://{username}:{password}@{host}:{port}/") if database not in server: - system.print_error(f"Database {database} not found on CouchDB server.") + system.print_error(args, f"Database {database} not found on CouchDB server.") return None db = server[database] - system.print_info(f"Connected to CouchDB database") + system.print_info(args, f"Connected to CouchDB database") return db except Exception as e: - system.print_error(f"Failed to connect to CouchDB database with error: {e}") + system.print_error(args, f"Failed to connect to CouchDB database with error: {e}") return None def check_data_patterns(db, patterns, profile_name, database_name): @@ -25,7 +25,7 @@ def check_data_patterns(db, patterns, profile_name, database_name): for field_name, field_value in document.items(): if 
field_value: value_str = str(field_value) - matches = system.match_strings(value_str) + matches = system.match_strings(args, value_str) if matches: for match in matches: results.append({ @@ -44,15 +44,15 @@ def check_data_patterns(db, patterns, profile_name, database_name): def execute(args): results = [] - system.print_info(f"Running Checks for CouchDB Sources") - connections = system.get_connection() + system.print_info(args, f"Running Checks for CouchDB Sources") + connections = system.get_connection(args) if 'sources' in connections: sources_config = connections['sources'] couchdb_config = sources_config.get('couchdb') if couchdb_config: - patterns = system.get_fingerprint_file() + patterns = system.get_fingerprint_file(args) for key, config in couchdb_config.items(): host = config.get('host') @@ -62,16 +62,16 @@ def execute(args): database = config.get('database') if host and username and password and database: - system.print_info(f"Checking CouchDB Profile {key} with host and authentication") + system.print_info(args, f"Checking CouchDB Profile {key} with host and authentication") else: - system.print_error(f"Incomplete CouchDB configuration for key: {key}") + system.print_error(args, f"Incomplete CouchDB configuration for key: {key}") continue db = connect_couchdb(host, port, username, password, database) if db: results += check_data_patterns(db, patterns, key, database) else: - system.print_error("No CouchDB connection details found in connection.yml") + system.print_error(args, "No CouchDB connection details found in connection.yml") else: - system.print_error("No 'sources' section found in connection.yml") + system.print_error(args, "No 'sources' section found in connection.yml") return results diff --git a/hawk_scanner/commands/firebase.py b/hawk_scanner/commands/firebase.py index 64553c7..c159d76 100644 --- a/hawk_scanner/commands/firebase.py +++ b/hawk_scanner/commands/firebase.py @@ -10,7 +10,7 @@ def connect_firebase(credentials_file, bucket_name): 
cred = credentials.Certificate(credentials_file) firebase_admin.initialize_app(cred) bucket = storage.bucket(bucket_name) - system.print_info(f"Connected to Firebase bucket: {bucket_name}") + system.print_info(args, f"Connected to Firebase bucket: {bucket_name}") return bucket except Exception as e: print(f"Failed to connect to Firebase bucket: {e}") @@ -18,7 +18,7 @@ def connect_firebase(credentials_file, bucket_name): def execute(args): results = [] shouldDownload = True - connections = system.get_connection() + connections = system.get_connection(args) if 'sources' in connections: sources_config = connections['sources'] @@ -37,7 +37,7 @@ def execute(args): file_name = blob.name ## get unique etag or hash of file remote_etag = blob.etag - system.print_debug(f"Remote etag: {remote_etag}") + system.print_debug(args, f"Remote etag: {remote_etag}") if system.should_exclude_file(file_name, exclude_patterns): continue @@ -49,20 +49,20 @@ def execute(args): if os.path.exists(file_path): shouldDownload = False local_etag = file_path.split('/')[-1].split('-')[0] - system.print_debug(f"Local etag: {local_etag}") - system.print_debug(f"File already exists in cache, using it. You can disable cache by setting 'cache: false' in connection.yml") + system.print_debug(args, f"Local etag: {local_etag}") + system.print_debug(args, f"File already exists in cache, using it. 
You can disable cache by setting 'cache: false' in connection.yml") if remote_etag != local_etag: - system.print_debug(f"File in firebase bucket has changed, downloading it again...") + system.print_debug(args, f"File in firebase bucket has changed, downloading it again...") shouldDownload = True else: shouldDownload = False if shouldDownload: file_path = f"data/firebase/{remote_etag}-{file_name}" - system.print_debug(f"Downloading file: {file_name} to {file_path}...") + system.print_debug(args, f"Downloading file: {file_name} to {file_path}...") blob.download_to_filename(file_path) - matches = system.read_match_strings(file_path, 'google_cloud_storage') + matches = system.read_match_strings(args, file_path, 'google_cloud_storage') if matches: for match in matches: results.append({ @@ -76,13 +76,13 @@ def execute(args): }) else: - system.print_error(f"Failed to connect to Firebase bucket: {bucket_name}") + system.print_error(args, f"Failed to connect to Firebase bucket: {bucket_name}") else: - system.print_error(f"Incomplete Firebase configuration for key: {key}") + system.print_error(args, f"Incomplete Firebase configuration for key: {key}") else: - system.print_error("No Firebase connection details found in connection file") + system.print_error(args, "No Firebase connection details found in connection file") else: - system.print_error("No 'sources' section found in connection.yml") + system.print_error(args, "No 'sources' section found in connection.yml") if config.get("cache") == False: os.system("rm -rf data/firebase") diff --git a/hawk_scanner/commands/fs.py b/hawk_scanner/commands/fs.py index a74e452..ce7baca 100644 --- a/hawk_scanner/commands/fs.py +++ b/hawk_scanner/commands/fs.py @@ -6,8 +6,8 @@ import concurrent.futures import time -def process_file(file_path, key, results): - matches = system.read_match_strings(file_path, 'fs') +def process_file(args, file_path, key, results): + matches = system.read_match_strings(args, file_path, 'fs') file_data = 
system.getFileData(file_path) if matches: for match in matches: @@ -24,18 +24,18 @@ def process_file(file_path, key, results): def execute(args): results = [] - connections = system.get_connection() + connections = system.get_connection(args) if 'sources' in connections: sources_config = connections['sources'] fs_config = sources_config.get('fs') if fs_config: for key, config in fs_config.items(): if 'path' not in config: - system.print_error(f"Path not found in fs profile '{key}'") + system.print_error(args, f"Path not found in fs profile '{key}'") continue path = config.get('path') if not os.path.exists(path): - system.print_error(f"Path '{path}' does not exist") + system.print_error(args, f"Path '{path}' does not exist") exclude_patterns = fs_config.get(key, {}).get('exclude_patterns', []) start_time = time.time() @@ -51,21 +51,14 @@ def execute(args): futures = [] for file_path in files: file_count += 1 - futures.append(executor.submit(process_file, file_path, key, results)) + futures.append(executor.submit(process_file, args, file_path, key, results)) # Wait for all tasks to complete concurrent.futures.wait(futures) end_time = time.time() - system.print_info(f"Time taken to analyze {file_count} files: {end_time - start_time} seconds") + system.print_info(args, f"Time taken to analyze {file_count} files: {end_time - start_time} seconds") else: - system.print_error("No filesystem 'fs' connection details found in connection.yml") + system.print_error(args, "No filesystem 'fs' connection details found in connection.yml") else: - system.print_error("No 'sources' section found in connection.yml") + system.print_error(args, "No 'sources' section found in connection.yml") return results - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - # Add your command-line arguments here if needed - args = parser.parse_args() - results = execute(args) - # Handle results as needed diff --git a/hawk_scanner/commands/gcs.py b/hawk_scanner/commands/gcs.py index 
c22adf0..059fbe5 100644 --- a/hawk_scanner/commands/gcs.py +++ b/hawk_scanner/commands/gcs.py @@ -13,7 +13,7 @@ def connect_google_cloud(bucket_name, credentials_file): os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credentials_file client = storage.Client() bucket = client.get_bucket(bucket_name) - system.print_debug(f"Connected to Google Cloud Storage bucket: {bucket_name}") + system.print_debug(args, f"Connected to Google Cloud Storage bucket: {bucket_name}") return bucket except Exception as e: print(f"Failed to connect to Google Cloud Storage bucket: {e}") @@ -25,7 +25,7 @@ def get_last_update_time(blob): def execute(args): results = [] shouldDownload = True - connections = system.get_connection() + connections = system.get_connection(args) if 'sources' in connections: sources_config = connections['sources'] @@ -44,7 +44,7 @@ def execute(args): file_name = blob.name ## get unique etag or hash of file remote_etag = get_last_update_time(blob) - system.print_debug(f"Remote etag: {remote_etag}") + system.print_debug(args, f"Remote etag: {remote_etag}") if system.should_exclude_file(file_name, exclude_patterns): continue @@ -56,19 +56,19 @@ def execute(args): if os.path.exists(file_path): shouldDownload = False local_etag = file_path.split('/')[-1].split('-')[0] - system.print_debug(f"Local etag: {local_etag}") - system.print_debug(f"File already exists in cache, using it. You can disable cache by setting 'cache: false' in connection.yml") + system.print_debug(args, f"Local etag: {local_etag}") + system.print_debug(args, f"File already exists in cache, using it. 
You can disable cache by setting 'cache: false' in connection.yml") if remote_etag != local_etag: - system.print_debug(f"File in Google Cloud Storage bucket has changed, downloading it again...") + system.print_debug(args, f"File in Google Cloud Storage bucket has changed, downloading it again...") shouldDownload = True else: shouldDownload = False if shouldDownload: - system.print_debug(f"Downloading file: {file_name} to {file_path}...") + system.print_debug(args, f"Downloading file: {file_name} to {file_path}...") blob.download_to_filename(file_path) - matches = system.read_match_strings(file_path, 'google_cloud_storage') + matches = system.read_match_strings(args, file_path, 'google_cloud_storage') if matches: for match in matches: results.append({ @@ -81,13 +81,13 @@ def execute(args): 'data_source': 'gcs' }) else: - system.print_error(f"Failed to connect to Google Cloud Storage bucket: {bucket_name}") + system.print_error(args, f"Failed to connect to Google Cloud Storage bucket: {bucket_name}") else: - system.print_error(f"Incomplete Google Cloud Storage configuration for key: {key}") + system.print_error(args, f"Incomplete Google Cloud Storage configuration for key: {key}") else: - system.print_error("No Google Cloud Storage connection details found in connection.yml") + system.print_error(args, "No Google Cloud Storage connection details found in connection.yml") else: - system.print_error("No 'sources' section found in connection.yml") + system.print_error(args, "No 'sources' section found in connection.yml") if config.get("cache") == False: os.system("rm -rf data/google_cloud_storage") return results diff --git a/hawk_scanner/commands/gdrive.py b/hawk_scanner/commands/gdrive.py index c457d18..d6c3cd2 100644 --- a/hawk_scanner/commands/gdrive.py +++ b/hawk_scanner/commands/gdrive.py @@ -16,7 +16,7 @@ def connect_google_drive(credentials_file): try: fs = GDriveFileSystem("root", client_id=client_id, client_secret=client_secret, token=credentials_file) - 
system.print_debug("Connected to Google Drive") + system.print_debug(args, "Connected to Google Drive") drive = fs.client return drive except Exception as e: @@ -53,7 +53,7 @@ def download_file(drive, file_obj, base_path): else: file_obj.GetContentFile(file_path) - system.print_debug(f"File downloaded to: {file_path}") + system.print_debug(args, f"File downloaded to: {file_path}") except Exception as e: print(f"Failed to download file: {e}") @@ -68,7 +68,7 @@ def list_files(drive, folder_name=None): def execute(args): results = [] should_download = True - connections = system.get_connection() + connections = system.get_connection(args) is_cache_enabled = False drive_config = None @@ -76,7 +76,7 @@ def execute(args): sources_config = connections['sources'] drive_config = sources_config.get('gdrive') else: - system.print_error("No 'sources' section found in connection.yml") + system.print_error(args, "No 'sources' section found in connection.yml") if drive_config: for key, config in drive_config.items(): @@ -113,14 +113,14 @@ def execute(args): if config.get("cache") and os.path.exists(file_path): should_download = False - system.print_debug(f"File already exists in cache, using it.") + system.print_debug(args, f"File already exists in cache, using it.") else: should_download = True if should_download: download_file(drive, file_obj, "data/google_drive") - matches = system.read_match_strings(file_path, 'gdrive') + matches = system.read_match_strings(args, file_path, 'gdrive') if matches: for match in matches: results.append({ @@ -134,9 +134,9 @@ def execute(args): 'data_source': 'gdrive' }) else: - system.print_error("Failed to connect to Google Drive") + system.print_error(args, "Failed to connect to Google Drive") else: - system.print_error("No Google Drive connection details found in connection file") + system.print_error(args, "No Google Drive connection details found in connection file") if not is_cache_enabled: os.system("rm -rf data/google_drive") diff --git 
a/hawk_scanner/commands/gdrive_workspace.py b/hawk_scanner/commands/gdrive_workspace.py index 364782a..26f86d4 100644 --- a/hawk_scanner/commands/gdrive_workspace.py +++ b/hawk_scanner/commands/gdrive_workspace.py @@ -64,7 +64,7 @@ def download_file(drive, file_obj, base_path): except Exception as e: print(f"Failed to write file: {e}") - system.print_debug(f"File downloaded to: {folder_path}") + system.print_debug(args, f"File downloaded to: {folder_path}") except Exception as e: print(f"Failed to download file: {e}") @@ -83,14 +83,14 @@ def list_files(drive, impersonate_user=None): def execute(args): results = [] - connections = system.get_connection() + connections = system.get_connection(args) is_cache_enabled = False if 'sources' in connections: sources_config = connections['sources'] drive_config = sources_config.get('gdrive_workspace') else: - system.print_error("No 'sources' section found in connection.yml") + system.print_error(args, "No 'sources' section found in connection.yml") if drive_config: for key, config in drive_config.items(): @@ -121,14 +121,14 @@ def execute(args): if config.get("cache") and os.path.exists(file_path): is_cache_enabled = False - system.print_debug(f"File already exists in cache, using it.") + system.print_debug(args, f"File already exists in cache, using it.") else: is_cache_enabled = True if is_cache_enabled: download_file(drive, file_obj, "data/google_drive/") - matches = system.read_match_strings(file_path, 'gdrive_workspace') + matches = system.read_match_strings(args, file_path, 'gdrive_workspace') file_name = file_name.replace('-runtime.pdf', '') if matches: for match in matches: @@ -144,9 +144,9 @@ def execute(args): 'data_source': 'gdrive_workspace' }) else: - system.print_error("Failed to connect to Google Drive") + system.print_error(args, "Failed to connect to Google Drive") else: - system.print_error("No Google Drive connection details found in connection file") + system.print_error(args, "No Google Drive connection 
details found in connection file") """if not is_cache_enabled: os.system("rm -rf data/google_drive")""" diff --git a/hawk_scanner/commands/mongodb.py b/hawk_scanner/commands/mongodb.py index 7ce6359..f532cc4 100644 --- a/hawk_scanner/commands/mongodb.py +++ b/hawk_scanner/commands/mongodb.py @@ -12,14 +12,14 @@ def connect_mongodb(host, port, username, password, database, uri=None): client = pymongo.MongoClient(host=host, port=port, username=username, password=password) if database not in client.list_database_names(): - system.print_error(f"Database {database} not found on MongoDB server.") + system.print_error(args, f"Database {database} not found on MongoDB server.") return None db = client[database] - system.print_info(f"Connected to MongoDB database") + system.print_info(args, f"Connected to MongoDB database") return db except Exception as e: - system.print_error(f"Failed to connect to MongoDB database with error: {e}") + system.print_error(args, f"Failed to connect to MongoDB database with error: {e}") return None @@ -42,7 +42,7 @@ def check_data_patterns(db, patterns, profile_name, database_name, limit_start=0 for field_name, field_value in document.items(): if field_value: value_str = str(field_value) - matches = system.match_strings(value_str) + matches = system.match_strings(args, value_str) if matches: for match in matches: results.append({ @@ -61,15 +61,15 @@ def check_data_patterns(db, patterns, profile_name, database_name, limit_start=0 def execute(args): results = [] - system.print_info(f"Running Checks for MongoDB Sources") - connections = system.get_connection() + system.print_info(args, f"Running Checks for MongoDB Sources") + connections = system.get_connection(args) if 'sources' in connections: sources_config = connections['sources'] mongodb_config = sources_config.get('mongodb') if mongodb_config: - patterns = system.get_fingerprint_file() + patterns = system.get_fingerprint_file(args) for key, config in mongodb_config.items(): host = 
config.get('host') @@ -83,20 +83,20 @@ def execute(args): collections = config.get('collections', []) if uri: - system.print_info(f"Checking MongoDB Profile {key} using URI") + system.print_info(args, f"Checking MongoDB Profile {key} using URI") elif host and username and password and database: - system.print_info(f"Checking MongoDB Profile {key} with host and authentication") + system.print_info(args, f"Checking MongoDB Profile {key} with host and authentication") else: - system.print_error(f"Incomplete MongoDB configuration for key: {key}") + system.print_error(args, f"Incomplete MongoDB configuration for key: {key}") continue db = connect_mongodb(host, port, username, password, database, uri) if db: results += check_data_patterns(db, patterns, key, database, limit_start=limit_start, limit_end=limit_end, whitelisted_collections=collections) else: - system.print_error("No MongoDB connection details found in connection.yml") + system.print_error(args, "No MongoDB connection details found in connection.yml") else: - system.print_error("No 'sources' section found in connection.yml") + system.print_error(args, "No 'sources' section found in connection.yml") return results # Example usage diff --git a/hawk_scanner/commands/mysql.py b/hawk_scanner/commands/mysql.py index c32c51c..fe43886 100644 --- a/hawk_scanner/commands/mysql.py +++ b/hawk_scanner/commands/mysql.py @@ -14,10 +14,10 @@ def connect_mysql(host, port, user, password, database): database=database ) if conn: - system.print_info(f"Connected to MySQL database at {host}") + system.print_info(args, f"Connected to MySQL database at {host}") return conn except Exception as e: - system.print_error(f"Failed to connect to MySQL database at {host} with error: {e}") + system.print_error(args, f"Failed to connect to MySQL database at {host} with error: {e}") def check_data_patterns(conn, patterns, profile_name, database_name, limit_start=0, limit_end=500, whitelisted_tables=None): cursor = conn.cursor() @@ -42,7 +42,7 
@@ def check_data_patterns(conn, patterns, profile_name, database_name, limit_start for column, value in zip(columns, row): if value: value_str = str(value) - matches = system.match_strings(value_str) + matches = system.match_strings(args, value_str) if matches: for match in matches: results.append({ @@ -66,14 +66,14 @@ def check_data_patterns(conn, patterns, profile_name, database_name, limit_start def execute(args): results = [] - system.print_info(f"Running Checks for MySQL Sources") - connections = system.get_connection() + system.print_info(args, f"Running Checks for MySQL Sources") + connections = system.get_connection(args) if 'sources' in connections: sources_config = connections['sources'] mysql_config = sources_config.get('mysql') if mysql_config: - patterns = system.get_fingerprint_file() + patterns = system.get_fingerprint_file(args) for key, config in mysql_config.items(): host = config.get('host') @@ -86,17 +86,17 @@ def execute(args): tables = config.get('tables', []) if host and user and database: - system.print_info(f"Checking MySQL Profile {key} and database {database}") + system.print_info(args, f"Checking MySQL Profile {key} and database {database}") conn = connect_mysql(host, port, user, password, database) if conn: results += check_data_patterns(conn, patterns, key, database, limit_start=limit_start, limit_end=limit_end, whitelisted_tables=tables) conn.close() else: - system.print_error(f"Incomplete MySQL configuration for key: {key}") + system.print_error(args, f"Incomplete MySQL configuration for key: {key}") else: - system.print_error("No MySQL connection details found in connection.yml") + system.print_error(args, "No MySQL connection details found in connection.yml") else: - system.print_error("No 'sources' section found in connection.yml") + system.print_error(args, "No 'sources' section found in connection.yml") return results # Example usage diff --git a/hawk_scanner/commands/postgresql.py b/hawk_scanner/commands/postgresql.py index 
282dc0e..141edbd 100644 --- a/hawk_scanner/commands/postgresql.py +++ b/hawk_scanner/commands/postgresql.py @@ -14,10 +14,10 @@ def connect_postgresql(host, port, user, password, database): database=database ) if conn: - system.print_info(f"Connected to PostgreSQL database at {host}") + system.print_info(args, f"Connected to PostgreSQL database at {host}") return conn except Exception as e: - system.print_error(f"Failed to connect to PostgreSQL database at {host} with error: {e}") + system.print_error(args, f"Failed to connect to PostgreSQL database at {host} with error: {e}") def check_data_patterns(conn, patterns, profile_name, database_name, limit_start=0, limit_end=500, whitelisted_tables=None): cursor = conn.cursor() @@ -46,7 +46,7 @@ def check_data_patterns(conn, patterns, profile_name, database_name, limit_start for column, value in zip(columns, row): if value: value_str = str(value) - matches = system.match_strings(value_str) + matches = system.match_strings(args, value_str) if matches: for match in matches: results.append({ @@ -70,15 +70,15 @@ def check_data_patterns(conn, patterns, profile_name, database_name, limit_start def execute(args): results = [] - system.print_info(f"Running Checks for PostgreSQL Sources") - connections = system.get_connection() + system.print_info(args, f"Running Checks for PostgreSQL Sources") + connections = system.get_connection(args) if 'sources' in connections: sources_config = connections['sources'] postgresql_config = sources_config.get('postgresql') if postgresql_config: - patterns = system.get_fingerprint_file() + patterns = system.get_fingerprint_file(args) for key, config in postgresql_config.items(): host = config.get('host') @@ -91,17 +91,17 @@ def execute(args): tables = config.get('tables', []) if host and user and password and database: - system.print_info(f"Checking PostgreSQL Profile {key}, database {database}") + system.print_info(args, f"Checking PostgreSQL Profile {key}, database {database}") conn = 
connect_postgresql(host, port, user, password, database) if conn: results += check_data_patterns(conn, patterns, key, database, limit_start=limit_start, limit_end=limit_end, whitelisted_tables=tables) conn.close() else: - system.print_error(f"Incomplete PostgreSQL configuration for key: {key}") + system.print_error(args, f"Incomplete PostgreSQL configuration for key: {key}") else: - system.print_error("No PostgreSQL connection details found in connection.yml") + system.print_error(args, "No PostgreSQL connection details found in connection.yml") else: - system.print_error("No 'sources' section found in connection.yml") + system.print_error(args, "No 'sources' section found in connection.yml") return results # Example usage diff --git a/hawk_scanner/commands/redis.py b/hawk_scanner/commands/redis.py index 91cd5f7..03dc11c 100644 --- a/hawk_scanner/commands/redis.py +++ b/hawk_scanner/commands/redis.py @@ -10,12 +10,12 @@ def connect_redis(host, port, password=None): try: r = redis.Redis(host=host, port=port, password=password) if r.ping(): - system.print_info(f"Redis instance at {host}:{port} is accessible") + system.print_info(args, f"Redis instance at {host}:{port} is accessible") return r else: - system.print_error(f"Redis instance at {host}:{port} is not accessible") + system.print_error(args, f"Redis instance at {host}:{port} is not accessible") except Exception as e: - system.print_error(f"Redis instance at {host}:{port} is not accessible with error: {e}") + system.print_error(args, f"Redis instance at {host}:{port} is not accessible with error: {e}") def get_patterns_from_file(file_path): with open(file_path, 'r', encoding='utf-8') as file: @@ -30,7 +30,7 @@ def check_data_patterns(redis_instance, patterns, profile_name, host): data = redis_instance.get(key) if data: data_str = data.decode('utf-8') - matches = system.match_strings(data_str) + matches = system.match_strings(args, data_str) if matches: for match in matches: results.append({ @@ -46,14 +46,14 @@ 
def check_data_patterns(redis_instance, patterns, profile_name, host): def execute(args): results = [] - connections = system.get_connection() + connections = system.get_connection(args) if 'sources' in connections: sources_config = connections['sources'] redis_config = sources_config.get('redis') if redis_config: - patterns = system.get_fingerprint_file() + patterns = system.get_fingerprint_file(args) for profile_name, config in redis_config.items(): host = config.get('host') @@ -66,9 +66,9 @@ def execute(args): results = check_data_patterns(redis_instance, patterns, profile_name, host) redis_instance.close() else: - system.print_error(f"Incomplete Redis configuration for key: {profile_name}") + system.print_error(args, f"Incomplete Redis configuration for key: {profile_name}") else: - system.print_error("No Redis connection details found in connection.yml") + system.print_error(args, "No Redis connection details found in connection.yml") else: - system.print_error("No 'sources' section found in connection.yml") + system.print_error(args, "No 'sources' section found in connection.yml") return results diff --git a/hawk_scanner/commands/s3.py b/hawk_scanner/commands/s3.py index cec5dbc..d3b21ef 100644 --- a/hawk_scanner/commands/s3.py +++ b/hawk_scanner/commands/s3.py @@ -15,10 +15,10 @@ def connect_s3(access_key, secret_key, bucket_name): ) s3 = session.resource('s3') bucket = s3.Bucket(bucket_name) - system.print_info(f"Connected to S3 bucket: {bucket_name}") + system.print_info(args, f"Connected to S3 bucket: {bucket_name}") return bucket except Exception as e: - system.print_error(f"[bold red]Failed[/bold red] to connect to S3 bucket: {e}") + system.print_error(args, f"[bold red]Failed[/bold red] to connect to S3 bucket: {e}") def get_last_update_time(obj): last_modified = obj.last_modified @@ -34,8 +34,8 @@ def get_patterns_from_file(file_path): def execute(args): results = [] shouldDownload = True - system.print_info(f"Running Checks for S3 Sources") - 
connections = system.get_connection() + system.print_info(args, f"Running Checks for S3 Sources") + connections = system.get_connection(args) if 'sources' in connections: sources_config = connections['sources'] s3_config = sources_config.get('s3') @@ -47,7 +47,7 @@ def execute(args): bucket_name = config.get('bucket_name') exclude_patterns = config.get(key, {}).get('exclude_patterns', []) - system.print_info(f"Checking S3 profile: '{key}' with bucket '{bucket_name}'") + system.print_info(args, f"Checking S3 profile: '{key}' with bucket '{bucket_name}'") profile_name = key if access_key and secret_key and bucket_name: bucket = connect_s3(access_key, secret_key, bucket_name) @@ -55,7 +55,7 @@ def execute(args): for obj in bucket.objects.all(): remote_etag = obj.e_tag.replace('"', '') - system.print_debug(f"Remote etag: {remote_etag}") + system.print_debug(args, f"Remote etag: {remote_etag}") file_name = obj.key if system.should_exclude_file(file_name, exclude_patterns): continue @@ -66,20 +66,20 @@ def execute(args): if os.path.exists(file_path): shouldDownload = False local_etag = file_path.split('/')[-1].split('-')[0] - system.print_debug(f"Local etag: {local_etag}") - system.print_debug(f"File already exists in cache, using it. You can disable cache by setting 'cache: false' in connection.yml") + system.print_debug(args, f"Local etag: {local_etag}") + system.print_debug(args, f"File already exists in cache, using it. 
You can disable cache by setting 'cache: false' in connection.yml") if remote_etag != local_etag: - system.print_debug(f"File in S3 bucket has changed, downloading it again...") + system.print_debug(args, f"File in S3 bucket has changed, downloading it again...") shouldDownload = True else: shouldDownload = False if shouldDownload: file_path = f"data/s3/{remote_etag}-{file_name}" - system.print_debug(f"Downloading file: {file_name} to {file_path}...") + system.print_debug(args, f"Downloading file: {file_name} to {file_path}...") bucket.download_file(file_name, file_path) - matches = system.read_match_strings(file_path, 'google_cloud_storage') + matches = system.read_match_strings(args, file_path, 'google_cloud_storage') if matches: for match in matches: results.append({ @@ -93,13 +93,13 @@ def execute(args): }) else: - system.print_error(f"Failed to connect to S3 bucket: {bucket_name}") + system.print_error(args, f"Failed to connect to S3 bucket: {bucket_name}") else: - system.print_error(f"Incomplete S3 configuration for key: {key}") + system.print_error(args, f"Incomplete S3 configuration for key: {key}") if config.get("cache") == False: os.system("rm -rf data/s3") else: - system.print_error("No S3 connection details found in connection.yml") + system.print_error(args, "No S3 connection details found in connection.yml") else: - system.print_error("No 'sources' section found in connection.yml") + system.print_error(args, "No 'sources' section found in connection.yml") return results diff --git a/hawk_scanner/commands/slack.py b/hawk_scanner/commands/slack.py index a2e09e8..1e71a7f 100644 --- a/hawk_scanner/commands/slack.py +++ b/hawk_scanner/commands/slack.py @@ -12,13 +12,13 @@ def connect_slack(token): # Test the connection by making an API call response = client.auth_test() if response["ok"]: - system.print_info("Connected to Slack") + system.print_info(args, "Connected to Slack") return client else: - system.print_error("Failed to authenticate with Slack") + 
system.print_error(args, "Failed to authenticate with Slack") return None except SlackApiError as e: - system.print_error(f"Failed to connect to Slack with error: {e.response['error']}") + system.print_error(args, f"Failed to connect to Slack with error: {e.response['error']}") return None def check_slack_messages(client, patterns, profile_name, channel_types, channel_names=None): @@ -33,22 +33,22 @@ def check_slack_messages(client, patterns, profile_name, channel_types, channel_ if channel_names: channels = [channel for channel in channels if channel['name'] in channel_names] - system.print_info(f"Found {len(channels)} channels of type {channel_types}") - system.print_info(f"Checking messages in channels: {', '.join([channel['name'] for channel in channels])}") + system.print_info(args, f"Found {len(channels)} channels of type {channel_types}") + system.print_info(args, f"Checking messages in channels: {', '.join([channel['name'] for channel in channels])}") for channel in channels: channel_name = channel["name"] channel_id = channel["id"] # Get messages from the channel - system.print_info(f"Checking messages in channel {channel_name} ({channel_id})") + system.print_info(args, f"Checking messages in channel {channel_name} ({channel_id})") messages = client.conversations_history(channel=channel_id)["messages"] for message in messages: user = message.get("user", "") text = message.get("text") if text: - matches = system.match_strings(text) + matches = system.match_strings(args, text) if matches: for match in matches: results.append({ @@ -64,20 +64,20 @@ def check_slack_messages(client, patterns, profile_name, channel_types, channel_ }) return results except SlackApiError as e: - system.print_error(f"Failed to fetch messages from Slack with error: {e.response['error']}") + system.print_error(args, f"Failed to fetch messages from Slack with error: {e.response['error']}") return results def execute(args): results = [] - system.print_info("Running Checks for Slack 
Sources") - connections = system.get_connection() + system.print_info(args, "Running Checks for Slack Sources") + connections = system.get_connection(args) if 'sources' in connections: sources_config = connections['sources'] slack_config = sources_config.get('slack') if slack_config: - patterns = system.get_fingerprint_file() + patterns = system.get_fingerprint_file(args) for key, config in slack_config.items(): token = config.get('token') @@ -85,17 +85,17 @@ def execute(args): channel_names = config.get('channel_names', None) if token: - system.print_info(f"Checking Slack Profile {key}") + system.print_info(args, f"Checking Slack Profile {key}") else: - system.print_error(f"Incomplete Slack configuration for key: {key}") + system.print_error(args, f"Incomplete Slack configuration for key: {key}") continue client = connect_slack(token) if client: results += check_slack_messages(client, patterns, key, channel_types, channel_names) else: - system.print_error("No Slack connection details found in connection.yml") + system.print_error(args, "No Slack connection details found in connection.yml") else: - system.print_error("No 'sources' section found in connection.yml") + system.print_error(args, "No 'sources' section found in connection.yml") return results \ No newline at end of file diff --git a/hawk_scanner/commands/text.py b/hawk_scanner/commands/text.py index 5baa088..33c64aa 100644 --- a/hawk_scanner/commands/text.py +++ b/hawk_scanner/commands/text.py @@ -5,7 +5,7 @@ def check_data_patterns(value, patterns, profile_name): value_str = str(value) - matches = system.match_strings(value_str) + matches = system.match_strings(args, value_str) results = [] if matches: for match in matches: @@ -20,9 +20,9 @@ def check_data_patterns(value, patterns, profile_name): def execute(args): results = [] - system.print_info(f"Running Checks for Simple text") - connections = system.get_connection() - patterns = system.get_fingerprint_file() + system.print_info(args, f"Running 
Checks for Simple text") + connections = system.get_connection(args) + patterns = system.get_fingerprint_file(args) if 'sources' in connections: sources_config = connections['sources'] text_config = sources_config.get('text') @@ -32,9 +32,9 @@ def execute(args): text = config.get('text', None) results += check_data_patterns(text, patterns, key) else: - system.print_error("No text connection details found in connection.yml") + system.print_error(args, "No text connection details found in connection.yml") else: - system.print_error("No 'sources' section found in connection.yml") + system.print_error(args, "No 'sources' section found in connection.yml") return results # Example usage diff --git a/hawk_scanner/internals/system.py b/hawk_scanner/internals/system.py index 9b02171..43dfffd 100644 --- a/hawk_scanner/internals/system.py +++ b/hawk_scanner/internals/system.py @@ -16,50 +16,41 @@ data_sources = ['s3', 'mysql', 'redis', 'firebase', 'gcs', 'fs', 'postgresql', 'mongodb', 'slack', 'couchdb', 'gdrive', 'gdrive_workspace', 'text'] data_sources_option = ['all'] + data_sources -parser = argparse.ArgumentParser(description='🦅 A powerful scanner to scan your Filesystem, S3, MySQL, PostgreSQL, MongoDB, Redis, Google Cloud Storage and Firebase storage for PII and sensitive data.') -parser.add_argument('command', nargs='?', choices=data_sources_option, help='Command to execute') -parser.add_argument('--connection', action='store', help='YAML Connection file path') -parser.add_argument('--connection-json', type=str, help='Connection details in JSON format, useful for passing connection info directly as CLI Input') -parser.add_argument('--fingerprint', action='store', help='Override YAML fingerprint file path') -parser.add_argument('--json', help='Save output to a json file') -parser.add_argument('--stdout', action='store_true', help='Print output to stdout in JSON format') -parser.add_argument('--quiet', action='store_true', help='Print only the results') 
-parser.add_argument('--debug', action='store_true', help='Enable debug mode') -parser.add_argument('--no-write', action='store_true', help='Do not write previous alerts to file, this may flood you with duplicate alerts') -parser.add_argument('--shutup', action='store_true', help='Suppress the Hawk Eye banner 🫣', default=False) - -args = parser.parse_args() - -# Create a TinyDB instance for storing previous alert hashes -db = None - -if not args.no_write: - db = TinyDB('previous_alerts.json') - -if args.quiet: - args.shutup = True +def parse_args(args=None): + parser = argparse.ArgumentParser(description='🦅 A powerful scanner to scan your Filesystem, S3, MySQL, PostgreSQL, MongoDB, Redis, Google Cloud Storage and Firebase storage for PII and sensitive data.') + parser.add_argument('command', nargs='?', choices=data_sources_option, help='Command to execute') + parser.add_argument('--connection', action='store', help='YAML Connection file path') + parser.add_argument('--connection-json', type=str, help='Connection details in JSON format, useful for passing connection info directly as CLI Input') + parser.add_argument('--fingerprint', action='store', help='Override YAML fingerprint file path') + parser.add_argument('--json', help='Save output to a json file') + parser.add_argument('--stdout', action='store_true', help='Print output to stdout in JSON format') + parser.add_argument('--quiet', action='store_true', help='Print only the results') + parser.add_argument('--debug', action='store_true', help='Enable debug mode') + parser.add_argument('--no-write', action='store_true', help='Do not write previous alerts to file, this may flood you with duplicate alerts') + parser.add_argument('--shutup', action='store_true', help='Suppress the Hawk Eye banner 🫣', default=False) + return parser.parse_args(args) console = Console() def calculate_msg_hash(msg): return hashlib.sha256(msg.encode()).hexdigest() -def print_info(message): +def print_info(args, message): if not 
args.quiet: console.print(f"[yellow][INFO][/yellow] {str(message)}") -def print_debug(message): - if args.debug and not args.quiet: +def print_debug(args, message): + if args and type(args) == argparse.Namespace and args.debug and not args.quiet: try: console.print(f"[blue][DEBUG][/blue] {str(message)}") except Exception as e: pass -def print_error(message): +def print_error(args, message): if not args.quiet: console.print(f"[bold red]❌ {message}") -def print_success(message): +def print_success(args, message): if not args.quiet: console.print(f"[bold green]✅ {message}") @@ -118,53 +109,57 @@ def RedactData(input_string): return redacted_string -def get_connection(): +def get_connection(args): if args.connection: if os.path.exists(args.connection): with open(args.connection, 'r') as file: connections = yaml.safe_load(file) return connections else: - print_error(f"Connection file not found: {args.connection}") + print_error(args, f"Connection file not found: {args.connection}") exit(1) elif args.connection_json: try: connections = json.loads(args.connection_json) return connections except json.JSONDecodeError as e: - print_error(f"Error parsing JSON: {e}") + print_error(args, f"Error parsing JSON: {e}") exit(1) else: - print_error("Please provide a connection file using --connection flag or connection details using --connection-json flag") + print_error(args, "Please provide a connection file using --connection flag or connection details using --connection-json flag") exit(1) -def get_fingerprint_file(): - if args.fingerprint: +def get_fingerprint_file(args=None): + if args and type(args) == argparse.Namespace and args.fingerprint: if os.path.exists(args.fingerprint): with open(args.fingerprint, 'r') as file: return yaml.safe_load(file) else: - print_error(f"Fingerprint file not found: {args.fingerprint}") + if args: + print_error(args, f"Fingerprint file not found: {args.fingerprint}") exit(1) + elif args and type(args) == dict and 'fingerprint' in args: + return 
args['fingerprint'] else: file_path = "https://github.com/rohitcoder/hawk-eye/raw/main/fingerprint.yml" try: response = requests.get(file_path, timeout=10) - print_info(f"Downloading default fingerprint.yml from {file_path}") + if args: + print_info(args, f"Downloading default fingerprint.yml from {file_path}") if response.status_code == 200: with open('fingerprint.yml', 'wb') as file: file.write(response.content) return yaml.safe_load(response.content) else: - print_error(f"Unable to download default fingerprint.yml please provide your own fingerprint file using --fingerprint flag") + if args: + print_error(args, f"Unable to download default fingerprint.yml please provide your own fingerprint file using --fingerprint flag") exit(1) except Exception as e: - print_error(f"Unable to download default fingerprint.yml please provide your own fingerprint file using --fingerprint flag") + if args: + print_error(args, f"Unable to download default fingerprint.yml please provide your own fingerprint file using --fingerprint flag") exit(1) -patterns = get_fingerprint_file() - -def print_banner(): +def print_banner(args): banner = r""" /T /I / |/ | .-~/ @@ -199,37 +194,42 @@ def print_banner(): (_/ / | | j-" ~^~^ ~-<_(_.^-~" """ + if args.quiet: + args.shutup = True if not args.shutup: console.print(banner) -connections = get_connection() - -def match_strings(content, source='text'): +def match_strings(args, content, source='text'): + redacted = False + if args and 'connection' in args: + connections = get_connection(args) + if 'notify' in connections: + redacted: bool = connections.get('notify', {}).get('redacted', False) + + patterns = get_fingerprint_file(args) matched_strings = [] - if 'notify' in connections: - redacted: bool = connections.get('notify', {}).get('redacted', False) - else: - redacted = False - for pattern_name, pattern_regex in patterns.items(): - print_debug(f"Matching pattern: {pattern_name}") + if args: + print_debug(args, f"Matching pattern: 
{pattern_name}") found = {} ## parse pattern_regex as Regex complied_regex = re.compile(pattern_regex, re.IGNORECASE) - print_debug(f"Regex: {complied_regex}") - print_debug(f"Content: {content}") + if args: + print_debug(args, f"Regex: {complied_regex}") + print_debug(args, f"Content: {content}") matches = re.findall(complied_regex, content) - print_debug(f"Matches: {matches}") + print_debug(args, f"Matches: {matches}") found['data_source'] = source if matches: - print_debug(f"Found {len(matches)} matches for pattern: {pattern_name}") + print_debug(args, f"Found {len(matches)} matches for pattern: {pattern_name}") found['pattern_name'] = pattern_name redacted_matches = [] if redacted: - print_debug(f"Redacting matches for pattern: {pattern_name}") + if args: + print_debug(args, f"Redacting matches for pattern: {pattern_name}") for match in matches: - print_debug(f"Redacting match: {match}") + print_debug(args, f"Redacting match: {match}") redacted_matches.append(RedactData(match)) found['matches'] = redacted_matches else: @@ -241,18 +241,19 @@ def match_strings(content, source='text'): found['sample_text'] = content[:50] matched_strings.append(found) - print_debug(f"Matched strings: {matched_strings}") + if args: + print_debug(args, f"Matched strings: {matched_strings}") return matched_strings def should_exclude_file(file_name, exclude_patterns): _, extension = os.path.splitext(file_name) if extension in exclude_patterns: - print_debug(f"Excluding file: {file_name} because of extension: {extension}") + print_debug(args, f"Excluding file: {file_name} because of extension: {extension}") return True for pattern in exclude_patterns: if pattern in file_name: - print_debug(f"Excluding file: {file_name} because of pattern: {pattern}") + print_debug(args, f"Excluding file: {file_name} because of pattern: {pattern}") return True return False @@ -270,37 +271,40 @@ def list_all_files_iteratively(path, exclude_patterns): if not should_exclude_file(file, exclude_patterns): 
yield os.path.join(root, file) -def read_match_strings(file_path, source): - print_info(f"Scanning file: {file_path} for Source: {source}") +def scan_file(file_path, args=None, source=None): content = '' is_archive = False + # Check if the file is an image + if file_path.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp')): + content = enhance_and_ocr(file_path) + # Check if the file is a PDF document + elif file_path.lower().endswith('.pdf'): + content = read_pdf(file_path) + # Check if the file is an office document (Word, Excel, PowerPoint) + elif file_path.lower().endswith(('.docx', '.xlsx', '.pptx')): + content = read_office_document(file_path) + # Check if the file is an archive (zip, rar, tar, tar.gz) + elif file_path.lower().endswith(('.zip', '.rar', '.tar', '.tar.gz')): + ## this is archive, so we need to extract it and find pii from it, and return matched_strings + matched_strings = find_pii_in_archive(file_path, source) + is_archive = True + else: + # For other file types, read content normally + with open(file_path, 'rb') as file: + # Attempt to decode using UTF-8, fallback to 'latin-1' if needed + content = file.read().decode('utf-8', errors='replace') + + if not is_archive: + matched_strings = match_strings(args, content, source) + return matched_strings + +def read_match_strings(args, file_path, source): + print_info(args, f"Scanning file: {file_path} for Source: {source}") try: - # Check if the file is an image - if file_path.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp')): - content = enhance_and_ocr(file_path) - # Check if the file is a PDF document - elif file_path.lower().endswith('.pdf'): - content = read_pdf(file_path) - # Check if the file is an office document (Word, Excel, PowerPoint) - elif file_path.lower().endswith(('.docx', '.xlsx', '.pptx')): - content = read_office_document(file_path) - # Check if the file is an archive (zip, rar, tar, tar.gz) - elif file_path.lower().endswith(('.zip', '.rar', '.tar', '.tar.gz')): - 
## this is archive, so we need to extract it and find pii from it, and return matched_strings - matched_strings = find_pii_in_archive(file_path, source) - is_archive = True - else: - # For other file types, read content normally - with open(file_path, 'rb') as file: - # Attempt to decode using UTF-8, fallback to 'latin-1' if needed - content = file.read().decode('utf-8', errors='replace') + matched_strings = scan_file(file_path, args, source) except Exception as e: - print_debug(f"Error in read_match_strings: {e}") - pass - - if not is_archive: - matched_strings = match_strings(content, source) - + print_debug(args, f"Error in read_match_strings: {e}") + matched_strings = [] return matched_strings def read_pdf(file_path): @@ -317,7 +321,7 @@ def read_pdf(file_path): # Handle decoding errors by trying a different encoding content += page.extract_text(encoding='latin-1') except Exception as e: - print_debug(f"Error in read_pdf: {e}") + print_debug(args, f"Error in read_pdf: {e}") return content @@ -343,7 +347,7 @@ def read_office_document(file_path): # You can add specific logic for PowerPoint if needed pass except Exception as e: - print_debug(f"Error in read_office_document: {e}") + print_debug(args, f"Error in read_office_document: {e}") return content def find_pii_in_archive(file_path, source): @@ -365,7 +369,7 @@ def find_pii_in_archive(file_path, source): for root, dirs, files in os.walk(tmp_dir): for file in files: file_path = os.path.join(root, file) - data = read_match_strings(file_path, source) + data = read_match_strings(args, file_path, source) for d in data: content.append(d) # Clean up the temporary directory @@ -399,7 +403,10 @@ def getFileData(file_path): return json.dumps({"error": str(e)}) -def SlackNotify(msg): +def SlackNotify(msg, args): + connections = get_connection(args) + if not args.no_write: + db = TinyDB('previous_alerts.json') if 'notify' in connections: notify_config = connections['notify'] # Check if suppress_duplicates is set to True 
@@ -412,7 +419,7 @@ def SlackNotify(msg): # Check if the message hash already exists in the previous alerts database Alert = Query() if db.contains(Alert.msg_hash == msg_hash): - print_debug("Duplicate message detected. Skipping webhook trigger.") + print_info(args, "Duplicate message detected. Skipping webhook trigger.") return slack_config = notify_config.get('slack', {}) @@ -429,7 +436,7 @@ def SlackNotify(msg): # Store the message hash in the previous alerts database db.insert({'msg_hash': msg_hash}) except Exception as e: - print_error(f"An error occurred: {str(e)}") + print_error(args, f"An error occurred: {str(e)}") def enhance_and_ocr(image_path): # Load the image diff --git a/hawk_scanner/main.py b/hawk_scanner/main.py index acf6fc5..ea040ae 100644 --- a/hawk_scanner/main.py +++ b/hawk_scanner/main.py @@ -16,10 +16,8 @@ def clear_screen(): clear_screen() -system.print_banner() console = Console() -args = system.args def load_command_module(command): try: module = importlib.import_module(f"hawk_scanner.commands.{command}") @@ -39,6 +37,8 @@ def execute_command(command, args): def main(): + args = system.parse_args() + system.print_banner(args) results = [] if args.command: if args.command == 'all': @@ -49,8 +49,8 @@ def main(): else: for data in execute_command(args.command, args): results.append(data) - else: - system.print_error("Please provide a command to execute") + else: + system.print_error(args, "Please provide a command to execute") ## GROUP results in grouped_results by datasource by key val grouped_results = {} @@ -71,7 +71,7 @@ def main(): file.write(json.dumps(grouped_results, indent=4)) else: print(json.dumps(grouped_results, indent=4)) - system.print_success(f"Results saved to {args.json}") + system.print_success(args, f"Results saved to {args.json}") sys.exit(0) if args.stdout: @@ -141,7 +141,7 @@ def main(): exposed_values=records_mini ) - system.SlackNotify(AlertMsg) + system.SlackNotify(AlertMsg, args) elif group == 'mysql': 
table.add_row( @@ -176,7 +176,7 @@ def main(): exposed_values=records_mini ) - system.SlackNotify(AlertMsg) + system.SlackNotify(AlertMsg, args) elif group == 'mongodb': table.add_row( @@ -211,7 +211,7 @@ def main(): exposed_values=records_mini ) - system.SlackNotify(AlertMsg) + system.SlackNotify(AlertMsg, args) elif group == 'slack': table.add_row( str(i), @@ -239,7 +239,7 @@ def main(): exposed_values=records_mini ) - system.SlackNotify(AlertMsg) + system.SlackNotify(AlertMsg, args) elif group == 'postgresql': table.add_row( str(i), @@ -273,7 +273,7 @@ def main(): exposed_values=records_mini ) - system.SlackNotify(AlertMsg) + system.SlackNotify(AlertMsg, args) elif group == 'redis': table.add_row( @@ -302,7 +302,7 @@ def main(): exposed_values=records_mini ) - system.SlackNotify(AlertMsg) + system.SlackNotify(AlertMsg, args) elif group == 'firebase' or group == 'gcs': table.add_row( str(i), @@ -332,7 +332,7 @@ def main(): exposed_values=records_mini ) - system.SlackNotify(AlertMsg) + system.SlackNotify(AlertMsg, args) elif group == 'fs': table.add_row( @@ -364,7 +364,7 @@ def main(): total_exposed=str(len(result['matches'])), exposed_values=records_mini ) - system.SlackNotify(AlertMsg) + system.SlackNotify(AlertMsg, args) elif group == 'couchdb': table.add_row( str(i), @@ -396,7 +396,7 @@ def main(): exposed_values=records_mini ) - system.SlackNotify(AlertMsg) + system.SlackNotify(AlertMsg, args) elif group == 'gdrive': table.add_row( str(i), @@ -422,7 +422,7 @@ def main(): exposed_values=records_mini ) - system.SlackNotify(AlertMsg) + system.SlackNotify(AlertMsg, args) elif group == 'gdrive_workspace': table.add_row( str(i), @@ -451,7 +451,7 @@ def main(): exposed_values=records_mini ) - system.SlackNotify(AlertMsg) + system.SlackNotify(AlertMsg, args) elif group == 'text': table.add_row( str(i), @@ -474,7 +474,7 @@ def main(): exposed_values=records_mini ) - system.SlackNotify(AlertMsg) + system.SlackNotify(AlertMsg, args) else: # Handle other cases or do 
nothing for unsupported groups pass diff --git a/readme.md index a308964..3c5cf3d 100644 --- a/readme.md +++ b/readme.md @@ -40,16 +40,34 @@ See how this works on Youtube - https://youtu.be/LuPXE7UJKOY ``` -### Example working command (Use all/fs/s3/gcs etc...) +### Examples +1. Example working command (Use all/fs/s3/gcs etc...) ```bash hawk_scanner all --connection connection.yml --fingerprint fingerprint.yml --json output.json --debug ``` - -### Pass connection data as CLI input in --connection-json flag, and output in json data (Helpful for CI/CD pipeline or automation) +2. Pass connection data as CLI input in --connection-json flag, and output in json data (Helpful for CI/CD pipeline or automation) ```bash hawk_scanner fs --connection-json '{"sources": {"fs": {"fs1": {"quick_scan": true, "path": "/Users/rohitcoder/Downloads/data/KYC_PDF.pdf"}}}}' --stdout --quiet --fingerprint fingerprint.yml ``` +3. You can also import Hawk-eye in your own python scripts and workflows, for better flexibility + ```python + from hawk_scanner.internals import system + pii = system.scan_file("/Users/kumarohit/Downloads/Resume.pdf") + print(pii) + ``` + +4. You can also import Hawk-eye with custom fingerprints in your own python scripts like this: +```python + from hawk_scanner.internals import system + pii = system.scan_file("/Users/kumarohit/Downloads/Resume.pdf", { + "fingerprint": { + "Email": '[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}', + } + }) + print(pii) + ``` + ## Platform and arch-specific guidelines ### Postgresql diff --git a/setup.py b/setup.py index 4cbe81a..0bd0ecf 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,4 @@ -VERSION = "0.3.14" +VERSION = "0.3.15" from setuptools import setup, find_packages