Skip to content

Commit

Permalink
fix secret config load and add clean-up for re-run
Browse files Browse the repository at this point in the history
  • Loading branch information
stevenwinship committed Apr 5, 2024
1 parent 4c662cd commit a60f476
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 3 deletions.
25 changes: 22 additions & 3 deletions config/config.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import yaml
import os
from models import *
import logging
import input_processor as ip
import output_processor as op
import sys
Expand All @@ -19,11 +20,12 @@ class _Config:

ALLOWED_ENV = ('LOG_NAME_PATTERN', 'ROBOTS_URL', 'MACHINES_URL', 'YEAR_MONTH',
'OUTPUT_FILE', 'PLATFORM', 'HUB_API_TOKEN', 'HUB_BASE_URL', 'UPLOAD_TO_HUB',
'SIMULATE_DATE', 'MAXMIND_GEOIP_COUNTRY_PATH', 'OUTPUT_VOLUME')
'SIMULATE_DATE', 'MAXMIND_GEOIP_COUNTRY_PATH', 'OUTPUT_VOLUME', 'CLEAN_FOR_RERUN')

logging.basicConfig(format='%(message)s', level=logging.INFO)
# thismodule = sys.modules[__name__] # not sure this is needed

def __init__(self):
self.log = logging.getLogger(__name__)
# things that come from the configuration file
self.robots_reg = None
self.machines_reg = None
Expand All @@ -40,6 +42,7 @@ def __init__(self):
self.simulate_date = None
self.maxmind_geoip_country_path = None
self.output_volume = None
self.clean_for_rerun = None

# things that are stored or calculated separately
self.start_date = None
Expand Down Expand Up @@ -72,7 +75,7 @@ def __init__(self):
secret = os.path.join(os.path.dirname(self.config_file), 'secrets.yaml')
if os.path.isfile(secret) == True:
with open(secret, 'r') as ymlfile:
cfg = yaml.load(ymlfile)
cfg = yaml.safe_load(ymlfile)
for x in cfg:
setattr(self, x, cfg[x])

Expand All @@ -88,6 +91,8 @@ def __init__(self):
if isinstance(self.output_volume, str):
self.output_volume = (self.output_volume.lower() == 'true')

if isinstance(self.clean_for_rerun, str):
self.clean_for_rerun = (self.clean_for_rerun.lower() == 'true')

# simulate date, in case someone wants to simulate running on a day besides now
if self.simulate_date is not None:
Expand Down Expand Up @@ -238,6 +243,20 @@ def filenames_to_process(self):
return [ self.log_name_pattern.replace('(yyyy-mm-dd)', self.year_month + '-' + ("%02d" % x))
for x in range(to_process_from, ld + 1) ]

def delete_log_processed_date(self):
# Purpose: discard what is stored for self.year_month so that period can be re-run.
# Reads self.year_month and self.state_dict, logs through self.log, and has no return statement.
# NOTE(review): this view of the file has lost its indentation, so the nesting of the
# statements below cannot be confirmed from here. Presumably every step sits under the
# membership check on the next code line -- verify against the repository.
# clean up data for this period, so it can be re-run
if self.year_month in self.state_dict:
self.log.info(f"Removing state: {self.year_month}")
# remove this period's entry from the in-memory state dict (it is written back to the state json below)
self.state_dict.pop(self.year_month)
# delete the specific database for this time period
# (the path is relative, so it resolves against the current working directory)
my_file = f'state/counter_db_{self.year_month}.sqlite3'
# only remove the file when it exists, so os.remove is not called on a missing path
if os.path.exists(my_file):
self.log.info(f"Deleting file: {my_file}")
os.remove(my_file)
# overwrite the state file with the current contents of self.state_dict
with open('state/statefile.json', 'w') as f:
json.dump(self.state_dict, f, sort_keys = True, indent = 4, ensure_ascii=False)

def update_log_processed_date(self):
if self.year_month in self.state_dict:
self.state_dict[self.year_month]['last_processed_day'] = int(self.last_day().split('-')[2])
Expand Down
4 changes: 4 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ def main():
else:
DbActions.vacuum() # cleans up DB indices for speed

# if re-running a particular month then remove the db and entry in the state file
if config.Config().clean_for_rerun == True:
config.Config().delete_log_processed_date()

the_filenames = config.Config().filenames_to_process()


Expand Down

0 comments on commit a60f476

Please sign in to comment.