Skip to content

Commit

Permalink
Updating to read from json environment
Browse files Browse the repository at this point in the history
Updating to be able to save results to /tmp (configurable)
  • Loading branch information
jonespm committed Jul 5, 2019
1 parent a17534d commit 1243b15
Show file tree
Hide file tree
Showing 6 changed files with 49 additions and 53 deletions.
20 changes: 2 additions & 18 deletions .env.sample
Original file line number Diff line number Diff line change
@@ -1,18 +1,2 @@
# Fill in your values for user, dbname and password!
DSN_ucdm=dbname=entity_store host=192.168.0.1 user=user_readonly password=user_password connect_timeout=5
DSN_udw=dbname=entity_store host=192.168.0.1 user=user_readonly password=user_password connect_timeout=5
# This is the date on your SIS files. It is filled in for the {date} placeholder in the 'sis_file' value in dbqueries.py
SIS_DATE=2018-09-03
#Select tables to read from
SELECT_TABLES=person
# Test this with python -m smtpd -n -c DebuggingServer localhost:1025
#Host name of SMTP to send mail
SMTP_HOST=localhost
#Port for SMTP
SMTP_PORT=1025
#From address of SMTP for mail
SMTP_FROM=
#To address of SMTP for mail
SMTP_TO=
# The schedule for crontab
CRONTAB_SCHEDULE=* * * * *
# Location of the env.json file
ENV_FILE=/unizin-csv-validation/env.json
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
*.csv
.env
env.json
2 changes: 1 addition & 1 deletion dbqueries.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,7 @@
"""},
'unizin_metadata' : {
'index' : '',
'sis_file' : 'unizin_metadata.csv',
'sis_file' : 'metadata.csv',
'dsn' : 'udw',
'query_name': 'Unizin Metadata',
'query' : """
Expand Down
18 changes: 18 additions & 0 deletions env_sample.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"/* Fill in your values for user, dbname and password!": "*/",
"DSN_ucdm": "dbname=entity_store host=192.168.0.1 user=user_readonly password=user_password connect_timeout=5",
"DSN_udw": "dbname=entity_store host=192.168.0.1 user=user_readonly password=user_password connect_timeout=5",
"/* This is the date on your SIS files. It is filled in for the {date} placeholder in the 'sis_file' value in dbqueries.py": "*/",
"SIS_DATE": "2018-09-03",
"/* Select tables to read from": "*/",
"SELECT_TABLES": "person",
"/* Test this with python -m smtpd -n -c DebuggingServer localhost:1025": "*/",
"/* Host name of SMTP to send mail": "*/",
"SMTP_HOST": "localhost",
"/* Port for SMTP": "*/",
"SMTP_PORT": 1025,
"/* From address of SMTP for mail": "*/",
"SMTP_FROM": "",
"/* To address of SMTP for mail": "*/",
"SMTP_TO": ""
}
19 changes: 1 addition & 18 deletions start.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,4 @@

env

if [ -z "${CRONTAB_SCHEDULE}" ]; then
echo "CRONTAB_SCHEDULE environment variable not set, crontab cannot be started. Please set this to a crontab acceptable format. Just running command."
python /unizin-csv-validation/validate.py -o 5
else
# in cron pod
echo Running cron job pod
echo "CRONTAB_SCHEDULE is ${CRONTAB_SCHEDULE}, RUN_AT_TIMES is ${RUN_AT_TIMES}"

# Make the log file available
touch /var/log/cron.log

# Get the environment from docker saved
# https://ypereirareis.github.io/blog/2016/02/29/docker-crontab-environment-variables/
printenv | sed 's/^\([a-zA-Z0-9_]*\)=\(.*\)$/export \1="\2"/g' >> $HOME/.profile

echo "${CRONTAB_SCHEDULE} . $HOME/.profile; python /unizin-csv-validation/validate.py -o 5 >> /var/log/cron.log 2>&1" | crontab
crontab -l && cron -L 15 && tail -f /var/log/cron.log
fi
python /unizin-csv-validation/validate.py -o 5
42 changes: 26 additions & 16 deletions validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,11 @@
#
# Copyright (C) 2018 University of Michigan Teaching and Learning

UNIZIN_FILE = "unizin_{table}.csv"

RESULTS_FILE = open("u_results.txt", "w")
ERRORS_FILE = open("u_errors.txt", "w")
UNIZIN_FILE_FORMAT = "unizin_{table}.csv"

## don't modify anything below this line (except for experimenting)

import sys, os, csv, itertools, argparse, smtplib
import sys, os, csv, itertools, argparse, smtplib, tempfile, json

from email.mime.text import MIMEText
from collections import OrderedDict
Expand All @@ -28,8 +25,18 @@

from tqdm import tqdm

from dotenv import load_dotenv
load_dotenv()

try:
with open(os.getenv("ENV_FILE", "/unizin-csv-validation/config/env.json")) as f:
ENV = json.load(f)
except FileNotFoundError as fnfe:
print("Default config file or one defined in environment variable ENV_FILE not found. This is normal for the build, should define for operation")
# Fall back to the process environment so ENV.get() lookups still work during the build
ENV = os.environ

OUT_DIR = ENV.get("TMP_DIR", "/tmp/")
RESULTS_FILE = open(OUT_DIR + "u_results.txt", "w")
ERRORS_FILE = open(OUT_DIR + "u_errors.txt", "w")

class SimpleQuoter(object):
@staticmethod
Expand Down Expand Up @@ -63,11 +70,11 @@ def close_compare(i, j):

def compare_CSV(tablename):
RESULTS_FILE.write(f"Comparing on {tablename}\n")
sis_file = dbqueries.QUERIES[tablename]['sis_file'].format(date=os.getenv("SIS_DATE"))
sis_file = dbqueries.QUERIES[tablename]['sis_file'].format(date=ENV.get("SIS_DATE"))
index = dbqueries.QUERIES[tablename]['index']
try:
SIS_df = load_CSV_to_dict(sis_file.format(table=tablename), index)
Unizin_df = load_CSV_to_dict(UNIZIN_FILE.format(table=tablename), index)
Unizin_df = load_CSV_to_dict(OUT_DIR + UNIZIN_FILE_FORMAT.format(table=tablename), index)
except Exception as e:
print ("Exception ",e)
return
Expand Down Expand Up @@ -118,10 +125,10 @@ def compare_CSV(tablename):
continue

def load_Unizin_to_CSV(tablename):
out_filename = UNIZIN_FILE.format(table=tablename)
out_filename = OUT_DIR + UNIZIN_FILE_FORMAT.format(table=tablename)
print (f"Loading ucdm {tablename} table to {out_filename}")
# The DSN might switch depending on the data file
conn = psycopg2.connect(os.getenv("DSN_"+dbqueries.QUERIES[tablename]['dsn']))
conn = psycopg2.connect(ENV.get("DSN_"+dbqueries.QUERIES[tablename]['dsn']))

curs = conn.cursor()

Expand Down Expand Up @@ -163,16 +170,16 @@ def email_results(filenames, subject=None):
if canvas_date:
subject = f"{subject} for {canvas_date:%B %d, %Y}"
msg['Subject'] = subject
msg['From'] = os.getenv("SMTP_FROM")
msg['To'] = os.getenv("SMTP_TO")
msg['From'] = ENV.get("SMTP_FROM")
msg['To'] = ENV.get("SMTP_TO")

print (f"Emailing out {filename}")
server = smtplib.SMTP(os.getenv("SMTP_HOST"), os.getenv("SMTP_PORT"), None, 5)
server = smtplib.SMTP(ENV.get("SMTP_HOST"), ENV.get("SMTP_PORT"), None, 5)
server.send_message(msg)
server.quit()

#select_tables = ['academic_term']
select_tables = list(csv.reader([os.getenv("SELECT_TABLES", "academic_term")]))[0]
select_tables = list(csv.reader([ENV.get("SELECT_TABLES", "academic_term")]))[0]

print (select_tables)

Expand Down Expand Up @@ -213,8 +220,11 @@ def email_results(filenames, subject=None):
load_Unizin_to_CSV("number_of_courses_by_term")
load_Unizin_to_CSV("unizin_metadata")
subject = dbqueries.QUERIES["number_of_courses_by_term"].get('query_name')
email_results([UNIZIN_FILE.format(table="unizin_metadata"),UNIZIN_FILE.format(table="number_of_courses_by_term")], subject=subject)
email_results([OUT_DIR + UNIZIN_FILE_FORMAT.format(table="unizin_metadata"),OUT_DIR + UNIZIN_FILE_FORMAT.format(table="number_of_courses_by_term")], subject=subject)
else:
print(f"{option} is not currently a valid option")

RESULTS_FILE.close()
ERRORS_FILE.close()

sys.exit(0)

0 comments on commit 1243b15

Please sign in to comment.