Skip to content

Commit

Permalink
Further comments added
Browse files Browse the repository at this point in the history
  • Loading branch information
PhilAppleby committed Nov 8, 2024
1 parent 8bf1755 commit 28f1e4d
Show file tree
Hide file tree
Showing 7 changed files with 29 additions and 35 deletions.
1 change: 1 addition & 0 deletions carrot_transform.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Provides an entry point for the built executable
# Build with "pyinstaller --onefile carrot_transform.py"
from carrottransform.cli.command import transform
if __name__ == '__main__':
transform()
2 changes: 1 addition & 1 deletion carrottransform/_version.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
# TODO - pick this up automatically when building
__version__ = '0.3.1'
__version__ = '0.3.2'
34 changes: 18 additions & 16 deletions carrottransform/cli/subcommands/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,14 @@ def run():
@click.option("--person-file",
required=True,
help="File containing person_ids in the first column")
@click.option("--omop-ddl-file",
required=False,
help="File containing OHDSI ddl statements for OMOP tables")
@click.option("--omop-config-file",
required=False,
help="File containing additional / override json config for omop outputs")
@click.option("--omop-version",
required=True,
required=False,
help="Quoted string containing opmop version - eg '5.3'")
@click.option("--saved-person-id-file",
default=None,
Expand All @@ -49,7 +55,10 @@ def run():
@click.argument("input-dir",
required=False,
nargs=-1)
def mapstream(rules_file, output_dir, write_mode, person_file, omop_version, saved_person_id_file, use_input_person_ids, last_used_ids_file, log_file_threshold, input_dir):
def mapstream(rules_file, output_dir, write_mode,
person_file, omop_ddl_file, omop_config_file,
omop_version, saved_person_id_file, use_input_person_ids,
last_used_ids_file, log_file_threshold, input_dir):
"""
Map to output using input streams
"""
Expand All @@ -59,9 +68,10 @@ def mapstream(rules_file, output_dir, write_mode, person_file, omop_version, sav
# - check main directories for existence
# - handle saved person ids
# - initialise metrics
omop_config_file = str(importlib.resources.files('carrottransform')) + '/' + 'config/omop.json'
omop_ddl_file_name = "OMOPCDM_postgresql_" + omop_version + "_ddl.sql"
omop_ddl_file = str(importlib.resources.files('carrottransform')) + '/' + 'config/' + omop_ddl_file_name
if (omop_ddl_file == None) and (omop_config_file == None) and (omop_version != None):
omop_config_file = str(importlib.resources.files('carrottransform')) + '/' + 'config/omop.json'
omop_ddl_file_name = "OMOPCDM_postgresql_" + omop_version + "_ddl.sql"
omop_ddl_file = str(importlib.resources.files('carrottransform')) + '/' + 'config/' + omop_ddl_file_name

if os.path.isdir(input_dir[0]) == False:
print("Not a directory, input dir {0}".format(input_dir[0]))
Expand All @@ -78,13 +88,12 @@ def mapstream(rules_file, output_dir, write_mode, person_file, omop_version, sav

starttime = time.time()
omopcdm = tools.omopcdm.OmopCDM(omop_ddl_file, omop_config_file)
#print(omopcdm.dump_ddl())
mappingrules = tools.mappingrules.MappingRules(rules_file, omopcdm)
metrics = tools.metrics.Metrics(mappingrules.get_dataset_name(), log_file_threshold)
nowtime = time.time()

print("--------------------------------------------------------------------------------")
print("Loaded mapping rules from: {0} after {1:.5f} secs".format(rules_file, (nowtime - starttime)))
print("Loaded mapping rules from: {0} in {1:.5f} secs".format(rules_file, (nowtime - starttime)))
output_files = mappingrules.get_all_outfile_names()
record_numbers = {}
for output_file in output_files:
Expand Down Expand Up @@ -132,12 +141,10 @@ def mapstream(rules_file, output_dir, write_mode, person_file, omop_version, sav
if infile not in rules_input_files:
msg = "ERROR: no mapping rules found for existing input file - {0}".format(infile)
print(msg)
metrics.add_log_data(msg)
for infile in rules_input_files:
if infile not in existing_input_files:
msg = "ERROR: no data for mapped input file - {0}".format(infile)
print(msg)
metrics.add_log_data(msg)

# set up overall counts
rejidcounts = {}
Expand Down Expand Up @@ -243,26 +250,21 @@ def mapstream(rules_file, output_dir, write_mode, person_file, omop_version, sav
print("INPUT file data : {0}: input count {1}, time since start {2:.5} secs".format(srcfilename, str(rcount), (nowtime - starttime)))
for outtablename, count in outcounts.items():
print("TARGET: {0}: output count {1}".format(outtablename, str(count)))
# END main processing loop

print("--------------------------------------------------------------------------------")
data_summary = metrics.get_mapstream_summary()
log_report = metrics.get_log_data()
try:
dsfh = open(output_dir + "/summary_mapstream.tsv", mode="w")
dsfh.write(data_summary)
dsfh.close()
logfh = open(output_dir + "/error_report.txt", mode="w")
logfh.write(log_report)
logfh.close()
except IOError as e:
print("I/O error({0}): {1}".format(e.errno, e.strerror))
print("Unable to write file")

# END mapstream
nowtime = time.time()
print("Elapsed time = {0:.5f} secs".format(nowtime - starttime))
#profiler.disable()
#stats = pstats.Stats(profiler).sort_stats('ncalls')
#stats.print_stats()

def get_target_records(tgtfilename, tgtcolmap, rulesmap, srcfield, srcdata, srccolmap, srcfilename, omopcdm, metrics):
"""
Expand Down
13 changes: 6 additions & 7 deletions carrottransform/tools/file_helpers.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
import os
import sys
import json

# Function inherited from the "old" CaRROT-CDM
# Function inherited from the "old" CaRROT-CDM (modified to exit on error)

def load_json(f_in):
if os.path.exists(f_in):
data = json.load(open(f_in))
else:
try:
data = json.loads(f_in)
data = json.load(open(f_in))
except Exception as err:
raise FileNotFoundError(f"{f_in} not found. Or cannot parse as json")
print ("{0} not found. Or cannot parse as json".format(f_in))
sys.exit()

return data
return data

8 changes: 0 additions & 8 deletions carrottransform/tools/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ def __init__(self, dataset_name, log_threshold=0):
"""
self.datasummary={}
self.allcounts={}
self.log_data=""
self.dataset_name=dataset_name
self.log_threshold = log_threshold

Expand Down Expand Up @@ -128,10 +127,3 @@ def get_mapstream_summary(self):
summary_str += self.dataset_name + "\t" + source + "\t" + fieldname + "\t" + tablename + "\t" + concept_id + "\t" + additional +"\t" + input_count + "\t" + invalid_person_ids + "\t" + invalid_date_fields + "\t" + invalid_source_fields + "\t" + output_count + "\n"

return summary_str

def add_log_data(self, msg):
self.log_data += msg + "\n"

def get_log_data(self):
return self.log_data

4 changes: 2 additions & 2 deletions carrottransform/tools/omopcdm.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ def __init__(self, omopddl, omopcfg):
def load_ddl(self, omopddl):
try:
fp = open(omopddl, "r")
except IOError as e:
print("I/O error for ddl file ({0}): {1}".format(e.errno, e.strerror))
except Exception as err:
print("OMOP ddl file ({0}) not found".format(omopddl))
sys.exit()

return(self.process_ddl(fp))
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "carrot-transform"
version = "0.3.1"
version = "0.3.2"
authors = [
{ name="PD Appleby", email="[email protected]" },
]
Expand Down

0 comments on commit 28f1e4d

Please sign in to comment.