From 254389803dedc7ecb41ddc159a850439e04371e6 Mon Sep 17 00:00:00 2001 From: Rinke Hoekstra Date: Wed, 12 Dec 2012 14:13:47 +0100 Subject: [PATCH] Graph file writer for Virtuoso --- src/util/graph_mappings.csv | 14 +++++++ src/util/virtuoso_graph_files.py | 71 ++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+) create mode 100644 src/util/graph_mappings.csv create mode 100644 src/util/virtuoso_graph_files.py diff --git a/src/util/graph_mappings.csv b/src/util/graph_mappings.csv new file mode 100644 index 0000000..591651d --- /dev/null +++ b/src/util/graph_mappings.csv @@ -0,0 +1,14 @@ +annotations-anita.ttl;http://aers.data2semantics.org/resource/graph/annotations +annotations-richard.ttl;http://aers.data2semantics.org/resource/graph/annotations +annotations-first-list.n3;http://aers.data2semantics.org/resource/graph/annotations +annotations-second-list.n3;http://aers.data2semantics.org/resource/graph/annotations +patients.nt;http://aers.data2semantics.org/resource/graph/patients +dbpedia_3.7.owl;http://dbpedia.org +instance_types_en.nt;http://dbpedia.org +labels_en.nt;http://dbpedia.org +redirects_en.nt;http://dbpedia.org +nci_thesaurus.owl;http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl +CTCAE_4.03_2010-06-14_v4.03.owl;http://ncicb.nci.nih.gov/xml/owl/EVS/ctcae.owl +diseasome_dump.nt;http://www4.wiwiss.fu-berlin.de/diseasome +drugbank_dump.nt;http://www4.wiwiss.fu-berlin.de/drugbank +sider_dump.nt;http://www4.wiwiss.fu-berlin.de/sider diff --git a/src/util/virtuoso_graph_files.py b/src/util/virtuoso_graph_files.py new file mode 100644 index 0000000..51bc7c4 --- /dev/null +++ b/src/util/virtuoso_graph_files.py @@ -0,0 +1,71 @@ +''' +Created on Dec 7, 2012 + +@author: hoekstra +''' +from csv import reader +from glob import glob +import argparse +import os.path +import logging + +TRANSLATION_TABLE = 'graph_mappings.csv' +GRAPH_BASE = 'http://aers.data2semantics.org/resource/graph/' + +## GLOBAL SETTINGS + +log = logging.getLogger(__name__) 
# Let this module's logger emit everything from DEBUG upwards.
log.setLevel(logging.DEBUG)

# Send log records to a stream handler, also at DEBUG level.
logHandler = logging.StreamHandler()
logHandler.setLevel(logging.DEBUG)

logFormatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logHandler.setFormatter(logFormatter)

log.addHandler(logHandler)

# Script entry point: for every RDF file (*.nt, *.n3, *.ttl, *.owl) directly
# inside the given directory, write a sibling '<file>.graph' file containing
# the graph URI for that file. The URI comes from TRANSLATION_TABLE when the
# file name is listed there, and is otherwise GRAPH_BASE + file name.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()

    parser.add_argument('path', help='Path to prepare')

    args = parser.parse_args()

    path = args.path
    if not path.endswith('/'):
        path += '/'

    # Collect the candidate files, grouped by extension in this fixed order.
    files = []
    for extension in ('nt', 'n3', 'ttl', 'owl'):
        files.extend(glob("{}*.{}".format(path, extension)))

    # Load the 'file name;graph URI' table. The context manager guarantees the
    # file handle is closed once the table has been read.
    table = {}
    with open(TRANSLATION_TABLE, "r") as mapping_file:
        for row in reader(mapping_file, delimiter=';'):
            if len(row) < 2:
                # Blank or incomplete line: there is no URI column to read.
                continue
            # setdefault keeps the first URI seen for a repeated file name.
            table.setdefault(row[0], row[1])

    for p in files:
        f = os.path.basename(p)

        if f in table:
            graph_uri = table[f]
            log.info("Found graph URI <{}> for '{}'".format(graph_uri, f))
        else:
            graph_uri = "{}{}".format(GRAPH_BASE, f)
            log.info("Generated graph URI <{}> for '{}'".format(graph_uri, f))

        graph_filename = "{}.graph".format(p)

        # Mode "w" overwrites an existing graph file. Closing the file via the
        # context manager ensures the URI is flushed to disk before it is
        # reported as written.
        with open(graph_filename, "w") as graph_file:
            graph_file.write("{}\n".format(graph_uri))

        log.info("Wrote <{}> to '{}'".format(graph_uri, graph_filename))