# map_instruments.py
# Source: a fork of ppquadrat/DigThatLick (GitHub).
# Python3
""" mapping original instrument labels to DTL canonical labels
also adding a generic dtl:orig_inst_label property
Polina Proutskova
August 2019
"""
##############################################################
# paths
# PROPERTY_PREFIX selects which source vocabulary is processed.
# Accepted values: 'je', 'lord', 'orig'.  It must be defined here: both the
# path selection just below and the property selection in the main section
# read it, and leaving it commented out makes the script fail with NameError.
PROPERTY_PREFIX = "lord"

# 'je' uses the JE files; every other prefix ('lord', 'orig') uses the ILL files.
if PROPERTY_PREFIX == "je":
    RDFfile = "TTL/JE_fprints.ttl"
    RDFnewfile = "TTL/JE_inst.ttl"
else:
    RDFfile = "TTL/ILL_leaders.ttl"
    RDFnewfile = "TTL/ILL_inst.ttl"

# CSV table that maps original instrument labels to DTL labels.
INST_LABELS = "DATA/orig2DTL_instruments.csv"
##############################################################
# dtlutil is neither standard library nor part of rdflib; presumably the
# project's own helper module (logging setup, namespaces, graph I/O) -- confirm.
import dtlutil
# logging
import logging
# Minimum log level handed to dtlutil.setup_log(); DEBUG is the most verbose
# standard level.  How setup_log configures handlers is not visible here.
MIN_LEVEL = logging.DEBUG
dtlutil.setup_log(MIN_LEVEL)
##############################################################
# read in rdf graph
# NOTE(review): Namespace and RDFS are each imported twice below; the repeats
# are harmless and are kept because other parts of the file may rely on them.
import rdflib
from rdflib.graph import Graph, Store, URIRef, Literal, BNode
from rdflib.namespace import Namespace, RDFS
from rdflib import plugin
from rdflib.plugins import sparql
from rdflib import Namespace
from rdflib.namespace import RDF, FOAF, RDFS, DC, XSD
# init_namespaces() returns six values; MO and DTL are the ones used further
# down in this script (MO.Instrument, DTL.*_inst_label).
MO, TL, EVENT, OLO, DTL, initNs = dtlutil.init_namespaces()
# g is the graph that the rest of the script reads from and adds triples to.
g = dtlutil.create_graph()
# Presumably parses RDFfile into g -- verify against dtlutil.read_in_rdf.
dtlutil.read_in_rdf(g, RDFfile)
##############################################################
import csv


def _load_instrument_labels(csv_path):
    """Build the lookup from original instrument labels to DTL labels.

    The first line of the CSV is treated as a header and skipped.  In every
    following row, column 1 holds the DTL label; each non-empty cell from
    column 1 up to (but not including) the last column becomes a key that
    maps to that DTL label.  Because column 1 is itself in that range, the
    canonical acronyms are also keys of the dict.

    Rows with fewer than two columns (e.g. blank lines) are skipped instead
    of raising IndexError, and an empty file yields an empty dict.

    :param csv_path: path of the comma-separated mapping file
    :returns: dict mapping original label -> DTL label
    :raises OSError: if the file cannot be opened
    """
    labels = {}
    # newline='' is what the csv module asks for, so that quoted fields with
    # embedded line breaks are parsed correctly.
    with open(csv_path, 'r', newline='') as csvfile:
        logging.info("\nInstrument labels from file: %s", csv_path)
        reader = csv.reader(csvfile, delimiter=',')
        # Skip the header line; the default keeps an empty file from raising.
        next(reader, None)
        for row in reader:
            if len(row) < 2:
                # No DTL label column in this row: nothing to map.
                continue
            dtl_label = row[1]
            logging.debug("dtl label %s to be mapped to:", dtl_label)
            # NOTE(review): the last column is deliberately left out, exactly
            # as in the original index range (1 .. len(row)-2).  Presumably it
            # is not a label column -- confirm against the CSV layout.
            for orig_label in row[1:-1]:
                if orig_label:
                    logging.debug(orig_label)
                    labels[orig_label] = dtl_label
    return labels


INSTRUMENT_LABELS = _load_instrument_labels(INST_LABELS)
def map_inst_label(orig_label):
    """Return the DTL label for an original instrument label.

    :param orig_label: original label; it is looked up exactly as given, so
        any stripping of whitespace or punctuation is the caller's job
    :returns: the mapped DTL label from INSTRUMENT_LABELS, or the string
        "other" when the label has no entry
    """
    # One dict lookup with a default instead of a membership test on .keys()
    # followed by a second lookup.
    return INSTRUMENT_LABELS.get(orig_label, "other")
#########################################################################
# main
# For every instrument object in the graph:
#   - get its original label
#   - map it to the DTL acronym
#   - add the DTL label (plus a generic copy of the original label)
# and finally save the RDF.
#
# NOTE: this script does not merge instruments that end up with the same DTL
# label.  Merging might be the right thing to do and, if so, should be
# implemented here.

# Pick the source-specific property that carries the original label.
if PROPERTY_PREFIX == "lord":
    PROPERTY = DTL.lord_inst_label
elif PROPERTY_PREFIX == "je":
    PROPERTY = DTL.je_inst_label
else:
    PROPERTY = DTL.orig_inst_label

# Materialise the subjects before the loop: triples are added to g inside it,
# and whether the underlying store tolerates mutation during iteration is not
# visible from this file.
for instrumentURI in list(g.subjects(RDF.type, MO.Instrument)):
    label_node = g.value(instrumentURI, PROPERTY)
    if label_node is None:
        # Graph.value() returns None when the instrument has no such property.
        # Calling str() on that would store the literal "None" as its label.
        logging.warning("no %s label found for instrument %s",
                        PROPERTY_PREFIX, instrumentURI)
        orig_label = ""
    else:
        orig_label = str(label_node)
    logging.debug("original label: %s", orig_label)
    # Surrounding blanks, commas and full stops are ignored for the lookup
    # only; the label written back to the graph below is left unstripped.
    DTLlabel = map_inst_label(orig_label.strip(" ,."))
    logging.debug("mapped to: %s", DTLlabel)
    g.add((instrumentURI, DTL.dtl_inst_label, Literal(DTLlabel)))
    if label_node is not None:
        g.add((instrumentURI, DTL.orig_inst_label, Literal(orig_label)))

dtlutil.write_rdf(g, RDFnewfile)