-
Notifications
You must be signed in to change notification settings - Fork 0
/
landscape.py
138 lines (120 loc) · 4.7 KB
/
landscape.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
"""Module that describes all commonly used methods for enriching ontologies.
"""
from rdflib import Graph, Literal, BNode, URIRef
from rdflib.namespace import RDF
from SPARQLWrapper import SPARQLWrapper, JSON
class DBpedia:
    """Gateway for extracting data from DBpedia.
    """

    def query(self, query):
        """Runs a SPARQL query against the DBpedia endpoint.

        Args:
            query: The SPARQL query text handed to the endpoint.

        Returns:
            The value stored under ``results -> bindings`` in the converted
            JSON response.
        """
        endpoint = SPARQLWrapper('http://dbpedia.org/sparql')
        # Ask for JSON so that convert() produces a structure which can be
        # indexed by key below.
        endpoint.setReturnFormat(JSON)
        endpoint.setQuery(query)
        response = endpoint.query().convert()
        bindings = response['results']['bindings']
        return bindings
class Blacklist(list):
    """Describes which predicates should be excluded while enriching the ontology.

    This is a plain ``list`` subclass, so every regular list operation is
    available in addition to the helpers below.
    """

    def add(self, pred):
        """Adds a predicate to the blacklist.

        Args:
            pred: The predicate to append to the end of the list.
        """
        self.append(pred)

    def remove(self, pred):
        """Removes the first occurrence of a predicate from the blacklist.

        Args:
            pred: The predicate to remove.

        Raises:
            ValueError: If the predicate is not in the blacklist.
        """
        # This method overrides list.remove, so calling self.remove(pred)
        # here would re-enter this method and recurse without end. Delegate
        # to the list implementation explicitly instead.
        super(Blacklist, self).remove(pred)

    def check(self, pred):
        """Checks whether the predicate is in the blacklist.

        Args:
            pred: The predicate to look up.

        Returns:
            True if the predicate is blacklisted, False otherwise.
        """
        return pred in self
class Ontology(object):
    """A semantic ontology backed by an rdflib graph.

    Attributes:
        path: The path the ontology was loaded from.
        name: A string representing the name of ontology.
        graph: The rdflib ``Graph`` parsed from ``path`` in Turtle format.
        blacklist: A list of the predicates to exclude while enriching.
    """

    # Namespace prepended to the local names handed to ``check``.
    _STO_NS = 'https://w3id.org/i40/sto#'

    def __init__(self, path, name='Ontology'):
        """Initializes a semantic ontology.

        Args:
            path: Location of the Turtle file to parse.
            name: Human-readable name of the ontology.
        """
        self.path = path
        self.name = name
        # Parse into an explicitly created graph instead of relying on the
        # return value of parse().
        self.graph = Graph()
        self.graph.parse(path, format='turtle')
        self.blacklist = Blacklist()

    def export(self, filename):
        """Saves generated ontology as a turtle file.

        Args:
            filename: Destination passed to the graph serializer.
        """
        self.graph.serialize(destination=filename, format='turtle')

    def add(self, sub, pred, obj):
        """Adds triple to ontology.

        Args:
            sub: Subject of the triple.
            pred: Predicate of the triple.
            obj: Object of the triple.
        """
        self.graph.add((sub, pred, obj))

    def remove(self, sub=None, pred=None, obj=None):
        """Removes triple from ontology.

        Args:
            sub: Subject of the triple, or None.
            pred: Predicate of the triple, or None.
            obj: Object of the triple, or None.

        The values are forwarded unchanged to the graph's ``remove``.
        """
        self.graph.remove((sub, pred, obj))

    def check(self, sub, pred=None, obj=None):
        """Returns True if triple is in ontology and False o/w.

        By default (with pred and obj not specified) checks if it is an
        sto:Standard.

        Args:
            sub: Local name of the subject; it is appended to the STO
                namespace to form the subject URI.
            pred: Predicate of the triple, or None.
            obj: Object of the triple, or None.

        Returns:
            True if the graph contains the triple, False otherwise.
        """
        sub = URIRef(self._STO_NS + sub)
        if not pred and not obj:
            pred = RDF.type
            obj = URIRef(self._STO_NS + 'Standard')
        return (sub, pred, obj) in self.graph

    def set_prefix(self, prefix, url):
        """Sets ontology prefixes.

        Args:
            prefix: The prefix label to bind.
            url: The namespace the prefix stands for.
        """
        self.graph.bind(prefix, url)

    def query(self, query):
        """Queries data from the ontology.

        Args:
            query: The query handed to the graph's ``query`` method.

        Returns:
            Whatever the graph's ``query`` method returns.
        """
        return self.graph.query(query)

    def enrich(self, sub, source):
        """Enriches the ontology with triples taken from another source.

        Args:
            sub: Subject every triple is attached to. When None, the subject
                of each triple is read from that row's own ``sub`` binding.
            source: Iterable of result rows. Each row is a mapping with a
                ``pred`` and an ``obj`` binding (and a ``sub`` binding when
                ``sub`` is None); a binding is a mapping with ``value`` and,
                for objects, ``type`` plus optional ``xml:lang`` and
                ``datatype`` keys.

        Returns:
            A dict with the subject that was passed in under ``subj`` and
            the predicate of every processed row, blacklisted ones included,
            under ``trip``.
        """
        logs = {
            "subj": sub,
            "trip": []
        }
        for triple in source:
            pred = triple['pred']['value']
            logs["trip"].append(pred)
            if pred in self.blacklist:
                continue
            # Resolve the subject per row. Overwriting ``sub`` here would
            # attach every later row to the subject of the first one.
            if sub is None:
                subject = URIRef(triple['sub']['value'])
            else:
                subject = sub
            obj = triple['obj']
            term = self._object_term(obj)
            if term is None:
                print('---UNKNOWN OBJECT TYPE ' + obj['type']
                      + ' FOR ' + str(subject) + '---')
                continue
            self.graph.add((subject, URIRef(pred), term))
        return logs

    @staticmethod
    def _object_term(obj):
        """Converts one object binding of a result row into an rdflib term.

        Args:
            obj: Mapping with ``type`` and ``value`` keys and optional
                ``xml:lang`` and ``datatype`` keys.

        Returns:
            A URIRef, BNode or Literal, or None when the type is unknown.
        """
        value = obj['value']
        kind = obj['type']
        if kind == 'uri':
            return URIRef(value)
        if kind == 'bnode':
            return BNode(value)
        if kind in ('literal', 'typed-literal'):
            # rdflib refuses a Literal that carries both a language tag and
            # a datatype, so pass only one of them. The language tag wins
            # because a language-tagged literal already has an implied
            # datatype.
            lang = obj.get('xml:lang')
            if lang:
                return Literal(value, lang=lang)
            datatype = obj.get('datatype')
            if datatype:
                return Literal(value, datatype=datatype)
            return Literal(value)
        return None