Skip to content

Commit

Permalink
Merge pull request #8 from white-gecko/feature/fixSchemaGeneration
Browse files Browse the repository at this point in the history
Fix schema generation and reorganize some schema code
  • Loading branch information
briesenberg07 authored Jun 14, 2022
2 parents 17d83c7 + 821e12d commit a5d1d2b
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 98 deletions.
21 changes: 21 additions & 0 deletions shaclgen/generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from rdflib import Graph
from urllib.parse import urlparse


class Generator:
    """Common interface and helpers for the SHACL shape generators.

    This base class holds no state of its own.  ``sh_label_gen`` reads
    ``self.namespaces`` and calls ``compute_qname`` on it, but ``__init__``
    here never sets that attribute, so a subclass has to assign it before
    the helper is used.
    """

    def __init__(self, graph: "Graph", prefixes=None):
        """Accept the constructor arguments; the base class stores nothing.

        Args:
            graph: the source graph to derive shapes from.
            prefixes: optional extra prefix definitions (unused here).
        """
        pass

    def sh_label_gen(self, uri):
        """Build a ``<prefix>_<local name>`` label for *uri*.

        Args:
            uri: the URI to label; it is handed unchanged to
                ``self.namespaces.compute_qname``.

        Returns:
            The prefix and local name reported by ``compute_qname`` joined
            with an underscore.
        """
        prefix, _namespace, name = self.namespaces.compute_qname(uri)
        return f"{prefix}_{name}"

    def uri_validator(self, x):
        """Tell whether *x* parses as a URI with a scheme and a network location.

        Args:
            x: candidate value, normally a string.

        Returns:
            ``True`` when ``urlparse`` yields both a non-empty scheme and a
            non-empty netloc; ``False`` otherwise, including when *x* cannot
            be parsed at all.
        """
        try:
            result = urlparse(x)
        except (ValueError, AttributeError, TypeError):
            # ValueError: malformed URL (e.g. an unbalanced IPv6 bracket).
            # AttributeError / TypeError: *x* is not string-like.
            # Anything else is a genuine bug and must not be swallowed.
            return False
        return bool(result.scheme and result.netloc)

    def gen_graph(self, namespace=None, implicit_class_target=False):
        """Placeholder for shape-graph generation; the base version returns ``None``.

        Args:
            namespace: optional namespace argument (unused here).
            implicit_class_target: optional flag (unused here).
        """
        return None
110 changes: 25 additions & 85 deletions shaclgen/schema.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from rdflib import Graph, RDF, RDFS, OWL, Namespace
from rdflib.namespace import XSD
from rdflib import Graph, Namespace
from rdflib.namespace import XSD, RDF, RDFS, OWL, SH
from rdflib.namespace import NamespaceManager
from rdflib.term import URIRef, Literal, BNode
import collections
import json
from urllib.parse import urlparse
from rdflib.collection import Collection
import pkg_resources
from .generator import Generator

"""
current assumptions:
Expand All @@ -15,15 +16,14 @@
"""


class schema:
class schema(Generator):
def __init__(self, graph: Graph, prefixes=None):
self.G = graph

self.CLASSES = collections.OrderedDict()
self.PROPS = collections.OrderedDict()
self.REST = collections.OrderedDict()

self.namespaces = []
self.datatypes = [
XSD.string,
XSD.boolean,
Expand All @@ -42,50 +42,17 @@ def __init__(self, graph: Graph, prefixes=None):
path = "prefixes/namespaces.json"
filepath = pkg_resources.resource_filename(__name__, path)

self.namespaces = NamespaceManager(graph=Graph())
self.namespaces.bind("sh", SH)

with open(filepath, "r", encoding="utf-8") as fin:
self.names = json.load(fin)
for prefix, namespace in json.load(fin).items():
self.namespaces.bind(prefix, namespace)

if prefixes:
with open(prefixes, "r", encoding="utf-8") as fin:
self.names.update(json.load(fin))

def parse_uri(self, URI):
if "#" in URI:
label = URI.split("#")[-1]
else:
label = URI.split("/")[-1]
return str(label)

def gen_prefix_bindings(self):
count = 0
subs = []
for s, p, o in self.G.triples((None, RDF.type, None)):
if type(s) != BNode:
subs.append(s)

for pred in subs:
if pred.replace(self.parse_uri(pred), "") not in self.names.values():
count = count + 1
self.names["ns" + str(count)] = pred.replace(self.parse_uri(pred), "")
subs = list(set(subs))
for pref, uri in self.names.items():
for s in subs:
if uri == s.replace(self.parse_uri(s), ""):
self.namespaces.append((pref, uri))
self.namespaces = list(set(self.namespaces))

def sh_label_gen(self, uri):
parsed = uri.replace(self.parse_uri(uri), "")
for cur, pref in self.names.items():
if pref == parsed:
return cur + "_" + self.parse_uri(uri)

def uri_validator(self, x):
try:
result = urlparse(x)
return all([result.scheme, result.netloc])
except:
return False
for prefix, namespace in json.load(fin).items():
self.namespaces.bind(prefix, namespace)

def extract_props(self):
properties = []
Expand All @@ -105,16 +72,6 @@ def extract_props(self):
for types in property_types:
for s, p, o in self.G.triples((None, RDF.type, types)):
properties.append(s)
#
# for s,p,o in self.G.triples((None,RDF.type,OWL.ObjectProperty)):
# properties.append(s)
# for s,p,o in self.G.triples((None,RDF.type,OWL.AnnotationProperty)):
# properties.append(s)
# for s,p,o in self.G.triples((None,RDF.type,OWL.TransitiveProperty)):
# properties.append(s)
#
# for s,p,o in self.G.triples((None,RDF.type,RDF.Property)):
# properties.append(s)

for p in sorted(properties):
self.PROPS[p] = {}
Expand All @@ -136,10 +93,6 @@ def extract_props(self):
self.PROPS[prop]["definition"] = None
self.PROPS[prop]["type"] = []

# for domain in self.G.objects(subject=s, predicate=RDFS.domain):
# if type(domain) != BNode:
# self.PROPS[prop]['domain'] = domain

for obje in self.G.objects(subject=prop, predicate=RDF.type):
self.PROPS[prop]["type"].append(obje)
for sub, pred, ob in self.G.triples((s, RDFS.domain, None)):
Expand Down Expand Up @@ -173,11 +126,10 @@ def extract_props(self):
for name in self.G.objects(subject=s, predicate=RDFS.label):

self.PROPS[prop]["shape_name"] = name
if self.PROPS[prop]["shape_name"] == None:
if self.PROPS[prop]["shape_name"] is None:
self.PROPS[prop]["shape_name"] = self.sh_label_gen(prop)

def extract_classes(self):
self.gen_prefix_bindings()
classes = []
for s, p, o in self.G.triples((None, RDF.type, OWL.Class)):
if type(s) != BNode:
Expand All @@ -198,7 +150,7 @@ def extract_classes(self):
s = URIRef(c)
for name in self.G.objects(subject=s, predicate=RDFS.label):
self.CLASSES[c]["shape_name"] = name
if self.CLASSES[c]["shape_name"] == None:
if self.CLASSES[c]["shape_name"] is None:
self.CLASSES[c]["shape_name"] = self.sh_label_gen(c)
for defin in self.G.objects(subject=s, predicate=RDFS.comment):
self.CLASSES[c]["definition"] = defin
Expand Down Expand Up @@ -255,23 +207,13 @@ def extract_restrictions(self):
self.REST[rest]["value"] = rest_val[0]

def gen_graph(self, namespace=None, implicit_class_target=False):
self.gen_prefix_bindings()
self.extract_props()
self.extract_classes()
self.extract_restrictions()
ng = Graph()

for prefix, namespace in self.G.namespace_manager.namespaces():
ng.bind(prefix, namespace)

SH = Namespace("http://www.w3.org/ns/shacl#")
ng.bind("sh", SH)

EX = Namespace("http://www.example.org/")
ng.bind("ex", EX)
ng = Graph(namespace_manager=self.namespaces)

if namespace != None:
if self.uri_validator(namespace[0]) != False:
if namespace is not None:
if self.uri_validator(namespace[0]):
uri = namespace[0]
if namespace[0][-1] not in ["#", "/", "\\"]:
uri = namespace[0] + "/"
Expand All @@ -285,7 +227,7 @@ def gen_graph(self, namespace=None, implicit_class_target=False):
EX = Namespace("http://www.example.org/")
ng.bind("ex", EX)

# add class Node Shapes
# add class Node Shapes
for c in self.CLASSES.keys():
subject = c
clabel = self.CLASSES[c]["label"]
Expand All @@ -300,7 +242,7 @@ def gen_graph(self, namespace=None, implicit_class_target=False):
ng.add(t)

ng.add((subject, RDF.type, SH.NodeShape))
# ng.add((EX[clabel], SH.name, Literal(self.CLASSES[c]['shape_name']+' Node shape')))
# ng.add((EX[clabel], SH.name, Literal(self.CLASSES[c]['shape_name']+' Node shape')))
ng.add((subject, SH.nodeKind, SH.BlankNodeOrIRI))
if self.CLASSES[c]["definition"] is not None:
ng.add(
Expand All @@ -309,7 +251,7 @@ def gen_graph(self, namespace=None, implicit_class_target=False):

for p in self.PROPS.keys():
label = self.PROPS[p]["label"]
# ng.add((EX[label], SH.name, Literal(str(self.PROPS[p]['shape_name']) +' Property shape')))
# ng.add((EX[label], SH.name, Literal(str(self.PROPS[p]['shape_name']) +' Property shape')))
# copy rdfs:label as property shape names
for o in self.G.objects(subject=p, predicate=RDFS.label):
ng.add((EX[label], SH.name, o))
Expand Down Expand Up @@ -343,10 +285,10 @@ def gen_graph(self, namespace=None, implicit_class_target=False):
for x in self.PROPS[p]["e_prop"]:
ng.add((EX[label], SH.equals, x))

## create range unions using sh:or
# create range unions using sh:or
if self.PROPS[p]["range_union"] is not None:
rang = self.PROPS[p]["range_union"]
if set(rang).issubset(self.datatypes) == True:
if set(rang).issubset(self.datatypes):

st = BNode(label + str(0) + "a")
ng.add((EX[label], EX["or"], st))
Expand Down Expand Up @@ -418,7 +360,7 @@ def gen_graph(self, namespace=None, implicit_class_target=False):
if self.PROPS[p]["domain"] is not None:
subject = self.PROPS[p]["domain"]
if subject in self.CLASSES.keys():
plabel = self.PROPS[p]["label"] #
plabel = self.PROPS[p]["label"]
if implicit_class_target:
ng.add((subject, SH.property, EX[plabel]))
else:
Expand All @@ -428,7 +370,7 @@ def gen_graph(self, namespace=None, implicit_class_target=False):
if self.PROPS[p]["domain_union"] is not None:
for d in self.PROPS[p]["domain_union"]:
if d in self.CLASSES.keys():
plabel = self.PROPS[p]["label"] #
plabel = self.PROPS[p]["label"]

if implicit_class_target:
ng.add((d, SH.property, EX[plabel]))
Expand All @@ -439,7 +381,7 @@ def gen_graph(self, namespace=None, implicit_class_target=False):
for r in self.REST.keys():
blank = BNode()

# if self.REST[r]['onProp'] == p: #and self.REST[r]['onClass'] == self.PROPS[p]['domain']:
# if self.REST[r]['onProp'] == p: #and self.REST[r]['onClass'] == self.PROPS[p]['domain']:

ng.add((EX[self.sh_label_gen(self.REST[r]["onClass"])], SH.property, blank))
ng.add((blank, SH.path, self.REST[r]["onProp"]))
Expand Down Expand Up @@ -537,8 +479,6 @@ def gen_graph(self, namespace=None, implicit_class_target=False):
BNode(dummy + str(x + 1) + "a"),
)
)
#
#
elif type(self.REST[r]["value"]) in self.datatypes:
ng.add((blank, SH["datatype"], self.REST[r]["value"]))
else:
Expand Down
15 changes: 2 additions & 13 deletions shaclgen/shaclgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
import collections
from rdflib.namespace import XSD, RDF, SH
from rdflib.namespace import NamespaceManager
from urllib.parse import urlparse
import pkg_resources
from .generator import Generator


class data_graph:
class data_graph(Generator):
def __init__(self, graph: Graph, prefixes=None):
self.G = graph

Expand All @@ -31,17 +31,6 @@ def __init__(self, graph: Graph, prefixes=None):
for prefix, namespace in json.load(fin).items():
self.namespaces.bind(prefix, namespace)

def sh_label_gen(self, uri):
prefix, namespace, name = self.namespaces.compute_qname(uri)
return prefix + "_" + name

def uri_validator(self, x):
try:
result = urlparse(x)
return all([result.scheme, result.netloc])
except Exception:
return False

def extract_classes(self):
types_query = "select distinct ?class_ { ?s rdf:type ?class_ }"
for row in self.G.query(types_query, initNs={"rdf": RDF}):
Expand Down
32 changes: 32 additions & 0 deletions tests/test_schema_interface.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from rdflib import Graph
from shaclgen.schema import schema
from helpers import assertAskQuery


def test_namespace():
    """Check that a custom namespace passed to ``gen_graph`` is used for node shapes.

    A one-class Turtle document is parsed, run through ``schema.gen_graph``
    with ``namespace=("http://custom.namespace.org/", "custom")``, and the
    result is checked with an ASK query for a ``sh:NodeShape`` whose IRI
    starts with that namespace.
    """
    turtle_source = """
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
<urn:test:Class> a rdfs:Class .
"""
    node_shape_in_custom_ns = """
prefix sh: <http://www.w3.org/ns/shacl#>
prefix xsd: <http://www.w3.org/2001/XMLSchema#>
ask {
?nodeShape a sh:NodeShape .
filter regex(str(?nodeShape), "^http://custom.namespace.org/")
}
"""
    custom_namespace = ("http://custom.namespace.org/", "custom")

    ontology = Graph()
    ontology.parse(data=turtle_source, format="turtle")

    shapes = schema(ontology).gen_graph(namespace=custom_namespace)

    # presumably assertAskQuery fails the test when the ASK query is false;
    # the helper lives in tests/helpers and is not visible here.
    assertAskQuery(shapes, node_shape_in_custom_ns)

0 comments on commit a5d1d2b

Please sign in to comment.