From 5456eb651d75a0855eb3e83d201e7f8b825c8be3 Mon Sep 17 00:00:00 2001 From: Natanael Arndt Date: Thu, 9 Jun 2022 17:35:41 +0200 Subject: [PATCH 1/7] Add test for schema generation --- tests/test_schema_interface.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 tests/test_schema_interface.py diff --git a/tests/test_schema_interface.py b/tests/test_schema_interface.py new file mode 100644 index 0000000..8b17421 --- /dev/null +++ b/tests/test_schema_interface.py @@ -0,0 +1,32 @@ +from rdflib import Graph +from shaclgen.schema import schema +from helpers import assertAskQuery + + +def test_namespace(): + source_graph = Graph() + + data = """ + @prefix rdfs: . + a rdfs:Class . + """ + + source_graph.parse(data=data, format="turtle") + + extraction_graph = schema(source_graph) + shacl_graph = extraction_graph.gen_graph( + namespace=("http://custom.namespace.org/", "custom") + ) + + assertAskQuery( + shacl_graph, + """ + prefix sh: + prefix xsd: + ask { + ?nodeShape a sh:NodeShape . + + filter regex(str(?nodeShape), "^http://custom.namespace.org/") + } + """, + ) From c01b424d1e15db17356fc7cc808bc8603e7bccda Mon Sep 17 00:00:00 2001 From: Natanael Arndt Date: Thu, 9 Jun 2022 17:38:03 +0200 Subject: [PATCH 2/7] Some linter hints --- shaclgen/schema.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/shaclgen/schema.py b/shaclgen/schema.py index 2807bca..ed32dd2 100644 --- a/shaclgen/schema.py +++ b/shaclgen/schema.py @@ -84,7 +84,7 @@ def uri_validator(self, x): try: result = urlparse(x) return all([result.scheme, result.netloc]) - except: + except Exception: return False def extract_props(self): @@ -173,7 +173,7 @@ def extract_props(self): for name in self.G.objects(subject=s, predicate=RDFS.label): self.PROPS[prop]["shape_name"] = name - if self.PROPS[prop]["shape_name"] == None: + if self.PROPS[prop]["shape_name"] is None: self.PROPS[prop]["shape_name"] = self.sh_label_gen(prop) def extract_classes(self): @@ -198,7 +198,7 @@ def extract_classes(self): s = URIRef(c) for name in self.G.objects(subject=s, predicate=RDFS.label): self.CLASSES[c]["shape_name"] = name - if self.CLASSES[c]["shape_name"] == None: + if self.CLASSES[c]["shape_name"] is None: self.CLASSES[c]["shape_name"] = self.sh_label_gen(c) for defin in self.G.objects(subject=s, predicate=RDFS.comment): self.CLASSES[c]["definition"] = defin @@ -270,8 +270,8 @@ def gen_graph(self, namespace=None, implicit_class_target=False): EX = Namespace("http://www.example.org/") ng.bind("ex", EX) - if namespace != None: - if self.uri_validator(namespace[0]) != False: + if namespace is not None: + if self.uri_validator(namespace[0]): uri = namespace[0] if namespace[0][-1] not in ["#", "/", "\\"]: uri = namespace[0] + "/" @@ -346,7 +346,7 @@ def gen_graph(self, namespace=None, implicit_class_target=False): ## create range unions using sh:or if self.PROPS[p]["range_union"] is not None: rang = self.PROPS[p]["range_union"] - if set(rang).issubset(self.datatypes) == True: + if set(rang).issubset(self.datatypes): st = BNode(label + str(0) + "a") ng.add((EX[label], EX["or"], st)) From b8e4c71081d35ee2c155d016aaff1ee753aedf03 Mon Sep 17 00:00:00 2001 From: Natanael Arndt Date: Thu, 9 Jun 2022 17:44:22 +0200 Subject: [PATCH 3/7] clean up some comments --- shaclgen/schema.py | 30 +++++++----------------------- 1 file changed, 7 insertions(+), 23 deletions(-) diff --git a/shaclgen/schema.py b/shaclgen/schema.py index ed32dd2..fbf75b5 100644 --- a/shaclgen/schema.py +++ b/shaclgen/schema.py @@ -105,16 +105,6 @@ def extract_props(self): for types in property_types: for s, p, o in self.G.triples((None, RDF.type, types)): properties.append(s) - # - # for s,p,o in self.G.triples((None,RDF.type,OWL.ObjectProperty)): - # properties.append(s) - # for s,p,o in self.G.triples((None,RDF.type,OWL.AnnotationProperty)): - # properties.append(s) - # for s,p,o in self.G.triples((None,RDF.type,OWL.TransitiveProperty)): - # properties.append(s) - # - # for s,p,o in self.G.triples((None,RDF.type,RDF.Property)): - # properties.append(s) for p in sorted(properties): self.PROPS[p] = {} @@ -136,10 +126,6 @@ def extract_props(self): self.PROPS[prop]["definition"] = None self.PROPS[prop]["type"] = [] - # for domain in self.G.objects(subject=s, predicate=RDFS.domain): - # if type(domain) != BNode: - # self.PROPS[prop]['domain'] = domain - for obje in self.G.objects(subject=prop, predicate=RDF.type): self.PROPS[prop]["type"].append(obje) for sub, pred, ob in self.G.triples((s, RDFS.domain, None)): @@ -285,7 +271,7 @@ def gen_graph(self, namespace=None, implicit_class_target=False): EX = Namespace("http://www.example.org/") ng.bind("ex", EX) - # add class Node Shapes + # add class Node Shapes for c in self.CLASSES.keys(): subject = c clabel = self.CLASSES[c]["label"] @@ -300,7 +286,7 @@ def gen_graph(self, namespace=None, implicit_class_target=False): ng.add(t) ng.add((subject, RDF.type, SH.NodeShape)) - # ng.add((EX[clabel], SH.name, Literal(self.CLASSES[c]['shape_name']+' Node shape'))) + # ng.add((EX[clabel], SH.name, Literal(self.CLASSES[c]['shape_name']+' Node shape'))) ng.add((subject, SH.nodeKind, SH.BlankNodeOrIRI)) if self.CLASSES[c]["definition"] is not None: ng.add( @@ -309,7 +295,7 @@ def gen_graph(self, namespace=None, implicit_class_target=False): for p in self.PROPS.keys(): label = self.PROPS[p]["label"] - # ng.add((EX[label], SH.name, Literal(str(self.PROPS[p]['shape_name']) +' Property shape'))) + # ng.add((EX[label], SH.name, Literal(str(self.PROPS[p]['shape_name']) +' Property shape'))) # copy rdfs:label as property shape names for o in self.G.objects(subject=p, predicate=RDFS.label): ng.add((EX[label], SH.name, o)) @@ -343,7 +329,7 @@ def gen_graph(self, namespace=None, implicit_class_target=False): for x in self.PROPS[p]["e_prop"]: ng.add((EX[label], SH.equals, x)) - ## create range unions using sh:or + # create range unions using sh:or if self.PROPS[p]["range_union"] is not None: rang = self.PROPS[p]["range_union"] if set(rang).issubset(self.datatypes): @@ -418,7 +404,7 @@ def gen_graph(self, namespace=None, implicit_class_target=False): if self.PROPS[p]["domain"] is not None: subject = self.PROPS[p]["domain"] if subject in self.CLASSES.keys(): - plabel = self.PROPS[p]["label"] # + plabel = self.PROPS[p]["label"] if implicit_class_target: ng.add((subject, SH.property, EX[plabel])) else: @@ -428,7 +414,7 @@ def gen_graph(self, namespace=None, implicit_class_target=False): if self.PROPS[p]["domain_union"] is not None: for d in self.PROPS[p]["domain_union"]: if d in self.CLASSES.keys(): - plabel = self.PROPS[p]["label"] # + plabel = self.PROPS[p]["label"] if implicit_class_target: ng.add((d, SH.property, EX[plabel])) @@ -439,7 +425,7 @@ def gen_graph(self, namespace=None, implicit_class_target=False): for r in self.REST.keys(): blank = BNode() - # if self.REST[r]['onProp'] == p: #and self.REST[r]['onClass'] == self.PROPS[p]['domain']: + # if self.REST[r]['onProp'] == p: #and self.REST[r]['onClass'] == self.PROPS[p]['domain']: ng.add((EX[self.sh_label_gen(self.REST[r]["onClass"])], SH.property, blank)) ng.add((blank, SH.path, self.REST[r]["onProp"])) @@ -537,8 +523,6 @@ def gen_graph(self, namespace=None, implicit_class_target=False): BNode(dummy + str(x + 1) + "a"), ) ) - # - # elif type(self.REST[r]["value"]) in self.datatypes: ng.add((blank, SH["datatype"], self.REST[r]["value"])) else: From dd051f480faf472110df428bc9e4c19765cfb3af Mon Sep 17 00:00:00 2001 From: Natanael Arndt Date: Thu, 9 Jun 2022 17:45:29 +0200 Subject: [PATCH 4/7] import namespace from rdflib --- shaclgen/schema.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/shaclgen/schema.py b/shaclgen/schema.py index fbf75b5..9ac8ada 100644 --- a/shaclgen/schema.py +++ b/shaclgen/schema.py @@ -1,5 +1,5 @@ -from rdflib import Graph, RDF, RDFS, OWL, Namespace -from rdflib.namespace import XSD +from rdflib import Graph, Namespace +from rdflib.namespace import XSD, RDF, RDFS, OWL, SH from rdflib.term import URIRef, Literal, BNode import collections import json @@ -250,7 +250,6 @@ def gen_graph(self, namespace=None, implicit_class_target=False): for prefix, namespace in self.G.namespace_manager.namespaces(): ng.bind(prefix, namespace) - SH = Namespace("http://www.w3.org/ns/shacl#") ng.bind("sh", SH) EX = Namespace("http://www.example.org/") From fceedb03adac97a9c4d911cdd67c22e8f5aafb71 Mon Sep 17 00:00:00 2001 From: Natanael Arndt Date: Thu, 9 Jun 2022 17:57:29 +0200 Subject: [PATCH 5/7] reorganize namespace handling in the same way aas for shaclgen --- shaclgen/schema.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/shaclgen/schema.py b/shaclgen/schema.py index 9ac8ada..ba18e45 100644 --- a/shaclgen/schema.py +++ b/shaclgen/schema.py @@ -1,5 +1,6 @@ from rdflib import Graph, Namespace from rdflib.namespace import XSD, RDF, RDFS, OWL, SH +from rdflib.namespace import NamespaceManager from rdflib.term import URIRef, Literal, BNode import collections import json @@ -23,7 +24,6 @@ def __init__(self, graph: Graph, prefixes=None): self.PROPS = collections.OrderedDict() self.REST = collections.OrderedDict() - self.namespaces = [] self.datatypes = [ XSD.string, XSD.boolean, @@ -42,12 +42,17 @@ def __init__(self, graph: Graph, prefixes=None): path = "prefixes/namespaces.json" filepath = pkg_resources.resource_filename(__name__, path) + self.namespaces = NamespaceManager(graph=Graph()) + self.namespaces.bind("sh", SH) + with open(filepath, "r", encoding="utf-8") as fin: - self.names = json.load(fin) + for prefix, namespace in json.load(fin).items(): + self.namespaces.bind(prefix, namespace) if prefixes: with open(prefixes, "r", encoding="utf-8") as fin: - self.names.update(json.load(fin)) + for prefix, namespace in json.load(fin).items(): + self.namespaces.bind(prefix, namespace) def parse_uri(self, URI): if "#" in URI: @@ -245,15 +250,7 @@ def gen_graph(self, namespace=None, implicit_class_target=False): self.extract_props() self.extract_classes() self.extract_restrictions() - ng = Graph() - - for prefix, namespace in self.G.namespace_manager.namespaces(): - ng.bind(prefix, namespace) - - ng.bind("sh", SH) - - EX = Namespace("http://www.example.org/") - ng.bind("ex", EX) + ng = Graph(namespace_manager=self.namespaces) if namespace is not None: if self.uri_validator(namespace[0]): From 34d9280bea02f4585c289439ff2c9b1384fc9b54 Mon Sep 17 00:00:00 2001 From: Natanael Arndt Date: Thu, 9 Jun 2022 20:05:09 +0200 Subject: [PATCH 6/7] Unify common methods in a Generator base class --- shaclgen/generator.py | 21 +++++++++++++++++++++ shaclgen/schema.py | 43 ++----------------------------------------- shaclgen/shaclgen.py | 14 ++------------ 3 files changed, 25 insertions(+), 53 deletions(-) create mode 100644 shaclgen/generator.py diff --git a/shaclgen/generator.py b/shaclgen/generator.py new file mode 100644 index 0000000..ed66305 --- /dev/null +++ b/shaclgen/generator.py @@ -0,0 +1,21 @@ +from rdflib import Graph +from urllib.parse import urlparse + + +class Generator: + def __init__(self, graph: Graph, prefixes=None): + pass + + def sh_label_gen(self, uri): + prefix, namespace, name = self.namespaces.compute_qname(uri) + return prefix + "_" + name + + def uri_validator(self, x): + try: + result = urlparse(x) + return all([result.scheme, result.netloc]) + except Exception: + return False + + def gen_graph(self, namespace=None, implicit_class_target=False): + pass diff --git a/shaclgen/schema.py b/shaclgen/schema.py index ba18e45..a65c20d 100644 --- a/shaclgen/schema.py +++ b/shaclgen/schema.py @@ -4,9 +4,9 @@ from rdflib.term import URIRef, Literal, BNode import collections import json -from urllib.parse import urlparse from rdflib.collection import Collection import pkg_resources +from .generator import Generator """ current assumptions: @@ -16,7 +16,7 @@ """ -class schema: +class schema(Generator): def __init__(self, graph: Graph, prefixes=None): self.G = graph @@ -54,43 +54,6 @@ def __init__(self, graph: Graph, prefixes=None): for prefix, namespace in json.load(fin).items(): self.namespaces.bind(prefix, namespace) - def parse_uri(self, URI): - if "#" in URI: - label = URI.split("#")[-1] - else: - label = URI.split("/")[-1] - return str(label) - - def gen_prefix_bindings(self): - count = 0 - subs = [] - for s, p, o in self.G.triples((None, RDF.type, None)): - if type(s) != BNode: - subs.append(s) - - for pred in subs: - if pred.replace(self.parse_uri(pred), "") not in self.names.values(): - count = count + 1 - self.names["ns" + str(count)] = pred.replace(self.parse_uri(pred), "") - subs = list(set(subs)) - for pref, uri in self.names.items(): - for s in subs: - if uri == s.replace(self.parse_uri(s), ""): - self.namespaces.append((pref, uri)) - self.namespaces = list(set(self.namespaces)) - - def sh_label_gen(self, uri): - parsed = uri.replace(self.parse_uri(uri), "") - for cur, pref in self.names.items(): - if pref == parsed: - return cur + "_" + self.parse_uri(uri) - - def uri_validator(self, x): - try: - result = urlparse(x) - return all([result.scheme, result.netloc]) - except Exception: - return False def extract_props(self): properties = [] @@ -168,7 +131,6 @@ def extract_props(self): self.PROPS[prop]["shape_name"] = self.sh_label_gen(prop) def extract_classes(self): - self.gen_prefix_bindings() classes = [] for s, p, o in self.G.triples((None, RDF.type, OWL.Class)): if type(s) != BNode: @@ -246,7 +208,6 @@ def extract_restrictions(self): self.REST[rest]["value"] = rest_val[0] def gen_graph(self, namespace=None, implicit_class_target=False): - self.gen_prefix_bindings() self.extract_props() self.extract_classes() self.extract_restrictions() diff --git a/shaclgen/shaclgen.py b/shaclgen/shaclgen.py index 94a05db..211e96b 100644 --- a/shaclgen/shaclgen.py +++ b/shaclgen/shaclgen.py @@ -4,11 +4,11 @@ import collections from rdflib.namespace import XSD, RDF, SH from rdflib.namespace import NamespaceManager -from urllib.parse import urlparse import pkg_resources +from .generator import Generator -class data_graph: +class data_graph(Generator): def __init__(self, graph: Graph, prefixes=None): self.G = graph @@ -31,16 +31,6 @@ def __init__(self, graph: Graph, prefixes=None): for prefix, namespace in json.load(fin).items(): self.namespaces.bind(prefix, namespace) - def sh_label_gen(self, uri): - prefix, namespace, name = self.namespaces.compute_qname(uri) - return prefix + "_" + name - - def uri_validator(self, x): - try: - result = urlparse(x) - return all([result.scheme, result.netloc]) - except Exception: - return False def extract_classes(self): types_query = "select distinct ?class_ { ?s rdf:type ?class_ }" From 821e12d7d776aa3c984f27630fad68ae876fea53 Mon Sep 17 00:00:00 2001 From: Natanael Arndt Date: Thu, 9 Jun 2022 20:07:10 +0200 Subject: [PATCH 7/7] Code formating --- shaclgen/schema.py | 1 - shaclgen/shaclgen.py | 1 - 2 files changed, 2 deletions(-) diff --git a/shaclgen/schema.py b/shaclgen/schema.py index a65c20d..6025c05 100644 --- a/shaclgen/schema.py +++ b/shaclgen/schema.py @@ -54,7 +54,6 @@ def __init__(self, graph: Graph, prefixes=None): for prefix, namespace in json.load(fin).items(): self.namespaces.bind(prefix, namespace) - def extract_props(self): properties = [] self.PROPS = {} diff --git a/shaclgen/shaclgen.py b/shaclgen/shaclgen.py index 211e96b..5b43e70 100644 --- a/shaclgen/shaclgen.py +++ b/shaclgen/shaclgen.py @@ -31,7 +31,6 @@ def __init__(self, graph: Graph, prefixes=None): for prefix, namespace in json.load(fin).items(): self.namespaces.bind(prefix, namespace) - def extract_classes(self): types_query = "select distinct ?class_ { ?s rdf:type ?class_ }" for row in self.G.query(types_query, initNs={"rdf": RDF}):