Skip to content

Commit

Permalink
add schema module
Browse files Browse the repository at this point in the history
  • Loading branch information
alexiskeely committed Dec 1, 2019
1 parent f449625 commit 3fb4f21
Show file tree
Hide file tree
Showing 6 changed files with 268 additions and 203 deletions.
68 changes: 56 additions & 12 deletions README.rst
Original file line number Diff line number Diff line change
@@ -1,25 +1,69 @@
shaclgen
========
SHACLGEN
===============

shaclgen generates shacl templates based on the properties and classes
present in a graph. This module uses the rdflib library for working with
rdf.
Shaclgen takes either one or more data graphs or one or more schemas as input and generates a basic shape file based on the classes and properties present.

From the command line:
~~~~~~~~~~~~~~~~~~~~~~
**Shape files from data graphs:**
By default, the input graph is processed as a data graph (instance triples). Three formats are possible for data graphs: simple, nested, and extended.

- Simple: Each class and property generate individual Node- and PropertyShapes.

- Nested: Property shapes will be nested in NodeShapes if and only if they occur with one class.

- Extended: Expands nested shapes to create individual property shapes for each property, in addition to nesting them when appropriate.

**Shape files from ontologies:**
If the input is a schema or ontology, shaclgen generates a nested shape file: properties with rdfs:domain defined in the ontology will be nested within the appropriate NodeShape. rdfs:range definitions for XML and rdfs datatypes are included.

Support for OWL constructs is planned.

***************




Installation
***************
Using pip:
::

pip install shaclgen

From source:

https://github.com/alexiskeely/shaclgen


Command line use:
*****************
::

$ shaclgen [uri to data] [serialization]
$ shaclgen graph [optional arguments]

Example usage:
::

Supported serializations include: - ``ttl`` for turtle - ``xml`` for
rdf/xml - ``nt`` for ntriples
$ shaclgen https://www.lib.washington.edu/static/public/cams/data/datasets/uwSemWebParts/webResource-1-0-0.nt

example:


Command line arguments:
::

$ shaclgen https://www.lib.washington.edu/static/public/cams/data/datasets/uwSemWebParts/aggregation-1-0-0.ttl ttl
positional arguments:
graph The data graph(s).

::

optional arguments:
-h, --help show this help message and exit
-nf, --nested generates a nested shape file
-ef, --extended generates an expanded shape file
-o, --ontology input file(s) or URL(s) is a schema or ontology
-s SERIALIZATION, --serialization SERIALIZATION
result graph serialization, default is turtle

***************

This project is still in development. Comments, questions, and issues
are welcome!
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

setup(
name = 'shaclgen',
version = '0.1.3',
version = '0.1.4',
packages = ['shaclgen'],
description='Shacl graph generator',
long_description=l_description,
Expand Down
7 changes: 7 additions & 0 deletions shaclgen/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# -*- coding: utf-8 -*-
"""
Created on Sat Nov 30 13:30:18 2019
@author: alexi
"""

75 changes: 53 additions & 22 deletions shaclgen/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,38 +2,69 @@
#!/usr/bin/env python

#%%
from .shaclgen import generate_groups, generate_triples, generate_shacl,generate_merged
from .shaclgen import data_graph
from .schema import schema

import argparse
from argparse import RawDescriptionHelpFormatter
from argparse import RawTextHelpFormatter

parser = argparse.ArgumentParser(description="""
Shacl file generator.
Shaclgen will create a simple shape file by default:
every class and property will get their own shape.
Nested and extended shape files are possible.""")
parser = argparse.ArgumentParser(
formatter_class=RawDescriptionHelpFormatter,
description=("""
---------------------------Shaclgen------------------------------------------
Shaclgen takes either a data graph(s) or schema(s) as input and generates
a basic shape file based on the classes and properties present.
Shape files from data graphs:
By default, the input graph is processed as a data graph (instance triples).
Three formats are possible for data grapghs: simple, nested, and extended.
Simple: Each class and property generate individual Node- and PropertyShapes
Nested: Property shapes will be nested in nodeshapes iif
they occur with one class.
Extended: Expands nested shapes to create individual property shapes for each
property, in addition to nesting them when appropriate.
Shape files from ontologies:
If the input is a schema or ontology (-o), shaclgen will generate
a nested shape file: properties with rdfs:domain defined in the ontology
will be nested within the appropriate NodeShape. rdfs:range definitions
for XML and rdfs datatypes are included."""))

parser.add_argument("graph", nargs='+',type=str, help="the data graph(s)")
parser.add_argument("serialization", type=str,help="the data graph rdf serialization")
parser.add_argument("graph", nargs='+',type=str, help="The data graph(s).")

group = parser.add_mutually_exclusive_group()
group.add_argument("-nf", "--nested", action="store_true", help='Property shapes will be nested in nodeshapes iif they occur with one class.')
group.add_argument("-ef", "--extended", action="store_true", help='Expands nested shapes to create individual property shapes for each property, in addition to nesting them when appropriate.')
group.add_argument("-nf", "--nested", action="store_true", help='generates a nested shape file')
group.add_argument("-ef", "--extended", action="store_true", help='generates an expanded shape file')
parser.add_argument("-o", "--ontology", action="store_true", help='input file(s) or URL(s) is a schema or ontology')
parser.add_argument("-s", "--serialization", help='result graph serialization, default is turtle')

args = parser.parse_args()
#
#print(args.graph[0])
#print(args.serialization)


def main():
output = generate_groups(args.graph, args.serialization)
if args.nested:
triples = generate_triples(output, 'nf')
elif args.extended:
triples = generate_triples(output, 'ef')
if args.ontology:
g = schema(args.graph)
if args.serialization:
print('...generating schema shape file...\n')
g.gen_graph(args.serialization)
else:
print('...generating schema shape file...\n')
g.gen_graph('turtle')
else:
triples = generate_triples(output, 'sf')
kwargs = {'serial': 'turtle'}
g = data_graph(args.graph)
if args.nested:
kwargs['graph_format'] = 'nf'
elif args.extended:
kwargs['graph_format'] = 'ef'
if args.serialization:
kwargs['serial'] = args.serialization
print('...generating data shape file...\n')
g.gen_graph(**kwargs)

graph = generate_shacl(triples)
print(graph)
#
if __name__ == '__main__':
main()

40 changes: 21 additions & 19 deletions shaclgen/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@
import collections

class schema():
def __init__(self, graph=None,):
def __init__(self, args:list):
self.G = Graph()
self.G.load(graph,format=guess_format(graph))
for graph in args:
self.G.parse(graph,format=guess_format(graph))
self.CLASSES = collections.OrderedDict()
self.PROPS = collections.OrderedDict()
self.REST = collections.OrderedDict()
Expand Down Expand Up @@ -39,7 +40,9 @@ def extract_props(self):


#gather property values
count = 0
for prop in self.PROPS.keys():
count = count +1
s = URIRef(prop)
self.PROPS[prop]['domain']= None
self.PROPS[prop]['range']= None
Expand All @@ -57,7 +60,7 @@ def extract_props(self):
self.PROPS[prop]['e_prop'] = o

for o in self.G.objects(subject=s, predicate=RDFS.label):
self.PROPS[prop]['label'] = o
self.PROPS[prop]['label'] = self.gen_shape_labels(prop)+str(count)



Expand All @@ -75,11 +78,12 @@ def extract_classes(self):
classes.append(s)
else:
pass

count = 0
for c in sorted(classes):
self.CLASSES[c] = {}
for c in self.CLASSES.keys():
self.CLASSES[c]['label'] = self.gen_shape_labels(c)
for c in self.CLASSES.keys():
count = count +1
self.CLASSES[c]['label'] = self.gen_shape_labels(c)+str(count)

def extract_restrictions(self):
# does not handle nested restrictions within other class descriptions
Expand Down Expand Up @@ -132,29 +136,30 @@ def gen_shape_labels(self, URI):
label = URI.split("#")[-1]
else:
label = URI.split("/")[-1]
return label
return label+'_'

def gen_graph(self):
def gen_graph(self, serial='turtle'):

self.extract_props()
self.extract_classes()
self.extract_restrictions()
ng = Graph()
SH = Namespace('http://www.w3.org/ns/shacl#')
ng.bind('SH', SH)
ng.bind('sh', SH)

EX = Namespace('http://www.example.org/')
ng.bind('EX', EX)
ng.bind('ex', EX)

# add class Node Shapes
for c in self.CLASSES.keys():
label = self.gen_shape_labels(c)+'_ClassShape'
label = self.CLASSES[c]['label']
ng.add((EX[label], RDF.type, SH.NodeShape))
ng.add((EX[label], SH.targetClass, c))
for p in self.PROPS.keys():
if self.PROPS[p]['domain'] is not None:
blank = BNode()
if self.PROPS[p]['domain'] in self.CLASSES.keys():
label = self.gen_shape_labels(self.PROPS[p]['domain'])+'_ClassShape'
label = self.CLASSES[self.PROPS[p]['domain']]['label']
ng.add((EX[label], SH.property, blank))
ng.add((blank, SH.path, p))
if self.PROPS[p]['range'] is not None:
Expand All @@ -177,7 +182,7 @@ def gen_graph(self):
else:
pass
else:
label = self.gen_shape_labels(self.PROPS[p])+'_PropShape'
label = self.PROPS[p]['label']
ng.add((EX[label], RDF.type, SH.NodeShape))
ng.add((EX[label], SH.targetSubjectsOf, p))
ng.add((EX[label], SH.nodeKind, SH.BlankNodeOrIRI))
Expand All @@ -191,7 +196,7 @@ def gen_graph(self):
ng.add((blank, SH['class'], rang ))
else:
blank = BNode()
label = self.gen_shape_labels(p)+'_PropShape'
label = self.PROPS[p]['label']
ng.add((EX[label], RDF.type, SH.NodeShape))
ng.add((EX[label], SH.targetSubjectsOf, p))
ng.add((EX[label], SH.nodeKind, SH.BlankNodeOrIRI))
Expand All @@ -204,11 +209,8 @@ def gen_graph(self):
else:
ng.add((blank, SH['class'], rang ))

print(ng.serialize(format='turtle').decode())
return ng
print(ng.serialize(format=serial).decode())



def save_graph(self, path):
ng = self.gen_graph()
ng.serialize(path, format='turtle')

Loading

0 comments on commit 3fb4f21

Please sign in to comment.