Skip to content

Commit

Permalink
Merge pull request #400 from mpsonntag/rdf_subclass
Browse files Browse the repository at this point in the history
RDF Subclassing feature

LGTM
  • Loading branch information
achilleas-k authored Jul 16, 2020
2 parents 7a1ca1f + 41d72b0 commit c03d5cf
Show file tree
Hide file tree
Showing 5 changed files with 308 additions and 9 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ install:
export PIPCMD=pip;
fi;

- $PIPCMD install lxml enum34 pyyaml rdflib
- $PIPCMD install lxml enum34 pyyaml rdflib owlrl requests

script:
- which $PYCMD
Expand Down
2 changes: 1 addition & 1 deletion appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ init:
build: false

install:
- python -m pip install lxml enum34 pyyaml rdflib
- python -m pip install lxml enum34 pyyaml rdflib owlrl requests

test_script:
- python --version
Expand Down
69 changes: 64 additions & 5 deletions odml/tools/rdf_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@
"""

import os
import string
import uuid
import warnings

from io import StringIO
from rdflib import Graph, Literal, URIRef
from rdflib.graph import Seq
from rdflib.namespace import XSD, RDF
from rdflib.namespace import XSD, RDF, RDFS

import yaml

Expand Down Expand Up @@ -57,14 +59,32 @@ class RDFWriter(object):
"""
A writer to parse odML files into RDF documents.
Use the 'rdf_subclassing' flag to disable default usage of Section type conversion to
RDF Subclasses.
Provide a custom Section type to RDF Subclass Name mapping dictionary via the
'custom_subclasses' attribute to add custom or overwrite default RDF Subclass mappings.
Usage:
RDFWriter(odml_docs).get_rdf_str('turtle')
RDFWriter(odml_docs).write_file("/output_path", "rdf_format")
RDFWriter(odml_docs, rdf_subclassing=False).write_file("path", "rdf_format")
RDFWriter(odml_docs, custom_subclasses=custom_dict).write_file("path", "rdf_format")
"""

def __init__(self, odml_documents):
def __init__(self, odml_documents, rdf_subclassing=True, custom_subclasses=None):
"""
:param odml_documents: list of odML documents
:param rdf_subclassing: Flag whether Section types should be converted to RDF Subclasses
for enhanced SPARQL queries. Default is 'True'.
:param custom_subclasses: A dict where the keys reference a Section type and the
corresponding values reference an RDF Class Name. When exporting
a Section of a type contained in this dict, the resulting RDF
Instance will be of the corresponding Class and this Class will
be added as a Subclass of RDF Class "odml:Section" to the
RDF document.
Key:value pairs of the "custom_subclasses" dict will overwrite
existing key:value pairs of the default subclassing dict.
"""
if not isinstance(odml_documents, list):
odml_documents = [odml_documents]
Expand All @@ -74,7 +94,13 @@ def __init__(self, odml_documents):
self.graph = Graph()
self.graph.bind("odml", ODML_NS)

self.rdf_subclassing = rdf_subclassing

self.section_subclasses = load_rdf_subclasses()
# If a custom Section type to RDF Subclass dict has been provided,
# parse it and update the default section_subclasses dict with the content.
if custom_subclasses and isinstance(custom_subclasses, dict):
self._parse_custom_subclasses(custom_subclasses)

def convert_to_rdf(self):
"""
Expand Down Expand Up @@ -221,10 +247,16 @@ def save_section(self, sec, curr_node):

# Add type of current node to the RDF graph
curr_type = fmt.rdf_type

# Handle section subclass types
sub_sec = self._get_section_subclass(sec)
if sub_sec:
curr_type = sub_sec
if self.rdf_subclassing:
sub_sec = self._get_section_subclass(sec)
if sub_sec:
curr_type = sub_sec
self.graph.add((URIRef(fmt.rdf_type), RDF.type, RDFS.Class))
self.graph.add((URIRef(curr_type), RDF.type, RDFS.Class))
self.graph.add((URIRef(curr_type), RDFS.subClassOf, URIRef(fmt.rdf_type)))

self.graph.add((curr_node, RDF.type, URIRef(curr_type)))

for k in fmt.rdf_map_keys:
Expand Down Expand Up @@ -294,6 +326,33 @@ class Section.

return None

def _parse_custom_subclasses(self, custom_subclasses):
"""
Parses a provided dictionary of "Section type": "RDF Subclass name"
key value pairs and adds the pairs to the parsers' 'section_subclasses'
default dictionary. Existing key:value pairs will be overwritten
with provided custom key:value pairs and a Warning will be issued.
Dictionary values containing whitespaces will raise a ValueError.
:param custom_subclasses: dictionary of "Section type": "RDF Subclass name" key value pairs.
Values must not contain whitespaces, a ValueError will be raised
otherwise.
"""

# Do not allow any whitespace characters in values
vals = "".join(custom_subclasses.values()).encode()
if vals != vals.translate(None, string.whitespace.encode()):
msg = "Custom RDF Subclass names must not contain any whitespace characters."
raise ValueError(msg)

for k in custom_subclasses:
val = custom_subclasses[k]
if k in self.section_subclasses:
msg = "RDFWriter custom subclasses: Key '%s' already exists. " % k
msg += "Value '%s' replaces default value '%s'." % (val, self.section_subclasses[k])
warnings.warn(msg, stacklevel=2)
self.section_subclasses[k] = val

def __str__(self):
return self.convert_to_rdf().serialize(format='turtle').decode("utf-8")

Expand Down
3 changes: 3 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@

install_req = ["lxml", "pyyaml>=5.1", "rdflib", "docopt", "pathlib"]

tests_req = ["owlrl", "requests"]

if sys.version_info < (3, 4):
install_req += ["enum34"]

Expand All @@ -45,6 +47,7 @@
packages=packages,
test_suite='test',
install_requires=install_req,
tests_require=tests_req,
include_package_data=True,
long_description=description_text,
long_description_content_type="text/markdown",
Expand Down
Loading

0 comments on commit c03d5cf

Please sign in to comment.