Skip to content

Commit

Permalink
update the >>update<< function to reuse BlankNode labels to refer to …
Browse files Browse the repository at this point in the history
…the same BlankNode
  • Loading branch information
Simaris committed Feb 10, 2021
1 parent d5da755 commit 6bc4b6e
Show file tree
Hide file tree
Showing 2 changed files with 139 additions and 10 deletions.
116 changes: 106 additions & 10 deletions quit/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,10 @@
from pygit2 import GIT_MERGE_ANALYSIS_NORMAL
from pygit2 import GIT_SORT_REVERSE, GIT_RESET_HARD, GIT_STATUS_CURRENT

import rdflib
from rdflib import Graph, ConjunctiveGraph, BNode, Literal, URIRef
import rdflib.plugins.parsers.ntriples as ntriples

import re

from quit.conf import Feature, QuitGraphConfiguration
Expand Down Expand Up @@ -189,7 +192,12 @@ def instance(self, reference, force=False):
for blob in self.getFilesForCommit(commit):
try:
(name, oid) = blob
(f, context) = self.getFileReferenceAndContext(blob, commit)
result = self.getFileReferenceAndContext(blob, commit)
try:
(f, context, nameMap) = result
except ValueError:
print(result)

internal_identifier = context.identifier + '-' + str(oid)

if force or not self.config.hasFeature(Feature.Persistence):
Expand Down Expand Up @@ -330,13 +338,15 @@ def changeset(self, commit):
blob = (entity.name, entity.oid)

try:
f, context = self.getFileReferenceAndContext(blob, commit)
f, context, nameMap = self.getFileReferenceAndContext(blob, commit)
except KeyError:
graph = Graph(identifier=graphUri)
graph.parse(data=entity.content, format='nt')
parserGraph = ntriples.W3CNTriplesParser(ntriples.NTGraphSink(graph))
source = rdflib.parser.create_input_source(data=entity.content)
parserGraph.parse(source.getCharacterStream())

self._blobs.set(
blob, (FileReference(entity.name, entity.content), graph)
blob, (FileReference(entity.name, entity.content), graph, {})
)

private_uri = QUIT["graph-{}".format(entity.oid)]
Expand Down Expand Up @@ -413,17 +423,74 @@ def getFileReferenceAndContext(self, blob, commit):
content = commit.node(path=name).content
graphUri = self._graphconfigs.get(commit.id).getgraphuriforfile(name)
graph = Graph(identifier=URIRef(graphUri))
graph.parse(data=content, format='nt')
quitWorkingData = (FileReference(name, content), graph)
parserGraph = ntriples.W3CNTriplesParser(ntriples.NTGraphSink(graph))
source = rdflib.parser.create_input_source(data=content)
parserGraph.parse(source.getCharacterStream())
nameMap = {v: k for k, v in parserGraph._bnode_ids.items()}
quitWorkingData = (FileReference(name, content), graph, nameMap)
self._blobs.set(blob, quitWorkingData)
return quitWorkingData
return self._blobs.get(blob)

def _replaceLabledBlankNodes(self, parsedQuery, parent_commit_ref):
"""Replaces blanknodes in parsedQuery with Blanknodes that have the same label in the graph.nt
E.g. We have a Graph with the content: '_:a <urn:pred> _:b'
A BNode('a') found in parsedQuery would be replaced by the blanknode _:a found in the graph.nt.
That way, updates can pass Blanknodes as instances and do not have to work on string representations.
"""
def replaceBlankNode(parsedQuery, nameMap):
nameMap = {v: k for k, v in nameMap.items()}
for update in parsedQuery:
for graphURI in update['quads']:
new_triples = []
for triple in update['quads'][graphURI]:
new_triple_subj = None
new_triple_obj = None
if isinstance(triple[0], rdflib.BNode):
bNode_key = triple[0].n3()
bNode_key = bNode_key[2:]
if bNode_key in nameMap:
new_triple_subj = nameMap[bNode_key]
else:
new_triple_subj = triple[0]
nameMap[bNode_key] = triple[0]
else:
new_triple_subj = triple[0]
if isinstance(triple[2], rdflib.BNode):
bNode_key = triple[2].n3()
bNode_key = bNode_key[2:]
if bNode_key in nameMap:
new_triple_obj = nameMap[bNode_key]
else:
new_triple_obj = triple[2]
nameMap[bNode_key] = triple[2]
else:
new_triple_obj = triple[2]
new_triples.append((new_triple_subj, triple[1], new_triple_obj))
update['quads'][graphURI] = new_triples

if parent_commit_ref == None:
return {}
parent_commit = self.repository.revision(parent_commit_ref)
blobs = self.getFilesForCommit(parent_commit)
for blob in blobs:
(name, oid) = blob
if(name == "graph.nt"):
file_reference, context, nameMap = self.getFileReferenceAndContext(
blob, parent_commit)
replaceBlankNode(parsedQuery, nameMap)
return nameMap
return {}

def applyQueryOnCommit(self, parsedQuery, parent_commit_ref, target_ref, query=None,
default_graph=[], named_graph=[]):
"""Apply an update query on the graph and the git repository."""
graph, commitid = self.instance(parent_commit_ref)
triples = {(x.n3(), y.n3(), z.n3()) for x, y, z in graph.store}
nameMap = self._replaceLabledBlankNodes(parsedQuery, parent_commit_ref)
resultingChanges, exception = graph.update(parsedQuery)
self._replaceExplicitNamedBlankNodesInChanges(resultingChanges, nameMap)
triples = {(x.n3(), y.n3(), z.n3()) for x, y, z in graph.store}
if exception:
# TODO need to revert or invalidate the graph at this point.
pass
Expand All @@ -432,6 +499,7 @@ def applyQueryOnCommit(self, parsedQuery, parent_commit_ref, target_ref, query=N
named_graph=named_graph)
if exception:
raise exception
triples = {(x.n3(), y.n3(), z.n3()) for x, y, z in graph.store}
return oid

def commit(self, graph, delta, message, parent_commit_ref, target_ref, query=None,
Expand Down Expand Up @@ -494,7 +562,7 @@ def commit(self, graph, delta, message, parent_commit_ref, target_ref, query=Non

# Update Cache and add new contexts to store
blob = fileReference.path, index.stash[fileReference.path][0]
self._blobs.set(blob, (fileReference, graph.store.get_context(identifier)))
self._blobs.set(blob, (fileReference, graph.store.get_context(identifier), {}))
blobs_new.add(blob)
if graphconfig.mode == 'configuration':
index.add('config.ttl', new_config.graphconf.serialize(format='turtle').decode())
Expand Down Expand Up @@ -541,12 +609,40 @@ def _build_message(self, message, query, result, default_graph, named_graph, **k
out.append('{}: "{}"'.format(k, v.replace('"', "\\\"")))
return "\n".join(out)

def _replaceExplicitNamedBlankNodesInChanges(self, changes, nameMap):
"""Any changes applied to the update query by _replaceLabledBlankNodes have to be reverted for git deltas.
Otherwise the serialization results in Blanknodes being represented as random hashes instead of their original labels.
"""
def lookUpBNode(bNode, nameMap):
if(bNode in nameMap):
return rdflib.BNode(nameMap[bNode])
return bNode

def replaceBNodesByName(triple, nameMap):
new_subject = triple[0]
new_object = triple[2]
if(isinstance(new_subject, BNode)):
new_subject = lookUpBNode(new_subject, nameMap)
if(isinstance(new_object, BNode)):
new_object = lookUpBNode(new_object, nameMap)
return (new_subject, triple[1], new_object)

if len(nameMap) == 0:
return
for change in changes:
for context in change['delta']:
for payload in change['delta'][context]:
if(isinstance(payload[1], list)):
for i in range(0, len(payload[1])):
payload[1][i] = replaceBNodesByName(payload[1][i], nameMap)

def _applyKnownGraphs(self, delta, blobs, parent_commit, index):
blobs_new = set()
for blob in blobs:
(fileName, oid) = blob
try:
file_reference, context = self.getFileReferenceAndContext(blob, parent_commit)
file_reference, context, nameMap = self.getFileReferenceAndContext(
blob, parent_commit)
for entry in delta:
changeset = entry['delta'].get(context.identifier, None)

Expand All @@ -558,7 +654,7 @@ def _applyKnownGraphs(self, delta, blobs, parent_commit, index):

self._blobs.remove(blob)
blob = fileName, index.stash[file_reference.path][0]
self._blobs.set(blob, (file_reference, context))
self._blobs.set(blob, (file_reference, context, nameMap))
blobs_new.add(blob)
except KeyError:
pass
Expand All @@ -580,7 +676,7 @@ def _applyUnknownGraphs(self, delta, known_blobs):
n = [
int(m.group(1)) for b in known_blobs for m in [reg.search(b)] if m
] + [0]
fileName = '{}_{}.nt'.format(iri_to_name(identifier), max(n)+1)
fileName = '{}_{}.nt'.format(iri_to_name(identifier), max(n) + 1)

new_contexts[identifier] = FileReference(fileName, '')

Expand Down
33 changes: 33 additions & 0 deletions tests/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -3841,6 +3841,39 @@ def testDeleteWithWhitespaceFile(self):
with open(path.join(repo.workdir, 'graph.nt'), 'r') as f:
self.assertEqual('\n', f.read())

def testUpdateWithBlankNode(self):
# Prepate a git Repository
graphContent = """<urn:x> <urn:y> <urn:z> .
_:a <urn:pred> _:c .
_:c <urn:pred> _:d .
"""
with TemporaryRepositoryFactory().withGraph("http://example.org/", graphContent) as repo:

# Start Quit
args = quitApp.getDefaults()
args['targetdir'] = repo.workdir
app = create_app(args).test_client()

with open(path.join(repo.workdir, 'graph.nt'), 'r') as f:
self.assertEqual(graphContent, f.read())

# execute Update query
update = 'INSERT DATA { GRAPH <http://example.org/> { _:c <urn:pred> _:e .}}'
result = app.post('/sparql',
content_type="application/sparql-update",
data=update)
targetContent = """
<urn:x> <urn:y> <urn:z> .
_:a <urn:pred> _:c .
_:c <urn:pred> _:d .
_:c <urn:pred> _:e .
"""

reference = repo.lookup_reference('refs/heads/%s' % "master")
branchOid = reference.resolve().target
branchCommit = repo.get(branchOid)
self.assertEqual(targetContent, branchCommit.tree["graph.nt"].data.decode("utf-8"))


if __name__ == '__main__':
unittest.main()

0 comments on commit 6bc4b6e

Please sign in to comment.