Skip to content

Commit

Permalink
Versa Literate (Markdown) reader fixes. add versa.util.lookup(). Add …
Browse files Browse the repository at this point in the history
…NTriples & CSV writers.
  • Loading branch information
uogbuji committed Mar 28, 2018
1 parent 1891895 commit 749a7e4
Show file tree
Hide file tree
Showing 8 changed files with 161 additions and 21 deletions.
41 changes: 24 additions & 17 deletions tools/py/reader/md.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,22 +14,24 @@
import itertools

import markdown
from versa.contrib import mkdcomments

from amara3 import iri #for absolutize & matches_uri_syntax
from amara3.uxml.parser import parse, event
from amara3.uxml.tree import treebuilder, element, text
from amara3.uxml.treeutil import *
#from amara import namespaces

from versa.contrib import mkdcomments
from versa import I, VERSA_BASEIRI
from versa.contrib.datachefids import idgen, FROM_EMPTY_64BIT_HASH

TEXT_VAL, RES_VAL, UNKNOWN_VAL = 1, 2, 3

TYPE_REL = I(iri.absolutize('type', VERSA_BASEIRI))

#Does not support the empty URL <> as a property name
REL_PAT = re.compile('((<(.+)>)|([@\\-_\\w#/]+)):\s*((<(.+)>)|("(.*?)")|(\'(.*?)\')|(.*))', re.DOTALL)
#REL_PAT = re.compile('((<(.+)>)|([@\\-_\\w#/]+)):\s*((<(.+)>)|("(.*?)")|(\'(.*?)\')|(.*))', re.DOTALL)
#Matches `property: value` lines. The property is either <iri> or an abbreviated name;
#the value is <iri>, a double- or single-quoted string (greedy, so embedded quotes are kept), or bare text.
#Written as a raw string so that \s, \w etc. reach the regex engine directly,
#rather than relying on Python passing through unrecognized string escapes.
REL_PAT = re.compile(r'((<(.+)>)|([@\-_\w#/]+)):\s*((<(.+)>)|("(.*)")|(\'(.*)\')|(.*))', re.DOTALL)

#
URI_ABBR_PAT = re.compile('@([\\-_\\w]+)([#/@])(.+)', re.DOTALL)
Expand Down Expand Up @@ -100,7 +102,7 @@ def parse(md, model, encoding='utf-8', config=None):
Translate the Versa Markdown syntax into Versa model relationships
md -- markdown source text
output -- Versa model to take the output relationship
model -- Versa model to take the output relationship
encoding -- character encoding (defaults to UTF-8)
Returns: The overall base URI (`@base`) specified in the Markdown file, or None
Expand Down Expand Up @@ -138,6 +140,9 @@ def setup_interpretations(interp):

setup_interpretations(interp_stanza)

#Prep ID generator, in case needed
idg = idgen(None)

#Parse the Markdown
#Alternately:
#from xml.sax.saxutils import escape, unescape
Expand Down Expand Up @@ -263,10 +268,10 @@ def parse_li(pair):
rid = document_iri or base
fullprop = I(iri.absolutize(prop, propbase or base))
if fullprop in interpretations:
val = interpretations[fullprop](val, rid=rid, fullprop=fullprop, base=base, model=output)
if val is not None: output.add(rid, fullprop, val)
val = interpretations[fullprop](val, rid=rid, fullprop=fullprop, base=base, model=model)
if val is not None: model.add(rid, fullprop, val)
else:
output.add(rid, fullprop, val)
model.add(rid, fullprop, val)


#Default IRI prefixes if @iri/@base is set
Expand All @@ -284,17 +289,19 @@ def parse_li(pair):
raise ValueError(_('Syntax error in resource header: {0}'.format(sect.xml_value)))
rid = matched.group(1)
rtype = matched.group(3)
if rtype:
rtype = I(iri.absolutize(rtype, base))

if rid:
rid = I(iri.absolutize(rid, base))
if not rid:
rid = I(iri.absolutize(output.generate_resource(), base))
if rtype:
rtype = I(iri.absolutize(rtype, base))
rid = next(idg)

#Resource type might be set by syntax config
if not rtype:
rtype = syntaxtypemap.get(sect.xml_name)
if rtype:
output.add(rid, TYPE_REL, rtype)
model.add(rid, TYPE_REL, rtype)
#Add the property
for prop, val, typeindic, subfield_list in fields(sect):
attrs = {}
Expand All @@ -311,7 +318,7 @@ def parse_li(pair):
elif atype == UNKNOWN_VAL:
attrs[aprop] = aval
if aprop in interpretations:
aval = interpretations[aprop](aval, rid=rid, fullprop=aprop, base=base, model=output)
aval = interpretations[aprop](aval, rid=rid, fullprop=aprop, base=base, model=model)
if aval is not None: attrs[aprop] = aval
else:
attrs[aprop] = aval
Expand All @@ -328,21 +335,21 @@ def parse_li(pair):
val = URI_ABBR_PAT.sub(uri + '\\2\\3', val)
else:
val = I(iri.absolutize(val, rtbase))
output.add(rid, fullprop, val, attrs)
model.add(rid, fullprop, val, attrs)
elif typeindic == TEXT_VAL:
if '@lang' not in attrs: attrs['@lang'] = default_lang
output.add(rid, fullprop, val, attrs)
model.add(rid, fullprop, val, attrs)
elif typeindic == UNKNOWN_VAL:
if fullprop in interpretations:
val = interpretations[fullprop](val, rid=rid, fullprop=fullprop, base=base, model=output)
if val is not None: output.add(rid, fullprop, val)
val = interpretations[fullprop](val, rid=rid, fullprop=fullprop, base=base, model=model)
if val is not None: model.add(rid, fullprop, val)
else:
output.add(rid, fullprop, val, attrs)
model.add(rid, fullprop, val, attrs)
#resinfo = AB_RESOURCE_PAT.match(val)
#if resinfo:
# val = resinfo.group(1)
# valtype = resinfo.group(3)
# if not val: val = output.generate_resource()
# if not val: val = model.generate_resource()
# if valtype: attrs[TYPE_REL] = valtype

return document_iri
20 changes: 17 additions & 3 deletions tools/py/reader/rdfalite.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,13 @@

from versa.reader import statement_prep, dumb_triples, rdfize, versalinks

from rdflib import URIRef, Literal
from rdflib import BNode
#rdflib is an optional dependency. When it is missing, fall back to a minimal
#stand-in class so that bnode() can still be called to create a fresh blank node object
try:
    from rdflib import BNode as bnode
    RDFLIB_AVAILABLE = True
except ImportError:
    class bnode(object):
        '''
        Placeholder blank node type used when rdflib is not installed.
        Each instance is a distinct object with no further behavior.
        '''
        pass
    RDFLIB_AVAILABLE = False

from amara3 import iri
from amara3.uxml import tree
Expand Down Expand Up @@ -65,6 +70,15 @@ def tordf(htmlsource, rdfgr, source_uri):
return parse(htmlsource, sink, source_uri)


def totriples(htmlsource, triples, source_uri):
    '''
    Parse the given HTML source, directing the resulting statements
    to a dumb_triples sink created over `triples`

    htmlsource -- HTML source, passed through to parse()
    triples -- object handed to dumb_triples() to receive the output
    source_uri -- URI of the source, passed through to parse()

    Returns: whatever parse() returns
    '''
    triple_sink = dumb_triples(triples)
    #The sink is a coroutine, so it must be advanced once before it can receive anything
    next(triple_sink)
    result = parse(htmlsource, triple_sink, source_uri)
    return result


def parse(htmlsource, statement_sink, source_uri):
'''
Expand Down Expand Up @@ -114,7 +128,7 @@ def do_parse(elem, resource, vocab=None, prop=None, prefixes=None):
if new_prop_list:
#FIXME: Should this only be when about is used?
if typeof_list and not new_resource:
new_value = BNode()
new_value = bnode()
#new_value = I(BNODE_ROOT + str(g_bnode_counter))
#g_bnode_counter += 1
elif new_resource:
Expand Down
11 changes: 11 additions & 0 deletions tools/py/terms.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from amara3 import iri

from . import iriref
from . import I, VERSA_BASEIRI

#Namespace IRIs for the RDF and RDF Schema vocabularies
RDF_NS = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
RDFS_NS = 'http://www.w3.org/2000/01/rdf-schema#'

#IRI refs for the "type" relationship, in the Versa and RDF vocabularies respectively
VERSA_TYPE_REL = I(VERSA_BASEIRI + 'type')
RDF_TYPE_REL = I(RDF_NS + 'type')

5 changes: 5 additions & 0 deletions tools/py/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ def simple_lookup_byvalue(m, rel, target):
return links[0][ORIGIN] if links else None


def lookup(m, orig, rel):
    '''
    Generate the target of each link in model m which matches
    the given origin and relationship

    m -- model, queried via m.match(orig, rel)
    orig -- origin to match
    rel -- relationship to match
    '''
    yield from (matched[TARGET] for matched in m.match(orig, rel))


def transitive_closure(m, orig, rel):
'''
Generate the closure over a transitive relationship in depth-first fashion
Expand Down
2 changes: 1 addition & 1 deletion tools/py/version.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
#http://legacy.python.org/dev/peps/pep-0440/
version_info = ('0', '3', '6')
version_info = ('0', '3', '7')
53 changes: 53 additions & 0 deletions tools/py/writer/csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#versa.writer.csv
"""
Render a Versa vocab model as CSV, using a given set of rules to flatten
Import as:
from versa.writer import csv as vcsv
"""

import logging

from amara3 import iri

from versa import I, VERSA_BASEIRI, ORIGIN, RELATIONSHIP, TARGET
from versa.terms import VERSA_BASEIRI, RDF_NS, RDFS_NS, VERSA_TYPE_REL, RDF_TYPE_REL
from versa.util import all_origins, lookup, resourcetypes


def fromlist(l):
    '''
    Flatten a sequence of values into the text of a single CSV cell,
    separating the values with '|'

    l -- iterable of values. Each is converted with str(), so that
        non-string values (e.g. numbers) do not raise TypeError in the join

    Returns: the joined string ('' if l is empty)
    '''
    return '|'.join(str(item) for item in l)


def write(models, csvout, rulelist, write_header, base=None, logger=logging):
    '''
    Write one CSV row per typed resource found in the given models

    models - one or more input Versa models from which output is generated.
    csvout - object with a writerow() method (e.g. a csv.writer) receiving the rows
    rulelist - iterable of (property, header) pairs; each property becomes a column,
        labeled by the corresponding header
    write_header - if true, first write a header row: 'id', 'type', then the rule headers
    base - currently unused
    logger - currently unused

    Each row holds the resource ID, its RDF types, then one cell per property.
    Multiple values in a cell are joined by fromlist(); a property with no value
    yields None. Resources with no RDF type are skipped.
    '''
    #Materialize once, so a one-shot iterable of rules is not already exhausted
    #by the time the headers are collected
    rulelist = list(rulelist)
    properties = [ k for (k, v) in rulelist ]
    headers = [ v for (k, v) in rulelist ]
    if write_header:
        csvout.writerow(['id', 'type'] + headers)

    if not isinstance(models, list): models = [models]
    for m in models:
        for rid in all_origins(m):
            rtypes = list(lookup(m, rid, RDF_TYPE_REL))
            #Ignore if no type
            if not rtypes: continue
            row = [rid, fromlist(rtypes)]
            for p in properties:
                values = list(lookup(m, rid, p))
                row.append(fromlist(values) if values else None)
            csvout.writerow(row)

    return

50 changes: 50 additions & 0 deletions tools/py/writer/ntriples.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#versa.writer.ntriples
"""
Render a Versa vocab model as NTriples
https://www.w3.org/TR/rdf-testcases/#ntriples
"""

import logging

from amara3 import iri

from versa import I, VERSA_BASEIRI, ORIGIN, RELATIONSHIP, TARGET
from versa.terms import VERSA_BASEIRI, RDF_NS, RDFS_NS, VERSA_TYPE_REL, RDF_TYPE_REL
from versa.driver import memory
from versa import VERSA_BASEIRI

#Versa terms which are translated to RDF/RDFS equivalents on output.
#Uses the RDF_NS/RDFS_NS names actually imported from versa.terms
RESOURCE_MAPPING = {
    I(VERSA_BASEIRI + 'Resource'): I(RDFS_NS + 'Class'),
    I(VERSA_BASEIRI + 'Property'): I(RDF_NS + 'Property'),
    I(VERSA_BASEIRI + 'description'): I(RDFS_NS + 'comment'),
    I(VERSA_BASEIRI + 'label'): I(RDFS_NS + 'label'),
}


def strconv(item):
    '''
    Serialize a single value as an NTriples term

    item -- an IRI ref (instance of I), or any other value

    Returns: '<...>' for an IRI ref; otherwise a double-quoted literal of str(item),
    with backslash, double quote, line break and tab characters escaped
    so that the literal stays on one line and is not terminated early
    '''
    if isinstance(item, I):
        return '<' + str(item) + '>'
    literal = str(item)
    #Backslash must be handled first, so as not to re-escape the other replacements
    escapes = (('\\', '\\\\'), ('"', '\\"'), ('\n', '\\n'), ('\r', '\\r'), ('\t', '\\t'))
    for raw, escaped in escapes:
        literal = literal.replace(raw, escaped)
    return '"' + literal + '"'


def write(models, out=None, base=None, logger=logging):
    '''
    Write the links of the given models as NTriples statements, one per line

    models - one or more input Versa models from which output is generated.
    out - output stream to print to; required, despite the None default
    base - optional base IRI, used only to recognize (and skip) docheader statements
    logger - currently unused; its default requires the logging module to be imported by this file

    Relationships and targets found in RESOURCE_MAPPING are replaced by their mapped
    equivalents, and the Versa type relationship is written as the RDF one.
    '''
    assert out is not None #Output stream required
    if not isinstance(models, list): models = [models]
    #Same for every link, so computed once
    docheader = (base or '') + '@docheader'
    for m in models:
        for link in m.match():
            s, p, o = link[:3]
            #Skip docheader statements
            if s == docheader: continue
            p = RESOURCE_MAPPING.get(p, p)
            o = RESOURCE_MAPPING.get(o, o)

            if p == VERSA_TYPE_REL: p = RDF_TYPE_REL
            print(strconv(s), strconv(p), strconv(o), '.', file=out)
    return

Empty file modified tools/py/writer/rdfs.py
100755 → 100644
Empty file.

0 comments on commit 749a7e4

Please sign in to comment.