Versa Literate (Markdown) reader fixes. Add versa.util.lookup(). Add NTriples & CSV writers.
uogbuji · Mar 28, 2018 · 749a7e4 · 749a7e4
1 parent 1891895
commit 749a7e4
Show file tree

Hide file tree

Showing 8 changed files with 161 additions and 21 deletions.
diff --git a/tools/py/reader/md.py b/tools/py/reader/md.py
@@ -14,22 +14,24 @@
 import itertools
 
 import markdown
-from versa.contrib import mkdcomments
 
 from amara3 import iri #for absolutize & matches_uri_syntax
 from amara3.uxml.parser import parse, event
 from amara3.uxml.tree import treebuilder, element, text
 from amara3.uxml.treeutil import *
 #from amara import namespaces
 
+from versa.contrib import mkdcomments
 from versa import I, VERSA_BASEIRI
+from versa.contrib.datachefids import idgen, FROM_EMPTY_64BIT_HASH
 
 TEXT_VAL, RES_VAL, UNKNOWN_VAL = 1, 2, 3
 
 TYPE_REL = I(iri.absolutize('type', VERSA_BASEIRI))
 
 #Does not support the empty URL <> as a property name
-REL_PAT = re.compile('((<(.+)>)|([@\\-_\\w#/]+)):\s*((<(.+)>)|("(.*?)")|(\'(.*?)\')|(.*))', re.DOTALL)
+#REL_PAT = re.compile('((<(.+)>)|([@\\-_\\w#/]+)):\s*((<(.+)>)|("(.*?)")|(\'(.*?)\')|(.*))', re.DOTALL)
+REL_PAT = re.compile('((<(.+)>)|([@\\-_\\w#/]+)):\s*((<(.+)>)|("(.*)")|(\'(.*)\')|(.*))', re.DOTALL)
 
 #
 URI_ABBR_PAT = re.compile('@([\\-_\\w]+)([#/@])(.+)', re.DOTALL)
@@ -100,7 +102,7 @@ def parse(md, model, encoding='utf-8', config=None):
     Translate the Versa Markdown syntax into Versa model relationships
 
     md -- markdown source text
-    output -- Versa model to take the output relationship
+    model -- Versa model to take the output relationship
     encoding -- character encoding (defaults to UTF-8)
 
     Returns: The overall base URI (`@base`) specified in the Markdown file, or None
@@ -138,6 +140,9 @@ def setup_interpretations(interp):
 
     setup_interpretations(interp_stanza)
 
+    #Prep ID generator, in case needed
+    idg = idgen(None)
+
     #Parse the Markdown
     #Alternately:
     #from xml.sax.saxutils import escape, unescape
@@ -263,10 +268,10 @@ def parse_li(pair):
             rid = document_iri or base
             fullprop = I(iri.absolutize(prop, propbase or base))
             if fullprop in interpretations:
-                val = interpretations[fullprop](val, rid=rid, fullprop=fullprop, base=base, model=output)
-                if val is not None: output.add(rid, fullprop, val)
+                val = interpretations[fullprop](val, rid=rid, fullprop=fullprop, base=base, model=model)
+                if val is not None: model.add(rid, fullprop, val)
             else:
-                output.add(rid, fullprop, val)
+                model.add(rid, fullprop, val)
 
 
     #Default IRI prefixes if @iri/@base is set
@@ -284,17 +289,19 @@ def parse_li(pair):
             raise ValueError(_('Syntax error in resource header: {0}'.format(sect.xml_value)))
         rid = matched.group(1)
         rtype = matched.group(3)
+        if rtype:
+            rtype = I(iri.absolutize(rtype, base))
+
         if rid:
             rid = I(iri.absolutize(rid, base))
         if not rid:
-            rid = I(iri.absolutize(output.generate_resource(), base))
-        if rtype:
-            rtype = I(iri.absolutize(rtype, base))
+            rid = next(idg)
+
         #Resource type might be set by syntax config
         if not rtype:
             rtype = syntaxtypemap.get(sect.xml_name)
         if rtype:
-            output.add(rid, TYPE_REL, rtype)
+            model.add(rid, TYPE_REL, rtype)
         #Add the property
         for prop, val, typeindic, subfield_list in fields(sect):
             attrs = {}
@@ -311,7 +318,7 @@ def parse_li(pair):
                 elif atype == UNKNOWN_VAL:
                     attrs[aprop] = aval
                     if aprop in interpretations:
-                        aval = interpretations[aprop](aval, rid=rid, fullprop=aprop, base=base, model=output)
+                        aval = interpretations[aprop](aval, rid=rid, fullprop=aprop, base=base, model=model)
                         if aval is not None: attrs[aprop] = aval
                     else:
                         attrs[aprop] = aval
@@ -328,21 +335,21 @@ def parse_li(pair):
                     val = URI_ABBR_PAT.sub(uri + '\\2\\3', val)
                 else:
                     val = I(iri.absolutize(val, rtbase))
-                output.add(rid, fullprop, val, attrs)
+                model.add(rid, fullprop, val, attrs)
             elif typeindic == TEXT_VAL:
                 if '@lang' not in attrs: attrs['@lang'] = default_lang
-                output.add(rid, fullprop, val, attrs)
+                model.add(rid, fullprop, val, attrs)
             elif typeindic == UNKNOWN_VAL:
                 if fullprop in interpretations:
-                    val = interpretations[fullprop](val, rid=rid, fullprop=fullprop, base=base, model=output)
-                    if val is not None: output.add(rid, fullprop, val)
+                    val = interpretations[fullprop](val, rid=rid, fullprop=fullprop, base=base, model=model)
+                    if val is not None: model.add(rid, fullprop, val)
                 else:
-                    output.add(rid, fullprop, val, attrs)
+                    model.add(rid, fullprop, val, attrs)
             #resinfo = AB_RESOURCE_PAT.match(val)
             #if resinfo:
             #    val = resinfo.group(1)
             #    valtype = resinfo.group(3)
-            #    if not val: val = output.generate_resource()
+            #    if not val: val = model.generate_resource()
             #    if valtype: attrs[TYPE_REL] = valtype
 
     return document_iri
diff --git a/tools/py/reader/rdfalite.py b/tools/py/reader/rdfalite.py
@@ -11,8 +11,13 @@
 
 from versa.reader import statement_prep, dumb_triples, rdfize, versalinks
 
-from rdflib import URIRef, Literal
-from rdflib import BNode
+try:
+    from rdflib import BNode as bnode
+    RDFLIB_AVAILABLE = True
+except ImportError:
+    #rdflib is optional. Fall back to a minimal stand-in class, so that
+    #bnode() can still be called with no arguments to get a fresh object
+    class bnode(object):
+        '''Minimal stand-in for rdflib.BNode, used when rdflib is not installed'''
+    RDFLIB_AVAILABLE = False
 
 from amara3 import iri
 from amara3.uxml import tree
@@ -65,6 +70,15 @@ def tordf(htmlsource, rdfgr, source_uri):
     return parse(htmlsource, sink, source_uri)
 
 
+def totriples(htmlsource, triples, source_uri):
+    '''
+    Parse the given HTML source, using a dumb_triples coroutine over the
+    given triples object as the statement sink
+
+    htmlsource -- source to be parsed, passed through to parse()
+    triples -- object handed to dumb_triples() to receive the output
+    source_uri -- URI of the source, passed through to parse()
+
+    Returns: the result of parse()
+    '''
+    triple_sink = dumb_triples(triples)
+    #Prime the coroutine, so it is ready to be sent statements
+    next(triple_sink)
+    return parse(htmlsource, triple_sink, source_uri)
+
+
 def parse(htmlsource, statement_sink, source_uri):
     '''
 
@@ -114,7 +128,7 @@ def do_parse(elem, resource, vocab=None, prop=None, prefixes=None):
             if new_prop_list:
                 #FIXME: Should this only be when about is used?
                 if typeof_list and not new_resource:
-                    new_value = BNode()
+                    new_value = bnode()
                     #new_value = I(BNODE_ROOT + str(g_bnode_counter))
                     #g_bnode_counter += 1
                 elif new_resource:

diff --git a/tools/py/terms.py b/tools/py/terms.py
@@ -0,0 +1,11 @@
+from amara3 import iri
+
+from . import iriref
+from . import I, VERSA_BASEIRI
+
+#Namespace IRIs for the RDF & RDFS vocabularies
+RDF_NS = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
+RDFS_NS = 'http://www.w3.org/2000/01/rdf-schema#'
+
+#The "type" relationship as an IRI reference, in the Versa & RDF vocabularies respectively
+VERSA_TYPE_REL = I(VERSA_BASEIRI + 'type')
+RDF_TYPE_REL = I(RDF_NS + 'type')
+
diff --git a/tools/py/util.py b/tools/py/util.py
@@ -28,6 +28,11 @@ def simple_lookup_byvalue(m, rel, target):
     return links[0][ORIGIN] if links else None
 
 
+def lookup(m, orig, rel):
+    '''
+    Generate the target of each link in the model which matches the given
+    origin and relationship
+
+    m -- model to query, via its match() method
+    orig -- origin to match
+    rel -- relationship to match
+    '''
+    yield from (link[TARGET] for link in m.match(orig, rel))
+
+
 def transitive_closure(m, orig, rel):
     '''
     Generate the closure over a transitive relationship in depth-first fashion

diff --git a/tools/py/version.py b/tools/py/version.py
@@ -1,2 +1,2 @@
 #http://legacy.python.org/dev/peps/pep-0440/
-version_info = ('0', '3', '6')
+version_info = ('0', '3', '7')
diff --git a/tools/py/writer/csv.py b/tools/py/writer/csv.py
@@ -0,0 +1,53 @@
+#versa.writer.csv
+"""
+Render a Versa vocab model as CSV, using a given set of rules to flatten
+
+Import as:
+
+from versa.writer import csv as vcsv
+
+"""
+
+import logging
+
+from amara3 import iri
+
+from versa import I, VERSA_BASEIRI, ORIGIN, RELATIONSHIP, TARGET
+from versa.terms import VERSA_BASEIRI, RDF_NS, RDFS_NS, VERSA_TYPE_REL, RDF_TYPE_REL
+from versa.util import all_origins, lookup, resourcetypes
+
+
+def fromlist(l):
+    '''
+    Flatten an iterable of strings into a single pipe-delimited string
+    '''
+    delimiter = '|'
+    return delimiter.join(l)
+
+
+def write(models, csvout, rulelist, write_header, base=None, logger=logging):
+    '''
+    Write one CSV row for each typed resource in the given model(s)
+
+    models -- one or more input Versa models from which output is generated.
+    csvout -- row writer; only its writerow() method is used
+        (presumably a csv.writer -- confirm against callers)
+    rulelist -- sequence of (property, header) pairs. Each pair produces one
+        output column, in order, after the leading 'id' and 'type' columns
+    write_header -- if true, emit a header row before any data rows
+    base -- currently unused
+    logger -- currently unused
+
+    Resources with no RDF type are skipped. A property with no value for a
+    resource leaves None in that resource's cell
+    '''
+    properties = [ k for (k, v) in rulelist ]
+    headers = [ v for (k, v) in rulelist ]
+    if write_header:
+        csvout.writerow(['id', 'type'] + headers)
+
+    if not isinstance(models, list): models = [models]
+    for m in models:
+        for rid in all_origins(m):
+            #Only the RDF type relationship is consulted, not the Versa one
+            rtypes = list(lookup(m, rid, RDF_TYPE_REL))
+            #Ignore if no type
+            if not rtypes: continue
+            row = [rid, fromlist(rtypes)]
+            for p in properties:
+                v = list(lookup(m, rid, p))
+                #Multiple values for a property are joined into a single cell
+                row.append(fromlist(v) if v else None)
+            #Exactly one row per resource, written once all its columns are filled
+            csvout.writerow(row)
+
+    return
+
diff --git a/tools/py/writer/ntriples.py b/tools/py/writer/ntriples.py
@@ -0,0 +1,50 @@
+#versa.writer.ntriples
+"""
+Render a Versa vocab model as NTriples
+
+https://www.w3.org/TR/rdf-testcases/#ntriples
+"""
+
+import logging
+
+from amara3 import iri
+
+from versa import I, VERSA_BASEIRI, ORIGIN, RELATIONSHIP, TARGET
+from versa.terms import VERSA_BASEIRI, RDF_NS, RDFS_NS, VERSA_TYPE_REL, RDF_TYPE_REL
+from versa.driver import memory
+from versa import VERSA_BASEIRI
+
+#Versa vocabulary terms and the RDF/RDFS terms substituted for them in output.
+#Uses the RDF_NS & RDFS_NS names imported from versa.terms
+RESOURCE_MAPPING = {
+    I(VERSA_BASEIRI + 'Resource'): I(RDFS_NS + 'Class'),
+    I(VERSA_BASEIRI + 'Property'): I(RDF_NS + 'Property'),
+    I(VERSA_BASEIRI + 'description'): I(RDFS_NS + 'comment'),
+    I(VERSA_BASEIRI + 'label'): I(RDFS_NS + 'label'),
+}
+
+
+def strconv(item):
+    '''
+    Render a single statement component in NTriples syntax
+
+    item -- an IRI reference (instance of I), or any other value, which is
+        converted with str() and treated as a plain literal
+
+    Returns: '<...>' for an IRI reference, otherwise a double-quoted string
+    with backslash, double quote, newline, carriage return & tab escaped
+    '''
+    if isinstance(item, I):
+        return '<' + str(item) + '>'
+    text = str(item)
+    #Backslash must be handled first, so the escapes added after it stay intact
+    for raw, escaped in (('\\', '\\\\'), ('"', '\\"'), ('\n', '\\n'), ('\r', '\\r'), ('\t', '\\t')):
+        text = text.replace(raw, escaped)
+    return '"' + text + '"'
+
+
+def write(models, out=None, base=None, logger=logging):
+    '''
+    Write each link of the given model(s) as one NTriples statement per line
+
+    models - one or more input Versa models from which output is generated.
+    out - output stream (required); statements are written with print()
+    base - optional prefix; statements whose origin is base + '@docheader'
+        are skipped
+    logger - currently unused
+
+    Raises ValueError if no output stream is given
+    '''
+    #Explicit check rather than assert, which is stripped under python -O
+    if out is None:
+        raise ValueError('Output stream required')
+    if not isinstance(models, list): models = [models]
+    for m in models:
+        for link in m.match():
+            #Only the first three items of each link are output
+            s, p, o = link[:3]
+            #Skip docheader statements
+            if s == (base or '') + '@docheader': continue
+            #Substitute RDF/RDFS equivalents for known Versa vocabulary terms
+            if p in RESOURCE_MAPPING: p = RESOURCE_MAPPING[p]
+            if o in RESOURCE_MAPPING: o = RESOURCE_MAPPING[o]
+
+            if p == VERSA_TYPE_REL: p = RDF_TYPE_REL
+            print(strconv(s), strconv(p), strconv(o), '.', file=out)
+    return
+
diff --git a/tools/py/writer/rdfs.py b/tools/py/writer/rdfs.py