Skip to content

Commit

Permalink
Merge pull request #8 from uogbuji/magic
Browse files Browse the repository at this point in the history
Versa pipeline improvements, & more
  • Loading branch information
uogbuji authored Nov 11, 2019
2 parents fe9e152 + 2fed8ee commit 68a22f9
Show file tree
Hide file tree
Showing 10 changed files with 609 additions and 169 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
scratch
#PyCharm
.idea
.vscode
MANIFEST

#----
Expand Down
34 changes: 24 additions & 10 deletions test/py/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from versa.util import jsondump, jsonload


#Move to a test utils module
# FIXME: Move to a test utils module
import os, inspect
def module_path(local_function):
'''
Expand All @@ -24,7 +24,7 @@ def module_path(local_function):
'''
return os.path.abspath(inspect.getsourcefile(local_function))

#hack to locate test resource (data) files regardless of from where nose was run
# Hack to locate test resource (data) files regardless of where the test runner was invoked from
RESOURCEPATH = os.path.normpath(os.path.join(module_path(lambda _: None), '../../resource/'))

SIMPLE_BOOK = {
Expand All @@ -36,24 +36,24 @@ def module_path(local_function):
'cover': 'http://example.org/book/catcher-in-the-rye-book-cover.jpg',
}

#logging.basicConfig(level=logging.DEBUG)
BOOK_TYPE = 'http://schema.org/Book'
SCH = SCHEMA_ORG = 'http://schema.org/'
EXAMPLE_ORG = 'http://example.org/'

BOOK_ID = 'http://example.org/book/catcher-in-the-rye'
SCHEMA_NAME = I(iri.absolutize('name', SCHEMA_ORG))
SCHEMA_AUTHOR = I(iri.absolutize('author', SCHEMA_ORG))
XXX_WROTE = 'http://example.org/wrote'

#Not really needed.
# Not really needed.
IN_M = memory.connection(baseiri='http://example.org/')

BOOK_CASES = []

transforms = {
'id': discard(),
'id': ignore(),
'title': link(rel=SCH+'name'),
'author': materialize(SCH+'Person', rel=SCH+'author', unique=[(SCH+'name', run('target'))], links=[(SCH+'name', run('target'))]),
'author': materialize(SCH+'Person', rel=SCH+'author', unique=[(SCH+'name', target())], links=[(SCH+'name', target())]),
'link': link(rel=SCH+'link'),
'cover': link(rel=SCH+'cover'),
}
Expand All @@ -62,26 +62,40 @@ def asserter(out_m):
assert out_m.size() == 7, repr(out_m)
assert next(out_m.match(BOOK_ID, VTYPE_REL))[TARGET] == BOOK_TYPE
assert next(out_m.match(BOOK_ID, SCHEMA_NAME))[TARGET] == 'The Catcher in the Rye'
author = next(out_m.match(BOOK_ID, SCHEMA_AUTHOR, None))[TARGET]
assert next(out_m.match(author, SCHEMA_NAME), None)[TARGET] == 'J.D. Salinger'

BOOK_CASES.append(('simple1', transforms, asserter))

# Inverted form
transforms = {
'id': discard(),
'id': ignore(),
'title': link(rel=SCH+'name'),
#For testing; doesn't make much sense, really, otherwise
'author': materialize(SCH+'Person', rel=SCH+'author', unique=[(SCH+'name', run('target'))], links=[(SCH+'name', run('target'))], inverse=True),
'author': link(
origin=materialize(
SCH+'Person',
unique=[(SCH+'name', target())],
links=[(SCH+'name', target())],
attach=False),
rel=XXX_WROTE,
target=origin()),
'link': link(rel=SCH+'link'),
'cover': link(rel=SCH+'cover'),
}

def asserter(out_m):
#import pprint; pprint.pprint(out_m)
assert out_m.size() == 7, repr(out_m)
assert next(out_m.match(BOOK_ID, VTYPE_REL))[TARGET] == BOOK_TYPE
assert next(out_m.match(BOOK_ID, SCHEMA_NAME))[TARGET] == 'The Catcher in the Rye'
author = next(out_m.match(None, SCHEMA_AUTHOR), BOOK_ID)[ORIGIN]
author = next(out_m.match(None, XXX_WROTE), BOOK_ID)[ORIGIN]
assert next(out_m.match(author, SCHEMA_NAME), None)[TARGET] == 'J.D. Salinger'

BOOK_CASES.append(('simple2', transforms, asserter))

BOOK_CASES.append(('inverted1', transforms, asserter))

# 'author': link(rel=SCH+'author') materialize(SCH+'Person', unique=[(SCH+'name', run('target'))], links=[(SCH+'name', target()), (None, SCH+'wrote', origin())]),


@pytest.mark.parametrize('label,transforms,asserter', BOOK_CASES)
Expand Down
12 changes: 8 additions & 4 deletions tools/py/driver/memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,23 @@


class connection(connection_base):
def __init__(self, baseiri=None, attr_cls=dict, logger=None):
def __init__(self, baseiri=None, attr_cls=dict):
'''
Initialize connection object
Args:
baseiri: IRI used by default to resolve relative IRIs
attr_cls: class used to hold relationship attributes. By default use dict
'''
self._attr_cls = attr_cls # class used to hold attributes within a relationship
self._attr_cls = attr_cls
self.create_space()
self._baseiri = baseiri
self._id_counter = 1
self._logger = logger or logging
return

def copy(self, contents=True):
'''Create a copy of this model, optionally without contents (i.e. just configuration)'''
cp = connection(self._baseiri, self._attr_cls, self._logger)
cp = connection(self._baseiri, self._attr_cls)
if contents: cp.add_many(self._relationships)

return cp
Expand Down
17 changes: 9 additions & 8 deletions tools/py/pipeline/__init__.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,22 @@
#versa.pipeline
# versa.pipeline
'''
Framework for expressing transforms from one pattern of Versa links to another
This is especially useful if you've used a tool to extract Versa from some data
source but would like to tweak the interpretation of that data. It's also useful
for mapping from one vocabulary to another.
Useful for iterative processing or vocabulary mapping.
The concept is similar to XProc (http://en.wikipedia.org/wiki/XProc). You define
Concept is similar to XProc (http://en.wikipedia.org/wiki/XProc). You define
the overall transform in terms of transform steps or stages, implemented as
Python functions. Each function can have inputs, which might be simple Versa
scalars or even functions in themselves. The outputs are Versa scalars.
There is also a shared environment across the steps, called the context (`versa.context`).
The context includes a resource which is considered the origin for purposes
There is a shared context (`versa.context`) across the steps.
This context includes a resource which is considered the origin for purposes
of linking, an input Versa model considered to be an overall input to the transform
and an output Versa model considered to be an overall output.
You can use the `transform` function to take a raw record in any format,
define an edge stage transform to convert the raw data to an initial Versa context,
and then iterate through the other defined transform stages.
'''

from .main import *
from .core_actions import *

Loading

0 comments on commit 68a22f9

Please sign in to comment.