Skip to content

Commit

Permalink
Merge branch 'develop' of github.com:zepheira/pybibframe into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
uogbuji committed Nov 12, 2019
2 parents 51a38dc + 00baee4 commit d639d01
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 297 deletions.
263 changes: 4 additions & 259 deletions lib/reader/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

from versa.pipeline import context as versacontext
from versa import I, VERSA_BASEIRI, ORIGIN, RELATIONSHIP, TARGET, ATTRIBUTES
from versa.pipeline import values, target, origin, rel, ifexists, toiri, res, url, if_, replace_from, lookup, SKIP, regex_match_modify

from bibframe.contrib.datachefids import slugify#, FROM_EMPTY_64BIT_HASH
from bibframe.contrib.datachefids import idgen as default_idgen
Expand All @@ -24,11 +25,11 @@

from amara3 import iri

__all__ = ["bfcontext", "base_transformer", "link", "ignore", "anchor", "target", "origin",
__all__ = ["bfcontext", "base_transformer", "link", "ignore", "anchor", "target", "rel", "origin",
"all_subfields", "subfield", "values", "relator_property", "replace_from",
"if_", "ifexists", "foreach", "indicator", "materialize", "url", "normalize_isbn",
"onwork", "oninstance", "lookup", "regex_match_modify", "register_transforms",
"subfields", "abort_on", "lookup_inline"]
"subfields", "abort_on", "SKIP", "ifexists", "if_"]

# Matches a parenthesized span: a literal "(" through the last ")" that follows it (the `.*` is greedy)
RDA_PARENS_PAT = re.compile('\\(.*\\)')

Expand Down Expand Up @@ -254,42 +255,6 @@ def _anchor(ctx):
return _anchor


def target():
    '''
    Action function generator giving access to the target of the current link

    :return: Versa action function which, given a context, returns the
        target component of that context's current link
    '''
    def _target(ctx):
        '''
        Versa action function utility to pick the target out of the current link

        :param ctx: Versa context used in processing; its current_link is read
        :return: Target of the context's current link
        '''
        link = ctx.current_link
        return link[TARGET]
    return _target


def origin():
    '''
    Action function generator giving access to the origin of the current link

    :return: Versa action function which, given a context, returns the
        origin component of that context's current link
    '''
    def _origin(ctx):
        '''
        Versa action function utility to pick the origin out of the current link

        :param ctx: Versa context used in processing; its current_link is read
        :return: Origin of the context's current link
        '''
        link = ctx.current_link
        return link[ORIGIN]
    return _origin


# Build a (key, value) pair: ns is prefixed onto k unless iri.is_absolute(k) is true; v is passed through unchanged
NS_PATCH = lambda ns, k, v: (ns+k, v) if not iri.is_absolute(k) else (k, v)
def all_subfields(ctx):
'''
Expand Down Expand Up @@ -328,35 +293,6 @@ def _subfield(ctx):
return _subfield


def values(*rels):
    '''
    Action function generator to gather several expressions into one flat list

    :param rels: Expressions to compute. Each one is either a plain value or a
        callable taking the Versa context and returning a value
    :return: Versa action function to do the actual work
    '''
    def _values(ctx):
        '''
        Versa action function utility to compute the list of values

        :param ctx: Versa context, handed to every callable expression
        :return: List of the computed values, in argument order. A result that
            is itself a list is spliced in (one level only) rather than nested
        '''
        computed_rels = []
        for item in rels:
            resolved = item(ctx) if callable(item) else item
            if isinstance(resolved, list):
                computed_rels += resolved
            else:
                computed_rels.append(resolved)
        return computed_rels
    return _values


def relator_property(text_in, allowed=None, default=None, prefix=None):
'''
Action function generator to take some text and compute a relationship slug therefrom
Expand All @@ -382,102 +318,6 @@ def _relator_property(ctx):
return _relator_property


def replace_from(patterns, old_text):
    '''
    Action function generator to take some text and replace it with another value based on a regular expression pattern

    :param patterns: List of replacement specifications to use, each one a
        (compiled pattern, replacement) tuple
    :param old_text: Source text for the value to be created, or a callable
        taking the Versa context and returning it. If this is a list, the
        return value will be a list processed from each item
    :return: Versa action function to do the actual work
    '''
    def _replace_from(ctx):
        '''
        Versa action function utility to do the text replacement

        :param ctx: Versa context used in processing (e.g. includes the prototype link)
        :return: Replacement text for a single input; for a list input, a
            de-duplicated list of replacement texts in first-seen order
            (an input of None is treated as an empty list)
        '''
        source = old_text(ctx) if callable(old_text) else old_text
        if source is None:
            source = []
        is_list = isinstance(source, list)
        texts = source if is_list else [source]

        # De-duplicate while keeping input order, so the output is
        # deterministic from run to run (a bare set would not be)
        seen = set()
        results = []
        for text in texts:
            new_text = text  # Unchanged original if no pattern applies
            for pat, repl in patterns:
                # Each pattern is applied to the original text, not to the
                # outcome of an earlier one, so the last pattern matching at
                # the start of the text determines the result
                if pat.match(text):
                    new_text = pat.sub(repl, text)
            if new_text not in seen:
                seen.add(new_text)
                results.append(new_text)

        return results if is_list else results[0]
    return _replace_from


def ifexists(test, value, alt=None):
    '''
    Action function generator for a limited if/then/else type primitive

    :param test: Expression whose truthiness selects the branch; may be a
        callable taking the Versa context
    :param value: Expression giving the result when test is true
    :param alt: Expression giving the result when test is false (default None)
    :return: Versa action function to do the actual work
    '''
    def _ifexists(ctx):
        '''
        Versa action function utility to evaluate the test and the chosen branch

        :param ctx: Versa context, handed to any callable expression
        :return: Computed value of the selected branch; the other branch is
            never evaluated
        '''
        outcome = test(ctx) if callable(test) else test
        chosen = value if outcome else alt
        if callable(chosen):
            return chosen(ctx)
        return chosen
    return _ifexists


def if_(test, iftrue, iffalse=None, vars_=None):
    '''
    Action function generator providing a fuller if/then/else type primitive

    :param test: Expression to be tested to determine the branch path. May be
        a plain value, a callable taking the Versa context, or a string holding
        a Python expression to be evaluated
    :param iftrue: Expression to be executed (perhaps for side effects) if test is true
    :param iffalse: Expression to be executed (perhaps for side effects) if test is false
    :param vars_: Optional dictionary of variables to be used in computing a
        string test; values may be callables taking the Versa context
    :return: Versa action function to do the actual work. This function returns the value computed from iftrue if the test computes to true, otherwise iffalse
    '''
    vars_ = vars_ or {}
    def _if_(ctx):
        '''
        Versa action function utility to execute an if/then/else type primitive

        :param ctx: Versa context used in processing (e.g. includes the prototype link)
        :return: Value computed according to the test expression result
        '''
        if isinstance(test, str):
            # The current link is only needed for string tests, where its
            # target is exposed to the expression under the name 'target'
            out_vars = {'target': ctx.current_link[TARGET]}
            for k, v in vars_.items():
                #FIXME: Less crude test
                assert isinstance(k, str)
                out_vars[k] = v(ctx) if callable(v) else v

            # SECURITY: eval() runs the test string as arbitrary Python code.
            # Test strings must only ever come from trusted transform
            # definitions, never from record data or other external input
            _test = eval(test, out_vars, out_vars)
        else:
            _test = test(ctx) if callable(test) else test

        if _test:
            return iftrue(ctx) if callable(iftrue) else iftrue
        # Compare against None rather than truthiness, so that a falsy
        # alternative such as 0, '' or False is still returned
        if iffalse is not None:
            return iffalse(ctx) if callable(iffalse) else iffalse
        return None
    return _if_


def foreach(origin=None, rel=None, target=None, attributes=None):
'''
Action function generator to compute a combination of links from a list of expressions
Expand Down Expand Up @@ -590,7 +430,7 @@ def materialize(typ, rel=DEFAULT_REL, derive_origin=None, unique=None, links=Non
links, or a Versa action function returning None, which signals that
the particular link is skipped entirely.
:param postprocess: IRI or list of IRI queueing up actiona to be postprocessed
:param postprocess: IRI or list of IRI queueing up actions to be postprocessed
for this materialized resource. None, the default, signals no special postprocessing
For examples of all these scenarios see marcpatterns.py
Expand Down Expand Up @@ -724,40 +564,6 @@ def _materialize(ctx):
return _materialize


#def url(arg, base=iri.absolutize('authrec/', BFZ)):
def url(arg, base=None, ignore_refs=True):
    '''
    Convert the argument into a list of IRI refs

    :param arg: value or list of values to convert, or a callable taking the
        Versa context and returning either
    :param base: base IRI to resolve relative references against
    :param ignore_refs: if True, make no attempt to convert would-be IRI refs to IRI type
    :return: Versa action function to do the actual work; it always returns a
        list, even for a single input value
    '''
    def _res(ctx):
        _arg = arg(ctx) if callable(arg) else arg
        candidates = _arg if isinstance(_arg, list) else [_arg]
        ret = []
        for iu in candidates:
            # Values that are not absolute IRIs are left as they are when ignore_refs is set
            if not ignore_refs or iri.is_absolute(iu):
                # coerce into an IRIref, but fallout as untyped text otherwise
                try:
                    iu = I(iu)
                except ValueError:
                    # attempt to recover by percent encoding
                    try:
                        iu = I(iri.percent_encode(iu))
                    except ValueError as e:
                        # Logger.warn is a deprecated alias; warning is the supported spelling
                        # NOTE(review): assumes ctx.extras['logger'] is a standard logging.Logger - confirm
                        ctx.extras['logger'].warning('Unable to convert "{}" to IRI reference:\n{}'.format(iu, e))

            # NOTE(review): the nesting of this step could not be recovered from
            # the source as seen; it is applied to every value that is an I instance
            if base is not None and isinstance(iu, I):
                iu = I(iri.absolutize(iu, base))

            ret.append(iu)

        return ret
    return _res


def normalize_isbn(isbn):
'''
Turn isbnplus into an action function to normalize ISBNs outside of 020, e.g. 776$z
Expand All @@ -769,67 +575,6 @@ def _normalize_isbn(ctx):
return _normalize_isbn


def lookup(table, key):
    '''
    Action function generator for a generic lookup in a named table

    :param table: name of the table, used as a key into ctx.extras['lookups']
    :param key: key to look up in that table, or a callable taking the Versa
        context and returning it
    :return: Versa action function to do the actual work
    '''
    def _lookup(ctx):
        '''
        Versa action function utility to do the lookup

        :param ctx: Versa context; its extras['lookups'] entry holds the tables
        :return: Value found for the key in the table, or None if the key is absent
        '''
        tables = ctx.extras['lookups']
        if callable(key):
            wanted = key(ctx)
        else:
            wanted = key
        selected = tables[table]
        return selected.get(wanted)
    return _lookup


def regex_match_modify(pattern, group_or_func, value=None):
    '''
    Action function generator to take some text and modify it either according to a named group or a modification function for the match

    :param pattern: regex string or compiled pattern
    :param group_or_func: string or function that takes a regex match. If string, a named group to use for the result. If a function, executed to return the result
    :param value: value to use instead of the current link target; may be a
        callable taking the Versa context
    :return: Versa action function to do the actual work
    '''
    # Compile once here rather than on every invocation of the action function
    _pattern = re.compile(pattern) if isinstance(pattern, str) else pattern

    def _regex_modify(ctx):
        '''
        Versa action function utility to do the text modification

        :param ctx: Versa context used in processing (e.g. includes the prototype link)
        :return: The input text unchanged if the pattern does not match at its
            start. Otherwise the result of group_or_func if that is callable,
            else the text of the named group ('' if no such group is defined)
        '''
        _, _, link_target, _ = ctx.current_link
        if callable(value):
            _value = value(ctx)
        else:
            _value = link_target if value is None else value

        match = _pattern.match(_value)
        if not match:
            return _value
        if callable(group_or_func):
            return group_or_func(match)
        return match.groupdict().get(group_or_func, '')
    return _regex_modify


def lookup_inline(mapping, value=None):
    '''
    Action function generator to translate a value through a provided mapping

    :param mapping: dictionary for the lookup
    :param value: value to look up instead of the current link target; may be
        a callable taking the Versa context
    :return: Versa action function to do the actual work
    '''
    def _lookup_inline(ctx):
        '''
        Versa action function utility to do the lookup

        :param ctx: Versa context used in processing (e.g. includes the prototype link)
        :return: Mapped value, or the looked-up value itself if the mapping has no entry for it
        '''
        _, _, link_target, _ = ctx.current_link
        if callable(value):
            probe = value(ctx)
        elif value is None:
            probe = link_target
        else:
            probe = value
        return mapping.get(probe, probe)
    return _lookup_inline


def abort_on(vals=None, regex=None):
'''
Send a signal to abort processing current record if condition met
Expand Down
38 changes: 0 additions & 38 deletions lib/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,41 +107,3 @@ def materialize_entity(etype, ctx_params=None, model_to_update=None, data=None,
for p in plugin[BF_MATRES_TASK](output_model, params): pass
return eid


#It could be argued from code modularization that this should go in versa, but the algorithm was really conceived in and for the BIBFRAME world, and one could of course select different strategies for other uses of Versa, so here we have it
def resource_id(etype, unique=None, idgen=default_idgen(None), vocabbase=None):
    '''
    Very low level routine for generating an ID value using the hash algorithm
    outlined by the Libhub initiative for BIBFRAME Lite (Libhub Resource Hash Convention).
    https://github.com/zepheira/pybibframe/wiki/From-Records-to-Resources:-the-Library.Link-resource-ID-generation-algorithm

    Takes the entity (resource) type and an ordered data mapping.

    etype - type IRI for the new entity
    unique - list of key/value tuples of data to use in generating its unique ID, or None (or empty) in which case one is just randomly generated
    idgen - ID generator; it is sent the serialized data via send(), or advanced with next() when there is no data
    vocabbase - for convenience, if provided, used to resolve relative etype & data keys

    >>> from bibframe.util import resource_id
    >>> resource_id("http://schema.org/Person", [("http://schema.org/name", "Jonathan Bruce Postel"), ("http://schema.org/birthDate", "1943-08-06")])
    '-7hP9d_Xo8M'
    >>> resource_id("http://schema.org/Person", [("http://schema.org/name", "Augusta Ada King")])
    'xjgOrUFiw_o'
    '''
    #XXX: Use proper URI normalization? Have a philosophical discussion with Mark about this :)
    if vocabbase: etype = vocabbase + etype

    unique_computed = []
    # unique defaults to None, which must behave like "no distinguishing data"
    for k, v in unique or ():
        if vocabbase and not iri.is_absolute(k):
            #XXX OK absolutize used here. Go figure
            k = iri.absolutize(k, vocabbase)
        unique_computed.append((k, v))

    if unique_computed:
        # The type leads the data, so the same data under different types hashes differently
        unique_computed.insert(0, [VTYPE_REL, etype])
        # Compact separators keep the hashed plaintext free of optional whitespace
        plaintext = json.dumps(unique_computed, separators=(',', ':'))
        eid = idgen.send(plaintext)
    else:
        #We only have a type; no other distinguishing data. Generate a random hash
        eid = next(idgen)
    return eid

0 comments on commit d639d01

Please sign in to comment.