Skip to content
This repository has been archived by the owner on May 3, 2018. It is now read-only.

Initial work on a regex implementation. #13

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion test/Makefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@

LEVEL = 4

test: spec-examples spec-examples-by-section extended variables
test: spec-examples spec-examples-by-section extended variables regex

spec-examples:
PYTHONPATH=..:$(PYTHONPATH) python uritemplate_test.py cases/spec-examples.json $(LEVEL)
Expand All @@ -17,3 +17,6 @@ negative:

variables:
PYTHONPATH=..:$(PYTHONPATH) python variables_test.py

regex:
PYTHONPATH=..:$(PYTHONPATH) python regex_test.py
175 changes: 175 additions & 0 deletions test/regex_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
'''
Tests related to the as_regex function. Uses the same testcases as the
expand function, but repurposes them to ensure the regex created
produces the right output.
'''
import sys
from os.path import join, dirname
try:
import json
except ImportError:
import simplejson as json
import urllib
import traceback
import pdb

import uritemplate

# JSON case files, looked up in the ``cases`` directory beside this script
# and processed by TestRunner.main() in the order listed here.
TESTFILES = [
    'spec-examples.json', 'spec-examples-by-section.json',
    'extended-tests.json',
]


def correct_answers(var):
    '''Take a variable and produce a list of possibly correct
    answers.

    ``var`` is a value from a testcase's ``variables`` mapping: a scalar
    (``None`` is treated as the empty string), a list or a dict.

    Returns a list of candidate strings.  For every shape there is one
    candidate quoted with the reserved characters left intact and one
    with everything percent-encoded; dicts additionally get a
    ``key=value`` form produced by ``urlencode``.
    '''
    # Imported locally so the helper runs on Python 2 (urllib.quote) as
    # well as Python 3 (urllib.parse.quote) without changing the file's
    # import block.
    try:
        from urllib import quote as url_quote, urlencode
    except ImportError:
        from urllib.parse import quote as url_quote, urlencode

    safe = ":/?#[]@!$&'()*+,;="

    def quote(v):
        v = '' if v is None else v
        return url_quote(str(v), '')

    def restrictquote(v):
        v = '' if v is None else v
        return url_quote(str(v), safe)

    if isinstance(var, list):
        return [','.join(map(restrictquote, var)),
                ','.join(map(quote, var))]
    elif isinstance(var, dict):
        # Flatten to key1, value1, key2, value2, ...
        flattened = [part for item in var.items() for part in item]
        return [
            # NOTE: urlencode's second positional parameter is ``doseq``,
            # not a set of safe characters.  The previous code passed the
            # (truthy) ``safe`` string there; spelling it as True keeps
            # the output identical while making the real effect visible.
            urlencode(var, True),
            ','.join(restrictquote(part) for part in flattened),
            ','.join(quote(part) for part in flattened),
        ]
    else:
        return [restrictquote(var), quote(var)]

def _print_level(level, prefix):
    '''Build a verbosity-gated print method.

    The returned function is meant to be assigned as a method: it reads
    ``self.verbosity`` and, when that is at least ``level``, prints
    ``prefix`` followed by a space and ``tpl.format(*args, **kwargs)``.
    Otherwise it prints nothing.
    '''
    def _print_method(self, tpl, *args, **kwargs):
        if self.verbosity >= level:
            # A single parenthesised argument produces the same output
            # whether ``print`` is the Python 2 statement or the Python 3
            # function, so no __future__ import is needed.
            print('{0} {1}'.format(prefix, tpl.format(*args, **kwargs)))
    return _print_method


class TestRunner(object):
    '''Runs the JSON testcases listed in TESTFILES against
    uritemplate.as_regex and keeps success/failure counts.

    verbosity     -- 0..4; higher values enable more of the print1..print4
                     output channels.
    one_failure   -- when true, report and exit after the first failure.
    fail_into_pdb -- when true, enter pdb at the point of a failure.
    '''

    def __init__(self, verbosity=0, one_failure=False, fail_into_pdb=False):
        self.verbosity = verbosity
        self.one_failure = one_failure
        self.fail_into_pdb = fail_into_pdb

        self.failures = 0
        self.successes = 0

    # printN(tpl, *args, **kwargs) only prints when self.verbosity >= N.
    print1 = _print_level(1, '||')
    print2 = _print_level(2, ';;')
    print3 = _print_level(3, ',,')
    print4 = _print_level(4, '. ')

    def main(self):
        '''Load every file in TESTFILES from the ``cases`` directory next
        to this script, run it, then report and exit via finish().'''
        cases_dir = join(dirname(__file__), 'cases')
        for testfile in TESTFILES:
            self.print2('Running Testfile: {0}', testfile)
            self.print2('=' * 80)
            with open(join(cases_dir, testfile), 'r') as tf:
                self.test_document(json.load(tf))
        self.finish()

    def test_document(self, test_doc):
        '''Run all testcases of one parsed JSON document.

        ``test_doc`` maps a test name to a dict with ``variables`` and
        ``testcases``; each testcase is a (template, expected) pair where
        expected is either one string or a list of acceptable strings.
        '''
        # .items() instead of .iteritems() so this runs on Python 2 and 3.
        for testname, testdef in sorted(test_doc.items()):
            fails, succeeds = 0, 0
            self.print2('{0}:', testname)
            variables = testdef['variables']
            testcases = testdef['testcases']
            for major_num, (template, inputs) in enumerate(testcases, 1):
                if not isinstance(inputs, list):
                    # Correct for multiple 'expected'
                    inputs = [inputs]
                for minor_num, to_match in enumerate(inputs, 1):
                    self.print3(' Case # {0}.{1}', major_num, minor_num)
                    if not self.test(variables, template, to_match):
                        fails += 1
                        self.failures += 1
                        if self.one_failure:
                            self.finish()
                    else:
                        self.successes += 1
                        succeeds += 1
            self.print2(" {0} Successes, {1} Failures", succeeds, fails)

    def finish(self, final=False):
        '''Print the totals and exit the process.

        ``final`` is accepted for compatibility but not used.  The exit
        status is the failure count clamped to 255: exit statuses are
        truncated to one byte by the OS, so an unclamped count that is a
        multiple of 256 would be reported as success.
        '''
        self.print1('{0} tests succeeded.', self.successes)
        self.print1('{0} tests failed', self.failures)
        sys.exit(min(self.failures, 255))

    def test(self, variables, template, to_match):
        '''Check one template against one expected expansion.

        Builds the regex for ``template``, matches it against
        ``to_match`` and compares every captured variable with the
        acceptable encodings of its value in ``variables``.  Returns True
        on success and False on any failure.
        '''
        self.print4("'{0}' matching '{1}'", template, to_match)
        try:
            testvars = uritemplate.variables(template)
            regex = uritemplate.as_regex(template)
        except Exception as e:
            if self.fail_into_pdb:
                pdb.post_mortem()
            # The traceback and repr are passed as format *arguments*:
            # they routinely contain '{' and '}' (templates, regex
            # quantifiers) and would break str.format if used as the
            # template itself.
            self.print4('{0}', traceback.format_exc())
            self.print3(' Failed with: {0}', repr(e))
            return False
        self.print4('Regex is: {0}', regex.pattern)

        try:
            # regex.match() returns None on a miss, so .groupdict()
            # raises AttributeError, which is the "did not match" signal.
            matchvars = regex.match(to_match).groupdict()
        except AttributeError:
            if self.fail_into_pdb:
                pdb.post_mortem()
            self.print3(' Failed with: Regex did not match expected')
            return False

        for var in testvars:
            match_var = matchvars.get(var)
            if not self.matches(match_var, variables[var], var):
                if self.fail_into_pdb:
                    pdb.set_trace()
                return False
        return True

    def matches(self, match_var, expect_var, varname):
        '''Return True when ``match_var`` is a prefix of one of the
        acceptable encodings of ``expect_var``.

        A missing capture (``match_var`` is None) counts as a mismatch
        instead of raising TypeError from str.startswith.  On a mismatch
        the expectation is printed on the level-3 channel.
        '''
        possible_correct = correct_answers(expect_var)
        result = match_var is not None and any(
            answer.startswith(match_var) for answer in possible_correct)
        if not result:
            self.print3_expectation(match_var, possible_correct, varname)
        return result

    def print3_expectation(self, match_var, answers, var):
        '''Print what was expected for ``var`` versus what was captured.'''
        if len(set(answers)) == 1:
            # All candidates are identical, so show just that one value.
            # (The placeholder used to be '{varU}', which was never
            # supplied and made this branch raise KeyError.)
            outstring = " For '{var}' expected '{answer}',"\
                        " got '{match_var}'"
        else:
            outstring = " For '{var}' expected one of {answers!r}"\
                        ", got '{match_var}'"
        self.print3(
            outstring, var=var, match_var=match_var, answers=answers,
            answer=answers[0] if answers else None)


if __name__ == "__main__":
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bit of a strange way to run the tests? Does the project not have a test runner?

# Command-line handling: "-1" .. "-4" select the verbosity (the highest
# flag present wins, none means 0), "-x" makes the runner stop at the
# first failure and "-pdb" makes it enter the debugger on failures.
one_failure = False
verbosity = max(
    level if '-{0}'.format(level) in sys.argv else 0
    for level in (1, 2, 3, 4))
TR = TestRunner(
    fail_into_pdb='-pdb' in sys.argv,
    one_failure='-x' in sys.argv,
    verbosity=verbosity,
)
TR.main()

2 changes: 1 addition & 1 deletion uritemplate/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@

from uritemplate import expand, variables
from uritemplate import expand, variables, as_regex

__version__ = "0.5.2"
111 changes: 111 additions & 0 deletions uritemplate/uritemplate.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,117 @@ def variables(template):
return vars


class Regexifier(object):
    '''Converts a URI template into a regular expression.

    Literal parts of the template are escaped; every ``{...}`` expression
    is replaced by one named capture group per variable.  All methods are
    classmethods; the entry point is regexify().
    '''

    # "%" followed by exactly two hexadecimal digits.  The previous
    # pattern allowed any two alphanumerics, so invalid escapes such as
    # "%zz" were accepted as percent-encoded data.
    PERCENT_ENCODED = r'%[0-9a-fA-F][0-9a-fA-F]'
    # Add ',' to unreserved since a single variable can be expanded to
    # multiple values if a composite value is passed in
    UNRESERVED = r'''[a-zA-Z0-9_.~\-,]'''
    RESERVED = r'''[:/?#\[\]@!$&'()*+,;=]'''

    @classmethod
    def capture(cls, varname, op=None, joiner=',', cap=None, explode=False):
        '''Returns a string for a variable capture regex for the given
        operation.

        varname -- variable name; used as the capture group name and, for
                   key/value operators, as the literal key.
                   NOTE(review): it is inserted unescaped, so a name that
                   is not a valid group name yields an invalid regex.
        op      -- the expression operator; None or '' means none.
        joiner  -- separator between variables of the expression; it is
                   removed from the allowed characters for '+' and '#'.
        cap     -- optional maximum number of repetitions (str or int).
        explode -- accepted but currently not used.
        '''
        if op is None:
            # str.replace(None, '') below would raise TypeError.
            op = ''
        if op in ('+', '#'):
            allowed = '|'.join([cls.UNRESERVED.replace(joiner, ''),
                                cls.RESERVED.replace(joiner, ''),
                                cls.PERCENT_ENCODED])
        else:
            allowed = '|'.join([cls.UNRESERVED.replace(op, ''),
                                cls.PERCENT_ENCODED])

        return r'''{key}(?P<{varname}>(?:{allowed}){repeat})'''.format(
            key=varname + '=?' if cls.is_keyval(op) else '',
            varname=varname,
            allowed=allowed,
            # format() rather than '+' so an integer cap works as well.
            repeat='{{0,{0}}}'.format(cap) if cap else '*'
        )

    @classmethod
    def is_keyval(cls, op):
        '''True for the operators whose captures are preceded by
        ``name=``.'''
        return op in (';', '?', '&')

    @classmethod
    def escape_or_substitute(cls, section):
        '''If the section passed in is a variable expression, replace
        it with a variable capture group. Otherwise, escape it so
        characters don't interfere with the final regex'''
        # TEMPLATE is a module-level pattern defined outside this class;
        # presumably its group 1 is the text between the braces.
        match = TEMPLATE.match(section)
        if not match:
            return re.escape(section)
        else:
            return cls.process_expression(match.group(1))

    @classmethod
    def process_expression(cls, expression):
        '''Breaks a variable expression into its parts and creates the
        proper regex for them'''
        # OPERATOR is a module-level collection of operator characters
        # defined outside this class.
        if expression[0] in OPERATOR:
            op = expression[0]
            expression = expression[1:]
        else:
            op = ''
        expressions = cls.split_vars(expression)
        joiner = cls.joiner_for(op)
        prefix = cls.prefix_for(op)
        pieces = (cls.capture(varname, op, joiner, cap, explode)
                  for varname, cap, explode in expressions)
        # Need to escape joiner since some of them have regex meaning
        return prefix + ('\\' + joiner).join(pieces)

    @classmethod
    def split_vars(cls, expression):
        '''Split a comma separated variable list into an iterable of
        (name, cap, explode) tuples.

        ``cap`` is the text after a single ':' or None; ``explode`` is
        True when the name ends with '*' (the '*' is stripped).
        '''
        vars = expression.split(',')

        def cap_or_none(var):
            explode = False
            try:
                var, cap = var.split(':')
            except ValueError:
                # Zero or several ':' -> no cap, keep the text whole.
                var, cap = var, None
            if var.endswith('*'):
                explode = True
                var = var[:-1]
            return var, cap, explode
        return (cap_or_none(var) for var in vars)

    @classmethod
    def joiner_for(cls, op):
        '''Return the separator placed between variables for ``op``.'''
        if op in ('?', '&'):
            return '&'
        elif op in ('.', '/', ';'):
            return op
        else:
            return ','

    @classmethod
    def prefix_for(cls, op):
        '''Return the backslash-escaped operator for operators that emit
        a leading character, otherwise the empty string.'''
        if op in ('#', '?', '.', '/', ';', '&'):
            return '\\' + op
        else:
            return ''

    @classmethod
    def explode(cls, string):
        '''Explodes a string based on variable expressions.

        Returns the list produced by re.split with a capturing group, so
        literal pieces and ``{...}`` expressions alternate.
        '''
        var_expr = re.compile(r'({[^\\}]+})')
        return var_expr.split(string)

    @classmethod
    def regexify(cls, uritemplate):
        '''Converts the given uritemplate to a compiled regex with named
        capture groups for each template variable'''
        exploded = cls.explode(uritemplate)
        return re.compile(''.join(
            cls.escape_or_substitute(section) for section in exploded))


def as_regex(uritemplate):
    '''Build the regex for ``uritemplate`` via Regexifier.regexify.

    The result has one named capture group per template variable, named
    after that variable.
    '''
    compiled = Regexifier.regexify(uritemplate)
    return compiled


def _quote(value, safe, prefix=None):
if prefix is not None:
return quote(str(value)[:prefix], safe)
Expand Down