Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Verify via tool #92

Open
wants to merge 8 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@ __pycache__/
dist/
onto_tool.egg-info/
tests-output/
.vscode
37 changes: 37 additions & 0 deletions onto_tool/bundle_schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ properties:
- ask
- select
- construct
- tool
allOf:
- if:
properties:
Expand Down Expand Up @@ -307,9 +308,45 @@ properties:
$ref: '#/definitions/non_empty_string'
includes:
$ref: '#/definitions/include_list'
required:
- source
required:
- source
- shapes
- if:
properties:
type:
const: tool
then:
properties:
tool:
$ref: '#/definitions/non_empty_string'
source:
$ref: '#/definitions/non_empty_string'
includes:
$ref: '#/definitions/include_list'
redirectOutput:
type: boolean
target:
$ref: '#/definitions/non_empty_string'
failOn:
type: string
enum:
- "warning"
- "violation"
shapes:
type: object
properties:
source:
$ref: '#/definitions/non_empty_string'
includes:
$ref: '#/definitions/include_list'
required:
- source
required:
- tool
- source
#- shapes
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pwin we should remove this rather than commenting it out.

required:
- type
- if:
Expand Down
124 changes: 111 additions & 13 deletions onto_tool/onto_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
import logging
import os
import sys
import traceback
from os.path import join, isdir, isfile, basename, splitext
from glob import glob
import re
import tempfile
import subprocess
import shutil
import gzip
Expand Down Expand Up @@ -555,7 +557,8 @@ def __parse_update_query__(query_text):
return parsed_query


def __bundle_transform__(action, tools, variables):
@register(name="transform")
def __bundle_transform__(action, variables, tools):
logging.debug('Transform %s', action)
tool = next((t for t in tools if t['name'] == action['tool']), None)
if not tool:
Expand All @@ -571,7 +574,7 @@ def __bundle_transform__(action, tools, variables):


@register(name="definedBy")
def __bundle_defined_by__(action, variables):
def __bundle_defined_by__(action, variables, **kwargs):
logging.debug('Add definedBy %s', action)
for in_out in __bundle_file_list(action, variables):
g = Graph()
Expand Down Expand Up @@ -599,7 +602,7 @@ def __bundle_defined_by__(action, variables):


@register(name="copy")
def __bundle_copy__(action, variables):
def __bundle_copy__(action, variables, **kwargs):
logging.debug('Copy %s', action)
for in_out in __bundle_file_list(action, variables):
if isfile(in_out['inputFile']):
Expand All @@ -613,7 +616,7 @@ def __bundle_copy__(action, variables):


@register(name="move")
def __bundle_move__(action, variables):
def __bundle_move__(action, variables, **kwargs):
logging.debug('Move %s', action)
for in_out in __bundle_file_list(action, variables):
if isfile(in_out['inputFile']):
Expand All @@ -627,7 +630,7 @@ def __bundle_move__(action, variables):


@register(name="markdown")
def __bundle_markdown__(action, variables):
def __bundle_markdown__(action, variables, **kwargs):
logging.debug('Markdown %s', action)
conv = Markdown2HTML()
filepath_in = action['source'].format(**variables)
Expand All @@ -640,7 +643,7 @@ def __bundle_markdown__(action, variables):


@register(name="graph")
def __bundle_graph__(action, variables):
def __bundle_graph__(action, variables, **kwargs):
logging.debug('Graph %s', action)
documentation = action['target'].format(**variables)
version = action['version'].format(**variables)
Expand Down Expand Up @@ -808,7 +811,7 @@ def __bundle_endpoint_sparql__(action, variables, output, queries):


@register(name="sparql")
def __bundle_sparql__(action, variables):
def __bundle_sparql__(action, variables, **kwargs):
logging.debug('SPARQL %s', action)
output = action['target'].format(**variables) if 'target' in action else None
queries = __build_query_list__(action, variables)
Expand Down Expand Up @@ -839,7 +842,7 @@ def __build_graph_from_inputs__(action, variables):


@register(name="verify")
def __bundle_verify__(action, variables):
def __bundle_verify__(action, variables, tools):
logging.debug('Verify %s', action)
if action['type'] == 'select':
__verify_select__(action, variables)
Expand All @@ -849,6 +852,8 @@ def __bundle_verify__(action, variables):
__verify_construct__(action, variables)
elif action['type'] == 'shacl':
__verify_shacl__(action, variables)
elif action['type'] == 'tool':
__verify_tool__(action, variables, tools)


def __verify_select__(action, variables):
Expand Down Expand Up @@ -1036,6 +1041,101 @@ def __verify_shacl__(action, variables):
exit(1)


def __verify_tool__(action, variables, tools):
with tempfile.TemporaryDirectory() as tempdir:
tool = next((t for t in tools if t['name'] == action['tool']), None)
if not tool:
raise Exception('Missing tool ', action['tool'])

data_graph = __build_graph_from_inputs__(action, variables)
data_file = os.path.join(tempdir, "data.ttl")
data_graph.serialize(data_file, format="turtle", encoding="utf-8")
if "shapes" in action:
shape_graph = __build_graph_from_inputs__(action['shapes'], variables)
shape_file = os.path.join(tempdir, "shapes.ttl")
shape_graph.serialize(shape_file, format="turtle", encoding="utf-8")
logging.debug("Shape graph has %s triples", sum(1 for _ in shape_graph))
else:
shape_file = None
logging.debug("Data graph has %s triples", sum(1 for _ in data_graph))

arguments = []
if tool['type'] == 'Java':
arguments = ["java", "-jar", tool['jar']] + tool['arguments']
elif tool['type'] == 'shell':
arguments = tool['arguments']
else:
raise Exception('Unsupported tool type for verify', tool['type'])

outputPath, interpreted_args = __run_verify_tool__(action, variables, data_file, shape_file, arguments)

if outputPath:
__report_violations_from_tool__(action, outputPath, interpreted_args)


def __run_verify_tool__(action, variables, data_file, shape_file, arguments):
invocation_vars = VarDict()
invocation_vars.update(variables)
invocation_vars.update({
'dataFile': data_file,
'shapeFile': shape_file
})
outputPath = None
if action.get('target'):
outputPath = action['target'].format(**variables)
invocation_vars.update({'outputFile': outputPath})
interpreted_args = [arg.format(**invocation_vars) for arg in arguments]
logging.debug('Running %s', interpreted_args)
try:
status = subprocess.run(interpreted_args, capture_output=True)
except Exception:
exc_type, exc_value, exc_traceback = sys.exc_info()
traceback.print_tb(exc_traceback, limit=30, file=sys.stdout)
print(exc_value, file=sys.stdout)
print(exc_type, file=sys.stdout)
sys.exit()
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we need to print the traceback (which should be for internal failures) - if the user provided a bad path for a tool, that should be reported as a regular issue, using logging.error(), we should not have print() statements in the code. Also, a sys.exit(1) should be used to indicate an error termination.


if status.stdout:
logging.debug('stdout for %s is %s', action, status.stdout)
if __boolean_option__(action, 'redirectOutput', variables):
if not outputPath:
raise Exception('Must provide target when redirecting output', action)
with open(outputPath, 'wb') as redirected:
redirected.write(status.stdout)
if status.stderr:
logging.debug('stderr for %s is %s', action, status.stderr)
if status.returncode != 0:
logging.error("Tool %s exited with %d: %s", interpreted_args, status.returncode, status.stderr)
exit(1)
return outputPath, interpreted_args


def __report_violations_from_tool__(action, outputPath, interpreted_args):
if not os.path.isfile(outputPath):
logging.error("Tool %s exited without generating expected output", interpreted_args)
exit(1)

results_graph = Graph()
results_graph.parse(outputPath, format=guess_format(outputPath))

not_conforms = results_graph.query("""
prefix sh: <http://www.w3.org/ns/shacl#>
ASK { ?report a sh:ValidationReport; sh:conforms false . }
""")
if not_conforms.askAnswer:
result_table, count, violation = __format_validation_results__(results_graph)
fail_on_warning = 'failOn' in action and action['failOn'] == 'warning'
if not count:
logging.warning("Tool verification did not produce a well-formed ViolationReport:\n%s", result_table)
else:
if violation or fail_on_warning:
logging.error("Tool verification produced non-empty results:\n%s", result_table)
else:
logging.warning("Tool verification produced non-empty results:\n%s", result_table)
if fail_on_warning or violation:
exit(1)


def __format_validation_results__(results_graph: Graph) -> Tuple[str, int, bool]:
"""Format validation results as text table.

Expand Down Expand Up @@ -1144,7 +1244,7 @@ def __boolean_option__(action, key, variables, default=False):


@register(name="export")
def __bundle_export__(action, variables):
def __bundle_export__(action, variables, **kwargs):
logging.debug('Export %s', action)
if __boolean_option__(action, 'compress', variables):
output = gzip.open(action['target'].format(**variables), 'wt', encoding="utf-8")
Expand Down Expand Up @@ -1217,14 +1317,12 @@ def bundle_ontology(command_line_variables, bundle_path):
for action in bundle['actions']:
if 'message' in action:
logging.info(action['message'].format(**substituted))
if action['action'] == 'transform':
__bundle_transform__(action, bundle['tools'], substituted)
elif action['action'] == 'mkdir':
if action['action'] == 'mkdir':
path = action['directory'].format(**substituted)
if not isdir(path):
os.makedirs(path)
elif action['action'] in BUNDLE_ACTIONS:
BUNDLE_ACTIONS[action['action']](action, substituted)
BUNDLE_ACTIONS[action['action']](action, substituted, tools=bundle.get('tools', {}))
else:
raise Exception('Unknown action ' + str(action))

Expand Down
40 changes: 40 additions & 0 deletions tests/bundle/test_verify.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
import csv
from os.path import isfile
from pytest import raises
import pytest
import sys
import re
from rdflib import Graph
from rdflib.namespace import Namespace, RDF
Expand Down Expand Up @@ -133,3 +135,41 @@ def test_verify_shacl(caplog):
sh = Namespace('http://www.w3.org/ns/shacl#')
errors = [validation_graph.subjects(RDF.type, sh.ValidationResult)]
assert len(errors) == 1

@pytest.mark.skipif(sys.platform == 'win32', reason="No /bin/cp")
def test_verify_tool_shell(caplog):
with raises(SystemExit) as wrapped_exit:
onto_tool.main([
'bundle', '-v', 'output', 'tests-output/bundle', 'tests/bundle/verify_tool.yaml'
])
assert wrapped_exit.type == SystemExit
assert wrapped_exit.value.code == 1

logs = caplog.text
assert 'Tool verification' in logs
assert 'Fake Violation' in logs

validation_graph = Graph()
validation_graph.parse('tests-output/bundle/verify_tool_copy.ttl', format='turtle')
sh = Namespace('http://www.w3.org/ns/shacl#')
errors = [validation_graph.subjects(RDF.type, sh.ValidationResult)]
assert len(errors) == 1

@pytest.mark.skipif(sys.platform == 'win32', reason="No /bin/cp")
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
@pytest.mark.skipif(sys.platform == 'win32', reason="No /bin/cp")
@pytest.mark.skipif(sys.platform == 'win32', reason="No /bin/cat")

def test_verify_tool_shell_redirect(caplog):
with raises(SystemExit) as wrapped_exit:
onto_tool.main([
'bundle', '-v', 'output', 'tests-output/bundle', 'tests/bundle/verify_tool_redirect.yaml'
])
assert wrapped_exit.type == SystemExit
assert wrapped_exit.value.code == 1

logs = caplog.text
assert 'Tool verification' in logs
assert 'Fake Violation' in logs

validation_graph = Graph()
validation_graph.parse('tests-output/bundle/verify_tool_redirect.ttl', format='turtle')
sh = Namespace('http://www.w3.org/ns/shacl#')
errors = [validation_graph.subjects(RDF.type, sh.ValidationResult)]
assert len(errors) == 1
13 changes: 13 additions & 0 deletions tests/bundle/verify_fixed_error.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
@prefix : <http://example.com/> .
@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

[] a sh:ValidationReport ;
sh:conforms false ;
sh:result [ a sh:ValidationResult ;
sh:focusNode :fails ;
sh:resultMessage "Fake Violation Report" ;
sh:resultPath :fake_property ;
sh:resultSeverity sh:Violation ;
sh:value :fake_value ] .

21 changes: 21 additions & 0 deletions tests/bundle/verify_tool.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
bundle: test
variables:
input: "tests/bundle"
output: "tests/bundle"
tools:
- name: 'copy_results'
type: 'shell'
arguments:
- '/bin/cp'
- '{input}/verify_fixed_error.ttl'
- '{outputFile}'
actions:
- action: 'verify'
type: 'tool'
tool: 'copy_results'
source: '{input}'
includes:
- 'verify_data.ttl'
shapes:
source: '{input}/verify_shacl_shapes.ttl'
target: '{output}/verify_tool_copy.ttl'
21 changes: 21 additions & 0 deletions tests/bundle/verify_tool_redirect.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
bundle: test
variables:
input: "tests/bundle"
output: "tests/bundle"
tools:
- name: 'echo_results'
type: 'shell'
arguments:
- '/bin/cat'
- '{input}/verify_fixed_error.ttl'
actions:
- action: 'verify'
type: 'tool'
tool: 'echo_results'
source: '{input}'
includes:
- 'verify_data.ttl'
shapes:
source: '{input}/verify_shacl_shapes.ttl'
redirectOutput: true
target: '{output}/verify_tool_redirect.ttl'