semanticarts · bpelakh · Nov 16, 2021 · Nov 16, 2021 · Nov 17, 2021 · Nov 18, 2021
diff --git a/.gitignore b/.gitignore
@@ -14,3 +14,4 @@ __pycache__/
 dist/
 onto_tool.egg-info/
 tests-output/
+.vscode
diff --git a/onto_tool/bundle_schema.yaml b/onto_tool/bundle_schema.yaml
@@ -185,6 +185,7 @@ properties:
                   - ask
                   - select
                   - construct
+                  - tool
             allOf:
               - if:
                   properties:
@@ -307,9 +308,45 @@ properties:
                           $ref: '#/definitions/non_empty_string'
                         includes:
                           $ref: '#/definitions/include_list'
+                      required:
+                        - source
                   required:
                     - source
                     - shapes
+              - if:
+                  properties:
+                    type:
+                      const: tool
+                then:
+                  properties:
+                    tool:
+                      $ref: '#/definitions/non_empty_string'
+                    source:
+                      $ref: '#/definitions/non_empty_string'
+                    includes:
+                      $ref: '#/definitions/include_list'
+                    redirectOutput:
+                      type: boolean
+                    target:
+                      $ref: '#/definitions/non_empty_string'
+                    failOn:
+                      type: string
+                      enum:
+                        - "warning"
+                        - "violation"
+                    shapes:
+                      type: object
+                      properties:
+                        source:
+                          $ref: '#/definitions/non_empty_string'
+                        includes:
+                          $ref: '#/definitions/include_list'
+                      required:
+                        - source
+                  required:
+                    - tool
+                    - source
+                    #- shapes
             required:
               - type
         - if:

diff --git a/onto_tool/onto_tool.py b/onto_tool/onto_tool.py
@@ -3,9 +3,11 @@
 import logging
 import os
 import sys
+import traceback
 from os.path import join, isdir, isfile, basename, splitext
 from glob import glob
 import re
+import tempfile
 import subprocess
 import shutil
 import gzip
@@ -555,7 +557,8 @@ def __parse_update_query__(query_text):
     return parsed_query
 
 
-def __bundle_transform__(action, tools, variables):
+@register(name="transform")
+def __bundle_transform__(action, variables, tools):
     logging.debug('Transform %s', action)
     tool = next((t for t in tools if t['name'] == action['tool']), None)
     if not tool:
@@ -571,7 +574,7 @@ def __bundle_transform__(action, tools, variables):
 
 
 @register(name="definedBy")
-def __bundle_defined_by__(action, variables):
+def __bundle_defined_by__(action, variables, **kwargs):
     logging.debug('Add definedBy %s', action)
     for in_out in __bundle_file_list(action, variables):
         g = Graph()
@@ -599,7 +602,7 @@ def __bundle_defined_by__(action, variables):
 
 
 @register(name="copy")
-def __bundle_copy__(action, variables):
+def __bundle_copy__(action, variables, **kwargs):
     logging.debug('Copy %s', action)
     for in_out in __bundle_file_list(action, variables):
         if isfile(in_out['inputFile']):
@@ -613,7 +616,7 @@ def __bundle_copy__(action, variables):
 
 
 @register(name="move")
-def __bundle_move__(action, variables):
+def __bundle_move__(action, variables, **kwargs):
     logging.debug('Move %s', action)
     for in_out in __bundle_file_list(action, variables):
         if isfile(in_out['inputFile']):
@@ -627,7 +630,7 @@ def __bundle_move__(action, variables):
 
 
 @register(name="markdown")
-def __bundle_markdown__(action, variables):
+def __bundle_markdown__(action, variables, **kwargs):
     logging.debug('Markdown %s', action)
     conv = Markdown2HTML()
     filepath_in = action['source'].format(**variables)
@@ -640,7 +643,7 @@ def __bundle_markdown__(action, variables):
 
 
 @register(name="graph")
-def __bundle_graph__(action, variables):
+def __bundle_graph__(action, variables, **kwargs):
     logging.debug('Graph %s', action)
     documentation = action['target'].format(**variables)
     version = action['version'].format(**variables)
@@ -808,7 +811,7 @@ def __bundle_endpoint_sparql__(action, variables, output, queries):
 
 
 @register(name="sparql")
-def __bundle_sparql__(action, variables):
+def __bundle_sparql__(action, variables, **kwargs):
     logging.debug('SPARQL %s', action)
     output = action['target'].format(**variables) if 'target' in action else None
     queries = __build_query_list__(action, variables)
@@ -839,7 +842,7 @@ def __build_graph_from_inputs__(action, variables):
 
 
 @register(name="verify")
-def __bundle_verify__(action, variables):
+def __bundle_verify__(action, variables, tools):
     logging.debug('Verify %s', action)
     if action['type'] == 'select':
         __verify_select__(action, variables)
@@ -849,6 +852,8 @@ def __bundle_verify__(action, variables):
         __verify_construct__(action, variables)
     elif action['type'] == 'shacl':
         __verify_shacl__(action, variables)
+    elif action['type'] == 'tool':
+        __verify_tool__(action, variables, tools)
 
 
 def __verify_select__(action, variables):
@@ -1036,6 +1041,101 @@ def __verify_shacl__(action, variables):
             exit(1)
 
 
+def __verify_tool__(action, variables, tools):
+    with tempfile.TemporaryDirectory() as tempdir:
+        tool = next((t for t in tools if t['name'] == action['tool']), None)
+        if not tool:
+            raise Exception('Missing tool ', action['tool'])
+
+        data_graph = __build_graph_from_inputs__(action, variables)
+        data_file = os.path.join(tempdir, "data.ttl")
+        data_graph.serialize(data_file, format="turtle", encoding="utf-8")
+        if "shapes" in action:
+            shape_graph = __build_graph_from_inputs__(action['shapes'], variables)
+            shape_file = os.path.join(tempdir, "shapes.ttl")
+            shape_graph.serialize(shape_file, format="turtle", encoding="utf-8")
+            logging.debug("Shape graph has %s triples", sum(1 for _ in shape_graph))
+        else:
+            shape_file = None
+        logging.debug("Data graph has %s triples", sum(1 for _ in data_graph))
+
+        arguments = []
+        if tool['type'] == 'Java':
+            arguments = ["java", "-jar", tool['jar']] + tool['arguments']
+        elif tool['type'] == 'shell':
+            arguments = tool['arguments']
+        else:
+            raise Exception('Unsupported tool type for verify', tool['type'])
+
+        outputPath, interpreted_args = __run_verify_tool__(action, variables, data_file, shape_file, arguments)
+
+        if outputPath:
+            __report_violations_from_tool__(action, outputPath, interpreted_args)
+
+
+def __run_verify_tool__(action, variables, data_file, shape_file, arguments):
+    invocation_vars = VarDict()
+    invocation_vars.update(variables)
+    invocation_vars.update({
+            'dataFile': data_file,
+            'shapeFile': shape_file
+        })
+    outputPath = None
+    if action.get('target'):
+        outputPath = action['target'].format(**variables)
+        invocation_vars.update({'outputFile': outputPath})
+    interpreted_args = [arg.format(**invocation_vars) for arg in arguments]
+    logging.debug('Running %s', interpreted_args)
+    try:
+        status = subprocess.run(interpreted_args, capture_output=True)
+    except Exception:
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        traceback.print_tb(exc_traceback, limit=30, file=sys.stdout)
+        print(exc_value, file=sys.stdout)
+        print(exc_type, file=sys.stdout)
+        sys.exit()
+
+    if status.stdout:
+        logging.debug('stdout for %s is %s', action, status.stdout)
+        if __boolean_option__(action, 'redirectOutput', variables):
+            if not outputPath:
+                raise Exception('Must provide target when redirecting output', action)
+            with open(outputPath, 'wb') as redirected:
+                redirected.write(status.stdout)
+    if status.stderr:
+        logging.debug('stderr for %s is %s', action, status.stderr)
+    if status.returncode != 0:
+        logging.error("Tool %s exited with %d: %s", interpreted_args, status.returncode, status.stderr)
+        exit(1)
+    return outputPath, interpreted_args
+
+
+def __report_violations_from_tool__(action, outputPath, interpreted_args):
+    if not os.path.isfile(outputPath):
+        logging.error("Tool %s exited without generating expected output", interpreted_args)
+        exit(1)
+
+    results_graph = Graph()
+    results_graph.parse(outputPath, format=guess_format(outputPath))
+
+    not_conforms = results_graph.query("""
+        prefix sh: <http://www.w3.org/ns/shacl#>
+        ASK { ?report a sh:ValidationReport; sh:conforms false . }
+    """)
+    if not_conforms.askAnswer:
+        result_table, count, violation = __format_validation_results__(results_graph)
+        fail_on_warning = 'failOn' in action and action['failOn'] == 'warning'
+        if not count:
+            logging.warning("Tool verification did not produce a well-formed ViolationReport:\n%s", result_table)
+        else:
+            if violation or fail_on_warning:
+                logging.error("Tool verification produced non-empty results:\n%s", result_table)
+            else:
+                logging.warning("Tool verification produced non-empty results:\n%s", result_table)
+        if fail_on_warning or violation:
+            exit(1)
+
+
 def __format_validation_results__(results_graph: Graph) -> Tuple[str, int, bool]:
     """Format validation results as text table.
 
@@ -1144,7 +1244,7 @@ def __boolean_option__(action, key, variables, default=False):
 
 
 @register(name="export")
-def __bundle_export__(action, variables):
+def __bundle_export__(action, variables, **kwargs):
     logging.debug('Export %s', action)
     if __boolean_option__(action, 'compress', variables):
         output = gzip.open(action['target'].format(**variables), 'wt', encoding="utf-8")
@@ -1217,14 +1317,12 @@ def bundle_ontology(command_line_variables, bundle_path):
     for action in bundle['actions']:
         if 'message' in action:
             logging.info(action['message'].format(**substituted))
-        if action['action'] == 'transform':
-            __bundle_transform__(action, bundle['tools'], substituted)
-        elif action['action'] == 'mkdir':
+        if action['action'] == 'mkdir':
             path = action['directory'].format(**substituted)
             if not isdir(path):
                 os.makedirs(path)
         elif action['action'] in BUNDLE_ACTIONS:
-            BUNDLE_ACTIONS[action['action']](action, substituted)
+            BUNDLE_ACTIONS[action['action']](action, substituted, tools=bundle.get('tools', {}))
         else:
             raise Exception('Unknown action ' + str(action))
 

diff --git a/tests/bundle/test_verify.py b/tests/bundle/test_verify.py
@@ -2,6 +2,8 @@
 import csv
 from os.path import isfile
 from pytest import raises
+import pytest
+import sys
 import re
 from rdflib import Graph
 from rdflib.namespace import Namespace, RDF
@@ -133,3 +135,41 @@ def test_verify_shacl(caplog):
     sh = Namespace('http://www.w3.org/ns/shacl#')
     errors = [validation_graph.subjects(RDF.type, sh.ValidationResult)]
     assert len(errors) == 1
+
+@pytest.mark.skipif(sys.platform == 'win32', reason="No /bin/cp")
+def test_verify_tool_shell(caplog):
+    with raises(SystemExit) as wrapped_exit:
+        onto_tool.main([
+            'bundle', '-v', 'output', 'tests-output/bundle', 'tests/bundle/verify_tool.yaml'
+        ])
+    assert wrapped_exit.type == SystemExit
+    assert wrapped_exit.value.code == 1
+
+    logs = caplog.text
+    assert 'Tool verification' in logs
+    assert 'Fake Violation' in logs
+
+    validation_graph = Graph()
+    validation_graph.parse('tests-output/bundle/verify_tool_copy.ttl', format='turtle')
+    sh = Namespace('http://www.w3.org/ns/shacl#')
+    errors = [validation_graph.subjects(RDF.type, sh.ValidationResult)]
+    assert len(errors) == 1
+
+@pytest.mark.skipif(sys.platform == 'win32', reason="No /bin/cp")
-@pytest.mark.skipif(sys.platform == 'win32', reason="No /bin/cp")
+@pytest.mark.skipif(sys.platform == 'win32', reason="No /bin/cat")
-@pytest.mark.skipif(sys.platform == 'win32', reason="No /bin/cp")
+@pytest.mark.skipif(sys.platform == 'win32', reason="No /bin/cat")
+def test_verify_tool_shell_redirect(caplog):
+    with raises(SystemExit) as wrapped_exit:
+        onto_tool.main([
+            'bundle', '-v', 'output', 'tests-output/bundle', 'tests/bundle/verify_tool_redirect.yaml'
+        ])
+    assert wrapped_exit.type == SystemExit
+    assert wrapped_exit.value.code == 1
+
+    logs = caplog.text
+    assert 'Tool verification' in logs
+    assert 'Fake Violation' in logs
+
+    validation_graph = Graph()
+    validation_graph.parse('tests-output/bundle/verify_tool_redirect.ttl', format='turtle')
+    sh = Namespace('http://www.w3.org/ns/shacl#')
+    errors = [validation_graph.subjects(RDF.type, sh.ValidationResult)]
+    assert len(errors) == 1
diff --git a/tests/bundle/verify_fixed_error.ttl b/tests/bundle/verify_fixed_error.ttl
@@ -0,0 +1,13 @@
+@prefix : <http://example.com/> .
+@prefix sh: <http://www.w3.org/ns/shacl#> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+[] a sh:ValidationReport ;
+    sh:conforms false ;
+    sh:result [ a sh:ValidationResult ;
+            sh:focusNode :fails ;
+            sh:resultMessage "Fake Violation Report" ;
+            sh:resultPath :fake_property ;
+            sh:resultSeverity sh:Violation ;
+            sh:value :fake_value ] .
+
diff --git a/tests/bundle/verify_tool.yaml b/tests/bundle/verify_tool.yaml
@@ -0,0 +1,21 @@
+bundle: test
+variables:
+  input: "tests/bundle"
+  output: "tests/bundle"
+tools:
+  - name: 'copy_results'
+    type: 'shell'
+    arguments:
+      - '/bin/cp'
+      - '{input}/verify_fixed_error.ttl'
+      - '{outputFile}'
+actions:
+  - action: 'verify'
+    type: 'tool'
+    tool: 'copy_results'
+    source: '{input}'
+    includes:
+      - 'verify_data.ttl'
+    shapes:
+      source: '{input}/verify_shacl_shapes.ttl'
+    target: '{output}/verify_tool_copy.ttl'
diff --git a/tests/bundle/verify_tool_redirect.yaml b/tests/bundle/verify_tool_redirect.yaml
@@ -0,0 +1,21 @@
+bundle: test
+variables:
+  input: "tests/bundle"
+  output: "tests/bundle"
+tools:
+  - name: 'echo_results'
+    type: 'shell'
+    arguments:
+      - '/bin/cat'
+      - '{input}/verify_fixed_error.ttl'
+actions:
+  - action: 'verify'
+    type: 'tool'
+    tool: 'echo_results'
+    source: '{input}'
+    includes:
+      - 'verify_data.ttl'
+    shapes:
+      source: '{input}/verify_shacl_shapes.ttl'
+    redirectOutput: true
+    target: '{output}/verify_tool_redirect.ttl'
-Original file line number
+Diff line change
@@ Expand Up / @@ -14,3 +14,4 @@ __pycache__/ @@
     dist/
     onto_tool.egg-info/
     tests-output/
+    .vscode