From 25e170a8171c5c725ffd7c30542fd486c9222210 Mon Sep 17 00:00:00 2001 From: muddymudskipper Date: Mon, 1 Jul 2024 17:37:38 +0100 Subject: [PATCH] parameter settings in prov data --- CHANGELOG.md | 2 ++ cmem_plugin_reason/plugin_reason.py | 9 ++++++--- cmem_plugin_reason/plugin_validate.py | 29 +++++++-------------------- cmem_plugin_reason/utils.py | 27 ++++++++++++++++++++++++- tests/test_elk.ttl | 3 +-- tests/test_emr.ttl | 3 +-- tests/test_hermit.ttl | 3 +-- tests/test_jfact.ttl | 3 +-- tests/test_structural.ttl | 3 +-- tests/test_validate_output.ttl | 3 +-- tests/test_whelk.ttl | 3 +-- 11 files changed, 48 insertions(+), 40 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ee41293..8a74e3f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/) and this p ## [Unreleased] + ### Fixed - `prov:generatedBy` in output graphs now refers to a plugin IRI instead of a literal @@ -13,6 +14,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/) and this p ### Changed - Keep original output ("No explanations found.") if no inconsistencies found with Validate plugin +- Provenance data in output graphs now includes plugin parameter settings ## [1.0.0beta1] 2024-07-01 diff --git a/cmem_plugin_reason/plugin_reason.py b/cmem_plugin_reason/plugin_reason.py index 969c81f..f9f2578 100644 --- a/cmem_plugin_reason/plugin_reason.py +++ b/cmem_plugin_reason/plugin_reason.py @@ -25,12 +25,11 @@ ROBOT, create_xml_catalog_file, get_graphs_tree, + post_provenance, remove_temp, send_result, ) -PLUGIN_IRI = "https://plugin.eccenca.com/cmem-plugin-reason/reason" - @Plugin( label="Reason", @@ -268,7 +267,6 @@ def reason(self, graphs: dict) -> None: f"--language-annotation rdfs:comment " f'"Reasoning result set of <{self.data_graph_iri}> and ' f'<{self.ontology_graph_iri}>" en ' - f'--link-annotation prov:wasGeneratedBy "{PLUGIN_IRI}/{self.reasoner}" ' f'--link-annotation prov:wasDerivedFrom "{self.data_graph_iri}" ' f"--link-annotation prov:wasDerivedFrom " f'"{self.ontology_graph_iri}" ' @@ -292,4 +290,9 @@ def execute(self, inputs: tuple, context: ExecutionContext) -> None: # noqa: AR self.reason(graphs) setup_cmempy_user_access(context.user) send_result(self.result_graph_iri, Path(self.temp) / "result.ttl") + post_provenance( + self.result_graph_iri, + "cmem_plugin_reason-plugin_reason-ReasonPlugin", + context, + ) remove_temp(self) diff --git a/cmem_plugin_reason/plugin_validate.py b/cmem_plugin_reason/plugin_validate.py index c158e9f..8920676 100644 --- a/cmem_plugin_reason/plugin_validate.py +++ b/cmem_plugin_reason/plugin_validate.py @@ -32,12 +32,11 @@ ROBOT, create_xml_catalog_file, get_graphs_tree, + post_provenance, remove_temp, send_result, ) -PLUGIN_IRI = "https://plugin.eccenca.com/cmem-plugin-reason/validate" - @Plugin( label="Validate", @@ -152,7 +151,6 @@ def validate(self, graphs: dict) -> None: f'--language-annotation rdfs:label "Ontology Validation Result {utctime}" en ' f"--language-annotation rdfs:comment " f'"Ontology validation of <{self.ontology_graph_iri}>" en ' - f'--link-annotation prov:wasGeneratedBy "{PLUGIN_IRI}/{self.reasoner}" ' f'--link-annotation prov:wasDerivedFrom "{self.ontology_graph_iri}" ' f'--typed-annotation dc:created "{utctime}" xsd:dateTime ' f'--output "{self.temp}/output.ttl"' @@ -175,25 +173,6 @@ def make_resource(self, context: ExecutionContext) -> None: replace=True, ) - def post_provenance(self, plugin_id: str, context: ExecutionContext) -> None: - """TODO: Post provenance with query""" - plugin_iri = f"http://dataintegration.eccenca.com/{context.task.project_id()}/{context.task.task_id()}" - project_graph = f"http://di.eccenca.com/project/{context.task.project_id()}" - construct = f""" - PREFIX dif: - CONSTRUCT {{ - GRAPH <{self.output_graph_iri}> {{ - <{self.output_graph_iri}> prov:generatedBy <{plugin_iri}> . - <{plugin_iri}> a . - <{plugin_iri}> ?p ?o . - }} - }} - FROM <{project_graph}> - WHERE {{ - <{plugin_iri}> ?p ?o . - FILTER((STRSTARTS(STR(?p), 'https://vocab.eccenca.com/di/functions/param_')) - }}""" # noqa: F841 - def execute(self, inputs: tuple, context: ExecutionContext) -> Entities | None: # noqa: ARG002 """Run the workflow operator.""" setup_cmempy_user_access(context.user) @@ -205,6 +184,12 @@ def execute(self, inputs: tuple, context: ExecutionContext) -> Entities | None: if self.produce_graph: setup_cmempy_user_access(context.user) send_result(self.output_graph_iri, Path(self.temp) / "output.ttl") + post_provenance( + self.output_graph_iri, + "cmem_plugin_reason-plugin_validate-ValidatePlugin", + context, + ) + if self.write_md: setup_cmempy_user_access(context.user) self.make_resource(context) diff --git a/cmem_plugin_reason/utils.py b/cmem_plugin_reason/utils.py index 627b637..cb7561c 100644 --- a/cmem_plugin_reason/utils.py +++ b/cmem_plugin_reason/utils.py @@ -8,10 +8,11 @@ from xml.etree.ElementTree import Element, SubElement, tostring from cmem.cmempy.dp.proxy.graph import get_graph_import_tree, post_streamed +from cmem.cmempy.dp.proxy.update import post from cmem_plugin_base.dataintegration.description import PluginParameter from cmem_plugin_base.dataintegration.parameter.choice import ChoiceParameterType from cmem_plugin_base.dataintegration.parameter.graph import GraphParameterType -from cmem_plugin_base.dataintegration.plugins import WorkflowPlugin +from cmem_plugin_base.dataintegration.plugins import ExecutionContext, WorkflowPlugin from cmem_plugin_base.dataintegration.types import IntParameterType from defusedxml import minidom @@ -115,3 +116,27 @@ def remove_temp(plugin: WorkflowPlugin) -> None: rmtree(plugin.temp) except (OSError, FileNotFoundError) as err: plugin.log.warning(f"Cannot remove directory {plugin.temp} ({err})") + + +def post_provenance(graph: str, plugin_id: str, context: ExecutionContext) -> None: + """Insert provenance""" + plugin_iri = ( + f"http://dataintegration.eccenca.com/{context.task.project_id()}/{context.task.task_id()}" + ) + project_graph = f"http://di.eccenca.com/project/{context.task.project_id()}" + query = f""" + INSERT {{ + GRAPH <{graph}> {{ + <{graph}> <{plugin_iri}> . + <{plugin_iri}> a . + <{plugin_iri}> ?p ?o . + }} + }} + WHERE {{ + GRAPH <{project_graph}> {{ + <{plugin_iri}> ?p ?o . + FILTER(STRSTARTS(STR(?p), "https://vocab.eccenca.com/di/functions/param_")) + }} + }}""" + + post(query=query) diff --git a/tests/test_elk.ttl b/tests/test_elk.ttl index 21dc22b..0bac834 100644 --- a/tests/test_elk.ttl +++ b/tests/test_elk.ttl @@ -12,8 +12,7 @@ owl:imports vocab: ; rdfs:comment "Reasoning result set of and "@en ; prov:wasDerivedFrom - , ; - prov:wasGeneratedBy . + , . ################################################################# # Individuals diff --git a/tests/test_emr.ttl b/tests/test_emr.ttl index 22130a0..2ddd71a 100644 --- a/tests/test_emr.ttl +++ b/tests/test_emr.ttl @@ -12,8 +12,7 @@ owl:imports vocab: ; rdfs:comment "Reasoning result set of and "@en ; prov:wasDerivedFrom - , ; - prov:wasGeneratedBy . + , . ################################################################# # Individuals diff --git a/tests/test_hermit.ttl b/tests/test_hermit.ttl index 376fcf9..a81aaf7 100644 --- a/tests/test_hermit.ttl +++ b/tests/test_hermit.ttl @@ -12,8 +12,7 @@ owl:imports vocab: ; rdfs:comment "Reasoning result set of and "@en ; prov:wasDerivedFrom - , ; - prov:wasGeneratedBy . + , . ################################################################# # Individuals diff --git a/tests/test_jfact.ttl b/tests/test_jfact.ttl index 6f8a857..a81aaf7 100644 --- a/tests/test_jfact.ttl +++ b/tests/test_jfact.ttl @@ -12,8 +12,7 @@ owl:imports vocab: ; rdfs:comment "Reasoning result set of and "@en ; prov:wasDerivedFrom - , ; - prov:wasGeneratedBy . + , . ################################################################# # Individuals diff --git a/tests/test_structural.ttl b/tests/test_structural.ttl index ce025bf..587f20b 100644 --- a/tests/test_structural.ttl +++ b/tests/test_structural.ttl @@ -12,8 +12,7 @@ owl:imports vocab: ; rdfs:comment "Reasoning result set of and "@en ; prov:wasDerivedFrom - , ; - prov:wasGeneratedBy . + , . ################################################################# # Individuals diff --git a/tests/test_validate_output.ttl b/tests/test_validate_output.ttl index 4b777ad..b9b5886 100644 --- a/tests/test_validate_output.ttl +++ b/tests/test_validate_output.ttl @@ -7,8 +7,7 @@ a owl:Ontology ; rdfs:comment "Ontology validation of "@en ; - prov:wasDerivedFrom ; - prov:wasGeneratedBy . + prov:wasDerivedFrom . a owl:NamedIndividual, . diff --git a/tests/test_whelk.ttl b/tests/test_whelk.ttl index eccd0d9..2247138 100644 --- a/tests/test_whelk.ttl +++ b/tests/test_whelk.ttl @@ -12,8 +12,7 @@ owl:imports vocab: ; rdfs:comment "Reasoning result set of and "@en ; prov:wasDerivedFrom - , ; - prov:wasGeneratedBy . + , . ################################################################# # Individuals