Skip to content

Commit

Permalink
move common functions to utils.py
Browse files Browse the repository at this point in the history
  • Loading branch information
muddymudskipper committed Jun 27, 2024
1 parent 66c559e commit e670da8
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 145 deletions.
85 changes: 11 additions & 74 deletions cmem_plugin_reason/plugin_reason.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,15 @@
"""Reasoning with robot plugin module"""

import re
import shlex
import unicodedata
from collections import OrderedDict
from collections.abc import Sequence
from datetime import UTC, datetime
from pathlib import Path
from subprocess import run
from time import time
from uuid import uuid4
from xml.etree.ElementTree import (
Element,
SubElement,
tostring,
)

import validators.url
from cmem.cmempy.dp.proxy.graph import get, get_graph_import_tree, post_streamed
from cmem.cmempy.dp.proxy.graph import get
from cmem_plugin_base.dataintegration.context import ExecutionContext
from cmem_plugin_base.dataintegration.description import Icon, Plugin, PluginParameter
from cmem_plugin_base.dataintegration.entity import Entities
Expand All @@ -26,33 +18,16 @@
from cmem_plugin_base.dataintegration.plugins import WorkflowPlugin
from cmem_plugin_base.dataintegration.types import BoolParameterType, StringParameterType
from cmem_plugin_base.dataintegration.utils import setup_cmempy_user_access
from defusedxml import minidom

from . import __path__

ROBOT = Path(__path__[0]) / "bin" / "robot.jar"
REASONERS = OrderedDict(
{
"elk": "ELK",
"emr": "Expression Materializing Reasoner",
"hermit": "HermiT",
"jfact": "JFact",
"structural": "Structural Reasoner",
"whelk": "Whelk",
}
from cmem_plugin_reason.utils import (
REASONERS,
ROBOT,
create_xml_catalog_file,
get_graphs_tree,
send_result,
)


def convert_iri_to_filename(value: str) -> str:
    """Convert an IRI into a file name ending in ``.nt``.

    The value is transliterated to ASCII (characters without an ASCII
    decomposition are dropped) and lower-cased; ``.`` and ``/`` become ``_``;
    every remaining character that is not a word character, whitespace or a
    hyphen is removed; runs of whitespace/hyphens collapse into a single ``-``;
    leading and trailing ``-``/``_`` are stripped.

    Note that distinct IRIs can map to the same name, e.g. IRIs that differ
    only in case or in removed punctuation.
    """
    ascii_text = unicodedata.normalize("NFKD", value).encode("ascii", "ignore").decode("ascii")
    # Lower-case once; the literal separators need no regular expression.
    flattened = ascii_text.lower().replace(".", "_").replace("/", "_")
    cleaned = re.sub(r"[^\w\s-]", "", flattened)
    stem = re.sub(r"[-\s]+", "-", cleaned).strip("-_")
    return f"{stem}.nt"


@Plugin(
label="Reason",
icon=Icon(file_name="obofoundry.png", package=__package__),
Expand Down Expand Up @@ -274,23 +249,7 @@ def __init__( # noqa: PLR0913
self.ontology_graph_iri = ontology_graph_iri
self.result_graph_iri = result_graph_iri
self.reasoner = reasoner
self.temp = f"robot_{uuid4().hex}"

def create_xml_catalog_file(self, graphs: dict) -> None:
    """Write the XML catalog ``catalog-v001.xml`` into the ``self.temp`` directory.

    The catalog gets one ``<uri>`` entry per item of ``graphs``: the key is
    written as the ``name`` attribute and the value as the ``uri`` attribute,
    with ids ``id0``, ``id1``, ... in iteration order. An existing catalog file
    is overwritten. The directory is not created here and must already exist.
    """
    catalog = Element("catalog")
    catalog.set("prefer", "public")
    catalog.set("xmlns", "urn:oasis:names:tc:entity:xmlns:xml:catalog")
    for index, (iri, file_name) in enumerate(graphs.items()):
        SubElement(catalog, "uri", {"id": f"id{index}", "name": iri, "uri": file_name})
    # Round-trip through minidom to obtain the document text that is stored.
    document = minidom.parseString(tostring(catalog, "utf-8")).toxml()
    # write_text replaces any previous content, so no explicit truncate is needed.
    (Path(self.temp) / "catalog-v001.xml").write_text(document, encoding="utf-8")
self.temp = f"reason_{uuid4().hex}"

def get_graphs(self, graphs: dict, context: ExecutionContext) -> None:
"""Get graphs from CMEM"""
Expand All @@ -306,19 +265,6 @@ def get_graphs(self, graphs: dict, context: ExecutionContext) -> None:
f"<http://www.w3.org/2002/07/owl#imports> <{self.ontology_graph_iri}> ."
)

def get_graphs_tree(self) -> dict:
    """Map the data graph, the ontology graph and their imports to file names.

    Returns a dict whose keys are graph IRIs and whose values are the names
    produced by ``convert_iri_to_filename``. The import tree of each of the
    two graphs is looked up with ``get_graph_import_tree``.
    """
    file_names = {}
    for root_iri in (self.data_graph_iri, self.ontology_graph_iri):
        # NOTE(review): a root already collected via an earlier tree is skipped
        # without a second lookup - confirm this matches the intended nesting.
        if root_iri in file_names:
            continue
        file_names[root_iri] = convert_iri_to_filename(root_iri)
        import_tree = get_graph_import_tree(root_iri)["tree"]
        for imported_iri in (iri for group in import_tree.values() for iri in group):
            if imported_iri not in file_names:
                file_names[imported_iri] = convert_iri_to_filename(imported_iri)
    return file_names

def reason(self, graphs: dict) -> None:
"""Reason"""
axioms = " ".join(k for k, v in self.axioms.items() if v)
Expand Down Expand Up @@ -358,15 +304,6 @@ def reason(self, graphs: dict) -> None:
raise OSError(response.stderr.decode())
raise OSError("ROBOT error")

def send_result(self) -> None:
    """Post ``result.ttl`` from the temp directory to the result graph.

    The file is sent to ``self.result_graph_iri`` with ``replace=True`` and
    content type ``text/turtle``.
    """
    result_file = Path(self.temp) / "result.ttl"
    post_streamed(
        self.result_graph_iri, str(result_file), replace=True, content_type="text/turtle"
    )

def clean_up(self, graphs: dict) -> None:
"""Remove temporary files"""
files = ["catalog-v001.xml", "result.ttl"]
Expand All @@ -384,10 +321,10 @@ def clean_up(self, graphs: dict) -> None:
def execute(self, inputs: Sequence[Entities], context: ExecutionContext) -> None: # noqa: ARG002
"""Execute plugin

Runs the steps in order: build the graph-IRI/file-name mapping, fetch the
graphs, write the XML catalog, reason, post result.ttl to the result graph
and clean up. `inputs` is not used.

NOTE(review): this span appears to interleave the method-based calls
(`self.get_graphs_tree`, `self.create_xml_catalog_file`, `self.send_result`)
with the module-level calls that do the same step - confirm that only one
form of each step is meant to remain.
"""
setup_cmempy_user_access(context.user)
# Step 1: mapping of graph IRIs to local file names (two forms, see note above).
graphs = self.get_graphs_tree()
graphs = get_graphs_tree((self.data_graph_iri, self.ontology_graph_iri))
self.get_graphs(graphs, context)
# Step 2: XML catalog written into the temp directory (two forms, see note above).
self.create_xml_catalog_file(graphs)
create_xml_catalog_file(self.temp, graphs)
self.reason(graphs)
# User access is set up again before the upload.
setup_cmempy_user_access(context.user)
# Step 3: upload of result.ttl to the result graph (two forms, see note above).
self.send_result()
send_result(self.result_graph_iri, Path(self.temp) / "result.ttl")
self.clean_up(graphs)
82 changes: 11 additions & 71 deletions cmem_plugin_reason/plugin_validate.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,15 @@
"""Random values workflow plugin module"""

import re
import shlex
import unicodedata
from collections import OrderedDict
from collections.abc import Sequence
from datetime import UTC, datetime
from pathlib import Path
from subprocess import run
from time import time
from uuid import uuid4
from xml.etree.ElementTree import (
Element,
SubElement,
tostring,
)

import validators.url
from cmem.cmempy.dp.proxy.graph import get, get_graph_import_tree, post_streamed
from cmem.cmempy.dp.proxy.graph import get
from cmem.cmempy.workspace.projects.resources.resource import create_resource
from cmem_plugin_base.dataintegration.context import ExecutionContext
from cmem_plugin_base.dataintegration.description import Icon, Plugin, PluginParameter
Expand All @@ -32,34 +24,17 @@
from cmem_plugin_base.dataintegration.plugins import WorkflowPlugin
from cmem_plugin_base.dataintegration.types import BoolParameterType, StringParameterType
from cmem_plugin_base.dataintegration.utils import setup_cmempy_user_access
from defusedxml import minidom
from pathvalidate import validate_filename

from . import __path__

ROBOT = Path(__path__[0]) / "bin" / "robot.jar"
REASONERS = OrderedDict(
{
"elk": "ELK",
"emr": "Expression Materializing Reasoner",
"hermit": "HermiT",
"jfact": "JFact",
"structural": "Structural Reasoner",
"whelk": "Whelk",
}
from cmem_plugin_reason.utils import (
REASONERS,
ROBOT,
create_xml_catalog_file,
get_graphs_tree,
send_result,
)


def convert_iri_to_filename(value: str) -> str:
    """Convert an IRI into a file name ending in ``.nt``.

    The value is transliterated to ASCII (characters without an ASCII
    decomposition are dropped) and lower-cased; ``.`` and ``/`` become ``_``;
    every remaining character that is not a word character, whitespace or a
    hyphen is removed; runs of whitespace/hyphens collapse into a single ``-``;
    leading and trailing ``-``/``_`` are stripped.

    Note that distinct IRIs can map to the same name, e.g. IRIs that differ
    only in case or in removed punctuation.
    """
    ascii_text = unicodedata.normalize("NFKD", value).encode("ascii", "ignore").decode("ascii")
    # Lower-case once; the literal separators need no regular expression.
    flattened = ascii_text.lower().replace(".", "_").replace("/", "_")
    cleaned = re.sub(r"[^\w\s-]", "", flattened)
    stem = re.sub(r"[-\s]+", "-", cleaned).strip("-_")
    return f"{stem}.nt"


@Plugin(
label="Validate ontology consistency",
description="",
Expand Down Expand Up @@ -150,25 +125,9 @@ def __init__( # noqa: PLR0913
self.output_graph_iri = output_graph_iri
self.write_md = write_md
self.stop_at_inconsistencies = stop_at_inconsistencies
self.temp = f"robot_{uuid4().hex}"
self.temp = f"reason_{uuid4().hex}"
self.md_filename = md_filename if md_filename and write_md else "mdfile.md"

def create_xml_catalog_file(self, graphs: dict) -> None:
    """Write the XML catalog ``catalog-v001.xml`` into the ``self.temp`` directory.

    The catalog gets one ``<uri>`` entry per item of ``graphs``: the key is
    written as the ``name`` attribute and the value as the ``uri`` attribute,
    with ids ``id0``, ``id1``, ... in iteration order. An existing catalog file
    is overwritten. The directory is not created here and must already exist.
    """
    catalog = Element("catalog")
    catalog.set("prefer", "public")
    catalog.set("xmlns", "urn:oasis:names:tc:entity:xmlns:xml:catalog")
    for index, (iri, file_name) in enumerate(graphs.items()):
        SubElement(catalog, "uri", {"id": f"id{index}", "name": iri, "uri": file_name})
    # Round-trip through minidom to obtain the document text that is stored.
    document = minidom.parseString(tostring(catalog, "utf-8")).toxml()
    # write_text replaces any previous content, so no explicit truncate is needed.
    (Path(self.temp) / "catalog-v001.xml").write_text(document, encoding="utf-8")

def get_graphs(self, graphs: dict, context: ExecutionContext) -> None:
"""Get graphs from CMEM"""
if not Path(self.temp).exists():
Expand All @@ -178,16 +137,6 @@ def get_graphs(self, graphs: dict, context: ExecutionContext) -> None:
setup_cmempy_user_access(context.user)
file.write(get(graph).text)

def get_graphs_tree(self) -> dict:
    """Map the ontology graph and the graphs in its import tree to file names.

    Returns a dict whose keys are graph IRIs and whose values are the names
    produced by ``convert_iri_to_filename``; the ontology graph itself is
    always the first entry.
    """
    root_iri = self.ontology_graph_iri
    file_names = {root_iri: convert_iri_to_filename(root_iri)}
    import_tree = get_graph_import_tree(root_iri)["tree"]
    for imported_iri in (iri for group in import_tree.values() for iri in group):
        if imported_iri not in file_names:
            file_names[imported_iri] = convert_iri_to_filename(imported_iri)
    return file_names

def validate(self, graphs: dict) -> None:
"""Reason"""
data_location = f"{self.temp}/{graphs[self.ontology_graph_iri]}"
Expand Down Expand Up @@ -220,15 +169,6 @@ def validate(self, graphs: dict) -> None:
raise OSError(response.stderr.decode())
raise OSError("ROBOT error")

def send_output_graph(self) -> None:
    """Post ``output.ttl`` from the temp directory to the output graph.

    The file is sent to ``self.output_graph_iri`` with ``replace=True`` and
    content type ``text/turtle``.
    """
    output_file = Path(self.temp) / "output.ttl"
    post_streamed(
        self.output_graph_iri, str(output_file), replace=True, content_type="text/turtle"
    )

def make_resource(self, context: ExecutionContext) -> None:
"""Make MD resource in project"""
create_resource(
Expand Down Expand Up @@ -259,9 +199,9 @@ def execute(
) -> Entities | None:
"""Run the workflow operator."""
setup_cmempy_user_access(context.user)
graphs = self.get_graphs_tree()
graphs = get_graphs_tree((self.ontology_graph_iri,))
self.get_graphs(graphs, context)
self.create_xml_catalog_file(graphs)
create_xml_catalog_file(self.temp, graphs)
self.validate(graphs)

text = (Path(self.temp) / self.md_filename).read_text()
Expand All @@ -271,7 +211,7 @@ def execute(

if self.produce_graph:
setup_cmempy_user_access(context.user)
self.send_output_graph()
send_result(self.output_graph_iri, Path(self.temp) / "output.ttl")

if self.write_md:
setup_cmempy_user_access(context.user)
Expand Down
80 changes: 80 additions & 0 deletions cmem_plugin_reason/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
"""Common functions"""

import re
import unicodedata
from collections import OrderedDict
from pathlib import Path
from xml.etree.ElementTree import (
Element,
SubElement,
tostring,
)

from cmem.cmempy.dp.proxy.graph import get_graph_import_tree, post_streamed
from defusedxml import minidom

from . import __path__

# Path to robot.jar inside the "bin" directory of this package.
ROBOT = Path(__path__[0]) / "bin" / "robot.jar"

# Reasoner keys mapped to their human-readable labels, kept in declaration order.
# NOTE(review): the keys presumably are the reasoner names passed to ROBOT - confirm.
REASONERS = OrderedDict(
{
"elk": "ELK",
"emr": "Expression Materializing Reasoner",
"hermit": "HermiT",
"jfact": "JFact",
"structural": "Structural Reasoner",
"whelk": "Whelk",
}
)


def convert_iri_to_filename(value: str) -> str:
    """Convert an IRI into a file name ending in ``.nt``.

    The value is transliterated to ASCII (characters without an ASCII
    decomposition are dropped) and lower-cased; ``.`` and ``/`` become ``_``;
    every remaining character that is not a word character, whitespace or a
    hyphen is removed; runs of whitespace/hyphens collapse into a single ``-``;
    leading and trailing ``-``/``_`` are stripped.

    Note that distinct IRIs can map to the same name, e.g. IRIs that differ
    only in case or in removed punctuation.
    """
    ascii_text = unicodedata.normalize("NFKD", value).encode("ascii", "ignore").decode("ascii")
    # Lower-case once; the literal separators need no regular expression.
    flattened = ascii_text.lower().replace(".", "_").replace("/", "_")
    cleaned = re.sub(r"[^\w\s-]", "", flattened)
    stem = re.sub(r"[-\s]+", "-", cleaned).strip("-_")
    return f"{stem}.nt"


def create_xml_catalog_file(temp: str, graphs: dict) -> None:
    """Write the XML catalog ``catalog-v001.xml`` into the directory ``temp``.

    The catalog gets one ``<uri>`` entry per item of ``graphs``: the key is
    written as the ``name`` attribute and the value as the ``uri`` attribute,
    with ids ``id0``, ``id1``, ... in iteration order. An existing catalog file
    is overwritten. The directory is not created here and must already exist.
    """
    catalog = Element("catalog")
    catalog.set("prefer", "public")
    catalog.set("xmlns", "urn:oasis:names:tc:entity:xmlns:xml:catalog")
    for index, (iri, file_name) in enumerate(graphs.items()):
        SubElement(catalog, "uri", {"id": f"id{index}", "name": iri, "uri": file_name})
    # Round-trip through minidom to obtain the document text that is stored.
    document = minidom.parseString(tostring(catalog, "utf-8")).toxml()
    # write_text replaces any previous content, so no explicit truncate is needed.
    (Path(temp) / "catalog-v001.xml").write_text(document, encoding="utf-8")


def get_graphs_tree(graph_iris: tuple) -> dict:
    """Map the given graphs and the graphs in their import trees to file names.

    Returns a dict whose keys are graph IRIs and whose values are the names
    produced by ``convert_iri_to_filename``. The import tree of each given
    graph is looked up with ``get_graph_import_tree``.
    """
    file_names = {}
    for root_iri in graph_iris:
        # NOTE(review): a root already collected via an earlier tree is skipped
        # without a second lookup - confirm this matches the intended nesting.
        if root_iri in file_names:
            continue
        file_names[root_iri] = convert_iri_to_filename(root_iri)
        import_tree = get_graph_import_tree(root_iri)["tree"]
        for imported_iri in (iri for group in import_tree.values() for iri in group):
            if imported_iri not in file_names:
                file_names[imported_iri] = convert_iri_to_filename(imported_iri)
    return file_names


def send_result(iri: str, filepath: Path) -> None:
    """Post the file at ``filepath`` to the graph ``iri``.

    The path is handed over as a string; the request uses ``replace=True``
    and content type ``text/turtle``.
    """
    turtle_file = str(filepath)
    post_streamed(iri, turtle_file, replace=True, content_type="text/turtle")

0 comments on commit e670da8

Please sign in to comment.