Skip to content

Commit

Permalink
Merge pull request #17600 from bernt-matthias/datatype-lint
Browse files Browse the repository at this point in the history
[24.2] Add linters for datatypes
  • Loading branch information
jdavcs authored Nov 22, 2024
2 parents 77b7f58 + f2c105d commit 6bc9940
Show file tree
Hide file tree
Showing 6 changed files with 171 additions and 9 deletions.
3 changes: 2 additions & 1 deletion doc/source/dev/data_types.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ tag section of the `datatypes_conf.xml` file. Sample
where

- `extension` - the data type's Dataset file extension (e.g., `ab1`, `bed`,
`gff`, `qual`, etc.)
`gff`, `qual`, etc.). The extension must consist only of lowercase letters,
numbers, `_`, `-`, and `.`.
- `type` - the path to the class for that data type.
- `mimetype` - if present (it's optional), the data type's mime type
- `display_in_upload` - if present (it's optional and defaults to False), the
Expand Down
84 changes: 84 additions & 0 deletions lib/galaxy/tool_util/linters/datatypes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import os.path
from typing import (
Set,
TYPE_CHECKING,
)

# from galaxy import config
from galaxy.tool_util.lint import Linter
from galaxy.util import (
listify,
parse_xml,
)

if TYPE_CHECKING:
from galaxy.tool_util.lint import LintContext
from galaxy.tool_util.parser import ToolSource

DATATYPES_CONF = os.path.join(os.path.dirname(__file__), "datatypes_conf.xml.sample")


def _parse_datatypes(datatype_conf_path: str) -> Set[str]:
datatypes = set()
tree = parse_xml(datatype_conf_path)
root = tree.getroot()
for elem in root.findall("./registration/datatype"):
extension = elem.get("extension", "")
datatypes.add(extension)
auto_compressed_types = listify(elem.get("auto_compressed_types", ""))
for act in auto_compressed_types:
datatypes.add(f"{extension}.{act}")
return datatypes


class DatatypesCustomConf(Linter):
"""
Check if a custom datatypes_conf.xml is present
"""

@classmethod
def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"):
if not tool_source.source_path:
return
tool_xml = getattr(tool_source, "xml_tree", None)
if not tool_xml:
return
tool_node = tool_xml.getroot()
tool_dir = os.path.dirname(tool_source.source_path)
datatypes_conf_path = os.path.join(tool_dir, "datatypes_conf.xml")
if os.path.exists(datatypes_conf_path):
lint_ctx.warn(
"Tool uses a custom datatypes_conf.xml which is discouraged",
linter=cls.name(),
node=tool_node,
)


class ValidDatatypes(Linter):
"""
Check that used datatypes are available
"""

@classmethod
def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"):
tool_xml = getattr(tool_source, "xml_tree", None)
if not tool_xml:
return
# get Galaxy built-in dataypes
datatypes = _parse_datatypes(DATATYPES_CONF)
# add custom tool data types
if tool_source.source_path:
tool_dir = os.path.dirname(tool_source.source_path)
datatypes_conf_path = os.path.join(tool_dir, "datatypes_conf.xml")
if os.path.exists(datatypes_conf_path):
datatypes |= _parse_datatypes(datatypes_conf_path)
for attrib in ["format", "ftype", "ext"]:
for elem in tool_xml.findall(f".//*[@{attrib}]"):
formats = elem.get(attrib, "").split(",")
for format in formats:
if format not in datatypes:
lint_ctx.error(
f"Unknown datatype [{format}] used in {elem.tag} element",
linter=cls.name(),
node=elem,
)
1 change: 1 addition & 0 deletions lib/galaxy/tool_util/linters/datatypes_conf.xml.sample
26 changes: 19 additions & 7 deletions lib/galaxy/tool_util/xsd/galaxy.xsd
Original file line number Diff line number Diff line change
Expand Up @@ -1572,7 +1572,7 @@ used to load typed parameters. This string will be loaded as JSON and its type w
attempt to be preserved through API requests to Galaxy.</xs:documentation>
</xs:annotation>
</xs:attribute>
<xs:attribute name="ftype" type="xs:string">
<xs:attribute name="ftype" type="Format">
<xs:annotation>
<xs:documentation xml:lang="en">This attribute name should be included
only with parameters of ``type`` ``data`` for the tool. If this
Expand Down Expand Up @@ -1717,7 +1717,7 @@ generated as JSON. This can be useful for testing tool outputs that are not file
]]></xs:documentation>
</xs:annotation>
</xs:attribute>
<xs:attribute name="ftype" type="xs:string">
<xs:attribute name="ftype" type="Format">
<xs:annotation>
<xs:documentation xml:lang="en"><![CDATA[
Expand Down Expand Up @@ -4208,7 +4208,7 @@ only when the ``type`` attribute value is ``data``, ``float``, or ``integer``.</
only when the ``type`` attribute value is ``data``, ``float``, or ``integer``.</xs:documentation>
</xs:annotation>
</xs:attribute>
<xs:attribute name="format" type="xs:string">
<xs:attribute name="format" type="FormatList">
<xs:annotation>
<xs:documentation xml:lang="en">The comma-separated list of accepted
data formats for this input. The list of supported data formats is contained in the
Expand Down Expand Up @@ -5936,7 +5936,7 @@ The default is ``galaxy.json``.
</xs:choice>
</xs:group>
<xs:attributeGroup name="OutputCommon">
<xs:attribute name="format" type="xs:string">
<xs:attribute name="format" type="Format">
<xs:annotation>
<xs:documentation xml:lang="en"><![CDATA[
The short name for the output datatype. The valid values for format can be found in
Expand Down Expand Up @@ -6324,12 +6324,12 @@ Therefore a filter for such a variable looks like the following example.
<xs:documentation xml:lang="en">Indicates that the entire path of the discovered dataset relative to the specified directory should be available for matching patterns.</xs:documentation>
</xs:annotation>
</xs:attribute>
<xs:attribute name="format" type="xs:string" use="optional">
<xs:attribute name="format" type="Format" use="optional">
<xs:annotation>
<xs:documentation xml:lang="en">Format (or datatype) of discovered datasets (an alias with ``ext``).</xs:documentation>
</xs:annotation>
</xs:attribute>
<xs:attribute name="ext" type="xs:string" use="optional">
<xs:attribute name="ext" type="Format" use="optional">
<xs:annotation>
<xs:documentation xml:lang="en">Format (or datatype) of discovered datasets (an alias with ``format``).</xs:documentation>
</xs:annotation>
Expand Down Expand Up @@ -7653,7 +7653,7 @@ parameter (e.g. ``value="interval"`` above), or of the deprecated ``input_datase
attribute.</xs:documentation>
</xs:annotation>
</xs:attribute>
<xs:attribute name="format" type="xs:string" use="required">
<xs:attribute name="format" type="Format" use="required">
<xs:annotation>
<xs:documentation xml:lang="en">This value must be a supported data type
(e.g. ``format="interval"``). See
Expand Down Expand Up @@ -8075,6 +8075,18 @@ favour of a ``has_size`` assertion.</xs:documentation>
</xs:simpleType>
</xs:union>
</xs:simpleType>

<xs:simpleType name="Format">
<xs:restriction base="xs:string">
<xs:pattern value="[0-9a-z.-_]+"/>
</xs:restriction>
</xs:simpleType>
<xs:simpleType name="FormatList">
<xs:restriction base="xs:string">
<xs:pattern value="([a-z0-9._-]+)(,([a-z0-9._-]+))*"/>
</xs:restriction>
</xs:simpleType>

<xs:complexType name="EdamTopics">
<xs:annotation gxdocs:best_practices="tool-annotations-edam">
<xs:documentation xml:lang="en"><![CDATA[
Expand Down
1 change: 1 addition & 0 deletions packages/tool_util/MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ include galaxy/tool_util/toolbox/filters/*.sample
include galaxy/tool_util/verify/test_config.sample.yml
include galaxy/tool_util/xsd/*
include galaxy/tool_util/ontologies/*tsv
include galaxy/tool_util/linters/datatypes_conf.xml.sample
65 changes: 64 additions & 1 deletion test/unit/tool_util/test_tool_linters.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from galaxy.tool_util.linters import (
citations,
command,
datatypes,
general,
help,
inputs,
Expand Down Expand Up @@ -2022,6 +2023,68 @@ def test_xml_order(lint_ctx):
assert lint_ctx.error_messages == ["Invalid XML: Element 'wrong_tag': This element is not expected."]


VALID_DATATYPES = """
<tool id="id" name="name">
<inputs>
<param name="adata" type="data" format="txt"/>
<param name="bdata" type="data" format="invalid,fasta,custom"/>
<param name="fdata" type="data" format="fasta.gz"/>
</inputs>
<outputs>
<data name="name" format="another_invalid">
<change_format>
<when input="input_text" value="foo" format="just_another_invalid"/>
</change_format>
</data>
<collection name="collection_name" format="collection_format">
<discover_datasets format="txt"/>
<discover_datasets ext="invalid"/>
</collection>
</outputs>
<tests>
<test>
<param name="adata" ftype="txt"/>
<param name="bdata" ftype="invalid"/>
</test>
</tests>
</tool>
"""
DATATYPES_CONF = """
<datatypes>
<registration>
<datatype extension="custom"/>
</registration>
</datatypes>
"""


def test_valid_datatypes(lint_ctx):
"""
test datatypes linters
"""
with tempfile.TemporaryDirectory() as tmp:
tool_path = os.path.join(tmp, "tool.xml")
datatypes_path = os.path.join(tmp, "datatypes_conf.xml")
with open(tool_path, "w") as tmpf:
tmpf.write(VALID_DATATYPES)
with open(datatypes_path, "w") as tmpf:
tmpf.write(DATATYPES_CONF)
tool_xml, _ = load_with_references(tool_path)
tool_source = XmlToolSource(tool_xml, source_path=tool_path)
run_lint_module(lint_ctx, datatypes, tool_source)
assert not lint_ctx.info_messages
assert not lint_ctx.valid_messages
assert "Tool uses a custom datatypes_conf.xml which is discouraged" in lint_ctx.warn_messages
assert len(lint_ctx.warn_messages) == 1
assert "Unknown datatype [invalid] used in param" in lint_ctx.error_messages
assert "Unknown datatype [another_invalid] used in data" in lint_ctx.error_messages
assert "Unknown datatype [just_another_invalid] used in when" in lint_ctx.error_messages
assert "Unknown datatype [collection_format] used in collection" in lint_ctx.error_messages
assert "Unknown datatype [invalid] used in param" in lint_ctx.error_messages
assert "Unknown datatype [invalid] used in discover_datasets" in lint_ctx.error_messages
assert len(lint_ctx.error_messages) == 6


DATA_MANAGER = """<tool id="test_dm" name="test dm" version="1" tool_type="manage_data">
<inputs>
<param name="select" type="select">
Expand Down Expand Up @@ -2237,7 +2300,7 @@ def test_skip_by_module(lint_ctx):
def test_list_linters():
linter_names = Linter.list_listers()
# make sure to add/remove a test for new/removed linters if this number changes
assert len(linter_names) == 135
assert len(linter_names) == 137
assert "Linter" not in linter_names
# make sure that linters from all modules are available
for prefix in [
Expand Down

0 comments on commit 6bc9940

Please sign in to comment.