"""Linters that check the datatypes referenced by a tool."""

import os.path
from functools import lru_cache
from typing import (
    FrozenSet,
    Optional,
    Set,
    TYPE_CHECKING,
)

from galaxy.tool_util.lint import Linter
from galaxy.util import (
    listify,
    parse_xml,
)

if TYPE_CHECKING:
    from galaxy.tool_util.lint import LintContext
    from galaxy.tool_util.parser import ToolSource

# Sample datatypes configuration expected next to this module.
DATATYPES_CONF = os.path.join(os.path.dirname(__file__), "datatypes_conf.xml.sample")

# Attributes whose value is a comma-separated list of datatype extensions.
_DATATYPE_ATTRIBUTES = ("format", "ftype", "ext")


def _parse_datatypes(datatype_conf_path: str) -> Set[str]:
    """
    Collect the datatype extensions registered in a datatypes_conf file.

    Every ``registration/datatype`` element contributes its ``extension``
    plus one ``<extension>.<type>`` entry for each item that ``listify``
    yields from its ``auto_compressed_types`` attribute.

    :param datatype_conf_path: path of the datatypes configuration to parse
    :returns: a new, mutable set of extensions
    """
    datatypes: Set[str] = set()
    root = parse_xml(datatype_conf_path).getroot()
    for elem in root.findall("./registration/datatype"):
        extension = elem.get("extension", "")
        if not extension:
            # Nothing a tool could refer to; recording "" would make an
            # empty format entry look like a known datatype.
            continue
        datatypes.add(extension)
        compressed_types = listify(elem.get("auto_compressed_types", ""))
        datatypes.update(f"{extension}.{act}" for act in compressed_types)
    return datatypes


@lru_cache(maxsize=1)
def _builtin_datatypes() -> FrozenSet[str]:
    """
    Return the extensions found in ``DATATYPES_CONF``.

    The file is parsed once and the result is cached; it is frozen so that
    callers cannot modify the cached value by accident.
    """
    return frozenset(_parse_datatypes(DATATYPES_CONF))


def _custom_datatypes_conf_path(tool_source: "ToolSource") -> Optional[str]:
    """
    Return the path of a ``datatypes_conf.xml`` located in the tool's
    directory, or ``None`` if the tool has no source path or no such file
    exists.
    """
    source_path = tool_source.source_path
    if not source_path:
        return None
    candidate = os.path.join(os.path.dirname(source_path), "datatypes_conf.xml")
    return candidate if os.path.exists(candidate) else None


class DatatypesCustomConf(Linter):
    """
    Check if a custom datatypes_conf.xml is present
    """

    @classmethod
    def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"):
        """Warn when a datatypes_conf.xml sits next to an XML tool."""
        tool_xml = getattr(tool_source, "xml_tree", None)
        # Compare with None explicitly: the truth value of XML objects
        # says nothing about whether a tree is available.
        if tool_xml is None:
            return
        if _custom_datatypes_conf_path(tool_source) is None:
            return
        lint_ctx.warn(
            "Tool uses a custom datatypes_conf.xml which is discouraged",
            linter=cls.name(),
            node=tool_xml.getroot(),
        )


class ValidDatatypes(Linter):
    """
    Check that used datatypes are available
    """

    @classmethod
    def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"):
        """
        Report an error for every extension used in a ``format``, ``ftype``
        or ``ext`` attribute that is neither in ``DATATYPES_CONF`` nor in a
        ``datatypes_conf.xml`` next to the tool.
        """
        tool_xml = getattr(tool_source, "xml_tree", None)
        if tool_xml is None:
            return
        # Galaxy built-in datatypes; copied so the cached set stays untouched
        datatypes = set(_builtin_datatypes())
        # add custom tool datatypes
        custom_conf_path = _custom_datatypes_conf_path(tool_source)
        if custom_conf_path is not None:
            datatypes |= _parse_datatypes(custom_conf_path)
        for attrib in _DATATYPE_ATTRIBUTES:
            for elem in tool_xml.findall(f".//*[@{attrib}]"):
                for raw_extension in elem.get(attrib, "").split(","):
                    # Whitespace is never part of an extension, so blanks
                    # around the commas must not cause a mismatch.
                    extension = raw_extension.strip()
                    if extension not in datatypes:
                        lint_ctx.error(
                            f"Unknown datatype [{extension}] used in {elem.tag} element",
                            linter=cls.name(),
                            node=elem,
                        )
This string will be loaded as JSON and its type will attempt to be preserved through API requests to Galaxy. - + This attribute name should be included only with parameters of ``type`` ``data`` for the tool. If this @@ -1717,7 +1717,7 @@ generated as JSON. This can be useful for testing tool outputs that are not file ]]> - + - + The comma-separated list of accepted data formats for this input. The list of supported data formats is contained in the @@ -5936,7 +5936,7 @@ The default is ``galaxy.json``. - + Indicates that the entire path of the discovered dataset relative to the specified directory should be available for matching patterns. - + Format (or datatype) of discovered datasets (an alias with ``ext``). - + Format (or datatype) of discovered datasets (an alias with ``format``). @@ -7653,7 +7653,7 @@ parameter (e.g. ``value="interval"`` above), or of the deprecated ``input_datase attribute. - + This value must be a supported data type (e.g. ``format="interval"``). See @@ -8075,6 +8075,18 @@ favour of a ``has_size`` assertion. 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +""" +DATATYPES_CONF = """ + + + + + +""" + + +def test_valid_datatypes(lint_ctx): + """ + test datatypes linters + """ + with tempfile.TemporaryDirectory() as tmp: + tool_path = os.path.join(tmp, "tool.xml") + datatypes_path = os.path.join(tmp, "datatypes_conf.xml") + with open(tool_path, "w") as tmpf: + tmpf.write(VALID_DATATYPES) + with open(datatypes_path, "w") as tmpf: + tmpf.write(DATATYPES_CONF) + tool_xml, _ = load_with_references(tool_path) + tool_source = XmlToolSource(tool_xml, source_path=tool_path) + run_lint_module(lint_ctx, datatypes, tool_source) + assert not lint_ctx.info_messages + assert not lint_ctx.valid_messages + assert "Tool uses a custom datatypes_conf.xml which is discouraged" in lint_ctx.warn_messages + assert len(lint_ctx.warn_messages) == 1 + assert "Unknown datatype [invalid] used in param" in lint_ctx.error_messages + assert "Unknown datatype [another_invalid] used in data" in lint_ctx.error_messages + assert "Unknown datatype [just_another_invalid] used in when" in lint_ctx.error_messages + assert "Unknown datatype [collection_format] used in collection" in lint_ctx.error_messages + assert "Unknown datatype [invalid] used in param" in lint_ctx.error_messages + assert "Unknown datatype [invalid] used in discover_datasets" in lint_ctx.error_messages + assert len(lint_ctx.error_messages) == 6 + + DATA_MANAGER = """ @@ -2237,7 +2300,7 @@ def test_skip_by_module(lint_ctx): def test_list_linters(): linter_names = Linter.list_listers() # make sure to add/remove a test for new/removed linters if this number changes - assert len(linter_names) == 135 + assert len(linter_names) == 137 assert "Linter" not in linter_names # make sure that linters from all modules are available for prefix in [