Warning
This document is for an in-development version of Galaxy. You can alternatively view this page in the latest release if it exists or view the top of the latest release's documentation.
Source code for galaxy.tool_util.linters.outputs
"""This module contains a linting functions for tool outputs."""
from galaxy.util import (
etree,
string_as_bool,
)
from ._util import is_valid_cheetah_placeholder
from ..parser.output_collection_def import NAMED_PATTERNS
[docs]def lint_output(tool_xml, lint_ctx):
"""Check output elements, ensure there is at least one and check attributes."""
outputs = tool_xml.findall("./outputs")
# determine node to report for general problems with outputs
tool_node = tool_xml.find("./outputs")
if tool_node is None:
tool_node = tool_xml.getroot()
if len(outputs) == 0:
lint_ctx.warn("Tool contains no outputs section, most tools should produce outputs.", node=tool_node)
return
if len(outputs) > 1:
lint_ctx.warn("Tool contains multiple output sections, behavior undefined.", node=outputs[1])
num_outputs = 0
labels = set()
names = set()
for output in list(outputs[0]):
if output.tag is etree.Comment:
continue
if output.tag not in ["data", "collection"]:
lint_ctx.warn(f"Unknown element found in outputs [{output.tag}]", node=output)
continue
num_outputs += 1
if "name" not in output.attrib:
lint_ctx.warn("Tool output doesn't define a name - this is likely a problem.", node=output)
# TODO make this an error if there is no discover_datasets / from_work_dir (is this then still a problem)
elif not is_valid_cheetah_placeholder(output.attrib["name"]):
lint_ctx.warn(
f'Tool output name [{output.attrib["name"]}] is not a valid Cheetah placeholder.', node=output
)
name = output.attrib.get("name")
if name is not None:
if name in names:
lint_ctx.error(f"Tool output [{name}] has duplicated name", node=output)
names.add(name)
label = output.attrib.get("label", "${tool.name} on ${on_string}")
if label in labels:
lint_ctx.error(f"Tool output [{name}] uses duplicated label '{label}'", node=output)
labels.add(label)
format_set = False
if __check_format(output, lint_ctx):
format_set = True
if output.tag == "data":
if "auto_format" in output.attrib and output.attrib["auto_format"]:
format_set = True
elif output.tag == "collection":
if "type" not in output.attrib:
lint_ctx.warn("Collection output with undefined 'type' found.", node=output)
if "structured_like" in output.attrib and "inherit_format" in output.attrib:
format_set = True
for sub in output:
if __check_pattern(sub):
format_set = True
elif __check_format(sub, lint_ctx, allow_ext=True):
format_set = True
if not format_set:
lint_ctx.warn(
f"Tool {output.tag} output {output.attrib.get('name', 'with missing name')} doesn't define an output format.",
node=output,
)
# TODO: check for different labels in case of multiple outputs
lint_ctx.info(f"{num_outputs} outputs found.", node=outputs[0])
def __check_format(node, lint_ctx, allow_ext=False):
"""
check if format/ext/format_source attribute is set in a given node
issue a warning if the value is input
return true (node defines format/ext) / false (else)
"""
if "format_source" in node.attrib and ("ext" in node.attrib or "format" in node.attrib):
lint_ctx.warn(
f"Tool {node.tag} output '{node.attrib.get('name', 'with missing name')}' should use either format_source or format/ext",
node=node,
)
if "format_source" in node.attrib:
return True
if node.find(".//action[@type='format']") is not None:
return True
# if allowed (e.g. for discover_datasets), ext takes precedence over format
fmt = None
if allow_ext:
fmt = node.attrib.get("ext")
if fmt is None:
fmt = node.attrib.get("format")
if fmt == "input":
lint_ctx.error(
f"Using format='input' on {node.tag}, format_source attribute is less ambiguous and should be used instead.",
node=node,
)
return fmt is not None
def __check_pattern(node):
"""
check if
- pattern attribute is set and defines the extension or
- from_tool_provided_metadata is true
"""
if node.tag != "discover_datasets":
return False
if "from_tool_provided_metadata" in node.attrib and string_as_bool(
node.attrib.get("from_tool_provided_metadata", "false")
):
return True
if "pattern" not in node.attrib:
return False
pattern = node.attrib["pattern"]
regex_pattern = NAMED_PATTERNS.get(pattern, pattern)
# TODO error on wrong pattern or non-regexp
if "(?P<ext>" in regex_pattern:
return True