Warning

This document is for an old release of Galaxy. You can alternatively view this page in the latest release if it exists or view the top of the latest release's documentation.

Source code for galaxy.tool_util.linters.outputs

"""This module contains linting functions for tool outputs."""
from galaxy.util import (
    etree,
    string_as_bool,
)
from ._util import is_valid_cheetah_placeholder
from ..parser.output_collection_def import NAMED_PATTERNS


def lint_output(tool_xml, lint_ctx):
    """Check output elements, ensure there is at least one and check attributes.

    The children of the first ``<outputs>`` section are inspected one by one.
    Problems are reported through ``lint_ctx`` and nothing is returned.

    :param tool_xml: parsed tool document; must provide ``findall``, ``find``
        and ``getroot``.
    :param lint_ctx: lint context whose ``warn``, ``error`` and ``info``
        methods are called with a message and the offending ``node``.
    """
    outputs = tool_xml.findall("./outputs")
    # determine node to report for general problems with outputs
    tool_node = tool_xml.find("./outputs")
    if tool_node is None:
        tool_node = tool_xml.getroot()
    if not outputs:
        lint_ctx.warn("Tool contains no outputs section, most tools should produce outputs.", node=tool_node)
        return
    if len(outputs) > 1:
        lint_ctx.warn("Tool contains multiple output sections, behavior undefined.", node=outputs[1])

    num_outputs = 0
    labels = set()
    names = set()
    for output in outputs[0]:
        # XML comments show up as children too; they are not outputs
        if output.tag is etree.Comment:
            continue
        if output.tag not in ("data", "collection"):
            lint_ctx.warn(f"Unknown element found in outputs [{output.tag}]", node=output)
            continue
        num_outputs += 1

        if "name" not in output.attrib:
            lint_ctx.warn("Tool output doesn't define a name - this is likely a problem.", node=output)
            # TODO make this an error if there is no discover_datasets / from_work_dir (is this then still a problem)
        elif not is_valid_cheetah_placeholder(output.attrib["name"]):
            lint_ctx.warn(
                f'Tool output name [{output.attrib["name"]}] is not a valid Cheetah placeholder.', node=output
            )

        name = output.attrib.get("name")
        if name is not None:
            if name in names:
                lint_ctx.error(f"Tool output [{name}] has duplicated name", node=output)
            names.add(name)

        # outputs without an explicit label all share the same default label
        label = output.attrib.get("label", "${tool.name} on ${on_string}")
        if label in labels:
            filter_node = output.find(".//filter")
            if filter_node is not None:
                lint_ctx.warn(
                    f"Tool output [{name}] uses duplicated label '{label}', double check if filters imply disjoint cases",
                    node=output,
                )
            else:
                lint_ctx.warn(f"Tool output [{name}] uses duplicated label '{label}'", node=output)
        labels.add(label)

        format_set = False
        if __check_format(output, lint_ctx):
            format_set = True
        if output.tag == "data":
            # auto_format is a boolean attribute: auto_format="false" must not
            # count as a defined format, so parse the value instead of testing
            # it for a non-empty string
            if string_as_bool(output.attrib.get("auto_format", "false")):
                format_set = True
        elif output.tag == "collection":
            if "type" not in output.attrib:
                lint_ctx.warn("Collection output with undefined 'type' found.", node=output)
            # the format is only inherited if inherit_format is actually true,
            # not merely present
            if "structured_like" in output.attrib and string_as_bool(
                output.attrib.get("inherit_format", "false")
            ):
                format_set = True
            # child elements of the collection may define the format as well
            for sub in output:
                if __check_pattern(sub):
                    format_set = True
                elif __check_format(sub, lint_ctx, allow_ext=True):
                    format_set = True

        if not format_set:
            lint_ctx.warn(
                f"Tool {output.tag} output {output.attrib.get('name', 'with missing name')} doesn't define an output format.",
                node=output,
            )

    # TODO: check for different labels in case of multiple outputs
    lint_ctx.info(f"{num_outputs} outputs found.", node=outputs[0])
def __check_format(node, lint_ctx, allow_ext=False):
    """
    Check if a format/ext/format_source attribute is set in a given node.

    A warning is issued if ``format_source`` is combined with ``ext`` or
    ``format``; an error is issued if the format value is ``input``.

    :param node: XML element to inspect
    :param lint_ctx: lint context used to report problems
    :param allow_ext: also consider the ``ext`` attribute (it then takes
        precedence over ``format``)
    :return: True if the node defines a format (via ``format_source``, a
        nested ``action`` of type ``format``, or ``format``/``ext``),
        False otherwise
    """
    if "format_source" in node.attrib and ("ext" in node.attrib or "format" in node.attrib):
        lint_ctx.warn(
            f"Tool {node.tag} output '{node.attrib.get('name', 'with missing name')}' should use either format_source or format/ext",
            node=node,
        )
    if "format_source" in node.attrib:
        return True
    if node.find(".//action[@type='format']") is not None:
        return True
    # if allowed (e.g. for discover_datasets), ext takes precedence over format
    fmt = None
    if allow_ext:
        fmt = node.attrib.get("ext")
    if fmt is None:
        fmt = node.attrib.get("format")
    if fmt == "input":
        lint_ctx.error(
            f"Using format='input' on {node.tag}, format_source attribute is less ambiguous and should be used instead.",
            node=node,
        )
    return fmt is not None


def __check_pattern(node):
    """
    Check if a ``discover_datasets`` node determines the extension, i.e. if

    - from_tool_provided_metadata is true, or
    - the pattern attribute is set and defines the extension
      (contains a named ``ext`` group)

    :param node: XML element to inspect
    :return: True if one of the conditions holds, False otherwise (also for
        any node that is not a ``discover_datasets`` element)
    """
    if node.tag != "discover_datasets":
        return False
    if "from_tool_provided_metadata" in node.attrib and string_as_bool(
        node.attrib.get("from_tool_provided_metadata", "false")
    ):
        return True
    if "pattern" not in node.attrib:
        return False
    pattern = node.attrib["pattern"]
    # the attribute may be the key of a named pattern; fall back to the raw value
    regex_pattern = NAMED_PATTERNS.get(pattern, pattern)
    # TODO error on wrong pattern or non-regexp
    # always return a bool (previously fell through and returned None)
    return "(?P<ext>" in regex_pattern