Warning

This document is for an in-development version of Galaxy. You can alternatively view this page in the latest release if it exists or view the top of the latest release's documentation.

Source code for galaxy.tool_util.parser.output_collection_def

""" This module defines an abstract class for reasoning about Galaxy's
dataset collection after jobs are finished.
"""

import abc
from typing import (
    List,
    Optional,
)

from galaxy.util import asbool
from .output_models import (
    DatasetCollectionDescriptionT,
    DiscoverViaT,
    FilePatternDatasetCollectionDescription as FilePatternDatasetCollectionDescriptionModel,
    SortCompT,
    SortKeyT,
    ToolProvidedMetadataDatasetCollection as ToolProvidedMetadataDatasetCollectionModel,
)
from .util import is_dict

DEFAULT_EXTRA_FILENAME_PATTERN = (
    r"primary_DATASET_ID_(?P<designation>[^_]+)_(?P<visible>[^_]+)_(?P<ext>[^_]+)(_(?P<dbkey>[^_]+))?"
)
DEFAULT_SORT_BY = "filename"
DEFAULT_SORT_COMP = "lexical"


# XML can describe custom patterns, but these literals describe named
# patterns that will be replaced.
NAMED_PATTERNS = {
    "__default__": DEFAULT_EXTRA_FILENAME_PATTERN,
    "__name__": r"(?P<name>.*)",
    "__designation__": r"(?P<designation>.*)",
    "__name_and_ext__": r"(?P<name>.*)\.(?P<ext>[^\.]+)?",
    "__designation_and_ext__": r"(?P<designation>.*)\.(?P<ext>[^\._]+)?",
}

INPUT_DBKEY_TOKEN = "__input__"
LEGACY_DEFAULT_DBKEY = None  # don't use __input__ for legacy default collection


def dataset_collector_descriptions_from_elem(elem, legacy=True):
    """Build dataset collector descriptions from a tool output element.

    Every ``discover_datasets`` child of ``elem`` is converted with
    ``dataset_collection_description``. If ``elem`` carries a ``format``
    attribute, it is used as the format of any child that declares neither
    ``format`` nor ``ext``.

    :param elem: element exposing ``findall`` and ``attrib``.
    :param legacy: when true and ``elem`` has no ``discover_datasets``
        children, fall back to ``DEFAULT_DATASET_COLLECTOR_DESCRIPTION``.
    :returns: list of collector descriptions, as returned by
        ``_validate_collectors``.
    :raises Exception: propagated from ``_validate_collectors`` or
        ``dataset_collection_description`` for invalid combinations.
    """
    primary_dataset_elems = elem.findall("discover_datasets")
    if not primary_dataset_elems and legacy:
        collectors = [DEFAULT_DATASET_COLLECTOR_DESCRIPTION]
    else:
        default_format = elem.attrib.get("format")
        collectors = []
        for e in primary_dataset_elems:
            # Work on a copy: writing the inherited format into ``e.attrib``
            # itself would silently modify the caller's parsed XML tree.
            description_attributes = dict(e.attrib)
            if default_format and "format" not in description_attributes and "ext" not in description_attributes:
                description_attributes["format"] = default_format
            collectors.append(dataset_collection_description(**description_attributes))
    return _validate_collectors(collectors)
def dataset_collector_descriptions_from_output_dict(as_dict):
    """Build dataset collector descriptions from a dict-style output definition.

    The ``discover_datasets`` entry of ``as_dict`` may be a single dict or a
    list of dicts; a missing entry is treated as an empty list.

    :param as_dict: mapping describing one tool output.
    :returns: list of collector descriptions, as returned by
        ``_validate_collectors``.
    """
    raw_entries = as_dict.get("discover_datasets", [])
    # A lone mapping is treated as a one-element list.
    entries = [raw_entries] if is_dict(raw_entries) else raw_entries
    return _validate_collectors(dataset_collector_descriptions_from_list(entries))
def _validate_collectors(collectors):
    """Reject invalid combinations of collector descriptions.

    A collector whose ``discover_via`` is ``"tool_provided_metadata"`` may only
    appear on its own; zero or one collector is always accepted.

    :param collectors: sized iterable of objects with a ``discover_via``
        attribute.
    :returns: ``collectors`` itself, unchanged.
    :raises Exception: if more than one collector is given and any of them
        discovers via tool provided metadata.
    """
    if len(collectors) <= 1:
        return collectors
    if any(entry.discover_via == "tool_provided_metadata" for entry in collectors):
        raise Exception(
            "Cannot specify more than one discover dataset condition if any of them specify tool_provided_metadata."
        )
    return collectors
def dataset_collector_descriptions_from_list(discover_datasets_dicts):
    """Convert an iterable of keyword dicts into collector descriptions.

    :param discover_datasets_dicts: iterable of mappings, each passed as
        keyword arguments to ``dataset_collection_description``.
    :returns: list with one description per input mapping, in input order.
    """
    descriptions = []
    for kwds in discover_datasets_dicts:
        descriptions.append(dataset_collection_description(**kwds))
    return descriptions
def dataset_collection_description(**kwargs):
    """Create the collector description matching the given attributes.

    The discovery mode is taken from ``discover_via`` when supplied; otherwise
    it is ``"tool_provided_metadata"`` if ``from_provided_metadata`` is truthy
    and ``"pattern"`` if not.

    :returns: a ``ToolProvidedMetadataDatasetCollection`` for the
        ``"tool_provided_metadata"`` mode, otherwise a
        ``FilePatternDatasetCollectionDescription``.
    :raises Exception: if ``pattern`` or ``sort_by`` is set together with the
        ``"tool_provided_metadata"`` mode.
    """
    from_provided_metadata = asbool(kwargs.get("from_provided_metadata", False))
    fallback_mode = "tool_provided_metadata" if from_provided_metadata else "pattern"
    discover_via = kwargs.get("discover_via", fallback_mode)

    if discover_via != "tool_provided_metadata":
        return FilePatternDatasetCollectionDescription(**kwargs)

    # Pattern-related options make no sense when the tool supplies the metadata.
    for key in ("pattern", "sort_by"):
        if kwargs.get(key):
            raise Exception(f"Cannot specify attribute [{key}] if from_provided_metadata is True")
    return ToolProvidedMetadataDatasetCollection(**kwargs)
class DatasetCollectionDescription(metaclass=abc.ABCMeta):
    """Abstract base for descriptions of how output datasets are discovered.

    Subclasses set ``discover_via`` and implement ``to_model``.
    """

    discover_via: DiscoverViaT
    default_ext: Optional[str]
    default_visible: bool
    assign_primary_output: bool
    directory: Optional[str]
    recurse: bool
    match_relative_path: bool

    def __init__(self, **kwargs):
        """Read the common options from ``kwargs``.

        Recognized keys: ``dbkey``, ``ext`` (with ``format`` as fallback when
        ``ext`` is absent or ``None``), ``visible``, ``assign_primary_output``,
        ``directory`` and ``match_relative_path``. Other keys are ignored here.
        """
        extension = kwargs.get("ext", None)
        if extension is None and "format" in kwargs:
            extension = kwargs["format"]

        self.default_dbkey = kwargs.get("dbkey", INPUT_DBKEY_TOKEN)
        self.default_ext = extension
        self.default_visible = asbool(kwargs.get("visible", None))
        self.assign_primary_output = asbool(kwargs.get("assign_primary_output", False))
        self.directory = kwargs.get("directory", None)
        # The base class never recurses; subclasses may overwrite this.
        self.recurse = False
        self.match_relative_path = asbool(kwargs.get("match_relative_path", False))

    def _common_model_props(self):
        """Return the attributes shared by all description models as a dict."""
        return dict(
            discover_via=self.discover_via,
            dbkey=self.default_dbkey,
            format=self.default_ext,
            visible=self.default_visible,
            assign_primary_output=self.assign_primary_output,
            directory=self.directory,
            recurse=self.recurse,
            match_relative_path=self.match_relative_path,
        )

    @abc.abstractmethod
    def to_model(self) -> DatasetCollectionDescriptionT:
        """Return the model object describing this collector."""
        ...

    def to_dict(self) -> dict:
        """Return ``to_model()`` dumped to a plain dict."""
        model = self.to_model()
        return model.model_dump()

    @property
    def discover_patterns(self) -> List[str]:
        """Patterns used for discovery; the base implementation has none."""
        return []
class ToolProvidedMetadataDatasetCollection(DatasetCollectionDescription):
    """Collector description whose ``discover_via`` is ``"tool_provided_metadata"``."""

    discover_via = "tool_provided_metadata"

    def to_model(self) -> ToolProvidedMetadataDatasetCollectionModel:
        """Return the model object built from this description's attributes."""
        model = ToolProvidedMetadataDatasetCollectionModel(
            discover_via=self.discover_via,
            dbkey=self.default_dbkey,
            format=self.default_ext,
            visible=self.default_visible,
            assign_primary_output=self.assign_primary_output,
            directory=self.directory,
            recurse=self.recurse,
            match_relative_path=self.match_relative_path,
        )
        return model

    def to_dict(self) -> dict:
        """Return ``to_model()`` dumped to a plain dict."""
        model = self.to_model()
        return model.model_dump()
class FilePatternDatasetCollectionDescription(DatasetCollectionDescription):
    """Collector description whose ``discover_via`` is ``"pattern"``.

    Holds the pattern to match plus the parsed sort specification.
    """

    discover_via = "pattern"
    sort_key: SortKeyT
    sort_comp: SortCompT
    pattern: str

    def __init__(self, **kwargs):
        """Read pattern and sorting options from ``kwargs``.

        ``pattern`` defaults to ``"__default__"``; any key of
        ``NAMED_PATTERNS`` is replaced by the pattern it names. ``sort_by`` has
        the form ``[reverse_][<comp>_]<key>`` where ``<comp>`` is ``lexical``
        or ``numeric`` and ``<key>`` is one of ``filename``, ``name``,
        ``designation`` or ``dbkey``.

        :raises AssertionError: if ``sort_by`` names an unknown comparison or
            sort key.
        """
        super().__init__(**kwargs)
        pattern = kwargs.get("pattern", "__default__")
        self.recurse = asbool(kwargs.get("recurse", False))
        self.match_relative_path = asbool(kwargs.get("match_relative_path", False))
        if pattern in NAMED_PATTERNS:
            pattern = NAMED_PATTERNS[pattern]
        self.pattern = pattern

        # ``self.sort_by`` keeps the raw value; the local is consumed piece by piece.
        self.sort_by = sort_by = kwargs.get("sort_by", DEFAULT_SORT_BY)
        if sort_by.startswith("reverse_"):
            self.sort_reverse = True
            sort_by = sort_by[len("reverse_") :]
        else:
            self.sort_reverse = False

        # Validation raises explicitly instead of using ``assert`` so it is
        # still enforced under ``python -O``; AssertionError is kept so that
        # existing handlers continue to match.
        if "_" in sort_by:
            sort_comp, sort_by = sort_by.split("_", 1)
            if sort_comp not in ("lexical", "numeric"):
                raise AssertionError(
                    f"Invalid sort comparison [{sort_comp}] in sort_by [{self.sort_by}], "
                    "expected one of: lexical, numeric"
                )
        else:
            sort_comp = DEFAULT_SORT_COMP
        if sort_by not in ("filename", "name", "designation", "dbkey"):
            raise AssertionError(
                f"Invalid sort key [{sort_by}] in sort_by [{self.sort_by}], "
                "expected one of: filename, name, designation, dbkey"
            )
        self.sort_key = sort_by
        self.sort_comp = sort_comp

    def to_model(self) -> FilePatternDatasetCollectionDescriptionModel:
        """Return the model object built from this description's attributes."""
        return FilePatternDatasetCollectionDescriptionModel(
            discover_via=self.discover_via,
            dbkey=self.default_dbkey,
            format=self.default_ext,
            visible=self.default_visible,
            assign_primary_output=self.assign_primary_output,
            directory=self.directory,
            recurse=self.recurse,
            match_relative_path=self.match_relative_path,
            sort_key=self.sort_key,
            sort_comp=self.sort_comp,
            pattern=self.pattern,
            sort_by=self.sort_by,
        )

    @property
    def discover_patterns(self) -> List[str]:
        """Return the single configured pattern as a one-element list."""
        return [self.pattern]
# Shared description used by dataset_collector_descriptions_from_elem when an
# output has no discover_datasets children and legacy mode is on. With no
# "pattern" given it uses the "__default__" named pattern.
# NOTE(review): the constructors in this file read the "dbkey" keyword, not
# "default_dbkey", so LEGACY_DEFAULT_DBKEY appears to be ignored here and
# default_dbkey ends up as INPUT_DBKEY_TOKEN - confirm whether that is intended
# before changing it, since callers may rely on the current behaviour.
DEFAULT_DATASET_COLLECTOR_DESCRIPTION = FilePatternDatasetCollectionDescription( default_dbkey=LEGACY_DEFAULT_DBKEY, )