Warning
This document is for an in-development version of Galaxy. You can alternatively view this page in the latest release if it exists or view the top of the latest release's documentation.
Source code for galaxy.model.dataset_collections.type_description
"""Collection type descriptions and the compatibility algebra.
Three operations on collection types, each answering a distinct question:
- ``accepts(other)``: asymmetric direct-edge check. True iff an output
collection of type ``other`` can be connected to an input slot whose
declared type is ``self``. Used at workflow-editor edge validation.
Convention: ``input_type.accepts(output_type)``.
- ``compatible(other)``: symmetric sibling-matching check. True iff two
collection types match such that they could drive a common map-over
over sibling inputs of one tool. Used where neither side is the input
and order of arrival must not change the answer.
- ``can_map_over(other)``: asymmetric nesting check. True iff ``self`` has
proper subcollections of type ``other`` — i.e. ``self`` can be mapped
over to feed a slot expecting ``other``. Convention:
``output_type.can_map_over(input_type)``.
The TypeScript equivalents live in
``client/src/components/Workflow/Editor/modules/collectionTypeDescription.ts``
and must stay in sync (``accepts`` / ``compatible`` / ``canMapOver``). See
``types/collection_semantics.yml`` "Type Compatibility Algebra" for the
lattice diagram and worked examples.
"""
import re
from typing import (
Optional,
TYPE_CHECKING,
Union,
)
from galaxy.exceptions import RequestParameterInvalidException
from .registry import DATASET_COLLECTION_TYPES_REGISTRY
if TYPE_CHECKING:
from galaxy.tool_util_models.tool_source import FieldDict
# Grammar of valid collection type strings: either one or more ":"-separated
# ranks drawn from list / paired / paired_or_unpaired / record, or a
# ``sample_sheet`` optionally followed by exactly one of paired / record /
# paired_or_unpaired.
# The pattern is anchored with ``\Z`` rather than ``$``: ``$`` also matches just
# before a trailing newline, which would let a value such as "list\n" pass.
COLLECTION_TYPE_REGEX = re.compile(
    r"^((list|paired|paired_or_unpaired|record)(:(list|paired|paired_or_unpaired|record))*|sample_sheet|sample_sheet:paired|sample_sheet:record|sample_sheet:paired_or_unpaired)\Z"
)
[docs]
class CollectionTypeDescriptionFactory:
    """Builds ``CollectionTypeDescription`` objects that share one type registry."""

    def __init__(self, type_registry=DATASET_COLLECTION_TYPES_REGISTRY):
        """Remember the registry that descriptions created here will use.

        The registry is kept on the instance; ``CollectionTypeDescription.
        rank_type_plugin`` reaches it through its factory reference.
        """
        self.type_registry = type_registry

    def for_collection_type(self, collection_type, fields: Optional[Union[str, list["FieldDict"]]] = None):
        """Return a description for ``collection_type`` bound to this factory.

        ``collection_type`` must not be ``None``; ``fields`` is passed through
        to the description unchanged.
        """
        assert collection_type is not None
        description = CollectionTypeDescription(collection_type, self, fields=fields)
        return description
[docs]
class CollectionTypeDescription:
    """Abstraction over dataset collection type that ties together string
    representation in database/model with type registry.

    An instance wraps a ":"-separated collection type string (for example
    ``"list:paired"``) together with the factory that created it, and offers
    the ``accepts`` / ``compatible`` / ``can_map_over`` comparisons plus
    helpers for splitting the string into its rank and nested parts.
    """
    # Raw collection type string; ranks are separated by ":".
    collection_type: str
[docs]
def __init__(
    self,
    collection_type: Union[str, "CollectionTypeDescription"],
    collection_type_description_factory: CollectionTypeDescriptionFactory,
    fields: Optional[Union[str, list["FieldDict"]]] = None,
):
    """Create a description from a type string or from another description.

    When another ``CollectionTypeDescription`` is given only its
    ``collection_type`` string is taken over; the factory and ``fields``
    always come from the arguments of this call.
    """
    raw_type = (
        collection_type.collection_type
        if isinstance(collection_type, CollectionTypeDescription)
        else collection_type
    )
    self.collection_type = raw_type
    self.collection_type_description_factory = collection_type_description_factory
    self.fields = fields
    # A separator at index 1 or later means there is at least one nested rank.
    first_separator = raw_type.find(":")
    self.__has_subcollections = first_separator > 0
[docs]
def child_collection_type(self):
rank_collection_type = self.rank_collection_type()
return self.collection_type[len(rank_collection_type) + 1 :]
[docs]
def child_collection_type_description(self):
    """Return a description, built by this object's factory, of ``child_collection_type()``."""
    factory = self.collection_type_description_factory
    return factory.for_collection_type(self.child_collection_type())
[docs]
def effective_collection_type_description(self, subcollection_type):
    """Return ``effective_collection_type(subcollection_type)`` wrapped as a description.

    Propagates the ``ValueError`` raised by ``effective_collection_type``
    when this type cannot be mapped over ``subcollection_type``.
    """
    remaining_type = self.effective_collection_type(subcollection_type)
    factory = self.collection_type_description_factory
    return factory.for_collection_type(remaining_type)
[docs]
def effective_collection_type(self, subcollection_type):
    """Return the collection type string that remains after mapping this
    collection over ``subcollection_type``.

    ``subcollection_type`` may be a type string or any object exposing a
    ``collection_type`` attribute. For example ``"list:paired"`` mapped over
    ``"paired"`` leaves ``"list"``.

    Raises ``ValueError`` when ``can_map_over(subcollection_type)`` is false.
    """
    if hasattr(subcollection_type, "collection_type"):
        subcollection_type = subcollection_type.collection_type
    if not self.can_map_over(subcollection_type):
        raise ValueError(f"Cannot compute effective subcollection type of {subcollection_type} over {self}")
    if subcollection_type == "single_datasets":
        # Mapping over individual datasets keeps every rank.
        return self.collection_type
    # Normalized forms are used only for suffix tests; the returned value is
    # always sliced from the un-normalized ``self.collection_type``.
    normalized = _normalize_collection_type(self.collection_type)
    normalized_sub = _normalize_collection_type(subcollection_type)
    if subcollection_type == "paired_or_unpaired":
        if self.collection_type.endswith(":paired"):
            # paired_or_unpaired consumes the :paired suffix
            return self.collection_type[: -len(":paired")]
        elif normalized.endswith("list"):
            # paired_or_unpaired acts like single_datasets for collections
            # whose innermost type is list (each element wrapped as unpaired)
            return self.collection_type
        else:
            # strip last rank (paired_or_unpaired consumes it)
            # NOTE(review): for a single-rank type with no ":" (e.g. "record",
            # which can_map_over accepts here) rfind returns -1 and this slice
            # drops the final character instead of a rank — confirm whether
            # such inputs can reach this branch.
            return self.collection_type[: self.collection_type.rfind(":")]
    if normalized_sub.endswith(":paired_or_unpaired"):
        # Compound :paired_or_unpaired suffix — iterative peel-off matching
        # TS effectiveMapOver logic. Strip ranks from both sides, then
        # optionally strip one more if :paired was consumed.
        current = self.collection_type
        current_other = subcollection_type
        while ":" in current_other:
            # One rank is removed from each side per separator in the subtype.
            current_other = current_other[: current_other.rfind(":")]
            current = current[: current.rfind(":")]
        if normalized.endswith(":paired"):
            current = current[: current.rfind(":")]
        return current
    # Plain suffix case: drop the subtype and the ":" that precedes it.
    return self.collection_type[: -(len(subcollection_type) + 1)]
[docs]
def can_map_over(self, other_collection_type) -> bool:
    """Asymmetric nesting check: can this collection be mapped over to
    feed an input requiring ``other_collection_type``?

    Convention: ``output.can_map_over(input)``. True iff ``self`` has
    proper subcollections matching ``other`` — a type is not considered
    to map over itself (that's a direct edge, handled by ``accepts``).

    ``other_collection_type`` may be a type string or any object exposing a
    ``collection_type`` attribute.

    Mirrors TypeScript ``CollectionTypeDescription.canMapOver``. Naming
    kept parallel across languages because both encode the same
    operational question at workflow-editor connection time.
    """
    if hasattr(other_collection_type, "collection_type"):
        other_collection_type = other_collection_type.collection_type
    # sample_sheet asymmetry: a sample_sheet input can only be fed by a
    # sample_sheet output (a plain-list output lacks the column metadata
    # the input expects). ``self`` is the output being mapped over;
    # ``other`` is the input collection type. Check before normalization
    # (which equates sample_sheet and list).
    # Duplicates the asymmetry encoded in ``accepts`` — load-bearing for
    # ``multiply`` / ``effective_collection_type`` map-over arithmetic.
    # Removing this guard is a separate refactor; see follow-up issue.
    if other_collection_type.startswith("sample_sheet") and not self.collection_type.startswith("sample_sheet"):
        return False
    collection_type = _normalize_collection_type(self.collection_type)
    other_collection_type = _normalize_collection_type(other_collection_type)
    if collection_type == other_collection_type:
        # Identical types are a direct edge, not a map-over.
        return False
    if collection_type.endswith(f":{other_collection_type}"):
        return True
    if other_collection_type == "paired_or_unpaired":
        # this can be thought of as a subcollection of anything except a pair
        # since it would match a pair exactly
        return collection_type != "paired"
    if other_collection_type.endswith(":paired_or_unpaired"):
        # Compound :paired_or_unpaired suffix — e.g. list:list can map over
        # list:paired_or_unpaired. Strip the :paired_or_unpaired to get the
        # required higher ranks, optionally strip :paired from self (since
        # paired_or_unpaired consumes paired), then check alignment.
        higher_ranks_required = other_collection_type[: other_collection_type.rfind(":")]
        if collection_type.endswith(":paired"):
            higher_ranks = collection_type[: collection_type.rfind(":")]
        else:
            higher_ranks = collection_type
        # The required ranks must be a *proper* suffix that starts on a rank
        # boundary. A bare ``endswith(higher_ranks_required)`` would also fire
        # when a rank name merely ends with the required text (the name
        # "paired_or_unpaired" ends with "paired"), so the ":" is part of the
        # suffix; that also rules out the two being equal.
        return higher_ranks.endswith(f":{higher_ranks_required}")
    if other_collection_type == "single_datasets":
        # effectively any collection has unpaired subcollections
        return True
    return False
[docs]
def accepts(self, other_collection_type) -> bool:
    """Asymmetric direct-edge check: would an input slot declared as ``self``
    take an output of type ``other_collection_type``?

    Call it as ``input_type.accepts(output_type)``; it is the check used for
    workflow-editor edge validation. When neither side is the input slot
    (sibling matching) use ``compatible`` instead.

    See ``types/collection_semantics.yml`` "Type Compatibility Algebra".
    """
    offered_raw = getattr(other_collection_type, "collection_type", other_collection_type)
    # A sample_sheet slot needs a sample_sheet producer: a plain list lacks
    # the column metadata. This has to be decided on the raw strings because
    # normalization below makes the two indistinguishable.
    if self.collection_type.startswith("sample_sheet") and not offered_raw.startswith("sample_sheet"):
        return False
    slot_type = _normalize_collection_type(self.collection_type)
    offered = _normalize_collection_type(offered_raw)
    if offered == slot_type:
        return True
    if slot_type == "paired_or_unpaired" and offered == "paired":
        return True
    flexible_suffix = ":paired_or_unpaired"
    if not slot_type.endswith(flexible_suffix):
        return False
    # A trailing paired_or_unpaired rank is satisfied either by the outer
    # ranks alone or by the outer ranks followed by ":paired".
    outer_ranks = slot_type[: -len(flexible_suffix)]
    return offered in (outer_ranks, f"{outer_ranks}:paired")
[docs]
def compatible(self, other_collection_type) -> bool:
    """Symmetric sibling-matching check between ``self`` and ``other``.

    True when either side ``accepts`` the other, so the answer does not
    depend on which of the two arrives first. Intended for sibling-matching
    sites (Python ``Tree.compatible_shape`` at runtime; TS
    ``mappingConstraints`` at connection time) where neither side is the
    input slot.

    A plain type string is first turned into a description through this
    object's factory.

    See ``types/collection_semantics.yml`` "Type Compatibility Algebra".
    """
    other = other_collection_type
    if not hasattr(other, "collection_type"):
        other = self.collection_type_description_factory.for_collection_type(other)
    forward = self.accepts(other)
    return forward or other.accepts(self)
[docs]
def subcollection_type_description(self):
if not self.__has_subcollections:
raise ValueError(f"Cannot generate subcollection type description for flat type {self.collection_type}")
subcollection_type = self.collection_type.split(":", 1)[1]
return self.collection_type_description_factory.for_collection_type(subcollection_type)
[docs]
def rank_collection_type(self):
"""Return the top-level collection type corresponding to this
collection type. For instance the "rank" type of a list of paired
data ("list:paired") is "list".
"""
return self.collection_type.split(":")[0]
[docs]
def rank_type_plugin(self):
    """Look up the outermost rank type in the factory's type registry."""
    registry = self.collection_type_description_factory.type_registry
    return registry.get(self.rank_collection_type())
@property
def dimension(self):
return len(self.collection_type.split(":")) + 1
[docs]
def multiply(self, other_collection_type):
    """Return a description of this type nested around ``other_collection_type``.

    The combined type string comes from ``map_over_collection_type`` and is
    wrapped by this object's factory.
    """
    combined_type = map_over_collection_type(self, other_collection_type)
    factory = self.collection_type_description_factory
    return factory.for_collection_type(combined_type)
def __str__(self):
return f"CollectionTypeDescription[{self.collection_type}]"
[docs]
def validate(self):
    """Check the type string against ``COLLECTION_TYPE_REGEX``.

    Returns ``None`` for a valid Galaxy collection type and raises
    ``RequestParameterInvalidException`` otherwise.
    """
    if COLLECTION_TYPE_REGEX.match(self.collection_type):
        return
    raise RequestParameterInvalidException(f"Invalid collection type: [{self.collection_type}]")
[docs]
def map_over_collection_type(mapped_over_collection_type, target_collection_type):
    """Join two collection types into the nested type ``"<outer>:<inner>"``.

    Either argument may be a type string or an object exposing a
    ``collection_type`` attribute. When ``target_collection_type`` is falsy
    (``None``, ``""``) the outer type string is returned on its own.
    """
    outer = getattr(mapped_over_collection_type, "collection_type", mapped_over_collection_type)
    if not target_collection_type:
        return outer
    inner = getattr(target_collection_type, "collection_type", target_collection_type)
    return f"{outer}:{inner}"
def _normalize_collection_type(collection_type: str) -> str:
    """Return ``collection_type`` in the form used for comparisons.

    A leading ``sample_sheet`` is rewritten to ``list`` because sample sheets
    behave like lists for mapping/matching; any other string is returned
    unchanged.
    """
    prefix = "sample_sheet"
    if not collection_type.startswith(prefix):
        return collection_type
    remainder = collection_type[len(prefix) :]
    return f"list{remainder}"
# Module-level factory instance constructed with the default type registry.
COLLECTION_TYPE_DESCRIPTION_FACTORY = CollectionTypeDescriptionFactory()