Source code for galaxy.model.dataset_collections.type_description

from typing import (
    List,
    Optional,
    TYPE_CHECKING,
    Union,
)

from .registry import DATASET_COLLECTION_TYPES_REGISTRY

if TYPE_CHECKING:
    from galaxy.tool_util_models.tool_source import FieldDict



[docs]
class CollectionTypeDescriptionFactory:

[docs]
    def __init__(self, type_registry=DATASET_COLLECTION_TYPES_REGISTRY):
        """Create a factory that carries ``type_registry``.

        :param type_registry: registry object stored on the factory; defaults
            to the imported ``DATASET_COLLECTION_TYPES_REGISTRY``.
        """
        # The factory never reads the registry itself; it only stores it so
        # that holders of the factory can reach it via ``factory.type_registry``.
        self.type_registry = type_registry



[docs]
    def for_collection_type(self, collection_type, fields: Optional[Union[str, List["FieldDict"]]] = None):
        """Build a ``CollectionTypeDescription`` bound to this factory.

        :param collection_type: collection type to describe; must not be
            ``None`` (checked with an ``assert``).
        :param fields: optional value forwarded unchanged as the description's
            ``fields``.
        :returns: a new ``CollectionTypeDescription`` whose factory is ``self``.
        """
        assert collection_type is not None
        description = CollectionTypeDescription(collection_type, self, fields=fields)
        return description





[docs]
class CollectionTypeDescription:
    """Abstraction over dataset collection type that ties together string
    representation in database/model with type registry.
    """

    collection_type: str


[docs]
    def __init__(
        self,
        collection_type: Union[str, "CollectionTypeDescription"],
        collection_type_description_factory: CollectionTypeDescriptionFactory,
        fields: Optional[Union[str, List["FieldDict"]]] = None,
    ):
        if isinstance(collection_type, CollectionTypeDescription):
            self.collection_type = collection_type.collection_type
        else:
            self.collection_type = collection_type
        self.collection_type_description_factory = collection_type_description_factory
        self.fields = fields
        self.__has_subcollections = self.collection_type.find(":") > 0



[docs]
    def child_collection_type(self):
        rank_collection_type = self.rank_collection_type()
        return self.collection_type[len(rank_collection_type) + 1 :]



[docs]
    def child_collection_type_description(self):
        child_collection_type = self.child_collection_type()
        return self.collection_type_description_factory.for_collection_type(child_collection_type)



[docs]
    def effective_collection_type_description(self, subcollection_type):
        effective_collection_type = self.effective_collection_type(subcollection_type)
        return self.collection_type_description_factory.for_collection_type(effective_collection_type)



[docs]
    def effective_collection_type(self, subcollection_type):
        if hasattr(subcollection_type, "collection_type"):
            subcollection_type = subcollection_type.collection_type

        if not self.has_subcollections_of_type(subcollection_type):
            raise ValueError(f"Cannot compute effective subcollection type of {subcollection_type} over {self}")

        if subcollection_type == "single_datasets":
            return self.collection_type

        return self.collection_type[: -(len(subcollection_type) + 1)]



[docs]
    def has_subcollections_of_type(self, other_collection_type) -> bool:
        """Take in another type (either flat string or another
        CollectionTypeDescription) and determine if this collection contains
        subcollections matching that type.

        The way this is used in map/reduce it seems to make the most sense
        for this to return True if these subtypes are proper (i.e. a type
        is not considered to have subcollections of its own type).
        """
        if hasattr(other_collection_type, "collection_type"):
            other_collection_type = other_collection_type.collection_type
        collection_type = self.collection_type
        if collection_type == other_collection_type:
            return False
        if collection_type.endswith(other_collection_type):
            return True
        if other_collection_type == "paired_or_unpaired":
            # this can be thought of as a subcollection of anything except a pair
            # since it would match a pair exactly
            return collection_type != "paired"
        if other_collection_type == "single_datasets":
            # effectively any collection has unpaired subcollections
            return True
        return False



[docs]
    def is_subcollection_of_type(self, other_collection_type):
        if not hasattr(other_collection_type, "collection_type"):
            other_collection_type = self.collection_type_description_factory.for_collection_type(other_collection_type)
        return other_collection_type.has_subcollections_of_type(self)



[docs]
    def can_match_type(self, other_collection_type) -> bool:
        if hasattr(other_collection_type, "collection_type"):
            other_collection_type = other_collection_type.collection_type
        collection_type = self.collection_type
        if other_collection_type == collection_type:
            return True
        elif other_collection_type == "paired" and collection_type == "paired_or_unpaired":
            return True

        if collection_type.endswith(":paired_or_unpaired"):
            as_plain_list = collection_type[: -len(":paired_or_unpaired")]
            if other_collection_type == as_plain_list:
                return True
            as_paired_list = f"{as_plain_list}:paired"
            if other_collection_type == as_paired_list:
                return True

        # can we push this to the type registry somehow?
        return False



[docs]
    def subcollection_type_description(self):
        if not self.__has_subcollections:
            raise ValueError(f"Cannot generate subcollection type description for flat type {self.collection_type}")
        subcollection_type = self.collection_type.split(":", 1)[1]
        return self.collection_type_description_factory.for_collection_type(subcollection_type)



[docs]
    def has_subcollections(self):
        """Return the flag computed in ``__init__``: True when the type string
        contains a ":" after its first character.
        """
        return self.__has_subcollections



[docs]
    def rank_collection_type(self):
        """Return the top-level collection type corresponding to this
        collection type. For instance the "rank" type of a list of paired
        data ("list:paired") is "list".
        """
        return self.collection_type.split(":")[0]



[docs]
    def rank_type_plugin(self):
        return self.collection_type_description_factory.type_registry.get(self.rank_collection_type())


    @property
    def dimension(self):
        return len(self.collection_type.split(":")) + 1


[docs]
    def multiply(self, other_collection_type):
        """Return a description of this type mapped over
        ``other_collection_type``.

        The combined type string comes from ``map_over_collection_type`` and
        the description is built by this instance's factory.
        """
        combined_type = map_over_collection_type(self, other_collection_type)
        factory = self.collection_type_description_factory
        return factory.for_collection_type(combined_type)


    def __str__(self):
        return f"CollectionTypeDescription[{self.collection_type}]"




[docs]
def map_over_collection_type(mapped_over_collection_type, target_collection_type):
    """Join two collection types into ``"<mapped_over>:<target>"``.

    Either argument may be a plain value or an object exposing a
    ``collection_type`` attribute, in which case that attribute is used.
    When ``target_collection_type`` is falsy the mapped-over type is returned
    on its own.
    """
    outer = getattr(mapped_over_collection_type, "collection_type", mapped_over_collection_type)

    # Truthiness is checked on the argument as passed, before unwrapping.
    if not target_collection_type:
        return outer

    inner = getattr(target_collection_type, "collection_type", target_collection_type)
    return f"{outer}:{inner}"



# Module-level factory instance created with the default type registry.
COLLECTION_TYPE_DESCRIPTION_FACTORY = CollectionTypeDescriptionFactory()