Source code for galaxy.model.dataset_collections.type_description

from typing import Union

from .registry import DATASET_COLLECTION_TYPES_REGISTRY


class CollectionTypeDescriptionFactory:
    """Factory producing :class:`CollectionTypeDescription` objects.

    Every description created here keeps a reference back to this factory,
    which in turn holds the collection type registry.
    """

    def __init__(self, type_registry=DATASET_COLLECTION_TYPES_REGISTRY):
        """Remember the registry of collection type plugins.

        :param type_registry: registry stored on the factory; defaults to the
            module-wide ``DATASET_COLLECTION_TYPES_REGISTRY``.
        """
        # The factory never queries the registry itself; it is kept so that
        # the descriptions it creates can reach it through their factory
        # reference (see CollectionTypeDescription.rank_type_plugin).
        self.type_registry = type_registry

    def for_collection_type(self, collection_type):
        """Wrap ``collection_type`` in a description bound to this factory.

        :param collection_type: collection type string (or an existing
            description); must not be ``None``.
        :returns: a new :class:`CollectionTypeDescription`.
        """
        assert collection_type is not None
        description = CollectionTypeDescription(collection_type, self)
        return description
class CollectionTypeDescription:
    """Abstraction over dataset collection type that ties together string
    representation in database/model with type registry.

    A collection type is a colon-separated string of ranks with the top-level
    rank first, for instance ``"list:paired"`` for a list of paired data.

    >>> factory = CollectionTypeDescriptionFactory(None)
    >>> nested_type_description = factory.for_collection_type("list:paired")
    >>> paired_type_description = factory.for_collection_type("paired")
    >>> nested_type_description.has_subcollections_of_type("list")
    False
    >>> nested_type_description.has_subcollections_of_type("list:paired")
    False
    >>> nested_type_description.has_subcollections_of_type("paired")
    True
    >>> nested_type_description.has_subcollections_of_type("aired")
    False
    >>> nested_type_description.has_subcollections_of_type(paired_type_description)
    True
    >>> nested_type_description.has_subcollections()
    True
    >>> paired_type_description.has_subcollections()
    False
    >>> paired_type_description.rank_collection_type()
    'paired'
    >>> nested_type_description.rank_collection_type()
    'list'
    >>> nested_type_description.effective_collection_type(paired_type_description)
    'list'
    >>> nested_type_description.effective_collection_type_description(paired_type_description).collection_type
    'list'
    >>> nested_type_description.child_collection_type()
    'paired'
    """

    collection_type: str

    def __init__(self, collection_type: Union[str, "CollectionTypeDescription"], collection_type_description_factory):
        """Build a description from a type string or another description.

        :param collection_type: colon-separated collection type string, or an
            existing description whose type string is copied.
        :param collection_type_description_factory: factory used to build the
            derived descriptions returned by the ``*_description`` methods and
            to reach the type registry.
        """
        if isinstance(collection_type, CollectionTypeDescription):
            self.collection_type = collection_type.collection_type
        else:
            self.collection_type = collection_type
        self.collection_type_description_factory = collection_type_description_factory
        # Computed once: a type is nested when a ":" appears after its first character.
        self.__has_subcollections = self.collection_type.find(":") > 0

    def child_collection_type(self):
        """Return the type string left after dropping the top-level rank.

        For ``"list:paired"`` this is ``"paired"``; for a flat type the result
        is the empty string.
        """
        rank_collection_type = self.rank_collection_type()
        # Skip the rank itself plus the ":" separator that follows it.
        return self.collection_type[len(rank_collection_type) + 1 :]

    def child_collection_type_description(self):
        """Return a description of :meth:`child_collection_type`."""
        child_collection_type = self.child_collection_type()
        return self.collection_type_description_factory.for_collection_type(child_collection_type)

    def effective_collection_type_description(self, subcollection_type):
        """Return a description of :meth:`effective_collection_type`.

        :raises ValueError: if this type has no subcollections of
            ``subcollection_type``.
        """
        effective_collection_type = self.effective_collection_type(subcollection_type)
        return self.collection_type_description_factory.for_collection_type(effective_collection_type)

    def effective_collection_type(self, subcollection_type):
        """Return this type with the trailing ``subcollection_type`` removed.

        For ``"list:paired"`` and subcollection type ``"paired"`` the result
        is ``"list"``.

        :param subcollection_type: type string or object exposing a
            ``collection_type`` attribute.
        :raises ValueError: if this type has no subcollections of
            ``subcollection_type``.
        """
        if hasattr(subcollection_type, "collection_type"):
            subcollection_type = subcollection_type.collection_type
        if not self.has_subcollections_of_type(subcollection_type):
            raise ValueError(f"Cannot compute effective subcollection type of {subcollection_type} over {self}")
        # Strip the subcollection type and the ":" separator preceding it.
        return self.collection_type[: -(len(subcollection_type) + 1)]

    def has_subcollections_of_type(self, other_collection_type):
        """Take in another type (either flat string or another
        CollectionTypeDescription) and determine if this collection contains
        subcollections matching that type.

        The way this is used in map/reduce it seems to make the most sense
        for this to return True if these subtypes are proper (i.e. a type
        is not considered to have subcollections of its own type).

        The other type must match whole trailing ranks: ``"list:paired"`` has
        subcollections of type ``"paired"`` but not of ``"aired"`` or ``""``.
        """
        if hasattr(other_collection_type, "collection_type"):
            other_collection_type = other_collection_type.collection_type
        # Anchor the suffix at a ":" rank boundary. A bare endswith() would
        # accept arbitrary character suffixes (and the empty string), which
        # makes effective_collection_type() return malformed types. A match
        # here is necessarily a proper subtype because of the extra ":".
        return self.collection_type.endswith(":" + other_collection_type)

    def is_subcollection_of_type(self, other_collection_type):
        """Return whether ``other_collection_type`` has subcollections of this type.

        :param other_collection_type: type string or description; strings are
            converted to descriptions through the factory.
        """
        if not hasattr(other_collection_type, "collection_type"):
            other_collection_type = self.collection_type_description_factory.for_collection_type(other_collection_type)
        return other_collection_type.has_subcollections_of_type(self)

    def can_match_type(self, other_collection_type):
        """Return whether the other type string is identical to this one.

        :param other_collection_type: type string or object exposing a
            ``collection_type`` attribute.
        """
        if hasattr(other_collection_type, "collection_type"):
            other_collection_type = other_collection_type.collection_type
        collection_type = self.collection_type
        return other_collection_type == collection_type

    def subcollection_type_description(self):
        """Return a description of everything after the first rank.

        :raises ValueError: if this is a flat (non-nested) type.
        """
        if not self.__has_subcollections:
            raise ValueError(f"Cannot generate subcollection type description for flat type {self.collection_type}")
        subcollection_type = self.collection_type.split(":", 1)[1]
        return self.collection_type_description_factory.for_collection_type(subcollection_type)

    def has_subcollections(self):
        """Return whether this is a nested type (contains a ":" separator)."""
        return self.__has_subcollections

    def rank_collection_type(self):
        """Return the top-level collection type corresponding to this
        collection type. For instance the "rank" type of a list of paired
        data ("list:paired") is "list".
        """
        return self.collection_type.split(":")[0]

    def rank_type_plugin(self):
        """Look up the rank collection type in the factory's ``type_registry``."""
        return self.collection_type_description_factory.type_registry.get(self.rank_collection_type())

    @property
    def dimension(self):
        """Number of colon-separated ranks in the type, plus one."""
        return len(self.collection_type.split(":")) + 1

    def multiply(self, other_collection_type):
        """Return a description of this type mapped over ``other_collection_type``.

        The combined type string is computed by ``map_over_collection_type``
        with this description as the outer type.
        """
        collection_type = map_over_collection_type(self, other_collection_type)
        return self.collection_type_description_factory.for_collection_type(collection_type)

    def __str__(self):
        return f"CollectionTypeDescription[{self.collection_type}]"
def map_over_collection_type(mapped_over_collection_type, target_collection_type):
    """Compose the collection type that results from mapping over a target.

    Each argument may be a plain collection type string or any object that
    exposes a ``collection_type`` attribute, in which case that attribute is
    used.

    :param mapped_over_collection_type: the outer type being mapped over.
    :param target_collection_type: the inner type; when falsy (``None`` or
        ``""``) the outer type is returned on its own.
    :returns: ``"<outer>:<inner>"``, or just the outer type string.
    """
    outer_type = getattr(mapped_over_collection_type, "collection_type", mapped_over_collection_type)
    # Nothing to append: the result is simply the outer type.
    if not target_collection_type:
        return outer_type
    inner_type = getattr(target_collection_type, "collection_type", target_collection_type)
    return f"{outer_type}:{inner_type}"
# Module-level factory instance, built with the default ``type_registry``
# argument (DATASET_COLLECTION_TYPES_REGISTRY).
COLLECTION_TYPE_DESCRIPTION_FACTORY = CollectionTypeDescriptionFactory()