Warning

This document is for an in-development version of Galaxy. Alternatively, you can view this page in the latest release, if it exists, or view the top of the latest release's documentation.

Source code for galaxy.model.dataset_collections.builder

from typing import (
    cast,
    Dict,
    List,
    Optional,
    Set,
    TYPE_CHECKING,
    Union,
)

from galaxy.model import (
    DatasetCollection,
    DatasetInstance,
)
from galaxy.model.orm.util import add_object_to_object_session
from galaxy.util.oset import OrderedSet
from .type_description import COLLECTION_TYPE_DESCRIPTION_FACTORY

if TYPE_CHECKING:
    from galaxy.model.dataset_collections.adapters import CollectionAdapter
    from galaxy.model.dataset_collections.type_description import CollectionTypeDescription
    from galaxy.model.dataset_collections.types import (
        BaseDatasetCollectionType,
        DatasetInstanceMapping,
    )
    from galaxy.tool_util_models.tool_source import FieldDict


def build_collection(
    type: "BaseDatasetCollectionType",
    dataset_instances: "DatasetInstanceMapping",
    collection: Optional[DatasetCollection] = None,
    associated_identifiers: Optional[Set[str]] = None,
    fields: Optional[Union[str, List["FieldDict"]]] = None,
) -> DatasetCollection:
    """
    Build a DatasetCollection with populated DatasetCollectionElement objects
    corresponding to the supplied dataset instances, or throw an exception if
    this is not a valid collection of the specified type.

    :param type: collection type plugin used to generate the elements.
    :param dataset_instances: mapping of element identifier to the object the
        element should wrap.
    :param collection: existing collection to add elements to; a new
        ``DatasetCollection(fields=fields)`` is created when this is falsy.
    :param associated_identifiers: identifiers that already have an element in
        the collection. Identifiers of newly created elements are added to
        this set in place. A fresh set is used only when ``None`` is passed.
    :param fields: field definitions (or the string ``"auto"``) forwarded to
        ``set_collection_elements``.
    :returns: the collection that was populated.
    """
    dataset_collection = collection or DatasetCollection(fields=fields)
    # Test against None, not truthiness: an empty set supplied by the caller
    # must be the very object that gets updated, otherwise the caller never
    # learns which identifiers were associated by this call.
    if associated_identifiers is None:
        associated_identifiers = set()
    set_collection_elements(dataset_collection, type, dataset_instances, associated_identifiers, fields=fields)
    return dataset_collection
def set_collection_elements(
    dataset_collection: DatasetCollection,
    type: "BaseDatasetCollectionType",
    dataset_instances: "DatasetInstanceMapping",
    associated_identifiers: Set[str],
    fields: Optional[Union[str, List["FieldDict"]]] = None,
) -> DatasetCollection:
    """
    Append elements for not-yet-associated identifiers to ``dataset_collection``.

    Only keys of ``dataset_instances`` that are absent from
    ``associated_identifiers`` produce new elements. New elements are indexed
    starting at the collection's current ``element_count`` (treated as 0 when
    unset), and the count is updated once all elements have been generated.

    :param dataset_collection: collection receiving the new elements.
    :param type: collection type plugin; its ``generate_elements`` yields the
        elements to attach.
    :param dataset_instances: mapping of element identifier to wrapped object.
    :param associated_identifiers: identifiers already present in the
        collection; updated in place with each newly added identifier.
    :param fields: field definitions passed to ``generate_elements``. For a
        ``"record"`` collection type the string ``"auto"`` is replaced by the
        result of ``guess_fields`` over the *full* ``dataset_instances``.
    :returns: ``dataset_collection``.
    """
    # OrderedSet keeps the mapping's key order while dropping known identifiers.
    new_element_keys = OrderedSet(dataset_instances.keys()) - associated_identifiers
    new_dataset_instances = {k: dataset_instances[k] for k in new_element_keys}
    dataset_collection.element_count = dataset_collection.element_count or 0
    element_index = dataset_collection.element_count
    if type.collection_type == "record" and fields == "auto":
        fields = guess_fields(dataset_instances)
    for element in type.generate_elements(new_dataset_instances, fields=fields):
        element.element_index = element_index
        # Register the element with the collection's session (if any) before
        # linking the two objects together.
        add_object_to_object_session(element, dataset_collection)
        element.collection = dataset_collection
        element_index += 1
        assert element.element_identifier
        associated_identifiers.add(element.element_identifier)

    dataset_collection.element_count = element_index
    return dataset_collection
def guess_fields(dataset_instances: "DatasetInstanceMapping") -> List["FieldDict"]:
    """
    Derive ``File`` field definitions from the identifiers of ``dataset_instances``.

    Returns one ``{"type": "File", "name": identifier}`` entry per item, in
    mapping order. If any value is a ``DatasetCollection`` no guess is made
    and an empty list is returned.
    """
    contains_collection = any(isinstance(value, DatasetCollection) for value in dataset_instances.values())
    if contains_collection:
        return []
    guessed: List[FieldDict] = [{"type": "File", "name": name} for name in dataset_instances]
    return guessed
# Working state of a CollectionBuilder: maps an element identifier either to a
# nested CollectionBuilder (for a sub-collection) or to a DatasetInstance.
ElementsDict = Dict[str, Union["CollectionBuilder", DatasetInstance]]
class CollectionBuilder:
    """Builder that gathers elements by identifier and builds a dataset collection from them."""

    def __init__(self, collection_type_description: "CollectionTypeDescription"):
        """Start an empty builder for the given collection type description."""
        self._collection_type_description = collection_type_description
        self._current_elements: ElementsDict = {}
        # The built collection is cached so repeated build() calls reuse it
        # instead of creating a new one each time.
        self.collection: Optional[DatasetCollection] = None
        self.associated_identifiers: Set[str] = set()

    def replace_elements_in_collection(
        self,
        template_collection: Union["CollectionAdapter", DatasetCollection],
        replacement_dict: Dict[DatasetInstance, DatasetInstance],
    ) -> None:
        """
        Reset the builder's elements to mirror ``template_collection``.

        Datasets that appear as keys of ``replacement_dict`` are swapped for
        the mapped value; all others are carried over unchanged.
        """
        self._current_elements = self._replace_elements_in_collection(
            template_collection=template_collection,
            replacement_dict=replacement_dict,
        )

    def _replace_elements_in_collection(
        self,
        template_collection: Union["CollectionAdapter", DatasetCollection],
        replacement_dict: Dict[DatasetInstance, DatasetInstance],
    ) -> ElementsDict:
        """Walk the template's elements and return the equivalent ElementsDict."""
        mirrored: ElementsDict = {}
        for template_element in template_collection.elements:
            identifier = template_element.element_identifier
            assert identifier
            nested_collection = template_element.child_collection
            if not nested_collection:
                # Leaf element: substitute the dataset when a replacement exists.
                current = template_element.element_object
                assert isinstance(current, DatasetInstance)
                mirrored[identifier] = replacement_dict.get(current, current)
                continue
            # Sub-collection: recurse through a child builder of the child type.
            child_description = self._collection_type_description.child_collection_type_description()
            child_builder = CollectionBuilder(collection_type_description=child_description)
            child_builder.replace_elements_in_collection(
                template_collection=nested_collection, replacement_dict=replacement_dict
            )
            mirrored[identifier] = child_builder
        return mirrored

    def get_level(self, identifier: str) -> "CollectionBuilder":
        """
        Return the sub-collection builder stored under ``identifier``, creating it if needed.

        Raises ``AssertionError`` when this builder's collection type has no
        sub-collections.
        """
        if not self._nested_collection:
            raise AssertionError(
                "Cannot add nested collection to collection of type [%s]" % (self._collection_type_description)
            )
        try:
            existing = self._current_elements[identifier]
        except KeyError:
            created = CollectionBuilder(self._subcollection_type_description)
            self._current_elements[identifier] = created
            return created
        assert isinstance(existing, CollectionBuilder)
        return existing

    def add_dataset(self, identifier: str, dataset_instance: DatasetInstance) -> None:
        """Store ``dataset_instance`` as the element named ``identifier``."""
        self._current_elements[identifier] = dataset_instance

    def build_elements(self) -> "DatasetInstanceMapping":
        """
        Return the identifier -> object mapping to hand to the type plugin.

        For a nested type every sub-builder is built and the builders are
        retained. For a flat type the accumulated datasets are returned and
        the builder's element store is emptied.
        """
        pending = self._current_elements
        if not self._nested_collection:
            self._current_elements = {}
            return cast(Dict[str, DatasetInstance], pending)
        built = {}
        for identifier, sub_builder in pending.items():
            assert isinstance(sub_builder, CollectionBuilder)
            built[identifier] = sub_builder.build()
        return built

    def build(self) -> DatasetCollection:
        """Build (or extend) the cached collection from the current elements and return it."""
        description = self._collection_type_description
        plugin = description.rank_type_plugin()
        element_mapping = self.build_elements()
        self.collection = build_collection(plugin, element_mapping, self.collection, self.associated_identifiers)
        self.collection.collection_type = description.collection_type
        return self.collection

    @property
    def _subcollection_type_description(self) -> "CollectionTypeDescription":
        """Type description used for builders returned by ``get_level``."""
        return self._collection_type_description.subcollection_type_description()

    @property
    def _nested_collection(self) -> bool:
        """Whether this builder's collection type has sub-collections."""
        return self._collection_type_description.has_subcollections()
class BoundCollectionBuilder(CollectionBuilder):
    """Stateful builder variant tied to one existing dataset collection model object."""

    def __init__(self, dataset_collection):
        """
        Bind the builder to ``dataset_collection``.

        Raises ``Exception`` if the collection is already populated.
        """
        self.dataset_collection = dataset_collection
        if dataset_collection.populated:
            raise Exception("Cannot reset elements of an already populated dataset collection.")
        # The type description is looked up from the bound collection's own type.
        super().__init__(
            COLLECTION_TYPE_DESCRIPTION_FACTORY.for_collection_type(dataset_collection.collection_type)
        )

    def populate_partial(self):
        """Add the currently accumulated elements to the bound collection."""
        built_elements = self.build_elements()
        set_collection_elements(
            self.dataset_collection,
            self._collection_type_description.rank_type_plugin(),
            built_elements,
            self.associated_identifiers,
        )

    def populate(self):
        """Add the accumulated elements, then mark the bound collection as populated."""
        self.populate_partial()
        self.dataset_collection.mark_as_populated()