Warning
This document is for an in-development version of Galaxy. You can alternatively view this page in the latest release if it exists or view the top of the latest release's documentation.
Source code for galaxy.model.dataset_collections.builder
from typing import (
cast,
Dict,
List,
Optional,
Set,
TYPE_CHECKING,
Union,
)
from galaxy.model import (
DatasetCollection,
DatasetInstance,
)
from galaxy.model.orm.util import add_object_to_object_session
from galaxy.util.oset import OrderedSet
from .type_description import COLLECTION_TYPE_DESCRIPTION_FACTORY
if TYPE_CHECKING:
from galaxy.model.dataset_collections.adapters import CollectionAdapter
from galaxy.model.dataset_collections.type_description import CollectionTypeDescription
from galaxy.model.dataset_collections.types import (
BaseDatasetCollectionType,
DatasetInstanceMapping,
)
from galaxy.tool_util_models.tool_source import FieldDict
[docs]
def build_collection(
    type: "BaseDatasetCollectionType",
    dataset_instances: "DatasetInstanceMapping",
    collection: Optional[DatasetCollection] = None,
    associated_identifiers: Optional[Set[str]] = None,
    fields: Optional[Union[str, List["FieldDict"]]] = None,
) -> DatasetCollection:
    """
    Build DatasetCollection with populated DatasetCollectionElement objects
    corresponding to the supplied dataset instances or throw exception if
    this is not a valid collection of the specified type.

    :param type: collection type plugin whose ``generate_elements`` produces
        the elements.
    :param dataset_instances: mapping of element identifier to element content.
    :param collection: existing collection to add elements to; when omitted a
        new ``DatasetCollection(fields=fields)`` is created.
    :param associated_identifiers: identifiers that are already part of the
        collection. The set is updated in place with every identifier added
        by this call; when omitted a fresh empty set is used.
    :param fields: forwarded to the new ``DatasetCollection`` (if one is
        created) and to ``set_collection_elements``.
    :returns: the collection the elements were added to.
    """
    dataset_collection = collection or DatasetCollection(fields=fields)
    # Test for None explicitly: an empty set is falsy, so ``x or set()`` would
    # silently swap the caller's (still empty) set for a throw-away one and the
    # caller would never see the identifiers recorded below.
    if associated_identifiers is None:
        associated_identifiers = set()
    set_collection_elements(dataset_collection, type, dataset_instances, associated_identifiers, fields=fields)
    return dataset_collection
[docs]
def set_collection_elements(
    dataset_collection: DatasetCollection,
    type: "BaseDatasetCollectionType",
    dataset_instances: "DatasetInstanceMapping",
    associated_identifiers: Set[str],
    fields: Optional[Union[str, List["FieldDict"]]] = None,
) -> DatasetCollection:
    """
    Attach elements for not-yet-associated identifiers to ``dataset_collection``.

    Only entries of ``dataset_instances`` whose identifier is absent from
    ``associated_identifiers`` are turned into elements. New elements continue
    the index sequence from the collection's current ``element_count``.

    :param dataset_collection: collection that receives the new elements; its
        ``element_count`` is updated.
    :param type: collection type plugin whose ``generate_elements`` produces
        the elements.
    :param dataset_instances: mapping of element identifier to element content.
    :param associated_identifiers: identifiers already present in the
        collection; mutated in place to include each identifier added here.
    :param fields: forwarded to ``type.generate_elements``. For a ``record``
        collection type the string ``"auto"`` is replaced by the result of
        ``guess_fields`` over all of ``dataset_instances``.
    :returns: ``dataset_collection``.
    """
    new_element_keys = OrderedSet(dataset_instances.keys()) - associated_identifiers
    new_dataset_instances = {k: dataset_instances[k] for k in new_element_keys}
    # Normalise a missing/zero count before using it as the starting index.
    dataset_collection.element_count = dataset_collection.element_count or 0
    element_index = dataset_collection.element_count
    if type.collection_type == "record" and fields == "auto":
        fields = guess_fields(dataset_instances)
    for element in type.generate_elements(new_dataset_instances, fields=fields):
        element.element_index = element_index
        # Put the element into the collection's session before linking them.
        add_object_to_object_session(element, dataset_collection)
        element.collection = dataset_collection
        element_index += 1
        assert element.element_identifier
        associated_identifiers.add(element.element_identifier)
    dataset_collection.element_count = element_index
    return dataset_collection
[docs]
def guess_fields(dataset_instances: "DatasetInstanceMapping") -> List["FieldDict"]:
    """
    Derive record field definitions from the supplied elements.

    Returns one ``{"type": "File", "name": <identifier>}`` entry per element,
    in mapping order, or an empty list as soon as any element is a
    ``DatasetCollection``.
    """
    contents = dataset_instances.values()
    if any(isinstance(content, DatasetCollection) for content in contents):
        return []
    return [{"type": "File", "name": name} for name in dataset_instances]
# Elements held by a CollectionBuilder, keyed by element identifier: each value
# is either a nested CollectionBuilder or a DatasetInstance.
ElementsDict = Dict[str, Union["CollectionBuilder", DatasetInstance]]
[docs]
class CollectionBuilder:
    """Accumulates elements and assembles them into a dataset collection."""

    def __init__(self, collection_type_description: "CollectionTypeDescription"):
        """Start an empty builder for the given collection type description."""
        self._collection_type_description = collection_type_description
        self._current_elements: ElementsDict = {}
        # Keep the built collection around so repeated builds reuse it
        # instead of creating a new one each time.
        self.collection: Optional[DatasetCollection] = None
        self.associated_identifiers: Set[str] = set()

    def replace_elements_in_collection(
        self,
        template_collection: Union["CollectionAdapter", DatasetCollection],
        replacement_dict: Dict[DatasetInstance, DatasetInstance],
    ) -> None:
        """
        Replace this builder's pending elements with a copy of
        ``template_collection``'s structure, substituting datasets found in
        ``replacement_dict``.
        """
        rebuilt = self._replace_elements_in_collection(
            template_collection=template_collection,
            replacement_dict=replacement_dict,
        )
        self._current_elements = rebuilt

    def _replace_elements_in_collection(
        self,
        template_collection: Union["CollectionAdapter", DatasetCollection],
        replacement_dict: Dict[DatasetInstance, DatasetInstance],
    ) -> ElementsDict:
        """
        Walk ``template_collection`` and return its elements keyed by
        identifier: child collections become nested builders, datasets are
        swapped for their entry in ``replacement_dict`` when one exists.
        """
        rebuilt: ElementsDict = {}
        for template_element in template_collection.elements:
            identifier = template_element.element_identifier
            assert identifier
            child = template_element.child_collection
            if not child:
                # Leaf element: use the replacement if there is one,
                # otherwise keep the dataset as it is.
                dataset = template_element.element_object
                assert isinstance(dataset, DatasetInstance)
                rebuilt[identifier] = replacement_dict.get(dataset, dataset)
                continue
            child_builder = CollectionBuilder(
                collection_type_description=self._collection_type_description.child_collection_type_description()
            )
            child_builder.replace_elements_in_collection(
                template_collection=child, replacement_dict=replacement_dict
            )
            rebuilt[identifier] = child_builder
        return rebuilt

    def get_level(self, identifier: str) -> "CollectionBuilder":
        """
        Return the nested builder stored under ``identifier``, creating and
        registering one if it does not exist yet.

        Raises ``AssertionError`` when this builder's collection type has no
        subcollections.
        """
        if not self._nested_collection:
            raise AssertionError(
                "Cannot add nested collection to collection of type [%s]" % (self._collection_type_description)
            )
        try:
            level_builder = self._current_elements[identifier]
        except KeyError:
            level_builder = CollectionBuilder(self._subcollection_type_description)
            self._current_elements[identifier] = level_builder
        else:
            assert isinstance(level_builder, CollectionBuilder)
        return level_builder

    def add_dataset(self, identifier: str, dataset_instance: DatasetInstance) -> None:
        """Register ``dataset_instance`` under ``identifier``."""
        self._current_elements[identifier] = dataset_instance

    def build_elements(self) -> "DatasetInstanceMapping":
        """
        Return the pending elements keyed by identifier.

        For a nested type every child builder is built and the resulting
        collections are returned; the child builders stay registered. For a
        flat type the accumulated datasets are returned and the pending
        mapping is reset to empty.
        """
        pending = self._current_elements
        if not self._nested_collection:
            self._current_elements = {}
            return cast(Dict[str, DatasetInstance], pending)
        built = {}
        for identifier, child_builder in pending.items():
            assert isinstance(child_builder, CollectionBuilder)
            built[identifier] = child_builder.build()
        return built

    def build(self) -> DatasetCollection:
        """
        Build the pending elements into ``self.collection`` (created on first
        use), set its ``collection_type`` and return it.
        """
        description = self._collection_type_description
        type_plugin = description.rank_type_plugin()
        pending = self.build_elements()
        self.collection = build_collection(type_plugin, pending, self.collection, self.associated_identifiers)
        self.collection.collection_type = self._collection_type_description.collection_type
        return self.collection

    @property
    def _subcollection_type_description(self) -> "CollectionTypeDescription":
        """Type description used for builders one level below this one."""
        return self._collection_type_description.subcollection_type_description()

    @property
    def _nested_collection(self) -> bool:
        """Whether this builder's collection type has subcollections."""
        return self._collection_type_description.has_subcollections()
[docs]
class BoundCollectionBuilder(CollectionBuilder):
    """Builder variant tied to one existing dataset collection model object."""

    def __init__(self, dataset_collection):
        """
        Bind the builder to ``dataset_collection``.

        Raises ``Exception`` if the collection reports itself as populated.
        The collection type description is looked up from the collection's
        ``collection_type``.
        """
        self.dataset_collection = dataset_collection
        if dataset_collection.populated:
            raise Exception("Cannot reset elements of an already populated dataset collection.")
        type_description = COLLECTION_TYPE_DESCRIPTION_FACTORY.for_collection_type(
            dataset_collection.collection_type
        )
        super().__init__(type_description)

    def populate_partial(self):
        """
        Add the currently pending elements to the bound collection, skipping
        identifiers already recorded in ``self.associated_identifiers``.
        """
        pending = self.build_elements()
        plugin = self._collection_type_description.rank_type_plugin()
        set_collection_elements(self.dataset_collection, plugin, pending, self.associated_identifiers)