Warning

This document is for an old release of Galaxy. You can alternatively view this page in the latest release if it exists or view the top of the latest release's documentation.

Source code for galaxy.dataset_collections.structure

""" Module for reasoning about structure of and matching hierarchical collections of data.
"""
import logging
log = logging.getLogger(__name__)

from .type_description import map_over_collection_type


[docs]class Leaf(object): def __len__(self): return 1 @property def is_leaf(self): return True
[docs] def clone(self): return self
[docs] def multiply(self, other_structure): return other_structure.clone()
leaf = Leaf()
[docs]class Tree(object):
[docs] def __init__(self, children, collection_type_description): self.children = children self.collection_type_description = collection_type_description
[docs] @staticmethod def for_dataset_collection(dataset_collection, collection_type_description): children = [] for element in dataset_collection.elements: if collection_type_description.has_subcollections(): child_collection = element.child_collection subcollection_type_description = collection_type_description.subcollection_type_description() # Type description of children tree = Tree.for_dataset_collection(child_collection, collection_type_description=subcollection_type_description) children.append((element.element_identifier, tree)) else: children.append((element.element_identifier, leaf)) return Tree(children, collection_type_description)
[docs] def walk_collections(self, hdca_dict): return self._walk_collections(dict_map(lambda hdca: hdca.collection, hdca_dict))
def _walk_collections(self, collection_dict): for index, (identifier, substructure) in enumerate(self.children): def element(collection): return collection[index] if substructure.is_leaf: yield dict_map(element, collection_dict) else: sub_collections = dict_map(lambda collection: element(collection).child_collection, collection_dict) for element in substructure._walk_collections(sub_collections): yield element @property def is_leaf(self): return False
[docs] def can_match(self, other_structure): if not self.collection_type_description.can_match_type(other_structure.collection_type_description): return False if len(self.children) != len(other_structure.children): return False for my_child, other_child in zip(self.children, other_structure.children): # At least one is nested collection... if my_child[1].is_leaf != other_child[1].is_leaf: return False if not my_child[1].is_leaf and not my_child[1].can_match(other_child[1]): return False return True
def __len__(self): return sum([len(c[1]) for c in self.children])
[docs] def element_identifiers_for_outputs(self, trans, outputs): element_identifiers = [] elements_collection_type = None for identifier, child in self.children: if isinstance(child, Tree): child_identifiers = child.element_identifiers_for_outputs(trans, outputs[0:len(child)]) child_identifiers["name"] = identifier element_identifiers.append(child_identifiers) elements_collection_type = child_identifiers["collection_type"] else: output_object = outputs[0] element_identifiers.append(dict(name=identifier, __object__=output_object)) if hasattr(output_object, "collection_type"): elements_collection_type = output_object.collection_type outputs = outputs[len(child):] collection_type = map_over_collection_type(self.collection_type_description.rank_collection_type(), elements_collection_type) return dict( src="new_collection", collection_type=collection_type, element_identifiers=element_identifiers, )
[docs] def multiply(self, other_structure): if other_structure.is_leaf: return self.clone() new_collection_type = self.collection_type_description.multiply(other_structure.collection_type_description) new_children = [] for (identifier, structure) in self.children: new_children.append((identifier, structure.multiply(other_structure))) return Tree(new_children, new_collection_type)
[docs] def clone(self): cloned_children = [(_[0], _[1].clone()) for _ in self.children] return Tree(cloned_children, self.collection_type_description)
[docs]def dict_map(func, input_dict): return dict((k, func(v)) for k, v in input_dict.items())
[docs]def get_structure(dataset_collection_instance, collection_type_description, leaf_subcollection_type=None): if leaf_subcollection_type: collection_type_description = collection_type_description.effective_collection_type_description(leaf_subcollection_type) return Tree.for_dataset_collection(dataset_collection_instance.collection, collection_type_description)