Warning
This document is for an in-development version of Galaxy. You can alternatively view this page in the latest release if it exists or view the top of the latest release's documentation.
Source code for galaxy.tools.parameters.dataset_matcher
from logging import getLogger
import galaxy.model
log = getLogger(__name__)
[docs]def set_dataset_matcher_factory(trans, tool):
trans.dataset_matcher_factory = DatasetMatcherFactory(trans, tool)
[docs]def get_dataset_matcher_factory(trans):
dataset_matcher_factory = getattr(trans, "dataset_matcher_factory", None)
return dataset_matcher_factory or DatasetMatcherFactory(trans)
class DatasetMatcherFactory(object):
""""""
def __init__(self, trans, tool=None):
self._trans = trans
self._tool = tool
self._data_inputs = []
self._matches_format_cache = {}
if tool:
valid_input_states = tool.valid_input_states
else:
valid_input_states = galaxy.model.Dataset.valid_input_states
self.valid_input_states = valid_input_states
can_process_summary = False
if tool is not None:
for input in tool.inputs.values():
self._collect_data_inputs(input)
require_public = self._tool and self._tool.tool_type == 'data_destination'
if not require_public and self._data_inputs:
can_process_summary = True
for data_input in self._data_inputs:
if data_input.options:
can_process_summary = False
break
self._can_process_summary = can_process_summary
def matches_any_format(self, hda_extension, formats):
for format in formats:
if self.matches_format(hda_extension, format):
return True
return False
def matches_format(self, hda_extension, format):
# cache datatype checking combinations for fast recall
if hda_extension not in self._matches_format_cache:
self._matches_format_cache[hda_extension] = {}
formats = self._matches_format_cache[hda_extension]
if format not in formats:
datatype = galaxy.model.datatype_for_extension(hda_extension, datatypes_registry=self._trans.app.datatypes_registry)
formats[format] = datatype.matches_any([format])
return formats[format]
def _collect_data_inputs(self, input):
type_name = input.type
if type_name == "repeat" or type_name == "upload_dataset" or type_name == "section":
for child_input in input.inputs.values():
self._collect_data_inputs(child_input)
elif type_name == "conditional":
for case in input.cases:
for child_input in case.inputs.values():
self._collect_data_inputs(child_input)
elif type_name == "data" or type_name == "data_collection":
self._data_inputs.append(input)
def dataset_matcher(self, param, other_values):
return DatasetMatcher(self, self._trans, param, other_values)
def dataset_collection_matcher(self, dataset_matcher):
if self._can_process_summary:
return SummaryDatasetCollectionMatcher(self, self._trans, dataset_matcher)
else:
return DatasetCollectionMatcher(self._trans, dataset_matcher)
class DatasetMatcher(object):
""" Utility class to aid DataToolParameter and similar classes in reasoning
about what HDAs could match or are selected for a parameter and value.
Goal here is to both encapsulate and reuse logic related to filtering,
datatype matching, hiding errored dataset, finding implicit conversions,
and permission handling.
"""
def __init__(self, dataset_matcher_factory, trans, param, other_values):
self.dataset_matcher_factory = dataset_matcher_factory
self.trans = trans
self.param = param
self.tool = param.tool
filter_values = set()
if param.options and other_values:
try:
for v in param.options.get_options(trans, other_values):
filter_values.add(v[0])
except IndexError:
pass # no valid options
self.filter_values = filter_values
def valid_hda_match(self, hda, check_implicit_conversions=True):
""" Return False of this parameter can not be matched to the supplied
HDA, otherwise return a description of the match (either a
HdaDirectMatch describing a direct match or a HdaImplicitMatch
describing an implicit conversion.)
"""
rval = False
formats = self.param.formats
if self.dataset_matcher_factory.matches_any_format(hda.extension, formats):
rval = HdaDirectMatch(hda)
else:
if not check_implicit_conversions:
return False
target_ext, converted_dataset = hda.find_conversion_destination(formats)
if target_ext:
original_hda = hda
if converted_dataset:
hda = converted_dataset
rval = HdaImplicitMatch(hda, target_ext, original_hda)
else:
return False
if self.filter(hda):
return False
return rval
def hda_match(self, hda, check_implicit_conversions=True, ensure_visible=True):
""" If HDA is accessible, return information about whether it could
match this parameter and if so how. See valid_hda_match for more
information.
"""
dataset = hda.dataset
valid_state = dataset.state in self.dataset_matcher_factory.valid_input_states
if valid_state and (not ensure_visible or hda.visible):
# If we are sending data to an external application, then we need to make sure there are no roles
# associated with the dataset that restrict its access from "public".
require_public = self.tool and self.tool.tool_type == 'data_destination'
if require_public and not self.trans.app.security_agent.dataset_is_public(dataset):
return False
return self.valid_hda_match(hda, check_implicit_conversions=check_implicit_conversions)
def filter(self, hda):
""" Filter out this value based on other values for job (if
applicable).
"""
param = self.param
return param.options and param.get_options_filter_attribute(hda) not in self.filter_values
class HdaDirectMatch(object):
""" Supplied HDA was a valid option directly (did not need to find implicit
conversion).
"""
def __init__(self, hda):
self.hda = hda
@property
def implicit_conversion(self):
return False
class HdaImplicitMatch(object):
""" Supplied HDA was a valid option directly (did not need to find implicit
conversion).
"""
def __init__(self, hda, target_ext, original_hda):
self.original_hda = original_hda
self.hda = hda
self.target_ext = target_ext
@property
def implicit_conversion(self):
return True
class HdcaDirectMatch(object):
implicit_conversion = False
def __init__(self):
pass
class HdcaImplicitMatch(object):
implicit_conversion = True
def __init__(self):
pass
class SummaryDatasetCollectionMatcher(object):
def __init__(self, dataset_matcher_factory, trans, dataset_matcher):
self.dataset_matcher_factory = dataset_matcher_factory
self._trans = trans
self.dataset_matcher = dataset_matcher
def hdca_match(self, history_dataset_collection_association):
dataset_collection = history_dataset_collection_association.collection
if not dataset_collection.populated_optimized:
return False
(states, extensions) = dataset_collection.dataset_states_and_extensions_summary
for state in states:
if state not in self.dataset_matcher_factory.valid_input_states:
return False
formats = self.dataset_matcher.param.formats
uses_implicit_conversion = False
for extension in extensions:
if self.dataset_matcher_factory.matches_any_format(extension, formats):
continue
datatypes_registry = self._trans.app.datatypes_registry
converted_ext, _ = datatypes_registry.find_conversion_destination_for_dataset_by_extensions(extension, formats)
if not converted_ext:
return False
else:
uses_implicit_conversion = True
return HdcaImplicitMatch() if uses_implicit_conversion else HdcaDirectMatch()
class DatasetCollectionMatcher(object):
def __init__(self, trans, dataset_matcher):
self.dataset_matcher = dataset_matcher
self._trans = trans
def __valid_element(self, element):
# Simplify things for now and assume these are hdas and not implicit
# converts. One could imagine handling both of those cases down the
# road.
if element.ldda:
return False
child_collection = element.child_collection
if child_collection:
return self.dataset_collection_match(child_collection)
hda = element.hda
if not hda:
return False
hda_match = self.dataset_matcher.hda_match(hda, ensure_visible=False)
return hda_match
def hdca_match(self, history_dataset_collection_association):
dataset_collection = history_dataset_collection_association.collection
return self.dataset_collection_match(dataset_collection)
def dataset_collection_match(self, dataset_collection):
# If dataset collection not yet populated, cannot determine if it
# would be a valid match for this parameter.
if not dataset_collection.populated:
return False
valid = True
uses_implicit_conversion = False
for element in dataset_collection.elements:
match_element = self.__valid_element(element)
if not match_element:
valid = False
break
elif match_element.implicit_conversion:
uses_implicit_conversion = True
return valid and (HdcaImplicitMatch() if uses_implicit_conversion else HdcaDirectMatch())
__all__ = ('get_dataset_matcher_factory', 'set_dataset_matcher_factory', 'unset_dataset_matcher_factory')