Source code for galaxy.visualization.data_providers.registry
from typing import Dict, Optional, Type, Union
from typing_extensions import Literal
from galaxy.datatypes.data import Data, Newick, Nexus
from galaxy.datatypes.interval import (
    Bed,
    ChromatinInteractions,
    ENCODEPeak,
    Gff,
    Gtf,
    Interval
)
from galaxy.datatypes.tabular import Tabular, Vcf
from galaxy.datatypes.xml import Phyloxml
from galaxy.model import NoConverterException
from galaxy.visualization.data_providers import genome
from galaxy.visualization.data_providers.basic import BaseDataProvider, ColumnDataProvider
from galaxy.visualization.data_providers.phyloviz import PhylovizDataProvider
# A dict keyed on datatype class, with a "default" string key giving the fallback provider.
PROVIDER_BY_DATATYPE_CLASS_DICT = Dict[Union[Literal["default"], Type[Data]], Type[BaseDataProvider]]
DATA_PROVIDER_BY_TYPE_NAME_DICT = Dict[str, Union[Type[BaseDataProvider], PROVIDER_BY_DATATYPE_CLASS_DICT]]
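# Illustrative example of the PROVIDER_BY_DATATYPE_CLASS_DICT shape (kept as a
# comment; the real mapping of this shape is built under the "tabix" key in
# DataProviderRegistry.__init__ below). Datatype classes map to provider
# classes, and the "default" key names the fallback used when no class-specific
# provider exists:
#
#     example_tabix_providers: PROVIDER_BY_DATATYPE_CLASS_DICT = {
#         Bed: genome.BedTabixDataProvider,
#         Vcf: genome.VcfTabixDataProvider,
#         "default": genome.TabixDataProvider,
#     }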
class DataProviderRegistry:
    """
    Registry for data providers that enables listing and lookup.
    """

    def __init__(self):
        # Mapping from dataset type name to a class that can fetch data from a file of that
        # type. First key is converted dataset type; if result is another dict, second key
        # is original dataset type.
        self.dataset_type_name_to_data_provider: DATA_PROVIDER_BY_TYPE_NAME_DICT = {
            "tabix": {
                Vcf: genome.VcfTabixDataProvider,
                Bed: genome.BedTabixDataProvider,
                Gtf: genome.GtfTabixDataProvider,
                ENCODEPeak: genome.ENCODEPeakTabixDataProvider,
                Interval: genome.IntervalTabixDataProvider,
                ChromatinInteractions: genome.ChromatinInteractionsTabixDataProvider,
                "default": genome.TabixDataProvider
            },
            "interval_index": genome.IntervalIndexDataProvider,
            "bai": genome.BamDataProvider,
            "bam": genome.SamDataProvider,
            "bigwig": genome.BigWigDataProvider,
            "bigbed": genome.BigBedDataProvider,
            "column_with_stats": ColumnDataProvider
        }
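        # Resolution sketch (illustrative): a request for the "tabix" provider on a
        # BED dataset resolves through the nested dict above to
        # genome.BedTabixDataProvider, while a datatype with no specific entry falls
        # back to the "default" genome.TabixDataProvider. Flat entries such as
        # "bigwig" map straight to a single provider class.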
    def get_data_provider(self, trans, name=None, source='data', raw=False, original_dataset=None):
        """
        Returns data provider matching parameter values. For standalone data
        sources, source parameter is ignored.
        """
        data_provider: Optional[BaseDataProvider]
        data_provider_class: Type[BaseDataProvider]
        # Any datatype class that is a subclass of another needs to be
        # checked before the parent in this conditional.
        if raw:
            # Working with raw data.
            if isinstance(original_dataset.datatype, Gff):
                data_provider_class = genome.RawGFFDataProvider
            elif isinstance(original_dataset.datatype, Bed):
                data_provider_class = genome.RawBedDataProvider
            elif isinstance(original_dataset.datatype, Vcf):
                data_provider_class = genome.RawVcfDataProvider
            elif isinstance(original_dataset.datatype, Tabular):
                data_provider_class = ColumnDataProvider
            elif isinstance(original_dataset.datatype, (Nexus, Newick, Phyloxml)):
                data_provider_class = PhylovizDataProvider
            data_provider = data_provider_class(original_dataset=original_dataset)
        else:
            # Working with converted or standalone dataset.
            if name:
                # Provider requested by name; get from mappings.
                value = self.dataset_type_name_to_data_provider[name]
                if isinstance(value, dict):
                    # value is a PROVIDER_BY_DATATYPE_CLASS_DICT.
                    # Get the provider registered for the original dataset's datatype
                    # class; if there is no class-specific provider, use the default.
                    default_type = value.get("default")
                    assert default_type
                    data_provider_class = value.get(original_dataset.datatype.__class__, default_type)
                else:
                    data_provider_class = value
                # If name is the same as original dataset's type, dataset is standalone.
                # Otherwise, a converted dataset is being used.
                if name == original_dataset.ext:
                    data_provider = data_provider_class(original_dataset=original_dataset)
                else:
                    converted_dataset = original_dataset.get_converted_dataset(trans, name)
                    deps = original_dataset.get_converted_dataset_deps(trans, name)
                    data_provider = data_provider_class(original_dataset=original_dataset,
                                                        converted_dataset=converted_dataset,
                                                        dependencies=deps)
            elif original_dataset:
                # No name, so look up a provider name from the datatype's information.
                # Dataset must have data sources to get data.
                if not original_dataset.datatype.data_sources:
                    return None
                # Get data provider mapping and data provider.
                data_provider_mapping = original_dataset.datatype.data_sources
                if 'data_standalone' in data_provider_mapping:
                    data_provider = self.get_data_provider(trans,
                                                           name=data_provider_mapping['data_standalone'],
                                                           original_dataset=original_dataset)
                else:
                    source_list = data_provider_mapping[source]
                    if isinstance(source_list, str):
                        source_list = [source_list]
                    # Find a valid data provider in the source list.
                    for source in source_list:
                        try:
                            data_provider = self.get_data_provider(trans, name=source, original_dataset=original_dataset)
                            break
                        except NoConverterException:
                            pass
        return data_provider
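# Usage sketch (a minimal, hypothetical example kept as a comment; `trans` and
# `hda` stand for a Galaxy transaction and a HistoryDatasetAssociation obtained
# elsewhere, e.g. inside a visualization controller, and are not constructed here):
#
#     registry = DataProviderRegistry()
#
#     # Raw, unconverted access -- for a BED dataset this selects
#     # genome.RawBedDataProvider via the isinstance() checks above.
#     raw_provider = registry.get_data_provider(trans, original_dataset=hda, raw=True)
#
#     # Name-based access -- request the "tabix" provider explicitly; when the
#     # dataset is not itself of that type, the converted tabix dataset and its
#     # dependencies are fetched and passed to the provider.
#     tabix_provider = registry.get_data_provider(trans, name="tabix", original_dataset=hda)
#
#     # Datatype-driven access -- with no name, the provider is chosen from the
#     # datatype's data_sources mapping (the default `source` is "data").
#     provider = registry.get_data_provider(trans, original_dataset=hda)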