Source code for galaxy.tools.data
"""
Extend the base tool data table implementations with special implementations
that require full Galaxy dependencies (currently only the refgenie
configuration client).
"""
import logging
from typing import (
Any,
Dict,
List,
Type,
)
import refgenconf
from galaxy import util
from galaxy.tool_util.data import (
TabularToolDataField,
TabularToolDataTable,
tool_data_table_types_list as tool_util_tool_data_table_types_list,
ToolDataTable,
ToolDataTableManager as BaseToolDataTableManager,
)
from galaxy.util.template import fill_template
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
def table_from_dict(d: Dict[str, Any]) -> ToolDataTable:
    """Rebuild a data table object from its dictionary form.

    The class is looked up by name in this module's globals via
    ``d["model_class"]`` and instantiated with ``__new__``, so ``__init__``
    is not run. Every other key of ``d`` is copied onto the new instance as
    an attribute, and ``_loaded_content_version`` is then set to ``1``.
    """
    table_cls = globals()[d["model_class"]]
    table = table_cls.__new__(table_cls)
    # "model_class" only selects the class; it is not stored on the instance.
    attributes = {key: value for key, value in d.items() if key != "model_class"}
    for key, value in attributes.items():
        setattr(table, key, value)
    table._loaded_content_version = 1
    return table
def from_dict(d: Dict[str, Any]) -> "ToolDataTableManager":
    """Rebuild a ``ToolDataTableManager`` from a mapping of table name to table dict.

    The manager is created with ``__new__`` (``__init__`` is not run) and its
    ``data_tables`` attribute is filled with one table per entry of ``d``,
    each rebuilt by ``table_from_dict``.
    """
    manager = ToolDataTableManager.__new__(ToolDataTableManager)
    tables = {}
    for table_name, table_dict in d.items():
        tables[table_name] = table_from_dict(table_dict)
    manager.data_tables = tables
    return manager
class RefgenieToolDataTable(TabularToolDataTable):
    """
    Data stored in refgenie

    .. code-block:: xml

        <table name="all_fasta" type="refgenie" asset="fasta" >
            <file path="refgenie.yml" />
            <field name="value" template="true">${__REFGENIE_UUID__}</field>
            <field name="dbkey" template="true">${__REFGENIE_GENOME__}</field>
            <field name="name" template="true">${__REFGENIE_DISPLAY_NAME__}</field>
            <field name="path" template="true">${__REFGENIE_ASSET__}</field>
        </table>

    Each ``<field>`` element's text is the column value. When the field has
    ``template="true"`` the text is rendered with ``fill_template`` and may
    use the following variables: ``__REFGENIE_UUID__``,
    ``__REFGENIE_GENOME__``, ``__REFGENIE_TAG__``,
    ``__REFGENIE_DISPLAY_NAME__``, ``__REFGENIE_ASSET__``,
    ``__REFGENIE_ASSET_NAME__``, ``__REFGENIE_DIGEST__``,
    ``__REFGENIE_GENOME_ATTRIBUTES__``, ``__REFGENIE__`` (the ``RefGenConf``
    object) and ``__REFGENIE_SEEK_KEY__`` (a callable taking a seek key).
    """

    dict_collection_visible_keys = ["name"]
    dict_element_visible_keys = ["name", "fields"]
    dict_export_visible_keys = ["name", "data", "rg_asset", "largest_index", "columns", "missing_index_file"]

    type_key = "refgenie"

    def __init__(
        self,
        config_element,
        tool_data_path,
        tool_data_path_files,
        from_shed_config=False,
        filename=None,
        other_config_dict=None,
    ) -> None:
        """Forward all arguments to the parent constructor, then (re)load the table.

        After the parent constructor returns, ``config_element`` is stored,
        ``self.data`` is reset to an empty list and ``configure_and_load`` is
        called.
        """
        super().__init__(
            config_element,
            tool_data_path,
            tool_data_path_files,
            from_shed_config,
            filename,
            other_config_dict=other_config_dict,
        )
        self.config_element = config_element
        self.data: List[List[str]] = []
        self.configure_and_load(config_element, tool_data_path, from_shed_config)

    def configure_and_load(self, config_element, tool_data_path, from_shed_config=False, url_timeout=10):
        """Read the ``asset`` attribute of the table element, then delegate to the parent.

        Raises ``AssertionError`` when the ``asset`` attribute is missing or empty.
        """
        self.rg_asset = config_element.get("asset", None)
        # NOTE(review): assert-based validation is skipped under ``python -O``;
        # kept as-is so the raised exception type does not change for callers.
        assert self.rg_asset, ValueError("You must specify an asset attribute.")
        super().configure_and_load(
            config_element, tool_data_path, from_shed_config=from_shed_config, url_timeout=url_timeout
        )

    def parse_column_spec(self, config_element):
        """Build the column layout from the ``<field>`` children of ``config_element``.

        For every field this records:

        * ``key_map[name]`` - the element text (literal value or template source)
        * ``columns[name]`` - the column index (``column_index`` attribute,
          defaulting to the field's position)
        * ``template_for_column[name]`` / ``strip_for_column[name]`` - the
          boolean ``template`` / ``strip`` attributes
        * ``empty_field_values[name]`` - only when ``empty_field_value`` is given

        If no ``name`` field is configured it falls back to the ``value``
        field. Raises ``AssertionError`` for a field without a name and
        ``KeyError`` when neither ``name`` nor ``value`` is configured.
        """
        self.columns = {}
        self.key_map = {}
        self.template_for_column = {}
        self.strip_for_column = {}
        self.largest_index = 0
        for i, elem in enumerate(config_element.findall("field")):
            name = elem.get("name", None)
            assert name, ValueError("You must provide a name refgenie field element.")
            value = elem.text
            self.key_map[name] = value
            column_index = int(elem.get("column_index", i))
            empty_field_value = elem.get("empty_field_value", None)
            if empty_field_value is not None:
                self.empty_field_values[name] = empty_field_value
            self.template_for_column[name] = util.asbool(elem.get("template", False))
            self.strip_for_column[name] = util.asbool(elem.get("strip", False))
            self.columns[name] = column_index
            self.largest_index = max(self.largest_index, column_index)
        if "name" not in self.columns:
            self.columns["name"] = self.columns["value"]
            # parse_file_fields looks up key_map[...] for every entry of
            # self.columns, so the fallback "name" column needs the same
            # source text and rendering flags as "value"; without these
            # aliases loading fails with KeyError: 'name'.
            self.key_map["name"] = self.key_map["value"]
            self.template_for_column["name"] = self.template_for_column["value"]
            self.strip_for_column["name"] = self.strip_for_column["value"]

    def parse_file_fields(self, filename, errors=None, here="__HERE__"):
        """Build table rows from the refgenie configuration file ``filename``.

        One row is produced for every tagged asset whose asset name equals
        ``self.rg_asset``, across all genomes that provide that asset. Each
        column takes the configured field text, rendered with
        ``fill_template`` when the column is a template and stripped when
        the column's ``strip`` flag is set.

        If the configuration cannot be loaded, the error is logged, appended
        to ``errors`` (when given) and an empty list is returned. ``here`` is
        not used by this implementation.
        """
        try:
            rgc = refgenconf.RefGenConf(filename, writable=False, skip_read_lock=True)
        except refgenconf.exceptions.RefgenconfError as e:
            log.error('Unable to load refgenie config file "%s": %s', filename, e)
            if errors is not None:
                errors.append(e)
            return []
        rval = []
        for genome in rgc.list_genomes_by_asset(self.rg_asset):
            genome_attributes = rgc.get_genome_attributes(genome)
            genome_description = genome_attributes.get("genome_description", None)
            asset_list = rgc.list(genome, include_tags=True)[genome]
            for tagged_asset in asset_list:
                # Entries are "<asset>:<tag>"; split on the last colon only.
                asset, tag = tagged_asset.rsplit(":", 1)
                if asset != self.rg_asset:
                    continue
                digest = rgc.id(genome, asset, tag=tag)
                uuid = f"refgenie:{genome}/{self.rg_asset}:{tag}@{digest}"
                if genome_description:
                    display_name = f"{genome_description} (refgenie: {genome}@{digest})"
                else:
                    display_name = f"{genome}/{tagged_asset}@{digest}"

                # The closure reads the loop variables late; it is handed to
                # fill_template below within this same iteration.
                def _seek_key(key):
                    return rgc.seek(genome, asset, tag_name=tag, seek_key=key)  # noqa: B023

                template_dict = {
                    "__REFGENIE_UUID__": uuid,
                    "__REFGENIE_GENOME__": genome,
                    "__REFGENIE_TAG__": tag,
                    "__REFGENIE_DISPLAY_NAME__": display_name,
                    "__REFGENIE_ASSET__": rgc.seek(genome, asset, tag_name=tag),
                    "__REFGENIE_ASSET_NAME__": asset,
                    "__REFGENIE_DIGEST__": digest,
                    "__REFGENIE_GENOME_ATTRIBUTES__": genome_attributes,
                    "__REFGENIE__": rgc,
                    "__REFGENIE_SEEK_KEY__": _seek_key,
                }
                fields = [""] * (self.largest_index + 1)
                for name, index in self.columns.items():
                    rg_value = self.key_map[name]
                    # Default is hard-coded value
                    if self.template_for_column.get(name, False):
                        rg_value = fill_template(rg_value, template_dict)
                    if self.strip_for_column.get(name, False):
                        rg_value = rg_value.strip()
                    fields[index] = rg_value
                rval.append(fields)
        log.debug(
            "Loaded %i entries from refgenie '%s' asset '%s' for '%s'", len(rval), filename, self.rg_asset, self.name
        )
        return rval

    def _remove_entry(self, values):
        """Warn that refgenie-backed entries cannot be deleted, then delegate to the parent."""
        log.warning(
            "Deletion from refgenie-backed '%s' data table is not supported, will only try to delete from .loc files",
            self.name,
        )
        # Update every non-refgenie files
        super()._remove_entry(values)
# All known tool data table classes: the base set plus the refgenie-backed table.
tool_data_table_types_list: List[Type[ToolDataTable]] = [
    *tool_util_tool_data_table_types_list,
    RefgenieToolDataTable,
]
# Registry of tool data types by type_key
tool_data_table_types = {table_cls.type_key: table_cls for table_cls in tool_data_table_types_list}
class ToolDataTableManager(BaseToolDataTableManager):
    """Tool data table manager using this module's ``tool_data_table_types_list`` registry."""

    # Maps each table class's type_key to the class itself.
    tool_data_table_types = {table_cls.type_key: table_cls for table_cls in tool_data_table_types_list}
# Names exported by ``from galaxy.tools.data import *``.
__all__ = (
"RefgenieToolDataTable",
"TabularToolDataField",
"TabularToolDataTable",
"ToolDataTable",
"ToolDataTableManager",
"tool_data_table_types",
)