Warning
This document is for an old release of Galaxy. You can alternatively view this page in the latest release if it exists or view the top of the latest release's documentation.
Source code for galaxy.tool_util.ontologies.ontology_data
from typing import (
cast,
Dict,
List,
NamedTuple,
Optional,
Tuple,
)
from galaxy.tool_util.biotools import BiotoolsMetadataSource
from galaxy.tool_util.parser import ToolSource
from galaxy.util.resources import files
def _multi_dict_mapping(content: str) -> Dict[str, List[str]]:
mapping: Dict[str, List[str]] = {}
for x in content.splitlines():
if x.startswith("#"):
continue
key, value = cast(Tuple[str, str], tuple(x.split("\t")))
mapping.setdefault(key, []).append(value)
return mapping
def _read_ontology_data_text(filename: str) -> str:
    """Return the text content of ``filename`` looked up under the ``PACKAGE`` resources."""
    resource = files(PACKAGE).joinpath(filename)
    return resource.read_text()
# Package whose bundled resources hold the mapping files named below.
PACKAGE = "galaxy.tool_util.ontologies"
BIOTOOLS_MAPPING_FILENAME = "biotools_mappings.tsv"
EDAM_OPERATION_MAPPING_FILENAME = "edam_operation_mappings.tsv"
EDAM_TOPIC_MAPPING_FILENAME = "edam_topic_mappings.tsv"

# Tool ID -> bio.tools reference: one value per key, built from two-column
# tab-separated lines. Comment lines (``#``) are ignored, and blank lines are
# skipped so that a stray empty line cannot break the pair construction.
BIOTOOLS_MAPPING_CONTENT = _read_ontology_data_text(BIOTOOLS_MAPPING_FILENAME)
BIOTOOLS_MAPPING: Dict[str, str] = dict(
    cast(Tuple[str, str], tuple(line.split("\t")))
    for line in BIOTOOLS_MAPPING_CONTENT.splitlines()
    if line.strip() and not line.startswith("#")
)

# Tool ID -> list of EDAM operations (a key may appear on several lines).
EDAM_OPERATION_MAPPING_CONTENT = _read_ontology_data_text(EDAM_OPERATION_MAPPING_FILENAME)
EDAM_OPERATION_MAPPING: Dict[str, List[str]] = _multi_dict_mapping(EDAM_OPERATION_MAPPING_CONTENT)

# Tool ID -> list of EDAM topics (a key may appear on several lines).
EDAM_TOPIC_MAPPING_CONTENT = _read_ontology_data_text(EDAM_TOPIC_MAPPING_FILENAME)
EDAM_TOPIC_MAPPING: Dict[str, List[str]] = _multi_dict_mapping(EDAM_TOPIC_MAPPING_CONTENT)
class OntologyData(NamedTuple):
    """Cross-references and EDAM annotations collected for a tool."""

    # External references; each entry is a dict with "reftype" and "value" keys
    # (the shape read and appended to by the functions in this module).
    xrefs: List[Dict[str, str]]
    # EDAM operation terms, or None when unavailable.
    edam_operations: Optional[List[str]]
    # EDAM topic terms, or None when unavailable.
    edam_topics: Optional[List[str]]
def biotools_reference(xrefs):
    """Return the ``value`` of the first xref whose ``reftype`` is ``"bio.tools"``.

    Returns ``None`` when no such xref is present.
    """
    matching_values = (entry["value"] for entry in xrefs if entry["reftype"] == "bio.tools")
    return next(matching_values, None)
def legacy_biotools_external_reference(all_ids: List[str]) -> Optional[str]:
    """Look up a bio.tools reference for a tool in ``BIOTOOLS_MAPPING``.

    The IDs are tried in the order given; the mapping value for the first ID
    present in ``BIOTOOLS_MAPPING`` is returned, or ``None`` if none match.
    """
    known_references = (BIOTOOLS_MAPPING[candidate] for candidate in all_ids if candidate in BIOTOOLS_MAPPING)
    return next(known_references, None)
def _first_mapped_terms(all_ids: List[str], mapping: Dict[str, List[str]]) -> Optional[List[str]]:
    """Return a copy of the terms for the first ID of ``all_ids`` found in ``mapping``, else ``None``."""
    for tool_id in all_ids:
        if tool_id in mapping:
            # Copy so that callers mutating the result cannot alter the
            # shared module-level mapping.
            return list(mapping[tool_id])
    return None


def expand_ontology_data(
    tool_source: ToolSource, all_ids: List[str], biotools_metadata_source: Optional[BiotoolsMetadataSource]
) -> OntologyData:
    """Collect xrefs and EDAM operations/topics for a tool.

    Steps, in order:

    1. Start from the xrefs parsed from ``tool_source``. If none of them is a
       ``bio.tools`` reference, append one from ``BIOTOOLS_MAPPING`` when any
       of ``all_ids`` is listed there.
    2. Start from the EDAM operations/topics parsed from ``tool_source``; an
       entry in ``EDAM_OPERATION_MAPPING`` / ``EDAM_TOPIC_MAPPING`` for the
       first matching ID replaces the parsed value.
    3. If operations or topics are still empty and a bio.tools reference and a
       metadata source are both available, fill the empty ones from the
       bio.tools entry's ``edam_info``.

    :param tool_source: source the xrefs and EDAM annotations are parsed from.
    :param all_ids: candidate tool IDs, tried in the given order for mapping lookups.
    :param biotools_metadata_source: optional source of bio.tools metadata; when
        ``None`` (or falsy) step 3 is skipped.
    :returns: the resulting ``OntologyData``.
    """
    xrefs = tool_source.parse_xrefs()
    if not any(x["reftype"] == "bio.tools" for x in xrefs):
        legacy_biotools_ref = legacy_biotools_external_reference(all_ids)
        if legacy_biotools_ref is not None:
            xrefs.append({"value": legacy_biotools_ref, "reftype": "bio.tools"})

    edam_operations = tool_source.parse_edam_operations()
    edam_topics = tool_source.parse_edam_topics()

    # Packaged mappings take precedence over what the tool source declares.
    mapped_operations = _first_mapped_terms(all_ids, EDAM_OPERATION_MAPPING)
    if mapped_operations is not None:
        edam_operations = mapped_operations
    mapped_topics = _first_mapped_terms(all_ids, EDAM_TOPIC_MAPPING)
    if mapped_topics is not None:
        edam_topics = mapped_topics

    # Truthiness (rather than len() == 0) also treats None as "missing".
    if not edam_operations or not edam_topics:
        biotools_reference_str = biotools_reference(xrefs)
        if biotools_reference_str and biotools_metadata_source:
            biotools_entry = biotools_metadata_source.get_biotools_metadata(biotools_reference_str)
            if biotools_entry:
                edam_info = biotools_entry.edam_info
                # Only fill in what is still missing; keep data already found.
                if not edam_operations:
                    edam_operations = edam_info.edam_operations
                if not edam_topics:
                    edam_topics = edam_info.edam_topics

    return OntologyData(
        xrefs,
        edam_operations,
        edam_topics,
    )