Warning

This document is for an old release of Galaxy. Alternatively, you can view this page in the latest release (if it exists) or go to the top of the latest release's documentation.

Source code for galaxy.tool_util.ontologies.ontology_data

from typing import (
    cast,
    Dict,
    List,
    NamedTuple,
    Optional,
    Tuple,
)

from galaxy.tool_util.biotools import BiotoolsMetadataSource
from galaxy.tool_util.parser import ToolSource
from galaxy.util.resources import files


def _multi_dict_mapping(content: str) -> Dict[str, List[str]]:
    mapping: Dict[str, List[str]] = {}
    for x in content.splitlines():
        if x.startswith("#"):
            continue
        key, value = cast(Tuple[str, str], tuple(x.split("\t")))
        mapping.setdefault(key, []).append(value)
    return mapping


def _read_ontology_data_text(filename: str) -> str:
    """Return the text content of the resource ``filename`` located in ``PACKAGE``."""
    resource = files(PACKAGE).joinpath(filename)
    # NOTE(review): no encoding is passed, so read_text() falls back to its default.
    return resource.read_text()


# Package whose bundled resources hold the mapping tables loaded below.
PACKAGE = "galaxy.tool_util.ontologies"
BIOTOOLS_MAPPING_FILENAME = "biotools_mappings.tsv"
EDAM_OPERATION_MAPPING_FILENAME = "edam_operation_mappings.tsv"
EDAM_TOPIC_MAPPING_FILENAME = "edam_topic_mappings.tsv"

# One-to-one table: each non-comment line is split on tabs into a (key, value)
# pair; a later line with the same key replaces the earlier one.
BIOTOOLS_MAPPING_CONTENT = _read_ontology_data_text(BIOTOOLS_MAPPING_FILENAME)
BIOTOOLS_MAPPING: Dict[str, str] = dict(
    cast(Tuple[str, str], tuple(line.split("\t")))
    for line in BIOTOOLS_MAPPING_CONTENT.splitlines()
    if not line.startswith("#")
)

# One-to-many tables: every value listed for a key is kept, in file order.
EDAM_OPERATION_MAPPING_CONTENT = _read_ontology_data_text(EDAM_OPERATION_MAPPING_FILENAME)
EDAM_OPERATION_MAPPING: Dict[str, List[str]] = _multi_dict_mapping(EDAM_OPERATION_MAPPING_CONTENT)

EDAM_TOPIC_MAPPING_CONTENT = _read_ontology_data_text(EDAM_TOPIC_MAPPING_FILENAME)
EDAM_TOPIC_MAPPING: Dict[str, List[str]] = _multi_dict_mapping(EDAM_TOPIC_MAPPING_CONTENT)


class OntologyData(NamedTuple):
    """Ontology annotations for a tool, as returned by ``expand_ontology_data``."""

    # Cross-reference entries; this module reads their "reftype" and "value" keys.
    xrefs: List[Dict[str, str]]
    # EDAM operation entries for the tool (declared optional).
    edam_operations: Optional[List[str]]
    # EDAM topic entries for the tool (declared optional).
    edam_topics: Optional[List[str]]
def biotools_reference(xrefs):
    """Return the ``value`` of the first xref whose ``reftype`` is ``bio.tools``.

    ``None`` is returned when no xref has that reftype.
    """
    biotools_values = (entry["value"] for entry in xrefs if entry["reftype"] == "bio.tools")
    return next(biotools_values, None)
def legacy_biotools_external_reference(all_ids: List[str]) -> Optional[str]:
    """Return the ``BIOTOOLS_MAPPING`` entry for the first id in ``all_ids`` that has one.

    Ids are tried in the order given; ``None`` is returned when none of them
    is a key of ``BIOTOOLS_MAPPING``.
    """
    for candidate in all_ids:
        mapped = BIOTOOLS_MAPPING.get(candidate)
        if mapped is not None:
            return mapped
    return None
def _first_mapped_terms(all_ids: List[str], mapping: Dict[str, List[str]]) -> Optional[List[str]]:
    """Return a copy of the terms for the first id in ``all_ids`` present in ``mapping``, else ``None``."""
    for tool_id in all_ids:
        if tool_id in mapping:
            # Copy so the caller never holds a reference into the shared
            # module-level table and cannot modify it by accident.
            return list(mapping[tool_id])
    return None


def expand_ontology_data(
    tool_source: ToolSource, all_ids: List[str], biotools_metadata_source: Optional[BiotoolsMetadataSource]
) -> OntologyData:
    """Collect xrefs, EDAM operations and EDAM topics for a tool.

    The data is assembled in three steps:

    1. xrefs are parsed from ``tool_source``; if none of them has reftype
       ``bio.tools``, a reference found via
       ``legacy_biotools_external_reference(all_ids)`` is appended.
    2. EDAM operations and topics are parsed from ``tool_source`` and then
       replaced by the entry of the first id in ``all_ids`` that appears in
       ``EDAM_OPERATION_MAPPING`` / ``EDAM_TOPIC_MAPPING`` respectively.
    3. If operations or topics are still empty and a bio.tools reference and
       a ``biotools_metadata_source`` are available, the empty ones are
       filled from the ``edam_info`` of the matching bio.tools entry.

    :param tool_source: source the xrefs and EDAM terms are parsed from.
    :param all_ids: ids tried, in order, against the mapping tables.
    :param biotools_metadata_source: optional bio.tools metadata lookup; the
        third step is skipped when it is ``None``.
    :returns: the resulting ``OntologyData``.
    """
    xrefs = tool_source.parse_xrefs()
    has_biotools_reference = any(x["reftype"] == "bio.tools" for x in xrefs)
    if not has_biotools_reference:
        legacy_biotools_ref = legacy_biotools_external_reference(all_ids)
        if legacy_biotools_ref is not None:
            xrefs.append({"value": legacy_biotools_ref, "reftype": "bio.tools"})

    edam_operations = tool_source.parse_edam_operations()
    edam_topics = tool_source.parse_edam_topics()

    # A hit in the mapping tables overrides whatever the tool source declared.
    mapped_operations = _first_mapped_terms(all_ids, EDAM_OPERATION_MAPPING)
    if mapped_operations is not None:
        edam_operations = mapped_operations
    mapped_topics = _first_mapped_terms(all_ids, EDAM_TOPIC_MAPPING)
    if mapped_topics is not None:
        edam_topics = mapped_topics

    # Truthiness (rather than len() == 0) also treats None as missing data
    # instead of raising TypeError.
    has_missing_data = not edam_operations or not edam_topics
    if has_missing_data:
        biotools_reference_str = biotools_reference(xrefs)
        if biotools_reference_str and biotools_metadata_source:
            biotools_entry = biotools_metadata_source.get_biotools_metadata(biotools_reference_str)
            if biotools_entry:
                edam_info = biotools_entry.edam_info
                if not edam_operations:
                    edam_operations = edam_info.edam_operations
                if not edam_topics:
                    edam_topics = edam_info.edam_topics

    return OntologyData(
        xrefs,
        edam_operations,
        edam_topics,
    )