Source code for galaxy.tool_util.edam_util

import os
from typing import (

    from edam_ontology.streams import tabular_stream
except ImportError:
    tabular_stream = None


# Term ids (without the EDAM URL prefix) exported for callers via ``__all__``.
# NOTE(review): presumably the ids of the top-level EDAM "Operation" and
# "Topic" terms — confirm against the ontology release in use.
ROOT_OPERATION = "operation_0004"
ROOT_TOPIC = "topic_0003"

def load_edam_tree(path: Optional[str] = None, *included_terms: str):
    """Load EDAM terms from a tabular (TSV) file into a dictionary.

    :param path: location of the EDAM TSV file. When ``None``, the stream
        provided by the optional ``edam-ontology`` package is used instead.
    :param included_terms: optional term-id prefixes forwarded unchanged to
        ``load_edam_tree_from_tsv_stream`` to restrict which terms are kept.
    :returns: whatever ``load_edam_tree_from_tsv_stream`` returns for the
        opened stream.
    :raises AssertionError: if ``path`` is given but does not exist, or if
        ``path`` is ``None`` and ``edam-ontology`` could not be imported.
    """
    if path is not None:
        assert os.path.exists(path), f"Failed to load EDAM tabular data at [{path}] path does not exist."
        handle = open(path)
    else:
        assert (
            tabular_stream is not None
        ), "Failed to load optional import from edam-ontology package, install using [pip install edam-ontology]."
        handle = tabular_stream()
    # The parser reads the whole stream before it returns, so the handle can
    # be released here instead of leaking until garbage collection.
    try:
        return load_edam_tree_from_tsv_stream(handle, *included_terms)
    finally:
        handle.close()
def load_edam_tree_from_tsv_stream(tsv_stream: TextIO, *included_terms: str):
    """Parse an EDAM tabular export and return its terms keyed by short id.

    The first line of ``tsv_stream`` is the header and is used to locate the
    columns that are read. Each following line whose "Class ID" value starts
    with ``EDAM_PREFIX`` produces one entry; the prefix is removed to form
    the entry's key.

    :param tsv_stream: text stream of tab-separated lines, header line first.
    :param included_terms: optional id prefixes; when any are given, only
        terms whose short id starts with one of them are kept.
    :returns: dict mapping each term id to a dict with the keys

        * ``label`` / ``definition`` -- column values with surrounding
          double quotes stripped,
        * ``parents`` -- short ids of the ``|``-separated parents that start
          with ``EDAM_PREFIX``,
        * ``path`` -- the distinct tails (last two entries at most) of every
          chain of ancestors, each chain running from the immediate parent
          up to a term that has no parents. A term without parents gets
          ``[[]]``.
    :raises KeyError: if a required header column is missing, or if a term
        lists a parent that was not itself loaded (for example because it
        was excluded by ``included_terms``).
    """
    edam: Dict[str, Dict] = {}

    def _recurse_edam_parents(term, path):
        # Yield one ancestor list per distinct route from ``term`` to a
        # parentless term; ``path`` accumulates the ids visited so far.
        parents = edam[term]["parents"]
        if not parents:
            yield path
            return
        for parent in parents:
            yield from _recurse_edam_parents(parent, path + [parent])

    lines = iter(tsv_stream)
    header = next(lines, None)
    if header is None:
        # Empty stream: nothing to parse.
        return edam

    # Strip the line terminator before splitting so the last column's name
    # (and, below, the last field's value) does not carry a stray newline.
    columns = {name: index for index, name in enumerate(header.rstrip("\r\n").split("\t"))}
    # NOTE(review): the empty column name below looks like extraction residue
    # (the real header name may have been stripped) — confirm against the
    # upstream module before relying on it.
    definition_column = columns[""]
    term_column = columns["Class ID"]
    label_column = columns["Preferred Label"]
    parents_column = columns["Parents"]

    for line in lines:
        fields = line.rstrip("\r\n").split("\t")
        term = fields[term_column]
        if not term.startswith(EDAM_PREFIX):
            continue
        term_id = term[len(EDAM_PREFIX) :]

        # Only care about included terms
        if included_terms and not term_id.startswith(included_terms):
            continue

        parents = fields[parents_column].split("|")
        edam[term_id] = {
            "label": fields[label_column].strip('"'),
            "definition": fields[definition_column].strip('"'),
            "parents": [x[len(EDAM_PREFIX) :] for x in parents if x.startswith(EDAM_PREFIX)],
        }

    # Second pass: all terms are known now, so ancestor chains can be walked.
    for term in sorted(edam):
        tails = []
        for chain in _recurse_edam_parents(term, []):
            tail = chain[-2:]
            if tail not in tails:
                tails.append(tail)
        edam[term]["path"] = tails

    return edam


__all__ = ("load_edam_tree", "ROOT_OPERATION", "ROOT_TOPIC")