Warning

This document is for an old release of Galaxy. You can alternatively view this page in the latest release, if it exists, or view the top of the latest release's documentation.

Source code for galaxy.datatypes.graph

"""
Graph content classes.
"""

import logging
from typing import List

from galaxy.datatypes.dataproviders.column import ColumnarDataProvider
from galaxy.datatypes.dataproviders.dataset import DatasetDataProvider
from galaxy.datatypes.dataproviders.hierarchy import XMLDataProvider
from galaxy.datatypes.protocols import DatasetProtocol
from galaxy.util import simplegraph
from . import (
    data,
    dataproviders,
    tabular,
    xml,
)

log = logging.getLogger(__name__)


@dataproviders.decorators.has_dataproviders
class Xgmml(xml.GenericXml):
    """
    Datatype for the XGMML graph format
    (http://wiki.cytoscape.org/Cytoscape_User_Manual/Network_Formats).
    """

    file_ext = "xgmml"

    def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
        """
        Fill in the ``peek`` and ``blurb`` text of ``dataset``.

        A purged dataset gets fixed placeholder text; otherwise the peek is
        read from the dataset's file.
        """
        if dataset.dataset.purged:
            dataset.peek = "file does not exist"
            dataset.blurb = "file purged from disk"
            return
        dataset.peek = data.get_file_peek(dataset.get_file_name())
        dataset.blurb = "XGMML data"

    def sniff(self, filename: str) -> bool:
        """
        Always return False: this format is never auto-detected, so the
        user must set the datatype manually.
        """
        return False

    @staticmethod
    def merge(split_files: List[str], output_file: str) -> None:
        """
        Merge ``split_files`` into ``output_file``.

        Only the single-file case is supported here; merging several XML
        files is non-trivial and must be done in subclasses.

        :raises NotImplementedError: if more than one split file is given.
        """
        if len(split_files) > 1:
            message = (
                "Merging multiple XML files is non-trivial "
                "and must be implemented for each XML type"
            )
            raise NotImplementedError(message)
        # A single file is handed to the plain-text merge (move/copy).
        data.Text.merge(split_files, output_file)

    @dataproviders.decorators.dataprovider_factory("node-edge", XMLDataProvider.settings)
    def node_edge_dataprovider(self, dataset: DatasetProtocol, **settings) -> "XGMMLGraphDataProvider":
        """
        Build the "node-edge" data provider for ``dataset``, forwarding
        ``settings`` to the provider.
        """
        return XGMMLGraphDataProvider(DatasetDataProvider(dataset), **settings)
@dataproviders.decorators.has_dataproviders
class Sif(tabular.Tabular):
    """
    Datatype for the SIF graph format
    (http://wiki.cytoscape.org/Cytoscape_User_Manual/Network_Formats).

    Column layout:

    * first column: node id
    * second column: relationship type
    * third to Nth column: target ids for link
    """

    file_ext = "sif"

    def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
        """
        Fill in the ``peek`` and ``blurb`` text of ``dataset``.

        A purged dataset gets fixed placeholder text; otherwise the peek is
        read from the dataset's file.
        """
        if dataset.dataset.purged:
            dataset.peek = "file does not exist"
            dataset.blurb = "file purged from disk"
            return
        dataset.peek = data.get_file_peek(dataset.get_file_name())
        dataset.blurb = "SIF data"

    def sniff(self, filename: str) -> bool:
        """
        Always return False: this format is never auto-detected, so the
        user must set the datatype manually.
        """
        return False

    @staticmethod
    def merge(split_files: List[str], output_file: str) -> None:
        """
        Merge ``split_files`` into ``output_file`` by delegating to the
        plain-text merge.
        """
        data.Text.merge(split_files, output_file)

    @dataproviders.decorators.dataprovider_factory("node-edge", ColumnarDataProvider.settings)
    def node_edge_dataprovider(self, dataset: DatasetProtocol, **settings) -> "SIFGraphDataProvider":
        """
        Build the "node-edge" data provider for ``dataset``, forwarding
        ``settings`` to the provider.
        """
        return SIFGraphDataProvider(DatasetDataProvider(dataset), **settings)
# ----------------------------------------------------------------------------- graph-specific data providers
class XGMMLGraphDataProvider(XMLDataProvider):
    """
    Provide two lists: nodes, edges::

        'nodes': contains objects of the form:
            { 'id' : <some string id>, 'data': <any extra data> }
        'edges': contains objects of the form:
            { 'source' : <an index into nodes>, 'target': <an index into nodes>, 'data': <any extra data> }
    """

    def __iter__(self):
        # Aggregate everything the parent provider yields into one simple
        # graph, then emit that graph once, as a dict.
        graph = simplegraph.SimpleGraph()
        for parsed in super().__iter__():
            if "children" in parsed:
                for child in parsed["children"]:
                    tag = child["tag"]
                    attrib = child["attrib"]
                    # Match on the tag suffix to work around ElementTree
                    # namespaces.
                    if tag.endswith("node"):
                        # The entire parsed xml element is passed as the data.
                        graph.add_node(attrib["id"], **child)
                    elif tag.endswith("edge"):
                        graph.add_edge(attrib["source"], attrib["target"], **child)
        yield graph.as_dict()
class SIFGraphDataProvider(ColumnarDataProvider):
    """
    Provide two lists: nodes, edges::

        'nodes': contains objects of the form:
            { 'id' : <some string id>, 'data': <any extra data> }
        'edges': contains objects of the form:
            { 'source' : <an index into nodes>, 'target': <an index into nodes>, 'data': <any extra data> }
    """

    def __iter__(self):
        # Aggregate every row the parent provider yields into one simple
        # graph, then emit that graph once, as a dict.
        graph = simplegraph.SimpleGraph()
        # SIF is tabular: source, link-type, then all targets in the columns.
        for row in super().__iter__():
            if not row:
                continue
            source_id = row[0]
            # No extra data is attached to nodes (or links).
            graph.add_node(source_id)
            if len(row) < 3:
                # Fewer than three columns: no targets, so no edges.
                continue
            relation = row[1]
            # Targets are the (variadic) remaining columns.
            for target_id in row[2:]:
                graph.add_node(target_id)
                graph.add_edge(source_id, target_id, type=relation)
        yield graph.as_dict()