# Source code for galaxy.datatypes.graph

"""
Graph content classes.
"""
import logging

from galaxy.util import simplegraph
from . import (
    data,
    dataproviders,
    tabular,
    xml
)

log = logging.getLogger(__name__)


@dataproviders.decorators.has_dataproviders
class Xgmml(xml.GenericXml):
    """
    Datatype for graphs stored in the XGMML format
    (http://wiki.cytoscape.org/Cytoscape_User_Manual/Network_Formats).
    """
    file_ext = "xgmml"

    def set_peek(self, dataset, is_multi_byte=False):
        """
        Populate ``dataset.peek`` and ``dataset.blurb``.

        A purged dataset receives fixed placeholder text; otherwise the peek
        is taken from ``data.get_file_peek`` on the dataset's file.
        ``is_multi_byte`` is accepted but not used by this method.
        """
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
            return
        dataset.peek = data.get_file_peek(dataset.file_name)
        dataset.blurb = 'XGMML data'

    def sniff(self, filename):
        """
        Always return False: the format is never auto-detected, so the user
        has to select this datatype manually.
        """
        return False

    @staticmethod
    def merge(split_files, output_file):
        """
        Merge ``split_files`` into ``output_file``.

        Only the single-file case is supported, and it is delegated to
        ``data.Text.merge``. Passing more than one file raises
        ``NotImplementedError``, since combining XML documents has to be
        implemented per XML type.
        """
        more_than_one = len(split_files) > 1
        if more_than_one:
            raise NotImplementedError(
                "Merging multiple XML files is non-trivial and must be implemented for each XML type"
            )
        # a single input: hand off to the plain-text merge
        data.Text.merge(split_files, output_file)

    @dataproviders.decorators.dataprovider_factory('node-edge', dataproviders.hierarchy.XMLDataProvider.settings)
    def node_edge_dataprovider(self, dataset, **settings):
        """
        Build an ``XGMMLGraphDataProvider`` that reads from ``dataset``,
        forwarding ``settings`` to it unchanged.
        """
        source = dataproviders.dataset.DatasetDataProvider(dataset)
        provider = XGMMLGraphDataProvider(source, **settings)
        return provider


@dataproviders.decorators.has_dataproviders
class Sif(tabular.Tabular):
    """
    Datatype for graphs stored in the SIF format
    (http://wiki.cytoscape.org/Cytoscape_User_Manual/Network_Formats).

    Column layout of each row:

    - column 1: node id
    - column 2: relationship type
    - columns 3..N: ids of the nodes the first node links to
    """
    file_ext = "sif"

    def set_peek(self, dataset, is_multi_byte=False):
        """
        Populate ``dataset.peek`` and ``dataset.blurb``.

        A purged dataset receives fixed placeholder text; otherwise the peek
        is taken from ``data.get_file_peek`` on the dataset's file.
        ``is_multi_byte`` is accepted but not used by this method.
        """
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
            return
        dataset.peek = data.get_file_peek(dataset.file_name)
        dataset.blurb = 'SIF data'

    def sniff(self, filename):
        """
        Always return False: the format is never auto-detected, so the user
        has to select this datatype manually.
        """
        return False

    @staticmethod
    def merge(split_files, output_file):
        """
        Merge ``split_files`` into ``output_file`` by delegating to
        ``data.Text.merge``.
        """
        data.Text.merge(split_files, output_file)

    @dataproviders.decorators.dataprovider_factory('node-edge', dataproviders.column.ColumnarDataProvider.settings)
    def node_edge_dataprovider(self, dataset, **settings):
        """
        Build a ``SIFGraphDataProvider`` that reads from ``dataset``,
        forwarding ``settings`` to it unchanged.
        """
        source = dataproviders.dataset.DatasetDataProvider(dataset)
        provider = SIFGraphDataProvider(source, **settings)
        return provider


# ----------------------------------------------------------------------------- graph specific data providers
class XGMMLGraphDataProvider(dataproviders.hierarchy.XMLDataProvider):
    """
    Aggregate the parsed XML into one graph and provide two lists, nodes and
    edges::

        'nodes': contains objects of the form:
            { 'id' : <some string id>, 'data': <any extra data> }
        'edges': contains objects of the form:
            { 'source' : <an index into nodes>, 'target': <an index into nodes>, 'data': <any extra data> }
    """

    def __iter__(self):
        # Everything is collected into a single SimpleGraph first and only
        # then emitted, so this iterator yields exactly one item.
        collected = simplegraph.SimpleGraph()

        for top_level in super().__iter__():
            if 'children' in top_level:
                for child in top_level['children']:
                    tag = child['tag']
                    # suffix match, so namespace-qualified tag names from
                    # ElementTree are recognised as well
                    if tag.endswith('node'):
                        # the whole parsed element is attached as node data
                        collected.add_node(child['attrib']['id'], **child)
                    elif tag.endswith('edge'):
                        attributes = child['attrib']
                        collected.add_edge(attributes['source'], attributes['target'], **child)

        yield collected.as_dict()


class SIFGraphDataProvider(dataproviders.column.ColumnarDataProvider):
    """
    Aggregate the SIF rows into one graph and provide two lists, nodes and
    edges::

        'nodes': contains objects of the form:
            { 'id' : <some string id>, 'data': <any extra data> }
        'edges': contains objects of the form:
            { 'source' : <an index into nodes>, 'target': <an index into nodes>, 'data': <any extra data> }
    """

    def __iter__(self):
        # Everything is collected into a single SimpleGraph first and only
        # then emitted, so this iterator yields exactly one item.
        sif_graph = simplegraph.SimpleGraph()

        # each row holds: source id, link type, then any number of target ids
        for row in super().__iter__():
            if not row:
                continue

            origin = row[0]
            # nodes are registered by id only, with no extra data attached
            sif_graph.add_node(origin)

            # without at least one target column there is no edge to record
            if len(row) < 3:
                continue

            link_type = row[1]
            for destination in row[2:]:
                sif_graph.add_node(destination)
                sif_graph.add_edge(origin, destination, type=link_type)

        yield sif_graph.as_dict()