Source code for galaxy.datatypes.graph

"""
Graph content classes.
"""
import logging

from galaxy.util import simplegraph
from . import (
    data,
    dataproviders,
    tabular,
    xml
)

# Module-level logger, named after this module via ``__name__``.
log = logging.getLogger(__name__)


@dataproviders.decorators.has_dataproviders
class Xgmml(xml.GenericXml):
    """
    XGMML graph format
    (http://wiki.cytoscape.org/Cytoscape_User_Manual/Network_Formats).
    """
    file_ext = "xgmml"

    def set_peek(self, dataset, is_multi_byte=False):
        """
        Populate ``dataset.peek`` and ``dataset.blurb``.

        A purged dataset gets placeholder text; otherwise the peek is taken
        from the dataset's file. ``is_multi_byte`` is accepted but not used
        by this implementation.
        """
        # Purged datasets have nothing to read, so only placeholders are set.
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
            return
        dataset.peek = data.get_file_peek(dataset.file_name)
        dataset.blurb = 'XGMML data'

    def sniff(self, filename):
        """
        Always return False: this datatype is never auto-detected, so the
        user must set it manually.
        """
        return False

    @staticmethod
    def merge(split_files, output_file):
        """
        Merge ``split_files`` into ``output_file``.

        Merging multiple XML files is non-trivial and must be done in
        subclasses, so more than one split file raises
        ``NotImplementedError``. Otherwise the work is delegated to
        ``data.Text.merge``.
        """
        file_count = len(split_files)
        if file_count > 1:
            raise NotImplementedError(
                "Merging multiple XML files is non-trivial "
                "and must be implemented for each XML type"
            )
        # Not more than one file: the base text merge is sufficient.
        data.Text.merge(split_files, output_file)

    @dataproviders.decorators.dataprovider_factory(
        'node-edge',
        dataproviders.hierarchy.XMLDataProvider.settings,
    )
    def node_edge_dataprovider(self, dataset, **settings):
        """
        Build an ``XGMMLGraphDataProvider`` that reads from ``dataset``.

        Any keyword ``settings`` are forwarded unchanged to the provider.
        """
        source = dataproviders.dataset.DatasetDataProvider(dataset)
        return XGMMLGraphDataProvider(source, **settings)
@dataproviders.decorators.has_dataproviders
class Sif(tabular.Tabular):
    """
    SIF graph format
    (http://wiki.cytoscape.org/Cytoscape_User_Manual/Network_Formats).

    First column: node id
    Second column: relationship type
    Third to Nth column: target ids for link
    """
    file_ext = "sif"

    def set_peek(self, dataset, is_multi_byte=False):
        """
        Populate ``dataset.peek`` and ``dataset.blurb``.

        A purged dataset gets placeholder text; otherwise the peek is taken
        from the dataset's file. ``is_multi_byte`` is accepted but not used
        by this implementation.
        """
        # Purged datasets have nothing to read, so only placeholders are set.
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
            return
        dataset.peek = data.get_file_peek(dataset.file_name)
        dataset.blurb = 'SIF data'

    def sniff(self, filename):
        """
        Always return False: this datatype is never auto-detected, so the
        user must set it manually.
        """
        return False

    @staticmethod
    def merge(split_files, output_file):
        """
        Merge ``split_files`` into ``output_file`` by delegating to
        ``data.Text.merge``.
        """
        data.Text.merge(split_files, output_file)

    @dataproviders.decorators.dataprovider_factory(
        'node-edge',
        dataproviders.column.ColumnarDataProvider.settings,
    )
    def node_edge_dataprovider(self, dataset, **settings):
        """
        Build a ``SIFGraphDataProvider`` that reads from ``dataset``.

        Any keyword ``settings`` are forwarded unchanged to the provider.
        """
        source = dataproviders.dataset.DatasetDataProvider(dataset)
        return SIFGraphDataProvider(source, **settings)
# ----------------------------------------------------------------------------- graph specific data providers
class XGMMLGraphDataProvider(dataproviders.hierarchy.XMLDataProvider):
    """
    Provide two lists: nodes, edges::

        'nodes': contains objects of the form:
            { 'id' : <some string id>, 'data': <any extra data> }
        'edges': contains objects of the form:
            { 'source' : <an index into nodes>, 'target': <an index into nodes>,
              'data': <any extra data> }
    """

    def __iter__(self):
        """
        Yield a single dict describing the whole graph.

        Every element produced by the parent XML provider is scanned for
        ``node`` and ``edge`` children, which are accumulated in a
        ``SimpleGraph``; the result of ``as_dict()`` is yielded once at the
        end, so this is an aggregation rather than a stream.
        """
        aggregate = simplegraph.SimpleGraph()

        for graph_elem in super(XGMMLGraphDataProvider, self).__iter__():
            if 'children' in graph_elem:
                for child in graph_elem['children']:
                    # endswith is used to work around ElementTree namespaces
                    tag = child['tag']
                    if tag.endswith('node'):
                        # the entire parsed xml element is passed as the data
                        aggregate.add_node(child['attrib']['id'], **child)
                    elif tag.endswith('edge'):
                        attrib = child['attrib']
                        aggregate.add_edge(attrib['source'], attrib['target'], **child)

        yield aggregate.as_dict()
class SIFGraphDataProvider(dataproviders.column.ColumnarDataProvider):
    """
    Provide two lists: nodes, edges::

        'nodes': contains objects of the form:
            { 'id' : <some string id>, 'data': <any extra data> }
        'edges': contains objects of the form:
            { 'source' : <an index into nodes>, 'target': <an index into nodes>,
              'data': <any extra data> }
    """

    def __iter__(self):
        """
        Yield a single dict describing the whole graph.

        Each non-empty row from the parent columnar provider contributes its
        first column as a node. Rows with three or more columns also
        contribute one node and one edge per target column, with the second
        column stored as the edge ``type``. The accumulated ``SimpleGraph``
        is yielded once, via ``as_dict()``, after all rows are consumed.
        """
        aggregate = simplegraph.SimpleGraph()

        # SIF is tabular: source, link-type, then all targets in the columns
        for columns in super(SIFGraphDataProvider, self).__iter__():
            if not columns:
                continue

            source_id = columns[0]
            # no extra data is attached to nodes (or links)
            aggregate.add_node(source_id)

            # fewer than three columns means there are no targets to link
            if len(columns) < 3:
                continue

            relation = columns[1]
            # targets are the (variadic) remaining columns
            for target_id in columns[2:]:
                aggregate.add_node(target_id)
                aggregate.add_edge(source_id, target_id, type=relation)

        yield aggregate.as_dict()