Warning

This document is for an old release of Galaxy. You can alternatively view this page in the latest release if it exists or view the top of the latest release's documentation.

Source code for galaxy.visualization.data_providers.phyloviz.phyloxmlparser

from baseparser import Base_Parser, PhyloTree, Node
from xml.etree import ElementTree


class Phyloxml_Parser(Base_Parser):
    """Parses a phyloxml file into a json-able dict that will be passed to PhyloViz for display"""

    def __init__(self):
        """Set up an empty PhyloTree and the phyloxml-tag -> node-attribute mapping."""
        super(Phyloxml_Parser, self).__init__()
        self.phyloTree = PhyloTree()
        # Maps phyloxml element names to the keyword names handed to
        # PhyloTree.makeNode. "clade" has no attribute of its own; it is
        # listed here but handled structurally by parseNode.
        self.tagsOfInterest = {
            "clade": "",
            "name": "name",
            "branch_length": "length",
            "confidence": "bootstrap",
            "events": "events",
        }
        # Length of the "{namespace}" prefix on element tags. parseFile
        # overwrites it; defaulting to 0 keeps cleanTag usable (as a no-op)
        # before any file has been parsed.
        self.nameSpaceIndex = 0

    def parseFile(self, filePath):
        """Parse the phyloxml file at *filePath* and extract its phylogeny tree.

        Only the first child of the document root is read. Returns a tuple of
        a one-element list holding the tree's json-able dict, and the status
        string "Success".
        """
        # Binary mode lets the XML parser apply the encoding declared in the
        # document; the context manager guarantees the handle is closed.
        with open(filePath, "rb") as phyloXmlFile:
            xmlTree = ElementTree.parse(phyloXmlFile)
        xmlRoot = xmlTree.getroot()[0]
        # used later by the clean tag method to remove the name space in every element.tag
        self.nameSpaceIndex = xmlRoot.tag.rfind("}") + 1

        phyloRoot = None
        for child in xmlRoot:
            childTag = self.cleanTag(child.tag)
            if childTag == "clade":
                phyloRoot = child
            elif childTag == "name":
                self.phyloTree.title = child.text

        # NOTE(review): if no <clade> child was found, phyloRoot is still None
        # and parseNode fails with AttributeError; kept as-is so callers see
        # the same exception as before.
        self.phyloTree.root = self.parseNode(phyloRoot, 0)
        jsonDict = self.phyloTree.generateJsonableDict()
        return [jsonDict], "Success"

    def parseNode(self, node, depth):
        """Parse one element of a phyloxml tree into a node.

        Returns None when *node* is not a clade. A clade that contains at
        least one nested clade becomes an internal node whose clade children
        are parsed recursively; any other clade becomes a leaf node.
        """
        tag = self.cleanTag(node.tag)
        if tag != "clade":
            return None

        # Peek at the direct children: one nested clade is enough to make
        # this an internal node.
        hasInnerClade = any(self.cleanTag(child.tag) == "clade" for child in node)
        if not hasInnerClade:
            # this node is a leaf node
            return self._makeLeafNode(node, depth=depth + 1)

        # this node is an internal node
        currentNode = self._makeInternalNode(node, depth=depth)
        for child in node:
            childNode = self.parseNode(child, depth + 1)
            # Non-clade children come back as None and are skipped.
            if isinstance(childNode, Node):
                currentNode.addChildNode(childNode)
        return currentNode

    def _makeLeafNode(self, leafNode, depth=0):
        """Make a leaf node by collecting the tags of interest and calling PhyloTree.makeNode."""
        node = {}
        for child in leafNode:
            childTag = self.cleanTag(child.tag)
            if childTag in self.tagsOfInterest:
                # need to map phyloxml terms to ours
                key = self.tagsOfInterest[childTag]
                node[key] = child.text

        node["depth"] = depth
        return self.phyloTree.makeNode(self._getNodeName(leafNode), **node)

    def _getNodeName(self, node, depth=-1):
        """Get the name of a clade, handling the case where a taxonomy node is involved.

        The first direct child that is a "name" or a "taxonomy" element
        decides the result; an empty string is returned when neither exists.
        *depth* is accepted for call compatibility and is not used.
        """
        taxonomyNameTags = ("common_name", "scientific_name", "code")

        def getTagFromTaxonomyNode(taxonomyNode):
            """Return the text of the first naming child of a taxonomy node.

            A taxonomy node has to be treated differently because the name is
            embedded one level deeper.
            """
            for taxonomyChild in taxonomyNode:
                if self.cleanTag(taxonomyChild.tag) in taxonomyNameTags:
                    return taxonomyChild.text
            return ""

        for child in node:
            childTag = self.cleanTag(child.tag)
            if childTag == "name":
                return child.text
            if childTag == "taxonomy":
                return getTagFromTaxonomyNode(child)
        return ""

    def _makeInternalNode(self, internalNode, depth=0):
        """Make an internal node from an element that is guaranteed to be a parent node.

        Collects the values of interest (such as events) into keyword
        arguments that are passed to PhyloTree.makeNode. Nested clades are
        skipped here; parseNode attaches them as children.
        """
        node = {}
        for child in internalNode:
            childTag = self.cleanTag(child.tag)
            if childTag == "clade" or childTag not in self.tagsOfInterest:
                continue

            if childTag == "events":
                # events is nested 1 more level deeper than others: the value
                # is the tag name of its first child. An empty <events/> has
                # no such child, so it is left unset instead of raising
                # IndexError.
                if len(child) == 0:
                    continue
                key, text = "events", self.cleanTag(child[0].tag)
            else:
                key = self.tagsOfInterest[childTag]
                text = child.text
            node[key] = text

        # NOTE(review): depth is only forwarded to _getNodeName, which ignores
        # it, so internal nodes are built without a "depth" keyword - confirm
        # whether that is intended.
        return self.phyloTree.makeNode(self._getNodeName(internalNode, depth), **node)

    def cleanTag(self, tagString):
        """Strip the namespace prefix from *tagString* using the length recorded by parseFile."""
        return tagString[self.nameSpaceIndex:]