from galaxy.util import parse_xml
from .baseparser import (
Base_Parser,
Node,
PhyloTree,
)
class Phyloxml_Parser(Base_Parser):
    """Parse a phyloXML file into the JSON-able structure handed to PhyloViz for display.

    The parser walks the ``clade`` elements of the first child of the document
    root, building nodes through ``self.phyloTree``.
    """

    def __init__(self):
        """Create a parser with an empty ``PhyloTree`` and the phyloXML tag mapping."""
        super().__init__()
        self.phyloTree = PhyloTree()
        # Number of leading characters (the "{namespace}" prefix) that cleanTag
        # strips from each element tag. parseFile recomputes it per document;
        # defaulting to 0 keeps cleanTag usable before any file has been parsed.
        self.nameSpaceIndex = 0
        # Maps phyloXML child tags of a clade to the keyword names passed to
        # PhyloTree.makeNode. "clade" is listed but never copied as an annotation.
        self.tagsOfInterest = {
            "clade": "",
            "name": "name",
            "branch_length": "length",
            "confidence": "bootstrap",
            "events": "events",
        }

    def parseFile(self, filePath):
        """Parse the file at ``filePath`` and extract its phylogeny tree content.

        Only the first child of the document root is read. If it contains
        several ``clade`` children, the last one is used as the tree root.

        :param filePath: path handed to ``galaxy.util.parse_xml``.
        :returns: a ``([jsonDict], "Success")`` tuple, where ``jsonDict`` is the
            result of ``self.phyloTree.generateJsonableDict()``.
        :raises ValueError: if the document root has no child element, or that
            child contains no ``clade`` element.
        """
        xmlTree = parse_xml(filePath)
        documentRoot = xmlTree.getroot()
        if len(documentRoot) == 0:
            raise ValueError(f"No phylogeny element found in phyloxml file: {filePath}")
        xmlRoot = documentRoot[0]
        # Used later by cleanTag to remove the namespace prefix from every element.tag.
        self.nameSpaceIndex = xmlRoot.tag.rfind("}") + 1

        phyloRoot = None
        for child in xmlRoot:
            childTag = self.cleanTag(child.tag)
            if childTag == "clade":
                phyloRoot = child
            elif childTag == "name":
                self.phyloTree.title = child.text

        if phyloRoot is None:
            # Without this guard parseNode would fail with an opaque
            # AttributeError on ``None.tag``.
            raise ValueError(f"No root clade element found in phyloxml file: {filePath}")

        self.phyloTree.root = self.parseNode(phyloRoot, 0)
        jsonDict = self.phyloTree.generateJsonableDict()
        return [jsonDict], "Success"

    def parseNode(self, node, depth):
        """Parse one element of a phyloXML tree, creating a node only for ``clade`` elements.

        A clade with at least one nested clade becomes an internal node and its
        children are parsed recursively; any other clade becomes a leaf node.

        :param node: the XML element to inspect.
        :param depth: depth of ``node`` in the tree (the root clade is 0).
        :returns: the created node, or ``None`` when ``node`` is not a clade.
        """
        if self.cleanTag(node.tag) != "clade":
            return None

        # Peek at the direct children to decide whether this clade is internal.
        hasInnerClade = any(self.cleanTag(child.tag) == "clade" for child in node)
        if not hasInnerClade:
            # Leaf nodes are created with depth + 1, exactly as before.
            return self._makeLeafNode(node, depth=depth + 1)

        currentNode = self._makeInternalNode(node, depth=depth)
        for child in node:
            childNode = self.parseNode(child, depth + 1)
            if isinstance(childNode, Node):  # non-clade children yield None
                currentNode.addChildNode(childNode)
        return currentNode

    def _collectAnnotations(self, cladeElement):
        """Return the makeNode keyword values found among a clade's direct children.

        Tags listed in ``self.tagsOfInterest`` (except ``clade``) are mapped to
        their node keyword and the element text. ``events`` is nested one level
        deeper than the others, so its value is the cleaned tag of its first
        child; an ``events`` element with no children is skipped.
        """
        annotations = {}
        for child in cladeElement:
            childTag = self.cleanTag(child.tag)
            if childTag == "clade" or childTag not in self.tagsOfInterest:
                continue
            if childTag == "events":
                if len(child) == 0:
                    # Empty <events/>: nothing to report, and child[0] would raise.
                    continue
                annotations["events"] = self.cleanTag(child[0].tag)
            else:
                # Need to map phyloxml terms to ours.
                annotations[self.tagsOfInterest[childTag]] = child.text
        return annotations

    def _makeLeafNode(self, leafNode, depth=0):
        """Make a leaf node from a clade element that has no nested clade.

        :param leafNode: the clade element.
        :param depth: stored on the node as its ``depth`` keyword.
        :returns: the node produced by ``self.phyloTree.makeNode``.
        """
        node = self._collectAnnotations(leafNode)
        node["depth"] = depth
        return self.phyloTree.makeNode(self._getNodeName(leafNode), **node)

    def _getNodeName(self, node, depth=-1):
        """Get the name of a clade, handling the case where a taxonomy element is involved.

        The first ``name`` or ``taxonomy`` child, in document order, decides
        the result.

        :param node: the clade element.
        :param depth: accepted for call compatibility; not used.
        :returns: the element text of the name (may be ``None`` for an empty
            element), or ``""`` when no name can be found.
        """
        taxonomyNameTags = ("common_name", "scientific_name", "code")

        def getTagFromTaxonomyNode(taxonomyNode):
            """Return the name of a taxonomy element, which is embedded one level deeper."""
            for child in taxonomyNode:
                if self.cleanTag(child.tag) in taxonomyNameTags:
                    return child.text
            return ""

        for child in node:
            childTag = self.cleanTag(child.tag)
            if childTag == "name":
                return child.text
            if childTag == "taxonomy":
                return getTagFromTaxonomyNode(child)
        return ""

    def _makeInternalNode(self, internalNode, depth=0):
        """Make an internal node from a clade element that is guaranteed to be a parent.

        Collects the values of interest (such as events) and passes them to
        ``self.phyloTree.makeNode``.

        :param internalNode: the clade element.
        :param depth: forwarded to ``_getNodeName`` only.
        :returns: the node produced by ``self.phyloTree.makeNode``.
        """
        # NOTE(review): unlike leaf nodes, depth is not stored on internal nodes
        # here; preserved as-is, confirm this is intended.
        node = self._collectAnnotations(internalNode)
        return self.phyloTree.makeNode(self._getNodeName(internalNode, depth), **node)

    def cleanTag(self, tagString):
        """Return ``tagString`` without its first ``self.nameSpaceIndex`` characters.

        ``parseFile`` sets that index to the length of the ``{namespace}``
        prefix of the first child of the document root, so this assumes every
        tag in the document carries the same prefix.
        """
        return tagString[self.nameSpaceIndex :]