Warning
This document is for an in-development version of Galaxy. You can alternatively view this page in the latest release if it exists or view the top of the latest release's documentation.
Source code for galaxy.visualization.data_providers.phyloviz.phyloxmlparser
from xml.etree import ElementTree
from .baseparser import (
    Base_Parser,
    Node,
    PhyloTree
)
class Phyloxml_Parser(Base_Parser):
    """Parses a phyloxml file into a json file that will be passed to PhyloViz for display"""

    def __init__(self):
        """Create an empty PhyloTree and the phyloxml-tag to node-attribute map."""
        super(Phyloxml_Parser, self).__init__()
        self.phyloTree = PhyloTree()
        # Maps the phyloxml child tags we care about to the keyword names that
        # are handed to PhyloTree.makeNode. "clade" is listed with an empty
        # name because nested clades are parsed as nodes, not as attributes.
        self.tagsOfInterest = {
            "clade": "",
            "name": "name",
            "branch_length": "length",
            "confidence": "bootstrap",
            "events": "events",
        }

    def parseFile(self, filePath):
        """Parse a phyloxml file and extract its phylogeny tree content.

        Only the first child element of the XML document root is read. Its
        ``name`` child (if any) becomes the tree title and its ``clade`` child
        is parsed recursively into the root node of ``self.phyloTree``.

        :param filePath: path of the phyloxml file to read
        :returns: the tuple ``([jsonDict], "Success")`` where ``jsonDict`` is
                  the result of ``self.phyloTree.generateJsonableDict()``
        """
        # The context manager closes the handle even when the XML is malformed
        # and ElementTree.parse raises.
        with open(filePath, "r") as phyloXmlFile:
            xmlTree = ElementTree.parse(phyloXmlFile)
        xmlRoot = xmlTree.getroot()[0]
        # Used later by the cleanTag method to remove the name space in every
        # element.tag (tags look like "{namespace}clade").
        self.nameSpaceIndex = xmlRoot.tag.rfind("}") + 1

        phyloRoot = None
        for child in xmlRoot:
            childTag = self.cleanTag(child.tag)
            if childTag == "clade":
                # No break: if several top-level clades exist, the last one wins.
                phyloRoot = child
            elif childTag == "name":
                self.phyloTree.title = child.text

        # NOTE(review): when the phylogeny has no clade child, phyloRoot is
        # still None here and parseNode fails on ``node.tag``.
        self.phyloTree.root = self.parseNode(phyloRoot, 0)
        jsonDict = self.phyloTree.generateJsonableDict()
        return [jsonDict], "Success"

    def parseNode(self, node, depth):
        """Parse one element of a phyloxml tree, recursing into nested clades.

        A ``clade`` element signals the creation of a node: it becomes an
        internal node when it directly contains another ``clade`` and a leaf
        node otherwise.

        :param node: the XML element to parse
        :param depth: depth of ``node``; children are parsed with ``depth + 1``
        :returns: the node that was built, or None if ``node`` is not a clade
        """
        if self.cleanTag(node.tag) != "clade":
            return None

        # Peek at the direct children to find out whether this clade is internal.
        hasInnerClade = any(self.cleanTag(child.tag) == "clade" for child in node)
        if not hasInnerClade:
            # Leaf node. It is created with depth + 1 (kept as in the original).
            return self._makeLeafNode(node, depth=depth + 1)

        currentNode = self._makeInternalNode(node, depth=depth)
        for child in node:
            # Non-clade children come back as None and are not attached.
            childNode = self.parseNode(child, depth + 1)
            if isinstance(childNode, Node):
                currentNode.addChildNode(childNode)
        return currentNode

    def _makeLeafNode(self, leafNode, depth=0):
        """Make a leaf node from a clade element by calling PhyloTree.makeNode.

        :param leafNode: clade element without nested clades
        :param depth: value stored under the "depth" attribute of the node
        :returns: whatever ``self.phyloTree.makeNode`` returns
        """
        node = {}
        for child in leafNode:
            childTag = self.cleanTag(child.tag)
            if childTag in self.tagsOfInterest:
                # Need to map phyloxml terms to ours.
                node[self.tagsOfInterest[childTag]] = child.text
        node["depth"] = depth
        return self.phyloTree.makeNode(self._getNodeName(leafNode), **node)

    def _getNodeName(self, node, depth=-1):
        """Get the name of a clade. It handles the case where a taxonomy node is involved.

        The first ``name`` or ``taxonomy`` child decides the result.

        :param node: clade element to name
        :param depth: not used by this method; kept so existing calls still work
        :returns: the name text, or "" when neither child is present
        """
        def getTagFromTaxonomyNode(taxonomyNode):
            """Return the name of a taxonomy node.

            A taxonomy node has to be treated differently as the name is
            embedded one level deeper; the text of the first recognised
            child is used.
            """
            phyloxmlTaxoNames = ("common_name", "scientific_name", "code")
            for taxoChild in taxonomyNode:
                if self.cleanTag(taxoChild.tag) in phyloxmlTaxoNames:
                    return taxoChild.text
            return ""

        for child in node:
            childTag = self.cleanTag(child.tag)
            if childTag == "name":
                return child.text
            if childTag == "taxonomy":
                return getTagFromTaxonomyNode(child)
        return ""

    def _makeInternalNode(self, internalNode, depth=0):
        """Make an internal node from an element that is guaranteed to be a parent node.

        Collects the values of interest (name, branch length, confidence,
        events) into keyword arguments that are passed to PhyloTree.makeNode.
        Nested clades are skipped here; parseNode attaches them as children.

        :param internalNode: clade element containing at least one nested clade
        :param depth: depth of the node (see the review note before the return)
        :returns: whatever ``self.phyloTree.makeNode`` returns
        """
        node = {}
        for child in internalNode:
            childTag = self.cleanTag(child.tag)
            if childTag == "clade" or childTag not in self.tagsOfInterest:
                continue
            if childTag == "events":
                # events is nested 1 more level deeper than others: the value
                # recorded is the tag of its first sub-element.
                if len(child) == 0:
                    # An empty <events/> carries nothing to record; skipping it
                    # avoids the IndexError that child[0] would raise.
                    continue
                node["events"] = self.cleanTag(child[0].tag)
            else:
                node[self.tagsOfInterest[childTag]] = child.text
        # NOTE(review): depth is only forwarded to _getNodeName, which does not
        # use it, so internal nodes are created without an explicit "depth"
        # attribute (leaf nodes do get one). Confirm whether that is intended.
        return self.phyloTree.makeNode(self._getNodeName(internalNode, depth), **node)