Warning
This document is for an old release of Galaxy. You can alternatively view this page in the latest release if it exists or view the top of the latest release's documentation.
Source code for galaxy.visualization.data_providers.phyloviz.nexusparser
import re
from .newickparser import Newick_Parser
MAX_READLINES = 200000
[docs]class Nexus_Parser(Newick_Parser):
[docs]    def parseFile(self, filePath):
        """passes a file and extracts its Nexus content."""
        return self.parseNexus(filePath)
[docs]    def parseNexus(self, filename):
        """ Nexus data is stored in blocks between a line starting with begin and another line starting with end;
        Commends inside square brackets are to be ignored,
        For more information: http://wiki.christophchamp.com/index.php/NEXUS_file_format
        Nexus can store multiple trees
        """
        with open(filename) as nex_file:
            nexlines = nex_file.readlines()
        rowCount = 0
        inTreeBlock = False         # sentinel to check if we are in a tree block
        intranslateBlock = False    # sentinel to check if we are in the translate region of the tree. Stores synonyms of the labellings
        self.inCommentBlock = False
        self.nameMapping = None       # stores mapping representation used in nexus format
        treeNames = []
        for line in nexlines:
            line = line.replace(";\n", "")
            lline = line.lower()
            if rowCount > MAX_READLINES or (not nex_file) :
                break
            rowCount += 1
            # We are only interested in the tree block.
            if "begin" in lline and "tree" in lline and not inTreeBlock:
                inTreeBlock = True
                continue
            if inTreeBlock and "end" in lline[:3]:
                inTreeBlock, currPhyloTree = False, None
                continue
            if inTreeBlock:
                if "title" in lline:        # Adding title to the tree
                    continue
                if "translate" in lline:
                    intranslateBlock = True
                    self.nameMapping = {}
                    continue
                if intranslateBlock:
                    mappingLine = self.splitLinebyWhitespaces(line)
                    key, value = mappingLine[1], mappingLine[2].replace(",", "").replace("'", "")    # replacing illegal json characters
                    self.nameMapping[key] = value
                # Extracting newick Trees
                if "tree" in lline:
                    intranslateBlock = False
                    treeLineCols = self.splitLinebyWhitespaces(line)
                    treeName, newick = treeLineCols[2], treeLineCols[-1]
                    if newick == "":    # Empty lines can be found in tree blocks
                        continue
                    currPhyloTree = self._parseNewickToJson(newick, treeName, nameMap=self.nameMapping)
                    self.phyloTrees.append(currPhyloTree)
                    treeIndex = len(self.phyloTrees) - 1
                    treeNames.append((treeName, treeIndex))    # appending name of tree, and its index
                    continue
        return self.phyloTrees, treeNames
[docs]    def splitLinebyWhitespaces(self, line):
        """replace tabs and write spaces to a single write space, so we can properly split it."""
        return re.split(r"\s+", line)
[docs]    def checkComments(self, line):
        """Check to see if the line/lines is a comment."""
        if not self.inCommentBlock:
            if "[" in line:
                if "]" not in line:
                    self.inCommentBlock = True
                else:
                    return "Nextline"   # need to move on to the nextline after getting out of comment
        else :
            if "]" in line:
                if line.rfind("[") > line.rfind("]"):
                    pass                # a comment block is closed but another is open.
                else:
                    self.inCommentBlock = False
                    return "Nextline"   # need to move on to the nextline after getting out of comment
        return ""