Warning

This document is for an old release of Galaxy. You can alternatively view this page in the latest release if it exists or view the top of the latest release's documentation.

Source code for galaxy.visualization.data_providers.phyloviz.nexusparser

import re

from .newickparser import Newick_Parser

# Line-count cap checked by Nexus_Parser.parseNexus: parsing stops once more
# than this many lines of the input file have been consumed.
MAX_READLINES = 200000


class Nexus_Parser(Newick_Parser):
    """Extract the Newick trees stored in the tree block(s) of a Nexus file.

    Relies on ``self.phyloTrees`` and ``self._parseNewickToJson`` being
    supplied by ``Newick_Parser``.
    """

    def __init__(self):
        super().__init__()

    def parseFile(self, filePath):
        """Parse the Nexus file at ``filePath``; thin wrapper around ``parseNexus``."""
        return self.parseNexus(filePath)

    def parseNexus(self, filename):
        """Read a Nexus file and convert every tree statement found in it.

        Nexus data is stored in blocks delimited by a ``begin ...`` line and
        an ``end`` line; only tree blocks are inspected here.  Inside a tree
        block three kinds of statements are recognised by their first word
        (case-insensitive):

        * ``title``     -- ignored.
        * ``translate`` -- starts a table of ``key label`` entries, one per
          line, collected into ``self.nameMapping``.  Commas and single
          quotes are removed from the label.  The table ends at the line
          carrying the terminating ``;`` or at the next tree statement.
        * ``tree`` / ``utree`` -- the second token is the tree name and the
          last whitespace-delimited token is the Newick string (so the
          Newick text must not contain spaces).  The statement must be
          terminated by ``;`` on the same line; otherwise it is skipped.

        Blank lines and unrecognised statements are ignored.  At most
        ``MAX_READLINES + 1`` lines are read from the file.

        For more information on the format:
        http://wiki.christophchamp.com/index.php/NEXUS_file_format

        :param filename: path of the Nexus file to read.
        :returns: tuple ``(self.phyloTrees, treeNames)`` where ``treeNames``
            is a list of ``(treeName, index into self.phyloTrees)`` pairs for
            the trees added by this call.
        """
        inTreeBlock = False  # True while between "begin trees" and "end"
        intranslateBlock = False  # True while reading translate-table entries
        self.inCommentBlock = False
        self.nameMapping = None  # key -> label table from the translate statement
        treeNames = []

        # Iterate the file lazily so the line cap also bounds memory use.
        with open(filename) as nex_file:
            for rowCount, rawLine in enumerate(nex_file):
                if rowCount > MAX_READLINES:
                    break

                line = rawLine.strip()
                terminated = line.endswith(";")
                if terminated:
                    line = line[:-1].rstrip()
                tokens = line.split()
                keyword = tokens[0].lower() if tokens else ""

                # We are only interested in the tree block.
                if not inTreeBlock:
                    lline = line.lower()
                    if "begin" in lline and "tree" in lline:
                        inTreeBlock = True
                    continue

                if keyword in ("end", "endblock"):
                    inTreeBlock = False
                    intranslateBlock = False
                    continue

                if keyword == "title":
                    continue

                if keyword == "translate":
                    intranslateBlock = True
                    self.nameMapping = {}
                    continue

                if keyword in ("tree", "utree"):
                    # A tree statement implicitly closes the translate table.
                    intranslateBlock = False
                    # Skip statements that are incomplete on this line
                    # (no terminator, or nothing after the "=").
                    if not terminated or len(tokens) < 3 or tokens[-1] == "=":
                        continue
                    treeName, newick = tokens[1], tokens[-1]
                    currPhyloTree = self._parseNewickToJson(newick, treeName, nameMap=self.nameMapping)
                    self.phyloTrees.append(currPhyloTree)
                    # Record the tree's name together with its index.
                    treeNames.append((treeName, len(self.phyloTrees) - 1))
                    continue

                if intranslateBlock:
                    # Blank lines and a lone ";" carry no entry; ignore them
                    # instead of indexing past the end of the token list.
                    if len(tokens) >= 2:
                        # Strip characters that are illegal in the JSON output.
                        label = tokens[1].replace(",", "").replace("'", "")
                        self.nameMapping[tokens[0]] = label
                    if terminated:
                        intranslateBlock = False

        return self.phyloTrees, treeNames

    def splitLinebyWhitespaces(self, line):
        """Split ``line`` on runs of whitespace (tabs, spaces, newlines).

        Leading or trailing whitespace yields an empty string at the
        corresponding end of the returned list.
        """
        return re.split(r"\s+", line)

    def checkComments(self, line):
        """Track square-bracket comment state across lines.

        Updates ``self.inCommentBlock`` and returns ``"Nextline"`` when
        ``line`` holds a complete ``[...]`` comment or closes an open comment
        block; returns ``""`` in every other case (including the line that
        opens a multi-line comment).
        """
        if self.inCommentBlock:
            # The block is closed only if no new "[" follows the last "]".
            if "]" in line and line.rfind("[") <= line.rfind("]"):
                self.inCommentBlock = False
                return "Nextline"  # move on to the next line after leaving the comment
            return ""

        if "[" in line:
            if "]" in line:
                return "Nextline"  # comment opened and closed on this line
            self.inCommentBlock = True
        return ""