Warning
This document is for an in-development version of Galaxy. You can alternatively view this page in the latest release if it exists or view the top of the latest release's documentation.
Source code for galaxy.datatypes.phylip
"""
Created on January 05, 2018
@authors: Kenzo-Hugo Hillion and Fabien Mareuil, Institut Pasteur, Paris
@contacts: kehillio@pasteur.fr and fabien.mareuil@pasteur.fr
@project: galaxy
@githuborganization: C3BI
Phylip datatype sniffer
"""
from galaxy import util
from galaxy.datatypes.data import get_file_peek, Text
from galaxy.datatypes.sniff import build_sniff_from_prefix
from galaxy.util import nice_size
from .metadata import MetadataElement
@build_sniff_from_prefix
class Phylip(Text):
    """Phylip format stores a multiple sequence alignment.

    The code in this class relies on the first whitespace-separated token of
    the first line being the number of sequences in the alignment.
    """
    edam_data = "data_0863"
    edam_format = "format_1997"
    file_ext = "phylip"

    # Add metadata elements
    MetadataElement(name="sequences", default=0, desc="Number of sequences", readonly=True,
                    visible=False, optional=True, no_value=0)

    def set_meta(self, dataset, **kwd):
        """
        Set the number of sequences and the number of data lines in dataset.

        The number of data lines comes from ``self.count_data_lines``; the
        number of sequences is the first token of the first line of the file.

        :param dataset: dataset whose ``metadata`` is updated in place.
        :raises Exception: if the first line cannot be read or does not start
            with an integer.
        """
        dataset.metadata.data_lines = self.count_data_lines(dataset)
        try:
            # Context manager guarantees the file handle is closed, even when
            # parsing the header fails.
            with open(dataset.file_name) as handle:
                dataset.metadata.sequences = int(handle.readline().split()[0])
        except Exception:
            raise Exception("Header does not correspond to PHYLIP header.")

    def set_peek(self, dataset, is_multi_byte=False):
        """
        Set ``dataset.peek`` and ``dataset.blurb``.

        For a purged dataset both are set to fixed placeholder messages.
        Otherwise the peek is taken from the file and the blurb reports the
        number of sequences when that metadata is set (non-zero), falling back
        to the human-readable file size.
        """
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
            return
        dataset.peek = get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte)
        if dataset.metadata.sequences:
            dataset.blurb = "%s sequences" % util.commaify(str(dataset.metadata.sequences))
        else:
            dataset.blurb = nice_size(dataset.get_size())

    def sniff_prefix(self, file_prefix):
        """
        All Phylip files start with the number of sequences, so we can use this
        to count the following number of sequences in the first 'stack'.

        Returns False (instead of raising) when the first line is empty, does
        not start with an integer, or declares a non-positive sequence count.

        >>> from galaxy.datatypes.sniff import get_test_fname
        >>> fname = get_test_fname('test.phylip')
        >>> Phylip().sniff(fname)
        True
        """
        f = file_prefix.string_io()
        # Get number of sequences from first line; a missing or non-integer
        # header simply means "not Phylip".
        try:
            nb_seq = int(f.readline().split()[0])
        except (IndexError, ValueError):
            return False
        # Explicit check rather than ``assert``, which is stripped under -O.
        if nb_seq <= 0:
            return False
        # Count the lines of the first stack: every line up to the first
        # blank line (or the end of the prefix).
        count = 0
        for line in f:
            if not line.split():
                break
            count += 1
            if count > nb_seq:
                # More lines than declared sequences: stop reading early.
                return False
        return count == nb_seq