Warning
This document is for an old release of Galaxy. You can alternatively view this page in the latest release if it exists or view the top of the latest release's documentation.
Source code for galaxy.datatypes.converters.fasta_to_len
#!/usr/bin/env python
"""
Input: fasta, int
Output: tabular
Write one line per FASTA record: the record title and the length of its sequence.
"""
import sys
# Refuse to run on interpreters older than Python 2.4.
# NOTE(review): compute_fasta_length below uses `with` statements, which need
# a newer interpreter than 2.4, so this floor looks stale — confirm and raise.
assert sys.version_info[:2] >= (2, 4)
def compute_fasta_length(fasta_file, out_file, keep_first_char, keep_first_word=False):
    """Write a ``title<TAB>length`` line to *out_file* for each FASTA record.

    Lines are stripped before use. Blank lines and lines starting with ``#``
    are skipped. A line starting with ``>`` opens a new record; every other
    line adds its stripped length to the current record. Sequence lines that
    appear before the first ``>`` header are not attributed to any record,
    and input without any header produces an empty output file.

    :param fasta_file: path of the FASTA file to read.
    :param out_file: path of the tabular file to write (truncated if present).
    :param keep_first_char: number of title characters to keep, as an int or
        anything ``int()`` accepts; ``0`` keeps the whole title.
    :param keep_first_word: when true, the header line is cut at its first
        whitespace before ``keep_first_char`` is applied.
    :raises ValueError: if *keep_first_char* cannot be converted by ``int()``.
    """
    keep_first_char = int(keep_first_char)
    # Headers are sliced as header[1:title_end] to drop the leading '>', so
    # the slice end is one past the requested count; None means "no limit".
    title_end = keep_first_char + 1 if keep_first_char else None

    def write_entry(out, header, length):
        # Emit one finished record; `header` still carries its leading '>'.
        if keep_first_word:
            header = header.split()[0]
        out.write("%s\t%d\n" % (header[1:title_end], length))

    header = None  # stays None until the first '>' line has been read
    seq_len = 0
    with open(out_file, 'w') as out:
        with open(fasta_file) as fh:
            for line in fh:
                line = line.strip()
                if not line or line.startswith('#'):
                    continue
                if line.startswith('>'):
                    # A new header closes the record that was being counted.
                    if header is not None:
                        write_entry(out, header, seq_len)
                    header = line
                    seq_len = 0
                else:
                    seq_len += len(line)
        # Last record. Skipped when no header was ever seen, so empty or
        # comment-only input yields an empty file instead of an IndexError
        # (keep_first_word) or a bogus "\t0" line.
        if header is not None:
            write_entry(out, header, seq_len)
if __name__ == "__main__":
    # Command line: <fasta_file> <out_file> <keep_first_char>.
    # When run as a script, titles are always cut at the first whitespace.
    fasta_path = sys.argv[1]
    len_path = sys.argv[2]
    title_chars = sys.argv[3]
    compute_fasta_length(fasta_path, len_path, title_chars, keep_first_word=True)