Warning

This document is for an old release of Galaxy. You can alternatively view this page in the latest release if it exists or view the top of the latest release's documentation.

Source code for galaxy.datatypes.converters.fasta_to_len

#!/usr/bin/env python
"""
Input: fasta, int
Output: tabular
Return titles with lengths of corresponding seq
"""
import sys

# Refuse to run on interpreters older than Python 2.4.
assert sys.version_info[:2] >= (2, 4)


def _format_fasta_entry(title, seq_len, title_end, keep_first_word):
    """Return the ``name<TAB>length`` output row for one FASTA record.

    ``title`` is the full header line including the leading '>'.
    ``title_end`` is the exclusive slice end applied to it (None = no limit).
    """
    if keep_first_word:
        # NOTE: the split happens before the '>' is removed, so a header
        # written as "> name" (space after '>') yields an empty name.
        title = title.split()[0]
    # Index 0 is the '>' marker, hence the slice starts at 1.
    return "%s\t%d\n" % (title[1:title_end], seq_len)


def compute_fasta_length(fasta_file, out_file, keep_first_char, keep_first_word=False):
    """Write one ``title<TAB>sequence_length`` row per record of a FASTA file.

    Blank lines and lines starting with '#' are ignored. A line starting with
    '>' begins a new record; every other line adds its stripped length to the
    current record.

    :param fasta_file: path of the FASTA file to read.
    :param out_file: path of the tabular file to write (overwritten).
    :param keep_first_char: int (or int-like string) giving the number of
        title characters to keep; 0 keeps the whole title.
    :param keep_first_word: when true, cut the title at the first whitespace
        before applying ``keep_first_char``.
    :raises ValueError: if ``keep_first_char`` is a string that is not an
        integer (raised before any file is opened).

    If the input contains no '>' header at all, no row is written.
    """
    # Validate the argument before touching the filesystem.
    keep_first_char = int(keep_first_char)
    if keep_first_char == 0:
        title_end = None
    else:
        # +1 because the slice also has to skip the leading '>'.
        title_end = keep_first_char + 1

    fasta_title = None  # None until the first header has been seen
    seq_len = 0

    # try/finally (rather than `with`) keeps the module usable on the
    # Python 2.4 floor asserted at import time while still guaranteeing that
    # both handles are closed, even on error.
    # The input is opened first so an unreadable input does not truncate
    # an existing output file.
    in_handle = open(fasta_file)
    try:
        out = open(out_file, 'w')
        try:
            for line in in_handle:
                line = line.strip()
                if not line or line.startswith('#'):
                    continue
                if line[0] == '>':
                    # A new header closes the previous record, if any.
                    if fasta_title is not None:
                        out.write(_format_fasta_entry(fasta_title, seq_len, title_end, keep_first_word))
                    fasta_title = line
                    seq_len = 0
                else:
                    seq_len += len(line)
            # Flush the last record; skipped for header-less input.
            if fasta_title is not None:
                out.write(_format_fasta_entry(fasta_title, seq_len, title_end, keep_first_word))
        finally:
            out.close()
    finally:
        in_handle.close()
if __name__ == "__main__":
    # Script usage: <fasta_file> <out_file> <keep_first_char>.
    # Titles are always reduced to their first word when run from the command line.
    compute_fasta_length(sys.argv[1], sys.argv[2], sys.argv[3], keep_first_word=True)