Warning

This document is for an old release of Galaxy. You can alternatively view this page in the latest release if it exists or view the top of the latest release's documentation.

Source code for galaxy.datatypes.converters.fasta_to_len

#!/usr/bin/env python
"""
Input: fasta, int
Output: tabular
Write each sequence title and the length of its sequence as one tab-separated row
"""
import sys

# Refuse to load on interpreters older than Python 2.4.
assert (2, 4) <= sys.version_info[:2]


def _format_title(header, title_end, keep_first_word):
    """Turn a raw ``>`` header line into the title written to the output."""
    if keep_first_word:
        # A record without a header has an empty title; ``''.split()`` is an
        # empty list, so indexing it directly would raise IndexError.
        words = header.split()
        header = words[0] if words else ''
    # Index 0 is the '>' marker; ``title_end`` of None keeps the whole title.
    return header[1:title_end]


def compute_fasta_length(fasta_file, out_file, keep_first_char, keep_first_word=False):
    """Write one ``title<TAB>length`` row per FASTA record.

    Blank lines and lines starting with ``#`` are skipped. A line starting
    with ``>`` opens a new record; every other line adds its stripped length
    to the current record. Sequence lines that appear before the first
    header are discarded once a header is found.

    :param fasta_file: path of the FASTA file to read.
    :param out_file: path of the tabular file to write (overwritten).
    :param keep_first_char: number of title characters to keep, as an int or
        anything ``int()`` accepts; ``0`` keeps the whole title.
    :param keep_first_word: when true, cut the title at the first whitespace
        before applying ``keep_first_char``.

    Input that contains no header and no sequence data produces an empty
    output file instead of raising IndexError or writing an empty-titled
    zero-length row.
    """
    keep_first_char = int(keep_first_char)
    # The title slice starts at index 1 (after '>'), hence the +1 on its end.
    title_end = None if keep_first_char == 0 else keep_first_char + 1

    fasta_title = ''
    seq_len = 0
    seen_header = False

    with open(out_file, 'w') as out:
        with open(fasta_file) as fh:
            for line in fh:
                line = line.strip()
                if not line or line.startswith('#'):
                    continue
                if line.startswith('>'):
                    # A new header closes the previous record, if there was one.
                    if seen_header:
                        title = _format_title(fasta_title, title_end, keep_first_word)
                        out.write("%s\t%d\n" % (title, seq_len))
                    seen_header = True
                    fasta_title = line
                    seq_len = 0
                else:
                    seq_len += len(line)

        # Flush the last record, but only if the input held any data at all.
        if seen_header or seq_len:
            title = _format_title(fasta_title, title_end, keep_first_word)
            out.write("%s\t%d\n" % (title, seq_len))
if __name__ == "__main__":
    # Script mode: argv supplies <fasta_file> <out_file> <keep_first_char>;
    # titles are always cut down to their first word here.
    compute_fasta_length(
        sys.argv[1],
        sys.argv[2],
        sys.argv[3],
        keep_first_word=True,
    )