Source code for galaxy.datatypes.converters.fasta_to_len

#!/usr/bin/env python
"""
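Input: fasta file, int (number of title characters to keep; 0 keeps the whole title)
Output: tabular file with one "title<TAB>length" row per fasta entry
Write each sequence title together with the length of its corresponding sequence.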
"""
import sys

# Refuse to run on interpreters older than Python 2.4.
# NOTE(review): `assert` statements are skipped when Python runs with -O, so
# this guard is not enforced in optimized mode.
assert sys.version_info[:2] >= (2, 4)


def compute_fasta_length(fasta_file, out_file, keep_first_char, keep_first_word=False):
    """Write one ``title<TAB>length`` row per FASTA entry to ``out_file``.

    The input is read line by line. Blank lines and lines starting with ``#``
    are ignored, a line starting with ``>`` opens a new entry, and the
    stripped length of every other line is added to the current entry.
    Sequence lines that appear before the first ``>`` header are discarded
    once a header is met.

    :param fasta_file: path of the FASTA file to read.
    :param out_file: path of the tabular file to write; it is opened in
        ``"w"`` mode, so existing content is replaced.
    :param keep_first_char: number of title characters to keep (the leading
        ``>`` is never part of the title). Accepts anything ``int()``
        accepts; ``0`` keeps the whole title.
    :param keep_first_word: when true, the title is cut at its first
        whitespace before ``keep_first_char`` is applied.
    :raises ValueError: if ``keep_first_char`` cannot be converted by ``int``.

    If the input contains no ``>`` header at all (e.g. an empty file), a
    single row with an empty title is written, whatever the value of
    ``keep_first_word``.
    """
    keep_first_char = int(keep_first_char)

    # Index 0 of a header line is the ">" marker, so keeping N characters
    # means slicing [1:N + 1]; a slice end of None keeps the whole title.
    title_end = keep_first_char + 1 if keep_first_char else None

    def format_row(title, length):
        """Return the output row for one entry."""
        if keep_first_word:
            words = title.split()
            # ``title`` is empty only when no ">" header was ever read;
            # indexing ``[0]`` unconditionally would raise IndexError there.
            title = words[0] if words else ""
        return "%s\t%d\n" % (title[1:title_end], length)

    fasta_title = ""
    seq_len = 0
    seen_header = False

    # The output is opened first (as before), so it is created/truncated even
    # when the input file cannot be opened.
    with open(out_file, "w") as out:
        with open(fasta_file) as fh:
            for line in fh:
                line = line.strip()
                if not line or line.startswith("#"):
                    continue
                if line.startswith(">"):
                    # A new header closes the previous entry, if there is one.
                    if seen_header:
                        out.write(format_row(fasta_title, seq_len))
                    seen_header = True
                    fasta_title = line
                    seq_len = 0
                else:
                    seq_len += len(line)

        # The last entry has no following header to trigger its output.
        out.write(format_row(fasta_title, seq_len))
if __name__ == "__main__":
    # Command line: <fasta_file> <out_file> <keep_first_char>
    # Titles are always reduced to their first word when run as a script.
    fasta_path = sys.argv[1]
    out_path = sys.argv[2]
    title_chars = sys.argv[3]
    compute_fasta_length(fasta_path, out_path, title_chars, True)