Source code for galaxy.datatypes.converters.fastq_to_fqtoc

#!/usr/bin/env python
from __future__ import print_function

import sys

from galaxy.util.checkers import is_gzip


def main():
    """Write a table of contents (fqtoc) for an uncompressed FASTQ file.

    Command line: ``sys.argv[1]`` is the input FASTQ path and
    ``sys.argv[2]`` is the path of the TOC file to write.

    The format of the output file is JSON::

        { "sections" : [
                { "start" : "x", "end" : "y", "sequences" : "z" },
                ...
        ]}

    ``start`` and ``end`` are byte offsets into the input file and
    ``sequences`` is the number of four-line records in that range.  A new
    section is started every 1,000,000 sequences.  A final section covering
    the remaining lines is always written, even when it is empty, so the
    JSON list never ends with a dangling comma.

    This works only for UNCOMPRESSED fastq files.  The Python GzipFile does
    not provide seekable offsets via tell(), so clients just have to split
    the slow way.  If the input is gzip-compressed the script exits with an
    error message.
    """
    input_fname = sys.argv[1]
    if is_gzip(input_fname):
        sys.exit('Conversion is only possible for uncompressed files')

    sequences = 1000000
    lines_per_chunk = 4 * sequences
    section = '{"start":"%s","end":"%s","sequences":"%s"}'

    current_line = 0
    chunk_begin = 0
    # Binary mode: tell() then reports real byte offsets.  On Python 3 a
    # text-mode tell() returns an opaque cookie, and decoding could fail on
    # stray non-text bytes.
    with open(input_fname, 'rb') as in_file, open(sys.argv[2], 'w') as out_file:
        out_file.write('{"sections" : [')
        # Explicit readline() calls (instead of iterating the file object)
        # keep tell() accurate after every line; Python 2 file iteration
        # uses a read-ahead buffer that would skew the offsets.
        for current_line, _ in enumerate(iter(in_file.readline, b''), 1):
            if current_line % lines_per_chunk == 0:
                chunk_end = in_file.tell()
                out_file.write(section % (chunk_begin, chunk_end, sequences) + ',')
                chunk_begin = chunk_end

        # Trailing section: whatever is left after the last full chunk.
        # Floor division keeps the count an integer on Python 3 as well.
        chunk_end = in_file.tell()
        remaining = (current_line % lines_per_chunk) // 4
        out_file.write(section % (chunk_begin, chunk_end, remaining))
        out_file.write(']}\n')
if __name__ == "__main__":
    # Run the conversion only when executed as a script, not on import.
    main()