Warning

This document is for an in-development version of Galaxy. Alternatively, you can view this page in the latest release, if it exists, or view the top of the latest release's documentation.

Source code for galaxy.datatypes.converters.fastq_to_fqtoc

#!/usr/bin/env python

import sys

from galaxy.util.checkers import is_gzip


def main():
    """
    Write a JSON table of contents for an uncompressed FASTQ file.

    The input path is read from ``sys.argv[1]`` and the table of contents is
    written to the path given in ``sys.argv[2]``.

    The format of the file is JSON::

        { "sections" : [
                { "start" : "x", "end" : "y", "sequences" : "z" },
                ...
        ]}

    ``start`` and ``end`` are the byte offsets reported by ``tell()`` on the
    input file, and ``sequences`` is the number of 4-line records in that
    section. Every section except the last covers 1000000 sequences; the last
    one covers whatever remains (possibly zero).

    This works only for UNCOMPRESSED fastq files. The Python GzipFile does not
    provide seekable offsets via tell(), so clients just have to split the
    slow way.

    Exits through ``sys.exit`` with an error message if the input is gzipped.
    """
    input_fname = sys.argv[1]
    if is_gzip(input_fname):
        sys.exit('Conversion is only possible for uncompressed files')

    sequences = 1000000
    lines_per_chunk = 4 * sequences
    section_template = '{{"start":"{}","end":"{}","sequences":"{}"}}'
    current_line = 0
    chunk_begin = 0

    # The input is opened in binary mode on purpose: tell() on a text-mode
    # file returns an opaque cookie rather than a guaranteed byte offset, and
    # decoding could fail on non-UTF-8 bytes. Only line boundaries matter here.
    with open(input_fname, 'rb') as in_file, open(sys.argv[2], 'w') as out_file:
        out_file.write('{"sections" : [')
        # readline() (instead of plain iteration) keeps tell() pointing
        # exactly at the end of the line that was just consumed.
        for current_line, _ in enumerate(iter(in_file.readline, b''), start=1):
            if current_line % lines_per_chunk == 0:
                chunk_end = in_file.tell()
                out_file.write(section_template.format(chunk_begin, chunk_end, sequences) + ',')
                chunk_begin = chunk_end
        # A closing section is always emitted, so the comma written after each
        # full section is always followed by another entry.
        chunk_end = in_file.tell()
        # Floor division keeps the count an integer ("2", not "2.0") on Python 3.
        remaining_sequences = (current_line % lines_per_chunk) // 4
        out_file.write(section_template.format(chunk_begin, chunk_end, remaining_sequences))
        out_file.write(']}\n')
# Run the conversion only when this file is executed as a script.
if "__main__" == __name__:
    main()