Warning
This document is for an in-development version of Galaxy. You can alternatively view this page in the latest release if it exists or view the top of the latest release's documentation.
Source code for galaxy.datatypes.converters.fastq_to_fqtoc
#!/usr/bin/env python
import sys
from galaxy.util.checkers import is_gzip
def main(sequences_per_chunk=1000000):
    """
    Write a JSON table of contents ("fqtoc") for an uncompressed FASTQ file.

    The input path is read from ``sys.argv[1]`` and the table of contents is
    written to ``sys.argv[2]``. The format of the file is JSON::

        { "sections" : [
            { "start" : "x", "end" : "y", "sequences" : "z" },
            ...
        ]}

    ``start`` and ``end`` are byte offsets into the input file and ``sequences``
    is the number of complete 4-line records between them; all three values are
    written as JSON strings.

    This works only for UNCOMPRESSED fastq files. The Python GzipFile does not provide seekable
    offsets via tell(), so clients just have to split the slow way.

    :param sequences_per_chunk: maximum number of 4-line records per section.
    :raises ValueError: if ``sequences_per_chunk`` is smaller than 1.
    :raises SystemExit: if ``is_gzip`` reports the input file as gzip-compressed.
    """
    if sequences_per_chunk < 1:
        raise ValueError("sequences_per_chunk must be a positive integer")

    input_fname = sys.argv[1]
    if is_gzip(input_fname):
        sys.exit("Conversion is only possible for uncompressed files")

    lines_per_chunk = 4 * sequences_per_chunk
    current_line = 0
    chunk_begin = 0

    # Binary mode: tell() then returns real byte offsets (a text-mode tell() is only an
    # opaque cookie) and stray non-decodable bytes in the input cannot abort the run.
    with open(input_fname, "rb") as in_file, open(sys.argv[2], "w") as out_file:
        out_file.write('{"sections" : [')
        for _ in iter(in_file.readline, b""):
            current_line += 1
            if current_line % lines_per_chunk == 0:
                chunk_end = in_file.tell()
                out_file.write(
                    f'{{"start":"{chunk_begin}","end":"{chunk_end}","sequences":"{sequences_per_chunk}"}},'
                )
                chunk_begin = chunk_end

        # The closing section is always written, even when it is empty, because every
        # full section above ends with a comma and the JSON list needs a last element.
        chunk_end = in_file.tell()
        # Floor division keeps the count an integer string ("2", not "2.0"), matching
        # the full sections; lines of a trailing incomplete record are not counted.
        remaining_sequences = current_line % lines_per_chunk // 4
        out_file.write(f'{{"start":"{chunk_begin}","end":"{chunk_end}","sequences":"{remaining_sequences}"}}')
        out_file.write("]}\n")
# Run the conversion only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()