Warning

This document is for an old release of Galaxy. You can alternatively view this page in the latest release if it exists or view the top of the latest release's documentation.

Source code for galaxy.datatypes.converters.bgzip

#!/usr/bin/env python

"""
Sorts a tabular interval file by position and uses pysam to bgzip it

usage: %prog [-c CHROM_COL -s START_COL -e END_COL | -P bed|vcf|gff] in_file out_file
"""
import optparse
import subprocess
import tempfile

import pysam


def main():
    """Sort a tabular file by position and bgzip-compress it with pysam.

    Reads the command line: two positional arguments (``in_file`` and
    ``out_file``) plus either all three of ``-c/--chr-col``,
    ``-s/--start-col`` and ``-e/--end-col`` (column numbers passed to
    ``sort -k``), or ``-P/--preset`` set to ``bed``, ``vcf`` or ``gff``.
    Explicit columns take precedence over the preset.

    The input is filtered through ``grep -v``, sorted with the external
    ``sort`` command into a temporary file, and that file is compressed to
    ``out_file`` with ``pysam.tabix_compress(..., force=True)``.

    Exits with a usage error (status 2, via ``parser.error``) when the
    positional arguments or the sort specification are missing or unknown.

    Raises:
        subprocess.CalledProcessError: if ``sort`` exits non-zero, or if
            ``grep`` exits with anything other than 0 or 1.
    """
    # Read options, args.
    parser = optparse.OptionParser(usage="%prog [options] in_file out_file")
    parser.add_option('-c', '--chr-col', type='int', dest='chrom_col')
    parser.add_option('-s', '--start-col', type='int', dest='start_col')
    parser.add_option('-e', '--end-col', type='int', dest='end_col')
    parser.add_option('-P', '--preset', dest='preset')
    (options, args) = parser.parse_args()
    if len(args) != 2:
        parser.error("expected exactly two positional arguments: in_file out_file")
    input_fname, output_fname = args

    sort_params = None
    if options.chrom_col and options.start_col and options.end_col:
        sort_params = [
            "sort",
            "-k{i},{i}".format(i=options.chrom_col),
            "-k{i},{i}n".format(i=options.start_col),
            "-k{i},{i}n".format(i=options.end_col),
        ]
    elif options.preset == "bed":
        sort_params = ["sort", "-k1,1", "-k2,2n", "-k3,3n"]
    elif options.preset == "vcf":
        sort_params = ["sort", "-k1,1", "-k2,2n"]
    elif options.preset == "gff":
        sort_params = ["sort", "-s", "-k1,1", "-k4,4n"]  # stable sort on start column
    if sort_params is None:
        # Without this check Popen(None) would fail with an opaque TypeError.
        parser.error(
            "specify --chr-col, --start-col and --end-col together, "
            "or --preset bed|vcf|gff"
        )

    # Skip any lines starting with "#" and "track"
    # NOTE(review): grep runs without a shell, so the quotes in the first
    # pattern are literal; only lines beginning with the three characters
    # "#" (quote, hash, quote) are dropped, not ordinary '#' lines. Left
    # unchanged because later steps may rely on header lines being kept --
    # confirm the intent before altering the pattern.
    grep_cmd = ["grep", "-e", "^\"#\"", "-e", "^track", "-v", input_fname]

    # The context manager removes the temporary file deterministically, even
    # when one of the steps below raises.
    with tempfile.NamedTemporaryFile() as tmpfile:
        # grep's stderr is inherited rather than piped: a pipe nobody reads
        # hides the error message and could block grep if it ever filled.
        grepped = subprocess.Popen(grep_cmd, stdout=subprocess.PIPE)
        after_sort = subprocess.Popen(
            sort_params,
            stdin=grepped.stdout,
            stderr=subprocess.PIPE,
            stdout=tmpfile,
        )
        # Drop our copy of the pipe so grep gets SIGPIPE if sort exits early.
        grepped.stdout.close()
        _, err = after_sort.communicate()
        grep_status = grepped.wait()

        # Check sort first: if it died early, grep's failure is only a symptom.
        if after_sort.returncode != 0:
            raise subprocess.CalledProcessError(after_sort.returncode, sort_params, output=err)
        # grep exits 1 when it selected no lines, which is not an error here.
        if grep_status not in (0, 1):
            raise subprocess.CalledProcessError(grep_status, grep_cmd)

        pysam.tabix_compress(tmpfile.name, output_fname, force=True)
# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()