Warning

This document is for an in-development version of Galaxy. You can alternatively view this page in the latest release if it exists or view the top of the latest release's documentation.

Source code for galaxy.datatypes.converters.sam_to_bam

#!/usr/bin/env python
# Dan Blankenberg

"""
A wrapper script for converting SAM to BAM, with sorting.
%prog input_filename.sam output_filename.bam
"""
import optparse
import os
import shutil
import subprocess
import sys
import tempfile

import packaging.version

CHUNK_SIZE = 2 ** 20  # 1mb


[docs]def cleanup_before_exit(tmp_dir): if tmp_dir and os.path.exists(tmp_dir): shutil.rmtree(tmp_dir)
[docs]def cmd_exists(cmd): # http://stackoverflow.com/questions/5226958/which-equivalent-function-in-python for path in os.environ["PATH"].split(":"): if os.path.exists(os.path.join(path, cmd)): return True return False
def _get_samtools_version(): version = '0.0.0' if not cmd_exists('samtools'): raise Exception('This tool needs samtools, but it is not on PATH.') # Get the version of samtools via --version-only, if available p = subprocess.Popen(['samtools', '--version-only'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) output, error = p.communicate() # --version-only is available # Format is <version x.y.z>+htslib-<a.b.c> if p.returncode == 0: version = output.split('+')[0] return version output = subprocess.Popen(['samtools'], stderr=subprocess.PIPE, stdout=subprocess.PIPE).communicate()[1] lines = output.split('\n') for line in lines: if line.lower().startswith('version'): # Assuming line looks something like: version: 0.1.12a (r862) version = line.split()[1] break return version def __main__(): # Parse Command Line parser = optparse.OptionParser() (options, args) = parser.parse_args() assert len(args) == 2, 'You must specify the input and output filenames' input_filename, output_filename = args tmp_dir = tempfile.mkdtemp(prefix='tmp-sam_to_bam_converter-') # convert to SAM unsorted_bam_filename = os.path.join(tmp_dir, 'unsorted.bam') unsorted_stderr_filename = os.path.join(tmp_dir, 'unsorted.stderr') proc = subprocess.Popen(['samtools', 'view', '-bS', input_filename], stdout=open(unsorted_bam_filename, 'wb'), stderr=open(unsorted_stderr_filename, 'wb'), cwd=tmp_dir) return_code = proc.wait() if return_code: stderr_target = sys.stderr else: stderr_target = sys.stdout with open(unsorted_stderr_filename) as stderr: while True: chunk = stderr.read(CHUNK_SIZE) if chunk: stderr_target.write(chunk) else: break # sort sam, so indexing will not fail sorted_stderr_filename = os.path.join(tmp_dir, 'sorted.stderr') sorting_prefix = os.path.join(tmp_dir, 'sorted_bam') # samtools changed sort command arguments (starting from version 1.3) samtools_version = packaging.version.parse(_get_samtools_version()) if samtools_version < packaging.version.parse('1.0'): sort_args = ['-o', unsorted_bam_filename, sorting_prefix] else: sort_args = ['-T', sorting_prefix, unsorted_bam_filename] proc = subprocess.Popen(['samtools', 'sort'] + sort_args, stdout=open(output_filename, 'wb'), stderr=open(sorted_stderr_filename, 'wb'), cwd=tmp_dir) return_code = proc.wait() if return_code: stderr_target = sys.stderr else: stderr_target = sys.stdout with open(sorted_stderr_filename) as stderr: while True: chunk = stderr.read(CHUNK_SIZE) if chunk: stderr_target.write(chunk) else: break cleanup_before_exit(tmp_dir) if __name__ == "__main__": __main__()