Warning
This document is for an old release of Galaxy. You can alternatively view this page in the latest release if it exists or view the top of the latest release's documentation.
Source code for galaxy.datatypes.converters.sam_to_bam
#!/usr/bin/env python
# Dan Blankenberg
"""
A wrapper script for converting SAM to BAM, with sorting.
%prog input_filename.sam output_filename.bam
"""
import optparse
import os
import shutil
import subprocess
import sys
import tempfile
import packaging.version
# Maximum amount read per call while relaying a captured samtools stderr file
# to this process's own stdout/stderr.
CHUNK_SIZE = 2 ** 20  # 2**20 = 1,048,576 (1 MiB)
def cleanup_before_exit(tmp_dir):
    """Delete the temporary working directory, if there is one.

    :param tmp_dir: path of the directory tree to remove. A false value
        (``None`` or ``''``) or a path that does not exist is ignored.
    """
    if tmp_dir and os.path.exists(tmp_dir):
        shutil.rmtree(tmp_dir)
def cmd_exists(cmd):
    """Return ``True`` if an executable file named *cmd* is found on ``PATH``.

    :param cmd: bare command name to look for (e.g. ``'samtools'``).
    :returns: ``True`` when some ``PATH`` entry contains a regular file
        called *cmd* that the current user may execute, ``False`` otherwise
        (including when ``PATH`` is not set at all).
    """
    # http://stackoverflow.com/questions/5226958/which-equivalent-function-in-python
    search_path = os.environ.get("PATH")
    if search_path is None:
        return False
    # os.pathsep (":" on POSIX, ";" on Windows) instead of a hard-coded ":".
    for directory in search_path.split(os.pathsep):
        candidate = os.path.join(directory, cmd)
        # A directory or a non-executable file with the right name cannot be
        # launched, so it must not count as the command being available.
        if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
            return True
    return False
def _get_samtools_version():
    """Return the version string of the ``samtools`` executable on ``PATH``.

    ``samtools --version-only`` is tried first. If that exits non-zero, the
    usage text that a bare ``samtools`` call prints on stderr is scanned for
    a line starting with ``version``.

    :returns: the version string, or ``'0.0.0'`` when none could be found.
    :raises Exception: if ``samtools`` is not found on ``PATH``.
    """
    version = '0.0.0'
    if not cmd_exists('samtools'):
        raise Exception('This tool needs samtools, but it is not on PATH.')
    # Get the version of samtools via --version-only, if available.
    # universal_newlines=True makes the pipes return text instead of bytes,
    # so the str-based parsing below also works on Python 3.
    p = subprocess.Popen(['samtools', '--version-only'],
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE,
                         universal_newlines=True)
    output, _ = p.communicate()
    # --version-only is available
    # Format is <version x.y.z>+htslib-<a.b.c>
    if p.returncode == 0:
        return output.split('+')[0].strip()
    # Fall back to the usage text printed on stderr.
    usage = subprocess.Popen(['samtools'],
                             stderr=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             universal_newlines=True).communicate()[1]
    for line in usage.splitlines():
        if line.lower().startswith('version'):
            # Assuming line looks something like: version: 0.1.12a (r862)
            fields = line.split()
            if len(fields) > 1:
                version = fields[1]
                break
    return version
def _relay_captured_stderr(stderr_filename, failed):
    """Copy a captured stderr file to sys.stderr if *failed*, else to sys.stdout."""
    target = sys.stderr if failed else sys.stdout
    with open(stderr_filename) as captured:
        shutil.copyfileobj(captured, target, CHUNK_SIZE)


def _run_samtools(args, stdout_filename, stderr_filename, cwd):
    """Run ``samtools`` with *args*, capturing its output to files, and return its exit status."""
    # The context manager closes both capture files even if Popen raises.
    with open(stdout_filename, 'wb') as out, open(stderr_filename, 'wb') as err:
        return_code = subprocess.Popen(['samtools'] + args,
                                       stdout=out,
                                       stderr=err,
                                       cwd=cwd).wait()
    _relay_captured_stderr(stderr_filename, return_code)
    return return_code


def __main__():
    """Convert a SAM file to a sorted BAM file using ``samtools``.

    Expects exactly two positional command-line arguments: the input SAM
    filename and the output BAM filename. The input is first converted to an
    unsorted BAM in a temporary directory, which is then sorted into the
    output file. Whatever each samtools step printed on stderr is forwarded
    to this process's stderr when the step exited non-zero, and to stdout
    otherwise. The temporary directory is removed on the way out, whether or
    not an error occurred.
    """
    # Parse Command Line
    parser = optparse.OptionParser()
    (options, args) = parser.parse_args()
    if len(args) != 2:
        # Explicit check rather than ``assert``, which is stripped under -O.
        parser.error('You must specify the input and output filenames')
    input_filename, output_filename = args
    # samtools runs with cwd=tmp_dir, so a relative input path would otherwise
    # be resolved against the temporary directory instead of the caller's cwd.
    input_filename = os.path.abspath(input_filename)

    tmp_dir = tempfile.mkdtemp(prefix='tmp-sam_to_bam_converter-')
    try:
        # convert SAM to (unsorted) BAM
        unsorted_bam_filename = os.path.join(tmp_dir, 'unsorted.bam')
        unsorted_stderr_filename = os.path.join(tmp_dir, 'unsorted.stderr')
        # NOTE: as before, a failed conversion is reported on stderr but the
        # sort step below is still attempted.
        _run_samtools(['view', '-bS', input_filename],
                      unsorted_bam_filename,
                      unsorted_stderr_filename,
                      tmp_dir)

        # sort the BAM, so indexing will not fail
        sorted_stderr_filename = os.path.join(tmp_dir, 'sorted.stderr')
        sorting_prefix = os.path.join(tmp_dir, 'sorted_bam')
        # samtools changed the sort command arguments: releases before 1.0
        # get the legacy "-o <in.bam> <prefix>" form, everything newer gets
        # "-T <prefix> <in.bam>". Either way the sorted BAM is read from the
        # child's stdout, which is redirected into output_filename.
        samtools_version = packaging.version.parse(_get_samtools_version())
        if samtools_version < packaging.version.parse('1.0'):
            sort_args = ['-o', unsorted_bam_filename, sorting_prefix]
        else:
            sort_args = ['-T', sorting_prefix, unsorted_bam_filename]
        _run_samtools(['sort'] + sort_args,
                      output_filename,
                      sorted_stderr_filename,
                      tmp_dir)
    finally:
        # Remove the scratch directory even when a step above raised.
        cleanup_before_exit(tmp_dir)
# Run the conversion only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    __main__()