Warning

This document is for an old release of Galaxy. You can alternatively view this page in the latest release if it exists or view the top of the latest release's documentation.

Source code for galaxy.tools.imp_exp.unpack_tar_gz_archive

#!/usr/bin/env python
"""
Unpack a tar or tar.gz archive into a directory.

usage: %prog [options] archive_source dest_dir
    -U, --url      archive_source is a URL.
    -F, --file     archive_source is a local file.
    -e, --encoded  archive_source and dest_dir are base64 encoded.
"""
from __future__ import print_function

import math
import optparse
import os
import sys
import tarfile
import tempfile
from base64 import b64decode

import requests

# Upper bound, in bytes, on the archive/file size this script will handle:
# 100 GB (100 * 2**30). The figure is arbitrary; adjust it as needed.
MAX_SIZE = math.pow(2, 30) * 100


def url_to_file(url, dest_file):
    """
    Transfer a file from a remote URL to a local file.

    The response body is streamed to ``dest_file`` in 10 KiB chunks. Once more
    than ``MAX_SIZE`` bytes have been written the transfer stops early; the
    partially written file is left in place and its path is still returned.

    :param url: URL to download.
    :param dest_file: path of the file the response body is written to.
    :returns: ``dest_file`` on success, or ``None`` if any exception occurred
        (the exception is reported on stderr instead of being raised).
    """
    CHUNK = 10 * 1024  # 10k
    try:
        url_reader = requests.get(url, stream=True)
        try:
            total = 0
            # The context manager closes the file even when a write or a
            # network read fails part-way through.
            with open(dest_file, 'wb') as fp:
                for chunk in url_reader.iter_content(chunk_size=CHUNK):
                    if chunk:
                        fp.write(chunk)
                        # Count the bytes actually received; a chunk may be
                        # shorter than CHUNK.
                        total += len(chunk)
                        if total > MAX_SIZE:
                            break
        finally:
            # With stream=True the connection is held until the body is fully
            # consumed or the response is closed, so release it explicitly
            # (this matters when the loop above stops early or raises).
            url_reader.close()
        return dest_file
    except Exception as e:
        print("Exception getting file from URL: %s" % e, file=sys.stderr)
        return None
def check_archive(archive_file, dest_dir):
    """
    Ensure that a tar archive has no absolute paths or relative paths outside
    the archive.

    Every member name is joined onto ``dest_dir`` and the normalised result
    must lie strictly inside ``dest_dir``.

    NOTE: only member *names* are validated here; the targets of symbolic or
    hard links stored in the archive are not inspected.

    :param archive_file: path of the tar (optionally compressed) archive.
    :param dest_dir: directory the archive is going to be extracted into.
    :returns: ``True`` when every member name is safe.
    :raises AssertionError: if a member would be extracted outside
        ``dest_dir``.
    """
    # Compare absolute, normalised paths on both sides. Normalising only the
    # joined member path (and not dest_dir) would reject every member whenever
    # dest_dir is spelled with "./" or ".." components.
    dest_root = os.path.abspath(dest_dir)
    allowed_prefix = dest_root.rstrip(os.sep) + os.sep
    with tarfile.open(archive_file, mode='r') as archive_fp:
        for arc_path in archive_fp.getnames():
            target = os.path.abspath(os.path.join(dest_root, arc_path))
            if not target.startswith(allowed_prefix):
                # Raised explicitly instead of using an ``assert`` statement
                # so the check is not stripped when Python runs with -O.
                raise AssertionError(
                    "Archive member would extract outside target directory: %s" % arc_path)
    return True
def unpack_archive(archive_file, dest_dir):
    """
    Unpack a tar and/or gzipped archive into a destination directory.

    No validation of member paths is done here.

    :param archive_file: path of the tar (optionally compressed) archive.
    :param dest_dir: directory the archive members are extracted into.
    """
    # The context manager closes the archive even if extraction fails.
    with tarfile.open(archive_file, mode='r') as archive_fp:
        archive_fp.extractall(path=dest_dir)
def main(options, args):
    """
    Fetch (if needed), validate and unpack an archive.

    :param options: parsed command-line options; the attributes ``is_url``,
        ``is_file`` and ``is_b64encoded`` are read.
    :param args: two-item sequence ``(archive_source, dest_dir)``.
    :raises ValueError: if neither the URL nor the file source type is
        selected (or ``args`` does not hold exactly two items).
    :raises IOError: if the archive could not be downloaded from the URL.
    """
    is_url = bool(options.is_url)
    is_file = bool(options.is_file)
    archive_source, dest_dir = args

    if options.is_b64encoded:
        archive_source = b64decode(archive_source).decode('utf-8')
        dest_dir = b64decode(dest_dir).decode('utf-8')

    downloaded_file = None
    if is_url:
        # mkstemp creates the file and hands back its path, so the name cannot
        # be claimed by something else between creation and download (taking
        # only ``.name`` from a NamedTemporaryFile leaves that gap, because
        # the file is deleted as soon as the object is discarded).
        fd, downloaded_file = tempfile.mkstemp(dir=dest_dir)
        os.close(fd)
    elif is_file:
        archive_file = archive_source
    else:
        raise ValueError("Source type not specified: use --url or --file.")

    try:
        # Get archive from URL.
        if downloaded_file is not None:
            archive_file = url_to_file(archive_source, downloaded_file)
            if archive_file is None:
                # url_to_file signals failure by returning None.
                raise IOError("Unable to retrieve archive from URL: %s" % archive_source)

        # Unpack archive.
        check_archive(archive_file, dest_dir)
        unpack_archive(archive_file, dest_dir)
    finally:
        # Do not leave the downloaded archive behind in the destination.
        if downloaded_file is not None and os.path.exists(downloaded_file):
            os.remove(downloaded_file)
if __name__ == "__main__":
    # Parse command line.
    parser = optparse.OptionParser()
    parser.add_option('-U', '--url', dest='is_url', action="store_true", help='Source is a URL.')
    parser.add_option('-F', '--file', dest='is_file', action="store_true", help='Source is a file.')
    parser.add_option('-e', '--encoded', dest='is_b64encoded', action="store_true", default=False,
                      help='Source and destination dir values are base64 encoded.')
    (options, args) = parser.parse_args()
    try:
        main(options, args)
    except Exception as e:
        print("Error unpacking tar/gz archive: %s" % e, file=sys.stderr)
        # Exit with a non-zero status so the failure is also visible to
        # whatever launched the script, not only on stderr.
        sys.exit(1)