Warning

This document is for an old release of Galaxy. You can alternatively view this page in the latest release if it exists or view the top of the latest release's documentation.

Source code for galaxy.tools.imp_exp.unpack_tar_gz_archive

#!/usr/bin/env python
"""
Unpack a tar or tar.gz archive into a directory.

usage: %prog archive_source dest_dir
    --[url|file] source type, either a URL or a file.
"""

import json
import math
import optparse
import os
import tarfile
from base64 import b64decode

from galaxy.datatypes import sniff

# Upper bound, in bytes, on the size of an archive/file that will be handled:
# 100 * 2**30, i.e. 100 GiB. The figure is arbitrary and should be adjusted as
# needed. Kept as a float. NOTE(review): nothing in the visible part of this
# module references the constant.
MAX_SIZE = 100 * float(2 ** 30)


def get_file_sources(file_sources_path):
    """
    Build the configured file sources object described by a JSON file.

    The file at ``file_sources_path`` must exist (enforced with an assertion).
    Its content is parsed as JSON and handed to
    ``ConfiguredFileSources.from_dict``; the object that call produces is
    returned unchanged.
    """
    path_is_present = os.path.exists(file_sources_path)
    assert path_is_present, f"file sources path [{file_sources_path}] does not exist"

    # Imported here, after the existence check, rather than at module level.
    from galaxy.files import ConfiguredFileSources

    with open(file_sources_path) as handle:
        serialized_sources = json.load(handle)
    return ConfiguredFileSources.from_dict(serialized_sources)
def check_archive(archive_file, dest_dir):
    """
    Ensure that a tar archive has no absolute paths or relative paths outside
    the archive.

    Every member name is joined onto ``dest_dir`` and normalized; the result
    must lie strictly below the destination directory. For symbolic and hard
    links the link target is checked as well (it may also be the destination
    directory itself). The check is purely lexical: the file system is not
    consulted and nothing is extracted.

    :param archive_file: path of the tar (optionally compressed) archive.
    :param dest_dir: directory the archive is meant to be extracted into. It
        may be relative or unnormalized; it is made absolute before comparing.
    :returns: ``True`` when every member passes the check.
    :raises AssertionError: if a member name or a link target resolves
        outside ``dest_dir``.
    """
    # Normalize the destination up front. Member paths are normalized below,
    # so comparing them against a raw "./out" or "a//b" prefix would reject
    # every member, including perfectly safe ones.
    dest_root = os.path.abspath(dest_dir)
    dest_prefix = dest_root.rstrip(os.sep) + os.sep

    def _resolve(*parts):
        # Lexically resolve an archive-relative path against the destination.
        return os.path.normpath(os.path.join(dest_root, *parts))

    with tarfile.open(archive_file, mode='r') as archive_fp:
        for member in archive_fp.getmembers():
            arc_path = member.name
            is_safe = _resolve(arc_path).startswith(dest_prefix)
            if is_safe and (member.issym() or member.islnk()):
                # A symlink target is relative to the directory holding the
                # link; a hard link target is relative to the archive root.
                # An escaping link would let later members be written through
                # it to a location outside the destination.
                link_base = os.path.dirname(arc_path) if member.issym() else ""
                link_target = _resolve(link_base, member.linkname)
                is_safe = link_target == dest_root or link_target.startswith(dest_prefix)
            if not is_safe:
                # Raised explicitly instead of using an ``assert`` statement so
                # the check still runs under ``python -O``; the exception type
                # is kept for existing callers.
                raise AssertionError(f"Archive member would extract outside target directory: {arc_path}")
    return True
def unpack_archive(archive_file, dest_dir):
    """
    Unpack a tar and/or gzipped archive into a destination directory.

    :param archive_file: path of the archive; the compression is detected by
        ``tarfile`` (mode ``'r'``).
    :param dest_dir: directory the members are extracted into.
    :returns: ``None``.
    """
    # Opt into the "data" extraction filter where the running Python provides
    # it (``tarfile.data_filter`` is the documented feature probe). Older
    # interpreters do not accept the ``filter`` argument, so it is only passed
    # when supported.
    extract_kwargs = {}
    if hasattr(tarfile, "data_filter"):
        extract_kwargs["filter"] = "data"

    # The context manager closes the archive even when extraction fails.
    with tarfile.open(archive_file, mode='r') as archive_fp:
        archive_fp.extractall(path=dest_dir, **extract_kwargs)
def main(options, args):
    """
    Validate and unpack the archive named on the command line.

    :param options: parsed options object; the attributes ``is_url``,
        ``is_file``, ``is_b64encoded`` and ``file_sources`` are read.
    :param args: exactly two positional values, ``(archive_source, dest_dir)``.
    :raises ValueError: if ``args`` does not hold exactly two values, or if
        neither ``options.is_url`` nor ``options.is_file`` is set.
    """
    archive_source, dest_dir = args

    # Both positional values may arrive base64 encoded; decode them to text.
    if options.is_b64encoded:
        archive_source = b64decode(archive_source).decode('utf-8')
        dest_dir = b64decode(dest_dir).decode('utf-8')

    if options.is_url:
        # Get archive from URL; the URL flag wins when both flags are given.
        archive_file = sniff.stream_url_to_file(archive_source, file_sources=get_file_sources(options.file_sources))
    elif options.is_file:
        archive_file = archive_source
    else:
        # Without this branch ``archive_file`` would be unbound below and the
        # failure would surface as an opaque UnboundLocalError.
        raise ValueError("Archive source type not specified: pass --url or --file.")

    # Unpack archive, but only after its member paths have been checked.
    check_archive(archive_file, dest_dir)
    unpack_archive(archive_file, dest_dir)
if __name__ == "__main__":
    # Command-line entry point: register the options from a table (in the
    # order they should appear in --help), parse argv and delegate to main().
    option_specs = (
        (('-U', '--url'), dict(dest='is_url', action="store_true", help='Source is a URL.')),
        (('-F', '--file'), dict(dest='is_file', action="store_true", help='Source is a file.')),
        (
            ('-e', '--encoded'),
            dict(
                dest='is_b64encoded',
                action="store_true",
                default=False,
                help='Source and destination dir values are base64 encoded.',
            ),
        ),
        (('--file-sources',), dict(type=str, help='file sources json')),
    )

    parser = optparse.OptionParser()
    for flags, settings in option_specs:
        parser.add_option(*flags, **settings)

    options, args = parser.parse_args()
    main(options, args)