
Source code for galaxy.tools.imp_exp.export_history

#!/usr/bin/env python
"""
Export a history to an archive file using attribute files.

usage: %prog history_attrs dataset_attrs job_attrs out_file
    -G, --gzip: gzip archive file
"""
from __future__ import print_function

import optparse
import os
import sys
import tarfile
from json import dumps, loads

from galaxy.util import FILENAME_VALID_CHARS


def get_dataset_filename(name, ext, hid):
    """Builds a filename for a dataset using its name and extension."""
    # Replace any character not in the allowed set with an underscore.
    base = ''.join(c if c in FILENAME_VALID_CHARS else '_' for c in name)
    return base + "_%s.%s" % (hid, ext)
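For orientation, a minimal sketch of what this helper produces; the dataset name and hid below are invented, and the exact substitutions depend on which characters galaxy.util.FILENAME_VALID_CHARS permits:

    # Illustrative only -- the sample name and hid are hypothetical.
    filename = get_dataset_filename('My data: run 1', 'tabular', 5)
    # Characters outside FILENAME_VALID_CHARS (here assumed to include ':')
    # become underscores, yielding something like 'My data_ run 1_5.tabular'.
    print(filename)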
def create_archive(history_attrs_file, datasets_attrs_file, jobs_attrs_file, out_file, gzip=False):
    """Create archive from the given attribute/metadata files and save it to out_file."""
    tarfile_mode = "w"
    if gzip:
        tarfile_mode += ":gz"
    try:
        history_archive = tarfile.open(out_file, tarfile_mode)

        # Read dataset attributes from file, accumulating the JSON text in
        # fixed-size chunks.
        with open(datasets_attrs_file) as datasets_attr_in:
            datasets_attr_str = ''
            buffsize = 1048576
            try:
                while True:
                    datasets_attr_str += datasets_attr_in.read(buffsize)
                    if not datasets_attr_str or len(datasets_attr_str) % buffsize != 0:
                        break
            except OverflowError:
                pass
        datasets_attrs = loads(datasets_attr_str)

        # Add datasets to archive and update dataset attributes.
        # TODO: security check to ensure that files added are in Galaxy dataset directory?
        for dataset_attrs in datasets_attrs:
            if dataset_attrs['exported']:
                dataset_file_name = dataset_attrs['file_name']  # Full file name.
                dataset_hid = dataset_attrs['hid']
                dataset_archive_name = os.path.join('datasets',
                                                    get_dataset_filename(dataset_attrs['name'], dataset_attrs['extension'], dataset_hid))
                history_archive.add(dataset_file_name, arcname=dataset_archive_name)

                # Include additional files, e.g. files/images included in HTML output.
                extra_files_path = dataset_attrs['extra_files_path']
                if extra_files_path:
                    try:
                        file_list = os.listdir(extra_files_path)
                    except OSError:
                        file_list = []

                    if len(file_list):
                        dataset_extra_files_path = 'datasets/extra_files_path_%s' % dataset_hid
                        for fname in file_list:
                            history_archive.add(os.path.join(extra_files_path, fname),
                                                arcname=(os.path.join(dataset_extra_files_path, fname)))
                        dataset_attrs['extra_files_path'] = dataset_extra_files_path
                    else:
                        dataset_attrs['extra_files_path'] = ''

                # Update dataset filename to be archive name.
                dataset_attrs['file_name'] = dataset_archive_name

        # Rewrite dataset attributes file.
        with open(datasets_attrs_file, 'w') as datasets_attrs_out:
            datasets_attrs_out.write(dumps(datasets_attrs))

        # Finish archive.
        history_archive.add(history_attrs_file, arcname="history_attrs.txt")
        history_archive.add(datasets_attrs_file, arcname="datasets_attrs.txt")
        if os.path.exists(datasets_attrs_file + ".provenance"):
            history_archive.add(datasets_attrs_file + ".provenance", arcname="datasets_attrs.txt.provenance")
        history_archive.add(jobs_attrs_file, arcname="jobs_attrs.txt")
        history_archive.close()

        # Status.
        return 'Created history archive.'
    except Exception as e:
        # Report the failure on stderr and return the message as the status
        # string (the original returned a (message, sys.stderr) tuple).
        print('Error creating history archive: %s' % str(e), file=sys.stderr)
        return 'Error creating history archive: %s' % str(e)
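To exercise the function in isolation, here is a minimal sketch that feeds it near-empty attribute files; the file names are hypothetical, and in normal operation Galaxy's history-export job writes these files before this script runs:

    import json

    # Empty metadata: the resulting archive contains only the attrs files.
    with open('history_attrs.txt', 'w') as f:
        json.dump({'name': 'test history'}, f)
    with open('datasets_attrs.txt', 'w') as f:
        json.dump([], f)
    with open('jobs_attrs.txt', 'w') as f:
        json.dump([], f)

    print(create_archive('history_attrs.txt', 'datasets_attrs.txt',
                         'jobs_attrs.txt', 'history.tar.gz', gzip=True))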
def main():
    # Parse command line.
    parser = optparse.OptionParser()
    parser.add_option('-G', '--gzip', dest='gzip', action="store_true", help='Compress archive using gzip.')
    (options, args) = parser.parse_args()
    gzip = bool(options.gzip)
    history_attrs, dataset_attrs, job_attrs, out_file = args

    # Create archive.
    status = create_archive(history_attrs, dataset_attrs, job_attrs, out_file, gzip)
    print(status)
if __name__ == "__main__":
    main()
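When run as a script, the positional arguments match the usage string in the module docstring; a typical invocation (paths hypothetical) looks like:

    python export_history.py -G history_attrs.txt datasets_attrs.txt jobs_attrs.txt archive.tar.gz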