Source code for galaxy.tools.imp_exp.export_history
#!/usr/bin/env python
"""
Export a history to an archive file using attribute files.
usage: %prog history_attrs dataset_attrs job_attrs out_file
    -G, --gzip: gzip archive file
"""
from __future__ import print_function

import optparse
import os
import sys
import tarfile
from json import dumps, loads

from galaxy.util import FILENAME_VALID_CHARS


def get_dataset_filename(name, ext, hid):
    """
    Builds a filename for a dataset using its name and extension.
    """
    base = ''.join(c if c in FILENAME_VALID_CHARS else '_' for c in name)
    return base + "_%s.%s" % (hid, ext)
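
# Illustrative note (not part of the module): get_dataset_filename replaces any
# character not in galaxy.util.FILENAME_VALID_CHARS with '_' and appends the
# dataset's hid and extension. Assuming '!' and ' ' are not valid filename
# characters, a hypothetical call might behave like:
#
#     get_dataset_filename('My data!', 'tabular', 3)  # -> 'My_data__3.tabular'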


def create_archive(history_attrs_file, datasets_attrs_file, jobs_attrs_file, out_file, gzip=False):
    """Create archive from the given attribute/metadata files and save it to out_file."""
    tarfile_mode = "w"
    if gzip:
        tarfile_mode += ":gz"
    try:
        history_archive = tarfile.open(out_file, tarfile_mode)
        # Read dataset attributes from file, a chunk at a time.
        with open(datasets_attrs_file) as datasets_attr_in:
            datasets_attr_str = ''
            buffsize = 1048576
            while True:
                chunk = datasets_attr_in.read(buffsize)
                if not chunk:
                    break
                datasets_attr_str += chunk
        datasets_attrs = loads(datasets_attr_str)
        # Add datasets to archive and update dataset attributes.
        # TODO: security check to ensure that files added are in Galaxy dataset directory?
        for dataset_attrs in datasets_attrs:
            if dataset_attrs['exported']:
                dataset_file_name = dataset_attrs['file_name']  # Full file name.
                dataset_hid = dataset_attrs['hid']
                dataset_archive_name = os.path.join(
                    'datasets',
                    get_dataset_filename(dataset_attrs['name'], dataset_attrs['extension'], dataset_hid))
                history_archive.add(dataset_file_name, arcname=dataset_archive_name)
                # Include additional files, for example files/images included in HTML output.
                extra_files_path = dataset_attrs['extra_files_path']
                if extra_files_path:
                    try:
                        file_list = os.listdir(extra_files_path)
                    except OSError:
                        file_list = []
                    if len(file_list):
                        dataset_extra_files_path = 'datasets/extra_files_path_%s' % dataset_hid
                        for fname in file_list:
                            history_archive.add(os.path.join(extra_files_path, fname),
                                                arcname=os.path.join(dataset_extra_files_path, fname))
                        dataset_attrs['extra_files_path'] = dataset_extra_files_path
                    else:
                        dataset_attrs['extra_files_path'] = ''
                # Update dataset filename to be archive name.
                dataset_attrs['file_name'] = dataset_archive_name
        # Rewrite dataset attributes file.
        with open(datasets_attrs_file, 'w') as datasets_attrs_out:
            datasets_attrs_out.write(dumps(datasets_attrs))
        # Finish archive.
        history_archive.add(history_attrs_file, arcname="history_attrs.txt")
        history_archive.add(datasets_attrs_file, arcname="datasets_attrs.txt")
        if os.path.exists(datasets_attrs_file + ".provenance"):
            history_archive.add(datasets_attrs_file + ".provenance", arcname="datasets_attrs.txt.provenance")
        history_archive.add(jobs_attrs_file, arcname="jobs_attrs.txt")
        history_archive.close()
        # Status.
        return 'Created history archive.'
    except Exception as e:
        # Report the failure as the status string; main() prints it.
        return 'Error creating history archive: %s' % str(e)
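
# Illustrative sketch (not part of the module): datasets_attrs.txt is expected
# to hold a JSON list of dicts with at least the keys read above. A minimal,
# hypothetical entry might look like:
#
#     [{"exported": true, "file_name": "/galaxy/files/dataset_1.dat",
#       "hid": 1, "name": "my dataset", "extension": "tabular",
#       "extra_files_path": ""}]
#
# Given such attribute files (filenames here are placeholders), create_archive()
# could be called directly:
#
#     status = create_archive('history_attrs.txt', 'datasets_attrs.txt',
#                             'jobs_attrs.txt', 'out.tar.gz', gzip=True)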


def main():
    # Parse command line.
    parser = optparse.OptionParser()
    parser.add_option('-G', '--gzip', dest='gzip', action="store_true", help='Compress archive using gzip.')
    (options, args) = parser.parse_args()
    gzip = bool(options.gzip)
    if len(args) != 4:
        parser.error("expected arguments: history_attrs dataset_attrs job_attrs out_file")
    history_attrs, dataset_attrs, job_attrs, out_file = args
    # Create archive.
    status = create_archive(history_attrs, dataset_attrs, job_attrs, out_file, gzip)
    print(status)


if __name__ == "__main__":
    main()
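
# Hypothetical invocation (paths are placeholders), matching the usage string
# in the module docstring:
#
#     python export_history.py -G history_attrs.txt datasets_attrs.txt \
#         jobs_attrs.txt out.tar.gz
#
# The resulting tarball contains history_attrs.txt, datasets_attrs.txt,
# jobs_attrs.txt, and the exported dataset files under datasets/.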