Source code for galaxy.util.zipstream

import os
import zlib
from urllib.parse import quote

import zipstream

from .path import safe_walk

CRC32_MIN = 1444
CRC32_MAX = 1459


[docs]class ZipstreamWrapper:
[docs]    def __init__(self, archive_name=None, upstream_mod_zip=False, upstream_gzip=False):
        self.upstream_mod_zip = upstream_mod_zip
        self.archive_name = archive_name
        if not self.upstream_mod_zip:
            self.archive = zipstream.ZipFile(
                allowZip64=True, compression=zipstream.ZIP_STORED if upstream_gzip else zipstream.ZIP_DEFLATED
            )
        self.files = []
        self.directories = set()
        self.size = 0

[docs]    def response(self):
        if self.upstream_mod_zip:
            dir_lines = [f"0 0 @directory {directory}" for directory in self.directories]
            yield "\n".join(dir_lines + self.files).encode()
        else:
            yield from iter(self.archive)

[docs]    def get_headers(self):
        headers = {}
        if self.archive_name:
            headers["Content-Disposition"] = f'attachment; filename="{self.archive_name}.zip"'
        if self.upstream_mod_zip:
            headers["X-Archive-Files"] = "zip"
        else:
            headers["Content-Type"] = "application/x-zip-compressed"
        return headers

[docs]    def add_path(self, path, archive_name):
        size = int(os.stat(path).st_size)
        if self.upstream_mod_zip:
            # calculating crc32 would defeat the point of using mod-zip, but if we ever calculate hashsums we should consider this
            crc32 = "-"
            # We do have to calculate the crc32 for files that are between 1444 and 1459 bytes in size, xref: https://github.com/evanmiller/mod_zip/issues/44#issuecomment-656660686
            # Oddly that seems to be only true for usegalaxy.org (nginx version 1.12.2), and works fine locally (nginx 1.19.10).
            # May have been fixed in nginx 1.17.0
            if CRC32_MIN <= os.path.getsize(path) <= CRC32_MAX:
                with open(path, "rb") as contents:
                    crc32 = hex(zlib.crc32(contents.read()))[2:]
            line = f"{crc32} {size} {quote(path)} {archive_name}"
            head, tail = os.path.split(archive_name)
            if head:
                self.directories.add(head)
            self.files.append(line)
        else:
            self.size += size
            self.archive.write(path, archive_name)

[docs]    def write(self, path, archive_name=None):
        if os.path.isdir(path):
            pardir = os.path.join(path, os.pardir)
            for root, directories, files in safe_walk(path):
                for directory in directories:
                    dir_path = os.path.join(root, directory)
                    self.add_path(dir_path, os.path.relpath(dir_path, pardir))
                for file in files:
                    file_path = os.path.join(root, file)
                    self.add_path(file_path, os.path.relpath(file_path, pardir))
        else:
            self.add_path(path, archive_name or os.path.basename(path))