Warning

This document is for an in-development version of Galaxy. You can alternatively view this page in the latest release if it exists or view the top of the latest release's documentation.

Source code for galaxy.tool_util.verify.asserts.archive

import io
import re
import tarfile
import tempfile
import zipfile

from galaxy.util import asbool
from ._types import (
    Annotated,
    AssertionParameter,
    ChildAssertions,
    Delta,
    Max,
    Min,
    N,
    Negate,
    NEGATE_DEFAULT,
    OutputBytes,
    VerifyAssertionsFunction,
    XmlBool,
)
from ._util import _assert_presence_number


def _extract_from_tar(output_bytes, fn):
    """Return the contents of member ``fn`` of an in-memory tar archive.

    :param output_bytes: raw bytes of the tar archive; it is opened with
        ``mode="r"``, so compression is detected by ``tarfile`` itself
    :param fn: exact name of the archive member to read
    :returns: the member's contents as ``bytes``; ``b""`` for directories
    :raises KeyError: if the archive has no member named ``fn``
        (raised by ``TarFile.getmember``)
    :raises AssertionError: if the member carries no extractable data
    """
    with io.BytesIO(output_bytes) as temp:
        with tarfile.open(fileobj=temp, mode="r") as tar_temp:
            ti = tar_temp.getmember(fn)
            # zip treats directories like empty files, so make this
            # consistent for tar. The empty value must be ``bytes`` (not
            # ``str``) to match what is returned for every other member.
            if ti.isdir():
                return b""
            # Reuse the TarInfo fetched above instead of resolving the name
            # a second time.
            tar_file = tar_temp.extractfile(ti)
            # extractfile() yields None for members without a data stream;
            # raise explicitly so the check is not stripped under ``-O``.
            if tar_file is None:
                raise AssertionError(f"Archive member '{fn}' cannot be extracted")
            with tar_file as member_fh:
                return member_fh.read()

def _list_from_tar(output_bytes, path):
    """List the members of an in-memory tar archive whose name matches ``path``.

    ``path`` is a regular expression applied with ``re.match``, i.e. it only
    has to describe a prefix of the member name. The matching names are
    returned as an alphabetically sorted list.
    """
    with io.BytesIO(output_bytes) as buffer, tarfile.open(fileobj=buffer, mode="r") as archive:
        return sorted(name for name in archive.getnames() if re.match(path, name))


def _extract_from_zip(output_bytes, fn):
    """Return the bytes stored for member ``fn`` of an in-memory zip archive."""
    with io.BytesIO(output_bytes) as buffer, zipfile.ZipFile(buffer, mode="r") as archive:
        # ZipFile.read() opens the member, reads it completely and closes it.
        return archive.read(fn)


def _list_from_zip(output_bytes, path):
    """List the members of an in-memory zip archive whose name matches ``path``.

    ``path`` is a regular expression applied with ``re.match``, i.e. it only
    has to describe a prefix of the member name. The matching names are
    returned as an alphabetically sorted list.
    """
    with io.BytesIO(output_bytes) as buffer, zipfile.ZipFile(buffer, mode="r") as archive:
        return sorted(name for name in archive.namelist() if re.match(path, name))


# Annotated type of the ``path`` parameter of ``assert_has_archive_member``:
# a plain ``str`` with an AssertionParameter carrying its description text.
Path = Annotated[str, AssertionParameter("The regular expression specifying the archive member.")]
# Annotated type of the ``all`` parameter of ``assert_has_archive_member``.
# NOTE(review): ``xml_type`` presumably names the schema type used for the
# corresponding XML attribute — confirm against ``_types.AssertionParameter``.
All = Annotated[
    XmlBool,
    AssertionParameter(
        "Check the sub-assertions for all paths matching the path. Default: false, i.e. only the first",
        xml_type="PermissiveBoolean",
    ),
]


def assert_has_archive_member(
    output_bytes: OutputBytes,
    path: Path,
    verify_assertions_function: VerifyAssertionsFunction,
    children: ChildAssertions = None,
    all: All = False,
    n: N = None,
    delta: Delta = 0,
    min: Min = None,
    max: Max = None,
    negate: Negate = NEGATE_DEFAULT,
) -> None:
    """This tag allows to check if ``path`` is contained in a compressed file.

    The path is a regular expression that is matched against the full paths of the objects in
    the compressed file (remember that "matching" means it is checked if a prefix of
    the full path of an archive member is described by the regular expression).
    Valid archive formats include ``.zip``, ``.tar``, and ``.tar.gz``. Note that
    depending on the archive creation method:

    - full paths of the members may be prefixed with ``./``
    - directories may be treated as empty files

    ```xml
    <has_archive_member path="./path/to/my-file.txt"/>
    ```

    With ``n`` and ``delta`` (or ``min`` and ``max``) assertions on the number of
    archive members matching ``path`` can be expressed. The following could be used,
    e.g., to assert an archive containing n&plusmn;1 elements out of which at least
    4 need to have a ``txt`` extension.

    ```xml
    <has_archive_member path=".*" n="10" delta="1"/>
    <has_archive_member path=".*\\.txt" min="4"/>
    ```

    In addition the tag can contain additional assertions as child elements about
    the first member in the archive matching the regular expression ``path``. For
    instance

    ```xml
    <has_archive_member path=".*/my-file.txt">
      <not_has_text text="EDK72998.1"/>
    </has_archive_member>
    ```

    If the ``all`` attribute is set to ``true`` then all archive members are subject
    to the assertions. Note that, archive members matching the ``path`` are sorted
    alphabetically.

    The ``negate`` attribute of the ``has_archive_member`` assertion only affects
    the asserts on the presence and number of matching archive members, but not any
    sub-assertions (which can offer the ``negate`` attribute on their own).
    The check if the file is an archive at all, which is also done by the function, is
    not affected."""
    # Python-side contract (the docstring above documents the XML tag):
    # returns None; raises AssertionError when ``output_bytes`` is neither a
    # zip nor a tar archive, or when a sub-assertion fails for an inspected
    # member. The presence/count check is delegated to
    # ``_assert_presence_number`` (presumably also an AssertionError on
    # failure — confirm in ``_util``).

    # NOTE(review): ``all`` presumably arrives as a string when it comes from
    # XML; ``asbool`` normalises it before it is used as a condition below.
    all = asbool(all)

    # (extractor, lister) pair for the detected archive format; stays None
    # when the data is neither zip nor tar.
    handlers = None
    # from python 3.9 is_tarfile supports file like objects then we do not need
    # the tempfile detour but can use io.BytesIO(output_bytes)
    with tempfile.NamedTemporaryFile() as tmp:
        tmp.write(output_bytes)
        tmp.flush()
        if zipfile.is_zipfile(tmp.name):
            handlers = (_extract_from_zip, _list_from_zip)
        elif tarfile.is_tarfile(tmp.name):
            handlers = (_extract_from_tar, _list_from_tar)
    assert handlers is not None, f"Expected path '{path}' to be an archive"
    extract_foo, list_foo = handlers

    # get list of matching file names in archive and check against n, delta,
    # min, max (slightly abusing the output and text as well as the function
    # parameters)
    fns = list_foo(output_bytes, path)
    _assert_presence_number(
        None,
        path,
        n,
        delta,
        min,
        max,
        negate,
        lambda o, t: len(fns) > 0,
        lambda o, t: len(fns),
        "{expected} path '{text}' in archive",
        "{expected} {n}+-{delta} matches for path '{text}' in archive",
        "{expected} that the number of matches for path '{text}' in archive is in [{min}:{max}]",
    )

    # check sub-assertions on members matching path; ``fns`` is sorted, so
    # without ``all`` only the alphabetically first match is inspected
    for fn in fns:
        contents = extract_foo(output_bytes, fn)
        try:
            verify_assertions_function(contents, children)
        except AssertionError as e:
            # Chain the original failure so its traceback stays attached.
            raise AssertionError(f"Archive member '{path}': {str(e)}") from e
        if not all:
            break