Warning

This document is for an in-development version of Galaxy. You can alternatively view this page in the latest release if it exists or view the top of the latest release's documentation.

Source code for galaxy.tool_util.deps.container_resolvers.mulled

"""This module describes the :class:`MulledContainerResolver` ContainerResolver plugin."""

import logging
import os
import subprocess
from abc import (
    ABCMeta,
    abstractmethod,
)
from typing import (
    List,
    NamedTuple,
    Optional,
    TYPE_CHECKING,
)

from galaxy.util import (
    safe_makedirs,
    string_as_bool,
    unicodify,
    which,
)
from galaxy.util.commands import shell
from . import (
    ContainerResolver,
    ResolutionCache,
)
from ..container_classes import CONTAINER_CLASSES
from ..docker_util import build_docker_images_command
from ..mulled.mulled_build import (
    DEFAULT_CHANNELS,
    ensure_installed,
    InvolucroContext,
    mull_targets,
)
from ..mulled.mulled_build_tool import requirements_to_mulled_targets
from ..mulled.util import (
    default_mulled_conda_channels_from_env,
    mulled_tags_for,
    split_tag,
    Target,
    v1_image_name,
    v2_image_name,
    version_sorted,
)
from ..requirements import (
    ContainerDescription,
    DEFAULT_CONTAINER_SHELL,
)

if TYPE_CHECKING:
    from ..dependencies import AppInfo

log = logging.getLogger(__name__)


class CachedMulledImageSingleTarget(NamedTuple):
    package_name: str
    version: str
    build: str
    image_identifier: str

    multi_target: bool = False


class CachedV1MulledImageMultiTarget(NamedTuple):
    hash: str
    build: str
    image_identifier: str

    multi_target: str = "v1"


class CachedV2MulledImageMultiTarget(NamedTuple):
    image_name: str
    version_hash: str
    build: str
    image_identifier: str

    multi_target: str = "v2"

    @property
    def package_hash(target):
        # Make this work for Singularity file name or fully qualified Docker repository
        # image names.
        image_name = target.image_name
        if "/" not in image_name:
            return image_name
        else:
            return image_name.rsplit("/")[-1]


class CacheDirectory(metaclass=ABCMeta):
    def __init__(self, path, hash_func="v2"):
        self.path = path
        self.hash_func = hash_func

    def _list_cached_mulled_images_from_path(self):
        contents = os.listdir(self.path)
        sorted_images = version_sorted(contents)
        raw_images = map(lambda name: identifier_to_cached_target(name, self.hash_func), sorted_images)
        return list(i for i in raw_images if i is not None)

    @abstractmethod
    def list_cached_mulled_images_from_path(self):
        """Generate a list of cached, mulled images in the cache."""

    @abstractmethod
    def invalidate_cache(self):
        """Invalidate the cache."""


class UncachedCacheDirectory(CacheDirectory):
    cacher_type = "uncached"

    def list_cached_mulled_images_from_path(self):
        return self._list_cached_mulled_images_from_path()

    def invalidate_cache(self):
        pass


class DirMtimeCacheDirectory(CacheDirectory):
    cacher_type = "dir_mtime"

    def __init__(self, path, **kwargs):
        super().__init__(path, **kwargs)
        self.invalidate_cache()

    def __get_mtime(self):
        return os.stat(self.path).st_mtime

    def __cache(self):
        self.__contents = self._list_cached_mulled_images_from_path()
        self.__mtime = self.__get_mtime()
        log.debug(f"Cached images in path {self.path} at directory mtime {self.__mtime}")

    def list_cached_mulled_images_from_path(self):
        mtime = self.__get_mtime()
        if mtime != self.__mtime:
            if mtime < self.__mtime:
                log.warning(
                    f"Modification time '{mtime}' of cache directory '{self.path}' is older than previous "
                    f"modification time '{self.__mtime}'! Cache directory will be recached"
                )
            self.__cache()
        return self.__contents

    def invalidate_cache(self):
        self.__mtime = -1
        self.__contents = []


def get_cache_directory_cacher(cacher_type):
    # these can become a separate module and use plugin_config if we need more
    cachers = {
        UncachedCacheDirectory.cacher_type: UncachedCacheDirectory,
        DirMtimeCacheDirectory.cacher_type: DirMtimeCacheDirectory,
    }
    cacher_type = cacher_type or "uncached"
    return cachers[cacher_type]


def list_docker_cached_mulled_images(namespace=None, hash_func="v2", resolution_cache=None):
    cache_key = "galaxy.tool_util.deps.container_resolvers.mulled:cached_images"
    if resolution_cache is not None and cache_key in resolution_cache:
        images_and_versions = resolution_cache.get(cache_key)
    else:
        command = build_docker_images_command(truncate=True, sudo=False, to_str=False)
        try:
            images_and_versions = unicodify(subprocess.check_output(command)).strip().splitlines()
        except subprocess.CalledProcessError:
            log.info("Call to `docker images` failed, configured container resolution may be broken")
            return []
        images_and_versions = [":".join(line.split()[0:2]) for line in images_and_versions[1:]]
        if resolution_cache is not None:
            resolution_cache[cache_key] = images_and_versions

    def output_line_to_image(line):
        image = identifier_to_cached_target(line, hash_func, namespace=namespace)
        return image

    name_filter = get_filter(namespace)
    sorted_images = version_sorted([_ for _ in filter(name_filter, images_and_versions)])
    raw_images = (output_line_to_image(_) for _ in sorted_images)
    return [i for i in raw_images if i is not None]


def identifier_to_cached_target(identifier, hash_func, namespace=None):
    if ":" in identifier:
        image_name, version = identifier.rsplit(":", 1)
    else:
        image_name = identifier
        version = None

    if not version or version == "latest":
        version = None

    image = None
    prefix = ""
    if namespace is not None:
        prefix = f"quay.io/{namespace}/"
    if image_name.startswith(f"{prefix}mulled-v1-"):
        if hash_func == "v2":
            return None

        hash = image_name
        build = None
        if version and version.isdigit():
            build = version
        image = CachedV1MulledImageMultiTarget(hash, build, identifier)
    elif image_name.startswith(f"{prefix}mulled-v2-"):
        if hash_func == "v1":
            return None

        version_hash = None
        build = None

        if version and "-" in version:
            version_hash, build = version.rsplit("-", 1)
        elif version.isdigit():
            version_hash, build = None, version
        elif version:
            log.debug(f"Unparsable mulled image tag encountered [{version}]")

        image = CachedV2MulledImageMultiTarget(image_name, version_hash, build, identifier)
    else:
        build = None
        if version and "--" in version:
            version, build = split_tag(version)
        if prefix and image_name.startswith(prefix):
            image_name = image_name[len(prefix) :]
        image = CachedMulledImageSingleTarget(image_name, version, build, identifier)
    return image


def get_filter(namespace):
    prefix = "quay.io/" if namespace is None else f"quay.io/{namespace}"
    return lambda name: name.startswith(prefix) and name.count("/") == 2


def find_best_matching_cached_image(targets, cached_images, hash_func):
    if len(targets) == 0:
        return None

    image = None
    if len(targets) == 1:
        target = targets[0]
        for cached_image in cached_images:
            if cached_image.multi_target:
                continue
            if not cached_image.package_name == target.package_name:
                continue
            if not target.version or target.version == cached_image.version:
                image = cached_image
                break
    elif hash_func == "v2":
        name = v2_image_name(targets)
        if ":" in name:
            package_hash, version_hash = name.split(":", 2)
        else:
            package_hash, version_hash = name, None

        for cached_image in cached_images:
            if cached_image.multi_target != "v2":
                continue

            if version_hash is None:
                # Just match on package hash...
                if package_hash == cached_image.package_hash:
                    image = cached_image
                    break
            else:
                # Match on package and version hash...
                if package_hash == cached_image.package_hash and version_hash == cached_image.version_hash:
                    image = cached_image
                    break

    elif hash_func == "v1":
        name = v1_image_name(targets)
        for cached_image in cached_images:
            if cached_image.multi_target != "v1":
                continue

            if name == cached_image.hash:
                image = cached_image
                break
    return image


def docker_cached_container_description(
    targets: List[Target],
    namespace: str,
    hash_func: str = "v2",
    shell: str = DEFAULT_CONTAINER_SHELL,
    resolution_cache: Optional[ResolutionCache] = None,
):
    if len(targets) == 0:
        return None

    cached_images = list_docker_cached_mulled_images(namespace, hash_func=hash_func, resolution_cache=resolution_cache)
    image = find_best_matching_cached_image(targets, cached_images, hash_func)

    container = None
    if image:
        container = ContainerDescription(
            image.image_identifier,
            type="docker",
            shell=shell,
        )

    return container


def singularity_cached_container_description(targets, cache_directory, hash_func="v2", shell=DEFAULT_CONTAINER_SHELL):
    if len(targets) == 0:
        return None

    if not os.path.exists(cache_directory.path):
        return None

    cached_images = cache_directory.list_cached_mulled_images_from_path()
    image = find_best_matching_cached_image(targets, cached_images, hash_func)

    container = None
    if image:
        container = ContainerDescription(
            os.path.abspath(os.path.join(cache_directory.path, image.image_identifier)),
            type="singularity",
            shell=shell,
        )

    return container


def targets_to_mulled_name(
    targets, hash_func, namespace, resolution_cache: Optional[ResolutionCache] = None, session=None
):
    unresolved_cache_key = "galaxy.tool_util.deps.container_resolvers.mulled:unresolved"
    if resolution_cache is not None:
        if unresolved_cache_key not in resolution_cache:
            resolution_cache[unresolved_cache_key] = set()
        unresolved_cache = resolution_cache.get(unresolved_cache_key)
    else:
        unresolved_cache = set()

    mulled_resolution_cache = None
    if resolution_cache and resolution_cache.mulled_resolution_cache:
        mulled_resolution_cache = resolution_cache.mulled_resolution_cache

    name = None

    def cached_name(cache_key):
        if mulled_resolution_cache:
            try:
                return resolution_cache.get(cache_key)
            except KeyError:
                return None
        return None

    if len(targets) == 1:
        target = targets[0]
        target_version = target.version
        cache_key = f"ns[{namespace}]__single__{target.package_name}__@__{target_version}"
        if cache_key in unresolved_cache:
            return None
        name = cached_name(cache_key)
        if name:
            return name

        tags = mulled_tags_for(namespace, target.package_name, resolution_cache=resolution_cache, session=session)

        if tags:
            for tag in tags:
                if "--" in tag:
                    version, _ = split_tag(tag)
                else:
                    version = tag
                if target_version and version == target_version:
                    name = f"{target.package_name}:{tag}"
                    break

    else:

        def first_tag_if_available(image_name):
            if ":" in image_name:
                repo_name, tag_prefix = image_name.split(":", 2)
            else:
                repo_name = image_name
                tag_prefix = None
            tags = mulled_tags_for(
                namespace, repo_name, tag_prefix=tag_prefix, resolution_cache=resolution_cache, session=session
            )
            return tags[0] if tags else None

        if hash_func == "v2":
            base_image_name = v2_image_name(targets)
        elif hash_func == "v1":
            base_image_name = v1_image_name(targets)
        else:
            raise Exception(f"Unimplemented mulled hash_func [{hash_func}]")

        cache_key = f"ns[{namespace}]__{hash_func}__{base_image_name}"
        if cache_key in unresolved_cache:
            return None
        name = cached_name(cache_key)
        if name:
            return name

        tag = first_tag_if_available(base_image_name)
        if tag:
            if ":" in base_image_name:
                assert hash_func != "v1"
                # base_image_name of form <package_hash>:<version_hash>, expand tag
                # to include build number in tag.
                name = f"{base_image_name.split(':')[0]}:{tag}"
            else:
                # base_image_name of form <package_hash>, simply add build number
                # as tag to fully qualify image.
                name = f"{base_image_name}:{tag}"

    if name and mulled_resolution_cache:
        mulled_resolution_cache.put(cache_key, name)

    if name is None:
        unresolved_cache.add(name)

    return name


class CliContainerResolver(ContainerResolver):
    container_type = "docker"
    cli = "docker"

    def __init__(self, *args, **kwargs):
        self._cli_available = bool(which(self.cli))
        super().__init__(*args, **kwargs)

    @property
    def cli_available(self):
        return self._cli_available

    @cli_available.setter
    def cli_available(self, value):
        if not value:
            log.info(
                f"{self.cli} CLI not available, cannot list or pull images in Galaxy process. Does not impact kubernetes."
            )
        self._cli_available = value


class SingularityCliContainerResolver(CliContainerResolver):
    container_type = "singularity"
    cli = "singularity"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.cache_directory_path = kwargs.get(
            "cache_directory", os.path.join(kwargs["app_info"].container_image_cache_path, "singularity", "mulled")
        )
        self.cache_directory_cacher_type = kwargs.get("cache_directory_cacher_type", None)
        self.cache_directory = None
        self.hash_func = None

    def _init_cache_directory(self):
        cacher_class = get_cache_directory_cacher(self.cache_directory_cacher_type)
        self.cache_directory = cacher_class(self.cache_directory_path, hash_func=self.hash_func)
        safe_makedirs(self.cache_directory.path)


[docs]class CachedMulledDockerContainerResolver(CliContainerResolver): resolver_type = "cached_mulled" shell = "/bin/bash"
[docs] def __init__(self, app_info=None, namespace="biocontainers", hash_func="v2", **kwds): super().__init__(app_info=app_info, **kwds) self.namespace = namespace self.hash_func = hash_func
[docs] def resolve(self, enabled_container_types, tool_info, **kwds): if ( not self.cli_available or tool_info.requires_galaxy_python_environment or self.container_type not in enabled_container_types ): return None targets = mulled_targets(tool_info) log.debug(f"Image name for tool {tool_info.tool_id}: {image_name(targets, self.hash_func)}") resolution_cache = kwds.get("resolution_cache") return docker_cached_container_description( targets, self.namespace, hash_func=self.hash_func, shell=self.shell, resolution_cache=resolution_cache )
def __str__(self): return f"CachedMulledDockerContainerResolver[namespace={self.namespace}]"
[docs]class CachedMulledSingularityContainerResolver(SingularityCliContainerResolver): resolver_type = "cached_mulled_singularity" shell = "/bin/bash"
[docs] def __init__(self, app_info=None, hash_func="v2", **kwds): super().__init__(app_info=app_info, **kwds) self.hash_func = hash_func self._init_cache_directory()
[docs] def resolve(self, enabled_container_types, tool_info, **kwds): if tool_info.requires_galaxy_python_environment or self.container_type not in enabled_container_types: return None targets = mulled_targets(tool_info) log.debug(f"Image name for tool {tool_info.tool_id}: {image_name(targets, self.hash_func)}") return singularity_cached_container_description( targets, self.cache_directory, hash_func=self.hash_func, shell=self.shell )
def __str__(self): return f"CachedMulledSingularityContainerResolver[cache_directory={self.cache_directory.path}]"
[docs]class MulledDockerContainerResolver(CliContainerResolver): """Look for mulled images matching tool dependencies.""" resolver_type = "mulled" shell = "/bin/bash" protocol: Optional[str] = None
[docs] def __init__(self, app_info=None, namespace="biocontainers", hash_func="v2", auto_install=True, **kwds): super().__init__(app_info=app_info, **kwds) self.namespace = namespace self.hash_func = hash_func self.auto_install = string_as_bool(auto_install)
[docs] def cached_container_description(self, targets, namespace, hash_func, resolution_cache): try: return docker_cached_container_description( targets, namespace, hash_func=hash_func, resolution_cache=resolution_cache ) except subprocess.CalledProcessError: # We should only get here if a docker binary is available, but command quits with a non-zero exit code, # e.g if the docker daemon is not available log.exception("An error occured while listing cached docker image. Docker daemon may need to be restarted.") return None
[docs] def pull(self, container): if self.cli_available: command = container.build_pull_command() shell(command)
@property def can_list_containers(self): return self.cli_available
[docs] def resolve(self, enabled_container_types, tool_info, install=False, session=None, **kwds): resolution_cache = kwds.get("resolution_cache") if tool_info.requires_galaxy_python_environment or self.container_type not in enabled_container_types: return None targets = mulled_targets(tool_info) log.debug(f"Image name for tool {tool_info.tool_id}: {image_name(targets, self.hash_func)}") if len(targets) == 0: return None name = targets_to_mulled_name( targets=targets, hash_func=self.hash_func, namespace=self.namespace, resolution_cache=resolution_cache, session=session, ) if name: container_id = f"quay.io/{self.namespace}/{name}" if self.protocol: container_id = f"{self.protocol}{container_id}" container_description = ContainerDescription( container_id, type=self.container_type, shell=self.shell, ) if self.can_list_containers: if install and not self.cached_container_description( targets, namespace=self.namespace, hash_func=self.hash_func, resolution_cache=resolution_cache, ): destination_info = {} destination_for_container_type = kwds.get("destination_for_container_type") if destination_for_container_type: destination_info = destination_for_container_type(self.container_type) container = CONTAINER_CLASSES[self.container_type]( container_description.identifier, self.app_info, tool_info, destination_info, {}, container_description, ) self.pull(container) if not self.auto_install: container_description = ( self.cached_container_description( targets, namespace=self.namespace, hash_func=self.hash_func, resolution_cache=resolution_cache, ) or container_description ) return container_description
def __str__(self): return f"MulledDockerContainerResolver[namespace={self.namespace}]"
[docs]class MulledSingularityContainerResolver(SingularityCliContainerResolver, MulledDockerContainerResolver): resolver_type = "mulled_singularity" protocol = "docker://"
[docs] def __init__(self, app_info=None, namespace="biocontainers", hash_func="v2", auto_install=True, **kwds): super().__init__(app_info=app_info, **kwds) self.namespace = namespace self.hash_func = hash_func self._init_cache_directory() self.auto_install = string_as_bool(auto_install)
[docs] def cached_container_description(self, targets, namespace, hash_func, resolution_cache): return singularity_cached_container_description( targets, cache_directory=self.cache_directory, hash_func=hash_func )
@property def can_list_containers(self): # Only needs access to path, doesn't require CLI return True
[docs] def pull(self, container): if self.cli_available: cmds = container.build_mulled_singularity_pull_command( cache_directory=self.cache_directory.path, namespace=self.namespace ) shell(cmds=cmds) self.cache_directory.invalidate_cache()
def __str__(self): return f"MulledSingularityContainerResolver[namespace={self.namespace}]"
[docs]class BuildMulledDockerContainerResolver(CliContainerResolver): """Build for Docker mulled images matching tool dependencies.""" resolver_type = "build_mulled" shell = "/bin/bash" builds_on_resolution = True
[docs] def __init__( self, app_info: Optional["AppInfo"] = None, namespace: str = "local", hash_func: str = "v2", auto_install: bool = True, **kwds, ) -> None: super().__init__(app_info=app_info, **kwds) self._involucro_context_kwds = {"involucro_bin": self._get_config_option("involucro_path", None)} self.namespace = namespace self.hash_func = hash_func self.auto_install = string_as_bool(auto_install) self._mulled_kwds = { "namespace": namespace, "hash_func": self.hash_func, "command": "build-and-test", "use_mamba": True, } self._mulled_kwds["channels"] = default_mulled_conda_channels_from_env() or self._get_config_option( "mulled_channels", DEFAULT_CHANNELS ) self.involucro_context = InvolucroContext(**self._involucro_context_kwds) auto_init = self._get_config_option("involucro_auto_init", True) self.enabled = ensure_installed(self.involucro_context, auto_init)
[docs] def resolve(self, enabled_container_types, tool_info, install=False, **kwds): if tool_info.requires_galaxy_python_environment or self.container_type not in enabled_container_types: return None targets = mulled_targets(tool_info) log.debug(f"Image name for tool {tool_info.tool_id}: {image_name(targets, self.hash_func)}") if len(targets) == 0: return None if self.auto_install or install: mull_targets(targets, involucro_context=self.involucro_context, **self._mulled_kwds) return docker_cached_container_description(targets, self.namespace, hash_func=self.hash_func, shell=self.shell)
def __str__(self): return f"BuildDockerContainerResolver[namespace={self.namespace}]"
[docs]class BuildMulledSingularityContainerResolver(SingularityCliContainerResolver): """Build for Singularity mulled images matching tool dependencies.""" resolver_type = "build_mulled_singularity" shell = "/bin/bash" builds_on_resolution = True
[docs] def __init__( self, app_info: Optional["AppInfo"] = None, hash_func: str = "v2", auto_install: bool = True, **kwds ) -> None: super().__init__(app_info=app_info, **kwds) self._involucro_context_kwds = {"involucro_bin": self._get_config_option("involucro_path", None)} self.hash_func = hash_func self._init_cache_directory() self.auto_install = string_as_bool(auto_install) self._mulled_kwds = { "channels": self._get_config_option("mulled_channels", DEFAULT_CHANNELS), "hash_func": self.hash_func, "command": "build-and-test", "singularity": True, "singularity_image_dir": self.cache_directory.path, "use_mamba": True, } self.involucro_context = InvolucroContext(**self._involucro_context_kwds) auto_init = self._get_config_option("involucro_auto_init", True) self.enabled = ensure_installed(self.involucro_context, auto_init)
[docs] def resolve(self, enabled_container_types, tool_info, install=False, **kwds): if tool_info.requires_galaxy_python_environment or self.container_type not in enabled_container_types: return None targets = mulled_targets(tool_info) log.debug(f"Image name for tool {tool_info.tool_id}: {image_name(targets, self.hash_func)}") if len(targets) == 0: return None if self.auto_install or install: mull_targets(targets, involucro_context=self.involucro_context, **self._mulled_kwds) return singularity_cached_container_description( targets, self.cache_directory, hash_func=self.hash_func, shell=self.shell )
def __str__(self): return f"BuildSingularityContainerResolver[cache_directory={self.cache_directory.path}]"
def mulled_targets(tool_info): return requirements_to_mulled_targets(tool_info.requirements) def image_name(targets, hash_func): if len(targets) == 0: return "no targets" elif hash_func == "v2": return v2_image_name(targets) else: return v1_image_name(targets) __all__ = ( "CachedMulledDockerContainerResolver", "CachedMulledSingularityContainerResolver", "MulledDockerContainerResolver", "MulledSingularityContainerResolver", "BuildMulledDockerContainerResolver", "BuildMulledSingularityContainerResolver", )