Warning

This document is for an old release of Galaxy. You can alternatively view this page in the latest release if it exists or view the top of the latest release's documentation.

Source code for galaxy.tool_util.deps.mulled.mulled_search

#!/usr/bin/env python

import argparse
import json
import logging
import sys
import tempfile
from datetime import (
    datetime,
    timezone,
)

import requests

from galaxy.tool_util.deps.conda_util import CondaContext
from galaxy.util import which
from .mulled_list import get_singularity_containers
from .util import (
    build_target,
    MULLED_SOCKET_TIMEOUT,
    v2_image_name,
)

try:
    from whoosh.fields import (
        Schema,
        STORED,
        TEXT,
    )
    from whoosh.index import create_in
    from whoosh.qparser import QueryParser
except ImportError:
    Schema = TEXT = STORED = create_in = QueryParser = None

# Base URL of the quay.io REST endpoint for repositories; used both for listing
# an organization's repositories and for querying a single repository.
QUAY_API_URL = "https://quay.io/api/v1/repository"
# Looked up once at import time. A falsy value means conda was not found:
# CondaSearch.get_json() then raises and main() leaves "conda" out of the
# default search destinations. Presumably the path to the conda executable.
conda_path = which("conda")


class QuaySearch:
    """
    Search the repositories of a quay.io organization by software name.
    """

    def __init__(self, organization):
        self.organization = organization
        # Populated by build_index(); search_repository() requires it.
        self.index = None

    def build_index(self):
        """
        Download the public repository listing of ``self.organization`` and
        load it into a fresh Whoosh index stored in a new temporary directory.
        """
        response = requests.get(
            QUAY_API_URL,
            headers={"Accept-encoding": "gzip"},
            params={"public": "true", "namespace": self.organization},
            timeout=MULLED_SOCKET_TIMEOUT,
        )
        index_dir = tempfile.mkdtemp()
        self.index = create_in(index_dir, Schema(title=TEXT(stored=True), content=STORED))

        listing = json.loads(response.text)
        index_writer = self.index.writer()
        # The repository name is the searchable field; the description is only stored.
        for repo in listing["repositories"]:
            index_writer.add_document(title=repo["name"], content=repo["description"])
        index_writer.commit()

    def search_repository(self, search_string, non_strict):
        """
        Look up ``search_string`` in the index built by :meth:`build_index`.

        When ``non_strict`` is true, up to two spelling suggestions for the
        search string are additionally searched as ``*suggestion*`` wildcards.

        Returns a list of ``{"package": ..., "version": ...}`` dicts, one per
        tag reported by quay.io for every matching repository.
        """
        # The searcher is closed automatically when the with block ends.
        with self.index.searcher() as searcher:
            hits = searcher.search(QueryParser("title", self.index.schema).parse(search_string))
            if non_strict:
                # Treat likely typos as additional search terms.
                suggestions = searcher.corrector("title").suggest(search_string, limit=2)
                for suggestion in suggestions:
                    wildcard_query = QueryParser("title", self.index.schema).parse(f"*{suggestion}*")
                    hits.extend(searcher.search(wildcard_query))

            titles = [hit["title"] for hit in hits]
            return [
                {
                    "package": title,
                    "version": tag,
                }
                for title in titles
                for tag in self.get_additional_repository_information(title)
            ]

    def get_additional_repository_information(self, repository_string):
        """
        Fetch the quay.io record of one repository in ``self.organization``
        and return its ``tags`` field, which carries the version information.
        """
        response = requests.get(
            f"{QUAY_API_URL}/{self.organization}/{repository_string}",
            headers={"Accept-encoding": "gzip"},
            timeout=MULLED_SOCKET_TIMEOUT,
        )
        return json.loads(response.text)["tags"]


class CondaSearch:
    """
    Tool to search a conda channel for packages.
    """

    def __init__(self, channel):
        self.channel = channel

    @staticmethod
    def _parse_search_output(raw_out):
        """
        Turn the text returned by the conda search into a list of
        ``{"package": ..., "version": ..., "build": ...}`` dicts.

        The first two lines and the last line of ``raw_out`` are discarded
        (presumably header lines and the empty string after the trailing
        newline). Rows with fewer than three whitespace-separated fields are
        skipped instead of raising ``IndexError``.
        """
        records = []
        for row in raw_out.split("\n")[2:-1]:
            fields = row.split()
            if len(fields) < 3:
                # Blank or malformed row: nothing usable to report.
                continue
            records.append({"package": fields[0], "version": fields[1], "build": fields[2]})
        return records

    def get_json(self, search_string):
        """
        Search ``self.channel`` for ``search_string`` using the conda
        executable found on the PATH.

        Returns a list of ``{"package", "version", "build"}`` dicts, or an
        empty list when the search itself fails (the failure is logged).

        Raises ``Exception`` when conda is not available on the PATH.
        """
        if not conda_path:
            raise Exception("Invalid search destination. Required dependency [conda] is not in your PATH.")
        try:
            conda_context = CondaContext(conda_exec=conda_path, ensure_channels=self.channel)
            raw_out = conda_context.exec_search([search_string])
        except Exception as e:
            # Deliberately best-effort: a failed search yields no results.
            logging.info(f"Search failed with: {e}")
            return []
        return self._parse_search_output(raw_out)


class GitHubSearch:
    """
    Tool to search the GitHub bioconda-recipes repo
    """

    @staticmethod
    def _check_response_rate_limit(response):
        """
        Raise ``Exception`` when ``response`` is a GitHub rate-limit rejection.

        A rejection is a 403 whose JSON body has a ``message`` containing
        "API rate limit exceeded". Any other response, including a 403 whose
        body is not JSON or has no such message, is left for the caller to
        handle.
        """
        if response.status_code != 403:
            return
        try:
            payload = response.json()
        except ValueError:
            # Body could not be decoded as JSON, so it carries no rate limit message.
            return
        error_text = payload.get("message") if isinstance(payload, dict) else None
        if not isinstance(error_text, str) or "API rate limit exceeded" not in error_text:
            return

        # It can take tens of minutes before the rate limit window resets
        message = "GitHub API rate limit exceeded."
        rate_limit_reset_UTC_timestamp = response.headers.get("X-RateLimit-Reset")
        if rate_limit_reset_UTC_timestamp:
            rate_limit_reset_datetime = datetime.fromtimestamp(int(rate_limit_reset_UTC_timestamp), tz=timezone.utc)
            message += f" The rate limit window will reset at {rate_limit_reset_datetime.isoformat()}."
        raise Exception(message)

    def get_json(self, search_string):
        """
        Takes search_string variable and return results from the bioconda-recipes github repository in JSON format

        DEPRECATED: this method is currently unreliable because the API query
        sometimes succeeds but returns no items.

        Raises ``Exception`` on a rate-limit rejection and whatever
        ``response.raise_for_status()`` raises for other HTTP errors.
        """
        response = requests.get(
            f"https://api.github.com/search/code?q={search_string}+in:path+repo:bioconda/bioconda-recipes+path:recipes",
            timeout=MULLED_SOCKET_TIMEOUT,
        )
        self._check_response_rate_limit(response)
        response.raise_for_status()
        return response.json()

    def process_json(self, json_response, search_string):
        """
        Reduce a code-search response to the ``name`` and ``path`` of its
        first ten items.

        ``search_string`` is accepted for interface compatibility but unused.
        """
        top_10_items = json_response["items"][0:10]  # get top ten results
        return [{"name": result["name"], "path": result["path"]} for result in top_10_items]

    def recipe_present(self, search_string):
        """
        Check if a recipe exists in bioconda-recipes which matches search_string exactly

        Returns ``True`` only for an HTTP 200 answer; raises ``Exception`` on
        a rate-limit rejection.
        """
        response = requests.get(
            f"https://api.github.com/repos/bioconda/bioconda-recipes/contents/recipes/{search_string}",
            timeout=MULLED_SOCKET_TIMEOUT,
        )
        self._check_response_rate_limit(response)
        return response.status_code == 200


def get_package_hash(packages, versions):
    """
    Compute the mulled v2 hash for ``packages`` and check quay.io for a
    matching biocontainers repository.

    :param packages: iterable of package names.
    :param versions: mapping of package name to version; when empty or falsy
        only the package hash is computed.
    :returns: dict with ``package_hash`` and ``container_present``; when
        ``versions`` is given also ``version_hash`` and, if the repository
        exists, ``container_present_with_version``.
    """
    if versions:
        targets = [build_target(p, version=versions[p]) for p in packages]
    else:  # if versions are not given only calculate the package hash
        targets = [build_target(p) for p in packages]

    # The image name is split on ":" into the package hash and, when versions
    # were supplied, the version hash.
    name_parts = v2_image_name(targets).split(":")
    hash_results = {"package_hash": name_parts[0]}
    if versions:
        hash_results["version_hash"] = name_parts[1]

    r = requests.get(
        f"https://quay.io/api/v1/repository/biocontainers/{hash_results['package_hash']}", timeout=MULLED_SOCKET_TIMEOUT
    )
    container_present = r.status_code == 200
    hash_results["container_present"] = container_present
    if container_present and versions:
        # Strip the trailing build number ("-0", "-1", ..., "-10", ...) from
        # every tag. Cutting at the last "-" also handles multi-digit builds.
        # Assumes iterating the "tags" field yields tag names.
        tags = {tag.rsplit("-", 1)[0] for tag in r.json()["tags"]}
        hash_results["container_present_with_version"] = hash_results["version_hash"] in tags
    return hash_results


def singularity_search(search_string):
    """
    Return every known singularity container whose name contains
    ``search_string``, as a list of ``{"package": ..., "version": ...}``
    dicts split from the ``name:version`` container string.
    """
    matches = []
    for image in get_singularity_containers():
        if search_string not in image:
            continue
        fields = image.split(":")
        matches.append({"package": fields[0], "version": fields[1]})
    return matches


def _write_table(rows):
    """Write ``rows`` to stdout, padding every column except the last to its widest cell plus two spaces."""
    padded = len(rows[0]) - 1
    widths = [max(len(row[n]) for row in rows) + 2 for n in range(padded)]
    for row in rows:
        cells = [row[n].ljust(widths[n]) for n in range(padded)]
        cells.append(row[-1])  # the last cell already ends with its newline
        sys.stdout.write("".join(cells))


def readable_output(json, organization="biocontainers", channel="bioconda"):
    """
    Print search results to stdout as plain-text tables.

    :param json: results keyed by destination (``quay``, ``conda``,
        ``singularity``, ``github``), each mapping a search term to a list of
        result dicts, plus optionally ``github_recipe_present`` holding a
        ``recipes`` list.
    :param organization: quay organization used in the ``docker pull`` hint.
    :param channel: conda channel used in the ``conda install`` hint.

    Up to three sections are written: container/package results, exactly
    matching bioconda recipes, and other GitHub hits. A section is omitted
    when it has no rows.
    """
    # if json is empty:
    if sum(len(json[destination][results]) for destination in json for results in json[destination]) == 0:
        sys.stdout.write("No results found for that query.\n")
        return

    # Tracks whether an earlier section was printed, so that later sections
    # are separated from it by a blank line.
    section_written = False

    # return results for quay, conda and singularity together
    if (
        sum(
            len(json[destination][results])
            for destination in ("quay", "conda", "singularity")
            for results in json.get(destination, [])
        )
        > 0
    ):
        sys.stdout.write("The query returned the following result(s).\n")
        lines = [["LOCATION", "NAME", "VERSION", "COMMAND\n"]]
        for results in json.get("quay", {}).values():
            for result in results:
                lines.append(
                    [
                        "quay",
                        result["package"],
                        result["version"],
                        f"docker pull quay.io/{organization}/{result['package']}:{result['version']}\n",
                    ]
                )  # NOT a real solution
        for results in json.get("conda", {}).values():
            for result in results:
                lines.append(
                    [
                        "conda",
                        result["package"],
                        f"{result['version']}--{result['build']}",
                        f"conda install -c {channel} {result['package']}={result['version']}={result['build']}\n",
                    ]
                )
        for results in json.get("singularity", {}).values():
            for result in results:
                lines.append(
                    [
                        "singularity",
                        result["package"],
                        result["version"],
                        f"wget https://depot.galaxyproject.org/singularity/{result['package']}:{result['version']}\n",
                    ]
                )
        _write_table(lines)
        section_written = True

    # Only announce exact recipe matches when there is at least one; the
    # container dict itself is truthy even when its "recipes" list is empty.
    recipes = (json.get("github_recipe_present") or {}).get("recipes")
    if recipes:
        sys.stdout.write("\n" if section_written else "")
        sys.stdout.write(
            "The following recipes were found in the bioconda-recipes repository which exactly matched one of the search terms:\n"
        )
        lines = [["QUERY", "LOCATION\n"]]
        for recipe in recipes:
            lines.append(
                [recipe, f"https://api.github.com/repos/bioconda/bioconda-recipes/contents/recipes/{recipe}\n"]
            )
        _write_table(lines)
        section_written = True

    if sum(len(json["github"][results]) for results in json.get("github", [])) > 0:
        sys.stdout.write("\n" if section_written else "")
        sys.stdout.write("Other result(s) on the bioconda-recipes GitHub repository:\n")
        lines = [["QUERY", "FILE", "URL\n"]]
        for search_string, results in json.get("github", {}).items():
            for result in results:
                lines.append(
                    [
                        search_string,
                        result["name"],
                        f"https://github.com/bioconda/bioconda-recipes/tree/master/{result['path']}\n",
                    ]
                )
        _write_table(lines)


def deps_error_message(package):
    """Return the hint shown when the optional dependency ``package`` is missing."""
    template = "Required dependency [{}] is not installed. Run 'pip install galaxy-tool-util[mulled]'."
    return template.format(package)


def main(argv=None):
    """
    Command-line entry point: search quay, conda, singularity and/or GitHub
    for the requested tool names and print the results.

    :param argv: list of command-line arguments to parse; ``None`` lets
        argparse read them from ``sys.argv``.

    Results are printed either as the raw result dict (``--json``) or as
    human-readable tables. If Whoosh is not installed, only an installation
    hint is written and the function returns.
    """
    if Schema is None:
        sys.stdout.write(deps_error_message("Whoosh"))
        return

    # conda is only searched by default when an executable was found.
    destination_defaults = ["quay", "singularity", "github"]
    if conda_path:
        destination_defaults.append("conda")

    parser = argparse.ArgumentParser(description="Searches in a given quay organization for a repository")
    parser.add_argument(
        "-d",
        "--destination",
        dest="search_dest",
        nargs="+",
        default=destination_defaults,
        help="Choose where to search. Options are 'conda', 'quay', 'singularity' and 'github'. If no option are given, all will be searched.",
    )
    parser.add_argument(
        "-o",
        "--organization",
        dest="organization_string",
        default="biocontainers",
        help="Change quay organization to search; default is biocontainers.",
    )
    parser.add_argument(
        "-c",
        "--channel",
        dest="channel_string",
        default="bioconda",
        help="Change conda channels to search; default is bioconda.",
    )
    parser.add_argument(
        "--non-strict",
        dest="non_strict",
        action="store_true",
        help="Autocorrection of typos activated. Lists more results but can be confusing. "
        "For too many queries quay.io blocks the request and the results can be incomplete.",
    )
    parser.add_argument("-j", "--json", dest="json", action="store_true", help="Returns results as JSON.")
    parser.add_argument("-s", "--search", required=True, nargs="+", help="The name of the tool(s) to search for.")

    # Honor an explicitly passed argument list instead of always reading sys.argv.
    args = parser.parse_args(argv)

    json_results = {dest: {} for dest in args.search_dest}
    versions = {}

    if len(args.search) > 1:  # get hash if multiple packages are searched
        args.search.append(get_package_hash(args.search, versions)["package_hash"])

    if "conda" in args.search_dest:
        conda_results = {}
        conda = CondaSearch(args.channel_string)
        for item in args.search:
            conda_results[item] = conda.get_json(item)
        json_results["conda"] = conda_results

    if "github" in args.search_dest:
        github_results = {}
        github_recipe_present = []
        github = GitHubSearch()
        for item in args.search:
            # Fall back to the code search only when no exact recipe exists.
            if github.recipe_present(item):
                github_recipe_present.append(item)
            else:
                github_json = github.get_json(item)
                github_results[item] = github.process_json(github_json, item)
        json_results["github"] = github_results
        json_results["github_recipe_present"] = {"recipes": github_recipe_present}

    if "quay" in args.search_dest:
        quay_results = {}
        quay = QuaySearch(args.organization_string)
        quay.build_index()
        for item in args.search:
            quay_results[item] = quay.search_repository(item, args.non_strict)
        json_results["quay"] = quay_results

    if "singularity" in args.search_dest:
        singularity_results = {}
        for item in args.search:
            singularity_results[item] = singularity_search(item)
        json_results["singularity"] = singularity_results

    if args.json:
        # NOTE(review): this prints the Python repr of the dict, which is not
        # strictly valid JSON; kept as-is so existing consumers are unaffected.
        print(json_results)
    else:
        readable_output(json_results, args.organization_string, args.channel_string)


# Public API of this module.
__all__ = ("main",)

# Run the command-line interface when executed as a script.
if __name__ == "__main__":
    main()