Warning

This document is for an old release of Galaxy. You can alternatively view this page in the latest release if it exists or view the top of the latest release's documentation.

Source code for galaxy.tool_util.deps.mulled.mulled_search

#!/usr/bin/env python

import argparse
import json
import logging
import sys
import tempfile
from datetime import (
    datetime,
    timezone,
)

import requests

from .mulled_list import get_singularity_containers
from .util import (
    build_target,
    MULLED_SOCKET_TIMEOUT,
    v2_image_name,
)

try:
    from conda.cli.python_api import run_command
except ImportError:
    run_command = None

try:
    from whoosh.fields import Schema
    from whoosh.fields import TEXT
    from whoosh.fields import STORED
    from whoosh.index import create_in
    from whoosh.qparser import QueryParser
except ImportError:
    Schema = TEXT = STORED = create_in = QueryParser = None

QUAY_API_URL = 'https://quay.io/api/v1/repository'


[docs]class QuaySearch(): """ Tool to search within a quay organization for a given software name. """
[docs] def __init__(self, organization): self.index = None self.organization = organization
[docs] def build_index(self): """ Create an index to quickly examine the repositories of a given quay.io organization. """ # download all information about the repositories from the # given organization in self.organization parameters = {'public': 'true', 'namespace': self.organization} r = requests.get(QUAY_API_URL, headers={ 'Accept-encoding': 'gzip'}, params=parameters, timeout=MULLED_SOCKET_TIMEOUT) tmp_dir = tempfile.mkdtemp() schema = Schema(title=TEXT(stored=True), content=STORED) self.index = create_in(tmp_dir, schema) json_decoder = json.JSONDecoder() decoded_request = json_decoder.decode(r.text) writer = self.index.writer() for repository in decoded_request['repositories']: writer.add_document( title=repository['name'], content=repository['description']) writer.commit()
[docs] def search_repository(self, search_string, non_strict): """ Search Docker containers on quay.io. Results are displayed with all available versions, including the complete image name. """ # with statement closes searcher after usage. with self.index.searcher() as searcher: query = QueryParser( "title", self.index.schema).parse(search_string) results = searcher.search(query) if non_strict: # look for spelling errors and use suggestions as a search term too corrector = searcher.corrector("title") suggestions = corrector.suggest(search_string, limit=2) # get all repositories with suggested keywords for suggestion in suggestions: search_string = f"*{suggestion}*" query = QueryParser( "title", self.index.schema).parse(search_string) results_tmp = searcher.search(query) results.extend(results_tmp) out = list() for result in results: title = result['title'] for version in self.get_additional_repository_information(title): out.append({'package': title, 'version': version, }) return out
[docs] def get_additional_repository_information(self, repository_string): """ Function downloads additional information from quay.io to get the tag-field which includes the version number. """ url = f"{QUAY_API_URL}/{self.organization}/{repository_string}" r = requests.get(url, headers={'Accept-encoding': 'gzip'}, timeout=MULLED_SOCKET_TIMEOUT) json_decoder = json.JSONDecoder() decoded_request = json_decoder.decode(r.text) return decoded_request['tags']
[docs]class CondaSearch(): """ Tool to search the bioconda channel """
[docs] def __init__(self, channel): self.channel = channel
[docs] def get_json(self, search_string): """ Function takes search_string variable and returns results from the bioconda channel in JSON format """ if run_command is None: raise Exception(f"Invalid search destination. {deps_error_message('conda')}") raw_out, err, exit_code = run_command( 'search', '-c', self.channel, search_string, use_exception_handler=True) if exit_code != 0: logging.info(f'Search failed with: {err}') return [] return [{'package': n.split()[0], 'version': n.split()[1], 'build': n.split()[2]} for n in raw_out.split('\n')[2:-1]]
[docs]class GitHubSearch(): """ Tool to search the GitHub bioconda-recipes repo """ @staticmethod def _check_response_rate_limit(response): if response.status_code == 403 and "API rate limit exceeded" in response.json()["message"]: # It can take tens of minutes before the rate limit window resets message = "GitHub API rate limit exceeded." rate_limit_reset_UTC_timestamp = response.headers.get("X-RateLimit-Reset") if rate_limit_reset_UTC_timestamp: rate_limit_reset_datetime = datetime.fromtimestamp(int(rate_limit_reset_UTC_timestamp), tz=timezone.utc) message += f" The rate limit window will reset at {rate_limit_reset_datetime.isoformat()}." raise Exception(message)
[docs] def get_json(self, search_string): """ Takes search_string variable and return results from the bioconda-recipes github repository in JSON format DEPRECATED: this method is currently unreliable because the API query sometimes succeeds but returns no items. """ response = requests.get( f"https://api.github.com/search/code?q={search_string}+in:path+repo:bioconda/bioconda-recipes+path:recipes", timeout=MULLED_SOCKET_TIMEOUT, ) self._check_response_rate_limit(response) response.raise_for_status() return response.json()
[docs] def process_json(self, json_response, search_string): """ Take JSON input and process it, returning the required data """ top_10_items = json_response['items'][0:10] # get top ten results return [{'name': result['name'], 'path': result['path']} for result in top_10_items]
[docs] def recipe_present(self, search_string): """ Check if a recipe exists in bioconda-recipes which matches search_string exactly """ response = requests.get( f"https://api.github.com/repos/bioconda/bioconda-recipes/contents/recipes/{search_string}", timeout=MULLED_SOCKET_TIMEOUT, ) self._check_response_rate_limit(response) return response.status_code == 200
[docs]def get_package_hash(packages, versions): """ Take packages and versions (if the latter are given) and returns a hash for each. Also checks github to see if the container is already present. """ hash_results = {} targets = [] if versions: for p in packages: targets.append(build_target(p, version=versions[p])) else: # if versions are not given only calculate the package hash for p in packages: targets.append(build_target(p)) # make the hash from the processed targets package_hash = v2_image_name(targets) hash_results['package_hash'] = package_hash.split(':')[0] if versions: hash_results['version_hash'] = package_hash.split(':')[1] r = requests.get(f"https://quay.io/api/v1/repository/biocontainers/{hash_results['package_hash']}", timeout=MULLED_SOCKET_TIMEOUT) if r.status_code == 200: hash_results['container_present'] = True if versions: # now test if the version hash is listed in the repository tags # remove -0, -1, etc from end of the tag tags = [n[:-2] for n in r.json()['tags']] if hash_results['version_hash'] in tags: hash_results['container_present_with_version'] = True else: hash_results['container_present_with_version'] = False else: hash_results['container_present'] = False return hash_results
[docs]def readable_output(json, organization='biocontainers', channel='bioconda'): # if json is empty: if sum(len(json[destination][results]) for destination in json for results in json[destination]) == 0: sys.stdout.write('No results found for that query.\n') return # return results for quay, conda and singularity together if sum(len(json[destination][results]) for destination in ['quay', 'conda', 'singularity', ] for results in json.get(destination, [])) > 0: sys.stdout.write("The query returned the following result(s).\n") # put quay, conda etc results as lists in lines lines = [['LOCATION', 'NAME', 'VERSION', 'COMMAND\n']] for results in json.get('quay', {}).values(): for result in results: lines.append(['quay', result['package'], result['version'], 'docker pull quay.io/%s/%s:%s\n' % (organization, result['package'], result['version'])]) # NOT a real solution for results in json.get('conda', {}).values(): for result in results: lines.append(['conda', result['package'], f"{result['version']}--{result['build']}", f"conda install -c {channel} {result['package']}={result['version']}={result['build']}\n"]) for results in json.get('singularity', {}).values(): for result in results: lines.append(['singularity', result['package'], result['version'], f"wget https://depot.galaxyproject.org/singularity/{result['package']}:{result['version']}\n"]) col_width0, col_width1, col_width2 = (max(len( line[n]) for line in lines) + 2 for n in (0, 1, 2)) # def max col widths for the output # create table for line in lines: sys.stdout.write("".join((line[0].ljust(col_width0), line[1].ljust( col_width1), line[2].ljust(col_width2), line[3]))) # output if json.get('github_recipe_present', False): sys.stdout.write('\n' if 'lines' in locals() else '') sys.stdout.write( 'The following recipes were found in the bioconda-recipes repository which exactly matched one of the search terms:\n') lines = [['QUERY', 'LOCATION\n']] for recipe in json['github_recipe_present']['recipes']: lines.append( [recipe, f"https://api.github.com/repos/bioconda/bioconda-recipes/contents/recipes/{recipe}\n"]) col_width0 = max(len(line[0]) for line in lines) + 2 for line in lines: sys.stdout.write( "".join((line[0].ljust(col_width0), line[1]))) # output if sum(len(json['github'][results]) for results in json.get('github', [])) > 0: sys.stdout.write('\n' if 'lines' in locals() else '') sys.stdout.write( "Other result(s) on the bioconda-recipes GitHub repository:\n") lines = [['QUERY', 'FILE', 'URL\n']] for search_string, results in json.get('github', {}).items(): for result in results: lines.append([search_string, result['name'], f"https://github.com/bioconda/bioconda-recipes/tree/master/{result['path']}\n"]) # def max col widths for the output col_width0, col_width1 = ( max(len(line[n]) for line in lines) + 2 for n in (0, 1)) for line in lines: sys.stdout.write("".join( (line[0].ljust(col_width0), line[1].ljust(col_width1), line[2]))) # output
[docs]def deps_error_message(package): return f"Required dependency [{package}] is not installed. Run 'pip install galaxy-tool-util[mulled]'."
[docs]def main(argv=None): if Schema is None: sys.stdout.write(deps_error_message("Whoosh")) return destination_defaults = ['quay', 'singularity', 'github'] if run_command is not None: destination_defaults.append('conda') parser = argparse.ArgumentParser( description='Searches in a given quay organization for a repository') parser.add_argument('-d', '--destination', dest='search_dest', nargs='+', default=destination_defaults, help="Choose where to search. Options are 'conda', 'quay', 'singularity' and 'github'. If no option are given, all will be searched.") parser.add_argument('-o', '--organization', dest='organization_string', default="biocontainers", help='Change quay organization to search; default is biocontainers.') parser.add_argument('-c', '--channel', dest='channel_string', default="bioconda", help='Change conda channel to search; default is bioconda.') parser.add_argument('--non-strict', dest='non_strict', action="store_true", help='Autocorrection of typos activated. Lists more results but can be confusing.\ For too many queries quay.io blocks the request and the results can be incomplete.') parser.add_argument('-j', '--json', dest='json', action="store_true", help='Returns results as JSON.') parser.add_argument('-s', '--search', required=True, nargs='+', help='The name of the tool(s) to search for.') args = parser.parse_args() json_results = {dest: {} for dest in args.search_dest} versions = {} if len(args.search) > 1: # get hash if multiple packages are searched args.search.append(get_package_hash( args.search, versions)['package_hash']) if 'conda' in args.search_dest: conda_results = {} conda = CondaSearch(args.channel_string) for item in args.search: conda_results[item] = conda.get_json(item) json_results['conda'] = conda_results if 'github' in args.search_dest: github_results = {} github_recipe_present = [] github = GitHubSearch() for item in args.search: if github.recipe_present(item): github_recipe_present.append(item) else: github_json = github.get_json(item) github_results[item] = github.process_json(github_json, item) json_results['github'] = github_results json_results['github_recipe_present'] = { 'recipes': github_recipe_present} if 'quay' in args.search_dest: quay_results = {} quay = QuaySearch(args.organization_string) quay.build_index() for item in args.search: quay_results[item] = quay.search_repository(item, args.non_strict) json_results['quay'] = quay_results if 'singularity' in args.search_dest: singularity_results = {} for item in args.search: singularity_results[item] = singularity_search(item) json_results['singularity'] = singularity_results if args.json: print(json_results) else: readable_output(json_results, args.organization_string, args.channel_string)
if __name__ == "__main__": main()