# Source code for tool_shed.webapp.search.tool_search

"""Module for searching the toolshed tools within all repositories"""

import logging
import os

import whoosh.index
from whoosh import scoring
from whoosh.fields import (
    ID,
    Schema,
    TEXT,
)
from whoosh.qparser import MultifieldParser

from galaxy import exceptions
from galaxy.exceptions import ObjectNotFound
from galaxy.util import unicodify

log = logging.getLogger(__name__)

# Whoosh schema of the tool index: ``repo_id`` is a stored ID field, every
# other attribute is a stored TEXT field.
schema = Schema(
    repo_id=ID(stored=True),
    **{
        text_field: TEXT(stored=True)
        for text_field in (
            "name",
            "description",
            "owner",
            "id",
            "help",
            "version",
            "repo_name",
            "repo_owner_username",
        )
    },
)


class ToolSearch:
    """Query the whoosh index of tools stored under the configured index directory."""

    def search(self, app, search_term, page, page_size, boosts):
        """
        Perform the search on the given search_term

        :param app: application object; ``app.config.whoosh_index_dir`` is the
            directory that contains the ``tools`` index
        :param search_term: unicode encoded string with the search term(s)
        :param page: number of the results page to return
        :param page_size: maximum number of hits on a page
        :param boosts: object providing ``tool_name_boost``,
            ``tool_description_boost``, ``tool_help_boost`` and
            ``tool_repo_owner_username_boost``

        :returns results: dictionary containing number of hits, hits themselves and matched terms for each

        :raises exceptions.InternalServerError: if no index exists in the tools index directory
        :raises ObjectNotFound: if whoosh raises ``ValueError`` for the requested page
        """
        tool_index_dir = os.path.join(app.config.whoosh_index_dir, "tools")
        if not whoosh.index.exists_in(tool_index_dir):
            raise exceptions.InternalServerError("The search index file is missing.")

        index = whoosh.index.open_dir(tool_index_dir)

        # Some literature about BM25F:
        # http://trec.nist.gov/pubs/trec13/papers/microsoft-cambridge.web.hard.pdf
        # http://en.wikipedia.org/wiki/Okapi_BM25
        # __Basically__ the higher number the bigger weight.
        # NOTE(review): whoosh's BM25F appears to read per-field B values from
        # keyword arguments named ``<fieldname>_B``; passing a single ``field_B``
        # dict may not apply these boosts at all -- confirm against the whoosh
        # scoring documentation before relying on them.
        tool_weighting = scoring.BM25F(
            field_B={
                "name_B": boosts.tool_name_boost,
                "description_B": boosts.tool_description_boost,
                "help_B": boosts.tool_help_boost,
                "repo_owner_username_B": boosts.tool_repo_owner_username_boost,
            }
        )

        # Acquire the searcher *before* entering the try block: if the weighting
        # or the searcher cannot be created there is nothing to close, and the
        # cleanup below must not fail on an unbound name and hide the real error.
        searcher = index.searcher(weighting=tool_weighting)
        try:
            parser = MultifieldParser(["name", "description", "help", "repo_owner_username"], schema=schema)
            # Wrap the term in wildcards so that substrings match as well.
            user_query = parser.parse(f"*{search_term}*")

            try:
                hits = searcher.search_page(user_query, page, pagelen=page_size, terms=True)
            except ValueError as err:
                raise ObjectNotFound("The requested page does not exist.") from err

            log.debug("searching tools for: #%s", search_term)
            log.debug("total hits: %s", len(hits))
            log.debug("scored hits: %s", hits.scored_length())

            results = {
                "total_results": str(len(hits)),
                "page": str(page),
                "page_size": str(page_size),
                "hits": [],
            }
            for hit in hits:
                hit_dict = {
                    key: hit.get(key) for key in ("id", "repo_owner_username", "repo_name", "name", "description")
                }
                # Keyed by field name, so when several terms matched the same
                # field only the last one is kept.
                matched_terms = {k: unicodify(v) for k, v in hit.matched_terms()}
                results["hits"].append({"tool": hit_dict, "matched_terms": matched_terms, "score": hit.score})
            return results
        finally:
            searcher.close()