Warning

This document is for an in-development version of Galaxy. You can alternatively view this page in the latest release if it exists or view the top of the latest release's documentation.

Source code for galaxy.webapps.tool_shed.search.tool_search

"""Module for searching the toolshed tools within all repositories"""
import logging
import os

import whoosh.index
from whoosh import scoring
from whoosh.fields import (
    Schema,
    STORED,
    TEXT
)
from whoosh.qparser import MultifieldParser

from galaxy import exceptions
from galaxy.exceptions import ObjectNotFound

log = logging.getLogger(__name__)

tool_schema = Schema(
    name=TEXT(stored=True),
    description=TEXT(stored=True),
    owner=TEXT(stored=True),
    id=TEXT(stored=True),
    help=TEXT(stored=True),
    version=TEXT(stored=True),
    repo_name=TEXT(stored=True),
    repo_owner_username=TEXT(stored=True),
    repo_id=STORED)


[docs]class ToolSearch(object):
[docs] def search(self, trans, search_term, page, page_size, boosts): """ Perform the search on the given search_term :param search_term: unicode encoded string with the search term(s) :returns results: dictionary containing number of hits, hits themselves and matched terms for each """ tool_index_dir = os.path.join(trans.app.config.whoosh_index_dir, 'tools') index_exists = whoosh.index.exists_in(tool_index_dir) if index_exists: index = whoosh.index.open_dir(tool_index_dir) try: # Some literature about BM25F: # http://trec.nist.gov/pubs/trec13/papers/microsoft-cambridge.web.hard.pdf # http://en.wikipedia.org/wiki/Okapi_BM25 # __Basically__ the higher number the bigger weight. tool_weighting = scoring.BM25F(field_B={ 'name_B' : boosts.tool_name_boost, 'description_B' : boosts.tool_description_boost, 'help_B' : boosts.tool_help_boost, 'repo_owner_username_B' : boosts.tool_repo_owner_username_boost}) searcher = index.searcher(weighting=tool_weighting) parser = MultifieldParser([ 'name', 'description', 'help', 'repo_owner_username'], schema=tool_schema) user_query = parser.parse('*' + search_term + '*') try: hits = searcher.search_page(user_query, page, pagelen=page_size, terms=True) except ValueError: raise ObjectNotFound('The requested page does not exist.') log.debug('searching tools for: #' + str(search_term)) log.debug('total hits: ' + str(len(hits))) log.debug('scored hits: ' + str(hits.scored_length())) results = {} results['total_results'] = str(len(hits)) results['page'] = str(page) results['page_size'] = str(page_size) results['hits'] = [] for hit in hits: hit_dict = {} hit_dict['id'] = hit.get('id') hit_dict['repo_owner_username'] = hit.get('repo_owner_username') hit_dict['repo_name'] = hit.get('repo_name') hit_dict['name'] = hit.get('name') hit_dict['description'] = hit.get('description') results['hits'].append({'tool': hit_dict, 'matched_terms': hit.matched_terms(), 'score': hit.score}) return results finally: searcher.close() else: raise exceptions.InternalServerError('The search index file is missing.')