Source code for tool_shed.metadata.repository_metadata_manager

import logging
import tempfile
from typing import (
    Any,
    Dict,
    List,
    Optional,
)

from sqlalchemy import (
    false,
    select,
)

from galaxy import util
from galaxy.model.base import transaction
from galaxy.tool_shed.metadata.metadata_generator import (
    BaseMetadataGenerator,
    HandleResultT,
    InvalidFileT,
)
from galaxy.util import inflector
from galaxy.web.form_builder import SelectField
from tool_shed.context import ProvidesRepositoriesContext
from tool_shed.repository_types import util as rt_util
from tool_shed.repository_types.metadata import TipOnly
from tool_shed.structured_app import ToolShedApp
from tool_shed.util import (
    basic_util,
    common_util,
    hg_util,
    metadata_util,
    repository_util,
    shed_util_common as suc,
    tool_util,
)
from tool_shed.util.metadata_util import repository_metadata_by_changeset_revision
from tool_shed.webapp.model import (
    Repository,
    RepositoryMetadata,
    User,
)

log = logging.getLogger(__name__)


[docs]class ToolShedMetadataGenerator(BaseMetadataGenerator): """A MetadataGenerator building on ToolShed's app and repository constructs.""" app: ToolShedApp repository: Optional[Repository] # why is mypy making me re-annotate these things from the base class, it didn't # when they were in the same file invalid_file_tups: List[InvalidFileT] repository_clone_url: Optional[str]
[docs] def __init__( self, trans: ProvidesRepositoriesContext, repository: Optional[Repository] = None, changeset_revision: Optional[str] = None, repository_clone_url: Optional[str] = None, shed_config_dict: Optional[Dict[str, Any]] = None, relative_install_dir=None, repository_files_dir=None, resetting_all_metadata_on_repository=False, updating_installed_repository=False, persist=False, metadata_dict=None, user=None, ): self.trans = trans self.app = trans.app self.user = user self.repository = repository if changeset_revision is None and self.repository is not None: self.changeset_revision = self.repository.tip() else: self.changeset_revision = changeset_revision if repository_clone_url is None and self.repository is not None: self.repository_clone_url = common_util.generate_clone_url_for(self.trans, self.repository) else: self.repository_clone_url = repository_clone_url if shed_config_dict is None: self.shed_config_dict = {} else: self.shed_config_dict = shed_config_dict if relative_install_dir is None and self.repository is not None: relative_install_dir = self.repository.repo_path(self.app) if repository_files_dir is None and self.repository is not None: repository_files_dir = self.repository.repo_path(self.app) if metadata_dict is None: self.metadata_dict = {} else: self.metadata_dict = metadata_dict self.relative_install_dir = relative_install_dir self.repository_files_dir = repository_files_dir self.resetting_all_metadata_on_repository = resetting_all_metadata_on_repository self.updating_installed_repository = updating_installed_repository self.persist = persist self.invalid_file_tups = [] self.sa_session = trans.app.model.session
[docs] def initial_metadata_dict(self) -> Dict[str, Any]: return {}
[docs] def set_repository( self, repository, relative_install_dir: Optional[str] = None, changeset_revision: Optional[str] = None ): self.repository = repository if relative_install_dir is None and self.repository is not None: relative_install_dir = repository.repo_path(self.app) if changeset_revision is None and self.repository is not None: self.set_changeset_revision(self.repository.tip()) else: self.set_changeset_revision(changeset_revision) self.shed_config_dict = {} self._reset_attributes_after_repository_update(relative_install_dir)
[docs] def handle_repository_elem(self, repository_elem, only_if_compiling_contained_td=False) -> HandleResultT: """ Process the received repository_elem which is a <repository> tag either from a repository_dependencies.xml file or a tool_dependencies.xml file. If the former, we're generating repository dependencies metadata for a repository in the Tool Shed. If the latter, we're generating package dependency metadata within Galaxy or the Tool Shed. """ is_valid = True error_message = "" toolshed = repository_elem.get("toolshed", None) name = repository_elem.get("name", None) owner = repository_elem.get("owner", None) changeset_revision = repository_elem.get("changeset_revision", None) prior_installation_required = str(repository_elem.get("prior_installation_required", False)) repository_dependency_tup = [ toolshed, name, owner, changeset_revision, prior_installation_required, str(only_if_compiling_contained_td), ] if not toolshed: # Default to the current tool shed. toolshed = self.trans.repositories_hostname log.warning(f"\n\n\n\n\n\nin not toolshed with {toolshed}\n\n\n\n") # toolshed = str(url_for("/", qualified=True)).rstrip("/") repository_dependency_tup[0] = toolshed else: log.warning(f"moooocww.....{toolshed}\n\n\n\n\n") toolshed = common_util.remove_protocol_from_tool_shed_url(toolshed) if suc.tool_shed_is_this_tool_shed(toolshed, trans=self.trans): try: user = get_user_by_username(self.sa_session, owner) except Exception: error_message = ( f"Ignoring repository dependency definition for tool shed {toolshed}, name {name}, owner {owner}, " ) error_message += f"changeset revision {changeset_revision} because the owner is invalid." log.debug(error_message) is_valid = False return repository_dependency_tup, is_valid, error_message try: repository = get_repository(self.sa_session, name, user.id) except Exception: error_message = f"Ignoring repository dependency definition for tool shed {toolshed}," error_message += f"name {name}, owner {owner}, " error_message += f"changeset revision {changeset_revision} because the name is invalid. " log.debug(error_message) is_valid = False return repository_dependency_tup, is_valid, error_message repo = repository.hg_repo # The received changeset_revision may be None since defining it in the dependency definition is optional. # If this is the case, the default will be to set its value to the repository dependency tip revision. # This probably occurs only when handling circular dependency definitions. tip_ctx = repo[repo.changelog.tip()] # Make sure the repo.changlog includes at least 1 revision. if changeset_revision is None and tip_ctx.rev() >= 0: changeset_revision = str(tip_ctx) repository_dependency_tup = [ toolshed, name, owner, changeset_revision, prior_installation_required, str(only_if_compiling_contained_td), ] return repository_dependency_tup, is_valid, error_message else: # Find the specified changeset revision in the repository's changelog to see if it's valid. found = False for changeset in repo.changelog: changeset_hash = str(repo[changeset]) if changeset_hash == changeset_revision: found = True break if not found: error_message = f"Ignoring repository dependency definition for tool shed {toolshed}, name {name}, owner {owner}, " error_message += ( f"changeset revision {changeset_revision} because the changeset revision is invalid. " ) log.debug(error_message) is_valid = False return repository_dependency_tup, is_valid, error_message else: # Repository dependencies are currently supported within a single tool shed. error_message = "Repository dependencies are currently supported only within the same tool shed. Ignoring " error_message += f"repository dependency definition for tool shed {toolshed}, name {name}, owner {owner}, changeset revision {changeset_revision}. " log.debug(error_message) is_valid = False return repository_dependency_tup, is_valid, error_message return repository_dependency_tup, is_valid, error_message
[docs]class RepositoryMetadataManager(ToolShedMetadataGenerator):
[docs] def __init__( self, trans: ProvidesRepositoriesContext, repository=None, changeset_revision=None, repository_clone_url=None, shed_config_dict=None, relative_install_dir=None, repository_files_dir=None, resetting_all_metadata_on_repository=False, updating_installed_repository=False, persist=False, metadata_dict=None, ): super().__init__( trans, repository, changeset_revision, repository_clone_url, shed_config_dict, relative_install_dir, repository_files_dir, resetting_all_metadata_on_repository, updating_installed_repository, persist, metadata_dict=metadata_dict, user=trans.user, ) app = trans.app user = trans.user self.sa_session = app.model.context self.app = app self.user = user # Repository metadata comparisons for changeset revisions. self.EQUAL = "equal" self.NO_METADATA = "no metadata" self.NOT_EQUAL_AND_NOT_SUBSET = "not equal and not subset" self.SUBSET = "subset" self.SUBSET_VALUES = [self.EQUAL, self.SUBSET]
def _add_tool_versions(self, id: int, repository_metadata, changeset_revisions): # Build a dictionary of { 'tool id' : 'parent tool id' } pairs for each tool in repository_metadata. metadata = repository_metadata.metadata tool_versions_dict = {} for tool_dict in metadata.get("tools", []): # We have at least 2 changeset revisions to compare tool guids and tool ids. parent_id = self._get_parent_id( id, tool_dict["id"], tool_dict["version"], tool_dict["guid"], changeset_revisions ) tool_versions_dict[tool_dict["guid"]] = parent_id if tool_versions_dict: repository_metadata.tool_versions = tool_versions_dict self.sa_session.add(repository_metadata) session = self.sa_session() with transaction(session): session.commit()
[docs] def build_repository_ids_select_field( self, name="repository_ids", multiple=True, display="checkboxes", my_writable=False ): """Generate the current list of repositories for resetting metadata.""" repositories_select_field = SelectField(name=name, multiple=multiple, display=display) for repository in self.get_repositories_for_setting_metadata(my_writable=my_writable, order=True): owner = str(repository.user.username) option_label = f"{str(repository.name)} ({owner})" option_value = f"{self.app.security.encode_id(repository.id)}" repositories_select_field.add_option(option_label, option_value) return repositories_select_field
def _clean_repository_metadata(self, changeset_revisions): assert self.repository # Delete all repository_metadata records associated with the repository that have # a changeset_revision that is not in changeset_revisions. We sometimes see multiple # records with the same changeset revision value - no idea how this happens. We'll # assume we can delete the older records, so we'll order by update_time descending and # delete records that have the same changeset_revision we come across later. for repository_metadata in get_repository_metadata(self.sa_session, self.repository.id): changeset_revision = repository_metadata.changeset_revision if changeset_revision not in changeset_revisions: self.sa_session.delete(repository_metadata) session = self.sa_session() with transaction(session): session.commit()
[docs] def compare_changeset_revisions(self, ancestor_changeset_revision, ancestor_metadata_dict): """ Compare the contents of two changeset revisions to determine if a new repository metadata revision should be created. """ # The metadata associated with ancestor_changeset_revision is ancestor_metadata_dict. # This changeset_revision is an ancestor of self.changeset_revision which is associated # with self.metadata_dict. A new repository_metadata record will be created only # when this method returns the constant value self.NOT_EQUAL_AND_NOT_SUBSET. ancestor_tools = ancestor_metadata_dict.get("tools", []) ancestor_guids = [tool_dict["guid"] for tool_dict in ancestor_tools] ancestor_guids.sort() ancestor_repository_dependencies_dict = ancestor_metadata_dict.get("repository_dependencies", {}) ancestor_repository_dependencies = ancestor_repository_dependencies_dict.get("repository_dependencies", []) ancestor_tool_dependencies = ancestor_metadata_dict.get("tool_dependencies", {}) ancestor_data_manager = ancestor_metadata_dict.get("data_manager", {}) current_tools = self.metadata_dict.get("tools", []) current_guids = [tool_dict["guid"] for tool_dict in current_tools] current_guids.sort() current_repository_dependencies_dict = self.metadata_dict.get("repository_dependencies", {}) current_repository_dependencies = current_repository_dependencies_dict.get("repository_dependencies", []) current_tool_dependencies = self.metadata_dict.get("tool_dependencies", {}) current_data_manager = self.metadata_dict.get("data_manager", {}) # Handle case where no metadata exists for either changeset. no_repository_dependencies = not ancestor_repository_dependencies and not current_repository_dependencies no_tool_dependencies = not ancestor_tool_dependencies and not current_tool_dependencies no_tools = not ancestor_guids and not current_guids no_data_manager = not ancestor_data_manager and not current_data_manager if no_repository_dependencies and no_tool_dependencies and no_tools and no_data_manager: return self.NO_METADATA repository_dependency_comparison = self.compare_repository_dependencies( ancestor_repository_dependencies, current_repository_dependencies ) tool_dependency_comparison = self.compare_tool_dependencies( ancestor_tool_dependencies, current_tool_dependencies ) data_manager_comparison = self.compare_data_manager(ancestor_data_manager, current_data_manager) # Handle case where all metadata is the same. if ( ancestor_guids == current_guids and repository_dependency_comparison == self.EQUAL and tool_dependency_comparison == self.EQUAL and data_manager_comparison == self.EQUAL ): return self.EQUAL # Handle case where ancestor metadata is a subset of current metadata. # readme_file_is_subset = readme_file_comparision in [ self.EQUAL, self.SUBSET ] repository_dependency_is_subset = repository_dependency_comparison in self.SUBSET_VALUES tool_dependency_is_subset = tool_dependency_comparison in self.SUBSET_VALUES datamanager_is_subset = data_manager_comparison in self.SUBSET_VALUES if repository_dependency_is_subset and tool_dependency_is_subset and datamanager_is_subset: is_subset = True for guid in ancestor_guids: if guid not in current_guids: is_subset = False break if is_subset: return self.SUBSET return self.NOT_EQUAL_AND_NOT_SUBSET
[docs] def compare_data_manager(self, ancestor_metadata, current_metadata): """Determine if ancestor_metadata is the same as or a subset of current_metadata for data_managers.""" def __data_manager_dict_to_tuple_list(metadata_dict): # we do not check tool_guid or tool conf file name return set( sorted( ( name, tuple(sorted(value.get("data_tables", []))), value.get("guid"), value.get("version"), value.get("name"), value.get("id"), ) for name, value in metadata_dict.items() ) ) # only compare valid entries, any invalid entries are ignored ancestor_metadata = __data_manager_dict_to_tuple_list(ancestor_metadata.get("data_managers", {})) current_metadata = __data_manager_dict_to_tuple_list(current_metadata.get("data_managers", {})) # use set comparisons if ancestor_metadata.issubset(current_metadata): if ancestor_metadata == current_metadata: return self.EQUAL return self.SUBSET return self.NOT_EQUAL_AND_NOT_SUBSET
[docs] def compare_repository_dependencies(self, ancestor_repository_dependencies, current_repository_dependencies): """ Determine if ancestor_repository_dependencies is the same as or a subset of current_repository_dependencies. """ # The list of repository_dependencies looks something like: # [["http://localhost:9009", "emboss_datatypes", "test", "ab03a2a5f407", "False", "False"]]. # Create a string from each tuple in the list for easier comparison. if len(ancestor_repository_dependencies) <= len(current_repository_dependencies): for ancestor_tup in ancestor_repository_dependencies: ( a_tool_shed, a_repo_name, a_repo_owner, a_changeset_revision, a_prior_installation_required, a_only_if_compiling_contained_td, ) = ancestor_tup cleaned_a_tool_shed = common_util.remove_protocol_from_tool_shed_url(a_tool_shed) found_in_current = False for current_tup in current_repository_dependencies: ( c_tool_shed, c_repo_name, c_repo_owner, c_changeset_revision, c_prior_installation_required, c_only_if_compiling_contained_td, ) = current_tup cleaned_c_tool_shed = common_util.remove_protocol_from_tool_shed_url(c_tool_shed) if ( cleaned_c_tool_shed == cleaned_a_tool_shed and c_repo_name == a_repo_name and c_repo_owner == a_repo_owner and c_changeset_revision == a_changeset_revision and util.string_as_bool(c_prior_installation_required) == util.string_as_bool(a_prior_installation_required) and util.string_as_bool(c_only_if_compiling_contained_td) == util.string_as_bool(a_only_if_compiling_contained_td) ): found_in_current = True break if not found_in_current: # In some cases, the only difference between a dependency definition in the lists # is the changeset_revision value. We'll check to see if this is the case, and if # the defined dependency is a repository that has metadata set only on its tip. if not self.different_revision_defines_tip_only_repository_dependency( ancestor_tup, current_repository_dependencies ): return self.NOT_EQUAL_AND_NOT_SUBSET return self.SUBSET if len(ancestor_repository_dependencies) == len(current_repository_dependencies): return self.EQUAL else: return self.SUBSET return self.NOT_EQUAL_AND_NOT_SUBSET
[docs] def compare_tool_dependencies(self, ancestor_tool_dependencies, current_tool_dependencies): """ Determine if ancestor_tool_dependencies is the same as or a subset of current_tool_dependencies. """ # The tool_dependencies dictionary looks something like: # {'bwa/0.5.9': {'readme': 'some string', 'version': '0.5.9', 'type': 'package', 'name': 'bwa'}} if len(ancestor_tool_dependencies) <= len(current_tool_dependencies): for ancestor_td_key in ancestor_tool_dependencies.keys(): if ancestor_td_key in current_tool_dependencies: # The only values that could have changed between the 2 dictionaries are the # "readme" or "type" values. Changing the readme value makes no difference. # Changing the type will change the installation process, but for now we'll # assume it was a typo, so new metadata shouldn't be generated. continue else: return self.NOT_EQUAL_AND_NOT_SUBSET # At this point we know that ancestor_tool_dependencies is at least a subset of current_tool_dependencies. if len(ancestor_tool_dependencies) == len(current_tool_dependencies): return self.EQUAL else: return self.SUBSET return self.NOT_EQUAL_AND_NOT_SUBSET
[docs] def create_or_update_repository_metadata(self, changeset_revision, metadata_dict): """Create or update a repository_metadata record in the tool shed.""" has_repository_dependencies = False has_repository_dependencies_only_if_compiling_contained_td = False includes_tools = False includes_tool_dependencies = False if metadata_dict: repository_dependencies_dict = metadata_dict.get("repository_dependencies", {}) repository_dependencies = repository_dependencies_dict.get("repository_dependencies", []) ( has_repository_dependencies, has_repository_dependencies_only_if_compiling_contained_td, ) = repository_util.get_repository_dependency_types(repository_dependencies) if "tools" in metadata_dict: includes_tools = True if "tool_dependencies" in metadata_dict: includes_tool_dependencies = True if ( has_repository_dependencies or has_repository_dependencies_only_if_compiling_contained_td or includes_tools or includes_tool_dependencies ): downloadable = True else: downloadable = False assert self.repository repository_metadata = repository_metadata_by_changeset_revision( self.app.model, self.repository.id, changeset_revision ) if repository_metadata: repository_metadata.metadata = metadata_dict repository_metadata.downloadable = downloadable repository_metadata.has_repository_dependencies = has_repository_dependencies repository_metadata.includes_datatypes = False repository_metadata.includes_tools = includes_tools repository_metadata.includes_tool_dependencies = includes_tool_dependencies repository_metadata.includes_workflows = False else: repository_metadata = self.app.model.RepositoryMetadata( repository_id=self.repository.id, changeset_revision=changeset_revision, metadata=metadata_dict, downloadable=downloadable, has_repository_dependencies=has_repository_dependencies, includes_datatypes=False, includes_tools=includes_tools, includes_tool_dependencies=includes_tool_dependencies, includes_workflows=False, ) assert repository_metadata # Always set the default values for the following columns. When resetting all metadata # on a repository this will reset the values. assert repository_metadata repository_metadata.missing_test_components = False self.sa_session.add(repository_metadata) session = self.sa_session() with transaction(session): session.commit() return repository_metadata
[docs] def different_revision_defines_tip_only_repository_dependency(self, rd_tup, repository_dependencies): """ Determine if the only difference between rd_tup and a dependency definition in the list of repository_dependencies is the changeset_revision value. """ ( rd_tool_shed, rd_name, rd_owner, rd_changeset_revision, rd_prior_installation_required, rd_only_if_compiling_contained_td, ) = common_util.parse_repository_dependency_tuple(rd_tup) cleaned_rd_tool_shed = common_util.remove_protocol_from_tool_shed_url(rd_tool_shed) for repository_dependency in repository_dependencies: ( tool_shed, name, owner, changeset_revision, prior_installation_required, only_if_compiling_contained_td, ) = common_util.parse_repository_dependency_tuple(repository_dependency) cleaned_tool_shed = common_util.remove_protocol_from_tool_shed_url(tool_shed) if cleaned_rd_tool_shed == cleaned_tool_shed and rd_name == name and rd_owner == owner: # Determine if the repository represented by the dependency tuple is an instance of the repository type TipOnly. required_repository = repository_util.get_repository_by_name_and_owner(self.app, name, owner) repository_type_class = self.app.repository_types_registry.get_class_by_label(required_repository.type) return isinstance(repository_type_class, TipOnly) return False
def _get_parent_id(self, id: int, old_id, version, guid, changeset_revisions): parent_id = None # Compare from most recent to oldest. changeset_revisions.reverse() for changeset_revision in changeset_revisions: repository_metadata = repository_metadata_by_changeset_revision(self.app.model, id, changeset_revision) assert repository_metadata metadata = repository_metadata.metadata tools_dicts = metadata.get("tools", []) for tool_dict in tools_dicts: if tool_dict["guid"] == guid: # The tool has not changed between the compared changeset revisions. continue if tool_dict["id"] == old_id and tool_dict["version"] != version: # The tool version is different, so we've found the parent. return tool_dict["guid"] if parent_id is None: # The tool did not change through all of the changeset revisions. return old_id
[docs] def get_repositories_for_setting_metadata(self, my_writable=False, order=True): """ Return a list of repositories for resetting metadata. The order parameter is used for displaying the list of repositories ordered alphabetically for display on a page. When called from the Tool Shed API, order is False. """ # When called from the Tool Shed API, the metadata is reset on all repositories of types # repository_suite_definition and tool_dependency_definition in addition to other selected # repositories. if my_writable: username = self.user.username repo_ids = [] for repository in get_current_repositories(self.sa_session): # Always reset metadata on all repositories of types repository_suite_definition and # tool_dependency_definition. if repository.type in [rt_util.REPOSITORY_SUITE_DEFINITION, rt_util.TOOL_DEPENDENCY_DEFINITION]: repo_ids.append(repository.id) else: allow_push = repository.allow_push() if allow_push: # Include all repositories that are writable by the current user. allow_push_usernames = allow_push.split(",") if username in allow_push_usernames: repo_ids.append(repository.id) if repo_ids: return get_filtered_repositories(self.sa_session, repo_ids, order) else: return [] else: return get_current_repositories(self.sa_session, order)
[docs] def new_metadata_required_for_utilities(self): """ This method compares the last stored repository_metadata record associated with self.repository against the contents of self.metadata_dict and returns True or False for the union set of Galaxy utilities contained in both metadata dictionaries. The metadata contained in self.metadata_dict may not be a subset of that contained in the last stored repository_metadata record associated with self.repository because one or more Galaxy utilities may have been deleted from self.repository in the new tip. """ assert self.repository repository_metadata = metadata_util.get_latest_repository_metadata( self.app, self.repository.id, downloadable=False ) repository_dependencies_required = self.new_repository_dependency_metadata_required(repository_metadata) tools_required = self.new_tool_metadata_required(repository_metadata) tool_dependencies_required = self.new_tool_dependency_metadata_required(repository_metadata) data_managers_required = self.new_data_manager_required(repository_metadata) if repository_dependencies_required or tools_required or tool_dependencies_required or data_managers_required: return True return False
[docs] def new_repository_dependency_metadata_required(self, repository_metadata): """ Compare the last saved metadata for each repository dependency in the repository with the new metadata in self.metadata_dict to determine if a new repository_metadata table record is required or if the last saved metadata record can be updated for repository_dependencies instead. """ if repository_metadata: metadata = repository_metadata.metadata if "repository_dependencies" in metadata: saved_repository_dependencies = metadata["repository_dependencies"]["repository_dependencies"] new_repository_dependencies_metadata = self.metadata_dict.get("repository_dependencies", None) if new_repository_dependencies_metadata: new_repository_dependencies = self.metadata_dict["repository_dependencies"][ "repository_dependencies" ] # TODO: We used to include the following here to handle the case where repository # dependency definitions were deleted. However this erroneously returned True in # cases where is should not have done so. This usually occurred where multiple single # files were uploaded when a single tarball should have been. We need to implement # support for handling deleted repository dependency definitions so that we can guarantee # reproducibility, but we need to do it in a way that is better than the following. # for new_repository_dependency in new_repository_dependencies: # if new_repository_dependency not in saved_repository_dependencies: # return True # The saved metadata must be a subset of the new metadata. for saved_repository_dependency in saved_repository_dependencies: if saved_repository_dependency not in new_repository_dependencies: # In some cases, the only difference between a dependency definition in the lists # is the changeset_revision value. We'll check to see if this is the case, and if # the defined dependency is a repository that has metadata set only on its tip. if not self.different_revision_defines_tip_only_repository_dependency( saved_repository_dependency, new_repository_dependencies ): return True return False else: # The repository_dependencies.xml file must have been deleted, so create a new # repository_metadata record so we always have access to the deleted file. return True else: return False else: if "repository_dependencies" in self.metadata_dict: # There is no saved repository metadata, so we need to create a new repository_metadata record. return True else: # self.metadata_dict includes no metadata for repository dependencies, so a new repository_metadata # record is not needed. return False
[docs] def new_data_manager_required(self, repository_metadata): if self.metadata_dict and repository_metadata and repository_metadata.metadata: return self.compare_data_manager(self.metadata_dict, repository_metadata.metadata) != self.EQUAL else: return bool( repository_metadata and repository_metadata.metadata and repository_metadata.metadata.get("data_managers") )
[docs] def new_tool_metadata_required(self, repository_metadata): """ Compare the last saved metadata for each tool in the repository with the new metadata in self.metadata_dict to determine if a new repository_metadata table record is required, or if the last saved metadata record can be updated instead. """ if "tools" in self.metadata_dict: if repository_metadata: metadata = repository_metadata.metadata if metadata: if "tools" in metadata: saved_tool_ids = [] # The metadata for one or more tools was successfully generated in the past # for this repository, so we first compare the version string for each tool id # in self.metadata_dict with what was previously saved to see if we need to create # a new table record or if we can simply update the existing record. for new_tool_metadata_dict in self.metadata_dict["tools"]: for saved_tool_metadata_dict in metadata["tools"]: if saved_tool_metadata_dict["id"] not in saved_tool_ids: saved_tool_ids.append(saved_tool_metadata_dict["id"]) if new_tool_metadata_dict["id"] == saved_tool_metadata_dict["id"]: if new_tool_metadata_dict["version"] != saved_tool_metadata_dict["version"]: return True # So far, a new metadata record is not required, but we still have to check to see if # any new tool ids exist in self.metadata_dict that are not in the saved metadata. We do # this because if a new tarball was uploaded to a repository that included tools, it # may have removed existing tool files if they were not included in the uploaded tarball. for new_tool_metadata_dict in self.metadata_dict["tools"]: if new_tool_metadata_dict["id"] not in saved_tool_ids: return True return False else: # The new metadata includes tools, but the stored metadata does not, so we can # update the stored metadata. return False else: # There is no stored metadata, so we can update the metadata column in the # repository_metadata table. return False else: # There is no stored repository metadata, so we need to create a new repository_metadata # table record. return True # self.metadata_dict includes no metadata for tools, so a new repository_metadata table # record is not needed. return False
[docs] def new_tool_dependency_metadata_required(self, repository_metadata): """ Compare the last saved metadata for each tool dependency in the repository with the new metadata in self.metadata_dict to determine if a new repository_metadata table record is required or if the last saved metadata record can be updated for tool_dependencies instead. """ if repository_metadata: metadata = repository_metadata.metadata if metadata: if "tool_dependencies" in metadata: saved_tool_dependencies = metadata["tool_dependencies"] new_tool_dependencies = self.metadata_dict.get("tool_dependencies", None) if new_tool_dependencies: # TODO: We used to include the following here to handle the case where # tool dependency definitions were deleted. However, this erroneously # returned True in cases where is should not have done so. This usually # occurred where multiple single files were uploaded when a single tarball # should have been. We need to implement support for handling deleted # tool dependency definitions so that we can guarantee reproducibility, # but we need to do it in a way that is better than the following. # for new_tool_dependency in new_tool_dependencies: # if new_tool_dependency not in saved_tool_dependencies: # return True # The saved metadata must be a subset of the new metadata. for saved_tool_dependency in saved_tool_dependencies: if saved_tool_dependency not in new_tool_dependencies: return True return False else: # The tool_dependencies.xml file must have been deleted, so create a new # repository_metadata record so we always have # access to the deleted file. return True else: return False else: # We have repository metadata that does not include metadata for any tool dependencies # in the repository, so we can update the existing repository metadata. return False else: if "tool_dependencies" in self.metadata_dict: # There is no saved repository metadata, so we need to create a new repository_metadata # record. return True else: # self.metadata_dict includes no metadata for tool dependencies, so a new repository_metadata # record is not needed. return False
[docs] def reset_all_metadata_on_repository_in_tool_shed(self, repository_clone_url=None): """Reset all metadata on a single repository in a tool shed.""" assert self.repository log.debug(f"Resetting all metadata on repository: {self.repository.name}") repo = self.repository.hg_repo # The list of changeset_revisions refers to repository_metadata records that have been created # or updated. When the following loop completes, we'll delete all repository_metadata records # for this repository that do not have a changeset_revision value in this list. changeset_revisions: List[Optional[str]] = [] # When a new repository_metadata record is created, it always uses the values of # metadata_changeset_revision and metadata_dict. metadata_changeset_revision = None metadata_dict = None ancestor_changeset_revision = None ancestor_metadata_dict = None for changeset in self.repository.get_changesets_for_setting_metadata(self.app): work_dir = tempfile.mkdtemp(prefix="tmp-toolshed-ramorits") ctx = repo[changeset] log.debug("Cloning repository changeset revision: %s", str(ctx.rev())) assert self.repository_clone_url repository_clone_url = repository_clone_url or self.repository_clone_url cloned_ok, error_message = hg_util.clone_repository(repository_clone_url, work_dir, str(ctx.rev())) if cloned_ok: log.debug("Generating metadata for changeset revision: %s", str(ctx.rev())) self.set_changeset_revision(str(ctx)) self.set_repository_files_dir(work_dir) self.generate_metadata_for_changeset_revision() if self.metadata_dict: if metadata_changeset_revision is None and metadata_dict is None: # We're at the first change set in the change log. metadata_changeset_revision = self.changeset_revision metadata_dict = self.metadata_dict if ancestor_changeset_revision: # Compare metadata from ancestor and current. The value of comparison will be one of: # self.NO_METADATA - no metadata for either ancestor or current, so continue from current # self.EQUAL - ancestor metadata is equivalent to current metadata, so continue from current # self.SUBSET - ancestor metadata is a subset of current metadata, so continue from current # self.NOT_EQUAL_AND_NOT_SUBSET - ancestor metadata is neither equal to nor a subset of current # metadata, so persist ancestor metadata. log.info(f"amd {ancestor_metadata_dict}") comparison = self.compare_changeset_revisions( ancestor_changeset_revision, ancestor_metadata_dict ) log.info(f"comparison {comparison}") if comparison in [self.NO_METADATA, self.EQUAL, self.SUBSET]: ancestor_changeset_revision = self.changeset_revision ancestor_metadata_dict = self.metadata_dict elif comparison == self.NOT_EQUAL_AND_NOT_SUBSET: metadata_changeset_revision = ancestor_changeset_revision metadata_dict = ancestor_metadata_dict self.create_or_update_repository_metadata(metadata_changeset_revision, metadata_dict) changeset_revisions.append(metadata_changeset_revision) ancestor_changeset_revision = self.changeset_revision ancestor_metadata_dict = self.metadata_dict else: # We're at the beginning of the change log. ancestor_changeset_revision = self.changeset_revision ancestor_metadata_dict = self.metadata_dict if not ctx.children(): metadata_changeset_revision = self.changeset_revision metadata_dict = self.metadata_dict # We're at the end of the change log. self.create_or_update_repository_metadata(metadata_changeset_revision, metadata_dict) changeset_revisions.append(metadata_changeset_revision) ancestor_changeset_revision = None ancestor_metadata_dict = None elif ancestor_metadata_dict: # We reach here only if self.metadata_dict is empty and ancestor_metadata_dict is not. if not ctx.children(): # We're at the end of the change log. self.create_or_update_repository_metadata(metadata_changeset_revision, metadata_dict) changeset_revisions.append(metadata_changeset_revision) ancestor_changeset_revision = None ancestor_metadata_dict = None basic_util.remove_dir(work_dir) # Delete all repository_metadata records for this repository that do not have a changeset_revision # value in changeset_revisions. self._clean_repository_metadata(changeset_revisions) # Set tool version information for all downloadable changeset revisions. Get the list of changeset # revisions from the changelog. self._reset_all_tool_versions(repo)
def _reset_all_tool_versions(self, repo): """Reset tool version lineage for those changeset revisions that include valid tools.""" assert self.repository changeset_revisions_that_contain_tools = _get_changeset_revisions_that_contain_tools( self.app, repo, self.repository ) # The list of changeset_revisions_that_contain_tools is now filtered to contain only those that # are downloadable and contain tools. If a repository includes tools, build a dictionary of # { 'tool id' : 'parent tool id' } pairs for each tool in each changeset revision. for index, changeset_revision in enumerate(changeset_revisions_that_contain_tools): tool_versions_dict = {} repository_metadata = repository_metadata_by_changeset_revision( self.app.model, self.repository.id, changeset_revision ) assert repository_metadata metadata = repository_metadata.metadata tool_dicts = metadata["tools"] if index == 0: # The first changeset_revision is a special case because it will have no ancestor # changeset_revisions in which to match tools. The parent tool id for tools in the # first changeset_revision will be the "old_id" in the tool config. for tool_dict in tool_dicts: tool_versions_dict[tool_dict["guid"]] = tool_dict["id"] else: for tool_dict in tool_dicts: parent_id = self._get_parent_id( self.repository.id, tool_dict["id"], tool_dict["version"], tool_dict["guid"], changeset_revisions_that_contain_tools[0:index], ) tool_versions_dict[tool_dict["guid"]] = parent_id if tool_versions_dict: repository_metadata.tool_versions = tool_versions_dict self.sa_session.add(repository_metadata) session = self.sa_session() with transaction(session): session.commit()
[docs] def reset_metadata_on_selected_repositories(self, **kwd): """ Inspect the repository changelog to reset metadata for all appropriate changeset revisions. This method is called from both Galaxy and the Tool Shed. """ repository_ids = util.listify(kwd.get("repository_ids", None)) message = "" status = "done" if repository_ids: successful_count = 0 unsuccessful_count = 0 for repository_id in repository_ids: try: repository = repository_util.get_repository_in_tool_shed(self.app, repository_id) self.set_repository(repository) self.resetting_all_metadata_on_repository = True self.reset_all_metadata_on_repository_in_tool_shed() if self.invalid_file_tups: message = tool_util.generate_message_for_invalid_tools( self.app, self.invalid_file_tups, repository, None, as_html=False ) log.debug(message) unsuccessful_count += 1 else: log.debug( "Successfully reset metadata on repository %s owned by %s", repository.name, repository.user.username, ) successful_count += 1 except Exception: log.exception("Error attempting to reset metadata on repository %s", str(repository.name)) unsuccessful_count += 1 message = "Successfully reset metadata on %d %s. " % ( successful_count, inflector.cond_plural(successful_count, "repository"), ) if unsuccessful_count: message += "Error setting metadata on %d %s - see the paster log for details. " % ( unsuccessful_count, inflector.cond_plural(unsuccessful_count, "repository"), ) else: message = "Select at least one repository to on which to reset all metadata." status = "error" return message, status
[docs] def set_repository(self, repository, repository_clone_url=None): super().set_repository(repository) self.repository_clone_url = repository_clone_url or common_util.generate_clone_url_for(self.trans, repository)
[docs] def set_repository_metadata(self, host, content_alert_str="", **kwd): """ Set metadata using the self.repository's current disk files, returning specific error messages (if any) to alert the repository owner that the changeset has problems. """ assert self.repository message = "" status = "done" repository_id = self.repository.id repo = self.repository.hg_repo self.generate_metadata_for_changeset_revision() if self.metadata_dict: repository_metadata = None repository_type_class = self.app.repository_types_registry.get_class_by_label(self.repository.type) tip_only = isinstance(repository_type_class, TipOnly) if not tip_only and self.new_metadata_required_for_utilities(): # Create a new repository_metadata table row. repository_metadata = self.create_or_update_repository_metadata( self.repository.tip(), self.metadata_dict ) # If this is the first record stored for this repository, see if we need to send any email alerts. if len(self.repository.downloadable_revisions) == 1: suc.handle_email_alerts( self.app, host, self.repository, content_alert_str="", new_repo_alert=True, admin_only=False ) else: # Update the latest stored repository metadata with the contents and attributes of self.metadata_dict. repository_metadata = metadata_util.get_latest_repository_metadata( self.app, repository_id, downloadable=False ) if repository_metadata: downloadable = metadata_util.is_downloadable(self.metadata_dict) # Update the last saved repository_metadata table row. repository_metadata.changeset_revision = self.repository.tip() repository_metadata.metadata = self.metadata_dict repository_metadata.downloadable = downloadable repository_metadata.includes_datatypes = False # We don't store information about the special type of repository dependency that is needed only for # compiling a tool dependency defined for the dependent repository. repository_dependencies_dict = self.metadata_dict.get("repository_dependencies", {}) repository_dependencies = repository_dependencies_dict.get("repository_dependencies", []) ( has_repository_dependencies, has_repository_dependencies_only_if_compiling_contained_td, ) = repository_util.get_repository_dependency_types(repository_dependencies) repository_metadata.has_repository_dependencies = has_repository_dependencies if "tool_dependencies" in self.metadata_dict: repository_metadata.includes_tool_dependencies = True else: repository_metadata.includes_tool_dependencies = False if "tools" in self.metadata_dict: repository_metadata.includes_tools = True else: repository_metadata.includes_tools = False repository_metadata.includes_workflows = False repository_metadata.missing_test_components = False self.sa_session.add(repository_metadata) session = self.sa_session() with transaction(session): session.commit() else: # There are no metadata records associated with the repository. repository_metadata = self.create_or_update_repository_metadata( self.repository.tip(), self.metadata_dict ) if "tools" in self.metadata_dict and repository_metadata and status != "error": # Set tool versions on the new downloadable change set. The order of the list of changesets is # critical, so we use the repo's changelog. changeset_revisions = [] for changeset in repo.changelog: changeset_revision = str(repo[changeset]) if repository_metadata_by_changeset_revision(self.app.model, repository_id, changeset_revision): changeset_revisions.append(changeset_revision) self._add_tool_versions(repository_id, repository_metadata, changeset_revisions) elif len(repo) == 1 and not self.invalid_file_tups: message = "Revision <b>%s</b> includes no Galaxy utilities for which metadata can " % str( self.repository.tip() ) message += "be defined so this revision cannot be automatically installed into a local Galaxy instance." status = "error" if self.invalid_file_tups: message = tool_util.generate_message_for_invalid_tools( self.app, self.invalid_file_tups, self.repository, self.metadata_dict ) status = "error" return message, status
[docs] def set_repository_metadata_due_to_new_tip(self, host, content_alert_str=None, **kwd): """Set metadata on the tip of self.repository in the tool shed.""" error_message, status = self.set_repository_metadata(host, content_alert_str=content_alert_str, **kwd) return status, error_message
def _get_changeset_revisions_that_contain_tools(app: "ToolShedApp", repo, repository) -> List[str]: changeset_revisions_that_contain_tools = [] for changeset in repo.changelog: changeset_revision = str(repo[changeset]) repository_metadata = repository_metadata_by_changeset_revision(app.model, repository.id, changeset_revision) if repository_metadata: metadata = repository_metadata.metadata if metadata: if metadata.get("tools", None): changeset_revisions_that_contain_tools.append(changeset_revision) return changeset_revisions_that_contain_tools
[docs]def get_user_by_username(session, username): stmt = select(User).where(User.username == username) return session.execute(stmt).scalar_one()
[docs]def get_repository(session, name, user_id): stmt = select(Repository).where(Repository.name == name).where(Repository.user_id == user_id) return session.execute(stmt).scalar_one()
[docs]def get_repository_metadata(session, repository_id): stmt = ( select(RepositoryMetadata) .where(RepositoryMetadata.repository_id == repository_id) .order_by(RepositoryMetadata.changeset_revision, RepositoryMetadata.update_time.desc()) # type: ignore[attr-defined] # mapped attribute ) return session.scalars(stmt)
[docs]def get_current_repositories(session, order=False): stmt = select(Repository).where(Repository.deleted == false()) if order: stmt = stmt.order_by(Repository.name, Repository.user_id) return session.scalars(stmt)
[docs]def get_filtered_repositories(session, repo_ids, order): stmt = select(Repository).where(Repository.id.in_(repo_ids)) if order: stmt = stmt.order_by(Repository.name, Repository.user_id) return session.scalars(stmt)