Source code for tool_shed.metadata.repository_metadata_manager
import logging
import tempfile
from sqlalchemy import false, or_
from galaxy import util
from galaxy.util import inflector
from galaxy.web.form_builder import SelectField
from tool_shed.metadata import metadata_generator
from tool_shed.repository_types import util as rt_util
from tool_shed.repository_types.metadata import TipOnly
from tool_shed.util import (basic_util, common_util, hg_util, metadata_util,
repository_util, shed_util_common as suc, tool_util)
log = logging.getLogger(__name__)
class RepositoryMetadataManager(metadata_generator.MetadataGenerator):
def __init__(self, app, user, repository=None, changeset_revision=None, repository_clone_url=None,
shed_config_dict=None, relative_install_dir=None, repository_files_dir=None,
resetting_all_metadata_on_repository=False, updating_installed_repository=False,
persist=False, metadata_dict=None):
super(RepositoryMetadataManager, self).__init__(app, repository, changeset_revision,
repository_clone_url, shed_config_dict,
relative_install_dir, repository_files_dir,
resetting_all_metadata_on_repository,
updating_installed_repository, persist,
metadata_dict=metadata_dict, user=user)
self.app = app
self.user = user
# Repository metadata comparisons for changeset revisions.
self.EQUAL = 'equal'
self.NO_METADATA = 'no metadata'
self.NOT_EQUAL_AND_NOT_SUBSET = 'not equal and not subset'
self.SUBSET = 'subset'
self.SUBSET_VALUES = [self.EQUAL, self.SUBSET]
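# Illustrative sketch (not in the original source): how the four comparison
# states above are consumed. Each compare_* method below returns one of these
# strings, and SUBSET_VALUES collapses 'equal' and 'subset' into the
# "no new installable revision needed" case. Assuming a manager instance rmm:
#
#     comparison = rmm.compare_datatypes(ancestor_datatypes, current_datatypes)
#     if comparison in rmm.SUBSET_VALUES:
#         pass  # 'equal' or 'subset': keep walking the changelog
#     elif comparison == rmm.NOT_EQUAL_AND_NOT_SUBSET:
#         pass  # persist the ancestor metadata as a new revision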
def add_tool_versions(self, id, repository_metadata, changeset_revisions):
# Build a dictionary of { 'tool id' : 'parent tool id' } pairs for each tool in repository_metadata.
metadata = repository_metadata.metadata
tool_versions_dict = {}
for tool_dict in metadata.get('tools', []):
# We have at least 2 changeset revisions to compare tool guids and tool ids.
parent_id = self.get_parent_id(id,
tool_dict['id'],
tool_dict['version'],
tool_dict['guid'],
changeset_revisions)
tool_versions_dict[tool_dict['guid']] = parent_id
if tool_versions_dict:
repository_metadata.tool_versions = tool_versions_dict
self.sa_session.add(repository_metadata)
self.sa_session.flush()
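# Illustrative sketch (not in the original source): the shape of the
# tool_versions dictionary persisted above, assuming two revisions of a
# hypothetical "filter" tool. Each guid maps to the guid of its parent
# version; the oldest version maps to the short tool id instead:
#
#     tool_versions_dict = {
#         'toolshed.example.org/repos/owner/repo/filter/1.0.1':
#             'toolshed.example.org/repos/owner/repo/filter/1.0.0',
#         'toolshed.example.org/repos/owner/repo/filter/1.0.0': 'filter',
#     }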
def build_repository_ids_select_field(self, name='repository_ids', multiple=True, display='checkboxes',
my_writable=False):
"""Generate the current list of repositories for resetting metadata."""
repositories_select_field = SelectField(name=name, multiple=multiple, display=display)
query = self.get_query_for_setting_metadata_on_repositories(my_writable=my_writable, order=True)
for repository in query:
owner = str(repository.user.username)
option_label = '%s (%s)' % (str(repository.name), owner)
option_value = '%s' % self.app.security.encode_id(repository.id)
repositories_select_field.add_option(option_label, option_value)
return repositories_select_field
def clean_repository_metadata(self, changeset_revisions):
# Delete all repository_metadata records associated with the repository that have
# a changeset_revision that is not in changeset_revisions. We sometimes see multiple
# records with the same changeset revision value - no idea how this happens. We'll
# assume we can delete the older records, so we'll order by update_time descending and
# delete records that have the same changeset_revision we come across later.
changeset_revisions_checked = []
for repository_metadata in \
self.sa_session.query(self.app.model.RepositoryMetadata) \
.filter(self.app.model.RepositoryMetadata.table.c.repository_id == self.repository.id) \
.order_by(self.app.model.RepositoryMetadata.table.c.changeset_revision,
self.app.model.RepositoryMetadata.table.c.update_time.desc()):
changeset_revision = repository_metadata.changeset_revision
if changeset_revision in changeset_revisions_checked or changeset_revision not in changeset_revisions:
self.sa_session.delete(repository_metadata)
self.sa_session.flush()
def compare_changeset_revisions(self, ancestor_changeset_revision, ancestor_metadata_dict):
"""
Compare the contents of two changeset revisions to determine if a new repository
metadata revision should be created.
"""
# The metadata associated with ancestor_changeset_revision is ancestor_metadata_dict.
# This changeset_revision is an ancestor of self.changeset_revision which is associated
# with self.metadata_dict. A new repository_metadata record will be created only
# when this method returns the constant value self.NOT_EQUAL_AND_NOT_SUBSET.
ancestor_datatypes = ancestor_metadata_dict.get('datatypes', [])
ancestor_tools = ancestor_metadata_dict.get('tools', [])
ancestor_guids = [tool_dict['guid'] for tool_dict in ancestor_tools]
ancestor_guids.sort()
ancestor_readme_files = ancestor_metadata_dict.get('readme_files', [])
ancestor_repository_dependencies_dict = ancestor_metadata_dict.get('repository_dependencies', {})
ancestor_repository_dependencies = ancestor_repository_dependencies_dict.get('repository_dependencies', [])
ancestor_tool_dependencies = ancestor_metadata_dict.get('tool_dependencies', {})
ancestor_workflows = ancestor_metadata_dict.get('workflows', [])
ancestor_data_manager = ancestor_metadata_dict.get('data_manager', {})
current_datatypes = self.metadata_dict.get('datatypes', [])
current_tools = self.metadata_dict.get('tools', [])
current_guids = [tool_dict['guid'] for tool_dict in current_tools]
current_guids.sort()
current_readme_files = self.metadata_dict.get('readme_files', [])
current_repository_dependencies_dict = self.metadata_dict.get('repository_dependencies', {})
current_repository_dependencies = current_repository_dependencies_dict.get('repository_dependencies', [])
current_tool_dependencies = self.metadata_dict.get('tool_dependencies', {})
current_workflows = self.metadata_dict.get('workflows', [])
current_data_manager = self.metadata_dict.get('data_manager', {})
# Handle case where no metadata exists for either changeset.
no_datatypes = not ancestor_datatypes and not current_datatypes
no_readme_files = not ancestor_readme_files and not current_readme_files
no_repository_dependencies = not ancestor_repository_dependencies and not current_repository_dependencies
no_tool_dependencies = not ancestor_tool_dependencies and not current_tool_dependencies
no_tools = not ancestor_guids and not current_guids
no_workflows = not ancestor_workflows and not current_workflows
no_data_manager = not ancestor_data_manager and not current_data_manager
if no_datatypes and no_readme_files and no_repository_dependencies and \
no_tool_dependencies and no_tools and no_workflows and \
no_data_manager:
return self.NO_METADATA
# Uncomment the following if we decide that README files should affect how installable
# repository revisions are defined. See the NOTE in self.compare_readme_files().
# readme_file_comparison = self.compare_readme_files( ancestor_readme_files, current_readme_files )
repository_dependency_comparison = self.compare_repository_dependencies(ancestor_repository_dependencies,
current_repository_dependencies)
tool_dependency_comparison = self.compare_tool_dependencies(ancestor_tool_dependencies,
current_tool_dependencies)
workflow_comparison = self.compare_workflows(ancestor_workflows, current_workflows)
datatype_comparison = self.compare_datatypes(ancestor_datatypes, current_datatypes)
data_manager_comparison = self.compare_data_manager(ancestor_data_manager, current_data_manager)
# Handle case where all metadata is the same.
if ancestor_guids == current_guids and \
repository_dependency_comparison == self.EQUAL and \
tool_dependency_comparison == self.EQUAL and \
workflow_comparison == self.EQUAL and \
datatype_comparison == self.EQUAL and \
data_manager_comparison == self.EQUAL:
return self.EQUAL
# Handle case where ancestor metadata is a subset of current metadata.
# readme_file_is_subset = readme_file_comparison in [ self.EQUAL, self.SUBSET ]
repository_dependency_is_subset = repository_dependency_comparison in self.SUBSET_VALUES
tool_dependency_is_subset = tool_dependency_comparison in self.SUBSET_VALUES
workflow_dependency_is_subset = workflow_comparison in self.SUBSET_VALUES
datatype_is_subset = datatype_comparison in self.SUBSET_VALUES
datamanager_is_subset = data_manager_comparison in self.SUBSET_VALUES
if repository_dependency_is_subset and tool_dependency_is_subset and \
workflow_dependency_is_subset and datatype_is_subset and \
datamanager_is_subset:
is_subset = True
for guid in ancestor_guids:
if guid not in current_guids:
is_subset = False
break
if is_subset:
return self.SUBSET
return self.NOT_EQUAL_AND_NOT_SUBSET
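# Illustrative sketch (not in the original source): worked outcomes of the
# comparison above for hypothetical ancestor and current metadata:
#
#     current identical to ancestor                  -> 'equal'
#     current adds a tool the ancestor lacks         -> 'subset'
#     current drops or renames an ancestor tool      -> 'not equal and not subset'
#     neither revision defines any Galaxy utilities  -> 'no metadata'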
def compare_data_manager(self, ancestor_metadata, current_metadata):
"""Determine if ancestor_metadata is the same as or a subset of current_metadata for data_managers."""
def __data_manager_dict_to_tuple_list(metadata_dict):
# We do not check tool_guid or the tool conf file name.
return set(sorted([(name,
tuple(sorted(value.get('data_tables', []))),
value.get('guid'),
value.get('version'),
value.get('name'),
value.get('id')) for name, value in metadata_dict.items()]))
# Only compare valid entries; any invalid entries are ignored.
ancestor_metadata = __data_manager_dict_to_tuple_list(ancestor_metadata.get('data_managers', {}))
current_metadata = __data_manager_dict_to_tuple_list(current_metadata.get('data_managers', {}))
# use set comparisons
if ancestor_metadata.issubset(current_metadata):
if ancestor_metadata == current_metadata:
return self.EQUAL
return self.SUBSET
return self.NOT_EQUAL_AND_NOT_SUBSET
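# Illustrative sketch (not in the original source): the tuple reduction above
# turns the comparison into plain set operations. With hypothetical entries:
#
#     ancestor = {('dm_fasta', ('all_fasta',), 'guid1', '1.0', 'Fetch FASTA', 'dm_fasta')}
#     current = ancestor | {('dm_bwa', ('bwa_indexes',), 'guid2', '1.0', 'Build BWA', 'dm_bwa')}
#     ancestor.issubset(current)  # True, and ancestor != current -> 'subset'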
def compare_datatypes(self, ancestor_datatypes, current_datatypes):
"""Determine if ancestor_datatypes is the same as or a subset of current_datatypes."""
# Each datatype dict looks something like:
# {"dtype": "galaxy.datatypes.images:Image", "extension": "pdf", "mimetype": "application/pdf"}
if len(ancestor_datatypes) <= len(current_datatypes):
for ancestor_datatype in ancestor_datatypes:
# Currently the only way to differentiate datatypes is by name.
ancestor_datatype_dtype = ancestor_datatype['dtype']
ancestor_datatype_extension = ancestor_datatype['extension']
ancestor_datatype_mimetype = ancestor_datatype.get('mimetype', None)
found_in_current = False
for current_datatype in current_datatypes:
if current_datatype['dtype'] == ancestor_datatype_dtype and \
current_datatype['extension'] == ancestor_datatype_extension and \
current_datatype.get('mimetype', None) == ancestor_datatype_mimetype:
found_in_current = True
break
if not found_in_current:
return self.NOT_EQUAL_AND_NOT_SUBSET
if len(ancestor_datatypes) == len(current_datatypes):
return self.EQUAL
else:
return self.SUBSET
return self.NOT_EQUAL_AND_NOT_SUBSET
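# Illustrative sketch (not in the original source): two hypothetical datatype
# lists and the result of the comparison above. Datatypes match only when
# dtype, extension, and mimetype all agree:
#
#     ancestor = [{'dtype': 'galaxy.datatypes.images:Image', 'extension': 'pdf',
#                  'mimetype': 'application/pdf'}]
#     current = ancestor + [{'dtype': 'galaxy.datatypes.data:Text', 'extension': 'acedb'}]
#     -> 'subset': every ancestor datatype was found, but the lengths differ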
def compare_readme_files(self, ancestor_readme_files, current_readme_files):
"""Determine if ancestor_readme_files is equal to or a subset of current_readme_files."""
# NOTE: Although repository README files are considered a Galaxy utility similar to tools,
# repository dependency definition files, etc., we don't define installable repository revisions
# based on changes to README files. To understand why, consider the following scenario:
# 1. Upload the filtering tool to a new repository - this will result in installable revision 0.
# 2. Upload a README file to the repository - this will move the installable revision from revision
# 0 to revision 1.
# 3. Delete the README file from the repository - this will move the installable revision from
# revision 1 to revision 2.
# The above scenario is the current behavior, and that is why this method is not currently called.
# This method exists only in case we decide to change this current behavior.
# The list of readme files looks something like: ["database/community_files/000/repo_2/readme.txt"]
if len(ancestor_readme_files) <= len(current_readme_files):
for ancestor_readme_file in ancestor_readme_files:
if ancestor_readme_file not in current_readme_files:
return self.NOT_EQUAL_AND_NOT_SUBSET
if len(ancestor_readme_files) == len(current_readme_files):
return self.EQUAL
else:
return self.SUBSET
return self.NOT_EQUAL_AND_NOT_SUBSET
def compare_repository_dependencies(self, ancestor_repository_dependencies, current_repository_dependencies):
"""
Determine if ancestor_repository_dependencies is the same as or a subset of
current_repository_dependencies.
"""
# The list of repository_dependencies looks something like:
# [["http://localhost:9009", "emboss_datatypes", "test", "ab03a2a5f407", "False", "False"]].
# Create a string from each tuple in the list for easier comparison.
if len(ancestor_repository_dependencies) <= len(current_repository_dependencies):
for ancestor_tup in ancestor_repository_dependencies:
a_tool_shed, a_repo_name, a_repo_owner, a_changeset_revision, \
a_prior_installation_required, \
a_only_if_compiling_contained_td = ancestor_tup
cleaned_a_tool_shed = common_util.remove_protocol_from_tool_shed_url(a_tool_shed)
found_in_current = False
for current_tup in current_repository_dependencies:
c_tool_shed, c_repo_name, c_repo_owner, \
c_changeset_revision, c_prior_installation_required, \
c_only_if_compiling_contained_td = current_tup
cleaned_c_tool_shed = common_util.remove_protocol_from_tool_shed_url(c_tool_shed)
if cleaned_c_tool_shed == cleaned_a_tool_shed and \
c_repo_name == a_repo_name and \
c_repo_owner == a_repo_owner and \
c_changeset_revision == a_changeset_revision and \
util.string_as_bool(c_prior_installation_required) == util.string_as_bool(a_prior_installation_required) and \
util.string_as_bool(c_only_if_compiling_contained_td) == util.string_as_bool(a_only_if_compiling_contained_td):
found_in_current = True
break
if not found_in_current:
# In some cases, the only difference between a dependency definition in the lists
# is the changeset_revision value. We'll check to see if this is the case, and if
# the defined dependency is a repository that has metadata set only on its tip.
if not self.different_revision_defines_tip_only_repository_dependency(ancestor_tup,
current_repository_dependencies):
return self.NOT_EQUAL_AND_NOT_SUBSET
return self.SUBSET
if len(ancestor_repository_dependencies) == len(current_repository_dependencies):
return self.EQUAL
else:
return self.SUBSET
return self.NOT_EQUAL_AND_NOT_SUBSET
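# Illustrative sketch (not in the original source): two hypothetical
# dependency tuples that match despite cosmetic differences, because the tool
# shed URLs are compared with the protocol stripped and the flag strings are
# normalized with util.string_as_bool():
#
#     a = ('http://localhost:9009', 'emboss_datatypes', 'test', 'ab03a2a5f407', 'False', 'False')
#     c = ('https://localhost:9009', 'emboss_datatypes', 'test', 'ab03a2a5f407', 'false', 'False')
#     # a and c are treated as the same dependency definition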
def compare_tool_dependencies(self, ancestor_tool_dependencies, current_tool_dependencies):
"""
Determine if ancestor_tool_dependencies is the same as or a subset of current_tool_dependencies.
"""
# The tool_dependencies dictionary looks something like:
# {'bwa/0.5.9': {'readme': 'some string', 'version': '0.5.9', 'type': 'package', 'name': 'bwa'}}
if len(ancestor_tool_dependencies) <= len(current_tool_dependencies):
for ancestor_td_key, ancestor_requirements_dict in ancestor_tool_dependencies.items():
if ancestor_td_key in current_tool_dependencies:
# The only values that could have changed between the 2 dictionaries are the
# "readme" or "type" values. Changing the readme value makes no difference.
# Changing the type will change the installation process, but for now we'll
# assume it was a typo, so new metadata shouldn't be generated.
continue
else:
return self.NOT_EQUAL_AND_NOT_SUBSET
# At this point we know that ancestor_tool_dependencies is at least a subset of current_tool_dependencies.
if len(ancestor_tool_dependencies) == len(current_tool_dependencies):
return self.EQUAL
else:
return self.SUBSET
return self.NOT_EQUAL_AND_NOT_SUBSET
def compare_workflows(self, ancestor_workflows, current_workflows):
"""
Determine if ancestor_workflows is the same as current_workflows or if ancestor_workflows
is a subset of current_workflows.
"""
if len(ancestor_workflows) <= len(current_workflows):
for ancestor_workflow_tup in ancestor_workflows:
# ancestor_workflows is a list of tuples where each contained tuple is
# [ <relative path to the .ga file in the repository>, <exported workflow dict> ]
ancestor_workflow_dict = ancestor_workflow_tup[1]
# Currently the only way to differentiate workflows is by name.
ancestor_workflow_name = ancestor_workflow_dict['name']
num_ancestor_workflow_steps = len(ancestor_workflow_dict['steps'])
found_in_current = False
for current_workflow_tup in current_workflows:
current_workflow_dict = current_workflow_tup[1]
# Assume that if the name and number of steps are equal, then the workflows
# are the same. Of course, this may not be true...
if current_workflow_dict['name'] == ancestor_workflow_name and \
len(current_workflow_dict['steps']) == num_ancestor_workflow_steps:
found_in_current = True
break
if not found_in_current:
return self.NOT_EQUAL_AND_NOT_SUBSET
if len(ancestor_workflows) == len(current_workflows):
return self.EQUAL
else:
return self.SUBSET
return self.NOT_EQUAL_AND_NOT_SUBSET
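# Illustrative sketch (not in the original source): the workflow heuristic
# above matches on exported name and step count only, so two hypothetical
# workflow tuples are considered the same even if their steps differ internally:
#
#     ancestor = [('workflows/qc.ga', {'name': 'QC', 'steps': {'0': {}, '1': {}}})]
#     current = [('workflows/qc.ga', {'name': 'QC', 'steps': {'0': {}, '1': {}}})]
#     -> 'equal'; step contents are never inspected, only the name and count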
def create_or_update_repository_metadata(self, changeset_revision, metadata_dict):
"""Create or update a repository_metadata record in the tool shed."""
has_repository_dependencies = False
has_repository_dependencies_only_if_compiling_contained_td = False
includes_datatypes = False
includes_tools = False
includes_tool_dependencies = False
includes_workflows = False
if metadata_dict:
repository_dependencies_dict = metadata_dict.get('repository_dependencies', {})
repository_dependencies = repository_dependencies_dict.get('repository_dependencies', [])
has_repository_dependencies, has_repository_dependencies_only_if_compiling_contained_td = \
repository_util.get_repository_dependency_types(repository_dependencies)
if 'datatypes' in metadata_dict:
includes_datatypes = True
if 'tools' in metadata_dict:
includes_tools = True
if 'tool_dependencies' in metadata_dict:
includes_tool_dependencies = True
if 'workflows' in metadata_dict:
includes_workflows = True
if has_repository_dependencies or \
has_repository_dependencies_only_if_compiling_contained_td or \
includes_datatypes or includes_tools or \
includes_tool_dependencies or includes_workflows:
downloadable = True
else:
downloadable = False
repository_metadata = metadata_util.get_repository_metadata_by_changeset_revision(self.app,
self.app.security.encode_id(self.repository.id),
changeset_revision)
if repository_metadata:
repository_metadata.metadata = metadata_dict
repository_metadata.downloadable = downloadable
repository_metadata.has_repository_dependencies = has_repository_dependencies
repository_metadata.includes_datatypes = includes_datatypes
repository_metadata.includes_tools = includes_tools
repository_metadata.includes_tool_dependencies = includes_tool_dependencies
repository_metadata.includes_workflows = includes_workflows
else:
repository_metadata = \
self.app.model.RepositoryMetadata(repository_id=self.repository.id,
changeset_revision=changeset_revision,
metadata=metadata_dict,
downloadable=downloadable,
has_repository_dependencies=has_repository_dependencies,
includes_datatypes=includes_datatypes,
includes_tools=includes_tools,
includes_tool_dependencies=includes_tool_dependencies,
includes_workflows=includes_workflows)
# Always set the default values for the following columns. When resetting all metadata
# on a repository this will reset the values.
repository_metadata.missing_test_components = False
self.sa_session.add(repository_metadata)
self.sa_session.flush()
return repository_metadata
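# Illustrative sketch (not in the original source): how the downloadable flag
# above is derived for hypothetical metadata dictionaries:
#
#     {'tools': [...]}          -> downloadable = True
#     {'datatypes': [...]}      -> downloadable = True
#     {'readme_files': [...]}   -> downloadable = False  (README files alone
#                                  never make a revision installable)
#     {} or None                -> downloadable = False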
def different_revision_defines_tip_only_repository_dependency(self, rd_tup, repository_dependencies):
"""
Determine if the only difference between rd_tup and a dependency definition in the list of
repository_dependencies is the changeset_revision value.
"""
rd_tool_shed, rd_name, rd_owner, rd_changeset_revision, rd_prior_installation_required, rd_only_if_compiling_contained_td = \
common_util.parse_repository_dependency_tuple(rd_tup)
cleaned_rd_tool_shed = common_util.remove_protocol_from_tool_shed_url(rd_tool_shed)
for repository_dependency in repository_dependencies:
tool_shed, name, owner, changeset_revision, prior_installation_required, only_if_compiling_contained_td = \
common_util.parse_repository_dependency_tuple(repository_dependency)
cleaned_tool_shed = common_util.remove_protocol_from_tool_shed_url(tool_shed)
if cleaned_rd_tool_shed == cleaned_tool_shed and rd_name == name and rd_owner == owner:
# Determine if the repository represented by the dependency tuple is an instance of the repository type TipOnly.
required_repository = repository_util.get_repository_by_name_and_owner(self.app, name, owner)
repository_type_class = self.app.repository_types_registry.get_class_by_label(required_repository.type)
return isinstance(repository_type_class, TipOnly)
return False
def get_parent_id(self, id, old_id, version, guid, changeset_revisions):
parent_id = None
# Compare from most recent to oldest.
changeset_revisions.reverse()
for changeset_revision in changeset_revisions:
repository_metadata = metadata_util.get_repository_metadata_by_changeset_revision(self.app, id, changeset_revision)
metadata = repository_metadata.metadata
tools_dicts = metadata.get('tools', [])
for tool_dict in tools_dicts:
if tool_dict['guid'] == guid:
# The tool has not changed between the compared changeset revisions.
continue
if tool_dict['id'] == old_id and tool_dict['version'] != version:
# The tool version is different, so we've found the parent.
return tool_dict['guid']
if parent_id is None:
# The tool did not change through all of the changeset revisions.
return old_id
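# Illustrative sketch (not in the original source): resolving the parent of a
# hypothetical tool with old_id 'filter' and version '1.0.1'. The supplied
# changeset_revisions are walked from newest to oldest:
#
#     rev N:   {'id': 'filter', 'version': '1.0.1', 'guid': '.../filter/1.0.1'}  # same guid: skip
#     rev N-1: {'id': 'filter', 'version': '1.0.0', 'guid': '.../filter/1.0.0'}  # same id, different version
#     -> returns '.../filter/1.0.0'; if no revision differs, 'filter' is returned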
def get_query_for_setting_metadata_on_repositories(self, my_writable=False, order=True):
"""
Return a query containing repositories for resetting metadata. The order parameter
is True when the returned list of repositories will be displayed ordered
alphabetically on a page. When called from the Tool Shed API, order is False.
"""
# When called from the Tool Shed API, the metadata is reset on all repositories of types
# repository_suite_definition and tool_dependency_definition in addition to other selected
# repositories.
if my_writable:
username = self.user.username
clause_list = []
for repository in self.sa_session.query(self.app.model.Repository) \
.filter(self.app.model.Repository.table.c.deleted == false()):
# Always reset metadata on all repositories of types repository_suite_definition and
# tool_dependency_definition.
if repository.type in [rt_util.REPOSITORY_SUITE_DEFINITION, rt_util.TOOL_DEPENDENCY_DEFINITION]:
clause_list.append(self.app.model.Repository.table.c.id == repository.id)
else:
allow_push = repository.allow_push(self.app)
if allow_push:
# Include all repositories that are writable by the current user.
allow_push_usernames = allow_push.split(',')
if username in allow_push_usernames:
clause_list.append(self.app.model.Repository.table.c.id == repository.id)
if clause_list:
if order:
return self.sa_session.query(self.app.model.Repository) \
.filter(or_(*clause_list)) \
.order_by(self.app.model.Repository.table.c.name,
self.app.model.Repository.table.c.user_id)
else:
return self.sa_session.query(self.app.model.Repository) \
.filter(or_(*clause_list))
else:
# Return an empty query.
return self.sa_session.query(self.app.model.Repository) \
.filter(self.app.model.Repository.table.c.id == -1)
else:
if order:
return self.sa_session.query(self.app.model.Repository) \
.filter(self.app.model.Repository.table.c.deleted == false()) \
.order_by(self.app.model.Repository.table.c.name,
self.app.model.Repository.table.c.user_id)
else:
return self.sa_session.query(self.app.model.Repository) \
.filter(self.app.model.Repository.table.c.deleted == false())
def new_datatypes_metadata_required(self, repository_metadata):
"""
Compare the last saved metadata for each datatype in the repository with the new metadata
in self.metadata_dict to determine if a new repository_metadata table record is required
or if the last saved metadata record can be updated for datatypes instead.
"""
# Datatypes are stored in metadata as a list of dictionaries that looks like:
# [{'dtype': 'galaxy.datatypes.data:Text', 'subclass': 'True', 'extension': 'acedb'}]
if 'datatypes' in self.metadata_dict:
current_datatypes = self.metadata_dict['datatypes']
if repository_metadata:
metadata = repository_metadata.metadata
if metadata:
if 'datatypes' in metadata:
ancestor_datatypes = metadata['datatypes']
# The saved metadata must be a subset of the new metadata.
datatype_comparison = self.compare_datatypes(ancestor_datatypes, current_datatypes)
if datatype_comparison == self.NOT_EQUAL_AND_NOT_SUBSET:
return True
else:
return False
else:
# The new metadata includes datatypes, but the stored metadata does not,
# so we can update the stored metadata.
return False
else:
# There is no stored metadata, so we can update the metadata column in the
# repository_metadata table.
return False
else:
# There is no stored repository metadata, so we need to create a new repository_metadata
# table record.
return True
# self.metadata_dict includes no metadata for datatypes, so a new repository_metadata
# table record is not needed.
return False
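# Illustrative sketch (not in the original source): the decision table shared
# by this and the other new_*_metadata_required methods below, using datatypes
# as the example. "Stored" refers to the last saved repository_metadata record:
#
#     new dict has datatypes | stored record | stored has datatypes    | result
#     -----------------------+---------------+-------------------------+-------
#     yes                    | yes           | yes, not equal/subset   | True
#     yes                    | yes           | yes, equal or subset    | False
#     yes                    | yes           | no                      | False
#     yes                    | no            | n/a                     | True
#     no                     | any           | any                     | False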
def new_metadata_required_for_utilities(self):
"""
Compare the last stored repository_metadata record associated with self.repository
against the contents of self.metadata_dict and return True if a new record is
required for any of the Galaxy utility types contained in either metadata
dictionary. The metadata in self.metadata_dict may not be a subset of that in
the last stored record because one or more Galaxy utilities may have been
deleted from self.repository in the new tip.
"""
repository_metadata = metadata_util.get_latest_repository_metadata(self.app,
self.repository.id,
downloadable=False)
datatypes_required = self.new_datatypes_metadata_required(repository_metadata)
# Uncomment the following if we decide that README files should affect how installable
# repository revisions are defined. See the NOTE in the compare_readme_files() method.
# readme_files_required = self.new_readme_files_metadata_required( repository_metadata )
repository_dependencies_required = \
self.new_repository_dependency_metadata_required(repository_metadata)
tools_required = self.new_tool_metadata_required(repository_metadata)
tool_dependencies_required = self.new_tool_dependency_metadata_required(repository_metadata)
workflows_required = self.new_workflow_metadata_required(repository_metadata)
if datatypes_required or repository_dependencies_required or \
tools_required or tool_dependencies_required or workflows_required:
return True
return False
def new_readme_files_metadata_required(self, repository_metadata):
"""
Compare the last saved metadata for each readme file in the repository with the new metadata
in self.metadata_dict to determine if a new repository_metadata table record is required or
if the last saved metadata record can be updated for readme files instead.
"""
# Repository README files are kind of a special case because they have no effect on reproducibility.
# We'll simply inspect the file names to determine if any that exist in the saved metadata are
# eliminated from the new metadata in self.metadata_dict.
if 'readme_files' in self.metadata_dict:
current_readme_files = self.metadata_dict['readme_files']
if repository_metadata:
metadata = repository_metadata.metadata
if metadata:
if 'readme_files' in metadata:
ancestor_readme_files = metadata['readme_files']
# The saved metadata must be a subset of the new metadata.
readme_file_comparison = self.compare_readme_files(ancestor_readme_files,
current_readme_files)
if readme_file_comparison == self.NOT_EQUAL_AND_NOT_SUBSET:
return True
else:
return False
else:
# The new metadata includes readme_files, but the stored metadata does not, so
# we can update the stored metadata.
return False
else:
# There is no stored metadata, so we can update the metadata column in the repository_metadata
# table.
return False
else:
# There is no stored repository metadata, so we need to create a new repository_metadata
# table record.
return True
# self.metadata_dict includes no metadata for readme_files, so a new repository_metadata
# table record is not needed.
return False
def new_repository_dependency_metadata_required(self, repository_metadata):
"""
Compare the last saved metadata for each repository dependency in the repository
with the new metadata in self.metadata_dict to determine if a new repository_metadata
table record is required or if the last saved metadata record can be updated for
repository_dependencies instead.
"""
if repository_metadata:
metadata = repository_metadata.metadata
if 'repository_dependencies' in metadata:
saved_repository_dependencies = metadata['repository_dependencies']['repository_dependencies']
new_repository_dependencies_metadata = self.metadata_dict.get('repository_dependencies', None)
if new_repository_dependencies_metadata:
new_repository_dependencies = self.metadata_dict['repository_dependencies']['repository_dependencies']
# TODO: We used to include the following here to handle the case where repository
# dependency definitions were deleted. However this erroneously returned True in
cases where it should not have done so. This usually occurred when multiple single
# files were uploaded when a single tarball should have been. We need to implement
# support for handling deleted repository dependency definitions so that we can guarantee
# reproducibility, but we need to do it in a way that is better than the following.
# for new_repository_dependency in new_repository_dependencies:
# if new_repository_dependency not in saved_repository_dependencies:
# return True
# The saved metadata must be a subset of the new metadata.
for saved_repository_dependency in saved_repository_dependencies:
if saved_repository_dependency not in new_repository_dependencies:
# In some cases, the only difference between a dependency definition in the lists
# is the changeset_revision value. We'll check to see if this is the case, and if
# the defined dependency is a repository that has metadata set only on its tip.
if not self.different_revision_defines_tip_only_repository_dependency(saved_repository_dependency,
new_repository_dependencies):
return True
return False
else:
# The repository_dependencies.xml file must have been deleted, so create a new
# repository_metadata record so we always have access to the deleted file.
return True
else:
return False
else:
if 'repository_dependencies' in self.metadata_dict:
# There is no saved repository metadata, so we need to create a new repository_metadata record.
return True
else:
# self.metadata_dict includes no metadata for repository dependencies, so a new repository_metadata
# record is not needed.
return False
def new_tool_metadata_required(self, repository_metadata):
"""
Compare the last saved metadata for each tool in the repository with the new metadata in
self.metadata_dict to determine if a new repository_metadata table record is required, or if
the last saved metadata record can be updated instead.
"""
if 'tools' in self.metadata_dict:
if repository_metadata:
metadata = repository_metadata.metadata
if metadata:
if 'tools' in metadata:
saved_tool_ids = []
# The metadata for one or more tools was successfully generated in the past
# for this repository, so we first compare the version string for each tool id
# in self.metadata_dict with what was previously saved to see if we need to create
# a new table record or if we can simply update the existing record.
for new_tool_metadata_dict in self.metadata_dict['tools']:
for saved_tool_metadata_dict in metadata['tools']:
if saved_tool_metadata_dict['id'] not in saved_tool_ids:
saved_tool_ids.append(saved_tool_metadata_dict['id'])
if new_tool_metadata_dict['id'] == saved_tool_metadata_dict['id']:
if new_tool_metadata_dict['version'] != saved_tool_metadata_dict['version']:
return True
# So far, a new metadata record is not required, but we still have to check to see if
# any new tool ids exist in self.metadata_dict that are not in the saved metadata. We do
# this because if a new tarball was uploaded to a repository that included tools, it
# may have removed existing tool files if they were not included in the uploaded tarball.
for new_tool_metadata_dict in self.metadata_dict['tools']:
if new_tool_metadata_dict['id'] not in saved_tool_ids:
return True
return False
else:
# The new metadata includes tools, but the stored metadata does not, so we can
# update the stored metadata.
return False
else:
# There is no stored metadata, so we can update the metadata column in the
# repository_metadata table.
return False
else:
# There is no stored repository metadata, so we need to create a new repository_metadata
# table record.
return True
# self.metadata_dict includes no metadata for tools, so a new repository_metadata table
# record is not needed.
return False
def new_tool_dependency_metadata_required(self, repository_metadata):
"""
Compare the last saved metadata for each tool dependency in the repository with the new
metadata in self.metadata_dict to determine if a new repository_metadata table record is
required or if the last saved metadata record can be updated for tool_dependencies instead.
"""
if repository_metadata:
metadata = repository_metadata.metadata
if metadata:
if 'tool_dependencies' in metadata:
saved_tool_dependencies = metadata['tool_dependencies']
new_tool_dependencies = self.metadata_dict.get('tool_dependencies', None)
if new_tool_dependencies:
# TODO: We used to include the following here to handle the case where
# tool dependency definitions were deleted. However, this erroneously
# returned True in cases where it should not have done so. This usually
# occurred where multiple single files were uploaded when a single tarball
# should have been. We need to implement support for handling deleted
# tool dependency definitions so that we can guarantee reproducibility,
# but we need to do it in a way that is better than the following.
# for new_tool_dependency in new_tool_dependencies:
# if new_tool_dependency not in saved_tool_dependencies:
# return True
# The saved metadata must be a subset of the new metadata.
for saved_tool_dependency in saved_tool_dependencies:
if saved_tool_dependency not in new_tool_dependencies:
return True
return False
else:
# The tool_dependencies.xml file must have been deleted, so create a new
# repository_metadata record so we always have
# access to the deleted file.
return True
else:
return False
else:
# We have repository metadata that does not include metadata for any tool dependencies
# in the repository, so we can update the existing repository metadata.
return False
else:
if 'tool_dependencies' in self.metadata_dict:
# There is no saved repository metadata, so we need to create a new repository_metadata
# record.
return True
else:
# self.metadata_dict includes no metadata for tool dependencies, so a new repository_metadata
# record is not needed.
return False
def new_workflow_metadata_required(self, repository_metadata):
"""
Currently everything about an exported workflow except the name is hard-coded, so
there's no real way to differentiate versions of exported workflows. If this changes
at some future time, this method should be enhanced accordingly.
"""
if 'workflows' in self.metadata_dict:
if repository_metadata:
# The repository has metadata, so update the workflows value -
# no new record is needed.
return False
else:
# There is no saved repository metadata, so we need to create a
# new repository_metadata table record.
return True
# self.metadata_dict includes no metadata for workflows, so a new
# repository_metadata table record is not needed.
return False
def reset_all_metadata_on_repository_in_tool_shed(self):
"""Reset all metadata on a single repository in a tool shed."""
log.debug("Resetting all metadata on repository: %s" % self.repository.name)
repo = hg_util.get_repo_for_repository(self.app, repository=self.repository)
# The list of changeset_revisions refers to repository_metadata records that have been created
# or updated. When the following loop completes, we'll delete all repository_metadata records
# for this repository that do not have a changeset_revision value in this list.
changeset_revisions = []
# When a new repository_metadata record is created, it always uses the values of
# metadata_changeset_revision and metadata_dict.
metadata_changeset_revision = None
metadata_dict = None
ancestor_changeset_revision = None
ancestor_metadata_dict = None
for changeset in self.repository.get_changesets_for_setting_metadata(self.app):
work_dir = tempfile.mkdtemp(prefix="tmp-toolshed-ramorits")
ctx = repo.changectx(changeset)
log.debug("Cloning repository changeset revision: %s", str(ctx.rev()))
cloned_ok, error_message = hg_util.clone_repository(self.repository_clone_url, work_dir, str(ctx.rev()))
if cloned_ok:
log.debug("Generating metadata for changset revision: %s", str(ctx.rev()))
self.set_changeset_revision(str(repo.changectx(changeset)))
self.set_repository_files_dir(work_dir)
self.generate_metadata_for_changeset_revision()
if self.metadata_dict:
if metadata_changeset_revision is None and metadata_dict is None:
# We're at the first change set in the change log.
metadata_changeset_revision = self.changeset_revision
metadata_dict = self.metadata_dict
if ancestor_changeset_revision:
# Compare metadata from ancestor and current. The value of comparison will be one of:
# self.NO_METADATA - no metadata for either ancestor or current, so continue from current
# self.EQUAL - ancestor metadata is equivalent to current metadata, so continue from current
# self.SUBSET - ancestor metadata is a subset of current metadata, so continue from current
# self.NOT_EQUAL_AND_NOT_SUBSET - ancestor metadata is neither equal to nor a subset of current
# metadata, so persist ancestor metadata.
comparison = self.compare_changeset_revisions(ancestor_changeset_revision, ancestor_metadata_dict)
if comparison in [self.NO_METADATA, self.EQUAL, self.SUBSET]:
ancestor_changeset_revision = self.changeset_revision
ancestor_metadata_dict = self.metadata_dict
elif comparison == self.NOT_EQUAL_AND_NOT_SUBSET:
metadata_changeset_revision = ancestor_changeset_revision
metadata_dict = ancestor_metadata_dict
self.create_or_update_repository_metadata(metadata_changeset_revision, metadata_dict)
changeset_revisions.append(metadata_changeset_revision)
ancestor_changeset_revision = self.changeset_revision
ancestor_metadata_dict = self.metadata_dict
else:
# We're at the beginning of the change log.
ancestor_changeset_revision = self.changeset_revision
ancestor_metadata_dict = self.metadata_dict
if not ctx.children():
metadata_changeset_revision = self.changeset_revision
metadata_dict = self.metadata_dict
# We're at the end of the change log.
self.create_or_update_repository_metadata(metadata_changeset_revision, metadata_dict)
changeset_revisions.append(metadata_changeset_revision)
ancestor_changeset_revision = None
ancestor_metadata_dict = None
elif ancestor_metadata_dict:
# We reach here only if self.metadata_dict is empty and ancestor_metadata_dict is not.
if not ctx.children():
# We're at the end of the change log.
self.create_or_update_repository_metadata(metadata_changeset_revision, metadata_dict)
changeset_revisions.append(metadata_changeset_revision)
ancestor_changeset_revision = None
ancestor_metadata_dict = None
basic_util.remove_dir(work_dir)
# Delete all repository_metadata records for this repository that do not have a changeset_revision
# value in changeset_revisions.
self.clean_repository_metadata(changeset_revisions)
# Set tool version information for all downloadable changeset revisions. Get the list of changeset
# revisions from the changelog.
self.reset_all_tool_versions(repo)
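# Illustrative sketch (not in the original source): a worked example of the
# changelog walk above for a hypothetical repository with four revisions:
#
#     rev 0: tool A             -> becomes the first ancestor
#     rev 1: tool A + README    -> 'equal' (README files are not compared); ancestor moves to rev 1
#     rev 2: tool A v2 added    -> 'subset'; ancestor moves to rev 2
#     rev 3: tool A v1 removed  -> 'not equal and not subset': persist metadata
#                                  for rev 2, then continue from rev 3
#     tip reached               -> persist metadata for rev 3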
def reset_all_tool_versions(self, repo):
"""Reset tool version lineage for those changeset revisions that include valid tools."""
encoded_repository_id = self.app.security.encode_id(self.repository.id)
changeset_revisions_that_contain_tools = []
for changeset in repo.changelog:
changeset_revision = str(repo.changectx(changeset))
repository_metadata = metadata_util.get_repository_metadata_by_changeset_revision(self.app,
encoded_repository_id,
changeset_revision)
if repository_metadata:
metadata = repository_metadata.metadata
if metadata:
if metadata.get('tools', None):
changeset_revisions_that_contain_tools.append(changeset_revision)
# The list of changeset_revisions_that_contain_tools is now filtered to contain only those that
# are downloadable and contain tools. If a repository includes tools, build a dictionary of
# { 'tool id' : 'parent tool id' } pairs for each tool in each changeset revision.
for index, changeset_revision in enumerate(changeset_revisions_that_contain_tools):
tool_versions_dict = {}
repository_metadata = metadata_util.get_repository_metadata_by_changeset_revision(self.app,
encoded_repository_id,
changeset_revision)
metadata = repository_metadata.metadata
tool_dicts = metadata['tools']
if index == 0:
# The first changeset_revision is a special case because it will have no ancestor
# changeset_revisions in which to match tools. The parent tool id for tools in the
# first changeset_revision will be the "old_id" in the tool config.
for tool_dict in tool_dicts:
tool_versions_dict[tool_dict['guid']] = tool_dict['id']
else:
for tool_dict in tool_dicts:
parent_id = self.get_parent_id(encoded_repository_id,
tool_dict['id'],
tool_dict['version'],
tool_dict['guid'],
changeset_revisions_that_contain_tools[0:index])
tool_versions_dict[tool_dict['guid']] = parent_id
if tool_versions_dict:
repository_metadata.tool_versions = tool_versions_dict
self.sa_session.add(repository_metadata)
self.sa_session.flush()
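# Illustrative sketch (not in the original source): the lineage dictionaries
# built above for two hypothetical tool-bearing revisions r0 and r1:
#
#     r0 (index 0): {'.../filter/1.0.0': 'filter'}            # parent is the short tool id
#     r1 (index 1): {'.../filter/1.0.1': '.../filter/1.0.0'}  # parent resolved from r0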
def reset_metadata_on_selected_repositories(self, **kwd):
"""
Inspect the repository changelog to reset metadata for all appropriate changeset revisions.
This method is called from both Galaxy and the Tool Shed.
"""
repository_ids = util.listify(kwd.get('repository_ids', None))
message = ''
status = 'done'
if repository_ids:
successful_count = 0
unsuccessful_count = 0
for repository_id in repository_ids:
try:
repository = repository_util.get_repository_in_tool_shed(self.app, repository_id)
self.set_repository(repository)
self.resetting_all_metadata_on_repository = True
self.reset_all_metadata_on_repository_in_tool_shed()
if self.invalid_file_tups:
message = tool_util.generate_message_for_invalid_tools(self.app,
self.invalid_file_tups,
repository,
None,
as_html=False)
log.debug(message)
unsuccessful_count += 1
else:
log.debug("Successfully reset metadata on repository %s owned by %s" %
(str(repository.name), str(repository.user.username)))
successful_count += 1
except Exception:
log.exception("Error attempting to reset metadata on repository %s", str(repository.name))
unsuccessful_count += 1
message = "Successfully reset metadata on %d %s. " % \
(successful_count, inflector.cond_plural(successful_count, "repository"))
if unsuccessful_count:
message += "Error setting metadata on %d %s - see the paster log for details. " % \
(unsuccessful_count, inflector.cond_plural(unsuccessful_count, "repository"))
else:
message = 'Select at least one repository on which to reset all metadata.'
status = 'error'
return message, status
def set_repository(self, repository):
super(RepositoryMetadataManager, self).set_repository(repository)
self.repository_clone_url = common_util.generate_clone_url_for_repository_in_tool_shed(self.user, repository)
def set_repository_metadata(self, host, content_alert_str='', **kwd):
"""
Set metadata using self.repository's current disk files, returning specific error
messages (if any) to alert the repository owner that the changeset has problems.
"""
message = ''
status = 'done'
encoded_id = self.app.security.encode_id(self.repository.id)
repo_dir = self.repository.repo_path(self.app)
repo = hg_util.get_repo_for_repository(self.app, repo_path=repo_dir)
self.generate_metadata_for_changeset_revision()
if self.metadata_dict:
repository_metadata = None
repository_type_class = self.app.repository_types_registry.get_class_by_label(self.repository.type)
tip_only = isinstance(repository_type_class, TipOnly)
if not tip_only and self.new_metadata_required_for_utilities():
# Create a new repository_metadata table row.
repository_metadata = self.create_or_update_repository_metadata(self.repository.tip(self.app),
self.metadata_dict)
# If this is the first record stored for this repository, see if we need to send any email alerts.
if len(self.repository.downloadable_revisions) == 1:
suc.handle_email_alerts(self.app,
host,
self.repository,
content_alert_str='',
new_repo_alert=True,
admin_only=False)
else:
# Update the latest stored repository metadata with the contents and attributes of self.metadata_dict.
repository_metadata = metadata_util.get_latest_repository_metadata(self.app,
self.repository.id,
downloadable=False)
if repository_metadata:
downloadable = metadata_util.is_downloadable(self.metadata_dict)
# Update the last saved repository_metadata table row.
repository_metadata.changeset_revision = self.repository.tip(self.app)
repository_metadata.metadata = self.metadata_dict
repository_metadata.downloadable = downloadable
if 'datatypes' in self.metadata_dict:
repository_metadata.includes_datatypes = True
else:
repository_metadata.includes_datatypes = False
# We don't store information about the special type of repository dependency that is needed only for
# compiling a tool dependency defined for the dependent repository.
repository_dependencies_dict = self.metadata_dict.get('repository_dependencies', {})
repository_dependencies = repository_dependencies_dict.get('repository_dependencies', [])
has_repository_dependencies, has_repository_dependencies_only_if_compiling_contained_td = \
repository_util.get_repository_dependency_types(repository_dependencies)
repository_metadata.has_repository_dependencies = has_repository_dependencies
if 'tool_dependencies' in self.metadata_dict:
repository_metadata.includes_tool_dependencies = True
else:
repository_metadata.includes_tool_dependencies = False
if 'tools' in self.metadata_dict:
repository_metadata.includes_tools = True
else:
repository_metadata.includes_tools = False
if 'workflows' in self.metadata_dict:
repository_metadata.includes_workflows = True
else:
repository_metadata.includes_workflows = False
repository_metadata.missing_test_components = False
self.sa_session.add(repository_metadata)
self.sa_session.flush()
else:
# There are no metadata records associated with the repository.
repository_metadata = self.create_or_update_repository_metadata(self.repository.tip(self.app),
self.metadata_dict)
if 'tools' in self.metadata_dict and repository_metadata and status != 'error':
# Set tool versions on the new downloadable change set. The order of the list of changesets is
# critical, so we use the repo's changelog.
changeset_revisions = []
for changeset in repo.changelog:
changeset_revision = str(repo.changectx(changeset))
if metadata_util.get_repository_metadata_by_changeset_revision(self.app, encoded_id, changeset_revision):
changeset_revisions.append(changeset_revision)
self.add_tool_versions(encoded_id, repository_metadata, changeset_revisions)
elif len(repo) == 1 and not self.invalid_file_tups:
message = "Revision <b>%s</b> includes no Galaxy utilities for which metadata can " % \
str(self.repository.tip(self.app))
message += "be defined so this revision cannot be automatically installed into a local Galaxy instance."
status = "error"
if self.invalid_file_tups:
message = tool_util.generate_message_for_invalid_tools(self.app,
self.invalid_file_tups,
self.repository,
self.metadata_dict)
status = 'error'
return message, status
def set_repository_metadata_due_to_new_tip(self, host, content_alert_str=None, **kwd):
"""Set metadata on the tip of self.repository in the tool shed."""
error_message, status = self.set_repository_metadata(host, content_alert_str=content_alert_str, **kwd)
return status, error_message