Warning
This document is for an in-development version of Galaxy. You can alternatively view this page in the latest release if it exists or view the top of the latest release's documentation.
Source code for galaxy.jobs.dynamic_tool_destination
from __future__ import print_function
import argparse
import collections
import copy
import json
import logging
import os
import re
import sys
from functools import reduce
import numpy as np
import yaml
from galaxy.util import parse_xml
__version__ = '1.1.0'
# log to galaxy's logger
log = logging.getLogger(__name__)
# does a lot more logging when set to true
verbose = True
"""
list of all valid priorities, inferred from the global
default_desinations section of the config
"""
priority_list = set()
"""
Instantiated to a list of all valid destinations in the job configuration file
if run directly to validate configs. Otherwise, remains None. We often check
to see if app is None, because if it is then we'll try using the
destination_list instead.
-"""
destination_list = set()
"""
The largest the edit distance can be for a word to be considered
A correction for another word.
"""
max_edit_dist = 2
"""
List of valid categories that can be expected in the configuration.
"""
valid_categories = ['verbose', 'tools', 'default_destination',
'users', 'default_priority']
# --- destination validation error messages --- #
dest_err_default_dest = "Default destination '%s' does not appear in the job configuration." # destination
dest_err_tool_default_dest = "Default destination for '%s': '%s' does not appear in the job configuration." # tool, destination
dest_err_tool_rule_dest = "Destination for '%s', rule %s: '%s' does not exist in job configuration." # tool, counter, destination
[docs]def get_keys_from_dict(dl, keys_list):
"""
This function builds a list using the keys from nest dictionaries
"""
if isinstance(dl, dict):
keys_list.extend(dl.keys())
for x in dl.values():
get_keys_from_dict(x, keys_list)
elif isinstance(dl, list):
for x in dl:
get_keys_from_dict(x, keys_list)
[docs]class RuleValidator(object):
"""
This class is the primary facility for validating configs. It's always
called in map_tool_to_destination and it's called for validating config
directly through DynamicToolDestination.py
"""
[docs] @classmethod
def validate_rule(cls, rule_type, app, return_bool=False, *args, **kwargs):
"""
This function is responsible for passing each rule to its relevant
function.
@type rule_type: str
@param rule_type: the current rule's type
@type return_bool: bool
@param return_bool: True when we are only interested in the result of
the validation, and not the validated rule itself.
@rtype: bool, dict (depending on return_bool)
@return: validated rule or result of validation (depending on
return_bool)
"""
if rule_type == 'file_size':
return cls.__validate_file_size_rule(app, return_bool, *args, **kwargs)
elif rule_type == 'num_input_datasets':
return cls.__validate_num_input_datasets_rule(app, return_bool, *args, **kwargs)
elif rule_type == 'records':
return cls.__validate_records_rule(app, return_bool, *args, **kwargs)
elif rule_type == 'arguments':
return cls.__validate_arguments_rule(app, return_bool, *args, **kwargs)
@classmethod
def __validate_file_size_rule(
cls, app, return_bool, original_rule, counter, tool):
"""
This function is responsible for validating 'file_size' rules.
@type return_bool: bool
@param return_bool: True when we are only interested in the result of
the validation, and not the validated rule itself.
@type original_rule: dict
@param original_rule: contains the original received rule
@type counter: int
@param counter: this counter is used to identify what rule # is
currently being validated. Necessary for log output.
@type tool: str
@param tool: the name of the current tool. Necessary for log output.
@rtype: bool, dict (depending on return_bool)
@return: validated rule or result of validation (depending on
return_bool)
"""
rule = copy.deepcopy(original_rule)
valid_rule = True
# Users Verification #
if rule is not None:
valid_rule, rule = cls.__validate_users(
valid_rule, return_bool, rule, tool, counter)
# Nice_value Verification #
if rule is not None:
valid_rule, rule = cls.__validate_nice_value(
valid_rule, return_bool, rule, tool, counter)
# Destination Verification #
if rule is not None:
valid_rule, rule = cls.__validate_destination(
valid_rule, app, return_bool, rule, tool, counter)
# Bounds Verification #
if rule is not None:
valid_rule, rule = cls.__validate_bounds(
valid_rule, return_bool, rule, tool, counter)
if return_bool:
return valid_rule
else:
return rule
@classmethod
def __validate_num_input_datasets_rule(
cls, app, return_bool, original_rule, counter, tool):
"""
This function is responsible for validating 'num_input_datasets' rules.
@type return_bool: bool
@param return_bool: True when we are only interested in the result of
the validation, and not the validated rule itself.
@type original_rule: dict
@param original_rule: contains the original received rule
@type counter: int
@param counter: this counter is used to identify what rule # is
currently being validated. Necessary for log output.
@type tool: str
@param tool: the name of the current tool. Necessary for log output.
@rtype: bool, dict (depending on return_bool)
@return: validated rule or result of validation (depending on
return_bool)
"""
rule = copy.deepcopy(original_rule)
valid_rule = True
# Users Verification #
if rule is not None:
valid_rule, rule = cls.__validate_users(
valid_rule, return_bool, rule, tool, counter)
# Nice_value Verification #
if rule is not None:
valid_rule, rule = cls.__validate_nice_value(
valid_rule, return_bool, rule, tool, counter)
# Destination Verification #
if rule is not None:
valid_rule, rule = cls.__validate_destination(
valid_rule, app, return_bool, rule, tool, counter)
# Bounds Verification #
if rule is not None:
valid_rule, rule = cls.__validate_bounds(
valid_rule, return_bool, rule, tool, counter)
if return_bool:
return valid_rule
else:
return rule
@classmethod
def __validate_records_rule(cls, app, return_bool, original_rule, counter, tool):
"""
This function is responsible for validating 'records' rules.
@type return_bool: bool
@param return_bool: True when we are only interested in the result of
the validation, and not the validated rule itself.
@type original_rule: dict
@param original_rule: contains the original received rule
@type counter: int
@param counter: this counter is used to identify what rule # is
currently being validated. Necessary for log output.
@type tool: str
@param tool: the name of the current tool. Necessary for log output.
@rtype: bool, dict (depending on return_bool)
@return: validated rule or result of validation (depending on
return_bool)
"""
rule = copy.deepcopy(original_rule)
valid_rule = True
# Users Verification #
if rule is not None:
valid_rule, rule = cls.__validate_users(
valid_rule, return_bool, rule, tool, counter)
# Nice_value Verification #
if rule is not None:
valid_rule, rule = cls.__validate_nice_value(
valid_rule, return_bool, rule, tool, counter)
# Destination Verification #
if rule is not None:
valid_rule, rule = cls.__validate_destination(
valid_rule, app, return_bool, rule, tool, counter)
# Bounds Verification #
if rule is not None:
valid_rule, rule = cls.__validate_bounds(
valid_rule, return_bool, rule, tool, counter)
if return_bool:
return valid_rule
else:
return rule
@classmethod
def __validate_arguments_rule(
cls, app, return_bool, original_rule, counter, tool):
"""
This is responsible for validating 'arguments' rules.
@type return_bool: bool
@param return_bool: True when we are only interested in the result of
the validation, and not the validated rule itself.
@type original_rule: dict
@param original_rule: contains the original received rule
@type counter: int
@param counter: this counter is used to identify what rule # is
currently being validated. Necessary for log output.
@type tool: str
@param tool: the name of the current tool. Necessary for log output.
@rtype: bool, dict (depending on return_bool)
@return: validated rule or result of validation (depending on
return_bool)
"""
rule = copy.deepcopy(original_rule)
valid_rule = True
# Users Verification #
if rule is not None:
valid_rule, rule = cls.__validate_users(
valid_rule, return_bool, rule, tool, counter)
# Nice_value Verification #
if rule is not None:
valid_rule, rule = cls.__validate_nice_value(
valid_rule, return_bool, rule, tool, counter)
# Destination Verification #
if rule is not None:
valid_rule, rule = cls.__validate_destination(
valid_rule, app, return_bool, rule, tool, counter)
# Arguments Verification (for rule_type arguments; read comment block at top
# of function for clarification.
if rule is not None:
valid_rule, rule = cls.__validate_arguments(
valid_rule, return_bool, rule, tool, counter)
if return_bool:
return valid_rule
else:
return rule
@classmethod
def __validate_nice_value(cls, valid_rule, return_bool, rule, tool, counter):
"""
This function is responsible for validating nice_value.
@type valid_rule: bool
@param valid_rule: returns True if everything is valid. False if it
encounters any abnormalities in the config.
@type return_bool: bool
@param return_bool: True when we are only interested in the result of
the validation, and not the validated rule itself.
@type rule: dict
@param rule: contains the original received rule
@type tool: str
@param tool: the name of the current tool. Necessary for log output.
@type counter: int
@param counter: this counter is used to identify what rule # is
currently being validated. Necessary for log output.
@rtype: bool, dict (tuple)
@return: validated rule and result of validation
"""
if "nice_value" in rule:
if rule["nice_value"] < -20 or rule["nice_value"] > 20:
error = "nice_value goes from -20 to 20; rule " + str(counter)
error += " in '" + str(tool) + "' has a nice_value of '"
error += str(rule["nice_value"]) + "'."
if not return_bool:
error += " Setting nice_value to 0."
rule["nice_value"] = 0
if verbose:
log.debug(error)
valid_rule = False
else:
error = "No nice_value found for rule " + str(counter) + " in '"
error += str(tool) + "'."
if not return_bool:
error += " Setting nice_value to 0."
rule["nice_value"] = 0
if verbose:
log.debug(error)
valid_rule = False
return valid_rule, rule
@classmethod
def __validate_destination(cls, valid_rule, app, return_bool, rule, tool, counter):
"""
This function is responsible for validating destination.
@type valid_rule: bool
@param valid_rule: returns True if everything is valid. False if it
encounters any abnormalities in the config.
@type return_bool: bool
@param return_bool: True when we are only interested in the result of
the validation, and not the validated rule itself.
@type rule: dict
@param rule: contains the original received rule
@type tool: str
@param tool: the name of the current tool. Necessary for log output.
@type counter: int
@param counter: this counter is used to identify what rule # is
currently being validated. Necessary for log output.
@rtype: bool, dict (tuple)
@return: validated rule and result of validation
"""
if "fail_message" in rule:
if "destination" not in rule or rule['destination'] != "fail":
error = "Found a fail_message for rule " + str(counter)
error += " in '" + str(tool) + "', but destination is not 'fail'!"
if not return_bool:
error += " Setting destination to 'fail'."
if verbose:
log.debug(error)
valid_rule = False
rule["destination"] = "fail"
if "destination" in rule:
suggestion = None
if isinstance(rule["destination"], str):
if rule["destination"] == "fail" and "fail_message" not in rule:
error = "Missing a fail_message for rule " + str(counter)
error += " in '" + str(tool) + "'."
if not return_bool:
error += " Adding generic fail_message."
message = "Invalid parameters for rule " + str(counter)
message += " in '" + str(tool) + "'."
rule["fail_message"] = message
if verbose:
log.debug(error)
valid_rule = False
else:
is_valid = validate_destination(app, rule["destination"],
dest_err_tool_rule_dest, (tool, counter, rule["destination"]),
return_bool)
if not is_valid:
valid_rule = False
elif isinstance(rule["destination"], dict):
if ("priority" in rule["destination"]
and isinstance(rule["destination"]["priority"], dict)):
for priority in rule["destination"]["priority"]:
if priority not in priority_list:
error = "Invalid priority '"
error += str(priority) + "' for rule "
error += str(counter) + " in '" + str(tool) + "'."
suggestion = get_typo_correction(priority,
priority_list, max_edit_dist)
if suggestion:
error += " Did you mean '" + str(suggestion) + "'?"
if not return_bool:
error += " Ignoring..."
if verbose:
log.debug(error)
valid_rule = False
elif not isinstance(rule["destination"]["priority"][priority], str):
error = "Cannot parse tool destination '"
error += str(rule["destination"]["priority"][priority])
error += "' for rule " + str(counter)
error += " in '" + str(tool) + "'."
if not return_bool:
error += " Ignoring..."
if verbose:
log.debug(error)
valid_rule = False
else:
is_valid = validate_destination(app,
rule["destination"]["priority"][priority],
dest_err_tool_rule_dest,
(tool, counter, rule["destination"]["priority"][priority]),
return_bool)
if not is_valid:
valid_rule = False
else:
error = "No destination specified for rule " + str(counter)
error += " in '" + str(tool) + "'."
if not return_bool:
error += " Ignoring..."
if verbose:
log.debug(error)
valid_rule = False
else:
error = "No destination specified for rule " + str(counter)
error += " in '" + str(tool) + "'."
if not return_bool:
error += " Ignoring..."
if verbose:
log.debug(error)
valid_rule = False
else:
error = "No destination specified for rule " + str(counter)
error += " in '" + str(tool) + "'."
if not return_bool:
error += " Ignoring..."
if verbose:
log.debug(error)
valid_rule = False
return valid_rule, rule
@classmethod
def __validate_bounds(cls, valid_rule, return_bool, rule, tool, counter):
"""
This function is responsible for validating bounds.
@type valid_rule: bool
@param valid_rule: returns True if everything is valid. False if it
encounters any abnormalities in the config.
@type return_bool: bool
@param return_bool: True when we are only interested in the result of
the validation, and not the validated rule itself.
@type rule: dict
@param rule: contains the original received rule
@type tool: str
@param tool: the name of the current tool. Necessary for log output.
@type counter: int
@param counter: this counter is used to identify what rule # is
currently being validated. Necessary for log output.
@rtype: bool/None, dict (tuple)
@return: validated rule (or None if invalid) and result of validation
"""
if "upper_bound" in rule and "lower_bound" in rule:
if rule["rule_type"] in ("file_size", "records"):
upper_bound = str_to_bytes(rule["upper_bound"])
lower_bound = str_to_bytes(rule["lower_bound"])
else:
upper_bound = rule["upper_bound"]
lower_bound = rule["lower_bound"]
if lower_bound == "Infinity":
error = "Error: lower_bound is set to Infinity, but must be "
error += "lower than upper_bound!"
if not return_bool:
error += " Setting lower_bound to 0!"
lower_bound = 0
rule["lower_bound"] = 0
else:
lower_bound = float('inf')
if verbose:
log.debug(error)
valid_rule = False
if upper_bound == "Infinity":
upper_bound = -1
if upper_bound != -1 and lower_bound > upper_bound:
error = "lower_bound exceeds upper_bound for rule " + str(counter)
error += " in '" + str(tool) + "'."
if not return_bool:
error += " Reversing bounds."
temp_upper_bound = rule["upper_bound"]
temp_lower_bound = rule["lower_bound"]
rule["upper_bound"] = temp_lower_bound
rule["lower_bound"] = temp_upper_bound
if verbose:
log.debug(error)
valid_rule = False
else:
error = "Missing bounds for rule " + str(counter)
error += " in '" + str(tool) + "'."
if not return_bool:
error += " Ignoring rule."
rule = None
if verbose:
log.debug(error)
valid_rule = False
return valid_rule, rule
@classmethod
def __validate_arguments(cls, valid_rule, return_bool, rule, tool, counter):
"""
This function is responsible for validating arguments.
@type valid_rule: bool
@param valid_rule: returns True if everything is valid. False if it
encounters any abnormalities in the config.
@type return_bool: bool
@param return_bool: True when we are only interested in the result of
the validation, and not the validated rule itself.
@type rule: dict
@param rule: contains the original received rule
@type tool: str
@param tool: the name of the current tool. Necessary for log output.
@type counter: int
@param counter: this counter is used to identify what rule # is
currently being validated. Necessary for log output.
@rtype: bool/None, dict (tuple)
@return: validated rule (or None if invalid) and result of validation
"""
if "arguments" not in rule or not isinstance(rule["arguments"], dict):
error = "No arguments found for rule " + str(counter) + " in '"
error += str(tool) + "' despite being of type arguments."
if not return_bool:
error += " Ignoring rule."
rule = None
if verbose:
log.debug(error)
valid_rule = False
return valid_rule, rule
@classmethod
def __validate_users(cls, valid_rule, return_bool, rule, tool, counter):
"""
This function is responsible for validating users (if present).
@type return_bool: bool
@param return_bool: True when we are only interested in the result of
the validation, and not the validated rule itself.
@type valid_rule: bool
@param valid_rule: returns True if everything is valid. False if it
encounters any abnormalities in the config.
@type rule: dict
@param rule: contains the original received rule
@type counter: int
@param counter: this counter is used to identify what rule # is
currently being validated. Necessary for log output.
@type tool: str
@param tool: the name of the current tool. Necessary for log output.
@rtype: bool, dict (tuple)
@return: validated rule and result of validation
"""
emailregex = r"^[A-Za-z0-9\.\+_-]+@[A-Za-z0-9\._-]+\.[a-zA-Z]*$"
if "users" in rule:
if isinstance(rule["users"], list):
for user in reversed(rule["users"]):
if not isinstance(user, str):
error = "Entry '" + str(user) + "' in users for rule "
error += str(counter) + " in tool '" + str(tool)
error += "' is in an " + "invalid format!"
if not return_bool:
error += " Ignoring entry."
if verbose:
log.debug(error)
valid_rule = False
rule["users"].remove(user)
else:
if re.match(emailregex, user) is None:
error = "Supplied email '" + str(user)
error += "' for rule " + str(counter) + " in tool '"
error += str(tool) + "' is in " + "an invalid format!"
if not return_bool:
error += " Ignoring email."
if verbose:
log.debug(error)
valid_rule = False
rule["users"].remove(user)
else:
error = "Couldn't find a list under 'users:'!"
if not return_bool:
error += " Ignoring rule."
rule = None
if verbose:
log.debug(error)
valid_rule = False
# post-processing checking to make sure we didn't just remove all the users
# if we did, we should ignore the rule
if rule is not None and rule["users"] is not None and len(rule["users"]) == 0:
error = "No valid user emails were specified for rule " + str(counter)
error += " in tool '" + str(tool) + "'!"
if not return_bool:
error += " Ignoring rule."
rule = None
if verbose:
log.debug(error)
valid_rule = False
return valid_rule, rule
[docs]def parse_yaml(path="/config/tool_destinations.yml",
job_conf_path="/config/job_conf.xml", app=None, test=False,
return_bool=False):
"""
Get a yaml file from path and send it to validate_config for validation.
@type path: str
@param path: the path to the tool destinations config file
@type job_conf_path: str
@param job_conf_path: the path to the job config file
@type test: bool
@param test: indicates whether to run in test mode or production mode
@type return_bool: bool
@param return_bool: True when we are only interested in the result of the
validation, and not the validated rule itself.
@rtype: bool, dict (depending on return_bool)
@return: validated rule or result of validation (depending on return_bool)
"""
if app is None:
global destination_list
destination_list = get_destination_list_from_job_config(job_conf_path)
# Import file from path
try:
if test:
config = yaml.safe_load(path)
else:
if path == "/config/tool_destinations.yml":
# os.path.realpath gets the path of DynamicToolDestination.py
# and then os.path.join is used to go back four directories
config_directory = os.path.join(
os.path.dirname(os.path.realpath(__file__)), '../../../..')
opt_file = config_directory + path
else:
opt_file = path
with open(opt_file, 'r') as stream:
config = yaml.safe_load(stream)
# Test imported file
try:
if return_bool:
valid_config = validate_config(config, app, return_bool)
else:
config = validate_config(config, app)
except MalformedYMLException:
if verbose:
log.error(str(sys.exc_value))
raise
except ScannerError:
if verbose:
log.error("Config is too malformed to fix!")
raise
if return_bool:
return valid_config
else:
return config
[docs]def validate_destination(app, destination, err_message, err_message_contents,
return_bool=True):
"""
Validate received destination id.
@type app:
@param app: Current app
@type destination: str
@param destination: string containing the destination id that is being
validated
@type err_message: str
@param err_message: Error message to be formatted with the contents of
`err_message_contents` upon the event of invalid
destination
@type err_message_contents: tuple
@param err_message_contents: A tuple of strings to be placed in
`err_message`
@type return_bool: bool
@param return_bool: Whether or not the calling function has been told to
return a boolean value or not. Determines whether or
not to print 'Ignoring...' after error messages.
@rtype: bool
@return: True if the destination is valid and False otherwise.
"""
valid_destination = False
suggestion = None
if destination == 'fail' and err_message is dest_err_tool_rule_dest: # It's a tool rule that is set to fail. It's valid
valid_destination = True
elif app is None:
if destination in destination_list:
valid_destination = True
else:
suggestion = get_typo_correction(destination,
destination_list, max_edit_dist)
elif app.job_config.get_destination(destination):
valid_destination = True
if not valid_destination:
error = err_message % err_message_contents
if suggestion:
error += " Did you mean '" + suggestion + "'?"
if not return_bool:
error += " Ignoring..."
if verbose:
log.debug(error)
return valid_destination
[docs]def validate_config(obj, app=None, return_bool=False,):
"""
Validate received config.
@type obj: dict
@param obj: the entire contents of the config
@type return_bool: bool
@param return_bool: True when we are only interested in the result of the
validation, and not the validated rule itself.
@rtype: bool, dict (depending on return_bool)
@return: validated rule or result of validation (depending on return_bool)
"""
global priority_list
priority_list = set()
def infinite_defaultdict():
return collections.defaultdict(infinite_defaultdict)
# Allow new_config to expand automatically when adding values to new levels
new_config = infinite_defaultdict()
global verbose
verbose = False
valid_config = True
valid_rule = True
tool_has_default = False
if return_bool:
verbose = True
elif obj is not None and 'verbose' in obj and isinstance(obj['verbose'], bool):
verbose = obj['verbose']
else:
valid_config = False
if obj:
log.debug("Verbose value '" + str(obj['verbose']) + "' is not True or False! Falling back to verbose...")
verbose = True
if not return_bool and verbose:
log.debug("Running config validation...")
# if this is false, then it's definitely because of verbose missing
if not valid_config and return_bool:
log.debug("Missing mandatory field 'verbose' in config!")
# a list with the available rule_types. Can be expanded on easily in the future
available_rule_types = ['file_size', 'num_input_datasets', 'records', 'arguments']
if obj is not None:
# in obj, there should always be only 5 categories: tools, default_destination,
# default_priority, users, and verbose
if 'default_destination' in obj:
suggestion = None
if isinstance(obj['default_destination'], str):
is_valid = validate_destination(app, obj['default_destination'],
dest_err_default_dest,
(obj['default_destination']))
if is_valid:
new_config["default_destination"] = obj['default_destination']
else:
valid_config = False
elif isinstance(obj['default_destination'], dict):
if ('priority' in obj['default_destination'] and
isinstance(obj['default_destination']['priority'], dict)):
for priority in obj['default_destination']['priority']:
if isinstance(obj['default_destination']['priority'][priority],
str):
priority_list.add(priority)
is_valid = validate_destination(
app, obj['default_destination']['priority'][priority],
dest_err_default_dest,
(obj['default_destination']['priority'][priority]))
if is_valid:
new_config["default_destination"]['priority'][priority] = (
obj['default_destination']['priority'][priority])
else:
valid_config = False
if len(priority_list) < 1:
error = ("No valid priorities found!")
if verbose:
log.debug(error)
valid_config = False
else:
if 'default_priority' in obj:
if isinstance(obj['default_priority'], str):
if obj['default_priority'] in priority_list:
new_config['default_priority'] = obj['default_priority']
else:
error = ("Default priority '" + str(obj['default_priority'])
+ "' is not a valid priority.")
suggestion = get_typo_correction(obj['default_priority'],
priority_list, max_edit_dist)
if suggestion:
error += " Did you mean '" + str(suggestion) + "'?"
if verbose:
log.debug(error)
else:
error = "default_priority in config is not valid."
if verbose:
log.debug(error)
valid_config = False
else:
error = "No default_priority section found in config."
if 'med' in priority_list:
# set 'med' as fallback default priority, so
# old tool_destination.yml configs still work
error += " Setting 'med' as default priority."
new_config['default_priority'] = 'med'
else:
error += " Things may not run as expected!"
valid_config = False
if verbose:
log.debug(error)
else:
error = "No global default destinations specified in config!"
if verbose:
log.debug(error)
valid_config = False
else:
error = "No global default destination specified in config!"
if verbose:
log.debug(error)
valid_config = False
else:
error = "No global default destination specified in config!"
if verbose:
log.debug(error)
valid_config = False
if 'users' in obj:
if isinstance(obj['users'], dict):
for user in obj['users']:
curr = obj['users'][user]
if isinstance(curr, dict):
if 'priority' in curr and isinstance(curr['priority'], str):
if curr['priority'] in priority_list:
new_config['users'][user]['priority'] = curr['priority']
else:
error = ("User '" + user + "', priority '"
+ str(curr['priority']) + "' is not defined "
+ "in the global default_destination section")
suggestion = get_typo_correction(curr['priority'],
priority_list, max_edit_dist)
if suggestion:
error += " Did you mean '" + str(suggestion) + "'?"
if verbose:
log.debug(error)
valid_config = False
else:
error = "User '" + user + "' is missing a priority!"
if verbose:
log.debug(error)
valid_config = False
else:
error = "User '" + user + "' is missing a priority!"
if verbose:
log.debug(error)
valid_config = False
else:
error = "Users option is not a dictionary!"
if verbose:
log.debug(error)
valid_config = False
if 'tools' in obj:
for tool in obj['tools']:
curr = obj['tools'][tool]
# This check is to make sure we have a tool name, and not just
# rules right way.
if not isinstance(curr, list):
curr_tool_rules = []
if curr is not None:
# in each tool, there should always be only 2 sub-categories:
# default_destination (not mandatory) and rules (mandatory)
if "default_destination" in curr:
suggestion = None
if isinstance(curr['default_destination'], str):
is_valid = validate_destination(app,
curr['default_destination'],
dest_err_tool_default_dest,
(tool, curr['default_destination']))
if is_valid:
new_config['tools'][tool]['default_destination'] = (
(curr['default_destination']))
tool_has_default = True
else:
valid_config = False
elif isinstance(curr['default_destination'], dict):
if ('priority' in curr['default_destination']
and isinstance(curr['default_destination']['priority'], dict)):
for priority in curr['default_destination']['priority']:
destination = curr['default_destination']['priority'][priority]
if priority in priority_list:
if isinstance(destination, str):
is_valid = validate_destination(
app, destination,
dest_err_tool_default_dest,
(tool, curr['default_destination']['priority'][priority]))
if is_valid:
new_config['tools'][tool]['default_destination']['priority'][priority] = destination
tool_has_default = True
else:
valid_config = False
else:
error = ("No default '" + str(priority)
+ "' priority destination for tool "
+ str(tool) + " in config!")
if verbose:
log.debug(error)
valid_config = False
else:
error = ("Invalid default destination priority '"
+ str(priority) + "' for '" + str(tool)
+ "'.")
suggestion = get_typo_correction(priority,
priority_list, max_edit_dist)
if suggestion:
error += " Did you mean '" + str(suggestion) + "'?"
if verbose:
log.debug(error)
valid_config = False
else:
error = "No default priority destinations specified"
error += " for " + str(tool) + " in config!"
if verbose:
log.debug(error)
valid_config = False
if "rules" in curr and isinstance(curr['rules'], list):
# under rules, there should only be a list of rules
curr_tool = curr
counter = 0
for rule in curr_tool['rules']:
if "rule_type" in rule:
if rule['rule_type'] in available_rule_types:
validated_rule = None
counter += 1
# if we're only interested in the result of
# the validation, then only retrieve the
# result
if return_bool:
valid_rule = RuleValidator.validate_rule(
rule['rule_type'], app, return_bool,
rule, counter, tool)
# otherwise, retrieve the processed rule
else:
validated_rule = (
RuleValidator.validate_rule(
rule['rule_type'],
app, return_bool,
rule, counter, tool))
# if the result we get is False, then
# indicate that the whole config is invalid
if not valid_rule:
valid_config = False
# if we got a rule back that seems to be
# valid (or was fixable) then append it to
# list of ready-to-use tools
if (not return_bool and
validated_rule is not None):
curr_tool_rules.append(
copy.deepcopy(validated_rule))
# if rule['rule_type'] in available_rule_types
else:
error = "Unrecognized rule_type '"
error += rule['rule_type'] + "' "
error += "found in '" + str(tool) + "'. "
if not return_bool:
error += "Ignoring..."
if verbose:
log.debug(error)
valid_config = False
# if "rule_type" in rule
else:
counter += 1
error = "No rule_type found for rule "
error += str(counter)
error += " in '" + str(tool) + "'."
if verbose:
log.debug(error)
valid_config = False
# if "rules" in curr and isinstance(curr['rules'], list):
elif not tool_has_default:
valid_config = False
error = "Tool '" + str(tool) + "' does not have"
error += " rules nor a default_destination!"
if verbose:
log.debug(error)
# if obj['tools'][tool] is not None:
else:
valid_config = False
error = "Config section for tool '" + str(tool) + "' is blank!"
if verbose:
log.debug(error)
if curr_tool_rules:
new_config['tools'][str(tool)]['rules'] = curr_tool_rules
# if not isinstance(curr, list)
else:
error = "Malformed YML; expected job name, "
error += "but found a list instead!"
if verbose:
log.debug(error)
valid_config = False
# quickly run through categories to detect unrecognized types
for category in obj.keys():
if category not in valid_categories:
error = "Unrecognized category '" + category
error += "' found in config file!"
if verbose:
log.debug(error)
valid_config = False
# if obj is not None
else:
if verbose:
log.debug("No (or empty) config file supplied!")
valid_config = False
if not return_bool:
if verbose:
log.debug("Finished config validation.")
if return_bool:
return valid_config
else:
return new_config
[docs]def bytes_to_str(size, unit="YB"):
'''
Uses the bi convention: 1024 B = 1 KB since this method primarily
has inputs of bytes for RAM
@type size: int
@param size: the size in int (bytes) to be converted to str
@rtype: str
@return return_str: the resulting string
'''
# converts size in bytes to most readable unit
units = ["B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"]
i = 0
# mostly called in order to convert "infinity"
try:
size_changer = int(size)
except ValueError:
error = "bytes_to_str passed uncastable non numeric value "
raise ValueError(error + str(size))
try:
upto = units.index(unit.strip().upper())
except ValueError:
upto = 9
while size_changer >= 1024 and i < upto:
size_changer = size_changer / 1024.0
i += 1
if size_changer == -1:
size_changer = "Infinity"
i = 0
try:
return_str = "%.2f %s" % (size_changer, units[i])
except TypeError:
return_str = "%s" % (size_changer)
return return_str
[docs]def str_to_bytes(size):
'''
Uses the bi convention: 1024 B = 1 KB since this method primarily
has inputs of bytes for RAM
@type size: str
@param size: the size in str to be converted to int (bytes)
@rtype: int
@return curr_size: the resulting size converted from str
'''
units = ["", "b", "kb", "mb", "gb", "tb", "pb", "eb", "zb", "yb"]
curr_size = size
try:
if size.lower() != "infinity":
# Get the number
try:
curr_item = size.strip().split(" ")
curr_size = "".join(curr_item)
curr_size = int(curr_size)
except ValueError:
curr_item = size.strip().split(" ")
curr_unit = curr_item[-1].strip().lower()
curr_item = curr_item[0:-1]
curr_size = "".join(curr_item)
try:
curr_size = float(curr_size)
except ValueError:
error = "Unable to convert size " + str(size)
raise MalformedYMLException(error)
# Get the unit and convert to bytes
try:
pos = units.index(curr_unit)
for x in range(pos, 1, -1):
curr_size *= 1024
except ValueError:
error = "Unable to convert size " + str(size)
raise MalformedYMLException(error)
except (UnboundLocalError, NameError):
pass
else:
curr_size = -1
except AttributeError:
# If size is not a string (doesn't have .lower())
pass
return curr_size
[docs]def importer(test):
"""
Uses Mock galaxy for testing or real galaxy for production
@type test: bool
@param test: True when being run from a test
"""
global JobDestination
global JobMappingException
if test:
class JobDestination(object):
def __init__(self, *kwd):
self.id = kwd.get('id')
self.nativeSpec = kwd.get('params')['nativeSpecification']
self.runner = kwd.get('runner')
from galaxy.jobs.mapper import JobMappingException
else:
from galaxy.jobs import JobDestination
from galaxy.jobs.mapper import JobMappingException
[docs]def map_tool_to_destination(
job, app, tool, user_email, test=False, path=None, job_conf_path=None):
"""
Dynamically allocate resources
@param job: galaxy job
@param app: current app
@param tool: current tool
@type test: bool
@param test: True when running in test mode
@type path: str
@param path: path to tool_destinations.yml
@type job_conf_path: str
@param job_conf_path: path to job_conf.xml
"""
importer(test)
# set verbose to True by default, just in case (some tests fail without
# this due to how the tests apparently work)
global verbose
verbose = True
filesize_rule_present = False
num_input_datasets_rule_present = False
records_rule_present = False
# Get configuration from tool_destinations.yml and job_conf.xml
if path is None:
path = app.config.tool_destinations_config_file
if job_conf_path is None:
job_conf_path = app.config.job_config_file
try:
config = parse_yaml(path, job_conf_path, app)
except MalformedYMLException as e:
raise JobMappingException(e)
# Get all inputs from tool and databases
inp_data = dict([(da.name, da.dataset) for da in job.input_datasets])
inp_data.update([(da.name, da.dataset) for da in job.input_library_datasets])
if config is not None and str(tool.old_id) in config['tools']:
if 'rules' in config['tools'][str(tool.old_id)]:
for rule in config['tools'][str(tool.old_id)]['rules']:
if rule["rule_type"] == "file_size":
filesize_rule_present = True
if rule["rule_type"] == "num_input_datasets":
num_input_datasets_rule_present = True
if rule["rule_type"] == "records":
records_rule_present = True
file_size = 0
records = 0
num_input_datasets = 0
if filesize_rule_present or records_rule_present or num_input_datasets_rule_present:
# Loop through the database and look for amount of records
try:
for line in inp_db:
if line[0] == ">":
records += 1
except NameError:
pass
# Loops through each input file and adds the size to the total
# or looks through db for records
for da in inp_data:
try:
# If the input is a file, check and add the size
if inp_data[da] is not None and os.path.isfile(inp_data[da].file_name):
num_input_datasets += 1
if verbose:
message = "Loading file: " + str(da)
message += str(inp_data[da].file_name)
log.debug(message)
# Add to records if the file type is fasta
if inp_data[da].ext == "fasta":
if records_rule_present:
inp_db = open(inp_data[da].file_name)
# Try to find automatically computed sequences
metadata = inp_data[da].get_metadata()
try:
records += int(metadata.get("sequences"))
except (TypeError, KeyError):
for line in inp_db:
if line[0] == ">":
records += 1
if filesize_rule_present:
query_file = str(inp_data[da].file_name)
file_size += os.path.getsize(query_file)
except AttributeError:
# Otherwise, say that input isn't a file
if verbose:
log.debug("Not a file: " + str(inp_data[da]))
if verbose:
if filesize_rule_present:
log.debug("Total size: " + bytes_to_str(file_size))
if records_rule_present:
log.debug("Total amount of records: " + str(records))
if num_input_datasets_rule_present:
log.debug("Total number of files: " + str(num_input_datasets))
matched_rule = None
user_authorized = None
rule_counter = 0
# For each different rule for the tool that's running
fail_message = None
if fail_message is not None:
destination = "fail"
elif config is not None:
# Get the default priority from the config if necessary.
# If there isn't one, choose an arbitrary one as a fallback
if "default_destination" in config:
if isinstance(config['default_destination'], dict):
if 'default_priority' in config:
default_priority = config['default_priority']
priority = default_priority
else:
if len(priority_list) > 0:
default_priority = next(iter(priority_list))
priority = default_priority
error = ("No default priority found, arbitrarily setting '"
+ default_priority + "' as the default priority."
+ " Things may not work as expected!")
if verbose:
log.debug(error)
# fetch priority information from workflow/job parameters
job_parameter_list = job.get_parameters()
workflow_params = None
job_params = None
if job_parameter_list is not None:
for param in job_parameter_list:
if param.name == "__workflow_resource_params__":
workflow_params = param.value
if param.name == "__job_resource":
job_params = param.value
# Priority coming from workflow invocation takes precedence over job specific priorities
if workflow_params is not None:
resource_params = json.loads(workflow_params)
if 'priority' in resource_params:
# For by_group mapping, this priority has already been validated when the
# request was created.
if resource_params['priority'] is not None:
priority = resource_params['priority']
elif job_params is not None:
resource_params = json.loads(job_params)
if 'priority' in resource_params:
if resource_params['priority'] is not None:
priority = resource_params['priority']
# get the user's priority
if "users" in config:
if user_email in config["users"]:
priority = config["users"][user_email]["priority"]
if "default_destination" in config:
if isinstance(config['default_destination'], str):
destination = config['default_destination']
else:
if priority in config['default_destination']['priority']:
destination = config['default_destination']['priority'][priority]
elif default_priority in config['default_destination']['priority']:
destination = (config['default_destination']['priority'][default_priority])
config = config['tools']
if str(tool.old_id) in config:
if 'rules' in config[str(tool.old_id)]:
for rule in config[str(tool.old_id)]['rules']:
rule_counter += 1
user_authorized = False
if 'users' in rule and isinstance(rule['users'], list):
if user_email in rule['users']:
user_authorized = True
else:
user_authorized = True
if user_authorized:
matched = False
if rule["rule_type"] == "file_size":
# bounds comparisons
upper_bound = str_to_bytes(rule["upper_bound"])
lower_bound = str_to_bytes(rule["lower_bound"])
if upper_bound == -1:
if lower_bound <= file_size:
matched = True
else:
if (lower_bound <= file_size and file_size < upper_bound):
matched = True
elif rule["rule_type"] == "num_input_datasets":
# bounds comparisons
upper_bound = rule["upper_bound"]
lower_bound = rule["lower_bound"]
if upper_bound == "Infinity":
if lower_bound <= num_input_datasets:
matched = True
else:
if (lower_bound <= num_input_datasets and num_input_datasets < upper_bound):
matched = True
elif rule["rule_type"] == "records":
# bounds comparisons
upper_bound = str_to_bytes(rule["upper_bound"])
lower_bound = str_to_bytes(rule["lower_bound"])
if upper_bound == -1:
if lower_bound <= records:
matched = True
else:
if lower_bound <= records and records < upper_bound:
matched = True
elif rule["rule_type"] == "arguments":
options = job.get_param_values(app)
matched = True
# check if the args in the config file are available
for arg in rule["arguments"]:
arg_dict = {arg: rule["arguments"][arg]}
arg_keys_list = []
get_keys_from_dict(arg_dict, arg_keys_list)
try:
options_value = reduce(dict.__getitem__, arg_keys_list, options)
arg_value = reduce(dict.__getitem__, arg_keys_list, arg_dict)
if (arg_value != options_value):
matched = False
except KeyError:
matched = False
if verbose:
error = "Argument '" + str(arg)
error += "' not recognized!"
log.debug(error)
# if we matched a rule
if matched:
if (matched_rule is None or rule["nice_value"]
< matched_rule["nice_value"]):
matched_rule = rule
# if user_authorized
else:
if verbose:
error = "User email '" + str(user_email) + "' not "
error += "specified in list of authorized users for "
error += "rule " + str(rule_counter) + " in tool '"
error += str(tool.old_id) + "'! Ignoring rule."
log.debug(error)
# if str(tool.old_id) in config
else:
error = "Tool '" + str(tool.old_id) + "' not specified in config. "
error += "Using default destination."
if verbose:
log.debug(error)
if matched_rule is None:
if "default_destination" in config[str(tool.old_id)]:
default_tool_destination = (config[str(tool.old_id)]['default_destination'])
if isinstance(default_tool_destination, str):
destination = default_tool_destination
else:
if priority in default_tool_destination['priority']:
destination = default_tool_destination['priority'][priority]
elif default_priority in default_tool_destination['priority']:
destination = (default_tool_destination['priority'][default_priority])
# else global default destination is used
else:
if isinstance(matched_rule["destination"], str):
destination = matched_rule["destination"]
else:
if priority in matched_rule["destination"]["priority"]:
destination = matched_rule["destination"]["priority"][priority]
elif default_priority in matched_rule["destination"]["priority"]:
destination = (matched_rule["destination"]["priority"][default_priority])
# else global default destination is used
# if "default_destination" in config
else:
destination = "fail"
fail_message = "Job '" + str(tool.old_id) + "' failed; "
fail_message += "no global default destination specified in config!"
# if fail_message is not None
# elif config is not None
else:
destination = "fail"
fail_message = "No config file supplied!"
if destination == "fail":
if fail_message:
raise JobMappingException(fail_message)
else:
raise JobMappingException(matched_rule["fail_message"])
if config is not None:
if destination == "fail":
output = "An error occurred: " + fail_message
log.debug(output)
else:
output = "Running '" + str(tool.old_id) + "' with '"
output += destination + "'."
log.debug(output)
return destination
[docs]def get_destination_list_from_job_config(job_config_location):
"""
returns A list of all destination IDs declared in the job configuration
@type job_config_location: str
@param job_config_location: The location of the job config file relative
to the galaxy root directory. If NoneType, defaults to
galaxy/config/job_conf.xml,
galaxy/config/job_conf.xml.sample_advanced, or
galaxy/config/job_conf.xml.sample_basic
(first one that exists)
@rtype: list
@return: A list of all of the destination IDs declared in the job
configuration file.
"""
global destination_list
# os.path.realpath gets the path of DynamicToolDestination.py
# and then os.path.join is used to go back four directories
config_location = os.path.join(
os.path.dirname(os.path.realpath(__file__)), '../../..')
if job_config_location:
local_path = re.compile('^/config/.+$')
if local_path.match(job_config_location):
job_config_location = config_location + job_config_location
else: # Pick one of the default ones
message = "* No job config specified, "
if os.path.isfile(config_location + "/config/job_conf.xml"):
job_config_location = config_location + "/config/job_conf.xml"
message += "using 'config/job_conf.xml'. *"
elif os.path.isfile(config_location +
"/config/job_conf.xml.sample_advanced"):
job_config_location = (config_location
+ "/config/job_conf.xml.sample_advanced")
message += "using 'config/job_conf.xml.sample_advanced'. *"
elif os.path.isfile(config_location +
"/config/job_conf.xml.sample_basic"):
job_config_location = (config_location
+ "/config/job_conf.xml.sample_basic")
message += "using 'config/job_conf.xml.sample_basic'. *"
else:
message += ("and no default job configs in 'config/'. "
+ "Expect lots of failures. *")
if verbose:
log.debug(message)
if job_config_location:
job_conf = parse_xml(job_config_location, strip_whitespace=False)
# Add all destination IDs from the job configuration xml file
for destination in job_conf.getroot().iter("destination"):
if isinstance(destination.get("id"), str):
destination_list.add(destination.get("id"))
else:
error = "Destination ID '" + str(destination)
error += "' in job configuration file cannot be"
error += " parsed. Things may not work as expected!"
log.debug(error)
return destination_list
[docs]def get_edit_distance(source, target):
"""
returns the edit distance (levenshtein distance) between two strings.
code from:
en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance
@type str1: str
@param str1: The first string
@type str2: str
@param str2: The second string
@rtype: int
@return: The edit distance between str1 and str2
"""
if len(source) < len(target):
return get_edit_distance(target, source)
# So now we have len(source) >= len(target).
if len(target) == 0:
return len(source)
# We call tuple() to force strings to be used as sequences
# ('c', 'a', 't', 's') - numpy uses them as values by default.
source = np.array(tuple(source))
target = np.array(tuple(target))
# We use a dynamic programming algorithm, but with the
# added optimization that we only need the last two rows
# of the matrix.
previous_row = np.arange(target.size + 1)
for s in source:
# Insertion (target grows longer than source):
current_row = previous_row + 1
# Substitution or matching:
# Target and source items are aligned, and either
# are different (cost of 1), or are the same (cost of 0).
current_row[1:] = np.minimum(
current_row[1:],
np.add(previous_row[:-1], target != s))
# Deletion (target grows shorter than source):
current_row[1:] = np.minimum(
current_row[1:],
current_row[0:-1] + 1)
previous_row = current_row
return previous_row[-1]
[docs]def get_typo_correction(typo_str, word_set, max_dist):
"""
returns the string in a set that closest matches the
input string, as long as the edit distance between them
is equal to or smaller than a value, or the words are
the same when case is not considered. If there are no
appropriate matches, nothing is returned instead.
@type typo_str: str
@param typo_str: The string to be compared
@type word_set: set of str
@param word_set: The set of strings to compare to
@type max_dist: int
@param max_dist: the largest allowed edit distance between
the word and the result. If nothing is
within this range, nothing is returned
@rtype: str or NoneType
@return: The closest matching string, or None, if no strings
being compared to are within max_dist edit distance.
"""
# Start curr_best out as the largest
# edit distance we will tolerate plus one
curr_best = max_dist + 1
suggestion = None
for valid_word in word_set:
# If we've already found a best match,
# don't bother checking anything else.
if curr_best > 0:
if typo_str.lower() == valid_word.lower():
# if something matches when case insensitive,
# it is automatically set as the best
suggestion = valid_word
curr_best = 0
else:
edit_distance = get_edit_distance(typo_str, valid_word)
if edit_distance < curr_best:
suggestion = valid_word
curr_best = edit_distance
return suggestion
if __name__ == '__main__':
"""
This function is responsible for running the app if directly run through
the commandline. It offers the ability to specify a config through the
commandline for checking whether or not it is a valid config. It's to be
run from within Galaxy, assuming it is installed correctly within the
proper directories in Galaxy, and it looks for the config file in
galaxy/config/. It can also be run with a path pointing to a config file if
not being run directly from inside Galaxy install directory.
"""
verbose = True
parser = argparse.ArgumentParser()
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
parser.add_argument(
'-c', '--check-config', dest='check_config', nargs='?',
help='Use this option to validate tool_destinations.yml.'
+ ' Optionally, provide the path to the tool_destinations.yml'
+ ' that you would like to check, and/or the path to the related'
+ ' job_conf.xml. Default: galaxy/config/tool_destinations.yml'
+ 'and galaxy/config/job_conf.xml')
parser.add_argument(
'-j', '--job-config', dest='job_config')
parser.add_argument(
'-V', '--version', action='version', version="%(prog)s " + __version__)
args = parser.parse_args()
# if run with no arguments, display the help message
if len(sys.argv) == 1:
parser.print_help()
sys.exit(1)
job_config_location = args.job_config
if args.check_config:
valid_config = parse_yaml(path=args.check_config,
job_conf_path=job_config_location,
return_bool=True)
else:
valid_config = parse_yaml(path="/config/tool_destinations.yml",
job_conf_path=job_config_location,
return_bool=True)
if valid_config:
print("Configuration is valid!")
else:
print("Errors detected; config not valid!")