Warning

This document is for an old release of Galaxy. You can alternatively view this page in the latest release if it exists or view the top of the latest release's documentation.

Source code for galaxy.jobs.dynamic_tool_destination

from __future__ import print_function

import argparse
import collections
import copy
import logging
import os
import re
import sys
from functools import reduce

from yaml import load

__version__ = '1.0.0'

# log to galaxy's logger
log = logging.getLogger(__name__)

# does a lot more logging when set to true
verbose = True


[docs]class MalformedYMLException(Exception): pass
[docs]class ScannerError(Exception): pass
[docs]def get_keys_from_dict(dl, keys_list): """ This function builds a list using the keys from nest dictionaries """ if isinstance(dl, dict): keys_list += dl.keys() map(lambda x: get_keys_from_dict(x, keys_list), dl.values()) elif isinstance(dl, list): map(lambda x: get_keys_from_dict(x, keys_list), dl)
[docs]class RuleValidator: """ This class is the primary facility for validating configs. It's always called in map_tool_to_destination and it's called for validating config directly through DynamicToolDestination.py """
[docs] @classmethod def validate_rule(cls, rule_type, return_bool=False, *args, **kwargs): """ This function is responsible for passing each rule to its relevant function. @type rule_type: str @param rule_type: the current rule's type @type return_bool: bool @param return_bool: True when we are only interested in the result of the validation, and not the validated rule itself. @rtype: bool, dict (depending on return_bool) @return: validated rule or result of validation (depending on return_bool) """ if rule_type == 'file_size': return cls.__validate_file_size_rule(return_bool, *args, **kwargs) elif rule_type == 'num_input_datasets': return cls.__validate_num_input_datasets_rule(return_bool, *args, **kwargs) elif rule_type == 'records': return cls.__validate_records_rule(return_bool, *args, **kwargs) elif rule_type == 'arguments': return cls.__validate_arguments_rule(return_bool, *args, **kwargs)
@classmethod def __validate_file_size_rule( cls, return_bool, original_rule, counter, tool): """ This function is responsible for validating 'file_size' rules. @type return_bool: bool @param return_bool: True when we are only interested in the result of the validation, and not the validated rule itself. @type original_rule: dict @param original_rule: contains the original received rule @type counter: int @param counter: this counter is used to identify what rule # is currently being validated. Necessary for log output. @type tool: str @param tool: the name of the current tool. Necessary for log output. @rtype: bool, dict (depending on return_bool) @return: validated rule or result of validation (depending on return_bool) """ rule = copy.deepcopy(original_rule) valid_rule = True # Users Verification # if rule is not None: valid_rule, rule = cls.__validate_users( valid_rule, return_bool, rule, tool, counter) # Nice_value Verification # if rule is not None: valid_rule, rule = cls.__validate_nice_value( valid_rule, return_bool, rule, tool, counter) # Destination Verification # if rule is not None: valid_rule, rule = cls.__validate_destination( valid_rule, return_bool, rule, tool, counter) # Bounds Verification # if rule is not None: valid_rule, rule = cls.__validate_bounds( valid_rule, return_bool, rule, tool, counter) if return_bool: return valid_rule else: return rule @classmethod def __validate_num_input_datasets_rule( cls, return_bool, original_rule, counter, tool): """ This function is responsible for validating 'num_input_datasets' rules. @type return_bool: bool @param return_bool: True when we are only interested in the result of the validation, and not the validated rule itself. @type original_rule: dict @param original_rule: contains the original received rule @type counter: int @param counter: this counter is used to identify what rule # is currently being validated. Necessary for log output. @type tool: str @param tool: the name of the current tool. Necessary for log output. @rtype: bool, dict (depending on return_bool) @return: validated rule or result of validation (depending on return_bool) """ rule = copy.deepcopy(original_rule) valid_rule = True # Users Verification # if rule is not None: valid_rule, rule = cls.__validate_users( valid_rule, return_bool, rule, tool, counter) # Nice_value Verification # if rule is not None: valid_rule, rule = cls.__validate_nice_value( valid_rule, return_bool, rule, tool, counter) # Destination Verification # if rule is not None: valid_rule, rule = cls.__validate_destination( valid_rule, return_bool, rule, tool, counter) # Bounds Verification # if rule is not None: valid_rule, rule = cls.__validate_bounds( valid_rule, return_bool, rule, tool, counter) if return_bool: return valid_rule else: return rule @classmethod def __validate_records_rule(cls, return_bool, original_rule, counter, tool): """ This function is responsible for validating 'records' rules. @type return_bool: bool @param return_bool: True when we are only interested in the result of the validation, and not the validated rule itself. @type original_rule: dict @param original_rule: contains the original received rule @type counter: int @param counter: this counter is used to identify what rule # is currently being validated. Necessary for log output. @type tool: str @param tool: the name of the current tool. Necessary for log output. @rtype: bool, dict (depending on return_bool) @return: validated rule or result of validation (depending on return_bool) """ rule = copy.deepcopy(original_rule) valid_rule = True # Users Verification # if rule is not None: valid_rule, rule = cls.__validate_users( valid_rule, return_bool, rule, tool, counter) # Nice_value Verification # if rule is not None: valid_rule, rule = cls.__validate_nice_value( valid_rule, return_bool, rule, tool, counter) # Destination Verification # if rule is not None: valid_rule, rule = cls.__validate_destination( valid_rule, return_bool, rule, tool, counter) # Bounds Verification # if rule is not None: valid_rule, rule = cls.__validate_bounds( valid_rule, return_bool, rule, tool, counter) if return_bool: return valid_rule else: return rule @classmethod def __validate_arguments_rule( cls, return_bool, original_rule, counter, tool): """ This is responsible for validating 'arguments' rules. @type return_bool: bool @param return_bool: True when we are only interested in the result of the validation, and not the validated rule itself. @type original_rule: dict @param original_rule: contains the original received rule @type counter: int @param counter: this counter is used to identify what rule # is currently being validated. Necessary for log output. @type tool: str @param tool: the name of the current tool. Necessary for log output. @rtype: bool, dict (depending on return_bool) @return: validated rule or result of validation (depending on return_bool) """ rule = copy.deepcopy(original_rule) valid_rule = True # Users Verification # if rule is not None: valid_rule, rule = cls.__validate_users( valid_rule, return_bool, rule, tool, counter) # Nice_value Verification # if rule is not None: valid_rule, rule = cls.__validate_nice_value( valid_rule, return_bool, rule, tool, counter) # Destination Verification # if rule is not None: valid_rule, rule = cls.__validate_destination( valid_rule, return_bool, rule, tool, counter) # Arguments Verification (for rule_type arguments; read comment block at top # of function for clarification. if rule is not None: valid_rule, rule = cls.__validate_arguments( valid_rule, return_bool, rule, tool, counter) if return_bool: return valid_rule else: return rule @classmethod def __validate_nice_value(cls, valid_rule, return_bool, rule, tool, counter): """ This function is responsible for validating nice_value. @type return_bool: bool @param return_bool: True when we are only interested in the result of the validation, and not the validated rule itself. @type valid_rule: bool @param valid_rule: returns True if everything is valid. False if it encounters any abnormalities in the config. @type original_rule: dict @param original_rule: contains the original received rule @type counter: int @param counter: this counter is used to identify what rule # is currently being validated. Necessary for log output. @type tool: str @param tool: the name of the current tool. Necessary for log output. @rtype: bool, dict (tuple) @return: validated rule and result of validation """ if "nice_value" in rule: if rule["nice_value"] < -20 or rule["nice_value"] > 20: error = "nice_value goes from -20 to 20; rule " + str(counter) error += " in '" + str(tool) + "' has a nice_value of '" error += str(rule["nice_value"]) + "'." if not return_bool: error += " Setting nice_value to 0." rule["nice_value"] = 0 if verbose: log.debug(error) valid_rule = False else: error = "No nice_value found for rule " + str(counter) + " in '" + str(tool) error += "'." if not return_bool: error += " Setting nice_value to 0." rule["nice_value"] = 0 if verbose: log.debug(error) valid_rule = False return valid_rule, rule @classmethod def __validate_destination(cls, valid_rule, return_bool, rule, tool, counter): """ This function is responsible for validating destination. @type return_bool: bool @param return_bool: True when we are only interested in the result of the validation, and not the validated rule itself. @type valid_rule: bool @param valid_rule: returns True if everything is valid. False if it encounters any abnormalities in the config. @type original_rule: dict @param original_rule: contains the original received rule @type counter: int @param counter: this counter is used to identify what rule # is currently being validated. Necessary for log output. @type tool: str @param tool: the name of the current tool. Necessary for log output. @rtype: bool, dict (tuple) @return: validated rule and result of validation """ if "fail_message" in rule: if "destination" not in rule or rule['destination'] != "fail": error = "Found a fail_message for rule " + str(counter) error += " in '" + str(tool) + "', but destination is not 'fail'!" if not return_bool: error += " Setting destination to 'fail'." if verbose: log.debug(error) valid_rule = False rule["destination"] = "fail" if "destination" in rule: if isinstance(rule["destination"], str): if rule["destination"] == "fail" and "fail_message" not in rule: error = "Missing a fail_message for rule " + str(counter) error += " in '" + str(tool) + "'." if not return_bool: error += " Adding generic fail_message." message = "Invalid parameters for rule " + str(counter) message += " in '" + str(tool) + "'." rule["fail_message"] = message if verbose: log.debug(error) valid_rule = False elif isinstance(rule["destination"], dict): if ("priority" in rule["destination"] and isinstance(rule["destination"]["priority"], dict)): if "med" not in rule["destination"]["priority"]: error = "No 'med' priority destination for rule " + str(counter) error += " in '" + str(tool) + "'." if not return_bool: error += " Ignoring..." if verbose: log.debug(error) valid_rule = False else: for priority in rule["destination"]["priority"]: if priority not in ["low", "med", "high"]: error = "Invalid priority destination '" + str(priority) error += "' for rule " + str(counter) error += " in '" + str(tool) + "'." if not return_bool: error += " Ignoring..." if verbose: log.debug(error) valid_rule = False elif not isinstance(rule["destination"]["priority"][priority], str): error = "No '" + str(priority) error += "'priority destination for rule " + str(counter) error += " in '" + str(tool) + "'." if not return_bool: error += " Ignoring..." if verbose: log.debug(error) valid_rule = False else: error = "No destination specified for rule " + str(counter) error += " in '" + str(tool) + "'." if not return_bool: error += " Ignoring..." if verbose: log.debug(error) valid_rule = False else: error = "No destination specified for rule " + str(counter) error += " in '" + str(tool) + "'." if not return_bool: error += " Ignoring..." if verbose: log.debug(error) valid_rule = False else: error = "No destination specified for rule " + str(counter) error += " in '" + str(tool) + "'." if not return_bool: error += " Ignoring..." if verbose: log.debug(error) valid_rule = False return valid_rule, rule @classmethod def __validate_bounds(cls, valid_rule, return_bool, rule, tool, counter): """ This function is responsible for validating bounds. @type return_bool: bool @param return_bool: True when we are only interested in the result of the validation, and not the validated rule itself. @type valid_rule: bool @param valid_rule: returns True if everything is valid. False if it encounters any abnormalities in the config. @type original_rule: dict @param original_rule: contains the original received rule @type counter: int @param counter: this counter is used to identify what rule # is currently being validated. Necessary for log output. @type tool: str @param tool: the name of the current tool. Necessary for log output. @rtype: bool/None, dict (tuple) @return: validated rule (or None if invalid) and result of validation """ if "upper_bound" in rule and "lower_bound" in rule: if rule["rule_type"] == "file_size": upper_bound = str_to_bytes(rule["upper_bound"]) lower_bound = str_to_bytes(rule["lower_bound"]) else: upper_bound = rule["upper_bound"] lower_bound = rule["lower_bound"] if lower_bound == "Infinity": error = "Error: lower_bound is set to Infinity, but must be lower than " error += "upper_bound!" if not return_bool: error += " Setting lower_bound to 0!" lower_bound = 0 rule["lower_bound"] = 0 if verbose: log.debug(error) valid_rule = False if upper_bound == "Infinity": upper_bound = -1 if upper_bound != -1 and lower_bound > upper_bound: error = "lower_bound exceeds upper_bound for rule " + str(counter) error += " in '" + str(tool) + "'." if not return_bool: error += " Reversing bounds." temp_upper_bound = rule["upper_bound"] temp_lower_bound = rule["lower_bound"] rule["upper_bound"] = temp_lower_bound rule["lower_bound"] = temp_upper_bound if verbose: log.debug(error) valid_rule = False else: error = "Missing bounds for rule " + str(counter) error += " in '" + str(tool) + "'." if not return_bool: error += " Ignoring rule." rule = None if verbose: log.debug(error) valid_rule = False return valid_rule, rule @classmethod def __validate_arguments(cls, valid_rule, return_bool, rule, tool, counter): """ This function is responsible for validating arguments. @type return_bool: bool @param return_bool: True when we are only interested in the result of the validation, and not the validated rule itself. @type valid_rule: bool @param valid_rule: returns True if everything is valid. False if it encounters any abnormalities in the config. @type original_rule: dict @param original_rule: contains the original received rule @type counter: int @param counter: this counter is used to identify what rule # is currently being validated. Necessary for log output. @type tool: str @param tool: the name of the current tool. Necessary for log output. @rtype: bool/None, dict (tuple) @return: validated rule (or None if invalid) and result of validation """ if "arguments" not in rule or not isinstance(rule["arguments"], dict): error = "No arguments found for rule " + str(counter) + " in '" error += str(tool) + "' despite being of type arguments." if not return_bool: error += " Ignoring rule." rule = None if verbose: log.debug(error) valid_rule = False return valid_rule, rule @classmethod def __validate_users(cls, valid_rule, return_bool, rule, tool, counter): """ This function is responsible for validating users (if present). @type return_bool: bool @param return_bool: True when we are only interested in the result of the validation, and not the validated rule itself. @type valid_rule: bool @param valid_rule: returns True if everything is valid. False if it encounters any abnormalities in the config. @type original_rule: dict @param original_rule: contains the original received rule @type counter: int @param counter: this counter is used to identify what rule # is currently being validated. Necessary for log output. @type tool: str @param tool: the name of the current tool. Necessary for log output. @rtype: bool, dict (tuple) @return: validated rule and result of validation """ emailregex = "^[A-Za-z0-9\.\+_-]+@[A-Za-z0-9\._-]+\.[a-zA-Z]*$" if "users" in rule: if isinstance(rule["users"], list): for user in reversed(rule["users"]): if not isinstance(user, str): error = "Entry '" + str(user) + "' in users for rule " error += str(counter) + " in tool '" + str(tool) + "' is in an " error += "invalid format!" if not return_bool: error += " Ignoring entry." if verbose: log.debug(error) valid_rule = False rule["users"].remove(user) else: if re.match(emailregex, user) is None: error = "Supplied email '" + str(user) + "' for rule " error += str(counter) + " in tool '" + str(tool) + "' is in " error += "an invalid format!" if not return_bool: error += " Ignoring email." if verbose: log.debug(error) valid_rule = False rule["users"].remove(user) else: error = "Couldn't find a list under 'users:'!" if not return_bool: error += " Ignoring rule." rule = None if verbose: log.debug(error) valid_rule = False # post-processing checking to make sure we didn't just remove all the users # if we did, we should ignore the rule if rule is not None and rule["users"] is not None and len(rule["users"]) == 0: error = "No valid user emails were specified for rule " + str(counter) error += " in tool '" + str(tool) + "'!" if not return_bool: error += " Ignoring rule." rule = None if verbose: log.debug(error) valid_rule = False return valid_rule, rule
[docs]def parse_yaml(path="/config/tool_destinations.yml", test=False, return_bool=False): """ Get a yaml file from path and send it to validate_config for validation. @type path: str @param path: the path to the config file @type test: bool @param test: indicates whether to run in test mode or production mode @type return_bool: bool @param return_bool: True when we are only interested in the result of the validation, and not the validated rule itself. @rtype: bool, dict (depending on return_bool) @return: validated rule or result of validation (depending on return_bool) """ # Import file from path try: if test: config = load(path) else: if path == "/config/tool_destinations.yml": # os.path.realpath gets the path of DynamicToolDestination.py # and then os.path.join is used to go back four directories config_directory = os.path.join( os.path.dirname(os.path.realpath(__file__)), '../../../..') opt_file = config_directory + path else: opt_file = path with open(opt_file, 'r') as stream: config = load(stream) # Test imported file try: if return_bool: valid_config = validate_config(config, return_bool) else: config = validate_config(config) except MalformedYMLException: if verbose: log.error(str(sys.exc_value)) raise except ScannerError: if verbose: log.error("Config is too malformed to fix!") raise if return_bool: return valid_config else: return config
[docs]def validate_config(obj, return_bool=False): """ Validate received config. @type obj: dict @param obj: the entire contents of the config @type return_bool: bool @param return_bool: True when we are only interested in the result of the validation, and not the validated rule itself. @rtype: bool, dict (depending on return_bool) @return: validated rule or result of validation (depending on return_bool) """ def infinite_defaultdict(): return collections.defaultdict(infinite_defaultdict) # Allow new_config to expand automatically when adding values to new levels new_config = infinite_defaultdict() global verbose verbose = False valid_config = True valid_rule = True tool_has_default = False if return_bool: verbose = True elif obj is not None and 'verbose' in obj and isinstance(obj['verbose'], bool): verbose = obj['verbose'] else: valid_config = False if not return_bool and verbose: log.debug("Running config validation...") # if this is false, then it's definitely because of verbose missing if not valid_config and return_bool: log.debug("Missing mandatory field 'verbose' in config!") # a list with the available rule_types. Can be expanded on easily in the future available_rule_types = ['file_size', 'num_input_datasets', 'records', 'arguments'] if obj is not None: # in obj, there should always be only 4 categories: tools, default_destination, # users, and verbose if 'default_destination' in obj: if isinstance(obj['default_destination'], str): new_config["default_destination"] = obj['default_destination'] elif isinstance(obj['default_destination'], dict): if ('priority' in obj['default_destination'] and isinstance(obj['default_destination']['priority'], dict)): if 'med' not in obj['default_destination']['priority']: error = "No default 'med' priority destination!" if verbose: log.debug(error) valid_config = False else: for priority in obj['default_destination']['priority']: if priority in ['low', 'med', 'high']: if isinstance( obj['default_destination']['priority'][priority], str): new_config['default_destination']['priority'][ priority] = obj[ 'default_destination']['priority'][priority] else: error = ("No default '" + str(priority) + "' priority destination in config!") if verbose: log.debug(error) valid_config = False else: error = ("Invalid default priority destination '" + str(priority) + "' found in config!") if verbose: log.debug(error) valid_config = False else: error = "No default priority destinations specified in config!" if verbose: log.debug(error) valid_config = False else: error = "No global default destination specified in config!" if verbose: log.debug(error) valid_config = False else: error = "No global default destination specified in config!" if verbose: log.debug(error) valid_config = False if 'users' in obj: if isinstance(obj['users'], dict): for user in obj['users']: curr = obj['users'][user] if isinstance(curr, dict): if 'priority' in curr and isinstance(curr['priority'], str): if curr['priority'] in ['low', 'med', 'high']: new_config['users'][user]['priority'] = curr['priority'] else: error = ("User '" + user + "', priority is not valid!" + " Must be either low, med, or high.") if verbose: log.debug(error) valid_config = False else: error = "User '" + user + "' is missing a priority!" if verbose: log.debug(error) valid_config = False else: error = "User '" + user + "' is missing a priority!" if verbose: log.debug(error) valid_config = False else: error = "Users option is not a dictionary!" if verbose: log.debug(error) valid_config = False if 'tools' in obj: for tool in obj['tools']: curr = obj['tools'][tool] # This check is to make sure we have a tool name, and not just # rules right way. if not isinstance(curr, list): curr_tool_rules = [] if curr is not None: # in each tool, there should always be only 2 sub-categories: # default_destination (not mandatory) and rules (mandatory) if "default_destination" in curr: if isinstance(curr['default_destination'], str): new_config['tools'][tool]['default_destination'] = (curr['default_destination']) tool_has_default = True elif isinstance(curr['default_destination'], dict): if ('priority' in curr['default_destination'] and isinstance(curr['default_destination']['priority'], dict)): if ('med' not in curr['default_destination']['priority']): error = "No default 'med' priority destination " error += "for " + str(tool) + "!" if verbose: log.debug(error) valid_config = False else: for priority in curr['default_destination']['priority']: destination = curr['default_destination']['priority'][priority] if priority in ['low', 'med', 'high']: if isinstance(destination, str): new_config['tools'][tool]['default_destination']['priority'][priority] = destination tool_has_default = True else: error = ("No default '" + str(priority) + "' priority destination " + "for " + str(tool) + " in config!") if verbose: log.debug(error) valid_config = False else: error = ("Invalid default priority " + "destination '" + str(priority) + "' for " + str(tool) + "found in config!") if verbose: log.debug(error) valid_config = False else: error = "No default priority destinations specified" error += " for " + str(tool) + " in config!" if verbose: log.debug(error) valid_config = False if "rules" in curr and isinstance(curr['rules'], list): # under rules, there should only be a list of rules curr_tool = curr counter = 0 for rule in curr_tool['rules']: if "rule_type" in rule: if rule['rule_type'] in available_rule_types: validated_rule = None counter += 1 # if we're only interested in the result of # the validation, then only retrieve the # result if return_bool: valid_rule = RuleValidator.validate_rule( rule['rule_type'], return_bool, rule, counter, tool) # otherwise, retrieve the processed rule else: validated_rule = ( RuleValidator.validate_rule( rule['rule_type'], return_bool, rule, counter, tool)) # if the result we get is False, then # indicate that the whole config is invalid if not valid_rule: valid_config = False # if we got a rule back that seems to be # valid (or was fixable) then append it to # list of ready-to-use tools if (not return_bool and validated_rule is not None): curr_tool_rules.append( copy.deepcopy(validated_rule)) # if rule['rule_type'] in available_rule_types else: error = "Unrecognized rule_type '" error += rule['rule_type'] + "' " error += "found in '" + str(tool) + "'. " if not return_bool: error += "Ignoring..." if verbose: log.debug(error) valid_config = False # if "rule_type" in rule else: counter += 1 error = "No rule_type found for rule " error += str(counter) error += " in '" + str(tool) + "'." if verbose: log.debug(error) valid_config = False # if "rules" in curr and isinstance(curr['rules'], list): elif not tool_has_default: valid_config = False error = "Tool '" + str(tool) + "' does not have rules nor a" error += " default_destination!" if verbose: log.debug(error) # if obj['tools'][tool] is not None: else: valid_config = False error = "Config section for tool '" + str(tool) + "' is blank!" if verbose: log.debug(error) if curr_tool_rules: new_config['tools'][str(tool)]['rules'] = curr_tool_rules # if not isinstance(curr, list) else: error = "Malformed YML; expected job name, " error += "but found a list instead!" if verbose: log.debug(error) valid_config = False # quickly run through categories to detect unrecognized types for category in obj.keys(): if not (category == 'verbose' or category == 'tools' or category == 'default_destination' or category == 'users'): error = "Unrecognized category '" + category error += "' found in config file!" if verbose: log.debug(error) valid_config = False # if obj is not None else: if verbose: log.debug("No (or empty) config file supplied!") valid_config = False if not return_bool: if verbose: log.debug("Finished config validation.") if return_bool: return valid_config else: return new_config
[docs]def bytes_to_str(size, unit="YB"): ''' Uses the bi convention: 1024 B = 1 KB since this method primarily has inputs of bytes for RAM @type size: int @param size: the size in int (bytes) to be converted to str @rtype: str @return return_str: the resulting string ''' # converts size in bytes to most readable unit units = ["B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"] i = 0 # mostly called in order to convert "infinity" try: size_changer = int(size) except ValueError: error = "bytes_to_str passed uncastable non numeric value " raise ValueError(error + str(size)) try: upto = units.index(unit.strip().upper()) except ValueError: upto = 9 while size_changer >= 1024 and i < upto: size_changer = size_changer / 1024.0 i += 1 if size_changer == -1: size_changer = "Infinity" i = 0 try: return_str = "%.2f %s" % (size_changer, units[i]) except TypeError: return_str = "%s" % (size_changer) return return_str
[docs]def str_to_bytes(size): ''' Uses the bi convention: 1024 B = 1 KB since this method primarily has inputs of bytes for RAM @type size: str @param size: the size in str to be converted to int (bytes) @rtype: int @return curr_size: the resulting size converted from str ''' units = ["", "b", "kb", "mb", "gb", "tb", "pb", "eb", "zb", "yb"] curr_size = size try: if size.lower() != "infinity": # Get the number try: curr_item = size.strip().split(" ") curr_size = "".join(curr_item) curr_size = int(curr_size) except ValueError: curr_item = size.strip().split(" ") curr_unit = curr_item[-1].strip().lower() curr_item = curr_item[0:-1] curr_size = "".join(curr_item) try: curr_size = float(curr_size) except ValueError: error = "Unable to convert size " + str(size) raise MalformedYMLException(error) # Get the unit and convert to bytes try: pos = units.index(curr_unit) for x in range(pos, 1, -1): curr_size *= 1024 except ValueError: error = "Unable to convert size " + str(size) raise MalformedYMLException(error) except (UnboundLocalError, NameError): pass else: curr_size = -1 except AttributeError: # If size is not a string (doesn't have .lower()) pass return curr_size
[docs]def importer(test): """ Uses Mock galaxy for testing or real galaxy for production @type test: bool @param test: True when being run from a test """ global JobDestination global JobMappingException if test: class JobDestionation(object): def __init__(self, *kwd): self.id = kwd.get('id') self.nativeSpec = kwd.get('params')['nativeSpecification'] self.runner = kwd.get('runner') from galaxy.jobs.mapper import JobMappingException else: from galaxy.jobs import JobDestination from galaxy.jobs.mapper import JobMappingException
[docs]def map_tool_to_destination( job, app, tool, user_email, test=False, path=None): """ Dynamically allocate resources @param job: galaxy job @param app: current app @param tool: current tool @type test: bool @param test: True when running in test mode @type path: str @param path: path to tool_destinations.yml """ importer(test) # set verbose to True by default, just in case (some tests fail without this due to # how the tests apparently work) global verbose verbose = True filesize_rule_present = False num_input_datasets_rule_present = False records_rule_present = False # Get configuration from tool_destinations.yml if path is None: path = app.config.tool_destinations_config_file try: config = parse_yaml(path) except MalformedYMLException as e: raise JobMappingException(e) # Get all inputs from tool and databases inp_data = dict([(da.name, da.dataset) for da in job.input_datasets]) inp_data.update([(da.name, da.dataset) for da in job.input_library_datasets]) if config is not None and str(tool.old_id) in config['tools']: if 'rules' in config['tools'][str(tool.old_id)]: for rule in config['tools'][str(tool.old_id)]['rules']: if rule["rule_type"] == "file_size": filesize_rule_present = True if rule["rule_type"] == "num_input_datasets": num_input_datasets_rule_present = True if rule["rule_type"] == "records": records_rule_present = True file_size = 0 records = 0 num_input_datasets = 0 if filesize_rule_present or records_rule_present or num_input_datasets_rule_present: # Loop through the database and look for amount of records try: for line in inp_db: if line[0] == ">": records += 1 except NameError: pass # Loop through each input file and adds the size to the total # or looks through db for records for da in inp_data: try: # If the input is a file, check and add the size if inp_data[da] is not None and os.path.isfile(inp_data[da].file_name): num_input_datasets += 1 if verbose: message = "Loading file: " + str(da) message += str(inp_data[da].file_name) log.debug(message) # Add to records if the file type is fasta if inp_data[da].ext == "fasta": if records_rule_present: inp_db = open(inp_data[da].file_name) # Try to find automatically computed sequences metadata = inp_data[da].get_metadata() try: records += int(metadata.get("sequences")) except (TypeError, KeyError): for line in inp_db: if line[0] == ">": records += 1 elif filesize_rule_present: query_file = str(inp_data[da].file_name) file_size += os.path.getsize(query_file) except AttributeError: # Otherwise, say that input isn't a file if verbose: log.debug("Not a file: " + str(inp_data[da])) if verbose: if filesize_rule_present: log.debug("Total size: " + bytes_to_str(file_size)) if records_rule_present: log.debug("Total amount of records: " + str(records)) if num_input_datasets_rule_present: log.debug("Total number of files: " + str(num_input_datasets)) matched_rule = None user_authorized = None rule_counter = 0 # For each different rule for the tool that's running fail_message = None # set default priority to med default_priority = 'med' priority = default_priority if config is not None: # get the users priority if "users" in config: if user_email in config["users"]: priority = config["users"][user_email]["priority"] if "default_destination" in config: if isinstance(config['default_destination'], str): destination = config['default_destination'] else: if priority in config['default_destination']['priority']: destination = config['default_destination']['priority'][priority] else: destination = (config['default_destination']['priority'][default_priority]) config = config['tools'] if str(tool.old_id) in config: if 'rules' in config[str(tool.old_id)]: for rule in config[str(tool.old_id)]['rules']: rule_counter += 1 user_authorized = False if 'users' in rule and isinstance(rule['users'], list): if user_email in rule['users']: user_authorized = True else: user_authorized = True if user_authorized: matched = False if rule["rule_type"] == "file_size": # bounds comparisons upper_bound = str_to_bytes(rule["upper_bound"]) lower_bound = str_to_bytes(rule["lower_bound"]) if upper_bound == -1: if lower_bound <= file_size: matched = True else: if (lower_bound <= file_size and file_size < upper_bound): matched = True elif rule["rule_type"] == "num_input_datasets": # bounds comparisons upper_bound = rule["upper_bound"] lower_bound = rule["lower_bound"] if upper_bound == "Infinity": if lower_bound <= num_input_datasets: matched = True else: if (lower_bound <= num_input_datasets and num_input_datasets < upper_bound): matched = True elif rule["rule_type"] == "records": # bounds comparisons upper_bound = str_to_bytes(rule["upper_bound"]) lower_bound = str_to_bytes(rule["lower_bound"]) if upper_bound == -1: if lower_bound <= records: matched = True else: if lower_bound <= records and records < upper_bound: matched = True elif rule["rule_type"] == "arguments": options = job.get_param_values(app) matched = True # check if the args in the config file are available for arg in rule["arguments"]: arg_dict = {arg : rule["arguments"][arg]} arg_keys_list = [] get_keys_from_dict(arg_dict, arg_keys_list) try: options_value = reduce(dict.__getitem__, arg_keys_list, options) arg_value = reduce(dict.__getitem__, arg_keys_list, arg_dict) if (arg_value != options_value): matched = False except KeyError: matched = False if verbose: error = "Argument '" + str(arg) error += "' not recognized!" log.debug(error) # if we matched a rule if matched: if (matched_rule is None or rule["nice_value"] < matched_rule["nice_value"]): matched_rule = rule # if user_authorized else: if verbose: error = "User email '" + str(user_email) + "' not " error += "specified in list of authorized users for " error += "rule " + str(rule_counter) + " in tool '" error += str(tool.old_id) + "'! Ignoring rule." log.debug(error) # if str(tool.old_id) in config else: error = "Tool '" + str(tool.old_id) + "' not specified in config. " error += "Using default destination." if verbose: log.debug(error) if matched_rule is None: if "default_destination" in config[str(tool.old_id)]: default_tool_destination = (config[str(tool.old_id)]['default_destination']) if isinstance(default_tool_destination, str): destination = default_tool_destination else: if priority in default_tool_destination['priority']: destination = default_tool_destination['priority'][priority] else: destination = (default_tool_destination['priority'][default_priority]) else: if isinstance(matched_rule["destination"], str): destination = matched_rule["destination"] else: if priority in matched_rule["destination"]["priority"]: destination = matched_rule["destination"]["priority"][priority] else: destination = (matched_rule["destination"]["priority"][default_priority]) # if "default_destination" in config else: destination = "fail" fail_message = "Job '" + str(tool.old_id) + "' failed; " fail_message += "no global default destination specified in config!" # if config is not None else: destination = "fail" fail_message = "No config file supplied!" if destination == "fail": if fail_message: raise JobMappingException(fail_message) else: raise JobMappingException(matched_rule["fail_message"]) if config is not None: if destination == "fail": output = "An error occurred: " + fail_message else: output = "Running '" + str(tool.old_id) + "' with '" output += destination + "'." log.debug(output) return destination
if __name__ == '__main__': """ This function is responsible for running the app if directly run through the commandline. It offers the ability to specify a config through the commandline for checking whether or not it is a valid config. It's to be run from within Galaxy, assuming it is installed correctly within the proper directories in Galaxy, and it looks for the config file in galaxy/config/. It can also be run with a path pointing to a config file if not being run directly from inside Galaxy install directory. """ verbose = True parser = argparse.ArgumentParser() logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) parser.add_argument( '-c', '--check-config', dest='check_config', nargs='?', help='Use this option to validate tool_destinations.yml.' + ' Optionally, provide the path to the tool_destinations.yml' + ' that you would like to check. Default: galaxy/config/tool_destinations.yml') parser.add_argument( '-V', '--version', action='version', version="%(prog)s " + __version__) args = parser.parse_args() # if run with no arguments, display the help message if len(sys.argv) == 1: parser.print_help() sys.exit(1) if args.check_config: valid_config = parse_yaml(path=args.check_config, return_bool=True) else: valid_config = parse_yaml(path="/config/tool_destinations.yml", return_bool=True) if valid_config: print("Configuration is valid!") else: print("Errors detected; config not valid!")