import json
import logging
from datetime import (
    date,
    datetime,
)
from typing import (
    Any,
    cast,
    Dict,
    List,
    Optional,
    Union,
)

import sqlalchemy
from boltons.iterutils import remap
from pydantic import (
    BaseModel,
    Field,
)
from sqlalchemy import (
    and_,
    false,
    func,
    null,
    or_,
    true,
)
from sqlalchemy.orm import aliased
from sqlalchemy.sql import select
from typing_extensions import TypedDict

from galaxy import model
from galaxy.exceptions import (
    ItemAccessibilityException,
    ObjectNotFound,
    RequestParameterInvalidException,
    RequestParameterMissingException,
)
from galaxy.job_metrics import (
    RawMetric,
    Safety,
)
from galaxy.managers.collections import DatasetCollectionManager
from galaxy.managers.context import ProvidesUserContext
from galaxy.managers.datasets import DatasetManager
from galaxy.managers.hdas import HDAManager
from galaxy.managers.lddas import LDDAManager
from galaxy.model import (
    ImplicitCollectionJobs,
    ImplicitCollectionJobsJobAssociation,
    Job,
    JobParameter,
    User,
    Workflow,
    WorkflowInvocation,
    WorkflowInvocationStep,
    YIELD_PER_ROWS,
)
from galaxy.model.base import transaction
from galaxy.model.index_filter_util import (
    raw_text_column_filter,
    text_column_filter,
)
from galaxy.model.scoped_session import galaxy_scoped_session
from galaxy.schema.schema import (
    JobIndexQueryPayload,
    JobIndexSortByEnum,
)
from galaxy.security.idencoding import IdEncodingHelper
from galaxy.structured_app import StructuredApp
from galaxy.util import (
    defaultdict,
    ExecutionTimer,
    listify,
)
from galaxy.util.search import (
    FilteredTerm,
    parse_filters_structured,
    RawTextTerm,
)

log = logging.getLogger(__name__)


class JobLock(BaseModel):
    active: bool = Field(title="Job lock status", description="If active, jobs will not dispatch")
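
# Example (illustrative): an admin API payload {"active": true} parses to
# JobLock(active=True); per the field description above, jobs will not
# dispatch while the lock is active.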


def get_path_key(path_tuple):
    path_key = ""
    tuple_elements = len(path_tuple)
    for i, p in enumerate(path_tuple):
        if isinstance(p, int):
            sep = "_"
        else:
            sep = "|"
        if i == (tuple_elements - 2) and p == "values":
            # Dataset inputs are always wrapped in lists. To avoid keys like
            # 'rep_factorName_0|rep_factorLevel_2|countsFile|values_0', we drop
            # the last two elements of the path tuple ('values' and the list index).
            return path_key
        if path_key:
            path_key = f"{path_key}{sep}{p}"
        else:
            path_key = p
    return path_key
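
# Example (illustrative): nested parameter paths flatten with "|" between
# names and "_" before repeat indices, and a trailing ('values', index) pair
# is trimmed:
#
#     >>> get_path_key(("rep_factorName", 0, "rep_factorLevel"))
#     'rep_factorName_0|rep_factorLevel'
#     >>> get_path_key(("countsFile", "values", 0))
#     'countsFile'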


class JobManager:
    def __init__(self, app: StructuredApp):
        self.app = app
        self.dataset_manager = DatasetManager(app)

    def index_query(self, trans: ProvidesUserContext, payload: JobIndexQueryPayload) -> sqlalchemy.engine.ScalarResult:
        """The caller is responsible for security checks on the resulting jobs if
        history_id, invocation_id, or implicit_collection_jobs_id is set.
        Otherwise this will only return the user's jobs, or all jobs if the
        requesting user is acting as an admin.
        """
        is_admin = trans.user_is_admin
        user_details = payload.user_details
        decoded_user_id = payload.user_id
        history_id = payload.history_id
        workflow_id = payload.workflow_id
        invocation_id = payload.invocation_id
        implicit_collection_jobs_id = payload.implicit_collection_jobs_id
        search = payload.search
        order_by = payload.order_by

        def build_and_apply_filters(stmt, objects, filter_func):
            if objects is not None:
                if isinstance(objects, (str, date, datetime)):
                    stmt = stmt.where(filter_func(objects))
                elif isinstance(objects, list):
                    t = []
                    for obj in objects:
                        t.append(filter_func(obj))
                    stmt = stmt.where(or_(*t))
            return stmt

        def add_workflow_jobs():
            wfi_step = select(WorkflowInvocationStep)
            if workflow_id is not None:
                wfi_step = (
                    wfi_step.join(WorkflowInvocation).join(Workflow).where(Workflow.stored_workflow_id == workflow_id)
                )
            elif invocation_id is not None:
                wfi_step = wfi_step.where(WorkflowInvocationStep.workflow_invocation_id == invocation_id)
            wfi_step_sq = wfi_step.subquery()

            stmt1 = stmt.join(wfi_step_sq)
            stmt2 = stmt.join(ImplicitCollectionJobsJobAssociation).join(
                wfi_step_sq,
                ImplicitCollectionJobsJobAssociation.implicit_collection_jobs_id
                == wfi_step_sq.c.implicit_collection_jobs_id,
            )
            # Ensure the result is models, not tuples
            sq = stmt1.union(stmt2).subquery()
            # SQLite won't recognize Job.foo as a valid column for the ORDER BY clause due to the UNION clause,
            # so we use the subquery's `columns` collection (`sq.c`).
            # Ref: https://github.com/galaxyproject/galaxy/pull/16852#issuecomment-1804676322
            return select(aliased(Job, sq)), sq.c

        def add_search_criteria(stmt):
            search_filters = {
                "tool": "tool",
                "t": "tool",
            }
            if user_details:
                search_filters.update(
                    {
                        "user": "user",
                        "u": "user",
                    }
                )
            if is_admin:
                search_filters.update(
                    {
                        "runner": "runner",
                        "r": "runner",
                        "handler": "handler",
                        "h": "handler",
                    }
                )
            assert search
            parsed_search = parse_filters_structured(search, search_filters)
            for term in parsed_search.terms:
                if isinstance(term, FilteredTerm):
                    key = term.filter
                    if key == "user":
                        stmt = stmt.where(text_column_filter(User.email, term))
                    elif key == "tool":
                        stmt = stmt.where(text_column_filter(Job.tool_id, term))
                    elif key == "handler":
                        stmt = stmt.where(text_column_filter(Job.handler, term))
                    elif key == "runner":
                        stmt = stmt.where(text_column_filter(Job.job_runner_name, term))
                elif isinstance(term, RawTextTerm):
                    columns: List = [Job.tool_id]
                    if user_details:
                        columns.append(User.email)
                    if is_admin:
                        columns.append(Job.handler)
                        columns.append(Job.job_runner_name)
                    stmt = stmt.filter(raw_text_column_filter(columns, term))
            return stmt

        stmt = select(Job)

        if is_admin:
            if decoded_user_id is not None:
                stmt = stmt.where(Job.user_id == decoded_user_id)
            if user_details:
                stmt = stmt.outerjoin(Job.user)
        else:
            if history_id is None and invocation_id is None and implicit_collection_jobs_id is None:
                # If we're not filtering on history, invocation or collection,
                # return only the jobs owned by the current user.
                if trans.user:
                    stmt = stmt.where(Job.user_id == trans.user.id)
                elif trans.galaxy_session:
                    stmt = stmt.where(Job.session_id == trans.galaxy_session.id)
                else:
                    raise RequestParameterMissingException("A session is required to list jobs for anonymous users")

        stmt = build_and_apply_filters(stmt, payload.states, lambda s: model.Job.state == s)
        stmt = build_and_apply_filters(stmt, payload.tool_ids, lambda t: model.Job.tool_id == t)
        stmt = build_and_apply_filters(stmt, payload.tool_ids_like, lambda t: model.Job.tool_id.like(t))
        stmt = build_and_apply_filters(stmt, payload.date_range_min, lambda dmin: model.Job.update_time >= dmin)
        stmt = build_and_apply_filters(stmt, payload.date_range_max, lambda dmax: model.Job.update_time <= dmax)

        if history_id is not None:
            stmt = stmt.where(Job.history_id == history_id)

        order_by_columns = Job
        if workflow_id or invocation_id:
            stmt, order_by_columns = add_workflow_jobs()
        elif implicit_collection_jobs_id:
            stmt = (
                stmt.join(ImplicitCollectionJobsJobAssociation, ImplicitCollectionJobsJobAssociation.job_id == Job.id)
                .join(
                    ImplicitCollectionJobs,
                    ImplicitCollectionJobs.id == ImplicitCollectionJobsJobAssociation.implicit_collection_jobs_id,
                )
                .where(ImplicitCollectionJobsJobAssociation.implicit_collection_jobs_id == implicit_collection_jobs_id)
            )
        if search:
            stmt = add_search_criteria(stmt)

        if order_by == JobIndexSortByEnum.create_time:
            stmt = stmt.order_by(order_by_columns.create_time.desc())
        else:
            stmt = stmt.order_by(order_by_columns.update_time.desc())
        stmt = stmt.offset(payload.offset)
        stmt = stmt.limit(payload.limit)
        return trans.sa_session.scalars(stmt)
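
    # Example (illustrative): a hypothetical caller listing a user's ten most
    # recently updated errored jobs might do:
    #
    #     payload = JobIndexQueryPayload(states=["error"], limit=10)
    #     jobs = job_manager.index_query(trans, payload).all()
    #
    # (`trans` is a ProvidesUserContext; payload fields not set here keep
    # their defaults.)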

    def job_lock(self) -> JobLock:
        return JobLock(active=self.app.job_manager.job_lock)

    def update_job_lock(self, job_lock: JobLock):
        self.app.queue_worker.send_control_task(
            "admin_job_lock", kwargs={"job_lock": job_lock.active}, get_response=True
        )
        return self.job_lock()

    def get_accessible_job(self, trans: ProvidesUserContext, decoded_job_id) -> Job:
        job = trans.sa_session.get(Job, decoded_job_id)
        if job is None:
            raise ObjectNotFound()
        belongs_to_user = False
        if trans.user:
            belongs_to_user = job.user_id == trans.user.id
        elif trans.galaxy_session:
            belongs_to_user = job.session_id == trans.galaxy_session.id
        if not trans.user_is_admin and not belongs_to_user:
            # Check access granted via output datasets.
            if not job.output_datasets:
                raise ItemAccessibilityException("Job has no output datasets.")
            for data_assoc in job.output_datasets:
                if not self.dataset_manager.is_accessible(data_assoc.dataset.dataset, trans.user):
                    raise ItemAccessibilityException("You are not allowed to rerun this job.")
        trans.sa_session.refresh(job)
        return job

    def stop(self, job, message=None):
        if not job.finished:
            job.mark_deleted(self.app.config.track_jobs_in_database)
            session = self.app.model.session
            with transaction(session):
                session.commit()
            self.app.job_manager.stop(job, message=message)
            return True
        else:
            return False


class JobSearch:
    """Search for jobs using tool inputs or other jobs."""

    def __init__(
        self,
        sa_session: galaxy_scoped_session,
        hda_manager: HDAManager,
        dataset_collection_manager: DatasetCollectionManager,
        ldda_manager: LDDAManager,
        id_encoding_helper: IdEncodingHelper,
    ):
        self.sa_session = sa_session
        self.hda_manager = hda_manager
        self.dataset_collection_manager = dataset_collection_manager
        self.ldda_manager = ldda_manager
        self.decode_id = id_encoding_helper.decode_id

    def __search(
        self, tool_id, tool_version, user, input_data, job_state=None, param_dump=None, wildcard_param_dump=None
    ):
        search_timer = ExecutionTimer()

        def replace_dataset_ids(path, key, value):
            """Exchanges dataset ids (HDA, LDDA, HDCA, not Dataset) in param_dump with the dataset ids used in the job."""
            if key == "id":
                current_case = param_dump
                for p in path:
                    current_case = current_case[p]
                src = current_case["src"]
                value = job_input_ids[src][value]
                return key, value
            return key, value

        stmt_sq = self._build_job_subquery(tool_id, user.id, tool_version, job_state, wildcard_param_dump)
        stmt = select(Job.id).select_from(Job.table.join(stmt_sq, stmt_sq.c.id == Job.id))

        data_conditions: List = []

        # We now build the stmt filters that relate to the input datasets that
        # this job uses. We keep track of the requested dataset id in
        # `requested_ids`, the type (hda, ldda, hdca or dce) in `data_types`,
        # and the ids that were used by jobs that have already run in `used_ids`.
        requested_ids = []
        data_types = []
        used_ids: List = []
        for k, input_list in input_data.items():
            # k will be matched against the JobParameter.name column. The name
            # can carry a prefix when the input lives inside a repeat, section
            # or conditional.
            k = {k, k.split("|")[-1]}
            for type_values in input_list:
                t = type_values["src"]
                v = type_values["id"]
                requested_ids.append(v)
                data_types.append(t)
                identifier = type_values["identifier"]
                if t == "hda":
                    stmt = self._build_stmt_for_hda(stmt, data_conditions, used_ids, k, v, identifier)
                elif t == "ldda":
                    stmt = self._build_stmt_for_ldda(stmt, data_conditions, used_ids, k, v)
                elif t == "hdca":
                    stmt = self._build_stmt_for_hdca(stmt, data_conditions, used_ids, k, v)
                elif t == "dce":
                    stmt = self._build_stmt_for_dce(stmt, data_conditions, used_ids, k, v)
                else:
                    return []
        stmt = stmt.where(*data_conditions).group_by(model.Job.id, *used_ids).order_by(model.Job.id.desc())

        for job in self.sa_session.execute(stmt):
            # We found a job that is equal in terms of tool_id, user, state and input datasets,
            # but to verify that the parameters match we need to modify all instances of
            # dataset ids (HDA, LDDA, HDCA) in the incoming param_dump to point to those used by the
            # possibly equivalent job, which may have been run on copies of the original input data.
            job_input_ids = {}
            if len(job) > 1:
                # We do have datasets to check
                job_id, current_jobs_data_ids = job[0], job[1:]
                job_parameter_conditions = [model.Job.id == job_id]
                for src, requested_id, used_id in zip(data_types, requested_ids, current_jobs_data_ids):
                    if src not in job_input_ids:
                        job_input_ids[src] = {requested_id: used_id}
                    else:
                        job_input_ids[src][requested_id] = used_id
                new_param_dump = remap(param_dump, visit=replace_dataset_ids)
                # new_param_dump has its dataset ids remapped to those used by the job.
                # We now ask if the remapped job parameters match the current job.
                for k, v in new_param_dump.items():
                    if v == {"__class__": "RuntimeValue"}:
                        # TODO: verify this is always None, e.g. for a job run with a runtime input
                        v = None
                    elif k.endswith("|__identifier__"):
                        # We've taken care of this while constructing the conditions based on ``input_data`` above
                        continue
                    elif k == "chromInfo" and "?.len" in v:
                        continue
                    a = aliased(model.JobParameter)
                    job_parameter_conditions.append(
                        and_(model.Job.id == a.job_id, a.name == k, a.value == json.dumps(v, sort_keys=True))
                    )
            else:
                job_parameter_conditions = [model.Job.id == job[0]]
            job = get_job(self.sa_session, *job_parameter_conditions)
            if job is None:
                continue
            n_parameters = 0
            # Verify that equivalent jobs had the same number of job parameters.
            # We skip the chromInfo, dbkey, __workflow_invocation_uuid__ and
            # |__identifier__ parameters, as these are not passed along when
            # expanding tool parameters and they can differ without affecting
            # the resulting dataset.
            for parameter in job.parameters:
                if parameter.name.startswith("__"):
                    continue
                if parameter.name in {"chromInfo", "dbkey"} or parameter.name.endswith("|__identifier__"):
                    continue
                n_parameters += 1
            if not n_parameters == sum(
                1
                for k in param_dump
                if not k.startswith("__") and not k.endswith("|__identifier__") and k not in {"chromInfo", "dbkey"}
            ):
                continue
            log.info("Found equivalent job %s", search_timer)
            return job
        log.info("No equivalent jobs found %s", search_timer)
        return None
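
    # Example (illustrative): if the request referenced hda 7 but a candidate
    # job ran on a copy with id 5, job_input_ids ends up as {"hda": {7: 5}},
    # and replace_dataset_ids rewrites {"src": "hda", "id": 7} in param_dump
    # to {"src": "hda", "id": 5} before the parameter comparison.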

    def _build_job_subquery(self, tool_id, user_id, tool_version, job_state, wildcard_param_dump):
        """Build subquery that selects a job with correct job parameters."""
        stmt = select(model.Job.id).where(
            and_(
                model.Job.tool_id == tool_id,
                model.Job.user_id == user_id,
                model.Job.copied_from_job_id.is_(None),  # Always pick original job
            )
        )
        if tool_version:
            stmt = stmt.where(Job.tool_version == str(tool_version))

        if job_state is None:
            stmt = stmt.where(
                Job.state.in_(
                    [Job.states.NEW, Job.states.QUEUED, Job.states.WAITING, Job.states.RUNNING, Job.states.OK]
                )
            )
        else:
            if isinstance(job_state, str):
                stmt = stmt.where(Job.state == job_state)
            elif isinstance(job_state, list):
                stmt = stmt.where(or_(*[Job.state == s for s in job_state]))

        # Exclude jobs with deleted outputs.
        stmt = stmt.where(
            and_(
                model.Job.any_output_dataset_collection_instances_deleted == false(),
                model.Job.any_output_dataset_deleted == false(),
            )
        )

        for k, v in wildcard_param_dump.items():
            if v == {"__class__": "RuntimeValue"}:
                # TODO: verify this is always None, e.g. for a job run with a runtime input
                v = None
            elif k.endswith("|__identifier__"):
                # We've taken care of this while constructing the conditions based on ``input_data`` above
                continue
            elif k == "chromInfo" and "?.len" in v:
                continue
            value_dump = json.dumps(v, sort_keys=True)
            wildcard_value = value_dump.replace('"id": "__id_wildcard__"', '"id": %')
            a = aliased(JobParameter)
            if value_dump == wildcard_value:
                stmt = stmt.join(a).where(
                    and_(
                        Job.id == a.job_id,
                        a.name == k,
                        a.value == value_dump,
                    )
                )
            else:
                stmt = stmt.join(a).where(
                    and_(
                        Job.id == a.job_id,
                        a.name == k,
                        a.value.like(wildcard_value),
                    )
                )
        return stmt.subquery()
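
    # Example (illustrative): a wildcard dump entry whose serialized form
    # contains '"id": "__id_wildcard__"' becomes a SQL LIKE pattern containing
    # '"id": %', so any stored dataset id matches; entries without the
    # wildcard marker are compared for exact equality instead.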

    def _build_stmt_for_hda(self, stmt, data_conditions, used_ids, k, v, identifier):
        a = aliased(model.JobToInputDatasetAssociation)
        b = aliased(model.HistoryDatasetAssociation)
        c = aliased(model.HistoryDatasetAssociation)
        d = aliased(model.JobParameter)
        e = aliased(model.HistoryDatasetAssociationHistory)
        stmt = stmt.add_columns(a.dataset_id)
        used_ids.append(a.dataset_id)
        stmt = stmt.join(a, a.job_id == model.Job.id)
        hda_stmt = select(model.HistoryDatasetAssociation.id).where(
            model.HistoryDatasetAssociation.id == e.history_dataset_association_id
        )
        # b is the HDA used for the job
        stmt = stmt.join(b, a.dataset_id == b.id).join(c, c.dataset_id == b.dataset_id)  # type:ignore[attr-defined]
        name_condition = []
        if identifier:
            stmt = stmt.join(d)
            data_conditions.append(
                and_(
                    d.name.in_({f"{_}|__identifier__" for _ in k}),
                    d.value == json.dumps(identifier),
                )
            )
        else:
            hda_stmt = hda_stmt.where(e.name == c.name)
            name_condition.append(b.name == c.name)
        hda_stmt = (
            hda_stmt.where(
                e.extension == c.extension,
            )
            .where(
                a.dataset_version == e.version,
            )
            .where(
                e._metadata == c._metadata,
            )
        )
        data_conditions.append(
            and_(
                a.name.in_(k),
                c.id == v,  # c is the requested job input HDA
                # We need to make sure that the job we are looking for has been run with identical inputs.
                # Here we deal with three requirements:
                #  - the job's input dataset (=b) version is 0, meaning the job's input dataset is not yet ready
                #  - b's update_time is older than the job create time, meaning no changes occurred
                #  - the job has a dataset_version recorded, and that version's metadata matches c's metadata.
                or_(
                    and_(
                        or_(a.dataset_version.in_([0, b.version]), b.update_time < model.Job.create_time),
                        b.extension == c.extension,
                        b.metadata == c.metadata,
                        *name_condition,
                    ),
                    b.id.in_(hda_stmt),
                ),
                or_(b.deleted == false(), c.deleted == false()),
            )
        )
        return stmt

    def _build_stmt_for_ldda(self, stmt, data_conditions, used_ids, k, v):
        a = aliased(model.JobToInputLibraryDatasetAssociation)
        stmt = stmt.add_columns(a.ldda_id)
        stmt = stmt.join(a, a.job_id == model.Job.id)
        data_conditions.append(and_(a.name.in_(k), a.ldda_id == v))
        used_ids.append(a.ldda_id)
        return stmt

    def _build_stmt_for_hdca(self, stmt, data_conditions, used_ids, k, v):
        a = aliased(model.JobToInputDatasetCollectionAssociation)
        b = aliased(model.HistoryDatasetCollectionAssociation)
        c = aliased(model.HistoryDatasetCollectionAssociation)
        stmt = stmt.add_columns(a.dataset_collection_id)
        stmt = stmt.join(a, a.job_id == model.Job.id).join(b, b.id == a.dataset_collection_id).join(c, b.name == c.name)
        data_conditions.append(
            and_(
                a.name.in_(k),
                c.id == v,
                or_(
                    and_(b.deleted == false(), b.id == v),
                    and_(
                        or_(
                            c.copied_from_history_dataset_collection_association_id == b.id,
                            b.copied_from_history_dataset_collection_association_id == c.id,
                        ),
                        c.deleted == false(),
                    ),
                ),
            )
        )
        used_ids.append(a.dataset_collection_id)
        return stmt

    def _build_stmt_for_dce(self, stmt, data_conditions, used_ids, k, v):
        a = aliased(model.JobToInputDatasetCollectionElementAssociation)
        b = aliased(model.DatasetCollectionElement)
        c = aliased(model.DatasetCollectionElement)
        d = aliased(model.HistoryDatasetAssociation)
        e = aliased(model.HistoryDatasetAssociation)
        stmt = stmt.add_columns(a.dataset_collection_element_id)
        stmt = (
            stmt.join(a, a.job_id == model.Job.id)
            .join(b, b.id == a.dataset_collection_element_id)
            .join(
                c,
                and_(
                    c.element_identifier == b.element_identifier,
                    or_(c.hda_id == b.hda_id, c.child_collection_id == b.child_collection_id),
                ),
            )
            .outerjoin(d, d.id == c.hda_id)
            .outerjoin(e, e.dataset_id == d.dataset_id)  # type:ignore[attr-defined]
        )
        data_conditions.append(
            and_(
                a.name.in_(k),
                or_(
                    c.child_collection_id == b.child_collection_id,
                    and_(
                        c.hda_id == b.hda_id,
                        d.id == c.hda_id,
                        e.dataset_id == d.dataset_id,  # type:ignore[attr-defined]
                    ),
                ),
                c.id == v,
            )
        )
        used_ids.append(a.dataset_collection_element_id)
        return stmt
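
# Example (illustrative): constructing a JobSearch outside the app's usual
# dependency wiring might look like this (the session, managers and
# `security` id-encoding helper are assumed to exist already):
#
#     job_search = JobSearch(
#         sa_session=session,
#         hda_manager=hda_manager,
#         dataset_collection_manager=dataset_collection_manager,
#         ldda_manager=ldda_manager,
#         id_encoding_helper=security,
#     )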


def view_show_job(trans, job: Job, full: bool) -> Dict:
    is_admin = trans.user_is_admin
    job_dict = job.to_dict("element", system_details=is_admin)
    if trans.app.config.expose_dataset_path and "command_line" not in job_dict:
        job_dict["command_line"] = job.command_line
    if full:
        job_dict.update(
            dict(
                tool_stdout=job.tool_stdout,
                tool_stderr=job.tool_stderr,
                job_stdout=job.job_stdout,
                job_stderr=job.job_stderr,
                stderr=job.stderr,
                stdout=job.stdout,
                job_messages=job.job_messages,
                dependencies=job.dependencies,
            )
        )
        if is_admin:
            job_dict["user_email"] = job.get_user_email()
            job_dict["job_metrics"] = summarize_job_metrics(trans, job)
    return job_dict


def invocation_job_source_iter(sa_session, invocation_id):
    # TODO: Handle subworkflows.
    join = model.WorkflowInvocationStep.table.join(model.WorkflowInvocation)
    statement = (
        select(
            model.WorkflowInvocationStep.job_id,
            model.WorkflowInvocationStep.implicit_collection_jobs_id,
            model.WorkflowInvocationStep.state,
        )
        .select_from(join)
        .where(model.WorkflowInvocation.id == invocation_id)
    )
    for row in sa_session.execute(statement):
        if row[0]:
            yield ("Job", row[0], row[2])
        if row[1]:
            yield ("ImplicitCollectionJobs", row[1], row[2])


def fetch_job_states(sa_session, job_source_ids, job_source_types):
    assert len(job_source_ids) == len(job_source_types)
    job_ids = set()
    implicit_collection_job_ids = set()
    workflow_invocations_job_sources = {}
    workflow_invocation_states = (
        {}
    )  # should be set before we walk step states to be conservative on whether things are done expanding yet

    for job_source_id, job_source_type in zip(job_source_ids, job_source_types):
        if job_source_type == "Job":
            job_ids.add(job_source_id)
        elif job_source_type == "ImplicitCollectionJobs":
            implicit_collection_job_ids.add(job_source_id)
        elif job_source_type == "WorkflowInvocation":
            invocation_state = sa_session.get(model.WorkflowInvocation, job_source_id).state
            workflow_invocation_states[job_source_id] = invocation_state
            workflow_invocation_job_sources = []
            for (
                invocation_step_source_type,
                invocation_step_source_id,
                invocation_step_state,
            ) in invocation_job_source_iter(sa_session, job_source_id):
                workflow_invocation_job_sources.append(
                    (invocation_step_source_type, invocation_step_source_id, invocation_step_state)
                )
                if invocation_step_source_type == "Job":
                    job_ids.add(invocation_step_source_id)
                elif invocation_step_source_type == "ImplicitCollectionJobs":
                    implicit_collection_job_ids.add(invocation_step_source_id)
            workflow_invocations_job_sources[job_source_id] = workflow_invocation_job_sources
        else:
            raise RequestParameterInvalidException(f"Invalid job source type {job_source_type} found.")

    job_summaries = {}
    implicit_collection_jobs_summaries = {}
    for job_id in job_ids:
        job_summaries[job_id] = summarize_jobs_to_dict(sa_session, sa_session.get(Job, job_id))
    for implicit_collection_jobs_id in implicit_collection_job_ids:
        implicit_collection_jobs_summaries[implicit_collection_jobs_id] = summarize_jobs_to_dict(
            sa_session, sa_session.get(model.ImplicitCollectionJobs, implicit_collection_jobs_id)
        )

    rval = []
    for job_source_id, job_source_type in zip(job_source_ids, job_source_types):
        if job_source_type == "Job":
            rval.append(job_summaries[job_source_id])
        elif job_source_type == "ImplicitCollectionJobs":
            rval.append(implicit_collection_jobs_summaries[job_source_id])
        else:
            invocation_state = workflow_invocation_states[job_source_id]
            invocation_job_summaries = []
            invocation_implicit_collection_job_summaries = []
            invocation_step_states = []
            for (
                invocation_step_source_type,
                invocation_step_source_id,
                invocation_step_state,
            ) in workflow_invocations_job_sources[job_source_id]:
                invocation_step_states.append(invocation_step_state)
                if invocation_step_source_type == "Job":
                    invocation_job_summaries.append(job_summaries[invocation_step_source_id])
                else:
                    invocation_implicit_collection_job_summaries.append(
                        implicit_collection_jobs_summaries[invocation_step_source_id]
                    )
            rval.append(
                summarize_invocation_jobs(
                    job_source_id,
                    invocation_job_summaries,
                    invocation_implicit_collection_job_summaries,
                    invocation_state,
                    invocation_step_states,
                )
            )
    return rval


def summarize_invocation_jobs(
    invocation_id, job_summaries, implicit_collection_job_summaries, invocation_state, invocation_step_states
):
    states = {}
    if invocation_state == "scheduled":
        all_scheduled = True
        for invocation_step_state in invocation_step_states:
            all_scheduled = all_scheduled and invocation_step_state == "scheduled"
        if all_scheduled:
            populated_state = "ok"
        else:
            populated_state = "new"
    elif invocation_state in ["cancelled", "failed"]:
        populated_state = "failed"
    else:
        # "new" and "ready" invocation states both map to "new".
        populated_state = "new"

    def merge_states(component_states):
        for key, value in component_states.items():
            if key not in states:
                states[key] = value
            else:
                states[key] += value

    for job_summary in job_summaries:
        merge_states(job_summary["states"])
    for implicit_collection_job_summary in implicit_collection_job_summaries:
        # Un-populated ("new") collections might not yet have a states entry.
        if "states" in implicit_collection_job_summary:
            merge_states(implicit_collection_job_summary["states"])
        component_populated_state = implicit_collection_job_summary["populated_state"]
        if component_populated_state == "failed":
            populated_state = "failed"
        elif component_populated_state == "new" and populated_state != "failed":
            populated_state = "new"

    rval = {
        "id": invocation_id,
        "model": "WorkflowInvocation",
        "states": states,
        "populated_state": populated_state,
    }
    return rval
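
# Example (illustrative): merging component summaries with states {"ok": 2}
# and {"ok": 1, "error": 1} produces {"ok": 3, "error": 1}; a single
# component whose populated_state is "failed" makes the whole invocation
# summary report populated_state "failed".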


class JobsSummary(TypedDict):
    populated_state: str
    states: Dict[str, int]
    model: str
    id: int


def summarize_jobs_to_dict(sa_session, jobs_source) -> Optional[JobsSummary]:
    """Produce a summary of jobs for job summary endpoints.

    :type jobs_source: a Job or ImplicitCollectionJobs or None
    :param jobs_source: the object to summarize

    :rtype: dict
    :returns: dictionary containing job summary information
    """
    rval: Optional[JobsSummary] = None
    if jobs_source is None:
        pass
    elif isinstance(jobs_source, model.Job):
        rval = {
            "populated_state": "ok",
            "states": {jobs_source.state: 1},
            "model": "Job",
            "id": jobs_source.id,
        }
    else:
        populated_state = jobs_source.populated_state
        rval = {
            "id": jobs_source.id,
            "populated_state": populated_state,
            "model": "ImplicitCollectionJobs",
            "states": {},
        }
        if populated_state == "ok":
            # produce state summary...
            join = model.ImplicitCollectionJobs.table.join(
                model.ImplicitCollectionJobsJobAssociation.table.join(model.Job)
            )
            statement = (
                select(model.Job.state, func.count())
                .select_from(join)
                .where(model.ImplicitCollectionJobs.id == jobs_source.id)
                .group_by(model.Job.state)
            )
            for row in sa_session.execute(statement):
                rval["states"][row[0]] = row[1]
    return rval
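
# Example (illustrative): a single queued Job summarizes to
#     {"populated_state": "ok", "states": {"queued": 1}, "model": "Job", "id": 7}
# while an ImplicitCollectionJobs source aggregates a count per job state.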


def summarize_job_metrics(trans, job):
    """Produce a dict-ified version of job metrics ready for tabular rendering.

    Precondition: the caller has verified the job is accessible to the user
    represented by the trans parameter.
    """
    safety_level = Safety.SAFE
    if trans.user_is_admin:
        safety_level = Safety.UNSAFE
    elif trans.app.config.expose_potentially_sensitive_job_metrics:
        safety_level = Safety.POTENTIALLY_SENSITVE
    raw_metrics = [
        RawMetric(
            m.metric_name,
            m.metric_value,
            m.plugin,
        )
        for m in job.metrics
    ]
    dictifiable_metrics = trans.app.job_metrics.dictifiable_metrics(raw_metrics, safety_level)
    return [d.dict() for d in dictifiable_metrics]


def summarize_destination_params(trans, job):
    """Produce a dict-ified version of job destination parameters ready for tabular rendering.

    Precondition: the caller has verified the job is accessible to the user
    represented by the trans parameter.
    """
    destination_params = {
        "Runner": job.job_runner_name,
        "Runner Job ID": job.job_runner_external_id,
        "Handler": job.handler,
    }
    if job_destination_params := job.destination_params:
        destination_params.update(job_destination_params)
    return destination_params


def summarize_job_parameters(trans, job: Job):
    """Produce a dict-ified version of job parameters ready for tabular rendering.

    Precondition: the caller has verified the job is accessible to the user
    represented by the trans parameter.
    """

    # More client logic here than is ideal but it is hard to reason about
    # tool parameter types on the client relative to the server.
    def inputs_recursive(input_params, param_values, depth=1, upgrade_messages=None):
        if upgrade_messages is None:
            upgrade_messages = {}
        rval = []
        for input in input_params.values():
            if input.name in param_values:
                input_value = param_values[input.name]
                if input.type == "repeat":
                    for i in range(len(input_value)):
                        rval.extend(inputs_recursive(input.inputs, input_value[i], depth=depth + 1))
                elif input.type == "section":
                    # Get the value of the current Section parameter
                    rval.append(dict(text=input.name, depth=depth))
                    rval.extend(
                        inputs_recursive(
                            input.inputs,
                            input_value,
                            depth=depth + 1,
                            upgrade_messages=upgrade_messages.get(input.name),
                        )
                    )
                elif input.type == "conditional":
                    try:
                        current_case = input_value["__current_case__"]
                        is_valid = True
                    except Exception:
                        current_case = None
                        is_valid = False
                    if is_valid:
                        rval.append(
                            dict(text=input.test_param.label, depth=depth, value=input.cases[current_case].value)
                        )
                        rval.extend(
                            inputs_recursive(
                                input.cases[current_case].inputs,
                                input_value,
                                depth=depth + 1,
                                upgrade_messages=upgrade_messages.get(input.name),
                            )
                        )
                    else:
                        rval.append(
                            dict(
                                text=input.name,
                                depth=depth,
                                notes="The previously used value is no longer valid.",
                                error=True,
                            )
                        )
                elif input.type == "upload_dataset":
                    rval.append(
                        dict(
                            text=input.group_title(param_values),
                            depth=depth,
                            value=f"{len(input_value)} uploaded datasets",
                        )
                    )
                elif (
                    input.type == "data"
                    or input.type == "data_collection"
                    or isinstance(input_value, model.HistoryDatasetAssociation)
                ):
                    value: List[Union[Dict[str, Any], None]] = []
                    for element in listify(input_value):
                        if isinstance(element, model.HistoryDatasetAssociation):
                            hda = element
                            value.append({"src": "hda", "id": element.id, "hid": hda.hid, "name": hda.name})
                        elif isinstance(element, model.DatasetCollectionElement):
                            value.append({"src": "dce", "id": element.id, "name": element.element_identifier})
                        elif isinstance(element, model.HistoryDatasetCollectionAssociation):
                            value.append({"src": "hdca", "id": element.id, "hid": element.hid, "name": element.name})
                        elif element is None:
                            value.append(None)
                        else:
                            raise Exception(
                                f"Unhandled data input parameter type encountered {element.__class__.__name__}"
                            )
                    rval.append(dict(text=input.label, depth=depth, value=value))
                elif input.visible:
                    if hasattr(input, "label") and input.label:
                        label = input.label
                    else:
                        # A value for label is not required; fall back to the input name (same as the tool panel).
                        label = input.name
                    rval.append(
                        dict(
                            text=label,
                            depth=depth,
                            value=input.value_to_display_text(input_value),
                            notes=upgrade_messages.get(input.name, ""),
                        )
                    )
            else:
                # Parameter does not have a stored value; get the parameter label.
                if input.type == "conditional":
                    label = input.test_param.label
                else:
                    label = input.label or input.name
                rval.append(
                    dict(text=label, depth=depth, notes="not used (parameter was added after this job was run)")
                )
        return rval

    # Load the tool
    app = trans.app
    toolbox = app.toolbox
    tool = toolbox.get_tool(job.tool_id, job.tool_version)
    params_objects = None
    parameters = []
    upgrade_messages = {}
    has_parameter_errors = False

    # Load parameter objects; if a parameter type has changed, it's possible for the value to no longer be valid.
    if tool:
        try:
            params_objects = job.get_param_values(app, ignore_errors=False)
        except Exception:
            params_objects = job.get_param_values(app, ignore_errors=True)
            # Use a fresh param_values in the following call, since we want to
            # display the original values as much as possible.
            upgrade_messages = tool.check_and_update_param_values(
                job.get_param_values(app, ignore_errors=True), trans, update_values=False
            )
            has_parameter_errors = True
        parameters = inputs_recursive(tool.inputs, params_objects, depth=1, upgrade_messages=upgrade_messages)
    else:
        has_parameter_errors = True

    return {
        "parameters": parameters,
        "has_parameter_errors": has_parameter_errors,
        "outputs": summarize_job_outputs(job=job, tool=tool, params=params_objects),
    }
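
# Example (illustrative): the returned structure looks roughly like
#     {
#         "parameters": [
#             {"text": "Input dataset", "depth": 1,
#              "value": [{"src": "hda", "id": 42, "hid": 1, "name": "data.txt"}]},
#         ],
#         "has_parameter_errors": False,
#         "outputs": {"out_file1": [{"label": None, "value": {"src": "hda", "id": 43}}]},
#     }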


def get_output_name(tool, output, params):
    try:
        return tool.tool_action.get_output_name(
            output,
            tool=tool,
            params=params,
        )
    except Exception:
        pass


def summarize_job_outputs(job: model.Job, tool, params):
    outputs = defaultdict(list)
    output_labels = {}
    possible_outputs = (
        ("hda", "dataset_id", job.output_datasets),
        ("ldda", "ldda_id", job.output_library_datasets),
        ("hdca", "dataset_collection_id", job.output_dataset_collection_instances),
    )
    for src, attribute, output_associations in possible_outputs:
        output_associations = cast(List, output_associations)  # during iteration, mypy sees it as object
        for output_association in output_associations:
            output_name = output_association.name
            if output_name not in output_labels and tool:
                tool_output = tool.output_collections if src == "hdca" else tool.outputs
                output_labels[output_name] = get_output_name(
                    tool=tool, output=tool_output.get(output_name), params=params
                )
            label = output_labels.get(output_name)
            outputs[output_name].append(
                {
                    "label": label,
                    "value": {"src": src, "id": getattr(output_association, attribute)},
                }
            )
    return outputs


def get_jobs_to_check_at_startup(session: galaxy_scoped_session, track_jobs_in_database: bool, config):
    if track_jobs_in_database:
        in_list = (Job.states.QUEUED, Job.states.RUNNING, Job.states.STOPPED)
    else:
        in_list = (Job.states.NEW, Job.states.QUEUED, Job.states.RUNNING)

    stmt = (
        select(Job)
        .execution_options(yield_per=YIELD_PER_ROWS)
        .filter(Job.state.in_(in_list) & (Job.handler == config.server_name))
    )
    if config.user_activation_on:
        # Filter out the jobs of inactive users.
        stmt = stmt.outerjoin(User).filter(or_((Job.user_id == null()), (User.active == true())))

    return session.scalars(stmt).all()


def get_job(session, *where_clauses):
    stmt = select(Job).where(*where_clauses).limit(1)
    return session.scalars(stmt).first()
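
# Example (illustrative): where clauses are ANDed together and the first
# matching Job (or None) is returned:
#
#     job = get_job(session, Job.id == 42)
#     running_cat1 = get_job(session, Job.state == Job.states.RUNNING, Job.tool_id == "cat1")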