# Source code for galaxy.webapps.reports.controllers.jobs

import calendar
import logging
import re
from collections import namedtuple
from datetime import (
    date,
    datetime,
    timedelta
)
from math import ceil, floor

import sqlalchemy as sa
from markupsafe import escape
from sqlalchemy import and_, not_, or_

from galaxy import model, util
from galaxy.web.legacy_framework import grids
from galaxy.webapps.base.controller import BaseUIController, web
from galaxy.webapps.reports.controllers.query import ReportQueryBuilder

log = logging.getLogger(__name__)


class Timer:
    """
    Minimal stopwatch built on ``datetime.now()``.

    On construction the timer runs one empty start/stop cycle and stores the
    measured duration in ``ERROR``; ``time_elapsed`` subtracts that value from
    later measurements.
    """

    def __init__(self):
        self.start()
        self.stop()
        # ERROR does not exist yet, so this call returns the raw delta of the
        # empty start/stop cycle above.
        self.ERROR = self.time_elapsed()

    def start(self):
        """Record the current time as the start of a measurement."""
        self.start_time = datetime.now()

    def stop(self):
        """
        Finish the measurement begun by ``start`` and store it in ``time_delta``.

        If ``start`` has not been called since the last ``stop``, a message is
        printed and the previous ``time_delta`` (if any) is left untouched.
        """
        # A missing instance attribute raises AttributeError, not NameError.
        try:
            started_at = self.start_time
        except AttributeError:
            print("You need to start before you can stop!")
            return
        self.time_delta = datetime.now() - started_at
        # Forget the start so a second stop() is reported instead of silently
        # re-measuring from a stale start time.
        del self.start_time

    def time_elapsed(self):
        """
        Return the last measured duration minus the ``ERROR`` overhead.

        Returns ``None`` (after printing a message) when no measurement has
        been completed yet.
        """
        try:
            measured = self.time_delta
        except AttributeError:
            print("You need to start and stop before there's an elapsed time!")
            return None

        overhead = getattr(self, 'ERROR', None)
        if overhead is None:
            # Only happens while __init__ is still calibrating.
            return measured
        return measured - overhead


def sorter(default_sort_id, kwd):
    """
    Initialize sorting variables

    Reads ``sort_id`` and ``order`` from ``kwd`` and returns a ``SortSpec``
    namedtuple with the fields:

    - ``sort_id``: the column identifier to sort on; ``default_sort_id`` is
      used when ``sort_id`` is missing, empty or the string ``"default"``.
    - ``order``: ``"asc"`` or ``"desc"``; anything else becomes ``"desc"``.
    - ``arrow``: an HTML entity (prefixed with a space) to show beside the
      sorted column.
    - ``exc_order``: the result of ``sa.asc(sort_id)`` / ``sa.desc(sort_id)``.
    """
    SortSpec = namedtuple('SortSpec', ['sort_id', 'order', 'arrow', 'exc_order'])

    sort_id = kwd.get('sort_id')
    order = kwd.get('order')

    # Fall back to the default when no usable sort column was supplied;
    # otherwise None would be handed to sa.asc()/sa.desc().
    if not sort_id or sort_id == "default":
        sort_id = default_sort_id

    # NOTE(review): sort_id may come straight from request parameters; confirm
    # how sa.asc()/sa.desc() treat arbitrary strings in the SQLAlchemy version
    # in use.
    if order == "asc":
        _order = sa.asc(sort_id)
    else:
        # "desc" and every unrecognised value sort descending.
        order = "desc"
        _order = sa.desc(sort_id)

    # Create an arrow icon to put beside the ordered column
    up_arrow = "&#x2191;"
    down_arrow = "&#x2193;"
    arrow = " " + (down_arrow if order == "asc" else up_arrow)

    return SortSpec(sort_id, order, arrow, _order)


def get_spark_time(time_period):
    """
    Normalise a sparkline time period.

    Returns a ``(name, factor)`` tuple where ``factor`` is 1.0, 7.0, 30.0 or
    365.0 for "days", "weeks", "months" and "years" respectively (presumably
    the period length in days).  Any other value is treated as "days".
    """
    known_periods = (
        ("days", 1.0),
        ("weeks", 7.0),
        ("months", 30.0),
        ("years", 365.0),
    )
    for period_name, factor in known_periods:
        if time_period == period_name:
            return time_period, factor

    # Unrecognised (or missing) period: fall back to days.
    return "days", 1.0


def get_curr_item(check_item, unique_items):
    """
    Return the label to display for ``check_item`` so each item is shown once.

    The first time an item is seen it is added to ``unique_items`` (the set is
    modified in place) and returned as the label; on later calls the label is
    the empty string.  The set is returned alongside the label.
    """
    seen_before = check_item in unique_items
    if not seen_before:
        unique_items.add(check_item)
    label = '' if seen_before else check_item
    return (label, unique_items)


class SpecifiedDateListGrid(grids.Grid):
    """
    Grid listing jobs, with columns that can filter by state, tool id,
    destination id, user email and creation date.
    """

    class JobIdColumn(grids.IntegerColumn):
        """Displays the job id."""

        def get_value(self, trans, grid, job):
            return job.id

    class StateColumn(grids.TextColumn):
        """Displays the job state inside a state-coloured box."""

        def get_value(self, trans, grid, job):
            return f'<div class="count-box state-color-{job.state}">{job.state}</div>'

        def filter(self, trans, user, query, column_filter):
            # 'Unfinished' selects every job that is not ok, error or deleted;
            # any other value leaves the query unchanged in this column.
            if column_filter == 'Unfinished':
                return query.filter(not_(or_(model.Job.table.c.state == model.Job.states.OK,
                                             model.Job.table.c.state == model.Job.states.ERROR,
                                             model.Job.table.c.state == model.Job.states.DELETED)))
            return query

    class ToolColumn(grids.TextColumn):
        """Displays the job's tool id and filters on an exact tool id match."""

        def get_value(self, trans, grid, job):
            return job.tool_id

        def filter(self, trans, user, query, column_filter):
            if column_filter is not None:
                query = query.filter(model.Job.table.c.tool_id == column_filter)

            return query

    class CreateTimeColumn(grids.DateTimeColumn):
        """Displays the job creation time, e.g. ``Jan 02, 2020, 13:45:00``."""

        def get_value(self, trans, grid, job):
            return job.create_time.strftime("%b %d, %Y, %H:%M:%S")

    class UserColumn(grids.GridColumn):
        """Displays the (escaped) email of the job's user, or 'anonymous'."""

        def get_value(self, trans, grid, job):
            if job.user:
                return escape(job.get_user_email())
            return 'anonymous'

    class EmailColumn(grids.GridColumn):
        """Hidden column filtering jobs by the submitting user's email."""

        def filter(self, trans, user, query, column_filter):
            if column_filter == 'All':
                return query
            return query.filter(and_(model.Job.table.c.user_id == model.User.table.c.id,
                                     model.User.table.c.email == column_filter))

    class DestinationIdColumn(grids.GridColumn):
        """Filters jobs by destination id."""

        def filter(self, trans, user, query, column_filter):
            if column_filter == 'All':
                return query
            return query.filter(model.Job.table.c.destination_id == column_filter)

    class SpecifiedDateColumn(grids.GridColumn):
        """Hidden column filtering jobs by creation day or creation month."""

        def filter(self, trans, user, query, column_filter):
            """
            Restrict ``query`` to jobs created in the given day or month.

            ``column_filter`` is 'All' (no restriction), a day ``YYYY-MM-DD``
            or a month ``YYYY-MM``.  Raises ``ValueError`` for any other shape
            or for non-numeric / out-of-range date parts.
            """
            if column_filter == 'All':
                return query
            # We are either filtering on a date like YYYY-MM-DD or on a month like YYYY-MM,
            # so we need to figure out which type of date we have
            dash_count = column_filter.count('-')
            if dash_count == 2:  # We are filtering on a date like YYYY-MM-DD
                year, month, day = map(int, column_filter.split("-"))
                start_date = date(year, month, day)
                end_date = start_date + timedelta(days=1)
            elif dash_count == 1:  # We are filtering on a month like YYYY-MM
                year, month = map(int, column_filter.split("-"))
                start_date = date(year, month, 1)
                end_date = start_date + timedelta(days=calendar.monthrange(year, month)[1])
            else:
                # Previously this fell through and failed on an unbound start_date.
                raise ValueError(
                    f"Unsupported date filter {column_filter!r}; expected YYYY-MM-DD or YYYY-MM"
                )

            # Half-open range: start_date <= create_time < end_date.
            return query.filter(and_(self.model_class.table.c.create_time >= start_date,
                                     self.model_class.table.c.create_time < end_date))

    # Grid definition
    use_async = False
    model_class = model.Job
    title = "Jobs"
    default_sort_key = "id"
    columns = [
        JobIdColumn("Id",
                    key="id",
                    link=(lambda item: dict(operation="job_info", id=item.id, webapp="reports")),
                    attach_popup=False,
                    filterable="advanced"),
        StateColumn("State",
                    key="state",
                    attach_popup=False),
        DestinationIdColumn("Destination Id",
                            key="destination_id",
                            attach_popup=False),
        ToolColumn("Tool Id",
                   key="tool_id",
                   link=(lambda item: dict(operation="tool_per_month", id=item.id, webapp="reports")),
                   attach_popup=False),
        CreateTimeColumn("Creation Time",
                         key="create_time",
                         attach_popup=False),
        UserColumn("User",
                   key="email",
                   model_class=model.User,
                   link=(lambda item: dict(operation="user_per_month", id=item.id, webapp="reports")),
                   attach_popup=False),
        # Columns that are valid for filtering but are not visible.
        SpecifiedDateColumn("Specified Date",
                            key="specified_date",
                            visible=False),
        EmailColumn("Email",
                    key="email",
                    model_class=model.User,
                    visible=False),
        grids.StateColumn("State",
                          key="state",
                          visible=False,
                          filterable="advanced")
    ]
    # Free-text search covers the State and Destination Id columns.
    columns.append(grids.MulticolFilterColumn("Search",
                                              cols_to_filter=[columns[1], columns[2]],
                                              key="free-text-search",
                                              visible=False,
                                              filterable="standard"))
    default_filter = {'specified_date': 'All'}
    num_rows_per_page = 50
    use_paging = True

    def build_initial_query(self, trans, **kwd):
        """
        Return the base job query: jobs joined to their user, excluding jobs
        whose user id is the one returned by ``get_monitor_id`` for the
        ``monitor_email`` parameter, with eager loading disabled.
        """
        params = util.Params(kwd)
        monitor_email = params.get('monitor_email', 'monitor@bx.psu.edu')
        monitor_user_id = get_monitor_id(trans, monitor_email)
        return trans.sa_session.query(self.model_class) \
                               .join(model.User) \
                               .filter(model.Job.table.c.user_id != monitor_user_id)\
                               .enable_eagerloads(False)


[docs]class Jobs(BaseUIController, ReportQueryBuilder):

    """
    Class contains functions for querying data requested by user via the webapp. It exposes the functions and
    responds to requests with the filled .mako templates.
    """

    specified_date_list_grid = SpecifiedDateListGrid()

[docs]    @web.expose
    def specified_date_handler(self, trans, **kwd):
        # We add params to the keyword dict in this method in order to rename the param
        # with an "f-" prefix, simulating filtering by clicking a search link.  We have
        # to take this approach because the "-" character is illegal in HTTP requests.
        kwd['sort_id'] = 'default'
        kwd['order'] = 'default'

        if 'f-specified_date' in kwd and 'specified_date' not in kwd:
            # The user clicked a State link in the Advanced Search box, so 'specified_date'
            # will have been eliminated.
            pass
        elif 'specified_date' not in kwd:
            kwd['f-specified_date'] = 'All'
        else:
            kwd['f-specified_date'] = kwd['specified_date']
        if 'operation' in kwd:
            operation = kwd['operation'].lower()
            if operation == "job_info":
                return trans.response.send_redirect(web.url_for(controller='jobs',
                                                                action='job_info',
                                                                **kwd))
            elif operation == "tool_for_month":
                kwd['f-tool_id'] = kwd['tool_id']
            elif operation == "tool_per_month":
                # The received id is the job id, so we need to get the job's tool_id.
                job_id = kwd.get('id', None)
                job = get_job(trans, job_id)
                kwd['tool_id'] = job.tool_id
                return trans.response.send_redirect(web.url_for(controller='jobs',
                                                                action='tool_per_month',
                                                                **kwd))
            elif operation == "user_for_month":
                kwd['f-email'] = util.restore_text(kwd['email'])
            elif operation == "user_for_month_by_destination":
                kwd['f-email'] = util.restore_text(kwd['email'])
                kwd['f-destination_id'] = kwd['destination_id']
            elif operation == "user_per_month":
                # The received id is the job id, so we need to get the id of the user
                # that submitted the job.
                job_id = kwd.get('id', None)
                job = get_job(trans, job_id)
                kwd['email'] = job.get_user_email()
                return trans.response.send_redirect(web.url_for(controller='jobs',
                                                                action='user_per_month',
                                                                **kwd))
            elif operation == "specified_date_in_error":
                kwd['f-state'] = 'error'
            elif operation == "unfinished":
                kwd['f-state'] = 'Unfinished'
            elif operation == "specified_tool_in_error":
                kwd['f-state'] = 'error'
                kwd['f-tool_id'] = kwd['tool_id']
        return self.specified_date_list_grid(trans, **kwd)

    def _calculate_trends_for_jobs(self, sa_session, jobs_query):
        trends = dict()
        for job in sa_session.execute(jobs_query):
            job_day = int(job.date.strftime("%-d")) - 1
            job_month = int(job.date.strftime("%-m"))
            job_month_name = job.date.strftime("%B")
            job_year = job.date.strftime("%Y")
            key = str(job_month_name + job_year)

            try:
                trends[key][job_day] += 1
            except KeyError:
                job_year = int(job_year)
                wday, day_range = calendar.monthrange(job_year, job_month)
                trends[key] = [0] * day_range
                trends[key][job_day] += 1
        return trends

    def _calculate_job_table(self, sa_session, jobs_query, by_destination=False):
        jobs = []
        unique_month_year_strs = set()
        for row in sa_session.execute(jobs_query):
            month_name = row.date.strftime("%B")
            year = int(row.date.strftime("%Y"))

            if str(by_destination).lower() == 'true':
                month_year_str = "%s %s" % (month_name, year)
                curr_month_year_str, unique_month_year_strs = get_curr_item(month_year_str, unique_month_year_strs)
                if curr_month_year_str == '':
                    curr_month = ''
                    curr_year = ''
                else:
                    curr_month = row.date.strftime("%B")
                    curr_year = row.date.strftime("%Y")
                jobs.append((
                    row.date.strftime("%Y-%m"),
                    row.total_jobs,
                    curr_month,
                    curr_year,
                    row.user_email,
                    row.destination_id,
                    row.execute_time
                ))
            else:
                jobs.append((
                    row.date.strftime("%Y-%m"),
                    row.total_jobs,
                    month_name,
                    year
                ))
        return jobs

[docs]    @web.expose
    def specified_month_all(self, trans, **kwd):
        """
        Queries the DB for all jobs in given month, defaults to current month.
        """
        message = ''
        PageSpec = namedtuple('PageSpec', ['entries', 'offset', 'page', 'pages_found'])

        params = util.Params(kwd)
        monitor_email = params.get('monitor_email', 'monitor@bx.psu.edu')
        specs = sorter('date', kwd)
        offset = 0
        limit = 10
        sort_id = specs.sort_id
        order = specs.order
        arrow = specs.arrow
        _order = specs.exc_order

        if "entries" in kwd:
            entries = int(kwd.get('entries'))
        else:
            entries = 10
        limit = entries * 4

        if "offset" in kwd:
            offset = int(kwd.get('offset'))
        else:
            offset = 0

        if "page" in kwd:
            page = int(kwd.get('page'))
        else:
            page = 1

        # In case we don't know which is the monitor user we will query for all jobs
        monitor_user_id = get_monitor_id(trans, monitor_email)

        # If specified_date is not received, we'll default to the current month
        specified_date = kwd.get('specified_date', datetime.utcnow().strftime("%Y-%m-%d"))
        specified_month = specified_date[:7]

        year, month = map(int, specified_month.split("-"))
        start_date = date(year, month, 1)
        end_date = start_date + timedelta(days=calendar.monthrange(year, month)[1])
        month_label = start_date.strftime("%B")
        year_label = start_date.strftime("%Y")

        # Use to make the page table
        month_jobs = sa.select((sa.func.date(model.Job.table.c.create_time).label('date'),
                                sa.func.count(model.Job.table.c.id).label('total_jobs')),
                               whereclause=sa.and_(model.Job.table.c.user_id != monitor_user_id,
                                                   model.Job.table.c.create_time >= start_date,
                                                   model.Job.table.c.create_time < end_date),
                               from_obj=[model.Job.table],
                               group_by=['date'],
                               order_by=[_order],
                               offset=offset,
                               limit=limit)

        # Use to make trendline
        all_jobs = sa.select((model.Job.table.c.create_time.label('date'), model.Job.table.c.id.label('id')),
                             whereclause=sa.and_(model.Job.table.c.user_id != monitor_user_id,
                                                 model.Job.table.c.create_time >= start_date,
                                                 model.Job.table.c.create_time < end_date))

        trends = dict()
        for job in trans.sa_session.execute(all_jobs):
            job_hour = int(job.date.strftime("%-H"))
            job_day = job.date.strftime("%d")

            try:
                trends[job_day][job_hour] += 1
            except KeyError:
                trends[job_day] = [0] * 24
                trends[job_day][job_hour] += 1

        jobs = []
        for row in trans.sa_session.execute(month_jobs):
            row_dayname = row.date.strftime("%A")
            row_day = row.date.strftime("%d")

            jobs.append((row_dayname,
                         row_day,
                         row.total_jobs,
                         row.date))

        pages_found = ceil(len(jobs) / float(entries))
        page_specs = PageSpec(entries, offset, page, pages_found)

        return trans.fill_template('/webapps/reports/jobs_specified_month_all.mako',
                                   order=order,
                                   arrow=arrow,
                                   sort_id=sort_id,
                                   month_label=month_label,
                                   year_label=year_label,
                                   month=month,
                                   page_specs=page_specs,
                                   jobs=jobs,
                                   trends=trends,
                                   is_user_jobs_only=monitor_user_id,
                                   message=message)

[docs]    @web.expose
    def specified_month_in_error(self, trans, **kwd):
        """
        Queries the DB for the user jobs in error.
        """
        message = ''
        PageSpec = namedtuple('PageSpec', ['entries', 'offset', 'page', 'pages_found'])

        params = util.Params(kwd)
        monitor_email = params.get('monitor_email', 'monitor@bx.psu.edu')
        specs = sorter('date', kwd)
        sort_id = specs.sort_id
        order = specs.order
        arrow = specs.arrow
        _order = specs.exc_order
        offset = 0
        limit = 10

        if "entries" in kwd:
            entries = int(kwd.get('entries'))
        else:
            entries = 10
        limit = entries * 4

        if "offset" in kwd:
            offset = int(kwd.get('offset'))
        else:
            offset = 0

        if "page" in kwd:
            page = int(kwd.get('page'))
        else:
            page = 1

        # In case we don't know which is the monitor user we will query for all jobs instead
        monitor_user_id = get_monitor_id(trans, monitor_email)

        # If specified_date is not received, we'll default to the current month
        specified_date = kwd.get('specified_date', datetime.utcnow().strftime("%Y-%m-%d"))
        specified_month = specified_date[:7]
        year, month = map(int, specified_month.split("-"))
        start_date = date(year, month, 1)
        end_date = start_date + timedelta(days=calendar.monthrange(year, month)[1])
        month_label = start_date.strftime("%B")
        year_label = start_date.strftime("%Y")

        month_jobs_in_error = sa.select((sa.func.date(model.Job.table.c.create_time).label('date'),
                                         sa.func.count(model.Job.table.c.id).label('total_jobs')),
                                        whereclause=sa.and_(model.Job.table.c.user_id != monitor_user_id,
                                                            model.Job.table.c.state == 'error',
                                                            model.Job.table.c.create_time >= start_date,
                                                            model.Job.table.c.create_time < end_date),
                                        from_obj=[model.Job.table],
                                        group_by=['date'],
                                        order_by=[_order],
                                        offset=offset,
                                        limit=limit)

        # Use to make trendline
        all_jobs_in_error = sa.select((model.Job.table.c.create_time.label('date'), model.Job.table.c.id.label('id')),
                                      whereclause=sa.and_(model.Job.table.c.user_id != monitor_user_id,
                                                          model.Job.table.c.state == 'error',
                                                          model.Job.table.c.create_time >= start_date,
                                                          model.Job.table.c.create_time < end_date))

        trends = dict()
        for job in trans.sa_session.execute(all_jobs_in_error):
            job_hour = int(job.date.strftime("%-H"))
            job_day = job.date.strftime("%d")

            try:
                trends[job_day][job_hour] += 1
            except KeyError:
                trends[job_day] = [0] * 24
                trends[job_day][job_hour] += 1

        jobs = []
        for row in trans.sa_session.execute(month_jobs_in_error):
            row_dayname = row.date.strftime("%A")
            row_day = row.date.strftime("%d")

            jobs.append((row_dayname,
                         row_day,
                         row.total_jobs,
                         row.date))

        pages_found = ceil(len(jobs) / float(entries))
        page_specs = PageSpec(entries, offset, page, pages_found)

        return trans.fill_template('/webapps/reports/jobs_specified_month_in_error.mako',
                                   order=order,
                                   arrow=arrow,
                                   sort_id=sort_id,
                                   month_label=month_label,
                                   year_label=year_label,
                                   month=month,
                                   jobs=jobs,
                                   trends=trends,
                                   message=message,
                                   is_user_jobs_only=monitor_user_id,
                                   page_specs=page_specs)

[docs]    @web.expose
    def per_month_all(self, trans, **kwd):
        """
        Queries the DB for all jobs. Avoids monitor jobs.  The
        by_destination param will group by User.email and
        Job.destination_id.
        """
        by_destination = str(kwd.get('by_destination', False)).lower()
        message = ''
        PageSpec = namedtuple('PageSpec', ['entries', 'offset', 'page', 'pages_found'])

        params = util.Params(kwd)
        monitor_email = params.get('monitor_email', 'monitor@bx.psu.edu')
        specs = sorter('date', kwd)
        sort_id = specs.sort_id
        order = specs.order
        arrow = specs.arrow
        _order = specs.exc_order
        offset = 0
        limit = 10

        if "entries" in kwd:
            entries = int(kwd.get('entries'))
        else:
            entries = 10
        limit = entries * 4

        if "offset" in kwd:
            offset = int(kwd.get('offset'))
        else:
            offset = 0

        if "page" in kwd:
            page = int(kwd.get('page'))
        else:
            page = 1

        # In case we don't know which is the monitor user we will query for all jobs
        monitor_user_id = get_monitor_id(trans, monitor_email)

        # Use to make the page table
        if by_destination == 'true':
            jobs_by_month = sa.select((self.select_month(model.Job.table.c.create_time).label('date'),
                                       model.Job.table.c.destination_id.label('destination_id'),
                                       sa.func.sum(model.Job.table.c.update_time - model.Job.table.c.create_time).label('execute_time'),
                                       sa.func.count(model.Job.table.c.id).label('total_jobs'),
                                       model.User.table.c.email.label('user_email')),
                                      whereclause=model.Job.table.c.user_id != monitor_user_id,
                                      from_obj=[sa.join(model.Job.table, model.User.table)],
                                      group_by=['user_email', 'date', 'destination_id'],
                                      order_by=[_order],
                                      offset=offset,
                                      limit=limit)
        else:
            jobs_by_month = sa.select((self.select_month(model.Job.table.c.create_time).label('date'),
                                       sa.func.count(model.Job.table.c.id).label('total_jobs')),
                                      whereclause=model.Job.table.c.user_id != monitor_user_id,
                                      from_obj=[model.Job.table],
                                      group_by=self.group_by_month(model.Job.table.c.create_time),
                                      order_by=[_order],
                                      offset=offset,
                                      limit=limit)

        # Use to make sparkline
        all_jobs = sa.select((self.select_day(model.Job.table.c.create_time).label('date'),
                              model.Job.table.c.id.label('id')))

        trends = self._calculate_trends_for_jobs(trans.sa_session, all_jobs)
        jobs = self._calculate_job_table(trans.sa_session, jobs_by_month, by_destination=by_destination)

        pages_found = ceil(len(jobs) / float(entries))
        page_specs = PageSpec(entries, offset, page, pages_found)

        if by_destination == 'true':
            page = '/webapps/reports/jobs_per_month_by_user_and_destination.mako'
        else:
            page = '/webapps/reports/jobs_per_month_all.mako'
        return trans.fill_template(page,
                                   order=order,
                                   arrow=arrow,
                                   sort_id=sort_id,
                                   trends=trends,
                                   jobs=jobs,
                                   is_user_jobs_only=monitor_user_id,
                                   message=message,
                                   page_specs=page_specs)

[docs]    @web.expose
    def per_month_in_error(self, trans, **kwd):
        """
        Queries the DB for user jobs in error. Filters out monitor jobs.
        """

        message = ''
        PageSpec = namedtuple('PageSpec', ['entries', 'offset', 'page', 'pages_found'])

        params = util.Params(kwd)
        monitor_email = params.get('monitor_email', 'monitor@bx.psu.edu')
        specs = sorter('date', kwd)
        sort_id = specs.sort_id
        order = specs.order
        arrow = specs.arrow
        _order = specs.exc_order
        offset = 0
        limit = 10

        if "entries" in kwd:
            entries = int(kwd.get('entries'))
        else:
            entries = 10
        limit = entries * 4

        if "offset" in kwd:
            offset = int(kwd.get('offset'))
        else:
            offset = 0

        if "page" in kwd:
            page = int(kwd.get('page'))
        else:
            page = 1

        # In case we don't know which is the monitor user we will query for all jobs
        monitor_user_id = get_monitor_id(trans, monitor_email)

        # Use to make the page table
        jobs_in_error_by_month = sa.select((self.select_month(model.Job.table.c.create_time).label('date'),
                                            sa.func.count(model.Job.table.c.id).label('total_jobs')),
                                           whereclause=sa.and_(model.Job.table.c.state == 'error',
                                                               model.Job.table.c.user_id != monitor_user_id),
                                           from_obj=[model.Job.table],
                                           group_by=self.group_by_month(model.Job.table.c.create_time),
                                           order_by=[_order],
                                           offset=offset,
                                           limit=limit)

        # Use to make trendline
        all_jobs = sa.select((self.select_day(model.Job.table.c.create_time).label('date'),
                              model.Job.table.c.id.label('id')),
                             whereclause=sa.and_(model.Job.table.c.state == 'error',
                                                 model.Job.table.c.user_id != monitor_user_id))

        trends = self._calculate_trends_for_jobs(trans.sa_session, all_jobs)
        jobs = self._calculate_job_table(trans.sa_session, jobs_in_error_by_month)

        pages_found = ceil(len(jobs) / float(entries))
        page_specs = PageSpec(entries, offset, page, pages_found)

        return trans.fill_template('/webapps/reports/jobs_per_month_in_error.mako',
                                   order=order,
                                   arrow=arrow,
                                   sort_id=sort_id,
                                   trends=trends,
                                   jobs=jobs,
                                   message=message,
                                   is_user_jobs_only=monitor_user_id,
                                   page_specs=page_specs,
                                   offset=offset,
                                   limit=limit)

[docs]    @web.expose
    def per_user(self, trans, **kwd):
        """
        Queries the DB for jobs per user.  The by_destination
        param will group by Job.destination_id.

        Request parameters read from ``kwd`` (all optional):

        - ``by_destination``: the string ``true`` (any case) adds
          ``Job.destination_id`` to the grouping and selects the
          by-destination template.
        - ``monitor_email``: rows of the per-user table carrying this email
          are skipped (default ``monitor@bx.psu.edu``).
        - ``sort_id`` / ``order``: consumed by ``sorter``.
        - ``spark_time``: passed to ``get_spark_time``; the second value it
          returns is used as the number of days folded into one sparkline
          bucket.
        - ``entries`` / ``offset`` / ``page``: paging values.  ``entries`` is
          clamped to at least 1 and ``offset`` to at least 0.

        Returns the rendered ``jobs_per_user`` (or
        ``jobs_per_user_by_destination``) template.
        """
        group_by_destination = str(kwd.get('by_destination', False)).lower() == 'true'
        total_time = Timer()
        q_time = Timer()

        total_time.start()
        params = util.Params(kwd)
        message = ''
        PageSpec = namedtuple('PageSpec', ['entries', 'offset', 'page', 'pages_found'])

        monitor_email = params.get('monitor_email', 'monitor@bx.psu.edu')
        specs = sorter('user_email', kwd)
        sort_id = specs.sort_id
        order = specs.order
        arrow = specs.arrow
        _order = specs.exc_order
        time_period, _time_period = get_spark_time(kwd.get('spark_time'))
        spark_limit = 30

        # entries is used as a divisor for the page count below, so it must
        # never be zero; a negative offset is not a valid SQL OFFSET.
        entries = max(1, int(kwd.get('entries', 10)))
        limit = entries * 4
        offset = max(0, int(kwd.get('offset', 0)))
        page = int(kwd.get('page', 1))

        # Both variants share the same query; grouping by destination only
        # adds one column and one GROUP BY term.
        columns = [model.User.table.c.email.label('user_email'),
                   sa.func.count(model.Job.table.c.id).label('total_jobs')]
        group_by = ['user_email']
        if group_by_destination:
            columns.append(model.Job.table.c.destination_id.label('destination_id'))
            group_by.append('destination_id')
        jobs_per_user = sa.select(tuple(columns),
                                  from_obj=[sa.outerjoin(model.Job.table, model.User.table)],
                                  group_by=group_by,
                                  order_by=[_order],
                                  offset=offset,
                                  limit=limit)

        jobs = []
        q_time.start()
        unique_users = set()
        for row in trans.sa_session.execute(jobs_per_user):
            if row.user_email is None:
                label = 'Anonymous'
            elif row.user_email == monitor_email:
                continue
            else:
                label = row.user_email
            curr_user, unique_users = get_curr_item(label, unique_users)
            if group_by_destination:
                jobs.append((curr_user, row.destination_id, row.total_jobs))
            else:
                jobs.append((curr_user, row.total_jobs))
        q_time.stop()
        query1time = q_time.time_elapsed()

        users = sa.select([model.User.table.c.email],
                          from_obj=[model.User.table])

        all_jobs_per_user = sa.select((model.Job.table.c.id.label('id'),
                                       model.Job.table.c.create_time.label('date'),
                                       model.User.table.c.email.label('user_email')),
                                      from_obj=[sa.outerjoin(model.Job.table, model.User.table)],
                                      whereclause=model.User.table.c.email.in_(users))

        now = datetime.today()
        trends = dict()
        q_time.start()
        for job in trans.sa_session.execute(all_jobs_per_user):
            if job.user_email is None:
                curr_user = 'Anonymous'
            else:
                # Strip non-word characters so the key matches the sanitized
                # form of the email.
                curr_user = re.sub(r'\W+', '', job.user_email)

            try:
                age = now - job.date
            except TypeError:
                # The value could not be subtracted from a naive datetime
                # (e.g. it is a plain date); fall back to whole-day arithmetic.
                created_day = job.date.date() if isinstance(job.date, datetime) else job.date
                age = now.date() - created_day

            # A creation time later than "now" yields a negative bucket, and a
            # negative list index would silently count from the END of the
            # sparkline.  Treat such jobs as belonging to the newest bucket.
            container = max(0, int(floor(age.days / _time_period)))
            if container < spark_limit:
                if curr_user not in trends:
                    trends[curr_user] = [0] * spark_limit
                trends[curr_user][container] += 1
        q_time.stop()
        query2time = q_time.time_elapsed()

        pages_found = ceil(len(jobs) / float(entries))
        page_specs = PageSpec(entries, offset, page, pages_found)

        total_time.stop()
        ttime = total_time.time_elapsed()
        if group_by_destination:
            template = '/webapps/reports/jobs_per_user_by_destination.mako'
        else:
            template = '/webapps/reports/jobs_per_user.mako'
        return trans.fill_template(template,
                                   order=order,
                                   arrow=arrow,
                                   sort_id=sort_id,
                                   spark_limit=spark_limit,
                                   time_period=time_period,
                                   q1time=query1time,
                                   q2time=query2time,
                                   ttime=ttime,
                                   trends=trends,
                                   jobs=jobs,
                                   message=message,
                                   page_specs=page_specs)

[docs]    @web.expose
    def user_per_month(self, trans, **kwd):
        """
        Queries the DB for jobs per user per month.  The
        by_destination param will group by Job.destination_id.

        Request parameters read from ``kwd`` (all optional):

        - ``email``: the user whose jobs are reported (default ``''``).
        - ``by_destination``: the string ``true`` (any case) additionally
          groups by ``Job.destination_id``, sums ``update_time - create_time``
          as ``execute_time`` and selects the by-destination template.
        - ``sort_id`` / ``order``: consumed by ``sorter``.
        - ``id``: forwarded to the template unchanged.

        ``trends`` maps ``"<MonthName><Year>"`` to a list with one counter per
        day of that month.
        """
        group_by_destination = str(kwd.get('by_destination', False)).lower() == 'true'
        params = util.Params(kwd)
        message = ''

        email = util.restore_text(params.get('email', ''))
        specs = sorter('date', kwd)
        sort_id = specs.sort_id
        order = specs.order
        arrow = specs.arrow
        _order = specs.exc_order

        if group_by_destination:
            q = sa.select((self.select_month(model.Job.table.c.create_time).label('date'),
                           model.Job.table.c.destination_id.label('destination_id'),
                           sa.func.sum(model.Job.table.c.update_time - model.Job.table.c.create_time).label('execute_time'),
                           sa.func.count(model.Job.table.c.id).label('total_jobs')),
                          whereclause=model.User.table.c.email == email,
                          from_obj=[sa.join(model.Job.table, model.User.table)],
                          group_by=['date', 'destination_id'],
                          order_by=[_order])
        else:
            q = sa.select((self.select_month(model.Job.table.c.create_time).label('date'),
                           sa.func.count(model.Job.table.c.id).label('total_jobs')),
                          whereclause=model.User.table.c.email == email,
                          from_obj=[sa.join(model.Job.table, model.User.table)],
                          group_by=self.group_by_month(model.Job.table.c.create_time),
                          order_by=[_order])

        all_jobs_per_user = sa.select((model.Job.table.c.create_time.label('date'),
                                       model.Job.table.c.id.label('job_id')),
                                      whereclause=model.User.table.c.email == email,
                                      from_obj=[sa.join(model.Job.table, model.User.table)])

        trends = dict()
        for job in trans.sa_session.execute(all_jobs_per_user):
            created = job.date
            # Read day/month/year from the attributes rather than through
            # strftime("%-d") / strftime("%-m"): the "-" flag is a platform
            # extension and is not accepted everywhere.
            key = created.strftime("%B") + created.strftime("%Y")
            if key not in trends:
                _, days_in_month = calendar.monthrange(created.year, created.month)
                trends[key] = [0] * days_in_month
            trends[key][created.day - 1] += 1

        jobs = []
        unique_month_year_strs = set()
        for row in trans.sa_session.execute(q):
            month_name = row.date.strftime("%B")
            year = row.date.strftime("%Y")
            if group_by_destination:
                month_year_str = "%s %s" % (month_name, year)
                curr_month_year_str, unique_month_year_strs = get_curr_item(month_year_str, unique_month_year_strs)
                # get_curr_item hands back '' for this row in some cases; the
                # month and year cells are then left blank as well.
                if curr_month_year_str == '':
                    curr_month = ''
                    curr_year = ''
                else:
                    curr_month = month_name
                    curr_year = year
                jobs.append((row.date.strftime("%Y-%m"),
                             row.execute_time,
                             row.total_jobs,
                             curr_month,
                             curr_year,
                             row.destination_id))
            else:
                jobs.append((row.date.strftime("%Y-%m"),
                             row.total_jobs,
                             month_name,
                             year))
        if group_by_destination:
            template = '/webapps/reports/jobs_user_per_month_by_destination.mako'
        else:
            template = '/webapps/reports/jobs_user_per_month.mako'
        return trans.fill_template(template,
                                   order=order,
                                   arrow=arrow,
                                   sort_id=sort_id,
                                   id=kwd.get('id'),
                                   trends=trends,
                                   email=util.sanitize_text(email),
                                   jobs=jobs,
                                   message=message)

[docs]    @web.expose
    def per_tool(self, trans, **kwd):
        """
        Queries the DB for the number of jobs per tool.

        Request parameters read from ``kwd`` (all optional):

        - ``monitor_email``: email used to look up the monitor user id
          through ``get_monitor_id`` (default ``monitor@bx.psu.edu``); both
          queries filter on ``Job.user_id != <that id>``.
        - ``sort_id`` / ``order``: consumed by ``sorter``.
        - ``spark_time``: passed to ``get_spark_time``; the second value it
          returns is used as the number of days folded into one sparkline
          bucket.
        - ``entries`` / ``offset`` / ``page``: paging values.  ``entries`` is
          clamped to at least 1 and ``offset`` to at least 0.

        Returns the rendered ``jobs_per_tool`` template.
        """
        message = ''
        PageSpec = namedtuple('PageSpec', ['entries', 'offset', 'page', 'pages_found'])

        params = util.Params(kwd)
        monitor_email = params.get('monitor_email', 'monitor@bx.psu.edu')
        specs = sorter('tool_id', kwd)
        sort_id = specs.sort_id
        order = specs.order
        arrow = specs.arrow
        _order = specs.exc_order
        time_period, _time_period = get_spark_time(kwd.get('spark_time'))
        spark_limit = 30

        # entries is used as a divisor for the page count below, so it must
        # never be zero; a negative offset is not a valid SQL OFFSET.
        entries = max(1, int(kwd.get('entries', 10)))
        limit = entries * 4
        offset = max(0, int(kwd.get('offset', 0)))
        page = int(kwd.get('page', 1))

        # In case we don't know which is the monitor user we will query for all jobs
        monitor_user_id = get_monitor_id(trans, monitor_email)

        q = sa.select((model.Job.table.c.tool_id.label('tool_id'),
                       sa.func.count(model.Job.table.c.id).label('total_jobs')),
                      whereclause=model.Job.table.c.user_id != monitor_user_id,
                      from_obj=[model.Job.table],
                      group_by=['tool_id'],
                      order_by=[_order],
                      offset=offset,
                      limit=limit)

        all_jobs_per_tool = sa.select((model.Job.table.c.tool_id.label('tool_id'),
                                       model.Job.table.c.id.label('id'),
                                       self.select_day(model.Job.table.c.create_time).label('date')),
                                      whereclause=model.Job.table.c.user_id != monitor_user_id,
                                      from_obj=[model.Job.table])

        today = date.today()
        trends = dict()
        for job in trans.sa_session.execute(all_jobs_per_tool):
            # Strip non-word characters from the tool id to form the key.
            curr_tool = re.sub(r'\W+', '', str(job.tool_id))
            try:
                age = today - job.date
            except TypeError:
                # job.date came back as a datetime; reduce it to a date first.
                age = today - datetime.date(job.date)

            # A creation day later than "today" yields a negative bucket, and
            # a negative list index would silently count from the END of the
            # sparkline.  Treat such jobs as belonging to the newest bucket.
            container = max(0, int(floor(age.days / _time_period)))
            if container < spark_limit:
                if curr_tool not in trends:
                    trends[curr_tool] = [0] * spark_limit
                trends[curr_tool][container] += 1

        jobs = [(row.tool_id, row.total_jobs)
                for row in trans.sa_session.execute(q)]

        pages_found = ceil(len(jobs) / float(entries))
        page_specs = PageSpec(entries, offset, page, pages_found)

        return trans.fill_template('/webapps/reports/jobs_per_tool.mako',
                                   order=order,
                                   arrow=arrow,
                                   sort_id=sort_id,
                                   spark_limit=spark_limit,
                                   time_period=time_period,
                                   trends=trends,
                                   jobs=jobs,
                                   message=message,
                                   is_user_jobs_only=monitor_user_id,
                                   page_specs=page_specs)

[docs]    @web.expose
    def errors_per_tool(self, trans, **kwd):
        """
        Queries the DB for user jobs in error. Filters out monitor jobs.

        Request parameters read from ``kwd`` (all optional):

        - ``monitor_email``: email used to look up the monitor user id
          through ``get_monitor_id`` (default ``monitor@bx.psu.edu``); both
          queries filter on ``Job.user_id != <that id>``.
        - ``sort_id`` / ``order``: consumed by ``sorter``.
        - ``spark_time``: passed to ``get_spark_time``; the second value it
          returns is used as the number of days folded into one sparkline
          bucket.
        - ``entries`` / ``offset`` / ``page``: paging values.  ``entries`` is
          clamped to at least 1 and ``offset`` to at least 0.

        Returns the rendered ``jobs_errors_per_tool`` template.  Note that the
        ``jobs`` rows are ``(total_jobs, tool_id)`` tuples.
        """

        message = ''
        PageSpec = namedtuple('PageSpec', ['entries', 'offset', 'page', 'pages_found'])

        params = util.Params(kwd)
        monitor_email = params.get('monitor_email', 'monitor@bx.psu.edu')
        specs = sorter('tool_id', kwd)
        sort_id = specs.sort_id
        order = specs.order
        arrow = specs.arrow
        _order = specs.exc_order
        time_period, _time_period = get_spark_time(kwd.get('spark_time'))
        spark_limit = 30

        # entries is used as a divisor for the page count below, so it must
        # never be zero; a negative offset is not a valid SQL OFFSET.
        entries = max(1, int(kwd.get('entries', 10)))
        limit = entries * 4
        offset = max(0, int(kwd.get('offset', 0)))
        page = int(kwd.get('page', 1))

        # In case we don't know which is the monitor user we will query for all jobs
        monitor_user_id = get_monitor_id(trans, monitor_email)

        jobs_in_error_per_tool = sa.select((model.Job.table.c.tool_id.label('tool_id'),
                                            sa.func.count(model.Job.table.c.id).label('total_jobs')),
                                           whereclause=sa.and_(model.Job.table.c.state == 'error',
                                                               model.Job.table.c.user_id != monitor_user_id),
                                           from_obj=[model.Job.table],
                                           group_by=['tool_id'],
                                           order_by=[_order],
                                           offset=offset,
                                           limit=limit)

        all_jobs_per_tool_errors = sa.select((self.select_day(model.Job.table.c.create_time).label('date'),
                                              model.Job.table.c.id.label('id'),
                                              model.Job.table.c.tool_id.label('tool_id')),
                                             whereclause=sa.and_(model.Job.table.c.state == 'error',
                                                                 model.Job.table.c.user_id != monitor_user_id),
                                             from_obj=[model.Job.table])

        today = date.today()
        trends = dict()
        for job in trans.sa_session.execute(all_jobs_per_tool_errors):
            # Strip non-word characters from the tool id to form the key.
            curr_tool = re.sub(r'\W+', '', str(job.tool_id))
            try:
                age = today - job.date
            except TypeError:
                # job.date came back as a datetime; reduce it to a date first.
                age = today - datetime.date(job.date)

            # convert day into days/weeks/months/years
            # A creation day later than "today" yields a negative bucket, and
            # a negative list index would silently count from the END of the
            # sparkline.  Treat such jobs as belonging to the newest bucket.
            container = max(0, int(floor(age.days / _time_period)))
            # The window test must use the bucket index, not the raw day
            # count: with buckets wider than one day a job can be 30+ days
            # old and still fall inside the sparkline.
            if container < spark_limit:
                if curr_tool not in trends:
                    trends[curr_tool] = [0] * spark_limit
                trends[curr_tool][container] += 1

        jobs = [(row.total_jobs, row.tool_id)
                for row in trans.sa_session.execute(jobs_in_error_per_tool)]

        pages_found = ceil(len(jobs) / float(entries))
        page_specs = PageSpec(entries, offset, page, pages_found)

        return trans.fill_template('/webapps/reports/jobs_errors_per_tool.mako',
                                   order=order,
                                   arrow=arrow,
                                   sort_id=sort_id,
                                   spark_limit=spark_limit,
                                   time_period=time_period,
                                   trends=trends,
                                   jobs=jobs,
                                   message=message,
                                   is_user_jobs_only=monitor_user_id,
                                   page_specs=page_specs)

[docs]    @web.expose
    def tool_per_month(self, trans, **kwd):
        """
        Queries the DB for the number of jobs per month for a single tool.

        Request parameters read from ``kwd`` (all optional):

        - ``tool_id``: the tool to report on (default ``Add a column1``).
        - ``monitor_email``: email used to look up the monitor user id
          through ``get_monitor_id`` (default ``monitor@bx.psu.edu``); both
          queries filter on ``Job.user_id != <that id>``.
        - ``specified_date``: forwarded to the template; defaults to the
          current UTC date formatted as ``YYYY-MM-DD``.
        - ``sort_id`` / ``order``: consumed by ``sorter``.

        ``trends`` maps ``"<MonthName><Year>"`` to a list with one counter per
        day of that month.
        """
        message = ''

        params = util.Params(kwd)
        monitor_email = params.get('monitor_email', 'monitor@bx.psu.edu')
        specs = sorter('date', kwd)
        sort_id = specs.sort_id
        order = specs.order
        arrow = specs.arrow
        _order = specs.exc_order
        # In case we don't know which is the monitor user we will query for all jobs
        monitor_user_id = get_monitor_id(trans, monitor_email)

        tool_id = params.get('tool_id', 'Add a column1')
        specified_date = params.get('specified_date', datetime.utcnow().strftime("%Y-%m-%d"))
        q = sa.select((self.select_month(model.Job.table.c.create_time).label('date'),
                       sa.func.count(model.Job.table.c.id).label('total_jobs')),
                      whereclause=sa.and_(model.Job.table.c.tool_id == tool_id,
                                          model.Job.table.c.user_id != monitor_user_id),
                      from_obj=[model.Job.table],
                      group_by=self.group_by_month(model.Job.table.c.create_time),
                      order_by=[_order])

        # Use to make sparkline
        all_jobs_for_tool = sa.select((self.select_month(model.Job.table.c.create_time).label('month'),
                                       self.select_day(model.Job.table.c.create_time).label('day'),
                                       model.Job.table.c.id.label('id')),
                                      whereclause=sa.and_(model.Job.table.c.tool_id == tool_id,
                                                          model.Job.table.c.user_id != monitor_user_id),
                                      from_obj=[model.Job.table])
        trends = dict()
        for job in trans.sa_session.execute(all_jobs_for_tool):
            # Read day/month/year from the attributes rather than through
            # strftime("%-d") / strftime("%-m"): the "-" flag is a platform
            # extension and is not accepted everywhere.
            key = job.month.strftime("%B") + job.month.strftime("%Y")
            if key not in trends:
                _, days_in_month = calendar.monthrange(job.month.year, job.month.month)
                trends[key] = [0] * days_in_month
            trends[key][job.day.day - 1] += 1

        jobs = [(row.date.strftime("%Y-%m"),
                 row.total_jobs,
                 row.date.strftime("%B"),
                 row.date.strftime("%Y"))
                for row in trans.sa_session.execute(q)]
        return trans.fill_template('/webapps/reports/jobs_tool_per_month.mako',
                                   order=order,
                                   arrow=arrow,
                                   sort_id=sort_id,
                                   specified_date=specified_date,
                                   tool_id=tool_id,
                                   trends=trends,
                                   jobs=jobs,
                                   message=message,
                                   is_user_jobs_only=monitor_user_id)

[docs]    @web.expose
    def job_info(self, trans, **kwd):
        """
        Render the job detail page for the job whose encoded id is passed
        as the ``id`` request parameter (an empty string when absent).
        """
        encoded_id = kwd.get('id', '')
        job_query = trans.sa_session.query(model.Job)
        job = job_query.get(trans.security.decode_id(encoded_id))
        template_args = dict(job=job, message='')
        return trans.fill_template('/webapps/reports/job_info.mako', **template_args)

# ---- Utility methods -------------------------------------------------------


def get_job(trans, id):
    """
    Load a job by its encoded id.

    ``id`` is decoded with ``trans.security.decode_id`` and the result is
    used as the primary key for a ``trans.model.Job`` lookup; whatever the
    query's ``get`` returns is handed back to the caller.
    """
    # NOTE: the parameter keeps the name ``id`` (shadowing the builtin) so
    # that existing keyword callers continue to work.
    job_id = trans.security.decode_id(id)
    return trans.sa_session.query(trans.model.Job).get(job_id)


def get_monitor_id(trans, monitor_email):
    """
    A convenience method to obtain the id of the monitor user.

    Looks up the first ``trans.model.User`` row whose email equals
    ``monitor_email`` and returns its id, or ``None`` when no such user
    exists.
    """
    user = trans.model.User
    monitor_row = trans.sa_session.query(user.id) \
        .filter(user.table.c.email == monitor_email) \
        .first()
    if monitor_row is None:
        return None
    # Only the id column was selected, so it is the first item of the row.
    return monitor_row[0]