Warning
This document is for an old release of Galaxy. You can alternatively view this page in the latest release if it exists or view the top of the latest release's documentation.
Source code for galaxy.model
"""
Galaxy data model classes
Naming: try to use class names that have a distinct plural form so that
the relationship cardinalities are obvious (e.g. prefer Dataset to Data)
"""
import abc
import base64
import errno
import json
import logging
import numbers
import operator
import os
import pwd
import random
import string
from collections import defaultdict
from collections.abc import Callable
from datetime import timedelta
from enum import Enum
from secrets import token_hex
from string import Template
from typing import (
Any,
Dict,
Iterable,
List,
NamedTuple,
Optional,
overload,
Set,
Tuple,
Type,
TYPE_CHECKING,
Union,
)
from uuid import (
UUID,
uuid4,
)
import sqlalchemy
from boltons.iterutils import remap
from pydantic import BaseModel
from social_core.storage import (
AssociationMixin,
CodeMixin,
NonceMixin,
PartialMixin,
UserMixin,
)
from sqlalchemy import (
alias,
and_,
asc,
BigInteger,
bindparam,
Boolean,
case,
Column,
column,
DateTime,
delete,
desc,
event,
false,
ForeignKey,
func,
Index,
inspect,
Integer,
join,
MetaData,
not_,
Numeric,
or_,
PrimaryKeyConstraint,
select,
String,
Table,
TEXT,
Text,
text,
true,
tuple_,
type_coerce,
Unicode,
UniqueConstraint,
update,
VARCHAR,
)
from sqlalchemy.exc import OperationalError
from sqlalchemy.ext import hybrid
from sqlalchemy.ext.associationproxy import association_proxy
from sqlalchemy.ext.orderinglist import ordering_list
from sqlalchemy.orm import (
aliased,
column_property,
deferred,
joinedload,
object_session,
Query,
reconstructor,
registry,
relationship,
)
from sqlalchemy.orm.attributes import flag_modified
from sqlalchemy.orm.collections import attribute_mapped_collection
from sqlalchemy.sql import exists
from typing_extensions import (
Literal,
Protocol,
TypeAlias,
TypedDict,
)
import galaxy.exceptions
import galaxy.model.metadata
import galaxy.model.tags
import galaxy.security.passwords
import galaxy.util
from galaxy.model.base import (
ensure_object_added_to_session,
transaction,
)
from galaxy.model.custom_types import (
DoubleEncodedJsonType,
JSONType,
MetadataType,
MutableJSONType,
TrimmedString,
UUIDType,
)
from galaxy.model.database_object_names import NAMING_CONVENTION
from galaxy.model.item_attrs import (
get_item_annotation_str,
UsesAnnotations,
)
from galaxy.model.orm.now import now
from galaxy.model.orm.util import add_object_to_object_session
from galaxy.objectstore import ObjectStore
from galaxy.schema.invocation import (
InvocationCancellationUserRequest,
InvocationState,
InvocationStepState,
)
from galaxy.schema.schema import (
DatasetCollectionPopulatedState,
DatasetState,
DatasetValidatedState,
InvocationsStateCounts,
JobState,
)
from galaxy.schema.workflow.comments import WorkflowCommentModel
from galaxy.security import get_permitted_actions
from galaxy.security.idencoding import IdEncodingHelper
from galaxy.security.validate_user_input import validate_password_str
from galaxy.util import (
directory_hash_id,
enum_values,
hex_to_lowercase_alphanum,
listify,
ready_name_for_url,
unicodify,
unique_id,
)
from galaxy.util.dictifiable import (
dict_for,
Dictifiable,
)
from galaxy.util.form_builder import (
AddressField,
CheckboxField,
HistoryField,
PasswordField,
SelectField,
TextArea,
TextField,
WorkflowField,
WorkflowMappingField,
)
from galaxy.util.hash_util import (
md5_hash_str,
new_insecure_hash,
)
from galaxy.util.json import safe_loads
from galaxy.util.sanitize_html import sanitize_html
if TYPE_CHECKING:
from galaxy.schema.invocation import InvocationMessageUnion
# Module-level logger.
log = logging.getLogger(__name__)
# Process-wide datatypes registry; populated via set_datatypes_registry() and
# read through _get_datatypes_registry().
_datatypes_registry = None
# SQLAlchemy registry shared by the declarative Base defined below.
mapper_registry = registry()
# When constructing filters with in for a fixed set of ids, maximum
# number of items to place in the IN statement. Different databases
# are going to have different limits so it is likely best to not let
# this be unlimited - filter in Python if over this limit.
MAX_IN_FILTER_LENGTH = 100
# The column sizes for job metrics. Note: changing these values does not change the column sizes, a migration must be
# performed to do that.
JOB_METRIC_MAX_LENGTH = 1023
JOB_METRIC_PRECISION = 26
JOB_METRIC_SCALE = 7
# Tags that get automatically propagated from inputs to outputs when running jobs.
AUTO_PROPAGATED_TAGS = ["name"]
# NOTE(review): presumably the batch size for yield_per() style queries elsewhere in the module - confirm at call sites.
YIELD_PER_ROWS = 100
# Error text for attempts to share a dataset that cannot be shared.
CANNOT_SHARE_PRIVATE_DATASET_MESSAGE = "Attempting to share a non-shareable dataset."
# Typing shim: under a type checker we expose a DeclarativeMeta subclass and a
# _HasTable stub declaring the table attributes; at runtime the real
# DeclarativeMeta is imported and _HasTable is simply ``object``.
if TYPE_CHECKING:
    # Workaround for https://github.com/python/mypy/issues/14182
    from sqlalchemy.orm.decl_api import DeclarativeMeta as _DeclarativeMeta

    class DeclarativeMeta(_DeclarativeMeta, type):
        pass

    # Imported for type annotations only.
    from galaxy.datatypes.data import Data
    from galaxy.tools import DefaultToolState
    from galaxy.workflow.modules import WorkflowModule

    class _HasTable:
        # ``table`` is assigned in Base.__declare_last__; ``__table__`` is the
        # attribute it is copied from.
        table: Table
        __table__: Table

else:
    from sqlalchemy.orm.decl_api import DeclarativeMeta

    _HasTable = object
def get_uuid(uuid: Optional[Union[UUID, str]] = None) -> UUID:
    """Coerce ``uuid`` into a :class:`UUID`.

    An existing ``UUID`` is returned unchanged, a falsy value (``None`` or an
    empty string) produces a fresh random UUID, and anything else is parsed
    from its string form.
    """
    if isinstance(uuid, UUID):
        return uuid
    return UUID(str(uuid)) if uuid else uuid4()
class Base(_HasTable, metaclass=DeclarativeMeta):
    """Abstract declarative base for the mapped model classes in this module.

    Shares one ``MetaData`` (built with the project naming convention) and the
    module-level ``mapper_registry`` across all subclasses.
    """

    __abstract__ = True
    metadata = MetaData(naming_convention=NAMING_CONVENTION)
    # Point the shared registry at the same MetaData object used by the base.
    mapper_registry.metadata = metadata
    registry = mapper_registry
    # Use the registry's default constructor.
    __init__ = mapper_registry.constructor

    @classmethod
    def __declare_last__(cls):
        # Expose the mapped table under the ``table`` alias as well.
        cls.table = cls.__table__
class RepresentById:
    """Mixin providing a ``repr`` that includes the class name and object id."""

    id: int

    def __repr__(self):
        """Return ``<galaxy.model.Class(id) at 0x...>``.

        If building that string fails for any reason, the failure is logged
        and the default ``object`` repr is returned instead.
        """
        try:
            return f"<galaxy.model.{self.__class__.__name__}({cached_id(self)}) at {hex(id(self))}>"
        except Exception:
            fallback = object.__repr__(self)
            log.exception("Caught exception attempting to generate repr for: %s", fallback)
            return fallback
def _get_datatypes_registry():
    """Return the module-wide datatypes registry.

    Raises ``Exception`` if ``set_datatypes_registry`` has not been called yet.
    """
    if _datatypes_registry is not None:
        return _datatypes_registry
    raise Exception(
        "galaxy.model.set_datatypes_registry must be called before performing certain DatasetInstance operations."
    )
def set_datatypes_registry(d_registry):
    """Install ``d_registry`` as the module-wide datatypes registry.

    The value is stored in the module global that ``_get_datatypes_registry``
    reads; calling this again replaces the previous registry.
    """
    global _datatypes_registry
    _datatypes_registry = d_registry
class HasTags:
    """Mixin for models that carry a ``tags`` collection of tag associations."""

    dict_collection_visible_keys = ["tags"]
    dict_element_visible_keys = ["tags"]
    tags: List["ItemTagAssociation"]

    def to_dict(self, *args, **kwargs):
        """Extend the parent ``to_dict`` result with a ``"tags"`` list of tag strings."""
        as_dict = super().to_dict(*args, **kwargs)
        as_dict["tags"] = self.make_tag_string_list()
        return as_dict

    def make_tag_string_list(self):
        """Render each tag as ``name`` or ``name:value`` (when the tag has a value)."""

        def render(tag):
            label = tag.user_tname
            if tag.value is None:
                return label
            return label + f":{tag.user_value}"

        return [render(tag) for tag in self.tags]

    def copy_tags_from(self, target_user, source):
        """Append copies of ``source``'s tag associations, owned by ``target_user``."""
        for original_assoc in source.tags:
            duplicate = original_assoc.copy()
            duplicate.user = target_user
            self.tags.append(duplicate)

    @property
    def auto_propagated_tags(self):
        """Tags whose name is listed in ``AUTO_PROPAGATED_TAGS``."""
        return [tag for tag in self.tags if tag.user_tname in AUTO_PROPAGATED_TAGS]
class SerializationOptions:
    """Options controlling how model objects are serialized for export or editing."""

    def __init__(
        self,
        for_edit: bool,
        serialize_dataset_objects: Optional[bool] = None,
        # Quoted: the handler type is not defined before this class, so an
        # unquoted annotation would be evaluated (and fail) at definition time.
        serialize_files_handler: Optional["SerializeFilesHandler"] = None,
        strip_metadata_files: Optional[bool] = None,
    ) -> None:
        """
        :param for_edit: serialize with raw database ids instead of encoded ids.
        :param serialize_dataset_objects: defaults to ``for_edit`` when ``None``.
        :param serialize_files_handler: optional object whose ``serialize_files``
            method is invoked by :meth:`serialize_files`.
        :param strip_metadata_files: defaults to ``not for_edit`` when ``None``.
        """
        self.for_edit = for_edit
        if serialize_dataset_objects is None:
            serialize_dataset_objects = for_edit
        self.serialize_dataset_objects = serialize_dataset_objects
        self.serialize_files_handler = serialize_files_handler
        if strip_metadata_files is None:
            # If we're editing datasets - keep MetadataFile(s) intact. For pure export
            # expect metadata tool to be rerun.
            strip_metadata_files = not for_edit
        self.strip_metadata_files = strip_metadata_files

    def attach_identifier(self, id_encoder, obj, ret_val):
        """Store ``obj``'s identifier in ``ret_val``.

        The raw id goes under ``"id"`` when editing a persisted object; in
        every other case the encoded (or temporary) id goes under
        ``"encoded_id"``.
        """
        key = "id" if self.for_edit and obj.id else "encoded_id"
        ret_val[key] = self.get_identifier(id_encoder, obj)

    def get_identifier(self, id_encoder, obj):
        """Return the identifier to use for ``obj``.

        Persisted objects yield their raw id (edit mode) or an id encoded with
        ``kind="model_export"``. Objects without an id get a random hex
        ``temp_id`` that is cached on the object so repeated calls agree.
        """
        if self.for_edit and obj.id:
            return obj.id
        elif obj.id:
            return id_encoder.encode_id(obj.id, kind="model_export")
        else:
            if not hasattr(obj, "temp_id"):
                obj.temp_id = uuid4().hex
            return obj.temp_id

    def get_identifier_for_id(self, id_encoder, obj_id):
        """Like :meth:`get_identifier` but starting from a bare id.

        :raises NotImplementedError: if ``obj_id`` is falsy (there is no object
            to hang a temporary id on).
        """
        if self.for_edit and obj_id:
            return obj_id
        elif obj_id:
            return id_encoder.encode_id(obj_id, kind="model_export")
        else:
            raise NotImplementedError()

    def serialize_files(self, dataset, as_dict):
        """Delegate to the configured files handler, if one was supplied."""
        if self.serialize_files_handler is not None:
            self.serialize_files_handler.serialize_files(dataset, as_dict)
class Serializable(RepresentById):
    """Base for models that can be serialized for re-population elsewhere."""

    def serialize(
        self, id_encoder: IdEncodingHelper, serialization_options: SerializationOptions, for_link: bool = False
    ) -> Dict[str, Any]:
        """Serialize model for a re-population in (potentially) another Galaxy instance.

        With ``for_link`` only a minimal dictionary carrying the object's
        identifier is produced; otherwise the subclass ``_serialize`` hook
        builds the full representation.
        """
        if not for_link:
            return self._serialize(id_encoder, serialization_options)
        link = dict_for(self)
        serialization_options.attach_identifier(id_encoder, self, link)
        return link

    @abc.abstractmethod
    def _serialize(self, id_encoder: IdEncodingHelper, serialization_options: SerializationOptions) -> Dict[str, Any]:
        """Build the full serialized form of this model; implemented by subclasses."""
class HasName:
    """Mixin for objects exposing a ``name`` attribute."""

    def get_display_name(self):
        """
        Return ``self.name`` passed through ``unicodify`` with the 'utf-8'
        encoding, so callers always receive a text value for display.
        """
        return unicodify(self.name, "utf-8")
class UsesCreateAndUpdateTime:
    """Mixin adding elapsed-time helpers based on ``create_time`` / ``update_time``."""

    update_time: DateTime

    @property
    def seconds_since_updated(self):
        """Seconds elapsed since ``update_time`` (about zero when it is not set yet)."""
        reference = self.update_time
        if not reference:
            # In case not yet flushed
            reference = now()
        elapsed = now() - reference
        return elapsed.total_seconds()

    @property
    def seconds_since_created(self):
        """Seconds elapsed since ``create_time`` (about zero when it is not set yet)."""
        reference = self.create_time
        if not reference:
            # In case not yet flushed
            reference = now()
        elapsed = now() - reference
        return elapsed.total_seconds()
class WorkerProcess(Base, UsesCreateAndUpdateTime):
    """Row in ``worker_process``; one entry per (server_name, hostname) pair."""

    __tablename__ = "worker_process"
    # A given server name may appear only once per host.
    __table_args__ = (UniqueConstraint("server_name", "hostname"),)
    id = Column(Integer, primary_key=True)
    server_name = Column(String(255), index=True)
    hostname = Column(String(255))
    pid = Column(Integer)
    # Set on insert and refreshed on every update of the row.
    update_time = Column(DateTime, default=now, onupdate=now)
def cached_id(galaxy_model_object):
    """Return the object's id without triggering a database query when possible.

    After a flush SQLAlchemy marks attributes (including ``id``) as unloaded,
    so reading ``obj.id`` would hit the database again. Galaxy model objects
    use ``id`` as their single-column identity, so the value can be read back
    from the instance state (``_sa_instance_state.identity``) instead.

    Objects without instance state, or without an identity yet, fall back to
    plain attribute access.
    """
    if not hasattr(galaxy_model_object, "_sa_instance_state"):
        return galaxy_model_object.id
    identity = galaxy_model_object._sa_instance_state.identity
    if not identity:
        return galaxy_model_object.id
    # Only single-column primary keys are expected here.
    assert len(identity) == 1
    return identity[0]
class JobLike:
    """Mixin with metric and output-stream helpers shared by job-like models.

    The host class is expected to provide the ``_numeric_metric`` and
    ``_text_metric`` factories, the ``tool_*`` / ``job_*`` stream attributes
    and ``tool_id``; none of them are defined in this mixin.
    """

    # Largest integer value accepted for a numeric metric given the configured
    # precision and scale of the metric column.
    MAX_NUMERIC = 10 ** (JOB_METRIC_PRECISION - JOB_METRIC_SCALE) - 1

    def _init_metrics(self):
        """Start with empty text and numeric metric lists."""
        self.text_metrics = []
        self.numeric_metrics = []

    def add_metric(self, plugin, metric_name, metric_value):
        """Record one metric produced by ``plugin``.

        Numbers no larger than ``MAX_NUMERIC`` are stored as numeric metrics;
        larger numbers are dropped with a warning. Any other value is
        converted to text, truncated to the column size, and stored as a text
        metric.
        """
        plugin = unicodify(plugin, "utf-8")
        metric_name = unicodify(metric_name, "utf-8")
        number = isinstance(metric_value, numbers.Number)
        if number and int(metric_value) <= JobLike.MAX_NUMERIC:
            metric = self._numeric_metric(plugin, metric_name, metric_value)
            self.numeric_metrics.append(metric)
        elif number:
            log.warning(
                "Cannot store metric due to database column overflow (max: %s): %s: %s",
                JobLike.MAX_NUMERIC,
                metric_name,
                metric_value,
            )
        else:
            metric_value = unicodify(metric_value, "utf-8")
            if len(metric_value) > (JOB_METRIC_MAX_LENGTH - 1):
                # Truncate these values - not needed with sqlite
                # but other backends must need it.
                metric_value = metric_value[: (JOB_METRIC_MAX_LENGTH - 1)]
            metric = self._text_metric(plugin, metric_name, metric_value)
            self.text_metrics.append(metric)

    @property
    def metrics(self):
        """All recorded metrics: text metrics followed by numeric metrics."""
        # TODO: Make iterable, concatenate with chain
        return self.text_metrics + self.numeric_metrics

    def set_streams(self, tool_stdout, tool_stderr, job_stdout=None, job_stderr=None, job_messages=None):
        """Store the tool and job output streams, shrinking oversized values.

        ``job_stdout`` / ``job_stderr`` are reset to ``None`` when not given,
        while ``job_messages`` is only touched when a value is supplied.
        """

        def shrink_and_unicodify(what, stream):
            # Note in the log that only part of an oversized stream is kept.
            if stream and len(stream) > galaxy.util.DATABASE_MAX_STRING_SIZE:
                log.info(
                    "%s for %s %d is greater than %s, only a portion will be logged to database",
                    what,
                    type(self),
                    self.id,
                    galaxy.util.DATABASE_MAX_STRING_SIZE_PRETTY,
                )
            return galaxy.util.shrink_and_unicodify(stream)

        self.tool_stdout = shrink_and_unicodify("tool_stdout", tool_stdout)
        self.tool_stderr = shrink_and_unicodify("tool_stderr", tool_stderr)
        if job_stdout is not None:
            self.job_stdout = shrink_and_unicodify("job_stdout", job_stdout)
        else:
            self.job_stdout = None
        if job_stderr is not None:
            self.job_stderr = shrink_and_unicodify("job_stderr", job_stderr)
        else:
            self.job_stderr = None
        if job_messages is not None:
            self.job_messages = job_messages

    def log_str(self):
        """Short identifier for log lines: class name, id (or "unflushed") and tool id."""
        extra = ""
        if (safe_id := getattr(self, "id", None)) is not None:
            extra += f"id={safe_id}"
        else:
            extra += "unflushed"
        return f"{self.__class__.__name__}[{extra},tool_id={self.tool_id}]"

    @property
    def stdout(self):
        """Tool stdout followed, on a new line, by job stdout when present."""
        stdout = self.tool_stdout or ""
        if self.job_stdout:
            stdout += f"\n{self.job_stdout}"
        return stdout

    @stdout.setter
    def stdout(self, stdout):
        raise NotImplementedError("Attempt to set stdout, must set tool_stdout or job_stdout")

    @property
    def stderr(self):
        """Tool stderr followed, on a new line, by job stderr when present."""
        stderr = self.tool_stderr or ""
        if self.job_stderr:
            stderr += f"\n{self.job_stderr}"
        return stderr

    @stderr.setter
    def stderr(self, stderr):
        # The message previously named "stdout" here (copy-paste slip).
        raise NotImplementedError("Attempt to set stderr, must set tool_stderr or job_stderr")
# SQL template summing the size (total_size, else file_size, else 0) of the
# distinct datasets referenced by non-purged HDAs in a user's non-purged
# histories, excluding datasets that also have a library association.
# Bind parameter: ``:id`` (the user id). ``{and_dataset_condition}`` is filled
# via str.format with an optional extra "AND (...)" filter on ``dataset``.
UNIQUE_DATASET_USER_USAGE = """
WITH per_user_histories AS
(
SELECT id
FROM history
WHERE user_id = :id
AND NOT purged
),
per_hist_hdas AS (
SELECT DISTINCT dataset_id
FROM history_dataset_association
WHERE NOT purged
AND history_id IN (SELECT id FROM per_user_histories)
)
SELECT COALESCE(SUM(COALESCE(dataset.total_size, dataset.file_size, 0)), 0)
FROM dataset
LEFT OUTER JOIN library_dataset_dataset_association ON dataset.id = library_dataset_dataset_association.dataset_id
WHERE dataset.id IN (SELECT dataset_id FROM per_hist_hdas)
AND library_dataset_dataset_association.id IS NULL
{and_dataset_condition}
"""
def calculate_user_disk_usage_statements(user_id, quota_source_map, for_sqlite=False):
    """Build the SQL needed to recompute every disk usage figure of one user.

    Kept as a standalone function so it can be reused for postgres directly
    in pgcleanup.py.

    Returns a list of ``(sql, params)`` tuples, in execution order:

    1. an UPDATE of ``galaxy_user.disk_usage`` (default quota source),
    2. one upsert into ``user_quota_source_usage`` per labelled quota source,
    3. a DELETE removing usage rows for labels that are no longer configured.

    ``for_sqlite`` selects an ``INSERT OR REPLACE`` form of the upsert instead
    of ``ON CONFLICT``.
    """
    default_quota_enabled = quota_source_map.default_quota_enabled
    excluded_ids = quota_source_map.default_usage_excluded_ids()

    # Filter restricting the default usage to datasets outside the excluded stores.
    default_cond = ""
    if default_quota_enabled and excluded_ids:
        default_cond = "dataset.object_store_id IS NULL"
    exclude_cond = ""
    if excluded_ids:
        exclude_cond = "dataset.object_store_id NOT IN :exclude_object_store_ids"
    use_or = " OR " if default_cond and exclude_cond else ""
    default_usage_dataset_condition = f"{default_cond} {use_or} {exclude_cond}"
    if default_usage_dataset_condition.strip():
        default_usage_dataset_condition = f"AND ( {default_usage_dataset_condition} )"

    default_usage_query = UNIQUE_DATASET_USER_USAGE.format(and_dataset_condition=default_usage_dataset_condition)
    update_default = (
        """
UPDATE galaxy_user SET disk_usage = (%s)
WHERE id = :id
"""
        % default_usage_query
    )
    default_params = {"id": user_id}
    if excluded_ids:
        default_params["exclude_object_store_ids"] = excluded_ids
    statements = [(update_default, default_params)]

    source = quota_source_map.ids_per_quota_source()
    # TODO: Merge a lot of these settings together by generating a temp table for
    # the object_store_id to quota_source_label into a temp table of values
    # The per-label SQL text does not depend on the label, so build it once.
    label_usage = UNIQUE_DATASET_USER_USAGE.format(
        and_dataset_condition="AND ( dataset.object_store_id IN :include_object_store_ids )"
    )
    if for_sqlite:
        # hacky alternative for older sqlite
        label_statement = f"""
WITH new (user_id, quota_source_label, disk_usage) AS (
VALUES(:id, :label, ({label_usage}))
)
INSERT OR REPLACE INTO user_quota_source_usage (id, user_id, quota_source_label, disk_usage)
SELECT old.id, new.user_id, new.quota_source_label, new.disk_usage
FROM new
LEFT JOIN user_quota_source_usage AS old
ON new.user_id = old.user_id
AND new.quota_source_label = old.quota_source_label
"""
    else:
        label_statement = f"""
INSERT INTO user_quota_source_usage(user_id, quota_source_label, disk_usage)
VALUES(:id, :label, ({label_usage}))
ON CONFLICT
ON constraint uqsu_unique_label_per_user
DO UPDATE SET disk_usage = excluded.disk_usage
"""
    for quota_source_label, object_store_ids in source.items():
        label_params = {"id": user_id, "label": quota_source_label, "include_object_store_ids": object_store_ids}
        statements.append((label_statement, label_params))

    # Finally drop usage rows whose label is no longer configured.
    cleanup_params = {"id": user_id}
    source_labels = list(source.keys())
    if source_labels:
        clean_old_statement = """
DELETE FROM user_quota_source_usage
WHERE user_id = :id AND quota_source_label NOT IN :labels
"""
        cleanup_params["labels"] = source_labels
    else:
        clean_old_statement = """
DELETE FROM user_quota_source_usage
WHERE user_id = :id AND quota_source_label IS NOT NULL
"""
    statements.append((clean_old_statement, cleanup_params))
    return statements
# TODO: move these models to galaxy.schema.schema once galaxy-data depends on
# galaxy-schema.
# Usage summary for a single quota source of a user.
class UserQuotaBasicUsage(BaseModel):
    # Label of the quota source; None is used for the default (unlabelled) source.
    quota_source_label: Optional[str] = None
    # Disk usage attributed to this quota source (presumably in bytes - confirm).
    total_disk_usage: float
# Basic usage extended with optional quota details; every extra field defaults to None.
class UserQuotaUsage(UserQuotaBasicUsage):
    # NOTE(review): presumably the share of the quota already used - confirm scale with the producer.
    quota_percent: Optional[float] = None
    # NOTE(review): presumably the quota size as a byte count - confirm.
    quota_bytes: Optional[int] = None
    # NOTE(review): presumably a human-readable rendering of the quota - confirm.
    quota: Optional[str] = None
[docs]class User(Base, Dictifiable, RepresentById):
"""
Data for a Galaxy user or admin and relations to their
histories, credentials, and roles.
"""
use_pbkdf2 = True
bootstrap_admin_user = False
# api_keys: 'List[APIKeys]' already declared as relationship()
__tablename__ = "galaxy_user"
id = Column(Integer, primary_key=True)
create_time = Column(DateTime, default=now)
update_time = Column(DateTime, default=now, onupdate=now)
email = Column(TrimmedString(255), index=True, nullable=False)
username = Column(TrimmedString(255), index=True, unique=True)
password = Column(TrimmedString(255), nullable=False)
last_password_change = Column(DateTime, default=now)
external = Column(Boolean, default=False)
form_values_id = Column(Integer, ForeignKey("form_values.id"), index=True)
preferred_object_store_id = Column(String(255), nullable=True)
deleted = Column(Boolean, index=True, default=False)
purged = Column(Boolean, index=True, default=False)
disk_usage = Column(Numeric(15, 0), index=True)
# Column("person_metadata", JSONType), # TODO: add persistent, configurable metadata rep for workflow creator
active = Column(Boolean, index=True, default=True, nullable=False)
activation_token = Column(TrimmedString(64), nullable=True, index=True)
addresses = relationship(
"UserAddress", back_populates="user", order_by=lambda: desc(UserAddress.update_time), cascade_backrefs=False
)
cloudauthz = relationship("CloudAuthz", back_populates="user")
custos_auth = relationship("CustosAuthnzToken", back_populates="user")
default_permissions = relationship("DefaultUserPermissions", back_populates="user")
groups = relationship("UserGroupAssociation", back_populates="user")
histories = relationship(
"History", back_populates="user", order_by=lambda: desc(History.update_time), cascade_backrefs=False # type: ignore[has-type]
)
active_histories = relationship(
"History",
primaryjoin=(lambda: (History.user_id == User.id) & (not_(History.deleted)) & (not_(History.archived))), # type: ignore[has-type]
viewonly=True,
order_by=lambda: desc(History.update_time), # type: ignore[has-type]
)
galaxy_sessions = relationship(
"GalaxySession", back_populates="user", order_by=lambda: desc(GalaxySession.update_time), cascade_backrefs=False # type: ignore[has-type]
)
quotas = relationship("UserQuotaAssociation", back_populates="user")
quota_source_usages = relationship("UserQuotaSourceUsage", back_populates="user")
social_auth = relationship("UserAuthnzToken", back_populates="user")
stored_workflow_menu_entries = relationship(
"StoredWorkflowMenuEntry",
primaryjoin=(
lambda: (StoredWorkflowMenuEntry.user_id == User.id)
& (StoredWorkflowMenuEntry.stored_workflow_id == StoredWorkflow.id) # type: ignore[has-type]
& not_(StoredWorkflow.deleted) # type: ignore[has-type]
),
back_populates="user",
cascade="all, delete-orphan",
collection_class=ordering_list("order_index"),
)
_preferences = relationship("UserPreference", collection_class=attribute_mapped_collection("name"))
values = relationship(
"FormValues", primaryjoin=(lambda: User.form_values_id == FormValues.id) # type: ignore[has-type]
)
# Add type hint (will this work w/SA?)
api_keys: "List[APIKeys]" = relationship(
"APIKeys",
back_populates="user",
order_by=lambda: desc(APIKeys.create_time),
primaryjoin=(
lambda: and_(
User.id == APIKeys.user_id, # type: ignore[attr-defined]
not_(APIKeys.deleted == true()), # type: ignore[has-type]
)
),
)
data_manager_histories = relationship("DataManagerHistoryAssociation", back_populates="user")
roles = relationship("UserRoleAssociation", back_populates="user")
stored_workflows = relationship(
"StoredWorkflow",
back_populates="user",
primaryjoin=(lambda: User.id == StoredWorkflow.user_id), # type: ignore[has-type]
cascade_backrefs=False,
)
all_notifications = relationship("UserNotificationAssociation", back_populates="user", cascade_backrefs=False)
non_private_roles = relationship(
"UserRoleAssociation",
viewonly=True,
primaryjoin=(
lambda: (User.id == UserRoleAssociation.user_id) # type: ignore[has-type]
& (UserRoleAssociation.role_id == Role.id) # type: ignore[has-type]
& not_(Role.name == User.email) # type: ignore[has-type]
),
)
preferences: association_proxy # defined at the end of this module
# attributes that will be accessed and returned when calling to_dict( view='collection' )
dict_collection_visible_keys = ["id", "email", "username", "deleted", "active", "last_password_change"]
# attributes that will be accessed and returned when calling to_dict( view='element' )
dict_element_visible_keys = [
"id",
"email",
"username",
"total_disk_usage",
"nice_total_disk_usage",
"deleted",
"active",
"last_password_change",
"preferred_object_store_id",
]
def __init__(self, email=None, password=None, username=None):
self.email = email
self.password = password
self.external = False
self.deleted = False
self.purged = False
self.active = False
self.username = username
def get_user_data_tables(self, data_table: str):
    """Return this user's ``data_manager_json`` HDAs whose metadata contains ``data_table``.

    Only non-deleted HDAs in the user's histories with an ``ok`` dataset are
    considered, ordered by HDA id. Requires the user to be attached to a
    session.
    """
    session = object_session(self)
    assert session
    hda = HistoryDatasetAssociation
    criteria = (
        hda.deleted == false(),
        hda.extension == "data_manager_json",
        History.user_id == self.id,
        Dataset.state == "ok",
        # excludes data manager runs that actually populated tables.
        # maybe track this formally by creating a different datatype for bundles ?
        Dataset.total_size != Dataset.file_size,
        hda._metadata.contains(data_table),
    )
    metadata_select = select(hda).join(Dataset).join(History).where(*criteria).order_by(hda.id)
    return session.execute(metadata_select).scalars().all()
@property
def extra_preferences(self):
data = defaultdict(lambda: None)
if extra_user_preferences := self.preferences.get("extra_user_preferences"):
try:
data.update(json.loads(extra_user_preferences))
except Exception:
pass
return data
def set_password_cleartext(self, cleartext):
    """
    Validate `cleartext`, store its digest as the user's password and
    record the time of the change.

    Raises ``Exception`` when the password fails validation.
    """
    message = validate_password_str(cleartext)
    if message:
        raise Exception(f"Invalid password: {message}")
    if User.use_pbkdf2:
        digest = galaxy.security.passwords.hash_password(cleartext)
    else:
        digest = new_insecure_hash(text_type=cleartext)
    self.password = digest
    self.last_password_change = now()
[docs] def set_random_password(self, length=16):
"""
Sets user password to a random string of the given length.
:return: void
"""
self.set_password_cleartext(
"".join(random.SystemRandom().choice(string.ascii_letters + string.digits) for _ in range(length))
)
def check_password(self, cleartext):
    """
    Return the result of checking `cleartext` against the stored password
    via ``galaxy.security.passwords.check_password``.
    """
    stored = self.password
    return galaxy.security.passwords.check_password(cleartext, stored)
[docs] def system_user_pwent(self, real_system_username):
"""
Gives the system user pwent entry based on e-mail or username depending
on the value in real_system_username
"""
if real_system_username == "user_email":
username = self.email.split("@")[0]
elif real_system_username == "username":
username = self.username
else:
username = real_system_username
try:
return pwd.getpwnam(username)
except Exception:
log.exception(f"Error getting the password database entry for user {username}")
raise
[docs] def all_roles(self):
"""
Return a unique list of Roles associated with this user or any of their groups.
"""
try:
db_session = object_session(self)
user = (
db_session.query(User)
.filter_by(id=self.id) # don't use get, it will use session variant.
.options(
joinedload(User.roles),
joinedload(User.roles.role),
joinedload(User.groups),
joinedload(User.groups.group),
joinedload(User.groups.group.roles),
joinedload(User.groups.group.roles.role),
)
.one()
)
except Exception:
# If not persistent user, just use models normaly and
# skip optimizations...
user = self
roles = [ura.role for ura in user.roles]
for group in [uga.group for uga in user.groups]:
for role in [gra.role for gra in group.roles]:
if role not in roles:
roles.append(role)
return roles
[docs] def all_roles_exploiting_cache(self):
""" """
roles = [ura.role for ura in self.roles]
for group in [uga.group for uga in self.groups]:
for role in [gra.role for gra in group.roles]:
if role not in roles:
roles.append(role)
return roles
[docs] def get_disk_usage(self, nice_size=False, quota_source_label=None):
"""
Return byte count of disk space used by user or a human-readable
string if `nice_size` is `True`.
"""
if quota_source_label is None:
rval = 0
if self.disk_usage is not None:
rval = self.disk_usage
else:
statement = """
SELECT DISK_USAGE
FROM user_quota_source_usage
WHERE user_id = :user_id and quota_source_label = :label
"""
sa_session = object_session(self)
params = {
"user_id": self.id,
"label": quota_source_label,
}
row = sa_session.execute(statement, params).fetchone()
if row is not None:
rval = row[0]
else:
rval = 0
if nice_size:
rval = galaxy.util.nice_size(rval)
return rval
[docs] def set_disk_usage(self, bytes):
"""
Manually set the disk space used by a user to `bytes`.
"""
self.disk_usage = bytes
total_disk_usage = property(get_disk_usage, set_disk_usage)
[docs] def adjust_total_disk_usage(self, amount, quota_source_label):
assert amount is not None
if amount != 0:
if quota_source_label is None:
self.disk_usage = (self.disk_usage or 0) + amount
else:
# else would work on newer sqlite - 3.24.0
engine = object_session(self).bind
if "sqlite" in engine.dialect.name:
# hacky alternative for older sqlite
statement = """
WITH new (user_id, quota_source_label) AS ( VALUES(:user_id, :label) )
INSERT OR REPLACE INTO user_quota_source_usage (id, user_id, quota_source_label, disk_usage)
SELECT old.id, new.user_id, new.quota_source_label, COALESCE(old.disk_usage + :amount, :amount)
FROM new LEFT JOIN user_quota_source_usage AS old ON new.user_id = old.user_id AND NEW.quota_source_label = old.quota_source_label;
"""
else:
statement = """
INSERT INTO user_quota_source_usage(user_id, disk_usage, quota_source_label)
VALUES(:user_id, :amount, :label)
ON CONFLICT
ON constraint uqsu_unique_label_per_user
DO UPDATE SET disk_usage = user_quota_source_usage.disk_usage + :amount
"""
statement = text(statement)
params = {
"user_id": self.id,
"amount": int(amount),
"label": quota_source_label,
}
with engine.connect() as conn, conn.begin():
conn.execute(statement, params)
def _get_social_auth(self, provider_backend):
if not self.social_auth:
return None
for auth in self.social_auth:
if auth.provider == provider_backend and auth.extra_data:
return auth
return None
def _get_custos_auth(self, provider_backend):
if not self.custos_auth:
return None
for auth in self.custos_auth:
if auth.provider == provider_backend and auth.refresh_token:
return auth
return None
[docs] def get_oidc_tokens(self, provider_backend):
tokens = {"id": None, "access": None, "refresh": None}
auth = self._get_social_auth(provider_backend)
if auth:
tokens["access"] = auth.extra_data.get("access_token", None)
tokens["refresh"] = auth.extra_data.get("refresh_token", None)
tokens["id"] = auth.extra_data.get("id_token", None)
return tokens
# no social auth found, check custos auth
auth = self._get_custos_auth(provider_backend)
if auth:
tokens["access"] = auth.access_token
tokens["refresh"] = auth.refresh_token
tokens["id"] = auth.id_token
return tokens
@property
def nice_total_disk_usage(self):
"""
Return byte count of disk space used in a human-readable string.
"""
return self.get_disk_usage(nice_size=True)
def calculate_disk_usage_default_source(self, object_store):
    """
    Return byte count total of disk space used by all non-purged, non-library
    HDAs in non-purged histories assigned to default quota source.

    The value is computed with a single SQL query; nothing is stored.
    """
    # only used in set_user_disk_usage.py
    assert object_store is not None
    quota_source_map = object_store.get_quota_source_map()
    default_quota_enabled = quota_source_map.default_quota_enabled
    exclude_objectstore_ids = quota_source_map.default_usage_excluded_ids()

    params = {"id": self.id}
    bindparams = [bindparam("id")]
    default_usage_dataset_condition = ""
    if exclude_objectstore_ids:
        null_clause = "dataset.object_store_id IS NULL OR" if default_quota_enabled else ""
        default_usage_dataset_condition = (
            f"AND ( {null_clause} dataset.object_store_id NOT IN :exclude_object_store_ids )"
        )
        params["exclude_object_store_ids"] = exclude_objectstore_ids
        # The id list is bound as an expanding parameter for the NOT IN clause.
        bindparams.append(bindparam("exclude_object_store_ids", expanding=True))

    usage_sql = UNIQUE_DATASET_USER_USAGE.format(and_dataset_condition=default_usage_dataset_condition)
    sql_calc = text(usage_sql).bindparams(*bindparams)
    return object_session(self).scalar(sql_calc, params)
[docs] def calculate_and_set_disk_usage(self, object_store):
"""
Calculates and sets user disk usage.
"""
self._calculate_or_set_disk_usage(object_store=object_store)
def _calculate_or_set_disk_usage(self, object_store):
    """
    Recompute and store this user's disk usage with raw SQL, then commit.

    The statements come from ``calculate_user_disk_usage_statements`` and
    are executed in order on the user's session. Nothing is returned.
    """
    assert object_store is not None
    quota_source_map = object_store.get_quota_source_map()
    sa_session = object_session(self)
    for_sqlite = "sqlite" in sa_session.bind.dialect.name
    statements = calculate_user_disk_usage_statements(self.id, quota_source_map, for_sqlite)
    for sql, args in statements:
        # Parameters whose name ends in "s" hold lists and are bound as expanding.
        binds = [bindparam(key, expanding=key.endswith("s")) for key in args]
        statement = text(sql).bindparams(*binds)
        sa_session.execute(statement, args)
        # expire user.disk_usage so sqlalchemy knows to ignore
        # the existing value - we're setting it in raw SQL for
        # performance reasons and bypassing object properties.
        sa_session.expire(self, ["disk_usage"])
    with transaction(sa_session):
        sa_session.commit()
[docs] @staticmethod
def user_template_environment(user):
"""
>>> env = User.user_template_environment(None)
>>> env['__user_email__']
'Anonymous'
>>> env['__user_id__']
'Anonymous'
>>> user = User('foo@example.com')
>>> user.id = 6
>>> user.username = 'foo2'
>>> env = User.user_template_environment(user)
>>> env['__user_id__']
'6'
>>> env['__user_name__']
'foo2'
"""
if user:
user_id = "%d" % user.id
user_email = str(user.email)
user_name = str(user.username)
else:
user = None
user_id = "Anonymous"
user_email = "Anonymous"
user_name = "Anonymous"
environment = {}
environment["__user__"] = user
environment["__user_id__"] = environment["userId"] = user_id
environment["__user_email__"] = environment["userEmail"] = user_email
environment["__user_name__"] = user_name
return environment
@staticmethod
def expand_user_properties(user, in_string):
    """
    Substitute user template variables into ``in_string``.

    The values come from ``User.user_template_environment(user)``.
    ``Template.safe_substitute`` is used, so placeholders without a
    matching variable are left untouched.
    """
    variables = User.user_template_environment(user)
    template = Template(in_string)
    return template.safe_substitute(variables)
def is_authenticated(self):
    """
    Always report the user as authenticated.

    TODO: is required for python social auth (PSA); however, a user
    authentication is relative to the backend. For instance, a user who is
    authenticated with Google is not necessarily authenticated with Amazon.
    Therefore, this function should also receive the backend and check
    whether this user is already authenticated on that backend. For now,
    always returning True seems reasonable. This is also how a PSA example
    is implemented:
    https://github.com/python-social-auth/social-examples/blob/master/example-cherrypy/example/db/user.py
    """
    return True
def attempt_create_private_role(self):
    """
    Create a private role for this user and commit it.

    The role is named after the user's email and is linked to the user
    through a new ``UserRoleAssociation``.
    """
    sa_session = object_session(self)
    private_role = Role(
        name=self.email,
        description=f"Private Role for {self.email}",
        type=Role.types.PRIVATE,
    )
    sa_session.add(UserRoleAssociation(self, private_role))
    with transaction(sa_session):
        sa_session.commit()
def dictify_usage(self, object_store=None) -> List[UserQuotaBasicUsage]:
    """
    List this user's disk usage per quota source.

    The first entry is always the default (unlabeled) quota source, followed
    by one entry per recorded quota source usage. When ``object_store`` is
    given, quota sources it knows about that have no recorded usage are
    appended with a usage of 0.0.
    """
    usages: List[UserQuotaBasicUsage] = [
        UserQuotaBasicUsage(
            quota_source_label=None,
            total_disk_usage=float(self.disk_usage or 0),
        )
    ]
    seen_labels: Set[Union[str, None]] = {None}
    for source_usage in self.quota_source_usages:
        source_label = source_usage.quota_source_label
        usages.append(
            UserQuotaBasicUsage(
                quota_source_label=source_label,
                total_disk_usage=float(source_usage.disk_usage),
            )
        )
        seen_labels.add(source_label)
    if object_store is None:
        return usages
    # Pad the result with quota sources that have not been used yet.
    for source_label in object_store.get_quota_source_map().ids_per_quota_source().keys():
        if source_label in seen_labels:
            continue
        usages.append(
            UserQuotaBasicUsage(
                quota_source_label=source_label,
                total_disk_usage=0.0,
            )
        )
    return usages
def dictify_usage_for(self, quota_source_label: Optional[str]) -> UserQuotaBasicUsage:
    """
    Return this user's disk usage for a single quota source.

    ``None`` selects the default quota source (``self.disk_usage``). A label
    with no recorded usage is reported with a usage of 0.0.
    """
    total_disk_usage: float
    if quota_source_label is None:
        total_disk_usage = float(self.disk_usage or 0)
    else:
        source_usage = self.quota_source_usage_for(quota_source_label)
        if source_usage is None:
            total_disk_usage = 0.0
        else:
            total_disk_usage = float(source_usage.disk_usage)
    return UserQuotaBasicUsage(
        quota_source_label=quota_source_label,
        total_disk_usage=total_disk_usage,
    )
def quota_source_usage_for(self, quota_source_label: Optional[str]) -> Optional["UserQuotaSourceUsage"]:
    """Return the first recorded usage whose label equals ``quota_source_label``, or None."""
    matching = (
        source_usage
        for source_usage in self.quota_source_usages
        if source_usage.quota_source_label == quota_source_label
    )
    return next(matching, None)
def count_stored_workflow_user_assocs(self, stored_workflow) -> int:
    """Return how many share associations link this user to ``stored_workflow``."""
    # Wrap the filtered SELECT in an explicit subquery: handing a Select
    # object straight to select_from() relies on implicit coercion, which
    # SQLAlchemy has deprecated.
    shares = (
        select(StoredWorkflowUserShareAssociation)
        .filter_by(user=self, stored_workflow=stored_workflow)
        .subquery()
    )
    stmt = select(func.count()).select_from(shares)
    session = object_session(self)
    return session.scalar(stmt)
class PasswordResetToken(Base):
    """Password reset token for a user; expires 24 hours after creation."""

    __tablename__ = "password_reset_token"

    token = Column(String(32), primary_key=True, unique=True, index=True)
    expiration_time = Column(DateTime)
    user_id = Column(Integer, ForeignKey("galaxy_user.id"), index=True)
    user = relationship("User")

    def __init__(self, user, token=None):
        # A falsy token (None or "") is replaced by a freshly generated id.
        self.token = token or unique_id()
        self.user = user
        self.expiration_time = now() + timedelta(hours=24)
class DynamicTool(Base, Dictifiable, RepresentById):
    """Mapped ``dynamic_tool`` row; tool attributes plus a JSON ``value`` column."""

    __tablename__ = "dynamic_tool"

    id = Column(Integer, primary_key=True)
    uuid = Column(UUIDType())
    create_time = Column(DateTime, default=now)
    update_time = Column(DateTime, index=True, default=now, onupdate=now)
    tool_id = Column(Unicode(255))
    tool_version = Column(Unicode(255))
    tool_format = Column(Unicode(255))
    tool_path = Column(Unicode(255))
    tool_directory = Column(Unicode(255))
    hidden = Column(Boolean, default=True)
    active = Column(Boolean, default=True)
    value = Column(MutableJSONType)

    # The same keys are exposed in both the collection and element views.
    dict_collection_visible_keys = ("id", "tool_id", "tool_format", "tool_version", "uuid", "active", "hidden")
    dict_element_visible_keys = ("id", "tool_id", "tool_format", "tool_version", "uuid", "active", "hidden")

    def __init__(self, active=True, hidden=True, **kwd):
        super().__init__(**kwd)
        self.active = active
        self.hidden = hidden
        # The uuid is always passed through get_uuid, even if none was given.
        self.uuid = get_uuid(kwd.get("uuid"))
class BaseJobMetric(Base):
    """Abstract base for metric rows: a plugin name, a metric name and a value."""

    __abstract__ = True

    def __init__(self, plugin, metric_name, metric_value):
        super().__init__()
        self.plugin = plugin
        self.metric_name = metric_name
        self.metric_value = metric_value
class JobMetricText(BaseJobMetric, RepresentById):
    """Text-valued metric attached to a job (table ``job_metric_text``)."""

    __tablename__ = "job_metric_text"

    id = Column(Integer, primary_key=True)
    job_id = Column(Integer, ForeignKey("job.id"), index=True)
    plugin = Column(Unicode(255))
    metric_name = Column(Unicode(255))
    # Value length is capped at JOB_METRIC_MAX_LENGTH.
    metric_value = Column(Unicode(JOB_METRIC_MAX_LENGTH))
class JobMetricNumeric(BaseJobMetric, RepresentById):
    """Numeric metric attached to a job (table ``job_metric_numeric``)."""

    __tablename__ = "job_metric_numeric"

    id = Column(Integer, primary_key=True)
    job_id = Column(Integer, ForeignKey("job.id"), index=True)
    plugin = Column(Unicode(255))
    metric_name = Column(Unicode(255))
    # Stored as NUMERIC(JOB_METRIC_PRECISION, JOB_METRIC_SCALE).
    metric_value = Column(Numeric(JOB_METRIC_PRECISION, JOB_METRIC_SCALE))
class TaskMetricText(BaseJobMetric, RepresentById):
    """Text-valued metric attached to a task (table ``task_metric_text``)."""

    __tablename__ = "task_metric_text"

    id = Column(Integer, primary_key=True)
    task_id = Column(Integer, ForeignKey("task.id"), index=True)
    plugin = Column(Unicode(255))
    metric_name = Column(Unicode(255))
    # Value length is capped at JOB_METRIC_MAX_LENGTH.
    metric_value = Column(Unicode(JOB_METRIC_MAX_LENGTH))
class TaskMetricNumeric(BaseJobMetric, RepresentById):
    """Numeric metric attached to a task (table ``task_metric_numeric``)."""

    __tablename__ = "task_metric_numeric"

    id = Column(Integer, primary_key=True)
    task_id = Column(Integer, ForeignKey("task.id"), index=True)
    plugin = Column(Unicode(255))
    metric_name = Column(Unicode(255))
    # Stored as NUMERIC(JOB_METRIC_PRECISION, JOB_METRIC_SCALE).
    metric_value = Column(Numeric(JOB_METRIC_PRECISION, JOB_METRIC_SCALE))
class IoDicts(NamedTuple):
    """Name-keyed inputs, outputs and output collections, as built by ``Job.io_dicts``."""

    # Input datasets by name; a value may be None.
    inp_data: Dict[str, Optional["DatasetInstance"]]
    # Output datasets by name.
    out_data: Dict[str, "DatasetInstance"]
    # Output collections (instances or bare collections) by name.
    out_collections: Dict[str, Union["DatasetCollectionInstance", "DatasetCollection"]]
class Job(Base, JobLike, UsesCreateAndUpdateTime, Dictifiable, Serializable):
    """
    A job represents a request to run a tool given input datasets, tool
    parameters, and output datasets.
    """

    __tablename__ = "job"

    # --- columns -----------------------------------------------------------
    id = Column(Integer, primary_key=True)
    create_time = Column(DateTime, default=now)
    update_time = Column(DateTime, default=now, onupdate=now, index=True)
    history_id = Column(Integer, ForeignKey("history.id"), index=True)
    library_folder_id = Column(Integer, ForeignKey("library_folder.id"), index=True)
    tool_id = Column(String(255))
    tool_version = Column(TEXT, default="1.0.0")
    galaxy_version = Column(String(64), default=None)
    dynamic_tool_id = Column(Integer, ForeignKey("dynamic_tool.id"), index=True, nullable=True)
    state = Column(String(64), index=True)
    info = Column(TrimmedString(255))
    copied_from_job_id = Column(Integer, nullable=True)
    command_line = Column(TEXT)
    dependencies = Column(MutableJSONType, nullable=True)
    job_messages = Column(MutableJSONType, nullable=True)
    param_filename = Column(String(1024))
    runner_name = Column(String(255))
    job_stdout = Column(TEXT)
    job_stderr = Column(TEXT)
    tool_stdout = Column(TEXT)
    tool_stderr = Column(TEXT)
    exit_code = Column(Integer, nullable=True)
    traceback = Column(TEXT)
    session_id = Column(Integer, ForeignKey("galaxy_session.id"), index=True, nullable=True)
    user_id = Column(Integer, ForeignKey("galaxy_user.id"), index=True, nullable=True)
    job_runner_name = Column(String(255))
    job_runner_external_id = Column(String(255), index=True)
    destination_id = Column(String(255), nullable=True)
    destination_params = Column(MutableJSONType, nullable=True)
    object_store_id = Column(TrimmedString(255), index=True)
    imported = Column(Boolean, default=False, index=True)
    params = Column(TrimmedString(255), index=True)
    handler = Column(TrimmedString(255), index=True)
    preferred_object_store_id = Column(String(255), nullable=True)
    object_store_id_overrides = Column(JSONType)

    # --- relationships -----------------------------------------------------
    user = relationship("User")
    galaxy_session = relationship("GalaxySession")
    history = relationship("History", back_populates="jobs")
    library_folder = relationship("LibraryFolder")
    parameters = relationship("JobParameter")
    input_datasets = relationship("JobToInputDatasetAssociation", back_populates="job")
    input_dataset_collections = relationship("JobToInputDatasetCollectionAssociation", back_populates="job")
    input_dataset_collection_elements = relationship(
        "JobToInputDatasetCollectionElementAssociation", back_populates="job"
    )
    output_dataset_collection_instances = relationship("JobToOutputDatasetCollectionAssociation", back_populates="job")
    output_dataset_collections = relationship("JobToImplicitOutputDatasetCollectionAssociation", back_populates="job")
    post_job_actions = relationship("PostJobActionAssociation", back_populates="job", cascade_backrefs=False)
    input_library_datasets = relationship("JobToInputLibraryDatasetAssociation", back_populates="job")
    output_library_datasets = relationship("JobToOutputLibraryDatasetAssociation", back_populates="job")
    external_output_metadata = relationship("JobExternalOutputMetadata", back_populates="job")
    tasks = relationship("Task", back_populates="job")
    output_datasets = relationship("JobToOutputDatasetAssociation", back_populates="job")
    state_history = relationship("JobStateHistory")
    text_metrics = relationship("JobMetricText")
    numeric_metrics = relationship("JobMetricNumeric")
    interactivetool_entry_points = relationship("InteractiveToolEntryPoint", back_populates="job", uselist=True)
    implicit_collection_jobs_association = relationship(
        "ImplicitCollectionJobsJobAssociation", back_populates="job", uselist=False, cascade_backrefs=False
    )
    container = relationship("JobContainerAssociation", back_populates="job", uselist=False)
    data_manager_association = relationship(
        "DataManagerJobAssociation", back_populates="job", uselist=False, cascade_backrefs=False
    )
    history_dataset_collection_associations = relationship("HistoryDatasetCollectionAssociation", back_populates="job")
    workflow_invocation_step = relationship(
        "WorkflowInvocationStep", back_populates="job", uselist=False, cascade_backrefs=False
    )

    # --- column properties assigned later ------------------------------------
    any_output_dataset_collection_instances_deleted: column_property  # defined at the end of this module
    any_output_dataset_deleted: column_property  # defined at the end of this module

    # --- keys exposed by to_dict() ---------------------------------------------
    dict_collection_visible_keys = ["id", "state", "exit_code", "update_time", "create_time", "galaxy_version"]
    dict_element_visible_keys = [
        "id",
        "state",
        "exit_code",
        "update_time",
        "create_time",
        "galaxy_version",
        "command_version",
        "copied_from_job_id",
    ]

    # Metric row classes used for this job's numeric and text metrics.
    _numeric_metric = JobMetricNumeric
    _text_metric = JobMetricText

    states: TypeAlias = JobState

    # states that are not expected to change, except through admin action or re-scheduling
    terminal_states = [states.OK, states.ERROR, states.DELETED]
    # deleting state should not turn back into any of the non-ready states
    finished_states = terminal_states + [states.DELETING]

    #: job states where the job hasn't finished and the model may still change
    non_ready_states = [
        states.NEW,
        states.RESUBMITTED,
        states.UPLOAD,
        states.WAITING,
        states.QUEUED,
        states.RUNNING,
    ]
# Please include an accessor (get/set pair) for any new columns/members.
def __init__(self):
    """Create a job in the NEW state and record that state in its history."""
    self.dependencies = []
    self.state = Job.states.NEW
    self.imported = False
    self._init_metrics()
    # Snapshot the initial state as the first state-history entry.
    initial_entry = JobStateHistory(self)
    self.state_history.append(initial_entry)
@property
def running(self):
    """True when the job's state equals ``Job.states.RUNNING``."""
    return self.state == Job.states.RUNNING
@property
def finished(self):
    """True when the job's state is one of ``finished_states``."""
    return self.state in self.finished_states
def io_dicts(self, exclude_implicit_outputs=False) -> IoDicts:
    """
    Collect the job's inputs, outputs and output collections keyed by name.

    Library dataset associations are merged into the input and output
    dicts after the history ones, so on a name clash the library entry wins.
    With ``exclude_implicit_outputs``, a collection instance whose name is
    also an output dataset name is left out (it is a mapped-over output).
    Implicit output collections are always merged in last.
    """
    inp_data: Dict[str, Optional[DatasetInstance]] = {}
    for assoc in self.input_datasets:
        inp_data[assoc.name] = assoc.dataset
    out_data: Dict[str, DatasetInstance] = {}
    for assoc in self.output_datasets:
        out_data[assoc.name] = assoc.dataset
    for assoc in self.input_library_datasets:
        inp_data[assoc.name] = assoc.dataset
    for assoc in self.output_library_datasets:
        out_data[assoc.name] = assoc.dataset

    out_collections: Dict[str, Union[DatasetCollectionInstance, DatasetCollection]] = {
        assoc.name: assoc.dataset_collection_instance
        for assoc in self.output_dataset_collection_instances
        if not exclude_implicit_outputs or assoc.name not in out_data
    }
    for assoc in self.output_dataset_collections:
        out_collections[assoc.name] = assoc.dataset_collection
    return IoDicts(inp_data, out_data, out_collections)
# TODO: Add accessors for members defined in SQL Alchemy for the Job table and
# for the mapper defined to the Job table.
def get_external_output_metadata(self):
    """
    Return the job's ``external_output_metadata`` relationship.

    This is a reference from Job to JobExternalOutputMetadata; it exists
    for a job but not for a task.
    """
    return self.external_output_metadata
def get_job_runner_name(self):
    """
    Return ``job_runner_name``.

    The Task class exposes ``task_runner_name`` through the same accessor.
    Note that the ``runner_name`` field is not the same thing.
    """
    return self.job_runner_name
def get_job_runner_external_id(self):
    """Return ``job_runner_external_id`` (the Task equivalent reads a different member)."""
    return self.job_runner_external_id
def get_user(self):
    """Return the related ``user`` (a relationship to User on this class)."""
    return self.user
def get_tasks(self):
    """Return ``tasks``, which is part of a relationship in the SQLAlchemy schema."""
    return self.tasks
def get_id_tag(self):
    """
    Return a tag that can be useful in identifying a Job.

    The tag is the job's ``id`` rendered as a string.
    """
    id_tag = f"{self.id}"
    return id_tag
def get_user_email(self):
    """
    Return the email of the user behind this job, or None.

    The job's own user is preferred, then the galaxy session's user,
    then the history's user.
    """
    if self.user is not None:
        return self.user.email
    session = self.galaxy_session
    if session is not None and session.user is not None:
        return session.user.email
    history = self.history
    if history is not None and history.user is not None:
        return history.user.email
    return None
def set_input_library_datasets(self, input_library_datasets):
    """Replace the job's ``input_library_datasets`` with the given value."""
    self.input_library_datasets = input_library_datasets
def set_output_library_datasets(self, output_library_datasets):
    """Replace the job's ``output_library_datasets`` with the given value."""
    self.output_library_datasets = output_library_datasets
def get_job(self):
    """
    Return this job itself.

    Exists so that jobs and tasks share one interface (``.get_job()``) for
    reaching the underlying job object.
    """
    return self
def set_runner_external_id(self, job_runner_external_id):
    """Store the given value as ``job_runner_external_id``."""
    self.job_runner_external_id = job_runner_external_id
def add_input_dataset(self, name, dataset=None, dataset_id=None):
    """
    Register an input dataset under ``name``.

    When no ``dataset`` object is supplied but a ``dataset_id`` is, the
    association is pointed at that id instead.
    """
    association = JobToInputDatasetAssociation(name, dataset)
    only_id_given = dataset is None and dataset_id is not None
    if only_id_given:
        association.dataset_id = dataset_id
    add_object_to_object_session(self, association)
    self.input_datasets.append(association)
def add_output_dataset(self, name, dataset):
    """Register ``dataset`` as an output under ``name``, claiming it for this job if it has no job yet."""
    association = JobToOutputDatasetAssociation(name, dataset)
    underlying = dataset.dataset
    if underlying.job is None:
        # Only set job if dataset doesn't already have associated job.
        # database operation tools that make copies should not modify the job here.
        underlying.job = self
    add_object_to_object_session(self, association)
    self.output_datasets.append(association)
def add_input_dataset_collection(self, name, dataset_collection):
    """Append an input collection association for ``dataset_collection`` under ``name``."""
    association = JobToInputDatasetCollectionAssociation(name, dataset_collection)
    self.input_dataset_collections.append(association)
def add_input_dataset_collection_element(self, name, dataset_collection_element):
    """Append an input association for a single collection element under ``name``."""
    association = JobToInputDatasetCollectionElementAssociation(name, dataset_collection_element)
    self.input_dataset_collection_elements.append(association)
def add_output_dataset_collection(self, name, dataset_collection_instance):
    """Append an output association for ``dataset_collection_instance`` under ``name``."""
    association = JobToOutputDatasetCollectionAssociation(name, dataset_collection_instance)
    self.output_dataset_collection_instances.append(association)
def add_implicit_output_dataset_collection(self, name, dataset_collection):
    """Append an implicit output association for ``dataset_collection`` under ``name``."""
    association = JobToImplicitOutputDatasetCollectionAssociation(name, dataset_collection)
    self.output_dataset_collections.append(association)
def add_input_library_dataset(self, name, dataset):
    """Append an input library dataset association for ``dataset`` under ``name``."""
    association = JobToInputLibraryDatasetAssociation(name, dataset)
    self.input_library_datasets.append(association)
def add_output_library_dataset(self, name, dataset):
    """Append an output library dataset association for ``dataset`` under ``name``."""
    association = JobToOutputLibraryDatasetAssociation(name, dataset)
    self.output_library_datasets.append(association)
def add_post_job_action(self, pja):
    """Attach the post job action ``pja`` to this job through a new association."""
    association = PostJobActionAssociation(pja, self)
    self.post_job_actions.append(association)
def get_change_datatype_actions(self):
    """
    Map output names to the new datatype requested for them.

    Only post job actions whose ``action_type`` is ``"ChangeDatatypeAction"``
    are considered; the datatype is read from ``action_arguments["newtype"]``.
    """
    new_types = {}
    for association in self.post_job_actions:
        if association.post_job_action.action_type != "ChangeDatatypeAction":
            continue
        output_name = association.post_job_action.output_name
        new_types[output_name] = association.post_job_action.action_arguments["newtype"]
    return new_types
@property
def all_entry_points_configured(self):
    """
    True when every interactive tool entry point is configured.

    Always returns a real bool; a job without entry points yields True.
    An entry point whose ``configured`` is falsy (including None) makes
    the result False.
    """
    # consider an actual DB attribute for this.
    return all(ep.configured for ep in self.interactivetool_entry_points)
def set_state(self, state: JobState) -> bool:
    """
    Change the job's state and record it in the state history.

    Returns True if the state has changed, else False.
    """
    if self.state == state:
        # Nothing changed, no action needed
        return False
    session = object_session(self)
    use_guarded_update = session and self.id and state not in Job.finished_states
    if not use_guarded_update:
        # No session, unsaved job, or a finished target state: plain ORM assignment.
        self.state = state
        self.state_history.append(JobStateHistory(self))
        return True
    # The UPDATE only matches while the stored state is neither the target
    # state nor a finished state.
    result = session.execute(
        update(Job.table)
        .where(Job.id == self.id, ~Job.state.in_((state, *Job.finished_states)))
        .values(state=state)
    )
    if result.rowcount != 1:
        return False
    # Need to expire state since we just updated it, but ORM doesn't know about it.
    session.expire(self, ["state"])
    self.state_history.append(JobStateHistory(self))
    return True
def get_param_values(self, app, ignore_errors=False):
    """
    Read encoded parameter values from the database and turn back into a
    dict of tool parameter values.

    The tool is looked up in ``app.toolbox`` by the job's tool id and
    version, and its ``params_from_strings`` does the decoding.
    """
    encoded_params = self.raw_param_dict()
    tool = app.toolbox.get_tool(self.tool_id, tool_version=self.tool_version)
    return tool.params_from_strings(encoded_params, app, ignore_errors=ignore_errors)
def raw_param_dict(self):
    """Return the job's parameters as a ``{name: value}`` dict of stored values."""
    raw_params = {}
    for parameter in self.parameters:
        raw_params[parameter.name] = parameter.value
    return raw_params
def check_if_output_datasets_deleted(self):
    """
    Return true if all of the output datasets associated with this job are
    in the deleted state (also true when the job has no output datasets).
    """
    # only the originator of the job can delete a dataset to cause
    # cancellation of the job, no need to loop through history_associations
    return all(association.dataset.deleted for association in self.output_datasets)
def mark_stopped(self, track_jobs_in_database=False):
    """
    Mark this job as stopped.

    With ``track_jobs_in_database`` the job is put into STOPPING rather
    than STOPPED. Jobs that are already finished are left untouched.
    """
    if self.finished:
        # Do not modify the state/outputs of jobs that are already terminal
        return
    self.state = Job.states.STOPPING if track_jobs_in_database else Job.states.STOPPED
def mark_deleted(self, track_jobs_in_database=False):
    """
    Mark this job as deleted, and mark any output datasets as discarded.

    With ``track_jobs_in_database`` the job is put into DELETING rather
    than DELETED. Jobs that are already finished are left untouched.
    """
    if self.finished:
        # Do not modify the state/outputs of jobs that are already terminal
        return
    self.state = Job.states.DELETING if track_jobs_in_database else Job.states.DELETED
    self.info = "Job output deleted by user before job completed."
    for association in self.output_datasets:
        hda = association.dataset
        hda.deleted = True
        hda.state = hda.states.DISCARDED
        # propagate info across shared datasets
        for shared in hda.dataset.history_associations:
            shared.deleted = True
            shared.blurb = "deleted"
            shared.peek = "Job deleted"
            shared.info = "Job output deleted by user before job completed"
def mark_failed(self, info="Job execution failed", blurb=None, peek=None):
    """
    Mark this job as failed, and mark any output datasets as errored.

    ``info`` is stored on the job and on every history association sharing
    an output's underlying dataset; ``blurb`` and ``peek`` are only written
    to those associations when truthy.
    """
    self.state = self.states.FAILED
    self.info = info
    for association in self.output_datasets:
        output = association.dataset
        output.state = output.states.ERROR
        for shared in output.dataset.history_associations:
            shared.state = shared.states.ERROR
            if blurb:
                shared.blurb = blurb
            if peek:
                shared.peek = peek
            shared.info = info
def resume(self, flush=True):
    """
    Move a PAUSED job back to NEW and resume the jobs its outputs unpause.

    Dependent jobs are resumed with ``flush=False``; when ``flush`` is true
    this call commits the session at the end, whether or not the job was
    paused.
    """
    if self.state == self.states.PAUSED:
        self.set_state(self.states.NEW)
        object_session(self).add(self)
        dependents = set()
        for association in self.output_datasets:
            dependents.update(association.dataset.unpause_dependent_jobs(dependents))
        for dependent in dependents:
            dependent.resume(flush=False)
    if not flush:
        return
    sa_session = object_session(self)
    with transaction(sa_session):
        sa_session.commit()
def _serialize(self, id_encoder, serialization_options):
    """
    Build the dict of job attributes used for serialization.

    Scalar job fields are copied as they are, timestamps are rendered with
    ``isoformat()``, and the stored parameters are decoded with
    ``safe_loads``. Any decoded dict that carries a ``src`` of ``hda``,
    ``hdca`` or ``dce`` gets its ``id`` replaced by the identifier supplied
    by ``serialization_options``.
    """
    job_attrs = dict_for(self)
    serialization_options.attach_identifier(id_encoder, self, job_attrs)
    copied_fields = (
        "tool_id",
        "tool_version",
        "galaxy_version",
        "state",
        "info",
        "traceback",
        "command_line",
        "tool_stderr",
        "job_stderr",
        "tool_stdout",
        "job_stdout",
        "exit_code",
    )
    for field in copied_fields:
        job_attrs[field] = getattr(self, field)
    job_attrs["create_time"] = self.create_time.isoformat()
    job_attrs["update_time"] = self.update_time.isoformat()
    job_attrs["job_messages"] = self.job_messages

    # Get the job's parameters
    raw_params = self.raw_param_dict()
    decoded_params = {name: safe_loads(raw_params[name]) for name in raw_params}

    def remap_objects(p, k, obj):
        is_reference = isinstance(obj, dict) and "src" in obj and obj["src"] in ["hda", "hdca", "dce"]
        if not is_reference:
            return (k, obj)
        # Copy before rewriting so the decoded input is not mutated.
        rewritten = obj.copy()
        rewritten["id"] = serialization_options.get_identifier_for_id(id_encoder, obj["id"])
        return (k, rewritten)

    decoded_params = remap(decoded_params, remap_objects)
    job_attrs["params"] = dict(decoded_params.items())
    return job_attrs
def to_dict(self, view="collection", system_details=False):
    """
    Return a dict describing the job for the requested ``view``.

    ``"admin_job_list"`` is built on top of the parent's collection view
    and adds admin-only keys. ``system_details`` adds the external id,
    command line and traceback. The ``"element"`` view additionally lists
    parameters, inputs, outputs and output collections; parameters whose
    name matches an input are dropped from ``params``.
    """
    admin_list = view == "admin_job_list"
    rval = super().to_dict(view="collection" if admin_list else view)
    rval["tool_id"] = self.tool_id
    rval["tool_version"] = self.tool_version
    rval["history_id"] = self.history_id
    if system_details or admin_list:
        # System level details that only admins should have.
        rval["external_id"] = self.job_runner_external_id
        rval["command_line"] = self.command_line
        rval["traceback"] = self.traceback
    if admin_list:
        rval["user_email"] = self.user.email if self.user else None
        rval["handler"] = self.handler
        rval["job_runner_name"] = self.job_runner_name
        rval["info"] = self.info
        rval["session_id"] = self.session_id
        if self.galaxy_session and self.galaxy_session.remote_host:
            rval["remote_host"] = self.galaxy_session.remote_host
    if view != "element":
        return rval

    def describe(associations, src):
        # One {"id", "src", "uuid"} entry per association that has a dataset.
        described = {}
        for assoc in associations:
            if assoc.dataset is None:
                continue
            uuid = assoc.dataset.dataset.uuid
            described[assoc.name] = {
                "id": assoc.dataset.id,
                "src": src,
                "uuid": str(uuid) if uuid is not None else None,
            }
        return described

    param_dict = {p.name: p.value for p in self.parameters}
    rval["params"] = param_dict

    input_dict = describe(self.input_datasets, "hda")
    input_dict.update(describe(self.input_library_datasets, "ldda"))
    # rval["params"] is this same dict, so inputs disappear from it too.
    for input_name in input_dict:
        param_dict.pop(input_name, None)
    rval["inputs"] = input_dict

    output_dict = describe(self.output_datasets, "hda")
    output_dict.update(describe(self.output_library_datasets, "ldda"))
    rval["outputs"] = output_dict

    output_collections = {}
    for jtodca in self.output_dataset_collection_instances:
        output_collections[jtodca.name] = {"id": jtodca.dataset_collection_instance.id, "src": "hdca"}
    rval["output_collections"] = output_collections
    return rval
def update_hdca_update_time_for_job(self, update_time, sa_session, supports_skip_locked):
    """
    Set ``update_time`` on the collection associations related to this job.

    Two UPDATEs are issued: an "explicit" one for associations whose
    ``job_id`` is this job, and an "implicit" one for associations reached
    through implicit collection jobs.

    :param update_time: timestamp written to the matched rows.
    :param sa_session: session used to build and execute the statements.
    :param supports_skip_locked: when true, the implicit rows are selected
        with ``FOR UPDATE SKIP LOCKED``; otherwise the implicit update runs
        on a SERIALIZABLE connection and an ``OperationalError`` is rolled
        back instead of raised.
    """
    subq = (
        sa_session.query(HistoryDatasetCollectionAssociation.id)
        .join(ImplicitCollectionJobs)
        .join(ImplicitCollectionJobsJobAssociation)
        .filter(ImplicitCollectionJobsJobAssociation.job_id == self.id)
    )
    if supports_skip_locked:
        subq = subq.with_for_update(skip_locked=True).subquery()
    implicit_statement = (
        HistoryDatasetCollectionAssociation.table.update()
        .where(HistoryDatasetCollectionAssociation.table.c.id.in_(select(subq)))
        .values(update_time=update_time)
    )
    explicit_statement = (
        HistoryDatasetCollectionAssociation.table.update()
        .where(HistoryDatasetCollectionAssociation.table.c.job_id == self.id)
        .values(update_time=update_time)
    )
    sa_session.execute(explicit_statement)
    if supports_skip_locked:
        sa_session.execute(implicit_statement)
    else:
        conn = sa_session.connection(execution_options={"isolation_level": "SERIALIZABLE"})
        with conn.begin() as trans:
            try:
                conn.execute(implicit_statement)
                trans.commit()
            except OperationalError as e:
                # If this is a serialization failure on PostgreSQL, then e.orig is a psycopg2 TransactionRollbackError
                # and should have attribute `pgcode`. Other engines should just report the message and move on.
                # Compare as text: pgcode is a five-character SQLSTATE that may
                # contain letters (e.g. "57P01") or be None, so int() could raise
                # here and prevent the rollback below.
                pgcode = getattr(e.orig, "pgcode", None)
                if str(pgcode) != "40001":
                    log.debug(
                        f"Updating implicit collection uptime_time for job {self.id} failed (this is expected for large collections and not a problem): {unicodify(e)}"
                    )
                trans.rollback()
def set_final_state(self, final_state, supports_skip_locked):
    """
    Set the job's final state and touch the rows that depend on it.

    After the state change, the related collection associations and the
    workflow invocation steps of this job get a fresh ``update_time``.
    """
    self.set_state(final_state)
    # TODO: migrate to where-in subqueries?
    touch_invocation_steps = text(
        """
        UPDATE workflow_invocation_step
        SET update_time = :update_time
        WHERE job_id = :job_id;
    """
    )
    sa_session = object_session(self)
    # One timestamp is shared by both updates.
    update_time = now()
    self.update_hdca_update_time_for_job(
        update_time=update_time, sa_session=sa_session, supports_skip_locked=supports_skip_locked
    )
    sa_session.execute(touch_invocation_steps, {"job_id": self.id, "update_time": update_time})
def get_destination_configuration(self, dest_params, config, key, default=None):
    """Get a destination parameter that can be defaulted back
    in specified config if it needs to be applied globally.

    Lookup order: the job's own ``destination_params``, then
    ``dest_params``, then an attribute of ``config``, then ``default``.
    A stored value of None counts as specified.
    """
    # Private sentinel so that None can be told apart from "not set".
    missing = object()
    value = (self.destination_params or {}).get(key, missing)
    if value is not missing:
        return value
    value = dest_params.get(key, missing)
    if value is not missing:
        return value
    value = getattr(config, key, missing)
    if value is not missing:
        return value
    return default
@property
def command_version(self):
    """Return the ``tool_version`` of the first output dataset, or None without outputs."""
    # TODO: make actual database property and track properly - we should be recording this on the job and not on the datasets
    for first_output in self.output_datasets:
        # Only the first association is ever consulted.
        return first_output.dataset.tool_version
    return None
def update_output_states(self, supports_skip_locked):
    """
    Push the job's state and info onto its outputs with raw SQL.

    Datasets created by this job receive the job's state; history and
    library dataset associations of those datasets receive the job's info.
    All touched rows, including the related collection associations, share
    one ``update_time``.
    """
    # TODO: migrate to where-in subqueries?
    update_datasets = text(
        """
        UPDATE dataset
        SET
            state = :state,
            update_time = :update_time
        WHERE
            dataset.job_id = :job_id
    """
    )
    update_hdas = text(
        """
        UPDATE history_dataset_association
        SET
            info = :info,
            update_time = :update_time
        FROM dataset
        WHERE
            history_dataset_association.dataset_id = dataset.id
            AND dataset.job_id = :job_id;
    """
    )
    update_lddas = text(
        """
        UPDATE library_dataset_dataset_association
        SET
            info = :info,
            update_time = :update_time
        FROM dataset
        WHERE
            library_dataset_dataset_association.dataset_id = dataset.id
            AND dataset.job_id = :job_id;
    """
    )
    sa_session = object_session(self)
    update_time = now()
    self.update_hdca_update_time_for_job(
        update_time=update_time, sa_session=sa_session, supports_skip_locked=supports_skip_locked
    )
    params = {"job_id": self.id, "state": self.state, "info": self.info, "update_time": update_time}
    for statement in (update_datasets, update_hdas, update_lddas):
        sa_session.execute(statement, params)
def remappable(self):
    """
    Check whether job is remappable when rerun.

    Only jobs in the ERROR state qualify. Returns True when any output
    dataset has dependent jobs, the string
    ``"job_produced_collection_elements"`` when the job has output
    collection instances, and False otherwise (including when the check
    itself raises, which is logged).
    """
    if self.state != self.states.ERROR:
        return False
    try:
        if any(association.dataset.dependent_jobs for association in self.output_datasets):
            return True
        if self.output_dataset_collection_instances:
            # We'll want to replace this item
            return "job_produced_collection_elements"
    except Exception:
        log.exception(f"Error trying to determine if job {self.id} is remappable")
    return False
def hide_outputs(self, flush=True):
    """
    Mark every output dataset and output collection instance as not visible.

    Visibility is cleared on each association's ``item``; with ``flush``
    the session is committed afterwards.
    """
    output_associations = self.output_datasets + self.output_dataset_collection_instances
    for association in output_associations:
        association.item.visible = False
    if not flush:
        return
    sa_session = object_session(self)
    with transaction(sa_session):
        sa_session.commit()
[docs]class Task(Base, JobLike, RepresentById):
"""
A task represents a single component of a job.
"""
__tablename__ = "task"
id = Column(Integer, primary_key=True)
create_time = Column(DateTime, default=now)
execution_time = Column(DateTime)
update_time = Column(DateTime, default=now, onupdate=now)
state = Column(String(64), index=True)
command_line = Column(TEXT)
param_filename = Column(String(1024))
runner_name = Column(String(255))
job_stdout = Column(TEXT) # job_stdout makes sense here because it is short for job script standard out
job_stderr = Column(TEXT)
tool_stdout = Column(TEXT)
tool_stderr = Column(TEXT)
exit_code = Column(Integer, nullable=True)
job_messages = Column(MutableJSONType, nullable=True)
info = Column(TrimmedString(255))
traceback = Column(TEXT)
job_id = Column(Integer, ForeignKey("job.id"), index=True, nullable=False)
working_directory = Column(String(1024))
task_runner_name = Column(String(255))
task_runner_external_id = Column(String(255))
prepare_input_files_cmd = Column(TEXT)
job = relationship("Job", back_populates="tasks")
text_metrics = relationship("TaskMetricText")
numeric_metrics = relationship("TaskMetricNumeric")
_numeric_metric = TaskMetricNumeric
_text_metric = TaskMetricText
[docs] class states(str, Enum):
NEW = "new"
WAITING = "waiting"
QUEUED = "queued"
RUNNING = "running"
OK = "ok"
ERROR = "error"
DELETED = "deleted"
# Please include an accessor (get/set pair) for any new columns/members.
def __init__(self, job, working_directory, prepare_files_cmd):
self.parameters = []
self.state = Task.states.NEW
self.working_directory = working_directory
add_object_to_object_session(self, job)
self.job = job
self.prepare_input_files_cmd = prepare_files_cmd
self._init_metrics()
[docs] def get_param_values(self, app):
"""
Read encoded parameter values from the database and turn back into a
dict of tool parameter values.
"""
param_dict = {p.name: p.value for p in self.job.parameters}
tool = app.toolbox.get_tool(self.job.tool_id, tool_version=self.job.tool_version)
param_dict = tool.params_from_strings(param_dict, app)
return param_dict
[docs] def get_id_tag(self):
"""
Return an id tag suitable for identifying the task.
This combines the task's job id and the task's own id.
"""
return f"{self.job.id}_{self.id}"
# The following accessors are for members that are in the Job class but
# not in the Task class. So they can either refer to the parent Job
# or return None, depending on whether Tasks need to point to the parent
# (e.g., for a session) or never use the member (e.g., external output
# metdata). These can be filled in as needed.
[docs] def get_external_output_metadata(self):
"""
The external_output_metadata is currently a backref to
JobExternalOutputMetadata. It exists for a job but not a task,
and when a task is cancelled its corresponding parent Job will
be cancelled. So None is returned now, but that could be changed
to self.get_job().get_external_output_metadata().
"""
return None
[docs] def get_job_runner_name(self):
"""
Since runners currently access Tasks the same way they access Jobs,
this method just refers to *this* instance's runner.
"""
return self.task_runner_name
def get_job_runner_external_id(self):
    """
    Return the task's external id (``task_runner_external_id``).

    Runners query Tasks through the same methods they use for Jobs, hence
    the Job-style name.
    """
    # TODO: Merge into get_runner_external_id.
    external_id = self.task_runner_external_id
    return external_id
def get_session_id(self):
    """Return the session id reported by the parent job."""
    # A task shares its parent Job's galaxy session, so delegate to the Job.
    parent_job = self.get_job()
    return parent_job.get_session_id()
def set_id(self, id):
    """
    Assign ``id`` to the task.

    The id column is defined in the SQLAlchemy mapper rather than here, so
    this should never be called.
    """
    self.id = id
def set_working_directory(self, working_directory):
    """Store ``working_directory`` on the task."""
    self.working_directory = working_directory
def set_job_runner_external_id(self, task_runner_external_id):
    """
    Record the runner's external id for this task.

    This Job-style setter is available for runners that do not want/need
    to differentiate between the kinds of Runnable things (Jobs and Tasks)
    that they're using.
    """
    # Pass the values as logging arguments instead of pre-formatting with
    # ``%``: formatting is then deferred until the record is emitted, and
    # ``%s`` does not raise when the task has no id yet (``%d`` raises
    # TypeError for None).
    log.debug("Task %s: Set external id to %s", self.id, task_runner_external_id)
    self.task_runner_external_id = task_runner_external_id
def set_task_runner_external_id(self, task_runner_external_id):
    """Store ``task_runner_external_id`` on the task."""
    self.task_runner_external_id = task_runner_external_id
def set_prepare_input_files_cmd(self, prepare_input_files_cmd):
    """Store ``prepare_input_files_cmd`` on the task."""
    self.prepare_input_files_cmd = prepare_input_files_cmd
class JobParameter(Base, RepresentById):
    """A named parameter value attached to a job (``job_parameter`` table)."""

    __tablename__ = "job_parameter"

    id = Column(Integer, primary_key=True)
    job_id = Column(Integer, ForeignKey("job.id"), index=True)
    name = Column(String(255))
    value = Column(TEXT)

    def __init__(self, name, value):
        """Store the parameter ``name`` and its ``value``."""
        self.name, self.value = name, value
class JobToInputDatasetAssociation(Base, RepresentById):
    """Links a job to an HDA used as a named input (``job_to_input_dataset`` table)."""

    __tablename__ = "job_to_input_dataset"

    id = Column(Integer, primary_key=True)
    job_id = Column(Integer, ForeignKey("job.id"), index=True)
    dataset_id = Column(Integer, ForeignKey("history_dataset_association.id"), index=True)
    dataset_version = Column(Integer)
    name = Column(String(255))

    dataset = relationship("HistoryDatasetAssociation", lazy="joined", back_populates="dependent_jobs")
    job = relationship("Job", back_populates="input_datasets")

    def __init__(self, name, dataset):
        """Associate ``dataset`` under the input ``name``."""
        self.name = name
        # The session helper runs before the relationship is assigned.
        add_object_to_object_session(self, dataset)
        self.dataset = dataset
        # We start with version 0 and update once the job is ready
        self.dataset_version = 0
class JobToOutputDatasetAssociation(Base, RepresentById):
    """Links a job to an HDA it produced as a named output (``job_to_output_dataset`` table)."""

    __tablename__ = "job_to_output_dataset"

    id = Column(Integer, primary_key=True)
    job_id = Column(Integer, ForeignKey("job.id"), index=True)
    dataset_id = Column(Integer, ForeignKey("history_dataset_association.id"), index=True)
    name = Column(String(255))

    dataset = relationship("HistoryDatasetAssociation", lazy="joined", back_populates="creating_job_associations")
    job = relationship("Job", back_populates="output_datasets")

    def __init__(self, name, dataset):
        """Associate ``dataset`` under the output ``name``."""
        self.name = name
        # The session helper runs before the relationship is assigned.
        add_object_to_object_session(self, dataset)
        self.dataset = dataset

    @property
    def item(self):
        """The associated dataset, exposed under a generic name."""
        return self.dataset
class JobToInputDatasetCollectionAssociation(Base, RepresentById):
    """Links a job to an HDCA used as a named input (``job_to_input_dataset_collection`` table)."""

    __tablename__ = "job_to_input_dataset_collection"

    id = Column(Integer, primary_key=True)
    job_id = Column(Integer, ForeignKey("job.id"), index=True)
    dataset_collection_id = Column(Integer, ForeignKey("history_dataset_collection_association.id"), index=True)
    name = Column(String(255))

    dataset_collection = relationship("HistoryDatasetCollectionAssociation", lazy="joined")
    job = relationship("Job", back_populates="input_dataset_collections")

    def __init__(self, name, dataset_collection):
        """Associate ``dataset_collection`` under the input ``name``."""
        self.name = name
        self.dataset_collection = dataset_collection
class JobToInputDatasetCollectionElementAssociation(Base, RepresentById):
    """Links a job to a dataset collection element used as a named input."""

    __tablename__ = "job_to_input_dataset_collection_element"

    id = Column(Integer, primary_key=True)
    job_id = Column(Integer, ForeignKey("job.id"), index=True)
    dataset_collection_element_id = Column(Integer, ForeignKey("dataset_collection_element.id"), index=True)
    name = Column(Unicode(255))

    dataset_collection_element = relationship("DatasetCollectionElement", lazy="joined")
    job = relationship("Job", back_populates="input_dataset_collection_elements")

    def __init__(self, name, dataset_collection_element):
        """Associate ``dataset_collection_element`` under the input ``name``."""
        self.name = name
        self.dataset_collection_element = dataset_collection_element
# Many jobs may map to one HistoryDatasetCollection using these for a given
# tool output (if mapping over an input collection).
class JobToOutputDatasetCollectionAssociation(Base, RepresentById):
    """Links a job to an HDCA it contributes to as a named output."""

    __tablename__ = "job_to_output_dataset_collection"

    id = Column(Integer, primary_key=True)
    job_id = Column(Integer, ForeignKey("job.id"), index=True)
    dataset_collection_id = Column(Integer, ForeignKey("history_dataset_collection_association.id"), index=True)
    name = Column(Unicode(255))

    dataset_collection_instance = relationship("HistoryDatasetCollectionAssociation", lazy="joined")
    job = relationship("Job", back_populates="output_dataset_collection_instances")

    def __init__(self, name, dataset_collection_instance):
        """Associate ``dataset_collection_instance`` under the output ``name``."""
        self.name = name
        self.dataset_collection_instance = dataset_collection_instance

    @property
    def item(self):
        """The associated collection instance, exposed under a generic name."""
        return self.dataset_collection_instance
# A DatasetCollection will be mapped to at most one job per tool output
# using these. (You can think of many of these models as going into the
# creation of a JobToOutputDatasetCollectionAssociation.)
class JobToImplicitOutputDatasetCollectionAssociation(Base, RepresentById):
    """Links a job to a DatasetCollection it produced as a named output."""

    __tablename__ = "job_to_implicit_output_dataset_collection"

    id = Column(Integer, primary_key=True)
    job_id = Column(Integer, ForeignKey("job.id"), index=True)
    dataset_collection_id = Column(Integer, ForeignKey("dataset_collection.id"), index=True)
    name = Column(Unicode(255))

    dataset_collection = relationship("DatasetCollection")
    job = relationship("Job", back_populates="output_dataset_collections")

    def __init__(self, name, dataset_collection):
        """Associate ``dataset_collection`` under the output ``name``."""
        self.name = name
        self.dataset_collection = dataset_collection
class JobToInputLibraryDatasetAssociation(Base, RepresentById):
    """Links a job to an LDDA used as a named input (``job_to_input_library_dataset`` table)."""

    __tablename__ = "job_to_input_library_dataset"

    id = Column(Integer, primary_key=True)
    job_id = Column(Integer, ForeignKey("job.id"), index=True)
    ldda_id = Column(Integer, ForeignKey("library_dataset_dataset_association.id"), index=True)
    name = Column(Unicode(255))

    job = relationship("Job", back_populates="input_library_datasets")
    dataset = relationship("LibraryDatasetDatasetAssociation", lazy="joined", back_populates="dependent_jobs")

    def __init__(self, name, dataset):
        """Associate the library ``dataset`` under the input ``name``."""
        self.name = name
        # The session helper runs before the relationship is assigned.
        add_object_to_object_session(self, dataset)
        self.dataset = dataset
class JobToOutputLibraryDatasetAssociation(Base, RepresentById):
    """Links a job to an LDDA it produced as a named output (``job_to_output_library_dataset`` table)."""

    __tablename__ = "job_to_output_library_dataset"

    id = Column(Integer, primary_key=True)
    job_id = Column(Integer, ForeignKey("job.id"), index=True)
    ldda_id = Column(Integer, ForeignKey("library_dataset_dataset_association.id"), index=True)
    name = Column(Unicode(255))

    job = relationship("Job", back_populates="output_library_datasets")
    dataset = relationship(
        "LibraryDatasetDatasetAssociation",
        lazy="joined",
        back_populates="creating_job_associations",
    )

    def __init__(self, name, dataset):
        """Associate the library ``dataset`` under the output ``name``."""
        self.name = name
        # The session helper runs before the relationship is assigned.
        add_object_to_object_session(self, dataset)
        self.dataset = dataset
class JobStateHistory(Base, RepresentById):
    """A snapshot of a job's state and info (``job_state_history`` table)."""

    __tablename__ = "job_state_history"

    id = Column(Integer, primary_key=True)
    create_time = Column(DateTime, default=now)
    job_id = Column(Integer, ForeignKey("job.id"), index=True)
    state = Column(String(64), index=True)
    info = Column(TrimmedString(255))

    def __init__(self, job):
        """Copy ``id``, ``state`` and ``info`` from ``job`` into this record."""
        self.job_id, self.state, self.info = job.id, job.state, job.info
class ImplicitlyCreatedDatasetCollectionInput(Base, RepresentById):
    """Records a named input HDCA for an implicitly created dataset collection."""

    __tablename__ = "implicitly_created_dataset_collection_inputs"

    id = Column(Integer, primary_key=True)
    dataset_collection_id = Column(Integer, ForeignKey("history_dataset_collection_association.id"), index=True)
    input_dataset_collection_id = Column(Integer, ForeignKey("history_dataset_collection_association.id"), index=True)
    name = Column(Unicode(255))

    # Two foreign keys target the same table, so the join column is given explicitly.
    input_dataset_collection = relationship(
        "HistoryDatasetCollectionAssociation",
        primaryjoin=(
            lambda: HistoryDatasetCollectionAssociation.id  # type: ignore[has-type]
            == ImplicitlyCreatedDatasetCollectionInput.input_dataset_collection_id
        ),  # type: ignore[has-type]
    )

    def __init__(self, name, input_dataset_collection):
        """Associate ``input_dataset_collection`` under the input ``name``."""
        self.name = name
        self.input_dataset_collection = input_dataset_collection
class ImplicitCollectionJobs(Base, Serializable):
    """Groups job associations under one record (``implicit_collection_jobs`` table)."""

    __tablename__ = "implicit_collection_jobs"

    id = Column(Integer, primary_key=True)
    populated_state = Column(TrimmedString(64), default="new", nullable=False)

    jobs = relationship(
        "ImplicitCollectionJobsJobAssociation",
        back_populates="implicit_collection_jobs",
        cascade_backrefs=False,
    )

    class populated_states(str, Enum):
        """Possible values of ``populated_state``."""

        NEW = "new"  # New implicit jobs object, unpopulated job associations
        OK = "ok"  # Job associations are set and fixed.
        FAILED = "failed"  # There were issues populating job associations, object is in error.

    def __init__(self, populated_state=None):
        """Use ``populated_state`` if truthy, otherwise start as ``NEW``."""
        if not populated_state:
            populated_state = ImplicitCollectionJobs.populated_states.NEW
        self.populated_state = populated_state

    @property
    def job_list(self):
        """The jobs referenced by the associations in ``self.jobs``, in order."""
        collected = []
        for association in self.jobs:
            collected.append(association.job)
        return collected

    def _serialize(self, id_encoder, serialization_options):
        """Return a dict with the populated state and the identifiers of the associated jobs."""
        job_identifiers = []
        for association in self.jobs:
            job_identifiers.append(serialization_options.get_identifier(id_encoder, association.job))
        rval = dict_for(self, populated_state=self.populated_state, jobs=job_identifiers)
        serialization_options.attach_identifier(id_encoder, self, rval)
        return rval
class ImplicitCollectionJobsJobAssociation(Base, RepresentById):
    """Ordered membership of a job in an ImplicitCollectionJobs record."""

    __tablename__ = "implicit_collection_jobs_job_association"

    id = Column(Integer, primary_key=True)
    implicit_collection_jobs_id = Column(Integer, ForeignKey("implicit_collection_jobs.id"), index=True)
    # Consider making this nullable...
    job_id = Column(Integer, ForeignKey("job.id"), index=True)
    order_index = Column(Integer, nullable=False)

    implicit_collection_jobs = relationship("ImplicitCollectionJobs", back_populates="jobs")
    job = relationship("Job", back_populates="implicit_collection_jobs_association")
class PostJobAction(Base, RepresentById):
    """An action definition optionally tied to a workflow step (``post_job_action`` table)."""

    __tablename__ = "post_job_action"

    id = Column(Integer, primary_key=True)
    workflow_step_id = Column(Integer, ForeignKey("workflow_step.id"), index=True, nullable=True)
    action_type = Column(String(255), nullable=False)
    output_name = Column(String(255), nullable=True)
    action_arguments = Column(MutableJSONType, nullable=True)

    workflow_step = relationship(
        "WorkflowStep",
        back_populates="post_job_actions",
        primaryjoin=(lambda: WorkflowStep.id == PostJobAction.workflow_step_id),  # type: ignore[has-type]
    )

    def __init__(self, action_type, workflow_step=None, output_name=None, action_arguments=None):
        """Store the action's type, output name, arguments and (optional) workflow step."""
        self.action_type = action_type
        self.output_name = output_name
        self.action_arguments = action_arguments
        self.workflow_step = workflow_step
        # The session helper runs after the relationship is assigned.
        ensure_object_added_to_session(self, object_in_session=workflow_step)
class PostJobActionAssociation(Base, RepresentById):
    """Links a PostJobAction to a job (``post_job_action_association`` table)."""

    __tablename__ = "post_job_action_association"

    id = Column(Integer, primary_key=True)
    job_id = Column(Integer, ForeignKey("job.id"), index=True, nullable=False)
    post_job_action_id = Column(Integer, ForeignKey("post_job_action.id"), index=True, nullable=False)

    post_job_action = relationship("PostJobAction")
    job = relationship("Job", back_populates="post_job_actions")

    def __init__(self, pja, job=None, job_id=None):
        """
        Associate ``pja`` with a job given either as an object or as an id.

        ``job`` takes precedence when both are supplied. Raises ``Exception``
        when neither ``job`` nor ``job_id`` is provided.
        """
        if job is None and job_id is None:
            raise Exception("PostJobActionAssociation must be created with a job or a job_id.")
        if job is not None:
            self.job = job
            ensure_object_added_to_session(self, object_in_session=job)
        else:
            self.job_id = job_id
        self.post_job_action = pja
class JobExternalOutputMetadata(Base, RepresentById):
    """Per-job record of metadata file names for one HDA or LDDA (``job_external_output_metadata`` table)."""

    __tablename__ = "job_external_output_metadata"

    id = Column(Integer, primary_key=True)
    job_id = Column(Integer, ForeignKey("job.id"), index=True)
    history_dataset_association_id = Column(
        Integer,
        ForeignKey("history_dataset_association.id"),
        index=True,
        nullable=True,
    )
    library_dataset_dataset_association_id = Column(
        Integer,
        ForeignKey("library_dataset_dataset_association.id"),
        index=True,
        nullable=True,
    )
    is_valid = Column(Boolean, default=True)
    filename_in = Column(String(255))
    filename_out = Column(String(255))
    filename_results_code = Column(String(255))
    filename_kwds = Column(String(255))
    filename_override_metadata = Column(String(255))
    job_runner_external_pid = Column(String(255))

    history_dataset_association = relationship("HistoryDatasetAssociation", lazy="joined")
    library_dataset_dataset_association = relationship("LibraryDatasetDatasetAssociation", lazy="joined")
    job = relationship("Job", back_populates="external_output_metadata")

    def __init__(self, job=None, dataset=None):
        """
        Attach the record to ``job`` and to ``dataset``.

        ``dataset`` is stored on the relationship matching its type (HDA or
        LDDA); any other value, including ``None``, is ignored.
        """
        add_object_to_object_session(self, job)
        self.job = job
        if isinstance(dataset, galaxy.model.HistoryDatasetAssociation):
            self.history_dataset_association = dataset
        elif isinstance(dataset, galaxy.model.LibraryDatasetDatasetAssociation):
            self.library_dataset_dataset_association = dataset

    @property
    def dataset(self):
        """The associated HDA if set, else the associated LDDA if set, else ``None``."""
        return self.history_dataset_association or self.library_dataset_dataset_association or None
# Set up output dataset association for export history jobs. Because job
# uses a Dataset rather than an HDA or LDA, it's necessary to set up a
# fake dataset association that provides the needed attributes for
# preparing a job.
class FakeDatasetAssociation:
    """
    Minimal stand-in for a dataset association that wraps a bare dataset.

    Instances compare equal when both are ``FakeDatasetAssociation`` objects
    wrapping equal datasets.
    """

    fake_dataset_association = True

    def __init__(self, dataset=None):
        """Wrap ``dataset``; it defaults to ``None`` so no-argument construction keeps working."""
        # ``__eq__`` reads ``self.dataset`` and callers construct this class
        # with the dataset as its single argument, so it must be stored here.
        self.dataset = dataset

    def __eq__(self, other):
        """Return True when ``other`` is a FakeDatasetAssociation wrapping an equal dataset."""
        return isinstance(other, FakeDatasetAssociation) and self.dataset == other.dataset
class JobExportHistoryArchive(Base, RepresentById):
    """Ties an export job to a history and the archive dataset (``job_export_history_archive`` table)."""

    __tablename__ = "job_export_history_archive"

    id = Column(Integer, primary_key=True)
    job_id = Column(Integer, ForeignKey("job.id"), index=True)
    history_id = Column(Integer, ForeignKey("history.id"), index=True)
    dataset_id = Column(Integer, ForeignKey("dataset.id"), index=True)
    compressed = Column(Boolean, index=True, default=False)
    history_attrs_filename = Column(TEXT)

    job = relationship("Job")
    dataset = relationship("Dataset")
    history = relationship("History", back_populates="exports")

    ATTRS_FILENAME_HISTORY = "history_attrs.txt"

    def __init__(self, compressed=False, **kwd):
        """Pass ``kwd`` to the base constructor, then set ``compressed``."""
        history = kwd.get("history") if "history" in kwd else None
        if "history" in kwd:
            add_object_to_object_session(self, history)
        super().__init__(**kwd)
        self.compressed = compressed

    @property
    def fda(self):
        """A FakeDatasetAssociation wrapping the archive dataset."""
        return FakeDatasetAssociation(self.dataset)

    @property
    def temp_directory(self):
        """Directory part of ``history_attrs_filename``."""
        return os.path.dirname(self.history_attrs_filename)

    @property
    def up_to_date(self):
        """Return False, if a new export should be generated for corresponding
        history.
        """
        job = self.job
        if job.state in [Job.states.ERROR, Job.states.DELETED]:
            return False
        return job.update_time > self.history.update_time

    @property
    def ready(self):
        """True when the export job's state is OK."""
        return self.job.state == Job.states.OK

    @property
    def preparing(self):
        """True while the export job is running, queued or waiting."""
        in_progress_states = [Job.states.RUNNING, Job.states.QUEUED, Job.states.WAITING]
        return self.job.state in in_progress_states

    @property
    def export_name(self):
        """File name of the archive: ``.tar``, with ``.gz`` appended when compressed."""
        # Stream archive.
        hname = ready_name_for_url(self.history.name)
        archive_name = f"Galaxy-History-{hname}.tar"
        return (archive_name + ".gz") if self.compressed else archive_name

    @staticmethod
    def create_for_history(history, job, sa_session, object_store, compressed):
        """
        Create the archive dataset and the JobExportHistoryArchive linking
        ``job``, ``history`` and that dataset; return the new association.
        """
        # Create dataset that will serve as archive.
        archive_dataset = Dataset()
        sa_session.add(archive_dataset)
        with transaction(sa_session):
            sa_session.commit()  # ensure job.id and archive_dataset.id are available
        object_store.create(archive_dataset)  # set the object store id, create dataset (if applicable)

        # Add association for keeping track of job, history, archive relationship.
        jeha = JobExportHistoryArchive(job=job, history=history, dataset=archive_dataset, compressed=compressed)
        sa_session.add(jeha)

        #
        # Create attributes/metadata files for export.
        #
        jeha.dataset.create_extra_files_path()
        extra_files_dir = jeha.dataset.extra_files_path
        jeha.history_attrs_filename = os.path.join(extra_files_dir, jeha.ATTRS_FILENAME_HISTORY)
        return jeha

    def to_dict(self):
        """Summarize this export: its id, job id and the three status flags."""
        summary = {"id": self.id, "job_id": self.job.id}
        summary["ready"] = self.ready
        summary["preparing"] = self.preparing
        summary["up_to_date"] = self.up_to_date
        return summary
class JobImportHistoryArchive(Base, RepresentById):
    """Ties an import job to a history and an archive directory (``job_import_history_archive`` table)."""

    __tablename__ = "job_import_history_archive"

    id = Column(Integer, primary_key=True)
    job_id = Column(Integer, ForeignKey("job.id"), index=True)
    history_id = Column(Integer, ForeignKey("history.id"), index=True)
    archive_dir = Column(TEXT)

    job = relationship("Job")
    history = relationship("History")
class StoreExportAssociation(Base, RepresentById):
    """Export record keyed by task UUID and by (object id, object type) (``store_export_association`` table)."""

    __tablename__ = "store_export_association"
    __table_args__ = (Index("ix_store_export_object", "object_id", "object_type"),)

    id = Column(Integer, primary_key=True)
    task_uuid = Column(UUIDType(), index=True, unique=True)
    create_time = Column(DateTime, default=now)
    object_type = Column(TrimmedString(32))
    object_id = Column(Integer)
    export_metadata = Column(JSONType)
class JobContainerAssociation(Base, RepresentById):
    """Container details recorded for a job (``job_container_association`` table)."""

    __tablename__ = "job_container_association"

    id = Column(Integer, primary_key=True)
    job_id = Column(Integer, ForeignKey("job.id"), index=True)
    container_type = Column(TEXT)
    container_name = Column(TEXT)
    container_info = Column(MutableJSONType, nullable=True)
    created_time = Column(DateTime, default=now)
    modified_time = Column(DateTime, default=now, onupdate=now)

    job = relationship("Job", back_populates="container")

    def __init__(self, **kwd):
        """Pass ``kwd`` to the base constructor; ``container_info`` falls back to ``{}``."""
        if "job" in kwd:
            add_object_to_object_session(self, kwd["job"])
        super().__init__(**kwd)
        if not self.container_info:
            self.container_info = {}
        else:
            # Re-assign the existing value, exactly as ``x = x or {}`` does.
            self.container_info = self.container_info
class InteractiveToolEntryPoint(Base, Dictifiable, RepresentById):
    """An entry point exposed by a job (``interactivetool_entry_point`` table)."""

    __tablename__ = "interactivetool_entry_point"

    id = Column(Integer, primary_key=True)
    job_id = Column(Integer, ForeignKey("job.id"), index=True)
    name = Column(TEXT)
    token = Column(TEXT)
    tool_port = Column(Integer)
    host = Column(TEXT)
    port = Column(Integer)
    protocol = Column(TEXT)
    entry_url = Column(TEXT)
    requires_domain = Column(Boolean, default=True)
    requires_path_in_url = Column(Boolean, default=False)
    requires_path_in_header_named = Column(TEXT)
    info = Column(MutableJSONType, nullable=True)
    configured = Column(Boolean, default=False)
    deleted = Column(Boolean, default=False)
    created_time = Column(DateTime, default=now)
    modified_time = Column(DateTime, default=now, onupdate=now)
    label = Column(TEXT)

    job = relationship("Job", back_populates="interactivetool_entry_points", uselist=False)

    # Keys exposed when serialized as part of a collection.
    dict_collection_visible_keys = [
        "id",
        "job_id",
        "name",
        "label",
        "active",
        "created_time",
        "modified_time",
        "output_datasets_ids",
    ]
    # Keys exposed when serialized as a single element.
    dict_element_visible_keys = [
        "id",
        "job_id",
        "name",
        "label",
        "active",
        "created_time",
        "modified_time",
        "output_datasets_ids",
    ]

    def __init__(
        self,
        requires_domain=True,
        requires_path_in_url=False,
        configured=False,
        deleted=False,
        **kwd,
    ):
        """
        Pass ``kwd`` to the base constructor, then set the four flags.

        A token is generated when none was supplied, and ``info`` falls back
        to an empty dict.
        """
        super().__init__(**kwd)
        self.requires_domain = requires_domain
        self.requires_path_in_url = requires_path_in_url
        self.configured = configured
        self.deleted = deleted
        self.token = self.token or hex_to_lowercase_alphanum(token_hex(8))
        self.info = self.info or {}

    @property
    def active(self):
        """True when configured, not deleted, and the job has not finished."""
        if not self.configured or self.deleted:
            return False
        # FIXME: don't included queued?
        return not self.job.finished

    @property
    def class_id(self):
        """Short class identifier, always ``"ep"``."""
        return "ep"

    @property
    def output_datasets_ids(self):
        """Ids of the datasets referenced by the job's output dataset associations."""
        dataset_ids = []
        for association in self.job.output_datasets:
            dataset_ids.append(association.dataset.id)
        return dataset_ids
class GenomeIndexToolData(Base, RepresentById):  # TODO: params arg is lost
    """Links a job, dataset and user to indexer details (``genome_index_tool_data`` table)."""

    __tablename__ = "genome_index_tool_data"

    id = Column(Integer, primary_key=True)
    job_id = Column(Integer, ForeignKey("job.id"), index=True)
    dataset_id = Column(Integer, ForeignKey("dataset.id"), index=True)
    fasta_path = Column(String(255))
    created_time = Column(DateTime, default=now)
    modified_time = Column(DateTime, default=now, onupdate=now)
    indexer = Column(String(64))
    user_id = Column(Integer, ForeignKey("galaxy_user.id"), index=True)

    job = relationship("Job")
    dataset = relationship("Dataset")
    user = relationship("User")
class Group(Base, Dictifiable, RepresentById):
    """A uniquely named group (``galaxy_group`` table) with quota, role and user associations."""

    __tablename__ = "galaxy_group"

    id = Column(Integer, primary_key=True)
    create_time = Column(DateTime, default=now)
    update_time = Column(DateTime, default=now, onupdate=now)
    name = Column(String(255), index=True, unique=True)
    deleted = Column(Boolean, index=True, default=False)

    quotas = relationship("GroupQuotaAssociation", back_populates="group")
    roles = relationship("GroupRoleAssociation", back_populates="group", cascade_backrefs=False)
    users = relationship("UserGroupAssociation", back_populates="group")

    dict_collection_visible_keys = ["id", "name"]
    dict_element_visible_keys = ["id", "name"]

    def __init__(self, name=None):
        """Create a non-deleted group called ``name``."""
        self.name, self.deleted = name, False
class UserGroupAssociation(Base, RepresentById):
    """Membership of a user in a group (``user_group_association`` table)."""

    __tablename__ = "user_group_association"

    id = Column(Integer, primary_key=True)
    user_id = Column(Integer, ForeignKey("galaxy_user.id"), index=True)
    group_id = Column(Integer, ForeignKey("galaxy_group.id"), index=True)
    create_time = Column(DateTime, default=now)
    update_time = Column(DateTime, default=now, onupdate=now)

    user = relationship("User", back_populates="groups")
    group = relationship("Group", back_populates="users")

    def __init__(self, user, group):
        """Link ``user`` to ``group``."""
        # The session helper runs before the relationships are assigned.
        add_object_to_object_session(self, user)
        self.user = user
        self.group = group
class Notification(Base, Dictifiable, RepresentById):
    """A notification with source, category, variant and JSON content (``notification`` table)."""

    __tablename__ = "notification"

    id = Column(Integer, primary_key=True)
    create_time = Column(DateTime, default=now)
    update_time = Column(DateTime, default=now, onupdate=now)
    # The date of publication, can be a future date to allow scheduling
    publication_time = Column(DateTime, default=now)
    # The expiration date, expired notifications will be permanently removed from DB regularly.
    # The default must be a callable: a plain ``now() + timedelta(...)`` expression is
    # evaluated once, when this class is defined, so every row would share one
    # expiration computed from process start instead of from its own insert time.
    expiration_time = Column(DateTime, default=lambda: now() + timedelta(days=30 * 6))
    # Who (or what) generated the notification
    source = Column(String(32), index=True)
    # Category of the notification, defines its contents. Used for filtering, un/subscribing, etc
    category = Column(String(64), index=True)
    # Defines the 'importance' of the notification ('info', 'warning', 'urgent', etc.).
    # Used for filtering, highlight rendering, etc
    variant = Column(String(16), index=True)
    # A bug in early 23.1 led to values being stored as json string, so we use this special type to process the result value twice.
    # content should always be a dict
    content = Column(DoubleEncodedJsonType)

    user_notification_associations = relationship("UserNotificationAssociation", back_populates="notification")

    def __init__(self, source: str, category: str, variant: str, content):
        """Store the notification's ``source``, ``category``, ``variant`` and ``content``."""
        self.source = source
        self.category = category
        self.variant = variant
        self.content = content
class UserNotificationAssociation(Base, RepresentById):
    """Per-user state of a notification (``user_notification_association`` table)."""

    __tablename__ = "user_notification_association"

    id = Column(Integer, primary_key=True)
    user_id = Column(Integer, ForeignKey("galaxy_user.id"), index=True)
    notification_id = Column(Integer, ForeignKey("notification.id"), index=True)
    seen_time = Column(DateTime, nullable=True)
    deleted = Column(Boolean, index=True, default=False)
    update_time = Column(DateTime, default=now, onupdate=now)

    user = relationship("User", back_populates="all_notifications")
    notification = relationship("Notification", back_populates="user_notification_associations")

    def __init__(self, user, notification):
        """Link ``notification`` to ``user``."""
        self.user = user
        # The session helper runs after the user is assigned and before the notification.
        ensure_object_added_to_session(self, object_in_session=user)
        self.notification = notification
class HistoryAudit(Base, RepresentById):
    """Rows of (history id, update time) (``history_audit`` table)."""

    __tablename__ = "history_audit"
    __table_args__ = (PrimaryKeyConstraint(sqlite_on_conflict="IGNORE"),)

    history_id = Column(Integer, ForeignKey("history.id"), primary_key=True, nullable=False)
    update_time = Column(DateTime, default=now, primary_key=True, nullable=False)

    # This class should never be instantiated.
    # See https://github.com/galaxyproject/galaxy/pull/11914 for details.
    __init__ = None  # type: ignore[assignment]

    @classmethod
    def prune(cls, sa_session):
        """
        Delete every audit row that is not the most recent one for its history.

        ``sa_session`` is called to obtain a session; the delete statement is
        executed inside ``session.begin()``.
        """
        # Newest update_time per history.
        newest_per_history = (
            select(cls.history_id, func.max(cls.update_time).label("max_update_time"))
            .group_by(cls.history_id)
            .subquery()
        )
        # Rows of the same history that are strictly older than that newest time.
        is_stale = and_(
            cls.update_time < newest_per_history.columns.max_update_time,
            cls.history_id == newest_per_history.columns.history_id,
        )
        stale_rows = (
            select(cls.history_id, cls.update_time).select_from(newest_per_history).join(cls, is_stale).subquery()
        )
        row_key = tuple_(cls.history_id, cls.update_time)
        delete_stale = cls.__table__.delete().where(row_key.in_(select(stale_rows)))
        with sa_session() as session:
            with session.begin():
                session.execute(delete_stale)
[docs]class History(Base, HasTags, Dictifiable, UsesAnnotations, HasName, Serializable, UsesCreateAndUpdateTime):
__tablename__ = "history"
__table_args__ = (Index("ix_history_slug", "slug", mysql_length=200),)
id = Column(Integer, primary_key=True)
create_time = Column(DateTime, default=now)
_update_time = Column("update_time", DateTime, index=True, default=now, onupdate=now)
user_id = Column(Integer, ForeignKey("galaxy_user.id"), index=True)
name = Column(TrimmedString(255))
hid_counter = Column(Integer, default=1)
deleted = Column(Boolean, index=True, default=False)
purged = Column(Boolean, index=True, default=False)
importing = Column(Boolean, index=True, default=False)
genome_build = Column(TrimmedString(40))
importable = Column(Boolean, default=False)
slug = Column(TEXT)
published = Column(Boolean, index=True, default=False)
preferred_object_store_id = Column(String(255), nullable=True)
archived = Column(Boolean, index=True, default=False, server_default=false())
archive_export_id = Column(Integer, ForeignKey("store_export_association.id"), nullable=True, default=None)
datasets = relationship(
"HistoryDatasetAssociation", back_populates="history", cascade_backrefs=False, order_by=lambda: asc(HistoryDatasetAssociation.hid) # type: ignore[has-type]
)
exports = relationship(
"JobExportHistoryArchive",
back_populates="history",
primaryjoin=lambda: JobExportHistoryArchive.history_id == History.id,
order_by=lambda: desc(JobExportHistoryArchive.id),
)
active_datasets = relationship(
"HistoryDatasetAssociation",
primaryjoin=(
lambda: and_(
HistoryDatasetAssociation.history_id == History.id, # type: ignore[attr-defined]
not_(HistoryDatasetAssociation.deleted), # type: ignore[has-type]
)
),
order_by=lambda: asc(HistoryDatasetAssociation.hid), # type: ignore[has-type]
viewonly=True,
)
dataset_collections = relationship("HistoryDatasetCollectionAssociation", back_populates="history")
active_dataset_collections = relationship(
"HistoryDatasetCollectionAssociation",
primaryjoin=(
lambda: (
and_(
HistoryDatasetCollectionAssociation.history_id == History.id, # type: ignore[has-type]
not_(HistoryDatasetCollectionAssociation.deleted), # type: ignore[has-type]
)
)
),
order_by=lambda: asc(HistoryDatasetCollectionAssociation.hid), # type: ignore[has-type]
viewonly=True,
)
visible_datasets = relationship(
"HistoryDatasetAssociation",
primaryjoin=(
lambda: and_(
HistoryDatasetAssociation.history_id == History.id, # type: ignore[attr-defined]
not_(HistoryDatasetAssociation.deleted), # type: ignore[has-type]
HistoryDatasetAssociation.visible, # type: ignore[has-type]
)
),
order_by=lambda: asc(HistoryDatasetAssociation.hid), # type: ignore[has-type]
viewonly=True,
)
visible_dataset_collections = relationship(
"HistoryDatasetCollectionAssociation",
primaryjoin=(
lambda: and_(
HistoryDatasetCollectionAssociation.history_id == History.id, # type: ignore[has-type]
not_(HistoryDatasetCollectionAssociation.deleted), # type: ignore[has-type]
HistoryDatasetCollectionAssociation.visible, # type: ignore[has-type]
)
),
order_by=lambda: asc(HistoryDatasetCollectionAssociation.hid), # type: ignore[has-type]
viewonly=True,
)
tags = relationship("HistoryTagAssociation", order_by=lambda: HistoryTagAssociation.id, back_populates="history")
annotations = relationship(
"HistoryAnnotationAssociation", order_by=lambda: HistoryAnnotationAssociation.id, back_populates="history"
)
ratings = relationship(
"HistoryRatingAssociation",
order_by=lambda: HistoryRatingAssociation.id, # type: ignore[has-type]
back_populates="history",
)
default_permissions = relationship("DefaultHistoryPermissions", back_populates="history")
users_shared_with = relationship("HistoryUserShareAssociation", back_populates="history")
galaxy_sessions = relationship("GalaxySessionToHistoryAssociation", back_populates="history")
workflow_invocations = relationship("WorkflowInvocation", back_populates="history", cascade_backrefs=False)
user = relationship("User", back_populates="histories")
jobs = relationship("Job", back_populates="history", cascade_backrefs=False)
update_time = column_property(
select(func.max(HistoryAudit.update_time)).where(HistoryAudit.history_id == id).scalar_subquery(),
)
users_shared_with_count: column_property # defined at the end of this module
average_rating: column_property # defined at the end of this module
# Set up proxy so that
# History.users_shared_with_dot_users
# returns a list of users that history is shared with.
users_shared_with_dot_users = association_proxy("users_shared_with", "user")
dict_collection_visible_keys = ["id", "name", "published", "deleted"]
dict_element_visible_keys = [
"id",
"name",
"archived",
"create_time",
"deleted",
"empty",
"genome_build",
"hid_counter",
"importable",
"preferred_object_store_id",
"purged",
"published",
"slug",
"tags",
"update_time",
"username",
]
default_name = "Unnamed history"
def __init__(self, id=None, name=None, user=None):
    """
    Create a history owned by ``user``.

    ``name`` falls back to ``History.default_name`` when falsy; the
    deleted, purged, importing and published flags all start as False.
    """
    self.id = id
    self.name = name if name else History.default_name
    self.deleted = self.purged = self.importing = self.published = False
    # The session helper runs before the relationship is assigned.
    add_object_to_object_session(self, user)
    self.user = user
    # Objects to eventually add to history
    self._pending_additions = []
    self._item_by_hid_cache = None
@reconstructor
def init_on_load(self):
    """
    Restore the attributes that are not tracked in the database.

    Marked with ``@reconstructor`` so it stands in for ``__init__`` on
    instances that are loaded rather than constructed. Both untracked
    attributes that ``__init__`` sets are reset here so loaded and freshly
    constructed instances expose the same attributes.
    """
    self._pending_additions = []
    self._item_by_hid_cache = None
def stage_addition(self, items):
    """
    Queue ``items`` (one item or several) for later addition to this history.

    Each item is pointed at this history, also by id when the history
    already has one, and appended to the pending additions.
    """
    own_id = self.id
    pending = self._pending_additions
    for staged in listify(items):
        staged.history = self
        if own_id:
            staged.history_id = own_id
        pending.append(staged)
@property
def empty(self):
    """True when ``hid_counter`` is unset or still at its initial value of 1."""
    if self.hid_counter is None:
        return True
    return self.hid_counter == 1
@property
def username(self):
    """The owning user's username, or ``None`` when the history has no user."""
    owner = self.user
    if not owner:
        return None
    return owner.username
@property
def count(self):
    """
    Number of hids handed out so far (``hid_counter - 1``).

    Returns 0 when ``hid_counter`` is ``None``, the same case the sibling
    ``empty`` property treats as an empty history, instead of raising
    ``TypeError``.
    """
    if self.hid_counter is None:
        return 0
    return self.hid_counter - 1
def add_pending_items(self, set_output_hid=True):
    """
    Add every staged item to the history and clear the staging list.

    ``set_output_hid`` is forwarded to ``add_datasets`` as ``set_hid``.
    """
    # These are assumed to be either copies of existing datasets or new, empty datasets,
    # so we don't need to set the quota.
    session = object_session(self)
    staged = self._pending_additions
    self.add_datasets(session, staged, set_hid=set_output_hid, quota=False, flush=False)
    self._pending_additions = []
# Reserve `n` consecutive hids for this history and return the first one.
def _next_hid(self, n=1):
"""
Generate next_hid from the database in a concurrency safe way:
1. Retrieve hid_counter from database
2. Increment hid_counter by n and store in database
3. Return retrieved hid_counter.
Handle with SQLAlchemy Core to keep this independent from current session state, except:
expire hid_counter attribute, since its value in the session is no longer valid.
"""
session = object_session(self)
engine = session.bind
table = self.__table__
history_id = cached_id(self)
# UPDATE history SET hid_counter = hid_counter + n WHERE id = :history_id
update_stmt = update(table).where(table.c.id == history_id).values(hid_counter=table.c.hid_counter + n)
# Runs on a connection/transaction opened from the session's bind, not through the ORM session.
with engine.begin() as conn:
if engine.name in ["postgres", "postgresql"]:
# RETURNING yields the counter *after* the increment, so subtract n to get the first reserved hid.
stmt = update_stmt.returning(table.c.hid_counter)
updated_hid = conn.execute(stmt).scalar()
hid = updated_hid - n
else:
# Other backends: read the current counter with SELECT ... FOR UPDATE, then apply the increment.
select_stmt = select(table.c.hid_counter).where(table.c.id == history_id).with_for_update()
hid = conn.execute(select_stmt).scalar()
conn.execute(update_stmt)
# The ORM's copy of hid_counter is now stale; force a reload on next access.
session.expire(self, ["hid_counter"])
return hid
def add_galaxy_session(self, galaxy_session, association=None):
    """Append a session association to ``galaxy_sessions``.

    When ``association`` is omitted, a new ``GalaxySessionToHistoryAssociation``
    linking ``galaxy_session`` to this history is created and appended.
    """
    sessions = self.galaxy_sessions
    if association is None:
        association = GalaxySessionToHistoryAssociation(galaxy_session, self)
    sessions.append(association)
def add_dataset(self, dataset, parent_id=None, genome_build=None, set_hid=True, quota=True):
    """Attach ``dataset`` to this history and return the attached item.

    A bare ``Dataset`` is first wrapped in a new ``HistoryDatasetAssociation``
    and committed. Anything other than a Dataset, HDA or HDCA raises TypeError.
    With ``parent_id`` the item takes the hid of the matching dataset in this
    history; otherwise (or if no match is found) a new hid is allocated when
    ``set_hid`` is true. With ``quota`` the owner's disk usage is adjusted for
    HDAs whose quota source is in use.
    """
    if isinstance(dataset, Dataset):
        dataset = HistoryDatasetAssociation(dataset=dataset)
        session = object_session(self)
        session.add(dataset)
        with transaction(session):
            session.commit()
    elif not isinstance(dataset, (HistoryDatasetAssociation, HistoryDatasetCollectionAssociation)):
        raise TypeError(
            f"You can only add Dataset and HistoryDatasetAssociation instances to a history ( you tried to add {str(dataset)} )."
        )
    is_dataset = is_hda(dataset)

    # Look for the parent only when one was requested.
    parent = None
    if parent_id:
        parent = next((item for item in self.datasets if item.id == parent_id), None)
    if parent is not None:
        dataset.hid = parent.hid
    elif set_hid:
        dataset.hid = self._next_hid()

    add_object_to_object_session(dataset, self)
    if quota and is_dataset and self.user:
        source_info = dataset.dataset.quota_source_info
        if source_info.use:
            self.user.adjust_total_disk_usage(dataset.quota_amount(self.user), source_info.label)
    dataset.history = self
    if is_dataset and genome_build not in [None, "?"]:
        self.genome_build = genome_build
    dataset.history_id = self.id
    return dataset
def add_datasets(
    self, sa_session, datasets, parent_id=None, genome_build=None, set_hid=True, quota=True, flush=False
):
    """Optimized version of add_dataset above that minimizes database
    interactions when adding many datasets and collections to history at once.

    The batched path is taken when more than one item is given, no
    ``parent_id`` is set and hids are to be assigned; otherwise each item goes
    through ``add_dataset``. With ``flush`` the session is committed once at
    the end.

    Quota on the batched path is accumulated per quota source label over the
    HDAs only. ``datasets`` may also contain collections, which have no
    ``.dataset``, so the first element must not be assumed to be an HDA.
    """
    optimize = len(datasets) > 1 and parent_id is None and set_hid
    if optimize:
        self.__add_datasets_optimized(datasets, genome_build=genome_build)
        if quota and self.user:
            usage_by_label = {}
            for item in datasets:
                if not is_hda(item):
                    continue
                size = item.get_total_size()
                if not size:
                    continue
                source_info = item.dataset.quota_source_info
                if source_info.use:
                    usage_by_label[source_info.label] = usage_by_label.get(source_info.label, 0) + size
            for label, disk_usage in usage_by_label.items():
                self.user.adjust_total_disk_usage(disk_usage, label)
        sa_session.add_all(datasets)
    else:
        for dataset in datasets:
            self.add_dataset(dataset, parent_id=parent_id, genome_build=genome_build, set_hid=set_hid, quota=quota)
            sa_session.add(dataset)
    if flush:
        with transaction(sa_session):
            sa_session.commit()
def __add_datasets_optimized(self, datasets, genome_build=None):
    """Attach many items at once using a single hid reservation.

    One ``_next_hid`` call reserves ``len(datasets)`` consecutive hids, which
    are then handed out in list order. Returns ``datasets``.
    """
    first_hid = self._next_hid(n=len(datasets))
    wants_genome = genome_build not in [None, "?"]
    for offset, item in enumerate(datasets):
        item.hid = first_hid + offset
        item.history = self
        item.history_id = cached_id(self)
        if wants_genome and is_hda(item):
            self.genome_build = genome_build
    return datasets
def add_dataset_collection(self, history_dataset_collection, set_hid=True):
    """Attach a collection association to this history and return it.

    A new hid is allocated only when ``set_hid`` is true.
    """
    hdca = history_dataset_collection
    if set_hid:
        hdca.hid = self._next_hid()
    add_object_to_object_session(hdca, self)
    hdca.history = self
    # TODO: quota?
    self.dataset_collections.append(hdca)
    return hdca
# Create, populate and commit a new History that mirrors this one; returns the new history.
[docs] def copy(self, name=None, target_user=None, activatable=False, all_datasets=False):
"""
Return a copy of this history using the given `name` and `target_user`.
If `activatable`, copy only non-deleted datasets. If `all_datasets`, copy
non-deleted, deleted, and purged datasets.
"""
name = name or self.name
# Copied datasets are only charged to quota when the copy is for a different user.
applies_to_quota = target_user != self.user
# Create new history.
new_history = History(name=name, user=target_user)
db_session = object_session(self)
db_session.add(new_history)
# Flush only the new history here; the final commit happens at the end.
db_session.flush([new_history])
# copy history tags and annotations (if copying user is not anonymous)
if target_user:
self.copy_item_annotation(db_session, self.user, self, target_user, new_history)
new_history.copy_tags_from(target_user=target_user, source=self)
# Copy HDAs.
# `activatable` takes precedence over `all_datasets`; the default is active datasets only.
if activatable:
hdas = self.activatable_datasets
elif all_datasets:
hdas = self.datasets
else:
hdas = self.active_datasets
for hda in hdas:
# Copy HDA.
new_hda = hda.copy(flush=False)
# set_hid=False: no new hid is allocated for the copy here.
new_history.add_dataset(new_hda, set_hid=False, quota=applies_to_quota)
if target_user:
new_hda.copy_item_annotation(db_session, self.user, hda, target_user, new_hda)
new_hda.copy_tags_from(target_user, hda)
# Copy history dataset collections
if all_datasets:
hdcas = self.dataset_collections
else:
hdcas = self.active_dataset_collections
for hdca in hdcas:
new_hdca = hdca.copy(flush=False, element_destination=new_history, set_hid=False, minimize_copies=True)
new_history.add_dataset_collection(new_hdca, set_hid=False)
db_session.add(new_hdca)
if target_user:
new_hdca.copy_item_annotation(db_session, self.user, hdca, target_user, new_hdca)
new_hdca.copy_tags_from(target_user, hdca)
# No hids were allocated above, so carry the source counter over to the copy.
new_history.hid_counter = self.hid_counter
with transaction(db_session):
db_session.commit()
return new_history
def get_dataset_by_hid(self, hid):
    """Return the dataset in this history with the given ``hid``, or None.

    The ``{hid: dataset}`` lookup is built from ``self.datasets`` on first use
    and cached on the instance. ``getattr`` is used because instances loaded
    from the database do not go through ``__init__`` and may not have the
    cache attribute yet.
    """
    cache = getattr(self, "_item_by_hid_cache", None)
    if cache is None:
        cache = {dataset.hid: dataset for dataset in self.datasets}
        self._item_by_hid_cache = cache
    return cache.get(hid)
@property
def has_possible_members(self):
    """Always True for a history."""
    return True
@property
def activatable_datasets(self):
    """HDAs of this history whose underlying dataset is not marked deleted."""
    # This needs to be a list
    kept = []
    for hda in self.datasets:
        if hda.dataset.deleted:
            continue
        kept.append(hda)
    return kept
def _serialize(self, id_encoder, serialization_options):
    """Build the attribute dict describing this history.

    Timestamps are stringified, and the annotation and tag list are included.
    The identifier is attached through ``serialization_options``.
    """
    fields = {}
    fields["create_time"] = str(self.create_time)
    fields["update_time"] = str(self.update_time)
    fields["name"] = unicodify(self.name)
    fields["hid_counter"] = self.hid_counter
    fields["genome_build"] = self.genome_build
    annotation = get_item_annotation_str(object_session(self), self.user, self)
    fields["annotation"] = unicodify(annotation)
    fields["tags"] = self.make_tag_string_list()
    history_attrs = dict_for(self, **fields)
    serialization_options.attach_identifier(id_encoder, self, history_attrs)
    return history_attrs
def to_dict(self, view="collection", value_mapper=None):
    """Return the inherited dict form, adding ``size`` for the ``element`` view."""
    # Get basic value.
    rval = super().to_dict(view=view, value_mapper=value_mapper)
    if view != "element":
        return rval
    rval["size"] = int(self.disk_size)
    return rval
@property
def latest_export(self):
    """First entry of ``self.exports``, or the falsy ``exports`` value itself when there is none."""
    exports = self.exports
    if not exports:
        return exports
    return exports[0]
def resume_paused_jobs(self):
    """Resume every paused job of this history, committing once if there was any."""
    last_job = None
    for last_job in self.paused_jobs:
        last_job.resume(flush=False)
    if last_job is None:
        return
    # We'll flush once if there was a paused job
    session = object_session(last_job)
    with transaction(session):
        session.commit()
@property
def paused_jobs(self):
    """All Job rows for this history whose state is PAUSED."""
    in_this_history = Job.history_id == self.id
    is_paused = Job.state == Job.states.PAUSED
    stmt = select(Job).where(in_this_history, is_paused)
    session = object_session(self)
    return session.scalars(stmt).all()
@hybrid.hybrid_property
def disk_size(self):
    """
    Return the size in bytes of this history by summing the 'total_size's of
    all non-purged, unique datasets within it.
    """
    # non-.expression part of hybrid.hybrid_property: called when an instance is the namespace (not the class)
    hda = HistoryDatasetAssociation
    unique_datasets = (
        select(hda.dataset_id, Dataset.total_size)
        .join(Dataset)
        .where(
            hda.history_id == self.id,
            hda.purged != true(),
            Dataset.purged != true(),
        )
        .distinct()  # unique datasets only
        .subquery()
    )
    total = object_session(self).scalar(select(func.sum(unique_datasets.c.total_size)))
    return total or 0
# Class-level (SQL expression) side of the disk_size hybrid property.
@disk_size.expression # type: ignore[no-redef]
def disk_size(cls):
"""
Return a query scalar that will get any history's size in bytes by summing
the 'total_size's of all non-purged, unique datasets within it.
"""
# .expression acts as a column_property and should return a scalar
# first, get the distinct datasets within a history that are not purged
hda_to_dataset_join = join(
HistoryDatasetAssociation, Dataset, HistoryDatasetAssociation.table.c.dataset_id == Dataset.table.c.id
)
distinct_datasets = (
select(
# use labels here to better access from the query above
HistoryDatasetAssociation.table.c.history_id.label("history_id"),
Dataset.total_size.label("dataset_size"),
Dataset.id.label("dataset_id"),
)
.where(HistoryDatasetAssociation.table.c.purged != true())
.where(Dataset.table.c.purged != true())
.select_from(hda_to_dataset_join)
# TODO: slow (in general) but most probably here - index total_size for easier sorting/distinct?
.distinct()
)
# postgres needs an alias on FROM
distinct_datasets_alias = aliased(distinct_datasets.subquery(), name="datasets")
# then, bind as property of history using the cls.id
# COALESCE(..., 0) makes the sum 0 rather than NULL when no rows match.
size_query = (
select(func.coalesce(func.sum(distinct_datasets_alias.c.dataset_size), 0))
.select_from(distinct_datasets_alias)
.where(distinct_datasets_alias.c.history_id == cls.id)
)
# label creates a scalar
return size_query.label("disk_size")
@property
def disk_nice_size(self):
    """Returns human readable size of history on disk."""
    size_in_bytes = self.disk_size
    return galaxy.util.nice_size(size_in_bytes)
def _active_dataset_and_roles_query(self):
    """Build the SELECT for this history's non-deleted HDAs, ordered by hid.

    The dataset, its permission actions with their roles, and the HDA tags are
    loaded with joined eager loading.
    """
    hda = HistoryDatasetAssociation
    eager_roles = joinedload(hda.dataset).joinedload(Dataset.actions).joinedload(DatasetPermissions.role)
    eager_tags = joinedload(hda.tags)
    stmt = select(hda).where(hda.history_id == self.id).where(not_(hda.deleted))
    stmt = stmt.order_by(hda.hid.asc())
    return stmt.options(eager_roles, eager_tags)
@property
def active_datasets_and_roles(self):
    """Non-deleted HDAs with roles preloaded; queried once and cached on the instance."""
    if hasattr(self, "_active_datasets_and_roles"):
        return self._active_datasets_and_roles
    stmt = self._active_dataset_and_roles_query()
    rows = object_session(self).scalars(stmt).unique().all()
    self._active_datasets_and_roles = rows
    return rows
@property
def active_visible_datasets_and_roles(self):
    """Like ``active_datasets_and_roles`` but limited to visible HDAs; cached on the instance."""
    if hasattr(self, "_active_visible_datasets_and_roles"):
        return self._active_visible_datasets_and_roles
    stmt = self._active_dataset_and_roles_query().where(HistoryDatasetAssociation.visible)
    rows = object_session(self).scalars(stmt).unique().all()
    self._active_visible_datasets_and_roles = rows
    return rows
@property
def active_visible_dataset_collections(self):
    """Non-deleted, visible HDCAs of this history ordered by hid; cached on the instance.

    The collection and the tags are loaded with joined eager loading.
    """
    if hasattr(self, "_active_visible_dataset_collections"):
        return self._active_visible_dataset_collections
    hdca = HistoryDatasetCollectionAssociation
    stmt = (
        select(hdca)
        .where(hdca.history_id == self.id)
        .where(not_(hdca.deleted))
        .where(hdca.visible)
        .order_by(hdca.hid.asc())
        .options(joinedload(hdca.collection), joinedload(hdca.tags))
    )
    rows = object_session(self).scalars(stmt).unique().all()
    self._active_visible_dataset_collections = rows
    return rows
@property
def active_contents(self):
    """Return all active contents ordered by hid."""
    filters = {
        "types": ["dataset", "dataset_collection"],
        "deleted": False,
        "visible": True,
    }
    return self.contents_iter(**filters)
def contents_iter(self, **kwds):
    """
    Fetch filtered list of contents of history.

    ``types`` selects which content kinds to include ("dataset" and/or
    "dataset_collection"; default is datasets only). All keyword arguments are
    forwarded to the per-type filters, and the per-type results are merged by hid.
    """
    wanted = kwds.get("types", ["dataset"])
    per_type = []
    if "dataset" in wanted:
        per_type.append(self.__dataset_contents_iter(**kwds))
    if "dataset_collection" in wanted:
        per_type.append(self.__collection_contents_iter(**kwds))
    by_hid = operator.attrgetter("hid")
    return galaxy.util.merge_sorted_iterables(by_hid, *per_type)
def __dataset_contents_iter(self, **kwds):
    """Filtered HistoryDatasetAssociation contents of this history."""
    content_class = HistoryDatasetAssociation
    return self.__filter_contents(content_class, **kwds)
def __collection_contents_iter(self, **kwds):
    """Filtered HistoryDatasetCollectionAssociation contents of this history."""
    content_class = HistoryDatasetCollectionAssociation
    return self.__filter_contents(content_class, **kwds)
def __filter_contents(self, content_class, **kwds):
    """Return this history's ``content_class`` rows ordered by hid, filtered by ``kwds``.

    Recognised keys:
      * ``deleted`` / ``visible`` - bool-like values, parsed with
        ``string_as_bool_or_none``; ignored when they parse to None.
      * ``object_store_ids`` - applied to HDAs only (ignored for collections).
      * ``ids`` - restrict to these row ids; must not be combined with
        ``object_store_ids``.
      * ``max_in_filter_length`` - threshold at or above which ``ids`` is
        filtered in Python instead of with an SQL ``IN`` clause.

    Returns the ``session.scalars`` result, or a generator over it on the
    large-``ids`` path.
    """
    session = object_session(self)
    stmt = select(content_class).where(content_class.history_id == self.id).order_by(content_class.hid.asc())
    deleted = galaxy.util.string_as_bool_or_none(kwds.get("deleted", None))
    if deleted is not None:
        stmt = stmt.where(content_class.deleted == deleted)
    visible = galaxy.util.string_as_bool_or_none(kwds.get("visible", None))
    if visible is not None:
        stmt = stmt.where(content_class.visible == visible)
    if "object_store_ids" in kwds:
        if content_class == HistoryDatasetAssociation:
            stmt = stmt.join(content_class.dataset).where(Dataset.object_store_id.in_(kwds.get("object_store_ids")))
        # else ignoring object_store_ids on HDCAs...
    if "ids" in kwds:
        assert "object_store_ids" not in kwds
        ids = kwds["ids"]
        max_in_filter_length = kwds.get("max_in_filter_length", MAX_IN_FILTER_LENGTH)
        if len(ids) < max_in_filter_length:
            stmt = stmt.where(content_class.id.in_(ids))
        else:
            # This branch only runs when `ids` is long, so build a set once
            # rather than scanning the list for every row.
            wanted_ids = frozenset(ids)
            return (content for content in session.scalars(stmt) if content.id in wanted_ids)
    return session.scalars(stmt)
# Association row linking one user to one role (table "user_role_association").
[docs]class UserRoleAssociation(Base, RepresentById):
__tablename__ = "user_role_association"
id = Column(Integer, primary_key=True)
user_id = Column(Integer, ForeignKey("galaxy_user.id"), index=True)
role_id = Column(Integer, ForeignKey("role.id"), index=True)
create_time = Column(DateTime, default=now)
update_time = Column(DateTime, default=now, onupdate=now)
# Both sides are bidirectional: User.roles and Role.users point back here.
user = relationship("User", back_populates="roles")
role = relationship("Role", back_populates="users")
def __init__(self, user, role):
# add_object_to_object_session is called before the relationship is assigned; keep this order.
add_object_to_object_session(self, user)
self.user = user
self.role = role
# Association row linking one group to one role (table "group_role_association").
[docs]class GroupRoleAssociation(Base, RepresentById):
__tablename__ = "group_role_association"
id = Column(Integer, primary_key=True)
group_id = Column(Integer, ForeignKey("galaxy_group.id"), index=True)
role_id = Column(Integer, ForeignKey("role.id"), index=True)
create_time = Column(DateTime, default=now)
update_time = Column(DateTime, default=now, onupdate=now)
# Both sides are bidirectional: Group.roles and Role.groups point back here.
group = relationship("Group", back_populates="roles")
role = relationship("Role", back_populates="groups")
def __init__(self, group, role):
self.group = group
# Here the session helper runs after the group is assigned, using the group as the object already in a session.
ensure_object_added_to_session(self, object_in_session=group)
self.role = role
# A named role (table "role") referenced by user/group associations and dataset permissions.
[docs]class Role(Base, Dictifiable, RepresentById):
__tablename__ = "role"
id = Column(Integer, primary_key=True)
create_time = Column(DateTime, default=now)
update_time = Column(DateTime, default=now, onupdate=now)
# Role names are unique.
name = Column(String(255), index=True, unique=True)
description = Column(TEXT)
# Holds one of the `types` values defined below.
type = Column(String(40), index=True)
deleted = Column(Boolean, index=True, default=False)
dataset_actions = relationship("DatasetPermissions", back_populates="role")
groups = relationship("GroupRoleAssociation", back_populates="role")
users = relationship("UserRoleAssociation", back_populates="role")
dict_collection_visible_keys = ["id", "name"]
dict_element_visible_keys = ["id", "name", "description", "type"]
private_id = None
# String-valued enum of role kinds; members compare equal to their plain string values.
[docs] class types(str, Enum):
PRIVATE = "private"
SYSTEM = "system"
USER = "user"
ADMIN = "admin"
SHARING = "sharing"
# `type` defaults to types.SYSTEM.
def __init__(self, name=None, description=None, type=types.SYSTEM, deleted=False):
self.name = name
self.description = description
self.type = type
self.deleted = deleted
# Disk usage recorded per (user, quota source label) pair (table "user_quota_source_usage").
[docs]class UserQuotaSourceUsage(Base, Dictifiable, RepresentById):
__tablename__ = "user_quota_source_usage"
# At most one row per user and quota source label.
__table_args__ = (UniqueConstraint("user_id", "quota_source_label", name="uqsu_unique_label_per_user"),)
dict_element_visible_keys = ["disk_usage", "quota_source_label"]
id = Column(Integer, primary_key=True)
user_id = Column(Integer, ForeignKey("galaxy_user.id"), index=True)
quota_source_label = Column(String(32), index=True)
# user had an index on disk_usage - does that make any sense? -John
# Non-null, defaults to 0.
disk_usage = Column(Numeric(15, 0), default=0, nullable=False)
user = relationship("User", back_populates="quota_source_usages")
# Association row linking one user to one quota (table "user_quota_association").
[docs]class UserQuotaAssociation(Base, Dictifiable, RepresentById):
__tablename__ = "user_quota_association"
id = Column(Integer, primary_key=True)
user_id = Column(Integer, ForeignKey("galaxy_user.id"), index=True)
quota_id = Column(Integer, ForeignKey("quota.id"), index=True)
create_time = Column(DateTime, default=now)
update_time = Column(DateTime, default=now, onupdate=now)
# Both sides are bidirectional: User.quotas and Quota.users point back here.
user = relationship("User", back_populates="quotas")
quota = relationship("Quota", back_populates="users")
dict_element_visible_keys = ["user"]
def __init__(self, user, quota):
# add_object_to_object_session is called before the relationship is assigned; keep this order.
add_object_to_object_session(self, user)
self.user = user
self.quota = quota
# Association row linking one group to one quota (table "group_quota_association").
[docs]class GroupQuotaAssociation(Base, Dictifiable, RepresentById):
__tablename__ = "group_quota_association"
id = Column(Integer, primary_key=True)
group_id = Column(Integer, ForeignKey("galaxy_group.id"), index=True)
quota_id = Column(Integer, ForeignKey("quota.id"), index=True)
create_time = Column(DateTime, default=now)
update_time = Column(DateTime, default=now, onupdate=now)
# Both sides are bidirectional: Group.quotas and Quota.groups point back here.
group = relationship("Group", back_populates="quotas")
quota = relationship("Quota", back_populates="groups")
dict_element_visible_keys = ["group"]
def __init__(self, group, quota):
# add_object_to_object_session is called before the relationship is assigned; keep this order.
add_object_to_object_session(self, group)
self.group = group
self.quota = quota
class Quota(Base, Dictifiable, RepresentById):
    """A named storage quota (table "quota").

    ``bytes`` holds the amount, with ``-1`` standing for "unlimited".
    ``operation`` is one of ``valid_operations``. The ``amount`` property
    exposes ``bytes`` with ``None`` in place of the ``-1`` sentinel.
    """

    __tablename__ = "quota"
    __table_args__ = (Index("ix_quota_quota_source_label", "quota_source_label"),)

    id = Column(Integer, primary_key=True)
    create_time = Column(DateTime, default=now)
    update_time = Column(DateTime, default=now, onupdate=now)
    name = Column(String(255), index=True, unique=True)
    description = Column(TEXT)
    bytes = Column(BigInteger)
    operation = Column(String(8))
    deleted = Column(Boolean, index=True, default=False)
    quota_source_label = Column(String(32), default=None)
    default = relationship("DefaultQuotaAssociation", back_populates="quota", cascade_backrefs=False)
    groups = relationship("GroupQuotaAssociation", back_populates="quota")
    users = relationship("UserQuotaAssociation", back_populates="quota")

    dict_collection_visible_keys = ["id", "name", "quota_source_label"]
    dict_element_visible_keys = [
        "id",
        "name",
        "description",
        "bytes",
        "operation",
        "display_amount",
        "default",
        "users",
        "groups",
        "quota_source_label",
    ]
    valid_operations = ("+", "-", "=")

    def __init__(self, name=None, description=None, amount=0, operation="=", quota_source_label=None):
        """Create a quota; ``amount=None`` means unlimited and is stored as ``-1``."""
        self.name = name
        self.description = description
        if amount is None:
            self.bytes = -1
        else:
            self.bytes = amount
        self.operation = operation
        self.quota_source_label = quota_source_label

    # The accessors must exist in the class body before ``property(...)`` is
    # evaluated; without them the class definition raises NameError.
    def get_amount(self):
        """Return the amount in bytes, or None when the quota is unlimited."""
        if self.bytes == -1:
            return None
        return self.bytes

    def set_amount(self, amount):
        """Store ``amount`` in bytes; None is stored as the ``-1`` sentinel, as in ``__init__``."""
        if amount is None:
            self.bytes = -1
        else:
            self.bytes = amount

    amount = property(get_amount, set_amount)

    @property
    def display_amount(self):
        """Human-readable amount: "unlimited" for ``-1``, otherwise a formatted size."""
        if self.bytes == -1:
            return "unlimited"
        else:
            return galaxy.util.nice_size(self.bytes)
# Marks a quota as a default of a given type (table "default_quota_association").
[docs]class DefaultQuotaAssociation(Base, Dictifiable, RepresentById):
__tablename__ = "default_quota_association"
id = Column(Integer, primary_key=True)
create_time = Column(DateTime, default=now)
update_time = Column(DateTime, default=now, onupdate=now)
type = Column(String(32))
quota_id = Column(Integer, ForeignKey("quota.id"), index=True)
quota = relationship("Quota", back_populates="default")
dict_element_visible_keys = ["type"]
def __init__(self, type, quota):
# NOTE(review): `self.types` is not defined in the lines shown here; presumably an Enum of
# allowed association types declared on this class - confirm. The check is an `assert`, so it
# is skipped when Python runs with -O.
assert type in self.types.__members__.values(), "Invalid type"
self.type = type
self.quota = quota
# The session helper runs after the quota is assigned, using the quota as the object already in a session.
ensure_object_added_to_session(self, object_in_session=quota)
class DatasetPermissions(Base, RepresentById):
    """Grants one action on a dataset to a role (table "dataset_permissions")."""

    __tablename__ = "dataset_permissions"

    id = Column(Integer, primary_key=True)
    create_time = Column(DateTime, default=now)
    update_time = Column(DateTime, default=now, onupdate=now)
    action = Column(TEXT)
    dataset_id = Column(Integer, ForeignKey("dataset.id"), index=True)
    role_id = Column(Integer, ForeignKey("role.id"), index=True)
    dataset = relationship("Dataset", back_populates="actions")
    role = relationship("Role", back_populates="dataset_actions")

    def __init__(self, action, dataset, role=None, role_id=None):
        """Create the permission; ``role`` wins over ``role_id`` when both are given."""
        self.action = action
        add_object_to_object_session(self, dataset)
        self.dataset = dataset
        if role is None:
            # No role object supplied: fall back to the raw foreign key.
            self.role_id = role_id
        else:
            self.role = role
class LibraryPermissions(Base, RepresentById):
    """Grants one action on a Library to a role (table "library_permissions")."""

    __tablename__ = "library_permissions"

    id = Column(Integer, primary_key=True)
    create_time = Column(DateTime, default=now)
    update_time = Column(DateTime, default=now, onupdate=now)
    action = Column(TEXT)
    library_id = Column(Integer, ForeignKey("library.id"), nullable=True, index=True)
    role_id = Column(Integer, ForeignKey("role.id"), index=True)
    library = relationship("Library", back_populates="actions")
    role = relationship("Role")

    def __init__(self, action, library_item, role):
        """Create the permission; raises Exception unless ``library_item`` is a Library."""
        self.action = action
        if not isinstance(library_item, Library):
            raise Exception(f"Invalid Library specified: {library_item.__class__.__name__}")
        self.library = library_item
        ensure_object_added_to_session(self, object_in_session=library_item)
        self.role = role
class LibraryFolderPermissions(Base, RepresentById):
    """Grants one action on a LibraryFolder to a role (table "library_folder_permissions")."""

    __tablename__ = "library_folder_permissions"

    id = Column(Integer, primary_key=True)
    create_time = Column(DateTime, default=now)
    update_time = Column(DateTime, default=now, onupdate=now)
    action = Column(TEXT)
    library_folder_id = Column(Integer, ForeignKey("library_folder.id"), nullable=True, index=True)
    role_id = Column(Integer, ForeignKey("role.id"), index=True)
    folder = relationship("LibraryFolder", back_populates="actions")
    role = relationship("Role")

    def __init__(self, action, library_item, role):
        """Create the permission; raises Exception unless ``library_item`` is a LibraryFolder."""
        self.action = action
        if not isinstance(library_item, LibraryFolder):
            raise Exception(f"Invalid LibraryFolder specified: {library_item.__class__.__name__}")
        self.folder = library_item
        ensure_object_added_to_session(self, object_in_session=library_item)
        self.role = role
class LibraryDatasetPermissions(Base, RepresentById):
    """Grants one action on a LibraryDataset to a role (table "library_dataset_permissions")."""

    __tablename__ = "library_dataset_permissions"

    id = Column(Integer, primary_key=True)
    create_time = Column(DateTime, default=now)
    update_time = Column(DateTime, default=now, onupdate=now)
    action = Column(TEXT)
    library_dataset_id = Column(Integer, ForeignKey("library_dataset.id"), nullable=True, index=True)
    role_id = Column(Integer, ForeignKey("role.id"), index=True)
    library_dataset = relationship("LibraryDataset", back_populates="actions")
    role = relationship("Role")

    def __init__(self, action, library_item, role):
        """Create the permission; raises Exception unless ``library_item`` is a LibraryDataset."""
        self.action = action
        if not isinstance(library_item, LibraryDataset):
            raise Exception(f"Invalid LibraryDataset specified: {library_item.__class__.__name__}")
        self.library_dataset = library_item
        ensure_object_added_to_session(self, object_in_session=library_item)
        self.role = role
class LibraryDatasetDatasetAssociationPermissions(Base, RepresentById):
    """Grants one action on a LibraryDatasetDatasetAssociation to a role."""

    __tablename__ = "library_dataset_dataset_association_permissions"

    id = Column(Integer, primary_key=True)
    create_time = Column(DateTime, default=now)
    update_time = Column(DateTime, default=now, onupdate=now)
    action = Column(TEXT)
    library_dataset_dataset_association_id = Column(
        Integer, ForeignKey("library_dataset_dataset_association.id"), nullable=True, index=True
    )
    role_id = Column(Integer, ForeignKey("role.id"), index=True)
    library_dataset_dataset_association = relationship("LibraryDatasetDatasetAssociation", back_populates="actions")
    role = relationship("Role")

    def __init__(self, action, library_item, role):
        """Create the permission; raises Exception unless ``library_item`` is an LDDA."""
        self.action = action
        if not isinstance(library_item, LibraryDatasetDatasetAssociation):
            raise Exception(f"Invalid LibraryDatasetDatasetAssociation specified: {library_item.__class__.__name__}")
        # The session helper runs before the relationship is assigned; keep this order.
        add_object_to_object_session(self, library_item)
        self.library_dataset_dataset_association = library_item
        self.role = role
# A per-user default permission: (user, action, role) (table "default_user_permissions").
[docs]class DefaultUserPermissions(Base, RepresentById):
__tablename__ = "default_user_permissions"
id = Column(Integer, primary_key=True)
user_id = Column(Integer, ForeignKey("galaxy_user.id"), index=True)
action = Column(TEXT)
role_id = Column(Integer, ForeignKey("role.id"), index=True)
# User.default_permissions points back here; the role side is one-directional.
user = relationship("User", back_populates="default_permissions")
role = relationship("Role")
def __init__(self, user, action, role):
# add_object_to_object_session is called before the relationship is assigned; keep this order.
add_object_to_object_session(self, user)
self.user = user
self.action = action
self.role = role
# A per-history default permission: (history, action, role) (table "default_history_permissions").
[docs]class DefaultHistoryPermissions(Base, RepresentById):
__tablename__ = "default_history_permissions"
id = Column(Integer, primary_key=True)
history_id = Column(Integer, ForeignKey("history.id"), index=True)
action = Column(TEXT)
role_id = Column(Integer, ForeignKey("role.id"), index=True)
# History.default_permissions points back here; the role side is one-directional.
history = relationship("History", back_populates="default_permissions")
role = relationship("Role")
def __init__(self, history, action, role):
# add_object_to_object_session is called before the relationship is assigned; keep this order.
add_object_to_object_session(self, history)
self.history = history
self.action = action
self.role = role
class StorableObject:
    """Mixin providing ``flush``, which commits the session this object belongs to."""

    def flush(self):
        """Commit the owning session; does nothing when the object is in no session."""
        sa_session = object_session(self)
        if not sa_session:
            return
        with transaction(sa_session):
            sa_session.commit()
[docs]class Dataset(Base, StorableObject, Serializable):
# Column and relationship declarations for Dataset (table "dataset").
__tablename__ = "dataset"
id = Column(Integer, primary_key=True)
job_id = Column(Integer, ForeignKey("job.id"), index=True, nullable=True)
create_time = Column(DateTime, default=now)
update_time = Column(DateTime, index=True, default=now, onupdate=now)
state = Column(TrimmedString(64), index=True)
deleted = Column(Boolean, index=True, default=False)
purged = Column(Boolean, index=True, default=False)
purgable = Column(Boolean, default=True)
object_store_id = Column(TrimmedString(255), index=True)
# When set, the data lives at this path and the object store is bypassed (see get_file_name).
external_filename = Column(TEXT)
_extra_files_path = Column(TEXT)
created_from_basename = Column(TEXT)
file_size = Column(Numeric(15, 0))
total_size = Column(Numeric(15, 0))
uuid = Column(UUIDType())
actions = relationship("DatasetPermissions", back_populates="dataset")
job = relationship(Job, primaryjoin=(lambda: Dataset.job_id == Job.id))
# Read-only view: HDAs of this dataset that are neither deleted nor purged.
active_history_associations = relationship(
"HistoryDatasetAssociation",
primaryjoin=(
lambda: and_(
Dataset.id == HistoryDatasetAssociation.dataset_id, # type: ignore[attr-defined]
HistoryDatasetAssociation.deleted == false(), # type: ignore[has-type]
HistoryDatasetAssociation.purged == false(), # type: ignore[attr-defined]
)
),
viewonly=True,
)
# Read-only view: HDAs of this dataset that are purged.
purged_history_associations = relationship(
"HistoryDatasetAssociation",
primaryjoin=(
lambda: and_(
Dataset.id == HistoryDatasetAssociation.dataset_id, # type: ignore[attr-defined]
HistoryDatasetAssociation.purged == true(), # type: ignore[attr-defined]
)
),
viewonly=True,
)
# Read-only view: LDDAs of this dataset that are not deleted.
active_library_associations = relationship(
"LibraryDatasetDatasetAssociation",
primaryjoin=(
lambda: and_(
Dataset.id == LibraryDatasetDatasetAssociation.dataset_id, # type: ignore[attr-defined]
LibraryDatasetDatasetAssociation.deleted == false(), # type: ignore[has-type]
)
),
viewonly=True,
)
hashes = relationship("DatasetHash", back_populates="dataset", cascade_backrefs=False)
sources = relationship("DatasetSource", back_populates="dataset")
# Unfiltered, writable counterparts of the view-only relationships above.
history_associations = relationship("HistoryDatasetAssociation", back_populates="dataset", cascade_backrefs=False)
library_associations = relationship(
"LibraryDatasetDatasetAssociation",
primaryjoin=(lambda: LibraryDatasetDatasetAssociation.table.c.dataset_id == Dataset.id),
back_populates="dataset",
cascade_backrefs=False,
)
# State groupings and class-level defaults for Dataset.
# failed_metadata is only valid as DatasetInstance state currently
states = DatasetState
non_ready_states = (states.NEW, states.UPLOAD, states.QUEUED, states.RUNNING, states.SETTING_METADATA)
# Every state not listed in non_ready_states (built from a set, so the tuple order is unspecified).
ready_states = tuple(set(states.__members__.values()) - set(non_ready_states))
# Every state except ERROR, DISCARDED and FAILED_METADATA.
valid_input_states = tuple(
set(states.__members__.values()) - {states.ERROR, states.DISCARDED, states.FAILED_METADATA}
)
# PAUSED, DEFERRED, DISCARDED plus all non-ready states.
no_data_states = (states.PAUSED, states.DEFERRED, states.DISCARDED, *non_ready_states)
terminal_states = (
states.OK,
states.EMPTY,
states.ERROR,
states.DEFERRED,
states.DISCARDED,
states.FAILED_METADATA,
)
# String-valued enum; members compare equal to their plain string values.
[docs] class conversion_messages(str, Enum):
PENDING = "pending"
NO_DATA = "no data"
NO_CHROMOSOME = "no chromosome"
NO_CONVERTER = "no converter"
NO_TOOL = "no tool"
DATA = "data"
ERROR = "error"
OK = "ok"
permitted_actions = get_permitted_actions(filter="DATASET")
file_path = "/tmp/"
# Class-level attributes shared by all datasets; both start out as None here.
object_store: Optional[ObjectStore] = None # This get initialized in mapping.py (method init) by app.py
engine = None
def __init__(
    self,
    id=None,
    state=None,
    external_filename=None,
    extra_files_path=None,
    file_size=None,
    purgable=True,
    uuid=None,
):
    """Create a dataset that is neither deleted nor purged.

    ``uuid`` is passed through ``get_uuid``. ``extra_files_path`` is stored in
    the ``_extra_files_path`` column, while the non-persisted
    ``external_extra_files_path`` starts as None.
    """
    self.id = id
    self.uuid = get_uuid(uuid)
    self.state = state
    self.deleted = self.purged = False
    self.purgable = purgable
    self.external_filename = external_filename
    # Not backed by a database column (see get_extra_files_path).
    self.external_extra_files_path = None
    self._extra_files_path = extra_files_path
    self.file_size = file_size
    self.sources = []
    self.hashes = []
@property
def is_new(self):
    """True when the dataset's state equals ``states.NEW``."""
    current = self.state
    return current == self.states.NEW
@property
def shareable(self):
    """Return True if placed into an objectstore not labeled as ``private``.

    Datasets with an ``external_filename`` are always reported as shareable.
    """
    if self.external_filename:
        return True
    store = self._assert_object_store_set()
    return not store.is_private(self)
def get_file_name(self, sync_cache=True):
    """Return the path of the dataset's primary file.

    Purged datasets yield ``""`` (with a warning). An ``external_filename`` is
    returned as an absolute path. Otherwise the object store is asked for the
    file name; ``""`` is returned when the object does not exist there.
    """
    if self.purged:
        log.warning(f"Attempt to get file name of purged dataset {self.id}")
        return ""
    external = self.external_filename
    if external:
        # Make filename absolute
        return os.path.abspath(external)
    object_store = self._assert_object_store_set()
    file_name = object_store.get_filename(self, sync_cache=sync_cache) if object_store.exists(self) else ""
    if not file_name and self.state not in (self.states.NEW, self.states.QUEUED):
        # Queued datasets can be assigned an object store and have a filename, but they aren't guaranteed to.
        # Anything after queued should have a file name.
        log.warning(f"Failed to determine file name for dataset {self.id}")
    return file_name
@property
def quota_source_label(self):
    """Label of this dataset's quota source info."""
    info = self.quota_source_info
    return info.label
@property
def quota_source_info(self):
    """Quota source info for this dataset's ``object_store_id``, looked up in the store's quota source map."""
    store_id = self.object_store_id
    source_map = self.object_store.get_quota_source_map()
    return source_map.get_quota_source_info(store_id)
@property
def device_source_label(self):
    """Label of this dataset's device source info."""
    info = self.device_source_info
    return info.label
@property
def device_source_info(self):
    """Device source info for this dataset's ``object_store_id``.

    NOTE(review): the map is obtained from ``get_quota_source_map()`` and then
    queried with ``get_device_source_info`` - confirm that this is intended.
    """
    store_id = self.object_store_id
    source_map = self.object_store.get_quota_source_map()
    return source_map.get_device_source_info(store_id)
def set_file_name(self, filename):
    """Set ``external_filename``; any falsy value is stored as None."""
    self.external_filename = filename or None
def _assert_object_store_set(self):
    """Return ``self.object_store``, asserting that it has been initialized."""
    store = self.object_store
    assert store is not None, f"Object Store has not been initialized for dataset {self.id}"
    return store
def get_extra_files_path(self):
    """Return the directory that holds this dataset's extra files.

    An ``external_extra_files_path`` wins and is returned as an absolute path.
    Otherwise the object store supplies the path: the existing directory if
    there is one, else a constructed cache path.
    """
    # Unlike get_file_name - external_extra_files_path is not backed by an
    # actual database column so if SA instantiates this object - the
    # attribute won't exist yet.
    external = getattr(self, "external_extra_files_path", None)
    if external:
        return os.path.abspath(external)
    store = self.object_store
    rel_path = self._extra_files_rel_path
    if store.exists(self, dir_only=True, extra_dir=rel_path):
        return store.get_filename(self, dir_only=True, extra_dir=rel_path)
    return store.construct_path(self, dir_only=True, extra_dir=rel_path, in_cache=True)
def create_extra_files_path(self):
    """Create the extra-files directory in the object store unless it already exists."""
    if self.extra_files_path_exists():
        return
    self.object_store.create(self, dir_only=True, extra_dir=self._extra_files_rel_path)
def set_extra_files_path(self, extra_files_path):
    """Set ``external_extra_files_path``; any falsy value is stored as None."""
    self.external_extra_files_path = extra_files_path or None
extra_files_path = property(get_extra_files_path, set_extra_files_path)
def extra_files_path_exists(self):
    """Ask the object store whether the extra-files directory exists."""
    rel_path = self._extra_files_rel_path
    return self.object_store.exists(self, extra_dir=rel_path, dir_only=True)
@property
def store_by(self):
    """Value returned by the object store's ``get_store_by`` for this dataset."""
    return self.object_store.get_store_by(self)
def extra_files_path_name_from(self, object_store):
    """Return ``dataset_<value>_files`` where value is the attribute named by ``self.store_by``.

    Returns None when ``store_by`` is None. The ``object_store`` argument is
    not consulted; ``store_by`` is read from ``self``.
    """
    store_by = self.store_by
    if store_by is None:
        return None
    return f"dataset_{getattr(self, store_by)}_files"
@property
def extra_files_path_name(self):
    """Extra files directory name as computed by :meth:`extra_files_path_name_from`."""
    store = self.object_store
    return self.extra_files_path_name_from(store)
@property
def _extra_files_rel_path(self):
    """Relative extra files directory: ``_extra_files_path`` if truthy, else the computed name."""
    explicit_path = self._extra_files_path
    if explicit_path:
        return explicit_path
    return self.extra_files_path_name
def _calculate_size(self) -> int:
    """Measure the dataset size.

    With an external filename set, the size is read from the file system and
    an ``OSError`` is reported as ``0``. Otherwise the object store is asked.
    """
    external_path = self.external_filename
    if not external_path:
        assert self.object_store
        return self.object_store.size(self)
    try:
        return os.path.getsize(external_path)
    except OSError:
        return 0
@overload
def get_size(self, nice_size: Literal[False], calculate_size: bool = True) -> int: ...


@overload
def get_size(self, nice_size: Literal[True], calculate_size: bool = True) -> str: ...


def get_size(self, nice_size: bool = False, calculate_size: bool = True) -> Union[int, str]:
    """Returns the size of the data on disk.

    :param nice_size: when true, return the size formatted by
        ``galaxy.util.nice_size`` instead of the raw number.
    :param calculate_size: when no size is recorded in ``file_size``, compute
        it via ``_calculate_size()``; when false, report ``0`` instead.

    Every ``nice_size=True`` path now returns a formatted value. Previously
    ``nice_size=True, calculate_size=False`` with no recorded size returned
    the bare integer ``0``, contradicting the ``Literal[True] -> str`` overload.
    """
    if self.file_size:
        size = self.file_size
    elif calculate_size:
        # Hopefully we only reach this branch in sessionless mode
        size = self._calculate_size()
    else:
        # file_size is falsy here (None or zero), so the size to report is 0.
        size = 0
    if nice_size:
        return galaxy.util.nice_size(size)
    return size
def set_size(self, no_extra_files=False):
    """Record the on-disk size in ``file_size`` if none is recorded yet.

    When the size is computed here and the caller guarantees there are no
    extra files (``no_extra_files=True``), ``total_size`` is set to the same
    value, which lets later get_total_size / set_total_size calls skip a
    database flush and a file system check.
    """
    if self.file_size:
        # A size is already recorded; nothing to do.
        return
    self.file_size = self._calculate_size()
    if no_extra_files:
        self.total_size = self.file_size
def get_total_size(self):
    """Return ``total_size``, computing and committing it first when it is unset."""
    if self.total_size is None:
        # for backwards compatibility, set if unset
        self.set_total_size()
        db_session = object_session(self)
        with transaction(db_session):
            db_session.commit()
    return self.total_size
def set_total_size(self):
    """Recompute ``total_size`` as ``file_size`` plus the size of all extra files.

    Extra files are only walked when a relative extra files path is known and
    the object store reports that directory as existing. Returns the new total.
    """
    if self.file_size is None:
        self.set_size()
    self.total_size = self.file_size or 0
    rel_path = self._extra_files_rel_path
    if rel_path is not None and self.object_store.exists(self, extra_dir=rel_path, dir_only=True):
        for root, _, files in os.walk(self.extra_files_path):
            directory_bytes = 0
            for file_name in files:
                file_path = os.path.join(root, file_name)
                # Skip entries that vanished or are broken links.
                if os.path.exists(file_path):
                    directory_bytes += os.path.getsize(file_path)
            self.total_size += directory_bytes
    return self.total_size
def has_data(self):
    """Detects whether there is any data: false for new datasets, else size > 0."""
    if self.is_new:
        return False
    return self.get_size() > 0
# FIXME: sqlalchemy will replace this
def _delete(self):
    """Ask the object store to delete the file that corresponds to this data."""
    store = self.object_store
    store.delete(self)
@property
def user_can_purge(self):
    """Whether the dataset may be purged on a user's behalf.

    True only when it is not purged yet, has no library associations, and
    the number of history associations equals the number of purged ones.
    """
    if self.purged is not False:
        return False
    if self.library_associations:
        return False
    return len(self.history_associations) == len(self.purged_history_associations)
def full_delete(self):
    """Remove the file and extra files, then flag the dataset as deleted and purged."""
    try:
        self.object_store.delete(self)
    except galaxy.exceptions.ObjectNotFound:
        # Already gone from the object store; nothing to remove.
        pass
    rel_path = self._extra_files_rel_path
    has_extra_dir = rel_path is not None and self.object_store.exists(self, extra_dir=rel_path, dir_only=True)
    if has_extra_dir:
        self.object_store.delete(self, entire_dir=True, extra_dir=rel_path, dir_only=True)
    # TODO: purge metadata files
    self.deleted = True
    self.purged = True
def get_access_roles(self, security_agent):
    """Collect the roles of every permission in ``self.actions`` whose action is DATASET_ACCESS."""
    return [
        dp.role
        for dp in self.actions
        if dp.action == security_agent.permitted_actions.DATASET_ACCESS.action
    ]
def get_manage_permissions_roles(self, security_agent):
    """Collect the roles of every permission in ``self.actions`` whose action is DATASET_MANAGE_PERMISSIONS."""
    return [
        dp.role
        for dp in self.actions
        if dp.action == security_agent.permitted_actions.DATASET_MANAGE_PERMISSIONS.action
    ]
def has_manage_permissions_roles(self, security_agent):
    """Tell whether any permission in ``self.actions`` has the DATASET_MANAGE_PERMISSIONS action."""
    return any(
        dp.action == security_agent.permitted_actions.DATASET_MANAGE_PERMISSIONS.action
        for dp in self.actions
    )
def _serialize(self, id_encoder, serialization_options):
    """Build the serialized dictionary for this dataset and attach its identifier.

    Only valid when ``serialization_options.serialize_dataset_objects`` is set.
    """
    # serialize Dataset objects only for jobs that can actually modify these models.
    assert serialization_options.serialize_dataset_objects

    def to_int(n) -> Optional[int]:
        if n is None:
            return None
        return int(n)

    rval = dict_for(
        self,
        state=self.state,
        deleted=self.deleted,
        purged=self.purged,
        external_filename=self.external_filename,
        _extra_files_path=self._extra_files_path,
        file_size=to_int(self.file_size),
        object_store_id=self.object_store_id,
        total_size=to_int(self.total_size),
        created_from_basename=self.created_from_basename,
        uuid=str(self.uuid or "") or None,
        hashes=[h.serialize(id_encoder, serialization_options) for h in self.hashes],
        sources=[s.serialize(id_encoder, serialization_options) for s in self.sources],
    )
    serialization_options.attach_identifier(id_encoder, self, rval)
    return rval
class DatasetSource(Base, Dictifiable, Serializable):
    """Mapped class for the ``dataset_source`` table: a source URI recorded for a dataset."""

    __tablename__ = "dataset_source"

    id = Column(Integer, primary_key=True)
    dataset_id = Column(Integer, ForeignKey("dataset.id"), index=True)
    source_uri = Column(TEXT)
    extra_files_path = Column(TEXT)
    transform = Column(MutableJSONType)
    dataset = relationship("Dataset", back_populates="sources")
    hashes = relationship("DatasetSourceHash", back_populates="source")
    dict_collection_visible_keys = ["id", "source_uri", "extra_files_path", "transform"]
    # TODO: implement to_dict and add hashes...
    dict_element_visible_keys = ["id", "source_uri", "extra_files_path", "transform"]

    def _serialize(self, id_encoder, serialization_options):
        """Serialize the column values plus the serialized hashes, then attach the identifier."""
        attributes = dict(
            source_uri=self.source_uri,
            extra_files_path=self.extra_files_path,
            transform=self.transform,
            hashes=[source_hash.serialize(id_encoder, serialization_options) for source_hash in self.hashes],
        )
        rval = dict_for(self, **attributes)
        serialization_options.attach_identifier(id_encoder, self, rval)
        return rval

    def copy(self) -> "DatasetSource":
        """Return a new, unattached source with the same values and copies of all hashes."""
        duplicate = DatasetSource()
        for attribute in ("source_uri", "extra_files_path", "transform"):
            setattr(duplicate, attribute, getattr(self, attribute))
        duplicate.hashes = [source_hash.copy() for source_hash in self.hashes]
        return duplicate
class DatasetSourceHash(Base, Serializable):
    """Mapped class for the ``dataset_source_hash`` table: one hash value of a dataset source."""

    __tablename__ = "dataset_source_hash"

    id = Column(Integer, primary_key=True)
    dataset_source_id = Column(Integer, ForeignKey("dataset_source.id"), index=True)
    hash_function = Column(TEXT)
    hash_value = Column(TEXT)
    source = relationship("DatasetSource", back_populates="hashes")

    def _serialize(self, id_encoder, serialization_options):
        """Serialize the hash function and value, then attach the identifier."""
        attributes = dict(hash_function=self.hash_function, hash_value=self.hash_value)
        rval = dict_for(self, **attributes)
        serialization_options.attach_identifier(id_encoder, self, rval)
        return rval

    def copy(self) -> "DatasetSourceHash":
        """Return a new, unattached hash carrying the same function and value."""
        duplicate = DatasetSourceHash()
        for attribute in ("hash_function", "hash_value"):
            setattr(duplicate, attribute, getattr(self, attribute))
        return duplicate
class DatasetHash(Base, Dictifiable, Serializable):
    """Mapped class for the ``dataset_hash`` table: one hash value recorded for a dataset."""

    __tablename__ = "dataset_hash"

    id = Column(Integer, primary_key=True)
    dataset_id = Column(Integer, ForeignKey("dataset.id"), index=True)
    hash_function = Column(TEXT)
    hash_value = Column(TEXT)
    extra_files_path = Column(TEXT)
    dataset = relationship("Dataset", back_populates="hashes")
    dict_collection_visible_keys = ["id", "hash_function", "hash_value", "extra_files_path"]
    dict_element_visible_keys = ["id", "hash_function", "hash_value", "extra_files_path"]

    def _serialize(self, id_encoder, serialization_options):
        """Serialize the hash function, value and extra files path, then attach the identifier."""
        attributes = dict(
            hash_function=self.hash_function,
            hash_value=self.hash_value,
            extra_files_path=self.extra_files_path,
        )
        rval = dict_for(self, **attributes)
        serialization_options.attach_identifier(id_encoder, self, rval)
        return rval

    def copy(self) -> "DatasetHash":
        """Return a new, unattached hash carrying the same column values."""
        duplicate = DatasetHash()
        for attribute in ("hash_function", "hash_value", "extra_files_path"):
            setattr(duplicate, attribute, getattr(self, attribute))
        return duplicate
def datatype_for_extension(extension, datatypes_registry=None) -> "Data":
    """Resolve a file extension to a datatype through the datatypes registry.

    The extension is lower-cased; an empty extension, ``"auto"`` and
    ``"_sniff_"`` are looked up as ``"data"``. When the registry has no entry
    for the extension a warning is logged and the ``"data"`` datatype is
    returned. The default registry is used when none is passed.
    """
    normalized = extension.lower() if extension is not None else None
    registry = datatypes_registry if datatypes_registry is not None else _get_datatypes_registry()
    if not normalized or normalized in ("auto", "_sniff_"):
        normalized = "data"
    datatype = registry.get_datatype_by_extension(normalized)
    if datatype is not None:
        return datatype
    log.warning(f"Datatype class not found for extension '{normalized}'")
    return registry.get_datatype_by_extension("data")
[docs]class DatasetInstance(RepresentById, UsesCreateAndUpdateTime, _HasTable):
"""A base class for all 'dataset instances', HDAs, LDAs, etc"""
# Aliases of the corresponding Dataset class attributes, reachable from instances of this class.
states = Dataset.states
# Instance-level state; the ``state`` property returns it when truthy, else the dataset's state.
_state: Optional[str]
conversion_messages = Dataset.conversion_messages
permitted_actions = Dataset.permitted_actions
# The annotations below only declare attribute types; no values are assigned in this class body.
purged: bool
creating_job_associations: List[Union[JobToOutputDatasetCollectionAssociation, JobToOutputDatasetAssociation]]
copied_from_history_dataset_association: Optional["HistoryDatasetAssociation"]
copied_from_library_dataset_dataset_association: Optional["LibraryDatasetDatasetAssociation"]
implicitly_converted_datasets: List["ImplicitlyConvertedDatasetAssociation"]
validated_states = DatasetValidatedState
[docs] def __init__(
self,
id=None,
hid=None,
name=None,
info=None,
blurb=None,
peek=None,
tool_version=None,
extension=None,
dbkey=None,
metadata=None,
history=None,
dataset=None,
deleted=False,
designation=None,
parent_id=None,
validated_state=DatasetValidatedState.UNKNOWN,
validated_state_message=None,
visible=True,
create_dataset=False,
sa_session=None,
extended_metadata=None,
flush=True,
metadata_deferred=False,
creating_job_id=None,
copied_from_history_dataset_association=None,
copied_from_library_dataset_dataset_association=None,
):
# Initialise the plain attributes, then the metadata, then the relationships to the
# source associations and to the underlying Dataset.
# ``hid`` and ``history`` are accepted but not referenced in this body.
# ``sa_session`` is only used when a new Dataset is created here with ``flush`` true.
self.name = name or "Unnamed dataset"
self.id = id
self.info = info
self.blurb = blurb
self.peek = peek
self.tool_version = tool_version
self.extension = extension
self.designation = designation
# set private variable to None here, since the attribute may be needed in by MetadataCollection.__init__
self._metadata = None
self.metadata = metadata or dict()
self.metadata_deferred = metadata_deferred
self.extended_metadata = extended_metadata
if (
dbkey
): # dbkey is stored in metadata, only set if non-zero, or else we could clobber one supplied by input 'metadata'
self._metadata["dbkey"] = listify(dbkey)
self.deleted = deleted
self.visible = visible
self.validated_state = validated_state
self.validated_state_message = validated_state_message
# Relationships
# Only the ids of the source associations are stored here, not the objects themselves.
if copied_from_history_dataset_association:
self.copied_from_history_dataset_association_id = copied_from_history_dataset_association.id
if copied_from_library_dataset_dataset_association:
self.copied_from_library_dataset_dataset_association_id = copied_from_library_dataset_dataset_association.id
if not dataset and create_dataset:
# Had to pass the sqlalchemy session in order to create a new dataset
dataset = Dataset(state=Dataset.states.NEW)
dataset.job_id = creating_job_id
if flush:
sa_session.add(dataset)
with transaction(sa_session):
sa_session.commit()
elif dataset:
add_object_to_object_session(self, dataset)
# ``dataset`` may still be None here when none was passed and create_dataset is false.
self.dataset = dataset
ensure_object_added_to_session(self, object_in_session=dataset)
self.parent_id = parent_id
@property
def peek(self):
    """The stored peek text (``_peek``)."""
    return self._peek


@peek.setter
def peek(self, peek):
    """Store the peek after passing it through ``unicodify`` with ``strip_null=True``."""
    cleaned = unicodify(peek, strip_null=True)
    self._peek = cleaned
@property
def ext(self):
    """Short alias for :attr:`extension`."""
    extension = self.extension
    return extension
@property
def has_deferred_data(self):
    """True when the underlying dataset's state equals ``Dataset.states.DEFERRED``."""
    dataset_state = self.dataset.state
    return dataset_state == Dataset.states.DEFERRED
@property
def state(self):
    """Effective state: the association-level ``_state`` when truthy, else the dataset's state."""
    # self._state holds state that should only affect this particular dataset association, not the dataset state itself
    own_state = self._state
    return own_state if own_state else self.dataset.state


@state.setter
def state(self, state: Optional[DatasetState]):
    """Set the state; a value equal to the current effective state is a no-op.

    The two metadata states are stored on this association only. Any other
    state first calls ``set_metadata_success_state()`` and is then written to
    the underlying dataset, which is added to the session if there is one.
    """
    if state != self.state:
        if state not in (DatasetState.FAILED_METADATA, DatasetState.SETTING_METADATA):
            self.set_metadata_success_state()
            sa_session = object_session(self)
            if sa_session:
                sa_session.add(self.dataset)
            self.dataset.state = state
        else:
            self._state = state
# Getter-only properties; the getter functions are not defined in this part of the file.
object_store_id = property(get_object_store_id)
quota_source_label = property(get_quota_source_label)
def get_file_name(self, sync_cache=True) -> str:
    """Return the dataset's file name, or an empty string when the dataset is purged."""
    dataset = self.dataset
    return "" if dataset.purged else dataset.get_file_name(sync_cache=sync_cache)
def link_to(self, path):
    """Point the dataset at the absolute form of ``path`` and mark it as not purgable."""
    dataset = self.dataset
    dataset.set_file_name(os.path.abspath(path))
    # Since we are not copying the file into Galaxy's managed
    # default file location, the dataset should never be purgable.
    dataset.purgable = False
@property
def extra_files_path(self):
    """The extra files path of the underlying dataset."""
    dataset = self.dataset
    return dataset.extra_files_path
@property
def datatype(self) -> "Data":
    """The datatype that ``datatype_for_extension`` resolves for this instance's extension."""
    extension = self.extension
    return datatype_for_extension(extension)
def get_metadata(self):
    """Return the cached MetadataCollection, (re)building it when missing or owned by another parent."""
    # using weakref to store parent (to prevent circ ref),
    # does a Session.clear() cause parent to be invalidated, while still copying over this non-database attribute?
    needs_rebuild = not hasattr(self, "_metadata_collection") or self._metadata_collection.parent != self
    if needs_rebuild:
        self._metadata_collection = galaxy.model.metadata.MetadataCollection(self)
    return self._metadata_collection
@property
def set_metadata_requires_flush(self):
    """Value of ``requires_dataset_id`` on this instance's metadata collection."""
    metadata = self.metadata
    return metadata.requires_dataset_id
def set_metadata(self, bunch):
    """Replace ``_metadata`` with the dict copy the metadata collection makes of ``bunch``."""
    # Needs to accept a MetadataCollection, a bunch, or a dict
    copied = self.metadata.make_dict_copy(bunch)
    self._metadata = copied
# Expose get_metadata / set_metadata as the read/write ``metadata`` attribute.
metadata = property(get_metadata, set_metadata)
@property
def has_metadata_files(self):
    """True when at least one metadata element is file-backed (see ``metadata_file_types``)."""
    file_types = self.metadata_file_types
    return len(file_types) > 0
@property
def metadata_file_types(self):
    """Names of the metadata spec entries whose ``param`` is a ``FileParameter``."""
    spec = self.metadata.spec
    return [
        meta_type
        for meta_type in spec.keys()
        if isinstance(spec[meta_type].param, galaxy.model.metadata.FileParameter)
    ]
def get_metadata_file_paths_and_extensions(self) -> List[Tuple[str, str]]:
    """List ``(file_ext, path)`` pairs for every file-backed metadata element that has a file.

    Note the tuple order: the extension from the metadata spec comes first,
    the file path second.
    """
    metadata = self.metadata
    collected = []
    for metadata_name in self.metadata_file_types:
        file_ext = metadata.spec[metadata_name].file_ext
        metadata_file = metadata[metadata_name]
        if not metadata_file:
            continue
        collected.append((file_ext, metadata_file.get_file_name()))
    return collected
# This provides backwards compatibility with using the old dbkey
# field in the database. That field now maps to "old_dbkey" (see mapping.py).
def get_dbkey(self):
    """Return the first dbkey stored in the metadata, or ``"?"`` when there is none."""
    stored = self.metadata.dbkey
    keys = stored if isinstance(stored, list) else [stored]
    if keys == [None] or keys == []:
        return "?"
    return keys[0]
def set_dbkey(self, value):
    """Store ``value`` as a one-element list in the metadata's dbkey.

    Nothing happens when the datatype's metadata spec has no ``dbkey`` entry.
    A value that already is a list is not assigned by this method.
    """
    if "dbkey" not in self.datatype.metadata_spec:
        return
    if not isinstance(value, list):
        self.metadata.dbkey = [value]
# Expose get_dbkey / set_dbkey as the read/write ``dbkey`` attribute.
dbkey = property(get_dbkey, set_dbkey)
def ok_to_edit_metadata(self):
    """
    Prevent modifying metadata when dataset is queued or running as input/output:
    return `False` if there exists an associated job with a non-terminal state.
    """

    def active_job_exists(assoc_model):
        # EXISTS subquery: a job linked through assoc_model to this dataset
        # instance whose state is not one of the terminal states.
        stmt = select(assoc_model.job_id).join(Job)
        stmt = stmt.where(assoc_model.dataset_id == self.id)
        stmt = stmt.where(Job.state.not_in(Job.terminal_states))
        return stmt.exists()

    used_by_active_job = or_(
        active_job_exists(JobToInputDatasetAssociation),
        active_job_exists(JobToOutputDatasetAssociation),
    )
    return not object_session(self).scalar(select(used_by_active_job))
def change_datatype(self, new_ext):
    """Clear the associated files, then let the datatypes registry change the datatype to ``new_ext``."""
    self.clear_associated_files()
    registry = _get_datatypes_registry()
    registry.change_datatype(self, new_ext)
def get_size(self, nice_size=False, calculate_size=True):
    """Returns the size of the data on disk, formatted by ``galaxy.util.nice_size`` when ``nice_size`` is set."""
    size = self.dataset.get_size(calculate_size=calculate_size)
    return galaxy.util.nice_size(size) if nice_size else size
def set_size(self, **kwds):
    """Forward to the dataset's ``set_size`` with the given keywords and return its result."""
    dataset = self.dataset
    return dataset.set_size(**kwds)
def set_created_from_basename(self, created_from_basename):
    """Set the dataset's ``created_from_basename`` once.

    :raises Exception: if the underlying dataset already has a value.
    """
    dataset = self.dataset
    if dataset.created_from_basename is not None:
        raise Exception("Underlying dataset already has a created_from_basename set.")
    dataset.created_from_basename = created_from_basename
# Read/write property; the getter function is not defined in this part of the file.
created_from_basename = property(get_created_from_basename, set_created_from_basename)
@property
def sources(self):
    """The ``sources`` of the underlying dataset."""
    dataset = self.dataset
    return dataset.sources
@property
def hashes(self):
    """The ``hashes`` of the underlying dataset."""
    dataset = self.dataset
    return dataset.hashes
def get_mime(self):
    """Returns the mime type of the data, or ``"data"`` when the lookup raises AttributeError."""
    try:
        registry = _get_datatypes_registry()
        mimetype = registry.get_mimetype_by_extension(self.extension.lower())
    except AttributeError:
        # extension is None
        return "data"
    return mimetype
def set_peek(self, line_count=None, **kwd):
    """Delegate peek generation to the datatype, never raising for ordinary failures.

    The datatype is first called with ``line_count``. On ``TypeError`` the
    call is repeated without it. Any other exception from the first call is
    logged and ``None`` is returned.
    """
    try:
        # Certain datatype's set_peek methods contain a line_count argument
        peek_result = self.datatype.set_peek(self, line_count=line_count, **kwd)
    except TypeError:
        # ... and others don't
        return self.datatype.set_peek(self, **kwd)
    except Exception:
        # Never fail peek setting, but do log exception so datatype logic can be fixed
        log.exception("Setting peek failed")
    else:
        return peek_result
def init_meta(self, copy_from=None):
    """Delegate to the datatype's ``init_meta`` and return its result."""
    datatype = self.datatype
    return datatype.init_meta(self, copy_from=copy_from)
def set_meta(self, **kwd):
    """Clear non-metadata-safe associated files, then delegate to the datatype's ``set_meta``."""
    self.clear_associated_files(metadata_safe=True)
    datatype = self.datatype
    return datatype.set_meta(self, **kwd)
def as_display_type(self, type, **kwd):
    """Delegate to the datatype's ``as_display_type`` and return its result."""
    datatype = self.datatype
    return datatype.as_display_type(self, type, **kwd)
def display_peek(self):
    """Return the datatype's rendering of the peek; on any exception log it and return ``None``."""
    try:
        rendered = self.datatype.display_peek(self)
    except Exception:
        log.exception("Error occurred while generating dataset peek")
        return None
    return rendered
def get_converted_files_by_type(self, file_type):
    """Return the first usable implicitly converted dataset of ``file_type``, else ``None``.

    Usable means: the association is not deleted and matches the type, and its
    dataset (HDA or, failing that, LDDA) is not deleted and is in one of
    ``Dataset.valid_input_states``.
    """
    for assoc in self.implicitly_converted_datasets:
        if assoc.deleted or assoc.type != file_type:
            continue
        item = assoc.dataset or assoc.dataset_ldda
        if not item.deleted and item.state in Dataset.valid_input_states:
            return item
    return None
def get_converted_dataset_deps(self, trans, target_ext):
    """
    Returns dict of { "dependency" => HDA }
    """
    converter_deps = trans.app.datatypes_registry.converter_deps
    try:
        # List of string of dependencies
        depends_list = converter_deps[self.extension][target_ext]
    except KeyError:
        depends_list = []
    converted = {}
    for dep in depends_list:
        converted[dep] = self.get_converted_dataset(trans, dep)
    return converted
[docs] def get_converted_dataset(self, trans, target_ext, target_context=None, history=None):
"""
Return converted dataset(s) if they exist, along with a dict of dependencies.
If not converted yet, do so and return None (the first time). If unconvertible, raise exception.
"""
# See if we can convert the dataset
if target_ext not in self.get_converter_types():
raise NoConverterException(f"Conversion from '{self.extension}' to '{target_ext}' not possible")
# See if converted dataset already exists, either in metadata in conversions.
converted_dataset = self.get_metadata_dataset(target_ext)
if converted_dataset:
return converted_dataset
converted_dataset = self.get_converted_files_by_type(target_ext)
if converted_dataset:
return converted_dataset
deps = {}
# List of string of dependencies
try:
depends_list = trans.app.datatypes_registry.converter_deps[self.extension][target_ext]
except KeyError:
depends_list = []
# Conversion is possible but hasn't been done yet, run converter.
# Check if we have dependencies
try:
for dependency in depends_list:
dep_dataset = self.get_converted_dataset(trans, dependency)
if dep_dataset is None:
# None means converter is running first time
return None
elif dep_dataset.state == Job.states.ERROR:
raise ConverterDependencyException(f"A dependency ({dependency}) was in an error state.")
elif dep_dataset.state != Job.states.OK:
# Pending
return None
deps[dependency] = dep_dataset
except NoConverterException:
raise NoConverterException(f"A dependency ({dependency}) is missing a converter.")
except KeyError:
pass # No deps
return next(
iter(
self.datatype.convert_dataset(
trans,
self,
target_ext,
return_output=True,
visible=False,
deps=deps,
target_context=target_context,
history=history,
).values()
)
)
def attach_implicitly_converted_dataset(self, session, new_dataset, target_ext: str):
    """Register ``new_dataset`` as the implicit conversion of this instance to ``target_ext``.

    The new dataset takes this instance's name and copied attributes. It and
    the new association (created with ``metadata_safe=False``) are added to
    ``session``.
    """
    new_dataset.name = self.name
    self.copy_attributes(new_dataset)
    association = ImplicitlyConvertedDatasetAssociation(
        parent=self,
        file_type=target_ext,
        dataset=new_dataset,
        metadata_safe=False,
    )
    for obj in (new_dataset, association):
        session.add(obj)
def copy_attributes(self, new_dataset):
    """
    Copies attributes to a new dataset, used for implicit conversions.

    This base implementation copies nothing.
    """
    return None
def get_metadata_dataset(self, dataset_ext):
    """
    Returns an HDA that points to a metadata file which contains a
    converted data with the requested extension.

    Only the ``bai`` extension backed by a ``bam_index`` MetadataFile is
    handled; everything else yields ``None``.
    """
    for name, value in self.metadata.items():
        # HACK: MetadataFile objects do not have a type/ext, so need to use metadata name
        # to determine type.
        is_bam_index = dataset_ext == "bai" and name == "bam_index" and isinstance(value, MetadataFile)
        if not is_bam_index:
            continue
        # HACK: MetadataFile objects cannot be used by tools, so return
        # a fake HDA that points to metadata file.
        fake_dataset = Dataset(state=Dataset.states.OK, external_filename=value.get_file_name())
        return HistoryDatasetAssociation(dataset=fake_dataset)
    return None
def clear_associated_files(self, metadata_safe=False, purge=False):
    """Not implemented in this base class: always raises ``Exception("Unimplemented")``."""
    message = "Unimplemented"
    raise Exception(message)
def get_converter_types(self):
    """Return what the datatype's ``get_converter_types`` reports for this instance and the registry."""
    registry = _get_datatypes_registry()
    return self.datatype.get_converter_types(self, registry)
def find_conversion_destination(
    self, accepted_formats: List[str], **kwd
) -> Tuple[bool, Optional[str], Optional["DatasetInstance"]]:
    """Delegate to the datatype's ``find_conversion_destination``.

    Per the return annotation the result is a 3-tuple: a boolean, the target
    extension and an existing converted dataset (the latter two may be None).
    """
    registry = _get_datatypes_registry()
    return self.datatype.find_conversion_destination(self, accepted_formats, registry, **kwd)
def add_validation_error(self, validation_error):
    """Append a single validation error to ``validation_errors``."""
    errors = self.validation_errors
    errors.append(validation_error)
def extend_validation_errors(self, validation_errors):
    """Append every item of ``validation_errors`` to this instance's ``validation_errors``."""
    errors = self.validation_errors
    errors.extend(validation_errors)
@property
def is_ok(self):
    """True when the effective state equals ``states.OK``."""
    current_state = self.state
    return current_state == self.states.OK
@property
def is_pending(self):
    """
    Return true if the dataset is neither ready nor in error
    """
    current_state = self.state
    states = self.states
    pending_states = (
        states.NEW,
        states.UPLOAD,
        states.QUEUED,
        states.RUNNING,
        states.SETTING_METADATA,
    )
    return current_state in pending_states
@property
def source_library_dataset(self):
    """Follow the copied-from links to find an originating library dataset.

    Returns ``(ldda, library_dataset)`` for the first LDDA found that has a
    library dataset, otherwise ``(None, None)``.
    """
    origin_attributes = (
        "copied_from_library_dataset_dataset_association",
        "copied_from_history_dataset_association",
    )

    def get_source(dataset):
        if isinstance(dataset, LibraryDatasetDatasetAssociation) and dataset.library_dataset:
            return (dataset, dataset.library_dataset)
        # Library origin is followed before history origin.
        for attribute in origin_attributes:
            origin = getattr(dataset, attribute)
            if origin:
                source = get_source(origin)
                if source:
                    return source
        return (None, None)

    return get_source(self)
@property
def source_dataset_chain(self):
    """List the datasets this instance was copied from, nearest first.

    Each entry is ``(source, label)``. The label is ``"(Data Library)"`` for
    a library origin and the source history's name for a history origin.
    Errors while following a link are logged as warnings and that link is
    skipped.
    """

    def _source_dataset_chain(dataset, chain):
        try:
            library_origin = dataset.copied_from_library_dataset_dataset_association
            if library_origin:
                chain.append((library_origin, "(Data Library)"))
                return _source_dataset_chain(library_origin, chain)
        except Exception as e:
            log.warning(e)
        try:
            history_origin = dataset.copied_from_history_dataset_association
            if history_origin:
                chain.append((history_origin, history_origin.history.name))
                return _source_dataset_chain(history_origin, chain)
        except Exception as e:
            log.warning(e)
        return chain

    return _source_dataset_chain(self, [])
@property
def creating_job(self) -> Optional[Job]:
    """Job of the first creating-job association, or ``None`` when there is none.

    When this instance has no such associations, those of the last entry in
    the source dataset chain are used instead.
    """
    # TODO this should work with `return self.dataset.job` (revise failing unit tests)
    associations = self.creating_job_associations
    if not associations:
        associations = None
        inherit_chain = self.source_dataset_chain
        if inherit_chain:
            associations = inherit_chain[-1][0].creating_job_associations
    return associations[0].job if associations else None
def get_display_applications(self, trans):
    """Return the datatype's display applications for this dataset."""
    datatype = self.datatype
    return datatype.get_display_applications_by_dataset(self, trans)
def get_datasources(self, trans):
    """
    Returns datasources for dataset; if datasources are not available
    due to indexing, indexing is started. Return value is a dictionary
    mapping each datasource type to a dict with the keys ``name`` (the
    chosen datasource, or None) and ``message`` (the indexing message).
    """
    data_sources_dict = {}
    msg = None
    for source_type, source_list in self.datatype.data_sources.items():
        data_source = None
        if source_type == "data_standalone":
            # Nothing to do.
            msg = None
            data_source = source_list
        else:
            # Convert.
            candidates = [source_list] if isinstance(source_list, str) else source_list
            # Loop through sources until viable one is found.
            for source in candidates:
                msg = self.convert_dataset(trans, source)
                # No message or PENDING means that source is viable. No
                # message indicates conversion was done and is successful.
                if not msg or msg == self.conversion_messages.PENDING:
                    data_source = source
                    break
        # Store msg.
        data_sources_dict[source_type] = {"name": data_source, "message": msg}
    return data_sources_dict
def convert_dataset(self, trans, target_type):
    """
    Converts a dataset to the target_type and returns a message indicating
    status of the conversion. None is returned to indicate that dataset
    was converted successfully.

    Possible return values:

    * ``None`` - the converted dataset exists and is OK.
    * ``conversion_messages.NO_CONVERTER`` - no converter is available.
    * ``conversion_messages.PENDING`` - the conversion has not finished.
    * a dict with ``kind`` (``conversion_messages.ERROR``) and ``message`` -
      a dependency failed or the converted dataset is in the error state.
      In the latter case the message is the creating job's stderr, or
      ``None`` when no creating job can be found.
    """
    # Get converted dataset; this will start the conversion if necessary.
    try:
        converted_dataset = self.get_converted_dataset(trans, target_type)
    except NoConverterException:
        return self.conversion_messages.NO_CONVERTER
    except ConverterDependencyException as dep_error:
        return {"kind": self.conversion_messages.ERROR, "message": dep_error.value}
    # Check dataset state and return any messages.
    msg = None
    if converted_dataset and converted_dataset.state == Dataset.states.ERROR:
        stmt = select(JobToOutputDatasetAssociation.job_id).filter_by(dataset_id=converted_dataset.id).limit(1)
        job_id = trans.sa_session.scalars(stmt).first()
        # An errored dataset may have no job output association, in which
        # case there is no job to load and no stderr to report. Guarding here
        # avoids an AttributeError on ``None.stderr``.
        job = trans.sa_session.get(Job, job_id) if job_id is not None else None
        stderr = job.stderr if job is not None else None
        msg = {"kind": self.conversion_messages.ERROR, "message": stderr}
    elif not converted_dataset or converted_dataset.state != Dataset.states.OK:
        msg = self.conversion_messages.PENDING
    return msg
def _serialize(self, id_encoder, serialization_options):
    """Build the serialized dictionary shared by all dataset instances and attach its identifier."""
    metadata = _prepare_metadata_for_serialization(id_encoder, serialization_options, self.metadata)
    attributes = dict(
        create_time=str(self.create_time),
        update_time=str(self.update_time),
        name=unicodify(self.name),
        info=unicodify(self.info),
        blurb=self.blurb,
        peek=self.peek,
        extension=self.extension,
        metadata=metadata,
        designation=self.designation,
        deleted=self.deleted,
        visible=self.visible,
    )
    dataset_uuid = self.dataset.uuid
    # A falsy uuid is serialized as None rather than as a string.
    attributes["dataset_uuid"] = str(dataset_uuid) if dataset_uuid else None
    attributes["validated_state"] = self.validated_state
    attributes["validated_state_message"] = self.validated_state_message
    rval = dict_for(self, **attributes)
    serialization_options.attach_identifier(id_encoder, self, rval)
    return rval
def _handle_serialize_files(self, id_encoder, serialization_options, rval):
    """Add the dataset part of the serialization to ``rval`` (modified in place).

    Either the fully serialized dataset goes under ``"dataset"``, or the files
    are serialized through the options object and hashes, basename and
    sources are collected under ``"file_metadata"``.
    """
    dataset = self.dataset
    if serialization_options.serialize_dataset_objects:
        rval["dataset"] = dataset.serialize(id_encoder, serialization_options)
        return
    serialization_options.serialize_files(self, rval)
    file_metadata = {}
    hashes = dataset.hashes
    if hashes:
        file_metadata["hashes"] = [h.serialize(id_encoder, serialization_options) for h in hashes]
    basename = dataset.created_from_basename
    if basename is not None:
        file_metadata["created_from_basename"] = basename
    sources = dataset.sources
    if sources:
        file_metadata["sources"] = [s.serialize(id_encoder, serialization_options) for s in sources]
    rval["file_metadata"] = file_metadata
[docs]class HistoryDatasetAssociation(DatasetInstance, HasTags, Dictifiable, UsesAnnotations, HasName, Serializable):
"""
Resource class that creates a relation between a dataset and a user history.
"""
# Type declaration only; no value is assigned in this class body.
history_id: Optional[int]
def __init__(
    self,
    hid=None,
    history=None,
    sa_session=None,
    **kwd,
):
    """
    Create a new HDA and associate it with the given history.

    ``hid`` and ``history`` are set on this object after the base class
    initialiser has run; all remaining keywords go to
    ``DatasetInstance.__init__``.
    """
    # FIXME: sa_session must be passed to DatasetInstance if the create_dataset
    # parameter is True so that the new object can be flushed. Is there a better way?
    base_kwargs = dict(kwd, sa_session=sa_session)
    DatasetInstance.__init__(self, **base_kwargs)
    self.hid = hid
    # Relationships
    self.history = history
def __strict_check_before_flush__(self):
    """Raise if this HDA is in an invalid shape; HDAs with extension ``len`` are exempt.

    Checked in order: a history (object or id) must be present, a hid must
    be set, and the dataset needs a file size unless its state is one of the
    dataset's ``no_data_states``.
    """
    if self.extension == "len":
        # TODO: Custom builds (with .len extension) do not get a history or a HID.
        # These should get some other type of permanent storage, perhaps UserDatasetAssociation ?
        return
    # Everything else needs to have a hid and a history
    if not self.history and not getattr(self, "history_id", None):
        raise Exception(f"HistoryDatasetAssociation {self} without history detected, this is not valid")
    if not self.hid:
        raise Exception(f"HistoryDatasetAssociation {self} without hid, this is not valid")
    if self.dataset.file_size is None and self.dataset.state not in self.dataset.no_data_states:
        raise Exception(
            f"HistoryDatasetAssociation {self} in state {self.dataset.state} with null file size, this is not valid"
        )
@property
def user(self):
    """The user of the associated history, or ``None`` when there is no history."""
    history = self.history
    return history.user if history else None
def __create_version__(self, session):
    """Record the previous column values as a HistoryDatasetAssociationHistory row.

    A version is only written when some mapped column changed and this HDA
    has an ``update_time``, is in the OK state and is not deleted. The
    HDA's ``version`` counter is then incremented.
    """
    state = inspect(self)
    # We only create a new version if columns of the HDA table have changed, and ignore relationships.
    changes = {}
    for attr in state.mapper.columns:
        hist = state.get_history(attr.key, True)
        if hist.has_changes():
            # hist.deleted holds old value(s)
            changes[attr.key] = hist.deleted
    # We only record changes to HDAs that exist in the database and have a update_time
    if not (changes and self.update_time and self.state == self.states.OK and not self.deleted):
        return
    previous = {
        "name": changes.get("name", self.name),
        "dbkey": changes.get("dbkey", self.dbkey),
        "extension": changes.get("extension", self.extension),
        "extended_metadata_id": changes.get("extended_metadata_id", self.extended_metadata_id),
    }
    # Old values arrive wrapped in a list; unwrap to the first element.
    new_values = {key: (value[0] if isinstance(value, list) else value) for key, value in previous.items()}
    new_values["update_time"] = self.update_time
    new_values["version"] = self.version or 1
    new_values["metadata"] = self._metadata
    past_hda = HistoryDatasetAssociationHistory(history_dataset_association_id=self.id, **new_values)
    self.version = self.version + 1 if self.version else 1
    session.add(past_hda)
def copy_from(self, other_hda, new_dataset=None, include_tags=True, include_metadata=False):
    """Make this HDA take over the attributes and dataset of ``other_hda``.

    :param new_dataset: dataset to attach instead of ``other_hda.dataset``.
    :param include_tags: copy tags as well (only when this HDA has a history).
    :param include_metadata: also take over ``other_hda``'s ``_metadata``.
    """
    # This deletes the old dataset, so make sure to only call this on new things
    # in the history (e.g. during job finishing).
    old_dataset = self.dataset
    if include_metadata:
        self._metadata = other_hda._metadata
    copied_attributes = (
        "metadata_deferred",
        "info",
        "blurb",
        "peek",
        "extension",
        "designation",
        "deleted",
        "visible",
        "validated_state",
        "validated_state_message",
    )
    for attribute in copied_attributes:
        setattr(self, attribute, getattr(other_hda, attribute))
    if include_tags and self.history:
        self.copy_tags_from(self.user, other_hda)
    self.dataset = new_dataset or other_hda.dataset
    if old_dataset:
        old_dataset.full_delete()
def copy(self, parent_id=None, copy_tags=None, flush=True, copy_hid=True, new_name=None):
    """
    Create a copy of this HDA.

    The copy shares this HDA's dataset and records this HDA as its source.
    It is added to this HDA's session and committed when ``flush`` is true.
    """
    hda = HistoryDatasetAssociation(
        hid=self.hid if copy_hid else None,
        name=new_name or self.name,
        info=self.info,
        blurb=self.blurb,
        peek=self.peek,
        tool_version=self.tool_version,
        extension=self.extension,
        dbkey=self.dbkey,
        dataset=self.dataset,
        visible=self.visible,
        deleted=self.deleted,
        parent_id=parent_id,
        copied_from_history_dataset_association=self,
        flush=False,
    )
    # update init non-keywords as well
    hda.purged = self.purged
    hda.copy_tags_to(copy_tags)
    object_session(self).add(hda)
    hda.metadata = self.metadata
    if not flush:
        return hda
    session = object_session(self)
    with transaction(session):
        session.commit()
    return hda
def copy_tags_to(self, copy_tags=None):
    """Append a copy of each given tag to ``self.tags``.

    ``copy_tags`` may be an iterable of tags or a dict whose values are the
    tags; ``None`` means nothing to copy.
    """
    if copy_tags is None:
        return
    tags_to_copy = copy_tags.values() if isinstance(copy_tags, dict) else copy_tags
    for tag in tags_to_copy:
        self.tags.append(tag.copy(cls=HistoryDatasetAssociationTagAssociation))
def copy_attributes(self, new_dataset):
    """Give ``new_dataset`` this HDA's hid unless it already has one."""
    if new_dataset.hid is not None:
        return
    new_dataset.hid = self.hid
[docs] def to_library_dataset_dataset_association(
self,
trans,
target_folder,
replace_dataset=None,
parent_id=None,
roles=None,
ldda_message="",
element_identifier=None,
):
"""
Copy this HDA to a library optionally replacing an existing LDDA.

The new LDDA shares this HDA's dataset and is returned after the session
has been committed. Raises an Exception when the dataset is not shareable.
"""
if not self.dataset.shareable:
raise Exception("Attempting to share a non-shareable dataset.")
if replace_dataset:
# The replace_dataset param ( when not None ) refers to a LibraryDataset that
# is being replaced with a new version.
library_dataset = replace_dataset
else:
# If replace_dataset is None, the Library level permissions will be taken from the folder and
# applied to the new LibraryDataset, and the current user's DefaultUserPermissions will be applied
# to the associated Dataset.
library_dataset = LibraryDataset(folder=target_folder, name=self.name, info=self.info)
# Fall back to the owner of this HDA's history when trans has no user.
user = trans.user or self.history.user
ldda = LibraryDatasetDatasetAssociation(
name=element_identifier or self.name,
info=self.info,
blurb=self.blurb,
peek=self.peek,
tool_version=self.tool_version,
extension=self.extension,
dbkey=self.dbkey,
dataset=self.dataset,
library_dataset=library_dataset,
visible=self.visible,
deleted=self.deleted,
parent_id=parent_id,
copied_from_history_dataset_association=self,
user=user,
)
# Make the new LDDA the library dataset's current association.
library_dataset.library_dataset_dataset_association = ldda
object_session(self).add(library_dataset)
# If roles were selected on the upload form, restrict access to the Dataset to those roles
roles = roles or []
for role in roles:
dp = trans.model.DatasetPermissions(
trans.app.security_agent.permitted_actions.DATASET_ACCESS.action, ldda.dataset, role
)
trans.sa_session.add(dp)
# Must set metadata after ldda flushed, as MetadataFiles require ldda.id
if self.set_metadata_requires_flush:
session = object_session(self)
with transaction(session):
session.commit()
ldda.metadata = self.metadata
# TODO: copy #tags from history
if ldda_message:
ldda.message = ldda_message
# Only a brand-new library dataset needs to be added to the target folder.
if not replace_dataset:
target_folder.add_library_dataset(library_dataset, genome_build=ldda.dbkey)
object_session(self).add(target_folder)
object_session(self).add(library_dataset)
session = object_session(self)
with transaction(session):
session.commit()
return ldda
def clear_associated_files(self, metadata_safe=False, purge=False):
    """Clear the implicit conversion associations of this HDA.

    Conversions made from this HDA are cleared unless they are deleted, or,
    with ``metadata_safe=True``, flagged as metadata safe. Associations in
    which this HDA is the converted result are always cleared, but without
    deleting the dataset.
    """
    for assoc in self.implicitly_converted_datasets:
        if assoc.deleted:
            continue
        # metadata_safe = True means to only clear when assoc.metadata_safe == False
        if not (metadata_safe and assoc.metadata_safe):
            assoc.clear(purge=purge)
    for parent_assoc in self.implicitly_converted_parent_datasets:
        parent_assoc.clear(purge=purge, delete_dataset=False)
def get_access_roles(self, security_agent):
    """
    Return the access roles associated with this HDA's dataset.
    """
    dataset = self.dataset
    return dataset.get_access_roles(security_agent)
def purge_usage_from_quota(self, user, quota_source_info):
    """Remove this HDA's quota_amount from user's quota.

    Nothing happens without a user or when ``quota_source_info.use`` is falsy.
    """
    if not (user and quota_source_info.use):
        return
    freed_amount = self.quota_amount(user)
    user.adjust_total_disk_usage(-freed_amount, quota_source_info.label)
def quota_amount(self, user):
    """
    Return the disk space used for this HDA relevant to user quotas.
    If the user has multiple instances of this dataset, it will not affect their
    disk usage statistic.
    """
    rval = 0
    # Anon users are handled just by their single history size.
    if not user:
        return rval
    dataset = self.dataset
    if not (dataset.library_associations or self.purged or dataset.purged):
        # Gets an HDA disk usage, if the user does not already
        # have an association of the same dataset
        owned_elsewhere = any(
            hda.id != self.id and not hda.purged and hda.history and hda.user and hda.user == user
            for hda in dataset.history_associations
        )
        if not owned_elsewhere:
            rval += self.get_total_size()
    return rval
def _serialize(self, id_encoder, serialization_options):
    """
    Extend the parent serialization with HDA-specific fields.

    Adds state, hid, annotation, tags and tool version, the identifier of the
    owning history (when there is one), and the chain of identifiers obtained
    by following ``copied_from_history_dataset_association`` links, nearest
    source first. File-related entries are added by ``_handle_serialize_files``.
    """
    rval = super()._serialize(id_encoder, serialization_options)
    rval["state"] = self.state
    rval["hid"] = self.hid
    rval["annotation"] = unicodify(getattr(self, "annotation", ""))
    rval["tags"] = self.make_tag_string_list()
    rval["tool_version"] = self.tool_version
    if self.history:
        rval["history_encoded_id"] = serialization_options.get_identifier(id_encoder, self.history)

    # Handle copied_from_history_dataset_association information...
    id_chain = []
    ancestor = self.copied_from_history_dataset_association
    while ancestor:
        id_chain.append(serialization_options.get_identifier(id_encoder, ancestor))
        ancestor = ancestor.copied_from_history_dataset_association
    rval["copied_from_history_dataset_association_id_chain"] = id_chain

    self._handle_serialize_files(id_encoder, serialization_options, rval)
    return rval
def to_dict(self, view="collection", expose_dataset_path=False):
    """
    Return attributes of this HDA that are exposed using the API.

    The HDA-specific values are computed first and then overlaid with the
    parent class's ``to_dict(view=view)`` result, so parent keys win on
    conflict. Metadata elements are added as ``metadata_<name>`` keys;
    MetadataFile values are only included (as file names) when
    ``expose_dataset_path`` is true.
    """
    # Since this class is a proxy to rather complex attributes we want to
    # display in other objects, we can't use the simpler method used by
    # other model classes.
    inherited = super().to_dict(view=view)
    hda = self
    rval = {
        "id": hda.id,
        "hda_ldda": "hda",
        "uuid": str(hda.dataset.uuid) if hda.dataset.uuid else None,
        "hid": hda.hid,
        "file_ext": hda.ext,
        "peek": unicodify(hda.display_peek()) if hda.peek and hda.peek != "no peek" else None,
        "model_class": self.__class__.__name__,
        "name": hda.name,
        "deleted": hda.deleted,
        "purged": hda.purged,
        "visible": hda.visible,
        "state": hda.state,
        "history_content_type": hda.history_content_type,
        "file_size": int(hda.get_size()),
        "create_time": hda.create_time.isoformat(),
        "update_time": hda.update_time.isoformat(),
        "data_type": f"{hda.datatype.__class__.__module__}.{hda.datatype.__class__.__name__}",
        "genome_build": hda.dbkey,
        "validated_state": hda.validated_state,
        "validated_state_message": hda.validated_state_message,
        "misc_info": hda.info.strip() if isinstance(hda.info, str) else hda.info,
        "misc_blurb": hda.blurb,
    }
    rval.update(inherited)

    source_ldda = hda.copied_from_library_dataset_dataset_association
    if source_ldda is not None:
        rval["copied_from_ldda_id"] = source_ldda.id
    if hda.history is not None:
        rval["history_id"] = hda.history.id
    if hda.extended_metadata is not None:
        rval["extended_metadata"] = hda.extended_metadata.data

    for name in hda.metadata.spec.keys():
        key = f"metadata_{name}"
        value = hda.metadata.get(name)
        if isinstance(value, MetadataFile):
            # only when explicitly set: fetching filepaths can be expensive
            if expose_dataset_path:
                rval[key] = value.get_file_name()
            continue
        # If no value for metadata, look in datatype for metadata.
        if not hda.metadata.element_is_set(name) and hasattr(hda.datatype, name):
            value = getattr(hda.datatype, name)
        rval[key] = value
    return rval
def unpause_dependent_jobs(self, jobs=None):
    """
    Move this dataset out of the PAUSED state and collect dependent jobs.

    If the dataset is PAUSED it is reset to NEW and its ``info`` is cleared.
    Each job in ``self.dependent_jobs`` not yet collected is added to the
    result set, and the datasets it outputs are processed recursively.

    :param jobs: optional set of already-collected jobs; a non-empty set is
        extended in place, otherwise a new set is created.
    :returns: the set of collected jobs.
    """
    states = self.states
    if self.state == states.PAUSED:
        self.state = states.NEW
        self.info = None
    collected = jobs or set()
    for input_assoc in self.dependent_jobs:
        job = input_assoc.job
        if job in collected:
            continue
        collected.add(job)
        for output_assoc in job.output_datasets:
            collected.update(output_assoc.dataset.unpause_dependent_jobs(jobs=collected))
    return collected
@property
def history_content_type(self):
    """Content-type label reported for this history item; always ``"dataset"``."""
    return "dataset"
# TODO: down into DatasetInstance
content_type = "dataset"

@hybrid.hybrid_property
def type_id(self):
    """Instance-level value: ``"<content_type>-<id>"``."""
    parts = [self.content_type, str(self.id)]
    return "-".join(parts)

@type_id.expression  # type: ignore[no-redef]
def type_id(cls):
    # SQL-level counterpart: both operands are coerced to Unicode so they can
    # be concatenated, and the result is labelled "type_id".
    content_type_expr = type_coerce(cls.content_type, Unicode)
    id_expr = type_coerce(cls.id, Unicode)
    return (content_type_expr + "-" + id_expr).label("type_id")
class HistoryDatasetAssociationHistory(Base, Serializable):
    """
    Row of the ``history_dataset_association_history`` table.

    Each row references a ``history_dataset_association`` row and stores a
    version number, update time, name, extension, metadata and an optional
    reference to an ``extended_metadata`` row.
    """

    __tablename__ = "history_dataset_association_history"

    id = Column(Integer, primary_key=True)
    history_dataset_association_id = Column(Integer, ForeignKey("history_dataset_association.id"), index=True)
    update_time = Column(DateTime, default=now)
    version = Column(Integer)
    name = Column(TrimmedString(255))
    extension = Column(TrimmedString(64))
    # Stored in the "metadata" column; exposed under a private attribute name.
    _metadata = Column("metadata", MetadataType)
    extended_metadata_id = Column(Integer, ForeignKey("extended_metadata.id"), index=True)

    def __init__(
        self,
        history_dataset_association_id,
        name,
        dbkey,
        update_time,
        version,
        extension,
        extended_metadata_id,
        metadata,
    ):
        """Populate every field from the given values; no defaults are applied."""
        self.history_dataset_association_id = history_dataset_association_id
        self.name = name
        # NOTE(review): no ``dbkey`` Column is declared in this class, so this
        # is presumably a plain (unmapped) attribute - confirm.
        self.dbkey = dbkey
        self.update_time = update_time
        self.version = version
        self.extension = extension
        self.extended_metadata_id = extended_metadata_id
        self._metadata = metadata
# hda read access permission given by a user to a specific site (gen. for external display applications)
class HistoryDatasetAssociationDisplayAtAuthorization(Base, RepresentById):
    """
    Row of the ``history_dataset_association_display_at_authorization`` table.

    Links a HistoryDatasetAssociation, a User and a ``site`` string.
    """

    __tablename__ = "history_dataset_association_display_at_authorization"

    id = Column(Integer, primary_key=True)
    create_time = Column(DateTime, default=now)
    update_time = Column(DateTime, index=True, default=now, onupdate=now)
    history_dataset_association_id = Column(Integer, ForeignKey("history_dataset_association.id"), index=True)
    user_id = Column(Integer, ForeignKey("galaxy_user.id"), index=True)
    site = Column(TrimmedString(255))

    history_dataset_association = relationship("HistoryDatasetAssociation")
    user = relationship("User")

    def __init__(self, hda=None, user=None, site=None):
        """Store the given HDA, user and site on the new row; all are optional."""
        self.history_dataset_association = hda
        self.user = user
        self.site = site
class HistoryDatasetAssociationSubset(Base, RepresentById):
    """
    Row of the ``history_dataset_association_subset`` table.

    Holds two foreign keys to ``history_dataset_association`` (exposed as
    ``hda`` and ``subset``) plus a ``location`` string.
    """

    __tablename__ = "history_dataset_association_subset"

    id = Column(Integer, primary_key=True)
    history_dataset_association_id = Column(Integer, ForeignKey("history_dataset_association.id"), index=True)
    history_dataset_association_subset_id = Column(Integer, ForeignKey("history_dataset_association.id"), index=True)
    location = Column(Unicode(255), index=True)

    # Both relationships target the same table, so each needs an explicit
    # join condition naming the foreign key column it follows.
    hda = relationship(
        "HistoryDatasetAssociation",
        primaryjoin=(
            lambda: HistoryDatasetAssociationSubset.history_dataset_association_id == HistoryDatasetAssociation.id
        ),
    )
    subset = relationship(
        "HistoryDatasetAssociation",
        primaryjoin=(
            lambda: HistoryDatasetAssociationSubset.history_dataset_association_subset_id
            == HistoryDatasetAssociation.id
        ),
    )

    def __init__(self, hda, subset, location):
        """Store the two related HDAs and the location string."""
        self.hda = hda
        self.subset = subset
        self.location = location
class Library(Base, Dictifiable, HasName, Serializable):
    """
    A data library, mapped to the ``library`` table.

    ``root_folder`` is the LibraryFolder referenced by ``root_folder_id`` and
    ``actions`` holds the associated LibraryPermissions rows.
    """

    __tablename__ = "library"

    id = Column(Integer, primary_key=True)
    root_folder_id = Column(Integer, ForeignKey("library_folder.id"), index=True)
    create_time = Column(DateTime, default=now)
    update_time = Column(DateTime, default=now, onupdate=now)
    name = Column(String(255), index=True)
    deleted = Column(Boolean, index=True, default=False)
    purged = Column(Boolean, index=True, default=False)
    description = Column(TEXT)
    synopsis = Column(TEXT)
    root_folder = relationship("LibraryFolder", back_populates="library_root")
    actions = relationship("LibraryPermissions", back_populates="library", cascade_backrefs=False)

    permitted_actions = get_permitted_actions(filter="LIBRARY")
    dict_collection_visible_keys = ["id", "name"]
    dict_element_visible_keys = ["id", "deleted", "name", "description", "synopsis", "root_folder_id", "create_time"]

    def __init__(self, name=None, description=None, synopsis=None, root_folder=None):
        """Create a library; a falsy ``name`` is replaced by ``"Unnamed library"``."""
        self.name = name or "Unnamed library"
        self.description = description
        self.synopsis = synopsis
        self.root_folder = root_folder

    def _serialize(self, id_encoder, serialization_options):
        """
        Serialize name, description and synopsis, plus the serialized root
        folder when one is set, and attach this object's identifier.
        """
        rval = dict_for(
            self,
            name=self.name,
            description=self.description,
            synopsis=self.synopsis,
        )
        if self.root_folder:
            rval["root_folder"] = self.root_folder.serialize(id_encoder, serialization_options)
        serialization_options.attach_identifier(id_encoder, self, rval)
        return rval

    def to_dict(self, view="collection", value_mapper=None):
        """
        Return the dictionary representation of this library.

        Delegates to the parent implementation and returns its result unchanged.
        """
        return super().to_dict(view=view, value_mapper=value_mapper)

    def get_active_folders(self, folder, folders=None):
        """
        Return ``folder`` and all folders reachable through ``active_folders``,
        sorted by ``id``.

        :param folder: the folder to start from; it is included in the result.
        :param folders: not used to build the result; kept so existing callers
            that pass it keep working. Previously any non-None value raised
            ``UnboundLocalError`` because the result list was only initialised
            when ``folders`` was None.
        :returns: a new list of folders ordered by their ``id`` attribute.
        """
        # TODO: should we make sure the library is not deleted?

        def sort_by_attr(seq, attr):
            """
            Sort the sequence of objects by object's attribute

            Arguments:
            seq  - the list or any sequence (including immutable one) of objects to sort.
            attr - the name of attribute to sort by
            """
            # Use the "Schwartzian transform"
            # Create the auxiliary list of tuples where every i-th tuple has form
            # (seq[i].attr, i, seq[i]) and sort it. The second item of tuple is needed not
            # only to provide stable sorting, but mainly to eliminate comparison of objects
            # (which can be expensive or prohibited) in case of equal attribute values.
            decorated = [(getattr(item, attr), index, item) for index, item in enumerate(seq)]
            decorated.sort()
            return [entry[-1] for entry in decorated]

        # Always start from the folder itself, regardless of ``folders``.
        active_folders = [folder]
        for active_folder in folder.active_folders:
            active_folders.extend(self.get_active_folders(active_folder, folders))
        return sort_by_attr(active_folders, "id")

    def get_access_roles(self, security_agent):
        """
        Return the roles of every permission in ``self.actions`` whose action
        equals the agent's ``LIBRARY_ACCESS`` action.
        """
        access_action = security_agent.permitted_actions.LIBRARY_ACCESS.action
        return [lp.role for lp in self.actions if lp.action == access_action]
class LibraryFolder(Base, Dictifiable, HasName, Serializable):
    """
    A folder inside a data library, mapped to the ``library_folder`` table.

    Folders form a tree through ``parent_id``; the folder without a parent is
    linked to its Library through ``library_root``.
    """

    __tablename__ = "library_folder"
    __table_args__ = (Index("ix_library_folder_name", "name", mysql_length=200),)

    id = Column(Integer, primary_key=True)
    parent_id = Column(Integer, ForeignKey("library_folder.id"), nullable=True, index=True)
    create_time = Column(DateTime, default=now)
    update_time = Column(DateTime, default=now, onupdate=now)
    name = Column(TEXT)
    description = Column(TEXT)
    order_id = Column(Integer)  # not currently being used, but for possible future use
    item_count = Column(Integer)
    deleted = Column(Boolean, index=True, default=False)
    purged = Column(Boolean, index=True, default=False)
    genome_build = Column(TrimmedString(40))

    folders = relationship(
        "LibraryFolder",
        primaryjoin=(lambda: LibraryFolder.id == LibraryFolder.parent_id),
        order_by=asc(name),
        back_populates="parent",
    )
    parent = relationship("LibraryFolder", back_populates="folders", remote_side=[id])

    active_folders = relationship(
        "LibraryFolder",
        primaryjoin=("and_(LibraryFolder.parent_id == LibraryFolder.id, not_(LibraryFolder.deleted))"),
        order_by=asc(name),
        # """sqlalchemy.exc.ArgumentError: Error creating eager relationship 'active_folders'
        # on parent class '<class 'galaxy.model.LibraryFolder'>' to child class '<class 'galaxy.model.LibraryFolder'>':
        # Cant use eager loading on a self referential relationship."""
        # TODO: This is no longer the case. Fix this: https://docs.sqlalchemy.org/en/14/orm/self_referential.html#configuring-self-referential-eager-loading
        viewonly=True,
    )

    datasets = relationship(
        "LibraryDataset",
        primaryjoin=(
            # The two conditions must be combined with and_(): Python's ``and``
            # does not build a SQL conjunction, so the ``isnot(None)`` filter
            # would not become part of the join condition.
            lambda: and_(
                LibraryDataset.folder_id == LibraryFolder.id,
                LibraryDataset.library_dataset_dataset_association_id.isnot(None),
            )
        ),
        order_by=(lambda: asc(LibraryDataset._name)),
        viewonly=True,
    )

    active_datasets = relationship(
        "LibraryDataset",
        primaryjoin=(
            "and_(LibraryDataset.folder_id == LibraryFolder.id, not_(LibraryDataset.deleted), LibraryDataset.library_dataset_dataset_association_id.isnot(None))"
        ),
        order_by=(lambda: asc(LibraryDataset._name)),
        viewonly=True,
    )

    library_root = relationship("Library", back_populates="root_folder")
    actions = relationship("LibraryFolderPermissions", back_populates="folder", cascade_backrefs=False)

    dict_element_visible_keys = [
        "id",
        "parent_id",
        "name",
        "description",
        "item_count",
        "genome_build",
        "update_time",
        "deleted",
    ]

    def __init__(self, name=None, description=None, item_count=0, order_id=None, genome_build=None):
        """Create a folder; a falsy ``name`` is replaced by ``"Unnamed folder"``."""
        self.name = name or "Unnamed folder"
        self.description = description
        self.item_count = item_count
        self.order_id = order_id
        self.genome_build = genome_build

    def add_library_dataset(self, library_dataset, genome_build=None):
        """
        Attach ``library_dataset`` to this folder.

        The dataset receives this folder's id and the current ``item_count`` as
        its ``order_id``; ``item_count`` is then incremented. The folder's
        ``genome_build`` is overwritten unless the given value is None or "?".
        """
        library_dataset.folder_id = self.id
        library_dataset.order_id = self.item_count
        self.item_count += 1
        if genome_build not in [None, "?"]:
            self.genome_build = genome_build

    def add_folder(self, folder):
        """
        Attach ``folder`` as a child: set its ``parent_id`` and ``order_id``
        (the current ``item_count``) and increment ``item_count``.
        """
        folder.parent_id = self.id
        folder.order_id = self.item_count
        self.item_count += 1

    @property
    def activatable_library_datasets(self):
        """
        List of datasets in this folder that have an LDDA whose underlying
        dataset is not deleted.
        """
        # This needs to be a list
        return [
            ld
            for ld in self.datasets
            if ld.library_dataset_dataset_association and not ld.library_dataset_dataset_association.dataset.deleted
        ]

    def _serialize(self, id_encoder, serialization_options):
        """
        Serialize this folder's scalar fields together with the recursively
        serialized child folders (``folders``) and datasets (``datasets``).
        """
        rval = dict_for(
            self,
            id=self.id,  # FIXME: serialize only in sessionless export mode
            name=self.name,
            description=self.description,
            genome_build=self.genome_build,
            item_count=self.item_count,
            order_id=self.order_id,
            # update_time=self.update_time,
            deleted=self.deleted,
        )
        rval["folders"] = [folder.serialize(id_encoder, serialization_options) for folder in self.folders]
        rval["datasets"] = [dataset.serialize(id_encoder, serialization_options) for dataset in self.datasets]
        serialization_options.attach_identifier(id_encoder, self, rval)
        return rval

    def to_dict(self, view="collection", value_mapper=None):
        """
        Return the parent class's dictionary extended with ``library_path``
        and ``parent_library_id``.
        """
        rval = super().to_dict(view=view, value_mapper=value_mapper)
        rval["library_path"] = self.library_path
        rval["parent_library_id"] = self.parent_library.id
        return rval

    @property
    def library_path(self):
        """
        Names of the folders from just below the root down to this folder.

        The root folder (the one without a parent) is not included, so the
        list is empty when this folder is itself the root.
        """
        names = []
        f = self
        while f.parent:
            names.append(f.name)
            f = f.parent
        # Names were collected bottom-up; present them top-down.
        names.reverse()
        return names

    @property
    def parent_library(self):
        """
        The Library whose root folder is the top-most ancestor of this folder.

        Raises IndexError if that root folder has no entry in ``library_root``.
        """
        f = self
        while f.parent:
            f = f.parent
        return f.library_root[0]
class LibraryDataset(Base, Serializable):
    """
    A dataset entry in a library folder, mapped to the ``library_dataset`` table.

    This class acts as a proxy to the currently selected
    LibraryDatasetDatasetAssociation (LDDA): ``name`` and ``info`` are read
    from the LDDA when one is selected and fall back to the values stored on
    this row otherwise.
    """

    __tablename__ = "library_dataset"

    id = Column(Integer, primary_key=True)
    # current version of dataset, if null, there is not a current version selected
    library_dataset_dataset_association_id = Column(
        Integer,
        ForeignKey(
            "library_dataset_dataset_association.id", use_alter=True, name="library_dataset_dataset_association_id_fk"
        ),
        nullable=True,
        index=True,
    )
    folder_id = Column(Integer, ForeignKey("library_folder.id"), index=True)
    # not currently being used, but for possible future use
    order_id = Column(Integer)
    create_time = Column(DateTime, default=now)
    update_time = Column(DateTime, default=now, onupdate=now)
    # when not None/null this will supercede display in library (but not when imported into user's history?)
    _name = Column("name", TrimmedString(255), index=True)
    # when not None/null this will supercede display in library (but not when imported into user's history?)
    _info = Column("info", TrimmedString(255))
    deleted = Column(Boolean, index=True, default=False)
    purged = Column(Boolean, index=True, default=False)
    folder = relationship("LibraryFolder")
    library_dataset_dataset_association = relationship(
        "LibraryDatasetDatasetAssociation", foreign_keys=library_dataset_dataset_association_id, post_update=True
    )
    # LDDAs that belong to this library dataset but are not the currently selected one.
    expired_datasets = relationship(
        "LibraryDatasetDatasetAssociation",
        foreign_keys=[id, library_dataset_dataset_association_id],
        primaryjoin=(
            "and_(LibraryDataset.id == LibraryDatasetDatasetAssociation.library_dataset_id, "
            "not_(LibraryDataset.library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.id))"
        ),
        viewonly=True,
        uselist=True,
    )
    actions = relationship("LibraryDatasetPermissions", back_populates="library_dataset", cascade_backrefs=False)

    # This class acts as a proxy to the currently selected LDDA
    upload_options = [
        ("upload_file", "Upload files"),
        ("upload_directory", "Upload directory of files"),
        ("upload_paths", "Upload files from filesystem paths"),
        ("import_from_history", "Import datasets from your current history"),
    ]

    def get_info(self):
        """Return the LDDA's info if an LDDA is selected, else the stored info, else ``"no info"``."""
        if self.library_dataset_dataset_association:
            return self.library_dataset_dataset_association.info
        elif self._info:
            return self._info
        else:
            return "no info"

    def set_info(self, info):
        """Store ``info`` on this row (the ``info`` column); the LDDA is not modified."""
        self._info = info

    info = property(get_info, set_info)

    def get_name(self):
        """Return the LDDA's name if an LDDA is selected, else the stored name, else ``"Unnamed dataset"``."""
        if self.library_dataset_dataset_association:
            return self.library_dataset_dataset_association.name
        elif self._name:
            return self._name
        else:
            return "Unnamed dataset"

    def set_name(self, name):
        """Store ``name`` on this row (the ``name`` column); the LDDA is not modified."""
        self._name = name

    name = property(get_name, set_name)

    def _serialize(self, id_encoder, serialization_options):
        """
        Serialize name, info, order_id and the selected LDDA (serialized with
        ``for_link=True``), then attach this object's identifier.
        """
        rval = dict_for(
            self,
            name=self.name,
            info=self.info,
            order_id=self.order_id,
            ldda=self.library_dataset_dataset_association.serialize(id_encoder, serialization_options, for_link=True),
        )
        serialization_options.attach_identifier(id_encoder, self, rval)
        return rval

    def to_dict(self, view="collection"):
        """
        Return a dictionary describing this library dataset, built mostly from
        the currently selected LDDA.

        Metadata elements are added as ``metadata_<name>`` keys: MetadataFile
        values are replaced by their file name and list values are joined with
        ``", "``. ``view`` is accepted but not used by this implementation.
        """
        # Since this class is a proxy to rather complex attributes we want to
        # display in other objects, we can't use the simpler method used by
        # other model classes.
        ldda = self.library_dataset_dataset_association
        rval = dict(
            id=self.id,
            ldda_id=ldda.id,
            parent_library_id=self.folder.parent_library.id,
            folder_id=self.folder_id,
            model_class=self.__class__.__name__,
            state=ldda.state,
            name=ldda.name,
            file_name=ldda.get_file_name(),
            created_from_basename=ldda.created_from_basename,
            uploaded_by=ldda.user and ldda.user.email,
            message=ldda.message,
            date_uploaded=ldda.create_time.isoformat(),
            update_time=ldda.update_time.isoformat(),
            file_size=int(ldda.get_size()),
            file_ext=ldda.ext,
            data_type=f"{ldda.datatype.__class__.__module__}.{ldda.datatype.__class__.__name__}",
            genome_build=ldda.dbkey,
            misc_info=ldda.info,
            misc_blurb=ldda.blurb,
            peek=ldda.display_peek() if ldda.peek and ldda.peek != "no peek" else None,
        )
        if ldda.dataset.uuid is None:
            rval["uuid"] = None
        else:
            rval["uuid"] = str(ldda.dataset.uuid)
        for name in ldda.metadata.spec.keys():
            val = ldda.metadata.get(name)
            if isinstance(val, MetadataFile):
                val = val.get_file_name()
            elif isinstance(val, list):
                val = ", ".join(str(v) for v in val)
            rval[f"metadata_{name}"] = val
        return rval
class LibraryDatasetDatasetAssociation(DatasetInstance, HasName, Serializable):
    """A DatasetInstance that is tied to a LibraryDataset and a user."""

    def __init__(
        self,
        library_dataset=None,
        user=None,
        sa_session=None,
        **kwd,
    ):
        """
        Initialise the underlying DatasetInstance with ``sa_session`` and the
        remaining keyword arguments, then record the library dataset and user.
        """
        # FIXME: sa_session is must be passed to DataSetInstance if the create_dataset
        # parameter in kwd is True so that the new object can be flushed.  Is there a better way?
        DatasetInstance.__init__(self, sa_session=sa_session, **kwd)
        self.library_dataset = library_dataset
        self.user = user

    def to_history_dataset_association(self, target_history, parent_id=None, add_to_history=False, visible=None):
        """
        Create, commit and return a HistoryDatasetAssociation that shares this
        LDDA's dataset and copies its descriptive attributes, tags and metadata.

        ``visible`` overrides this LDDA's visibility when it is not None. The
        new HDA is passed to ``target_history.add_dataset`` only when
        ``add_to_history`` is true and a target history was given.
        """
        session = object_session(self)
        new_hda = HistoryDatasetAssociation(
            name=self.name,
            info=self.info,
            blurb=self.blurb,
            peek=self.peek,
            tool_version=self.tool_version,
            extension=self.extension,
            dbkey=self.dbkey,
            dataset=self.dataset,
            visible=self.visible if visible is None else visible,
            deleted=self.deleted,
            parent_id=parent_id,
            copied_from_library_dataset_dataset_association=self,
            history=target_history,
        )

        # Carry this LDDA's tags over to the new HDA without flushing yet.
        tag_handler = galaxy.model.tags.GalaxyTagHandler(session)
        tags_str = tag_handler.get_tags_str(self.tags)
        tag_handler.apply_item_tags(user=self.user, item=new_hda, tags_str=tags_str, flush=False)

        session.add(new_hda)
        new_hda.metadata = self.metadata
        if add_to_history and target_history:
            target_history.add_dataset(new_hda)
        with transaction(session):
            session.commit()
        return new_hda

    def copy(self, parent_id=None, target_folder=None, flush=True):
        """
        Create, commit and return a new LDDA that shares this one's dataset and
        copies its descriptive attributes, tags and metadata.

        ``flush`` is accepted but not used by this implementation.
        """
        session = object_session(self)
        duplicate = LibraryDatasetDatasetAssociation(
            name=self.name,
            info=self.info,
            blurb=self.blurb,
            peek=self.peek,
            tool_version=self.tool_version,
            extension=self.extension,
            dbkey=self.dbkey,
            dataset=self.dataset,
            visible=self.visible,
            deleted=self.deleted,
            parent_id=parent_id,
            copied_from_library_dataset_dataset_association=self,
            folder=target_folder,
        )

        tag_handler = galaxy.model.tags.GalaxyTagHandler(session)
        tags_str = tag_handler.get_tags_str(self.tags)
        tag_handler.apply_item_tags(user=self.user, item=duplicate, tags_str=tags_str)

        session.add(duplicate)
        with transaction(session):
            session.commit()
        # Need to set after flushed, as MetadataFiles require dataset.id
        duplicate.metadata = self.metadata
        with transaction(session):
            session.commit()
        return duplicate

    def get_access_roles(self, security_agent):
        """Delegate to the underlying dataset's ``get_access_roles``."""
        underlying = self.dataset
        return underlying.get_access_roles(security_agent)

    def get_manage_permissions_roles(self, security_agent):
        """Delegate to the underlying dataset's ``get_manage_permissions_roles``."""
        underlying = self.dataset
        return underlying.get_manage_permissions_roles(security_agent)

    def has_manage_permissions_roles(self, security_agent):
        """Delegate to the underlying dataset's ``has_manage_permissions_roles``."""
        underlying = self.dataset
        return underlying.has_manage_permissions_roles(security_agent)

    def _serialize(self, id_encoder, serialization_options):
        """Extend the parent serialization with the entries added by ``_handle_serialize_files``."""
        serialized = super()._serialize(id_encoder, serialization_options)
        self._handle_serialize_files(id_encoder, serialization_options, serialized)
        return serialized

    def to_dict(self, view="collection"):
        """
        Return a dictionary describing this LDDA.

        ``file_size`` falls back to 0 when reading the size raises OSError.
        Metadata elements are added as ``metadata_<name>`` keys; MetadataFile
        values are replaced by their file name, and a None value is replaced by
        the datatype's attribute of the same name when the datatype has one.
        ``view`` is accepted but not used by this implementation.
        """
        # Since this class is a proxy to rather complex attributes we want to
        # display in other objects, we can't use the simpler method used by
        # other model classes.
        ldda = self
        try:
            file_size = int(ldda.get_size())
        except OSError:
            file_size = 0

        # TODO: render tags here
        rval = {
            "id": ldda.id,
            "hda_ldda": "ldda",
            "model_class": self.__class__.__name__,
            "name": ldda.name,
            "deleted": ldda.deleted,
            "visible": ldda.visible,
            "state": ldda.state,
            "library_dataset_id": ldda.library_dataset_id,
            "file_size": file_size,
            "file_name": ldda.get_file_name(),
            "update_time": ldda.update_time.isoformat(),
            "file_ext": ldda.ext,
            "data_type": f"{ldda.datatype.__class__.__module__}.{ldda.datatype.__class__.__name__}",
            "genome_build": ldda.dbkey,
            "misc_info": ldda.info,
            "misc_blurb": ldda.blurb,
            "created_from_basename": ldda.created_from_basename,
        }
        rval["uuid"] = None if ldda.dataset.uuid is None else str(ldda.dataset.uuid)
        rval["parent_library_id"] = ldda.library_dataset.folder.parent_library.id
        if ldda.extended_metadata is not None:
            rval["extended_metadata"] = ldda.extended_metadata.data

        for name in ldda.metadata.spec.keys():
            value = ldda.metadata.get(name)
            if isinstance(value, MetadataFile):
                value = value.get_file_name()
            # If no value for metadata, look in datatype for metadata.
            elif value is None and hasattr(ldda.datatype, name):
                value = getattr(ldda.datatype, name)
            rval[f"metadata_{name}"] = value
        return rval

    def update_parent_folder_update_times(self):
        """
        Set ``update_time`` of every containing folder, up the tree, to this
        LDDA's ``update_time``.

        Runs a single recursive-CTE UPDATE on a connection obtained from the
        session's bind, inside its own transaction, and logs a warning when
        fewer than one row was updated.
        """
        sql = text(
            """
WITH RECURSIVE parent_folders_of(folder_id) AS
(SELECT folder_id
FROM library_dataset
WHERE id = :library_dataset_id
UNION ALL
SELECT library_folder.parent_id
FROM library_folder, parent_folders_of
WHERE library_folder.id = parent_folders_of.folder_id )
UPDATE library_folder
SET update_time =
(SELECT update_time
FROM library_dataset_dataset_association
WHERE id = :ldda_id)
WHERE exists (SELECT 1 FROM parent_folders_of
WHERE library_folder.id = parent_folders_of.folder_id)
"""
        )
        params = {"library_dataset_id": self.library_dataset_id, "ldda_id": self.id}
        with object_session(self).bind.connect() as conn, conn.begin():
            ret = conn.execute(sql, params)
            if ret.rowcount < 1:
                log.warning(f"Attempt to updated parent folder times failed: {ret.rowcount} records updated.")
class ExtendedMetadata(Base, RepresentById):
    """
    Row of the ``extended_metadata`` table: a JSON ``data`` payload plus the
    ExtendedMetadataIndex rows that reference it (``children``).
    """

    __tablename__ = "extended_metadata"

    id = Column(Integer, primary_key=True)
    data = Column(MutableJSONType)
    children = relationship("ExtendedMetadataIndex", back_populates="extended_metadata")

    def __init__(self, data):
        """Store ``data`` as this row's JSON payload."""
        self.data = data
class ExtendedMetadataIndex(Base, RepresentById):
    """
    Row of the ``extended_metadata_index`` table: a ``path``/``value`` pair
    attached to an ExtendedMetadata row.
    """

    __tablename__ = "extended_metadata_index"

    id = Column(Integer, primary_key=True)
    # Updates and deletes of the referenced extended_metadata row cascade to this row.
    extended_metadata_id = Column(
        Integer, ForeignKey("extended_metadata.id", onupdate="CASCADE", ondelete="CASCADE"), index=True
    )
    path = Column(String(255))
    value = Column(TEXT)
    extended_metadata = relationship("ExtendedMetadata", back_populates="children")

    def __init__(self, extended_metadata, path, value):
        """Store the owning ExtendedMetadata together with the path and value."""
        self.extended_metadata = extended_metadata
        self.path = path
        self.value = value
class LibraryInfoAssociation(Base, RepresentById):
    """
    Row of the ``library_info_association`` table, linking a Library to a
    FormDefinition (``template``) and a FormValues row (``info``).
    """

    __tablename__ = "library_info_association"

    id = Column(Integer, primary_key=True)
    library_id = Column(Integer, ForeignKey("library.id"), index=True)
    form_definition_id = Column(Integer, ForeignKey("form_definition.id"), index=True)
    form_values_id = Column(Integer, ForeignKey("form_values.id"), index=True)
    inheritable = Column(Boolean, index=True, default=False)
    deleted = Column(Boolean, index=True, default=False)

    # The join condition also requires that this association is not flagged as deleted.
    library = relationship(
        "Library",
        primaryjoin=(
            lambda: and_(LibraryInfoAssociation.library_id == Library.id, not_(LibraryInfoAssociation.deleted))
        ),
    )
    template = relationship(
        "FormDefinition", primaryjoin=lambda: LibraryInfoAssociation.form_definition_id == FormDefinition.id
    )
    info = relationship(
        "FormValues", primaryjoin=lambda: LibraryInfoAssociation.form_values_id == FormValues.id  # type: ignore[has-type]
    )

    def __init__(self, library, form_definition, info, inheritable=False):
        """Store the library, the form definition (as ``template``), the form values and the inheritable flag."""
        self.library = library
        self.template = form_definition
        self.info = info
        self.inheritable = inheritable
[docs]class LibraryFolderInfoAssociation(Base, RepresentById):
__tablename__ = "library_folder_info_association"
id = Column(Integer, primary_key=True)
library_folder_id = Column(Integer, ForeignKey("library_folder.id"), nullable=True, index=True)
form_definition_id = Column(Integer, ForeignKey("form_definition.id"), index=True)
form_values_id = Column(Integer,