Warning

This document is for an in-development version of Galaxy. You can alternatively view this page in the latest release if it exists or view the top of the latest release's documentation.

galaxy.job_execution package

Subpackages

Submodules

galaxy.job_execution.compute_environment module

galaxy.job_execution.compute_environment.dataset_path_to_extra_path(path: str) → str[source]
class galaxy.job_execution.compute_environment.ComputeEnvironment[source]

Bases: object

Definition of the job as it will be run on the (potentially) remote compute server.

abstract output_names()[source]

Output unqualified filenames defined by job.

abstract input_path_rewrite(dataset)[source]

Input path for specified dataset.

abstract output_path_rewrite(dataset)[source]

Output path for specified dataset.

abstract input_extra_files_rewrite(dataset)[source]

Input extra files path rewrite for specified dataset.

abstract output_extra_files_rewrite(dataset)[source]

Output extra files path rewrite for specified dataset.

abstract input_metadata_rewrite(dataset, metadata_value)[source]

Input metadata path rewrite for specified dataset.

abstract unstructured_path_rewrite(path)[source]

Rewrite loc file paths, etc.

abstract working_directory()[source]

Job working directory (potentially remote)

abstract config_directory()[source]

Directory containing config files (potentially remote)

abstract env_config_directory()[source]

Config directory (possibly as environment variable evaluation).

abstract sep()[source]

os.path.sep for the platform this job will execute in.

abstract new_file_path()[source]

Absolute path to dump new files for this job on compute server.

abstract tool_directory()[source]

Absolute path to tool files for this job on compute server.

abstract version_path()[source]

Location of the version file for the underlying tool.

abstract home_directory()[source]

Home directory of target job - none if HOME should not be set.

abstract tmp_directory()[source]

Temp directory of target job - none if a temp directory should not be set.

abstract galaxy_url()[source]

URL to access Galaxy API from for this compute environment.

abstract get_file_sources_dict() → Dict[str, Any][source]

Return file sources dict for current user.

class galaxy.job_execution.compute_environment.SimpleComputeEnvironment[source]

Bases: object

config_directory()[source]
sep()[source]
class galaxy.job_execution.compute_environment.SharedComputeEnvironment(job_io: JobIO, job: Job)[source]

Bases: SimpleComputeEnvironment, ComputeEnvironment

Default ComputeEnvironment for job and task wrapper to pass to ToolEvaluator - valid when Galaxy and compute share all the relevant file systems.

job_id: JobIO
__init__(job_io: JobIO, job: Job)[source]
job: Job
get_file_sources_dict() → Dict[str, Any][source]

Return file sources dict for current user.

output_names()[source]

Output unqualified filenames defined by job.

output_paths()[source]
input_path_rewrite(dataset)[source]

Input path for specified dataset.

output_path_rewrite(dataset)[source]

Output path for specified dataset.

input_extra_files_rewrite(dataset)[source]

Input extra files path rewrite for specified dataset.

output_extra_files_rewrite(dataset)[source]

Output extra files path rewrite for specified dataset.

input_metadata_rewrite(dataset, metadata_value)[source]

Input metadata path rewrite for specified dataset.

unstructured_path_rewrite(path)[source]

Rewrite loc file paths, etc.

working_directory()[source]

Job working directory (potentially remote)

env_config_directory()[source]

Config directory (possibly as environment variable evaluation).

new_file_path()[source]

Absolute path to dump new files for this job on compute server.

version_path()[source]

Location of the version file for the underlying tool.

tool_directory()[source]

Absolute path to tool files for this job on compute server.

home_directory()[source]

Home directory of target job - none if HOME should not be set.

tmp_directory()[source]

Temp directory of target job - none if a temp directory should not be set.

galaxy_url()[source]

URL to access Galaxy API from for this compute environment.

galaxy.job_execution.datasets module

Utility classes allowing Job interface to reason about datasets.

galaxy.job_execution.datasets.dataset_path_rewrites(dataset_paths)[source]
class galaxy.job_execution.datasets.DatasetPath(dataset_id, real_path, false_path=None, false_extra_files_path=None, false_metadata_path=None, mutable=True, dataset_uuid=None, object_store_id=None)[source]

Bases: object

__init__(dataset_id, real_path, false_path=None, false_extra_files_path=None, false_metadata_path=None, mutable=True, dataset_uuid=None, object_store_id=None)[source]
with_path_for_job(false_path, false_extra_files_path=None, false_metadata_path=None)[source]

Clone the dataset path but with a new false_path.

class galaxy.job_execution.datasets.DatasetPathRewriter[source]

Bases: object

Used by runner to rewrite paths.

abstract rewrite_dataset_path(dataset, dataset_type)[source]

Dataset type is ‘input’ or ‘output’. Return None to indicate not to rewrite this path.

class galaxy.job_execution.datasets.NullDatasetPathRewriter[source]

Bases: DatasetPathRewriter

Used by default for jobwrapper; does not rewrite anything.

rewrite_dataset_path(dataset, dataset_type)[source]

Keep path the same.

class galaxy.job_execution.datasets.OutputsToWorkingDirectoryPathRewriter(working_directory, outputs_directory_name)[source]

Bases: DatasetPathRewriter

Rewrites all paths to place them in the specified working directory for normal jobs when Galaxy is configured with app.config.outputs_to_working_directory. Job runner base class is responsible for copying these out after job is complete.

__init__(working_directory, outputs_directory_name)[source]
rewrite_dataset_path(dataset, dataset_type)[source]

Rewrite the path to place it in the specified working directory.

class galaxy.job_execution.datasets.TaskPathRewriter(working_directory, job_dataset_path_rewriter)[source]

Bases: DatasetPathRewriter

Rewrites all paths to place them in the specified working directory for TaskWrapper. TaskWrapper is responsible for putting them there and pulling them out.

__init__(working_directory, job_dataset_path_rewriter)[source]
rewrite_dataset_path(dataset, dataset_type)[source]
galaxy.job_execution.datasets.get_path_rewriter(outputs_to_working_directory, working_directory, outputs_directory, is_task) → DatasetPathRewriter[source]

galaxy.job_execution.output_collect module

Code allowing tools to define extra files associated with an output dataset.

class galaxy.job_execution.output_collect.PermissionProvider(inp_data, security_agent, job)[source]

Bases: PermissionProvider

__init__(inp_data, security_agent, job)[source]
property permissions
set_default_hda_permissions(primary_data)[source]
copy_dataset_permissions(init_from, primary_data)[source]

Copy dataset permissions from supplied input dataset.

class galaxy.job_execution.output_collect.MetadataSourceProvider(inp_data)[source]

Bases: MetadataSourceProvider

__init__(inp_data)[source]
get_metadata_source(input_name)[source]

Get metadata for supplied input_name.

galaxy.job_execution.output_collect.collect_dynamic_outputs(job_context, output_collections)[source]
class galaxy.job_execution.output_collect.BaseJobContext[source]

Bases: ModelPersistenceContext

tool_provided_metadata: BaseToolProvidedMetadata
job_working_directory: str
add_dataset_collection(collection)[source]
find_files(output_name, collection, dataset_collectors) → list[source]
get_job_id()[source]
class galaxy.job_execution.output_collect.JobContext(tool, tool_provided_metadata: BaseToolProvidedMetadata, job, job_working_directory, permission_provider, metadata_source_provider, input_dbkey, object_store, final_job_state, max_discovered_files: int | None, flush_per_n_datasets=None)[source]

Bases: BaseJobContext

__init__(tool, tool_provided_metadata: BaseToolProvidedMetadata, job, job_working_directory, permission_provider, metadata_source_provider, input_dbkey, object_store, final_job_state, max_discovered_files: int | None, flush_per_n_datasets=None)[source]
job_working_directory: str
tool_provided_metadata: BaseToolProvidedMetadata
discovered_file_count: int
property change_datatype_actions
property tag_handler

Return a galaxy.model.tags.TagHandler-like object for persisting tags.

property work_context
property sa_session: scoped_session

If bound to a database, return the SQL Alchemy session.

Return None otherwise.

property permission_provider: PermissionProvider

Return the associated PermissionProvider object.

Return None otherwise.

property metadata_source_provider: MetadataSourceProvider

Return associated MetadataSourceProvider object.

property job: Job

Return associated job object if bound to a job finish context connected to a database.

property flush_per_n_datasets: int | None
property input_dbkey: str
property object_store: ObjectStore

Return object store to use for populating discovered dataset contents.

property user

If bound to a database, return the user the datasets should be created for.

Return None otherwise.

persist_object(obj)[source]

Add the target to the persistence layer.

flush()[source]

If database bound, flush the persisted objects to ensure IDs.

get_library_folder(destination)[source]
get_hdca(object_id)[source]
create_library_folder(parent_folder, name, description)[source]

Create a library folder from the supplied attributes for the supplied parent.

create_hdca(name, structure)[source]
add_output_dataset_association(name, dataset)[source]

If discovering outputs for a job, persist output dataset association.

add_library_dataset_to_folder(library_folder, ld)[source]

Add library dataset to persisted library folder.

add_datasets_to_history(datasets, for_output_dataset=None)[source]

Add datasets to the history this context points at.

output_collection_def(name)[source]
output_def(name)[source]
job_id()[source]
get_job_id()[source]
get_implicit_collection_jobs_association_id()[source]

No-op, no job context.

class galaxy.job_execution.output_collect.SessionlessJobContext(metadata_params, tool_provided_metadata: BaseToolProvidedMetadata, object_store, export_store, import_store, working_directory, final_job_state, max_discovered_files: int | None)[source]

Bases: SessionlessModelPersistenceContext, BaseJobContext

__init__(metadata_params, tool_provided_metadata: BaseToolProvidedMetadata, object_store, export_store, import_store, working_directory, final_job_state, max_discovered_files: int | None)[source]
tool_provided_metadata: BaseToolProvidedMetadata
discovered_file_count: int
property change_datatype_actions
output_collection_def(name)[source]
output_def(name)[source]
job_id()[source]
get_hdca(object_id)[source]
add_dataset_collection(collection)[source]
add_output_dataset_association(name, dataset_instance)[source]

No-op, no job context to persist this association for.

get_job_id()[source]
get_implicit_collection_jobs_association_id()[source]

No-op, no job context.

job_working_directory: str
galaxy.job_execution.output_collect.collect_primary_datasets(job_context: JobContext | SessionlessJobContext, output, input_ext)[source]
galaxy.job_execution.output_collect.discover_files(output_name, tool_provided_metadata, extra_file_collectors, job_working_directory, matchable)[source]
galaxy.job_execution.output_collect.walk_over_file_collectors(extra_file_collectors, job_working_directory, matchable)[source]
galaxy.job_execution.output_collect.walk_over_extra_files(target_dir, extra_file_collector, job_working_directory, matchable, parent_paths=None)[source]

Walks through all files in a given directory, and returns all files that match the given collector’s match criteria. If the collector has the recurse flag enabled, will also recursively descend into child folders.

galaxy.job_execution.output_collect.dataset_collector(dataset_collection_description)[source]
class galaxy.job_execution.output_collect.ToolMetadataDatasetCollector(dataset_collection_description)[source]

Bases: object

__init__(dataset_collection_description)[source]
class galaxy.job_execution.output_collect.DatasetCollector(dataset_collection_description)[source]

Bases: object

__init__(dataset_collection_description)[source]
match(dataset_instance, filename, path=None, parent_paths=None)[source]
sort(matches)[source]
galaxy.job_execution.output_collect.read_exit_code_from(exit_code_file, id_tag)[source]

Read exit code reported for a Galaxy job.

galaxy.job_execution.output_collect.default_exit_code_file(files_dir, id_tag)[source]
galaxy.job_execution.output_collect.collect_extra_files(object_store: ObjectStore, dataset: DatasetInstance, job_working_directory: str, outputs_to_working_directory: bool = False)[source]
galaxy.job_execution.output_collect.collect_shrinked_content_from_path(path)[source]

galaxy.job_execution.setup module

Utilities to help job and tool code setup jobs.

class galaxy.job_execution.setup.JobOutput(output_name, dataset, dataset_path)[source]

Bases: tuple

output_name: str

Alias for field number 0

dataset: DatasetInstance

Alias for field number 1

dataset_path: DatasetPath

Alias for field number 2

class galaxy.job_execution.setup.JobOutputs[source]

Bases: _local

__init__() → None[source]
property populated: bool
set_job_outputs(job_outputs: List[JobOutput]) → None[source]
class galaxy.job_execution.setup.JobIO(sa_session, job: Job, working_directory: str, outputs_directory: str, outputs_to_working_directory: bool, galaxy_url: str, version_path: str, tool_directory: str, home_directory: str, tmp_directory: str, tool_data_path: str, galaxy_data_manager_data_path: str, new_file_path: str, len_file_path: str, builds_file_path: str, check_job_script_integrity: bool, check_job_script_integrity_count: int, check_job_script_integrity_sleep: float, file_sources_dict: Dict[str, Any], user_context: FileSourcesUserContext | Dict[str, Any], tool_source: str | None = None, tool_source_class: str | None = 'XmlToolSource', tool_dir: str | None = None, is_task: bool = False)[source]

Bases: UsesDictVisibleKeys

dict_collection_visible_keys = ('job_id', 'working_directory', 'outputs_directory', 'outputs_to_working_directory', 'galaxy_url', 'version_path', 'tool_directory', 'home_directory', 'tmp_directory', 'tool_data_path', 'galaxy_data_manager_data_path', 'new_file_path', 'len_file_path', 'builds_file_path', 'file_sources_dict', 'check_job_script_integrity', 'check_job_script_integrity_count', 'check_job_script_integrity_sleep', 'tool_source', 'tool_source_class', 'tool_dir', 'is_task')
__init__(sa_session, job: Job, working_directory: str, outputs_directory: str, outputs_to_working_directory: bool, galaxy_url: str, version_path: str, tool_directory: str, home_directory: str, tmp_directory: str, tool_data_path: str, galaxy_data_manager_data_path: str, new_file_path: str, len_file_path: str, builds_file_path: str, check_job_script_integrity: bool, check_job_script_integrity_count: int, check_job_script_integrity_sleep: float, file_sources_dict: Dict[str, Any], user_context: FileSourcesUserContext | Dict[str, Any], tool_source: str | None = None, tool_source_class: str | None = 'XmlToolSource', tool_dir: str | None = None, is_task: bool = False)[source]
property job
classmethod from_json(path, sa_session)[source]
classmethod from_dict(io_dict, sa_session)[source]
to_dict()[source]
to_json(path)[source]
property file_sources: ConfiguredFileSources
property dataset_path_rewriter: DatasetPathRewriter
property output_paths: List[DatasetPath]
property output_hdas_and_paths: Dict[str, Tuple[DatasetInstance, DatasetPath]]
get_input_dataset_fnames(ds: DatasetInstance) → List[str][source]
get_input_datasets() → List[DatasetInstance][source]
get_input_fnames() → List[str][source]
get_input_paths() → List[DatasetPath][source]
get_input_path(dataset: DatasetInstance) → DatasetPath[source]
get_output_basenames() → List[str][source]
get_output_fnames() → List[DatasetPath][source]
get_output_path(dataset)[source]
get_mutable_output_fnames()[source]
get_output_hdas_and_fnames() → Dict[str, Tuple[DatasetInstance, DatasetPath]][source]
compute_outputs() → None[source]
get_output_file_id(file: str) → int | None[source]
galaxy.job_execution.setup.ensure_configs_directory(work_dir: str) → str[source]
galaxy.job_execution.setup.create_working_directory_for_job(object_store, job) → str[source]