Warning

This document is for an in-development version of Galaxy. You can alternatively view this page in the latest release if it exists or view the top of the latest release's documentation.

Source code for galaxy.webapps.galaxy.api.datasets

"""
API operations on the contents of a history dataset.
"""
import logging
from io import (
    BytesIO,
    IOBase,
)
from typing import (
    Any,
    cast,
    Dict,
    List,
    Optional,
)

from fastapi import (
    Body,
    Depends,
    Path,
    Query,
    Request,
)
from starlette.responses import (
    FileResponse,
    StreamingResponse,
)

from galaxy import (
    util,
    web,
)
from galaxy.schema import (
    FilterQueryParams,
    SerializationParams,
)
from galaxy.schema.fields import EncodedDatabaseIdField
from galaxy.schema.schema import (
    AnyHDA,
    AnyHistoryContentItem,
    DatasetAssociationRoles,
    DatasetSourceType,
    UpdateDatasetPermissionsPayload,
)
from galaxy.util.zipstream import ZipstreamWrapper
from galaxy.webapps.galaxy.api.common import (
    get_filter_query_params,
    get_query_parameters_from_request_excluding,
    get_update_permission_payload,
    parse_serialization_params,
    query_serialization_params,
)
from galaxy.webapps.galaxy.services.datasets import (
    ConvertedDatasetsMap,
    DatasetInheritanceChain,
    DatasetsService,
    DatasetStorageDetails,
    DatasetTextContentDetails,
    RequestDataType,
)
from . import (
    BaseGalaxyAPIController,
    depends,
    DependsOnTrans,
    Router,
)

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Router that the endpoints of this module are registered on.
router = Router(tags=["datasets"])

# Parameter declarations shared by several endpoints below, so the same
# description does not have to be repeated in every signature.
DatasetIDPathParam: EncodedDatabaseIdField = Path(
    default=...,
    description="The encoded database identifier of the dataset.",
)

HistoryIDPathParam: EncodedDatabaseIdField = Path(
    default=...,
    description="The encoded database identifier of the History.",
)

DatasetSourceQueryParam: DatasetSourceType = Query(
    default=DatasetSourceType.hda,
    description="Whether this dataset belongs to a history (HDA) or a library (LDDA).",
)


@router.cbv
class FastAPIDatasets:
    """FastAPI endpoints for datasets; every operation delegates to `DatasetsService`."""

    # NOTE: endpoints without a docstring are documented with `#` comments on
    # purpose: FastAPI publishes the endpoint docstring as the operation
    # description in the generated OpenAPI schema.
    service: DatasetsService = depends(DatasetsService)

    # Delegates to `DatasetsService.index`; serialization and filter options
    # are resolved from the query string by their `Depends` providers.
    @router.get(
        '/api/datasets',
        summary='Search datasets or collections using a query system.',
    )
    def index(
        self,
        trans=DependsOnTrans,
        history_id: Optional[EncodedDatabaseIdField] = Query(
            default=None,
            description="Optional identifier of a History. Use it to restrict the search within a particular History.",
        ),
        serialization_params: SerializationParams = Depends(query_serialization_params),
        filter_query_params: FilterQueryParams = Depends(get_filter_query_params),
    ) -> List[AnyHistoryContentItem]:
        return self.service.index(trans, history_id, serialization_params, filter_query_params)

    # Delegates to `DatasetsService.show_storage`.
    @router.get(
        '/api/datasets/{dataset_id}/storage',
        summary='Display user-facing storage details related to the objectstore a dataset resides in.',
    )
    def show_storage(
        self,
        trans=DependsOnTrans,
        dataset_id: EncodedDatabaseIdField = DatasetIDPathParam,
        hda_ldda: DatasetSourceType = DatasetSourceQueryParam,
    ) -> DatasetStorageDetails:
        return self.service.show_storage(trans, dataset_id, hda_ldda)

    # Delegates to `DatasetsService.show_inheritance_chain`.
    @router.get(
        '/api/datasets/{dataset_id}/inheritance_chain',
        summary='For internal use, this endpoint may change without warning.',
        include_in_schema=True,  # Can be changed to False if we don't really want to expose this
    )
    def show_inheritance_chain(
        self,
        trans=DependsOnTrans,
        dataset_id: EncodedDatabaseIdField = DatasetIDPathParam,
        hda_ldda: DatasetSourceType = DatasetSourceQueryParam,
    ) -> DatasetInheritanceChain:
        return self.service.show_inheritance_chain(trans, dataset_id, hda_ldda)

    # Delegates to `DatasetsService.get_content_as_text`.
    @router.get(
        '/api/datasets/{dataset_id}/get_content_as_text',
        summary='Returns dataset content as Text.',
    )
    def get_content_as_text(
        self,
        trans=DependsOnTrans,
        dataset_id: EncodedDatabaseIdField = DatasetIDPathParam,
    ) -> DatasetTextContentDetails:
        return self.service.get_content_as_text(trans, dataset_id)

    @router.get(
        '/api/datasets/{dataset_id}/converted/{ext}',
        summary='Return information about datasets made by converting this dataset to a new format.',
    )
    def converted_ext(
        self,
        trans=DependsOnTrans,
        dataset_id: EncodedDatabaseIdField = DatasetIDPathParam,
        ext: str = Path(
            ...,
            description="File extension of the new format to convert this dataset to.",
        ),
        serialization_params: SerializationParams = Depends(query_serialization_params),
    ) -> AnyHDA:
        """
        Return information about datasets made by converting this dataset to a new format.

        If there is no existing converted dataset for the format in `ext`, one will be created.

        **Note**: `view` and `keys` are also available to control the serialization of the dataset.
        """
        return self.service.converted_ext(trans, dataset_id, ext, serialization_params)

    @router.get(
        '/api/datasets/{dataset_id}/converted',
        summary=(
            "Return a map with all the existing converted datasets associated with this instance."
        ),
    )
    def converted(
        self,
        trans=DependsOnTrans,
        dataset_id: EncodedDatabaseIdField = DatasetIDPathParam,
    ) -> ConvertedDatasetsMap:
        """
        Return a map of `<converted extension> : <converted id>` containing all the *existing* converted datasets.
        """
        return self.service.converted(trans, dataset_id)

    @router.put(
        '/api/datasets/{dataset_id}/permissions',
        summary='Set permissions of the given history dataset to the given role ids.',
    )
    def update_permissions(
        self,
        trans=DependsOnTrans,
        dataset_id: EncodedDatabaseIdField = DatasetIDPathParam,
        # Using a generic Dict here as an attempt on supporting multiple aliases for the permissions params.
        payload: Dict[str, Any] = Body(
            default=...,
            example=UpdateDatasetPermissionsPayload(),
        ),
    ) -> DatasetAssociationRoles:
        """Set permissions of the given history dataset to the given role ids."""
        # The raw dictionary is converted into the update payload by the shared helper.
        update_payload = get_update_permission_payload(payload)
        return self.service.update_permissions(trans, dataset_id, update_payload)

    # Delegates to `DatasetsService.extra_files`. `history_id` is part of the
    # route but is not forwarded to the service.
    @router.get(
        '/api/histories/{history_id}/contents/{history_content_id}/extra_files',
        summary='Generate list of extra files.',
        tags=["histories"],
    )
    def extra_files(
        self,
        trans=DependsOnTrans,
        history_id: EncodedDatabaseIdField = HistoryIDPathParam,
        history_content_id: EncodedDatabaseIdField = DatasetIDPathParam,
    ):
        return self.service.extra_files(trans, history_content_id)

    @router.get(
        '/api/histories/{history_id}/contents/{history_content_id}/display',
        name="history_contents_display",
        summary='Displays dataset (preview) content.',
        tags=["histories"],
        response_class=StreamingResponse,
    )
    def display(
        self,
        request: Request,
        trans=DependsOnTrans,
        history_id: EncodedDatabaseIdField = HistoryIDPathParam,
        history_content_id: EncodedDatabaseIdField = DatasetIDPathParam,
        preview: bool = Query(
            default=False,
            description=(
                "Whether to get preview contents to be directly displayed on the web. "
                "If preview is False (default) the contents will be downloaded instead."
            ),
        ),
        filename: Optional[str] = Query(
            default=None,
            description="TODO",
        ),
        to_ext: Optional[str] = Query(
            default=None,
            description=(
                "The file extension when downloading the display data. Use the value `data` to "
                "let the server infer it from the data type."
            ),
        ),
        raw: bool = Query(
            default=False,
            description=(
                "The query parameter 'raw' should be considered experimental and may be dropped at "
                "some point in the future without warning. Generally, data should be processed by its "
                "datatype prior to display."
            ),
        ),
    ):
        """Streams the preview contents of a dataset to be displayed in a browser."""
        # Any query parameter that is not declared above is forwarded untouched to the service.
        extra_params = get_query_parameters_from_request_excluding(request, {"preview", "filename", "to_ext", "raw"})
        display_data, headers = self.service.display(
            trans, history_content_id, history_id, preview, filename, to_ext, raw, **extra_params
        )
        # Choose the response wrapper from the kind of object the service returned.
        if isinstance(display_data, IOBase):
            file_name = getattr(display_data, "name", None)
            if file_name:
                return FileResponse(file_name, headers=headers)
            # A file-like object without a usable `name` falls through to the generic stream below.
        elif isinstance(display_data, ZipstreamWrapper):
            return StreamingResponse(display_data.response(), headers=headers)
        elif isinstance(display_data, bytes):
            return StreamingResponse(BytesIO(display_data), headers=headers)
        return StreamingResponse(display_data, headers=headers)

    # Delegates to `DatasetsService.get_metadata_file` and serves the returned
    # path as a file response. `history_id` is not forwarded to the service.
    @router.get(
        '/api/histories/{history_id}/contents/{history_content_id}/metadata_file',
        summary='Returns the metadata file associated with this history item.',
        tags=["histories"],
        response_class=FileResponse,
    )
    def get_metadata_file(
        self,
        trans=DependsOnTrans,
        history_id: EncodedDatabaseIdField = HistoryIDPathParam,
        history_content_id: EncodedDatabaseIdField = DatasetIDPathParam,
        metadata_file: Optional[str] = Query(
            default=None,
            description="The name of the metadata file to retrieve.",
        ),
    ):
        metadata_file_path, headers = self.service.get_metadata_file(trans, history_content_id, metadata_file)
        return FileResponse(path=cast(str, metadata_file_path), headers=headers)

    @router.get(
        '/api/datasets/{dataset_id}',
        summary="Displays information about and/or content of a dataset.",
    )
    def show(
        self,
        request: Request,
        trans=DependsOnTrans,
        dataset_id: EncodedDatabaseIdField = DatasetIDPathParam,
        # Reuses the shared declaration (same default, `DatasetSourceType.hda`) so this
        # parameter is described the same way as in the other endpoints of this class.
        hda_ldda: DatasetSourceType = DatasetSourceQueryParam,
        data_type: Optional[RequestDataType] = Query(
            default=None,
            description=(
                "The type of information about the dataset to be requested. "
                "Each of these values may require additional parameters in the request and "
                "may return different responses."
            ),
        ),
        serialization_params: SerializationParams = Depends(query_serialization_params),
    ):
        """
        **Note**: Due to the multipurpose nature of this endpoint, which can receive a wild variety of parameters
        and return different kinds of responses, the documentation here will be limited.
        To get more information please check the source code.
        """
        # Parameters that are already bound to named arguments must not be forwarded a second time.
        exclude_params = {"hda_ldda", "data_type"}
        exclude_params.update(SerializationParams.__fields__.keys())
        extra_params = get_query_parameters_from_request_excluding(request, exclude_params)
        return self.service.show(trans, dataset_id, hda_ldda, serialization_params, data_type, **extra_params)
class DatasetsController(BaseGalaxyAPIController):
    """
    Dataset API controller whose actions are exposed through the `web.expose_api*`
    decorators. Every action delegates to `DatasetsService`.
    """

    service: DatasetsService = depends(DatasetsService)

    @web.expose_api
    def index(self, trans, limit=500, offset=0, history_id=None, **kwd):
        """
        GET /api/datasets/

        Search datasets or collections using a query system

        :rtype:     list
        :returns:   dictionaries containing summary of dataset or dataset_collection information

        The list returned can be filtered by using two optional parameters:

            :q:

                string, generally a property name to filter by followed
                by an (often optional) hyphen and operator string.

            :qv:

                string, the value to filter by

        ..example::

            To filter the list to only those created after 2015-01-29,
            the query string would look like:

                '?q=create_time-gt&qv=2015-01-29'

            Multiple filters can be sent in using multiple q/qv pairs:

                '?q=create_time-gt&qv=2015-01-29&q=name-contains&qv=experiment-1'

        The list returned can be paginated using two optional parameters:

            limit

                integer, how many items to return. When the filter parameters
                carry no limit, the `limit` argument is used (500 by default).

            offset

                integer, defaults to 0 and starts at the beginning
                skip the first ( offset - 1 ) items and begin returning
                at the Nth item

        ..example:

            limit and offset can be combined. Skip the first two and return five:
                '?limit=5&offset=3'

        The list returned can be ordered using the optional parameter:

            order

                string containing one of the valid ordering attributes followed
                (optionally) by '-asc' or '-dsc' (default) for ascending and descending
                order respectively. Orders can be stacked as a comma-
                separated list of values.
                Allowed ordering attributes are: 'create_time', 'extension',
                'hid', 'history_id', 'name', 'update_time'.
                'order' defaults to 'create_time'.

        ..example:
            To sort by name descending then create time descending:
                '?order=name-dsc,create_time'
        """
        serialization_params = parse_serialization_params(**kwd)
        filter_parameters = FilterQueryParams(**kwd)
        # `limit` and `offset` are named arguments, so they never reach `kwd`;
        # apply them whenever the parsed filter parameters hold a falsy value.
        filter_parameters.limit = filter_parameters.limit or limit
        filter_parameters.offset = filter_parameters.offset or offset
        return self.service.index(
            trans, history_id, serialization_params, filter_parameters
        )

    @web.expose_api_anonymous_and_sessionless
    def show(self, trans, id, hda_ldda='hda', data_type=None, provider=None, **kwd):
        """
        GET /api/datasets/{encoded_dataset_id}
        Displays information about and/or content of a dataset.
        """
        serialization_params = parse_serialization_params(**kwd)
        # `provider` is forwarded to the service together with the remaining keyword arguments.
        kwd["provider"] = provider
        return self.service.show(trans, id, hda_ldda, serialization_params, data_type, **kwd)

    @web.expose_api_anonymous
    def show_storage(self, trans, dataset_id, hda_ldda='hda', **kwd):
        """
        GET /api/datasets/{encoded_dataset_id}/storage

        Display user-facing storage details related to the objectstore a
        dataset resides in.
        """
        return self.service.show_storage(trans, dataset_id, hda_ldda)

    @web.expose_api_anonymous
    def show_inheritance_chain(self, trans, dataset_id, hda_ldda='hda', **kwd):
        """
        GET /api/datasets/{dataset_id}/inheritance_chain

        Display inheritance chain for the given dataset

        For internal use, this endpoint may change without warning.
        """
        return self.service.show_inheritance_chain(trans, dataset_id, hda_ldda)

    @web.expose_api
    def update_permissions(self, trans, dataset_id, payload, **kwd):
        """
        PUT /api/datasets/{encoded_dataset_id}/permissions
        Updates permissions of a dataset.

        :rtype:     dict
        :returns:   dictionary containing new permissions
        """
        hda_ldda = kwd.pop('hda_ldda', DatasetSourceType.hda)
        # Entries of the payload are merged over the remaining keyword
        # arguments before the update payload is built from them.
        if payload:
            kwd.update(payload)
        update_payload = get_update_permission_payload(kwd)
        return self.service.update_permissions(trans, dataset_id, update_payload, hda_ldda)

    @web.expose_api_anonymous_and_sessionless
    def extra_files(self, trans, history_content_id, history_id, **kwd):
        """
        GET /api/histories/{encoded_history_id}/contents/{encoded_content_id}/extra_files
        Generate list of extra files.
        """
        # `history_id` is accepted but not forwarded to the service.
        return self.service.extra_files(trans, history_content_id)

    @web.expose_api_raw_anonymous_and_sessionless
    def display(self, trans, history_content_id, history_id,
                preview=False, filename=None, to_ext=None, raw=False, **kwd):
        """
        GET /api/histories/{encoded_history_id}/contents/{encoded_content_id}/display
        Displays history content (dataset).

        The query parameter 'raw' should be considered experimental and may be dropped at
        some point in the future without warning. Generally, data should be processed by its
        datatype prior to display (the default if raw is unspecified or explicitly false).
        """
        # `raw` is normalized with the string-to-bool helper before being forwarded.
        raw = util.string_as_bool(raw)
        display_data, headers = self.service.display(
            trans, history_content_id, history_id, preview, filename, to_ext, raw, **kwd
        )
        trans.response.headers.update(headers)
        if isinstance(display_data, ZipstreamWrapper):
            return display_data.response()
        return display_data

    @web.expose_api
    def get_content_as_text(self, trans, dataset_id):
        """ Returns item content as Text. """
        return self.service.get_content_as_text(trans, dataset_id)

    @web.expose_api_raw_anonymous_and_sessionless
    def get_metadata_file(self, trans, history_content_id, history_id, metadata_file=None, **kwd):
        """
        GET /api/histories/{history_id}/contents/{history_content_id}/metadata_file
        """
        # TODO: remove open_file parameter when deleting this legacy endpoint
        # The result is bound to its own name instead of rebinding the
        # `metadata_file` argument (the requested file name).
        opened_metadata_file, headers = self.service.get_metadata_file(
            trans, history_content_id, metadata_file, open_file=True
        )
        trans.response.headers.update(headers)
        return opened_metadata_file

    @web.expose_api_anonymous
    def converted(self, trans, dataset_id, ext=None, **kwargs):
        """
        converted( self, trans, dataset_id, ext, **kwargs )
        * GET /api/datasets/{dataset_id}/converted/{ext}
            return information about datasets made by converting this dataset
            to a new format

        :type   dataset_id: str
        :param  dataset_id: the encoded id of the original HDA to check
        :type   ext:        str
        :param  ext:        file extension of the target format or None
                            (the default).

        If there is no existing converted dataset for the format in `ext`,
        one will be created.

        If `ext` is None, a dictionary will be returned of the form
        { <converted extension> : <converted id>, ... } containing all the
        *existing* converted datasets.

        ..note: `view` and `keys` are also available to control the serialization
            of individual datasets. They have no effect when `ext` is None.

        :rtype:     dict
        :returns:   dictionary containing detailed HDA information
                    or (if `ext` is None) an extension->dataset_id map
        """
        if ext:
            serialization_params = parse_serialization_params(**kwargs)
            return self.service.converted_ext(trans, dataset_id, ext, serialization_params)
        # Any falsy `ext` (not only None) yields the map of existing conversions.
        return self.service.converted(trans, dataset_id)