Warning

This document is for an in-development version of Galaxy. You can alternatively view this page in the latest release if it exists or view the top of the latest release's documentation.

Source code for galaxy.datatypes.dataproviders.chunk

"""
Chunk (N number of bytes at M offset to a source's beginning) provider.

Primarily for file sources but usable by any iterator that has both
seek and read( N ).
"""

import base64
import logging
import os

from . import (
    base,
    exceptions,
)

log = logging.getLogger(__name__)


[docs]class ChunkDataProvider(base.DataProvider): """ Data provider that yields chunks of data from its file. Note: this version does not account for lines and works with Binary datatypes. """ MAX_CHUNK_SIZE = 2**16 DEFAULT_CHUNK_SIZE = MAX_CHUNK_SIZE settings = {"chunk_index": "int", "chunk_size": "int"} # TODO: subclass from LimitedOffsetDataProvider? # see web/framework/base.iterate_file, util/__init__.file_reader, and datatypes.tabular
[docs] def __init__(self, source, chunk_index=0, chunk_size=DEFAULT_CHUNK_SIZE, **kwargs): """ :param chunk_index: if a source can be divided into N number of `chunk_size` sections, this is the index of which section to return. :param chunk_size: how large are the desired chunks to return (gen. in bytes). """ super().__init__(source, **kwargs) self.chunk_size = int(chunk_size) self.chunk_pos = int(chunk_index) * self.chunk_size
[docs] def validate_source(self, source): """ Does the given source have both the methods `seek` and `read`? :raises InvalidDataProviderSource: if not. """ source = super().validate_source(source) if (not hasattr(source, "seek")) or (not hasattr(source, "read")): raise exceptions.InvalidDataProviderSource(source) return source
def __iter__(self): # not reeeally an iterator per se self.__enter__() self.source.seek(self.chunk_pos, os.SEEK_SET) chunk = self.encode(self.source.read(self.chunk_size)) yield chunk self.__exit__()
[docs] def encode(self, chunk): """ Called on the chunk before returning. Overrride to modify, encode, or decode chunks. """ return chunk
[docs]class Base64ChunkDataProvider(ChunkDataProvider): """ Data provider that yields chunks of base64 encoded data from its file. """
[docs] def encode(self, chunk): """ Return chunks encoded in base 64. """ return base64.b64encode(chunk)