import logging
from galaxy.datatypes import data
from galaxy.datatypes.binary import Cel # noqa: F401
from galaxy.datatypes.data import get_file_peek
from galaxy.datatypes.metadata import MetadataElement
from galaxy.datatypes.protocols import DatasetProtocol
from galaxy.datatypes.sniff import (
build_sniff_from_prefix,
FilePrefix,
get_headers,
)
# Module-level logger, named after this module via ``__name__``.
log = logging.getLogger(__name__)
class GenericMicroarrayFile(data.Text):
    """
    Shared base class for the text-based microarray datatypes in this module.

    It declares the metadata elements that ``set_peek`` reads when it builds
    the dataset blurb, and reports ``text/plain`` as the MIME type.
    """

    # Every element below is read-only, visible and optional, and uses the
    # same value for ``default`` and ``no_value``.
    MetadataElement(
        name="version_number",
        default="1.0",
        desc="Version number",
        readonly=True,
        visible=True,
        optional=True,
        no_value="1.0",
    )
    MetadataElement(
        name="file_format",
        default="ATF",
        desc="File format",
        readonly=True,
        visible=True,
        optional=True,
        no_value="ATF",
    )
    MetadataElement(
        name="number_of_optional_header_records",
        default=1,
        desc="Number of optional header records",
        readonly=True,
        visible=True,
        optional=True,
        no_value=1,
    )
    MetadataElement(
        name="number_of_data_columns",
        default=1,
        desc="Number of data columns",
        readonly=True,
        visible=True,
        optional=True,
        no_value=1,
    )
    MetadataElement(
        name="file_type",
        default="GenePix",
        desc="File type",
        readonly=True,
        visible=True,
        optional=True,
        no_value="GenePix",
    )
    MetadataElement(
        name="block_count",
        default=1,
        desc="Number of blocks described in the file",
        readonly=True,
        visible=True,
        optional=True,
        no_value=1,
    )
    MetadataElement(
        name="block_type",
        default=0,
        desc="Type of block",
        readonly=True,
        visible=True,
        optional=True,
        no_value=0,
    )

    def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
        """
        Populate ``dataset.blurb`` and ``dataset.peek``.

        For a purged dataset both fields receive fixed placeholder messages.
        Otherwise the blurb is a one-line summary assembled from the dataset
        metadata, and the peek comes from ``get_file_peek`` on the dataset
        file.

        :param dataset: dataset whose peek and blurb are set in place
        :param kwd: accepted but not used by this implementation
        """
        # Guard clause: nothing on disk to summarise.
        if dataset.dataset.purged:
            dataset.peek = "file does not exist"
            dataset.blurb = "file purged from disk"
            return

        meta = dataset.metadata
        # Only the block wording differs between the single- and multi-block
        # summaries, so it is resolved first and slotted into one template.
        if meta.block_count == 1:
            block_text = "1 block"
        else:
            block_text = f"{meta.block_count} blocks"
        dataset.blurb = (
            f"{meta.file_type} {meta.version_number}: "
            f"Format {meta.file_format}, {block_text}, "
            f"{meta.number_of_optional_header_records} headers "
            f"and {meta.number_of_data_columns} columns"
        )
        dataset.peek = get_file_peek(dataset.get_file_name())

    def get_mime(self) -> str:
        """Return the MIME type reported for this datatype (``text/plain``)."""
        return "text/plain"
@build_sniff_from_prefix
class Gal(GenericMicroarrayFile):
    """Gal File format described at:
    http://mdc.custhelp.com/app/answers/detail/a_id/18883/#gal
    """

    edam_format = "format_3829"
    edam_data = "data_3110"
    file_ext = "gal"

    def sniff_prefix(self, file_prefix: FilePrefix) -> bool:
        """
        Try to guess if the file is a Gal file.

        The first tab-separated field of the first header row must contain
        ``ATF`` and the first field of the third header row must contain
        ``GenePix ArrayList``. A prefix that does not yield three non-empty
        header rows is rejected (``False``) rather than raising
        ``IndexError``.

        :param file_prefix: prefix of the file being sniffed
        :returns: ``True`` if the prefix looks like a Gal file, else ``False``

        >>> from galaxy.datatypes.sniff import get_test_fname
        >>> fname = get_test_fname('test.gal')
        >>> Gal().sniff(fname)
        True
        >>> fname = get_test_fname('test.gpr')
        >>> Gal().sniff(fname)
        False
        """
        headers = get_headers(file_prefix, sep="\t", count=3)
        # The checks below index rows 0 and 2 at field 0; make sure those
        # positions exist so short or truncated input simply fails the sniff.
        if len(headers) < 3 or not headers[0] or not headers[2]:
            return False
        return "ATF" in headers[0][0] and "GenePix ArrayList" in headers[2][0]
@build_sniff_from_prefix
class Gpr(GenericMicroarrayFile):
    """Gpr File format described at:
    http://mdc.custhelp.com/app/answers/detail/a_id/18883/#gpr
    """

    edam_format = "format_3829"
    edam_data = "data_3110"
    file_ext = "gpr"

    def sniff_prefix(self, file_prefix: FilePrefix) -> bool:
        """
        Try to guess if the file is a Gpr file.

        The first tab-separated field of the first header row must contain
        ``ATF`` and the first field of the third header row must contain
        ``GenePix Results``. A prefix that does not yield three non-empty
        header rows is rejected (``False``) rather than raising
        ``IndexError``.

        :param file_prefix: prefix of the file being sniffed
        :returns: ``True`` if the prefix looks like a Gpr file, else ``False``

        >>> from galaxy.datatypes.sniff import get_test_fname
        >>> fname = get_test_fname('test.gpr')
        >>> Gpr().sniff(fname)
        True
        >>> fname = get_test_fname('test.gal')
        >>> Gpr().sniff(fname)
        False
        """
        headers = get_headers(file_prefix, sep="\t", count=3)
        # The checks below index rows 0 and 2 at field 0; make sure those
        # positions exist so short or truncated input simply fails the sniff.
        if len(headers) < 3 or not headers[0] or not headers[2]:
            return False
        return "ATF" in headers[0][0] and "GenePix Results" in headers[2][0]