Warning
This document is for an in-development version of Galaxy. You can alternatively view this page in the latest release if it exists or view the top of the latest release's documentation.
Source code for galaxy.datatypes.qualityscore
"""
Qualityscore class
"""
import logging
from galaxy.datatypes.protocols import DatasetProtocol
from galaxy.datatypes.sniff import (
build_sniff_from_prefix,
FilePrefix,
)
from . import data
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
class QualityScore(data.Text):
    """
    Generic quality score datatype.

    Serves as the common base for the other quality score datatypes defined
    in this module. It adds no behaviour of its own on top of ``data.Text``;
    it only declares the file extension and the EDAM annotations.
    """

    file_ext = "qual"
    edam_format = "format_3606"
    edam_data = "data_2048"
@build_sniff_from_prefix
class QualityScoreSOLiD(QualityScore):
    """
    SOLiD quality score datatype.

    Content is recognised as a sequence of blocks, each made of a ``>`` header
    line followed by one line of whitespace-separated integers. Every score
    line must hold the same number of values.
    """

    edam_format = "format_3610"
    file_ext = "qualsolid"

    def sniff_prefix(self, file_prefix: FilePrefix) -> bool:
        """
        Decide whether the file prefix looks like SOLiD quality scores.

        Blank lines and ``#`` comment lines are ignored. Every other line must
        be a ``>`` header immediately followed by a line of integers whose
        count matches that of the first score line seen. Sniffing succeeds as
        soon as more than 10 valid blocks have been read, or at the end of the
        prefix if at least one valid block was found.

        :param file_prefix: prefix of the file being sniffed
        :returns: True if the content matches the layout described above

        >>> from galaxy.datatypes.sniff import get_test_fname
        >>> fname = get_test_fname( 'sequence.fasta' )
        >>> QualityScoreSOLiD().sniff( fname )
        False
        >>> fname = get_test_fname( 'sequence.qualsolid' )
        >>> QualityScoreSOLiD().sniff( fname )
        True
        """
        fh = file_prefix.string_io()
        readlen = None
        goodblock = 0
        for line in fh:
            line = line.strip()
            # Skip blank lines as well as comments; without the emptiness
            # check a blank line would be treated as a malformed header.
            if not line or line.startswith("#"):
                continue
            if not line.startswith(">"):
                # first non-empty non-comment line is not a header
                return False
            # The score line is expected right after its header.
            line = fh.readline().strip()
            if line == "" or line.startswith(">"):
                return False
            scores = line.split()
            try:
                for score in scores:
                    int(score)
            except ValueError:
                return False
            # SOLiD reads should be of the same length. This is an explicit
            # comparison rather than an assert so that it is still enforced
            # when Python runs with -O.
            if readlen is None:
                readlen = len(scores)
            elif len(scores) != readlen:
                return False
            goodblock += 1
            if goodblock > 10:
                return True
        return goodblock > 0

    def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd) -> None:
        """
        Set metadata, skipping it for datasets above the optional size limit.

        When ``max_optional_metadata_filesize`` is non-negative and the dataset
        is larger than it, ``data_lines`` is set to ``None`` and nothing else
        is computed. Otherwise the call is delegated to
        ``QualityScore.set_meta``.

        :param dataset: dataset whose metadata is being set
        :param overwrite: forwarded unchanged to ``QualityScore.set_meta``
        :param kwd: extra keyword arguments, forwarded unchanged
        """
        if self.max_optional_metadata_filesize >= 0 and dataset.get_size() > self.max_optional_metadata_filesize:
            dataset.metadata.data_lines = None
            return
        return QualityScore.set_meta(self, dataset, overwrite=overwrite, **kwd)
@build_sniff_from_prefix
class QualityScore454(QualityScore):
    """
    454 quality score datatype.

    Content is recognised from its first block only: a ``>`` header line
    followed by a line of whitespace-separated integers.
    """

    file_ext = "qual454"
    edam_format = "format_3611"

    def sniff_prefix(self, file_prefix: FilePrefix) -> bool:
        """
        Decide whether the file prefix looks like 454 quality scores.

        Blank lines and ``#`` comment lines are skipped. The first remaining
        line must be a ``>`` header, and the line right after it must be a
        non-empty, non-header line made only of integers.

        >>> from galaxy.datatypes.sniff import get_test_fname
        >>> fname = get_test_fname( 'sequence.fasta' )
        >>> QualityScore454().sniff( fname )
        False
        >>> fname = get_test_fname( 'sequence.qual454' )
        >>> QualityScore454().sniff( fname )
        True
        """
        stream = file_prefix.string_io()
        for raw in stream:
            header = raw.strip()
            if not header or header.startswith("#"):
                # blank or comment line: keep looking for the first real line
                continue
            if not header.startswith(">"):
                # we found a non-empty line, but it's not a header
                return False
            scores = stream.readline().strip()
            if scores == "" or scores.startswith(">"):
                return False
            try:
                for token in scores.split():
                    int(token)
            except Exception:
                return False
            # Only the first header/score pair is inspected.
            return True
        return False
class QualityScoreSolexa(QualityScore):
    """
    Solexa quality score datatype.

    Differs from ``QualityScore`` only in its file extension and EDAM format;
    no sniffer is defined for it here.
    """

    file_ext = "qualsolexa"
    edam_format = "format_3608"
class QualityScoreIllumina(QualityScore):
    """
    Illumina quality score datatype.

    Differs from ``QualityScore`` only in its file extension and EDAM format;
    no sniffer is defined for it here.
    """

    file_ext = "qualillumina"
    edam_format = "format_3609"