Warning
This document is for an in-development version of Galaxy. You can alternatively view this page in the latest release if it exists or view the top of the latest release's documentation.
Source code for galaxy.datatypes.anvio
"""
Datatypes for Anvi'o
https://github.com/merenlab/anvio
"""
import glob
import logging
import os
from typing import Optional
from galaxy.datatypes.metadata import MetadataElement
from galaxy.datatypes.protocols import (
DatasetProtocol,
HasExtraFilesAndMetadata,
)
from galaxy.datatypes.text import Html
log = logging.getLogger(__name__)
class AnvioComposite(Html):
    """
    Base class to use for Anvi'o composite datatypes.

    Generally consist of a sqlite database, plus optional additional files.
    """

    file_ext = "anvio_composite"
    composite_type = "auto_primary_file"

    def generate_primary_file(self, dataset: HasExtraFilesAndMetadata) -> str:
        """
        Build the HTML index page used as the primary file of the dataset.

        This is called only at upload to write the html file; the datasets
        cannot be renamed here - they come with the default unfortunately.

        The page lists every composite file declared for this datatype
        (flagged as optional and/or missing where applicable), followed by
        any other file found below ``dataset.extra_files_path``.

        :param dataset: dataset whose ``extra_files_path`` is inspected.
        :returns: the HTML document as a single newline-joined string.
        """
        defined_files = self.get_composite_files(dataset=dataset).items()
        rval = [f"<html><head><title>Files for Anvi'o Composite Dataset ({self.file_ext})</title></head>"]
        if defined_files:
            rval.append("<p/>This composite dataset is composed of the following defined files:<p/><ul>")
            for composite_name, composite_file in defined_files:
                opt_text = " (optional)" if composite_file.optional else ""
                missing_text = ""
                if not os.path.exists(os.path.join(dataset.extra_files_path, composite_name)):
                    missing_text = " (missing)"
                rval.append(f'<li><a href="{composite_name}">{composite_name}</a>{opt_text}{missing_text}</li>')
            rval.append("</ul>")
        # Materialize the declared names once. A generator here would be
        # consumed by the first membership test below (so declared files
        # would then be reported as undefined) and would always be truthy
        # (so the "no files" message could never be emitted).
        defined_names = {composite_name for composite_name, _ in defined_files}
        extra_files = []
        for dirpath, _dirnames, filenames in os.walk(dataset.extra_files_path, followlinks=True):
            for filename in filenames:
                rel_path = os.path.relpath(os.path.join(dirpath, filename), dataset.extra_files_path)
                if rel_path not in defined_names:
                    extra_files.append(rel_path)
        if extra_files:
            rval.append("<p/>This composite dataset contains these undefined files:<p/><ul>")
            for rel_path in extra_files:
                rval.append(f'<li><a href="{rel_path}">{rel_path}</a></li>')
            rval.append("</ul>")
        if not (defined_names or extra_files):
            # No list follows this message, so no <ul> is opened here.
            rval.append("<p/>This composite dataset does not contain any files!<p/>")
        rval.append("</html>")
        return "\n".join(rval)

    def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
        """
        Set the peek and blurb text.

        A fixed description is used while the underlying dataset exists;
        once it has been purged the texts say so instead.
        """
        if not dataset.dataset.purged:
            dataset.peek = "Anvio database (multiple files)"
            dataset.blurb = "Anvio database (multiple files)"
        else:
            dataset.peek = "file does not exist"
            dataset.blurb = "file purged from disk"

    def display_peek(self, dataset: DatasetProtocol) -> str:
        """
        Create HTML content, used for displaying peek.

        Returns the stored peek, falling back to the generic description if
        reading it raises.
        """
        try:
            return dataset.peek
        except Exception:
            return "Anvio database (multiple files)"
class AnvioDB(AnvioComposite):
    """
    Class for AnvioDB database files.

    ``_anvio_basename`` holds the expected database file name for the
    concrete datatype; it is ``None`` here and overridden by subclasses.
    """

    _anvio_basename: Optional[str] = None
    MetadataElement(name="anvio_basename", default=_anvio_basename, desc="Basename", readonly=True)
    file_ext = "anvio_db"

    def __init__(self, *args, **kwd):
        """Register the database file as a required binary composite file, if one is defined."""
        super().__init__(*args, **kwd)
        if self._anvio_basename is None:
            return
        self.add_composite_file(self._anvio_basename, is_binary=True, optional=False)

    def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd) -> None:
        """
        Set the anvio_basename based upon actual extra_files_path contents.

        Nothing is changed when the recorded basename already names an
        existing file. Otherwise the recorded basename and then the class
        default are tried in turn as a file-name suffix, and the first file
        in ``extra_files_path`` matching ``*<basename>`` is recorded.
        """
        super().set_meta(dataset, overwrite=overwrite, **kwd)
        extra_dir = dataset.extra_files_path
        recorded = dataset.metadata.anvio_basename
        if recorded is not None and os.path.exists(os.path.join(extra_dir, recorded)):
            return
        for candidate in (recorded, self._anvio_basename):
            # Only names that are set and not present verbatim are globbed for.
            if candidate is None or os.path.exists(os.path.join(extra_dir, candidate)):
                continue
            matches = glob.glob(os.path.join(extra_dir, f"*{candidate}"))
            if matches:
                dataset.metadata.anvio_basename = os.path.basename(matches[0])
                return
class AnvioStructureDB(AnvioDB):
    """Anvi'o structure database datatype (database file ``STRUCTURE.db``)."""

    file_ext = "anvio_structure_db"
    _anvio_basename = "STRUCTURE.db"
    MetadataElement(name="anvio_basename", default=_anvio_basename, desc="Basename", readonly=True)
class AnvioGenomesDB(AnvioDB):
    """Anvi'o genomes database datatype (database file ``-GENOMES.db``)."""

    file_ext = "anvio_genomes_db"
    _anvio_basename = "-GENOMES.db"
    MetadataElement(name="anvio_basename", default=_anvio_basename, desc="Basename", readonly=True)
class AnvioContigsDB(AnvioDB):
    """Anvi'o contigs database datatype (database file ``CONTIGS.db``)."""

    file_ext = "anvio_contigs_db"
    _anvio_basename = "CONTIGS.db"
    MetadataElement(name="anvio_basename", default=_anvio_basename, desc="Basename", readonly=True)

    def __init__(self, *args, **kwd):
        """Additionally declare the optional binary ``CONTIGS.h5`` companion file."""
        super().__init__(*args, **kwd)
        self.add_composite_file("CONTIGS.h5", is_binary=True, optional=True)
class AnvioProfileDB(AnvioDB):
    """Anvi'o profile database datatype (database file ``PROFILE.db``)."""

    file_ext = "anvio_profile_db"
    _anvio_basename = "PROFILE.db"
    MetadataElement(name="anvio_basename", default=_anvio_basename, desc="Basename", readonly=True)

    def __init__(self, *args, **kwd):
        """Additionally declare the optional companion files of a profile database."""
        super().__init__(*args, **kwd)
        # (file name, is_binary) pairs, registered in this order; all optional.
        optional_files = (
            ("RUNINFO.cp", True),
            ("RUNINFO.mcp", True),
            ("AUXILIARY_DATA.db", True),
            ("RUNLOG.txt", False),
        )
        for companion_name, binary in optional_files:
            self.add_composite_file(companion_name, is_binary=binary, optional=True)
class AnvioPanDB(AnvioDB):
    """Anvi'o pan database datatype (database file ``PAN.db``)."""

    file_ext = "anvio_pan_db"
    _anvio_basename = "PAN.db"
    MetadataElement(name="anvio_basename", default=_anvio_basename, desc="Basename", readonly=True)
class AnvioSamplesDB(AnvioDB):
    """Anvi'o samples database datatype (database file ``SAMPLES.db``)."""

    file_ext = "anvio_samples_db"
    _anvio_basename = "SAMPLES.db"
    MetadataElement(name="anvio_basename", default=_anvio_basename, desc="Basename", readonly=True)