Warning

This document is for an old release of Galaxy. You can alternatively view this page in the latest release if it exists or view the top of the latest release's documentation.

Source code for galaxy.datatypes.anvio

"""
Datatypes for Anvi'o
https://github.com/merenlab/anvio
"""

import glob
import logging
import os
from typing import Optional

from galaxy.datatypes.metadata import MetadataElement
from galaxy.datatypes.protocols import (
    DatasetProtocol,
    HasExtraFilesAndMetadata,
)
from galaxy.datatypes.text import Html

log = logging.getLogger(__name__)


[docs]class AnvioComposite(Html): """ Base class to use for Anvi'o composite datatypes. Generally consist of a sqlite database, plus optional additional files """ file_ext = "anvio_composite" composite_type = "auto_primary_file"
[docs] def generate_primary_file(self, dataset: HasExtraFilesAndMetadata) -> str: """ This is called only at upload to write the html file cannot rename the datasets here - they come with the default unfortunately """ defined_files = self.get_composite_files(dataset=dataset).items() rval = [f"<html><head><title>Files for Anvi'o Composite Dataset ({self.file_ext})</title></head>"] if defined_files: rval.append("<p/>This composite dataset is composed of the following defined files:<p/><ul>") for composite_name, composite_file in defined_files: opt_text = "" if composite_file.optional: opt_text = " (optional)" missing_text = "" if not os.path.exists(os.path.join(dataset.extra_files_path, composite_name)): missing_text = " (missing)" rval.append(f'<li><a href="{composite_name}">{composite_name}</a>{opt_text}{missing_text}</li>') rval.append("</ul>") defined_files = map(lambda x: x[0], defined_files) extra_files = [] for dirpath, _dirnames, filenames in os.walk(dataset.extra_files_path, followlinks=True): for filename in filenames: rel_path = os.path.relpath(os.path.join(dirpath, filename), dataset.extra_files_path) if rel_path not in defined_files: extra_files.append(rel_path) if extra_files: rval.append("<p/>This composite dataset contains these undefined files:<p/><ul>") for rel_path in extra_files: rval.append(f'<li><a href="{rel_path}">{rel_path}</a></li>') rval.append("</ul>") if not (defined_files or extra_files): rval.append("<p/>This composite dataset does not contain any files!<p/><ul>") rval.append("</html>") return "\n".join(rval)
[docs] def get_mime(self) -> str: """Returns the mime type of the datatype""" return "text/html"
[docs] def set_peek(self, dataset: DatasetProtocol, **kwd) -> None: """Set the peek and blurb text""" if not dataset.dataset.purged: dataset.peek = "Anvio database (multiple files)" dataset.blurb = "Anvio database (multiple files)" else: dataset.peek = "file does not exist" dataset.blurb = "file purged from disk"
[docs] def display_peek(self, dataset: DatasetProtocol) -> str: """Create HTML content, used for displaying peek.""" try: return dataset.peek except Exception: return "Anvio database (multiple files)"
[docs]class AnvioDB(AnvioComposite): """Class for AnvioDB database files.""" _anvio_basename: Optional[str] = None MetadataElement(name="anvio_basename", default=_anvio_basename, desc="Basename", readonly=True) file_ext = "anvio_db"
[docs] def __init__(self, *args, **kwd): super().__init__(*args, **kwd) if self._anvio_basename is not None: self.add_composite_file(self._anvio_basename, is_binary=True, optional=False)
[docs] def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd) -> None: """ Set the anvio_basename based upon actual extra_files_path contents. """ super().set_meta(dataset, overwrite=overwrite, **kwd) if dataset.metadata.anvio_basename is not None and os.path.exists( os.path.join(dataset.extra_files_path, dataset.metadata.anvio_basename) ): return found = False for basename in [dataset.metadata.anvio_basename, self._anvio_basename]: if found: break if basename is not None and not os.path.exists(os.path.join(dataset.extra_files_path, basename)): for name in glob.glob(os.path.join(dataset.extra_files_path, f"*{basename}")): dataset.metadata.anvio_basename = os.path.basename(name) found = True break
[docs]class AnvioStructureDB(AnvioDB): """Class for Anvio Structure DB database files.""" _anvio_basename = "STRUCTURE.db" MetadataElement(name="anvio_basename", default=_anvio_basename, desc="Basename", readonly=True) file_ext = "anvio_structure_db"
[docs]class AnvioGenomesDB(AnvioDB): """Class for Anvio Genomes DB database files.""" _anvio_basename = "-GENOMES.db" MetadataElement(name="anvio_basename", default=_anvio_basename, desc="Basename", readonly=True) file_ext = "anvio_genomes_db"
[docs]class AnvioContigsDB(AnvioDB): """Class for Anvio Contigs DB database files.""" _anvio_basename = "CONTIGS.db" MetadataElement(name="anvio_basename", default=_anvio_basename, desc="Basename", readonly=True) file_ext = "anvio_contigs_db"
[docs] def __init__(self, *args, **kwd): super().__init__(*args, **kwd) self.add_composite_file("CONTIGS.h5", is_binary=True, optional=True)
[docs]class AnvioProfileDB(AnvioDB): """Class for Anvio Profile DB database files.""" _anvio_basename = "PROFILE.db" MetadataElement(name="anvio_basename", default=_anvio_basename, desc="Basename", readonly=True) file_ext = "anvio_profile_db"
[docs] def __init__(self, *args, **kwd): super().__init__(*args, **kwd) self.add_composite_file("RUNINFO.cp", is_binary=True, optional=True) self.add_composite_file("RUNINFO.mcp", is_binary=True, optional=True) self.add_composite_file("AUXILIARY_DATA.db", is_binary=True, optional=True) self.add_composite_file("RUNLOG.txt", is_binary=False, optional=True)
[docs]class AnvioPanDB(AnvioDB): """Class for Anvio Pan DB database files.""" _anvio_basename = "PAN.db" MetadataElement(name="anvio_basename", default=_anvio_basename, desc="Basename", readonly=True) file_ext = "anvio_pan_db"
[docs]class AnvioSamplesDB(AnvioDB): """Class for Anvio Samples DB database files.""" _anvio_basename = "SAMPLES.db" MetadataElement(name="anvio_basename", default=_anvio_basename, desc="Basename", readonly=True) file_ext = "anvio_samples_db"