# TODO: revisit ignoring type and write some tests for this; the multiple inheritance in
# this file is challenging, and it should be broken into true mixins.
"""
Constructive Solid Geometry file formats.
"""
import abc
import logging
import re
from typing import (
List,
Optional,
Tuple,
TYPE_CHECKING,
)
from galaxy import util
from galaxy.datatypes import data
from galaxy.datatypes.binary import Binary
from galaxy.datatypes.data import (
get_file_peek,
nice_size,
)
from galaxy.datatypes.metadata import MetadataElement
from galaxy.datatypes.protocols import (
DatasetProtocol,
HasMetadata,
)
from galaxy.datatypes.sniff import (
build_sniff_from_prefix,
FilePrefix,
)
from galaxy.datatypes.tabular import Tabular
from galaxy.datatypes.xml import GenericXml
if TYPE_CHECKING:
from io import TextIOBase
# Upper bound on the number of header lines the PLY and VTP sniffers scan
# before giving up.
MAX_HEADER_LINES = 500
# Maximum number of characters read from a single line while sniffing.
MAX_LINE_LEN = 2000
# NOTE(review): not referenced in the visible part of this module; presumably
# used by metadata-setting code elsewhere in the file -- confirm before removing.
COLOR_OPTS = ["COLOR_SCALARS", "red", "green", "blue"]
# Module-level logger.
log = logging.getLogger(__name__)
[docs]
@build_sniff_from_prefix
class Ply:
    """
    The PLY format describes an object as a collection of vertices,
    faces and other elements, along with properties such as color and
    normal direction that can be attached to these elements.  A PLY
    file contains the description of exactly one object.

    This base class holds the sniffing and peek logic shared by the ASCII
    and binary variants.  Subclasses set ``subtype`` to the keyword that
    must appear on the header's format line.
    """

    subtype = ""
    # Add metadata elements.
    MetadataElement(name="file_format", default=None, desc="File format", readonly=True, optional=True, visible=True)
    MetadataElement(name="vertex", default=None, desc="Vertex", readonly=True, optional=True, visible=True)
    MetadataElement(name="face", default=None, desc="Face", readonly=True, optional=True, visible=True)
    MetadataElement(
        name="other_elements",
        default=[],
        desc="Other elements",
        readonly=True,
        optional=True,
        visible=True,
        no_value=[],
    )

    @abc.abstractmethod
    def __init__(self, **kwd):
        # Concrete subclasses initialise through their own datatype base class.
        raise NotImplementedError

    def sniff_prefix(self, file_prefix: FilePrefix) -> bool:
        """
        Return True when the file prefix starts with a valid PLY header.

        The structure of a typical PLY file:
        Header, Vertex List, Face List, (lists of other elements)
        """
        return self._is_ply_header(file_prefix.text_io(errors="ignore"), self.subtype)

    def _is_ply_header(self, fh: "TextIOBase", subtype: str) -> bool:
        """
        Check whether ``fh`` begins with a PLY header of the given subtype.

        The header is a series of carriage-return terminated lines of
        text that describe the remainder of the file.

        :param fh: text stream positioned at the start of the file
        :param subtype: keyword that must occur on the format line
        :returns: True only if ``end_header`` is reached and every header
            line before it starts with a recognised keyword
        """
        valid_header_items = ["comment", "obj_info", "element", "property"]
        # Line 1: ply
        line = get_next_line(fh)
        if line != "ply":
            return False
        # Line 2: format ascii 1.0
        line = get_next_line(fh)
        if line.find(subtype) < 0:
            return False
        stop_index = 0
        for line in util.iter_start_of_line(fh, MAX_LINE_LEN):
            line = line.strip()
            stop_index += 1
            if line == "end_header":
                return True
            items = line.split()
            # A blank line has no keyword at all; treat it as an invalid
            # header line instead of failing with an IndexError on items[0].
            if not items or items[0] not in valid_header_items:
                return False
            if stop_index > MAX_HEADER_LINES:
                # If this is a PLY file, there must be an unusually
                # large number of comments.
                break
        return False

    def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
        """Set the dataset peek and a blurb giving the face and vertex metadata."""
        if not dataset.dataset.purged:
            dataset.peek = get_file_peek(dataset.get_file_name())
            dataset.blurb = f"Faces: {str(dataset.metadata.face)}, Vertices: {str(dataset.metadata.vertex)}"
        else:
            dataset.peek = "File does not exist"
            dataset.blurb = "File purged from disc"

    def display_peek(self, dataset: DatasetProtocol) -> str:
        """Return the stored peek, or a size summary if reading it fails."""
        try:
            return dataset.peek
        except Exception:
            return f"Ply file ({nice_size(dataset.get_size())})"
[docs]
class PlyAscii(Ply, data.Text):
    """
    PLY file whose format line declares the ``ascii`` encoding.

    >>> from galaxy.datatypes.sniff import get_test_fname
    >>> fname = get_test_fname('test.plyascii')
    >>> PlyAscii().sniff(fname)
    True
    >>> fname = get_test_fname('test.vtkascii')
    >>> PlyAscii().sniff(fname)
    False
    """

    # Keyword searched for on the header's format line by the Ply sniffer.
    subtype = "ascii"
    file_ext = "plyascii"

    def __init__(self, **kwd):
        # Ply.__init__ raises NotImplementedError, so initialise through the
        # text datatype base class explicitly.
        data.Text.__init__(self, **kwd)
[docs]
class PlyBinary(Ply, Binary):
    """PLY file whose format line declares a ``binary`` encoding."""

    # Keyword searched for on the header's format line by the Ply sniffer.
    subtype = "binary"
    file_ext = "plybinary"

    def __init__(self, **kwd):
        # Ply.__init__ raises NotImplementedError, so initialise through the
        # binary datatype base class explicitly.
        Binary.__init__(self, **kwd)
[docs]
@build_sniff_from_prefix
class Vtk:
    r"""
    The Visualization Toolkit provides a number of source and writer objects to
    read and write popular data file formats.  The Visualization Toolkit also
    provides some of its own file formats.

    There are two different styles of file formats available in VTK.  The simplest
    are the legacy, serial formats that are easy to read and write either by hand
    or programmatically.  However, these formats are less flexible than the XML
    based file formats which support random access, parallel I/O, and portable
    data compression and are preferred to the serial VTK file formats whenever
    possible.

    All keyword phrases are written in ASCII form whether the file is binary or
    ASCII.  The binary section of the file (if in binary form) is the data proper;
    i.e., the numbers that define points coordinates, scalars, cell indices, and
    so forth.

    Binary data must be placed into the file immediately after the newline
    ('\\n') character from the previous ASCII keyword and parameter sequence.
    """

    subtype = ""
    # Add metadata elements.
    MetadataElement(name="vtk_version", default=None, desc="Vtk version", readonly=True, optional=True, visible=True)
    MetadataElement(name="file_format", default=None, desc="File format", readonly=True, optional=True, visible=True)
    MetadataElement(name="dataset_type", default=None, desc="Dataset type", readonly=True, optional=True, visible=True)
    # STRUCTURED_GRID data_type.
    MetadataElement(
        name="dimensions", default=[], desc="Dimensions", readonly=True, optional=True, visible=True, no_value=[]
    )
    MetadataElement(name="origin", default=[], desc="Origin", readonly=True, optional=True, visible=True, no_value=[])
    MetadataElement(name="spacing", default=[], desc="Spacing", readonly=True, optional=True, visible=True, no_value=[])
    # POLYDATA data_type (the Points element is also a component of UNSTRUCTURED_GRID).
    MetadataElement(name="points", default=None, desc="Points", readonly=True, optional=True, visible=True)
    MetadataElement(name="vertices", default=None, desc="Vertices", readonly=True, optional=True, visible=True)
    MetadataElement(name="lines", default=None, desc="Lines", readonly=True, optional=True, visible=True)
    MetadataElement(name="polygons", default=None, desc="Polygons", readonly=True, optional=True, visible=True)
    MetadataElement(
        name="triangle_strips", default=None, desc="Triangle strips", readonly=True, optional=True, visible=True
    )
    # UNSTRUCTURED_GRID data_type.
    MetadataElement(name="cells", default=None, desc="Cells", readonly=True, optional=True, visible=True)
    # Additional elements not categorized by data_type.
    MetadataElement(
        name="field_names", default=[], desc="Field names", readonly=True, optional=True, visible=True, no_value=[]
    )
    # The keys in the field_components map to the list of field_names in the above element
    # which ensures order for select list options that are built from it.
    MetadataElement(
        name="field_components",
        default={},
        desc="Field names and components",
        readonly=True,
        optional=True,
        visible=True,
        no_value={},
    )

    @abc.abstractmethod
    def __init__(self, **kwd):
        # Concrete subclasses initialise through their own datatype base class.
        raise NotImplementedError

    def sniff_prefix(self, file_prefix: FilePrefix) -> bool:
        """
        VTK files can be either ASCII or binary, with two different
        styles of file formats: legacy or XML.  A file is accepted as
        VTK as soon as it carries a valid legacy VTK header.
        """
        return self._is_vtk_header(file_prefix.text_io(errors="ignore"), self.subtype)

    def _is_vtk_header(self, fh: "TextIOBase", subtype: str) -> bool:
        """
        Check for a legacy VTK header of the given subtype.

        The Header section consists of at least 4, but possibly
        5 lines.  This is tricky because sometimes the 4th line
        is blank (in which case the 5th line consists of the
        data_kind) or the 4th line consists of the data_kind (in
        which case the 5th line is blank).
        """
        data_kinds = ["STRUCTURED_GRID", "POLYDATA", "UNSTRUCTURED_GRID", "STRUCTURED_POINTS", "RECTILINEAR_GRID"]

        # Line 1: vtk DataFile Version 3.0
        if "vtk" not in get_next_line(fh):
            return False
        # Line 2: can be anything - skip it
        get_next_line(fh)
        # Line 3: ASCII or BINARY
        if subtype not in get_next_line(fh):
            return False
        # Lines 4 and 5: the first non-blank one must name a data kind.
        for _ in range(2):
            candidate = get_next_line(fh)
            if candidate:
                return any(kind in candidate for kind in data_kinds)
        return False

    def get_blurb(self, dataset: HasMetadata) -> str:
        """Build a blurb from the VTK version and dataset type metadata, when set."""
        pieces = []
        version = dataset.metadata.vtk_version
        if version is not None:
            pieces.append(f"VTK Version {str(version)}")
        dataset_type = dataset.metadata.dataset_type
        if dataset_type is not None:
            pieces.append(str(dataset_type))
        return " ".join(pieces) or "VTK data"

    def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
        """Set the dataset peek and blurb, or placeholders when the file is purged."""
        if dataset.dataset.purged:
            dataset.peek = "File does not exist"
            dataset.blurb = "File purged from disc"
            return
        dataset.peek = get_file_peek(dataset.get_file_name())
        dataset.blurb = self.get_blurb(dataset)

    def display_peek(self, dataset: DatasetProtocol) -> str:
        """Return the stored peek, or a size summary if reading it fails."""
        try:
            return dataset.peek
        except Exception:
            return f"Vtk file ({nice_size(dataset.get_size())})"
[docs]
class VtkAscii(Vtk, data.Text):
    """
    Legacy VTK file whose third header line declares ``ASCII`` data.

    >>> from galaxy.datatypes.sniff import get_test_fname
    >>> fname = get_test_fname('test.vtkascii')
    >>> VtkAscii().sniff(fname)
    True
    >>> fname = get_test_fname('test.vtkbinary')
    >>> VtkAscii().sniff(fname)
    False
    """

    # Keyword searched for on the third header line by the Vtk sniffer.
    subtype = "ASCII"
    file_ext = "vtkascii"

    def __init__(self, **kwd):
        # Vtk.__init__ raises NotImplementedError, so initialise through the
        # text datatype base class explicitly.
        data.Text.__init__(self, **kwd)
[docs]
class VtkBinary(Vtk, Binary):
    """
    Legacy VTK file whose third header line declares ``BINARY`` data.

    >>> from galaxy.datatypes.sniff import get_test_fname
    >>> fname = get_test_fname('test.vtkbinary')
    >>> VtkBinary().sniff(fname)
    True
    >>> fname = get_test_fname('test.vtkascii')
    >>> VtkBinary().sniff(fname)
    False
    """

    # Keyword searched for on the third header line by the Vtk sniffer.
    subtype = "BINARY"
    file_ext = "vtkbinary"

    def __init__(self, **kwd):
        # Vtk.__init__ raises NotImplementedError, so initialise through the
        # binary datatype base class explicitly.
        Binary.__init__(self, **kwd)
[docs]
class STL(data.Data):
    """Datatype registered under the ``stl`` file extension."""

    file_ext = "stl"
[docs]
@build_sniff_from_prefix
class NeperTess(data.Text):
    """
    Neper Tessellation File

    Example::

        ***tess
        **format
        format
        **general
        dim type
        **cell
        number_of_cells
    """

    file_ext = "neper.tess"
    MetadataElement(name="format", default=None, desc="format", readonly=True, visible=True)
    MetadataElement(name="dimension", default=None, desc="dimension", readonly=True, visible=True)
    MetadataElement(name="cells", default=None, desc="cells", readonly=True, visible=True)

    def __init__(self, **kwd):
        data.Text.__init__(self, **kwd)

    def sniff_prefix(self, file_prefix: FilePrefix) -> bool:
        """
        Neper tess format, starts with ``***tess``

        >>> from galaxy.datatypes.sniff import get_test_fname
        >>> fname = get_test_fname('test.neper.tess')
        >>> NeperTess().sniff(fname)
        True
        >>> fname = get_test_fname('test.neper.tesr')
        >>> NeperTess().sniff(fname)
        False
        """
        # Only the first few characters of the first line are needed.
        first_chars = file_prefix.text_io(errors="ignore").readline(10)
        return first_chars.startswith("***tess")

    def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
        """Set a 7-line peek and a blurb with the format, dimension and cell metadata."""
        if dataset.dataset.purged:
            dataset.peek = "File does not exist"
            dataset.blurb = "File purged from disc"
            return
        dataset.peek = get_file_peek(dataset.get_file_name(), line_count=7)
        dataset.blurb = f"format: {str(dataset.metadata.format)} dim: {str(dataset.metadata.dimension)} cells: {str(dataset.metadata.cells)}"
[docs]
@build_sniff_from_prefix
class NeperTesr(Binary):
    """
    Neper Raster Tessellation File

    Example::

        ***tesr
        **format
        format
        **general
        dimension
        size_x size_y [size_z]
        voxsize_x voxsize_y [voxsize_z]
        [*origin
        origin_x origin_y [origin_z]]
        [*hasvoid has_void]
        [**cell
        number_of_cells
    """

    file_ext = "neper.tesr"
    MetadataElement(name="format", default=None, desc="format", readonly=True, visible=True)
    MetadataElement(name="dimension", default=None, desc="dimension", readonly=True, visible=True)
    MetadataElement(name="size", default=[], desc="size", readonly=True, visible=True)
    MetadataElement(name="voxsize", default=[], desc="voxsize", readonly=True, visible=True)
    MetadataElement(name="origin", default=[], desc="origin", readonly=True, visible=True)
    MetadataElement(name="cells", default=None, desc="cells", readonly=True, visible=True)

    def __init__(self, **kwd):
        Binary.__init__(self, **kwd)

    def sniff_prefix(self, file_prefix: FilePrefix) -> bool:
        """
        Neper tesr format, starts with ``***tesr``

        >>> from galaxy.datatypes.sniff import get_test_fname
        >>> fname = get_test_fname('test.neper.tesr')
        >>> NeperTesr().sniff(fname)
        True
        >>> fname = get_test_fname('test.neper.tess')
        >>> NeperTesr().sniff(fname)
        False
        """
        # Only the first few characters of the first line are needed.
        first_chars = file_prefix.text_io(errors="ignore").readline(10)
        return first_chars.startswith("***tesr")

    def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
        """Set a 9-line peek and a blurb with the format, dimension and cell metadata."""
        if dataset.dataset.purged:
            dataset.peek = "File does not exist"
            dataset.blurb = "File purged from disc"
            return
        dataset.peek = get_file_peek(dataset.get_file_name(), line_count=9)
        dataset.blurb = f"format: {str(dataset.metadata.format)} dim: {str(dataset.metadata.dimension)} cells: {str(dataset.metadata.cells)}"
[docs]
class NeperPoints(data.Text):
    """
    Neper Position File
    Neper position format has 1 - 3 floats per line separated by white space.
    """

    file_ext = "neper.points"
    MetadataElement(name="dimension", default=None, desc="dimension", readonly=True, visible=True)

    def __init__(self, **kwd):
        data.Text.__init__(self, **kwd)

    def _get_dimension(self, fh: "TextIOBase", maxlines: int = 100, sep: Optional[str] = None) -> Optional[int]:
        """
        Determine how many coordinates each line of ``fh`` holds.

        :param fh: text stream to inspect from its current position
        :param maxlines: maximum number of lines to inspect (at least one
            line is always inspected when the stream is not empty)
        :param sep: field separator passed to ``str.split``; ``None`` splits
            on any run of whitespace
        :returns: the number of floats per line (1, 2 or 3) when every
            inspected line agrees, otherwise ``None``.  ``None`` is also
            returned for an empty stream, for a line with no fields (such as
            a blank line) and for any value that cannot be parsed as a float.
        """
        dim = None
        try:
            for line_number, line in enumerate(fh, start=1):
                # float() raises on any non-numeric field, rejecting the file.
                pts = len([float(x) for x in line.strip().split(sep=sep)])
                if not 1 <= pts <= 3 or (dim is not None and pts != dim):
                    return None
                dim = pts
                if line_number >= maxlines:
                    break
        except Exception:
            # Best effort: any parse or read problem means "dimension unknown".
            return None
        return dim

    def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
        """Set the standard text peek, then append the dimension to the blurb."""
        data.Text.set_peek(self, dataset)
        if not dataset.dataset.purged:
            dataset.blurb += f" dim: {str(dataset.metadata.dimension)}"
[docs]
class NeperPointsTabular(NeperPoints, Tabular):
    """
    Neper Position File
    Neper position format has 1 - 3 floats per line separated by TABs.
    """

    file_ext = "neper.points.tsv"

    def __init__(self, **kwd):
        # Initialise through the tabular datatype base class explicitly.
        Tabular.__init__(self, **kwd)

    def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
        """Set the tabular peek, then append the dimension to the blurb."""
        Tabular.set_peek(self, dataset)
        if dataset.dataset.purged:
            return
        dataset.blurb += f" dim: {str(dataset.metadata.dimension)}"
[docs]
class NeperMultiScaleCell(data.Text):
    """Neper Multiscale Cell File (text datatype with the ``neper.mscell`` extension)."""

    file_ext = "neper.mscell"
[docs]
@build_sniff_from_prefix
class GmshMsh(Binary):
    """Gmsh Mesh File"""

    file_ext = "gmsh.msh"
    is_binary = "maybe"
    MetadataElement(name="version", default=None, desc="version", readonly=True, visible=True)
    MetadataElement(name="format", default=None, desc="format", readonly=True, visible=True)

    def __init__(self, **kwd):
        Binary.__init__(self, **kwd)

    def sniff_prefix(self, file_prefix: FilePrefix) -> bool:
        """
        Gmsh msh format, starts with ``$MeshFormat``

        >>> from galaxy.datatypes.sniff import get_test_fname
        >>> fname = get_test_fname('test.gmsh.msh')
        >>> GmshMsh().sniff(fname)
        True
        >>> fname = get_test_fname('test.neper.tesr')
        >>> GmshMsh().sniff(fname)
        False
        """
        return file_prefix.text_io(errors="ignore").readline().startswith("$MeshFormat")

    def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
        """Set a 3-line peek and a blurb with the Gmsh version and format metadata."""
        if not dataset.dataset.purged:
            dataset.peek = get_file_peek(dataset.get_file_name(), line_count=3)
            # The blurb previously read "Gmsh verion:"; the typo is corrected here.
            dataset.blurb = f"Gmsh version: {str(dataset.metadata.version)} {str(dataset.metadata.format)}"
        else:
            dataset.peek = "File does not exist"
            dataset.blurb = "File purged from disc"
[docs]
class GmshGeo(data.Text):
    """Gmsh geometry File (text datatype with the ``gmsh.geo`` extension)."""

    file_ext = "gmsh.geo"
[docs]
class ZsetGeof(data.Text):
    """Z-set geof File (text datatype with the ``zset.geof`` extension)."""

    file_ext = "zset.geof"
# Utility functions
[docs]
def get_next_line(fh):
    """
    Read the next line from ``fh`` and return it stripped of surrounding whitespace.

    At most ``MAX_LINE_LEN`` characters of the line are kept.  When the text
    read does not end in a newline, one more ``readline()`` is issued so the
    stream moves on to the start of the following line.
    """
    head = fh.readline(MAX_LINE_LEN)
    if head[-1:] != "\n":
        # Discard the rest of the line
        fh.readline()
    return head.strip()
[docs]
class VtkXml(GenericXml):
    """Format for defining VTK (XML based) and its sub-datatypes. https://docs.vtk.org/en/latest/design_documents/VTKFileFormats.html"""

    edam_format = "edam:format_2332"
    file_ext = "vtkxml"
    # The same MetadataElements are also available for legacy VTK datatypes.
    MetadataElement(name="vtk_version", default=None, desc="Vtk version", readonly=True, optional=True, visible=True)
    MetadataElement(name="file_format", default=None, desc="File format", readonly=True, optional=True, visible=True)
    MetadataElement(name="dataset_type", default=None, desc="Dataset type", readonly=True, optional=True, visible=True)

    def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
        """Set the peek and blurb text for VTK dataset files."""
        if dataset.dataset.purged:
            dataset.peek = "File does not exist"
            dataset.blurb = "File purged from disk"
            return
        dataset.peek = "VTK Dataset file"
        dataset.blurb = f"type {dataset.metadata.dataset_type} version {dataset.metadata.vtk_version}"

    def sniff_prefix(self, file_prefix: FilePrefix) -> bool:
        """Check for the key string 'VTKFile' to determine if this is a VTK dataset file.

        >>> from galaxy.datatypes.sniff import get_test_fname
        >>> fname = get_test_fname('data.vtu')
        >>> VtkXml().sniff(fname)
        True
        >>> fname = get_test_fname('1.phyloxml')
        >>> VtkXml().sniff(fname)
        False
        """
        root_element = "VTKFile"
        return self._has_root_element_in_prefix(file_prefix, root_element)
[docs]
@build_sniff_from_prefix
class Vtp:
    """
    A VTP file is a Visualization Toolkit (VTK) file format that specifically stores polygonal data
    (surface meshes) in a hierarchical, XML-based format. It's designed to efficiently represent and
    communicate 3D geometric models with associated data attributes.

    Subclasses set ``subtype`` to the ``format="..."`` attribute value the
    sniffer must find (``"ascii"`` or ``"appended"``).
    """

    subtype = ""
    # Matches name="value" / name='value' pairs; a bare name=value is accepted
    # as a fallback for input that is not well-formed XML.
    _ATTR_RE = re.compile(r"""([^\s=<>/"']+)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'<>]+))""")
    # Add metadata elements (sorted alphabetically by name).
    MetadataElement(name="lines", default=0, desc="Number of lines", readonly=True, optional=True, visible=True)
    MetadataElement(name="points", default=0, desc="Number of points", readonly=True, optional=True, visible=True)
    MetadataElement(name="polys", default=0, desc="Number of polygons", readonly=True, optional=True, visible=True)
    MetadataElement(
        name="strips", default=0, desc="Number of triangle strips", readonly=True, optional=True, visible=True
    )
    MetadataElement(
        name="version", default=None, desc="VTK file format version", readonly=True, optional=True, visible=True
    )
    MetadataElement(name="verts", default=0, desc="Number of vertices", readonly=True, optional=True, visible=True)

    @abc.abstractmethod
    def __init__(self, **kwd):
        # Concrete subclasses initialise through their own datatype base class.
        raise NotImplementedError

    def sniff_prefix(self, file_prefix: FilePrefix) -> bool:
        """
        >>> from galaxy.datatypes.sniff import get_test_fname
        >>> fname = get_test_fname('test.plyascii')
        >>> VtpAscii().sniff(fname)
        False
        >>> fname = get_test_fname('test.vtpascii')
        >>> VtpAscii().sniff(fname)
        True
        """
        return self._is_vtp_header(file_prefix.text_io(errors="ignore"), self.subtype)

    def _is_vtp_header(self, fh: "TextIOBase", subtype: str) -> bool:
        """
        Check whether ``fh`` starts with a VTK PolyData XML document of ``subtype``.

        The first line must open ``<VTKFile`` with ``type="PolyData"``.  The
        following lines are scanned, up to ``</VTKFile>`` or the header line
        limit, for a ``<PolyData`` element and a matching ``format``
        attribute.  The ``appended`` subtype additionally requires an
        ``offset=`` attribute.  Any other subtype is rejected.
        """
        line = get_next_line(fh)
        if not line.startswith("<VTKFile") or 'type="PolyData"' not in line:
            return False
        found_polydata = False
        found_format = False
        found_offset = False
        for stop_index, line in enumerate(util.iter_start_of_line(fh, MAX_LINE_LEN)):
            line = line.strip()
            if "<PolyData" in line:
                found_polydata = True
            if f'format="{subtype}"' in line:
                found_format = True
            if "offset=" in line:
                found_offset = True
            if "</VTKFile>" in line or stop_index > MAX_HEADER_LINES:
                break
        if subtype == "appended":
            return found_polydata and found_format and found_offset
        elif subtype == "ascii":
            return found_polydata and found_format
        return False

    def _parse_attrs(self, line: str) -> dict:
        """
        Parse key="value" attributes from an XML tag line.

        Values are returned without their surrounding quotes.  Quoted values
        may contain spaces, and the closing ``>`` or ``/>`` of the tag is not
        included in the last value.

        :param line: a single line of text containing an XML tag
        :returns: mapping of attribute name to attribute value; when a name
            occurs more than once the last occurrence wins
        """
        attrs = {}
        for match in self._ATTR_RE.finditer(line):
            key, double_quoted, single_quoted, bare = match.groups()
            # Exactly one of the three value alternatives took part in the match.
            if double_quoted is not None:
                attrs[key] = double_quoted
            elif single_quoted is not None:
                attrs[key] = single_quoted
            else:
                attrs[key] = bare
        return attrs

    def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
        """Set a fixed peek and a blurb listing every non-empty metadata count."""
        if not dataset.dataset.purged:
            parts = []
            if dataset.metadata.points:
                parts.append(f"Points: {dataset.metadata.points}")
            if dataset.metadata.lines:
                parts.append(f"Lines: {dataset.metadata.lines}")
            if dataset.metadata.polys:
                parts.append(f"Polygons: {dataset.metadata.polys}")
            if dataset.metadata.strips:
                parts.append(f"Strips: {dataset.metadata.strips}")
            if dataset.metadata.verts:
                parts.append(f"Vertices: {dataset.metadata.verts}")
            if dataset.metadata.version:
                parts.append(f"Version: {dataset.metadata.version}")
            dataset.peek = "VTP Dataset file"
            dataset.blurb = ", ".join(parts) if parts else "VTP Dataset file (empty metadata)"
        else:
            dataset.peek = "File does not exist"
            dataset.blurb = "File purged from disc"

    def display_peek(self, dataset: DatasetProtocol) -> str:
        """Return the stored peek, or a size summary if reading it fails."""
        try:
            return dataset.peek
        except Exception:
            return f"Vtp file ({nice_size(dataset.get_size())})"
[docs]
class VtpBinary(Vtp, Binary):
    """VTP file whose data arrays use the ``appended`` format."""

    # Value the Vtp sniffer expects in the format="..." attribute.
    subtype = "appended"
    file_ext = "vtpbinary"

    def __init__(self, **kwd):
        # Vtp.__init__ raises NotImplementedError, so initialise through the
        # binary datatype base class explicitly.
        Binary.__init__(self, **kwd)
[docs]
class VtpAscii(Vtp, data.Text):
    """VTP file whose data arrays use the ``ascii`` format."""

    # Value the Vtp sniffer expects in the format="..." attribute.
    subtype = "ascii"
    file_ext = "vtpascii"

    def __init__(self, **kwd):
        # Vtp.__init__ raises NotImplementedError, so initialise through the
        # text datatype base class explicitly.
        data.Text.__init__(self, **kwd)