Warning
This document is for an in-development version of Galaxy. You can alternatively view this page in the latest release if it exists or view the top of the latest release's documentation.
Source code for galaxy.datatypes.triples
"""
Triple format classes
"""
import logging
import re
from galaxy.datatypes.metadata import MetadataElement
from galaxy.datatypes.protocols import DatasetProtocol
from galaxy.datatypes.sniff import (
build_sniff_from_prefix,
FilePrefix,
)
from . import (
binary,
data,
text,
xml,
)
log = logging.getLogger(__name__)
TURTLE_PREFIX_PATTERN = re.compile(r"@prefix\s+[^:]*:\s+<[^>]*>\s\.")
TURTLE_BASE_PATTERN = re.compile(r"@base\s+<[^>]*>\s\.")
SBOL_PATTERN = re.compile(r"http[s]?://[w\.]*sbol[s]?.org/v(\d{1})#")
[docs]class Triples(data.Data):
"""
The abstract base class for the file format that can contain triples
"""
edam_data = "data_0582"
edam_format = "format_2376"
file_ext = "triples"
[docs] def sniff(self, filename: str) -> bool:
"""
Returns false and the user must manually set.
"""
return False
[docs] def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
"""Set the peek and blurb text"""
if not dataset.dataset.purged:
dataset.peek = data.get_file_peek(dataset.get_file_name())
dataset.blurb = "Triple data"
else:
dataset.peek = "file does not exist"
dataset.blurb = "file purged from disk"
[docs]@build_sniff_from_prefix
class NTriples(data.Text, Triples):
"""
The N-Triples triple data format
"""
edam_format = "format_3256"
file_ext = "nt"
[docs] def sniff_prefix(self, file_prefix: FilePrefix) -> bool:
# <http://example.org/dir/relfile> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/type> .
if re.compile(r"<[^>]*>\s<[^>]*>\s<[^>]*>\s\.").search(file_prefix.contents_header):
return True
return False
[docs] def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
"""Set the peek and blurb text"""
if not dataset.dataset.purged:
dataset.peek = data.get_file_peek(dataset.get_file_name())
dataset.blurb = "N-Triples triple data"
else:
dataset.peek = "file does not exist"
dataset.blurb = "file purged from disk"
[docs]class N3(data.Text, Triples):
"""
The N3 triple data format
"""
edam_format = "format_3257"
file_ext = "n3"
[docs] def sniff(self, filename: str) -> bool:
"""
Returns false and the user must manually set.
"""
return False
[docs] def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
"""Set the peek and blurb text"""
if not dataset.dataset.purged:
dataset.peek = data.get_file_peek(dataset.get_file_name())
dataset.blurb = "Notation-3 Triple data"
else:
dataset.peek = "file does not exist"
dataset.blurb = "file purged from disk"
[docs]@build_sniff_from_prefix
class Turtle(data.Text, Triples):
"""
The Turtle triple data format
"""
edam_format = "format_3255"
file_ext = "ttl"
[docs] def sniff_prefix(self, file_prefix: FilePrefix) -> bool:
# @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
if file_prefix.search(TURTLE_PREFIX_PATTERN):
return True
if file_prefix.search(TURTLE_BASE_PATTERN):
return True
return False
[docs] def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
"""Set the peek and blurb text"""
if not dataset.dataset.purged:
dataset.peek = data.get_file_peek(dataset.get_file_name())
dataset.blurb = "Turtle triple data"
else:
dataset.peek = "file does not exist"
dataset.blurb = "file purged from disk"
# TODO: we might want to look at rdflib or a similar, larger lib/egg
[docs]@build_sniff_from_prefix
class Rdf(xml.GenericXml, Triples):
"""
Resource Description Framework format (http://www.w3.org/RDF/).
"""
edam_format = "format_3261"
file_ext = "rdf"
[docs] def sniff_prefix(self, file_prefix: FilePrefix) -> bool:
# <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" ...
match = re.compile(r'xmlns:([^=]*)="http://www.w3.org/1999/02/22-rdf-syntax-ns#"').search(
file_prefix.contents_header
)
if match and (f"{match.group(1)}:RDF") in file_prefix.contents_header:
return True
return False
[docs] def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
"""Set the peek and blurb text"""
if not dataset.dataset.purged:
dataset.peek = data.get_file_peek(dataset.get_file_name())
dataset.blurb = "RDF/XML triple data"
else:
dataset.peek = "file does not exist"
dataset.blurb = "file purged from disk"
[docs]@build_sniff_from_prefix
class Jsonld(text.Json, Triples):
"""
The JSON-LD data format
"""
# format not defined in edam so we use the json format number
edam_format = "format_3464"
file_ext = "jsonld"
[docs] def sniff_prefix(self, file_prefix: FilePrefix) -> bool:
if self._looks_like_json(file_prefix):
if '"@id"' in file_prefix.contents_header or '"@context"' in file_prefix.contents_header:
return True
return False
[docs] def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
"""Set the peek and blurb text"""
if not dataset.dataset.purged:
dataset.peek = data.get_file_peek(dataset.get_file_name())
dataset.blurb = "JSON-LD triple data"
else:
dataset.peek = "file does not exist"
dataset.blurb = "file purged from disk"
[docs]class HDT(binary.Binary, Triples):
"""
The HDT triple data format
"""
edam_format = "format_2376"
file_ext = "hdt"
[docs] def sniff(self, filename: str) -> bool:
with open(filename, "rb") as f:
if f.read(4) == b"$HDT":
return True
return False
[docs] def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
"""Set the peek and blurb text"""
if not dataset.dataset.purged:
dataset.peek = data.get_file_peek(dataset.get_file_name())
dataset.blurb = "HDT triple data"
else:
dataset.peek = "file does not exist"
dataset.blurb = "file purged from disk"
[docs]@build_sniff_from_prefix
class Sbol(data.Text, Triples):
"""
The SBOL data format (https://sbolstandard.org).
"""
MetadataElement(name="version", default="", readonly=True, visible=True, optional=True)
edam_format = "format_3725"
file_ext = "sbol"
[docs] def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd) -> None:
file_prefix = FilePrefix(filename=dataset.get_file_name())
match = file_prefix.search(SBOL_PATTERN)
if match and match.group(1):
dataset.metadata.version = match.group(1)
[docs] def sniff_prefix(self, file_prefix: FilePrefix) -> bool:
# http://sbols.org/v2#
if file_prefix.search(SBOL_PATTERN):
return True
return False
[docs] def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
"""Set the peek and blurb text"""
if not dataset.dataset.purged:
dataset.peek = data.get_file_peek(dataset.get_file_name())
msg = "SBOL data"
if dataset.metadata.version != "":
msg += " v" + dataset.metadata.version
dataset.blurb = msg
else:
dataset.peek = "file does not exist"
dataset.blurb = "file purged from disk"