Warning

This document is for an in-development version of Galaxy. You can alternatively view this page in the latest release if it exists or view the top of the latest release's documentation.

Source code for galaxy.datatypes.triples

"""
Triple format classes
"""

import logging
import re

from galaxy.datatypes.metadata import MetadataElement
from galaxy.datatypes.protocols import DatasetProtocol
from galaxy.datatypes.sniff import (
    build_sniff_from_prefix,
    FilePrefix,
)
from . import (
    binary,
    data,
    text,
    xml,
)

log = logging.getLogger(__name__)

# Turtle ``@prefix`` directive, e.g.
#   @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
# NOTE(review): exactly one whitespace character is required before the
# terminating dot, so "<...>." (no space) is not recognised.
TURTLE_PREFIX_PATTERN = re.compile(r"@prefix\s+[^:]*:\s+<[^>]*>\s\.")
# Turtle ``@base`` directive, e.g. ``@base <http://example.org/> .``
TURTLE_BASE_PATTERN = re.compile(r"@base\s+<[^>]*>\s\.")
# SBOL namespace IRI, e.g. ``http://sbols.org/v2#``; group 1 captures the
# single-digit version number.  The dot in front of "org" is escaped so that it
# only matches a literal "." rather than any character.
SBOL_PATTERN = re.compile(r"https?://[w.]*sbols?\.org/v(\d)#")


class Triples(data.Data):
    """
    Abstract parent class of the datatypes in this module that can contain triples.
    """

    edam_data = "data_0582"
    edam_format = "format_2376"
    file_ext = "triples"

    def sniff(self, filename: str) -> bool:
        """
        Never auto-detect this datatype: always returns False, so the user
        has to assign it manually.
        """
        return False

    def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
        """Populate the peek and blurb text of ``dataset``."""
        if dataset.dataset.purged:
            # Nothing left on disk to preview.
            dataset.peek = "file does not exist"
            dataset.blurb = "file purged from disk"
            return
        dataset.peek = data.get_file_peek(dataset.get_file_name())
        dataset.blurb = "Triple data"
@build_sniff_from_prefix
class NTriples(data.Text, Triples):
    """
    Datatype for triple data in the N-Triples format.
    """

    edam_format = "format_3256"
    file_ext = "nt"

    def sniff_prefix(self, file_prefix: FilePrefix) -> bool:
        """
        Return True when the file header contains a statement made of three
        ``<...>`` terms followed by a dot, for example::

            <http://example.org/dir/relfile> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/type> .
        """
        statement_pattern = re.compile(r"<[^>]*>\s<[^>]*>\s<[^>]*>\s\.")
        hit = statement_pattern.search(file_prefix.contents_header)
        return hit is not None

    def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
        """Populate the peek and blurb text of ``dataset``."""
        if dataset.dataset.purged:
            # Nothing left on disk to preview.
            dataset.peek = "file does not exist"
            dataset.blurb = "file purged from disk"
            return
        dataset.peek = data.get_file_peek(dataset.get_file_name())
        dataset.blurb = "N-Triples triple data"
class N3(data.Text, Triples):
    """
    Datatype for triple data in the N3 (Notation-3) format.
    """

    edam_format = "format_3257"
    file_ext = "n3"

    def sniff(self, filename: str) -> bool:
        """
        Never auto-detect this datatype: always returns False, so the user
        has to assign it manually.
        """
        return False

    def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
        """Populate the peek and blurb text of ``dataset``."""
        if dataset.dataset.purged:
            # Nothing left on disk to preview.
            dataset.peek = "file does not exist"
            dataset.blurb = "file purged from disk"
            return
        dataset.peek = data.get_file_peek(dataset.get_file_name())
        dataset.blurb = "Notation-3 Triple data"
@build_sniff_from_prefix
class Turtle(data.Text, Triples):
    """
    Datatype for triple data in the Turtle format.
    """

    edam_format = "format_3255"
    file_ext = "ttl"

    def sniff_prefix(self, file_prefix: FilePrefix) -> bool:
        """
        Return True when the file prefix contains a ``@prefix`` or ``@base``
        directive, for example::

            @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
        """
        directive_patterns = (TURTLE_PREFIX_PATTERN, TURTLE_BASE_PATTERN)
        # The generator keeps the short-circuit: @base is only tried when
        # @prefix was not found.
        return any(file_prefix.search(pattern) for pattern in directive_patterns)

    def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
        """Populate the peek and blurb text of ``dataset``."""
        if dataset.dataset.purged:
            # Nothing left on disk to preview.
            dataset.peek = "file does not exist"
            dataset.blurb = "file purged from disk"
            return
        dataset.peek = data.get_file_peek(dataset.get_file_name())
        dataset.blurb = "Turtle triple data"
# TODO: we might want to look at rdflib or a similar, larger lib/egg
@build_sniff_from_prefix
class Rdf(xml.GenericXml, Triples):
    """
    Resource Description Framework format (http://www.w3.org/RDF/).
    """

    edam_format = "format_3261"
    file_ext = "rdf"

    def sniff_prefix(self, file_prefix: FilePrefix) -> bool:
        """
        Return True when the file header binds a namespace prefix to the RDF
        syntax namespace and also contains ``<prefix>:RDF``, for example::

            <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" ...
        """
        # Dots in the namespace IRI are escaped so they only match a literal
        # "." instead of acting as regex wildcards.  Group 1 is the prefix
        # the document chose for the RDF namespace.
        match = re.compile(r'xmlns:([^=]*)="http://www\.w3\.org/1999/02/22-rdf-syntax-ns#"').search(
            file_prefix.contents_header
        )
        if match and f"{match.group(1)}:RDF" in file_prefix.contents_header:
            return True
        return False

    def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
        """Set the peek and blurb text"""
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek(dataset.get_file_name())
            dataset.blurb = "RDF/XML triple data"
        else:
            dataset.peek = "file does not exist"
            dataset.blurb = "file purged from disk"
@build_sniff_from_prefix
class Jsonld(text.Json, Triples):
    """
    Datatype for data in the JSON-LD format.
    """

    # EDAM defines no JSON-LD format, so the JSON format number is used instead.
    edam_format = "format_3464"
    file_ext = "jsonld"

    def sniff_prefix(self, file_prefix: FilePrefix) -> bool:
        """
        Return True when the prefix looks like JSON and its header contains
        a quoted ``@id`` or ``@context`` key.
        """
        if not self._looks_like_json(file_prefix):
            return False
        header = file_prefix.contents_header
        return '"@id"' in header or '"@context"' in header

    def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
        """Populate the peek and blurb text of ``dataset``."""
        if dataset.dataset.purged:
            # Nothing left on disk to preview.
            dataset.peek = "file does not exist"
            dataset.blurb = "file purged from disk"
            return
        dataset.peek = data.get_file_peek(dataset.get_file_name())
        dataset.blurb = "JSON-LD triple data"
class HDT(binary.Binary, Triples):
    """
    Datatype for triple data in the HDT format.
    """

    edam_format = "format_2376"
    file_ext = "hdt"

    def sniff(self, filename: str) -> bool:
        """Return True when the first four bytes of the file are ``$HDT``."""
        with open(filename, "rb") as handle:
            leading_bytes = handle.read(4)
        return leading_bytes == b"$HDT"

    def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
        """Populate the peek and blurb text of ``dataset``."""
        if dataset.dataset.purged:
            # Nothing left on disk to preview.
            dataset.peek = "file does not exist"
            dataset.blurb = "file purged from disk"
            return
        dataset.peek = data.get_file_peek(dataset.get_file_name())
        dataset.blurb = "HDT triple data"
@build_sniff_from_prefix
class Sbol(data.Text, Triples):
    """
    Datatype for the SBOL data format (https://sbolstandard.org).
    """

    # Declares the ``version`` metadata element; it defaults to an empty string.
    MetadataElement(name="version", default="", readonly=True, visible=True, optional=True)
    edam_format = "format_3725"
    file_ext = "sbol"

    def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd) -> None:
        """
        Store the version digit captured by ``SBOL_PATTERN`` in
        ``dataset.metadata.version``; leave the metadata untouched when the
        pattern is not found.
        """
        prefix = FilePrefix(filename=dataset.get_file_name())
        found = prefix.search(SBOL_PATTERN)
        if not found:
            return
        version = found.group(1)
        if version:
            dataset.metadata.version = version

    def sniff_prefix(self, file_prefix: FilePrefix) -> bool:
        """
        Return True when the file prefix contains an SBOL namespace IRI such
        as ``http://sbols.org/v2#``.
        """
        return bool(file_prefix.search(SBOL_PATTERN))

    def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
        """
        Populate the peek and blurb text of ``dataset``; the blurb includes
        the SBOL version when the metadata holds a non-empty one.
        """
        if dataset.dataset.purged:
            # Nothing left on disk to preview.
            dataset.peek = "file does not exist"
            dataset.blurb = "file purged from disk"
            return
        dataset.peek = data.get_file_peek(dataset.get_file_name())
        version = dataset.metadata.version
        blurb = "SBOL data"
        if version != "":
            blurb += " v" + version
        dataset.blurb = blurb