Warning
This document is for an old release of Galaxy. You can alternatively view this page in the latest release if it exists or view the top of the latest release's documentation.
Source code for galaxy.datatypes.triples
"""
Triple format classes
"""
import logging
import re
from . import (
binary,
data,
text,
xml
)
log = logging.getLogger(__name__)
class Triples(data.Data):
    """
    Common parent class for the triple-bearing datatypes defined in this module.
    """
    edam_data = "data_0582"
    edam_format = "format_2376"
    file_ext = "triples"

    def sniff(self, filename):
        """
        Never auto-detect this type: the answer is always False, so the
        datatype has to be assigned by hand.
        """
        return False

    def set_peek(self, dataset, is_multi_byte=False):
        """
        Fill in ``dataset.peek`` and ``dataset.blurb``.

        ``is_multi_byte`` is accepted but not consulted by this method.
        """
        # Purged datasets get fixed placeholder texts instead of a file peek.
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
            return
        dataset.peek = data.get_file_peek(dataset.file_name)
        dataset.blurb = 'Triple data'
class NTriples(data.Text, Triples):
    """
    Datatype for triples serialised as N-Triples.
    """
    edam_format = "format_3256"
    file_ext = "nt"

    def sniff(self, filename):
        """
        Report whether the first line (at most 1024 characters of it) contains
        three ``<...>`` terms followed by a terminating dot, for example::

            <http://example.org/dir/relfile> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/type> .
        """
        statement = re.compile(r'<[^>]*>\s<[^>]*>\s<[^>]*>\s\.')
        with open(filename, "r") as handle:
            first_line = handle.readline(1024)
        return statement.search(first_line) is not None

    def set_peek(self, dataset, is_multi_byte=False):
        """
        Fill in ``dataset.peek`` and ``dataset.blurb``.

        ``is_multi_byte`` is accepted but not consulted by this method.
        """
        # Purged datasets get fixed placeholder texts instead of a file peek.
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
            return
        dataset.peek = data.get_file_peek(dataset.file_name)
        dataset.blurb = 'N-Triples triple data'
class N3(data.Text, Triples):
    """
    Datatype for triples serialised as N3.
    """
    edam_format = "format_3257"
    file_ext = "n3"

    def sniff(self, filename):
        """
        Never auto-detect this type: the answer is always False, so the
        datatype has to be assigned by hand.
        """
        return False

    def set_peek(self, dataset, is_multi_byte=False):
        """
        Fill in ``dataset.peek`` and ``dataset.blurb``.

        ``is_multi_byte`` is accepted but not consulted by this method.
        """
        # Purged datasets get fixed placeholder texts instead of a file peek.
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
            return
        dataset.peek = data.get_file_peek(dataset.file_name)
        dataset.blurb = 'Notation-3 Triple data'
class Turtle(data.Text, Triples):
    """
    Datatype for triples serialised as Turtle.
    """
    edam_format = "format_3255"
    file_ext = "ttl"

    def sniff(self, filename):
        """
        Report whether the first line (at most 1024 characters of it) holds a
        ``@prefix`` or ``@base`` directive, for example::

            @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
        """
        prefix_decl = re.compile(r'@prefix\s+[^:]*:\s+<[^>]*>\s\.')
        base_decl = re.compile(r'@base\s+<[^>]*>\s\.')
        with open(filename, "r") as handle:
            first_line = handle.readline(1024)
        # Either directive is enough; they are tried in the order prefix, base.
        for directive in (prefix_decl, base_decl):
            if directive.search(first_line):
                return True
        return False

    def set_peek(self, dataset, is_multi_byte=False):
        """
        Fill in ``dataset.peek`` and ``dataset.blurb``.

        ``is_multi_byte`` is accepted but not consulted by this method.
        """
        # Purged datasets get fixed placeholder texts instead of a file peek.
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
            return
        dataset.peek = data.get_file_peek(dataset.file_name)
        dataset.blurb = 'Turtle triple data'
# TODO: we might want to look at rdflib or a similar, larger lib/egg
class Rdf(xml.GenericXml, Triples):
    """
    Resource Description Framework format (http://www.w3.org/RDF/).
    """
    edam_format = "format_3261"
    file_ext = "rdf"

    def sniff(self, filename):
        """
        Report whether the start of the file looks like RDF/XML.

        The leading lines of the file (``readlines`` with a size hint of 5000)
        must bind some XML namespace prefix to the RDF syntax namespace and
        also mention ``<prefix>:RDF``, for example::

            <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" ...

        :param filename: path of the file to inspect
        :returns: True when both conditions hold, otherwise False
        """
        with open(filename, "r") as f:
            firstlines = "".join(f.readlines(5000))
        match = re.compile(r'xmlns:([^=]*)="http://www.w3.org/1999/02/22-rdf-syntax-ns#"').search(firstlines)
        # search() yields None when the namespace is not declared; bail out
        # before touching match.group(), which would raise AttributeError.
        if match is None:
            return False
        # group(1) is whatever prefix the document bound to the RDF namespace.
        return (match.group(1) + ":RDF") in firstlines

    def set_peek(self, dataset, is_multi_byte=False):
        """
        Set the peek and blurb text.

        ``is_multi_byte`` is accepted but not consulted by this method.
        """
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek(dataset.file_name)
            dataset.blurb = 'RDF/XML triple data'
        else:
            # Purged datasets get fixed placeholder texts instead of a peek.
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
class Jsonld(text.Json, Triples):
    """
    Datatype for JSON-LD documents.
    """
    # EDAM has no dedicated JSON-LD entry, so the plain JSON format id is reused.
    edam_format = "format_3464"
    file_ext = "jsonld"

    def sniff(self, filename):
        """
        Report whether the file passes ``self._looks_like_json`` and its
        leading lines (``readlines`` with a size hint of 5000) contain a
        quoted ``@id`` or ``@context`` key.
        """
        if not self._looks_like_json(filename):
            return False
        with open(filename, "r") as handle:
            head = "".join(handle.readlines(5000))
        for marker in ("\"@id\"", "\"@context\""):
            if marker in head:
                return True
        return False

    def set_peek(self, dataset, is_multi_byte=False):
        """
        Fill in ``dataset.peek`` and ``dataset.blurb``.

        ``is_multi_byte`` is accepted but not consulted by this method.
        """
        # Purged datasets get fixed placeholder texts instead of a file peek.
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
            return
        dataset.peek = data.get_file_peek(dataset.file_name)
        dataset.blurb = 'JSON-LD triple data'
class HDT(binary.Binary, Triples):
    """
    The HDT triple data format
    """
    edam_format = "format_2376"
    file_ext = "hdt"

    def sniff(self, filename):
        """
        Report whether the file starts with the four bytes ``$HDT``.

        :param filename: path of the file to inspect
        :returns: True when the first four bytes equal ``$HDT``, otherwise
                  False (including for files shorter than four bytes)
        """
        with open(filename, "rb") as f:
            # The file is opened in binary mode, so read() yields bytes; the
            # magic must be a bytes literal or the comparison is always False
            # on Python 3.
            return f.read(4) == b"$HDT"

    def set_peek(self, dataset, is_multi_byte=False):
        """
        Set the peek and blurb text.

        ``is_multi_byte`` is accepted but not consulted by this method.
        """
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek(dataset.file_name)
            dataset.blurb = 'HDT triple data'
        else:
            # Purged datasets get fixed placeholder texts instead of a peek.
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'