// Copyright ©2020 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package rdf

import (
"bytes"
"cmp"
"errors"
"fmt"
"hash"
"slices"
"sort"
"gonum.org/v1/gonum/internal/order"
)
// See "Canonical Forms for Isomorphic and Equivalent RDF Graphs: Algorithms
// for Leaning and Labelling Blank Nodes" by Aiden Hogan for description of
// the algorithm, https://doi.org/10.1145/3068333 and available free from
// the author's web page http://aidanhogan.com/docs/rdf-canonicalisation.pdf.
//
// Aspects of implementation from discussion in v1.0 of the readme of the PoC
// at https://doi.org/10.5281/zenodo.3154322

// Isomorphic returns whether the RDF graph datasets a and b are isomorphic,
// where there is a bijective mapping between blank nodes in a and b using
// the given hash function. If decomp is true, the graphs are decomposed
// before canonicalization.
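//
// A minimal sketch of use, assuming a and b already hold parsed statements
// and using crypto/sha256 as an illustrative hash choice:
//
//	h := sha256.New()
//	if Isomorphic(a, b, true, h) {
//		// a and b describe the same graph up to
//		// blank node relabelling.
//	}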
func Isomorphic(a, b []*Statement, decomp bool, h hash.Hash) bool {
if len(a) != len(b) {
return false
}
zero := make([]byte, h.Size())
ah, _ := IsoCanonicalHashes(a, decomp, true, h, zero)
bh, _ := IsoCanonicalHashes(b, decomp, true, h, zero)
if len(ah) != len(bh) {
return false
}
work := make([][]byte, 2*len(ah))
lexicalHashes(work[:len(ah)], ah)
lexicalHashes(work[len(ah):], bh)
for i := range work[:len(ah)] {
if !bytes.Equal(work[i], work[i+len(ah)]) {
return false
}
}
return true
}
func lexicalHashes(dst [][]byte, hashes map[string][]byte) {
i := 0
for _, s := range hashes {
dst[i] = s
i++
}
order.BySliceValues(dst)
}
// IsoCanonicalHashes returns a mapping between the nodes of the RDF graph
// dataset described by the given statements using the provided hash
// function. If decomp is true, the graphs are decomposed before hashing.
// If dist is true and the input graph is decomposed into identical splits,
// the entire graph will be hashed to distinguish nodes. If decomp is false,
// dist has no effect.
//
// Blank node hashes are initially set to the value of zero. Hash values
// are provided for literal and IRI nodes as well as for blank nodes. The
// hash input for literal nodes includes the quotes, and the input for IRI
// nodes first removes the angle brackets around the IRI, although these are
// included in the map keys.
//
// Note that hashes returned by IsoCanonicalHashes with decomp=true are not
// comparable with hashes returned by IsoCanonicalHashes with decomp=false.
//
// See http://aidanhogan.com/docs/rdf-canonicalisation.pdf for details of
// the hashing algorithm.
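//
// A minimal sketch of use, with sha256 as an illustrative hash choice and
// zero a zero-filled slice of the hash's size, as used by Isomorphic:
//
//	h := sha256.New()
//	zero := make([]byte, h.Size())
//	hashes, terms := IsoCanonicalHashes(statements, true, true, h, zero)
//	// hashes maps each term to its hash, and terms maps each
//	// hash back to the set of terms that share it.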
func IsoCanonicalHashes(statements []*Statement, decomp, dist bool, h hash.Hash, zero []byte) (hashes map[string][]byte, terms map[string]map[string]bool) {
if len(statements) == 0 {
return nil, nil
}
if debug {
debug.log(0, "Statements:")
for _, s := range statements {
debug.log(0, s)
}
debug.log(0)
}
hash, parts, ok := hashBNodesPerSplit(statements, decomp, h, zero)
if debug {
debug.log(0, "Blanks:")
if len(hash.blanks) != 0 {
for _, b := range hash.blanks {
debug.log(0, b)
}
} else {
debug.log(0, "none")
}
debug.log(0)
debug.log(0, "Parts:")
debug.logParts(0, parts)
debug.logf(0, "Hashes from hashBNodesPerSplit (splitting=%t):\n", decomp)
debug.logHashes(0, hash.hashOf, h.Size())
}
if ok {
return hash.hashOf, hash.termsFor
}
// TODO: remove the triviality exception in distinguish and return
// the original hashes if this result is nil. Make the triviality
// exception optional.
hashes = distinguish(statements, dist, h, zero, hash, parts, nil, 0)
if hashes == nil {
// distinguish was given trivial parts and
// we did not ask it to try to merge them.
return hash.hashOf, hash.termsFor
}
if debug {
debug.log(0, "Final resolved Hashes:")
debug.logHashes(0, hashes, h.Size())
}
terms = make(map[string]map[string]bool, len(hashes))
for k, h := range hashes {
terms[string(h)] = map[string]bool{k: true}
}
return hashes, terms
}
// C14n performs a relabeling of the statements in src based on the terms
// obtained from IsoCanonicalHashes, placing the results in dst and returning
// them. The relabeling scheme is the same as for the Universal RDF Dataset
// Normalization Algorithm: blank terms are ordered lexically by their hash
// value and then given a blank label with the prefix "_:c14n" and an
// identifier counter corresponding to the label's sort rank.
//
// If dst is nil, it is allocated, otherwise the length of dst must match the
// length of src.
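//
// A sketch of canonicalising statements with hashes obtained from
// IsoCanonicalHashes (sha256 is an illustrative choice):
//
//	h := sha256.New()
//	zero := make([]byte, h.Size())
//	_, terms := IsoCanonicalHashes(statements, true, true, h, zero)
//	canon, err := C14n(nil, statements, terms)
//	if err != nil {
//		// Handle incomplete or inconsistent terms.
//	}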
func C14n(dst, src []*Statement, terms map[string]map[string]bool) ([]*Statement, error) {
if dst == nil {
dst = make([]*Statement, len(src))
}
if len(dst) != len(src) {
return dst, errors.New("rdf: slice length mismatch")
}
need := make(map[string]bool)
for _, s := range src {
for _, t := range []string{
s.Subject.Value,
s.Object.Value,
s.Label.Value,
} {
if !isBlank(t) {
continue
}
need[t] = true
}
}
blanks := make([]string, len(need))
i := 0
for h, m := range terms {
var ok bool
for t := range m {
if isBlank(t) {
ok = true
break
}
}
if !ok {
continue
}
if i == len(blanks) {
return dst, errors.New("rdf: too many blanks in terms")
}
blanks[i] = h
i++
}
slices.Sort(blanks)
c14n := make(map[string]string)
for i, b := range blanks {
if len(terms[b]) == 0 {
return nil, fmt.Errorf("rdf: no term for blank with hash %x", b)
}
for t := range terms[b] {
if !isBlank(t) {
continue
}
if _, exists := c14n[t]; exists {
continue
}
delete(need, t)
c14n[t] = fmt.Sprintf("_:c14n%d", i)
}
}
if len(need) != 0 {
return dst, fmt.Errorf("rdf: missing term hashes for %d terms", len(need))
}
for i, s := range src {
if dst[i] == nil {
dst[i] = &Statement{}
}
n := dst[i]
n.Subject = Term{Value: translate(s.Subject.Value, c14n)}
n.Predicate = s.Predicate
n.Object = Term{Value: translate(s.Object.Value, c14n)}
n.Label = Term{Value: translate(s.Label.Value, c14n)}
}
sortC14nStatements(dst)
return dst, nil
}
func translate(term string, mapping map[string]string) string {
if term, ok := mapping[term]; ok {
return term
}
return term
}
func sortC14nStatements(statements []*Statement) {
slices.SortFunc(statements, func(a, b *Statement) int {
if n := cmp.Compare(a.Subject.Value, b.Subject.Value); n != 0 {
return n
}
// Always IRI.
if n := cmp.Compare(a.Predicate.Value, b.Predicate.Value); n != 0 {
return n
}
return cmp.Compare(a.Object.Value, b.Object.Value)
})
}
// hashBNodes returns the hashed blank nodes of the graph described by statements
// using the provided hash function. Blank node hashes are initialised with
// zero, or with the values in hash0 when it is non-nil.
//
// This is algorithm 1 in doi:10.1145/3068333.
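//
// For example, for a triple _:b <p> <o>, each iteration adds
// hashTuple(h, hash(<o>), hash(<p>), lab, '+') to the hash bag for _:b,
// and for <s> <p> _:b it adds hashTuple(h, hash(<s>), hash(<p>), lab, '-'),
// where lab is the hash of the statement's label when present.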
func hashBNodes(statements []*Statement, h hash.Hash, zero []byte, hash0 map[string][]byte) (hash *table, disjoint bool) {
curr := newTable()
for _, s := range statements {
for i, t := range []string{
s.Subject.Value,
s.Predicate.Value,
s.Object.Value,
s.Label.Value,
} {
switch {
case i == 3 && t == "":
continue
case isBlank(t):
if hash0 == nil {
curr.set(t, zero)
} else {
curr.set(t, hash0[t])
}
case isIRI(t):
h.Reset()
h.Write([]byte(t[1 : len(t)-1]))
curr.set(t, h.Sum(nil))
default:
h.Reset()
h.Write([]byte(t))
curr.set(t, h.Sum(nil))
}
}
}
bag := newHashBag(h, curr)
last := curr.clone()
for {
curr, last = last, curr
for _, s := range statements {
if isBlank(s.Subject.Value) {
var lab []byte
if s.Label.Value != "" {
lab = last.hashOf[s.Label.Value]
}
c := hashTuple(h, last.hashOf[s.Object.Value], last.hashOf[s.Predicate.Value], lab, []byte{'+'})
bag.add(s.Subject.Value, c)
}
if isBlank(s.Object.Value) {
var lab []byte
if s.Label.Value != "" {
lab = last.hashOf[s.Label.Value]
}
c := hashTuple(h, last.hashOf[s.Subject.Value], last.hashOf[s.Predicate.Value], lab, []byte{'-'})
bag.add(s.Object.Value, c)
}
// This and the lab value above implement the label hashing
// required for RDF dataset hashing as described in
// https://doi.org/10.5281/zenodo.3154322 v1.0
// Readme.md#adaptation-of-the-algorithms-to-handle-datasets.
if isBlank(s.Label.Value) {
c := hashTuple(h, last.hashOf[s.Subject.Value], last.hashOf[s.Predicate.Value], last.hashOf[s.Object.Value], []byte{'.'})
bag.add(s.Label.Value, c)
}
}
for t := range bag.hashesFor {
curr.set(t, bag.sum(t))
}
disjoint = curr.allUnique()
if disjoint || !curr.changedFrom(last) {
return curr, disjoint
}
}
}
// table is a collision-aware hash collection for RDF terms.
type table struct {
// hashOf holds the hash for each term.
hashOf map[string][]byte
// termsFor holds, keyed by hash, the
// set of terms that share that hash.
termsFor map[string]map[string]bool
// isBlank and blanks are the set of blank
// nodes.
// isBlank is nil for cloned tables.
isBlank map[string]bool
// blanks is nil for tables created
// with newTable.
blanks []string
}
// newTable returns a new hash table.
func newTable() *table {
return &table{
hashOf: make(map[string][]byte),
termsFor: make(map[string]map[string]bool),
isBlank: make(map[string]bool),
}
}
// wasCloned returns whether t is a parent or child of a cloning operation.
func (t *table) wasCloned() bool { return t.isBlank == nil }
// isNew returns whether t is a new table.
func (t *table) isNew() bool { return t.blanks == nil }
// clone returns a clone of the receiver.
func (t *table) clone() *table {
new := &table{
hashOf: make(map[string][]byte),
termsFor: make(map[string]map[string]bool),
}
for term, hash := range t.hashOf {
new.hashOf[term] = hash
}
for hash, coll := range t.termsFor {
if len(coll) == 0 {
continue
}
terms := make(map[string]bool)
for term := range coll {
terms[term] = true
}
new.termsFor[hash] = terms
}
if t.isNew() {
t.blanks = make([]string, len(t.isBlank))
i := 0
for n := range t.isBlank {
t.blanks[i] = n
i++
}
t.isBlank = nil
}
new.blanks = t.blanks
return new
}
// TODO(kortschak): Make the hash table in table.hashOf reuse the []byte on
// update. This is not trivial since we need to check for changes, so we can't
// just get the current hash buffer and write into it. So if this is done we
// probably need a pair of buffers, a current and a waiting buffer.

// set sets the hash of the term, removing any previously set hash.
func (t *table) set(term string, hash []byte) {
prev := t.hashOf[term]
if bytes.Equal(prev, hash) {
return
}
t.hashOf[term] = hash
// Remove the term from the set of terms for its previous hash.
switch terms := t.termsFor[string(prev)]; {
case len(terms) == 1:
delete(t.termsFor, string(prev))
case len(terms) > 1:
delete(terms, term)
}
terms, ok := t.termsFor[string(hash)]
if ok {
terms[term] = true
} else {
t.termsFor[string(hash)] = map[string]bool{term: true}
}
if !t.wasCloned() && isBlank(term) {
// We are in the original table, so note
// any blank node label that we see.
t.isBlank[term] = true
}
}
// allUnique returns whether every blank node term has a unique hash. allUnique
// can only be called on a table that was returned by clone.
func (t *table) allUnique() bool {
if t.isNew() {
panic("checked hash bag from uncloned table")
}
for _, term := range t.blanks {
if len(t.termsFor[string(t.hashOf[term])]) > 1 {
return false
}
}
return true
}
// changedFrom returns whether the pattern of hash equality between blank
// node pairs in the receiver differs from that in last, that is, whether
// the hash partition has changed. changedFrom can only be called on a
// table that was returned by clone.
func (t *table) changedFrom(last *table) bool {
if t.isNew() {
panic("checked hash bag from uncloned table")
}
for i, x := range t.blanks {
for _, y := range t.blanks[i+1:] {
if bytes.Equal(t.hashOf[x], t.hashOf[y]) != bytes.Equal(last.hashOf[x], last.hashOf[y]) {
return true
}
}
}
return false
}
// hashBag implements a commutative and associative hash.
// See notes in https://doi.org/10.5281/zenodo.3154322 v1.0
// Readme.md#what-is-the-precise-specification-of-hashbag.
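//
// Because sum sorts the accumulated hashes before combining them, the
// order in which hashes are added does not change the result:
//
//	b.add(t, h1)
//	b.add(t, h2)
//	// b.sum(t) is the same as if h1 and h2
//	// had been added in the opposite order.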
type hashBag struct {
hash hash.Hash
hashesFor map[string][][]byte
}
// newHashBag returns a new hashBag using the provided hash function for
// the given hash table. newHashBag can only take a table parameter that
// was returned by newTable.
func newHashBag(h hash.Hash, t *table) hashBag {
if t.wasCloned() {
panic("made hash bag from cloned table")
}
b := hashBag{hash: h, hashesFor: make(map[string][][]byte, len(t.isBlank))}
for n := range t.isBlank {
b.hashesFor[n] = [][]byte{t.hashOf[n]}
}
return b
}
// add adds the hash to the hash bag for the term.
func (b hashBag) add(term string, hash []byte) {
b.hashesFor[term] = append(b.hashesFor[term], hash)
}
// sum calculates the hash sum for the given term, updates the hash bag
// state and returns the hash.
func (b hashBag) sum(term string) []byte {
p := b.hashesFor[term]
order.BySliceValues(p)
h := hashTuple(b.hash, p...)
b.hashesFor[term] = b.hashesFor[term][:1]
b.hashesFor[term][0] = h
return h
}
// hashTuple returns the h hash of the concatenation of t.
func hashTuple(h hash.Hash, t ...[]byte) []byte {
h.Reset()
for _, b := range t {
h.Write(b)
}
return h.Sum(nil)
}
// hashBNodesPerSplit returns the independently hashed blank nodes of the
// graph described by statements using the provided hash function. Hashes
// are initialised with zero.
//
// This is algorithm 2 in doi:10.1145/3068333.
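//
// Each connected component found by split is hashed independently and the
// resulting tables are merged, which is safe because the components are
// guaranteed to share no blank nodes.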
func hashBNodesPerSplit(statements []*Statement, decomp bool, h hash.Hash, zero []byte) (hash *table, parts byLengthHash, disjoint bool) {
if !decomp {
hash, ok := hashBNodes(statements, h, zero, nil)
parts = appendOrdered(byLengthHash{}, hash.termsFor)
sort.Sort(parts)
return hash, parts, ok
}
splits := split(statements)
// Avoid recombination work if there is only one split.
if len(splits) == 1 {
hash, ok := hashBNodes(statements, h, zero, nil)
parts = appendOrdered(byLengthHash{}, hash.termsFor)
sort.Sort(parts)
return hash, parts, ok
}
hash = &table{hashOf: make(map[string][]byte)}
disjoint = true
for _, g := range splits {
part, ok := hashBNodes(g, h, zero, nil)
// Each split is guaranteed to be disjoint in its
// set of blank nodes, so we can just append to our
// collection of blanks.
hash.blanks = append(hash.blanks, part.blanks...)
if !ok {
// Allow a short-circuit of the allUnique check.
disjoint = false
}
for k, v := range part.hashOf {
hash.hashOf[k] = v
}
parts = appendOrdered(parts, part.termsFor)
}
sort.Sort(parts)
return hash, parts, disjoint && allUnique(hash.hashOf)
}
// appendOrdered appends to parts the blank node labels (stored in the
// second key of partSets) grouped by each hash (stored in the first key).
func appendOrdered(parts byLengthHash, partSets map[string]map[string]bool) byLengthHash {
for h, s := range partSets {
var p []string
for e := range s {
if isBlank(e) {
p = append(p, e)
}
}
if p != nil {
parts.nodes = append(parts.nodes, p)
parts.hashes = append(parts.hashes, h)
}
}
return parts
}
// byLengthHash implements ascending length sort of a set of blank RDF
// term partitions with ties broken by lexical ordering of the partitions'
// hashes.
type byLengthHash struct {
// nodes holds the blank nodes of a part.
nodes [][]string
// hashes holds the hashes corresponding
// to the nodes in the nodes field, using
// the same index.
hashes []string
}
func (s byLengthHash) Len() int { return len(s.nodes) }
func (s byLengthHash) Less(i, j int) bool {
switch {
case len(s.nodes[i]) < len(s.nodes[j]):
return true
case len(s.nodes[i]) > len(s.nodes[j]):
return false
}
return s.hashes[i] < s.hashes[j]
}
func (s byLengthHash) Swap(i, j int) {
s.nodes[i], s.nodes[j] = s.nodes[j], s.nodes[i]
s.hashes[i], s.hashes[j] = s.hashes[j], s.hashes[i]
}
// allUnique returns whether the []byte hash values in hashes are all unique.
func allUnique(hashes map[string][]byte) bool {
set := make(map[string]bool)
for _, h := range hashes {
if set[string(h)] {
return false
}
set[string(h)] = true
}
return true
}
// split returns the statements forming connected components in the graph
// described by statements.
//
// This is split in algorithm 2 in doi:10.1145/3068333.
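//
// For example, the statements
//
//	_:a <p> _:b .
//	_:c <p> <x> .
//	<s> <p> <o> .
//
// are returned as three splits: one holding the first statement, one
// holding the second, and a final split holding the ground statement.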
func split(statements []*Statement) [][]*Statement {
ds := make(djSet)
for _, s := range statements {
ds.add(s.Subject.Value)
ds.add(s.Object.Value)
if isBlank(s.Subject.Value) && isBlank(s.Object.Value) {
ds.union(ds.find(s.Subject.Value), ds.find(s.Object.Value))
}
}
var (
splits [][]*Statement
ground []*Statement
)
idxOf := make(map[*dsNode]int)
for _, s := range statements {
var t string
switch {
case isBlank(s.Subject.Value):
t = s.Subject.Value
case isBlank(s.Object.Value):
t = s.Object.Value
default:
ground = append(ground, s)
continue
}
r := ds.find(t)
if r == nil {
panic(fmt.Sprintf("term not found: %q", t))
}
i, ok := idxOf[r]
if !ok {
i = len(splits)
idxOf[r] = i
splits = append(splits, []*Statement{s})
} else {
splits[i] = append(splits[i], s)
}
}
if ground != nil {
splits = append(splits, ground)
}
if debug {
debug.log(0, "Splits:")
for i, s := range splits {
for j, t := range s {
if j == 0 {
debug.logf(0, "%d.\t%s\n", i+1, t)
} else {
debug.logf(0, "\t%s\n", t)
}
}
debug.log(0)
}
}
return splits
}
// distinguish returns G⊥, the smallest hash-labelled graph found thus far.
// The graph is returned as a node to hash lookup.
//
// This is part of algorithm 3 in doi:10.1145/3068333.
//
// The correspondence between the parameters for the function in the paper
// with the implementation here is as follows:
// - G = statements
// - hash = hash
// - P = parts (already sorted by hashBNodesPerSplit)
// - G⊥ = lowest
// - B = hash.blanks
//
// The additional parameter dist specifies that distinguish should treat
// coequal trivial parts as a single coarse intermediate part and distinguish
// the nodes in that merged part.
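//
// At each level of recursion, distinguish marks a single blank node b in
// the lowest non-trivial part by rehashing it with an appended '@' byte,
// recomputes all hashes with hashBNodes, and either accepts the resulting
// labelling if it is trivial and lexically lowest, or recurses on the
// refined partition.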
func distinguish(statements []*Statement, dist bool, h hash.Hash, zero []byte, hash *table, parts byLengthHash, lowest map[string][]byte, depth int) map[string][]byte {
if debug {
debug.log(depth, "Running Distinguish")
}
var small []string
var k int
for k, small = range parts.nodes {
if len(small) > 1 {
break
}
}
if len(small) < 2 {
if lowest != nil || !dist {
if debug {
debug.log(depth, "Return lowest (no non-trivial parts):")
debug.logHashes(depth, lowest, h.Size())
}
return lowest
}
// We have been given a set of fine parts,
// but to reach here they must have been
// non-uniquely labeled, so treat them
// as a single coarse part.
k, small = 0, parts.nodes[0]
}
if debug {
debug.logf(depth, "Part: %v %x\n\n", small, parts.hashes[k])
debug.log(depth, "Orig hash:")
debug.logHashes(depth, hash.hashOf, h.Size())
}
smallHash := hash.hashOf[small[0]]
for _, p := range parts.nodes[k:] {
if !bytes.Equal(smallHash, hash.hashOf[p[0]]) {
if debug {
debug.logf(depth, "End of co-equal hashes: %x != %x\n\n", smallHash, hash.hashOf[p[0]])
}
break
}
for i, b := range p {
if debug {
debug.logf(depth, "Iter: %d — B = %q\n\n", i, b)
if depth == 0 {
debug.log(depth, "Current lowest:\n")
debug.logHashes(depth, lowest, h.Size())
}
}
hashP := hash.clone()
hashP.set(b, hashTuple(h, hashP.hashOf[b], []byte{'@'}))
hashPP, ok := hashBNodes(statements, h, zero, hashP.hashOf)
if ok {
if debug {
debug.log(depth, "hashPP is trivial")
debug.log(depth, "comparing hashPP\n")
debug.logHashes(depth, hashPP.hashOf, h.Size())
debug.log(depth, "with previous\n")
debug.logHashes(depth, lowest, h.Size())
}
if lowest == nil || graphLess(statements, hashPP.hashOf, lowest) {
lowest = hashPP.hashOf
debug.log(depth, "choose hashPP\n")
}
} else {
partsP := appendOrdered(byLengthHash{}, hashPP.termsFor)
sort.Sort(partsP)
if debug {
debug.log(depth, "Parts':")
debug.logParts(depth, partsP)
debug.log(depth, "Recursive distinguish")
debug.log(depth, "Called with current lowest:\n")
debug.logHashes(depth, lowest, h.Size())
}
lowest = distinguish(statements, dist, h, zero, hashPP, partsP, lowest, depth+1)
}
}
}
if debug {
debug.log(depth, "Return lowest:")
debug.logHashes(depth, lowest, h.Size())
}
return lowest
}
// graphLess returns whether the statements relabelled by a order before the
// statements relabelled by b, with terms ordered syntactically, triples
// ordered lexicographically, and graphs ordered such that G < H if and only
// if G ⊂ H or there exists a triple t ∈ G \ H such that no triple
// t' ∈ H \ G exists where t' < t.
//
// See p9 of https://doi.org/10.1145/3068333
func graphLess(statements []*Statement, a, b map[string][]byte) bool {
g := newLexicalStatements(statements, a)
sort.Sort(g)
h := newLexicalStatements(statements, b)
sort.Sort(h)
gSubH := sub(g, h, len(g.statements))
if len(gSubH) == 0 {
return true
}
hSubG := sub(h, g, 1)
if len(hSubG) == 0 {
return true
}
lowestH := relabeledStatement{hSubG[0], h.hashes}
for _, s := range gSubH {
rs := relabeledStatement{s, g.hashes}
if rs.less(lowestH) {
return true
}
}
return false
}
// lexicalStatements is a sort implementation for Statements with blank
// node labels replaced with their hash.
type lexicalStatements struct {
statements []*Statement
hashes map[string][]byte
}
func newLexicalStatements(statements []*Statement, hash map[string][]byte) lexicalStatements {
s := lexicalStatements{
statements: make([]*Statement, len(statements)),
hashes: hash,
}
copy(s.statements, statements)
return s
}
// sub returns the difference a \ b, up to max elements long. Both a and b
// must already be sorted.
func sub(a, b lexicalStatements, max int) []*Statement {
var d []*Statement
var i, j int
for i < len(a.statements) && j < len(b.statements) && len(d) < max {
ra := relabeledStatement{a.statements[i], a.hashes}
rb := relabeledStatement{b.statements[j], b.hashes}
switch {
case ra.less(rb):
d = append(d, a.statements[i])
i++
case rb.less(ra):
j++
default:
i++
}
}
if len(d) < max {
d = append(d, a.statements[i:min(len(a.statements), i+max-len(d))]...)
}
return d
}
func (s lexicalStatements) Len() int { return len(s.statements) }
func (s lexicalStatements) Less(i, j int) bool {
return relabeledStatement{s.statements[i], s.hashes}.less(relabeledStatement{s.statements[j], s.hashes})
}
func (s lexicalStatements) Swap(i, j int) {
s.statements[i], s.statements[j] = s.statements[j], s.statements[i]
}
// relabeledStatement is a statement that is orderable by its blank node
// hash relabeling.
type relabeledStatement struct {
statement *Statement
labels map[string][]byte
}
func (a relabeledStatement) less(b relabeledStatement) bool {
switch {
case relabeledTerm{a.statement.Subject, a.labels}.less(relabeledTerm{b.statement.Subject, b.labels}):
return true
case relabeledTerm{b.statement.Subject, b.labels}.less(relabeledTerm{a.statement.Subject, a.labels}):
return false
}
switch { // Always IRI.
case a.statement.Predicate.Value < b.statement.Predicate.Value:
return true
case a.statement.Predicate.Value > b.statement.Predicate.Value:
return false
}
switch {
case relabeledTerm{a.statement.Object, a.labels}.less(relabeledTerm{b.statement.Object, b.labels}):
return true
case relabeledTerm{b.statement.Object, b.labels}.less(relabeledTerm{a.statement.Object, a.labels}):
return false
}
return relabeledTerm{a.statement.Label, a.labels}.less(relabeledTerm{b.statement.Label, b.labels})
}
func (s relabeledStatement) String() string {
subj := relabeledTerm{term: s.statement.Subject, labels: s.labels}
obj := relabeledTerm{term: s.statement.Object, labels: s.labels}
if s.statement.Label.Value == "" {
return fmt.Sprintf("%s %s %s .", subj, s.statement.Predicate.Value, obj)
}
lab := relabeledTerm{term: s.statement.Label, labels: s.labels}
return fmt.Sprintf("%s %s %s %s .", subj, s.statement.Predicate.Value, obj, lab)
}
// relabeledTerm is a term that is orderable by its blank node hash relabeling.
type relabeledTerm struct {
term Term
labels map[string][]byte
}
func (a relabeledTerm) less(b relabeledTerm) bool {
aIsBlank := isBlank(a.term.Value)
bIsBlank := isBlank(b.term.Value)
switch {
case aIsBlank && bIsBlank:
return bytes.Compare(a.labels[a.term.Value], b.labels[b.term.Value]) < 0
case aIsBlank:
return blankPrefix < unquoteIRI(b.term.Value)
case bIsBlank:
return unquoteIRI(a.term.Value) < blankPrefix
default:
return unquoteIRI(a.term.Value) < unquoteIRI(b.term.Value)
}
}
func unquoteIRI(s string) string {
if len(s) > 1 && s[0] == '<' && s[len(s)-1] == '>' {
s = s[1 : len(s)-1]
}
return s
}
func (t relabeledTerm) String() string {
if !isBlank(t.term.Value) {
return t.term.Value
}
h, ok := t.labels[t.term.Value]
if !ok {
return t.term.Value + "_missing_hash"
}
return fmt.Sprintf("_:%0x", h)
}