graph/formats/rdf: use isBlank and isIRI helpers

This commit is contained in:
Dan Kortschak
2021-06-12 19:05:03 +09:30
parent e4c04dde5f
commit 0164be019d
4 changed files with 42 additions and 35 deletions

View File

@@ -10,7 +10,6 @@ import (
"fmt"
"hash"
"sort"
"strings"
)
// See "Canonical Forms for Isomorphic and Equivalent RDF Graphs: Algorithms
@@ -160,7 +159,7 @@ func C14n(dst, src []*Statement, terms map[string]map[string]bool) ([]*Statement
s.Object.Value,
s.Label.Value,
} {
if !strings.HasPrefix(t, "_:") {
if !isBlank(t) {
continue
}
need[t] = true
@@ -172,7 +171,7 @@ func C14n(dst, src []*Statement, terms map[string]map[string]bool) ([]*Statement
for h, m := range terms {
var ok bool
for t := range m {
if strings.HasPrefix(t, "_:") {
if isBlank(t) {
ok = true
break
}
@@ -194,7 +193,7 @@ func C14n(dst, src []*Statement, terms map[string]map[string]bool) ([]*Statement
return nil, fmt.Errorf("rdf: no term for blank with hash %x", b)
}
for t := range terms[b] {
if !strings.HasPrefix(t, "_:") {
if !isBlank(t) {
continue
}
if _, exists := c14n[t]; exists {
@@ -275,13 +274,13 @@ func hashBNodes(statements []*Statement, h hash.Hash, zero []byte, hash0 map[str
switch {
case i == 3 && t == "":
continue
case strings.HasPrefix(t, "_:"):
case isBlank(t):
if hash0 == nil {
curr.set(t, zero)
} else {
curr.set(t, hash0[t])
}
case strings.HasPrefix(t, "<") && strings.HasSuffix(t, ">"):
case isIRI(t):
h.Reset()
h.Write([]byte(t[1 : len(t)-1])) //nolint:errcheck
curr.set(t, h.Sum(nil))
@@ -298,7 +297,7 @@ func hashBNodes(statements []*Statement, h hash.Hash, zero []byte, hash0 map[str
for {
curr, last = last, curr
for _, s := range statements {
if strings.HasPrefix(s.Subject.Value, "_:") {
if isBlank(s.Subject.Value) {
var lab []byte
if s.Label.Value != "" {
lab = last.hashOf[s.Label.Value]
@@ -307,7 +306,7 @@ func hashBNodes(statements []*Statement, h hash.Hash, zero []byte, hash0 map[str
bag.add(s.Subject.Value, c)
}
if strings.HasPrefix(s.Object.Value, "_:") {
if isBlank(s.Object.Value) {
var lab []byte
if s.Label.Value != "" {
lab = last.hashOf[s.Label.Value]
@@ -320,7 +319,7 @@ func hashBNodes(statements []*Statement, h hash.Hash, zero []byte, hash0 map[str
// required for RDF dataset hashing as described in
// https://doi.org/10.5281/zenodo.3154322 v1.0
// Readme.md#adaptation-of-the-algorithms-to-handle-datasets.
if strings.HasPrefix(s.Label.Value, "_:") {
if isBlank(s.Label.Value) {
c := hashTuple(h, last.hashOf[s.Subject.Value], last.hashOf[s.Predicate.Value], last.hashOf[s.Object.Value], []byte{'.'})
bag.add(s.Label.Value, c)
}
@@ -430,7 +429,7 @@ func (t *table) set(term string, hash []byte) {
t.termsFor[string(hash)] = map[string]bool{term: true}
}
if !t.wasCloned() && strings.HasPrefix(term, "_:") {
if !t.wasCloned() && isBlank(term) {
// We are in the original table, so note
// any blank node label that we see.
t.isBlank[term] = true
@@ -571,7 +570,7 @@ func appendOrdered(parts byLengthHash, partSets map[string]map[string]bool) byLe
for h, s := range partSets {
var p []string
for e := range s {
if strings.HasPrefix(e, "_:") {
if isBlank(e) {
p = append(p, e)
}
}
@@ -631,7 +630,7 @@ func split(statements []*Statement) [][]*Statement {
for _, s := range statements {
ds.add(s.Subject.Value)
ds.add(s.Object.Value)
if strings.HasPrefix(s.Subject.Value, "_:") && strings.HasPrefix(s.Object.Value, "_:") {
if isBlank(s.Subject.Value) && isBlank(s.Object.Value) {
ds.union(ds.find(s.Subject.Value), ds.find(s.Object.Value))
}
}
@@ -644,9 +643,9 @@ func split(statements []*Statement) [][]*Statement {
for _, s := range statements {
var t string
switch {
case strings.HasPrefix(s.Subject.Value, "_:"):
case isBlank(s.Subject.Value):
t = s.Subject.Value
case strings.HasPrefix(s.Object.Value, "_:"):
case isBlank(s.Object.Value):
t = s.Object.Value
default:
ground = append(ground, s)
@@ -929,15 +928,15 @@ type relabeledTerm struct {
}
func (a relabeledTerm) less(b relabeledTerm) bool {
aBlank := strings.HasPrefix(a.term.Value, "_:")
bBlank := strings.HasPrefix(b.term.Value, "_:")
aIsBlank := isBlank(a.term.Value)
bIsBlank := isBlank(b.term.Value)
switch {
case aBlank && bBlank:
case aIsBlank && bIsBlank:
return bytes.Compare(a.labels[a.term.Value], b.labels[b.term.Value]) < 0
case aBlank:
return "_:" < unquoteIRI(b.term.Value)
case bBlank:
return unquoteIRI(a.term.Value) < "_:"
case aIsBlank:
return blankPrefix < unquoteIRI(b.term.Value)
case bIsBlank:
return unquoteIRI(a.term.Value) < blankPrefix
default:
return unquoteIRI(a.term.Value) < unquoteIRI(b.term.Value)
}
@@ -951,7 +950,7 @@ func unquoteIRI(s string) string {
}
func (t relabeledTerm) String() string {
if !strings.HasPrefix(t.term.Value, "_:") {
if !isBlank(t.term.Value) {
return t.term.Value
}
h, ok := t.labels[t.term.Value]

View File

@@ -14,7 +14,6 @@ import (
"path/filepath"
"reflect"
"sort"
"strings"
"testing"
"text/tabwriter"
"time"
@@ -287,7 +286,7 @@ func permuteBlanks(s []*Statement, src rand.Source) ([]*Statement, map[string]st
var blanks []string
for t := range terms {
if strings.HasPrefix(t, "_:") {
if isBlank(t) {
blanks = append(blanks, t)
}
}
@@ -309,7 +308,7 @@ func hashBlanks(s []*Statement, h hash.Hash) ([]*Statement, map[string]string) {
e.Object.Value,
e.Label.Value,
} {
if !strings.HasPrefix(t, "_:") {
if !isBlank(t) {
continue
}
h.Reset()
@@ -331,7 +330,7 @@ func mangleFirstIL(s []*Statement, h hash.Hash) ([]*Statement, map[string]string
e.Object.Value,
e.Label.Value,
} {
if strings.HasPrefix(t, "_:") {
if isBlank(t) {
continue
}
h.Reset()
@@ -354,7 +353,7 @@ func mergeFirst2B(s []*Statement) ([]*Statement, map[string]string) {
e.Object.Value,
e.Label.Value,
} {
if !strings.HasPrefix(t, "_:") {
if !isBlank(t) {
continue
}
terms[t] = t
@@ -444,7 +443,7 @@ func TestLexicalStatements(t *testing.T) {
func termsFor(hashes map[string][]byte, hash hash.Hash) map[string]string {
terms := make(map[string]string)
for t, h := range hashes {
if strings.HasPrefix(t, "_:") {
if isBlank(t) {
terms[t] = fmt.Sprintf("_:%0*x", 2*hash.Size(), h)
}
}

View File

@@ -75,7 +75,13 @@ func NewBlankTerm(label string) (Term, error) {
if err != nil {
return Term{}, err
}
return Term{Value: "_:" + label}, nil
return Term{Value: blankPrefix + label}, nil
}
// blankPrefix is the leading marker of a blank node term value. It is
// the text NewBlankTerm places in front of the label it is given.
const blankPrefix = "_:"
// isBlank reports whether s begins with blankPrefix. Only the prefix is
// examined; the remainder of s is not validated as a label.
func isBlank(s string) bool {
return strings.HasPrefix(s, blankPrefix)
}
// NewIRITerm returns a Term based on the provided IRI which must
@@ -89,6 +95,10 @@ func NewIRITerm(iri string) (Term, error) {
return Term{Value: escape("<", iri, ">")}, nil
}
// isIRI reports whether s is delimited by angle brackets, that is,
// whether it starts with "<" and ends with ">". The text between the
// brackets is not inspected.
func isIRI(s string) bool {
	// Reject early when the opening bracket is missing so that only
	// the closing bracket remains to be checked.
	if !strings.HasPrefix(s, "<") {
		return false
	}
	return strings.HasSuffix(s, ">")
}
// NewLiteralTerm returns a Term based on the literal text and an
// optional qualifier which may either be a "@"-prefixed language
// tag or a valid IRI. The text will be escaped if necessary and quoted,

View File

@@ -12,7 +12,6 @@ import (
"fmt"
"hash"
"sort"
"strings"
"gonum.org/v1/gonum/stat/combin"
)
@@ -112,7 +111,7 @@ func (u *urna) relabel(dst, src []*Statement) ([]*Statement, error) {
s.Object.Value,
s.Label.Value,
} {
if !strings.HasPrefix(t, "_:") {
if !isBlank(t) {
continue
}
for _, e := range u.statementsFor[t] {
@@ -258,7 +257,7 @@ func (u *urna) hashFirstDegreeQuads(b string) string {
// replaceBlank implements 3.1 of the algorithm described at
// https://json-ld.github.io/rdf-dataset-canonicalization/spec/index.html#algorithm-1.
func replaceBlank(b, matching, label string) string {
if !strings.HasPrefix(b, "_:") { // 3,1
if !isBlank(b) { // 3.1
return b
}
if label != "" { // URGNA2012 modification.
@@ -372,7 +371,7 @@ func (u *urna) hashToRelatedURDNA2015(b string, names *issuer) map[string][]stri
s.Object.Value,
s.Label.Value,
} {
if !strings.HasPrefix(term, "_:") || term == b {
if !isBlank(term) || term == b {
continue
}
@@ -404,10 +403,10 @@ func (u *urna) hashToRelatedURGNA2012(b string, names *issuer) map[string][]stri
pos byte
)
switch {
case strings.HasPrefix(s.Subject.Value, "_:") && s.Subject.Value != b: // 1.1
case isBlank(s.Subject.Value) && s.Subject.Value != b: // 1.1
term = s.Subject.Value
pos = 'p'
case strings.HasPrefix(s.Object.Value, "_:") && s.Object.Value != b: // 1.2
case isBlank(s.Object.Value) && s.Object.Value != b: // 1.2
term = s.Object.Value
pos = 'r'
default: