gonum/graph/formats/rdf/graph.go

// Copyright ©2022 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package rdf

import (
	"fmt"

	"gonum.org/v1/gonum/graph"
	"gonum.org/v1/gonum/graph/iterator"
	"gonum.org/v1/gonum/graph/multi"
	"gonum.org/v1/gonum/graph/set/uid"
)

// Graph implements an RDF graph satisfying the graph.Graph and graph.Multigraph
// interfaces.
type Graph struct {
	nodes map[int64]graph.Node                     // node ID to node
	from  map[int64]map[int64]map[int64]graph.Line // source ID to destination ID to line ID to line
	to    map[int64]map[int64]map[int64]graph.Line // destination ID to source ID to line ID to line
	pred  map[int64]map[*Statement]bool            // predicate UID to the set of statements added with it

	termIDs map[string]int64 // term Value text to term UID
	ids     *uid.Set         // ID set used to allocate, mark and release IDs
}

// NewGraph returns a new Graph holding no terms or statements, with all of
// its internal indexes allocated.
func NewGraph() *Graph {
	g := new(Graph)

	g.nodes = make(map[int64]graph.Node)
	g.from = make(map[int64]map[int64]map[int64]graph.Line)
	g.to = make(map[int64]map[int64]map[int64]graph.Line)
	g.pred = make(map[int64]map[*Statement]bool)

	g.termIDs = make(map[string]int64)
	g.ids = uid.NewSet()

	return g
}

// addNode records n in the graph's node table and marks its ID as used.
// It panics if a node with the same ID is already present.
func (g *Graph) addNode(n graph.Node) {
	id := n.ID()
	if _, dup := g.nodes[id]; dup {
		panic(fmt.Sprintf("rdf: node ID collision: %d", id))
	}
	g.nodes[id] = n
	g.ids.Use(id)
}

// AddStatement adds s to the graph. It panics if Term UIDs in the statement
// are not consistent with existing terms in the graph. Statements must not
// be altered while being held by the graph. If the UID fields of the terms
// in s are zero, they will be set to values consistent with the rest of the
// graph on return, mutating the parameter, otherwise the UIDs must match terms
// that already exist in the graph. The statement must be a valid RDF statement
// otherwise AddStatement will panic.
//
// The predicate must be an IRI, the subject must be an IRI or a blank node,
// and the object must be any valid term.
func (g *Graph) AddStatement(s *Statement) {
	_, _, kind, err := s.Predicate.Parts()
	if err != nil {
		panic(fmt.Errorf("rdf: error extracting predicate: %w", err))
	}
	if kind != IRI {
		panic(fmt.Errorf("rdf: predicate is not an IRI: %s", s.Predicate.Value))
	}

	_, _, kind, err = s.Subject.Parts()
	if err != nil {
		panic(fmt.Errorf("rdf: error extracting subject: %w", err))
	}
	switch kind {
	case IRI, Blank:
	default:
		panic(fmt.Errorf("rdf: subject is not an IRI or blank node: %s", s.Subject.Value))
	}

	_, _, kind, err = s.Object.Parts()
	if err != nil {
		panic(fmt.Errorf("rdf: error extracting object: %w", err))
	}
	if kind == Invalid {
		panic(fmt.Errorf("rdf: object is not a valid term: %s", s.Object.Value))
	}

	// Resolve the term UIDs before anything is indexed by them. addTerm
	// may assign a UID to a term whose UID is zero, so the predicate index
	// below must use the UID as it stands after this step. Doing this first
	// also means a UID consistency panic leaves the predicate index untouched.
	g.addTerm(&s.Subject)
	g.addTerm(&s.Predicate)
	g.addTerm(&s.Object)

	statements, ok := g.pred[s.Predicate.UID]
	if !ok {
		statements = make(map[*Statement]bool)
		g.pred[s.Predicate.UID] = statements
	}
	statements[s] = true
	g.setLine(s)
}

// addTerm reconciles t with the graph's table of term UIDs, keyed by the
// term's Value.
//
// If t.UID is zero, it is set to the UID already recorded for t.Value, or
// to a newly allocated ID which is then recorded. If t.UID is non-zero and
// t.Value has no recorded UID, t.UID is recorded. addTerm panics if t.UID is
// non-zero and differs from the UID already recorded for t.Value.
func (g *Graph) addTerm(t *Term) {
	known, seen := g.termIDs[t.Value]
	switch {
	case t.UID == 0 && seen:
		t.UID = known
	case t.UID == 0:
		fresh := g.ids.NewID()
		g.ids.Use(fresh)
		t.UID = fresh
		g.termIDs[t.Value] = fresh
	case !seen:
		g.termIDs[t.Value] = t.UID
	case known != t.UID:
		panic(fmt.Sprintf("rdf: term ID collision: term:%s new ID:%d old ID:%d", t.Value, t.UID, known))
	}
}

// AllStatements returns an iterator over every statement in the graph. The
// iterator walks the edges returned by Edges and the lines of each edge.
func (g *Graph) AllStatements() *Statements {
	it := new(Statements)
	it.eit = g.Edges()
	return it
}

// Edge returns the edge from u to v if such an edge exists and nil otherwise.
// The node v must be directly reachable from u as defined by the From method.
// When an edge exists the returned graph.Edge is a multi.Edge carrying all
// lines from u to v.
func (g *Graph) Edge(uid, vid int64) graph.Edge {
	lines := g.Lines(uid, vid)
	if lines != graph.Empty {
		return multi.Edge{F: g.Node(uid), T: g.Node(vid), Lines: lines}
	}
	return nil
}

// Edges returns all the edges in the graph. Each edge in the returned
// iterator is a multi.Edge holding every line between one ordered pair of
// nodes. graph.Empty is returned when the graph has no nodes or no lines.
func (g *Graph) Edges() graph.Edges {
	if len(g.nodes) == 0 {
		return graph.Empty
	}

	var all []graph.Edge
	for _, src := range g.nodes {
		sid := src.ID()
		for _, bundle := range g.from[sid] {
			if len(bundle) == 0 {
				continue
			}
			lines := make([]graph.Line, 0, len(bundle))
			for _, ln := range bundle {
				lines = append(lines, ln)
			}
			// All lines in a bundle share a destination, so the
			// first one is used to look up the edge's To node.
			all = append(all, multi.Edge{
				F:     g.Node(sid),
				T:     g.Node(lines[0].To().ID()),
				Lines: iterator.NewOrderedLines(lines),
			})
		}
	}

	if len(all) == 0 {
		return graph.Empty
	}
	return iterator.NewOrderedEdges(all)
}

// From returns all nodes in g that can be reached directly from the node
// with the given ID. graph.Empty is returned if there are none.
//
// The returned graph.Nodes is only valid until the next mutation of
// the receiver.
func (g *Graph) From(id int64) graph.Nodes {
	out := g.from[id]
	if len(out) == 0 {
		return graph.Empty
	}
	return iterator.NewNodesByLines(g.nodes, out)
}

// FromSubject returns all nodes in g that can be reached directly from an
// RDF subject term. It is equivalent to calling From with the term's UID.
//
// The returned graph.Nodes is only valid until the next mutation of
// the receiver.
func (g *Graph) FromSubject(t Term) graph.Nodes {
	subject := t.UID
	return g.From(subject)
}

// HasEdgeBetween returns whether an edge exists between nodes x and y without
// considering direction.
func (g *Graph) HasEdgeBetween(xid, yid int64) bool {
	_, forward := g.from[xid][yid]
	_, reverse := g.from[yid][xid]
	return forward || reverse
}

// HasEdgeFromTo returns whether an edge exists in the graph from u to v.
func (g *Graph) HasEdgeFromTo(uid, vid int64) bool {
	targets := g.from[uid]
	_, found := targets[vid]
	return found
}

// Lines returns the lines from u to v if any such lines exist and
// graph.Empty otherwise. The node v must be directly reachable from u as
// defined by the From method.
func (g *Graph) Lines(uid, vid int64) graph.Lines {
	bundle := g.from[uid][vid]
	n := len(bundle)
	if n == 0 {
		return graph.Empty
	}
	lines := make([]graph.Line, 0, n)
	for _, ln := range bundle {
		lines = append(lines, ln)
	}
	return iterator.NewOrderedLines(lines)
}

// newLine returns a new multi.Line from the source to the destination node
// with an ID drawn from the graph's ID set.
// The Line's ID does not become valid in g until the Line is added to g.
func (g *Graph) newLine(from, to graph.Node) graph.Line {
	var l multi.Line
	l.F, l.T = from, to
	l.UID = g.ids.NewID()
	return l
}

// newNode returns a new Node to be added to g. The Node's ID does not
// become valid in g until the Node is added to g. When g holds no nodes the
// returned node has ID zero, otherwise the ID is drawn from the graph's ID
// set. newNode panics if g already holds uid.Max nodes.
func (g *Graph) newNode() graph.Node {
	switch count := int64(len(g.nodes)); count {
	case 0:
		return multi.Node(0)
	case uid.Max:
		panic("rdf: cannot allocate node: no slot")
	}
	return multi.Node(g.ids.NewID())
}

// Node returns the node with the given ID if it exists in the graph,
// and nil otherwise.
func (g *Graph) Node(id int64) graph.Node {
	if n, ok := g.nodes[id]; ok {
		return n
	}
	return nil
}

// TermFor returns the Term for the given text. The text must be
// an exact match for the Term's Value field. The term is taken from the
// graph's nodes if it is held as a node, and otherwise from a statement that
// uses it as a predicate. ok is false if the text is not known to the graph.
func (g *Graph) TermFor(text string) (term Term, ok bool) {
	id, known := g.termIDs[text]
	if !known {
		return Term{}, false
	}

	n, isNode := g.nodes[id]
	if !isNode {
		// Not a subject or object; look for it as a predicate.
		users, isPred := g.pred[id]
		if !isPred {
			return Term{}, false
		}
		// Any statement in the set carries the predicate term.
		for s := range users {
			return s.Predicate, true
		}
		// If the set is empty, control falls through to the
		// assertion below.
	}
	return n.(Term), true
}

// Nodes returns all the nodes in the graph, or graph.Empty if there are none.
//
// The returned graph.Nodes is only valid until the next mutation of
// the receiver.
func (g *Graph) Nodes() graph.Nodes {
	if len(g.nodes) != 0 {
		return iterator.NewNodes(g.nodes)
	}
	return graph.Empty
}

// Predicates returns a slice of all the predicates used in the graph. The
// slice has one element per predicate UID held by the graph, each taken from
// one of the statements using that predicate.
func (g *Graph) Predicates() []Term {
	// first picks an arbitrary statement from set, if it has one.
	first := func(set map[*Statement]bool) (*Statement, bool) {
		for s := range set {
			return s, true
		}
		return nil, false
	}

	preds := make([]Term, len(g.pred))
	next := 0
	for _, users := range g.pred {
		if s, ok := first(users); ok {
			preds[next] = s.Predicate
			next++
		}
	}
	return preds
}

// removeLine removes the line with the given end point and line IDs from
// the graph, leaving the terminal nodes. It is a no-op if either end point
// is not a node in the graph. Index entries for the end point pair are
// dropped once they hold no more lines.
//
// NOTE(review): the line ID is released whenever both end points exist.
// RemoveStatement passes the predicate UID as the line ID, which other
// statements may still be using; confirm that releasing it here is intended.
func (g *Graph) removeLine(fid, tid, id int64) {
	_, haveFrom := g.nodes[fid]
	_, haveTo := g.nodes[tid]
	if !haveFrom || !haveTo {
		return
	}

	out := g.from[fid]
	delete(out[tid], id)
	if len(out[tid]) == 0 {
		delete(out, tid)
	}

	in := g.to[tid]
	delete(in[fid], id)
	if len(in[fid]) == 0 {
		delete(in, fid)
	}

	g.ids.Release(id)
}

// removeNode removes the node with the given ID from the graph, as well as
// any edges attached to it, and releases the ID. If the node is not in the
// graph it is a no-op.
func (g *Graph) removeNode(id int64) {
	if _, present := g.nodes[id]; !present {
		return
	}
	delete(g.nodes, id)

	// Lines leaving id: clear the matching entries in the reverse
	// index, then the forward entries themselves.
	for dst := range g.from[id] {
		delete(g.to[dst], id)
	}
	delete(g.from, id)

	// Lines arriving at id: the same, with the indexes swapped.
	for src := range g.to[id] {
		delete(g.from[src], id)
	}
	delete(g.to, id)

	g.ids.Release(id)
}

// RemoveStatement removes s from the graph, leaving the terminal nodes if they
// are part of another statement. If the statement does not exist in g it is a no-op.
//
// When s is the last statement using its predicate, the predicate is dropped
// from the predicate index. Its term ID mapping is only dropped if the
// predicate term is not also the subject or object of a remaining statement.
func (g *Graph) RemoveStatement(s *Statement) {
	if !g.pred[s.Predicate.UID][s] {
		return
	}

	// Remove the connection.
	g.removeLine(s.Subject.UID, s.Object.UID, s.Predicate.UID)
	statements := g.pred[s.Predicate.UID]
	delete(statements, s)
	if len(statements) == 0 {
		delete(g.pred, s.Predicate.UID)
		// The predicate term may still be held as a node with lines in
		// either direction. Check both indexes, as for the orphan checks
		// below, so a term that is still an object keeps its ID mapping.
		if len(g.from[s.Predicate.UID]) == 0 && len(g.to[s.Predicate.UID]) == 0 {
			g.ids.Release(s.Predicate.UID)
			delete(g.termIDs, s.Predicate.Value)
		}
	}

	// Remove any orphan terms.
	if g.From(s.Subject.UID).Len() == 0 && g.To(s.Subject.UID).Len() == 0 {
		g.removeNode(s.Subject.UID)
		delete(g.termIDs, s.Subject.Value)
	}
	if g.From(s.Object.UID).Len() == 0 && g.To(s.Object.UID).Len() == 0 {
		g.removeNode(s.Object.UID)
		delete(g.termIDs, s.Object.Value)
	}
}

// RemoveTerm removes t and any statements referencing t from the graph. If
// the term is a predicate, all statements with the predicate are removed. If
// the term does not exist it is a no-op.
func (g *Graph) RemoveTerm(t Term) {
	// Remove any predicates.
	if statements, ok := g.pred[t.UID]; ok {
		for s := range statements {
			g.RemoveStatement(s)
		}
	}

	// Quick return.
	_, nok := g.nodes[t.UID]
	_, fok := g.from[t.UID]
	_, tok := g.to[t.UID]
	if !nok && !fok && !tok {
		return
	}

	// Collect every statement that impinges on the term before removing
	// any of them. The iterators returned by From and To are only valid
	// until the next mutation, and RemoveStatement mutates the graph.
	var impinging []*Statement
	to := g.From(t.UID)
	for to.Next() {
		lines := g.Lines(t.UID, to.Node().ID())
		for lines.Next() {
			impinging = append(impinging, lines.Line().(*Statement))
		}
	}
	// Visit every node with a line into t, not just the first.
	from := g.To(t.UID)
	for from.Next() {
		lines := g.Lines(from.Node().ID(), t.UID)
		for lines.Next() {
			impinging = append(impinging, lines.Line().(*Statement))
		}
	}
	// A statement from t to itself is collected twice; the second
	// RemoveStatement call is a no-op.
	for _, s := range impinging {
		g.RemoveStatement(s)
	}

	// Remove the node.
	g.removeNode(t.UID)
	delete(g.termIDs, t.Value)
}

// setLine adds l, a line from one node to another. If the end point nodes
// are not in the graph they are added, otherwise the graph's nodes are
// replaced by the nodes of the line. The line is recorded in both the
// forward and reverse indexes and its ID is marked as used.
func (g *Graph) setLine(l graph.Line) {
	src, dst := l.From(), l.To()
	fid, tid, lid := src.ID(), dst.ID(), l.ID()

	if _, ok := g.nodes[fid]; ok {
		g.nodes[fid] = src
	} else {
		g.addNode(src)
	}
	if _, ok := g.nodes[tid]; ok {
		g.nodes[tid] = dst
	} else {
		g.addNode(dst)
	}

	// Forward index: source, then destination, then line ID.
	out := g.from[fid]
	if out == nil {
		out = make(map[int64]map[int64]graph.Line)
		g.from[fid] = out
	}
	if out[tid] == nil {
		out[tid] = make(map[int64]graph.Line)
	}
	out[tid][lid] = l

	// Reverse index: destination, then source, then line ID.
	in := g.to[tid]
	if in == nil {
		in = make(map[int64]map[int64]graph.Line)
		g.to[tid] = in
	}
	if in[fid] == nil {
		in[fid] = make(map[int64]graph.Line)
	}
	in[fid][lid] = l

	g.ids.Use(lid)
}

// Statements returns an iterator of the statements that connect the subject
// term node u to the object term node v.
func (g *Graph) Statements(uid, vid int64) *Statements {
	it := new(Statements)
	it.lit = g.Lines(uid, vid)
	return it
}

// To returns all nodes in g that can reach directly to the node with the
// given ID. graph.Empty is returned if there are none.
//
// The returned graph.Nodes is only valid until the next mutation of
// the receiver.
func (g *Graph) To(id int64) graph.Nodes {
	in := g.to[id]
	if len(in) == 0 {
		return graph.Empty
	}
	return iterator.NewNodesByLines(g.nodes, in)
}

// ToObject returns all nodes in g that can reach directly to an RDF object
// term. It is equivalent to calling To with the term's UID.
//
// The returned graph.Nodes is only valid until the next mutation of
// the receiver.
func (g *Graph) ToObject(t Term) graph.Nodes {
	object := t.UID
	return g.To(object)
}

// Statements is an RDF statement iterator.
type Statements struct {
	eit graph.Edges // edge iterator; set by Graph.AllStatements, nil otherwise
	lit graph.Lines // line iterator; set by Graph.Statements, or by Next from the current edge
}

// Next returns whether the iterator holds any additional statements. It
// advances the current line iterator, moving on to the lines of the next
// edge when the current lines are exhausted.
func (s *Statements) Next() bool {
	if s.lit != nil {
		if s.lit.Next() {
			return true
		}
	}
	if s.eit == nil {
		return false
	}
	if !s.eit.Next() {
		return false
	}
	edge := s.eit.Edge().(multi.Edge)
	s.lit = edge.Lines
	return s.lit.Next()
}

// Statement returns the current statement, which is the current line of the
// underlying line iterator.
func (s *Statements) Statement() *Statement {
	current := s.lit.Line()
	return current.(*Statement)
}

// ConnectedByAny is a helper function for simplifying graph traversal
// conditions. If e is a *Statement it returns the result of with applied to
// e. If e is a graph.Lines it returns whether with is true for any
// *Statement line in e. It returns false for any other edge.
func ConnectedByAny(e graph.Edge, with func(*Statement) bool) bool {
	if s, ok := e.(*Statement); ok {
		return with(s)
	}
	lines, ok := e.(graph.Lines)
	if !ok {
		return false
	}
	for lines.Next() {
		// Lines that are not statements are skipped.
		if s, ok := lines.Line().(*Statement); ok && with(s) {
			return true
		}
	}
	return false
}