mirror of
https://github.com/gonum/gonum.git
synced 2025-10-19 21:44:41 +08:00
network: add edge-weighted PageRank implementations
This commit is contained in:

committed by
Dan Kortschak

parent
e4cc524e41
commit
d05be515f6
1
AUTHORS
1
AUTHORS
@@ -61,6 +61,7 @@ source{d} <hello@sourced.tech>
|
|||||||
Shawn Smith <shawnpsmith@gmail.com>
|
Shawn Smith <shawnpsmith@gmail.com>
|
||||||
Spencer Lyon <spencerlyon2@gmail.com>
|
Spencer Lyon <spencerlyon2@gmail.com>
|
||||||
Steve McCoy <mccoyst@gmail.com>
|
Steve McCoy <mccoyst@gmail.com>
|
||||||
|
Takeshi Yoneda <cz.rk.t0415y.g@gmail.com>
|
||||||
The University of Adelaide
|
The University of Adelaide
|
||||||
The University of Minnesota
|
The University of Minnesota
|
||||||
The University of Washington
|
The University of Washington
|
||||||
|
@@ -67,6 +67,7 @@ Sebastien Binet <seb.binet@gmail.com>
|
|||||||
Shawn Smith <shawnpsmith@gmail.com>
|
Shawn Smith <shawnpsmith@gmail.com>
|
||||||
Spencer Lyon <spencerlyon2@gmail.com>
|
Spencer Lyon <spencerlyon2@gmail.com>
|
||||||
Steve McCoy <mccoyst@gmail.com>
|
Steve McCoy <mccoyst@gmail.com>
|
||||||
|
Takeshi Yoneda <cz.rk.t0415y.g@gmail.com>
|
||||||
Tobin Harding <me@tobin.cc>
|
Tobin Harding <me@tobin.cc>
|
||||||
Vladimír Chalupecký <vladimir.chalupecky@gmail.com>
|
Vladimír Chalupecký <vladimir.chalupecky@gmail.com>
|
||||||
Yevgeniy Vahlis <evahlis@gmail.com>
|
Yevgeniy Vahlis <evahlis@gmail.com>
|
||||||
|
@@ -18,8 +18,197 @@ import (
|
|||||||
// using the given damping factor and terminating when the 2-norm of the
|
// using the given damping factor and terminating when the 2-norm of the
|
||||||
// vector difference between iterations is below tol. The returned map is
|
// vector difference between iterations is below tol. The returned map is
|
||||||
// keyed on the graph node IDs.
|
// keyed on the graph node IDs.
|
||||||
|
// If g is a graph.WeightedDirected, an edge-weighted PageRank is calculated.
|
||||||
func PageRank(g graph.Directed, damp, tol float64) map[int64]float64 {
|
func PageRank(g graph.Directed, damp, tol float64) map[int64]float64 {
|
||||||
// PageRank is implemented according to "How Google Finds Your Needle
|
if g, ok := g.(graph.WeightedDirected); ok {
|
||||||
|
return edgeWeightedPageRank(g, damp, tol)
|
||||||
|
}
|
||||||
|
return pageRank(g, damp, tol)
|
||||||
|
}
|
||||||
|
|
||||||
|
// PageRankSparse returns the PageRank weights for nodes of the sparse directed
|
||||||
|
// graph g using the given damping factor and terminating when the 2-norm of the
|
||||||
|
// vector difference between iterations is below tol. The returned map is
|
||||||
|
// keyed on the graph node IDs.
|
||||||
|
// If g is a graph.WeightedDirected, an edge-weighted PageRank is calculated.
|
||||||
|
func PageRankSparse(g graph.Directed, damp, tol float64) map[int64]float64 {
|
||||||
|
if g, ok := g.(graph.WeightedDirected); ok {
|
||||||
|
return edgeWeightedPageRankSparse(g, damp, tol)
|
||||||
|
}
|
||||||
|
return pageRankSparse(g, damp, tol)
|
||||||
|
}
|
||||||
|
|
||||||
|
// edgeWeightedPageRank returns the PageRank weights for nodes of the weighted directed graph g
|
||||||
|
// using the given damping factor and terminating when the 2-norm of the
|
||||||
|
// vector difference between iterations is below tol. The returned map is
|
||||||
|
// keyed on the graph node IDs.
|
||||||
|
func edgeWeightedPageRank(g graph.WeightedDirected, damp, tol float64) map[int64]float64 {
|
||||||
|
// edgeWeightedPageRank is implemented according to "How Google Finds Your Needle
|
||||||
|
// in the Web's Haystack" with the modification that
|
||||||
|
// the columns of hyperlink matrix H are calculated with edge weights.
|
||||||
|
//
|
||||||
|
// G.I^k = alpha.H.I^k + alpha.A.I^k + (1-alpha).1/n.1.I^k
|
||||||
|
//
|
||||||
|
// http://www.ams.org/samplings/feature-column/fcarc-pagerank
|
||||||
|
|
||||||
|
nodes := g.Nodes()
|
||||||
|
indexOf := make(map[int64]int, len(nodes))
|
||||||
|
for i, n := range nodes {
|
||||||
|
indexOf[n.ID()] = i
|
||||||
|
}
|
||||||
|
|
||||||
|
m := mat.NewDense(len(nodes), len(nodes), nil)
|
||||||
|
dangling := damp / float64(len(nodes))
|
||||||
|
for j, u := range nodes {
|
||||||
|
to := g.From(u.ID())
|
||||||
|
var z float64
|
||||||
|
for _, v := range to {
|
||||||
|
if w, ok := g.Weight(u.ID(), v.ID()); ok {
|
||||||
|
z += w
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if z != 0 {
|
||||||
|
for _, v := range to {
|
||||||
|
if w, ok := g.Weight(u.ID(), v.ID()); ok {
|
||||||
|
m.Set(indexOf[v.ID()], j, (w*damp)/z)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for i := range nodes {
|
||||||
|
m.Set(i, j, dangling)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
matrix := m.RawMatrix().Data
|
||||||
|
dt := (1 - damp) / float64(len(nodes))
|
||||||
|
for i := range matrix {
|
||||||
|
matrix[i] += dt
|
||||||
|
}
|
||||||
|
|
||||||
|
last := make([]float64, len(nodes))
|
||||||
|
for i := range last {
|
||||||
|
last[i] = 1
|
||||||
|
}
|
||||||
|
lastV := mat.NewVecDense(len(nodes), last)
|
||||||
|
|
||||||
|
vec := make([]float64, len(nodes))
|
||||||
|
var sum float64
|
||||||
|
for i := range vec {
|
||||||
|
r := rand.NormFloat64()
|
||||||
|
sum += r
|
||||||
|
vec[i] = r
|
||||||
|
}
|
||||||
|
f := 1 / sum
|
||||||
|
for i := range vec {
|
||||||
|
vec[i] *= f
|
||||||
|
}
|
||||||
|
v := mat.NewVecDense(len(nodes), vec)
|
||||||
|
|
||||||
|
for {
|
||||||
|
lastV, v = v, lastV
|
||||||
|
v.MulVec(m, lastV)
|
||||||
|
if normDiff(vec, last) < tol {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ranks := make(map[int64]float64, len(nodes))
|
||||||
|
for i, r := range v.RawVector().Data {
|
||||||
|
ranks[nodes[i].ID()] = r
|
||||||
|
}
|
||||||
|
|
||||||
|
return ranks
|
||||||
|
}
|
||||||
|
|
||||||
|
// edgeWeightedPageRankSparse returns the PageRank weights for nodes of the sparse weighted directed
|
||||||
|
// graph g using the given damping factor and terminating when the 2-norm of the
|
||||||
|
// vector difference between iterations is below tol. The returned map is
|
||||||
|
// keyed on the graph node IDs.
|
||||||
|
func edgeWeightedPageRankSparse(g graph.WeightedDirected, damp, tol float64) map[int64]float64 {
|
||||||
|
// edgeWeightedPageRankSparse is implemented according to "How Google Finds Your Needle
|
||||||
|
// in the Web's Haystack" with the modification that
|
||||||
|
// the columns of hyperlink matrix H are calculated with edge weights.
|
||||||
|
//
|
||||||
|
// G.I^k = alpha.H.I^k + alpha.A.I^k + (1-alpha).1/n.1.I^k
|
||||||
|
//
|
||||||
|
// http://www.ams.org/samplings/feature-column/fcarc-pagerank
|
||||||
|
|
||||||
|
nodes := g.Nodes()
|
||||||
|
indexOf := make(map[int64]int, len(nodes))
|
||||||
|
for i, n := range nodes {
|
||||||
|
indexOf[n.ID()] = i
|
||||||
|
}
|
||||||
|
|
||||||
|
m := make(rowCompressedMatrix, len(nodes))
|
||||||
|
var dangling compressedRow
|
||||||
|
df := damp / float64(len(nodes))
|
||||||
|
for j, u := range nodes {
|
||||||
|
to := g.From(u.ID())
|
||||||
|
var z float64
|
||||||
|
for _, v := range to {
|
||||||
|
if w, ok := g.Weight(u.ID(), v.ID()); ok {
|
||||||
|
z += w
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if z != 0 {
|
||||||
|
for _, v := range to {
|
||||||
|
if w, ok := g.Weight(u.ID(), v.ID()); ok {
|
||||||
|
m.addTo(indexOf[v.ID()], j, (w*damp)/z)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
dangling.addTo(j, df)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
last := make([]float64, len(nodes))
|
||||||
|
for i := range last {
|
||||||
|
last[i] = 1
|
||||||
|
}
|
||||||
|
lastV := mat.NewVecDense(len(nodes), last)
|
||||||
|
|
||||||
|
vec := make([]float64, len(nodes))
|
||||||
|
var sum float64
|
||||||
|
for i := range vec {
|
||||||
|
r := rand.NormFloat64()
|
||||||
|
sum += r
|
||||||
|
vec[i] = r
|
||||||
|
}
|
||||||
|
f := 1 / sum
|
||||||
|
for i := range vec {
|
||||||
|
vec[i] *= f
|
||||||
|
}
|
||||||
|
v := mat.NewVecDense(len(nodes), vec)
|
||||||
|
|
||||||
|
dt := (1 - damp) / float64(len(nodes))
|
||||||
|
for {
|
||||||
|
lastV, v = v, lastV
|
||||||
|
|
||||||
|
m.mulVecUnitary(v, lastV) // First term of the G matrix equation;
|
||||||
|
with := dangling.dotUnitary(lastV) // Second term;
|
||||||
|
away := onesDotUnitary(dt, lastV) // Last term.
|
||||||
|
|
||||||
|
floats.AddConst(with+away, v.RawVector().Data)
|
||||||
|
if normDiff(vec, last) < tol {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ranks := make(map[int64]float64, len(nodes))
|
||||||
|
for i, r := range v.RawVector().Data {
|
||||||
|
ranks[nodes[i].ID()] = r
|
||||||
|
}
|
||||||
|
|
||||||
|
return ranks
|
||||||
|
}
|
||||||
|
|
||||||
|
// pageRank returns the PageRank weights for nodes of the directed graph g
|
||||||
|
// using the given damping factor and terminating when the 2-norm of the
|
||||||
|
// vector difference between iterations is below tol. The returned map is
|
||||||
|
// keyed on the graph node IDs.
|
||||||
|
func pageRank(g graph.Directed, damp, tol float64) map[int64]float64 {
|
||||||
|
// pageRank is implemented according to "How Google Finds Your Needle
|
||||||
// in the Web's Haystack".
|
// in the Web's Haystack".
|
||||||
//
|
//
|
||||||
// G.I^k = alpha.S.I^k + (1-alpha).1/n.1.I^k
|
// G.I^k = alpha.S.I^k + (1-alpha).1/n.1.I^k
|
||||||
@@ -87,12 +276,12 @@ func PageRank(g graph.Directed, damp, tol float64) map[int64]float64 {
|
|||||||
return ranks
|
return ranks
|
||||||
}
|
}
|
||||||
|
|
||||||
// PageRankSparse returns the PageRank weights for nodes of the sparse directed
|
// pageRankSparse returns the PageRank weights for nodes of the sparse directed
|
||||||
// graph g using the given damping factor and terminating when the 2-norm of the
|
// graph g using the given damping factor and terminating when the 2-norm of the
|
||||||
// vector difference between iterations is below tol. The returned map is
|
// vector difference between iterations is below tol. The returned map is
|
||||||
// keyed on the graph node IDs.
|
// keyed on the graph node IDs.
|
||||||
func PageRankSparse(g graph.Directed, damp, tol float64) map[int64]float64 {
|
func pageRankSparse(g graph.Directed, damp, tol float64) map[int64]float64 {
|
||||||
// PageRankSparse is implemented according to "How Google Finds Your Needle
|
// pageRankSparse is implemented according to "How Google Finds Your Needle
|
||||||
// in the Web's Haystack".
|
// in the Web's Haystack".
|
||||||
//
|
//
|
||||||
// G.I^k = alpha.H.I^k + alpha.A.I^k + (1-alpha).1/n.1.I^k
|
// G.I^k = alpha.H.I^k + alpha.A.I^k + (1-alpha).1/n.1.I^k
|
||||||
|
@@ -91,7 +91,7 @@ func TestPageRank(t *testing.T) {
|
|||||||
g.SetEdge(simple.Edge{F: simple.Node(u), T: simple.Node(v)})
|
g.SetEdge(simple.Edge{F: simple.Node(u), T: simple.Node(v)})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
got := PageRank(g, test.damp, test.tol)
|
got := pageRank(g, test.damp, test.tol)
|
||||||
prec := 1 - int(math.Log10(test.wantTol))
|
prec := 1 - int(math.Log10(test.wantTol))
|
||||||
for n := range test.g {
|
for n := range test.g {
|
||||||
if !floats.EqualWithinAbsOrRel(got[int64(n)], test.want[int64(n)], test.wantTol, test.wantTol) {
|
if !floats.EqualWithinAbsOrRel(got[int64(n)], test.want[int64(n)], test.wantTol, test.wantTol) {
|
||||||
@@ -115,7 +115,124 @@ func TestPageRankSparse(t *testing.T) {
|
|||||||
g.SetEdge(simple.Edge{F: simple.Node(u), T: simple.Node(v)})
|
g.SetEdge(simple.Edge{F: simple.Node(u), T: simple.Node(v)})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
got := PageRankSparse(g, test.damp, test.tol)
|
got := pageRankSparse(g, test.damp, test.tol)
|
||||||
|
prec := 1 - int(math.Log10(test.wantTol))
|
||||||
|
for n := range test.g {
|
||||||
|
if !floats.EqualWithinAbsOrRel(got[int64(n)], test.want[int64(n)], test.wantTol, test.wantTol) {
|
||||||
|
t.Errorf("unexpected PageRank result for test %d:\ngot: %v\nwant:%v",
|
||||||
|
i, orderedFloats(got, prec), orderedFloats(test.want, prec))
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var edgeWeightedPageRankTests = []struct {
|
||||||
|
g []set
|
||||||
|
self, absent float64
|
||||||
|
edges map[int]map[int64]float64
|
||||||
|
damp float64
|
||||||
|
tol float64
|
||||||
|
|
||||||
|
wantTol float64
|
||||||
|
want map[int64]float64
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
// This test case is created according to the result with the following python code
|
||||||
|
// on python 3.6.4 (using "networkx" of version 2.1)
|
||||||
|
//
|
||||||
|
// >>> import networkx as nx
|
||||||
|
// >>> D = nx.DiGraph()
|
||||||
|
// >>> D.add_weighted_edges_from([('A', 'B', 0.3), ('A','C', 1.2), ('B', 'A', 0.4), ('C', 'B', 0.3), ('D', 'A', 0.3), ('D', 'B', 2.1)])
|
||||||
|
// >>> nx.pagerank(D, alpha=0.85, tol=1e-10)
|
||||||
|
// {'A': 0.3409109390701202, 'B': 0.3522682754411842, 'C': 0.2693207854886954, 'D': 0.037500000000000006}
|
||||||
|
|
||||||
|
g: []set{
|
||||||
|
A: linksTo(B, C),
|
||||||
|
B: linksTo(A),
|
||||||
|
C: linksTo(B),
|
||||||
|
D: linksTo(A, B),
|
||||||
|
},
|
||||||
|
edges: map[int]map[int64]float64{
|
||||||
|
A: {
|
||||||
|
B: 0.3,
|
||||||
|
C: 1.2,
|
||||||
|
},
|
||||||
|
B: {
|
||||||
|
A: 0.4,
|
||||||
|
},
|
||||||
|
C: {
|
||||||
|
B: 0.3,
|
||||||
|
},
|
||||||
|
D: {
|
||||||
|
A: 0.3,
|
||||||
|
B: 2.1,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
damp: 0.85,
|
||||||
|
tol: 1e-10,
|
||||||
|
|
||||||
|
wantTol: 1e-8,
|
||||||
|
want: map[int64]float64{
|
||||||
|
A: 0.3409120160955594,
|
||||||
|
B: 0.3522678129306601,
|
||||||
|
C: 0.2693201709737804,
|
||||||
|
D: 0.037500000000000006,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEdgeWeightedPageRank(t *testing.T) {
|
||||||
|
for i, test := range edgeWeightedPageRankTests {
|
||||||
|
g := simple.NewWeightedDirectedGraph(test.self, test.absent)
|
||||||
|
for u, e := range test.g {
|
||||||
|
// Add nodes that are not defined by an edge.
|
||||||
|
if !g.Has(int64(u)) {
|
||||||
|
g.AddNode(simple.Node(u))
|
||||||
|
}
|
||||||
|
ws, ok := test.edges[u]
|
||||||
|
if !ok {
|
||||||
|
t.Errorf("edges not found for %v", u)
|
||||||
|
}
|
||||||
|
|
||||||
|
for v := range e {
|
||||||
|
if w, ok := ws[v]; ok {
|
||||||
|
g.SetWeightedEdge(g.NewWeightedEdge(simple.Node(u), simple.Node(v), w))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
got := edgeWeightedPageRank(g, test.damp, test.tol)
|
||||||
|
prec := 1 - int(math.Log10(test.wantTol))
|
||||||
|
for n := range test.g {
|
||||||
|
if !floats.EqualWithinAbsOrRel(got[int64(n)], test.want[int64(n)], test.wantTol, test.wantTol) {
|
||||||
|
t.Errorf("unexpected PageRank result for test %d:\ngot: %v\nwant:%v",
|
||||||
|
i, orderedFloats(got, prec), orderedFloats(test.want, prec))
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEdgeWeightedPageRankSparse(t *testing.T) {
|
||||||
|
for i, test := range edgeWeightedPageRankTests {
|
||||||
|
g := simple.NewWeightedDirectedGraph(test.self, test.absent)
|
||||||
|
for u, e := range test.g {
|
||||||
|
// Add nodes that are not defined by an edge.
|
||||||
|
if !g.Has(int64(u)) {
|
||||||
|
g.AddNode(simple.Node(u))
|
||||||
|
}
|
||||||
|
ws, ok := test.edges[u]
|
||||||
|
if !ok {
|
||||||
|
t.Errorf("edges not found for %v", u)
|
||||||
|
}
|
||||||
|
|
||||||
|
for v := range e {
|
||||||
|
if w, ok := ws[v]; ok {
|
||||||
|
g.SetWeightedEdge(g.NewWeightedEdge(simple.Node(u), simple.Node(v), w))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
got := edgeWeightedPageRankSparse(g, test.damp, test.tol)
|
||||||
prec := 1 - int(math.Log10(test.wantTol))
|
prec := 1 - int(math.Log10(test.wantTol))
|
||||||
for n := range test.g {
|
for n := range test.g {
|
||||||
if !floats.EqualWithinAbsOrRel(got[int64(n)], test.want[int64(n)], test.wantTol, test.wantTol) {
|
if !floats.EqualWithinAbsOrRel(got[int64(n)], test.want[int64(n)], test.wantTol, test.wantTol) {
|
||||||
|
Reference in New Issue
Block a user