network: add edge-weighted PageRank implementations

2025-10-19 21:44:41 +08:00 · 2018-06-10 16:57:38 +09:00
parent e4cc524e41
commit d05be515f6
4 changed files with 314 additions and 6 deletions
--- a/1
+++ b/1
@@ -61,6 +61,7 @@ source{d} <hello@sourced.tech>
 Shawn Smith <shawnpsmith@gmail.com>
 Spencer Lyon <spencerlyon2@gmail.com>
 Steve McCoy <mccoyst@gmail.com>
 Takeshi Yoneda <cz.rk.t0415y.g@gmail.com>
 The University of Adelaide
 The University of Minnesota
 The University of Washington
--- a/1
+++ b/1
@@ -67,6 +67,7 @@ Sebastien Binet <seb.binet@gmail.com>
 Shawn Smith <shawnpsmith@gmail.com>
 Spencer Lyon <spencerlyon2@gmail.com>
 Steve McCoy <mccoyst@gmail.com>
 Takeshi Yoneda <cz.rk.t0415y.g@gmail.com>
 Tobin Harding <me@tobin.cc>
 Vladimír Chalupecký <vladimir.chalupecky@gmail.com>
 Yevgeniy Vahlis <evahlis@gmail.com>
--- a/graph/network/page.go
+++ b/graph/network/page.go
@@ -18,8 +18,197 @@ import (
 // using the given damping factor and terminating when the 2-norm of the
 // vector difference between iterations is below tol. The returned map is
 // keyed on the graph node IDs.
 // If g is a graph.WeightedDirected, an edge-weighted PageRank is calculated.
 func PageRank(g graph.Directed, damp, tol float64) map[int64]float64 {
-	// PageRank is implemented according to "How Google Finds Your Needle
 	// Dispatch on the dynamic type of g: a graph that also implements
 	// graph.WeightedDirected is handed to edgeWeightedPageRank.
+	if g, ok := g.(graph.WeightedDirected); ok {
 		return edgeWeightedPageRank(g, damp, tol)
 	}
 	// Otherwise fall back to pageRank, which accepts any graph.Directed.
 	return pageRank(g, damp, tol)
 }
 // PageRankSparse computes PageRank weights for the nodes of the sparse
 // directed graph g. Iteration uses the damping factor damp and stops once the
 // 2-norm of the difference between successive rank vectors drops below tol.
 // The result is keyed on the graph node IDs.
 // When g is also a graph.WeightedDirected, the edge-weighted PageRank is
 // computed instead.
 func PageRankSparse(g graph.Directed, damp, tol float64) map[int64]float64 {
 	switch g := g.(type) {
 	case graph.WeightedDirected:
 		return edgeWeightedPageRankSparse(g, damp, tol)
 	default:
 		return pageRankSparse(g, damp, tol)
 	}
 }
 // edgeWeightedPageRank computes PageRank weights for the nodes of the weighted
 // directed graph g, sharing each node's rank among its successors in
 // proportion to the weights of the connecting edges. Iteration uses the
 // damping factor damp and stops once the 2-norm of the difference between
 // successive rank vectors drops below tol. The result is keyed on the graph
 // node IDs.
 func edgeWeightedPageRank(g graph.WeightedDirected, damp, tol float64) map[int64]float64 {
 	// The implementation follows "How Google Finds Your Needle in the Web's
 	// Haystack", except that the columns of the hyperlink matrix H are
 	// built from edge weights.
 	//
 	// G.I^k = alpha.H.I^k + alpha.A.I^k + (1-alpha).1/n.1.I^k
 	//
 	// http://www.ams.org/samplings/feature-column/fcarc-pagerank
 	nodes := g.Nodes()
 	n := len(nodes)
 	column := make(map[int64]int, n)
 	for idx, node := range nodes {
 		column[node.ID()] = idx
 	}
 	// Fill the damped transition matrix one source node (column) at a time.
 	trans := mat.NewDense(n, n, nil)
 	uniform := damp / float64(n)
 	for col, src := range nodes {
 		succ := g.From(src.ID())
 		var total float64
 		for _, dst := range succ {
 			w, ok := g.Weight(src.ID(), dst.ID())
 			if !ok {
 				continue
 			}
 			total += w
 		}
 		if total == 0 {
 			// No outgoing weight: spread the damped rank evenly over all nodes.
 			for row := 0; row < n; row++ {
 				trans.Set(row, col, uniform)
 			}
 			continue
 		}
 		for _, dst := range succ {
 			w, ok := g.Weight(src.ID(), dst.ID())
 			if !ok {
 				continue
 			}
 			trans.Set(column[dst.ID()], col, (w*damp)/total)
 		}
 	}
 	// Add the (1-damp)/n term to each element of the matrix's raw data.
 	teleport := (1 - damp) / float64(n)
 	entries := trans.RawMatrix().Data
 	for k := range entries {
 		entries[k] += teleport
 	}
 	// prev starts as all ones; cur starts as random values scaled by the
 	// reciprocal of their sum. The convergence check below reads these
 	// slices directly, so it relies on the vectors using them as storage.
 	prev := make([]float64, n)
 	for k := range prev {
 		prev[k] = 1
 	}
 	prevVec := mat.NewVecDense(n, prev)
 	cur := make([]float64, n)
 	var acc float64
 	for k := range cur {
 		x := rand.NormFloat64()
 		cur[k] = x
 		acc += x
 	}
 	scale := 1 / acc
 	for k := range cur {
 		cur[k] *= scale
 	}
 	curVec := mat.NewVecDense(n, cur)
 	// Swap the two vectors and multiply until successive iterates agree.
 	for converged := false; !converged; converged = normDiff(cur, prev) < tol {
 		prevVec, curVec = curVec, prevVec
 		curVec.MulVec(trans, prevVec)
 	}
 	ranks := make(map[int64]float64, n)
 	for idx, score := range curVec.RawVector().Data {
 		ranks[nodes[idx].ID()] = score
 	}
 	return ranks
 }
 // edgeWeightedPageRankSparse computes PageRank weights for the nodes of the
 // sparse weighted directed graph g, sharing each node's rank among its
 // successors in proportion to the weights of the connecting edges. Iteration
 // uses the damping factor damp and stops once the 2-norm of the difference
 // between successive rank vectors drops below tol. The result is keyed on the
 // graph node IDs.
 func edgeWeightedPageRankSparse(g graph.WeightedDirected, damp, tol float64) map[int64]float64 {
 	// The implementation follows "How Google Finds Your Needle in the Web's
 	// Haystack", except that the columns of the hyperlink matrix H are
 	// built from edge weights.
 	//
 	// G.I^k = alpha.H.I^k + alpha.A.I^k + (1-alpha).1/n.1.I^k
 	//
 	// http://www.ams.org/samplings/feature-column/fcarc-pagerank
 	nodes := g.Nodes()
 	n := len(nodes)
 	column := make(map[int64]int, n)
 	for idx, node := range nodes {
 		column[node.ID()] = idx
 	}
 	// hyperlinks collects the weighted entries; sinks records the columns of
 	// nodes whose outgoing weights sum to zero.
 	hyperlinks := make(rowCompressedMatrix, n)
 	var sinks compressedRow
 	uniform := damp / float64(n)
 	for col, src := range nodes {
 		succ := g.From(src.ID())
 		var total float64
 		for _, dst := range succ {
 			w, ok := g.Weight(src.ID(), dst.ID())
 			if !ok {
 				continue
 			}
 			total += w
 		}
 		if total == 0 {
 			sinks.addTo(col, uniform)
 			continue
 		}
 		for _, dst := range succ {
 			w, ok := g.Weight(src.ID(), dst.ID())
 			if !ok {
 				continue
 			}
 			hyperlinks.addTo(column[dst.ID()], col, (w*damp)/total)
 		}
 	}
 	// prev starts as all ones; cur starts as random values scaled by the
 	// reciprocal of their sum. The convergence check below reads these
 	// slices directly, so it relies on the vectors using them as storage.
 	prev := make([]float64, n)
 	for k := range prev {
 		prev[k] = 1
 	}
 	prevVec := mat.NewVecDense(n, prev)
 	cur := make([]float64, n)
 	var acc float64
 	for k := range cur {
 		x := rand.NormFloat64()
 		cur[k] = x
 		acc += x
 	}
 	scale := 1 / acc
 	for k := range cur {
 		cur[k] *= scale
 	}
 	curVec := mat.NewVecDense(n, cur)
 	teleport := (1 - damp) / float64(n)
 	for converged := false; !converged; converged = normDiff(cur, prev) < tol {
 		prevVec, curVec = curVec, prevVec
 		// Evaluate the three terms of the G matrix equation in order: the
 		// first is written into curVec, the other two are added as a constant.
 		hyperlinks.mulVecUnitary(curVec, prevVec)
 		fromSinks := sinks.dotUnitary(prevVec)
 		fromTeleport := onesDotUnitary(teleport, prevVec)
 		floats.AddConst(fromSinks+fromTeleport, curVec.RawVector().Data)
 	}
 	ranks := make(map[int64]float64, n)
 	for idx, score := range curVec.RawVector().Data {
 		ranks[nodes[idx].ID()] = score
 	}
 	return ranks
 }
 // pageRank returns the PageRank weights for nodes of the directed graph g
 // using the given damping factor and terminating when the 2-norm of the
 // vector difference between iterations is below tol. The returned map is
 // keyed on the graph node IDs.
 func pageRank(g graph.Directed, damp, tol float64) map[int64]float64 {
 	// pageRank is implemented according to "How Google Finds Your Needle
 	// in the Web's Haystack".
 	//
 	// G.I^k = alpha.S.I^k + (1-alpha).1/n.1.I^k
@@ -87,12 +276,12 @@ func PageRank(g graph.Directed, damp, tol float64) map[int64]float64 {
 	return ranks
 }
-// PageRankSparse returns the PageRank weights for nodes of the sparse directed
+// pageRankSparse returns the PageRank weights for nodes of the sparse directed
 // graph g using the given damping factor and terminating when the 2-norm of the
 // vector difference between iterations is below tol. The returned map is
 // keyed on the graph node IDs.
-func PageRankSparse(g graph.Directed, damp, tol float64) map[int64]float64 {
+func pageRankSparse(g graph.Directed, damp, tol float64) map[int64]float64 {
-	// PageRankSparse is implemented according to "How Google Finds Your Needle
+	// pageRankSparse is implemented according to "How Google Finds Your Needle
 	// in the Web's Haystack".
 	//
 	// G.I^k = alpha.H.I^k + alpha.A.I^k + (1-alpha).1/n.1.I^k
--- a/graph/network/page_test.go
+++ b/graph/network/page_test.go
@@ -91,7 +91,7 @@ func TestPageRank(t *testing.T) {
 				g.SetEdge(simple.Edge{F: simple.Node(u), T: simple.Node(v)})
 			}
 		}
-		got := PageRank(g, test.damp, test.tol)
+		got := pageRank(g, test.damp, test.tol)
 		prec := 1 - int(math.Log10(test.wantTol))
 		for n := range test.g {
 			if !floats.EqualWithinAbsOrRel(got[int64(n)], test.want[int64(n)], test.wantTol, test.wantTol) {
@@ -115,7 +115,124 @@ func TestPageRankSparse(t *testing.T) {
 				g.SetEdge(simple.Edge{F: simple.Node(u), T: simple.Node(v)})
 			}
 		}
-		got := PageRankSparse(g, test.damp, test.tol)
+		got := pageRankSparse(g, test.damp, test.tol)
 		prec := 1 - int(math.Log10(test.wantTol))
 		for n := range test.g {
 			if !floats.EqualWithinAbsOrRel(got[int64(n)], test.want[int64(n)], test.wantTol, test.wantTol) {
 				t.Errorf("unexpected PageRank result for test %d:\ngot: %v\nwant:%v",
 					i, orderedFloats(got, prec), orderedFloats(test.want, prec))
 				break
 			}
 		}
 	}
 }
 // edgeWeightedPageRankTests holds the cases run by TestEdgeWeightedPageRank
 // and TestEdgeWeightedPageRankSparse. For each case, g lists the link targets
 // of every node, self and absent are passed to
 // simple.NewWeightedDirectedGraph, and edges gives the edge weights keyed by
 // source node and then by target node. damp and tol are handed to the function
 // under test, and each result is compared with want using wantTol as both the
 // absolute and the relative tolerance.
 var edgeWeightedPageRankTests = []struct {
 	g            []set
 	self, absent float64
 	edges        map[int]map[int64]float64
 	damp         float64
 	tol          float64
 	wantTol float64
 	want    map[int64]float64
 }{
 	{
 		// This test case is created according to the result with the following python code
 		// on python 3.6.4 (using "networkx" of version 2.1)
 		//
 		// >>> import networkx as nx
 		// >>> D = nx.DiGraph()
 		// >>> D.add_weighted_edges_from([('A', 'B', 0.3), ('A','C', 1.2), ('B', 'A', 0.4), ('C', 'B', 0.3), ('D', 'A', 0.3), ('D', 'B', 2.1)])
 		// >>> nx.pagerank(D, alpha=0.85, tol=1e-10)
 		// {'A': 0.3409109390701202, 'B': 0.3522682754411842, 'C': 0.2693207854886954, 'D': 0.037500000000000006}
 		//
 		// NOTE(review): the values printed above agree with want only to
 		// about 1e-6, which is coarser than wantTol. Solving the PageRank
 		// equations for this graph by hand gives A of approximately
 		// 0.34091202, matching want, so the transcript may have been
 		// captured at a looser tolerance; confirm by re-running the snippet.
 		g: []set{
 			A: linksTo(B, C),
 			B: linksTo(A),
 			C: linksTo(B),
 			D: linksTo(A, B),
 		},
 		edges: map[int]map[int64]float64{
 			A: {
 				B: 0.3,
 				C: 1.2,
 			},
 			B: {
 				A: 0.4,
 			},
 			C: {
 				B: 0.3,
 			},
 			D: {
 				A: 0.3,
 				B: 2.1,
 			},
 		},
 		damp: 0.85,
 		tol:  1e-10,
 		wantTol: 1e-8,
 		want: map[int64]float64{
 			A: 0.3409120160955594,
 			B: 0.3522678129306601,
 			C: 0.2693201709737804,
 			D: 0.037500000000000006,
 		},
 	},
 }
 // TestEdgeWeightedPageRank checks edgeWeightedPageRank against every case in
 // edgeWeightedPageRankTests.
 func TestEdgeWeightedPageRank(t *testing.T) {
 	for i, test := range edgeWeightedPageRankTests {
 		g := simple.NewWeightedDirectedGraph(test.self, test.absent)
 		for src, targets := range test.g {
 			// Make sure the node exists even if no edge ends up adding it.
 			if !g.Has(int64(src)) {
 				g.AddNode(simple.Node(src))
 			}
 			weights, found := test.edges[src]
 			if !found {
 				t.Errorf("edges not found for %v", src)
 			}
 			for dst := range targets {
 				w, ok := weights[dst]
 				if !ok {
 					continue
 				}
 				g.SetWeightedEdge(g.NewWeightedEdge(simple.Node(src), simple.Node(dst), w))
 			}
 		}
 		got := edgeWeightedPageRank(g, test.damp, test.tol)
 		prec := 1 - int(math.Log10(test.wantTol))
 		for n := range test.g {
 			id := int64(n)
 			if floats.EqualWithinAbsOrRel(got[id], test.want[id], test.wantTol, test.wantTol) {
 				continue
 			}
 			// Report the full result once, on the first mismatching node.
 			t.Errorf("unexpected PageRank result for test %d:\ngot: %v\nwant:%v",
 				i, orderedFloats(got, prec), orderedFloats(test.want, prec))
 			break
 		}
 	}
 }
 func TestEdgeWeightedPageRankSparse(t *testing.T) {
 	for i, test := range edgeWeightedPageRankTests {
 		g := simple.NewWeightedDirectedGraph(test.self, test.absent)
 		for u, e := range test.g {
 			// Add nodes that are not defined by an edge.
 			if !g.Has(int64(u)) {
 				g.AddNode(simple.Node(u))
 			}
 			ws, ok := test.edges[u]
 			if !ok {
 				t.Errorf("edges not found for %v", u)
 			}
 			for v := range e {
 				if w, ok := ws[v]; ok {
 					g.SetWeightedEdge(g.NewWeightedEdge(simple.Node(u), simple.Node(v), w))
 				}
 			}
 		}
 		got := edgeWeightedPageRankSparse(g, test.damp, test.tol)
 		prec := 1 - int(math.Log10(test.wantTol))
 		for n := range test.g {
 			if !floats.EqualWithinAbsOrRel(got[int64(n)], test.want[int64(n)], test.wantTol, test.wantTol) {