diff --git a/AUTHORS b/AUTHORS
index e63b77bf..dd17494a 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -61,6 +61,7 @@ source{d}
 Shawn Smith
 Spencer Lyon
 Steve McCoy
+Takeshi Yoneda
 The University of Adelaide
 The University of Minnesota
 The University of Washington
diff --git a/CONTRIBUTORS b/CONTRIBUTORS
index 9f1f6b8d..007d13b7 100644
--- a/CONTRIBUTORS
+++ b/CONTRIBUTORS
@@ -67,6 +67,7 @@ Sebastien Binet
 Shawn Smith
 Spencer Lyon
 Steve McCoy
+Takeshi Yoneda
 Tobin Harding
 Vladimír Chalupecký
 Yevgeniy Vahlis
diff --git a/graph/network/page.go b/graph/network/page.go
index 7d7ee0a5..006c1db7 100644
--- a/graph/network/page.go
+++ b/graph/network/page.go
@@ -18,8 +18,197 @@ import (
 // using the given damping factor and terminating when the 2-norm of the
 // vector difference between iterations is below tol. The returned map is
 // keyed on the graph node IDs.
+// If g is a graph.WeightedDirected, an edge-weighted PageRank is calculated.
 func PageRank(g graph.Directed, damp, tol float64) map[int64]float64 {
-	// PageRank is implemented according to "How Google Finds Your Needle
+	if g, ok := g.(graph.WeightedDirected); ok {
+		return edgeWeightedPageRank(g, damp, tol)
+	}
+	return pageRank(g, damp, tol)
+}
+
+// PageRankSparse returns the PageRank weights for nodes of the sparse directed
+// graph g using the given damping factor and terminating when the 2-norm of the
+// vector difference between iterations is below tol. The returned map is
+// keyed on the graph node IDs.
+// If g is a graph.WeightedDirected, an edge-weighted PageRank is calculated.
+func PageRankSparse(g graph.Directed, damp, tol float64) map[int64]float64 {
+	if g, ok := g.(graph.WeightedDirected); ok {
+		return edgeWeightedPageRankSparse(g, damp, tol)
+	}
+	return pageRankSparse(g, damp, tol)
+}
+
+// edgeWeightedPageRank returns the PageRank weights for nodes of the weighted directed graph g
+// using the given damping factor and terminating when the 2-norm of the
+// vector difference between iterations is below tol. The returned map is
+// keyed on the graph node IDs.
+func edgeWeightedPageRank(g graph.WeightedDirected, damp, tol float64) map[int64]float64 {
+	// edgeWeightedPageRank is implemented according to "How Google Finds Your Needle
+	// in the Web's Haystack" with the modification that
+	// the columns of hyperlink matrix H are calculated with edge weights.
+	//
+	// G.I^k = alpha.H.I^k + alpha.A.I^k + (1-alpha).1/n.1.I^k
+	//
+	// http://www.ams.org/samplings/feature-column/fcarc-pagerank
+
+	nodes := g.Nodes()
+	indexOf := make(map[int64]int, len(nodes))
+	for i, n := range nodes {
+		indexOf[n.ID()] = i
+	}
+
+	m := mat.NewDense(len(nodes), len(nodes), nil)
+	dangling := damp / float64(len(nodes))
+	for j, u := range nodes {
+		to := g.From(u.ID())
+		var z float64
+		for _, v := range to {
+			if w, ok := g.Weight(u.ID(), v.ID()); ok {
+				z += w
+			}
+		}
+		if z != 0 {
+			for _, v := range to {
+				if w, ok := g.Weight(u.ID(), v.ID()); ok {
+					m.Set(indexOf[v.ID()], j, (w*damp)/z)
+				}
+			}
+		} else {
+			for i := range nodes {
+				m.Set(i, j, dangling)
+			}
+		}
+	}
+
+	matrix := m.RawMatrix().Data
+	dt := (1 - damp) / float64(len(nodes))
+	for i := range matrix {
+		matrix[i] += dt
+	}
+
+	last := make([]float64, len(nodes))
+	for i := range last {
+		last[i] = 1
+	}
+	lastV := mat.NewVecDense(len(nodes), last)
+
+	vec := make([]float64, len(nodes))
+	var sum float64
+	for i := range vec {
+		r := rand.NormFloat64()
+		sum += r
+		vec[i] = r
+	}
+	f := 1 / sum
+	for i := range vec {
+		vec[i] *= f
+	}
+	v := mat.NewVecDense(len(nodes), vec)
+
+	for {
+		lastV, v = v, lastV
+		v.MulVec(m, lastV)
+		if normDiff(vec, last) < tol {
+			break
+		}
+	}
+
+	ranks := make(map[int64]float64, len(nodes))
+	for i, r := range v.RawVector().Data {
+		ranks[nodes[i].ID()] = r
+	}
+
+	return ranks
+}
+
+// edgeWeightedPageRankSparse returns the PageRank weights for nodes of the sparse weighted directed
+// graph g using the given damping factor and terminating when the 2-norm of the
+// vector difference between iterations is below tol. The returned map is
+// keyed on the graph node IDs.
+func edgeWeightedPageRankSparse(g graph.WeightedDirected, damp, tol float64) map[int64]float64 {
+	// edgeWeightedPageRankSparse is implemented according to "How Google Finds Your Needle
+	// in the Web's Haystack" with the modification that
+	// the columns of hyperlink matrix H are calculated with edge weights.
+	//
+	// G.I^k = alpha.H.I^k + alpha.A.I^k + (1-alpha).1/n.1.I^k
+	//
+	// http://www.ams.org/samplings/feature-column/fcarc-pagerank
+
+	nodes := g.Nodes()
+	indexOf := make(map[int64]int, len(nodes))
+	for i, n := range nodes {
+		indexOf[n.ID()] = i
+	}
+
+	m := make(rowCompressedMatrix, len(nodes))
+	var dangling compressedRow
+	df := damp / float64(len(nodes))
+	for j, u := range nodes {
+		to := g.From(u.ID())
+		var z float64
+		for _, v := range to {
+			if w, ok := g.Weight(u.ID(), v.ID()); ok {
+				z += w
+			}
+		}
+		if z != 0 {
+			for _, v := range to {
+				if w, ok := g.Weight(u.ID(), v.ID()); ok {
+					m.addTo(indexOf[v.ID()], j, (w*damp)/z)
+				}
+			}
+		} else {
+			dangling.addTo(j, df)
+		}
+	}
+
+	last := make([]float64, len(nodes))
+	for i := range last {
+		last[i] = 1
+	}
+	lastV := mat.NewVecDense(len(nodes), last)
+
+	vec := make([]float64, len(nodes))
+	var sum float64
+	for i := range vec {
+		r := rand.NormFloat64()
+		sum += r
+		vec[i] = r
+	}
+	f := 1 / sum
+	for i := range vec {
+		vec[i] *= f
+	}
+	v := mat.NewVecDense(len(nodes), vec)
+
+	dt := (1 - damp) / float64(len(nodes))
+	for {
+		lastV, v = v, lastV
+
+		m.mulVecUnitary(v, lastV)          // First term of the G matrix equation;
+		with := dangling.dotUnitary(lastV) // Second term;
+		away := onesDotUnitary(dt, lastV)  // Last term.
+
+		floats.AddConst(with+away, v.RawVector().Data)
+		if normDiff(vec, last) < tol {
+			break
+		}
+	}
+
+	ranks := make(map[int64]float64, len(nodes))
+	for i, r := range v.RawVector().Data {
+		ranks[nodes[i].ID()] = r
+	}
+
+	return ranks
+}
+
+// pageRank returns the PageRank weights for nodes of the directed graph g
+// using the given damping factor and terminating when the 2-norm of the
+// vector difference between iterations is below tol. The returned map is
+// keyed on the graph node IDs.
+func pageRank(g graph.Directed, damp, tol float64) map[int64]float64 {
+	// pageRank is implemented according to "How Google Finds Your Needle
 	// in the Web's Haystack".
 	//
 	// G.I^k = alpha.S.I^k + (1-alpha).1/n.1.I^k
@@ -87,12 +276,12 @@ func PageRank(g graph.Directed, damp, tol float64) map[int64]float64 {
 	return ranks
 }
 
-// PageRankSparse returns the PageRank weights for nodes of the sparse directed
+// pageRankSparse returns the PageRank weights for nodes of the sparse directed
 // graph g using the given damping factor and terminating when the 2-norm of the
 // vector difference between iterations is below tol. The returned map is
 // keyed on the graph node IDs.
-func PageRankSparse(g graph.Directed, damp, tol float64) map[int64]float64 {
-	// PageRankSparse is implemented according to "How Google Finds Your Needle
+func pageRankSparse(g graph.Directed, damp, tol float64) map[int64]float64 {
+	// pageRankSparse is implemented according to "How Google Finds Your Needle
 	// in the Web's Haystack".
 	//
 	// G.I^k = alpha.H.I^k + alpha.A.I^k + (1-alpha).1/n.1.I^k
diff --git a/graph/network/page_test.go b/graph/network/page_test.go
index ac53148e..350db403 100644
--- a/graph/network/page_test.go
+++ b/graph/network/page_test.go
@@ -91,7 +91,7 @@ func TestPageRank(t *testing.T) {
 			g.SetEdge(simple.Edge{F: simple.Node(u), T: simple.Node(v)})
 		}
 	}
-	got := PageRank(g, test.damp, test.tol)
+	got := pageRank(g, test.damp, test.tol)
 	prec := 1 - int(math.Log10(test.wantTol))
 	for n := range test.g {
 		if !floats.EqualWithinAbsOrRel(got[int64(n)], test.want[int64(n)], test.wantTol, test.wantTol) {
@@ -115,7 +115,124 @@ func TestPageRankSparse(t *testing.T) {
 			g.SetEdge(simple.Edge{F: simple.Node(u), T: simple.Node(v)})
 		}
 	}
-	got := PageRankSparse(g, test.damp, test.tol)
+	got := pageRankSparse(g, test.damp, test.tol)
+	prec := 1 - int(math.Log10(test.wantTol))
+	for n := range test.g {
+		if !floats.EqualWithinAbsOrRel(got[int64(n)], test.want[int64(n)], test.wantTol, test.wantTol) {
+			t.Errorf("unexpected PageRank result for test %d:\ngot: %v\nwant:%v",
+				i, orderedFloats(got, prec), orderedFloats(test.want, prec))
+			break
+		}
+	}
+	}
+}
+
+var edgeWeightedPageRankTests = []struct {
+	g            []set
+	self, absent float64
+	edges        map[int]map[int64]float64
+	damp         float64
+	tol          float64
+
+	wantTol float64
+	want    map[int64]float64
+}{
+	{
+		// This test case expects the result produced by the following Python code,
+		// run on Python 3.6.4 with networkx version 2.1:
+		//
+		// >>> import networkx as nx
+		// >>> D = nx.DiGraph()
+		// >>> D.add_weighted_edges_from([('A', 'B', 0.3), ('A','C', 1.2), ('B', 'A', 0.4), ('C', 'B', 0.3), ('D', 'A', 0.3), ('D', 'B', 2.1)])
+		// >>> nx.pagerank(D, alpha=0.85, tol=1e-10)
+		// {'A': 0.3409109390701202, 'B': 0.3522682754411842, 'C': 0.2693207854886954, 'D': 0.037500000000000006}
+
+		g: []set{
+			A: linksTo(B, C),
+			B: linksTo(A),
+			C: linksTo(B),
+			D: linksTo(A, B),
+		},
+		edges: map[int]map[int64]float64{
+			A: {
+				B: 0.3,
+				C: 1.2,
+			},
+			B: {
+				A: 0.4,
+			},
+			C: {
+				B: 0.3,
+			},
+			D: {
+				A: 0.3,
+				B: 2.1,
+			},
+		},
+		damp: 0.85,
+		tol:  1e-10,
+
+		wantTol: 1e-8,
+		want: map[int64]float64{
+			A: 0.3409120160955594,
+			B: 0.3522678129306601,
+			C: 0.2693201709737804,
+			D: 0.037500000000000006,
+		},
+	},
+}
+
+func TestEdgeWeightedPageRank(t *testing.T) {
+	for i, test := range edgeWeightedPageRankTests {
+		g := simple.NewWeightedDirectedGraph(test.self, test.absent)
+		for u, e := range test.g {
+			// Add nodes that are not defined by an edge.
+			if !g.Has(int64(u)) {
+				g.AddNode(simple.Node(u))
+			}
+			ws, ok := test.edges[u]
+			if !ok {
+				t.Errorf("edges not found for %v", u)
+			}
+
+			for v := range e {
+				if w, ok := ws[v]; ok {
+					g.SetWeightedEdge(g.NewWeightedEdge(simple.Node(u), simple.Node(v), w))
+				}
+			}
+		}
+		got := edgeWeightedPageRank(g, test.damp, test.tol)
+		prec := 1 - int(math.Log10(test.wantTol))
+		for n := range test.g {
+			if !floats.EqualWithinAbsOrRel(got[int64(n)], test.want[int64(n)], test.wantTol, test.wantTol) {
+				t.Errorf("unexpected PageRank result for test %d:\ngot: %v\nwant:%v",
+					i, orderedFloats(got, prec), orderedFloats(test.want, prec))
+				break
+			}
+		}
+	}
+}
+
+func TestEdgeWeightedPageRankSparse(t *testing.T) {
+	for i, test := range edgeWeightedPageRankTests {
+		g := simple.NewWeightedDirectedGraph(test.self, test.absent)
+		for u, e := range test.g {
+			// Add nodes that are not defined by an edge.
+			if !g.Has(int64(u)) {
+				g.AddNode(simple.Node(u))
+			}
+			ws, ok := test.edges[u]
+			if !ok {
+				t.Errorf("edges not found for %v", u)
+			}
+
+			for v := range e {
+				if w, ok := ws[v]; ok {
+					g.SetWeightedEdge(g.NewWeightedEdge(simple.Node(u), simple.Node(v), w))
+				}
+			}
+		}
+		got := edgeWeightedPageRankSparse(g, test.damp, test.tol)
 		prec := 1 - int(math.Log10(test.wantTol))
 		for n := range test.g {
 			if !floats.EqualWithinAbsOrRel(got[int64(n)], test.want[int64(n)], test.wantTol, test.wantTol) {
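
A minimal usage sketch (not part of the patch) showing how the new edge-weighted path is reached through the exported API. It rebuilds the weighted graph from edgeWeightedPageRankTests, with node IDs 0-3 standing in for A-D; the package-main wrapper, the printed output, and that ID mapping are illustrative assumptions only.

	package main

	import (
		"fmt"

		"gonum.org/v1/gonum/graph/network"
		"gonum.org/v1/gonum/graph/simple"
	)

	func main() {
		// Self-loop and absent-edge weights of 0, as in the tests.
		g := simple.NewWeightedDirectedGraph(0, 0)
		edges := []struct {
			u, v int64
			w    float64
		}{
			{0, 1, 0.3}, {0, 2, 1.2}, // A->B, A->C
			{1, 0, 0.4},              // B->A
			{2, 1, 0.3},              // C->B
			{3, 0, 0.3}, {3, 1, 2.1}, // D->A, D->B
		}
		for _, e := range edges {
			g.SetWeightedEdge(g.NewWeightedEdge(simple.Node(e.u), simple.Node(e.v), e.w))
		}

		// g satisfies graph.WeightedDirected, so PageRank dispatches to
		// edgeWeightedPageRank; PageRankSparse likewise uses the sparse variant.
		fmt.Println(network.PageRank(g, 0.85, 1e-10))
	}

Because PageRank and PageRankSparse type-assert on graph.WeightedDirected, existing callers that pass an unweighted graph.Directed keep the previous behaviour through the now-unexported pageRank and pageRankSparse.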