Files
gonum/graph/community/louvain_common.go
2024-08-17 08:41:18 +09:30

434 lines
13 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package community
import (
"fmt"
"slices"
"golang.org/x/exp/rand"
"gonum.org/v1/gonum/graph"
"gonum.org/v1/gonum/graph/internal/set"
)
// Q returns the modularity Q score of the graph g subdivided into the
// given communities at the given resolution. If communities is nil, the
// unclustered modularity score is returned. The resolution parameter
// is γ as defined in Reichardt and Bornholdt doi:10.1103/PhysRevE.74.016110.
// Q will panic if g has any edge with negative edge weight.
//
// If g is undirected, Q is calculated according to
//
// Q = 1/2m \sum_{ij} [ A_{ij} - (\gamma k_i k_j)/2m ] \delta(c_i,c_j),
//
// If g is directed, it is calculated according to
//
// Q = 1/m \sum_{ij} [ A_{ij} - (\gamma k_i^in k_j^out)/m ] \delta(c_i,c_j).
//
// graph.Undirect may be used as a shim to allow calculation of Q for
// directed graphs with the undirected modularity function.
func Q(g graph.Graph, communities [][]graph.Node, resolution float64) float64 {
switch g := g.(type) {
case graph.Undirected:
return qUndirected(g, communities, resolution)
case graph.Directed:
return qDirected(g, communities, resolution)
default:
panic(fmt.Sprintf("community: invalid graph type: %T", g))
}
}
// ReducedGraph is a modularised graph.
type ReducedGraph interface {
graph.Graph
// Communities returns the community memberships
// of the nodes in the graph used to generate
// the reduced graph.
Communities() [][]graph.Node
// Structure returns the community structure of
// the current level of the module clustering.
// Each slice in the returned value recursively
// describes the membership of a community at
// the current level by indexing via the node
// ID into the structure of the non-nil
// ReducedGraph returned by Expanded, or when the
// ReducedGraph is nil, by containing nodes
// from the original input graph.
//
// The returned value should not be mutated.
Structure() [][]graph.Node
// Expanded returns the next lower level of the
// module clustering or nil if at the lowest level.
//
// The returned ReducedGraph will be the same
// concrete type as the receiver.
Expanded() ReducedGraph
}
// Modularize returns the hierarchical modularization of g at the given resolution
// using the Louvain algorithm. If src is nil, rand.Intn is used as the random
// generator. Modularize will panic if g has any edge with negative edge weight.
//
// If g is undirected it is modularised to minimise
//
// Q = 1/2m \sum_{ij} [ A_{ij} - (\gamma k_i k_j)/2m ] \delta(c_i,c_j),
//
// If g is directed it is modularised to minimise
//
// Q = 1/m \sum_{ij} [ A_{ij} - (\gamma k_i^in k_j^out)/m ] \delta(c_i,c_j).
//
// The concrete type of the ReducedGraph will be a pointer to either a
// ReducedUndirected or a ReducedDirected depending on the type of g.
//
// graph.Undirect may be used as a shim to allow modularization of
// directed graphs with the undirected modularity function.
func Modularize(g graph.Graph, resolution float64, src rand.Source) ReducedGraph {
switch g := g.(type) {
case graph.Undirected:
return louvainUndirected(g, resolution, src)
case graph.Directed:
return louvainDirected(g, resolution, src)
default:
panic(fmt.Sprintf("community: invalid graph type: %T", g))
}
}
// Multiplex is a multiplex graph.
type Multiplex interface {
// Nodes returns the nodes
// for the multiplex graph.
// All layers must refer to the same
// set of nodes.
Nodes() graph.Nodes
// Depth returns the number of layers
// in the multiplex graph.
Depth() int
}
// QMultiplex returns the modularity Q score of the multiplex graph layers
// subdivided into the given communities at the given resolutions and weights. Q is
// returned as the vector of weighted Q scores for each layer of the multiplex graph.
// If communities is nil, the unclustered modularity score is returned.
// If weights is nil layers are equally weighted, otherwise the length of
// weights must equal the number of layers. If resolutions is nil, a resolution
// of 1.0 is used for all layers, otherwise either a single element slice may be used
// to specify a global resolution, or the length of resolutions must equal the number
// of layers. The resolution parameter is γ as defined in Reichardt and Bornholdt
// doi:10.1103/PhysRevE.74.016110.
// QMultiplex will panic if the graph has any layer weight-scaled edge with
// negative edge weight.
//
// If g is undirected, Q is calculated according to
//
// Q_{layer} = w_{layer} \sum_{ij} [ A_{layer}*_{ij} - (\gamma_{layer} k_i k_j)/2m_{layer} ] \delta(c_i,c_j),
//
// If g is directed, it is calculated according to
//
// Q_{layer} = w_{layer} \sum_{ij} [ A_{layer}*_{ij} - (\gamma_{layer} k_i^in k_j^out)/m_{layer} ] \delta(c_i,c_j).
//
// Note that Q values for multiplex graphs are not scaled by the total layer edge weight.
//
// graph.Undirect may be used as a shim to allow calculation of Q for
// directed graphs.
func QMultiplex(g Multiplex, communities [][]graph.Node, weights, resolutions []float64) []float64 {
if weights != nil && len(weights) != g.Depth() {
panic("community: weights vector length mismatch")
}
if resolutions != nil && len(resolutions) != 1 && len(resolutions) != g.Depth() {
panic("community: resolutions vector length mismatch")
}
switch g := g.(type) {
case UndirectedMultiplex:
return qUndirectedMultiplex(g, communities, weights, resolutions)
case DirectedMultiplex:
return qDirectedMultiplex(g, communities, weights, resolutions)
default:
panic(fmt.Sprintf("community: invalid graph type: %T", g))
}
}
// ReducedMultiplex is a modularised multiplex graph.
type ReducedMultiplex interface {
Multiplex
// Communities returns the community memberships
// of the nodes in the graph used to generate
// the reduced graph.
Communities() [][]graph.Node
// Structure returns the community structure of
// the current level of the module clustering.
// Each slice in the returned value recursively
// describes the membership of a community at
// the current level by indexing via the node
// ID into the structure of the non-nil
// ReducedGraph returned by Expanded, or when the
// ReducedGraph is nil, by containing nodes
// from the original input graph.
//
// The returned value should not be mutated.
Structure() [][]graph.Node
// Expanded returns the next lower level of the
// module clustering or nil if at the lowest level.
//
// The returned ReducedGraph will be the same
// concrete type as the receiver.
Expanded() ReducedMultiplex
}
// ModularizeMultiplex returns the hierarchical modularization of g at the given resolution
// using the Louvain algorithm. If all is true and g have negatively weighted layers, all
// communities will be searched during the modularization. If src is nil, rand.Intn is
// used as the random generator. ModularizeMultiplex will panic if g has any edge with
// edge weight that does not sign-match the layer weight.
//
// If g is undirected it is modularised to minimise
//
// Q = \sum w_{layer} \sum_{ij} [ A_{layer}*_{ij} - (\gamma_{layer} k_i k_j)/2m ] \delta(c_i,c_j).
//
// If g is directed it is modularised to minimise
//
// Q = \sum w_{layer} \sum_{ij} [ A_{layer}*_{ij} - (\gamma_{layer} k_i^in k_j^out)/m_{layer} ] \delta(c_i,c_j).
//
// The concrete type of the ReducedMultiplex will be a pointer to a
// ReducedUndirectedMultiplex.
//
// graph.Undirect may be used as a shim to allow modularization of
// directed graphs with the undirected modularity function.
func ModularizeMultiplex(g Multiplex, weights, resolutions []float64, all bool, src rand.Source) ReducedMultiplex {
if weights != nil && len(weights) != g.Depth() {
panic("community: weights vector length mismatch")
}
if resolutions != nil && len(resolutions) != 1 && len(resolutions) != g.Depth() {
panic("community: resolutions vector length mismatch")
}
switch g := g.(type) {
case UndirectedMultiplex:
return louvainUndirectedMultiplex(g, weights, resolutions, all, src)
case DirectedMultiplex:
return louvainDirectedMultiplex(g, weights, resolutions, all, src)
default:
panic(fmt.Sprintf("community: invalid graph type: %T", g))
}
}
// undirectedEdges is the edge structure of a reduced undirected graph.
type undirectedEdges struct {
// edges and weights is the set
// of edges between nodes.
// weights is keyed such that
// the first element of the key
// is less than the second.
edges [][]int
weights map[[2]int]float64
}
// directedEdges is the edge structure of a reduced directed graph.
type directedEdges struct {
// edgesFrom, edgesTo and weights
// is the set of edges between nodes.
edgesFrom [][]int
edgesTo [][]int
weights map[[2]int]float64
}
// isValidID returns whether id is a valid ID for a community,
// multiplexCommunity or node. These are all graph.Node types
// stored in []T with a mapping between their index and their ID
// so IDs must be positive and fit within the int type.
func isValidID(id int64) bool {
return id == int64(int(id)) && id >= 0
}
// community is a reduced graph node describing its membership.
type community struct {
// community graphs are internal, in-memory
// with dense IDs, so id is always an int.
id int
nodes []graph.Node
weight float64
}
func (n community) ID() int64 { return int64(n.id) }
// edge is a reduced graph edge.
type edge struct {
from, to community
weight float64
}
func (e edge) From() graph.Node { return e.from }
func (e edge) To() graph.Node { return e.to }
func (e edge) ReversedEdge() graph.Edge { e.from, e.to = e.to, e.from; return e }
func (e edge) Weight() float64 { return e.weight }
// multiplexCommunity is a reduced multiplex graph node describing its membership.
type multiplexCommunity struct {
// community graphs are internal, in-memory
// with dense IDs, so id is always an int.
id int
nodes []graph.Node
weights []float64
}
func (n multiplexCommunity) ID() int64 { return int64(n.id) }
// multiplexEdge is a reduced graph edge for a multiplex graph.
type multiplexEdge struct {
from, to multiplexCommunity
weight float64
}
func (e multiplexEdge) From() graph.Node { return e.from }
func (e multiplexEdge) To() graph.Node { return e.to }
func (e multiplexEdge) ReversedEdge() graph.Edge { e.from, e.to = e.to, e.from; return e }
func (e multiplexEdge) Weight() float64 { return e.weight }
// commIdx is an index of a node in a community held by a localMover.
type commIdx struct {
community int
node int
}
// node is defined to avoid an import of .../graph/simple. node is
// used in in-memory, dense ID graphs and so is always an int.
type node int
func (n node) ID() int64 { return int64(n) }
// minTaker is a set iterator.
type minTaker interface {
TakeMin(p *int) bool
}
// dense is a dense integer set iterator.
type dense struct {
pos int
n int
}
// TakeMin mimics intsets.Sparse TakeMin for dense sets. If the dense
// iterator position is less than the iterator size, TakeMin sets *p
// to the iterator position and increments the position and returns
// true.
// Otherwise, it returns false and *p is undefined.
func (d *dense) TakeMin(p *int) bool {
if d.pos >= d.n {
return false
}
*p = d.pos
d.pos++
return true
}
// slice is a sparse integer set iterator.
type slice struct {
pos int
elems []int
}
// newSlice returns a new slice of elements from s, sorted ascending.
func newSlice(s set.Ints[int]) *slice {
elems := make([]int, 0, len(s))
for i := range s {
elems = append(elems, i)
}
slices.Sort(elems)
return &slice{elems: elems}
}
// TakeMin mimics intsets.Sparse TakeMin for a sorted set. If the set
// iterator position is less than the iterator size, TakeMin sets *p
// to the iterator position's element and increments the position
// and returns true.
// Otherwise, it returns false and *p is undefined.
func (s *slice) TakeMin(p *int) bool {
if s.pos >= len(s.elems) {
return false
}
*p = s.elems[s.pos]
s.pos++
return true
}
const (
negativeWeight = "community: unexpected negative edge weight"
positiveWeight = "community: unexpected positive edge weight"
// deltaQtol is the tolerance for progression of the local moving heuristic's improvement of Q.
deltaQtol = 1e-15
)
// positiveWeightFuncFor returns a constructed weight function for the
// positively weighted g. Unweighted graphs have unit weight for existing
// edges.
func positiveWeightFuncFor(g graph.Graph) func(xid, yid int64) float64 {
if wg, ok := g.(graph.Weighted); ok {
return func(xid, yid int64) float64 {
w, ok := wg.Weight(xid, yid)
if !ok {
return 0
}
if w < 0 {
panic(negativeWeight)
}
return w
}
}
return func(xid, yid int64) float64 {
e := g.Edge(xid, yid)
if e == nil {
return 0
}
return 1
}
}
// negativeWeightFuncFor returns a constructed weight function for the
// negatively weighted g. Unweighted graphs have unit weight for existing
// edges.
func negativeWeightFuncFor(g graph.Graph) func(xid, yid int64) float64 {
if wg, ok := g.(graph.Weighted); ok {
return func(xid, yid int64) float64 {
w, ok := wg.Weight(xid, yid)
if !ok {
return 0
}
if w > 0 {
panic(positiveWeight)
}
return -w
}
}
return func(xid, yid int64) float64 {
e := g.Edge(xid, yid)
if e == nil {
return 0
}
return 1
}
}
// depth returns max(1, len(weights)). It is used to ensure
// that multiplex community weights are properly initialised.
func depth(weights []float64) int {
if weights == nil {
return 1
}
return len(weights)
}