stat: add principal components calculation

This commit is contained in:
kortschak
2016-02-29 11:35:01 +10:30
parent 999fb0b610
commit 388e9295cf
3 changed files with 299 additions and 0 deletions

81
pca.go Normal file
View File

@@ -0,0 +1,81 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package stat
import (
"math"
"github.com/gonum/floats"
"github.com/gonum/matrix"
"github.com/gonum/matrix/mat64"
)
// PrincipalComponents returns the principal component direction vectors and
// the column variances of the principle component scores, vecs * a, computed
// using the singular value decomposition of the input. The input a is an n×d
// matrix where each row is an observation and each column represents a variable.
//
// PrincipalComponents centers the variables but does not scale the variance.
//
// The slice weights is used to weight the observations. If weights is nil,
// each weight is considered to have a value of one, otherwise the length of
// weights must match the number of observations or PrincipalComponents will
// panic.
//
// On successful completion, the principal component direction vectors are
// returned in vecs as a d×min(n, d) matrix, and the variances are returned in
// vars as a min(n, d)-long slice in descending sort order.
//
// If no singular value decomposition is possible, vecs and vars are returned
// nil and ok is returned false.
func PrincipalComponents(a mat64.Matrix, weights []float64) (vecs *mat64.Dense, vars []float64, ok bool) {
n, d := a.Dims()
if weights != nil && len(weights) != n {
panic("stat: len(weights) != observations")
}
centered := mat64.NewDense(n, d, nil)
col := make([]float64, n)
for j := 0; j < d; j++ {
mat64.Col(col, j, a)
floats.AddConst(-Mean(col, weights), col)
centered.SetCol(j, col)
}
for i, w := range weights {
floats.Scale(math.Sqrt(w), centered.RawRowView(i))
}
kind := matrix.SVDFull
if n > d {
kind = matrix.SVDThin
}
var svd mat64.SVD
ok = svd.Factorize(centered, kind)
if !ok {
return nil, nil, false
}
var v mat64.Dense
v.VFromSVD(&svd)
vecs = v.View(0, 0, d, min(n, d)).(*mat64.Dense)
vars = svd.Values(nil)
var f float64
if weights == nil {
f = 1 / float64(n-1)
} else {
f = 1 / (floats.Sum(weights) - 1)
}
for i, v := range vars {
vars[i] = f * v * v
}
return vecs, vars, true
}
func min(a, b int) int {
if a < b {
return a
}
return b
}