mirror of
https://github.com/gonum/gonum.git
synced 2025-10-17 04:31:03 +08:00
stat: add principal components calculation
This commit is contained in:
81
pca.go
Normal file
81
pca.go
Normal file
@@ -0,0 +1,81 @@
|
||||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package stat
|
||||
|
||||
import (
|
||||
"math"
|
||||
|
||||
"github.com/gonum/floats"
|
||||
"github.com/gonum/matrix"
|
||||
"github.com/gonum/matrix/mat64"
|
||||
)
|
||||
|
||||
// PrincipalComponents returns the principal component direction vectors and
|
||||
// the column variances of the principle component scores, vecs * a, computed
|
||||
// using the singular value decomposition of the input. The input a is an n×d
|
||||
// matrix where each row is an observation and each column represents a variable.
|
||||
//
|
||||
// PrincipalComponents centers the variables but does not scale the variance.
|
||||
//
|
||||
// The slice weights is used to weight the observations. If weights is nil,
|
||||
// each weight is considered to have a value of one, otherwise the length of
|
||||
// weights must match the number of observations or PrincipalComponents will
|
||||
// panic.
|
||||
//
|
||||
// On successful completion, the principal component direction vectors are
|
||||
// returned in vecs as a d×min(n, d) matrix, and the variances are returned in
|
||||
// vars as a min(n, d)-long slice in descending sort order.
|
||||
//
|
||||
// If no singular value decomposition is possible, vecs and vars are returned
|
||||
// nil and ok is returned false.
|
||||
func PrincipalComponents(a mat64.Matrix, weights []float64) (vecs *mat64.Dense, vars []float64, ok bool) {
|
||||
n, d := a.Dims()
|
||||
if weights != nil && len(weights) != n {
|
||||
panic("stat: len(weights) != observations")
|
||||
}
|
||||
|
||||
centered := mat64.NewDense(n, d, nil)
|
||||
col := make([]float64, n)
|
||||
for j := 0; j < d; j++ {
|
||||
mat64.Col(col, j, a)
|
||||
floats.AddConst(-Mean(col, weights), col)
|
||||
centered.SetCol(j, col)
|
||||
}
|
||||
for i, w := range weights {
|
||||
floats.Scale(math.Sqrt(w), centered.RawRowView(i))
|
||||
}
|
||||
|
||||
kind := matrix.SVDFull
|
||||
if n > d {
|
||||
kind = matrix.SVDThin
|
||||
}
|
||||
var svd mat64.SVD
|
||||
ok = svd.Factorize(centered, kind)
|
||||
if !ok {
|
||||
return nil, nil, false
|
||||
}
|
||||
|
||||
var v mat64.Dense
|
||||
v.VFromSVD(&svd)
|
||||
vecs = v.View(0, 0, d, min(n, d)).(*mat64.Dense)
|
||||
vars = svd.Values(nil)
|
||||
var f float64
|
||||
if weights == nil {
|
||||
f = 1 / float64(n-1)
|
||||
} else {
|
||||
f = 1 / (floats.Sum(weights) - 1)
|
||||
}
|
||||
for i, v := range vars {
|
||||
vars[i] = f * v * v
|
||||
}
|
||||
return vecs, vars, true
|
||||
}
|
||||
|
||||
func min(a, b int) int {
|
||||
if a < b {
|
||||
return a
|
||||
}
|
||||
return b
|
||||
}
|
59
pca_example_test.go
Normal file
59
pca_example_test.go
Normal file
@@ -0,0 +1,59 @@
|
||||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package stat_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/gonum/matrix/mat64"
|
||||
"github.com/gonum/stat"
|
||||
)
|
||||
|
||||
func ExamplePrincipalComponents() {
|
||||
// iris is a truncated sample of the Fisher's Iris dataset.
|
||||
n := 10
|
||||
d := 4
|
||||
iris := mat64.NewDense(n, d, []float64{
|
||||
5.1, 3.5, 1.4, 0.2,
|
||||
4.9, 3.0, 1.4, 0.2,
|
||||
4.7, 3.2, 1.3, 0.2,
|
||||
4.6, 3.1, 1.5, 0.2,
|
||||
5.0, 3.6, 1.4, 0.2,
|
||||
5.4, 3.9, 1.7, 0.4,
|
||||
4.6, 3.4, 1.4, 0.3,
|
||||
5.0, 3.4, 1.5, 0.2,
|
||||
4.4, 2.9, 1.4, 0.2,
|
||||
4.9, 3.1, 1.5, 0.1,
|
||||
})
|
||||
|
||||
// Calculate the principal component direction vectors
|
||||
// and variances.
|
||||
vecs, vars, ok := stat.PrincipalComponents(iris, nil)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
fmt.Printf("variances = %.4f\n\n", vars)
|
||||
|
||||
// Project the data onto the first 2 principal components.
|
||||
k := 2
|
||||
var proj mat64.Dense
|
||||
proj.Mul(iris, vecs.View(0, 0, d, k))
|
||||
|
||||
fmt.Printf("proj = %.4f", mat64.Formatted(&proj, mat64.Prefix(" ")))
|
||||
|
||||
// Output:
|
||||
// variances = [0.1666 0.0207 0.0079 0.0019]
|
||||
//
|
||||
// proj = ⎡-6.1686 1.4659⎤
|
||||
// ⎢-5.6767 1.6459⎥
|
||||
// ⎢-5.6699 1.3642⎥
|
||||
// ⎢-5.5643 1.3816⎥
|
||||
// ⎢-6.1734 1.3309⎥
|
||||
// ⎢-6.7278 1.4021⎥
|
||||
// ⎢-5.7743 1.1498⎥
|
||||
// ⎢-6.0466 1.4714⎥
|
||||
// ⎢-5.2709 1.3570⎥
|
||||
// ⎣-5.7533 1.6207⎦
|
||||
}
|
159
pca_test.go
Normal file
159
pca_test.go
Normal file
@@ -0,0 +1,159 @@
|
||||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package stat
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/gonum/floats"
|
||||
"github.com/gonum/matrix/mat64"
|
||||
)
|
||||
|
||||
func TestPrincipalComponents(t *testing.T) {
|
||||
for i, test := range []struct {
|
||||
data mat64.Matrix
|
||||
weights []float64
|
||||
wantVecs *mat64.Dense
|
||||
wantVars []float64
|
||||
epsilon float64
|
||||
}{
|
||||
// Test results verified using R.
|
||||
{
|
||||
data: mat64.NewDense(3, 3, []float64{
|
||||
1, 2, 3,
|
||||
4, 5, 6,
|
||||
7, 8, 9,
|
||||
}),
|
||||
wantVecs: mat64.NewDense(3, 3, []float64{
|
||||
0.5773502691896258, 0.8164965809277261, 0,
|
||||
0.577350269189626, -0.4082482904638632, -0.7071067811865476,
|
||||
0.5773502691896258, -0.4082482904638631, 0.7071067811865475,
|
||||
}),
|
||||
wantVars: []float64{27, 0, 0},
|
||||
epsilon: 1e-12,
|
||||
},
|
||||
{ // Truncated iris data.
|
||||
data: mat64.NewDense(10, 4, []float64{
|
||||
5.1, 3.5, 1.4, 0.2,
|
||||
4.9, 3.0, 1.4, 0.2,
|
||||
4.7, 3.2, 1.3, 0.2,
|
||||
4.6, 3.1, 1.5, 0.2,
|
||||
5.0, 3.6, 1.4, 0.2,
|
||||
5.4, 3.9, 1.7, 0.4,
|
||||
4.6, 3.4, 1.4, 0.3,
|
||||
5.0, 3.4, 1.5, 0.2,
|
||||
4.4, 2.9, 1.4, 0.2,
|
||||
4.9, 3.1, 1.5, 0.1,
|
||||
}),
|
||||
wantVecs: mat64.NewDense(4, 4, []float64{
|
||||
-0.6681110197952722, 0.7064764857539533, -0.14026590216895132, -0.18666578956412125,
|
||||
-0.7166344774801547, -0.6427036135482664, -0.135650285905254, 0.23444848208629923,
|
||||
-0.164411275166307, 0.11898477441068218, 0.9136367900709548, 0.35224901970831746,
|
||||
-0.11415613655453069, -0.2714141920887426, 0.35664028439226514, -0.8866286823515034,
|
||||
}),
|
||||
wantVars: []float64{0.1665786313282786, 0.02065509475412993, 0.007944620317765855, 0.0019327647109368329},
|
||||
epsilon: 1e-12,
|
||||
},
|
||||
{ // Truncated iris data transposed to check for operation on fat input.
|
||||
data: mat64.NewDense(10, 4, []float64{
|
||||
5.1, 3.5, 1.4, 0.2,
|
||||
4.9, 3.0, 1.4, 0.2,
|
||||
4.7, 3.2, 1.3, 0.2,
|
||||
4.6, 3.1, 1.5, 0.2,
|
||||
5.0, 3.6, 1.4, 0.2,
|
||||
5.4, 3.9, 1.7, 0.4,
|
||||
4.6, 3.4, 1.4, 0.3,
|
||||
5.0, 3.4, 1.5, 0.2,
|
||||
4.4, 2.9, 1.4, 0.2,
|
||||
4.9, 3.1, 1.5, 0.1,
|
||||
}).T(),
|
||||
wantVecs: mat64.NewDense(10, 4, []float64{
|
||||
-0.3366602459946619, -0.1373634006401213, 0.3465102523547623, -0.10290179303893479,
|
||||
-0.31381852053861975, 0.5197145790632827, 0.5567296129086686, -0.15923062170153618,
|
||||
-0.30857197637565165, -0.07670930360819002, 0.36159923003337235, 0.3342301027853355,
|
||||
-0.29527124351656137, 0.16885455995353074, -0.5056204762881208, 0.32580913261444344,
|
||||
-0.3327611073694004, -0.39365834489416474, 0.04900050959307464, 0.46812879383236555,
|
||||
-0.34445484362044815, -0.2985206914561878, -0.1009714701361799, -0.16803618186050803,
|
||||
-0.2986246350957691, -0.4222037823717799, -0.11838613462182519, -0.580283530375069,
|
||||
-0.325911246223126, 0.024366468758217238, -0.12082035131864265, 0.16756027181337868,
|
||||
-0.2814284432361538, 0.240812316260054, -0.24061437569068145, -0.365034616264623,
|
||||
-0.31906138507685167, 0.4423912824105986, -0.2906412122303604, 0.027551046870337714,
|
||||
}),
|
||||
wantVars: []float64{41.8851906634233, 0.07762619213464989, 0.010516477775373585, 0},
|
||||
epsilon: 1e-12,
|
||||
},
|
||||
{ // Truncated iris data unitary weights.
|
||||
data: mat64.NewDense(10, 4, []float64{
|
||||
5.1, 3.5, 1.4, 0.2,
|
||||
4.9, 3.0, 1.4, 0.2,
|
||||
4.7, 3.2, 1.3, 0.2,
|
||||
4.6, 3.1, 1.5, 0.2,
|
||||
5.0, 3.6, 1.4, 0.2,
|
||||
5.4, 3.9, 1.7, 0.4,
|
||||
4.6, 3.4, 1.4, 0.3,
|
||||
5.0, 3.4, 1.5, 0.2,
|
||||
4.4, 2.9, 1.4, 0.2,
|
||||
4.9, 3.1, 1.5, 0.1,
|
||||
}),
|
||||
weights: []float64{1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
|
||||
wantVecs: mat64.NewDense(4, 4, []float64{
|
||||
-0.6681110197952722, 0.7064764857539533, -0.14026590216895132, -0.18666578956412125,
|
||||
-0.7166344774801547, -0.6427036135482664, -0.135650285905254, 0.23444848208629923,
|
||||
-0.164411275166307, 0.11898477441068218, 0.9136367900709548, 0.35224901970831746,
|
||||
-0.11415613655453069, -0.2714141920887426, 0.35664028439226514, -0.8866286823515034,
|
||||
}),
|
||||
wantVars: []float64{0.1665786313282786, 0.02065509475412993, 0.007944620317765855, 0.0019327647109368329},
|
||||
epsilon: 1e-12,
|
||||
},
|
||||
{ // Truncated iris data non-unitary weights.
|
||||
data: mat64.NewDense(10, 4, []float64{
|
||||
5.1, 3.5, 1.4, 0.2,
|
||||
4.9, 3.0, 1.4, 0.2,
|
||||
4.7, 3.2, 1.3, 0.2,
|
||||
4.6, 3.1, 1.5, 0.2,
|
||||
5.0, 3.6, 1.4, 0.2,
|
||||
5.4, 3.9, 1.7, 0.4,
|
||||
4.6, 3.4, 1.4, 0.3,
|
||||
5.0, 3.4, 1.5, 0.2,
|
||||
4.4, 2.9, 1.4, 0.2,
|
||||
4.9, 3.1, 1.5, 0.1,
|
||||
}),
|
||||
weights: []float64{2, 3, 1, 1, 1, 1, 1, 1, 1, 2},
|
||||
wantVecs: mat64.NewDense(4, 4, []float64{
|
||||
-0.618936145422414, 0.763069301531647, 0.124857741232537, 0.138035623677211,
|
||||
-0.763958271606519, -0.603881770702898, 0.118267155321333, -0.194184052457746,
|
||||
-0.143552119754944, 0.090014599564871, -0.942209377020044, -0.289018426115945,
|
||||
-0.112599271966947, -0.212012782487076, -0.287515067921680, 0.927203898682805,
|
||||
}),
|
||||
wantVars: []float64{0.129621985550623, 0.022417487771598, 0.006454461065715, 0.002495076601075},
|
||||
epsilon: 1e-12,
|
||||
},
|
||||
} {
|
||||
vecs, vars, ok := PrincipalComponents(test.data, test.weights)
|
||||
if !ok {
|
||||
t.Errorf("unexpected SVD failure for test %d", i)
|
||||
continue
|
||||
}
|
||||
if !mat64.EqualApprox(vecs, test.wantVecs, test.epsilon) {
|
||||
t.Errorf("%d: unexpected PCA result got:\n%v\nwant:\n%v",
|
||||
i, mat64.Formatted(vecs), mat64.Formatted(test.wantVecs))
|
||||
}
|
||||
if !approxEqual(vars, test.wantVars, test.epsilon) {
|
||||
t.Errorf("%d: unexpected variance result got:%v, want:%v", i, vars, test.wantVars)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func approxEqual(a, b []float64, epsilon float64) bool {
|
||||
if len(a) != len(b) {
|
||||
return false
|
||||
}
|
||||
for i, v := range a {
|
||||
if !floats.EqualWithinAbsOrRel(v, b[i], epsilon, epsilon) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
Reference in New Issue
Block a user