mirror of
				https://github.com/gonum/gonum.git
				synced 2025-10-27 01:00:26 +08:00 
			
		
		
		
	
		
			
				
	
	
		
			167 lines
		
	
	
		
			6.1 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			167 lines
		
	
	
		
			6.1 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| // Copyright ©2016 The Gonum Authors. All rights reserved.
 | |
| // Use of this source code is governed by a BSD-style
 | |
| // license that can be found in the LICENSE file.
 | |
| 
 | |
| package stat_test
 | |
| 
 | |
| import (
 | |
| 	"fmt"
 | |
| 	"log"
 | |
| 
 | |
| 	"gonum.org/v1/gonum/floats"
 | |
| 	"gonum.org/v1/gonum/mat"
 | |
| 	"gonum.org/v1/gonum/stat"
 | |
| )
 | |
| 
 | |
| // symView is a helper for getting a View of a SymDense.
 | |
| type symView struct {
 | |
| 	sym *mat.SymDense
 | |
| 
 | |
| 	i, j, r, c int
 | |
| }
 | |
| 
 | |
| func (s symView) Dims() (r, c int) { return s.r, s.c }
 | |
| 
 | |
| func (s symView) At(i, j int) float64 {
 | |
| 	if i < 0 || s.r <= i {
 | |
| 		panic("i out of bounds")
 | |
| 	}
 | |
| 	if j < 0 || s.c <= j {
 | |
| 		panic("j out of bounds")
 | |
| 	}
 | |
| 	return s.sym.At(s.i+i, s.j+j)
 | |
| }
 | |
| 
 | |
| func (s symView) T() mat.Matrix { return mat.Transpose{Matrix: s} }
 | |
| 
 | |
| func ExampleCC() {
 | |
| 	// This example is directly analogous to Example 3.5 on page 87 of
 | |
| 	// Koch, Inge. Analysis of multivariate and high-dimensional data.
 | |
| 	// Vol. 32. Cambridge University Press, 2013. ISBN: 9780521887939
 | |
| 
 | |
| 	// bostonData is the Boston Housing Data of Harrison and Rubinfeld (1978)
 | |
| 	n, _ := bostonData.Dims()
 | |
| 	var xd, yd = 7, 4
 | |
| 	// The variables (columns) of bostonData can be partitioned into two sets:
 | |
| 	// those that deal with environmental/social variables (xdata), and those
 | |
| 	// that contain information regarding the individual (ydata). Because the
 | |
| 	// variables can be naturally partitioned in this way, these data are
 | |
| 	// appropriate for canonical correlation analysis. The columns (variables)
 | |
| 	// of xdata are, in order:
 | |
| 	//  per capita crime rate by town,
 | |
| 	//  proportion of non-retail business acres per town,
 | |
| 	//  nitric oxide concentration (parts per 10 million),
 | |
| 	//  weighted distances to Boston employment centres,
 | |
| 	//  index of accessibility to radial highways,
 | |
| 	//  pupil-teacher ratio by town, and
 | |
| 	//  proportion of blacks by town.
 | |
| 	xdata := bostonData.Slice(0, n, 0, xd)
 | |
| 
 | |
| 	// The columns (variables) of ydata are, in order:
 | |
| 	//  average number of rooms per dwelling,
 | |
| 	//  proportion of owner-occupied units built prior to 1940,
 | |
| 	//  full-value property-tax rate per $10000, and
 | |
| 	//  median value of owner-occupied homes in $1000s.
 | |
| 	ydata := bostonData.Slice(0, n, xd, xd+yd)
 | |
| 
 | |
| 	// For comparison, calculate the correlation matrix for the original data.
 | |
| 	var cor mat.SymDense
 | |
| 	stat.CorrelationMatrix(&cor, bostonData, nil)
 | |
| 
 | |
| 	// Extract just those correlations that are between xdata and ydata.
 | |
| 	var corRaw = symView{sym: &cor, i: 0, j: xd, r: xd, c: yd}
 | |
| 
 | |
| 	// Note that the strongest correlation between individual variables is 0.91
 | |
| 	// between the 5th variable of xdata (index of accessibility to radial
 | |
| 	// highways) and the 3rd variable of ydata (full-value property-tax rate per
 | |
| 	// $10000).
 | |
| 	fmt.Printf("corRaw = %.4f", mat.Formatted(corRaw, mat.Prefix("         ")))
 | |
| 
 | |
| 	// Calculate the canonical correlations.
 | |
| 	var cc stat.CC
 | |
| 	err := cc.CanonicalCorrelations(xdata, ydata, nil)
 | |
| 	if err != nil {
 | |
| 		log.Fatal(err)
 | |
| 	}
 | |
| 
 | |
| 	// Unpack cc.
 | |
| 	var pVecs, qVecs, phiVs, psiVs mat.Dense
 | |
| 	ccors := cc.CorrsTo(nil)
 | |
| 	cc.LeftTo(&pVecs, true)
 | |
| 	cc.RightTo(&qVecs, true)
 | |
| 	cc.LeftTo(&phiVs, false)
 | |
| 	cc.RightTo(&psiVs, false)
 | |
| 
 | |
| 	// Canonical Correlation Matrix, or the correlations between the sphered
 | |
| 	// data.
 | |
| 	var corSph mat.Dense
 | |
| 	corSph.CloneFrom(&pVecs)
 | |
| 	col := make([]float64, xd)
 | |
| 	for j := 0; j < yd; j++ {
 | |
| 		mat.Col(col, j, &corSph)
 | |
| 		floats.Scale(ccors[j], col)
 | |
| 		corSph.SetCol(j, col)
 | |
| 	}
 | |
| 	corSph.Product(&corSph, qVecs.T())
 | |
| 	fmt.Printf("\n\ncorSph = %.4f", mat.Formatted(&corSph, mat.Prefix("         ")))
 | |
| 
 | |
| 	// Canonical Correlations. Note that the first canonical correlation is
 | |
| 	// 0.95, stronger than the greatest correlation in the original data, and
 | |
| 	// much stronger than the greatest correlation in the sphered data.
 | |
| 	fmt.Printf("\n\nccors = %.4f", ccors)
 | |
| 
 | |
| 	// Left and right eigenvectors of the canonical correlation matrix.
 | |
| 	fmt.Printf("\n\npVecs = %.4f", mat.Formatted(&pVecs, mat.Prefix("        ")))
 | |
| 	fmt.Printf("\n\nqVecs = %.4f", mat.Formatted(&qVecs, mat.Prefix("        ")))
 | |
| 
 | |
| 	// Canonical Correlation Transforms. These can be useful as they represent
 | |
| 	// the canonical variables as linear combinations of the original variables.
 | |
| 	fmt.Printf("\n\nphiVs = %.4f", mat.Formatted(&phiVs, mat.Prefix("        ")))
 | |
| 	fmt.Printf("\n\npsiVs = %.4f", mat.Formatted(&psiVs, mat.Prefix("        ")))
 | |
| 
 | |
| 	// Output:
 | |
| 	// corRaw = ⎡-0.2192   0.3527   0.5828  -0.3883⎤
 | |
| 	//          ⎢-0.3917   0.6448   0.7208  -0.4837⎥
 | |
| 	//          ⎢-0.3022   0.7315   0.6680  -0.4273⎥
 | |
| 	//          ⎢ 0.2052  -0.7479  -0.5344   0.2499⎥
 | |
| 	//          ⎢-0.2098   0.4560   0.9102  -0.3816⎥
 | |
| 	//          ⎢-0.3555   0.2615   0.4609  -0.5078⎥
 | |
| 	//          ⎣ 0.1281  -0.2735  -0.4418   0.3335⎦
 | |
| 	//
 | |
| 	// corSph = ⎡ 0.0118   0.0525   0.2300  -0.1363⎤
 | |
| 	//          ⎢-0.1810   0.3213   0.3814  -0.1412⎥
 | |
| 	//          ⎢ 0.0166   0.2241   0.0104  -0.2235⎥
 | |
| 	//          ⎢ 0.0346  -0.5481  -0.0034  -0.1994⎥
 | |
| 	//          ⎢ 0.0303  -0.0956   0.7152   0.2039⎥
 | |
| 	//          ⎢-0.0298  -0.0022   0.0739  -0.3703⎥
 | |
| 	//          ⎣-0.1226  -0.0746  -0.3899   0.1541⎦
 | |
| 	//
 | |
| 	// ccors = [0.9451 0.6787 0.5714 0.2010]
 | |
| 	//
 | |
| 	// pVecs = ⎡-0.2574   0.0158   0.2122  -0.0946⎤
 | |
| 	//         ⎢-0.4837   0.3837   0.1474   0.6597⎥
 | |
| 	//         ⎢-0.0801   0.3494   0.3287  -0.2862⎥
 | |
| 	//         ⎢ 0.1278  -0.7337   0.4851   0.2248⎥
 | |
| 	//         ⎢-0.6969  -0.4342  -0.3603   0.0291⎥
 | |
| 	//         ⎢-0.0991   0.0503   0.6384   0.1022⎥
 | |
| 	//         ⎣ 0.4260   0.0323  -0.2290   0.6419⎦
 | |
| 	//
 | |
| 	// qVecs = ⎡ 0.0182  -0.1583  -0.0067  -0.9872⎤
 | |
| 	//         ⎢-0.2348   0.9483  -0.1462  -0.1554⎥
 | |
| 	//         ⎢-0.9701  -0.2406  -0.0252   0.0209⎥
 | |
| 	//         ⎣ 0.0593  -0.1330  -0.9889   0.0291⎦
 | |
| 	//
 | |
| 	// phiVs = ⎡-0.0027   0.0093   0.0490  -0.0155⎤
 | |
| 	//         ⎢-0.0429  -0.0242   0.0361   0.1839⎥
 | |
| 	//         ⎢-1.2248   5.6031   5.8094  -4.7927⎥
 | |
| 	//         ⎢-0.0044  -0.3424   0.4470   0.1150⎥
 | |
| 	//         ⎢-0.0742  -0.1193  -0.1116   0.0022⎥
 | |
| 	//         ⎢-0.0233   0.1046   0.3853  -0.0161⎥
 | |
| 	//         ⎣ 0.0001   0.0005  -0.0030   0.0082⎦
 | |
| 	//
 | |
| 	// psiVs = ⎡ 0.0302  -0.3002   0.0878  -1.9583⎤
 | |
| 	//         ⎢-0.0065   0.0392  -0.0118  -0.0061⎥
 | |
| 	//         ⎢-0.0052  -0.0046  -0.0023   0.0008⎥
 | |
| 	//         ⎣ 0.0020   0.0037  -0.1293   0.1038⎦
 | |
| }
 | 
