mirror of
				https://github.com/gonum/gonum.git
				synced 2025-10-26 00:30:27 +08:00 
			
		
		
		
	Initial commit of sample statistics functions to the stat package
This commit is contained in:
		
							
								
								
									
										507
									
								
								stat.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										507
									
								
								stat.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,507 @@ | |||||||
|  | package stat | ||||||
|  |  | ||||||
|  | import ( | ||||||
|  | 	"math" | ||||||
|  | 	"sort" | ||||||
|  |  | ||||||
|  | 	"github.com/gonum/floats" | ||||||
|  | ) | ||||||
|  |  | ||||||
|  | // Correlation returns the weighted correlation between the samples of x and y | ||||||
|  | // with the given means. | ||||||
|  | // 		sum_i {w_i (x_i - meanX) * (y_i - meanY)} / ((sum_j {w_j} - 1) * stdX * stdY) | ||||||
|  | // The lengths of x and y must be equal | ||||||
|  | // If weights is nil then all of the weights are 1 | ||||||
|  | // If weights is not nil, then len(x) must equal len(weights) | ||||||
|  | func Correlation(x []float64, meanX, stdX float64, y []float64, meanY, stdY float64, weights []float64) float64 { | ||||||
|  | 	return Covariance(x, meanX, y, meanY, weights) / (stdX * stdY) | ||||||
|  | } | ||||||
|  |  | ||||||
// Covariance returns the weighted sample covariance between x and y about
// the supplied means:
//
//	sum_i {w_i (x_i - meanX) * (y_i - meanY)} / (sum_j {w_j} - 1)
//
// It panics if len(x) != len(y).
// If weights is nil then all of the weights are 1.
// If weights is not nil, then len(x) must equal len(weights), otherwise
// Covariance panics.
func Covariance(x []float64, meanX float64, y []float64, meanY float64, weights []float64) float64 {
	if len(y) != len(x) {
		panic("stat: slice length mismatch")
	}
	if weights != nil {
		if len(x) != len(weights) {
			panic("stat: slice length mismatch")
		}
		var acc, total float64
		for i := range x {
			w := weights[i]
			acc += w * (x[i] - meanX) * (y[i] - meanY)
			total += w
		}
		return acc / (total - 1)
	}

	// Unweighted: every sample counts once, so the divisor is n - 1.
	var acc float64
	for i := range x {
		acc += (x[i] - meanX) * (y[i] - meanY)
	}
	return acc / float64(len(x)-1)
}
|  |  | ||||||
// CrossEntropy computes the cross-entropy between the two distributions
// specified in p and q, using the natural logarithm:
//
//	- sum_i (p_i * log_e(q_i))
//
// Terms where both p_i and q_i are zero are skipped. It panics if
// len(p) != len(q).
func CrossEntropy(p, q []float64) float64 {
	if len(q) != len(p) {
		panic("stat: slice length mismatch")
	}
	var total float64
	for i := range p {
		pi, qi := p[i], q[i]
		// Only accumulate when at least one of the pair is non-zero.
		if pi != 0 || qi != 0 {
			total -= pi * math.Log(qi)
		}
	}
	return total
}
|  |  | ||||||
// Entropy computes the Shannon entropy of the distribution p. The natural
// logarithm is used:
//
//	- sum_i (p_i * log_e(p_i))
//
// Entries equal to zero contribute nothing, following the convention
// 0 * log(0) == 0.
func Entropy(p []float64) float64 {
	var total float64
	for i := range p {
		pi := p[i]
		if pi == 0 {
			continue
		}
		total -= pi * math.Log(pi)
	}
	return total
}
|  |  | ||||||
|  | // ExKurtosis returns the population excess kurtosis of the sample. | ||||||
|  | // The kurtosis is defined by the 4th moment of the mean divided by the squared | ||||||
|  | // variance. The excess kurtosis subtracts 3.0 so that the excess kurtosis of | ||||||
|  | // the normal distribution is zero. | ||||||
|  | // If weights is nil then all of the weights are 1 | ||||||
|  | // If weights is not nil, then len(x) must equal len(weights) | ||||||
|  | func ExKurtosis(x []float64, mean, stdev float64, weights []float64) float64 { | ||||||
|  | 	if weights == nil { | ||||||
|  | 		var e float64 | ||||||
|  | 		for _, v := range x { | ||||||
|  | 			z := (v - mean) / stdev | ||||||
|  | 			e += z * z * z * z | ||||||
|  | 		} | ||||||
|  | 		mul, offset := kurtosisCorrection(float64(len(x))) | ||||||
|  | 		return e*mul - offset | ||||||
|  | 	} | ||||||
|  | 	if len(x) != len(weights) { | ||||||
|  | 		panic("stat: slice length mismatch") | ||||||
|  | 	} | ||||||
|  | 	var e float64 | ||||||
|  | 	var sumWeights float64 | ||||||
|  | 	for i, v := range x { | ||||||
|  | 		z := (v - mean) / stdev | ||||||
|  | 		e += weights[i] * z * z * z * z | ||||||
|  | 		sumWeights += weights[i] | ||||||
|  | 	} | ||||||
|  | 	mul, offset := kurtosisCorrection(sumWeights) | ||||||
|  | 	return e*mul - offset | ||||||
|  | } | ||||||
|  |  | ||||||
// kurtosisCorrection returns the multiplier and offset applied to the sum of
// fourth powers of standardized samples when computing the excess kurtosis.
// n is the number of samples.
// See https://en.wikipedia.org/wiki/Kurtosis
func kurtosisCorrection(n float64) (mul, offset float64) {
	mul = ((n + 1) / (n - 1)) * (n / (n - 2)) * (1 / (n - 3))
	offset = 3 * ((n - 1) / (n - 2)) * ((n - 1) / (n - 3))
	return mul, offset
}
|  |  | ||||||
// GeometricMean returns the weighted geometric mean of the dataset, computed
// as the exponential of the weighted mean of the logarithms:
//
//	exp( sum_i {w_i * log(x_i)} / sum_i {w_i} )
//
// This only applies with positive x and positive weights.
// If weights is nil then all of the weights are 1.
// If weights is not nil, then len(x) must equal len(weights).
func GeometricMean(x, weights []float64) float64 {
	if weights != nil {
		if len(weights) != len(x) {
			panic("stat: slice length mismatch")
		}
		var logSum, total float64
		for i, w := range weights {
			logSum += w * math.Log(x[i])
			total += w
		}
		return math.Exp(logSum / total)
	}

	var logSum float64
	for i := range x {
		logSum += math.Log(x[i])
	}
	return math.Exp(logSum / float64(len(x)))
}
|  |  | ||||||
|  | // GeoMean returns the weighted harmonic mean of the dataset | ||||||
|  | // 		\sum_i {w_i} / ( sum_i {w_i / x_i} ) | ||||||
|  | // This only applies with positive x and positive weights | ||||||
|  | // If weights is nil then all of the weights are 1 | ||||||
|  | // If weights is not nil, then len(x) must equal len(weights) | ||||||
|  | func HarmonicMean(x, weights []float64) float64 { | ||||||
|  | 	if weights != nil && len(x) != len(weights) { | ||||||
|  | 		panic("stat: slice length mismatch") | ||||||
|  | 	} | ||||||
|  | 	// TODO: Fix this to make it more efficient and avoid allocation | ||||||
|  |  | ||||||
|  | 	// This can be numerically unstable (for exapmle if x is very small) | ||||||
|  | 	// W = \sum_i {w_i} | ||||||
|  | 	// hm = exp(log(W) - log(\sum_i w_i / x_i)) | ||||||
|  |  | ||||||
|  | 	logs := make([]float64, len(x)) | ||||||
|  | 	var W float64 | ||||||
|  | 	for i := range x { | ||||||
|  | 		if weights == nil { | ||||||
|  | 			logs[i] = -math.Log(x[i]) | ||||||
|  | 			W += 1 | ||||||
|  | 			continue | ||||||
|  | 		} | ||||||
|  | 		logs[i] = math.Log(weights[i]) - math.Log(x[i]) | ||||||
|  | 		W += weights[i] | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	// Sum all of the logs | ||||||
|  | 	v := floats.LogSumExp(logs) // this computes log(\sum_i { w_i / x_i}) | ||||||
|  | 	return math.Exp(math.Log(W) - v) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Histogram sums up the weighted number of data points in each bin. | ||||||
|  | // The weight of data point x[i] will be placed into count[j] if | ||||||
|  | // dividers[j-1] <= x < dividers[j]. The "span" function in the floats package can assist | ||||||
|  | // with bin creation. The count variable must either be nil or have length of | ||||||
|  | // one less than dividers. | ||||||
|  | // If weights is nil then all of the weights are 1 | ||||||
|  | // If weights is not nil, then len(x) must equal len(weights) | ||||||
|  | func Histogram(count, dividers, x, weights []float64) []float64 { | ||||||
|  | 	if weights != nil && len(x) != len(weights) { | ||||||
|  | 		panic("stat: slice length mismatch") | ||||||
|  | 	} | ||||||
|  | 	if count == nil { | ||||||
|  | 		count = make([]float64, len(dividers)+1) | ||||||
|  | 	} | ||||||
|  | 	if len(count) != len(dividers)+1 { | ||||||
|  | 		panic("histogram: bin count mismatch") | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	sortX, sortWeight := sortXandWeight(x, weights) | ||||||
|  |  | ||||||
|  | 	idx := 0 | ||||||
|  | 	comp := dividers[idx] | ||||||
|  | 	if sortWeight == nil { | ||||||
|  | 		for _, v := range sortX { | ||||||
|  | 			if v < comp || idx == len(count)-1 { | ||||||
|  | 				// Still in the current bucket | ||||||
|  | 				count[idx] += 1 | ||||||
|  | 				continue | ||||||
|  | 			} | ||||||
|  | 			// Need to find the next divider where v is less than the divider | ||||||
|  | 			// or to set the maximum divider if no such exists | ||||||
|  | 			for j := idx + 1; j < len(count); j++ { | ||||||
|  | 				if j == len(dividers) { | ||||||
|  | 					idx = len(dividers) | ||||||
|  | 					break | ||||||
|  | 				} | ||||||
|  | 				if v < dividers[j] { | ||||||
|  | 					idx = j | ||||||
|  | 					comp = dividers[j] | ||||||
|  | 					break | ||||||
|  | 				} | ||||||
|  | 			} | ||||||
|  | 			count[idx] += 1 | ||||||
|  | 		} | ||||||
|  | 		return count | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	for i, v := range sortX { | ||||||
|  | 		if v < comp || idx == len(count)-1 { | ||||||
|  | 			// Still in the current bucket | ||||||
|  | 			count[idx] += sortWeight[i] | ||||||
|  | 			continue | ||||||
|  | 		} | ||||||
|  | 		// Need to find the next divider where v is less than the divider | ||||||
|  | 		// or to set the maximum divider if no such exists | ||||||
|  | 		for j := idx + 1; j < len(count); j++ { | ||||||
|  | 			if j == len(dividers) { | ||||||
|  | 				idx = len(dividers) | ||||||
|  | 				break | ||||||
|  | 			} | ||||||
|  | 			if v < dividers[j] { | ||||||
|  | 				idx = j | ||||||
|  | 				comp = dividers[j] | ||||||
|  | 				break | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 		count[idx] += sortWeight[i] | ||||||
|  | 	} | ||||||
|  | 	return count | ||||||
|  |  | ||||||
|  | 	return count | ||||||
|  | } | ||||||
|  |  | ||||||
// KulbeckLeibler computes the Kullback-Leibler distance between the
// distributions p and q. The natural logarithm is used:
//
//	sum_i(p_i * log(p_i / q_i))
//
// Entries with p_i == 0 contribute nothing. It panics if len(p) != len(q).
// Note that the Kullback-Leibler distance is not symmetric;
// KulbeckLeibler(p,q) != KulbeckLeibler(q,p)
func KulbeckLeibler(p, q []float64) float64 {
	if len(q) != len(p) {
		panic("stat: slice length mismatch")
	}
	var dist float64
	for i := range p {
		pi := p[i]
		if pi == 0 {
			// Uses the convention 0 * log(0) == 0.
			continue
		}
		dist += pi * (math.Log(pi) - math.Log(q[i]))
	}
	return dist
}
|  |  | ||||||
|  | // Mean computes the weighted mean of the data set. | ||||||
|  | //     sum_i {w_i * x_i} / sum_i {w_i} | ||||||
|  | // If weights is nil then all of the weights are 1 | ||||||
|  | // If weights is not nil, then len(x) must equal len(weights) | ||||||
|  | func Mean(x, weights []float64) float64 { | ||||||
|  | 	if weights == nil { | ||||||
|  | 		return floats.Sum(x) / float64(len(x)) | ||||||
|  | 	} | ||||||
|  | 	if len(x) != len(weights) { | ||||||
|  | 		panic("stat: slice length mismatch") | ||||||
|  | 	} | ||||||
|  | 	var sumValues float64 | ||||||
|  | 	var sumWeights float64 | ||||||
|  | 	for i, w := range weights { | ||||||
|  | 		sumValues += w * x[i] | ||||||
|  | 		sumWeights += w | ||||||
|  | 	} | ||||||
|  | 	return sumValues / sumWeights | ||||||
|  | } | ||||||
|  |  | ||||||
// Mode returns the most common value in the dataset specified by x and the
// given weights, together with the total weight accumulated for that value.
// Strict float64 equality is used when comparing values, so users should take
// caution. If several values are the mode, any of them may be returned.
// An empty x yields 0, 0.
// If weights is nil then all of the weights are 1.
// If weights is not nil, then len(x) must equal len(weights).
func Mode(x []float64, weights []float64) (val float64, count float64) {
	if weights != nil && len(weights) != len(x) {
		panic("stat: slice length mismatch")
	}
	if len(x) == 0 {
		return 0, 0
	}

	// Accumulate the total weight seen for every distinct value.
	tally := make(map[float64]float64)
	for i, v := range x {
		if weights == nil {
			tally[v]++
		} else {
			tally[v] += weights[i]
		}
	}

	// Keep the value with the strictly largest accumulated weight.
	for candidate, total := range tally {
		if total > count {
			val, count = candidate, total
		}
	}
	return val, count
}
|  |  | ||||||
// Moment computes the weighted n^th moment of the samples about the supplied
// mean,
//
//	E[(x - μ)^N]
//
// where N is the moment argument. No degrees of freedom correction is done.
// If weights is nil then all of the weights are 1.
// If weights is not nil, then len(x) must equal len(weights).
func Moment(moment float64, x []float64, mean float64, weights []float64) float64 {
	if weights != nil {
		if len(x) != len(weights) {
			panic("stat: slice length mismatch")
		}
		var acc, total float64
		for i, w := range weights {
			acc += w * math.Pow(x[i]-mean, moment)
			total += w
		}
		return acc / total
	}

	var acc float64
	for i := range x {
		acc += math.Pow(x[i]-mean, moment)
	}
	return acc / float64(len(x))
}
|  |  | ||||||
|  | // Percentile returns the lowest sample of x such that x is greater than or | ||||||
|  | // equal to the fraction p of samples. p should be a number between 0 and 1 | ||||||
|  | // If no such sample exists, the lowest value is returned | ||||||
|  | // If weights is nil then all of the weights are 1 | ||||||
|  | // If weights is not nil, then len(x) must equal len(weights) | ||||||
|  | func Percentile(p float64, x, weights []float64) float64 { | ||||||
|  | 	if p < 0 || p > 1 { | ||||||
|  | 		panic("stat: percentile out of bounds") | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if weights != nil && len(x) != len(weights) { | ||||||
|  | 		panic("stat: slice length mismatch") | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	sortX, sortWeight := sortXandWeight(x, weights) | ||||||
|  | 	if weights == nil { | ||||||
|  | 		loc := p * float64(len(x)) | ||||||
|  | 		idx := int(math.Floor(loc)) | ||||||
|  | 		if (loc == float64(idx) && idx != 0) || idx == len(x) { | ||||||
|  | 			idx-- | ||||||
|  | 		} | ||||||
|  | 		return sortX[idx] | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	idx := p * floats.Sum(weights) | ||||||
|  | 	var cumsum float64 | ||||||
|  | 	for i, w := range sortWeight { | ||||||
|  | 		cumsum += w | ||||||
|  | 		if cumsum >= idx { | ||||||
|  | 			return sortX[i] | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	panic("shouldn't be here") | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Quantile returns the lowest number p such that q is >= the fraction p of samples | ||||||
|  | // It is the inverse of the Percentile function. | ||||||
|  | // If weights is nil then all of the weights are 1 | ||||||
|  | // If weights is not nil, then len(x) must equal len(weights) | ||||||
|  | func Quantile(q float64, x, weights []float64) float64 { | ||||||
|  | 	if weights != nil && len(x) != len(weights) { | ||||||
|  | 		panic("stat: slice length mismatch") | ||||||
|  | 	} | ||||||
|  | 	sortX, sortWeight := sortXandWeight(x, weights) | ||||||
|  |  | ||||||
|  | 	// Find the first x that is greater than the supplied x | ||||||
|  | 	if q < sortX[0] { | ||||||
|  | 		return 0 | ||||||
|  | 	} | ||||||
|  | 	if q >= sortX[len(sortX)-1] { | ||||||
|  | 		return 1 | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if weights == nil { | ||||||
|  | 		for i, v := range sortX { | ||||||
|  | 			if v > q { | ||||||
|  | 				return float64(i) / float64(len(x)) | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	sumWeights := floats.Sum(weights) | ||||||
|  | 	var w float64 | ||||||
|  | 	for i, v := range sortX { | ||||||
|  | 		if v > q { | ||||||
|  | 			return w / sumWeights | ||||||
|  | 		} | ||||||
|  | 		w += sortWeight[i] | ||||||
|  | 	} | ||||||
|  | 	panic("Impossible. Maybe x contains NaNs.") | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Skew computes the skewness of the sample data | ||||||
|  | // If weights is nil then all of the weights are 1 | ||||||
|  | // If weights is not nil, then len(x) must equal len(weights) | ||||||
|  | func Skew(x []float64, mean, stdev float64, weights []float64) float64 { | ||||||
|  | 	if weights == nil { | ||||||
|  | 		var s float64 | ||||||
|  | 		for _, v := range x { | ||||||
|  | 			z := (v - mean) / stdev | ||||||
|  | 			s += z * z * z | ||||||
|  | 		} | ||||||
|  | 		return s * skewCorrection(float64(len(x))) | ||||||
|  | 	} | ||||||
|  | 	if len(x) != len(weights) { | ||||||
|  | 		panic("stat: slice length mismatch") | ||||||
|  | 	} | ||||||
|  | 	var s float64 | ||||||
|  | 	var sumWeights float64 | ||||||
|  | 	for i, v := range x { | ||||||
|  | 		z := (v - mean) / stdev | ||||||
|  | 		s += weights[i] * z * z * z | ||||||
|  | 		sumWeights += weights[i] | ||||||
|  | 	} | ||||||
|  | 	return s * skewCorrection(sumWeights) | ||||||
|  | } | ||||||
|  |  | ||||||
// skewCorrection returns the factor n / ((n - 1) * (n - 2)) that scales the
// sum of cubed standardized samples, where n is the number of samples.
// See http://www.amstat.org/publications/jse/v19n2/doane.pdf page 7
func skewCorrection(n float64) float64 {
	ratio := n / (n - 1)
	return ratio * (1 / (n - 2))
}
|  |  | ||||||
|  | // StdDev returns the population standard deviation with the provided mean | ||||||
|  | func StDev(x []float64, mean float64, weights []float64) float64 { | ||||||
|  | 	return math.Sqrt(Variance(x, mean, weights)) | ||||||
|  | } | ||||||
|  |  | ||||||
// StdErr returns the standard error in the mean with the given values:
//
//	stdev / sqrt(sampleSize)
func StdErr(stdev, sampleSize float64) float64 {
	rootN := math.Sqrt(sampleSize)
	return stdev / rootN
}
|  |  | ||||||
// StdScore returns the standard score (a.k.a. z-score, z-value) for the value x
// with the given mean and standard deviation, i.e.
//
//	(x - mean) / std
//
// The divisor is the standard deviation, not the variance: the result is the
// number of standard deviations by which x lies above the mean.
func StdScore(x, mean, std float64) float64 {
	return (x - mean) / std
}
|  |  | ||||||
// Variance computes the weighted sample variance with the provided mean.
//
//	\sum_i w_i (x_i - mean)^2 / (sum_i w_i - 1)
//
// If weights is nil, then all of the weights are 1.
// If weights is not nil, then len(x) must equal len(weights).
func Variance(x []float64, mean float64, weights []float64) float64 {
	if weights != nil {
		if len(weights) != len(x) {
			panic("stat: slice length mismatch")
		}
		var sumSq, total float64
		for i, w := range weights {
			d := x[i] - mean
			sumSq += w * d * d
			total += w
		}
		return sumSq / (total - 1)
	}

	var sumSq float64
	for i := range x {
		d := x[i] - mean
		sumSq += d * d
	}
	return sumSq / float64(len(x)-1)
}
|  |  | ||||||
|  | // Quartile returns | ||||||
|  | //func Quartile(x []float64, weights []float64) float64 {} | ||||||
|  |  | ||||||
|  | func sortXandWeight(x, weights []float64) (sortX, sortWeight []float64) { | ||||||
|  |  | ||||||
|  | 	sorted := sort.Float64sAreSorted(x) | ||||||
|  | 	if !sorted { | ||||||
|  | 		sortX = make([]float64, len(x)) | ||||||
|  | 		copy(sortX, x) | ||||||
|  | 		inds := make([]int, len(x)) | ||||||
|  | 		floats.Argsort(sortX, inds) | ||||||
|  | 		if weights != nil { | ||||||
|  | 			sortWeight = make([]float64, len(x)) | ||||||
|  | 			for i, v := range inds { | ||||||
|  | 				sortWeight[i] = weights[v] | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 	} else { | ||||||
|  | 		sortX = x | ||||||
|  | 		sortWeight = weights | ||||||
|  | 	} | ||||||
|  | 	return | ||||||
|  | } | ||||||
							
								
								
									
										627
									
								
								stat_test.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										627
									
								
								stat_test.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,627 @@ | |||||||
|  | package stat | ||||||
|  |  | ||||||
|  | import ( | ||||||
|  | 	"fmt" | ||||||
|  | 	"math" | ||||||
|  | 	"testing" | ||||||
|  |  | ||||||
|  | 	"github.com/gonum/floats" | ||||||
|  | ) | ||||||
|  |  | ||||||
|  | func ExampleCorrelation() { | ||||||
|  | 	x := []float64{8, -3, 7, 8, -4} | ||||||
|  | 	y := []float64{10, 5, 6, 3, -1} | ||||||
|  | 	w := []float64{2, 1.5, 3, 3, 2} | ||||||
|  |  | ||||||
|  | 	fmt.Println("Correlation computes the degree to which two datasets move together") | ||||||
|  | 	fmt.Println("about their mean. For example, x and y above move similarly.") | ||||||
|  | 	fmt.Println("Package can be used to compute the mean and standard deviation") | ||||||
|  | 	fmt.Println("or they can be supplied if they are known") | ||||||
|  | 	meanX := Mean(x, w) | ||||||
|  | 	meanY := Mean(x, w) | ||||||
|  | 	c := Correlation(x, meanX, 3, y, meanY, 4, w) | ||||||
|  | 	fmt.Printf("Correlation with set standard deviatons is %.5f\n", c) | ||||||
|  | 	stdX := StDev(x, meanX, w) | ||||||
|  | 	stdY := StDev(x, meanY, w) | ||||||
|  | 	c2 := Correlation(x, meanX, stdX, y, meanY, stdY, w) | ||||||
|  | 	fmt.Printf("Correlation with computed standard deviatons is %.5f\n", c2) | ||||||
|  | 	// Output: | ||||||
|  | 	// Correlation computes the degree to which two datasets move together | ||||||
|  | 	// about their mean. For example, x and y above move similarly. | ||||||
|  | 	// Package can be used to compute the mean and standard deviation | ||||||
|  | 	// or they can be supplied if they are known | ||||||
|  | 	// Correlation with set standard deviatons is 0.96894 | ||||||
|  | 	// Correlation with computed standard deviatons is 0.39644 | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func TestCorrelation(t *testing.T) { | ||||||
|  | 	for i, test := range []struct { | ||||||
|  | 		x   []float64 | ||||||
|  | 		y   []float64 | ||||||
|  | 		w   []float64 | ||||||
|  | 		ans float64 | ||||||
|  | 	}{ | ||||||
|  | 		{ | ||||||
|  | 			x:   []float64{8, -3, 7, 8, -4}, | ||||||
|  | 			y:   []float64{8, -3, 7, 8, -4}, | ||||||
|  | 			w:   nil, | ||||||
|  | 			ans: 1, | ||||||
|  | 		}, | ||||||
|  | 		{ | ||||||
|  | 			x:   []float64{8, -3, 7, 8, -4}, | ||||||
|  | 			y:   []float64{8, -3, 7, 8, -4}, | ||||||
|  | 			w:   []float64{1, 1, 1, 1, 1}, | ||||||
|  | 			ans: 1, | ||||||
|  | 		}, | ||||||
|  | 		{ | ||||||
|  | 			x:   []float64{8, -3, 7, 8, -4}, | ||||||
|  | 			y:   []float64{8, -3, 7, 8, -4}, | ||||||
|  | 			w:   []float64{1, 6, 7, 0.8, 2.1}, | ||||||
|  | 			ans: 1, | ||||||
|  | 		}, | ||||||
|  | 		{ | ||||||
|  | 			x:   []float64{8, -3, 7, 8, -4}, | ||||||
|  | 			y:   []float64{10, 15, 4, 5, -1}, | ||||||
|  | 			w:   nil, | ||||||
|  | 			ans: 0.0093334660769059, | ||||||
|  | 		}, | ||||||
|  | 		{ | ||||||
|  | 			x:   []float64{8, -3, 7, 8, -4}, | ||||||
|  | 			y:   []float64{10, 15, 4, 5, -1}, | ||||||
|  | 			w:   nil, | ||||||
|  | 			ans: 0.0093334660769059, | ||||||
|  | 		}, | ||||||
|  | 		{ | ||||||
|  | 			x:   []float64{8, -3, 7, 8, -4}, | ||||||
|  | 			y:   []float64{10, 15, 4, 5, -1}, | ||||||
|  | 			w:   []float64{1, 3, 1, 2, 2}, | ||||||
|  | 			ans: -0.13966633352689, | ||||||
|  | 		}, | ||||||
|  | 	} { | ||||||
|  | 		meanX := Mean(test.x, test.w) | ||||||
|  | 		meanY := Mean(test.y, test.w) | ||||||
|  | 		stdX := StDev(test.x, meanX, test.w) | ||||||
|  | 		stdY := StDev(test.y, meanY, test.w) | ||||||
|  | 		c := Correlation(test.x, meanX, stdX, test.y, meanY, stdY, test.w) | ||||||
|  | 		if math.Abs(test.ans-c) > 1e-14 { | ||||||
|  | 			t.Errorf("Correlation mismatch case %d. Expected %v, Found %v", i, test.ans, c) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func ExampleCovariance() { | ||||||
|  | 	fmt.Println("Covariance computes the degree to which datasets move together") | ||||||
|  | 	fmt.Println("about their mean.") | ||||||
|  | 	x := []float64{8, -3, 7, 8, -4} | ||||||
|  | 	y := []float64{10, 2, 2, 4, 1} | ||||||
|  | 	meanX := Mean(x, nil) | ||||||
|  | 	meanY := Mean(y, nil) | ||||||
|  | 	cov := Covariance(x, meanX, y, meanY, nil) | ||||||
|  | 	fmt.Printf("Cov = %.4f\n", cov) | ||||||
|  | 	fmt.Println("If datasets move perfectly together, the variance equals the covariance") | ||||||
|  | 	y2 := []float64{12, 1, 11, 12, 0} | ||||||
|  | 	meanY2 := Mean(y2, nil) | ||||||
|  | 	cov2 := Covariance(x, meanX, y2, meanY2, nil) | ||||||
|  | 	varX := Variance(x, meanX, nil) | ||||||
|  | 	fmt.Printf("Cov2 is %.4f, VarX is %.4f", cov2, varX) | ||||||
|  | 	// Output: | ||||||
|  | 	// Covariance computes the degree to which datasets move together | ||||||
|  | 	// about their mean. | ||||||
|  | 	// Cov = 13.8000 | ||||||
|  | 	// If datasets move perfectly together, the variance equals the covariance | ||||||
|  | 	// Cov2 is 37.7000, VarX is 37.7000 | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func TestCrossEntropy(t *testing.T) { | ||||||
|  | 	for i, test := range []struct { | ||||||
|  | 		p   []float64 | ||||||
|  | 		q   []float64 | ||||||
|  | 		ans float64 | ||||||
|  | 	}{ | ||||||
|  | 		{ | ||||||
|  | 			p:   []float64{0.75, 0.1, 0.05}, | ||||||
|  | 			q:   []float64{0.5, 0.25, 0.25}, | ||||||
|  | 			ans: 0.7278045395879426, | ||||||
|  | 		}, | ||||||
|  | 		{ | ||||||
|  | 			p:   []float64{0.75, 0.1, 0.05, 0, 0, 0}, | ||||||
|  | 			q:   []float64{0.5, 0.25, 0.25, 0, 0, 0}, | ||||||
|  | 			ans: 0.7278045395879426, | ||||||
|  | 		}, | ||||||
|  | 		{ | ||||||
|  | 			p:   []float64{0.75, 0.1, 0.05, 0, 0, 0.1}, | ||||||
|  | 			q:   []float64{0.5, 0.25, 0.25, 0, 0, 0}, | ||||||
|  | 			ans: math.Inf(1), | ||||||
|  | 		}, | ||||||
|  | 		{ | ||||||
|  | 			p:   nil, | ||||||
|  | 			q:   nil, | ||||||
|  | 			ans: 0, | ||||||
|  | 		}, | ||||||
|  | 	} { | ||||||
|  | 		c := CrossEntropy(test.p, test.q) | ||||||
|  | 		if math.Abs(c-test.ans) > 1e-14 { | ||||||
|  | 			t.Errorf("Cross entropy mismatch case %d: Expected %v, Found %v", i, test.ans, c) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func ExampleEntropy() { | ||||||
|  |  | ||||||
|  | 	p := []float64{0.05, 0.1, 0.9, 0.05} | ||||||
|  | 	entP := Entropy(p) | ||||||
|  |  | ||||||
|  | 	q := []float64{0.2, 0.4, 0.25, 0.15} | ||||||
|  | 	entQ := Entropy(q) | ||||||
|  |  | ||||||
|  | 	r := []float64{0.2, 0, 0, 0.5, 0, 0.2, 0.1, 0, 0, 0} | ||||||
|  | 	entR := Entropy(r) | ||||||
|  |  | ||||||
|  | 	s := []float64{0, 0, 1, 0} | ||||||
|  | 	entS := Entropy(s) | ||||||
|  |  | ||||||
|  | 	fmt.Println("Entropy is a measure of the amount of uncertainty in a distribution") | ||||||
|  | 	fmt.Printf("The second bin of p is very likely to occur. It's entropy is %.4f\n", entP) | ||||||
|  | 	fmt.Printf("The distribution of q is more spread out. It's entropy is %.4f\n", entQ) | ||||||
|  | 	fmt.Println("Adding buckets with zero probability does not change the entropy.") | ||||||
|  | 	fmt.Printf("The entropy of r is: %.4f\n", entR) | ||||||
|  | 	fmt.Printf("A distribution with no uncertainty has entropy %.4f\n", entS) | ||||||
|  | 	// Output: | ||||||
|  | 	// Entropy is a measure of the amount of uncertainty in a distribution | ||||||
|  | 	// The second bin of p is very likely to occur. It's entropy is 0.6247 | ||||||
|  | 	// The distribution of q is more spread out. It's entropy is 1.3195 | ||||||
|  | 	// Adding buckets with zero probability does not change the entropy. | ||||||
|  | 	// The entropy of r is: 1.2206 | ||||||
|  | 	// A distribution with no uncertainty has entropy 0.0000 | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func ExampleExKurtosis() { | ||||||
|  | 	fmt.Println(`Kurtosis is a measure of the 'peakedness' of a distribution, and the | ||||||
|  | excess kurtosis is the kurtosis above or below that of the standard normal | ||||||
|  | distribution`) | ||||||
|  | 	x := []float64{5, 4, -3, -2} | ||||||
|  | 	mean := Mean(x, nil) | ||||||
|  | 	stdev := StDev(x, mean, nil) | ||||||
|  | 	kurt := ExKurtosis(x, mean, stdev, nil) | ||||||
|  | 	fmt.Printf("ExKurtosis = %.5f\n", kurt) | ||||||
|  | 	weights := []float64{1, 2, 3, 5} | ||||||
|  | 	wMean := Mean(x, weights) | ||||||
|  | 	wStdev := StDev(x, wMean, weights) | ||||||
|  | 	wKurt := ExKurtosis(x, wMean, wStdev, weights) | ||||||
|  | 	fmt.Printf("Weighted ExKurtosis is %.4f", wKurt) | ||||||
|  | 	// Output: | ||||||
|  | 	// Kurtosis is a measure of the 'peakedness' of a distribution, and the | ||||||
|  | 	// excess kurtosis is the kurtosis above or below that of the standard normal | ||||||
|  | 	// distribution | ||||||
|  | 	// ExKurtosis = -5.41200 | ||||||
|  | 	// Weighted ExKurtosis is -0.6779 | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func ExampleGeometricMean() { | ||||||
|  | 	x := []float64{8, 2, 9, 15, 4} | ||||||
|  | 	weights := []float64{2, 2, 6, 7, 1} | ||||||
|  | 	mean := Mean(x, weights) | ||||||
|  | 	gmean := GeometricMean(x, weights) | ||||||
|  |  | ||||||
|  | 	logx := make([]float64, len(x)) | ||||||
|  | 	for i, v := range x { | ||||||
|  | 		logx[i] = math.Log(v) | ||||||
|  | 	} | ||||||
|  | 	expMeanLog := math.Exp(Mean(logx, weights)) | ||||||
|  | 	fmt.Printf("The arithmetic mean is %.4f, but the geometric mean is %.4f.\n", mean, gmean) | ||||||
|  | 	fmt.Printf("The exponential of the mean of the logs is %.4f\n", expMeanLog) | ||||||
|  | 	// Output: | ||||||
|  | 	// The arithmetic mean is 10.1667, but the geometric mean is 8.7637. | ||||||
|  | 	// The exponential of the mean of the logs is 8.7637 | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func ExampleHarmonicMean() { | ||||||
|  | 	x := []float64{8, 2, 9, 15, 4} | ||||||
|  | 	weights := []float64{2, 2, 6, 7, 1} | ||||||
|  | 	mean := Mean(x, weights) | ||||||
|  | 	hmean := HarmonicMean(x, weights) | ||||||
|  |  | ||||||
|  | 	fmt.Printf("The arithmetic mean is %.5f, but the harmonic mean is %.4f.\n", mean, hmean) | ||||||
|  | 	// Output: | ||||||
|  | 	// The arithmetic mean is 10.16667, but the harmonic mean is 6.8354. | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func TestHistogram(t *testing.T) { | ||||||
|  | 	for i, test := range []struct { | ||||||
|  | 		x        []float64 | ||||||
|  | 		weights  []float64 | ||||||
|  | 		dividers []float64 | ||||||
|  | 		ans      []float64 | ||||||
|  | 	}{ | ||||||
|  | 		{ | ||||||
|  | 			x:        []float64{1, 3, 5, 6, 7, 8}, | ||||||
|  | 			dividers: []float64{2, 4, 6, 7}, | ||||||
|  | 			ans:      []float64{1, 1, 1, 1, 2}, | ||||||
|  | 		}, | ||||||
|  | 		{ | ||||||
|  | 			x:        []float64{1, 3, 5, 6, 7, 8}, | ||||||
|  | 			dividers: []float64{2, 4, 6, 7}, | ||||||
|  | 			weights:  []float64{1, 2, 1, 1, 1, 2}, | ||||||
|  | 			ans:      []float64{1, 2, 1, 1, 3}, | ||||||
|  | 		}, | ||||||
|  | 		{ | ||||||
|  | 			x:        []float64{1, 8}, | ||||||
|  | 			dividers: []float64{2, 4, 6, 7}, | ||||||
|  | 			weights:  []float64{1, 2}, | ||||||
|  | 			ans:      []float64{1, 0, 0, 0, 2}, | ||||||
|  | 		}, | ||||||
|  | 		{ | ||||||
|  | 			x:        []float64{1, 8}, | ||||||
|  | 			dividers: []float64{2, 4, 6, 7}, | ||||||
|  | 			ans:      []float64{1, 0, 0, 0, 1}, | ||||||
|  | 		}, | ||||||
|  | 	} { | ||||||
|  | 		hist := Histogram(nil, test.dividers, test.x, test.weights) | ||||||
|  | 		if !floats.Equal(hist, test.ans) { | ||||||
|  | 			t.Errorf("Hist mismatch case %d. Expected %v, Found %v", i, test.ans, hist) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // ExampleHistogram demonstrates Histogram with hand-picked dividers, with | ||||||
|  | // evenly spaced dividers generated by floats.Span, and with weighted data | ||||||
|  | // written into a reused count slice. | ||||||
|  | func ExampleHistogram() { | ||||||
|  | 	x := make([]float64, 101) | ||||||
|  | 	for i := range x { | ||||||
|  | 		x[i] = 1.1 * float64(i) // x data ranges from 0 to 110 | ||||||
|  | 	} | ||||||
|  | 	dividers := []float64{7, 20, 100, 1000} | ||||||
|  | 	fmt.Println(`Histogram counts the amount of data in the bins specified by | ||||||
|  | the dividers. In this data set, there are 7 data points less than 7 (dividers[0]), | ||||||
|  | 12 data points between 7 and 20 (dividers[0] and dividers[1]), and 0 data points | ||||||
|  | above 1000. Since dividers has length 4, there will be 5 bins.`) | ||||||
|  | 	hist := Histogram(nil, dividers, x, nil) | ||||||
|  | 	fmt.Printf("Hist = %v\n", hist) | ||||||
|  |  | ||||||
|  | 	fmt.Println() | ||||||
|  | 	fmt.Println("For ease, the floats Span function can be used to set the dividers") | ||||||
|  | 	nBins := 10 | ||||||
|  | 	// nBins bins need nBins-1 dividers. Two extra slots are allocated because | ||||||
|  | 	// Span also writes the min and max end points, which are trimmed below. | ||||||
|  | 	dividers = make([]float64, nBins+1) | ||||||
|  | 	min, _ := floats.Min(x) | ||||||
|  | 	max, _ := floats.Max(x) | ||||||
|  | 	floats.Span(dividers, min, max) | ||||||
|  | 	// Span includes the min and the max. Trim the dividers to create 10 buckets | ||||||
|  | 	dividers = dividers[1 : len(dividers)-1] | ||||||
|  | 	fmt.Println("len dividers = ", len(dividers)) | ||||||
|  | 	hist = Histogram(nil, dividers, x, nil) | ||||||
|  | 	fmt.Printf("Hist = %v\n", hist) | ||||||
|  | 	fmt.Println() | ||||||
|  | 	fmt.Println(`Histogram also works with weighted data, and allows reusing of | ||||||
|  | the count field in order to avoid extra garbage`) | ||||||
|  | 	weights := make([]float64, len(x)) | ||||||
|  | 	for i := range weights { | ||||||
|  | 		weights[i] = float64(i + 1) | ||||||
|  | 	} | ||||||
|  | 	// Pass the previous result back in so its storage is reused for the counts. | ||||||
|  | 	Histogram(hist, dividers, x, weights) | ||||||
|  | 	fmt.Printf("Weighted Hist = %v\n", hist) | ||||||
|  |  | ||||||
|  | 	// Output: | ||||||
|  | 	// Histogram counts the amount of data in the bins specified by | ||||||
|  | 	// the dividers. In this data set, there are 7 data points less than 7 (dividers[0]), | ||||||
|  | 	// 12 data points between 7 and 20 (dividers[0] and dividers[1]), and 0 data points | ||||||
|  | 	// above 1000. Since dividers has length 4, there will be 5 bins. | ||||||
|  | 	// Hist = [7 12 72 10 0] | ||||||
|  | 	// | ||||||
|  | 	// For ease, the floats Span function can be used to set the dividers | ||||||
|  | 	// len dividers =  9 | ||||||
|  | 	// Hist = [11 10 10 10 9 11 10 10 9 11] | ||||||
|  | 	// | ||||||
|  | 	// Histogram also works with weighted data, and allows reusing of | ||||||
|  | 	// the count field in order to avoid extra garbage | ||||||
|  | 	// Weighted Hist = [77 175 275 375 423 627 675 775 783 1067] | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // ExampleKulbeckLiebler prints the Kulbeck-Liebler distance between a few | ||||||
|  | // pairs of discrete distributions, including a pair for which the distance is | ||||||
|  | // infinite and a distribution compared with itself. | ||||||
|  | // | ||||||
|  | // NOTE(review): the function exercised here is spelled KulbeckLeibler, so the | ||||||
|  | // example name does not match it and documentation tools may not attach this | ||||||
|  | // example to that function. The name is left unchanged here. | ||||||
|  | // NOTE(review): the entries of p sum to 1.1 rather than 1; the expected output | ||||||
|  | // below was written for these values. | ||||||
|  | func ExampleKulbeckLiebler() { | ||||||
|  | 	p := []float64{0.05, 0.1, 0.9, 0.05} | ||||||
|  | 	q := []float64{0.2, 0.4, 0.25, 0.15} | ||||||
|  | 	s := []float64{0, 0, 1, 0} | ||||||
|  |  | ||||||
|  | 	klPQ := KulbeckLeibler(p, q) | ||||||
|  | 	klPS := KulbeckLeibler(p, s) | ||||||
|  | 	klPP := KulbeckLeibler(p, p) | ||||||
|  |  | ||||||
|  | 	fmt.Println("Kulbeck-Liebler is one measure of the difference between two distributions") | ||||||
|  | 	fmt.Printf("The K-L distance between p and q is %.4f\n", klPQ) | ||||||
|  | 	fmt.Println("It is impossible for s and p to be the same distribution, because") | ||||||
|  | 	fmt.Println("the first bucket has zero probability in s and non-zero in p. Thus,") | ||||||
|  | 	fmt.Printf("the K-L distance between them is %.4f\n", klPS) | ||||||
|  | 	fmt.Printf("The K-L distance between identical distributions is %.4f\n", klPP) | ||||||
|  |  | ||||||
|  | 	// The "Output:" header is required for go test to run this example and | ||||||
|  | 	// compare what it prints with the lines that follow. | ||||||
|  | 	// Output: | ||||||
|  | 	// Kulbeck-Liebler is one measure of the difference between two distributions | ||||||
|  | 	// The K-L distance between p and q is 0.8900 | ||||||
|  | 	// It is impossible for s and p to be the same distribution, because | ||||||
|  | 	// the first bucket has zero probability in s and non-zero in p. Thus, | ||||||
|  | 	// the K-L distance between them is +Inf | ||||||
|  | 	// The K-L distance between identical distributions is 0.0000 | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // ExampleMean shows the unweighted and weighted mean of a small sample set, | ||||||
|  | // and that a weighted mean equals the plain mean of a data set in which each | ||||||
|  | // sample is repeated as many times as its weight. | ||||||
|  | func ExampleMean() { | ||||||
|  | 	samples := []float64{8.2, -6, 5, 7} | ||||||
|  | 	fmt.Printf("The mean of the samples is %.4f\n", Mean(samples, nil)) | ||||||
|  |  | ||||||
|  | 	counts := []float64{2, 6, 3, 5} | ||||||
|  | 	fmt.Printf("The weighted mean of the samples is %.4f\n", Mean(samples, counts)) | ||||||
|  |  | ||||||
|  | 	// Each sample written out explicitly, repeated according to its weight. | ||||||
|  | 	expanded := []float64{8.2, 8.2, -6, -6, -6, -6, -6, -6, 5, 5, 5, 7, 7, 7, 7, 7} | ||||||
|  | 	fmt.Printf("The mean of x2 is %.4f\n", Mean(expanded, nil)) | ||||||
|  | 	fmt.Println("The weights act as if there were more samples of that number") | ||||||
|  | 	// Output: | ||||||
|  | 	// The mean of the samples is 3.5500 | ||||||
|  | 	// The weighted mean of the samples is 1.9000 | ||||||
|  | 	// The mean of x2 is 1.9000 | ||||||
|  | 	// The weights act as if there were more samples of that number | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func TestMode(t *testing.T) { | ||||||
|  | 	for i, test := range []struct { | ||||||
|  | 		x       []float64 | ||||||
|  | 		weights []float64 | ||||||
|  | 		ans     float64 | ||||||
|  | 		count   float64 | ||||||
|  | 	}{ | ||||||
|  | 		{}, | ||||||
|  | 		{ | ||||||
|  | 			x:     []float64{1, 6, 1, 9, -2}, | ||||||
|  | 			ans:   1, | ||||||
|  | 			count: 2, | ||||||
|  | 		}, | ||||||
|  | 		{ | ||||||
|  | 			x:       []float64{1, 6, 1, 9, -2}, | ||||||
|  | 			weights: []float64{1, 7, 3, 5, 0}, | ||||||
|  | 			ans:     6, | ||||||
|  | 			count:   7, | ||||||
|  | 		}, | ||||||
|  | 	} { | ||||||
|  | 		m, count := Mode(test.x, test.weights) | ||||||
|  | 		if test.ans != m { | ||||||
|  | 			t.Errorf("Mode mismatch case %d. Expected %v, found %v", i, test.ans, m) | ||||||
|  | 		} | ||||||
|  | 		if test.count != count { | ||||||
|  | 			t.Errorf("Mode count mismatch case %d. Expected %v, found %v", i, test.count, count) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func TestMoment(t *testing.T) { | ||||||
|  | 	for i, test := range []struct { | ||||||
|  | 		x       []float64 | ||||||
|  | 		weights []float64 | ||||||
|  | 		moment  float64 | ||||||
|  | 		mean    float64 | ||||||
|  | 		ans     float64 | ||||||
|  | 	}{ | ||||||
|  | 		{}, | ||||||
|  | 		{ | ||||||
|  | 			x:      []float64{6, 2, 4, 8, 9}, | ||||||
|  | 			mean:   3, | ||||||
|  | 			moment: 5, | ||||||
|  | 			ans:    2.2288e3, | ||||||
|  | 		}, | ||||||
|  | 	} { | ||||||
|  | 		m := Moment(test.moment, test.x, test.mean, test.weights) | ||||||
|  | 		if math.Abs(test.ans-m) > 1e-14 { | ||||||
|  | 			t.Errorf("Moment mismatch case %d. Expected %v, found %v", i, test.ans, m) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // TestPercentile checks Percentile against known answers for sorted and | ||||||
|  | // unsorted, weighted and unweighted samples, and verifies that Percentile | ||||||
|  | // leaves both the sample slice and the weight slice unmodified. | ||||||
|  | func TestPercentile(t *testing.T) { | ||||||
|  | 	for i, test := range []struct { | ||||||
|  | 		p   []float64 | ||||||
|  | 		x   []float64 | ||||||
|  | 		w   []float64 | ||||||
|  | 		ans []float64 | ||||||
|  | 	}{ | ||||||
|  | 		{ | ||||||
|  | 			p:   []float64{0, 0.05, 0.1, 0.15, 0.45, 0.5, 0.55, 0.85, 0.9, 0.95, 1}, | ||||||
|  | 			x:   []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, | ||||||
|  | 			w:   nil, | ||||||
|  | 			ans: []float64{1, 1, 1, 2, 5, 5, 6, 9, 9, 10, 10}, | ||||||
|  | 		}, | ||||||
|  | 		{ | ||||||
|  | 			p:   []float64{0, 0.05, 0.1, 0.15, 0.45, 0.5, 0.55, 0.85, 0.9, 0.95, 1}, | ||||||
|  | 			x:   []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, | ||||||
|  | 			w:   []float64{3, 3, 3, 3, 3, 3, 3, 3, 3, 3}, | ||||||
|  | 			ans: []float64{1, 1, 1, 2, 5, 5, 6, 9, 9, 10, 10}, | ||||||
|  | 		}, | ||||||
|  | 		{ | ||||||
|  | 			p:   []float64{0, 0.05, 0.1, 0.15, 0.45, 0.5, 0.55, 0.85, 0.9, 0.95, 1}, | ||||||
|  | 			x:   []float64{3, 10, 1, 2, 5, 9, 7, 8, 6, 4}, | ||||||
|  | 			w:   []float64{3, 3, 3, 3, 3, 3, 3, 3, 3, 3}, | ||||||
|  | 			ans: []float64{1, 1, 1, 2, 5, 5, 6, 9, 9, 10, 10}, | ||||||
|  | 		}, | ||||||
|  | 		{ | ||||||
|  | 			p:   []float64{0, 0.05, 0.1, 0.15, 0.45, 0.5, 0.55, 0.85, 0.9, 0.95, 1}, | ||||||
|  | 			x:   []float64{3, 10, 1, 2, 5, 9, 7, 8, 6, 4}, | ||||||
|  | 			w:   nil, | ||||||
|  | 			ans: []float64{1, 1, 1, 2, 5, 5, 6, 9, 9, 10, 10}, | ||||||
|  | 		}, | ||||||
|  | 	} { | ||||||
|  | 		// Guard against a malformed table entry before indexing test.ans. | ||||||
|  | 		if len(test.p) != len(test.ans) { | ||||||
|  | 			panic("bad test") | ||||||
|  | 		} | ||||||
|  | 		// Snapshot the inputs so that any mutation by Percentile is detected. | ||||||
|  | 		copyX := make([]float64, len(test.x)) | ||||||
|  | 		copy(copyX, test.x) | ||||||
|  | 		var copyW []float64 | ||||||
|  | 		if test.w != nil { | ||||||
|  | 			copyW = make([]float64, len(test.w)) | ||||||
|  | 			copy(copyW, test.w) | ||||||
|  | 		} | ||||||
|  | 		for j, p := range test.p { | ||||||
|  | 			v := Percentile(p, test.x, test.w) | ||||||
|  | 			if !floats.Equal(copyX, test.x) { | ||||||
|  | 				t.Errorf("x changed for case %d percentile %v", i, p) | ||||||
|  | 			} | ||||||
|  | 			if !floats.Equal(copyW, test.w) { | ||||||
|  | 				t.Errorf("weights changed for case %d percentile %v", i, p) | ||||||
|  | 			} | ||||||
|  | 			if v != test.ans[j] { | ||||||
|  | 				t.Errorf("mismatch case %d percentile %v. Expected: %v, found: %v", i, p, test.ans[j], v) | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // TestQuantile checks Quantile against known answers for sorted and unsorted, | ||||||
|  | // weighted and unweighted samples, and verifies that Quantile leaves both the | ||||||
|  | // sample slice and the weight slice unmodified. | ||||||
|  | func TestQuantile(t *testing.T) { | ||||||
|  | 	for i, test := range []struct { | ||||||
|  | 		q       []float64 | ||||||
|  | 		x       []float64 | ||||||
|  | 		weights []float64 | ||||||
|  | 		ans     []float64 | ||||||
|  | 	}{ | ||||||
|  | 		{}, | ||||||
|  | 		{ | ||||||
|  | 			q:   []float64{0, 0.9, 1, 1.1, 2.9, 3, 3.1, 4.9, 5, 5.1}, | ||||||
|  | 			x:   []float64{1, 2, 3, 4, 5}, | ||||||
|  | 			ans: []float64{0, 0, 0.2, 0.2, 0.4, 0.6, 0.6, 0.8, 1, 1}, | ||||||
|  | 		}, | ||||||
|  | 		{ | ||||||
|  | 			q:   []float64{0, 0.9, 1, 1.1, 2.9, 3, 3.1, 4.9, 5, 5.1}, | ||||||
|  | 			x:   []float64{5, 2, 3, 4, 1}, | ||||||
|  | 			ans: []float64{0, 0, 0.2, 0.2, 0.4, 0.6, 0.6, 0.8, 1, 1}, | ||||||
|  | 		}, | ||||||
|  | 		{ | ||||||
|  | 			q:       []float64{0, 0.9, 1, 1.1, 2.9, 3, 3.1, 4.9, 5, 5.1}, | ||||||
|  | 			x:       []float64{5, 2, 3, 4, 1}, | ||||||
|  | 			weights: []float64{1, 1, 1, 1, 1}, | ||||||
|  | 			ans:     []float64{0, 0, 0.2, 0.2, 0.4, 0.6, 0.6, 0.8, 1, 1}, | ||||||
|  | 		}, | ||||||
|  | 		{ | ||||||
|  | 			q:       []float64{0, 0.9, 1, 1.1, 2.9, 3, 3.1, 4.9, 5, 5.1}, | ||||||
|  | 			x:       []float64{5, 2, 3, 4, 1}, | ||||||
|  | 			weights: []float64{1, 1, 2, 5, 1}, | ||||||
|  | 			ans:     []float64{0, 0, 0.1, 0.1, 0.2, 0.4, 0.4, 0.9, 1, 1}, | ||||||
|  | 		}, | ||||||
|  | 	} { | ||||||
|  | 		// Guard against a malformed table entry before indexing test.ans. | ||||||
|  | 		if len(test.q) != len(test.ans) { | ||||||
|  | 			panic("bad test") | ||||||
|  | 		} | ||||||
|  | 		// Snapshot the inputs so that any mutation by Quantile is detected. | ||||||
|  | 		copyX := make([]float64, len(test.x)) | ||||||
|  | 		copy(copyX, test.x) | ||||||
|  | 		var copyW []float64 | ||||||
|  | 		if test.weights != nil { | ||||||
|  | 			copyW = make([]float64, len(test.weights)) | ||||||
|  | 			copy(copyW, test.weights) | ||||||
|  | 		} | ||||||
|  | 		for j, q := range test.q { | ||||||
|  | 			v := Quantile(q, test.x, test.weights) | ||||||
|  | 			if !floats.Equal(copyX, test.x) { | ||||||
|  | 				t.Errorf("x changed for case %d quantile %v", i, q) | ||||||
|  | 			} | ||||||
|  | 			if !floats.Equal(copyW, test.weights) { | ||||||
|  | 				t.Errorf("weights changed for case %d quantile %v", i, q) | ||||||
|  | 			} | ||||||
|  | 			if v != test.ans[j] { | ||||||
|  | 				t.Errorf("mismatch case %d quantile %v. Expected: %v, found: %v", i, q, test.ans[j], v) | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // ExampleStDev prints the standard deviation of a sample set, first with all | ||||||
|  | // samples weighted equally and then with explicit weights. In both cases the | ||||||
|  | // mean passed to StDev is computed with the same weights. | ||||||
|  | func ExampleStDev() { | ||||||
|  | 	samples := []float64{8, 2, -9, 15, 4} | ||||||
|  | 	plain := StDev(samples, Mean(samples, nil), nil) | ||||||
|  | 	fmt.Printf("The standard deviation of the samples is %.4f\n", plain) | ||||||
|  |  | ||||||
|  | 	w := []float64{2, 2, 6, 7, 1} | ||||||
|  | 	weighted := StDev(samples, Mean(samples, w), w) | ||||||
|  | 	fmt.Printf("The weighted standard deviation of the samples is %.4f\n", weighted) | ||||||
|  | 	// Output: | ||||||
|  | 	// The standard deviation of the samples is 8.8034 | ||||||
|  | 	// The weighted standard deviation of the samples is 10.5733 | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func ExampleStdErr() { | ||||||
|  | 	x := []float64{8, 2, -9, 15, 4} | ||||||
|  | 	weights := []float64{2, 2, 6, 7, 1} | ||||||
|  | 	mean := Mean(x, weights) | ||||||
|  | 	stdev := StDev(x, mean, weights) | ||||||
|  | 	nSamples := floats.Sum(weights) | ||||||
|  | 	stdErr := StdErr(stdev, nSamples) | ||||||
|  | 	fmt.Printf("The standard deviation is %.4f and there are %g samples, so the mean\nis likely %.4f ± %.4f.", stdev, nSamples, mean, stdErr) | ||||||
|  | 	// Output: | ||||||
|  | 	// The standard deviation is 10.5733 and there are 18 samples, so the mean | ||||||
|  | 	// is likely 4.1667 ± 2.4921. | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func TestSkew(t *testing.T) { | ||||||
|  | 	for i, test := range []struct { | ||||||
|  | 		x       []float64 | ||||||
|  | 		weights []float64 | ||||||
|  | 		ans     float64 | ||||||
|  | 	}{ | ||||||
|  | 		{ | ||||||
|  | 			x:       []float64{8, 3, 7, 8, 4}, | ||||||
|  | 			weights: nil, | ||||||
|  | 			ans:     -0.581456499151665, | ||||||
|  | 		}, | ||||||
|  | 		{ | ||||||
|  | 			x:       []float64{8, 3, 7, 8, 4}, | ||||||
|  | 			weights: []float64{1, 1, 1, 1, 1}, | ||||||
|  | 			ans:     -0.581456499151665, | ||||||
|  | 		}, | ||||||
|  | 		{ | ||||||
|  | 			x:       []float64{8, 3, 7, 8, 4}, | ||||||
|  | 			weights: []float64{2, 1, 2, 1, 1}, | ||||||
|  | 			ans:     -1.12066646837198, | ||||||
|  | 		}, | ||||||
|  | 	} { | ||||||
|  | 		mean := Mean(test.x, test.weights) | ||||||
|  | 		std := StDev(test.x, mean, test.weights) | ||||||
|  | 		skew := Skew(test.x, mean, std, test.weights) | ||||||
|  | 		if math.Abs(skew-test.ans) > 1e-14 { | ||||||
|  | 			t.Errorf("Skew mismatch case %d. Expected %v, Found %v", i, test.ans, skew) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func TestVariance(t *testing.T) { | ||||||
|  | 	for i, test := range []struct { | ||||||
|  | 		x       []float64 | ||||||
|  | 		weights []float64 | ||||||
|  | 		ans     float64 | ||||||
|  | 	}{ | ||||||
|  | 		{ | ||||||
|  | 			x:       []float64{8, -3, 7, 8, -4}, | ||||||
|  | 			weights: nil, | ||||||
|  | 			ans:     37.7, | ||||||
|  | 		}, | ||||||
|  | 		{ | ||||||
|  | 			x:       []float64{8, -3, 7, 8, -4}, | ||||||
|  | 			weights: []float64{1, 1, 1, 1, 1}, | ||||||
|  | 			ans:     37.7, | ||||||
|  | 		}, | ||||||
|  | 		{ | ||||||
|  | 			x:       []float64{8, 3, 7, 8, 4}, | ||||||
|  | 			weights: []float64{2, 1, 2, 1, 1}, | ||||||
|  | 			ans:     4.2857142857142865, | ||||||
|  | 		}, | ||||||
|  | 	} { | ||||||
|  | 		mean := Mean(test.x, test.weights) | ||||||
|  | 		variance := Variance(test.x, mean, test.weights) | ||||||
|  | 		if math.Abs(variance-test.ans) > 1e-14 { | ||||||
|  | 			t.Errorf("Variance mismatch case %d. Expected %v, Found %v", i, test.ans, variance) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // ExampleVariance prints the variance of a sample set, first with all samples | ||||||
|  | // weighted equally and then with explicit weights. In both cases the mean | ||||||
|  | // passed to Variance is computed with the same weights. | ||||||
|  | func ExampleVariance() { | ||||||
|  | 	samples := []float64{8, 2, -9, 15, 4} | ||||||
|  | 	plain := Variance(samples, Mean(samples, nil), nil) | ||||||
|  | 	fmt.Printf("The variance of the samples is %.4f\n", plain) | ||||||
|  |  | ||||||
|  | 	w := []float64{2, 2, 6, 7, 1} | ||||||
|  | 	weighted := Variance(samples, Mean(samples, w), w) | ||||||
|  | 	fmt.Printf("The weighted variance of the samples is %.4f\n", weighted) | ||||||
|  | 	// Output: | ||||||
|  | 	// The variance of the samples is 77.5000 | ||||||
|  | 	// The weighted variance of the samples is 111.7941 | ||||||
|  | } | ||||||
		Reference in New Issue
	
	Block a user
	 btracey
					btracey