diff --git a/stat/stat.go b/stat/stat.go index 0780552f..be177026 100644 --- a/stat/stat.go +++ b/stat/stat.go @@ -20,6 +20,8 @@ type CumulantKind int const ( // Empirical treats the distribution as the actual empirical distribution. Empirical CumulantKind = 1 + // LinInterp linearly interpolates the empirical distribution between sample values, with a flat extrapolation. + LinInterp CumulantKind = 4 ) // bhattacharyyaCoeff computes the Bhattacharyya Coefficient for probability distributions given by: @@ -1028,6 +1030,7 @@ func MomentAbout(moment float64, x []float64, mean float64, weights []float64) f // CumulantKind behaviors: // - Empirical: Returns the lowest value q for which q is greater than or equal // to the fraction p of samples +// - LinInterp: Returns the linearly interpolated value func Quantile(p float64, c CumulantKind, x, weights []float64) float64 { if !(p >= 0 && p <= 1) { panic("stat: percentile out of bounds") @@ -1052,6 +1055,8 @@ func Quantile(p float64, c CumulantKind, x, weights []float64) float64 { switch c { case Empirical: return empiricalQuantile(p, x, weights, sumWeights) + case LinInterp: + return linInterpQuantile(p, x, weights, sumWeights) default: panic("stat: bad cumulant kind") } @@ -1073,6 +1078,29 @@ func empiricalQuantile(p float64, x, weights []float64, sumWeights float64) floa panic("impossible") } +func linInterpQuantile(p float64, x, weights []float64, sumWeights float64) float64 { + var cumsum float64 + fidx := p * sumWeights + for i := range x { + if weights == nil { + cumsum++ + } else { + cumsum += weights[i] + } + if cumsum >= fidx { + if i == 0 { + return x[0] + } + t := cumsum - fidx + if weights != nil { + t /= weights[i] + } + return t*x[i-1] + (1-t)*x[i] + } + } + panic("impossible") +} + // Skew computes the skewness of the sample data. // If weights is nil then all of the weights are 1. If weights is not nil, then // len(x) must equal len(weights). diff --git a/stat/stat_test.go b/stat/stat_test.go index 147806f8..995e0b0e 100644 --- a/stat/stat_test.go +++ b/stat/stat_test.go @@ -1423,7 +1423,10 @@ func TestCDF(t *testing.T) { } func TestQuantile(t *testing.T) { - cumulantKinds := []CumulantKind{Empirical} + cumulantKinds := []CumulantKind{ + Empirical, + LinInterp, + } for i, test := range []struct { p []float64 x []float64 @@ -1431,21 +1434,39 @@ func TestQuantile(t *testing.T) { ans [][]float64 }{ { - p: []float64{0, 0.05, 0.1, 0.15, 0.45, 0.5, 0.55, 0.85, 0.9, 0.95, 1}, - x: []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, - w: nil, - ans: [][]float64{{1, 1, 1, 2, 5, 5, 6, 9, 9, 10, 10}}, + p: []float64{0, 0.05, 0.1, 0.15, 0.45, 0.5, 0.55, 0.85, 0.9, 0.95, 1}, + x: []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + w: nil, + ans: [][]float64{ + {1, 1, 1, 2, 5, 5, 6, 9, 9, 10, 10}, + {1, 1, 1, 1.5, 4.5, 5, 5.5, 8.5, 9, 9.5, 10}, + }, }, { - p: []float64{0, 0.05, 0.1, 0.15, 0.45, 0.5, 0.55, 0.85, 0.9, 0.95, 1}, - x: []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, - w: []float64{3, 3, 3, 3, 3, 3, 3, 3, 3, 3}, - ans: [][]float64{{1, 1, 1, 2, 5, 5, 6, 9, 9, 10, 10}}, + p: []float64{0, 0.05, 0.1, 0.15, 0.45, 0.5, 0.55, 0.85, 0.9, 0.95, 1}, + x: []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + w: []float64{3, 3, 3, 3, 3, 3, 3, 3, 3, 3}, + ans: [][]float64{ + {1, 1, 1, 2, 5, 5, 6, 9, 9, 10, 10}, + {1, 1, 1, 1.5, 4.5, 5, 5.5, 8.5, 9, 9.5, 10}, + }, }, { - p: []float64{0.5}, - x: []float64{1, 2, 3, 4, 5, 6, 7, 8, math.NaN(), 10}, - ans: [][]float64{{math.NaN()}}, + p: []float64{0, 0.05, 0.1, 0.15, 0.45, 0.5, 0.55, 0.85, 0.9, 0.95, 1}, + x: []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + w: []float64{0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0}, + ans: [][]float64{ + {1, 2, 3, 4, 7, 7, 8, 10, 10, 10, 10}, + {1, 1.875, 2.833333333333333, 3.5625, 6.535714285714286, 6.928571428571429, 7.281250000000001, 9.175, 9.45, 9.725, 10}, + }, + }, + { + p: []float64{0.5}, + x: []float64{1, 2, 3, 4, 5, 6, 7, 8, math.NaN(), 10}, + ans: [][]float64{ + {math.NaN()}, + {math.NaN()}, + }, }, } { copyX := make([]float64, len(test.x)) @@ -1470,55 +1491,57 @@ func TestQuantile(t *testing.T) { } } } - // panic cases +} + +func TestQuantileInvalidInput(t *testing.T) { + cumulantKinds := []CumulantKind{ + Empirical, + LinInterp, + } for _, test := range []struct { name string p float64 - c CumulantKind x []float64 w []float64 }{ { name: "p < 0", - c: Empirical, p: -1, }, { name: "p > 1", - c: Empirical, p: 2, }, { name: "p is NaN", - c: Empirical, p: math.NaN(), }, { name: "len(x) != len(weights)", - c: Empirical, p: .5, x: make([]float64, 4), w: make([]float64, 2), }, { name: "x not sorted", - c: Empirical, p: .5, x: []float64{3, 2, 1}, }, - { - name: "CumulantKind is unknown", - c: CumulantKind(1000), - p: .5, - x: []float64{1, 2, 3}, - }, } { - if !panics(func() { Quantile(test.p, test.c, test.x, test.w) }) { - t.Errorf("Quantile did not panic when %s", test.name) + for _, kind := range cumulantKinds { + if !panics(func() { Quantile(test.p, kind, test.x, test.w) }) { + t.Errorf("Quantile did not panic when %s", test.name) + } } } } +func TestQuantileInvalidCumulantKind(t *testing.T) { + if !panics(func() { Quantile(0.5, CumulantKind(1000), []float64{1, 2, 3}, nil) }) { + t.Errorf("Quantile did not panic when CumulantKind is unknown") + } +} + func ExampleStdDev() { x := []float64{8, 2, -9, 15, 4} stdev := StdDev(x, nil)