stat/combin: add Cartesian for constructing the cartesian product of … (#303)

* stat/combin: add Cartesian for constructing the cartesian product of a set of slices
This commit is contained in:
Brendan Tracey
2017-11-19 22:17:43 -07:00
committed by GitHub
parent 790b7356e7
commit a3eff45798
2 changed files with 216 additions and 4 deletions

View File

@@ -4,12 +4,17 @@
package combin
import "math"
import (
"math"
"gonum.org/v1/gonum/mat"
)
const (
badNegInput = "combin: negative input"
badSetSize = "combin: n < k"
badInput = "combin: wrong input slice length"
nonpositiveDimension = "combin: non-positive dimension"
)
// Binomial returns the binomial coefficient of (n,k), also commonly referred to
@@ -179,3 +184,115 @@ func nextCombination(s []int, n, k int) {
break
}
}
// Cartesian returns the cartesian product of the slices in data. The Cartesian
// product of two sets is the set of all combinations of the items. For example,
// given the input
// [][]float64{{1,2},{3,4},{5,6}}
// the returned matrix will be
// [ 1 3 5 ]
// [ 1 3 6 ]
// [ 1 4 5 ]
// [ 1 4 6 ]
// [ 2 3 5 ]
// [ 2 3 6 ]
// [ 2 4 5 ]
// [ 2 4 6 ]
// If dst is nil, a new matrix will be allocated and returned, otherwise the number
// of rows of dst must equal \prod_i len(data[i]), and the number of columns in
// dst must equal len(data). Cartesian also panics if len(data) = 0.
func Cartesian(dst *mat.Dense, data [][]float64) *mat.Dense {
if len(data) == 0 {
panic("combin: empty data input")
}
cols := len(data)
rows := 1
lens := make([]int, cols)
for i, d := range data {
v := len(d)
lens[i] = v
rows *= v
}
if dst == nil {
dst = mat.NewDense(rows, cols, nil)
}
r, c := dst.Dims()
if r != rows || c != cols {
panic("combin: destination matrix size mismatch")
}
idxs := make([]int, cols)
for i := 0; i < rows; i++ {
SubFor(idxs, i, lens)
for j := 0; j < len(data); j++ {
dst.Set(i, j, data[j][idxs[j]])
}
}
return dst
}
// IdxFor converts a multi-dimensional index into a linear index for a
// multi-dimensional space. sub specifies the index for each dimension, and dims
// specifies the size of each dimension. IdxFor is the inverse of SubFor.
// IdxFor panics if any of the entries of sub are negative, any of the entries
// of dim are non-positive, or if sub[i] >= dims[i] for any i.
func IdxFor(sub, dims []int) int {
// The index returned is "row-major", that is the last index of sub is
// continuous.
var idx int
stride := 1
for i := len(dims) - 1; i >= 0; i-- {
v := sub[i]
d := dims[i]
if d <= 0 {
panic(nonpositiveDimension)
}
if v < 0 || v >= d {
panic("combin: invalid subscript")
}
idx += v * stride
stride *= d
}
return idx
}
// SubFor returns the multi-dimensional subscript for the input linear index to
// the multi-dimensional space. dims specifies the size of each dimension, and
// idx specifies the linear index. SubFor is the inverse of IdxFor.
//
// If sub is non-nil the result is stored in-place into sub, and SubFor will panic
// if len(sub) != len(dims). If sub is nil a new slice of the appropriate length
// is allocated. SubFor panics if idx < 0 or if idx is greater than or equal to
// the product of the dimensions.
func SubFor(sub []int, idx int, dims []int) []int {
if sub == nil {
sub = make([]int, len(dims))
}
if len(sub) != len(dims) {
panic(badInput)
}
if idx < 0 {
panic(badNegInput)
}
stride := 1
for i := len(dims) - 1; i >= 1; i-- {
stride *= dims[i]
}
for i := 0; i < len(dims)-1; i++ {
v := idx / stride
d := dims[i]
if d < 0 {
panic(nonpositiveDimension)
}
if v >= dims[i] {
panic("combin: index too large")
}
sub[i] = v
idx -= v * stride
stride /= dims[i+1]
}
if idx > dims[len(sub)-1] {
panic("combin: index too large")
}
sub[len(sub)-1] = idx
return sub
}

View File

@@ -6,9 +6,11 @@ package combin
import (
"math/big"
"reflect"
"testing"
"gonum.org/v1/gonum/floats"
"gonum.org/v1/gonum/mat"
)
// intSosMatch returns true if the two slices of slices are equal.
@@ -179,3 +181,96 @@ func TestCombinationGenerator(t *testing.T) {
}
}
}
func TestCartesian(t *testing.T) {
// First, test with a known return.
data := [][]float64{
{1, 2},
{3, 4},
{5, 6},
}
want := mat.NewDense(8, 3, []float64{
1, 3, 5,
1, 3, 6,
1, 4, 5,
1, 4, 6,
2, 3, 5,
2, 3, 6,
2, 4, 5,
2, 4, 6,
})
got := Cartesian(nil, data)
if !mat.Equal(want, got) {
t.Errorf("cartesian data mismatch.\nwant:\n%v\ngot:\n%v", mat.Formatted(want), mat.Formatted(got))
}
gotTo := mat.NewDense(8, 3, nil)
Cartesian(gotTo, data)
if !mat.Equal(want, got) {
t.Errorf("cartesian data mismatch with supplied.\nwant:\n%v\ngot:\n%v", mat.Formatted(want), mat.Formatted(gotTo))
}
// Test that Cartesian generates unique vectors.
for cas, data := range [][][]float64{
{{1}, {2, 3}, {8, 9, 10}},
{{1, 10}, {2, 3}, {8, 9, 10}},
{{1, 10, 11}, {2, 3}, {8}},
} {
cart := Cartesian(nil, data)
r, c := cart.Dims()
if c != len(data) {
t.Errorf("Case %v: wrong number of columns. Want %v, got %v", cas, len(data), c)
}
wantRows := 1
for _, v := range data {
wantRows *= len(v)
}
if r != wantRows {
t.Errorf("Case %v: wrong number of rows. Want %v, got %v", cas, wantRows, r)
}
for i := 0; i < r; i++ {
for j := i + 1; j < r; j++ {
if floats.Equal(cart.RawRowView(i), cart.RawRowView(j)) {
t.Errorf("Cas %v: rows %d and %d are equal", cas, i, j)
}
}
}
cartTo := mat.NewDense(r, c, nil)
Cartesian(cartTo, data)
if !mat.Equal(cart, cartTo) {
t.Errorf("cartesian data mismatch with supplied.\nwant:\n%v\ngot:\n%v", mat.Formatted(cart), mat.Formatted(cartTo))
}
}
}
func TestIdxSubFor(t *testing.T) {
for cas, dims := range [][]int{
{2, 2},
{3, 1, 6},
{2, 4, 6, 7},
} {
// Loop over all of the indexes. Confirm that the subscripts make sense
// and that IdxFor is the converse of SubFor.
maxIdx := 1
for _, v := range dims {
maxIdx *= v
}
into := make([]int, len(dims))
for idx := 0; idx < maxIdx; idx++ {
sub := SubFor(nil, idx, dims)
for i := range sub {
if sub[i] < 0 || sub[i] >= dims[i] {
t.Errorf("cas %v: bad subscript. dims: %v, sub: %v", cas, dims, sub)
}
}
SubFor(into, idx, dims)
if !reflect.DeepEqual(sub, into) {
t.Errorf("cas %v: subscript mismatch with supplied slice. Got %v, want %v", cas, into, sub)
}
idxOut := IdxFor(sub, dims)
if idxOut != idx {
t.Errorf("cas %v: returned index mismatch. Got %v, want %v", cas, idxOut, idx)
}
}
}
}