Faces: Add dedicated vector algebra package #4691 #4669 #4328

Signed-off-by: Michael Mayer <michael@photoprism.app>
This commit is contained in:
Michael Mayer
2025-02-03 16:48:46 +01:00
parent 163db703d1
commit 5738d838e5
6 changed files with 604 additions and 0 deletions

11
pkg/vector/const.go Normal file
View File

@@ -0,0 +1,11 @@
package vector
import "math"
const (
// Epsilon is math.SmallestNonzeroFloat64, the smallest positive, nonzero
// value a float64 can represent. Functions in this package that return
// (float64, error) use it as the placeholder result when they report an
// error, so it must not be interpreted as a computed value.
Epsilon = math.SmallestNonzeroFloat64
)
// NaN returns an IEEE 754 "not-a-number" value. The package uses it as the
// result of computations that are undefined for the given input, such as
// the mean of an empty vector or a distance between vectors of different
// dimensions.
func NaN() float64 {
	nan := math.NaN()
	return nan
}

57
pkg/vector/mean.go Normal file
View File

@@ -0,0 +1,57 @@
package vector
import "math"
// Mean computes the arithmetic mean of the values in v: their sum divided
// by the number of values. For an empty vector this is the division 0/0,
// so the result is NaN.
func Mean(v Vector) float64 {
	return v.Sum() / float64(len(v))
}
// GeometricMean returns the geometric mean of the values in v, i.e. the
// n-th root of the product of all n values.
//
// It returns NaN for an empty vector. A zero anywhere in the vector makes
// the product, and therefore the result, zero. If the product is negative
// and the vector holds more than one value, the exponent 1/n is not an
// integer and math.Pow yields NaN.
func GeometricMean(v Vector) float64 {
	l := v.Dim()

	if l == 0 {
		return NaN()
	}

	// Start from the multiplicative identity so that every value takes part
	// in the product. Seeding the product lazily ("if p == 0 then p = n")
	// would silently discard any zero that has already been multiplied in.
	p := 1.0

	for _, n := range v {
		p *= n
	}

	// The geometric mean is the l-th root of the product.
	return math.Pow(p, 1/float64(l))
}
// HarmonicMean returns the harmonic mean of the values in v, i.e. the
// number of values divided by the sum of their reciprocals.
//
// No error is returned; instead the result is NaN when the vector is empty
// or when it contains a value that is zero or negative.
func HarmonicMean(v Vector) float64 {
	count := v.Dim()
	if count == 0 {
		return NaN()
	}

	// Accumulate the reciprocals, bailing out on the first value that the
	// harmonic mean is not defined for.
	var reciprocals float64
	for _, x := range v {
		if x <= 0 {
			return NaN()
		}
		reciprocals += 1 / x
	}

	return float64(count) / reciprocals
}

235
pkg/vector/values.go Normal file
View File

@@ -0,0 +1,235 @@
package vector
import (
"fmt"
"math"
)
// Copy returns a new vector holding the same values as v. The result has
// its own backing array, so changing one vector does not affect the other.
// Copying an empty or nil vector yields an empty, non-nil vector.
func (v Vector) Copy() Vector {
	return append(make(Vector, 0, len(v)), v...)
}
// Dim returns the dimension of the vector, i.e. the number of values it
// holds. A nil or empty vector has dimension 0.
func (v Vector) Dim() int {
return len(v)
}
// Sum adds up all values of the vector, front to back, and returns the
// total. The sum of an empty vector is 0.
func (v Vector) Sum() float64 {
	var total float64
	for i := range v {
		total += v[i]
	}
	return total
}
// weightedSum multiplies every value of v by the weight at the same index
// in w and returns the sum of these products. It exists to support
// WeightedMean.
//
// If v and w differ in length, it returns Epsilon together with an error.
func (v Vector) weightedSum(w Vector) (float64, error) {
	if len(w) != len(v) {
		return Epsilon, fmt.Errorf("Length of weights unequal to vector length")
	}

	var acc float64
	for i, x := range v {
		acc += x * w[i]
	}

	return acc, nil
}
// Mean returns the arithmetic mean of the vector's values. It is a
// convenience wrapper around the package-level Mean function.
func (v Vector) Mean() float64 {
return Mean(v)
}
// GeometricMean returns the geometric mean of the vector's values. It is a
// convenience wrapper around the package-level GeometricMean function,
// which returns NaN for an empty vector.
func (v Vector) GeometricMean() float64 {
return GeometricMean(v)
}
// HarmonicMean returns the harmonic mean of the vector's values. It is a
// convenience wrapper around the package-level HarmonicMean function,
// which returns NaN for an empty vector and for vectors containing zero or
// negative values.
func (v Vector) HarmonicMean() float64 {
return HarmonicMean(v)
}
// WeightedMean returns the mean of the vector's values weighted by w: the
// sum of value*weight products divided by the sum of the weights.
//
// If v and w differ in length, it returns Epsilon together with the error
// reported by weightedSum. The division is not guarded, so weights that sum
// to zero produce an infinite or NaN result rather than an error.
func (v Vector) WeightedMean(w Vector) (float64, error) {
	total, err := v.weightedSum(w)
	if err != nil {
		return Epsilon, err
	}

	return total / w.Sum(), nil
}
// Sd returns the standard deviation of the vector's values, computed as the
// square root of Variance.
func (v Vector) Sd() float64 {
return math.Sqrt(v.Variance())
}
// Variance returns the variance of the vector's values around their own
// mean. The calculation is done by the unexported variance method, which
// is passed the result of v.Mean().
func (v Vector) Variance() float64 {
return v.variance(v.Mean())
}
// EuclideanDist returns the Euclidean distance between v and w. It is a
// convenience wrapper around the package-level EuclideanDist function,
// which returns NaN when the two vectors differ in dimension.
func (v Vector) EuclideanDist(w Vector) float64 {
return EuclideanDist(v, w)
}
// CosineDist returns the value computed by the package-level CosineDist
// function for v and w: the dot product of both vectors divided by the
// product of their Euclidean lengths. That function returns NaN when the
// vectors differ in dimension and 0 when either has zero length.
func (v Vector) CosineDist(w Vector) float64 {
return CosineDist(v, w)
}
// Norm returns the size (magnitude) of the vector for the exponent pow by
// delegating to the package-level Norm function; pass 2.0 to obtain the
// Euclidean norm.
// See https://builtin.com/data-science/vector-norms.
func (v Vector) Norm(pow float64) float64 {
return Norm(v, pow)
}
// EuclideanNorm returns the Euclidean size (magnitude) of the vector. It is
// shorthand for v.Norm(2.0).
// See https://builtin.com/data-science/vector-norms.
func (v Vector) EuclideanNorm() float64 {
return v.Norm(2.0)
}
// variance returns the sample variance of v around the given mean: the sum
// of the squared deviations from mean divided by n-1, where n is the number
// of values.
//
// A vector with exactly one value has a variance of 0. An empty vector also
// yields 0, because there are no deviations to sum up and the divisor is
// forced to 1.
func (v Vector) variance(mean float64) float64 {
	n := float64(len(v))

	switch {
	case n == 1:
		return 0
	case n < 2:
		// Only reachable for an empty vector: keeps the divisor (n - 1)
		// at 1 instead of -1.
		n = 2
	}

	var sumSq float64
	for i := range v {
		sumSq += math.Pow(v[i]-mean, 2.0)
	}

	return sumSq / (n - 1)
}
// Product multiplies a and b element by element and returns the products as
// a new vector of the same dimension.
//
// If a and b differ in length, it returns a nil vector and an error that
// reports both lengths.
func Product(a, b Vector) (Vector, error) {
	if la, lb := len(a), len(b); la != lb {
		return nil, fmt.Errorf("vector dimentions do not match (%d, %d)", la, lb)
	}

	out := make(Vector, len(a))
	for i, x := range a {
		out[i] = x * b[i]
	}

	return out, nil
}
// DotProduct returns the dot product of a and b, i.e. the sum of their
// element-wise products as computed by Product.
//
// If Product fails because the vectors differ in length, it returns Epsilon
// together with that error.
func DotProduct(a, b Vector) (float64, error) {
	products, err := Product(a, b)
	if err != nil {
		return Epsilon, err
	}

	dot := products.Sum()
	return dot, nil
}
// Norm returns the p-norm (size, magnitude) of the vector v for p = pow:
// the pow-th root of the sum of the absolute values raised to pow. Use
// pow = 1.0 for the Manhattan norm and pow = 2.0 for the Euclidean norm,
// see https://builtin.com/data-science/vector-norms.
//
// The norm of an empty vector is 0 for positive pow. The exponent is not
// validated: pow = 0 results in a division by zero when the root is taken.
func Norm(v Vector, pow float64) float64 {
	s := 0.0

	for _, f := range v {
		// A norm is defined on absolute values. Without math.Abs, negative
		// components would be subtracted for odd exponents and would turn
		// the sum into NaN for fractional exponents.
		s += math.Pow(math.Abs(f), pow)
	}

	return math.Pow(s, 1/pow)
}
// EuclideanDist returns the Euclidean distance between the two vectors a
// and b: the square root of the sum of the squared element-wise
// differences.
//
// It returns NaN if a and b differ in dimension.
func EuclideanDist(a, b Vector) float64 {
	if a.Dim() != b.Dim() {
		return NaN()
	}

	var sumSq float64
	for i, x := range a {
		d := x - b[i]
		sumSq += d * d
	}

	return math.Sqrt(sumSq)
}
// CosineDist returns the cosine of the angle between a and b: their dot
// product divided by the product of their Euclidean lengths. Note that,
// despite the name, this is the similarity value (1 for vectors pointing in
// the same direction, -1 for opposite directions), not 1 minus it.
//
// It returns NaN if a and b differ in dimension, and 0 if either vector
// has zero length (which includes empty vectors).
func CosineDist(a, b Vector) float64 {
	if a.Dim() != b.Dim() {
		return NaN()
	}

	var dot, sqA, sqB float64
	for i := range a {
		dot += a[i] * b[i]
		sqA += math.Pow(a[i], 2)
		sqB += math.Pow(b[i], 2)
	}

	// Only divide when both vectors have a non-zero length.
	if sqA != 0 && sqB != 0 {
		return dot / (math.Sqrt(sqA) * math.Sqrt(sqB))
	}

	return 0.0
}
// CosineDists applies CosineDist to every pair of vectors taken from x and
// y. The result has one row per vector in x and one column per vector in
// y, so that result[i][j] == CosineDist(x[i], y[j]).
func CosineDists(x, y Vectors) Vectors {
	dists := make(Vectors, len(x))

	for i := range x {
		row := make([]float64, len(y))
		for j := range y {
			row[j] = CosineDist(x[i], y[j])
		}
		dists[i] = row
	}

	return dists
}
// Cor returns the Pearson correlation coefficient of a and b. It is
// computed as (sum(a[i]*b[i]) - n*mean(a)*mean(b)) / ((n-1)*sd(a)*sd(b)),
// where n is the number of values in a.
//
// If the vectors differ in length, it returns Epsilon together with the
// error reported by Product. The division is not guarded, so inputs for
// which the divisor is zero (e.g. a standard deviation of zero) produce a
// NaN or infinite result rather than an error.
func Cor(a, b Vector) (float64, error) {
	xy, err := Product(a, b)
	if err != nil {
		return Epsilon, err
	}

	n := float64(len(a))
	sx, sy := a.Sd(), b.Sd()
	mx, my := a.Mean(), b.Mean()

	return (xy.Sum() - n*mx*my) / ((n - 1) * sx * sy), nil
}

108
pkg/vector/values_test.go Normal file

File diff suppressed because one or more lines are too long

163
pkg/vector/vector.go Normal file
View File

@@ -0,0 +1,163 @@
package vector
import (
"fmt"
)
// Vector represents an ordered list of floating-point values. It is a
// defined type based on []float64, so a []float64 can be converted to a
// Vector without copying.
type Vector []float64
// Vectors represents a list of vectors. It is declared as a type alias, so
// Vectors and []Vector are the same type and can be used interchangeably.
type Vectors = []Vector
// NewVector creates a vector from the given values, which must be a slice
// of one of the built-in integer or floating-point types, or a Vector.
//
// Integer and float32 slices are converted element by element into a newly
// allocated vector. A []float64 or Vector argument is NOT copied: the
// result shares its backing array with the argument, so later changes to
// one are visible through the other.
//
// For any other type, it returns a nil vector and an error naming the
// unsupported type.
func NewVector(values interface{}) (Vector, error) {
	switch v := values.(type) {
	case []uint:
		// Handled inline so that []uint is supported like every other
		// integer slice type.
		vec := make(Vector, len(v))
		for i := range v {
			vec[i] = float64(v[i])
		}
		return vec, nil
	case []uint8:
		return uint8ToVector(v), nil
	case []uint16:
		return uint16ToVector(v), nil
	case []uint32:
		return uint32ToVector(v), nil
	case []uint64:
		return uint64ToVector(v), nil
	case []int:
		return intToVector(v), nil
	case []int8:
		return int8ToVector(v), nil
	case []int16:
		return int16ToVector(v), nil
	case []int32:
		return int32ToVector(v), nil
	case []int64:
		return int64ToVector(v), nil
	case []float32:
		return float32ToVector(v), nil
	case []float64:
		return float64ToVector(v), nil
	case Vector:
		return v, nil
	default:
		return nil, fmt.Errorf("cannot create vector from type %T", values)
	}
}
// NullVector creates a new null vector with the given dimension, i.e. a
// vector of dim values that are all zero. The dimension is passed to make
// unchecked, so a negative dim causes a run-time panic.
func NullVector(dim int) Vector {
return make(Vector, dim)
}
// uint8ToVector converts a uint8 slice into a newly allocated vector of the
// same length by converting each element to float64. An empty or nil slice
// yields an empty vector.
func uint8ToVector(values []uint8) Vector {
	out := make(Vector, len(values))
	for i, val := range values {
		out[i] = float64(val)
	}
	return out
}
// uint16ToVector converts a uint16 slice into a newly allocated vector of
// the same length by converting each element to float64. An empty or nil
// slice yields an empty vector.
func uint16ToVector(values []uint16) Vector {
	out := make(Vector, len(values))
	for i, val := range values {
		out[i] = float64(val)
	}
	return out
}
// uint32ToVector converts a uint32 slice into a newly allocated vector of
// the same length by converting each element to float64. An empty or nil
// slice yields an empty vector.
func uint32ToVector(values []uint32) Vector {
	out := make(Vector, len(values))
	for i, val := range values {
		out[i] = float64(val)
	}
	return out
}
// uint64ToVector converts a uint64 slice into a newly allocated vector of
// the same length by converting each element to float64. An empty or nil
// slice yields an empty vector. Values that need more than the 53 bits of
// precision a float64 offers are rounded by the conversion.
func uint64ToVector(values []uint64) Vector {
	out := make(Vector, len(values))
	for i, val := range values {
		out[i] = float64(val)
	}
	return out
}
// intToVector converts an int slice into a newly allocated vector of the
// same length by converting each element to float64. An empty or nil slice
// yields an empty vector. Values that need more than the 53 bits of
// precision a float64 offers are rounded by the conversion.
func intToVector(values []int) Vector {
	out := make(Vector, len(values))
	for i, val := range values {
		out[i] = float64(val)
	}
	return out
}
// int8ToVector converts an int8 slice into a newly allocated vector of the
// same length by converting each element to float64. An empty or nil slice
// yields an empty vector.
func int8ToVector(values []int8) Vector {
	out := make(Vector, len(values))
	for i, val := range values {
		out[i] = float64(val)
	}
	return out
}
// int16ToVector converts an int16 slice into a newly allocated vector of
// the same length by converting each element to float64. An empty or nil
// slice yields an empty vector.
func int16ToVector(values []int16) Vector {
	out := make(Vector, len(values))
	for i, val := range values {
		out[i] = float64(val)
	}
	return out
}
// int32ToVector converts an int32 slice into a newly allocated vector of
// the same length by converting each element to float64. An empty or nil
// slice yields an empty vector.
func int32ToVector(values []int32) Vector {
	out := make(Vector, len(values))
	for i, val := range values {
		out[i] = float64(val)
	}
	return out
}
// int64ToVector converts an int64 slice into a newly allocated vector of
// the same length by converting each element to float64. An empty or nil
// slice yields an empty vector. Values that need more than the 53 bits of
// precision a float64 offers are rounded by the conversion.
func int64ToVector(values []int64) Vector {
	out := make(Vector, len(values))
	for i, val := range values {
		out[i] = float64(val)
	}
	return out
}
// float32ToVector converts a float32 slice into a newly allocated vector of
// the same length by widening each element to float64. An empty or nil
// slice yields an empty vector.
func float32ToVector(values []float32) Vector {
	out := make(Vector, len(values))
	for i, val := range values {
		out[i] = float64(val)
	}
	return out
}
// float64ToVector returns the given float64 slice as a Vector. This is a
// plain type conversion: no values are copied, so the returned vector
// shares its backing array with values, and a nil slice yields a nil
// vector.
func float64ToVector(values []float64) Vector {
return Vector(values)
}

30
pkg/vector/vector_test.go Normal file
View File

@@ -0,0 +1,30 @@
package vector
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestNewVector checks that NewVector accepts int, float32 and float64
// slices without an error, that it rejects a string slice with an error,
// and that the returned value has the type Vector in every case.
func TestNewVector(t *testing.T) {
	cases := []struct {
		name    string
		values  interface{}
		wantErr bool
	}{
		{"Int", []int{1, 2, 3, 4, 6, 5}, false},
		{"Float32", []float32{1.0, 2.1, 3.54, 4.9, 6.666666, 5.33333333}, false},
		{"Float64", []float64{1.0, 2.1, 3.54, 4.9, 6.666666, 5.33333333}, false},
		{"String", []string{"a", "b", "c"}, true},
	}

	for _, tc := range cases {
		tc := tc
		t.Run(tc.name, func(t *testing.T) {
			v, err := NewVector(tc.values)
			assert.IsType(t, Vector{}, v)
			if tc.wantErr {
				assert.Error(t, err)
			} else {
				assert.NoError(t, err)
			}
		})
	}
}