Faces: Add dedicated vector algebra package #4691 #4669 #4328

Signed-off-by: Michael Mayer <michael@photoprism.app>
2025-09-26 21:01:58 +08:00 · 2025-02-03 16:48:46 +01:00
parent 163db703d1
commit 5738d838e5
6 changed files with 604 additions and 0 deletions
--- a/pkg/vector/const.go
+++ b/pkg/vector/const.go
@@ -0,0 +1,11 @@
+package vector
+
+import "math"
+
+const (
+	// Epsilon is the smallest positive, nonzero float64 value. In this package it
+	// is also the placeholder result returned together with a non-nil error.
+	Epsilon = math.SmallestNonzeroFloat64
+)
+
+// NaN returns an IEEE 754 "not-a-number" value by delegating to math.NaN.
+func NaN() float64 {
+	return math.NaN()
+}
--- a/pkg/vector/mean.go
+++ b/pkg/vector/mean.go
@@ -0,0 +1,57 @@
+package vector
+
+import "math"
+
+// Mean returns the arithmetic mean of the values in v, i.e. their sum
+// divided by the number of values. An empty vector yields NaN (0/0).
+func Mean(v Vector) float64 {
+	return v.Sum() / float64(len(v))
+}
+
+// GeometricMean gets the geometric mean for a slice of numbers
+func GeometricMean(v Vector) float64 {
+	l := v.Dim()
+
+	if l == 0 {
+		return NaN()
+	}
+
+	// Get the product of all the numbers
+	var p float64
+	for _, n := range v {
+		if p == 0 {
+			p = n
+		} else {
+			p *= n
+		}
+	}
+
+	// Calculate the geometric mean
+	return math.Pow(p, 1/float64(l))
+}
+
+// HarmonicMean gets the harmonic mean for a slice of numbers
+func HarmonicMean(v Vector) float64 {
+	l := v.Dim()
+
+	if l == 0 {
+		return NaN()
+	}
+
+	// Get the sum of all the numbers reciprocals and return an
+	// error for values that cannot be included in harmonic mean
+	var p float64
+	for _, n := range v {
+		if n < 0 {
+			return NaN()
+		} else if n == 0 {
+			return NaN()
+		}
+		p += 1 / n
+	}
+
+	return float64(l) / p
+}
--- a/pkg/vector/values.go
+++ b/pkg/vector/values.go
@@ -0,0 +1,235 @@
+package vector
+
+import (
+	"fmt"
+	"math"
+)
+
+// Copy returns a newly allocated vector holding the same values as v,
+// so that changes to the copy do not affect the original.
+func (v Vector) Copy() Vector {
+	dup := make(Vector, len(v))
+
+	copy(dup, v)
+
+	return dup
+}
+
+// Dim returns the dimension of the vector, i.e. the number of values it holds.
+func (v Vector) Dim() int {
+	return len(v)
+}
+
+// Sum returns the sum of the vector values.
+func (v Vector) Sum() float64 {
+	s := 0.0
+
+	for _, f := range v {
+		s += f
+	}
+
+	return s
+}
+
+// weightedSum returns the weighted sum of the vector.  This is really only useful in
+// calculating the weighted mean.
+func (v Vector) weightedSum(w Vector) (float64, error) {
+	if len(v) != len(w) {
+		return Epsilon, fmt.Errorf("Length of weights unequal to vector length")
+	}
+
+	ws := 0.0
+
+	for i := range v {
+		ws += v[i] * w[i]
+	}
+
+	return ws, nil
+}
+
+// Mean returns the arithmetic mean of the vector's values by calling Mean(v).
+func (v Vector) Mean() float64 {
+	return Mean(v)
+}
+
+// GeometricMean returns the geometric mean of the vector's values by calling GeometricMean(v).
+func (v Vector) GeometricMean() float64 {
+	return GeometricMean(v)
+}
+
+// HarmonicMean returns the harmonic mean of the vector's values by calling HarmonicMean(v).
+func (v Vector) HarmonicMean() float64 {
+	return HarmonicMean(v)
+}
+
+// WeightedMean returns the mean of the vector's values weighted by w, computed as
+// the weighted sum divided by the sum of the weights. An error (with Epsilon as the
+// value) is returned if v and w differ in length. Weights that add up to zero are
+// not rejected: the division then yields NaN or an infinity.
+func (v Vector) WeightedMean(w Vector) (float64, error) {
+	weighted, err := v.weightedSum(w)
+
+	if err != nil {
+		return Epsilon, err
+	}
+
+	return weighted / w.Sum(), nil
+}
+
+// Sd returns the vector's standard deviation, i.e. the square root of Variance().
+func (v Vector) Sd() float64 {
+	return math.Sqrt(v.Variance())
+}
+
+// Variance returns the sample variance (n-1 divisor) of the vector's values around their mean.
+func (v Vector) Variance() float64 {
+	return v.variance(v.Mean())
+}
+
+// EuclideanDist returns the Euclidean distance between v and w by calling EuclideanDist(v, w).
+func (v Vector) EuclideanDist(w Vector) float64 {
+	return EuclideanDist(v, w)
+}
+
+// CosineDist compares the direction of v and w by calling CosineDist(v, w).
+func (v Vector) CosineDist(w Vector) float64 {
+	return CosineDist(v, w)
+}
+
+// Norm returns the vector size (magnitude) for the given power by calling Norm(v, pow),
+// see https://builtin.com/data-science/vector-norms.
+func (v Vector) Norm(pow float64) float64 {
+	return Norm(v, pow)
+}
+
+// EuclideanNorm returns the Euclidean vector size (magnitude), i.e. Norm with a power of 2.0,
+// see https://builtin.com/data-science/vector-norms.
+func (v Vector) EuclideanNorm() float64 {
+	return v.Norm(2.0)
+}
+
+func (v Vector) variance(mean float64) float64 {
+	n := float64(len(v))
+
+	if n == 1 {
+		return 0
+	} else if n < 2 {
+		n = 2
+	}
+
+	ss := 0.0
+
+	for _, f := range v {
+		ss += math.Pow(f-mean, 2.0)
+	}
+
+	return ss / (n - 1)
+}
+
+// Product returns a new vector holding the element-wise products of a and b,
+// i.e. result[i] = a[i] * b[i]. The inputs are not modified. If the vectors
+// differ in dimension, a nil vector and an error are returned.
+func Product(a, b Vector) (Vector, error) {
+	if len(a) != len(b) {
+		return nil, fmt.Errorf("vector dimensions do not match (%d, %d)", len(a), len(b))
+	}
+
+	p := make(Vector, len(a))
+
+	for i := range a {
+		p[i] = a[i] * b[i]
+	}
+
+	return p, nil
+}
+
+// DotProduct returns the dot (scalar) product of a and b, i.e. the sum of their
+// element-wise products. If the vectors differ in dimension, an error is returned
+// along with Epsilon as a placeholder value.
+func DotProduct(a, b Vector) (float64, error) {
+	products, err := Product(a, b)
+
+	if err != nil {
+		return Epsilon, err
+	}
+
+	total := products.Sum()
+
+	return total, nil
+}
+
+// Norm returns the p-norm (size) of the vector for p = pow, computed as the pow-th
+// root of the sum of the absolute values raised to pow. Use pow = 1.0 for the
+// Manhattan norm and pow = 2.0 for the Euclidean norm; pow is expected to be
+// non-zero, see https://builtin.com/data-science/vector-norms.
+func Norm(v Vector, pow float64) float64 {
+	s := 0.0
+
+	for _, f := range v {
+		// Use the magnitude of each value: without math.Abs, negative values
+		// would cancel out for odd powers and produce NaN for fractional ones.
+		s += math.Pow(math.Abs(f), pow)
+	}
+
+	return math.Pow(s, 1/pow)
+}
+
+// EuclideanDist returns the Euclidean distance between multiple vectors.
+func EuclideanDist(a, b Vector) float64 {
+	if a.Dim() != b.Dim() {
+		return NaN()
+	}
+
+	var (
+		s, t float64
+	)
+
+	for i := range a {
+		t = a[i] - b[i]
+		s += t * t
+	}
+
+	return math.Sqrt(s)
+}
+
+// CosineDist returns the cosine of the angle between a and b, that is their dot
+// product divided by the product of their Euclidean magnitudes. Despite the name,
+// the result is a similarity score: close to 1 for vectors pointing the same way
+// and 0 for orthogonal ones. It returns NaN if the dimensions differ and 0 if
+// either vector has a magnitude of zero.
+func CosineDist(a, b Vector) float64 {
+	if len(a) != len(b) {
+		return NaN()
+	}
+
+	var dot, sqA, sqB float64
+
+	for i := range a {
+		dot += a[i] * b[i]
+		sqA += math.Pow(a[i], 2)
+		sqB += math.Pow(b[i], 2)
+	}
+
+	// A vector without magnitude has no direction to compare.
+	if sqA == 0 || sqB == 0 {
+		return 0.0
+	}
+
+	return dot / (math.Sqrt(sqA) * math.Sqrt(sqB))
+}
+
+// CosineDists calls CosineDist for every pair of vectors taken from x and y.
+// The result holds one row per vector in x and one column per vector in y,
+// so that result[i][j] is CosineDist(x[i], y[j]).
+func CosineDists(x, y Vectors) Vectors {
+	dists := make(Vectors, len(x))
+
+	for i := range x {
+		row := make(Vector, len(y))
+
+		for j := range y {
+			row[j] = CosineDist(x[i], y[j])
+		}
+
+		dists[i] = row
+	}
+
+	return dists
+}
+
+// Cor returns the Pearson correlation coefficient of a and b, derived from their
+// element-wise products, means and sample standard deviations. If the vectors
+// differ in length, an error is returned along with Epsilon as a placeholder.
+func Cor(a, b Vector) (float64, error) {
+	products, err := Product(a, b)
+
+	if err != nil {
+		return Epsilon, err
+	}
+
+	n := float64(len(a))
+	meanA, meanB := a.Mean(), b.Mean()
+	sdA, sdB := a.Sd(), b.Sd()
+
+	numerator := products.Sum() - n*meanA*meanB
+
+	return numerator / ((n - 1) * sdA * sdB), nil
+}
--- a/pkg/vector/values_test.go
+++ b/pkg/vector/values_test.go
--- a/pkg/vector/vector.go
+++ b/pkg/vector/vector.go
@@ -0,0 +1,163 @@
+package vector
+
+import (
+	"fmt"
+)
+
+// Vector represents an ordered list of float64 values.
+type Vector []float64
+
+// Vectors is an alias for a slice of Vector values.
+type Vectors = []Vector
+
+// NewVector creates a vector from a slice of numeric values. Supported inputs are
+// slices of uint, uint8, uint16, uint32, uint64, int, int8, int16, int32, int64,
+// float32 and float64, as well as an existing Vector.
+//
+// Integer and float32 slices are converted into a newly allocated vector, whereas
+// a []float64 or Vector argument is returned without copying and therefore shares
+// its backing array with the result. Any other type results in an error.
+func NewVector(values interface{}) (Vector, error) {
+	switch v := values.(type) {
+	case []uint:
+		return uintToVector(v), nil
+	case []uint8:
+		return uint8ToVector(v), nil
+	case []uint16:
+		return uint16ToVector(v), nil
+	case []uint32:
+		return uint32ToVector(v), nil
+	case []uint64:
+		return uint64ToVector(v), nil
+	case []int:
+		return intToVector(v), nil
+	case []int8:
+		return int8ToVector(v), nil
+	case []int16:
+		return int16ToVector(v), nil
+	case []int32:
+		return int32ToVector(v), nil
+	case []int64:
+		return int64ToVector(v), nil
+	case []float32:
+		return float32ToVector(v), nil
+	case []float64:
+		return float64ToVector(v), nil
+	case Vector:
+		return v, nil
+	default:
+		return nil, fmt.Errorf("cannot create vector from type %T", values)
+	}
+}
+
+// uintToVector creates a new vector from a uint slice.
+func uintToVector(values []uint) Vector {
+	v := make(Vector, len(values))
+
+	for i := range values {
+		v[i] = float64(values[i])
+	}
+
+	return v
+}
+
+// NullVector creates a new vector of the given dimension with all values set to zero.
+func NullVector(dim int) Vector {
+	return make(Vector, dim)
+}
+
+// uint8ToVector converts a uint8 slice into a newly allocated vector of equal length.
+func uint8ToVector(values []uint8) Vector {
+	out := make(Vector, len(values))
+
+	for i, x := range values {
+		out[i] = float64(x)
+	}
+
+	return out
+}
+
+// uint16ToVector converts a uint16 slice into a newly allocated vector of equal length.
+func uint16ToVector(values []uint16) Vector {
+	out := make(Vector, len(values))
+
+	for i, x := range values {
+		out[i] = float64(x)
+	}
+
+	return out
+}
+
+// uint32ToVector converts a uint32 slice into a newly allocated vector of equal length.
+func uint32ToVector(values []uint32) Vector {
+	out := make(Vector, len(values))
+
+	for i, x := range values {
+		out[i] = float64(x)
+	}
+
+	return out
+}
+
+// uint64ToVector converts a uint64 slice into a newly allocated vector of equal length.
+// Values beyond 2^53 may lose precision when converted to float64.
+func uint64ToVector(values []uint64) Vector {
+	out := make(Vector, len(values))
+
+	for i, x := range values {
+		out[i] = float64(x)
+	}
+
+	return out
+}
+
+// intToVector converts an int slice into a newly allocated vector of equal length.
+// Values with a magnitude beyond 2^53 may lose precision when converted to float64.
+func intToVector(values []int) Vector {
+	out := make(Vector, len(values))
+
+	for i, x := range values {
+		out[i] = float64(x)
+	}
+
+	return out
+}
+
+// int8ToVector converts an int8 slice into a newly allocated vector of equal length.
+func int8ToVector(values []int8) Vector {
+	out := make(Vector, len(values))
+
+	for i, x := range values {
+		out[i] = float64(x)
+	}
+
+	return out
+}
+
+// int16ToVector converts an int16 slice into a newly allocated vector of equal length.
+func int16ToVector(values []int16) Vector {
+	out := make(Vector, len(values))
+
+	for i, x := range values {
+		out[i] = float64(x)
+	}
+
+	return out
+}
+
+// int32ToVector converts an int32 slice into a newly allocated vector of equal length.
+func int32ToVector(values []int32) Vector {
+	out := make(Vector, len(values))
+
+	for i, x := range values {
+		out[i] = float64(x)
+	}
+
+	return out
+}
+
+// int64ToVector converts an int64 slice into a newly allocated vector of equal length.
+// Values with a magnitude beyond 2^53 may lose precision when converted to float64.
+func int64ToVector(values []int64) Vector {
+	out := make(Vector, len(values))
+
+	for i, x := range values {
+		out[i] = float64(x)
+	}
+
+	return out
+}
+
+// float32ToVector converts a float32 slice into a newly allocated vector of equal
+// length, widening each value to float64.
+func float32ToVector(values []float32) Vector {
+	out := make(Vector, len(values))
+
+	for i, x := range values {
+		out[i] = float64(x)
+	}
+
+	return out
+}
+
+// float64ToVector converts a float64 slice to a Vector without copying, so both share the same backing array.
+func float64ToVector(values []float64) Vector {
+	return Vector(values)
+}
--- a/pkg/vector/vector_test.go
+++ b/pkg/vector/vector_test.go
@@ -0,0 +1,30 @@
+package vector
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+// TestNewVector checks that NewVector accepts int, float32 and float64 slices
+// and returns an error for an unsupported element type.
+func TestNewVector(t *testing.T) {
+	cases := []struct {
+		name    string
+		values  interface{}
+		wantErr bool
+	}{
+		{"Int", []int{1, 2, 3, 4, 6, 5}, false},
+		{"Float32", []float32{1.0, 2.1, 3.54, 4.9, 6.666666, 5.33333333}, false},
+		{"Float64", []float64{1.0, 2.1, 3.54, 4.9, 6.666666, 5.33333333}, false},
+		{"String", []string{"a", "b", "c"}, true},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			v, err := NewVector(tc.values)
+
+			// The result is always of type Vector, even when it is nil.
+			assert.IsType(t, Vector{}, v)
+
+			if tc.wantErr {
+				assert.Error(t, err)
+			} else {
+				assert.NoError(t, err)
+			}
+		})
+	}
+}