From 0ef58ce7e9a32b883f34c5c437e8a6d29f453b52 Mon Sep 17 00:00:00 2001 From: Chad Kunde Date: Sat, 21 May 2016 14:14:54 -0700 Subject: [PATCH] f64 lnorm implementations for L=1 (L1norm) and L=inf (LinfNorm) with tests --- asm/f64/l1norm_amd64.s | 22 +++---- asm/f64/linfnorm_amd.s | 47 +++++++++++++++ asm/f64/stubs_amd64.go | 2 + asm/f64/stubs_test.go | 131 +++++++++++++++++++++++++++++++++++++---- 4 files changed, 181 insertions(+), 21 deletions(-) create mode 100644 asm/f64/linfnorm_amd.s diff --git a/asm/f64/l1norm_amd64.s b/asm/f64/l1norm_amd64.s index 786eab41..472eed99 100644 --- a/asm/f64/l1norm_amd64.s +++ b/asm/f64/l1norm_amd64.s @@ -13,14 +13,14 @@ TEXT ·L1norm(SB), NOSPLIT, $0 CMOVQLE t_len+32(FP), DX PXOR X3, X3 XORQ AX, AX - CMPQ DX, $0 - JE l1_end CMPQ DX, $1 - JL l1_tail + JL l1_end + SUBQ $1, DX + JE l1_tail l1_loop: MOVUPS (SI)(AX*8), X0 MOVUPS (DI)(AX*8), X1 - MOVAPS X0,X2 + MOVAPS X0, X2 SUBPD X1, X0 SUBPD X2, X1 MAXPD X1, X0 @@ -28,20 +28,20 @@ l1_loop: ADDQ $2, AX CMPQ AX, DX JL l1_loop - JE l1_end + JG l1_end l1_tail: PXOR X0 ,X0 PXOR X1 ,X1 MOVSD (SI)(AX*8), X0 MOVSD (DI)(AX*8), X1 - MOVUPS X0, X2 - SUBPD X1, X0 - SUBPD X2, X1 - MAXPD X1, X0 - ADDPD X0, X3 + MOVAPD X0, X2 + SUBSD X1, X0 + SUBSD X2, X1 + MAXSD X1, X0 + ADDSD X0, X3 l1_end: MOVAPS X3, X2 - SHUFPD $1, X3, X2 + SHUFPD $1, X2, X2 ADDSD X3, X2 MOVSD X2, ret+48(FP) RET diff --git a/asm/f64/linfnorm_amd.s b/asm/f64/linfnorm_amd.s new file mode 100644 index 00000000..76c6eb1b --- /dev/null +++ b/asm/f64/linfnorm_amd.s @@ -0,0 +1,47 @@ +// Copyright ©2016 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +//func LinfNorm(s, t []float64) float64 +TEXT ·LinfNorm(SB), NOSPLIT, $0 + MOVQ s_base+0(FP), DI + MOVQ t_base+24(FP), SI + MOVQ s_len+8(FP), DX + CMPQ t_len+32(FP), DX + CMOVQLE t_len+32(FP), DX + PXOR X3, X3 + XORQ AX, AX + CMPQ DX, $1 + JL l1_end + SUBQ $1, DX + JE l1_tail +l1_loop: + MOVUPS (SI)(AX*8), X0 + MOVUPS (DI)(AX*8), X1 + MOVAPS X0, X2 + SUBPD X1, X0 + SUBPD X2, X1 + MAXPD X1, X0 + MAXPD X0, X3 + ADDQ $2, AX + CMPQ AX, DX + JL l1_loop + JG l1_end +l1_tail: + PXOR X0 ,X0 + PXOR X1 ,X1 + MOVSD (SI)(AX*8), X0 + MOVSD (DI)(AX*8), X1 + MOVAPD X0, X2 + SUBSD X1, X0 + SUBSD X2, X1 + MAXSD X1, X0 + MAXSD X0, X3 +l1_end: + MOVAPS X3, X2 + SHUFPD $1, X2, X2 + MAXSD X3, X2 + MOVSD X2, ret+48(FP) + RET diff --git a/asm/f64/stubs_amd64.go b/asm/f64/stubs_amd64.go index 4f64d893..53d8b74b 100644 --- a/asm/f64/stubs_amd64.go +++ b/asm/f64/stubs_amd64.go @@ -32,6 +32,8 @@ func DotInc(x, y []float64, n, incX, incY, ix, iy uintptr) (sum float64) func L1norm(s, t []float64) float64 +func LinfNorm(s, t []float64) float64 + func ScalUnitary(alpha float64, x []float64) func ScalUnitaryTo(dst []float64, alpha float64, x []float64) diff --git a/asm/f64/stubs_test.go b/asm/f64/stubs_test.go index 583a4915..4a10f35e 100644 --- a/asm/f64/stubs_test.go +++ b/asm/f64/stubs_test.go @@ -6,15 +6,22 @@ package f64 import ( "math" + "runtime" "testing" ) -var nan, inf, ninf float64 +var ( + nan, inf, ninf float64 +) func init() { nan, inf, ninf = math.NaN(), math.Inf(1), math.Inf(-1) } +func diff(a, b float64) bool { + return a != b && !math.IsNaN(a) && !math.IsNaN(b) || (math.IsNaN(a) != math.IsNaN(b)) +} + func TestAdd(t *testing.T) { for j, v := range []struct { dst, src, expect []float64 @@ -32,13 +39,14 @@ func TestAdd(t *testing.T) { } { Add(v.dst, v.src) for i := range v.expect { - if v.dst[i] != v.expect[i] && (math.IsNaN(v.dst[i]) != math.IsNaN(v.expect[i])) { + if diff(v.dst[i], v.expect[i]) { t.Log("Test", j, "Add error at", i, "Got:", v.dst[i], "Expected:", v.expect[i]) t.Fail() } } } + runtime.GC() } func TestAddConst(t *testing.T) { @@ -54,12 +62,13 @@ func TestAddConst(t *testing.T) { } { AddConst(v.alpha, v.src) for i := range v.expect { - if v.src[i] != v.expect[i] && (math.IsNaN(v.src[i]) != math.IsNaN(v.expect[i])) { + if diff(v.src[i], v.expect[i]) { t.Log("Test", j, "AddConst error at", i, "Got:", v.src[i], "Expected:", v.expect[i]) t.Fail() } } } + runtime.GC() } func TestCumSum(t *testing.T) { @@ -80,26 +89,26 @@ func TestCumSum(t *testing.T) { } { ret := CumSum(v.dst, v.src) for i := range v.expect { - if ret[i] != v.expect[i] && (math.IsNaN(ret[i]) != math.IsNaN(v.expect[i])) { + if diff(ret[i], v.expect[i]) { t.Log("Test", j, "CumSum error at", i, "Got:", ret[i], "Expected:", v.expect[i]) t.Fail() } - if ret[i] != v.dst[i] && (math.IsNaN(ret[i]) != math.IsNaN(v.dst[i])) { + if diff(ret[i], v.dst[i]) { t.Log("Test", j, "CumSum ret/dst mismatch", i, "Ret:", ret[i], "Dst:", v.dst[i]) t.Fail() } } } + runtime.GC() } func TestCumProd(t *testing.T) { - nan, inf, ninf := math.NaN(), math.Inf(1), math.Inf(-1) for j, v := range []struct { dst, src, expect []float64 }{ {[]float64{1}, []float64{1}, []float64{1}}, {[]float64{nan}, []float64{nan}, []float64{nan}}, - {[]float64{0, 0, 0, 0}, []float64{1, 2, 3, 4}, []float64{1, 2, 6, 12}}, + {[]float64{0, 0, 0, 0}, []float64{1, 2, 3, 4}, []float64{1, 2, 6, 24}}, {[]float64{0, 0, 0}, []float64{1, 2, 3, 4}, []float64{1, 2, 6}}, {[]float64{0, 0, 0, 0}, []float64{1, 2, 3}, []float64{1, 2, 6}}, {[]float64{nan, 1, nan, 1, 0}, @@ -111,16 +120,118 @@ func TestCumProd(t *testing.T) { } { ret := CumProd(v.dst, v.src) for i := range v.expect { - if ret[i] != v.expect[i] && (math.IsNaN(ret[i]) != math.IsNaN(v.expect[i])) { + if diff(ret[i], v.expect[i]) { t.Log("Test", j, "CumProd error at", i, "Got:", ret[i], "Expected:", v.expect[i]) t.Fail() } - if ret[i] != v.dst[i] && (math.IsNaN(ret[i]) != math.IsNaN(v.dst[i])) { + if diff(ret[i], v.dst[i]) { t.Log("Test", j, "CumProd ret/dst mismatch", i, "Ret:", ret[i], "Dst:", v.dst[i]) t.Fail() } } } + runtime.GC() } -//func TestDiv +func TestDiv(t *testing.T) { + for j, v := range []struct { + dst, src, expect []float64 + }{ + {[]float64{1}, []float64{1}, []float64{1}}, + {[]float64{nan}, []float64{nan}, []float64{nan}}, + {[]float64{1, 2, 3, 4}, []float64{1, 2, 3, 4}, []float64{1, 1, 1, 1}}, + {[]float64{2, 4, 6}, []float64{1, 2, 3, 4}, []float64{2, 2, 2}}, + {[]float64{0, 0, 0, 0}, []float64{1, 2, 3}, []float64{0, 0, 0}}, + {[]float64{nan, 1, nan, 1, 0}, + []float64{1, 1, nan, 1, 1}, + []float64{nan, 1, nan, 1, 0}}, + {[]float64{inf, 4, nan, ninf, 9}, + []float64{inf, 4, nan, ninf, 3}, + []float64{nan, 1, nan, nan, 3}}, + } { + Div(v.dst, v.src) + for i := range v.expect { + if diff(v.dst[i], v.expect[i]) { + t.Log("Test", j, "Div error at", i, "Got:", v.dst[i], "Expected:", v.expect[i]) + t.Fail() + } + } + } + runtime.GC() +} + +func TestDivTo(t *testing.T) { + for j, v := range []struct { + dst, src, expect []float64 + }{ + {[]float64{1}, []float64{1}, []float64{1}}, + {[]float64{nan}, []float64{nan}, []float64{nan}}, + {[]float64{1, 2, 3, 4}, []float64{1, 2, 3, 4}, []float64{1, 1, 1, 1}}, + {[]float64{2, 4, 6}, []float64{1, 2, 3, 4}, []float64{2, 2, 2}}, + {[]float64{0, 0, 0, 0}, []float64{1, 2, 3}, []float64{0, 0, 0}}, + {[]float64{nan, 1, nan, 1, 0}, + []float64{1, 1, nan, 1, 1}, + []float64{nan, 1, nan, 1, 0}}, + {[]float64{inf, 4, nan, ninf, 9}, + []float64{inf, 4, nan, ninf, 3}, + []float64{nan, 1, nan, nan, 3}}, + } { + ret := DivTo(v.dst, v.dst, v.src) + for i := range v.expect { + if diff(ret[i], v.expect[i]) { + t.Log("Test", j, "DivTo error at", i, "Got:", v.dst[i], "Expected:", v.expect[i]) + t.Fail() + } + if diff(ret[i], v.dst[i]) { + t.Log("Test", j, "DivTo ret/dst mismatch", i, "Ret:", ret[i], "Dst:", v.dst[i]) + t.Fail() + } + } + } + runtime.GC() +} + +func TestL1norm(t *testing.T) { + for j, v := range []struct { + s, t []float64 + expect float64 + }{ + {[]float64{1}, []float64{1}, 0}, + {[]float64{nan}, []float64{nan}, nan}, + {[]float64{1, 2, 3, 4}, []float64{1, 2, 3, 4}, 0}, + {[]float64{2, 4, 6}, []float64{1, 2, 3, 4}, 6}, + {[]float64{0, 0, 0, 0}, []float64{1, 2, 3}, 6}, + {[]float64{0, -4, -10, 0}, []float64{1, 2, 3}, 20}, + {[]float64{0, 1, 0, 1, 0}, []float64{1, 1, inf, 1, 1}, inf}, + {[]float64{inf, 4, nan, ninf, 9}, []float64{inf, 4, nan, ninf, 3}, nan}, + } { + ret := L1norm(v.s, v.t) + if diff(ret, v.expect) { + t.Log("Test", j, "L1norm error. Got:", ret, "Expected:", v.expect) + t.Fail() + } + } + runtime.GC() +} + +func TestLinfNorm(t *testing.T) { + for j, v := range []struct { + s, t []float64 + expect float64 + }{ + {[]float64{1}, []float64{1}, 0}, + {[]float64{nan}, []float64{nan}, nan}, + {[]float64{1, 2, 3, 4}, []float64{1, 2, 3, 4}, 0}, + {[]float64{2, 4, 6}, []float64{1, 2, 3, 4}, 3}, + {[]float64{0, 0, 0, 0}, []float64{1, 2, 3}, 3}, + {[]float64{0, 1, 0, 1, 0}, []float64{1, 1, inf, 1, 1}, inf}, + {[]float64{inf, 4, nan, ninf, 9}, []float64{inf, 4, nan, ninf, 3}, 6}, + } { + ret := LinfNorm(v.s, v.t) + if diff(ret, v.expect) { + t.Log("Test", j, "LinfNorm error. Got:", ret, "Expected:", v.expect) + t.Fail() + } + } + runtime.GC() +}