From 0ef58ce7e9a32b883f34c5c437e8a6d29f453b52 Mon Sep 17 00:00:00 2001
From: Chad Kunde <Kunde21@gmail.com>
Date: Sat, 21 May 2016 14:14:54 -0700
Subject: [PATCH] f64 lnorm implementations for L=1 (L1norm) and L=inf
 (LinfNorm) with tests

---
 asm/f64/l1norm_amd64.s |  22 +++----
 asm/f64/linfnorm_amd.s |  47 +++++++++++++++
 asm/f64/stubs_amd64.go |   2 +
 asm/f64/stubs_test.go  | 131 +++++++++++++++++++++++++++++++++++++----
 4 files changed, 181 insertions(+), 21 deletions(-)
 create mode 100644 asm/f64/linfnorm_amd.s

diff --git a/asm/f64/l1norm_amd64.s b/asm/f64/l1norm_amd64.s
index 786eab41..472eed99 100644
--- a/asm/f64/l1norm_amd64.s
+++ b/asm/f64/l1norm_amd64.s
@@ -13,14 +13,14 @@ TEXT ·L1norm(SB), NOSPLIT, $0
 	CMOVQLE t_len+32(FP), DX
 	PXOR    X3, X3
 	XORQ	AX, AX
-	CMPQ    DX, $0
-	JE      l1_end
 	CMPQ    DX, $1
-	JL      l1_tail
+	JL      l1_end
+	SUBQ	$1, DX
+	JE	l1_tail
 l1_loop:
 	MOVUPS  (SI)(AX*8), X0
 	MOVUPS  (DI)(AX*8), X1
-	MOVAPS  X0,X2
+	MOVAPS  X0, X2
 	SUBPD   X1, X0
 	SUBPD   X2, X1
 	MAXPD   X1, X0
@@ -28,20 +28,20 @@ l1_loop:
 	ADDQ	$2, AX
 	CMPQ    AX, DX
 	JL	l1_loop
-	JE      l1_end
+	JG      l1_end
 l1_tail:
 	PXOR    X0 ,X0
 	PXOR    X1 ,X1
 	MOVSD   (SI)(AX*8), X0
 	MOVSD   (DI)(AX*8), X1
-	MOVUPS  X0, X2
-	SUBPD   X1, X0
-	SUBPD   X2, X1
-	MAXPD   X1, X0
-	ADDPD   X0, X3
+	MOVAPD  X0, X2
+	SUBSD   X1, X0
+	SUBSD   X2, X1
+	MAXSD   X1, X0
+	ADDSD   X0, X3
 l1_end:
 	MOVAPS  X3, X2
-	SHUFPD  $1, X3, X2
+	SHUFPD  $1, X2, X2
 	ADDSD   X3, X2
 	MOVSD   X2, ret+48(FP)
 	RET
diff --git a/asm/f64/linfnorm_amd.s b/asm/f64/linfnorm_amd.s
new file mode 100644
index 00000000..76c6eb1b
--- /dev/null
+++ b/asm/f64/linfnorm_amd.s
@@ -0,0 +1,58 @@
+// Copyright ©2016 The gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// The file name carries no _amd64 suffix, so the architecture is constrained here.
+
+// +build amd64
+
+#include "textflag.h"
+
+// func LinfNorm(s, t []float64) float64
+//
+// LinfNorm returns the largest |s[i]-t[i]| over the first
+// min(len(s), len(t)) elements, or 0 when there are none.
+//
+// NaN differences are not reliably propagated: MAXPD and MAXSD return
+// their source operand when either input is NaN, so whether a NaN
+// reaches the result depends on where it occurs in the input.
+TEXT ·LinfNorm(SB), NOSPLIT, $0
+	MOVQ    s_base+0(FP), DI
+	MOVQ    t_base+24(FP), SI
+	MOVQ    s_len+8(FP), DX
+	CMPQ    t_len+32(FP), DX
+	CMOVQLE t_len+32(FP), DX    // DX = min(len(s), len(t))
+	PXOR    X3, X3              // X3 = running maximum, both lanes start at 0
+	XORQ    AX, AX              // AX = element index
+	CMPQ    DX, $1
+	JL      linf_end            // nothing to compare: return 0
+	SUBQ    $1, DX              // DX = index of the last element
+	JE      linf_tail           // a single element: scalar path only
+linf_loop:
+	MOVUPS  (SI)(AX*8), X0      // X0 = t[i], t[i+1]
+	MOVUPS  (DI)(AX*8), X1      // X1 = s[i], s[i+1]
+	MOVAPS  X0, X2
+	SUBPD   X1, X0              // X0 = t - s
+	SUBPD   X2, X1              // X1 = s - t
+	MAXPD   X1, X0              // X0 = |s - t|
+	MAXPD   X0, X3              // fold into the running maximum
+	ADDQ    $2, AX
+	CMPQ    AX, DX
+	JL      linf_loop
+	JG      linf_end            // even count: no element left over
+linf_tail:
+	PXOR    X0, X0
+	PXOR    X1, X1
+	MOVSD   (SI)(AX*8), X0      // last element of t
+	MOVSD   (DI)(AX*8), X1      // last element of s
+	MOVAPD  X0, X2
+	SUBSD   X1, X0
+	SUBSD   X2, X1
+	MAXSD   X1, X0              // X0 low lane = |s - t|
+	MAXSD   X0, X3
+linf_end:
+	MOVAPS  X3, X2
+	SHUFPD  $1, X2, X2          // X2 = X3 with its two lanes swapped
+	MAXSD   X3, X2              // low lane = max of both lanes of X3
+	MOVSD   X2, ret+48(FP)
+	RET
diff --git a/asm/f64/stubs_amd64.go b/asm/f64/stubs_amd64.go
index 4f64d893..53d8b74b 100644
--- a/asm/f64/stubs_amd64.go
+++ b/asm/f64/stubs_amd64.go
@@ -32,6 +32,8 @@ func DotInc(x, y []float64, n, incX, incY, ix, iy uintptr) (sum float64)
 
 func L1norm(s, t []float64) float64
 
+// LinfNorm returns the largest |s[i]-t[i]| over the first min(len(s), len(t)) elements.
+func LinfNorm(s, t []float64) float64
 func ScalUnitary(alpha float64, x []float64)
 
 func ScalUnitaryTo(dst []float64, alpha float64, x []float64)
diff --git a/asm/f64/stubs_test.go b/asm/f64/stubs_test.go
index 583a4915..4a10f35e 100644
--- a/asm/f64/stubs_test.go
+++ b/asm/f64/stubs_test.go
@@ -6,15 +6,22 @@ package f64
 
 import (
 	"math"
+	"runtime"
 	"testing"
 )
 
-var nan, inf, ninf float64
+var (
+	nan, inf, ninf float64
+)
 
 func init() {
 	nan, inf, ninf = math.NaN(), math.Inf(1), math.Inf(-1)
 }
 
+// diff reports whether a and b differ. Unlike !=, it treats two NaN values
+// as equal, while a NaN still differs from any non-NaN value.
+func diff(a, b float64) bool { return !(a == b || math.IsNaN(a) && math.IsNaN(b)) }
+
 func TestAdd(t *testing.T) {
 	for j, v := range []struct {
 		dst, src, expect []float64
@@ -32,13 +39,14 @@ func TestAdd(t *testing.T) {
 	} {
 		Add(v.dst, v.src)
 		for i := range v.expect {
-			if v.dst[i] != v.expect[i] && (math.IsNaN(v.dst[i]) != math.IsNaN(v.expect[i])) {
+			if diff(v.dst[i], v.expect[i]) {
 
 				t.Log("Test", j, "Add error at", i, "Got:", v.dst[i], "Expected:", v.expect[i])
 				t.Fail()
 			}
 		}
 	}
+	runtime.GC()
 }
 
 func TestAddConst(t *testing.T) {
@@ -54,12 +62,13 @@ func TestAddConst(t *testing.T) {
 	} {
 		AddConst(v.alpha, v.src)
 		for i := range v.expect {
-			if v.src[i] != v.expect[i] && (math.IsNaN(v.src[i]) != math.IsNaN(v.expect[i])) {
+			if diff(v.src[i], v.expect[i]) {
 				t.Log("Test", j, "AddConst error at", i, "Got:", v.src[i], "Expected:", v.expect[i])
 				t.Fail()
 			}
 		}
 	}
+	runtime.GC()
 }
 
 func TestCumSum(t *testing.T) {
@@ -80,26 +89,26 @@ func TestCumSum(t *testing.T) {
 	} {
 		ret := CumSum(v.dst, v.src)
 		for i := range v.expect {
-			if ret[i] != v.expect[i] && (math.IsNaN(ret[i]) != math.IsNaN(v.expect[i])) {
+			if diff(ret[i], v.expect[i]) {
 				t.Log("Test", j, "CumSum error at", i, "Got:", ret[i], "Expected:", v.expect[i])
 				t.Fail()
 			}
-			if ret[i] != v.dst[i] && (math.IsNaN(ret[i]) != math.IsNaN(v.dst[i])) {
+			if diff(ret[i], v.dst[i]) {
 				t.Log("Test", j, "CumSum ret/dst mismatch", i, "Ret:", ret[i], "Dst:", v.dst[i])
 				t.Fail()
 			}
 		}
 	}
+	runtime.GC()
 }
 
 func TestCumProd(t *testing.T) {
-	nan, inf, ninf := math.NaN(), math.Inf(1), math.Inf(-1)
 	for j, v := range []struct {
 		dst, src, expect []float64
 	}{
 		{[]float64{1}, []float64{1}, []float64{1}},
 		{[]float64{nan}, []float64{nan}, []float64{nan}},
-		{[]float64{0, 0, 0, 0}, []float64{1, 2, 3, 4}, []float64{1, 2, 6, 12}},
+		{[]float64{0, 0, 0, 0}, []float64{1, 2, 3, 4}, []float64{1, 2, 6, 24}},
 		{[]float64{0, 0, 0}, []float64{1, 2, 3, 4}, []float64{1, 2, 6}},
 		{[]float64{0, 0, 0, 0}, []float64{1, 2, 3}, []float64{1, 2, 6}},
 		{[]float64{nan, 1, nan, 1, 0},
@@ -111,16 +120,118 @@ func TestCumProd(t *testing.T) {
 	} {
 		ret := CumProd(v.dst, v.src)
 		for i := range v.expect {
-			if ret[i] != v.expect[i] && (math.IsNaN(ret[i]) != math.IsNaN(v.expect[i])) {
+			if diff(ret[i], v.expect[i]) {
 				t.Log("Test", j, "CumProd error at", i, "Got:", ret[i], "Expected:", v.expect[i])
 				t.Fail()
 			}
-			if ret[i] != v.dst[i] && (math.IsNaN(ret[i]) != math.IsNaN(v.dst[i])) {
+			if diff(ret[i], v.dst[i]) {
 				t.Log("Test", j, "CumProd ret/dst mismatch", i, "Ret:", ret[i], "Dst:", v.dst[i])
 				t.Fail()
 			}
 		}
 	}
+	runtime.GC()
 }
 
-//func TestDiv
+// TestDiv verifies that Div(dst, src) leaves the element-wise quotient dst[i]/src[i] in dst.
+func TestDiv(t *testing.T) {
+	for n, tc := range []struct {
+		dst, src, expect []float64
+	}{
+		{[]float64{1}, []float64{1}, []float64{1}},
+		{[]float64{nan}, []float64{nan}, []float64{nan}},
+		{[]float64{1, 2, 3, 4}, []float64{1, 2, 3, 4}, []float64{1, 1, 1, 1}},
+		{[]float64{2, 4, 6}, []float64{1, 2, 3, 4}, []float64{2, 2, 2}},
+		{[]float64{0, 0, 0, 0}, []float64{1, 2, 3}, []float64{0, 0, 0}},
+		{[]float64{nan, 1, nan, 1, 0},
+			[]float64{1, 1, nan, 1, 1},
+			[]float64{nan, 1, nan, 1, 0}},
+		{[]float64{inf, 4, nan, ninf, 9},
+			[]float64{inf, 4, nan, ninf, 3},
+			[]float64{nan, 1, nan, nan, 3}},
+	} {
+		Div(tc.dst, tc.src)
+		for i, want := range tc.expect {
+			if diff(tc.dst[i], want) {
+				t.Error("Test", n, "Div error at", i, "Got:", tc.dst[i], "Expected:", want)
+			}
+		}
+	}
+	runtime.GC() // NOTE(review): presumably here to surface memory corruption by the asm — confirm
+}
+
+// TestDivTo verifies DivTo when dst aliases the first operand, and that the
+// slice it returns holds the same values as dst.
+func TestDivTo(t *testing.T) {
+	for j, v := range []struct {
+		dst, src, expect []float64
+	}{
+		{[]float64{1}, []float64{1}, []float64{1}},
+		{[]float64{nan}, []float64{nan}, []float64{nan}},
+		{[]float64{1, 2, 3, 4}, []float64{1, 2, 3, 4}, []float64{1, 1, 1, 1}},
+		{[]float64{2, 4, 6}, []float64{1, 2, 3, 4}, []float64{2, 2, 2}},
+		{[]float64{0, 0, 0, 0}, []float64{1, 2, 3}, []float64{0, 0, 0}},
+		{[]float64{nan, 1, nan, 1, 0},
+			[]float64{1, 1, nan, 1, 1},
+			[]float64{nan, 1, nan, 1, 0}},
+		{[]float64{inf, 4, nan, ninf, 9},
+			[]float64{inf, 4, nan, ninf, 3},
+			[]float64{nan, 1, nan, nan, 3}},
+	} {
+		ret := DivTo(v.dst, v.dst, v.src)
+		for i := range v.expect {
+			if diff(ret[i], v.expect[i]) { // report ret[i], the value actually compared
+				t.Error("Test", j, "DivTo error at", i, "Got:", ret[i], "Expected:", v.expect[i])
+			}
+			if diff(ret[i], v.dst[i]) {
+				t.Error("Test", j, "DivTo ret/dst mismatch", i, "Ret:", ret[i], "Dst:", v.dst[i])
+			}
+		}
+	}
+	runtime.GC() // NOTE(review): presumably here to surface memory corruption by the asm — confirm
+}
+
+// TestL1norm checks L1norm against the sum of |s[i]-t[i]| over the shorter of
+// the two slices, including inputs whose differences are infinite or NaN.
+func TestL1norm(t *testing.T) {
+	for n, tc := range []struct {
+		s, t   []float64
+		expect float64
+	}{
+		{[]float64{1}, []float64{1}, 0},
+		{[]float64{nan}, []float64{nan}, nan},
+		{[]float64{1, 2, 3, 4}, []float64{1, 2, 3, 4}, 0},
+		{[]float64{2, 4, 6}, []float64{1, 2, 3, 4}, 6},
+		{[]float64{0, 0, 0, 0}, []float64{1, 2, 3}, 6},
+		{[]float64{0, -4, -10, 0}, []float64{1, 2, 3}, 20},
+		{[]float64{0, 1, 0, 1, 0}, []float64{1, 1, inf, 1, 1}, inf},
+		{[]float64{inf, 4, nan, ninf, 9}, []float64{inf, 4, nan, ninf, 3}, nan},
+	} {
+		if got := L1norm(tc.s, tc.t); diff(got, tc.expect) {
+			t.Error("Test", n, "L1norm error. Got:", got, "Expected:", tc.expect)
+		}
+	}
+	runtime.GC() // NOTE(review): presumably here to surface memory corruption by the asm — confirm
+}
+
+// TestLinfNorm checks LinfNorm against the largest |s[i]-t[i]| over the shorter
+// of the two slices, including inputs whose differences are infinite or NaN.
+func TestLinfNorm(t *testing.T) {
+	for n, tc := range []struct {
+		s, t   []float64
+		expect float64
+	}{
+		{[]float64{1}, []float64{1}, 0},
+		{[]float64{nan}, []float64{nan}, nan},
+		{[]float64{1, 2, 3, 4}, []float64{1, 2, 3, 4}, 0},
+		{[]float64{2, 4, 6}, []float64{1, 2, 3, 4}, 3},
+		{[]float64{0, 0, 0, 0}, []float64{1, 2, 3}, 3},
+		{[]float64{0, 1, 0, 1, 0}, []float64{1, 1, inf, 1, 1}, inf},
+		{[]float64{inf, 4, nan, ninf, 9}, []float64{inf, 4, nan, ninf, 3}, 6}, // NaN differences before the last element are expected to be dropped
+	} {
+		if got := LinfNorm(tc.s, tc.t); diff(got, tc.expect) {
+			t.Error("Test", n, "LinfNorm error. Got:", got, "Expected:", tc.expect)
+		}
+	}
+	runtime.GC() // NOTE(review): presumably here to surface memory corruption by the asm — confirm
+}