From 7a0b4e5d979a50042221e843179ce1c00a01312b Mon Sep 17 00:00:00 2001
From: swdee <shanon@bokumail.com>
Date: Sun, 22 Sep 2024 14:34:22 +1200
Subject: [PATCH] changed float16 to float32 conversion from lookup table to CGO call

---
 float16.go   | 33 +++++++++++++++++++++++++--------
 inference.go |  6 ++----
 2 files changed, 27 insertions(+), 12 deletions(-)

diff --git a/float16.go b/float16.go
index a7c5cac..46cf697 100644
--- a/float16.go
+++ b/float16.go
@@ -1,13 +1,30 @@
 package rknnlite
 
-import "github.com/x448/float16"
+/*
+#cgo CFLAGS: -march=native -mtune=native -Ofast -flto
+#cgo LDFLAGS: -march=native -mtune=native -Ofast
 
-var f16LookupTable [65536]float32
+#include <stdint.h>
 
-func init() {
-	// precompute float16 lookup table for faster conversion to float32
-	for i := range f16LookupTable {
-		f16 := float16.Frombits(uint16(i))
-		f16LookupTable[i] = f16.Float32()
-	}
+void float16_to_float32_buffer(const uint16_t* input, float* output, size_t count) {
+    for (size_t i = 0; i < count; i++) { // no bounds checks: both buffers must hold at least count elements
+        _Float16 tmp = *(_Float16*)&input[i]; // reinterpret the stored 16 bits as a _Float16 (bit cast, not a numeric conversion)
+        output[i] = (float)tmp; // widen the half-precision value to float
+    }
+}
+
+*/
+import "C"
+import (
+	"unsafe"
+)
+
+// float16ToFloat32Buffer converts float16Buf (raw float16 bits) into float32Buf with one cgo call.
+func float16ToFloat32Buffer(float16Buf []uint16, float32Buf []float32) {
+	if len(float16Buf) == 0 {
+		return // nothing to convert, and &float16Buf[0] would panic on an empty slice
+	}
+	out := float32Buf[:len(float16Buf)] // panics here if the output is too small, instead of C writing out of bounds
+	C.float16_to_float32_buffer((*C.uint16_t)(unsafe.Pointer(&float16Buf[0])),
+		(*C.float)(unsafe.Pointer(&out[0])), C.size_t(len(float16Buf)))
 }
diff --git a/inference.go b/inference.go
index b3f961c..e03c360 100644
--- a/inference.go
+++ b/inference.go
@@ -236,11 +236,9 @@ func (r *Runtime) GetOutputs(nOutputs uint32, wantFloat bool) (*Outputs, error)
 // convertFloat16BufferToFloat32 converts a float16 buffer to float32 as Go
 // has not support for FP16.
 func convertFloat16BufferToFloat32(float16Buf []uint16) []float32 {
-	float32Buf := make([]float32, len(float16Buf))
 
-	for i, val := range float16Buf {
-		float32Buf[i] = f16LookupTable[val]
-	}
+	float32Buf := make([]float32, len(float16Buf)) // one float32 slot per float16 input value
+	float16ToFloat32Buffer(float16Buf, float32Buf) // helper writes the converted values into float32Buf
 
 	return float32Buf
 }