mirror of
https://github.com/swdee/go-rknnlite.git
synced 2025-09-26 19:31:12 +08:00
changed float16 to float32 conversion from lookup table to CGO call
This commit is contained in:
33
float16.go
33
float16.go
@@ -1,13 +1,30 @@
|
||||
package rknnlite
|
||||
|
||||
import "github.com/x448/float16"
|
||||
/*
|
||||
#cgo CFLAGS: -march=native -mtune=native -Ofast -flto
|
||||
#cgo LDFLAGS: -march=native -mtune=native -Ofast
|
||||
|
||||
var f16LookupTable [65536]float32
|
||||
#include <stdint.h>
|
||||
|
||||
func init() {
|
||||
// precompute float16 lookup table for faster conversion to float32
|
||||
for i := range f16LookupTable {
|
||||
f16 := float16.Frombits(uint16(i))
|
||||
f16LookupTable[i] = f16.Float32()
|
||||
}
|
||||
void float16_to_float32_buffer(const uint16_t* input, float* output, size_t count) {
|
||||
for (size_t i = 0; i < count; i++) {
|
||||
_Float16 tmp = *(_Float16*)&input[i];
|
||||
output[i] = (float)tmp;
|
||||
}
|
||||
}
|
||||
|
||||
*/
|
||||
import "C"
|
||||
import (
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// float16toFloat32Buffer takes a float16 and 32 buffer and converts it using
|
||||
// optimisation via C
|
||||
func float16ToFloat32Buffer(float16Buf []uint16, float32Buf []float32) {
|
||||
C.float16_to_float32_buffer(
|
||||
(*C.uint16_t)(unsafe.Pointer(&float16Buf[0])), // Pointer to the input buffer
|
||||
(*C.float)(unsafe.Pointer(&float32Buf[0])), // Pointer to the output buffer
|
||||
C.size_t(len(float16Buf)), // Number of elements to convert
|
||||
)
|
||||
}
|
||||
|
@@ -236,11 +236,9 @@ func (r *Runtime) GetOutputs(nOutputs uint32, wantFloat bool) (*Outputs, error)
|
||||
// convertFloat16BufferToFloat32 converts a float16 buffer to float32 as Go
|
||||
// has no support for FP16.
|
||||
func convertFloat16BufferToFloat32(float16Buf []uint16) []float32 {
|
||||
float32Buf := make([]float32, len(float16Buf))
|
||||
|
||||
for i, val := range float16Buf {
|
||||
float32Buf[i] = f16LookupTable[val]
|
||||
}
|
||||
float32Buf := make([]float32, len(float16Buf))
|
||||
float16ToFloat32Buffer(float16Buf, float32Buf)
|
||||
|
||||
return float32Buf
|
||||
}
|
||||
|
Reference in New Issue
Block a user