Files
go-rknnlite/inference.go

414 lines
10 KiB
Go

package rknnlite
/*
#include "rknn_api.h"
#include <stdlib.h>
#include <string.h>
*/
import "C"
import (
"fmt"
"gocv.io/x/gocv"
"sync"
"unsafe"
)
// Input represents the C.rknn_input struct and defines the Input used for
// inference. SetInputs copies every field into the matching C.rknn_input
// field before handing the inputs to the RKNN runtime.
type Input struct {
// Index is the input index
Index uint32
// Buf is a raw pointer to the first byte of the input data. Inference
// sets it to the address of the first element of the gocv Mat's data.
Buf unsafe.Pointer
// Size is the number of bytes of Buf
Size uint32
// PassThrough defines the mode, if true the Buf data is passed directly to
// the input node of the rknn model without any conversion. If false the
// Buf data is converted into an input consistent with the model according
// to the following Type and Fmt
PassThrough bool
// Type is the data type of Buf. This is a required parameter if PassThrough
// is false
Type TensorType
// Fmt is the data format of Buf. This is a required parameter if PassThrough
// is false
Fmt TensorFormat
}
// Inference runs the model inference on the given inputs.
//
// Each Mat becomes one RKNN input whose index is its position in mats. The
// data is handed over as float32 when the runtime is configured for float32
// input and as uint8 otherwise, always in NHWC layout and with pass-through
// disabled. After the inputs are set the model is run and the outputs are
// fetched.
//
// The returned Outputs hold C memory and should be released with Free once
// the caller is done with them. On failure a non-nil error is returned along
// with an empty Outputs value.
func (r *Runtime) Inference(mats []gocv.Mat) (*Outputs, error) {

	// without this guard SetInputs would index an empty slice and panic
	if len(mats) == 0 {
		return &Outputs{}, fmt.Errorf("error setting inputs: no input Mats provided")
	}

	// Mats cloned to obtain continuous memory are owned by this call. They
	// are closed on return, after the model has run, because the inputs
	// point directly at their pixel data.
	var clones []gocv.Mat

	defer func() {
		for i := range clones {
			// best effort cleanup, there is nothing useful to do on failure
			_ = clones[i].Close()
		}
	}()

	// convert the cv Mat's into RKNN inputs
	inputs := make([]Input, len(mats))

	for idx, mat := range mats {

		// make mat continuous
		if !mat.IsContinuous() {
			mat = mat.Clone()
			clones = append(clones, mat)
		}

		// number of elements in the Mat
		elements := mat.Cols() * mat.Rows() * mat.Channels()

		if r.inputTypeFloat32 {
			// pass data as float32 to RKNN backend
			data, err := mat.DataPtrFloat32()

			if err != nil {
				return &Outputs{}, fmt.Errorf("error getting data pointer to Mat: %w", err)
			}

			// taking &data[0] of an empty Mat would panic
			if len(data) == 0 {
				return &Outputs{}, fmt.Errorf("error getting data pointer to Mat: Mat at index %d is empty", idx)
			}

			inputs[idx] = Input{
				Index: uint32(idx),
				Type:  TensorFloat32,
				// multiply by 4 for size of float32
				Size:        uint32(elements * 4),
				Fmt:         TensorNHWC,
				Buf:         unsafe.Pointer(&data[0]),
				PassThrough: false,
			}

		} else {
			// pass data as uint8 to RKNN backend
			data, err := mat.DataPtrUint8()

			if err != nil {
				return &Outputs{}, fmt.Errorf("error getting data pointer to Mat: %w", err)
			}

			// taking &data[0] of an empty Mat would panic
			if len(data) == 0 {
				return &Outputs{}, fmt.Errorf("error getting data pointer to Mat: Mat at index %d is empty", idx)
			}

			inputs[idx] = Input{
				Index:       uint32(idx),
				Type:        TensorUint8,
				Size:        uint32(elements),
				Fmt:         TensorNHWC,
				Buf:         unsafe.Pointer(&data[0]),
				PassThrough: false,
			}
		}
	}

	// set the Inputs
	err := r.SetInputs(inputs)

	if err != nil {
		return &Outputs{}, fmt.Errorf("error setting inputs: %w", err)
	}

	// run the model
	err = r.RunModel()

	if err != nil {
		return &Outputs{}, fmt.Errorf("error running model: %w", err)
	}

	// get Outputs
	return r.GetOutputs(r.ioNum.NumberOutput, r.wantFloat)
}
// SetInputs wraps C.rknn_inputs_set. Every Input is copied field by field
// into a C.rknn_input and the resulting array is passed to the RKNN runtime
// context.
//
// An error is returned when inputs is empty or when the C call reports a
// non-zero return code.
//
// NOTE(review): the Buf pointers are forwarded to C unchanged; presumably
// they must reference memory that cgo is allowed to receive (for example Mat
// data owned by OpenCV) - confirm against the cgo pointer passing rules.
func (r *Runtime) SetInputs(inputs []Input) error {

	// taking the address of the first element below panics on an empty slice
	if len(inputs) == 0 {
		return fmt.Errorf("no inputs provided to set")
	}

	// make a C array of inputs
	cInputs := make([]C.rknn_input, len(inputs))

	for i, input := range inputs {
		passThrough := C.uint8_t(0)

		if input.PassThrough {
			passThrough = C.uint8_t(1)
		}

		cInputs[i].index = C.uint32_t(input.Index)
		cInputs[i].buf = input.Buf
		cInputs[i].size = C.uint32_t(input.Size)
		cInputs[i].pass_through = passThrough
		cInputs[i]._type = C.rknn_tensor_type(input.Type)
		cInputs[i].fmt = C.rknn_tensor_format(input.Fmt)
	}

	ret := C.rknn_inputs_set(r.ctx, C.uint32_t(len(cInputs)), &cInputs[0])

	if ret != 0 {
		return fmt.Errorf("C.rknn_inputs_set failed with code %d, error: %s",
			int(ret), ErrorCodes(ret).String())
	}

	return nil
}
// RunModel wraps C.rknn_run, calling it on the runtime context with no
// extension argument. A negative return code from the C call is reported as
// an error that carries the code and its ErrorCodes description.
func (r *Runtime) RunModel() error {

	if ret := C.rknn_run(r.ctx, nil); ret < 0 {
		return fmt.Errorf("C.rknn_run failed with code %d, error: %s",
			int(ret), ErrorCodes(ret).String())
	}

	return nil
}
// Output wraps C.rknn_output. It is the Go side view of a single output
// tensor as filled in by GetOutputs. Depending on how the output was
// requested and on the tensor type, either BufFloat or BufInt is populated.
type Output struct {
WantFloat uint8 // 1 when the output data was requested as float, otherwise 0
IsPrealloc uint8 // whether buf is pre-allocated
Index uint32 // the output index
// the output buf cast to float32, when WantFloat = 1
// this is a slice header that points to C memory.
// GetOutputs also fills this field for float16 tensors when WantFloat = 0,
// in which case it holds a converted copy of the data
BufFloat []float32
// the output buf cast to int8, when WantFloat = 0
// this is a slice header that points to C memory
BufInt []int8
Size uint32 // the size of output buf in bytes
}
// Outputs is a struct containing Go and C output data. It is created by
// GetOutputs and the C memory it refers to is released by calling Free.
type Outputs struct {
// Output holds the Go view of each output tensor
Output []Output
// cOutputs is the C array of outputs that was passed to the RKNN runtime,
// it is kept so the same array can be handed back when releasing
cOutputs []C.rknn_output
// freed is a flag to indicate if the cOutputs have been released from
// memory or not
freed bool
// mutex to lock access to freed variable
sync.Mutex
// rknn runtime instance
rt *Runtime
}
// GetOutputs returns the Output results by wrapping C.rknn_outputs_get.
//
// nOutputs is the number of output tensors to fetch and wantFloat selects
// whether the runtime should deliver the data converted to float32. When
// wantFloat is false, tensors whose attribute type is TensorFloat16 are
// converted into a new []float32, all other tensors are exposed as []int8.
//
// Apart from the float16 case the returned buffers are slice headers over
// C memory owned by the RKNN runtime, they stay valid until Free is called
// on the returned Outputs. On failure an error is returned along with an
// empty Outputs value.
func (r *Runtime) GetOutputs(nOutputs uint32, wantFloat bool) (*Outputs, error) {

	// taking the address of the first C output below panics on an empty slice
	if nOutputs == 0 {
		return &Outputs{}, fmt.Errorf("no outputs requested")
	}

	outputs := &Outputs{
		Output:   make([]Output, nOutputs),
		cOutputs: make([]C.rknn_output, nOutputs),
		rt:       r,
	}

	// set want float for all outputs
	useWantFloat := C.uint8_t(0)

	if wantFloat {
		useWantFloat = 1
	}

	for idx := range outputs.cOutputs {
		outputs.cOutputs[idx].index = C.uint32_t(idx)
		outputs.cOutputs[idx].want_float = useWantFloat
	}

	// call C function
	ret := C.rknn_outputs_get(r.ctx, C.uint32_t(nOutputs), &outputs.cOutputs[0], nil)

	if ret < 0 {
		return &Outputs{}, fmt.Errorf("C.rknn_outputs_get failed with code %d, error: %s",
			int(ret), ErrorCodes(ret).String())
	}

	// convert C.rknn_output array back to Go Output array
	for i := range outputs.cOutputs {
		cOut := &outputs.cOutputs[i]
		out := &outputs.Output[i]

		*out = Output{
			WantFloat:  uint8(cOut.want_float),
			IsPrealloc: uint8(cOut.is_prealloc),
			Index:      uint32(cOut.index),
			Size:       uint32(cOut.size),
		}

		// slicing through a nil array pointer would panic, leave the Go
		// buffers unset when the runtime handed back no buffer
		if cOut.buf == nil {
			continue
		}

		size := int(cOut.size)

		// The slices below use the full slice form [:n:n] so their capacity
		// equals their length, an append by the caller then reallocates
		// instead of writing past the end of the C buffer.
		switch out.WantFloat {
		case 1:
			// convert buffer to []float32
			n := size / 4
			out.BufFloat = (*[1 << 30]float32)(cOut.buf)[:n:n]

		case 0:
			// yolov8-pose has output tensors of int8 and fp16, so we need to
			// handle the fp16 specially
			if r.outputAttrs[i].Type == TensorFloat16 {
				// convert float16 buffer to []float32
				n := size / 2
				float16Buf := (*[1 << 30]uint16)(cOut.buf)[:n:n]
				out.BufFloat = convertFloat16BufferToFloat32(float16Buf)

			} else {
				// convert buffer to []int8
				out.BufInt = (*[1 << 30]int8)(cOut.buf)[:size:size]
			}
		}
	}

	return outputs, nil
}
// convertFloat16BufferToFloat32 expands a buffer of raw float16 values into
// a newly allocated float32 slice of the same length, as Go has no support
// for FP16. The element conversion is delegated to float16ToFloat32Buffer.
func convertFloat16BufferToFloat32(float16Buf []uint16) []float32 {
	converted := make([]float32, len(float16Buf))
	float16ToFloat32Buffer(float16Buf, converted)

	return converted
}
// Free releases the C memory buffer holding the RKNN inference outputs.
//
// The call is guarded by the embedded mutex and the freed flag, so only the
// first call releases anything and later calls return nil. An Outputs value
// that holds no C outputs or has no runtime attached, such as the empty
// value returned together with an error, has nothing to release and is
// simply marked as freed.
func (o *Outputs) Free() error {
	o.Lock()
	defer o.Unlock()

	if o.freed {
		// C memory already released
		return nil
	}

	o.freed = true

	// nothing was allocated by the runtime, releasing would dereference a
	// nil runtime or index an empty slice
	if o.rt == nil || len(o.cOutputs) == 0 {
		return nil
	}

	return o.rt.releaseOutputs(o.cOutputs)
}
// InputAttribute of trained model input tensor. It holds the width, height
// and channel dimensions as returned by InputAttributes.
type InputAttribute struct {
// Width is the width dimension of the input tensor
Width uint32
// Height is the height dimension of the input tensor
Height uint32
// Channel is the channel dimension of the input tensor
Channel uint32
}
// InputAttributes returns the Input image dimensions of the Model, read from
// the first input tensor attribute held by the runtime. The dimensions are
// taken in NHWC order when the tensor format is TensorNHWC, otherwise NCHW
// order is assumed.
func (o *Outputs) InputAttributes() InputAttribute {
	attr := o.rt.inputAttrs[0]

	if attr.Fmt == TensorNHWC {
		// dims are laid out as N, H, W, C
		return InputAttribute{
			Height:  attr.Dims[1],
			Width:   attr.Dims[2],
			Channel: attr.Dims[3],
		}
	}

	// default layout, dims are N, C, H, W
	return InputAttribute{
		Channel: attr.Dims[1],
		Height:  attr.Dims[2],
		Width:   attr.Dims[3],
	}
}
// OutputAttribute of trained model output tensor. The slice fields hold one
// entry per model output, in output order, as filled in by OutputAttributes.
type OutputAttribute struct {
// DimForDFL is dimension 1 of the first output tensor
DimForDFL uint32
// Scales is the scale of each output tensor
Scales []float32
// ZPs is the zero point of each output tensor
ZPs []int32
// DimHeights is dimension 2 of each output tensor
DimHeights []uint32
// DimWidths is dimension 3 of each output tensor
DimWidths []uint32
// IONumber is the number of outputs the model has
IONumber uint32
}
// OutputAttributes returns the Model output attribute scales and zero
// points, together with dimensions 2 and 3 of every output tensor, which are
// reported as height and width. DimForDFL is dimension 1 of the first output
// tensor and IONumber is the number of model outputs.
func (o *Outputs) OutputAttributes() OutputAttribute {
	rt := o.rt
	count := rt.ioNum.NumberOutput

	attrs := OutputAttribute{
		DimForDFL:  rt.outputAttrs[0].Dims[1],
		Scales:     []float32{},
		ZPs:        []int32{},
		DimHeights: []uint32{},
		DimWidths:  []uint32{},
		IONumber:   count,
	}

	// collect the per output values in output order
	for n := uint32(0); n < count; n++ {
		attrs.Scales = append(attrs.Scales, rt.outputAttrs[n].Scale)
		attrs.ZPs = append(attrs.ZPs, rt.outputAttrs[n].ZP)
		attrs.DimHeights = append(attrs.DimHeights, rt.outputAttrs[n].Dims[2])
		attrs.DimWidths = append(attrs.DimWidths, rt.outputAttrs[n].Dims[3])
	}

	return attrs
}
// releaseOutputs releases the memory allocated for the outputs by the RKNN
// toolkit directly using C rknn_output structs. It wraps
// C.rknn_outputs_release and returns an error when the C call reports a
// non-zero return code. An empty cOutputs slice is treated as nothing to
// release.
func (r *Runtime) releaseOutputs(cOutputs []C.rknn_output) error {

	// taking the address of the first element below panics on an empty slice
	if len(cOutputs) == 0 {
		return nil
	}

	// call C.rknn_outputs_release with the context and the C array of
	// rknn_output obtained from GetOutputs
	ret := C.rknn_outputs_release(r.ctx, C.uint32_t(len(cOutputs)), &cOutputs[0])

	if ret != 0 {
		return fmt.Errorf("C.rknn_outputs_release failed with code %d, error: %s",
			int(ret), ErrorCodes(ret).String())
	}

	return nil
}
// Probability pairs a label index with its match probability, it is the
// element type of the results returned by GetTop5.
type Probability struct {
// LabelIndex is the index of the matched label, GetTop leaves it at -1
// for a slot it could not fill
LabelIndex int32
// Probability is the match probability of the label
Probability float32
}
// GetTop5 outputs the Top5 matches in the model as pairs of label index and
// match probability. The results are returned in the Probability slice in
// descending order from top match. The float buffer of every given output is
// ranked with GetTop, when more than one output is passed the returned slice
// holds the ranking of the last one.
func GetTop5(outputs []Output) []Probability {
	const top = 5

	results := make([]Probability, top)

	for _, out := range outputs {
		var classes [top]int32
		var scores [top]float32

		GetTop(out.BufFloat, scores[:], classes[:], int32(len(out.BufFloat)), top)

		for rank := range results {
			results[rank] = Probability{
				LabelIndex:  classes[rank],
				Probability: scores[rank],
			}
		}
	}

	return results
}
// MAX_TOP_NUM is the largest topNum that GetTop accepts.
const MAX_TOP_NUM = 20

// GetTop takes outputs and produces a top list of matches by probability.
//
// pfProb holds the probability of every class, of which the first
// outputCount entries are considered. The topNum best classes are written to
// pMaxClass and their probabilities to pfMaxProb, in descending order of
// probability. Only probabilities greater than 0.000001 are ranked, a slot
// that could not be filled keeps class -1 and probability 0.
//
// It returns 1 on success. It returns 0, leaving the result buffers
// untouched, when topNum is greater than MAX_TOP_NUM or when the result
// buffers are too small to hold topNum entries. An outputCount larger than
// len(pfProb) is clamped to len(pfProb).
func GetTop(pfProb []float32, pfMaxProb []float32, pMaxClass []int32,
	outputCount int32, topNum int32) int {

	if topNum > MAX_TOP_NUM {
		return 0
	}

	// the selection loop writes slot j of both result buffers, so each of
	// them needs room for topNum entries
	if int(topNum) > len(pfMaxProb) || int(topNum) > len(pMaxClass) {
		return 0
	}

	// never read more probabilities than pfProb actually holds
	if int(outputCount) > len(pfProb) {
		outputCount = int32(len(pfProb))
	}

	// initialize pfMaxProb with default values, ie: 0
	for j := range pfMaxProb {
		pfMaxProb[j] = 0
	}

	// initialize pMaxClass with default values, ie: -1
	for j := range pMaxClass {
		pMaxClass[j] = -1
	}

	for j := int32(0); j < topNum; j++ {

	candidates:
		for i := int32(0); i < outputCount; i++ {

			// skip if the current class is already in the top list, only the
			// slots before j have been assigned so far
			for _, taken := range pMaxClass[:j] {
				if taken == i {
					continue candidates
				}
			}

			// if the current probability is greater than the j'th max
			// probability, update pfMaxProb and pMaxClass
			if pfProb[i] > pfMaxProb[j] && pfProb[i] > 0.000001 {
				pfMaxProb[j] = pfProb[i]
				pMaxClass[j] = i
			}
		}
	}

	return 1
}