mirror of
https://github.com/Ascend/ascend-docker-runtime.git
synced 2025-10-22 05:49:28 +08:00
179 lines
5.6 KiB
Go
179 lines
5.6 KiB
Go
// Package dcmi is used to work with Ascend devices
|
||
/*
|
||
* Copyright(C) Huawei Technologies Co.,Ltd. 2020-2021. All rights reserved.
|
||
*/
|
||
package dcmi
|
||
|
||
// #cgo LDFLAGS: -ldl
|
||
// #include "dcmi_interface_api.h"
|
||
import "C"
|
||
import (
|
||
"fmt"
|
||
"math"
|
||
)
|
||
|
||
const (
	// retError is the value returned in place of a result when a call fails.
	retError = -1

	// dcmiMaxVdevNum is the max number of vdevices; the value is from the driver specification.
	dcmiMaxVdevNum = 16

	// hiAIMaxCardNum is the capacity of the card ID buffer handed to
	// dcmi_get_card_num_list and the upper bound accepted for the card count.
	hiAIMaxCardNum = 16
	// coreNumLen is the size, in bytes, of the template name buffer filled by CreateVDevice.
	coreNumLen = 32
	// deviceNum is passed as both vdev_id and vfg_id when a virtual device is created.
	// vfg_id is the ID of the virtual group that the virtual device belongs to; it is
	// assigned automatically by default, and the default value is 0xFFFFFFFF
	// (4294967295 in decimal).
	deviceNum = 0xFFFFFFFF
)
|
||
|
||
// NpuWorker is the dcmi worker. It carries no state; its methods wrap calls
// into the dcmi library.
type NpuWorker struct{}
|
||
|
||
// Initialize dcmi lib init
|
||
func (w *NpuWorker) Initialize() error {
|
||
if err := C.dcmiInit_dl(); err != C.SUCCESS {
|
||
errInfo := fmt.Errorf("dcmi lib load failed, , error code: %d", int32(err))
|
||
return errInfo
|
||
}
|
||
if err := C.dcmi_init(); err != C.SUCCESS {
|
||
errInfo := fmt.Errorf("dcmi init failed, , error code: %d", int32(err))
|
||
return errInfo
|
||
}
|
||
return nil
|
||
}
|
||
|
||
// ShutDown shutdown dcmi lib
|
||
func (w *NpuWorker) ShutDown() {
|
||
if err := C.dcmiShutDown(); err != C.SUCCESS {
|
||
println(fmt.Errorf("dcmi shut down failed, error code: %d", int32(err)))
|
||
}
|
||
}
|
||
|
||
// GetCardList list all cards on system
|
||
func GetCardList() (int32, []int32, error) {
|
||
var ids [hiAIMaxCardNum]C.int
|
||
var cNum C.int
|
||
if err := C.dcmi_get_card_num_list(&cNum, &ids[0], hiAIMaxCardNum); err != 0 {
|
||
errInfo := fmt.Errorf("get card list failed, error code: %d", int32(err))
|
||
return retError, nil, errInfo
|
||
}
|
||
// checking card's quantity
|
||
if cNum <= 0 || cNum > hiAIMaxCardNum {
|
||
errInfo := fmt.Errorf("get error card quantity: %d", int32(cNum))
|
||
return retError, nil, errInfo
|
||
}
|
||
var cardNum = int32(cNum)
|
||
var cardIDList []int32
|
||
for i := int32(0); i < cardNum && i < hiAIMaxCardNum; i++ {
|
||
cardID := int32(ids[i])
|
||
if cardID < 0 {
|
||
continue
|
||
}
|
||
cardIDList = append(cardIDList, cardID)
|
||
}
|
||
return cardNum, cardIDList, nil
|
||
}
|
||
|
||
// GetDeviceNumInCard get device number in the npu card
|
||
func GetDeviceNumInCard(cardID int32) (int32, error) {
|
||
var deviceNum C.int
|
||
if err := C.dcmi_get_device_num_in_card(C.int(cardID), &deviceNum); err != 0 {
|
||
errInfo := fmt.Errorf("get device count on the card failed, error code: %d", int32(err))
|
||
return retError, errInfo
|
||
}
|
||
if deviceNum <= 0 {
|
||
errInfo := fmt.Errorf("the number of chips obtained is invalid, the number is: %d", int32(deviceNum))
|
||
return retError, errInfo
|
||
}
|
||
return int32(deviceNum), nil
|
||
}
|
||
|
||
// GetDeviceLogicID get device logicID
|
||
func GetDeviceLogicID(cardID, deviceID int32) (int32, error) {
|
||
var logicID C.int
|
||
if err := C.dcmi_get_device_logic_id(&logicID, C.int(cardID), C.int(deviceID)); err != 0 {
|
||
errInfo := fmt.Errorf("get logicID failed, error code: %d", int32(err))
|
||
return retError, errInfo
|
||
}
|
||
|
||
// check whether phyID is too big
|
||
if logicID < 0 || uint32(logicID) > uint32(math.MaxInt8) {
|
||
errInfo := fmt.Errorf("the logicID value is invalid, logicID is: %d", logicID)
|
||
return retError, errInfo
|
||
}
|
||
return int32(logicID), nil
|
||
}
|
||
|
||
// CreateVDevice create virtual device
|
||
func (w *NpuWorker) CreateVDevice(cardID, deviceID int32, coreNum string) (int32, error) {
|
||
var createInfo C.struct_dcmi_create_vdev_out
|
||
createInfo.vdev_id = C.uint(math.MaxUint32)
|
||
var deviceCreateStr C.struct_dcmi_create_vdev_res_stru
|
||
deviceCreateStr = C.struct_dcmi_create_vdev_res_stru{
|
||
vdev_id: C.uint(deviceNum),
|
||
vfg_id: C.uint(deviceNum),
|
||
}
|
||
deviceCreateStrArr := [coreNumLen]C.char{0}
|
||
for i := 0; i < len(coreNum); i++ {
|
||
if i >= coreNumLen {
|
||
return math.MaxInt32, fmt.Errorf("wrong template")
|
||
}
|
||
deviceCreateStrArr[i] = C.char(coreNum[i])
|
||
}
|
||
deviceCreateStr.template_name = deviceCreateStrArr
|
||
err := C.dcmi_create_vdevice(C.int(cardID), C.int(deviceID), &deviceCreateStr, &createInfo)
|
||
if err != 0 {
|
||
errInfo := fmt.Errorf("create virtual device failed, error code: %d", int32(err))
|
||
return math.MaxInt32, errInfo
|
||
}
|
||
if createInfo.vdev_id > math.MaxInt32 {
|
||
return math.MaxInt32, fmt.Errorf("create virtual device failed, vdeviceId too large")
|
||
}
|
||
return int32(createInfo.vdev_id), nil
|
||
}
|
||
|
||
// DestroyVDevice destroy virtual device
|
||
func (w *NpuWorker) DestroyVDevice(cardID, deviceID int32, vDevID int32) error {
|
||
if vDevID < 0 {
|
||
return fmt.Errorf("param error on vDevID")
|
||
}
|
||
if err := C.dcmi_set_destroy_vdevice(C.int(cardID), C.int(deviceID), C.uint(vDevID)); err != 0 {
|
||
errInfo := fmt.Errorf("destroy virtual device failed, error code: %d", int32(err))
|
||
return errInfo
|
||
}
|
||
return nil
|
||
}
|
||
|
||
// FindDevice find device by phyical id
|
||
func (w *NpuWorker) FindDevice(visibleDevice int32) (int32, int32, error) {
|
||
var dcmiLogicID C.uint
|
||
if err := C.dcmi_get_device_logicid_from_phyid(C.uint(visibleDevice), &dcmiLogicID); err != 0 {
|
||
return 0, 0, fmt.Errorf("phy id can not be converted to logic id : %v", err)
|
||
}
|
||
if uint(dcmiLogicID) > math.MaxInt32 {
|
||
return 0, 0, fmt.Errorf("logic id too large")
|
||
}
|
||
targetLogicID := int32(dcmiLogicID)
|
||
_, cardList, err := GetCardList()
|
||
if err != nil {
|
||
return 0, 0, fmt.Errorf("get card list err : %v", err)
|
||
}
|
||
targetDeviceID, targetCardID := int32(math.MaxInt32), int32(math.MaxInt32)
|
||
for _, cardID := range cardList {
|
||
deviceCount, err := GetDeviceNumInCard(cardID)
|
||
if err != nil {
|
||
return 0, 0, fmt.Errorf("cannot get device num in card : %v", err)
|
||
}
|
||
for deviceID := int32(0); deviceID < deviceCount; deviceID++ {
|
||
logicID, err := GetDeviceLogicID(cardID, deviceID)
|
||
if err != nil {
|
||
return 0, 0, fmt.Errorf("cannot get logic id : %v", err)
|
||
}
|
||
if logicID == int32(targetLogicID) {
|
||
targetCardID, targetDeviceID = cardID, deviceID
|
||
}
|
||
}
|
||
}
|
||
return targetDeviceID, targetCardID, nil
|
||
}
|