Match-id-5c716d76e688a3a1318647816ea6aa5016092cf1

This commit is contained in:
BianTanggui
2022-03-18 18:41:04 +08:00
parent 6a183da863
commit bf76614a71
7 changed files with 1516 additions and 16 deletions

View File

@@ -3,6 +3,6 @@ module main
go 1.16
require (
github.com/opencontainers/runtime-spec v1.0.3-0.20200520003142-237cc4f519e2
github.com/opencontainers/runtime-spec v1.0.2
github.com/prashantv/gostub v1.1.0
)

View File

@@ -1,7 +1,7 @@
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/opencontainers/runtime-spec v1.0.3-0.20200520003142-237cc4f519e2 h1:9mv9SC7GWmRWE0J/+oD8w3GsN2KYGKtg6uwLN7hfP5E=
github.com/opencontainers/runtime-spec v1.0.3-0.20200520003142-237cc4f519e2/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-spec v1.0.2 h1:UfAcuLBJB9Coz72x1hgl8O5RVzTdNiaglX6v2DM6FI0=
github.com/opencontainers/runtime-spec v1.0.2/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g=

357
runtime/dcmi/dcmi.go Normal file
View File

@@ -0,0 +1,357 @@
// Package dcmi is used to work with Ascend devices
/*
* Copyright(C) Huawei Technologies Co.,Ltd. 2020-2021. All rights reserved.
*/
package dcmi
// #cgo LDFLAGS: -ldl
/* #include <stddef.h>
#include <dlfcn.h>
#include <stdlib.h>
#include <stdio.h>
#include "dcmi_interface_api.h"
// Handle for libdcmi.so; assigned by dcmiInit_dl and released by dcmiShutDown.
void *dcmiHandle;
// Status codes returned by the loader and wrapper functions in this preamble.
#define SO_NOT_FOUND -99999
#define FUNCTION_NOT_FOUND -99998
#define SUCCESS 0
#define ERROR_UNKNOWN -99997
// CALL_FUNC(name, args...) returns FUNCTION_NOT_FOUND when name##_func is NULL
// (the symbol was never resolved); otherwise it returns name##_func(args...).
#define CALL_FUNC(name,...) if(name##_func==NULL){return FUNCTION_NOT_FOUND;}return name##_func(__VA_ARGS__);
// dcmi
// Each wrapper below pairs a function pointer, filled in by dcmiInit_dl, with a
// same-named function that forwards its arguments through CALL_FUNC.
int (*dcmi_init_func)();
int dcmi_init(){
CALL_FUNC(dcmi_init)
}
int (*dcmi_get_card_num_list_func)(int *card_num, int *card_list, int list_length);
int dcmi_get_card_num_list(int *card_num, int *card_list, int list_length){
CALL_FUNC(dcmi_get_card_num_list,card_num,card_list,list_length)
}
int (*dcmi_get_device_num_in_card_func)(int card_id, int *device_num);
int dcmi_get_device_num_in_card(int card_id, int *device_num){
CALL_FUNC(dcmi_get_device_num_in_card,card_id,device_num)
}
int (*dcmi_get_device_id_in_card_func)(int card_id, int *device_id_max, int *mcu_id, int *cpu_id);
int dcmi_get_device_id_in_card(int card_id, int *device_id_max, int *mcu_id, int *cpu_id){
CALL_FUNC(dcmi_get_device_id_in_card,card_id,device_id_max,mcu_id,cpu_id)
}
int (*dcmi_get_device_logic_id_func)(int *device_logic_id, int card_id, int device_id);
int dcmi_get_device_logic_id(int *device_logic_id, int card_id, int device_id){
CALL_FUNC(dcmi_get_device_logic_id,device_logic_id,card_id,device_id)
}
int (*dcmi_set_create_vdevice_func)(int card_id, int device_id, struct dcmi_vdev_create_info *info);
int dcmi_set_create_vdevice(int card_id, int device_id, struct dcmi_vdev_create_info *info){
CALL_FUNC(dcmi_set_create_vdevice,card_id,device_id,info)
}
int (*dcmi_set_destroy_vdevice_func)(int card_id, int device_id, unsigned int VDevid);
int dcmi_set_destroy_vdevice(int card_id, int device_id, unsigned int VDevid){
CALL_FUNC(dcmi_set_destroy_vdevice,card_id,device_id,VDevid)
}
int (*dcmi_get_vdevice_info_func)(int card_id, int device_id, struct dcmi_vdev_info *info);
int dcmi_get_vdevice_info(int card_id, int device_id, struct dcmi_vdev_info *info){
CALL_FUNC(dcmi_get_vdevice_info,card_id,device_id,info)
}
int (*dcmi_get_device_health_func)(int card_id, int device_id, unsigned int *health);
int dcmi_get_device_health(int card_id, int device_id, unsigned int *health){
CALL_FUNC(dcmi_get_device_health,card_id,device_id,health)
}
int (*dcmi_get_device_chip_info_func)(int card_id, int device_id, struct dcmi_chip_info *chip_info);
int dcmi_get_device_chip_info(int card_id, int device_id, struct dcmi_chip_info *chip_info){
CALL_FUNC(dcmi_get_device_chip_info,card_id,device_id,chip_info)
}
int (*dcmi_create_vdevice_func)(int card_id, int device_id, int vdev_id, const char *template_name,
struct dcmi_create_vdev_out *out);
int dcmi_create_vdevice(int card_id, int device_id, int vdev_id, const char *template_name,
struct dcmi_create_vdev_out *out){
CALL_FUNC(dcmi_create_vdevice,card_id,device_id,vdev_id,template_name,out)
}
// load .so files and functions
// dcmiInit_dl opens libdcmi.so and resolves every dcmi_* pointer with dlsym.
// If dlopen fails, the dlerror text is written to stderr and SO_NOT_FOUND is
// returned. The dlsym results are not checked here: a missing symbol leaves its
// pointer NULL, which the matching wrapper reports as FUNCTION_NOT_FOUND.
int dcmiInit_dl(void){
dcmiHandle = dlopen("libdcmi.so",RTLD_LAZY | RTLD_GLOBAL);
if (dcmiHandle == NULL){
fprintf (stderr,"%s\n",dlerror());
return SO_NOT_FOUND;
}
dcmi_init_func = dlsym(dcmiHandle,"dcmi_init");
dcmi_get_card_num_list_func = dlsym(dcmiHandle,"dcmi_get_card_num_list");
dcmi_get_device_num_in_card_func = dlsym(dcmiHandle,"dcmi_get_device_num_in_card");
dcmi_get_device_id_in_card_func = dlsym(dcmiHandle,"dcmi_get_device_id_in_card");
dcmi_get_device_logic_id_func = dlsym(dcmiHandle,"dcmi_get_device_logic_id");
dcmi_set_create_vdevice_func = dlsym(dcmiHandle,"dcmi_set_create_vdevice");
dcmi_set_destroy_vdevice_func = dlsym(dcmiHandle,"dcmi_set_destroy_vdevice");
dcmi_get_vdevice_info_func = dlsym(dcmiHandle,"dcmi_get_vdevice_info");
dcmi_get_device_health_func = dlsym(dcmiHandle,"dcmi_get_device_health");
dcmi_get_device_chip_info_func = dlsym(dcmiHandle,"dcmi_get_device_chip_info");
dcmi_create_vdevice_func = dlsym(dcmiHandle,"dcmi_create_vdevice");
return SUCCESS;
}
// dcmiShutDown closes the libdcmi.so handle. It returns SUCCESS when the handle
// is NULL, and ERROR_UNKNOWN when dlclose reports a non-zero status.
// NOTE(review): dcmiHandle and the function pointers are not reset after
// dlclose, so a second call would pass the stale handle to dlclose again.
int dcmiShutDown(void){
if (dcmiHandle == NULL) {
return SUCCESS;
}
return (dlclose(dcmiHandle) ? ERROR_UNKNOWN : SUCCESS);
}
// dsmi
// Pointer and wrapper for dsmi_get_logicid_from_phyid, resolved by dsmiInit_dl.
int (*dsmi_get_logicid_from_phyid_func)(unsigned int phyid, unsigned int *logicid);
int dsmi_get_logicid_from_phyid(unsigned int phyid, unsigned int *logicid){
CALL_FUNC(dsmi_get_logicid_from_phyid,phyid,logicid)
}
// Handle for the dsmi library; assigned by dsmiInit_dl and released by dsmiShutDown.
void *dsmiHandle;
// dsmiInit_dl tries libdrvdsmi_host.so first and falls back to libdrvdsmi.so.
// It returns SO_NOT_FOUND when neither can be opened (dlerror is not printed
// on this path) and SUCCESS otherwise; the dlsym result is not checked.
int dsmiInit_dl(void){
dsmiHandle = dlopen("libdrvdsmi_host.so",RTLD_LAZY);
if (dsmiHandle == NULL) {
dsmiHandle = dlopen("libdrvdsmi.so",RTLD_LAZY);
}
if (dsmiHandle == NULL){
return SO_NOT_FOUND;
}
dsmi_get_logicid_from_phyid_func = dlsym(dsmiHandle,"dsmi_get_logicid_from_phyid");
return SUCCESS;
}
// dsmiShutDown closes the dsmi handle with the same return convention as
// dcmiShutDown: SUCCESS for a NULL handle, ERROR_UNKNOWN if dlclose fails.
int dsmiShutDown(void){
if (dsmiHandle == NULL) {
return SUCCESS;
}
return (dlclose(dsmiHandle) ? ERROR_UNKNOWN : SUCCESS);
}
*/
import "C"
import (
"fmt"
"math"
"strconv"
"strings"
"unsafe"
"github.com/opencontainers/runtime-spec/specs-go"
)
const (
// retError is the value returned in place of a result when a call fails
retError = -1
// dcmiMaxVdevNum is max number of vdevice, value is from driver specification
dcmiMaxVdevNum = 16
// hiAIMaxCardNum is the capacity of the card ID buffer handed to
// dcmi_get_card_num_list and the upper bound on card IDs read back from it
hiAIMaxCardNum = 16
)
// VDeviceInfo describes a created virtual device: the card and the device on
// that card it was created from, and the ID of the virtual device itself.
// CreateVDevice sets all three fields to -1 when no virtual device was created.
type VDeviceInfo struct {
	CardID, DeviceID, VdeviceID int32
}
// InitDcmi loads the dcmi shared library, calls dcmi_init, and then loads the
// dsmi shared library, in that order. It stops at the first step that does not
// return C.SUCCESS and reports that step together with the C status code.
//
// On success the caller is expected to release the libraries with
// ShutDownDcmi. Libraries opened by an earlier step are not closed here when a
// later step fails.
func InitDcmi() error {
	if rc := C.dcmiInit_dl(); rc != C.SUCCESS {
		return fmt.Errorf("dcmi lib load failed, error code: %d", int32(rc))
	}
	if rc := C.dcmi_init(); rc != C.SUCCESS {
		return fmt.Errorf("dcmi init failed, error code: %d", int32(rc))
	}
	if rc := C.dsmiInit_dl(); rc != C.SUCCESS {
		return fmt.Errorf("dsmi lib load failed, error code: %d", int32(rc))
	}
	return nil
}
// ShutDownDcmi closes the dcmi library and then the dsmi library. Failures are
// best-effort: each one is reported on stderr and does not stop the other
// library from being closed.
func ShutDownDcmi() {
	// The builtin println prints an interface value (such as an error) as a
	// pair of pointers, so the message is formatted to a string first.
	if rc := C.dcmiShutDown(); rc != C.SUCCESS {
		println(fmt.Sprintf("dcmi shut down failed, error code: %d", int32(rc)))
	}
	if rc := C.dsmiShutDown(); rc != C.SUCCESS {
		println(fmt.Sprintf("dsmi shut down failed, error code: %d", int32(rc)))
	}
}
// getCardList asks dcmi_get_card_num_list for the cards and returns the count
// it reported together with the non-negative card IDs read back.
//
// At most hiAIMaxCardNum entries are read from the ID buffer and negative
// entries are dropped, so the returned slice can be shorter than the returned
// count. On a non-zero status code or a non-positive count the result is
// (retError, nil, error).
func getCardList() (int32, []int32, error) {
	var (
		rawIDs   [hiAIMaxCardNum]C.int
		rawCount C.int
	)
	rc := C.dcmi_get_card_num_list(&rawCount, &rawIDs[0], hiAIMaxCardNum)
	if rc != 0 {
		return retError, nil, fmt.Errorf("get card list failed, error code: %d", int32(rc))
	}
	// A non-positive quantity is treated as a failure.
	if rawCount <= 0 {
		return retError, nil, fmt.Errorf("get error card quantity: %d", int32(rawCount))
	}

	total := int32(rawCount)
	// Never read past the buffer, whatever count was reported.
	limit := total
	if limit > hiAIMaxCardNum {
		limit = hiAIMaxCardNum
	}
	var cards []int32
	for _, raw := range rawIDs[:limit] {
		if id := int32(raw); id >= 0 {
			cards = append(cards, id)
		}
	}
	return total, cards, nil
}
// GetDeviceNumInCard returns the number of devices that
// dcmi_get_device_num_in_card reports for the card cardID. A non-zero status
// code or a non-positive device count yields (retError, error).
func GetDeviceNumInCard(cardID int32) (int32, error) {
	var count C.int
	rc := C.dcmi_get_device_num_in_card(C.int(cardID), &count)
	switch {
	case rc != 0:
		return retError, fmt.Errorf("get device count on the card failed, error code: %d", int32(rc))
	case count <= 0:
		return retError, fmt.Errorf("the number of chips obtained is invalid, the number is: %d", int32(count))
	}
	return int32(count), nil
}
// GetDeviceLogicID returns the logic ID that dcmi_get_device_logic_id reports
// for device deviceID on card cardID. A non-zero status code, or a logic ID
// outside the range 0..math.MaxInt8, yields (retError, error).
func GetDeviceLogicID(cardID, deviceID int32) (int32, error) {
	var id C.int
	if rc := C.dcmi_get_device_logic_id(&id, C.int(cardID), C.int(deviceID)); rc != 0 {
		return retError, fmt.Errorf("get logicID failed, error code: %d", int32(rc))
	}
	// Reject a logic ID that is negative or larger than math.MaxInt8.
	inRange := id >= 0 && uint32(id) <= uint32(math.MaxInt8)
	if !inRange {
		return retError, fmt.Errorf("the logicID value is invalid, logicID is: %d", id)
	}
	return int32(id), nil
}
// SetCreateVDevice calls dcmi_create_vdevice for device deviceID on card
// cardID, passing coreNum as the template name and 0 as the vdev_id argument.
// It returns the vdev_id written to the output structure, or math.MaxUint32
// and an error when the call returns a non-zero status code.
func SetCreateVDevice(cardID, deviceID int32, coreNum string) (uint32, error) {
	// The C string is allocated by cgo and must be freed explicitly.
	templateName := C.CString(coreNum)
	defer C.free(unsafe.Pointer(templateName))

	var out C.struct_dcmi_create_vdev_out
	// Start from the same sentinel that is returned on failure.
	out.vdev_id = C.uint(math.MaxUint32)
	if rc := C.dcmi_create_vdevice(C.int(cardID), C.int(deviceID), C.int(0), templateName, &out); rc != 0 {
		return uint32(math.MaxUint32), fmt.Errorf("create virtual device failed, error code: %d", int32(rc))
	}
	println("vdevId", out.vdev_id)
	return uint32(out.vdev_id), nil
}
// SetDestroyVDevice calls dcmi_set_destroy_vdevice for virtual device vDevID
// on device deviceID of card cardID. It returns an error carrying the status
// code when the call does not return 0.
func SetDestroyVDevice(cardID, deviceID int32, vDevID uint32) error {
	rc := C.dcmi_set_destroy_vdevice(C.int(cardID), C.int(deviceID), C.uint(vDevID))
	if rc == 0 {
		return nil
	}
	return fmt.Errorf("destroy virtual device failed, error code: %d", int32(rc))
}
// CreateVDevice creates the virtual device requested through the environment
// of the container spec and returns where it was created.
//
// The request is parsed by extractVpuParam. When the spec does not ask for a
// split, a VDeviceInfo with every field set to -1 and a nil error are returned
// without loading the driver libraries. Otherwise the libraries are loaded for
// the duration of the call, the requested physical ID is converted to a logic
// ID with dsmi_get_logicid_from_phyid, every device of every card is scanned
// for that logic ID, and the virtual device is created on the match.
//
// Any failure returns the all -1 VDeviceInfo and a non-nil error. This
// includes a failed card listing, no device matching the logic ID, and a
// virtual device ID that does not fit in the int32 VdeviceID field.
func CreateVDevice(spec *specs.Spec) (VDeviceInfo, error) {
	invalidVDevice := VDeviceInfo{CardID: -1, DeviceID: -1, VdeviceID: -1}
	visibleDevice, splitDevice, err := extractVpuParam(spec)
	if err != nil || visibleDevice == -1 {
		return invalidVDevice, err
	}
	if err := InitDcmi(); err != nil {
		return invalidVDevice, fmt.Errorf("cannot init dcmi : %w", err)
	}
	defer ShutDownDcmi()

	var dsmiLogicID C.uint
	if rc := C.dsmi_get_logicid_from_phyid(C.uint(visibleDevice), &dsmiLogicID); rc != 0 {
		return invalidVDevice, fmt.Errorf("phy id can not be converted to logic id : %v", rc)
	}

	_, cardList, err := getCardList()
	if err != nil {
		// Without a card list there is nothing to search; creating a device
		// on placeholder IDs would only produce a misleading driver error.
		return invalidVDevice, fmt.Errorf("cannot get card list : %w", err)
	}

	var targetCardID, targetDeviceID int32
	found := false
	for _, cardID := range cardList {
		deviceCount, err := GetDeviceNumInCard(cardID)
		if err != nil {
			return invalidVDevice, fmt.Errorf("cannot get device num in card : %w", err)
		}
		for deviceID := int32(0); deviceID < deviceCount; deviceID++ {
			logicID, err := GetDeviceLogicID(cardID, deviceID)
			println(cardID, deviceID, logicID, dsmiLogicID)
			if err != nil {
				return invalidVDevice, fmt.Errorf("cannot get logic id : %w", err)
			}
			if logicID == int32(dsmiLogicID) {
				targetCardID, targetDeviceID, found = cardID, deviceID, true
			}
		}
	}
	if !found {
		return invalidVDevice, fmt.Errorf("no device found with logic id %d (phy id %d)",
			uint32(dsmiLogicID), visibleDevice)
	}

	vdeviceID, err := SetCreateVDevice(targetCardID, targetDeviceID, splitDevice)
	// vdeviceID is unsigned, so the only invalid values are those that would
	// overflow the int32 VdeviceID field.
	if err != nil || vdeviceID > math.MaxInt32 {
		return invalidVDevice, fmt.Errorf("cannot create vd or vdevice is wrong: %v %v", vdeviceID, err)
	}

	created := VDeviceInfo{CardID: targetCardID, DeviceID: targetDeviceID, VdeviceID: int32(vdeviceID)}
	fmt.Printf("%v", created)
	return created, nil
}
// extractVpuParam reads the virtual device request from spec.Process.Env.
//
// Only entries that split into exactly two parts on "=" are considered.
// ASCEND_VISIBLE_VPU_DEVICES selects the split size and must be one of "1C",
// "2C", "4C", "8C" or "16C"; it is mapped to the matching "virNN" template
// name. ASCEND_VISIBLE_DEVICES must then hold a single decimal device ID.
//
// Returns:
//   - (-1, "", nil) when no split is requested, or when spec or spec.Process
//     is nil;
//   - (-1, "", error) when the split size is not recognised, or the device ID
//     is not a number, is negative, or does not fit in int32;
//   - (device ID, template name, nil) otherwise.
func extractVpuParam(spec *specs.Spec) (int32, string, error) {
	if spec == nil || spec.Process == nil {
		return -1, "", nil
	}
	allowSplit := map[string]string{
		"1C": "vir01", "2C": "vir02", "4C": "vir04", "8C": "vir08", "16C": "vir16",
	}
	const pairLen = 2
	splitDevice, needSplit, visibleDeviceLine := "", false, ""
	for _, line := range spec.Process.Env {
		words := strings.Split(line, "=")
		if len(words) != pairLen {
			continue
		}
		switch strings.TrimSpace(words[0]) {
		case "ASCEND_VISIBLE_DEVICES":
			visibleDeviceLine = words[1]
		case "ASCEND_VISIBLE_VPU_DEVICES":
			split, ok := allowSplit[words[1]]
			if !ok {
				return -1, "", fmt.Errorf("cannot parse param : %v", words[1])
			}
			splitDevice, needSplit = split, true
		}
	}
	if !needSplit {
		return -1, "", nil
	}

	deviceID, err := strconv.Atoi(visibleDeviceLine)
	if err != nil {
		return -1, "", fmt.Errorf("cannot parse param : %v %v", err, visibleDeviceLine)
	}
	// A negative ID would collide with the -1 "no split" sentinel, and a
	// value above MaxInt32 would silently wrap in the int32 conversion.
	if deviceID < 0 || deviceID > math.MaxInt32 {
		return -1, "", fmt.Errorf("cannot parse param : device id out of range %v", visibleDeviceLine)
	}
	return int32(deviceID), splitDevice, nil
}

File diff suppressed because it is too large Load Diff

View File

@@ -3,6 +3,6 @@ module main
go 1.16
require (
github.com/opencontainers/runtime-spec v1.0.3-0.20200520003142-237cc4f519e2
github.com/opencontainers/runtime-spec v1.0.2
github.com/prashantv/gostub v1.1.0
)

View File

@@ -1,7 +1,7 @@
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/opencontainers/runtime-spec v1.0.3-0.20200520003142-237cc4f519e2 h1:9mv9SC7GWmRWE0J/+oD8w3GsN2KYGKtg6uwLN7hfP5E=
github.com/opencontainers/runtime-spec v1.0.3-0.20200520003142-237cc4f519e2/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-spec v1.0.2 h1:UfAcuLBJB9Coz72x1hgl8O5RVzTdNiaglX6v2DM6FI0=
github.com/opencontainers/runtime-spec v1.0.2/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g=

View File

@@ -9,6 +9,7 @@ import (
"fmt"
"io/ioutil"
"log"
"main/dcmi"
"os"
"os/exec"
"path"
@@ -21,6 +22,7 @@ import (
const (
loggingPrefix = "ascend-docker-runtime"
hookCli = "ascend-docker-hook"
destroyHookCli = "ascend-docker-destroy"
hookDefaultFilePath = "/usr/local/bin/ascend-docker-hook"
dockerRuncFile = "docker-runc"
runcFile = "runc"
@@ -84,23 +86,70 @@ func addHook(spec *specs.Spec) error {
if spec.Hooks == nil {
spec.Hooks = &specs.Hooks{}
} else if len(spec.Hooks.Prestart) != 0 {
for _, hook := range spec.Hooks.Prestart {
if !strings.Contains(hook.Path, hookCli) {
continue
}
return nil
}
needUpdate := true
for _, hook := range spec.Hooks.Prestart {
if strings.Contains(hook.Path, hookCli) {
needUpdate = false
}
}
if needUpdate {
spec.Hooks.Prestart = append(spec.Hooks.Prestart, specs.Hook{
Path: hookCliPath,
Args: []string{hookCliPath},
})
}
spec.Hooks.Prestart = append(spec.Hooks.Prestart, specs.Hook{
Path: hookCliPath,
Args: []string{hookCliPath},
})
vdevice, err := dcmi.CreateVDevice(spec)
fmt.Printf("create vdevice %v \n", vdevice)
if err != nil {
return err
}
if vdevice.VdeviceID != -1 {
updateEnvAndPostHook(spec, vdevice)
}
return nil
}
// updateEnvAndPostHook rewrites the container environment for the virtual
// device vdevice and registers a poststop hook that destroys it.
//
// Environment changes (only entries that split into exactly two parts on "="
// are inspected; every other entry is kept as is):
//   - ASCEND_VISIBLE_DEVICES is replaced by the virtual device ID;
//   - ASCEND_RUNTIME_OPTIONS gets ",VIRTUAL" appended unless its value already
//     contains "VIRTUAL" (an empty value becomes just "VIRTUAL");
//   - ASCEND_RUNTIME_OPTIONS=VIRTUAL is added when no such entry exists.
//
// The hook runs destroyHookCli from the directory of the current executable
// with the card ID, device ID and virtual device ID as arguments. If the
// executable path cannot be determined the hook cannot be registered; this is
// logged because the virtual device will then not be destroyed on stop.
func updateEnvAndPostHook(spec *specs.Spec, vdevice dcmi.VDeviceInfo) {
	const pairLen = 2
	newEnv := make([]string, 0, len(spec.Process.Env)+1)
	needAddVirtualFlag := true
	for _, line := range spec.Process.Env {
		words := strings.Split(line, "=")
		if len(words) != pairLen {
			newEnv = append(newEnv, line)
			continue
		}
		switch strings.TrimSpace(words[0]) {
		case "ASCEND_VISIBLE_DEVICES":
			line = fmt.Sprintf("ASCEND_VISIBLE_DEVICES=%d", vdevice.VdeviceID)
		case "ASCEND_RUNTIME_OPTIONS":
			needAddVirtualFlag = false
			if !strings.Contains(words[1], "VIRTUAL") {
				// Avoid a leading comma when the option list is empty.
				sep := ","
				if strings.TrimSpace(words[1]) == "" {
					sep = ""
				}
				line = strings.TrimSpace(line) + sep + "VIRTUAL"
			}
		}
		newEnv = append(newEnv, line)
	}
	if needAddVirtualFlag {
		newEnv = append(newEnv, "ASCEND_RUNTIME_OPTIONS=VIRTUAL")
	}
	spec.Process.Env = newEnv

	currentExecPath, err := os.Executable()
	if err != nil {
		log.Printf("cannot locate %s, virtual device %d will not be destroyed on stop: %v",
			destroyHookCli, vdevice.VdeviceID, err)
		return
	}
	if spec.Hooks == nil {
		spec.Hooks = &specs.Hooks{}
	}
	postHookCliPath := path.Join(path.Dir(currentExecPath), destroyHookCli)
	spec.Hooks.Poststop = append(spec.Hooks.Poststop, specs.Hook{
		Path: postHookCliPath,
		Args: []string{
			postHookCliPath,
			fmt.Sprintf("%d", vdevice.CardID),
			fmt.Sprintf("%d", vdevice.DeviceID),
			fmt.Sprintf("%d", vdevice.VdeviceID),
		},
	})
}
func modifySpecFile(path string) error {
stat, err := os.Stat(path)
if err != nil {