Add GPU usage in cluster about API endpoint

This commit is contained in:
Ingo Oppermann
2024-10-31 14:32:18 +01:00
parent eb4d0430b6
commit bfb54ca177
9 changed files with 529 additions and 18 deletions

View File

@@ -171,7 +171,7 @@ func (a *api) Version(c echo.Context) error {
// @Tags v1.0.0
// @ID cluster-1-about
// @Produce json
// @Success 200 {string} About
// @Success 200 {object} client.AboutResponse
// @Failure 500 {object} Error
// @Router /v1/about [get]
func (a *api) About(c echo.Context) error {
@@ -413,7 +413,7 @@ func (a *api) ProcessAdd(c echo.Context) error {
// @Param id path string true "Process ID"
// @Param domain query string false "Domain to act on"
// @Param X-Cluster-Origin header string false "Origin ID of request"
// @Success 200 {string} string
// @Success 200 {object} client.GetProcessResponse
// @Failure 404 {object} Error
// @Failure 500 {object} Error
// @Failure 508 {object} Error

View File

@@ -65,7 +65,7 @@ const docTemplateClusterAPI = `{
"200": {
"description": "OK",
"schema": {
"type": "string"
"$ref": "#/definitions/client.AboutResponse"
}
},
"500": {
@@ -803,7 +803,7 @@ const docTemplateClusterAPI = `{
"200": {
"description": "OK",
"schema": {
"type": "string"
"$ref": "#/definitions/client.GetProcessResponse"
}
},
"404": {
@@ -1430,6 +1430,111 @@ const docTemplateClusterAPI = `{
}
}
},
"client.AboutResponse": {
"type": "object",
"properties": {
"address": {
"type": "string"
},
"id": {
"type": "string"
},
"resources": {
"$ref": "#/definitions/client.AboutResponseResources"
},
"started_at": {
"type": "string"
},
"version": {
"type": "string"
}
}
},
"client.AboutResponseGPUResources": {
"type": "object",
"properties": {
"decoder": {
"description": "Current decoder usage, 0-100",
"type": "number"
},
"encoder": {
"description": "Current encoder usage, 0-100",
"type": "number"
},
"memory_bytes": {
"description": "Currently used memory in bytes",
"type": "integer"
},
"memory_limit_bytes": {
"description": "Defined memory limit in bytes",
"type": "integer"
},
"memory_total_bytes": {
"description": "Total available memory in bytes",
"type": "integer"
},
"usage": {
"description": "Current general usage, 0-100",
"type": "number"
},
"usage_limit": {
"description": "Defined general usage limit, 0-100",
"type": "number"
}
}
},
"client.AboutResponseResources": {
"type": "object",
"properties": {
"cpu": {
"description": "Current CPU load, 0-100*ncpu",
"type": "number"
},
"cpu_core": {
"description": "Current CPU load of the core itself, 0-100*ncpu",
"type": "number"
},
"cpu_limit": {
"description": "Defined CPU load limit, 0-100*ncpu",
"type": "number"
},
"error": {
"description": "Last error",
"type": "string"
},
"gpu": {
"description": "Currently used GPU resources",
"type": "array",
"items": {
"$ref": "#/definitions/client.AboutResponseGPUResources"
}
},
"is_throttling": {
"description": "Whether this core is currently throttling",
"type": "boolean"
},
"memory_bytes": {
"description": "Currently used memory in bytes",
"type": "integer"
},
"memory_core_bytes": {
"description": "Current used memory of the core itself in bytes",
"type": "integer"
},
"memory_limit_bytes": {
"description": "Defined memory limit in bytes",
"type": "integer"
},
"memory_total_bytes": {
"description": "Total available memory in bytes",
"type": "integer"
},
"ncpu": {
"description": "Number of CPU on this node",
"type": "number"
}
}
},
"client.AddIdentityRequest": {
"type": "object",
"properties": {
@@ -1446,6 +1551,17 @@ const docTemplateClusterAPI = `{
}
}
},
"client.GetProcessResponse": {
"type": "object",
"properties": {
"nodeid": {
"type": "string"
},
"process": {
"$ref": "#/definitions/github_com_datarhei_core_v16_cluster_store.Process"
}
}
},
"client.JoinRequest": {
"type": "object",
"properties": {
@@ -2210,6 +2326,30 @@ const docTemplateClusterAPI = `{
}
}
},
"github_com_datarhei_core_v16_cluster_store.Process": {
"type": "object",
"properties": {
"config": {
"$ref": "#/definitions/app.Config"
},
"createdAt": {
"type": "string"
},
"error": {
"type": "string"
},
"metadata": {
"type": "object",
"additionalProperties": true
},
"order": {
"type": "string"
},
"updatedAt": {
"type": "string"
}
}
},
"identity.Auth0Tenant": {
"type": "object",
"properties": {

View File

@@ -58,7 +58,7 @@
"200": {
"description": "OK",
"schema": {
"type": "string"
"$ref": "#/definitions/client.AboutResponse"
}
},
"500": {
@@ -796,7 +796,7 @@
"200": {
"description": "OK",
"schema": {
"type": "string"
"$ref": "#/definitions/client.GetProcessResponse"
}
},
"404": {
@@ -1423,6 +1423,111 @@
}
}
},
"client.AboutResponse": {
"type": "object",
"properties": {
"address": {
"type": "string"
},
"id": {
"type": "string"
},
"resources": {
"$ref": "#/definitions/client.AboutResponseResources"
},
"started_at": {
"type": "string"
},
"version": {
"type": "string"
}
}
},
"client.AboutResponseGPUResources": {
"type": "object",
"properties": {
"decoder": {
"description": "Current decoder usage, 0-100",
"type": "number"
},
"encoder": {
"description": "Current encoder usage, 0-100",
"type": "number"
},
"memory_bytes": {
"description": "Currently used memory in bytes",
"type": "integer"
},
"memory_limit_bytes": {
"description": "Defined memory limit in bytes",
"type": "integer"
},
"memory_total_bytes": {
"description": "Total available memory in bytes",
"type": "integer"
},
"usage": {
"description": "Current general usage, 0-100",
"type": "number"
},
"usage_limit": {
"description": "Defined general usage limit, 0-100",
"type": "number"
}
}
},
"client.AboutResponseResources": {
"type": "object",
"properties": {
"cpu": {
"description": "Current CPU load, 0-100*ncpu",
"type": "number"
},
"cpu_core": {
"description": "Current CPU load of the core itself, 0-100*ncpu",
"type": "number"
},
"cpu_limit": {
"description": "Defined CPU load limit, 0-100*ncpu",
"type": "number"
},
"error": {
"description": "Last error",
"type": "string"
},
"gpu": {
"description": "Currently used GPU resources",
"type": "array",
"items": {
"$ref": "#/definitions/client.AboutResponseGPUResources"
}
},
"is_throttling": {
"description": "Whether this core is currently throttling",
"type": "boolean"
},
"memory_bytes": {
"description": "Currently used memory in bytes",
"type": "integer"
},
"memory_core_bytes": {
"description": "Current used memory of the core itself in bytes",
"type": "integer"
},
"memory_limit_bytes": {
"description": "Defined memory limit in bytes",
"type": "integer"
},
"memory_total_bytes": {
"description": "Total available memory in bytes",
"type": "integer"
},
"ncpu": {
"description": "Number of CPU on this node",
"type": "number"
}
}
},
"client.AddIdentityRequest": {
"type": "object",
"properties": {
@@ -1439,6 +1544,17 @@
}
}
},
"client.GetProcessResponse": {
"type": "object",
"properties": {
"nodeid": {
"type": "string"
},
"process": {
"$ref": "#/definitions/github_com_datarhei_core_v16_cluster_store.Process"
}
}
},
"client.JoinRequest": {
"type": "object",
"properties": {
@@ -2203,6 +2319,30 @@
}
}
},
"github_com_datarhei_core_v16_cluster_store.Process": {
"type": "object",
"properties": {
"config": {
"$ref": "#/definitions/app.Config"
},
"createdAt": {
"type": "string"
},
"error": {
"type": "string"
},
"metadata": {
"type": "object",
"additionalProperties": true
},
"order": {
"type": "string"
},
"updatedAt": {
"type": "string"
}
}
},
"identity.Auth0Tenant": {
"type": "object",
"properties": {

View File

@@ -100,6 +100,81 @@ definitions:
description: percent 0-100
type: number
type: object
client.AboutResponse:
properties:
address:
type: string
id:
type: string
resources:
$ref: '#/definitions/client.AboutResponseResources'
started_at:
type: string
version:
type: string
type: object
client.AboutResponseGPUResources:
properties:
decoder:
description: Current decoder usage, 0-100
type: number
encoder:
description: Current encoder usage, 0-100
type: number
memory_bytes:
description: Currently used memory in bytes
type: integer
memory_limit_bytes:
description: Defined memory limit in bytes
type: integer
memory_total_bytes:
description: Total available memory in bytes
type: integer
usage:
description: Current general usage, 0-100
type: number
usage_limit:
description: Defined general usage limit, 0-100
type: number
type: object
client.AboutResponseResources:
properties:
cpu:
description: Current CPU load, 0-100*ncpu
type: number
cpu_core:
description: Current CPU load of the core itself, 0-100*ncpu
type: number
cpu_limit:
description: Defined CPU load limit, 0-100*ncpu
type: number
error:
description: Last error
type: string
gpu:
description: Currently used GPU resources
items:
$ref: '#/definitions/client.AboutResponseGPUResources'
type: array
is_throttling:
description: Whether this core is currently throttling
type: boolean
memory_bytes:
description: Currently used memory in bytes
type: integer
memory_core_bytes:
description: Current used memory of the core itself in bytes
type: integer
memory_limit_bytes:
description: Defined memory limit in bytes
type: integer
memory_total_bytes:
description: Total available memory in bytes
type: integer
ncpu:
description: Number of CPU on this node
type: number
type: object
client.AddIdentityRequest:
properties:
identity:
@@ -110,6 +185,13 @@ definitions:
config:
$ref: '#/definitions/app.Config'
type: object
client.GetProcessResponse:
properties:
nodeid:
type: string
process:
$ref: '#/definitions/github_com_datarhei_core_v16_cluster_store.Process'
type: object
client.JoinRequest:
properties:
id:
@@ -620,6 +702,22 @@ definitions:
format: int64
type: integer
type: object
github_com_datarhei_core_v16_cluster_store.Process:
properties:
config:
$ref: '#/definitions/app.Config'
createdAt:
type: string
error:
type: string
metadata:
additionalProperties: true
type: object
order:
type: string
updatedAt:
type: string
type: object
identity.Auth0Tenant:
properties:
audience:
@@ -944,7 +1042,7 @@ paths:
"200":
description: OK
schema:
type: string
$ref: '#/definitions/client.AboutResponse'
"500":
description: Internal Server Error
schema:
@@ -1471,7 +1569,7 @@ paths:
"200":
description: OK
schema:
type: string
$ref: '#/definitions/client.GetProcessResponse'
"404":
description: Not Found
schema:

View File

@@ -5484,6 +5484,39 @@ const docTemplate = `{
}
}
},
"api.ClusterNodeGPUResources": {
"type": "object",
"properties": {
"memory_limit_bytes": {
"description": "Defined memory limit in bytes",
"type": "integer"
},
"memory_total_bytes": {
"description": "Total available memory in bytes",
"type": "integer"
},
"memory_used_bytes": {
"description": "Currently used memory in bytes",
"type": "integer"
},
"usage_decoder": {
"description": "Current decoder usage, 0-100",
"type": "number"
},
"usage_encoder": {
"description": "Current encoder usage, 0-100",
"type": "number"
},
"usage_general": {
"description": "Current general usage, 0-100",
"type": "number"
},
"usage_limit": {
"description": "Defined general usage limit, 0-100",
"type": "number"
}
}
},
"api.ClusterNodeID": {
"type": "object",
"properties": {
@@ -5510,6 +5543,13 @@ const docTemplate = `{
"error": {
"type": "string"
},
"gpu": {
"description": "GPU resources",
"type": "array",
"items": {
"$ref": "#/definitions/api.ClusterNodeGPUResources"
}
},
"is_throttling": {
"type": "boolean"
},

View File

@@ -5477,6 +5477,39 @@
}
}
},
"api.ClusterNodeGPUResources": {
"type": "object",
"properties": {
"memory_limit_bytes": {
"description": "Defined memory limit in bytes",
"type": "integer"
},
"memory_total_bytes": {
"description": "Total available memory in bytes",
"type": "integer"
},
"memory_used_bytes": {
"description": "Currently used memory in bytes",
"type": "integer"
},
"usage_decoder": {
"description": "Current decoder usage, 0-100",
"type": "number"
},
"usage_encoder": {
"description": "Current encoder usage, 0-100",
"type": "number"
},
"usage_general": {
"description": "Current general usage, 0-100",
"type": "number"
},
"usage_limit": {
"description": "Defined general usage limit, 0-100",
"type": "number"
}
}
},
"api.ClusterNodeID": {
"type": "object",
"properties": {
@@ -5503,6 +5536,13 @@
"error": {
"type": "string"
},
"gpu": {
"description": "GPU resources",
"type": "array",
"items": {
"$ref": "#/definitions/api.ClusterNodeGPUResources"
}
},
"is_throttling": {
"type": "boolean"
},

View File

@@ -283,6 +283,30 @@ definitions:
description: unix timestamp
type: integer
type: object
api.ClusterNodeGPUResources:
properties:
memory_limit_bytes:
description: Defined memory limit in bytes
type: integer
memory_total_bytes:
description: Total available memory in bytes
type: integer
memory_used_bytes:
description: Currently used memory in bytes
type: integer
usage_decoder:
description: Current decoder usage, 0-100
type: number
usage_encoder:
description: Current encoder usage, 0-100
type: number
usage_general:
description: Current general usage, 0-100
type: number
usage_limit:
description: Defined general usage limit, 0-100
type: number
type: object
api.ClusterNodeID:
properties:
id:
@@ -301,6 +325,11 @@ definitions:
type: number
error:
type: string
gpu:
description: GPU resources
items:
$ref: '#/definitions/api.ClusterNodeGPUResources'
type: array
is_throttling:
type: boolean
memory_core_bytes:

View File

@@ -39,16 +39,27 @@ type ClusterNodeCore struct {
}
// ClusterNodeResources is the JSON representation of a cluster node's
// resource figures: throttling state, CPU and memory usage with their
// limits, the per-GPU entries, and the last error string.
type ClusterNodeResources struct {
IsThrottling bool `json:"is_throttling"`
NCPU float64 `json:"ncpu"`
CPU float64 `json:"cpu_used"` // percent 0-100*ncpu
CPULimit float64 `json:"cpu_limit"` // percent 0-100*ncpu
CPUCore float64 `json:"cpu_core"` // percent 0-100*ncpu
Mem uint64 `json:"memory_used_bytes"` // bytes
MemLimit uint64 `json:"memory_limit_bytes"` // bytes
MemTotal uint64 `json:"memory_total_bytes"` // bytes
MemCore uint64 `json:"memory_core_bytes"` // bytes
Error string `json:"error"`
IsThrottling bool `json:"is_throttling"`
NCPU float64 `json:"ncpu"`
CPU float64 `json:"cpu_used"` // percent 0-100*ncpu
CPULimit float64 `json:"cpu_limit"` // percent 0-100*ncpu
CPUCore float64 `json:"cpu_core"` // percent 0-100*ncpu
Mem uint64 `json:"memory_used_bytes"` // bytes
MemLimit uint64 `json:"memory_limit_bytes"` // bytes
MemTotal uint64 `json:"memory_total_bytes"` // bytes
MemCore uint64 `json:"memory_core_bytes"` // bytes
GPU []ClusterNodeGPUResources `json:"gpu"` // GPU resources
Error string `json:"error"`
}
// ClusterNodeGPUResources is the JSON representation of a single GPU entry
// in ClusterNodeResources.GPU: its memory figures in bytes and its general,
// encoder, and decoder usage as percentages.
type ClusterNodeGPUResources struct {
Mem uint64 `json:"memory_used_bytes"` // Currently used memory in bytes
MemLimit uint64 `json:"memory_limit_bytes"` // Defined memory limit in bytes
MemTotal uint64 `json:"memory_total_bytes"` // Total available memory in bytes
Usage float64 `json:"usage_general"` // Current general usage, 0-100
UsageLimit float64 `json:"usage_limit"` // Defined general usage limit, 0-100
Encoder float64 `json:"usage_encoder"` // Current encoder usage, 0-100
Decoder float64 `json:"usage_decoder"` // Current decoder usage, 0-100
}
type ClusterRaft struct {

View File

@@ -123,9 +123,22 @@ func (h *ClusterHandler) marshalClusterNode(node cluster.ClusterNode) api.Cluste
MemLimit: node.Resources.MemLimit,
MemTotal: node.Resources.MemTotal,
MemCore: node.Resources.MemCore,
GPU: []api.ClusterNodeGPUResources{},
},
}
for _, gpu := range node.Resources.GPU {
n.Resources.GPU = append(n.Resources.GPU, api.ClusterNodeGPUResources{
Mem: gpu.Mem,
MemLimit: gpu.MemLimit,
MemTotal: gpu.MemTotal,
Usage: gpu.Usage,
UsageLimit: gpu.UsageLimit,
Encoder: gpu.Encoder,
Decoder: gpu.Decoder,
})
}
if node.Error != nil {
n.Error = node.Error.Error()
}