Add GPU usage in cluster about API endpoint

2025-10-04 23:53:12 +08:00 · 2024-10-31 14:32:18 +01:00
parent eb4d0430b6
commit bfb54ca177
9 changed files with 529 additions and 18 deletions
--- a/cluster/api.go
+++ b/cluster/api.go
@@ -171,7 +171,7 @@ func (a *api) Version(c echo.Context) error {
 // @Tags v1.0.0
 // @ID cluster-1-about
 // @Produce json
-// @Success 200 {string} About
+// @Success 200 {object} client.AboutResponse
 // @Success 500 {object} Error
 // @Router /v1/about [get]
 func (a *api) About(c echo.Context) error {
@@ -413,7 +413,7 @@ func (a *api) ProcessAdd(c echo.Context) error {
 // @Param id path string true "Process ID"
 // @Param domain query string false "Domain to act on"
 // @Param X-Cluster-Origin header string false "Origin ID of request"
-// @Success 200 {string} string
+// @Success 200 {object} client.GetProcessResponse
 // @Failure 404 {object} Error
 // @Failure 500 {object} Error
 // @Failure 508 {object} Error
--- a/cluster/docs/ClusterAPI_docs.go
+++ b/cluster/docs/ClusterAPI_docs.go
@@ -65,7 +65,7 @@ const docTemplateClusterAPI = `{
                    "200": {
                        "description": "OK",
                        "schema": {
-                            "type": "string"
+                            "$ref": "#/definitions/client.AboutResponse"
                        }
                    },
                    "500": {
@@ -803,7 +803,7 @@ const docTemplateClusterAPI = `{
                    "200": {
                        "description": "OK",
                        "schema": {
-                            "type": "string"
+                            "$ref": "#/definitions/client.GetProcessResponse"
                        }
                    },
                    "404": {
@@ -1430,6 +1430,111 @@ const docTemplateClusterAPI = `{
                }
            }
        },
+        "client.AboutResponse": {
+            "type": "object",
+            "properties": {
+                "address": {
+                    "type": "string"
+                },
+                "id": {
+                    "type": "string"
+                },
+                "resources": {
+                    "$ref": "#/definitions/client.AboutResponseResources"
+                },
+                "started_at": {
+                    "type": "string"
+                },
+                "version": {
+                    "type": "string"
+                }
+            }
+        },
+        "client.AboutResponseGPUResources": {
+            "type": "object",
+            "properties": {
+                "decoder": {
+                    "description": "Current decoder usage, 0-100",
+                    "type": "number"
+                },
+                "encoder": {
+                    "description": "Current encoder usage, 0-100",
+                    "type": "number"
+                },
+                "memory_bytes": {
+                    "description": "Currently used memory in bytes",
+                    "type": "integer"
+                },
+                "memory_limit_bytes": {
+                    "description": "Defined memory limit in bytes",
+                    "type": "integer"
+                },
+                "memory_total_bytes": {
+                    "description": "Total available memory in bytes",
+                    "type": "integer"
+                },
+                "usage": {
+                    "description": "Current general usage, 0-100",
+                    "type": "number"
+                },
+                "usage_limit": {
+                    "description": "Defined general usage limit, 0-100",
+                    "type": "number"
+                }
+            }
+        },
+        "client.AboutResponseResources": {
+            "type": "object",
+            "properties": {
+                "cpu": {
+                    "description": "Current CPU load, 0-100*ncpu",
+                    "type": "number"
+                },
+                "cpu_core": {
+                    "description": "Current CPU load of the core itself, 0-100*ncpu",
+                    "type": "number"
+                },
+                "cpu_limit": {
+                    "description": "Defined CPU load limit, 0-100*ncpu",
+                    "type": "number"
+                },
+                "error": {
+                    "description": "Last error",
+                    "type": "string"
+                },
+                "gpu": {
+                    "description": "Currently used GPU resources",
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/client.AboutResponseGPUResources"
+                    }
+                },
+                "is_throttling": {
+                    "description": "Whether this core is currently throttling",
+                    "type": "boolean"
+                },
+                "memory_bytes": {
+                    "description": "Currently used memory in bytes",
+                    "type": "integer"
+                },
+                "memory_core_bytes": {
+                    "description": "Current used memory of the core itself in bytes",
+                    "type": "integer"
+                },
+                "memory_limit_bytes": {
+                    "description": "Defined memory limit in bytes",
+                    "type": "integer"
+                },
+                "memory_total_bytes": {
+                    "description": "Total available memory in bytes",
+                    "type": "integer"
+                },
+                "ncpu": {
+                    "description": "Number of CPU on this node",
+                    "type": "number"
+                }
+            }
+        },
        "client.AddIdentityRequest": {
            "type": "object",
            "properties": {
@@ -1446,6 +1551,17 @@ const docTemplateClusterAPI = `{
                }
            }
        },
+        "client.GetProcessResponse": {
+            "type": "object",
+            "properties": {
+                "nodeid": {
+                    "type": "string"
+                },
+                "process": {
+                    "$ref": "#/definitions/github_com_datarhei_core_v16_cluster_store.Process"
+                }
+            }
+        },
        "client.JoinRequest": {
            "type": "object",
            "properties": {
@@ -2210,6 +2326,30 @@ const docTemplateClusterAPI = `{
                }
            }
        },
+        "github_com_datarhei_core_v16_cluster_store.Process": {
+            "type": "object",
+            "properties": {
+                "config": {
+                    "$ref": "#/definitions/app.Config"
+                },
+                "createdAt": {
+                    "type": "string"
+                },
+                "error": {
+                    "type": "string"
+                },
+                "metadata": {
+                    "type": "object",
+                    "additionalProperties": true
+                },
+                "order": {
+                    "type": "string"
+                },
+                "updatedAt": {
+                    "type": "string"
+                }
+            }
+        },
        "identity.Auth0Tenant": {
            "type": "object",
            "properties": {
--- a/cluster/docs/ClusterAPI_swagger.json
+++ b/cluster/docs/ClusterAPI_swagger.json
@@ -58,7 +58,7 @@
                    "200": {
                        "description": "OK",
                        "schema": {
-                            "type": "string"
+                            "$ref": "#/definitions/client.AboutResponse"
                        }
                    },
                    "500": {
@@ -796,7 +796,7 @@
                    "200": {
                        "description": "OK",
                        "schema": {
-                            "type": "string"
+                            "$ref": "#/definitions/client.GetProcessResponse"
                        }
                    },
                    "404": {
@@ -1423,6 +1423,111 @@
                }
            }
        },
+        "client.AboutResponse": {
+            "type": "object",
+            "properties": {
+                "address": {
+                    "type": "string"
+                },
+                "id": {
+                    "type": "string"
+                },
+                "resources": {
+                    "$ref": "#/definitions/client.AboutResponseResources"
+                },
+                "started_at": {
+                    "type": "string"
+                },
+                "version": {
+                    "type": "string"
+                }
+            }
+        },
+        "client.AboutResponseGPUResources": {
+            "type": "object",
+            "properties": {
+                "decoder": {
+                    "description": "Current decoder usage, 0-100",
+                    "type": "number"
+                },
+                "encoder": {
+                    "description": "Current encoder usage, 0-100",
+                    "type": "number"
+                },
+                "memory_bytes": {
+                    "description": "Currently used memory in bytes",
+                    "type": "integer"
+                },
+                "memory_limit_bytes": {
+                    "description": "Defined memory limit in bytes",
+                    "type": "integer"
+                },
+                "memory_total_bytes": {
+                    "description": "Total available memory in bytes",
+                    "type": "integer"
+                },
+                "usage": {
+                    "description": "Current general usage, 0-100",
+                    "type": "number"
+                },
+                "usage_limit": {
+                    "description": "Defined general usage limit, 0-100",
+                    "type": "number"
+                }
+            }
+        },
+        "client.AboutResponseResources": {
+            "type": "object",
+            "properties": {
+                "cpu": {
+                    "description": "Current CPU load, 0-100*ncpu",
+                    "type": "number"
+                },
+                "cpu_core": {
+                    "description": "Current CPU load of the core itself, 0-100*ncpu",
+                    "type": "number"
+                },
+                "cpu_limit": {
+                    "description": "Defined CPU load limit, 0-100*ncpu",
+                    "type": "number"
+                },
+                "error": {
+                    "description": "Last error",
+                    "type": "string"
+                },
+                "gpu": {
+                    "description": "Currently used GPU resources",
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/client.AboutResponseGPUResources"
+                    }
+                },
+                "is_throttling": {
+                    "description": "Whether this core is currently throttling",
+                    "type": "boolean"
+                },
+                "memory_bytes": {
+                    "description": "Currently used memory in bytes",
+                    "type": "integer"
+                },
+                "memory_core_bytes": {
+                    "description": "Current used memory of the core itself in bytes",
+                    "type": "integer"
+                },
+                "memory_limit_bytes": {
+                    "description": "Defined memory limit in bytes",
+                    "type": "integer"
+                },
+                "memory_total_bytes": {
+                    "description": "Total available memory in bytes",
+                    "type": "integer"
+                },
+                "ncpu": {
+                    "description": "Number of CPU on this node",
+                    "type": "number"
+                }
+            }
+        },
        "client.AddIdentityRequest": {
            "type": "object",
            "properties": {
@@ -1439,6 +1544,17 @@
                }
            }
        },
+        "client.GetProcessResponse": {
+            "type": "object",
+            "properties": {
+                "nodeid": {
+                    "type": "string"
+                },
+                "process": {
+                    "$ref": "#/definitions/github_com_datarhei_core_v16_cluster_store.Process"
+                }
+            }
+        },
        "client.JoinRequest": {
            "type": "object",
            "properties": {
@@ -2203,6 +2319,30 @@
                }
            }
        },
+        "github_com_datarhei_core_v16_cluster_store.Process": {
+            "type": "object",
+            "properties": {
+                "config": {
+                    "$ref": "#/definitions/app.Config"
+                },
+                "createdAt": {
+                    "type": "string"
+                },
+                "error": {
+                    "type": "string"
+                },
+                "metadata": {
+                    "type": "object",
+                    "additionalProperties": true
+                },
+                "order": {
+                    "type": "string"
+                },
+                "updatedAt": {
+                    "type": "string"
+                }
+            }
+        },
        "identity.Auth0Tenant": {
            "type": "object",
            "properties": {
--- a/cluster/docs/ClusterAPI_swagger.yaml
+++ b/cluster/docs/ClusterAPI_swagger.yaml
@@ -100,6 +100,81 @@ definitions:
        description: percent 0-100
        type: number
    type: object
+  client.AboutResponse:
+    properties:
+      address:
+        type: string
+      id:
+        type: string
+      resources:
+        $ref: '#/definitions/client.AboutResponseResources'
+      started_at:
+        type: string
+      version:
+        type: string
+    type: object
+  client.AboutResponseGPUResources:
+    properties:
+      decoder:
+        description: Current decoder usage, 0-100
+        type: number
+      encoder:
+        description: Current encoder usage, 0-100
+        type: number
+      memory_bytes:
+        description: Currently used memory in bytes
+        type: integer
+      memory_limit_bytes:
+        description: Defined memory limit in bytes
+        type: integer
+      memory_total_bytes:
+        description: Total available memory in bytes
+        type: integer
+      usage:
+        description: Current general usage, 0-100
+        type: number
+      usage_limit:
+        description: Defined general usage limit, 0-100
+        type: number
+    type: object
+  client.AboutResponseResources:
+    properties:
+      cpu:
+        description: Current CPU load, 0-100*ncpu
+        type: number
+      cpu_core:
+        description: Current CPU load of the core itself, 0-100*ncpu
+        type: number
+      cpu_limit:
+        description: Defined CPU load limit, 0-100*ncpu
+        type: number
+      error:
+        description: Last error
+        type: string
+      gpu:
+        description: Currently used GPU resources
+        items:
+          $ref: '#/definitions/client.AboutResponseGPUResources'
+        type: array
+      is_throttling:
+        description: Whether this core is currently throttling
+        type: boolean
+      memory_bytes:
+        description: Currently used memory in bytes
+        type: integer
+      memory_core_bytes:
+        description: Current used memory of the core itself in bytes
+        type: integer
+      memory_limit_bytes:
+        description: Defined memory limit in bytes
+        type: integer
+      memory_total_bytes:
+        description: Total available memory in bytes
+        type: integer
+      ncpu:
+        description: Number of CPU on this node
+        type: number
+    type: object
  client.AddIdentityRequest:
    properties:
      identity:
@@ -110,6 +185,13 @@ definitions:
      config:
        $ref: '#/definitions/app.Config'
    type: object
+  client.GetProcessResponse:
+    properties:
+      nodeid:
+        type: string
+      process:
+        $ref: '#/definitions/github_com_datarhei_core_v16_cluster_store.Process'
+    type: object
  client.JoinRequest:
    properties:
      id:
@@ -620,6 +702,22 @@ definitions:
        format: int64
        type: integer
    type: object
+  github_com_datarhei_core_v16_cluster_store.Process:
+    properties:
+      config:
+        $ref: '#/definitions/app.Config'
+      createdAt:
+        type: string
+      error:
+        type: string
+      metadata:
+        additionalProperties: true
+        type: object
+      order:
+        type: string
+      updatedAt:
+        type: string
+    type: object
  identity.Auth0Tenant:
    properties:
      audience:
@@ -944,7 +1042,7 @@ paths:
        "200":
          description: OK
          schema:
-            type: string
+            $ref: '#/definitions/client.AboutResponse'
        "500":
          description: Internal Server Error
          schema:
@@ -1471,7 +1569,7 @@ paths:
        "200":
          description: OK
          schema:
-            type: string
+            $ref: '#/definitions/client.GetProcessResponse'
        "404":
          description: Not Found
          schema:
--- a/docs/docs.go
+++ b/docs/docs.go
@@ -5484,6 +5484,39 @@ const docTemplate = `{
                }
            }
        },
+        "api.ClusterNodeGPUResources": {
+            "type": "object",
+            "properties": {
+                "memory_limit_bytes": {
+                    "description": "Defined memory limit in bytes",
+                    "type": "integer"
+                },
+                "memory_total_bytes": {
+                    "description": "Total available memory in bytes",
+                    "type": "integer"
+                },
+                "memory_used_bytes": {
+                    "description": "Currently used memory in bytes",
+                    "type": "integer"
+                },
+                "usage_decoder": {
+                    "description": "Current decoder usage, 0-100",
+                    "type": "number"
+                },
+                "usage_encoder": {
+                    "description": "Current encoder usage, 0-100",
+                    "type": "number"
+                },
+                "usage_general": {
+                    "description": "Current general usage, 0-100",
+                    "type": "number"
+                },
+                "usage_limit": {
+                    "description": "Defined general usage limit, 0-100",
+                    "type": "number"
+                }
+            }
+        },
        "api.ClusterNodeID": {
            "type": "object",
            "properties": {
@@ -5510,6 +5543,13 @@ const docTemplate = `{
                "error": {
                    "type": "string"
                },
+                "gpu": {
+                    "description": "GPU resources",
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/api.ClusterNodeGPUResources"
+                    }
+                },
                "is_throttling": {
                    "type": "boolean"
                },
--- a/docs/swagger.json
+++ b/docs/swagger.json
@@ -5477,6 +5477,39 @@
                }
            }
        },
+        "api.ClusterNodeGPUResources": {
+            "type": "object",
+            "properties": {
+                "memory_limit_bytes": {
+                    "description": "Defined memory limit in bytes",
+                    "type": "integer"
+                },
+                "memory_total_bytes": {
+                    "description": "Total available memory in bytes",
+                    "type": "integer"
+                },
+                "memory_used_bytes": {
+                    "description": "Currently used memory in bytes",
+                    "type": "integer"
+                },
+                "usage_decoder": {
+                    "description": "Current decoder usage, 0-100",
+                    "type": "number"
+                },
+                "usage_encoder": {
+                    "description": "Current encoder usage, 0-100",
+                    "type": "number"
+                },
+                "usage_general": {
+                    "description": "Current general usage, 0-100",
+                    "type": "number"
+                },
+                "usage_limit": {
+                    "description": "Defined general usage limit, 0-100",
+                    "type": "number"
+                }
+            }
+        },
        "api.ClusterNodeID": {
            "type": "object",
            "properties": {
@@ -5503,6 +5536,13 @@
                "error": {
                    "type": "string"
                },
+                "gpu": {
+                    "description": "GPU resources",
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/api.ClusterNodeGPUResources"
+                    }
+                },
                "is_throttling": {
                    "type": "boolean"
                },
--- a/docs/swagger.yaml
+++ b/docs/swagger.yaml
@@ -283,6 +283,30 @@ definitions:
        description: unix timestamp
        type: integer
    type: object
+  api.ClusterNodeGPUResources:
+    properties:
+      memory_limit_bytes:
+        description: Defined memory limit in bytes
+        type: integer
+      memory_total_bytes:
+        description: Total available memory in bytes
+        type: integer
+      memory_used_bytes:
+        description: Currently used memory in bytes
+        type: integer
+      usage_decoder:
+        description: Current decoder usage, 0-100
+        type: number
+      usage_encoder:
+        description: Current encoder usage, 0-100
+        type: number
+      usage_general:
+        description: Current general usage, 0-100
+        type: number
+      usage_limit:
+        description: Defined general usage limit, 0-100
+        type: number
+    type: object
  api.ClusterNodeID:
    properties:
      id:
@@ -301,6 +325,11 @@ definitions:
        type: number
      error:
        type: string
+      gpu:
+        description: GPU resources
+        items:
+          $ref: '#/definitions/api.ClusterNodeGPUResources'
+        type: array
      is_throttling:
        type: boolean
      memory_core_bytes:
--- a/http/api/cluster.go
+++ b/http/api/cluster.go
@@ -39,16 +39,27 @@ type ClusterNodeCore struct {
 }

 type ClusterNodeResources struct {
-	IsThrottling bool    `json:"is_throttling"`
-	NCPU         float64 `json:"ncpu"`
-	CPU          float64 `json:"cpu_used"`           // percent 0-100*npcu
-	CPULimit     float64 `json:"cpu_limit"`          // percent 0-100*npcu
-	CPUCore      float64 `json:"cpu_core"`           // percent 0-100*ncpu
-	Mem          uint64  `json:"memory_used_bytes"`  // bytes
-	MemLimit     uint64  `json:"memory_limit_bytes"` // bytes
-	MemTotal     uint64  `json:"memory_total_bytes"` // bytes
-	MemCore      uint64  `json:"memory_core_bytes"`  // bytes
-	Error        string  `json:"error"`
+	IsThrottling bool                      `json:"is_throttling"`
+	NCPU         float64                   `json:"ncpu"`
+	CPU          float64                   `json:"cpu_used"`           // percent 0-100*ncpu
+	CPULimit     float64                   `json:"cpu_limit"`          // percent 0-100*ncpu
+	CPUCore      float64                   `json:"cpu_core"`           // percent 0-100*ncpu
+	Mem          uint64                    `json:"memory_used_bytes"`  // bytes
+	MemLimit     uint64                    `json:"memory_limit_bytes"` // bytes
+	MemTotal     uint64                    `json:"memory_total_bytes"` // bytes
+	MemCore      uint64                    `json:"memory_core_bytes"`  // bytes
+	GPU          []ClusterNodeGPUResources `json:"gpu"`                // GPU resources
+	Error        string                    `json:"error"`
+}
+
+type ClusterNodeGPUResources struct {
+	Mem        uint64  `json:"memory_used_bytes"`  // Currently used memory in bytes
+	MemLimit   uint64  `json:"memory_limit_bytes"` // Defined memory limit in bytes
+	MemTotal   uint64  `json:"memory_total_bytes"` // Total available memory in bytes
+	Usage      float64 `json:"usage_general"`      // Current general usage, 0-100
+	UsageLimit float64 `json:"usage_limit"`        // Defined general usage limit, 0-100
+	Encoder    float64 `json:"usage_encoder"`      // Current encoder usage, 0-100
+	Decoder    float64 `json:"usage_decoder"`      // Current decoder usage, 0-100
 }

 type ClusterRaft struct {
--- a/http/handler/api/cluster.go
+++ b/http/handler/api/cluster.go
@@ -123,9 +123,22 @@ func (h *ClusterHandler) marshalClusterNode(node cluster.ClusterNode) api.Cluste
 			MemLimit:     node.Resources.MemLimit,
 			MemTotal:     node.Resources.MemTotal,
 			MemCore:      node.Resources.MemCore,
+			GPU:          []api.ClusterNodeGPUResources{},
 		},
 	}

+	for _, gpu := range node.Resources.GPU {
+		n.Resources.GPU = append(n.Resources.GPU, api.ClusterNodeGPUResources{
+			Mem:        gpu.Mem,
+			MemLimit:   gpu.MemLimit,
+			MemTotal:   gpu.MemTotal,
+			Usage:      gpu.Usage,
+			UsageLimit: gpu.UsageLimit,
+			Encoder:    gpu.Encoder,
+			Decoder:    gpu.Decoder,
+		})
+	}
+
 	if node.Error != nil {
 		n.Error = node.Error.Error()
 	}