Refactor cluster.About() data

This commit is contained in:
Ingo Oppermann
2023-07-14 11:22:08 +02:00
parent fd7354286e
commit cefd35f7da
11 changed files with 431 additions and 250 deletions

View File

@@ -90,7 +90,7 @@ coverage:
go tool cover -html=test/cover.out -o test/cover.html
## commit: Prepare code for commit (vet, fmt, test)
commit: vet fmt lint test build
commit: vet fmt lint test vulncheck build
@echo "No errors found. Ready for a commit."
## release: Build a release binary of core

View File

@@ -1301,34 +1301,59 @@ func (c *cluster) applyCommand(cmd *store.Command) error {
return nil
}
type ClusterRaftServer struct {
ID string
Address string
Voter bool
Leader bool
}
type ClusterRaftStats struct {
type ClusterRaft struct {
Address string
State string
LastContact time.Duration
NumPeers uint64
LogTerm uint64
LogIndex uint64
}
type ClusterRaft struct {
Server []ClusterRaftServer
Stats ClusterRaftStats
type ClusterNodeResources struct {
IsThrottling bool // Whether this core is currently throttling
NCPU float64 // Number of CPU on this node
CPU float64 // Current CPU load, 0-100*ncpu
CPULimit float64 // Defined CPU load limit, 0-100*ncpu
Mem uint64 // Currently used memory in bytes
MemLimit uint64 // Defined memory limit in bytes
}
// ClusterNode describes a single node of the cluster as reported by
// cluster.About. Most fields are copied from the node's own About data;
// Voter and Leader are taken from the raft server entry with the same ID,
// if there is one.
type ClusterNode struct {
// ID is the key under which the node is registered in the cluster.
ID string
Name string
Version string
// Status and Error describe the connection to the node.
Status string
Error error
// Voter and Leader stay false if raft doesn't list a server with this ID.
Voter bool
Leader bool
Address string
// CreatedAt and Uptime are taken from the node's core about data.
CreatedAt time.Time
Uptime time.Duration
LastContact time.Duration
Latency time.Duration
// Core describes the connection to the node's core.
Core ClusterNodeCore
Resources ClusterNodeResources
}
// ClusterNodeCore describes the core of a cluster node. cluster.About fills
// it from the Core part of the node's About data.
type ClusterNodeCore struct {
Address string
Status string
Error error
LastContact time.Duration
Latency time.Duration
}
type ClusterAbout struct {
ID string
Address string
ClusterAPIAddress string
CoreAPIAddress string
Raft ClusterRaft
Nodes []proxy.NodeAbout
Version ClusterVersion
Degraded bool
DegradedErr error
ID string
Name string
Leader bool
Address string
Raft ClusterRaft
Nodes []ClusterNode
Version ClusterVersion
Degraded bool
DegradedErr error
}
func (c *cluster) About() (ClusterAbout, error) {
@@ -1336,48 +1361,79 @@ func (c *cluster) About() (ClusterAbout, error) {
about := ClusterAbout{
ID: c.id,
Address: c.Address(),
Degraded: degraded,
DegradedErr: degradedErr,
Version: Version,
}
if address, err := c.ClusterAPIAddress(""); err == nil {
about.ClusterAPIAddress = address
}
if address, err := c.CoreAPIAddress(""); err == nil {
about.CoreAPIAddress = address
about.Address = address
}
stats := c.raft.Stats()
about.Raft.Stats.State = stats.State
about.Raft.Stats.LastContact = stats.LastContact
about.Raft.Stats.NumPeers = stats.NumPeers
about.Raft.Address = stats.Address
about.Raft.State = stats.State
about.Raft.LastContact = stats.LastContact
about.Raft.NumPeers = stats.NumPeers
about.Raft.LogIndex = stats.LogIndex
about.Raft.LogTerm = stats.LogTerm
servers, err := c.raft.Servers()
if err != nil {
c.logger.Error().WithError(err).Log("Raft configuration")
return ClusterAbout{}, err
c.logger.Warn().WithError(err).Log("Raft configuration")
}
for _, server := range servers {
node := ClusterRaftServer{
ID: server.ID,
Address: server.Address,
Voter: server.Voter,
Leader: server.Leader,
serversMap := map[string]raft.Server{}
for _, s := range servers {
serversMap[s.ID] = s
}
c.nodesLock.Lock()
for id, node := range c.nodes {
nodeAbout := node.About()
node := ClusterNode{
ID: id,
Name: nodeAbout.Name,
Version: nodeAbout.Version,
Status: nodeAbout.Status,
Error: nodeAbout.Error,
Address: nodeAbout.Address,
LastContact: nodeAbout.LastContact,
Latency: nodeAbout.Latency,
CreatedAt: nodeAbout.Core.CreatedAt,
Uptime: nodeAbout.Core.Uptime,
Core: ClusterNodeCore{
Address: nodeAbout.Core.Address,
Status: nodeAbout.Core.Status,
Error: nodeAbout.Core.Error,
LastContact: nodeAbout.Core.LastContact,
Latency: nodeAbout.Core.Latency,
},
Resources: ClusterNodeResources{
IsThrottling: nodeAbout.Resources.IsThrottling,
NCPU: nodeAbout.Resources.NCPU,
CPU: nodeAbout.Resources.CPU,
CPULimit: nodeAbout.Resources.CPULimit,
Mem: nodeAbout.Resources.Mem,
MemLimit: nodeAbout.Resources.MemLimit,
},
}
about.Raft.Server = append(about.Raft.Server, node)
}
if id == c.id {
about.Name = nodeAbout.Name
}
about.Version = Version
if s, ok := serversMap[id]; ok {
node.Voter = s.Voter
node.Leader = s.Leader
}
nodes := c.ProxyReader().ListNodes()
for _, node := range nodes {
about.Nodes = append(about.Nodes, node.About())
about.Nodes = append(about.Nodes, node)
}
c.nodesLock.Unlock()
return about, nil
}

View File

@@ -16,6 +16,7 @@ import (
type Node interface {
Stop() error
About() About
Version() string
IPs() []string
Status() (string, error)
@@ -42,7 +43,7 @@ type node struct {
lastContactErr error
lastCoreContact time.Time
lastCoreContactErr error
latency time.Duration
latency float64
pingLock sync.RWMutex
runLock sync.Mutex
@@ -96,8 +97,9 @@ func (n *node) start(id string) error {
address, config, err := n.CoreEssentials()
n.proxyNode = proxy.NewNode(id, address, config)
n.lastCoreContactErr = err
if err != nil {
n.lastCoreContactErr = err
go func(ctx context.Context) {
ticker := time.NewTicker(5 * time.Second)
defer ticker.Stop()
@@ -143,6 +145,74 @@ func (n *node) Stop() error {
return nil
}
// maxLastContact is the threshold for the time since the last contact. If
// more time than this has passed, the node (or its core) is reported as
// "offline" instead of "online".
var maxLastContact time.Duration = 5 * time.Second
// AboutCore holds the core related part of a node's About data.
type AboutCore struct {
Address string
// State and StateError are the state and error as reported by the
// proxy node's about data.
State string
StateError error
// Status is "online" or "offline", depending on whether LastContact
// exceeds maxLastContact.
Status string
// Error is the error of the last contact with the core, if any.
Error error
CreatedAt time.Time
Uptime time.Duration
// LastContact is the time elapsed since the last contact.
LastContact time.Duration
Latency time.Duration
}
// About is the information a node provides about itself via Node.About.
type About struct {
ID string
// Name is taken from the about data of the node's core.
Name string
Version string
Address string
// Status is "online" or "offline", depending on whether LastContact
// exceeds maxLastContact.
Status string
// LastContact is the time elapsed since the last contact.
LastContact time.Duration
Latency time.Duration
// Error is the error of the last contact, if any.
Error error
Core AboutCore
Resources proxy.NodeResources
}
// About returns a snapshot of what is currently known about this node: its
// identity, the state of the connection to the node, and the about data of
// its core as returned by CoreAbout.
func (n *node) About() About {
	result := About{}
	result.ID = n.id
	// Version acquires pingLock on its own, so it is called before the read
	// lock below is taken.
	result.Version = n.Version()
	result.Address = n.address

	// Copy everything guarded by pingLock in one go and release the lock
	// before talking to the proxy node.
	n.pingLock.RLock()
	sinceContact := time.Since(n.lastContact)
	latencySeconds := n.latency
	contactErr := n.lastContactErr
	coreContactErr := n.lastCoreContactErr
	n.pingLock.RUnlock()

	result.LastContact = sinceContact
	result.Status = "online"
	if sinceContact > maxLastContact {
		result.Status = "offline"
	}
	// The latency is kept as seconds in a float64; expose it as a Duration.
	result.Latency = time.Duration(latencySeconds * float64(time.Second))
	result.Error = contactErr

	core := n.CoreAbout()

	sinceCoreContact := time.Since(core.LastContact)
	coreStatus := "online"
	if sinceCoreContact > maxLastContact {
		coreStatus = "offline"
	}

	result.Name = core.Name
	result.Core = AboutCore{
		Address:     core.Address,
		State:       core.State,
		StateError:  core.Error,
		Status:      coreStatus,
		Error:       coreContactErr,
		CreatedAt:   core.CreatedAt,
		Uptime:      core.Uptime,
		LastContact: sinceCoreContact,
		Latency:     core.Latency,
	}
	result.Resources = core.Resources

	return result
}
func (n *node) Version() string {
n.pingLock.RLock()
defer n.pingLock.RUnlock()
@@ -159,7 +229,7 @@ func (n *node) Status() (string, error) {
defer n.pingLock.RUnlock()
since := time.Since(n.lastContact)
if since > 5*time.Second {
if since > maxLastContact {
return "offline", fmt.Errorf("the cluster API didn't respond for %s because: %w", since, n.lastContactErr)
}
@@ -171,7 +241,7 @@ func (n *node) CoreStatus() (string, error) {
defer n.pingLock.RUnlock()
since := time.Since(n.lastCoreContact)
if since > 5*time.Second {
if since > maxLastContact {
return "offline", fmt.Errorf("the core API didn't respond for %s because: %w", since, n.lastCoreContactErr)
}
@@ -211,6 +281,10 @@ func (n *node) CoreAPIAddress() (string, error) {
return n.client.CoreAPIAddress()
}
// CoreAbout returns the about data of the proxy node that was created for
// this node's core.
func (n *node) CoreAbout() proxy.NodeAbout {
return n.proxyNode.About()
}
func (n *node) Barrier(name string) (bool, error) {
return n.client.Barrier(name)
}
@@ -232,7 +306,8 @@ func (n *node) ping(ctx context.Context) {
n.pingLock.Lock()
n.version = version
n.lastContact = time.Now()
n.latency = time.Since(start)
n.lastContactErr = nil
n.latency = n.latency*0.2 + time.Since(start).Seconds()*0.8
n.pingLock.Unlock()
} else {
n.pingLock.Lock()

View File

@@ -541,6 +541,7 @@ func (n *node) About() NodeAbout {
Name: about.Name,
Address: n.address,
State: state.String(),
Error: n.peerErr,
CreatedAt: createdAt,
Uptime: time.Since(createdAt),
LastContact: n.lastContact,

View File

@@ -83,9 +83,12 @@ type Server struct {
}
// Stats is a condensed view of the raft statistics as returned by the
// Stats method.
type Stats struct {
// Address is the raft address of this node.
Address string
State string
LastContact time.Duration
NumPeers uint64
// LogTerm and LogIndex are parsed from the "last_log_term" and
// "last_log_index" entries of the underlying raft statistics. They stay
// 0 if the respective entry can't be parsed.
LogTerm uint64
LogIndex uint64
}
type Config struct {
@@ -194,7 +197,9 @@ func (r *raft) Servers() ([]Server, error) {
}
func (r *raft) Stats() Stats {
stats := Stats{}
stats := Stats{
Address: r.raftAddress,
}
s := r.raft.Stats()
@@ -219,6 +224,14 @@ func (r *raft) Stats() Stats {
stats.NumPeers = x
}
if x, err := strconv.ParseUint(s["last_log_term"], 10, 64); err == nil {
stats.LogTerm = x
}
if x, err := strconv.ParseUint(s["last_log_index"], 10, 64); err == nil {
stats.LogIndex = x
}
return stats
}

View File

@@ -4437,12 +4437,6 @@ const docTemplate = `{
"address": {
"type": "string"
},
"cluster_api_address": {
"type": "string"
},
"core_api_address": {
"type": "string"
},
"degraded": {
"type": "boolean"
},
@@ -4452,6 +4446,12 @@ const docTemplate = `{
"id": {
"type": "string"
},
"leader": {
"type": "boolean"
},
"name": {
"type": "string"
},
"nodes": {
"type": "array",
"items": {
@@ -4500,31 +4500,65 @@ const docTemplate = `{
"address": {
"type": "string"
},
"core": {
"$ref": "#/definitions/api.ClusterNodeCore"
},
"created_at": {
"description": "RFC 3339",
"type": "string"
},
"error": {
"type": "string"
},
"id": {
"type": "string"
},
"last_contact": {
"description": "unix timestamp",
"type": "integer"
"last_contact_ms": {
"type": "number"
},
"latency_ms": {
"description": "milliseconds",
"type": "number"
},
"leader": {
"type": "boolean"
},
"name": {
"type": "string"
},
"resources": {
"$ref": "#/definitions/api.ClusterNodeResources"
},
"state": {
"status": {
"type": "string"
},
"uptime_seconds": {
"type": "integer"
},
"version": {
"type": "string"
},
"voter": {
"type": "boolean"
}
}
},
"api.ClusterNodeCore": {
"type": "object",
"properties": {
"address": {
"type": "string"
},
"error": {
"type": "string"
},
"last_contact_ms": {
"type": "number"
},
"latency_ms": {
"type": "number"
},
"status": {
"type": "string"
}
}
},
@@ -4580,43 +4614,20 @@ const docTemplate = `{
}
},
"api.ClusterRaft": {
"type": "object",
"properties": {
"server": {
"type": "array",
"items": {
"$ref": "#/definitions/api.ClusterRaftServer"
}
},
"stats": {
"$ref": "#/definitions/api.ClusterRaftStats"
}
}
},
"api.ClusterRaftServer": {
"type": "object",
"properties": {
"address": {
"description": "raft address",
"type": "string"
},
"id": {
"type": "string"
},
"leader": {
"type": "boolean"
},
"voter": {
"type": "boolean"
}
}
},
"api.ClusterRaftStats": {
"type": "object",
"properties": {
"last_contact_ms": {
"type": "number"
},
"log_index": {
"type": "integer"
},
"log_term": {
"type": "integer"
},
"num_peers": {
"type": "integer"
},

View File

@@ -4429,12 +4429,6 @@
"address": {
"type": "string"
},
"cluster_api_address": {
"type": "string"
},
"core_api_address": {
"type": "string"
},
"degraded": {
"type": "boolean"
},
@@ -4444,6 +4438,12 @@
"id": {
"type": "string"
},
"leader": {
"type": "boolean"
},
"name": {
"type": "string"
},
"nodes": {
"type": "array",
"items": {
@@ -4492,31 +4492,65 @@
"address": {
"type": "string"
},
"core": {
"$ref": "#/definitions/api.ClusterNodeCore"
},
"created_at": {
"description": "RFC 3339",
"type": "string"
},
"error": {
"type": "string"
},
"id": {
"type": "string"
},
"last_contact": {
"description": "unix timestamp",
"type": "integer"
"last_contact_ms": {
"type": "number"
},
"latency_ms": {
"description": "milliseconds",
"type": "number"
},
"leader": {
"type": "boolean"
},
"name": {
"type": "string"
},
"resources": {
"$ref": "#/definitions/api.ClusterNodeResources"
},
"state": {
"status": {
"type": "string"
},
"uptime_seconds": {
"type": "integer"
},
"version": {
"type": "string"
},
"voter": {
"type": "boolean"
}
}
},
"api.ClusterNodeCore": {
"type": "object",
"properties": {
"address": {
"type": "string"
},
"error": {
"type": "string"
},
"last_contact_ms": {
"type": "number"
},
"latency_ms": {
"type": "number"
},
"status": {
"type": "string"
}
}
},
@@ -4572,43 +4606,20 @@
}
},
"api.ClusterRaft": {
"type": "object",
"properties": {
"server": {
"type": "array",
"items": {
"$ref": "#/definitions/api.ClusterRaftServer"
}
},
"stats": {
"$ref": "#/definitions/api.ClusterRaftStats"
}
}
},
"api.ClusterRaftServer": {
"type": "object",
"properties": {
"address": {
"description": "raft address",
"type": "string"
},
"id": {
"type": "string"
},
"leader": {
"type": "boolean"
},
"voter": {
"type": "boolean"
}
}
},
"api.ClusterRaftStats": {
"type": "object",
"properties": {
"last_contact_ms": {
"type": "number"
},
"log_index": {
"type": "integer"
},
"log_term": {
"type": "integer"
},
"num_peers": {
"type": "integer"
},

View File

@@ -69,16 +69,16 @@ definitions:
properties:
address:
type: string
cluster_api_address:
type: string
core_api_address:
type: string
degraded:
type: boolean
degraded_error:
type: string
id:
type: string
leader:
type: boolean
name:
type: string
nodes:
items:
$ref: '#/definitions/api.ClusterNode'
@@ -110,24 +110,46 @@ definitions:
properties:
address:
type: string
core:
$ref: '#/definitions/api.ClusterNodeCore'
created_at:
description: RFC 3339
type: string
error:
type: string
id:
type: string
last_contact:
description: unix timestamp
type: integer
latency_ms:
description: milliseconds
last_contact_ms:
type: number
latency_ms:
type: number
leader:
type: boolean
name:
type: string
resources:
$ref: '#/definitions/api.ClusterNodeResources'
state:
status:
type: string
uptime_seconds:
type: integer
version:
type: string
voter:
type: boolean
type: object
api.ClusterNodeCore:
properties:
address:
type: string
error:
type: string
last_contact_ms:
type: number
latency_ms:
type: number
status:
type: string
type: object
api.ClusterNodeFiles:
properties:
@@ -165,30 +187,15 @@ definitions:
type: string
type: object
api.ClusterRaft:
properties:
server:
items:
$ref: '#/definitions/api.ClusterRaftServer'
type: array
stats:
$ref: '#/definitions/api.ClusterRaftStats'
type: object
api.ClusterRaftServer:
properties:
address:
description: raft address
type: string
id:
type: string
leader:
type: boolean
voter:
type: boolean
type: object
api.ClusterRaftStats:
properties:
last_contact_ms:
type: number
log_index:
type: integer
log_term:
type: integer
num_peers:
type: integer
state:

View File

@@ -3,32 +3,31 @@ package api
import (
"encoding/json"
"time"
"github.com/datarhei/core/v16/cluster/proxy"
)
type ClusterNode struct {
ID string `json:"id"`
Name string `json:"name"`
Version string `json:"version"`
Status string `json:"status"`
Error string `json:"error"`
Voter bool `json:"voter"`
Leader bool `json:"leader"`
Address string `json:"address"`
CreatedAt string `json:"created_at"`
CreatedAt string `json:"created_at"` // RFC 3339
Uptime int64 `json:"uptime_seconds"`
LastContact int64 `json:"last_contact"` // unix timestamp
Latency float64 `json:"latency_ms"` // milliseconds
State string `json:"state"`
LastContact float64 `json:"last_contact_ms"`
Latency float64 `json:"latency_ms"`
Core ClusterNodeCore `json:"core"`
Resources ClusterNodeResources `json:"resources"`
}
func (n *ClusterNode) Marshal(about proxy.NodeAbout) {
n.ID = about.ID
n.Name = about.Name
n.Address = about.Address
n.CreatedAt = about.CreatedAt.Format(time.RFC3339)
n.Uptime = int64(about.Uptime.Seconds())
n.LastContact = about.LastContact.Unix()
n.Latency = about.Latency.Seconds() * 1000
n.State = about.State
n.Resources = ClusterNodeResources(about.Resources)
// ClusterNodeCore is the API representation of the core of a cluster node.
type ClusterNodeCore struct {
Address string `json:"address"`
Status string `json:"status"`
// Error is the error message; it is left empty if there is no error.
Error string `json:"error"`
// LastContact and Latency are given in milliseconds.
LastContact float64 `json:"last_contact_ms"`
Latency float64 `json:"latency_ms"`
}
type ClusterNodeResources struct {
@@ -40,39 +39,30 @@ type ClusterNodeResources struct {
MemLimit uint64 `json:"memory_limit_bytes"` // bytes
}
type ClusterNodeFiles struct {
LastUpdate int64 `json:"last_update"` // unix timestamp
Files map[string][]string `json:"files"`
}
type ClusterRaftServer struct {
ID string `json:"id"`
Address string `json:"address"` // raft address
Voter bool `json:"voter"`
Leader bool `json:"leader"`
}
type ClusterRaftStats struct {
type ClusterRaft struct {
Address string `json:"address"`
State string `json:"state"`
LastContact float64 `json:"last_contact_ms"`
NumPeers uint64 `json:"num_peers"`
}
type ClusterRaft struct {
Server []ClusterRaftServer `json:"server"`
Stats ClusterRaftStats `json:"stats"`
LogTerm uint64 `json:"log_term"`
LogIndex uint64 `json:"log_index"`
}
type ClusterAbout struct {
ID string `json:"id"`
Address string `json:"address"`
ClusterAPIAddress string `json:"cluster_api_address"`
CoreAPIAddress string `json:"core_api_address"`
Raft ClusterRaft `json:"raft"`
Nodes []ClusterNode `json:"nodes"`
Version string `json:"version"`
Degraded bool `json:"degraded"`
DegradedErr string `json:"degraded_error"`
ID string `json:"id"`
Name string `json:"name"`
Leader bool `json:"leader"`
Address string `json:"address"`
Raft ClusterRaft `json:"raft"`
Nodes []ClusterNode `json:"nodes"`
Version string `json:"version"`
Degraded bool `json:"degraded"`
DegradedErr string `json:"degraded_error"`
}
type ClusterNodeFiles struct {
LastUpdate int64 `json:"last_update"` // unix timestamp
Files map[string][]string `json:"files"`
}
type ClusterProcess struct {

View File

@@ -3,6 +3,7 @@ package api
import (
"fmt"
"net/http"
"time"
"github.com/datarhei/core/v16/cluster"
"github.com/datarhei/core/v16/cluster/proxy"
@@ -56,18 +57,19 @@ func (h *ClusterHandler) About(c echo.Context) error {
state, _ := h.cluster.About()
about := api.ClusterAbout{
ID: state.ID,
Address: state.Address,
ClusterAPIAddress: state.ClusterAPIAddress,
CoreAPIAddress: state.CoreAPIAddress,
ID: state.ID,
Name: state.Name,
Leader: state.Leader,
Address: state.Address,
Raft: api.ClusterRaft{
Server: []api.ClusterRaftServer{},
Stats: api.ClusterRaftStats{
State: state.Raft.Stats.State,
LastContact: state.Raft.Stats.LastContact.Seconds() * 1000,
NumPeers: state.Raft.Stats.NumPeers,
},
Address: state.Raft.Address,
State: state.Raft.State,
LastContact: state.Raft.LastContact.Seconds() * 1000,
NumPeers: state.Raft.NumPeers,
LogTerm: state.Raft.LogTerm,
LogIndex: state.Raft.LogIndex,
},
Nodes: []api.ClusterNode{},
Version: state.Version.String(),
Degraded: state.Degraded,
}
@@ -76,25 +78,53 @@ func (h *ClusterHandler) About(c echo.Context) error {
about.DegradedErr = state.DegradedErr.Error()
}
for _, n := range state.Raft.Server {
about.Raft.Server = append(about.Raft.Server, api.ClusterRaftServer{
ID: n.ID,
Address: n.Address,
Voter: n.Voter,
Leader: n.Leader,
})
}
for _, node := range state.Nodes {
n := api.ClusterNode{}
n.Marshal(node)
about.Nodes = append(about.Nodes, n)
about.Nodes = append(about.Nodes, h.marshalClusterNode(node))
}
return c.JSON(http.StatusOK, about)
}
// marshalClusterNode converts the cluster's internal description of a node
// into its API representation. Durations are emitted as milliseconds (the
// uptime as whole seconds), the creation time as RFC 3339, and errors as
// their message, or an empty string if there is no error.
func (h *ClusterHandler) marshalClusterNode(node cluster.ClusterNode) api.ClusterNode {
	// toMillis expresses a duration as fractional milliseconds.
	toMillis := func(d time.Duration) float64 {
		return d.Seconds() * 1000
	}

	// errText returns the message of err, or "" if err is nil.
	errText := func(err error) string {
		if err == nil {
			return ""
		}
		return err.Error()
	}

	core := api.ClusterNodeCore{
		Address:     node.Core.Address,
		Status:      node.Core.Status,
		Error:       errText(node.Core.Error),
		LastContact: toMillis(node.Core.LastContact),
		Latency:     toMillis(node.Core.Latency),
	}

	resources := api.ClusterNodeResources{
		IsThrottling: node.Resources.IsThrottling,
		NCPU:         node.Resources.NCPU,
		CPU:          node.Resources.CPU,
		CPULimit:     node.Resources.CPULimit,
		Mem:          node.Resources.Mem,
		MemLimit:     node.Resources.MemLimit,
	}

	return api.ClusterNode{
		ID:          node.ID,
		Name:        node.Name,
		Version:     node.Version,
		Status:      node.Status,
		Error:       errText(node.Error),
		Voter:       node.Voter,
		Leader:      node.Leader,
		Address:     node.Address,
		CreatedAt:   node.CreatedAt.Format(time.RFC3339),
		Uptime:      int64(node.Uptime.Seconds()),
		LastContact: toMillis(node.LastContact),
		Latency:     toMillis(node.Latency),
		Core:        core,
		Resources:   resources,
	}
}
// Healthy returns whether the cluster is healthy
// @Summary Whether the cluster is healthy
// @Description Whether the cluster is healthy

View File

@@ -24,16 +24,12 @@ import (
// @Security ApiKeyAuth
// @Router /api/v3/cluster/node [get]
func (h *ClusterHandler) GetNodes(c echo.Context) error {
nodes := h.proxy.ListNodes()
about, _ := h.cluster.About()
list := []api.ClusterNode{}
for _, node := range nodes {
about := node.About()
n := api.ClusterNode{}
n.Marshal(about)
list = append(list, n)
for _, node := range about.Nodes {
list = append(list, h.marshalClusterNode(node))
}
return c.JSON(http.StatusOK, list)
@@ -53,26 +49,17 @@ func (h *ClusterHandler) GetNodes(c echo.Context) error {
func (h *ClusterHandler) GetNode(c echo.Context) error {
id := util.PathParam(c, "id")
peer, err := h.proxy.GetNodeReader(id)
if err != nil {
return api.Err(http.StatusNotFound, "", "node not found: %s", err.Error())
about, _ := h.cluster.About()
for _, node := range about.Nodes {
if node.ID != id {
continue
}
return c.JSON(http.StatusOK, h.marshalClusterNode(node))
}
about := peer.About()
node := api.ClusterNode{
ID: about.ID,
Name: about.Name,
Address: about.Address,
CreatedAt: about.CreatedAt.Format(time.RFC3339),
Uptime: int64(about.Uptime.Seconds()),
LastContact: about.LastContact.Unix(),
Latency: about.Latency.Seconds() * 1000,
State: about.State,
Resources: api.ClusterNodeResources(about.Resources),
}
return c.JSON(http.StatusOK, node)
return api.Err(http.StatusNotFound, "", "node not found")
}
// GetNodeVersion returns the proxy node version with the given ID