diff --git a/cluster/cluster.go b/cluster/cluster.go index 2c23a5c3..0b20f44b 100644 --- a/cluster/cluster.go +++ b/cluster/cluster.go @@ -19,6 +19,7 @@ import ( "github.com/datarhei/core/v16/log" "github.com/datarhei/core/v16/net" "github.com/datarhei/core/v16/restream/app" + hclog "github.com/hashicorp/go-hclog" "github.com/hashicorp/raft" raftboltdb "github.com/hashicorp/raft-boltdb/v2" @@ -67,11 +68,6 @@ type Cluster interface { Shutdown() error - AddNode(id, address string) error - RemoveNode(id string) error - ListNodes() []addNodeCommand - GetNode(id string) (addNodeCommand, error) - AddProcess(origin string, config *app.Config) error RemoveProcess(origin, id string) error @@ -608,69 +604,6 @@ func (rcw *readCloserWrapper) Close() error { return nil } -func (c *cluster) ListNodes() []addNodeCommand { - c.store.ListNodes() - - return nil -} - -func (c *cluster) GetNode(id string) (addNodeCommand, error) { - c.store.GetNode(id) - - return addNodeCommand{}, nil -} - -func (c *cluster) AddNode(id, address string) error { - if !c.IsRaftLeader() { - return fmt.Errorf("not leader") - } - - com := &command{ - Operation: opAddNode, - Data: &addNodeCommand{ - ID: id, - Address: address, - }, - } - - b, err := json.Marshal(com) - if err != nil { - return err - } - - future := c.raft.Apply(b, 5*time.Second) - if err := future.Error(); err != nil { - return fmt.Errorf("applying command failed: %w", err) - } - - return nil -} - -func (c *cluster) RemoveNode(id string) error { - if !c.IsRaftLeader() { - return fmt.Errorf("not leader") - } - - com := &command{ - Operation: opRemoveNode, - Data: &removeNodeCommand{ - ID: id, - }, - } - - b, err := json.Marshal(com) - if err != nil { - return err - } - - future := c.raft.Apply(b, 5*time.Second) - if err := future.Error(); err != nil { - return fmt.Errorf("applying command failed: %w", err) - } - - return nil -} - func (c *cluster) trackNodeChanges() { ticker := time.NewTicker(time.Second) defer ticker.Stop() @@ -694,9 +627,6 @@ func (c *cluster) trackNodeChanges() { for _, server := range future.Configuration().Servers { id := string(server.ID) - if id == c.id { - continue - } _, ok := c.nodes[id] if !ok { @@ -733,10 +663,14 @@ func (c *cluster) trackNodeChanges() { } for id := range removeNodes { - if node, ok := c.nodes[id]; ok { - node.Disconnect() - c.proxy.RemoveNode(id) + node, ok := c.nodes[id] + if !ok { + continue } + + node.Disconnect() + c.proxy.RemoveNode(id) + delete(c.nodes, id) } c.nodesLock.Unlock() diff --git a/cluster/node.go b/cluster/node.go index 130c504e..62ab5ed2 100644 --- a/cluster/node.go +++ b/cluster/node.go @@ -8,20 +8,20 @@ import ( "net/http" "net/url" "path/filepath" - "regexp" "strings" "sync" "time" "github.com/datarhei/core/v16/client" + httpapi "github.com/datarhei/core/v16/http/api" ) type Node interface { Connect() error Disconnect() - Start(updates chan<- NodeState) error - Stop() + StartFiles(updates chan<- NodeFiles) error + StopFiles() GetURL(path string) (string, error) GetFile(path string) (io.ReadCloser, error) @@ -39,16 +39,23 @@ type NodeReader interface { ID() string Address() string IPs() []string + Files() NodeFiles State() NodeState } -type NodeState struct { +type NodeFiles struct { ID string - State string Files []string - LastPing time.Time LastUpdate time.Time - Latency time.Duration +} + +type NodeState struct { + ID string + State string + LastContact time.Time + Latency time.Duration + CPU float64 + Mem float64 } type nodeState string @@ -68,13 +75,19 @@ type node struct { peer client.RestClient peerLock sync.RWMutex - lastPing time.Time cancelPing context.CancelFunc + lastContact time.Time + + resources struct { + cpu float64 + mem float64 + } + state nodeState latency float64 // Seconds stateLock sync.RWMutex - updates chan<- NodeState + updates chan<- NodeFiles filesList []string lastUpdate time.Time cancelFiles context.CancelFunc @@ -91,15 +104,12 @@ type node struct { srtAddress string srtPassphrase string srtToken string - - prefix *regexp.Regexp } func NewNode(address string) Node { n := &node{ address: address, state: stateDisconnected, - prefix: regexp.MustCompile(`^[a-z]+:`), secure: strings.HasPrefix(address, "https://"), } @@ -195,14 +205,14 @@ func (n *node) Connect() error { for { select { case <-ticker.C: - // ping + // Ping ok, latency := n.peer.Ping() n.stateLock.Lock() if !ok { n.state = stateDisconnected } else { - n.lastPing = time.Now() + n.lastContact = time.Now() n.state = stateConnected } n.latency = n.latency*0.2 + latency.Seconds()*0.8 @@ -213,6 +223,63 @@ func (n *node) Connect() error { } }(ctx) + go func(ctx context.Context) { + ticker := time.NewTicker(time.Second) + defer ticker.Stop() + + for { + select { + case <-ticker.C: + // Metrics + metrics, err := n.peer.Metrics(httpapi.MetricsQuery{ + Metrics: []httpapi.MetricsQueryMetric{ + { + Name: "cpu_idle", + }, + { + Name: "mem_total", + }, + { + Name: "mem_free", + }, + }, + }) + if err != nil { + n.stateLock.Lock() + n.resources.cpu = 100 + n.resources.mem = 100 + n.stateLock.Unlock() + } + + cpu_idle := .0 + mem_total := .0 + mem_free := .0 + + for _, x := range metrics.Metrics { + if x.Name == "cpu_idle" { + cpu_idle = x.Values[0].Value + } else if x.Name == "mem_total" { + mem_total = x.Values[0].Value + } else if x.Name == "mem_free" { + mem_free = x.Values[0].Value + } + } + + n.stateLock.Lock() + n.resources.cpu = 100 - cpu_idle + if mem_total != 0 { + n.resources.mem = (mem_total - mem_free) / mem_total * 100 + } else { + n.resources.mem = 100 + } + n.lastContact = time.Now() + n.stateLock.Unlock() + case <-ctx.Done(): + return + } + } + }(ctx) + return nil } @@ -228,7 +295,7 @@ func (n *node) Disconnect() { n.peer = nil } -func (n *node) Start(updates chan<- NodeState) error { +func (n *node) StartFiles(updates chan<- NodeFiles) error { n.runningLock.Lock() defer n.runningLock.Unlock() @@ -254,7 +321,7 @@ func (n *node) Start(updates chan<- NodeState) error { n.files() select { - case n.updates <- n.State(): + case n.updates <- n.Files(): default: } } @@ -264,7 +331,7 @@ func (n *node) Start(updates chan<- NodeState) error { return nil } -func (n *node) Stop() { +func (n *node) StopFiles() { n.runningLock.Lock() defer n.runningLock.Unlock() @@ -289,23 +356,34 @@ func (n *node) ID() string { return n.peer.ID() } +func (n *node) Files() NodeFiles { + n.stateLock.RLock() + defer n.stateLock.RUnlock() + + state := NodeFiles{ + ID: n.peer.ID(), + LastUpdate: n.lastUpdate, + } + + if n.state != stateDisconnected && time.Since(n.lastUpdate) <= 2*time.Second { + state.Files = make([]string, len(n.filesList)) + copy(state.Files, n.filesList) + } + + return state +} + func (n *node) State() NodeState { n.stateLock.RLock() defer n.stateLock.RUnlock() state := NodeState{ - ID: n.peer.ID(), - LastPing: n.lastPing, - LastUpdate: n.lastUpdate, - Latency: time.Duration(n.latency * float64(time.Second)), - } - - if n.state == stateDisconnected || time.Since(n.lastUpdate) > 2*time.Second { - state.State = stateDisconnected.String() - } else { - state.State = n.state.String() - state.Files = make([]string, len(n.filesList)) - copy(state.Files, n.filesList) + ID: n.peer.ID(), + LastContact: n.lastContact, + State: n.state.String(), + Latency: time.Duration(n.latency * float64(time.Second)), + CPU: n.resources.cpu, + Mem: n.resources.mem, } return state @@ -416,27 +494,29 @@ func (n *node) files() { n.filesList = make([]string, len(filesList)) copy(n.filesList, filesList) n.lastUpdate = time.Now() + n.lastContact = time.Now() n.stateLock.Unlock() } func (n *node) GetURL(path string) (string, error) { - // Remove prefix from path - prefix := n.prefix.FindString(path) - path = n.prefix.ReplaceAllString(path, "") + prefix, path, found := strings.Cut(path, ":") + if !found { + return "", fmt.Errorf("no prefix provided") + } u := "" - if prefix == "mem:" { + if prefix == "mem" { u = n.address + "/" + filepath.Join("memfs", path) - } else if prefix == "disk:" { + } else if prefix == "disk" { u = n.address + path - } else if prefix == "rtmp:" { + } else if prefix == "rtmp" { u = n.rtmpAddress + path if len(n.rtmpToken) != 0 { u += "?token=" + url.QueryEscape(n.rtmpToken) } - } else if prefix == "srt:" { + } else if prefix == "srt" { u = n.srtAddress + "?mode=caller" if len(n.srtPassphrase) != 0 { u += "&passphrase=" + url.QueryEscape(n.srtPassphrase) @@ -454,16 +534,17 @@ func (n *node) GetURL(path string) (string, error) { } func (n *node) GetFile(path string) (io.ReadCloser, error) { - // Remove prefix from path - prefix := n.prefix.FindString(path) - path = n.prefix.ReplaceAllString(path, "") + prefix, path, found := strings.Cut(path, ":") + if !found { + return nil, fmt.Errorf("no prefix provided") + } n.peerLock.RLock() defer n.peerLock.RUnlock() - if prefix == "mem:" { + if prefix == "mem" { return n.peer.MemFSGetFile(path) - } else if prefix == "disk:" { + } else if prefix == "disk" { return n.peer.DiskFSGetFile(path) } diff --git a/cluster/proxy.go b/cluster/proxy.go index 05c2041e..01b8e593 100644 --- a/cluster/proxy.go +++ b/cluster/proxy.go @@ -90,7 +90,7 @@ type proxy struct { limiter net.IPLimiter - updates chan NodeState + updates chan NodeFiles lock sync.RWMutex cancel context.CancelFunc @@ -109,7 +109,7 @@ func NewProxy(config ProxyConfig) (Proxy, error) { idupdate: map[string]time.Time{}, fileid: map[string]string{}, limiter: config.IPLimiter, - updates: make(chan NodeState, 64), + updates: make(chan NodeFiles, 64), logger: config.Logger, } @@ -144,31 +144,32 @@ func (p *proxy) Start() { select { case <-ctx.Done(): return - case state := <-p.updates: + case update := <-p.updates: p.logger.Debug().WithFields(log.Fields{ - "node": state.ID, - "state": state.State, - "files": len(state.Files), + "node": update.ID, + "files": len(update.Files), }).Log("Got update") + if p.id == update.ID { + continue + } + p.lock.Lock() // Cleanup - files := p.idfiles[state.ID] + files := p.idfiles[update.ID] for _, file := range files { delete(p.fileid, file) } - delete(p.idfiles, state.ID) - delete(p.idupdate, state.ID) + delete(p.idfiles, update.ID) + delete(p.idupdate, update.ID) - if state.State == "connected" { - // Add files - for _, file := range state.Files { - p.fileid[file] = state.ID - } - p.idfiles[state.ID] = files - p.idupdate[state.ID] = state.LastUpdate + // Add files + for _, file := range update.Files { + p.fileid[file] = update.ID } + p.idfiles[update.ID] = files + p.idupdate[update.ID] = update.LastUpdate p.lock.Unlock() } @@ -192,7 +193,7 @@ func (p *proxy) Stop() { p.cancel = nil for _, node := range p.nodes { - node.Stop() + node.StopFiles() } p.nodes = map[string]Node{} @@ -205,10 +206,6 @@ func (p *proxy) Reader() ProxyReader { } func (p *proxy) AddNode(id string, node Node) (string, error) { - if id == p.id { - return "", fmt.Errorf("can't add myself as node or a node with the same ID") - } - if id != node.ID() { return "", fmt.Errorf("the provided (%s) and retrieved (%s) ID's don't match", id, node.ID()) } @@ -217,7 +214,7 @@ func (p *proxy) AddNode(id string, node Node) (string, error) { defer p.lock.Unlock() if n, ok := p.nodes[id]; ok { - n.Stop() + n.StopFiles() delete(p.nodes, id) @@ -237,7 +234,7 @@ func (p *proxy) AddNode(id string, node Node) (string, error) { p.nodes[id] = node - node.Start(p.updates) + node.StartFiles(p.updates) p.logger.Info().WithFields(log.Fields{ "address": node.Address(), @@ -256,7 +253,7 @@ func (p *proxy) RemoveNode(id string) error { return ErrNodeNotFound } - node.Stop() + node.StopFiles() delete(p.nodes, id) diff --git a/cluster/store.go b/cluster/store.go index 17cb43ae..711f5945 100644 --- a/cluster/store.go +++ b/cluster/store.go @@ -14,9 +14,6 @@ import ( type Store interface { raft.FSM - ListNodes() []StoreNode - GetNode(id string) (StoreNode, error) - ListProcesses() []app.Config GetProcess(id string) (app.Config, error) } @@ -24,8 +21,6 @@ type Store interface { type operation string const ( - opAddNode operation = "addNode" - opRemoveNode operation = "removeNode" opAddProcess operation = "addProcess" opRemoveProcess operation = "removeProcess" ) @@ -35,15 +30,6 @@ type command struct { Data interface{} } -type addNodeCommand struct { - ID string - Address string -} - -type removeNodeCommand struct { - ID string -} - type StoreNode struct { ID string Address string @@ -60,13 +46,11 @@ type removeProcessCommand struct { // Implement a FSM type store struct { lock sync.RWMutex - Nodes map[string]string Process map[string]app.Config } func NewStore() (Store, error) { return &store{ - Nodes: map[string]string{}, Process: map[string]app.Config{}, }, nil } @@ -86,26 +70,6 @@ func (s *store) Apply(log *raft.Log) interface{} { fmt.Printf("op: %+v\n", c) switch c.Operation { - case opAddNode: - b, _ := json.Marshal(c.Data) - cmd := addNodeCommand{} - json.Unmarshal(b, &cmd) - - fmt.Printf("addNode: %+v\n", cmd) - - s.lock.Lock() - s.Nodes[cmd.ID] = cmd.Address - s.lock.Unlock() - case opRemoveNode: - b, _ := json.Marshal(c.Data) - cmd := removeNodeCommand{} - json.Unmarshal(b, &cmd) - - fmt.Printf("removeNode: %+v\n", cmd) - - s.lock.Lock() - delete(s.Nodes, cmd.ID) - s.lock.Unlock() case opAddProcess: b, _ := json.Marshal(c.Data) cmd := addProcessCommand{} @@ -162,37 +126,6 @@ func (s *store) Restore(snapshot io.ReadCloser) error { return nil } -func (s *store) ListNodes() []StoreNode { - nodes := []StoreNode{} - - s.lock.Lock() - defer s.lock.Unlock() - - for id, address := range s.Nodes { - nodes = append(nodes, StoreNode{ - ID: id, - Address: address, - }) - } - - return nodes -} - -func (s *store) GetNode(id string) (StoreNode, error) { - s.lock.Lock() - defer s.lock.Unlock() - - address, ok := s.Nodes[id] - if !ok { - return StoreNode{}, fmt.Errorf("not found") - } - - return StoreNode{ - ID: id, - Address: address, - }, nil -} - func (s *store) ListProcesses() []app.Config { s.lock.RLock() defer s.lock.RUnlock() diff --git a/http/api/cluster.go b/http/api/cluster.go index 2dedb103..513ce242 100644 --- a/http/api/cluster.go +++ b/http/api/cluster.go @@ -7,15 +7,19 @@ type ClusterNodeConfig struct { } type ClusterNode struct { - Address string `json:"address"` - ID string `json:"id"` - LastPing int64 `json:"last_ping"` - LastUpdate int64 `json:"last_update"` - Latency float64 `json:"latency_ms"` // milliseconds - State string `json:"state"` + Address string `json:"address"` + ID string `json:"id"` + LastContact int64 `json:"last_contact"` // unix timestamp + Latency float64 `json:"latency_ms"` // milliseconds + State string `json:"state"` + CPU float64 `json:"cpu_used"` // percent + Mem float64 `json:"mem_used"` // percent } -type ClusterNodeFiles map[string][]string +type ClusterNodeFiles struct { + LastUpdate int64 `json:"last_update"` // unix timestamp + Files map[string][]string `json:"files"` +} type ClusterServer struct { ID string `json:"id"` diff --git a/http/api/metrics.go b/http/api/metrics.go index f2476988..76acf92b 100644 --- a/http/api/metrics.go +++ b/http/api/metrics.go @@ -1,6 +1,7 @@ package api import ( + "encoding/json" "fmt" "time" @@ -50,6 +51,21 @@ func (v MetricsResponseValue) MarshalJSON() ([]byte, error) { return []byte(s), nil } +// MarshalJSON unmarshals a JSON to MetricsResponseValue +func (v *MetricsResponseValue) UnmarshalJSON(data []byte) error { + x := []float64{} + + err := json.Unmarshal(data, &x) + if err != nil { + return err + } + + v.TS = time.Unix(int64(x[0]), 0) + v.Value = x[1] + + return nil +} + type MetricsResponse struct { Timerange int64 `json:"timerange_sec" format:"int64"` Interval int64 `json:"interval_sec" format:"int64"` diff --git a/http/handler/api/cluster.go b/http/handler/api/cluster.go index 5b9beb1f..506fe167 100644 --- a/http/handler/api/cluster.go +++ b/http/handler/api/cluster.go @@ -2,7 +2,6 @@ package api import ( "net/http" - "regexp" "sort" "strings" @@ -17,7 +16,6 @@ import ( type ClusterHandler struct { cluster cluster.Cluster proxy cluster.ProxyReader - prefix *regexp.Regexp } // NewCluster return a new ClusterHandler type. You have to provide a cluster. @@ -25,7 +23,6 @@ func NewCluster(cluster cluster.Cluster) *ClusterHandler { return &ClusterHandler{ cluster: cluster, proxy: cluster.ProxyReader(), - prefix: regexp.MustCompile(`^[a-z]+:`), } } @@ -47,12 +44,13 @@ func (h *ClusterHandler) GetProxyNodes(c echo.Context) error { for _, node := range nodes { state := node.State() n := api.ClusterNode{ - Address: node.Address(), - ID: state.ID, - LastPing: state.LastPing.Unix(), - LastUpdate: state.LastUpdate.Unix(), - Latency: state.Latency.Seconds() * 1000, - State: state.State, + Address: node.Address(), + ID: state.ID, + LastContact: state.LastContact.Unix(), + Latency: state.Latency.Seconds() * 1000, + State: state.State, + CPU: state.CPU, + Mem: state.Mem, } list = append(list, n) @@ -83,10 +81,13 @@ func (h *ClusterHandler) GetProxyNode(c echo.Context) error { state := peer.State() node := api.ClusterNode{ - Address: peer.Address(), - ID: state.ID, - LastUpdate: state.LastUpdate.Unix(), - State: state.State, + Address: peer.Address(), + ID: state.ID, + LastContact: state.LastContact.Unix(), + Latency: state.Latency.Seconds() * 1000, + State: state.State, + CPU: state.CPU, + Mem: state.Mem, } return c.JSON(http.StatusOK, node) @@ -111,17 +112,23 @@ func (h *ClusterHandler) GetProxyNodeFiles(c echo.Context) error { return api.Err(http.StatusNotFound, "Node not found", "%s", err) } - files := api.ClusterNodeFiles{} + files := api.ClusterNodeFiles{ + Files: make(map[string][]string), + } - state := peer.State() + peerFiles := peer.Files() - sort.Strings(state.Files) + files.LastUpdate = peerFiles.LastUpdate.Unix() - for _, path := range state.Files { - prefix := strings.TrimSuffix(h.prefix.FindString(path), ":") - path = h.prefix.ReplaceAllString(path, "") + sort.Strings(peerFiles.Files) - files[prefix] = append(files[prefix], path) + for _, path := range peerFiles.Files { + prefix, path, found := strings.Cut(path, ":") + if !found { + continue + } + + files.Files[prefix] = append(files.Files[prefix], path) } return c.JSON(http.StatusOK, files)