package node

import (
	"context"
	"errors"
	"fmt"
	"io"
	"math/rand/v2"
	"net/url"
	"sort"
	"sync"
	"time"

	"github.com/datarhei/core/v16/http/api"
	"github.com/datarhei/core/v16/http/client"
	"github.com/datarhei/core/v16/log"
	"github.com/datarhei/core/v16/restream/app"
)

type ProcessListOptions = client.ProcessListOptions

// ManagerConfig is the configuration for a node Manager.
type ManagerConfig struct {
	ID string // ID of the node

	Logger log.Logger
}

// Manager keeps track of the known nodes and provides access to them.
type Manager struct {
	id string

	nodes map[string]*Node // List of known nodes
	lock  sync.RWMutex

	cache *Cache[string]

	logger log.Logger
}

var ErrNodeNotFound = errors.New("node not found")

// NewManager creates a new node manager from the given configuration.
func NewManager(config ManagerConfig) (*Manager, error) {
	p := &Manager{
		id:     config.ID,
		nodes:  map[string]*Node{},
		cache:  NewCache[string](nil),
		logger: config.Logger,
	}

	if p.logger == nil {
		p.logger = log.New("")
	}

	return p, nil
}

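// A minimal usage sketch (illustrative only; the node IDs are made up and the
// *Node value is assumed to have been created elsewhere):
//
//	manager, _ := NewManager(ManagerConfig{ID: "local-node"})
//	manager.NodeAdd("node-2", node)
//	n, err := manager.NodeGet("node-2")
//	_, _ = n, err
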
// NodeAdd adds a node under the given ID. If a node with the same ID is
// already known, that node is stopped and replaced.
func (p *Manager) NodeAdd(id string, node *Node) (string, error) {
	about := node.About()

	p.lock.Lock()
	defer p.lock.Unlock()

	if n, ok := p.nodes[id]; ok {
		n.Stop()
		delete(p.nodes, id)
	}

	p.nodes[id] = node

	p.logger.Info().WithFields(log.Fields{
		"address": about.Address,
		"name":    about.Name,
		"id":      id,
	}).Log("Added node")

	return id, nil
}

// NodeRemove stops the node with the given ID and removes it. It returns the
// removed node, or ErrNodeNotFound if the ID is unknown.
func (p *Manager) NodeRemove(id string) (*Node, error) {
	p.lock.Lock()
	defer p.lock.Unlock()

	node, ok := p.nodes[id]
	if !ok {
		return nil, ErrNodeNotFound
	}

	node.Stop()

	delete(p.nodes, id)

	p.logger.Info().WithFields(log.Fields{
		"id": id,
	}).Log("Removed node")

	return node, nil
}

// NodesClear stops and removes all known nodes.
func (p *Manager) NodesClear() {
	p.lock.Lock()
	defer p.lock.Unlock()

	for _, node := range p.nodes {
		node.Stop()
	}

	p.nodes = map[string]*Node{}

	p.logger.Info().Log("Removed all nodes")
}

// NodeHasNode returns whether a node with the given ID is known.
func (p *Manager) NodeHasNode(id string) bool {
	p.lock.RLock()
	defer p.lock.RUnlock()

	_, hasNode := p.nodes[id]

	return hasNode
}

// NodeIDs returns the IDs of all known nodes.
func (p *Manager) NodeIDs() []string {
	list := []string{}

	p.lock.RLock()
	defer p.lock.RUnlock()

	for id := range p.nodes {
		list = append(list, id)
	}

	return list
}

// NodeCount returns the number of known nodes.
func (p *Manager) NodeCount() int {
	p.lock.RLock()
	defer p.lock.RUnlock()

	return len(p.nodes)
}

// NodeList returns all known nodes.
func (p *Manager) NodeList() []*Node {
	list := []*Node{}

	p.lock.RLock()
	defer p.lock.RUnlock()

	for _, node := range p.nodes {
		list = append(list, node)
	}

	return list
}

// NodeGet returns the node with the given ID, or ErrNodeNotFound if the ID
// is unknown.
func (p *Manager) NodeGet(id string) (*Node, error) {
	p.lock.RLock()
	defer p.lock.RUnlock()

	node, ok := p.nodes[id]
	if !ok {
		return nil, fmt.Errorf("%s: %w", id, ErrNodeNotFound)
	}

	return node, nil
}

// NodeCheckCompatibility checks all remote nodes for compatibility with the
// local node.
func (p *Manager) NodeCheckCompatibility(skipSkillsCheck bool) {
	p.lock.RLock()
	defer p.lock.RUnlock()

	local, hasLocal := p.nodes[p.id]
	if !hasLocal {
		local = nil
	}

	for id, node := range p.nodes {
		if id == p.id {
			continue
		}

		node.CheckCompatibility(local, skipSkillsCheck)
	}
}

// Barrier returns true if all nodes passed the barrier with the given name.
func (p *Manager) Barrier(name string) (bool, error) {
	p.lock.RLock()
	defer p.lock.RUnlock()

	for _, node := range p.nodes {
		ok, err := node.Barrier(name)
		if !ok {
			return false, err
		}
	}

	return true, nil
}

// GetHostnames returns a list of all hostnames configured on the nodes. The
// returned list does not contain duplicates. If common is true, only
// hostnames that are configured on every node are returned.
func (p *Manager) GetHostnames(common bool) ([]string, error) {
	hostnames := map[string]int{}

	p.lock.RLock()
	defer p.lock.RUnlock()

	for id, node := range p.nodes {
		config, err := node.CoreConfig(true)
		if err != nil {
			return nil, fmt.Errorf("node %s has no configuration available: %w", id, err)
		}

		for _, name := range config.Host.Name {
			hostnames[name]++
		}
	}

	names := []string{}

	for key, value := range hostnames {
		if common && value != len(p.nodes) {
			continue
		}

		names = append(names, key)
	}

	sort.Strings(names)

	return names, nil
}

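// Illustrative sketch of the two modes (the variable names are made up):
//
//	all, _ := manager.GetHostnames(false)   // hostnames configured on any node
//	shared, _ := manager.GetHostnames(true) // hostnames configured on every node
//	_, _ = all, shared
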
// MediaGetURL returns the URL of the requested media on whichever node has it.
func (p *Manager) MediaGetURL(prefix, path string) (*url.URL, error) {
	logger := p.logger.WithFields(log.Fields{
		"path":   path,
		"prefix": prefix,
	})

	node, err := p.getNodeForMedia(prefix, path)
	if err != nil {
		logger.Debug().WithError(err).Log("Unknown node")
		return nil, fmt.Errorf("file not found: %w", err)
	}

	url, err := node.Core().MediaGetURL(prefix, path)
	if err != nil {
		logger.Debug().Log("Invalid path")
		return nil, fmt.Errorf("file not found")
	}

	logger.Debug().WithField("url", url).Log("File cluster url")

	return url, nil
}

// FilesystemGetFile returns a reader for the requested file, starting at the
// given offset, from whichever node has it.
func (p *Manager) FilesystemGetFile(prefix, path string, offset int64) (io.ReadCloser, error) {
	logger := p.logger.WithFields(log.Fields{
		"path":   path,
		"prefix": prefix,
	})

	node, err := p.getNodeForMedia(prefix, path)
	if err != nil {
		logger.Debug().WithError(err).Log("File not available")
		return nil, fmt.Errorf("file not found")
	}

	data, err := node.Core().FilesystemGetFile(prefix, path, offset)
	if err != nil {
		logger.Debug().Log("Invalid path")
		return nil, fmt.Errorf("file not found")
	}

	logger.Debug().Log("File cluster path")

	return data, nil
}

// FilesystemGetFileInfo returns the size and last modification time of the
// requested file from whichever node has it.
func (p *Manager) FilesystemGetFileInfo(prefix, path string) (int64, time.Time, error) {
	logger := p.logger.WithFields(log.Fields{
		"path":   path,
		"prefix": prefix,
	})

	node, err := p.getNodeForMedia(prefix, path)
	if err != nil {
		logger.Debug().WithError(err).Log("File not available")
		return 0, time.Time{}, fmt.Errorf("file not found")
	}

	size, lastModified, err := node.Core().FilesystemGetFileInfo(prefix, path)
	if err != nil {
		logger.Debug().Log("Invalid path")
		return 0, time.Time{}, fmt.Errorf("file not found")
	}

	logger.Debug().Log("File cluster path")

	return size, lastModified, nil
}

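// Illustrative sketch of reading a file from whichever node holds it (the
// prefix and path are made up):
//
//	size, modTime, err := manager.FilesystemGetFileInfo("disk", "/recordings/a.mp4")
//	if err == nil {
//		r, err := manager.FilesystemGetFile("disk", "/recordings/a.mp4", 0)
//		if err == nil {
//			defer r.Close()
//			// read up to size bytes, last modified at modTime
//		}
//	}
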
// getNodeIDForMedia queries all nodes concurrently and returns the ID of one
// node that has the requested file. This is only used for the mem and disk
// prefixes.
func (p *Manager) getNodeIDForMedia(prefix, path string) (string, error) {
	nodesChan := make(chan string, 16)
	nodeids := []string{}

	wgList := sync.WaitGroup{}
	wgList.Add(1)

	// Collect the IDs of all nodes that have the file.
	go func() {
		defer wgList.Done()

		for nodeid := range nodesChan {
			if len(nodeid) == 0 {
				continue
			}

			nodeids = append(nodeids, nodeid)
		}
	}()

	wg := sync.WaitGroup{}

	// Ask each node concurrently whether it has the file.
	p.lock.RLock()
	for id, n := range p.nodes {
		wg.Add(1)

		go func(nodeid string, node *Node, p chan<- string) {
			defer wg.Done()

			_, _, err := node.Core().MediaGetInfo(prefix, path)
			if err != nil {
				nodeid = ""
			}

			p <- nodeid
		}(id, n, nodesChan)
	}
	p.lock.RUnlock()

	wg.Wait()

	close(nodesChan)

	wgList.Wait()

	if len(nodeids) == 0 {
		return "", fmt.Errorf("file not found")
	}

	return nodeids[0], nil
}

// getNodeForMedia returns a node that has the requested file. Successful
// lookups are cached for 5 seconds.
func (p *Manager) getNodeForMedia(prefix, path string) (*Node, error) {
	id, err := p.cache.Get(prefix + ":" + path)
	if err == nil {
		node, err := p.NodeGet(id)
		if err == nil {
			return node, nil
		}
	}

	id, err = p.getNodeIDForMedia(prefix, path)
	if err != nil {
		return nil, err
	}

	p.cache.Put(prefix+":"+path, id, 5*time.Second)

	return p.NodeGet(id)
}

// FilesystemList queries all nodes concurrently and returns the merged list
// of files on the given storage that match the pattern. Nodes that fail to
// respond are skipped.
func (p *Manager) FilesystemList(storage, pattern string) []api.FileInfo {
	filesChan := make(chan []api.FileInfo, 64)
	filesList := []api.FileInfo{}

	wgList := sync.WaitGroup{}
	wgList.Add(1)

	go func() {
		defer wgList.Done()

		for list := range filesChan {
			filesList = append(filesList, list...)
		}
	}()

	wg := sync.WaitGroup{}

	p.lock.RLock()
	for _, n := range p.nodes {
		wg.Add(1)

		go func(node *Node, p chan<- []api.FileInfo) {
			defer wg.Done()

			files, err := node.Core().FilesystemList(storage, pattern)
			if err != nil {
				return
			}

			p <- files
		}(n, filesChan)
	}
	p.lock.RUnlock()

	wg.Wait()

	close(filesChan)

	wgList.Wait()

	return filesList
}

// ClusterProcessList queries all nodes concurrently and returns the merged
// list of their processes. An error is returned if any node fails to respond.
func (p *Manager) ClusterProcessList() ([]Process, error) {
	processChan := make(chan []Process, 64)
	processList := []Process{}
	errorChan := make(chan error, 8)
	errorList := []error{}

	wgList := sync.WaitGroup{}
	wgList.Add(2)

	go func() {
		defer wgList.Done()

		for list := range processChan {
			processList = append(processList, list...)
		}
	}()

	go func() {
		defer wgList.Done()

		for err := range errorChan {
			errorList = append(errorList, err)
		}
	}()

	wg := sync.WaitGroup{}

	p.lock.RLock()
	for _, n := range p.nodes {
		wg.Add(1)

		go func(node *Node, p chan<- []Process, e chan<- error) {
			defer wg.Done()

			processes, err := node.Core().ClusterProcessList()
			if err != nil {
				e <- err
				return
			}

			p <- processes
		}(n, processChan, errorChan)
	}
	p.lock.RUnlock()

	wg.Wait()

	close(processChan)
	close(errorChan)

	wgList.Wait()

	if len(errorList) != 0 {
		return nil, fmt.Errorf("not all nodes responded with their process list")
	}

	return processList, nil
}

// FindNodeForResources returns the ID of a node with enough free resources
// for the requested CPU and memory. The node with the given ID is checked
// first; if it doesn't fit, any other suitable node is returned. An empty
// string is returned if no node has enough resources.
func (p *Manager) FindNodeForResources(nodeid string, cpu float64, memory uint64) string {
	p.lock.RLock()
	defer p.lock.RUnlock()

	if len(nodeid) != 0 {
		node, ok := p.nodes[nodeid]
		if ok {
			r := node.About().Resources
			if r.Error == nil && r.CPU+cpu <= r.CPULimit && r.Mem+memory <= r.MemLimit && !r.IsThrottling {
				return nodeid
			}
		}
	}

	for nodeid, node := range p.nodes {
		r := node.About().Resources
		if r.Error == nil && r.CPU+cpu <= r.CPULimit && r.Mem+memory <= r.MemLimit && !r.IsThrottling {
			return nodeid
		}
	}

	return ""
}

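// Illustrative placement sketch (the node ID and resource figures are made
// up): prefer a specific node, otherwise fall back to a random one.
//
//	nodeid := manager.FindNodeForResources("preferred-node", 25.0, 256*1024*1024)
//	if nodeid == "" {
//		nodeid = manager.GetRandomNode()
//	}
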
// GetRandomNode returns the ID of a randomly chosen node, or an empty string
// if no nodes are known.
func (p *Manager) GetRandomNode() string {
	p.lock.RLock()
	defer p.lock.RUnlock()

	nodes := []string{}
	for nodeid := range p.nodes {
		nodes = append(nodes, nodeid)
	}

	if len(nodes) == 0 {
		return ""
	}

	return nodes[rand.IntN(len(nodes))]
}

// ProcessList queries all nodes concurrently and returns the merged list of
// their processes, filtered by the given options. Nodes that fail to respond
// are skipped.
func (p *Manager) ProcessList(options client.ProcessListOptions) []api.Process {
	processChan := make(chan []api.Process, 64)
	processList := []api.Process{}

	wgList := sync.WaitGroup{}
	wgList.Add(1)

	go func() {
		defer wgList.Done()

		for list := range processChan {
			processList = append(processList, list...)
		}
	}()

	wg := sync.WaitGroup{}

	p.lock.RLock()
	for _, n := range p.nodes {
		wg.Add(1)

		go func(node *Node, p chan<- []api.Process) {
			defer wg.Done()

			processes, err := node.Core().ProcessList(options)
			if err != nil {
				return
			}

			p <- processes
		}(n, processChan)
	}
	p.lock.RUnlock()

	wg.Wait()

	close(processChan)

	wgList.Wait()

	return processList
}

// ProcessGet returns the process with the given ID from the node with the
// given node ID.
func (p *Manager) ProcessGet(nodeid string, id app.ProcessID, filter []string) (api.Process, error) {
	node, err := p.NodeGet(nodeid)
	if err != nil {
		return api.Process{}, err
	}

	list, err := node.Core().ProcessList(client.ProcessListOptions{
		ID:     []string{id.ID},
		Filter: filter,
		Domain: id.Domain,
	})
	if err != nil {
		return api.Process{}, err
	}

	if len(list) == 0 {
		return api.Process{}, fmt.Errorf("process not found")
	}

	return list[0], nil
}

// ProcessAdd adds a process with the given config and metadata to the node
// with the given node ID.
func (p *Manager) ProcessAdd(nodeid string, config *app.Config, metadata map[string]interface{}) error {
	node, err := p.NodeGet(nodeid)
	if err != nil {
		return err
	}

	return node.Core().ProcessAdd(config, metadata)
}

// ProcessDelete deletes the process with the given ID from the node with the
// given node ID.
func (p *Manager) ProcessDelete(nodeid string, id app.ProcessID) error {
	node, err := p.NodeGet(nodeid)
	if err != nil {
		return err
	}

	return node.Core().ProcessDelete(id)
}

// ProcessUpdate updates the process with the given ID on the node with the
// given node ID.
func (p *Manager) ProcessUpdate(nodeid string, id app.ProcessID, config *app.Config, metadata map[string]interface{}) error {
	node, err := p.NodeGet(nodeid)
	if err != nil {
		return err
	}

	return node.Core().ProcessUpdate(id, config, metadata)
}

// ProcessReportSet sets the report of the process with the given ID on the
// node with the given node ID.
func (p *Manager) ProcessReportSet(nodeid string, id app.ProcessID, report *app.Report) error {
	node, err := p.NodeGet(nodeid)
	if err != nil {
		return err
	}

	return node.Core().ProcessReportSet(id, report)
}

// ProcessCommand sends a command to the process with the given ID on the node
// with the given node ID.
func (p *Manager) ProcessCommand(nodeid string, id app.ProcessID, command string) error {
	node, err := p.NodeGet(nodeid)
	if err != nil {
		return err
	}

	return node.Core().ProcessCommand(id, command)
}

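// Illustrative process lifecycle sketch (the node ID, process ID, and command
// string are made up; config is assumed to be a populated *app.Config):
//
//	pid := app.ProcessID{ID: "my-process"}
//	if err := manager.ProcessAdd("node-2", config, nil); err == nil {
//		_ = manager.ProcessCommand("node-2", pid, "start")
//		_ = manager.ProcessDelete("node-2", pid)
//	}
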
// ProcessProbe probes the process with the given ID on the node with the
// given node ID.
func (p *Manager) ProcessProbe(nodeid string, id app.ProcessID) (api.Probe, error) {
	node, err := p.NodeGet(nodeid)
	if err != nil {
		probe := api.Probe{
			Log: []string{fmt.Sprintf("the node %s, on which the process %s should reside, doesn't exist", nodeid, id.String())},
		}
		return probe, err
	}

	return node.Core().ProcessProbe(id)
}

// ProcessProbeConfig probes the given process config on the node with the
// given node ID.
func (p *Manager) ProcessProbeConfig(nodeid string, config *app.Config) (api.Probe, error) {
	node, err := p.NodeGet(nodeid)
	if err != nil {
		probe := api.Probe{
			Log: []string{fmt.Sprintf("the node %s, on which the process config should be probed, doesn't exist", nodeid)},
		}
		return probe, err
	}

	return node.Core().ProcessProbeConfig(config)
}

// ProcessValidateConfig validates the given process config on the node with
// the given node ID.
func (p *Manager) ProcessValidateConfig(nodeid string, config *app.Config) error {
	node, err := p.NodeGet(nodeid)
	if err != nil {
		return err
	}

	return node.Core().ProcessValidateConfig(config)
}

// Events returns a channel that merges the event streams of all known nodes,
// filtered by the given filters. Each event is tagged with the ID of the node
// it originated from. The returned channel is not closed when the context is
// cancelled.
func (p *Manager) Events(ctx context.Context, filters api.EventFilters) (<-chan api.Event, error) {
	eventChan := make(chan api.Event, 128)

	p.lock.RLock()
	for _, n := range p.nodes {
		go func(node *Node, e chan<- api.Event) {
			eventChan, err := node.Core().Events(ctx, filters)
			if err != nil {
				return
			}

			for event := range eventChan {
				event.CoreID = node.id
				e <- event
			}
		}(n, eventChan)
	}
	p.lock.RUnlock()

	return eventChan, nil
}

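// Illustrative sketch of consuming the merged event stream. Because the
// merged channel is never closed, the consumer also watches ctx.Done() to
// stop (the filter value is made up):
//
//	ctx, cancel := context.WithCancel(context.Background())
//	defer cancel()
//
//	events, _ := manager.Events(ctx, api.EventFilters{})
//	for {
//		select {
//		case event := <-events:
//			_ = event.CoreID // ID of the node the event came from
//		case <-ctx.Done():
//			return
//		}
//	}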