Mirror of https://github.com/datarhei/core.git, synced 2025-10-05 16:07:07 +08:00
Add EmergencyLeaderTimeout parameter, ignore throttling nodes, implement NodeRecoverTimeout, introduce processNodeMap in cluster DB
@@ -480,6 +480,7 @@ func (a *api) start() error {
|
|||||||
Peers: peers,
|
Peers: peers,
|
||||||
SyncInterval: time.Duration(cfg.Cluster.SyncInterval) * time.Second,
|
SyncInterval: time.Duration(cfg.Cluster.SyncInterval) * time.Second,
|
||||||
NodeRecoverTimeout: time.Duration(cfg.Cluster.NodeRecoverTimeout) * time.Second,
|
NodeRecoverTimeout: time.Duration(cfg.Cluster.NodeRecoverTimeout) * time.Second,
|
||||||
|
EmergencyLeaderTimeout: time.Duration(cfg.Cluster.EmergencyLeaderTimeout) * time.Second,
|
||||||
CoreAPIAddress: scheme + gonet.JoinHostPort(host, port),
|
CoreAPIAddress: scheme + gonet.JoinHostPort(host, port),
|
||||||
CoreAPIUsername: cfg.API.Auth.Username,
|
CoreAPIUsername: cfg.API.Auth.Username,
|
||||||
CoreAPIPassword: cfg.API.Auth.Password,
|
CoreAPIPassword: cfg.API.Auth.Password,
|
||||||
|
@@ -82,6 +82,7 @@ type ClusterConfig struct {
|
|||||||
|
|
||||||
SyncInterval time.Duration // Interval between aligning the processes in the cluster DB with the processes on the nodes
|
SyncInterval time.Duration // Interval between aligning the processes in the cluster DB with the processes on the nodes
|
||||||
NodeRecoverTimeout time.Duration // Timeout for a node to recover before rebalancing the processes
|
NodeRecoverTimeout time.Duration // Timeout for a node to recover before rebalancing the processes
|
||||||
|
EmergencyLeaderTimeout time.Duration // Timeout for establishing the emergency leadership after losing contact to the raft leader
|
||||||
|
|
||||||
CoreAPIAddress string // Address of the core API
|
CoreAPIAddress string // Address of the core API
|
||||||
CoreAPIUsername string // Username for the core API
|
CoreAPIUsername string // Username for the core API
|
||||||
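
For orientation, a cluster could be configured with the new timeouts roughly like this. This is a partial, illustrative construction: the values and the cluster package qualifier are assumptions, while the field names are the ones from the ClusterConfig shown above.

cfg := cluster.ClusterConfig{
    SyncInterval:           5 * time.Second,  // align the cluster DB with the nodes every 5s
    NodeRecoverTimeout:     2 * time.Minute,  // tolerate an unreachable node for 2 minutes
    EmergencyLeaderTimeout: 10 * time.Second, // force emergency leadership after 10s without the raft leader
    CoreAPIAddress:         "http://127.0.0.1:8080",
    CoreAPIUsername:        "admin",
    CoreAPIPassword:        "secret",
}
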
@@ -115,6 +116,7 @@ type cluster struct {
|
|||||||
|
|
||||||
syncInterval time.Duration
|
syncInterval time.Duration
|
||||||
nodeRecoverTimeout time.Duration
|
nodeRecoverTimeout time.Duration
|
||||||
|
emergencyLeaderTimeout time.Duration
|
||||||
|
|
||||||
forwarder forwarder.Forwarder
|
forwarder forwarder.Forwarder
|
||||||
api API
|
api API
|
||||||
@@ -145,6 +147,7 @@ func New(config ClusterConfig) (Cluster, error) {
|
|||||||
|
|
||||||
syncInterval: config.SyncInterval,
|
syncInterval: config.SyncInterval,
|
||||||
nodeRecoverTimeout: config.NodeRecoverTimeout,
|
nodeRecoverTimeout: config.NodeRecoverTimeout,
|
||||||
|
emergencyLeaderTimeout: config.EmergencyLeaderTimeout,
|
||||||
|
|
||||||
nodes: map[string]proxy.Node{},
|
nodes: map[string]proxy.Node{},
|
||||||
}
|
}
|
||||||
@@ -971,15 +974,15 @@ func (c *cluster) sentinel() {
|
|||||||
|
|
||||||
c.logger.Debug().WithFields(log.Fields{
|
c.logger.Debug().WithFields(log.Fields{
|
||||||
"state": stats.State,
|
"state": stats.State,
|
||||||
"last_contact": stats.LastContact.String(),
|
"last_contact": stats.LastContact,
|
||||||
"num_peers": stats.NumPeers,
|
"num_peers": stats.NumPeers,
|
||||||
}).Log("Stats")
|
}).Log("Stats")
|
||||||
|
|
||||||
if stats.LastContact > 10*time.Second && !isEmergencyLeader {
|
if stats.LastContact > c.emergencyLeaderTimeout && !isEmergencyLeader {
|
||||||
c.logger.Warn().Log("Force leadership due to lost contact to leader")
|
c.logger.Warn().Log("Force leadership due to lost contact to leader")
|
||||||
c.raftEmergencyNotifyCh <- true
|
c.raftEmergencyNotifyCh <- true
|
||||||
isEmergencyLeader = true
|
isEmergencyLeader = true
|
||||||
} else if stats.LastContact <= 10*time.Second && isEmergencyLeader {
|
} else if stats.LastContact <= c.emergencyLeaderTimeout && isEmergencyLeader {
|
||||||
c.logger.Warn().Log("Stop forced leadership due to contact to leader")
|
c.logger.Warn().Log("Stop forced leadership due to contact to leader")
|
||||||
c.raftEmergencyNotifyCh <- false
|
c.raftEmergencyNotifyCh <- false
|
||||||
isEmergencyLeader = false
|
isEmergencyLeader = false
|
||||||
|
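
Expressed on its own, the check above boils down to the following stand-alone sketch; it only uses plain time.Duration values, not the repo's types.

// decideEmergencyLeadership mirrors the comparison in the sentinel above:
// enter emergency leadership once the raft leader hasn't been heard from for
// longer than emergencyLeaderTimeout, and leave it again once contact is back.
func decideEmergencyLeadership(lastContact, emergencyLeaderTimeout time.Duration, isEmergencyLeader bool) bool {
    if lastContact > emergencyLeaderTimeout && !isEmergencyLeader {
        return true // would send true on raftEmergencyNotifyCh
    }
    if lastContact <= emergencyLeaderTimeout && isEmergencyLeader {
        return false // would send false on raftEmergencyNotifyCh
    }
    return isEmergencyLeader
}
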
@@ -241,7 +241,7 @@ RECONCILE:
|
|||||||
|
|
||||||
// Check if we need to handle initial leadership actions
|
// Check if we need to handle initial leadership actions
|
||||||
if !establishedLeader {
|
if !establishedLeader {
|
||||||
if err := c.establishLeadership(context.TODO()); err != nil {
|
if err := c.establishLeadership(context.TODO(), emergency); err != nil {
|
||||||
c.logger.Error().WithError(err).Log("Establish leadership")
|
c.logger.Error().WithError(err).Log("Establish leadership")
|
||||||
// Immediately revoke leadership since we didn't successfully
|
// Immediately revoke leadership since we didn't successfully
|
||||||
// establish leadership.
|
// establish leadership.
|
||||||
@@ -286,15 +286,13 @@ WAIT:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *cluster) establishLeadership(ctx context.Context) error {
|
func (c *cluster) establishLeadership(ctx context.Context, emergency bool) error {
|
||||||
c.logger.Debug().Log("Establishing leadership")
|
c.logger.Debug().WithField("emergency", emergency).Log("Establishing leadership")
|
||||||
|
|
||||||
// creating a map of which process runs where
|
|
||||||
|
|
||||||
ctx, cancel := context.WithCancel(ctx)
|
ctx, cancel := context.WithCancel(ctx)
|
||||||
c.cancelLeaderShip = cancel
|
c.cancelLeaderShip = cancel
|
||||||
|
|
||||||
go c.startRebalance(ctx, c.syncInterval)
|
go c.startSynchronizeAndRebalance(ctx, c.syncInterval, emergency)
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -305,7 +303,26 @@ func (c *cluster) revokeLeadership() {
|
|||||||
c.cancelLeaderShip()
|
c.cancelLeaderShip()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *cluster) startRebalance(ctx context.Context, interval time.Duration) {
|
// startSynchronizeAndRebalance synchronizes and rebalances the processes in a given interval. Synchronizing
|
||||||
|
// takes care that all processes in the cluster DB are running on one node. It writes the process->node mapping
|
||||||
|
// to the cluster DB such that when a new leader gets elected it knows which process should be running on which node.
|
||||||
|
// This is checked against the actual state. If a node is not reachable, the leader still knows which processes
|
||||||
|
// should be running on that node. For a certain duration (nodeRecoverTimeout) this is tolerated in case the
|
||||||
|
// node comes back. If not, the processes will be distributed to the remaining nodes. The actual observed state
|
||||||
|
// is stored back into the cluster DB.
|
||||||
|
//
|
||||||
|
// This is followed by rebalancing, which takes care that processes are taken away from overloaded nodes. In each iteration
|
||||||
|
// only one process is taken away from a node. If a node is not reachable, its processes will not be part of the
|
||||||
|
// rebalancing and no attempt will be made to move processes from and to that node.
|
||||||
|
//
|
||||||
|
// All this happens if there's a leader. If there's no leader election possible, the node goes into the
|
||||||
|
// emergency leadership mode after a certain duration (emergencyLeaderTimeout). The synchronization phase will
|
||||||
|
// happen based on the last known list of processes from the cluster DB. Until nodeRecoverTimeout is reached,
|
||||||
|
// processes that would run on unreachable nodes will not be moved to this node. Rebalancing will be disabled.
|
||||||
|
//
|
||||||
|
// The goal of synchronizing and rebalancing is to make as few moves as possible and to be tolerant for
|
||||||
|
// a while if a node is not reachable.
|
||||||
|
func (c *cluster) startSynchronizeAndRebalance(ctx context.Context, interval time.Duration, emergency bool) {
|
||||||
ticker := time.NewTicker(interval)
|
ticker := time.NewTicker(interval)
|
||||||
defer ticker.Stop()
|
defer ticker.Stop()
|
||||||
|
|
||||||
@@ -314,8 +331,11 @@ func (c *cluster) startRebalance(ctx context.Context, interval time.Duration) {
|
|||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
return
|
return
|
||||||
case <-ticker.C:
|
case <-ticker.C:
|
||||||
c.doSynchronize()
|
c.doSynchronize(emergency)
|
||||||
c.doRebalance()
|
|
||||||
|
if !emergency {
|
||||||
|
c.doRebalance(emergency)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
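
Put together, the new loop behaves like the following sketch: the select cases shown above are placed in their surrounding for/select wrapper, which lies outside the hunk context, so treat this as a sketch rather than a verbatim copy of the file.

func (c *cluster) startSynchronizeAndRebalance(ctx context.Context, interval time.Duration, emergency bool) {
    ticker := time.NewTicker(interval)
    defer ticker.Stop()

    for {
        select {
        case <-ctx.Done():
            return
        case <-ticker.C:
            // Align the cluster DB with what is actually running on the nodes.
            c.doSynchronize(emergency)

            // An emergency leader only keeps its own processes alive; it never rebalances.
            if !emergency {
                c.doRebalance(emergency)
            }
        }
    }
}
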
@@ -377,6 +397,7 @@ func (c *cluster) applyOpStack(stack []interface{}) {
|
|||||||
}).Log("Adding process")
|
}).Log("Adding process")
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
err = c.proxy.ProcessStart(v.nodeid, v.config.ProcessID())
|
err = c.proxy.ProcessStart(v.nodeid, v.config.ProcessID())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
c.logger.Info().WithError(err).WithFields(log.Fields{
|
c.logger.Info().WithError(err).WithFields(log.Fields{
|
||||||
@@ -398,6 +419,7 @@ func (c *cluster) applyOpStack(stack []interface{}) {
|
|||||||
}).Log("Updating process")
|
}).Log("Updating process")
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
c.logger.Info().WithFields(log.Fields{
|
c.logger.Info().WithFields(log.Fields{
|
||||||
"processid": v.config.ID,
|
"processid": v.config.ID,
|
||||||
"nodeid": v.nodeid,
|
"nodeid": v.nodeid,
|
||||||
@@ -411,6 +433,7 @@ func (c *cluster) applyOpStack(stack []interface{}) {
|
|||||||
}).Log("Removing process")
|
}).Log("Removing process")
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
c.logger.Info().WithFields(log.Fields{
|
c.logger.Info().WithFields(log.Fields{
|
||||||
"processid": v.processid,
|
"processid": v.processid,
|
||||||
"nodeid": v.nodeid,
|
"nodeid": v.nodeid,
|
||||||
@@ -425,6 +448,7 @@ func (c *cluster) applyOpStack(stack []interface{}) {
|
|||||||
}).Log("Moving process, adding process")
|
}).Log("Moving process, adding process")
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
err = c.proxy.ProcessDelete(v.fromNodeid, v.config.ProcessID())
|
err = c.proxy.ProcessDelete(v.fromNodeid, v.config.ProcessID())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
c.logger.Info().WithError(err).WithFields(log.Fields{
|
c.logger.Info().WithError(err).WithFields(log.Fields{
|
||||||
@@ -434,6 +458,7 @@ func (c *cluster) applyOpStack(stack []interface{}) {
|
|||||||
}).Log("Moving process, removing process")
|
}).Log("Moving process, removing process")
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
err = c.proxy.ProcessStart(v.toNodeid, v.config.ProcessID())
|
err = c.proxy.ProcessStart(v.toNodeid, v.config.ProcessID())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
c.logger.Info().WithError(err).WithFields(log.Fields{
|
c.logger.Info().WithError(err).WithFields(log.Fields{
|
||||||
@@ -443,6 +468,7 @@ func (c *cluster) applyOpStack(stack []interface{}) {
|
|||||||
}).Log("Moving process, starting process")
|
}).Log("Moving process, starting process")
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
c.logger.Info().WithFields(log.Fields{
|
c.logger.Info().WithFields(log.Fields{
|
||||||
"processid": v.config.ID,
|
"processid": v.config.ID,
|
||||||
"fromnodeid": v.fromNodeid,
|
"fromnodeid": v.fromNodeid,
|
||||||
@@ -457,6 +483,7 @@ func (c *cluster) applyOpStack(stack []interface{}) {
|
|||||||
}).Log("Starting process")
|
}).Log("Starting process")
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
c.logger.Info().WithFields(log.Fields{
|
c.logger.Info().WithFields(log.Fields{
|
||||||
"processid": v.processid,
|
"processid": v.processid,
|
||||||
"nodeid": v.nodeid,
|
"nodeid": v.nodeid,
|
||||||
@@ -474,39 +501,73 @@ func (c *cluster) applyOpStack(stack []interface{}) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *cluster) doSynchronize() {
|
func (c *cluster) doSynchronize(emergency bool) {
|
||||||
|
wish := c.store.GetProcessNodeMap()
|
||||||
want := c.store.ProcessList()
|
want := c.store.ProcessList()
|
||||||
have := c.proxy.ListProcesses()
|
have := c.proxy.ListProcesses()
|
||||||
resources := c.proxy.Resources()
|
nodes := c.proxy.ListNodes()
|
||||||
|
|
||||||
|
nodesMap := map[string]proxy.NodeAbout{}
|
||||||
|
|
||||||
|
for _, node := range nodes {
|
||||||
|
about := node.About()
|
||||||
|
nodesMap[about.ID] = about
|
||||||
|
}
|
||||||
|
|
||||||
c.logger.Debug().WithFields(log.Fields{
|
c.logger.Debug().WithFields(log.Fields{
|
||||||
"want": want,
|
"want": want,
|
||||||
"have": have,
|
"have": have,
|
||||||
"resources": resources,
|
"nodes": nodesMap,
|
||||||
}).Log("Synchronize")
|
}).Log("Synchronize")
|
||||||
|
|
||||||
opStack := synchronize(want, have, resources)
|
opStack, _, reality := synchronize(wish, want, have, nodesMap, c.nodeRecoverTimeout)
|
||||||
|
|
||||||
|
if !emergency {
|
||||||
|
cmd := &store.Command{
|
||||||
|
Operation: store.OpSetProcessNodeMap,
|
||||||
|
Data: store.CommandSetProcessNodeMap{
|
||||||
|
Map: reality,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
c.applyCommand(cmd)
|
||||||
|
}
|
||||||
|
|
||||||
c.applyOpStack(opStack)
|
c.applyOpStack(opStack)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *cluster) doRebalance() {
|
func (c *cluster) doRebalance(emergency bool) {
|
||||||
have := c.proxy.ListProcesses()
|
have := c.proxy.ListProcesses()
|
||||||
resources := c.proxy.Resources()
|
nodes := c.proxy.ListNodes()
|
||||||
|
|
||||||
|
nodesMap := map[string]proxy.NodeAbout{}
|
||||||
|
|
||||||
|
for _, node := range nodes {
|
||||||
|
about := node.About()
|
||||||
|
nodesMap[about.ID] = about
|
||||||
|
}
|
||||||
|
|
||||||
c.logger.Debug().WithFields(log.Fields{
|
c.logger.Debug().WithFields(log.Fields{
|
||||||
"have": have,
|
"have": have,
|
||||||
"resources": resources,
|
"nodes": nodes,
|
||||||
}).Log("Rebalance")
|
}).Log("Rebalance")
|
||||||
|
|
||||||
opStack := rebalance(have, resources)
|
opStack, _ := rebalance(have, nodesMap)
|
||||||
|
|
||||||
c.applyOpStack(opStack)
|
c.applyOpStack(opStack)
|
||||||
}
|
}
|
||||||
|
|
||||||
// synchronize returns a list of operations in order to adjust the "have" list to the "want" list
|
// synchronize returns a list of operations in order to adjust the "have" list to the "want" list
|
||||||
// taking the available resources on each node into account.
|
// taking the available resources on each node into account.
|
||||||
func synchronize(want []store.Process, have []proxy.Process, resources map[string]proxy.NodeResources) []interface{} {
|
func synchronize(wish map[string]string, want []store.Process, have []proxy.Process, nodes map[string]proxy.NodeAbout, nodeRecoverTimeout time.Duration) ([]interface{}, map[string]proxy.NodeResources, map[string]string) {
|
||||||
|
resources := map[string]proxy.NodeResources{}
|
||||||
|
for nodeid, about := range nodes {
|
||||||
|
resources[nodeid] = about.Resources
|
||||||
|
}
|
||||||
|
|
||||||
|
// A map same as wish, but reflecting the actual situation.
|
||||||
|
reality := map[string]string{}
|
||||||
|
|
||||||
// A map from the process ID to the process config of the processes
|
// A map from the process ID to the process config of the processes
|
||||||
// we want to be running on the nodes.
|
// we want to be running on the nodes.
|
||||||
wantMap := map[string]store.Process{}
|
wantMap := map[string]store.Process{}
|
||||||
@@ -519,31 +580,32 @@ func synchronize(want []store.Process, have []proxy.Process, resources map[strin
|
|||||||
|
|
||||||
// Now we iterate through the processes we actually have running on the nodes
|
// Now we iterate through the processes we actually have running on the nodes
|
||||||
// and remove them from the wantMap. We also make sure that they are running.
|
// and remove them from the wantMap. We also make sure that they are running.
|
||||||
// If a process is not on the wantMap, it will be deleted from the nodes.
|
// If a process cannot be found on the wantMap, it will be deleted from the nodes.
|
||||||
haveAfterRemove := []proxy.Process{}
|
haveAfterRemove := []proxy.Process{}
|
||||||
|
|
||||||
for _, p := range have {
|
for _, haveP := range have {
|
||||||
pid := p.Config.ProcessID().String()
|
pid := haveP.Config.ProcessID().String()
|
||||||
if wantP, ok := wantMap[pid]; !ok {
|
if wantP, ok := wantMap[pid]; !ok {
|
||||||
|
// The process is not on the wantMap. Delete it and adjust the resources.
|
||||||
opStack = append(opStack, processOpDelete{
|
opStack = append(opStack, processOpDelete{
|
||||||
nodeid: p.NodeID,
|
nodeid: haveP.NodeID,
|
||||||
processid: p.Config.ProcessID(),
|
processid: haveP.Config.ProcessID(),
|
||||||
})
|
})
|
||||||
|
|
||||||
// Adjust the resources
|
r, ok := resources[haveP.NodeID]
|
||||||
r, ok := resources[p.NodeID]
|
|
||||||
if ok {
|
if ok {
|
||||||
r.CPU -= p.CPU
|
r.CPU -= haveP.CPU
|
||||||
r.Mem -= p.Mem
|
r.Mem -= haveP.Mem
|
||||||
resources[p.NodeID] = r
|
resources[haveP.NodeID] = r
|
||||||
}
|
}
|
||||||
|
|
||||||
continue
|
continue
|
||||||
} else {
|
} else {
|
||||||
if wantP.UpdatedAt.After(p.UpdatedAt) {
|
// The process is on the wantMap. Update the process if the configuration differ.
|
||||||
|
if !wantP.Config.Equal(haveP.Config) {
|
||||||
opStack = append(opStack, processOpUpdate{
|
opStack = append(opStack, processOpUpdate{
|
||||||
nodeid: p.NodeID,
|
nodeid: haveP.NodeID,
|
||||||
processid: p.Config.ProcessID(),
|
processid: haveP.Config.ProcessID(),
|
||||||
config: wantP.Config,
|
config: wantP.Config,
|
||||||
metadata: wantP.Metadata,
|
metadata: wantP.Metadata,
|
||||||
})
|
})
|
||||||
@@ -551,24 +613,44 @@ func synchronize(want []store.Process, have []proxy.Process, resources map[strin
|
|||||||
}
|
}
|
||||||
|
|
||||||
delete(wantMap, pid)
|
delete(wantMap, pid)
|
||||||
|
reality[pid] = haveP.NodeID
|
||||||
|
|
||||||
if p.Order != "start" {
|
if haveP.Order != "start" {
|
||||||
opStack = append(opStack, processOpStart{
|
opStack = append(opStack, processOpStart{
|
||||||
nodeid: p.NodeID,
|
nodeid: haveP.NodeID,
|
||||||
processid: p.Config.ProcessID(),
|
processid: haveP.Config.ProcessID(),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
haveAfterRemove = append(haveAfterRemove, p)
|
haveAfterRemove = append(haveAfterRemove, haveP)
|
||||||
}
|
}
|
||||||
|
|
||||||
have = haveAfterRemove
|
have = haveAfterRemove
|
||||||
|
|
||||||
// A map from the process reference to the node it is running on
|
// In case a node didn't respond, some PIDs that would run on the currently
|
||||||
|
// unresponsive nodes are still on the wantMap. We use the wish map to assign them to that node.
|
||||||
|
// If the node is unavailable for too long, keep these processes on the wantMap, otherwise
|
||||||
|
// remove them from the wantMap and hope that they will reappear before the nodeRecoverTimeout expires.
|
||||||
|
for pid := range wantMap {
|
||||||
|
// Check if this PID is assigned to a node in the wish map.
|
||||||
|
if nodeid, ok := wish[pid]; ok {
|
||||||
|
// Check whether that node still exists and for how long it hasn't been contacted.
|
||||||
|
if node, ok := nodes[nodeid]; ok {
|
||||||
|
if time.Since(node.LastContact) <= nodeRecoverTimeout {
|
||||||
|
reality[pid] = nodeid
|
||||||
|
delete(wantMap, pid)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// The wantMap now contains only those processes that need to be installed on a node.
|
||||||
|
|
||||||
|
// A map from the process reference to the node it is running on.
|
||||||
haveReferenceAffinityMap := createReferenceAffinityMap(have)
|
haveReferenceAffinityMap := createReferenceAffinityMap(have)
|
||||||
|
|
||||||
// Now all remaining processes in the wantMap must be added to one of the nodes
|
// Now all remaining processes in the wantMap must be added to one of the nodes.
|
||||||
for _, process := range wantMap {
|
for pid, process := range wantMap {
|
||||||
// If a process doesn't have any limits defined, reject that process
|
// If a process doesn't have any limits defined, reject that process
|
||||||
if process.Config.LimitCPU <= 0 || process.Config.LimitMemory <= 0 {
|
if process.Config.LimitCPU <= 0 || process.Config.LimitMemory <= 0 {
|
||||||
opStack = append(opStack, processOpReject{
|
opStack = append(opStack, processOpReject{
|
||||||
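
The tolerance window for unresponsive nodes can be illustrated in isolation. This is a stand-alone sketch: the type is a stand-in for proxy.NodeAbout, and only the standard time package is needed.

// nodeAboutStub stands in for proxy.NodeAbout; only LastContact matters here.
type nodeAboutStub struct {
    LastContact time.Time
}

// keepOnWishedNode reports whether a process that no node currently reports
// should stay assigned to the node recorded in the wish map, or be redistributed.
func keepOnWishedNode(pid string, wish map[string]string, nodes map[string]nodeAboutStub, nodeRecoverTimeout time.Duration) (string, bool) {
    nodeid, ok := wish[pid]
    if !ok {
        return "", false // never assigned before: place it freshly right away
    }

    node, ok := nodes[nodeid]
    if !ok {
        return "", false // the node is gone from the node list: redistribute
    }

    if time.Since(node.LastContact) <= nodeRecoverTimeout {
        return nodeid, true // tolerate the outage and keep the process where it was
    }

    return "", false // unreachable for too long: redistribute
}
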
@@ -580,19 +662,19 @@ func synchronize(want []store.Process, have []proxy.Process, resources map[strin
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Check if there are already processes with the same reference, and if so
|
// Check if there are already processes with the same reference, and if so
|
||||||
// chose this node. Then check the node if it has enough resources left. If
|
// choose this node. Then check the node if it has enough resources left. If
|
||||||
// not, then select a node with the most available resources.
|
// not, then select a node with the most available resources.
|
||||||
nodeid := ""
|
nodeid := ""
|
||||||
|
|
||||||
// Try to add the process to a node where other processes with the same
|
// Try to add the process to a node where other processes with the same
|
||||||
// reference currently reside.
|
// reference currently reside.
|
||||||
if len(process.Config.Reference) != 0 {
|
if len(process.Config.Reference) != 0 {
|
||||||
for _, count := range haveReferenceAffinityMap[process.Config.Reference] {
|
for _, count := range haveReferenceAffinityMap[process.Config.Reference+"@"+process.Config.Domain] {
|
||||||
r := resources[count.nodeid]
|
r := resources[count.nodeid]
|
||||||
cpu := process.Config.LimitCPU
|
cpu := process.Config.LimitCPU
|
||||||
mem := process.Config.LimitMemory
|
mem := process.Config.LimitMemory
|
||||||
|
|
||||||
if r.CPU+cpu < r.CPULimit && r.Mem+mem < r.MemLimit {
|
if r.CPU+cpu < r.CPULimit && r.Mem+mem < r.MemLimit && !r.IsThrottling {
|
||||||
nodeid = count.nodeid
|
nodeid = count.nodeid
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
@@ -606,7 +688,7 @@ func synchronize(want []store.Process, have []proxy.Process, resources map[strin
|
|||||||
mem := process.Config.LimitMemory
|
mem := process.Config.LimitMemory
|
||||||
|
|
||||||
if len(nodeid) == 0 {
|
if len(nodeid) == 0 {
|
||||||
if r.CPU+cpu < r.CPULimit && r.Mem+mem < r.MemLimit {
|
if r.CPU+cpu < r.CPULimit && r.Mem+mem < r.MemLimit && !r.IsThrottling {
|
||||||
nodeid = id
|
nodeid = id
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -633,6 +715,8 @@ func synchronize(want []store.Process, have []proxy.Process, resources map[strin
|
|||||||
r.Mem += process.Config.LimitMemory
|
r.Mem += process.Config.LimitMemory
|
||||||
resources[nodeid] = r
|
resources[nodeid] = r
|
||||||
}
|
}
|
||||||
|
|
||||||
|
reality[pid] = nodeid
|
||||||
} else {
|
} else {
|
||||||
opStack = append(opStack, processOpReject{
|
opStack = append(opStack, processOpReject{
|
||||||
processid: process.Config.ProcessID(),
|
processid: process.Config.ProcessID(),
|
||||||
@@ -641,7 +725,7 @@ func synchronize(want []store.Process, have []proxy.Process, resources map[strin
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return opStack
|
return opStack, resources, reality
|
||||||
}
|
}
|
||||||
|
|
||||||
type referenceAffinityNodeCount struct {
|
type referenceAffinityNodeCount struct {
|
||||||
@@ -649,6 +733,8 @@ type referenceAffinityNodeCount struct {
|
|||||||
count uint64
|
count uint64
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// createReferenceAffinityMap returns a map of references (per domain) to an array of nodes this reference
|
||||||
|
// is found on and their count. The array is sorted by the count, the highest first.
|
||||||
func createReferenceAffinityMap(processes []proxy.Process) map[string][]referenceAffinityNodeCount {
|
func createReferenceAffinityMap(processes []proxy.Process) map[string][]referenceAffinityNodeCount {
|
||||||
referenceAffinityMap := map[string][]referenceAffinityNodeCount{}
|
referenceAffinityMap := map[string][]referenceAffinityNodeCount{}
|
||||||
for _, p := range processes {
|
for _, p := range processes {
|
||||||
@@ -656,11 +742,13 @@ func createReferenceAffinityMap(processes []proxy.Process) map[string][]referenc
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ref := p.Config.Reference + "@" + p.Config.Domain
|
||||||
|
|
||||||
// Here we count how often a reference is present on a node. When
|
// Here we count how often a reference is present on a node. When
|
||||||
// moving processes to a different node, the node with the highest
|
// moving processes to a different node, the node with the highest
|
||||||
// count of same references will be the first candidate.
|
// count of same references will be the first candidate.
|
||||||
found := false
|
found := false
|
||||||
arr := referenceAffinityMap[p.Config.Reference]
|
arr := referenceAffinityMap[ref]
|
||||||
for i, count := range arr {
|
for i, count := range arr {
|
||||||
if count.nodeid == p.NodeID {
|
if count.nodeid == p.NodeID {
|
||||||
count.count++
|
count.count++
|
||||||
@@ -677,7 +765,7 @@ func createReferenceAffinityMap(processes []proxy.Process) map[string][]referenc
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
referenceAffinityMap[p.Config.Reference] = arr
|
referenceAffinityMap[ref] = arr
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sort every reference count in decreasing order for each reference
|
// Sort every reference count in decreasing order for each reference
|
||||||
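
The affinity map is now keyed by reference and domain together. A short illustration of the keying; the helper name and the "live" domain are made up for this example:

// affinityKey builds the map key used above: processes only count as "related"
// when both their reference and their domain match.
func affinityKey(reference, domain string) string {
    return reference + "@" + domain
}

// Example: two processes with reference "ref1" but different domains end up in
// different buckets, so they no longer attract each other during placement.
//   affinityKey("ref1", "")     -> "ref1@"
//   affinityKey("ref1", "live") -> "ref1@live"
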
@@ -692,32 +780,15 @@ func createReferenceAffinityMap(processes []proxy.Process) map[string][]referenc
|
|||||||
return referenceAffinityMap
|
return referenceAffinityMap
|
||||||
}
|
}
|
||||||
|
|
||||||
// rebalance returns a list of operations that will move running processes away from nodes
|
// rebalance returns a list of operations that will move running processes away from nodes that are overloaded.
|
||||||
// that are overloaded.
|
func rebalance(have []proxy.Process, nodes map[string]proxy.NodeAbout) ([]interface{}, map[string]proxy.NodeResources) {
|
||||||
func rebalance(have []proxy.Process, resources map[string]proxy.NodeResources) []interface{} {
|
resources := map[string]proxy.NodeResources{}
|
||||||
// Group the processes by node
|
for nodeid, about := range nodes {
|
||||||
processNodeMap := map[string][]proxy.Process{}
|
resources[nodeid] = about.Resources
|
||||||
|
|
||||||
for _, p := range have {
|
|
||||||
processNodeMap[p.NodeID] = append(processNodeMap[p.NodeID], p)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sort the processes by their runtime (if they are running) for each node
|
// Group the processes by node and sort them
|
||||||
for nodeid, processes := range processNodeMap {
|
nodeProcessMap := createNodeProcessMap(have)
|
||||||
sort.SliceStable(processes, func(a, b int) bool {
|
|
||||||
if processes[a].State == "running" {
|
|
||||||
if processes[b].State != "running" {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
return processes[a].Runtime < processes[b].Runtime
|
|
||||||
}
|
|
||||||
|
|
||||||
return false
|
|
||||||
})
|
|
||||||
|
|
||||||
processNodeMap[nodeid] = processes
|
|
||||||
}
|
|
||||||
|
|
||||||
// A map from the process reference to the nodes it is running on
|
// A map from the process reference to the nodes it is running on
|
||||||
haveReferenceAffinityMap := createReferenceAffinityMap(have)
|
haveReferenceAffinityMap := createReferenceAffinityMap(have)
|
||||||
@@ -725,15 +796,17 @@ func rebalance(have []proxy.Process, resources map[string]proxy.NodeResources) [
|
|||||||
opStack := []interface{}{}
|
opStack := []interface{}{}
|
||||||
|
|
||||||
// Check if any of the nodes is overloaded
|
// Check if any of the nodes is overloaded
|
||||||
for id, r := range resources {
|
for id, node := range nodes {
|
||||||
|
r := node.Resources
|
||||||
|
|
||||||
// Check if node is overloaded
|
// Check if node is overloaded
|
||||||
if r.CPU < r.CPULimit && r.Mem < r.MemLimit {
|
if r.CPU < r.CPULimit && r.Mem < r.MemLimit && !r.IsThrottling {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// Move processes from this node to another node with enough free resources.
|
// Move processes from this node to another node with enough free resources.
|
||||||
// The processes are ordered ascending by their runtime.
|
// The processes are ordered ascending by their runtime.
|
||||||
processes := processNodeMap[id]
|
processes := nodeProcessMap[id]
|
||||||
if len(processes) == 0 {
|
if len(processes) == 0 {
|
||||||
// If there are no processes on that node, we can't do anything
|
// If there are no processes on that node, we can't do anything
|
||||||
continue
|
continue
|
||||||
@@ -752,13 +825,13 @@ func rebalance(have []proxy.Process, resources map[string]proxy.NodeResources) [
|
|||||||
// Try to move the process to a node where other processes with the same
|
// Try to move the process to a node where other processes with the same
|
||||||
// reference currently reside.
|
// reference currently reside.
|
||||||
if len(p.Config.Reference) != 0 {
|
if len(p.Config.Reference) != 0 {
|
||||||
for _, count := range haveReferenceAffinityMap[p.Config.Reference] {
|
for _, count := range haveReferenceAffinityMap[p.Config.Reference+"@"+p.Config.Domain] {
|
||||||
if count.nodeid == overloadedNodeid {
|
if count.nodeid == overloadedNodeid {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
r := resources[count.nodeid]
|
r := resources[count.nodeid]
|
||||||
if r.CPU+p.CPU < r.CPULimit && r.Mem+p.Mem < r.MemLimit {
|
if r.CPU+p.CPU < r.CPULimit && r.Mem+p.Mem < r.MemLimit && !r.IsThrottling {
|
||||||
availableNodeid = count.nodeid
|
availableNodeid = count.nodeid
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
@@ -767,13 +840,15 @@ func rebalance(have []proxy.Process, resources map[string]proxy.NodeResources) [
|
|||||||
|
|
||||||
// Find another node with enough resources available
|
// Find another node with enough resources available
|
||||||
if len(availableNodeid) == 0 {
|
if len(availableNodeid) == 0 {
|
||||||
for id, r := range resources {
|
for id, node := range nodes {
|
||||||
if id == overloadedNodeid {
|
if id == overloadedNodeid {
|
||||||
// Skip the overloaded node
|
// Skip the overloaded node
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if r.CPU+p.CPU < r.CPULimit && r.Mem+p.Mem < r.MemLimit {
|
r := node.Resources
|
||||||
|
|
||||||
|
if r.CPU+p.CPU < r.CPULimit && r.Mem+p.Mem < r.MemLimit && !r.IsThrottling {
|
||||||
availableNodeid = id
|
availableNodeid = id
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
@@ -812,12 +887,40 @@ func rebalance(have []proxy.Process, resources map[string]proxy.NodeResources) [
|
|||||||
r.Mem -= p.Mem
|
r.Mem -= p.Mem
|
||||||
resources[overloadedNodeid] = r
|
resources[overloadedNodeid] = r
|
||||||
|
|
||||||
// If this node is not anymore overloaded, stop moving processes around
|
// Move only one process at a time
|
||||||
if r.CPU < r.CPULimit && r.Mem < r.MemLimit {
|
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return opStack, resources
|
||||||
|
}
|
||||||
|
|
||||||
|
// createNodeProcessMap takes a list of processes and groups them by the nodeid they
|
||||||
|
// are running on. Each group gets sorted by their preference to be moved somewhere
|
||||||
|
// else, decreasing.
|
||||||
|
func createNodeProcessMap(processes []proxy.Process) map[string][]proxy.Process {
|
||||||
|
nodeProcessMap := map[string][]proxy.Process{}
|
||||||
|
|
||||||
|
for _, p := range processes {
|
||||||
|
nodeProcessMap[p.NodeID] = append(nodeProcessMap[p.NodeID], p)
|
||||||
}
|
}
|
||||||
|
|
||||||
return opStack
|
// Sort the processes by their runtime (if they are running) for each node
|
||||||
|
for nodeid, processes := range nodeProcessMap {
|
||||||
|
sort.SliceStable(processes, func(a, b int) bool {
|
||||||
|
if processes[a].State == "running" {
|
||||||
|
if processes[b].State != "running" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
return processes[a].Runtime < processes[b].Runtime
|
||||||
|
}
|
||||||
|
|
||||||
|
return false
|
||||||
|
})
|
||||||
|
|
||||||
|
nodeProcessMap[nodeid] = processes
|
||||||
|
}
|
||||||
|
|
||||||
|
return nodeProcessMap
|
||||||
}
|
}
|
||||||
|
@@ -12,6 +12,8 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func TestSynchronizeAdd(t *testing.T) {
|
func TestSynchronizeAdd(t *testing.T) {
|
||||||
|
wish := map[string]string{}
|
||||||
|
|
||||||
want := []store.Process{
|
want := []store.Process{
|
||||||
{
|
{
|
||||||
UpdatedAt: time.Now(),
|
UpdatedAt: time.Now(),
|
||||||
@@ -25,24 +27,30 @@ func TestSynchronizeAdd(t *testing.T) {
|
|||||||
|
|
||||||
have := []proxy.Process{}
|
have := []proxy.Process{}
|
||||||
|
|
||||||
resources := map[string]proxy.NodeResources{
|
nodes := map[string]proxy.NodeAbout{
|
||||||
"node1": {
|
"node1": {
|
||||||
|
LastContact: time.Now(),
|
||||||
|
Resources: proxy.NodeResources{
|
||||||
NCPU: 1,
|
NCPU: 1,
|
||||||
CPU: 7,
|
CPU: 7,
|
||||||
Mem: 35,
|
Mem: 35,
|
||||||
CPULimit: 90,
|
CPULimit: 90,
|
||||||
MemLimit: 90,
|
MemLimit: 90,
|
||||||
},
|
},
|
||||||
|
},
|
||||||
"node2": {
|
"node2": {
|
||||||
|
LastContact: time.Now(),
|
||||||
|
Resources: proxy.NodeResources{
|
||||||
NCPU: 1,
|
NCPU: 1,
|
||||||
CPU: 85,
|
CPU: 85,
|
||||||
Mem: 11,
|
Mem: 11,
|
||||||
CPULimit: 90,
|
CPULimit: 90,
|
||||||
MemLimit: 90,
|
MemLimit: 90,
|
||||||
},
|
},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
stack := synchronize(want, have, resources)
|
stack, resources, reality := synchronize(wish, want, have, nodes, 2*time.Minute)
|
||||||
|
|
||||||
require.Equal(t, []interface{}{
|
require.Equal(t, []interface{}{
|
||||||
processOpAdd{
|
processOpAdd{
|
||||||
@@ -55,6 +63,10 @@ func TestSynchronizeAdd(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}, stack)
|
}, stack)
|
||||||
|
|
||||||
|
require.Equal(t, map[string]string{
|
||||||
|
"foobar@": "node1",
|
||||||
|
}, reality)
|
||||||
|
|
||||||
require.Equal(t, map[string]proxy.NodeResources{
|
require.Equal(t, map[string]proxy.NodeResources{
|
||||||
"node1": {
|
"node1": {
|
||||||
NCPU: 1,
|
NCPU: 1,
|
||||||
@@ -74,6 +86,10 @@ func TestSynchronizeAdd(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestSynchronizeAddReferenceAffinity(t *testing.T) {
|
func TestSynchronizeAddReferenceAffinity(t *testing.T) {
|
||||||
|
wish := map[string]string{
|
||||||
|
"foobar@": "node2",
|
||||||
|
}
|
||||||
|
|
||||||
now := time.Now()
|
now := time.Now()
|
||||||
|
|
||||||
want := []store.Process{
|
want := []store.Process{
|
||||||
@@ -109,28 +125,36 @@ func TestSynchronizeAddReferenceAffinity(t *testing.T) {
|
|||||||
Config: &app.Config{
|
Config: &app.Config{
|
||||||
ID: "foobar",
|
ID: "foobar",
|
||||||
Reference: "barfoo",
|
Reference: "barfoo",
|
||||||
|
LimitCPU: 10,
|
||||||
|
LimitMemory: 20,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
resources := map[string]proxy.NodeResources{
|
nodes := map[string]proxy.NodeAbout{
|
||||||
"node1": {
|
"node1": {
|
||||||
|
LastContact: time.Now(),
|
||||||
|
Resources: proxy.NodeResources{
|
||||||
NCPU: 1,
|
NCPU: 1,
|
||||||
CPU: 1,
|
CPU: 1,
|
||||||
Mem: 1,
|
Mem: 1,
|
||||||
CPULimit: 90,
|
CPULimit: 90,
|
||||||
MemLimit: 90,
|
MemLimit: 90,
|
||||||
},
|
},
|
||||||
|
},
|
||||||
"node2": {
|
"node2": {
|
||||||
|
LastContact: time.Now(),
|
||||||
|
Resources: proxy.NodeResources{
|
||||||
NCPU: 1,
|
NCPU: 1,
|
||||||
CPU: 1,
|
CPU: 1,
|
||||||
Mem: 1,
|
Mem: 1,
|
||||||
CPULimit: 90,
|
CPULimit: 90,
|
||||||
MemLimit: 90,
|
MemLimit: 90,
|
||||||
},
|
},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
stack := synchronize(want, have, resources)
|
stack, _, reality := synchronize(wish, want, have, nodes, 2*time.Minute)
|
||||||
|
|
||||||
require.Equal(t, []interface{}{
|
require.Equal(t, []interface{}{
|
||||||
processOpAdd{
|
processOpAdd{
|
||||||
@@ -143,9 +167,16 @@ func TestSynchronizeAddReferenceAffinity(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
}, stack)
|
}, stack)
|
||||||
|
|
||||||
|
require.Equal(t, map[string]string{
|
||||||
|
"foobar@": "node2",
|
||||||
|
"foobar2@": "node2",
|
||||||
|
}, reality)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestSynchronizeAddLimit(t *testing.T) {
|
func TestSynchronizeAddLimit(t *testing.T) {
|
||||||
|
wish := map[string]string{}
|
||||||
|
|
||||||
want := []store.Process{
|
want := []store.Process{
|
||||||
{
|
{
|
||||||
UpdatedAt: time.Now(),
|
UpdatedAt: time.Now(),
|
||||||
@@ -159,24 +190,30 @@ func TestSynchronizeAddLimit(t *testing.T) {
|
|||||||
|
|
||||||
have := []proxy.Process{}
|
have := []proxy.Process{}
|
||||||
|
|
||||||
resources := map[string]proxy.NodeResources{
|
nodes := map[string]proxy.NodeAbout{
|
||||||
"node1": {
|
"node1": {
|
||||||
|
LastContact: time.Now(),
|
||||||
|
Resources: proxy.NodeResources{
|
||||||
NCPU: 1,
|
NCPU: 1,
|
||||||
CPU: 81,
|
CPU: 81,
|
||||||
Mem: 72,
|
Mem: 72,
|
||||||
CPULimit: 90,
|
CPULimit: 90,
|
||||||
MemLimit: 90,
|
MemLimit: 90,
|
||||||
},
|
},
|
||||||
|
},
|
||||||
"node2": {
|
"node2": {
|
||||||
|
LastContact: time.Now(),
|
||||||
|
Resources: proxy.NodeResources{
|
||||||
NCPU: 1,
|
NCPU: 1,
|
||||||
CPU: 79,
|
CPU: 79,
|
||||||
Mem: 72,
|
Mem: 72,
|
||||||
CPULimit: 90,
|
CPULimit: 90,
|
||||||
MemLimit: 90,
|
MemLimit: 90,
|
||||||
},
|
},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
stack := synchronize(want, have, resources)
|
stack, resources, reality := synchronize(wish, want, have, nodes, 2*time.Minute)
|
||||||
|
|
||||||
require.Equal(t, []interface{}{
|
require.Equal(t, []interface{}{
|
||||||
processOpAdd{
|
processOpAdd{
|
||||||
@@ -189,6 +226,10 @@ func TestSynchronizeAddLimit(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}, stack)
|
}, stack)
|
||||||
|
|
||||||
|
require.Equal(t, map[string]string{
|
||||||
|
"foobar@": "node2",
|
||||||
|
}, reality)
|
||||||
|
|
||||||
require.Equal(t, map[string]proxy.NodeResources{
|
require.Equal(t, map[string]proxy.NodeResources{
|
||||||
"node1": {
|
"node1": {
|
||||||
NCPU: 1,
|
NCPU: 1,
|
||||||
@@ -208,6 +249,8 @@ func TestSynchronizeAddLimit(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestSynchronizeAddNoResourcesCPU(t *testing.T) {
|
func TestSynchronizeAddNoResourcesCPU(t *testing.T) {
|
||||||
|
wish := map[string]string{}
|
||||||
|
|
||||||
want := []store.Process{
|
want := []store.Process{
|
||||||
{
|
{
|
||||||
UpdatedAt: time.Now(),
|
UpdatedAt: time.Now(),
|
||||||
@@ -221,24 +264,30 @@ func TestSynchronizeAddNoResourcesCPU(t *testing.T) {
|
|||||||
|
|
||||||
have := []proxy.Process{}
|
have := []proxy.Process{}
|
||||||
|
|
||||||
resources := map[string]proxy.NodeResources{
|
nodes := map[string]proxy.NodeAbout{
|
||||||
"node1": {
|
"node1": {
|
||||||
|
LastContact: time.Now(),
|
||||||
|
Resources: proxy.NodeResources{
|
||||||
NCPU: 1,
|
NCPU: 1,
|
||||||
CPU: 81,
|
CPU: 81,
|
||||||
Mem: 72,
|
Mem: 72,
|
||||||
CPULimit: 90,
|
CPULimit: 90,
|
||||||
MemLimit: 90,
|
MemLimit: 90,
|
||||||
},
|
},
|
||||||
|
},
|
||||||
"node2": {
|
"node2": {
|
||||||
|
LastContact: time.Now(),
|
||||||
|
Resources: proxy.NodeResources{
|
||||||
NCPU: 1,
|
NCPU: 1,
|
||||||
CPU: 79,
|
CPU: 79,
|
||||||
Mem: 72,
|
Mem: 72,
|
||||||
CPULimit: 90,
|
CPULimit: 90,
|
||||||
MemLimit: 90,
|
MemLimit: 90,
|
||||||
},
|
},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
stack := synchronize(want, have, resources)
|
stack, _, _ := synchronize(wish, want, have, nodes, 2*time.Minute)
|
||||||
|
|
||||||
require.Equal(t, []interface{}{
|
require.Equal(t, []interface{}{
|
||||||
processOpReject{
|
processOpReject{
|
||||||
@@ -249,6 +298,8 @@ func TestSynchronizeAddNoResourcesCPU(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestSynchronizeAddNoResourcesMemory(t *testing.T) {
|
func TestSynchronizeAddNoResourcesMemory(t *testing.T) {
|
||||||
|
wish := map[string]string{}
|
||||||
|
|
||||||
want := []store.Process{
|
want := []store.Process{
|
||||||
{
|
{
|
||||||
UpdatedAt: time.Now(),
|
UpdatedAt: time.Now(),
|
||||||
@@ -262,24 +313,30 @@ func TestSynchronizeAddNoResourcesMemory(t *testing.T) {
|
|||||||
|
|
||||||
have := []proxy.Process{}
|
have := []proxy.Process{}
|
||||||
|
|
||||||
resources := map[string]proxy.NodeResources{
|
nodes := map[string]proxy.NodeAbout{
|
||||||
"node1": {
|
"node1": {
|
||||||
|
LastContact: time.Now(),
|
||||||
|
Resources: proxy.NodeResources{
|
||||||
NCPU: 1,
|
NCPU: 1,
|
||||||
CPU: 81,
|
CPU: 81,
|
||||||
Mem: 72,
|
Mem: 72,
|
||||||
CPULimit: 90,
|
CPULimit: 90,
|
||||||
MemLimit: 90,
|
MemLimit: 90,
|
||||||
},
|
},
|
||||||
|
},
|
||||||
"node2": {
|
"node2": {
|
||||||
|
LastContact: time.Now(),
|
||||||
|
Resources: proxy.NodeResources{
|
||||||
NCPU: 1,
|
NCPU: 1,
|
||||||
CPU: 79,
|
CPU: 79,
|
||||||
Mem: 72,
|
Mem: 72,
|
||||||
CPULimit: 90,
|
CPULimit: 90,
|
||||||
MemLimit: 90,
|
MemLimit: 90,
|
||||||
},
|
},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
stack := synchronize(want, have, resources)
|
stack, _, _ := synchronize(wish, want, have, nodes, 2*time.Minute)
|
||||||
|
|
||||||
require.Equal(t, []interface{}{
|
require.Equal(t, []interface{}{
|
||||||
processOpReject{
|
processOpReject{
|
||||||
@@ -290,6 +347,8 @@ func TestSynchronizeAddNoResourcesMemory(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestSynchronizeAddNoLimits(t *testing.T) {
|
func TestSynchronizeAddNoLimits(t *testing.T) {
|
||||||
|
wish := map[string]string{}
|
||||||
|
|
||||||
want := []store.Process{
|
want := []store.Process{
|
||||||
{
|
{
|
||||||
UpdatedAt: time.Now(),
|
UpdatedAt: time.Now(),
|
||||||
@@ -301,24 +360,30 @@ func TestSynchronizeAddNoLimits(t *testing.T) {
|
|||||||
|
|
||||||
have := []proxy.Process{}
|
have := []proxy.Process{}
|
||||||
|
|
||||||
resources := map[string]proxy.NodeResources{
|
nodes := map[string]proxy.NodeAbout{
|
||||||
"node1": {
|
"node1": {
|
||||||
|
LastContact: time.Now(),
|
||||||
|
Resources: proxy.NodeResources{
|
||||||
NCPU: 1,
|
NCPU: 1,
|
||||||
CPU: 81,
|
CPU: 81,
|
||||||
Mem: 72,
|
Mem: 72,
|
||||||
CPULimit: 90,
|
CPULimit: 90,
|
||||||
MemLimit: 90,
|
MemLimit: 90,
|
||||||
},
|
},
|
||||||
|
},
|
||||||
"node2": {
|
"node2": {
|
||||||
|
LastContact: time.Now(),
|
||||||
|
Resources: proxy.NodeResources{
|
||||||
NCPU: 1,
|
NCPU: 1,
|
||||||
CPU: 79,
|
CPU: 79,
|
||||||
Mem: 72,
|
Mem: 72,
|
||||||
CPULimit: 90,
|
CPULimit: 90,
|
||||||
MemLimit: 90,
|
MemLimit: 90,
|
||||||
},
|
},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
stack := synchronize(want, have, resources)
|
stack, _, _ := synchronize(wish, want, have, nodes, 2*time.Minute)
|
||||||
|
|
||||||
require.Equal(t, []interface{}{
|
require.Equal(t, []interface{}{
|
||||||
processOpReject{
|
processOpReject{
|
||||||
@@ -329,6 +394,10 @@ func TestSynchronizeAddNoLimits(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestSynchronizeRemove(t *testing.T) {
|
func TestSynchronizeRemove(t *testing.T) {
|
||||||
|
wish := map[string]string{
|
||||||
|
"foobar@": "node2",
|
||||||
|
}
|
||||||
|
|
||||||
want := []store.Process{}
|
want := []store.Process{}
|
||||||
|
|
||||||
have := []proxy.Process{
|
have := []proxy.Process{
|
||||||
@@ -345,24 +414,30 @@ func TestSynchronizeRemove(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
resources := map[string]proxy.NodeResources{
|
nodes := map[string]proxy.NodeAbout{
|
||||||
"node1": {
|
"node1": {
|
||||||
|
LastContact: time.Now(),
|
||||||
|
Resources: proxy.NodeResources{
|
||||||
NCPU: 1,
|
NCPU: 1,
|
||||||
CPU: 7,
|
CPU: 7,
|
||||||
Mem: 65,
|
Mem: 65,
|
||||||
CPULimit: 90,
|
CPULimit: 90,
|
||||||
MemLimit: 90,
|
MemLimit: 90,
|
||||||
},
|
},
|
||||||
|
},
|
||||||
"node2": {
|
"node2": {
|
||||||
|
LastContact: time.Now(),
|
||||||
|
Resources: proxy.NodeResources{
|
||||||
NCPU: 1,
|
NCPU: 1,
|
||||||
CPU: 85,
|
CPU: 85,
|
||||||
Mem: 11,
|
Mem: 11,
|
||||||
CPULimit: 90,
|
CPULimit: 90,
|
||||||
MemLimit: 90,
|
MemLimit: 90,
|
||||||
},
|
},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
stack := synchronize(want, have, resources)
|
stack, resources, reality := synchronize(wish, want, have, nodes, 2*time.Minute)
|
||||||
|
|
||||||
require.Equal(t, []interface{}{
|
require.Equal(t, []interface{}{
|
||||||
processOpDelete{
|
processOpDelete{
|
||||||
@@ -387,9 +462,15 @@ func TestSynchronizeRemove(t *testing.T) {
|
|||||||
MemLimit: 90,
|
MemLimit: 90,
|
||||||
},
|
},
|
||||||
}, resources)
|
}, resources)
|
||||||
|
|
||||||
|
require.Equal(t, map[string]string{}, reality)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestSynchronizeAddRemove(t *testing.T) {
|
func TestSynchronizeAddRemove(t *testing.T) {
|
||||||
|
wish := map[string]string{
|
||||||
|
"foobar2@": "node2",
|
||||||
|
}
|
||||||
|
|
||||||
want := []store.Process{
|
want := []store.Process{
|
||||||
{
|
{
|
||||||
UpdatedAt: time.Now(),
|
UpdatedAt: time.Now(),
|
||||||
@@ -415,24 +496,30 @@ func TestSynchronizeAddRemove(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
resources := map[string]proxy.NodeResources{
|
nodes := map[string]proxy.NodeAbout{
|
||||||
"node1": {
|
"node1": {
|
||||||
|
LastContact: time.Now(),
|
||||||
|
Resources: proxy.NodeResources{
|
||||||
NCPU: 1,
|
NCPU: 1,
|
||||||
CPU: 7,
|
CPU: 7,
|
||||||
Mem: 35,
|
Mem: 35,
|
||||||
CPULimit: 90,
|
CPULimit: 90,
|
||||||
MemLimit: 90,
|
MemLimit: 90,
|
||||||
},
|
},
|
||||||
|
},
|
||||||
"node2": {
|
"node2": {
|
||||||
|
LastContact: time.Now(),
|
||||||
|
Resources: proxy.NodeResources{
|
||||||
NCPU: 1,
|
NCPU: 1,
|
||||||
CPU: 85,
|
CPU: 85,
|
||||||
Mem: 65,
|
Mem: 65,
|
||||||
CPULimit: 90,
|
CPULimit: 90,
|
||||||
MemLimit: 90,
|
MemLimit: 90,
|
||||||
},
|
},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
stack := synchronize(want, have, resources)
|
stack, resources, reality := synchronize(wish, want, have, nodes, 2*time.Minute)
|
||||||
|
|
||||||
require.Equal(t, []interface{}{
|
require.Equal(t, []interface{}{
|
||||||
processOpDelete{
|
processOpDelete{
|
||||||
@@ -465,6 +552,10 @@ func TestSynchronizeAddRemove(t *testing.T) {
|
|||||||
MemLimit: 90,
|
MemLimit: 90,
|
||||||
},
|
},
|
||||||
}, resources)
|
}, resources)
|
||||||
|
|
||||||
|
require.Equal(t, map[string]string{
|
||||||
|
"foobar1@": "node1",
|
||||||
|
}, reality)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestRebalanceNothingToDo(t *testing.T) {
|
func TestRebalanceNothingToDo(t *testing.T) {
|
||||||
@@ -493,24 +584,30 @@ func TestRebalanceNothingToDo(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
resources := map[string]proxy.NodeResources{
|
nodes := map[string]proxy.NodeAbout{
|
||||||
"node1": {
|
"node1": {
|
||||||
|
LastContact: time.Now(),
|
||||||
|
Resources: proxy.NodeResources{
|
||||||
NCPU: 1,
|
NCPU: 1,
|
||||||
CPU: 42,
|
CPU: 42,
|
||||||
Mem: 35,
|
Mem: 35,
|
||||||
CPULimit: 90,
|
CPULimit: 90,
|
||||||
MemLimit: 90,
|
MemLimit: 90,
|
||||||
},
|
},
|
||||||
|
},
|
||||||
"node2": {
|
"node2": {
|
||||||
|
LastContact: time.Now(),
|
||||||
|
Resources: proxy.NodeResources{
|
||||||
NCPU: 1,
|
NCPU: 1,
|
||||||
CPU: 37,
|
CPU: 37,
|
||||||
Mem: 11,
|
Mem: 11,
|
||||||
CPULimit: 90,
|
CPULimit: 90,
|
||||||
MemLimit: 90,
|
MemLimit: 90,
|
||||||
},
|
},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
opStack := rebalance(processes, resources)
|
opStack, _ := rebalance(processes, nodes)
|
||||||
|
|
||||||
require.Empty(t, opStack)
|
require.Empty(t, opStack)
|
||||||
}
|
}
|
||||||
@@ -552,24 +649,30 @@ func TestRebalanceOverload(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
resources := map[string]proxy.NodeResources{
|
nodes := map[string]proxy.NodeAbout{
|
||||||
"node1": {
|
"node1": {
|
||||||
|
LastContact: time.Now(),
|
||||||
|
Resources: proxy.NodeResources{
|
||||||
NCPU: 1,
|
NCPU: 1,
|
||||||
CPU: 91,
|
CPU: 91,
|
||||||
Mem: 35,
|
Mem: 35,
|
||||||
CPULimit: 90,
|
CPULimit: 90,
|
||||||
MemLimit: 90,
|
MemLimit: 90,
|
||||||
},
|
},
|
||||||
|
},
|
||||||
"node2": {
|
"node2": {
|
||||||
|
LastContact: time.Now(),
|
||||||
|
Resources: proxy.NodeResources{
|
||||||
NCPU: 1,
|
NCPU: 1,
|
||||||
CPU: 15,
|
CPU: 15,
|
||||||
Mem: 11,
|
Mem: 11,
|
||||||
CPULimit: 90,
|
CPULimit: 90,
|
||||||
MemLimit: 90,
|
MemLimit: 90,
|
||||||
},
|
},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
opStack := rebalance(processes, resources)
|
opStack, resources := rebalance(processes, nodes)
|
||||||
|
|
||||||
require.NotEmpty(t, opStack)
|
require.NotEmpty(t, opStack)
|
||||||
|
|
||||||
@@ -638,24 +741,30 @@ func TestRebalanceSkip(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
resources := map[string]proxy.NodeResources{
|
nodes := map[string]proxy.NodeAbout{
|
||||||
"node1": {
|
"node1": {
|
||||||
|
LastContact: time.Now(),
|
||||||
|
Resources: proxy.NodeResources{
|
||||||
NCPU: 1,
|
NCPU: 1,
|
||||||
CPU: 91,
|
CPU: 91,
|
||||||
Mem: 35,
|
Mem: 35,
|
||||||
CPULimit: 90,
|
CPULimit: 90,
|
||||||
MemLimit: 90,
|
MemLimit: 90,
|
||||||
},
|
},
|
||||||
|
},
|
||||||
"node2": {
|
"node2": {
|
||||||
|
LastContact: time.Now(),
|
||||||
|
Resources: proxy.NodeResources{
|
||||||
NCPU: 1,
|
NCPU: 1,
|
||||||
CPU: 15,
|
CPU: 15,
|
||||||
Mem: 92,
|
Mem: 92,
|
||||||
CPULimit: 90,
|
CPULimit: 90,
|
||||||
MemLimit: 90,
|
MemLimit: 90,
|
||||||
},
|
},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
opStack := rebalance(processes, resources)
|
opStack, resources := rebalance(processes, nodes)
|
||||||
|
|
||||||
require.NotEmpty(t, opStack)
|
require.NotEmpty(t, opStack)
|
||||||
|
|
||||||
@@ -758,31 +867,40 @@ func TestRebalanceReferenceAffinity(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
resources := map[string]proxy.NodeResources{
|
nodes := map[string]proxy.NodeAbout{
|
||||||
"node1": {
|
"node1": {
|
||||||
|
LastContact: time.Now(),
|
||||||
|
Resources: proxy.NodeResources{
|
||||||
NCPU: 1,
|
NCPU: 1,
|
||||||
CPU: 90,
|
CPU: 90,
|
||||||
Mem: 90,
|
Mem: 90,
|
||||||
CPULimit: 90,
|
CPULimit: 90,
|
||||||
MemLimit: 90,
|
MemLimit: 90,
|
||||||
},
|
},
|
||||||
|
},
|
||||||
"node2": {
|
"node2": {
|
||||||
|
LastContact: time.Now(),
|
||||||
|
Resources: proxy.NodeResources{
|
||||||
NCPU: 1,
|
NCPU: 1,
|
||||||
CPU: 1,
|
CPU: 1,
|
||||||
Mem: 1,
|
Mem: 1,
|
||||||
CPULimit: 90,
|
CPULimit: 90,
|
||||||
MemLimit: 90,
|
MemLimit: 90,
|
||||||
},
|
},
|
||||||
|
},
|
||||||
"node3": {
|
"node3": {
|
||||||
|
LastContact: time.Now(),
|
||||||
|
Resources: proxy.NodeResources{
|
||||||
NCPU: 1,
|
NCPU: 1,
|
||||||
CPU: 1,
|
CPU: 1,
|
||||||
Mem: 1,
|
Mem: 1,
|
||||||
CPULimit: 90,
|
CPULimit: 90,
|
||||||
MemLimit: 90,
|
MemLimit: 90,
|
||||||
},
|
},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
opStack := rebalance(processes, resources)
|
opStack, resources := rebalance(processes, nodes)
|
||||||
|
|
||||||
require.NotEmpty(t, opStack)
|
require.NotEmpty(t, opStack)
|
||||||
|
|
||||||
@@ -822,6 +940,164 @@ func TestRebalanceReferenceAffinity(t *testing.T) {
|
|||||||
}, resources)
|
}, resources)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestCreateNodeProcessMap(t *testing.T) {
|
||||||
|
processes := []proxy.Process{
|
||||||
|
{
|
||||||
|
NodeID: "node1",
|
||||||
|
Order: "start",
|
||||||
|
State: "running",
|
||||||
|
CPU: 1,
|
||||||
|
Mem: 1,
|
||||||
|
Runtime: 42,
|
||||||
|
Config: &app.Config{
|
||||||
|
ID: "foobar1",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
NodeID: "node1",
|
||||||
|
Order: "start",
|
||||||
|
State: "running",
|
||||||
|
CPU: 1,
|
||||||
|
Mem: 1,
|
||||||
|
Runtime: 1,
|
||||||
|
Config: &app.Config{
|
||||||
|
ID: "foobar2",
|
||||||
|
Reference: "ref1",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
NodeID: "node2",
|
||||||
|
Order: "start",
|
||||||
|
State: "running",
|
||||||
|
CPU: 1,
|
||||||
|
Mem: 1,
|
||||||
|
Runtime: 67,
|
||||||
|
Config: &app.Config{
|
||||||
|
ID: "foobar3",
|
||||||
|
Reference: "ref3",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
NodeID: "node2",
|
||||||
|
Order: "start",
|
||||||
|
State: "running",
|
||||||
|
CPU: 1,
|
||||||
|
Mem: 1,
|
||||||
|
Runtime: 42,
|
||||||
|
Config: &app.Config{
|
||||||
|
ID: "foobar3",
|
||||||
|
Reference: "ref2",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
NodeID: "node3",
|
||||||
|
Order: "start",
|
||||||
|
State: "running",
|
||||||
|
CPU: 1,
|
||||||
|
Mem: 1,
|
||||||
|
Runtime: 41,
|
||||||
|
Config: &app.Config{
|
||||||
|
ID: "foobar4",
|
||||||
|
Reference: "ref1",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
NodeID: "node3",
|
||||||
|
Order: "start",
|
||||||
|
State: "running",
|
||||||
|
CPU: 1,
|
||||||
|
Mem: 1,
|
||||||
|
Runtime: 42,
|
||||||
|
Config: &app.Config{
|
||||||
|
ID: "foobar5",
|
||||||
|
Reference: "ref1",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
nodeProcessMap := createNodeProcessMap(processes)
|
||||||
|
|
||||||
|
require.Equal(t, map[string][]proxy.Process{
|
||||||
|
"node1": {
|
||||||
|
{
|
||||||
|
NodeID: "node1",
|
||||||
|
Order: "start",
|
||||||
|
State: "running",
|
||||||
|
CPU: 1,
|
||||||
|
Mem: 1,
|
||||||
|
Runtime: 1,
|
||||||
|
Config: &app.Config{
|
||||||
|
ID: "foobar2",
|
||||||
|
Reference: "ref1",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
NodeID: "node1",
|
||||||
|
Order: "start",
|
||||||
|
State: "running",
|
||||||
|
CPU: 1,
|
||||||
|
Mem: 1,
|
||||||
|
Runtime: 42,
|
||||||
|
Config: &app.Config{
|
||||||
|
ID: "foobar1",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"node2": {
|
||||||
|
{
|
||||||
|
NodeID: "node2",
|
||||||
|
Order: "start",
|
||||||
|
State: "running",
|
||||||
|
CPU: 1,
|
||||||
|
Mem: 1,
|
||||||
|
Runtime: 42,
|
||||||
|
Config: &app.Config{
|
||||||
|
ID: "foobar3",
|
||||||
|
Reference: "ref2",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
NodeID: "node2",
|
||||||
|
Order: "start",
|
||||||
|
State: "running",
|
||||||
|
CPU: 1,
|
||||||
|
Mem: 1,
|
||||||
|
Runtime: 67,
|
||||||
|
Config: &app.Config{
|
||||||
|
ID: "foobar3",
|
||||||
|
Reference: "ref3",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"node3": {
|
||||||
|
{
|
||||||
|
NodeID: "node3",
|
||||||
|
Order: "start",
|
||||||
|
State: "running",
|
||||||
|
CPU: 1,
|
||||||
|
Mem: 1,
|
||||||
|
Runtime: 41,
|
||||||
|
Config: &app.Config{
|
||||||
|
ID: "foobar4",
|
||||||
|
Reference: "ref1",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
NodeID: "node3",
|
||||||
|
Order: "start",
|
||||||
|
State: "running",
|
||||||
|
CPU: 1,
|
||||||
|
Mem: 1,
|
||||||
|
Runtime: 42,
|
||||||
|
Config: &app.Config{
|
||||||
|
ID: "foobar5",
|
||||||
|
Reference: "ref1",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}, nodeProcessMap)
|
||||||
|
}
|
||||||
|
|
||||||
func TestCreateReferenceAffinityNodeMap(t *testing.T) {
|
func TestCreateReferenceAffinityNodeMap(t *testing.T) {
|
||||||
processes := []proxy.Process{
|
processes := []proxy.Process{
|
||||||
{
|
{
|
||||||
@@ -900,7 +1176,7 @@ func TestCreateReferenceAffinityNodeMap(t *testing.T) {
|
|||||||
affinityMap := createReferenceAffinityMap(processes)
|
affinityMap := createReferenceAffinityMap(processes)
|
||||||
|
|
||||||
require.Equal(t, map[string][]referenceAffinityNodeCount{
|
require.Equal(t, map[string][]referenceAffinityNodeCount{
|
||||||
"ref1": {
|
"ref1@": {
|
||||||
{
|
{
|
||||||
nodeid: "node3",
|
nodeid: "node3",
|
||||||
count: 2,
|
count: 2,
|
||||||
@@ -910,13 +1186,13 @@ func TestCreateReferenceAffinityNodeMap(t *testing.T) {
|
|||||||
count: 1,
|
count: 1,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
"ref2": {
|
"ref2@": {
|
||||||
{
|
{
|
||||||
nodeid: "node2",
|
nodeid: "node2",
|
||||||
count: 1,
|
count: 1,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
"ref3": {
|
"ref3@": {
|
||||||
{
|
{
|
||||||
nodeid: "node2",
|
nodeid: "node2",
|
||||||
count: 1,
|
count: 1,
|
||||||
|
@@ -41,6 +41,7 @@ type NodeReader interface {
|
|||||||
IPs() []string
|
IPs() []string
|
||||||
About() NodeAbout
|
About() NodeAbout
|
||||||
Version() NodeVersion
|
Version() NodeVersion
|
||||||
|
Resources() NodeResources
|
||||||
|
|
||||||
Files() NodeFiles
|
Files() NodeFiles
|
||||||
ProcessList() ([]Process, error)
|
ProcessList() ([]Process, error)
|
||||||
@@ -495,6 +496,22 @@ func (n *node) About() NodeAbout {
|
|||||||
return nodeAbout
|
return nodeAbout
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (n *node) Resources() NodeResources {
|
||||||
|
n.stateLock.RLock()
|
||||||
|
defer n.stateLock.RUnlock()
|
||||||
|
|
||||||
|
r := NodeResources{
|
||||||
|
IsThrottling: n.resources.throttling,
|
||||||
|
NCPU: n.resources.ncpu,
|
||||||
|
CPU: n.resources.cpu,
|
||||||
|
CPULimit: n.resources.cpuLimit,
|
||||||
|
Mem: n.resources.mem,
|
||||||
|
MemLimit: n.resources.memLimit,
|
||||||
|
}
|
||||||
|
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
|
||||||
func (n *node) Version() NodeVersion {
|
func (n *node) Version() NodeVersion {
|
||||||
about, err := n.AboutPeer()
|
about, err := n.AboutPeer()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -239,9 +239,8 @@ func (p *proxy) Resources() map[string]NodeResources {
     p.lock.RLock()
     defer p.lock.RUnlock()
 
-    for _, node := range p.nodes {
-        about := node.About()
-        resources[about.ID] = about.Resources
+    for id, node := range p.nodes {
+        resources[id] = node.Resources()
     }
 
     return resources
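With the loop above, the proxy's Resources() returns the per-node snapshots keyed by node ID. A small, assumed usage sketch for a rough cluster-wide utilization figure; the numeric conversions are there only because the exact field types are not shown in this diff:

    // clusterCPULoad sums the reported CPU usage and limits across all nodes
    // and returns the ratio as a rough utilization measure.
    func clusterCPULoad(p *proxy) float64 {
        used, limit := 0.0, 0.0
        for _, r := range p.Resources() {
            used += float64(r.CPU)
            limit += float64(r.CPULimit)
        }
        if limit == 0 {
            return 0
        }
        return used / limit
    }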
@@ -1,7 +1,6 @@
 package store
 
 import (
-    "bytes"
     "encoding/json"
     "fmt"
     "io"
@@ -23,6 +22,7 @@ type Store interface {
 
     ProcessList() []Process
     GetProcess(id app.ProcessID) (Process, error)
+    GetProcessNodeMap() map[string]string
 
     UserList() Users
     GetUser(name string) Users
@@ -68,6 +68,7 @@ const (
     OpUpdateIdentity    Operation = "updateIdentity"
     OpRemoveIdentity    Operation = "removeIdentity"
     OpSetPolicies       Operation = "setPolicies"
+    OpSetProcessNodeMap Operation = "setProcessNodeMap"
 )
 
 type Command struct {
@@ -112,10 +113,19 @@ type CommandSetPolicies struct {
     Policies []access.Policy
 }
 
+type CommandSetProcessNodeMap struct {
+    Map map[string]string
+}
+
 // Implement a FSM
 type store struct {
     lock sync.RWMutex
+    callback func(op Operation)
+
+    logger log.Logger
+
     Process map[string]Process
+    ProcessNodeMap map[string]string
 
     Users struct {
         UpdatedAt time.Time
@@ -126,10 +136,6 @@ type store struct {
         UpdatedAt time.Time
         Policies  map[string][]access.Policy
     }
-
-    callback func(op Operation)
-
-    logger log.Logger
 }
 
 type Config struct {
@@ -139,6 +145,7 @@ type Config struct {
 func NewStore(config Config) (Store, error) {
     s := &store{
         Process:        map[string]Process{},
+        ProcessNodeMap: map[string]string{},
         logger:         config.Logger,
     }
 
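The new ProcessNodeMap keeps the desired process-to-node assignment as plain strings. A minimal sketch of the assumed content (process IDs in their "<id>@<domain>" string form; the node IDs are invented) and the command that carries it:

    // Example payload for the new operation; the concrete IDs are made up.
    cmd := CommandSetProcessNodeMap{
        Map: map[string]string{
            "foobar3@": "node2",
            "foobar4@": "node3",
        },
    }
    _ = cmd // wrapped into a Command with OpSetProcessNodeMap, see the Apply hunk below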
@@ -219,6 +226,12 @@ func (s *store) Apply(entry *raft.Log) interface{} {
         json.Unmarshal(b, &cmd)
 
         err = s.setPolicies(cmd)
+    case OpSetProcessNodeMap:
+        b, _ := json.Marshal(c.Data)
+        cmd := CommandSetProcessNodeMap{}
+        json.Unmarshal(b, &cmd)
+
+        err = s.setProcessNodeMap(cmd)
     default:
         s.logger.Warn().WithField("operation", c.Operation).Log("Unknown operation")
         return nil
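A minimal sketch of how a leader could replicate the assignment through the FSM. It assumes the existing Command envelope with Operation and Data fields (both referenced in the Apply code above) and a hypothetical applyCommand helper that submits the entry to raft:

    // storeProcessNodeMap proposes the process/node assignment as a raft log
    // entry; how the command actually reaches raft is outside this diff.
    func storeProcessNodeMap(applyCommand func(Command) error, m map[string]string) error {
        return applyCommand(Command{
            Operation: OpSetProcessNodeMap,
            Data:      CommandSetProcessNodeMap{Map: m},
        })
    }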
@@ -244,6 +257,10 @@ func (s *store) addProcess(cmd CommandAddProcess) error {
 
     id := cmd.Config.ProcessID().String()
 
+    if cmd.Config.LimitCPU <= 0 || cmd.Config.LimitMemory <= 0 {
+        return NewStoreError("the process with the ID '%s' must have limits defined", id)
+    }
+
     _, ok := s.Process[id]
     if ok {
         return NewStoreError("the process with the ID '%s' already exists", id)
@@ -283,15 +300,16 @@ func (s *store) updateProcess(cmd CommandUpdateProcess) error {
     srcid := cmd.ID.String()
     dstid := cmd.Config.ProcessID().String()
 
+    if cmd.Config.LimitCPU <= 0 || cmd.Config.LimitMemory <= 0 {
+        return NewStoreError("the process with the ID '%s' must have limits defined", dstid)
+    }
+
     p, ok := s.Process[srcid]
     if !ok {
         return NewStoreError("the process with the ID '%s' doesn't exists", srcid)
     }
 
-    currentHash := p.Config.Hash()
-    replaceHash := cmd.Config.Hash()
-
-    if bytes.Equal(currentHash, replaceHash) {
+    if p.Config.Equal(cmd.Config) {
         return nil
     }
 
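Both addProcess and updateProcess now reject configs without resource limits. The rule, condensed into a sketch (field names as used in the checks above; the helper itself is not part of this commit):

    // hasLimits mirrors the new validation: a process config must declare a
    // CPU and a memory limit before the cluster store accepts it.
    func hasLimits(c *app.Config) bool {
        return c.LimitCPU > 0 && c.LimitMemory > 0
    }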
@@ -404,6 +422,15 @@ func (s *store) setPolicies(cmd CommandSetPolicies) error {
     return nil
 }
 
+func (s *store) setProcessNodeMap(cmd CommandSetProcessNodeMap) error {
+    s.lock.Lock()
+    defer s.lock.Unlock()
+
+    s.ProcessNodeMap = cmd.Map
+
+    return nil
+}
+
 func (s *store) OnApply(fn func(op Operation)) {
     s.lock.Lock()
     defer s.lock.Unlock()
@@ -545,6 +572,19 @@ func (s *store) PolicyUserList(name string) Policies {
     return p
 }
 
+func (s *store) GetProcessNodeMap() map[string]string {
+    s.lock.RLock()
+    defer s.lock.RUnlock()
+
+    m := map[string]string{}
+
+    for key, value := range s.ProcessNodeMap {
+        m[key] = value
+    }
+
+    return m
+}
+
 type fsmSnapshot struct {
     data []byte
 }
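GetProcessNodeMap hands out a copy, so callers can inspect the assignment without holding the store lock or mutating shared state. A small usage sketch; the assumption is that the map is keyed by the process ID's string form:

    // assignedNode looks up which node a process is currently mapped to.
    func assignedNode(s Store, id app.ProcessID) (string, bool) {
        nodeid, ok := s.GetProcessNodeMap()[id.String()]
        return nodeid, ok
    }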
@@ -291,6 +291,7 @@ func (d *Config) init() {
     d.vars.Register(value.NewClusterPeerList(&d.Cluster.Peers, []string{""}, ","), "cluster.peers", "CORE_CLUSTER_PEERS", nil, "Raft addresses of cores that are part of the cluster", false, false)
     d.vars.Register(value.NewInt64(&d.Cluster.SyncInterval, 5), "cluster.sync_interval", "CORE_CLUSTER_SYNC_INTERVAL", nil, "Interval between aligning the process in the cluster DB with the processes on the nodes", true, false)
     d.vars.Register(value.NewInt64(&d.Cluster.NodeRecoverTimeout, 120), "cluster.node_recover_timeout", "CORE_CLUSTER_NODE_RECOVER_TIMEOUT", nil, "Timeout for a node to recover before rebalancing the processes", true, false)
+    d.vars.Register(value.NewInt64(&d.Cluster.EmergencyLeaderTimeout, 10), "cluster.emergency_leader_timeout", "CORE_CLUSTER_EMERGENCY_LEADER_TIMEOUT", nil, "Timeout for establishing the emergency leadership after lost contact to raft leader", true, false)
 }
 
 // Validate validates the current state of the Config for completeness and sanity. Errors are
@@ -181,6 +181,7 @@ type Data struct {
         Peers                  []string `json:"peers"`
         SyncInterval           int64    `json:"sync_interval" format:"int64"`            // seconds
         NodeRecoverTimeout     int64    `json:"node_recover_timeout" format:"int64"`     // seconds
+        EmergencyLeaderTimeout int64    `json:"emergency_leader_timeout" format:"int64"` // seconds
     } `json:"cluster"`
 }
 
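All three cluster timings are plain int64 seconds in the JSON config. A minimal round-trip sketch, assuming the Data struct above and encoding/json are available in the calling package (the values match the registered defaults):

    // Round-trip sketch: unmarshal a cluster section and read the new field.
    func exampleClusterTimeouts() (int64, error) {
        raw := []byte(`{"cluster": {"sync_interval": 5, "node_recover_timeout": 120, "emergency_leader_timeout": 10}}`)

        cfg := Data{}
        if err := json.Unmarshal(raw, &cfg); err != nil {
            return 0, err
        }

        return cfg.Cluster.EmergencyLeaderTimeout, nil // 10, i.e. ten seconds
    }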
@@ -96,17 +96,27 @@ func (f *consoleFormatter) String(e *Event) string {
         value := e.Data[key]
 
         switch val := value.(type) {
+        case bool:
+            if val {
+                v = "true"
+            } else {
+                v = "false"
+            }
         case string:
             v = f.quote(val)
         case error:
             v = f.quote(val.Error())
         default:
+            if str, ok := val.(fmt.Stringer); ok {
+                v = f.quote(str.String())
+            } else {
                 if jsonvalue, err := json.Marshal(value); err == nil {
                     v = string(jsonvalue)
                 } else {
                     v = f.quote(err.Error())
                 }
             }
+        }
 
         message += fmt.Sprintf(" %s", f.writeKV(key, v))
     }
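With this change the console formatter renders bool fields as bare true/false and prefers a value's fmt.Stringer output over JSON encoding. A hedged illustration; the example type is invented and the exact key=value layout depends on writeKV, which is not shown here:

    // A field value whose Stringer form should show up in console logs.
    type nodeState string

    func (s nodeState) String() string { return "node is " + string(s) }

    // A log field holding nodeState("throttling") is now rendered via String()
    // (quoted), and a bool field prints as the literal true or false instead of
    // being passed through json.Marshal.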
@@ -176,6 +176,10 @@ func (config *Config) Hash() []byte {
     return sum[:]
 }
 
+func (c *Config) Equal(a *Config) bool {
+    return bytes.Equal(c.Hash(), a.Hash())
+}
+
 func (c *Config) ProcessID() ProcessID {
     return ProcessID{
         ID: c.ID,
@@ -293,7 +297,7 @@ func (p ProcessID) String() string {
     return p.ID + "@" + p.Domain
 }
 
-func (p ProcessID) Equals(b ProcessID) bool {
+func (p ProcessID) Equal(b ProcessID) bool {
     if p.ID == b.ID && p.Domain == b.Domain {
         return true
     }
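Config.Equal compares two configs by their hashes, and ProcessID.Equals becomes Equal. A short, assumed caller showing both (the helper is not part of this commit):

    // needsUpdate reports whether a replacement config differs from the current
    // one, either in identity or in content.
    func needsUpdate(current, replacement *app.Config) bool {
        if !current.ProcessID().Equal(replacement.ProcessID()) {
            return true
        }
        return !current.Equal(replacement)
    }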
@@ -1,7 +1,6 @@
 package restream
 
 import (
-    "bytes"
     "context"
     "errors"
     "fmt"
@@ -1177,17 +1176,14 @@ func (r *restream) UpdateProcess(id app.ProcessID, config *app.Config) error {
         return err
     }
 
-    currentHash := task.config.Hash()
-    replaceHash := t.config.Hash()
-
     // If the new config has the same hash as the current config, do nothing.
-    if bytes.Equal(currentHash, replaceHash) {
+    if task.config.Equal(t.config) {
         return nil
     }
 
     tid := t.ID()
 
-    if !tid.Equals(id) {
+    if !tid.Equal(id) {
         _, ok := r.tasks[tid]
         if ok {
             return ErrProcessExists