From eb75a6829cbcc302e4da3553104920f42ee55d4e Mon Sep 17 00:00:00 2001 From: 0xdcarns Date: Wed, 28 Sep 2022 16:17:49 -0400 Subject: [PATCH] added wipe failover cases and ceased node update on metrics update --- controllers/node.go | 6 ++++++ ee/initialize.go | 27 ++++++++++++++------------- ee/logic/failover.go | 27 +++++++++++++++++++++++++++ logic/gateway.go | 8 ++++---- logic/server.go | 3 +++ mq/handlers.go | 25 +++++++++++++------------ 6 files changed, 67 insertions(+), 29 deletions(-) diff --git a/controllers/node.go b/controllers/node.go index 14416df4..b6422b59 100644 --- a/controllers/node.go +++ b/controllers/node.go @@ -886,6 +886,12 @@ func updateNode(w http.ResponseWriter, r *http.Request) { } } + if ifaceDelta && servercfg.Is_EE { + if err = logic.EnterpriseResetAllPeersFailovers.(func(string, string) error)(node.ID, node.Network); err != nil { + logger.Log(0, "failed to reset failover lists during node update for node", node.Name, node.Network) + } + } + err = logic.UpdateNode(&node, &newNode) if err != nil { logger.Log(0, r.Header.Get("user"), diff --git a/ee/initialize.go b/ee/initialize.go index ca9b09c8..b3273039 100644 --- a/ee/initialize.go +++ b/ee/initialize.go @@ -30,8 +30,9 @@ func InitEE() { AddLicenseHooks() }) logic.EnterpriseFailoverFunc = eelogic.SetFailover - // logic.EnterpriseResetFailoverFunc = eelogic.ResetFailover - // resetFailover() + logic.EnterpriseResetFailoverFunc = eelogic.ResetFailover + logic.EnterpriseResetAllPeersFailovers = eelogic.WipeAffectedFailoversOnly + resetFailover() } func setControllerLimits() { @@ -42,17 +43,17 @@ func setControllerLimits() { servercfg.Is_EE = true } -// func resetFailover() { -// nets, err := logic.GetNetworks() -// if err == nil { -// for _, net := range nets { -// err = logic.EnterpriseResetFailoverFunc.(func(string) error)(net.NetID) -// if err != nil { -// logger.Log(0, "failed to reset failover on network", net.NetID, ":", err.Error()) -// } -// } -// } -// } +func resetFailover() { 
+ nets, err := logic.GetNetworks() + if err == nil { + for _, net := range nets { + err = ResetFailover(net.NetID) + if err != nil { + logger.Log(0, "failed to reset failover on network", net.NetID, ":", err.Error()) + } + } + } +} func retrieveEELogo() string { return ` diff --git a/ee/logic/failover.go b/ee/logic/failover.go index 4928de50..146bd13f 100644 --- a/ee/logic/failover.go +++ b/ee/logic/failover.go @@ -92,3 +92,30 @@ func WipeFailover(nodeid string) error { } return nil } + +// WipeAffectedFailoversOnly - wipes failovers for nodes that have given node (ID) +// in their respective failover lists +func WipeAffectedFailoversOnly(nodeid, network string) error { + currentNetworkNodes, err := logic.GetNetworkNodes(network) + if err != nil { + return nil + } + + for i := range currentNetworkNodes { + currNodeID := currentNetworkNodes[i].ID + if currNodeID == nodeid { + WipeFailover(nodeid) + continue + } + currMetrics, err := logic.GetMetrics(currNodeID) + if err != nil || currMetrics == nil { + continue + } + if currMetrics.FailoverPeers != nil { + if len(currMetrics.FailoverPeers[nodeid]) > 0 { + WipeFailover(currNodeID) + } + } + } + return nil +} diff --git a/logic/gateway.go b/logic/gateway.go index 537eaa19..d8a45cf5 100644 --- a/logic/gateway.go +++ b/logic/gateway.go @@ -276,10 +276,10 @@ func DeleteIngressGateway(networkName string, nodeid string) (models.Node, error } } - // err = EnterpriseResetFailoverFunc.(func(string) error)(node.Network) - // if err != nil { - // logger.Log(0, "failed to reset failover on network", node.Network, ":", err.Error()) - // } + err = EnterpriseResetFailoverFunc.(func(string) error)(node.Network) + if err != nil { + logger.Log(0, "failed to reset failover on network", node.Network, ":", err.Error()) + } data, err := json.Marshal(&node) if err != nil { diff --git a/logic/server.go b/logic/server.go index b1e587f5..57ffd7ab 100644 --- a/logic/server.go +++ b/logic/server.go @@ -27,6 +27,9 @@ var EnterpriseFailoverFunc 
interface{} // EnterpriseResetFailoverFunc - interface to control reset failover funcs var EnterpriseResetFailoverFunc interface{} +// EnterpriseResetAllPeersFailovers - resets all nodes that are considering a node to be failover worthy (inclusive) +var EnterpriseResetAllPeersFailovers interface{} + // == Join, Checkin, and Leave for Server == // KUBERNETES_LISTEN_PORT - starting port for Kubernetes in order to use NodePort range diff --git a/mq/handlers.go b/mq/handlers.go index dcf4671e..14f72435 100644 --- a/mq/handlers.go +++ b/mq/handlers.go @@ -122,7 +122,7 @@ func UpdateMetrics(client mqtt.Client, msg mqtt.Message) { return } - updateNodeMetrics(&currentNode, &newMetrics) + shouldUpdate := updateNodeMetrics(&currentNode, &newMetrics) if err = logic.UpdateMetrics(id, &newMetrics); err != nil { logger.Log(1, "faield to update node metrics", id, currentNode.Name, err.Error()) @@ -139,13 +139,12 @@ func UpdateMetrics(client mqtt.Client, msg mqtt.Message) { err := logic.EnterpriseFailoverFunc.(func(*models.Node) error)(&currentNode) if err != nil { logger.Log(0, "failed to failover for node", currentNode.Name, "on network", currentNode.Network, "-", err.Error()) - } else { - if err := NodeUpdate(&currentNode); err != nil { - logger.Log(1, "error publishing node update to node", currentNode.Name, err.Error()) - } - if err := PublishPeerUpdate(&currentNode, true); err != nil { - logger.Log(1, "error publishing peer update after auto relay for node", currentNode.Name, err.Error()) - } + } + } + + if shouldUpdate { + if err = PublishPeerUpdate(&currentNode, true); err != nil { + logger.Log(0, "failed to publish update after failover peer change for node", currentNode.Name, currentNode.Network) } } @@ -208,14 +207,14 @@ func updateNodePeers(currentNode *models.Node) { } } -func updateNodeMetrics(currentNode *models.Node, newMetrics *models.Metrics) { +func updateNodeMetrics(currentNode *models.Node, newMetrics *models.Metrics) bool { if newMetrics.FailoverPeers == nil { newMetrics.FailoverPeers = 
make(map[string]string) } oldMetrics, err := logic.GetMetrics(currentNode.ID) if err != nil { logger.Log(1, "error finding old metrics for node", currentNode.ID, currentNode.Name) - return + return false } if oldMetrics.FailoverPeers == nil { oldMetrics.FailoverPeers = make(map[string]string) @@ -256,7 +255,7 @@ func updateNodeMetrics(currentNode *models.Node, newMetrics *models.Metrics) { nodes, err := logic.GetNetworkNodes(currentNode.Network) if err != nil { logger.Log(0, "failed to retrieve nodes while updating metrics") - return + return false } for _, node := range nodes { if !newMetrics.Connectivity[node.ID].Connected && @@ -265,14 +264,16 @@ func updateNodeMetrics(currentNode *models.Node, newMetrics *models.Metrics) { newMetrics.FailoverPeers[node.ID] = node.FailoverNode } } - + shouldUpdate := false for k, v := range oldMetrics.FailoverPeers { if len(v) > 0 && len(newMetrics.FailoverPeers[k]) == 0 { newMetrics.FailoverPeers[k] = v + shouldUpdate = true } } for k := range oldMetrics.Connectivity { // cleanup any left over data, self healing delete(newMetrics.Connectivity, k) } + return shouldUpdate }