Files
netmaker/mq/mq.go
Abhishek K 66069fbc34 NET-1082: Scale Testing Fixes (#2894)
* add additional mutex lock on node acls func

* increase verbosity

* disable acls on cloud emqx

* add emqx creds creation to go routine

* add debug log of mq client id

* comment port check

* uncomment port check

* check for connection mq connection open

* use username for client id

* add write mutex on acl is allowed

* add mq connection lost handler on server

* spin off zombie init as go routine

* get whole api path from config

* Revert "get whole api path from config"

This reverts commit 392f5f4c5f.

* update extclient acls async

* add additional mutex lock on node acls func

(cherry picked from commit 5325f0e7d7)

* increase verbosity

(cherry picked from commit 705b3cf0bf)

* add emqx creds creation to go routine

(cherry picked from commit c8e65f4820)

* add debug log of mq client id

(cherry picked from commit 29c5d6ceca)

* comment port check

(cherry picked from commit db8d6d95ea)

* check for connection mq connection open

(cherry picked from commit 13b11033b0)

* use username for client id

(cherry picked from commit e90c7386de)

* add write mutex on acl is allowed

(cherry picked from commit 4cae1b0bb4)

* add mq connection lost handler on server

(cherry picked from commit c82918ad35)

* spin off zombie init as go routine

(cherry picked from commit 6d65c44c43)

* update extclient acls async

(cherry picked from commit 6557ef1ebe)

* additionl logs for oauth user flow

(cherry picked from commit 61703038ae)

* add more debug logs

(cherry picked from commit 5980beacd1)

* add more debug logs

(cherry picked from commit 4d001f0d27)

* add set auth secret

(cherry picked from commit f41cef5da5)

* fix fetch pass

(cherry picked from commit 825caf4b60)

* make sure auth secret is set only once

(cherry picked from commit ba33ed02aa)

* make sure auth secret is set only once

(cherry picked from commit 920ac4c507)

* comment usage of emqx acls

* replace  read lock with write lock on acls

* replace  read lock with write lock on acls

(cherry picked from commit 808d2135c8)

* use deadlock pkg for visibility

* add additional mutex locks

* remove race flag

* on mq re-connecting donot exit if failed

* on mq re-connecting donot exit if failed

* revert mutex package change

* set mq clean session

* remove debug log

* go mod tidy

* revert on prem emqx acls del
2024-04-11 21:18:57 +05:30

150 lines
4.8 KiB
Go

package mq
import (
"context"
"fmt"
"log"
"time"
mqtt "github.com/eclipse/paho.mqtt.golang"
"github.com/gravitl/netmaker/logger"
"github.com/gravitl/netmaker/servercfg"
"golang.org/x/exp/slog"
)
// KEEPALIVE_TIMEOUT - time in seconds for timeout
const KEEPALIVE_TIMEOUT = 60 //timeout in seconds
// MQ_DISCONNECT - disconnects MQ
const MQ_DISCONNECT = 250
// MQ_TIMEOUT - timeout for MQ
const MQ_TIMEOUT = 30
var peer_force_send = 0
var mqclient mqtt.Client
func setMqOptions(user, password string, opts *mqtt.ClientOptions) {
broker, _ := servercfg.GetMessageQueueEndpoint()
opts.AddBroker(broker)
opts.ClientID = user
opts.SetUsername(user)
opts.SetPassword(password)
opts.SetAutoReconnect(true)
opts.SetConnectRetry(true)
opts.SetCleanSession(true)
opts.SetConnectRetryInterval(time.Second * 4)
opts.SetKeepAlive(time.Minute)
opts.SetCleanSession(true)
opts.SetWriteTimeout(time.Minute)
}
// SetupMQTT creates a connection to broker and return client
func SetupMQTT(fatal bool) {
if servercfg.GetBrokerType() == servercfg.EmqxBrokerType {
if emqx.GetType() == servercfg.EmqxOnPremDeploy {
time.Sleep(10 * time.Second) // wait for the REST endpoint to be ready
// setup authenticator and create admin user
if err := emqx.CreateEmqxDefaultAuthenticator(); err != nil {
logger.Log(0, err.Error())
}
emqx.DeleteEmqxUser(servercfg.GetMqUserName())
if err := emqx.CreateEmqxUserforServer(); err != nil {
log.Fatal(err)
}
// create an ACL authorization source for the built in EMQX MNESIA database
if err := emqx.CreateEmqxDefaultAuthorizer(); err != nil {
logger.Log(0, err.Error())
}
// create a default deny ACL to all topics for all users
if err := emqx.CreateDefaultDenyRule(); err != nil {
log.Fatal(err)
}
} else {
emqx.DeleteEmqxUser(servercfg.GetMqUserName())
if err := emqx.CreateEmqxUserforServer(); err != nil {
log.Fatal(err)
}
}
}
opts := mqtt.NewClientOptions()
setMqOptions(servercfg.GetMqUserName(), servercfg.GetMqPassword(), opts)
logger.Log(0, "Mq Client Connecting with Random ID: ", opts.ClientID)
opts.SetOnConnectHandler(func(client mqtt.Client) {
serverName := servercfg.GetServer()
if token := client.Subscribe(fmt.Sprintf("update/%s/#", serverName), 0, mqtt.MessageHandler(UpdateNode)); token.WaitTimeout(MQ_TIMEOUT*time.Second) && token.Error() != nil {
client.Disconnect(240)
logger.Log(0, "node update subscription failed")
}
if token := client.Subscribe(fmt.Sprintf("host/serverupdate/%s/#", serverName), 0, mqtt.MessageHandler(UpdateHost)); token.WaitTimeout(MQ_TIMEOUT*time.Second) && token.Error() != nil {
client.Disconnect(240)
logger.Log(0, "host update subscription failed")
}
if token := client.Subscribe(fmt.Sprintf("signal/%s/#", serverName), 0, mqtt.MessageHandler(ClientPeerUpdate)); token.WaitTimeout(MQ_TIMEOUT*time.Second) && token.Error() != nil {
client.Disconnect(240)
logger.Log(0, "node client subscription failed")
}
if token := client.Subscribe(fmt.Sprintf("metrics/%s/#", serverName), 0, mqtt.MessageHandler(UpdateMetrics)); token.WaitTimeout(MQ_TIMEOUT*time.Second) && token.Error() != nil {
client.Disconnect(240)
logger.Log(0, "node metrics subscription failed")
}
opts.SetOrderMatters(false)
opts.SetResumeSubs(true)
})
opts.SetConnectionLostHandler(func(c mqtt.Client, e error) {
slog.Warn("detected broker connection lost", "err", e.Error())
c.Disconnect(250)
slog.Info("re-initiating MQ connection")
SetupMQTT(false)
})
mqclient = mqtt.NewClient(opts)
tperiod := time.Now().Add(10 * time.Second)
for {
if token := mqclient.Connect(); !token.WaitTimeout(MQ_TIMEOUT*time.Second) || token.Error() != nil {
logger.Log(2, "unable to connect to broker, retrying ...")
if time.Now().After(tperiod) {
if token.Error() == nil {
if fatal {
logger.FatalLog("could not connect to broker, token timeout, exiting ...")
}
logger.Log(0, "could not connect to broker, token timeout, exiting ...")
} else {
if fatal {
logger.FatalLog("could not connect to broker, exiting ...", token.Error().Error())
}
logger.Log(0, "could not connect to broker, exiting ...", token.Error().Error())
}
}
} else {
break
}
time.Sleep(2 * time.Second)
}
}
// Keepalive -- periodically pings all nodes to let them know server is still alive and doing well
func Keepalive(ctx context.Context) {
for {
select {
case <-ctx.Done():
return
case <-time.After(time.Second * KEEPALIVE_TIMEOUT):
sendPeers()
}
}
}
// IsConnected - function for determining if the mqclient is connected or not
func IsConnected() bool {
return mqclient != nil && mqclient.IsConnectionOpen()
}
// CloseClient - function to close the mq connection from server
func CloseClient() {
mqclient.Disconnect(250)
}