Support failover in cluster (experimental)

finley
2025-04-19 22:11:58 +08:00
parent 14ec8277ca
commit f4a2c92fc1
20 changed files with 739 additions and 136 deletions

View File

@@ -22,6 +22,9 @@ type Cluster struct {
slotsManager *slotsManager
rebalanceManger *rebalanceManager
transactions *TransactionManager
replicaManager *replicaManager
closeChan chan struct{}
// allow inject route implementation
getSlotImpl func(key string) uint32
@@ -33,7 +36,9 @@ type Config struct {
raft.RaftConfig
StartAsSeed bool
JoinAddress string
Master string
connectionStub ConnectionFactory // for test
noCron bool // for test
}
func (c *Cluster) SelfID() string {
@@ -123,7 +128,11 @@ func NewCluster(cfg *Config) (*Cluster, error) {
if err != nil {
return nil, err
}
result := conn.Send(utils.ToCmdLine(joinClusterCommand, cfg.RedisAdvertiseAddr, cfg.RaftAdvertiseAddr))
joinCmdLine := utils.ToCmdLine(joinClusterCommand, cfg.RedisAdvertiseAddr, cfg.RaftAdvertiseAddr)
if cfg.Master != "" {
joinCmdLine = append(joinCmdLine, []byte(cfg.Master))
}
result := conn.Send(joinCmdLine)
if err := protocol.Try2ErrorReply(result); err != nil {
return nil, err
}
@@ -137,6 +146,8 @@ func NewCluster(cfg *Config) (*Cluster, error) {
rebalanceManger: newRebalanceManager(),
slotsManager: newSlotsManager(),
transactions: newTransactionManager(),
replicaManager: newReplicaManager(),
closeChan: make(chan struct{}),
}
cluster.pickNodeImpl = func(slotID uint32) string {
return defaultPickNodeImpl(cluster, slotID)
@@ -146,6 +157,8 @@ func NewCluster(cfg *Config) (*Cluster, error) {
}
cluster.injectInsertCallback()
cluster.injectDeleteCallback()
cluster.registerOnFailover()
go cluster.clusterCron()
return cluster, nil
}
@@ -155,6 +168,7 @@ func (cluster *Cluster) AfterClientClose(c redis.Connection) {
}
func (cluster *Cluster) Close() {
close(cluster.closeChan)
cluster.db.Close()
err := cluster.raftNode.Close()
if err != nil {

View File

@@ -6,6 +6,7 @@ import (
"time"
"github.com/hdt3213/godis/cluster/raft"
"github.com/hdt3213/godis/interface/redis"
"github.com/hdt3213/godis/lib/utils"
"github.com/hdt3213/godis/redis/connection"
"github.com/hdt3213/godis/redis/protocol"
@@ -33,6 +34,7 @@ func TestClusterBootstrap(t *testing.T) {
},
StartAsSeed: true,
connectionStub: connections,
noCron: true,
}
leader, err := NewCluster(leaderCfg)
if err != nil {
@@ -72,6 +74,7 @@ func TestClusterBootstrap(t *testing.T) {
StartAsSeed: false,
JoinAddress: leaderCfg.RedisAdvertiseAddr,
connectionStub: connections,
noCron: true,
}
follower, err := NewCluster(followerCfg)
if err != nil {
@@ -132,3 +135,104 @@ func TestClusterBootstrap(t *testing.T) {
}
}
}
func TestFailover(t *testing.T) {
// start leader
leaderDir := "test/0"
os.RemoveAll(leaderDir)
os.MkdirAll(leaderDir, 0777)
defer func() {
os.RemoveAll(leaderDir)
}()
RegisterCmd("slaveof", func(cluster *Cluster, c redis.Connection, cmdLine CmdLine) redis.Reply {
return protocol.MakeOkReply()
})
// connection stub
connections := NewInMemConnectionFactory()
leaderCfg := &Config{
RaftConfig: raft.RaftConfig{
RedisAdvertiseAddr: "127.0.0.1:6399",
RaftListenAddr: "127.0.0.1:26666",
RaftAdvertiseAddr: "127.0.0.1:26666",
Dir: leaderDir,
},
StartAsSeed: true,
connectionStub: connections,
noCron: true,
}
leader, err := NewCluster(leaderCfg)
if err != nil {
t.Error(err)
return
}
connections.nodes[leaderCfg.RedisAdvertiseAddr] = leader
// start follower
followerDir := "test/1"
os.RemoveAll(followerDir)
os.MkdirAll(followerDir, 0777)
defer func() {
os.RemoveAll(followerDir)
}()
followerCfg := &Config{
RaftConfig: raft.RaftConfig{
RedisAdvertiseAddr: "127.0.0.1:6499",
RaftListenAddr: "127.0.0.1:26667",
RaftAdvertiseAddr: "127.0.0.1:26667",
Dir: followerDir,
},
StartAsSeed: false,
JoinAddress: leaderCfg.RedisAdvertiseAddr,
connectionStub: connections,
noCron: true,
Master: leader.SelfID(),
}
follower, err := NewCluster(followerCfg)
if err != nil {
t.Error(err)
return
}
connections.nodes[followerCfg.RedisAdvertiseAddr] = follower
_ = follower.SelfID()
// check nodes
joined := false
for i := 0; i < 10; i++ {
nodes, err := leader.raftNode.GetNodes()
if err != nil {
t.Log(err)
continue
}
if len(nodes) == 2 {
t.Log("join success")
joined = true
break
}
time.Sleep(time.Second)
}
if !joined {
t.Error("join failed")
return
}
// trigger failover: mark the leader's own master heartbeat as timed out
leader.replicaManager.masterHeartbeats[leader.SelfID()] = time.Now().Add(-time.Hour)
leader.doFailoverCheck()
time.Sleep(2 * time.Second)
success := false
for i := 0; i < 1000; i++ {
leader.raftNode.FSM.WithReadLock(func(fsm *raft.FSM) {
ms := fsm.MasterSlaves[follower.SelfID()]
if ms != nil && len(ms.Slaves) > 0 {
success = true
}
})
if success {
t.Log("failover success")
break
} else {
time.Sleep(time.Second)
}
}
if !success {
t.Error("failover failed")
}
}

cluster/core/cron.go Normal file (+36)
View File

@@ -0,0 +1,36 @@
package core
import (
"sync/atomic"
"time"
"github.com/hdt3213/godis/cluster/raft"
)
func (cluster *Cluster) clusterCron() {
if cluster.config.noCron {
return
}
ticker := time.NewTicker(time.Second)
var running int32
for {
select {
case <-ticker.C:
if cluster.raftNode.State() == raft.Leader {
if atomic.CompareAndSwapInt32(&running, 0, 1) {
// prevent overlapping check/rebalance rounds
go func() {
cluster.doFailoverCheck()
cluster.doRebalance()
atomic.StoreInt32(&running, 0)
}()
}
} else {
cluster.sendHeartbeat()
}
case <-cluster.closeChan:
ticker.Stop()
return
}
}
}
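With noCron set (as the tests in this commit do), this loop exits immediately, so a failover check has to be driven by hand. TestFailover above does essentially the following (sketch; leader is the *Cluster acting as raft leader in that test):

// Backdate the leader's own heartbeat record so it looks timed out,
// then run a single failover check directly on the raft leader.
leader.replicaManager.masterHeartbeats[leader.SelfID()] = time.Now().Add(-time.Hour)
leader.doFailoverCheck()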

View File

@@ -23,10 +23,10 @@ func init() {
RegisterCmd(migrationChangeRouteCommand, execMigrationChangeRoute)
}
// execJoin handles cluster-join command
// format: cluster-join redisAddress (advertised)raftAddress
// execJoin handles the cluster-join command as the raft leader
// format: cluster-join redisAddress(advertised) raftAddress [masterId]
func execJoin(cluster *Cluster, c redis.Connection, cmdLine CmdLine) redis.Reply {
if len(cmdLine) != 3 {
if len(cmdLine) < 3 {
return protocol.MakeArgNumErrReply(joinClusterCommand)
}
state := cluster.raftNode.State()
@@ -42,10 +42,26 @@ func execJoin(cluster *Cluster, c redis.Connection, cmdLine CmdLine) redis.Reply
// self node is leader
redisAddr := string(cmdLine[1])
raftAddr := string(cmdLine[2])
err := cluster.raftNode.HandleJoin(redisAddr, raftAddr)
err := cluster.raftNode.AddToRaft(redisAddr, raftAddr)
if err != nil {
return protocol.MakeErrReply(err.Error())
}
master := ""
if len(cmdLine) == 4 {
master = string(cmdLine[3])
}
_, err = cluster.raftNode.Propose(&raft.LogEntry{
Event: raft.EventJoin,
JoinTask: &raft.JoinTask{
NodeId: redisAddr,
Master: master,
},
})
if err != nil {
// todo: remove the node from raft
return protocol.MakeErrReply(err.Error())
}
// join succeeded, rebalance node
return protocol.MakeOkReply()
}
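With the optional third argument, the join request a new replica sends to the raft leader looks roughly like the line below (addresses taken from TestFailover; the last argument is the master's id as returned by SelfID, and is omitted when joining as an independent master):

cluster-join 127.0.0.1:6499 127.0.0.1:26667 127.0.0.1:6399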
@@ -114,17 +130,14 @@ func (cluster *Cluster) triggerMigrationTask(task *raft.MigratingTask) error {
}
func (cluster *Cluster) makeRebalancePlan() ([]*raft.MigratingTask, error) {
nodes, err := cluster.raftNode.GetNodes()
if err != nil {
return nil, err
}
avgSlot := int(math.Ceil(float64(SlotCount) / float64(len(nodes))))
var migratings []*raft.MigratingTask
cluster.raftNode.FSM.WithReadLock(func(fsm *raft.FSM) {
avgSlot := int(math.Ceil(float64(SlotCount) / float64(len(fsm.MasterSlaves))))
var exportingNodes []string
var importingNodes []string
for _, node := range nodes {
nodeId := string(node.ID)
for _, ms := range fsm.MasterSlaves {
nodeId := ms.MasterId
nodeSlots := fsm.Node2Slot[nodeId]
if len(nodeSlots) > avgSlot+1 {
exportingNodes = append(exportingNodes, nodeId)
@@ -200,7 +213,7 @@ func (cluster *Cluster) waitCommitted(peer string, logIndex uint64) error {
// format: cluster.migration.changeroute taskid
func execMigrationChangeRoute(cluster *Cluster, c redis.Connection, cmdLine CmdLine) redis.Reply {
if len(cmdLine) != 2 {
return protocol.MakeArgNumErrReply(joinClusterCommand)
return protocol.MakeArgNumErrReply(migrationChangeRouteCommand)
}
state := cluster.raftNode.State()
if state != raft.Leader {

View File

@@ -0,0 +1,155 @@
package core
import (
"net"
"sync"
"time"
"github.com/hdt3213/godis/cluster/raft"
"github.com/hdt3213/godis/interface/redis"
"github.com/hdt3213/godis/lib/logger"
"github.com/hdt3213/godis/lib/utils"
"github.com/hdt3213/godis/redis/connection"
"github.com/hdt3213/godis/redis/protocol"
)
const heartbeatCommand = "cluster.heartbeat"
func init() {
RegisterCmd(heartbeatCommand, execHeartbeat)
}
const (
statusNormal = iota
statusFailing // failover in progress
)
type replicaManager struct {
mu sync.RWMutex
masterHeartbeats map[string]time.Time // id -> lastHeartbeatTime
}
func newReplicaManager() *replicaManager {
return &replicaManager{
masterHeartbeats: make(map[string]time.Time),
}
}
// execHeartbeat receives heartbeats from followers; it runs on the raft leader
// cmdLine: cluster.heartbeat nodeId
func execHeartbeat(cluster *Cluster, c redis.Connection, cmdLine CmdLine) redis.Reply {
if len(cmdLine) != 2 {
return protocol.MakeArgNumErrReply(heartbeatCommand)
}
id := string(cmdLine[1])
cluster.replicaManager.mu.Lock()
cluster.replicaManager.masterHeartbeats[id] = time.Now()
cluster.replicaManager.mu.Unlock()
return protocol.MakeOkReply()
}
func (cluster *Cluster) sendHeartbeat() {
leaderConn, err := cluster.BorrowLeaderClient()
if err != nil {
logger.Error(err)
return
}
defer cluster.connections.ReturnPeerClient(leaderConn)
reply := leaderConn.Send(utils.ToCmdLine(heartbeatCommand, cluster.SelfID()))
if err := protocol.Try2ErrorReply(reply); err != nil {
logger.Error(err)
}
}
const followerTimeout = 10 * time.Second
func (cluster *Cluster) doFailoverCheck() {
// find timeout masters
var timeoutMasters []*raft.MasterSlave
ddl := time.Now().Add(-followerTimeout)
cluster.replicaManager.mu.Lock()
for masterId, lastTime := range cluster.replicaManager.masterHeartbeats {
if lastTime.IsZero() {
// do not treat a newly joined node as timed out; start its timer now
cluster.replicaManager.masterHeartbeats[masterId] = time.Now()
continue
}
if lastTime.Before(ddl) {
slaves := cluster.raftNode.GetSlaves(masterId)
if slaves != nil && len(slaves.Slaves) > 0 {
timeoutMasters = append(timeoutMasters, slaves)
}
}
}
cluster.replicaManager.mu.Unlock()
// trigger failover
for _, failed := range timeoutMasters {
cluster.triggerFailover(failed)
}
}
func (cluster *Cluster) triggerFailover(failed *raft.MasterSlave) error {
newMaster := failed.Slaves[0]
id := utils.RandString(20)
// propose change
_, err := cluster.raftNode.Propose(&raft.LogEntry{
Event: raft.EventStartFailover,
FailoverTask: &raft.FailoverTask{
ID: id,
OldMasterId: failed.MasterId,
NewMasterId: newMaster,
},
})
if err != nil {
return err
}
logger.Infof("proposed start failover id=%s, oldMaster=%s, newMaster=%s", id, failed.MasterId, newMaster)
// send "slaveof no one" to the new master
conn, err := cluster.connections.BorrowPeerClient(newMaster)
if err != nil {
return err
}
defer cluster.connections.ReturnPeerClient(conn)
reply := conn.Send(utils.ToCmdLine("slaveof", "no", "one"))
if err := protocol.Try2ErrorReply(reply); err != nil {
return err
}
// new master is ready to receive commands, change route
_, err = cluster.raftNode.Propose(&raft.LogEntry{
Event: raft.EventFinishFailover,
FailoverTask: &raft.FailoverTask{
ID: id,
OldMasterId: failed.MasterId,
NewMasterId: newMaster,
},
})
if err != nil {
return err
}
logger.Infof("proposed finish failover id=%s, oldMaster=%s, newMaster=%s", id, failed.MasterId, newMaster)
// other slaves will listen to raft to change master
return nil
}
func (cluster *Cluster) registerOnFailover() {
cluster.raftNode.SetOnFailover(func(newMaster string) {
if newMaster != "" && newMaster != cluster.SelfID() {
// old master failed and other node become the new master
// this node may be the old master
ip, port, err := net.SplitHostPort(newMaster)
if err != nil {
logger.Errorf("illegal new master: %s", newMaster)
return
}
c := connection.NewFakeConn()
ret := cluster.db.Exec(c, utils.ToCmdLine("slaveof", ip, port))
if err := protocol.Try2ErrorReply(ret); err != nil {
logger.Errorf("slave of failed: %v", err)
return
}
}
})
}
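Once the finish-failover entry is applied, a node in the failed master's replica group other than the new master (including the old master, when it comes back) re-points its local DB via the callback above, running the equivalent of the command below; the address is illustrative, split from the new master's id:

slaveof 127.0.0.1 6499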