Support failover in cluster (experimental)

Author: finley
Date:   2025-04-19 22:11:58 +08:00
Commit: f4a2c92fc1 (parent: 14ec8277ca)

20 changed files with 739 additions and 136 deletions

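The interesting part of this excerpt is the new TestFailover below: it boots a two-node cluster, backdates the leader's own master heartbeat by an hour, triggers the failover check by hand, and then polls the raft FSM until the follower shows up as a master owning slaves. For context, here is a minimal sketch of the heartbeat-timeout idea that check is built on; failoverTimeout, the map layout, and pickTimedOutMasters are illustrative assumptions, not the commit's actual code:

    package failover

    import "time"

    // failoverTimeout is an assumed threshold, for illustration only.
    const failoverTimeout = 30 * time.Second

    // pickTimedOutMasters returns the IDs of masters whose last recorded
    // heartbeat is older than the timeout. A real failover check would then
    // propose promoting one slave of each failed master through raft, so
    // that every node agrees on the new topology.
    func pickTimedOutMasters(heartbeats map[string]time.Time, now time.Time) []string {
        var failed []string
        for id, lastBeat := range heartbeats {
            if now.Sub(lastBeat) > failoverTimeout {
                failed = append(failed, id)
            }
        }
        return failed
    }

Under this reading, backdating a heartbeat by an hour (as the test does) exceeds any reasonable timeout, so a single manual doFailoverCheck call is enough to start a promotion.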

@@ -6,6 +6,7 @@ import (
 	"time"
 
 	"github.com/hdt3213/godis/cluster/raft"
 	"github.com/hdt3213/godis/interface/redis"
+	"github.com/hdt3213/godis/lib/utils"
 	"github.com/hdt3213/godis/redis/connection"
 	"github.com/hdt3213/godis/redis/protocol"
@@ -33,6 +34,7 @@ func TestClusterBootstrap(t *testing.T) {
 		},
 		StartAsSeed:    true,
 		connectionStub: connections,
+		noCron:         true,
 	}
 	leader, err := NewCluster(leaderCfg)
 	if err != nil {
@@ -72,6 +74,7 @@ func TestClusterBootstrap(t *testing.T) {
 		StartAsSeed:    false,
 		JoinAddress:    leaderCfg.RedisAdvertiseAddr,
 		connectionStub: connections,
+		noCron:         true,
 	}
 	follower, err := NewCluster(followerCfg)
 	if err != nil {
@@ -132,3 +135,104 @@ func TestClusterBootstrap(t *testing.T) {
 		}
 	}
 }
+
+func TestFailover(t *testing.T) {
+	// start leader
+	leaderDir := "test/0"
+	os.RemoveAll(leaderDir)
+	os.MkdirAll(leaderDir, 0777)
+	defer func() {
+		os.RemoveAll(leaderDir)
+	}()
+	// stub the slaveof command: failover sends slaveof to the promoted node,
+	// and this in-memory test does not run real replication
+	RegisterCmd("slaveof", func(cluster *Cluster, c redis.Connection, cmdLine CmdLine) redis.Reply {
+		return protocol.MakeOkReply()
+	})
+	// route inter-node traffic through an in-memory connection stub
+	connections := NewInMemConnectionFactory()
+	leaderCfg := &Config{
+		RaftConfig: raft.RaftConfig{
+			RedisAdvertiseAddr: "127.0.0.1:6399",
+			RaftListenAddr:     "127.0.0.1:26666",
+			RaftAdvertiseAddr:  "127.0.0.1:26666",
+			Dir:                leaderDir,
+		},
+		StartAsSeed:    true,
+		connectionStub: connections,
+		noCron:         true,
+	}
+	leader, err := NewCluster(leaderCfg)
+	if err != nil {
+		t.Error(err)
+		return
+	}
+	connections.nodes[leaderCfg.RedisAdvertiseAddr] = leader
+	// start follower, configured to replicate the leader from the start
+	followerDir := "test/1"
+	os.RemoveAll(followerDir)
+	os.MkdirAll(followerDir, 0777)
+	defer func() {
+		os.RemoveAll(followerDir)
+	}()
+	followerCfg := &Config{
+		RaftConfig: raft.RaftConfig{
+			RedisAdvertiseAddr: "127.0.0.1:6499",
+			RaftListenAddr:     "127.0.0.1:26667",
+			RaftAdvertiseAddr:  "127.0.0.1:26667",
+			Dir:                followerDir,
+		},
+		StartAsSeed:    false,
+		JoinAddress:    leaderCfg.RedisAdvertiseAddr,
+		connectionStub: connections,
+		noCron:         true,
+		Master:         leader.SelfID(),
+	}
+	follower, err := NewCluster(followerCfg)
+	if err != nil {
+		t.Error(err)
+		return
+	}
+	connections.nodes[followerCfg.RedisAdvertiseAddr] = follower
+	// wait until the follower has joined the raft cluster
+	joined := false
+	for i := 0; i < 10; i++ {
+		nodes, err := leader.raftNode.GetNodes()
+		if err != nil {
+			t.Log(err)
+			continue
+		}
+		if len(nodes) == 2 {
+			t.Log("join success")
+			joined = true
+			break
+		}
+		time.Sleep(time.Second)
+	}
+	if !joined {
+		t.Error("join failed")
+		return
+	}
+	// simulate a dead master: backdate the leader's own heartbeat so the
+	// failover check treats it as timed out, then run the check by hand
+	leader.replicaManager.masterHeartbeats[leader.SelfID()] = time.Now().Add(-time.Hour)
+	leader.doFailoverCheck()
+	time.Sleep(2 * time.Second)
+	// wait until the FSM records the follower as a master owning slaves,
+	// i.e. the promotion has been committed through raft
+	success := false
+	for i := 0; i < 1000; i++ {
+		leader.raftNode.FSM.WithReadLock(func(fsm *raft.FSM) {
+			ms := fsm.MasterSlaves[follower.SelfID()]
+			if ms != nil && len(ms.Slaves) > 0 {
+				success = true
+			}
+		})
+		if success {
+			t.Log("failover success")
+			break
+		}
+		time.Sleep(time.Second)
+	}
+	if !success {
+		t.Error("failover timed out")
+	}
+}
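
Note the noCron flag on both configs: it disables each node's background cron loop, which would otherwise run the failover check on its own schedule, so the test stays deterministic and invokes doFailoverCheck explicitly. To try this locally, something like go test -run TestFailover ./cluster/... from the repository root should work; the exact package path of this test file is not shown in this excerpt.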