mirror of https://github.com/HDT3213/godis.git (synced 2025-10-25 01:30:57 +08:00)

Support failover in cluster (experimental)
@@ -22,6 +22,9 @@ type Cluster struct {
	slotsManager    *slotsManager
	rebalanceManger *rebalanceManager
	transactions    *TransactionManager
+	replicaManager  *replicaManager
+
+	closeChan chan struct{}

	// allow injecting a route implementation
	getSlotImpl func(key string) uint32
@@ -33,7 +36,9 @@ type Config struct {
	raft.RaftConfig
	StartAsSeed    bool
	JoinAddress    string
	Master         string
	connectionStub ConnectionFactory // for test
	noCron         bool              // for test
}
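
For orientation, a node joining an existing cluster would be configured roughly like this (a minimal sketch based on the TestFailover case below; the addresses are placeholders, and Master is optional: leave it empty to join as a fresh master, or set it to an existing node's ID to join as that node's replica):

	cfg := &Config{
		RaftConfig: raft.RaftConfig{
			RedisAdvertiseAddr: "127.0.0.1:6499",
			RaftListenAddr:     "127.0.0.1:26667",
			RaftAdvertiseAddr:  "127.0.0.1:26667",
			Dir:                "data/node1",
		},
		StartAsSeed: false,
		JoinAddress: "127.0.0.1:6399", // any reachable cluster member
		Master:      "127.0.0.1:6399", // optional: become a replica of this node
	}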

func (c *Cluster) SelfID() string {
@@ -123,7 +128,11 @@ func NewCluster(cfg *Config) (*Cluster, error) {
	if err != nil {
		return nil, err
	}
-	result := conn.Send(utils.ToCmdLine(joinClusterCommand, cfg.RedisAdvertiseAddr, cfg.RaftAdvertiseAddr))
+	joinCmdLine := utils.ToCmdLine(joinClusterCommand, cfg.RedisAdvertiseAddr, cfg.RaftAdvertiseAddr)
+	if cfg.Master != "" {
+		joinCmdLine = append(joinCmdLine, []byte(cfg.Master))
+	}
+	result := conn.Send(joinCmdLine)
	if err := protocol.Try2ErrorReply(result); err != nil {
		return nil, err
	}

@@ -137,6 +146,8 @@ func NewCluster(cfg *Config) (*Cluster, error) {
		rebalanceManger: newRebalanceManager(),
		slotsManager:    newSlotsManager(),
		transactions:    newTransactionManager(),
+		replicaManager:  newReplicaManager(),
+		closeChan:       make(chan struct{}),
	}
	cluster.pickNodeImpl = func(slotID uint32) string {
		return defaultPickNodeImpl(cluster, slotID)
@@ -146,6 +157,8 @@ func NewCluster(cfg *Config) (*Cluster, error) {
	}
	cluster.injectInsertCallback()
	cluster.injectDeleteCallback()
+	cluster.registerOnFailover()
+	go cluster.clusterCron()
	return cluster, nil
}
@@ -155,6 +168,7 @@ func (cluster *Cluster) AfterClientClose(c redis.Connection) {
}

func (cluster *Cluster) Close() {
+	close(cluster.closeChan)
	cluster.db.Close()
	err := cluster.raftNode.Close()
	if err != nil {
@@ -6,6 +6,7 @@ import (
	"time"

	"github.com/hdt3213/godis/cluster/raft"
	"github.com/hdt3213/godis/interface/redis"
	"github.com/hdt3213/godis/lib/utils"
	"github.com/hdt3213/godis/redis/connection"
	"github.com/hdt3213/godis/redis/protocol"
@@ -33,6 +34,7 @@ func TestClusterBootstrap(t *testing.T) {
		},
		StartAsSeed:    true,
		connectionStub: connections,
		noCron:         true,
	}
	leader, err := NewCluster(leaderCfg)
	if err != nil {
@@ -72,6 +74,7 @@ func TestClusterBootstrap(t *testing.T) {
		StartAsSeed:    false,
		JoinAddress:    leaderCfg.RedisAdvertiseAddr,
		connectionStub: connections,
		noCron:         true,
	}
	follower, err := NewCluster(followerCfg)
	if err != nil {
@@ -132,3 +135,104 @@ func TestClusterBootstrap(t *testing.T) {
		}
	}
}

func TestFailover(t *testing.T) {
	// start leader
	leaderDir := "test/0"
	os.RemoveAll(leaderDir)
	os.MkdirAll(leaderDir, 0777)
	defer func() {
		os.RemoveAll(leaderDir)
	}()
	RegisterCmd("slaveof", func(cluster *Cluster, c redis.Connection, cmdLine CmdLine) redis.Reply {
		return protocol.MakeOkReply()
	})

	// connection stub
	connections := NewInMemConnectionFactory()
	leaderCfg := &Config{
		RaftConfig: raft.RaftConfig{
			RedisAdvertiseAddr: "127.0.0.1:6399",
			RaftListenAddr:     "127.0.0.1:26666",
			RaftAdvertiseAddr:  "127.0.0.1:26666",
			Dir:                leaderDir,
		},
		StartAsSeed:    true,
		connectionStub: connections,
		noCron:         true,
	}
	leader, err := NewCluster(leaderCfg)
	if err != nil {
		t.Error(err)
		return
	}
	connections.nodes[leaderCfg.RedisAdvertiseAddr] = leader

	// start follower
	followerDir := "test/1"
	os.RemoveAll(followerDir)
	os.MkdirAll(followerDir, 0777)
	defer func() {
		os.RemoveAll(followerDir)
	}()
	followerCfg := &Config{
		RaftConfig: raft.RaftConfig{
			RedisAdvertiseAddr: "127.0.0.1:6499",
			RaftListenAddr:     "127.0.0.1:26667",
			RaftAdvertiseAddr:  "127.0.0.1:26667",
			Dir:                followerDir,
		},
		StartAsSeed:    false,
		JoinAddress:    leaderCfg.RedisAdvertiseAddr,
		connectionStub: connections,
		noCron:         true,
		Master:         leader.SelfID(),
	}
	follower, err := NewCluster(followerCfg)
	if err != nil {
		t.Error(err)
		return
	}
	connections.nodes[followerCfg.RedisAdvertiseAddr] = follower

	_ = follower.SelfID()
	// check nodes
	joined := false
	for i := 0; i < 10; i++ {
		nodes, err := leader.raftNode.GetNodes()
		if err != nil {
			t.Log(err)
			continue
		}
		if len(nodes) == 2 {
			t.Log("join success")
			joined = true
			break
		}
		time.Sleep(time.Second)
	}
	if !joined {
		t.Error("join failed")
		return
	}

	// trigger failover by back-dating the leader's heartbeat record
	leader.replicaManager.masterHeartbeats[leader.SelfID()] = time.Now().Add(-time.Hour)
	leader.doFailoverCheck()
	time.Sleep(2 * time.Second)
	for i := 0; i < 1000; i++ {
		success := false
		leader.raftNode.FSM.WithReadLock(func(fsm *raft.FSM) {
			ms := fsm.MasterSlaves[follower.SelfID()]
			if ms != nil && len(ms.Slaves) > 0 {
				success = true
			}
		})
		if success {
			t.Log("failover success")
			break
		} else {
			time.Sleep(time.Second)
		}
	}
}
36	cluster/core/cron.go	Normal file
@@ -0,0 +1,36 @@
package core

import (
	"sync/atomic"
	"time"

	"github.com/hdt3213/godis/cluster/raft"
)

func (cluster *Cluster) clusterCron() {
	if cluster.config.noCron {
		return
	}
	ticker := time.NewTicker(time.Second)
	var running int32
	for {
		select {
		case <-ticker.C:
			if cluster.raftNode.State() == raft.Leader {
				if atomic.CompareAndSwapInt32(&running, 0, 1) {
					// disallow parallel runs: skip this tick if the previous
					// check/rebalance round is still in progress
					go func() {
						cluster.doFailoverCheck()
						cluster.doRebalance()
						atomic.StoreInt32(&running, 0)
					}()
				}
			} else {
				cluster.sendHearbeat()
			}
		case <-cluster.closeChan:
			ticker.Stop()
			return
		}
	}
}
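The running flag implements a skip-if-busy policy: if a failover check or rebalance started by an earlier tick is still in flight, the new tick is dropped rather than queued, so at most one maintenance goroutine runs at a time. A standalone sketch of the same guard pattern (names illustrative, assumes sync/atomic; not part of the codebase):

	var running int32

	// tryRun starts work in the background unless a previous run is still active.
	func tryRun(work func()) bool {
		if !atomic.CompareAndSwapInt32(&running, 0, 1) {
			return false // previous round still in progress; skip this tick
		}
		go func() {
			defer atomic.StoreInt32(&running, 0)
			work()
		}()
		return true
	}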
@@ -23,10 +23,10 @@ func init() {
	RegisterCmd(migrationChangeRouteCommand, execMigrationChangeRoute)
}

-// execJoin handles cluster-join command
-// format: cluster-join redisAddress (advertised)raftAddress
+// execJoin handles the cluster-join command as raft leader
+// format: cluster-join redisAddress(advertised) raftAddress [masterId]
func execJoin(cluster *Cluster, c redis.Connection, cmdLine CmdLine) redis.Reply {
-	if len(cmdLine) != 3 {
+	if len(cmdLine) < 3 {
		return protocol.MakeArgNumErrReply(joinClusterCommand)
	}
	state := cluster.raftNode.State()
@@ -42,10 +42,26 @@ func execJoin(cluster *Cluster, c redis.Connection, cmdLine CmdLine) redis.Reply
	// self node is leader
	redisAddr := string(cmdLine[1])
	raftAddr := string(cmdLine[2])
-	err := cluster.raftNode.HandleJoin(redisAddr, raftAddr)
+	err := cluster.raftNode.AddToRaft(redisAddr, raftAddr)
	if err != nil {
		return protocol.MakeErrReply(err.Error())
	}
+	master := ""
+	if len(cmdLine) == 4 {
+		master = string(cmdLine[3])
+	}
+	_, err = cluster.raftNode.Propose(&raft.LogEntry{
+		Event: raft.EventJoin,
+		JoinTask: &raft.JoinTask{
+			NodeId: redisAddr,
+			Master: master,
+		},
+	})
+	if err != nil {
+		// todo: remove the node from raft
+		return protocol.MakeErrReply(err.Error())
+	}

	// join success, rebalance node
	return protocol.MakeOkReply()
}
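Putting both sides together, the join request a new node sends to the seed (built in NewCluster above) looks like this on the wire; the addresses are illustrative, and the trailing master ID is present only when joining as a replica:

	cluster-join <redisAdvertiseAddr> <raftAdvertiseAddr> [masterId]
	cluster-join 127.0.0.1:6499 127.0.0.1:26667 127.0.0.1:6399

The leader first adds the node to the raft group (AddToRaft), then proposes an EventJoin log entry so every member learns the new node and its master/replica relationship.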
@@ -114,17 +130,14 @@ func (cluster *Cluster) triggerMigrationTask(task *raft.MigratingTask) error {
}

func (cluster *Cluster) makeRebalancePlan() ([]*raft.MigratingTask, error) {
-	nodes, err := cluster.raftNode.GetNodes()
-	if err != nil {
-		return nil, err
-	}
-	avgSlot := int(math.Ceil(float64(SlotCount) / float64(len(nodes))))
-
	var migratings []*raft.MigratingTask
	cluster.raftNode.FSM.WithReadLock(func(fsm *raft.FSM) {
+		avgSlot := int(math.Ceil(float64(SlotCount) / float64(len(fsm.MasterSlaves))))
		var exportingNodes []string
		var importingNodes []string
-		for _, node := range nodes {
-			nodeId := string(node.ID)
+		for _, ms := range fsm.MasterSlaves {
+			nodeId := ms.MasterId
			nodeSlots := fsm.Node2Slot[nodeId]
			if len(nodeSlots) > avgSlot+1 {
				exportingNodes = append(exportingNodes, nodeId)
@@ -200,7 +213,7 @@ func (cluster *Cluster) waitCommitted(peer string, logIndex uint64) error {
// format: cluster.migration.changeroute taskid
func execMigrationChangeRoute(cluster *Cluster, c redis.Connection, cmdLine CmdLine) redis.Reply {
	if len(cmdLine) != 2 {
-		return protocol.MakeArgNumErrReply(joinClusterCommand)
+		return protocol.MakeArgNumErrReply(migrationChangeRouteCommand)
	}
	state := cluster.raftNode.State()
	if state != raft.Leader {
155	cluster/core/replica_manager.go	Normal file
@@ -0,0 +1,155 @@
package core

import (
	"net"
	"sync"
	"time"

	"github.com/hdt3213/godis/cluster/raft"
	"github.com/hdt3213/godis/interface/redis"
	"github.com/hdt3213/godis/lib/logger"
	"github.com/hdt3213/godis/lib/utils"
	"github.com/hdt3213/godis/redis/connection"
	"github.com/hdt3213/godis/redis/protocol"
)

const heartbeatCommand = "cluster.heartbeat"

func init() {
	RegisterCmd(heartbeatCommand, execHeartbeat)
}

const (
	statusNormal = iota
	statusFailing // failover in progress
)

type replicaManager struct {
	mu               sync.RWMutex
	masterHeartbeats map[string]time.Time // id -> lastHeartbeatTime
}

func newReplicaManager() *replicaManager {
	return &replicaManager{
		masterHeartbeats: make(map[string]time.Time),
	}
}

// execHeartbeat receives heartbeat from follower as raft leader
// cmdLine: cluster.heartbeat nodeId
func execHeartbeat(cluster *Cluster, c redis.Connection, cmdLine CmdLine) redis.Reply {
	if len(cmdLine) != 2 {
		return protocol.MakeArgNumErrReply(heartbeatCommand)
	}
	id := string(cmdLine[1])
	cluster.replicaManager.mu.Lock()
	cluster.replicaManager.masterHeartbeats[id] = time.Now()
	cluster.replicaManager.mu.Unlock()

	return protocol.MakeOkReply()
}
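Each non-leader node reports liveness once per second (see clusterCron above); the wire format is just the command name followed by the sender's ID, e.g. (address illustrative):

	cluster.heartbeat 127.0.0.1:6499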
func (cluster *Cluster) sendHearbeat() {
	leaderConn, err := cluster.BorrowLeaderClient()
	if err != nil {
		logger.Error(err)
		return // cannot reach the leader; retry on the next tick
	}
	defer cluster.connections.ReturnPeerClient(leaderConn)
	reply := leaderConn.Send(utils.ToCmdLine(heartbeatCommand, cluster.SelfID()))
	if err := protocol.Try2ErrorReply(reply); err != nil {
		logger.Error(err)
	}
}
const followerTimeout = 10 * time.Second

func (cluster *Cluster) doFailoverCheck() {
	// find timed-out masters
	var timeoutMasters []*raft.MasterSlave
	ddl := time.Now().Add(-followerTimeout)
	cluster.replicaManager.mu.Lock() // exclusive lock: the loop may update heartbeat entries
	for masterId, lastTime := range cluster.replicaManager.masterHeartbeats {
		if lastTime.IsZero() {
			// do not treat a newly joined node as timed out
			cluster.replicaManager.masterHeartbeats[masterId] = time.Now()
			continue
		}
		if lastTime.Before(ddl) {
			slaves := cluster.raftNode.GetSlaves(masterId)
			if slaves != nil && len(slaves.Slaves) > 0 {
				timeoutMasters = append(timeoutMasters, slaves)
			}
		}
	}
	cluster.replicaManager.mu.Unlock()

	// trigger failover
	for _, failed := range timeoutMasters {
		cluster.triggerFailover(failed)
	}
}
func (cluster *Cluster) triggerFailover(failed *raft.MasterSlave) error {
	newMaster := failed.Slaves[0]
	id := utils.RandString(20)
	// propose change
	_, err := cluster.raftNode.Propose(&raft.LogEntry{
		Event: raft.EventStartFailover,
		FailoverTask: &raft.FailoverTask{
			ID:          id,
			OldMasterId: failed.MasterId,
			NewMasterId: newMaster,
		},
	})
	if err != nil {
		return err
	}
	logger.Infof("proposed start failover id=%s, oldMaster=%s, newMaster=%s", id, failed.MasterId, newMaster)
	// send "slaveof no one" to the new master
	conn, err := cluster.connections.BorrowPeerClient(newMaster)
	if err != nil {
		return err
	}
	defer cluster.connections.ReturnPeerClient(conn)

	reply := conn.Send(utils.ToCmdLine("slaveof", "no", "one"))
	if err := protocol.Try2ErrorReply(reply); err != nil {
		return err
	}

	// new master is ready to receive commands, change route
	_, err = cluster.raftNode.Propose(&raft.LogEntry{
		Event: raft.EventFinishFailover,
		FailoverTask: &raft.FailoverTask{
			ID:          id,
			OldMasterId: failed.MasterId,
			NewMasterId: newMaster,
		},
	})
	if err != nil {
		return err
	}
	logger.Infof("proposed finish failover id=%s, oldMaster=%s, newMaster=%s", id, failed.MasterId, newMaster)
	// other slaves will listen to raft to change master
	return nil
}
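In short, failover is a three-step protocol driven by the raft leader: (1) propose EventStartFailover to record the intent in the raft log; (2) promote the chosen slave by sending it "slaveof no one"; (3) propose EventFinishFailover to switch routing to the new master. The remaining slaves are not contacted directly; they observe the raft log and re-point themselves through the OnFailover callback registered below.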
func (cluster *Cluster) registerOnFailover() {
	cluster.raftNode.SetOnFailover(func(newMaster string) {
		if newMaster != "" && newMaster != cluster.SelfID() {
			// the old master failed and another node became the new master;
			// this node may be the old master
			ip, port, err := net.SplitHostPort(newMaster)
			if err != nil {
				logger.Errorf("illegal new master: %s", newMaster)
				return
			}
			c := connection.NewFakeConn()
			ret := cluster.db.Exec(c, utils.ToCmdLine("slaveof", ip, port))
			if err := protocol.Try2ErrorReply(ret); err != nil {
				logger.Errorf("slaveof failed: %v", err)
				return
			}
		}
	})
}