package core

import (
	"errors"
	"fmt"
	"math"
	"strconv"
	"sync"
	"time"

	"github.com/hdt3213/godis/cluster/raft"
	"github.com/hdt3213/godis/interface/redis"
	"github.com/hdt3213/godis/lib/logger"
	"github.com/hdt3213/godis/lib/utils"
	"github.com/hdt3213/godis/redis/protocol"
)

const joinClusterCommand = "cluster.join"
const migrationChangeRouteCommand = "cluster.migration.changeroute"

func init() {
	RegisterCmd(joinClusterCommand, execJoin)
	RegisterCmd(migrationChangeRouteCommand, execMigrationChangeRoute)
}

// execJoin handles the cluster.join command
// format: cluster.join redisAddress advertisedRaftAddress
func execJoin(cluster *Cluster, c redis.Connection, cmdLine CmdLine) redis.Reply {
	if len(cmdLine) != 3 {
		return protocol.MakeArgNumErrReply(joinClusterCommand)
	}
	state := cluster.raftNode.State()
	if state != raft.Leader {
		// this node is not the leader, forward the request to the leader
		leaderConn, err := cluster.BorrowLeaderClient()
		if err != nil {
			return protocol.MakeErrReply(err.Error())
		}
		defer cluster.connections.ReturnPeerClient(leaderConn)
		return leaderConn.Send(cmdLine)
	}
	// this node is the leader
	redisAddr := string(cmdLine[1])
	raftAddr := string(cmdLine[2])
	err := cluster.raftNode.HandleJoin(redisAddr, raftAddr)
	if err != nil {
		return protocol.MakeErrReply(err.Error())
	}
	// join succeeded; a rebalance may follow to move slots to the new node
	return protocol.MakeOkReply()
}

// rebalanceManager ensures that only one rebalance procedure runs at a time
type rebalanceManager struct {
	mu *sync.Mutex
}

func newRebalanceManager() *rebalanceManager {
	return &rebalanceManager{
		mu: &sync.Mutex{},
	}
}

// doRebalance generates a rebalance plan and triggers the migration tasks in it.
// It should only be called on the leader.
func (cluster *Cluster) doRebalance() {
	cluster.rebalanceManger.mu.Lock()
	defer cluster.rebalanceManger.mu.Unlock()
	pendingTasks, err := cluster.makeRebalancePlan()
	if err != nil {
		logger.Errorf("makeRebalancePlan err: %v", err)
		return
	}
	logger.Infof("rebalance plan generated, contains %d tasks", len(pendingTasks))
	if len(pendingTasks) == 0 {
		return
	}
	for _, task := range pendingTasks {
		err := cluster.triggerMigrationTask(task)
		if err != nil {
			logger.Errorf("triggerMigrationTask err: %v", err)
		} else {
			logger.Infof("triggerMigrationTask %s success", task.ID)
		}
	}
}

// triggerMigrationTask starts a migration task
// only the leader can do this
func (cluster *Cluster) triggerMigrationTask(task *raft.MigratingTask) error {
	// propose the migration through raft
	_, err := cluster.raftNode.Propose(&raft.LogEntry{
		Event:         raft.EventStartMigrate,
		MigratingTask: task,
	})
	if err != nil {
		return fmt.Errorf("propose EventStartMigrate %s failed: %v", task.ID, err)
	}
	logger.Infof("propose EventStartMigrate %s success", task.ID)

	// ask the target node to start pulling the slots from the source node
	cmdLine := utils.ToCmdLine(startMigrationCommand, task.ID, task.SrcNode)
	for _, slotId := range task.Slots {
		slotIdStr := strconv.Itoa(int(slotId))
		cmdLine = append(cmdLine, []byte(slotIdStr))
	}
	targetNodeConn, err := cluster.connections.BorrowPeerClient(task.TargetNode)
	if err != nil {
		return err
	}
	defer cluster.connections.ReturnPeerClient(targetNodeConn)
	reply := targetNodeConn.Send(cmdLine)
	if protocol.IsOKReply(reply) {
		return nil
	}
	return protocol.MakeErrReply("ERR failed to start migration task on target node")
}

// makeRebalancePlan computes migration tasks that move slots from nodes holding
// more than the average number of slots to nodes holding fewer
func (cluster *Cluster) makeRebalancePlan() ([]*raft.MigratingTask, error) {
	nodes, err := cluster.raftNode.GetNodes()
	if err != nil {
		return nil, err
	}
	avgSlot := int(math.Ceil(float64(SlotCount) / float64(len(nodes))))
	var migratings []*raft.MigratingTask
	cluster.raftNode.FSM.WithReadLock(func(fsm *raft.FSM) {
		var exportingNodes []string
		var importingNodes []string
		for _, node := range nodes {
			nodeId := string(node.ID)
			nodeSlots := fsm.Node2Slot[nodeId]
			if len(nodeSlots) > avgSlot+1 {
				exportingNodes = append(exportingNodes, nodeId)
			}
			if len(nodeSlots) < avgSlot-1 {
				importingNodes = append(importingNodes, nodeId)
			}
		}
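		// A sketch of the matching below: walk the importing and exporting lists
		// with two indexes; exportSlots buffers the slots the current exporting
		// node must give away, and each loop iteration emits one MigratingTask.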
		importIndex := 0
		exportIndex := 0
		var exportSlots []uint32
		for importIndex < len(importingNodes) && exportIndex < len(exportingNodes) {
			exportNode := exportingNodes[exportIndex]
			if len(exportSlots) == 0 {
				exportNodeSlots := fsm.Node2Slot[exportNode]
				exportCount := len(exportNodeSlots) - avgSlot
				exportSlots = exportNodeSlots[0:exportCount]
			}
			importNode := importingNodes[importIndex]
			importNodeSlots := fsm.Node2Slot[importNode]
			requirements := avgSlot - len(importNodeSlots)
			task := &raft.MigratingTask{
				ID:         utils.RandString(20),
				SrcNode:    exportNode,
				TargetNode: importNode,
			}
			if requirements <= len(exportSlots) {
				// exportSlots can provide enough slots, the importing node is satisfied
				task.Slots = exportSlots[0:requirements]
				exportSlots = exportSlots[requirements:]
				importIndex++
			} else {
				// exportSlots cannot provide enough slots, the exporting node is drained
				task.Slots = exportSlots
				exportSlots = nil
				exportIndex++
			}
			migratings = append(migratings, task)
		}
	})
	return migratings, nil
}

// waitCommitted polls the given peer until its committed raft log index reaches
// logIndex, or gives up after about 5 seconds (50 tries, 100ms apart)
func (cluster *Cluster) waitCommitted(peer string, logIndex uint64) error {
	srcNodeConn, err := cluster.connections.BorrowPeerClient(peer)
	if err != nil {
		return err
	}
	defer cluster.connections.ReturnPeerClient(srcNodeConn)
	var peerIndex uint64
	for i := 0; i < 50; i++ {
		reply := srcNodeConn.Send(utils.ToCmdLine(getCommittedIndexCommand))
		switch reply := reply.(type) {
		case *protocol.IntReply:
			peerIndex = uint64(reply.Code)
			if peerIndex >= logIndex {
				return nil
			}
		case *protocol.StandardErrReply:
			logger.Infof("get committed index failed: %v", reply.Error())
		default:
			logger.Infof("get committed index unknown response: %+v", reply.ToBytes())
		}
		time.Sleep(time.Millisecond * 100)
	}
	return errors.New("wait committed timeout")
}

// execMigrationChangeRoute should be executed on the leader.
// It proposes EventFinishMigrate through raft to change the route to the new node.
// It does not return until the proposal has been accepted by the majority and by the two related nodes.
// format: cluster.migration.changeroute taskid
func execMigrationChangeRoute(cluster *Cluster, c redis.Connection, cmdLine CmdLine) redis.Reply {
	if len(cmdLine) != 2 {
		return protocol.MakeArgNumErrReply(migrationChangeRouteCommand)
	}
	state := cluster.raftNode.State()
	if state != raft.Leader {
		// this node is not the leader, forward the request to the leader
		leaderConn, err := cluster.BorrowLeaderClient()
		if err != nil {
			return protocol.MakeErrReply(err.Error())
		}
		defer cluster.connections.ReturnPeerClient(leaderConn)
		return leaderConn.Send(cmdLine)
	}
	taskId := string(cmdLine[1])
	logger.Infof("change route for migration %s", taskId)
	task := cluster.raftNode.FSM.GetMigratingTask(taskId)
	if task == nil {
		return protocol.MakeErrReply("ERR task not found")
	}
	logger.Infof("change route for migration %s, got task info", taskId)
	// propose the route change
	logIndex, err := cluster.raftNode.Propose(&raft.LogEntry{
		Event:         raft.EventFinishMigrate,
		MigratingTask: task,
	})
	if err != nil {
		return protocol.MakeErrReply("ERR " + err.Error())
	}
	logger.Infof("change route for migration %s, raft proposed", taskId)
	// confirm that both related nodes have committed this log entry
	err = cluster.waitCommitted(task.SrcNode, logIndex)
	if err != nil {
		return protocol.MakeErrReply("ERR " + err.Error())
	}
	logger.Infof("change route for migration %s, confirm source node finished", taskId)
	err = cluster.waitCommitted(task.TargetNode, logIndex)
	if err != nil {
		return protocol.MakeErrReply("ERR " + err.Error())
	}
	logger.Infof("change route for migration %s, confirm target node finished", taskId)
	return protocol.MakeOkReply()
}
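// Migration flow, for reference (a summary of the commands in this file):
//  1. a new node sends cluster.join, which is forwarded to the raft leader (execJoin);
//  2. the leader runs doRebalance: makeRebalancePlan produces MigratingTask-s, and
//     triggerMigrationTask proposes EventStartMigrate, then asks each target node
//     to pull its slots from the source node (startMigrationCommand);
//  3. once the data is copied, cluster.migration.changeroute proposes
//     EventFinishMigrate and waits until both related nodes committed the entry.
//
// A worked example of the plan arithmetic in makeRebalancePlan, as a sketch
// (assuming SlotCount is 16384; node names are illustrative): with 3 nodes where
// node A holds all 16384 slots and nodes B and C hold none,
// avgSlot = ceil(16384/3) = 5462, so A has 16384-5462 = 10922 slots to export.
// The matching loop emits two tasks: A->B carrying 5462 slots (B reaches avgSlot,
// importIndex advances), then A->C carrying the remaining 5460 slots (A is
// drained, exportIndex advances), after which the loop ends.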