mirror of https://github.com/HDT3213/godis.git
optimize migration; clean migrated data
@@ -1,11 +1,9 @@
 package core

 import (
-    "sync"

     "github.com/hdt3213/godis/cluster/raft"
     dbimpl "github.com/hdt3213/godis/database"
-    "github.com/hdt3213/godis/datastruct/set"
     "github.com/hdt3213/godis/interface/database"
     "github.com/hdt3213/godis/interface/redis"
     "github.com/hdt3213/godis/lib/logger"
@@ -49,57 +47,6 @@ func (c *Cluster) SelfID() string {
     return c.raftNode.Cfg.ID()
 }

-// slotsManager manages the slots hosted on the current node
-type slotsManager struct {
-    mu            *sync.RWMutex
-    slots         map[uint32]*slotStatus // slots currently on this node
-    importingTask *raft.MigratingTask
-}
-
-const (
-    slotStateHosting = iota
-    slotStateImporting
-    slotStateExporting
-)
-
-type slotStatus struct {
-    mu    *sync.RWMutex
-    state int
-    keys  *set.Set // keys currently stored in this slot
-
-    exportSnapshot *set.Set // copy of the slot's keys taken when the transfer starts, to avoid concurrent modification
-    dirtyKeys      *set.Set // keys modified after the transfer started; they must be re-sent in the finishing phase
-}
-
-func newSlotsManager() *slotsManager {
-    return &slotsManager{
-        mu:    &sync.RWMutex{},
-        slots: map[uint32]*slotStatus{},
-    }
-}
-
-func (ssm *slotsManager) getSlot(index uint32) *slotStatus {
-    ssm.mu.RLock()
-    slot := ssm.slots[index]
-    ssm.mu.RUnlock()
-    if slot != nil {
-        return slot
-    }
-    ssm.mu.Lock()
-    defer ssm.mu.Unlock()
-    // check-lock-check
-    slot = ssm.slots[index]
-    if slot != nil {
-        return slot
-    }
-    slot = &slotStatus{
-        state: slotStateHosting,
-        keys:  set.Make(),
-        mu:    &sync.RWMutex{},
-    }
-    ssm.slots[index] = slot
-    return slot
-}
-
 func NewCluster(cfg *Config) (*Cluster, error) {
     var connections ConnectionFactory
@@ -2,7 +2,7 @@ package core

 import (
     "fmt"
-    "strconv"
+    "sync"
     "time"

     "github.com/hdt3213/godis/aof"
@@ -26,6 +26,58 @@ func init() {
     RegisterCmd(startMigrationCommand, execStartMigration)
 }

+// slotsManager manages the slots hosted on the current node
+type slotsManager struct {
+    mu            *sync.RWMutex
+    slots         map[uint32]*slotStatus // slots currently on this node
+    importingTask *raft.MigratingTask
+}
+
+const (
+    slotStateHosting = iota
+    slotStateImporting
+    slotStateExporting
+)
+
+type slotStatus struct {
+    mu    *sync.RWMutex
+    state int
+    keys  *set.Set // keys currently stored in this slot
+
+    exportSnapshot *set.Set // copy of the slot's keys taken when the transfer starts, to avoid concurrent modification
+    dirtyKeys      *set.Set // keys modified after the transfer started; they must be re-sent in the finishing phase
+}
+
+func newSlotsManager() *slotsManager {
+    return &slotsManager{
+        mu:    &sync.RWMutex{},
+        slots: map[uint32]*slotStatus{},
+    }
+}
+
+func (ssm *slotsManager) getSlot(index uint32) *slotStatus {
+    ssm.mu.RLock()
+    slot := ssm.slots[index]
+    ssm.mu.RUnlock()
+    if slot != nil {
+        return slot
+    }
+    ssm.mu.Lock()
+    defer ssm.mu.Unlock()
+    // check-lock-check
+    slot = ssm.slots[index]
+    if slot != nil {
+        return slot
+    }
+    slot = &slotStatus{
+        state: slotStateHosting,
+        keys:  set.Make(),
+        mu:    &sync.RWMutex{},
+    }
+    ssm.slots[index] = slot
+    return slot
+}
+
 func (sm *slotStatus) startExporting() protocol.ErrorReply {
     sm.mu.Lock()
     defer sm.mu.Unlock()
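Note: the `getSlot` method added above uses the check-lock-check (double-checked locking) idiom: look the slot up under the read lock first, and take the write lock only when it does not exist yet, re-checking before creating it. A minimal, self-contained sketch of the same idiom, using illustrative names (`registry`, `getOrCreate`) that are not part of this repository:

package main

import (
    "fmt"
    "sync"
)

type entry struct{ id uint32 }

// registry lazily creates one entry per index, in the spirit of slotsManager.getSlot.
type registry struct {
    mu      sync.RWMutex
    entries map[uint32]*entry
}

func (r *registry) getOrCreate(index uint32) *entry {
    // fast path: most lookups find an existing entry under the read lock
    r.mu.RLock()
    e := r.entries[index]
    r.mu.RUnlock()
    if e != nil {
        return e
    }
    // slow path: take the write lock, then check again, because another
    // goroutine may have created the entry between RUnlock and Lock
    r.mu.Lock()
    defer r.mu.Unlock()
    if e = r.entries[index]; e != nil {
        return e
    }
    e = &entry{id: index}
    r.entries[index] = e
    return e
}

func main() {
    r := &registry{entries: map[uint32]*entry{}}
    fmt.Println(r.getOrCreate(7) == r.getOrCreate(7)) // true: both calls return the same entry
}

The second lookup under the write lock is what prevents two goroutines from each creating and registering a different entry for the same index.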
@@ -44,6 +96,25 @@ func (sm *slotStatus) finishExportingWithinLock() {
     sm.exportSnapshot = nil
 }

+func (cluster *Cluster) dropSlot(index uint32) {
+    cluster.slotsManager.mu.RLock()
+    slot := cluster.slotsManager.slots[index]
+    cluster.slotsManager.mu.RUnlock()
+    if slot == nil {
+        return
+    }
+    slot.mu.Lock()
+    defer slot.mu.Unlock()
+    c := connection.NewFakeConn()
+    slot.keys.ForEach(func(key string) bool {
+        cluster.LocalExec(c, utils.ToCmdLine("del", key))
+        return true
+    })
+    cluster.slotsManager.mu.Lock()
+    delete(cluster.slotsManager.slots, index)
+    cluster.slotsManager.mu.Unlock()
+}
+
 func (cluster *Cluster) injectInsertCallback() {
     cb := func(dbIndex int, key string, entity *database.DataEntity) {
         slotIndex := cluster.GetSlot(key)
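Note: the new `dropSlot` removes a migrated slot by issuing ordinary `del` commands through a fake connection, so cleanup runs through the same local execution path as client commands before the slot entry is deleted from the manager. A rough sketch of that pattern inside the same package, assuming the import paths used elsewhere in the repository and the `NewFakeConn`/`LocalExec`/`ToCmdLine` calls visible in this diff (the `cleanupKeys` helper itself is illustrative, not part of the codebase):

package core

import (
    "github.com/hdt3213/godis/lib/utils"
    "github.com/hdt3213/godis/redis/connection"
)

// cleanupKeys deletes keys through the local command path; the fake connection
// stands in for a client so replies have somewhere to go.
func cleanupKeys(cluster *Cluster, keys []string) {
    c := connection.NewFakeConn()
    for _, key := range keys {
        cluster.LocalExec(c, utils.ToCmdLine("del", key))
    }
}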
@@ -187,32 +258,40 @@ func execFinishExport(cluster *Cluster, c redis.Connection, cmdLine CmdLine) red
     }
     logger.Infof("finishing migration task %s, route changed", taskId)
+
+    // clean migrated slots
+    go func() {
+        defer func() {
+            if e := recover(); e != nil {
+                logger.Errorf("panic %v", e)
+            }
+        }()
+        for _, index := range task.Slots {
+            cluster.dropSlot(index)
+        }
+    }()
     c.Write(protocol.MakeOkReply().ToBytes())
     return &protocol.NoReply{}
 }

 // execStartMigration receives startMigrationCommand from leader and start migration job at background
-// command line: startMigrationCommand taskId srcNode slotId1 [slotId2]...
+// command line: startMigrationCommand taskId
 func execStartMigration(cluster *Cluster, c redis.Connection, cmdLine CmdLine) redis.Reply {
-    if len(cmdLine) < 4 {
+    if len(cmdLine) != 2 {
         return protocol.MakeArgNumErrReply(startMigrationCommand)
     }
     taskId := string(cmdLine[1])
-    srcNode := string(cmdLine[2])
-    var slotIds []uint32
-    for _, slotIdStr := range cmdLine[3:] {
-        slotId, err := strconv.Atoi(string(slotIdStr))
-        if err != nil {
-            return protocol.MakeErrReply("illegal slot id: " + string(slotIdStr))
+    var task *raft.MigratingTask
+    for i := 0; i < 50; i++ {
+        task = cluster.raftNode.FSM.GetMigratingTask(taskId)
+        if task == nil {
+            time.Sleep(time.Millisecond * 100)
         }
-        slotIds = append(slotIds, uint32(slotId))
     }
-    task := &raft.MigratingTask{
-        ID:         taskId,
-        SrcNode:    srcNode,
-        TargetNode: cluster.SelfID(),
-        Slots:      slotIds,
+    if task == nil {
+        return protocol.MakeErrReply("ERR get migrating task timeout")
     }
     cluster.slotsManager.mu.Lock()
     cluster.slotsManager.importingTask = task
     cluster.slotsManager.mu.Unlock()
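Note: with this change the startMigrationCommand carries only the task ID; the target node resolves the source node and slot list from its own raft FSM, polling for up to roughly five seconds (50 attempts at 100 ms intervals) before giving up. A self-contained sketch of that poll-until-visible pattern, using illustrative names (`waitFor`, `lookup`) that are not part of the repository:

package main

import (
    "errors"
    "fmt"
    "time"
)

// waitFor polls lookup until it returns a non-nil value or the attempts run out.
func waitFor[T any](lookup func() *T, attempts int, interval time.Duration) (*T, error) {
    for i := 0; i < attempts; i++ {
        if v := lookup(); v != nil {
            return v, nil
        }
        time.Sleep(interval)
    }
    return nil, errors.New("timed out waiting for value")
}

func main() {
    start := time.Now()
    task, err := waitFor(func() *string {
        if time.Since(start) > 300*time.Millisecond {
            id := "task-42"
            return &id // becomes visible after a short delay
        }
        return nil // not replicated to this node yet
    }, 50, 100*time.Millisecond)
    if err != nil {
        fmt.Println("lookup failed:", err)
        return
    }
    fmt.Println("got task:", *task)
}

Polling the local FSM instead of trusting command arguments keeps the raft log the single source of truth for which slots belong to the task.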
@@ -4,7 +4,6 @@ import (
     "errors"
     "fmt"
     "math"
-    "strconv"
     "sync"
     "time"

@@ -15,6 +14,29 @@ import (
     "github.com/hdt3213/godis/redis/protocol"
 )

+/*
+
+**Rebalance Procedure**
+1. Invoke `triggerMigrationTask` on the cluster Leader to start a migration task
+2. The Leader proposes EventStartMigrate to the raft group, then sends startMigrationCommand to the Target Node (at `triggerMigrationTask`)
+
+3. The Target Node runs `doImports` after receiving startMigrationCommand
+4. The Target Node sends exportCommand to the Source Node
+
+5. The Source Node gets the migrating task from raft (at `execExport`)
+6. The Source Node stores the task in its slotsManager to start recording dirty keys during migration (at `injectInsertCallback`)
+7. The Source Node dumps the existing data to the Target Node
+
+8. The Target Node sends migrationDoneCommand to the Source Node (at `doImports`)
+9. The Source Node runs `execFinishExport`, locking the slots to stop writes
+10. The Source Node sends the dirty keys to the Target Node
+
+11. The Source Node sends migrationChangeRouteCommand to the Leader
+12. The Leader proposes EventFinishMigrate to raft and waits until both the Source Node and the Target Node have received this entry (at `execMigrationChangeRoute`)
+13. The Source Node finishes exporting, unlocks its slots, and cleans up the migrated data
+14. The Target Node finishes importing, unlocks its slots, and starts serving
+*/
+
 const joinClusterCommand = "cluster.join"
 const migrationChangeRouteCommand = "cluster.migration.changeroute"

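Note: steps 6, 9, and 10 above depend on the dirty-key bookkeeping in `slotStatus`: while a slot is exporting, every write is recorded in `dirtyKeys`, and those keys are re-sent once the slot is locked at the end of the transfer. A minimal sketch of that bookkeeping in isolation (the `exportState`, `trackWrite`, and `drainDirty` names are illustrative, not from the repository):

package main

import (
    "fmt"
    "sync"
)

// exportState captures the idea behind slotStatus during migration:
// keys written after the snapshot is taken are remembered and re-sent later.
type exportState struct {
    mu        sync.Mutex
    exporting bool
    dirtyKeys map[string]struct{}
}

// trackWrite is what an insert/update callback would call for every write.
func (s *exportState) trackWrite(key string) {
    s.mu.Lock()
    defer s.mu.Unlock()
    if s.exporting {
        s.dirtyKeys[key] = struct{}{}
    }
}

// drainDirty returns the keys that must be re-transferred in the finishing phase.
func (s *exportState) drainDirty() []string {
    s.mu.Lock()
    defer s.mu.Unlock()
    keys := make([]string, 0, len(s.dirtyKeys))
    for k := range s.dirtyKeys {
        keys = append(keys, k)
    }
    s.dirtyKeys = map[string]struct{}{}
    return keys
}

func main() {
    s := &exportState{exporting: true, dirtyKeys: map[string]struct{}{}}
    s.trackWrite("user:1") // written while the bulk transfer is still running
    fmt.Println(s.drainDirty())
}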
@@ -113,11 +135,7 @@ func (cluster *Cluster) triggerMigrationTask(task *raft.MigratingTask) error {
     }
     logger.Infof("propose EventStartMigrate %s success", task.ID)

-    cmdLine := utils.ToCmdLine(startMigrationCommand, task.ID, task.SrcNode)
-    for _, slotId := range task.Slots {
-        slotIdStr := strconv.Itoa(int(slotId))
-        cmdLine = append(cmdLine, []byte(slotIdStr))
-    }
+    cmdLine := utils.ToCmdLine(startMigrationCommand, task.ID)
     targetNodeConn, err := cluster.connections.BorrowPeerClient(task.TargetNode)
     if err != nil {
         return err