package database

import (
	"errors"
	"fmt"
	"io"
	"io/ioutil"
	"os"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/hdt3213/godis/interface/redis"
	"github.com/hdt3213/godis/lib/logger"
	"github.com/hdt3213/godis/lib/sync/atomic"
	"github.com/hdt3213/godis/lib/utils"
	"github.com/hdt3213/godis/redis/protocol"
)
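
// slaveState* values track a slave's progress from the master's point of view:
// handshake, waiting for the bgsave to end, receiving the RDB snapshot, then
// online (receiving incremental updates from the backlog).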
const (
	slaveStateHandShake = uint8(iota)
	slaveStateWaitSaveEnd
	slaveStateSendingRDB
	slaveStateOnline
)

const (
	bgSaveIdle = uint8(iota)
	bgSaveRunning
	bgSaveFinish
)
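
// slaveCapacity* are bit flags for optional replication capabilities a slave
// may announce (cf. `REPLCONF capa eof/psync2` in Redis).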
const (
	slaveCapacityNone = 0
	slaveCapacityEOF  = 1 << iota
	slaveCapacityPsync2
)

// slaveClient stores a slave's status from the master's point of view
type slaveClient struct {
	conn         redis.Connection
	state        uint8
	offset       int64
	lastAckTime  time.Time
	announceIp   string
	announcePort int
	capacity     uint8
}

// replBacklog is the replication backlog; it is currently updated only by the
// aofListener
type replBacklog struct {
	buf           []byte
	beginOffset   int64
	currentOffset int64
}
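
// Offsets are absolute positions in the replication stream: buf[0] holds the
// byte at beginOffset, so the byte at absolute offset x lives at
// buf[x-beginOffset].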

func (backlog *replBacklog) appendBytes(bin []byte) {
	backlog.buf = append(backlog.buf, bin...)
	backlog.currentOffset += int64(len(bin))
}

func (backlog *replBacklog) getSnapshot() ([]byte, int64) {
	return backlog.buf[:], backlog.currentOffset
}
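
// getSnapshotAfter returns the backlog content starting at the given absolute
// offset. Callers must check isValidOffset first, otherwise the slice
// expression below may panic.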
func (backlog *replBacklog) getSnapshotAfter(beginOffset int64) ([]byte, int64) {
	beg := beginOffset - backlog.beginOffset
	return backlog.buf[beg:], backlog.currentOffset
}

func (backlog *replBacklog) isValidOffset(offset int64) bool {
	return offset >= backlog.beginOffset && offset < backlog.currentOffset
}
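
// masterStatus holds the master-side replication state. mu guards the mutable
// fields below; rewriting is an atomic flag checked by masterCron.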
type masterStatus struct {
	mu           sync.RWMutex
	replId       string
	backlog      *replBacklog
	slaveMap     map[redis.Connection]*slaveClient
	waitSlaves   map[*slaveClient]struct{}
	onlineSlaves map[*slaveClient]struct{}
	bgSaveState  uint8
	rdbFilename  string
	aofListener  *replAofListener
	rewriting    atomic.Boolean
}

// bgSaveForReplication runs saveForReplication in a background goroutine
func (server *Server) bgSaveForReplication() {
	go func() {
		defer func() {
			if e := recover(); e != nil {
				logger.Errorf("panic: %v", e)
			}
		}()
		if err := server.saveForReplication(); err != nil {
			logger.Errorf("save for replication error: %v", err)
		}
	}()
}

// saveForReplication generates an RDB snapshot and sends it to waiting slaves
func (server *Server) saveForReplication() error {
	rdbFile, err := ioutil.TempFile("", "*.rdb")
	if err != nil {
		return fmt.Errorf("create temp rdb failed: %v", err)
	}
	rdbFilename := rdbFile.Name()
	_ = rdbFile.Close() // only the temp file's name is needed below
	server.masterStatus.mu.Lock()
	server.masterStatus.bgSaveState = bgSaveRunning
	server.masterStatus.rdbFilename = rdbFilename // todo: can we reuse config.Properties.RDBFilename?
	aofListener := &replAofListener{
		mdb:     server,
		backlog: server.masterStatus.backlog,
	}
	server.masterStatus.aofListener = aofListener
	server.masterStatus.mu.Unlock()

	err = server.persister.GenerateRDBForReplication(rdbFilename, aofListener, nil)
	if err != nil {
		return err
	}
	// the RDB snapshot is complete, so buffered updates may now be forwarded
	aofListener.readyToSend = true

	// change bgSaveState and collect waitSlaves for sending
	waitSlaves := make(map[*slaveClient]struct{})
	server.masterStatus.mu.Lock()
	server.masterStatus.bgSaveState = bgSaveFinish
	for slave := range server.masterStatus.waitSlaves {
		waitSlaves[slave] = struct{}{}
	}
	server.masterStatus.waitSlaves = nil
	server.masterStatus.mu.Unlock()

	// send rdb to waiting slaves
	for slave := range waitSlaves {
		err = server.masterFullReSyncWithSlave(slave)
		if err != nil {
			server.removeSlave(slave)
			logger.Errorf("masterFullReSyncWithSlave error: %v", err)
			continue
		}
	}
	return nil
}
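
// rewriteRDB regenerates the RDB snapshot and starts a fresh, empty backlog so
// that masterCron can discard an oversized backlog. Later full resyncs then use
// the new snapshot + backlog pair.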
func (server *Server) rewriteRDB() error {
	rdbFile, err := ioutil.TempFile("", "*.rdb")
	if err != nil {
		return fmt.Errorf("create temp rdb failed: %v", err)
	}
	rdbFilename := rdbFile.Name()
	_ = rdbFile.Close() // only the temp file's name is needed below
	newBacklog := &replBacklog{}
	aofListener := &replAofListener{
		backlog: newBacklog,
		mdb:     server,
	}
	hook := func() {
		// Pause aof first, then lock masterStatus.
		// Use the same order as replAofListener to avoid deadlock.
		server.masterStatus.mu.Lock()
		defer server.masterStatus.mu.Unlock()
		newBacklog.beginOffset = server.masterStatus.backlog.currentOffset
	}
	err = server.persister.GenerateRDBForReplication(rdbFilename, aofListener, hook) // wait for rdb result
	if err != nil {
		return err
	}
	server.masterStatus.mu.Lock()
	server.masterStatus.rdbFilename = rdbFilename
	server.masterStatus.backlog = newBacklog
	server.persister.RemoveListener(server.masterStatus.aofListener)
	server.masterStatus.aofListener = aofListener
	server.masterStatus.mu.Unlock()
	// It is OK for slaves to learn about the new backlog a little late, so
	// readyToSend may be flipped without synchronization. However, it must be
	// set only after the new backlog is really ready (i.e. after
	// masterStatus.mu.Unlock above).
	aofListener.readyToSend = true
	return nil
}
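
// A full resync, as performed below, writes three things to the slave in order:
//   +FULLRESYNC <replid> <beginOffset>\r\n  (replication header)
//   $<rdb size>\r\n<rdb bytes>              (snapshot; like a bulk string but with no trailing CRLF)
//   <backlog bytes>                         (raw command stream since beginOffset)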

// masterFullReSyncWithSlave sends the replication header, the rdb file and the
// backlog to a slave
func (server *Server) masterFullReSyncWithSlave(slave *slaveClient) error {
	// write replication header
	header := "+FULLRESYNC " + server.masterStatus.replId + " " +
		strconv.FormatInt(server.masterStatus.backlog.beginOffset, 10) + protocol.CRLF
	_, err := slave.conn.Write([]byte(header))
	if err != nil {
		return fmt.Errorf("write replication header to slave failed: %v", err)
	}
	// send rdb
	rdbFile, err := os.Open(server.masterStatus.rdbFilename)
	if err != nil {
		return fmt.Errorf("open rdb file %s for replication error: %v", server.masterStatus.rdbFilename, err)
	}
	defer rdbFile.Close()
	slave.state = slaveStateSendingRDB
	rdbInfo, err := rdbFile.Stat()
	if err != nil {
		return fmt.Errorf("stat rdb file %s failed: %v", server.masterStatus.rdbFilename, err)
	}
	rdbSize := rdbInfo.Size()
	rdbHeader := "$" + strconv.FormatInt(rdbSize, 10) + protocol.CRLF
	_, err = slave.conn.Write([]byte(rdbHeader))
	if err != nil {
		return fmt.Errorf("write rdb header to slave failed: %v", err)
	}
	_, err = io.Copy(slave.conn, rdbFile)
	if err != nil {
		return fmt.Errorf("write rdb file to slave failed: %v", err)
	}

	// send backlog
	server.masterStatus.mu.RLock()
	backlog, currentOffset := server.masterStatus.backlog.getSnapshot()
	server.masterStatus.mu.RUnlock()
	_, err = slave.conn.Write(backlog)
	if err != nil {
		return fmt.Errorf("full resync write backlog to slave failed: %v", err)
	}

	// set slave as online
	server.setSlaveOnline(slave, currentOffset)
	return nil
}

var cannotPartialSync = errors.New("cannot do partial sync")
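
// masterTryPartialSyncWithSlave resumes replication from slaveOffset if the
// slave's replication id matches ours and the requested offset is still
// covered by the backlog; otherwise it returns cannotPartialSync and the
// caller falls back to a full resync.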
func (server *Server) masterTryPartialSyncWithSlave(slave *slaveClient, replId string, slaveOffset int64) error {
	server.masterStatus.mu.RLock()
	if replId != server.masterStatus.replId {
		server.masterStatus.mu.RUnlock()
		return cannotPartialSync
	}
	if !server.masterStatus.backlog.isValidOffset(slaveOffset) {
		server.masterStatus.mu.RUnlock()
		return cannotPartialSync
	}
	backlog, currentOffset := server.masterStatus.backlog.getSnapshotAfter(slaveOffset)
	server.masterStatus.mu.RUnlock()

	// send replication header; replId has just been verified to equal
	// masterStatus.replId, so use the parameter to avoid an unlocked read
	header := "+CONTINUE " + replId + protocol.CRLF
	_, err := slave.conn.Write([]byte(header))
	if err != nil {
		return fmt.Errorf("write replication header to slave failed: %v", err)
	}
	// send backlog
	_, err = slave.conn.Write(backlog)
	if err != nil {
		return fmt.Errorf("partial resync write backlog to slave failed: %v", err)
	}

	// set slave online
	server.setSlaveOnline(slave, currentOffset)
	return nil
}

// masterSendUpdatesToSlave sends pending backlog data to online slaves only.
// While a bgSave is running, updates are sent after the save finishes.
func (server *Server) masterSendUpdatesToSlave() error {
	onlineSlaves := make(map[*slaveClient]struct{})
	server.masterStatus.mu.RLock()
	beginOffset := server.masterStatus.backlog.beginOffset
	backlog, currentOffset := server.masterStatus.backlog.getSnapshot()
	for slave := range server.masterStatus.onlineSlaves {
		onlineSlaves[slave] = struct{}{}
	}
	server.masterStatus.mu.RUnlock()
	for slave := range onlineSlaves {
		slaveBeginOffset := slave.offset - beginOffset
		if slaveBeginOffset < 0 || slaveBeginOffset > int64(len(backlog)) {
			// the slave's offset is no longer covered by this backlog (e.g. after
			// a backlog rewrite); drop it so it reconnects and resyncs instead of
			// panicking on an out-of-range slice index
			logger.Errorf("slave offset %d out of backlog range", slave.offset)
			server.removeSlave(slave)
			continue
		}
		_, err := slave.conn.Write(backlog[slaveBeginOffset:])
		if err != nil {
			logger.Errorf("send updates backlog to slave failed: %v", err)
			server.removeSlave(slave)
			continue
		}
		slave.offset = currentOffset
	}
	return nil
}
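
// execPSync handles `PSYNC <replid> <offset>` from a slave. Depending on the
// bgsave state, it either queues the slave for the next snapshot or tries a
// partial sync, falling back to a full resync.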
func (server *Server) execPSync(c redis.Connection, args [][]byte) redis.Reply {
	replId := string(args[0])
	replOffset, err := strconv.ParseInt(string(args[1]), 10, 64)
	if err != nil {
		return protocol.MakeErrReply("ERR value is not an integer or out of range")
	}
	server.masterStatus.mu.Lock()
	defer server.masterStatus.mu.Unlock()
	slave := server.masterStatus.slaveMap[c]
	if slave == nil {
		slave = &slaveClient{
			conn: c,
		}
		c.SetSlave()
		server.masterStatus.slaveMap[c] = slave
	}
	if server.masterStatus.bgSaveState == bgSaveIdle {
		slave.state = slaveStateWaitSaveEnd
		server.masterStatus.waitSlaves[slave] = struct{}{}
		server.bgSaveForReplication()
	} else if server.masterStatus.bgSaveState == bgSaveRunning {
		slave.state = slaveStateWaitSaveEnd
		server.masterStatus.waitSlaves[slave] = struct{}{}
	} else if server.masterStatus.bgSaveState == bgSaveFinish {
		// this goroutine blocks on masterStatus.mu until execPSync returns and
		// releases the lock
		go func() {
			defer func() {
				if e := recover(); e != nil {
					logger.Errorf("panic: %v", e)
				}
			}()
			err := server.masterTryPartialSyncWithSlave(slave, replId, replOffset)
			if err == nil {
				return
			}
			if err != cannotPartialSync {
				server.removeSlave(slave)
				logger.Errorf("masterTryPartialSyncWithSlave error: %v", err)
				return
			}
			// assert err == cannotPartialSync
			if err := server.masterFullReSyncWithSlave(slave); err != nil {
				server.removeSlave(slave)
				logger.Errorf("masterFullReSyncWithSlave error: %v", err)
				return
			}
		}()
	}
	return &protocol.NoReply{}
}
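
// execReplConf handles `REPLCONF <key> <value> ...` from a slave; currently
// only `REPLCONF ack <offset>` is acted on, recording the slave's acknowledged
// offset and last-ack time.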
func (server *Server) execReplConf(c redis.Connection, args [][]byte) redis.Reply {
	if len(args)%2 != 0 {
		return protocol.MakeSyntaxErrReply()
	}
	server.masterStatus.mu.RLock()
	slave := server.masterStatus.slaveMap[c]
	server.masterStatus.mu.RUnlock()
	for i := 0; i < len(args); i += 2 {
		key := strings.ToLower(string(args[i]))
		value := string(args[i+1])
		switch key {
		case "ack":
			if slave == nil {
				// an ack from a connection that never sent PSYNC has no registered
				// slave; reject it instead of dereferencing a nil pointer
				return protocol.MakeErrReply("ERR REPLCONF ack from unknown slave")
			}
			offset, err := strconv.ParseInt(value, 10, 64)
			if err != nil {
				return protocol.MakeErrReply("ERR value is not an integer or out of range")
			}
			slave.offset = offset
			slave.lastAckTime = time.Now()
			return &protocol.NoReply{}
		}
	}
	return protocol.MakeOkReply()
}

func (server *Server) removeSlave(slave *slaveClient) {
	server.masterStatus.mu.Lock()
	defer server.masterStatus.mu.Unlock()
	_ = slave.conn.Close()
	delete(server.masterStatus.slaveMap, slave.conn)
	delete(server.masterStatus.waitSlaves, slave)
	delete(server.masterStatus.onlineSlaves, slave)
	logger.Info("disconnect with slave " + slave.conn.Name())
}

func (server *Server) setSlaveOnline(slave *slaveClient, currentOffset int64) {
	server.masterStatus.mu.Lock()
	defer server.masterStatus.mu.Unlock()
	slave.state = slaveStateOnline
	slave.offset = currentOffset
	server.masterStatus.onlineSlaves[slave] = struct{}{}
}
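
// pingBytes is appended to the backlog periodically so that the replication
// offset keeps advancing even on an idle master; the forwarded pings also act
// as a keep-alive on the master-slave link.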
var pingBytes = protocol.MakeMultiBulkReply(utils.ToCmdLine("ping")).ToBytes()

const maxBacklogSize = 10 * 1024 * 1024 // 10MB
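
// masterCron runs periodically on a master: it appends a ping to the backlog
// (once the first bgsave has finished), pushes pending updates to online
// slaves, and triggers an RDB + backlog rewrite when the backlog grows past
// maxBacklogSize.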
func (server *Server) masterCron() {
	server.masterStatus.mu.Lock()
	if len(server.masterStatus.slaveMap) == 0 { // no slaves, do nothing
		server.masterStatus.mu.Unlock()
		return
	}
	if server.masterStatus.bgSaveState == bgSaveFinish {
		server.masterStatus.backlog.appendBytes(pingBytes)
	}
	backlogSize := len(server.masterStatus.backlog.buf)
	server.masterStatus.mu.Unlock()
	if err := server.masterSendUpdatesToSlave(); err != nil {
		logger.Errorf("masterSendUpdatesToSlave error: %v", err)
	}
	if backlogSize > maxBacklogSize && !server.masterStatus.rewriting.Get() {
		// set the flag before launching the goroutine so the next cron tick
		// cannot start a second rewrite; the deferred Set(false) clears it
		server.masterStatus.rewriting.Set(true)
		go func() {
			defer server.masterStatus.rewriting.Set(false)
			if err := server.rewriteRDB(); err != nil {
				logger.Errorf("rewrite error: %v", err)
			}
		}()
	}
}

// replAofListener implements aof.Listener
type replAofListener struct {
	mdb         *Server
	backlog     *replBacklog // may NOT be mdb.masterStatus.backlog (see rewriteRDB)
	readyToSend bool
}
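
// Callback receives command lines from the aof pipeline, appends them to the
// backlog, and, once the RDB save has finished, pushes them to online slaves.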
func (listener *replAofListener) Callback(cmdLines []CmdLine) {
	listener.mdb.masterStatus.mu.Lock()
	for _, cmdLine := range cmdLines {
		reply := protocol.MakeMultiBulkReply(cmdLine)
		listener.backlog.appendBytes(reply.ToBytes())
	}
	listener.mdb.masterStatus.mu.Unlock()
	// The listener may receive updates generated while the rdb save is still in
	// progress. Do not send updates to slaves before the save has finished.
	if listener.readyToSend {
		if err := listener.mdb.masterSendUpdatesToSlave(); err != nil {
			logger.Errorf("masterSendUpdatesToSlave after receive aof error: %v", err)
		}
	}
}

func (server *Server) initMaster() {
	server.masterStatus = &masterStatus{
		mu:           sync.RWMutex{},
		replId:       utils.RandHexString(40),
		backlog:      &replBacklog{},
		slaveMap:     make(map[redis.Connection]*slaveClient),
		waitSlaves:   make(map[*slaveClient]struct{}),
		onlineSlaves: make(map[*slaveClient]struct{}),
		bgSaveState:  bgSaveIdle,
		rdbFilename:  "",
	}
}
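
// stopMaster disconnects all slaves and resets the master-side replication
// state, e.g. when this node stops acting as a master.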
func (server *Server) stopMaster() {
	server.masterStatus.mu.Lock()
	defer server.masterStatus.mu.Unlock()

	// disconnect with slaves
	for _, slave := range server.masterStatus.slaveMap {
		_ = slave.conn.Close()
		delete(server.masterStatus.slaveMap, slave.conn)
		delete(server.masterStatus.waitSlaves, slave)
		delete(server.masterStatus.onlineSlaves, slave)
	}

	// clean master status
	if server.persister != nil {
		server.persister.RemoveListener(server.masterStatus.aofListener)
	}
	_ = os.Remove(server.masterStatus.rdbFilename)
	server.masterStatus.rdbFilename = ""
	server.masterStatus.replId = ""
	server.masterStatus.backlog = &replBacklog{}
	server.masterStatus.slaveMap = make(map[redis.Connection]*slaveClient)
	server.masterStatus.waitSlaves = make(map[*slaveClient]struct{})
	server.masterStatus.onlineSlaves = make(map[*slaveClient]struct{})
	server.masterStatus.bgSaveState = bgSaveIdle
}