Updated Raft snapshot to also persist last saved unix timestamp in milliseconds. This timestamp is re-loaded when the server is re-started.

Created standalone snapshot flow including manifest file that is used to determine whether another snapshot should be taken based on the hashed contents of the snapshot.
This commit is contained in:
Kelvin Clement Mwinuka
2024-01-28 03:36:09 +08:00
parent 06822f122f
commit d31acfbbdd
8 changed files with 313 additions and 51 deletions

View File

@@ -3,6 +3,7 @@ package admin
import ( import (
"context" "context"
"errors" "errors"
"fmt"
"github.com/echovault/echovault/src/utils" "github.com/echovault/echovault/src/utils"
"net" "net"
) )
@@ -48,13 +49,17 @@ func NewModule() Plugin {
{ {
Command: "lastsave", Command: "lastsave",
Categories: []string{utils.AdminCategory, utils.FastCategory, utils.DangerousCategory}, Categories: []string{utils.AdminCategory, utils.FastCategory, utils.DangerousCategory},
Description: "(LASTSAVE) Get timestamp for the latest snapshot", Description: "(LASTSAVE) Get unix timestamp for the latest snapshot in milliseconds.",
Sync: false, Sync: false,
KeyExtractionFunc: func(cmd []string) ([]string, error) { KeyExtractionFunc: func(cmd []string) ([]string, error) {
return []string{}, nil return []string{}, nil
}, },
HandlerFunc: func(ctx context.Context, cmd []string, server utils.Server, conn *net.Conn) ([]byte, error) { HandlerFunc: func(ctx context.Context, cmd []string, server utils.Server, conn *net.Conn) ([]byte, error) {
return nil, errors.New("LASTSAVE command not implemented") msec := server.GetLatestSnapshot()
if msec == 0 {
return nil, errors.New("no snapshot")
}
return []byte(fmt.Sprintf(":%d\r\n\r\n", msec)), nil
}, },
}, },
{ {

View File

@@ -4,13 +4,16 @@ import (
"encoding/json" "encoding/json"
"github.com/echovault/echovault/src/utils" "github.com/echovault/echovault/src/utils"
"github.com/hashicorp/raft" "github.com/hashicorp/raft"
"strconv"
"strings"
) )
type SnapshotOpts struct { type SnapshotOpts struct {
config utils.Config config utils.Config
data map[string]interface{} data map[string]interface{}
startSnapshot func() startSnapshot func()
finishSnapshot func() finishSnapshot func()
setLatestSnapshot func(msec int64)
} }
type Snapshot struct { type Snapshot struct {
@@ -27,7 +30,18 @@ func NewFSMSnapshot(opts SnapshotOpts) *Snapshot {
func (s *Snapshot) Persist(sink raft.SnapshotSink) error { func (s *Snapshot) Persist(sink raft.SnapshotSink) error {
s.options.startSnapshot() s.options.startSnapshot()
o, err := json.Marshal(s.options.data) msec, err := strconv.Atoi(strings.Split(sink.ID(), "-")[2])
if err != nil {
sink.Cancel()
return err
}
snapshotObject := utils.SnapshotObject{
State: s.options.data,
LatestSnapshotMilliseconds: int64(msec),
}
o, err := json.Marshal(snapshotObject)
if err != nil { if err != nil {
sink.Cancel() sink.Cancel()
@@ -39,6 +53,8 @@ func (s *Snapshot) Persist(sink raft.SnapshotSink) error {
return err return err
} }
s.options.setLatestSnapshot(int64(msec))
return nil return nil
} }

View File

@@ -76,10 +76,11 @@ func (fsm *FSM) Apply(log *raft.Log) interface{} {
// Snapshot implements raft.FSM interface // Snapshot implements raft.FSM interface
func (fsm *FSM) Snapshot() (raft.FSMSnapshot, error) { func (fsm *FSM) Snapshot() (raft.FSMSnapshot, error) {
return NewFSMSnapshot(SnapshotOpts{ return NewFSMSnapshot(SnapshotOpts{
config: fsm.options.Config, config: fsm.options.Config,
data: fsm.options.Server.GetState(), data: fsm.options.Server.GetState(),
startSnapshot: fsm.options.Server.StartSnapshot, startSnapshot: fsm.options.Server.StartSnapshot,
finishSnapshot: fsm.options.Server.FinishSnapshot, finishSnapshot: fsm.options.Server.FinishSnapshot,
setLatestSnapshot: fsm.options.Server.SetLatestSnapshot,
}), nil }), nil
} }
@@ -92,14 +93,18 @@ func (fsm *FSM) Restore(snapshot io.ReadCloser) error {
return err return err
} }
data := make(map[string]interface{}) data := utils.SnapshotObject{
State: make(map[string]interface{}),
LatestSnapshotMilliseconds: 0,
}
if err := json.Unmarshal(b, &data); err != nil { if err := json.Unmarshal(b, &data); err != nil {
log.Fatal(err) log.Fatal(err)
return err return err
} }
for k, v := range data { // Set state
for k, v := range data.State {
_, err := fsm.options.Server.CreateKeyAndLock(context.Background(), k) _, err := fsm.options.Server.CreateKeyAndLock(context.Background(), k)
if err != nil { if err != nil {
log.Fatal(err) log.Fatal(err)
@@ -107,6 +112,8 @@ func (fsm *FSM) Restore(snapshot io.ReadCloser) error {
fsm.options.Server.SetValue(context.Background(), k, v) fsm.options.Server.SetValue(context.Background(), k, v)
fsm.options.Server.KeyUnlock(k) fsm.options.Server.KeyUnlock(k)
} }
// Set latest snapshot milliseconds
fsm.options.Server.SetLatestSnapshot(data.LatestSnapshotMilliseconds)
return nil return nil
} }

View File

@@ -8,7 +8,6 @@ import (
"log" "log"
"net" "net"
"os" "os"
"path"
"path/filepath" "path/filepath"
"time" "time"
@@ -63,7 +62,7 @@ func (r *Raft) RaftInit(ctx context.Context) {
stableStore = raft.StableStore(boltdb) stableStore = raft.StableStore(boltdb)
snapshotStore, err = raft.NewFileSnapshotStore(path.Join(conf.DataDir, "snapshots"), 2, os.Stdout) snapshotStore, err = raft.NewFileSnapshotStore(conf.DataDir, 2, os.Stdout)
if err != nil { if err != nil {
log.Fatal(err) log.Fatal(err)
} }
@@ -144,10 +143,10 @@ func (r *Raft) HasJoinedCluster() bool {
} }
func (r *Raft) AddVoter( func (r *Raft) AddVoter(
id raft.ServerID, id raft.ServerID,
address raft.ServerAddress, address raft.ServerAddress,
prevIndex uint64, prevIndex uint64,
timeout time.Duration, timeout time.Duration,
) error { ) error {
if r.IsRaftLeader() { if r.IsRaftLeader() {
raftConfig := r.raft.GetConfiguration() raftConfig := r.raft.GetConfiguration()

View File

@@ -1 +1,22 @@
package aof package aof
import (
"github.com/echovault/echovault/src/utils"
)
// This package handles AOF logging in standalone mode only.
// Logging in clusters is handled in the raft layer.
type Opts struct {
Config utils.Config
}
type Engine struct {
options Opts
}
func NewAOFEngine(opts Opts) *Engine {
return &Engine{
options: opts,
}
}

View File

@@ -9,6 +9,8 @@ import (
"github.com/echovault/echovault/src/modules/acl" "github.com/echovault/echovault/src/modules/acl"
"github.com/echovault/echovault/src/modules/pubsub" "github.com/echovault/echovault/src/modules/pubsub"
"github.com/echovault/echovault/src/raft" "github.com/echovault/echovault/src/raft"
"github.com/echovault/echovault/src/server/aof"
"github.com/echovault/echovault/src/server/snapshot"
"github.com/echovault/echovault/src/utils" "github.com/echovault/echovault/src/utils"
"io" "io"
"log" "log"
@@ -38,7 +40,10 @@ type Server struct {
ACL *acl.ACL ACL *acl.ACL
PubSub *pubsub.PubSub PubSub *pubsub.PubSub
SnapshotInProgress atomic.Bool SnapshotInProgress atomic.Bool
LatestSnapshotMilliseconds atomic.Int64 // Unix epoch in milliseconds
SnapshotEngine *snapshot.Engine
AOFEngine *aof.Engine
} }
func (server *Server) StartTCP(ctx context.Context) { func (server *Server) StartTCP(ctx context.Context) {
@@ -99,28 +104,26 @@ func (server *Server) handleConnection(ctx context.Context, conn net.Conn) {
if err != nil { if err != nil {
if err == io.EOF { if err == io.EOF {
// Connection closed // Connection closed
// TODO: Remove this connection from channel subscriptions
break break
} }
if err, ok := err.(net.Error); ok && err.Timeout() { if err, ok := err.(net.Error); ok && err.Timeout() {
// Connection timeout // Connection timeout
fmt.Println(err) log.Println(err)
break break
} }
if err, ok := err.(tls.RecordHeaderError); ok { if err, ok := err.(tls.RecordHeaderError); ok {
// TLS verification error // TLS verification error
fmt.Println(err) log.Println(err)
break break
} }
fmt.Println(err) log.Println(err)
break break
} }
if cmd, err := utils.Decode(message); err != nil { if cmd, err := utils.Decode(message); err != nil {
// Return error to client // Return error to client
if _, err := w.Write([]byte(fmt.Sprintf("-Error %s\r\n\r\n", err.Error()))); err != nil { if _, err := w.Write([]byte(fmt.Sprintf("-Error %s\r\n\r\n", err.Error()))); err != nil {
// TODO: Log error at configured logger log.Println(err)
fmt.Println(err)
} }
continue continue
} else { } else {
@@ -128,8 +131,7 @@ func (server *Server) handleConnection(ctx context.Context, conn net.Conn) {
if err != nil { if err != nil {
if _, err := w.Write([]byte(fmt.Sprintf("-%s\r\n\r\n", err.Error()))); err != nil { if _, err := w.Write([]byte(fmt.Sprintf("-%s\r\n\r\n", err.Error()))); err != nil {
// TODO: Log error at configured logger log.Println(err)
fmt.Println(err)
} }
continue continue
} }
@@ -146,8 +148,7 @@ func (server *Server) handleConnection(ctx context.Context, conn net.Conn) {
if err := server.ACL.AuthorizeConnection(&conn, cmd, command, subCommand); err != nil { if err := server.ACL.AuthorizeConnection(&conn, cmd, command, subCommand); err != nil {
if _, err := w.Write([]byte(fmt.Sprintf("-%s\r\n\r\n", err.Error()))); err != nil { if _, err := w.Write([]byte(fmt.Sprintf("-%s\r\n\r\n", err.Error()))); err != nil {
// TODO: Log error at configured logger log.Println(err)
fmt.Println(err)
} }
continue continue
} }
@@ -155,13 +156,11 @@ func (server *Server) handleConnection(ctx context.Context, conn net.Conn) {
if !server.IsInCluster() || !synchronize { if !server.IsInCluster() || !synchronize {
if res, err := handler(ctx, cmd, server, &conn); err != nil { if res, err := handler(ctx, cmd, server, &conn); err != nil {
if _, err := w.Write([]byte(fmt.Sprintf("-%s\r\n\r\n", err.Error()))); err != nil { if _, err := w.Write([]byte(fmt.Sprintf("-%s\r\n\r\n", err.Error()))); err != nil {
// TODO: Log error at configured logger log.Println(err)
fmt.Println(err)
} }
} else { } else {
if _, err := w.Write(res); err != nil { if _, err := w.Write(res); err != nil {
// TODO: Log error at configured logger log.Println(err)
fmt.Println(err)
} }
// TODO: Write successful, add entry to AOF // TODO: Write successful, add entry to AOF
} }
@@ -172,13 +171,11 @@ func (server *Server) handleConnection(ctx context.Context, conn net.Conn) {
if server.raft.IsRaftLeader() { if server.raft.IsRaftLeader() {
if res, err := server.raftApply(ctx, cmd); err != nil { if res, err := server.raftApply(ctx, cmd); err != nil {
if _, err := w.Write([]byte(fmt.Sprintf("-Error %s\r\n\r\n", err.Error()))); err != nil { if _, err := w.Write([]byte(fmt.Sprintf("-Error %s\r\n\r\n", err.Error()))); err != nil {
// TODO: Log error at configured logger log.Println(err)
fmt.Println(err)
} }
} else { } else {
if _, err := w.Write(res); err != nil { if _, err := w.Write(res); err != nil {
// TODO: Log error at configured logger log.Println(err)
fmt.Println(err)
} }
} }
continue continue
@@ -188,22 +185,19 @@ func (server *Server) handleConnection(ctx context.Context, conn net.Conn) {
if server.Config.ForwardCommand { if server.Config.ForwardCommand {
server.memberList.ForwardDataMutation(ctx, message) server.memberList.ForwardDataMutation(ctx, message)
if _, err := w.Write([]byte(utils.OK_RESPONSE)); err != nil { if _, err := w.Write([]byte(utils.OK_RESPONSE)); err != nil {
// TODO: Log error at configured logger log.Println(err)
fmt.Println(err)
} }
continue continue
} }
if _, err := w.Write([]byte("-Error not cluster leader, cannot carry out command\r\n\r\n")); err != nil { if _, err := w.Write([]byte("-Error not cluster leader, cannot carry out command\r\n\r\n")); err != nil {
// TODO: Log error at configured logger log.Println(err)
fmt.Println(err)
} }
} }
} }
if err := conn.Close(); err != nil { if err := conn.Close(); err != nil {
// TODO: Log error at configured logger log.Println(err)
fmt.Println(err)
} }
} }
@@ -238,6 +232,21 @@ func (server *Server) Start(ctx context.Context) {
}) })
server.raft.RaftInit(ctx) server.raft.RaftInit(ctx)
server.memberList.MemberListInit(ctx) server.memberList.MemberListInit(ctx)
} else {
// Initialize standalone AOF engine
server.AOFEngine = aof.NewAOFEngine(aof.Opts{
Config: conf,
})
// Initialize and start standalone snapshot engine
server.SnapshotEngine = snapshot.NewSnapshotEngine(snapshot.Opts{
Config: conf,
StartSnapshot: server.StartSnapshot,
FinishSnapshot: server.FinishSnapshot,
GetState: server.GetState,
SetLatestSnapshotMilliseconds: server.SetLatestSnapshot,
GetLatestSnapshotMilliseconds: server.GetLatestSnapshot,
})
server.SnapshotEngine.Start()
} }
server.StartTCP(ctx) server.StartTCP(ctx)
@@ -247,17 +256,32 @@ func (server *Server) TakeSnapshot() error {
if server.SnapshotInProgress.Load() { if server.SnapshotInProgress.Load() {
return errors.New("snapshot already in progress") return errors.New("snapshot already in progress")
} }
if server.IsInCluster() {
// Handle snapshot in cluster mode go func() {
go func() { if server.IsInCluster() {
err := server.raft.TakeSnapshot() // Handle snapshot in cluster mode
if err := server.raft.TakeSnapshot(); err != nil {
log.Println(err)
}
return
}
// Handle snapshot in standalone mode
if err := server.SnapshotEngine.TakeSnapshot(); err != nil {
log.Println(err) log.Println(err)
}() }
} }()
// Handle snapshot in standalone mode
return nil return nil
} }
func (server *Server) SetLatestSnapshot(msec int64) {
server.LatestSnapshotMilliseconds.Store(msec)
}
func (server *Server) GetLatestSnapshot() int64 {
return server.LatestSnapshotMilliseconds.Load()
}
func (server *Server) ShutDown(ctx context.Context) { func (server *Server) ShutDown(ctx context.Context) {
if server.IsInCluster() { if server.IsInCluster() {
server.raft.RaftShutdown(ctx) server.raft.RaftShutdown(ctx)

View File

@@ -1,4 +1,187 @@
package snapshot package snapshot
import (
"crypto/md5"
"encoding/json"
"errors"
"fmt"
"github.com/echovault/echovault/src/utils"
"io"
"io/fs"
"log"
"os"
"path"
"time"
)
// This package contains the snapshot engine for standalone mode. // This package contains the snapshot engine for standalone mode.
// Snapshots in cluster mode will be handled using the raft package in the raft layer. // Snapshots in cluster mode will be handled using the raft package in the raft layer.
type Manifest struct {
LatestSnapshotMilliseconds int64
LatestSnapshotHash [16]byte
}
type Opts struct {
Config utils.Config
StartSnapshot func()
FinishSnapshot func()
GetState func() map[string]interface{}
SetLatestSnapshotMilliseconds func(msec int64)
GetLatestSnapshotMilliseconds func() int64
}
type Engine struct {
options Opts
}
func NewSnapshotEngine(opts Opts) *Engine {
return &Engine{
options: opts,
}
}
func (engine *Engine) Start() {
// TODO: Start goroutine for periodic snapshots
}
func (engine *Engine) TakeSnapshot() error {
engine.options.StartSnapshot()
defer engine.options.FinishSnapshot()
// Extract current time
now := time.Now()
msec := now.UnixNano() / int64(time.Millisecond)
// Update manifest file to indicate the latest snapshot.
// If manifest file does not exist, create it.
// Manifest object will contain the following information:
// 1. Hash of the snapshot contents.
// 2. Unix time of the latest snapshot taken.
// The information above will be used to determine whether a snapshot should be taken.
// If the hash of the current state equals the hash in the manifest file, skip the snapshot.
// Otherwise, take the snapshot and update the latest snapshot timestamp and hash in the manifest file.
var firstSnapshot bool // Tracks whether the snapshot being attempted is the first one
dirname := path.Join(engine.options.Config.DataDir, "snapshots")
if err := os.MkdirAll(dirname, os.ModePerm); err != nil {
log.Println(err)
return err
}
// Open manifest file
var mf *os.File
mf, err := os.Open(path.Join(dirname, "manifest.bin"))
if err != nil {
if errors.Is(err, fs.ErrNotExist) {
// Create file if it does not exist
mf, err = os.Create(path.Join(dirname, "manifest.bin"))
if err != nil {
log.Println(err)
return err
}
firstSnapshot = true
} else {
log.Println(err)
return err
}
}
md, err := io.ReadAll(mf)
if err != nil {
log.Println(err)
return err
}
if err := mf.Close(); err != nil {
log.Println(err)
return err
}
manifest := new(Manifest)
if !firstSnapshot {
if err = json.Unmarshal(md, manifest); err != nil {
log.Println(err)
return err
}
}
// Get current state
snapshotObject := utils.SnapshotObject{
State: engine.options.GetState(),
LatestSnapshotMilliseconds: engine.options.GetLatestSnapshotMilliseconds(),
}
out, err := json.Marshal(snapshotObject)
if err != nil {
log.Println(err)
return err
}
snapshotHash := md5.Sum(out)
if snapshotHash == manifest.LatestSnapshotHash {
return errors.New("nothing new to snapshot")
}
// Update the snapshotObject
snapshotObject.LatestSnapshotMilliseconds = msec
// Marshal the updated snapshotObject
out, err = json.Marshal(snapshotObject)
if err != nil {
log.Println(err)
return err
}
// os.Create will replace the old manifest file
mf, err = os.Create(path.Join(dirname, "manifest.bin"))
if err != nil {
log.Println(err)
return err
}
// Write the latest manifest data
manifest = &Manifest{
LatestSnapshotHash: md5.Sum(out),
LatestSnapshotMilliseconds: msec,
}
mo, err := json.Marshal(manifest)
if err != nil {
log.Println(err)
return err
}
if _, err = mf.Write(mo); err != nil {
log.Println(err)
return err
}
if err = mf.Close(); err != nil {
log.Println(err)
return err
}
// Create snapshot directory
dirname = path.Join(engine.options.Config.DataDir, "snapshots", fmt.Sprintf("%d", msec))
if err := os.MkdirAll(dirname, os.ModePerm); err != nil {
return err
}
// Create snapshot file
f, err := os.OpenFile(path.Join(dirname, "state.bin"), os.O_WRONLY|os.O_CREATE, os.ModePerm)
if err != nil {
log.Println(err)
return err
}
defer func() {
if err := f.Close(); err != nil {
log.Println(err)
}
}()
// Write state to file
if _, err = f.Write(out); err != nil {
return err
}
// Set the latest snapshot in unix milliseconds
engine.options.SetLatestSnapshotMilliseconds(msec)
return nil
}

View File

@@ -21,6 +21,8 @@ type Server interface {
TakeSnapshot() error TakeSnapshot() error
StartSnapshot() StartSnapshot()
FinishSnapshot() FinishSnapshot()
SetLatestSnapshot(msec int64)
GetLatestSnapshot() int64
} }
type ContextServerID string type ContextServerID string
@@ -64,3 +66,8 @@ type Plugin interface {
Commands() []Command Commands() []Command
Description() string Description() string
} }
type SnapshotObject struct {
State map[string]interface{}
LatestSnapshotMilliseconds int64
}