Add automatic bootstrap and recovery

This commit is contained in:
Ingo Oppermann
2023-05-02 15:49:46 +02:00
parent 75c11eb475
commit 4eb0258ba4
3 changed files with 91 additions and 20 deletions

View File

@@ -628,6 +628,7 @@ func (a *api) start() error {
Name: cfg.Name,
Path: filepath.Join(cfg.DB.Dir, "cluster"),
Bootstrap: cfg.Cluster.Bootstrap,
Recover: cfg.Cluster.Recover,
Address: cfg.Cluster.Address,
JoinAddress: cfg.Cluster.JoinAddress,
CoreAPIAddress: cfg.Address,

View File

@@ -941,26 +941,50 @@ func (c *cluster) startRaft(fsm raft.FSM, bootstrap, recover, inmem bool) error
cfg.LocalID = raft.ServerID(c.id)
cfg.Logger = NewLogger(c.logger, hclog.Debug).Named("raft")
if bootstrap {
hasState, err := raft.HasExistingState(logStore, stableStore, snapshots)
hasState, err := raft.HasExistingState(logStore, stableStore, snapshots)
if err != nil {
return err
}
if !hasState {
// Bootstrap cluster
configuration := raft.Configuration{
Servers: []raft.Server{
{
Suffrage: raft.Voter,
ID: raft.ServerID(c.id),
Address: transport.LocalAddr(),
},
},
}
if err := raft.BootstrapCluster(cfg, logStore, stableStore, snapshots, transport, configuration); err != nil {
return err
}
c.logger.Debug().Log("raft node bootstrapped")
} else {
// Recover cluster
fsm, err := NewStore()
if err != nil {
return err
}
if !hasState {
configuration := raft.Configuration{
Servers: []raft.Server{
{
Suffrage: raft.Voter,
ID: raft.ServerID(c.id),
Address: transport.LocalAddr(),
},
},
}
if err := raft.BootstrapCluster(cfg, logStore, stableStore, snapshots, transport, configuration); err != nil {
return err
}
configuration := raft.Configuration{
Servers: []raft.Server{
{
Suffrage: raft.Voter,
ID: raft.ServerID(c.id),
Address: transport.LocalAddr(),
},
},
}
if err := raft.RecoverCluster(cfg, fsm, logStore, stableStore, snapshots, transport, configuration); err != nil {
return err
}
c.logger.Debug().Log("raft node recoverd")
}
// Set up a channel for reliable leader notifications.

View File

@@ -4,6 +4,7 @@ import (
"encoding/json"
"fmt"
"io"
"sync"
"github.com/hashicorp/raft"
)
@@ -45,10 +46,15 @@ type addProcessCommand struct {
}
// store implements the raft FSM for the cluster. Nodes maps a node ID to that
// node's address. It is exported so that encoding/json includes it when the
// store is marshalled for a snapshot; the unexported lock is skipped.
type store struct {
	lock sync.RWMutex // guards Nodes

	Nodes map[string]string
}
// NewStore returns an empty store whose Nodes map is already allocated, so
// that Apply can insert entries without writing to a nil map. The returned
// error is currently always nil.
func NewStore() (Store, error) {
	return &store{
		Nodes: map[string]string{},
	}, nil
}
func (s *store) Apply(log *raft.Log) interface{} {
@@ -72,23 +78,53 @@ func (s *store) Apply(log *raft.Log) interface{} {
json.Unmarshal(b, &cmd)
fmt.Printf("addNode: %+v\n", cmd)
s.lock.Lock()
s.Nodes[cmd.ID] = cmd.Address
s.lock.Unlock()
case opRemoveNode:
b, _ := json.Marshal(c.Data)
cmd := removeNodeCommand{}
json.Unmarshal(b, &cmd)
fmt.Printf("removeNode: %+v\n", cmd)
s.lock.Lock()
delete(s.Nodes, cmd.ID)
s.lock.Unlock()
}
return nil
}
// Snapshot serialises the store to JSON while holding the lock and hands the
// resulting bytes to an fsmSnapshot, so the snapshot is not affected by
// changes applied after this call returns.
//
// It returns the marshalling error, if any.
func (s *store) Snapshot() (raft.FSMSnapshot, error) {
	fmt.Printf("a snapshot is requested\n")

	// Marshalling only reads the state, so a shared lock is sufficient.
	s.lock.RLock()
	defer s.lock.RUnlock()

	data, err := json.Marshal(s)
	if err != nil {
		return nil, err
	}

	return &fsmSnapshot{
		data: data,
	}, nil
}
// Restore replaces the store's state with the JSON snapshot read from
// snapshot. The reader is always closed before returning.
//
// If decoding fails the error is returned and the current state is left
// untouched.
func (s *store) Restore(snapshot io.ReadCloser) error {
	fmt.Printf("a snapshot is restored\n")

	defer snapshot.Close()

	// Decode into a scratch value instead of s itself: encoding/json reuses an
	// existing map and keeps its entries, which would let nodes that are not
	// part of the snapshot survive the restore.
	var restored store
	if err := json.NewDecoder(snapshot).Decode(&restored); err != nil {
		return err
	}

	// A snapshot with a missing or null "Nodes" leaves the map nil; later
	// writes to it would panic.
	if restored.Nodes == nil {
		restored.Nodes = map[string]string{}
	}

	s.lock.Lock()
	defer s.lock.Unlock()

	s.Nodes = restored.Nodes

	return nil
}
@@ -100,10 +136,20 @@ func (s *store) GetNode(id string) string {
return ""
}
// fsmSnapshot holds the serialised store state captured by Snapshot until it
// is written out by Persist.
type fsmSnapshot struct {
	data []byte
}
// Persist writes the captured state to sink and closes it.
//
// If the write fails the sink is cancelled and the write error is returned.
// Otherwise the result of closing the sink is returned, so that a failure to
// finalise the snapshot is not reported as success.
func (s *fsmSnapshot) Persist(sink raft.SnapshotSink) error {
	if _, err := sink.Write(s.data); err != nil {
		// Best effort: the write error is the one worth reporting.
		_ = sink.Cancel()
		return err
	}

	return sink.Close()
}
// Release drops the reference to the serialised state so that it can be
// garbage collected.
func (s *fsmSnapshot) Release() {
	s.data = nil
}