Add automatic bootstrap and recovery

This commit is contained in:
Ingo Oppermann
2023-05-02 15:49:46 +02:00
parent 75c11eb475
commit 4eb0258ba4
3 changed files with 91 additions and 20 deletions

View File

@@ -628,6 +628,7 @@ func (a *api) start() error {
Name: cfg.Name, Name: cfg.Name,
Path: filepath.Join(cfg.DB.Dir, "cluster"), Path: filepath.Join(cfg.DB.Dir, "cluster"),
Bootstrap: cfg.Cluster.Bootstrap, Bootstrap: cfg.Cluster.Bootstrap,
Recover: cfg.Cluster.Recover,
Address: cfg.Cluster.Address, Address: cfg.Cluster.Address,
JoinAddress: cfg.Cluster.JoinAddress, JoinAddress: cfg.Cluster.JoinAddress,
CoreAPIAddress: cfg.Address, CoreAPIAddress: cfg.Address,

View File

@@ -941,12 +941,13 @@ func (c *cluster) startRaft(fsm raft.FSM, bootstrap, recover, inmem bool) error
cfg.LocalID = raft.ServerID(c.id) cfg.LocalID = raft.ServerID(c.id)
cfg.Logger = NewLogger(c.logger, hclog.Debug).Named("raft") cfg.Logger = NewLogger(c.logger, hclog.Debug).Named("raft")
if bootstrap {
hasState, err := raft.HasExistingState(logStore, stableStore, snapshots) hasState, err := raft.HasExistingState(logStore, stableStore, snapshots)
if err != nil { if err != nil {
return err return err
} }
if !hasState { if !hasState {
// Bootstrap cluster
configuration := raft.Configuration{ configuration := raft.Configuration{
Servers: []raft.Server{ Servers: []raft.Server{
{ {
@@ -960,7 +961,30 @@ func (c *cluster) startRaft(fsm raft.FSM, bootstrap, recover, inmem bool) error
if err := raft.BootstrapCluster(cfg, logStore, stableStore, snapshots, transport, configuration); err != nil { if err := raft.BootstrapCluster(cfg, logStore, stableStore, snapshots, transport, configuration); err != nil {
return err return err
} }
c.logger.Debug().Log("raft node bootstrapped")
} else {
// Recover cluster
fsm, err := NewStore()
if err != nil {
return err
} }
configuration := raft.Configuration{
Servers: []raft.Server{
{
Suffrage: raft.Voter,
ID: raft.ServerID(c.id),
Address: transport.LocalAddr(),
},
},
}
if err := raft.RecoverCluster(cfg, fsm, logStore, stableStore, snapshots, transport, configuration); err != nil {
return err
}
c.logger.Debug().Log("raft node recoverd")
} }
// Set up a channel for reliable leader notifications. // Set up a channel for reliable leader notifications.

View File

@@ -4,6 +4,7 @@ import (
"encoding/json" "encoding/json"
"fmt" "fmt"
"io" "io"
"sync"
"github.com/hashicorp/raft" "github.com/hashicorp/raft"
) )
@@ -45,10 +46,15 @@ type addProcessCommand struct {
} }
// Implement a FSM // Implement a FSM
type store struct{} type store struct {
lock sync.RWMutex
Nodes map[string]string
}
func NewStore() (Store, error) { func NewStore() (Store, error) {
return &store{}, nil return &store{
Nodes: map[string]string{},
}, nil
} }
func (s *store) Apply(log *raft.Log) interface{} { func (s *store) Apply(log *raft.Log) interface{} {
@@ -72,23 +78,53 @@ func (s *store) Apply(log *raft.Log) interface{} {
json.Unmarshal(b, &cmd) json.Unmarshal(b, &cmd)
fmt.Printf("addNode: %+v\n", cmd) fmt.Printf("addNode: %+v\n", cmd)
s.lock.Lock()
s.Nodes[cmd.ID] = cmd.Address
s.lock.Unlock()
case opRemoveNode: case opRemoveNode:
b, _ := json.Marshal(c.Data) b, _ := json.Marshal(c.Data)
cmd := removeNodeCommand{} cmd := removeNodeCommand{}
json.Unmarshal(b, &cmd) json.Unmarshal(b, &cmd)
fmt.Printf("removeNode: %+v\n", cmd) fmt.Printf("removeNode: %+v\n", cmd)
s.lock.Lock()
delete(s.Nodes, cmd.ID)
s.lock.Unlock()
} }
return nil return nil
} }
func (s *store) Snapshot() (raft.FSMSnapshot, error) { func (s *store) Snapshot() (raft.FSMSnapshot, error) {
fmt.Printf("a snapshot is requested\n") fmt.Printf("a snapshot is requested\n")
return &fsmSnapshot{}, nil
s.lock.Lock()
defer s.lock.Unlock()
data, err := json.Marshal(s)
if err != nil {
return nil, err
}
return &fsmSnapshot{
data: data,
}, nil
} }
func (s *store) Restore(snapshot io.ReadCloser) error { func (s *store) Restore(snapshot io.ReadCloser) error {
fmt.Printf("a snapshot is restored\n") fmt.Printf("a snapshot is restored\n")
defer snapshot.Close()
s.lock.Lock()
defer s.lock.Unlock()
dec := json.NewDecoder(snapshot)
if err := dec.Decode(s); err != nil {
return err
}
return nil return nil
} }
@@ -100,10 +136,20 @@ func (s *store) GetNode(id string) string {
return "" return ""
} }
type fsmSnapshot struct{} type fsmSnapshot struct {
data []byte
}
func (s *fsmSnapshot) Persist(sink raft.SnapshotSink) error { func (s *fsmSnapshot) Persist(sink raft.SnapshotSink) error {
if _, err := sink.Write(s.data); err != nil {
sink.Cancel()
return err
}
sink.Close()
return nil return nil
} }
func (s *fsmSnapshot) Release() {} func (s *fsmSnapshot) Release() {
s.data = nil
}