mirror of
https://github.com/kemko/nomad.git
synced 2026-01-08 19:35:41 +03:00
Remove checks in member reconcile that was causing servers in protocol 3 to not change their ID in raft forever
This commit is contained in:
@@ -727,11 +727,6 @@ func (s *Server) reconcileMember(member serf.Member) error {
|
||||
}
|
||||
defer metrics.MeasureSince([]string{"nomad", "leader", "reconcileMember"}, time.Now())
|
||||
|
||||
// Do not reconcile ourself
|
||||
if member.Name == fmt.Sprintf("%s.%s", s.config.NodeName, s.config.Region) {
|
||||
return nil
|
||||
}
|
||||
|
||||
var err error
|
||||
switch member.Status {
|
||||
case serf.StatusAlive:
|
||||
@@ -768,12 +763,6 @@ func (s *Server) reconcileJobSummaries() error {
|
||||
|
||||
// addRaftPeer is used to add a new Raft peer when a Nomad server joins
|
||||
func (s *Server) addRaftPeer(m serf.Member, parts *serverParts) error {
|
||||
// Do not join ourselfs
|
||||
if m.Name == s.config.NodeName {
|
||||
s.logger.Printf("[DEBUG] nomad: adding self (%q) as raft peer skipped", m.Name)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Check for possibility of multiple bootstrap nodes
|
||||
members := s.serf.Members()
|
||||
if parts.Bootstrap {
|
||||
@@ -786,17 +775,19 @@ func (s *Server) addRaftPeer(m serf.Member, parts *serverParts) error {
|
||||
}
|
||||
}
|
||||
|
||||
// See if it's already in the configuration. It's harmless to re-add it
|
||||
// but we want to avoid doing that if possible to prevent useless Raft
|
||||
// log entries.
|
||||
// Processing ourselves could result in trying to remove ourselves to
|
||||
// fix up our address, which would make us step down. This is only
|
||||
// safe to attempt if there are multiple servers available.
|
||||
addr := (&net.TCPAddr{IP: m.Addr, Port: parts.Port}).String()
|
||||
configFuture := s.raft.GetConfiguration()
|
||||
if err := configFuture.Error(); err != nil {
|
||||
s.logger.Printf("[ERR] nomad: failed to get raft configuration: %v", err)
|
||||
return err
|
||||
}
|
||||
for _, server := range configFuture.Configuration().Servers {
|
||||
if server.Address == raft.ServerAddress(addr) {
|
||||
|
||||
if m.Name == s.config.NodeName {
|
||||
if l := len(configFuture.Configuration().Servers); l < 3 {
|
||||
s.logger.Printf("[DEBUG] consul: Skipping self join check for %q since the cluster is too small", m.Name)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
@@ -817,7 +808,7 @@ func (s *Server) addRaftPeer(m serf.Member, parts *serverParts) error {
|
||||
|
||||
// If the address or ID matches an existing server, see if we need to remove the old one first
|
||||
if server.Address == raft.ServerAddress(addr) || server.ID == raft.ServerID(parts.ID) {
|
||||
// Exit with no-op if this is being called on an existing server
|
||||
// Exit with no-op if this is being called on an existing server and both the ID and address match
|
||||
if server.Address == raft.ServerAddress(addr) && server.ID == raft.ServerID(parts.ID) {
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ package nomad
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
@@ -13,6 +14,7 @@ import (
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
"github.com/hashicorp/nomad/testutil"
|
||||
"github.com/hashicorp/raft"
|
||||
"github.com/hashicorp/serf/serf"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
@@ -1112,3 +1114,86 @@ func TestLeader_RevokeLeadership_MultipleTimes(t *testing.T) {
|
||||
require.Nil(t, s1.revokeLeadership())
|
||||
require.Nil(t, s1.revokeLeadership())
|
||||
}
|
||||
|
||||
// Test reconciling a member that's already in the config on a older raft protocol version
|
||||
func TestServer_ReconcileMember(t *testing.T) {
|
||||
// Create a three node cluster
|
||||
t.Parallel()
|
||||
s1 := TestServer(t, func(c *Config) {
|
||||
c.DevDisableBootstrap = true
|
||||
c.RaftConfig.ProtocolVersion = 3
|
||||
})
|
||||
defer s1.Shutdown()
|
||||
|
||||
s2 := TestServer(t, func(c *Config) {
|
||||
c.DevDisableBootstrap = true
|
||||
c.RaftConfig.ProtocolVersion = 3
|
||||
})
|
||||
defer s2.Shutdown()
|
||||
|
||||
s3 := TestServer(t, func(c *Config) {
|
||||
c.DevDisableBootstrap = true
|
||||
c.RaftConfig.ProtocolVersion = 2
|
||||
})
|
||||
defer s3.Shutdown()
|
||||
TestJoin(t, s1, s2, s3)
|
||||
testutil.WaitForLeader(t, s1.RPC)
|
||||
|
||||
// Create a memberlist object for s3, with raft protocol upgraded to 3
|
||||
upgradedS3Member := serf.Member{
|
||||
Name: s3.config.NodeName,
|
||||
Addr: s3.config.RPCAddr.IP,
|
||||
Status: serf.StatusAlive,
|
||||
Tags: make(map[string]string),
|
||||
}
|
||||
upgradedS3Member.Tags["role"] = "nomad"
|
||||
upgradedS3Member.Tags["id"] = s3.config.NodeID
|
||||
upgradedS3Member.Tags["region"] = s3.config.Region
|
||||
upgradedS3Member.Tags["dc"] = s3.config.Datacenter
|
||||
upgradedS3Member.Tags["rpc_addr"] = "127.0.0.1"
|
||||
upgradedS3Member.Tags["port"] = strconv.Itoa(s3.config.RPCAddr.Port)
|
||||
upgradedS3Member.Tags["build"] = "0.8.0"
|
||||
upgradedS3Member.Tags["vsn"] = "2"
|
||||
upgradedS3Member.Tags["mvn"] = "1"
|
||||
upgradedS3Member.Tags["raft_vsn"] = "3"
|
||||
|
||||
// Find the leader so that we can call reconcile member on it
|
||||
var leader *Server
|
||||
for _, s := range []*Server{s1, s2, s3} {
|
||||
if s.IsLeader() {
|
||||
leader = s
|
||||
}
|
||||
}
|
||||
leader.reconcileMember(upgradedS3Member)
|
||||
// This should remove s3 from the config and potentially cause a leader election
|
||||
testutil.WaitForLeader(t, s1.RPC)
|
||||
|
||||
// Figure out the new leader and call reconcile again, this should add s3 with the new ID format
|
||||
for _, s := range []*Server{s1, s2, s3} {
|
||||
if s.IsLeader() {
|
||||
leader = s
|
||||
}
|
||||
}
|
||||
leader.reconcileMember(upgradedS3Member)
|
||||
testutil.WaitForLeader(t, s1.RPC)
|
||||
future := s2.raft.GetConfiguration()
|
||||
if err := future.Error(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
addrs := 0
|
||||
ids := 0
|
||||
for _, server := range future.Configuration().Servers {
|
||||
if string(server.ID) == string(server.Address) {
|
||||
addrs++
|
||||
} else {
|
||||
ids++
|
||||
}
|
||||
}
|
||||
// After this, all three servers should have IDs in raft
|
||||
if got, want := addrs, 0; got != want {
|
||||
t.Fatalf("got %d server addresses want %d", got, want)
|
||||
}
|
||||
if got, want := ids, 3; got != want {
|
||||
t.Fatalf("got %d server ids want %d", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user