From f02a3a4677a86e47f67e11d01930a3b429b7d191 Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Mon, 9 Apr 2018 17:42:30 -0500 Subject: [PATCH 1/8] Unit tests for rolling upgrade and killing a leader --- nomad/leader_test.go | 115 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 112 insertions(+), 3 deletions(-) diff --git a/nomad/leader_test.go b/nomad/leader_test.go index 9ec0be5d3..0b243e8dd 100644 --- a/nomad/leader_test.go +++ b/nomad/leader_test.go @@ -903,6 +903,48 @@ func TestLeader_UpgradeRaftVersion(t *testing.T) { } } +func TestLeader_Reelection(t *testing.T) { + t.Parallel() + s1 := TestServer(t, func(c *Config) { + c.RaftConfig.ProtocolVersion = 2 + }) + defer s1.Shutdown() + + s2 := TestServer(t, func(c *Config) { + c.DevDisableBootstrap = true + c.RaftConfig.ProtocolVersion = 2 + }) + defer s2.Shutdown() + + s3 := TestServer(t, func(c *Config) { + c.DevDisableBootstrap = true + c.RaftConfig.ProtocolVersion = 2 + }) + + servers := []*Server{s1, s2, s3} + + // Try to join + TestJoin(t, s1, s2, s3) + + var leader, nonLeader *Server + + testutil.WaitForLeader(t, s1.RPC) + + for _, s := range servers { + if s.IsLeader() { + leader = s + } else { + nonLeader = s + } + } + + // Shutdown the leader + leader.Shutdown() + + testutil.WaitForLeader(t, nonLeader.RPC) + +} + func TestLeader_RollRaftServer(t *testing.T) { t.Parallel() s1 := TestServer(t, func(c *Config) { @@ -955,7 +997,74 @@ func TestLeader_RollRaftServer(t *testing.T) { TestJoin(t, s4, s1) servers[1] = s4 - // Make sure the dead server is removed and we're back to 3 total peers + // Kill the v2 server + s1.Shutdown() + + for _, s := range []*Server{s3, s4} { + retry.Run(t, func(r *retry.R) { + minVer, err := s.autopilot.MinRaftProtocol() + if err != nil { + r.Fatal(err) + } + if got, want := minVer, 2; got != want { + r.Fatalf("got min raft version %d want %d", got, want) + } + }) + } + // Replace another dead server with one running raft protocol v3 + s5 := TestServer(t, func(c *Config) { + c.DevDisableBootstrap = true + c.RaftConfig.ProtocolVersion = 3 + }) + defer s5.Shutdown() + TestJoin(t, s5, s4) + servers[0] = s5 + + // Make sure all the dead servers are removed and we're back to 3 total peers + for _, s := range servers { + retry.Run(t, func(r *retry.R) { + addrs := 0 + ids := 0 + future := s.raft.GetConfiguration() + if err := future.Error(); err != nil { + r.Fatal(err) + } + for _, server := range future.Configuration().Servers { + if string(server.ID) == string(server.Address) { + addrs++ + } else { + ids++ + } + fmt.Println("*** server ID before all are 3 ***", server.ID) + } + }) + } + + // Kill the last v2 server, now minRaftProtocol should be 3 + s3.Shutdown() + + for _, s := range []*Server{s4, s5} { + retry.Run(t, func(r *retry.R) { + minVer, err := s.autopilot.MinRaftProtocol() + if err != nil { + r.Fatal(err) + } + if got, want := minVer, 3; got != want { + r.Fatalf("got min raft version %d want %d", got, want) + } + }) + } + + // Replace the last dead server with one running raft protocol v3 + s6 := TestServer(t, func(c *Config) { + c.DevDisableBootstrap = true + c.RaftConfig.ProtocolVersion = 3 + }) + defer s6.Shutdown() + TestJoin(t, s6, s4) + servers[2] = s6 + + // Make sure all the dead servers are removed and we're back to 3 total peers for _, s := range servers { retry.Run(t, func(r *retry.R) { addrs := 0 @@ -971,10 +1080,10 @@ func TestLeader_RollRaftServer(t *testing.T) { ids++ } } - if got, want := addrs, 2; got != want { + if got, want := addrs, 0; got != want { r.Fatalf("got %d server addresses want %d", got, want) } - if got, want := ids, 1; got != want { + if got, want := ids, 3; got != want { r.Fatalf("got %d server ids want %d", got, want) } }) From 5194f5d6c71ae1e6c550e37be7ec9f77fa49f748 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Mon, 9 Apr 2018 17:51:55 -0700 Subject: [PATCH 2/8] WIP: Not setting node id properlperly --- command/agent/agent.go | 65 ++++++++++++++++++++++++ nomad/autopilot.go | 4 +- nomad/config.go | 4 +- nomad/leader_test.go | 33 +++++++++--- nomad/server.go | 2 + nomad/server_test.go | 1 + nomad/testing.go | 2 + vendor/github.com/hashicorp/raft/raft.go | 13 +++-- 8 files changed, 112 insertions(+), 12 deletions(-) diff --git a/command/agent/agent.go b/command/agent/agent.go index b0df549f9..8b3c96856 100644 --- a/command/agent/agent.go +++ b/command/agent/agent.go @@ -3,6 +3,7 @@ package agent import ( "fmt" "io" + "io/ioutil" "log" "net" "os" @@ -15,9 +16,12 @@ import ( metrics "github.com/armon/go-metrics" "github.com/hashicorp/consul/api" + "github.com/hashicorp/consul/lib" + uuidparse "github.com/hashicorp/go-uuid" "github.com/hashicorp/nomad/client" clientconfig "github.com/hashicorp/nomad/client/config" "github.com/hashicorp/nomad/command/agent/consul" + "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/nomad/structs/config" @@ -444,6 +448,14 @@ func (a *Agent) setupServer() error { return fmt.Errorf("server config setup failed: %s", err) } + a.logger.Printf("ALEX: data dir: %q", conf.DataDir) + + // Generate a node ID and persist it if it is the first instance, otherwise + // read the persisted node ID. + if err := a.setupNodeID(conf); err != nil { + return fmt.Errorf("setting up server node ID failed: %s", err) + } + // Sets up the keyring for gossip encryption if err := a.setupKeyrings(conf); err != nil { return fmt.Errorf("failed to configure keyring: %v", err) @@ -518,6 +530,59 @@ func (a *Agent) setupServer() error { return nil } +// setupNodeID will pull the persisted node ID, if any, or create a random one +// and persist it. +func (a *Agent) setupNodeID(config *nomad.Config) error { + // If they've configured a node ID manually then just use that, as + // long as it's valid. + if config.NodeID != "" { + config.NodeID = strings.ToLower(string(config.NodeID)) + if _, err := uuidparse.ParseUUID(string(config.NodeID)); err != nil { + return err + } + + return nil + } + + // For dev mode we have no filesystem access so just make one. + if a.config.DevMode { + config.NodeID = uuid.Generate() + return nil + } + + // Load saved state, if any. Since a user could edit this, we also + // validate it. + fileID := filepath.Join(config.DataDir, "node-id") + if _, err := os.Stat(fileID); err == nil { + rawID, err := ioutil.ReadFile(fileID) + if err != nil { + return err + } + + nodeID := strings.TrimSpace(string(rawID)) + nodeID = strings.ToLower(nodeID) + if _, err := uuidparse.ParseUUID(nodeID); err != nil { + return err + } + + config.NodeID = nodeID + } + + // If we still don't have a valid node ID, make one. + if config.NodeID == "" { + id := uuid.Generate() + if err := lib.EnsurePath(fileID, false); err != nil { + return err + } + if err := ioutil.WriteFile(fileID, []byte(id), 0600); err != nil { + return err + } + + config.NodeID = id + } + return nil +} + // setupKeyrings is used to initialize and load keyrings during agent startup func (a *Agent) setupKeyrings(config *nomad.Config) error { file := filepath.Join(a.config.DataDir, serfKeyring) diff --git a/nomad/autopilot.go b/nomad/autopilot.go index 399b3458f..f70bc8cfe 100644 --- a/nomad/autopilot.go +++ b/nomad/autopilot.go @@ -89,7 +89,9 @@ func (d *AutopilotDelegate) PromoteNonVoters(conf *autopilot.Config, health auto return nil, fmt.Errorf("failed to get raft configuration: %v", err) } - return autopilot.PromoteStableServers(conf, health, future.Configuration().Servers), nil + servers := autopilot.PromoteStableServers(conf, health, future.Configuration().Servers) + d.server.logger.Printf("ALEX: PROMOTENONVOTER: %+v %+v", conf, health) + return servers, nil } func (d *AutopilotDelegate) Raft() *raft.Raft { diff --git a/nomad/config.go b/nomad/config.go index 12cb9825c..17dcbed42 100644 --- a/nomad/config.go +++ b/nomad/config.go @@ -10,7 +10,6 @@ import ( "github.com/hashicorp/memberlist" "github.com/hashicorp/nomad/helper/tlsutil" - "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/nomad/structs/config" "github.com/hashicorp/nomad/scheduler" @@ -312,12 +311,13 @@ func DefaultConfig() *Config { panic(err) } + // TODO Add back random node ID and always persist in the agent startup + // TODO Remove places I added uuid.Generate() c := &Config{ Region: DefaultRegion, AuthoritativeRegion: DefaultRegion, Datacenter: DefaultDC, NodeName: hostname, - NodeID: uuid.Generate(), ProtocolVersion: ProtocolVersionMax, RaftConfig: raft.DefaultConfig(), RaftTimeout: 10 * time.Second, diff --git a/nomad/leader_test.go b/nomad/leader_test.go index 0b243e8dd..ab8a754e9 100644 --- a/nomad/leader_test.go +++ b/nomad/leader_test.go @@ -12,6 +12,7 @@ import ( "github.com/hashicorp/nomad/nomad/state" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/testutil" + "github.com/hashicorp/raft" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -906,30 +907,48 @@ func TestLeader_UpgradeRaftVersion(t *testing.T) { func TestLeader_Reelection(t *testing.T) { t.Parallel() s1 := TestServer(t, func(c *Config) { - c.RaftConfig.ProtocolVersion = 2 + c.BootstrapExpect = 3 + c.RaftConfig.ProtocolVersion = 3 }) defer s1.Shutdown() s2 := TestServer(t, func(c *Config) { + c.BootstrapExpect = 3 c.DevDisableBootstrap = true - c.RaftConfig.ProtocolVersion = 2 + c.RaftConfig.ProtocolVersion = 3 }) defer s2.Shutdown() s3 := TestServer(t, func(c *Config) { + c.BootstrapExpect = 3 c.DevDisableBootstrap = true - c.RaftConfig.ProtocolVersion = 2 + c.RaftConfig.ProtocolVersion = 3 }) servers := []*Server{s1, s2, s3} // Try to join TestJoin(t, s1, s2, s3) - - var leader, nonLeader *Server - testutil.WaitForLeader(t, s1.RPC) + testutil.WaitForResult(func() (bool, error) { + future := s1.raft.GetConfiguration() + if err := future.Error(); err != nil { + return false, err + } + + for _, server := range future.Configuration().Servers { + if server.Suffrage == raft.Nonvoter { + return false, fmt.Errorf("non-voter %v", server) + } + } + + return true, nil + }, func(err error) { + t.Fatal(err) + }) + + var leader, nonLeader *Server for _, s := range servers { if s.IsLeader() { leader = s @@ -939,9 +958,11 @@ func TestLeader_Reelection(t *testing.T) { } // Shutdown the leader + s1.logger.Println("-------------------------------- LEADER SHUTDOWN --------------------------------------") leader.Shutdown() testutil.WaitForLeader(t, nonLeader.RPC) + s1.logger.Println("-------------------------------- GOT LEADER --------------------------------------") } diff --git a/nomad/server.go b/nomad/server.go index 5a214d71e..1434d2984 100644 --- a/nomad/server.go +++ b/nomad/server.go @@ -1080,6 +1080,8 @@ func (s *Server) setupRaft() error { s.config.RaftConfig.LocalID = raft.ServerID(s.config.NodeID) } + s.logger.Printf("ALEX: NOMAD.SETUP_RAFT: node ID %q", s.config.NodeID) + // Build an all in-memory setup for dev mode, otherwise prepare a full // disk-based setup. var log raft.LogStore diff --git a/nomad/server_test.go b/nomad/server_test.go index 3bdb58e7e..dd9f2b81b 100644 --- a/nomad/server_test.go +++ b/nomad/server_test.go @@ -532,6 +532,7 @@ func TestServer_InvalidSchedulers(t *testing.T) { require := require.New(t) config := DefaultConfig() + config.NodeID = uuid.Generate() config.DevMode = true config.LogOutput = testlog.NewWriter(t) config.SerfConfig.MemberlistConfig.BindAddr = "127.0.0.1" diff --git a/nomad/testing.go b/nomad/testing.go index 2859dfb63..9e09e60c7 100644 --- a/nomad/testing.go +++ b/nomad/testing.go @@ -11,6 +11,7 @@ import ( "github.com/hashicorp/consul/lib/freeport" "github.com/hashicorp/nomad/command/agent/consul" "github.com/hashicorp/nomad/helper/testlog" + "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" "github.com/mitchellh/go-testing-interface" @@ -38,6 +39,7 @@ func TestACLServer(t testing.T, cb func(*Config)) (*Server, *structs.ACLToken) { func TestServer(t testing.T, cb func(*Config)) *Server { // Setup the default settings config := DefaultConfig() + config.NodeID = uuid.Generate() config.Build = "0.8.0+unittest" config.DevMode = true nodeNum := atomic.AddUint32(&nodeNumber, 1) diff --git a/vendor/github.com/hashicorp/raft/raft.go b/vendor/github.com/hashicorp/raft/raft.go index b5cc9ca98..44da1d2e9 100644 --- a/vendor/github.com/hashicorp/raft/raft.go +++ b/vendor/github.com/hashicorp/raft/raft.go @@ -193,13 +193,18 @@ func (r *Raft) runFollower() { if r.configurations.latestIndex == 0 { if !didWarn { r.logger.Printf("[WARN] raft: no known peers, aborting election") - didWarn = true + //didWarn = true } } else if r.configurations.latestIndex == r.configurations.committedIndex && !hasVote(r.configurations.latest, r.localID) { if !didWarn { - r.logger.Printf("[WARN] raft: not part of stable configuration, aborting election") - didWarn = true + r.logger.Printf("[WARN] raft: not part of stable configuration, aborting election: %v == %v && !%v", r.configurations.latestIndex, r.configurations.committedIndex, hasVote(r.configurations.latest, r.localID)) + + r.logger.Printf("ALEX: local ID: %v", r.localID) + for _, srv := range r.configurations.latest.Servers { + r.logger.Printf("ALEX: %+v", srv) + } + //didWarn = true } } else { r.logger.Printf(`[WARN] raft: Heartbeat timeout from %q reached, starting election`, lastLeader) @@ -495,6 +500,8 @@ func (r *Raft) leaderLoop() { // of peers. stepDown := false + r.logger.Printf("ALEX: local ID as LEADER: %v", r.localID) + lease := time.After(r.conf.LeaderLeaseTimeout) for r.getState() == Leader { select { From 37f80a9bdb0c8447523601f16db13e0cdb64f408 Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Tue, 10 Apr 2018 08:16:50 -0500 Subject: [PATCH 3/8] Remove debug print statements --- nomad/autopilot.go | 4 +--- nomad/leader_test.go | 24 +----------------------- nomad/server.go | 2 -- vendor/github.com/hashicorp/raft/raft.go | 13 +++---------- 4 files changed, 5 insertions(+), 38 deletions(-) diff --git a/nomad/autopilot.go b/nomad/autopilot.go index f70bc8cfe..399b3458f 100644 --- a/nomad/autopilot.go +++ b/nomad/autopilot.go @@ -89,9 +89,7 @@ func (d *AutopilotDelegate) PromoteNonVoters(conf *autopilot.Config, health auto return nil, fmt.Errorf("failed to get raft configuration: %v", err) } - servers := autopilot.PromoteStableServers(conf, health, future.Configuration().Servers) - d.server.logger.Printf("ALEX: PROMOTENONVOTER: %+v %+v", conf, health) - return servers, nil + return autopilot.PromoteStableServers(conf, health, future.Configuration().Servers), nil } func (d *AutopilotDelegate) Raft() *raft.Raft { diff --git a/nomad/leader_test.go b/nomad/leader_test.go index ab8a754e9..ec4f59134 100644 --- a/nomad/leader_test.go +++ b/nomad/leader_test.go @@ -958,11 +958,9 @@ func TestLeader_Reelection(t *testing.T) { } // Shutdown the leader - s1.logger.Println("-------------------------------- LEADER SHUTDOWN --------------------------------------") leader.Shutdown() - + // Wait for new leader to elect testutil.WaitForLeader(t, nonLeader.RPC) - s1.logger.Println("-------------------------------- GOT LEADER --------------------------------------") } @@ -1041,26 +1039,6 @@ func TestLeader_RollRaftServer(t *testing.T) { TestJoin(t, s5, s4) servers[0] = s5 - // Make sure all the dead servers are removed and we're back to 3 total peers - for _, s := range servers { - retry.Run(t, func(r *retry.R) { - addrs := 0 - ids := 0 - future := s.raft.GetConfiguration() - if err := future.Error(); err != nil { - r.Fatal(err) - } - for _, server := range future.Configuration().Servers { - if string(server.ID) == string(server.Address) { - addrs++ - } else { - ids++ - } - fmt.Println("*** server ID before all are 3 ***", server.ID) - } - }) - } - // Kill the last v2 server, now minRaftProtocol should be 3 s3.Shutdown() diff --git a/nomad/server.go b/nomad/server.go index 1434d2984..5a214d71e 100644 --- a/nomad/server.go +++ b/nomad/server.go @@ -1080,8 +1080,6 @@ func (s *Server) setupRaft() error { s.config.RaftConfig.LocalID = raft.ServerID(s.config.NodeID) } - s.logger.Printf("ALEX: NOMAD.SETUP_RAFT: node ID %q", s.config.NodeID) - // Build an all in-memory setup for dev mode, otherwise prepare a full // disk-based setup. var log raft.LogStore diff --git a/vendor/github.com/hashicorp/raft/raft.go b/vendor/github.com/hashicorp/raft/raft.go index 44da1d2e9..b5cc9ca98 100644 --- a/vendor/github.com/hashicorp/raft/raft.go +++ b/vendor/github.com/hashicorp/raft/raft.go @@ -193,18 +193,13 @@ func (r *Raft) runFollower() { if r.configurations.latestIndex == 0 { if !didWarn { r.logger.Printf("[WARN] raft: no known peers, aborting election") - //didWarn = true + didWarn = true } } else if r.configurations.latestIndex == r.configurations.committedIndex && !hasVote(r.configurations.latest, r.localID) { if !didWarn { - r.logger.Printf("[WARN] raft: not part of stable configuration, aborting election: %v == %v && !%v", r.configurations.latestIndex, r.configurations.committedIndex, hasVote(r.configurations.latest, r.localID)) - - r.logger.Printf("ALEX: local ID: %v", r.localID) - for _, srv := range r.configurations.latest.Servers { - r.logger.Printf("ALEX: %+v", srv) - } - //didWarn = true + r.logger.Printf("[WARN] raft: not part of stable configuration, aborting election") + didWarn = true } } else { r.logger.Printf(`[WARN] raft: Heartbeat timeout from %q reached, starting election`, lastLeader) @@ -500,8 +495,6 @@ func (r *Raft) leaderLoop() { // of peers. stepDown := false - r.logger.Printf("ALEX: local ID as LEADER: %v", r.localID) - lease := time.After(r.conf.LeaderLeaseTimeout) for r.getState() == Leader { select { From 86ced8aef2dccccb4d36c3a2fcd304bcbc662902 Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Tue, 10 Apr 2018 08:47:33 -0500 Subject: [PATCH 4/8] Use preconfigured nodeID if there isn't a persisted node ID, and persist it if its not persisted. --- command/agent/agent.go | 45 ++++++++++++++++++++++++------------------ nomad/config.go | 4 ++-- nomad/server_test.go | 1 - nomad/testing.go | 2 -- 4 files changed, 28 insertions(+), 24 deletions(-) diff --git a/command/agent/agent.go b/command/agent/agent.go index 8b3c96856..c8fccfd1c 100644 --- a/command/agent/agent.go +++ b/command/agent/agent.go @@ -533,26 +533,10 @@ func (a *Agent) setupServer() error { // setupNodeID will pull the persisted node ID, if any, or create a random one // and persist it. func (a *Agent) setupNodeID(config *nomad.Config) error { - // If they've configured a node ID manually then just use that, as - // long as it's valid. - if config.NodeID != "" { - config.NodeID = strings.ToLower(string(config.NodeID)) - if _, err := uuidparse.ParseUUID(string(config.NodeID)); err != nil { - return err - } - - return nil - } - - // For dev mode we have no filesystem access so just make one. - if a.config.DevMode { - config.NodeID = uuid.Generate() - return nil - } - // Load saved state, if any. Since a user could edit this, we also - // validate it. + // validate it. Saved state overwrites any configured node id fileID := filepath.Join(config.DataDir, "node-id") + savedNodeID := false if _, err := os.Stat(fileID); err == nil { rawID, err := ioutil.ReadFile(fileID) if err != nil { @@ -564,8 +548,31 @@ func (a *Agent) setupNodeID(config *nomad.Config) error { if _, err := uuidparse.ParseUUID(nodeID); err != nil { return err } - config.NodeID = nodeID + savedNodeID = true + } + + // If they've configured a node ID manually then just use that, as + // long as it's valid. + if !savedNodeID && config.NodeID != "" { + config.NodeID = strings.ToLower(string(config.NodeID)) + if _, err := uuidparse.ParseUUID(string(config.NodeID)); err != nil { + return err + } + // Persist this configured nodeID to our data directory + if err := lib.EnsurePath(fileID, false); err != nil { + return err + } + if err := ioutil.WriteFile(fileID, []byte(config.NodeID), 0600); err != nil { + return err + } + return nil + } + + // For dev mode we have no filesystem access so just make one. + if a.config.DevMode { + config.NodeID = uuid.Generate() + return nil } // If we still don't have a valid node ID, make one. diff --git a/nomad/config.go b/nomad/config.go index 17dcbed42..12cb9825c 100644 --- a/nomad/config.go +++ b/nomad/config.go @@ -10,6 +10,7 @@ import ( "github.com/hashicorp/memberlist" "github.com/hashicorp/nomad/helper/tlsutil" + "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/nomad/structs/config" "github.com/hashicorp/nomad/scheduler" @@ -311,13 +312,12 @@ func DefaultConfig() *Config { panic(err) } - // TODO Add back random node ID and always persist in the agent startup - // TODO Remove places I added uuid.Generate() c := &Config{ Region: DefaultRegion, AuthoritativeRegion: DefaultRegion, Datacenter: DefaultDC, NodeName: hostname, + NodeID: uuid.Generate(), ProtocolVersion: ProtocolVersionMax, RaftConfig: raft.DefaultConfig(), RaftTimeout: 10 * time.Second, diff --git a/nomad/server_test.go b/nomad/server_test.go index dd9f2b81b..3bdb58e7e 100644 --- a/nomad/server_test.go +++ b/nomad/server_test.go @@ -532,7 +532,6 @@ func TestServer_InvalidSchedulers(t *testing.T) { require := require.New(t) config := DefaultConfig() - config.NodeID = uuid.Generate() config.DevMode = true config.LogOutput = testlog.NewWriter(t) config.SerfConfig.MemberlistConfig.BindAddr = "127.0.0.1" diff --git a/nomad/testing.go b/nomad/testing.go index 9e09e60c7..2859dfb63 100644 --- a/nomad/testing.go +++ b/nomad/testing.go @@ -11,7 +11,6 @@ import ( "github.com/hashicorp/consul/lib/freeport" "github.com/hashicorp/nomad/command/agent/consul" "github.com/hashicorp/nomad/helper/testlog" - "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" "github.com/mitchellh/go-testing-interface" @@ -39,7 +38,6 @@ func TestACLServer(t testing.T, cb func(*Config)) (*Server, *structs.ACLToken) { func TestServer(t testing.T, cb func(*Config)) *Server { // Setup the default settings config := DefaultConfig() - config.NodeID = uuid.Generate() config.Build = "0.8.0+unittest" config.DevMode = true nodeNum := atomic.AddUint32(&nodeNumber, 1) From 1ba9feb3b744a03d2c8f7ad990361455d483f558 Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Tue, 10 Apr 2018 11:22:16 -0500 Subject: [PATCH 5/8] Lint fixes --- command/agent/agent.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/command/agent/agent.go b/command/agent/agent.go index c8fccfd1c..f61625a06 100644 --- a/command/agent/agent.go +++ b/command/agent/agent.go @@ -555,8 +555,8 @@ func (a *Agent) setupNodeID(config *nomad.Config) error { // If they've configured a node ID manually then just use that, as // long as it's valid. if !savedNodeID && config.NodeID != "" { - config.NodeID = strings.ToLower(string(config.NodeID)) - if _, err := uuidparse.ParseUUID(string(config.NodeID)); err != nil { + config.NodeID = strings.ToLower(config.NodeID) + if _, err := uuidparse.ParseUUID(config.NodeID); err != nil { return err } // Persist this configured nodeID to our data directory From ffb622fbfdaccb4275f8ca51a49e12c58030561d Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Tue, 10 Apr 2018 12:34:14 -0500 Subject: [PATCH 6/8] Dev mode should never persist nodeid --- command/agent/agent.go | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/command/agent/agent.go b/command/agent/agent.go index f61625a06..9f056b69d 100644 --- a/command/agent/agent.go +++ b/command/agent/agent.go @@ -448,8 +448,6 @@ func (a *Agent) setupServer() error { return fmt.Errorf("server config setup failed: %s", err) } - a.logger.Printf("ALEX: data dir: %q", conf.DataDir) - // Generate a node ID and persist it if it is the first instance, otherwise // read the persisted node ID. if err := a.setupNodeID(conf); err != nil { @@ -533,6 +531,12 @@ func (a *Agent) setupServer() error { // setupNodeID will pull the persisted node ID, if any, or create a random one // and persist it. func (a *Agent) setupNodeID(config *nomad.Config) error { + // For dev mode we have no filesystem access so just make a node ID. + if a.config.DevMode { + config.NodeID = uuid.Generate() + return nil + } + // Load saved state, if any. Since a user could edit this, we also // validate it. Saved state overwrites any configured node id fileID := filepath.Join(config.DataDir, "node-id") @@ -569,12 +573,6 @@ func (a *Agent) setupNodeID(config *nomad.Config) error { return nil } - // For dev mode we have no filesystem access so just make one. - if a.config.DevMode { - config.NodeID = uuid.Generate() - return nil - } - // If we still don't have a valid node ID, make one. if config.NodeID == "" { id := uuid.Generate() From 1ac669cdff30b79fc64f05c1bcf9bedad7df4ee2 Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Tue, 10 Apr 2018 14:20:02 -0500 Subject: [PATCH 7/8] Make leader election test run on all three protocol versions --- nomad/leader_test.go | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/nomad/leader_test.go b/nomad/leader_test.go index ec4f59134..926e12e1d 100644 --- a/nomad/leader_test.go +++ b/nomad/leader_test.go @@ -905,24 +905,33 @@ func TestLeader_UpgradeRaftVersion(t *testing.T) { } func TestLeader_Reelection(t *testing.T) { - t.Parallel() + raftProtocols := []int{1, 2, 3} + for _, p := range raftProtocols { + t.Run("Leader Election - Protocol version "+string(p), func(t *testing.T) { + leaderElectionTest(t, raft.ProtocolVersion(p)) + }) + } + +} + +func leaderElectionTest(t *testing.T, raftProtocol raft.ProtocolVersion) { s1 := TestServer(t, func(c *Config) { c.BootstrapExpect = 3 - c.RaftConfig.ProtocolVersion = 3 + c.RaftConfig.ProtocolVersion = raftProtocol }) defer s1.Shutdown() s2 := TestServer(t, func(c *Config) { c.BootstrapExpect = 3 c.DevDisableBootstrap = true - c.RaftConfig.ProtocolVersion = 3 + c.RaftConfig.ProtocolVersion = raftProtocol }) defer s2.Shutdown() s3 := TestServer(t, func(c *Config) { c.BootstrapExpect = 3 c.DevDisableBootstrap = true - c.RaftConfig.ProtocolVersion = 3 + c.RaftConfig.ProtocolVersion = raftProtocol }) servers := []*Server{s1, s2, s3} @@ -961,7 +970,6 @@ func TestLeader_Reelection(t *testing.T) { leader.Shutdown() // Wait for new leader to elect testutil.WaitForLeader(t, nonLeader.RPC) - } func TestLeader_RollRaftServer(t *testing.T) { @@ -973,7 +981,7 @@ func TestLeader_RollRaftServer(t *testing.T) { s2 := TestServer(t, func(c *Config) { c.DevDisableBootstrap = true - c.RaftConfig.ProtocolVersion = 1 + c.RaftConfig.ProtocolVersion = 2 }) defer s2.Shutdown() @@ -992,8 +1000,8 @@ func TestLeader_RollRaftServer(t *testing.T) { retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 3)) }) } - // Kill the v1 server - s2.Shutdown() + // Kill the first v2 server + s1.Shutdown() for _, s := range []*Server{s1, s3} { retry.Run(t, func(r *retry.R) { @@ -1013,11 +1021,11 @@ func TestLeader_RollRaftServer(t *testing.T) { c.RaftConfig.ProtocolVersion = 3 }) defer s4.Shutdown() - TestJoin(t, s4, s1) - servers[1] = s4 + TestJoin(t, s4, s2) + servers[0] = s4 - // Kill the v2 server - s1.Shutdown() + // Kill the second v2 server + s2.Shutdown() for _, s := range []*Server{s3, s4} { retry.Run(t, func(r *retry.R) { @@ -1037,7 +1045,7 @@ func TestLeader_RollRaftServer(t *testing.T) { }) defer s5.Shutdown() TestJoin(t, s5, s4) - servers[0] = s5 + servers[1] = s5 // Kill the last v2 server, now minRaftProtocol should be 3 s3.Shutdown() From f8427c847d87e102d5e6271e166bdc6df8267187 Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Tue, 10 Apr 2018 15:33:01 -0500 Subject: [PATCH 8/8] minor code review fix --- command/agent/agent.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/command/agent/agent.go b/command/agent/agent.go index 9f056b69d..2eddcad02 100644 --- a/command/agent/agent.go +++ b/command/agent/agent.go @@ -540,7 +540,6 @@ func (a *Agent) setupNodeID(config *nomad.Config) error { // Load saved state, if any. Since a user could edit this, we also // validate it. Saved state overwrites any configured node id fileID := filepath.Join(config.DataDir, "node-id") - savedNodeID := false if _, err := os.Stat(fileID); err == nil { rawID, err := ioutil.ReadFile(fileID) if err != nil { @@ -553,12 +552,12 @@ func (a *Agent) setupNodeID(config *nomad.Config) error { return err } config.NodeID = nodeID - savedNodeID = true + return nil } // If they've configured a node ID manually then just use that, as // long as it's valid. - if !savedNodeID && config.NodeID != "" { + if config.NodeID != "" { config.NodeID = strings.ToLower(config.NodeID) if _, err := uuidparse.ParseUUID(config.NodeID); err != nil { return err