diff --git a/api/nodes.go b/api/nodes.go index a505d9ae3..926152854 100644 --- a/api/nodes.go +++ b/api/nodes.go @@ -51,13 +51,19 @@ type NodeUpdateDrainRequest struct { // DrainSpec is the drain specification to set for the node. A nil DrainSpec // will disable draining. DrainSpec *DrainSpec + + // MarkEligible marks the node as eligible if removing the drain strategy. + MarkEligible bool } -// UpdateDrain is used to update the drain strategy for a given node. -func (n *Nodes) UpdateDrain(nodeID string, spec *DrainSpec, q *WriteOptions) (*WriteMeta, error) { +// UpdateDrain is used to update the drain strategy for a given node. If +// markEligible is true and the drain is being removed, the node will be marked +// as having its scheduling being elibile +func (n *Nodes) UpdateDrain(nodeID string, spec *DrainSpec, markEligible bool, q *WriteOptions) (*WriteMeta, error) { req := &NodeUpdateDrainRequest{ - NodeID: nodeID, - DrainSpec: spec, + NodeID: nodeID, + DrainSpec: spec, + MarkEligible: markEligible, } wm, err := n.client.write("/v1/node/"+nodeID+"/drain", req, nil, q) diff --git a/api/nodes_test.go b/api/nodes_test.go index 22d61c401..d2b02b82c 100644 --- a/api/nodes_test.go +++ b/api/nodes_test.go @@ -177,7 +177,7 @@ func TestNodes_ToggleDrain(t *testing.T) { spec := &DrainSpec{ Deadline: 10 * time.Second, } - wm, err := nodes.UpdateDrain(nodeID, spec, nil) + wm, err := nodes.UpdateDrain(nodeID, spec, false, nil) if err != nil { t.Fatalf("err: %s", err) } @@ -193,7 +193,7 @@ func TestNodes_ToggleDrain(t *testing.T) { } // Toggle off again - wm, err = nodes.UpdateDrain(nodeID, nil, nil) + wm, err = nodes.UpdateDrain(nodeID, nil, true, nil) if err != nil { t.Fatalf("err: %s", err) } @@ -210,6 +210,9 @@ func TestNodes_ToggleDrain(t *testing.T) { if out.DrainStrategy != nil { t.Fatalf("drain strategy should be unset") } + if out.SchedulingEligibility != structs.NodeSchedulingEligible { + t.Fatalf("should be eligible") + } } func TestNodes_ToggleEligibility(t *testing.T) { diff --git a/command/agent/node_endpoint.go b/command/agent/node_endpoint.go index a86df751c..bad4fc445 100644 --- a/command/agent/node_endpoint.go +++ b/command/agent/node_endpoint.go @@ -132,7 +132,8 @@ func (s *HTTPServer) nodeToggleDrain(resp http.ResponseWriter, req *http.Request } args := structs.NodeUpdateDrainRequest{ - NodeID: nodeID, + NodeID: nodeID, + MarkEligible: drainRequest.MarkEligible, } if drainRequest.DrainSpec != nil { args.DrainStrategy = &structs.DrainStrategy{ diff --git a/command/node_drain.go b/command/node_drain.go index f6475c7be..9d8326d47 100644 --- a/command/node_drain.go +++ b/command/node_drain.go @@ -56,6 +56,11 @@ Node Drain Options: Ignore system allows the drain to complete without stopping system job allocations. By default system jobs are stopped last. + -keep-ineligible + Keep ineligible will maintain the node's scheduling ineligibility even if + the drain is being disabled. This is useful when an existing drain is being + cancelled but additional scheduling on the node is not desired. + -self Set the drain status of the local node. @@ -72,14 +77,15 @@ func (c *NodeDrainCommand) Synopsis() string { func (c *NodeDrainCommand) AutocompleteFlags() complete.Flags { return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient), complete.Flags{ - "-disable": complete.PredictNothing, - "-enable": complete.PredictNothing, - "-deadline": complete.PredictAnything, - "-force": complete.PredictNothing, - "-no-deadline": complete.PredictNothing, - "-ignore-system": complete.PredictNothing, - "-self": complete.PredictNothing, - "-yes": complete.PredictNothing, + "-disable": complete.PredictNothing, + "-enable": complete.PredictNothing, + "-deadline": complete.PredictAnything, + "-force": complete.PredictNothing, + "-no-deadline": complete.PredictNothing, + "-ignore-system": complete.PredictNothing, + "-keep-ineligible": complete.PredictNothing, + "-self": complete.PredictNothing, + "-yes": complete.PredictNothing, }) } @@ -100,7 +106,7 @@ func (c *NodeDrainCommand) AutocompleteArgs() complete.Predictor { func (c *NodeDrainCommand) Run(args []string) int { var enable, disable, force, - noDeadline, ignoreSystem, self, autoYes bool + noDeadline, ignoreSystem, keepIneligible, self, autoYes bool var deadline string flags := c.Meta.FlagSet("node-drain", FlagSetClient) @@ -111,6 +117,7 @@ func (c *NodeDrainCommand) Run(args []string) int { flags.BoolVar(&force, "force", false, "Force immediate drain") flags.BoolVar(&noDeadline, "no-deadline", false, "Drain node with no deadline") flags.BoolVar(&ignoreSystem, "ignore-system", false, "Do not drain system job allocations from the node") + flags.BoolVar(&keepIneligible, "keep-ineligible", false, "Do not update the nodes scheduling eligibility") flags.BoolVar(&self, "self", false, "") flags.BoolVar(&autoYes, "yes", false, "Automatic yes to prompts.") @@ -252,7 +259,7 @@ func (c *NodeDrainCommand) Run(args []string) int { } // Toggle node draining - if _, err := client.Nodes().UpdateDrain(node.ID, spec, nil); err != nil { + if _, err := client.Nodes().UpdateDrain(node.ID, spec, !keepIneligible, nil); err != nil { c.Ui.Error(fmt.Sprintf("Error updating drain specification: %s", err)) return 1 } diff --git a/nomad/fsm.go b/nomad/fsm.go index b377f09b3..bc52f256e 100644 --- a/nomad/fsm.go +++ b/nomad/fsm.go @@ -330,7 +330,7 @@ func (n *nomadFSM) applyDrainUpdate(buf []byte, index uint64) interface{} { panic(fmt.Errorf("failed to decode request: %v", err)) } - if err := n.state.UpdateNodeDrain(index, req.NodeID, req.DrainStrategy); err != nil { + if err := n.state.UpdateNodeDrain(index, req.NodeID, req.DrainStrategy, req.MarkEligible); err != nil { n.logger.Printf("[ERR] nomad.fsm: UpdateNodeDrain failed: %v", err) return err } diff --git a/nomad/node_endpoint_test.go b/nomad/node_endpoint_test.go index 3d98a942f..0a18f937c 100644 --- a/nomad/node_endpoint_test.go +++ b/nomad/node_endpoint_test.go @@ -2470,7 +2470,7 @@ func TestClientEndpoint_ListNodes_Blocking(t *testing.T) { Deadline: 10 * time.Second, }, } - if err := state.UpdateNodeDrain(3, node.ID, s); err != nil { + if err := state.UpdateNodeDrain(3, node.ID, s, false); err != nil { t.Fatalf("err: %v", err) } }) diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index ef6a51754..5f4564001 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -618,7 +618,8 @@ func (s *StateStore) UpdateNodeStatus(index uint64, nodeID, status string) error } // UpdateNodeDrain is used to update the drain of a node -func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain *structs.DrainStrategy) error { +func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, + drain *structs.DrainStrategy, markEligible bool) error { txn := s.db.Txn(true) defer txn.Abort() @@ -641,6 +642,8 @@ func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain *structs copyNode.DrainStrategy = drain if drain != nil { copyNode.SchedulingEligibility = structs.NodeSchedulingIneligible + } else if markEligible { + copyNode.SchedulingEligibility = structs.NodeSchedulingEligible } copyNode.ModifyIndex = index diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index 1bf1467de..7eeb4672e 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -716,7 +716,7 @@ func TestStateStore_UpdateNodeDrain_Node(t *testing.T) { }, } - require.Nil(state.UpdateNodeDrain(1001, node.ID, expectedDrain)) + require.Nil(state.UpdateNodeDrain(1001, node.ID, expectedDrain, false)) require.True(watchFired(ws)) ws = memdb.NewWatchSet() @@ -822,6 +822,43 @@ func TestStateStore_NodeEvents_RetentionWindow(t *testing.T) { require.Equal(uint64(20), out.Events[len(out.Events)-1].CreateIndex) } +func TestStateStore_UpdateNodeDrain_ResetEligiblity(t *testing.T) { + require := require.New(t) + state := testStateStore(t) + node := mock.Node() + require.Nil(state.UpsertNode(1000, node)) + + // Create a watchset so we can test that update node drain fires the watch + ws := memdb.NewWatchSet() + _, err := state.NodeByID(ws, node.ID) + require.Nil(err) + + drain := &structs.DrainStrategy{ + DrainSpec: structs.DrainSpec{ + Deadline: -1 * time.Second, + }, + } + + require.Nil(state.UpdateNodeDrain(1001, node.ID, drain, false)) + require.True(watchFired(ws)) + + // Remove the drain + require.Nil(state.UpdateNodeDrain(1002, node.ID, nil, true)) + + ws = memdb.NewWatchSet() + out, err := state.NodeByID(ws, node.ID) + require.Nil(err) + require.False(out.Drain) + require.Nil(out.DrainStrategy) + require.Equal(out.SchedulingEligibility, structs.NodeSchedulingEligible) + require.EqualValues(1002, out.ModifyIndex) + + index, err := state.Index("nodes") + require.Nil(err) + require.EqualValues(1002, index) + require.False(watchFired(ws)) +} + func TestStateStore_UpdateNodeEligibility(t *testing.T) { require := require.New(t) state := testStateStore(t) @@ -860,7 +897,7 @@ func TestStateStore_UpdateNodeEligibility(t *testing.T) { Deadline: -1 * time.Second, }, } - require.Nil(state.UpdateNodeDrain(1002, node.ID, expectedDrain)) + require.Nil(state.UpdateNodeDrain(1002, node.ID, expectedDrain, false)) // Try to set the node to eligible err = state.UpdateNodeEligibility(1003, node.ID, structs.NodeSchedulingEligible) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index f85a4bf48..04c073946 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -308,6 +308,9 @@ type NodeUpdateDrainRequest struct { NodeID string Drain bool // TODO Deprecate DrainStrategy *DrainStrategy + + // MarkEligible marks the node as eligible if removing the drain strategy. + MarkEligible bool WriteRequest }