Toggle Drain allows resetting eligibility

This PR allows marking a node as eligible for scheduling while toggling
drain. By default the `nomad node drain -disable` command will mark the node
as eligible, but the drainer will maintain ineligibility.
This commit is contained in:
Alex Dadgar
2018-03-08 11:06:30 -08:00
committed by Michael Schurter
parent 5b36af9860
commit d15371405f
9 changed files with 82 additions and 22 deletions

View File

@@ -51,13 +51,19 @@ type NodeUpdateDrainRequest struct {
// DrainSpec is the drain specification to set for the node. A nil DrainSpec
// will disable draining.
DrainSpec *DrainSpec
// MarkEligible marks the node as eligible if removing the drain strategy.
MarkEligible bool
}
// UpdateDrain is used to update the drain strategy for a given node.
func (n *Nodes) UpdateDrain(nodeID string, spec *DrainSpec, q *WriteOptions) (*WriteMeta, error) {
// UpdateDrain is used to update the drain strategy for a given node. If
// markEligible is true and the drain is being removed, the node will be marked
// as eligible for scheduling.
func (n *Nodes) UpdateDrain(nodeID string, spec *DrainSpec, markEligible bool, q *WriteOptions) (*WriteMeta, error) {
req := &NodeUpdateDrainRequest{
NodeID: nodeID,
DrainSpec: spec,
NodeID: nodeID,
DrainSpec: spec,
MarkEligible: markEligible,
}
wm, err := n.client.write("/v1/node/"+nodeID+"/drain", req, nil, q)

View File

@@ -177,7 +177,7 @@ func TestNodes_ToggleDrain(t *testing.T) {
spec := &DrainSpec{
Deadline: 10 * time.Second,
}
wm, err := nodes.UpdateDrain(nodeID, spec, nil)
wm, err := nodes.UpdateDrain(nodeID, spec, false, nil)
if err != nil {
t.Fatalf("err: %s", err)
}
@@ -193,7 +193,7 @@ func TestNodes_ToggleDrain(t *testing.T) {
}
// Toggle off again
wm, err = nodes.UpdateDrain(nodeID, nil, nil)
wm, err = nodes.UpdateDrain(nodeID, nil, true, nil)
if err != nil {
t.Fatalf("err: %s", err)
}
@@ -210,6 +210,9 @@ func TestNodes_ToggleDrain(t *testing.T) {
if out.DrainStrategy != nil {
t.Fatalf("drain strategy should be unset")
}
if out.SchedulingEligibility != structs.NodeSchedulingEligible {
t.Fatalf("should be eligible")
}
}
func TestNodes_ToggleEligibility(t *testing.T) {

View File

@@ -132,7 +132,8 @@ func (s *HTTPServer) nodeToggleDrain(resp http.ResponseWriter, req *http.Request
}
args := structs.NodeUpdateDrainRequest{
NodeID: nodeID,
NodeID: nodeID,
MarkEligible: drainRequest.MarkEligible,
}
if drainRequest.DrainSpec != nil {
args.DrainStrategy = &structs.DrainStrategy{

View File

@@ -56,6 +56,11 @@ Node Drain Options:
Ignore system allows the drain to complete without stopping system job
allocations. By default system jobs are stopped last.
-keep-ineligible
Keep ineligible will maintain the node's scheduling ineligibility even if
the drain is being disabled. This is useful when an existing drain is being
cancelled but additional scheduling on the node is not desired.
-self
Set the drain status of the local node.
@@ -72,14 +77,15 @@ func (c *NodeDrainCommand) Synopsis() string {
func (c *NodeDrainCommand) AutocompleteFlags() complete.Flags {
return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient),
complete.Flags{
"-disable": complete.PredictNothing,
"-enable": complete.PredictNothing,
"-deadline": complete.PredictAnything,
"-force": complete.PredictNothing,
"-no-deadline": complete.PredictNothing,
"-ignore-system": complete.PredictNothing,
"-self": complete.PredictNothing,
"-yes": complete.PredictNothing,
"-disable": complete.PredictNothing,
"-enable": complete.PredictNothing,
"-deadline": complete.PredictAnything,
"-force": complete.PredictNothing,
"-no-deadline": complete.PredictNothing,
"-ignore-system": complete.PredictNothing,
"-keep-ineligible": complete.PredictNothing,
"-self": complete.PredictNothing,
"-yes": complete.PredictNothing,
})
}
@@ -100,7 +106,7 @@ func (c *NodeDrainCommand) AutocompleteArgs() complete.Predictor {
func (c *NodeDrainCommand) Run(args []string) int {
var enable, disable, force,
noDeadline, ignoreSystem, self, autoYes bool
noDeadline, ignoreSystem, keepIneligible, self, autoYes bool
var deadline string
flags := c.Meta.FlagSet("node-drain", FlagSetClient)
@@ -111,6 +117,7 @@ func (c *NodeDrainCommand) Run(args []string) int {
flags.BoolVar(&force, "force", false, "Force immediate drain")
flags.BoolVar(&noDeadline, "no-deadline", false, "Drain node with no deadline")
flags.BoolVar(&ignoreSystem, "ignore-system", false, "Do not drain system job allocations from the node")
flags.BoolVar(&keepIneligible, "keep-ineligible", false, "Do not update the nodes scheduling eligibility")
flags.BoolVar(&self, "self", false, "")
flags.BoolVar(&autoYes, "yes", false, "Automatic yes to prompts.")
@@ -252,7 +259,7 @@ func (c *NodeDrainCommand) Run(args []string) int {
}
// Toggle node draining
if _, err := client.Nodes().UpdateDrain(node.ID, spec, nil); err != nil {
if _, err := client.Nodes().UpdateDrain(node.ID, spec, !keepIneligible, nil); err != nil {
c.Ui.Error(fmt.Sprintf("Error updating drain specification: %s", err))
return 1
}

View File

@@ -330,7 +330,7 @@ func (n *nomadFSM) applyDrainUpdate(buf []byte, index uint64) interface{} {
panic(fmt.Errorf("failed to decode request: %v", err))
}
if err := n.state.UpdateNodeDrain(index, req.NodeID, req.DrainStrategy); err != nil {
if err := n.state.UpdateNodeDrain(index, req.NodeID, req.DrainStrategy, req.MarkEligible); err != nil {
n.logger.Printf("[ERR] nomad.fsm: UpdateNodeDrain failed: %v", err)
return err
}

View File

@@ -2470,7 +2470,7 @@ func TestClientEndpoint_ListNodes_Blocking(t *testing.T) {
Deadline: 10 * time.Second,
},
}
if err := state.UpdateNodeDrain(3, node.ID, s); err != nil {
if err := state.UpdateNodeDrain(3, node.ID, s, false); err != nil {
t.Fatalf("err: %v", err)
}
})

View File

@@ -618,7 +618,8 @@ func (s *StateStore) UpdateNodeStatus(index uint64, nodeID, status string) error
}
// UpdateNodeDrain is used to update the drain of a node
func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain *structs.DrainStrategy) error {
func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string,
drain *structs.DrainStrategy, markEligible bool) error {
txn := s.db.Txn(true)
defer txn.Abort()
@@ -641,6 +642,8 @@ func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain *structs
copyNode.DrainStrategy = drain
if drain != nil {
copyNode.SchedulingEligibility = structs.NodeSchedulingIneligible
} else if markEligible {
copyNode.SchedulingEligibility = structs.NodeSchedulingEligible
}
copyNode.ModifyIndex = index

View File

@@ -716,7 +716,7 @@ func TestStateStore_UpdateNodeDrain_Node(t *testing.T) {
},
}
require.Nil(state.UpdateNodeDrain(1001, node.ID, expectedDrain))
require.Nil(state.UpdateNodeDrain(1001, node.ID, expectedDrain, false))
require.True(watchFired(ws))
ws = memdb.NewWatchSet()
@@ -822,6 +822,43 @@ func TestStateStore_NodeEvents_RetentionWindow(t *testing.T) {
require.Equal(uint64(20), out.Events[len(out.Events)-1].CreateIndex)
}
// TestStateStore_UpdateNodeDrain_ResetEligiblity verifies that removing a
// node's drain strategy with markEligible set to true marks the node as
// eligible for scheduling again, and that the node's ModifyIndex and the
// "nodes" table index both advance to the index of the removing write.
func TestStateStore_UpdateNodeDrain_ResetEligiblity(t *testing.T) {
	require := require.New(t)
	state := testStateStore(t)
	node := mock.Node()
	require.NoError(state.UpsertNode(1000, node))

	// Create a watchset so we can test that update node drain fires the watch
	ws := memdb.NewWatchSet()
	_, err := state.NodeByID(ws, node.ID)
	require.NoError(err)

	drain := &structs.DrainStrategy{
		DrainSpec: structs.DrainSpec{
			Deadline: -1 * time.Second,
		},
	}

	// Set the drain without touching eligibility; the watch must fire.
	require.NoError(state.UpdateNodeDrain(1001, node.ID, drain, false))
	require.True(watchFired(ws))

	// Remove the drain and ask for eligibility to be reset.
	require.NoError(state.UpdateNodeDrain(1002, node.ID, nil, true))

	ws = memdb.NewWatchSet()
	out, err := state.NodeByID(ws, node.ID)
	require.NoError(err)
	require.False(out.Drain)
	require.Nil(out.DrainStrategy)
	// Expected value goes first so a failure reports expected/actual correctly.
	require.Equal(structs.NodeSchedulingEligible, out.SchedulingEligibility)
	require.EqualValues(1002, out.ModifyIndex)

	index, err := state.Index("nodes")
	require.NoError(err)
	require.EqualValues(1002, index)

	// This watch set was registered after the last write, so nothing should
	// have triggered it.
	require.False(watchFired(ws))
}
func TestStateStore_UpdateNodeEligibility(t *testing.T) {
require := require.New(t)
state := testStateStore(t)
@@ -860,7 +897,7 @@ func TestStateStore_UpdateNodeEligibility(t *testing.T) {
Deadline: -1 * time.Second,
},
}
require.Nil(state.UpdateNodeDrain(1002, node.ID, expectedDrain))
require.Nil(state.UpdateNodeDrain(1002, node.ID, expectedDrain, false))
// Try to set the node to eligible
err = state.UpdateNodeEligibility(1003, node.ID, structs.NodeSchedulingEligible)

View File

@@ -308,6 +308,9 @@ type NodeUpdateDrainRequest struct {
NodeID string
Drain bool // TODO Deprecate
DrainStrategy *DrainStrategy
// MarkEligible marks the node as eligible if removing the drain strategy.
MarkEligible bool
WriteRequest
}