mirror of
https://github.com/kemko/nomad.git
synced 2026-01-07 19:05:42 +03:00
Toggle Drain allows resetting eligibility
This PR allows marking a node as eligible for scheduling while toggling drain. By default the `nomad node drain -disable` commmand will mark it as eligible but the drainer will maintain in-eligibility.
This commit is contained in:
committed by
Michael Schurter
parent
5b36af9860
commit
d15371405f
14
api/nodes.go
14
api/nodes.go
@@ -51,13 +51,19 @@ type NodeUpdateDrainRequest struct {
|
||||
// DrainSpec is the drain specification to set for the node. A nil DrainSpec
|
||||
// will disable draining.
|
||||
DrainSpec *DrainSpec
|
||||
|
||||
// MarkEligible marks the node as eligible if removing the drain strategy.
|
||||
MarkEligible bool
|
||||
}
|
||||
|
||||
// UpdateDrain is used to update the drain strategy for a given node.
|
||||
func (n *Nodes) UpdateDrain(nodeID string, spec *DrainSpec, q *WriteOptions) (*WriteMeta, error) {
|
||||
// UpdateDrain is used to update the drain strategy for a given node. If
|
||||
// markEligible is true and the drain is being removed, the node will be marked
|
||||
// as having its scheduling being elibile
|
||||
func (n *Nodes) UpdateDrain(nodeID string, spec *DrainSpec, markEligible bool, q *WriteOptions) (*WriteMeta, error) {
|
||||
req := &NodeUpdateDrainRequest{
|
||||
NodeID: nodeID,
|
||||
DrainSpec: spec,
|
||||
NodeID: nodeID,
|
||||
DrainSpec: spec,
|
||||
MarkEligible: markEligible,
|
||||
}
|
||||
|
||||
wm, err := n.client.write("/v1/node/"+nodeID+"/drain", req, nil, q)
|
||||
|
||||
@@ -177,7 +177,7 @@ func TestNodes_ToggleDrain(t *testing.T) {
|
||||
spec := &DrainSpec{
|
||||
Deadline: 10 * time.Second,
|
||||
}
|
||||
wm, err := nodes.UpdateDrain(nodeID, spec, nil)
|
||||
wm, err := nodes.UpdateDrain(nodeID, spec, false, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
@@ -193,7 +193,7 @@ func TestNodes_ToggleDrain(t *testing.T) {
|
||||
}
|
||||
|
||||
// Toggle off again
|
||||
wm, err = nodes.UpdateDrain(nodeID, nil, nil)
|
||||
wm, err = nodes.UpdateDrain(nodeID, nil, true, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
@@ -210,6 +210,9 @@ func TestNodes_ToggleDrain(t *testing.T) {
|
||||
if out.DrainStrategy != nil {
|
||||
t.Fatalf("drain strategy should be unset")
|
||||
}
|
||||
if out.SchedulingEligibility != structs.NodeSchedulingEligible {
|
||||
t.Fatalf("should be eligible")
|
||||
}
|
||||
}
|
||||
|
||||
func TestNodes_ToggleEligibility(t *testing.T) {
|
||||
|
||||
@@ -132,7 +132,8 @@ func (s *HTTPServer) nodeToggleDrain(resp http.ResponseWriter, req *http.Request
|
||||
}
|
||||
|
||||
args := structs.NodeUpdateDrainRequest{
|
||||
NodeID: nodeID,
|
||||
NodeID: nodeID,
|
||||
MarkEligible: drainRequest.MarkEligible,
|
||||
}
|
||||
if drainRequest.DrainSpec != nil {
|
||||
args.DrainStrategy = &structs.DrainStrategy{
|
||||
|
||||
@@ -56,6 +56,11 @@ Node Drain Options:
|
||||
Ignore system allows the drain to complete without stopping system job
|
||||
allocations. By default system jobs are stopped last.
|
||||
|
||||
-keep-ineligible
|
||||
Keep ineligible will maintain the node's scheduling ineligibility even if
|
||||
the drain is being disabled. This is useful when an existing drain is being
|
||||
cancelled but additional scheduling on the node is not desired.
|
||||
|
||||
-self
|
||||
Set the drain status of the local node.
|
||||
|
||||
@@ -72,14 +77,15 @@ func (c *NodeDrainCommand) Synopsis() string {
|
||||
func (c *NodeDrainCommand) AutocompleteFlags() complete.Flags {
|
||||
return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient),
|
||||
complete.Flags{
|
||||
"-disable": complete.PredictNothing,
|
||||
"-enable": complete.PredictNothing,
|
||||
"-deadline": complete.PredictAnything,
|
||||
"-force": complete.PredictNothing,
|
||||
"-no-deadline": complete.PredictNothing,
|
||||
"-ignore-system": complete.PredictNothing,
|
||||
"-self": complete.PredictNothing,
|
||||
"-yes": complete.PredictNothing,
|
||||
"-disable": complete.PredictNothing,
|
||||
"-enable": complete.PredictNothing,
|
||||
"-deadline": complete.PredictAnything,
|
||||
"-force": complete.PredictNothing,
|
||||
"-no-deadline": complete.PredictNothing,
|
||||
"-ignore-system": complete.PredictNothing,
|
||||
"-keep-ineligible": complete.PredictNothing,
|
||||
"-self": complete.PredictNothing,
|
||||
"-yes": complete.PredictNothing,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -100,7 +106,7 @@ func (c *NodeDrainCommand) AutocompleteArgs() complete.Predictor {
|
||||
|
||||
func (c *NodeDrainCommand) Run(args []string) int {
|
||||
var enable, disable, force,
|
||||
noDeadline, ignoreSystem, self, autoYes bool
|
||||
noDeadline, ignoreSystem, keepIneligible, self, autoYes bool
|
||||
var deadline string
|
||||
|
||||
flags := c.Meta.FlagSet("node-drain", FlagSetClient)
|
||||
@@ -111,6 +117,7 @@ func (c *NodeDrainCommand) Run(args []string) int {
|
||||
flags.BoolVar(&force, "force", false, "Force immediate drain")
|
||||
flags.BoolVar(&noDeadline, "no-deadline", false, "Drain node with no deadline")
|
||||
flags.BoolVar(&ignoreSystem, "ignore-system", false, "Do not drain system job allocations from the node")
|
||||
flags.BoolVar(&keepIneligible, "keep-ineligible", false, "Do not update the nodes scheduling eligibility")
|
||||
flags.BoolVar(&self, "self", false, "")
|
||||
flags.BoolVar(&autoYes, "yes", false, "Automatic yes to prompts.")
|
||||
|
||||
@@ -252,7 +259,7 @@ func (c *NodeDrainCommand) Run(args []string) int {
|
||||
}
|
||||
|
||||
// Toggle node draining
|
||||
if _, err := client.Nodes().UpdateDrain(node.ID, spec, nil); err != nil {
|
||||
if _, err := client.Nodes().UpdateDrain(node.ID, spec, !keepIneligible, nil); err != nil {
|
||||
c.Ui.Error(fmt.Sprintf("Error updating drain specification: %s", err))
|
||||
return 1
|
||||
}
|
||||
|
||||
@@ -330,7 +330,7 @@ func (n *nomadFSM) applyDrainUpdate(buf []byte, index uint64) interface{} {
|
||||
panic(fmt.Errorf("failed to decode request: %v", err))
|
||||
}
|
||||
|
||||
if err := n.state.UpdateNodeDrain(index, req.NodeID, req.DrainStrategy); err != nil {
|
||||
if err := n.state.UpdateNodeDrain(index, req.NodeID, req.DrainStrategy, req.MarkEligible); err != nil {
|
||||
n.logger.Printf("[ERR] nomad.fsm: UpdateNodeDrain failed: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -2470,7 +2470,7 @@ func TestClientEndpoint_ListNodes_Blocking(t *testing.T) {
|
||||
Deadline: 10 * time.Second,
|
||||
},
|
||||
}
|
||||
if err := state.UpdateNodeDrain(3, node.ID, s); err != nil {
|
||||
if err := state.UpdateNodeDrain(3, node.ID, s, false); err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
@@ -618,7 +618,8 @@ func (s *StateStore) UpdateNodeStatus(index uint64, nodeID, status string) error
|
||||
}
|
||||
|
||||
// UpdateNodeDrain is used to update the drain of a node
|
||||
func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain *structs.DrainStrategy) error {
|
||||
func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string,
|
||||
drain *structs.DrainStrategy, markEligible bool) error {
|
||||
|
||||
txn := s.db.Txn(true)
|
||||
defer txn.Abort()
|
||||
@@ -641,6 +642,8 @@ func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain *structs
|
||||
copyNode.DrainStrategy = drain
|
||||
if drain != nil {
|
||||
copyNode.SchedulingEligibility = structs.NodeSchedulingIneligible
|
||||
} else if markEligible {
|
||||
copyNode.SchedulingEligibility = structs.NodeSchedulingEligible
|
||||
}
|
||||
|
||||
copyNode.ModifyIndex = index
|
||||
|
||||
@@ -716,7 +716,7 @@ func TestStateStore_UpdateNodeDrain_Node(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
require.Nil(state.UpdateNodeDrain(1001, node.ID, expectedDrain))
|
||||
require.Nil(state.UpdateNodeDrain(1001, node.ID, expectedDrain, false))
|
||||
require.True(watchFired(ws))
|
||||
|
||||
ws = memdb.NewWatchSet()
|
||||
@@ -822,6 +822,43 @@ func TestStateStore_NodeEvents_RetentionWindow(t *testing.T) {
|
||||
require.Equal(uint64(20), out.Events[len(out.Events)-1].CreateIndex)
|
||||
}
|
||||
|
||||
func TestStateStore_UpdateNodeDrain_ResetEligiblity(t *testing.T) {
|
||||
require := require.New(t)
|
||||
state := testStateStore(t)
|
||||
node := mock.Node()
|
||||
require.Nil(state.UpsertNode(1000, node))
|
||||
|
||||
// Create a watchset so we can test that update node drain fires the watch
|
||||
ws := memdb.NewWatchSet()
|
||||
_, err := state.NodeByID(ws, node.ID)
|
||||
require.Nil(err)
|
||||
|
||||
drain := &structs.DrainStrategy{
|
||||
DrainSpec: structs.DrainSpec{
|
||||
Deadline: -1 * time.Second,
|
||||
},
|
||||
}
|
||||
|
||||
require.Nil(state.UpdateNodeDrain(1001, node.ID, drain, false))
|
||||
require.True(watchFired(ws))
|
||||
|
||||
// Remove the drain
|
||||
require.Nil(state.UpdateNodeDrain(1002, node.ID, nil, true))
|
||||
|
||||
ws = memdb.NewWatchSet()
|
||||
out, err := state.NodeByID(ws, node.ID)
|
||||
require.Nil(err)
|
||||
require.False(out.Drain)
|
||||
require.Nil(out.DrainStrategy)
|
||||
require.Equal(out.SchedulingEligibility, structs.NodeSchedulingEligible)
|
||||
require.EqualValues(1002, out.ModifyIndex)
|
||||
|
||||
index, err := state.Index("nodes")
|
||||
require.Nil(err)
|
||||
require.EqualValues(1002, index)
|
||||
require.False(watchFired(ws))
|
||||
}
|
||||
|
||||
func TestStateStore_UpdateNodeEligibility(t *testing.T) {
|
||||
require := require.New(t)
|
||||
state := testStateStore(t)
|
||||
@@ -860,7 +897,7 @@ func TestStateStore_UpdateNodeEligibility(t *testing.T) {
|
||||
Deadline: -1 * time.Second,
|
||||
},
|
||||
}
|
||||
require.Nil(state.UpdateNodeDrain(1002, node.ID, expectedDrain))
|
||||
require.Nil(state.UpdateNodeDrain(1002, node.ID, expectedDrain, false))
|
||||
|
||||
// Try to set the node to eligible
|
||||
err = state.UpdateNodeEligibility(1003, node.ID, structs.NodeSchedulingEligible)
|
||||
|
||||
@@ -308,6 +308,9 @@ type NodeUpdateDrainRequest struct {
|
||||
NodeID string
|
||||
Drain bool // TODO Deprecate
|
||||
DrainStrategy *DrainStrategy
|
||||
|
||||
// MarkEligible marks the node as eligible if removing the drain strategy.
|
||||
MarkEligible bool
|
||||
WriteRequest
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user