provide -no-shutdown-delay flag for job/alloc stop (#11596)

Some operators use very long group/task `shutdown_delay` settings to
safely drain network connections to their workloads after service
deregistration. But during incident response, they may want to cause
that drain to be skipped so they can quickly shed load.

Provide a `-no-shutdown-delay` flag on the `nomad alloc stop` and
`nomad job stop` commands that bypasses the delay. This sets a new
desired transition state on the affected allocations that the
allocation/task runner will identify during pre-kill on the client.

Note (as documented here) that using this flag will almost always
result in failed inbound network connections for workloads, because the
tasks will exit before clients receive updated service discovery
information and the connections won't be gracefully drained.
This commit is contained in:
Tim Gross
2021-12-13 14:54:53 -05:00
committed by GitHub
parent 43b3e1628f
commit 35c22bcb6c
18 changed files with 372 additions and 47 deletions

View File

@@ -138,8 +138,18 @@ func (s *HTTPServer) allocStop(allocID string, resp http.ResponseWriter, req *ht
return nil, CodedError(405, ErrInvalidMethod)
}
noShutdownDelay := false
if noShutdownDelayQS := req.URL.Query().Get("no_shutdown_delay"); noShutdownDelayQS != "" {
var err error
noShutdownDelay, err = strconv.ParseBool(noShutdownDelayQS)
if err != nil {
return nil, fmt.Errorf("no_shutdown_delay value is not a boolean: %v", err)
}
}
sr := &structs.AllocStopRequest{
AllocID: allocID,
AllocID: allocID,
NoShutdownDelay: noShutdownDelay,
}
s.parseWriteRequest(req, &sr.WriteRequest)

View File

@@ -453,6 +453,18 @@ func (s *HTTPServer) jobDelete(resp http.ResponseWriter, req *http.Request,
return nil, err
}
// Identify the no_shutdown_delay query param and parse it. An absent or
// empty value leaves the flag false; a value that strconv.ParseBool cannot
// parse is returned to the caller as an error.
noShutdownDelayStr := req.URL.Query().Get("no_shutdown_delay")
var noShutdownDelay bool
if noShutdownDelayStr != "" {
	var err error
	noShutdownDelay, err = strconv.ParseBool(noShutdownDelayStr)
	if err != nil {
		// %q quotes the parameter name; the original "%qq" printed a stray
		// literal 'q' after it.
		return nil, fmt.Errorf("Failed to parse value of %q (%v) as a bool: %v", "no_shutdown_delay", noShutdownDelayStr, err)
	}
}
args.NoShutdownDelay = noShutdownDelay
// Validate the evaluation priority if the user supplied a non-default
// value. It's more efficient to do it here, within the agent rather than
// sending a bad request for the server to reject.

View File

@@ -38,6 +38,12 @@ Stop Specific Options:
screen, which can be used to examine the rescheduling evaluation using the
eval-status command.
-no-shutdown-delay
Ignore the group and task shutdown_delay configuration so there is no
delay between service deregistration and task shutdown. Note that using
this flag will result in failed network connections to the allocation
being stopped.
-verbose
Show full information.
`
@@ -47,12 +53,13 @@ Stop Specific Options:
func (c *AllocStopCommand) Name() string { return "alloc stop" }
func (c *AllocStopCommand) Run(args []string) int {
var detach, verbose bool
var detach, verbose, noShutdownDelay bool
flags := c.Meta.FlagSet(c.Name(), FlagSetClient)
flags.Usage = func() { c.Ui.Output(c.Help()) }
flags.BoolVar(&detach, "detach", false, "")
flags.BoolVar(&verbose, "verbose", false, "")
flags.BoolVar(&noShutdownDelay, "no-shutdown-delay", false, "")
if err := flags.Parse(args); err != nil {
return 1
@@ -115,7 +122,12 @@ func (c *AllocStopCommand) Run(args []string) int {
return 1
}
resp, err := client.Allocations().Stop(alloc, nil)
var opts *api.QueryOptions
if noShutdownDelay {
opts = &api.QueryOptions{Params: map[string]string{"no_shutdown_delay": "true"}}
}
resp, err := client.Allocations().Stop(alloc, opts)
if err != nil {
c.Ui.Error(fmt.Sprintf("Error stopping allocation: %s", err))
return 1

View File

@@ -43,14 +43,20 @@ Stop Options:
Override the priority of the evaluations produced as a result of this job
deregistration. By default, this is set to the priority of the job.
-purge
Purge is used to stop the job and purge it from the system. If not set, the
job will still be queryable and will be purged by the garbage collector.
-global
Stop a multi-region job in all its regions. By default job stop will stop
only a single region at a time. Ignored for single-region jobs.
-no-shutdown-delay
Ignore the group and task shutdown_delay configuration so that there is no
delay between service deregistration and task shutdown. Note that using
this flag will result in failed network connections to the allocations
being stopped.
-purge
Purge is used to stop the job and purge it from the system. If not set, the
job will still be queryable and will be purged by the garbage collector.
-yes
Automatic yes to prompts.
@@ -67,12 +73,13 @@ func (c *JobStopCommand) Synopsis() string {
func (c *JobStopCommand) AutocompleteFlags() complete.Flags {
return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient),
complete.Flags{
"-detach": complete.PredictNothing,
"-eval-priority": complete.PredictNothing,
"-purge": complete.PredictNothing,
"-global": complete.PredictNothing,
"-yes": complete.PredictNothing,
"-verbose": complete.PredictNothing,
"-detach": complete.PredictNothing,
"-eval-priority": complete.PredictNothing,
"-purge": complete.PredictNothing,
"-global": complete.PredictNothing,
"-no-shutdown-delay": complete.PredictNothing,
"-yes": complete.PredictNothing,
"-verbose": complete.PredictNothing,
})
}
@@ -94,7 +101,7 @@ func (c *JobStopCommand) AutocompleteArgs() complete.Predictor {
func (c *JobStopCommand) Name() string { return "job stop" }
func (c *JobStopCommand) Run(args []string) int {
var detach, purge, verbose, global, autoYes bool
var detach, purge, verbose, global, autoYes, noShutdownDelay bool
var evalPriority int
flags := c.Meta.FlagSet(c.Name(), FlagSetClient)
@@ -102,6 +109,7 @@ func (c *JobStopCommand) Run(args []string) int {
flags.BoolVar(&detach, "detach", false, "")
flags.BoolVar(&verbose, "verbose", false, "")
flags.BoolVar(&global, "global", false, "")
flags.BoolVar(&noShutdownDelay, "no-shutdown-delay", false, "")
flags.BoolVar(&autoYes, "yes", false, "")
flags.BoolVar(&purge, "purge", false, "")
flags.IntVar(&evalPriority, "eval-priority", 0, "")
@@ -199,7 +207,7 @@ func (c *JobStopCommand) Run(args []string) int {
}
// Invoke the stop
opts := &api.DeregisterOptions{Purge: purge, Global: global, EvalPriority: evalPriority}
opts := &api.DeregisterOptions{Purge: purge, Global: global, EvalPriority: evalPriority, NoShutdownDelay: noShutdownDelay}
wq := &api.WriteOptions{Namespace: jobs[0].JobSummary.Namespace}
evalID, _, err := client.Jobs().DeregisterOpts(*job.ID, opts, wq)
if err != nil {