mirror of
https://github.com/kemko/nomad.git
synced 2026-01-06 10:25:42 +03:00
provide -no-shutdown-delay flag for job/alloc stop (#11596)
Some operators use very long group/task `shutdown_delay` settings to safely drain network connections to their workloads after service deregistration. But during incident response, they may want to skip that drain so they can quickly shed load. Provide a `-no-shutdown-delay` flag on the `nomad alloc stop` and `nomad job stop` commands that bypasses the delay. This sets a new desired transition state on the affected allocations that the allocation/task runner will identify during pre-kill on the client. Note (as documented here) that using this flag will almost always result in failed inbound network connections for workloads, as the tasks will exit before clients receive updated service discovery information and won't be gracefully drained.
This commit is contained in:
@@ -138,8 +138,18 @@ func (s *HTTPServer) allocStop(allocID string, resp http.ResponseWriter, req *ht
|
||||
return nil, CodedError(405, ErrInvalidMethod)
|
||||
}
|
||||
|
||||
noShutdownDelay := false
|
||||
if noShutdownDelayQS := req.URL.Query().Get("no_shutdown_delay"); noShutdownDelayQS != "" {
|
||||
var err error
|
||||
noShutdownDelay, err = strconv.ParseBool(noShutdownDelayQS)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("no_shutdown_delay value is not a boolean: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
sr := &structs.AllocStopRequest{
|
||||
AllocID: allocID,
|
||||
AllocID: allocID,
|
||||
NoShutdownDelay: noShutdownDelay,
|
||||
}
|
||||
s.parseWriteRequest(req, &sr.WriteRequest)
|
||||
|
||||
|
||||
@@ -453,6 +453,18 @@ func (s *HTTPServer) jobDelete(resp http.ResponseWriter, req *http.Request,
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Identify the no_shutdown_delay query param and parse.
|
||||
noShutdownDelayStr := req.URL.Query().Get("no_shutdown_delay")
|
||||
var noShutdownDelay bool
|
||||
if noShutdownDelayStr != "" {
|
||||
var err error
|
||||
noShutdownDelay, err = strconv.ParseBool(noShutdownDelayStr)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Failed to parse value of %qq (%v) as a bool: %v", "no_shutdown_delay", noShutdownDelayStr, err)
|
||||
}
|
||||
}
|
||||
args.NoShutdownDelay = noShutdownDelay
|
||||
|
||||
// Validate the evaluation priority if the user supplied a non-default
|
||||
// value. It's more efficient to do it here, within the agent rather than
|
||||
// sending a bad request for the server to reject.
|
||||
|
||||
@@ -38,6 +38,12 @@ Stop Specific Options:
|
||||
screen, which can be used to examine the rescheduling evaluation using the
|
||||
eval-status command.
|
||||
|
||||
-no-shutdown-delay
|
||||
Ignore the group and task shutdown_delay configuration so there is no
|
||||
delay between service deregistration and task shutdown. Note that using
|
||||
this flag will result in failed network connections to the allocation
|
||||
being stopped.
|
||||
|
||||
-verbose
|
||||
Show full information.
|
||||
`
|
||||
@@ -47,12 +53,13 @@ Stop Specific Options:
|
||||
// Name returns the fixed identifier "alloc stop" for this command.
func (c *AllocStopCommand) Name() string {
	return "alloc stop"
}
|
||||
|
||||
func (c *AllocStopCommand) Run(args []string) int {
|
||||
var detach, verbose bool
|
||||
var detach, verbose, noShutdownDelay bool
|
||||
|
||||
flags := c.Meta.FlagSet(c.Name(), FlagSetClient)
|
||||
flags.Usage = func() { c.Ui.Output(c.Help()) }
|
||||
flags.BoolVar(&detach, "detach", false, "")
|
||||
flags.BoolVar(&verbose, "verbose", false, "")
|
||||
flags.BoolVar(&noShutdownDelay, "no-shutdown-delay", false, "")
|
||||
|
||||
if err := flags.Parse(args); err != nil {
|
||||
return 1
|
||||
@@ -115,7 +122,12 @@ func (c *AllocStopCommand) Run(args []string) int {
|
||||
return 1
|
||||
}
|
||||
|
||||
resp, err := client.Allocations().Stop(alloc, nil)
|
||||
var opts *api.QueryOptions
|
||||
if noShutdownDelay {
|
||||
opts = &api.QueryOptions{Params: map[string]string{"no_shutdown_delay": "true"}}
|
||||
}
|
||||
|
||||
resp, err := client.Allocations().Stop(alloc, opts)
|
||||
if err != nil {
|
||||
c.Ui.Error(fmt.Sprintf("Error stopping allocation: %s", err))
|
||||
return 1
|
||||
|
||||
@@ -43,14 +43,20 @@ Stop Options:
|
||||
Override the priority of the evaluations produced as a result of this job
|
||||
deregistration. By default, this is set to the priority of the job.
|
||||
|
||||
-purge
|
||||
Purge is used to stop the job and purge it from the system. If not set, the
|
||||
job will still be queryable and will be purged by the garbage collector.
|
||||
|
||||
-global
|
||||
Stop a multi-region job in all its regions. By default job stop will stop
|
||||
only a single region at a time. Ignored for single-region jobs.
|
||||
|
||||
-no-shutdown-delay
|
||||
Ignore the group and task shutdown_delay configuration so that there is no
|
||||
delay between service deregistration and task shutdown. Note that using
|
||||
this flag will result in failed network connections to the allocations
|
||||
being stopped.
|
||||
|
||||
-purge
|
||||
Purge is used to stop the job and purge it from the system. If not set, the
|
||||
job will still be queryable and will be purged by the garbage collector.
|
||||
|
||||
-yes
|
||||
Automatic yes to prompts.
|
||||
|
||||
@@ -67,12 +73,13 @@ func (c *JobStopCommand) Synopsis() string {
|
||||
func (c *JobStopCommand) AutocompleteFlags() complete.Flags {
|
||||
return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient),
|
||||
complete.Flags{
|
||||
"-detach": complete.PredictNothing,
|
||||
"-eval-priority": complete.PredictNothing,
|
||||
"-purge": complete.PredictNothing,
|
||||
"-global": complete.PredictNothing,
|
||||
"-yes": complete.PredictNothing,
|
||||
"-verbose": complete.PredictNothing,
|
||||
"-detach": complete.PredictNothing,
|
||||
"-eval-priority": complete.PredictNothing,
|
||||
"-purge": complete.PredictNothing,
|
||||
"-global": complete.PredictNothing,
|
||||
"-no-shutdown-delay": complete.PredictNothing,
|
||||
"-yes": complete.PredictNothing,
|
||||
"-verbose": complete.PredictNothing,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -94,7 +101,7 @@ func (c *JobStopCommand) AutocompleteArgs() complete.Predictor {
|
||||
// Name returns the fixed identifier "job stop" for this command.
func (c *JobStopCommand) Name() string {
	return "job stop"
}
|
||||
|
||||
func (c *JobStopCommand) Run(args []string) int {
|
||||
var detach, purge, verbose, global, autoYes bool
|
||||
var detach, purge, verbose, global, autoYes, noShutdownDelay bool
|
||||
var evalPriority int
|
||||
|
||||
flags := c.Meta.FlagSet(c.Name(), FlagSetClient)
|
||||
@@ -102,6 +109,7 @@ func (c *JobStopCommand) Run(args []string) int {
|
||||
flags.BoolVar(&detach, "detach", false, "")
|
||||
flags.BoolVar(&verbose, "verbose", false, "")
|
||||
flags.BoolVar(&global, "global", false, "")
|
||||
flags.BoolVar(&noShutdownDelay, "no-shutdown-delay", false, "")
|
||||
flags.BoolVar(&autoYes, "yes", false, "")
|
||||
flags.BoolVar(&purge, "purge", false, "")
|
||||
flags.IntVar(&evalPriority, "eval-priority", 0, "")
|
||||
@@ -199,7 +207,7 @@ func (c *JobStopCommand) Run(args []string) int {
|
||||
}
|
||||
|
||||
// Invoke the stop
|
||||
opts := &api.DeregisterOptions{Purge: purge, Global: global, EvalPriority: evalPriority}
|
||||
opts := &api.DeregisterOptions{Purge: purge, Global: global, EvalPriority: evalPriority, NoShutdownDelay: noShutdownDelay}
|
||||
wq := &api.WriteOptions{Namespace: jobs[0].JobSummary.Namespace}
|
||||
evalID, _, err := client.Jobs().DeregisterOpts(*job.ID, opts, wq)
|
||||
if err != nil {
|
||||
|
||||
Reference in New Issue
Block a user