Mirror of https://github.com/kemko/nomad.git, synced 2026-01-07 10:55:42 +03:00
cli: job restart command (#16278)
Implement the new `nomad job restart` command, which allows operators to restart an allocation's tasks or reschedule the entire allocation. Restarts can be batched to target multiple allocations in parallel. Between batches the command can pause for a predefined time or until the user confirms that the process should proceed.

This implements the "Stateless Restarts" alternative from the original RFC (https://gist.github.com/schmichael/e0b8b2ec1eb146301175fd87ddd46180). The original concept is still worth implementing, as it would expose this functionality over an API that could be consumed by the Nomad UI and other clients. But that implementation turned out to be more complex than we initially expected, so we decided to release a stateless, CLI-based implementation first to gather feedback and validate the restart behaviour.

Co-authored-by: Shishir Mahajan <smahajan@roblox.com>
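The batching behaviour described above amounts to a client-side loop over the job's allocations. The following is a minimal sketch of that idea against the public github.com/hashicorp/nomad/api package, not the command's actual implementation; restartJobInBatches, the "example" job ID, and the batch parameters are all hypothetical.

package main

import (
	"fmt"
	"time"

	"github.com/hashicorp/nomad/api"
)

// restartJobInBatches is a hypothetical helper: it restarts the running
// allocations of a job in batches of batchSize, sleeping between batches.
func restartJobInBatches(client *api.Client, jobID string, batchSize int, wait time.Duration) error {
	stubs, _, err := client.Jobs().Allocations(jobID, false, nil)
	if err != nil {
		return fmt.Errorf("failed to list allocations: %w", err)
	}

	restarted := 0
	for _, stub := range stubs {
		if stub.ClientStatus != api.AllocClientStatusRunning {
			continue
		}

		// Fetch the full allocation and restart its tasks in place.
		alloc, _, err := client.Allocations().Info(stub.ID, nil)
		if err != nil {
			return fmt.Errorf("failed to read allocation %s: %w", stub.ID, err)
		}
		if err := client.Allocations().Restart(alloc, "", nil); err != nil {
			return fmt.Errorf("failed to restart allocation %s: %w", stub.ID, err)
		}

		// Hold between batches so operators can watch the job recover.
		restarted++
		if restarted%batchSize == 0 {
			time.Sleep(wait)
		}
	}
	return nil
}

func main() {
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		panic(err)
	}
	// "example" is a hypothetical job ID; 2 allocations per batch, 10s hold.
	if err := restartJobInBatches(client, "example", 2, 10*time.Second); err != nil {
		panic(err)
	}
}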
@@ -415,6 +415,14 @@ func Commands(metaPtr *Meta, agentUi cli.Ui) map[string]cli.CommandFactory {
 			Meta: meta,
 		}, nil
 	},
+	"job restart": func() (cli.Command, error) {
+		// Use a *cli.ConcurrentUi because this command spawns several
+		// goroutines that write to the terminal concurrently.
+		meta.Ui = &cli.ConcurrentUi{Ui: meta.Ui}
+		return &JobRestartCommand{
+			Meta: meta,
+		}, nil
+	},
	"job deployments": func() (cli.Command, error) {
 		return &JobDeploymentsCommand{
 			Meta: meta,
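For context on the comment in the hunk above: cli.ConcurrentUi from github.com/mitchellh/cli serializes calls to an underlying cli.Ui with a mutex, so writes from several goroutines do not interleave. A standalone illustration of the wrapping pattern (not part of this change):

package main

import (
	"fmt"
	"os"
	"sync"

	"github.com/mitchellh/cli"
)

func main() {
	// Wrap a BasicUi so writes from several goroutines are serialized by a
	// mutex, the same pattern job restart applies to meta.Ui.
	ui := &cli.ConcurrentUi{Ui: &cli.BasicUi{Writer: os.Stdout, ErrorWriter: os.Stderr}}

	var wg sync.WaitGroup
	for i := 0; i < 3; i++ {
		wg.Add(1)
		go func(n int) {
			defer wg.Done()
			ui.Output(fmt.Sprintf("restarting batch %d", n))
		}(i)
	}
	wg.Wait()
}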
@@ -62,6 +62,13 @@ func limit(s string, length int) string {
 	return s[:length]
 }
 
+// indentString returns the string s padded with the given number of empty
+// spaces before each line except for the first one.
+func indentString(s string, pad int) string {
+	prefix := strings.Repeat(" ", pad)
+	return strings.Join(strings.Split(s, "\n"), fmt.Sprintf("\n%s", prefix))
+}
+
 // wrapAtLengthWithPadding wraps the given text at the maxLineLength, taking
 // into account any provided left padding.
 func wrapAtLengthWithPadding(s string, pad int) string {
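To make the behaviour of the new indentString helper concrete, here is a standalone snippet that copies the helper (it is unexported, so it cannot be imported) and shows its output; the input string is arbitrary:

package main

import (
	"fmt"
	"strings"
)

// indentString mirrors the helper added above: every line after the first is
// prefixed with pad spaces.
func indentString(s string, pad int) string {
	prefix := strings.Repeat(" ", pad)
	return strings.Join(strings.Split(s, "\n"), fmt.Sprintf("\n%s", prefix))
}

func main() {
	fmt.Println(indentString("first\nsecond\nthird", 4))
	// Prints:
	// first
	//     second
	//     third
}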
command/job_restart.go: new file, 1205 lines (diff suppressed because it is too large)
command/job_restart_test.go: new file, 1591 lines (diff suppressed because it is too large)
@@ -4,6 +4,7 @@ import (
 	"flag"
 	"fmt"
 	"os"
+	"reflect"
 	"strings"
 
 	"github.com/hashicorp/nomad/api"
@@ -176,7 +177,35 @@ func (m *Meta) allNamespaces() bool {
 }
 
 func (m *Meta) Colorize() *colorstring.Colorize {
-	_, coloredUi := m.Ui.(*cli.ColoredUi)
+	ui := m.Ui
+	coloredUi := false
+
+	// Meta.Ui may wrap other cli.Ui instances, so unwrap them until we find a
+	// *cli.ColoredUi or there is nothing left to unwrap.
+	for {
+		if ui == nil {
+			break
+		}
+
+		_, coloredUi = ui.(*cli.ColoredUi)
+		if coloredUi {
+			break
+		}
+
+		v := reflect.ValueOf(ui)
+		if v.Kind() == reflect.Ptr {
+			v = v.Elem()
+		}
+		for i := 0; i < v.NumField(); i++ {
+			if !v.Field(i).CanInterface() {
+				continue
+			}
+			ui, _ = v.Field(i).Interface().(cli.Ui)
+			if ui != nil {
+				break
+			}
+		}
+	}
 
 	return &colorstring.Colorize{
 		Colors: colorstring.DefaultColors,
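The reflection loop above is needed because job restart now wraps meta.Ui in a *cli.ConcurrentUi, so a direct type assertion on m.Ui no longer detects a *cli.ColoredUi. A standalone sketch of the nesting it has to look through (not the command's code):

package main

import (
	"fmt"
	"os"

	"github.com/mitchellh/cli"
)

func main() {
	colored := &cli.ColoredUi{
		Ui: &cli.BasicUi{Writer: os.Stdout, ErrorWriter: os.Stderr},
	}
	// job restart hides the *cli.ColoredUi one level down, behind a wrapper.
	var wrapped cli.Ui = &cli.ConcurrentUi{Ui: colored}

	_, direct := wrapped.(*cli.ColoredUi)
	fmt.Println("direct type assertion finds ColoredUi:", direct) // false

	// Unwrapping one known layer recovers it; the reflection loop in
	// Meta.Colorize generalizes this to arbitrary wrapper types and depths.
	if cu, ok := wrapped.(*cli.ConcurrentUi); ok {
		_, nested := cu.Ui.(*cli.ColoredUi)
		fmt.Println("after unwrapping ConcurrentUi:", nested) // true
	}
}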
@@ -152,21 +152,48 @@ func waitForNodes(t *testing.T, client *api.Client) {
 	})
 }
 
-func waitForAllocRunning(t *testing.T, client *api.Client, allocID string) {
+func waitForJobAllocsStatus(t *testing.T, client *api.Client, jobID string, status string, token string) {
+	testutil.WaitForResult(func() (bool, error) {
+		q := &api.QueryOptions{AuthToken: token}
+
+		allocs, _, err := client.Jobs().Allocations(jobID, true, q)
+		if err != nil {
+			return false, fmt.Errorf("failed to query job allocs: %v", err)
+		}
+		if len(allocs) == 0 {
+			return false, fmt.Errorf("no allocs")
+		}
+
+		for _, alloc := range allocs {
+			if alloc.ClientStatus != status {
+				return false, fmt.Errorf("alloc status is %q not %q", alloc.ClientStatus, status)
+			}
+		}
+		return true, nil
+	}, func(err error) {
+		must.NoError(t, err)
+	})
+}
+
+func waitForAllocStatus(t *testing.T, client *api.Client, allocID string, status string) {
 	testutil.WaitForResult(func() (bool, error) {
 		alloc, _, err := client.Allocations().Info(allocID, nil)
 		if err != nil {
 			return false, err
 		}
-		if alloc.ClientStatus == api.AllocClientStatusRunning {
+		if alloc.ClientStatus == status {
 			return true, nil
 		}
-		return false, fmt.Errorf("alloc status: %s", alloc.ClientStatus)
+		return false, fmt.Errorf("alloc status is %q not %q", alloc.ClientStatus, status)
 	}, func(err error) {
-		t.Fatalf("timed out waiting for alloc to be running: %v", err)
+		must.NoError(t, err)
 	})
 }
 
+func waitForAllocRunning(t *testing.T, client *api.Client, allocID string) {
+	waitForAllocStatus(t, client, allocID, api.AllocClientStatusRunning)
+}
+
 func waitForCheckStatus(t *testing.T, client *api.Client, allocID, status string) {
 	testutil.WaitForResult(func() (bool, error) {
 		results, err := client.Allocations().Checks(allocID, nil)
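All of these helpers build on testutil.WaitForResult from github.com/hashicorp/nomad/testutil: the first function is polled until it returns true, and the second receives the last error if it never does. A self-contained sketch of that contract, with a counter standing in for the real allocation-status check (not part of this change):

package example

import (
	"fmt"
	"testing"

	"github.com/hashicorp/nomad/testutil"
)

// TestWaitForResultContract is a hypothetical test: the condition below is
// retried until it returns true, and the failure function only runs with the
// last error if the retries are exhausted. The counter stands in for "all
// allocations reached the desired client status".
func TestWaitForResultContract(t *testing.T) {
	attempts := 0
	testutil.WaitForResult(func() (bool, error) {
		attempts++
		if attempts < 3 {
			return false, fmt.Errorf("not ready yet (attempt %d)", attempts)
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("condition never became true: %v", err)
	})
}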