cli: display nomad service check status output in CLI commands

This PR adds Nomad service discovery (NSD) check status output to the CLI.

1. The 'nomad alloc status' command now prints a summary of NSD check statuses (if any are present)
2. The new 'nomad alloc checks' sub-command prints the complete NSD check status output (if any is present)
This commit is contained in:
Seth Hoenig
2022-08-05 08:30:17 -05:00
parent f9f6bc4057
commit 5694999c61
9 changed files with 432 additions and 23 deletions

View File

@@ -72,13 +72,13 @@ func (a *Allocations) Info(allocID string, q *QueryOptions) (*Allocation, *Query
// the task environment.
//
// The parameters are:
// * ctx: context to set deadlines or timeout
// * allocation: the allocation to execute command inside
// * task: the task's name to execute command in
// * tty: indicates whether to start a pseudo-tty for the command
// * stdin, stdout, stderr: the std io to pass to command.
// If tty is true, then streams need to point to a tty that's alive for the whole process
// * terminalSizeCh: A channel to send new tty terminal sizes
// - ctx: context to set deadlines or timeout
// - allocation: the allocation to execute command inside
// - task: the task's name to execute command in
// - tty: indicates whether to start a pseudo-tty for the command
// - stdin, stdout, stderr: the std io to pass to command.
// If tty is true, then streams need to point to a tty that's alive for the whole process
// - terminalSizeCh: A channel to send new tty terminal sizes
//
// The call blocks until command terminates (or an error occurs), and returns the exit code.
//
@@ -119,6 +119,17 @@ func (a *Allocations) Stats(alloc *Allocation, q *QueryOptions) (*AllocResourceU
return &resp, err
}
// Checks returns the status of every nomad service check registered in the
// allocation identified by allocID.
//
// Note: for cluster topologies where API consumers don't have network access to
// Nomad clients, set api.ClientConnTimeout to a small value (ex 1ms) to avoid
// long pauses on this API call.
func (a *Allocations) Checks(allocID string, q *QueryOptions) (AllocCheckStatuses, error) {
	path := "/v1/client/allocation/" + allocID + "/checks"

	var statuses AllocCheckStatuses
	if _, err := a.client.query(path, &statuses, q); err != nil {
		// Hand back whatever was decoded alongside the error, as callers
		// only inspect the result when err is nil.
		return statuses, err
	}
	return statuses, nil
}
// GC forces a garbage collection of client state for an allocation.
//
// Note: for cluster topologies where API consumers don't have network access to
@@ -506,12 +517,12 @@ type ExecStreamingInput struct {
TTYSize *TerminalSize `json:"tty_size,omitempty"`
}
// ExecStreamingExitResults captures the exit code of just completed nomad exec command
// ExecStreamingExitResult captures the exit code of just completed nomad exec command
type ExecStreamingExitResult struct {
// ExitCode is the exit code of the completed command; it is encoded as
// "exit_code" in JSON.
ExitCode int `json:"exit_code"`
}
// ExecStreamingInput represents an output streaming entity, e.g. stdout/stderr update or termination
// ExecStreamingOutput represents an output streaming entity, e.g. stdout/stderr update or termination
//
// At most one of these fields should be set: `Stdout`, `Stderr`, or `Result`.
// If `Exited` is true, then `Result` is non-nil, and other fields are nil.

View File

@@ -64,6 +64,24 @@ type AllocResourceUsage struct {
Timestamp int64
}
// AllocCheckStatus contains the current status of a nomad service discovery check.
//
// The field notes below describe how the CLI in this change consumes each
// value; the authoritative semantics are defined by the Nomad client that
// produces them.
type AllocCheckStatus struct {
// ID identifies the check.
ID string
// Check is the name of the check (rendered as "Name" by the CLI).
Check string
// Group is the group the check belongs to.
Group string
// Mode is the mode of the check (e.g. "healthiness").
Mode string
// Output is the output produced by the most recent check execution.
Output string
// Service is the name of the service the check is attached to.
Service string
// Task is the task the check belongs to; the CLI treats an empty value as
// a group-level check and renders it as "(group)".
Task string
// Status is the current status of the check (e.g. "pending", "failure").
Status string
// StatusCode is only displayed by the CLI when greater than zero.
// NOTE(review): presumably the HTTP response code for http checks - confirm.
StatusCode int
// Timestamp is interpreted by the CLI as seconds since the Unix epoch.
Timestamp int64
}
// AllocCheckStatuses holds the set of nomad service discovery checks within
// the allocation (including group and task level service checks).
//
// Being a map, iteration order is unspecified; callers that need stable output
// must sort the entries themselves.
// NOTE(review): the map key is presumably the check ID - confirm.
type AllocCheckStatuses map[string]AllocCheckStatus
// RestartPolicy defines how the Nomad client restarts
// tasks in a taskgroup when they fail
type RestartPolicy struct {

162
command/alloc_checks.go Normal file
View File

@@ -0,0 +1,162 @@
package command
import (
	"fmt"
	"sort"
	"strings"
	"time"

	"github.com/hashicorp/nomad/api"
	"github.com/hashicorp/nomad/api/contexts"
	"github.com/posener/complete"
)
// AllocChecksCommand implements the "alloc checks" CLI sub-command, which
// prints the status of the nomad service checks of a single allocation.
type AllocChecksCommand struct {
// Meta is embedded to provide the shared flag handling, Ui and API client
// construction used by this command.
Meta
}
// Help returns the long-form usage text of the command: the usage line, the
// general options shared by client commands, and the command specific flags.
func (c *AllocChecksCommand) Help() string {
helpText := `
Usage: nomad alloc checks [options] <allocation>
Alias: nomad checks
Outputs the latest health check status information for services in the allocation
using the Nomad service discovery provider.
General Options:
` + generalOptionsUsage(usageOptsDefault) + `
Checks Specific Options:
-verbose
Show full information.
`
// Trim the leading/trailing newlines introduced by the raw string literal.
return strings.TrimSpace(helpText)
}
// Synopsis returns the one-line description of the command.
func (c *AllocChecksCommand) Synopsis() string {
return "Outputs service health check status information."
}
// AutocompleteFlags returns the flags offered for shell completion: the
// client flag set provided by Meta plus the command specific -verbose flag.
func (c *AllocChecksCommand) AutocompleteFlags() complete.Flags {
	clientFlags := c.Meta.AutocompleteFlags(FlagSetClient)

	// -verbose is a boolean switch, so there is no value to predict.
	commandFlags := complete.Flags{
		"-verbose": complete.PredictNothing,
	}

	return mergeAutocompleteFlags(clientFlags, commandFlags)
}
// AutocompleteArgs returns a predictor that completes the allocation ID
// argument by prefix-searching allocations with the typed text. Any failure
// while building the client or searching yields no suggestions.
func (c *AllocChecksCommand) AutocompleteArgs() complete.Predictor {
	predictAllocs := func(a complete.Args) []string {
		client, err := c.Meta.Client()
		if err != nil {
			return nil
		}

		found, _, err := client.Search().PrefixSearch(a.Last, contexts.Allocs, nil)
		if err != nil {
			return nil
		}

		return found.Matches[contexts.Allocs]
	}

	return complete.PredictFunc(predictAllocs)
}
// Name returns the name of the command; Run uses it to name the flag set.
func (c *AllocChecksCommand) Name() string {
return "alloc checks"
}
// Run executes the command with the given CLI arguments and returns the
// process exit code: 0 on success, 1 on any usage, lookup or query error.
//
// Exactly one positional argument is accepted: an allocation ID or a unique
// prefix of one (at least two characters). The prefix is resolved to a single
// allocation, whose nomad service checks are then queried in the namespace of
// that allocation and printed one block per check.
//
// Checks are printed ordered by service, task, check name and finally ID so
// that the output is stable between invocations (the API returns a map, whose
// iteration order is unspecified).
func (c *AllocChecksCommand) Run(args []string) int {
	var verbose bool

	flags := c.Meta.FlagSet(c.Name(), FlagSetClient)
	flags.Usage = func() { c.Ui.Output(c.Help()) }
	flags.BoolVar(&verbose, "verbose", false, "")

	if err := flags.Parse(args); err != nil {
		return 1
	}
	args = flags.Args()

	if numArgs := len(args); numArgs < 1 {
		c.Ui.Error("An allocation ID is required")
		c.Ui.Error(commandErrorText(c))
		return 1
	} else if numArgs > 1 {
		c.Ui.Error("This command takes one argument (allocation ID)")
		c.Ui.Error(commandErrorText(c))
		return 1
	}

	client, err := c.Meta.Client()
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error initializing client: %v", err))
		return 1
	}

	allocID := args[0]

	// Truncate the id unless full length is requested
	length := shortId
	if verbose {
		length = fullId
	}

	// Query the allocation info
	if len(allocID) == 1 {
		c.Ui.Error("Alloc ID must contain at least two characters.")
		return 1
	}

	allocID = sanitizeUUIDPrefix(allocID)
	allocations, _, err := client.Allocations().PrefixList(allocID)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error querying allocation: %v", err))
		return 1
	}
	if len(allocations) == 0 {
		c.Ui.Error(fmt.Sprintf("No allocation(s) with prefix or id %q found", allocID))
		return 1
	}
	if len(allocations) > 1 {
		// Ambiguous prefix: list the candidates so the user can disambiguate.
		out := formatAllocListStubs(allocations, verbose, length)
		c.Ui.Error(fmt.Sprintf("Prefix matched multiple allocations\n\n%s", out))
		return 1
	}

	// prefix lookup matched single allocation (happy path), lookup the checks
	// using the namespace the allocation actually lives in
	q := &api.QueryOptions{Namespace: allocations[0].Namespace}
	checks, err := client.Allocations().Checks(allocations[0].ID, q)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error querying allocation checks: %s", err))
		return 1
	}

	c.Ui.Output(fmt.Sprintf("Status of %d Nomad Service Checks", len(checks)))
	c.Ui.Output("")

	// Flatten the map into a slice and sort it; ranging over the map directly
	// would print the checks in a different order on every run.
	ordered := make([]api.AllocCheckStatus, 0, len(checks))
	for _, check := range checks {
		ordered = append(ordered, check)
	}
	sort.Slice(ordered, func(i, j int) bool {
		a, b := ordered[i], ordered[j]
		switch {
		case a.Service != b.Service:
			return a.Service < b.Service
		case a.Task != b.Task:
			return a.Task < b.Task
		case a.Check != b.Check:
			return a.Check < b.Check
		default:
			return a.ID < b.ID
		}
	})

	pair := func(key, value string) string { return fmt.Sprintf("%s|=|%s", key, value) }

	// An empty task name denotes a group level check.
	taskFmt := func(s string) string {
		if s == "" {
			return "(group)"
		}
		return s
	}

	for _, check := range ordered {
		list := []string{
			pair("ID", check.ID),
			pair("Name", check.Check),
			pair("Group", check.Group),
			pair("Task", taskFmt(check.Task)),
			pair("Service", check.Service),
			pair("Status", check.Status),
		}
		// Only show a status code when one was recorded.
		if check.StatusCode > 0 {
			list = append(list, pair("StatusCode", fmt.Sprintf("%d", check.StatusCode)))
		}
		list = append(list,
			pair("Mode", check.Mode),
			pair("Timestamp", formatTaskTimes(time.Unix(check.Timestamp, 0))),
			pair("Output", check.Output),
		)
		c.Ui.Output(formatList(list))
		c.Ui.Output("")
	}

	return 0
}

View File

@@ -0,0 +1,122 @@
package command
import (
"testing"
"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/mitchellh/cli"
"github.com/posener/complete"
"github.com/shoenig/test/must"
)
// TestAllocChecksCommand_Implements is a compile-time assertion that
// *AllocChecksCommand satisfies the cli.Command interface.
func TestAllocChecksCommand_Implements(t *testing.T) {
ci.Parallel(t)
var _ cli.Command = (*AllocChecksCommand)(nil)
}
// TestAllocChecksCommand_Fails runs the command through its error paths and
// verifies each one exits with code 1 and reports the expected message.
func TestAllocChecksCommand_Fails(t *testing.T) {
	ci.Parallel(t)

	srv, _, url := testServer(t, false, nil)
	t.Cleanup(func() {
		_ = srv.Shutdown()
	})

	ui := cli.NewMockUi()
	cmd := &AllocChecksCommand{Meta: Meta{Ui: ui}}

	// The cases run in order against the same command and ui; the error
	// buffer is reset after each one so messages do not leak between cases.
	cases := []struct {
		args   []string
		expect string
	}{
		// fails on misuse
		{
			args:   []string{"some", "bad", "args"},
			expect: commandErrorText(cmd),
		},
		// fails on connection failure
		{
			args:   []string{"-address=nope", "foobar"},
			expect: "Error querying allocation",
		},
		// fails on missing allocation
		{
			args:   []string{"-address=" + url, "26470238-5CF2-438F-8772-DC67CFB0705C"},
			expect: "No allocation(s) with prefix or id",
		},
		// fails on prefix with too few characters
		{
			args:   []string{"-address=" + url, "2"},
			expect: "must contain at least two characters.",
		},
	}

	for _, tc := range cases {
		code := cmd.Run(tc.args)
		must.One(t, code)
		must.StrContains(t, ui.ErrorWriter.String(), tc.expect)
		ui.ErrorWriter.Reset()
	}
}
// TestAllocChecksCommand_AutocompleteArgs verifies that an allocation ID
// prefix is completed to the full ID of the single matching allocation.
func TestAllocChecksCommand_AutocompleteArgs(t *testing.T) {
	ci.Parallel(t)

	srv, _, url := testServer(t, true, nil)
	defer stopTestAgent(srv)

	// Insert a fake allocation directly into the server state store.
	alloc := mock.Alloc()
	store := srv.Agent.Server().State()
	must.NoError(t, store.UpsertAllocs(structs.MsgTypeTestSetup, 1000, []*structs.Allocation{alloc}))

	cmd := &AllocChecksCommand{Meta: Meta{Ui: cli.NewMockUi(), flagAddress: url}}

	// Predict from the first five characters of the allocation ID.
	matches := cmd.AutocompleteArgs().Predict(complete.Args{Last: alloc.ID[:5]})
	must.Len(t, 1, matches)
	must.Eq(t, alloc.ID, matches[0])
}
// TestAllocChecksCommand_Run registers a job with a nomad service check and
// verifies the command prints the check details for its allocation.
func TestAllocChecksCommand_Run(t *testing.T) {
	ci.Parallel(t)

	srv, client, url := testServer(t, true, nil)
	defer stopTestAgent(srv)

	// wait for nodes
	waitForNodes(t, client)

	const jobID = "job1_checks"
	resp, _, err := client.Jobs().Register(testNomadServiceJob(jobID), nil)
	must.NoError(t, err)

	// wait for registration success
	ui := cli.NewMockUi()
	must.Zero(t, waitForSuccess(ui, client, fullId, t, resp.EvalID))

	// Get an alloc id
	allocID := getAllocFromJob(t, client, jobID)

	// do not wait for alloc running - it will stay pending because the
	// health-check will never pass

	// Run command
	cmd := &AllocChecksCommand{Meta: Meta{Ui: ui, flagAddress: url}}
	must.Zero(t, cmd.Run([]string{"-address=" + url, allocID}))

	// check output
	out := ui.OutputWriter.String()
	for _, expect := range []string{
		`Name = check1`,
		`Group = job1_checks.group1[0]`,
		`Task = (group)`,
		`Service = service1`,
		`Mode = healthiness`,
	} {
		must.StrContains(t, out, expect)
	}
}

View File

@@ -114,20 +114,12 @@ func TestAllocSignalCommand_Run(t *testing.T) {
code := waitForSuccess(ui, client, fullId, t, resp.EvalID)
must.Zero(t, code)
// get an alloc id
allocID := ""
if allocs, _, err := client.Jobs().Allocations(jobID, false, nil); err == nil {
if len(allocs) > 0 {
allocID = allocs[0].ID
}
}
must.NotEq(t, "", allocID)
// Get an alloc id
allocID := getAllocFromJob(t, client, jobID)
// Wait for alloc to be running
waitForAllocRunning(t, client, allocID)
code = cmd.Run([]string{"-address=" + url, allocID})
must.Zero(t, code)
ui.OutputWriter.Reset()
}

View File

@@ -199,10 +199,17 @@ func (c *AllocStatusCommand) Run(args []string) int {
}
c.Ui.Output(output)
// add allocation network addresses
if alloc.AllocatedResources != nil && len(alloc.AllocatedResources.Shared.Networks) > 0 && alloc.AllocatedResources.Shared.Networks[0].HasPorts() {
c.Ui.Output("")
c.Ui.Output(formatAllocNetworkInfo(alloc))
}
// add allocation nomad service discovery checks
if checkOutput := formatAllocNomadServiceChecks(alloc.ID, client); checkOutput != "" {
c.Ui.Output("")
c.Ui.Output(checkOutput)
}
}
if short {
@@ -355,7 +362,28 @@ func formatAllocNetworkInfo(alloc *api.Allocation) string {
mode = fmt.Sprintf(" (mode = %q)", nw.Mode)
}
return fmt.Sprintf("Allocation Addresses%s\n%s", mode, formatList(addrs))
return fmt.Sprintf("Allocation Addresses%s:\n%s", mode, formatList(addrs))
}
// formatAllocNomadServiceChecks renders a summary table of the nomad service
// checks of the allocation, one row per check with the columns
// Service, Task, Name, Mode and Status. Rows are sorted for stable output.
//
// The lookup is best-effort: an empty string is returned when the checks
// cannot be queried or when the allocation has none, so the caller can simply
// omit the section.
func formatAllocNomadServiceChecks(allocID string, client *api.Client) string {
	statuses, err := client.Allocations().Checks(allocID, nil)
	if err != nil || len(statuses) == 0 {
		return ""
	}

	rows := make([]string, 0, len(statuses))
	for _, status := range statuses {
		// group level checks have no task name
		task := status.Task
		if task == "" {
			task = "(group)"
		}
		// service | task | name | mode | status
		rows = append(rows, fmt.Sprintf("%s|%s|%s|%s|%s", status.Service, task, status.Check, status.Mode, status.Status))
	}
	sort.Strings(rows)

	// The header always stays on top of the sorted rows.
	table := append([]string{"Service|Task|Name|Mode|Status"}, rows...)
	return fmt.Sprintf("Nomad Service Checks:\n%s", formatList(table))
}
// futureEvalTimePretty returns when the eval is eligible to reschedule
@@ -553,7 +581,7 @@ func (c *AllocStatusCommand) outputTaskResources(alloc *api.Allocation, task str
return
}
c.Ui.Output("Task Resources")
c.Ui.Output("Task Resources:")
var addr []string
for _, nw := range resource.Networks {
ports := append(nw.DynamicPorts, nw.ReservedPorts...) //nolint:gocritic

View File

@@ -194,6 +194,9 @@ func TestAllocStatusCommand_Run(t *testing.T) {
out = ui.OutputWriter.String()
must.StrContains(t, out, allocID)
// make sure nsd checks status output is elided if none exist
must.StrNotContains(t, out, `Nomad Service Checks:`)
}
func TestAllocStatusCommand_RescheduleInfo(t *testing.T) {
@@ -441,3 +444,40 @@ func TestAllocStatusCommand_CSIVolumes(t *testing.T) {
must.StrContains(t, out, fmt.Sprintf("%s minnie", vol0))
must.StrNotContains(t, out, "Host Volumes")
}
// TestAllocStatusCommand_NSD_Checks registers a job with a nomad service
// check and verifies 'alloc status' includes the check summary table.
func TestAllocStatusCommand_NSD_Checks(t *testing.T) {
	ci.Parallel(t)

	srv, client, url := testServer(t, true, nil)
	defer stopTestAgent(srv)

	// wait for nodes
	waitForNodes(t, client)

	const jobID = "job1_checks"
	resp, _, err := client.Jobs().Register(testNomadServiceJob(jobID), nil)
	must.NoError(t, err)

	// wait for registration success
	ui := cli.NewMockUi()
	must.Zero(t, waitForSuccess(ui, client, fullId, t, resp.EvalID))

	// Get an alloc id
	allocID := getAllocFromJob(t, client, jobID)

	// do not wait for alloc running - it will stay pending because the
	// health-check will never pass

	// Run command
	cmd := &AllocStatusCommand{Meta: Meta{Ui: ui, flagAddress: url}}
	must.Zero(t, cmd.Run([]string{"-address=" + url, allocID}))

	// check output: section title, table header, then the single check row
	out := ui.OutputWriter.String()
	must.StrContains(t, out, `Nomad Service Checks:`)

	headerRe := regexp.MustCompile(`Service\s+Task\s+Name\s+Mode\s+Status`)
	must.RegexMatch(t, headerRe, out)

	rowRe := regexp.MustCompile(`service1\s+\(group\)\s+check1\s+healthiness\s+(pending|failure)`)
	must.RegexMatch(t, rowRe, out)
}

View File

@@ -177,6 +177,11 @@ func Commands(metaPtr *Meta, agentUi cli.Ui) map[string]cli.CommandFactory {
Meta: meta,
}, nil
},
"alloc checks": func() (cli.Command, error) {
return &AllocChecksCommand{
Meta: meta,
}, nil
},
"alloc status": func() (cli.Command, error) {
return &AllocStatusCommand{
Meta: meta,

View File

@@ -4,6 +4,7 @@ import (
"fmt"
"os"
"testing"
"time"
"github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/command/agent"
@@ -22,7 +23,7 @@ func testServer(t *testing.T, runClient bool, cb func(*agent.Config)) (*agent.Te
cb(config)
}
})
t.Cleanup(func() { a.Shutdown() })
t.Cleanup(func() { _ = a.Shutdown() })
c := a.Client()
return a, c, a.HTTPAddr()
@@ -37,7 +38,7 @@ func testClient(t *testing.T, name string, cb func(*agent.Config)) (*agent.TestA
cb(config)
}
})
t.Cleanup(func() { a.Shutdown() })
t.Cleanup(func() { _ = a.Shutdown() })
c := a.Client()
t.Logf("Waiting for client %s to join server(s) %s", name, a.GetConfig().Client.Servers)
@@ -73,6 +74,25 @@ func testJob(jobID string) *api.Job {
return job
}
// testNomadServiceJob returns the job produced by testJob with a single group
// level service attached to its first task group. The service uses the nomad
// provider and carries one http check running on a one second interval.
func testNomadServiceJob(jobID string) *api.Job {
	check := api.ServiceCheck{
		Name:     "check1",
		Type:     "http",
		Path:     "/",
		Interval: 1 * time.Second,
		Timeout:  1 * time.Second,
	}

	service := &api.Service{
		Name:        "service1",
		PortLabel:   "1000",
		AddressMode: "",
		Address:     "127.0.0.1",
		Checks:      []api.ServiceCheck{check},
		Provider:    "nomad",
	}

	job := testJob(jobID)
	job.TaskGroups[0].Services = []*api.Service{service}
	return job
}
func testMultiRegionJob(jobID, region, datacenter string) *api.Job {
task := api.NewTask("task1", "mock_driver").
SetConfig("kill_after", "10s").
@@ -144,6 +164,17 @@ func waitForAllocRunning(t *testing.T, client *api.Client, allocID string) {
})
}
// getAllocFromJob returns the ID of the first allocation listed for the job
// identified by jobID. The test fails immediately if listing the allocations
// returns an error or if the job has no allocation.
func getAllocFromJob(t *testing.T, client *api.Client, jobID string) string {
	t.Helper()

	// Surface API errors directly instead of masking them as "not found".
	allocations, _, err := client.Jobs().Allocations(jobID, false, nil)
	must.NoError(t, err)

	var allocID string
	if len(allocations) > 0 {
		allocID = allocations[0].ID
	}
	must.NotEq(t, "", allocID, must.Sprint("expected to find an allocation after running job", jobID))
	return allocID
}
// stopTestAgent shuts down the test agent and deliberately discards the
// returned error, so it can be used directly in a defer statement.
func stopTestAgent(a *agent.TestAgent) {
_ = a.Shutdown()
}