From 96acddbc131fa6f98c107512f7832f2e21ad68d8 Mon Sep 17 00:00:00 2001 From: "Soren L. Hansen" Date: Fri, 1 Mar 2024 15:07:28 +0100 Subject: [PATCH] Avoid NPE in nomad/command/job_restart.go (#20049) stopAlloc() checks if an allocation represents a system job like this: ``` if *alloc.Job.Type == api.JobTypeSystem { ... } ``` This caused the cli to crash: ``` ==> 2024-02-29T08:45:53+01:00: Restarting 2 allocations 2024-02-29T08:45:54+01:00: Rescheduling allocation "6a9da11a" for group "redacted-group" panic: runtime error: invalid memory address or nil pointer dereference [signal SIGSEGV: segmentation violation code=0x2 addr=0x20 pc=0x10686affc] goroutine 36 [running]: github.com/hashicorp/nomad/command.(*JobRestartCommand).stopAlloc(0x14000b11040, {0x14000996dc0?, 0x0?}) github.com/hashicorp/nomad/command/job_restart.go:968 +0x25c github.com/hashicorp/nomad/command.(*JobRestartCommand).handleAlloc(0x14000b11040, {0x14000996dc0?, 0x0?}) github.com/hashicorp/nomad/command/job_restart.go:868 +0x34 github.com/hashicorp/nomad/command.(*JobRestartCommand).Run.(*JobRestartCommand).Run.func1.func2() github.com/hashicorp/nomad/command/job_restart.go:392 +0x28 github.com/hashicorp/go-multierror.(*Group).Go.func1() github.com/hashicorp/go-multierror@v1.1.1/group.go:23 +0x60 created by github.com/hashicorp/go-multierror.(*Group).Go in goroutine 1 github.com/hashicorp/go-multierror@v1.1.1/group.go:20 +0x84 ``` Attaching a debugger revealed that `alloc.Job` was set, but `alloc.Job.Type` was nil. After guarding the `.Type` check with a `alloc.Job.Type != nil`, it still crashed. This time, `alloc.Job` was nil. I was scrambling to get the job running again, so I didn't have the opportunity to find out why those values were nil, but this change ensures the CLI does not crash in these situations. Fixes #20048 --- .changelog/20049.txt | 3 +++ command/job_restart.go | 8 +++++++- 2 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 .changelog/20049.txt diff --git a/.changelog/20049.txt b/.changelog/20049.txt new file mode 100644 index 000000000..2f04b6b0c --- /dev/null +++ b/.changelog/20049.txt @@ -0,0 +1,3 @@ +```release-note:bug +cli: Fixed a bug where the `nomad job restart` command could crash if the job type was not present in a response from the server +``` diff --git a/command/job_restart.go b/command/job_restart.go index f7e1d3db5..102cd9874 100644 --- a/command/job_restart.go +++ b/command/job_restart.go @@ -965,7 +965,7 @@ func (c *JobRestartCommand) stopAlloc(alloc AllocationListStubWithJob) error { // Allocations for system jobs do not get replaced by the scheduler after // being stopped, so an eval is needed to trigger the reconciler. - if *alloc.Job.Type == api.JobTypeSystem { + if alloc.isSystemJob() { opts := api.EvalOptions{ ForceReschedule: true, } @@ -1241,3 +1241,9 @@ func (a *AllocationListStubWithJob) IsRunning() bool { return a.ClientStatus == api.AllocClientStatusRunning || a.DesiredStatus == api.AllocDesiredStatusRun } + +// isSystemJob returns true if allocation's job type +// is "system", false otherwise +func (a *AllocationListStubWithJob) isSystemJob() bool { + return a.Job != nil && a.Job.Type != nil && *a.Job.Type == api.JobTypeSystem +}