mirror of
https://github.com/kemko/nomad.git
synced 2026-01-01 16:05:42 +03:00
Avoid NPE in nomad/command/job_restart.go (#20049)
stopAlloc() checks if an allocation represents a system job like this:
```
if *alloc.Job.Type == api.JobTypeSystem {
...
}
```
This caused the cli to crash:
```
==> 2024-02-29T08:45:53+01:00: Restarting 2 allocations
2024-02-29T08:45:54+01:00: Rescheduling allocation "6a9da11a" for group "redacted-group"
panic: runtime error: invalid memory address or nil pointer dereference
[signal SIGSEGV: segmentation violation code=0x2 addr=0x20 pc=0x10686affc]
goroutine 36 [running]:
github.com/hashicorp/nomad/command.(*JobRestartCommand).stopAlloc(0x14000b11040, {0x14000996dc0?, 0x0?})
github.com/hashicorp/nomad/command/job_restart.go:968 +0x25c
github.com/hashicorp/nomad/command.(*JobRestartCommand).handleAlloc(0x14000b11040, {0x14000996dc0?, 0x0?})
github.com/hashicorp/nomad/command/job_restart.go:868 +0x34
github.com/hashicorp/nomad/command.(*JobRestartCommand).Run.(*JobRestartCommand).Run.func1.func2()
github.com/hashicorp/nomad/command/job_restart.go:392 +0x28
github.com/hashicorp/go-multierror.(*Group).Go.func1()
github.com/hashicorp/go-multierror@v1.1.1/group.go:23 +0x60
created by github.com/hashicorp/go-multierror.(*Group).Go in goroutine 1
github.com/hashicorp/go-multierror@v1.1.1/group.go:20 +0x84
```
Attaching a debugger revealed that `alloc.Job` was set, but
`alloc.Job.Type` was nil. After guarding the `.Type` check with a
`alloc.Job.Type != nil`, it still crashed. This time, `alloc.Job` was
nil.
I was scrambling to get the job running again, so I didn't have the
opportunity to find out why those values were nil, but this change
ensures the CLI does not crash in these situations.
Fixes #20048
This commit is contained in:
3
.changelog/20049.txt
Normal file
3
.changelog/20049.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
```release-note:bug
|
||||
cli: Fixed a bug where the `nomad job restart` command could crash if the job type was not present in a response from the server
|
||||
```
|
||||
@@ -965,7 +965,7 @@ func (c *JobRestartCommand) stopAlloc(alloc AllocationListStubWithJob) error {
|
||||
|
||||
// Allocations for system jobs do not get replaced by the scheduler after
|
||||
// being stopped, so an eval is needed to trigger the reconciler.
|
||||
if *alloc.Job.Type == api.JobTypeSystem {
|
||||
if alloc.isSystemJob() {
|
||||
opts := api.EvalOptions{
|
||||
ForceReschedule: true,
|
||||
}
|
||||
@@ -1241,3 +1241,9 @@ func (a *AllocationListStubWithJob) IsRunning() bool {
|
||||
return a.ClientStatus == api.AllocClientStatusRunning ||
|
||||
a.DesiredStatus == api.AllocDesiredStatusRun
|
||||
}
|
||||
|
||||
// isSystemJob returns true if allocation's job type
|
||||
// is "system", false otherwise
|
||||
func (a *AllocationListStubWithJob) isSystemJob() bool {
|
||||
return a.Job != nil && a.Job.Type != nil && *a.Job.Type == api.JobTypeSystem
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user