mirror of
https://github.com/kemko/nomad.git
synced 2026-01-01 16:05:42 +03:00
scheduler: monitor deployments correctly (#26605)
Corrects two minor bugs that prevented proper deployment monitoring for systems jobs: populating the new deployment field of the system scheduler object, and correcting allocrunner health checks that were guarded not to run on system jobs.
This commit is contained in:
committed by
GitHub
parent
3d373c9a6a
commit
7c4faf9227
@@ -86,7 +86,9 @@ func newAllocHealthWatcherHook(
|
||||
|
||||
// Neither deployments nor migrations care about the health of
|
||||
// non-service jobs so never watch their health
|
||||
if alloc.Job.Type != structs.JobTypeService {
|
||||
switch alloc.Job.Type {
|
||||
case structs.JobTypeService, structs.JobTypeSystem:
|
||||
default:
|
||||
return noopAllocHealthWatcherHook{}
|
||||
}
|
||||
|
||||
|
||||
@@ -19,6 +19,7 @@ import (
|
||||
"github.com/hashicorp/nomad/helper/uuid"
|
||||
"github.com/hashicorp/nomad/nomad/mock"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
"github.com/shoenig/test/must"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
@@ -396,26 +397,28 @@ func TestHealthHook_SetHealth_unhealthy(t *testing.T) {
|
||||
require.NoError(h.Postrun())
|
||||
}
|
||||
|
||||
// TestHealthHook_SystemNoop asserts that system jobs return the noop tracker.
|
||||
func TestHealthHook_SystemNoop(t *testing.T) {
|
||||
// TestHealthHook_System asserts that system jobs trigger hooks just like service jobs.
|
||||
func TestHealthHook_System(t *testing.T) {
|
||||
ci.Parallel(t)
|
||||
|
||||
alloc := mock.SystemAlloc()
|
||||
h := newAllocHealthWatcherHook(testlog.HCLogger(t), alloc.Copy(), nil, nil, nil, nil)
|
||||
|
||||
// Assert that it's the noop impl
|
||||
_, ok := h.(noopAllocHealthWatcherHook)
|
||||
require.True(t, ok)
|
||||
must.False(t, ok)
|
||||
|
||||
// Assert the noop impl does not implement any hooks
|
||||
_, ok = h.(*allocHealthWatcherHook)
|
||||
must.True(t, ok)
|
||||
|
||||
// Assert the other hooks are implemented, too
|
||||
_, ok = h.(interfaces.RunnerPrerunHook)
|
||||
require.False(t, ok)
|
||||
must.True(t, ok)
|
||||
_, ok = h.(interfaces.RunnerUpdateHook)
|
||||
require.False(t, ok)
|
||||
must.True(t, ok)
|
||||
_, ok = h.(interfaces.RunnerPostrunHook)
|
||||
require.False(t, ok)
|
||||
must.True(t, ok)
|
||||
_, ok = h.(interfaces.ShutdownHook)
|
||||
require.False(t, ok)
|
||||
must.True(t, ok)
|
||||
}
|
||||
|
||||
// TestHealthHook_BatchNoop asserts that batch jobs return the noop tracker.
|
||||
|
||||
@@ -102,7 +102,7 @@ func (s *SystemScheduler) Process(eval *structs.Evaluation) (err error) {
|
||||
desc := fmt.Sprintf("scheduler cannot handle '%s' evaluation reason", eval.TriggeredBy)
|
||||
return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil,
|
||||
s.failedTGAllocs, s.planAnnotations, structs.EvalStatusFailed, desc,
|
||||
s.queuedAllocs, "")
|
||||
s.queuedAllocs, s.deployment.GetID())
|
||||
}
|
||||
|
||||
limit := maxSystemScheduleAttempts
|
||||
@@ -116,7 +116,7 @@ func (s *SystemScheduler) Process(eval *structs.Evaluation) (err error) {
|
||||
if statusErr, ok := err.(*SetStatusError); ok {
|
||||
return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil,
|
||||
s.failedTGAllocs, s.planAnnotations, statusErr.EvalStatus, err.Error(),
|
||||
s.queuedAllocs, "")
|
||||
s.queuedAllocs, s.deployment.GetID())
|
||||
}
|
||||
return err
|
||||
}
|
||||
@@ -124,7 +124,7 @@ func (s *SystemScheduler) Process(eval *structs.Evaluation) (err error) {
|
||||
// Update the status to complete
|
||||
return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil,
|
||||
s.failedTGAllocs, s.planAnnotations, structs.EvalStatusComplete, "",
|
||||
s.queuedAllocs, "")
|
||||
s.queuedAllocs, s.deployment.GetID())
|
||||
}
|
||||
|
||||
// process is wrapped in retryMax to iteratively run the handler until we have no
|
||||
@@ -286,6 +286,11 @@ func (s *SystemScheduler) computeJobAllocs() error {
|
||||
s.plan.Deployment = nr.DeploymentCurrent
|
||||
s.plan.DeploymentUpdates = nr.DeploymentUpdates
|
||||
|
||||
// Update the stored deployment
|
||||
if nr.DeploymentCurrent != nil {
|
||||
s.deployment = nr.DeploymentCurrent
|
||||
}
|
||||
|
||||
// Add all the allocs to stop
|
||||
for _, e := range r.Stop {
|
||||
s.plan.AppendStoppedAlloc(e.Alloc, sstructs.StatusAllocNotNeeded, "", "")
|
||||
|
||||
Reference in New Issue
Block a user