From 268a99e71a49cc32572585e5833d4bf70b289bef Mon Sep 17 00:00:00 2001
From: Preetha Appan
Date: Wed, 9 May 2018 11:30:42 -0500
Subject: [PATCH] Add unit tests for forced rescheduling

---
 nomad/job_endpoint_test.go  | 80 +++++++++++++++++++++++++++++++++++++
 scheduler/reconcile_test.go | 74 ++++++++++++++++++++++++++++++++++
 2 files changed, 154 insertions(+)

diff --git a/nomad/job_endpoint_test.go b/nomad/job_endpoint_test.go
index 92067040a..85197ab80 100644
--- a/nomad/job_endpoint_test.go
+++ b/nomad/job_endpoint_test.go
@@ -1297,6 +1297,86 @@ func TestJobEndpoint_Evaluate(t *testing.T) {
 	}
 }
 
+// TestJobEndpoint_ForceRescheduleEvaluate registers a job, stores a failed
+// allocation for it, and verifies that Job.Evaluate with ForceReschedule set
+// creates a pending eval and marks the alloc's DesiredTransition.
+func TestJobEndpoint_ForceRescheduleEvaluate(t *testing.T) {
+	require := require.New(t)
+	t.Parallel()
+	s1 := TestServer(t, func(c *Config) {
+		c.NumSchedulers = 0 // Prevent automatic dequeue
+	})
+	defer s1.Shutdown()
+	codec := rpcClient(t, s1)
+	testutil.WaitForLeader(t, s1.RPC)
+
+	// Create the register request
+	job := mock.Job()
+	req := &structs.JobRegisterRequest{
+		Job: job,
+		WriteRequest: structs.WriteRequest{
+			Region:    "global",
+			Namespace: job.Namespace,
+		},
+	}
+
+	// Fetch the response
+	var resp structs.JobRegisterResponse
+	err := msgpackrpc.CallWithCodec(codec, "Job.Register", req, &resp)
+	require.Nil(err)
+	// resp.Index is a uint64; compare against a uint64 zero so the check can fail
+	require.NotEqual(uint64(0), resp.Index)
+
+	state := s1.fsm.State()
+	job, err = state.JobByID(nil, structs.DefaultNamespace, job.ID)
+	require.Nil(err)
+	require.NotNil(job)
+
+	// Create a failed alloc
+	alloc := mock.Alloc()
+	alloc.Job = job
+	alloc.JobID = job.ID
+	alloc.TaskGroup = job.TaskGroups[0].Name
+	alloc.Namespace = job.Namespace
+	alloc.ClientStatus = structs.AllocClientStatusFailed
+	err = s1.State().UpsertAllocs(resp.Index+1, []*structs.Allocation{alloc})
+	require.Nil(err)
+
+	// Force a re-evaluation
+	reEval := &structs.JobEvaluateRequest{
+		JobID:       job.ID,
+		EvalOptions: structs.EvalOptions{ForceReschedule: true},
+		WriteRequest: structs.WriteRequest{
+			Region:    "global",
+			Namespace: job.Namespace,
+		},
+	}
+
+	// Fetch the response
+	err = msgpackrpc.CallWithCodec(codec, "Job.Evaluate", reEval, &resp)
+	require.Nil(err)
+	require.NotEqual(uint64(0), resp.Index)
+
+	// Lookup the evaluation
+	ws := memdb.NewWatchSet()
+	eval, err := state.EvalByID(ws, resp.EvalID)
+	require.Nil(err)
+	require.NotNil(eval)
+	require.Equal(resp.EvalCreateIndex, eval.CreateIndex)
+	require.Equal(job.Priority, eval.Priority)
+	require.Equal(job.Type, eval.Type)
+	require.Equal(structs.EvalTriggerJobRegister, eval.TriggeredBy)
+	require.Equal(job.ID, eval.JobID)
+	require.Equal(resp.JobModifyIndex, eval.JobModifyIndex)
+	require.Equal(structs.EvalStatusPending, eval.Status)
+
+	// Lookup the alloc, verify DesiredTransition ForceReschedule
+	alloc, err = state.AllocByID(ws, alloc.ID)
+	require.Nil(err)
+	require.NotNil(alloc)
+	require.NotNil(alloc.DesiredTransition.ForceReschedule)
+	require.True(*alloc.DesiredTransition.ForceReschedule)
+}
+
 func TestJobEndpoint_Evaluate_ACL(t *testing.T) {
 	t.Parallel()
 	require := require.New(t)
diff --git a/scheduler/reconcile_test.go b/scheduler/reconcile_test.go
index 9d22439e2..8ac0db020 100644
--- a/scheduler/reconcile_test.go
+++ b/scheduler/reconcile_test.go
@@ -4570,3 +4570,77 @@ func TestReconciler_SuccessfulDeploymentWithFailedAllocs_Reschedule(t *testing.T
 	})
 	assertPlaceResultsHavePreviousAllocs(t, 10, r.place)
 }
+
+// TestReconciler_ForceReschedule_Service tests that a failed service
+// allocation that has exhausted its reschedule attempts is still replaced
+// when its DesiredTransition has ForceReschedule set.
+func TestReconciler_ForceReschedule_Service(t *testing.T) {
+	require := require.New(t)
+
+	// Set desired 5
+	job := mock.Job()
+	job.TaskGroups[0].Count = 5
+	tgName := job.TaskGroups[0].Name
+
+	// Set up reschedule policy and update stanza
+	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
+		Attempts:      1,
+		Interval:      24 * time.Hour,
+		Delay:         5 * time.Second,
+		DelayFunction: "",
+		MaxDelay:      1 * time.Hour,
+		Unlimited:     false,
+	}
+	job.TaskGroups[0].Update = noCanaryUpdate
+
+	// Create 5 existing allocations
+	var allocs []*structs.Allocation
+	for i := 0; i < 5; i++ {
+		alloc := mock.Alloc()
+		alloc.Job = job
+		alloc.JobID = job.ID
+		alloc.NodeID = uuid.Generate()
+		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
+		allocs = append(allocs, alloc)
+		alloc.ClientStatus = structs.AllocClientStatusRunning
+	}
+
+	// Mark one as failed and past its reschedule limit so not eligible to reschedule
+	allocs[0].ClientStatus = structs.AllocClientStatusFailed
+	allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
+		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
+			PrevAllocID: uuid.Generate(),
+			PrevNodeID:  uuid.Generate(),
+		},
+	}}
+
+	// Mark DesiredTransition ForceReschedule
+	allocs[0].DesiredTransition = structs.DesiredTransition{ForceReschedule: helper.BoolToPtr(true)}
+
+	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
+	r := reconciler.Compute()
+
+	// Verify that no follow up evals were created
+	evals := r.desiredFollowupEvals[tgName]
+	require.Nil(evals)
+
+	// Verify that one rescheduled alloc was created because of the forced reschedule
+	assertResults(t, r, &resultExpectation{
+		createDeployment:  nil,
+		deploymentUpdates: nil,
+		place:             1,
+		inplace:           0,
+		stop:              0,
+		desiredTGUpdates: map[string]*structs.DesiredUpdates{
+			job.TaskGroups[0].Name: {
+				Place:  1,
+				Ignore: 4,
+			},
+		},
+	})
+
+	// Rescheduled allocs should have previous allocs
+	assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place))
+	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
+	assertPlacementsAreRescheduled(t, 1, r.place)
+}