fix reconcile tests

committed by Preetha Appan
parent b1df4611fe
commit be3e3eadf6

@@ -194,26 +194,8 @@ func (a *allocReconciler) Compute() *reconcileResults {
     // Detect if the deployment is paused
     if a.deployment != nil {
-        // XXX Fix
-        // XXX An idea for not replacing failed allocs that are part of
-        // deployment that will fail immediately is to only replace them if
-        // their desired transition has a replace bool set by the deployment
-        // watcher.
-
-        // Detect if any allocs associated with this deploy have failed
-        // Failed allocations could edge trigger an evaluation before the deployment watcher
-        // runs and marks the deploy as failed. This block makes sure that is still
-        // considered a failed deploy
-        failedAllocsInDeploy := false
-        //for _, as := range m {
-        //for _, alloc := range as {
-        //if alloc.DeploymentID == a.deployment.ID && alloc.ClientStatus == structs.AllocClientStatusFailed {
-        //failedAllocsInDeploy = true
-        //}
-        //}
-        //}
         a.deploymentPaused = a.deployment.Status == structs.DeploymentStatusPaused
-        a.deploymentFailed = a.deployment.Status == structs.DeploymentStatusFailed || failedAllocsInDeploy
+        a.deploymentFailed = a.deployment.Status == structs.DeploymentStatusFailed
     }

     // Reconcile each group

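For readers skimming the hunk above: the reconciler now derives its paused/failed flags from the deployment status alone and no longer scans allocations for failures; marking a deploy as failed is left to the deployment watcher. Below is a minimal sketch of that detection logic under simplifying assumptions. The Deployment type and the two status constants are trimmed-down stand-ins for the real ones in github.com/hashicorp/nomad/nomad/structs, and the standalone deploymentFlags function is hypothetical, not reconciler code.

package main

import "fmt"

// Simplified stand-ins for structs.Deployment and the deployment status
// constants referenced in the hunk above.
const (
    DeploymentStatusPaused = "paused"
    DeploymentStatusFailed = "failed"
)

type Deployment struct {
    Status string
}

// deploymentFlags mirrors the simplified detection: both flags are read
// directly off the deployment status, with no per-allocation failure scan.
func deploymentFlags(d *Deployment) (paused, failed bool) {
    if d == nil {
        return false, false
    }
    return d.Status == DeploymentStatusPaused, d.Status == DeploymentStatusFailed
}

func main() {
    paused, failed := deploymentFlags(&Deployment{Status: DeploymentStatusFailed})
    fmt.Println(paused, failed) // false true
}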
@@ -19,69 +19,6 @@ import (
     "github.com/stretchr/testify/require"
 )

-/*
-Basic Tests:
-√ Place when there is nothing in the cluster
-√ Place remainder when there is some in the cluster
-√ Scale down from n to n-m where n != m
-√ Scale down from n to zero
-√ Inplace upgrade test
-√ Inplace upgrade and scale up test
-√ Inplace upgrade and scale down test
-√ Destructive upgrade
-√ Destructive upgrade and scale up test
-√ Destructive upgrade and scale down test
-√ Handle lost nodes
-√ Handle lost nodes and scale up
-√ Handle lost nodes and scale down
-√ Handle draining nodes
-√ Handle draining nodes and scale up
-√ Handle draining nodes and scale down
-√ Handle task group being removed
-√ Handle job being stopped both as .Stopped and nil
-√ Place more that one group
-√ Handle delayed rescheduling failed allocs for batch jobs
-√ Handle delayed rescheduling failed allocs for service jobs
-√ Handle eligible now rescheduling failed allocs for batch jobs
-√ Handle eligible now rescheduling failed allocs for service jobs
-√ Previously rescheduled allocs should not be rescheduled again
-√ Aggregated evaluations for allocations that fail close together
-
-Update stanza Tests:
-√ Stopped job cancels any active deployment
-√ Stopped job doesn't cancel terminal deployment
-√ JobIndex change cancels any active deployment
-√ JobIndex change doesn't cancels any terminal deployment
-√ Destructive changes create deployment and get rolled out via max_parallelism
-√ Don't create a deployment if there are no changes
-√ Deployment created by all inplace updates
-√ Paused or failed deployment doesn't create any more canaries
-√ Paused or failed deployment doesn't do any placements unless replacing lost allocs
-√ Paused or failed deployment doesn't do destructive updates
-√ Paused does do migrations
-√ Failed deployment doesn't do migrations
-√ Canary that is on a draining node
-√ Canary that is on a lost node
-√ Stop old canaries
-√ Create new canaries on job change
-√ Create new canaries on job change while scaling up
-√ Create new canaries on job change while scaling down
-√ Fill canaries if partial placement
-√ Promote canaries unblocks max_parallel
-√ Promote canaries when canaries == count
-√ Only place as many as are healthy in deployment
-√ Limit calculation accounts for healthy allocs on migrating/lost nodes
-√ Failed deployment should not place anything
-√ Run after canaries have been promoted, new allocs have been rolled out and there is no deployment
-√ Failed deployment cancels non-promoted task groups
-√ Failed deployment and updated job works
-√ Finished deployment gets marked as complete
-√ Change job change while scaling up
-√ Update the job when all allocations from the previous job haven't been placed yet.
-√ Paused or failed deployment doesn't do any rescheduling of failed allocs
-√ Running deployment with failed allocs doesn't do any rescheduling of failed allocs
-*/
-
 var (
     canaryUpdate = &structs.UpdateStrategy{
         Canary: 2,

@@ -3117,6 +3054,7 @@ func TestReconciler_PromoteCanaries_CanariesEqualCount(t *testing.T) {
         DesiredTotal:    2,
         DesiredCanaries: 2,
         PlacedAllocs:    2,
+        HealthyAllocs:   2,
     }
     d.TaskGroups[job.TaskGroups[0].Name] = s

@@ -3913,7 +3851,8 @@ func TestReconciler_FailedDeployment_DontReschedule(t *testing.T) {
     })
 }

-// Test that a running deployment with failed allocs will not result in rescheduling failed allocations
+// Test that a running deployment with failed allocs will not result in
+// rescheduling failed allocations unless they are marked as reschedulable.
 func TestReconciler_DeploymentWithFailedAllocs_DontReschedule(t *testing.T) {
     job := mock.Job()
     job.TaskGroups[0].Update = noCanaryUpdate

@@ -3925,13 +3864,13 @@ func TestReconciler_DeploymentWithFailedAllocs_DontReschedule(t *testing.T) {
     d.Status = structs.DeploymentStatusRunning
     d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
         Promoted:     false,
-        DesiredTotal: 5,
-        PlacedAllocs: 4,
+        DesiredTotal: 10,
+        PlacedAllocs: 10,
     }

-    // Create 4 allocations and mark two as failed
+    // Create 10 allocations
     var allocs []*structs.Allocation
-    for i := 0; i < 4; i++ {
+    for i := 0; i < 10; i++ {
         alloc := mock.Alloc()
         alloc.Job = job
         alloc.JobID = job.ID

@@ -3939,31 +3878,30 @@ func TestReconciler_DeploymentWithFailedAllocs_DontReschedule(t *testing.T) {
         alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
         alloc.TaskGroup = job.TaskGroups[0].Name
         alloc.DeploymentID = d.ID
+        alloc.ClientStatus = structs.AllocClientStatusFailed
+        alloc.TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
+            StartedAt:  now.Add(-1 * time.Hour),
+            FinishedAt: now.Add(-10 * time.Second)}}
         allocs = append(allocs, alloc)
     }

-    // Create allocs that are reschedulable now
-    allocs[2].ClientStatus = structs.AllocClientStatusFailed
-    allocs[2].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
-        StartedAt:  now.Add(-1 * time.Hour),
-        FinishedAt: now.Add(-10 * time.Second)}}
-
-    allocs[3].ClientStatus = structs.AllocClientStatusFailed
-    allocs[3].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
-        StartedAt:  now.Add(-1 * time.Hour),
-        FinishedAt: now.Add(-10 * time.Second)}}
+    // Mark half of them as reschedulable
+    for i := 0; i < 5; i++ {
+        allocs[i].DesiredTransition.Reschedule = helper.BoolToPtr(true)
+    }

     reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "")
     r := reconciler.Compute()

     // Assert that no rescheduled placements were created
     assertResults(t, r, &resultExpectation{
-        place:             0,
+        place:             5,
         createDeployment:  nil,
         deploymentUpdates: nil,
         desiredTGUpdates: map[string]*structs.DesiredUpdates{
             job.TaskGroups[0].Name: {
-                Ignore: 2,
+                Place:  5,
+                Ignore: 5,
             },
         },
     })

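The reworked test above opts individual failed allocations back into rescheduling through the allocation's DesiredTransition.Reschedule pointer (set with helper.BoolToPtr(true)). Below is a small, self-contained sketch of that gating idea. The Allocation and DesiredTransition types are simplified stand-ins for the real ones in nomad/structs, and filterReschedulable is a hypothetical helper used only for illustration, not reconciler code.

package main

import "fmt"

// Simplified stand-ins for structs.Allocation and structs.DesiredTransition;
// only the fields exercised by the test above are kept.
type DesiredTransition struct {
    Reschedule *bool
}

type Allocation struct {
    Name              string
    ClientStatus      string
    DesiredTransition DesiredTransition
}

// filterReschedulable keeps only failed allocations that were explicitly
// marked reschedulable, which is the property the updated test toggles on
// half of its ten failed allocations.
func filterReschedulable(allocs []*Allocation) []*Allocation {
    var out []*Allocation
    for _, a := range allocs {
        marked := a.DesiredTransition.Reschedule != nil && *a.DesiredTransition.Reschedule
        if a.ClientStatus == "failed" && marked {
            out = append(out, a)
        }
    }
    return out
}

func main() {
    yes := true
    allocs := []*Allocation{
        {Name: "a0", ClientStatus: "failed", DesiredTransition: DesiredTransition{Reschedule: &yes}},
        {Name: "a1", ClientStatus: "failed"}, // failed but not marked, so ignored
    }
    fmt.Println(len(filterReschedulable(allocs))) // 1
}

Only allocations whose Reschedule pointer is non-nil and true count as candidates, which is why the test now expects five placements and five ignores.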
@@ -3993,12 +3931,12 @@ func TestReconciler_FailedDeployment_AutoRevert_CancelCanaries(t *testing.T) {
     jobv2.Version = 2
     jobv2.TaskGroups[0].Meta = map[string]string{"version": "2"}

     // Create an existing failed deployment that has promoted one task group
     d := structs.NewDeployment(jobv2)
     state := &structs.DeploymentState{
-        Promoted:     false,
-        DesiredTotal: 3,
-        PlacedAllocs: 3,
+        Promoted:      true,
+        DesiredTotal:  3,
+        PlacedAllocs:  3,
+        HealthyAllocs: 3,
     }
     d.TaskGroups[job.TaskGroups[0].Name] = state
