mirror of
https://github.com/kemko/nomad.git
synced 2026-01-06 02:15:43 +03:00
Fix handling of failed job
This commit is contained in:
@@ -113,7 +113,7 @@ func NewAllocReconciler(logger *log.Logger, allocUpdateFn allocUpdateType, batch
|
||||
jobID string, job *structs.Job, deployment *structs.Deployment,
|
||||
existingAllocs []*structs.Allocation, taintedNodes map[string]*structs.Node) *allocReconciler {
|
||||
|
||||
a := &allocReconciler{
|
||||
return &allocReconciler{
|
||||
logger: logger,
|
||||
allocUpdateFn: allocUpdateFn,
|
||||
batch: batch,
|
||||
@@ -126,14 +126,6 @@ func NewAllocReconciler(logger *log.Logger, allocUpdateFn allocUpdateType, batch
|
||||
desiredTGUpdates: make(map[string]*structs.DesiredUpdates),
|
||||
},
|
||||
}
|
||||
|
||||
// Detect if the deployment is paused
|
||||
if deployment != nil {
|
||||
a.deploymentPaused = deployment.Status == structs.DeploymentStatusPaused
|
||||
a.deploymentFailed = deployment.Status == structs.DeploymentStatusFailed
|
||||
}
|
||||
|
||||
return a
|
||||
}
|
||||
|
||||
// Compute reconciles the existing cluster state and returns the set of changes
|
||||
@@ -152,6 +144,12 @@ func (a *allocReconciler) Compute() *reconcileResults {
|
||||
return a.result
|
||||
}
|
||||
|
||||
// Detect if the deployment is paused
|
||||
if a.deployment != nil {
|
||||
a.deploymentPaused = a.deployment.Status == structs.DeploymentStatusPaused
|
||||
a.deploymentFailed = a.deployment.Status == structs.DeploymentStatusFailed
|
||||
}
|
||||
|
||||
// Reconcile each group
|
||||
complete := true
|
||||
for group, as := range m {
|
||||
@@ -184,8 +182,6 @@ func (a *allocReconciler) cancelDeployments() {
|
||||
return
|
||||
}
|
||||
|
||||
// TODO it doesn't like operating on a failed deployment
|
||||
// Write a test
|
||||
d := a.deployment
|
||||
if d == nil {
|
||||
return
|
||||
|
||||
@@ -2675,4 +2675,74 @@ func TestReconciler_FailedDeployment_CancelCanaries(t *testing.T) {
|
||||
assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
|
||||
}
|
||||
|
||||
// TODO Test that a failed deployment and updated job works
|
||||
// Test that a failed deployment and updated job works
|
||||
func TestReconciler_FailedDeployment_NewJob(t *testing.T) {
|
||||
job := mock.Job()
|
||||
job.TaskGroups[0].Update = noCanaryUpdate
|
||||
|
||||
// Create an existing failed deployment that has some placed allocs
|
||||
d := structs.NewDeployment(job)
|
||||
d.Status = structs.DeploymentStatusFailed
|
||||
d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
|
||||
Promoted: true,
|
||||
DesiredTotal: 10,
|
||||
PlacedAllocs: 4,
|
||||
}
|
||||
|
||||
// Create 6 allocations from the old job
|
||||
var allocs []*structs.Allocation
|
||||
for i := 4; i < 10; i++ {
|
||||
alloc := mock.Alloc()
|
||||
alloc.Job = job
|
||||
alloc.JobID = job.ID
|
||||
alloc.NodeID = structs.GenerateUUID()
|
||||
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
|
||||
alloc.TaskGroup = job.TaskGroups[0].Name
|
||||
allocs = append(allocs, alloc)
|
||||
}
|
||||
|
||||
// Create the healthy replacements
|
||||
for i := 0; i < 4; i++ {
|
||||
new := mock.Alloc()
|
||||
new.Job = job
|
||||
new.JobID = job.ID
|
||||
new.NodeID = structs.GenerateUUID()
|
||||
new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
|
||||
new.TaskGroup = job.TaskGroups[0].Name
|
||||
new.DeploymentID = d.ID
|
||||
new.DeploymentStatus = &structs.AllocDeploymentStatus{
|
||||
Healthy: helper.BoolToPtr(true),
|
||||
}
|
||||
allocs = append(allocs, new)
|
||||
}
|
||||
|
||||
// Up the job version
|
||||
jobNew := job.Copy()
|
||||
jobNew.JobModifyIndex += 100
|
||||
|
||||
reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, jobNew, d, allocs, nil)
|
||||
r := reconciler.Compute()
|
||||
|
||||
dnew := structs.NewDeployment(job)
|
||||
dnew.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
|
||||
DesiredTotal: 10,
|
||||
}
|
||||
|
||||
// Assert the correct results
|
||||
assertResults(t, r, &resultExpectation{
|
||||
createDeployment: dnew,
|
||||
deploymentUpdates: nil,
|
||||
place: 4,
|
||||
inplace: 0,
|
||||
stop: 4,
|
||||
desiredTGUpdates: map[string]*structs.DesiredUpdates{
|
||||
job.TaskGroups[0].Name: {
|
||||
DestructiveUpdate: 4,
|
||||
Ignore: 6,
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
assertNamesHaveIndexes(t, intRange(0, 3), stopResultsToNames(r.stop))
|
||||
assertNamesHaveIndexes(t, intRange(0, 3), placeResultsToNames(r.place))
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user