Merge pull request #2961 from hashicorp/b-deployment-ttl
Lost allocs replaced even if deployment failed
@@ -365,13 +365,29 @@ func (a *allocReconciler) computeGroup(group string, all allocSet) bool {
         dstate.DesiredTotal += len(place)
     }
 
-    if !a.deploymentPaused && !a.deploymentFailed && !canaryState {
-        // Place all new allocations
+    // deploymentPlaceReady tracks whether the deployment is in a state where
+    // placements can be made without any other consideration.
+    deploymentPlaceReady := !a.deploymentPaused && !a.deploymentFailed && !canaryState
+
+    if deploymentPlaceReady {
         desiredChanges.Place += uint64(len(place))
         for _, p := range place {
             a.result.place = append(a.result.place, p)
         }
+    } else if !deploymentPlaceReady && len(lost) != 0 {
+        // We are in a situation where we shouldn't be placing more than we need
+        // to but we have lost allocations. It is a very weird user experience
+        // if you have a node go down and Nomad doesn't replace the allocations
+        // because the deployment is paused/failed so we only place to recover
+        // the lost allocations.
+        allowed := helper.IntMin(len(lost), len(place))
+        desiredChanges.Place += uint64(allowed)
+        for _, p := range place[:allowed] {
+            a.result.place = append(a.result.place, p)
+        }
+    }
 
+    if deploymentPlaceReady {
         // Do all destructive updates
         min := helper.IntMin(len(destructive), limit)
         limit -= min
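The hunk above changes the placement gate: instead of skipping all placements while a deployment is paused, failed, or waiting on canaries, the reconciler now still replaces lost allocations, capped at helper.IntMin(len(lost), len(place)). Below is a minimal, self-contained sketch of that behavior, not Nomad's actual reconciler; the names (cappedPlacements, intMin, the alloc names) are illustrative only, and intMin simply stands in for helper.IntMin.

```go
package main

import "fmt"

// intMin stands in for Nomad's helper.IntMin: the smaller of two ints.
func intMin(a, b int) int {
	if a < b {
		return a
	}
	return b
}

// cappedPlacements mirrors the hunk's logic: if the deployment is ready for
// placements, place everything; otherwise only place enough to cover lost
// allocations.
func cappedPlacements(deploymentPlaceReady bool, place, lost []string) []string {
	if deploymentPlaceReady {
		return place
	}
	if len(lost) == 0 {
		return nil
	}
	allowed := intMin(len(lost), len(place))
	return place[:allowed]
}

func main() {
	place := []string{"web.0", "web.1", "web.2"} // pending placements (illustrative names)
	lost := []string{"web.0"}                    // allocs lost to a down node

	// Paused/failed deployment: only the lost allocation is replaced.
	fmt.Println(cappedPlacements(false, place, lost)) // [web.0]

	// Healthy deployment: all pending placements go through.
	fmt.Println(cappedPlacements(true, place, lost)) // [web.0 web.1 web.2]
}
```

The cap keeps a paused or failed deployment from growing beyond what is needed to recover from a down node, which is exactly the user experience problem described in the code comment.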
@@ -47,7 +47,7 @@ Update stanza Tests:
 √ Don't create a deployment if there are no changes
 √ Deployment created by all inplace updates
 √ Paused or failed deployment doesn't create any more canaries
-√ Paused or failed deployment doesn't do any placements
+√ Paused or failed deployment doesn't do any placements unless replacing lost allocs
 √ Paused or failed deployment doesn't do destructive updates
 √ Paused does do migrations
 √ Failed deployment doesn't do migrations
@@ -2538,8 +2538,9 @@ func TestReconciler_TaintedNode_RollingUpgrade(t *testing.T) {
     assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
 }
 
-// Tests the reconciler handles a failed deployment and does no placements
-func TestReconciler_FailedDeployment_NoPlacements(t *testing.T) {
+// Tests the reconciler handles a failed deployment and only replaces lost
+// deployments
+func TestReconciler_FailedDeployment_PlacementLost(t *testing.T) {
     job := mock.Job()
     job.TaskGroups[0].Update = noCanaryUpdate
 
@@ -2602,18 +2603,20 @@ func TestReconciler_FailedDeployment_NoPlacements(t *testing.T) {
     assertResults(t, r, &resultExpectation{
         createDeployment:  nil,
         deploymentUpdates: nil,
-        place:             0,
+        place:             1, // Only replace the lost node
         inplace:           0,
         stop:              2,
         followupEvalWait:  0, // Since the deployment is failed, there should be no followup
         desiredTGUpdates: map[string]*structs.DesiredUpdates{
             job.TaskGroups[0].Name: {
+                Place:  1,
                 Stop:   2,
                 Ignore: 8,
             },
         },
     })
 
+    assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place))
     assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
 }
 
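To tie the code change to the adjusted expectations above (place: 1 and Place: 1 alongside Stop: 2 / Ignore: 8), here is a toy table-driven test in the same spirit. It does not use Nomad's reconciler or test fixtures, and the counts (three pending placements, one lost allocation) are assumptions chosen only to illustrate that a failed deployment yields exactly one replacement placement.

```go
package toy

import "testing"

// toyReconcile is a drastically simplified stand-in for the reconciler's
// placement decision: with a paused or failed deployment, placements are
// capped at the number of lost allocations.
func toyReconcile(placeReady bool, pendingPlace, lost int) int {
	if placeReady {
		return pendingPlace
	}
	if lost < pendingPlace {
		return lost
	}
	return pendingPlace
}

// TestFailedDeploymentReplacesOnlyLost mirrors the shape of the expectation
// in the hunk above: a failed deployment with one lost allocation should
// yield exactly one placement, however many placements are pending.
func TestFailedDeploymentReplacesOnlyLost(t *testing.T) {
	cases := []struct {
		placeReady   bool
		pendingPlace int
		lost         int
		wantPlace    int
	}{
		{placeReady: false, pendingPlace: 3, lost: 1, wantPlace: 1}, // failed deployment, one lost alloc
		{placeReady: false, pendingPlace: 3, lost: 0, wantPlace: 0}, // failed deployment, nothing lost
		{placeReady: true, pendingPlace: 3, lost: 1, wantPlace: 3},  // healthy deployment places everything
	}
	for _, c := range cases {
		if got := toyReconcile(c.placeReady, c.pendingPlace, c.lost); got != c.wantPlace {
			t.Errorf("toyReconcile(%v, %d, %d) = %d, want %d",
				c.placeReady, c.pendingPlace, c.lost, got, c.wantPlace)
		}
	}
}
```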