Mark canaries on creation, and unmark on promotion

This commit is contained in:
Alex Dadgar
2018-04-19 13:58:06 -07:00
committed by Preetha Appan
parent 334f5fb826
commit 8be599af94
7 changed files with 182 additions and 94 deletions

View File

@@ -143,6 +143,8 @@ type AllocationListStub struct {
// healthy.
type AllocDeploymentStatus struct {
Healthy *bool
Timestamp time.Time
Canary bool
ModifyIndex uint64
}

View File

@@ -246,34 +246,22 @@ func formatAllocBasicInfo(alloc *api.Allocation, client *api.Client, uuidLength
if alloc.DeploymentID != "" {
health := "unset"
if alloc.DeploymentStatus != nil && alloc.DeploymentStatus.Healthy != nil {
if *alloc.DeploymentStatus.Healthy {
health = "healthy"
} else {
health = "unhealthy"
canary := false
if alloc.DeploymentStatus != nil {
if alloc.DeploymentStatus.Healthy != nil {
if *alloc.DeploymentStatus.Healthy {
health = "healthy"
} else {
health = "unhealthy"
}
}
canary = alloc.DeploymentStatus.Canary
}
basic = append(basic,
fmt.Sprintf("Deployment ID|%s", limit(alloc.DeploymentID, uuidLength)),
fmt.Sprintf("Deployment Health|%s", health))
// Check if this allocation is a canary
deployment, _, err := client.Deployments().Info(alloc.DeploymentID, nil)
if err != nil {
return "", fmt.Errorf("Error querying deployment %q: %s", alloc.DeploymentID, err)
}
canary := false
if state, ok := deployment.TaskGroups[alloc.TaskGroup]; ok {
for _, id := range state.PlacedCanaries {
if id == alloc.ID {
canary = true
break
}
}
}
if canary {
basic = append(basic, fmt.Sprintf("Canary|%v", true))
}

View File

@@ -1890,7 +1890,18 @@ func (s *StateStore) nestedUpdateAllocFromClient(txn *memdb.Txn, index uint64, a
copyAlloc.ClientStatus = alloc.ClientStatus
copyAlloc.ClientDescription = alloc.ClientDescription
copyAlloc.TaskStates = alloc.TaskStates
// Merge the deployment status taking only what the client should set
oldDeploymentStatus := copyAlloc.DeploymentStatus
copyAlloc.DeploymentStatus = alloc.DeploymentStatus
if oldDeploymentStatus != nil {
if oldDeploymentStatus.Canary {
copyAlloc.DeploymentStatus.Canary = true
}
if oldDeploymentStatus.Timestamp.After(copyAlloc.DeploymentStatus.Timestamp) {
copyAlloc.DeploymentStatus.Timestamp = oldDeploymentStatus.Timestamp
}
}
// Update the modify index
copyAlloc.ModifyIndex = index
@@ -1961,6 +1972,9 @@ func (s *StateStore) upsertAllocsImpl(index uint64, allocs []*structs.Allocation
alloc.CreateIndex = index
alloc.ModifyIndex = index
alloc.AllocModifyIndex = index
if alloc.DeploymentStatus != nil {
alloc.DeploymentStatus.ModifyIndex = index
}
// Issue https://github.com/hashicorp/nomad/issues/2583 uncovered
// the a race between a forced garbage collection and the scheduler
@@ -2620,11 +2634,13 @@ func (s *StateStore) UpdateDeploymentPromotion(index uint64, req *structs.ApplyD
return err
}
// groupIndex is a map of groups being promoted
groupIndex := make(map[string]struct{}, len(req.Groups))
for _, g := range req.Groups {
groupIndex[g] = struct{}{}
}
// canaryIndex is the set of placed canaries in the deployment
canaryIndex := make(map[string]struct{}, len(deployment.TaskGroups))
for _, state := range deployment.TaskGroups {
for _, c := range state.PlacedCanaries {
@@ -2632,8 +2648,13 @@ func (s *StateStore) UpdateDeploymentPromotion(index uint64, req *structs.ApplyD
}
}
haveCanaries := false
var unhealthyErr multierror.Error
// healthyCounts is a mapping of group to the number of healthy canaries
healthyCounts := make(map[string]int, len(deployment.TaskGroups))
// promotable is the set of allocations that we can move from canary to
// non-canary
var promotable []*structs.Allocation
for {
raw := iter.Next()
if raw == nil {
@@ -2654,21 +2675,34 @@ func (s *StateStore) UpdateDeploymentPromotion(index uint64, req *structs.ApplyD
// Ensure the canaries are healthy
if !alloc.DeploymentStatus.IsHealthy() {
multierror.Append(&unhealthyErr, fmt.Errorf("Canary allocation %q for group %q is not healthy", alloc.ID, alloc.TaskGroup))
continue
}
haveCanaries = true
healthyCounts[alloc.TaskGroup]++
promotable = append(promotable, alloc)
}
// Determine if we have enough healthy allocations
var unhealthyErr multierror.Error
for tg, state := range deployment.TaskGroups {
if _, ok := groupIndex[tg]; !req.All && !ok {
continue
}
need := state.DesiredCanaries
if need == 0 {
continue
}
if have := healthyCounts[tg]; have < need {
multierror.Append(&unhealthyErr, fmt.Errorf("Task group %q has %d/%d healthy allocations", tg, have, need))
}
}
if err := unhealthyErr.ErrorOrNil(); err != nil {
return err
}
if !haveCanaries {
return fmt.Errorf("no canaries to promote")
}
// Update deployment
copy := deployment.Copy()
copy.ModifyIndex = index
@@ -2698,6 +2732,24 @@ func (s *StateStore) UpdateDeploymentPromotion(index uint64, req *structs.ApplyD
}
}
// For each promotable allocation remoce the canary field
for _, alloc := range promotable {
promoted := alloc.Copy()
promoted.DeploymentStatus.Canary = false
promoted.DeploymentStatus.ModifyIndex = index
promoted.ModifyIndex = index
promoted.AllocModifyIndex = index
if err := txn.Insert("allocs", promoted); err != nil {
return fmt.Errorf("alloc insert failed: %v", err)
}
}
// Update the alloc index
if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil {
return fmt.Errorf("index update failed: %v", err)
}
txn.Commit()
return nil
}

View File

@@ -3577,6 +3577,50 @@ func TestStateStore_UpdateAllocsFromClient_Deployment(t *testing.T) {
require.True(healthy.Add(pdeadline).Equal(dstate.RequireProgressBy))
}
// This tests that the deployment state is merged correctly
func TestStateStore_UpdateAllocsFromClient_DeploymentStateMerges(t *testing.T) {
require := require.New(t)
state := testStateStore(t)
ts1 := time.Now()
ts2 := ts1.Add(1 * time.Hour)
alloc := mock.Alloc()
alloc.CreateTime = ts1.UnixNano()
pdeadline := 5 * time.Minute
deployment := mock.Deployment()
deployment.TaskGroups[alloc.TaskGroup].ProgressDeadline = pdeadline
alloc.DeploymentID = deployment.ID
alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
Canary: true,
Timestamp: ts2,
}
require.Nil(state.UpsertJob(999, alloc.Job))
require.Nil(state.UpsertDeployment(1000, deployment))
require.Nil(state.UpsertAllocs(1001, []*structs.Allocation{alloc}))
update := &structs.Allocation{
ID: alloc.ID,
ClientStatus: structs.AllocClientStatusRunning,
JobID: alloc.JobID,
TaskGroup: alloc.TaskGroup,
DeploymentStatus: &structs.AllocDeploymentStatus{
Healthy: helper.BoolToPtr(true),
Canary: false,
Timestamp: ts1,
},
}
require.Nil(state.UpdateAllocsFromClient(1001, []*structs.Allocation{update}))
// Check that the merging of the deployment status was correct
out, err := state.AllocByID(nil, alloc.ID)
require.Nil(err)
require.NotNil(out)
require.True(out.DeploymentStatus.Canary)
require.Equal(ts2, out.DeploymentStatus.Timestamp)
}
func TestStateStore_UpsertAlloc_Alloc(t *testing.T) {
state := testStateStore(t)
alloc := mock.Alloc()
@@ -5534,19 +5578,17 @@ func TestStateStore_UpsertDeploymentPromotion_Terminal(t *testing.T) {
// Test promoting unhealthy canaries in a deployment.
func TestStateStore_UpsertDeploymentPromotion_Unhealthy(t *testing.T) {
state := testStateStore(t)
require := require.New(t)
// Create a job
j := mock.Job()
if err := state.UpsertJob(1, j); err != nil {
t.Fatalf("bad: %v", err)
}
require.Nil(state.UpsertJob(1, j))
// Create a deployment
d := mock.Deployment()
d.JobID = j.ID
if err := state.UpsertDeployment(2, d); err != nil {
t.Fatalf("bad: %v", err)
}
d.TaskGroups["web"].DesiredCanaries = 2
require.Nil(state.UpsertDeployment(2, d))
// Create a set of allocations
c1 := mock.Alloc()
@@ -5558,9 +5600,7 @@ func TestStateStore_UpsertDeploymentPromotion_Unhealthy(t *testing.T) {
c2.DeploymentID = d.ID
d.TaskGroups[c2.TaskGroup].PlacedCanaries = append(d.TaskGroups[c2.TaskGroup].PlacedCanaries, c2.ID)
if err := state.UpsertAllocs(3, []*structs.Allocation{c1, c2}); err != nil {
t.Fatalf("err: %v", err)
}
require.Nil(state.UpsertAllocs(3, []*structs.Allocation{c1, c2}))
// Promote the canaries
req := &structs.ApplyDeploymentPromoteRequest{
@@ -5570,33 +5610,24 @@ func TestStateStore_UpsertDeploymentPromotion_Unhealthy(t *testing.T) {
},
}
err := state.UpdateDeploymentPromotion(4, req)
if err == nil {
t.Fatalf("bad: %v", err)
}
if !strings.Contains(err.Error(), c1.ID) {
t.Fatalf("expect canary %q to be listed as unhealth: %v", c1.ID, err)
}
if !strings.Contains(err.Error(), c2.ID) {
t.Fatalf("expect canary %q to be listed as unhealth: %v", c2.ID, err)
}
require.NotNil(err)
require.Contains(err.Error(), `Task group "web" has 0/2 healthy allocations`)
}
// Test promoting a deployment with no canaries
func TestStateStore_UpsertDeploymentPromotion_NoCanaries(t *testing.T) {
state := testStateStore(t)
require := require.New(t)
// Create a job
j := mock.Job()
if err := state.UpsertJob(1, j); err != nil {
t.Fatalf("bad: %v", err)
}
require.Nil(state.UpsertJob(1, j))
// Create a deployment
d := mock.Deployment()
d.TaskGroups["web"].DesiredCanaries = 2
d.JobID = j.ID
if err := state.UpsertDeployment(2, d); err != nil {
t.Fatalf("bad: %v", err)
}
require.Nil(state.UpsertDeployment(2, d))
// Promote the canaries
req := &structs.ApplyDeploymentPromoteRequest{
@@ -5606,12 +5637,8 @@ func TestStateStore_UpsertDeploymentPromotion_NoCanaries(t *testing.T) {
},
}
err := state.UpdateDeploymentPromotion(4, req)
if err == nil {
t.Fatalf("bad: %v", err)
}
if !strings.Contains(err.Error(), "no canaries to promote") {
t.Fatalf("expect error promoting nonexistent canaries: %v", err)
}
require.NotNil(err)
require.Contains(err.Error(), `Task group "web" has 0/2 healthy allocations`)
}
// Test promoting all canaries in a deployment.
@@ -5714,6 +5741,7 @@ func TestStateStore_UpsertDeploymentPromotion_All(t *testing.T) {
// Test promoting a subset of canaries in a deployment.
func TestStateStore_UpsertDeploymentPromotion_Subset(t *testing.T) {
state := testStateStore(t)
require := require.New(t)
// Create a job with two task groups
j := mock.Job()
@@ -5721,9 +5749,7 @@ func TestStateStore_UpsertDeploymentPromotion_Subset(t *testing.T) {
tg2 := tg1.Copy()
tg2.Name = "foo"
j.TaskGroups = append(j.TaskGroups, tg2)
if err := state.UpsertJob(1, j); err != nil {
t.Fatalf("bad: %v", err)
}
require.Nil(state.UpsertJob(1, j))
// Create a deployment
d := mock.Deployment()
@@ -5738,18 +5764,19 @@ func TestStateStore_UpsertDeploymentPromotion_Subset(t *testing.T) {
DesiredCanaries: 1,
},
}
if err := state.UpsertDeployment(2, d); err != nil {
t.Fatalf("bad: %v", err)
}
require.Nil(state.UpsertDeployment(2, d))
// Create a set of allocations
// Create a set of allocations for both groups, including an unhealthy one
c1 := mock.Alloc()
c1.JobID = j.ID
c1.DeploymentID = d.ID
d.TaskGroups[c1.TaskGroup].PlacedCanaries = append(d.TaskGroups[c1.TaskGroup].PlacedCanaries, c1.ID)
c1.DeploymentStatus = &structs.AllocDeploymentStatus{
Healthy: helper.BoolToPtr(true),
Canary: true,
}
// Should still be a canary
c2 := mock.Alloc()
c2.JobID = j.ID
c2.DeploymentID = d.ID
@@ -5757,12 +5784,20 @@ func TestStateStore_UpsertDeploymentPromotion_Subset(t *testing.T) {
c2.TaskGroup = tg2.Name
c2.DeploymentStatus = &structs.AllocDeploymentStatus{
Healthy: helper.BoolToPtr(true),
Canary: true,
}
if err := state.UpsertAllocs(3, []*structs.Allocation{c1, c2}); err != nil {
t.Fatalf("err: %v", err)
c3 := mock.Alloc()
c3.JobID = j.ID
c3.DeploymentID = d.ID
d.TaskGroups[c3.TaskGroup].PlacedCanaries = append(d.TaskGroups[c3.TaskGroup].PlacedCanaries, c3.ID)
c3.DeploymentStatus = &structs.AllocDeploymentStatus{
Healthy: helper.BoolToPtr(false),
Canary: true,
}
require.Nil(state.UpsertAllocs(3, []*structs.Allocation{c1, c2, c3}))
// Create an eval
e := mock.Eval()
@@ -5774,36 +5809,34 @@ func TestStateStore_UpsertDeploymentPromotion_Subset(t *testing.T) {
},
Eval: e,
}
err := state.UpdateDeploymentPromotion(4, req)
if err != nil {
t.Fatalf("bad: %v", err)
}
require.Nil(state.UpdateDeploymentPromotion(4, req))
// Check that the status per task group was updated properly
ws := memdb.NewWatchSet()
dout, err := state.DeploymentByID(ws, d.ID)
if err != nil {
t.Fatalf("bad: %v", err)
}
if len(dout.TaskGroups) != 2 {
t.Fatalf("bad: %#v", dout.TaskGroups)
}
stateout, ok := dout.TaskGroups["web"]
if !ok {
t.Fatalf("bad: no state for task group web")
}
if !stateout.Promoted {
t.Fatalf("bad: task group web not promoted: %#v", stateout)
}
require.Nil(err)
require.Len(dout.TaskGroups, 2)
require.Contains(dout.TaskGroups, "web")
require.True(dout.TaskGroups["web"].Promoted)
// Check that the evaluation was created
eout, _ := state.EvalByID(ws, e.ID)
if err != nil {
t.Fatalf("bad: %v", err)
}
if eout == nil {
t.Fatalf("bad: %#v", eout)
}
eout, err := state.EvalByID(ws, e.ID)
require.Nil(err)
require.NotNil(eout)
// Check the canary field was set properly
aout1, err1 := state.AllocByID(ws, c1.ID)
aout2, err2 := state.AllocByID(ws, c2.ID)
aout3, err3 := state.AllocByID(ws, c3.ID)
require.Nil(err1)
require.Nil(err2)
require.Nil(err3)
require.NotNil(aout1)
require.NotNil(aout2)
require.NotNil(aout3)
require.False(aout1.DeploymentStatus.Canary)
require.True(aout2.DeploymentStatus.Canary)
require.True(aout3.DeploymentStatus.Canary)
}
// Test that allocation health can't be set against a nonexistent deployment

View File

@@ -6219,6 +6219,10 @@ type AllocDeploymentStatus struct {
// Timestamp is the time at which the health status was set.
Timestamp time.Time
// Canary marks whether the allocation is a canary or not. A canary that has
// been promoted will have this field set to false.
Canary bool
// ModifyIndex is the raft index in which the deployment status was last
// changed.
ModifyIndex uint64

View File

@@ -499,11 +499,15 @@ func (s *GenericScheduler) computePlacements(destructive, place []placementResul
}
// If we are placing a canary and we found a match, add the canary
// to the deployment state object.
// to the deployment state object and mark it as a canary.
if missing.Canary() {
if state, ok := s.deployment.TaskGroups[tg.Name]; ok {
state.PlacedCanaries = append(state.PlacedCanaries, alloc.ID)
}
alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
Canary: true,
}
}
// Track the placement

View File

@@ -1814,6 +1814,11 @@ func TestServiceSched_JobModify_Canaries(t *testing.T) {
if len(planned) != desiredUpdates {
t.Fatalf("bad: %#v", plan)
}
for _, canary := range planned {
if canary.DeploymentStatus == nil || !canary.DeploymentStatus.Canary {
t.Fatalf("expected canary field to be set on canary alloc %q", canary.ID)
}
}
h.AssertEvalStatus(t, structs.EvalStatusComplete)