diff --git a/scheduler/reconciler/reconcile_node.go b/scheduler/reconciler/reconcile_node.go
index 2a21f7096..71dd7965c 100644
--- a/scheduler/reconciler/reconcile_node.go
+++ b/scheduler/reconciler/reconcile_node.go
@@ -59,7 +59,7 @@ func (nr *NodeReconciler) Compute(
 	// Canary deployments deploy to the TaskGroup.UpdateStrategy.Canary
 	// percentage of eligible nodes, so we create a mapping of task group name
 	// to a list of nodes that canaries should be placed on.
-	canaryNodes, canariesPerTG := computeCanaryNodes(required, eligibleNodes)
+	canaryNodes, canariesPerTG := nr.computeCanaryNodes(required, nodeAllocs, terminal, eligibleNodes)
 
 	result := new(NodeReconcileResult)
 	deploymentComplete := true
@@ -76,11 +76,13 @@ func (nr *NodeReconciler) Compute(
 	return result
 }
 
-// computeCanaryNodes is a helper function that, given required task groups and
+// computeCanaryNodes is a helper function that, given required task groups,
+// mappings of nodes to their live allocs and terminal allocs, and a map of
 // eligible nodes, outputs a map[nodeID] -> map[TG] -> bool which indicates
 // which TGs this node is a canary for, and a map[TG] -> int to indicate how
 // many total canaries are to be placed for a TG.
-func computeCanaryNodes(required map[string]*structs.TaskGroup,
+func (nr *NodeReconciler) computeCanaryNodes(required map[string]*structs.TaskGroup,
+	liveAllocs map[string][]*structs.Allocation, terminalAllocs structs.TerminalByNodeByName,
 	eligibleNodes map[string]*structs.Node) (map[string]map[string]bool, map[string]int) {
 
 	canaryNodes := map[string]map[string]bool{}
@@ -96,12 +98,32 @@ func computeCanaryNodes(required map[string]*structs.TaskGroup,
 		numberOfCanaryNodes := int(math.Ceil(float64(tg.Update.Canary) * float64(len(eligibleNodes)) / 100))
 		canariesPerTG[tg.Name] = numberOfCanaryNodes
 
+		// check if there are any live allocations on any nodes that are/were
+		// canaries
+		for nodeID, allocs := range liveAllocs {
+			for _, a := range allocs {
+				eligibleNodesList, numberOfCanaryNodes = nr.findOldCanaryNodes(
+					eligibleNodesList, numberOfCanaryNodes, a, tg, canaryNodes, nodeID)
+			}
+		}
+
+		// check if there are any terminal allocations that were canaries
+		for nodeID, terminalAlloc := range terminalAllocs {
+			for _, a := range terminalAlloc {
+				eligibleNodesList, numberOfCanaryNodes = nr.findOldCanaryNodes(
+					eligibleNodesList, numberOfCanaryNodes, a, tg, canaryNodes, nodeID)
+			}
+		}
+
 		for i, n := range eligibleNodesList {
-			canaryNodes[n.ID] = map[string]bool{}
 			if i > numberOfCanaryNodes-1 {
 				break
 			}
+			if _, ok := canaryNodes[n.ID]; !ok {
+				canaryNodes[n.ID] = map[string]bool{}
+			}
+
 			canaryNodes[n.ID][tg.Name] = true
 		}
 	}
@@ -109,6 +131,33 @@ func computeCanaryNodes(required map[string]*structs.TaskGroup,
 	return canaryNodes, canariesPerTG
 }
+func (nr *NodeReconciler) findOldCanaryNodes(nodesList []*structs.Node, numberOfCanaryNodes int,
+	a *structs.Allocation, tg *structs.TaskGroup, canaryNodes map[string]map[string]bool, nodeID string) ([]*structs.Node, int) {
+
+	if a.DeploymentStatus == nil || !a.DeploymentStatus.Canary ||
+		nr.DeploymentCurrent == nil { // TODO: should we also require nr.DeploymentCurrent.ID == a.DeploymentID here?
+		return nodesList, numberOfCanaryNodes
+	}
+
+	nodes := nodesList
+	numberOfCanaries := numberOfCanaryNodes
+	if a.TaskGroup == tg.Name {
+		if _, ok := canaryNodes[nodeID]; !ok {
+			canaryNodes[nodeID] = map[string]bool{}
+		}
+		canaryNodes[nodeID][tg.Name] = true
+
+		// this node should no longer be considered when searching
+		// for canary nodes
+		numberOfCanaries--
+		nodes = slices.DeleteFunc(
+			nodes,
+			func(n *structs.Node) bool { return n.ID == nodeID },
+		)
+	}
+	return nodes, numberOfCanaries
+}
+
 // computeForNode is used to do a set difference between the target
 // allocations and the existing allocations for a particular node. This returns
 // 8 sets of results:
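Note on the rounding above: computeCanaryNodes applies TaskGroup.UpdateStrategy.Canary as a percentage of the eligible-node count and always rounds up, so any non-zero canary percentage places at least one canary node. A minimal standalone sketch of just that arithmetic (not part of the patch; canaryNodeCount is an illustrative name):

	package main

	import (
		"fmt"
		"math"
	)

	// canaryNodeCount mirrors the ceiling math in computeCanaryNodes: the
	// canary percentage is applied to the eligible-node count and rounded up.
	func canaryNodeCount(canaryPercent, eligibleNodeCount int) int {
		return int(math.Ceil(float64(canaryPercent) * float64(eligibleNodeCount) / 100))
	}

	func main() {
		fmt.Println(canaryNodeCount(33, 4))  // 2 -- "we always round up" (1.32 -> 2)
		fmt.Println(canaryNodeCount(100, 4)) // 4 -- every eligible node is a canary
		fmt.Println(canaryNodeCount(50, 5))  // 3 -- 2.5 -> 3
		fmt.Println(canaryNodeCount(2, 5))   // 1 -- even 2% of 5 nodes places one canary
	}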
diff --git a/scheduler/reconciler/reconcile_node_test.go b/scheduler/reconciler/reconcile_node_test.go
index 286f2a546..c9906e76a 100644
--- a/scheduler/reconciler/reconcile_node_test.go
+++ b/scheduler/reconciler/reconcile_node_test.go
@@ -764,11 +764,13 @@ func Test_computeCanaryNodes(t *testing.T) {
 	// generate an odd number of nodes
 	fiveEligibleNodes := map[string]*structs.Node{}
-	for range 5 {
-		nodeID := uuid.Generate()
+	// name them so we can refer to their names while testing pre-existing
+	// canary allocs
+	fiveEligibleNodeNames := []string{"node1", "node2", "node3", "node4", "node5"}
+	for _, name := range fiveEligibleNodeNames {
 		node := mock.Node()
-		node.ID = nodeID
-		fiveEligibleNodes[nodeID] = node
+		node.ID = name
+		fiveEligibleNodes[name] = node
 	}
 
 	// generate an even number of nodes
@@ -781,29 +783,43 @@ func Test_computeCanaryNodes(t *testing.T) {
 	}
 
 	testCases := []struct {
-		name                string
-		nodes               map[string]*structs.Node
-		required            map[string]*structs.TaskGroup
-		expectedCanaryNodes map[string]int // number of nodes per tg
+		name                 string
+		nodes                map[string]*structs.Node
+		liveAllocs           map[string][]*structs.Allocation
+		terminalAllocs       structs.TerminalByNodeByName
+		required             map[string]*structs.TaskGroup
+		existingDeployment   *structs.Deployment
+		expectedCanaryNodes  map[string]int    // number of nodes per tg
+		expectedCanaryNodeID map[string]string // sometimes we want to make sure a particular node ID is a canary
 	}{
 		{
-			name:                "no required task groups",
-			nodes:               fourEligibleNodes,
-			required:            nil,
-			expectedCanaryNodes: map[string]int{},
+			name:                 "no required task groups",
+			nodes:                fourEligibleNodes,
+			liveAllocs:           nil,
+			terminalAllocs:       nil,
+			required:             nil,
+			existingDeployment:   nil,
+			expectedCanaryNodes:  map[string]int{},
+			expectedCanaryNodeID: nil,
 		},
 		{
-			name:  "one task group with no update strategy",
-			nodes: fourEligibleNodes,
+			name:           "one task group with no update strategy",
+			nodes:          fourEligibleNodes,
+			liveAllocs:     nil,
+			terminalAllocs: nil,
 			required: map[string]*structs.TaskGroup{
 				"foo": {
 					Name: "foo",
 				}},
-			expectedCanaryNodes: map[string]int{},
+			existingDeployment:   nil,
+			expectedCanaryNodes:  map[string]int{},
+			expectedCanaryNodeID: nil,
 		},
 		{
-			name:  "one task group with 33% canary deployment",
-			nodes: fourEligibleNodes,
+			name:           "one task group with 33% canary deployment",
+			nodes:          fourEligibleNodes,
+			liveAllocs:     nil,
+			terminalAllocs: nil,
 			required: map[string]*structs.TaskGroup{
 				"foo": {
 					Name: "foo",
@@ -813,13 +829,17 @@ func Test_computeCanaryNodes(t *testing.T) {
 					},
 				},
 			},
+			existingDeployment: nil,
 			expectedCanaryNodes: map[string]int{
 				"foo": 2, // we always round up
 			},
+			expectedCanaryNodeID: nil,
 		},
 		{
-			name:  "one task group with 100% canary deployment, four nodes",
-			nodes: fourEligibleNodes,
+			name:           "one task group with 100% canary deployment, four nodes",
deployment, four nodes", + nodes: fourEligibleNodes, + liveAllocs: nil, + terminalAllocs: nil, required: map[string]*structs.TaskGroup{ "foo": { Name: "foo", @@ -829,13 +849,17 @@ func Test_computeCanaryNodes(t *testing.T) { }, }, }, + existingDeployment: nil, expectedCanaryNodes: map[string]int{ "foo": 4, }, + expectedCanaryNodeID: nil, }, { - name: "one task group with 50% canary deployment, even nodes", - nodes: fourEligibleNodes, + name: "one task group with 50% canary deployment, even nodes", + nodes: fourEligibleNodes, + liveAllocs: nil, + terminalAllocs: nil, required: map[string]*structs.TaskGroup{ "foo": { Name: "foo", @@ -845,13 +869,35 @@ func Test_computeCanaryNodes(t *testing.T) { }, }, }, + existingDeployment: nil, expectedCanaryNodes: map[string]int{ "foo": 2, }, + expectedCanaryNodeID: nil, }, { - name: "two task groups: one with 50% canary deploy, second one with 2% canary deploy", + name: "two task groups: one with 50% canary deploy, second one with 2% canary deploy, pre-existing canary alloc", nodes: fiveEligibleNodes, + liveAllocs: map[string][]*structs.Allocation{ + "foo": {mock.Alloc()}, // should be disregarded since it's not one of our nodes + fiveEligibleNodeNames[0]: { + {DeploymentStatus: nil}, + {DeploymentStatus: &structs.AllocDeploymentStatus{Canary: false}}, + {DeploymentStatus: &structs.AllocDeploymentStatus{Canary: true}, TaskGroup: "foo"}, + }, + fiveEligibleNodeNames[1]: { + {DeploymentStatus: &structs.AllocDeploymentStatus{Canary: true}, TaskGroup: "bar"}, + }, + }, + terminalAllocs: structs.TerminalByNodeByName{ + fiveEligibleNodeNames[2]: map[string]*structs.Allocation{ + "foo": { + DeploymentStatus: &structs.AllocDeploymentStatus{ + Canary: true, + }, + }, + }, + }, required: map[string]*structs.TaskGroup{ "foo": { Name: "foo", @@ -868,17 +914,29 @@ func Test_computeCanaryNodes(t *testing.T) { }, }, }, + existingDeployment: structs.NewDeployment(mock.SystemJob(), 100, time.Now().Unix()), expectedCanaryNodes: map[string]int{ "foo": 3, // we always round up "bar": 1, // we always round up }, + expectedCanaryNodeID: map[string]string{ + fiveEligibleNodeNames[0]: "foo", + fiveEligibleNodeNames[1]: "bar", + fiveEligibleNodeNames[2]: "foo", + }, }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - _, canariesPerTG := computeCanaryNodes(tc.required, tc.nodes) + nr := NewNodeReconciler(tc.existingDeployment) + canaryNodes, canariesPerTG := nr.computeCanaryNodes(tc.required, tc.liveAllocs, tc.terminalAllocs, tc.nodes) must.Eq(t, tc.expectedCanaryNodes, canariesPerTG) + if tc.liveAllocs != nil { + for nodeID, tgName := range tc.expectedCanaryNodeID { + must.True(t, canaryNodes[nodeID][tgName]) + } + } }) } }
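A self-contained sketch of how reclaiming pre-existing canary nodes shrinks the remaining search, mirroring the slices.DeleteFunc pruning in findOldCanaryNodes (not part of the patch; string IDs stand in for *structs.Node, and the names match the test's fiveEligibleNodeNames):

	package main

	import (
		"fmt"
		"slices"
	)

	func main() {
		// Five eligible nodes; 50% canaries for task group "foo" -> ceil(2.5) = 3.
		eligible := []string{"node1", "node2", "node3", "node4", "node5"}
		remaining := 3

		// node1 (live alloc) and node3 (terminal alloc) already ran canaries
		// for "foo", so each keeps the canary role and leaves the candidate
		// pool, the same way findOldCanaryNodes prunes eligibleNodesList.
		for _, reused := range []string{"node1", "node3"} {
			id := reused
			remaining--
			eligible = slices.DeleteFunc(eligible, func(n string) bool { return n == id })
		}

		// Only one fresh canary node is still picked from the remaining pool.
		fmt.Println(eligible, remaining) // [node2 node4 node5] 1
	}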