eval delete: move batching of deletes into RPC handler and state (#15117)

During unusual outage recovery scenarios on large clusters, a backlog of millions of evaluations can appear. In these cases, the `eval delete` command can put excessive load on the cluster by listing large sets of evals to extract the IDs and then sending larges batches of IDs. Although the command's batch size was carefully tuned, we still need to be JSON deserialize, re-serialize to MessagePack, send the log entries through raft, and get the FSM applied. To improve performance of this recovery case, move the batching process into the RPC handler and the state store. The design here is a little weird, so let's look a the failed options first: * A naive solution here would be to just send the filter as the raft request and let the FSM apply delete the whole set in a single operation. Benchmarking with 1M evals on a 3 node cluster demonstrated this can block the FSM apply for several minutes, which puts the cluster at risk if there's a leadership failover (the barrier write can't be made while this apply is in-flight). * A less naive but still bad solution would be to have the RPC handler filter and paginate, and then hand a list of IDs to the existing raft log entry. Benchmarks showed this blocked the FSM apply for 20-30s at a time and took roughly an hour to complete. Instead, we're filtering and paginating in the RPC handler to find a page token, and then passing both the filter and page token in the raft log. The FSM apply recreates the paginator using the filter and page token to get roughly the same page of evaluations, which it then deletes. The pagination process is fairly cheap (only abut 5% of the total FSM apply time), so counter-intuitively this rework ends up being much faster. A benchmark of 1M evaluations showed this blocked the FSM apply for 20-30ms at a time (typical for normal operations) and completes in less than 4 minutes. Note that, as with the existing design, this delete is not consistent: a new evaluation inserted "behind" the cursor of the pagination will fail to be deleted.
2026-01-06 18:35:44 +03:00 · 2022-11-14 14:08:13 -05:00
parent 1217a96edf
commit 65b3d01aab
13 changed files with 434 additions and 136 deletions
--- a/command/agent/eval_endpoint.go
+++ b/command/agent/eval_endpoint.go
@@ -56,13 +56,19 @@ func (s *HTTPServer) evalsDeleteRequest(resp http.ResponseWriter, req *http.Requ

 	numIDs := len(args.EvalIDs)

-	// Ensure the number of evaluation IDs included in the request is within
-	// bounds.
-	if numIDs < 1 {
-		return nil, CodedError(http.StatusBadRequest, "request does not include any evaluation IDs")
-	} else if numIDs > structs.MaxUUIDsPerWriteRequest {
+	if args.Filter != "" && numIDs > 0 {
+		return nil, CodedError(http.StatusBadRequest,
+			"evals cannot be deleted by both ID and filter")
+	}
+	if args.Filter == "" && numIDs == 0 {
+		return nil, CodedError(http.StatusBadRequest,
+			"evals must be deleted by either ID or filter")
+	}
+
+	// If an explicit list of evaluation IDs is sent, ensure its within bounds
+	if numIDs > structs.MaxUUIDsPerWriteRequest {
 		return nil, CodedError(http.StatusBadRequest, fmt.Sprintf(
-			"request includes %v evaluations IDs, must be %v or fewer",
+			"request includes %v evaluation IDs, must be %v or fewer",
 			numIDs, structs.MaxUUIDsPerWriteRequest))
 	}

@@ -73,8 +79,9 @@ func (s *HTTPServer) evalsDeleteRequest(resp http.ResponseWriter, req *http.Requ
 	if err := s.agent.RPC(structs.EvalDeleteRPCMethod, &args, &reply); err != nil {
 		return nil, err
 	}
+
 	setIndex(resp, reply.Index)
-	return nil, nil
+	return reply, nil
 }

 func (s *HTTPServer) EvalSpecificRequest(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
--- a/command/agent/eval_endpoint_test.go
+++ b/command/agent/eval_endpoint_test.go
@@ -138,7 +138,7 @@ func TestHTTP_EvalsDelete(t *testing.T) {
 					// Make the request and check the response.
 					obj, err := s.Server.EvalsRequest(respW, req)
 					require.Equal(t,
-						CodedError(http.StatusBadRequest, "request does not include any evaluation IDs"), err)
+						CodedError(http.StatusBadRequest, "evals must be deleted by either ID or filter"), err)
 					require.Nil(t, obj)
 				})
 			},
@@ -169,7 +169,7 @@ func TestHTTP_EvalsDelete(t *testing.T) {
 					obj, err := s.Server.EvalsRequest(respW, req)
 					require.Equal(t,
 						CodedError(http.StatusBadRequest,
-							"request includes 8000 evaluations IDs, must be 7281 or fewer"), err)
+							"request includes 8000 evaluation IDs, must be 7281 or fewer"), err)
 					require.Nil(t, obj)
 				})
 			},
@@ -223,8 +223,10 @@ func TestHTTP_EvalsDelete(t *testing.T) {

 					// Make the request and check the response.
 					obj, err := s.Server.EvalsRequest(respW, req)
-					require.Nil(t, err)
-					require.Nil(t, obj)
+					require.NoError(t, err)
+					require.NotNil(t, obj)
+					deleteResp := obj.(structs.EvalDeleteResponse)
+					require.Equal(t, deleteResp.Count, 1)

 					// Ensure the eval is not found.
 					readEval, err := s.Agent.server.State().EvalByID(nil, mockEval.ID)
--- a/command/eval_delete.go
+++ b/command/eval_delete.go
@@ -4,7 +4,6 @@ import (
 	"errors"
 	"fmt"
 	"strings"
-	"time"

 	"github.com/hashicorp/nomad/api"
 	"github.com/hashicorp/nomad/api/contexts"
@@ -58,7 +57,9 @@ Eval Delete Options:
  -filter
    Specifies an expression used to filter evaluations by for deletion. When
    using this flag, it is advisable to ensure the syntax is correct using the
-    eval list command first.
+    eval list command first. Note that deleting evals by filter is imprecise:
+    for sets of evals larger than a single raft log batch, evals can be inserted
+    behind the cursor and therefore be missed.

  -yes
    Bypass the confirmation prompt if an evaluation ID was not provided.
@@ -148,32 +149,7 @@ func (e *EvalDeleteCommand) Run(args []string) int {
 		e.deleteByArg = true
 		exitCode, err = e.handleEvalArgDelete(args[0])
 	default:
-
-		// Track the next token, so we can iterate all pages that match the
-		// passed filter.
-		var nextToken string
-
-		// It is possible the filter matches a large number of evaluations
-		// which means we need to run a number of batch deletes. Perform
-		// iteration here rather than recursion in later function, so we avoid
-		// any potential issues with stack size limits.
-		for {
-			exitCode, nextToken, err = e.handleFlagFilterDelete(nextToken)
-
-			// If there is another page of evaluations matching the filter,
-			// iterate the loop and delete the next batch of evals. We pause
-			// for a 500ms rather than just run as fast as the code and machine
-			// possibly can. This means deleting 13million evals will take
-			// roughly 13-15 mins, which seems reasonable. It is worth noting,
-			// we do not expect operators to delete this many evals in a single
-			// run and expect more careful filtering options to be used.
-			if nextToken != "" {
-				time.Sleep(500 * time.Millisecond)
-				continue
-			} else {
-				break
-			}
-		}
+		exitCode, err = e.handleDeleteByFilter(e.filter)
 	}

 	// Do not exit if we got an error as it's possible this was on the
@@ -228,93 +204,6 @@ func (e *EvalDeleteCommand) handleEvalArgDelete(evalID string) (int, error) {
 	return code, err
 }

-// handleFlagFilterDelete handles deletion of evaluations discovered using
-// the filter. It is unknown how many will match the operator criteria so
-// this function batches lookup and delete requests into sensible numbers.
-func (e *EvalDeleteCommand) handleFlagFilterDelete(nt string) (int, string, error) {
-
-	evalsToDelete, nextToken, err := e.batchLookupEvals(nt)
-	if err != nil {
-		return 1, "", err
-	}
-
-	numEvalsToDelete := len(evalsToDelete)
-
-	// The filter flags are operator controlled, therefore ensure we
-	// actually found some evals to delete. Otherwise, inform the operator
-	// their flags are potentially incorrect.
-	if numEvalsToDelete == 0 {
-		if e.numDeleted > 0 {
-			return 0, "", nil
-		} else {
-			return 1, "", errors.New("failed to find any evals that matched filter criteria")
-		}
-	}
-
-	if code, actioned, err := e.batchDelete(evalsToDelete); err != nil {
-		return code, "", err
-	} else if !actioned {
-		return code, "", nil
-	}
-
-	e.Ui.Info(fmt.Sprintf("Successfully deleted batch of %v %s",
-		numEvalsToDelete, correctGrammar("evaluation", numEvalsToDelete)))
-
-	return 0, nextToken, nil
-}
-
-// batchLookupEvals handles batched lookup of evaluations using the operator
-// provided filter. The lookup is performed a maximum number of 3 times to
-// ensure their size is limited and the number of evals to delete doesn't exceed
-// the total allowable in a single call.
-//
-// The JSON serialized evaluation API object is 350-380B in size.
-// 2426 * 380B (3.8e-4 MB) = 0.92MB. We may want to make this configurable
-// in the future, but this is counteracted by the CLI logic which will loop
-// until the user tells it to exit, or all evals matching the filter are
-// deleted. 2426 * 3 falls below the maximum limit for eval IDs in a single
-// delete request (set by MaxEvalIDsPerDeleteRequest).
-func (e *EvalDeleteCommand) batchLookupEvals(nextToken string) ([]*api.Evaluation, string, error) {
-
-	var evalsToDelete []*api.Evaluation
-	currentNextToken := nextToken
-
-	// Call List 3 times to accumulate the maximum number if eval IDs supported
-	// in a single Delete request. See math above.
-	for i := 0; i < 3; i++ {
-
-		// Generate the query options using the passed next token and filter. The
-		// per page value is less than the total number we can include in a single
-		// delete request. This keeps the maximum size of the return object at a
-		// reasonable size.
-		opts := api.QueryOptions{
-			Filter:    e.filter,
-			PerPage:   2426,
-			NextToken: currentNextToken,
-		}
-
-		evalList, meta, err := e.client.Evaluations().List(&opts)
-		if err != nil {
-			return nil, "", err
-		}
-
-		if len(evalList) > 0 {
-			evalsToDelete = append(evalsToDelete, evalList...)
-		}
-
-		// Store the next token no matter if it is empty or populated.
-		currentNextToken = meta.NextToken
-
-		// If there is no next token, ensure we exit and avoid any new loops
-		// which will result in duplicate IDs.
-		if currentNextToken == "" {
-			break
-		}
-	}
-
-	return evalsToDelete, currentNextToken, nil
-}
-
 // batchDelete is responsible for deleting the passed evaluations and asking
 // any confirmation questions along the way. It will ask whether the operator
 // want to list the evals before deletion, and optionally ask for confirmation
@@ -404,3 +293,38 @@ func correctGrammar(word string, num int) string {
 	}
 	return word
 }
+
+func (e *EvalDeleteCommand) handleDeleteByFilter(filterExpr string) (int, error) {
+
+	// If the user did not wish to bypass the confirmation step, ask this now
+	// and handle the response.
+	if !e.yes && !e.deleteByArg {
+
+		resp, _, err := e.client.Evaluations().Count(&api.QueryOptions{
+			Filter: filterExpr,
+		})
+		if err != nil {
+			return 1, err
+		}
+
+		code, deleteEvals := e.askQuestion(fmt.Sprintf(
+			"Are you sure you want to delete %d evals? [y/N]",
+			resp.Count), "Cancelling eval deletion")
+		e.Ui.Output("")
+
+		if !deleteEvals {
+			return code, nil
+		}
+	}
+
+	resp, _, err := e.client.Evaluations().DeleteOpts(&api.EvalDeleteRequest{
+		Filter: filterExpr,
+	}, nil)
+	if err != nil {
+		return 1, err
+	}
+	e.numDeleted = resp.Count
+
+	return 0, nil
+
+}