From 9cdbbbb583f77c6c0bd8e799a588173babc80ba9 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Fri, 14 Oct 2022 14:15:07 -0700 Subject: [PATCH] test: extend timing and output of overlap e2e test (#14894) Keeps failing in the nightly e2e test with unhelpful output like: ``` Failed === RUN TestOverlap overlap_test.go:92: Followup job overlap93ee1d2b blocked. Sleeping for the rest of overlap48c26c39's shutdown_delay (9.2/10s) overlap_test.go:105: 1500/2000 retries reached for github.com/hashicorp/nomad/e2e/overlap.TestOverlap (err=timed out before an allocation was found for overlap93ee1d2b) overlap_test.go:105: timeout: timed out before an allocation was found for overlap93ee1d2b --- FAIL: TestOverlap (38.96s) ``` I have not been able to replicate it in my own e2e cluster, so I added the EvalDump helper to add detailed eval information like: ``` === RUN TestOverlap 1/1 Job overlap7b0e90ec Eval c38c9919-a4f0-5baf-45f7-0702383c682a Type: service TriggeredBy: job-register Deployment: Status: pending () NextEval: PrevEval: BlockedEval: -- No placement failures -- QueuedAllocs: SnapshotIdx: 0 CreateIndex: 96 ModifyIndex: 96 ... ``` Hopefully helpful when debugging other tests as well! --- e2e/e2eutil/utils.go | 58 +++++++++++++++++++++++++++++++++++++ e2e/overlap/overlap_test.go | 5 +++- 2 files changed, 62 insertions(+), 1 deletion(-) diff --git a/e2e/e2eutil/utils.go b/e2e/e2eutil/utils.go index b98beb9eb..3efc0e433 100644 --- a/e2e/e2eutil/utils.go +++ b/e2e/e2eutil/utils.go @@ -1,9 +1,11 @@ package e2eutil import ( + "bytes" "fmt" "os" "testing" + "text/template" "time" api "github.com/hashicorp/nomad/api" @@ -304,3 +306,59 @@ func WaitForDeployment(t *testing.T, nomadClient *api.Client, deployID string, s require.NoError(t, err, "failed to wait on deployment") }) } + +// DumpEvals for a job. This is intended to be used during test development or +// prior to exiting a test after an assertion failed. +func DumpEvals(c *api.Client, jobID string) string { + evals, _, err := c.Jobs().Evaluations(jobID, nil) + if err != nil { + return fmt.Sprintf("error retrieving evals for job %q: %s", jobID, err) + } + if len(evals) == 0 { + return fmt.Sprintf("no evals found for job %q", jobID) + } + buf := bytes.NewBuffer(nil) + for i, e := range evals { + err := EvalTemplate.Execute(buf, map[string]interface{}{ + "Index": i + 1, + "Total": len(evals), + "Eval": e, + }) + if err != nil { + fmt.Fprintf(buf, "error rendering eval: %s\n", err) + } + } + return buf.String() +} + +var EvalTemplate = template.Must(template.New("dump_eval").Parse( + `{{.Index}}/{{.Total}} Job {{.Eval.JobID}} Eval {{.Eval.ID}} + Type: {{.Eval.Type}} + TriggeredBy: {{.Eval.TriggeredBy}} + Deployment: {{.Eval.DeploymentID}} + Status: {{.Eval.Status}} ({{.Eval.StatusDescription}}) + NextEval: {{.Eval.NextEval}} + PrevEval: {{.Eval.PreviousEval}} + BlockedEval: {{.Eval.BlockedEval}} + {{- range $k, $v := .Eval.FailedTGAllocs}} + Failed Group: {{$k}} + NodesEvaluated: {{$v.NodesEvaluated}} + NodesFiltered: {{$v.NodesFiltered}} + NodesAvailable: {{range $dc, $n := $v.NodesAvailable}}{{$dc}}:{{$n}} {{end}} + NodesExhausted: {{$v.NodesExhausted}} + ClassFiltered: {{len $v.ClassFiltered}} + ConstraintFilt: {{len $v.ConstraintFiltered}} + DimensionExhst: {{range $d, $n := $v.DimensionExhausted}}{{$d}}:{{$n}} {{end}} + ResourcesExhst: {{range $r, $n := $v.ResourcesExhausted}}{{$r}}:{{$n}} {{end}} + QuotaExhausted: {{range $i, $q := $v.QuotaExhausted}}{{$q}} {{end}} + CoalescedFail: {{$v.CoalescedFailures}} + ScoreMetaDAta: {{len $v.ScoreMetaData}} + AllocationTime: {{$v.AllocationTime}} + {{- else}} + -- No placement failures -- + {{- end}} + QueuedAllocs: {{range $k, $n := .Eval.QueuedAllocations}}{{$k}}:{{$n}} {{end}} + SnapshotIdx: {{.Eval.SnapshotIndex}} + CreateIndex: {{.Eval.CreateIndex}} + ModifyIndex: {{.Eval.ModifyIndex}} +`)) diff --git a/e2e/overlap/overlap_test.go b/e2e/overlap/overlap_test.go index 78106312d..a9ba990fc 100644 --- a/e2e/overlap/overlap_test.go +++ b/e2e/overlap/overlap_test.go @@ -37,10 +37,13 @@ func TestOverlap(t *testing.T) { var origAlloc *api.AllocationListStub testutil.Wait(t, func() (bool, error) { + time.Sleep(time.Second) + a, _, err := nomadClient.Jobs().Allocations(jobID1, false, nil) must.NoError(t, err) if n := len(a); n == 0 { - return false, fmt.Errorf("timed out before an allocation was found for %s", jobID1) + evalOut := e2eutil.DumpEvals(nomadClient, jobID1) + return false, fmt.Errorf("timed out before an allocation was found for %s. Evals:\n%s", jobID1, evalOut) } must.Len(t, 1, a)