From 9cdbbbb583f77c6c0bd8e799a588173babc80ba9 Mon Sep 17 00:00:00 2001
From: Michael Schurter <mschurter@hashicorp.com>
Date: Fri, 14 Oct 2022 14:15:07 -0700
Subject: [PATCH] test: extend timing and output of overlap e2e test (#14894)

Keeps failing in the nightly e2e test with unhelpful output like:
```
Failed
=== RUN   TestOverlap
    overlap_test.go:92: Followup job overlap93ee1d2b blocked. Sleeping for the rest of overlap48c26c39's shutdown_delay (9.2/10s)
    overlap_test.go:105: 1500/2000 retries reached for github.com/hashicorp/nomad/e2e/overlap.TestOverlap (err=timed out before an allocation was found for overlap93ee1d2b)
    overlap_test.go:105: timeout: timed out before an allocation was found for overlap93ee1d2b
--- FAIL: TestOverlap (38.96s)
```

I have not been able to replicate it in my own e2e cluster, so I added
the EvalDump helper to add detailed eval information like:

```
=== RUN   TestOverlap
1/1 Job overlap7b0e90ec Eval c38c9919-a4f0-5baf-45f7-0702383c682a
  Type:         service
  TriggeredBy:  job-register
  Deployment:
  Status:       pending ()
  NextEval:
  PrevEval:
  BlockedEval:
   -- No placement failures --
  QueuedAllocs:
  SnapshotIdx:  0
  CreateIndex:  96
  ModifyIndex:  96

...
```

Hopefully helpful when debugging other tests as well!
---
 e2e/e2eutil/utils.go        | 58 +++++++++++++++++++++++++++++++++++++
 e2e/overlap/overlap_test.go |  5 +++-
 2 files changed, 62 insertions(+), 1 deletion(-)

diff --git a/e2e/e2eutil/utils.go b/e2e/e2eutil/utils.go
index b98beb9eb..3efc0e433 100644
--- a/e2e/e2eutil/utils.go
+++ b/e2e/e2eutil/utils.go
@@ -1,9 +1,11 @@
 package e2eutil
 
 import (
+	"bytes"
 	"fmt"
 	"os"
 	"testing"
+	"text/template"
 	"time"
 
 	api "github.com/hashicorp/nomad/api"
@@ -304,3 +306,59 @@ func WaitForDeployment(t *testing.T, nomadClient *api.Client, deployID string, s
 		require.NoError(t, err, "failed to wait on deployment")
 	})
 }
+
+// DumpEvals for a job. This is intended to be used during test development or
+// prior to exiting a test after an assertion failed.
+func DumpEvals(c *api.Client, jobID string) string {
+	evals, _, err := c.Jobs().Evaluations(jobID, nil)
+	if err != nil {
+		return fmt.Sprintf("error retrieving evals for job %q: %s", jobID, err)
+	}
+	if len(evals) == 0 {
+		return fmt.Sprintf("no evals found for job %q", jobID)
+	}
+	buf := bytes.NewBuffer(nil)
+	for i, e := range evals {
+		err := EvalTemplate.Execute(buf, map[string]interface{}{
+			"Index": i + 1,
+			"Total": len(evals),
+			"Eval":  e,
+		})
+		if err != nil {
+			fmt.Fprintf(buf, "error rendering eval: %s\n", err)
+		}
+	}
+	return buf.String()
+}
+
+var EvalTemplate = template.Must(template.New("dump_eval").Parse(
+	`{{.Index}}/{{.Total}} Job {{.Eval.JobID}} Eval {{.Eval.ID}}
+  Type:         {{.Eval.Type}}
+  TriggeredBy:  {{.Eval.TriggeredBy}}
+  Deployment:   {{.Eval.DeploymentID}}
+  Status:       {{.Eval.Status}} ({{.Eval.StatusDescription}})
+  NextEval:     {{.Eval.NextEval}}
+  PrevEval:     {{.Eval.PreviousEval}}
+  BlockedEval:  {{.Eval.BlockedEval}}
+  {{- range $k, $v := .Eval.FailedTGAllocs}}
+    Failed Group: {{$k}}
+      NodesEvaluated: {{$v.NodesEvaluated}}
+  		NodesFiltered:  {{$v.NodesFiltered}}
+  		NodesAvailable: {{range $dc, $n := $v.NodesAvailable}}{{$dc}}:{{$n}} {{end}}
+  		NodesExhausted: {{$v.NodesExhausted}}
+  		ClassFiltered:  {{len $v.ClassFiltered}}
+  		ConstraintFilt: {{len $v.ConstraintFiltered}}
+  		DimensionExhst: {{range $d, $n := $v.DimensionExhausted}}{{$d}}:{{$n}} {{end}}
+  		ResourcesExhst: {{range $r, $n := $v.ResourcesExhausted}}{{$r}}:{{$n}} {{end}}
+  		QuotaExhausted: {{range $i, $q := $v.QuotaExhausted}}{{$q}} {{end}}
+  		CoalescedFail:  {{$v.CoalescedFailures}}
+  		ScoreMetaDAta:  {{len $v.ScoreMetaData}}
+  		AllocationTime: {{$v.AllocationTime}}
+  {{- else}}
+   -- No placement failures --
+  {{- end}}
+  QueuedAllocs: {{range $k, $n := .Eval.QueuedAllocations}}{{$k}}:{{$n}} {{end}}
+  SnapshotIdx:  {{.Eval.SnapshotIndex}}
+  CreateIndex:  {{.Eval.CreateIndex}}
+  ModifyIndex:  {{.Eval.ModifyIndex}}
+`))
diff --git a/e2e/overlap/overlap_test.go b/e2e/overlap/overlap_test.go
index 78106312d..a9ba990fc 100644
--- a/e2e/overlap/overlap_test.go
+++ b/e2e/overlap/overlap_test.go
@@ -37,10 +37,13 @@ func TestOverlap(t *testing.T) {
 
 	var origAlloc *api.AllocationListStub
 	testutil.Wait(t, func() (bool, error) {
+		time.Sleep(time.Second)
+
 		a, _, err := nomadClient.Jobs().Allocations(jobID1, false, nil)
 		must.NoError(t, err)
 		if n := len(a); n == 0 {
-			return false, fmt.Errorf("timed out before an allocation was found for %s", jobID1)
+			evalOut := e2eutil.DumpEvals(nomadClient, jobID1)
+			return false, fmt.Errorf("timed out before an allocation was found for %s. Evals:\n%s", jobID1, evalOut)
 		}
 		must.Len(t, 1, a)