From 4e7ad58d2df6ddef0243a2ec406f5cba39b050f9 Mon Sep 17 00:00:00 2001
From: Tim Gross
Date: Wed, 29 Nov 2023 12:16:41 -0500
Subject: [PATCH] E2E: modernize `ConsulTemplate` test and fix some assertions
 (#19126)

The `TestTemplateUpdateTriggers` test is flaky because of what turned out to
be an incompatibility between the Consul agent on the E2E cluster and the HCP
Consul server we were running but hadn't upgraded in a while. Upgrading the
HCP Consul server seems to have fixed the tests, but while I'm in here I've
updated this test suite:

* Port all of the consul template tests off of the old framework and upgrade
  to the e2e "v3" helpers where feasible.
* Clean up some of the assertions in the update triggers test to make the
  purpose of the test clearer.
* Remove unnecessary default fields from the job specs.

Closes: #19075
---
 e2e/consultemplate/consultemplate.go          | 551 -----------------
 e2e/consultemplate/consultemplate_test.go     | 553 ++++++++++++++++++
 e2e/consultemplate/doc.go                     |   7 +
 .../input/bad_template_paths.nomad            |   1 -
 .../input/nomad_provider_service.nomad        |   5 +-
 .../input/nomad_provider_service_lookup.nomad |   2 -
 .../input/nomad_provider_service_ns.nomad     |   5 +-
 e2e/consultemplate/input/template_paths.nomad |   1 -
 .../input/template_shared_alloc.nomad         |   1 -
 ...lating.nomad => update_triggers.nomad.hcl} |   4 +-
 e2e/e2e_test.go                               |   2 +-
 e2e/v3/jobs3/jobs3.go                         |   2 +
 12 files changed, 571 insertions(+), 563 deletions(-)
 delete mode 100644 e2e/consultemplate/consultemplate.go
 create mode 100644 e2e/consultemplate/consultemplate_test.go
 create mode 100644 e2e/consultemplate/doc.go
 rename e2e/consultemplate/input/{templating.nomad => update_triggers.nomad.hcl} (97%)

diff --git a/e2e/consultemplate/consultemplate.go b/e2e/consultemplate/consultemplate.go
deleted file mode 100644
index 7aab6e384..000000000
--- a/e2e/consultemplate/consultemplate.go
+++ /dev/null
@@ -1,551 +0,0 @@
-// Copyright (c) HashiCorp, Inc.
-// SPDX-License-Identifier: BUSL-1.1
-
-package consultemplate
-
-import (
-	"fmt"
-	"os"
-	"strings"
-	"time"
-
-	capi "github.com/hashicorp/consul/api"
-	api "github.com/hashicorp/nomad/api"
-	"github.com/hashicorp/nomad/e2e/e2eutil"
-	"github.com/hashicorp/nomad/e2e/framework"
-	"github.com/hashicorp/nomad/helper/uuid"
-	"github.com/hashicorp/nomad/jobspec"
-	"github.com/hashicorp/nomad/nomad/structs"
-	"github.com/hashicorp/nomad/testutil"
-)
-
-const ns = ""
-
-type ConsulTemplateTest struct {
-	framework.TC
-	jobIDs     []string
-	consulKeys []string
-
-	// namespaceIDs tracks the created namespace for removal after test
-	// completion.
-	namespaceIDs []string
-
-	// namespacedJobIDs tracks any non-default namespaced jobs for removal
-	// after test completion.
- namespacedJobIDs map[string][]string -} - -func init() { - framework.AddSuites(&framework.TestSuite{ - Component: "ConsulTemplate", - CanRunLocal: true, - Consul: true, - Cases: []framework.TestCase{ - &ConsulTemplateTest{ - namespacedJobIDs: make(map[string][]string), - }, - }, - }) -} - -func (tc *ConsulTemplateTest) BeforeAll(f *framework.F) { - e2eutil.WaitForLeader(f.T(), tc.Nomad()) - e2eutil.WaitForNodesReady(f.T(), tc.Nomad(), 1) -} - -func (tc *ConsulTemplateTest) AfterEach(f *framework.F) { - if os.Getenv("NOMAD_TEST_SKIPCLEANUP") == "1" { - return - } - - for _, id := range tc.jobIDs { - err := e2eutil.StopJob(id, "-purge") - f.Assert().NoError(err, "could not clean up job", id) - } - tc.jobIDs = []string{} - - for _, key := range tc.consulKeys { - _, err := tc.Consul().KV().Delete(key, nil) - f.Assert().NoError(err, "could not clean up consul key", key) - } - tc.consulKeys = []string{} - - for namespace, jobIDs := range tc.namespacedJobIDs { - for _, jobID := range jobIDs { - err := e2eutil.StopJob(jobID, "-purge", "-namespace", namespace) - f.Assert().NoError(err) - } - } - tc.namespacedJobIDs = make(map[string][]string) - - for _, ns := range tc.namespaceIDs { - _, err := e2eutil.Command("nomad", "namespace", "delete", ns) - f.Assert().NoError(err) - } - tc.namespaceIDs = []string{} - - _, err := e2eutil.Command("nomad", "system", "gc") - f.NoError(err) -} - -// TestTemplateUpdateTriggers exercises consul-template integration, verifying that: -// - missing keys block allocations from starting -// - key updates trigger re-render -// - service updates trigger re-render -// - 'noop' vs ”restart' configuration -func (tc *ConsulTemplateTest) TestTemplateUpdateTriggers(f *framework.F) { - - wc := &e2eutil.WaitConfig{} - interval, retries := wc.OrDefault() - - key := "consultemplate-" + uuid.Generate()[:8] - jobID := key - - replacement := fmt.Sprintf(`--- -key: {{ key "%s" }} -job: {{ env "NOMAD_JOB_NAME" }} -`, key) - - // Ensure consul key does not exist - _, err := tc.Consul().KV().Delete(key, nil) - f.NoError(err) - - // Parse job so we can replace the template block with isolated keys - job, err := jobspec.ParseFile("consultemplate/input/templating.nomad") - f.NoError(err) - job.ID = &jobID - - job.TaskGroups[0].Tasks[0].Templates[1].EmbeddedTmpl = &replacement - job.TaskGroups[1].Tasks[0].Templates[1].EmbeddedTmpl = &replacement - - tc.jobIDs = append(tc.jobIDs, jobID) - - _, _, err = tc.Nomad().Jobs().Register(job, nil) - f.NoError(err, "could not register job") - - expected := map[string]string{ - "upstream": "running", - "exec_downstream": "pending", - "docker_downstream": "pending"} - f.NoError(waitForAllocStatusByGroup(jobID, ns, expected, nil)) - - // We won't reschedule any of these allocs, so we can cache these IDs for later - downstreams := map[string]string{} // alloc ID -> group name - allocs, err := e2eutil.AllocsForJob(jobID, ns) - f.NoError(err) - for _, alloc := range allocs { - group := alloc["Task Group"] - if group == "docker_downstream" || group == "exec_downstream" { - downstreams[alloc["ID"]] = group - } - } - - // note: checking pending above doesn't tell us whether we've tried to render - // the template yet, so we still need to poll for the template event - for allocID, group := range downstreams { - var checkErr error - testutil.WaitForResultRetries(retries, func() (bool, error) { - time.Sleep(interval) - out, err := e2eutil.Command("nomad", "alloc", "status", allocID) - f.NoError(err, "could not get allocation status") - return 
strings.Contains(out, "Missing: kv.block"), - fmt.Errorf("expected %q to be blocked on Consul key", group) - }, func(e error) { - checkErr = e - }) - f.NoError(checkErr) - } - - // Write our key to Consul - _, err = tc.Consul().KV().Put(&capi.KVPair{Key: key, Value: []byte("foo")}, nil) - f.NoError(err) - tc.consulKeys = append(tc.consulKeys, key) - - // template will render, allowing downstream allocs to run - expected = map[string]string{ - "upstream": "running", - "exec_downstream": "running", - "docker_downstream": "running"} - f.NoError(waitForAllocStatusByGroup(jobID, ns, expected, &e2eutil.WaitConfig{ - Interval: time.Millisecond * 300, - Retries: 100, - })) - - // verify we've rendered the templates - for allocID := range downstreams { - f.NoError(waitForTemplateRender(allocID, "task/local/kv.yml", - func(out string) bool { - return strings.TrimSpace(out) == "---\nkey: foo\njob: templating" - }, nil), "expected consul key to be rendered") - - f.NoError(waitForTemplateRender(allocID, "task/local/services.conf", - func(out string) bool { - confLines := strings.Split(strings.TrimSpace(out), "\n") - servers := 0 - for _, line := range confLines { - if strings.HasPrefix(line, "server upstream-service ") { - servers++ - } - } - return servers == 2 - }, nil), "expected 2 upstream servers") - } - - // Update our key in Consul - _, err = tc.Consul().KV().Put(&capi.KVPair{Key: key, Value: []byte("bar")}, nil) - f.NoError(err) - - // Wait for restart - for allocID, group := range downstreams { - var checkErr error - testutil.WaitForResultRetries(retries, func() (bool, error) { - time.Sleep(interval) - out, err := e2eutil.Command("nomad", "alloc", "status", allocID) - f.NoError(err, "could not get allocation status") - - section, err := e2eutil.GetSection(out, "Task Events:") - f.NoError(err, out) - - restarts, err := e2eutil.GetField(section, "Total Restarts") - f.NoError(err) - return restarts == "1", - fmt.Errorf("expected 1 restart for %q but found %s", group, restarts) - }, func(e error) { - checkErr = e - }) - f.NoError(checkErr) - - // verify we've re-rendered the template - f.NoError(waitForTemplateRender(allocID, "task/local/kv.yml", - func(out string) bool { - return strings.TrimSpace(out) == "---\nkey: bar\njob: templating" - }, nil), "expected updated consul key") - } - - // increase the count for upstreams - count := 3 - job.TaskGroups[2].Count = &count - _, _, err = tc.Nomad().Jobs().Register(job, nil) - f.NoError(err, "could not register job") - - // wait for re-rendering - for allocID := range downstreams { - f.NoError(waitForTemplateRender(allocID, "task/local/services.conf", - func(out string) bool { - confLines := strings.Split(strings.TrimSpace(out), "\n") - servers := 0 - for _, line := range confLines { - if strings.HasPrefix(line, "server upstream-service ") { - servers++ - } - } - return servers == 3 - }, nil), "expected 3 upstream servers") - - // verify noop was honored: no additional restarts - out, err := e2eutil.Command("nomad", "alloc", "status", allocID) - f.NoError(err, "could not get allocation status") - - section, err := e2eutil.GetSection(out, "Task Events:") - f.NoError(err, out) - - restarts, err := e2eutil.GetField(section, "Total Restarts") - f.NoError(err) - f.Equal("1", restarts, "expected no new restarts for group") - } -} - -// TestTemplatePathInterpolation_Ok asserts that NOMAD_*_DIR variables are -// properly interpolated into template source and destination paths without -// being treated as escaping. 
-func (tc *ConsulTemplateTest) TestTemplatePathInterpolation_Ok(f *framework.F) { - jobID := "template-paths-" + uuid.Generate()[:8] - tc.jobIDs = append(tc.jobIDs, jobID) - - allocStubs := e2eutil.RegisterAndWaitForAllocs( - f.T(), tc.Nomad(), "consultemplate/input/template_paths.nomad", jobID, "") - f.Len(allocStubs, 1) - allocID := allocStubs[0].ID - - e2eutil.WaitForAllocRunning(f.T(), tc.Nomad(), allocID) - - f.NoError(waitForTemplateRender(allocID, "task/secrets/foo/dst", - func(out string) bool { - return len(out) > 0 - }, nil), "expected file to have contents") - - f.NoError(waitForTemplateRender(allocID, "alloc/shared.txt", - func(out string) bool { - return len(out) > 0 - }, nil), "expected shared-alloc-dir file to have contents") -} - -// TestTemplatePathInterpolation_Bad asserts that template.source paths are not -// allowed to escape the sandbox directory tree by default. -func (tc *ConsulTemplateTest) TestTemplatePathInterpolation_Bad(f *framework.F) { - wc := &e2eutil.WaitConfig{} - interval, retries := wc.OrDefault() - - jobID := "bad-template-paths-" + uuid.Generate()[:8] - tc.jobIDs = append(tc.jobIDs, jobID) - - allocStubs := e2eutil.RegisterAndWaitForAllocs( - f.T(), tc.Nomad(), "consultemplate/input/bad_template_paths.nomad", jobID, "") - f.Len(allocStubs, 1) - allocID := allocStubs[0].ID - - // Wait for alloc to fail - var err error - var alloc *api.Allocation - testutil.WaitForResultRetries(retries, func() (bool, error) { - time.Sleep(interval) - alloc, _, err = tc.Nomad().Allocations().Info(allocID, nil) - if err != nil { - return false, err - } - - return alloc.ClientStatus == structs.AllocClientStatusFailed, fmt.Errorf("expected status failed, but was: %s", alloc.ClientStatus) - }, func(err error) { - f.NoError(err, "failed to wait on alloc") - }) - - // Assert the "source escapes" error occurred to prevent false - // positives. 
- found := false - for _, event := range alloc.TaskStates["task"].Events { - if strings.Contains(event.DisplayMessage, "template source path escapes alloc directory") { - found = true - break - } - } - f.True(found, "alloc failed but NOT due to expected source path escape error") -} - -// TestTemplatePathInterpolation_SharedAllocDir asserts that NOMAD_ALLOC_DIR -// is supported as a destination for artifact and template blocks, and -// that it is properly interpolated for task drivers with varying -// filesystem isolation -func (tc *ConsulTemplateTest) TestTemplatePathInterpolation_SharedAllocDir(f *framework.F) { - jobID := "template-shared-alloc-" + uuid.Generate()[:8] - tc.jobIDs = append(tc.jobIDs, jobID) - - allocStubs := e2eutil.RegisterAndWaitForAllocs( - f.T(), tc.Nomad(), "consultemplate/input/template_shared_alloc.nomad", jobID, "") - f.Len(allocStubs, 1) - allocID := allocStubs[0].ID - - e2eutil.WaitForAllocRunning(f.T(), tc.Nomad(), allocID) - - for _, task := range []string{"docker", "exec", "raw_exec"} { - - // tests that we can render templates into the shared alloc directory - f.NoError(waitForTaskFile(allocID, task, "${NOMAD_ALLOC_DIR}/raw_exec.env", - func(out string) bool { - return len(out) > 0 && strings.TrimSpace(out) != "/alloc" - }, nil), "expected raw_exec.env to not be '/alloc'") - - f.NoError(waitForTaskFile(allocID, task, "${NOMAD_ALLOC_DIR}/exec.env", - func(out string) bool { - return strings.TrimSpace(out) == "/alloc" - }, nil), "expected shared exec.env to contain '/alloc'") - - f.NoError(waitForTaskFile(allocID, task, "${NOMAD_ALLOC_DIR}/docker.env", - func(out string) bool { - return strings.TrimSpace(out) == "/alloc" - }, nil), "expected shared docker.env to contain '/alloc'") - - // test that we can fetch artifacts into the shared alloc directory - for _, a := range []string{"google1.html", "google2.html", "google3.html"} { - f.NoError(waitForTaskFile(allocID, task, "${NOMAD_ALLOC_DIR}/"+a, - func(out string) bool { - return len(out) > 0 - }, nil), "expected artifact in alloc dir") - } - - // test that we can load environment variables rendered with templates using interpolated paths - out, err := e2eutil.Command("nomad", "alloc", "exec", "-task", task, allocID, "sh", "-c", "env") - f.NoError(err) - f.Contains(out, "HELLO_FROM=raw_exec") - } -} - -// TestConsulTemplate_NomadServiceLookups tests consul-templates Nomad service -// lookup functionality. It runs a job which registers two services, then -// another which performs both a list and read template function lookup against -// registered services. -func (tc *ConsulTemplateTest) TestConsulTemplate_NomadServiceLookups(f *framework.F) { - - // Set up our base job that will be used in various manners. - serviceJob, err := jobspec.ParseFile("consultemplate/input/nomad_provider_service.nomad") - f.NoError(err) - serviceJobID := "test-consul-template-nomad-lookups" + uuid.Generate()[0:8] - serviceJob.ID = &serviceJobID - - _, _, err = tc.Nomad().Jobs().Register(serviceJob, nil) - f.NoError(err) - tc.jobIDs = append(tc.jobIDs, serviceJobID) - f.NoError(e2eutil.WaitForAllocStatusExpected(serviceJobID, "default", []string{"running"}), "job should be running") - - // Pull the allocation ID for the job, we use this to ensure this is found - // in the rendered template later on. - serviceJobAllocs, err := e2eutil.AllocsForJob(serviceJobID, "default") - f.NoError(err) - f.Len(serviceJobAllocs, 1) - serviceAllocID := serviceJobAllocs[0]["ID"] - - // Create at non-default namespace. 
- _, err = e2eutil.Command("nomad", "namespace", "apply", "platform") - f.NoError(err) - tc.namespaceIDs = append(tc.namespaceIDs, "NamespaceA") - - // Register a job which includes services destined for the Nomad provider - // into the platform namespace. This is used to ensure consul-template - // lookups stay bound to the allocation namespace. - diffNamespaceServiceJobID := "test-consul-template-nomad-lookups" + uuid.Generate()[0:8] - f.NoError(e2eutil.Register(diffNamespaceServiceJobID, "consultemplate/input/nomad_provider_service_ns.nomad")) - tc.namespacedJobIDs["platform"] = append(tc.namespacedJobIDs["platform"], diffNamespaceServiceJobID) - f.NoError(e2eutil.WaitForAllocStatusExpected(diffNamespaceServiceJobID, "platform", []string{"running"}), "job should be running") - - // Register a job which includes consul-template function performing Nomad - // service listing and reads. - serviceLookupJobID := "test-consul-template-nomad-lookups" + uuid.Generate()[0:8] - f.NoError(e2eutil.Register(serviceLookupJobID, "consultemplate/input/nomad_provider_service_lookup.nomad")) - tc.jobIDs = append(tc.jobIDs, serviceLookupJobID) - f.NoError(e2eutil.WaitForAllocStatusExpected(serviceLookupJobID, "default", []string{"running"}), "job should be running") - - // Find the allocation ID for the job which contains templates, so we can - // perform filesystem actions. - serviceLookupJobAllocs, err := e2eutil.AllocsForJob(serviceLookupJobID, "default") - f.NoError(err) - f.Len(serviceLookupJobAllocs, 1) - serviceLookupAllocID := serviceLookupJobAllocs[0]["ID"] - - // Ensure the listing (nomadServices) template function has found all - // services within the default namespace. - err = waitForTaskFile(serviceLookupAllocID, "test", "${NOMAD_TASK_DIR}/services.conf", - func(out string) bool { - if !strings.Contains(out, "service default-nomad-provider-service-primary [bar foo]") { - return false - } - if !strings.Contains(out, "service default-nomad-provider-service-secondary [baz buz]") { - return false - } - return !strings.Contains(out, "service platform-nomad-provider-service-secondary [baz buz]") - }, nil) - f.NoError(err) - - // Ensure the direct service lookup has found the entry we expect. - err = waitForTaskFile(serviceLookupAllocID, "test", "${NOMAD_TASK_DIR}/service.conf", - func(out string) bool { - expected := fmt.Sprintf("service default-nomad-provider-service-primary [bar foo] dc1 %s", serviceAllocID) - return strings.Contains(out, expected) - }, nil) - f.NoError(err) - - // Scale the default namespaced service job in order to change the expected - // number of entries. - count := 3 - serviceJob.TaskGroups[0].Count = &count - _, _, err = tc.Nomad().Jobs().Register(serviceJob, nil) - f.NoError(err) - - // Pull the allocation ID for the job, we use this to ensure this is found - // in the rendered template later on. Wrap this in an eventual do to the - // eventual consistency around the service registration process. - f.Eventually(func() bool { - serviceJobAllocs, err = e2eutil.AllocsForJob(serviceJobID, "default") - if err != nil { - return false - } - return len(serviceJobAllocs) == 3 - }, 10*time.Second, 200*time.Millisecond, "unexpected number of allocs found") - - // Track the expected entries, including the allocID to make this test - // actually valuable. 
- var expectedEntries []string - for _, allocs := range serviceJobAllocs { - e := fmt.Sprintf("service default-nomad-provider-service-primary [bar foo] dc1 %s", allocs["ID"]) - expectedEntries = append(expectedEntries, e) - } - - // Ensure the direct service lookup has the new entries we expect. - err = waitForTaskFile(serviceLookupAllocID, "test", "${NOMAD_TASK_DIR}/service.conf", - func(out string) bool { - for _, entry := range expectedEntries { - if !strings.Contains(out, entry) { - return false - } - } - return true - }, nil) - f.NoError(err) -} - -func waitForTaskFile(allocID, task, path string, test func(out string) bool, wc *e2eutil.WaitConfig) error { - var err error - var out string - interval, retries := wc.OrDefault() - - testutil.WaitForResultRetries(retries, func() (bool, error) { - time.Sleep(interval) - out, err = e2eutil.Command("nomad", "alloc", "exec", "-task", task, allocID, "sh", "-c", "cat "+path) - if err != nil { - return false, fmt.Errorf("could not cat file %q from task %q in allocation %q: %v", - path, task, allocID, err) - } - return test(out), nil - }, func(e error) { - err = fmt.Errorf("test for file content failed: got %#v\nerror: %v", out, e) - }) - return err -} - -// waitForTemplateRender is a helper that grabs a file via alloc fs -// and tests it for -func waitForTemplateRender(allocID, path string, test func(string) bool, wc *e2eutil.WaitConfig) error { - var err error - var out string - interval, retries := wc.OrDefault() - - testutil.WaitForResultRetries(retries, func() (bool, error) { - time.Sleep(interval) - out, err = e2eutil.Command("nomad", "alloc", "fs", allocID, path) - if err != nil { - return false, fmt.Errorf("could not get file %q from allocation %q: %v", - path, allocID, err) - } - return test(out), nil - }, func(e error) { - err = fmt.Errorf("test for file content failed: got %#v\nerror: %v", out, e) - }) - return err -} - -// waitForAllocStatusByGroup is similar to WaitForAllocStatus but maps -// specific task group names to statuses without having to deal with specific counts -func waitForAllocStatusByGroup(jobID, ns string, expected map[string]string, wc *e2eutil.WaitConfig) error { - var got []map[string]string - var err error - interval, retries := wc.OrDefault() - testutil.WaitForResultRetries(retries, func() (bool, error) { - time.Sleep(interval) - got, err = e2eutil.AllocsForJob(jobID, ns) - if err != nil { - return false, err - } - for _, row := range got { - group := row["Task Group"] - expectedStatus := expected[group] - gotStatus := row["Status"] - if expectedStatus != gotStatus { - return false, fmt.Errorf("expected %q to be %q, got %q", - group, expectedStatus, gotStatus) - } - } - err = nil - return true, nil - }, func(e error) { - err = fmt.Errorf("alloc status check failed: got %#v\nerror: %v", got, e) - }) - return err -} diff --git a/e2e/consultemplate/consultemplate_test.go b/e2e/consultemplate/consultemplate_test.go new file mode 100644 index 000000000..2d9bf9479 --- /dev/null +++ b/e2e/consultemplate/consultemplate_test.go @@ -0,0 +1,553 @@ +// Copyright (c) HashiCorp, Inc. 
+// SPDX-License-Identifier: BUSL-1.1 + +package consultemplate + +import ( + "fmt" + "os" + "strings" + "testing" + "time" + + capi "github.com/hashicorp/consul/api" + "github.com/hashicorp/nomad/api" + "github.com/hashicorp/nomad/e2e/e2eutil" + "github.com/hashicorp/nomad/e2e/v3/jobs3" + "github.com/hashicorp/nomad/e2e/v3/namespaces3" + "github.com/hashicorp/nomad/helper/uuid" + "github.com/hashicorp/nomad/nomad/structs" + "github.com/shoenig/test" + "github.com/shoenig/test/must" + "github.com/shoenig/test/wait" +) + +const ns = "" + +// TestTemplateUpdateTriggers exercises consul-template integration, verifying that: +// - missing keys block allocations from starting +// - key updates trigger re-render +// - service updates trigger re-render +// - 'noop' vs 'restart' configuration +func TestTemplateUpdateTriggers(t *testing.T) { + + // use a random suffix to encapsulate test keys and job ID for cleanup + key := "consultemplate-" + uuid.Generate()[:8] + + cc := e2eutil.ConsulClient(t) + + // Ensure consul key does not exist + _, err := cc.KV().Delete(key, nil) + must.NoError(t, err) + + cleanupGC(t) + + submission, cleanupJob := jobs3.Submit(t, + "./input/update_triggers.nomad.hcl", + jobs3.Detach(), + jobs3.ReplaceInJobSpec("consultemplatetest", key), + ) + t.Cleanup(cleanupJob) + jobID := submission.JobID() + + cleanupKey(t, key) + + expected := map[string]string{ + "upstream": "running", + "exec_downstream": "pending", + "docker_downstream": "pending"} + + mustWaitForStatusByGroup(t, jobID, ns, expected, 20*time.Second) + + // We won't reschedule any of these allocs, so we can cache these IDs for later + downstreams := map[string]string{} // alloc ID -> group name + allocs, err := e2eutil.AllocsForJob(jobID, ns) + must.NoError(t, err) + for _, alloc := range allocs { + group := alloc["Task Group"] + if group == "docker_downstream" || group == "exec_downstream" { + downstreams[alloc["ID"]] = group + t.Logf("alloc %q (%s) on node %q", alloc["ID"], group, alloc["Node ID"]) + } + } + + now := time.Now() + for allocID, group := range downstreams { + + // note: checking pending above doesn't tell us whether we've tried to + // render the KV template yet, so we still need to poll for the template + // event; note that it may take a long while for the `exec` task to be + // ready + must.Wait(t, wait.InitialSuccess( + wait.BoolFunc(func() bool { + out, err := e2eutil.Command("nomad", "alloc", "status", allocID) + must.NoError(t, err) + return strings.Contains(out, "Missing: kv.block") + }), + wait.Gap(time.Millisecond*500), + wait.Timeout(time.Second*30), + ), must.Sprintf("expected %q to be blocked on Consul key", group)) + + // note: although the tasks are stuck in pending, the service templates + // should be rendered already by this point or quickly thereafter + t.Logf("verifying service template contents") + mustWaitTemplateRender(t, allocID, "task/local/services.conf", + func(out string) error { + confLines := strings.Split(strings.TrimSpace(out), "\n") + servers := 0 + for _, line := range confLines { + if strings.HasPrefix(line, "server upstream-service ") { + servers++ + } + } + if servers != 2 { + return fmt.Errorf( + "expected 2 upstream servers for alloc %q, got:\n%s", allocID, out) + } + return nil + }, + time.Second*5, + ) + + t.Logf("ok for alloc %q: elapsed=%v", allocID, time.Since(now)) + } + + // Write our key to Consul + _, err = cc.KV().Put(&capi.KVPair{Key: key, Value: []byte("foo")}, nil) + must.NoError(t, err) + + t.Logf("waiting for blocked downstream allocs to run") + 
+	// template will render, allowing downstream allocs to run
+	expected = map[string]string{
+		"upstream":          "running",
+		"exec_downstream":   "running",
+		"docker_downstream": "running"}
+	mustWaitForStatusByGroup(t, jobID, ns, expected, 30*time.Second)
+
+	for allocID := range downstreams {
+
+		// verify we've rendered the templates
+		t.Logf("verifying kv template contents")
+		mustWaitTemplateRender(t, allocID, "task/local/kv.yml",
+			func(out string) error {
+				if strings.TrimSpace(out) != "---\nkey: foo\njob: templating" {
+					return fmt.Errorf("expected consul key to be rendered for alloc %q, got:%s", allocID, out)
+				}
+				return nil
+			},
+			time.Second*10)
+	}
+
+	// Update our key in Consul
+	t.Logf("updating key %v", key)
+	_, err = cc.KV().Put(&capi.KVPair{Key: key, Value: []byte("bar")}, nil)
+	must.NoError(t, err)
+
+	// Wait for restart
+	t.Logf("waiting for restart")
+	for allocID, group := range downstreams {
+		must.Wait(t, wait.InitialSuccess(
+			wait.BoolFunc(func() bool {
+				out, err := e2eutil.Command("nomad", "alloc", "status", allocID)
+				must.NoError(t, err)
+
+				section, err := e2eutil.GetSection(out, "Task Events:")
+				must.NoError(t, err,
+					must.Sprintf("could not parse Task Events section from: %v", out))
+
+				restarts, err := e2eutil.GetField(section, "Total Restarts")
+				must.NoError(t, err)
+				return restarts == "1"
+			}),
+			wait.Gap(time.Millisecond*500),
+			wait.Timeout(time.Second*20),
+		), must.Sprintf("expected 1 restart for %q", group))
+	}
+
+	t.Logf("waiting for template re-render")
+	for allocID := range downstreams {
+		// verify we've re-rendered the template
+		mustWaitTemplateRender(t, allocID, "task/local/kv.yml",
+			func(out string) error {
+				if strings.TrimSpace(out) != "---\nkey: bar\njob: templating" {
+					return fmt.Errorf("expected updated consul key for alloc %q, got:%s", allocID, out)
+				}
+				return nil
+			},
+			time.Second*10)
+	}
+
+	// increase the count for upstreams
+	t.Logf("increasing upstream count")
+	submission.Rerun(jobs3.MutateJobSpec(func(spec string) string {
+		return strings.Replace(spec, "count = 2", "count = 3", 1)
+	}))
+
+	// wait for re-rendering
+	now = time.Now()
+	t.Logf("waiting for service template re-render")
+	for allocID := range downstreams {
+		mustWaitTemplateRender(t, allocID, "task/local/services.conf",
+			func(out string) error {
+				confLines := strings.Split(strings.TrimSpace(out), "\n")
+				servers := 0
+				for _, line := range confLines {
+					if strings.HasPrefix(line, "server upstream-service ") {
+						servers++
+					}
+				}
+				if servers != 3 {
+					return fmt.Errorf(
+						"expected 3 upstream servers for alloc %q, got:\n%s", allocID, out)
+				}
+				return nil
+			},
+			time.Second*30,
+		)
+		t.Logf("ok for alloc %q: elapsed=%v", allocID, time.Since(now))
+	}
+
+	t.Logf("verifying no restart")
+	for allocID := range downstreams {
+
+		// verify noop was honored: no additional restarts
+		out, err := e2eutil.Command("nomad", "alloc", "status", allocID)
+		must.NoError(t, err, must.Sprint("could not get allocation status"))
+
+		section, err := e2eutil.GetSection(out, "Task Events:")
+		must.NoError(t, err, must.Sprintf("could not parse Task Events from: %v", out))
+
+		restarts, err := e2eutil.GetField(section, "Total Restarts")
+		must.NoError(t, err)
+		must.Eq(t, "1", restarts, must.Sprint("expected no new restarts for group"))
+	}
+}
+
+// TestTemplatePathInterpolation_Ok asserts that NOMAD_*_DIR variables are
+// properly interpolated into template source and destination paths without
+// being treated as escaping.
+func TestTemplatePathInterpolation_Ok(t *testing.T) { + cleanupGC(t) + submission, cleanupJob := jobs3.Submit(t, "./input/template_paths.nomad") + t.Cleanup(cleanupJob) + allocID := submission.AllocID("template-paths") + + mustWaitTemplateRender(t, allocID, "task/secrets/foo/dst", + func(out string) error { + if len(out) == 0 { + return fmt.Errorf("expected file to have contents") + } + return nil + }, + time.Second*10) + + mustWaitTemplateRender(t, allocID, "alloc/shared.txt", + func(out string) error { + if len(out) == 0 { + return fmt.Errorf("expected shared-alloc-dir file to have contents") + } + return nil + }, + time.Second*10) +} + +// TestTemplatePathInterpolation_Bad asserts that template.source paths are not +// allowed to escape the sandbox directory tree by default. +func TestTemplatePathInterpolation_Bad(t *testing.T) { + cleanupGC(t) + submission, cleanupJob := jobs3.Submit(t, + "./input/bad_template_paths.nomad", + jobs3.Detach(), + ) + t.Cleanup(cleanupJob) + allocID := submission.AllocID("template-paths") + + nc := e2eutil.NomadClient(t) + + // Wait for alloc to fail + var err error + var alloc *api.Allocation + + must.Wait(t, wait.InitialSuccess( + wait.BoolFunc(func() bool { + alloc, _, err = nc.Allocations().Info(allocID, nil) + must.NoError(t, err) + return alloc.ClientStatus == structs.AllocClientStatusFailed + }), + wait.Timeout(10*time.Second), + wait.Gap(500*time.Millisecond), + ), must.Sprint("expected failed alloc")) + + // Assert the "source escapes" error occurred to prevent false + // positives. + found := false + for _, event := range alloc.TaskStates["task"].Events { + if strings.Contains(event.DisplayMessage, "template source path escapes alloc directory") { + found = true + break + } + } + must.True(t, found, must.Sprint("alloc failed but NOT due to expected source path escape error")) +} + +// TestTemplatePathInterpolation_SharedAllocDir asserts that NOMAD_ALLOC_DIR +// is supported as a destination for artifact and template blocks, and +// that it is properly interpolated for task drivers with varying +// filesystem isolation +func TestTemplatePathInterpolation_SharedAllocDir(t *testing.T) { + cleanupGC(t) + submission, cleanupJob := jobs3.Submit(t, + "./input/template_shared_alloc.nomad", + jobs3.Timeout(time.Second*60)) // note: exec tasks can take a while + t.Cleanup(cleanupJob) + allocID := submission.AllocID("template-paths") + + for _, task := range []string{"docker", "exec", "raw_exec"} { + + // tests that we can render templates into the shared alloc directory + mustWaitForTaskFile(t, allocID, task, "${NOMAD_ALLOC_DIR}/raw_exec.env", + func(out string) error { + if len(out) == 0 || strings.TrimSpace(out) == "/alloc" { + return fmt.Errorf("expected raw_exec.env to not be '/alloc'") + } + return nil + }) + + mustWaitForTaskFile(t, allocID, task, "${NOMAD_ALLOC_DIR}/exec.env", + func(out string) error { + if strings.TrimSpace(out) != "/alloc" { + return fmt.Errorf("expected shared exec.env to contain '/alloc'") + } + return nil + }) + + mustWaitForTaskFile(t, allocID, task, "${NOMAD_ALLOC_DIR}/docker.env", + func(out string) error { + if strings.TrimSpace(out) != "/alloc" { + return fmt.Errorf("expected shared docker.env to contain '/alloc'") + } + return nil + }) + + // test that we can fetch artifacts into the shared alloc directory + for _, a := range []string{"google1.html", "google2.html", "google3.html"} { + mustWaitForTaskFile(t, allocID, task, "${NOMAD_ALLOC_DIR}/"+a, + func(out string) error { + if len(out) == 0 { + return 
fmt.Errorf("expected artifact in alloc dir") + } + return nil + }) + } + + // test that we can load environment variables rendered with templates using interpolated paths + out, err := e2eutil.Command("nomad", "alloc", "exec", "-task", task, allocID, "sh", "-c", "env") + must.NoError(t, err) + must.StrContains(t, out, "HELLO_FROM=raw_exec") + } +} + +// TestConsulTemplate_NomadServiceLookups tests consul-templates Nomad service +// lookup functionality. It runs a job which registers two services, then +// another which performs both a list and read template function lookup against +// registered services. +func TestConsulTemplate_NomadServiceLookups(t *testing.T) { + + cleanupGC(t) + + // The service job will be the source for template data + serviceJobSubmission, cleanupJob := jobs3.Submit(t, "./input/nomad_provider_service.nomad") + t.Cleanup(cleanupJob) + serviceAllocID := serviceJobSubmission.AllocID("nomad_provider_service") + + // Create a non-default namespace. + t.Cleanup(namespaces3.Create(t, "platform")) + + // Register a job which includes services destined for the Nomad provider + // into the platform namespace. This is used to ensure consul-template + // lookups stay bound to the allocation namespace. + _, diffCleanupJob := jobs3.Submit(t, "./input/nomad_provider_service_ns.nomad") + t.Cleanup(diffCleanupJob) + + // Register a job which includes consul-template function performing Nomad + // service listing and reads. + serviceLookupJobSubmission, serviceLookupJobCleanup := jobs3.Submit( + t, "./input/nomad_provider_service_lookup.nomad") + t.Cleanup(serviceLookupJobCleanup) + serviceLookupAllocID := serviceLookupJobSubmission.AllocID("nomad_provider_service_lookup") + + // Ensure the listing (nomadServices) template function has found all + // services within the default namespace. + mustWaitForTaskFile(t, serviceLookupAllocID, "test", "${NOMAD_TASK_DIR}/services.conf", + func(out string) error { + expect := "service default-nomad-provider-service-primary [bar foo]" + if !strings.Contains(out, expect) { + return fmt.Errorf("expected %q, got %q", expect, out) + } + expect = "service default-nomad-provider-service-secondary [baz buz]" + if !strings.Contains(out, expect) { + return fmt.Errorf("expected %q, got %q", expect, out) + } + expect = "service platform-nomad-provider-service-secondary [baz buz]" + if !strings.Contains(out, expect) { + return fmt.Errorf("expected %q, got %q", expect, out) + } + return nil + }) + + // Ensure the direct service lookup has found the entry we expect. + expected := fmt.Sprintf("service default-nomad-provider-service-primary [bar foo] dc1 %s", serviceAllocID) + + mustWaitForTaskFile(t, serviceLookupAllocID, "test", "${NOMAD_TASK_DIR}/service.conf", + func(out string) error { + if !strings.Contains(out, expected) { + return fmt.Errorf("expected %q, got %q", expected, out) + } + return nil + }) + + // Scale the default namespaced service job in order to change the expected + // number of entries. + serviceJobSubmission.Rerun(jobs3.MutateJobSpec(func(spec string) string { + return strings.Replace(spec, "count = 1", "count = 3", 1) + })) + + // Pull the allocation ID for the job, we use this to ensure this is found + // in the rendered template later on. Wrap this in a wait due to the + // eventual consistency around the service registration process. 
+ serviceJobAllocs := []map[string]string{} + var err error + must.Wait(t, wait.InitialSuccess( + wait.BoolFunc(func() bool { + serviceJobAllocs, err = e2eutil.AllocsForJob(serviceJobSubmission.JobID(), "default") + must.NoError(t, err) + return len(serviceJobAllocs) == 3 + }), + wait.Timeout(10*time.Second), + wait.Gap(200*time.Millisecond), + ), must.Sprint("unexpected number of allocs found")) + + // Track the expected entries, including the allocID to make this test + // actually valuable. + var expectedEntries []string + for _, allocs := range serviceJobAllocs { + e := fmt.Sprintf("service default-nomad-provider-service-primary [bar foo] dc1 %s", allocs["ID"]) + expectedEntries = append(expectedEntries, e) + } + + // Ensure the direct service lookup has the new entries we expect. + mustWaitForTaskFile(t, serviceLookupAllocID, "test", "${NOMAD_TASK_DIR}/service.conf", + func(out string) error { + for _, entry := range expectedEntries { + if !strings.Contains(out, entry) { + return fmt.Errorf("expected %q, got %q", expectedEntries, out) + } + } + return nil + }) +} + +// mustWaitForTaskFile is a helper that asserts a file not reachable from alloc +// FS has been rendered and tests its contents +func mustWaitForTaskFile(t *testing.T, allocID, task, path string, testFn func(string) error, testSettings ...must.Setting) { + t.Helper() + + must.Wait(t, wait.InitialSuccess( + wait.ErrorFunc(func() error { + out, err := e2eutil.Command("nomad", "alloc", "exec", "-task", task, allocID, "sh", "-c", "cat "+path) + if err != nil { + return fmt.Errorf("could not cat file %q from task %q in allocation %q: %v", + path, task, allocID, err) + } + return testFn(out) + }), + wait.Gap(time.Millisecond*500), + wait.Timeout(30*time.Second), + ), testSettings...) +} + +// mustWaitTemplateRender is a helper that asserts a file has been rendered and +// tests its contents +func mustWaitTemplateRender(t *testing.T, allocID, path string, testFn func(string) error, timeout time.Duration, testSettings ...must.Setting) { + t.Helper() + + must.Wait(t, wait.InitialSuccess( + wait.ErrorFunc(func() error { + out, err := e2eutil.Command("nomad", "alloc", "fs", allocID, path) + if err != nil { + return err + } + return testFn(out) + }), + wait.Gap(time.Millisecond*500), + wait.Timeout(timeout), + ), testSettings...) +} + +// mustWaitForStatusByGroup is similar to e2eutil.WaitForAllocStatus but maps +// specific task group names to statuses without having to deal with specific +// counts +func mustWaitForStatusByGroup(t *testing.T, jobID, ns string, + expected map[string]string, + timeout time.Duration, testSettings ...must.Setting) { + + t.Helper() + + must.Wait(t, wait.InitialSuccess( + wait.ErrorFunc(func() error { + got, err := e2eutil.AllocsForJob(jobID, ns) + if err != nil { + return err + } + for _, row := range got { + group := row["Task Group"] + expectedStatus := expected[group] + gotStatus := row["Status"] + if expectedStatus != gotStatus { + return fmt.Errorf("expected %q to be %q, got %q", + group, expectedStatus, gotStatus) + } + } + return nil + }), + wait.Gap(time.Millisecond*500), + wait.Timeout(timeout), + ), testSettings...) 
+} + +func cleanupJob(t *testing.T, ns, jobID string) { + if os.Getenv("NOMAD_TEST_SKIPCLEANUP") == "1" { + return + } + + t.Helper() + t.Cleanup(func() { + e2eutil.StopJob(jobID, "-purge", "-detach", "-namespace", ns) + }) +} + +func cleanupGC(t *testing.T) { + if os.Getenv("NOMAD_TEST_SKIPCLEANUP") == "1" { + return + } + + t.Helper() + t.Cleanup(func() { + _, err := e2eutil.Command("nomad", "system", "gc") + test.NoError(t, err) + }) +} + +func cleanupKey(t *testing.T, key string) { + if os.Getenv("NOMAD_TEST_SKIPCLEANUP") == "1" { + return + } + + t.Helper() + t.Cleanup(func() { + cc := e2eutil.ConsulClient(t) + _, err := cc.KV().Delete(key, nil) + test.NoError(t, err, test.Sprintf("could not clean up consul key: %v", key)) + }) +} diff --git a/e2e/consultemplate/doc.go b/e2e/consultemplate/doc.go new file mode 100644 index 000000000..152c61707 --- /dev/null +++ b/e2e/consultemplate/doc.go @@ -0,0 +1,7 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package consultemplate + +// This package contains only tests, so this is a placeholder file to +// make sure builds don't fail with "no non-test Go files in" errors diff --git a/e2e/consultemplate/input/bad_template_paths.nomad b/e2e/consultemplate/input/bad_template_paths.nomad index 00f4cf7cc..95e3fed73 100644 --- a/e2e/consultemplate/input/bad_template_paths.nomad +++ b/e2e/consultemplate/input/bad_template_paths.nomad @@ -2,7 +2,6 @@ # SPDX-License-Identifier: BUSL-1.1 job "bad-template-paths" { - datacenters = ["dc1", "dc2"] constraint { attribute = "${attr.kernel.name}" diff --git a/e2e/consultemplate/input/nomad_provider_service.nomad b/e2e/consultemplate/input/nomad_provider_service.nomad index e2fecaffb..b993cf74f 100644 --- a/e2e/consultemplate/input/nomad_provider_service.nomad +++ b/e2e/consultemplate/input/nomad_provider_service.nomad @@ -2,8 +2,9 @@ # SPDX-License-Identifier: BUSL-1.1 job "nomad_provider_service" { + + # note: this is required for the test assertion, not legacy leftover datacenters = ["dc1"] - type = "service" constraint { attribute = "${attr.kernel.name}" @@ -12,6 +13,8 @@ job "nomad_provider_service" { group "nomad_provider_service" { + count = 1 + service { name = "${NOMAD_NAMESPACE}-nomad-provider-service-primary" provider = "nomad" diff --git a/e2e/consultemplate/input/nomad_provider_service_lookup.nomad b/e2e/consultemplate/input/nomad_provider_service_lookup.nomad index db109d781..306c60975 100644 --- a/e2e/consultemplate/input/nomad_provider_service_lookup.nomad +++ b/e2e/consultemplate/input/nomad_provider_service_lookup.nomad @@ -2,8 +2,6 @@ # SPDX-License-Identifier: BUSL-1.1 job "nomad_provider_service_lookup" { - datacenters = ["dc1"] - type = "service" constraint { attribute = "${attr.kernel.name}" diff --git a/e2e/consultemplate/input/nomad_provider_service_ns.nomad b/e2e/consultemplate/input/nomad_provider_service_ns.nomad index e02bb5488..1a741096e 100644 --- a/e2e/consultemplate/input/nomad_provider_service_ns.nomad +++ b/e2e/consultemplate/input/nomad_provider_service_ns.nomad @@ -2,9 +2,8 @@ # SPDX-License-Identifier: BUSL-1.1 job "nomad_provider_service" { - datacenters = ["dc1"] - type = "service" - namespace = "platform" + + namespace = "platform" constraint { attribute = "${attr.kernel.name}" diff --git a/e2e/consultemplate/input/template_paths.nomad b/e2e/consultemplate/input/template_paths.nomad index 51905bdb7..ef2c8f0d5 100644 --- a/e2e/consultemplate/input/template_paths.nomad +++ b/e2e/consultemplate/input/template_paths.nomad @@ -2,7 +2,6 @@ # 
SPDX-License-Identifier: BUSL-1.1 job "template-paths" { - datacenters = ["dc1", "dc2"] meta { ARTIFACT_DEST_DIR = "local/foo/src" diff --git a/e2e/consultemplate/input/template_shared_alloc.nomad b/e2e/consultemplate/input/template_shared_alloc.nomad index e00af5206..59045197c 100644 --- a/e2e/consultemplate/input/template_shared_alloc.nomad +++ b/e2e/consultemplate/input/template_shared_alloc.nomad @@ -2,7 +2,6 @@ # SPDX-License-Identifier: BUSL-1.1 job "template-shared-alloc" { - datacenters = ["dc1", "dc2"] constraint { attribute = "${attr.kernel.name}" diff --git a/e2e/consultemplate/input/templating.nomad b/e2e/consultemplate/input/update_triggers.nomad.hcl similarity index 97% rename from e2e/consultemplate/input/templating.nomad rename to e2e/consultemplate/input/update_triggers.nomad.hcl index 7090f4c45..63a9d8bb0 100644 --- a/e2e/consultemplate/input/templating.nomad +++ b/e2e/consultemplate/input/update_triggers.nomad.hcl @@ -2,7 +2,6 @@ # SPDX-License-Identifier: BUSL-1.1 job "templating" { - datacenters = ["dc1", "dc2"] constraint { attribute = "${attr.kernel.name}" @@ -105,9 +104,10 @@ EOT task "task" { - driver = "exec" + driver = "docker" config { + image = "busybox:1" command = "/bin/sh" args = ["-c", "sleep 300"] } diff --git a/e2e/e2e_test.go b/e2e/e2e_test.go index 6e4afd42b..591f487e0 100644 --- a/e2e/e2e_test.go +++ b/e2e/e2e_test.go @@ -15,7 +15,6 @@ import ( _ "github.com/hashicorp/nomad/e2e/clientstate" _ "github.com/hashicorp/nomad/e2e/connect" _ "github.com/hashicorp/nomad/e2e/consul" - _ "github.com/hashicorp/nomad/e2e/consultemplate" _ "github.com/hashicorp/nomad/e2e/csi" _ "github.com/hashicorp/nomad/e2e/deployment" _ "github.com/hashicorp/nomad/e2e/eval_priority" @@ -41,6 +40,7 @@ import ( // these are no longer on the old framework but by importing them // we get a quick check that they compile on every commit + _ "github.com/hashicorp/nomad/e2e/consultemplate" _ "github.com/hashicorp/nomad/e2e/disconnectedclients" _ "github.com/hashicorp/nomad/e2e/namespaces" _ "github.com/hashicorp/nomad/e2e/nodedrain" diff --git a/e2e/v3/jobs3/jobs3.go b/e2e/v3/jobs3/jobs3.go index 901687be3..8cb51994a 100644 --- a/e2e/v3/jobs3/jobs3.go +++ b/e2e/v3/jobs3/jobs3.go @@ -187,6 +187,7 @@ type Option func(*Submission) type Cleanup func() func Submit(t *testing.T, filename string, opts ...Option) (*Submission, Cleanup) { + t.Helper() sub := initialize(t, filename) for _, opt := range opts { @@ -205,6 +206,7 @@ func Namespace(name string) Option { sub.inNamespace = name } } + func AuthToken(token string) Option { return func(sub *Submission) { sub.authToken = token
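For context, the e2e "v3" flow that the ported tests follow is roughly the sketch below. It only combines calls that already appear in this patch (jobs3.Submit, Detach, Timeout, AllocID, Rerun, MutateJobSpec, the must/wait helpers, and e2eutil.Command); the job file path, group name, and count values are hypothetical placeholders, not files or fields introduced by this change.

// A minimal sketch of the submit / wait / rerun pattern used by the ported
// tests, assuming the jobs3 and wait APIs as they appear in this patch.
package sketch

import (
	"strings"
	"testing"
	"time"

	"github.com/hashicorp/nomad/e2e/e2eutil"
	"github.com/hashicorp/nomad/e2e/v3/jobs3"
	"github.com/shoenig/test/must"
	"github.com/shoenig/test/wait"
)

func TestV3PatternSketch(t *testing.T) {
	// Submit the job spec; Detach() registers the job without waiting for it
	// to become healthy, which is how the blocked-template test above starts.
	submission, cleanup := jobs3.Submit(t,
		"./input/example.nomad.hcl", // placeholder job spec
		jobs3.Detach(),
		jobs3.Timeout(time.Second*60),
	)
	t.Cleanup(cleanup)

	// Look up the allocation for a task group and poll the CLI until the
	// condition holds, mirroring the mustWait* helpers in the new test file.
	allocID := submission.AllocID("example_group") // placeholder group name
	must.Wait(t, wait.InitialSuccess(
		wait.BoolFunc(func() bool {
			out, err := e2eutil.Command("nomad", "alloc", "status", allocID)
			must.NoError(t, err)
			return strings.Contains(out, "running")
		}),
		wait.Gap(500*time.Millisecond),
		wait.Timeout(30*time.Second),
	))

	// Re-run the same submission with an in-place edit to the job spec, the
	// way the update-triggers test scales its upstream group.
	submission.Rerun(jobs3.MutateJobSpec(func(spec string) string {
		return strings.Replace(spec, "count = 1", "count = 2", 1)
	}))
}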