From fbd7f9116574b62a9e0ad0e7903ccfaefc5b1590 Mon Sep 17 00:00:00 2001 From: Seth Hoenig Date: Fri, 19 Jun 2020 13:03:10 -0500 Subject: [PATCH] e2e: add tests for connect native Adds 2 tests around Connect Native. Both make use of the example connect native services in https://github.com/hashicorp/nomad-connect-examples One of them runs without Consul ACLs enabled, the other with. --- e2e/connect/acls.go | 196 ++++++++-------------------- e2e/connect/client.go | 1 + e2e/connect/connect.go | 150 ++++----------------- e2e/connect/input/native-demo.nomad | 71 ++++++++++ e2e/e2eutil/utils.go | 49 +++++++ 5 files changed, 203 insertions(+), 264 deletions(-) create mode 100644 e2e/connect/input/native-demo.nomad diff --git a/e2e/connect/acls.go b/e2e/connect/acls.go index bfbcfd5d0..3a89119aa 100644 --- a/e2e/connect/acls.go +++ b/e2e/connect/acls.go @@ -3,18 +3,16 @@ package connect import ( "os" "regexp" - "strings" "testing" "time" - capi "github.com/hashicorp/consul/api" - napi "github.com/hashicorp/nomad/api" + consulapi "github.com/hashicorp/consul/api" + nomadapi "github.com/hashicorp/nomad/api" "github.com/hashicorp/nomad/e2e/consulacls" "github.com/hashicorp/nomad/e2e/e2eutil" "github.com/hashicorp/nomad/e2e/framework" "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/jobspec" - "github.com/kr/pretty" "github.com/stretchr/testify/require" ) @@ -24,6 +22,9 @@ const ( // demoConnectJob is the example connect enabled job useful for testing demoConnectJob = "connect/input/demo.nomad" + + // demoConnectNativeJob is the example connect native enabled job useful for testing + demoConnectNativeJob = "connect/input/native-demo.nomad" ) type ConnectACLsE2ETest struct { @@ -111,14 +112,14 @@ func (tc *ConnectACLsE2ETest) AfterEach(f *framework.F) { // cleanup consul tokens for _, id := range tc.consulTokenIDs { t.Log("cleanup: delete consul token id:", id) - _, err := tc.Consul().ACL().TokenDelete(id, &capi.WriteOptions{Token: tc.consulMasterToken}) + _, err := tc.Consul().ACL().TokenDelete(id, &consulapi.WriteOptions{Token: tc.consulMasterToken}) r.NoError(err) } // cleanup consul policies for _, id := range tc.consulPolicyIDs { t.Log("cleanup: delete consul policy id:", id) - _, err := tc.Consul().ACL().PolicyDelete(id, &capi.WriteOptions{Token: tc.consulMasterToken}) + _, err := tc.Consul().ACL().PolicyDelete(id, &consulapi.WriteOptions{Token: tc.consulMasterToken}) r.NoError(err) } @@ -146,11 +147,11 @@ type consulPolicy struct { func (tc *ConnectACLsE2ETest) createConsulPolicy(p consulPolicy, f *framework.F) string { r := require.New(f.T()) - result, _, err := tc.Consul().ACL().PolicyCreate(&capi.ACLPolicy{ + result, _, err := tc.Consul().ACL().PolicyCreate(&consulapi.ACLPolicy{ Name: p.Name, Description: "test policy " + p.Name, Rules: p.Rules, - }, &capi.WriteOptions{Token: tc.consulMasterToken}) + }, &consulapi.WriteOptions{Token: tc.consulMasterToken}) r.NoError(err, "failed to create consul policy") tc.consulPolicyIDs = append(tc.consulPolicyIDs, result.ID) return result.ID @@ -158,10 +159,10 @@ func (tc *ConnectACLsE2ETest) createConsulPolicy(p consulPolicy, f *framework.F) func (tc *ConnectACLsE2ETest) createOperatorToken(policyID string, f *framework.F) string { r := require.New(f.T()) - token, _, err := tc.Consul().ACL().TokenCreate(&capi.ACLToken{ + token, _, err := tc.Consul().ACL().TokenCreate(&consulapi.ACLToken{ Description: "operator token", - Policies: []*capi.ACLTokenPolicyLink{{ID: policyID}}, - }, &capi.WriteOptions{Token: tc.consulMasterToken}) + Policies: []*consulapi.ACLTokenPolicyLink{{ID: policyID}}, + }, &consulapi.WriteOptions{Token: tc.consulMasterToken}) r.NoError(err, "failed to create operator token") tc.consulTokenIDs = append(tc.consulTokenIDs, token.AccessorID) return token.SecretID @@ -248,7 +249,7 @@ func (tc *ConnectACLsE2ETest) TestConnectACLsConnectDemo(f *framework.F) { t.Log("test register Connect job w/ ACLs enabled w/ operator token") - // === Setup ACL policy and token === + // === Setup ACL policy and mint Operator token === // create a policy allowing writes of services "count-api" and "count-dashboard" policyID := tc.createConsulPolicy(consulPolicy{ @@ -261,130 +262,14 @@ func (tc *ConnectACLsE2ETest) TestConnectACLsConnectDemo(f *framework.F) { operatorToken := tc.createOperatorToken(policyID, f) t.Log("created operator token:", operatorToken) - // === Register the Nomad job === - jobID := "connectACL_connect_demo" + jobID := connectJobID() + tc.jobIDs = append(tc.jobIDs, jobID) - var allocs []*napi.AllocationListStub - allocIDs := make(map[string]bool, 2) - { - - // parse the example connect jobspec file - tc.jobIDs = append(tc.jobIDs, jobID) - job := tc.parseJobSpecFile(t, demoConnectJob) - job.ID = &jobID - jobAPI := tc.Nomad().Jobs() - - // set the valid consul operator token - job.ConsulToken = &operatorToken - - // registering the job should succeed - resp, _, err := jobAPI.Register(job, nil) - r.NoError(err) - r.NotNil(resp) - r.Empty(resp.Warnings) - t.Log("job has been registered with evalID:", resp.EvalID) - - // === Make sure the evaluation actually succeeds === - EVAL: - qOpts := &napi.QueryOptions{WaitIndex: resp.EvalCreateIndex} - evalAPI := tc.Nomad().Evaluations() - eval, qMeta, err := evalAPI.Info(resp.EvalID, qOpts) - r.NoError(err) - qOpts.WaitIndex = qMeta.LastIndex - - switch eval.Status { - case "pending": - goto EVAL - case "complete": - // ok! - case "failed", "canceled", "blocked": - r.Failf("eval %s\n%s\n", eval.Status, pretty.Sprint(eval)) - default: - r.Failf("unknown eval status: %s\n%s\n", eval.Status, pretty.Sprint(eval)) - } - - // assert there were no placement failures - r.Zero(eval.FailedTGAllocs, pretty.Sprint(eval.FailedTGAllocs)) - r.Len(eval.QueuedAllocations, 2, pretty.Sprint(eval.QueuedAllocations)) - - // === Assert allocs are running === - for i := 0; i < 20; i++ { - allocs, qMeta, err = evalAPI.Allocations(eval.ID, qOpts) - r.NoError(err) - r.Len(allocs, 2) - qOpts.WaitIndex = qMeta.LastIndex - - running := 0 - for _, alloc := range allocs { - switch alloc.ClientStatus { - case "running": - running++ - case "pending": - // keep trying - default: - r.Failf("alloc failed", "alloc: %s", pretty.Sprint(alloc)) - } - } - - if running == len(allocs) { - t.Log("running:", running, "allocs:", allocs) - break - } - - time.Sleep(500 * time.Millisecond) - } - - for _, a := range allocs { - if a.ClientStatus != "running" || a.DesiredStatus != "run" { - r.Failf("terminal alloc", "alloc %s (%s) terminal; client=%s desired=%s", a.TaskGroup, a.ID, a.ClientStatus, a.DesiredStatus) - } - allocIDs[a.ID] = true - } - } - - // === Check Consul service health === - agentAPI := tc.Consul().Agent() - - failing := map[string]*capi.AgentCheck{} - for i := 0; i < 60; i++ { - checks, err := agentAPI.Checks() - require.NoError(t, err) - - // filter out checks for other services - for cid, check := range checks { - found := false - // for _, allocID := range allocIDs { // list - for allocID := range allocIDs { // map - if strings.Contains(check.ServiceID, allocID) { - found = true - break - } - } - - if !found { - delete(checks, cid) - } - } - - // ensure checks are all passing - failing = map[string]*capi.AgentCheck{} - for _, check := range checks { - if check.Status != "passing" { - failing[check.CheckID] = check - break - } - } - - if len(failing) == 0 { - break - } - - t.Logf("still %d checks not passing", len(failing)) - - time.Sleep(time.Second) - } - - require.Len(t, failing, 0, pretty.Sprint(failing)) + allocs := e2eutil.RegisterAndWaitForAllocs(t, tc.Nomad(), demoConnectJob, jobID, operatorToken) + r.Equal(2, len(allocs), "expected 2 allocs for connect demo", allocs) + allocIDs := e2eutil.AllocIDsFromAllocationListStubs(allocs) + r.Equal(2, len(allocIDs), "expected 2 allocIDs for connect demo", allocIDs) + e2eutil.WaitForAllocsRunning(t, tc.Nomad(), allocIDs) // === Check Consul SI tokens were generated for sidecars === foundSITokens := tc.countSITokens(t) @@ -392,7 +277,42 @@ func (tc *ConnectACLsE2ETest) TestConnectACLsConnectDemo(f *framework.F) { r.Equal(1, foundSITokens["connect-proxy-count-api"], "expected 1 SI token for connect-proxy-count-api: %v", foundSITokens) r.Equal(1, foundSITokens["connect-proxy-count-dashboard"], "expected 1 SI token for connect-proxy-count-dashboard: %v", foundSITokens) - t.Log("connect job with ACLs enable finished") + t.Log("connect legacy job with ACLs enable finished") +} + +func (tc *ConnectACLsE2ETest) TestConnectACLsConnectNativeDemo(f *framework.F) { + t := f.T() + r := require.New(t) + + t.Log("test register Connect job w/ ACLs enabled w/ operator token") + + // === Setup ACL policy and mint Operator token === + + // create a policy allowing writes of services "uuid-fe" and "uuid-api" + policyID := tc.createConsulPolicy(consulPolicy{ + Name: "nomad-operator-policy", + Rules: `service "uuid-fe" { policy = "write" } service "uuid-api" { policy = "write" }`, + }, f) + t.Log("created operator policy:", policyID) + + // create a Consul "operator token" blessed with the above policy + operatorToken := tc.createOperatorToken(policyID, f) + t.Log("created operator token:", operatorToken) + + jobID := connectJobID() + tc.jobIDs = append(tc.jobIDs, jobID) + + allocs := e2eutil.RegisterAndWaitForAllocs(t, tc.Nomad(), demoConnectNativeJob, jobID, operatorToken) + allocIDs := e2eutil.AllocIDsFromAllocationListStubs(allocs) + e2eutil.WaitForAllocsRunning(t, tc.Nomad(), allocIDs) + + // === Check Consul SI tokens were generated for native tasks === + foundSITokens := tc.countSITokens(t) + r.Equal(2, len(foundSITokens), "expected 2 SI tokens total: %v", foundSITokens) + r.Equal(1, foundSITokens["frontend"], "expected 1 SI token for frontend: %v", foundSITokens) + r.Equal(1, foundSITokens["generate"], "expected 1 SI token for generate: %v", foundSITokens) + + t.Log("connect native job with ACLs enable finished") } var ( @@ -408,7 +328,7 @@ func (tc *ConnectACLsE2ETest) serviceofSIToken(description string) string { func (tc *ConnectACLsE2ETest) countSITokens(t *testing.T) map[string]int { aclAPI := tc.Consul().ACL() - tokens, _, err := aclAPI.TokenList(&capi.QueryOptions{ + tokens, _, err := aclAPI.TokenList(&consulapi.QueryOptions{ Token: tc.consulMasterToken, }) require.NoError(t, err) @@ -424,7 +344,7 @@ func (tc *ConnectACLsE2ETest) countSITokens(t *testing.T) map[string]int { return foundSITokens } -func (tc *ConnectACLsE2ETest) parseJobSpecFile(t *testing.T, filename string) *napi.Job { +func (tc *ConnectACLsE2ETest) parseJobSpecFile(t *testing.T, filename string) *nomadapi.Job { job, err := jobspec.ParseFile(filename) require.NoError(t, err) return job diff --git a/e2e/connect/client.go b/e2e/connect/client.go index 2d9a2016c..c80f289fc 100644 --- a/e2e/connect/client.go +++ b/e2e/connect/client.go @@ -35,6 +35,7 @@ func (tc *ConnectClientStateE2ETest) AfterEach(f *framework.F) { func (tc *ConnectClientStateE2ETest) TestClientRestart(f *framework.F) { t := f.T() require := require.New(t) + jobID := "connect" + uuid.Generate()[0:8] tc.jobIds = append(tc.jobIds, jobID) client := tc.Nomad() diff --git a/e2e/connect/connect.go b/e2e/connect/connect.go index 3acd43139..892c96153 100644 --- a/e2e/connect/connect.go +++ b/e2e/connect/connect.go @@ -2,17 +2,10 @@ package connect import ( "os" - "strings" - "time" - consulapi "github.com/hashicorp/consul/api" - "github.com/hashicorp/nomad/api" "github.com/hashicorp/nomad/e2e/e2eutil" "github.com/hashicorp/nomad/e2e/framework" "github.com/hashicorp/nomad/helper/uuid" - "github.com/hashicorp/nomad/jobspec" - "github.com/kr/pretty" - "github.com/stretchr/testify/require" ) type ConnectE2ETest struct { @@ -61,127 +54,32 @@ func (tc *ConnectE2ETest) AfterEach(f *framework.F) { tc.Nomad().System().GarbageCollect() } -// TestConnectDemo tests the demo job file from the Consul Connect Technology -// Preview. -// -// https://github.com/hashicorp/nomad/blob/v0.9.5/website/source/guides/integrations/consul-connect/index.html.md#run-the-connect-enabled-services -// +func connectJobID() string { + id := uuid.Generate() + jobID := "connect" + id[0:8] + return jobID +} + +// TestConnectDemo tests the demo job file used in Connect Integration examples. func (tc *ConnectE2ETest) TestConnectDemo(f *framework.F) { t := f.T() - uuid := uuid.Generate() - jobID := "connect" + uuid[0:8] + + jobID := connectJobID() tc.jobIds = append(tc.jobIds, jobID) - jobapi := tc.Nomad().Jobs() - job, err := jobspec.ParseFile("connect/input/demo.nomad") - require.NoError(t, err) - job.ID = &jobID - - resp, _, err := jobapi.Register(job, nil) - require.NoError(t, err) - require.NotNil(t, resp) - require.Zero(t, resp.Warnings) - -EVAL: - qopts := &api.QueryOptions{ - WaitIndex: resp.EvalCreateIndex, - } - evalapi := tc.Nomad().Evaluations() - eval, qmeta, err := evalapi.Info(resp.EvalID, qopts) - require.NoError(t, err) - qopts.WaitIndex = qmeta.LastIndex - - switch eval.Status { - case "pending": - goto EVAL - case "complete": - // Ok! - case "failed", "canceled", "blocked": - t.Fatalf("eval %s\n%s\n", eval.Status, pretty.Sprint(eval)) - default: - t.Fatalf("unknown eval status: %s\n%s\n", eval.Status, pretty.Sprint(eval)) - } - - // Assert there were 0 placement failures - require.Zero(t, eval.FailedTGAllocs, pretty.Sprint(eval.FailedTGAllocs)) - require.Len(t, eval.QueuedAllocations, 2, pretty.Sprint(eval.QueuedAllocations)) - - // Assert allocs are running - for i := 0; i < 20; i++ { - allocs, qmeta, err := evalapi.Allocations(eval.ID, qopts) - require.NoError(t, err) - require.Len(t, allocs, 2) - qopts.WaitIndex = qmeta.LastIndex - - running := 0 - for _, alloc := range allocs { - switch alloc.ClientStatus { - case "running": - running++ - case "pending": - // keep trying - default: - t.Fatalf("alloc failed: %s", pretty.Sprint(alloc)) - } - } - - if running == len(allocs) { - break - } - - time.Sleep(500 * time.Millisecond) - } - - allocs, _, err := evalapi.Allocations(eval.ID, qopts) - require.NoError(t, err) - allocIDs := make(map[string]bool, 2) - for _, a := range allocs { - if a.ClientStatus != "running" || a.DesiredStatus != "run" { - t.Fatalf("alloc %s (%s) terminal; client=%s desired=%s", a.TaskGroup, a.ID, a.ClientStatus, a.DesiredStatus) - } - allocIDs[a.ID] = true - } - - // Check Consul service health - agentapi := tc.Consul().Agent() - - failing := map[string]*consulapi.AgentCheck{} - for i := 0; i < 60; i++ { - checks, err := agentapi.Checks() - require.NoError(t, err) - - // Filter out checks for other services - for cid, check := range checks { - found := false - for allocID := range allocIDs { - if strings.Contains(check.ServiceID, allocID) { - found = true - break - } - } - - if !found { - delete(checks, cid) - } - } - - // Ensure checks are all passing - failing = map[string]*consulapi.AgentCheck{} - for _, check := range checks { - if check.Status != "passing" { - failing[check.CheckID] = check - break - } - } - - if len(failing) == 0 { - break - } - - t.Logf("still %d checks not passing", len(failing)) - - time.Sleep(time.Second) - } - - require.Len(t, failing, 0, pretty.Sprint(failing)) + allocs := e2eutil.RegisterAndWaitForAllocs(t, tc.Nomad(), demoConnectJob, jobID, "") + allocIDs := e2eutil.AllocIDsFromAllocationListStubs(allocs) + e2eutil.WaitForAllocsRunning(t, tc.Nomad(), allocIDs) +} + +// TestConnectNativeDemo tests the demo job file used in Connect Native Integration examples. +func (tc *ConnectE2ETest) TestConnectNativeDemo(f *framework.F) { + t := f.T() + + jobID := connectJobID() + tc.jobIds = append(tc.jobIds, jobID) + + allocs := e2eutil.RegisterAndWaitForAllocs(t, tc.Nomad(), demoConnectNativeJob, jobID, "") + allocIDs := e2eutil.AllocIDsFromAllocationListStubs(allocs) + e2eutil.WaitForAllocsRunning(t, tc.Nomad(), allocIDs) } diff --git a/e2e/connect/input/native-demo.nomad b/e2e/connect/input/native-demo.nomad new file mode 100644 index 000000000..3d0b0f784 --- /dev/null +++ b/e2e/connect/input/native-demo.nomad @@ -0,0 +1,71 @@ +job "cn-demo" { + datacenters = ["dc1"] + + constraint { + attribute = "${attr.kernel.name}" + value = "linux" + } + + group "generator" { + network { + port "api" {} + } + + service { + name = "uuid-api" + port = "${NOMAD_PORT_api}" + task = "generate" + + connect { + native = true + } + } + + task "generate" { + driver = "docker" + + config { + image = "hashicorpnomad/uuid-api:v1" + network_mode = "host" + } + + env { + BIND = "0.0.0.0" + PORT = "${NOMAD_PORT_api}" + } + } + } + + group "frontend" { + network { + port "http" { + static = 9800 + } + } + + service { + name = "uuid-fe" + port = "9800" + task = "frontend" + + connect { + native = true + } + } + + task "frontend" { + driver = "docker" + + config { + image = "hashicorpnomad/uuid-fe:v1" + network_mode = "host" + } + + env { + UPSTREAM = "uuid-api" + BIND = "0.0.0.0" + PORT = "9800" + } + } + } +} \ No newline at end of file diff --git a/e2e/e2eutil/utils.go b/e2e/e2eutil/utils.go index 15ce69461..42d7320ef 100644 --- a/e2e/e2eutil/utils.go +++ b/e2e/e2eutil/utils.go @@ -2,9 +2,11 @@ package e2eutil import ( "fmt" + "strings" "testing" "time" + consulapi "github.com/hashicorp/consul/api" "github.com/hashicorp/nomad/api" "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/jobspec" @@ -229,3 +231,50 @@ func WaitForDeployment(t *testing.T, nomadClient *api.Client, deployID string, s t.Fatalf("failed to wait on deployment: %v", err) }) } + +// CheckServicesPassing scans for passing agent checks via the given agent API +// client. +// +// Deprecated: not useful in e2e, where more than one node exists and Nomad jobs +// are placed non-deterministically. The Consul agentAPI only knows about what +// is registered on its node, and cannot be used to query for cluster wide state. +func CheckServicesPassing(t *testing.T, agentAPI *consulapi.Agent, allocIDs []string) { + failing := map[string]*consulapi.AgentCheck{} + for i := 0; i < 60; i++ { + checks, err := agentAPI.Checks() + require.NoError(t, err) + + // Filter out checks for other services + for cid, check := range checks { + found := false + for _, allocID := range allocIDs { + if strings.Contains(check.ServiceID, allocID) { + found = true + break + } + } + + if !found { + delete(checks, cid) + } + } + + // Ensure checks are all passing + failing = map[string]*consulapi.AgentCheck{} + for _, check := range checks { + if check.Status != "passing" { + failing[check.CheckID] = check + break + } + } + + if len(failing) == 0 { + break + } + + t.Logf("still %d checks not passing", len(failing)) + + time.Sleep(time.Second) + } + require.Len(t, failing, 0, pretty.Sprint(failing)) +}