e2e: Migrate legacy Vault token based workflow to workload ID (#25139)
Nomad 1.10.0 removes the legacy Vault token-based workflow, which means the legacy e2e compatibility tests will no longer work. The Nomad e2e cluster was using the legacy Vault token-based workflow for the initial cluster build. This change migrates to the workload identity flow, which utilizes authentication methods, roles, and policies. The Nomad server network has been modified to allow traffic from the HCP Vault HVN, a private network peered into our AWS account. This is required so that Vault can pull JWKS information from the Nomad API without going over the public internet. The cluster build will now also configure a Vault KV v2 mount at a unique identifier for the e2e cluster, which all Nomad workloads and tests can use if required. The vaultsecrets suite has been updated to accommodate the new changes and extended to test the default workload ID flow for allocations which use Vault for secrets.
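At a high level, the new flow points a Vault JWT auth method at the Nomad servers' JWKS endpoint so that Vault can verify the workload identities Nomad signs. The sketch below is illustrative only (resource names, hostname, and the CA file path are placeholders); the real definitions live in the Terraform changes further down.

```hcl
# Sketch only: a JWT auth method that trusts the Nomad servers' JWKS endpoint.
# Nomad signs workload identities for allocations; Vault verifies them against
# this method and issues tokens according to the default role's policies.
resource "vault_jwt_auth_backend" "nomad_workloads" {
  path               = "jwt-nomad-example" # per-cluster path
  jwks_url           = "https://nomad-server.example.internal:4646/.well-known/jwks.json"
  jwks_ca_pem        = file("${path.module}/ca.pem")
  jwt_supported_algs = ["RS256"]
  default_role       = "jwt-nomad-example-workloads"
}
```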
@@ -125,3 +125,14 @@ You can update the `nomad_version` variable, or simply rebuild the binary you
have at the `nomad_local_binary` path so that Terraform picks up the
changes. Then run `terraform plan`/`terraform apply` again. This will update
Nomad in place, making the minimum amount of changes necessary.

### ...Use Vault within a Test

The infrastructure build enables a Vault KV2 mount whose mount point matches the value of the
`CLUSTER_UNIQUE_IDENTIFIER` environment variable and is generated
[here](https://github.com/hashicorp/nomad/blob/687335639bc6d4d522c91d6026d9e3f149aa75dc/e2e/terraform/provision-infra/main.tf#L16).

All Nomad workloads which include a
[Vault block](https://developer.hashicorp.com/nomad/docs/job-specification/vault) will be granted
access to secrets according to the
[default policy document](./terraform/provision-infra/templates/vault-acl-jwt-policy-nomad-workloads.hcl.tpl).
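As a minimal sketch of what that looks like from a test's point of view (the job layout, secret path, and the `CLUSTER_ID` placeholder below are illustrative; the real test jobs live under `e2e/vaultsecrets/input/`), a job only needs an empty `vault` block and a template that reads from the per-cluster mount:

```hcl
job "example" {
  group "group" {
    task "task" {
      driver = "docker"

      config {
        image   = "busybox:1"
        command = "sleep"
        args    = ["300"]
      }

      # An empty vault block opts the task into the default workload identity
      # flow; no token, role, or policy list needs to be supplied.
      vault {}

      # CLUSTER_ID stands for the per-cluster KV v2 mount named after
      # CLUSTER_UNIQUE_IDENTIFIER. The default workloads policy grants read
      # access under CLUSTER_ID/data/<nomad namespace>/<job id>/...
      template {
        data        = <<EOT
E2E_SECRET={{ with secret "CLUSTER_ID/data/my-namespace/example/config" }}{{ .Data.data.key }}{{ end }}
EOT
        destination = "${NOMAD_SECRETS_DIR}/secret.env"
        env         = true
      }
    }
  }
}
```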
@@ -42,6 +42,7 @@ cd ./hcp-vault-auth
terraform init
terraform apply --auto-approve
$(terraform output --raw environment)
cd ../
```

Optionally, edit the `terraform.tfvars` file to change the number of
@@ -11,42 +11,80 @@ data "hcp_vault_cluster" "e2e_shared_vault" {
  cluster_id = var.hcp_vault_cluster_id
}

# Vault policy for the Nomad cluster, which allows it to mint derived tokens for
# tasks. It's interpolated with the random cluster name to avoid collisions
# between concurrent E2E clusters
resource "vault_policy" "nomad" {
  name   = "${local.random_name}-nomad-server"
  policy = templatefile("${path.module}/provision-nomad/etc/acls/vault/nomad-policy.hcl", {
    role = "nomad-tasks-${local.random_name}"
// Use stable naming formatting, so that e2e tests can rely on the
// CLUSTER_UNIQUE_IDENTIFIER env var to re-build these names when they need to.
//
// If these change, downstream tests will need to be updated as well, most
// notably vaultsecrets.
locals {
  workload_identity_path   = "jwt-nomad-${local.random_name}"
  workload_identity_role   = "jwt-nomad-${local.random_name}-workloads"
  workload_identity_policy = "jwt-nomad-${local.random_name}-workloads"
}

// The authentication backend is used by Nomad to generate workload identities
// for allocations.
//
// Nomad is running TLS, so we must pass the CA and HTTPS endpoint. Due to
// limitations within Vault at the moment, the Nomad TLS configuration must set
// "verify_https_client=false". Vault will return an error without this when
// writing the auth backend.
resource "vault_jwt_auth_backend" "nomad_cluster" {
  depends_on         = [null_resource.bootstrap_nomad_acls]
  default_role       = local.workload_identity_role
  jwks_url           = "https://${aws_instance.server[0].private_ip}:4646/.well-known/jwks.json"
  jwks_ca_pem        = tls_self_signed_cert.ca.cert_pem
  jwt_supported_algs = ["RS256"]
  path               = local.workload_identity_path
}

// This is our default role for the nomad JWT authentication backend within
// Vault.
resource "vault_jwt_auth_backend_role" "nomad_cluster" {
  backend                 = vault_jwt_auth_backend.nomad_cluster.path
  bound_audiences         = ["vault.io"]
  role_name               = local.workload_identity_role
  role_type               = "jwt"
  token_period            = 1800
  token_policies          = [local.workload_identity_policy]
  token_type              = "service"
  user_claim              = "/nomad_job_id"
  user_claim_json_pointer = true

  claim_mappings = {
    nomad_namespace = "nomad_namespace"
    nomad_job_id    = "nomad_job_id"
    nomad_task      = "nomad_task"
  }
}

// Enable a KV secrets backend using the generated name for the path, so that
// multiple clusters can run simultaneously and that failed destroys do not
// impact subsequent runs.
resource "vault_mount" "nomad_cluster" {
  path    = local.random_name
  type    = "kv"
  options = { version = "2" }
}

// This Vault policy is linked from default Nomad WI auth backend role and uses
// Nomad's documented default policy for workloads as an outline. It grants
// access to the KV path enabled above, making it available to all e2e tests by
// default.
resource "vault_policy" "nomad-workloads" {
  name   = local.workload_identity_policy
  policy = templatefile("${path.module}/templates/vault-acl-jwt-policy-nomad-workloads.hcl.tpl", {
    AUTH_METHOD_ACCESSOR = vault_jwt_auth_backend.nomad_cluster.accessor
    MOUNT                = local.random_name
  })
}

resource "vault_token" "nomad" {
  policies  = [vault_policy.nomad.name]
  no_parent = true
  renewable = true
  ttl       = "72h"
}

# The default role that Nomad will use for derived tokens. It's not allowed
# access to nomad-policy so that it can only mint tokens for tasks, not for new
# clusters
resource "vault_token_auth_backend_role" "nomad_cluster" {
  role_name           = "nomad-tasks-${local.random_name}"
  disallowed_policies = [vault_policy.nomad.name]
  orphan              = true
  token_period        = "259200"
  renewable           = true
  token_max_ttl       = "0"
}

# Nomad agent configuration for Vault
resource "local_sensitive_file" "nomad_config_for_vault" {
  content = templatefile("${path.module}/provision-nomad/etc/nomad.d/vault.hcl", {
    token     = vault_token.nomad.client_token
    url       = data.hcp_vault_cluster.e2e_shared_vault.vault_private_endpoint_url
    namespace = var.hcp_vault_namespace
    role      = "nomad-tasks-${local.random_name}"
    jwt_auth_backend_path = local.workload_identity_path
    url                   = data.hcp_vault_cluster.e2e_shared_vault.vault_private_endpoint_url
    namespace             = var.hcp_vault_namespace
  })
  filename        = "${local.uploads_dir}/shared/nomad.d/vault.hcl"
  file_permission = "0600"

@@ -54,6 +54,15 @@ resource "aws_security_group" "servers" {
    cidr_blocks = [local.ingress_cidr]
  }

  # Nomad HTTP access from the HashiCorp Cloud virtual network CIDR. This is
  # used for the workload identity authentication method JWKS callback.
  ingress {
    from_port   = 4646
    to_port     = 4646
    protocol    = "tcp"
    cidr_blocks = [var.hcp_hvn_cidr]
  }

  # Nomad HTTP and RPC from clients
  ingress {
    from_port = 4646

@@ -1,44 +0,0 @@
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

# Allow creating tokens under "nomad-tasks" role. The role name should be
# updated if "nomad-tasks" is not used.
path "auth/token/create/${role}" {
  capabilities = ["update"]
}

# Allow looking up "${role}" role. The role name should be updated if
# "${role}" is not used.
path "auth/token/roles/${role}" {
  capabilities = ["read"]
}

# Allow looking up the token passed to Nomad to validate the token has the
# proper capabilities. This is provided by the "default" policy.
path "auth/token/lookup-self" {
  capabilities = ["read"]
}

# Allow looking up incoming tokens to validate they have permissions to access
# the tokens they are requesting. This is only required if
# `allow_unauthenticated` is set to false.
path "auth/token/lookup" {
  capabilities = ["update"]
}

# Allow revoking tokens that should no longer exist. This allows revoking
# tokens for dead tasks.
path "auth/token/revoke-accessor" {
  capabilities = ["update"]
}

# Allow checking the capabilities of our own token. This is used to validate the
# token upon startup.
path "sys/capabilities-self" {
  capabilities = ["update"]
}

# Allow our own token to be renewed.
path "auth/token/renew-self" {
  capabilities = ["update"]
}
@@ -10,5 +10,5 @@ tls {
  key_file = "/etc/nomad.d/tls/agent.key"

  verify_server_hostname = true
  verify_https_client    = true
  verify_https_client    = false
}

@@ -2,10 +2,13 @@
# SPDX-License-Identifier: BUSL-1.1

vault {
  enabled          = true
  address          = "${url}"
  task_token_ttl   = "1h"
  create_from_role = "${role}"
  namespace        = "${namespace}"
  token            = "${token}"
  enabled               = true
  address               = "${url}"
  namespace             = "${namespace}"
  jwt_auth_backend_path = "${jwt_auth_backend_path}/"

  default_identity {
    aud = ["vault.io"]
    ttl = "1h"
  }
}

@@ -0,0 +1,15 @@
path "${MOUNT}/data/{{identity.entity.aliases.${AUTH_METHOD_ACCESSOR}.metadata.nomad_namespace}}/{{identity.entity.aliases.${AUTH_METHOD_ACCESSOR}.metadata.nomad_job_id}}/*" {
  capabilities = ["read"]
}

path "${MOUNT}/data/{{identity.entity.aliases.${AUTH_METHOD_ACCESSOR}.metadata.nomad_namespace}}/{{identity.entity.aliases.${AUTH_METHOD_ACCESSOR}.metadata.nomad_job_id}}" {
  capabilities = ["read"]
}

path "${MOUNT}/metadata/{{identity.entity.aliases.${AUTH_METHOD_ACCESSOR}.metadata.nomad_namespace}}/*" {
  capabilities = ["list"]
}

path "${MOUNT}/metadata/*" {
  capabilities = ["list"]
}
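As a concrete illustration (the mount name and accessor below are made up; Terraform substitutes the real per-cluster values, and the `{{identity...}}` templating is resolved by Vault on each request), the first rule in this template renders to something like:

```hcl
# Example rendering with MOUNT = "nomad-e2e-abc123" and an auth method accessor
# of "auth_jwt_d3adb33f". Vault fills in the nomad_namespace and nomad_job_id
# entity alias metadata from the workload identity claims at request time.
path "nomad-e2e-abc123/data/{{identity.entity.aliases.auth_jwt_d3adb33f.metadata.nomad_namespace}}/{{identity.entity.aliases.auth_jwt_d3adb33f.metadata.nomad_job_id}}/*" {
  capabilities = ["read"]
}
```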
@@ -79,7 +79,6 @@ variable "volumes" {
  default     = true
}


variable "hcp_vault_cluster_id" {
  description = "The ID of the HCP Vault cluster"
  type        = string
@@ -92,6 +91,12 @@ variable "hcp_vault_namespace" {
  default     = "admin"
}

variable "hcp_hvn_cidr" {
  description = "The CIDR block of the HVN peered into the account."
  type        = string
  default     = "172.25.16.0/20"
}

variable "aws_kms_alias" {
  description = "The alias for the AWS KMS key ID"
  type        = string

@@ -4,4 +4,11 @@

terraform {
  required_version = ">= 0.12"

  required_providers {
    vault = {
      source  = "hashicorp/vault"
      version = "4.6.0"
    }
  }
}

@@ -11,16 +11,6 @@ const (
	jwtPath = "nomad_jwt"
)

// roleLegacy is the legacy recommendation for nomad cluster role.
var roleLegacy = map[string]interface{}{
	"disallowed_policies": "nomad-server",
	"explicit_max_ttl":    0, // use old name for vault compatibility
	"name":                "nomad-cluster",
	"orphan":              false,
	"period":              259200, // use old name for vault compatibility
	"renewable":           true,
}

// authConfigJWT is the configuration for the JWT auth method used by Nomad.
func authConfigJWT(jwksURL string) map[string]any {
	return map[string]any{

@@ -1,25 +0,0 @@
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

job "cat" {
  type = "batch"
  group "testcase" {
    task "cat" {
      driver = "raw_exec"

      config {
        command = "cat"
        args    = ["${NOMAD_SECRETS_DIR}/vault_token"]
      }

      vault {
        policies = ["default"]
      }
    }

    restart {
      attempts = 0
      mode     = "fail"
    }
  }
}
@@ -1,30 +0,0 @@
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

path "auth/token/create/nomad-cluster" {
  capabilities = ["update"]
}

path "auth/token/roles/nomad-cluster" {
  capabilities = ["read"]
}

path "auth/token/lookup-self" {
  capabilities = ["read"]
}

path "auth/token/lookup" {
  capabilities = ["update"]
}

path "auth/token/revoke-accessor" {
  capabilities = ["update"]
}

path "sys/capabilities-self" {
  capabilities = ["update"]
}

path "auth/token/renew-self" {
  capabilities = ["update"]
}
@@ -26,16 +26,6 @@ func usable(v, minimum *version.Version) bool {
	}
}

func testVaultLegacy(t *testing.T, b build) {
	vStop, vc := startVault(t, b)
	defer vStop()
	setupVaultLegacy(t, vc)

	nStop, nc := startNomad(t, configureNomadVaultLegacy(vc))
	defer nStop()
	runJob(t, nc, "input/cat.hcl", "default", validateLegacyAllocs)
}

func testVaultJWT(t *testing.T, b build) {
	vStop, vc := startVault(t, b)
	defer vStop()

@@ -21,7 +21,6 @@ import (
	goversion "github.com/hashicorp/go-version"
	"github.com/hashicorp/nomad/api"
	nomadapi "github.com/hashicorp/nomad/api"
	"github.com/hashicorp/nomad/helper/pointer"
	"github.com/hashicorp/nomad/helper/testlog"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/testutil"
@@ -65,9 +64,6 @@ func testVaultBuild(t *testing.T, b build) {
	must.NoError(t, err)

	t.Run("vault("+b.Version+")", func(t *testing.T) {
		t.Run("legacy", func(t *testing.T) {
			testVaultLegacy(t, b)
		})

		if version.GreaterThanOrEqual(minJWTVersion) {
			t.Run("jwt", func(t *testing.T) {
@@ -80,16 +76,6 @@ func testVaultBuild(t *testing.T, b build) {
	})
}

func validateLegacyAllocs(allocs []*nomadapi.AllocationListStub) error {
	if n := len(allocs); n != 1 {
		return fmt.Errorf("expected 1 alloc, got %d", n)
	}
	if s := allocs[0].ClientStatus; s != "complete" {
		return fmt.Errorf("expected alloc status complete, got %s", s)
	}
	return nil
}

func validateJWTAllocs(allocs []*nomadapi.AllocationListStub) error {
	if n := len(allocs); n != 2 {
		return fmt.Errorf("expected 2 allocs, got %d", n)
@@ -181,27 +167,6 @@ func startVault(t *testing.T, b build) (func(), *vaultapi.Client) {
	return vlt.Stop, vlt.Client
}

func setupVaultLegacy(t *testing.T, vc *vaultapi.Client) {
	policy, err := os.ReadFile("input/policy_legacy.hcl")
	must.NoError(t, err)

	sys := vc.Sys()
	must.NoError(t, sys.PutPolicy("nomad-server", string(policy)))

	log := vc.Logical()
	log.Write("auth/token/roles/nomad-cluster", roleLegacy)

	token := vc.Auth().Token()
	secret, err := token.Create(&vaultapi.TokenCreateRequest{
		Policies: []string{"nomad-server"},
		Period:   "72h",
		NoParent: true,
	})
	must.NoError(t, err, must.Sprint("failed to create vault token"))
	must.NotNil(t, secret)
	must.NotNil(t, secret.Auth)
}

func setupVaultJWT(t *testing.T, vc *vaultapi.Client, jwksURL string) {
	logical := vc.Logical()
	sys := vc.Sys()
@@ -278,18 +243,6 @@ func startNomad(t *testing.T, cb func(*testutil.TestServerConfig)) (func(), *nom
	return ts.Stop, nc
}

func configureNomadVaultLegacy(vc *vaultapi.Client) func(*testutil.TestServerConfig) {
	return func(c *testutil.TestServerConfig) {
		c.Vaults = []*testutil.VaultConfig{{
			Enabled:              true,
			Address:              vc.Address(),
			Token:                vc.Token(),
			Role:                 "nomad-cluster",
			AllowUnauthenticated: pointer.Of(true),
		}}
	}
}

func configureNomadVaultJWT(vc *vaultapi.Client) func(*testutil.TestServerConfig) {
	return func(c *testutil.TestServerConfig) {
		c.Vaults = []*testutil.VaultConfig{{

e2e/vaultsecrets/input/acl-role.json (new file, 19 lines)
@@ -0,0 +1,19 @@
{
  "role_type": "jwt",
  "bound_audiences": ["vault.io"],
  "bound_claims": {
    "nomad_namespace": "vault-secrets",
    "nomad_job_id": "secrets"
  },
  "user_claim": "/nomad_job_id",
  "user_claim_json_pointer": true,
  "claim_mappings": {
    "nomad_namespace": "nomad_namespace",
    "nomad_job_id": "nomad_job_id",
    "nomad_task": "nomad_task"
  },
  "token_type": "service",
  "token_policies": ["POLICYID"],
  "token_period": "30m",
  "token_explicit_max_ttl": 0
}
e2e/vaultsecrets/input/default_wi.nomad.hcl (new file, 40 lines)
@@ -0,0 +1,40 @@
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

job "default_wi" {

  constraint {
    attribute = "${attr.kernel.name}"
    value     = "linux"
  }

  group "group" {

    task "task" {

      driver = "docker"

      config {
        image   = "busybox:1"
        command = "/bin/sh"
        args    = ["-c", "sleep 300"]
      }

      vault {}

      template {
        data = <<EOT
E2E_SECRET={{ with secret "SECRET_PATH" }}{{- .Data.data.key -}}{{end}}
EOT

        destination = "${NOMAD_SECRETS_DIR}/secret.txt"
        env         = true
      }

      resources {
        cpu    = 128
        memory = 64
      }
    }
  }
}
@@ -26,7 +26,7 @@ job "secrets" {
      }

      vault {
        policies = ["access-secrets-TESTID"]
        role = "TESTID"
      }

      template {
@@ -53,6 +53,5 @@ EOT
        memory = 64
      }
    }

  }
}
@@ -9,12 +9,14 @@ import (
	"io"
	"os"
	"os/exec"
	"path/filepath"
	"regexp"
	"strings"
	"testing"
	"time"

	e2e "github.com/hashicorp/nomad/e2e/e2eutil"
	"github.com/hashicorp/nomad/e2e/v3/cluster3"
	"github.com/hashicorp/nomad/e2e/v3/jobs3"
	"github.com/hashicorp/nomad/e2e/v3/namespaces3"
	"github.com/hashicorp/nomad/helper/uuid"
@@ -26,6 +28,87 @@ import (
const ns = "vault-secrets"

func TestVaultSecrets(t *testing.T) {
	cluster3.Establish(t,
		cluster3.Leader(),
		cluster3.LinuxClients(1),
		cluster3.Timeout(10*time.Second),
	)

	// Create a Nomad namespace to run test jobs within and then execute them.
	// Any test that wants a custom Nomad namespace should handle that itself.
	t.Cleanup(namespaces3.Create(t, ns))

	t.Run("defaultWID", testDefaultWI)
	t.Run("nonDefaultWID", testNonDefaultWI)
}

func testDefaultWI(t *testing.T) {

	// Lookup the cluster ID which is the KV backend path start.
	clusterID, found := os.LookupEnv("CLUSTER_UNIQUE_IDENTIFIER")
	if !found {
		t.Fatal("CLUSTER_UNIQUE_IDENTIFIER env var not set")
	}

	// Generate our pathing for Vault and a secret value that we will check as
	// part of the test.
	secretCLIPath := filepath.Join(ns, "default_wi", "config")
	secretFullPath := filepath.Join(clusterID, "data", secretCLIPath)
	secretValue := uuid.Generate()

	// Create the secret at the correct mount point for this E2E cluster and use
	// the metadata delete command to permanently delete this when the test
	// exits.
	e2e.MustCommand(t, "vault kv put -mount=%s %s key=%s", clusterID, secretCLIPath, secretValue)
	e2e.CleanupCommand(t, "vault kv metadata delete -mount=%s %s", clusterID, secretCLIPath)

	// Use a stable job ID, otherwise there is a chicken-and-egg problem with
	// the job submission generation of the job ID and ensuring the template
	// lookup uses the correct job ID.
	submission, cleanJob := jobs3.Submit(t,
		"./input/default_wi.nomad.hcl",
		jobs3.DisableRandomJobID(),
		jobs3.Namespace(ns),
		jobs3.Detach(),
		jobs3.ReplaceInJobSpec("SECRET_PATH", secretFullPath),
	)
	t.Cleanup(cleanJob)

	// Ensure the placed allocation reaches the running state. If the test fails
	// here, it's likely due to permission errors or incorrect secret pathing.
	must.NoError(
		t,
		e2e.WaitForAllocStatusExpected(submission.JobID(), ns, []string{"running"}),
		must.Sprint("expected running allocation"),
	)

	// Read the written Vault WI token and the rendered secret within the
	// allocation's secrets directory.
	waitForAllocSecret(t, submission, "/secrets/vault_token", "hvs.")
	waitForAllocSecret(t, submission, "/secrets/secret.txt", secretValue)

	// Ensure both the Vault WI token and the read secret are exported within
	// the task env as desired.
	var (
		vaultTokenRE  = regexp.MustCompile(`VAULT_TOKEN=(.*)`)
		vaultSecretRE = regexp.MustCompile(`E2E_SECRET=(.*)`)
	)

	envList := submission.Exec("group", "task", []string{"env"})

	must.NotNil(
		t,
		vaultTokenRE.FindStringSubmatch(envList.Stdout),
		must.Sprintf("could not find VAULT_TOKEN, got:%v\n", envList.Stdout),
	)
	must.NotNil(
		t,
		vaultSecretRE.FindStringSubmatch(envList.Stdout),
		must.Sprintf("could not find E2E_SECRET, got:%v\n", envList.Stdout),
	)
}

func testNonDefaultWI(t *testing.T) {
	// use a random suffix to encapsulate test keys, policies, etc.
	// for cleanup from vault
	testID := uuid.Generate()[0:8]
@@ -36,8 +119,6 @@ func TestVaultSecrets(t *testing.T) {
	pkiCertIssue := pkiPath + "/issue/nomad"
	policyID := "access-secrets-" + testID

	t.Cleanup(namespaces3.Create(t, ns))

	// configure KV secrets engine
	// Note: the secret key is written to 'secret-###/myapp' but the kv2 API
	// for Vault implicitly turns that into 'secret-###/data/myapp' so we
@@ -59,12 +140,18 @@ func TestVaultSecrets(t *testing.T) {
		"max_ttl=1m", pkiPath)
	e2e.MustCommand(t, "vault secrets tune -max-lease-ttl=1m %s", pkiPath)

	// we can't set an empty policy in our job, so write a bogus policy that
	// doesn't have access to any of the paths we're using
	// Create an ACL role which links to our custom ACL policy which will be
	// assigned to the allocation via the Vault block. In order to test that
	// access permissions can be updated via the policy, the ACL role must be
	// valid.
	writeRole(t, policyID, testID, "./input/acl-role.json")
	writePolicy(t, policyID, "./input/policy-bad.hcl", testID)

	// In order to write the Vault ACL role before job submission, we need a
	// stable job ID.
	submission, cleanJob := jobs3.Submit(t,
		"./input/secrets.nomad",
		"./input/non-default_wi.nomad.hcl",
		jobs3.DisableRandomJobID(),
		jobs3.Namespace(ns),
		jobs3.Detach(),
		jobs3.ReplaceInJobSpec("TESTID", testID),
@@ -114,7 +201,7 @@ func TestVaultSecrets(t *testing.T) {
	renderedCert := waitForAllocSecret(t, submission, "/secrets/certificate.crt", "BEGIN CERTIFICATE")
	waitForAllocSecret(t, submission, "/secrets/access.key", secretValue)

	// record the earliest we can guaranteee that the vault lease TTL has
	// record the earliest we can guarantee that the vault lease TTL has
	// started, so we don't have to wait excessively later on
	ttlStart := time.Now()

@@ -145,7 +232,6 @@ func TestVaultSecrets(t *testing.T) {

	// secret will *not* be renewed because it doesn't have a lease to expire
	waitForAllocSecret(t, submission, "/secrets/access.key", secretValue)

}

// We need to namespace the keys in the policy, so read it in and replace the
@@ -176,6 +262,43 @@ func writePolicy(t *testing.T, policyID, policyPath, testID string) {
	e2e.CleanupCommand(t, "vault policy delete %s", policyID)
}

func writeRole(t *testing.T, policyID, testID, rolePath string) {
	t.Helper()

	// The configured e2e workload identity auth backend uses the cluster ID
	// to allow for concurrent clusters. Without this, we cannot build the auth
	// role path to write the role to.
	clusterID, found := os.LookupEnv("CLUSTER_UNIQUE_IDENTIFIER")
	if !found {
		t.Fatal("CLUSTER_UNIQUE_IDENTIFIER env var not set")
	}

	authMethodName := "jwt-nomad-" + clusterID
	authRolePath := filepath.Join("auth", authMethodName, "role", testID)

	raw, err := os.ReadFile(rolePath)
	must.NoError(t, err)

	roleDoc := string(raw)
	roleDoc = strings.ReplaceAll(roleDoc, "POLICYID", policyID)

	ctx, cancel := context.WithTimeout(context.Background(), time.Second*10)
	defer cancel()
	cmd := exec.CommandContext(ctx, "vault", "write", authRolePath, "-")
	stdin, err := cmd.StdinPipe()
	must.NoError(t, err)

	go func() {
		defer stdin.Close()
		_, err := io.WriteString(stdin, roleDoc)
		test.NoError(t, err)
	}()

	out, err := cmd.CombinedOutput()
	must.NoError(t, err, must.Sprintf("error writing role, output: %s", out))
	e2e.CleanupCommand(t, "vault delete %s", authRolePath)
}

// waitForAllocSecret is similar to e2e.WaitForAllocFile but uses `alloc exec`
// to be able to read the secrets dir, which is not available to `alloc fs`
func waitForAllocSecret(t *testing.T, sub *jobs3.Submission, path string, expect string) string {