vault: add new nomad setup vault -check command (#19720)

The new `nomad setup vault -check` command can be used to retrieve
information about the changes required before a cluster is migrated from
the deprecated legacy authentication flow with Vault to use only
workload identities.
This commit is contained in:
Luiz Aoqui
2024-01-12 15:48:30 -05:00
committed by GitHub
parent 5b7f4746ce
commit e1e80f383e
16 changed files with 1070 additions and 15 deletions

3
.changelog/19720.txt Normal file
View File

@@ -0,0 +1,3 @@
```release-note:improvement
cli: Add new option `nomad setup vault -check` to help cluster operators migrate to workload identities for Vault
```

View File

@@ -411,3 +411,63 @@ type LeadershipTransferResponse struct {
WriteMeta
}
// VaultWorkloadIdentityUpgradeCheck is the result of verifying if the cluster
// is ready to switch to workload identities for Vault.
//
// It is the value decoded from the
// /v1/operator/upgrade-check/vault-workload-identity endpoint. The cluster is
// considered ready (see Ready) only when all three lists are empty.
type VaultWorkloadIdentityUpgradeCheck struct {
// JobsWithoutVaultIdentity is the list of jobs that have a `vault` block
// but do not have an `identity` for Vault.
JobsWithoutVaultIdentity []*JobListStub
// OutdatedNodes is the list of nodes running a version of Nomad that does
// not support workload identities for Vault.
OutdatedNodes []*NodeListStub
// VaultTokens is the list of Vault ACL token accessors that Nomad created
// and will no longer manage after the cluster is migrated to workload
// identities.
VaultTokens []*VaultAccessor
}
// Ready reports whether the cluster can be migrated to workload identities
// with Vault: the check result must be non-nil and must list no Vault tokens,
// no outdated nodes, and no jobs without a Vault identity.
func (v *VaultWorkloadIdentityUpgradeCheck) Ready() bool {
	// A missing result can never be treated as "ready".
	if v == nil {
		return false
	}

	// Any remaining item in any of the lists blocks the migration.
	pending := len(v.JobsWithoutVaultIdentity) + len(v.OutdatedNodes) + len(v.VaultTokens)
	return pending == 0
}
// VaultAccessor is a Vault ACL token created by Nomad for a task to access
// Vault using the legacy authentication flow.
//
// Values of this type are reported in
// VaultWorkloadIdentityUpgradeCheck.VaultTokens.
type VaultAccessor struct {
// AllocID is the ID of the allocation that requested this token.
AllocID string
// Task is the name of the task that requested this token.
Task string
// NodeID is the ID of the node running the allocation that requested this
// token.
NodeID string
// Accessor is the Vault ACL token accessor ID.
Accessor string
// CreationTTL is the TTL set when the token was created.
// NOTE(review): the unit is not stated here; presumably seconds — confirm.
CreationTTL int
// CreateIndex is the Raft index when the token was created.
CreateIndex uint64
}
// UpgradeCheckVaultWorkloadIdentity queries the
// /v1/operator/upgrade-check/vault-workload-identity endpoint and returns the
// cluster's status for migrating to workload identities with Vault.
//
// On failure the error from the query is returned and both other results are
// nil.
func (op *Operator) UpgradeCheckVaultWorkloadIdentity(q *QueryOptions) (*VaultWorkloadIdentityUpgradeCheck, *QueryMeta, error) {
	check := new(VaultWorkloadIdentityUpgradeCheck)

	meta, err := op.c.query("/v1/operator/upgrade-check/vault-workload-identity", check, q)
	if err != nil {
		return nil, nil, err
	}

	return check, meta, nil
}

View File

@@ -487,6 +487,7 @@ func (s *HTTPServer) registerHandlers(enableDebug bool) {
s.mux.HandleFunc("/v1/operator/autopilot/configuration", s.wrap(s.OperatorAutopilotConfiguration))
s.mux.HandleFunc("/v1/operator/autopilot/health", s.wrap(s.OperatorServerHealth))
s.mux.HandleFunc("/v1/operator/snapshot", s.wrap(s.SnapshotRequest))
s.mux.HandleFunc("/v1/operator/upgrade-check/", s.wrap(s.UpgradeCheckRequest))
s.mux.HandleFunc("/v1/system/gc", s.wrap(s.GarbageCollectRequest))
s.mux.HandleFunc("/v1/system/reconcile/summaries", s.wrap(s.ReconcileJobSummaries))

View File

@@ -521,3 +521,32 @@ func (s *HTTPServer) snapshotRestoreRequest(resp http.ResponseWriter, req *http.
return nil, codedErr
}
// UpgradeCheckRequest dispatches requests under /v1/operator/upgrade-check to
// the handler for the specific check named by the rest of the path. Paths
// that do not match a known check produce a 404 coded error.
func (s *HTTPServer) UpgradeCheckRequest(resp http.ResponseWriter, req *http.Request) (any, error) {
	// Only the part of the path after the common prefix selects the check.
	check := strings.TrimPrefix(req.URL.Path, "/v1/operator/upgrade-check")

	if strings.HasSuffix(check, "/vault-workload-identity") {
		return s.upgradeCheckVaultWorkloadIdentity(resp, req)
	}

	notFound := fmt.Sprintf("Path %s not found", req.URL.Path)
	return nil, CodedError(http.StatusNotFound, notFound)
}
// upgradeCheckVaultWorkloadIdentity handles GET requests for the Vault
// workload identity upgrade check. It forwards the parsed request to the
// Operator.UpgradeCheckVaultWorkloadIdentity RPC, writes the query metadata
// to the response, and returns the RPC response as the handler result.
// Any method other than GET yields a 405 coded error.
func (s *HTTPServer) upgradeCheckVaultWorkloadIdentity(resp http.ResponseWriter, req *http.Request) (any, error) {
	if req.Method != http.MethodGet {
		return nil, CodedError(http.StatusMethodNotAllowed, ErrInvalidMethod)
	}

	var (
		args structs.UpgradeCheckVaultWorkloadIdentityRequest
		out  structs.UpgradeCheckVaultWorkloadIdentityResponse
	)

	// When parse reports true there is nothing further to do for this
	// request, and no result or error is returned from here.
	if stop := s.parse(resp, req, &args.Region, &args.QueryOptions); stop {
		return nil, nil
	}

	err := s.agent.RPC("Operator.UpgradeCheckVaultWorkloadIdentity", &args, &out)
	if err != nil {
		return nil, err
	}

	setMeta(resp, &out.QueryMeta)
	return out, nil
}

View File

@@ -661,3 +661,42 @@ func TestOperator_SnapshotRequests(t *testing.T) {
require.True(t, jobExists())
})
}
// TestOperator_UpgradeCheckRequest_VaultWorkloadIdentity registers a job that
// has a Vault block but no identity for Vault and verifies that the
// upgrade-check HTTP handler reports it and sets the query metadata headers.
func TestOperator_UpgradeCheckRequest_VaultWorkloadIdentity(t *testing.T) {
	ci.Parallel(t)

	// Enable the first Vault configuration under the name "default".
	enableVault := func(cfg *Config) {
		cfg.Vaults[0].Enabled = pointer.Of(true)
		cfg.Vaults[0].Name = "default"
	}

	httpTest(t, enableVault, func(ta *TestAgent) {
		// Build a job whose first task uses Vault without declaring an
		// identity for it.
		vaultJob := mock.Job()
		vaultJob.TaskGroups[0].Tasks[0].Vault = &structs.Vault{
			Cluster:  "default",
			Policies: []string{"test"},
		}

		registerReq := structs.JobRegisterRequest{
			Job:          vaultJob,
			WriteRequest: structs.WriteRequest{Region: "global"},
		}
		var registerResp structs.JobRegisterResponse
		must.NoError(t, ta.Agent.RPC("Job.Register", &registerReq, &registerResp))

		// Make the HTTP request to retrieve the upgrade check result.
		httpReq, err := http.NewRequest(http.MethodGet, "/v1/operator/upgrade-check/vault-workload-identity", nil)
		must.NoError(t, err)
		recorder := httptest.NewRecorder()

		raw, err := ta.Server.UpgradeCheckRequest(recorder, httpReq)
		must.NoError(t, err)

		// The query metadata must have been written to the response headers.
		headers := recorder.Header()
		must.NotEq(t, "", headers.Get("X-Nomad-Index"))
		must.NotEq(t, "", headers.Get("X-Nomad-LastContact"))
		must.Eq(t, "true", headers.Get("X-Nomad-KnownLeader"))

		// Only the registered job is reported, and no Vault tokens exist.
		result := raw.(structs.UpgradeCheckVaultWorkloadIdentityResponse)
		must.Len(t, 1, result.JobsWithoutVaultIdentity)
		must.Len(t, 0, result.VaultTokens)
		must.Eq(t, vaultJob.ID, result.JobsWithoutVaultIdentity[0].ID)
	})
}

View File

@@ -12,6 +12,7 @@ import (
"slices"
"strings"
"github.com/dustin/go-humanize/english"
"github.com/hashicorp/vault/api"
"github.com/mitchellh/cli"
"github.com/posener/complete"
@@ -48,6 +49,12 @@ type SetupVaultCommand struct {
destroy bool
autoYes bool
// Options for -check.
check bool
json bool
tmpl string
verbose bool
}
// Help satisfies the cli.Command Help function.
@@ -62,6 +69,10 @@ Usage: nomad setup vault [options]
VAULT_TOKEN, VAULT_ADDR, and other Vault-related environment variables
as documented in https://developer.hashicorp.com/vault/docs/commands#environment-variables.
The -check option can be used to verify if the Nomad cluster is ready to
migrate to use Workload Identities with Vault. This option requires
operator:read permission for Nomad.
WARNING: This command is an experimental feature and may change its behavior
in future versions of Nomad.
@@ -79,7 +90,22 @@ Setup Vault options:
Automatically answers "yes" to all the questions, making the setup
non-interactive. Defaults to "false".
`
-check
Verify if the Nomad cluster is ready to migrate to Workload Identities.
Setup Vault options when using -check:
-json
Output migration status information in its JSON format.
-t
Format and display migration status information using a Go template.
-verbose
Display full information.
` + generalOptionsUsage(usageOptsDefault|usageOptsNoNamespace)
return strings.TrimSpace(helpText)
}
@@ -89,6 +115,12 @@ func (s *SetupVaultCommand) AutocompleteFlags() complete.Flags {
"-jwks-url": complete.PredictAnything,
"-destroy": complete.PredictSet("true", "false"),
"-y": complete.PredictSet("true", "false"),
// Options for -check.
"-check": complete.PredictSet("true", "false"),
"-json": complete.PredictSet("true", "false"),
"-verbose": complete.PredictSet("true", "false"),
"-t": complete.PredictAnything,
})
}
@@ -110,6 +142,13 @@ func (s *SetupVaultCommand) Run(args []string) int {
flags.BoolVar(&s.destroy, "destroy", false, "")
flags.BoolVar(&s.autoYes, "y", false, "")
flags.StringVar(&s.jwksURL, "jwks-url", "http://localhost:4646/.well-known/jwks.json", "")
// Options for -check.
flags.BoolVar(&s.check, "check", false, "")
flags.BoolVar(&s.json, "json", false, "")
flags.BoolVar(&s.verbose, "verbose", false, "")
flags.StringVar(&s.tmpl, "t", "", "")
if err := flags.Parse(args); err != nil {
return 1
}
@@ -121,6 +160,32 @@ func (s *SetupVaultCommand) Run(args []string) int {
return 1
}
if s.check {
return s.checkUpgrade()
} else {
// Verify that -check flags are not set.
var invalid []string
if s.json {
invalid = append(invalid, "-json")
}
if s.verbose {
invalid = append(invalid, "-verbose")
}
if s.tmpl != "" {
invalid = append(invalid, "-t")
}
if len(invalid) > 0 {
s.Ui.Error(fmt.Sprintf(
"The %s %s can only be used with -check",
english.OxfordWordSeries(invalid, "and"),
english.PluralWord(len(invalid), "option", "options"),
))
s.Ui.Error(commandErrorText(s))
return 1
}
}
if !isTty() && !s.autoYes {
s.Ui.Error("This command requires -y option when running in non-interactive mode")
return 1
@@ -216,7 +281,7 @@ a namespace %q and create all configuration within that namespace.
*/
s.Ui.Output(`
We will now enable the JWT credential backend and create a JWT auth method that
Nomad workloads will use.
Nomad workloads will use.
`)
if s.authMethodExists() {
@@ -606,6 +671,117 @@ func (s *SetupVaultCommand) removeConfiguredComponents() int {
return exitCode
}
// checkUpgrade implements the -check option. It queries the Vault workload
// identity upgrade check from the Nomad API and prints the result.
//
// When -json or -t is set the raw response is rendered through Format.
// Otherwise a human-readable report is printed with one section for each
// non-empty category (jobs without a Vault identity, outdated nodes, and
// Vault tokens). IDs passed through limit are shortened to shortId unless
// -verbose is set, in which case fullId is used.
//
// It returns 1 if the client cannot be created, the query fails, or the
// output cannot be formatted. It returns 0 otherwise, including when the
// cluster is reported as not ready.
func (s *SetupVaultCommand) checkUpgrade() int {
	length := shortId
	if s.verbose {
		length = fullId
	}

	client, err := s.Meta.Client()
	if err != nil {
		s.Ui.Error(fmt.Sprintf("Error initializing client: %s", err))
		return 1
	}

	resp, _, err := client.Operator().UpgradeCheckVaultWorkloadIdentity(nil)
	if err != nil {
		// Name the call that actually failed so operators are not sent
		// looking at an unrelated part of the cluster.
		s.Ui.Error(fmt.Sprintf("Error querying Vault workload identity upgrade check: %s", err))
		return 1
	}

	// Output formatted option if requested.
	if s.json || len(s.tmpl) > 0 {
		out, err := Format(s.json, s.tmpl, resp)
		if err != nil {
			s.Ui.Error(err.Error())
			return 1
		}
		s.Ui.Output(out)
		return 0
	}

	if resp.Ready() {
		s.Ui.Output("Nomad cluster is ready to use workload identities with Vault.")
		return 0
	}

	// Each section below is printed only when it has something to report.
	if len(resp.JobsWithoutVaultIdentity) != 0 {
		s.Ui.Output(s.Colorize().Color(`
[bold]Jobs Without Workload Identity for Vault[reset]
The following jobs access Vault but are not configured for workload identity.
You should redeploy them before fully migrating to workload identities with
Vault to prevent unexpected errors if their tokens need to be recreated.
Refer to https://developer.hashicorp.com/nomad/s/vault-workload-identity-migration
for more information.
`))
		// Row 0 is the table header; data rows follow.
		out := make([]string, len(resp.JobsWithoutVaultIdentity)+1)
		out[0] = "ID|Namespace|Type|Status"
		for i, job := range resp.JobsWithoutVaultIdentity {
			out[i+1] = fmt.Sprintf("%s|%s|%s|%s",
				limit(job.ID, length),
				job.Namespace,
				job.Type,
				job.Status,
			)
		}
		s.Ui.Output(formatList(out))
	}

	if len(resp.OutdatedNodes) != 0 {
		s.Ui.Output(s.Colorize().Color(`
[bold]Outdated Nodes[reset]
The following nodes are running a version of Nomad that does not support using
workload identities with Vault.
You should upgrade them to Nomad 1.7 before fully migrating to workload
identities with Vault to prevent unexpected errors if they receive allocations
for jobs that use Vault.
Refer to https://developer.hashicorp.com/nomad/s/vault-workload-identity-migration
for more information.
`))
		out := make([]string, len(resp.OutdatedNodes)+1)
		out[0] = "ID|Name|Address|Version|Drain|Eligibility|Status"
		for i, node := range resp.OutdatedNodes {
			out[i+1] = fmt.Sprintf("%s|%s|%s|%s|%v|%s|%s",
				limit(node.ID, length),
				node.Name,
				node.Address,
				node.Version,
				node.Drain,
				node.SchedulingEligibility,
				node.Status,
			)
		}
		s.Ui.Output(formatList(out))
	}

	if len(resp.VaultTokens) != 0 {
		s.Ui.Output(s.Colorize().Color(`
[bold]Vault Tokens[reset]
The following Vault ACL tokens were created by Nomad but will not be
automatically revoked after migrating to workload identities. They will expire
once their TTL reaches zero.
`))
		out := make([]string, len(resp.VaultTokens)+1)
		out[0] = "Accessor ID|Allocation ID|Node ID|Configured TTL"
		for i, token := range resp.VaultTokens {
			// The accessor is printed in full; only the allocation and
			// node IDs are shortened.
			out[i+1] = fmt.Sprintf("%s|%s|%s|%d",
				token.Accessor,
				limit(token.AllocID, length),
				limit(token.NodeID, length),
				token.CreationTTL,
			)
		}
		s.Ui.Output(formatList(out))
	}

	return 0
}
func printMapOfStrings(m map[string]string) string {
var output string

153
command/setup_vault_test.go Normal file
View File

@@ -0,0 +1,153 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1
package command
import (
"fmt"
"testing"
"github.com/mitchellh/cli"
"github.com/shoenig/test/must"
"github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/command/agent"
"github.com/hashicorp/nomad/helper/pointer"
)
// TestSetupVaultCommand_Run exercises the -check mode of the setup vault
// command against a dev-mode test agent that has one job registered with a
// Vault block but no identity for Vault. It also verifies that the flags that
// only apply to -check are rejected when -check is not set.
func TestSetupVaultCommand_Run(t *testing.T) {
	ci.Parallel(t)

	// Start in dev mode so we get a node registration
	srv, client, url := testServer(t, true, func(c *agent.Config) {
		c.DevMode = true
		c.Vaults[0].Name = "default"
		c.Vaults[0].Enabled = pointer.Of(true)
	})
	defer srv.Shutdown()

	// Register a job with a vault block but without an identity for Vault.
	job := testJob("test")
	job.TaskGroups[0].Tasks[0].Vault = &api.Vault{
		Cluster:  "default",
		Policies: []string{"test"},
	}
	_, _, err := client.Jobs().Register(job, nil)
	must.NoError(t, err)

	// Read the job back so the server-assigned indexes and submit time can be
	// substituted into the expected JSON output below.
	job, _, err = client.Jobs().Info(*job.ID, nil)
	must.NoError(t, err)

	testCases := []struct {
		name        string
		args        []string
		expectedErr string
		expectedRC  int
		expectedOut string
	}{
		{
			name: "-check flags",
			args: []string{
				"-json",
				"-t", "{{.}}",
				"-verbose",
			},
			expectedRC:  1,
			expectedErr: "The -json, -verbose, and -t options can only be used with -check",
		},
		{
			name: "-check",
			args: []string{
				"-check",
				"-address", url,
			},
			expectedRC: 0,
			expectedOut: `
Jobs Without Workload Identity for Vault
The following jobs access Vault but are not configured for workload identity.
You should redeploy them before fully migrating to workload identities with
Vault to prevent unexpected errors if their tokens need to be recreated.
Refer to https://developer.hashicorp.com/nomad/s/vault-workload-identity-migration
for more information.
ID Namespace Type Status
test default batch pending
`,
		},
		{
			// NOTE(review): "CreateIndex" is hard-coded in the template
			// while "JobModifyIndex" is filled from job.CreateIndex —
			// confirm this is intentional.
			name: "-check with -json",
			args: []string{
				"-check",
				"-json",
				"-address", url,
			},
			expectedRC: 0,
			expectedOut: fmt.Sprintf(`{
"JobsWithoutVaultIdentity": [
{
"CreateIndex": 10,
"Datacenters": [
"dc1"
],
"ID": "test",
"JobModifyIndex": %d,
"JobSummary": null,
"ModifyIndex": %d,
"Name": "test",
"Namespace": "default",
"ParameterizedJob": false,
"ParentID": "",
"Periodic": false,
"Priority": 1,
"Status": "pending",
"StatusDescription": "",
"Stop": false,
"SubmitTime": %d,
"Type": "batch"
}
],
"OutdatedNodes": [],
"VaultTokens": []
}
`, *job.CreateIndex, *job.ModifyIndex, *job.SubmitTime),
		},
		{
			name: "-check with -t",
			args: []string{
				"-check",
				"-t", "{{with index .JobsWithoutVaultIdentity 0}}{{.ID}}{{end}}",
				"-address", url,
			},
			expectedRC:  0,
			expectedOut: "test\n",
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			ui := cli.NewMockUi()
			meta := Meta{Ui: ui}

			// Dump everything the command wrote when the case fails, to
			// make the failure easier to diagnose.
			defer func() {
				if t.Failed() {
					fmt.Println(ui.ErrorWriter.String())
					fmt.Println(ui.OutputWriter.String())
				}
			}()

			cmd := &SetupVaultCommand{Meta: meta}
			got := cmd.Run(tc.args)
			must.Eq(t, tc.expectedRC, got)

			if tc.expectedErr != "" {
				must.StrContains(t, ui.ErrorWriter.String(), tc.expectedErr)
			} else {
				// Expected value first, matching the return-code check
				// above, so failure messages label the values correctly.
				must.Eq(t, "", ui.ErrorWriter.String())
				must.Eq(t, tc.expectedOut, ui.OutputWriter.String())
			}
		})
	}
}

View File

@@ -12,7 +12,9 @@ import (
"time"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/go-memdb"
"github.com/hashicorp/go-msgpack/codec"
version "github.com/hashicorp/go-version"
"github.com/hashicorp/raft"
"github.com/hashicorp/serf/serf"
@@ -787,6 +789,99 @@ func (op *Operator) snapshotRestore(conn io.ReadWriteCloser) {
encoder.Encode(reply)
}
// UpgradeCheckVaultWorkloadIdentity is the RPC that reports what still needs
// to change before the cluster can rely only on workload identities for
// Vault. It requires operator read access and fills the reply with:
//
//   - the jobs that have at least one task with a Vault block but no Vault
//     workload identity,
//   - the nodes whose "nomad.version" attribute cannot be parsed or is lower
//     than structs.MinNomadVersionVaultWID, and
//   - every Vault accessor found in the state store.
func (op *Operator) UpgradeCheckVaultWorkloadIdentity(
	args *structs.UpgradeCheckVaultWorkloadIdentityRequest,
	reply *structs.UpgradeCheckVaultWorkloadIdentityResponse,
) error {
	const rpcName = "Operator.UpgradeCheckVaultWorkloadIdentity"

	// The authentication result is only acted on after forwarding and rate
	// measurement have happened.
	authErr := op.srv.Authenticate(op.ctx, args)
	if done, err := op.srv.forward(rpcName, args, args, reply); done {
		return err
	}
	op.srv.MeasureRPCRate("operator", structs.RateMetricRead, args)
	if authErr != nil {
		return structs.ErrPermissionDenied
	}

	// This action requires operator read access.
	aclObj, err := op.srv.ResolveACL(args)
	if err != nil {
		return err
	}
	if aclObj != nil && !aclObj.AllowOperatorRead() {
		return structs.ErrPermissionDenied
	}

	snap := op.srv.fsm.State()
	ws := memdb.NewWatchSet()

	// lacksVaultIdentity reports whether any task of the job has a Vault
	// block without a matching Vault workload identity.
	lacksVaultIdentity := func(job *structs.Job) bool {
		for _, group := range job.TaskGroups {
			for _, task := range group.Tasks {
				if task.Vault == nil {
					continue
				}

				hasWID := false
				for _, wid := range task.Identities {
					if wid.IsVault() {
						hasWID = true
						break
					}
				}
				if !hasWID {
					return true
				}
			}
		}
		return false
	}

	// Check for jobs that use Vault but don't have an identity for Vault.
	// Each job is listed at most once no matter how many tasks qualify.
	jobsIter, err := snap.Jobs(ws)
	if err != nil {
		return fmt.Errorf("failed to retrieve jobs: %w", err)
	}

	// Start from an empty, non-nil slice, as for the other lists below.
	jobStubs := []*structs.JobListStub{}
	for {
		raw := jobsIter.Next()
		if raw == nil {
			break
		}
		if job := raw.(*structs.Job); lacksVaultIdentity(job) {
			jobStubs = append(jobStubs, job.Stub(nil, nil))
		}
	}
	reply.JobsWithoutVaultIdentity = jobStubs

	// Find nodes that don't support workload identities for Vault.
	nodesIter, err := snap.Nodes(ws)
	if err != nil {
		return fmt.Errorf("failed to retrieve nodes: %w", err)
	}

	staleNodes := []*structs.NodeListStub{}
	for {
		raw := nodesIter.Next()
		if raw == nil {
			break
		}

		node := raw.(*structs.Node)
		nodeVersion, parseErr := version.NewVersion(node.Attributes["nomad.version"])
		if parseErr == nil && !nodeVersion.LessThan(structs.MinNomadVersionVaultWID) {
			// Parsable and recent enough: this node is fine.
			continue
		}
		staleNodes = append(staleNodes, node.Stub(nil))
	}
	reply.OutdatedNodes = staleNodes

	// Retrieve Vault tokens that were created by Nomad servers.
	accessorsIter, err := snap.VaultAccessors(ws)
	if err != nil {
		return fmt.Errorf("failed to retrieve Vault token accessors: %w", err)
	}

	accessors := []*structs.VaultAccessor{}
	for {
		raw := accessorsIter.Next()
		if raw == nil {
			break
		}
		accessors = append(accessors, raw.(*structs.VaultAccessor))
	}
	reply.VaultTokens = accessors

	// NOTE(review): the error from LatestIndex is discarded here; confirm
	// that falling back to whatever index value it returns is intended.
	reply.QueryMeta.Index, _ = op.srv.State().LatestIndex()
	op.srv.setQueryMeta(&reply.QueryMeta)

	return nil
}
func decodeStreamOutput(decoder *codec.Decoder) (io.Reader, <-chan error) {
pr, pw := io.Pipe()
errCh := make(chan error, 1)

View File

@@ -17,6 +17,7 @@ import (
"testing"
"time"
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/hashicorp/go-msgpack/codec"
msgpackrpc "github.com/hashicorp/net-rpc-msgpackrpc"
"github.com/hashicorp/nomad/acl"
@@ -1188,3 +1189,183 @@ func TestOperator_SnapshotRestore_ACL(t *testing.T) {
})
}
}
// TestOperator_UpgradeCheckRequest_VaultWorkloadIdentity seeds the state
// store with a mix of up-to-date and outdated nodes, jobs with and without a
// Vault workload identity (across namespaces), and Vault token accessors,
// then verifies that the upgrade check RPC reports exactly the entries that
// block a migration to workload identities.
func TestOperator_UpgradeCheckRequest_VaultWorkloadIdentity(t *testing.T) {
	ci.Parallel(t)

	s1, cleanupS1 := TestServer(t, nil)
	defer cleanupS1()
	testutil.WaitForLeader(t, s1.RPC)
	codec := rpcClient(t, s1)
	state := s1.fsm.State()

	// Register mock nodes, one pre-1.7.
	node := mock.Node()
	node.Attributes["nomad.version"] = "1.7.2"
	err := state.UpsertNode(structs.MsgTypeTestSetup, 1000, node)
	must.NoError(t, err)

	outdatedNode := mock.Node()
	outdatedNode.Attributes["nomad.version"] = "1.6.4"
	err = state.UpsertNode(structs.MsgTypeTestSetup, 1001, outdatedNode)
	must.NoError(t, err)

	// Create non-default namespace. Fail fast if this write does not go
	// through, since a later job is registered in this namespace.
	ns := mock.Namespace()
	err = state.UpsertNamespaces(1002, []*structs.Namespace{ns})
	must.NoError(t, err)

	// Register Vault jobs, one with and another without workload identity.
	jobNoWID := mock.Job()
	jobNoWID.TaskGroups[0].Tasks[0].Vault = &structs.Vault{
		Cluster:  "default",
		Policies: []string{"test"},
	}
	// Add multiple tasks and groups to make sure we don't have duplicate jobs
	// in the result.
	jobNoWID.TaskGroups[0].Tasks = append(jobNoWID.TaskGroups[0].Tasks, jobNoWID.TaskGroups[0].Tasks[0].Copy())
	jobNoWID.TaskGroups[0].Tasks[1].Name = "task-1"
	jobNoWID.TaskGroups = append(jobNoWID.TaskGroups, jobNoWID.TaskGroups[0].Copy())
	jobNoWID.TaskGroups[1].Name = "tg-1"
	err = state.UpsertJob(structs.MsgTypeTestSetup, 1003, nil, jobNoWID)
	must.NoError(t, err)

	jobNoWIDNonDefaultNS := mock.Job()
	jobNoWIDNonDefaultNS.Namespace = ns.Name
	jobNoWIDNonDefaultNS.TaskGroups[0].Tasks[0].Vault = &structs.Vault{
		Cluster:  "default",
		Policies: []string{"test"},
	}
	err = state.UpsertJob(structs.MsgTypeTestSetup, 1004, nil, jobNoWIDNonDefaultNS)
	must.NoError(t, err)

	jobWithWID := mock.Job()
	jobWithWID.TaskGroups[0].Tasks[0].Vault = &structs.Vault{
		Cluster: "default",
	}
	jobWithWID.TaskGroups[0].Tasks[0].Identities = []*structs.WorkloadIdentity{{
		Name: "vault_default",
	}}
	err = state.UpsertJob(structs.MsgTypeTestSetup, 1005, nil, jobWithWID)
	must.NoError(t, err)

	// Create allocs for the jobs.
	allocJobNoWID := mock.Alloc()
	allocJobNoWID.Job = jobNoWID
	allocJobNoWID.JobID = jobNoWID.ID
	allocJobNoWID.NodeID = node.ID

	allocJobWithWID := mock.Alloc()
	allocJobWithWID.Job = jobWithWID
	allocJobWithWID.JobID = jobWithWID.ID
	allocJobWithWID.NodeID = node.ID

	err = state.UpsertAllocs(structs.MsgTypeTestSetup, 1006, []*structs.Allocation{allocJobNoWID, allocJobWithWID})
	must.NoError(t, err)

	// Create Vault token accessor for job without Vault identity and one that
	// is no longer used.
	tokenJobNoWID := mock.VaultAccessor()
	tokenJobNoWID.AllocID = allocJobNoWID.ID
	tokenJobNoWID.NodeID = node.ID

	tokenUnused := mock.VaultAccessor()
	err = state.UpsertVaultAccessor(1007, []*structs.VaultAccessor{tokenJobNoWID, tokenUnused})
	must.NoError(t, err)

	// Make request.
	args := &structs.UpgradeCheckVaultWorkloadIdentityRequest{
		QueryOptions: structs.QueryOptions{
			Region:    "global",
			AuthToken: node.SecretID,
		},
	}
	var resp structs.UpgradeCheckVaultWorkloadIdentityResponse
	err = msgpackrpc.CallWithCodec(codec, "Operator.UpgradeCheckVaultWorkloadIdentity", args, &resp)
	must.NoError(t, err)

	// The reported index is the one used for the last write above.
	must.Eq(t, 1007, resp.Index)

	// Verify only jobs without Vault identity are returned.
	must.Len(t, 2, resp.JobsWithoutVaultIdentity)
	must.SliceContains(t, resp.JobsWithoutVaultIdentity, jobNoWID.Stub(nil, nil), must.Cmp(cmpopts.IgnoreFields(
		structs.JobListStub{},
		"Status",
		"ModifyIndex",
	)))
	must.SliceContains(t, resp.JobsWithoutVaultIdentity, jobNoWIDNonDefaultNS.Stub(nil, nil), must.Cmp(cmpopts.IgnoreFields(
		structs.JobListStub{},
		"Status",
		"ModifyIndex",
	)))

	// Verify only outdated nodes are returned.
	must.Len(t, 1, resp.OutdatedNodes)
	must.SliceContains(t, resp.OutdatedNodes, outdatedNode.Stub(nil))

	// Verify Vault ACL tokens are returned.
	must.Len(t, 2, resp.VaultTokens)
	must.SliceContains(t, resp.VaultTokens, tokenJobNoWID)
	must.SliceContains(t, resp.VaultTokens, tokenUnused)
}
// TestOperator_UpgradeCheckRequest_VaultWorkloadIdentity_ACL verifies the
// access control of the upgrade check RPC on an ACL-enabled server: the root
// token and a token with operator read policy succeed, while a token with
// only namespace write access and a missing token are denied.
func TestOperator_UpgradeCheckRequest_VaultWorkloadIdentity_ACL(t *testing.T) {
	ci.Parallel(t)

	srv, rootToken, cleanup := TestACLServer(t, nil)
	defer cleanup()
	testutil.WaitForLeader(t, srv.RPC)
	rpcCodec := rpcClient(t, srv)
	store := srv.fsm.State()

	// Create test tokens and policies.
	operatorRead := mock.CreatePolicyAndToken(t, store, 1000, "allowed", `operator {policy = "read"}`)
	namespaceWrite := mock.CreatePolicyAndToken(t, store, 1002, "not-allowed", mock.NamespacePolicy("default", "write", nil))

	denied := structs.ErrPermissionDenied.Error()

	// An empty expectedErr means the call must succeed.
	cases := []struct {
		name        string
		token       string
		expectedErr string
	}{
		{name: "root token is allowed", token: rootToken.SecretID},
		{name: "operator read token is allowed", token: operatorRead.SecretID},
		{name: "token not allowed", token: namespaceWrite.SecretID, expectedErr: denied},
		{name: "missing token not allowed", token: "", expectedErr: denied},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// Make request.
			req := &structs.UpgradeCheckVaultWorkloadIdentityRequest{
				QueryOptions: structs.QueryOptions{
					Region:    "global",
					AuthToken: tc.token,
				},
			}
			var out structs.UpgradeCheckVaultWorkloadIdentityResponse
			err := msgpackrpc.CallWithCodec(rpcCodec, "Operator.UpgradeCheckVaultWorkloadIdentity", req, &out)

			if tc.expectedErr != "" {
				must.ErrorContains(t, err, tc.expectedErr)
				return
			}
			must.NoError(t, err)
		})
	}
}

View File

@@ -373,3 +373,15 @@ type SnapshotRestoreResponse struct {
QueryMeta
}
// UpgradeCheckVaultWorkloadIdentityRequest is the argument of the
// Operator.UpgradeCheckVaultWorkloadIdentity RPC. It carries no fields of its
// own beyond the embedded query options.
type UpgradeCheckVaultWorkloadIdentityRequest struct {
QueryOptions
}
// UpgradeCheckVaultWorkloadIdentityResponse is the reply of the
// Operator.UpgradeCheckVaultWorkloadIdentity RPC.
type UpgradeCheckVaultWorkloadIdentityResponse struct {
// JobsWithoutVaultIdentity lists the jobs that use Vault but don't have an
// identity for Vault.
JobsWithoutVaultIdentity []*JobListStub
// OutdatedNodes lists the nodes that don't support workload identities for
// Vault.
OutdatedNodes []*NodeListStub
// VaultTokens lists the Vault token accessors that were created by Nomad
// servers.
VaultTokens []*VaultAccessor
QueryMeta
}

View File

@@ -11,6 +11,7 @@ import (
"time"
"github.com/hashicorp/go-multierror"
"github.com/hashicorp/go-version"
)
const (
@@ -52,6 +53,12 @@ var (
// validIdentityName is used to validate workload identity Name fields. Must
// be safe to use in filenames.
validIdentityName = regexp.MustCompile("^[a-zA-Z0-9-_]{1,128}$")
// MinNomadVersionVaultWID is the minimum version of Nomad that supports
// workload identities for Vault.
// "-a" is used here so that it is "less than" all pre-release versions of
// Nomad 1.7.0 as well
MinNomadVersionVaultWID = version.Must(version.NewVersion("1.7.0-a"))
)
// WorkloadIdentity is the jobspec block which determines if and how a workload

View File

@@ -0,0 +1,187 @@
---
layout: api
page_title: Upgrade Check - Operator - HTTP API
description: |-
The /operator/upgrade-check endpoints provide tools for verifying the state
of the cluster prior to upgrades.
---
# Upgrade Check Operator HTTP API
The `/operator/upgrade-check` endpoints provide some predefined verifications
that can be useful prior to upgrades and changes to Nomad configuration.
<Note>
These endpoints are meant to target specific releases of Nomad and may be
removed or modified without notice.
</Note>
## Vault Workload Identity
This endpoint retrieves jobs, nodes, and Vault ACL tokens that may be affected
when migrating a Nomad cluster to use [workload identities for
Vault][nomad_acl_vault_wid].
| Method | Path | Produces |
| ------ | ---------------------------------------------------- | ------------------ |
| `GET` | `/v1/operator/upgrade-check/vault-workload-identity` | `application/json` |
The table below shows this endpoint's support for
[blocking queries](/nomad/api-docs#blocking-queries) and
[required ACLs](/nomad/api-docs#acls).
| Blocking Queries | ACL Required |
| ---------------- | --------------- |
| `NO` | `operator:read` |
### Sample Request
```shell-session
$ nomad operator api \
/v1/operator/upgrade-check/vault-workload-identity
```
### Sample Response
```json
{
"Index": 20,
"JobsWithoutVaultIdentity": [
{
"CreateIndex": 11,
"Datacenters": [
"*"
],
"ID": "example",
"JobModifyIndex": 11,
"JobSummary": null,
"ModifyIndex": 19,
"Multiregion": null,
"Name": "example",
"Namespace": "default",
"NodePool": "default",
"ParameterizedJob": false,
"ParentID": "",
"Periodic": false,
"Priority": 50,
"Status": "running",
"StatusDescription": "",
"Stop": false,
"SubmitTime": 1704995322434188000,
"Type": "service"
}
],
"KnownLeader": true,
"LastContact": 0,
"NextToken": "",
"OutdatedNodes": [
{
"Address": "192.168.0.186",
"CreateIndex": 8,
"Datacenter": "dc1",
"Drain": false,
"Drivers": {
"qemu": {
"Attributes": {
"driver.qemu": "true",
"driver.qemu.version": "8.1.1"
},
"Detected": true,
"HealthDescription": "Healthy",
"Healthy": true,
"UpdateTime": "2024-01-11T12:48:35.993541-05:00"
},
"exec": {
"Attributes": {},
"Detected": false,
"HealthDescription": "exec driver unsupported on client OS",
"Healthy": false,
"UpdateTime": "2024-01-11T12:48:35.958495-05:00"
},
"raw_exec": {
"Attributes": {
"driver.raw_exec": "true"
},
"Detected": true,
"HealthDescription": "Healthy",
"Healthy": true,
"UpdateTime": "2024-01-11T12:48:35.958539-05:00"
},
"java": {
"Attributes": {},
"Detected": false,
"HealthDescription": "",
"Healthy": false,
"UpdateTime": "2024-01-11T12:48:35.97141-05:00"
},
"docker": {
"Attributes": {
"driver.docker.bridge_ip": "172.17.0.1",
"driver.docker.runtimes": "io.containerd.runc.v2,runc",
"driver.docker.os_type": "linux",
"driver.docker": "true",
"driver.docker.version": "24.0.7"
},
"Detected": true,
"HealthDescription": "Healthy",
"Healthy": true,
"UpdateTime": "2024-01-11T12:48:35.989993-05:00"
}
},
"HostVolumes": null,
"ID": "049f7683-0cde-727f-428a-913a89f92bd8",
"LastDrain": null,
"ModifyIndex": 10,
"Name": "client-1",
"NodeClass": "",
"NodePool": "default",
"SchedulingEligibility": "eligible",
"Status": "ready",
"StatusDescription": "",
"Version": "1.6.4"
}
],
"VaultTokens": [
{
"Accessor": "czh9MPcRXzAhxBL9XKyb3Kh1",
"AllocID": "f00893d4-d9ef-4937-6a7a-ab495b68a971",
"CreateIndex": 14,
"CreationTTL": 60,
"NodeID": "049f7683-0cde-727f-428a-913a89f92bd8",
"Task": "redis"
}
]
}
```
#### Field Reference
- `JobsWithoutVaultIdentity` `(array<Job>)` - The list of jobs that have a
[`vault`][] block but do not have an [`identity`][] for Vault
authentication. These jobs can fail if they are not redeployed with an
identity for Vault before the configuration for Nomad servers is updated and
their access to Vault is removed.
- `OutdatedNodes` `(array<Node>)` - The list of nodes running a version of
Nomad that does not support workload identity authentication for Vault.
Allocations placed in these nodes will use the deprecated legacy flow to
retrieve Vault tokens. If the Nomad servers' configuration is updated to remove
their access to Vault before these nodes are upgraded, these allocations will
fail. Allocations that use workload identity for Vault will not be able to be
placed in these nodes until they are upgraded.
- `VaultTokens` `(array<VaultAccessor>)` - The list of Vault ACL tokens created
by Nomad servers using the deprecated legacy flow. They will continue to work
even after the migration to the workload identities, but they may not be
automatically revoked by Nomad and will only expire once their TTL reaches
zero.
Refer to [Migrating to Using Workload Identity with
Vault][nomad_acl_vault_wid_migrate] for more information.
[`identity`]: /nomad/docs/job-specification/identity
[`vault`]: /nomad/docs/job-specification/vault
[nomad_acl_vault_wid]: /nomad/docs/integrations/vault/acl#nomad-workload-identities
[nomad_acl_vault_wid_migrate]: /nomad/docs/integrations/vault/acl#migrating-to-using-workload-identity-with-vault

View File

@@ -14,6 +14,13 @@ This command requires `acl:write` permissions for Vault and respects
`VAULT_TOKEN`, `VAULT_ADDR`, and other [Vault-related environment
variables][vaultenv].
The `-check` option can be used to verify if the Nomad cluster is ready to
migrate to use Workload Identities with Vault. This option requires
`operator:read` permission for Nomad.
Refer to [Migrating to Using Workload Identity with
Vault][nomad_acl_vault_wid_migrate] for more information.
<Warning>
This command is an experimental feature and may change its behavior in future
@@ -38,6 +45,19 @@ nomad setup vault [options]
- `-y`: Automatically answers `yes` to all the questions, making the setup
non-interactive. Defaults to `false`.
- `-check`: Verify if the Nomad cluster is ready to migrate to Workload
Identities.
### Setup Vault Options When Using `-check`:
- `-json`: Output migration status information in its JSON format.
- `-t`: Format and display migration status information using a Go template.
- `-verbose`: Display full information.
@include 'general_options_no_namespace.mdx'
## Examples
Below is an example of an interactive session with default options, interrupted
@@ -145,4 +165,46 @@ services using workload identities.
Run the command again to finish the configuration process.
```
The `-check` option can be used to verify if a cluster is ready to migrate to using
workload identities with Vault.
```
$ nomad setup vault -check
Jobs Without Workload Identity for Vault
The following jobs access Vault but are not configured for workload identity.
You should redeploy them before fully migrating to workload identities with
Vault to prevent unexpected errors if their tokens need to be recreated.
Refer to https://developer.hashicorp.com/nomad/s/vault-workload-identity-migration
for more information.
ID Namespace Type Status
example default service running
Outdated Nodes
The following nodes are running a version of Nomad that does not support using
workload identities with Vault.
You should upgrade them to Nomad 1.7 before fully migrating to workload
identities with Vault to prevent unexpected errors if they receive allocations
for jobs that use Vault.
Refer to https://developer.hashicorp.com/nomad/s/vault-workload-identity-migration
for more information.
ID Name Address Version Drain Eligibility Status
049f7683 client-1 192.168.0.186 1.6.4 false eligible ready
Vault Tokens
The following Vault ACL tokens were created by Nomad but will not be
automatically revoked after migrating to workload identities. They will expire
once their TTL reaches zero.
Accessor ID Allocation ID Node ID Configured TTL
czh9MPcRXzAhxBL9XKyb3Kh1 f00893d4 049f7683 60
```
[vaultenv]: /vault/docs/commands#environment-variables
[nomad_acl_vault_wid_migrate]: /nomad/docs/integrations/vault/acl#migrating-to-using-workload-identity-with-vault

View File

@@ -763,25 +763,60 @@ $ VAULT_TOKEN=s.H39hfS7eHSbb1GpkdzOQLTmz.fvuLy nomad job run vault.nomad
Migrating from the legacy (pre-1.7) workflow where workloads use the agent's
Vault token requires configuration on your Vault cluster and your Nomad server
agents. It does not require updating your running Nomad jobs unless you wish to
specify a non-default role. To migrate:
agents.
Once the migration is fully complete, Nomad servers will no longer have access
to Vault, as was required in the deprecated legacy workflow. This also means
that they will no longer be able to fulfill some of their responsibilities from
the legacy workflow, such as generating and revoking Vault ACL tokens.
Before removing Vault connectivity configuration from Nomad servers, you must
make sure the rest of the cluster is ready to support workload identities for
Vault. You can run the [`nomad setup vault -check`][nomad_cli_setup_vault]
command to verify what changes are still necessary.
Before removing Nomad servers' access to Vault, you must:
* Redeploy the jobs listed in the section `Jobs Without Workload Identity for
Vault` with an identity for Vault. You can specify this identity [directly
in the job][jobspec_identity_vault] or redeploy the job without changes to
use the default value from the server [`vault.default_identity`][]
configuration if set.
* Upgrade nodes listed in the section `Outdated Nodes` to Nomad 1.7.0 or
later.
There is no action required for the Vault ACL tokens listed under `Vault
Tokens`. Nomad will revoke them as you redeploy jobs to use workload identities
but there may be some leftovers. You can still proceed with the migration
process, but Nomad will not revoke them once access to Vault is removed from
Nomad servers. They will expire once their TTL reaches zero, or you may
manually revoke them if they are no longer needed by an allocation.
The migration process can happen over time. As long as all servers are upgraded
to Nomad 1.7+ and still retain access to Vault, jobs can still use either the
new workload identity flow or the deprecated legacy flow.
To summarize the migration process:
* Create the Vault auth method, default role, and policies on your Vault
cluster.
* Enable [`vault.default_identity`][] blocks in your Nomad server agent
configurations, but **do not modify any of the existing Vault
configuration**.
* Upgrade your cluster following the documented [Upgrade
Process][docs_upgrade].
* Resubmit Nomad jobs that need access to Vault to redeploy them with a new
workload identity for Vault.
* (Optionally) Add [`vault.role`][] fields to any Nomad jobs that will not
use the default role.
* (Optionally) add [`identity`][] blocks to your jobs if you want to use a
different identity because of how your auth method and roles are
configured.
* Once all jobs have been resubmitted, you may remove parameters no longer used
by the Nomad server agents from the [`vault`][config] configuration block.
* Create the Vault auth method, default role, and policies on your Vault
cluster.
* Run the `nomad setup vault -check` command to verify if the cluster is ready
to migrate to workload identity access to Vault.
* Resubmit Nomad jobs that need access to Vault to redeploy them with a new
workload identity for Vault.
* (Optionally) Add [`vault.role`][] fields to any Nomad jobs that will not
use the default role.
* (Optionally) add [`identity`][] blocks to your jobs if you want to use a
different identity because of how your auth method and roles are
configured.
* Upgrade any remaining clients to Nomad 1.7+.
* Remove parameters no longer used by the Nomad server agents from the
[`vault`][config] configuration block.
[Variables]: /nomad/docs/concepts/variables
[Vault Namespaces]: /vault/docs/enterprise/namespaces

View File

@@ -132,6 +132,10 @@
{
"title": "Snapshot",
"path": "operator/snapshot"
},
{
"title": "Upgrade Check",
"path": "operator/upgrade-check"
}
]
},

View File

@@ -29,6 +29,17 @@ module.exports = [
permanent: true,
},
*/
/**
* /s/* redirects for useful links that need a stable URL but whose destination
* we may need to change in the future.
*/
{
source: '/nomad/s/vault-workload-identity-migration',
destination:
'https://developer.hashicorp.com/nomad/docs/integrations/vault/acl#migrating-to-using-workload-identity-with-vault',
permanent: false,
},
// Rename and re-arrange Autoscaling Internals section
{
source: '/nomad/tools/autoscaling/internals/:path*',