Mirror of https://github.com/kemko/nomad.git, synced 2026-01-01 16:05:42 +03:00
Nomad creates Consul ACL tokens and service registrations to support Consul service mesh workloads, before bootstrapping the Envoy proxy. Nomad always talks to the local Consul agent and never directly to the Consul servers. But the local Consul agent talks to the Consul servers in stale consistency mode to reduce load on the servers. This can result in the Nomad client making the Envoy bootstrap request with tokens or services that have not yet replicated to the follower that the local client is connected to. This request gets a 404 on the ACL token and that negative entry gets cached, preventing any retries from succeeding.

To work around this, we'll use a method described by our friends over on `consul-k8s`: after creating the objects in Consul we try to read them from the local agent in stale consistency mode (which prevents a failed read from being cached). This cannot completely eliminate this source of error because it's possible that Consul cluster replication is unhealthy at the time we need it, but this should make Envoy bootstrap significantly more robust.

This changeset adds preflight checks for the objects we create in Consul:

* We add a preflight check for ACL tokens after we login via Workload Identity and in the function we use to derive tokens in the legacy workflow. We do this check early because we also want to use this token for registering group services in the allocrunner hooks.

* We add a preflight check for services right before we bootstrap Envoy in the taskrunner hook, so that we have time for our service client to batch updates to the local Consul agent in addition to the local agent sync.

We've made the timeouts configurable via node metadata rather than the usual static configuration because for most cases, users should not need to touch or even know these values are configurable; the configuration is mostly available for testing.

Fixes: https://github.com/hashicorp/nomad/issues/9307
Fixes: https://github.com/hashicorp/nomad/issues/10451
Fixes: https://github.com/hashicorp/nomad/issues/20516
Ref: https://github.com/hashicorp/consul-k8s/pull/887
Ref: https://hashicorp.atlassian.net/browse/NET-10051
Ref: https://hashicorp.atlassian.net/browse/NET-9273
Follow-up: https://hashicorp.atlassian.net/browse/NET-10138
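As a rough illustration of the preflight read described above (a sketch only, not the actual Nomad implementation), the snippet below polls the local Consul agent with a stale-consistency read until a freshly created ACL token becomes visible or the caller's deadline expires. It assumes the `github.com/hashicorp/consul/api` client; the package name, the `preflightCheckToken` helper, and the fixed 100ms poll interval are made up for illustration.

```go
// Package preflight is an illustrative sketch of the stale-consistency
// preflight read described in the changeset, not Nomad's actual code.
package preflight

import (
	"context"
	"fmt"
	"time"

	"github.com/hashicorp/consul/api"
)

// preflightCheckToken polls the local Consul agent until the ACL token
// identified by secretID is readable, or until ctx is cancelled or times out.
func preflightCheckToken(ctx context.Context, client *api.Client, secretID string) error {
	opts := &api.QueryOptions{
		// A stale read is served by the local agent rather than forwarded
		// to the leader, which prevents a failed read from being cached.
		AllowStale: true,
		Token:      secretID,
	}
	for {
		// Read the token back using itself as the auth token; success means
		// it has replicated far enough for Envoy bootstrap to proceed.
		_, _, err := client.ACL().TokenReadSelf(opts.WithContext(ctx))
		if err == nil {
			return nil
		}
		select {
		case <-ctx.Done():
			return fmt.Errorf("consul ACL token not yet replicated: %w", ctx.Err())
		case <-time.After(100 * time.Millisecond):
			// retry until the caller's deadline expires
		}
	}
}
```

A caller would wrap this in a `context.WithTimeout` whose duration comes from node metadata, matching the changeset's note that these timeouts are configurable mainly for testing.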
315 lines · 9.2 KiB · Go
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1

package taskrunner

import (
	"context"
	"errors"
	"fmt"
	"os"
	"path/filepath"
	"sync"
	"time"

	"github.com/hashicorp/go-hclog"
	"github.com/hashicorp/nomad/client/allocrunner/interfaces"
	ti "github.com/hashicorp/nomad/client/allocrunner/taskrunner/interfaces"
	"github.com/hashicorp/nomad/client/consul"
	cstructs "github.com/hashicorp/nomad/client/structs"
	"github.com/hashicorp/nomad/nomad/structs"
)

const (
	// the name of this hook, used in logs
	sidsHookName = "consul_si_token"

	// sidsBackoffBaseline is the baseline time for exponential backoff when
	// attempting to retrieve a Consul SI token
	sidsBackoffBaseline = 5 * time.Second

	// sidsBackoffLimit is the limit of the exponential backoff when attempting
	// to retrieve a Consul SI token
	sidsBackoffLimit = 3 * time.Minute

	// sidsDerivationTimeout limits the amount of time we may spend trying to
	// derive a SI token. If the hook does not get a token within this amount of
	// time, the result is a failure.
	sidsDerivationTimeout = 5 * time.Minute

	// sidsTokenFile is the name of the file holding the Consul SI token inside
	// the task's secret directory
	sidsTokenFile = "si_token"

	// sidsTokenFilePerms is the level of file permissions granted on the file
	// in the secrets directory for the task
	sidsTokenFilePerms = 0440
)

type sidsHookConfig struct {
	alloc              *structs.Allocation
	task               *structs.Task
	sidsClient         consul.ServiceIdentityAPI
	lifecycle          ti.TaskLifecycle
	logger             hclog.Logger
	allocHookResources *cstructs.AllocHookResources
}

// Service Identities hook for managing SI tokens of connect enabled tasks.
type sidsHook struct {
	// alloc is the allocation
	alloc *structs.Allocation

	// taskName is the name of the task
	task *structs.Task

	// sidsClient is the Consul client [proxy] for requesting SI tokens
	sidsClient consul.ServiceIdentityAPI

	// lifecycle is used to signal, restart, and kill a task
	lifecycle ti.TaskLifecycle

	// derivationTimeout is the amount of time we may wait for Consul to successfully
	// provide a SI token. Making this configurable for testing, otherwise
	// default to sidsDerivationTimeout
	derivationTimeout time.Duration

	// logger is used to log
	logger hclog.Logger

	// lock variables that can be manipulated after hook creation
	lock sync.Mutex
	// firstRun keeps track of whether the hook is being called for the first
	// time (for this task) during the lifespan of the Nomad Client process.
	firstRun bool

	// allocHookResources gives us access to Consul tokens that may have been
	// set by the consul_hook
	allocHookResources *cstructs.AllocHookResources
}

func newSIDSHook(c sidsHookConfig) *sidsHook {
	return &sidsHook{
		alloc:              c.alloc,
		task:               c.task,
		sidsClient:         c.sidsClient,
		lifecycle:          c.lifecycle,
		derivationTimeout:  sidsDerivationTimeout,
		logger:             c.logger.Named(sidsHookName),
		firstRun:           true,
		allocHookResources: c.allocHookResources,
	}
}

func (h *sidsHook) Name() string {
	return sidsHookName
}

func (h *sidsHook) Prestart(
	ctx context.Context,
	req *interfaces.TaskPrestartRequest,
	resp *interfaces.TaskPrestartResponse) error {

	h.lock.Lock()
	defer h.lock.Unlock()

	// do nothing if we have already done things
	if h.earlyExit() {
		resp.Done = true
		return nil
	}

	// optimistically try to recover token from disk
	token, err := h.recoverToken(req.TaskDir.SecretsDir)
	if err != nil {
		return err
	}

	// if we're using Workload Identities then this Connect task should already
	// have a token stored under the cluster + service ID.
	tokens := h.allocHookResources.GetConsulTokens()

	// Find the group-level service that this task belongs to
	tg := h.alloc.Job.LookupTaskGroup(h.alloc.TaskGroup)
	serviceName := h.task.Kind.Value()
	var serviceIdentityName string
	var cluster string
	for _, service := range tg.Services {
		if service.Name == serviceName {
			serviceIdentityName = service.MakeUniqueIdentityName()
			cluster = service.GetConsulClusterName(tg)
			break
		}
	}
	if cluster != "" && serviceIdentityName != "" {
		if token, ok := tokens[cluster][serviceIdentityName]; ok {
			if err := h.writeToken(req.TaskDir.SecretsDir, token.SecretID); err != nil {
				return err
			}
			resp.Done = true
			return nil
		}
	}

	// COMPAT(1.9): this code path exists only to support the legacy (non-WI)
	// workflow. remove for 1.9.0.
	if token == "" {
		if token, err = h.deriveSIToken(ctx); err != nil {
			return err
		}
		if err := h.writeToken(req.TaskDir.SecretsDir, token); err != nil {
			return err
		}
	}

	h.logger.Info("derived SI token", "task", h.task.Name, "si_task", h.task.Kind.Value())

	resp.Done = true
	return nil
}

// earlyExit returns true if the Prestart hook has already been executed during
// the instantiation of this task runner.
//
// assumes h is locked
func (h *sidsHook) earlyExit() bool {
	if h.firstRun {
		h.firstRun = false
		return false
	}
	return true
}

// writeToken writes token into the secrets directory for the task.
func (h *sidsHook) writeToken(dir string, token string) error {
	tokenPath := filepath.Join(dir, sidsTokenFile)
	if err := os.WriteFile(tokenPath, []byte(token), sidsTokenFilePerms); err != nil {
		return fmt.Errorf("failed to write SI token: %w", err)
	}
	return nil
}

// recoverToken returns the token saved to disk in the secrets directory for the
// task if it exists, or the empty string if the file does not exist. an error
// is returned only for some other (e.g. disk IO) error.
func (h *sidsHook) recoverToken(dir string) (string, error) {
	tokenPath := filepath.Join(dir, sidsTokenFile)
	token, err := os.ReadFile(tokenPath)
	if err != nil {
		if !os.IsNotExist(err) {
			h.logger.Error("failed to recover SI token", "error", err)
			return "", fmt.Errorf("failed to recover SI token: %w", err)
		}
		h.logger.Trace("no pre-existing SI token to recover", "task", h.task.Name)
		return "", nil // token file does not exist yet
	}
	h.logger.Trace("recovered pre-existing SI token", "task", h.task.Name)
	return string(token), nil
}

// siDerivationResult is used to pass along the result of attempting to derive
// an SI token between the goroutine doing the derivation and its caller
type siDerivationResult struct {
	token string
	err   error
}

// deriveSIToken spawns and waits on a goroutine which will make attempts to
// derive an SI token until a token is successfully created, or ctx is signaled
// done.
func (h *sidsHook) deriveSIToken(ctx context.Context) (string, error) {
	ctx, cancel := context.WithTimeout(ctx, h.derivationTimeout)
	defer cancel()

	resultCh := make(chan siDerivationResult)

	// keep trying to get the token in the background
	go h.tryDerive(ctx, resultCh)

	// wait until we get a token, or we get a signal to quit
	for {
		select {
		case result := <-resultCh:
			if result.err != nil {
				h.logger.Error("failed to derive SI token", "error", result.err)
				h.kill(ctx, fmt.Errorf("failed to derive SI token: %w", result.err))
				return "", result.err
			}
			return result.token, nil
		case <-ctx.Done():
			return "", ctx.Err()
		}
	}
}

func (h *sidsHook) kill(ctx context.Context, reason error) {
	if err := h.lifecycle.Kill(ctx,
		structs.NewTaskEvent(structs.TaskKilling).
			SetFailsTask().
			SetDisplayMessage(reason.Error()),
	); err != nil {
		h.logger.Error("failed to kill task", "kill_reason", reason, "error", err)
	}
}

// tryDerive loops forever until a token is created, or ctx is done.
func (h *sidsHook) tryDerive(ctx context.Context, ch chan<- siDerivationResult) {
	for attempt := 0; backoff(ctx, attempt); attempt++ {

		tokens, err := h.sidsClient.DeriveSITokens(ctx, h.alloc, []string{h.task.Name})

		switch {
		case err == nil:
			token, exists := tokens[h.task.Name]
			if !exists {
				err := errors.New("response does not include token for task")
				h.logger.Error("derive SI token is missing token for task", "error", err, "task", h.task.Name)
				ch <- siDerivationResult{token: "", err: err}
				return
			}
			ch <- siDerivationResult{token: token, err: nil}
			return
		case structs.IsServerSide(err):
			// the error is known to be a server problem, just die
			h.logger.Error("failed to derive SI token", "error", err, "task", h.task.Name, "server_side", true)
			ch <- siDerivationResult{token: "", err: err}
			return
		case !structs.IsRecoverable(err):
			// the error is known not to be recoverable, just die
			h.logger.Error("failed to derive SI token", "error", err, "task", h.task.Name, "recoverable", false)
			ch <- siDerivationResult{token: "", err: err}
			return

		default:
			// the error is marked recoverable, retry after some backoff
			h.logger.Error("failed attempt to derive SI token", "error", err, "recoverable", true)
		}
	}
}

func backoff(ctx context.Context, attempt int) bool {
	next := computeBackoff(attempt)
	select {
	case <-ctx.Done():
		return false
	case <-time.After(next):
		return true
	}
}

func computeBackoff(attempt int) time.Duration {
	switch attempt {
	case 0:
		return 0
	case 1:
		// go fast on first retry, because a unit test should be fast
		return 100 * time.Millisecond
	default:
		wait := time.Duration(attempt) * sidsBackoffBaseline
		if wait > sidsBackoffLimit {
			wait = sidsBackoffLimit
		}
		return wait
	}
}