Files
nomad/client/allocrunner/taskrunner/sids_hook.go
Tim Gross df67e74615 Consul: add preflight checks for Envoy bootstrap (#23381)
Nomad creates Consul ACL tokens and service registrations to support Consul
service mesh workloads before bootstrapping the Envoy proxy. Nomad always talks
to the local Consul agent and never directly to the Consul servers. But the
local Consul agent talks to the Consul servers in stale consistency mode to
reduce load on the servers. This can result in the Nomad client making the
Envoy bootstrap request with tokens or services that have not yet replicated to
the follower that the local agent is connected to. This request gets a 404 on
the ACL token, and that negative entry gets cached, preventing any retries from
succeeding.

To work around this, we'll use a method described by our friends over on
`consul-k8s`: after creating the objects in Consul, we try to read them back
from the local agent in stale consistency mode (which prevents a failed read
from being cached). This cannot completely eliminate this source of error,
because it's possible that Consul cluster replication is unhealthy at the time
we need it, but it should make Envoy bootstrap significantly more robust.

This changeset adds preflight checks for the objects we create in Consul:
* We add a preflight check for ACL tokens after we log in via Workload
  Identity, and in the function we use to derive tokens in the legacy
  workflow. We do this check early because we also want to use this token for
  registering group services in the allocrunner hooks.
* We add a preflight check for services right before we bootstrap Envoy in the
  taskrunner hook, so that we have time for our service client to batch updates
  to the local Consul agent in addition to the local agent sync (see the sketch
  after this list).
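
The service preflight follows the same shape, polling the local agent's
service registrations (again a sketch under the same assumptions;
`preflightCheckService` is a hypothetical name):

    // preflightCheckService polls the local agent until the service
    // registration is visible or the timeout elapses. serviceID is the
    // Consul service ID, not the service name.
    func preflightCheckService(ctx context.Context, client *api.Client, serviceID string, timeout time.Duration) error {
        ctx, cancel := context.WithTimeout(ctx, timeout)
        defer cancel()
        for {
            services, err := client.Agent().Services()
            if err == nil {
                if _, ok := services[serviceID]; ok {
                    return nil // the registration has reached the local agent
                }
            }
            select {
            case <-ctx.Done():
                return fmt.Errorf("service %q not visible on local agent in time", serviceID)
            case <-time.After(500 * time.Millisecond):
            }
        }
    }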

We've made the timeouts configurable via node metadata rather than the usual
static configuration because in most cases users should not need to touch, or
even know about, these values; the configuration exists mostly for testing.
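
For example, an operator could tune a timeout on a single node with dynamic
node metadata (the key name here is illustrative, not authoritative):

    nomad node meta apply consul.token_preflight_check.timeout=10s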


Fixes: https://github.com/hashicorp/nomad/issues/9307
Fixes: https://github.com/hashicorp/nomad/issues/10451
Fixes: https://github.com/hashicorp/nomad/issues/20516

Ref: https://github.com/hashicorp/consul-k8s/pull/887
Ref: https://hashicorp.atlassian.net/browse/NET-10051
Ref: https://hashicorp.atlassian.net/browse/NET-9273
Follow-up: https://hashicorp.atlassian.net/browse/NET-10138
2024-06-27 10:15:37 -04:00

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1

package taskrunner

import (
    "context"
    "errors"
    "fmt"
    "os"
    "path/filepath"
    "sync"
    "time"

    "github.com/hashicorp/go-hclog"
    "github.com/hashicorp/nomad/client/allocrunner/interfaces"
    ti "github.com/hashicorp/nomad/client/allocrunner/taskrunner/interfaces"
    "github.com/hashicorp/nomad/client/consul"
    cstructs "github.com/hashicorp/nomad/client/structs"
    "github.com/hashicorp/nomad/nomad/structs"
)
const (
    // sidsHookName is the name of this hook, used in logs
    sidsHookName = "consul_si_token"

    // sidsBackoffBaseline is the baseline wait time for backoff when
    // attempting to retrieve a Consul SI token
    sidsBackoffBaseline = 5 * time.Second

    // sidsBackoffLimit is the cap on the backoff wait when attempting to
    // retrieve a Consul SI token
    sidsBackoffLimit = 3 * time.Minute

    // sidsDerivationTimeout limits the amount of time we may spend trying to
    // derive a SI token. If the hook does not get a token within this amount
    // of time, the result is a failure.
    sidsDerivationTimeout = 5 * time.Minute

    // sidsTokenFile is the name of the file holding the Consul SI token inside
    // the task's secrets directory
    sidsTokenFile = "si_token"

    // sidsTokenFilePerms is the level of file permissions granted on the file
    // in the secrets directory for the task
    sidsTokenFilePerms = 0440
)
type sidsHookConfig struct {
    alloc              *structs.Allocation
    task               *structs.Task
    sidsClient         consul.ServiceIdentityAPI
    lifecycle          ti.TaskLifecycle
    logger             hclog.Logger
    allocHookResources *cstructs.AllocHookResources
}

// Service Identities hook for managing SI tokens of connect enabled tasks.
type sidsHook struct {
    // alloc is the allocation
    alloc *structs.Allocation

    // task is the task whose SI token is being managed
    task *structs.Task

    // sidsClient is the Consul client [proxy] for requesting SI tokens
    sidsClient consul.ServiceIdentityAPI

    // lifecycle is used to signal, restart, and kill a task
    lifecycle ti.TaskLifecycle

    // derivationTimeout is the amount of time we may wait for Consul to
    // successfully provide a SI token. It is configurable for testing;
    // otherwise it defaults to sidsDerivationTimeout.
    derivationTimeout time.Duration

    // logger is used to log
    logger hclog.Logger

    // lock guards the variables that can be manipulated after hook creation
    lock sync.Mutex

    // firstRun keeps track of whether the hook is being called for the first
    // time (for this task) during the lifespan of the Nomad Client process.
    firstRun bool

    // allocHookResources gives us access to Consul tokens that may have been
    // set by the consul_hook
    allocHookResources *cstructs.AllocHookResources
}
func newSIDSHook(c sidsHookConfig) *sidsHook {
    return &sidsHook{
        alloc:              c.alloc,
        task:               c.task,
        sidsClient:         c.sidsClient,
        lifecycle:          c.lifecycle,
        derivationTimeout:  sidsDerivationTimeout,
        logger:             c.logger.Named(sidsHookName),
        firstRun:           true,
        allocHookResources: c.allocHookResources,
    }
}

func (h *sidsHook) Name() string {
    return sidsHookName
}
func (h *sidsHook) Prestart(
    ctx context.Context,
    req *interfaces.TaskPrestartRequest,
    resp *interfaces.TaskPrestartResponse) error {

    h.lock.Lock()
    defer h.lock.Unlock()

    // do nothing if we have already done things
    if h.earlyExit() {
        resp.Done = true
        return nil
    }

    // optimistically try to recover token from disk
    token, err := h.recoverToken(req.TaskDir.SecretsDir)
    if err != nil {
        return err
    }

    // if we're using Workload Identities then this Connect task should already
    // have a token stored under the cluster + service ID.
    tokens := h.allocHookResources.GetConsulTokens()

    // Find the group-level service that this task belongs to
    tg := h.alloc.Job.LookupTaskGroup(h.alloc.TaskGroup)
    serviceName := h.task.Kind.Value()
    var serviceIdentityName string
    var cluster string
    for _, service := range tg.Services {
        if service.Name == serviceName {
            serviceIdentityName = service.MakeUniqueIdentityName()
            cluster = service.GetConsulClusterName(tg)
            break
        }
    }

    if cluster != "" && serviceIdentityName != "" {
        if token, ok := tokens[cluster][serviceIdentityName]; ok {
            if err := h.writeToken(req.TaskDir.SecretsDir, token.SecretID); err != nil {
                return err
            }
            resp.Done = true
            return nil
        }
    }

    // COMPAT(1.9): this code path exists only to support the legacy (non-WI)
    // workflow. remove for 1.9.0.
    if token == "" {
        if token, err = h.deriveSIToken(ctx); err != nil {
            return err
        }
        if err := h.writeToken(req.TaskDir.SecretsDir, token); err != nil {
            return err
        }
    }

    h.logger.Info("derived SI token", "task", h.task.Name, "si_task", h.task.Kind.Value())
    resp.Done = true
    return nil
}
// earlyExit returns true if the Prestart hook has already been executed during
// the instantiation of this task runner.
//
// assumes h is locked
func (h *sidsHook) earlyExit() bool {
    if h.firstRun {
        h.firstRun = false
        return false
    }
    return true
}

// writeToken writes token into the secrets directory for the task.
func (h *sidsHook) writeToken(dir string, token string) error {
    tokenPath := filepath.Join(dir, sidsTokenFile)
    if err := os.WriteFile(tokenPath, []byte(token), sidsTokenFilePerms); err != nil {
        return fmt.Errorf("failed to write SI token: %w", err)
    }
    return nil
}

// recoverToken returns the token saved to disk in the secrets directory for
// the task if it exists, or the empty string if the file does not exist. An
// error is returned only for some other (e.g. disk IO) error.
func (h *sidsHook) recoverToken(dir string) (string, error) {
    tokenPath := filepath.Join(dir, sidsTokenFile)
    token, err := os.ReadFile(tokenPath)
    if err != nil {
        if !os.IsNotExist(err) {
            h.logger.Error("failed to recover SI token", "error", err)
            return "", fmt.Errorf("failed to recover SI token: %w", err)
        }
        h.logger.Trace("no pre-existing SI token to recover", "task", h.task.Name)
        return "", nil // token file does not exist yet
    }
    h.logger.Trace("recovered pre-existing SI token", "task", h.task.Name)
    return string(token), nil
}

// siDerivationResult is used to pass along the result of attempting to derive
// an SI token between the goroutine doing the derivation and its caller
type siDerivationResult struct {
    token string
    err   error
}
// deriveSIToken spawns and waits on a goroutine which will make attempts to
// derive an SI token until a token is successfully created, or ctx is signaled
// done.
func (h *sidsHook) deriveSIToken(ctx context.Context) (string, error) {
    ctx, cancel := context.WithTimeout(ctx, h.derivationTimeout)
    defer cancel()

    resultCh := make(chan siDerivationResult)

    // keep trying to get the token in the background
    go h.tryDerive(ctx, resultCh)

    // wait until we get a token, or we get a signal to quit
    for {
        select {
        case result := <-resultCh:
            if result.err != nil {
                h.logger.Error("failed to derive SI token", "error", result.err)
                h.kill(ctx, fmt.Errorf("failed to derive SI token: %w", result.err))
                return "", result.err
            }
            return result.token, nil
        case <-ctx.Done():
            return "", ctx.Err()
        }
    }
}

func (h *sidsHook) kill(ctx context.Context, reason error) {
    if err := h.lifecycle.Kill(ctx,
        structs.NewTaskEvent(structs.TaskKilling).
            SetFailsTask().
            SetDisplayMessage(reason.Error()),
    ); err != nil {
        h.logger.Error("failed to kill task", "kill_reason", reason, "error", err)
    }
}
// tryDerive loops forever until a token is created, or ctx is done.
func (h *sidsHook) tryDerive(ctx context.Context, ch chan<- siDerivationResult) {
    for attempt := 0; backoff(ctx, attempt); attempt++ {
        tokens, err := h.sidsClient.DeriveSITokens(ctx, h.alloc, []string{h.task.Name})
        switch {
        case err == nil:
            token, exists := tokens[h.task.Name]
            if !exists {
                err := errors.New("response does not include token for task")
                h.logger.Error("derive SI token is missing token for task", "error", err, "task", h.task.Name)
                ch <- siDerivationResult{token: "", err: err}
                return
            }
            ch <- siDerivationResult{token: token, err: nil}
            return
        case structs.IsServerSide(err):
            // the error is known to be a server problem, just die
            h.logger.Error("failed to derive SI token", "error", err, "task", h.task.Name, "server_side", true)
            ch <- siDerivationResult{token: "", err: err}
            return
        case !structs.IsRecoverable(err):
            // the error is known not to be recoverable, just die
            h.logger.Error("failed to derive SI token", "error", err, "task", h.task.Name, "recoverable", false)
            ch <- siDerivationResult{token: "", err: err}
            return
        default:
            // the error is marked recoverable, retry after some backoff
            h.logger.Error("failed attempt to derive SI token", "error", err, "recoverable", true)
        }
    }
}
// backoff blocks until the next attempt should be made, returning false if
// ctx is done and the caller should give up.
func backoff(ctx context.Context, attempt int) bool {
    next := computeBackoff(attempt)
    select {
    case <-ctx.Done():
        return false
    case <-time.After(next):
        return true
    }
}

// computeBackoff returns the wait time before the given attempt, growing
// linearly with the attempt number and capped at sidsBackoffLimit.
func computeBackoff(attempt int) time.Duration {
    switch attempt {
    case 0:
        return 0
    case 1:
        // go fast on first retry, because a unit test should be fast
        return 100 * time.Millisecond
    default:
        wait := time.Duration(attempt) * sidsBackoffBaseline
        if wait > sidsBackoffLimit {
            wait = sidsBackoffLimit
        }
        return wait
    }
}