nomad/client/allocrunner/consul_hook.go
Tim Gross df67e74615 Consul: add preflight checks for Envoy bootstrap (#23381)
Nomad creates Consul ACL tokens and service registrations to support Consul
service mesh workloads before bootstrapping the Envoy proxy. Nomad always talks
to the local Consul agent and never directly to the Consul servers, but the
local Consul agent talks to the Consul servers in stale consistency mode to
reduce load on the servers. This can result in the Nomad client making the
Envoy bootstrap request with a token or service that has not yet replicated to
the follower that the local agent is connected to. That request gets a 404 on
the ACL token, the negative result gets cached, and no retry can succeed.

To work around this, we use a method described by our friends over on
`consul-k8s`: after creating the objects in Consul, we try to read them back
from the local agent in stale consistency mode (which prevents a failed read
from being cached). This cannot completely eliminate this source of error,
because it's possible that Consul cluster replication is unhealthy at the time
we need it, but it should make Envoy bootstrap significantly more robust.
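
As a rough sketch only (assuming the `consul/api` client, a hypothetical helper
name, and an arbitrary polling interval, not the exact code in this changeset),
the token preflight check amounts to polling the local agent with stale reads
until the new token becomes visible or the context times out:

package sketch // illustrative only, not part of this changeset

import (
	"context"
	"fmt"
	"time"

	consulapi "github.com/hashicorp/consul/api"
)

// tokenPreflightCheck polls the local Consul agent with stale reads until the
// newly created ACL token is readable there, or ctx expires.
func tokenPreflightCheck(ctx context.Context, client *consulapi.Client, token *consulapi.ACLToken) error {
	ticker := time.NewTicker(100 * time.Millisecond) // polling interval is an assumption
	defer ticker.Stop()

	for {
		// AllowStale keeps a failed read from being negatively cached; the new
		// token itself authenticates the read.
		qo := &consulapi.QueryOptions{AllowStale: true, Token: token.SecretID}
		if _, _, err := client.ACL().TokenReadSelf(qo.WithContext(ctx)); err == nil {
			return nil // the token has replicated to the server our agent talks to
		}
		select {
		case <-ctx.Done():
			return fmt.Errorf("token %s was not replicated in time: %w", token.AccessorID, ctx.Err())
		case <-ticker.C:
		}
	}
}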

This changeset adds preflight checks for the objects we create in Consul:
* We add a preflight check for ACL tokens after we log in via Workload
  Identity, and in the function we use to derive tokens in the legacy
  workflow. We do this check early because we also want to use this token for
  registering group services in the allocrunner hooks.
* We add a preflight check for services right before we bootstrap Envoy in the
  taskrunner hook, so that we have time for our service client to batch updates
  to the local Consul agent in addition to the local agent sync (see the sketch
  after this list).
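
The service-side check follows the same shape as the token sketch above (same
imports; the helper name and interval are again illustrative): wait until the
local agent reports the service registration that Envoy bootstrap depends on.

// servicePreflightCheck waits until the local Consul agent reports the given
// service registration, or ctx expires.
func servicePreflightCheck(ctx context.Context, client *consulapi.Client, serviceID string) error {
	ticker := time.NewTicker(100 * time.Millisecond) // polling interval is an assumption
	defer ticker.Stop()

	for {
		qo := (&consulapi.QueryOptions{}).WithContext(ctx)
		if _, _, err := client.Agent().Service(serviceID, qo); err == nil {
			return nil // the registration is visible on the local agent
		}
		select {
		case <-ctx.Done():
			return fmt.Errorf("service %q was not registered in time: %w", serviceID, ctx.Err())
		case <-ticker.C:
		}
	}
}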

The preflight check timeouts are configurable via node metadata rather than the
usual static client configuration because in most cases users should not need
to touch, or even know about, these values; the configuration exists mostly for
testing.
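
A sketch of how such a timeout might be resolved from node metadata (standard
library only); the metadata key and default below are hypothetical, not the
exact names Nomad uses:

// tokenPreflightTimeout returns the preflight timeout from node metadata,
// falling back to a default. Key name and default value are assumptions.
func tokenPreflightTimeout(meta map[string]string) time.Duration {
	const defaultTimeout = 10 * time.Second // hypothetical default
	if raw, ok := meta["consul.token_preflight_check.timeout"]; ok { // hypothetical key
		if d, err := time.ParseDuration(raw); err == nil {
			return d
		}
	}
	return defaultTimeout
}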


Fixes: https://github.com/hashicorp/nomad/issues/9307
Fixes: https://github.com/hashicorp/nomad/issues/10451
Fixes: https://github.com/hashicorp/nomad/issues/20516

Ref: https://github.com/hashicorp/consul-k8s/pull/887
Ref: https://hashicorp.atlassian.net/browse/NET-10051
Ref: https://hashicorp.atlassian.net/browse/NET-9273
Follow-up: https://hashicorp.atlassian.net/browse/NET-10138
2024-06-27 10:15:37 -04:00


// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1

package allocrunner

import (
	"context"
	"fmt"

	consulapi "github.com/hashicorp/consul/api"
	log "github.com/hashicorp/go-hclog"
	"github.com/hashicorp/go-multierror"

	"github.com/hashicorp/nomad/client/allocdir"
	"github.com/hashicorp/nomad/client/consul"
	cstructs "github.com/hashicorp/nomad/client/structs"
	"github.com/hashicorp/nomad/client/taskenv"
	"github.com/hashicorp/nomad/client/widmgr"
	"github.com/hashicorp/nomad/nomad/structs"
	structsc "github.com/hashicorp/nomad/nomad/structs/config"
)
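
// consulHook is an allocrunner hook that derives Consul ACL tokens for the
// allocation's tasks and services via workload identity login and shares them
// with later hooks through the alloc hook resources.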
type consulHook struct {
	alloc                   *structs.Allocation
	allocdir                allocdir.Interface
	widmgr                  widmgr.IdentityManager
	consulConfigs           map[string]*structsc.ConsulConfig
	consulClientConstructor consul.ConsulClientFunc
	hookResources           *cstructs.AllocHookResources
	envBuilder              *taskenv.Builder
	logger                  log.Logger
	shutdownCtx             context.Context
	shutdownCancelFn        context.CancelFunc
}

type consulHookConfig struct {
	alloc    *structs.Allocation
	allocdir allocdir.Interface
	widmgr   widmgr.IdentityManager

	// consulConfigs is a map of cluster names to Consul configs
	consulConfigs map[string]*structsc.ConsulConfig

	// consulClientConstructor injects the function that will return a consul
	// client (eases testing)
	consulClientConstructor consul.ConsulClientFunc

	// hookResources is used for storing and retrieving Consul tokens
	hookResources *cstructs.AllocHookResources

	// envBuilder is used to interpolate services
	envBuilder func() *taskenv.Builder

	logger log.Logger
}
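
// newConsulHook builds the hook from its configuration and sets up the
// shutdown context used to cancel any in-flight token preflight checks.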
func newConsulHook(cfg consulHookConfig) *consulHook {
	shutdownCtx, shutdownCancelFn := context.WithCancel(context.Background())
	h := &consulHook{
		alloc:                   cfg.alloc,
		allocdir:                cfg.allocdir,
		widmgr:                  cfg.widmgr,
		consulConfigs:           cfg.consulConfigs,
		consulClientConstructor: cfg.consulClientConstructor,
		hookResources:           cfg.hookResources,
		envBuilder:              cfg.envBuilder(),
		shutdownCtx:             shutdownCtx,
		shutdownCancelFn:        shutdownCancelFn,
	}
	h.logger = cfg.logger.Named(h.Name())
	return h
}

func (*consulHook) Name() string {
	return "consul"
}
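
// Prerun derives Consul tokens for the task group's services and tasks,
// revoking any tokens already obtained if an error occurs, and stores the
// result in hookResources.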
func (h *consulHook) Prerun() error {
	job := h.alloc.Job
	if job == nil {
		// this is always a programming error
		err := fmt.Errorf("alloc %v does not have a job", h.alloc.Name)
		h.logger.Error(err.Error())
		return err
	}

	// tokens are a map of Consul cluster to identity name to Consul ACL token.
	tokens := map[string]map[string]*consulapi.ACLToken{}

	tg := job.LookupTaskGroup(h.alloc.TaskGroup)
	if tg == nil { // this is always a programming error
		return fmt.Errorf("alloc %v does not have a valid task group", h.alloc.Name)
	}

	var mErr *multierror.Error

	if err := h.prepareConsulTokensForServices(tg.Services, tg, tokens, h.envBuilder.Build()); err != nil {
		mErr = multierror.Append(mErr, err)
	}
	for _, task := range tg.Tasks {
		h.envBuilder.UpdateTask(h.alloc, task)
		if err := h.prepareConsulTokensForServices(task.Services, tg, tokens, h.envBuilder.Build()); err != nil {
			mErr = multierror.Append(mErr, err)
		}
		if err := h.prepareConsulTokensForTask(task, tg, tokens); err != nil {
			mErr = multierror.Append(mErr, err)
		}
	}

	if err := mErr.ErrorOrNil(); err != nil {
		revokeErr := h.revokeTokens(tokens)
		mErr = multierror.Append(mErr, revokeErr)
		return mErr.ErrorOrNil()
	}

	// write the tokens to hookResources
	h.hookResources.SetConsulTokens(tokens)

	return nil
}
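
// prepareConsulTokensForTask derives a Consul ACL token for the task's Consul
// workload identity, if it has one, and records it in tokens keyed by cluster
// and identity/task name.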
func (h *consulHook) prepareConsulTokensForTask(task *structs.Task, tg *structs.TaskGroup, tokens map[string]map[string]*consulapi.ACLToken) error {
	if task == nil {
		// programming error
		return fmt.Errorf("cannot prepare consul tokens, no task specified")
	}

	clusterName := task.GetConsulClusterName(tg)
	consulConfig, ok := h.consulConfigs[clusterName]
	if !ok {
		return fmt.Errorf("no such consul cluster: %s", clusterName)
	}

	// Find task workload identity for Consul.
	widName := fmt.Sprintf("%s_%s", structs.ConsulTaskIdentityNamePrefix, consulConfig.Name)
	wid := task.GetIdentity(widName)
	if wid == nil {
		// Skip task if it doesn't have an identity for Consul since it doesn't
		// need a token.
		return nil
	}

	// Find signed workload identity.
	ti := *task.IdentityHandle(wid)
	jwt, err := h.widmgr.Get(ti)
	if err != nil {
		return fmt.Errorf("error getting signed identity for task %s: %v", task.Name, err)
	}

	// Derive token for task.
	req := consul.JWTLoginRequest{
		JWT:            jwt.JWT,
		AuthMethodName: consulConfig.TaskIdentityAuthMethod,
		Meta: map[string]string{
			"requested_by": fmt.Sprintf("nomad_task_%s", task.Name),
		},
	}
	token, err := h.getConsulToken(consulConfig.Name, req)
	if err != nil {
		return fmt.Errorf("failed to derive Consul token for task %s: %v", task.Name, err)
	}

	// Store token in results.
	if _, ok = tokens[clusterName]; !ok {
		tokens[clusterName] = make(map[string]*consulapi.ACLToken)
	}
	tokenName := widName + "/" + task.Name
	tokens[clusterName][tokenName] = token

	return nil
}
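
// prepareConsulTokensForServices derives Consul ACL tokens for every Consul
// service that has a workload identity, interpolating each service with the
// given task environment.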
func (h *consulHook) prepareConsulTokensForServices(services []*structs.Service, tg *structs.TaskGroup, tokens map[string]map[string]*consulapi.ACLToken, env *taskenv.TaskEnv) error {
	if len(services) == 0 {
		return nil
	}

	var mErr *multierror.Error
	for _, service := range services {
		// Exit early if service doesn't need a Consul token.
		if service == nil || !service.IsConsul() || service.Identity == nil {
			continue
		}

		clusterName := service.GetConsulClusterName(tg)
		consulConfig, ok := h.consulConfigs[clusterName]
		if !ok {
			return fmt.Errorf("no such consul cluster: %s", clusterName)
		}

		// Find signed identity workload.
		handle := *service.IdentityHandle(env.ReplaceEnv)
		jwt, err := h.widmgr.Get(handle)
		if err != nil {
			mErr = multierror.Append(mErr, fmt.Errorf(
				"error getting signed identity for service %s: %v",
				service.Name, err,
			))
			continue
		}

		// Derive token for service.
		req := consul.JWTLoginRequest{
			JWT:            jwt.JWT,
			AuthMethodName: consulConfig.ServiceIdentityAuthMethod,
			Meta: map[string]string{
				"requested_by": fmt.Sprintf("nomad_service_%s", handle.InterpolatedWorkloadIdentifier),
			},
		}
		token, err := h.getConsulToken(clusterName, req)
		if err != nil {
			mErr = multierror.Append(mErr, fmt.Errorf(
				"failed to derive Consul token for service %s: %v",
				service.Name, err,
			))
			continue
		}

		// Store token in results.
		if _, ok = tokens[clusterName]; !ok {
			tokens[clusterName] = make(map[string]*consulapi.ACLToken)
		}
		tokens[clusterName][service.Identity.Name] = token
	}

	return mErr.ErrorOrNil()
}
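
// getConsulToken logs into the cluster with the workload identity JWT and then
// runs a preflight check so we don't hand out a token before it has replicated
// to the Consul server the local agent is connected to.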
func (h *consulHook) getConsulToken(cluster string, req consul.JWTLoginRequest) (*consulapi.ACLToken, error) {
	client, err := h.clientForCluster(cluster)
	if err != nil {
		return nil, fmt.Errorf("failed to retrieve Consul client for cluster %s: %v", cluster, err)
	}

	t, err := client.DeriveTokenWithJWT(req)
	if err == nil {
		err = client.TokenPreflightCheck(h.shutdownCtx, t)
	}
	return t, err
}
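
// clientForCluster returns a Consul client for the named cluster.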
func (h *consulHook) clientForCluster(cluster string) (consul.Client, error) {
	consulConf, ok := h.consulConfigs[cluster]
	if !ok {
		return nil, fmt.Errorf("unable to find configuration for consul cluster %v", cluster)
	}
	return h.consulClientConstructor(consulConf, h.logger)
}

// Postrun cleans up the Consul tokens after the tasks have exited.
func (h *consulHook) Postrun() error {
	tokens := h.hookResources.GetConsulTokens()
	err := h.revokeTokens(tokens)
	if err != nil {
		return err
	}
	h.hookResources.SetConsulTokens(tokens)
	return nil
}

// Shutdown will get called when the client is gracefully stopping.
func (h *consulHook) Shutdown() {
	h.shutdownCancelFn()
}

// Destroy cleans up any remaining Consul tokens if the alloc is GC'd or fails
// to restore after a client restart.
func (h *consulHook) Destroy() error {
	tokens := h.hookResources.GetConsulTokens()
	err := h.revokeTokens(tokens)
	if err != nil {
		return err
	}
	h.hookResources.SetConsulTokens(tokens)
	return nil
}
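
// revokeTokens revokes all tokens for each cluster and clears that cluster's
// entry so a later call (e.g. Destroy after Postrun) doesn't revoke them again.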
func (h *consulHook) revokeTokens(tokens map[string]map[string]*consulapi.ACLToken) error {
	mErr := multierror.Error{}

	for cluster, tokensForCluster := range tokens {
		if tokensForCluster == nil {
			// if called by Destroy, may have been removed by Postrun
			continue
		}
		client, err := h.clientForCluster(cluster)
		if err != nil {
			mErr.Errors = append(mErr.Errors, err)
			continue
		}
		toRevoke := []*consulapi.ACLToken{}
		for _, token := range tokensForCluster {
			toRevoke = append(toRevoke, token)
		}
		err = client.RevokeTokens(toRevoke)
		if err != nil {
			mErr.Errors = append(mErr.Errors, err)
			continue
		}
		tokens[cluster] = nil
	}

	return mErr.ErrorOrNil()
}