nomad/client/allocrunner/consul_hook.go
Tim Gross e168548341 provide allocrunner hooks with prebuilt taskenv and fix mutation bugs (#25373)
Some of our allocrunner hooks require a task environment for interpolating values based on the node or allocation. Several of these hooks accept an already-built environment or builder and then keep it in memory. Both the environment and the builder retain a copy of all the node attributes and allocation metadata, which balloons memory usage until the allocation is GC'd.

While we'd like to look into ways to avoid keeping the allocrunner around entirely (see #25372), for now we can significantly reduce memory usage by creating the task environment on-demand when calling allocrunner methods, rather than persisting it in the allocrunner hooks.
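Concretely, hooks no longer hold a `taskenv.Builder` (or a built environment) as a field; the allocrunner constructs the environment when it calls the hook and passes it as an argument, as `consulHook.Prerun` does in the file below. A rough before/after sketch, where `hookBefore` and `hookAfter` are hypothetical illustrative types, not real Nomad code:

```go
package allocrunner

import "github.com/hashicorp/nomad/client/taskenv"

// Before: the hook kept a builder alive for the allocation's lifetime,
// and with it a copy of all node attributes and alloc metadata.
type hookBefore struct {
	envBuilder *taskenv.Builder
}

// After: the hook retains nothing; it receives a freshly built env for
// the duration of each call and lets it be garbage collected afterward.
type hookAfter struct{}

func (h *hookAfter) Prerun(allocEnv *taskenv.TaskEnv) error {
	// interpolate using allocEnv, then drop it
	return nil
}
```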

In doing so, we uncovered two other bugs:
* The WID manager, the group service hook, and the checks hook all need to interpolate services for specific tasks. They did so by mutating a taskenv builder, but each mutation writes to the same underlying environment map. In a group with multiple tasks, an environment variable set by one task could therefore be interpolated into the service definition of another task that did not set that variable. Only service definition interpolation is affected; this does not leak env vars across running tasks, because each taskrunner has its own builder.

  To fix this, we move the `UpdateTask` method off the builder and onto the taskenv as the `WithTask` method. This makes a shallow copy of the taskenv with a deep clone of the environment map used for interpolation, and then overwrites that environment with the task's own (see the sketch after this list).

* The checks hook interpolates Nomad native service checks only on `Prerun` and not on `Update`. This could cause unexpected deregistration and re-registration of checks during in-place updates. To fix this, we now interpolate in the `Update` method as well.
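
A minimal sketch of the copy-on-write behavior described above, with simplified stand-ins for `taskenv.TaskEnv` and `structs.Task` (the real `WithTask` also takes the allocation as an argument, as seen in `Prerun` below):

```go
package main

import (
	"fmt"
	"maps"
)

// TaskEnv and Task are simplified stand-ins for Nomad's taskenv.TaskEnv
// and structs.Task; only the interpolation map matters for this sketch.
type TaskEnv struct {
	EnvMap map[string]string
}

type Task struct {
	Name string
	Env  map[string]string
}

// WithTask returns a task-scoped copy of the env: shallow copy of the
// receiver, clone of the interpolation map, then overlay the task's own
// variables. Writes to the copy never touch the original, so sibling
// tasks cannot see each other's variables.
func (t *TaskEnv) WithTask(task *Task) *TaskEnv {
	copied := *t                         // shallow copy of the env
	copied.EnvMap = maps.Clone(t.EnvMap) // clone the map so writes don't alias
	for k, v := range task.Env {
		copied.EnvMap[k] = v // overwrite with the task's environment
	}
	return &copied
}

func main() {
	allocEnv := &TaskEnv{EnvMap: map[string]string{"NOMAD_GROUP_NAME": "web"}}
	a := allocEnv.WithTask(&Task{Name: "a", Env: map[string]string{"DB_HOST": "10.0.0.1"}})
	b := allocEnv.WithTask(&Task{Name: "b"})
	fmt.Println(a.EnvMap["DB_HOST"]) // "10.0.0.1"
	fmt.Println(b.EnvMap["DB_HOST"]) // "" — task b never sees task a's variable
}
```

Because each call returns an independent copy, interpolating one task's services can never observe variables set by a sibling task.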

I also bumped into an incorrectly implemented interface in the CSI hook. I've pulled that and some better guardrails out to https://github.com/hashicorp/nomad/pull/25472.

Fixes: https://github.com/hashicorp/nomad/issues/25269
Fixes: https://hashicorp.atlassian.net/browse/NET-12310
Ref: https://github.com/hashicorp/nomad/issues/25372
2025-03-24 12:05:04 -04:00


// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1

package allocrunner

import (
	"context"
	"fmt"

	consulapi "github.com/hashicorp/consul/api"
	log "github.com/hashicorp/go-hclog"
	"github.com/hashicorp/go-multierror"

	"github.com/hashicorp/nomad/client/allocdir"
	"github.com/hashicorp/nomad/client/allocrunner/interfaces"
	"github.com/hashicorp/nomad/client/consul"
	cstructs "github.com/hashicorp/nomad/client/structs"
	"github.com/hashicorp/nomad/client/taskenv"
	"github.com/hashicorp/nomad/client/widmgr"
	"github.com/hashicorp/nomad/nomad/structs"
	structsc "github.com/hashicorp/nomad/nomad/structs/config"
)

// consulHook derives and revokes the Consul ACL tokens the allocation's
// tasks and services need, using Nomad workload identities.
type consulHook struct {
	alloc                   *structs.Allocation
	allocdir                allocdir.Interface
	widmgr                  widmgr.IdentityManager
	consulConfigs           map[string]*structsc.ConsulConfig
	consulClientConstructor consul.ConsulClientFunc
	hookResources           *cstructs.AllocHookResources
	logger                  log.Logger

	shutdownCtx      context.Context
	shutdownCancelFn context.CancelFunc
}

type consulHookConfig struct {
	alloc    *structs.Allocation
	allocdir allocdir.Interface
	widmgr   widmgr.IdentityManager

	// consulConfigs is a map of cluster names to Consul configs
	consulConfigs map[string]*structsc.ConsulConfig

	// consulClientConstructor injects the function that will return a consul
	// client (eases testing)
	consulClientConstructor consul.ConsulClientFunc

	// hookResources is used for storing and retrieving Consul tokens
	hookResources *cstructs.AllocHookResources

	logger log.Logger
}

func newConsulHook(cfg consulHookConfig) *consulHook {
	shutdownCtx, shutdownCancelFn := context.WithCancel(context.Background())
	h := &consulHook{
		alloc:                   cfg.alloc,
		allocdir:                cfg.allocdir,
		widmgr:                  cfg.widmgr,
		consulConfigs:           cfg.consulConfigs,
		consulClientConstructor: cfg.consulClientConstructor,
		hookResources:           cfg.hookResources,
		shutdownCtx:             shutdownCtx,
		shutdownCancelFn:        shutdownCancelFn,
	}
	h.logger = cfg.logger.Named(h.Name())
	return h
}

// statically assert the hook implements the expected interfaces
var (
	_ interfaces.RunnerPrerunHook  = (*consulHook)(nil)
	_ interfaces.RunnerPostrunHook = (*consulHook)(nil)
	_ interfaces.RunnerDestroyHook = (*consulHook)(nil)
	_ interfaces.ShutdownHook      = (*consulHook)(nil)
)

func (*consulHook) Name() string {
	return "consul"
}

// Prerun derives Consul ACL tokens for the allocation's tasks and services
// via workload identity login and stores them in the alloc hook resources.
// If any derivation fails, all tokens created so far are revoked.
func (h *consulHook) Prerun(allocEnv *taskenv.TaskEnv) error {
	job := h.alloc.Job
	if job == nil {
		// this is always a programming error
		err := fmt.Errorf("alloc %v does not have a job", h.alloc.Name)
		h.logger.Error(err.Error())
		return err
	}

	// tokens are a map of Consul cluster to identity name to Consul ACL token.
	tokens := map[string]map[string]*consulapi.ACLToken{}

	tg := job.LookupTaskGroup(h.alloc.TaskGroup)
	if tg == nil { // this is always a programming error
		return fmt.Errorf("alloc %v does not have a valid task group", h.alloc.Name)
	}

	var mErr *multierror.Error

	if err := h.prepareConsulTokensForServices(tg.Services, tg, tokens, allocEnv); err != nil {
		mErr = multierror.Append(mErr, err)
	}
	for _, task := range tg.Tasks {
		taskEnv := allocEnv.WithTask(h.alloc, task)
		if err := h.prepareConsulTokensForServices(task.Services, tg, tokens, taskEnv); err != nil {
			mErr = multierror.Append(mErr, err)
		}
		if err := h.prepareConsulTokensForTask(task, tg, tokens); err != nil {
			mErr = multierror.Append(mErr, err)
		}
	}

	if err := mErr.ErrorOrNil(); err != nil {
		revokeErr := h.revokeTokens(tokens)
		mErr = multierror.Append(mErr, revokeErr)
		return mErr.ErrorOrNil()
	}

	// write the tokens to hookResources
	h.hookResources.SetConsulTokens(tokens)

	return nil
}

// prepareConsulTokensForTask derives a Consul token for a single task's
// Consul workload identity, if it has one, and records it in tokens.
func (h *consulHook) prepareConsulTokensForTask(task *structs.Task, tg *structs.TaskGroup, tokens map[string]map[string]*consulapi.ACLToken) error {
	if task == nil {
		// programming error
		return fmt.Errorf("cannot prepare consul tokens, no task specified")
	}

	clusterName := task.GetConsulClusterName(tg)
	consulConfig, ok := h.consulConfigs[clusterName]
	if !ok {
		return fmt.Errorf("no such consul cluster: %s", clusterName)
	}

	// Find task workload identity for Consul.
	widName := fmt.Sprintf("%s_%s", structs.ConsulTaskIdentityNamePrefix, consulConfig.Name)
	wid := task.GetIdentity(widName)
	if wid == nil {
		// Skip task if it doesn't have an identity for Consul since it doesn't
		// need a token.
		return nil
	}

	// Find signed workload identity.
	ti := *task.IdentityHandle(wid)
	jwt, err := h.widmgr.Get(ti)
	if err != nil {
		return fmt.Errorf("error getting signed identity for task %s: %v", task.Name, err)
	}

	// Derive token for task.
	req := consul.JWTLoginRequest{
		JWT:            jwt.JWT,
		AuthMethodName: consulConfig.TaskIdentityAuthMethod,
		Meta: map[string]string{
			"requested_by": fmt.Sprintf("nomad_task_%s", task.Name),
		},
	}
	token, err := h.getConsulToken(consulConfig.Name, req)
	if err != nil {
		return fmt.Errorf("failed to derive Consul token for task %s: %v", task.Name, err)
	}

	// Store token in results.
	if _, ok = tokens[clusterName]; !ok {
		tokens[clusterName] = make(map[string]*consulapi.ACLToken)
	}
	tokenName := widName + "/" + task.Name
	tokens[clusterName][tokenName] = token

	return nil
}

// prepareConsulTokensForServices derives a Consul token for each service
// that has a Consul identity, interpolating the identity with the given
// task environment, and records the tokens in tokens.
func (h *consulHook) prepareConsulTokensForServices(services []*structs.Service, tg *structs.TaskGroup, tokens map[string]map[string]*consulapi.ACLToken, env *taskenv.TaskEnv) error {
	if len(services) == 0 {
		return nil
	}

	var mErr *multierror.Error
	for _, service := range services {
		// Exit early if service doesn't need a Consul token.
		if service == nil || !service.IsConsul() || service.Identity == nil {
			continue
		}

		clusterName := service.GetConsulClusterName(tg)
		consulConfig, ok := h.consulConfigs[clusterName]
		if !ok {
			return fmt.Errorf("no such consul cluster: %s", clusterName)
		}

		// Find signed workload identity.
		handle := *service.IdentityHandle(env.ReplaceEnv)
		jwt, err := h.widmgr.Get(handle)
		if err != nil {
			mErr = multierror.Append(mErr, fmt.Errorf(
				"error getting signed identity for service %s: %v",
				service.Name, err,
			))
			continue
		}

		// Derive token for service.
		req := consul.JWTLoginRequest{
			JWT:            jwt.JWT,
			AuthMethodName: consulConfig.ServiceIdentityAuthMethod,
			Meta: map[string]string{
				"requested_by": fmt.Sprintf("nomad_service_%s", handle.InterpolatedWorkloadIdentifier),
			},
		}
		token, err := h.getConsulToken(clusterName, req)
		if err != nil {
			mErr = multierror.Append(mErr, fmt.Errorf(
				"failed to derive Consul token for service %s: %v",
				service.Name, err,
			))
			continue
		}

		// Store token in results.
		if _, ok = tokens[clusterName]; !ok {
			tokens[clusterName] = make(map[string]*consulapi.ACLToken)
		}
		tokens[clusterName][service.Identity.Name] = token
	}

	return mErr.ErrorOrNil()
}

// getConsulToken logs into Consul with the given JWT and runs a preflight
// check on the returned token.
func (h *consulHook) getConsulToken(cluster string, req consul.JWTLoginRequest) (*consulapi.ACLToken, error) {
	client, err := h.clientForCluster(cluster)
	if err != nil {
		return nil, fmt.Errorf("failed to retrieve Consul client for cluster %s: %v", cluster, err)
	}

	t, err := client.DeriveTokenWithJWT(req)
	if err == nil {
		err = client.TokenPreflightCheck(h.shutdownCtx, t)
	}
	return t, err
}

// clientForCluster returns a Consul client for the named cluster.
func (h *consulHook) clientForCluster(cluster string) (consul.Client, error) {
	consulConf, ok := h.consulConfigs[cluster]
	if !ok {
		return nil, fmt.Errorf("unable to find configuration for consul cluster %v", cluster)
	}
	return h.consulClientConstructor(consulConf, h.logger)
}

// Postrun cleans up the Consul tokens after the tasks have exited.
func (h *consulHook) Postrun() error {
	tokens := h.hookResources.GetConsulTokens()
	err := h.revokeTokens(tokens)
	if err != nil {
		return err
	}
	h.hookResources.SetConsulTokens(tokens)
	return nil
}

// Shutdown will get called when the client is gracefully stopping.
func (h *consulHook) Shutdown() {
	h.shutdownCancelFn()
}

// Destroy cleans up any remaining Consul tokens if the alloc is GC'd or fails
// to restore after a client restart.
func (h *consulHook) Destroy() error {
	tokens := h.hookResources.GetConsulTokens()
	err := h.revokeTokens(tokens)
	if err != nil {
		return err
	}
	h.hookResources.SetConsulTokens(tokens)
	return nil
}

// revokeTokens revokes all tokens for each cluster in the map, clearing a
// cluster's entry once its tokens have been revoked.
func (h *consulHook) revokeTokens(tokens map[string]map[string]*consulapi.ACLToken) error {
	mErr := multierror.Error{}

	for cluster, tokensForCluster := range tokens {
		if tokensForCluster == nil {
			// if called by Destroy, may have been removed by Postrun
			continue
		}
		client, err := h.clientForCluster(cluster)
		if err != nil {
			mErr.Errors = append(mErr.Errors, err)
			continue
		}
		toRevoke := []*consulapi.ACLToken{}
		for _, token := range tokensForCluster {
			toRevoke = append(toRevoke, token)
		}
		err = client.RevokeTokens(toRevoke)
		if err != nil {
			mErr.Errors = append(mErr.Errors, err)
			continue
		}
		tokens[cluster] = nil
	}

	return mErr.ErrorOrNil()
}