Files
nomad/client/allocrunner/consul_http_sock_hook.go
Tim Gross e168548341 provide allocrunner hooks with prebuilt taskenv and fix mutation bugs (#25373)
Some of our allocrunner hooks require a task environment for interpolating values based on the node or allocation. But several of the hooks accept an already-built environment or builder and then keep that in memory. Both of these retain a copy of all the node attributes and allocation metadata, which balloons memory usage until the allocation is GC'd.

While we'd like to look into ways to avoid keeping the allocrunner around entirely (see #25372), for now we can significantly reduce memory usage by creating the task environment on-demand when calling allocrunner methods, rather than persisting it in the allocrunner hooks.

In doing so, we uncover two other bugs:
* The WID manager, the group service hook, and the checks hook have to interpolate services for specific tasks. They mutated a taskenv builder to do so, but each time they mutate the builder, they write to the same environment map. When a group has multiple tasks, it's possible for one task to set an environment variable that would then be interpolated in the service definition for another task if that task did not have that environment variable. Only the service definition interpolation is impacted. This does not leak env vars across running tasks, as each taskrunner has its own builder.

  To fix this, we move the `UpdateTask` method off the builder and onto the taskenv as the `WithTask` method. This makes a shallow copy of the taskenv with a deep clone of the environment map used for interpolation, and then overwrites the environment from the task.

* The checks hook interpolates Nomad native service checks only on `Prerun` and not on `Update`. This could cause unexpected deregistration and registration of checks during in-place updates. To fix this, we make sure we interpolate in the `Update` method.

I also bumped into an incorrectly implemented interface in the CSI hook. I've pulled that and some better guardrails out to https://github.com/hashicorp/nomad/pull/25472.

Fixes: https://github.com/hashicorp/nomad/issues/25269
Fixes: https://hashicorp.atlassian.net/browse/NET-12310
Ref: https://github.com/hashicorp/nomad/issues/25372
2025-03-24 12:05:04 -04:00

275 lines
6.4 KiB
Go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1
package allocrunner
import (
"context"
"errors"
"fmt"
"net"
"os"
"path/filepath"
"sync"
"time"
"github.com/hashicorp/go-hclog"
multierror "github.com/hashicorp/go-multierror"
"github.com/hashicorp/go-set/v3"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/allocrunner/interfaces"
"github.com/hashicorp/nomad/client/taskenv"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/nomad/structs/config"
)
// tgFirstNetworkIsBridge reports whether the task group declares at least one
// network and the first of those networks uses "bridge" mode.
func tgFirstNetworkIsBridge(tg *structs.TaskGroup) bool {
	return len(tg.Networks) > 0 && tg.Networks[0].Mode == "bridge"
}
// consulHTTPSocketHookName is the name of the Consul HTTP socket hook.
const consulHTTPSocketHookName = "consul_http_socket"
// consulHTTPSockHook is an alloc runner hook that manages one httpSocketProxy
// per Consul cluster used by the services of the allocation's task group.
type consulHTTPSockHook struct {
logger hclog.Logger
// lock synchronizes proxies and alloc, which may be mutated and read
// concurrently via Prerun, Update, and Postrun.
lock sync.Mutex
// alloc is the allocation the hook currently acts on; Update replaces it.
alloc *structs.Allocation
// proxies holds the socket proxies keyed by Consul cluster name.
proxies map[string]*httpSocketProxy
}
// newConsulHTTPSocketHook builds the hook for alloc. It creates one (not yet
// running) httpSocketProxy for every distinct Consul cluster referenced by the
// services of the alloc's task group, each configured from the matching entry
// in configs.
func newConsulHTTPSocketHook(
	logger hclog.Logger,
	alloc *structs.Allocation,
	allocDir allocdir.Interface,
	configs map[string]*config.ConsulConfig,
) *consulHTTPSockHook {
	group := alloc.Job.LookupTaskGroup(alloc.TaskGroup)

	// Collect the deduplicated set of Consul clusters needed by this alloc.
	// For Nomad CE, this will always be just the default cluster.
	clusters := set.New[string](1)
	for _, svc := range group.Services {
		clusters.Insert(svc.GetConsulClusterName(group))
	}

	hook := &consulHTTPSockHook{
		alloc:   alloc,
		proxies: make(map[string]*httpSocketProxy),
		logger:  logger.Named(consulHTTPSocketHookName),
	}

	// The proxies receive the caller's logger, not the hook's named logger.
	for name := range clusters.Items() {
		hook.proxies[name] = newHTTPSocketProxy(logger, allocDir, configs[name])
	}

	return hook
}
// Statically assert that the hook implements the prerun, postrun, and update
// runner hook interfaces, so a missing or mismatched method fails at compile
// time.
var (
_ interfaces.RunnerPrerunHook = (*consulHTTPSockHook)(nil)
_ interfaces.RunnerPostrunHook = (*consulHTTPSockHook)(nil)
_ interfaces.RunnerUpdateHook = (*consulHTTPSockHook)(nil)
)
// Name returns the name of the hook, consulHTTPSocketHookName.
func (h *consulHTTPSockHook) Name() string {
	return consulHTTPSocketHookName
}
// shouldRun reports whether the hook has work to do for the current alloc:
// the task group's first network must be in bridge mode and at least one of
// the group's services must be Connect native.
//
// todo(shoenig): what about CNI networks?
func (h *consulHTTPSockHook) shouldRun() bool {
	group := h.alloc.Job.LookupTaskGroup(h.alloc.TaskGroup)

	// Only bridge networking qualifies; within it, a single Connect native
	// service is enough.
	if tgFirstNetworkIsBridge(group) {
		for _, svc := range group.Services {
			if svc.Connect.IsNative() {
				return true
			}
		}
	}

	return false
}
// Prerun starts every socket proxy of the hook if the alloc needs the Consul
// HTTP socket (see shouldRun). The task environment argument is ignored.
//
// All proxies are attempted even if some fail; the failures are returned
// together as a single error, or nil if there were none.
func (h *consulHTTPSockHook) Prerun(_ *taskenv.TaskEnv) error {
	h.lock.Lock()
	defer h.lock.Unlock()

	if !h.shouldRun() {
		return nil
	}

	var result *multierror.Error
	for _, socketProxy := range h.proxies {
		err := socketProxy.run(h.alloc)
		if err == nil {
			continue
		}
		result = multierror.Append(result, err)
	}

	return result.ErrorOrNil()
}
// Update records the updated allocation from req and, if that alloc needs the
// Consul HTTP socket (see shouldRun), runs every socket proxy of the hook.
//
// It returns an error if the updated alloc needs the socket but the hook was
// built without any proxies. Otherwise all proxies are attempted and their
// failures are returned together as a single error, or nil if there were
// none.
func (h *consulHTTPSockHook) Update(req *interfaces.RunnerUpdateRequest) error {
	h.lock.Lock()
	defer h.lock.Unlock()

	// The new alloc is stored unconditionally, before deciding whether to run.
	h.alloc = req.Alloc

	switch {
	case !h.shouldRun():
		return nil
	case len(h.proxies) == 0:
		return fmt.Errorf("cannot update alloc to Connect in-place")
	}

	var result *multierror.Error
	for _, socketProxy := range h.proxies {
		err := socketProxy.run(h.alloc)
		if err == nil {
			continue
		}
		result = multierror.Append(result, err)
	}

	return result.ErrorOrNil()
}
// Postrun stops every socket proxy of the hook. Failures to stop a proxy are
// logged as warnings and never returned; Postrun always returns nil.
func (h *consulHTTPSockHook) Postrun() error {
	h.lock.Lock()
	defer h.lock.Unlock()

	for _, socketProxy := range h.proxies {
		err := socketProxy.stop()
		if err == nil {
			continue
		}

		// Only log failures to stop proxies. Worst case scenario is a small
		// goroutine leak.
		h.logger.Warn("error stopping Consul HTTP proxy", "error", err)
	}

	return nil
}
// httpSocketProxy holds the state of a single Consul HTTP socket proxy: its
// configuration, the context used to shut it down, and bookkeeping on whether
// it was started and whether it has finished.
type httpSocketProxy struct {
logger hclog.Logger
allocDir allocdir.Interface
// config is the Consul configuration read by run for the destination
// address and the cluster name.
config *config.ConsulConfig
// ctx is passed to the proxy goroutine started by run; cancel cancels it
// and is called by stop and by that goroutine when it returns.
ctx context.Context
cancel func()
// doneCh is closed by the goroutine started in run once proxying returns.
doneCh chan struct{}
// runOnce is set to true after run has started the proxy goroutine.
runOnce bool
}
// newHTTPSocketProxy returns a proxy that has not been started yet. It owns a
// fresh cancellable context derived from context.Background and an open done
// channel.
func newHTTPSocketProxy(
	logger hclog.Logger,
	allocDir allocdir.Interface,
	config *config.ConsulConfig,
) *httpSocketProxy {
	socketProxy := &httpSocketProxy{
		logger:   logger,
		allocDir: allocDir,
		config:   config,
		doneCh:   make(chan struct{}),
	}
	socketProxy.ctx, socketProxy.cancel = context.WithCancel(context.Background())
	return socketProxy
}
// run starts the httpSocketProxy: it creates a unix socket inside the alloc
// directory and launches a goroutine that hands the listener, together with
// the configured Consul HTTP address, to proxy.
//
// run takes effect at most once. Calls made after a successful start, or
// after the proxy's context was cancelled or its goroutine finished, return
// nil without doing anything. The alloc argument is currently unused.
//
// It returns an error if no Consul configuration or address is available, if
// a stale socket cannot be removed, or if the socket cannot be created or
// have its permissions set.
//
// Assumes locking done by the calling alloc runner.
func (p *httpSocketProxy) run(alloc *structs.Allocation) error {
	// Only run once.
	if p.runOnce {
		return nil
	}

	// Never restart.
	select {
	case <-p.doneCh:
		p.logger.Trace("consul http socket proxy already shutdown; exiting")
		return nil
	case <-p.ctx.Done():
		p.logger.Trace("consul http socket proxy already done; exiting")
		return nil
	default:
	}

	// A cluster without a matching entry in the client configuration yields a
	// nil config; report that as an error rather than panicking below.
	if p.config == nil {
		return errors.New("consul configuration must be set on nomad client")
	}

	// consul http dest addr
	destAddr := p.config.Addr
	if destAddr == "" {
		return errors.New("consul address must be set on nomad client")
	}

	// Non-default, named clusters get their own socket file in the shared
	// alloc tmp directory; everything else uses the default socket path.
	socketFile := allocdir.AllocHTTPSocket
	if p.config.Name != structs.ConsulDefaultCluster && p.config.Name != "" {
		socketFile = filepath.Join(allocdir.SharedAllocName, allocdir.TmpDirName,
			"consul_"+p.config.Name+"_http.sock")
	}

	hostHTTPSockPath := filepath.Join(p.allocDir.AllocDirPath(), socketFile)
	if err := maybeRemoveOldSocket(hostHTTPSockPath); err != nil {
		return err
	}

	listener, err := net.Listen("unix", hostHTTPSockPath)
	if err != nil {
		return fmt.Errorf("unable to create unix socket for Consul HTTP endpoint: %w", err)
	}

	// The Consul HTTP socket should be usable by all users in case a task is
	// running as a non-privileged user. Unix does not allow setting domain
	// socket permissions when creating the file, so we must manually call
	// chmod afterwards.
	if err := os.Chmod(hostHTTPSockPath, os.ModePerm); err != nil {
		// No goroutine owns the listener yet, so close it here to avoid
		// leaking its file descriptor. Closing a listener created by
		// net.Listen also unlinks the socket file by default.
		if closeErr := listener.Close(); closeErr != nil {
			p.logger.Warn("error closing Consul HTTP socket listener", "error", closeErr)
		}
		return fmt.Errorf("unable to set permissions on unix socket: %w", err)
	}

	go func() {
		proxy(p.ctx, p.logger, destAddr, listener)
		// Cancel the context and signal completion so that later calls to
		// run and stop observe that the proxy has finished.
		p.cancel()
		close(p.doneCh)
	}()

	p.runOnce = true
	return nil
}
// stop cancels the proxy's context and, if the proxy was started, waits for
// its goroutine to signal completion on doneCh.
//
// It returns errSocketProxyTimeout if completion is not signalled within
// socketProxyStopWaitTime, and nil otherwise.
func (p *httpSocketProxy) stop() error {
	p.cancel()

	// A proxy that was never run has no goroutine to wait for.
	if p.runOnce {
		select {
		case <-time.After(socketProxyStopWaitTime):
			return errSocketProxyTimeout
		case <-p.doneCh:
		}
	}

	return nil
}
// maybeRemoveOldSocket removes whatever exists at socketPath so that a new
// unix socket can be created there. A path that does not exist is not an
// error; any other failure to remove the path is returned.
func maybeRemoveOldSocket(socketPath string) error {
	// Remove directly instead of checking with Stat first: this avoids a race
	// between the check and the removal, and it also clears a dangling
	// symlink, which Stat (following the link) would report as missing.
	err := os.Remove(socketPath)
	if err == nil || errors.Is(err, os.ErrNotExist) {
		return nil
	}
	return fmt.Errorf("unable to remove existing unix socket: %w", err)
}