Files
nomad/client/allocrunner/checks_hook.go
Tim Gross e168548341 provide allocrunner hooks with prebuilt taskenv and fix mutation bugs (#25373)
Some of our allocrunner hooks require a task environment for interpolating values based on the node or allocation. But several of the hooks accept an already-built environment or builder and then keep it in memory. Both of these objects retain a copy of all the node attributes and allocation metadata, which balloons memory usage until the allocation is GC'd.

While we'd like to look into ways to avoid keeping the allocrunner around entirely (see #25372), for now we can significantly reduce memory usage by creating the task environment on-demand when calling allocrunner methods, rather than persisting it in the allocrunner hooks.
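In concrete terms the pattern changes roughly like this (a minimal sketch; `retainingHook`, `onDemandHook`, and `runPrerun` are illustrative names rather than the real allocrunner internals, and it assumes the `taskenv.NewBuilder(...).Build()` API used elsewhere in the client):

```go
package allocrunner

import (
	"github.com/hashicorp/nomad/client/taskenv"
	"github.com/hashicorp/nomad/nomad/structs"
)

// Before (sketch): the hook holds a builder for the allocation's lifetime,
// pinning node attributes and alloc metadata in memory until the alloc is GC'd.
type retainingHook struct {
	envBuilder *taskenv.Builder
}

// After (sketch): hook methods accept a prebuilt environment and retain nothing.
type onDemandHook struct{}

func (h *onDemandHook) Prerun(allocEnv *taskenv.TaskEnv) error {
	// interpolate with allocEnv here; it goes out of scope when Prerun returns
	return nil
}

// runPrerun shows the allocrunner side: build the environment at the call
// site, hand it to the hook, and let it be collected afterwards.
func runPrerun(h *onDemandHook, node *structs.Node, alloc *structs.Allocation, region string) error {
	allocEnv := taskenv.NewBuilder(node, alloc, nil, region).Build()
	return h.Prerun(allocEnv)
}
```

The hook signatures in the file below follow this shape: `Prerun(allocEnv *taskenv.TaskEnv)` takes the environment directly, and `Update(request *interfaces.RunnerUpdateRequest)` receives it as `request.AllocEnv`.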

In doing so, we uncover two other bugs:
* The WID manager, the group service hook, and the checks hook have to interpolate services for specific tasks. They mutated a taskenv builder to do so, but each mutation wrote into the same underlying environment map. When a group has multiple tasks, an environment variable set for one task could end up interpolated into the service definition of another task that never set it. Only service definition interpolation is impacted; this does not leak env vars across running tasks, as each taskrunner has its own builder.

  To fix this, we move the `UpdateTask` method off the builder and onto the taskenv as the `WithTask` method. This makes a shallow copy of the taskenv with a deep clone of the environment map used for interpolation, and then overwrites the environment from the task. (A simplified sketch of the difference follows this list.)

* The checks hook interpolates Nomad native service checks only on `Prerun` and not on `Update`. This could cause unexpected deregistration and registration of checks during in-place updates. To fix this, we make sure we interpolate in the `Update` method.
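To make the aliasing concrete, here is a small self-contained sketch (plain maps only; `envSet`, `mutateInPlace`, and `withTask` are illustrative stand-ins, not the real `taskenv` types) of why writing task variables into a shared map leaks values into another task's service interpolation, and how a shallow copy with a cloned map keeps them isolated:

```go
package main

import (
	"fmt"
	"maps"
)

// envSet stands in for the interpolation environment inside a task environment.
type envSet struct {
	env map[string]string
}

// mutateInPlace mimics the old UpdateTask-style flow: every task writes into
// the same underlying map, so values from one task remain visible when
// interpolating for the next task.
func (e *envSet) mutateInPlace(taskVars map[string]string) *envSet {
	for k, v := range taskVars {
		e.env[k] = v
	}
	return e
}

// withTask mimics the WithTask-style fix: shallow-copy the struct, deep-clone
// the map, then overlay the task's variables on the clone only.
func (e *envSet) withTask(taskVars map[string]string) *envSet {
	clone := &envSet{env: maps.Clone(e.env)}
	for k, v := range taskVars {
		clone.env[k] = v
	}
	return clone
}

func main() {
	shared := &envSet{env: map[string]string{"NOMAD_GROUP_NAME": "web"}}

	// Old behavior: task A's variable is still present when interpolating for task B.
	a := shared.mutateInPlace(map[string]string{"A_ONLY": "set-by-a"})
	b := shared.mutateInPlace(map[string]string{"B_ONLY": "set-by-b"})
	fmt.Println(a.env["A_ONLY"], b.env["A_ONLY"]) // set-by-a set-by-a -- leaked

	// Fixed behavior: each task interpolates against its own clone.
	base := &envSet{env: map[string]string{"NOMAD_GROUP_NAME": "web"}}
	a2 := base.withTask(map[string]string{"A_ONLY": "set-by-a"})
	b2 := base.withTask(map[string]string{"B_ONLY": "set-by-b"})
	fmt.Println(a2.env["A_ONLY"], b2.env["A_ONLY"]) // set-by-a <empty> -- isolated
}
```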

I also bumped into an incorrectly implemented interface in the CSI hook. I've pulled that and some better guardrails out to https://github.com/hashicorp/nomad/pull/25472.

Fixes: https://github.com/hashicorp/nomad/issues/25269
Fixes: https://hashicorp.atlassian.net/browse/NET-12310
Ref: https://github.com/hashicorp/nomad/issues/25372
2025-03-24 12:05:04 -04:00


// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1

package allocrunner

import (
	"context"
	"sync"
	"time"

	"github.com/hashicorp/go-hclog"
	"github.com/hashicorp/nomad/client/allocrunner/interfaces"
	"github.com/hashicorp/nomad/client/serviceregistration/checks"
	"github.com/hashicorp/nomad/client/serviceregistration/checks/checkstore"
	"github.com/hashicorp/nomad/client/taskenv"
	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/nomad/structs"
)

const (
	// checksHookName is the name of this hook as appears in logs
	checksHookName = "checks_hook"
)

// observers maintains a map from check_id -> observer for a particular check. Each
// observer in the map must share the same context.
type observers map[structs.CheckID]*observer

// An observer is used to execute a particular check on its interval and update the
// check store with those results.
type observer struct {
	ctx        context.Context
	cancel     context.CancelFunc
	checker    checks.Checker
	checkStore checkstore.Shim
	qc         *checks.QueryContext
	check      *structs.ServiceCheck
	allocID    string
}

// start checking our check on its interval
func (o *observer) start() {
	// compromise between immediate (too early) and waiting full interval (slow)
	firstWait := o.check.Interval / 2

	timer, cancel := helper.NewSafeTimer(firstWait)
	defer cancel()

	for {
		select {

		// exit the observer
		case <-o.ctx.Done():
			return

		// time to execute the check
		case <-timer.C:
			query := checks.GetCheckQuery(o.check)
			result := o.checker.Do(o.ctx, o.qc, query)

			// and put the results into the store (already logged)
			_ = o.checkStore.Set(o.allocID, result)

			// setup timer for next interval
			timer.Reset(o.check.Interval)
		}
	}
}

// stop checking our check - this will also interrupt an in-progress execution
func (o *observer) stop() {
	o.cancel()
}

// checksHook manages checks of Nomad service registrations, at both the group and
// task level, by storing / removing them from the Client state store.
//
// Does not manage Consul service checks; see groupServiceHook instead.
type checksHook struct {
	logger  hclog.Logger
	network structs.NetworkStatus
	shim    checkstore.Shim
	checker checks.Checker
	allocID string

	// fields that get re-initialized on allocation update
	lock      sync.RWMutex
	ctx       context.Context
	stop      func()
	observers observers
	alloc     *structs.Allocation
}

func newChecksHook(
	logger hclog.Logger,
	alloc *structs.Allocation,
	shim checkstore.Shim,
	network structs.NetworkStatus,
) *checksHook {
	h := &checksHook{
		logger:  logger.Named(checksHookName),
		allocID: alloc.ID,
		alloc:   alloc,
		shim:    shim,
		network: network,
		checker: checks.New(logger),
	}
	h.initialize(alloc)

	return h
}

// statically assert that the hook meets the expected interfaces
var (
	_ interfaces.RunnerPrerunHook  = (*checksHook)(nil)
	_ interfaces.RunnerUpdateHook  = (*checksHook)(nil)
	_ interfaces.RunnerPreKillHook = (*checksHook)(nil)
)

// initialize the dynamic fields of checksHook, which is to say setup all the
// observers and query context things associated with the alloc.
//
// Should be called during initial setup only.
func (h *checksHook) initialize(alloc *structs.Allocation) {
	h.lock.Lock()
	defer h.lock.Unlock()

	tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup)
	if tg == nil {
		return
	}

	// fresh context and stop function for this allocation
	h.ctx, h.stop = context.WithCancel(context.Background())

	// fresh set of observers
	h.observers = make(observers)

	// set the initial alloc
	h.alloc = alloc
}

// observe will create the observer for each service in services.
// services must use only nomad service provider.
//
// Caller must hold h.lock.
func (h *checksHook) observe(alloc *structs.Allocation, services []*structs.Service) {
	var ports structs.AllocatedPorts
	var networks structs.Networks
	if alloc.AllocatedResources != nil {
		ports = alloc.AllocatedResources.Shared.Ports
		networks = alloc.AllocatedResources.Shared.Networks
	}

	for _, service := range services {
		for _, check := range service.Checks {

			// remember the initialization time
			now := time.Now().UTC().Unix()

			// create the deterministic check id for this check
			id := structs.NomadCheckID(alloc.ID, alloc.TaskGroup, check)

			// an observer for this check already exists
			if _, exists := h.observers[id]; exists {
				continue
			}

			ctx, cancel := context.WithCancel(h.ctx)

			// create the observer for this check
			h.observers[id] = &observer{
				ctx:        ctx,
				cancel:     cancel,
				check:      check.Copy(),
				checkStore: h.shim,
				checker:    h.checker,
				allocID:    h.allocID,
				qc: &checks.QueryContext{
					ID:               id,
					CustomAddress:    service.Address,
					ServicePortLabel: service.PortLabel,
					Ports:            ports,
					Networks:         networks,
					NetworkStatus:    h.network,
					Group:            alloc.Name,
					Task:             service.TaskName,
					Service:          service.Name,
					Check:            check.Name,
				},
			}

			// insert a pending result into state store for each check
			result := checks.Stub(id, structs.GetCheckMode(check), now, alloc.Name, service.TaskName, service.Name, check.Name)
			if err := h.shim.Set(h.allocID, result); err != nil {
				h.logger.Error("failed to set initial check status", "id", h.allocID, "error", err)
				continue
			}

			// start the observer
			go h.observers[id].start()
		}
	}
}

func (h *checksHook) Name() string {
	return checksHookName
}

func (h *checksHook) Prerun(allocEnv *taskenv.TaskEnv) error {
	h.lock.Lock()
	defer h.lock.Unlock()

	group := h.alloc.Job.LookupTaskGroup(h.alloc.TaskGroup)
	if group == nil {
		return nil
	}

	interpolatedServices := taskenv.InterpolateServices(
		allocEnv, group.NomadServices())

	// create and start observers of nomad service checks in alloc
	h.observe(h.alloc, interpolatedServices)

	return nil
}

func (h *checksHook) Update(request *interfaces.RunnerUpdateRequest) error {
	h.lock.Lock()
	defer h.lock.Unlock()

	group := request.Alloc.Job.LookupTaskGroup(request.Alloc.TaskGroup)
	if group == nil {
		return nil
	}

	// get all group and task level services using nomad provider
	interpolatedServices := taskenv.InterpolateServices(
		request.AllocEnv, group.NomadServices())

	// create a set of the updated set of checks
	next := make([]structs.CheckID, 0, len(h.observers))
	for _, service := range interpolatedServices {
		for _, check := range service.Checks {
			next = append(next, structs.NomadCheckID(
				request.Alloc.ID,
				request.Alloc.TaskGroup,
				check,
			))
		}
	}

	// stop the observers of the checks we are removing
	remove := h.shim.Difference(request.Alloc.ID, next)
	for _, id := range remove {
		h.observers[id].stop()
		delete(h.observers, id)
	}

	// remove checks that are no longer part of the allocation
	if err := h.shim.Remove(request.Alloc.ID, remove); err != nil {
		return err
	}

	// remember this new alloc
	h.alloc = request.Alloc

	// ensure we are observing new checks (idempotent)
	h.observe(request.Alloc, interpolatedServices)

	return nil
}

func (h *checksHook) PreKill() {
	h.lock.Lock()
	defer h.lock.Unlock()

	// terminate our hook context, which threads down to all observers
	h.stop()

	// purge all checks for this allocation from the client state store
	if err := h.shim.Purge(h.allocID); err != nil {
		h.logger.Error("failed to purge check results", "alloc_id", h.allocID, "error", err)
	}
}