Some of our allocrunner hooks require a task environment for interpolating values based on the node or allocation. But several of the hooks accept an already-built environment or builder and then keep it in memory. Both of these retain a copy of all the node attributes and allocation metadata, which balloons memory usage until the allocation is GC'd. While we'd like to look into ways to avoid keeping the allocrunner around entirely (see #25372), for now we can significantly reduce memory usage by creating the task environment on demand when calling allocrunner methods, rather than persisting it in the allocrunner hooks.

In doing so, we uncovered two other bugs:

* The WID manager, the group service hook, and the checks hook have to interpolate services for specific tasks. They mutated a taskenv builder to do so, but each mutation writes to the same environment map. When a group has multiple tasks, one task could set an environment variable that would then be interpolated into the service definition for another task that did not have that variable. Only service definition interpolation is impacted; this does not leak env vars across running tasks, as each taskrunner has its own builder. To fix this, we move the `UpdateTask` method off the builder and onto the taskenv as the `WithTask` method, which makes a shallow copy of the taskenv with a deep clone of the environment map used for interpolation, and then overlays the task's environment (see the sketch below).

* The checks hook interpolated Nomad native service checks only on `Prerun` and not on `Update`, which could cause unexpected deregistration and re-registration of checks during in-place updates. To fix this, we make sure we interpolate in the `Update` method as well.

I also bumped into an incorrectly implemented interface in the CSI hook; I've pulled that fix and some better guardrails out to https://github.com/hashicorp/nomad/pull/25472.

Fixes: https://github.com/hashicorp/nomad/issues/25269
Fixes: https://hashicorp.atlassian.net/browse/NET-12310
Ref: https://github.com/hashicorp/nomad/issues/25372
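To make the `WithTask` copy semantics concrete, here is a minimal sketch under simplified assumptions: the real `taskenv.TaskEnv` carries far more state (node attributes, alloc metadata, templating data), and `Task` below is a stand-in for `structs.Task`. The copy discipline is the point: shallow-copy the struct, deep-clone the interpolation map, then overlay the task's variables.

```go
// Sketch only: models just the interpolation env map, not the full TaskEnv.
package main

import "fmt"

type Task struct {
	Name string
	Env  map[string]string
}

type TaskEnv struct {
	envMap map[string]string // map used for interpolation
}

// WithTask returns a shallow copy of the TaskEnv whose interpolation map is
// deep-cloned before the task's own env vars are overlaid, so one task's
// variables can never bleed into interpolation done for a sibling task.
func (t *TaskEnv) WithTask(task *Task) *TaskEnv {
	c := *t // shallow copy of the struct
	c.envMap = make(map[string]string, len(t.envMap)+len(task.Env))
	for k, v := range t.envMap { // deep clone of the shared map
		c.envMap[k] = v
	}
	for k, v := range task.Env { // overlay task-specific vars
		c.envMap[k] = v
	}
	return &c
}

func main() {
	group := &TaskEnv{envMap: map[string]string{"NOMAD_GROUP_NAME": "web"}}

	a := group.WithTask(&Task{Name: "a", Env: map[string]string{"PORT": "8080"}})
	b := group.WithTask(&Task{Name: "b", Env: nil})

	fmt.Println(a.envMap["PORT"]) // "8080"
	fmt.Println(b.envMap["PORT"]) // "" — task a's var did not leak into b
}
```

The file below shows the consumer side of the same change: `Prerun` and `Update` now receive `allocEnv *taskenv.TaskEnv` per call instead of the hook retaining a builder.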
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1

package allocrunner

import (
	"context"
	"fmt"
	"sync"
	"time"

	"github.com/hashicorp/go-hclog"
	"github.com/hashicorp/nomad/client/allochealth"
	"github.com/hashicorp/nomad/client/allocrunner/interfaces"
	"github.com/hashicorp/nomad/client/serviceregistration"
	"github.com/hashicorp/nomad/client/serviceregistration/checks/checkstore"
	cstructs "github.com/hashicorp/nomad/client/structs"
	"github.com/hashicorp/nomad/client/taskenv"
	"github.com/hashicorp/nomad/nomad/structs"
)

// healthSetter is able to set/clear alloc health.
type healthSetter interface {
	// HasHealth returns true if health is already set.
	HasHealth() bool

	// SetHealth via the mutator.
	SetHealth(healthy, isDeploy bool, taskEvents map[string]*structs.TaskEvent)

	// ClearHealth for when the deployment ID changes.
	ClearHealth()
}

// allocHealthWatcherHook is responsible for watching an allocation's task
// status and (optionally) Consul health check status to determine if the
// allocation is healthy or unhealthy. Used by deployments and migrations.
type allocHealthWatcherHook struct {
	healthSetter healthSetter

	// consul client used to monitor Consul service health checks
	consul serviceregistration.Handler

	// checkStore is used to monitor Nomad service health checks
	checkStore checkstore.Shim

	// listener is given to trackers to listen for alloc updates and closed
	// when the alloc is destroyed.
	listener *cstructs.AllocListener

	// hookLock is held by hook methods to prevent concurrent access by
	// Update and synchronous hooks.
	hookLock sync.Mutex

	// watchDone is created before calling watchHealth and is closed when
	// watchHealth exits. Must be passed into watchHealth to avoid races.
	// Initialized already closed as Update may be called before Prerun.
	watchDone chan struct{}

	// ranOnce is set once Prerun or Update have run at least once. This
	// prevents Prerun from running if an Update has already been
	// processed. Must hold hookLock to access.
	ranOnce bool

	// cancelFn stops the health watching/setting goroutine. Wait on
	// watchDone to block until the watcher exits.
	cancelFn context.CancelFunc

	// alloc set by new func or Update. Must hold hookLock to access.
	alloc *structs.Allocation

	// isDeploy is true if monitoring a deployment. Set in init(). Must
	// hold hookLock to access.
	isDeploy bool

	logger hclog.Logger
}

func newAllocHealthWatcherHook(
	logger hclog.Logger,
	alloc *structs.Allocation,
	hs healthSetter,
	listener *cstructs.AllocListener,
	consul serviceregistration.Handler,
	checkStore checkstore.Shim,
) interfaces.RunnerHook {

	// Neither deployments nor migrations care about the health of
	// non-service jobs so never watch their health
	if alloc.Job.Type != structs.JobTypeService {
		return noopAllocHealthWatcherHook{}
	}

	// Initialize watchDone with a closed chan in case Update runs before Prerun
	closedDone := make(chan struct{})
	close(closedDone)

	h := &allocHealthWatcherHook{
		alloc:        alloc,
		cancelFn:     func() {}, // initialize to prevent nil func panics
		watchDone:    closedDone,
		consul:       consul,
		checkStore:   checkStore,
		healthSetter: hs,
		listener:     listener,
	}

	h.logger = logger.Named(h.Name())
	return h
}

// statically assert the hook implements the expected interfaces
var (
	_ interfaces.RunnerPrerunHook  = (*allocHealthWatcherHook)(nil)
	_ interfaces.RunnerPostrunHook = (*allocHealthWatcherHook)(nil)
	_ interfaces.RunnerUpdateHook  = (*allocHealthWatcherHook)(nil)
	_ interfaces.ShutdownHook      = (*allocHealthWatcherHook)(nil)
)

func (h *allocHealthWatcherHook) Name() string {
	return "alloc_health_watcher"
}

// init starts the allochealth.Tracker and watchHealth goroutine on either
// Prerun or Update. Caller must set/update alloc and logger fields.
//
// Not threadsafe so the caller should lock since Updates occur concurrently.
func (h *allocHealthWatcherHook) init(allocEnv *taskenv.TaskEnv) error {
	// No need to watch health as it's already set
	if h.healthSetter.HasHealth() {
		h.logger.Trace("not watching; already has health set")
		return nil
	}

	tg := h.alloc.Job.LookupTaskGroup(h.alloc.TaskGroup)
	if tg == nil {
		return fmt.Errorf("task group %q does not exist in job %q", h.alloc.TaskGroup, h.alloc.Job.ID)
	}

	h.isDeploy = h.alloc.DeploymentID != ""

	// No need to watch allocs for deployments that rely on operators
	// manually setting health
	if h.isDeploy && (tg.Update.IsEmpty() || tg.Update.HealthCheck == structs.UpdateStrategyHealthCheck_Manual) {
		return nil
	}

	// Define the deadline, health method, min healthy time from the
	// deployment if this is a deployment; otherwise from the migration
	// strategy.
	deadline, useChecks, minHealthyTime := getHealthParams(time.Now(), tg, h.isDeploy)

	// Create a context that is canceled when the tracker should shutdown.
	ctx := context.Background()
	ctx, h.cancelFn = context.WithCancel(ctx)

	h.logger.Trace("watching", "deadline", deadline, "checks", useChecks, "min_healthy_time", minHealthyTime)
	// Create a new tracker, start it, and watch for health results.
	tracker := allochealth.NewTracker(
		ctx, h.logger, h.alloc, h.listener, allocEnv, h.consul, h.checkStore, minHealthyTime, useChecks,
	)
	tracker.Start()

	// Create a new done chan and start watching for health updates
	h.watchDone = make(chan struct{})
	go h.watchHealth(ctx, deadline, tracker, h.watchDone)
	return nil
}

func (h *allocHealthWatcherHook) Prerun(allocEnv *taskenv.TaskEnv) error {
	h.hookLock.Lock()
	defer h.hookLock.Unlock()

	if h.ranOnce {
		// An Update beat Prerun to running the watcher; noop
		return nil
	}

	h.ranOnce = true
	return h.init(allocEnv)
}

func (h *allocHealthWatcherHook) Update(req *interfaces.RunnerUpdateRequest) error {
	h.hookLock.Lock()
	defer h.hookLock.Unlock()

	// Prevent Prerun from running after an Update
	h.ranOnce = true

	// Cancel the old watcher and create a new one
	h.cancelFn()

	// Wait until the watcher exits
	<-h.watchDone

	// Deployment has changed, reset status
	if req.Alloc.DeploymentID != h.alloc.DeploymentID {
		h.healthSetter.ClearHealth()
	}

	// Update alloc
	h.alloc = req.Alloc

	return h.init(req.AllocEnv)
}

func (h *allocHealthWatcherHook) Postrun() error {
	h.hookLock.Lock()
	defer h.hookLock.Unlock()

	h.cancelFn()
	h.listener.Close()

	// Wait until the watcher exits
	<-h.watchDone

	return nil
}

func (h *allocHealthWatcherHook) Shutdown() {
	// Same as Postrun
	_ = h.Postrun()
}

// watchHealth watches alloc health until it is set, the alloc is stopped, the
// deadline is reached, or the context is canceled. watchHealth will be
// canceled and restarted on Updates so calls are serialized with a lock.
func (h *allocHealthWatcherHook) watchHealth(ctx context.Context, deadline time.Time, tracker *allochealth.Tracker, done chan<- struct{}) {
	defer close(done)

	// Default to unhealthy for the deadline reached case
	healthy := false

	select {
	case <-ctx.Done():
		// Graceful shutdown
		return

	case <-tracker.AllocStoppedCh():
		// Allocation has stopped so no need to set health
		return

	case <-time.After(time.Until(deadline)):
		// Time is up! Fallthrough to set unhealthy.
		h.logger.Trace("deadline reached; setting unhealthy", "deadline", deadline)

	case healthy = <-tracker.HealthyCh():
		// Health received. Fallthrough to set it.
	}

	h.logger.Trace("health set", "healthy", healthy)

	// If this is an unhealthy deployment emit events for tasks
	var taskEvents map[string]*structs.TaskEvent
	if !healthy && h.isDeploy {
		taskEvents = tracker.TaskEvents()
	}

	h.healthSetter.SetHealth(healthy, h.isDeploy, taskEvents)
}

// getHealthParams returns the health watcher parameters which vary based on
// whether this allocation is in a deployment or migration.
func getHealthParams(now time.Time, tg *structs.TaskGroup, isDeploy bool) (deadline time.Time, useChecks bool, minHealthyTime time.Duration) {
	if isDeploy {
		deadline = now.Add(tg.Update.HealthyDeadline)
		minHealthyTime = tg.Update.MinHealthyTime
		useChecks = tg.Update.HealthCheck == structs.UpdateStrategyHealthCheck_Checks
	} else {
		strategy := tg.Migrate
		if strategy == nil {
			// For backwards compat with pre-0.8 allocations that
			// don't have a migrate strategy set.
			strategy = structs.DefaultMigrateStrategy()
		}

		deadline = now.Add(strategy.HealthyDeadline)
		minHealthyTime = strategy.MinHealthyTime
		useChecks = strategy.HealthCheck == structs.MigrateStrategyHealthChecks
	}
	return
}

// noopAllocHealthWatcherHook is an empty hook implementation returned by
// newAllocHealthWatcherHook when an allocation will never need its health
// monitored.
type noopAllocHealthWatcherHook struct{}

func (noopAllocHealthWatcherHook) Name() string {
	return "alloc_health_watcher"
}