mirror of
https://github.com/kemko/nomad.git
synced 2026-01-06 02:15:43 +03:00
The Nomad client can now optionally emit telemetry data from the prerun and prestart hooks. This allows operators to monitor and alert on failures and time taken to complete.

The new datapoints are:
- nomad.client.alloc_hook.prerun.success (counter)
- nomad.client.alloc_hook.prerun.failed (counter)
- nomad.client.alloc_hook.prerun.elapsed (sample)
- nomad.client.task_hook.prestart.success (counter)
- nomad.client.task_hook.prestart.failed (counter)
- nomad.client.task_hook.prestart.elapsed (sample)

The hook execution time is useful to Nomad engineering and will help optimize code where possible and understand job specification impacts on hook performance.

Currently only the PreRun and PreStart hooks have telemetry enabled, so we limit the number of new metrics being produced.
156 lines
5.3 KiB
Go
156 lines
5.3 KiB
Go
// Copyright (c) HashiCorp, Inc.
|
|
// SPDX-License-Identifier: BUSL-1.1
|
|
|
|
package config
|
|
|
|
import (
|
|
"context"
|
|
|
|
"github.com/armon/go-metrics"
|
|
log "github.com/hashicorp/go-hclog"
|
|
"github.com/hashicorp/nomad/client/allocdir"
|
|
arinterfaces "github.com/hashicorp/nomad/client/allocrunner/interfaces"
|
|
"github.com/hashicorp/nomad/client/consul"
|
|
"github.com/hashicorp/nomad/client/devicemanager"
|
|
"github.com/hashicorp/nomad/client/dynamicplugins"
|
|
"github.com/hashicorp/nomad/client/interfaces"
|
|
"github.com/hashicorp/nomad/client/pluginmanager/csimanager"
|
|
"github.com/hashicorp/nomad/client/pluginmanager/drivermanager"
|
|
"github.com/hashicorp/nomad/client/serviceregistration"
|
|
"github.com/hashicorp/nomad/client/serviceregistration/checks/checkstore"
|
|
"github.com/hashicorp/nomad/client/serviceregistration/wrapper"
|
|
cstate "github.com/hashicorp/nomad/client/state"
|
|
"github.com/hashicorp/nomad/client/vaultclient"
|
|
"github.com/hashicorp/nomad/client/widmgr"
|
|
"github.com/hashicorp/nomad/helper/users/dynamic"
|
|
"github.com/hashicorp/nomad/nomad/structs"
|
|
)
|
|
|
|
// AllocRunnerFactory returns an AllocRunner interface built from the
|
|
// configuration. Note: the type for config is any because we can't count on
|
|
// test callers being able to make a real allocrunner.Config without an circular
|
|
// import
|
|
type AllocRunnerFactory func(*AllocRunnerConfig) (arinterfaces.AllocRunner, error)
|
|
|
|
// RPCer is the interface needed by hooks to make RPC calls.
type RPCer interface {
	// RPC invokes the named method, passing args as the request and reply as
	// the destination for the response. It returns an error if the call
	// fails.
	//
	// any is an alias for interface{}, so implementations declared with
	// either spelling satisfy this interface.
	RPC(method string, args any, reply any) error
}
|
|
|
|
// AllocRunnerConfig holds the configuration for creating an allocation runner.
|
|
type AllocRunnerConfig struct {
|
|
// Logger is the logger for the allocation runner.
|
|
Logger log.Logger
|
|
|
|
// ClientConfig is the clients configuration.
|
|
ClientConfig *Config
|
|
|
|
// Alloc captures the allocation that should be run.
|
|
Alloc *structs.Allocation
|
|
|
|
// BaseLabels are the base metric labels generated by the client. These can
|
|
// be used by processes which emit metrics that want to include these
|
|
// labels that include node_id, node_class, and node_pool.
|
|
BaseLabels []metrics.Label
|
|
|
|
// StateDB is used to store and restore state.
|
|
StateDB cstate.StateDB
|
|
|
|
// ConsulServices is used to register task services and checks
|
|
ConsulServices serviceregistration.Handler
|
|
|
|
// ConsulProxiesFunc gets a Consul client used to lookup supported envoy
|
|
// versions of the Consul agent.
|
|
ConsulProxiesFunc consul.SupportedProxiesAPIFunc
|
|
|
|
// ConsulSI is the Consul client used to manage service identity tokens.
|
|
ConsulSI consul.ServiceIdentityAPI
|
|
|
|
// VaultFunc is the function to get a Vault client to use to retrieve Vault
|
|
// tokens
|
|
VaultFunc vaultclient.VaultClientFunc
|
|
|
|
// StateUpdater is used to emit updated task state
|
|
StateUpdater interfaces.AllocStateHandler
|
|
|
|
// DeviceStatsReporter is used to lookup resource usage for alloc devices
|
|
DeviceStatsReporter interfaces.DeviceStatsReporter
|
|
|
|
// PrevAllocWatcher handles waiting on previous or preempted allocations
|
|
PrevAllocWatcher PrevAllocWatcher
|
|
|
|
// PrevAllocMigrator allows the migration of a previous allocations alloc dir
|
|
PrevAllocMigrator PrevAllocMigrator
|
|
|
|
// DynamicRegistry contains all locally registered dynamic plugins (e.g csi
|
|
// plugins).
|
|
DynamicRegistry dynamicplugins.Registry
|
|
|
|
// CSIManager is used to wait for CSI Volumes to be attached, and by the task
|
|
// runner to manage their mounting
|
|
CSIManager csimanager.Manager
|
|
|
|
// DeviceManager is used to mount devices as well as lookup device
|
|
// statistics
|
|
DeviceManager devicemanager.Manager
|
|
|
|
// DriverManager handles dispensing of driver plugins
|
|
DriverManager drivermanager.Manager
|
|
|
|
// ServersContactedCh is closed when the first GetClientAllocs call to
|
|
// servers succeeds and allocs are synced.
|
|
ServersContactedCh chan struct{}
|
|
|
|
// RPCClient is the RPC Client that should be used by the allocrunner and its
|
|
// hooks to communicate with Nomad Servers.
|
|
RPCClient RPCer
|
|
|
|
// ServiceRegWrapper is the handler wrapper that is used by service hooks
|
|
// to perform service and check registration and deregistration.
|
|
ServiceRegWrapper *wrapper.HandlerWrapper
|
|
|
|
// CheckStore contains check result information.
|
|
CheckStore checkstore.Shim
|
|
|
|
// Getter is an interface for retrieving artifacts.
|
|
Getter interfaces.ArtifactGetter
|
|
|
|
// Wranglers is an interface for managing unix/windows processes.
|
|
Wranglers interfaces.ProcessWranglers
|
|
|
|
// Partitions is an interface for managing cpuset partitions.
|
|
Partitions interfaces.CPUPartitions
|
|
|
|
// WIDSigner fetches workload identities
|
|
WIDSigner widmgr.IdentitySigner
|
|
|
|
// WIDMgr manages workload identities
|
|
WIDMgr widmgr.IdentityManager
|
|
|
|
// Users manages a pool of dynamic workload users
|
|
Users dynamic.Pool
|
|
}
|
|
|
|
// PrevAllocWatcher lets an AllocRunner block until a previous allocation has
// terminated, regardless of whether that allocation is local or remote.
// Migrating workloads is covered by `PrevAllocMigrator`.
type PrevAllocWatcher interface {
	// Wait blocks until the previous alloc terminates.
	Wait(ctx context.Context) error

	// IsWaiting reports whether a concurrent caller is blocked in Wait.
	IsWaiting() bool
}
|
|
|
|
// PrevAllocMigrator allows AllocRunners to migrate a previous allocation
|
|
// whether or not the previous allocation is local or remote.
|
|
type PrevAllocMigrator interface {
|
|
PrevAllocWatcher
|
|
|
|
// IsMigrating returns true if a concurrent caller is in Migrate
|
|
IsMigrating() bool
|
|
|
|
// Migrate data from previous alloc
|
|
Migrate(ctx context.Context, dest allocdir.Interface) error
|
|
}
|