mirror of
https://github.com/kemko/nomad.git
synced 2026-01-04 01:15:43 +03:00
The Nomad client expects certain cgroups paths to exist in order to manage tasks. These paths are created when the agent first starts, but if this creation process fails, the agent would just log the error and proceed with its initialization, despite not being able to run tasks. This commit surfaces the errors back to the client initialization so the process can stop early and make it clear to operators that something went wrong.
78 lines
1.8 KiB
Go
78 lines
1.8 KiB
Go
// Copyright (c) HashiCorp, Inc.
|
|
// SPDX-License-Identifier: BUSL-1.1
|
|
|
|
//go:build linux
|
|
|
|
package proclib
|
|
|
|
import (
|
|
"time"
|
|
|
|
"github.com/hashicorp/go-hclog"
|
|
"github.com/hashicorp/nomad/client/lib/cgroupslib"
|
|
"oss.indeed.com/go/libtime/decay"
|
|
)
|
|
|
|
// LinuxWranglerCG1 is an implementation of ProcessWrangler that leverages
|
|
// cgroups v1 on older Linux systems.
|
|
//
|
|
// e.g. Ubuntu 20.04 / RHEL 8 and previous versions.
|
|
type LinuxWranglerCG1 struct {
|
|
task Task
|
|
log hclog.Logger
|
|
cg cgroupslib.Lifecycle
|
|
}
|
|
|
|
func newCG1(c *Configs) (create, error) {
|
|
logger := c.Logger.Named("cg1")
|
|
err := cgroupslib.Init(logger, c.UsableCores.String())
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return func(task Task) ProcessWrangler {
|
|
return &LinuxWranglerCG1{
|
|
task: task,
|
|
log: logger,
|
|
cg: cgroupslib.Factory(task.AllocID, task.Task, task.Cores),
|
|
}
|
|
}, nil
|
|
}
|
|
|
|
func (w *LinuxWranglerCG1) Initialize() error {
|
|
w.log.Trace("initialize cgroups", "task", w.task)
|
|
return w.cg.Setup()
|
|
}
|
|
|
|
func (w *LinuxWranglerCG1) Kill() error {
|
|
w.log.Trace("force kill processes in cgroup", "task", w.task)
|
|
return w.cg.Kill()
|
|
}
|
|
|
|
func (w *LinuxWranglerCG1) Cleanup() error {
|
|
w.log.Trace("remove cgroups", "task", w.task)
|
|
|
|
// need to give the kernel an opportunity to cleanup procs; which could
|
|
// take some time while the procs wake from being thawed only to find they
|
|
// have been issued a kill signal and need to be reaped
|
|
|
|
rm := func() (bool, error) {
|
|
err := w.cg.Teardown()
|
|
if err != nil {
|
|
return true, err
|
|
}
|
|
return false, nil
|
|
}
|
|
|
|
go func() {
|
|
if err := decay.Backoff(rm, decay.BackoffOptions{
|
|
MaxSleepTime: 30 * time.Second,
|
|
InitialGapSize: 1 * time.Second,
|
|
}); err != nil {
|
|
w.log.Debug("failed to cleanup cgroups", "alloc", w.task.AllocID, "task", w.task.Task, "error", err)
|
|
}
|
|
}()
|
|
|
|
return nil
|
|
}
|