diff --git a/.changelog/19915.txt b/.changelog/19915.txt new file mode 100644 index 000000000..3b8668c61 --- /dev/null +++ b/.changelog/19915.txt @@ -0,0 +1,3 @@ +```release-note:bug +client: Prevent client from starting if cgroup initialization fails +``` diff --git a/client/client.go b/client/client.go index 390e7a7b5..8f041bc41 100644 --- a/client/client.go +++ b/client/client.go @@ -477,10 +477,13 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulProxie ) // Create the process wranglers - wranglers := proclib.New(&proclib.Configs{ + wranglers, err := proclib.New(&proclib.Configs{ UsableCores: c.topology.UsableCores(), Logger: c.logger.Named("proclib"), }) + if err != nil { + return nil, fmt.Errorf("failed to initialize process manager: %w", err) + } c.wranglers = wranglers // Build the allow/denylists of drivers. diff --git a/client/lib/cgroupslib/init.go b/client/lib/cgroupslib/init.go index c09c93bdd..18726e6ea 100644 --- a/client/lib/cgroupslib/init.go +++ b/client/lib/cgroupslib/init.go @@ -7,6 +7,7 @@ package cgroupslib import ( "bytes" + "fmt" "os" "path/filepath" @@ -23,7 +24,7 @@ const ( // Init will initialize the cgroup tree that the Nomad client will use for // isolating resources of tasks. cores is the cpuset granted for use by Nomad. 
-func Init(log hclog.Logger, cores string) { +func Init(log hclog.Logger, cores string) error { log.Info("initializing nomad cgroups", "cores", cores) switch GetMode() { @@ -41,8 +42,7 @@ func Init(log hclog.Logger, cores string) { for _, ctrl := range controllers { p := filepath.Join(root, ctrl, NomadCgroupParent) if err := os.MkdirAll(p, 0755); err != nil { - log.Error("failed to create nomad cgroup", "controller", ctrl, "error", err) - return + return fmt.Errorf("failed to create nomad cgroup %s: %w", ctrl, err) } } @@ -56,8 +56,7 @@ func Init(log hclog.Logger, cores string) { // band from nomad itself var memsSet string if mems, err := detectMemsCG1(); err != nil { - log.Error("failed to detect memset", "error", err) - return + return fmt.Errorf("failed to detect memset: %w", err) } else { memsSet = mems } @@ -78,18 +77,15 @@ func Init(log hclog.Logger, cores string) { // def456.task/{cgroup.procs, cpuset.cpus, cpuset.mems} if err := writeCG(noClone, "cpuset", NomadCgroupParent, cloneFile); err != nil { - log.Error("failed to set clone_children on nomad cpuset cgroup", "error", err) - return + return fmt.Errorf("failed to set clone_children on nomad cpuset cgroup: %w", err) } if err := writeCG(memsSet, "cpuset", NomadCgroupParent, memsFile); err != nil { - log.Error("failed to set cpuset.mems on nomad cpuset cgroup", "error", err) - return + return fmt.Errorf("failed to set cpuset.mems on nomad cpuset cgroup: %w", err) } if err := writeCG(cores, "cpuset", NomadCgroupParent, cpusetFile); err != nil { - log.Error("failed to write cores to nomad cpuset cgroup", "error", err) - return + return fmt.Errorf("failed to write cores to nomad cpuset cgroup: %w", err) } // @@ -97,18 +93,15 @@ func Init(log hclog.Logger, cores string) { // if err := mkCG("cpuset", NomadCgroupParent, SharePartition()); err != nil { - log.Error("failed to create share cpuset partition", "error", err) - return + return fmt.Errorf("failed to create share cpuset partition: %w", err) } if err := 
writeCG(noClone, "cpuset", NomadCgroupParent, SharePartition(), cloneFile); err != nil { - log.Error("failed to set clone_children on nomad cpuset cgroup", "error", err) - return + return fmt.Errorf("failed to set clone_children on share cpuset partition: %w", err) } if err := writeCG(memsSet, "cpuset", NomadCgroupParent, SharePartition(), memsFile); err != nil { - log.Error("failed to set cpuset.mems on share cpuset partition", "error", err) - return + return fmt.Errorf("failed to set cpuset.mems on share cpuset partition: %w", err) } // @@ -116,18 +109,15 @@ func Init(log hclog.Logger, cores string) { // if err := mkCG("cpuset", NomadCgroupParent, ReservePartition()); err != nil { - log.Error("failed to create reserve cpuset partition", "error", err) - return + return fmt.Errorf("failed to create reserve cpuset partition: %w", err) } if err := writeCG(noClone, "cpuset", NomadCgroupParent, ReservePartition(), cloneFile); err != nil { - log.Error("failed to set clone_children on nomad cpuset cgroup", "error", err) - return + return fmt.Errorf("failed to set clone_children on reserve cpuset partition: %w", err) } if err := writeCG(memsSet, "cpuset", NomadCgroupParent, ReservePartition(), memsFile); err != nil { - log.Error("failed to set cpuset.mems on reserve cpuset partition", "error", err) - return + return fmt.Errorf("failed to set cpuset.mems on reserve cpuset partition: %w", err) } log.Debug("nomad cpuset partitions initialized", "cores", cores) @@ -144,8 +134,7 @@ func Init(log hclog.Logger, cores string) { // if err := writeCG(activation, subtreeFile); err != nil { - log.Error("failed to create nomad cgroup", "error", err) - return + return fmt.Errorf("failed to create nomad cgroup: %w", err) } // @@ -153,18 +142,15 @@ func Init(log hclog.Logger, cores string) { // if err := mkCG(NomadCgroupParent); err != nil { - log.Error("failed to create nomad cgroup", "error", err) - return + return fmt.Errorf("failed to create nomad cgroup: %w", err) } if err := 
writeCG(activation, NomadCgroupParent, subtreeFile); err != nil { - log.Error("failed to set subtree control on nomad cgroup", "error", err) - return + return fmt.Errorf("failed to set subtree control on nomad cgroup: %w", err) } if err := writeCG(cores, NomadCgroupParent, cpusetFile); err != nil { - log.Error("failed to write root partition cpuset", "error", err) - return + return fmt.Errorf("failed to write root partition cpuset: %w", err) } log.Debug("top level partition root nomad.slice cgroup initialized") @@ -174,13 +160,11 @@ func Init(log hclog.Logger, cores string) { // if err := mkCG(NomadCgroupParent, SharePartition()); err != nil { - log.Error("failed to create share cgroup", "error", err) - return + return fmt.Errorf("failed to create share cgroup: %w", err) } if err := writeCG(activation, NomadCgroupParent, SharePartition(), subtreeFile); err != nil { - log.Error("failed to set subtree control on cpuset share partition", "error", err) - return + return fmt.Errorf("failed to set subtree control on cpuset share partition: %w", err) } log.Debug("partition member nomad.slice/share cgroup initialized") @@ -190,17 +174,17 @@ func Init(log hclog.Logger, cores string) { // if err := mkCG(NomadCgroupParent, ReservePartition()); err != nil { - log.Error("failed to create share cgroup", "error", err) - return + return fmt.Errorf("failed to create reserve cgroup: %w", err) } if err := writeCG(activation, NomadCgroupParent, ReservePartition(), subtreeFile); err != nil { - log.Error("failed to set subtree control on cpuset reserve partition", "error", err) - return + return fmt.Errorf("failed to set subtree control on cpuset reserve partition: %w", err) } log.Debug("partition member nomad.slice/reserve cgroup initialized") } + + return nil } // detectMemsCG1 will determine the cpuset.mems value to use for diff --git a/client/lib/proclib/wrangler_cg1_linux.go b/client/lib/proclib/wrangler_cg1_linux.go index a8654a633..0d284b23e 100644 --- 
a/client/lib/proclib/wrangler_cg1_linux.go +++ b/client/lib/proclib/wrangler_cg1_linux.go @@ -23,16 +23,20 @@ type LinuxWranglerCG1 struct { cg cgroupslib.Lifecycle } -func newCG1(c *Configs) create { +func newCG1(c *Configs) (create, error) { logger := c.Logger.Named("cg1") - cgroupslib.Init(logger, c.UsableCores.String()) + err := cgroupslib.Init(logger, c.UsableCores.String()) + if err != nil { + return nil, err + } + return func(task Task) ProcessWrangler { return &LinuxWranglerCG1{ task: task, log: logger, cg: cgroupslib.Factory(task.AllocID, task.Task, task.Cores), } - } + }, nil } func (w *LinuxWranglerCG1) Initialize() error { diff --git a/client/lib/proclib/wrangler_cg2_linux.go b/client/lib/proclib/wrangler_cg2_linux.go index 8dcbf3dfe..f02931713 100644 --- a/client/lib/proclib/wrangler_cg2_linux.go +++ b/client/lib/proclib/wrangler_cg2_linux.go @@ -20,16 +20,20 @@ type LinuxWranglerCG2 struct { cg cgroupslib.Lifecycle } -func newCG2(c *Configs) create { +func newCG2(c *Configs) (create, error) { logger := c.Logger.Named("cg2") - cgroupslib.Init(logger, c.UsableCores.String()) + err := cgroupslib.Init(logger, c.UsableCores.String()) + if err != nil { + return nil, err + } + return func(task Task) ProcessWrangler { return &LinuxWranglerCG2{ task: task, log: c.Logger, cg: cgroupslib.Factory(task.AllocID, task.Task, task.Cores), } - } + }, nil } func (w LinuxWranglerCG2) Initialize() error { diff --git a/client/lib/proclib/wrangler_default.go b/client/lib/proclib/wrangler_default.go index 4dc9d84f2..1fb5890e4 100644 --- a/client/lib/proclib/wrangler_default.go +++ b/client/lib/proclib/wrangler_default.go @@ -7,14 +7,14 @@ package proclib // New creates a Wranglers backed by the DefaultWrangler implementation, which // does not do anything. 
-func New(configs *Configs) *Wranglers { +func New(configs *Configs) (*Wranglers, error) { w := &Wranglers{ configs: configs, m: make(map[Task]ProcessWrangler), create: doNothing(configs), } - return w + return w, nil } func doNothing(*Configs) create { diff --git a/client/lib/proclib/wrangler_linux.go b/client/lib/proclib/wrangler_linux.go index 757f2ead3..3dc7bb7c1 100644 --- a/client/lib/proclib/wrangler_linux.go +++ b/client/lib/proclib/wrangler_linux.go @@ -11,18 +11,19 @@ import ( // New creates a Wranglers factory for creating ProcessWrangler's appropriate // for the given system (i.e. cgroups v1 or cgroups v2). -func New(configs *Configs) *Wranglers { +func New(configs *Configs) (*Wranglers, error) { w := &Wranglers{ configs: configs, m: make(map[Task]ProcessWrangler), } + var err error switch cgroupslib.GetMode() { case cgroupslib.CG1: - w.create = newCG1(configs) + w.create, err = newCG1(configs) default: - w.create = newCG2(configs) + w.create, err = newCG2(configs) } - return w + return w, err }