Mirror of https://github.com/kemko/nomad.git (synced 2026-01-06 18:35:44 +03:00)
Nomad clients manage a cpuset cgroup for each task to reserve or share CPU cores. But Docker owns its own cgroups, and attempting to set a parent cgroup that Nomad manages runs into conflicts with how runc manages cgroups via systemd. Therefore Nomad must run as root in order for cpuset management to ever be compatible with Docker. However, some users running in unsupported configurations felt that the changes we made in Nomad 1.7.0 to ensure Nomad was running correctly represented a regression.

This changeset disables cpuset management for non-root Nomad clients. When running Nomad as non-root, the driver will no longer reconcile cpusets with Nomad and `resources.cores` will behave incorrectly (but the driver will still run). Although this is one small step along the way to supporting a rootless Nomad client, running Nomad as non-root is still unsupported. This PR is insufficient by itself to have a secure and properly-working rootless Nomad client.

Ref: https://github.com/hashicorp/nomad/issues/18211
Ref: https://github.com/hashicorp/nomad/issues/13669
Ref: https://hashicorp.atlassian.net/browse/NET-10652
Ref: https://github.com/opencontainers/runc/blob/main/docs/systemd.md
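For operators who want to confirm this behavior on a node, here is a minimal sketch using the official Nomad Go API client (github.com/hashicorp/nomad/api). The attribute key comes from the fingerprint code in this file; the CLI-argument handling and the assumption that the boolean fingerprint attribute surfaces as the string "true" in the node's attribute map are illustrative only and not part of this change.

// check_cpuset.go: a hedged example, not part of this change.
package main

import (
	"fmt"
	"log"
	"os"

	"github.com/hashicorp/nomad/api"
)

func main() {
	if len(os.Args) < 2 {
		log.Fatal("usage: check_cpuset <node-id>")
	}
	nodeID := os.Args[1]

	// DefaultConfig reads NOMAD_ADDR / NOMAD_TOKEN from the environment.
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		log.Fatal(err)
	}

	node, _, err := client.Nodes().Info(nodeID, nil)
	if err != nil {
		log.Fatal(err)
	}

	// Set by buildFingerprint in this file only when the client runs as
	// non-root on Linux; assumed to surface here as the string "true".
	if node.Attributes["driver.docker.cpuset_management.disabled"] == "true" {
		fmt.Println("cpuset management disabled: resources.cores will not be enforced on this node")
	} else {
		fmt.Println("cpuset management active (or docker driver not fingerprinted)")
	}
}

Running this against a client whose Nomad agent is not running as root should show the attribute set; it only checks what this file publishes and does not inspect cgroup state on the node.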
204 lines · 5.9 KiB · Go
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1

package docker

import (
	"context"
	"runtime"
	"sort"
	"strings"
	"time"

	"github.com/hashicorp/nomad/helper/pointer"
	"github.com/hashicorp/nomad/plugins/drivers"
	"github.com/hashicorp/nomad/plugins/drivers/utils"
	pstructs "github.com/hashicorp/nomad/plugins/shared/structs"
)

func (d *Driver) Fingerprint(ctx context.Context) (<-chan *drivers.Fingerprint, error) {
	// Start docker reconcilers when we start fingerprinting, a workaround for
	// task drivers not having a kind of post-setup hook.
	d.danglingReconciler.Start()

	ch := make(chan *drivers.Fingerprint)
	go d.handleFingerprint(ctx, ch)
	return ch, nil
}

// previouslyDetected returns true if the driver has ever detected a running
// Docker daemon on this node.
func (d *Driver) previouslyDetected() bool {
	d.detectedLock.RLock()
	defer d.detectedLock.RUnlock()

	return d.detected
}

// setDetected records whether a Docker daemon has been detected.
func (d *Driver) setDetected(detected bool) {
	d.detectedLock.Lock()
	defer d.detectedLock.Unlock()

	d.detected = detected
}

// setFingerprintSuccess marks the driver as having fingerprinted successfully
func (d *Driver) setFingerprintSuccess() {
	d.fingerprintLock.Lock()
	d.fingerprintSuccess = pointer.Of(true)
	d.fingerprintLock.Unlock()
}

// setFingerprintFailure marks the driver as having failed fingerprinting
func (d *Driver) setFingerprintFailure() {
	d.fingerprintLock.Lock()
	d.fingerprintSuccess = pointer.Of(false)
	d.fingerprintLock.Unlock()
}

// fingerprintSuccessful returns true if the driver has
// never fingerprinted or has successfully fingerprinted
func (d *Driver) fingerprintSuccessful() bool {
	d.fingerprintLock.Lock()
	defer d.fingerprintLock.Unlock()
	return d.fingerprintSuccess == nil || *d.fingerprintSuccess
}

// handleFingerprint runs the fingerprint loop, emitting a fingerprint on the
// channel every fingerprintPeriod until either context is cancelled.
func (d *Driver) handleFingerprint(ctx context.Context, ch chan *drivers.Fingerprint) {
	defer close(ch)

	ticker := time.NewTimer(0)
	defer ticker.Stop()

	for {
		select {
		case <-ctx.Done():
			return
		case <-d.ctx.Done():
			return
		case <-ticker.C:
			ticker.Reset(fingerprintPeriod)
			ch <- d.buildFingerprint()
		}
	}
}

// buildFingerprint inspects the Docker daemon and assembles the driver
// fingerprint for this node.
func (d *Driver) buildFingerprint() *drivers.Fingerprint {
	fp := &drivers.Fingerprint{
		Attributes:        make(map[string]*pstructs.Attribute, 8),
		Health:            drivers.HealthStateHealthy,
		HealthDescription: drivers.DriverHealthy,
	}

	// disable cpuset management and warn when running as non-root on linux
	// systems, since cpusets cannot be reconciled with Docker without root
	if runtime.GOOS == "linux" && !utils.IsUnixRoot() {
		d.config.disableCpusetManagement = true
		d.logger.Warn("docker driver requires running as root: resources.cores and NUMA-aware scheduling will not function correctly on this node, including for non-docker tasks")
		fp.Attributes["driver.docker.cpuset_management.disabled"] = pstructs.NewBoolAttribute(true)
	}

	dockerClient, err := d.getDockerClient()
	if err != nil {
		if d.fingerprintSuccessful() {
			d.logger.Info("failed to initialize client", "error", err)
		}
		d.setFingerprintFailure()
		return &drivers.Fingerprint{
			Health:            drivers.HealthStateUndetected,
			HealthDescription: "Failed to initialize docker client",
		}
	}

	env, err := dockerClient.Version()
	if err != nil {
		if d.fingerprintSuccessful() {
			d.logger.Debug("could not connect to docker daemon", "endpoint", dockerClient.Endpoint(), "error", err)
		}
		d.setFingerprintFailure()

		result := drivers.HealthStateUndetected
		if d.previouslyDetected() {
			result = drivers.HealthStateUnhealthy
		}

		return &drivers.Fingerprint{
			Health:            result,
			HealthDescription: "Failed to connect to docker daemon",
		}
	}

	d.setDetected(true)
	fp.Attributes["driver.docker"] = pstructs.NewBoolAttribute(true)
	fp.Attributes["driver.docker.version"] = pstructs.NewStringAttribute(env.Get("Version"))
	if d.config.AllowPrivileged {
		fp.Attributes["driver.docker.privileged.enabled"] = pstructs.NewBoolAttribute(true)
	}

	if d.config.PidsLimit > 0 {
		fp.Attributes["driver.docker.pids.limit"] = pstructs.NewIntAttribute(d.config.PidsLimit, "")
	}

	if d.config.Volumes.Enabled {
		fp.Attributes["driver.docker.volumes.enabled"] = pstructs.NewBoolAttribute(true)
	}

	if nets, err := dockerClient.ListNetworks(); err != nil {
		d.logger.Warn("error discovering bridge IP", "error", err)
	} else {
		for _, n := range nets {
			if n.Name != "bridge" {
				continue
			}

			if len(n.IPAM.Config) == 0 {
				d.logger.Warn("no IPAM config for bridge network")
				break
			}

			if n.IPAM.Config[0].Gateway != "" {
				fp.Attributes["driver.docker.bridge_ip"] = pstructs.NewStringAttribute(n.IPAM.Config[0].Gateway)
			} else if d.fingerprintSuccess == nil {
				// Docker 17.09.0-ce dropped the Gateway IP from the bridge network
				// See https://github.com/moby/moby/issues/32648
				d.logger.Debug("bridge_ip could not be discovered")
			}
			break
		}
	}

	if dockerInfo, err := dockerClient.Info(); err != nil {
		d.logger.Warn("failed to get Docker system info", "error", err)
	} else {
		runtimeNames := make([]string, 0, len(dockerInfo.Runtimes))
		for name := range dockerInfo.Runtimes {
			if d.config.GPURuntimeName == name {
				// The Nvidia runtime was detected by Docker, which makes it
				// possible to run GPU workloads using the Docker driver on this host.
				d.gpuRuntime = true
			}
			runtimeNames = append(runtimeNames, name)
		}
		sort.Strings(runtimeNames)

		fp.Attributes["driver.docker.runtimes"] = pstructs.NewStringAttribute(
			strings.Join(runtimeNames, ","))
		fp.Attributes["driver.docker.os_type"] = pstructs.NewStringAttribute(dockerInfo.OSType)

		// If this situation arises, we are running in Windows 10 with Linux Containers enabled via VM
		if runtime.GOOS == "windows" && dockerInfo.OSType == "linux" {
			if d.fingerprintSuccessful() {
				d.logger.Warn("Docker is configured with Linux containers; switch to Windows Containers")
			}

			d.setFingerprintFailure()
			return &drivers.Fingerprint{
				Health:            drivers.HealthStateUnhealthy,
				HealthDescription: "Docker is configured with Linux containers; switch to Windows Containers",
			}
		}
	}

	d.setFingerprintSuccess()

	return fp
}