diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go index a56643ac4..2fcf7a592 100644 --- a/client/allocrunner/alloc_runner.go +++ b/client/allocrunner/alloc_runner.go @@ -34,6 +34,7 @@ import ( "github.com/hashicorp/nomad/client/vaultclient" "github.com/hashicorp/nomad/client/widmgr" "github.com/hashicorp/nomad/helper/pointer" + "github.com/hashicorp/nomad/helper/users/dynamic" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/plugins/device" "github.com/hashicorp/nomad/plugins/drivers" @@ -211,6 +212,9 @@ type allocRunner struct { // widmgr manages workload identity signatures widmgr widmgr.IdentityManager + + // users manages a pool of dynamic workload users + users dynamic.Pool } // NewAllocRunner returns a new allocation runner. @@ -255,6 +259,7 @@ func NewAllocRunner(config *config.AllocRunnerConfig) (interfaces.AllocRunner, e partitions: config.Partitions, hookResources: cstructs.NewAllocHookResources(), widsigner: config.WIDSigner, + users: config.Users, } // Create the logger based on the allocation ID @@ -324,6 +329,7 @@ func (ar *allocRunner) initTaskRunners(tasks []*structs.Task) error { Wranglers: ar.wranglers, AllocHookResources: ar.hookResources, WIDMgr: ar.widmgr, + Users: ar.users, } // Create, but do not Run, the task runner diff --git a/client/allocrunner/taskrunner/dynamic_users_hook.go b/client/allocrunner/taskrunner/dynamic_users_hook.go new file mode 100644 index 000000000..ff67d0c13 --- /dev/null +++ b/client/allocrunner/taskrunner/dynamic_users_hook.go @@ -0,0 +1,124 @@ +// Copyright (c) HashiCorp, Inc. 
+// SPDX-License-Identifier: BUSL-1.1
+
+package taskrunner
+
+import (
+	"context"
+	"fmt"
+	"sync"
+
+	"github.com/hashicorp/go-hclog"
+	"github.com/hashicorp/nomad/client/allocrunner/interfaces"
+	"github.com/hashicorp/nomad/helper/users/dynamic"
+)
+
+const (
+	dynamicUsersHookName = "workload_users"
+	dynamicUsersStateKey = "dynamic_user_ugid"
+)
+
+// dynamicUsersHook is used for allocating a one-time use UID/GID on behalf of
+// a single workload (task). No other task will be assigned the same UID/GID
+// while this task is running.
+type dynamicUsersHook struct {
+	shutdownCtx context.Context
+	logger      hclog.Logger
+	usable      bool
+
+	lock *sync.Mutex
+	pool dynamic.Pool
+}
+
+// newDynamicUsersHook creates the hook; when usable is false Prestart and Stop are no-ops.
+func newDynamicUsersHook(ctx context.Context, usable bool, logger hclog.Logger, pool dynamic.Pool) *dynamicUsersHook {
+	return &dynamicUsersHook{
+		shutdownCtx: ctx,
+		logger:      logger.Named(dynamicUsersHookName),
+		lock:        new(sync.Mutex),
+		pool:        pool,
+		usable:      usable,
+	}
+}
+
+// Name returns the name of the hook.
+func (*dynamicUsersHook) Name() string {
+	return dynamicUsersHookName
+}
+
+// Prestart runs on both initial start and on restart. For a task without a
+// user it assigns a pool UGID as the task user and stores it in response.State.
+func (h *dynamicUsersHook) Prestart(_ context.Context, request *interfaces.TaskPrestartRequest, response *interfaces.TaskPrestartResponse) error {
+	// if the task driver does not support the DynamicWorkloadUsers capability,
+	// do nothing
+	if !h.usable {
+		return nil
+	}
+
+	// if the task has a user set, do nothing
+	//
+	// it's up to the job-submitter to set a user that exists on the system
+	if request.Task.User != "" {
+		return nil
+	}
+
+	// if this is the restart case, the UGID will already be acquired and we
+	// just carry it over into a fresh State map (never write into a nil one)
+	if ugid, exists := request.PreviousState[dynamicUsersStateKey]; exists {
+		response.State = map[string]string{dynamicUsersStateKey: ugid}
+		return nil
+	}
+
+	// otherwise we will acquire a dynamic UGID from the pool.
+	h.lock.Lock()
+	defer h.lock.Unlock()
+
+	// allocate an unused UID/GID from the pool
+	ugid, err := h.pool.Acquire()
+	if err != nil {
+		h.logger.Error("unable to acquire dynamic workload user", "error", err)
+		return err
+	}
+
+	h.logger.Trace("acquired dynamic workload user", "ugid", ugid)
+
+	// set the special user of the task
+	request.Task.User = dynamic.String(ugid)
+
+	// set the user on the hook so we may release it later
+	response.State = map[string]string{dynamicUsersStateKey: request.Task.User}
+
+	return nil
+}
+
+// Stop releases the UGID recorded in the hook state, if any, back to the pool.
+func (h *dynamicUsersHook) Stop(_ context.Context, request *interfaces.TaskStopRequest, response *interfaces.TaskStopResponse) error {
+	// if the task driver does not support the DWU capability, nothing to do
+	if !h.usable {
+		return nil
+	}
+
+	// if we did not store a user for this task; nothing to release
+	user, exists := request.ExistingState[dynamicUsersStateKey]
+	if !exists {
+		return nil
+	}
+
+	// otherwise we need to release the UGID back to the pool
+	h.lock.Lock()
+	defer h.lock.Unlock()
+
+	// parse the UID/GID from the pseudo username
+	ugid, err := dynamic.Parse(user)
+	if err != nil {
+		return fmt.Errorf("unable to release dynamic workload user: %w", err)
+	}
+
+	// release the UID/GID to the pool
+	if err = h.pool.Release(ugid); err != nil {
+		return fmt.Errorf("unable to release dynamic workload user: %w", err)
+	}
+
+	h.logger.Trace("released dynamic workload user", "ugid", ugid)
+	return nil
+}
diff --git a/client/allocrunner/taskrunner/dynamic_users_hook_test.go b/client/allocrunner/taskrunner/dynamic_users_hook_test.go new file mode 100644 index 000000000..5ebfa621f --- /dev/null +++ b/client/allocrunner/taskrunner/dynamic_users_hook_test.go @@ -0,0 +1,203 @@ +// Copyright (c) HashiCorp, Inc. 
+// SPDX-License-Identifier: BUSL-1.1 + +package taskrunner + +import ( + "context" + "testing" + + "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/client/allocrunner/interfaces" + "github.com/hashicorp/nomad/helper/testlog" + "github.com/hashicorp/nomad/helper/users/dynamic" + "github.com/hashicorp/nomad/nomad/structs" + "github.com/shoenig/test/must" +) + +func TestTaskRunner_DynamicUsersHook_Prestart_unusable(t *testing.T) { + ci.Parallel(t) + + // task driver does not indicate DynamicWorkloadUsers capability + const capable = false + ctx := context.Background() + logger := testlog.HCLogger(t) + + // if the driver does not indicate the DynamicWorkloadUsers capability, + // none of the pool, request, or response are touched - so using nil + // for each of them shows we are exiting the hook immediately + var pool dynamic.Pool = nil + var request *interfaces.TaskPrestartRequest = nil + var response *interfaces.TaskPrestartResponse = nil + + h := newDynamicUsersHook(ctx, capable, logger, pool) + must.False(t, h.usable) + must.NoError(t, h.Prestart(ctx, request, response)) +} + +func TestTaskRunner_DynamicUsersHook_Prestart_unnecessary(t *testing.T) { + ci.Parallel(t) + + const capable = true + ctx := context.Background() + logger := testlog.HCLogger(t) + + // if the task configures a user, no dynamic workload user will be allocated + // and we prove this by setting a nil pool + var pool dynamic.Pool = nil + var response = new(interfaces.TaskPrestartResponse) + var request = &interfaces.TaskPrestartRequest{ + Task: &structs.Task{User: "billy"}, + } + + h := newDynamicUsersHook(ctx, capable, logger, pool) + must.True(t, h.usable) + must.NoError(t, h.Prestart(ctx, request, response)) + must.MapEmpty(t, response.State) // no user set + must.Eq(t, "billy", request.Task.User) // not modified +} + +func TestTaskRunner_DynamicUsersHook_Prestart_used(t *testing.T) { + ci.Parallel(t) + + const capable = true + ctx := context.Background() + logger := 
testlog.HCLogger(t) + + // create a pool allowing UIDs in range [100, 199] + var pool dynamic.Pool = dynamic.New(&dynamic.PoolConfig{ + MinUGID: 100, + MaxUGID: 199, + }) + var response = new(interfaces.TaskPrestartResponse) + var request = &interfaces.TaskPrestartRequest{ + Task: &structs.Task{User: ""}, // user is not set + } + + // once the hook runs, check we got an expected ugid and the + // task user is set to our pseudo dynamic username + h := newDynamicUsersHook(ctx, capable, logger, pool) + must.True(t, h.usable) + must.NoError(t, h.Prestart(ctx, request, response)) + username, exists := response.State[dynamicUsersStateKey] + must.True(t, exists) + ugid, err := dynamic.Parse(username) + must.NoError(t, err) + must.Between(t, 100, ugid, 199) + must.Eq(t, username, request.Task.User) + must.StrHasPrefix(t, "nomad-", username) +} + +func TestTaskRunner_DynamicUsersHook_Prestart_exhausted(t *testing.T) { + ci.Parallel(t) + + const capable = true + ctx := context.Background() + logger := testlog.HCLogger(t) + + // create a pool allowing UIDs in range [100, 101], both restored below + var pool dynamic.Pool = dynamic.New(&dynamic.PoolConfig{ + MinUGID: 100, + MaxUGID: 101, + }) + pool.Restore(100) + pool.Restore(101) + var response = new(interfaces.TaskPrestartResponse) + var request = &interfaces.TaskPrestartRequest{ + Task: &structs.Task{User: ""}, // user is not set + } + + h := newDynamicUsersHook(ctx, capable, logger, pool) + must.True(t, h.usable) + must.ErrorContains(t, h.Prestart(ctx, request, response), "uid/gid pool exhausted") +} + +func TestTaskRunner_DynamicUsersHook_Stop_unusable(t *testing.T) { + ci.Parallel(t) + + const capable = false + ctx := context.Background() + logger := testlog.HCLogger(t) + + // prove we use none of these by setting them all to nil + var pool dynamic.Pool = nil + var request *interfaces.TaskStopRequest = nil + var response *interfaces.TaskStopResponse = nil + + h := newDynamicUsersHook(ctx, capable, logger, pool) + must.False(t, h.usable) + 
must.NoError(t, h.Stop(ctx, request, response)) +} + +func TestTaskRunner_DynamicUsersHook_Stop_release(t *testing.T) { + ci.Parallel(t) + + const capable = true + ctx := context.Background() + logger := testlog.HCLogger(t) + + // create a pool with ugid 150 marked as allocated, matching the stored state + var pool dynamic.Pool = dynamic.New(&dynamic.PoolConfig{ + MinUGID: 100, + MaxUGID: 199, + }) + pool.Restore(150) // allocate ugid 150 + var request = &interfaces.TaskStopRequest{ + ExistingState: map[string]string{ + dynamicUsersStateKey: "nomad-150", + }, + } + var response = new(interfaces.TaskStopResponse) + + h := newDynamicUsersHook(ctx, capable, logger, pool) + must.True(t, h.usable) + must.NoError(t, h.Stop(ctx, request, response)) +} + +func TestTaskRunner_DynamicUsersHook_Stop_malformed(t *testing.T) { + ci.Parallel(t) + + const capable = true + ctx := context.Background() + logger := testlog.HCLogger(t) + + // the stored username does not match the pseudo username form, so Stop fails + var pool dynamic.Pool = dynamic.New(&dynamic.PoolConfig{ + MinUGID: 100, + MaxUGID: 199, + }) + var request = &interfaces.TaskStopRequest{ + ExistingState: map[string]string{ + dynamicUsersStateKey: "not-valid", + }, + } + var response = new(interfaces.TaskStopResponse) + + h := newDynamicUsersHook(ctx, capable, logger, pool) + must.True(t, h.usable) + must.ErrorContains(t, h.Stop(ctx, request, response), "unable to parse uid/gid from username") +} + +func TestTaskRunner_DynamicUsersHook_Stop_not_in_use(t *testing.T) { + ci.Parallel(t) + + const capable = true + ctx := context.Background() + logger := testlog.HCLogger(t) + + // ugid 101 is in range but was never allocated, so releasing it is an error + var pool dynamic.Pool = dynamic.New(&dynamic.PoolConfig{ + MinUGID: 100, + MaxUGID: 199, + }) + var request = &interfaces.TaskStopRequest{ + ExistingState: map[string]string{ + dynamicUsersStateKey: "nomad-101", + }, + } + var response = new(interfaces.TaskStopResponse) + + h := newDynamicUsersHook(ctx, capable, logger, pool) + 
must.True(t, h.usable) + must.ErrorContains(t, h.Stop(ctx, request, response), "release of unused uid/gid") +} diff --git a/client/allocrunner/taskrunner/task_runner.go b/client/allocrunner/taskrunner/task_runner.go index 3f9efb033..968311877 100644 --- a/client/allocrunner/taskrunner/task_runner.go +++ b/client/allocrunner/taskrunner/task_runner.go @@ -38,6 +38,7 @@ import ( "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/helper/pluginutils/hclspecutils" "github.com/hashicorp/nomad/helper/pluginutils/hclutils" + "github.com/hashicorp/nomad/helper/users/dynamic" "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/structs" bstructs "github.com/hashicorp/nomad/plugins/base/structs" @@ -270,6 +271,9 @@ type TaskRunner struct { // widmgr manages workload identities widmgr widmgr.IdentityManager + + // users manages the pool of dynamic workload users + users dynamic.Pool } type Config struct { @@ -345,6 +349,9 @@ type Config struct { // WIDMgr manages workload identities WIDMgr widmgr.IdentityManager + + // Users manages a pool of dynamic workload users + Users dynamic.Pool } func NewTaskRunner(config *Config) (*TaskRunner, error) { @@ -1117,7 +1124,7 @@ func (tr *TaskRunner) persistLocalState() error { func (tr *TaskRunner) buildTaskConfig() *drivers.TaskConfig { task := tr.Task() alloc := tr.Alloc() - invocationid := uuid.Generate()[:8] + invocationid := uuid.Short() taskResources := tr.taskResources ports := tr.Alloc().AllocatedResources.Shared.Ports env := tr.envBuilder.Build() diff --git a/client/allocrunner/taskrunner/task_runner_hooks.go b/client/allocrunner/taskrunner/task_runner_hooks.go index 5282f6697..9dc169e9e 100644 --- a/client/allocrunner/taskrunner/task_runner_hooks.go +++ b/client/allocrunner/taskrunner/task_runner_hooks.go @@ -63,6 +63,7 @@ func (tr *TaskRunner) initHooks() { alloc := tr.Alloc() tr.runnerHooks = []interfaces.TaskHook{ newValidateHook(tr.clientConfig, hookLogger), + newDynamicUsersHook(tr.killCtx, 
tr.driverCapabilities.DynamicWorkloadUsers, tr.logger, tr.users), newTaskDirHook(tr, hookLogger), newIdentityHook(tr, hookLogger), newLogMonHook(tr, hookLogger), diff --git a/client/client.go b/client/client.go index f79f51095..f411a300b 100644 --- a/client/client.go +++ b/client/client.go @@ -56,6 +56,7 @@ import ( "github.com/hashicorp/nomad/helper/pointer" "github.com/hashicorp/nomad/helper/pool" "github.com/hashicorp/nomad/helper/tlsutil" + "github.com/hashicorp/nomad/helper/users/dynamic" "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/structs" nconfig "github.com/hashicorp/nomad/nomad/structs/config" @@ -339,6 +340,9 @@ type Client struct { // widsigner signs workload identities widsigner widmgr.IdentitySigner + + // users is a pool of dynamic workload users + users dynamic.Pool } var ( @@ -471,6 +475,12 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulProxie c.topology = numalib.NoImpl(ir.Topology) } + // Create the dynamic workload users pool + c.users = dynamic.New(&dynamic.PoolConfig{ + MinUGID: 80_000, // TODO(shoenig) plumb client config + MaxUGID: 89_999, // TODO(shoenig) plumb client config + }) + // Create the cpu core partition manager c.partitions = cgroupslib.GetPartition( c.topology.UsableCores(), @@ -2772,6 +2782,7 @@ func (c *Client) newAllocRunnerConfig( WIDSigner: c.widsigner, Wranglers: c.wranglers, Partitions: c.partitions, + Users: c.users, } } diff --git a/client/config/arconfig.go b/client/config/arconfig.go index 2390209e7..3ce09f0c9 100644 --- a/client/config/arconfig.go +++ b/client/config/arconfig.go @@ -21,6 +21,7 @@ import ( cstate "github.com/hashicorp/nomad/client/state" "github.com/hashicorp/nomad/client/vaultclient" "github.com/hashicorp/nomad/client/widmgr" + "github.com/hashicorp/nomad/helper/users/dynamic" "github.com/hashicorp/nomad/nomad/structs" ) @@ -119,6 +120,9 @@ type AllocRunnerConfig struct { // WIDMgr manages workload identities WIDMgr widmgr.IdentityManager + 
+ // Users manages a pool of dynamic workload users + Users dynamic.Pool } // PrevAllocWatcher allows AllocRunners to wait for a previous allocation to
diff --git a/helper/users/dynamic/pool.go b/helper/users/dynamic/pool.go index e165ef648..86a1cc624 100644 --- a/helper/users/dynamic/pool.go +++ b/helper/users/dynamic/pool.go @@ -1,8 +1,6 @@ // Copyright (c) HashiCorp, Inc. // SPDX-License-Identifier: BUSL-1.1 -//go:build linux - // Package dynamic provides a way of allocating UID/GID to be used by Nomad // tasks with no associated service users managed by the operating system. package dynamic @@ -18,8 +16,8 @@ import ( ) var ( - ErrPoolExhausted = errors.New("users: credentials exhausted") - ErrReleaseUnused = errors.New("users: release of unused credentials") + ErrPoolExhausted = errors.New("users: uid/gid pool exhausted") + ErrReleaseUnused = errors.New("users: release of unused uid/gid") ErrCannotParse = errors.New("users: unable to parse uid/gid from username") )
diff --git a/helper/users/dynamic/users.go b/helper/users/dynamic/users.go new file mode 100644 index 000000000..9d065b5fd --- /dev/null +++ b/helper/users/dynamic/users.go @@ -0,0 +1,61 @@
+// Copyright (c) HashiCorp, Inc.
+// SPDX-License-Identifier: BUSL-1.1
+
+package dynamic
+
+import (
+	"fmt"
+	"regexp"
+	"strconv"
+
+	"github.com/hashicorp/nomad/helper/users"
+)
+
+const (
+	// Home is the non-existent directory path to associate with dynamic
+	// workload users. Any operation on this path should cause an error.
+	//
+	// The path '/nonexistent' is consistent with what systemd uses for
+	// non-interactive service users.
+	Home = "/nonexistent"
+)
+
+// String creates a pseudo username encoding the given ugid, in the form
+// 'nomad-<id>'.
+func String(ugid UGID) string {
+	return fmt.Sprintf("nomad-%d", ugid)
+}
+
+var (
+	re = regexp.MustCompile(`^nomad-(\d+)$`)
+)
+
+// Parse extracts the ugid from a pseudo username of the form 'nomad-<id>'; any other input yields ErrCannotParse.
+func Parse(user string) (UGID, error) {
+	values := re.FindStringSubmatch(user)
+	if len(values) != 2 {
+		return none, ErrCannotParse
+	}
+
+	i, err := strconv.ParseUint(values[1], 10, 64)
+	if err != nil {
+		return none, ErrCannotParse
+	}
+
+	return UGID(i), nil
+}
+
+// LookupUser will return the UID, GID, and home directory associated with the
+// given username. If username is of the form 'nomad-<id>' this indicates Nomad
+// has synthesized a dynamic workload user for the task: the UID and GID are both
+// <id> and the home directory is Home.
+func LookupUser(username string) (int, int, string, error) {
+	// if we can successfully parse username as an anonymous user, use that
+	ugid, err := Parse(username)
+	if err == nil {
+		return int(ugid), int(ugid), Home, nil
+	}
+
+	// otherwise lookup the user using nomad's user lookup cache
+	return users.LookupUnix(username)
+}
diff --git a/plugins/drivers/driver.go b/plugins/drivers/driver.go index 51f75ceb2..cf2bdd695 100644 --- a/plugins/drivers/driver.go +++ b/plugins/drivers/driver.go @@ -178,6 +178,12 @@ type Capabilities struct { // DisableLogCollection indicates this driver has disabled log collection // and the client should not start a logmon process. DisableLogCollection bool + + // DynamicWorkloadUsers indicates this driver is capable (but not required) + // of making use of UID/GID not backed by a user known to the operating system. + // The allocation of a unique, not-in-use UID/GID is managed by Nomad client + // ensuring no overlap. + DynamicWorkloadUsers bool } func (c *Capabilities) HasNetIsolationMode(m NetIsolationMode) bool {