The Nomad client renders templates in the same privileged process used for most other client operations. During internal testing, we discovered that a malicious task can create a symlink that causes template rendering to read and write arbitrary files outside the allocation sandbox. Because the Nomad agent can be restarted without restarting tasks, we can't simply check that the path is safe at the time we write without encountering a time-of-check/time-of-use race. To protect Nomad client hosts from this attack, we'll now read and write templates in a subprocess:

* On Linux/Unix, this subprocess is sandboxed via chroot to the allocation directory. This requires that Nomad is running as a privileged process. A non-root Nomad agent will warn that it cannot sandbox the template renderer.
* On Windows, this subprocess is sandboxed via a Windows AppContainer which has been granted access only to the allocation directory. This does not require special privileges on Windows. (Creating symlinks in the first place can be prevented by running workloads as non-Administrator or non-ContainerAdministrator users.)

Both sandboxes cause encountered symlinks to be evaluated in the context of the sandbox, which will result in a "file not found" or "access denied" error, depending on the platform.

This change will also require an update to Consul-Template to allow callers to inject a custom `ReaderFunc` and `RenderFunc`.

This design is intended as a workaround to allow us to fix this bug without creating backwards compatibility issues for running tasks. A future version of Nomad may introduce a read-only mount specifically for templates and artifacts so that tasks cannot write into the same location that the Nomad agent is reading from.

Fixes: https://github.com/hashicorp/nomad/issues/19888
Fixes: CVE-2024-1329
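As an illustration of why the chroot sandbox defeats the symlink attack (a minimal sketch, not the Nomad implementation): once the renderer subprocess has chroot'd into the allocation directory, an absolute symlink target such as `/etc/shadow` is re-rooted under the allocation directory, so the read fails with "file not found" instead of leaking the host file. The allocation path and the `local/evil` symlink below are hypothetical, and the program must run as root, matching the warning a non-root Nomad agent emits.

```go
package main

import (
	"fmt"
	"os"
	"syscall"
)

func main() {
	allocDir := "/var/nomad/alloc/example" // hypothetical allocation dir

	// Chroot requires a privileged process; a non-root agent cannot
	// sandbox the template renderer this way.
	if err := syscall.Chroot(allocDir); err != nil {
		fmt.Fprintln(os.Stderr, "cannot sandbox template renderer:", err)
		os.Exit(1)
	}
	if err := os.Chdir("/"); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}

	// Suppose the task ran: ln -s /etc/shadow local/evil
	// Inside the chroot, /etc/shadow resolves to <allocDir>/etc/shadow,
	// which does not exist, so the read fails rather than escaping.
	_, err := os.ReadFile("local/evil")
	fmt.Println("read result:", err) // expected: "no such file or directory"
}
```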
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1

package taskrunner

import (
	"context"
	"fmt"
	"net/http"
	"net/http/httptest"
	"path"
	"sync"
	"testing"
	"time"

	consulapi "github.com/hashicorp/consul/api"
	"github.com/hashicorp/nomad/ci"
	"github.com/hashicorp/nomad/client/allocdir"
	"github.com/hashicorp/nomad/client/allocrunner/interfaces"
	trtesting "github.com/hashicorp/nomad/client/allocrunner/taskrunner/testing"
	"github.com/hashicorp/nomad/client/config"
	cstructs "github.com/hashicorp/nomad/client/structs"
	"github.com/hashicorp/nomad/client/taskenv"
	"github.com/hashicorp/nomad/helper/pointer"
	"github.com/hashicorp/nomad/helper/testlog"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/mock"
	"github.com/hashicorp/nomad/nomad/structs"
	structsc "github.com/hashicorp/nomad/nomad/structs/config"
	"github.com/shoenig/test/must"
)

func Test_templateHook_Prestart_ConsulWI(t *testing.T) {
	ci.Parallel(t)
	logger := testlog.HCLogger(t)

	// Create some alloc hook resources, one with tokens and an empty one.
	defaultToken := uuid.Generate()
	hrTokens := cstructs.NewAllocHookResources()
	hrTokens.SetConsulTokens(
		map[string]map[string]*consulapi.ACLToken{
			structs.ConsulDefaultCluster: {
				fmt.Sprintf("consul_%s", structs.ConsulDefaultCluster): &consulapi.ACLToken{
					SecretID: defaultToken,
				},
			},
		},
	)
	hrEmpty := cstructs.NewAllocHookResources()

	tests := []struct {
		name            string
		taskConsul      *structs.Consul
		groupConsul     *structs.Consul
		hr              *cstructs.AllocHookResources
		wantErrMsg      string
		wantConsulToken string
		legacyFlow      bool
	}{
		{
			// COMPAT remove in 1.9+
			name:            "legacy flow",
			hr:              hrEmpty,
			legacyFlow:      true,
			wantConsulToken: "",
		},
		{
			name:       "task missing Consul token",
			hr:         hrEmpty,
			wantErrMsg: "not found",
		},
		{
			name:            "task without consul blocks uses default cluster",
			hr:              hrTokens,
			wantConsulToken: defaultToken,
		},
		{
			name: "task with consul block at task level",
			hr:   hrTokens,
			taskConsul: &structs.Consul{
				Cluster: structs.ConsulDefaultCluster,
			},
			wantConsulToken: defaultToken,
		},
		{
			name: "task with consul block at group level",
			hr:   hrTokens,
			groupConsul: &structs.Consul{
				Cluster: structs.ConsulDefaultCluster,
			},
			wantConsulToken: defaultToken,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			a := mock.Alloc()

			task := a.Job.TaskGroups[0].Tasks[0]
			if !tt.legacyFlow {
				task.Identities = []*structs.WorkloadIdentity{
					{Name: fmt.Sprintf("%s_%s",
						structs.ConsulTaskIdentityNamePrefix,
						structs.ConsulDefaultCluster,
					)},
				}
			}

			clientConfig := &config.Config{Region: "global"}
			envBuilder := taskenv.NewBuilder(mock.Node(), a, task, clientConfig.Region)
			taskHooks := trtesting.NewMockTaskHooks()

			conf := &templateHookConfig{
				alloc:         a,
				logger:        logger,
				lifecycle:     taskHooks,
				events:        &trtesting.MockEmitter{},
				clientConfig:  clientConfig,
				envBuilder:    envBuilder,
				hookResources: tt.hr,
			}
			h := &templateHook{
				config:       conf,
				logger:       logger,
				managerLock:  sync.Mutex{},
				driverHandle: nil,
			}
			req := &interfaces.TaskPrestartRequest{
				Alloc:   a,
				Task:    a.Job.TaskGroups[0].Tasks[0],
				TaskDir: &allocdir.TaskDir{Dir: "foo"},
			}

			err := h.Prestart(context.Background(), req, nil)
			if tt.wantErrMsg != "" {
				must.Error(t, err)
				must.ErrorContains(t, err, tt.wantErrMsg)
			} else {
				must.NoError(t, err)
			}

			must.Eq(t, tt.wantConsulToken, h.consulToken)
		})
	}
}

func Test_templateHook_Prestart_Vault(t *testing.T) {
	ci.Parallel(t)

	secretsResp := `
{
  "data": {
    "data": {
      "secret": "secret"
    },
    "metadata": {
      "created_time": "2023-10-18T15:58:29.65137Z",
      "custom_metadata": null,
      "deletion_time": "",
      "destroyed": false,
      "version": 1
    }
  }
}`

	// Start test server to simulate Vault cluster responses.
	reqCh := make(chan any)
	defaultVaultServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		reqCh <- struct{}{}
		fmt.Fprintln(w, secretsResp)
	}))
	t.Cleanup(defaultVaultServer.Close)

	// Setup client with Vault config.
	clientConfig := config.DefaultConfig()
	clientConfig.TemplateConfig.DisableSandbox = true
	clientConfig.VaultConfigs = map[string]*structsc.VaultConfig{
		structs.VaultDefaultCluster: {
			Name:    structs.VaultDefaultCluster,
			Enabled: pointer.Of(true),
			Addr:    defaultVaultServer.URL,
		},
	}

	testCases := []struct {
		name            string
		vault           *structs.Vault
		expectedCluster string
	}{
		{
			name: "use default cluster",
			vault: &structs.Vault{
				Cluster: structs.VaultDefaultCluster,
			},
			expectedCluster: structs.VaultDefaultCluster,
		},
		{
			name:            "use default cluster if no vault block is provided",
			vault:           nil,
			expectedCluster: structs.VaultDefaultCluster,
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			// Setup alloc and task to connect to Vault cluster.
			alloc := mock.MinAlloc()
			task := alloc.Job.TaskGroups[0].Tasks[0]
			task.Vault = tc.vault

			// Setup template hook.
			taskDir := t.TempDir()
			hookConfig := &templateHookConfig{
				alloc:        alloc,
				logger:       testlog.HCLogger(t),
				lifecycle:    trtesting.NewMockTaskHooks(),
				events:       &trtesting.MockEmitter{},
				clientConfig: clientConfig,
				envBuilder:   taskenv.NewBuilder(mock.Node(), alloc, task, clientConfig.Region),
				templates: []*structs.Template{
					{
						EmbeddedTmpl: `{{with secret "secret/data/test"}}{{.Data.data.secret}}{{end}}`,
						ChangeMode:   structs.TemplateChangeModeNoop,
						DestPath:     path.Join(taskDir, "out.txt"),
					},
				},
			}
			hook := newTemplateHook(hookConfig)

			// Start template hook with a timeout context to ensure it exits.
			req := &interfaces.TaskPrestartRequest{
				Alloc:   alloc,
				Task:    task,
				TaskDir: &allocdir.TaskDir{Dir: taskDir},
			}

			ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
			t.Cleanup(cancel)

			// Start in a goroutine because Prestart() blocks until first
			// render.
			hookErrCh := make(chan error)
			go func() {
				err := hook.Prestart(ctx, req, nil)
				hookErrCh <- err
			}()

			var gotRequest bool
		LOOP:
			for {
				select {
				// Record that the mock Vault server received a request.
				case <-reqCh:
					gotRequest = true

				// Verify the test doesn't time out.
				case <-ctx.Done():
					must.NoError(t, ctx.Err())
					return

				// Verify hook.Prestart() doesn't error.
				case err := <-hookErrCh:
					must.NoError(t, err)
					break LOOP
				}
			}

			// Verify mock Vault server received a request.
			must.True(t, gotRequest)
		})
	}
}