diff --git a/.changelog/19888.txt b/.changelog/19888.txt new file mode 100644 index 000000000..5cba4487a --- /dev/null +++ b/.changelog/19888.txt @@ -0,0 +1,3 @@ +```release-note:security +template: Fixed a bug where symlinks could force templates to read and write to arbitrary locations (CVE-2024-1329) +``` diff --git a/.github/workflows/test-windows.yml b/.github/workflows/test-windows.yml index e968ea7f7..902ff2a66 100644 --- a/.github/workflows/test-windows.yml +++ b/.github/workflows/test-windows.yml @@ -89,7 +89,9 @@ jobs: github.com/hashicorp/nomad/drivers/docker \ github.com/hashicorp/nomad/client/lib/fifo \ github.com/hashicorp/nomad/client/logmon \ - github.com/hashicorp/nomad/client/allocrunner/taskrunner/template + github.com/hashicorp/nomad/client/allocrunner/taskrunner/template \ + github.com/hashicorp/nomad/helper/winappcontainer \ + github.com/hashicorp/nomad/helper/winexec - uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # v3.1.2 with: name: results.xml diff --git a/client/allocrunner/taskrunner/template/renderer/doc.go b/client/allocrunner/taskrunner/template/renderer/doc.go new file mode 100644 index 000000000..2a50cda24 --- /dev/null +++ b/client/allocrunner/taskrunner/template/renderer/doc.go @@ -0,0 +1,13 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package renderer + +// This package implements a "hidden" command `nomad template-render`, similarly +// to how we implement logmon, getter, docklog, and executor. This package's +// init() function is evaluated before Nomad's top-level main.go gets a chance +// to parse arguments. This bypasses loading in any behaviors other than the +// small bit of code here. +// +// This command and its subcommands `write` and `read` are only invoked by the +// template runner. See the parent package for the callers. diff --git a/client/allocrunner/taskrunner/template/renderer/template_sandbox_default.go b/client/allocrunner/taskrunner/template/renderer/template_sandbox_default.go new file mode 100644 index 000000000..abdbc882d --- /dev/null +++ b/client/allocrunner/taskrunner/template/renderer/template_sandbox_default.go @@ -0,0 +1,39 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +//go:build !windows + +package renderer + +import ( + "fmt" + "os" + "path/filepath" + "syscall" +) + +// sandbox is the non-Windows sandbox implementation, which relies on chroot. +// Although chroot is not an appropriate boundary for tasks (implicitly +// untrusted), here the only code that's executing is Nomad itself. Returns the +// new destPath inside the chroot. +func sandbox(sandboxPath, destPath string) (string, error) { + + err := syscall.Chroot(sandboxPath) + if err != nil { + // if the user is running in unsupported non-root configuration, we + // can't build the sandbox, but need to handle this gracefully + fmt.Fprintf(os.Stderr, "template-render sandbox %q not available: %v", + sandboxPath, err) + return destPath, nil + } + + destPath, err = filepath.Rel(sandboxPath, destPath) + if err != nil { + return "", fmt.Errorf("could not find destination path relative to chroot: %w", err) + } + if !filepath.IsAbs(destPath) { + destPath = "/" + destPath + } + + return destPath, nil +} diff --git a/client/allocrunner/taskrunner/template/renderer/template_sandbox_windows.go b/client/allocrunner/taskrunner/template/renderer/template_sandbox_windows.go new file mode 100644 index 000000000..0f9266689 --- /dev/null +++ b/client/allocrunner/taskrunner/template/renderer/template_sandbox_windows.go @@ -0,0 +1,14 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +//go:build windows + +package renderer + +// sandbox is the Windows-specific sandbox implementation. Under Windows, +// symlinks can only be written by the Administrator (including the +// ContainerAdministrator user unfortunately used as the default for Docker). So +// our sandboxing is done by creating an AppContainer in the caller. +func sandbox(_, destPath string) (string, error) { + return destPath, nil +} diff --git a/client/allocrunner/taskrunner/template/renderer/z_template_render.go b/client/allocrunner/taskrunner/template/renderer/z_template_render.go new file mode 100644 index 000000000..9bf63f0dc --- /dev/null +++ b/client/allocrunner/taskrunner/template/renderer/z_template_render.go @@ -0,0 +1,152 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package renderer + +import ( + "bytes" + "flag" + "fmt" + "io" + "io/fs" + "os" + "strconv" + + "github.com/hashicorp/consul-template/renderer" +) + +const ( + // DefaultFilePerms are the default file permissions for files rendered onto + // disk when a specific file permission has not already been specified. + DefaultFilePerms = 0o644 + + ExitDidRender = 0 + ExitError = 1 + ExitWouldRenderButDidnt = 117 // something unmistakeably belonging to Nomad +) + +// This init() must be initialized last in package required by the child plugin +// process. It's recommended to avoid any other `init()` or inline any necessary +// calls here. See eeaa95d commit message for more details. +func init() { + if len(os.Args) > 1 && os.Args[1] == "template-render" { + + if len(os.Args) <= 3 { + // note: we don't use logger here as any message we send will get + // wrapped by CT's own logger, but it's important to keep Stderr and + // Stdout separate so that "read" has a clean output. + fmt.Fprintln(os.Stderr, `expected "read" or "write" argument`) + } + + switch os.Args[2] { + case "read": + err := readTemplate() + if err != nil { + fmt.Fprintln(os.Stderr, err.Error()) + os.Exit(ExitError) + } + os.Exit(0) + + case "write": + result, err := writeTemplate() + if err != nil { + fmt.Fprintln(os.Stderr, err.Error()) + os.Exit(ExitError) + } + + if result.DidRender { + os.Exit(ExitDidRender) + } + if result.WouldRender { + os.Exit(ExitWouldRenderButDidnt) + } + os.Exit(ExitError) + default: + fmt.Fprintln(os.Stderr, `expected "read" or "write" argument`) + os.Exit(ExitError) + } + } +} + +func readTemplate() error { + var ( + sandboxPath, sourcePath string + err error + ) + + flags := flag.NewFlagSet("template-render", flag.ExitOnError) + flags.StringVar(&sandboxPath, "sandbox-path", "", "") + flags.StringVar(&sourcePath, "source-path", "", "") + flags.Parse(os.Args[3:]) + + sourcePath, err = sandbox(sandboxPath, sourcePath) // platform-specific sandboxing + if err != nil { + return fmt.Errorf("failed to sandbox alloc dir %q: %w", sandboxPath, err) + } + + f, err := os.Open(sourcePath) + if err != nil { + return fmt.Errorf("failed to open source file %q: %w", sourcePath, err) + } + defer f.Close() + + _, err = io.Copy(os.Stdout, f) + return err +} + +func writeTemplate() (*renderer.RenderResult, error) { + + var ( + sandboxPath, destPath, perms, user, group string + ) + + flags := flag.NewFlagSet("template-render", flag.ExitOnError) + flags.StringVar(&sandboxPath, "sandbox-path", "", "") + flags.StringVar(&destPath, "dest-path", "", "") + flags.StringVar(&perms, "perms", "", "") + flags.StringVar(&user, "user", "", "") + flags.StringVar(&group, "group", "", "") + + flags.Parse(os.Args[3:]) + + contents := new(bytes.Buffer) + _, err := io.Copy(contents, os.Stdin) + if err != nil { + return nil, fmt.Errorf("failed reading template contents: %w", err) + } + + destPath, err = sandbox(sandboxPath, destPath) // platform-specific sandboxing + if err != nil { + return nil, fmt.Errorf("failed to sandbox alloc dir %q: %w", sandboxPath, err) + } + + // perms must parse into a valid file permission + fileMode := os.FileMode(DefaultFilePerms) + if perms != "" { + fileModeInt, err := strconv.ParseUint(perms, 8, 32) + if err != nil { + return nil, fmt.Errorf( + "Invalid file mode %q: Must be a valid octal number: %w", perms, err) + + } + fileMode = fs.FileMode(fileModeInt) + if fileMode.Perm() != fileMode { + return nil, fmt.Errorf( + "Invalid file mode %q: Must be a valid Unix permission: %w", perms, err) + } + } + + input := &renderer.RenderInput{ + Backup: false, + Contents: contents.Bytes(), + CreateDestDirs: true, + Dry: false, + DryStream: nil, + Path: destPath, + Perms: fileMode, + User: user, + Group: group, + } + + return renderer.Render(input) +} diff --git a/client/allocrunner/taskrunner/template/template.go b/client/allocrunner/taskrunner/template/template.go index a5e038fc6..80c414498 100644 --- a/client/allocrunner/taskrunner/template/template.go +++ b/client/allocrunner/taskrunner/template/template.go @@ -7,8 +7,10 @@ import ( "context" "errors" "fmt" + "log" "math/rand" "os" + "path/filepath" "sort" "strconv" "strings" @@ -17,13 +19,17 @@ import ( ctconf "github.com/hashicorp/consul-template/config" "github.com/hashicorp/consul-template/manager" + "github.com/hashicorp/consul-template/renderer" "github.com/hashicorp/consul-template/signals" envparse "github.com/hashicorp/go-envparse" + "github.com/hashicorp/go-hclog" multierror "github.com/hashicorp/go-multierror" "github.com/hashicorp/nomad/client/allocrunner/taskrunner/interfaces" + trenderer "github.com/hashicorp/nomad/client/allocrunner/taskrunner/template/renderer" "github.com/hashicorp/nomad/client/config" "github.com/hashicorp/nomad/client/taskenv" "github.com/hashicorp/nomad/helper/pointer" + "github.com/hashicorp/nomad/helper/subproc" "github.com/hashicorp/nomad/nomad/structs" structsc "github.com/hashicorp/nomad/nomad/structs/config" ) @@ -128,6 +134,12 @@ type TaskTemplateManagerConfig struct { // NomadToken is the Nomad token or identity claim for the task NomadToken string + + // TaskID is a unique identifier for this task's template manager, for use + // in downstream platform-specific template runner consumers + TaskID string + + Logger hclog.Logger } // Validate validates the configuration. @@ -182,6 +194,13 @@ func NewTaskTemplateManager(config *TaskTemplateManagerConfig) (*TaskTemplateMan tm.signals[tmpl.ChangeSignal] = sig } + // the platform sandbox needs to be created before we construct the runner + // so that reading the template is sandboxed + err := createPlatformSandbox(config) + if err != nil { + return nil, err + } + // Build the consul-template runner runner, lookup, err := templateRunner(config) if err != nil { @@ -210,6 +229,8 @@ func (tm *TaskTemplateManager) Stop() { if tm.runner != nil { tm.runner.Stop() } + + destroyPlatformSandbox(tm.config) } // SetDriverHandle sets the executor @@ -956,10 +977,117 @@ func newRunnerConfig(config *TaskTemplateManagerConfig, } } + sandboxEnabled := isSandboxEnabled(config) + sandboxDir := filepath.Dir(config.TaskDir) // alloc working directory + conf.ReaderFunc = ReaderFn(config.TaskID, sandboxDir, sandboxEnabled) + conf.RendererFunc = RenderFn(config.TaskID, sandboxDir, sandboxEnabled) conf.Finalize() return conf, nil } +func isSandboxEnabled(cfg *TaskTemplateManagerConfig) bool { + if cfg.ClientConfig != nil && cfg.ClientConfig.TemplateConfig != nil && cfg.ClientConfig.TemplateConfig.DisableSandbox { + return false + } + return true +} + +type sandboxConfig struct { + thisBin string + sandboxPath string + destPath string + sourcePath string + perms string + user string + group string + taskID string + contents []byte +} + +func ReaderFn(taskID, taskDir string, sandboxEnabled bool) func(string) ([]byte, error) { + if !sandboxEnabled { + return nil + } + thisBin := subproc.Self() + + return func(src string) ([]byte, error) { + + sandboxCfg := &sandboxConfig{ + thisBin: thisBin, + sandboxPath: taskDir, + sourcePath: src, + taskID: taskID, + } + + stdout, stderr, code, err := readTemplateFromSandbox(sandboxCfg) + if err != nil && code != 0 { + return nil, fmt.Errorf("%v: %s", err, string(stderr)) + } + + // this will get wrapped in CT log formatter + fmt.Fprintf(os.Stderr, "[DEBUG] %s", string(stderr)) + return stdout, nil + } +} + +func RenderFn(taskID, taskDir string, sandboxEnabled bool) func(*renderer.RenderInput) (*renderer.RenderResult, error) { + if !sandboxEnabled { + return nil + } + thisBin := subproc.Self() + + return func(i *renderer.RenderInput) (*renderer.RenderResult, error) { + wouldRender := false + didRender := false + + sandboxCfg := &sandboxConfig{ + thisBin: thisBin, + sandboxPath: taskDir, + destPath: i.Path, + perms: strconv.FormatUint(uint64(i.Perms), 8), + user: i.User, + group: i.Group, + taskID: taskID, + contents: i.Contents, + } + + logs, code, err := renderTemplateInSandbox(sandboxCfg) + if err != nil { + if len(logs) > 0 { + log.Printf("[ERROR] %v: %s", err, logs) + } else { + log.Printf("[ERROR] %v", err) + } + return &renderer.RenderResult{ + DidRender: false, + WouldRender: false, + Contents: []byte{}, + }, fmt.Errorf("template render subprocess failed: %w", err) + } + if code == trenderer.ExitWouldRenderButDidnt { + didRender = false + wouldRender = true + } else { + didRender = true + wouldRender = true + } + + // the subprocess emits logs matching the consul-template runner, but we + // CT doesn't support hclog, so we just print the whole output here to + // stderr the same way CT does so the results look seamless + if len(logs) > 0 { + log.Printf("[DEBUG] %s", logs) + } + + result := &renderer.RenderResult{ + DidRender: didRender, + WouldRender: wouldRender, + Contents: i.Contents, + } + return result, nil + } +} + // loadTemplateEnv loads task environment variables from all templates. func loadTemplateEnv(tmpls []*structs.Template, taskEnv *taskenv.TaskEnv) (map[string]string, error) { all := make(map[string]string, 50) diff --git a/client/allocrunner/taskrunner/template/template_default.go b/client/allocrunner/taskrunner/template/template_default.go new file mode 100644 index 000000000..8b10db0a2 --- /dev/null +++ b/client/allocrunner/taskrunner/template/template_default.go @@ -0,0 +1,106 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +//go:build !windows + +package template + +import ( + "bytes" + "context" + "io" + "os/exec" + "time" + + "github.com/hashicorp/nomad/client/allocrunner/taskrunner/template/renderer" +) + +// createPlatformSandbox is a no-op outside of windows +func createPlatformSandbox(_ *TaskTemplateManagerConfig) error { return nil } + +// destroyPlatformSandbox is a no-op outside of windows +func destroyPlatformSandbox(_ *TaskTemplateManagerConfig) error { return nil } + +// renderTemplateInSandbox runs the template-render command in a subprocess that +// will chroot itself to prevent a task from swapping a directory between the +// sandbox path and the destination with a symlink pointing to somewhere outside +// the sandbox. +// +// See renderer/ subdirectory for implementation. +func renderTemplateInSandbox(cfg *sandboxConfig) (string, int, error) { + + // Safe to inject user input as command arguments since Go's exec.Command + // does not invoke a shell. + args := []string{ + "template-render", + "write", + "-sandbox-path", cfg.sandboxPath, + "-dest-path", cfg.destPath, + "-perms", cfg.perms, + } + if cfg.user != "" { + args = append(args, "-user") + args = append(args, cfg.user) + } + if cfg.group != "" { + args = append(args, "-group") + args = append(args, cfg.group) + } + + ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) + defer cancel() + + // note: we can't simply set cmd.SysProcAttr.Chroot here because the Nomad + // binary isn't in the chroot + cmd := exec.CommandContext(ctx, cfg.thisBin, args...) + stdin, err := cmd.StdinPipe() + if err != nil { + return "", 1, err + } + + go func() { + defer stdin.Close() + io.Copy(stdin, bytes.NewReader(cfg.contents)) + }() + + out, err := cmd.CombinedOutput() + code := cmd.ProcessState.ExitCode() + if code == renderer.ExitWouldRenderButDidnt { + err = nil // erase the "exit code 117" error + } + + return string(out), code, err +} + +// readTemplateFromSandbox runs the template-render command in a subprocess that +// will chroot itself to prevent a task from swapping a directory between the +// sandbox path and the source with a symlink pointing to somewhere outside +// the sandbox. +func readTemplateFromSandbox(cfg *sandboxConfig) ([]byte, []byte, int, error) { + + // Safe to inject user input as command arguments since Go's exec.Command + // does not invoke a shell. Also, the only user-controlled argument here is + // the source path which we've already verified is at least a valid path in + // the caller. + args := []string{ + "template-render", + "read", + "-sandbox-path", cfg.sandboxPath, + "-source-path", cfg.sourcePath, + } + + ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) + defer cancel() + + // note: we can't simply set cmd.SysProcAttr.Chroot here because the Nomad + // binary isn't in the chroot + cmd := exec.CommandContext(ctx, cfg.thisBin, args...) + var outb, errb bytes.Buffer + cmd.Stdout = &outb + cmd.Stderr = &errb + + err := cmd.Run() + stdout := outb.Bytes() + stderr := errb.Bytes() + return stdout, stderr, cmd.ProcessState.ExitCode(), err +} diff --git a/client/allocrunner/taskrunner/template/template_default_test.go b/client/allocrunner/taskrunner/template/template_default_test.go index d9a37b9de..4aa7db866 100644 --- a/client/allocrunner/taskrunner/template/template_default_test.go +++ b/client/allocrunner/taskrunner/template/template_default_test.go @@ -6,6 +6,7 @@ package template import ( + "fmt" "os" "path/filepath" "syscall" @@ -13,8 +14,10 @@ import ( "time" "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/client/taskenv" clienttestutil "github.com/hashicorp/nomad/client/testutil" "github.com/hashicorp/nomad/helper/pointer" + "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/testutil" "github.com/shoenig/test/must" @@ -52,13 +55,8 @@ func TestTaskTemplateManager_Permissions(t *testing.T) { // Check the file is there path := filepath.Join(harness.taskDir, file) fi, err := os.Stat(path) - if err != nil { - t.Fatalf("Failed to stat file: %v", err) - } - - if m := fi.Mode(); m != os.ModePerm { - t.Fatalf("Got mode %v; want %v", m, os.ModePerm) - } + must.NoError(t, err, must.Sprint("Failed to stat file")) + must.Eq(t, os.ModePerm, fi.Mode()) sys := fi.Sys() uid := pointer.Of(int(sys.(*syscall.Stat_t).Uid)) @@ -67,3 +65,98 @@ func TestTaskTemplateManager_Permissions(t *testing.T) { must.Eq(t, template.Uid, uid) must.Eq(t, template.Gid, gid) } + +// TestTaskTemplateManager_SymlinkEscapeSource verifies that a malicious or +// compromised task cannot use a symlink parent directory to cause reads to +// escape the sandbox +func TestTaskTemplateManager_SymlinkEscapeSource(t *testing.T) { + ci.Parallel(t) + clienttestutil.RequireRoot(t) + + // Create a set of "sensitive" files outside the task dir that the task + // should not be able to read or write to, despite filesystem permissions + sensitiveDir := t.TempDir() + sensitiveFile := filepath.Join(sensitiveDir, "sensitive.txt") + os.WriteFile(sensitiveFile, []byte("very-secret-stuff"), 0755) + + a := mock.Alloc() + task := a.Job.TaskGroups[0].Tasks[0] + task.Name = TestTaskName + template := &structs.Template{ChangeMode: structs.TemplateChangeModeNoop} + + // Build a new task environment with a valid DestPath + harness := newTestHarness(t, []*structs.Template{template}, false, false) + harness.envBuilder = taskenv.NewBuilder(harness.node, a, task, "global") + harness.envBuilder.SetClientTaskRoot(harness.taskDir) + os.MkdirAll(filepath.Join(harness.taskDir, "local"), 0755) + harness.templates[0].DestPath = filepath.Join("local", "dest.tmpl") + + // "Attack" the SourcePath by creating a symlink from the sensitive file to + // the task dir; this simulates what happens when the client restarts and + // the task attacks while the client is down, which is the easiest case to + // reproduce + must.NoError(t, os.Symlink(sensitiveDir, filepath.Join(harness.taskDir, "local", "pwned"))) + harness.templates[0].SourcePath = filepath.Join("local", "pwned", "sensitive.txt") + + err := harness.startWithErr() + t.Cleanup(harness.stop) + + errPath := "/" + filepath.Join((filepath.Base(harness.taskDir)), + harness.templates[0].SourcePath) + + must.EqError(t, err, fmt.Sprintf("failed to read template: exit status 1: failed to open source file %q: open %s: no such file or directory\n", errPath, errPath)) +} + +// TestTaskTemplateManager_SymlinkEscapeDest verifies that a malicious or +// compromised task cannot use a symlink parent directory to cause writes to +// escape the sandbox +func TestTaskTemplateManager_SymlinkEscapeDest(t *testing.T) { + ci.Parallel(t) + clienttestutil.RequireRoot(t) + + // Create a set of "sensitive" files outside the task dir that the task + // should not be able to read or write to, despite filesystem permissions + sensitiveDir := t.TempDir() + sensitiveFile := filepath.Join(sensitiveDir, "sensitive.txt") + os.WriteFile(sensitiveFile, []byte("very-secret-stuff"), 0755) + + a := mock.Alloc() + task := a.Job.TaskGroups[0].Tasks[0] + task.Name = TestTaskName + template := &structs.Template{ChangeMode: structs.TemplateChangeModeNoop} + + // Build a task environment with a valid SourcePath + harness := newTestHarness(t, []*structs.Template{template}, false, false) + harness.envBuilder = taskenv.NewBuilder(harness.node, a, task, "global") + harness.envBuilder.SetClientTaskRoot(harness.taskDir) + os.MkdirAll(filepath.Join(harness.taskDir, "local"), 0755) + + harness.templates[0].SourcePath = filepath.Join("local", "source.tmpl") + must.NoError(t, os.WriteFile( + filepath.Join(harness.taskDir, harness.templates[0].SourcePath), + []byte("hacked!"), 0755)) + + // "Attack" the DestPath by creating a symlink from the sensitive file to + // the task dir + must.NoError(t, os.Symlink(sensitiveDir, filepath.Join(harness.taskDir, "local", "pwned"))) + harness.templates[0].DestPath = filepath.Join("local", "pwned", "sensitive.txt") + + err := harness.startWithErr() + t.Cleanup(harness.stop) + must.NoError(t, err) + + // This template has never rendered successfully so we'll get a Kill when we + // wait for the first render + select { + case <-harness.mockHooks.KillCh: + case <-harness.mockHooks.UnblockCh: + t.Fatalf("task should not have unblocked") + case <-time.After(time.Duration(testutil.TestMultiplier()) * time.Second): + t.Fatalf("task kill should have been called") + } + + // Ensure we haven't written despite the error + b, err := os.ReadFile(sensitiveFile) + must.NoError(t, err) + must.Eq(t, "very-secret-stuff", string(b)) +} diff --git a/client/allocrunner/taskrunner/template/template_test.go b/client/allocrunner/taskrunner/template/template_test.go index 228edfbe5..e4d534f6b 100644 --- a/client/allocrunner/taskrunner/template/template_test.go +++ b/client/allocrunner/taskrunner/template/template_test.go @@ -5,12 +5,14 @@ package template import ( "bytes" + "flag" "fmt" "io" "os" "path/filepath" "reflect" "regexp" + "slices" "sort" "strconv" "strings" @@ -38,6 +40,19 @@ import ( "github.com/shoenig/test/must" ) +// TestMain overrides the normal top-level test runner for this package. When +// template-render subprocesses are run, they use os.Executable to find their +// own binary, which is the template.test binary when these tests are +// running. That causes the template-render subprocess to run all these tests! +// Bail out early if we know we're in the subprocess. +func TestMain(m *testing.M) { + flag.Parse() + if slices.Contains(flag.Args(), "template-render") { + return + } + os.Exit(m.Run()) +} + const ( // TestTaskName is the name of the injected task. It should appear in the // environment variable $NOMAD_TASK_NAME @@ -74,6 +89,7 @@ type testHarness struct { // newTestHarness returns a harness starting a dev consul and vault server, // building the appropriate config and creating a TaskTemplateManager func newTestHarness(t *testing.T, templates []*structs.Template, consul, vault bool) *testHarness { + t.Helper() region := "global" mockNode := mock.Node() @@ -129,6 +145,7 @@ func newTestHarness(t *testing.T, templates []*structs.Template, consul, vault b } func (h *testHarness) start(t *testing.T) { + t.Helper() if err := h.startWithErr(); err != nil { t.Fatalf("failed to build task template manager: %v", err) } @@ -148,6 +165,7 @@ func (h *testHarness) startWithErr() error { TaskDir: h.taskDir, EnvBuilder: h.envBuilder, MaxTemplateEventRate: h.emitRate, + TaskID: uuid.Generate(), }) return err } @@ -176,7 +194,7 @@ func TestTaskTemplateManager_InvalidConfig(t *testing.T) { ci.Parallel(t) hooks := trtesting.NewMockTaskHooks() clientConfig := &config.Config{Region: "global"} - taskDir := "foo" + taskDir := t.TempDir() a := mock.Alloc() envBuilder := taskenv.NewBuilder(mock.Node(), a, a.Job.TaskGroups[0].Tasks[0], clientConfig.Region) @@ -199,6 +217,7 @@ func TestTaskTemplateManager_InvalidConfig(t *testing.T) { TaskDir: taskDir, EnvBuilder: envBuilder, MaxTemplateEventRate: DefaultMaxTemplateEventRate, + Logger: testlog.HCLogger(t), }, expectedErr: "lifecycle hooks", }, @@ -299,6 +318,10 @@ func TestTaskTemplateManager_InvalidConfig(t *testing.T) { for _, c := range cases { t.Run(c.name, func(t *testing.T) { + if c.config != nil { + c.config.TaskID = c.name + c.config.Logger = testlog.HCLogger(t) + } _, err := NewTaskTemplateManager(c.config) if err != nil { if c.expectedErr == "" { @@ -392,17 +415,15 @@ func TestTaskTemplateManager_HostPath(t *testing.T) { harness.taskDir, template.SourcePath, err) } - // Test with desination too + // Test with destination too template.SourcePath = f.Name() template.DestPath = "../../../../../../" + file harness = newTestHarness(t, []*structs.Template{template}, false, false) harness.envBuilder = taskenv.NewBuilder(harness.node, a, task, "global") err = harness.startWithErr() - if err == nil || !strings.Contains(err.Error(), "escapes alloc directory") { - t.Fatalf("Expected directory traversal out of %q via interpolation disallowed for %q: %v", - harness.taskDir, template.SourcePath, err) - } - + must.ErrorContains(t, err, "escapes alloc directory", must.Sprintf( + "Expected directory traversal out of %q via interpolation disallowed for %q: %v", + harness.taskDir, template.SourcePath, err)) } func TestTaskTemplateManager_Unblock_Static(t *testing.T) { @@ -416,7 +437,13 @@ func TestTaskTemplateManager_Unblock_Static(t *testing.T) { ChangeMode: structs.TemplateChangeModeNoop, } + a := mock.Alloc() + task := a.Job.TaskGroups[0].Tasks[0] + task.Name = TestTaskName + harness := newTestHarness(t, []*structs.Template{template}, false, false) + harness.envBuilder = taskenv.NewBuilder(harness.node, a, task, "global") + harness.envBuilder.SetClientTaskRoot(harness.taskDir) harness.start(t) defer harness.stop() @@ -451,7 +478,13 @@ func TestTaskTemplateManager_Unblock_Static_NomadEnv(t *testing.T) { ChangeMode: structs.TemplateChangeModeNoop, } + a := mock.Alloc() + task := a.Job.TaskGroups[0].Tasks[0] + task.Name = TestTaskName + harness := newTestHarness(t, []*structs.Template{template}, false, false) + harness.envBuilder = taskenv.NewBuilder(harness.node, a, task, "global") + harness.envBuilder.SetClientTaskRoot(harness.taskDir) harness.start(t) defer harness.stop() @@ -485,13 +518,17 @@ func TestTaskTemplateManager_Unblock_Static_AlreadyRendered(t *testing.T) { ChangeMode: structs.TemplateChangeModeNoop, } + a := mock.Alloc() + task := a.Job.TaskGroups[0].Tasks[0] + task.Name = TestTaskName + harness := newTestHarness(t, []*structs.Template{template}, false, false) + harness.envBuilder = taskenv.NewBuilder(harness.node, a, task, "global") + harness.envBuilder.SetClientTaskRoot(harness.taskDir) // Write the contents path := filepath.Join(harness.taskDir, file) - if err := os.WriteFile(path, []byte(content), 0777); err != nil { - t.Fatalf("Failed to write data: %v", err) - } + must.NoError(t, os.WriteFile(path, []byte(content), 0777)) harness.start(t) defer harness.stop() @@ -506,13 +543,10 @@ func TestTaskTemplateManager_Unblock_Static_AlreadyRendered(t *testing.T) { // Check the file is there path = filepath.Join(harness.taskDir, file) raw, err := os.ReadFile(path) - if err != nil { - t.Fatalf("Failed to read rendered template from %q: %v", path, err) - } + must.NoError(t, err, must.Sprintf( + "Failed to read rendered template from %q", path)) - if s := string(raw); s != content { - t.Fatalf("Unexpected template data; got %q, want %q", s, content) - } + must.Eq(t, content, string(raw), must.Sprint("Unexpected template data")) } func TestTaskTemplateManager_Unblock_Consul(t *testing.T) { @@ -748,7 +782,7 @@ func TestTaskTemplateManager_FirstRender_Restored(t *testing.T) { case <-harness.mockHooks.RestartCh: t.Fatal("should not have restarted", harness.mockHooks) case <-harness.mockHooks.SignalCh: - t.Fatal("should not have restarted", harness.mockHooks) + t.Fatal("should not have received signal", harness.mockHooks) case <-time.After(time.Duration(1*testutil.TestMultiplier()) * time.Second): } @@ -1031,6 +1065,7 @@ func TestTaskTemplateManager_Interpolate_Destination(t *testing.T) { } harness := newTestHarness(t, []*structs.Template{template}, false, false) + harness.config.TemplateConfig.DisableSandbox = true // no real alloc in this test harness.start(t) defer harness.stop() @@ -1392,10 +1427,10 @@ COMMON={{key "common"}} }) } -// TestTaskTemplateManager_FiltersProcessEnvVars asserts that we only render +// TestTaskTemplateManager_FiltersEnvVars asserts that we only render // environment variables found in task env-vars and not read the nomad host -// process environment variables. nomad host process environment variables -// are to be treated the same as not found environment variables. +// process environment variables. nomad host process environment variables are +// to be treated the same as not found environment variables. func TestTaskTemplateManager_FiltersEnvVars(t *testing.T) { t.Setenv("NOMAD_TASK_NAME", "should be overridden by task") @@ -1417,6 +1452,7 @@ TEST_ENV_NOT_FOUND: {{env "` + testenv + `_NOTFOUND" }}` } harness := newTestHarness(t, []*structs.Template{template}, false, false) + harness.config.TemplateConfig.DisableSandbox = true // no real alloc in this test harness.start(t) defer harness.stop() @@ -1454,6 +1490,7 @@ ANYTHING_goes=Spaces are=ok! Envvars: true, } harness := newTestHarness(t, []*structs.Template{template}, true, false) + harness.config.TemplateConfig.DisableSandbox = true // no real alloc in this test harness.start(t) defer harness.stop() @@ -1691,6 +1728,7 @@ func TestTaskTemplateManager_Config_ServerName(t *testing.T) { ClientConfig: c, VaultToken: "token", VaultConfig: c.GetDefaultVault(), + TaskID: uuid.Generate(), } ctconf, err := newRunnerConfig(config, nil) if err != nil { @@ -1725,6 +1763,7 @@ func TestTaskTemplateManager_Config_VaultNamespace(t *testing.T) { VaultToken: "token", VaultConfig: c.GetDefaultVault(), EnvBuilder: taskenv.NewBuilder(c.Node, alloc, alloc.Job.TaskGroups[0].Tasks[0], c.Region), + TaskID: uuid.Generate(), } ctmplMapping, err := parseTemplateConfigs(config) @@ -1762,6 +1801,7 @@ func TestTaskTemplateManager_Config_VaultNamespace_TaskOverride(t *testing.T) { VaultConfig: c.GetDefaultVault(), VaultNamespace: overriddenNS, EnvBuilder: taskenv.NewBuilder(c.Node, alloc, alloc.Job.TaskGroups[0].Tasks[0], c.Region), + TaskID: uuid.Generate(), } ctmplMapping, err := parseTemplateConfigs(config) @@ -1782,7 +1822,7 @@ func TestTaskTemplateManager_Escapes(t *testing.T) { clienttestutil.RequireNotWindows(t) clientConf := config.DefaultConfig() - must.False(t, clientConf.TemplateConfig.DisableSandbox, must.Sprint("expected sandbox to be disabled")) + must.False(t, clientConf.TemplateConfig.DisableSandbox, must.Sprint("expected sandbox to be enabled")) // Set a fake alloc dir to make test output more realistic clientConf.AllocDir = "/fake/allocdir" @@ -2005,6 +2045,7 @@ func TestTaskTemplateManager_Escapes(t *testing.T) { tc := cases[i] t.Run(tc.Name, func(t *testing.T) { config := tc.Config() + config.TaskID = uuid.Generate() mapping, err := parseTemplateConfigs(config) if tc.Err == nil { // Ok path @@ -2311,6 +2352,7 @@ func TestTaskTemplateManager_ClientTemplateConfig_Set(t *testing.T) { t.Run(_case.Name, func(t *testing.T) { // monkey patch the client config with the version of the ClientTemplateConfig we want to test. _case.TTMConfig.ClientConfig.TemplateConfig = _case.ClientTemplateConfig + _case.TTMConfig.TaskID = uuid.Generate() templateMapping, err := parseTemplateConfigs(_case.TTMConfig) must.NoError(t, err) @@ -2375,6 +2417,7 @@ func TestTaskTemplateManager_Template_Wait_Set(t *testing.T) { }, }, }, + TaskID: uuid.Generate(), } templateMapping, err := parseTemplateConfigs(ttmConfig) @@ -2412,6 +2455,7 @@ func TestTaskTemplateManager_Template_ErrMissingKey_Set(t *testing.T) { ErrMissingKey: true, }, }, + TaskID: uuid.Generate(), } templateMapping, err := parseTemplateConfigs(ttmConfig) diff --git a/client/allocrunner/taskrunner/template/template_windows.go b/client/allocrunner/taskrunner/template/template_windows.go new file mode 100644 index 000000000..c84fddd90 --- /dev/null +++ b/client/allocrunner/taskrunner/template/template_windows.go @@ -0,0 +1,169 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +//go:build windows + +package template + +import ( + "bytes" + "context" + "errors" + "fmt" + "io" + "path/filepath" + "time" + + "github.com/hashicorp/go-hclog" + "github.com/hashicorp/nomad/client/allocrunner/taskrunner/template/renderer" + "github.com/hashicorp/nomad/helper/subproc" + "github.com/hashicorp/nomad/helper/winappcontainer" + "github.com/hashicorp/nomad/helper/winexec" +) + +const ExitCodeFatal int = 13 // typically this is going to be a bug in Nomad + +// createPlatformSandbox creates the AppContainer profile and sets DACLs on the +// files we want to grant access to. +func createPlatformSandbox(cfg *TaskTemplateManagerConfig) error { + + if !isSandboxEnabled(cfg) { + return nil + } + thisBin := subproc.Self() + + containerCfg := &winappcontainer.AppContainerConfig{ + Name: cfg.TaskID, + AllowedPaths: []string{ + thisBin, + filepath.Dir(cfg.TaskDir), // give access to the whole alloc working directory + }, + } + if cfg.Logger == nil { + cfg.Logger = hclog.Default() // prevents panics in tests + } + + err := winappcontainer.CreateAppContainer(cfg.Logger, containerCfg) + if err != nil { + // if Nomad is running as an unprivileged user, we might not be able to + // create the sandbox, but in that case we're not vulnerable to the + // attacks this is intended to prevent anyways + if errors.Is(err, winappcontainer.ErrAccessDeniedToCreateSandbox) { + cfg.Logger.Debug("could not create platform sandbox", "error", err) + return nil + } + return fmt.Errorf("could not create platform sandbox: %w", err) + } + + return nil +} + +// destroyPlatformSandbox deletes the AppContainer profile. +func destroyPlatformSandbox(cfg *TaskTemplateManagerConfig) error { + + if cfg.ClientConfig.TemplateConfig.DisableSandbox { + return nil + } + + if cfg.Logger == nil { + cfg.Logger = hclog.Default() + } + + err := winappcontainer.DeleteAppContainer(cfg.Logger, cfg.TaskID) + if err != nil { + cfg.Logger.Warn("could not destroy platform sandbox", "error", err) + } + return err +} + +// renderTemplateInSandbox runs the template-render command in an AppContainer to +// prevent a task from swapping a directory between the sandbox path and the +// destination with a symlink pointing to somewhere outside the sandbox. +// +// See renderer/ subdirectory for implementation. +func renderTemplateInSandbox(cfg *sandboxConfig) (string, int, error) { + procThreadAttrs, cleanup, err := winappcontainer.CreateProcThreadAttributes(cfg.taskID) + if err != nil { + return "", ExitCodeFatal, fmt.Errorf("could not create proc attributes: %v", err) + } + defer cleanup() + + // Safe to inject user input as command arguments since winexec.Command + // does not invoke a shell. + args := []string{ + "template-render", + "write", + "-sandbox-path", cfg.sandboxPath, + "-dest-path", cfg.destPath, + "-perms", cfg.perms, + } + if cfg.user != "" { + args = append(args, "-user") + args = append(args, cfg.user) + } + if cfg.group != "" { + args = append(args, "-group") + args = append(args, cfg.group) + } + + ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) + defer cancel() + + cmd := winexec.CommandContext(ctx, cfg.thisBin, args...) + cmd.ProcThreadAttributes = procThreadAttrs + + stdin, err := cmd.StdinPipe() + if err != nil { + return "", 1, err + } + + go func() { + defer stdin.Close() + io.Copy(stdin, bytes.NewReader(cfg.contents)) + }() + + out, err := cmd.CombinedOutput() + code := cmd.ProcessState.ExitCode() + if code == renderer.ExitWouldRenderButDidnt { + err = nil // erase the "exit code 117" error + } + + return string(out), code, err +} + +// readTemplateFromSandbox runs the template-render command in a subprocess that +// will chroot itself to prevent a task from swapping a directory between the +// sandbox path and the source with a symlink pointing to somewhere outside +// the sandbox. +func readTemplateFromSandbox(cfg *sandboxConfig) ([]byte, []byte, int, error) { + procThreadAttrs, cleanup, err := winappcontainer.CreateProcThreadAttributes(cfg.taskID) + if err != nil { + return nil, nil, ExitCodeFatal, fmt.Errorf("could not create proc attributes: %v", err) + } + defer cleanup() + + // Safe to inject user input as command arguments since winexec.Command + // does not invoke a shell. Also, the only user-controlled argument here is + // the source path which we've already verified is at least a valid path in + // the caller. + args := []string{ + "template-render", + "read", + "-sandbox-path", cfg.sandboxPath, + "-source-path", cfg.sourcePath, + } + + ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) + defer cancel() + + cmd := winexec.CommandContext(ctx, cfg.thisBin, args...) + cmd.ProcThreadAttributes = procThreadAttrs + var outb, errb bytes.Buffer + cmd.Stdout = &outb + cmd.Stderr = &errb + + err = cmd.Run() + stdout := outb.Bytes() + stderr := errb.Bytes() + return stdout, stderr, cmd.ProcessState.ExitCode(), err +} diff --git a/client/allocrunner/taskrunner/template/template_windows_test.go b/client/allocrunner/taskrunner/template/template_windows_test.go new file mode 100644 index 000000000..4a1b14c6d --- /dev/null +++ b/client/allocrunner/taskrunner/template/template_windows_test.go @@ -0,0 +1,103 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +//go:build windows + +package template + +import ( + "fmt" + "os" + "path/filepath" + "testing" + + "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/client/taskenv" + clienttestutil "github.com/hashicorp/nomad/client/testutil" + "github.com/hashicorp/nomad/nomad/mock" + "github.com/hashicorp/nomad/nomad/structs" + "github.com/shoenig/test/must" +) + +// TestTaskTemplateManager_SymlinkEscapeSource verifies that a malicious or +// compromised task cannot use a symlink parent directory to cause reads to +// escape the sandbox +func TestTaskTemplateManager_SymlinkEscapeSource(t *testing.T) { + ci.Parallel(t) + clienttestutil.RequireAdministrator(t) // making symlinks is privileged on Windows + + // Create a set of "sensitive" files outside the task dir that the task + // should not be able to read or write to, despite filesystem permissions + sensitiveDir := t.TempDir() + sensitiveFile := filepath.Join(sensitiveDir, "sensitive.txt") + os.WriteFile(sensitiveFile, []byte("very-secret-stuff"), 0755) + + a := mock.Alloc() + task := a.Job.TaskGroups[0].Tasks[0] + task.Name = TestTaskName + template := &structs.Template{ChangeMode: structs.TemplateChangeModeNoop} + + // Build a new task environment with a valid DestPath + harness := newTestHarness(t, []*structs.Template{template}, false, false) + harness.envBuilder = taskenv.NewBuilder(harness.node, a, task, "global") + harness.envBuilder.SetClientTaskRoot(harness.taskDir) + os.MkdirAll(filepath.Join(harness.taskDir, "local"), 0755) + harness.templates[0].DestPath = filepath.Join("local", "dest.tmpl") + + // "Attack" the SourcePath by creating a symlink from the sensitive file to + // the task dir; this simulates what happens when the client restarts and + // the task attacks while the client is down, which is the easiest case to + // reproduce + must.NoError(t, os.Symlink(sensitiveDir, filepath.Join(harness.taskDir, "local", "pwned"))) + harness.templates[0].SourcePath = filepath.Join("local", "pwned", "sensitive.txt") + fullSrcPath := filepath.Join(harness.taskDir, harness.templates[0].SourcePath) + + err := harness.startWithErr() + t.Cleanup(harness.stop) + + must.EqError(t, err, fmt.Sprintf( + "failed to read template: failed to open source file %q: open %s: Access is denied.\n", fullSrcPath, fullSrcPath)) +} + +// TestTaskTemplateManager_SymlinkEscapeDest verifies that a malicious or +// compromised task cannot use a symlink parent directory to cause writes to +// escape the sandbox +func TestTaskTemplateManager_SymlinkEscapeDest(t *testing.T) { + ci.Parallel(t) + clienttestutil.RequireAdministrator(t) // making symlinks is privileged on Windows + + // Create a set of "sensitive" files outside the task dir that the task + // should not be able to read or write to, despite filesystem permissions + sensitiveDir := t.TempDir() + sensitiveFile := filepath.Join(sensitiveDir, "sensitive.txt") + os.WriteFile(sensitiveFile, []byte("very-secret-stuff"), 0755) + + a := mock.Alloc() + task := a.Job.TaskGroups[0].Tasks[0] + task.Name = TestTaskName + template := &structs.Template{ChangeMode: structs.TemplateChangeModeNoop} + + // Build a task environment with a valid SourcePath + harness := newTestHarness(t, []*structs.Template{template}, false, false) + harness.envBuilder = taskenv.NewBuilder(harness.node, a, task, "global") + harness.envBuilder.SetClientTaskRoot(harness.taskDir) + os.MkdirAll(filepath.Join(harness.taskDir, "local"), 0755) + + harness.templates[0].SourcePath = filepath.Join("local", "source.tmpl") + must.NoError(t, os.WriteFile( + filepath.Join(harness.taskDir, harness.templates[0].SourcePath), + []byte("hacked!"), 0755)) + + // "Attack" the DestPath by creating a symlink from the sensitive file to + // the task dir + must.NoError(t, os.Symlink(sensitiveDir, filepath.Join(harness.taskDir, "local", "pwned"))) + harness.templates[0].DestPath = filepath.Join("local", "pwned", "sensitive.txt") + + err := harness.startWithErr() + t.Cleanup(harness.stop) + + // Ensure we haven't written despite the error + b, err := os.ReadFile(sensitiveFile) + must.NoError(t, err) + must.Eq(t, "very-secret-stuff", string(b)) +} diff --git a/client/allocrunner/taskrunner/template_hook.go b/client/allocrunner/taskrunner/template_hook.go index 3cd8c412d..a7ce6f940 100644 --- a/client/allocrunner/taskrunner/template_hook.go +++ b/client/allocrunner/taskrunner/template_hook.go @@ -94,6 +94,10 @@ type templateHook struct { // taskDir is the task directory taskDir string + + // taskID is a unique identifier for this templateHook, for use in + // downstream platform-specific template runner consumers + taskID string } func newTemplateHook(config *templateHookConfig) *templateHook { @@ -127,6 +131,7 @@ func (h *templateHook) Prestart(ctx context.Context, req *interfaces.TaskPrestar h.taskDir = req.TaskDir.Dir h.vaultToken = req.VaultToken h.nomadToken = req.NomadToken + h.taskID = req.Alloc.ID + "-" + req.Task.Name // Set the consul token if the task uses WI. tg := h.config.alloc.Job.LookupTaskGroup(h.config.alloc.TaskGroup) @@ -240,6 +245,8 @@ func (h *templateHook) newManager() (unblock chan struct{}, err error) { MaxTemplateEventRate: template.DefaultMaxTemplateEventRate, NomadNamespace: h.config.nomadNamespace, NomadToken: h.nomadToken, + TaskID: h.taskID, + Logger: h.logger, }) if err != nil { h.logger.Error("failed to create template manager", "error", err) diff --git a/client/allocrunner/taskrunner/template_hook_test.go b/client/allocrunner/taskrunner/template_hook_test.go index 91566a3a1..6aa7e1847 100644 --- a/client/allocrunner/taskrunner/template_hook_test.go +++ b/client/allocrunner/taskrunner/template_hook_test.go @@ -125,6 +125,7 @@ func Test_templateHook_Prestart_ConsulWI(t *testing.T) { driverHandle: nil, } req := &interfaces.TaskPrestartRequest{ + Alloc: a, Task: a.Job.TaskGroups[0].Tasks[0], TaskDir: &allocdir.TaskDir{Dir: "foo"}, } @@ -227,6 +228,7 @@ func Test_templateHook_Prestart_Vault(t *testing.T) { // Start template hook with a timeout context to ensure it exists. req := &interfaces.TaskPrestartRequest{ + Alloc: alloc, Task: task, TaskDir: &allocdir.TaskDir{Dir: taskDir}, } diff --git a/client/testutil/driver_compatible.go b/client/testutil/driver_compatible.go index e139c60d9..441743999 100644 --- a/client/testutil/driver_compatible.go +++ b/client/testutil/driver_compatible.go @@ -5,6 +5,7 @@ package testutil import ( "os/exec" + "os/user" "runtime" "syscall" "testing" @@ -26,6 +27,15 @@ func RequireNonRoot(t *testing.T) { } } +// RequireAdministrator skips tests unless: +// - running as Windows Administrator +func RequireAdministrator(t *testing.T) { + user, _ := user.Current() + if user.Name != "Administrator" { + t.Skip("Test requires Administrator") + } +} + // RequireConsul skips tests unless: // - "consul" executable is detected on $PATH func RequireConsul(t *testing.T) { diff --git a/go.mod b/go.mod index b59f0c04c..24885bb68 100644 --- a/go.mod +++ b/go.mod @@ -45,7 +45,7 @@ require ( github.com/gosuri/uilive v0.0.4 github.com/grpc-ecosystem/go-grpc-middleware v1.4.0 github.com/hashicorp/cap v0.2.0 - github.com/hashicorp/consul-template v0.35.0 + github.com/hashicorp/consul-template v0.36.1-0.20240205193627-e15d61bb21ae github.com/hashicorp/consul/api v1.26.1 github.com/hashicorp/consul/sdk v0.15.0 github.com/hashicorp/cronexpr v1.1.2 diff --git a/go.sum b/go.sum index f287d1fc2..53a3f597f 100644 --- a/go.sum +++ b/go.sum @@ -604,8 +604,8 @@ github.com/grpc-ecosystem/go-grpc-middleware v1.4.0/go.mod h1:g5qyo/la0ALbONm6Vb github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= github.com/hashicorp/cap v0.2.0 h1:Cgr1iDczX17y0PNF5VG+bWTtDiimYL8F18izMPbWNy4= github.com/hashicorp/cap v0.2.0/go.mod h1:zb3VvIFA0lM2lbmO69NjowV9dJzJnZS89TaM9blXPJA= -github.com/hashicorp/consul-template v0.35.0 h1:wLlninL3h31ftATB31Evo0QbzGrQS9T775mWl3JSy28= -github.com/hashicorp/consul-template v0.35.0/go.mod h1:vM3cOhA+7pbu+esuIU1HzbPttG5RENs69d2hfDnx4xM= +github.com/hashicorp/consul-template v0.36.1-0.20240205193627-e15d61bb21ae h1:ehZNpVWpoWtMrxFE/FKvJyfjDGY384iaBccpYu13yCw= +github.com/hashicorp/consul-template v0.36.1-0.20240205193627-e15d61bb21ae/go.mod h1:bvidXKwpfXzJ1X4wDw68OXnVxy5k7HLOHhOf5gnQr3M= github.com/hashicorp/consul/api v1.10.1-0.20230925152502-e5f5fc9301c7 h1:VjNJGdw+esQUaPG2J1DiT/rEN21/1GQfHb3CvPQlD8U= github.com/hashicorp/consul/api v1.10.1-0.20230925152502-e5f5fc9301c7/go.mod h1:+pNEP6hQgkrBLjQlYLI13/tyyb1GK3MGVw1PC/IHk9M= github.com/hashicorp/consul/sdk v0.15.0 h1:2qK9nDrr4tiJKRoxPGhm6B7xJjLVIQqkjiab2M4aKjU= diff --git a/helper/winappcontainer/winappcontainer.go b/helper/winappcontainer/winappcontainer.go new file mode 100644 index 000000000..1b3d687ed --- /dev/null +++ b/helper/winappcontainer/winappcontainer.go @@ -0,0 +1,342 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +//go:build windows + +package winappcontainer + +import ( + "errors" + "fmt" + "regexp" + "syscall" + "unsafe" + + "github.com/hashicorp/go-hclog" + "github.com/hashicorp/nomad/helper/winexec" + "golang.org/x/sys/windows" +) + +var ( + userenvDLL = windows.NewLazySystemDLL("userenv.dll") + procCreateAppContainerProfile = userenvDLL.NewProc("CreateAppContainerProfile") + procDeleteAppContainerProfile = userenvDLL.NewProc("DeleteAppContainerProfile") + procDeriveAppContainerSidFromAppContainerName = userenvDLL.NewProc("DeriveAppContainerSidFromAppContainerName") + + ErrAccessDeniedToCreateSandbox = errors.New("Nomad does not have sufficient permission to create the template rendering AppContainer") + ErrInvalidArg = errors.New("Windows returned E_INVALIDARG, this is a bug in Nomad") + + invalidContainerName = regexp.MustCompile(`[^-_. A-Za-z0-9]+`) +) + +const ( + // https://learn.microsoft.com/en-us/windows/win32/fileio/file-access-rights-constants + FILE_ALL_ACCESS uint32 = 2032127 + + // https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-updateprocthreadattribute + PROC_THREAD_ATTRIBUTE_SECURITY_CAPABILITIES uint32 = 0x20009 // 131081 + + // https://learn.microsoft.com/en-us/windows/win32/api/userenv/nf-userenv-createappcontainerprofile + WindowsResultOk uintptr = 0x0 // S_OK + WindowsResultErrAccessDenied uintptr = 0x80070005 // E_ACCESS_DENIED + WindowsResultErrAlreadyExists uintptr = 0x800700b7 // HRESULT_FROM_WIN32(ERROR_ALREADY_EXISTS) + WindowsResultErrInvalidArg uintptr = 0x80070057 // E_INVALIDARG + WindowsResultBadEnvironment uintptr = 0x8007000a // BAD_ENVIRONMENT + + ExitCodeFatal int = 13 // typically this is going to be a bug in Nomad + + // sidBufferSz is the size of the buffer that the PSID will be written + // to. The sys/x/windows.LookupSID method gets a INSUFFICIENT_BUFFER error + // that is uses to retry with a larger size, but the methods we're calling + // don't. Empirically, the buffer is getting populated by a *pointer* to the + // PSID, so this should only need to be a 64-bit word long, but the failure + // mode if we're wrong breaks template rendering, so give ourselves some + // room to screw it up. + sidBufferSz int = 128 +) + +func cleanupSID(sid *windows.SID) func() { + return func() { + windows.FreeSid(sid) + } +} + +func taskIDtoContainerName(id string) string { + return trimString(invalidContainerName.ReplaceAllString(id, "-"), 64) +} + +func trimString(s string, max int) string { + if s == "" { + // makes testing easier to handle this gracefully + return "appcontainer" + } + if max > len(s) { + max = len(s) + } + max = max - 1 // less a trailing NULL + return s[:max] +} + +type AppContainerConfig struct { + Name string + AllowedPaths []string +} + +func CreateAppContainer(log hclog.Logger, cfg *AppContainerConfig) error { + sid, cleanup, err := createAppContainerProfile(log, cfg.Name) + if err != nil { + return fmt.Errorf("could not create AppContainer profile: %w", err) + } + defer cleanup() + + for _, path := range cfg.AllowedPaths { + err := allowNamedObjectAccess(log, sid, path) + if err != nil { + return fmt.Errorf("could not grant object access: %w", err) + } + } + + return nil +} + +func createAppContainerProfile(log hclog.Logger, taskID string) (*windows.SID, func(), error) { + + containerName := taskIDtoContainerName(taskID) + pszAppContainerName, err := windows.UTF16PtrFromString(containerName) + if err != nil { + return nil, nil, fmt.Errorf( + "container name %q could not be encoded to utf16: %w", containerName, err) + } + + taskID = trimString(taskID, 512) + pszDisplayName, err := windows.UTF16PtrFromString(taskID) + if err != nil { + return nil, nil, fmt.Errorf( + "task ID %q could not be encoded to utf16: %w", taskID, err) + } + + pszDescription, err := windows.UTF16PtrFromString( + "template renderer AppContainer for " + taskID) + if err != nil { + return nil, nil, fmt.Errorf( + "description for task ID %q could not be encoded to utf16: %w", taskID, err) + } + + var pCapabilities uintptr // PSID_AND_ATTRIBUTES + var dwCapabilityCount uint32 // DWORD + + // note: this buffer gets populated with a pointer to a PSID, and the + // resulting handle needs to be freed here in the caller + sidBuffer := make([]byte, sidBufferSz) + + // USERENVAPI HRESULT CreateAppContainerProfile( + // [in] PCWSTR pszAppContainerName, + // [in] PCWSTR pszDisplayName, + // [in] PCWSTR pszDescription, + // [in] PSID_AND_ATTRIBUTES pCapabilities, + // [in] DWORD dwCapabilityCount, + // [out] PSID *ppSidAppContainerSid + // ); + // https://learn.microsoft.com/en-us/windows/win32/api/userenv/nf-userenv-createappcontainerprofile + result, _, err := procCreateAppContainerProfile.Call( + uintptr(unsafe.Pointer(pszAppContainerName)), + uintptr(unsafe.Pointer(pszDisplayName)), + uintptr(unsafe.Pointer(pszDescription)), + uintptr(pCapabilities), + uintptr(dwCapabilityCount), + uintptr(unsafe.Pointer(&sidBuffer)), + ) + ppSidAppContainerSid := (*windows.SID)(unsafe.Pointer(&sidBuffer[0])) + + switch result { + case WindowsResultOk: + if !ppSidAppContainerSid.IsValid() { + return nil, nil, fmt.Errorf("creating AppContainer returned invalid SID: %v", + ppSidAppContainerSid.String()) + } + + log.Debug("created new AppContainer", "sid", ppSidAppContainerSid.String()) + return ppSidAppContainerSid, cleanupSID(ppSidAppContainerSid), nil + + case WindowsResultErrAccessDenied, WindowsResultBadEnvironment: + // we cannot sandbox if Nomad is running with insufficient privs, so in + // that case we rely on the file system permissions that the user gave + // Nomad + return nil, nil, ErrAccessDeniedToCreateSandbox + + case WindowsResultErrAlreadyExists: + // WARNING: this method will return a derived SID even if the container + // doesn't already exist, so it's critical that we don't "optimize" this + // method by checking first! + return deriveAppContainerSID(taskID) + + case WindowsResultErrInvalidArg: + return nil, nil, ErrInvalidArg + + default: + // note: the error we get here is always non-nil and always reports + // sucess for known error codes + return nil, nil, fmt.Errorf("creating AppContainer failed: (%x) %v", + result, syscall.Errno(result)) + } + +} + +// deriveAppContainerSID gets the AppContainer SID that should be associated +// with the given task ID. Note that if the AppContainer exists, Windows will +// give us the SID that it should have, so we can only call this if we know that +// we've already created the AppContainer +func deriveAppContainerSID(taskID string) (*windows.SID, func(), error) { + + containerName := taskIDtoContainerName(taskID) + pszAppContainerName, err := windows.UTF16PtrFromString(containerName) + if err != nil { + return nil, nil, fmt.Errorf( + "container name %q could not be encoded to utf16: %w", containerName, err) + } + + // note: this buffer gets populated with a pointer to a PSID, and the + // resulting handle needs to be freed here in the caller + sidBuffer := make([]byte, sidBufferSz) + + // USERENVAPI HRESULT DeriveAppContainerSidFromAppContainerName( + // [in] PCWSTR pszAppContainerName, + // [out] PSID *ppsidAppContainerSid + // ); + // https://learn.microsoft.com/en-us/windows/win32/api/userenv/nf-userenv-deriveappcontainersidfromappcontainername + result, _, err := procDeriveAppContainerSidFromAppContainerName.Call( + uintptr(unsafe.Pointer(pszAppContainerName)), + uintptr(unsafe.Pointer(&sidBuffer)), + ) + switch result { + case WindowsResultOk: + ppSidAppContainerSid := (*windows.SID)(unsafe.Pointer(&sidBuffer[0])) + if !ppSidAppContainerSid.IsValid() { + return nil, nil, fmt.Errorf("looking up AppContainer SID returned invalid SID: %v", + ppSidAppContainerSid.String()) + } + + return ppSidAppContainerSid, cleanupSID(ppSidAppContainerSid), nil + default: + return nil, nil, fmt.Errorf("looking up AppContainer SID failed: errno=%v, err=%w", + syscall.Errno(result), err) + } +} + +// allowNamedObjectAccess grants inheritable R/W access to the object path for +// the AppContainer SID +func allowNamedObjectAccess(log hclog.Logger, sid *windows.SID, path string) error { + pathAccess := windows.EXPLICIT_ACCESS{ + AccessPermissions: windows.ACCESS_MASK(FILE_ALL_ACCESS), + AccessMode: windows.GRANT_ACCESS, + Inheritance: windows.OBJECT_INHERIT_ACE | windows.CONTAINER_INHERIT_ACE, + Trustee: windows.TRUSTEE{ + MultipleTrustee: nil, + MultipleTrusteeOperation: windows.NO_MULTIPLE_TRUSTEE, + TrusteeForm: windows.TRUSTEE_IS_SID, + TrusteeType: windows.TRUSTEE_IS_GROUP, + TrusteeValue: windows.TrusteeValueFromSID(sid), + }, + } + + pathSD, err := windows.GetNamedSecurityInfo( + path, windows.SE_FILE_OBJECT, windows.DACL_SECURITY_INFORMATION) + if err != nil { + return fmt.Errorf("could not GetNamedSecurityInfo for %q: %w", path, err) + } + + acl, _, err := pathSD.DACL() + if err != nil { + return fmt.Errorf("could not get DACL for %q: %w", path, err) + } + + newACL, err := windows.ACLFromEntries([]windows.EXPLICIT_ACCESS{pathAccess}, acl) + if err != nil { + return fmt.Errorf("could not create new DACL for %q: %w", path, err) + } + + err = windows.SetNamedSecurityInfo( + path, windows.SE_FILE_OBJECT, windows.DACL_SECURITY_INFORMATION, nil, nil, newACL, nil) + if err != nil { + return fmt.Errorf("could not SetNamedSecurityInfo for %q: %w", path, err) + } + + log.Trace("AppContainer access configured", "sid", sid, "path", path) + return nil +} + +func DeleteAppContainer(log hclog.Logger, taskID string) error { + + containerName := taskIDtoContainerName(taskID) + pszAppContainerName, err := windows.UTF16PtrFromString(containerName) + if err != nil { + return fmt.Errorf( + "container name %q could not be encoded to utf16: %w", containerName, err) + } + + // USERENVAPI HRESULT DeleteAppContainerProfile( + // [in] PCWSTR pszAppContainerName + // ); + // https://learn.microsoft.com/en-us/windows/win32/api/userenv/nf-userenv-deleteappcontainerprofile + result, _, err := procDeleteAppContainerProfile.Call( + uintptr(unsafe.Pointer(pszAppContainerName)), + ) + + switch result { + case WindowsResultOk: // we get this if AppContainer doesn't exist + log.Debug("deleted AppContainer") + return nil + + case WindowsResultErrInvalidArg: + return ErrInvalidArg + + default: + // note: the error we get here is always non-nil and always reports + // sucess for known error codes + return fmt.Errorf("deleting AppContainer failed: errno=%v, err=%w", + syscall.Errno(result), err) + } + +} + +func CreateProcThreadAttributes(taskID string) ([]winexec.ProcThreadAttribute, func(), error) { + + sid, cleanup, err := deriveAppContainerSID(taskID) + if err != nil { + return nil, cleanup, fmt.Errorf("could not get SID for app container: %w", err) + } + + procThreadAttrs, err := createProcThreadAttributes(sid) + if err != nil { + return nil, cleanup, fmt.Errorf("could not create proc attributes: %w", err) + } + + return procThreadAttrs, cleanup, nil +} + +type SecurityCapabilities struct { + AppContainerSid uintptr // PSID *windows.SID + Capabilities uintptr // SID_AND_ATTRIBUTES *windows.SIDAndAttributes + CapabilityCount uint32 + Reserved uint32 +} + +// createProcThreadAttributes returns ProcThreadAttributes so that winexec.Cmd +// can set the SecurityCapabilities on the process +func createProcThreadAttributes(containerSID *windows.SID) ([]winexec.ProcThreadAttribute, error) { + + sd, err := windows.NewSecurityDescriptor() + if err != nil { + return nil, fmt.Errorf("could not create new security descriptor: %w", err) + } + sd.SetOwner(containerSID, true) + + sc := &SecurityCapabilities{AppContainerSid: uintptr(unsafe.Pointer(containerSID))} + + return []winexec.ProcThreadAttribute{ + { + Attribute: uintptr(PROC_THREAD_ATTRIBUTE_SECURITY_CAPABILITIES), + Value: unsafe.Pointer(sc), + Size: uintptr(unsafe.Sizeof(*sc)), + }}, nil +} diff --git a/helper/winappcontainer/winappcontainer_test.go b/helper/winappcontainer/winappcontainer_test.go new file mode 100644 index 000000000..fb6632b9b --- /dev/null +++ b/helper/winappcontainer/winappcontainer_test.go @@ -0,0 +1,78 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +//go:build windows + +package winappcontainer + +import ( + "context" + "io" + "os" + "testing" + "time" + + "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/helper/testlog" + "github.com/hashicorp/nomad/helper/winexec" + "github.com/shoenig/test/must" +) + +// TestAppContainer_CatStdin runs a "cat"-like command in an AppContainer and +// pipes data into stdin. We use TestCatHelper to do this so that we don't need +// to rely on external programs +func TestAppContainer_CatStdin(t *testing.T) { + ci.Parallel(t) + t.Helper() + + path, _ := os.Executable() + + containerCfg := &AppContainerConfig{ + Name: t.Name(), + AllowedPaths: []string{path}, + } + logger := testlog.HCLogger(t) + err := CreateAppContainer(logger, containerCfg) + if err != nil { + // if the tests are running as an unprivileged user, we might not be + // able to create the sandbox, but in that case we're not vulnerable to + // the attacks this is intended to prevent anyways + must.EqError(t, err, ErrAccessDeniedToCreateSandbox.Error()) + } + + t.Cleanup(func() { + must.NoError(t, DeleteAppContainer(logger, t.Name())) + }) + + procThreadAttrs, cleanup, err := CreateProcThreadAttributes(t.Name()) + must.NoError(t, err) + t.Cleanup(cleanup) + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + + args := []string{"-test.run=TestCatHelper", "--"} + cmd := winexec.CommandContext(ctx, path, args...) + cmd.Env = append(os.Environ(), "GO_WANT_HELPER_PROCESS=1") + cmd.ProcThreadAttributes = procThreadAttrs + + input := "Input string\nLine 2" + stdin, _ := cmd.StdinPipe() + go func() { + defer stdin.Close() + io.WriteString(stdin, input) + }() + + bs, err := cmd.CombinedOutput() + must.EqError(t, err, "exit status 7") + must.Eq(t, input, string(bs)) +} + +func TestCatHelper(t *testing.T) { + if os.Getenv("GO_WANT_HELPER_PROCESS") != "1" { + t.Skip("this should only be run as part of the tests above") + return + } + io.Copy(os.Stdout, os.Stdin) + os.Exit(7) +} diff --git a/helper/winexec/create.go b/helper/winexec/create.go new file mode 100644 index 000000000..bd6da3ef3 --- /dev/null +++ b/helper/winexec/create.go @@ -0,0 +1,151 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +//go:build windows + +package winexec + +import ( + "errors" + "fmt" + "os" + "runtime" + "syscall" + "unsafe" + + "golang.org/x/sys/windows" +) + +var EINVAL = errors.New("EINVAL") + +func (c *Cmd) createProcess( + path string, commandLine []string, + userProcThreadAttrs []ProcThreadAttribute, + attr *syscall.ProcAttr, +) (*os.Process, error) { + + // Much like in os/exec Command, we're creating the process directly without + // creating a shell. Unlike what we're doing for Linux/Unix, we're creating + // this process directly into the AppContainer rather than starting the + // process and dropping privs, and we control all the initial arguments that + // enforce we're calling the particular binary we want. + cli := windows.ComposeCommandLine(commandLine) + wCommandLine, err := windows.UTF16PtrFromString(cli) + if err != nil { + return nil, fmt.Errorf("could not create UTF16 pointer from cli: %w", err) + } + + var wCurrentDir *uint16 + if c.Dir != "" { + wCurrentDir, err = windows.UTF16PtrFromString(c.Dir) + if err != nil { + return nil, fmt.Errorf("could not create UTF16 pointer from currentDir: %w", err) + } + } + + parentProcess, _ := windows.GetCurrentProcess() + p := parentProcess + fd := make([]windows.Handle, len(attr.Files)) + for i := range attr.Files { + if attr.Files[i] > 0 { + destinationProcessHandle := parentProcess + err := windows.DuplicateHandle( + p, windows.Handle(attr.Files[i]), + destinationProcessHandle, &fd[i], 0, true, windows.DUPLICATE_SAME_ACCESS) + if err != nil { + return nil, err + } + defer windows.DuplicateHandle( + parentProcess, fd[i], 0, nil, 0, false, windows.DUPLICATE_CLOSE_SOURCE) + } + } + + procThreadAttrs, err := mergeProcThreadAttrs(fd, userProcThreadAttrs) + if err != nil { + return nil, err + } + + startupInfo := new(windows.StartupInfoEx) + startupInfo.Cb = uint32(unsafe.Sizeof(*startupInfo)) // Cb: size of struct in bytes + startupInfo.ProcThreadAttributeList = procThreadAttrs.List() + startupInfo.StdInput = fd[0] + startupInfo.StdOutput = fd[1] + startupInfo.StdErr = fd[2] + startupInfo.Flags = syscall.STARTF_USESTDHANDLES + + flags := uint32(windows.CREATE_UNICODE_ENVIRONMENT | + windows.EXTENDED_STARTUPINFO_PRESENT) + + envBlock, err := createEnvBlock(attr.Env) + if err != nil { + return nil, err + } + + outProcInfo := new(windows.ProcessInformation) + err = windows.CreateProcess( + nil, //appName + wCommandLine, + nil, // procSecurity + nil, // threadSecurity + true, // inheritHandles, + flags, + envBlock, + wCurrentDir, + &startupInfo.StartupInfo, + outProcInfo) + if err != nil { + return nil, fmt.Errorf("could not CreateProcess: %w", err) + } + + defer windows.CloseHandle(windows.Handle(outProcInfo.Thread)) + + // this ensures we don't call the finalizers on the attr.Files before we + // make the syscall. See stdlib's os/exec_posix.go for another example. + runtime.KeepAlive(fd) + runtime.KeepAlive(attr) + + return os.FindProcess(int(outProcInfo.ProcessId)) +} + +// ref https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-updateprocthreadattribute +// actual value from https://docs.rs/windows-sys/latest/windows_sys/Win32/System/Threading/constant.PROC_THREAD_ATTRIBUTE_HANDLE_LIST.html and empirically tested +const PROC_THREAD_ATTRIBUTE_HANDLE_LIST = 0x20002 // 131074 + +type ProcThreadAttribute struct { + Attribute uintptr + Value unsafe.Pointer + Size uintptr +} + +func mergeProcThreadAttrs( + fd []windows.Handle, + userAttrs []ProcThreadAttribute, +) (*windows.ProcThreadAttributeListContainer, error) { + + newLen := len(userAttrs) + 1 + + procThreadAttrs, err := windows.NewProcThreadAttributeList(uint32(newLen)) + if err != nil { + return nil, fmt.Errorf("could not create NewProcThreadAttributeList: %v", err) + } + + err = procThreadAttrs.Update( + uintptr(PROC_THREAD_ATTRIBUTE_HANDLE_LIST), + unsafe.Pointer(&fd[0]), + uintptr(len(fd))*unsafe.Sizeof(fd[0])) + if err != nil { + return nil, fmt.Errorf("could not update procthread attrs: %v", err) + } + + for _, userAttr := range userAttrs { + err = procThreadAttrs.Update( + userAttr.Attribute, + userAttr.Value, + userAttr.Size) + if err != nil { + return nil, fmt.Errorf("could not update procthread attrs: %v", err) + } + } + + return procThreadAttrs, nil +} diff --git a/helper/winexec/winexec.go b/helper/winexec/winexec.go new file mode 100644 index 000000000..e6e503a59 --- /dev/null +++ b/helper/winexec/winexec.go @@ -0,0 +1,663 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// TODO(tgross): almost everything in this file is lifted directly from the +// stdlib's os/exec/exec.go and syscall/exec_windows.go, stripped down to remove +// non-Windows bits, some legacy cruft from upstream, and methods we don't care +// about here. This gives us the StdinPipe and CombinedOutput methods we want, +// but adds the ProcThreadAttributeList which we need for running Windows +// applications in AppContainers. Ideally we'd get this feature upstreamed and +// then we could remove this package entirely. A similar proposal was rejected +// in https://github.com/golang/go/issues/44005 but hopefully using this package +// as example of the lift involved we can advocate for getting the issue +// reconsidered. + +//go:build windows + +package winexec + +import ( + "bytes" + "context" + "errors" + "io" + "io/fs" + "os" + "os/exec" + "path/filepath" + "runtime" + "strings" + "syscall" + "time" + "unicode/utf16" +) + +type Cmd struct { + *exec.Cmd + + // these are all private fields of exec.Cmd that we're hoisting into this + // struct so that we can access them in the methods we're implementing + ctx context.Context + childIOFiles []io.Closer + parentIOPipes []io.Closer + goroutine []func() error + goroutineErr <-chan error + ctxResult <-chan ctxResult + + // ProcThreadAttributes will get merged with the one that gets created + // automatically for StartupInfoEx + ProcThreadAttributes []ProcThreadAttribute +} + +// A ctxResult reports the result of watching the Context associated with a +// running command (and sending corresponding signals if needed). +// This is lifted from os/exec/exec.go +type ctxResult struct { + err error + timer *time.Timer +} + +// CommandContext returns a new Cmd with a given context. Note we return the +// concrete struct and not an interface here because callers need to update +// fields on the inner exec.Cmd directly +func CommandContext(ctx context.Context, name string, arg ...string) *Cmd { + if ctx == nil { + panic("nil Context") + } + innerCmd := exec.Command(name, arg...) + + cmd := &Cmd{} + cmd.Cmd = innerCmd + cmd.ctx = ctx + cmd.Cancel = func() error { + return cmd.Process.Kill() + } + + if filepath.Base(name) == name { + lp, err := exec.LookPath(name) + if lp != "" { + cmd.Path = lp + } + if err != nil { + cmd.Err = err + } + } + + return cmd +} + +func (c *Cmd) StdinPipe() (io.WriteCloser, error) { + if c.Stdin != nil { + return nil, errors.New("exec: Stdin already set") + } + if c.Process != nil { + return nil, errors.New("exec: StdinPipe after process started") + } + pr, pw, err := os.Pipe() + if err != nil { + return nil, err + } + c.Stdin = pr + c.childIOFiles = append(c.childIOFiles, pr) + c.parentIOPipes = append(c.parentIOPipes, pw) + return pw, nil +} + +func (c *Cmd) CombinedOutput() ([]byte, error) { + if c.Stdout != nil { + return nil, errors.New("exec: Stdout already set") + } + if c.Stderr != nil { + return nil, errors.New("exec: Stderr already set") + } + var b bytes.Buffer + c.Stdout = &b + c.Stderr = &b + err := c.Run() + return b.Bytes(), err +} + +func (c *Cmd) Run() error { + err := c.Start() + if err != nil { + return err + } + return c.Wait() +} + +func (c *Cmd) Start() error { + + if c.Process != nil { + return errors.New("exec: already started") + } + + started := false + defer func() { + closeDescriptors(c.childIOFiles) + c.childIOFiles = nil + + if !started { + closeDescriptors(c.parentIOPipes) + c.parentIOPipes = nil + } + }() + + if c.Path == "" && c.Err == nil { + c.Err = errors.New("exec: no command") + } + if c.Err != nil { + return c.Err + } + + if c.Cancel != nil && c.ctx == nil { + return errors.New("exec: command with a non-nil Cancel was not created with CommandContext") + } + if c.ctx != nil { + select { + case <-c.ctx.Done(): + return c.ctx.Err() + default: + } + } + + childFiles := make([]*os.File, 0, 3) + + stdin, err := c.childStdin() + if err != nil { + return err + } + childFiles = append(childFiles, stdin) + + stdout, err := c.childStdout() + if err != nil { + return err + } + childFiles = append(childFiles, stdout) + + stderr, err := c.childStderr(stdout) + if err != nil { + return err + } + childFiles = append(childFiles, stderr) + + env, err := c.environ() + if err != nil { + return err + } + + attr := &syscall.ProcAttr{ + Dir: c.Dir, + Files: []uintptr{ + childFiles[0].Fd(), + childFiles[1].Fd(), + childFiles[2].Fd(), + }, + Env: env, + Sys: c.SysProcAttr, + } + + c.Process, err = c.createProcess(c.Path, c.Args, c.ProcThreadAttributes, attr) + if err != nil { + return err + } + started = true + + if len(c.goroutine) > 0 { + goroutineErr := make(chan error, 1) + c.goroutineErr = goroutineErr + + type goroutineStatus struct { + running int + firstErr error + } + statusc := make(chan goroutineStatus, 1) + statusc <- goroutineStatus{running: len(c.goroutine)} + for _, fn := range c.goroutine { + go func(fn func() error) { + err := fn() + status := <-statusc + if status.firstErr == nil { + status.firstErr = err + } + status.running-- + if status.running == 0 { + goroutineErr <- status.firstErr + } else { + statusc <- status + } + }(fn) + } + c.goroutine = nil + } + + if (c.Cancel != nil || c.WaitDelay != 0) && c.ctx != nil && c.ctx.Done() != nil { + resultc := make(chan ctxResult) + c.ctxResult = resultc + go c.watchCtx(resultc) + } + + return nil +} + +func (c *Cmd) environ() ([]string, error) { + var err error + env := c.Env + if env == nil { + return os.Environ(), nil + } + env, dedupErr := dedupEnv(env) + if err == nil { + err = dedupErr + } + return addCriticalEnv(env), nil +} + +// dedupEnv returns a copy of env with any duplicates removed, in favor of +// later values. +// Items not of the normal environment "key=value" form are preserved unchanged. +// Except on Plan 9, items containing NUL characters are removed, and +// an error is returned along with the remaining values. +func dedupEnv(env []string) ([]string, error) { + return dedupEnvCase(true, false, env) +} + +// dedupEnvCase is dedupEnv with a case option for testing. +// If caseInsensitive is true, the case of keys is ignored. +// If nulOK is false, items containing NUL characters are allowed. +func dedupEnvCase(caseInsensitive, nulOK bool, env []string) ([]string, error) { + // Construct the output in reverse order, to preserve the + // last occurrence of each key. + var err error + out := make([]string, 0, len(env)) + saw := make(map[string]bool, len(env)) + for n := len(env); n > 0; n-- { + kv := env[n-1] + + // Reject NUL in environment variables to prevent security issues (#56284); + // except on Plan 9, which uses NUL as os.PathListSeparator (#56544). + if !nulOK && strings.IndexByte(kv, 0) != -1 { + err = errors.New("exec: environment variable contains NUL") + continue + } + + i := strings.Index(kv, "=") + if i == 0 { + // We observe in practice keys with a single leading "=" on Windows. + // TODO(#49886): Should we consume only the first leading "=" as part + // of the key, or parse through arbitrarily many of them until a non-"="? + i = strings.Index(kv[1:], "=") + 1 + } + if i < 0 { + if kv != "" { + // The entry is not of the form "key=value" (as it is required to be). + // Leave it as-is for now. + // TODO(#52436): should we strip or reject these bogus entries? + out = append(out, kv) + } + continue + } + k := kv[:i] + if caseInsensitive { + k = strings.ToLower(k) + } + if saw[k] { + continue + } + + saw[k] = true + out = append(out, kv) + } + + // Now reverse the slice to restore the original order. + for i := 0; i < len(out)/2; i++ { + j := len(out) - i - 1 + out[i], out[j] = out[j], out[i] + } + + return out, err +} + +func addCriticalEnv(env []string) []string { + if runtime.GOOS != "windows" { + return env + } + for _, kv := range env { + k, _, ok := strings.Cut(kv, "=") + if !ok { + continue + } + if strings.EqualFold(k, "SYSTEMROOT") { + // We already have it. + return env + } + } + return append(env, "SYSTEMROOT="+os.Getenv("SYSTEMROOT")) +} + +func (c *Cmd) watchCtx(resultc chan<- ctxResult) { + select { + case resultc <- ctxResult{}: + return + case <-c.ctx.Done(): + } + + var err error + if c.Cancel != nil { + if interruptErr := c.Cancel(); interruptErr == nil { + // We appear to have successfully interrupted the command, so any + // program behavior from this point may be due to ctx even if the + // command exits with code 0. + err = c.ctx.Err() + } else if errors.Is(interruptErr, os.ErrProcessDone) { + // The process already finished: we just didn't notice it yet. + // (Perhaps c.Wait hadn't been called, or perhaps it happened to race with + // c.ctx being cancelled.) Don't inject a needless error. + } else { + err = wrappedError{ + prefix: "exec: canceling Cmd", + err: interruptErr, + } + } + } + if c.WaitDelay == 0 { + resultc <- ctxResult{err: err} + return + } + + timer := time.NewTimer(c.WaitDelay) + select { + case resultc <- ctxResult{err: err, timer: timer}: + // c.Process.Wait returned and we've handed the timer off to c.Wait. + // It will take care of goroutine shutdown from here. + return + case <-timer.C: + } + + killed := false + if killErr := c.Process.Kill(); killErr == nil { + // We appear to have killed the process. c.Process.Wait should return a + // non-nil error to c.Wait unless the Kill signal races with a successful + // exit, and if that does happen we shouldn't report a spurious error, + // so don't set err to anything here. + killed = true + } else if !errors.Is(killErr, os.ErrProcessDone) { + err = wrappedError{ + prefix: "exec: killing Cmd", + err: killErr, + } + } + + if c.goroutineErr != nil { + select { + case goroutineErr := <-c.goroutineErr: + // Forward goroutineErr only if we don't have reason to believe it was + // caused by a call to Cancel or Kill above. + if err == nil && !killed { + err = goroutineErr + } + default: + // Close the child process's I/O pipes, in case it abandoned some + // subprocess that inherited them and is still holding them open + // (see https://go.dev/issue/23019). + // + // We close the goroutine pipes only after we have sent any signals we're + // going to send to the process (via Signal or Kill above): if we send + // SIGKILL to the process, we would prefer for it to die of SIGKILL, not + // SIGPIPE. (However, this may still cause any orphaned subprocesses to + // terminate with SIGPIPE.) + closeDescriptors(c.parentIOPipes) + // Wait for the copying goroutines to finish, but report ErrWaitDelay for + // the error: any other error here could result from closing the pipes. + _ = <-c.goroutineErr + if err == nil { + err = ErrWaitDelay + } + } + + // Since we have already received the only result from c.goroutineErr, + // set it to nil to prevent awaitGoroutines from blocking on it. + c.goroutineErr = nil + } + + resultc <- ctxResult{err: err} +} + +// ErrWaitDelay is returned by (*Cmd).Wait if the process exits with a +// successful status code but its output pipes are not closed before the +// command's WaitDelay expires. +var ErrWaitDelay = errors.New("exec: WaitDelay expired before I/O complete") + +// wrappedError wraps an error without relying on fmt.Errorf. +type wrappedError struct { + prefix string + err error +} + +func (w wrappedError) Error() string { + return w.prefix + ": " + w.err.Error() +} + +func (w wrappedError) Unwrap() error { + return w.err +} + +func (c *Cmd) Wait() error { + if c.Process == nil { + return errors.New("exec: not started") + } + if c.ProcessState != nil { + return errors.New("exec: Wait was already called") + } + + state, err := c.Process.Wait() + if err == nil && !state.Success() { + err = &exec.ExitError{ProcessState: state} + } + c.ProcessState = state + + var timer *time.Timer + if c.ctxResult != nil { + watch := <-c.ctxResult + timer = watch.timer + // If c.Process.Wait returned an error, prefer that. + // Otherwise, report any error from the watchCtx goroutine, + // such as a Context cancellation or a WaitDelay overrun. + if err == nil && watch.err != nil { + err = watch.err + } + } + + if goroutineErr := c.awaitGoroutines(timer); err == nil { + // Report an error from the copying goroutines only if the program + // otherwise exited normally on its own. Otherwise, the copying error + // may be due to the abnormal termination. + err = goroutineErr + } + closeDescriptors(c.parentIOPipes) + c.parentIOPipes = nil + + return err +} + +func (c *Cmd) awaitGoroutines(timer *time.Timer) error { + defer func() { + if timer != nil { + timer.Stop() + } + c.goroutineErr = nil + }() + + if c.goroutineErr == nil { + return nil // No running goroutines to await. + } + + if timer == nil { + if c.WaitDelay == 0 { + return <-c.goroutineErr + } + + select { + case err := <-c.goroutineErr: + // Avoid the overhead of starting a timer. + return err + default: + } + + // No existing timer was started: either there is no Context associated with + // the command, or c.Process.Wait completed before the Context was done. + timer = time.NewTimer(c.WaitDelay) + } + + select { + case <-timer.C: + closeDescriptors(c.parentIOPipes) + // Wait for the copying goroutines to finish, but ignore any error + // (since it was probably caused by closing the pipes). + _ = <-c.goroutineErr + return ErrWaitDelay + + case err := <-c.goroutineErr: + return err + } +} + +func closeDescriptors(closers []io.Closer) { + for _, fd := range closers { + fd.Close() + } +} + +func (c *Cmd) childStdin() (*os.File, error) { + if c.Stdin == nil { + f, err := os.Open(os.DevNull) + if err != nil { + return nil, err + } + c.childIOFiles = append(c.childIOFiles, f) + return f, nil + } + + if f, ok := c.Stdin.(*os.File); ok { + return f, nil + } + + pr, pw, err := os.Pipe() + if err != nil { + return nil, err + } + + c.childIOFiles = append(c.childIOFiles, pr) + c.parentIOPipes = append(c.parentIOPipes, pw) + c.goroutine = append(c.goroutine, func() error { + _, err := io.Copy(pw, c.Stdin) + if skipStdinCopyError(err) { + err = nil + } + if err1 := pw.Close(); err == nil { + err = err1 + } + return err + }) + return pr, nil +} + +func (c *Cmd) childStdout() (*os.File, error) { + return c.writerDescriptor(c.Stdout) +} + +func (c *Cmd) childStderr(childStdout *os.File) (*os.File, error) { + if c.Stderr != nil && interfaceEqual(c.Stderr, c.Stdout) { + return childStdout, nil + } + return c.writerDescriptor(c.Stderr) +} + +func (c *Cmd) writerDescriptor(w io.Writer) (*os.File, error) { + if w == nil { + f, err := os.OpenFile(os.DevNull, os.O_WRONLY, 0) + if err != nil { + return nil, err + } + c.childIOFiles = append(c.childIOFiles, f) + return f, nil + } + + if f, ok := w.(*os.File); ok { + return f, nil + } + + pr, pw, err := os.Pipe() + if err != nil { + return nil, err + } + + c.childIOFiles = append(c.childIOFiles, pw) + c.parentIOPipes = append(c.parentIOPipes, pr) + c.goroutine = append(c.goroutine, func() error { + _, err := io.Copy(w, pr) + pr.Close() // in case io.Copy stopped due to write error + return err + }) + return pw, nil +} + +// interfaceEqual protects against panics from doing equality tests on +// two interfaces with non-comparable underlying types. +func interfaceEqual(a, b any) bool { + defer func() { + recover() + }() + return a == b +} + +func skipStdinCopyError(err error) bool { + // Ignore ERROR_BROKEN_PIPE and ERROR_NO_DATA errors copying + // to stdin if the program completed successfully otherwise. + // See Issue 20445. + const _ERROR_NO_DATA = syscall.Errno(0xe8) + pe, ok := err.(*fs.PathError) + return ok && + pe.Op == "write" && pe.Path == "|1" && + (pe.Err == syscall.ERROR_BROKEN_PIPE || pe.Err == _ERROR_NO_DATA) +} + +// createEnvBlock converts an array of environment strings into +// the representation required by CreateProcess: a sequence of NUL +// terminated strings followed by a nil. +// Last bytes are two UCS-2 NULs, or four NUL bytes. +// If any string contains a NUL, it returns (nil, EINVAL). +func createEnvBlock(envv []string) (*uint16, error) { + if len(envv) == 0 { + return &utf16.Encode([]rune("\x00\x00"))[0], nil + } + length := 0 + for _, s := range envv { + if IndexByteString(s, 0) != -1 { + return nil, EINVAL + } + length += len(s) + 1 + } + length += 1 + + b := make([]byte, length) + i := 0 + for _, s := range envv { + l := len(s) + copy(b[i:i+l], []byte(s)) + copy(b[i+l:i+l+1], []byte{0}) + i = i + l + 1 + } + copy(b[i:i+1], []byte{0}) + + return &utf16.Encode([]rune(string(b)))[0], nil +} + +func IndexByteString(s string, c byte) int { + for i := 0; i < len(s); i++ { + if s[i] == c { + return i + } + } + return -1 +} diff --git a/helper/winexec/winexec_test.go b/helper/winexec/winexec_test.go new file mode 100644 index 000000000..0fa2ca61d --- /dev/null +++ b/helper/winexec/winexec_test.go @@ -0,0 +1,84 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +//go:build windows + +package winexec + +import ( + "context" + "io" + "os" + "os/exec" + "testing" + "time" + + "github.com/hashicorp/nomad/ci" + "github.com/shoenig/test/must" +) + +type execCmd interface { + StdinPipe() (io.WriteCloser, error) + CombinedOutput() ([]byte, error) +} + +// TestWinExec_CatStdin runs a "cat"-like command and pipes data into stdin. We +// use TestCatHelper to do this so that we don't need to rely on external +// programs +func TestWinExec_CatStdin(t *testing.T) { + ci.Parallel(t) + + testCases := []struct { + name string + factory func(context.Context, string, ...string) execCmd + }{ + { + name: "winexec.CommandContext", + factory: func(ctx context.Context, name string, args ...string) execCmd { + cmd := CommandContext(ctx, name, args...) + cmd.Env = append(os.Environ(), "GO_WANT_HELPER_PROCESS=1") + return cmd + }, + }, + { + // run the exact same test as above, using os/exec's version, so + // that we can verify we have the exact same behavior + name: "os/exec.CommandContext", + factory: func(ctx context.Context, name string, args ...string) execCmd { + cmd := exec.CommandContext(ctx, name, args...) + cmd.Env = append(os.Environ(), "GO_WANT_HELPER_PROCESS=1") + return cmd + }, + }, + } + + for _, tc := range testCases { + path, _ := os.Executable() + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + + args := []string{"-test.run=TestCatHelper", "--"} + cmd := tc.factory(ctx, path, args...) + + input := "Input string\nLine 2" + stdin, _ := cmd.StdinPipe() + go func() { + defer stdin.Close() + io.WriteString(stdin, input) + }() + + bs, err := cmd.CombinedOutput() + must.EqError(t, err, "exit status 7") + must.Eq(t, input, string(bs)) + } +} + +func TestCatHelper(t *testing.T) { + t.Helper() + if os.Getenv("GO_WANT_HELPER_PROCESS") != "1" { + t.Skip("this should only be run as part of the tests above") + return + } + io.Copy(os.Stdout, os.Stdin) + os.Exit(7) +} diff --git a/main.go b/main.go index 2539dc113..075dc977d 100644 --- a/main.go +++ b/main.go @@ -17,6 +17,7 @@ import ( // processes along side of a task. By early importing them we can avoid // additional code being imported and thus reserving memory. _ "github.com/hashicorp/nomad/client/allocrunner/taskrunner/getter" + _ "github.com/hashicorp/nomad/client/allocrunner/taskrunner/template/renderer" _ "github.com/hashicorp/nomad/client/logmon" _ "github.com/hashicorp/nomad/drivers/docker/docklog" _ "github.com/hashicorp/nomad/drivers/shared/executor" @@ -51,6 +52,7 @@ var ( "operator raft _logs", "operator raft _state", "operator snapshot _state", + "template-render", } // aliases is the list of aliases we want users to be aware of. We hide