windows: set job object for executor and children (#24214)

On Windows, if the `raw_exec` driver's executor exits, the child processes are not also killed. Create a Windows "job object" (not to be confused with a Nomad job) and add the executor to it. Child processes of the executor will inherit the job automatically. When the handle to the job object is freed (on executor exit), the job itself is destroyed and this causes all processes in that job to exit. Fixes: https://github.com/hashicorp/nomad/issues/23668 Ref: https://learn.microsoft.com/en-us/windows/win32/procthread/job-objects
2026-01-06 10:25:42 +03:00 · 2024-10-16 09:20:26 -04:00
parent 0f6561bdfe
commit 6b8ddff1fa
9 changed files with 369 additions and 142 deletions
--- a/drivers/shared/executor/executor_test.go
+++ b/drivers/shared/executor/executor_test.go
@@ -1,10 +1,11 @@
 // Copyright (c) HashiCorp, Inc.
 // SPDX-License-Identifier: MPL-2.0

+//go:build !windows
+
 package executor

 import (
-	"bytes"
 	"context"
 	"fmt"
 	"io"
@@ -12,7 +13,6 @@ import (
 	"path/filepath"
 	"runtime"
 	"strings"
-	"sync"
 	"syscall"
 	"testing"
 	"time"
@@ -59,15 +59,6 @@ var (
 	compute  = topology.Compute()
 )

-type testExecCmd struct {
-	command  *ExecCommand
-	allocDir *allocdir.AllocDir
-
-	stdout         *bytes.Buffer
-	stderr         *bytes.Buffer
-	outputCopyDone *sync.WaitGroup
-}
-
 // testExecutorContext returns an ExecutorContext and AllocDir.
 //
 // The caller is responsible for calling AllocDir.Destroy() to cleanup.
@@ -123,38 +114,6 @@ func testExecutorCommand(t *testing.T) *testExecCmd {
 	return testCmd
 }

-// configureTLogging configures a test command executor with buffer as Std{out|err}
-// but using os.Pipe so it mimics non-test case where cmd is set with files as Std{out|err}
-// the buffers can be used to read command output
-func configureTLogging(t *testing.T, testcmd *testExecCmd) {
-	var stdout, stderr bytes.Buffer
-	var copyDone sync.WaitGroup
-
-	stdoutPr, stdoutPw, err := os.Pipe()
-	require.NoError(t, err)
-
-	stderrPr, stderrPw, err := os.Pipe()
-	require.NoError(t, err)
-
-	copyDone.Add(2)
-	go func() {
-		defer copyDone.Done()
-		io.Copy(&stdout, stdoutPr)
-	}()
-	go func() {
-		defer copyDone.Done()
-		io.Copy(&stderr, stderrPr)
-	}()
-
-	testcmd.stdout = &stdout
-	testcmd.stderr = &stderr
-	testcmd.outputCopyDone = &copyDone
-
-	testcmd.command.stdout = stdoutPw
-	testcmd.command.stderr = stderrPw
-	return
-}
-
 func TestExecutor_Start_Invalid(t *testing.T) {
 	ci.Parallel(t)
 	invalid := "/bin/foobar"
--- a/drivers/shared/executor/executor_windows.go
+++ b/drivers/shared/executor/executor_windows.go
@@ -9,17 +9,48 @@ import (
 	"fmt"
 	"os"
 	"syscall"
+	"unsafe"

 	"golang.org/x/sys/windows"
 )

-// configure new process group for child process
+// setNewProcessGroup puts the child in a new process group and adds the
+// executor to a new JobObject, in which the executor's children are created.
+// Ref: https://learn.microsoft.com/en-us/windows/win32/procthread/job-objects
 func (e *UniversalExecutor) setNewProcessGroup() error {
 	// We need to check that as build flags includes windows for this file
 	if e.childCmd.SysProcAttr == nil {
 		e.childCmd.SysProcAttr = &syscall.SysProcAttr{}
 	}
 	e.childCmd.SysProcAttr.CreationFlags = syscall.CREATE_NEW_PROCESS_GROUP
+
+	// The job handle is deliberately never passed to CloseHandle: it has to stay
+	// open until the executor exits. NOTE(review): error returns leave it open too.
+	job, err := windows.CreateJobObject(nil, nil)
+	if err != nil {
+		return fmt.Errorf("could not create Windows job object for executor: %w", err)
+	}
+
+	info := windows.JOBOBJECT_EXTENDED_LIMIT_INFORMATION{
+		BasicLimitInformation: windows.JOBOBJECT_BASIC_LIMIT_INFORMATION{
+			LimitFlags: windows.JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE, // kill every process in the job once its last handle closes
+		},
+	}
+	_, err = windows.SetInformationJobObject(
+		job,
+		windows.JobObjectExtendedLimitInformation,
+		uintptr(unsafe.Pointer(&info)),
+		uint32(unsafe.Sizeof(info)))
+	if err != nil {
+		return fmt.Errorf("could not configure Windows job object for executor: %w", err)
+	}
+
+	handle := windows.CurrentProcess() // the calling process, i.e. the executor itself
+	err = windows.AssignProcessToJobObject(job, handle)
+	if err != nil {
+		return fmt.Errorf("could not assign executor to Windows job object: %w", err)
+	}
+
 	return nil
 }

--- a/drivers/shared/executor/executor_windows_test.go
+++ b/drivers/shared/executor/executor_windows_test.go
@@ -0,0 +1,88 @@
+// Copyright (c) HashiCorp, Inc.
+// SPDX-License-Identifier: MPL-2.0
+
+//go:build windows
+
+package executor
+
+import (
+	"context"
+	"os"
+	"testing"
+	"time"
+
+	"github.com/hashicorp/nomad/ci"
+	"github.com/hashicorp/nomad/client/allocdir"
+	"github.com/hashicorp/nomad/client/lib/numalib"
+	"github.com/hashicorp/nomad/client/taskenv"
+	"github.com/hashicorp/nomad/helper/testlog"
+	"github.com/hashicorp/nomad/nomad/mock"
+	"github.com/hashicorp/nomad/nomad/structs"
+	"github.com/hashicorp/nomad/plugins/drivers"
+	"github.com/hashicorp/nomad/plugins/drivers/fsisolation"
+	"github.com/shoenig/test/must"
+)
+
+// testExecutorCommand returns a test command backed by a freshly built task dir.
+func testExecutorCommand(t *testing.T) *testExecCmd {
+	alloc := mock.Alloc()
+	task := alloc.Job.TaskGroups[0].Tasks[0] // first task of the mock alloc's first group
+	taskEnv := taskenv.NewBuilder(mock.Node(), alloc, task, "global").Build()
+
+	allocDir := allocdir.NewAllocDir(testlog.HCLogger(t), t.TempDir(), t.TempDir(), alloc.ID)
+	must.NoError(t, allocDir.Build())
+	t.Cleanup(func() { allocDir.Destroy() }) // registered here, so callers do not call Destroy themselves
+
+	must.NoError(t, allocDir.NewTaskDir(task).Build(fsisolation.None, nil, task.User))
+	td := allocDir.TaskDirs[task.Name]
+	cmd := &ExecCommand{
+		Env:     taskEnv.List(),
+		TaskDir: td.Dir,
+		Resources: &drivers.Resources{
+			NomadResources: &structs.AllocatedTaskResources{
+				Cpu: structs.AllocatedCpuResources{
+					CpuShares: 500,
+				},
+				Memory: structs.AllocatedMemoryResources{
+					MemoryMB: 256,
+				},
+			},
+		},
+	}
+
+	testCmd := &testExecCmd{
+		command:  cmd,
+		allocDir: allocDir,
+	}
+	configureTLogging(t, testCmd) // attach pipe-backed stdout/stderr buffers to the command
+	return testCmd
+}
+
+func TestExecutor_ProcessExit(t *testing.T) {
+	ci.Parallel(t)
+
+	topology := numalib.Scan(numalib.PlatformScanners())
+	compute := topology.Compute()
+
+	cmd := testExecutorCommand(t)
+	cmd.command.Cmd = "Powershell.exe"
+	cmd.command.Args = []string{"sleep", "30"} // long enough that the child is still running when it is killed below
+	executor := NewExecutor(testlog.HCLogger(t), compute)
+
+	t.Cleanup(func() { executor.Shutdown("SIGKILL", 0) })
+
+	childPs, err := executor.Launch(cmd.command)
+	must.NoError(t, err)
+	must.NonZero(t, childPs.Pid)
+
+	proc, err := os.FindProcess(childPs.Pid)
+	must.NoError(t, err)
+	must.NoError(t, proc.Kill()) // kill the child directly by pid, not through the executor
+
+	ctx, cancel := context.WithTimeout(context.TODO(), 1*time.Second)
+	t.Cleanup(cancel)
+	waitPs, err := executor.Wait(ctx) // must report the exit before the 1s timeout
+	must.NoError(t, err)
+	must.Eq(t, 1, waitPs.ExitCode) // presumably the exit code os.Process.Kill terminates with on Windows; confirm
+	must.Eq(t, childPs.Pid, waitPs.Pid)
+}
--- a/drivers/shared/executor/utils_test.go
+++ b/drivers/shared/executor/utils_test.go
@@ -4,8 +4,13 @@
 package executor

 import (
+	"bytes"
+	"io"
+	"os"
+	"sync"
 	"testing"

+	"github.com/hashicorp/nomad/client/allocdir"
 	"github.com/stretchr/testify/require"
 )

@@ -29,3 +34,45 @@ func TestUtils_IsolationMode(t *testing.T) {
 		require.Equal(t, tc.exp, result)
 	}
 }
+
+type testExecCmd struct {
+	command  *ExecCommand       // command handed to the executor under test
+	allocDir *allocdir.AllocDir // alloc dir created for the test command
+
+	stdout         *bytes.Buffer   // filled from the read end of the command's stdout pipe
+	stderr         *bytes.Buffer   // filled from the read end of the command's stderr pipe
+	outputCopyDone *sync.WaitGroup // done once both pipe-copy goroutines have returned
+}
+
+// configureTLogging gives a test command in-memory buffers for Std{out|err},
+// fed through os.Pipe so the command still writes to files as it would outside
+// of tests. Read the command's output from the buffers.
+func configureTLogging(t *testing.T, testcmd *testExecCmd) {
+	t.Helper()
+	var stdout, stderr bytes.Buffer
+	var copyDone sync.WaitGroup
+
+	stdoutPr, stdoutPw, err := os.Pipe()
+	require.NoError(t, err)
+
+	stderrPr, stderrPw, err := os.Pipe()
+	require.NoError(t, err)
+
+	copyDone.Add(2) // one per copy goroutine below
+	go func() {
+		defer copyDone.Done()
+		io.Copy(&stdout, stdoutPr) // copies until EOF or a read error; the result is ignored
+	}()
+	go func() {
+		defer copyDone.Done()
+		io.Copy(&stderr, stderrPr) // copies until EOF or a read error; the result is ignored
+	}()
+
+	testcmd.stdout = &stdout
+	testcmd.stderr = &stderr
+	testcmd.outputCopyDone = &copyDone
+
+	testcmd.command.stdout = stdoutPw // the command writes into the pipes' write ends
+	testcmd.command.stderr = stderrPw // NOTE(review): nothing in this function closes the pipe ends; confirm who does
+	return
+}