Merge pull request #3591 from hashicorp/b-1755-stop

Allow controlling the stop signal for drivers
This commit is contained in:
Chelsea Komlo
2017-12-07 17:06:43 -05:00
committed by GitHub
20 changed files with 334 additions and 6 deletions

View File

@@ -11,6 +11,8 @@ __BACKWARDS INCOMPATIBILITIES:__
IMPROVEMENTS:
* core: Allow operators to reload TLS certificate and key files via SIGHUP
[GH-3479]
* core: Allow configurable stop signals for a task, when drivers support
sending stop signals [GH-1755]
* core: Allow agents to be run in `rpc_upgrade_mode` when migrating a cluster
to TLS rather than changing `heartbeat_grace`
* api: Allocations now track and return modify time in addition to create time

View File

@@ -370,6 +370,7 @@ type Task struct {
DispatchPayload *DispatchPayloadConfig
Leader bool
ShutdownDelay time.Duration `mapstructure:"shutdown_delay"`
KillSignal string `mapstructure:"kill_signal"`
}
func (t *Task) Canonicalize(tg *TaskGroup, job *Job) {

View File

@@ -160,6 +160,7 @@ type DockerVolumeDriverConfig struct {
Options []map[string]string `mapstructure:"options"`
}
// DockerDriverConfig defines the user specified config block in a jobspec
type DockerDriverConfig struct {
ImageName string `mapstructure:"image"` // Container's Image Name
LoadImage string `mapstructure:"load"` // LoadImage is a path to an image archive file
@@ -712,7 +713,6 @@ func (d *DockerDriver) Prestart(ctx *ExecContext, task *structs.Task) (*Prestart
}
func (d *DockerDriver) Start(ctx *ExecContext, task *structs.Task) (*StartResponse, error) {
pluginLogFile := filepath.Join(ctx.TaskDir.Dir, "executor.out")
executorConfig := &dstructs.ExecutorConfig{
LogFile: pluginLogFile,
@@ -1046,6 +1046,7 @@ func (d *DockerDriver) createContainerConfig(ctx *ExecContext, task *structs.Tas
return c, err
}
// create the config block that will later be consumed by go-dockerclient
config := &docker.Config{
Image: d.imageID,
Hostname: driverConfig.Hostname,
@@ -1053,6 +1054,7 @@ func (d *DockerDriver) createContainerConfig(ctx *ExecContext, task *structs.Tas
Tty: driverConfig.TTY,
OpenStdin: driverConfig.Interactive,
StopTimeout: int(task.KillTimeout.Seconds()),
StopSignal: task.KillSignal,
}
if driverConfig.WorkDir != "" {

View File

@@ -2045,3 +2045,50 @@ func TestDockerDriver_Device_Success(t *testing.T) {
assert.Equal(t, expectedDevice, container.HostConfig.Devices[0], "Incorrect device ")
}
func TestDockerDriver_Kill(t *testing.T) {
assert := assert.New(t)
if !tu.IsTravis() {
t.Parallel()
}
if !testutil.DockerIsConnected(t) {
t.Skip("Docker not connected")
}
// Tasks started with a signal that is not supported should not error
task := &structs.Task{
Name: "nc-demo",
Driver: "docker",
KillSignal: "SIGKILL",
Config: map[string]interface{}{
"load": "busybox.tar",
"image": "busybox",
"command": "/bin/nc",
"args": []string{"-l", "127.0.0.1", "-p", "0"},
},
LogConfig: &structs.LogConfig{
MaxFiles: 10,
MaxFileSizeMB: 10,
},
Resources: basicResources,
}
ctx := testDockerDriverContexts(t, task)
defer ctx.AllocDir.Destroy()
d := NewDockerDriver(ctx.DriverCtx)
copyImage(t, ctx.ExecCtx.TaskDir, "busybox.tar")
_, err := d.Prestart(ctx.ExecCtx, task)
if err != nil {
t.Fatalf("error in prestart: %v", err)
}
resp, err := d.Start(ctx.ExecCtx, task)
assert.Nil(err)
assert.NotNil(resp.Handle)
handle := resp.Handle.(*DockerHandle)
waitForExist(t, client, handle)
err = handle.Kill()
assert.Nil(err)
}

View File

@@ -129,9 +129,15 @@ func (d *ExecDriver) Start(ctx *ExecContext, task *structs.Task) (*StartResponse
return nil, fmt.Errorf("failed to set executor context: %v", err)
}
taskKillSignal, err := getTaskKillSignal(task.KillSignal)
if err != nil {
return nil, err
}
execCmd := &executor.ExecCommand{
Cmd: command,
Args: driverConfig.Args,
TaskKillSignal: taskKillSignal,
FSIsolation: true,
ResourceLimits: true,
User: getExecutorUser(task),

View File

@@ -98,6 +98,9 @@ type ExecCommand struct {
// Args is the args of the command that the user wants to run.
Args []string
// TaskKillSignal is an optional field which signal to kill the process
TaskKillSignal os.Signal
// FSIsolation determines whether the command would be run in a chroot.
FSIsolation bool
@@ -496,9 +499,20 @@ func (e *UniversalExecutor) ShutDown() error {
}
return nil
}
if err = proc.Signal(os.Interrupt); err != nil && err.Error() != finishedErr {
// Set default kill signal, as some drivers don't support configurable
// signals (such as rkt)
var osSignal os.Signal
if e.command.TaskKillSignal != nil {
osSignal = e.command.TaskKillSignal
} else {
osSignal = os.Interrupt
}
if err = proc.Signal(osSignal); err != nil && err.Error() != finishedErr {
return fmt.Errorf("executor.shutdown error: %v", err)
}
return nil
}

View File

@@ -265,12 +265,18 @@ func (d *JavaDriver) Start(ctx *ExecContext, task *structs.Task) (*StartResponse
return nil, err
}
taskKillSignal, err := getTaskKillSignal(task.KillSignal)
if err != nil {
return nil, err
}
execCmd := &executor.ExecCommand{
Cmd: absPath,
Args: args,
FSIsolation: true,
ResourceLimits: true,
User: getExecutorUser(task),
TaskKillSignal: taskKillSignal,
}
ps, err := execIntf.LaunchCmd(execCmd)
if err != nil {

View File

@@ -13,6 +13,7 @@ import (
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/testutil"
"github.com/stretchr/testify/assert"
ctestutils "github.com/hashicorp/nomad/client/testutil"
)
@@ -432,3 +433,85 @@ func TestJavaDriver_Start_Wait_Class(t *testing.T) {
t.Fatalf("Error: %s", err)
}
}
func TestJavaDriver_Start_Kill(t *testing.T) {
assert := assert.New(t)
if !testutil.IsTravis() {
t.Parallel()
}
if !javaLocated() {
t.Skip("Java not found; skipping")
}
// Test that a valid kill signal will successfully stop the process
{
ctestutils.JavaCompatible(t)
task := &structs.Task{
Name: "demo-app",
Driver: "java",
KillSignal: "SIGKILL",
Config: map[string]interface{}{
"jar_path": "demoapp.jar",
"args": []string{"5"},
},
LogConfig: &structs.LogConfig{
MaxFiles: 10,
MaxFileSizeMB: 10,
},
Resources: basicResources,
}
ctx := testDriverContexts(t, task)
defer ctx.AllocDir.Destroy()
d := NewJavaDriver(ctx.DriverCtx)
// Copy the test jar into the task's directory
dst := ctx.ExecCtx.TaskDir.Dir
copyFile("./test-resources/java/demoapp.jar", filepath.Join(dst, "demoapp.jar"), t)
_, err := d.Prestart(ctx.ExecCtx, task)
assert.Nil(err)
resp, err := d.Start(ctx.ExecCtx, task)
assert.Nil(err)
assert.NotNil(resp.Handle)
err = resp.Handle.Kill()
assert.Nil(err)
}
// Test that an unsupported kill signal will return an error
{
ctestutils.JavaCompatible(t)
task := &structs.Task{
Name: "demo-app",
Driver: "java",
KillSignal: "ABCDEF",
Config: map[string]interface{}{
"jar_path": "demoapp.jar",
"args": []string{"5"},
},
LogConfig: &structs.LogConfig{
MaxFiles: 10,
MaxFileSizeMB: 10,
},
Resources: basicResources,
}
ctx := testDriverContexts(t, task)
defer ctx.AllocDir.Destroy()
d := NewJavaDriver(ctx.DriverCtx)
// Copy the test jar into the task's directory
dst := ctx.ExecCtx.TaskDir.Dir
copyFile("./test-resources/java/demoapp.jar", filepath.Join(dst, "demoapp.jar"), t)
_, err := d.Prestart(ctx.ExecCtx, task)
assert.Nil(err)
_, err = d.Start(ctx.ExecCtx, task)
assert.NotNil(err)
assert.Contains(err.Error(), "Signal ABCDEF is not supported")
}
}

View File

@@ -144,10 +144,16 @@ func (d *RawExecDriver) Start(ctx *ExecContext, task *structs.Task) (*StartRespo
return nil, fmt.Errorf("failed to set executor context: %v", err)
}
taskKillSignal, err := getTaskKillSignal(task.KillSignal)
if err != nil {
return nil, err
}
execCmd := &executor.ExecCommand{
Cmd: command,
Args: driverConfig.Args,
User: task.User,
Cmd: command,
Args: driverConfig.Args,
User: task.User,
TaskKillSignal: taskKillSignal,
}
ps, err := exec.LaunchCmd(execCmd)
if err != nil {

View File

@@ -10,6 +10,7 @@ import (
"strings"
"time"
"github.com/hashicorp/consul-template/signals"
"github.com/hashicorp/go-multierror"
"github.com/hashicorp/go-plugin"
"github.com/hashicorp/nomad/client/allocdir"
@@ -204,3 +205,18 @@ func SetEnvvars(envBuilder *env.Builder, fsi cstructs.FSIsolation, taskDir *allo
envBuilder.SetHostEnvvars(filter)
}
}
// getTaskKillSignal looks up the signal specified for the task if it has been
// specified. If it is not supported on the platform, returns an error.
func getTaskKillSignal(signal string) (os.Signal, error) {
if signal == "" {
return os.Interrupt, nil
}
taskKillSignal := signals.SignalLookup[signal]
if taskKillSignal == nil {
return nil, fmt.Errorf("Signal %s is not supported", signal)
}
return taskKillSignal, nil
}

View File

@@ -1,8 +1,13 @@
package driver
import (
"os"
"runtime"
"syscall"
"testing"
"time"
"github.com/stretchr/testify/assert"
)
func TestDriver_KillTimeout(t *testing.T) {
@@ -21,3 +26,33 @@ func TestDriver_KillTimeout(t *testing.T) {
t.Fatalf("KillTimeout() returned %v; want %v", actual, expected)
}
}
func TestDriver_getTaskKillSignal(t *testing.T) {
assert := assert.New(t)
t.Parallel()
if runtime.GOOS != "linux" {
t.Skip("Linux only test")
}
// Test that the default is SIGINT
{
sig, err := getTaskKillSignal("")
assert.Nil(err)
assert.Equal(sig, os.Interrupt)
}
// Test that unsupported signals return an error
{
_, err := getTaskKillSignal("ABCDEF")
assert.NotNil(err)
assert.Contains(err.Error(), "Signal ABCDEF is not supported")
}
// Test that supported signals return that signal
{
sig, err := getTaskKillSignal("SIGKILL")
assert.Nil(err)
assert.Equal(sig, syscall.SIGKILL)
}
}

View File

@@ -666,6 +666,8 @@ func ApiTgToStructsTG(taskGroup *api.TaskGroup, tg *structs.TaskGroup) {
}
}
// ApiTaskToStructsTask is a copy and type conversion between the API
// representation of a task from a struct representation of a task.
func ApiTaskToStructsTask(apiTask *api.Task, structsTask *structs.Task) {
structsTask.Name = apiTask.Name
structsTask.Driver = apiTask.Driver
@@ -676,6 +678,7 @@ func ApiTaskToStructsTask(apiTask *api.Task, structsTask *structs.Task) {
structsTask.Meta = apiTask.Meta
structsTask.KillTimeout = *apiTask.KillTimeout
structsTask.ShutdownDelay = apiTask.ShutdownDelay
structsTask.KillSignal = apiTask.KillSignal
if l := len(apiTask.Constraints); l != 0 {
structsTask.Constraints = make([]*structs.Constraint, l)

View File

@@ -1260,6 +1260,7 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) {
"lol": "code",
},
KillTimeout: helper.TimeToPtr(10 * time.Second),
KillSignal: "SIGQUIT",
LogConfig: &api.LogConfig{
MaxFiles: helper.IntToPtr(10),
MaxFileSizeMB: helper.IntToPtr(100),
@@ -1455,6 +1456,7 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) {
"lol": "code",
},
KillTimeout: 10 * time.Second,
KillSignal: "SIGQUIT",
LogConfig: &structs.LogConfig{
MaxFiles: 10,
MaxFileSizeMB: 100,

View File

@@ -591,6 +591,7 @@ func parseTasks(jobName string, taskGroupName string, result *[]*api.Task, list
"template",
"user",
"vault",
"kill_signal",
}
if err := helper.CheckHCLKeys(listVal, valid); err != nil {
return multierror.Prefix(err, fmt.Sprintf("'%s' ->", n))
@@ -623,6 +624,7 @@ func parseTasks(jobName string, taskGroupName string, result *[]*api.Task, list
WeaklyTypedInput: true,
Result: &t,
})
if err != nil {
return err
}

View File

@@ -202,7 +202,8 @@ func TestParse(t *testing.T) {
RightDelim: helper.StringToPtr("__"),
},
},
Leader: true,
Leader: true,
KillSignal: "",
},
{
Name: "storagelocker",
@@ -559,6 +560,29 @@ func TestParse(t *testing.T) {
},
false,
},
{
"job-with-kill-signal.hcl",
&api.Job{
ID: helper.StringToPtr("foo"),
Name: helper.StringToPtr("foo"),
TaskGroups: []*api.TaskGroup{
{
Name: helper.StringToPtr("bar"),
Tasks: []*api.Task{
{
Name: "bar",
Driver: "docker",
KillSignal: "SIGQUIT",
Config: map[string]interface{}{
"image": "hashicorp/image",
},
},
},
},
},
},
false,
},
}
for _, tc := range cases {

View File

@@ -0,0 +1,10 @@
job "foo" {
task "bar" {
driver = "docker"
kill_signal = "SIGQUIT"
config {
image = "hashicorp/image"
}
}
}

View File

@@ -3748,6 +3748,35 @@ func TestJobEndpoint_ValidateJob_InvalidSignals(t *testing.T) {
}
}
func TestJobEndpoint_ValidateJob_KillSignal(t *testing.T) {
assert := assert.New(t)
t.Parallel()
// test validate fails if the driver does not support sending signals, but a
// stop_signal has been specified
{
job := mock.Job()
job.TaskGroups[0].Tasks[0].Driver = "qemu" // qemu does not support sending signals
job.TaskGroups[0].Tasks[0].KillSignal = "SIGINT"
err, warnings := validateJob(job)
assert.NotNil(err)
assert.True(strings.Contains(err.Error(), "support sending signals"))
assert.Nil(warnings)
}
// test validate succeeds if the driver does support sending signals, and
// a stop_signal has been specified
{
job := mock.Job()
job.TaskGroups[0].Tasks[0].KillSignal = "SIGINT"
err, warnings := validateJob(job)
assert.Nil(err)
assert.Nil(warnings)
}
}
func TestJobEndpoint_ValidateJobUpdate(t *testing.T) {
t.Parallel()
old := mock.Job()

View File

@@ -1978,6 +1978,11 @@ func (j *Job) RequiredSignals() map[string]map[string][]string {
taskSignals[task.Vault.ChangeSignal] = struct{}{}
}
// If a user has specified a KillSignal, add it to required signals
if task.KillSignal != "" {
taskSignals[task.KillSignal] = struct{}{}
}
// Check if any template change mode uses signals
for _, t := range task.Templates {
if t.ChangeMode != TemplateChangeModeSignal {
@@ -3221,6 +3226,12 @@ type Task struct {
// ShutdownDelay is the duration of the delay between deregistering a
// task from Consul and sending it a signal to shutdown. See #2441
ShutdownDelay time.Duration
// The kill signal to use for the task. This is an optional specification,
// KillSignal is the kill signal to use for the task. This is an optional
// specification and defaults to SIGINT
KillSignal string
}
func (t *Task) Copy() *Task {

View File

@@ -806,6 +806,26 @@ func TestJob_RequiredSignals(t *testing.T) {
},
}
j2 := &Job{
TaskGroups: []*TaskGroup{
{
Name: "foo",
Tasks: []*Task{
{
Name: "t1",
KillSignal: "SIGQUIT",
},
},
},
},
}
e2 := map[string]map[string][]string{
"foo": {
"t1": {"SIGQUIT"},
},
}
cases := []struct {
Job *Job
Expected map[string]map[string][]string
@@ -818,6 +838,10 @@ func TestJob_RequiredSignals(t *testing.T) {
Job: j1,
Expected: e1,
},
{
Job: j2,
Expected: e2,
},
}
for i, c := range cases {

View File

@@ -54,6 +54,11 @@ job "docs" {
[`max_kill_timeout`][max_kill] on the agent running the task, which has a
default value of 30 seconds.
- `kill_signal` `(string)` - Specifies a configurable kill signal for a task,
where the default is SIGINT. Note that this is only supported for drivers
which accept sending signals (currently Docker, exec, raw_exec, and Java
drivers).
- `leader` `(bool: false)` - Specifies whether the task is the leader task of
the task group. If set to true, when the leader task completes, all other
tasks within the task group will be gracefully shutdown.