template: apply splay value on change_mode script (#14749)

Previously, the splay timeout was only applied if a template re-render
caused a restart or a signal action. Because the `change_mode = "script"`
handling ran after the `if restart || len(signals) != 0` block, scripts
were executed immediately on every re-render, without the splay delay.

This change refactors the logic to make it clearer that any new
`change_mode` behavior must run only after `splay` has been applied.
Author:    Luiz Aoqui
Date:      2022-09-30 12:04:22 -04:00
Committed: GitHub
Parent:    fb1f5ea2d9
Commit:    88b61cb5b4

3 changed files with 184 additions and 42 deletions

.changelog/14749.txt (new file)

@@ -0,0 +1,3 @@
```release-note:bug
template: Fixed a bug where the `splay` timeout was not being applied when `change_mode` was set to `script`.
```
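
For context, here is a condensed sketch of the pre-fix control flow that the diffs below replace. The function shape and names (`onRenderedBefore`, the plain string scripts) are illustrative, not the actual Nomad source:

```go
package sketch

import (
	"math/rand"
	"time"
)

// onRenderedBefore illustrates the bug: only the restart/signal branch
// was gated by the random splay sleep, while change_mode = "script"
// handling ran unconditionally afterwards.
func onRenderedBefore(restart bool, signals map[string]struct{}, scripts []string, splay time.Duration) {
	if restart || len(signals) != 0 {
		if splay != 0 {
			time.Sleep(time.Duration(rand.Int63n(splay.Nanoseconds())))
		}
		// ... restart the task or send the signals ...
	}

	// Scripts ran here, outside the gated block, so they executed
	// immediately on every re-render regardless of splay.
	for range scripts {
		// execute the change_mode script
	}
}
```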


```diff
@@ -463,7 +463,12 @@ func (tm *TaskTemplateManager) onTemplateRendered(handledRenders map[string]time
 		handling = append(handling, id)
 	}
 
-	if restart || len(signals) != 0 {
+	shouldHandle := restart || len(signals) != 0 || len(scripts) != 0
+	if !shouldHandle {
+		return
+	}
+
+	// Apply splay timeout to avoid applying change_mode too frequently.
 	if splay != 0 {
 		ns := splay.Nanoseconds()
 		offset := rand.Int63n(ns)
```
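
The splay gate above follows a standard jitter pattern: once any `change_mode` action is pending, sleep a random duration in `[0, splay)` so that many allocations re-rendering the same template do not all act at once. A self-contained sketch of that pattern, assuming a close-on-shutdown channel similar to the manager's (the name `shutdownCh` is an assumption):

```go
package sketch

import (
	"math/rand"
	"time"
)

// waitSplay blocks for a random duration in [0, splay) and returns true
// when the caller should proceed with its change_mode action, or false
// if shutdownCh closed first. A zero splay means act immediately.
func waitSplay(splay time.Duration, shutdownCh <-chan struct{}) bool {
	if splay == 0 {
		return true
	}
	offset := time.Duration(rand.Int63n(splay.Nanoseconds()))
	select {
	case <-time.After(offset):
		return true
	case <-shutdownCh:
		return false
	}
}
```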
```diff
@@ -485,7 +490,15 @@ func (tm *TaskTemplateManager) onTemplateRendered(handledRenders map[string]time
 		tm.config.Lifecycle.Restart(context.Background(),
 			structs.NewTaskEvent(structs.TaskRestartSignal).
 				SetDisplayMessage("Template with change_mode restart re-rendered"), false)
-	} else if len(signals) != 0 {
+	} else {
+		// Handle signals and scripts since the task may have multiple
+		// templates with mixed change_mode values.
+		tm.handleChangeModeSignal(signals)
+		tm.handleChangeModeScript(scripts)
+	}
+}
+
+func (tm *TaskTemplateManager) handleChangeModeSignal(signals map[string]struct{}) {
 	var mErr multierror.Error
 	for signal := range signals {
 		s := tm.signals[signal]
```
```diff
@@ -506,9 +519,9 @@ func (tm *TaskTemplateManager) onTemplateRendered(handledRenders map[string]time
 				SetFailsTask().
 				SetDisplayMessage(fmt.Sprintf("Template failed to send signals %v: %v", flat, err)))
 		}
 	}
 }
 
+func (tm *TaskTemplateManager) handleChangeModeScript(scripts []*structs.ChangeScript) {
 	// process script execution concurrently
 	var wg sync.WaitGroup
 	for _, script := range scripts {
```
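
The new `handleChangeModeScript` fans script execution out to goroutines and waits for all of them; the hunk is cut off before the body completes, so the sketch below shows the same fan-out/wait shape with a stand-in type and an illustrative runner (`ChangeScript` here is a stub, not `structs.ChangeScript`):

```go
package sketch

import (
	"fmt"
	"sync"
)

// ChangeScript stands in for structs.ChangeScript; only the field used
// below is included.
type ChangeScript struct {
	Command string
}

// runChangeScripts mirrors the fan-out/wait shape of
// handleChangeModeScript: one goroutine per script, then block until
// every script has finished before returning.
func runChangeScripts(scripts []*ChangeScript) {
	var wg sync.WaitGroup
	for _, script := range scripts {
		wg.Add(1)
		go func(s *ChangeScript) {
			defer wg.Done()
			// A real implementation would exec the command with a
			// timeout and emit task events on failure.
			fmt.Println("running change_mode script:", s.Command)
		}(script)
	}
	wg.Wait()
}
```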


```diff
@@ -1275,7 +1275,7 @@ BAR={{key "bar"}}
 	// Update the keys in Consul
 	harness.consul.SetKV(t, key1, []byte(content1_2))
 
-	// Wait for restart
+	// Wait for script execution
 	timeout := time.After(time.Duration(5*testutil.TestMultiplier()) * time.Second)
 OUTER:
 	for {
```
```diff
@@ -1373,6 +1373,132 @@ BAR={{key "bar"}}
 	require.Contains(harness.mockHooks.KillEvent.DisplayMessage, "task is being killed")
 }
 
+func TestTaskTemplateManager_ChangeModeMixed(t *testing.T) {
+	ci.Parallel(t)
+
+	templateRestart := &structs.Template{
+		EmbeddedTmpl: `
+RESTART={{key "restart"}}
+COMMON={{key "common"}}
+`,
+		DestPath:   "restart",
+		ChangeMode: structs.TemplateChangeModeRestart,
+	}
+	templateSignal := &structs.Template{
+		EmbeddedTmpl: `
+SIGNAL={{key "signal"}}
+COMMON={{key "common"}}
+`,
+		DestPath:     "signal",
+		ChangeMode:   structs.TemplateChangeModeSignal,
+		ChangeSignal: "SIGALRM",
+	}
+	templateScript := &structs.Template{
+		EmbeddedTmpl: `
+SCRIPT={{key "script"}}
+COMMON={{key "common"}}
+`,
+		DestPath:   "script",
+		ChangeMode: structs.TemplateChangeModeScript,
+		ChangeScript: &structs.ChangeScript{
+			Command:     "/bin/foo",
+			Args:        []string{},
+			Timeout:     5 * time.Second,
+			FailOnError: true,
+		},
+	}
+	templates := []*structs.Template{
+		templateRestart,
+		templateSignal,
+		templateScript,
+	}
+
+	me := mockExecutor{DesiredExit: 0, DesiredErr: nil}
+	harness := newTestHarness(t, templates, true, false)
+	harness.start(t)
+	harness.manager.SetDriverHandle(&me)
+	defer harness.stop()
+
+	// Ensure no unblock
+	select {
+	case <-harness.mockHooks.UnblockCh:
+		require.Fail(t, "Task unblock should not have been called")
+	case <-time.After(time.Duration(1*testutil.TestMultiplier()) * time.Second):
+	}
+
+	// Write the keys to Consul
+	harness.consul.SetKV(t, "common", []byte(fmt.Sprintf("%v", time.Now())))
+	harness.consul.SetKV(t, "restart", []byte(fmt.Sprintf("%v", time.Now())))
+	harness.consul.SetKV(t, "signal", []byte(fmt.Sprintf("%v", time.Now())))
+	harness.consul.SetKV(t, "script", []byte(fmt.Sprintf("%v", time.Now())))
+
+	// Wait for the unblock
+	select {
+	case <-harness.mockHooks.UnblockCh:
+	case <-time.After(time.Duration(5*testutil.TestMultiplier()) * time.Second):
+		require.Fail(t, "Task unblock should have been called")
+	}
+
+	t.Run("restart takes precedence", func(t *testing.T) {
+		// Update the common Consul key.
+		harness.consul.SetKV(t, "common", []byte(fmt.Sprintf("%v", time.Now())))
+
+		// Collect some events.
+		timeout := time.After(time.Duration(3*testutil.TestMultiplier()) * time.Second)
+		events := []*structs.TaskEvent{}
+	OUTER:
+		for {
+			select {
+			case <-harness.mockHooks.RestartCh:
+				// Consume restarts so the channel is clean for other tests.
+			case <-harness.mockHooks.SignalCh:
+				require.Fail(t, "signal not expected")
+			case ev := <-harness.mockHooks.EmitEventCh:
+				events = append(events, ev)
+			case <-timeout:
+				break OUTER
+			}
+		}
+
+		for _, ev := range events {
+			require.NotContains(t, ev.DisplayMessage, templateScript.ChangeScript.Command)
+			require.NotContains(t, ev.Type, structs.TaskSignaling)
+		}
+	})
+
+	t.Run("signal and script", func(t *testing.T) {
+		// Update the signal and script Consul keys.
+		harness.consul.SetKV(t, "signal", []byte(fmt.Sprintf("%v", time.Now())))
+		harness.consul.SetKV(t, "script", []byte(fmt.Sprintf("%v", time.Now())))
+
+		// Wait for events.
+		var gotSignal, gotScript bool
+		timeout := time.After(time.Duration(5*testutil.TestMultiplier()) * time.Second)
+		for {
+			select {
+			case <-harness.mockHooks.RestartCh:
+				require.Fail(t, "restart not expected")
+			case ev := <-harness.mockHooks.EmitEventCh:
+				if strings.Contains(ev.DisplayMessage, templateScript.ChangeScript.Command) {
+					// Make sure we only run the script once.
+					require.False(t, gotScript)
+					gotScript = true
+				}
+			case <-harness.mockHooks.SignalCh:
+				// Make sure we only signal once.
+				require.False(t, gotSignal)
+				gotSignal = true
+			case <-timeout:
+				require.Fail(t, "timeout waiting for script and signal")
+			}
+
+			if gotScript && gotSignal {
+				break
+			}
+		}
+	})
+}
+
 // TestTaskTemplateManager_FiltersProcessEnvVars asserts that we only render
 // environment variables found in task env-vars and not read the nomad host
 // process environment variables. nomad host process environment variables
```