Merge branch 'master' into f-grpc-executor

* master: (71 commits)
  Fix output of 'nomad deployment fail' with no arg
  Always create a running allocation when testing task state
  tests: ensure exec tests pass valid task resources (#4992)
  some changes for more idiomatic code
  fix iops related tests
  fixed bug in loop delay
  gofmt
  improved code for readability
  client: updateAlloc release lock after read
  fixup! device attributes in `nomad node status -verbose`
  drivers/exec: support device binds and mounts
  fix iops bug and increase test matrix coverage
  tests: tag image explicitly
  changelog
  ci: install lxc-templates explicitly
  tests: skip checking rdma cgroup
  ci: use Ubuntu 16.04 (Xenial) in TravisCI
  client: update driver info on new fingerprint
  drivers/docker: enforce volumes.enabled (#4983)
  client: Style: use fluent style for building loggers
  ...
This commit is contained in:
Nick Ethier
2018-12-13 14:41:09 -05:00
129 changed files with 3942 additions and 1094 deletions

View File

@@ -485,7 +485,6 @@ func convertClientConfig(agentConfig *Config) (*clientconfig.Config, error) {
r.CPU = agentConfig.Client.Reserved.CPU
r.MemoryMB = agentConfig.Client.Reserved.MemoryMB
r.DiskMB = agentConfig.Client.Reserved.DiskMB
r.IOPS = agentConfig.Client.Reserved.IOPS
res := conf.Node.ReservedResources
if res == nil {

View File

@@ -51,7 +51,6 @@ client {
cpu = 10
memory = 10
disk = 10
iops = 10
reserved_ports = "1,100,10-12"
}
client_min_port = 1000

View File

@@ -15,7 +15,6 @@ import (
"time"
"github.com/hashicorp/go-sockaddr/template"
client "github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/nomad"
@@ -573,7 +572,6 @@ type Resources struct {
CPU int `mapstructure:"cpu"`
MemoryMB int `mapstructure:"memory"`
DiskMB int `mapstructure:"disk"`
IOPS int `mapstructure:"iops"`
ReservedPorts string `mapstructure:"reserved_ports"`
}
@@ -1394,9 +1392,6 @@ func (r *Resources) Merge(b *Resources) *Resources {
if b.DiskMB != 0 {
result.DiskMB = b.DiskMB
}
if b.IOPS != 0 {
result.IOPS = b.IOPS
}
if b.ReservedPorts != "" {
result.ReservedPorts = b.ReservedPorts
}

View File

@@ -493,7 +493,6 @@ func parseReserved(result **Resources, list *ast.ObjectList) error {
"cpu",
"memory",
"disk",
"iops",
"reserved_ports",
}
if err := helper.CheckHCLKeys(listVal, valid); err != nil {

View File

@@ -76,7 +76,6 @@ func TestConfig_Parse(t *testing.T) {
CPU: 10,
MemoryMB: 10,
DiskMB: 10,
IOPS: 10,
ReservedPorts: "1,100,10-12",
},
GCInterval: 6 * time.Second,

View File

@@ -96,7 +96,6 @@ func TestConfig_Merge(t *testing.T) {
CPU: 10,
MemoryMB: 10,
DiskMB: 10,
IOPS: 10,
ReservedPorts: "1,10-30,55",
},
},
@@ -254,7 +253,6 @@ func TestConfig_Merge(t *testing.T) {
CPU: 15,
MemoryMB: 15,
DiskMB: 15,
IOPS: 15,
ReservedPorts: "2,10-30,55",
},
GCInterval: 6 * time.Second,

View File

@@ -18,6 +18,54 @@ type heartbeater interface {
UpdateTTL(id, output, status string) error
}
// contextExec allows canceling a ScriptExecutor with a context.
type contextExec struct {
// pctx is the parent context. A subcontext will be created with Exec's
// timeout.
pctx context.Context
// exec to be wrapped in a context
exec interfaces.ScriptExecutor
}
func newContextExec(ctx context.Context, exec interfaces.ScriptExecutor) *contextExec {
return &contextExec{
pctx: ctx,
exec: exec,
}
}
type execResult struct {
buf []byte
code int
err error
}
// Exec a command until the timeout expires, the context is canceled, or the
// underlying Exec returns.
func (c *contextExec) Exec(timeout time.Duration, cmd string, args []string) ([]byte, int, error) {
resCh := make(chan execResult, 1)
// Don't trust the underlying implementation to obey the timeout
ctx, cancel := context.WithTimeout(c.pctx, timeout)
defer cancel()
go func() {
output, code, err := c.exec.Exec(timeout, cmd, args)
select {
case resCh <- execResult{output, code, err}:
case <-ctx.Done():
}
}()
select {
case res := <-resCh:
return res.buf, res.code, res.err
case <-ctx.Done():
return nil, 0, ctx.Err()
}
}
// scriptHandle is returned by scriptCheck.run; it allows cancelling a
// scriptCheck and waiting for it to shut down.
type scriptHandle struct {
@@ -74,6 +122,11 @@ func newScriptCheck(allocID, taskName, checkID string, check *structs.ServiceChe
func (s *scriptCheck) run() *scriptHandle {
ctx, cancel := context.WithCancel(context.Background())
exitCh := make(chan struct{})
// Wrap the original ScriptExecutor in one that obeys context
// cancelation.
ctxExec := newContextExec(ctx, s.exec)
go func() {
defer close(exitCh)
timer := time.NewTimer(0)
@@ -93,11 +146,10 @@ func (s *scriptCheck) run() *scriptHandle {
metrics.IncrCounter([]string{"client", "consul", "script_runs"}, 1)
// Execute check script with timeout
output, code, err := s.exec.Exec(s.check.Timeout, s.check.Command, s.check.Args)
output, code, err := ctxExec.Exec(s.check.Timeout, s.check.Command, s.check.Args)
switch err {
case context.Canceled:
// check removed during execution; exit
cancel()
return
case context.DeadlineExceeded:
metrics.IncrCounter([]string{"client", "consul", "script_timeouts"}, 1)
@@ -112,9 +164,6 @@ func (s *scriptCheck) run() *scriptHandle {
s.logger.Warn("check timed out", "timeout", s.check.Timeout)
}
// cleanup context
cancel()
state := api.HealthCritical
switch code {
case 0:

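As a usage sketch: the contextExec wrapper above lets any caller bound a ScriptExecutor whose implementation might ignore its timeout argument. The helper below is hypothetical (it is not part of this commit) and assumes the surrounding file's imports (context, fmt, time, interfaces):

// runCheckOnce is a hypothetical caller of the contextExec wrapper above.
func runCheckOnce(ctx context.Context, exec interfaces.ScriptExecutor) error {
	ce := newContextExec(ctx, exec)
	// Returns within ~5s even if exec blocks forever: err is
	// context.DeadlineExceeded on timeout, or context.Canceled if ctx ends first.
	_, code, err := ce.Exec(5*time.Second, "/bin/check.sh", nil)
	if err != nil {
		return err
	}
	if code != 0 {
		return fmt.Errorf("check failed with exit code %d", code)
	}
	return nil
}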
View File

@@ -5,6 +5,7 @@ import (
"fmt"
"os"
"os/exec"
"sync/atomic"
"testing"
"time"
@@ -23,20 +24,34 @@ func TestMain(m *testing.M) {
// blockingScriptExec implements ScriptExec by running a subcommand that never
// exits.
type blockingScriptExec struct {
// pctx is canceled *only* for test cleanup. Just like real
// ScriptExecutors, its Exec method cannot be canceled directly, only
// via a timeout.
pctx context.Context
// running is ticked before blocking to allow synchronizing operations
running chan struct{}
// set to true if Exec is called and has exited
exited bool
// set to 1 with atomics if Exec is called and has exited
exited int32
}
func newBlockingScriptExec() *blockingScriptExec {
return &blockingScriptExec{running: make(chan struct{})}
// newBlockingScriptExec returns a ScriptExecutor that blocks Exec() until the
// caller receives on the b.running chan. It also returns a CancelFunc for test
// cleanup only. The runtime cannot cancel ScriptExecutors before their timeout
// expires.
func newBlockingScriptExec() (*blockingScriptExec, context.CancelFunc) {
ctx, cancel := context.WithCancel(context.Background())
exec := &blockingScriptExec{
pctx: ctx,
running: make(chan struct{}),
}
return exec, cancel
}
func (b *blockingScriptExec) Exec(dur time.Duration, _ string, _ []string) ([]byte, int, error) {
b.running <- struct{}{}
ctx, cancel := context.WithTimeout(context.Background(), dur)
ctx, cancel := context.WithTimeout(b.pctx, dur)
defer cancel()
cmd := exec.CommandContext(ctx, testtask.Path(), "sleep", "9000h")
testtask.SetCmdEnv(cmd)
@@ -47,23 +62,20 @@ func (b *blockingScriptExec) Exec(dur time.Duration, _ string, _ []string) ([]by
code = 1
}
}
b.exited = true
atomic.StoreInt32(&b.exited, 1)
return []byte{}, code, err
}
// TestConsulScript_Exec_Cancel asserts that cancelling a script check
// short-circuits any running scripts.
func TestConsulScript_Exec_Cancel(t *testing.T) {
// FIXME: This test is failing now as check process cancellation
// doesn't get propagated to the script check, causing timeouts
t.Skip("FIXME: unexpected failing test")
serviceCheck := structs.ServiceCheck{
Name: "sleeper",
Interval: time.Hour,
Timeout: time.Hour,
}
exec := newBlockingScriptExec()
exec, cancel := newBlockingScriptExec()
defer cancel()
// pass nil for heartbeater as it shouldn't be called
check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, nil, testlog.HCLogger(t), nil)
@@ -80,8 +92,11 @@ func TestConsulScript_Exec_Cancel(t *testing.T) {
case <-time.After(3 * time.Second):
t.Fatalf("timed out waiting for script check to exit")
}
if !exec.exited {
t.Errorf("expected script executor to run and exit but it has not")
// The underlying ScriptExecutor (newBlockingScriptExec) *cannot* be
// canceled. Only a wrapper around it obeys the context cancelation.
if atomic.LoadInt32(&exec.exited) == 1 {
t.Errorf("expected script executor to still be running after timeout")
}
}
@@ -106,16 +121,19 @@ func newFakeHeartbeater() *fakeHeartbeater {
return &fakeHeartbeater{updates: make(chan execStatus)}
}
// TestConsulScript_Exec_Timeout asserts a script will be killed when the
// TestConsulScript_Exec_TimeoutBasic asserts a script will be killed when the
// timeout is reached.
func TestConsulScript_Exec_Timeout(t *testing.T) {
t.Parallel() // run the slow tests in parallel
func TestConsulScript_Exec_TimeoutBasic(t *testing.T) {
t.Parallel()
serviceCheck := structs.ServiceCheck{
Name: "sleeper",
Interval: time.Hour,
Timeout: time.Second,
}
exec := newBlockingScriptExec()
exec, cancel := newBlockingScriptExec()
defer cancel()
hb := newFakeHeartbeater()
check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, hb, testlog.HCLogger(t), nil)
@@ -132,8 +150,11 @@ func TestConsulScript_Exec_Timeout(t *testing.T) {
case <-time.After(3 * time.Second):
t.Fatalf("timed out waiting for script check to exit")
}
if !exec.exited {
t.Errorf("expected script executor to run and exit but it has not")
// The underlying ScriptExecutor (newBlockingScriptExec) *cannot* be
// canceled. Only a wrapper around it obeys the context cancelation.
if atomic.LoadInt32(&exec.exited) == 1 {
t.Errorf("expected script executor to still be running after timeout")
}
// Cancel and watch for exit
@@ -160,11 +181,8 @@ func (sleeperExec) Exec(time.Duration, string, []string) ([]byte, int, error) {
// the timeout is reached and always set a critical status regardless of what
// Exec returns.
func TestConsulScript_Exec_TimeoutCritical(t *testing.T) {
// FIXME: This test is failing now because we no longer mark critical
// if the check succeeded
t.Skip("FIXME: unexpected failing test")
t.Parallel()
t.Parallel() // run the slow tests in parallel
serviceCheck := structs.ServiceCheck{
Name: "sleeper",
Interval: time.Hour,

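The move from a plain bool to sync/atomic in this file is a data-race fix: exited is written from Exec's goroutine and read from the test goroutine, so unsynchronized access would fail under `go test -race`. A minimal, self-contained illustration of the pattern (all names illustrative, not from the commit):

package main

import (
	"fmt"
	"sync/atomic"
	"time"
)

func main() {
	var exited int32 // written by one goroutine, read by another
	go func() {
		time.Sleep(50 * time.Millisecond) // simulate work
		atomic.StoreInt32(&exited, 1)     // safe concurrent write
	}()
	// Polling with atomic.LoadInt32 keeps the read race-free even though
	// the writer may still be running, mirroring the test's assertion.
	for atomic.LoadInt32(&exited) == 0 {
		time.Sleep(10 * time.Millisecond)
	}
	fmt.Println("worker exited")
}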
View File

@@ -777,10 +777,6 @@ func TestConsul_RegServices(t *testing.T) {
// TestConsul_ShutdownOK tests the ok path for the shutdown logic in
// ServiceClient.
func TestConsul_ShutdownOK(t *testing.T) {
// FIXME: This test is failing now because checks are called only once;
// not sure what changed
t.Skip("FIXME: unexpected failing test")
require := require.New(t)
ctx := setupFake(t)
@@ -832,7 +828,7 @@ func TestConsul_ShutdownOK(t *testing.T) {
t.Fatalf("expected 1 checkTTL entry but found: %d", n)
}
for _, v := range ctx.FakeConsul.checkTTLs {
require.Equalf(2, v, "expected 2 updates but foud %d", v)
require.Equalf(2, v, "expected 2 updates but found %d", v)
}
for _, v := range ctx.FakeConsul.checks {
if v.Status != "passing" {

View File

@@ -853,7 +853,11 @@ func ApiResourcesToStructs(in *api.Resources) *structs.Resources {
out := &structs.Resources{
CPU: *in.CPU,
MemoryMB: *in.MemoryMB,
IOPS: *in.IOPS,
}
// COMPAT(0.10): Only being used to issue warnings
if in.IOPS != nil {
out.IOPS = *in.IOPS
}
if l := len(in.Networks); l != 0 {

View File

@@ -240,8 +240,19 @@ func (a *TestAgent) Shutdown() error {
}()
// shutdown agent before endpoints
a.Server.Shutdown()
return a.Agent.Shutdown()
ch := make(chan error, 1)
go func() {
defer close(ch)
a.Server.Shutdown()
ch <- a.Agent.Shutdown()
}()
select {
case err := <-ch:
return err
case <-time.After(1 * time.Minute):
return fmt.Errorf("timed out while shutting down test agent")
}
}
func (a *TestAgent) HTTPAddr() string {

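The Shutdown change above is an instance of a general bounded-wait pattern: run the blocking call in a goroutine and select on its result or a deadline. A self-contained sketch follows; the withTimeout helper is hypothetical, not part of this commit:

package main

import (
	"fmt"
	"time"
)

// withTimeout runs fn in a goroutine and returns its error, or a timeout
// error if fn has not finished within d. fn keeps running in the background;
// the caller merely stops waiting, exactly as TestAgent.Shutdown does above.
func withTimeout(d time.Duration, fn func() error) error {
	ch := make(chan error, 1) // buffered: the goroutine can exit even on timeout
	go func() { ch <- fn() }()
	select {
	case err := <-ch:
		return err
	case <-time.After(d):
		return fmt.Errorf("timed out after %s", d)
	}
}

func main() {
	err := withTimeout(100*time.Millisecond, func() error {
		time.Sleep(time.Second) // simulate a hung shutdown
		return nil
	})
	fmt.Println(err)
}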
View File

@@ -9,7 +9,6 @@ import (
"time"
humanize "github.com/dustin/go-humanize"
"github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/api/contexts"
"github.com/hashicorp/nomad/client/allocrunner/taskrunner/restarts"
@@ -490,7 +489,7 @@ func (c *AllocStatusCommand) outputTaskResources(alloc *api.Allocation, task str
}
var resourcesOutput []string
resourcesOutput = append(resourcesOutput, "CPU|Memory|Disk|IOPS|Addresses")
resourcesOutput = append(resourcesOutput, "CPU|Memory|Disk|Addresses")
firstAddr := ""
if len(addr) > 0 {
firstAddr = addr[0]
@@ -512,11 +511,10 @@ func (c *AllocStatusCommand) outputTaskResources(alloc *api.Allocation, task str
deviceStats = ru.ResourceUsage.DeviceStats
}
}
resourcesOutput = append(resourcesOutput, fmt.Sprintf("%v MHz|%v|%v|%v|%v",
resourcesOutput = append(resourcesOutput, fmt.Sprintf("%v MHz|%v|%v|%v",
cpuUsage,
memUsage,
humanize.IBytes(uint64(*alloc.Resources.DiskMB*bytesPerMegabyte)),
*resource.IOPS,
firstAddr))
for i := 1; i < len(addr); i++ {
resourcesOutput = append(resourcesOutput, fmt.Sprintf("||||%v", addr[i]))

View File

@@ -79,10 +79,10 @@ func (c *DeploymentFailCommand) Run(args []string) int {
return 1
}
// Check that we got no arguments
// Check that we got one argument
args = flags.Args()
if l := len(args); l != 1 {
c.Ui.Error("This command takes no arguments")
c.Ui.Error("This command takes one argument: <deployment id>")
c.Ui.Error(commandErrorText(c))
return 1
}

View File

@@ -109,3 +109,15 @@ func printDeviceStats(ui cli.Ui, deviceGroupStats []*api.DeviceGroupStats) {
}
}
}
func getDeviceAttributes(d *api.NodeDeviceResource) []string {
attrs := []string{fmt.Sprintf("Device Group|%s", d.ID())}
for k, v := range d.Attributes {
attrs = append(attrs, k+"|"+v.String())
}
sort.Strings(attrs[1:])
return attrs
}
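Note that the sort runs over attrs[1:] on purpose: the "Device Group" header row stays pinned in first position while the attribute rows beneath it are ordered alphabetically by key.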

View File

@@ -247,3 +247,29 @@ func TestNodeStatusCommand_GetDeviceResources(t *testing.T) {
assert.Equal(t, expected, formattedDevices)
}
func TestGetDeviceAttributes(t *testing.T) {
d := &api.NodeDeviceResource{
Vendor: "Vendor",
Type: "Type",
Name: "Name",
Attributes: map[string]*api.Attribute{
"utilization": {
FloatVal: helper.Float64ToPtr(0.78),
Unit: "%",
},
"filesystem": {
StringVal: helper.StringToPtr("ext4"),
},
},
}
formattedDevices := getDeviceAttributes(d)
expected := []string{
"Device Group|Vendor/Type/Name",
"filesystem|ext4",
"utilization|0.78 %",
}
assert.Equal(t, expected, formattedDevices)
}

View File

@@ -9,11 +9,10 @@ import (
"time"
humanize "github.com/dustin/go-humanize"
"github.com/posener/complete"
"github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/api/contexts"
"github.com/hashicorp/nomad/helper"
"github.com/posener/complete"
)
const (
@@ -419,6 +418,7 @@ func (c *NodeStatusCommand) formatNode(client *api.Client, node *api.Node) int {
if c.verbose {
c.formatAttributes(node)
c.formatDeviceAttributes(node)
c.formatMeta(node)
}
return 0
@@ -529,6 +529,32 @@ func (c *NodeStatusCommand) formatAttributes(node *api.Node) {
c.Ui.Output(formatKV(attributes))
}
func (c *NodeStatusCommand) formatDeviceAttributes(node *api.Node) {
devices := node.NodeResources.Devices
if len(devices) == 0 {
return
}
sort.Slice(devices, func(i, j int) bool {
return devices[i].ID() < devices[j].ID()
})
first := true
for _, d := range devices {
if len(d.Attributes) == 0 {
continue
}
if first {
c.Ui.Output("\nDevice Group Attributes")
first = false
} else {
c.Ui.Output("")
}
c.Ui.Output(formatKV(getDeviceAttributes(d)))
}
}
func (c *NodeStatusCommand) formatMeta(node *api.Node) {
// Print the meta
keys := make([]string, 0, len(node.Meta))
@@ -611,25 +637,22 @@ func getAllocatedResources(client *api.Client, runningAllocs []*api.Allocation,
total := computeNodeTotalResources(node)
// Get Resources
var cpu, mem, disk, iops int
var cpu, mem, disk int
for _, alloc := range runningAllocs {
cpu += *alloc.Resources.CPU
mem += *alloc.Resources.MemoryMB
disk += *alloc.Resources.DiskMB
iops += *alloc.Resources.IOPS
}
resources := make([]string, 2)
resources[0] = "CPU|Memory|Disk|IOPS"
resources[1] = fmt.Sprintf("%d/%d MHz|%s/%s|%s/%s|%d/%d",
resources[0] = "CPU|Memory|Disk"
resources[1] = fmt.Sprintf("%d/%d MHz|%s/%s|%s/%s",
cpu,
*total.CPU,
humanize.IBytes(uint64(mem*bytesPerMegabyte)),
humanize.IBytes(uint64(*total.MemoryMB*bytesPerMegabyte)),
humanize.IBytes(uint64(disk*bytesPerMegabyte)),
humanize.IBytes(uint64(*total.DiskMB*bytesPerMegabyte)),
iops,
*total.IOPS)
humanize.IBytes(uint64(*total.DiskMB*bytesPerMegabyte)))
return resources
}
@@ -647,7 +670,6 @@ func computeNodeTotalResources(node *api.Node) api.Resources {
total.CPU = helper.IntToPtr(*r.CPU - *res.CPU)
total.MemoryMB = helper.IntToPtr(*r.MemoryMB - *res.MemoryMB)
total.DiskMB = helper.IntToPtr(*r.DiskMB - *res.DiskMB)
total.IOPS = helper.IntToPtr(*r.IOPS - *res.IOPS)
return total
}