Merge branch 'master' into f-grpc-executor

* master: (71 commits)
  Fix output of 'nomad deployment fail' with no arg
  Always create a running allocation when testing task state
  tests: ensure exec tests pass valid task resources (#4992)
  some changes for more idiomatic code
  fix iops related tests
  fixed bug in loop delay
  gofmt
  improved code for readability
  client: updateAlloc release lock after read
  fixup! device attributes in `nomad node status -verbose`
  drivers/exec: support device binds and mounts
  fix iops bug and increase test matrix coverage
  tests: tag image explicitly
  changelog
  ci: install lxc-templates explicitly
  tests: skip checking rdma cgroup
  ci: use Ubuntu 16.04 (Xenial) in TravisCI
  client: update driver info on new fingerprint
  drivers/docker: enforce volumes.enabled (#4983)
  client: Style: use fluent style for building loggers
  ...
This commit is contained in:
Nick Ethier
2018-12-13 14:41:09 -05:00
129 changed files with 3942 additions and 1094 deletions

View File

@@ -485,7 +485,6 @@ func convertClientConfig(agentConfig *Config) (*clientconfig.Config, error) {
r.CPU = agentConfig.Client.Reserved.CPU
r.MemoryMB = agentConfig.Client.Reserved.MemoryMB
r.DiskMB = agentConfig.Client.Reserved.DiskMB
r.IOPS = agentConfig.Client.Reserved.IOPS
res := conf.Node.ReservedResources
if res == nil {

View File

@@ -51,7 +51,6 @@ client {
cpu = 10
memory = 10
disk = 10
iops = 10
reserved_ports = "1,100,10-12"
}
client_min_port = 1000

View File

@@ -15,7 +15,6 @@ import (
"time"
"github.com/hashicorp/go-sockaddr/template"
client "github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/nomad"
@@ -573,7 +572,6 @@ type Resources struct {
CPU int `mapstructure:"cpu"`
MemoryMB int `mapstructure:"memory"`
DiskMB int `mapstructure:"disk"`
IOPS int `mapstructure:"iops"`
ReservedPorts string `mapstructure:"reserved_ports"`
}
@@ -1394,9 +1392,6 @@ func (r *Resources) Merge(b *Resources) *Resources {
if b.DiskMB != 0 {
result.DiskMB = b.DiskMB
}
if b.IOPS != 0 {
result.IOPS = b.IOPS
}
if b.ReservedPorts != "" {
result.ReservedPorts = b.ReservedPorts
}

View File

@@ -493,7 +493,6 @@ func parseReserved(result **Resources, list *ast.ObjectList) error {
"cpu",
"memory",
"disk",
"iops",
"reserved_ports",
}
if err := helper.CheckHCLKeys(listVal, valid); err != nil {

View File

@@ -76,7 +76,6 @@ func TestConfig_Parse(t *testing.T) {
CPU: 10,
MemoryMB: 10,
DiskMB: 10,
IOPS: 10,
ReservedPorts: "1,100,10-12",
},
GCInterval: 6 * time.Second,

View File

@@ -96,7 +96,6 @@ func TestConfig_Merge(t *testing.T) {
CPU: 10,
MemoryMB: 10,
DiskMB: 10,
IOPS: 10,
ReservedPorts: "1,10-30,55",
},
},
@@ -254,7 +253,6 @@ func TestConfig_Merge(t *testing.T) {
CPU: 15,
MemoryMB: 15,
DiskMB: 15,
IOPS: 15,
ReservedPorts: "2,10-30,55",
},
GCInterval: 6 * time.Second,

View File

@@ -18,6 +18,54 @@ type heartbeater interface {
UpdateTTL(id, output, status string) error
}
// contextExec allows canceling a ScriptExecutor with a context.
type contextExec struct {
// pctx is the parent context. A subcontext will be created with Exec's
// timeout.
pctx context.Context
// exec to be wrapped in a context
exec interfaces.ScriptExecutor
}
func newContextExec(ctx context.Context, exec interfaces.ScriptExecutor) *contextExec {
return &contextExec{
pctx: ctx,
exec: exec,
}
}
type execResult struct {
buf []byte
code int
err error
}
// Exec a command until the timeout expires, the context is canceled, or the
// underlying Exec returns.
func (c *contextExec) Exec(timeout time.Duration, cmd string, args []string) ([]byte, int, error) {
resCh := make(chan execResult, 1)
// Don't trust the underlying implementation to obey the timeout
ctx, cancel := context.WithTimeout(c.pctx, timeout)
defer cancel()
go func() {
output, code, err := c.exec.Exec(timeout, cmd, args)
select {
case resCh <- execResult{output, code, err}:
case <-ctx.Done():
}
}()
select {
case res := <-resCh:
return res.buf, res.code, res.err
case <-ctx.Done():
return nil, 0, ctx.Err()
}
}
// scriptHandle is returned by scriptCheck.run; it allows cancelling a
// scriptCheck and waiting for it to shut down.
type scriptHandle struct {
@@ -74,6 +122,11 @@ func newScriptCheck(allocID, taskName, checkID string, check *structs.ServiceChe
func (s *scriptCheck) run() *scriptHandle {
ctx, cancel := context.WithCancel(context.Background())
exitCh := make(chan struct{})
// Wrap the original ScriptExecutor in one that obeys context
// cancelation.
ctxExec := newContextExec(ctx, s.exec)
go func() {
defer close(exitCh)
timer := time.NewTimer(0)
@@ -93,11 +146,10 @@ func (s *scriptCheck) run() *scriptHandle {
metrics.IncrCounter([]string{"client", "consul", "script_runs"}, 1)
// Execute check script with timeout
output, code, err := s.exec.Exec(s.check.Timeout, s.check.Command, s.check.Args)
output, code, err := ctxExec.Exec(s.check.Timeout, s.check.Command, s.check.Args)
switch err {
case context.Canceled:
// check removed during execution; exit
cancel()
return
case context.DeadlineExceeded:
metrics.IncrCounter([]string{"client", "consul", "script_timeouts"}, 1)
@@ -112,9 +164,6 @@ func (s *scriptCheck) run() *scriptHandle {
s.logger.Warn("check timed out", "timeout", s.check.Timeout)
}
// cleanup context
cancel()
state := api.HealthCritical
switch code {
case 0:

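As a usage sketch: the contextExec wrapper above lets any caller bound a ScriptExecutor whose implementation might ignore its timeout argument. The helper below is hypothetical (it is not part of this commit) and assumes the surrounding file's imports (context, fmt, time, interfaces):

// runCheckOnce is a hypothetical caller of the contextExec wrapper above.
func runCheckOnce(ctx context.Context, exec interfaces.ScriptExecutor) error {
	ce := newContextExec(ctx, exec)
	// Returns within ~5s even if exec blocks forever: err is
	// context.DeadlineExceeded on timeout, or context.Canceled if ctx ends first.
	_, code, err := ce.Exec(5*time.Second, "/bin/check.sh", nil)
	if err != nil {
		return err
	}
	if code != 0 {
		return fmt.Errorf("check failed with exit code %d", code)
	}
	return nil
}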
View File

@@ -5,6 +5,7 @@ import (
"fmt"
"os"
"os/exec"
"sync/atomic"
"testing"
"time"
@@ -23,20 +24,34 @@ func TestMain(m *testing.M) {
// blockingScriptExec implements ScriptExec by running a subcommand that never
// exits.
type blockingScriptExec struct {
// pctx is canceled *only* for test cleanup. Just like real
// ScriptExecutors, its Exec method cannot be canceled directly, only
// via a timeout.
pctx context.Context
// running is ticked before blocking to allow synchronizing operations
running chan struct{}
// set to true if Exec is called and has exited
exited bool
// set to 1 with atomics if Exec is called and has exited
exited int32
}
func newBlockingScriptExec() *blockingScriptExec {
return &blockingScriptExec{running: make(chan struct{})}
// newBlockingScriptExec returns a ScriptExecutor that blocks Exec() until the
// caller receives on the b.running chan. It also returns a CancelFunc for test
// cleanup only. The runtime cannot cancel ScriptExecutors before their timeout
// expires.
func newBlockingScriptExec() (*blockingScriptExec, context.CancelFunc) {
ctx, cancel := context.WithCancel(context.Background())
exec := &blockingScriptExec{
pctx: ctx,
running: make(chan struct{}),
}
return exec, cancel
}
func (b *blockingScriptExec) Exec(dur time.Duration, _ string, _ []string) ([]byte, int, error) {
b.running <- struct{}{}
ctx, cancel := context.WithTimeout(context.Background(), dur)
ctx, cancel := context.WithTimeout(b.pctx, dur)
defer cancel()
cmd := exec.CommandContext(ctx, testtask.Path(), "sleep", "9000h")
testtask.SetCmdEnv(cmd)
@@ -47,23 +62,20 @@ func (b *blockingScriptExec) Exec(dur time.Duration, _ string, _ []string) ([]by
code = 1
}
}
b.exited = true
atomic.StoreInt32(&b.exited, 1)
return []byte{}, code, err
}
// TestConsulScript_Exec_Cancel asserts that cancelling a script check
// short-circuits any running scripts.
func TestConsulScript_Exec_Cancel(t *testing.T) {
// FIXME: This test is failing now as check process cancellation
// doesn't get propagated to the script check, causing timeouts
t.Skip("FIXME: unexpected failing test")
serviceCheck := structs.ServiceCheck{
Name: "sleeper",
Interval: time.Hour,
Timeout: time.Hour,
}
exec := newBlockingScriptExec()
exec, cancel := newBlockingScriptExec()
defer cancel()
// pass nil for heartbeater as it shouldn't be called
check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, nil, testlog.HCLogger(t), nil)
@@ -80,8 +92,11 @@ func TestConsulScript_Exec_Cancel(t *testing.T) {
case <-time.After(3 * time.Second):
t.Fatalf("timed out waiting for script check to exit")
}
if !exec.exited {
t.Errorf("expected script executor to run and exit but it has not")
// The underlying ScriptExecutor (newBlockingScriptExec) *cannot* be
// canceled. Only a wrapper around it obeys the context cancelation.
if atomic.LoadInt32(&exec.exited) == 1 {
t.Errorf("expected script executor to still be running after timeout")
}
}
@@ -106,16 +121,19 @@ func newFakeHeartbeater() *fakeHeartbeater {
return &fakeHeartbeater{updates: make(chan execStatus)}
}
// TestConsulScript_Exec_Timeout asserts a script will be killed when the
// TestConsulScript_Exec_TimeoutBasic asserts a script will be killed when the
// timeout is reached.
func TestConsulScript_Exec_Timeout(t *testing.T) {
t.Parallel() // run the slow tests in parallel
func TestConsulScript_Exec_TimeoutBasic(t *testing.T) {
t.Parallel()
serviceCheck := structs.ServiceCheck{
Name: "sleeper",
Interval: time.Hour,
Timeout: time.Second,
}
exec := newBlockingScriptExec()
exec, cancel := newBlockingScriptExec()
defer cancel()
hb := newFakeHeartbeater()
check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, hb, testlog.HCLogger(t), nil)
@@ -132,8 +150,11 @@ func TestConsulScript_Exec_Timeout(t *testing.T) {
case <-time.After(3 * time.Second):
t.Fatalf("timed out waiting for script check to exit")
}
if !exec.exited {
t.Errorf("expected script executor to run and exit but it has not")
// The underlying ScriptExecutor (newBlockingScriptExec) *cannot* be
// canceled. Only a wrapper around it obeys the context cancelation.
if atomic.LoadInt32(&exec.exited) == 1 {
t.Errorf("expected script executor to still be running after timeout")
}
// Cancel and watch for exit
@@ -160,11 +181,8 @@ func (sleeperExec) Exec(time.Duration, string, []string) ([]byte, int, error) {
// the timeout is reached and always set a critical status regardless of what
// Exec returns.
func TestConsulScript_Exec_TimeoutCritical(t *testing.T) {
// FIXME: This test is failing now because we no longer mark critical
// if the check succeeded
t.Skip("FIXME: unexpected failing test")
t.Parallel()
t.Parallel() // run the slow tests in parallel
serviceCheck := structs.ServiceCheck{
Name: "sleeper",
Interval: time.Hour,

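The move from a plain bool to sync/atomic in this file is a data-race fix: exited is written from Exec's goroutine and read from the test goroutine, so unsynchronized access would fail under `go test -race`. A minimal, self-contained illustration of the pattern (all names illustrative, not from the commit):

package main

import (
	"fmt"
	"sync/atomic"
	"time"
)

func main() {
	var exited int32 // written by one goroutine, read by another
	go func() {
		time.Sleep(50 * time.Millisecond) // simulate work
		atomic.StoreInt32(&exited, 1)     // safe concurrent write
	}()
	// Polling with atomic.LoadInt32 keeps the read race-free even though
	// the writer may still be running, mirroring the test's assertion.
	for atomic.LoadInt32(&exited) == 0 {
		time.Sleep(10 * time.Millisecond)
	}
	fmt.Println("worker exited")
}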
View File

@@ -777,10 +777,6 @@ func TestConsul_RegServices(t *testing.T) {
// TestConsul_ShutdownOK tests the ok path for the shutdown logic in
// ServiceClient.
func TestConsul_ShutdownOK(t *testing.T) {
// FIXME: This test is failing now because checks are called only once;
// not sure what changed
t.Skip("FIXME: unexpected failing test")
require := require.New(t)
ctx := setupFake(t)
@@ -832,7 +828,7 @@ func TestConsul_ShutdownOK(t *testing.T) {
t.Fatalf("expected 1 checkTTL entry but found: %d", n)
}
for _, v := range ctx.FakeConsul.checkTTLs {
require.Equalf(2, v, "expected 2 updates but foud %d", v)
require.Equalf(2, v, "expected 2 updates but found %d", v)
}
for _, v := range ctx.FakeConsul.checks {
if v.Status != "passing" {

View File

@@ -853,7 +853,11 @@ func ApiResourcesToStructs(in *api.Resources) *structs.Resources {
out := &structs.Resources{
CPU: *in.CPU,
MemoryMB: *in.MemoryMB,
IOPS: *in.IOPS,
}
// COMPAT(0.10): Only being used to issue warnings
if in.IOPS != nil {
out.IOPS = *in.IOPS
}
if l := len(in.Networks); l != 0 {

View File

@@ -240,8 +240,19 @@ func (a *TestAgent) Shutdown() error {
}()
// shutdown agent before endpoints
a.Server.Shutdown()
return a.Agent.Shutdown()
ch := make(chan error, 1)
go func() {
defer close(ch)
a.Server.Shutdown()
ch <- a.Agent.Shutdown()
}()
select {
case err := <-ch:
return err
case <-time.After(1 * time.Minute):
return fmt.Errorf("timed out while shutting down test agent")
}
}
func (a *TestAgent) HTTPAddr() string {

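The Shutdown change above is an instance of a general bounded-wait pattern: run the blocking call in a goroutine and select on its result or a deadline. A self-contained sketch follows; the withTimeout helper is hypothetical, not part of this commit:

package main

import (
	"fmt"
	"time"
)

// withTimeout runs fn in a goroutine and returns its error, or a timeout
// error if fn has not finished within d. fn keeps running in the background;
// the caller merely stops waiting, exactly as TestAgent.Shutdown does above.
func withTimeout(d time.Duration, fn func() error) error {
	ch := make(chan error, 1) // buffered: the goroutine can exit even on timeout
	go func() { ch <- fn() }()
	select {
	case err := <-ch:
		return err
	case <-time.After(d):
		return fmt.Errorf("timed out after %s", d)
	}
}

func main() {
	err := withTimeout(100*time.Millisecond, func() error {
		time.Sleep(time.Second) // simulate a hung shutdown
		return nil
	})
	fmt.Println(err)
}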
View File

@@ -9,7 +9,6 @@ import (
"time"
humanize "github.com/dustin/go-humanize"
"github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/api/contexts"
"github.com/hashicorp/nomad/client/allocrunner/taskrunner/restarts"
@@ -490,7 +489,7 @@ func (c *AllocStatusCommand) outputTaskResources(alloc *api.Allocation, task str
}
var resourcesOutput []string
resourcesOutput = append(resourcesOutput, "CPU|Memory|Disk|IOPS|Addresses")
resourcesOutput = append(resourcesOutput, "CPU|Memory|Disk|Addresses")
firstAddr := ""
if len(addr) > 0 {
firstAddr = addr[0]
@@ -512,11 +511,10 @@ func (c *AllocStatusCommand) outputTaskResources(alloc *api.Allocation, task str
deviceStats = ru.ResourceUsage.DeviceStats
}
}
resourcesOutput = append(resourcesOutput, fmt.Sprintf("%v MHz|%v|%v|%v|%v",
resourcesOutput = append(resourcesOutput, fmt.Sprintf("%v MHz|%v|%v|%v",
cpuUsage,
memUsage,
humanize.IBytes(uint64(*alloc.Resources.DiskMB*bytesPerMegabyte)),
*resource.IOPS,
firstAddr))
for i := 1; i < len(addr); i++ {
resourcesOutput = append(resourcesOutput, fmt.Sprintf("||||%v", addr[i]))

View File

@@ -79,10 +79,10 @@ func (c *DeploymentFailCommand) Run(args []string) int {
return 1
}
// Check that we got no arguments
// Check that we got one argument
args = flags.Args()
if l := len(args); l != 1 {
c.Ui.Error("This command takes no arguments")
c.Ui.Error("This command takes one argument: <deployment id>")
c.Ui.Error(commandErrorText(c))
return 1
}

View File

@@ -109,3 +109,15 @@ func printDeviceStats(ui cli.Ui, deviceGroupStats []*api.DeviceGroupStats) {
}
}
}
func getDeviceAttributes(d *api.NodeDeviceResource) []string {
attrs := []string{fmt.Sprintf("Device Group|%s", d.ID())}
for k, v := range d.Attributes {
attrs = append(attrs, k+"|"+v.String())
}
sort.Strings(attrs[1:])
return attrs
}
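Note that the sort runs over attrs[1:] on purpose: the "Device Group" header row stays pinned in first position while the attribute rows beneath it are ordered alphabetically by key.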

View File

@@ -247,3 +247,29 @@ func TestNodeStatusCommand_GetDeviceResources(t *testing.T) {
assert.Equal(t, expected, formattedDevices)
}
func TestGetDeviceAttributes(t *testing.T) {
d := &api.NodeDeviceResource{
Vendor: "Vendor",
Type: "Type",
Name: "Name",
Attributes: map[string]*api.Attribute{
"utilization": {
FloatVal: helper.Float64ToPtr(0.78),
Unit: "%",
},
"filesystem": {
StringVal: helper.StringToPtr("ext4"),
},
},
}
formattedDevices := getDeviceAttributes(d)
expected := []string{
"Device Group|Vendor/Type/Name",
"filesystem|ext4",
"utilization|0.78 %",
}
assert.Equal(t, expected, formattedDevices)
}

View File

@@ -9,11 +9,10 @@ import (
"time"
humanize "github.com/dustin/go-humanize"
"github.com/posener/complete"
"github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/api/contexts"
"github.com/hashicorp/nomad/helper"
"github.com/posener/complete"
)
const (
@@ -419,6 +418,7 @@ func (c *NodeStatusCommand) formatNode(client *api.Client, node *api.Node) int {
if c.verbose {
c.formatAttributes(node)
c.formatDeviceAttributes(node)
c.formatMeta(node)
}
return 0
@@ -529,6 +529,32 @@ func (c *NodeStatusCommand) formatAttributes(node *api.Node) {
c.Ui.Output(formatKV(attributes))
}
func (c *NodeStatusCommand) formatDeviceAttributes(node *api.Node) {
devices := node.NodeResources.Devices
if len(devices) == 0 {
return
}
sort.Slice(devices, func(i, j int) bool {
return devices[i].ID() < devices[j].ID()
})
first := true
for _, d := range devices {
if len(d.Attributes) == 0 {
continue
}
if first {
c.Ui.Output("\nDevice Group Attributes")
first = false
} else {
c.Ui.Output("")
}
c.Ui.Output(formatKV(getDeviceAttributes(d)))
}
}
func (c *NodeStatusCommand) formatMeta(node *api.Node) {
// Print the meta
keys := make([]string, 0, len(node.Meta))
@@ -611,25 +637,22 @@ func getAllocatedResources(client *api.Client, runningAllocs []*api.Allocation,
total := computeNodeTotalResources(node)
// Get Resources
var cpu, mem, disk, iops int
var cpu, mem, disk int
for _, alloc := range runningAllocs {
cpu += *alloc.Resources.CPU
mem += *alloc.Resources.MemoryMB
disk += *alloc.Resources.DiskMB
iops += *alloc.Resources.IOPS
}
resources := make([]string, 2)
resources[0] = "CPU|Memory|Disk|IOPS"
resources[1] = fmt.Sprintf("%d/%d MHz|%s/%s|%s/%s|%d/%d",
resources[0] = "CPU|Memory|Disk"
resources[1] = fmt.Sprintf("%d/%d MHz|%s/%s|%s/%s",
cpu,
*total.CPU,
humanize.IBytes(uint64(mem*bytesPerMegabyte)),
humanize.IBytes(uint64(*total.MemoryMB*bytesPerMegabyte)),
humanize.IBytes(uint64(disk*bytesPerMegabyte)),
humanize.IBytes(uint64(*total.DiskMB*bytesPerMegabyte)),
iops,
*total.IOPS)
humanize.IBytes(uint64(*total.DiskMB*bytesPerMegabyte)))
return resources
}
@@ -647,7 +670,6 @@ func computeNodeTotalResources(node *api.Node) api.Resources {
total.CPU = helper.IntToPtr(*r.CPU - *res.CPU)
total.MemoryMB = helper.IntToPtr(*r.MemoryMB - *res.MemoryMB)
total.DiskMB = helper.IntToPtr(*r.DiskMB - *res.DiskMB)
total.IOPS = helper.IntToPtr(*r.IOPS - *res.IOPS)
return total
}