diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 000000000..8ab081052 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +*.qcow2 filter=lfs diff=lfs merge=lfs -text diff --git a/client/driver/qemu_test.go b/client/driver/qemu_test.go index 9dbfde76a..b3d442b09 100644 --- a/client/driver/qemu_test.go +++ b/client/driver/qemu_test.go @@ -1,14 +1,17 @@ package driver import ( + "bytes" "fmt" - "os" + "net" "path/filepath" + "strconv" "strings" "syscall" "testing" "time" + "github.com/hashicorp/consul/lib/freeport" "github.com/hashicorp/nomad/client/config" cstructs "github.com/hashicorp/nomad/client/structs" "github.com/hashicorp/nomad/nomad/structs" @@ -134,45 +137,49 @@ func TestQemuDriver_StartOpen_Wait(t *testing.T) { } func TestQemuDriver_GracefulShutdown(t *testing.T) { - logger := testLogger() + testutil.SkipSlow(t) if !testutil.IsTravis() { t.Parallel() } ctestutils.QemuCompatible(t) - ctestutils.RequireRoot(t) + + logger := testLogger() + + // Graceful shutdown may be really slow unfortunately + killTimeout := 3 * time.Minute + + // Grab a free port so we can tell when the image has started + port := freeport.GetT(t, 1)[0] + task := &structs.Task{ - Name: "linux", + Name: "alpine-shutdown-test", Driver: "qemu", Config: map[string]interface{}{ - "image_path": "linux-0.2.img", - "accelerator": "tcg", + "image_path": "alpine.qcow2", "graceful_shutdown": true, + "args": []string{"-nodefconfig", "-nodefaults"}, "port_map": []map[string]int{{ - "main": 22, - "web": 8080, + "ssh": 22, }}, - "args": []string{"-nodefconfig", "-nodefaults"}, }, - // With the use of tcg acceleration, it's very unlikely a qemu instance - // will boot (and gracefully halt) in a reasonable amount of time, so - // this timeout is kept low to reduce test execution time. 
- KillTimeout: time.Duration(1 * time.Second), LogConfig: &structs.LogConfig{ MaxFiles: 10, MaxFileSizeMB: 10, }, Resources: &structs.Resources{ - CPU: 500, - MemoryMB: 512, + CPU: 1000, + MemoryMB: 256, Networks: []*structs.NetworkResource{ { - ReservedPorts: []structs.Port{{Label: "main", Value: 22000}, {Label: "web", Value: 80}}, + ReservedPorts: []structs.Port{{Label: "ssh", Value: port}}, }, }, }, + KillTimeout: killTimeout, } ctx := testDriverContexts(t, task) + ctx.DriverCtx.config.MaxKillTimeout = killTimeout defer ctx.AllocDir.Destroy() d := NewQemuDriver(ctx.DriverCtx) @@ -189,7 +196,7 @@ func TestQemuDriver_GracefulShutdown(t *testing.T) { dst := ctx.ExecCtx.TaskDir.Dir - copyFile("./test-resources/qemu/linux-0.2.img", filepath.Join(dst, "linux-0.2.img"), t) + copyFile("./test-resources/qemu/alpine.qcow2", filepath.Join(dst, "alpine.qcow2"), t) if _, err := d.Prestart(ctx.ExecCtx, task); err != nil { t.Fatalf("Prestart failed: %v", err) @@ -202,26 +209,46 @@ func TestQemuDriver_GracefulShutdown(t *testing.T) { // Clean up defer func() { + select { + case <-resp.Handle.WaitCh(): + // Already exited + return + default: + } + if err := resp.Handle.Kill(); err != nil { - logger.Printf("Error killing Qemu test: %s", err) + logger.Printf("[TEST] Error killing Qemu test: %s", err) } }() - // The monitor socket will not exist immediately, so we'll wait up to - // 5 seconds for it to become available. 
- monitorPath := fmt.Sprintf("%s/linux/%s", ctx.AllocDir.AllocDir, qemuMonitorSocketName) - monitorPathExists := false - for i := 0; i < 100; i++ { - if _, err := os.Stat(monitorPath); !os.IsNotExist(err) { - logger.Printf("monitor socket exists at %q\n", monitorPath) - monitorPathExists = true - break + // Wait until sshd starts before attempting to do a graceful shutdown + testutil.WaitForResult(func() (bool, error) { + conn, err := net.Dial("tcp", net.JoinHostPort("127.0.0.1", strconv.Itoa(port))) + if err != nil { + return false, err } - time.Sleep(200 * time.Millisecond) - } - if monitorPathExists == false { - t.Fatalf("monitor socket did not exist after waiting 20 seconds") - } + + // Since the connection will be accepted by the QEMU process + // before sshd actually starts, we need to block until we can + // read the "SSH" magic bytes + header := make([]byte, 3) + conn.SetReadDeadline(time.Now().Add(10 * time.Second)) + _, err = conn.Read(header) + if err != nil { + return false, err + } + if !bytes.Equal(header, []byte{'S', 'S', 'H'}) { + return false, fmt.Errorf("expected 'SSH' but received: %q %v", string(header), header) + } + + logger.Printf("[TEST] connected to sshd in VM") + conn.Close() + return true, nil + }, func(err error) { + t.Fatalf("failed to connect to sshd in VM: %v", err) + }) + + monitorPath := filepath.Join(ctx.AllocDir.AllocDir, task.Name, qemuMonitorSocketName) // userPid supplied in sendQemuShutdown calls is bogus (it's used only // for log output) @@ -236,6 +263,13 @@ func TestQemuDriver_GracefulShutdown(t *testing.T) { if err := sendQemuShutdown(ctx.DriverCtx.logger, monitorPath, 0); err != nil { t.Fatalf("unexpected error from sendQemuShutdown: %s", err) } + + select { + case <-resp.Handle.WaitCh(): + logger.Printf("[TEST] VM exited gracefully as expected") + case <-time.After(killTimeout): + t.Fatalf("VM did not exit gracefully exit before timeout: %s", killTimeout) + } } func TestQemuDriverUser(t *testing.T) { diff --git 
a/client/driver/test-resources/qemu/README.md b/client/driver/test-resources/qemu/README.md new file mode 100644 index 000000000..ccfa25ff3 --- /dev/null +++ b/client/driver/test-resources/qemu/README.md @@ -0,0 +1,21 @@ +# QEMU Test Images + +## `linux-0.2.img` + +via https://en.wikibooks.org/wiki/QEMU/Images + +Does not support graceful shutdown. + +## Alpine + +``` +qemu-img create -f qcow2 alpine.qcow2 8G + +# Download virtual x86_64 Alpine image https://alpinelinux.org/downloads/ +qemu-system-x86_64 -cdrom path/to/alpine.iso -hda alpine.qcow2 -boot d -net nic -net user -m 256 -localtime + +# In the guest run setup-alpine and exit when complete + +# Boot again with: +qemu-system-x86_64 alpine.qcow2 +``` diff --git a/client/driver/test-resources/qemu/alpine.qcow2 b/client/driver/test-resources/qemu/alpine.qcow2 new file mode 100644 index 000000000..6c680469c Binary files /dev/null and b/client/driver/test-resources/qemu/alpine.qcow2 differ diff --git a/scripts/travis.sh b/scripts/travis.sh index 8e500f793..541bbb587 100755 --- a/scripts/travis.sh +++ b/scripts/travis.sh @@ -14,7 +14,7 @@ if [ "$RUN_STATIC_CHECKS" ]; then fi fi -make test +NOMAD_SLOW_TEST=1 make test TEST_OUTPUT=$? kill $PING_LOOP_PID diff --git a/testutil/slow.go b/testutil/slow.go new file mode 100644 index 000000000..1a8088024 --- /dev/null +++ b/testutil/slow.go @@ -0,0 +1,15 @@ +package testutil + +import ( + "os" + + testing "github.com/mitchellh/go-testing-interface" +) + +// SkipSlow skips a slow test unless the NOMAD_SLOW_TEST environment variable +// is set. +func SkipSlow(t testing.T) { + if os.Getenv("NOMAD_SLOW_TEST") == "" { + t.Skip("Skipping slow test. Set NOMAD_SLOW_TEST=1 to run.") + } +}