Files
nomad/drivers/qemu/driver_test.go
Tim Gross eedbd36fef qemu: pass task resources into driver for cgroup setup (#23466)
As part of the work for 1.7.0 we moved portions of the task cgroup setup down
into the executor. This requires that the executor constructor get the
`TaskConfig.Resources` struct, and this was missing from the `qemu` driver. We
fixed a panic caused by this change in #19089 before we shipped, but this fix
was effectively undo after we added plumbing for custom cgroups for `raw_exec`
in 1.8.0. As a result, running `qemu` tasks always fail on Linux.

This was undetected in testing because our CI environment doesn't have QEMU
installed. I've got all the unit tests running locally again and have added QEMU
installation when we're running the drivers tests.

Fixes: https://github.com/hashicorp/nomad/issues/23250
2024-07-01 11:41:10 -04:00

399 lines
10 KiB
Go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1
package qemu
import (
"context"
"io"
"os"
"path/filepath"
"testing"
"time"
"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/client/lib/cgroupslib"
"github.com/hashicorp/nomad/client/lib/numalib"
ctestutil "github.com/hashicorp/nomad/client/testutil"
"github.com/hashicorp/nomad/helper/pluginutils/hclutils"
"github.com/hashicorp/nomad/helper/testlog"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/plugins/base"
"github.com/hashicorp/nomad/plugins/drivers"
dtestutil "github.com/hashicorp/nomad/plugins/drivers/testutils"
"github.com/hashicorp/nomad/testutil"
"github.com/shoenig/test/must"
)
// TODO(preetha) - tests remaining
// using monitor socket for graceful shutdown
// Verifies starting a qemu image and stopping it
func TestQemuDriver_Start_Wait_Stop(t *testing.T) {
ci.Parallel(t)
ctestutil.QemuCompatible(t)
ctestutil.CgroupsCompatible(t)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
topology := numalib.Scan(numalib.PlatformScanners())
d := NewQemuDriver(ctx, testlog.HCLogger(t))
d.(*Driver).nomadConfig = &base.ClientDriverConfig{Topology: topology}
harness := dtestutil.NewDriverHarness(t, d)
allocID := uuid.Generate()
harness.MakeTaskCgroup(allocID, "linux")
task := &drivers.TaskConfig{
AllocID: allocID,
ID: uuid.Generate(),
Name: "linux",
Resources: testResources(allocID, "linux"),
}
tc := &TaskConfig{
ImagePath: "linux-0.2.img",
Accelerator: "tcg",
GracefulShutdown: false,
PortMap: map[string]int{
"main": 22,
"web": 8080,
},
Args: []string{"-nodefaults"},
}
must.NoError(t, task.EncodeConcreteDriverConfig(&tc))
cleanup := harness.MkAllocDir(task, true)
defer cleanup()
taskDir := filepath.Join(task.AllocDir, task.Name)
copyFile("./test-resources/linux-0.2.img", filepath.Join(taskDir, "linux-0.2.img"), t)
handle, _, err := harness.StartTask(task)
must.NoError(t, err)
must.NotNil(t, handle)
// Ensure that sending a Signal returns an error
err = d.SignalTask(task.ID, "SIGINT")
must.EqError(t, err, "QEMU driver can't signal commands")
must.NoError(t, harness.DestroyTask(task.ID, true))
}
// copyFile moves an existing file to the destination
func copyFile(src, dst string, t *testing.T) {
in, err := os.Open(src)
if err != nil {
t.Fatalf("copying %v -> %v failed: %v", src, dst, err)
}
defer in.Close()
out, err := os.Create(dst)
if err != nil {
t.Fatalf("copying %v -> %v failed: %v", src, dst, err)
}
defer func() {
if err := out.Close(); err != nil {
t.Fatalf("copying %v -> %v failed: %v", src, dst, err)
}
}()
if _, err = io.Copy(out, in); err != nil {
t.Fatalf("copying %v -> %v failed: %v", src, dst, err)
}
if err := out.Sync(); err != nil {
t.Fatalf("copying %v -> %v failed: %v", src, dst, err)
}
}
// Verifies starting a qemu image and stopping it
func TestQemuDriver_User(t *testing.T) {
ci.Parallel(t)
ctestutil.QemuCompatible(t)
ctestutil.CgroupsCompatible(t)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
topology := numalib.Scan(numalib.PlatformScanners())
d := NewQemuDriver(ctx, testlog.HCLogger(t))
d.(*Driver).nomadConfig = &base.ClientDriverConfig{Topology: topology}
harness := dtestutil.NewDriverHarness(t, d)
allocID := uuid.Generate()
harness.MakeTaskCgroup(allocID, "linux")
task := &drivers.TaskConfig{
ID: uuid.Generate(),
Name: "linux",
User: "alice",
Resources: testResources(allocID, "linux"),
}
tc := &TaskConfig{
ImagePath: "linux-0.2.img",
Accelerator: "tcg",
GracefulShutdown: false,
PortMap: map[string]int{
"main": 22,
"web": 8080,
},
Args: []string{"-nodefaults"},
}
must.NoError(t, task.EncodeConcreteDriverConfig(&tc))
cleanup := harness.MkAllocDir(task, true)
defer cleanup()
taskDir := filepath.Join(task.AllocDir, task.Name)
copyFile("./test-resources/linux-0.2.img", filepath.Join(taskDir, "linux-0.2.img"), t)
_, _, err := harness.StartTask(task)
must.ErrorContains(t, err, "unknown user alice")
}
// TestQemuDriver_Stats verifies we can get resources usage stats
func TestQemuDriver_Stats(t *testing.T) {
ci.Parallel(t)
ctestutil.QemuCompatible(t)
ctestutil.CgroupsCompatible(t)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
topology := numalib.Scan(numalib.PlatformScanners())
d := NewQemuDriver(ctx, testlog.HCLogger(t))
d.(*Driver).nomadConfig = &base.ClientDriverConfig{Topology: topology}
harness := dtestutil.NewDriverHarness(t, d)
allocID := uuid.Generate()
harness.MakeTaskCgroup(allocID, "linux")
task := &drivers.TaskConfig{
AllocID: allocID,
ID: uuid.Generate(),
Name: "linux",
Resources: testResources(allocID, "linux"),
}
tc := &TaskConfig{
ImagePath: "linux-0.2.img",
Accelerator: "tcg",
GracefulShutdown: false,
PortMap: map[string]int{
"main": 22,
"web": 8080,
},
Args: []string{"-nodefaults"},
}
must.NoError(t, task.EncodeConcreteDriverConfig(&tc))
cleanup := harness.MkAllocDir(task, true)
defer cleanup()
taskDir := filepath.Join(task.AllocDir, task.Name)
copyFile("./test-resources/linux-0.2.img", filepath.Join(taskDir, "linux-0.2.img"), t)
handle, _, err := harness.StartTask(task)
must.NoError(t, err)
must.NotNil(t, handle)
// Wait for task to start
exitCh, err := harness.WaitTask(context.Background(), handle.Config.ID)
must.NoError(t, err)
// Wait until task started
must.NoError(t, harness.WaitUntilStarted(task.ID, 1*time.Second))
t.Cleanup(func() { harness.DestroyTask(task.ID, true) })
timeout := 30 * time.Second
statsCtx, cancel := context.WithTimeout(context.Background(), timeout)
defer cancel()
statsCh, err := harness.TaskStats(statsCtx, task.ID, time.Second*1)
must.NoError(t, err)
for {
select {
case exitCode := <-exitCh:
t.Fatalf("should not have exited: %+v", exitCode)
case stats := <-statsCh:
if stats == nil {
continue // receiving empty stats races with ctx.Done
}
t.Logf("CPU:%+v Memory:%+v\n",
stats.ResourceUsage.CpuStats, stats.ResourceUsage.MemoryStats)
if stats.ResourceUsage.MemoryStats.RSS != 0 {
must.NonZero(t, stats.ResourceUsage.MemoryStats.RSS)
must.NoError(t, harness.DestroyTask(task.ID, true))
return
}
case <-ctx.Done():
t.Fatal("timed out before receiving non-zero stats")
}
}
}
func TestQemuDriver_Fingerprint(t *testing.T) {
ci.Parallel(t)
ctestutil.QemuCompatible(t)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
d := NewQemuDriver(ctx, testlog.HCLogger(t))
harness := dtestutil.NewDriverHarness(t, d)
fingerCh, err := harness.Fingerprint(context.Background())
must.NoError(t, err)
select {
case finger := <-fingerCh:
must.Eq(t, drivers.HealthStateHealthy, finger.Health)
ok, _ := finger.Attributes["driver.qemu"].GetBool()
must.True(t, ok)
case <-time.After(time.Duration(testutil.TestMultiplier()*5) * time.Second):
t.Fatal("timeout receiving fingerprint")
}
}
func TestConfig_ParseAllHCL(t *testing.T) {
ci.Parallel(t)
cfgStr := `
config {
image_path = "/tmp/image_path"
drive_interface = "virtio"
accelerator = "kvm"
args = ["arg1", "arg2"]
port_map {
http = 80
https = 443
}
graceful_shutdown = true
}`
expected := &TaskConfig{
ImagePath: "/tmp/image_path",
DriveInterface: "virtio",
Accelerator: "kvm",
Args: []string{"arg1", "arg2"},
PortMap: map[string]int{
"http": 80,
"https": 443,
},
GracefulShutdown: true,
}
var tc *TaskConfig
hclutils.NewConfigParser(taskConfigSpec).ParseHCL(t, cfgStr, &tc)
must.Eq(t, expected, tc)
}
func TestIsAllowedDriveInterface(t *testing.T) {
validInterfaces := []string{"ide", "scsi", "sd", "mtd", "floppy", "pflash", "virtio", "none"}
invalidInterfaces := []string{"foo", "virtio-foo"}
for _, i := range validInterfaces {
must.True(t, isAllowedDriveInterface(i),
must.Sprintf("drive_interface should be allowed: %v", i))
}
for _, i := range invalidInterfaces {
must.False(t, isAllowedDriveInterface(i),
must.Sprintf("drive_interface should not be allowed: %v", i))
}
}
func TestIsAllowedImagePath(t *testing.T) {
ci.Parallel(t)
allowedPaths := []string{"/tmp", "/opt/qemu"}
allocDir := "/opt/nomad/some-alloc-dir"
validPaths := []string{
"local/path",
"/tmp/subdir/qemu-image",
"/opt/qemu/image",
"/opt/qemu/subdir/image",
"/opt/nomad/some-alloc-dir/local/image.img",
}
invalidPaths := []string{
"/image.img",
"../image.img",
"/tmpimage.img",
"/opt/other/image.img",
"/opt/nomad-submatch.img",
}
for _, p := range validPaths {
must.True(t, isAllowedImagePath(allowedPaths, allocDir, p),
must.Sprintf("path should be allowed: %v", p))
}
for _, p := range invalidPaths {
must.False(t, isAllowedImagePath(allowedPaths, allocDir, p),
must.Sprintf("path should be not allowed: %v", p))
}
}
func TestArgsAllowList(t *testing.T) {
ci.Parallel(t)
pluginConfigAllowList := []string{"-drive", "-net", "-snapshot"}
validArgs := [][]string{
{"-drive", "/path/to/wherever", "-snapshot"},
{"-net", "tap,vlan=0,ifname=tap0"},
}
invalidArgs := [][]string{
{"-usbdevice", "mouse"},
{"-singlestep"},
{"--singlestep"},
{" -singlestep"},
{"\t-singlestep"},
}
for _, args := range validArgs {
must.NoError(t, validateArgs(pluginConfigAllowList, args))
must.NoError(t, validateArgs([]string{}, args))
}
for _, args := range invalidArgs {
must.Error(t, validateArgs(pluginConfigAllowList, args))
must.NoError(t, validateArgs([]string{}, args))
}
}
func testResources(allocID, task string) *drivers.Resources {
if allocID == "" || task == "" {
panic("must be set")
}
r := &drivers.Resources{
NomadResources: &structs.AllocatedTaskResources{
Memory: structs.AllocatedMemoryResources{
MemoryMB: 128,
},
Cpu: structs.AllocatedCpuResources{
CpuShares: 100,
},
Networks: []*structs.NetworkResource{
{
ReservedPorts: []structs.Port{
{Label: "main", Value: 22000}, {Label: "web", Value: 8888}},
},
},
},
LinuxResources: &drivers.LinuxResources{
MemoryLimitBytes: 134217728,
CPUShares: 100,
CpusetCgroupPath: cgroupslib.LinuxResourcesPath(allocID, task, false),
},
}
return r
}