Files
nomad/client/state/db_test.go
Daniel Bennett 05f1cda594 dynamic host volumes: client state (#24595)
Store dynamic host volume creations in client state
so they can be "restored" on agent restart. Restore works
by repeating the same Create operation used for the initial creation
and expecting the plugin to be idempotent.

This is (potentially) especially important after host restarts,
which may have dropped mount points and the like.
2024-12-19 09:25:54 -05:00

505 lines
14 KiB
Go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1
package state
import (
"os"
"reflect"
"sync"
"testing"
"time"
"github.com/hashicorp/nomad/ci"
trstate "github.com/hashicorp/nomad/client/allocrunner/taskrunner/state"
dmstate "github.com/hashicorp/nomad/client/devicemanager/state"
"github.com/hashicorp/nomad/client/dynamicplugins"
driverstate "github.com/hashicorp/nomad/client/pluginmanager/drivermanager/state"
cstructs "github.com/hashicorp/nomad/client/structs"
"github.com/hashicorp/nomad/helper/testlog"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/kr/pretty"
"github.com/shoenig/test/must"
"github.com/stretchr/testify/require"
)
// Compile-time checks that each of these implementations satisfies the
// StateDB interface.
var (
	_ StateDB = (*BoltStateDB)(nil)
	_ StateDB = (*ErrDB)(nil)
	_ StateDB = (*MemDB)(nil)
	_ StateDB = (*NoopDB)(nil)
)
// setupBoltStateDB creates a BoltStateDB inside a fresh temporary directory
// and registers a cleanup that closes it when the test finishes. The test is
// failed immediately if the database cannot be created, and marked as failed
// if closing it returns an error.
func setupBoltStateDB(t *testing.T) *BoltStateDB {
	// Attribute failures to the calling test's line, not to this helper.
	t.Helper()

	dir := t.TempDir()
	db, err := NewBoltStateDB(testlog.HCLogger(t), dir)
	if err != nil {
		// t.TempDir removes the directory when the test ends; this eager
		// removal is best-effort, so a failure here is only logged.
		if rmErr := os.RemoveAll(dir); rmErr != nil {
			t.Logf("error removing boltdb dir: %v", rmErr)
		}
		t.Fatalf("error creating boltdb: %v", err)
	}

	t.Cleanup(func() {
		if closeErr := db.Close(); closeErr != nil {
			t.Errorf("error closing boltdb: %v", closeErr)
		}
	})

	return db.(*BoltStateDB)
}
// testDB runs f once per operational StateDB implementation (a BoltStateDB
// backed by a temporary directory and a MemDB), each inside its own subtest
// named after the implementation.
func testDB(t *testing.T, f func(*testing.T, StateDB)) {
	implementations := []StateDB{
		setupBoltStateDB(t),
		NewMemDB(testlog.HCLogger(t)),
	}

	for _, impl := range implementations {
		run := func(t *testing.T) { f(t, impl) }
		t.Run(impl.Name(), run)
	}
}
// TestStateDB_Allocations exercises PutAllocation, GetAllAllocations, and
// DeleteAllocationBucket against every operational StateDB implementation.
func TestStateDB_Allocations(t *testing.T) {
	ci.Parallel(t)

	testDB(t, func(t *testing.T, db StateDB) {
		// A fresh database yields empty, but non-nil, results.
		stored, allocErrs, err := db.GetAllAllocations()
		require.NoError(t, err)
		require.NotNil(t, stored)
		require.Empty(t, stored)
		require.NotNil(t, allocErrs)
		require.Empty(t, allocErrs)

		// Store two allocations.
		first := mock.Alloc()
		second := mock.BatchAlloc()
		require.NoError(t, db.PutAllocation(first))
		require.NoError(t, db.PutAllocation(second))

		// Read them back and compare each one against what was written.
		stored, allocErrs, err = db.GetAllAllocations()
		require.NoError(t, err)
		require.NotNil(t, stored)
		require.Len(t, stored, 2)
		for _, got := range stored {
			var want *structs.Allocation
			switch got.ID {
			case first.ID:
				want = first
			case second.ID:
				want = second
			default:
				t.Fatalf("unexpected alloc id %q", got.ID)
			}
			if !reflect.DeepEqual(got, want) {
				pretty.Ldiff(t, got, want)
				t.Fatalf("alloc %q unequal", got.ID)
			}
		}
		require.NotNil(t, allocErrs)
		require.Empty(t, allocErrs)

		// A third allocation is returned alongside the first two.
		third := mock.SystemAlloc()
		require.NoError(t, db.PutAllocation(third))
		stored, allocErrs, err = db.GetAllAllocations()
		require.NoError(t, err)
		require.NotNil(t, stored)
		require.Len(t, stored, 3)
		require.Contains(t, stored, first)
		require.Contains(t, stored, second)
		require.Contains(t, stored, third)
		require.NotNil(t, allocErrs)
		require.Empty(t, allocErrs)

		// Deleting an unknown allocation succeeds and changes nothing.
		require.NoError(t, db.DeleteAllocationBucket("asdf"))
		stored, _, err = db.GetAllAllocations()
		require.NoError(t, err)
		require.NotNil(t, stored)
		require.Len(t, stored, 3)

		// Deleting the first allocation leaves only the other two.
		require.NoError(t, db.DeleteAllocationBucket(first.ID))
		stored, allocErrs, err = db.GetAllAllocations()
		require.NoError(t, err)
		require.NotNil(t, stored)
		require.Len(t, stored, 2)
		require.Contains(t, stored, second)
		require.Contains(t, stored, third)
		require.NotNil(t, allocErrs)
		require.Empty(t, allocErrs)
	})
}
// ceilDiv returns a divided by b using integer arithmetic, rounded up. The
// result is the mathematical ceiling for a >= 0 and b > 0.
func ceilDiv(a, b int) int {
	roundedUp := a + (b - 1)
	return roundedUp / b
}
// TestStateDB_Batch asserts the behavior of PutAllocation, PutNetworkStatus and
// DeleteAllocationBucket in batch mode, for all operational StateDB implementations.
//
// Writes and deletes are issued from one goroutine per allocation. Their
// errors are handed back over a channel and asserted on the test goroutine,
// because the require helpers stop the test via t.FailNow, which must only be
// called from the goroutine running the test.
func TestStateDB_Batch(t *testing.T) {
	ci.Parallel(t)

	testDB(t, func(t *testing.T, db StateDB) {
		require := require.New(t)

		// For BoltDB, get initial tx_id. For any other implementation
		// getTxID stays nil and the transaction-count checks are skipped.
		var getTxID func() int
		var prevTxID int
		var batchDelay time.Duration
		var batchSize int
		if boltStateDB, ok := db.(*BoltStateDB); ok {
			boltdb := boltStateDB.DB().BoltDB()
			getTxID = func() int {
				tx, err := boltdb.Begin(true)
				require.NoError(err)
				defer tx.Rollback()
				return tx.ID()
			}
			prevTxID = getTxID()
			batchDelay = boltdb.MaxBatchDelay
			batchSize = boltdb.MaxBatchSize
		}

		// Write 1000 allocations and network statuses in batch mode
		startTime := time.Now()
		const numAllocs = 1000
		allocs := make([]*structs.Allocation, 0, numAllocs)
		for i := 0; i < numAllocs; i++ {
			allocs = append(allocs, mock.Alloc())
		}

		var wg sync.WaitGroup
		// Buffered for every write (two per allocation) so sends never block.
		writeErrs := make(chan error, 2*numAllocs)
		for _, alloc := range allocs {
			wg.Add(1)
			go func(alloc *structs.Allocation) {
				// Deferred so wg.Wait cannot hang if this goroutine exits early.
				defer wg.Done()
				writeErrs <- db.PutNetworkStatus(alloc.ID, mock.AllocNetworkStatus(), WithBatchMode())
				writeErrs <- db.PutAllocation(alloc, WithBatchMode())
			}(alloc)
		}
		wg.Wait()
		close(writeErrs)
		for writeErr := range writeErrs {
			require.NoError(writeErr)
		}

		// Check BoltDB actually combined PutAllocation calls into much fewer transactions.
		// The actual number of transactions depends on how fast the goroutines are spawned,
		// with every batchDelay (10ms by default) period saved in a separate transaction,
		// plus each transaction is limited to batchSize writes (1000 by default).
		// See boltdb MaxBatchDelay and MaxBatchSize parameters for more details.
		if getTxID != nil {
			numTransactions := getTxID() - prevTxID
			writeTime := time.Since(startTime)
			expectedNumTransactions := ceilDiv(2*numAllocs, batchSize) + ceilDiv(int(writeTime), int(batchDelay))
			require.LessOrEqual(numTransactions, expectedNumTransactions)
			prevTxID = getTxID()
		}

		// Retrieve allocs and make sure they are the same (order can differ)
		readAllocs, errs, err := db.GetAllAllocations()
		require.NoError(err)
		require.NotNil(readAllocs)
		require.Len(readAllocs, len(allocs))
		require.NotNil(errs)
		require.Empty(errs)

		readAllocsByID := make(map[string]*structs.Allocation, len(readAllocs))
		for _, readAlloc := range readAllocs {
			readAllocsByID[readAlloc.ID] = readAlloc
		}
		for _, alloc := range allocs {
			readAlloc, ok := readAllocsByID[alloc.ID]
			if !ok {
				t.Fatalf("no alloc with ID=%q", alloc.ID)
			}
			if !reflect.DeepEqual(readAlloc, alloc) {
				pretty.Ldiff(t, readAlloc, alloc)
				t.Fatalf("alloc %q unequal", alloc.ID)
			}
		}

		// Delete all allocs in batch mode
		deleteErrs := make(chan error, numAllocs)
		startTime = time.Now()
		for _, alloc := range allocs {
			wg.Add(1)
			go func(alloc *structs.Allocation) {
				defer wg.Done()
				deleteErrs <- db.DeleteAllocationBucket(alloc.ID, WithBatchMode())
			}(alloc)
		}
		wg.Wait()
		close(deleteErrs)
		for deleteErr := range deleteErrs {
			require.NoError(deleteErr)
		}

		// Check BoltDB combined DeleteAllocationBucket calls into much fewer transactions.
		if getTxID != nil {
			numTransactions := getTxID() - prevTxID
			writeTime := time.Since(startTime)
			expectedNumTransactions := ceilDiv(numAllocs, batchSize) + ceilDiv(int(writeTime), int(batchDelay))
			require.LessOrEqual(numTransactions, expectedNumTransactions)
		}

		// Check all allocs were deleted.
		readAllocs, errs, err = db.GetAllAllocations()
		require.NoError(err)
		require.Empty(readAllocs)
		require.Empty(errs)
	})
}
// TestStateDB_TaskState checks the put/get/delete round trip for task state
// and for task runner local state, on every operational StateDB
// implementation.
func TestStateDB_TaskState(t *testing.T) {
	ci.Parallel(t)

	const (
		allocID  = "allocid"
		taskName = "taskname"
	)

	testDB(t, func(t *testing.T, db StateDB) {
		// Nothing has been stored yet, so both states come back nil.
		localState, taskState, err := db.GetTaskRunnerState(allocID, taskName)
		require.NoError(t, err)
		require.Nil(t, localState)
		require.Nil(t, taskState)

		// A task state can be stored even though its allocation was never put.
		wantTaskState := structs.NewTaskState()
		wantTaskState.Failed = true // distinguishable from the default value
		require.NoError(t, db.PutTaskState(allocID, taskName, wantTaskState))

		// Only the task state is returned; the local state is still unset.
		localState, taskState, err = db.GetTaskRunnerState(allocID, taskName)
		require.NoError(t, err)
		require.Nil(t, localState)
		require.Equal(t, wantTaskState, taskState)

		// Deleting tasks that were never stored is not an error...
		require.NoError(t, db.DeleteTaskBucket("adsf", "asdf"))
		require.NoError(t, db.DeleteTaskBucket("asllocid", "asdf"))

		// ...and leaves the stored state as it was.
		localState, taskState, err = db.GetTaskRunnerState(allocID, taskName)
		require.NoError(t, err)
		require.Nil(t, localState)
		require.Equal(t, wantTaskState, taskState)

		// Deleting the real task bucket removes its state.
		require.NoError(t, db.DeleteTaskBucket(allocID, taskName))
		localState, taskState, err = db.GetTaskRunnerState(allocID, taskName)
		require.NoError(t, err)
		require.Nil(t, localState)
		require.Nil(t, taskState)

		// Local state round-trips the same way as task state.
		wantLocalState := trstate.NewLocalState()
		require.NoError(t, db.PutTaskRunnerLocalState(allocID, taskName, wantLocalState))
		localState, taskState, err = db.GetTaskRunnerState(allocID, taskName)
		require.NoError(t, err)
		require.Equal(t, wantLocalState, localState)
		require.Nil(t, taskState)
	})
}
// TestStateDB_DeviceManager checks that device plugin state is nil until it is
// stored, and that a stored value is read back equal to what was written.
func TestStateDB_DeviceManager(t *testing.T) {
	ci.Parallel(t)

	testDB(t, func(t *testing.T, db StateDB) {
		// Before anything is stored, the plugin state is nil.
		got, err := db.GetDevicePluginState()
		require.NoError(t, err)
		require.Nil(t, got)

		// Store an empty plugin state.
		want := &dmstate.PluginState{}
		require.NoError(t, db.PutDevicePluginState(want))

		// Reading it back returns an equal, non-nil value.
		got, err = db.GetDevicePluginState()
		require.NoError(t, err)
		require.NotNil(t, got)
		require.Equal(t, want, got)
	})
}
// TestStateDB_DriverManager checks that driver plugin state is nil until it is
// stored, and that a stored value is read back equal to what was written.
func TestStateDB_DriverManager(t *testing.T) {
	ci.Parallel(t)

	testDB(t, func(t *testing.T, db StateDB) {
		// Before anything is stored, the plugin state is nil.
		got, err := db.GetDriverPluginState()
		require.NoError(t, err)
		require.Nil(t, got)

		// Store an empty plugin state.
		want := &driverstate.PluginState{}
		require.NoError(t, db.PutDriverPluginState(want))

		// Reading it back returns an equal, non-nil value.
		got, err = db.GetDriverPluginState()
		require.NoError(t, err)
		require.NotNil(t, got)
		require.Equal(t, want, got)
	})
}
// TestStateDB_DynamicRegistry checks that dynamic plugin registry state is nil
// until it is stored, and that a stored value is read back equal to what was
// written.
func TestStateDB_DynamicRegistry(t *testing.T) {
	ci.Parallel(t)

	testDB(t, func(t *testing.T, db StateDB) {
		// Before anything is stored, the registry state is nil.
		got, err := db.GetDynamicPluginRegistryState()
		require.NoError(t, err)
		require.Nil(t, got)

		// Store an empty registry state.
		want := &dynamicplugins.RegistryState{}
		require.NoError(t, db.PutDynamicPluginRegistryState(want))

		// Reading it back returns an equal, non-nil value.
		got, err = db.GetDynamicPluginRegistryState()
		require.NoError(t, err)
		require.NotNil(t, got)
		require.Equal(t, want, got)
	})
}
// TestStateDB_HostVolumes checks that a dynamic host volume can be stored,
// listed, and deleted on every operational StateDB implementation.
func TestStateDB_HostVolumes(t *testing.T) {
	ci.Parallel(t)

	testDB(t, func(t *testing.T, db StateDB) {
		// No volumes are stored initially.
		stored, err := db.GetDynamicHostVolumes()
		must.NoError(t, err)
		must.Len(t, 0, stored)

		// Store a single volume together with its create request.
		createReq := &cstructs.ClientHostVolumeCreateRequest{
			ID:                        "test-vol-id",
			Name:                      "test-vol-name",
			PluginID:                  "test-plugin-id",
			NodeID:                    "test-node-id",
			RequestedCapacityMinBytes: 5,
			RequestedCapacityMaxBytes: 10,
			Parameters:                map[string]string{"test": "ing"},
		}
		want := &cstructs.HostVolumeState{
			ID:        "test-vol-id",
			CreateReq: createReq,
		}
		must.NoError(t, db.PutDynamicHostVolume(want))

		// It is the only volume listed, and equals what was written.
		stored, err = db.GetDynamicHostVolumes()
		must.NoError(t, err)
		must.Len(t, 1, stored)
		must.Eq(t, want, stored[0])

		// After deletion the listing is empty again.
		must.NoError(t, db.DeleteDynamicHostVolume(want.ID))
		stored, err = db.GetDynamicHostVolumes()
		must.NoError(t, err)
		must.Len(t, 0, stored)
	})
}
// TestStateDB_CheckResult_keyForCheck checks that keyForCheck yields the
// allocation ID and the check ID joined by an underscore.
func TestStateDB_CheckResult_keyForCheck(t *testing.T) {
	ci.Parallel(t)

	const allocID = "alloc1"
	checkID := structs.CheckID("id1")

	got := keyForCheck(allocID, checkID)
	want := allocID + "_" + string(checkID)
	must.Eq(t, want, string(got))
}
// TestStateDB_CheckResult checks storing, selectively deleting, and purging
// check results. Each scenario runs against a fresh set of StateDB
// implementations.
func TestStateDB_CheckResult(t *testing.T) {
	ci.Parallel(t)

	// newResult builds a passing check result carrying the given ID.
	newResult := func(id string) *structs.CheckQueryResult {
		return &structs.CheckQueryResult{
			ID:        structs.CheckID(id),
			Mode:      "healthiness",
			Status:    "passing",
			Output:    "nomad: tcp ok",
			Timestamp: 1,
			Group:     "group",
			Task:      "task",
			Service:   "service",
			Check:     "check",
		}
	}

	// seed stores results id1-id4 under alloc1 and id5 under alloc2.
	seed := func(t *testing.T, db StateDB) {
		for _, id := range []string{"id1", "id2", "id3", "id4"} {
			must.NoError(t, db.PutCheckResult("alloc1", newResult(id)))
		}
		must.NoError(t, db.PutCheckResult("alloc2", newResult("id5")))
	}

	scenarios := []struct {
		name string
		run  func(t *testing.T, db StateDB)
	}{
		{
			name: "put and get",
			run: func(t *testing.T, db StateDB) {
				err := db.PutCheckResult("alloc1", newResult("abc123"))
				must.NoError(t, err)

				results, err := db.GetCheckResults()
				must.NoError(t, err)
				must.MapContainsKeys(t, results, []string{"alloc1"})
				must.MapContainsKeys(t, results["alloc1"], []structs.CheckID{"abc123"})
			},
		},
		{
			name: "delete",
			run: func(t *testing.T, db StateDB) {
				seed(t, db)

				err := db.DeleteCheckResults("alloc1", []structs.CheckID{"id2", "id3"})
				must.NoError(t, err)

				results, err := db.GetCheckResults()
				must.NoError(t, err)
				must.MapContainsKeys(t, results, []string{"alloc1", "alloc2"})
				must.MapContainsKeys(t, results["alloc1"], []structs.CheckID{"id1", "id4"})
				must.MapContainsKeys(t, results["alloc2"], []structs.CheckID{"id5"})
			},
		},
		{
			name: "purge",
			run: func(t *testing.T, db StateDB) {
				seed(t, db)

				err := db.PurgeCheckResults("alloc1")
				must.NoError(t, err)

				results, err := db.GetCheckResults()
				must.NoError(t, err)
				must.MapContainsKeys(t, results, []string{"alloc2"})
				must.MapContainsKeys(t, results["alloc2"], []structs.CheckID{"id5"})
			},
		},
	}

	for _, sc := range scenarios {
		testDB(t, func(t *testing.T, db StateDB) {
			t.Run(sc.name, func(t *testing.T) {
				sc.run(t, db)
			})
		})
	}
}
// TestStateDB_Upgrade verifies that Upgrade returns no error on a freshly
// created database, for each operational StateDB implementation.
func TestStateDB_Upgrade(t *testing.T) {
	ci.Parallel(t)

	testDB(t, func(t *testing.T, db StateDB) {
		err := db.Upgrade()
		require.NoError(t, err)
	})
}