From a280cf06eb7b969f787c5976d6693638c06f3004 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Thu, 19 Mar 2020 17:09:49 -0400 Subject: [PATCH] csi: stub fingerprint on instance manager shutdown (#7388) Run the plugin fingerprint one last time with a closed client during instance manager shutdown. This will return quickly and will give us a correctly-populated `PluginInfo` marked as unhealthy so the Nomad client can update the server about plugin health. --- client/pluginmanager/csimanager/instance.go | 21 +++++-- .../pluginmanager/csimanager/instance_test.go | 60 +++++++++++++++++-- plugins/csi/fake/client.go | 40 +++++++++++++ 3 files changed, 112 insertions(+), 9 deletions(-) diff --git a/client/pluginmanager/csimanager/instance.go b/client/pluginmanager/csimanager/instance.go index 4386923fc..95f88221f 100644 --- a/client/pluginmanager/csimanager/instance.go +++ b/client/pluginmanager/csimanager/instance.go @@ -126,15 +126,26 @@ func (i *instanceManager) runLoop() { i.client.Close() i.client = nil } - close(i.shutdownCh) - return - case <-timer.C: - ctx, cancelFn := i.requestCtxWithTimeout(managerFingerprintInterval) + // run one last fingerprint so that we mark the plugin as unhealthy. + // the client has been closed so this will return quickly with the + // plugin's basic info + ctx, cancelFn := i.requestCtxWithTimeout(time.Second) info := i.fp.fingerprint(ctx) cancelFn() - i.updater(i.info.Name, info) + if info != nil { + i.updater(i.info.Name, info) + } + close(i.shutdownCh) + return + case <-timer.C: + ctx, cancelFn := i.requestCtxWithTimeout(managerFingerprintInterval) + info := i.fp.fingerprint(ctx) + cancelFn() + if info != nil { + i.updater(i.info.Name, info) + } timer.Reset(managerFingerprintInterval) } } diff --git a/client/pluginmanager/csimanager/instance_test.go b/client/pluginmanager/csimanager/instance_test.go index c6b53043f..6a8658df5 100644 --- a/client/pluginmanager/csimanager/instance_test.go +++ b/client/pluginmanager/csimanager/instance_test.go @@ -1,11 +1,18 @@ package csimanager import ( + "context" + "fmt" + "sync" "testing" + "time" "github.com/hashicorp/nomad/client/dynamicplugins" "github.com/hashicorp/nomad/helper/testlog" + "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/plugins/csi" "github.com/hashicorp/nomad/plugins/csi/fake" + "github.com/stretchr/testify/require" ) func setupTestNodeInstanceManager(t *testing.T) (*fake.Client, *instanceManager) { @@ -21,10 +28,55 @@ func setupTestNodeInstanceManager(t *testing.T) (*fake.Client, *instanceManager) info: pinfo, client: tp, fp: &pluginFingerprinter{ - logger: logger.Named("fingerprinter"), - info: pinfo, - client: tp, - fingerprintNode: true, + logger: logger.Named("fingerprinter"), + info: pinfo, + client: tp, + fingerprintNode: true, + hadFirstSuccessfulFingerprintCh: make(chan struct{}), }, } } + +func TestInstanceManager_Shutdown(t *testing.T) { + + var pluginHealth bool + var lock sync.Mutex + ctx, cancelFn := context.WithCancel(context.Background()) + client, im := setupTestNodeInstanceManager(t) + im.shutdownCtx = ctx + im.shutdownCtxCancelFn = cancelFn + im.shutdownCh = make(chan struct{}) + im.updater = func(_ string, info *structs.CSIInfo) { + fmt.Println(info) + lock.Lock() + defer lock.Unlock() + pluginHealth = info.Healthy + } + + // set up a mock successful fingerprint so that we can get + // a healthy plugin before shutting down + client.NextPluginGetCapabilitiesResponse = &csi.PluginCapabilitySet{} + client.NextPluginGetCapabilitiesErr = nil + client.NextNodeGetInfoResponse = &csi.NodeGetInfoResponse{NodeID: "foo"} + client.NextNodeGetInfoErr = nil + client.NextNodeGetCapabilitiesResponse = &csi.NodeCapabilitySet{} + client.NextNodeGetCapabilitiesErr = nil + client.NextPluginProbeResponse = true + + go im.runLoop() + + require.Eventually(t, func() bool { + lock.Lock() + defer lock.Unlock() + return pluginHealth + }, 1*time.Second, 10*time.Millisecond) + + cancelFn() // fires im.shutdown() + + require.Eventually(t, func() bool { + lock.Lock() + defer lock.Unlock() + return !pluginHealth + }, 1*time.Second, 10*time.Millisecond) + +} diff --git a/plugins/csi/fake/client.go b/plugins/csi/fake/client.go index b18ec6f6e..162b6bb73 100644 --- a/plugins/csi/fake/client.go +++ b/plugins/csi/fake/client.go @@ -5,6 +5,7 @@ package fake import ( "context" "errors" + "fmt" "sync" "github.com/hashicorp/nomad/plugins/base" @@ -232,5 +233,44 @@ func (c *Client) NodeUnpublishVolume(ctx context.Context, volumeID, targetPath s // Shutdown the client and ensure any connections are cleaned up. func (c *Client) Close() error { + + c.NextPluginInfoResponse = nil + c.NextPluginInfoErr = fmt.Errorf("closed client") + + c.NextPluginProbeResponse = false + c.NextPluginProbeErr = fmt.Errorf("closed client") + + c.NextPluginGetInfoNameResponse = "" + c.NextPluginGetInfoVersionResponse = "" + c.NextPluginGetInfoErr = fmt.Errorf("closed client") + + c.NextPluginGetCapabilitiesResponse = nil + c.NextPluginGetCapabilitiesErr = fmt.Errorf("closed client") + + c.NextControllerGetCapabilitiesResponse = nil + c.NextControllerGetCapabilitiesErr = fmt.Errorf("closed client") + + c.NextControllerPublishVolumeResponse = nil + c.NextControllerPublishVolumeErr = fmt.Errorf("closed client") + + c.NextControllerUnpublishVolumeResponse = nil + c.NextControllerUnpublishVolumeErr = fmt.Errorf("closed client") + + c.NextControllerValidateVolumeErr = fmt.Errorf("closed client") + + c.NextNodeGetCapabilitiesResponse = nil + c.NextNodeGetCapabilitiesErr = fmt.Errorf("closed client") + + c.NextNodeGetInfoResponse = nil + c.NextNodeGetInfoErr = fmt.Errorf("closed client") + + c.NextNodeStageVolumeErr = fmt.Errorf("closed client") + + c.NextNodeUnstageVolumeErr = fmt.Errorf("closed client") + + c.NextNodePublishVolumeErr = fmt.Errorf("closed client") + + c.NextNodeUnpublishVolumeErr = fmt.Errorf("closed client") + return nil }