CSI Plugin Registration (#6555)

This changeset implements the initial registration and fingerprinting
of CSI Plugins as part of #5378. At a high level, it introduces the
following:

* A `csi_plugin` stanza as part of a Nomad task configuration, to
  allow a task to expose that it is a plugin.

* A new task runner hook: `csi_plugin_supervisor`. This hook does two
  things. When the `csi_plugin` stanza is detected, it will
  automatically configure the plugin task to receive bidirectional
  mounts to the CSI intermediary directory. At runtime, it will then
  perform an initial heartbeat of the plugin and handle submitting it to
  the new `dynamicplugins.Registry` for further use by the client, and
  then run a lightweight heartbeat loop that will emit task events
  when health changes.

* The `dynamicplugins.Registry` for handling plugins that run
  as Nomad tasks, in contrast to the existing catalog, which requires
  `go-plugin` type plugins and requires knowing the plugin configuration
  in advance.

* The `csimanager` which fingerprints CSI plugins, in a similar way to
  `drivermanager` and `devicemanager`. It currently only fingerprints
  the NodeID from the plugin, and assumes that all plugins are
  monolithic.

Missing features

* We do not use the live updates of the `dynamicplugins` registry in
  the `csimanager` yet.

* We do not deregister the plugins from the client when they shut down
  yet; they just become indefinitely marked as unhealthy. This is
  deliberate until we figure out how we should manage deploying new
  versions of plugins/transitioning them.
This commit is contained in:
Danielle Lancashire
2019-10-22 15:20:26 +02:00
committed by Tim Gross
parent d8bff7e940
commit d296efd2c6
27 changed files with 2805 additions and 6 deletions

210
plugins/csi/client.go Normal file
View File

@@ -0,0 +1,210 @@
package csi
import (
"context"
"fmt"
"net"
"time"
csipbv1 "github.com/container-storage-interface/spec/lib/go/csi"
"github.com/hashicorp/nomad/plugins/base"
"github.com/hashicorp/nomad/plugins/shared/hclspec"
"google.golang.org/grpc"
)
// NodeGetInfoResponse is the client-side view of a plugin's reply to the
// NodeGetInfo RPC.
type NodeGetInfoResponse struct {
// NodeID is the node identifier reported by the plugin.
NodeID string
// MaxVolumes is the plugin-reported maximum number of volumes per node.
MaxVolumes int64
// AccessibleTopology holds topology information for the node; it may be nil.
AccessibleTopology *Topology
}
// Topology is a map of topological domains to topological segments.
// A topological domain is a sub-division of a cluster, like "region",
// "zone", "rack", etc.
//
// According to CSI, there are a few requirements for the keys within this map:
//   - Valid keys have two segments: an OPTIONAL prefix and name, separated
//     by a slash (/), for example: "com.company.example/zone".
//   - The key name segment is REQUIRED. The prefix is OPTIONAL.
//   - The key name MUST be 63 characters or less, begin and end with an
//     alphanumeric character ([a-z0-9A-Z]), and contain only dashes (-),
//     underscores (_), dots (.), or alphanumerics in between, for example
//     "zone".
//   - The key prefix MUST be 63 characters or less, begin and end with a
//     lower-case alphanumeric character ([a-z0-9]), contain only
//     dashes (-), dots (.), or lower-case alphanumerics in between, and
//     follow domain name notation format
//     (https://tools.ietf.org/html/rfc1035#section-2.3.1).
//   - The key prefix SHOULD include the plugin's host company name and/or
//     the plugin name, to minimize the possibility of collisions with keys
//     from other plugins.
//   - If a key prefix is specified, it MUST be identical across all
//     topology keys returned by the SP (across all RPCs).
//   - Keys MUST be case-insensitive. Meaning the keys "Zone" and "zone"
//     MUST not both exist.
//   - Each value (topological segment) MUST contain 1 or more strings.
//   - Each string MUST be 63 characters or less and begin and end with an
//     alphanumeric character with '-', '_', '.', or alphanumerics in
//     between.
type Topology struct {
// Segments maps a topological domain key to its segment value.
Segments map[string]string
}
// client is the gRPC-backed implementation returned by NewClient. It keeps
// the shared connection together with one generated CSI service client per
// service.
type client struct {
// conn is the underlying gRPC connection; Close tolerates it being nil.
conn *grpc.ClientConn
// identityClient issues the Probe/GetPluginInfo/GetPluginCapabilities RPCs.
identityClient csipbv1.IdentityClient
// controllerClient is the CSI controller service client.
controllerClient csipbv1.ControllerClient
// nodeClient issues the NodeGetInfo RPC.
nodeClient csipbv1.NodeClient
}
// Close releases the underlying gRPC connection. A client that was built
// without a connection closes successfully and returns nil.
func (c *client) Close() error {
	if c.conn == nil {
		return nil
	}
	return c.conn.Close()
}
// NewClient connects to the plugin listening on addr and returns a CSIPlugin
// backed by that connection. An empty addr, or a failure to create the
// connection, is returned as an error.
func NewClient(addr string) (CSIPlugin, error) {
	if len(addr) == 0 {
		return nil, fmt.Errorf("address is empty")
	}

	grpcConn, dialErr := newGrpcConn(addr)
	if dialErr != nil {
		return nil, dialErr
	}

	// Every service client shares the single connection.
	plugin := &client{conn: grpcConn}
	plugin.identityClient = csipbv1.NewIdentityClient(grpcConn)
	plugin.controllerClient = csipbv1.NewControllerClient(grpcConn)
	plugin.nodeClient = csipbv1.NewNodeClient(grpcConn)
	return plugin, nil
}
// newGrpcConn opens a gRPC client connection to addr. The connection is made
// without transport security and the address is dialed as a unix socket.
func newGrpcConn(addr string) (*grpc.ClientConn, error) {
	// gRPC hands the dialer the target and a timeout; dial it over "unix".
	unixDialer := func(target string, timeout time.Duration) (net.Conn, error) {
		return net.DialTimeout("unix", target, timeout)
	}

	conn, err := grpc.Dial(addr, grpc.WithInsecure(), grpc.WithDialer(unixDialer))
	if err != nil {
		return nil, fmt.Errorf("failed to open grpc connection to addr: %s, err: %v", addr, err)
	}
	return conn, nil
}
// PluginInfo satisfies the nomad base.BasePlugin interface. It asks the plugin
// for its name via PluginGetInfo and reports it with type "csi"; the API and
// plugin versions are currently hard-coded placeholders.
func (c *client) PluginInfo() (*base.PluginInfoResponse, error) {
	pluginName, err := c.PluginGetInfo(context.TODO())
	if err != nil {
		return nil, err
	}

	info := &base.PluginInfoResponse{
		Type:              "csi",
		PluginApiVersions: []string{"1.0.0"}, // TODO: fingerprint csi version
		PluginVersion:     "1.0.0",           // TODO: get plugin version from somewhere?!
		Name:              pluginName,
	}
	return info, nil
}
// ConfigSchema satisfies the base.BasePlugin interface. This client has no
// configuration schema, so it always returns a nil spec and a nil error.
func (c *client) ConfigSchema() (*hclspec.Spec, error) {
	var noSchema *hclspec.Spec
	return noSchema, nil
}
// SetConfig satisfies the base.BasePlugin interface. Configuring this client
// is not supported: the argument is ignored and an "unsupported" error is
// always returned.
func (c *client) SetConfig(_ *base.Config) error {
	unsupported := fmt.Errorf("unsupported")
	return unsupported
}
// PluginProbe asks the plugin's identity service whether it is ready.
//
// It returns an error when the client (or its identity client) has not been
// initialized, or when the Probe RPC itself fails. If the plugin answers
// without a ready value, it is reported as ready, for compatibility with
// plugins that do not implement health checks.
func (c *client) PluginProbe(ctx context.Context) (bool, error) {
	// Guard like the other RPC wrappers on this type, so an uninitialized
	// client yields an error instead of a nil-pointer panic.
	if c == nil || c.identityClient == nil {
		return false, fmt.Errorf("Client not initialized")
	}

	resp, err := c.identityClient.Probe(ctx, &csipbv1.ProbeRequest{})
	if err != nil {
		return false, err
	}

	wrapper := resp.GetReady()
	if wrapper == nil {
		// No ready value at all: treat the plugin as ready.
		return true, nil
	}
	return wrapper.GetValue(), nil
}
// PluginGetInfo fetches the plugin's name from its identity service.
//
// It returns an error when the client is not initialized, when the RPC fails,
// or when the plugin reports an empty name.
func (c *client) PluginGetInfo(ctx context.Context) (string, error) {
	if c == nil || c.identityClient == nil {
		return "", fmt.Errorf("Client not initialized")
	}

	resp, err := c.identityClient.GetPluginInfo(ctx, &csipbv1.GetPluginInfoRequest{})
	if err != nil {
		return "", err
	}

	if pluginName := resp.GetName(); pluginName != "" {
		return pluginName, nil
	}
	return "", fmt.Errorf("PluginGetInfo: plugin returned empty name field")
}
// PluginGetCapabilities queries the plugin's identity service for its
// capabilities and converts the reply into a PluginCapabilitySet.
//
// It returns an error when the client is not initialized or the RPC fails.
func (c *client) PluginGetCapabilities(ctx context.Context) (*PluginCapabilitySet, error) {
	if c == nil || c.identityClient == nil {
		return nil, fmt.Errorf("Client not initialized")
	}

	request := &csipbv1.GetPluginCapabilitiesRequest{}
	reply, err := c.identityClient.GetPluginCapabilities(ctx, request)
	if err != nil {
		return nil, err
	}

	capabilitySet := NewPluginCapabilitySet(reply)
	return capabilitySet, nil
}
// NodeGetInfo asks the plugin's node service to describe the current node.
//
// It returns an error when the client (or its node client) is not
// initialized, when the RPC fails, or when the plugin reports an empty node
// ID. On success the result carries the node ID, the plugin's
// max-volumes-per-node value and, when the plugin supplied one, the node's
// accessible topology.
func (c *client) NodeGetInfo(ctx context.Context) (*NodeGetInfoResponse, error) {
	if c == nil {
		return nil, fmt.Errorf("Client not initialized")
	}
	if c.nodeClient == nil {
		return nil, fmt.Errorf("Client not initialized")
	}

	resp, err := c.nodeClient.NodeGetInfo(ctx, &csipbv1.NodeGetInfoRequest{})
	if err != nil {
		return nil, err
	}

	nodeID := resp.GetNodeId()
	if nodeID == "" {
		return nil, fmt.Errorf("plugin failed to return nodeid")
	}

	// Build the result only once the reply is known to be usable.
	result := &NodeGetInfoResponse{
		NodeID:     nodeID,
		MaxVolumes: resp.GetMaxVolumesPerNode(),
	}

	// Carry the topology through instead of dropping it; plugins that do not
	// report one leave AccessibleTopology nil.
	if topo := resp.GetAccessibleTopology(); topo != nil {
		result.AccessibleTopology = &Topology{Segments: topo.GetSegments()}
	}

	return result, nil
}

191
plugins/csi/client_test.go Normal file
View File

@@ -0,0 +1,191 @@
package csi
import (
"context"
"fmt"
"testing"
csipbv1 "github.com/container-storage-interface/spec/lib/go/csi"
"github.com/golang/protobuf/ptypes/wrappers"
fake "github.com/hashicorp/nomad/plugins/csi/testing"
"github.com/stretchr/testify/require"
)
// newTestClient builds a client wired to a fake identity service and returns
// both, so tests can script the fake's next response and then call the client.
func newTestClient() (*fake.IdentityClient, CSIPlugin) {
	identity := new(fake.IdentityClient)
	return identity, &client{identityClient: identity}
}
// TestClient_RPC_PluginProbe checks how PluginProbe maps identity-service
// replies (and RPC errors) onto its (ready, error) result.
func TestClient_RPC_PluginProbe(t *testing.T) {
	cases := []struct {
		Name             string
		ResponseErr      error
		ProbeResponse    *csipbv1.ProbeResponse
		ExpectedResponse bool
		ExpectedErr      error
	}{
		{
			Name:        "handles underlying grpc errors",
			ResponseErr: fmt.Errorf("some grpc error"),
			ExpectedErr: fmt.Errorf("some grpc error"),
		},
		{
			Name: "returns false for ready when the provider returns false",
			ProbeResponse: &csipbv1.ProbeResponse{
				Ready: &wrappers.BoolValue{Value: false},
			},
			ExpectedResponse: false,
		},
		{
			Name: "returns true for ready when the provider returns true",
			ProbeResponse: &csipbv1.ProbeResponse{
				Ready: &wrappers.BoolValue{Value: true},
			},
			ExpectedResponse: true,
		},
		{
			/* When a SP does not return a ready value, a CO MAY treat this as ready.
			   We do so because example plugins rely on this behaviour. We may
			   re-evaluate this decision in the future. */
			Name: "returns true for ready when the provider returns a nil wrapper",
			ProbeResponse: &csipbv1.ProbeResponse{
				Ready: nil,
			},
			ExpectedResponse: true,
		},
	}

	for _, c := range cases {
		t.Run(c.Name, func(t *testing.T) {
			ic, client := newTestClient()
			defer client.Close()

			ic.NextErr = c.ResponseErr
			ic.NextPluginProbe = c.ProbeResponse

			resp, err := client.PluginProbe(context.TODO())
			// Assert on the error actually returned. The previous
			// require.Error(t, c.ExpectedErr, err) checked the expected
			// value against itself and could never fail.
			if c.ExpectedErr != nil {
				require.EqualError(t, err, c.ExpectedErr.Error())
			} else {
				require.NoError(t, err)
			}

			require.Equal(t, c.ExpectedResponse, resp)
		})
	}
}
// TestClient_RPC_PluginInfo checks that PluginGetInfo surfaces RPC errors,
// rejects an empty plugin name and otherwise returns the reported name.
func TestClient_RPC_PluginInfo(t *testing.T) {
	cases := []struct {
		Name             string
		ResponseErr      error
		InfoResponse     *csipbv1.GetPluginInfoResponse
		ExpectedResponse string
		ExpectedErr      error
	}{
		{
			Name:        "handles underlying grpc errors",
			ResponseErr: fmt.Errorf("some grpc error"),
			ExpectedErr: fmt.Errorf("some grpc error"),
		},
		{
			Name: "returns an error if we receive an empty `name`",
			InfoResponse: &csipbv1.GetPluginInfoResponse{
				Name: "",
			},
			ExpectedErr: fmt.Errorf("PluginGetInfo: plugin returned empty name field"),
		},
		{
			Name: "returns the name when successfully retrieved and not empty",
			InfoResponse: &csipbv1.GetPluginInfoResponse{
				Name: "com.hashicorp.storage",
			},
			ExpectedResponse: "com.hashicorp.storage",
		},
	}

	for _, c := range cases {
		t.Run(c.Name, func(t *testing.T) {
			ic, client := newTestClient()
			defer client.Close()

			ic.NextErr = c.ResponseErr
			ic.NextPluginInfo = c.InfoResponse

			resp, err := client.PluginGetInfo(context.TODO())
			// Assert on the error actually returned. The previous
			// require.Error(t, c.ExpectedErr, err) checked the expected
			// value against itself and could never fail.
			if c.ExpectedErr != nil {
				require.EqualError(t, err, c.ExpectedErr.Error())
			} else {
				require.NoError(t, err)
			}

			require.Equal(t, c.ExpectedResponse, resp)
		})
	}
}
// TestClient_RPC_PluginGetCapabilities checks that PluginGetCapabilities
// surfaces RPC errors and maps advertised service capabilities onto the
// returned PluginCapabilitySet.
func TestClient_RPC_PluginGetCapabilities(t *testing.T) {
	cases := []struct {
		Name             string
		ResponseErr      error
		Response         *csipbv1.GetPluginCapabilitiesResponse
		ExpectedResponse *PluginCapabilitySet
		ExpectedErr      error
	}{
		{
			Name:        "handles underlying grpc errors",
			ResponseErr: fmt.Errorf("some grpc error"),
			ExpectedErr: fmt.Errorf("some grpc error"),
		},
		{
			Name: "HasControllerService is true when it's part of the response",
			Response: &csipbv1.GetPluginCapabilitiesResponse{
				Capabilities: []*csipbv1.PluginCapability{
					{
						Type: &csipbv1.PluginCapability_Service_{
							Service: &csipbv1.PluginCapability_Service{
								Type: csipbv1.PluginCapability_Service_CONTROLLER_SERVICE,
							},
						},
					},
				},
			},
			ExpectedResponse: &PluginCapabilitySet{hasControllerService: true},
		},
		{
			Name: "HasTopologies is true when it's part of the response",
			Response: &csipbv1.GetPluginCapabilitiesResponse{
				Capabilities: []*csipbv1.PluginCapability{
					{
						Type: &csipbv1.PluginCapability_Service_{
							Service: &csipbv1.PluginCapability_Service{
								Type: csipbv1.PluginCapability_Service_VOLUME_ACCESSIBILITY_CONSTRAINTS,
							},
						},
					},
				},
			},
			ExpectedResponse: &PluginCapabilitySet{hasTopologies: true},
		},
	}

	for _, c := range cases {
		t.Run(c.Name, func(t *testing.T) {
			ic, client := newTestClient()
			defer client.Close()

			ic.NextErr = c.ResponseErr
			ic.NextPluginCapabilities = c.Response

			resp, err := client.PluginGetCapabilities(context.TODO())
			// Assert on the error actually returned. The previous
			// require.Error(t, c.ExpectedErr, err) checked the expected
			// value against itself and could never fail.
			if c.ExpectedErr != nil {
				require.EqualError(t, err, c.ExpectedErr.Error())
			} else {
				require.NoError(t, err)
			}

			require.Equal(t, c.ExpectedResponse, resp)
		})
	}
}

112
plugins/csi/fake/client.go Normal file
View File

@@ -0,0 +1,112 @@
// Package fake includes fake implementations of public interfaces
// from the CSI package for testing.
package fake
import (
"context"
"errors"
"sync"
"github.com/hashicorp/nomad/plugins/base"
"github.com/hashicorp/nomad/plugins/csi"
"github.com/hashicorp/nomad/plugins/shared/hclspec"
)
// Compile-time assertion that *Client satisfies csi.CSIPlugin.
var _ csi.CSIPlugin = &Client{}
// Client is a mock implementation of the csi.CSIPlugin interface for use in testing
// external components.
//
// For each mocked call there is a Next<Call>Response / Next<Call>Err pair that
// the call returns verbatim, and a <Call>CallCount that the call increments.
type Client struct {
// Mu is held by the counting methods while they bump their call counter
// and read the canned values.
Mu sync.RWMutex
// Canned result and call counter for PluginInfo.
NextPluginInfoResponse *base.PluginInfoResponse
NextPluginInfoErr error
PluginInfoCallCount int64
// Canned result and call counter for PluginProbe.
NextPluginProbeResponse bool
NextPluginProbeErr error
PluginProbeCallCount int64
// Canned result and call counter for PluginGetInfo.
NextPluginGetInfoResponse string
NextPluginGetInfoErr error
PluginGetInfoCallCount int64
// Canned result and call counter for PluginGetCapabilities.
NextPluginGetCapabilitiesResponse *csi.PluginCapabilitySet
NextPluginGetCapabilitiesErr error
PluginGetCapabilitiesCallCount int64
// Canned result and call counter for NodeGetInfo.
NextNodeGetInfoResponse *csi.NodeGetInfoResponse
NextNodeGetInfoErr error
NodeGetInfoCallCount int64
}
// PluginInfo counts the call and returns the configured
// NextPluginInfoResponse and NextPluginInfoErr.
func (c *Client) PluginInfo() (*base.PluginInfoResponse, error) {
	c.Mu.Lock()
	defer c.Mu.Unlock()

	c.PluginInfoCallCount++
	resp, err := c.NextPluginInfoResponse, c.NextPluginInfoErr
	return resp, err
}
// ConfigSchema is not supported by the fake: it always returns a nil spec
// and an "Unsupported" error.
func (c *Client) ConfigSchema() (*hclspec.Spec, error) {
	err := errors.New("Unsupported")
	return nil, err
}
// SetConfig is not supported by the fake: the argument is ignored and an
// "Unsupported" error is always returned.
func (c *Client) SetConfig(a *base.Config) error {
	err := errors.New("Unsupported")
	return err
}
// PluginProbe counts the call and returns the configured
// NextPluginProbeResponse and NextPluginProbeErr. The context is unused.
func (c *Client) PluginProbe(ctx context.Context) (bool, error) {
	c.Mu.Lock()
	defer c.Mu.Unlock()

	c.PluginProbeCallCount++
	ready, err := c.NextPluginProbeResponse, c.NextPluginProbeErr
	return ready, err
}
// PluginGetInfo counts the call and returns the configured
// NextPluginGetInfoResponse (the plugin name) and NextPluginGetInfoErr.
// The context is unused.
func (c *Client) PluginGetInfo(ctx context.Context) (string, error) {
	c.Mu.Lock()
	defer c.Mu.Unlock()

	c.PluginGetInfoCallCount++
	name, err := c.NextPluginGetInfoResponse, c.NextPluginGetInfoErr
	return name, err
}
// PluginGetCapabilities counts the call and returns the configured
// NextPluginGetCapabilitiesResponse and NextPluginGetCapabilitiesErr.
// The context is unused.
func (c *Client) PluginGetCapabilities(ctx context.Context) (*csi.PluginCapabilitySet, error) {
	c.Mu.Lock()
	defer c.Mu.Unlock()

	c.PluginGetCapabilitiesCallCount++
	caps, err := c.NextPluginGetCapabilitiesResponse, c.NextPluginGetCapabilitiesErr
	return caps, err
}
// NodeGetInfo counts the call and returns the configured
// NextNodeGetInfoResponse and NextNodeGetInfoErr. The context is unused.
func (c *Client) NodeGetInfo(ctx context.Context) (*csi.NodeGetInfoResponse, error) {
	c.Mu.Lock()
	defer c.Mu.Unlock()

	c.NodeGetInfoCallCount++
	info, err := c.NextNodeGetInfoResponse, c.NextNodeGetInfoErr
	return info, err
}
// Close is a no-op for the fake client; it holds no connections and always
// returns nil.
func (c *Client) Close() error {
	var err error
	return err
}

85
plugins/csi/plugin.go Normal file
View File

@@ -0,0 +1,85 @@
package csi
import (
"context"
csipbv1 "github.com/container-storage-interface/spec/lib/go/csi"
"github.com/hashicorp/nomad/plugins/base"
)
// CSIPlugin implements a lightweight abstraction layer around a CSI Plugin.
// It validates that responses from storage providers (SP's), correctly conform
// to the specification before returning response data or erroring.
type CSIPlugin interface {
base.BasePlugin
// PluginProbe is used to verify that the plugin is in a healthy state
PluginProbe(ctx context.Context) (bool, error)
// PluginGetInfo is used to return semantic data about the plugin.
// Response:
// - string: name, the name of the plugin in domain notation format.
PluginGetInfo(ctx context.Context) (string, error)
// PluginGetCapabilities is used to return the available capabilities from the
// identity service. This currently only looks for the CONTROLLER_SERVICE and
// Accessible Topology Support
PluginGetCapabilities(ctx context.Context) (*PluginCapabilitySet, error)
// NodeGetInfo is used to return semantic data about the current node in
// respect to the SP.
NodeGetInfo(ctx context.Context) (*NodeGetInfoResponse, error)
// Close shuts down the client and ensures any connections are cleaned up.
Close() error
}
// PluginCapabilitySet records which identity-service capabilities a plugin
// advertised. Values are built by NewPluginCapabilitySet from a
// GetPluginCapabilities response, or by NewTestPluginCapabilitySet.
type PluginCapabilitySet struct {
// hasControllerService is set when CONTROLLER_SERVICE was advertised.
hasControllerService bool
// hasTopologies is set when VOLUME_ACCESSIBILITY_CONSTRAINTS was advertised.
hasTopologies bool
}
// HasControllerService reports whether the plugin advertised the
// CONTROLLER_SERVICE capability.
func (p *PluginCapabilitySet) HasControllerService() bool {
	advertised := p.hasControllerService
	return advertised
}
// HasTopologies reports whether the plugin advertised the
// VOLUME_ACCESSIBILITY_CONSTRAINTS capability, which means its volumes may
// NOT be equally accessible by all nodes in the cluster.
// If true, we MUST use the topology information when scheduling workloads.
func (p *PluginCapabilitySet) HasTopologies() bool {
	return p.hasTopologies
}

// HasToplogies is the original, misspelled name of HasTopologies. It is
// retained so existing callers keep compiling; prefer HasTopologies in new
// code.
func (p *PluginCapabilitySet) HasToplogies() bool {
	return p.HasTopologies()
}
// IsEqual reports whether p and o describe the same capabilities.
//
// It is nil-safe: two nil sets are equal, and a nil set never equals a
// non-nil one.
func (p *PluginCapabilitySet) IsEqual(o *PluginCapabilitySet) bool {
	if p == nil || o == nil {
		// Equal only when both sides are nil.
		return p == o
	}
	return p.hasControllerService == o.hasControllerService &&
		p.hasTopologies == o.hasTopologies
}
// NewTestPluginCapabilitySet builds a PluginCapabilitySet directly from the
// given flags, without needing a plugin response. Note the argument order:
// topologies first, then controller.
func NewTestPluginCapabilitySet(topologies, controller bool) *PluginCapabilitySet {
	set := new(PluginCapabilitySet)
	set.hasControllerService = controller
	set.hasTopologies = topologies
	return set
}
// NewPluginCapabilitySet converts a GetPluginCapabilities response into a
// PluginCapabilitySet. Only service capabilities are inspected:
// CONTROLLER_SERVICE and VOLUME_ACCESSIBILITY_CONSTRAINTS set the matching
// flag, and every other capability is ignored.
func NewPluginCapabilitySet(capabilities *csipbv1.GetPluginCapabilitiesResponse) *PluginCapabilitySet {
	set := &PluginCapabilitySet{}

	for _, capability := range capabilities.GetCapabilities() {
		service := capability.GetService()
		if service == nil {
			// Not a service capability.
			continue
		}

		switch service.Type {
		case csipbv1.PluginCapability_Service_CONTROLLER_SERVICE:
			set.hasControllerService = true
		case csipbv1.PluginCapability_Service_VOLUME_ACCESSIBILITY_CONSTRAINTS:
			set.hasTopologies = true
		}
	}

	return set
}

View File

@@ -0,0 +1,43 @@
package testing
import (
"context"
csipbv1 "github.com/container-storage-interface/spec/lib/go/csi"
"google.golang.org/grpc"
)
// IdentityClient is a CSI identity client used for testing. Each RPC method
// returns the matching Next* response together with NextErr.
type IdentityClient struct {
// NextErr is the error returned by every RPC method.
NextErr error
// NextPluginInfo is returned by GetPluginInfo.
NextPluginInfo *csipbv1.GetPluginInfoResponse
// NextPluginCapabilities is returned by GetPluginCapabilities.
NextPluginCapabilities *csipbv1.GetPluginCapabilitiesResponse
// NextPluginProbe is returned by Probe.
NextPluginProbe *csipbv1.ProbeResponse
}
// NewIdentityClient returns an IdentityClient with no canned responses or
// error configured.
func NewIdentityClient() *IdentityClient {
	return new(IdentityClient)
}
// Reset clears the configured error and all canned responses.
func (f *IdentityClient) Reset() {
	f.NextErr = nil
	f.NextPluginInfo, f.NextPluginCapabilities, f.NextPluginProbe = nil, nil, nil
}
// GetPluginInfo returns the configured NextPluginInfo and NextErr; the
// context, request and call options are ignored.
func (f *IdentityClient) GetPluginInfo(ctx context.Context, in *csipbv1.GetPluginInfoRequest, opts ...grpc.CallOption) (*csipbv1.GetPluginInfoResponse, error) {
	resp, err := f.NextPluginInfo, f.NextErr
	return resp, err
}
// GetPluginCapabilities returns the configured NextPluginCapabilities and
// NextErr; the context, request and call options are ignored.
func (f *IdentityClient) GetPluginCapabilities(ctx context.Context, in *csipbv1.GetPluginCapabilitiesRequest, opts ...grpc.CallOption) (*csipbv1.GetPluginCapabilitiesResponse, error) {
	resp, err := f.NextPluginCapabilities, f.NextErr
	return resp, err
}
// Probe returns the configured NextPluginProbe and NextErr; the context,
// request and call options are ignored.
func (f *IdentityClient) Probe(ctx context.Context, in *csipbv1.ProbeRequest, opts ...grpc.CallOption) (*csipbv1.ProbeResponse, error) {
	resp, err := f.NextPluginProbe, f.NextErr
	return resp, err
}