From 044784b2fbfb87e9ab85521f8faa46f5ccbb684a Mon Sep 17 00:00:00 2001
From: Tim Gross
Date: Wed, 15 Jan 2025 14:04:18 -0500
Subject: [PATCH] dynamic host volumes: move node pool governance to placement filter (CE) (#24867)

Enterprise governance checks happen after dynamic host volumes are placed, so
if node pool governance is active and you don't set a node pool or node ID for
a volume, it's possible to get a placement that fails node pool governance even
though there might be other nodes in the cluster that would be valid
placements.

Move the node pool governance for host volumes into the placement path, so that
we're checking a specific node pool when node pool or node ID are set, but
otherwise filtering out candidate nodes by node pool.

This changeset is the CE version of ENT/2200.

Ref: https://hashicorp.atlassian.net/browse/NET-11549
Ref: https://github.com/hashicorp/nomad-enterprise/pull/2200
---
 nomad/host_volume_endpoint.go      | 27 +++++++++++++++++++++++++--
 nomad/host_volume_endpoint_ce.go   |  8 +++++++-
 nomad/host_volume_endpoint_test.go | 10 +++++-----
 3 files changed, 37 insertions(+), 8 deletions(-)

diff --git a/nomad/host_volume_endpoint.go b/nomad/host_volume_endpoint.go
index fcdd8c887..d92945855 100644
--- a/nomad/host_volume_endpoint.go
+++ b/nomad/host_volume_endpoint.go
@@ -508,9 +508,17 @@ func (v *HostVolume) placeHostVolume(snap *state.StateSnapshot, vol *structs.Hos
 		return node, nil
 	}
 
+	poolFilterFn, err := v.enterpriseNodePoolFilter(snap, vol)
+	if err != nil {
+		return nil, err
+	}
+
 	var iter memdb.ResultIterator
-	var err error
 	if vol.NodePool != "" {
+		if !poolFilterFn(vol.NodePool) {
+			return nil, fmt.Errorf("namespace %q does not allow volumes to use node pool %q",
+				vol.Namespace, vol.NodePool)
+		}
 		iter, err = snap.NodesByNodePool(nil, vol.NodePool)
 	} else {
 		iter, err = snap.Nodes(nil)
@@ -532,6 +540,12 @@ func (v *HostVolume) placeHostVolume(snap *state.StateSnapshot, vol *structs.Hos
 	constraints = append(constraints, vol.Constraints...)
 	checker = scheduler.NewConstraintChecker(ctx, constraints)
 
+	var (
+		filteredByExisting    int
+		filteredByGovernance  int
+		filteredByFeasibility int
+	)
+
 	for {
 		raw := iter.Next()
 		if raw == nil {
@@ -544,11 +558,18 @@ func (v *HostVolume) placeHostVolume(snap *state.StateSnapshot, vol *structs.Hos
 		// haven't yet written to state. The client will reject requests to
 		// create/register a volume with the same name with a different ID.
 		if _, hasVol := candidate.HostVolumes[vol.Name]; hasVol {
+			filteredByExisting++
+			continue
+		}
+
+		if !poolFilterFn(candidate.NodePool) {
+			filteredByGovernance++
 			continue
 		}
 
 		if checker != nil {
 			if ok := checker.Feasible(candidate); !ok {
+				filteredByFeasibility++
 				continue
 			}
 		}
@@ -559,7 +580,9 @@ func (v *HostVolume) placeHostVolume(snap *state.StateSnapshot, vol *structs.Hos
 
 	}
 
-	return nil, fmt.Errorf("no node meets constraints")
+	return nil, fmt.Errorf(
+		"no node meets constraints: %d nodes had existing volume, %d nodes filtered by node pool governance, %d nodes were infeasible",
+		filteredByExisting, filteredByGovernance, filteredByFeasibility)
 }
 
 // placementContext implements the scheduler.ConstraintContext interface, a
diff --git a/nomad/host_volume_endpoint_ce.go b/nomad/host_volume_endpoint_ce.go
index 756df5f42..982ffd121 100644
--- a/nomad/host_volume_endpoint_ce.go
+++ b/nomad/host_volume_endpoint_ce.go
@@ -12,7 +12,7 @@ import (
 )
 
 // enforceEnterprisePolicy is the CE stub for Enterprise governance via
-// Sentinel policy, quotas, and node pools
+// Sentinel policy and quotas
 func (v *HostVolume) enforceEnterprisePolicy(
 	_ *state.StateSnapshot,
 	_ *structs.HostVolume,
@@ -21,3 +21,9 @@ func (v *HostVolume) enforceEnterprisePolicy(
 ) (error, error) {
 	return nil, nil
 }
+
+// enterpriseNodePoolFilter is the CE stub for filtering nodes during placement
+// via Enterprise node pool governance.
+func (v *HostVolume) enterpriseNodePoolFilter(_ *state.StateSnapshot, _ *structs.HostVolume) (func(string) bool, error) {
+	return func(_ string) bool { return true }, nil
+}
diff --git a/nomad/host_volume_endpoint_test.go b/nomad/host_volume_endpoint_test.go
index ae5bf08c9..224b62a39 100644
--- a/nomad/host_volume_endpoint_test.go
+++ b/nomad/host_volume_endpoint_test.go
@@ -168,12 +168,12 @@ func TestHostVolumeEndpoint_CreateRegisterGetDelete(t *testing.T) {
 		var resp structs.HostVolumeCreateResponse
 		req.AuthToken = token
 		err := msgpackrpc.CallWithCodec(codec, "HostVolume.Create", req, &resp)
-		must.EqError(t, err, `could not place volume "example1": no node meets constraints`)
+		must.EqError(t, err, `could not place volume "example1": no node meets constraints: 0 nodes had existing volume, 0 nodes filtered by node pool governance, 1 nodes were infeasible`)
 
 		req.Volume = vol2.Copy()
 		resp = structs.HostVolumeCreateResponse{}
 		err = msgpackrpc.CallWithCodec(codec, "HostVolume.Create", req, &resp)
-		must.EqError(t, err, `could not place volume "example2": no node meets constraints`)
+		must.EqError(t, err, `could not place volume "example2": no node meets constraints: 0 nodes had existing volume, 0 nodes filtered by node pool governance, 1 nodes were infeasible`)
 	})
 
 	t.Run("valid create", func(t *testing.T) {
@@ -725,12 +725,12 @@ func TestHostVolumeEndpoint_placeVolume(t *testing.T) {
 					Operand: "=",
 				},
 			}},
-			expectErr: "no node meets constraints",
+			expectErr: "no node meets constraints: 0 nodes had existing volume, 0 nodes filtered by node pool governance, 4 nodes were infeasible",
 		},
 		{
 			name:      "no matching plugin",
 			vol:       &structs.HostVolume{PluginID: "not-mkdir"},
-			expectErr: "no node meets constraints",
+			expectErr: "no node meets constraints: 0 nodes had existing volume, 0 nodes filtered by node pool governance, 4 nodes were infeasible",
 		},
 		{
 			name: "match already has a volume with the same name",
@@ -744,7 +744,7 @@ func TestHostVolumeEndpoint_placeVolume(t *testing.T) {
 					Operand: "=",
 				},
 			}},
-			expectErr: "no node meets constraints",
+			expectErr: "no node meets constraints: 1 nodes had existing volume, 0 nodes filtered by node pool governance, 3 nodes were infeasible",
 		},
 	}
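
Note on the CE stub: enterpriseNodePoolFilter above always returns a predicate
that accepts every node pool, so CE placement behavior is unchanged; only the
Enterprise build (ENT/2200, not included in this patch) supplies a real filter.
The snippet below is an illustrative sketch only, not the Enterprise
implementation: it shows the func(string) bool predicate shape the placement
loop consumes and how per-candidate filtering feeds the filteredByGovernance
counter. The buildPoolFilter helper, its allowed/denied arguments, and the
example pool names are all hypothetical.

// Hypothetical sketch -- not taken from the patch above or from Nomad Enterprise.
package main

import "fmt"

// buildPoolFilter returns a predicate over node pool names with the same
// func(string) bool shape that enterpriseNodePoolFilter returns in the patch.
// The allowed/denied semantics here are assumptions for illustration.
func buildPoolFilter(allowed, denied []string) func(string) bool {
	deniedSet := make(map[string]struct{}, len(denied))
	for _, p := range denied {
		deniedSet[p] = struct{}{}
	}
	allowedSet := make(map[string]struct{}, len(allowed))
	for _, p := range allowed {
		allowedSet[p] = struct{}{}
	}
	return func(pool string) bool {
		if _, isDenied := deniedSet[pool]; isDenied {
			return false
		}
		// An empty allow-list means any pool that is not explicitly denied.
		if len(allowedSet) == 0 {
			return true
		}
		_, isAllowed := allowedSet[pool]
		return isAllowed
	}
}

func main() {
	// Node pools of some candidate nodes, as the placement loop would see them.
	candidatePools := []string{"default", "gpu", "restricted", "gpu"}

	poolFilterFn := buildPoolFilter([]string{"default", "gpu"}, []string{"restricted"})

	var filteredByGovernance int
	for _, pool := range candidatePools {
		if !poolFilterFn(pool) {
			filteredByGovernance++ // counted, as in the patch's placement loop
			continue
		}
		fmt.Printf("node in pool %q remains a candidate\n", pool)
	}
	fmt.Printf("%d nodes filtered by node pool governance\n", filteredByGovernance)
}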