namespaces: warn if deleting namespaces that have allocs, volumes, vars or quotas associated with them (#23499)

This commit is contained in:
Piotr Kazmierczak
2024-07-09 17:10:09 +02:00
committed by GitHub
parent 5bce4b531e
commit 75a1df3390
4 changed files with 280 additions and 47 deletions

3
.changelog/23499.txt Normal file
View File

@@ -0,0 +1,3 @@
```release-note:improvement
namespaces: Added warnings if deleting namespaces that have existing objects associated with them
```

View File

@@ -113,15 +113,40 @@ func (n *Namespace) DeleteNamespaces(args *structs.NamespaceDeleteRequest, reply
}
}
// Check that the deleting namespaces do not have non-terminal jobs in both
// this region and all federated regions
// snapshot the state once, because we'll be doing many checks and want
// consistend state
snap, err := n.srv.fsm.State().Snapshot()
if err != nil {
return err
}
var mErr multierror.Error
for _, ns := range args.Namespaces {
nonTerminal, err := n.nonTerminalNamespaces(args.AuthToken, ns)
if err != nil {
// make sure this namespace exists before we start making costly checks
exists, _ := snap.NamespaceByName(nil, ns)
if exists == nil {
continue
}
// do a check across jobs, allocations, volumes and variables to make sure we're
// not leaving any objects associated with the namespace hanging
type objectCheck struct {
localCheckFunc func(string, *state.StateSnapshot) (bool, error)
remoteCheckFunc func(string, string, string) (bool, error)
errorMsg string
}
objects := []objectCheck{
{n.namespaceTerminalJobsLocally, n.namespaceTerminalJobsInRegion, "namespace %q has non-terminal jobs in regions: %v"},
{n.namespaceTerminalAllocsLocally, n.namespaceTerminalAllocsInRegion, "namespace %q has non-terminal allocations in regions: %v"},
{n.namespaceNoAssociatedVolumesLocally, n.namespaceNoAssociatedVolumesInRegion, "namespace %q has volumes associated with it in regions: %v"},
{n.namespaceNoAssociatedVarsLocally, n.namespaceNoAssociatedVarsInRegion, "namespace %q has variables associated with it in regions: %v"},
{n.namespaceNoAssociatedQuotasLocally, nil, "namespace %q has quotas associated with it: %v"},
}
for _, object := range objects {
if err := n.nonTerminalObjectsInNS(args.AuthToken, ns, snap, object.localCheckFunc, object.remoteCheckFunc, object.errorMsg); err != nil {
_ = multierror.Append(&mErr, err)
} else if len(nonTerminal) != 0 {
_ = multierror.Append(&mErr, fmt.Errorf("namespace %q has non-terminal jobs in regions: %v", ns, nonTerminal))
}
}
}
@@ -140,18 +165,24 @@ func (n *Namespace) DeleteNamespaces(args *structs.NamespaceDeleteRequest, reply
return nil
}
// nonTerminalNamespaces returns whether the set of regions in which the
// namespaces contains non-terminal jobs, checking all federated regions
// including this one.
func (n *Namespace) nonTerminalNamespaces(authToken, namespace string) ([]string, error) {
// nonTerminalJobsInNS returns whether the set of regions in which the
// namespaces contains non-terminal jobs, allocations, volumes or other objects
// associated with the namespace, checking all federated regions including this
// one.
func (n *Namespace) nonTerminalObjectsInNS(
authToken, namespace string,
snap *state.StateSnapshot,
localCheckFunc func(string, *state.StateSnapshot) (bool, error),
remoteCheckFunc func(string, string, string) (bool, error),
errorMsg string,
) error {
regions := n.srv.Regions()
thisRegion := n.srv.Region()
terminal := make([]string, 0, len(regions))
// Check if this region is terminal
localTerminal, err := n.namespaceTerminalLocally(namespace)
localTerminal, err := localCheckFunc(namespace, snap)
if err != nil {
return nil, err
return err
}
if !localTerminal {
terminal = append(terminal, thisRegion)
@@ -162,31 +193,31 @@ func (n *Namespace) nonTerminalNamespaces(authToken, namespace string) ([]string
continue
}
remoteTerminal, err := n.namespaceTerminalInRegion(authToken, namespace, region)
if remoteCheckFunc != nil {
remoteTerminal, err := remoteCheckFunc(authToken, namespace, region)
if err != nil {
return nil, err
return err
}
if !remoteTerminal {
terminal = append(terminal, region)
}
}
return terminal, nil
}
// namespaceTerminalLocally returns if the namespace contains only terminal jobs
// in the local region .
func (n *Namespace) namespaceTerminalLocally(namespace string) (bool, error) {
snap, err := n.srv.fsm.State().Snapshot()
if err != nil {
return false, err
}
if len(terminal) != 0 {
return fmt.Errorf(errorMsg, namespace, terminal)
}
return nil
}
// namespaceTerminalJobsLocally returns true if the namespace contains only
// terminal jobs in the local region.
func (n *Namespace) namespaceTerminalJobsLocally(namespace string, snap *state.StateSnapshot) (bool, error) {
iter, err := snap.JobsByNamespace(nil, namespace, state.SortDefault)
if err != nil {
return false, err
}
for {
raw := iter.Next()
if raw == nil {
@@ -202,10 +233,91 @@ func (n *Namespace) namespaceTerminalLocally(namespace string) (bool, error) {
return true, nil
}
// namespaceTerminalInRegion returns if the namespace contains only terminal
// jobs in the given region .
func (n *Namespace) namespaceTerminalInRegion(authToken, namespace, region string) (bool, error) {
req := &structs.JobListRequest{
// namespaceTerminalAllocsLocally returns true if the namespace contains only
// terminal allocations in the local region.
func (n *Namespace) namespaceTerminalAllocsLocally(namespace string, snap *state.StateSnapshot) (bool, error) {
iter, err := snap.AllocsByNamespace(nil, namespace)
if err != nil {
return false, err
}
for {
raw := iter.Next()
if raw == nil {
break
}
alloc := raw.(*structs.Allocation)
if !alloc.ClientTerminalStatus() {
return false, nil
}
}
return true, nil
}
// namespaceNoAssociatedVolumesLocally returns true if there are no CSI volumes
// associated with this namespace in the local region
func (n *Namespace) namespaceNoAssociatedVolumesLocally(namespace string, snap *state.StateSnapshot) (bool, error) {
iter, err := snap.CSIVolumesByNamespace(nil, namespace, "")
if err != nil {
return false, err
}
for {
raw := iter.Next()
if raw == nil {
break
}
vol := raw.(*structs.CSIVolume)
if vol.Namespace == namespace {
return false, nil
}
}
return true, nil
}
// namespaceNoAssociatedVarsLocally returns true if there are no variables
// associated with this namespace in the local region
func (n *Namespace) namespaceNoAssociatedVarsLocally(namespace string, snap *state.StateSnapshot) (bool, error) {
// check for variables
iter, err := snap.GetVariablesByNamespace(nil, namespace)
if err != nil {
return false, err
}
for {
raw := iter.Next()
if raw == nil {
break
}
v := raw.(*structs.VariableEncrypted)
if v.VariableMetadata.Namespace == namespace {
return false, nil
}
}
return true, nil
}
// namespaceNoAssociatedQuotasLocally returns true if there are no quotas
// associated with this namespace in the local region
func (n *Namespace) namespaceNoAssociatedQuotasLocally(namespace string, snap *state.StateSnapshot) (bool, error) {
ns, _ := snap.NamespaceByName(nil, namespace)
if ns == nil {
return false, fmt.Errorf("namespace %s does not exist", ns.Name)
}
if ns.Quota != "" {
return false, nil
}
return true, nil
}
// namespaceTerminalJobsInRegion returns true if the namespace contains only
// terminal jobs in the given region.
func (n *Namespace) namespaceTerminalJobsInRegion(authToken, namespace, region string) (bool, error) {
jobReq := &structs.JobListRequest{
QueryOptions: structs.QueryOptions{
Region: region,
Namespace: namespace,
@@ -214,20 +326,103 @@ func (n *Namespace) namespaceTerminalInRegion(authToken, namespace, region strin
},
}
var resp structs.JobListResponse
done, err := n.srv.forward("Job.List", req, req, &resp)
var jobResp structs.JobListResponse
done, err := n.srv.forward("Job.List", jobReq, jobReq, &jobResp)
if !done {
return false, fmt.Errorf("unexpectedly did not forward Job.List to region %q", region)
} else if err != nil {
return false, err
}
for _, job := range resp.Jobs {
for _, job := range jobResp.Jobs {
if job.Status != structs.JobStatusDead {
return false, nil
}
}
return true, nil
}
// namespaceTerminalAllocsInRegion returns true if the namespace contains only
// terminal allocations in the given region.
func (n *Namespace) namespaceTerminalAllocsInRegion(authToken, namespace, region string) (bool, error) {
allocReq := &structs.AllocListRequest{
QueryOptions: structs.QueryOptions{
Region: region,
Namespace: namespace,
AllowStale: false,
AuthToken: authToken,
},
}
var allocResp structs.AllocListResponse
done, err := n.srv.forward("Alloc.List", allocReq, allocReq, &allocResp)
if !done {
return false, fmt.Errorf("unexpectedly did not forward Alloc.List to region %q", region)
} else if err != nil {
return false, err
}
for _, alloc := range allocResp.Allocations {
if !alloc.ClientTerminalStatus() {
return false, nil
}
}
return true, nil
}
// namespaceNoAssociatedVolumesInRegion returns true if there are no volumes
// associated with the namespace in the given region.
func (n *Namespace) namespaceNoAssociatedVolumesInRegion(authToken, namespace, region string) (bool, error) {
volumesReq := &structs.CSIVolumeListRequest{
QueryOptions: structs.QueryOptions{
Region: region,
Namespace: namespace,
AllowStale: false,
AuthToken: authToken,
},
}
var volumesResp structs.CSIVolumeListResponse
done, err := n.srv.forward("CSIVolume.List", volumesReq, volumesReq, &volumesResp)
if !done {
return false, fmt.Errorf("unexpectedly did not forward CSIVolume.List to region %q", region)
} else if err != nil {
return false, err
}
for _, volume := range volumesResp.Volumes {
if volume.Namespace == namespace {
return false, nil
}
}
return true, nil
}
// namespaceNoAssociatedVarsInRegion returns true if there are no variables
// associated with the namespace in the given region.
func (n *Namespace) namespaceNoAssociatedVarsInRegion(authToken, namespace, region string) (bool, error) {
varReq := &structs.VariablesListRequest{
QueryOptions: structs.QueryOptions{
Region: region,
Namespace: namespace,
AllowStale: false,
AuthToken: authToken,
},
}
var varResp structs.VariablesListResponse
done, err := n.srv.forward("Variables.List", varReq, varReq, &varResp)
if !done {
return false, fmt.Errorf("unexpectedly did not forward Variables.List to region %q", region)
} else if err != nil {
return false, err
}
for _, v := range varResp.Data {
if v.Namespace == namespace {
return false, nil
}
}
return true, nil
}

View File

@@ -15,6 +15,7 @@ import (
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/testutil"
"github.com/shoenig/test/must"
"github.com/stretchr/testify/assert"
)
@@ -488,7 +489,6 @@ func TestNamespaceEndpoint_DeleteNamespaces(t *testing.T) {
func TestNamespaceEndpoint_DeleteNamespaces_NonTerminal_Local(t *testing.T) {
ci.Parallel(t)
assert := assert.New(t)
s1, cleanupS1 := TestServer(t, nil)
defer cleanupS1()
codec := rpcClient(t, s1)
@@ -502,7 +502,7 @@ func TestNamespaceEndpoint_DeleteNamespaces_NonTerminal_Local(t *testing.T) {
// Create a job in one
j := mock.Job()
j.Namespace = ns1.Name
assert.Nil(s1.fsm.State().UpsertJob(structs.MsgTypeTestSetup, 1001, nil, j))
must.Nil(t, s1.fsm.State().UpsertJob(structs.MsgTypeTestSetup, 1001, nil, j))
// Lookup the namespaces
req := &structs.NamespaceDeleteRequest{
@@ -511,9 +511,36 @@ func TestNamespaceEndpoint_DeleteNamespaces_NonTerminal_Local(t *testing.T) {
}
var resp structs.GenericResponse
err := msgpackrpc.CallWithCodec(codec, "Namespace.DeleteNamespaces", req, &resp)
if assert.NotNil(err) {
assert.Contains(err.Error(), "has non-terminal jobs")
must.NotNil(t, err)
must.StrContains(t, err.Error(), "has non-terminal jobs")
}
func TestNamespaceEndpoint_DeleteNamespaces_NoAssociatedVolumes_Local(t *testing.T) {
ci.Parallel(t)
s1, cleanupS1 := TestServer(t, nil)
defer cleanupS1()
codec := rpcClient(t, s1)
testutil.WaitForLeader(t, s1.RPC)
// Create the register request
ns1 := mock.Namespace()
ns2 := mock.Namespace()
s1.fsm.State().UpsertNamespaces(1000, []*structs.Namespace{ns1, ns2})
// Create a volume in one
vol := mock.CSIVolume(mock.CSIPlugin())
vol.Namespace = ns1.Name
must.Nil(t, s1.fsm.State().UpsertCSIVolume(1001, []*structs.CSIVolume{vol}))
// Lookup the namespaces
req := &structs.NamespaceDeleteRequest{
Namespaces: []string{ns1.Name, ns2.Name},
WriteRequest: structs.WriteRequest{Region: "global"},
}
var resp structs.GenericResponse
err := msgpackrpc.CallWithCodec(codec, "Namespace.DeleteNamespaces", req, &resp)
must.NotNil(t, err)
must.StrContains(t, err.Error(), "has volumes associated with it")
}
func TestNamespaceEndpoint_DeleteNamespaces_NonTerminal_Federated_ACL(t *testing.T) {

View File

@@ -10809,6 +10809,14 @@ const (
AllocClientStatusUnknown = "unknown"
)
// terminalAllocationStatuses lists allocation statutes that we consider
// terminal
var terminalAllocationStatuses = []string{
AllocClientStatusComplete,
AllocClientStatusFailed,
AllocClientStatusLost,
}
// Allocation is used to allocate the placement of a task group to a node.
type Allocation struct {
// msgpack omit empty fields during serialization
@@ -11126,12 +11134,7 @@ func (a *Allocation) ServerTerminalStatus() bool {
// ClientTerminalStatus returns if the client status is terminal and will no longer transition
func (a *Allocation) ClientTerminalStatus() bool {
switch a.ClientStatus {
case AllocClientStatusComplete, AllocClientStatusFailed, AllocClientStatusLost:
return true
default:
return false
}
return slices.Contains(terminalAllocationStatuses, a.ClientStatus)
}
// ShouldReschedule returns if the allocation is eligible to be rescheduled according
@@ -11832,6 +11835,11 @@ func (a *AllocListStub) RescheduleEligible(reschedulePolicy *ReschedulePolicy, f
return a.RescheduleTracker.RescheduleEligible(reschedulePolicy, failTime)
}
// ClientTerminalStatus returns if the client status is terminal and will no longer transition
func (a *AllocListStub) ClientTerminalStatus() bool {
return slices.Contains(terminalAllocationStatuses, a.ClientStatus)
}
func setDisplayMsg(taskStates map[string]*TaskState) {
for _, taskState := range taskStates {
for _, event := range taskState.Events {