diff --git a/.changelog/23499.txt b/.changelog/23499.txt new file mode 100644 index 000000000..3adf985c8 --- /dev/null +++ b/.changelog/23499.txt @@ -0,0 +1,3 @@ +```release-note:improvement +namespaces: Added warnings if deleting namespaces that have existing objects associated with them +``` diff --git a/nomad/namespace_endpoint.go b/nomad/namespace_endpoint.go index 9061107e6..61452345b 100644 --- a/nomad/namespace_endpoint.go +++ b/nomad/namespace_endpoint.go @@ -113,15 +113,40 @@ func (n *Namespace) DeleteNamespaces(args *structs.NamespaceDeleteRequest, reply } } - // Check that the deleting namespaces do not have non-terminal jobs in both - // this region and all federated regions + // snapshot the state once, because we'll be doing many checks and want + // consistend state + snap, err := n.srv.fsm.State().Snapshot() + if err != nil { + return err + } + var mErr multierror.Error for _, ns := range args.Namespaces { - nonTerminal, err := n.nonTerminalNamespaces(args.AuthToken, ns) - if err != nil { - _ = multierror.Append(&mErr, err) - } else if len(nonTerminal) != 0 { - _ = multierror.Append(&mErr, fmt.Errorf("namespace %q has non-terminal jobs in regions: %v", ns, nonTerminal)) + // make sure this namespace exists before we start making costly checks + exists, _ := snap.NamespaceByName(nil, ns) + if exists == nil { + continue + } + + // do a check across jobs, allocations, volumes and variables to make sure we're + // not leaving any objects associated with the namespace hanging + type objectCheck struct { + localCheckFunc func(string, *state.StateSnapshot) (bool, error) + remoteCheckFunc func(string, string, string) (bool, error) + errorMsg string + } + objects := []objectCheck{ + {n.namespaceTerminalJobsLocally, n.namespaceTerminalJobsInRegion, "namespace %q has non-terminal jobs in regions: %v"}, + {n.namespaceTerminalAllocsLocally, n.namespaceTerminalAllocsInRegion, "namespace %q has non-terminal allocations in regions: %v"}, + {n.namespaceNoAssociatedVolumesLocally, n.namespaceNoAssociatedVolumesInRegion, "namespace %q has volumes associated with it in regions: %v"}, + {n.namespaceNoAssociatedVarsLocally, n.namespaceNoAssociatedVarsInRegion, "namespace %q has variables associated with it in regions: %v"}, + {n.namespaceNoAssociatedQuotasLocally, nil, "namespace %q has quotas associated with it: %v"}, + } + + for _, object := range objects { + if err := n.nonTerminalObjectsInNS(args.AuthToken, ns, snap, object.localCheckFunc, object.remoteCheckFunc, object.errorMsg); err != nil { + _ = multierror.Append(&mErr, err) + } } } @@ -140,18 +165,24 @@ func (n *Namespace) DeleteNamespaces(args *structs.NamespaceDeleteRequest, reply return nil } -// nonTerminalNamespaces returns whether the set of regions in which the -// namespaces contains non-terminal jobs, checking all federated regions -// including this one. -func (n *Namespace) nonTerminalNamespaces(authToken, namespace string) ([]string, error) { +// nonTerminalJobsInNS returns whether the set of regions in which the +// namespaces contains non-terminal jobs, allocations, volumes or other objects +// associated with the namespace, checking all federated regions including this +// one. +func (n *Namespace) nonTerminalObjectsInNS( + authToken, namespace string, + snap *state.StateSnapshot, + localCheckFunc func(string, *state.StateSnapshot) (bool, error), + remoteCheckFunc func(string, string, string) (bool, error), + errorMsg string, +) error { regions := n.srv.Regions() thisRegion := n.srv.Region() terminal := make([]string, 0, len(regions)) - // Check if this region is terminal - localTerminal, err := n.namespaceTerminalLocally(namespace) + localTerminal, err := localCheckFunc(namespace, snap) if err != nil { - return nil, err + return err } if !localTerminal { terminal = append(terminal, thisRegion) @@ -162,31 +193,31 @@ func (n *Namespace) nonTerminalNamespaces(authToken, namespace string) ([]string continue } - remoteTerminal, err := n.namespaceTerminalInRegion(authToken, namespace, region) - if err != nil { - return nil, err - } - if !remoteTerminal { - terminal = append(terminal, region) + if remoteCheckFunc != nil { + remoteTerminal, err := remoteCheckFunc(authToken, namespace, region) + if err != nil { + return err + } + if !remoteTerminal { + terminal = append(terminal, region) + } } } - return terminal, nil + if len(terminal) != 0 { + return fmt.Errorf(errorMsg, namespace, terminal) + } + + return nil } -// namespaceTerminalLocally returns if the namespace contains only terminal jobs -// in the local region . -func (n *Namespace) namespaceTerminalLocally(namespace string) (bool, error) { - snap, err := n.srv.fsm.State().Snapshot() - if err != nil { - return false, err - } - +// namespaceTerminalJobsLocally returns true if the namespace contains only +// terminal jobs in the local region. +func (n *Namespace) namespaceTerminalJobsLocally(namespace string, snap *state.StateSnapshot) (bool, error) { iter, err := snap.JobsByNamespace(nil, namespace, state.SortDefault) if err != nil { return false, err } - for { raw := iter.Next() if raw == nil { @@ -202,10 +233,91 @@ func (n *Namespace) namespaceTerminalLocally(namespace string) (bool, error) { return true, nil } -// namespaceTerminalInRegion returns if the namespace contains only terminal -// jobs in the given region . -func (n *Namespace) namespaceTerminalInRegion(authToken, namespace, region string) (bool, error) { - req := &structs.JobListRequest{ +// namespaceTerminalAllocsLocally returns true if the namespace contains only +// terminal allocations in the local region. +func (n *Namespace) namespaceTerminalAllocsLocally(namespace string, snap *state.StateSnapshot) (bool, error) { + iter, err := snap.AllocsByNamespace(nil, namespace) + if err != nil { + return false, err + } + for { + raw := iter.Next() + if raw == nil { + break + } + + alloc := raw.(*structs.Allocation) + if !alloc.ClientTerminalStatus() { + return false, nil + } + } + + return true, nil +} + +// namespaceNoAssociatedVolumesLocally returns true if there are no CSI volumes +// associated with this namespace in the local region +func (n *Namespace) namespaceNoAssociatedVolumesLocally(namespace string, snap *state.StateSnapshot) (bool, error) { + iter, err := snap.CSIVolumesByNamespace(nil, namespace, "") + if err != nil { + return false, err + } + for { + raw := iter.Next() + if raw == nil { + break + } + + vol := raw.(*structs.CSIVolume) + if vol.Namespace == namespace { + return false, nil + } + } + + return true, nil +} + +// namespaceNoAssociatedVarsLocally returns true if there are no variables +// associated with this namespace in the local region +func (n *Namespace) namespaceNoAssociatedVarsLocally(namespace string, snap *state.StateSnapshot) (bool, error) { + // check for variables + iter, err := snap.GetVariablesByNamespace(nil, namespace) + if err != nil { + return false, err + } + for { + raw := iter.Next() + if raw == nil { + break + } + + v := raw.(*structs.VariableEncrypted) + if v.VariableMetadata.Namespace == namespace { + return false, nil + } + } + + return true, nil +} + +// namespaceNoAssociatedQuotasLocally returns true if there are no quotas +// associated with this namespace in the local region +func (n *Namespace) namespaceNoAssociatedQuotasLocally(namespace string, snap *state.StateSnapshot) (bool, error) { + ns, _ := snap.NamespaceByName(nil, namespace) + if ns == nil { + return false, fmt.Errorf("namespace %s does not exist", ns.Name) + } + if ns.Quota != "" { + return false, nil + } + + return true, nil +} + +// namespaceTerminalJobsInRegion returns true if the namespace contains only +// terminal jobs in the given region. +func (n *Namespace) namespaceTerminalJobsInRegion(authToken, namespace, region string) (bool, error) { + jobReq := &structs.JobListRequest{ QueryOptions: structs.QueryOptions{ Region: region, Namespace: namespace, @@ -214,20 +326,103 @@ func (n *Namespace) namespaceTerminalInRegion(authToken, namespace, region strin }, } - var resp structs.JobListResponse - done, err := n.srv.forward("Job.List", req, req, &resp) + var jobResp structs.JobListResponse + done, err := n.srv.forward("Job.List", jobReq, jobReq, &jobResp) if !done { return false, fmt.Errorf("unexpectedly did not forward Job.List to region %q", region) } else if err != nil { return false, err } - for _, job := range resp.Jobs { + for _, job := range jobResp.Jobs { if job.Status != structs.JobStatusDead { return false, nil } } + return true, nil +} +// namespaceTerminalAllocsInRegion returns true if the namespace contains only +// terminal allocations in the given region. +func (n *Namespace) namespaceTerminalAllocsInRegion(authToken, namespace, region string) (bool, error) { + allocReq := &structs.AllocListRequest{ + QueryOptions: structs.QueryOptions{ + Region: region, + Namespace: namespace, + AllowStale: false, + AuthToken: authToken, + }, + } + + var allocResp structs.AllocListResponse + done, err := n.srv.forward("Alloc.List", allocReq, allocReq, &allocResp) + if !done { + return false, fmt.Errorf("unexpectedly did not forward Alloc.List to region %q", region) + } else if err != nil { + return false, err + } + + for _, alloc := range allocResp.Allocations { + if !alloc.ClientTerminalStatus() { + return false, nil + } + } + return true, nil +} + +// namespaceNoAssociatedVolumesInRegion returns true if there are no volumes +// associated with the namespace in the given region. +func (n *Namespace) namespaceNoAssociatedVolumesInRegion(authToken, namespace, region string) (bool, error) { + volumesReq := &structs.CSIVolumeListRequest{ + QueryOptions: structs.QueryOptions{ + Region: region, + Namespace: namespace, + AllowStale: false, + AuthToken: authToken, + }, + } + + var volumesResp structs.CSIVolumeListResponse + done, err := n.srv.forward("CSIVolume.List", volumesReq, volumesReq, &volumesResp) + if !done { + return false, fmt.Errorf("unexpectedly did not forward CSIVolume.List to region %q", region) + } else if err != nil { + return false, err + } + + for _, volume := range volumesResp.Volumes { + if volume.Namespace == namespace { + return false, nil + } + } + return true, nil +} + +// namespaceNoAssociatedVarsInRegion returns true if there are no variables +// associated with the namespace in the given region. +func (n *Namespace) namespaceNoAssociatedVarsInRegion(authToken, namespace, region string) (bool, error) { + varReq := &structs.VariablesListRequest{ + QueryOptions: structs.QueryOptions{ + Region: region, + Namespace: namespace, + AllowStale: false, + AuthToken: authToken, + }, + } + + var varResp structs.VariablesListResponse + done, err := n.srv.forward("Variables.List", varReq, varReq, &varResp) + if !done { + return false, fmt.Errorf("unexpectedly did not forward Variables.List to region %q", region) + } else if err != nil { + return false, err + } + + for _, v := range varResp.Data { + if v.Namespace == namespace { + return false, nil + } + } return true, nil } diff --git a/nomad/namespace_endpoint_test.go b/nomad/namespace_endpoint_test.go index c24b3d525..6a0583cc6 100644 --- a/nomad/namespace_endpoint_test.go +++ b/nomad/namespace_endpoint_test.go @@ -15,6 +15,7 @@ import ( "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/testutil" + "github.com/shoenig/test/must" "github.com/stretchr/testify/assert" ) @@ -488,7 +489,6 @@ func TestNamespaceEndpoint_DeleteNamespaces(t *testing.T) { func TestNamespaceEndpoint_DeleteNamespaces_NonTerminal_Local(t *testing.T) { ci.Parallel(t) - assert := assert.New(t) s1, cleanupS1 := TestServer(t, nil) defer cleanupS1() codec := rpcClient(t, s1) @@ -502,7 +502,7 @@ func TestNamespaceEndpoint_DeleteNamespaces_NonTerminal_Local(t *testing.T) { // Create a job in one j := mock.Job() j.Namespace = ns1.Name - assert.Nil(s1.fsm.State().UpsertJob(structs.MsgTypeTestSetup, 1001, nil, j)) + must.Nil(t, s1.fsm.State().UpsertJob(structs.MsgTypeTestSetup, 1001, nil, j)) // Lookup the namespaces req := &structs.NamespaceDeleteRequest{ @@ -511,9 +511,36 @@ func TestNamespaceEndpoint_DeleteNamespaces_NonTerminal_Local(t *testing.T) { } var resp structs.GenericResponse err := msgpackrpc.CallWithCodec(codec, "Namespace.DeleteNamespaces", req, &resp) - if assert.NotNil(err) { - assert.Contains(err.Error(), "has non-terminal jobs") + must.NotNil(t, err) + must.StrContains(t, err.Error(), "has non-terminal jobs") +} + +func TestNamespaceEndpoint_DeleteNamespaces_NoAssociatedVolumes_Local(t *testing.T) { + ci.Parallel(t) + s1, cleanupS1 := TestServer(t, nil) + defer cleanupS1() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + + // Create the register request + ns1 := mock.Namespace() + ns2 := mock.Namespace() + s1.fsm.State().UpsertNamespaces(1000, []*structs.Namespace{ns1, ns2}) + + // Create a volume in one + vol := mock.CSIVolume(mock.CSIPlugin()) + vol.Namespace = ns1.Name + must.Nil(t, s1.fsm.State().UpsertCSIVolume(1001, []*structs.CSIVolume{vol})) + + // Lookup the namespaces + req := &structs.NamespaceDeleteRequest{ + Namespaces: []string{ns1.Name, ns2.Name}, + WriteRequest: structs.WriteRequest{Region: "global"}, } + var resp structs.GenericResponse + err := msgpackrpc.CallWithCodec(codec, "Namespace.DeleteNamespaces", req, &resp) + must.NotNil(t, err) + must.StrContains(t, err.Error(), "has volumes associated with it") } func TestNamespaceEndpoint_DeleteNamespaces_NonTerminal_Federated_ACL(t *testing.T) { diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 0cb00f8ac..fed7ade9d 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -10809,6 +10809,14 @@ const ( AllocClientStatusUnknown = "unknown" ) +// terminalAllocationStatuses lists allocation statutes that we consider +// terminal +var terminalAllocationStatuses = []string{ + AllocClientStatusComplete, + AllocClientStatusFailed, + AllocClientStatusLost, +} + // Allocation is used to allocate the placement of a task group to a node. type Allocation struct { // msgpack omit empty fields during serialization @@ -11126,12 +11134,7 @@ func (a *Allocation) ServerTerminalStatus() bool { // ClientTerminalStatus returns if the client status is terminal and will no longer transition func (a *Allocation) ClientTerminalStatus() bool { - switch a.ClientStatus { - case AllocClientStatusComplete, AllocClientStatusFailed, AllocClientStatusLost: - return true - default: - return false - } + return slices.Contains(terminalAllocationStatuses, a.ClientStatus) } // ShouldReschedule returns if the allocation is eligible to be rescheduled according @@ -11832,6 +11835,11 @@ func (a *AllocListStub) RescheduleEligible(reschedulePolicy *ReschedulePolicy, f return a.RescheduleTracker.RescheduleEligible(reschedulePolicy, failTime) } +// ClientTerminalStatus returns if the client status is terminal and will no longer transition +func (a *AllocListStub) ClientTerminalStatus() bool { + return slices.Contains(terminalAllocationStatuses, a.ClientStatus) +} + func setDisplayMsg(taskStates map[string]*TaskState) { for _, taskState := range taskStates { for _, event := range taskState.Events {