Files
nomad/client/state/db_error.go
Tim Gross deae9bb62e client: send node secret with every client-to-server RPC (#16799)
In Nomad 1.5.3 we fixed a security bug that allowed bypass of ACL checks if the
request came through a client node first. But this fix broke (knowingly) the
identification of many client-to-server RPCs. These will now be measured as if
they were anonymous. The reason for this is that many client-to-server RPCs do
not send the node secret and instead rely on the protection of mTLS.

This changeset ensures that the node secret is being sent with every
client-to-server RPC request. In a future version of Nomad we can add
enforcement on the server side, but this was left out of this changeset to
reduce risks to the safe upgrade path.

Sending the node secret as an auth token introduces a new problem during initial
introduction of a client. Clients send many RPCs concurrently with
`Node.Register`, but until the node is registered the node secret is unknown to
the server and will be rejected as invalid. This causes permission denied
errors.

To fix that, this changeset introduces a gate on having successfully made a
`Node.Register` RPC before any other RPCs can be sent (except for `Status.Ping`,
which we need earlier but which also ignores the error because that handler
doesn't do an authorization check). This ensures that we only send requests with
a node secret already known to the server. This also makes client startup a
little easier to reason about because we know `Node.Register` must succeed
first, and it should make for a good place to hook in future plans for secure
introduction of nodes. The tradeoff is that an existing client that has running
allocs will take slightly longer (a second or two) to transition to ready after
a restart, because the transition in `Node.UpdateStatus` is gated at the server
by first submitting `Node.UpdateAlloc` with client alloc updates.
2023-06-22 11:06:49 -04:00

144 lines
4.0 KiB
Go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0
package state
import (
"fmt"
arstate "github.com/hashicorp/nomad/client/allocrunner/state"
"github.com/hashicorp/nomad/client/allocrunner/taskrunner/state"
dmstate "github.com/hashicorp/nomad/client/devicemanager/state"
"github.com/hashicorp/nomad/client/dynamicplugins"
driverstate "github.com/hashicorp/nomad/client/pluginmanager/drivermanager/state"
"github.com/hashicorp/nomad/client/serviceregistration/checks"
cstructs "github.com/hashicorp/nomad/client/structs"
"github.com/hashicorp/nomad/nomad/structs"
)
// ErrDB is a StateDB implementation used in tests: its restore and write
// methods fail with a generic error, while GetAllAllocations serves a preset
// slice.
type ErrDB struct {
	// Allocs holds the allocations that GetAllAllocations hands back as-is.
	Allocs []*structs.Allocation
}
// Name reports the fixed identifier of this database implementation.
func (m *ErrDB) Name() string {
	const name = "errdb"
	return name
}
// Upgrade does nothing and always succeeds.
func (m *ErrDB) Upgrade() error {
	var err error // stays nil: there is nothing to upgrade
	return err
}
// GetAllAllocations returns the preset Allocs slice together with a nil error
// map and a nil error.
func (m *ErrDB) GetAllAllocations() ([]*structs.Allocation, map[string]error, error) {
	var errs map[string]error // left nil
	allocs := m.Allocs
	return allocs, errs, nil
}
// PutAllocation ignores its arguments and always returns an error.
func (m *ErrDB) PutAllocation(alloc *structs.Allocation, opts ...WriteOption) error {
	err := fmt.Errorf("Error!")
	return err
}
// GetDeploymentStatus always fails: it returns a nil status and an error,
// regardless of allocID.
func (m *ErrDB) GetDeploymentStatus(allocID string) (*structs.AllocDeploymentStatus, error) {
	var status *structs.AllocDeploymentStatus // nil on purpose
	return status, fmt.Errorf("Error!")
}
// PutDeploymentStatus ignores its arguments and always returns an error.
func (m *ErrDB) PutDeploymentStatus(allocID string, ds *structs.AllocDeploymentStatus) error {
	err := fmt.Errorf("Error!")
	return err
}
// GetNetworkStatus always fails: it returns a nil status and an error,
// regardless of allocID.
func (m *ErrDB) GetNetworkStatus(allocID string) (*structs.AllocNetworkStatus, error) {
	var status *structs.AllocNetworkStatus // nil on purpose
	return status, fmt.Errorf("Error!")
}
// PutNetworkStatus ignores its arguments and always returns an error.
func (m *ErrDB) PutNetworkStatus(allocID string, ns *structs.AllocNetworkStatus, opts ...WriteOption) error {
	err := fmt.Errorf("Error!")
	return err
}
// PutAcknowledgedState ignores its arguments and always returns an error.
func (m *ErrDB) PutAcknowledgedState(allocID string, state *arstate.State, opts ...WriteOption) error {
	err := fmt.Errorf("Error!")
	return err
}
// GetAcknowledgedState always fails: it returns a nil state and an error,
// regardless of allocID.
func (m *ErrDB) GetAcknowledgedState(allocID string) (*arstate.State, error) {
	var acked *arstate.State // nil on purpose
	return acked, fmt.Errorf("Error!")
}
// GetTaskRunnerState always fails: both returned states are nil and the error
// is non-nil, regardless of allocID and taskName.
func (m *ErrDB) GetTaskRunnerState(allocID, taskName string) (*state.LocalState, *structs.TaskState, error) {
	var (
		local *state.LocalState  // nil on purpose
		task  *structs.TaskState // nil on purpose
	)
	return local, task, fmt.Errorf("Error!")
}
// PutTaskRunnerLocalState ignores its arguments and always returns an error.
func (m *ErrDB) PutTaskRunnerLocalState(allocID, taskName string, val *state.LocalState) error {
	err := fmt.Errorf("Error!")
	return err
}
// PutTaskState ignores its arguments and always returns an error.
func (m *ErrDB) PutTaskState(allocID, taskName string, state *structs.TaskState) error {
	err := fmt.Errorf("Error!")
	return err
}
// DeleteTaskBucket ignores its arguments and always returns an error.
func (m *ErrDB) DeleteTaskBucket(allocID, taskName string) error {
	err := fmt.Errorf("Error!")
	return err
}
// DeleteAllocationBucket ignores its arguments and always returns an error.
func (m *ErrDB) DeleteAllocationBucket(allocID string, opts ...WriteOption) error {
	err := fmt.Errorf("Error!")
	return err
}
// PutDevicePluginState ignores ps and always returns an error.
func (m *ErrDB) PutDevicePluginState(ps *dmstate.PluginState) error {
	err := fmt.Errorf("Error!")
	return err
}
// GetDynamicPluginRegistryState always fails: it returns a nil registry state
// and an error.
func (m *ErrDB) GetDynamicPluginRegistryState() (*dynamicplugins.RegistryState, error) {
	var registry *dynamicplugins.RegistryState // nil on purpose
	return registry, fmt.Errorf("Error!")
}
// PutDynamicPluginRegistryState ignores state and always returns an error.
func (m *ErrDB) PutDynamicPluginRegistryState(state *dynamicplugins.RegistryState) error {
	err := fmt.Errorf("Error!")
	return err
}
// GetDevicePluginState always fails: it returns a nil plugin state and an
// error.
func (m *ErrDB) GetDevicePluginState() (*dmstate.PluginState, error) {
	var plugins *dmstate.PluginState // nil on purpose
	return plugins, fmt.Errorf("Error!")
}
// GetDriverPluginState always fails: it returns a nil plugin state and an
// error.
func (m *ErrDB) GetDriverPluginState() (*driverstate.PluginState, error) {
	var plugins *driverstate.PluginState // nil on purpose
	return plugins, fmt.Errorf("Error!")
}
// PutDriverPluginState ignores ps and always returns an error.
func (m *ErrDB) PutDriverPluginState(ps *driverstate.PluginState) error {
	err := fmt.Errorf("Error!")
	return err
}
// PutCheckResult ignores its arguments and always returns an error.
func (m *ErrDB) PutCheckResult(allocID string, qr *structs.CheckQueryResult) error {
	err := fmt.Errorf("Error!")
	return err
}
// GetCheckResults always fails: it returns the zero (nil) ClientResults and an
// error.
func (m *ErrDB) GetCheckResults() (checks.ClientResults, error) {
	var results checks.ClientResults // zero value, equivalent to nil
	return results, fmt.Errorf("Error!")
}
// DeleteCheckResults ignores its arguments and always returns an error.
func (m *ErrDB) DeleteCheckResults(allocID string, checkIDs []structs.CheckID) error {
	err := fmt.Errorf("Error!")
	return err
}
// PurgeCheckResults ignores allocID and always returns an error.
func (m *ErrDB) PurgeCheckResults(allocID string) error {
	err := fmt.Errorf("Error!")
	return err
}
// PutNodeMeta discards the supplied map and always returns an error.
func (m *ErrDB) PutNodeMeta(map[string]*string) error {
	err := fmt.Errorf("Error!")
	return err
}
// GetNodeMeta always fails: it returns a nil map and an error.
func (m *ErrDB) GetNodeMeta() (map[string]*string, error) {
	var meta map[string]*string // nil on purpose
	return meta, fmt.Errorf("Error!")
}
// PutNodeRegistration ignores reg and always returns an error.
func (m *ErrDB) PutNodeRegistration(reg *cstructs.NodeRegistration) error {
	err := fmt.Errorf("Error!")
	return err
}
// GetNodeRegistration always fails: it returns a nil registration and an
// error.
func (m *ErrDB) GetNodeRegistration() (*cstructs.NodeRegistration, error) {
	var reg *cstructs.NodeRegistration // nil on purpose
	return reg, fmt.Errorf("Error!")
}
// Close always returns an error.
func (m *ErrDB) Close() error {
	err := fmt.Errorf("Error!")
	return err
}