auth: refactor Authenticate into its own package (#18703)

The RPC handlers expect to see `nil` ACL objects whenever ACLs are disabled. By
using `nil` as a sentinel value, we have the risk of nil pointer exceptions and
improper handling of `nil` when returned from our various auth methods that can
lead to privilege escalation bugs.

This patchset is the first in a series to eliminate the use of `nil` ACLs as a
sentinel value for when ACLs are disabled. This one is entirely refactoring to
reduce the burden of reviewing the final patchsets that have the functional
changes:

* Move RPC auth into a new `nomad/auth` package, injecting the dependencies
  required from the server. Expose only those public methods on `nomad/auth`
  that are intended for use in the RPC handlers.
* Keep the existing large authentication test as an integration test.
* Add unit tests covering the methods of `nomad/auth` we intend on keeping. The
  assertions for many of these will change once we have no `nil` sentinels and
  can make safe assertions about permissions on the resulting `ACL` objects.
This commit is contained in:
Tim Gross
2023-10-10 11:01:24 -04:00
committed by GitHub
parent 9c57ddd838
commit 9c2ecbf1d3
9 changed files with 1504 additions and 460 deletions

View File

@@ -34,6 +34,7 @@
"internal/...",
"jobspec/...",
"lib/...",
"nomad/auth/...",
"nomad/deploymentwatcher/...",
"nomad/drainer/...",
"nomad/reporting/...",

View File

@@ -4,453 +4,42 @@
package nomad
import (
"errors"
"fmt"
"net"
"time"
metrics "github.com/armon/go-metrics"
"github.com/hashicorp/nomad/acl"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/nomad/state"
"github.com/hashicorp/nomad/nomad/structs"
)
// Authenticate extracts an AuthenticatedIdentity from the request context or
// provided token and sets the identity on the request. The caller can extract
// an acl.ACL, WorkloadIdentity, or other identifying tokens to use for
// authorization. Keeping these fields independent rather than merging them into
// an ephemeral ACLToken makes the original of the credential clear to RPC
// handlers, who may have different behavior for internal vs external origins.
//
// Note: when called on the follower we'll be making stale queries, so it's
// possible if the follower is behind that the leader will get a different value
// if an ACL token or allocation's WI has just been created.
//
// This method returns errors that are used for testing diagnostics. RPC callers
// should always return ErrPermissionDenied after checking forwarding when one
// of these errors is received.
func (s *Server) Authenticate(ctx *RPCContext, args structs.RequestWithIdentity) error {
// get the user ACLToken or anonymous token
secretID := args.GetAuthToken()
aclToken, err := s.ResolveSecretToken(secretID)
switch {
case err == nil:
// If ACLs are disabled or we have a non-anonymous token, return that.
if aclToken == nil || aclToken != structs.AnonymousACLToken {
args.SetIdentity(&structs.AuthenticatedIdentity{ACLToken: aclToken})
return nil
}
case errors.Is(err, structs.ErrTokenExpired):
return err
case errors.Is(err, structs.ErrTokenInvalid):
// if it's not a UUID it might be an identity claim
claims, err := s.VerifyClaim(secretID)
if err != nil {
// we already know the token wasn't valid for an ACL in the state
// store, so if we get an error at this point we have an invalid
// token and there are no other options but to bail out
return err
}
args.SetIdentity(&structs.AuthenticatedIdentity{Claims: claims})
return nil
case errors.Is(err, structs.ErrTokenNotFound):
// Check if the secret ID is the leader's secret ID, in which case treat
// it as a management token.
leaderAcl := s.getLeaderAcl()
if leaderAcl != "" && secretID == leaderAcl {
aclToken = structs.LeaderACLToken
break
} else {
// Otherwise, see if the secret ID belongs to a node. We should
// reach this point only on first connection.
node, err := s.State().NodeBySecretID(nil, secretID)
if err != nil {
// this is a go-memdb error; shouldn't happen
return fmt.Errorf("could not resolve node secret: %w", err)
}
if node != nil {
args.SetIdentity(&structs.AuthenticatedIdentity{ClientID: node.ID})
return nil
}
}
// we were passed a bogus token so we'll return an error, but we'll also
// want to capture the IP for metrics
remoteIP, err := s.remoteIPFromRPCContext(ctx)
if err != nil {
s.logger.Error("could not determine remote address", "error", err)
}
args.SetIdentity(&structs.AuthenticatedIdentity{RemoteIP: remoteIP})
return structs.ErrPermissionDenied
default: // any other error
return fmt.Errorf("could not resolve user: %w", err)
}
// If there's no context we're in a "static" handler which only happens for
// cases where the leader is making RPCs internally (volumewatcher and
// deploymentwatcher)
if ctx == nil {
args.SetIdentity(&structs.AuthenticatedIdentity{ACLToken: aclToken})
return nil
}
// At this point we either have an anonymous token or an invalid one.
// Unlike clients that provide their Node ID on first connection, server
// RPCs don't include an ID for the server so we identify servers by cert
// and IP address.
identity := &structs.AuthenticatedIdentity{ACLToken: aclToken}
if ctx.TLS {
identity.TLSName = ctx.Certificate().Subject.CommonName
}
remoteIP, err := s.remoteIPFromRPCContext(ctx)
if err != nil {
s.logger.Error(
"could not authenticate RPC request or determine remote address", "error", err)
return err
}
identity.RemoteIP = remoteIP
args.SetIdentity(identity)
return nil
func (srv *Server) Authenticate(ctx *RPCContext, args structs.RequestWithIdentity) error {
return srv.auth.Authenticate(ctx, args)
}
func (s *Server) remoteIPFromRPCContext(ctx *RPCContext) (net.IP, error) {
var remoteAddr *net.TCPAddr
var ok bool
if ctx == nil {
return nil, nil
}
if ctx.Session != nil {
remoteAddr, ok = ctx.Session.RemoteAddr().(*net.TCPAddr)
if !ok {
return nil, errors.New("session address was not a TCP address")
}
}
if remoteAddr == nil && ctx.Conn != nil {
remoteAddr, ok = ctx.Conn.RemoteAddr().(*net.TCPAddr)
if !ok {
return nil, errors.New("session address was not a TCP address")
}
}
if remoteAddr != nil {
return remoteAddr.IP, nil
}
return nil, structs.ErrPermissionDenied
func (srv *Server) ResolveACL(args structs.RequestWithIdentity) (*acl.ACL, error) {
return srv.auth.ResolveACL(args)
}
// ResolveACL is an authentication wrapper which handles resolving both ACL
// tokens and Workload Identities. If both are provided the ACL token is
// preferred, but it is best for the RPC caller to only include the credentials
// for the identity they intend the operation to be performed with.
func (s *Server) ResolveACL(args structs.RequestWithIdentity) (*acl.ACL, error) {
identity := args.GetIdentity()
if !s.config.ACLEnabled || identity == nil {
return nil, nil
}
aclToken := identity.GetACLToken()
if aclToken != nil {
return s.ResolveACLForToken(aclToken)
}
claims := identity.GetClaims()
if claims != nil {
return s.ResolveClaims(claims)
}
return nil, nil
func (srv *Server) VerifyClaim(token string) (*structs.IdentityClaims, error) {
return srv.auth.VerifyClaim(token)
}
// ResolveACLForToken resolves an ACL from a token only. It should be used only
// by Variables endpoints, which have additional implicit policies for their
// claims so we can't wrap them up in ResolveACL.
//
// TODO: figure out a way to the Variables endpoint implicit policies baked into
// their acl.ACL object so that we can avoid using this method.
func (s *Server) ResolveACLForToken(aclToken *structs.ACLToken) (*acl.ACL, error) {
if !s.config.ACLEnabled {
return nil, nil
}
snap, err := s.fsm.State().Snapshot()
if err != nil {
return nil, err
}
return resolveACLFromToken(snap, s.aclCache, aclToken)
func (srv *Server) ResolveToken(secretID string) (*acl.ACL, error) {
return srv.auth.ResolveToken(secretID)
}
// ResolveClientOrACL resolves an ACL if the identity has a token or claim, and
// falls back to verifying the client ID if one has been set
func (s *Server) ResolveClientOrACL(args structs.RequestWithIdentity) (*acl.ACL, error) {
identity := args.GetIdentity()
if !s.config.ACLEnabled || identity == nil || identity.ClientID != "" {
return nil, nil
}
aclObj, err := s.ResolveACL(args)
if err != nil {
return nil, err
}
// Returns either the users aclObj, or nil if ACLs are disabled.
return aclObj, nil
func (srv *Server) ResolveClientOrACL(args structs.RequestWithIdentity) (*acl.ACL, error) {
return srv.auth.ResolveClientOrACL(args)
}
// ResolveToken is used to translate an ACL Token Secret ID into
// an ACL object, nil if ACLs are disabled, or an error.
func (s *Server) ResolveToken(secretID string) (*acl.ACL, error) {
// Fast-path if ACLs are disabled
if !s.config.ACLEnabled {
return nil, nil
}
defer metrics.MeasureSince([]string{"nomad", "acl", "resolveToken"}, time.Now())
// Check if the secret ID is the leader secret ID, in which case treat it as
// a management token.
if leaderAcl := s.getLeaderAcl(); leaderAcl != "" && secretID == leaderAcl {
return acl.ManagementACL, nil
}
// Snapshot the state
snap, err := s.fsm.State().Snapshot()
if err != nil {
return nil, err
}
// Resolve the ACL
return resolveTokenFromSnapshotCache(snap, s.aclCache, secretID)
func (srv *Server) ResolvePoliciesForClaims(claims *structs.IdentityClaims) ([]*structs.ACLPolicy, error) {
return srv.auth.ResolvePoliciesForClaims(claims)
}
// VerifyClaim asserts that the token is valid and that the resulting
// allocation ID belongs to a non-terminal allocation
func (s *Server) VerifyClaim(token string) (*structs.IdentityClaims, error) {
claims, err := s.encrypter.VerifyClaim(token)
if err != nil {
return nil, err
}
snap, err := s.fsm.State().Snapshot()
if err != nil {
return nil, err
}
alloc, err := snap.AllocByID(nil, claims.AllocationID)
if err != nil {
return nil, err
}
if alloc == nil || alloc.Job == nil {
return nil, fmt.Errorf("allocation does not exist")
}
// the claims for terminal allocs are always treated as expired
if alloc.TerminalStatus() {
return nil, fmt.Errorf("allocation is terminal")
}
return claims, nil
func (srv *Server) ResolveACLForToken(aclToken *structs.ACLToken) (*acl.ACL, error) {
return srv.auth.ResolveACLForToken(aclToken)
}
func (s *Server) ResolveClaims(claims *structs.IdentityClaims) (*acl.ACL, error) {
policies, err := s.resolvePoliciesForClaims(claims)
if err != nil {
return nil, err
}
// Compile and cache the ACL object. For many claims this will result in an
// ACL object with no policies, which can be efficiently cached.
aclObj, err := structs.CompileACLObject(s.aclCache, policies)
if err != nil {
return nil, err
}
return aclObj, nil
func (srv *Server) ResolveSecretToken(secretID string) (*structs.ACLToken, error) {
return srv.auth.ResolveSecretToken(secretID)
}
// resolveTokenFromSnapshotCache is used to resolve an ACL object from a
// snapshot of state, using a cache to avoid parsing and ACL construction when
// possible. It is split from resolveToken to simplify testing.
func resolveTokenFromSnapshotCache(snap *state.StateSnapshot, cache *structs.ACLCache[*acl.ACL], secretID string) (*acl.ACL, error) {
// Lookup the ACL Token
var token *structs.ACLToken
var err error
// Handle anonymous requests
if secretID == "" {
token = structs.AnonymousACLToken
} else {
token, err = snap.ACLTokenBySecretID(nil, secretID)
if err != nil {
return nil, err
}
if token == nil {
return nil, structs.ErrTokenNotFound
}
if token.IsExpired(time.Now().UTC()) {
return nil, structs.ErrTokenExpired
}
}
return resolveACLFromToken(snap, cache, token)
}
func resolveACLFromToken(snap *state.StateSnapshot, cache *structs.ACLCache[*acl.ACL], token *structs.ACLToken) (*acl.ACL, error) {
// Check if this is a management token
if token.Type == structs.ACLManagementToken {
return acl.ManagementACL, nil
}
// Store all policies detailed in the token request, this includes the
// named policies and those referenced within the role link.
policies := make([]*structs.ACLPolicy, 0, len(token.Policies)+len(token.Roles))
// Iterate all the token policies and add these to our policy tracking
// array.
for _, policyName := range token.Policies {
policy, err := snap.ACLPolicyByName(nil, policyName)
if err != nil {
return nil, err
}
if policy == nil {
// Ignore policies that don't exist, since they don't grant any
// more privilege.
continue
}
// Add the policy to the tracking array.
policies = append(policies, policy)
}
// Iterate all the token role links, so we can unpack these and identify
// the ACL policies.
for _, roleLink := range token.Roles {
// Any error reading the role means we cannot move forward. We just
// ignore any roles that have been detailed but are not within our
// state.
role, err := snap.GetACLRoleByID(nil, roleLink.ID)
if err != nil {
return nil, err
}
if role == nil {
continue
}
// Unpack the policies held within the ACL role to form a single list
// of ACL policies that this token has available.
for _, policyLink := range role.Policies {
policy, err := snap.ACLPolicyByName(nil, policyLink.Name)
if err != nil {
return nil, err
}
// Ignore policies that don't exist, since they don't grant any
// more privilege.
if policy == nil {
continue
}
// Add the policy to the tracking array.
policies = append(policies, policy)
}
}
// Compile and cache the ACL object
aclObj, err := structs.CompileACLObject(cache, policies)
if err != nil {
return nil, err
}
return aclObj, nil
}
// ResolveSecretToken is used to translate an ACL Token Secret ID into
// an ACLToken object, nil if ACLs are disabled, or an error.
func (s *Server) ResolveSecretToken(secretID string) (*structs.ACLToken, error) {
// TODO(Drew) Look into using ACLObject cache or create a separate cache
// Fast-path if ACLs are disabled
if !s.config.ACLEnabled {
return nil, nil
}
defer metrics.MeasureSince([]string{"nomad", "acl", "resolveSecretToken"}, time.Now())
if secretID == "" {
return structs.AnonymousACLToken, nil
}
if !helper.IsUUID(secretID) {
return nil, structs.ErrTokenInvalid
}
snap, err := s.fsm.State().Snapshot()
if err != nil {
return nil, err
}
// Lookup the ACL Token
token, err := snap.ACLTokenBySecretID(nil, secretID)
if err != nil {
return nil, err
}
if token == nil {
return nil, structs.ErrTokenNotFound
}
if token.IsExpired(time.Now().UTC()) {
return nil, structs.ErrTokenExpired
}
return token, nil
}
func (s *Server) resolvePoliciesForClaims(claims *structs.IdentityClaims) ([]*structs.ACLPolicy, error) {
snap, err := s.fsm.State().Snapshot()
if err != nil {
return nil, err
}
alloc, err := snap.AllocByID(nil, claims.AllocationID)
if err != nil {
return nil, err
}
if alloc == nil || alloc.Job == nil {
return nil, fmt.Errorf("allocation does not exist")
}
// Find any policies attached to the job
jobId := alloc.Job.ID
if alloc.Job.ParentID != "" {
jobId = alloc.Job.ParentID
}
iter, err := snap.ACLPolicyByJob(nil, alloc.Namespace, jobId)
if err != nil {
return nil, err
}
policies := []*structs.ACLPolicy{}
for {
raw := iter.Next()
if raw == nil {
break
}
policy := raw.(*structs.ACLPolicy)
if policy.JobACL == nil {
continue
}
switch {
case policy.JobACL.Group == "":
policies = append(policies, policy)
case policy.JobACL.Group != alloc.TaskGroup:
continue // don't bother checking task
case policy.JobACL.Task == "":
policies = append(policies, policy)
case policy.JobACL.Task == claims.TaskName:
policies = append(policies, policy)
}
}
return policies, nil
func (srv *Server) ResolveClaims(claims *structs.IdentityClaims) (*acl.ACL, error) {
return srv.auth.ResolveClaims(claims)
}

View File

@@ -447,7 +447,7 @@ func (a *ACL) GetClaimPolicies(args *structs.GenericRequest, reply *structs.ACLP
return structs.ErrPermissionDenied
}
policies, err := a.srv.resolvePoliciesForClaims(claims)
policies, err := a.srv.ResolvePoliciesForClaims(claims)
if err != nil {
// Likely only hit if a job/alloc has been GC'd on the server but the
// client hasn't stopped it yet. Return Permission Denied as there's no way

View File

@@ -793,7 +793,7 @@ func TestResolveClaims(t *testing.T) {
must.NotNil(t, aclObj)
must.Eq(t, aclObj, aclObj2, must.Sprintf("expected cached value"))
policies, err := srv.resolvePoliciesForClaims(claims)
policies, err := srv.ResolvePoliciesForClaims(claims)
must.NoError(t, err)
must.Len(t, 3, policies)
must.SliceContainsAll(t, policies, []*structs.ACLPolicy{policy1, policy2, policy3})

492
nomad/auth/auth.go Normal file
View File

@@ -0,0 +1,492 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1
package auth
import (
"crypto/x509"
"errors"
"fmt"
"net"
"time"
"github.com/armon/go-metrics"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/nomad/acl"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/nomad/state"
"github.com/hashicorp/nomad/nomad/structs"
)
// aclCacheSize is the number of ACL objects to keep cached. ACLs have a parsing
// and construction cost, so we keep the hot objects cached to reduce the ACL
// token resolution time.
const aclCacheSize = 512
type StateGetter func() *state.StateStore
type LeaderACLGetter func() string
type RPCContext interface {
IsTLS() bool
IsStatic() bool
Certificate() *x509.Certificate
GetRemoteIP() (net.IP, error)
}
type Encrypter interface {
VerifyClaim(string) (*structs.IdentityClaims, error)
}
type Authenticator struct {
aclsEnabled bool
tlsEnabled bool
logger hclog.Logger
getState StateGetter
getLeaderACL LeaderACLGetter
region string
// aclCache is used to maintain the parsed ACL objects
aclCache *structs.ACLCache[*acl.ACL]
// encrypter is a pointer to the server's Encrypter that can be used to
// verify claims
encrypter Encrypter
}
type AuthenticatorConfig struct {
StateFn StateGetter
Logger hclog.Logger
GetLeaderACLFn LeaderACLGetter
AclsEnabled bool
TLSEnabled bool
Region string
Encrypter Encrypter
}
func NewAuthenticator(cfg *AuthenticatorConfig) *Authenticator {
return &Authenticator{
aclsEnabled: cfg.AclsEnabled,
tlsEnabled: cfg.TLSEnabled,
logger: cfg.Logger.With("auth"),
getState: cfg.StateFn,
getLeaderACL: cfg.GetLeaderACLFn,
region: cfg.Region,
aclCache: structs.NewACLCache[*acl.ACL](aclCacheSize),
encrypter: cfg.Encrypter,
}
}
// Authenticate extracts an AuthenticatedIdentity from the request context or
// provided token and sets the identity on the request. The caller can extract
// an acl.ACL, WorkloadIdentity, or other identifying tokens to use for
// authorization. Keeping these fields independent rather than merging them into
// an ephemeral ACLToken makes the original of the credential clear to RPC
// handlers, who may have different behavior for internal vs external origins.
//
// Note: when called on the follower we'll be making stale queries, so it's
// possible if the follower is behind that the leader will get a different value
// if an ACL token or allocation's WI has just been created.
//
// This method returns errors that are used for testing diagnostics. RPC callers
// should always return ErrPermissionDenied after checking forwarding when one
// of these errors is received.
func (s *Authenticator) Authenticate(ctx RPCContext, args structs.RequestWithIdentity) error {
// get the user ACLToken or anonymous token
secretID := args.GetAuthToken()
aclToken, err := s.ResolveSecretToken(secretID)
switch {
case err == nil:
// If ACLs are disabled or we have a non-anonymous token, return that.
if aclToken == nil || aclToken != structs.AnonymousACLToken {
args.SetIdentity(&structs.AuthenticatedIdentity{ACLToken: aclToken})
return nil
}
case errors.Is(err, structs.ErrTokenExpired):
return err
case errors.Is(err, structs.ErrTokenInvalid):
// if it's not a UUID it might be an identity claim
claims, err := s.VerifyClaim(secretID)
if err != nil {
// we already know the token wasn't valid for an ACL in the state
// store, so if we get an error at this point we have an invalid
// token and there are no other options but to bail out
return err
}
args.SetIdentity(&structs.AuthenticatedIdentity{Claims: claims})
return nil
case errors.Is(err, structs.ErrTokenNotFound):
// Check if the secret ID is the leader's secret ID, in which case treat
// it as a management token.
leaderAcl := s.getLeaderACL()
if leaderAcl != "" && secretID == leaderAcl {
aclToken = structs.LeaderACLToken
break
} else {
// Otherwise, see if the secret ID belongs to a node. We should
// reach this point only on first connection.
node, err := s.getState().NodeBySecretID(nil, secretID)
if err != nil {
// this is a go-memdb error; shouldn't happen
return fmt.Errorf("could not resolve node secret: %w", err)
}
if node != nil {
args.SetIdentity(&structs.AuthenticatedIdentity{ClientID: node.ID})
return nil
}
}
// we were passed a bogus token so we'll return an error, but we'll also
// want to capture the IP for metrics
remoteIP, err := ctx.GetRemoteIP()
if err != nil {
s.logger.Error("could not determine remote address", "error", err)
}
args.SetIdentity(&structs.AuthenticatedIdentity{RemoteIP: remoteIP})
return structs.ErrPermissionDenied
default: // any other error
return fmt.Errorf("could not resolve user: %w", err)
}
// If there's no context we're in a "static" handler which only happens for
// cases where the leader is making RPCs internally (volumewatcher and
// deploymentwatcher)
if ctx == nil {
args.SetIdentity(&structs.AuthenticatedIdentity{ACLToken: aclToken})
return nil
}
// At this point we either have an anonymous token or an invalid one.
// Unlike clients that provide their Node ID on first connection, server
// RPCs don't include an ID for the server so we identify servers by cert
// and IP address.
identity := &structs.AuthenticatedIdentity{ACLToken: aclToken}
if ctx.IsTLS() {
identity.TLSName = ctx.Certificate().Subject.CommonName
}
remoteIP, err := ctx.GetRemoteIP()
if err != nil {
s.logger.Error(
"could not authenticate RPC request or determine remote address", "error", err)
return err
}
identity.RemoteIP = remoteIP
args.SetIdentity(identity)
return nil
}
// ResolveACL is an authentication wrapper which handles resolving both ACL
// tokens and Workload Identities. If both are provided the ACL token is
// preferred, but it is best for the RPC caller to only include the credentials
// for the identity they intend the operation to be performed with.
func (s *Authenticator) ResolveACL(args structs.RequestWithIdentity) (*acl.ACL, error) {
identity := args.GetIdentity()
if !s.aclsEnabled || identity == nil {
return nil, nil
}
aclToken := identity.GetACLToken()
if aclToken != nil {
return s.ResolveACLForToken(aclToken)
}
claims := identity.GetClaims()
if claims != nil {
return s.ResolveClaims(claims)
}
return nil, nil
}
// ResolveACLForToken resolves an ACL from a token only. It should be used only
// by Variables endpoints, which have additional implicit policies for their
// claims so we can't wrap them up in ResolveACL.
//
// TODO: figure out a way to the Variables endpoint implicit policies baked into
// their acl.ACL object so that we can avoid using this method.
func (s *Authenticator) ResolveACLForToken(aclToken *structs.ACLToken) (*acl.ACL, error) {
if !s.aclsEnabled {
return nil, nil
}
snap, err := s.getState().Snapshot()
if err != nil {
return nil, err
}
return resolveACLFromToken(snap, s.aclCache, aclToken)
}
// ResolveClientOrACL resolves an ACL if the identity has a token or claim, and
// falls back to verifying the client ID if one has been set
func (s *Authenticator) ResolveClientOrACL(args structs.RequestWithIdentity) (*acl.ACL, error) {
identity := args.GetIdentity()
if !s.aclsEnabled || identity == nil || identity.ClientID != "" {
return nil, nil
}
aclObj, err := s.ResolveACL(args)
if err != nil {
return nil, err
}
// Returns either the users aclObj, or nil if ACLs are disabled.
return aclObj, nil
}
// ResolveToken is used to translate an ACL Token Secret ID into
// an ACL object, nil if ACLs are disabled, or an error.
func (s *Authenticator) ResolveToken(secretID string) (*acl.ACL, error) {
// Fast-path if ACLs are disabled
if !s.aclsEnabled {
return nil, nil
}
defer metrics.MeasureSince([]string{"nomad", "acl", "resolveToken"}, time.Now())
// Check if the secret ID is the leader secret ID, in which case treat it as
// a management token.
if leaderAcl := s.getLeaderACL(); leaderAcl != "" && secretID == leaderAcl {
return acl.ManagementACL, nil
}
// Snapshot the state
snap, err := s.getState().Snapshot()
if err != nil {
return nil, err
}
// Resolve the ACL
return resolveTokenFromSnapshotCache(snap, s.aclCache, secretID)
}
// VerifyClaim asserts that the token is valid and that the resulting
// allocation ID belongs to a non-terminal allocation
func (s *Authenticator) VerifyClaim(token string) (*structs.IdentityClaims, error) {
claims, err := s.encrypter.VerifyClaim(token)
if err != nil {
return nil, err
}
snap, err := s.getState().Snapshot()
if err != nil {
return nil, err
}
alloc, err := snap.AllocByID(nil, claims.AllocationID)
if err != nil {
return nil, err
}
if alloc == nil || alloc.Job == nil {
return nil, fmt.Errorf("allocation does not exist")
}
// the claims for terminal allocs are always treated as expired
if alloc.TerminalStatus() {
return nil, fmt.Errorf("allocation is terminal")
}
return claims, nil
}
func (s *Authenticator) ResolveClaims(claims *structs.IdentityClaims) (*acl.ACL, error) {
policies, err := s.ResolvePoliciesForClaims(claims)
if err != nil {
return nil, err
}
// Compile and cache the ACL object. For many claims this will result in an
// ACL object with no policies, which can be efficiently cached.
aclObj, err := structs.CompileACLObject(s.aclCache, policies)
if err != nil {
return nil, err
}
return aclObj, nil
}
// resolveTokenFromSnapshotCache is used to resolve an ACL object from a
// snapshot of state, using a cache to avoid parsing and ACL construction when
// possible. It is split from resolveToken to simplify testing.
func resolveTokenFromSnapshotCache(snap *state.StateSnapshot, cache *structs.ACLCache[*acl.ACL], secretID string) (*acl.ACL, error) {
// Lookup the ACL Token
var token *structs.ACLToken
var err error
// Handle anonymous requests
if secretID == "" {
token = structs.AnonymousACLToken
} else {
token, err = snap.ACLTokenBySecretID(nil, secretID)
if err != nil {
return nil, err
}
if token == nil {
return nil, structs.ErrTokenNotFound
}
if token.IsExpired(time.Now().UTC()) {
return nil, structs.ErrTokenExpired
}
}
return resolveACLFromToken(snap, cache, token)
}
func resolveACLFromToken(snap *state.StateSnapshot, cache *structs.ACLCache[*acl.ACL], token *structs.ACLToken) (*acl.ACL, error) {
// Check if this is a management token
if token.Type == structs.ACLManagementToken {
return acl.ManagementACL, nil
}
// Store all policies detailed in the token request, this includes the
// named policies and those referenced within the role link.
policies := make([]*structs.ACLPolicy, 0, len(token.Policies)+len(token.Roles))
// Iterate all the token policies and add these to our policy tracking
// array.
for _, policyName := range token.Policies {
policy, err := snap.ACLPolicyByName(nil, policyName)
if err != nil {
return nil, err
}
if policy == nil {
// Ignore policies that don't exist, since they don't grant any
// more privilege.
continue
}
// Add the policy to the tracking array.
policies = append(policies, policy)
}
// Iterate all the token role links, so we can unpack these and identify
// the ACL policies.
for _, roleLink := range token.Roles {
// Any error reading the role means we cannot move forward. We just
// ignore any roles that have been detailed but are not within our
// state.
role, err := snap.GetACLRoleByID(nil, roleLink.ID)
if err != nil {
return nil, err
}
if role == nil {
continue
}
// Unpack the policies held within the ACL role to form a single list
// of ACL policies that this token has available.
for _, policyLink := range role.Policies {
policy, err := snap.ACLPolicyByName(nil, policyLink.Name)
if err != nil {
return nil, err
}
// Ignore policies that don't exist, since they don't grant any
// more privilege.
if policy == nil {
continue
}
// Add the policy to the tracking array.
policies = append(policies, policy)
}
}
// Compile and cache the ACL object
aclObj, err := structs.CompileACLObject(cache, policies)
if err != nil {
return nil, err
}
return aclObj, nil
}
// ResolveSecretToken is used to translate an ACL Token Secret ID into
// an ACLToken object, nil if ACLs are disabled, or an error.
func (s *Authenticator) ResolveSecretToken(secretID string) (*structs.ACLToken, error) {
// TODO(Drew) Look into using ACLObject cache or create a separate cache
// Fast-path if ACLs are disabled
if !s.aclsEnabled {
return nil, nil
}
defer metrics.MeasureSince([]string{"nomad", "acl", "resolveSecretToken"}, time.Now())
if secretID == "" {
return structs.AnonymousACLToken, nil
}
if !helper.IsUUID(secretID) {
return nil, structs.ErrTokenInvalid
}
snap, err := s.getState().Snapshot()
if err != nil {
return nil, err
}
// Lookup the ACL Token
token, err := snap.ACLTokenBySecretID(nil, secretID)
if err != nil {
return nil, err
}
if token == nil {
return nil, structs.ErrTokenNotFound
}
if token.IsExpired(time.Now().UTC()) {
return nil, structs.ErrTokenExpired
}
return token, nil
}
func (s *Authenticator) ResolvePoliciesForClaims(claims *structs.IdentityClaims) ([]*structs.ACLPolicy, error) {
snap, err := s.getState().Snapshot()
if err != nil {
return nil, err
}
alloc, err := snap.AllocByID(nil, claims.AllocationID)
if err != nil {
return nil, err
}
if alloc == nil || alloc.Job == nil {
return nil, fmt.Errorf("allocation does not exist")
}
// Find any policies attached to the job
jobId := alloc.Job.ID
if alloc.Job.ParentID != "" {
jobId = alloc.Job.ParentID
}
iter, err := snap.ACLPolicyByJob(nil, alloc.Namespace, jobId)
if err != nil {
return nil, err
}
policies := []*structs.ACLPolicy{}
for {
raw := iter.Next()
if raw == nil {
break
}
policy := raw.(*structs.ACLPolicy)
if policy.JobACL == nil {
continue
}
switch {
case policy.JobACL.Group == "":
policies = append(policies, policy)
case policy.JobACL.Group != alloc.TaskGroup:
continue // don't bother checking task
case policy.JobACL.Task == "":
policies = append(policies, policy)
case policy.JobACL.Task == claims.TaskName:
policies = append(policies, policy)
}
}
return policies, nil
}

937
nomad/auth/auth_test.go Normal file
View File

@@ -0,0 +1,937 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1
package auth
import (
"crypto/ed25519"
"crypto/x509"
"crypto/x509/pkix"
"errors"
"fmt"
"net"
"strings"
"testing"
"time"
"github.com/go-jose/go-jose/v3"
"github.com/go-jose/go-jose/v3/jwt"
"github.com/shoenig/test/must"
"github.com/hashicorp/nomad/acl"
"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/helper/crypto"
"github.com/hashicorp/nomad/helper/pointer"
"github.com/hashicorp/nomad/helper/testlog"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/state"
"github.com/hashicorp/nomad/nomad/structs"
)
func TestAuthenticateDefault(t *testing.T) {
ci.Parallel(t)
testAuthenticator := func(t *testing.T, store *state.StateStore,
hasACLs, hasTLS bool) *Authenticator {
leaderACL := uuid.Generate()
return NewAuthenticator(&AuthenticatorConfig{
StateFn: func() *state.StateStore { return store },
Logger: testlog.HCLogger(t),
GetLeaderACLFn: func() string { return leaderACL },
AclsEnabled: hasACLs,
TLSEnabled: hasTLS,
Region: "global",
Encrypter: newTestEncrypter(),
})
}
testCases := []struct {
name string
testFn func(*testing.T, *state.StateStore)
}{
{
name: "mTLS and ACLs but anonymous",
testFn: func(t *testing.T, store *state.StateStore) {
ctx := newTestContext(t, noTLSCtx, "192.168.1.1")
args := &structs.GenericRequest{}
args.AuthToken = ""
auth := testAuthenticator(t, store, true, true)
err := auth.Authenticate(ctx, args)
must.NoError(t, err)
must.Eq(t, "token:anonymous", args.GetIdentity().String())
aclObj, err := auth.ResolveACL(args)
must.NoError(t, err)
must.NotNil(t, aclObj)
must.False(t, aclObj.AllowAgentRead())
},
},
{
name: "no mTLS or ACLs but anonymous",
testFn: func(t *testing.T, store *state.StateStore) {
ctx := newTestContext(t, noTLSCtx, "192.168.1.1")
args := &structs.GenericRequest{}
args.AuthToken = ""
auth := testAuthenticator(t, store, false, false)
err := auth.Authenticate(ctx, args)
must.NoError(t, err)
must.NotNil(t, args.GetIdentity())
aclObj, err := auth.ResolveACL(args)
must.NoError(t, err)
must.Nil(t, aclObj)
},
},
{
name: "mTLS and ACLs but anonymous with TLS context",
testFn: func(t *testing.T, store *state.StateStore) {
ctx := newTestContext(t, "cli.global.nomad", "192.168.1.1")
args := &structs.GenericRequest{}
args.AuthToken = ""
auth := testAuthenticator(t, store, true, true)
err := auth.Authenticate(ctx, args)
must.NoError(t, err)
must.Eq(t, "token:anonymous", args.GetIdentity().String())
aclObj, err := auth.ResolveACL(args)
must.NoError(t, err)
must.NotNil(t, aclObj)
must.False(t, aclObj.AllowAgentRead())
},
},
{
name: "mTLS and ACLs with client secret",
testFn: func(t *testing.T, store *state.StateStore) {
node := mock.Node()
store.UpsertNode(structs.MsgTypeTestSetup, 100, node)
ctx := newTestContext(t, noTLSCtx, "192.168.1.1")
args := &structs.GenericRequest{}
args.AuthToken = node.SecretID
auth := testAuthenticator(t, store, true, true)
err := auth.Authenticate(ctx, args)
must.NoError(t, err)
must.Eq(t, "client:"+node.ID, args.GetIdentity().String())
aclObj, err := auth.ResolveACL(args)
must.NoError(t, err)
must.Nil(t, aclObj)
},
},
{
name: "mTLS and ACLs with invalid token no TLS context",
testFn: func(t *testing.T, store *state.StateStore) {
ctx := newTestContext(t, noTLSCtx, "192.168.1.1")
args := &structs.GenericRequest{}
args.AuthToken = uuid.Generate()
auth := testAuthenticator(t, store, true, true)
err := auth.Authenticate(ctx, args)
must.ErrorIs(t, err, structs.ErrPermissionDenied)
must.Eq(t, ":192.168.1.1", args.GetIdentity().String())
},
},
{
name: "mTLS and ACLs with invalid token",
testFn: func(t *testing.T, store *state.StateStore) {
ctx := newTestContext(t, "cli.nomad.global", "192.168.1.1")
args := &structs.GenericRequest{}
args.AuthToken = uuid.Generate()
auth := testAuthenticator(t, store, true, true)
err := auth.Authenticate(ctx, args)
must.ErrorIs(t, err, structs.ErrPermissionDenied)
must.Eq(t, ":192.168.1.1", args.GetIdentity().String())
},
},
{
name: "mTLS and ACLs with valid ACL token",
testFn: func(t *testing.T, store *state.StateStore) {
token1 := mock.ACLToken()
store.UpsertACLTokens(structs.MsgTypeTestSetup, 100, []*structs.ACLToken{
token1,
})
ctx := newTestContext(t, "cli.nomad.global", "192.168.1.1")
args := &structs.GenericRequest{}
args.AuthToken = token1.SecretID
auth := testAuthenticator(t, store, true, true)
err := auth.Authenticate(ctx, args)
must.NoError(t, err)
must.Eq(t, "token:"+token1.AccessorID, args.GetIdentity().String())
aclObj, err := auth.ResolveACL(args)
must.NoError(t, err)
must.NotNil(t, aclObj)
must.False(t, aclObj.AllowAgentRead()) // no permissions
},
},
{
name: "mTLS and ACLs with expired ACL token",
testFn: func(t *testing.T, store *state.StateStore) {
token2 := mock.ACLToken()
expireTime := time.Now().Add(time.Second * -10)
token2.ExpirationTime = &expireTime
store.UpsertACLTokens(structs.MsgTypeTestSetup, 100, []*structs.ACLToken{
token2,
})
ctx := newTestContext(t, "cli.nomad.global", "192.168.1.1")
args := &structs.GenericRequest{}
args.AuthToken = token2.SecretID
auth := testAuthenticator(t, store, true, true)
err := auth.Authenticate(ctx, args)
must.ErrorIs(t, err, structs.ErrTokenExpired)
must.Eq(t, "unauthenticated", args.GetIdentity().String())
},
},
{
name: "mTLS but no ACLs with valid ACL token",
testFn: func(t *testing.T, store *state.StateStore) {
token3 := mock.ACLToken()
store.UpsertACLTokens(structs.MsgTypeTestSetup, 100, []*structs.ACLToken{
token3,
})
ctx := newTestContext(t, "cli.nomad.global", "192.168.1.1")
args := &structs.GenericRequest{}
args.AuthToken = token3.SecretID
auth := testAuthenticator(t, store, false, true)
err := auth.Authenticate(ctx, args)
must.NoError(t, err)
must.Nil(t, args.GetIdentity().ACLToken)
aclObj, err := auth.ResolveACL(args)
must.NoError(t, err)
must.Nil(t, aclObj)
},
},
{
name: "mTLS and ACLs with valid WI token",
testFn: func(t *testing.T, store *state.StateStore) {
alloc := mock.Alloc()
alloc.ClientStatus = structs.AllocClientStatusRunning
claims := structs.NewIdentityClaims(
alloc.Job, alloc, "web", alloc.LookupTask("web").Identity, time.Now())
auth := testAuthenticator(t, store, true, true)
token, err := auth.encrypter.(*testEncrypter).signClaim(claims)
must.NoError(t, err)
ctx := newTestContext(t, "client.nomad.global", "192.168.1.1")
args := &structs.GenericRequest{}
args.AuthToken = token
err = auth.Authenticate(ctx, args)
must.EqError(t, err, "allocation does not exist")
// insert alloc so it's live
store.UpsertAllocs(structs.MsgTypeTestSetup, 200,
[]*structs.Allocation{alloc})
args = &structs.GenericRequest{}
args.AuthToken = token
err = auth.Authenticate(ctx, args)
must.NoError(t, err)
aclObj, err := auth.ResolveACL(args)
must.NoError(t, err)
must.NotNil(t, aclObj)
must.False(t, aclObj.AllowAgentRead())
must.NotNil(t, args.GetIdentity().GetClaims())
must.Eq(t, "alloc:"+alloc.ID, args.GetIdentity().String())
// alloc becomes terminal
alloc.ClientStatus = structs.AllocClientStatusComplete
store.UpsertAllocs(structs.MsgTypeTestSetup, 200,
[]*structs.Allocation{alloc})
args = &structs.GenericRequest{}
args.AuthToken = token
err = auth.Authenticate(ctx, args)
must.EqError(t, err, "allocation is terminal")
must.Eq(t, "unauthenticated", args.GetIdentity().String())
aclObj, err = auth.ResolveACL(args)
must.Nil(t, aclObj)
must.Nil(t, args.GetIdentity().GetClaims())
},
},
{
name: "mTLS and ACLs with invalid WI token",
testFn: func(t *testing.T, store *state.StateStore) {
alloc := mock.Alloc()
alloc.ClientStatus = structs.AllocClientStatusRunning
claims := structs.NewIdentityClaims(
alloc.Job, alloc, "web", alloc.LookupTask("web").Identity, time.Now())
auth := testAuthenticator(t, store, true, true)
token, err := auth.encrypter.(*testEncrypter).signClaim(claims)
must.NoError(t, err)
// break the token
token = strings.ReplaceAll(token, "0", "1")
ctx := newTestContext(t, "client.nomad.global", "192.168.1.1")
args := &structs.GenericRequest{}
args.AuthToken = token
err = auth.Authenticate(ctx, args)
must.ErrorContains(t, err, "invalid signature")
},
},
{
name: "mTLS and ACLs from static handler with leader ACL token",
testFn: func(t *testing.T, store *state.StateStore) {
auth := testAuthenticator(t, store, true, true)
args := &structs.GenericRequest{}
args.AuthToken = auth.getLeaderACL()
var ctx *testContext
err := auth.Authenticate(ctx, args)
must.NoError(t, err)
must.Eq(t, "token:leader", args.GetIdentity().String())
aclObj, err := auth.ResolveACL(args)
must.NoError(t, err)
must.NotNil(t, aclObj)
must.True(t, aclObj.IsManagement())
},
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
store := testStateStore(t)
tc.testFn(t, store)
})
}
}
func TestResolveACLToken(t *testing.T) {
ci.Parallel(t)
testCases := []struct {
name string
testFn func()
}{
{
name: "leader token",
testFn: func() {
auth := testDefaultAuthenticator(t)
// Resolve the token and ensure it's a management token.
aclResp, err := auth.ResolveToken(auth.getLeaderACL())
must.NoError(t, err)
must.NotNil(t, aclResp)
must.True(t, aclResp.IsManagement())
},
},
{
name: "anonymous token",
testFn: func() {
auth := testDefaultAuthenticator(t)
// Call the function with an empty input secret ID which is
// classed as representing anonymous access in clusters with
// ACLs enabled.
aclResp, err := auth.ResolveToken("")
must.NoError(t, err)
must.NotNil(t, aclResp)
must.False(t, aclResp.IsManagement())
},
},
{
name: "anonymous token and acls disabled",
testFn: func() {
auth := testDefaultAuthenticator(t)
auth.aclsEnabled = false
aclResp, err := auth.ResolveToken("")
must.NoError(t, err)
must.Nil(t, aclResp)
},
},
{
name: "token not found",
testFn: func() {
auth := testDefaultAuthenticator(t)
// Call the function with randomly generated secret ID which
// does not exist within state.
aclResp, err := auth.ResolveToken(uuid.Generate())
must.ErrorIs(t, err, structs.ErrTokenNotFound)
must.Nil(t, aclResp)
},
},
{
name: "token expired",
testFn: func() {
auth := testDefaultAuthenticator(t)
// Create a mock token with an expiration time long in the
// past, and upsert.
token := mock.ACLToken()
token.ExpirationTime = pointer.Of(time.Date(
1970, time.January, 1, 0, 0, 0, 0, time.UTC))
err := auth.getState().UpsertACLTokens(
structs.MsgTypeTestSetup, 10, []*structs.ACLToken{token})
must.NoError(t, err)
// Perform the function call which should result in finding the
// token has expired.
aclResp, err := auth.ResolveToken(uuid.Generate())
must.ErrorIs(t, err, structs.ErrTokenNotFound)
must.Nil(t, aclResp)
},
},
{
name: "management token",
testFn: func() {
auth := testDefaultAuthenticator(t)
// Generate a management token and upsert this.
managementToken := mock.ACLToken()
managementToken.Type = structs.ACLManagementToken
managementToken.Policies = nil
err := auth.getState().UpsertACLTokens(
structs.MsgTypeTestSetup, 10, []*structs.ACLToken{managementToken})
must.NoError(t, err)
// Resolve the token and check that we received a management
// ACL.
aclResp, err := auth.ResolveToken(managementToken.SecretID)
must.Nil(t, err)
must.NotNil(t, aclResp)
must.True(t, aclResp.IsManagement())
must.Eq(t, acl.ManagementACL, aclResp)
},
},
{
name: "client token with policies only",
testFn: func() {
auth := testDefaultAuthenticator(t)
// Generate a client token with associated policies and upsert
// these.
policy1 := mock.ACLPolicy()
policy2 := mock.ACLPolicy()
err := auth.getState().UpsertACLPolicies(
structs.MsgTypeTestSetup, 10, []*structs.ACLPolicy{policy1, policy2})
clientToken := mock.ACLToken()
clientToken.Policies = []string{policy1.Name, policy2.Name}
err = auth.getState().UpsertACLTokens(
structs.MsgTypeTestSetup, 20, []*structs.ACLToken{clientToken})
must.NoError(t, err)
// Resolve the token and check that we received a client
// ACL with appropriate permissions.
aclResp, err := auth.ResolveToken(clientToken.SecretID)
must.Nil(t, err)
must.NotNil(t, aclResp)
must.False(t, aclResp.IsManagement())
allowed := aclResp.AllowNamespaceOperation("default", acl.NamespaceCapabilityListJobs)
must.True(t, allowed)
allowed = aclResp.AllowNamespaceOperation("other", acl.NamespaceCapabilityListJobs)
must.False(t, allowed)
// Resolve the same token again and ensure we get the same
// result.
aclResp2, err := auth.ResolveToken(clientToken.SecretID)
must.Nil(t, err)
must.NotNil(t, aclResp2)
must.Eq(t, aclResp, aclResp2)
// Bust the cache by upserting the policy
err = auth.getState().UpsertACLPolicies(
structs.MsgTypeTestSetup, 30, []*structs.ACLPolicy{policy1})
must.Nil(t, err)
// Resolve the same token again, should get different value
aclResp3, err := auth.ResolveToken(clientToken.SecretID)
must.Nil(t, err)
must.NotNil(t, aclResp3)
must.NotEq(t, aclResp2, aclResp3)
},
},
{
name: "client token with roles only",
testFn: func() {
auth := testDefaultAuthenticator(t)
// Create a client token that only has a link to a role.
policy1 := mock.ACLPolicy()
policy2 := mock.ACLPolicy()
err := auth.getState().UpsertACLPolicies(
structs.MsgTypeTestSetup, 10, []*structs.ACLPolicy{policy1, policy2})
aclRole := mock.ACLRole()
aclRole.Policies = []*structs.ACLRolePolicyLink{
{Name: policy1.Name},
{Name: policy2.Name},
}
err = auth.getState().UpsertACLRoles(
structs.MsgTypeTestSetup, 30, []*structs.ACLRole{aclRole}, false)
must.NoError(t, err)
clientToken := mock.ACLToken()
clientToken.Policies = []string{}
clientToken.Roles = []*structs.ACLTokenRoleLink{{ID: aclRole.ID}}
err = auth.getState().UpsertACLTokens(
structs.MsgTypeTestSetup, 30, []*structs.ACLToken{clientToken})
must.NoError(t, err)
// Resolve the token and check that we received a client
// ACL with appropriate permissions.
aclResp, err := auth.ResolveToken(clientToken.SecretID)
must.Nil(t, err)
must.NotNil(t, aclResp)
must.False(t, aclResp.IsManagement())
allowed := aclResp.AllowNamespaceOperation("default", acl.NamespaceCapabilityListJobs)
must.True(t, allowed)
allowed = aclResp.AllowNamespaceOperation("other", acl.NamespaceCapabilityListJobs)
must.False(t, allowed)
// Remove the policies from the ACL role and ensure the resolution
// permissions are updated.
aclRole.Policies = []*structs.ACLRolePolicyLink{}
err = auth.getState().UpsertACLRoles(
structs.MsgTypeTestSetup, 40, []*structs.ACLRole{aclRole}, false)
must.NoError(t, err)
aclResp, err = auth.ResolveToken(clientToken.SecretID)
must.Nil(t, err)
must.NotNil(t, aclResp)
must.False(t, aclResp.IsManagement())
must.False(t, aclResp.AllowNamespaceOperation("default", acl.NamespaceCapabilityListJobs))
},
},
{
name: "client with roles and policies",
testFn: func() {
auth := testDefaultAuthenticator(t)
// Generate two policies, each with a different namespace
// permission set.
policy1 := &structs.ACLPolicy{
Name: "policy-" + uuid.Generate(),
Rules: `namespace "platform" { policy = "write"}`,
CreateIndex: 10,
ModifyIndex: 10,
}
policy1.SetHash()
policy2 := &structs.ACLPolicy{
Name: "policy-" + uuid.Generate(),
Rules: `namespace "web" { policy = "write"}`,
CreateIndex: 10,
ModifyIndex: 10,
}
policy2.SetHash()
err := auth.getState().UpsertACLPolicies(
structs.MsgTypeTestSetup, 10, []*structs.ACLPolicy{policy1, policy2})
must.NoError(t, err)
// Create a role which references the policy that has access to
// the web namespace.
aclRole := mock.ACLRole()
aclRole.Policies = []*structs.ACLRolePolicyLink{{Name: policy2.Name}}
err = auth.getState().UpsertACLRoles(
structs.MsgTypeTestSetup, 20, []*structs.ACLRole{aclRole}, false)
must.NoError(t, err)
// Create a token which references the policy and role.
clientToken := mock.ACLToken()
clientToken.Policies = []string{policy1.Name}
clientToken.Roles = []*structs.ACLTokenRoleLink{{ID: aclRole.ID}}
err = auth.getState().UpsertACLTokens(
structs.MsgTypeTestSetup, 30, []*structs.ACLToken{clientToken})
must.NoError(t, err)
// Resolve the token and check that we received a client
// ACL with appropriate permissions.
aclResp, err := auth.ResolveToken(clientToken.SecretID)
must.Nil(t, err)
must.NotNil(t, aclResp)
must.False(t, aclResp.IsManagement())
allowed := aclResp.AllowNamespaceOperation("platform", acl.NamespaceCapabilityListJobs)
must.True(t, allowed)
allowed = aclResp.AllowNamespaceOperation("web", acl.NamespaceCapabilityListJobs)
must.True(t, allowed)
},
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
tc.testFn()
})
}
}
func TestResolveSecretToken(t *testing.T) {
ci.Parallel(t)
auth := testDefaultAuthenticator(t)
testCases := []struct {
name string
testFn func()
}{
{
name: "valid token",
testFn: func() {
// Generate and upsert a token.
token := mock.ACLToken()
err := auth.getState().UpsertACLTokens(
structs.MsgTypeTestSetup, 10, []*structs.ACLToken{token})
must.NoError(t, err)
// Attempt to look up the token and perform checks.
tokenResp, err := auth.ResolveSecretToken(token.SecretID)
must.NoError(t, err)
must.NotNil(t, tokenResp)
must.Eq(t, token, tokenResp)
},
},
{
name: "anonymous token",
testFn: func() {
// Call the function with an empty input secret ID which is
// classed as representing anonymous access in clusters with
// ACLs enabled.
tokenResp, err := auth.ResolveSecretToken("")
must.NoError(t, err)
must.NotNil(t, tokenResp)
must.Eq(t, structs.AnonymousACLToken, tokenResp)
},
},
{
name: "token not found",
testFn: func() {
// Call the function with randomly generated secret ID which
// does not exist within state.
tokenResp, err := auth.ResolveSecretToken(uuid.Generate())
must.ErrorIs(t, err, structs.ErrTokenNotFound)
must.Nil(t, tokenResp)
},
},
{
name: "token expired",
testFn: func() {
// Create a mock token with an expiration time long in the
// past, and upsert.
token := mock.ACLToken()
token.ExpirationTime = pointer.Of(time.Date(
1970, time.January, 1, 0, 0, 0, 0, time.UTC))
err := auth.getState().UpsertACLTokens(
structs.MsgTypeTestSetup, 10, []*structs.ACLToken{token})
must.NoError(t, err)
// Perform the function call which should result in finding the
// token has expired.
tokenResp, err := auth.ResolveSecretToken(uuid.Generate())
must.ErrorIs(t, err, structs.ErrTokenNotFound)
must.Nil(t, tokenResp)
},
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
tc.testFn()
})
}
}
func TestResolveClaims(t *testing.T) {
ci.Parallel(t)
auth := testDefaultAuthenticator(t)
index := uint64(100)
alloc := mock.Alloc()
dispatchAlloc := mock.Alloc()
dispatchAlloc.Job.ParentID = alloc.JobID
claims := &structs.IdentityClaims{
Namespace: alloc.Namespace,
JobID: alloc.Job.ID,
AllocationID: alloc.ID,
TaskName: alloc.Job.TaskGroups[0].Tasks[0].Name,
}
dispatchClaims := &structs.IdentityClaims{
Namespace: dispatchAlloc.Namespace,
JobID: dispatchAlloc.Job.ID,
AllocationID: dispatchAlloc.ID,
TaskName: dispatchAlloc.Job.TaskGroups[0].Tasks[0].Name,
}
// unrelated policy
policy0 := mock.ACLPolicy()
// policy for job
policy1 := mock.ACLPolicy()
policy1.JobACL = &structs.JobACL{
Namespace: claims.Namespace,
JobID: claims.JobID,
}
// policy for job and group
policy2 := mock.ACLPolicy()
policy2.JobACL = &structs.JobACL{
Namespace: claims.Namespace,
JobID: claims.JobID,
Group: alloc.Job.TaskGroups[0].Name,
}
// policy for job and group and task
policy3 := mock.ACLPolicy()
policy3.JobACL = &structs.JobACL{
Namespace: claims.Namespace,
JobID: claims.JobID,
Group: alloc.Job.TaskGroups[0].Name,
Task: claims.TaskName,
}
// policy for job and group but different task
policy4 := mock.ACLPolicy()
policy4.JobACL = &structs.JobACL{
Namespace: claims.Namespace,
JobID: claims.JobID,
Group: alloc.Job.TaskGroups[0].Name,
Task: "another",
}
// policy for job but different group
policy5 := mock.ACLPolicy()
policy5.JobACL = &structs.JobACL{
Namespace: claims.Namespace,
JobID: claims.JobID,
Group: "another",
}
// policy for same namespace but different job
policy6 := mock.ACLPolicy()
policy6.JobACL = &structs.JobACL{
Namespace: claims.Namespace,
JobID: "another",
}
// policy for same job in different namespace
policy7 := mock.ACLPolicy()
policy7.JobACL = &structs.JobACL{
Namespace: "another",
JobID: claims.JobID,
}
aclObj, err := auth.ResolveClaims(claims)
must.Nil(t, aclObj)
must.EqError(t, err, "allocation does not exist")
// upsert the allocation
index++
err = auth.getState().UpsertAllocs(structs.MsgTypeTestSetup, index, []*structs.Allocation{alloc, dispatchAlloc})
must.NoError(t, err)
// Resolve claims and check we that the ACL object without policies provides no access
aclObj, err = auth.ResolveClaims(claims)
must.NoError(t, err)
must.NotNil(t, aclObj)
must.False(t, aclObj.AllowNamespaceOperation("default", acl.NamespaceCapabilityListJobs))
// Add the policies
index++
err = auth.getState().UpsertACLPolicies(structs.MsgTypeTestSetup, index, []*structs.ACLPolicy{
policy0, policy1, policy2, policy3, policy4, policy5, policy6, policy7})
must.NoError(t, err)
// Re-resolve and check that the resulting ACL looks reasonable
aclObj, err = auth.ResolveClaims(claims)
must.NoError(t, err)
must.NotNil(t, aclObj)
must.False(t, aclObj.IsManagement())
must.True(t, aclObj.AllowNamespaceOperation("default", acl.NamespaceCapabilityListJobs))
must.False(t, aclObj.AllowNamespaceOperation("other", acl.NamespaceCapabilityListJobs))
// Resolve the same claim again, should get cache value
aclObj2, err := auth.ResolveClaims(claims)
must.NoError(t, err)
must.NotNil(t, aclObj)
must.Eq(t, aclObj, aclObj2, must.Sprintf("expected cached value"))
policies, err := auth.ResolvePoliciesForClaims(claims)
must.NoError(t, err)
must.Len(t, 3, policies)
must.SliceContainsAll(t, policies, []*structs.ACLPolicy{policy1, policy2, policy3})
// Check the dispatch claims
aclObj3, err := auth.ResolveClaims(dispatchClaims)
must.NoError(t, err)
must.NotNil(t, aclObj)
must.Eq(t, aclObj, aclObj3, must.Sprintf("expected cached value"))
dispatchPolicies, err := auth.ResolvePoliciesForClaims(dispatchClaims)
must.NoError(t, err)
must.Len(t, 3, dispatchPolicies)
must.SliceContainsAll(t, dispatchPolicies, []*structs.ACLPolicy{policy1, policy2, policy3})
}
func testStateStore(t *testing.T) *state.StateStore {
sconfig := &state.StateStoreConfig{
Logger: testlog.HCLogger(t),
Region: "global",
JobTrackedVersions: structs.JobDefaultTrackedVersions,
}
store, err := state.NewStateStore(sconfig)
must.NoError(t, err)
return store
}
func testDefaultAuthenticator(t *testing.T) *Authenticator {
leaderACL := uuid.Generate()
store := testStateStore(t)
return NewAuthenticator(&AuthenticatorConfig{
StateFn: func() *state.StateStore { return store },
Logger: testlog.HCLogger(t),
GetLeaderACLFn: func() string { return leaderACL },
AclsEnabled: true,
TLSEnabled: true,
Region: "global",
Encrypter: nil,
})
}
type testContext struct {
isTLS bool
cert *x509.Certificate
remoteIP net.IP
}
const noTLSCtx = ""
func newTestContext(t *testing.T, tlsName, ipAddr string) *testContext {
t.Helper()
ip := net.ParseIP(ipAddr)
must.NotNil(t, ip, must.Sprintf("could not parse ipAddr=%s", ipAddr))
ctx := &testContext{
remoteIP: ip,
}
if tlsName != "" {
ctx.isTLS = true
ctx.cert = &x509.Certificate{
Subject: pkix.Name{
CommonName: tlsName,
},
}
}
return ctx
}
func (ctx *testContext) GetRemoteIP() (net.IP, error) {
if ctx == nil {
return nil, nil
}
if len(ctx.remoteIP) == 0 {
return nil, errors.New("could not determine remote IP from context")
}
return ctx.remoteIP, nil
}
func (ctx *testContext) IsTLS() bool {
if ctx == nil {
return false
}
return ctx.isTLS
}
func (ctx *testContext) IsStatic() bool {
return ctx == nil
}
func (ctx *testContext) Certificate() *x509.Certificate {
if ctx == nil {
return nil
}
return ctx.cert
}
type testEncrypter struct {
key ed25519.PrivateKey
}
func newTestEncrypter() *testEncrypter {
buf, _ := crypto.Bytes(32)
return &testEncrypter{
key: ed25519.NewKeyFromSeed(buf),
}
}
func (te *testEncrypter) signClaim(claims *structs.IdentityClaims) (string, error) {
opts := (&jose.SignerOptions{}).WithType("JWT")
sig, err := jose.NewSigner(jose.SigningKey{Algorithm: jose.EdDSA, Key: te.key}, opts)
if err != nil {
return "", err
}
raw, err := jwt.Signed(sig).Claims(claims).CompactSerialize()
if err != nil {
return "", err
}
return raw, nil
}
func (te *testEncrypter) VerifyClaim(tokenString string) (*structs.IdentityClaims, error) {
token, err := jwt.ParseSigned(tokenString)
if err != nil {
return nil, fmt.Errorf("failed to parse signed token: %w", err)
}
pubKey := te.key.Public()
claims := &structs.IdentityClaims{}
if err := token.Claims(pubKey, claims); err != nil {
return nil, fmt.Errorf("invalid signature: %w", err)
}
expect := jwt.Expected{}
if err := claims.Validate(expect); err != nil {
return nil, fmt.Errorf("invalid claims: %w", err)
}
return claims, nil
}

View File

@@ -79,7 +79,7 @@ func TestCSIVolumeEndpoint_Get(t *testing.T) {
func TestCSIVolumeEndpoint_Get_ACL(t *testing.T) {
ci.Parallel(t)
srv, shutdown := TestServer(t, func(c *Config) {
srv, _, shutdown := TestACLServer(t, func(c *Config) {
c.NumSchedulers = 0 // Prevent automatic dequeue
})
defer shutdown()
@@ -88,8 +88,6 @@ func TestCSIVolumeEndpoint_Get_ACL(t *testing.T) {
ns := structs.DefaultNamespace
state := srv.fsm.State()
state.BootstrapACLTokens(structs.MsgTypeTestSetup, 1, 0, mock.ACLManagementToken())
srv.config.ACLEnabled = true
policy := mock.NamespacePolicy(ns, "", []string{acl.NamespaceCapabilityCSIReadVolume})
validToken := mock.CreatePolicyAndToken(t, state, 1001, "csi-access", policy)
@@ -467,8 +465,7 @@ func TestCSIVolumeEndpoint_Claim(t *testing.T) {
// when a controller is required.
func TestCSIVolumeEndpoint_ClaimWithController(t *testing.T) {
ci.Parallel(t)
srv, shutdown := TestServer(t, func(c *Config) {
c.ACLEnabled = true
srv, _, shutdown := TestACLServer(t, func(c *Config) {
c.NumSchedulers = 0 // Prevent automatic dequeue
})
defer shutdown()
@@ -476,7 +473,6 @@ func TestCSIVolumeEndpoint_ClaimWithController(t *testing.T) {
ns := structs.DefaultNamespace
state := srv.fsm.State()
state.BootstrapACLTokens(structs.MsgTypeTestSetup, 1, 0, mock.ACLManagementToken())
policy := mock.NamespacePolicy(ns, "", []string{acl.NamespaceCapabilityCSIMountVolume}) +
mock.PluginPolicy("read")
@@ -553,7 +549,7 @@ func TestCSIVolumeEndpoint_ClaimWithController(t *testing.T) {
func TestCSIVolumeEndpoint_Unpublish(t *testing.T) {
ci.Parallel(t)
srv, shutdown := TestServer(t, func(c *Config) { c.NumSchedulers = 0 })
srv, _, shutdown := TestACLServer(t, func(c *Config) { c.NumSchedulers = 0 })
defer shutdown()
testutil.WaitForLeader(t, srv.RPC)
@@ -561,7 +557,6 @@ func TestCSIVolumeEndpoint_Unpublish(t *testing.T) {
index := uint64(1000)
ns := structs.DefaultNamespace
state := srv.fsm.State()
state.BootstrapACLTokens(structs.MsgTypeTestSetup, 1, 0, mock.ACLManagementToken())
policy := mock.NamespacePolicy(ns, "", []string{acl.NamespaceCapabilityCSIMountVolume}) +
mock.PluginPolicy("read")
@@ -743,15 +738,13 @@ func TestCSIVolumeEndpoint_Unpublish(t *testing.T) {
func TestCSIVolumeEndpoint_List(t *testing.T) {
ci.Parallel(t)
srv, shutdown := TestServer(t, func(c *Config) {
srv, _, shutdown := TestACLServer(t, func(c *Config) {
c.NumSchedulers = 0 // Prevent automatic dequeue
})
defer shutdown()
testutil.WaitForLeader(t, srv.RPC)
state := srv.fsm.State()
state.BootstrapACLTokens(structs.MsgTypeTestSetup, 1, 0, mock.ACLManagementToken())
srv.config.ACLEnabled = true
codec := rpcClient(t, srv)
nsPolicy := mock.NamespacePolicy(structs.DefaultNamespace, "", []string{acl.NamespaceCapabilityCSIReadVolume}) +
@@ -1995,7 +1988,7 @@ func TestCSIVolume_nodeExpandVolume(t *testing.T) {
func TestCSIPluginEndpoint_RegisterViaFingerprint(t *testing.T) {
ci.Parallel(t)
srv, shutdown := TestServer(t, func(c *Config) {
srv, _, shutdown := TestACLServer(t, func(c *Config) {
c.NumSchedulers = 0 // Prevent automatic dequeue
})
defer shutdown()
@@ -2005,8 +1998,6 @@ func TestCSIPluginEndpoint_RegisterViaFingerprint(t *testing.T) {
defer deleteNodes()
state := srv.fsm.State()
state.BootstrapACLTokens(structs.MsgTypeTestSetup, 1, 0, mock.ACLManagementToken())
srv.config.ACLEnabled = true
codec := rpcClient(t, srv)
// Get the plugin back out
@@ -2144,7 +2135,7 @@ func TestCSIPluginEndpoint_RegisterViaJob(t *testing.T) {
func TestCSIPluginEndpoint_DeleteViaGC(t *testing.T) {
ci.Parallel(t)
srv, shutdown := TestServer(t, func(c *Config) {
srv, _, shutdown := TestACLServer(t, func(c *Config) {
c.NumSchedulers = 0 // Prevent automatic dequeue
})
defer shutdown()
@@ -2154,8 +2145,6 @@ func TestCSIPluginEndpoint_DeleteViaGC(t *testing.T) {
defer deleteNodes()
state := srv.fsm.State()
state.BootstrapACLTokens(structs.MsgTypeTestSetup, 1, 0, mock.ACLManagementToken())
srv.config.ACLEnabled = true
codec := rpcClient(t, srv)
// Get the plugin back out

View File

@@ -108,6 +108,13 @@ type RPCContext struct {
NodeID string
}
func (ctx *RPCContext) IsTLS() bool {
if ctx == nil {
return false
}
return ctx.TLS
}
// Certificate returns the first certificate available in the chain.
func (ctx *RPCContext) Certificate() *x509.Certificate {
if ctx == nil || len(ctx.VerifiedChains) == 0 || len(ctx.VerifiedChains[0]) == 0 {
@@ -140,6 +147,34 @@ func (ctx *RPCContext) ValidateCertificateForName(name string) error {
return fmt.Errorf("invalid certificate, %s not in %s", name, strings.Join(validNames, ","))
}
func (ctx *RPCContext) IsStatic() bool {
return ctx == nil
}
func (ctx *RPCContext) GetRemoteIP() (net.IP, error) {
if ctx == nil {
return nil, nil
}
var remoteAddr *net.TCPAddr
var ok bool
if ctx.Session != nil {
remoteAddr, ok = ctx.Session.RemoteAddr().(*net.TCPAddr)
if !ok {
return nil, errors.New("session address was not a TCP address")
}
}
if remoteAddr == nil && ctx.Conn != nil {
remoteAddr, ok = ctx.Conn.RemoteAddr().(*net.TCPAddr)
if !ok {
return nil, errors.New("session address was not a TCP address")
}
}
if remoteAddr != nil {
return remoteAddr.IP, nil
}
return nil, errors.New("could not determine remote IP from context")
}
// listen is used to listen for incoming RPC connections
func (r *rpcHandler) listen(ctx context.Context) {
defer close(r.listenerCh)

View File

@@ -25,7 +25,6 @@ import (
consulapi "github.com/hashicorp/consul/api"
log "github.com/hashicorp/go-hclog"
multierror "github.com/hashicorp/go-multierror"
"github.com/hashicorp/nomad/acl"
"github.com/hashicorp/raft"
autopilot "github.com/hashicorp/raft-autopilot"
raftboltdb "github.com/hashicorp/raft-boltdb/v2"
@@ -39,6 +38,7 @@ import (
"github.com/hashicorp/nomad/helper/pool"
"github.com/hashicorp/nomad/helper/tlsutil"
"github.com/hashicorp/nomad/lib/auth/oidc"
"github.com/hashicorp/nomad/nomad/auth"
"github.com/hashicorp/nomad/nomad/deploymentwatcher"
"github.com/hashicorp/nomad/nomad/drainer"
"github.com/hashicorp/nomad/nomad/lock"
@@ -92,10 +92,6 @@ const (
// defaultConsulDiscoveryIntervalRetry is how often to poll Consul for
// new servers if there is no leader and the last Consul query failed.
defaultConsulDiscoveryIntervalRetry time.Duration = 9 * time.Second
// aclCacheSize is the number of ACL objects to keep cached. ACLs have a parsing and
// construction cost, so we keep the hot objects cached to reduce the ACL token resolution time.
aclCacheSize = 512
)
// Server is Nomad server which manages the job queues,
@@ -146,6 +142,8 @@ type Server struct {
// rpcServer is the static RPC server that is used by the local agent.
rpcServer *rpc.Server
auth *auth.Authenticator
// clientRpcAdvertise is the advertised RPC address for Nomad clients to connect
// to this server
clientRpcAdvertise net.Addr
@@ -264,9 +262,6 @@ type Server struct {
workerConfigLock sync.RWMutex
workersEventCh chan interface{}
// aclCache is used to maintain the parsed ACL objects
aclCache *structs.ACLCache[*acl.ACL]
// oidcProviderCache maintains a cache of OIDC providers. This is useful as
// the provider performs background HTTP requests. When the Nomad server is
// shutting down, the oidcProviderCache.Shutdown() function must be called.
@@ -331,9 +326,6 @@ func NewServer(config *Config, consulCatalog consul.CatalogAPI, consulConfigEntr
return nil, err
}
// Create the ACL object cache
aclCache := structs.NewACLCache[*acl.ACL](aclCacheSize)
// Create the logger
logger := config.Logger.ResetNamedIntercept("nomad")
@@ -363,7 +355,6 @@ func NewServer(config *Config, consulCatalog consul.CatalogAPI, consulConfigEntr
reapCancelableEvalsCh: make(chan struct{}),
blockedEvals: NewBlockedEvals(evalBroker, logger),
rpcTLS: incomingTLS,
aclCache: aclCache,
workersEventCh: make(chan interface{}, 1),
lockTTLTimer: lock.NewTTLTimer(),
lockDelayTimer: lock.NewDelayTimer(),
@@ -427,6 +418,16 @@ func NewServer(config *Config, consulCatalog consul.CatalogAPI, consulConfigEntr
return nil, fmt.Errorf("Failed to start RPC layer: %v", err)
}
s.auth = auth.NewAuthenticator(&auth.AuthenticatorConfig{
StateFn: s.State,
Logger: s.logger,
GetLeaderACLFn: s.getLeaderAcl,
AclsEnabled: s.config.ACLEnabled,
TLSEnabled: s.config.TLSConfig != nil && s.config.TLSConfig.EnableRPC,
Region: s.Region(),
Encrypter: s.encrypter,
})
// Initialize the Raft server
if err := s.setupRaft(); err != nil {
s.Shutdown()