mirror of
https://github.com/kemko/nomad.git
synced 2026-01-05 09:55:44 +03:00
api: acl bootstrap errors aren't 500
Noticed that ACL endpoints return 500 status code for user errors. This is confusing and can lead to false monitoring alerts. Here, I introduce a concept of RPCCoded errors to be returned by RPC that signal a code in addition to error message. Codes for now match HTTP codes to ease reasoning. ``` $ nomad acl bootstrap Error bootstrapping: Unexpected response code: 500 (ACL bootstrap already done (reset index: 9)) $ nomad acl bootstrap Error bootstrapping: Unexpected response code: 400 (ACL bootstrap already done (reset index: 9)) ```
This commit is contained in:
@@ -304,6 +304,9 @@ func (s *HTTPServer) wrap(handler func(resp http.ResponseWriter, req *http.Reque
|
||||
errMsg := err.Error()
|
||||
if http, ok := err.(HTTPCodedError); ok {
|
||||
code = http.Code()
|
||||
} else if ecode, emsg, ok := structs.CodeFromRPCCodedErr(err); ok {
|
||||
code = ecode
|
||||
errMsg = emsg
|
||||
} else {
|
||||
// RPC errors get wrapped, so manually unwrap by only looking at their suffix
|
||||
if strings.HasSuffix(errMsg, structs.ErrPermissionDenied.Error()) {
|
||||
|
||||
@@ -18,7 +18,7 @@ import (
|
||||
|
||||
var (
|
||||
// aclDisabled is returned when an ACL endpoint is hit but ACLs are not enabled
|
||||
aclDisabled = fmt.Errorf("ACL support disabled")
|
||||
aclDisabled = structs.NewErrRPCCoded(400, "ACL support disabled")
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -55,13 +55,13 @@ func (a *ACL) UpsertPolicies(args *structs.ACLPolicyUpsertRequest, reply *struct
|
||||
|
||||
// Validate non-zero set of policies
|
||||
if len(args.Policies) == 0 {
|
||||
return fmt.Errorf("must specify as least one policy")
|
||||
return structs.NewErrRPCCoded(400, "must specify as least one policy")
|
||||
}
|
||||
|
||||
// Validate each policy, compute hash
|
||||
for idx, policy := range args.Policies {
|
||||
if err := policy.Validate(); err != nil {
|
||||
return fmt.Errorf("policy %d invalid: %v", idx, err)
|
||||
return structs.NewErrRPCCodedf(404, "policy %d invalid: %v", idx, err)
|
||||
}
|
||||
policy.SetHash()
|
||||
}
|
||||
@@ -99,7 +99,7 @@ func (a *ACL) DeletePolicies(args *structs.ACLPolicyDeleteRequest, reply *struct
|
||||
|
||||
// Validate non-zero set of policies
|
||||
if len(args.Names) == 0 {
|
||||
return fmt.Errorf("must specify as least one policy")
|
||||
return structs.NewErrRPCCoded(400, "must specify as least one policy")
|
||||
}
|
||||
|
||||
// Update via Raft
|
||||
@@ -370,9 +370,9 @@ func (a *ACL) Bootstrap(args *structs.ACLTokenBootstrapRequest, reply *structs.A
|
||||
// Check if there is a reset index specified
|
||||
specifiedIndex := a.fileBootstrapResetIndex()
|
||||
if specifiedIndex == 0 {
|
||||
return fmt.Errorf("ACL bootstrap already done (reset index: %d)", resetIdx)
|
||||
return structs.NewErrRPCCodedf(400, "ACL bootstrap already done (reset index: %d)", resetIdx)
|
||||
} else if specifiedIndex != resetIdx {
|
||||
return fmt.Errorf("Invalid bootstrap reset index (specified %d, reset index: %d)", specifiedIndex, resetIdx)
|
||||
return structs.NewErrRPCCodedf(400, "Invalid bootstrap reset index (specified %d, reset index: %d)", specifiedIndex, resetIdx)
|
||||
}
|
||||
|
||||
// Setup the reset index to allow bootstrapping again
|
||||
@@ -404,7 +404,7 @@ func (a *ACL) Bootstrap(args *structs.ACLTokenBootstrapRequest, reply *structs.A
|
||||
}
|
||||
out, err := state.ACLTokenByAccessorID(nil, args.Token.AccessorID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("token lookup failed: %v", err)
|
||||
return structs.NewErrRPCCodedf(400, "token lookup failed: %v", err)
|
||||
}
|
||||
reply.Tokens = append(reply.Tokens, out)
|
||||
|
||||
@@ -448,7 +448,7 @@ func (a *ACL) UpsertTokens(args *structs.ACLTokenUpsertRequest, reply *structs.A
|
||||
|
||||
// Validate non-zero set of tokens
|
||||
if len(args.Tokens) == 0 {
|
||||
return fmt.Errorf("must specify as least one token")
|
||||
return structs.NewErrRPCCoded(400, "must specify as least one token")
|
||||
}
|
||||
|
||||
// Force the request to the authoritative region if we are creating global tokens
|
||||
@@ -466,7 +466,7 @@ func (a *ACL) UpsertTokens(args *structs.ACLTokenUpsertRequest, reply *structs.A
|
||||
// the entire request as a single batch.
|
||||
if hasGlobal {
|
||||
if !allGlobal {
|
||||
return fmt.Errorf("cannot upsert mixed global and non-global tokens")
|
||||
return structs.NewErrRPCCoded(400, "cannot upsert mixed global and non-global tokens")
|
||||
}
|
||||
|
||||
// Force the request to the authoritative region if it has global
|
||||
@@ -494,7 +494,7 @@ func (a *ACL) UpsertTokens(args *structs.ACLTokenUpsertRequest, reply *structs.A
|
||||
// Validate each token
|
||||
for idx, token := range args.Tokens {
|
||||
if err := token.Validate(); err != nil {
|
||||
return fmt.Errorf("token %d invalid: %v", idx, err)
|
||||
return structs.NewErrRPCCodedf(400, "token %d invalid: %v", idx, err)
|
||||
}
|
||||
|
||||
// Generate an accessor and secret ID if new
|
||||
@@ -507,15 +507,15 @@ func (a *ACL) UpsertTokens(args *structs.ACLTokenUpsertRequest, reply *structs.A
|
||||
// Verify the token exists
|
||||
out, err := state.ACLTokenByAccessorID(nil, token.AccessorID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("token lookup failed: %v", err)
|
||||
return structs.NewErrRPCCodedf(400, "token lookup failed: %v", err)
|
||||
}
|
||||
if out == nil {
|
||||
return fmt.Errorf("cannot find token %s", token.AccessorID)
|
||||
return structs.NewErrRPCCodedf(400, "cannot find token %s", token.AccessorID)
|
||||
}
|
||||
|
||||
// Cannot toggle the "Global" mode
|
||||
if token.Global != out.Global {
|
||||
return fmt.Errorf("cannot toggle global mode of %s", token.AccessorID)
|
||||
return structs.NewErrRPCCodedf(400, "cannot toggle global mode of %s", token.AccessorID)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -538,7 +538,7 @@ func (a *ACL) UpsertTokens(args *structs.ACLTokenUpsertRequest, reply *structs.A
|
||||
for _, token := range args.Tokens {
|
||||
out, err := state.ACLTokenByAccessorID(nil, token.AccessorID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("token lookup failed: %v", err)
|
||||
return structs.NewErrRPCCodedf(400, "token lookup failed: %v", err)
|
||||
}
|
||||
reply.Tokens = append(reply.Tokens, out)
|
||||
}
|
||||
@@ -557,7 +557,7 @@ func (a *ACL) DeleteTokens(args *structs.ACLTokenDeleteRequest, reply *structs.G
|
||||
|
||||
// Validate non-zero set of tokens
|
||||
if len(args.AccessorIDs) == 0 {
|
||||
return fmt.Errorf("must specify as least one token")
|
||||
return structs.NewErrRPCCoded(400, "must specify as least one token")
|
||||
}
|
||||
|
||||
if done, err := a.srv.forward("ACL.DeleteTokens", args, args, reply); done {
|
||||
@@ -585,7 +585,7 @@ func (a *ACL) DeleteTokens(args *structs.ACLTokenDeleteRequest, reply *structs.G
|
||||
for _, accessor := range args.AccessorIDs {
|
||||
token, err := state.ACLTokenByAccessorID(nil, accessor)
|
||||
if err != nil {
|
||||
return fmt.Errorf("token lookup failed: %v", err)
|
||||
return structs.NewErrRPCCodedf(400, "token lookup failed: %v", err)
|
||||
}
|
||||
if token == nil {
|
||||
nonexistentTokens = append(nonexistentTokens, accessor)
|
||||
@@ -599,14 +599,14 @@ func (a *ACL) DeleteTokens(args *structs.ACLTokenDeleteRequest, reply *structs.G
|
||||
}
|
||||
|
||||
if len(nonexistentTokens) != 0 {
|
||||
return fmt.Errorf("Cannot delete nonexistent tokens: %v", strings.Join(nonexistentTokens, ", "))
|
||||
return structs.NewErrRPCCodedf(400, "Cannot delete nonexistent tokens: %v", strings.Join(nonexistentTokens, ", "))
|
||||
}
|
||||
|
||||
// Disallow mixed requests with global and non-global tokens since we forward
|
||||
// the entire request as a single batch.
|
||||
if hasGlobal {
|
||||
if !allGlobal {
|
||||
return fmt.Errorf("cannot delete mixed global and non-global tokens")
|
||||
return structs.NewErrRPCCoded(400, "cannot delete mixed global and non-global tokens")
|
||||
}
|
||||
|
||||
// Force the request to the authoritative region if it has global
|
||||
|
||||
@@ -931,7 +931,7 @@ func TestACLEndpoint_DeleteTokens_WithNonexistentToken(t *testing.T) {
|
||||
|
||||
assert.NotNil(err)
|
||||
expectedError := fmt.Sprintf("Cannot delete nonexistent tokens: %s", nonexistentToken.AccessorID)
|
||||
assert.Contains(expectedError, err.Error())
|
||||
assert.Contains(err.Error(), expectedError)
|
||||
}
|
||||
|
||||
func TestACLEndpoint_Bootstrap(t *testing.T) {
|
||||
|
||||
@@ -3,6 +3,7 @@ package structs
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
@@ -25,6 +26,8 @@ const (
|
||||
ErrUnknownJobPrefix = "Unknown job"
|
||||
ErrUnknownEvaluationPrefix = "Unknown evaluation"
|
||||
ErrUnknownDeploymentPrefix = "Unknown deployment"
|
||||
|
||||
errRPCCodedErrorPrefix = "RPC_ERROR::"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -144,3 +147,31 @@ func IsErrUnknownNomadVersion(err error) bool {
|
||||
func IsErrNodeLacksRpc(err error) bool {
|
||||
return err != nil && strings.Contains(err.Error(), errNodeLacksRpc)
|
||||
}
|
||||
|
||||
func NewErrRPCCoded(code int, msg string) error {
|
||||
return fmt.Errorf("%s%d,%s", errRPCCodedErrorPrefix, code, msg)
|
||||
}
|
||||
|
||||
func NewErrRPCCodedf(code int, format string, args ...interface{}) error {
|
||||
msg := fmt.Sprintf(format, args...)
|
||||
return fmt.Errorf("%s%d,%s", errRPCCodedErrorPrefix, code, msg)
|
||||
}
|
||||
|
||||
func CodeFromRPCCodedErr(err error) (code int, msg string, ok bool) {
|
||||
if err == nil || !strings.HasPrefix(err.Error(), errRPCCodedErrorPrefix) {
|
||||
return 0, "", false
|
||||
}
|
||||
|
||||
headerLen := len(errRPCCodedErrorPrefix)
|
||||
parts := strings.SplitN(err.Error()[headerLen:], ",", 2)
|
||||
if len(parts) != 2 {
|
||||
return 0, "", false
|
||||
}
|
||||
|
||||
code, err = strconv.Atoi(parts[0])
|
||||
if err != nil {
|
||||
return 0, "", false
|
||||
}
|
||||
|
||||
return code, parts[1], true
|
||||
}
|
||||
|
||||
49
nomad/structs/errors_test.go
Normal file
49
nomad/structs/errors_test.go
Normal file
@@ -0,0 +1,49 @@
|
||||
package structs
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestRPCCodedErrors(t *testing.T) {
|
||||
cases := []struct {
|
||||
err error
|
||||
code int
|
||||
message string
|
||||
}{
|
||||
{
|
||||
NewErrRPCCoded(400, "a test message,here"),
|
||||
400,
|
||||
"a test message,here",
|
||||
},
|
||||
{
|
||||
NewErrRPCCodedf(500, "a test message,here %s %s", "and,here%s", "second"),
|
||||
500,
|
||||
"a test message,here and,here%s second",
|
||||
},
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
t.Run(c.err.Error(), func(t *testing.T) {
|
||||
code, msg, ok := CodeFromRPCCodedErr(c.err)
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, c.code, code)
|
||||
assert.Equal(t, c.message, msg)
|
||||
})
|
||||
}
|
||||
|
||||
negativeCases := []string{
|
||||
"random error",
|
||||
errRPCCodedErrorPrefix,
|
||||
errRPCCodedErrorPrefix + "123",
|
||||
errRPCCodedErrorPrefix + "qwer,asdf",
|
||||
}
|
||||
for _, c := range negativeCases {
|
||||
t.Run(c, func(t *testing.T) {
|
||||
_, _, ok := CodeFromRPCCodedErr(errors.New(c))
|
||||
assert.False(t, ok)
|
||||
})
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user