consul connect: allow "cni/*" network mode (#26449)

don't require "bridge" network mode when using connect{}

we document this as "at your own risk" because CNI configuration
is so flexible that we can't guarantee a user's network will work,
but Nomad's "bridge" CNI config may be used as a reference.
This commit is contained in:
Daniel Bennett
2025-09-04 12:29:50 -04:00
committed by GitHub
parent 2944a34b58
commit 9682aa2724
16 changed files with 275 additions and 134 deletions

3
.changelog/26449.txt Normal file
View File

@@ -0,0 +1,3 @@
```release-note:improvement
consul connect: Allow cni/* network mode; use at your own risk
```

View File

@@ -117,8 +117,8 @@ func (*consulGRPCSocketHook) Name() string {
func (h *consulGRPCSocketHook) shouldRun() bool { func (h *consulGRPCSocketHook) shouldRun() bool {
tg := h.alloc.Job.LookupTaskGroup(h.alloc.TaskGroup) tg := h.alloc.Job.LookupTaskGroup(h.alloc.TaskGroup)
// we must be in bridge networking and at least one connect sidecar task // we must be in bridge/cni networking and at least one connect sidecar task
if !tgFirstNetworkIsBridge(tg) { if !tgFirstNetworkCanConsulConnect(tg) {
return false return false
} }

View File

@@ -10,6 +10,7 @@ import (
"net" "net"
"os" "os"
"path/filepath" "path/filepath"
"strings"
"sync" "sync"
"time" "time"
@@ -23,11 +24,12 @@ import (
"github.com/hashicorp/nomad/nomad/structs/config" "github.com/hashicorp/nomad/nomad/structs/config"
) )
func tgFirstNetworkIsBridge(tg *structs.TaskGroup) bool { func tgFirstNetworkCanConsulConnect(tg *structs.TaskGroup) bool {
if len(tg.Networks) < 1 || tg.Networks[0].Mode != "bridge" { if len(tg.Networks) < 1 {
return false return false
} }
return true mode := tg.Networks[0].Mode
return mode == "bridge" || strings.HasPrefix(mode, "cni/")
} }
const ( const (
@@ -88,13 +90,11 @@ func (*consulHTTPSockHook) Name() string {
// shouldRun returns true if the alloc contains at least one connect native // shouldRun returns true if the alloc contains at least one connect native
// task and has a network configured in bridge mode // task and has a network configured in bridge mode
//
// todo(shoenig): what about CNI networks?
func (h *consulHTTPSockHook) shouldRun() bool { func (h *consulHTTPSockHook) shouldRun() bool {
tg := h.alloc.Job.LookupTaskGroup(h.alloc.TaskGroup) tg := h.alloc.Job.LookupTaskGroup(h.alloc.TaskGroup)
// we must be in bridge networking and at least one connect native task // we must be in bridge/cni networking and at least one connect native task
if !tgFirstNetworkIsBridge(tg) { if !tgFirstNetworkCanConsulConnect(tg) {
return false return false
} }

View File

@@ -264,7 +264,7 @@ func connectProxyConfig(cfg map[string]interface{}, port int, info structs.Alloc
func connectProxyBindAddress(networks structs.Networks) string { func connectProxyBindAddress(networks structs.Networks) string {
for _, n := range networks { for _, n := range networks {
if n.Mode == "bridge" && n.IsIPv6() { if n.IsIPv6() && (n.Mode == "bridge" || strings.HasPrefix(n.Mode, "cni/")) {
return "::" return "::"
} }
} }

View File

@@ -156,7 +156,7 @@ job "countdash" {
# config { # config {
# image = "${meta.connect.sidecar_image}" # image = "${meta.connect.sidecar_image}"
# args = [ # args = [
# "-c", "${NOMAD_TASK_DIR}/bootstrap.json", # "-c", "${NOMAD_SECRETS_DIR}/envoy_bootstrap.json",
# "-l", "${meta.connect.log_level}" # "-l", "${meta.connect.log_level}"
# ] # ]
# } # }

View File

@@ -25,7 +25,8 @@ func TestConnect(t *testing.T) {
test.NoError(t, err) test.NoError(t, err)
}) })
t.Run("ConnectDemo", testConnectDemo) t.Run("ConnectDemo", testConnectDemo("bridge"))
t.Run("ConnectDemoCNI", testConnectDemo("cni/nomad-bridge-copy"))
t.Run("ConnectCustomSidecarExposed", testConnectCustomSidecarExposed) t.Run("ConnectCustomSidecarExposed", testConnectCustomSidecarExposed)
t.Run("ConnectNativeDemo", testConnectNativeDemo) t.Run("ConnectNativeDemo", testConnectNativeDemo)
t.Run("ConnectIngressGatewayDemo", testConnectIngressGatewayDemo) t.Run("ConnectIngressGatewayDemo", testConnectIngressGatewayDemo)
@@ -36,30 +37,34 @@ func TestConnect(t *testing.T) {
} }
// testConnectDemo tests the demo job file used in Connect Integration examples. // testConnectDemo tests the demo job file used in Connect Integration examples.
func testConnectDemo(t *testing.T) { func testConnectDemo(networkMode string) func(t *testing.T) {
sub, _ := jobs3.Submit(t, "./input/demo.nomad", jobs3.Timeout(time.Second*60)) return func(t *testing.T) {
sub, _ := jobs3.Submit(t, "./input/demo.nomad", jobs3.Timeout(time.Second*60),
jobs3.Var("network_mode", networkMode),
)
cc := e2eutil.ConsulClient(t) cc := e2eutil.ConsulClient(t)
ixn := &capi.Intention{ ixn := &capi.Intention{
SourceName: "count-dashboard", SourceName: "count-dashboard",
DestinationName: "count-api", DestinationName: "count-api",
Action: "allow", Action: "allow",
}
_, err := cc.Connect().IntentionUpsert(ixn, nil)
must.NoError(t, err, must.Sprint("could not create intention"))
t.Cleanup(func() {
_, err := cc.Connect().IntentionDeleteExact("count-dashboard", "count-api", nil)
test.NoError(t, err)
})
assertServiceOk(t, cc, "count-api-sidecar-proxy")
assertServiceOk(t, cc, "count-dashboard-sidecar-proxy")
logs := sub.Exec("dashboard", "dashboard",
[]string{"/bin/sh", "-c", "wget -O /dev/null http://${NOMAD_UPSTREAM_ADDR_count_api}"})
must.StrContains(t, logs.Stderr, "saving to")
} }
_, err := cc.Connect().IntentionUpsert(ixn, nil)
must.NoError(t, err, must.Sprint("could not create intention"))
t.Cleanup(func() {
_, err := cc.Connect().IntentionDeleteExact("count-dashboard", "count-api", nil)
test.NoError(t, err)
})
assertServiceOk(t, cc, "count-api-sidecar-proxy")
assertServiceOk(t, cc, "count-dashboard-sidecar-proxy")
logs := sub.Exec("dashboard", "dashboard",
[]string{"/bin/sh", "-c", "wget -O /dev/null http://${NOMAD_UPSTREAM_ADDR_count_api}"})
must.StrContains(t, logs.Stderr, "saving to")
} }
// testConnectCustomSidecarExposed tests that a connect sidecar with custom task // testConnectCustomSidecarExposed tests that a connect sidecar with custom task

View File

@@ -1,6 +1,10 @@
# Copyright (c) HashiCorp, Inc. # Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1 # SPDX-License-Identifier: BUSL-1.1
variable "network_mode" {
default = "bridge"
}
job "countdash" { job "countdash" {
datacenters = ["dc1"] datacenters = ["dc1"]
@@ -11,7 +15,7 @@ job "countdash" {
group "api" { group "api" {
network { network {
mode = "bridge" mode = var.network_mode
} }
service { service {
@@ -43,7 +47,7 @@ job "countdash" {
group "dashboard" { group "dashboard" {
network { network {
mode = "bridge" mode = var.network_mode
port "http" { port "http" {
static = 9002 static = 9002

View File

@@ -0,0 +1,41 @@
{
"cniVersion": "1.0.0",
"name": "nomad-bridge-copy",
"plugins": [
{
"type": "loopback"
},
{
"type": "bridge",
"bridge": "nomad",
"ipMasq": true,
"isGateway": true,
"forceAddress": true,
"hairpinMode": false,
"ipam": {
"type": "host-local",
"ranges": [
[{"subnet": "172.26.64.0/20"}],
[{"subnet": "a110:c8::/112"}]
],
"routes": [
{"dst": "0.0.0.0/0"},
{"dst": "::/0"}
],
"dataDir": "/var/run/cni"
}
},
{
"type": "firewall",
"backend": "iptables",
"iptablesAdminChainName": "NOMAD-ADMIN"
},
{
"type": "portmap",
"capabilities": {
"portMappings": true
},
"snat": true
}
]
}

View File

@@ -109,6 +109,10 @@ sudo mv /tmp/linux/cni/loopback.* /opt/cni/config/
sudo mv /tmp/linux/cni/cni_args.conflist /opt/cni/config/ sudo mv /tmp/linux/cni/cni_args.conflist /opt/cni/config/
sudo mv /tmp/linux/cni/cni_args.sh /opt/cni/bin/ sudo mv /tmp/linux/cni/cni_args.sh /opt/cni/bin/
echo "Installing additional CNI network configs"
# copy of nomad's "bridge" for connect+cni test (e2e/connect/)
sudo mv /tmp/linux/cni/nomad_bridge_copy.conflist /opt/cni/config/
# Podman # Podman
echo "Installing Podman" echo "Installing Podman"
sudo apt-get -y install podman catatonit sudo apt-get -y install podman catatonit

View File

@@ -7,7 +7,6 @@ import (
"errors" "errors"
"fmt" "fmt"
"net" "net"
"slices"
"strconv" "strconv"
"strings" "strings"
"time" "time"
@@ -26,6 +25,11 @@ const (
defaultConnectTimeout = 5 * time.Second defaultConnectTimeout = 5 * time.Second
) )
var (
ErrConnectRequireOneNetwork = errors.New("must have exactly one network for Consul Connect")
ErrConnectInvalidNetworkMode = errors.New("invalid network mode for Consul Connect")
)
// connectSidecarResources returns the set of resources used by default for // connectSidecarResources returns the set of resources used by default for
// the Consul Connect sidecar task // the Consul Connect sidecar task
func connectSidecarResources() *structs.Resources { func connectSidecarResources() *structs.Resources {
@@ -166,7 +170,7 @@ func (jobConnectHook) Validate(job *structs.Job) ([]error, error) {
for _, g := range job.TaskGroups { for _, g := range job.TaskGroups {
if err := groupConnectValidate(g); err != nil { if err := groupConnectValidate(g); err != nil {
return nil, err return warnings, err
} }
} }
@@ -441,7 +445,7 @@ func gatewayProxy(gateway *structs.ConsulGateway, mode string) *structs.ConsulGa
proxy.ConnectTimeout = pointer.Of(defaultConnectTimeout) proxy.ConnectTimeout = pointer.Of(defaultConnectTimeout)
} }
if mode == "bridge" { if mode == "bridge" || strings.HasPrefix(mode, "cni/") {
// magically configure bind address(es) for bridge networking, per gateway type // magically configure bind address(es) for bridge networking, per gateway type
// non-default configuration is gated above // non-default configuration is gated above
switch { switch {
@@ -544,28 +548,22 @@ func newConnectSidecarTask(service, driver, cluster string) *structs.Task {
} }
func groupConnectValidate(g *structs.TaskGroup) error { func groupConnectValidate(g *structs.TaskGroup) error {
var err error
for _, s := range g.Services { for _, s := range g.Services {
switch { switch {
case s.Connect.HasSidecar(): case s.Connect.HasSidecar():
if err := groupConnectSidecarValidate(g, s); err != nil { err = groupConnectSidecarValidate(g, s)
return err
}
case s.Connect.IsNative(): case s.Connect.IsNative():
if err := groupConnectNativeValidate(g, s); err != nil { err = groupConnectNativeValidate(g, s)
return err
}
case s.Connect.IsGateway(): case s.Connect.IsGateway():
if err := groupConnectGatewayValidate(g); err != nil { err = groupConnectGatewayValidate(g)
return err }
} if err != nil {
return err
} }
} }
err = groupConnectUpstreamsValidate(g, g.Services)
if err := groupConnectUpstreamsValidate(g, g.Services); err != nil { return err
return err
}
return nil
} }
func groupConnectUpstreamsValidate(g *structs.TaskGroup, services []*structs.Service) error { func groupConnectUpstreamsValidate(g *structs.TaskGroup, services []*structs.Service) error {
@@ -634,13 +632,29 @@ func transparentProxyPortLabelValidate(g *structs.TaskGroup, portLabel string) b
return false return false
} }
func groupConnectSidecarValidate(g *structs.TaskGroup, s *structs.Service) error { func groupConnectNetworkModeValidate(g *structs.TaskGroup, errorPrefix string, allowHost bool) error {
if n := len(g.Networks); n != 1 { if nn := len(g.Networks); nn != 1 {
return fmt.Errorf("Consul Connect sidecars require exactly 1 network, found %d in group %q", n, g.Name) return fmt.Errorf("%s: %w: group %q has %d networks",
errorPrefix, ErrConnectRequireOneNetwork, g.Name, nn)
} }
if g.Networks[0].Mode != "bridge" { mode := g.Networks[0].Mode
return fmt.Errorf("Consul Connect sidecar requires bridge network, found %q in group %q", g.Networks[0].Mode, g.Name) if mode == "bridge" || (allowHost && mode == "host") || strings.HasPrefix(mode, "cni/") {
return nil
}
// helpful error message
allowed := `"bridge" or "cni/*"`
if allowHost {
allowed = `"bridge", "host", or "cni/*"`
}
return fmt.Errorf("%s: %w: group %q uses network mode %q; must be %s",
errorPrefix, ErrConnectInvalidNetworkMode, g.Name, mode, allowed)
}
func groupConnectSidecarValidate(g *structs.TaskGroup, s *structs.Service) error {
if err := groupConnectNetworkModeValidate(g, "connect sidecar", false); err != nil {
return err
} }
// We must enforce lowercase characters on group and service names for connect // We must enforce lowercase characters on group and service names for connect
@@ -668,17 +682,6 @@ func groupConnectNativeValidate(g *structs.TaskGroup, s *structs.Service) error
} }
func groupConnectGatewayValidate(g *structs.TaskGroup) error { func groupConnectGatewayValidate(g *structs.TaskGroup) error {
// the group needs to be either bridge or host mode so we know how to configure // note that gateways can run in host network mode
// the docker driver config return groupConnectNetworkModeValidate(g, "connect gateway", true)
if n := len(g.Networks); n != 1 {
return fmt.Errorf("Consul Connect gateways require exactly 1 network, found %d in group %q", n, g.Name)
}
modes := []string{"bridge", "host"}
if !slices.Contains(modes, g.Networks[0].Mode) {
return fmt.Errorf(`Consul Connect Gateway service requires Task Group with network mode of type "bridge" or "host"`)
}
return nil
} }

View File

@@ -471,28 +471,41 @@ func TestJobEndpointConnect_groupConnectSidecarValidate(t *testing.T) {
} }
t.Run("sidecar 0 networks", func(t *testing.T) { t.Run("sidecar 0 networks", func(t *testing.T) {
require.EqualError(t, groupConnectSidecarValidate(&structs.TaskGroup{ err := groupConnectSidecarValidate(&structs.TaskGroup{
Name: "g1", Name: "g1",
Networks: nil, Networks: nil,
}, makeService("connect-service")), `Consul Connect sidecars require exactly 1 network, found 0 in group "g1"`) }, makeService("connect-service"))
must.EqError(t, err, `connect sidecar: must have exactly one network for Consul Connect: group "g1" has 0 networks`)
}) })
t.Run("sidecar non bridge", func(t *testing.T) { t.Run("sidecar non bridge", func(t *testing.T) {
require.EqualError(t, groupConnectSidecarValidate(&structs.TaskGroup{ err := groupConnectSidecarValidate(&structs.TaskGroup{
Name: "g2", Name: "g2",
Networks: structs.Networks{{ Networks: structs.Networks{{
Mode: "host", Mode: "host",
}}, }},
}, makeService("connect-service")), `Consul Connect sidecar requires bridge network, found "host" in group "g2"`) }, makeService("connect-service"))
must.EqError(t, err, `connect sidecar: invalid network mode for Consul Connect: group "g2" uses network mode "host"; must be "bridge" or "cni/*"`)
}) })
t.Run("sidecar okay", func(t *testing.T) { t.Run("sidecar okay bridge", func(t *testing.T) {
require.NoError(t, groupConnectSidecarValidate(&structs.TaskGroup{ err := groupConnectSidecarValidate(&structs.TaskGroup{
Name: "g3", Name: "g3",
Networks: structs.Networks{{ Networks: structs.Networks{{
Mode: "bridge", Mode: "bridge",
}}, }},
}, makeService("connect-service"))) }, makeService("connect-service"))
must.NoError(t, err)
})
t.Run("sidecar okay cni", func(t *testing.T) {
err := groupConnectSidecarValidate(&structs.TaskGroup{
Name: "g4",
Networks: structs.Networks{{
Mode: "cni/test-net",
}},
}, makeService("connect-service"))
must.NoError(t, err)
}) })
// group and service name validation // group and service name validation
@@ -505,7 +518,7 @@ func TestJobEndpointConnect_groupConnectSidecarValidate(t *testing.T) {
Name: "Other-Service", Name: "Other-Service",
}}, }},
}) })
require.NoError(t, err) must.NoError(t, err)
}) })
t.Run("connect service contains uppercase characters", func(t *testing.T) { t.Run("connect service contains uppercase characters", func(t *testing.T) {
@@ -516,7 +529,7 @@ func TestJobEndpointConnect_groupConnectSidecarValidate(t *testing.T) {
Name: "Other-Service", Name: "Other-Service",
}, makeService("Connect-Service")}, }, makeService("Connect-Service")},
}) })
require.EqualError(t, err, `Consul Connect service name "Connect-Service" in group "group" must not contain uppercase characters`) must.EqError(t, err, `Consul Connect service name "Connect-Service" in group "group" must not contain uppercase characters`)
}) })
t.Run("non-connect group contains uppercase characters", func(t *testing.T) { t.Run("non-connect group contains uppercase characters", func(t *testing.T) {
@@ -527,7 +540,7 @@ func TestJobEndpointConnect_groupConnectSidecarValidate(t *testing.T) {
Name: "other-service", Name: "other-service",
}}, }},
}) })
require.NoError(t, err) must.NoError(t, err)
}) })
t.Run("connect-group contains uppercase characters", func(t *testing.T) { t.Run("connect-group contains uppercase characters", func(t *testing.T) {
@@ -538,7 +551,7 @@ func TestJobEndpointConnect_groupConnectSidecarValidate(t *testing.T) {
Name: "other-service", Name: "other-service",
}, makeService("connect-service")}, }, makeService("connect-service")},
}) })
require.EqualError(t, err, `Consul Connect group "Connect-Group" with service "connect-service" must not contain uppercase characters`) must.EqError(t, err, `Consul Connect group "Connect-Group" with service "connect-service" must not contain uppercase characters`)
}) })
t.Run("connect group and service lowercase", func(t *testing.T) { t.Run("connect group and service lowercase", func(t *testing.T) {
@@ -549,7 +562,7 @@ func TestJobEndpointConnect_groupConnectSidecarValidate(t *testing.T) {
Name: "other-service", Name: "other-service",
}, makeService("connect-service")}, }, makeService("connect-service")},
}) })
require.NoError(t, err) must.NoError(t, err)
}) })
t.Run("connect group overlap upstreams", func(t *testing.T) { t.Run("connect group overlap upstreams", func(t *testing.T) {
@@ -570,7 +583,7 @@ func TestJobEndpointConnect_groupConnectSidecarValidate(t *testing.T) {
Networks: structs.Networks{{Mode: "bridge"}}, Networks: structs.Networks{{Mode: "bridge"}},
Services: []*structs.Service{s1, s2}, Services: []*structs.Service{s1, s2},
}) })
require.EqualError(t, err, `Consul Connect services "s2" and "s1" in group "connect-group" using same address for upstreams (:8999)`) must.EqError(t, err, `Consul Connect services "s2" and "s1" in group "connect-group" using same address for upstreams (:8999)`)
}) })
} }
@@ -781,7 +794,7 @@ func TestJobEndpointConnect_groupConnectGatewayValidate(t *testing.T) {
Name: "g1", Name: "g1",
Networks: nil, Networks: nil,
}) })
require.EqualError(t, err, `Consul Connect gateways require exactly 1 network, found 0 in group "g1"`) must.EqError(t, err, `connect gateway: must have exactly one network for Consul Connect: group "g1" has 0 networks`)
}) })
t.Run("bad network mode", func(t *testing.T) { t.Run("bad network mode", func(t *testing.T) {
@@ -791,7 +804,29 @@ func TestJobEndpointConnect_groupConnectGatewayValidate(t *testing.T) {
Mode: "", Mode: "",
}}, }},
}) })
require.EqualError(t, err, `Consul Connect Gateway service requires Task Group with network mode of type "bridge" or "host"`) must.EqError(t, err, `connect gateway: invalid network mode for Consul Connect: group "g1" uses network mode ""; must be "bridge", "host", or "cni/*"`)
})
for _, good := range []string{"bridge", "host"} {
t.Run("good network mode "+good, func(t *testing.T) {
err := groupConnectGatewayValidate(&structs.TaskGroup{
Name: "g1",
Networks: structs.Networks{{
Mode: good,
}},
})
must.NoError(t, err)
})
}
t.Run("good network mode cni", func(t *testing.T) {
err := groupConnectGatewayValidate(&structs.TaskGroup{
Name: "g1",
Networks: structs.Networks{{
Mode: "cni/test-net",
}},
})
must.NoError(t, err)
}) })
} }

View File

@@ -55,27 +55,24 @@ func (jobExposeCheckHook) Mutate(job *structs.Job) (_ *structs.Job, warnings []e
// Validate will ensure: // Validate will ensure:
// - The job contains valid network configuration for each task group in which // - The job contains valid network configuration for each task group in which
// an expose path is configured. The network must be of type bridge mode. // an expose path is configured. The network must be bridge or "cni/" mode.
// - The check Expose field is configured only for connect-enabled group-services. // - The check Expose field is configured only for connect-enabled group-services.
func (jobExposeCheckHook) Validate(job *structs.Job) (warnings []error, err error) { func (jobExposeCheckHook) Validate(job *structs.Job) (warnings []error, err error) {
for _, tg := range job.TaskGroups { for _, tg := range job.TaskGroups {
// Make sure any group that contains a group-service that enables expose // Make sure any group that contains a group-service that enables expose
// is configured with one network that is in "bridge" mode. This check // is configured with one network that is in "bridge" mode, or warn
// is being done independently of the preceding Connect task injection // if the network is a "cni/*" mode.
// hook, because at some point in the future Connect will not require the if err = tgValidateExposeNetworkMode(tg); err != nil {
// use of network namespaces, whereas the use of "expose" does not make return warnings, err
// sense without the use of network namespace.
if err := tgValidateUseOfBridgeMode(tg); err != nil {
return nil, err
} }
// Make sure any group-service that contains a check that enables expose // Make sure any group-service that contains a check that enables expose
// is connect-enabled and does not specify a custom sidecar task. We only // is connect-enabled and does not specify a custom sidecar task. We only
// support the expose feature when using the built-in Envoy integration. // support the expose feature when using the built-in Envoy integration.
if err := tgValidateUseOfCheckExpose(tg); err != nil { if err = tgValidateUseOfCheckExpose(tg); err != nil {
return nil, err return warnings, err
} }
} }
return nil, nil return warnings, nil
} }
// serviceExposeConfig digs through s to extract the connect sidecar service proxy // serviceExposeConfig digs through s to extract the connect sidecar service proxy
@@ -137,17 +134,12 @@ func tgValidateUseOfCheckExpose(tg *structs.TaskGroup) error {
return nil return nil
} }
// tgValidateUseOfBridgeMode ensures there is exactly 1 network configured for // tgValidateExposeNetworkMode ensures there is exactly 1 network configured for
// the task group, and that it makes use of "bridge" mode (i.e. enables network // the task group, and that it uses "bridge" or "cni/*" mode (i.e. enables network
// namespaces). // namespaces).
func tgValidateUseOfBridgeMode(tg *structs.TaskGroup) error { func tgValidateExposeNetworkMode(tg *structs.TaskGroup) error {
if tgUsesExposeCheck(tg) { if tgUsesExposeCheck(tg) {
if len(tg.Networks) != 1 { return groupConnectNetworkModeValidate(tg, "connect expose check", false)
return fmt.Errorf("group %q must specify one bridge network for exposing service check(s)", tg.Name)
}
if tg.Networks[0].Mode != "bridge" {
return fmt.Errorf("group %q must use bridge network for exposing service check(s)", tg.Name)
}
} }
return nil return nil
} }
@@ -188,7 +180,7 @@ func exposePathForCheck(tg *structs.TaskGroup, s *structs.Service, check *struct
// Borrow some of the validation before we start manipulating the group // Borrow some of the validation before we start manipulating the group
// network, which needs to exist once. // network, which needs to exist once.
if err := tgValidateUseOfBridgeMode(tg); err != nil { if err := tgValidateExposeNetworkMode(tg); err != nil {
return nil, err return nil, err
} }

View File

@@ -8,6 +8,7 @@ import (
"github.com/hashicorp/nomad/ci" "github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/nomad/structs"
"github.com/shoenig/test/must"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
) )
@@ -57,37 +58,52 @@ func TestJobExposeCheckHook_tgValidateUseOfBridgeMode(t *testing.T) {
} }
t.Run("no networks but no use of expose", func(t *testing.T) { t.Run("no networks but no use of expose", func(t *testing.T) {
require.Nil(t, tgValidateUseOfBridgeMode(&structs.TaskGroup{ err := tgValidateExposeNetworkMode(&structs.TaskGroup{
Networks: make(structs.Networks, 0), Networks: make(structs.Networks, 0),
})) })
must.NoError(t, err)
}) })
t.Run("no networks and uses expose", func(t *testing.T) { t.Run("no networks and uses expose", func(t *testing.T) {
require.EqualError(t, tgValidateUseOfBridgeMode(&structs.TaskGroup{ err := tgValidateExposeNetworkMode(&structs.TaskGroup{
Name: "g1", Name: "g1",
Networks: make(structs.Networks, 0), Networks: make(structs.Networks, 0),
Services: []*structs.Service{s1}, Services: []*structs.Service{s1},
}), `group "g1" must specify one bridge network for exposing service check(s)`) })
must.EqError(t, err, `connect expose check: must have exactly one network for Consul Connect: group "g1" has 0 networks`)
}) })
t.Run("non-bridge network and uses expose", func(t *testing.T) { t.Run("non-bridge network and uses expose", func(t *testing.T) {
require.EqualError(t, tgValidateUseOfBridgeMode(&structs.TaskGroup{ err := tgValidateExposeNetworkMode(&structs.TaskGroup{
Name: "g1", Name: "g1",
Networks: structs.Networks{{ Networks: structs.Networks{{
Mode: "host", Mode: "host",
}}, }},
Services: []*structs.Service{s1}, Services: []*structs.Service{s1},
}), `group "g1" must use bridge network for exposing service check(s)`) })
must.EqError(t, err, `connect expose check: invalid network mode for Consul Connect: group "g1" uses network mode "host"; must be "bridge" or "cni/*"`)
}) })
t.Run("bridge network uses expose", func(t *testing.T) { t.Run("bridge network uses expose", func(t *testing.T) {
require.Nil(t, tgValidateUseOfBridgeMode(&structs.TaskGroup{ err := tgValidateExposeNetworkMode(&structs.TaskGroup{
Name: "g1", Name: "g1",
Networks: structs.Networks{{ Networks: structs.Networks{{
Mode: "bridge", Mode: "bridge",
}}, }},
Services: []*structs.Service{s1}, Services: []*structs.Service{s1},
})) })
must.NoError(t, err)
})
t.Run("cni network uses expose", func(t *testing.T) {
err := tgValidateExposeNetworkMode(&structs.TaskGroup{
Name: "g1",
Networks: structs.Networks{{
Mode: "cni/test-net",
}},
Services: []*structs.Service{s1},
})
must.NoError(t, err)
}) })
} }
@@ -165,8 +181,8 @@ func TestJobExposeCheckHook_Validate(t *testing.T) {
Services: []*structs.Service{s1}, Services: []*structs.Service{s1},
}}, }},
}) })
require.Empty(t, warnings) must.SliceEmpty(t, warnings)
require.EqualError(t, err, `group "g1" must specify one bridge network for exposing service check(s)`) must.EqError(t, err, `connect expose check: must have exactly one network for Consul Connect: group "g1" has 2 networks`)
}) })
t.Run("expose in service check", func(t *testing.T) { t.Run("expose in service check", func(t *testing.T) {
@@ -189,8 +205,8 @@ func TestJobExposeCheckHook_Validate(t *testing.T) {
}}, }},
}}, }},
}) })
require.Empty(t, warnings) must.SliceEmpty(t, warnings)
require.EqualError(t, err, `exposed service check g1[t1]->s2->s2-check1 is not a task-group service`) must.EqError(t, err, `exposed service check g1[t1]->s2->s2-check1 is not a task-group service`)
}) })
t.Run("ok", func(t *testing.T) { t.Run("ok", func(t *testing.T) {
@@ -224,8 +240,8 @@ func TestJobExposeCheckHook_Validate(t *testing.T) {
}}, }},
}}, }},
}) })
require.Empty(t, warnings) must.SliceEmpty(t, warnings)
require.Nil(t, err) must.NoError(t, err)
}) })
} }
@@ -321,9 +337,7 @@ func TestJobExposeCheckHook_exposePathForCheck(t *testing.T) {
Services: []*structs.Service{s}, Services: []*structs.Service{s},
Networks: structs.Networks{{ Networks: structs.Networks{{
Mode: "bridge", Mode: "bridge",
DynamicPorts: []structs.Port{ DynamicPorts: []structs.Port{}, // service declares "sPort", but does not exist
// service declares "sPort", but does not exist
},
}}, }},
}, s, c, checkIdx) }, s, c, checkIdx)
require.EqualError(t, err, `unable to determine local service port for service check group1->service1->check1`) require.EqualError(t, err, `unable to determine local service port for service check group1->service1->check1`)
@@ -400,8 +414,8 @@ func TestJobExposeCheckHook_exposePathForCheck(t *testing.T) {
Networks: nil, // not set, should cause validation error Networks: nil, // not set, should cause validation error
} }
ePath, err := exposePathForCheck(tg, s, c, checkIdx) ePath, err := exposePathForCheck(tg, s, c, checkIdx)
require.EqualError(t, err, `group "group1" must specify one bridge network for exposing service check(s)`) must.EqError(t, err, `connect expose check: must have exactly one network for Consul Connect: group "group1" has 0 networks`)
require.Nil(t, ePath) must.Nil(t, ePath)
}) })
} }

View File

@@ -6293,6 +6293,17 @@ func TestJobEndpoint_ValidateJob_ConsulConnect(t *testing.T) {
require.NoError(t, err) require.NoError(t, err)
}) })
t.Run("valid consul connect with cni", func(t *testing.T) {
j := mock.Job()
tg := j.TaskGroups[0]
tg.Services = tgServices
tg.Networks[0].Mode = "cni/test-net"
err := validateJob(j)
must.NoError(t, err)
})
t.Run("consul connect but missing network", func(t *testing.T) { t.Run("consul connect but missing network", func(t *testing.T) {
j := mock.Job() j := mock.Job()
@@ -6301,8 +6312,7 @@ func TestJobEndpoint_ValidateJob_ConsulConnect(t *testing.T) {
tg.Networks = nil tg.Networks = nil
err := validateJob(j) err := validateJob(j)
require.Error(t, err) must.ErrorContains(t, err, ErrConnectRequireOneNetwork.Error())
require.Contains(t, err.Error(), `Consul Connect sidecars require exactly 1 network`)
}) })
t.Run("consul connect but non bridge network", func(t *testing.T) { t.Run("consul connect but non bridge network", func(t *testing.T) {
@@ -6316,8 +6326,7 @@ func TestJobEndpoint_ValidateJob_ConsulConnect(t *testing.T) {
} }
err := validateJob(j) err := validateJob(j)
require.Error(t, err) must.ErrorContains(t, err, ErrConnectInvalidNetworkMode.Error())
require.Contains(t, err.Error(), `Consul Connect sidecar requires bridge network, found "host" in group "web"`)
}) })
} }

View File

@@ -36,15 +36,18 @@ service-based access control permissions throughout the entire mesh.
Nomad has native integration with Consul to provide service mesh capabilities. Nomad has native integration with Consul to provide service mesh capabilities.
The [`connect`][] block is the entrypoint for all service mesh configuration. The [`connect`][] block is the entrypoint for all service mesh configuration.
Nomad automatically deploys a sidecar proxy task to all allocations that have a Nomad automatically deploys a sidecar proxy task to all allocations that have a
[`sidecar_service`][] block. [`sidecar_service`][] block. All incoming external traffic is handled by the
sidecar.
This proxy task is responsible for exposing the service to the mesh and can This proxy task is responsible for exposing the service to the mesh and can
also be used to access other services from within the allocation. These also be used to access other services from within the allocation. These
external services are called upstreams and are declared using the external services are called upstreams and are declared using the
[`upstreams`][] block. [`upstreams`][] block.
The allocation `network_mode` must be set to `bridge` for network isolation and Consul service mesh requires network isolation to function, so you must set
all external traffic is provided by the sidecar. the job group's `network` [`mode`](/nomad/docs/job-specification/network#mode)
to `bridge`, or an [appropriately configured `cni/*`
network](/nomad/docs/networking/consul/service-mesh#network-mode).
~> **Warning:** To fully isolate your workloads make sure to bind them only to ~> **Warning:** To fully isolate your workloads make sure to bind them only to
the `loopback` interface. the `loopback` interface.
@@ -167,4 +170,3 @@ The types of gateways provided by Consul Service Mesh are:
[`upstreams`]: /nomad/docs/job-specification/upstreams [`upstreams`]: /nomad/docs/job-specification/upstreams
[consul_cli_envoy]: /consul/commands/connect/envoy [consul_cli_envoy]: /consul/commands/connect/envoy
[runtime_network]: /nomad/docs/reference/runtime-environment-settings#network-related-variables [runtime_network]: /nomad/docs/reference/runtime-environment-settings#network-related-variables

View File

@@ -59,6 +59,35 @@ For using the Consul service mesh integration with Consul ACLs enabled, see the
[Secure Nomad Jobs with Consul Service Mesh](/nomad/tutorials/integrate-consul/consul-service-mesh) [Secure Nomad Jobs with Consul Service Mesh](/nomad/tutorials/integrate-consul/consul-service-mesh)
guide. guide.
### Network mode
Consul service mesh requires network isolation to function, so you must set
the job group's `network` [`mode`](/nomad/docs/job-specification/network#mode)
to `bridge`, or to an appropriately configured `cni/*` network.
<details>
<summary style={{ cursor: "pointer" }}>
Using a custom <code>cni/*</code> network with Consul service mesh requires
extra care.
</summary>
Given the variety of network configurations, the Nomad team and enterprise
support are limited in our ability to support custom network configurations.
Use custom CNI networks with Consul service mesh at your own risk. That said,
you may model your network configuration on Nomad's [`bridge`
network](/nomad/docs/networking/cni#create-a-custom-bridge-mode-configuration-with-cni-plugins).
Consider these qualities when configuring your network:
* Nomad provides an isolated network namespace, but your CNI configuration
should not expose the main task(s) to the host network.
* Incoming traffic needs to be able to reach the sidecar at the IP:port
which will be advertised on the sidecar service.
* Traffic needs to be able to flow from different allocs' sidecars to
one another.
</details>
## Nomad Consul service mesh example ## Nomad Consul service mesh example
The following section walks through an example to enable secure communication The following section walks through an example to enable secure communication