diff --git a/CHANGELOG.md b/CHANGELOG.md index eee2dfbb3..521d4c3e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ IMPROVEMENTS: * client: Updated consul-template to v0.25.0 - config `function_blacklist` deprecated and replaced with `function_denylist` [[GH-8988](https://github.com/hashicorp/nomad/pull/8988)] * config: Deprecated terms `blacklist` and `whitelist` from configuration and replaced them with `denylist` and `allowlist`. [[GH-9019](https://github.com/hashicorp/nomad/issues/9019)] * consul: Support Consul namespace (Consul Enterprise) in client configuration. [[GH-8849](https://github.com/hashicorp/nomad/pull/8849)] + * consul: Support advertising CNI and multi-host network addresses to consul [[GH-8801](https://github.com/hashicorp/nomad/issues/8801)] * consul/connect: Dynamically select envoy sidecar at runtime [[GH-8945](https://github.com/hashicorp/nomad/pull/8945)] * csi: Relaxed validation requirements when checking volume capabilities with controller plugins, to accommodate existing plugin behaviors. [[GH-9049](https://github.com/hashicorp/nomad/issues/9049)] * driver/docker: Upgrade pause container and detect architecture [[GH-8957](https://github.com/hashicorp/nomad/pull/8957)] diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go index 7aa3a487e..dc827e335 100644 --- a/client/allocrunner/alloc_runner.go +++ b/client/allocrunner/alloc_runner.go @@ -734,6 +734,12 @@ func (ar *allocRunner) SetNetworkStatus(s *structs.AllocNetworkStatus) { ar.state.NetworkStatus = s.Copy() } +func (ar *allocRunner) NetworkStatus() *structs.AllocNetworkStatus { + ar.stateLock.Lock() + defer ar.stateLock.Unlock() + return ar.state.NetworkStatus.Copy() +} + // AllocState returns a copy of allocation state including a snapshot of task // states. func (ar *allocRunner) AllocState() *state.State { diff --git a/client/allocrunner/alloc_runner_hooks.go b/client/allocrunner/alloc_runner_hooks.go index 769260389..df797e44c 100644 --- a/client/allocrunner/alloc_runner_hooks.go +++ b/client/allocrunner/alloc_runner_hooks.go @@ -143,11 +143,12 @@ func (ar *allocRunner) initRunnerHooks(config *clientconfig.Config) error { newAllocHealthWatcherHook(hookLogger, alloc, hs, ar.Listener(), ar.consulClient), newNetworkHook(hookLogger, ns, alloc, nm, nc, ar), newGroupServiceHook(groupServiceHookConfig{ - alloc: alloc, - consul: ar.consulClient, - restarter: ar, - taskEnvBuilder: taskenv.NewBuilder(config.Node, ar.Alloc(), nil, config.Region).SetAllocDir(ar.allocDir.AllocDir), - logger: hookLogger, + alloc: alloc, + consul: ar.consulClient, + restarter: ar, + taskEnvBuilder: taskenv.NewBuilder(config.Node, ar.Alloc(), nil, config.Region).SetAllocDir(ar.allocDir.AllocDir), + networkStatusGetter: ar, + logger: hookLogger, }), newConsulGRPCSocketHook(hookLogger, alloc, ar.allocDir, config.ConsulConfig), newConsulHTTPSocketHook(hookLogger, alloc, ar.allocDir, config.ConsulConfig), diff --git a/client/allocrunner/groupservice_hook.go b/client/allocrunner/groupservice_hook.go index fb37fcca6..89f710cd6 100644 --- a/client/allocrunner/groupservice_hook.go +++ b/client/allocrunner/groupservice_hook.go @@ -13,16 +13,21 @@ import ( "github.com/hashicorp/nomad/plugins/drivers" ) +type networkStatusGetter interface { + NetworkStatus() *structs.AllocNetworkStatus +} + // groupServiceHook manages task group Consul service registration and // deregistration. type groupServiceHook struct { - allocID string - group string - restarter agentconsul.WorkloadRestarter - consulClient consul.ConsulServiceAPI - prerun bool - delay time.Duration - deregistered bool + allocID string + group string + restarter agentconsul.WorkloadRestarter + consulClient consul.ConsulServiceAPI + prerun bool + delay time.Duration + deregistered bool + networkStatusGetter networkStatusGetter logger log.Logger @@ -30,6 +35,7 @@ type groupServiceHook struct { canary bool services []*structs.Service networks structs.Networks + ports structs.AllocatedPorts taskEnvBuilder *taskenv.Builder // Since Update() may be called concurrently with any other hook all @@ -38,11 +44,12 @@ type groupServiceHook struct { } type groupServiceHookConfig struct { - alloc *structs.Allocation - consul consul.ConsulServiceAPI - restarter agentconsul.WorkloadRestarter - taskEnvBuilder *taskenv.Builder - logger log.Logger + alloc *structs.Allocation + consul consul.ConsulServiceAPI + restarter agentconsul.WorkloadRestarter + taskEnvBuilder *taskenv.Builder + networkStatusGetter networkStatusGetter + logger log.Logger } func newGroupServiceHook(cfg groupServiceHookConfig) *groupServiceHook { @@ -54,18 +61,20 @@ func newGroupServiceHook(cfg groupServiceHookConfig) *groupServiceHook { } h := &groupServiceHook{ - allocID: cfg.alloc.ID, - group: cfg.alloc.TaskGroup, - restarter: cfg.restarter, - consulClient: cfg.consul, - taskEnvBuilder: cfg.taskEnvBuilder, - delay: shutdownDelay, + allocID: cfg.alloc.ID, + group: cfg.alloc.TaskGroup, + restarter: cfg.restarter, + consulClient: cfg.consul, + taskEnvBuilder: cfg.taskEnvBuilder, + delay: shutdownDelay, + networkStatusGetter: cfg.networkStatusGetter, } h.logger = cfg.logger.Named(h.Name()) h.services = cfg.alloc.Job.LookupTaskGroup(h.group).Services if cfg.alloc.AllocatedResources != nil { h.networks = cfg.alloc.AllocatedResources.Shared.Networks + h.ports = cfg.alloc.AllocatedResources.Shared.Ports } if cfg.alloc.DeploymentStatus != nil { @@ -109,6 +118,7 @@ func (h *groupServiceHook) Update(req *interfaces.RunnerUpdateRequest) error { var networks structs.Networks if req.Alloc.AllocatedResources != nil { networks = req.Alloc.AllocatedResources.Shared.Networks + h.ports = req.Alloc.AllocatedResources.Shared.Ports } tg := req.Alloc.Job.LookupTaskGroup(h.group) @@ -200,6 +210,11 @@ func (h *groupServiceHook) getWorkloadServices() *agentconsul.WorkloadServices { // Interpolate with the task's environment interpolatedServices := taskenv.InterpolateServices(h.taskEnvBuilder.Build(), h.services) + var netStatus *structs.AllocNetworkStatus + if h.networkStatusGetter != nil { + netStatus = h.networkStatusGetter.NetworkStatus() + } + // Create task services struct with request's driver metadata return &agentconsul.WorkloadServices{ AllocID: h.allocID, @@ -208,6 +223,8 @@ func (h *groupServiceHook) getWorkloadServices() *agentconsul.WorkloadServices { Services: interpolatedServices, DriverNetwork: h.driverNet(), Networks: h.networks, + NetworkStatus: netStatus, + Ports: h.ports, Canary: h.canary, } } diff --git a/client/allocrunner/taskrunner/service_hook.go b/client/allocrunner/taskrunner/service_hook.go index e329af93f..86dc26c00 100644 --- a/client/allocrunner/taskrunner/service_hook.go +++ b/client/allocrunner/taskrunner/service_hook.go @@ -44,6 +44,7 @@ type serviceHook struct { canary bool services []*structs.Service networks structs.Networks + ports structs.AllocatedPorts taskEnv *taskenv.TaskEnv // initialRegistrations tracks if Poststart has completed, initializing @@ -62,6 +63,7 @@ func newServiceHook(c serviceHookConfig) *serviceHook { taskName: c.task.Name, services: c.task.Services, restarter: c.restarter, + ports: c.alloc.AllocatedResources.Shared.Ports, } if res := c.alloc.AllocatedResources.Tasks[c.task.Name]; res != nil { @@ -141,6 +143,7 @@ func (h *serviceHook) updateHookFields(req *interfaces.TaskUpdateRequest) error h.services = task.Services h.networks = networks h.canary = canary + h.ports = req.Alloc.AllocatedResources.Shared.Ports return nil } @@ -195,5 +198,6 @@ func (h *serviceHook) getWorkloadServices() *agentconsul.WorkloadServices { DriverNetwork: h.driverNet, Networks: h.networks, Canary: h.canary, + Ports: h.ports, } } diff --git a/command/agent/consul/service_client.go b/command/agent/consul/service_client.go index ca93249b9..41905ac3f 100644 --- a/command/agent/consul/service_client.go +++ b/command/agent/consul/service_client.go @@ -823,7 +823,7 @@ func (c *ServiceClient) serviceRegs(ops *operations, service *structs.Service, w } // Determine the address to advertise based on the mode - ip, port, err := getAddress(addrMode, service.PortLabel, workload.Networks, workload.DriverNetwork) + ip, port, err := getAddress(addrMode, service.PortLabel, workload.Networks, workload.DriverNetwork, workload.Ports, workload.NetworkStatus) if err != nil { return nil, fmt.Errorf("unable to get address for service %q: %v", service.Name, err) } @@ -934,7 +934,7 @@ func (c *ServiceClient) checkRegs(ops *operations, serviceID string, service *st addrMode = structs.AddressModeHost } - ip, port, err := getAddress(addrMode, portLabel, workload.Networks, workload.DriverNetwork) + ip, port, err := getAddress(addrMode, portLabel, workload.Networks, workload.DriverNetwork, workload.Ports, workload.NetworkStatus) if err != nil { return nil, fmt.Errorf("error getting address for check %q: %v", check.Name, err) } @@ -1448,7 +1448,7 @@ func getNomadSidecar(id string, services map[string]*api.AgentService) *api.Agen // getAddress returns the IP and port to use for a service or check. If no port // label is specified (an empty value), zero values are returned because no // address could be resolved. -func getAddress(addrMode, portLabel string, networks structs.Networks, driverNet *drivers.DriverNetwork) (string, int, error) { +func getAddress(addrMode, portLabel string, networks structs.Networks, driverNet *drivers.DriverNetwork, ports structs.AllocatedPorts, netStatus *structs.AllocNetworkStatus) (string, int, error) { switch addrMode { case structs.AddressModeAuto: if driverNet.Advertise() { @@ -1456,7 +1456,7 @@ func getAddress(addrMode, portLabel string, networks structs.Networks, driverNet } else { addrMode = structs.AddressModeHost } - return getAddress(addrMode, portLabel, networks, driverNet) + return getAddress(addrMode, portLabel, networks, driverNet, ports, netStatus) case structs.AddressModeHost: if portLabel == "" { if len(networks) != 1 { @@ -1471,11 +1471,17 @@ func getAddress(addrMode, portLabel string, networks structs.Networks, driverNet } // Default path: use host ip:port - ip, port := networks.Port(portLabel) - if ip == "" && port <= 0 { - return "", 0, fmt.Errorf("invalid port %q: port label not found", portLabel) + // Try finding port in the AllocatedPorts struct first + // Check in Networks struct for backwards compatibility if not found + mapping, ok := ports.Get(portLabel) + if !ok { + ip, port := networks.Port(portLabel) + if ip == "" && port <= 0 { + return "", 0, fmt.Errorf("invalid port %q: port label not found", portLabel) + } + return ip, port, nil } - return ip, port, nil + return mapping.HostIP, mapping.Value, nil case structs.AddressModeDriver: // Require a driver network if driver address mode is used @@ -1489,6 +1495,11 @@ func getAddress(addrMode, portLabel string, networks structs.Networks, driverNet } // If the port is a label, use the driver's port (not the host's) + if port, ok := ports.Get(portLabel); ok { + return driverNet.IP, port.To, nil + } + + // Check if old style driver portmap is used if port, ok := driverNet.PortMap[portLabel]; ok { return driverNet.IP, port, nil } @@ -1507,6 +1518,32 @@ func getAddress(addrMode, portLabel string, networks structs.Networks, driverNet return driverNet.IP, port, nil + case "alloc": + if netStatus == nil { + return "", 0, fmt.Errorf(`cannot use address_mode="alloc": no allocation network status reported`) + } + + // If no port label is specified just return the IP + if portLabel == "" { + return netStatus.Address, 0, nil + } + + // If port is a label and is found then return it + if port, ok := ports.Get(portLabel); ok { + return netStatus.Address, port.Value, nil + } + + // Check if port is a literal number + port, err := strconv.Atoi(portLabel) + if err != nil { + // User likely specified wrong port label here + return "", 0, fmt.Errorf("invalid port %q: port label not found or is not numeric", portLabel) + } + if port <= 0 { + return "", 0, fmt.Errorf("invalid port: %q: port must be >0", portLabel) + } + return netStatus.Address, port, nil + default: // Shouldn't happen due to validation, but enforce invariants return "", 0, fmt.Errorf("invalid address mode %q", addrMode) diff --git a/command/agent/consul/structs.go b/command/agent/consul/structs.go index b41387a77..4d71bcdb7 100644 --- a/command/agent/consul/structs.go +++ b/command/agent/consul/structs.go @@ -29,8 +29,16 @@ type WorkloadServices struct { Services []*structs.Service // Networks from the task's resources stanza. + // TODO: remove and use Ports Networks structs.Networks + // NetworkStatus from alloc if network namespace is created + // Can be nil + NetworkStatus *structs.AllocNetworkStatus + + // AllocatedPorts is the list of port mappings + Ports structs.AllocatedPorts + // DriverExec is the script executor for the task's driver. // For group services this is nil and script execution is managed by // a tasklet in the taskrunner script_check_hook diff --git a/command/agent/consul/unit_test.go b/command/agent/consul/unit_test.go index 0e664d616..29f1a1c81 100644 --- a/command/agent/consul/unit_test.go +++ b/command/agent/consul/unit_test.go @@ -1359,6 +1359,8 @@ func TestGetAddress(t *testing.T) { PortLabel string Host map[string]int // will be converted to structs.Networks Driver *drivers.DriverNetwork + Ports structs.AllocatedPorts + Status *structs.AllocNetworkStatus // Results ExpectedIP string @@ -1487,6 +1489,162 @@ func TestGetAddress(t *testing.T) { }, ExpectedIP: "10.1.2.3", }, + + // Scenarios using port 0.12 networking fields (NetworkStatus, AllocatedPortMapping) + { + Name: "ExampleServer_withAllocatedPorts", + Mode: structs.AddressModeAuto, + PortLabel: "db", + Ports: []structs.AllocatedPortMapping{ + { + Label: "db", + Value: 12435, + To: 6379, + HostIP: HostIP, + }, + }, + Status: &structs.AllocNetworkStatus{ + InterfaceName: "eth0", + Address: "172.26.0.1", + }, + ExpectedIP: HostIP, + ExpectedPort: 12435, + }, + { + Name: "Host_withAllocatedPorts", + Mode: structs.AddressModeHost, + PortLabel: "db", + Ports: []structs.AllocatedPortMapping{ + { + Label: "db", + Value: 12345, + To: 6379, + HostIP: HostIP, + }, + }, + Status: &structs.AllocNetworkStatus{ + InterfaceName: "eth0", + Address: "172.26.0.1", + }, + ExpectedIP: HostIP, + ExpectedPort: 12345, + }, + { + Name: "Driver_withAllocatedPorts", + Mode: structs.AddressModeDriver, + PortLabel: "db", + Ports: []structs.AllocatedPortMapping{ + { + Label: "db", + Value: 12345, + To: 6379, + HostIP: HostIP, + }, + }, + Driver: &drivers.DriverNetwork{ + IP: "10.1.2.3", + }, + Status: &structs.AllocNetworkStatus{ + InterfaceName: "eth0", + Address: "172.26.0.1", + }, + ExpectedIP: "10.1.2.3", + ExpectedPort: 6379, + }, + { + Name: "AutoDriver_withAllocatedPorts", + Mode: structs.AddressModeAuto, + PortLabel: "db", + Ports: []structs.AllocatedPortMapping{ + { + Label: "db", + Value: 12345, + To: 6379, + HostIP: HostIP, + }, + }, + Driver: &drivers.DriverNetwork{ + IP: "10.1.2.3", + AutoAdvertise: true, + }, + Status: &structs.AllocNetworkStatus{ + InterfaceName: "eth0", + Address: "172.26.0.1", + }, + ExpectedIP: "10.1.2.3", + ExpectedPort: 6379, + }, + { + Name: "DriverCustomPort_withAllocatedPorts", + Mode: structs.AddressModeDriver, + PortLabel: "7890", + Ports: []structs.AllocatedPortMapping{ + { + Label: "db", + Value: 12345, + To: 6379, + HostIP: HostIP, + }, + }, + Driver: &drivers.DriverNetwork{ + IP: "10.1.2.3", + }, + Status: &structs.AllocNetworkStatus{ + InterfaceName: "eth0", + Address: "172.26.0.1", + }, + ExpectedIP: "10.1.2.3", + ExpectedPort: 7890, + }, + { + Name: "Host_MultiHostInterface", + Mode: structs.AddressModeAuto, + PortLabel: "db", + Ports: []structs.AllocatedPortMapping{ + { + Label: "db", + Value: 12345, + To: 6379, + HostIP: "127.0.0.100", + }, + }, + Status: &structs.AllocNetworkStatus{ + InterfaceName: "eth0", + Address: "172.26.0.1", + }, + ExpectedIP: "127.0.0.100", + ExpectedPort: 12345, + }, + { + Name: "Alloc", + Mode: structs.AddressModeAlloc, + PortLabel: "db", + Ports: []structs.AllocatedPortMapping{ + { + Label: "db", + Value: 12345, + To: 6379, + HostIP: HostIP, + }, + }, + Status: &structs.AllocNetworkStatus{ + InterfaceName: "eth0", + Address: "172.26.0.1", + }, + ExpectedIP: "172.26.0.1", + ExpectedPort: 12345, + }, + { + Name: "AllocCustomPort", + Mode: structs.AddressModeAlloc, + PortLabel: "6379", + Status: &structs.AllocNetworkStatus{ + InterfaceName: "eth0", + Address: "172.26.0.1", + }, + ExpectedIP: "172.26.0.1", + ExpectedPort: 6379, + }, } for _, tc := range cases { @@ -1507,7 +1665,7 @@ func TestGetAddress(t *testing.T) { } // Run getAddress - ip, port, err := getAddress(tc.Mode, tc.PortLabel, networks, tc.Driver) + ip, port, err := getAddress(tc.Mode, tc.PortLabel, networks, tc.Driver, tc.Ports, tc.Status) // Assert the results assert.Equal(t, tc.ExpectedIP, ip, "IP mismatch") diff --git a/nomad/structs/services.go b/nomad/structs/services.go index 0558e52aa..af26354df 100644 --- a/nomad/structs/services.go +++ b/nomad/structs/services.go @@ -246,7 +246,7 @@ func (sc *ServiceCheck) validate() error { // Validate AddressMode switch sc.AddressMode { - case "", AddressModeHost, AddressModeDriver: + case "", AddressModeHost, AddressModeDriver, AddressModeAlloc: // Ok case AddressModeAuto: return fmt.Errorf("invalid address_mode %q - %s only valid for services", sc.AddressMode, AddressModeAuto) @@ -378,6 +378,7 @@ const ( AddressModeAuto = "auto" AddressModeHost = "host" AddressModeDriver = "driver" + AddressModeAlloc = "alloc" ) // Service represents a Consul service definition @@ -485,7 +486,7 @@ func (s *Service) Validate() error { } switch s.AddressMode { - case "", AddressModeAuto, AddressModeHost, AddressModeDriver: + case "", AddressModeAuto, AddressModeHost, AddressModeDriver, AddressModeAlloc: // OK default: mErr.Errors = append(mErr.Errors, fmt.Errorf("Service address_mode must be %q, %q, or %q; not %q", AddressModeAuto, AddressModeHost, AddressModeDriver, s.AddressMode)) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 409e9b332..44f9b1f9c 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -6079,6 +6079,9 @@ func (tg *TaskGroup) validateServices() error { // error messages to provide the user. continue } + if service.AddressMode == AddressModeDriver { + mErr.Errors = append(mErr.Errors, fmt.Errorf("service %q cannot use address_mode=\"driver\", only services defined in a \"task\" block can use this mode", service.Name)) + } if _, ok := knownServices[service.Name+service.PortLabel]; ok { mErr.Errors = append(mErr.Errors, fmt.Errorf("Service %s is duplicate", service.Name)) } @@ -6089,6 +6092,9 @@ func (tg *TaskGroup) validateServices() error { mErr.Errors = append(mErr.Errors, fmt.Errorf("Check %s invalid: only script and gRPC checks should have tasks", check.Name)) } + if check.AddressMode == AddressModeDriver { + mErr.Errors = append(mErr.Errors, fmt.Errorf("Check %q invalid: cannot use address_mode=\"driver\", only checks defined in a \"task\" service block can use this mode", service.Name)) + } if _, ok := knownTasks[check.TaskName]; !ok { mErr.Errors = append(mErr.Errors, fmt.Errorf("Check %s invalid: refers to non-existent task %s", check.Name, check.TaskName)) @@ -6714,6 +6720,10 @@ func validateServices(t *Task) error { mErr.Errors = append(mErr.Errors, outer) } + if service.AddressMode == AddressModeAlloc { + mErr.Errors = append(mErr.Errors, fmt.Errorf("service %q cannot use address_mode=\"alloc\", only services defined in a \"group\" block can use this mode", service.Name)) + } + // Ensure that services with the same name are not being registered for // the same port if _, ok := knownServices[service.Name+service.PortLabel]; ok { @@ -6742,6 +6752,10 @@ func validateServices(t *Task) error { } knownChecks[check.Name] = struct{}{} + if check.AddressMode == AddressModeAlloc { + mErr.Errors = append(mErr.Errors, fmt.Errorf("check %q cannot use address_mode=\"alloc\", only checks defined in a \"group\" service block can use this mode", service.Name)) + } + if !check.RequiresPort() { // No need to continue validating check if it doesn't need a port continue diff --git a/website/pages/docs/job-specification/service.mdx b/website/pages/docs/job-specification/service.mdx index 9d9fd453d..63818250c 100644 --- a/website/pages/docs/job-specification/service.mdx +++ b/website/pages/docs/job-specification/service.mdx @@ -129,11 +129,17 @@ Connect][connect] integration. [documentation](https://www.consul.io/docs/internals/anti-entropy#enable-tag-override) for more information. -- `address_mode` `(string: "auto")` - Specifies what address (host or +- `address_mode` `(string: "auto")` - Specifies what address (host, alloc or driver-specific) this service should advertise. This setting is supported in Docker since Nomad 0.6 and rkt since Nomad 0.7. See [below for examples.](#using-driver-address-mode) Valid options are: + - `alloc` - For allocations which create a network namespace, this address mode + uses the IP address inside the namespace. Can only be used with "bridge" and "cni" + [networking modes][network_mode]. A numeric port may be specified for situations + where no port mapping is necessary. This mode can only be set for services which + are defined in a "group" block. + - `auto` - Allows the driver to determine whether the host or driver address should be used. Defaults to `host` and only implemented by Docker. If you use a Docker network plugin such as weave, Docker will automatically use @@ -143,7 +149,8 @@ Connect][connect] integration. port map. A numeric port may be specified since port maps aren't required by all network plugins. Useful for advertising SDN and overlay network addresses. Task will fail if driver network cannot be determined. Only - implemented for Docker and rkt. + implemented for Docker and rkt. This mode can only be set for services + which are defined in a "task" block. - `host` - Use the host IP and port. @@ -716,3 +723,4 @@ advertise and check directly since Nomad isn't managing any port assignments. [killsignal]: /docs/job-specification/task#kill_signal [killtimeout]: /docs/job-specification/task#kill_timeout [service_task]: /docs/job-specification/service#task-1 +[network_mode]: /docs/job-specification/network#mode \ No newline at end of file