mirror of
https://github.com/kemko/nomad.git
synced 2026-01-01 16:05:42 +03:00
networking: Inject implicit constraints on CNI plugins when using bridge mode (#15473)
This PR adds a job mutator that injects constraints into the task groups of a job that make use of bridge networking. Creating a bridge network makes use of the CNI plugins: bridge, firewall, host-local, loopback, and portmap. Starting with Nomad 1.5 these plugins are fingerprinted on each node, and as such we can ensure that jobs which need them are scheduled only on nodes where they are available.
This commit is contained in:
3
.changelog/15473.txt
Normal file
3
.changelog/15473.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
```release-note:improvement
|
||||
networking: Inject constraints on CNI plugins when using bridge networking
|
||||
```
|
||||
@@ -148,7 +148,8 @@ func buildNomadBridgeNetConfig(b bridgeNetworkConfigurator) []byte {
|
||||
}
|
||||
|
||||
// Update website/content/docs/networking/cni.mdx when the bridge configuration
|
||||
// is modified.
|
||||
// is modified. If CNI plugins are added or versions need to be updated for new
|
||||
// fields, add a new constraint to nomad/job_endpoint_hooks.go
|
||||
const nomadCNIConfigTemplate = `{
|
||||
"cniVersion": "0.4.0",
|
||||
"name": "nomad",
|
||||
|
||||
@@ -15,13 +15,24 @@ import (
|
||||
"golang.org/x/exp/maps"
|
||||
)
|
||||
|
||||
// Node attributes acquired via fingerprinting.
|
||||
const (
|
||||
attrVaultVersion = `${attr.vault.version}`
|
||||
attrConsulVersion = `${attr.consul.version}`
|
||||
attrNomadVersion = `${attr.nomad.version}`
|
||||
attrNomadServiceDisco = `${attr.nomad.service_discovery}`
|
||||
attrBridgeCNI = `${attr.plugins.cni.version.bridge}`
|
||||
attrFirewallCNI = `${attr.plugins.cni.version.firewall}`
|
||||
attrHostLocalCNI = `${attr.plugins.cni.version.host-local}`
|
||||
attrLoopbackCNI = `${attr.plugins.cni.version.loopback}`
|
||||
attrPortMapCNI = `${attr.plugins.cni.version.portmap}`
|
||||
)
|
||||
|
||||
// cniMinVersion is the version expression for the minimum CNI version supported
|
||||
// for the CNI container-networking plugins. Support was added at v0.4.0, so
|
||||
// we set the minimum to that.
|
||||
const cniMinVersion = ">= 0.4.0"
|
||||
|
||||
var (
|
||||
// vaultConstraint is the implicit constraint added to jobs requesting a
|
||||
// Vault token
|
||||
@@ -78,6 +89,51 @@ var (
|
||||
RTarget: "linux",
|
||||
Operand: "=",
|
||||
}
|
||||
|
||||
// cniBridgeConstraint is an implicit constraint added to jobs making use
|
||||
// of bridge networking mode. The bridge plugin is one of the CNI plugins used to support
|
||||
// bridge networking.
|
||||
cniBridgeConstraint = &structs.Constraint{
|
||||
LTarget: attrBridgeCNI,
|
||||
RTarget: cniMinVersion,
|
||||
Operand: structs.ConstraintSemver,
|
||||
}
|
||||
|
||||
// cniFirewallConstraint is an implicit constraint added to jobs making use
|
||||
// of bridge networking mode. The firewall plugin is one of the CNI plugins used to support
|
||||
// bridge networking.
|
||||
cniFirewallConstraint = &structs.Constraint{
|
||||
LTarget: attrFirewallCNI,
|
||||
RTarget: cniMinVersion,
|
||||
Operand: structs.ConstraintSemver,
|
||||
}
|
||||
|
||||
// cniHostLocalConstraint is an implicit constraint added to jobs making use
|
||||
// of bridge networking mode. The host-local plugin is one of the CNI plugins used to support
|
||||
// bridge networking.
|
||||
cniHostLocalConstraint = &structs.Constraint{
|
||||
LTarget: attrHostLocalCNI,
|
||||
RTarget: cniMinVersion,
|
||||
Operand: structs.ConstraintSemver,
|
||||
}
|
||||
|
||||
// cniLoopbackConstraint is an implicit constraint added to jobs making use
|
||||
// of bridge networking mode. The loopback plugin is one of the CNI plugins used to support
|
||||
// bridge networking.
|
||||
cniLoopbackConstraint = &structs.Constraint{
|
||||
LTarget: attrLoopbackCNI,
|
||||
RTarget: cniMinVersion,
|
||||
Operand: structs.ConstraintSemver,
|
||||
}
|
||||
|
||||
// cniPortMapConstraint is an implicit constraint added to jobs making use
|
||||
// of bridge networking mode. The portmap plugin is one of the CNI plugins used to support
|
||||
// bridge networking.
|
||||
cniPortMapConstraint = &structs.Constraint{
|
||||
LTarget: attrPortMapCNI,
|
||||
RTarget: cniMinVersion,
|
||||
Operand: structs.ConstraintSemver,
|
||||
}
|
||||
)
|
||||
|
||||
type admissionController interface {
|
||||
@@ -192,12 +248,14 @@ func (jobImpliedConstraints) Mutate(j *structs.Job) (*structs.Job, []error, erro
|
||||
// Identify which task groups are utilizing NUMA resources.
|
||||
numaTaskGroups := j.RequiredNUMA()
|
||||
|
||||
bridgeNetworkingTaskGroups := j.RequiredBridgeNetwork()
|
||||
|
||||
// Hot path where none of our things require constraints.
|
||||
//
|
||||
// [UPDATE THIS] if you are adding a new constraint thing!
|
||||
if len(signals) == 0 && len(vaultBlocks) == 0 &&
|
||||
nativeServiceDisco.Empty() && len(consulServiceDisco) == 0 &&
|
||||
numaTaskGroups.Empty() {
|
||||
numaTaskGroups.Empty() && bridgeNetworkingTaskGroups.Empty() {
|
||||
return j, nil, nil
|
||||
}
|
||||
|
||||
@@ -254,6 +312,14 @@ func (jobImpliedConstraints) Mutate(j *structs.Job) (*structs.Job, []error, erro
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if bridgeNetworkingTaskGroups.Contains(tg.Name) {
|
||||
mutateConstraint(constraintMatcherLeft, tg, cniBridgeConstraint)
|
||||
mutateConstraint(constraintMatcherLeft, tg, cniFirewallConstraint)
|
||||
mutateConstraint(constraintMatcherLeft, tg, cniHostLocalConstraint)
|
||||
mutateConstraint(constraintMatcherLeft, tg, cniLoopbackConstraint)
|
||||
mutateConstraint(constraintMatcherLeft, tg, cniPortMapConstraint)
|
||||
}
|
||||
}
|
||||
|
||||
return j, nil, nil
|
||||
|
||||
@@ -1160,6 +1160,40 @@ func Test_jobImpliedConstraints_Mutate(t *testing.T) {
|
||||
expectedOutputWarnings: nil,
|
||||
expectedOutputError: nil,
|
||||
},
|
||||
{
|
||||
inputJob: &structs.Job{
|
||||
Name: "example",
|
||||
TaskGroups: []*structs.TaskGroup{
|
||||
{
|
||||
Name: "group-with-bridge",
|
||||
Networks: []*structs.NetworkResource{
|
||||
{Mode: "bridge"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedOutputJob: &structs.Job{
|
||||
Name: "example",
|
||||
TaskGroups: []*structs.TaskGroup{
|
||||
{
|
||||
Name: "group-with-bridge",
|
||||
Networks: []*structs.NetworkResource{
|
||||
{Mode: "bridge"},
|
||||
},
|
||||
Constraints: []*structs.Constraint{
|
||||
cniBridgeConstraint,
|
||||
cniFirewallConstraint,
|
||||
cniHostLocalConstraint,
|
||||
cniLoopbackConstraint,
|
||||
cniPortMapConstraint,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedOutputWarnings: nil,
|
||||
expectedOutputError: nil,
|
||||
name: "task group with bridge network",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
|
||||
@@ -132,3 +132,15 @@ func (j *Job) RequiredNUMA() set.Collection[string] {
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// RequiredBridgeNetwork identifies which task groups, if any, within the job
|
||||
// contain networks requesting bridge networking.
|
||||
func (j *Job) RequiredBridgeNetwork() set.Collection[string] {
|
||||
result := set.New[string](len(j.TaskGroups))
|
||||
for _, tg := range j.TaskGroups {
|
||||
if tg.Networks.Modes().Contains("bridge") {
|
||||
result.Insert(tg.Name)
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
@@ -3006,6 +3006,13 @@ func (ns Networks) NetIndex(n *NetworkResource) int {
|
||||
return -1
|
||||
}
|
||||
|
||||
// Modes returns the set of network modes used by our NetworkResource blocks.
|
||||
func (ns Networks) Modes() *set.Set[string] {
|
||||
return set.FromFunc(ns, func(nr *NetworkResource) string {
|
||||
return nr.Mode
|
||||
})
|
||||
}
|
||||
|
||||
// RequestedDevice is used to request a device for a task.
|
||||
type RequestedDevice struct {
|
||||
// Name is the request name. The possible values are as follows:
|
||||
|
||||
@@ -14,12 +14,26 @@ their upgrades as a result of new features or changed behavior. This page is
|
||||
used to document those details separately from the standard upgrade flow.
|
||||
|
||||
## Nomad 1.8.0
|
||||
|
||||
#### Deprecated Disconnect Fields
|
||||
|
||||
Nomad 1.8.0 introduces a `disconnect` block meant to group all the configuration
|
||||
options related to the behavior of disconnected clients and servers, causing the
|
||||
deprecation of the fileds `stop_after_client_disconnect`, `max_client_disconnect`
|
||||
deprecation of the fields `stop_after_client_disconnect`, `max_client_disconnect`
|
||||
and `prevent_reschedule_on_lost`. This block also introduces new options for
|
||||
allocation reconciliation if the client regains connectivity.
|
||||
|
||||
#### CNI Constraints
|
||||
|
||||
In Nomad 1.8.0, jobs with `bridge` networking will have constraints added during
|
||||
job submission that require CNI plugins to be present on the node. Nodes have
|
||||
fingerprinted the available CNI plugins starting in Nomad 1.5.0.
|
||||
|
||||
If you are upgrading from Nomad 1.5.0 or later to 1.8.0 or later, there's
|
||||
nothing additional for you to do. It's not recommended to skip more than 2
|
||||
versions of Nomad. However, if you do upgrade from a version earlier than 1.5.0 to 1.8.0 or later,
|
||||
you will need to ensure that clients have been upgraded before submitting any
|
||||
jobs that use `bridge` networking.
|
||||
|
||||
#### Removal of `raw_exec` option `no_cgroups`
|
||||
|
||||
|
||||
Reference in New Issue
Block a user