Merge branch 'master' of github.com:hashicorp/nomad into release-0.12.0
@@ -27,6 +27,8 @@ IMPROVEMENTS:

* api: Support querying for jobs and allocations across all namespaces [[GH-8192](https://github.com/hashicorp/nomad/issues/8192)]
* api: New `/agent/host` endpoint returns diagnostic information about the host [[GH-8325](https://github.com/hashicorp/nomad/pull/8325)]
* build: Updated to Go 1.14.4 [[GH-8172](https://github.com/hashicorp/nomad/issues/8172)]
* build: Switched to Go modules for dependency management [[GH-8041](https://github.com/hashicorp/nomad/pull/8041)]
* connect: Infer service task parameter where possible [[GH-8274](https://github.com/hashicorp/nomad/issues/8274)]
* server: Added `raft_multiplier` config to tweak Raft related timeouts [[GH-8082](https://github.com/hashicorp/nomad/issues/8082)]

@@ -34,6 +36,7 @@ BUG FIXES:

* cli: Fixed malformed alloc status address list when listing more than 1 address [[GH-8161](https://github.com/hashicorp/nomad/issues/8161)]
* client: Fixed a bug where stdout/stderr were not properly reopened for community task drivers [[GH-8155](https://github.com/hashicorp/nomad/issues/8155)]
* client: Fixed a bug where batch job sidecars may be left running after the main task completes [[GH-8311](https://github.com/hashicorp/nomad/issues/8311)]
* connect: Fixed a bug where custom `sidecar_task` definitions were being shared [[GH-8337](https://github.com/hashicorp/nomad/issues/8337)]
* csi: Fixed a bug where `NodeStageVolume` and `NodePublishVolume` requests were not receiving volume context [[GH-8239](https://github.com/hashicorp/nomad/issues/8239)]
* driver/docker: Fixed a bug to set correct value for `memory-swap` when using `memory_hard_limit` [[GH-8153](https://github.com/hashicorp/nomad/issues/8153)]
* ui: The log streamer will now always follow logs when the current scroll position is the end of the buffer. [[GH-8177](https://github.com/hashicorp/nomad/issues/8177)]
@@ -56,7 +56,7 @@ func (v *CSIVolumes) Register(vol *CSIVolume, w *WriteOptions) (*WriteMeta, erro
 }

 func (v *CSIVolumes) Deregister(id string, force bool, w *WriteOptions) error {
-	_, err := v.client.delete(fmt.Sprintf("/v1/volume/csi/%v?purge=%t", url.PathEscape(id), force), nil, w)
+	_, err := v.client.delete(fmt.Sprintf("/v1/volume/csi/%v?force=%t", url.PathEscape(id), force), nil, w)
 	return err
 }
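
For consumers of the `api` package the method is invoked the same way; only the query parameter sent to the server changes. A minimal sketch, assuming a reachable Nomad agent and a placeholder volume ID:

```go
package main

import (
	"log"

	"github.com/hashicorp/nomad/api"
)

func main() {
	// DefaultConfig picks up NOMAD_ADDR and related environment variables.
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		log.Fatal(err)
	}

	// true asks the server to force the deregistration; "example-volume"
	// is a placeholder ID.
	if err := client.CSIVolumes().Deregister("example-volume", true, nil); err != nil {
		log.Fatal(err)
	}
}
```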
@@ -632,6 +632,7 @@ func convertClientConfig(agentConfig *Config) (*clientconfig.Config, error) {

 	// Setup networking configuration
 	conf.CNIPath = agentConfig.Client.CNIPath
+	conf.CNIConfigDir = agentConfig.Client.CNIConfigDir
 	conf.BridgeNetworkName = agentConfig.Client.BridgeNetworkName
 	conf.BridgeNetworkAllocSubnet = agentConfig.Client.BridgeNetworkSubnet
@@ -277,6 +277,10 @@ type ClientConfig struct {
 	// specified colon delimited
 	CNIPath string `hcl:"cni_path"`

+	// CNIConfigDir is the directory where CNI network configuration is located. The
+	// client will use this path when fingerprinting CNI networks.
+	CNIConfigDir string `hcl:"cni_config_dir"`
+
 	// BridgeNetworkName is the name of the bridge to create when using the
 	// bridge network mode
 	BridgeNetworkName string `hcl:"bridge_network_name"`
@@ -1535,6 +1539,9 @@ func (a *ClientConfig) Merge(b *ClientConfig) *ClientConfig {
 	if b.CNIPath != "" {
 		result.CNIPath = b.CNIPath
 	}
+	if b.CNIConfigDir != "" {
+		result.CNIConfigDir = b.CNIConfigDir
+	}
 	if b.BridgeNetworkName != "" {
 		result.BridgeNetworkName = b.BridgeNetworkName
 	}
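
Taken together, these changes surface as a `cni_config_dir` option alongside the existing CNI and bridge settings in the agent's `client` block. A sketch with illustrative paths, based on the `hcl` struct tags above:

```hcl
client {
  enabled = true

  # Paths used for CNI plugin discovery and network fingerprinting.
  cni_path       = "/opt/cni/bin"
  cni_config_dir = "/opt/cni/config"

  # Bridge created for allocations running in bridge networking mode.
  bridge_network_name = "nomad"
}
```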
@@ -195,7 +195,7 @@ func (c *JobPlanCommand) multiregionPlan(client *api.Client, job *api.Job, opts

 	for regionName, resp := range plans {
 		c.Ui.Output(c.Colorize().Color(fmt.Sprintf("[bold]Region: %q[reset]", regionName)))
-		regionExitCode := c.outputPlannedJob(job, resp, verbose, diff)
+		regionExitCode := c.outputPlannedJob(job, resp, diff, verbose)
 		if regionExitCode > exitCode {
 			exitCode = regionExitCode
 		}
@@ -6,6 +6,7 @@ import (

 	"github.com/hashicorp/nomad/helper/uuid"
 	"github.com/hashicorp/nomad/nomad/structs"
+	"github.com/pkg/errors"
 )

 var (
@@ -20,13 +21,15 @@ var (

 	// connectDriverConfig is the driver configuration used by the injected
 	// connect proxy sidecar task
-	connectDriverConfig = map[string]interface{}{
-		"image": "${meta.connect.sidecar_image}",
-		"args": []interface{}{
-			"-c", structs.EnvoyBootstrapPath,
-			"-l", "${meta.connect.log_level}",
-			"--disable-hot-restart",
-		},
+	connectDriverConfig = func() map[string]interface{} {
+		return map[string]interface{}{
+			"image": "${meta.connect.sidecar_image}",
+			"args": []interface{}{
+				"-c", structs.EnvoyBootstrapPath,
+				"-l", "${meta.connect.log_level}",
+				"--disable-hot-restart",
+			},
+		}
 	}

 	// connectVersionConstraint is used when building the sidecar task to ensure
@@ -97,13 +100,23 @@ func isSidecarForService(t *structs.Task, svc string) bool {
 	return t.Kind == structs.TaskKind(fmt.Sprintf("%s:%s", structs.ConnectProxyPrefix, svc))
 }

-func getNamedTaskForNativeService(tg *structs.TaskGroup, taskName string) *structs.Task {
+// getNamedTaskForNativeService retrieves the Task with the name specified in the
+// group service definition. If the task name is empty and there is only one task
+// in the group, infer the name from the only option.
+func getNamedTaskForNativeService(tg *structs.TaskGroup, serviceName, taskName string) (*structs.Task, error) {
+	if taskName == "" {
+		if len(tg.Tasks) == 1 {
+			return tg.Tasks[0], nil
+		}
+		return nil, errors.Errorf("task for Consul Connect Native service %s->%s is ambiguous and must be set", tg.Name, serviceName)
+	}
+
 	for _, t := range tg.Tasks {
 		if t.Name == taskName {
-			return t
+			return t, nil
 		}
 	}
-	return nil
+	return nil, errors.Errorf("task %s named by Consul Connect Native service %s->%s does not exist", taskName, tg.Name, serviceName)
 }

 // probably need to hack this up to look for checks on the service, and if they
@@ -155,11 +168,13 @@ func groupConnectHook(job *structs.Job, g *structs.TaskGroup) error {
 			// create a port for the sidecar task's proxy port
 			makePort(fmt.Sprintf("%s-%s", structs.ConnectProxyPrefix, service.Name))
 		} else if service.Connect.IsNative() {
 			// find the task backing this connect native service and set the kind
 			nativeTaskName := service.TaskName
-			if t := getNamedTaskForNativeService(g, nativeTaskName); t != nil {
-				t.Kind = structs.NewTaskKind(structs.ConnectNativePrefix, service.Name)
+			if t, err := getNamedTaskForNativeService(g, service.Name, nativeTaskName); err != nil {
+				return err
 			} else {
-				return fmt.Errorf("native task %s named by %s->%s does not exist", nativeTaskName, g.Name, service.Name)
+				t.Kind = structs.NewTaskKind(structs.ConnectNativePrefix, service.Name)
+				service.TaskName = t.Name // in case the task was inferred
 			}
 		}
 	}
@@ -172,7 +187,7 @@ func newConnectTask(serviceName string) *structs.Task {
 		Name:   fmt.Sprintf("%s-%s", structs.ConnectProxyPrefix, serviceName),
 		Kind:   structs.NewTaskKind(structs.ConnectProxyPrefix, serviceName),
 		Driver: "docker",
-		Config: connectDriverConfig,
+		Config: connectDriverConfig(),
 		ShutdownDelay: 5 * time.Second,
 		LogConfig: &structs.LogConfig{
 			MaxFiles: 2,
@@ -220,16 +235,8 @@ func groupConnectSidecarValidate(g *structs.TaskGroup) error {
 func groupConnectNativeValidate(g *structs.TaskGroup, s *structs.Service) error {
 	// note that network mode is not enforced for connect native services

-	// a native service must have the task identified in the service definition.
-	if len(s.TaskName) == 0 {
-		return fmt.Errorf("Consul Connect Native service %q requires task name", s.Name)
+	if _, err := getNamedTaskForNativeService(g, s.Name, s.TaskName); err != nil {
+		return err
 	}
-
-	// also make sure that task actually exists
-	for _, task := range g.Tasks {
-		if s.TaskName == task.Name {
-			return nil
-		}
-	}
-	return fmt.Errorf("Consul Connect Native service %q requires undefined task %q in group %q", s.Name, s.TaskName, g.Name)
+	return nil
 }
@@ -157,32 +157,40 @@ func TestJobEndpointConnect_groupConnectSidecarValidate(t *testing.T) {
 	})
 }

-func TestJobEndpointConnect_groupConnectNativeValidate(t *testing.T) {
-	t.Run("no task in service", func(t *testing.T) {
-		require.EqualError(t, groupConnectNativeValidate(&structs.TaskGroup{
-			Name: "g1",
-		}, &structs.Service{
-			Name:     "s1",
-			TaskName: "",
-		}), `Consul Connect Native service "s1" requires task name`)
+func TestJobEndpointConnect_getNamedTaskForNativeService(t *testing.T) {
+	t.Run("named exists", func(t *testing.T) {
+		task, err := getNamedTaskForNativeService(&structs.TaskGroup{
+			Name:  "g1",
+			Tasks: []*structs.Task{{Name: "t1"}, {Name: "t2"}},
+		}, "s1", "t2")
+		require.NoError(t, err)
+		require.Equal(t, "t2", task.Name)
 	})

-	t.Run("no task for service", func(t *testing.T) {
-		require.EqualError(t, groupConnectNativeValidate(&structs.TaskGroup{
-			Name: "g2",
-		}, &structs.Service{
-			Name:     "s2",
-			TaskName: "t1",
-		}), `Consul Connect Native service "s2" requires undefined task "t1" in group "g2"`)
+	t.Run("infer exists", func(t *testing.T) {
+		task, err := getNamedTaskForNativeService(&structs.TaskGroup{
+			Name:  "g1",
+			Tasks: []*structs.Task{{Name: "t2"}},
+		}, "s1", "")
+		require.NoError(t, err)
+		require.Equal(t, "t2", task.Name)
 	})

-	t.Run("native okay", func(t *testing.T) {
-		require.NoError(t, groupConnectNativeValidate(&structs.TaskGroup{
-			Name:  "g2",
-			Tasks: []*structs.Task{{Name: "t0"}, {Name: "t1"}, {Name: "t3"}},
-		}, &structs.Service{
-			Name:     "s2",
-			TaskName: "t1",
-		}))
+	t.Run("infer ambiguous", func(t *testing.T) {
+		task, err := getNamedTaskForNativeService(&structs.TaskGroup{
+			Name:  "g1",
+			Tasks: []*structs.Task{{Name: "t1"}, {Name: "t2"}},
+		}, "s1", "")
+		require.EqualError(t, err, "task for Consul Connect Native service g1->s1 is ambiguous and must be set")
+		require.Nil(t, task)
 	})
+
+	t.Run("named absent", func(t *testing.T) {
+		task, err := getNamedTaskForNativeService(&structs.TaskGroup{
+			Name:  "g1",
+			Tasks: []*structs.Task{{Name: "t1"}, {Name: "t2"}},
+		}, "s1", "t3")
+		require.EqualError(t, err, "task t3 named by Consul Connect Native service g1->s1 does not exist")
+		require.Nil(t, task)
+	})
 }
@@ -420,7 +420,7 @@ func TestJobEndpoint_Register_ConnectWithSidecarTask(t *testing.T) {
 	require.Equal("test", sidecarTask.Meta["source"])
 	require.Equal(500, sidecarTask.Resources.CPU)
 	require.Equal(connectSidecarResources().MemoryMB, sidecarTask.Resources.MemoryMB)
-	cfg := connectDriverConfig
+	cfg := connectDriverConfig()
 	cfg["labels"] = map[string]interface{}{
 		"foo": "bar",
 	}
@@ -5597,9 +5597,9 @@ func TestJobEndpoint_Scale_DeploymentBlocking(t *testing.T) {

 	// attempt to scale
 	originalCount := job.TaskGroups[0].Count
-	newCount := int64(originalCount+1)
+	newCount := int64(originalCount + 1)
 	groupName := job.TaskGroups[0].Name
-	scalingMetadata := map[string]interface{}{
+	scalingMetadata := map[string]interface{}{
 		"meta": "data",
 	}
 	scalingMessage := "original reason for scaling"

@@ -5692,7 +5692,7 @@ func TestJobEndpoint_Scale_InformationalEventsShouldNotBeBlocked(t *testing.T) {

 	// register informational scaling event
 	groupName := job.TaskGroups[0].Name
-	scalingMetadata := map[string]interface{}{
+	scalingMetadata := map[string]interface{}{
 		"meta": "data",
 	}
 	scalingMessage := "original reason for scaling"
@@ -1054,7 +1054,7 @@ func multiregionDiff(old, new *Multiregion, contextual bool) *ObjectDiff {
 	for name, oldRegion := range oldMap {
 		// Diff the same, deleted and edited
 		newRegion := newMap[name]
-		rdiff := multiregionRegionDiff(newRegion, oldRegion, contextual)
+		rdiff := multiregionRegionDiff(oldRegion, newRegion, contextual)
 		if rdiff != nil {
 			diff.Objects = append(diff.Objects, rdiff)
 		}
@@ -1210,8 +1210,8 @@ func TestJobDiff(t *testing.T) {
 			Regions: []*MultiregionRegion{
 				{
 					Name:        "west",
-					Count:       1,
-					Datacenters: []string{"west-1"},
+					Count:       3,
+					Datacenters: []string{"west-2"},
 					Meta:        map[string]string{"region_code": "W"},
 				},
 				{
@@ -1223,7 +1223,6 @@ func TestJobDiff(t *testing.T) {
 				},
 			},
 		},

 		Expected: &JobDiff{
 			Type: DiffTypeEdited,
 			Objects: []*ObjectDiff{
@@ -1231,6 +1230,38 @@ func TestJobDiff(t *testing.T) {
 				Type: DiffTypeEdited,
 				Name: "Multiregion",
 				Objects: []*ObjectDiff{
+					{
+						Type: DiffTypeEdited,
+						Name: "Region",
+						Fields: []*FieldDiff{
+							{
+								Type: DiffTypeEdited,
+								Name: "Count",
+								Old:  "1",
+								New:  "3",
+							},
+						},
+						Objects: []*ObjectDiff{
+							{
+								Type: DiffTypeEdited,
+								Name: "Datacenters",
+								Fields: []*FieldDiff{
+									{
+										Type: DiffTypeAdded,
+										Name: "Datacenters",
+										Old:  "",
+										New:  "west-2",
+									},
+									{
+										Type: DiffTypeDeleted,
+										Name: "Datacenters",
+										Old:  "west-1",
+										New:  "",
+									},
+								},
+							},
+						},
+					},
 					{
 						Type: DiffTypeAdded,
 						Name: "Region",
@@ -481,7 +481,8 @@ func (s *Service) Validate() error {
 		mErr.Errors = append(mErr.Errors, err)
 	}

-	// if service is connect native, service task must be set
+	// if service is connect native, service task must be set (which may
+	// happen implicitly in a job mutation if there is only one task)
 	if s.Connect.IsNative() && len(s.TaskName) == 0 {
 		mErr.Errors = append(mErr.Errors, fmt.Errorf("Service %s is Connect Native and requires setting the task", s.Name))
 	}
vendor/github.com/hashicorp/nomad/api/csi.go (2 changes, generated, vendored)
@@ -56,7 +56,7 @@ func (v *CSIVolumes) Register(vol *CSIVolume, w *WriteOptions) (*WriteMeta, erro
 }

 func (v *CSIVolumes) Deregister(id string, force bool, w *WriteOptions) error {
-	_, err := v.client.delete(fmt.Sprintf("/v1/volume/csi/%v?purge=%t", url.PathEscape(id), force), nil, w)
+	_, err := v.client.delete(fmt.Sprintf("/v1/volume/csi/%v?force=%t", url.PathEscape(id), force), nil, w)
 	return err
 }
@@ -224,6 +224,29 @@ export default [
 	},
 	'schedulers',
 	{ category: 'runtime', content: ['environment', 'interpolation'] },
+	{
+		category: 'autoscaling',
+		content: [
+			'agent',
+			'api',
+			'cli',
+			'policy',
+			{
+				category: 'plugins',
+				content: [
+					'apm',
+					'strategy',
+					'target'
+				]
+			},
+			{
+				category: 'internals',
+				content: [
+					'checks'
+				]
+			}
+		]
+	},
 	{ category: 'telemetry', content: ['metrics'] },
 	{ category: 'vault-integration' },
 	'------------',
@@ -320,6 +320,22 @@ $ curl \
         "Mode": "",
         "ReservedPorts": null
       }
     ],
+    "NodeNetworks": [
+      {
+        "Addresses": [
+          {
+            "Address": "127.0.0.1",
+            "Alias": "default",
+            "Family": "ipv4",
+            "Gateway": "",
+            "ReservedPorts": ""
+          }
+        ],
+        "Device": "lo",
+        "MacAddress": "00:00:00:00:00:00",
+        "Mode": "host"
+      }
+    ]
   },
   "Reserved": {
@@ -88,10 +88,6 @@ $ curl \
 [
   {
     "ID": "example_plugin_id",
-    "Topologies": [
-      {"key": "val"},
-      {"key": "val2"}
-    ],
     "Provider": "aws.ebs",
     "Version": "1.0.1",
     "ControllersRequired": true,
website/pages/docs/autoscaling/agent.mdx (250 lines, new file)
@@ -0,0 +1,250 @@
---
layout: docs
page_title: Agent
sidebar_title: Agent
description: The Nomad Autoscaler is a long-lived process which coordinates scaling activities.
---

# Nomad Autoscaler Agent

The Nomad Autoscaler agent has a variety of parameters that can be specified
via configuration files or command-line flags. Configuration files are written
in [HCL][hcl_v2]. The Nomad Autoscaler can read and combine parameters from
multiple configuration files or directories to configure the agent.

## Nomad Namespaces

The Nomad Autoscaler currently has limited support for
[Nomad Namespaces][nomad_namespaces]. The `nomad` configuration below supports
specifying a namespace; if configured with a namespace, the Autoscaler will
retrieve scaling policies and perform autoscaling only for jobs in that
namespace. A future version will include support for multiple namespaces.
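
For example, restricting the Autoscaler to a single namespace only requires the
`namespace` parameter of the `nomad` block described below (the namespace name
here is illustrative):

```hcl
nomad {
  namespace = "web-team"
}
```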

## Nomad ACLs

The Nomad Autoscaler can be configured to interact with an ACL-enabled Nomad
cluster. Nomad 0.11 includes the `scale` ACL policy disposition specifically for
supporting the operations of the Nomad Autoscaler. Therefore, the
following policy is sufficient for creating an ACL token that can be used by
the autoscaler for fetching scaling policies and scaling jobs:

```hcl
namespace "default" {
  policy = "scale"
}
```

Other APM and target plugins may require additional ACLs; see the plugin documentation for more information.
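
As a sketch, assuming the policy above is saved as `autoscaler-policy.hcl`, it
could be applied and a client token minted for the Autoscaler with:

```shell-session
$ nomad acl policy apply autoscaler autoscaler-policy.hcl
$ nomad acl token create -name="autoscaler" -policy="autoscaler" -type="client"
```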

## Load Order and Merging

The Nomad Autoscaler agent supports multiple configuration files, which can be
provided using the [-config][autoscaler_cli_config] CLI flag. The flag can
accept either a file or folder. In the case of a folder, any `.hcl` and `.json`
files in the folder will be loaded and merged in lexicographical order. Directories
are not loaded recursively.

For example:

```shell-session
$ nomad-autoscaler agent -config=autoscaler.conf -config=/etc/nomad-autoscaler -config=extra.json
```

This will load configuration from `autoscaler.conf`, from `.hcl` and `.json` files
under `/etc/nomad-autoscaler`, and finally from `extra.json`. As each file is
processed, its contents are merged into the existing configuration. When merging,
any non-empty values from the latest config file will append or replace
parameters in the current configuration. An empty value means `""` for strings,
`0` for integer or float values, and `false` for booleans.
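
As an illustrative sketch of the merge behavior, given two hypothetical files
loaded in this order:

```hcl
# a.hcl
log_level  = "DEBUG"
plugin_dir = "/opt/nomad-autoscaler/plugins"
```

```hcl
# b.hcl
log_level = "WARN"
log_json  = true
```

the effective configuration keeps `plugin_dir` from `a.hcl`, while the non-empty
`log_level` in `b.hcl` replaces the earlier value and `log_json` is added.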

## SIGHUP Reload

The Nomad Autoscaler agent supports handling the `SIGHUP` signal for reloading
without the need for restarting the agent. When sending a `SIGHUP` signal to the
agent process, the agent will perform the following actions.

- reload the contents of the scaling policy directory as defined by the
  [policy dir][autoscaler_cli_policy_dir] parameter.
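
For example, assuming a single local agent process:

```shell-session
$ pkill -HUP nomad-autoscaler
```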

## General Parameters

- `log_level` `(string: "INFO")` - Specify the verbosity level of Nomad
  Autoscaler's logs. Valid values include DEBUG, INFO, and WARN, in decreasing
  order of verbosity.

- `log_json` `(bool: false)` - Output logs in a JSON format.

- `plugin_dir` `(string: "./plugins")` - The plugin directory is used to
  discover Nomad Autoscaler plugins.

## `http` Block

The `http` block configures the Nomad Autoscaler's HTTP endpoint.

```hcl
http {
  bind_address = "10.0.0.10"
  bind_port    = 9999
}
```

### `http` Parameters

- `bind_address` `(string: "127.0.0.1")` - The HTTP address that the server will
  bind to.

- `bind_port` `(int: 8080)` - The port that the server will bind to.

## `nomad` Block

The `nomad` block configures the Nomad Autoscaler's Nomad client.

```hcl
nomad {
  address = "http://my-nomad.systems:4646"
  region  = "esp-vlc-1"
}
```

### `nomad` Parameters

- `address` `(string: "http://127.0.0.1:4646")` - The address of the Nomad server
  in the form of protocol://addr:port.

- `region` `(string: "global")` - The region of the Nomad servers to connect with.

- `namespace` `(string: "")` - The target namespace for queries and actions bound
  to a namespace.

- `token` `(string: "")` - The SecretID of an ACL token to use to authenticate
  API requests with.

- `http_auth` `(string: "")` - The authentication information to use when connecting
  to a Nomad API which is using HTTP authentication.

- `ca_cert` `(string: "")` - Path to a PEM encoded CA cert file to use to verify
  the Nomad server SSL certificate.

- `ca_path` `(string: "")` - Path to a directory of PEM encoded CA cert files to
  verify the Nomad server SSL certificate.

- `client_cert` `(string: "")` - Path to a PEM encoded client certificate for TLS
  authentication to the Nomad server.

- `client_key` `(string: "")` - Path to an unencrypted PEM encoded private key
  matching the client certificate.

- `tls_server_name` `(string: "")` - The server name to use as the SNI host when
  connecting via TLS.

- `skip_verify` `(bool: false)` - Do not verify TLS certificates. This is strongly
  discouraged.

## `policy` Block

The `policy` block configures the Nomad Autoscaler's policy handling.

```hcl
policy {
  dir              = "/opt/nomad-autoscaler/plugins"
  default_cooldown = "2m"
}
```

### `policy` Parameters

- `dir` `(string: "./plugins")` - The path to a directory used to load scaling
  policies.

- `default_cooldown` `(string: "5m")` - The default cooldown that will be applied
  to all scaling policies which do not specify a cooldown period.

- `default_evaluation_interval` `(string: "10s")` - The default evaluation interval
  that will be applied to all scaling policies which do not specify an evaluation
  interval.

## `apm` Block

The `apm` block is used to configure application performance metric (APM) plugins.

```hcl
apm "example-apm-plugin" {
  driver = "example-apm-plugin"
  args   = ["-my-flag"]

  config = {
    address = "http://127.0.0.1:9090"
  }
}
```

### `apm` Parameters

- `args` `(array<string>: [])` - Specifies a set of arguments to pass to the
  plugin binary when it is executed.

- `driver` `(string: "")` - The plugin's executable name relative to the
  `plugin_dir`. If the plugin has a suffix, such as .exe, this should be omitted.

- `config` `(map<string><string>: nil)` - Specifies configuration values for
  the plugin either as HCL or JSON. The accepted values are plugin specific.
  Please refer to the individual plugin's documentation.

## `target` Block

The `target` block is used to configure scaling target plugins.

```hcl
target "example-target-plugin" {
  driver = "example-target-plugin"
  args   = ["-my-flag"]

  config = {
    region = "esp-vlc-1"
  }
}
```

### `target` Parameters

- `args` `(array<string>: [])` - Specifies a set of arguments to pass to the
  plugin binary when it is executed.

- `driver` `(string: "")` - The plugin's executable name relative to the
  `plugin_dir`. If the plugin has a suffix, such as .exe, this should be omitted.

- `config` `(map<string><string>: nil)` - Specifies configuration values for
  the plugin either as HCL or JSON. The accepted values are plugin specific.
  Please refer to the individual plugin's documentation.

## `strategy` Block

The `strategy` block is used to configure scaling strategy plugins.

```hcl
strategy "example-strategy-plugin" {
  driver = "example-strategy-plugin"
  args   = ["-my-flag"]

  config = {
    algorithm = "complex"
  }
}
```

### `strategy` Parameters

- `args` `(array<string>: [])` - Specifies a set of arguments to pass to the
  plugin binary when it is executed.

- `driver` `(string: "")` - The plugin's executable name relative to the
  `plugin_dir`. If the plugin has a suffix, such as .exe, this should be omitted.

- `config` `(map<string><string>: nil)` - Specifies configuration values for
  the plugin either as HCL or JSON. The accepted values are plugin specific.
  Please refer to the individual plugin's documentation.

[hcl_v2]: https://github.com/hashicorp/hcl/tree/hcl2
[nomad_namespaces]: https://learn.hashicorp.com/nomad/governance-and-policy/namespaces
[nomad_acls]: https://learn.hashicorp.com/nomad?track=acls#acls
[autoscaler_cli_config]: /docs/autoscaling/cli#config
[autoscaler_cli_policy_dir]: /docs/autoscaling/cli#policy-dir
website/pages/docs/autoscaling/api.mdx (29 lines, new file)
@@ -0,0 +1,29 @@
---
layout: docs
page_title: HTTP API
sidebar_title: API
description: Learn about the Nomad Autoscaler HTTP API.
---

# Nomad Autoscaler HTTP API

The Nomad Autoscaler exposes a small, simple API to be used for health checking
the agent.

## Health API

This endpoint can be used to query the Nomad Autoscaler agent aliveness. If the
agent is alive, the request will return a 200 OK, otherwise it will return a
503 ServiceUnavailable.

| Method | Path         | Produces           |
| ------ | ------------ | ------------------ |
| `GET`  | `/v1/health` | `application/json` |

### Sample Request

```shell-session
$ curl \
    --request GET \
    https://localhost:8080/v1/health
```
website/pages/docs/autoscaling/cli.mdx (108 lines, new file)
@@ -0,0 +1,108 @@
---
layout: docs
page_title: CLI
sidebar_title: CLI
description: >
  The Nomad Autoscaler can be controlled via a command-line interface. This
  page documents all the commands the Nomad Autoscaler accepts.
---

# Nomad Autoscaler Command: agent

The agent command is used to start the Nomad Autoscaler, which runs until an
interrupt signal is received. The Nomad Autoscaler agent's configuration
primarily comes from the config files used, but a subset of the options may
also be passed directly as CLI arguments. See the
[Nomad Autoscaler Agent guide][nomad_autoscaler_agent_guide] for more information
on how to use this command and the options it has.

## Command-line Options

A subset of the available Nomad Autoscaler agent configuration can optionally be
passed in via CLI arguments. The `agent` command accepts the following arguments:

- `-config=<path>`: The path to either a single config file or a directory of
  config files to use for configuring the Nomad Autoscaler agent.

- `-log-level=<level>`: Specify the verbosity level of Nomad Autoscaler's logs.
  Valid values include DEBUG, INFO, and WARN, in decreasing order of verbosity.
  The default is `INFO`.

- `-log-json`: Output logs in a JSON format. The default is false.

- `-plugin-dir=<path>`: The plugin directory is used to discover Nomad Autoscaler
  plugins. If not specified, the plugin directory defaults to
  `<current-dir>/plugins/`.

- `-http-bind-address=<addr>`: The HTTP address that the health server will bind
  to. The default is `127.0.0.1`.

- `-http-bind-port=<port>`: The port that the health server will bind to. The
  default is `8080`.

- `-nomad-address=<addr>`: The address of the Nomad server in the form of
  protocol://addr:port. The default is `http://127.0.0.1:4646`.

- `-nomad-region=<region>`: The region of the Nomad servers to connect with.

- `-nomad-namespace=<namespace>`: The target namespace for queries and actions
  bound to a namespace.

- `-nomad-token=<token>`: The SecretID of an ACL token to use to authenticate
  API requests with.

- `-nomad-http-auth=<username:password>`: The authentication information to use
  when connecting to a Nomad API which is using HTTP authentication.

- `-nomad-ca-cert=<path>`: Path to a PEM encoded CA cert file to use to verify
  the Nomad server SSL certificate.

- `-nomad-ca-path=<path>`: Path to a directory of PEM encoded CA cert files to
  verify the Nomad server SSL certificate. If both `-nomad-ca-cert` and
  `-nomad-ca-path` are specified, `-nomad-ca-cert` is used.

- `-nomad-client-cert=<path>`: Path to a PEM encoded client certificate for TLS
  authentication to the Nomad server. Must also specify `-nomad-client-key`.

- `-nomad-client-key=<path>`: Path to an unencrypted PEM encoded private key
  matching the client certificate from `-nomad-client-cert`.

- `-nomad-tls-server-name=<name>`: The server name to use as the SNI host when
  connecting via TLS.

- `-nomad-skip-verify`: Do not verify TLS certificates. This is strongly discouraged.

- `-policy-dir=<path>`: The path to a directory used to load scaling policies.

- `-policy-default-cooldown=<dur>`: The default cooldown that will be applied to
  all scaling policies which do not specify a cooldown period. The default is `5m`.

- `-policy-default-evaluation-interval=<dur>`: The default evaluation interval
  that will be applied to all scaling policies which do not specify an evaluation
  interval. The default is `10s`.

# Nomad Autoscaler Command: version

The `version` command displays build information about the running binary,
including the release version and the exact revision.

## Usage

```plaintext
nomad-autoscaler version
```

## Output

This command prints both the version number as well as the exact commit SHA used
during the build. The SHA may also have the string `+CHANGES` appended to the
end, indicating that local, uncommitted changes were detected at build time.

## Examples

```shell-session
$ nomad-autoscaler version
Nomad Autoscaler v0.0.3-dev (da91fa9)
```

[nomad_autoscaler_agent_guide]: /docs/autoscaling/agent
website/pages/docs/autoscaling/index.mdx (43 lines, new file)
@@ -0,0 +1,43 @@
---
layout: docs
page_title: Autoscaling
sidebar_title: Autoscaling
description: |-
  Overview of the Nomad Autoscaler that provides horizontal application and
  cluster scaling.
---

# Nomad Autoscaler Overview

This section details the Nomad Autoscaler, a horizontal application and cluster
autoscaler for Nomad. The Nomad Autoscaler is built and released separately from
Nomad. The source code can be viewed on [GitHub][autoscaler_github] and releases
are available on the [HashiCorp releases page][autoscaler_releases] or via
[Docker Hub][autoscaler_dockerhub].

The Nomad Autoscaler repository includes a number of [demos][autoscaler_demo]
which provide guided learning on running the autoscaler.

## Horizontal Application Autoscaling

Horizontal application autoscaling is the process of automatically controlling
the number of instances of an application so that it has sufficient work
throughput to meet service-level agreements (SLA). In Nomad, horizontal
application autoscaling can be achieved by modifying the number of allocations
in a task group based on the value of a relevant metric, such as CPU and memory
utilization or number of open connections. This is enabled by configuring
[autoscaling policies][autoscaling_policy] on individual Nomad jobs using the
[scaling block][scaling_block].
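
For illustration, a minimal `scaling` block inside a job's task group might look
like the sketch below; the group name and bounds are placeholders, and the
nested policy is documented on the [policy page][autoscaling_policy].

```hcl
group "cache" {
  count = 3

  scaling {
    enabled = true
    min     = 2
    max     = 10

    policy {
      # check, strategy, etc. go here; see the scaling policy documentation.
    }
  }
}
```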

## Horizontal Cluster Autoscaling

Horizontal cluster autoscaling is the process of adding or removing Nomad
clients from a cluster to ensure there is an appropriate amount of cluster
resource for the scheduled applications. This is achieved by interacting with
remote providers to start or terminate new Nomad clients based on metrics such
as the remaining free schedulable CPU or memory. Cluster scaling is enabled by
configuring the [autoscaler agent](/docs/autoscaling/agent) with policies
targeting the Nomad cluster.

[scaling_block]: /docs/job-specification/scaling
[autoscaling_policy]: /docs/autoscaling/policy
[autoscaler_github]: https://github.com/hashicorp/nomad-autoscaler
[autoscaler_releases]: https://releases.hashicorp.com/nomad-autoscaler/
[autoscaler_dockerhub]: https://hub.docker.com/repository/docker/hashicorp/nomad-autoscaler
[autoscaler_demo]: https://github.com/hashicorp/nomad-autoscaler/tree/master/demo
website/pages/docs/autoscaling/internals/checks.mdx (27 lines, new file)
@@ -0,0 +1,27 @@
---
layout: docs
page_title: Checks
sidebar_title: Checks
description: Learn about how the Autoscaler deals with policy checks.
---

# Nomad Autoscaler Check Calculations

A scaling policy can include several checks, all of which produce a scaling
suggestion. The checks are executed at the same time during a policy evaluation,
and their results can conflict with each other. In a scenario like this, the
autoscaler iterates over the results and chooses the safest one, that is, the
result which retains the most capacity of the resource.

In a scenario where two checks return different desired directions, the following
logic is applied.

- `ScaleOut and ScaleIn => ScaleOut`
- `ScaleOut and ScaleNone => ScaleOut`
- `ScaleIn and ScaleNone => ScaleNone`

In situations where the same two actions are suggested but with different counts,
the following logic is applied, where the count is the absolute desired value.

- `ScaleOut(10) and ScaleOut(9) => ScaleOut(10)`
- `ScaleIn(3) and ScaleIn(4) => ScaleIn(4)`
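
A minimal Go sketch of this resolution logic (the types are hypothetical, not
the Autoscaler's actual internals):

```go
package main

import "fmt"

// Direction is a hypothetical encoding of a check's suggested action,
// ordered so that a higher value retains more capacity.
type Direction int

const (
	ScaleIn Direction = iota
	ScaleNone
	ScaleOut
)

// Suggestion pairs a direction with an absolute desired count.
type Suggestion struct {
	Dir   Direction
	Count int
}

// safest picks the suggestion retaining the most capacity. Directions rank
// Out > None > In; for matching directions the higher absolute count wins,
// whether scaling in or out.
func safest(a, b Suggestion) Suggestion {
	if a.Dir != b.Dir {
		if a.Dir > b.Dir {
			return a
		}
		return b
	}
	if a.Count >= b.Count {
		return a
	}
	return b
}

func main() {
	fmt.Println(safest(Suggestion{ScaleOut, 10}, Suggestion{ScaleOut, 9})) // ScaleOut(10)
	fmt.Println(safest(Suggestion{ScaleIn, 3}, Suggestion{ScaleIn, 4}))   // ScaleIn(4)
}
```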
website/pages/docs/autoscaling/internals/index.mdx (13 lines, new file)
@@ -0,0 +1,13 @@
---
layout: docs
page_title: Internals
sidebar_title: Internals
description: >
  This section covers the internals of the Nomad Autoscaler and explains
  technical details of its operation.
---

# Nomad Autoscaler Internals

This section covers the internals of the Nomad Autoscaler and explains the
technical details of how it functions, its architecture, and sub-systems.
website/pages/docs/autoscaling/plugins/apm.mdx (149 lines, new file)
@@ -0,0 +1,149 @@
---
layout: docs
page_title: APM
sidebar_title: APM
description: APM plugins provide metric data points describing the resource's current state.
---

# APM Plugins

APMs are used to store metrics about an application's performance and current
state. The APM (Application Performance Management) plugin is responsible for
querying the APM and returning a value which will be used to determine if
scaling should occur.

## Prometheus APM Plugin

Use [Prometheus][prometheus_io] metrics to scale your Nomad job task groups or
cluster. The query performed on Prometheus should return a single value. You can
use the [scalar][prometheus_scaler_function] function in your query to achieve
this.

### Agent Configuration Options

```hcl
apm "prometheus" {
  driver = "prometheus"

  config = {
    address = "http://prometheus.my.endpoint.io:9090"
  }
}
```

- `address` `(string: "http://127.0.0.1:9090")` - The address of the Prometheus
  endpoint used to perform queries.

### Policy Configuration Options

```hcl
check {
  source = "prometheus"
  query  = "scalar(avg((haproxy_server_current_sessions{backend=\"http_back\"}) and (haproxy_server_up{backend=\"http_back\"} == 1)))"
  ...
}
```

## Nomad APM Plugin

The Nomad APM plugin allows querying the Nomad API for metric data. This provides
an immediate starting point without additional applications, but comes at the
price of efficiency. When using this APM, it is advised to monitor Nomad
carefully, ensuring it is not put under excessive load pressure.

### Agent Configuration Options

```hcl
apm "nomad-apm" {
  driver = "nomad-apm"
}
```

When using a Nomad cluster with ACLs enabled, the following ACL policy will
provide the appropriate permissions for obtaining task group metrics:

```hcl
namespace "default" {
  policy       = "read"
  capabilities = ["read-job"]
}
```

In order to obtain cluster level metrics, the following ACL policy will be required:

```hcl
node {
  policy = "read"
}

namespace "default" {
  policy       = "read"
  capabilities = ["read-job"]
}
```

### Policy Configuration Options - Task Groups

The Nomad APM allows querying Nomad to understand the current resource usage of
a task group.

```hcl
check {
  source = "nomad-apm"
  query  = "avg_cpu"
  ...
}
```

Querying Nomad task group metrics is done using the `operation_metric` syntax,
where valid operations are:

- `avg` - returns the average of the metric value across allocations in the task
  group.

- `min` - returns the lowest metric value among the allocations in the task group.

- `max` - returns the highest metric value among the allocations in the task
  group.

- `sum` - returns the sum of all the metric values for the allocations in the
  task group.

The metric value can be:

- `cpu` - CPU usage as reported by the `nomad.client.allocs.cpu.total_percent`
  metric.

- `memory` - Memory usage as reported by the `nomad.client.allocs.memory.usage`
  metric.

### Policy Configuration Options - Client Nodes

The Nomad APM allows querying Nomad to understand the currently allocated
resource as a percentage of the total available.

```hcl
check {
  source = "nomad-apm"
  query  = "percentage-allocated_cpu"
  ...
}
```

Querying Nomad client node metrics is done using the `operation_metric` syntax,
where valid operations are:

- `percentage-allocated` - returns the allocated percentage of the desired
  resource.

The metric value can be:

- `cpu` - allocated CPU as reported by calculating total allocatable against the
  total allocated by the scheduler.

- `memory` - allocated memory as reported by calculating total allocatable against
  the total allocated by the scheduler.

[prometheus_io]: https://prometheus.io/
[prometheus_scaler_function]: https://prometheus.io/docs/prometheus/latest/querying/functions/#scalar
[nomad_telemetry_stanza]: /docs/configuration/telemetry#inlinecode-publish_allocation_metrics
website/pages/docs/autoscaling/plugins/index.mdx (65 lines, new file)
@@ -0,0 +1,65 @@
---
layout: docs
page_title: Plugins
sidebar_title: Plugins
description: Plugins are used to architect the Nomad Autoscaler into distinct areas.
---

# Nomad Autoscaler Plugins

Plugins are an essential part of the Nomad Autoscaler architecture. The Autoscaler
uses the [go-plugin][go_plugin_github] library to implement an ecosystem of
different types of plugins. Each plugin type is responsible for a specific task;
APM plugins retrieve metrics about the workloads being monitored and Strategy
plugins decide which actions Nomad should execute to keep the policy valid. The
flexibility of plugins allows the Nomad Autoscaler to be extended to meet specific
business requirements or technology use cases.

The Nomad Autoscaler currently ships with a number of built-in plugins to ease
the learning curve. Details of these can be found below, under the specific
plugin type sections.

# General Options

All plugins which require Nomad API connectivity support the parameters detailed
below. These plugins include Nomad APM, Nomad Target and all cluster scaling
targets.

- `nomad_config_inherit` `(bool: true)` - A boolean flag which indicates whether
  the plugin should inherit the agent's Nomad configuration parameters. Plugins
  can override individual parameters and have their Nomad configuration merged
  with that of the agent, as sketched after this list.

- `nomad_address` `(string: "")` - The address of the Nomad server in the form
  of protocol://addr:port.

- `nomad_region` `(string: "")` - The region of the Nomad servers to connect with.

- `nomad_namespace` `(string: "")` - The target namespace for queries and actions
  bound to a namespace.

- `nomad_token` `(string: "")` - The SecretID of an ACL token to use to authenticate
  API requests with.

- `nomad_http_auth` `(string: "")` - The authentication information to use when
  connecting to a Nomad API which is using HTTP authentication.

- `nomad_ca_cert` `(string: "")` - Path to a PEM encoded CA cert file to use to
  verify the Nomad server SSL certificate.

- `nomad_ca_path` `(string: "")` - Path to a directory of PEM encoded CA cert
  files to verify the Nomad server SSL certificate.

- `nomad_client_cert` `(string: "")` - Path to a PEM encoded client certificate
  for TLS authentication to the Nomad server.

- `nomad_client_key` `(string: "")` - Path to an unencrypted PEM encoded private
  key matching the client certificate.

- `nomad_tls_server_name` `(string: "")` - The server name to use as the SNI
  host when connecting via TLS.

- `nomad_skip_verify` `(bool: false)` - Do not verify TLS certificates. This is
  strongly discouraged.
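
A sketch of a plugin block overriding the agent's Nomad address while inheriting
every other parameter (the address is a placeholder):

```hcl
apm "nomad-apm" {
  driver = "nomad-apm"

  config = {
    nomad_address = "http://nomad.service.consul:4646"
  }
}
```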

[go_plugin_github]: https://github.com/hashicorp/go-plugin
website/pages/docs/autoscaling/plugins/strategy.mdx (46 lines, new file)
@@ -0,0 +1,46 @@
---
layout: docs
page_title: Strategy
sidebar_title: Strategy
description: Strategy plugins compare the current state of the system against the desired state.
---

# Strategy Plugins

Strategy plugins compare the current state of the system against the desired state
defined by the operator in the scaling policy and generate an action that will
bring the system closer to the desired state. In practical terms, strategies
receive the current count and a metric value for a task group and output what
the new task group count should be.

## Target Value Strategy Plugin

The target value strategy plugin will perform count calculations in order to keep
the value resulting from the APM query at or around a specified target.

### Agent Configuration Options

```hcl
strategy "target-value" {
  driver = "target-value"
}
```

### Policy Configuration Options

```hcl
check {
  ...
  strategy "target-value" {
    target    = 20
    threshold = 0.0001
  }
  ...
}
```

- `target` `(float: <required>)` - Specifies the metric value the Autoscaler
  should try to meet.

- `threshold` `(float: 0.01)` - Specifies how significant a change in the input
  metric should be considered. Small threshold values can lead to output
  fluctuation.
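
A back-of-the-envelope Go sketch of this kind of calculation, assuming the
strategy scales the current count by the ratio of metric to target and ignores
deviations inside the threshold (this is an illustration, not the plugin's
actual code):

```go
package main

import (
	"fmt"
	"math"
)

// desiredCount suggests a new count so that metric/target moves back toward 1.
func desiredCount(count int64, metric, target, threshold float64) int64 {
	factor := metric / target
	// Treat tiny deviations from the target as noise.
	if math.Abs(1.0-factor) < threshold {
		return count
	}
	return int64(math.Ceil(float64(count) * factor))
}

func main() {
	fmt.Println(desiredCount(5, 30, 20, 0.01))   // 8: factor 1.5 -> ceil(7.5)
	fmt.Println(desiredCount(5, 20.1, 20, 0.01)) // 5: within threshold, no change
}
```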
website/pages/docs/autoscaling/plugins/target.mdx (154 lines, new file)
@@ -0,0 +1,154 @@
---
layout: docs
page_title: Target
sidebar_title: Target
description: Target plugins determine where the resource to be autoscaled is located.
---

# Target Plugins

Target plugins determine where the resource to be autoscaled is located. All
target plugins support the `dry-run` policy config parameter, which allows a
policy to be evaluated, but will noop any suggested changes.

## Nomad Task Group Target

The Nomad task group target indicates the scalable resource is a Nomad job
running on a Nomad cluster.

### Agent Configuration Options

The Nomad target is automatically launched by the Nomad Autoscaler, and so the
following setup is optional.

```hcl
target "nomad" {
  driver = "nomad"
}
```

### Policy Configuration Options

If using the [Nomad job specification scaling stanza][nomad_scaling_stanza] to
configure the scaling policy, the following section can be omitted, as Nomad
will populate it on job submission.

```hcl
check {
  ...
  target "nomad" {
    Job   = "example"
    Group = "cache"
  }
  ...
}
```

- `job` `(string: "")` - The job identifier which contains the task group to
  scale as defined within the job specification [job stanza][nomad_job_stanza].

- `group` `(string: "")` - The name of the task group to scale as defined in the
  job specification [group stanza][nomad_group_stanza].

## AWS AutoScaling Group Target

The AWS ASG target plugin allows for the scaling of the Nomad cluster clients
via manipulating [AWS AutoScaling Groups][aws_autoscaling].

### Agent Configuration Options

To use the AWS ASG target plugin, the agent configuration needs to be populated
with the appropriate target block. Authentication to the AWS API can be supplied
in a number of ways, including EC2 instance roles. It is recommended, if possible,
to use the [Vault AWS Secrets engine][vault_aws_backend] for supplying access
credentials to the plugin. Credentials should be injected into the configuration
via a template rather than as environment variables. This ensures the credentials
are passed only to the plugin, rather than being available for all plugins and
the agent process.

The IAM policy required for the AWS ASG plugin to function properly is detailed
below.

```json
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Sid": "",
      "Effect": "Allow",
      "Action": [
        "ec2:TerminateInstances",
        "ec2:DescribeInstanceStatus",
        "autoscaling:UpdateAutoScalingGroup",
        "autoscaling:DetachInstances",
        "autoscaling:DescribeScalingActivities",
        "autoscaling:DescribeAutoScalingGroups",
        "autoscaling:CreateOrUpdateTags"
      ],
      "Resource": "*"
    }
  ]
}
```

```hcl
target "aws-asg" {
  driver = "aws-asg"
  config = {
    aws_region        = "eu-west-3"
    aws_access_key_id = "AKIAIOSFODNN7EXAMPLE"
    aws_secret_key_id = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
  }
}
```

When using a Nomad cluster with ACLs enabled, the plugin will require an ACL
token which provides the following permissions:

```hcl
node {
  policy = "write"
}
```

- `aws_region` `(string: "us-east-1")` - The [AWS region][aws_region] identifier
  to connect to and where resources should be managed.

- `aws_access_key_id` `(string: "")` - The AWS access key ID used to authenticate
  with the AWS API.

- `aws_secret_key_id` `(string: "")` - The AWS secret key ID used to authenticate
  with the AWS API.

- `aws_session_token` `(string: "")` - The AWS session token used to authenticate
  with the AWS API.

### Policy Configuration Options

```hcl
check {
  ...
  target "aws-asg" {
    aws_asg_name        = "hashistack-client-asg"
    node_class          = "hashistack"
    node_drain_deadline = "5m"
  }
  ...
}
```

- `aws_asg_name` `(string: <required>)` - The name of the AWS AutoScaling Group
  to interact with when performing scaling actions.

- `node_class` `(string: <required>)` - The Nomad [client node class][nomad_node_class]
  identifier used to group nodes into a pool of resource.

- `node_drain_deadline` `(duration: "15m")` - The Nomad [drain deadline][nomad_node_drain_deadline]
  to use when performing node draining actions.

[nomad_node_class]: https://www.nomadproject.io/docs/configuration/client#node_class
[nomad_node_drain_deadline]: https://www.nomadproject.io/api-docs/nodes#deadline
[nomad_scaling_stanza]: /docs/job-specification/scaling
[nomad_group_stanza]: /docs/job-specification/group#group-stanza
[nomad_job_stanza]: /docs/job-specification/job#job-stanza
[aws_region]: https://aws.amazon.com/about-aws/global-infrastructure/regions_az/
[aws_autoscaling]: https://aws.amazon.com/autoscaling/
[vault_aws_backend]: https://www.vaultproject.io/docs/secrets/aws
website/pages/docs/autoscaling/policy.mdx (96 lines, new file)
@@ -0,0 +1,96 @@
---
layout: docs
page_title: Scaling Policies
sidebar_title: Policy
description: >
  Scaling policies describe the target resource's desired state and how to
  perform the calculations that bring the current state to the desired one.
---

# Nomad Autoscaler Scaling Policies

Nomad Autoscaler scaling policies can be configured via the
[task group scaling stanza][jobspec_scaling_stanza] or by configuration
files stored on disk.

## Top Level Options

- `enabled` - A boolean flag that allows operators to administratively disable a
  policy from active evaluation.

- `min` - The minimum running count of the targeted resource. This can be 0 or any
  positive integer.

- `max` - The maximum running count of the targeted resource. This can be 0 or any
  positive integer.

## `policy` Options

- `cooldown` - A time interval after a scaling action during which no additional
  scaling will be performed on the resource. It should be provided as a duration
  (e.g.: "5s", "1m"). If omitted, the configuration value
  [default_cooldown][policy_default_cooldown_agent] from the agent will
  be used.

- `evaluation_interval` - Defines how often the policy is evaluated by the
  Autoscaler. It should be provided as a duration (e.g.: "5s", "1m"). If
  omitted, the configuration value [default_evaluation_interval][eval_interval_agent]
  from the agent will be used.

- `target` - Defines where the autoscaling target is running. Detailed information
  on the configuration options can be found on the [target plugin][target_plugin_docs]
  page.

- `check` - Specifies one or more checks to be executed when determining if a
  scaling action is required.

## `check` Options

- `source` - The APM plugin that should handle the metric query. If omitted,
  this defaults to using the Nomad APM.

- `query` - The query to run against the specified APM. Currently this query
  should return a single value. Detailed information on the configuration options
  can be found on the [apm plugin][apm_plugin_docs] page.

- `strategy` - The strategy to use, and its configuration, when calculating the
  desired state based on the current count and the metric returned by the APM.
  Detailed information on the configuration options can be found on the
  [strategy plugin][strategy_plugin_docs] page.

### Example

A full example of a policy document that can be written into the Nomad task group
scaling stanza or placed in a file within the policy dir can be seen below.

```hcl
min     = 2
max     = 10
enabled = true

policy {
  evaluation_interval = "5s"
  cooldown            = "1m"

  target "target" {
    Job   = "example"
    Group = "example"
  }

  check "active_connections" {
    source = "prometheus"
    query  = "scalar(open_connections_example_cache)"

    strategy "target-value" {
      target = 10
    }
  }
}
```

[policy_default_cooldown_agent]: /docs/autoscaling/agent#default_cooldown
[eval_interval_agent]: /docs/autoscaling/agent#default_evaluation_interval
[target_plugin_docs]: /docs/autoscaling/plugins/target
[strategy_plugin_docs]: /docs/autoscaling/plugins/strategy
[apm_plugin_docs]: /docs/autoscaling/plugins/apm
[jobspec_scaling_stanza]: /docs/job-specification/scaling
@@ -137,6 +137,9 @@ driver) but will be removed in a future release.
|
||||
CNI plugin discovery. Multiple paths can be searched using colon delimited
|
||||
paths
|
||||
|
||||
- `cni_config_dir` `(string: "/opt/cni/config")` - Sets the directory where CNI
|
||||
network configuration is located. The client will use this path when fingerprinting CNI networks.
|
||||
|
||||
- `bridge_network name` `(string: "nomad")` - Sets the name of the bridge to be
|
||||
created by nomad for allocations running with bridge networking mode on the
|
||||
client.
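
As a quick illustration of how these options fit together, a client agent
configuration might look like the following (a sketch only; the paths and
bridge name shown are illustrative defaults, not recommendations):

```hcl
client {
  enabled = true

  # Colon-delimited search path for CNI plugin binaries.
  cni_path = "/opt/cni/bin"

  # Directory fingerprinted for CNI network configuration files.
  cni_config_dir = "/opt/cni/config"

  # Name of the bridge created for bridge-mode allocations.
  bridge_network_name = "nomad"
}
```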

@@ -151,6 +154,9 @@ driver) but will be removed in a future release.

- `host_volume` <code>([host_volume](#host_volume-stanza): nil)</code> - Exposes
  paths from the host as volumes that can be mounted into jobs.

- `host_network` <code>([host_network](#host_network-stanza): nil)</code> - Registers
  additional host networks with the node that can be selected when port mapping.

### `chroot_env` Parameters

Drivers based on [isolated fork/exec](/docs/drivers/exec) implement file

@@ -372,6 +378,35 @@ client {

- `read_only` `(bool: false)` - Specifies whether the volume should only ever be
  allowed to be mounted `read_only`, or if it should be writeable.

### `host_network` Stanza

The `host_network` stanza is used to register additional host networks with
the node that can be used when port mapping.

The key of the stanza corresponds to the name of the network used in the
[`host_network`](/docs/job-specification/network#host-network) field of the
job specification.

```hcl
client {
  host_network "public" {
    cidr           = "203.0.113.0/24"
    reserved_ports = "22,80"
  }
}
```

#### `host_network` Parameters

- `cidr` `(string: "")` - Specifies a CIDR block of addresses to match against.
  If an address is found on the node that is contained by this CIDR block, the
  host network will be registered with it.

- `interface` `(string: "")` - Filters searching of addresses to a specific
  interface. See the sketch after this list.

- `reserved_ports` `(string: "")` - Specifies a comma-separated list of ports to
  reserve on all fingerprinted network devices. Ranges can be specified by using
  a hyphen separating the two inclusive ends.
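
For example, a host network matched by interface rather than CIDR could be
registered as follows (a sketch; the interface name is illustrative):

```hcl
client {
  host_network "private" {
    # Only consider addresses bound to this interface.
    interface      = "eth1"
    reserved_ports = "22"
  }
}
```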

## `client` Examples

### Common Setup

@@ -14,7 +14,6 @@ description: |-

<Placement
  groups={[
    ['job', 'group', 'network'],
    ['job', 'group', 'task', 'resources', 'network']
  ]}
/>

@@ -73,6 +72,8 @@ job "docs" {

  drivers.
- `host` - Each task will join the host network namespace and a shared network
  namespace is not created. This matches the current behavior in Nomad 0.9.
- `cni/<cni network name>` - The task group will have an isolated network namespace
  with the network configured by CNI.

- `dns` <code>([DNSConfig](#dns-parameters): nil)</code> - Sets the DNS configuration
  for the allocations. By default, all DNS configuration is inherited from the
  client host.

@@ -80,10 +81,16 @@ job "docs" {

### `port` Parameters

- `static` `(int: nil)` - Specifies the static TCP/UDP port to allocate. If omitted, a
  dynamic port is chosen. We **do not recommend** using static ports, except
  for `system` or specialized jobs like load balancers.
- `to` `(string: nil)` - Applicable when using "bridge" mode to configure the port
  to map to inside the task's network namespace. `-1` sets the mapped port equal to the
  dynamic port allocated by the scheduler. The `NOMAD_PORT_<label>` environment variable
  will contain the `to` value. See the sketch after this list.
- `host_network` `(string: nil)` - Designates the host network name to use when allocating
  the port. When port mapping, the host port will only forward traffic to the matched host
  network address.
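
As a brief illustration of the `-1` behavior described above (a sketch; the
port label is illustrative):

```hcl
network {
  mode = "bridge"

  port "grpc" {
    # Map the dynamically allocated host port to the same port
    # number inside the task's network namespace.
    to = -1
  }
}
```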

The label assigned to the port is used to identify the port in service
discovery, and used in the name of the environment variable that indicates

@@ -224,6 +231,74 @@ network {
}
```

### Container Network Interface (CNI)

Nomad supports CNI by fingerprinting each node for [CNI network configurations](https://github.com/containernetworking/cni/blob/v0.8.0/SPEC.md#network-configuration).
These are associated with the node by the `name` field of the CNI configuration.
The `name` can then be used when setting the network `mode` field in the form of `cni/<name>`.

As an example, if the following CNI configuration was present on a node, the
network stanza shown after it could be used.

```json
{
  "cniVersion": "0.3.1",
  "name": "mynet",
  "plugins": [
    {
      "type": "ptp",
      "ipMasq": true,
      "ipam": {
        "type": "host-local",
        "subnet": "172.16.30.0/24",
        "routes": [
          {
            "dst": "0.0.0.0/0"
          }
        ]
      }
    },
    {
      "type": "portmap",
      "capabilities": { "portMappings": true }
    }
  ]
}
```

```hcl
network {
  mode = "cni/mynet"

  port "http" {
    to = 8080
  }
}
```

The Nomad client will build the correct [capabilities arguments](https://github.com/containernetworking/cni/blob/v0.8.0/CONVENTIONS.md#well-known-capabilities) for the portmap plugin based on the defined port stanzas.

### Host Networks

If `host_network` is set for a port, Nomad will schedule the allocations on a node
which has defined a `host_network` with the given name. If not set, the "default"
host network is used, which is commonly the address with a default route associated
with it.

```hcl
network {
  mode = "bridge"

  # define a port to use for public https traffic
  port "https" {
    static       = 443
    to           = 8080
    host_network = "public"
  }

  # define a port that is only exposed to private traffic
  port "admin" {
    to           = 9000
    host_network = "private"
  }
}
```

### Limitations

- Only one `network` stanza can be specified when it is defined at the task
  group level.

@@ -149,8 +149,8 @@ Connect][connect] integration.

- `task` `(string: "")` - Specifies the name of the Nomad Task associated with
  this service definition. Only available on group services. Must be set if this
  service definition represents a Consul Connect Native service and there is more
  than one task in the task group. See the sketch below.
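
The following sketch shows a group service that needs `task` set because the
group runs more than one task and the service is Connect Native (all names and
ports are illustrative):

```hcl
group "api" {
  network {
    mode = "bridge"
  }

  service {
    name = "api-native"
    port = "9001"

    # Required: two tasks exist in this group, so Nomad cannot
    # infer which one implements the Connect Native service.
    task = "server"

    connect {
      native = true
    }
  }

  task "server" {
    driver = "docker"
  }

  task "logger" {
    driver = "docker"
  }
}
```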

- `meta` <code>([Meta][]: nil)</code> - Specifies a key-value map that annotates
  the Consul service with user-defined metadata. An example follows below.
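
For instance (a sketch; the keys and values are illustrative):

```hcl
service {
  name = "cache"

  # Arbitrary user-defined metadata attached to the Consul service.
  meta {
    owner = "platform-team"
    tier  = "internal"
  }
}
```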