From 5d4e0f18bc1bc83b8c50ede3afbce19ec04feb29 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 15 Nov 2016 13:55:51 -0800 Subject: [PATCH 1/3] Updated AWS speeds and network_speed now overrides This PR: * Makes AWS network speeds more granular * Makes `network_speed` an override and not a default * Adds a default of 1000 MBits if no network link speed is detected. Fixes #1985 --- client/fingerprint/env_aws.go | 95 +++++++++---------- client/fingerprint/env_aws_test.go | 92 +++++++++++++++++- client/fingerprint/network.go | 16 +++- client/fingerprint/network_test.go | 6 +- command/agent/config.go | 4 +- .../docs/agent/configuration/client.html.md | 7 +- 6 files changed, 158 insertions(+), 62 deletions(-) diff --git a/client/fingerprint/env_aws.go b/client/fingerprint/env_aws.go index 5d33fdd89..e8b872940 100644 --- a/client/fingerprint/env_aws.go +++ b/client/fingerprint/env_aws.go @@ -21,51 +21,24 @@ import ( const DEFAULT_AWS_URL = "http://169.254.169.254/latest/meta-data/" // map of instance type to approximate speed, in Mbits/s -// http://serverfault.com/questions/324883/aws-bandwidth-and-content-delivery/326797#326797 -// which itself cites these sources: -// - http://blog.rightscale.com/2007/10/28/network-performance-within-amazon-ec2-and-to-amazon-s3/ -// - http://www.soc.napier.ac.uk/~bill/chris_p.pdf -// +// Estimates from http://stackoverflow.com/a/35806587 // This data is meant for a loose approximation -var ec2InstanceSpeedMap = map[string]int{ - "m4.large": 80, - "m3.medium": 80, - "m3.large": 80, - "c4.large": 80, - "c3.large": 80, - "c3.xlarge": 80, - "r3.large": 80, - "r3.xlarge": 80, - "i2.xlarge": 80, - "d2.xlarge": 80, - "t2.micro": 16, - "t2.small": 16, - "t2.medium": 16, - "t2.large": 16, - "m4.xlarge": 760, - "m4.2xlarge": 760, - "m4.4xlarge": 760, - "m3.xlarge": 760, - "m3.2xlarge": 760, - "c4.xlarge": 760, - "c4.2xlarge": 760, - "c4.4xlarge": 760, - "c3.2xlarge": 760, - "c3.4xlarge": 760, - "g2.2xlarge": 760, - "r3.2xlarge": 760, - "r3.4xlarge": 760, - "i2.2xlarge": 760, - "i2.4xlarge": 760, - "d2.2xlarge": 760, - "d2.4xlarge": 760, - "m4.10xlarge": 10000, - "c4.8xlarge": 10000, - "c3.8xlarge": 10000, - "g2.8xlarge": 10000, - "r3.8xlarge": 10000, - "i2.8xlarge": 10000, - "d2.8xlarge": 10000, +var ec2InstanceSpeedMap = map[*regexp.Regexp]int{ + regexp.MustCompile("t2.nano"): 30, + regexp.MustCompile("t2.micro"): 70, + regexp.MustCompile("t2.small"): 125, + regexp.MustCompile("t2.medium"): 300, + regexp.MustCompile("m3.medium"): 400, + regexp.MustCompile("c4.8xlarge"): 4000, + regexp.MustCompile("x1.16xlarge"): 5000, + regexp.MustCompile(`.*\.large`): 500, + regexp.MustCompile(`.*\.xlarge`): 750, + regexp.MustCompile(`.*\.2xlarge`): 1000, + regexp.MustCompile(`.*\.4xlarge`): 2000, + regexp.MustCompile(`.*\.8xlarge`): 10000, + regexp.MustCompile(`.*\.10xlarge`): 10000, + regexp.MustCompile(`.*\.16xlarge`): 10000, + regexp.MustCompile(`.*\.32xlarge`): 10000, } // EnvAWSFingerprint is used to fingerprint AWS metadata @@ -156,14 +129,33 @@ func (f *EnvAWSFingerprint) Fingerprint(cfg *config.Config, node *structs.Node) } // find LinkSpeed from lookup - if throughput := f.linkSpeed(); throughput > 0 { - newNetwork.MBits = throughput + throughput := f.linkSpeed() + if cfg.NetworkSpeed != 0 { + throughput = cfg.NetworkSpeed + } else if throughput == 0 { + // Failed to determine speed. Check if the network fingerprint got it + found := false + if node.Resources != nil && len(node.Resources.Networks) > 0 { + for _, n := range node.Resources.Networks { + if n.IP == newNetwork.IP { + throughput = n.MBits + found = true + break + } + } + } + + // Nothing detected so default + if !found { + throughput = defaultNetworkSpeed + } } // populate Node Network Resources if node.Resources == nil { node.Resources = &structs.Resources{} } + newNetwork.MBits = throughput node.Resources.Networks = []*structs.NetworkResource{newNetwork} // populate Links @@ -240,10 +232,13 @@ func (f *EnvAWSFingerprint) linkSpeed() int { } key := strings.Trim(string(body), "\n") - v, ok := ec2InstanceSpeedMap[key] - if !ok { - return 0 + netSpeed := 0 + for reg, speed := range ec2InstanceSpeedMap { + if reg.MatchString(key) { + netSpeed = speed + break + } } - return v + return netSpeed } diff --git a/client/fingerprint/env_aws_test.go b/client/fingerprint/env_aws_test.go index 105a828f7..c12185212 100644 --- a/client/fingerprint/env_aws_test.go +++ b/client/fingerprint/env_aws_test.go @@ -122,7 +122,7 @@ const aws_routes = ` { "uri": "/latest/meta-data/instance-type", "content-type": "text/plain", - "body": "m3.large" + "body": "m3.2xlarge" }, { "uri": "/latest/meta-data/local-hostname", @@ -199,6 +199,96 @@ func TestNetworkFingerprint_AWS(t *testing.T) { } } +func TestNetworkFingerprint_AWS_network(t *testing.T) { + // configure mock server with fixture routes, data + // TODO: Refator with the AWS ENV test + routes := routes{} + if err := json.Unmarshal([]byte(aws_routes), &routes); err != nil { + t.Fatalf("Failed to unmarshal JSON in AWS ENV test: %s", err) + } + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + for _, e := range routes.Endpoints { + if r.RequestURI == e.Uri { + w.Header().Set("Content-Type", e.ContentType) + fmt.Fprintln(w, e.Body) + } + } + })) + + defer ts.Close() + os.Setenv("AWS_ENV_URL", ts.URL+"/latest/meta-data/") + + f := NewEnvAWSFingerprint(testLogger()) + node := &structs.Node{ + Attributes: make(map[string]string), + } + + cfg := &config.Config{} + ok, err := f.Fingerprint(cfg, node) + if err != nil { + t.Fatalf("err: %v", err) + } + if !ok { + t.Fatalf("should apply") + } + + assertNodeAttributeContains(t, node, "unique.network.ip-address") + + if node.Resources == nil || len(node.Resources.Networks) == 0 { + t.Fatal("Expected to find Network Resources") + } + + // Test at least the first Network Resource + net := node.Resources.Networks[0] + if net.IP == "" { + t.Fatal("Expected Network Resource to have an IP") + } + if net.CIDR == "" { + t.Fatal("Expected Network Resource to have a CIDR") + } + if net.Device == "" { + t.Fatal("Expected Network Resource to have a Device Name") + } + if net.MBits != 1000 { + t.Fatalf("Expected Network Resource to have speed %d; got %d", 1000, net.MBits) + } + + // Try again this time setting a network speed in the config + node = &structs.Node{ + Attributes: make(map[string]string), + } + + cfg.NetworkSpeed = 10 + ok, err = f.Fingerprint(cfg, node) + if err != nil { + t.Fatalf("err: %v", err) + } + if !ok { + t.Fatalf("should apply") + } + + assertNodeAttributeContains(t, node, "unique.network.ip-address") + + if node.Resources == nil || len(node.Resources.Networks) == 0 { + t.Fatal("Expected to find Network Resources") + } + + // Test at least the first Network Resource + net = node.Resources.Networks[0] + if net.IP == "" { + t.Fatal("Expected Network Resource to have an IP") + } + if net.CIDR == "" { + t.Fatal("Expected Network Resource to have a CIDR") + } + if net.Device == "" { + t.Fatal("Expected Network Resource to have a Device Name") + } + if net.MBits != 10 { + t.Fatalf("Expected Network Resource to have speed %d; got %d", 10, net.MBits) + } +} + func TestNetworkFingerprint_notAWS(t *testing.T) { os.Setenv("AWS_ENV_URL", "http://127.0.0.1/latest/meta-data/") f := NewEnvAWSFingerprint(testLogger()) diff --git a/client/fingerprint/network.go b/client/fingerprint/network.go index 9d4e8683a..eceb213e4 100644 --- a/client/fingerprint/network.go +++ b/client/fingerprint/network.go @@ -10,6 +10,12 @@ import ( "github.com/hashicorp/nomad/nomad/structs" ) +const ( + // defaultNetworkSpeed is the speed set if the network link speed could not + // be detected. + defaultNetworkSpeed = 1000 +) + // NetworkFingerprint is used to fingerprint the Network capabilities of a node type NetworkFingerprint struct { StaticFingerprinter @@ -74,12 +80,16 @@ func (f *NetworkFingerprint) Fingerprint(cfg *config.Config, node *structs.Node) f.logger.Printf("[DEBUG] fingerprint.network: Detected interface %v with IP %v during fingerprinting", intf.Name, ip) - if throughput := f.linkSpeed(intf.Name); throughput > 0 { + throughput := f.linkSpeed(intf.Name) + if cfg.NetworkSpeed != 0 { + newNetwork.MBits = cfg.NetworkSpeed + f.logger.Printf("[DEBUG] fingerprint.network: setting link speed to user configured speed: %d", newNetwork.MBits) + } else if throughput != 0 { newNetwork.MBits = throughput f.logger.Printf("[DEBUG] fingerprint.network: link speed for %v set to %v", intf.Name, newNetwork.MBits) } else { - f.logger.Printf("[DEBUG] fingerprint.network: Unable to read link speed; setting to default %v", cfg.NetworkSpeed) - newNetwork.MBits = cfg.NetworkSpeed + newNetwork.MBits = defaultNetworkSpeed + f.logger.Printf("[DEBUG] fingerprint.network: link speed could not be detected and no speed specified by user. Defaulting to %d", defaultNetworkSpeed) } if node.Resources == nil { diff --git a/client/fingerprint/network_test.go b/client/fingerprint/network_test.go index 2d8fa4ea3..84e0a7642 100644 --- a/client/fingerprint/network_test.go +++ b/client/fingerprint/network_test.go @@ -151,7 +151,7 @@ func TestNetworkFingerprint_basic(t *testing.T) { node := &structs.Node{ Attributes: make(map[string]string), } - cfg := &config.Config{NetworkSpeed: 100} + cfg := &config.Config{NetworkSpeed: 101} ok, err := f.Fingerprint(cfg, node) if err != nil { @@ -184,8 +184,8 @@ func TestNetworkFingerprint_basic(t *testing.T) { if net.Device == "" { t.Fatal("Expected Network Resource to have a Device Name") } - if net.MBits == 0 { - t.Fatal("Expected Network Resource to have a non-zero bandwith") + if net.MBits != 101 { + t.Fatalf("Expected Network Resource to have bandwith %d; got %d", 101, net.MBits) } } diff --git a/command/agent/config.go b/command/agent/config.go index b3ec9cbbc..dcfce9bf0 100644 --- a/command/agent/config.go +++ b/command/agent/config.go @@ -177,7 +177,8 @@ type ClientConfig struct { // Interface to use for network fingerprinting NetworkInterface string `mapstructure:"network_interface"` - // The network link speed to use if it can not be determined dynamically. + // NetworkSpeed is used to override any detected or default network link + // speed. NetworkSpeed int `mapstructure:"network_speed"` // MaxKillTimeout allows capping the user-specifiable KillTimeout. @@ -485,7 +486,6 @@ func DefaultConfig() *Config { Vault: config.DefaultVaultConfig(), Client: &ClientConfig{ Enabled: false, - NetworkSpeed: 100, MaxKillTimeout: "30s", ClientMinPort: 14000, ClientMaxPort: 14512, diff --git a/website/source/docs/agent/configuration/client.html.md b/website/source/docs/agent/configuration/client.html.md index 58d368b52..601a255c6 100644 --- a/website/source/docs/agent/configuration/client.html.md +++ b/website/source/docs/agent/configuration/client.html.md @@ -53,9 +53,10 @@ client { interface to force network fingerprinting on. This defaults to the loopback interface. -- `network_speed` `(int: 100)` - Specifies the default link speed of network - interfaces, in megabits. Most clients can determine their speed automatically, - but will fallback to this value if they cannot. +- `network_speed` `(int: 0)` - Specifies an override for the network link speed. + This value, if set, overrides any detected or defaulted link speed. Most + clients can determine their speed automatically, and thus in most cases this + should be left unset. - `node_class` `(string: "")` - Specifies an arbitrary string used to logically group client nodes by user-defined class. This can be used during job From ba6473a5c788a52f515465673a53766fcd2000a8 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 15 Nov 2016 13:58:36 -0800 Subject: [PATCH 2/3] Backwards compatibility notice --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d9660f64f..44496c1f3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,8 @@ __BACKWARDS INCOMPATIBILITIES:__ new block is name `ephemeral_disk`. Nomad will automatically convert existing jobs but newly submitted jobs should refactor the disk resource [GH-1710, GH-1679] + * agent/config: `network_speed` is now an override and not a default value. If + the network link speed is not detected a default value is applied. IMPROVEMENTS: * core: Support for gossip encryption [GH-1791] From cce384e0f1d8cd8a4c882e7e894e863777fd1f81 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 15 Nov 2016 16:23:37 -0800 Subject: [PATCH 3/3] Remove old TODOs --- client/fingerprint/env_aws_test.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/client/fingerprint/env_aws_test.go b/client/fingerprint/env_aws_test.go index c12185212..6a58bc015 100644 --- a/client/fingerprint/env_aws_test.go +++ b/client/fingerprint/env_aws_test.go @@ -150,7 +150,6 @@ const aws_routes = ` func TestNetworkFingerprint_AWS(t *testing.T) { // configure mock server with fixture routes, data - // TODO: Refator with the AWS ENV test routes := routes{} if err := json.Unmarshal([]byte(aws_routes), &routes); err != nil { t.Fatalf("Failed to unmarshal JSON in AWS ENV test: %s", err) @@ -201,7 +200,6 @@ func TestNetworkFingerprint_AWS(t *testing.T) { func TestNetworkFingerprint_AWS_network(t *testing.T) { // configure mock server with fixture routes, data - // TODO: Refator with the AWS ENV test routes := routes{} if err := json.Unmarshal([]byte(aws_routes), &routes); err != nil { t.Fatalf("Failed to unmarshal JSON in AWS ENV test: %s", err)