Mirror of https://github.com/kemko/nomad.git (synced 2026-01-09 11:55:42 +03:00)
Merge branch 'master' into b-2117-windows-group-process
Vagrantfile (vendored): 2 changes
@@ -156,7 +156,7 @@ def suggestedCPUCores()
   when /darwin/
     Integer(`sysctl -n hw.ncpu`) / 2
   when /linux/
-    Integer(`cat /proc/cpuinfo | grep processor | wc -l`) / 2
+    Integer(`grep -c ^processor /proc/cpuinfo`) / 2
   else
     2
   end

@@ -259,7 +259,13 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulServic
 		return nil, fmt.Errorf("node setup failed: %v", err)
 	}
 
-	fingerprintManager := NewFingerprintManager(c.GetConfig, c.config.Node,
+	// Store the config copy before restoring state but after it has been
+	// initialized.
+	c.configLock.Lock()
+	c.configCopy = c.config.Copy()
+	c.configLock.Unlock()
+
+	fingerprintManager := NewFingerprintManager(c.GetConfig, c.configCopy.Node,
 		c.shutdownCh, c.updateNodeFromFingerprint, c.updateNodeFromDriver,
 		c.logger)
 
@@ -271,12 +277,6 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulServic
 	// Setup the reserved resources
 	c.reservePorts()
 
-	// Store the config copy before restoring state but after it has been
-	// initialized.
-	c.configLock.Lock()
-	c.configCopy = c.config.Copy()
-	c.configLock.Unlock()
-
 	// Set the preconfigured list of static servers
 	c.configLock.RLock()
 	if len(c.configCopy.Servers) > 0 {
@@ -437,7 +437,7 @@ func (c *Client) Leave() error {
 func (c *Client) GetConfig() *config.Config {
 	c.configLock.Lock()
 	defer c.configLock.Unlock()
-	return c.config
+	return c.configCopy
 }
 
 // Datacenter returns the datacenter for the given client
@@ -726,7 +726,7 @@ func (c *Client) restoreState() error {
 		watcher := noopPrevAlloc{}
 
 		c.configLock.RLock()
-		ar := NewAllocRunner(c.logger, c.configCopy, c.stateDB, c.updateAllocStatus, alloc, c.vaultClient, c.consulService, watcher)
+		ar := NewAllocRunner(c.logger, c.configCopy.Copy(), c.stateDB, c.updateAllocStatus, alloc, c.vaultClient, c.consulService, watcher)
 		c.configLock.RUnlock()
 
 		c.allocLock.Lock()
@@ -963,6 +963,9 @@ func (c *Client) reservePorts() {
 	for _, net := range reservedIndex {
 		node.Reserved.Networks = append(node.Reserved.Networks, net)
 	}
+
+	// Make the changes available to the config copy.
+	c.configCopy = c.config.Copy()
 }
 
 // updateNodeFromFingerprint updates the node with the result of
@@ -1009,10 +1012,10 @@ func (c *Client) updateNodeFromFingerprint(response *cstructs.FingerprintRespons
 	}
 
 	if nodeHasChanged {
-		c.updateNode()
+		c.updateNodeLocked()
 	}
 
-	return c.config.Node
+	return c.configCopy.Node
 }
 
 // updateNodeFromDriver receives either a fingerprint of the driver or its
@@ -1104,10 +1107,10 @@ func (c *Client) updateNodeFromDriver(name string, fingerprint, health *structs.
 
 	if hasChanged {
 		c.config.Node.Drivers[name].UpdateTime = time.Now()
-		c.updateNode()
+		c.updateNodeLocked()
 	}
 
-	return c.config.Node
+	return c.configCopy.Node
 }
 
 // resourcesAreEqual is a temporary function to compare whether resources are
@@ -1752,9 +1755,14 @@ OUTER:
 		}
 	}
 
-// updateNode triggers a client to update its node copy if it isn't doing
-// so already
-func (c *Client) updateNode() {
+// updateNode updates the Node copy and triggers the client to send the updated
+// Node to the server. This should be done while the caller holds the
+// configLock lock.
+func (c *Client) updateNodeLocked() {
+	// Update the config copy.
+	node := c.config.Node.Copy()
+	c.configCopy.Node = node
+
 	select {
 	case c.triggerNodeUpdate <- struct{}{}:
 		// Node update goroutine was released to execute
@@ -1774,15 +1782,7 @@ func (c *Client) watchNodeUpdates() {
 		select {
 		case <-timer.C:
 			c.logger.Printf("[DEBUG] client: state changed, updating node and re-registering.")
-
-			// Update the config copy.
-			c.configLock.Lock()
-			node := c.config.Node.Copy()
-			c.configCopy.Node = node
-			c.configLock.Unlock()
-
 			c.retryRegisterNode()
-
 			hasChanged = false
 		case <-c.triggerNodeUpdate:
 			if hasChanged {
@@ -1899,7 +1899,10 @@ func (c *Client) addAlloc(alloc *structs.Allocation, migrateToken string) error
 	c.configLock.RLock()
 	prevAlloc := newAllocWatcher(alloc, prevAR, c, c.configCopy, c.logger, migrateToken)
 
-	ar := NewAllocRunner(c.logger, c.configCopy, c.stateDB, c.updateAllocStatus, alloc, c.vaultClient, c.consulService, prevAlloc)
+	// Copy the config since the node can be swapped out as it is being updated.
+	// The long term fix is to pass in the config and node separately and then
+	// we don't have to do a copy.
+	ar := NewAllocRunner(c.logger, c.configCopy.Copy(), c.stateDB, c.updateAllocStatus, alloc, c.vaultClient, c.consulService, prevAlloc)
 	c.configLock.RUnlock()
 
 	// Store the alloc runner.

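Illustrative aside (not part of the commit): the client hunks above converge on one pattern. The client keeps c.config as the canonical, mutable configuration, republishes c.configCopy under configLock whenever the node changes, and hands readers only the snapshot (GetConfig, the NewAllocRunner callers). The sketch below shows that copy-on-update idea with hypothetical stand-in types; it is not Nomad's real Client or Config and the lock granularity is simplified.

```go
package main

import (
	"fmt"
	"sync"
)

type Node struct {
	Attributes map[string]string
}

// Copy returns an independent Node so readers never alias the mutable original.
func (n *Node) Copy() *Node {
	if n == nil {
		return nil
	}
	attrs := make(map[string]string, len(n.Attributes))
	for k, v := range n.Attributes {
		attrs[k] = v
	}
	return &Node{Attributes: attrs}
}

type Config struct {
	Node *Node
}

func (c *Config) Copy() *Config {
	return &Config{Node: c.Node.Copy()}
}

type Client struct {
	configLock sync.RWMutex
	config     *Config // canonical config, mutated in place
	configCopy *Config // snapshot handed out to readers
}

// GetConfig hands out the snapshot, never the mutable original.
func (c *Client) GetConfig() *Config {
	c.configLock.RLock()
	defer c.configLock.RUnlock()
	return c.configCopy
}

// updateNodeLocked publishes a fresh Node copy; the caller holds configLock.
func (c *Client) updateNodeLocked() {
	c.configCopy.Node = c.config.Node.Copy()
}

func main() {
	c := &Client{config: &Config{Node: &Node{Attributes: map[string]string{}}}}

	c.configLock.Lock()
	c.configCopy = c.config.Copy()
	c.config.Node.Attributes["driver.raw_exec"] = "1" // mutate the canonical config
	c.updateNodeLocked()                              // publish a fresh snapshot
	c.configLock.Unlock()

	fmt.Println(c.GetConfig().Node.Attributes["driver.raw_exec"]) // "1"
}
```

The .Copy() calls added in the diff serve the same purpose: each consumer gets its own snapshot, so a concurrent fingerprint or driver update cannot mutate a struct another goroutine is reading.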
@@ -53,10 +53,16 @@ func NewFingerprintManager(getConfig func() *config.Config,
 func (fm *FingerprintManager) setNode(node *structs.Node) {
 	fm.nodeLock.Lock()
 	defer fm.nodeLock.Unlock()
 
 	fm.node = node
 }
 
+// getNode returns the current client node
+func (fm *FingerprintManager) getNode() *structs.Node {
+	fm.nodeLock.Lock()
+	defer fm.nodeLock.Unlock()
+	return fm.node
+}
 
 // Run starts the process of fingerprinting the node. It does an initial pass,
 // identifying whitelisted and blacklisted fingerprints/drivers. Then, for
 // those which require periotic checking, it starts a periodic process for
@@ -167,7 +173,7 @@ func (fm *FingerprintManager) setupFingerprinters(fingerprints []string) error {
 // supported
 func (fm *FingerprintManager) setupDrivers(drivers []string) error {
 	var availDrivers []string
-	driverCtx := driver.NewDriverContext("", "", fm.getConfig(), fm.node, fm.logger, nil)
+	driverCtx := driver.NewDriverContext("", "", fm.getConfig(), fm.getNode(), fm.logger, nil)
 	for _, name := range drivers {
 
 		d, err := driver.NewDriver(name, driverCtx)

demo/vagrant/Vagrantfile (vendored): 2 changes
@@ -17,7 +17,7 @@ cd /tmp/
 curl -sSL https://releases.hashicorp.com/nomad/${NOMAD_VERSION}/nomad_${NOMAD_VERSION}_linux_amd64.zip -o nomad.zip
 
 echo "Fetching Consul..."
-curl -sSL https://releases.hashicorp.com/consul/1.0.0/consul_1.0.7_linux_amd64.zip > consul.zip
+curl -sSL https://releases.hashicorp.com/consul/1.0.7/consul_1.0.7_linux_amd64.zip > consul.zip
 
 echo "Installing Nomad..."
 unzip nomad.zip

@@ -2,6 +2,8 @@ package structs
 
 import (
 	"time"
+
+	"github.com/hashicorp/nomad/helper"
 )
 
 // DriverInfo is the current state of a single driver. This is updated
@@ -14,6 +16,17 @@ type DriverInfo struct {
 	UpdateTime time.Time
 }
 
+func (di *DriverInfo) Copy() *DriverInfo {
+	if di == nil {
+		return nil
+	}
+
+	cdi := new(DriverInfo)
+	*cdi = *di
+	cdi.Attributes = helper.CopyMapStringString(di.Attributes)
+	return cdi
+}
+
 // MergeHealthCheck merges information from a health check for a drier into a
 // node's driver info
 func (di *DriverInfo) MergeHealthCheck(other *DriverInfo) {

@@ -1461,6 +1461,7 @@ func (n *Node) Copy() *Node {
 	nn.Meta = helper.CopyMapStringString(nn.Meta)
 	nn.Events = copyNodeEvents(n.Events)
 	nn.DrainStrategy = nn.DrainStrategy.Copy()
+	nn.Drivers = copyNodeDrivers(n.Drivers)
 	return nn
 }
 
@@ -1478,6 +1479,20 @@ func copyNodeEvents(events []*NodeEvent) []*NodeEvent {
 	return c
 }
 
+// copyNodeDrivers is a helper to copy a map of DriverInfo
+func copyNodeDrivers(drivers map[string]*DriverInfo) map[string]*DriverInfo {
+	l := len(drivers)
+	if l == 0 {
+		return nil
+	}
+
+	c := make(map[string]*DriverInfo, l)
+	for driver, info := range drivers {
+		c[driver] = info.Copy()
+	}
+	return c
+}
+
 // TerminalStatus returns if the current status is terminal and
 // will no longer transition.
 func (n *Node) TerminalStatus() bool {

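Illustrative aside (not part of the commit): copyNodeDrivers deep-copies every entry instead of reusing the map values. The minimal, self-contained sketch below uses stand-in types (not the Nomad structs) to show why that matters: a shallow copy leaves both maps pointing at the same *DriverInfo, so a later health update would bleed into the "copied" node.

```go
package main

import "fmt"

type DriverInfo struct {
	Healthy    bool
	Attributes map[string]string
}

// Copy returns an independent DriverInfo, including its Attributes map.
func (di *DriverInfo) Copy() *DriverInfo {
	if di == nil {
		return nil
	}
	cdi := *di
	cdi.Attributes = make(map[string]string, len(di.Attributes))
	for k, v := range di.Attributes {
		cdi.Attributes[k] = v
	}
	return &cdi
}

// copyDrivers mirrors the shape of the helper added in the diff above.
func copyDrivers(drivers map[string]*DriverInfo) map[string]*DriverInfo {
	if len(drivers) == 0 {
		return nil
	}
	c := make(map[string]*DriverInfo, len(drivers))
	for name, info := range drivers {
		c[name] = info.Copy() // deep copy: no shared pointers
	}
	return c
}

func main() {
	orig := map[string]*DriverInfo{"exec": {Healthy: true, Attributes: map[string]string{}}}

	shallow := make(map[string]*DriverInfo, len(orig))
	for k, v := range orig {
		shallow[k] = v // same *DriverInfo as orig
	}
	deep := copyDrivers(orig)

	orig["exec"].Healthy = false
	fmt.Println(shallow["exec"].Healthy) // false: the mutation leaked through
	fmt.Println(deep["exec"].Healthy)    // true: the deep copy is isolated
}
```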
@@ -3676,3 +3676,80 @@ func TestNode_Canonicalize(t *testing.T) {
 	node.Canonicalize()
 	require.Equal(NodeSchedulingIneligible, node.SchedulingEligibility)
 }
+
+func TestNode_Copy(t *testing.T) {
+	t.Parallel()
+	require := require.New(t)
+
+	node := &Node{
+		ID: uuid.Generate(),
+		SecretID: uuid.Generate(),
+		Datacenter: "dc1",
+		Name: "foobar",
+		Attributes: map[string]string{
+			"kernel.name": "linux",
+			"arch": "x86",
+			"nomad.version": "0.5.0",
+			"driver.exec": "1",
+			"driver.mock_driver": "1",
+		},
+		Resources: &Resources{
+			CPU: 4000,
+			MemoryMB: 8192,
+			DiskMB: 100 * 1024,
+			IOPS: 150,
+			Networks: []*NetworkResource{
+				{
+					Device: "eth0",
+					CIDR: "192.168.0.100/32",
+					MBits: 1000,
+				},
+			},
+		},
+		Reserved: &Resources{
+			CPU: 100,
+			MemoryMB: 256,
+			DiskMB: 4 * 1024,
+			Networks: []*NetworkResource{
+				{
+					Device: "eth0",
+					IP: "192.168.0.100",
+					ReservedPorts: []Port{{Label: "ssh", Value: 22}},
+					MBits: 1,
+				},
+			},
+		},
+		Links: map[string]string{
+			"consul": "foobar.dc1",
+		},
+		Meta: map[string]string{
+			"pci-dss": "true",
+			"database": "mysql",
+			"version": "5.6",
+		},
+		NodeClass: "linux-medium-pci",
+		Status: NodeStatusReady,
+		SchedulingEligibility: NodeSchedulingEligible,
+		Drivers: map[string]*DriverInfo{
+			"mock_driver": &DriverInfo{
+				Attributes: map[string]string{"running": "1"},
+				Detected: true,
+				Healthy: true,
+				HealthDescription: "Currently active",
+				UpdateTime: time.Now(),
+			},
+		},
+	}
+	node.ComputeClass()
+
+	node2 := node.Copy()
+
+	require.Equal(node.Attributes, node2.Attributes)
+	require.Equal(node.Resources, node2.Resources)
+	require.Equal(node.Reserved, node2.Reserved)
+	require.Equal(node.Links, node2.Links)
+	require.Equal(node.Meta, node2.Meta)
+	require.Equal(node.Events, node2.Events)
+	require.Equal(node.DrainStrategy, node2.DrainStrategy)
+	require.Equal(node.Drivers, node2.Drivers)
+}

@@ -43,7 +43,7 @@ a custom AMI:
 
 ```bash
 region = "us-east-1"
-ami = "ami-3330e54e"
+ami = "ami-d4b016ab"
 instance_type = "t2.medium"
 key_name = "KEY_NAME"
 server_count = "3"
@@ -57,7 +57,7 @@ variable like so:
 
 ```bash
 region = "us-east-1"
-ami = "ami-3330e54e"
+ami = "ami-d4b016ab"
 instance_type = "t2.medium"
 key_name = "KEY_NAME"
 server_count = "3"

terraform/aws/env/us-east/terraform.tfvars (vendored): 2 changes
@@ -1,5 +1,5 @@
 region = "us-east-1"
-ami = "ami-3330e54e"
+ami = "ami-d4b016ab"
 instance_type = "t2.medium"
 key_name = "KEY_NAME"
 server_count = "3"

@@ -9,17 +9,17 @@ cd /ops
 
 CONFIGDIR=/ops/shared/config
 
-CONSULVERSION=1.0.6
+CONSULVERSION=1.0.7
 CONSULDOWNLOAD=https://releases.hashicorp.com/consul/${CONSULVERSION}/consul_${CONSULVERSION}_linux_amd64.zip
 CONSULCONFIGDIR=/etc/consul.d
 CONSULDIR=/opt/consul
 
-VAULTVERSION=0.9.6
+VAULTVERSION=0.10.0
 VAULTDOWNLOAD=https://releases.hashicorp.com/vault/${VAULTVERSION}/vault_${VAULTVERSION}_linux_amd64.zip
 VAULTCONFIGDIR=/etc/vault.d
 VAULTDIR=/opt/vault
 
-NOMADVERSION=0.7.1
+NOMADVERSION=0.8.0
 NOMADDOWNLOAD=https://releases.hashicorp.com/nomad/${NOMADVERSION}/nomad_${NOMADVERSION}_linux_amd64.zip
 NOMADCONFIGDIR=/etc/nomad.d
 NOMADDIR=/opt/nomad

@@ -56,18 +56,18 @@ List view:
 
 ```
 $ nomad node status
-ID        DC   Name   Drain  Status
-a72dfba2  dc1  node1  false  ready
-1f3f03ea  dc1  node2  false  ready
+ID        DC   Name   Class   Drain  Eligibility  Status
+a72dfba2  dc1  node1  <none>  false  eligible     ready
+1f3f03ea  dc1  node2  <none>  false  eligible     ready
 ```
 
 List view, with running allocations:
 
 ```
 $ nomad node status -allocs
-ID        DC   Name   Class   Drain  Status  Running Allocs
-4d2ba53b  dc1  node1  <none>  false  ready   1
-34dfba32  dc1  node2  <none>  false  ready   3
+ID        DC   Name   Class   Drain  Eligibility  Status  Running Allocs
+4d2ba53b  dc1  node1  <none>  false  eligible     ready   1
+34dfba32  dc1  node2  <none>  false  eligible     ready   3
 ```
 
 Single-node view in short mode:
 
@@ -326,8 +326,8 @@ respond as expected:
 
 ```text
 $ nomad node status
-ID        DC   Name   Class   Drain  Status
-237cd4c5  dc1  nomad  <none>  false  ready
+ID        DC   Name   Class   Drain  Eligibility  Status
+237cd4c5  dc1  nomad  <none>  false  eligible     ready
 
 $ nomad job init
 Example job file written to example.nomad

@@ -142,9 +142,9 @@ we should see both nodes in the `ready` state:
 
 ```text
 $ nomad node status
-ID        Datacenter  Name     Class   Drain  Status
-fca62612  dc1         client1  <none>  false  ready
-c887deef  dc1         client2  <none>  false  ready
+ID        DC   Name     Class   Drain  Eligibility  Status
+fca62612  dc1  client1  <none>  false  eligible     ready
+c887deef  dc1  client2  <none>  false  eligible     ready
 ```
 
 We now have a simple three node cluster running. The only difference

@@ -82,8 +82,8 @@ $ vagrant ssh
 ...
 
 $ nomad node status
-ID        Datacenter  Name   Class   Drain  Status
-171a583b  dc1         nomad  <none>  false  ready
+ID        DC   Name   Class   Drain  Eligibility  Status
+171a583b  dc1  nomad  <none>  false  eligible     ready
 ```
 
 The output shows our Node ID, which is a randomly generated UUID,