From 6af01d2d6f216f7d284fc8ea940ae4ba64dfc3a0 Mon Sep 17 00:00:00 2001
From: Diptanu Choudhury
Date: Wed, 9 Dec 2015 15:24:44 -0800
Subject: [PATCH 01/82] Muted the consul debug messages

---
 client/alloc_runner_test.go |  4 ++--
 client/client.go            |  2 +-
 client/consul.go            | 18 +++++++++++++-----
 client/consul_test.go       |  2 +-
 client/task_runner_test.go  |  4 ++--
 5 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/client/alloc_runner_test.go b/client/alloc_runner_test.go
index 6b49fcb53..8e0c2c33c 100644
--- a/client/alloc_runner_test.go
+++ b/client/alloc_runner_test.go
@@ -31,7 +31,7 @@ func testAllocRunner(restarts bool) (*MockAllocStateUpdater, *AllocRunner) {
 	conf.AllocDir = os.TempDir()
 	upd := &MockAllocStateUpdater{}
 	alloc := mock.Alloc()
-	consulClient, _ := NewConsulService(logger, "127.0.0.1:8500", "", "", false, false)
+	consulClient, _ := NewConsulService(logger, "127.0.0.1:8500", "", "", false, false, &structs.Node{})
 	if !restarts {
 		alloc.Job.Type = structs.JobTypeBatch
 		*alloc.Job.LookupTaskGroup(alloc.TaskGroup).RestartPolicy = structs.RestartPolicy{Attempts: 0}
@@ -142,7 +142,7 @@ func TestAllocRunner_SaveRestoreState(t *testing.T) {
 	}
 
 	// Create a new alloc runner
-	consulClient, err := NewConsulService(ar.logger, "127.0.0.1:8500", "", "", false, false)
+	consulClient, err := NewConsulService(ar.logger, "127.0.0.1:8500", "", "", false, false, &structs.Node{})
 	ar2 := NewAllocRunner(ar.logger, ar.config, upd.Update,
 		&structs.Allocation{ID: ar.alloc.ID}, consulClient)
 	err = ar2.RestoreState()
diff --git a/client/client.go b/client/client.go
index c41b1b0ed..6f88f7144 100644
--- a/client/client.go
+++ b/client/client.go
@@ -157,7 +157,7 @@ func (c *Client) setupConsulService() error {
 	auth := c.config.Read("consul.auth")
 	enableSSL := c.config.ReadBoolDefault("consul.ssl", false)
 	verifySSL := c.config.ReadBoolDefault("consul.verifyssl", true)
-	if consulService, err = NewConsulService(c.logger, addr, token, auth, enableSSL, verifySSL); err != nil {
+	if consulService, err = NewConsulService(c.logger, addr, token, auth, enableSSL, verifySSL, c.config.Node); err != nil {
 		return err
 	}
 	c.consulService = consulService
diff --git a/client/consul.go b/client/consul.go
index 271918729..2eb54b1e1 100644
--- a/client/consul.go
+++ b/client/consul.go
@@ -72,6 +72,7 @@ type ConsulService struct {
 	client     consulApi
 	logger     *log.Logger
 	shutdownCh chan struct{}
+	node       *structs.Node
 
 	trackedTasks  map[string]*trackedTask
 	serviceStates map[string]string
@@ -80,7 +81,7 @@
 
 // A factory method to create new consul service
 func NewConsulService(logger *log.Logger, consulAddr string, token string,
-	auth string, enableSSL bool, verifySSL bool) (*ConsulService, error) {
+	auth string, enableSSL bool, verifySSL bool, node *structs.Node) (*ConsulService, error) {
 	var err error
 	var c *consul.Client
 	cfg := consul.DefaultConfig()
@@ -122,6 +123,7 @@ func NewConsulService(logger *log.Logger, consulAddr string, token string,
 	consulService := ConsulService{
 		client:        &consulApiClient{client: c},
 		logger:        logger,
+		node:          node,
 		trackedTasks:  make(map[string]*trackedTask),
 		serviceStates: make(map[string]string),
 		shutdownCh:    make(chan struct{}),
@@ -161,7 +163,7 @@ func (c *ConsulService) Deregister(task *structs.Task, allocID string) error {
 		}
 		c.logger.Printf("[INFO] consul: deregistering service %v with consul", service.Name)
 		if err := c.deregisterService(service.Id); err != nil {
-			c.logger.Printf("[DEBUG] consul: error in deregistering service %v from consul", service.Name)
+			c.printDebugMessage("[DEBUG] consul: error in deregistering service %v from consul", service.Name)
 			mErr.Errors = append(mErr.Errors, err)
 		}
 	}
@@ -273,13 +275,13 @@ func (c *ConsulService) registerService(service *structs.Service, task *structs.
 	}
 	if err := c.client.ServiceRegister(asr); err != nil {
-		c.logger.Printf("[DEBUG] consul: error while registering service %v with consul: %v", service.Name, err)
+		c.printDebugMessage("[DEBUG] consul: error while registering service %v with consul: %v", service.Name, err)
 		mErr.Errors = append(mErr.Errors, err)
 	}
 	for _, check := range service.Checks {
 		cr := c.makeCheck(service, check, host, port)
 		if err := c.registerCheck(cr); err != nil {
-			c.logger.Printf("[DEBUG] consul: error while registering check %v with consul: %v", check.Name, err)
+			c.printDebugMessage("[DEBUG] consul: error while registering check %v with consul: %v", check.Name, err)
 			mErr.Errors = append(mErr.Errors, err)
 		}
 
@@ -289,7 +291,7 @@ func (c *ConsulService) registerService(service *structs.Service, task *structs.
 
 // registerCheck registers a check with Consul
 func (c *ConsulService) registerCheck(check *consul.AgentCheckRegistration) error {
-	c.logger.Printf("[INFO] consul: registering Check with ID: %v for service: %v", check.ID, check.ServiceID)
+	c.logger.Printf("[INFO] consul: registering check with ID: %v for service: %v", check.ID, check.ServiceID)
 	return c.client.CheckRegister(check)
 }
 
@@ -336,3 +338,9 @@ func (c *ConsulService) makeCheck(service *structs.Service, check *structs.Servi
 	}
 	return cr
 }
+
+func (c *ConsulService) printDebugMessage(message string, v ...interface{}) {
+	if _, ok := c.node.Attributes["consul.version"]; ok {
+		c.logger.Printf(message, v)
+	}
+}
diff --git a/client/consul_test.go b/client/consul_test.go
index f756c078c..9cb38ede7 100644
--- a/client/consul_test.go
+++ b/client/consul_test.go
@@ -46,7 +46,7 @@ func (a *mockConsulApiClient) Checks() (map[string]*consul.AgentCheck, error) {
 
 func newConsulService() *ConsulService {
 	logger := log.New(os.Stdout, "logger: ", log.Lshortfile)
-	c, _ := NewConsulService(logger, "", "", "", false, false)
+	c, _ := NewConsulService(logger, "", "", "", false, false, &structs.Node{})
 	c.client = &mockConsulApiClient{}
 	return c
 }
diff --git a/client/task_runner_test.go b/client/task_runner_test.go
index 4557b6b55..96da69d4e 100644
--- a/client/task_runner_test.go
+++ b/client/task_runner_test.go
@@ -32,7 +32,7 @@ func testTaskRunner(restarts bool) (*MockTaskStateUpdater, *TaskRunner) {
 	upd := &MockTaskStateUpdater{}
 	alloc := mock.Alloc()
 	task := alloc.Job.TaskGroups[0].Tasks[0]
-	consulClient, _ := NewConsulService(logger, "127.0.0.1:8500", "", "", false, false)
+	consulClient, _ := NewConsulService(logger, "127.0.0.1:8500", "", "", false, false, &structs.Node{})
 
 	// Initialize the port listing. This should be done by the offer process but
 	// we have a mock so that doesn't happen.
 	task.Resources.Networks[0].ReservedPorts = []structs.Port{{"", 80}}
@@ -164,7 +164,7 @@ func TestTaskRunner_SaveRestoreState(t *testing.T) {
 	}
 
 	// Create a new task runner
-	consulClient, _ := NewConsulService(tr.logger, "127.0.0.1:8500", "", "", false, false)
+	consulClient, _ := NewConsulService(tr.logger, "127.0.0.1:8500", "", "", false, false, &structs.Node{})
 	tr2 := NewTaskRunner(tr.logger, tr.config, upd.Update, tr.ctx, tr.allocID,
 		&structs.Task{Name: tr.task.Name}, tr.state, tr.restartTracker,
 		consulClient)
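The pattern patch 01 introduces — suppressing Consul-related log lines unless the node's fingerprinted attributes say an agent is present — can be sketched in isolation like this. Names such as gatedLogger and nodeAttrs are illustrative, not Nomad's API:

    package main

    import (
    	"log"
    	"os"
    )

    // gatedLogger drops messages unless fingerprinting recorded a Consul
    // attribute on the node, mirroring the printDebugMessage helper above.
    type gatedLogger struct {
    	logger    *log.Logger
    	nodeAttrs map[string]string
    }

    func (g *gatedLogger) debugf(format string, v ...interface{}) {
    	if _, ok := g.nodeAttrs["consul.version"]; ok {
    		g.logger.Printf(format, v...) // note the expanded slice
    	}
    }

    func main() {
    	g := &gatedLogger{
    		logger:    log.New(os.Stderr, "", log.LstdFlags),
    		nodeAttrs: map[string]string{"consul.version": "0.6.0"},
    	}
    	g.debugf("[DEBUG] consul: deregister failed for %s", "web")
    }

Note that the helper in the patch itself forwards the slice as c.logger.Printf(message, v) without expanding it; a later commit in this series corrects that.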
From deea0d6031f0518859cee9662120d271f9bffd3b Mon Sep 17 00:00:00 2001
From: Diptanu Choudhury
Date: Fri, 11 Dec 2015 09:01:28 -0800
Subject: [PATCH 02/82] Print consul debug messages only when the consul
 agent is available

---
 client/fingerprint/consul.go | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/client/fingerprint/consul.go b/client/fingerprint/consul.go
index 8f826f912..047da3e73 100644
--- a/client/fingerprint/consul.go
+++ b/client/fingerprint/consul.go
@@ -12,15 +12,21 @@ import (
 	"github.com/hashicorp/nomad/nomad/structs"
 )
 
+const (
+	consulAvailable   = "consulavailable"
+	consulUnavailable = "consulunavailable"
+)
+
 // ConsulFingerprint is used to fingerprint the architecture
 type ConsulFingerprint struct {
-	logger *log.Logger
-	client *consul.Client
+	logger    *log.Logger
+	client    *consul.Client
+	lastState string
 }
 
 // NewConsulFingerprint is used to create an OS fingerprint
 func NewConsulFingerprint(logger *log.Logger) Fingerprint {
-	f := &ConsulFingerprint{logger: logger}
+	f := &ConsulFingerprint{logger: logger, lastState: consulUnavailable}
 	return f
 }
 
@@ -55,6 +61,13 @@ func (f *ConsulFingerprint) Fingerprint(config *client.Config, node *structs.Nod
 	if err != nil {
 		// Clear any attributes set by a previous fingerprint.
 		f.clearConsulAttributes(node)
+
+		// Print a message indicating that the Consul Agent is not available
+		// anymore
+		if f.lastState == consulAvailable {
+			f.logger.Printf("[INFO] fingerprinter: consul agent is unavailable")
+		}
+		f.lastState = consulUnavailable
 		return false, nil
 	}
 
@@ -68,6 +81,12 @@ func (f *ConsulFingerprint) Fingerprint(config *client.Config, node *structs.Nod
 		node.Attributes["consul.datacenter"],
 		node.Attributes["consul.name"])
 
+	// If the Consul Agent was previously unavailable print a message to
+	// indicate the Agent is available now
+	if f.lastState == consulUnavailable {
+		f.logger.Printf("[INFO] fingerprinter: consul agent is available")
+	}
+	f.lastState = consulAvailable
 	return true, nil
 }

From b3ed3e494271910b4ea39e6ef4a5375c05aceb38 Mon Sep 17 00:00:00 2001
From: Diptanu Choudhury
Date: Fri, 11 Dec 2015 09:10:21 -0800
Subject: [PATCH 03/82] Changed the log line in consul fingerprinter

---
 client/fingerprint/consul.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/client/fingerprint/consul.go b/client/fingerprint/consul.go
index 047da3e73..04f711e27 100644
--- a/client/fingerprint/consul.go
+++ b/client/fingerprint/consul.go
@@ -65,7 +65,7 @@ func (f *ConsulFingerprint) Fingerprint(config *client.Config, node *structs.Nod
 		// Print a message indicating that the Consul Agent is not available
 		// anymore
 		if f.lastState == consulAvailable {
-			f.logger.Printf("[INFO] fingerprinter: consul agent is unavailable")
+			f.logger.Printf("[INFO] fingerprint.consul: consul agent is unavailable")
 		}
 		f.lastState = consulUnavailable
 		return false, nil
@@ -84,7 +84,7 @@ func (f *ConsulFingerprint) Fingerprint(config *client.Config, node *structs.Nod
 	// If the Consul Agent was previously unavailable print a message to
 	// indicate the Agent is available now
 	if f.lastState == consulUnavailable {
-		f.logger.Printf("[INFO] fingerprinter: consul agent is available")
+		f.logger.Printf("[INFO] fingerprt.consul: consul agent is available")
 	}
 	f.lastState = consulAvailable
 	return true, nil

From e1d2d5877a4d6410932e47546b43c77a94ad6dda Mon Sep 17 00:00:00 2001
From: Diptanu Choudhury
Date: Fri, 11 Dec 2015 09:22:47 -0800
Subject: [PATCH 04/82] Printing most of the consul messages only when the
 agent is available

---
 client/consul.go | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/client/consul.go b/client/consul.go
index 2eb54b1e1..861387181 100644
--- a/client/consul.go
+++ b/client/consul.go
@@ -163,7 +163,7 @@ func (c *ConsulService) Deregister(task *structs.Task, allocID string) error {
 		}
 		c.logger.Printf("[INFO] consul: deregistering service %v with consul", service.Name)
 		if err := c.deregisterService(service.Id); err != nil {
-			c.printDebugMessage("[DEBUG] consul: error in deregistering service %v from consul", service.Name)
+			c.printLogMessage("[DEBUG] consul: error in deregistering service %v from consul", service.Name)
 			mErr.Errors = append(mErr.Errors, err)
 		}
 	}
@@ -209,14 +209,14 @@ func (c *ConsulService) performSync() {
 			// Add new services which Consul agent isn't aware of
 			knownServices[service.Id] = struct{}{}
 			if _, ok := consulServices[service.Id]; !ok {
-				c.logger.Printf("[INFO] consul: registering service %s with consul.", service.Name)
+				c.printLogMessage("[INFO] consul: registering service %s with consul.", service.Name)
 				c.registerService(service, trackedTask.task, trackedTask.allocID)
 				continue
 			}
 
 			// If a service has changed, re-register it with Consul agent
 			if service.Hash() != c.serviceStates[service.Id] {
-				c.logger.Printf("[INFO] consul: reregistering service %s with consul.", service.Name)
+				c.printLogMessage("[INFO] consul: reregistering service %s with consul.", service.Name)
 				c.registerService(service, trackedTask.task, trackedTask.allocID)
 				continue
 			}
@@ -244,7 +244,7 @@ func (c *ConsulService) performSync() {
 	for _, consulService := range consulServices {
 		if _, ok := knownServices[consulService.ID]; !ok {
 			delete(c.serviceStates, consulService.ID)
-			c.logger.Printf("[INFO] consul: deregistering service %v with consul", consulService.Service)
+			c.printLogMessage("[INFO] consul: deregistering service %v with consul", consulService.Service)
 			c.deregisterService(consulService.ID)
 		}
 	}
@@ -275,13 +275,13 @@ func (c *ConsulService) registerService(service *structs.Service, task *structs.
 	}
 	if err := c.client.ServiceRegister(asr); err != nil {
-		c.printDebugMessage("[DEBUG] consul: error while registering service %v with consul: %v", service.Name, err)
+		c.printLogMessage("[DEBUG] consul: error while registering service %v with consul: %v", service.Name, err)
 		mErr.Errors = append(mErr.Errors, err)
 	}
 	for _, check := range service.Checks {
 		cr := c.makeCheck(service, check, host, port)
 		if err := c.registerCheck(cr); err != nil {
-			c.printDebugMessage("[DEBUG] consul: error while registering check %v with consul: %v", check.Name, err)
+			c.printLogMessage("[DEBUG] consul: error while registering check %v with consul: %v", check.Name, err)
 			mErr.Errors = append(mErr.Errors, err)
 		}
 
@@ -291,7 +291,7 @@ func (c *ConsulService) registerService(service *structs.Service, task *structs.
 
 // registerCheck registers a check with Consul
 func (c *ConsulService) registerCheck(check *consul.AgentCheckRegistration) error {
-	c.logger.Printf("[INFO] consul: registering check with ID: %v for service: %v", check.ID, check.ServiceID)
+	c.printLogMessage("[INFO] consul: registering check with ID: %v for service: %v", check.ID, check.ServiceID)
 	return c.client.CheckRegister(check)
 }
 
 // deregisterCheck de-registers a check with a specific ID from Consul
 func (c *ConsulService) deregisterCheck(checkID string) error {
-	c.logger.Printf("[INFO] consul: removing check with ID: %v", checkID)
+	c.printLogMessage("[INFO] consul: removing check with ID: %v", checkID)
 	return c.client.CheckDeregister(checkID)
 }
 
@@ -339,7 +339,7 @@ func (c *ConsulService) makeCheck(service *structs.Service, check *structs.Servi
 	return cr
 }
 
-func (c *ConsulService) printDebugMessage(message string, v ...interface{}) {
+func (c *ConsulService) printLogMessage(message string, v ...interface{}) {
 	if _, ok := c.node.Attributes["consul.version"]; ok {
 		c.logger.Printf(message, v)
 	}
From 2bcab7cc0c3ea3d7d5e3be9a191c9b58e8b69269 Mon Sep 17 00:00:00 2001
From: Chris Hines
Date: Fri, 11 Dec 2015 15:30:53 -0500
Subject: [PATCH 05/82] Avoid leaking a time.Ticker.

---
 client/driver/spawn/spawn.go | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/client/driver/spawn/spawn.go b/client/driver/spawn/spawn.go
index 5c7795f38..5b3200984 100644
--- a/client/driver/spawn/spawn.go
+++ b/client/driver/spawn/spawn.go
@@ -251,7 +251,9 @@ func (s *Spawner) pollWait() *structs.WaitResult {
 	}
 
 	// Read after the process exits.
-	for _ = range time.Tick(5 * time.Second) {
+	ticker := time.NewTicker(5 * time.Second)
+	defer ticker.Stop()
+	for range ticker.C {
 		if !s.Alive() {
 			break
 		}

From 42b4a340461faf0df9d56b4b62bed600a99d51fd Mon Sep 17 00:00:00 2001
From: Diptanu Choudhury
Date: Fri, 11 Dec 2015 13:10:20 -0800
Subject: [PATCH 06/82] Not continuing sync if we couldn't get services and
 checks from consul

---
 client/consul.go | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/client/consul.go b/client/consul.go
index 861387181..3440064c7 100644
--- a/client/consul.go
+++ b/client/consul.go
@@ -195,8 +195,14 @@ func (c *ConsulService) SyncWithConsul() {
 // services which are no longer present in tasks
 func (c *ConsulService) performSync() {
 	// Get the list of the services and that Consul knows about
-	consulServices, _ := c.client.Services()
-	consulChecks, _ := c.client.Checks()
+	consulServices, err := c.client.Services()
+	if err != nil {
+		return
+	}
+	consulChecks, err := c.client.Checks()
+	if err != nil {
+		return
+	}
 	delete(consulServices, "consul")
 
 	knownChecks := make(map[string]struct{})

From c9277e27cf0c24d9abe77f1f0192090fbd47c5a9 Mon Sep 17 00:00:00 2001
From: Diptanu Choudhury
Date: Fri, 11 Dec 2015 13:18:04 -0800
Subject: [PATCH 07/82] Renaming constants

---
 client/fingerprint/consul.go | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/client/fingerprint/consul.go b/client/fingerprint/consul.go
index 04f711e27..873ec7289 100644
--- a/client/fingerprint/consul.go
+++ b/client/fingerprint/consul.go
@@ -13,8 +13,8 @@ import (
 )
 
 const (
-	consulAvailable   = "consulavailable"
-	consulUnavailable = "consulunavailable"
+	available   = "available"
+	unavailable = "unavailable"
 )
 
 // ConsulFingerprint is used to fingerprint the architecture
@@ -26,8 +26,7 @@ type ConsulFingerprint struct {
 
 // NewConsulFingerprint is used to create an OS fingerprint
 func NewConsulFingerprint(logger *log.Logger) Fingerprint {
-	f := &ConsulFingerprint{logger: logger, lastState: consulUnavailable}
-	return f
+	return &ConsulFingerprint{logger: logger, lastState: unavailable}
 }
 
 func (f *ConsulFingerprint) Fingerprint(config *client.Config, node *structs.Node) (bool, error) {
@@ -64,10 +63,10 @@ func (f *ConsulFingerprint) Fingerprint(config *client.Config, node *structs.Nod
 
 	// Print a message indicating that the Consul Agent is not available
 	// anymore
-	if f.lastState == consulAvailable {
+	if f.lastState == available {
 		f.logger.Printf("[INFO] fingerprint.consul: consul agent is unavailable")
 	}
-	f.lastState = consulUnavailable
+	f.lastState = unavailable
 	return false, nil
 }
 
@@ -83,10 +82,10 @@ func (f *ConsulFingerprint) Fingerprint(config *client.Config, node *structs.Nod
 
 	// If the Consul Agent was previously unavailable print a message to
 	// indicate the Agent is available now
-	if f.lastState == consulUnavailable {
-		f.logger.Printf("[INFO] fingerprt.consul: consul agent is available")
+	if f.lastState == unavailable {
+		f.logger.Printf("[INFO] fingerprint.consul: consul agent is available")
 	}
-	f.lastState = consulAvailable
+	f.lastState = available
 	return true, nil
 }

From 83c4650c945a2a6c6dd27be79bbad9670ae18d3c Mon Sep 17 00:00:00 2001
From: Diptanu Choudhury
Date: Fri, 11 Dec 2015 13:47:35 -0800
Subject: [PATCH 08/82] Renaming constants

---
 client/fingerprint/consul.go | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/client/fingerprint/consul.go b/client/fingerprint/consul.go
index 873ec7289..1603fb2ca 100644
--- a/client/fingerprint/consul.go
+++ b/client/fingerprint/consul.go
@@ -13,8 +13,8 @@ import (
 )
 
 const (
-	available   = "available"
-	unavailable = "unavailable"
+	consulAvailable   = "available"
+	consulUnavailable = "unavailable"
 )
 
 // ConsulFingerprint is used to fingerprint the architecture
@@ -26,7 +26,7 @@ type ConsulFingerprint struct {
 
 // NewConsulFingerprint is used to create an OS fingerprint
 func NewConsulFingerprint(logger *log.Logger) Fingerprint {
-	return &ConsulFingerprint{logger: logger, lastState: unavailable}
+	return &ConsulFingerprint{logger: logger, lastState: consulUnavailable}
 }
 
 func (f *ConsulFingerprint) Fingerprint(config *client.Config, node *structs.Node) (bool, error) {
@@ -63,10 +63,10 @@ func (f *ConsulFingerprint) Fingerprint(config *client.Config, node *structs.Nod
 
 	// Print a message indicating that the Consul Agent is not available
 	// anymore
-	if f.lastState == available {
+	if f.lastState == consulAvailable {
 		f.logger.Printf("[INFO] fingerprint.consul: consul agent is unavailable")
 	}
-	f.lastState = unavailable
+	f.lastState = consulUnavailable
 	return false, nil
 }
 
@@ -82,10 +82,10 @@ func (f *ConsulFingerprint) Fingerprint(config *client.Config, node *structs.Nod
 
 	// If the Consul Agent was previously unavailable print a message to
 	// indicate the Agent is available now
-	if f.lastState == unavailable {
+	if f.lastState == consulUnavailable {
 		f.logger.Printf("[INFO] fingerprint.consul: consul agent is available")
 	}
-	f.lastState = available
+	f.lastState = consulAvailable
 	return true, nil
 }
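The spawn.go change in patch 05 exists because a ticker created with time.Tick can never be stopped. A minimal sketch of the safe pattern it switches to:

    package main

    import "time"

    // pollUntil polls fn every interval until it returns true. Using
    // time.NewTicker plus a deferred Stop releases the ticker when the
    // loop exits; a time.Tick channel would leak instead.
    func pollUntil(interval time.Duration, fn func() bool) {
    	ticker := time.NewTicker(interval)
    	defer ticker.Stop()
    	for range ticker.C {
    		if fn() {
    			return
    		}
    	}
    }

    func main() {
    	deadline := time.Now().Add(time.Second)
    	pollUntil(100*time.Millisecond, func() bool {
    		return time.Now().After(deadline)
    	})
    }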
From f182c137297342128a676abd8e0dce9531339f9f Mon Sep 17 00:00:00 2001
From: Diptanu Choudhury
Date: Fri, 11 Dec 2015 10:08:57 -0800
Subject: [PATCH 09/82] Deregister services and checks which are managed by
 Nomad

---
 client/consul.go         | 38 +++++++++++++++++++++++++++++++++---
 client/consul_test.go    |  3 +++
 nomad/structs/structs.go |  2 +-
 3 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/client/consul.go b/client/consul.go
index 3440064c7..abe915fa8 100644
--- a/client/consul.go
+++ b/client/consul.go
@@ -195,15 +195,18 @@ func (c *ConsulService) SyncWithConsul() {
 // services which are no longer present in tasks
 func (c *ConsulService) performSync() {
 	// Get the list of the services and that Consul knows about
-	consulServices, err := c.client.Services()
+	srvcs, err := c.client.Services()
 	if err != nil {
 		return
 	}
-	consulChecks, err := c.client.Checks()
+	chks, err := c.client.Checks()
 	if err != nil {
 		return
 	}
-	delete(consulServices, "consul")
+
+	// Filter the services and checks that aren't managed by consul
+	consulServices := c.filterConsulServices(srvcs)
+	consulChecks := c.filterConsulChecks(chks)
 
 	knownChecks := make(map[string]struct{})
 	knownServices := make(map[string]struct{})
@@ -345,6 +348,35 @@ func (c *ConsulService) makeCheck(service *structs.Service, check *structs.Servi
 	}
 	return cr
 }
 
+// filterConsulServices prunes out all the services whose ids are not prefixed
+// with nomad-
+func (c *ConsulService) filterConsulServices(srvcs map[string]*consul.AgentService) map[string]*consul.AgentService {
+	nomadServices := make(map[string]*consul.AgentService)
+	delete(srvcs, "consul")
+	for _, srv := range srvcs {
+		if strings.HasPrefix(srv.ID, "nomad-") {
+			nomadServices[srv.ID] = srv
+		}
+	}
+	return nomadServices
+
+}
+
+// filterConsulChecks prunes out all the consul checks which do not have
+// services with id prefixed with nomad-
+func (c *ConsulService) filterConsulChecks(chks map[string]*consul.AgentCheck) map[string]*consul.AgentCheck {
+	nomadChecks := make(map[string]*consul.AgentCheck)
+	for _, chk := range chks {
+		if strings.HasPrefix(chk.ServiceID, "nomad-") {
+			nomadChecks[chk.CheckID] = chk
+		}
+	}
+	return nomadChecks
+
+}
+
+// printLogMessage prints log messages only when the node attributes have consul
+// related information
 func (c *ConsulService) printLogMessage(message string, v ...interface{}) {
 	if _, ok := c.node.Attributes["consul.version"]; ok {
 		c.logger.Printf(message, v)
diff --git a/client/consul_test.go b/client/consul_test.go
index 9cb38ede7..87c8143bb 100644
--- a/client/consul_test.go
+++ b/client/consul_test.go
@@ -278,3 +278,6 @@ func TestConsul_ModifyCheck(t *testing.T) {
 		t.Fatalf("Expected number of check registrations: %v, Actual: %v", 2, apiClient.checkRegisterCallCount)
 	}
 }
+
+func TestConsul_FilterNomadServicesAndChecks(t *testing.T) {
+}
diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go
index c817b6672..bfa4012d1 100644
--- a/nomad/structs/structs.go
+++ b/nomad/structs/structs.go
@@ -1157,7 +1157,7 @@ type Service struct {
 // InitFields interpolates values of Job, Task Group and Task in the Service
 // Name. This also generates check names, service id and check ids.
 func (s *Service) InitFields(job string, taskGroup string, task string) {
-	s.Id = GenerateUUID()
+	s.Id = fmt.Sprintf("nomad-%s", GenerateUUID())
 	s.Name = args.ReplaceEnv(s.Name, map[string]string{
 		"JOB":       job,
 		"TASKGROUP": taskGroup,

From df14c98b3f7f23d830cfe3cb983a604eb882ac84 Mon Sep 17 00:00:00 2001
From: Diptanu Choudhury
Date: Fri, 11 Dec 2015 10:47:08 -0800
Subject: [PATCH 10/82] Added a test for the filtering logic of service and
 clients

---
 client/consul_test.go | 53 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/client/consul_test.go b/client/consul_test.go
index 87c8143bb..ca56263ef 100644
--- a/client/consul_test.go
+++ b/client/consul_test.go
@@ -280,4 +280,57 @@ func TestConsul_ModifyCheck(t *testing.T) {
 }
 
 func TestConsul_FilterNomadServicesAndChecks(t *testing.T) {
+	c := newConsulService()
+	srvs := map[string]*consul.AgentService{
+		"foo-bar": {
+			ID:      "foo-bar",
+			Service: "http-frontend",
+			Tags:    []string{"global"},
+			Port:    8080,
+			Address: "10.10.1.11",
+		},
+		"nomad-2121212": {
+			ID:      "nomad-2121212",
+			Service: "identity-service",
+			Tags:    []string{"global"},
+			Port:    8080,
+			Address: "10.10.1.11",
+		},
+	}
+
+	nomadServices := c.filterConsulServices(srvs)
+
+	if len(nomadServices) != 1 {
+		t.Fatalf("Expected number of services: %v, Actual: %v", 1, len(nomadServices))
+	}
+
+	nomadServices = c.filterConsulServices(nil)
+	if len(nomadServices) != 0 {
+		t.Fatalf("Expected number of services: %v, Actual: %v", 0, len(nomadServices))
+	}
+
+	chks := map[string]*consul.AgentCheck{
+		"foo-bar-chk": {
+			CheckID:   "foo-bar-chk",
+			ServiceID: "foo-bar",
+			Name:      "alive",
+		},
+		"212121212": {
+			CheckID:   "212121212",
+			ServiceID: "nomad-2121212",
+			Name:      "ping",
+		},
+	}
+
+	nomadChecks := c.filterConsulChecks(chks)
+
+	if len(nomadChecks) != 1 {
+		t.Fatalf("Expected number of checks: %v, Actual: %v", 1, len(nomadChecks))
+	}
+
+	nomadChecks = c.filterConsulChecks(nil)
+	if len(nomadChecks) != 0 {
+		t.Fatalf("Expected number of checks: %v, Actual: %v", 0, len(nomadChecks))
+	}
+
 }
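The filtering logic tested above keeps only entries whose IDs carry Nomad's prefix. Reduced to its essentials, with a hypothetical keepNomadManaged helper:

    package main

    import (
    	"fmt"
    	"strings"
    )

    // keepNomadManaged returns only the IDs that carry the "nomad-" prefix
    // the patches above stamp onto Nomad-created Consul services.
    func keepNomadManaged(ids map[string]string) map[string]string {
    	managed := make(map[string]string)
    	for id, name := range ids {
    		if strings.HasPrefix(id, "nomad-") {
    			managed[id] = name
    		}
    	}
    	return managed
    }

    func main() {
    	ids := map[string]string{
    		"foo-bar":       "http-frontend",
    		"nomad-2121212": "identity-service",
    	}
    	fmt.Println(keepNomadManaged(ids)) // map[nomad-2121212:identity-service]
    }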
From 568006247d7347bc571293cd1fa7bd75705ef9d2 Mon Sep 17 00:00:00 2001
From: Diptanu Choudhury
Date: Fri, 11 Dec 2015 11:02:23 -0800
Subject: [PATCH 11/82] Making a struct to hold consul service config

---
 client/alloc_runner_test.go |  4 ++--
 client/client.go            | 11 ++++++++++-
 client/consul.go            | 35 ++++++++++++++++++++++-------------
 client/consul_test.go       |  2 +-
 client/task_runner_test.go  |  4 ++--
 5 files changed, 37 insertions(+), 19 deletions(-)

diff --git a/client/alloc_runner_test.go b/client/alloc_runner_test.go
index 8e0c2c33c..c0bb369a1 100644
--- a/client/alloc_runner_test.go
+++ b/client/alloc_runner_test.go
@@ -31,7 +31,7 @@ func testAllocRunner(restarts bool) (*MockAllocStateUpdater, *AllocRunner) {
 	conf.AllocDir = os.TempDir()
 	upd := &MockAllocStateUpdater{}
 	alloc := mock.Alloc()
-	consulClient, _ := NewConsulService(logger, "127.0.0.1:8500", "", "", false, false, &structs.Node{})
+	consulClient, _ := NewConsulService(&consulServiceConfig{logger, "127.0.0.1:8500", "", "", false, false, &structs.Node{}})
 	if !restarts {
 		alloc.Job.Type = structs.JobTypeBatch
 		*alloc.Job.LookupTaskGroup(alloc.TaskGroup).RestartPolicy = structs.RestartPolicy{Attempts: 0}
@@ -142,7 +142,7 @@ func TestAllocRunner_SaveRestoreState(t *testing.T) {
 	}
 
 	// Create a new alloc runner
-	consulClient, err := NewConsulService(ar.logger, "127.0.0.1:8500", "", "", false, false, &structs.Node{})
+	consulClient, err := NewConsulService(&consulServiceConfig{ar.logger, "127.0.0.1:8500", "", "", false, false, &structs.Node{}})
 	ar2 := NewAllocRunner(ar.logger, ar.config, upd.Update,
 		&structs.Allocation{ID: ar.alloc.ID}, consulClient)
 	err = ar2.RestoreState()
diff --git a/client/client.go b/client/client.go
index 6f88f7144..b371b5df9 100644
--- a/client/client.go
+++ b/client/client.go
@@ -157,7 +157,16 @@ func (c *Client) setupConsulService() error {
 	auth := c.config.Read("consul.auth")
 	enableSSL := c.config.ReadBoolDefault("consul.ssl", false)
 	verifySSL := c.config.ReadBoolDefault("consul.verifyssl", true)
-	if consulService, err = NewConsulService(c.logger, addr, token, auth, enableSSL, verifySSL, c.config.Node); err != nil {
+	consulServiceCfg := &consulServiceConfig{
+		logger:     c.logger,
+		consulAddr: addr,
+		token:      token,
+		auth:       auth,
+		enableSSL:  enableSSL,
+		verifySSL:  verifySSL,
+		node:       c.config.Node,
+	}
+	if consulService, err = NewConsulService(consulServiceCfg); err != nil {
 		return err
 	}
 	c.consulService = consulService
diff --git a/client/consul.go b/client/consul.go
index abe915fa8..6abc9469f 100644
--- a/client/consul.go
+++ b/client/consul.go
@@ -79,25 +79,34 @@ type ConsulService struct {
 	trackedTskLock sync.Mutex
 }
 
+type consulServiceConfig struct {
+	logger     *log.Logger
+	consulAddr string
+	token      string
+	auth       string
+	enableSSL  bool
+	verifySSL  bool
+	node       *structs.Node
+}
+
 // A factory method to create new consul service
-func NewConsulService(logger *log.Logger, consulAddr string, token string,
-	auth string, enableSSL bool, verifySSL bool, node *structs.Node) (*ConsulService, error) {
+func NewConsulService(config *consulServiceConfig) (*ConsulService, error) {
 	var err error
 	var c *consul.Client
 	cfg := consul.DefaultConfig()
-	cfg.Address = consulAddr
-	if token != "" {
-		cfg.Token = token
+	cfg.Address = config.consulAddr
+	if config.token != "" {
+		cfg.Token = config.token
 	}
-	if auth != "" {
+	if config.auth != "" {
 		var username, password string
-		if strings.Contains(auth, ":") {
-			split := strings.SplitN(auth, ":", 2)
+		if strings.Contains(config.auth, ":") {
+			split := strings.SplitN(config.auth, ":", 2)
 			username = split[0]
 			password = split[1]
 		} else {
-			username = auth
+			username = config.auth
 		}
 
 		cfg.HttpAuth = &consul.HttpBasicAuth{
@@ -105,10 +114,10 @@ func NewConsulService(logger *log.Logger, consulAddr string, token string,
 			Password: password,
 		}
 	}
-	if enableSSL {
+	if config.enableSSL {
 		cfg.Scheme = "https"
 	}
-	if enableSSL && !verifySSL {
+	if config.enableSSL && !config.verifySSL {
 		cfg.HttpClient.Transport = &http.Transport{
 			TLSClientConfig: &tls.Config{
 				InsecureSkipVerify: true,
@@ -122,8 +131,8 @@ func NewConsulService(logger *log.Logger, consulAddr string, token string,
 
 	consulService := ConsulService{
 		client:        &consulApiClient{client: c},
-		logger:        logger,
-		node:          node,
+		logger:        config.logger,
+		node:          config.node,
 		trackedTasks:  make(map[string]*trackedTask),
 		serviceStates: make(map[string]string),
 		shutdownCh:    make(chan struct{}),
diff --git a/client/consul_test.go b/client/consul_test.go
index ca56263ef..efb7c0c98 100644
--- a/client/consul_test.go
+++ b/client/consul_test.go
@@ -46,7 +46,7 @@ func (a *mockConsulApiClient) Checks() (map[string]*consul.AgentCheck, error) {
 
 func newConsulService() *ConsulService {
 	logger := log.New(os.Stdout, "logger: ", log.Lshortfile)
-	c, _ := NewConsulService(logger, "", "", "", false, false, &structs.Node{})
+	c, _ := NewConsulService(&consulServiceConfig{logger, "", "", "", false, false, &structs.Node{}})
 	c.client = &mockConsulApiClient{}
 	return c
 }
diff --git a/client/task_runner_test.go b/client/task_runner_test.go
index 96da69d4e..dcdc84577 100644
--- a/client/task_runner_test.go
+++ b/client/task_runner_test.go
@@ -32,7 +32,7 @@ func testTaskRunner(restarts bool) (*MockTaskStateUpdater, *TaskRunner) {
 	upd := &MockTaskStateUpdater{}
 	alloc := mock.Alloc()
 	task := alloc.Job.TaskGroups[0].Tasks[0]
-	consulClient, _ := NewConsulService(logger, "127.0.0.1:8500", "", "", false, false, &structs.Node{})
+	consulClient, _ := NewConsulService(&consulServiceConfig{logger, "127.0.0.1:8500", "", "", false, false, &structs.Node{}})
 
 	// Initialize the port listing. This should be done by the offer process but
 	// we have a mock so that doesn't happen.
 	task.Resources.Networks[0].ReservedPorts = []structs.Port{{"", 80}}
@@ -164,7 +164,7 @@ func TestTaskRunner_SaveRestoreState(t *testing.T) {
 	}
 
 	// Create a new task runner
-	consulClient, _ := NewConsulService(tr.logger, "127.0.0.1:8500", "", "", false, false, &structs.Node{})
+	consulClient, _ := NewConsulService(&consulServiceConfig{tr.logger, "127.0.0.1:8500", "", "", false, false, &structs.Node{}})
 	tr2 := NewTaskRunner(tr.logger, tr.config, upd.Update, tr.ctx, tr.allocID,
 		&structs.Task{Name: tr.task.Name}, tr.state, tr.restartTracker,
 		consulClient)

From 45bdd1c5e57bdf124423fcf22eb2ee55dfb1efc2 Mon Sep 17 00:00:00 2001
From: Diptanu Choudhury
Date: Fri, 11 Dec 2015 13:07:37 -0800
Subject: [PATCH 12/82] Removing extra lines

---
 client/consul_test.go | 2 --
 1 file changed, 2 deletions(-)

diff --git a/client/consul_test.go b/client/consul_test.go
index efb7c0c98..e75aee74e 100644
--- a/client/consul_test.go
+++ b/client/consul_test.go
@@ -299,7 +299,6 @@ func TestConsul_FilterNomadServicesAndChecks(t *testing.T) {
 	}
 
 	nomadServices := c.filterConsulServices(srvs)
-
 	if len(nomadServices) != 1 {
 		t.Fatalf("Expected number of services: %v, Actual: %v", 1, len(nomadServices))
 	}
@@ -323,7 +322,6 @@ func TestConsul_FilterNomadServicesAndChecks(t *testing.T) {
 	}
 
 	nomadChecks := c.filterConsulChecks(chks)
-
 	if len(nomadChecks) != 1 {
 		t.Fatalf("Expected number of checks: %v, Actual: %v", 1, len(nomadChecks))
 	}

From 519dc4c65b9cd5d237334a9c8017936b6ac34e5d Mon Sep 17 00:00:00 2001
From: Diptanu Choudhury
Date: Fri, 11 Dec 2015 14:02:09 -0800
Subject: [PATCH 13/82] Extracted nomad- to a constant

---
 client/consul.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/client/consul.go b/client/consul.go
index 6abc9469f..9ac123ebc 100644
--- a/client/consul.go
+++ b/client/consul.go
@@ -363,7 +363,7 @@ func (c *ConsulService) filterConsulServices(srvcs map[string]*consul.AgentServi
 	nomadServices := make(map[string]*consul.AgentService)
 	delete(srvcs, "consul")
 	for _, srv := range srvcs {
-		if strings.HasPrefix(srv.ID, "nomad-") {
+		if strings.HasPrefix(srv.ID, structs.NomadConsulPrefix) {
 			nomadServices[srv.ID] = srv
 		}
 	}
@@ -376,7 +376,7 @@ func (c *ConsulService) filterConsulServices(srvcs map[string]*consul.AgentServi
 	nomadChecks := make(map[string]*consul.AgentCheck)
 	for _, chk := range chks {
-		if strings.HasPrefix(chk.ServiceID, "nomad-") {
+		if strings.HasPrefix(chk.ServiceID, structs.NomadConsulPrefix) {
 			nomadChecks[chk.CheckID] = chk
 		}
 	}

From 57ff1a3009f7286f6dee9f25e4ed56f34a2012d9 Mon Sep 17 00:00:00 2001
From: Diptanu Choudhury
Date: Fri, 11 Dec 2015 14:03:52 -0800
Subject: [PATCH 14/82] Adding the changes to structs

---
 nomad/structs/structs.go | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go
index bfa4012d1..2c91d5f93 100644
--- a/nomad/structs/structs.go
+++ b/nomad/structs/structs.go
@@ -1145,6 +1145,10 @@ func (sc *ServiceCheck) Hash(serviceId string) string {
 	return fmt.Sprintf("%x", h.Sum(nil))
 }
 
+const (
+	NomadConsulPrefix = "nomad"
+)
+
 // The Service model represents a Consul service definition
 type Service struct {
 	Id string // Id of the service, this needs to be unique on a local machine
@@ -1157,7 +1161,7 @@ type Service struct {
 // InitFields interpolates values of Job, Task Group and Task in the Service
 // Name. This also generates check names, service id and check ids.
 func (s *Service) InitFields(job string, taskGroup string, task string) {
-	s.Id = fmt.Sprintf("nomad-%s", GenerateUUID())
+	s.Id = fmt.Sprintf("%s-%s", NomadConsulPrefix, GenerateUUID())
 	s.Name = args.ReplaceEnv(s.Name, map[string]string{
 		"JOB":       job,
 		"TASKGROUP": taskGroup,
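Patch 14's ID scheme — a fixed prefix plus a generated UUID — makes ownership testable with a plain prefix check. A sketch under that assumption:

    package main

    import (
    	"fmt"
    	"strings"
    )

    const nomadConsulPrefix = "nomad" // value introduced by the patch above

    func serviceID(uuid string) string {
    	return fmt.Sprintf("%s-%s", nomadConsulPrefix, uuid)
    }

    func managedByNomad(id string) bool {
    	return strings.HasPrefix(id, nomadConsulPrefix+"-")
    }

    func main() {
    	id := serviceID("2121212")
    	fmt.Println(id, managedByNomad(id), managedByNomad("foo-bar")) // nomad-2121212 true false
    }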
From 6a1e6159919097fcbd9cd8fa678684220297e137 Mon Sep 17 00:00:00 2001
From: Diptanu Choudhury
Date: Fri, 11 Dec 2015 14:14:04 -0800
Subject: [PATCH 15/82] Refactored test and added some comments

---
 client/consul_test.go    | 27 +++++++++++++++++++++++++--
 nomad/structs/structs.go |  3 +++
 2 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/client/consul_test.go b/client/consul_test.go
index e75aee74e..e7bb8012e 100644
--- a/client/consul_test.go
+++ b/client/consul_test.go
@@ -5,6 +5,7 @@ import (
 	"github.com/hashicorp/nomad/nomad/structs"
 	"log"
 	"os"
+	"reflect"
 	"testing"
 	"time"
 )
@@ -298,9 +299,19 @@ func TestConsul_FilterNomadServicesAndChecks(t *testing.T) {
 		},
 	}
 
+	expSrvcs := map[string]*consul.AgentService{
+		"nomad-2121212": {
+			ID:      "nomad-2121212",
+			Service: "identity-service",
+			Tags:    []string{"global"},
+			Port:    8080,
+			Address: "10.10.1.11",
+		},
+	}
+
 	nomadServices := c.filterConsulServices(srvs)
-	if len(nomadServices) != 1 {
-		t.Fatalf("Expected number of services: %v, Actual: %v", 1, len(nomadServices))
+	if !reflect.DeepEqual(expSrvcs, nomadServices) {
+		t.Fatalf("Expected: %v, Actual: %v", expSrvcs, nomadServices)
 	}
 
 	nomadServices = c.filterConsulServices(nil)
@@ -321,7 +332,19 @@ func TestConsul_FilterNomadServicesAndChecks(t *testing.T) {
 		},
 	}
 
+	expChks := map[string]*consul.AgentCheck{
+		"212121212": {
+			CheckID:   "212121212",
+			ServiceID: "nomad-2121212",
+			Name:      "ping",
+		},
+	}
+
 	nomadChecks := c.filterConsulChecks(chks)
+	if !reflect.DeepEqual(expChks, nomadChecks) {
+		t.Fatalf("Expected: %v, Actual: %v", expChks, nomadChecks)
+	}
+
 	if len(nomadChecks) != 1 {
 		t.Fatalf("Expected number of checks: %v, Actual: %v", 1, len(nomadChecks))
 	}
diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go
index 2c91d5f93..84a171e67 100644
--- a/nomad/structs/structs.go
+++ b/nomad/structs/structs.go
@@ -1161,6 +1161,9 @@ type Service struct {
 // InitFields interpolates values of Job, Task Group and Task in the Service
 // Name. This also generates check names, service id and check ids.
 func (s *Service) InitFields(job string, taskGroup string, task string) {
+	// We add a prefix to the Service ID so that we can know that this service
+	// is managed by Nomad since Consul can also have services which are not
+	// managed by Nomad
 	s.Id = fmt.Sprintf("%s-%s", NomadConsulPrefix, GenerateUUID())
 	s.Name = args.ReplaceEnv(s.Name, map[string]string{
 		"JOB":       job,

From 6654b4a9360293ca55034232927b0821303c229c Mon Sep 17 00:00:00 2001
From: Chris Bednarski
Date: Fri, 11 Dec 2015 14:38:21 -0800
Subject: [PATCH 16/82] Updated changelog for 0.2.2

---
 CHANGELOG.md | 18 ++++++++++++------
 version.go   |  2 +-
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 360358fe3..c6e6d231b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,13 +1,19 @@
-## 0.3.0
+## 0.2.2 (Unreleased)
 
 IMPROVEMENTS:
-  * Server join/retry-join command line and config options [GH-527]
-  * Enable raw_exec driver in dev mode [GH-558]
+  * core: Enable raw_exec driver in dev mode [GH-558]
+  * cli: Server join/retry-join command line and config options [GH-527]
+  * cli: Nomad reports which config files are loaded at start time, or if none are loaded [GH-536], [GH-553 ]
 
 BUG FIXES:
-  * Shutdown a task now sends the interrupt signal first to the process before
-    forcefully killing it. [GH-543]
-  * Docker driver no longer leaks unix domain socket connections [GH-556]
+  * core: Send syslog to `LOCAL0` by default as previously documented [GH-547]
+  * consul: Nomad is less noisy when Consul is not running. [GH-567]
+  * consul: Nomad only deregisters services that it created. [GH-568]
+  * driver/docker: Docker driver no longer leaks unix domain socket connections
+    [GH-556]
+  * driver/exec: Shutdown a task now sends the interrupt signal first to the
+    process before forcefully killing it. [GH-543]
+  * fingerprint/network: Now correctly detects interfaces on Windows [GH-382]
 
 ## 0.2.1
diff --git a/version.go b/version.go
index c2fba56b0..87d04a0a5 100644
--- a/version.go
+++ b/version.go
@@ -5,7 +5,7 @@ var GitCommit string
 var GitDescribe string
 
 // The main version number that is being run at the moment.
-const Version = "0.3.0"
+const Version = "0.2.2"
 
 // A pre-release marker for the version. If this is "" (empty string)
 // then it means that it is a final release. Otherwise, this is a pre-release

From 598b6c364def6b61879b4243e73edcea55c58d77 Mon Sep 17 00:00:00 2001
From: Chris Bednarski
Date: Fri, 11 Dec 2015 14:38:45 -0800
Subject: [PATCH 17/82] Formatting

---
 CHANGELOG.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c6e6d231b..b92481ec9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,7 +3,8 @@
 IMPROVEMENTS:
   * core: Enable raw_exec driver in dev mode [GH-558]
   * cli: Server join/retry-join command line and config options [GH-527]
-  * cli: Nomad reports which config files are loaded at start time, or if none are loaded [GH-536], [GH-553 ]
+  * cli: Nomad reports which config files are loaded at start time, or if none
+    are loaded [GH-536], [GH-553 ]
 
 BUG FIXES:

From 74176ed3b14b1e32fb41a0c42e0c2053b374d52e Mon Sep 17 00:00:00 2001
From: Chris Bednarski
Date: Fri, 11 Dec 2015 14:40:23 -0800
Subject: [PATCH 18/82] Added 0.2.1 release date to changelog

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index b92481ec9..fff19f120 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -16,7 +16,7 @@ BUG FIXES:
     process before forcefully killing it. [GH-543]
   * fingerprint/network: Now correctly detects interfaces on Windows [GH-382]
 
-## 0.2.1
+## 0.2.1 (November 28, 2015)
 
 IMPROVEMENTS:
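Patch 15 above strengthens the filter test from a length check to reflect.DeepEqual. Why that matters, in miniature:

    package main

    import (
    	"fmt"
    	"reflect"
    )

    func main() {
    	expected := map[string]string{"nomad-1": "identity-service"}
    	actual := map[string]string{"nomad-1": "http-frontend"}
    	// A length comparison passes even though the contents differ:
    	fmt.Println(len(expected) == len(actual)) // true
    	// DeepEqual compares keys and values recursively:
    	fmt.Println(reflect.DeepEqual(expected, actual)) // false
    }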
From 45e9337550f47146b560ddb9840d4a7fb23351de Mon Sep 17 00:00:00 2001
From: Chris Bednarski
Date: Fri, 11 Dec 2015 14:41:45 -0800
Subject: [PATCH 19/82] Formatting tweaks

---
 CHANGELOG.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index fff19f120..95fd0534f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,15 +1,15 @@
 ## 0.2.2 (Unreleased)
 
 IMPROVEMENTS:
-  * core: Enable raw_exec driver in dev mode [GH-558]
+  * core: Enable `raw_exec` driver in dev mode [GH-558]
   * cli: Server join/retry-join command line and config options [GH-527]
   * cli: Nomad reports which config files are loaded at start time, or if none
-    are loaded [GH-536], [GH-553 ]
+    are loaded [GH-536], [GH-553]
 
 BUG FIXES:
   * core: Send syslog to `LOCAL0` by default as previously documented [GH-547]
-  * consul: Nomad is less noisy when Consul is not running. [GH-567]
-  * consul: Nomad only deregisters services that it created. [GH-568]
+  * consul: Nomad is less noisy when Consul is not running [GH-567]
+  * consul: Nomad only deregisters services that it created [GH-568]
   * driver/docker: Docker driver no longer leaks unix domain socket connections
     [GH-556]
   * driver/exec: Shutdown a task now sends the interrupt signal first to the

From d536fe9027c01aea2899be14d29d587497a41873 Mon Sep 17 00:00:00 2001
From: Alex Dadgar
Date: Fri, 11 Dec 2015 15:02:13 -0800
Subject: [PATCH 20/82] Remove all calls to the default logger

---
 client/driver/docker.go       | 26 +++++++++++++-------------
 client/fingerprint/env_aws.go |  8 ++++----
 command/agent/command_test.go |  3 +--
 3 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/client/driver/docker.go b/client/driver/docker.go
index 25c6874fb..330acd75c 100644
--- a/client/driver/docker.go
+++ b/client/driver/docker.go
@@ -455,27 +455,27 @@ func (d *DockerDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle
 			},
 		})
 		if err != nil {
-			log.Printf("[ERR] driver.docker: failed to query list of containers matching name:%s", config.Name)
+			d.logger.Printf("[ERR] driver.docker: failed to query list of containers matching name:%s", config.Name)
 			return nil, fmt.Errorf("Failed to query list of containers: %s", err)
 		}
 		if len(containers) != 1 {
-			log.Printf("[ERR] driver.docker: failed to get id for container %s", config.Name)
+			d.logger.Printf("[ERR] driver.docker: failed to get id for container %s", config.Name)
 			return nil, fmt.Errorf("Failed to get id for container %s", config.Name)
 		}
 
-		log.Printf("[INFO] driver.docker: a container with the name %s already exists; will attempt to purge and re-create", config.Name)
+		d.logger.Printf("[INFO] driver.docker: a container with the name %s already exists; will attempt to purge and re-create", config.Name)
 		err = client.RemoveContainer(docker.RemoveContainerOptions{
 			ID: containers[0].ID,
 		})
 		if err != nil {
-			log.Printf("[ERR] driver.docker: failed to purge container %s", config.Name)
+			d.logger.Printf("[ERR] driver.docker: failed to purge container %s", config.Name)
 			return nil, fmt.Errorf("Failed to purge container %s: %s", config.Name, err)
 		}
-		log.Printf("[INFO] driver.docker: purged container %s", config.Name)
+		d.logger.Printf("[INFO] driver.docker: purged container %s", config.Name)
 		container, err = client.CreateContainer(config)
 		if err != nil {
-			log.Printf("[ERR] driver.docker: failed to re-create container %s; aborting", config.Name)
+			d.logger.Printf("[ERR] driver.docker: failed to re-create container %s; aborting", config.Name)
 			return nil, fmt.Errorf("Failed to re-create container %s; aborting", config.Name)
 		}
 	} else {
@@ -593,10 +593,10 @@ func (h *DockerHandle) Kill() error {
 	// Stop the container
 	err := h.client.StopContainer(h.containerID, 5)
 	if err != nil {
-		log.Printf("[ERR] driver.docker: failed to stop container %s", h.containerID)
+		h.logger.Printf("[ERR] driver.docker: failed to stop container %s", h.containerID)
 		return fmt.Errorf("Failed to stop container %s: %s", h.containerID, err)
 	}
-	log.Printf("[INFO] driver.docker: stopped container %s", h.containerID)
+	h.logger.Printf("[INFO] driver.docker: stopped container %s", h.containerID)
 
 	// Cleanup container
 	if h.cleanupContainer {
@@ -605,10 +605,10 @@ func (h *DockerHandle) Kill() error {
 			RemoveVolumes: true,
 		})
 		if err != nil {
-			log.Printf("[ERR] driver.docker: failed to remove container %s", h.containerID)
+			h.logger.Printf("[ERR] driver.docker: failed to remove container %s", h.containerID)
 			return fmt.Errorf("Failed to remove container %s: %s", h.containerID, err)
 		}
-		log.Printf("[INFO] driver.docker: removed container %s", h.containerID)
+		h.logger.Printf("[INFO] driver.docker: removed container %s", h.containerID)
 	}
 
 	// Cleanup image. This operation may fail if the image is in use by another
@@ -624,17 +624,17 @@ func (h *DockerHandle) Kill() error {
 			},
 		})
 		if err != nil {
-			log.Printf("[ERR] driver.docker: failed to query list of containers matching image:%s", h.imageID)
+			h.logger.Printf("[ERR] driver.docker: failed to query list of containers matching image:%s", h.imageID)
 			return fmt.Errorf("Failed to query list of containers: %s", err)
 		}
 		inUse := len(containers)
 		if inUse > 0 {
-			log.Printf("[INFO] driver.docker: image %s is still in use by %d container(s)", h.imageID, inUse)
+			h.logger.Printf("[INFO] driver.docker: image %s is still in use by %d container(s)", h.imageID, inUse)
 		} else {
 			return fmt.Errorf("Failed to remove image %s", h.imageID)
 		}
 	} else {
-		log.Printf("[INFO] driver.docker: removed image %s", h.imageID)
+		h.logger.Printf("[INFO] driver.docker: removed image %s", h.imageID)
 	}
 	return nil
diff --git a/client/fingerprint/env_aws.go b/client/fingerprint/env_aws.go
index ca50a76c3..1963f6c50 100644
--- a/client/fingerprint/env_aws.go
+++ b/client/fingerprint/env_aws.go
@@ -80,7 +80,7 @@ func NewEnvAWSFingerprint(logger *log.Logger) Fingerprint {
 }
 
 func (f *EnvAWSFingerprint) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
-	if !isAWS() {
+	if !f.isAWS() {
 		return false, nil
 	}
 
@@ -161,7 +161,7 @@ func (f *EnvAWSFingerprint) Fingerprint(cfg *config.Config, node *structs.Node)
 	return true, nil
 }
 
-func isAWS() bool {
+func (f *EnvAWSFingerprint) isAWS() bool {
 	// Read the internal metadata URL from the environment, allowing test files to
 	// provide their own
 	metadataURL := os.Getenv("AWS_ENV_URL")
@@ -178,7 +178,7 @@ func isAWS() bool {
 	// Query the metadata url for the ami-id, to verify we're on AWS
 	resp, err := client.Get(metadataURL + "ami-id")
 	if err != nil {
-		log.Printf("[DEBUG] fingerprint.env_aws: Error querying AWS Metadata URL, skipping")
+		f.logger.Printf("[DEBUG] fingerprint.env_aws: Error querying AWS Metadata URL, skipping")
 		return false
 	}
 	defer resp.Body.Close()
@@ -190,7 +190,7 @@ func isAWS() bool {
 
 	instanceID, err := ioutil.ReadAll(resp.Body)
 	if err != nil {
-		log.Printf("[DEBUG] fingerprint.env_aws: Error reading AWS Instance ID, skipping")
+		f.logger.Printf("[DEBUG] fingerprint.env_aws: Error reading AWS Instance ID, skipping")
 		return false
 	}
diff --git a/command/agent/command_test.go b/command/agent/command_test.go
index a42261853..9e6d750ef 100644
--- a/command/agent/command_test.go
+++ b/command/agent/command_test.go
@@ -3,7 +3,6 @@ package agent
 import (
 	"fmt"
 	"io/ioutil"
-	"log"
 	"os"
 	"strings"
 	"testing"
@@ -112,7 +111,7 @@ func TestRetryJoin(t *testing.T) {
 
 	go func() {
 		if code := cmd.Run(args); code != 0 {
-			log.Printf("bad: %d", code)
+			t.Logf("bad: %d", code)
 		}
 		close(doneCh)
 	}()
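Patch 20 replaces package-level log.Printf calls with the logger hung off each driver, so agent-level log configuration applies uniformly. The shape of that refactor, with illustrative names:

    package main

    import (
    	"log"
    	"os"
    )

    type dockerHandle struct {
    	logger      *log.Logger // injected; honors the agent's configured output
    	containerID string
    }

    func (h *dockerHandle) kill() {
    	// Before the refactor this was log.Printf, which writes to the global
    	// default logger regardless of how the agent was configured.
    	h.logger.Printf("[INFO] driver.docker: stopped container %s", h.containerID)
    }

    func main() {
    	h := &dockerHandle{
    		logger:      log.New(os.Stderr, "", log.LstdFlags),
    		containerID: "c0ffee",
    	}
    	h.kill()
    }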
From 80452132db8d2230ff538b38d06e4dabc8b976d4 Mon Sep 17 00:00:00 2001
From: Alex Dadgar
Date: Fri, 11 Dec 2015 15:09:09 -0800
Subject: [PATCH 21/82] Changelog

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 95fd0534f..7d4f4133e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,6 +15,7 @@ BUG FIXES:
   * driver/exec: Shutdown a task now sends the interrupt signal first to the
     process before forcefully killing it. [GH-543]
   * fingerprint/network: Now correctly detects interfaces on Windows [GH-382]
+  * client: remove all calls to default logger [GH-570]
 
 ## 0.2.1 (November 28, 2015)

From 7d3e21fcb3c14be7c581033ee51d5e86a6d321cb Mon Sep 17 00:00:00 2001
From: Chris Bednarski
Date: Fri, 11 Dec 2015 15:20:32 -0800
Subject: [PATCH 22/82] Add lint check for log.Printf usage

---
 Makefile | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 3f8362668..82d9df435 100644
--- a/Makefile
+++ b/Makefile
@@ -48,8 +48,12 @@ vet:
 	@echo "--> Running go tool vet $(VETARGS) ."
 	@go tool vet $(VETARGS) . ; if [ $$? -eq 1 ]; then \
 		echo ""; \
-		echo "Vet found suspicious constructs. Please check the reported constructs"; \
-		echo "and fix them if necessary before submitting the code for reviewal."; \
+		echo "[LINT] Vet found suspicious constructs. Please check the reported constructs"; \
+		echo "and fix them if necessary before submitting the code for review."; \
 	fi
 
+	@git grep -n `echo "log"".Print"` ; if [ $$? -eq 0 ]; then \
+		echo "[LINT] Found "log"".Printf" calls. These should use Nomad's logger instead."; \
+	fi
 
 web:

From 4b41cfc43e3221bbd1e46e341fc3e6c902a9c565 Mon Sep 17 00:00:00 2001
From: Diptanu Choudhury
Date: Fri, 11 Dec 2015 15:45:49 -0800
Subject: [PATCH 23/82] Fixed log printing logic

---
 client/consul.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/client/consul.go b/client/consul.go
index 9ac123ebc..e75c71f6a 100644
--- a/client/consul.go
+++ b/client/consul.go
@@ -309,7 +309,7 @@ func (c *ConsulService) registerService(service *structs.Service, task *structs.
 
 // registerCheck registers a check with Consul
 func (c *ConsulService) registerCheck(check *consul.AgentCheckRegistration) error {
-	c.printLogMessage("[INFO] consul: registering check with ID: %v for service: %v", check.ID, check.ServiceID)
+	c.printLogMessage("[INFO] consul: registering check with ID: %s for service: %s", check.ID, check.ServiceID)
 	return c.client.CheckRegister(check)
 }
 
@@ -388,6 +388,6 @@
 // related information
 func (c *ConsulService) printLogMessage(message string, v ...interface{}) {
 	if _, ok := c.node.Attributes["consul.version"]; ok {
-		c.logger.Printf(message, v)
+		c.logger.Println(fmt.Sprintf(message, v...))
 	}
 }

From c45ae6eb3af5eb4e0276833b70b6252da530e816 Mon Sep 17 00:00:00 2001
From: Diptanu Choudhury
Date: Fri, 11 Dec 2015 15:48:51 -0800
Subject: [PATCH 24/82] Removing the pre-release marker

---
 version.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/version.go b/version.go
index 87d04a0a5..3361dd423 100644
--- a/version.go
+++ b/version.go
@@ -10,4 +10,4 @@ const Version = "0.2.2"
 // A pre-release marker for the version. If this is "" (empty string)
 // then it means that it is a final release. Otherwise, this is a pre-release
 // such as "dev" (in development), "beta", "rc1", etc.
-const VersionPrerelease = "dev"
+const VersionPrerelease = ""

From 0e754116c47a7d9a1531a0d7f3bc7ef75b779e41 Mon Sep 17 00:00:00 2001
From: Diptanu Choudhury
Date: Fri, 11 Dec 2015 15:53:17 -0800
Subject: [PATCH 25/82] Updated the changelog to add the 0.2.2 release date

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7d4f4133e..481d6b4fe 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,4 @@
-## 0.2.2 (Unreleased)
+## 0.2.2 (December 11, 2015)
 
 IMPROVEMENTS:
   * core: Enable `raw_exec` driver in dev mode [GH-558]

From 460228488b462ef14080888d4d3bbd75cdad05f6 Mon Sep 17 00:00:00 2001
From: Diptanu Choudhury
Date: Fri, 11 Dec 2015 17:06:45 -0800
Subject: [PATCH 26/82] Bumping up the version in website

---
 website/config.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/website/config.rb b/website/config.rb
index 8bf363bb9..96b57dc74 100644
--- a/website/config.rb
+++ b/website/config.rb
@@ -2,7 +2,7 @@ set :base_url, "https://www.nomadproject.io/"
 
 activate :hashicorp do |h|
   h.name = "nomad"
-  h.version = "0.2.1"
+  h.version = "0.2.2"
   h.github_slug = "hashicorp/nomad"
   h.minify_javascript = false

From 53ca64ed7e6bddcc9827b3d58a9e26abd2d5e9a6 Mon Sep 17 00:00:00 2001
From: Diptanu Choudhury
Date: Sat, 12 Dec 2015 09:08:24 -0800
Subject: [PATCH 27/82] Using Go 1.5.2

---
 Vagrantfile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Vagrantfile b/Vagrantfile
index 07c06f034..a328b355a 100644
--- a/Vagrantfile
+++ b/Vagrantfile
@@ -18,8 +18,8 @@ ARCH=`uname -m | sed 's|i686|386|' | sed 's|x86_64|amd64|'`
 
 # Install Go
 cd /tmp
-wget -q https://storage.googleapis.com/golang/go1.5.1.linux-${ARCH}.tar.gz
-tar -xf go1.5.1.linux-${ARCH}.tar.gz
+wget -q https://storage.googleapis.com/golang/go1.5.2.linux-${ARCH}.tar.gz
+tar -xf go1.5.2.linux-${ARCH}.tar.gz
 sudo mv go $SRCROOT
 sudo chmod 775 $SRCROOT
 sudo chown vagrant:vagrant $SRCROOT

From dffcc8bac6dff6054771b714db7edee17f565e33 Mon Sep 17 00:00:00 2001
From: Diptanu Choudhury
Date: Mon, 14 Dec 2015 08:54:34 -0800
Subject: [PATCH 28/82] Changing the prefix of the service id

---
 nomad/structs/structs.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go
index 84a171e67..265c0713a 100644
--- a/nomad/structs/structs.go
+++ b/nomad/structs/structs.go
@@ -1146,7 +1146,7 @@ func (sc *ServiceCheck) Hash(serviceId string) string {
 }
 
 const (
-	NomadConsulPrefix = "nomad"
+	NomadConsulPrefix = "1729nomad"
 )
 
 // The Service model represents a Consul service definition
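Patch 23 above is the fix for the variadic-forwarding bug noted earlier: format first with fmt.Sprintf and the expanded slice, then hand the finished string to the logger. The same helper, standalone and minus the node-attribute gate:

    package main

    import (
    	"fmt"
    	"log"
    	"os"
    )

    func printLogMessage(logger *log.Logger, message string, v ...interface{}) {
    	// fmt.Sprintf(message, v...) expands the slice so each element
    	// matches a verb; Println then emits the finished string.
    	logger.Println(fmt.Sprintf(message, v...))
    }

    func main() {
    	logger := log.New(os.Stdout, "", 0)
    	printLogMessage(logger, "[INFO] consul: registering check with ID: %s for service: %s",
    		"chk-1", "nomad-2121212")
    }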
"nomad-2121212", + "nomad-registered-service-2121212": { + ID: "nomad-registered-service-2121212", Service: "identity-service", Tags: []string{"global"}, Port: 8080, @@ -327,7 +327,7 @@ func TestConsul_FilterNomadServicesAndChecks(t *testing.T) { }, "212121212": { CheckID: "212121212", - ServiceID: "nomad-2121212", + ServiceID: "nomad-registered-service-2121212", Name: "ping", }, } @@ -335,7 +335,7 @@ func TestConsul_FilterNomadServicesAndChecks(t *testing.T) { expChks := map[string]*consul.AgentCheck{ "212121212": { CheckID: "212121212", - ServiceID: "nomad-2121212", + ServiceID: "nomad-registered-service-2121212", Name: "ping", }, } diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 265c0713a..82723965c 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -1146,7 +1146,7 @@ func (sc *ServiceCheck) Hash(serviceId string) string { } const ( - NomadConsulPrefix = "1729nomad" + NomadConsulPrefix = "nomad-registered-service" ) // The Service model represents a Consul service defintion From cacd8a78b3691d2e3bee251b2d85644593f55ef0 Mon Sep 17 00:00:00 2001 From: Chris Hines Date: Mon, 14 Dec 2015 14:59:19 -0500 Subject: [PATCH 30/82] Use minimum OS specific path. This change ensures LoadConfig and LoadConfigDir report consistent paths to files and those paths use the appropriate path separator for the target OS. Note that LoadConfigDir constructs paths with filepath.Join, which calls filepath.Clean, which calls filepath.FromSlash. --- command/agent/config.go | 2 +- command/agent/config_test.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/command/agent/config.go b/command/agent/config.go index e3fb3ae17..8ee166842 100644 --- a/command/agent/config.go +++ b/command/agent/config.go @@ -613,7 +613,7 @@ func LoadConfig(path string) (*Config, error) { if fi.IsDir() { return LoadConfigDir(path) } - return LoadConfigFile(path) + return LoadConfigFile(filepath.Clean(path)) } // LoadConfigString is used to parse a config string diff --git a/command/agent/config_test.go b/command/agent/config_test.go index e342b997b..f89386a95 100644 --- a/command/agent/config_test.go +++ b/command/agent/config_test.go @@ -327,7 +327,7 @@ func TestConfig_LoadConfigsFileOrder(t *testing.T) { config := config1.Merge(config2) if !reflect.DeepEqual(config.Files, expected) { - t.Errorf("Loaded configs don't match\nExpected\n%+vGot\n%+v\n", + t.Errorf("Loaded configs don't match\nwant: %+v\n got: %+v\n", expected, config.Files) } } From 04c7ba69af032781825b70c12264bbf2a98d54eb Mon Sep 17 00:00:00 2001 From: Chris Hines Date: Mon, 14 Dec 2015 17:03:08 -0500 Subject: [PATCH 31/82] Remove clock granularity sensitive test assertion. TestRequestTime already verifies that the request time is properly recorded. 
--- api/util_test.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/api/util_test.go b/api/util_test.go index 1c1073ed7..dfd0e95d0 100644 --- a/api/util_test.go +++ b/api/util_test.go @@ -17,9 +17,6 @@ func assertWriteMeta(t *testing.T, wm *WriteMeta) { if wm.LastIndex == 0 { t.Fatalf("bad index: %d", wm.LastIndex) } - if wm.RequestTime == 0 { - t.Fatalf("bad request time: %d", wm.RequestTime) - } } func testJob() *Job { From 53e05ed579dd60999e6fcd6da189c4f66ead7072 Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Mon, 14 Dec 2015 15:07:24 -0800 Subject: [PATCH 32/82] Moving to Consul 0.6 --- Vagrantfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Vagrantfile b/Vagrantfile index a328b355a..a5b251d2f 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -42,7 +42,7 @@ source /etc/profile.d/gopath.sh echo Fetching Consul... cd /tmp/ -wget https://releases.hashicorp.com/consul/0.6.0-rc2/consul_0.6.0-rc2_linux_amd64.zip -O consul.zip +wget https://releases.hashicorp.com/consul/0.6.0/consul_0.6.0_linux_amd64.zip -O consul.zip echo Installing Consul... unzip consul.zip sudo chmod +x consul From d4d7572604c2afcddd675d4f3410591e55db7e18 Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Mon, 14 Dec 2015 14:53:49 -0800 Subject: [PATCH 33/82] Making the allocs hold service ids --- api/allocations.go | 1 + client/alloc_runner.go | 4 +-- client/consul.go | 56 +++++++++++++++++++++----------------- client/task_runner.go | 32 +++++++++++----------- nomad/structs/structs.go | 23 +++++++++++----- scheduler/generic_sched.go | 4 +++ scheduler/system_sched.go | 4 +++ 7 files changed, 74 insertions(+), 50 deletions(-) diff --git a/api/allocations.go b/api/allocations.go index 00ab6984d..73f600d7e 100644 --- a/api/allocations.go +++ b/api/allocations.go @@ -47,6 +47,7 @@ type Allocation struct { TaskGroup string Resources *Resources TaskResources map[string]*Resources + Services map[string]string Metrics *AllocationMetric DesiredStatus string DesiredDescription string diff --git a/client/alloc_runner.go b/client/alloc_runner.go index d48aaf166..14b93dd0d 100644 --- a/client/alloc_runner.go +++ b/client/alloc_runner.go @@ -112,7 +112,7 @@ func (r *AllocRunner) RestoreState() error { task := &structs.Task{Name: name} restartTracker := newRestartTracker(r.alloc.Job.Type, r.RestartPolicy) tr := NewTaskRunner(r.logger, r.config, r.setTaskState, r.ctx, - r.alloc.ID, task, r.alloc.TaskStates[task.Name], restartTracker, + r.alloc, task, r.alloc.TaskStates[task.Name], restartTracker, r.consulService) r.tasks[name] = tr @@ -324,7 +324,7 @@ func (r *AllocRunner) Run() { task.Resources = alloc.TaskResources[task.Name] restartTracker := newRestartTracker(r.alloc.Job.Type, r.RestartPolicy) tr := NewTaskRunner(r.logger, r.config, r.setTaskState, r.ctx, - r.alloc.ID, task, r.alloc.TaskStates[task.Name], restartTracker, + r.alloc, task, r.alloc.TaskStates[task.Name], restartTracker, r.consulService) r.tasks[task.Name] = tr go tr.Run() diff --git a/client/consul.go b/client/consul.go index e75c71f6a..39f398579 100644 --- a/client/consul.go +++ b/client/consul.go @@ -62,8 +62,8 @@ func (a *consulApiClient) Checks() (map[string]*consul.AgentCheck, error) { // trackedTask is a Task that we are tracking for changes in service and check // definitions and keep them synced with Consul Agent type trackedTask struct { - allocID string - task *structs.Task + task *structs.Task + alloc *structs.Allocation } // ConsulService is the service which tracks tasks and syncs the services and @@ -143,15 +143,16 @@
func NewConsulService(config *consulServiceConfig) (*ConsulService, error) { // Register starts tracking a task for changes to its services and tasks and // adds/removes services and checks associated with it. -func (c *ConsulService) Register(task *structs.Task, allocID string) error { +func (c *ConsulService) Register(task *structs.Task, alloc *structs.Allocation) error { var mErr multierror.Error c.trackedTskLock.Lock() - tt := &trackedTask{allocID: allocID, task: task} - c.trackedTasks[fmt.Sprintf("%s-%s", allocID, task.Name)] = tt + tt := &trackedTask{task: task, alloc: alloc} + c.trackedTasks[fmt.Sprintf("%s-%s", alloc.ID, task.Name)] = tt c.trackedTskLock.Unlock() for _, service := range task.Services { c.logger.Printf("[INFO] consul: registering service %s with consul.", service.Name) - if err := c.registerService(service, task, allocID); err != nil { + if err := c.registerService(service, task, alloc); err != nil { + fmt.Printf("DIPTANU ERR %v\n", err) mErr.Errors = append(mErr.Errors, err) } } @@ -161,17 +162,18 @@ func (c *ConsulService) Register(task *structs.Task, allocID string) error { // Deregister stops tracking a task for changes to its services and checks and // removes all the services and checks associated with the Task -func (c *ConsulService) Deregister(task *structs.Task, allocID string) error { +func (c *ConsulService) Deregister(task *structs.Task, alloc *structs.Allocation) error { var mErr multierror.Error c.trackedTskLock.Lock() - delete(c.trackedTasks, fmt.Sprintf("%s-%s", allocID, task.Name)) + delete(c.trackedTasks, fmt.Sprintf("%s-%s", alloc.ID, task.Name)) c.trackedTskLock.Unlock() for _, service := range task.Services { - if service.Id == "" { + serviceId := alloc.Services[service.Name] + if serviceId == "" { continue } c.logger.Printf("[INFO] consul: deregistering service %v with consul", service.Name) - if err := c.deregisterService(service.Id); err != nil { + if err := c.deregisterService(serviceId); err != nil { c.printLogMessage("[DEBUG] consul: error in deregistering service %v from consul", service.Name) mErr.Errors = append(mErr.Errors, err) } @@ -223,28 +225,30 @@ func (c *ConsulService) performSync() { // Add services and checks which Consul doesn't know about for _, trackedTask := range c.trackedTasks { for _, service := range trackedTask.task.Services { + serviceId := trackedTask.alloc.Services[service.Name] // Add new services which Consul agent isn't aware of - knownServices[service.Id] = struct{}{} - if _, ok := consulServices[service.Id]; !ok { + knownServices[serviceId] = struct{}{} + if _, ok := consulServices[serviceId]; !ok { c.printLogMessage("[INFO] consul: registering service %s with consul.", service.Name) - c.registerService(service, trackedTask.task, trackedTask.allocID) + c.registerService(service, trackedTask.task, trackedTask.alloc) continue } // If a service has changed, re-register it with Consul agent - if service.Hash() != c.serviceStates[service.Id] { + if service.Hash() != c.serviceStates[serviceId] { c.printLogMessage("[INFO] consul: reregistering service %s with consul.", service.Name) - c.registerService(service, trackedTask.task, trackedTask.allocID) + c.registerService(service, trackedTask.task, trackedTask.alloc) continue } // Add new checks that Consul isn't aware of for _, check := range service.Checks { - knownChecks[check.Id] = struct{}{} - if _, ok := consulChecks[check.Id]; !ok { + checkId := check.Hash(serviceId) + knownChecks[checkId] = struct{}{} + if _, ok := consulChecks[checkId]; !ok { host, port := 
trackedTask.task.FindHostAndPortFor(service.PortLabel) - cr := c.makeCheck(service, check, host, port) + cr := c.makeCheck(serviceId, check, host, port) c.registerCheck(cr) } } @@ -276,16 +280,17 @@ func (c *ConsulService) performSync() { } // registerService registers a Service with Consul -func (c *ConsulService) registerService(service *structs.Service, task *structs.Task, allocID string) error { +func (c *ConsulService) registerService(service *structs.Service, task *structs.Task, alloc *structs.Allocation) error { var mErr multierror.Error host, port := task.FindHostAndPortFor(service.PortLabel) if host == "" || port == 0 { return fmt.Errorf("consul: the port:%q marked for registration of service: %q couldn't be found", service.PortLabel, service.Name) } - c.serviceStates[service.Id] = service.Hash() + serviceId := alloc.Services[service.Name] + c.serviceStates[serviceId] = service.Hash() asr := &consul.AgentServiceRegistration{ - ID: service.Id, + ID: serviceId, Name: service.Name, Tags: service.Tags, Port: port, @@ -297,7 +302,7 @@ func (c *ConsulService) registerService(service *structs.Service, task *structs. mErr.Errors = append(mErr.Errors, err) } for _, check := range service.Checks { - cr := c.makeCheck(service, check, host, port) + cr := c.makeCheck(serviceId, check, host, port) if err := c.registerCheck(cr); err != nil { c.printLogMessage("[DEBUG] consul: error while registering check %v with consul: %v", check.Name, err) mErr.Errors = append(mErr.Errors, err) @@ -329,11 +334,12 @@ func (c *ConsulService) deregisterService(serviceId string) error { } // makeCheck creates a Consul Check Registration struct -func (c *ConsulService) makeCheck(service *structs.Service, check *structs.ServiceCheck, ip string, port int) *consul.AgentCheckRegistration { +func (c *ConsulService) makeCheck(serviceId string, check *structs.ServiceCheck, ip string, port int) *consul.AgentCheckRegistration { + checkId := check.Hash(serviceId) cr := &consul.AgentCheckRegistration{ - ID: check.Id, + ID: checkId, Name: check.Name, - ServiceID: service.Id, + ServiceID: serviceId, } cr.Interval = check.Interval.String() cr.Timeout = check.Timeout.String() diff --git a/client/task_runner.go b/client/task_runner.go index 7f6cc40ff..cf446c395 100644 --- a/client/task_runner.go +++ b/client/task_runner.go @@ -23,7 +23,7 @@ type TaskRunner struct { updater TaskStateUpdater logger *log.Logger ctx *driver.ExecContext - allocID string + alloc *structs.Allocation restartTracker restartTracker consulService *ConsulService @@ -52,7 +52,7 @@ type TaskStateUpdater func(taskName string) // NewTaskRunner is used to create a new task context func NewTaskRunner(logger *log.Logger, config *config.Config, updater TaskStateUpdater, ctx *driver.ExecContext, - allocID string, task *structs.Task, state *structs.TaskState, + alloc *structs.Allocation, task *structs.Task, state *structs.TaskState, restartTracker restartTracker, consulService *ConsulService) *TaskRunner { tc := &TaskRunner{ @@ -62,7 +62,7 @@ func NewTaskRunner(logger *log.Logger, config *config.Config, restartTracker: restartTracker, consulService: consulService, ctx: ctx, - allocID: allocID, + alloc: alloc, task: task, state: state, updateCh: make(chan *structs.Task, 8), @@ -85,7 +85,7 @@ func (r *TaskRunner) stateFilePath() string { dirName := fmt.Sprintf("task-%s", hashHex) // Generate the path - path := filepath.Join(r.config.StateDir, "alloc", r.allocID, + path := filepath.Join(r.config.StateDir, "alloc", r.alloc.ID, dirName, "state.json") return path } @@ 
-113,7 +113,7 @@ func (r *TaskRunner) RestoreState() error { // In the case it fails, we relaunch the task in the Run() method. if err != nil { r.logger.Printf("[ERR] client: failed to open handle to task '%s' for alloc '%s': %v", - r.task.Name, r.allocID, err) + r.task.Name, r.alloc.ID, err) return nil } r.handle = handle @@ -176,7 +176,7 @@ func (r *TaskRunner) createDriver() (driver.Driver, error) { driver, err := driver.NewDriver(r.task.Driver, driverCtx) if err != nil { err = fmt.Errorf("failed to create driver '%s' for alloc %s: %v", - r.task.Driver, r.allocID, err) + r.task.Driver, r.alloc.ID, err) r.logger.Printf("[ERR] client: %s", err) } return driver, err @@ -196,7 +196,7 @@ func (r *TaskRunner) startTask() error { handle, err := driver.Start(r.ctx, r.task) if err != nil { r.logger.Printf("[ERR] client: failed to start task '%s' for alloc '%s': %v", - r.task.Name, r.allocID, err) + r.task.Name, r.alloc.ID, err) e := structs.NewTaskEvent(structs.TaskDriverFailure). SetDriverError(fmt.Errorf("failed to start: %v", err)) r.setState(structs.TaskStateDead, e) @@ -211,7 +211,7 @@ func (r *TaskRunner) startTask() error { func (r *TaskRunner) Run() { defer close(r.waitCh) r.logger.Printf("[DEBUG] client: starting task context for '%s' (alloc '%s')", - r.task.Name, r.allocID) + r.task.Name, r.alloc.ID) r.run() return @@ -234,10 +234,10 @@ func (r *TaskRunner) run() { destroyed := false // Register the services defined by the task with Consul - r.consulService.Register(r.task, r.allocID) + r.consulService.Register(r.task, r.alloc) // De-Register the services belonging to the task from consul - defer r.consulService.Deregister(r.task, r.allocID) + defer r.consulService.Deregister(r.task, r.alloc) OUTER: // Wait for updates @@ -249,7 +249,7 @@ func (r *TaskRunner) run() { // Update r.task = update if err := r.handle.Update(update); err != nil { - r.logger.Printf("[ERR] client: failed to update task '%s' for alloc '%s': %v", r.task.Name, r.allocID, err) + r.logger.Printf("[ERR] client: failed to update task '%s' for alloc '%s': %v", r.task.Name, r.alloc.ID, err) } case <-r.destroyCh: // Avoid destroying twice @@ -259,7 +259,7 @@ func (r *TaskRunner) run() { // Send the kill signal, and use the WaitCh to block until complete if err := r.handle.Kill(); err != nil { - r.logger.Printf("[ERR] client: failed to kill task '%s' for alloc '%s': %v", r.task.Name, r.allocID, err) + r.logger.Printf("[ERR] client: failed to kill task '%s' for alloc '%s': %v", r.task.Name, r.alloc.ID, err) destroyErr = err } destroyed = true @@ -274,16 +274,16 @@ func (r *TaskRunner) run() { // Log whether the task was successful or not. if !waitRes.Successful() { - r.logger.Printf("[ERR] client: failed to complete task '%s' for alloc '%s': %v", r.task.Name, r.allocID, waitRes) + r.logger.Printf("[ERR] client: failed to complete task '%s' for alloc '%s': %v", r.task.Name, r.alloc.ID, waitRes) } else { - r.logger.Printf("[INFO] client: completed task '%s' for alloc '%s'", r.task.Name, r.allocID) + r.logger.Printf("[INFO] client: completed task '%s' for alloc '%s'", r.task.Name, r.alloc.ID) } // Check if we should restart. If not mark task as dead and exit.
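// nextRestart consults the job's restart policy and returns whether to relaunch the task and how long to wait before doing so.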
shouldRestart, when := r.restartTracker.nextRestart(waitRes.ExitCode) waitEvent := r.waitErrorToEvent(waitRes) if !shouldRestart { - r.logger.Printf("[INFO] client: Not restarting task: %v for alloc: %v ", r.task.Name, r.allocID) + r.logger.Printf("[INFO] client: Not restarting task: %v for alloc: %v ", r.task.Name, r.alloc.ID) r.setState(structs.TaskStateDead, waitEvent) return } @@ -329,7 +329,7 @@ func (r *TaskRunner) Update(update *structs.Task) { case r.updateCh <- update: default: r.logger.Printf("[ERR] client: dropping task update '%s' (alloc '%s')", - update.Name, r.allocID) + update.Name, r.alloc.ID) } } diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 82723965c..6366c73bc 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -1106,7 +1106,6 @@ const ( // The ServiceCheck data model represents the consul health check that // Nomad registers for a Task type ServiceCheck struct { - Id string // Id of the check, must be unique and it is autogenerated Name string // Name of the check, defaults to id Type string // Type of the check - tcp, http, docker and script Script string // Script to invoke for script check @@ -1151,7 +1150,6 @@ const ( // The Service model represents a Consul service definition type Service struct { - Id string // Id of the service, this needs to be unique on a local machine Name string // Name of the service, defaults to id Tags []string // List of tags for the service PortLabel string `mapstructure:"port"` // port for the service @@ -1161,10 +1159,6 @@ type Service struct { // InitFields interpolates values of Job, Task Group and Task in the Service // Name. This also generates check names, service id and check ids. func (s *Service) InitFields(job string, taskGroup string, task string) { - // We add a prefix to the Service ID so that we can know that this service - // is managed by Consul since Consul can also have service which are not - // managed by Nomad - s.Id = fmt.Sprintf("%s-%s", NomadConsulPrefix, GenerateUUID()) s.Name = args.ReplaceEnv(s.Name, map[string]string{ "JOB": job, "TASKGROUP": taskGroup, @@ -1174,7 +1168,6 @@ func (s *Service) InitFields(job string, taskGroup string, task string) { ) for _, check := range s.Checks { - check.Id = check.Hash(s.Id) if check.Name == "" { check.Name = fmt.Sprintf("service: %q check", s.Name) } @@ -1451,6 +1444,9 @@ type Allocation struct { // task. These should sum to the total Resources.
TaskResources map[string]*Resources + // Services is a map of service names and service ids + Services map[string]string + // Metrics associated with this allocation Metrics *AllocMetric @@ -1504,6 +1500,19 @@ func (a *Allocation) Stub() *AllocListStub { } } +func (a *Allocation) PopulateServiceIds() { + a.Services = make(map[string]string) + tg := a.Job.LookupTaskGroup(a.TaskGroup) + for _, task := range tg.Tasks { + for _, service := range task.Services { + // We add a prefix to the Service ID so that we can know that this service + // is managed by Consul since Consul can also have service which are not + // managed by Nomad + a.Services[service.Name] = fmt.Sprintf("%s-%s", NomadConsulPrefix, GenerateUUID()) + } + } +} + // AllocListStub is used to return a subset of alloc information type AllocListStub struct { ID string diff --git a/scheduler/generic_sched.go b/scheduler/generic_sched.go index b3b486658..436e4991b 100644 --- a/scheduler/generic_sched.go +++ b/scheduler/generic_sched.go @@ -279,6 +279,10 @@ func (s *GenericScheduler) computePlacements(place []allocTuple) error { Metrics: s.ctx.Metrics(), } + // Generate the service ids for the tasks which this allocation is going + // to run + alloc.PopulateServiceIds() + // Set fields based on if we found an allocation option if option != nil { alloc.NodeID = option.Node.ID diff --git a/scheduler/system_sched.go b/scheduler/system_sched.go index d448642ff..b8b99bee6 100644 --- a/scheduler/system_sched.go +++ b/scheduler/system_sched.go @@ -246,6 +246,10 @@ func (s *SystemScheduler) computePlacements(place []allocTuple) error { Metrics: s.ctx.Metrics(), } + // Generate the service ids for the tasks that this allocation is going + // to run + alloc.PopulateServiceIds() + // Set fields based on if we found an allocation option if option != nil { alloc.NodeID = option.Node.ID From 76666998bbbe074fc71bfb0fe1e7803628ab9853 Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Mon, 14 Dec 2015 15:09:57 -0800 Subject: [PATCH 34/82] Fixed the jobspec tests --- jobspec/parse_test.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/jobspec/parse_test.go b/jobspec/parse_test.go index 8c7d79199..4497348eb 100644 --- a/jobspec/parse_test.go +++ b/jobspec/parse_test.go @@ -96,13 +96,11 @@ func TestParse(t *testing.T) { }, Services: []*structs.Service{ { - Id: "", Name: "binstore-storagelocker-binsl-binstore", Tags: []string{"foo", "bar"}, PortLabel: "http", Checks: []*structs.ServiceCheck{ { - Id: "", Name: "check-name", Type: "tcp", Interval: 10 * time.Second, From ed04b11862edecee37154b80a95ee18cde29ce5d Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Mon, 14 Dec 2015 15:47:01 -0800 Subject: [PATCH 35/82] Fixed tests --- client/consul_test.go | 30 +++++++++++++++--------------- client/task_runner_test.go | 4 ++-- nomad/mock/mock.go | 1 + nomad/structs/structs_test.go | 7 +------ 4 files changed, 19 insertions(+), 23 deletions(-) diff --git a/client/consul_test.go b/client/consul_test.go index f2fb18ef8..ac16a1233 100644 --- a/client/consul_test.go +++ b/client/consul_test.go @@ -1,7 +1,9 @@ package client import ( + "fmt" consul "github.com/hashicorp/consul/api" + "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" "log" "os" @@ -70,7 +72,6 @@ func newTask() *structs.Task { func TestConsul_MakeChecks(t *testing.T) { service := &structs.Service{ - Id: "Foo", Name: "Bar", Checks: []*structs.ServiceCheck{ { @@ -95,10 +96,11 @@ func TestConsul_MakeChecks(t *testing.T) { } c := newConsulService() + serviceId := 
fmt.Sprintf("%s-1234", structs.NomadConsulPrefix) - check1 := c.makeCheck(service, service.Checks[0], "10.10.0.1", 8090) - check2 := c.makeCheck(service, service.Checks[1], "10.10.0.1", 8090) - check3 := c.makeCheck(service, service.Checks[2], "10.10.0.1", 8090) + check1 := c.makeCheck(serviceId, service.Checks[0], "10.10.0.1", 8090) + check2 := c.makeCheck(serviceId, service.Checks[1], "10.10.0.1", 8090) + check3 := c.makeCheck(serviceId, service.Checks[2], "10.10.0.1", 8090) if check1.HTTP != "http://10.10.0.1:8090/foo/bar" { t.Fatalf("Invalid http url for check: %v", check1.HTTP) @@ -142,7 +144,6 @@ func TestConsul_InvalidPortLabelForService(t *testing.T) { }, } service := &structs.Service{ - Id: "service-id", Name: "foo", Tags: []string{"a", "b"}, PortLabel: "https", @@ -150,7 +151,7 @@ func TestConsul_InvalidPortLabelForService(t *testing.T) { } c := newConsulService() - if err := c.registerService(service, task, "allocid"); err == nil { + if err := c.registerService(service, task, mock.Alloc()); err == nil { t.Fatalf("Service should be invalid") } } @@ -175,7 +176,7 @@ func TestConsul_Services_Deleted_From_Task(t *testing.T) { }, }, } - c.Register(&task, "1") + c.Register(&task, mock.Alloc()) if len(c.serviceStates) != 1 { t.Fatalf("Expected tracked services: %v, Actual: %v", 1, len(c.serviceStates)) } @@ -191,13 +192,14 @@ func TestConsul_Service_Should_Be_Re_Reregistered_On_Change(t *testing.T) { c := newConsulService() task := newTask() s1 := structs.Service{ - Id: "1-example-cache-redis", Name: "example-cache-redis", Tags: []string{"global"}, PortLabel: "db", } task.Services = append(task.Services, &s1) - c.Register(task, "1") + alloc := mock.Alloc() + serviceID := alloc.Services[s1.Name] + c.Register(task, alloc) s1.Tags = []string{"frontcache"} @@ -207,8 +209,8 @@ func TestConsul_Service_Should_Be_Re_Reregistered_On_Change(t *testing.T) { t.Fatal("We should be tracking one service") } - if c.serviceStates[s1.Id] != s1.Hash() { - t.Fatalf("Hash is %v, expected %v", c.serviceStates[s1.Id], s1.Hash()) + if c.serviceStates[serviceID] != s1.Hash() { + t.Fatalf("Hash is %v, expected %v", c.serviceStates[serviceID], s1.Hash()) } } @@ -219,14 +221,13 @@ func TestConsul_AddCheck_To_Service(t *testing.T) { task := newTask() var checks []*structs.ServiceCheck s1 := structs.Service{ - Id: "1-example-cache-redis", Name: "example-cache-redis", Tags: []string{"global"}, PortLabel: "db", Checks: checks, } task.Services = append(task.Services, &s1) - c.Register(task, "1") + c.Register(task, mock.Alloc()) check1 := structs.ServiceCheck{ Name: "alive", @@ -250,14 +251,13 @@ func TestConsul_ModifyCheck(t *testing.T) { task := newTask() var checks []*structs.ServiceCheck s1 := structs.Service{ - Id: "1-example-cache-redis", Name: "example-cache-redis", Tags: []string{"global"}, PortLabel: "db", Checks: checks, } task.Services = append(task.Services, &s1) - c.Register(task, "1") + c.Register(task, mock.Alloc()) check1 := structs.ServiceCheck{ Name: "alive", diff --git a/client/task_runner_test.go b/client/task_runner_test.go index dcdc84577..f6b581eb7 100644 --- a/client/task_runner_test.go +++ b/client/task_runner_test.go @@ -48,7 +48,7 @@ func testTaskRunner(restarts bool) (*MockTaskStateUpdater, *TaskRunner) { } state := alloc.TaskStates[task.Name] - tr := NewTaskRunner(logger, conf, upd.Update, ctx, alloc.ID, task, state, restartTracker, consulClient) + tr := NewTaskRunner(logger, conf, upd.Update, ctx, mock.Alloc(), task, state, restartTracker, consulClient) return upd, tr } @@ -166,7 +166,7 @@ 
func TestTaskRunner_SaveRestoreState(t *testing.T) { // Create a new task runner consulClient, _ := NewConsulService(&consulServiceConfig{tr.logger, "127.0.0.1:8500", "", "", false, false, &structs.Node{}}) tr2 := NewTaskRunner(tr.logger, tr.config, upd.Update, - tr.ctx, tr.allocID, &structs.Task{Name: tr.task.Name}, tr.state, tr.restartTracker, + tr.ctx, tr.alloc, &structs.Task{Name: tr.task.Name}, tr.state, tr.restartTracker, consulClient) if err := tr2.RestoreState(); err != nil { t.Fatalf("err: %v", err) diff --git a/nomad/mock/mock.go b/nomad/mock/mock.go index 0d2949d80..8de856483 100644 --- a/nomad/mock/mock.go +++ b/nomad/mock/mock.go @@ -226,6 +226,7 @@ func Alloc() *structs.Allocation { }, }, }, + Services: map[string]string{"web-frontend": "nomad-registered-task-1234"}, TaskStates: map[string]*structs.TaskState{ "web": &structs.TaskState{ State: structs.TaskStatePending, diff --git a/nomad/structs/structs_test.go b/nomad/structs/structs_test.go index a94a2750c..78c3828dd 100644 --- a/nomad/structs/structs_test.go +++ b/nomad/structs/structs_test.go @@ -391,13 +391,11 @@ func TestEncodeDecode(t *testing.T) { func TestInvalidServiceCheck(t *testing.T) { s := Service{ - Id: "service-id", Name: "service-name", PortLabel: "bar", Checks: []*ServiceCheck{ { - Id: "check-id", Name: "check-name", Type: "lol", }, @@ -442,7 +440,7 @@ func TestDistinctCheckId(t *testing.T) { } -func TestService_InitFiels(t *testing.T) { +func TestService_InitFields(t *testing.T) { job := "example" taskGroup := "cache" task := "redis" @@ -455,9 +453,6 @@ func TestService_InitFiels(t *testing.T) { if s.Name != "redis-db" { t.Fatalf("Expected name: %v, Actual: %v", "redis-db", s.Name) } - if s.Id == "" { - t.Fatalf("Expected a GUID for Service ID, Actual: %v", s.Id) - } s.Name = "db" s.InitFields(job, taskGroup, task) From f089e249c815c3b2fa78592efb7c442e415c77c0 Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Mon, 14 Dec 2015 15:57:56 -0800 Subject: [PATCH 36/82] Renamed serviceId to serviceID --- client/consul.go | 54 +++++++++++++++++------------------ client/consul_test.go | 10 +++---- nomad/structs/structs.go | 6 ++-- nomad/structs/structs_test.go | 10 +++---- scheduler/generic_sched.go | 2 +- scheduler/system_sched.go | 2 +- 6 files changed, 42 insertions(+), 42 deletions(-) diff --git a/client/consul.go b/client/consul.go index 39f398579..4fff4fbb1 100644 --- a/client/consul.go +++ b/client/consul.go @@ -47,8 +47,8 @@ func (a *consulApiClient) ServiceRegister(service *consul.AgentServiceRegistrati return a.client.Agent().ServiceRegister(service) } -func (a *consulApiClient) ServiceDeregister(serviceId string) error { - return a.client.Agent().ServiceDeregister(serviceId) +func (a *consulApiClient) ServiceDeregister(serviceID string) error { + return a.client.Agent().ServiceDeregister(serviceID) } func (a *consulApiClient) Services() (map[string]*consul.AgentService, error) { @@ -168,12 +168,12 @@ func (c *ConsulService) Deregister(task *structs.Task, alloc *structs.Allocation delete(c.trackedTasks, fmt.Sprintf("%s-%s", alloc.ID, task.Name)) c.trackedTskLock.Unlock() for _, service := range task.Services { - serviceId := alloc.Services[service.Name] - if serviceId == "" { + serviceID := alloc.Services[service.Name] + if serviceID == "" { continue } c.logger.Printf("[INFO] consul: deregistering service %v with consul", service.Name) - if err := c.deregisterService(serviceId); err != nil { + if err := c.deregisterService(serviceID); err != nil { c.printLogMessage("[DEBUG] consul: error in 
deregistering service %v from consul", service.Name) mErr.Errors = append(mErr.Errors, err) } @@ -225,18 +225,18 @@ func (c *ConsulService) performSync() { // Add services and checks which Consul doesn't know about for _, trackedTask := range c.trackedTasks { for _, service := range trackedTask.task.Services { - serviceId := trackedTask.alloc.Services[service.Name] + serviceID := trackedTask.alloc.Services[service.Name] // Add new services which Consul agent isn't aware of - knownServices[serviceId] = struct{}{} - if _, ok := consulServices[serviceId]; !ok { + knownServices[serviceID] = struct{}{} + if _, ok := consulServices[serviceID]; !ok { c.printLogMessage("[INFO] consul: registering service %s with consul.", service.Name) c.registerService(service, trackedTask.task, trackedTask.alloc) continue } // If a service has changed, re-register it with Consul agent - if service.Hash() != c.serviceStates[serviceId] { + if service.Hash() != c.serviceStates[serviceID] { c.printLogMessage("[INFO] consul: reregistering service %s with consul.", service.Name) c.registerService(service, trackedTask.task, trackedTask.alloc) continue @@ -244,11 +244,11 @@ func (c *ConsulService) performSync() { // Add new checks that Consul isn't aware of for _, check := range service.Checks { - checkId := check.Hash(serviceId) - knownChecks[checkId] = struct{}{} - if _, ok := consulChecks[checkId]; !ok { + checkID := check.Hash(serviceID) + knownChecks[checkID] = struct{}{} + if _, ok := consulChecks[checkID]; !ok { host, port := trackedTask.task.FindHostAndPortFor(service.PortLabel) - cr := c.makeCheck(serviceId, check, host, port) + cr := c.makeCheck(serviceID, check, host, port) c.registerCheck(cr) } } @@ -256,9 +256,9 @@ func (c *ConsulService) performSync() { } // Remove services from the service tracker which no longer exists - for serviceId := range c.serviceStates { - if _, ok := knownServices[serviceId]; !ok { - delete(c.serviceStates, serviceId) + for serviceID := range c.serviceStates { + if _, ok := knownServices[serviceID]; !ok { + delete(c.serviceStates, serviceID) } } @@ -286,11 +286,11 @@ func (c *ConsulService) registerService(service *structs.Service, task *structs. if host == "" || port == 0 { return fmt.Errorf("consul: the port:%q marked for registration of service: %q couldn't be found", service.PortLabel, service.Name) } - serviceId := alloc.Services[service.Name] - c.serviceStates[serviceId] = service.Hash() + serviceID := alloc.Services[service.Name] + c.serviceStates[serviceID] = service.Hash() asr := &consul.AgentServiceRegistration{ - ID: serviceId, + ID: serviceID, Name: service.Name, Tags: service.Tags, Port: port, @@ -302,7 +302,7 @@ func (c *ConsulService) registerService(service *structs.Service, task *structs. 
mErr.Errors = append(mErr.Errors, err) } for _, check := range service.Checks { - cr := c.makeCheck(serviceId, check, host, port) + cr := c.makeCheck(serviceID, check, host, port) if err := c.registerCheck(cr); err != nil { c.printLogMessage("[DEBUG] consul: error while registering check %v with consul: %v", check.Name, err) mErr.Errors = append(mErr.Errors, err) @@ -325,21 +325,21 @@ func (c *ConsulService) deregisterCheck(checkID string) error { } // deregisterService de-registers a Service with a specific id from Consul -func (c *ConsulService) deregisterService(serviceId string) error { - delete(c.serviceStates, serviceId) - if err := c.client.ServiceDeregister(serviceId); err != nil { +func (c *ConsulService) deregisterService(serviceID string) error { + delete(c.serviceStates, serviceID) + if err := c.client.ServiceDeregister(serviceID); err != nil { return err } return nil } // makeCheck creates a Consul Check Registration struct -func (c *ConsulService) makeCheck(serviceId string, check *structs.ServiceCheck, ip string, port int) *consul.AgentCheckRegistration { - checkId := check.Hash(serviceId) +func (c *ConsulService) makeCheck(serviceID string, check *structs.ServiceCheck, ip string, port int) *consul.AgentCheckRegistration { + checkID := check.Hash(serviceID) cr := &consul.AgentCheckRegistration{ - ID: checkId, + ID: checkID, Name: check.Name, - ServiceID: serviceId, + ServiceID: serviceID, } cr.Interval = check.Interval.String() cr.Timeout = check.Timeout.String() diff --git a/client/consul_test.go b/client/consul_test.go index ac16a1233..1b111382a 100644 --- a/client/consul_test.go +++ b/client/consul_test.go @@ -34,7 +34,7 @@ func (a *mockConsulApiClient) ServiceRegister(service *consul.AgentServiceRegist return nil } -func (a *mockConsulApiClient) ServiceDeregister(serviceId string) error { +func (a *mockConsulApiClient) ServiceDeregister(serviceID string) error { a.serviceDeregisterCallCount += 1 return nil } @@ -96,11 +96,11 @@ func TestConsul_MakeChecks(t *testing.T) { } c := newConsulService() - serviceId := fmt.Sprintf("%s-1234", structs.NomadConsulPrefix) + serviceID := fmt.Sprintf("%s-1234", structs.NomadConsulPrefix) - check1 := c.makeCheck(serviceId, service.Checks[0], "10.10.0.1", 8090) - check2 := c.makeCheck(serviceId, service.Checks[1], "10.10.0.1", 8090) - check3 := c.makeCheck(serviceId, service.Checks[2], "10.10.0.1", 8090) + check1 := c.makeCheck(serviceID, service.Checks[0], "10.10.0.1", 8090) + check2 := c.makeCheck(serviceID, service.Checks[1], "10.10.0.1", 8090) + check3 := c.makeCheck(serviceID, service.Checks[2], "10.10.0.1", 8090) if check1.HTTP != "http://10.10.0.1:8090/foo/bar" { t.Fatalf("Invalid http url for check: %v", check1.HTTP) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 6366c73bc..5303dd64a 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -1130,9 +1130,9 @@ func (sc *ServiceCheck) Validate() error { return nil } -func (sc *ServiceCheck) Hash(serviceId string) string { +func (sc *ServiceCheck) Hash(serviceID string) string { h := sha1.New() - io.WriteString(h, serviceId) + io.WriteString(h, serviceID) io.WriteString(h, sc.Name) io.WriteString(h, sc.Type) io.WriteString(h, sc.Script) @@ -1500,7 +1500,7 @@ func (a *Allocation) Stub() *AllocListStub { } } -func (a *Allocation) PopulateServiceIds() { +func (a *Allocation) PopulateServiceIDs() { a.Services = make(map[string]string) tg := a.Job.LookupTaskGroup(a.TaskGroup) for _, task := range tg.Tasks { diff --git a/nomad/structs/structs_test.go 
b/nomad/structs/structs_test.go index 78c3828dd..5111ea4a5 100644 --- a/nomad/structs/structs_test.go +++ b/nomad/structs/structs_test.go @@ -406,7 +406,7 @@ func TestInvalidServiceCheck(t *testing.T) { } } -func TestDistinctCheckId(t *testing.T) { +func TestDistinctCheckID(t *testing.T) { c1 := ServiceCheck{ Name: "web-health", Type: "http", @@ -429,10 +429,10 @@ func TestDistinctCheckId(t *testing.T) { Interval: 4 * time.Second, Timeout: 3 * time.Second, } - serviceId := "123" - c1Hash := c1.Hash(serviceId) - c2Hash := c2.Hash(serviceId) - c3Hash := c3.Hash(serviceId) + serviceID := "123" + c1Hash := c1.Hash(serviceID) + c2Hash := c2.Hash(serviceID) + c3Hash := c3.Hash(serviceID) if c1Hash == c2Hash || c1Hash == c3Hash || c3Hash == c2Hash { t.Fatalf("Checks need to be unique c1: %s, c2: %s, c3: %s", c1Hash, c2Hash, c3Hash) diff --git a/scheduler/generic_sched.go b/scheduler/generic_sched.go index 436e4991b..26bb737bd 100644 --- a/scheduler/generic_sched.go +++ b/scheduler/generic_sched.go @@ -281,7 +281,7 @@ func (s *GenericScheduler) computePlacements(place []allocTuple) error { // Generate the service ids for the tasks which this allocation is going // to run - alloc.PopulateServiceIds() + alloc.PopulateServiceIDs() // Set fields based on if we found an allocation option if option != nil { diff --git a/scheduler/system_sched.go b/scheduler/system_sched.go index b8b99bee6..42370b8fd 100644 --- a/scheduler/system_sched.go +++ b/scheduler/system_sched.go @@ -248,7 +248,7 @@ func (s *SystemScheduler) computePlacements(place []allocTuple) error { // Generate the service ids for the tasks that this allocation is going // to run - alloc.PopulateServiceIds() + alloc.PopulateServiceIDs() // Set fields based on if we found an allocation option if option != nil { From f4526cd7e32471b9e26215add018b800dd488e36 Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Mon, 14 Dec 2015 17:06:58 -0800 Subject: [PATCH 37/82] Re-initializing the service map for in place updates --- scheduler/util.go | 1 + scheduler/util_test.go | 18 +++++++++++++----- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/scheduler/util.go b/scheduler/util.go index 44bd2ae08..27a714a0b 100644 --- a/scheduler/util.go +++ b/scheduler/util.go @@ -383,6 +383,7 @@ func inplaceUpdate(ctx Context, eval *structs.Evaluation, job *structs.Job, newAlloc.Metrics = ctx.Metrics() newAlloc.DesiredStatus = structs.AllocDesiredStatusRun newAlloc.ClientStatus = structs.AllocClientStatusPending + newAlloc.PopulateServiceIDs() ctx.Plan().AppendAlloc(newAlloc) // Remove this allocation from the slice diff --git a/scheduler/util_test.go b/scheduler/util_test.go index 747161e51..d1e610c8d 100644 --- a/scheduler/util_test.go +++ b/scheduler/util_test.go @@ -533,17 +533,19 @@ func TestInplaceUpdate_Success(t *testing.T) { state, ctx := testContext(t) eval := mock.Eval() job := mock.Job() + job.InitAllServiceFields() node := mock.Node() noErr(t, state.UpsertNode(1000, node)) // Register an alloc alloc := &structs.Allocation{ - ID: structs.GenerateUUID(), - EvalID: eval.ID, - NodeID: node.ID, - JobID: job.ID, - Job: job, + ID: structs.GenerateUUID(), + EvalID: eval.ID, + NodeID: node.ID, + JobID: job.ID, + Job: job, + TaskGroup: job.TaskGroups[0].Name, Resources: &structs.Resources{ CPU: 2048, MemoryMB: 2048, @@ -551,13 +553,19 @@ func TestInplaceUpdate_Success(t *testing.T) { DesiredStatus: structs.AllocDesiredStatusRun, } alloc.TaskResources = map[string]*structs.Resources{"web": alloc.Resources} + alloc.PopulateServiceIDs() noErr(t, 
state.UpsertAllocs(1001, []*structs.Allocation{alloc})) + if alloc.Services["web-frontend"] == "" { + t.Fatal("Service ID needs to be generated for service") + } + // Create a new task group that updates the resources. tg := &structs.TaskGroup{} *tg = *job.TaskGroups[0] resource := &structs.Resources{CPU: 737} tg.Tasks[0].Resources = resource + tg.Tasks[0].Services = []*structs.Service{} updates := []allocTuple{{Alloc: alloc, TaskGroup: tg}} stack := NewGenericStack(false, ctx) From c52a5865c0c59ebc8bbfb8bb1e57e0ac183480db Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Mon, 14 Dec 2015 17:17:29 -0800 Subject: [PATCH 38/82] Fixed the job endpoints test --- nomad/job_endpoint_test.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/nomad/job_endpoint_test.go b/nomad/job_endpoint_test.go index 53934bf21..986ebc102 100644 --- a/nomad/job_endpoint_test.go +++ b/nomad/job_endpoint_test.go @@ -359,15 +359,12 @@ func TestJobEndpoint_GetJob(t *testing.T) { for tgix, tg := range j.TaskGroups { for tidx, t := range tg.Tasks { for sidx, service := range t.Services { - service.Id = resp2.Job.TaskGroups[tgix].Tasks[tidx].Services[sidx].Id for cidx, check := range service.Checks { check.Name = resp2.Job.TaskGroups[tgix].Tasks[tidx].Services[sidx].Checks[cidx].Name - check.Id = resp2.Job.TaskGroups[tgix].Tasks[tidx].Services[sidx].Checks[cidx].Id } } } } - j.TaskGroups[0].Tasks[0].Services[0].Id = resp2.Job.TaskGroups[0].Tasks[0].Services[0].Id if !reflect.DeepEqual(j, resp2.Job) { t.Fatalf("bad: %#v %#v", job, resp2.Job) From ae09df69b61ffa847d71b903c068893c2847cda4 Mon Sep 17 00:00:00 2001 From: Chris Bednarski Date: Mon, 14 Dec 2015 17:46:52 -0800 Subject: [PATCH 39/82] Check length 0 (no configs) instead of 1 (some configs) --- command/agent/command.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/command/agent/command.go b/command/agent/command.go index aed67aeb6..4ec51a5fc 100644 --- a/command/agent/command.go +++ b/command/agent/command.go @@ -356,7 +356,7 @@ func (c *Command) Run(args []string) int { } // Log config files - if len(config.Files) > 1 { + if len(config.Files) > 0 { c.Ui.Info(fmt.Sprintf("Loaded configuration from %s", strings.Join(config.Files, ", "))) } else { c.Ui.Info("No configuration files loaded") From cbc70a54656e4f720a975b30b7878f81ee3c10f7 Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Mon, 14 Dec 2015 18:05:58 -0800 Subject: [PATCH 40/82] Changed some comments --- client/consul.go | 1 - nomad/mock/mock.go | 1 + nomad/structs/structs.go | 6 ++++-- scheduler/generic_sched.go | 8 ++++---- scheduler/util_test.go | 1 - 5 files changed, 9 insertions(+), 8 deletions(-) diff --git a/client/consul.go b/client/consul.go index 4fff4fbb1..0340f9bed 100644 --- a/client/consul.go +++ b/client/consul.go @@ -152,7 +152,6 @@ func (c *ConsulService) Register(task *structs.Task, alloc *structs.Allocation) for _, service := range task.Services { c.logger.Printf("[INFO] consul: registering service %s with consul.", service.Name) if err := c.registerService(service, task, alloc); err != nil { - fmt.Printf("DIPTANU ERR %v\n", err) mErr.Errors = append(mErr.Errors, err) } } diff --git a/nomad/mock/mock.go b/nomad/mock/mock.go index 8de856483..d645f58ee 100644 --- a/nomad/mock/mock.go +++ b/nomad/mock/mock.go @@ -122,6 +122,7 @@ func Job() *structs.Job { CreateIndex: 42, ModifyIndex: 99, } + job.InitAllServiceFields() return job } diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 5303dd64a..b8f3ddaae 100644 --- a/nomad/structs/structs.go 
+++ b/nomad/structs/structs.go @@ -1444,7 +1444,7 @@ type Allocation struct { // task. These should sum to the total Resources. TaskResources map[string]*Resources - // Services is a map of service names and service ids + // Services is a map of service names to service ids Services map[string]string // Metrics associated with this allocation @@ -1500,13 +1500,15 @@ func (a *Allocation) Stub() *AllocListStub { } } +// PopulateServiceIDs generates the service IDs for all the service definitions +// in that Allocation func (a *Allocation) PopulateServiceIDs() { a.Services = make(map[string]string) tg := a.Job.LookupTaskGroup(a.TaskGroup) for _, task := range tg.Tasks { for _, service := range task.Services { // We add a prefix to the Service ID so that we can know that this service - // is managed by Consul since Consul can also have service which are not + // is managed by Nomad Consul since Consul can also have service which are not // managed by Nomad a.Services[service.Name] = fmt.Sprintf("%s-%s", NomadConsulPrefix, GenerateUUID()) } diff --git a/scheduler/generic_sched.go b/scheduler/generic_sched.go index 26bb737bd..edaeaf94c 100644 --- a/scheduler/generic_sched.go +++ b/scheduler/generic_sched.go @@ -279,12 +279,12 @@ func (s *GenericScheduler) computePlacements(place []allocTuple) error { Metrics: s.ctx.Metrics(), } - // Generate the service ids for the tasks which this allocation is going - // to run - alloc.PopulateServiceIDs() - // Set fields based on if we found an allocation option if option != nil { + // Generate the service ids for the tasks which this allocation is going + // to run + alloc.PopulateServiceIDs() + alloc.NodeID = option.Node.ID alloc.TaskResources = option.TaskResources alloc.DesiredStatus = structs.AllocDesiredStatusRun diff --git a/scheduler/util_test.go b/scheduler/util_test.go index d1e610c8d..cb5acb3ac 100644 --- a/scheduler/util_test.go +++ b/scheduler/util_test.go @@ -533,7 +533,6 @@ func TestInplaceUpdate_Success(t *testing.T) { state, ctx := testContext(t) eval := mock.Eval() job := mock.Job() - job.InitAllServiceFields() node := mock.Node() noErr(t, state.UpsertNode(1000, node)) From 1668a4804e79061dee3420c5e0cdbb3d1b30e0ed Mon Sep 17 00:00:00 2001 From: Chris Hines Date: Tue, 15 Dec 2015 02:05:25 -0500 Subject: [PATCH 41/82] Sort config files as documented. --- command/agent/config.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/command/agent/config.go b/command/agent/config.go index 8ee166842..52bd0911b 100644 --- a/command/agent/config.go +++ b/command/agent/config.go @@ -8,6 +8,7 @@ import ( "os" "path/filepath" "runtime" + "sort" "strings" "time" @@ -705,6 +706,8 @@ func LoadConfigDir(dir string) (*Config, error) { return &Config{}, nil } + sort.Strings(files) + var result *Config for _, f := range files { config, err := LoadConfigFile(f) From 962bdf744d54f114dc2903e8b8114a29d55eb97e Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Tue, 15 Dec 2015 08:35:26 -0800 Subject: [PATCH 42/82] Added a test to prove services are removed from the map in Alloc if they are removed from the Tasks --- scheduler/util_test.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scheduler/util_test.go b/scheduler/util_test.go index cb5acb3ac..2b287b83a 100644 --- a/scheduler/util_test.go +++ b/scheduler/util_test.go @@ -580,6 +580,12 @@ func TestInplaceUpdate_Success(t *testing.T) { if len(ctx.plan.NodeAllocation) != 1 { t.Fatal("inplaceUpdate did not do an inplace update") } + + // Get the alloc we inserted. 
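+ // inplaceUpdate appended it to the plan's NodeAllocation map, keyed by node ID.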
+ a := ctx.plan.NodeAllocation[alloc.NodeID][0] + if len(a.Services) != 0 { + t.Fatalf("Expected number of services: %v, Actual: %v", 0, len(a.Services)) + } } func TestEvictAndPlace_LimitGreaterThanAllocs(t *testing.T) { From fa5beb7fe5e7d2b34b86dec616129cac94572e0a Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Tue, 15 Dec 2015 08:38:18 -0800 Subject: [PATCH 43/82] Populating service ids only if allocations can be placed for system jobs --- nomad/structs/structs.go | 2 +- scheduler/system_sched.go | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index b8f3ddaae..722319901 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -1508,7 +1508,7 @@ func (a *Allocation) PopulateServiceIDs() { for _, task := range tg.Tasks { for _, service := range task.Services { // We add a prefix to the Service ID so that we can know that this service - // is managed by Nomad Consul since Consul can also have service which are not + // is managed by Nomad since Consul can also have service which are not // managed by Nomad a.Services[service.Name] = fmt.Sprintf("%s-%s", NomadConsulPrefix, GenerateUUID()) } diff --git a/scheduler/system_sched.go b/scheduler/system_sched.go index 42370b8fd..80a87fe94 100644 --- a/scheduler/system_sched.go +++ b/scheduler/system_sched.go @@ -246,12 +246,12 @@ func (s *SystemScheduler) computePlacements(place []allocTuple) error { Metrics: s.ctx.Metrics(), } - // Generate the service ids for the tasks that this allocation is going - // to run - alloc.PopulateServiceIDs() - // Set fields based on if we found an allocation option if option != nil { + // Generate the service ids for the tasks that this allocation is going + // to run + alloc.PopulateServiceIDs() + alloc.NodeID = option.Node.ID alloc.TaskResources = option.TaskResources alloc.DesiredStatus = structs.AllocDesiredStatusRun From 9686f6e26f8f6de7522e1845250a117fcac74de9 Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Tue, 15 Dec 2015 09:14:32 -0800 Subject: [PATCH 44/82] Making sure existing ids for services are not re-generated --- nomad/structs/structs.go | 22 ++++++++++++++++++---- scheduler/util_test.go | 25 +++++++++++++++++++++---- 2 files changed, 39 insertions(+), 8 deletions(-) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 722319901..a86fae843 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -1503,14 +1503,28 @@ func (a *Allocation) Stub() *AllocListStub { // PopulateServiceIDs generates the service IDs for all the service definitions // in that Allocation func (a *Allocation) PopulateServiceIDs() { + // Make a copy of the old map which contains the service names and their + // generated IDs + oldIDs := make(map[string]string) + for k, v := range a.Services { + oldIDs[k] = v + } + a.Services = make(map[string]string) tg := a.Job.LookupTaskGroup(a.TaskGroup) for _, task := range tg.Tasks { for _, service := range task.Services { - // We add a prefix to the Service ID so that we can know that this service - // is managed by Nomad since Consul can also have service which are not - // managed by Nomad - a.Services[service.Name] = fmt.Sprintf("%s-%s", NomadConsulPrefix, GenerateUUID()) + // If the ID for a service name is already generated then we re-use + // it + if ID, ok := oldIDs[service.Name]; ok { + a.Services[service.Name] = ID + } else { + // If the service hasn't been generated an ID, we generate one. 
+ // We add a prefix to the Service ID so that we can know that this service + // is managed by Nomad since Consul can also have service which are not + // managed by Nomad + a.Services[service.Name] = fmt.Sprintf("%s-%s", NomadConsulPrefix, GenerateUUID()) + } } } } diff --git a/scheduler/util_test.go b/scheduler/util_test.go index 2b287b83a..421ca8f1f 100644 --- a/scheduler/util_test.go +++ b/scheduler/util_test.go @@ -555,7 +555,9 @@ func TestInplaceUpdate_Success(t *testing.T) { alloc.PopulateServiceIDs() noErr(t, state.UpsertAllocs(1001, []*structs.Allocation{alloc})) - if alloc.Services["web-frontend"] == "" { + webFeSrvID := alloc.Services["web-frontend"] + + if webFeSrvID == "" { t.Fatal("Service ID needs to be generated for service") } @@ -564,7 +566,17 @@ func TestInplaceUpdate_Success(t *testing.T) { *tg = *job.TaskGroups[0] resource := &structs.Resources{CPU: 737} tg.Tasks[0].Resources = resource - tg.Tasks[0].Services = []*structs.Service{} + newServices := []*structs.Service{ + { + Name: "dummy-service", + PortLabel: "http", + }, + { + Name: "dummy-service2", + PortLabel: "http", + }, + } + tg.Tasks[0].Services = append(tg.Tasks[0].Services, newServices...) updates := []allocTuple{{Alloc: alloc, TaskGroup: tg}} stack := NewGenericStack(false, ctx) @@ -583,8 +595,13 @@ func TestInplaceUpdate_Success(t *testing.T) { // Get the alloc we inserted. a := ctx.plan.NodeAllocation[alloc.NodeID][0] - if len(a.Services) != 0 { - t.Fatalf("Expected number of services: %v, Actual: %v", 0, len(a.Services)) + if len(a.Services) != 3 { + t.Fatalf("Expected number of services: %v, Actual: %v", 3, len(a.Services)) + } + + // Test that the service id for the old service is still the same + if a.Services["web-frontend"] != webFeSrvID { + t.Fatalf("Expected service ID: %v, Actual: %v", webFeSrvID, a.Services["web-frontend"]) } } From ed911ca0116aab028e879ce72659c7449e63c848 Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Tue, 15 Dec 2015 10:43:56 -0800 Subject: [PATCH 45/82] Added a test to make sure services no longer present are being removed --- nomad/mock/mock.go | 6 +++++- scheduler/util_test.go | 14 +++++++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/nomad/mock/mock.go b/nomad/mock/mock.go index d645f58ee..012677f18 100644 --- a/nomad/mock/mock.go +++ b/nomad/mock/mock.go @@ -95,6 +95,10 @@ func Job() *structs.Job { Name: "${TASK}-frontend", PortLabel: "http", }, + { + Name: "${TASK}-admin", + PortLabel: "admin", + }, }, Resources: &structs.Resources{ CPU: 500, @@ -102,7 +106,7 @@ func Job() *structs.Job { Networks: []*structs.NetworkResource{ &structs.NetworkResource{ MBits: 50, - DynamicPorts: []structs.Port{{Label: "http"}}, + DynamicPorts: []structs.Port{{Label: "http"}, {Label: "admin"}}, }, }, }, diff --git a/scheduler/util_test.go b/scheduler/util_test.go index 421ca8f1f..4d6d21db8 100644 --- a/scheduler/util_test.go +++ b/scheduler/util_test.go @@ -556,8 +556,9 @@ func TestInplaceUpdate_Success(t *testing.T) { noErr(t, state.UpsertAllocs(1001, []*structs.Allocation{alloc})) webFeSrvID := alloc.Services["web-frontend"] + adminSrvID := alloc.Services["web-admin"] - if webFeSrvID == "" { + if webFeSrvID == "" || adminSrvID == "" { t.Fatal("Service ID needs to be generated for service") } @@ -576,6 +577,11 @@ func TestInplaceUpdate_Success(t *testing.T) { PortLabel: "http", }, } + + // Delete service 2 + tg.Tasks[0].Services = tg.Tasks[0].Services[:1] + + // Add the new services tg.Tasks[0].Services = append(tg.Tasks[0].Services, newServices...) 
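// After the delete and the append, the group carries web-frontend plus the two dummy services.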
updates := []allocTuple{{Alloc: alloc, TaskGroup: tg}} @@ -603,6 +609,12 @@ func TestInplaceUpdate_Success(t *testing.T) { if a.Services["web-frontend"] != webFeSrvID { t.Fatalf("Expected service ID: %v, Actual: %v", webFeSrvID, a.Services["web-frontend"]) } + + // Test that the map doesn't contain the service ID of the admin Service + // anymore + if _, ok := a.Services["web-admin"]; ok { + t.Fatal("Service shouldn't be present") + } } func TestEvictAndPlace_LimitGreaterThanAllocs(t *testing.T) { From 06443a6629ac67b56c2fbeebf1b6611ad7e5c781 Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Tue, 15 Dec 2015 11:12:13 -0800 Subject: [PATCH 46/82] Using cgo dependencies to look up users --- client/allocdir/alloc_dir_posix.go | 3 +-- client/driver/executor/exec_linux.go | 3 +-- helper/user-lookup/user-lookup.go | 35 ---------------------------- scripts/build.sh | 1 + 4 files changed, 3 insertions(+), 39 deletions(-) delete mode 100644 helper/user-lookup/user-lookup.go diff --git a/client/allocdir/alloc_dir_posix.go b/client/allocdir/alloc_dir_posix.go index e6205b220..d5e4a9296 100644 --- a/client/allocdir/alloc_dir_posix.go +++ b/client/allocdir/alloc_dir_posix.go @@ -5,7 +5,6 @@ package allocdir import ( "fmt" - "github.com/hashicorp/nomad/helper/user-lookup" "os" "os/user" "strconv" @@ -27,7 +26,7 @@ func (d *AllocDir) dropDirPermissions(path string) error { return nil } - u, err := userlookup.Lookup("nobody") + u, err := user.Lookup("nobody") if err != nil { return err } diff --git a/client/driver/executor/exec_linux.go b/client/driver/executor/exec_linux.go index 256cc39f4..cbd33be80 100644 --- a/client/driver/executor/exec_linux.go +++ b/client/driver/executor/exec_linux.go @@ -24,7 +24,6 @@ import ( "github.com/hashicorp/nomad/client/driver/spawn" cstructs "github.com/hashicorp/nomad/client/driver/structs" "github.com/hashicorp/nomad/helper/args" - "github.com/hashicorp/nomad/helper/user-lookup" "github.com/hashicorp/nomad/nomad/structs" ) @@ -124,7 +123,7 @@ func (e *LinuxExecutor) ID() (string, error) { // runAs takes a user id as a string and looks up the user, and sets the command // to execute as that user. func (e *LinuxExecutor) runAs(userid string) error { - u, err := userlookup.Lookup(userid) + u, err := user.Lookup(userid) if err != nil { return fmt.Errorf("Failed to identify user %v: %v", userid, err) } diff --git a/helper/user-lookup/user-lookup.go b/helper/user-lookup/user-lookup.go deleted file mode 100644 index b0826e6c9..000000000 --- a/helper/user-lookup/user-lookup.go +++ /dev/null @@ -1,35 +0,0 @@ -// +build !windows - -package userlookup - -import ( - "fmt" - "io/ioutil" - "os/user" - "strings" -) - -// Lookup checks if the given username or uid is present in /etc/passwd -// and returns the user struct. -// If the username is not found, an error is returned. 
-// Credit to @creak, https://github.com/docker/docker/pull/1096 -func Lookup(uid string) (*user.User, error) { - file, err := ioutil.ReadFile("/etc/passwd") - if err != nil { - return nil, err - } - - for _, line := range strings.Split(string(file), "\n") { - data := strings.Split(line, ":") - if len(data) > 5 && (data[0] == uid || data[2] == uid) { - return &user.User{ - Uid: data[2], - Gid: data[3], - Username: data[0], - Name: data[4], - HomeDir: data[5], - }, nil - } - } - return nil, fmt.Errorf("User not found in /etc/passwd") -} diff --git a/scripts/build.sh b/scripts/build.sh index 36566413a..4a79e8351 100755 --- a/scripts/build.sh +++ b/scripts/build.sh @@ -44,6 +44,7 @@ gox \ -arch="${XC_ARCH}" \ -osarch="!linux/arm !darwin/386" \ -ldflags "-X main.GitCommit ${GIT_COMMIT}${GIT_DIRTY}" \ + -cgo \ -output "pkg/{{.OS}}_{{.Arch}}/nomad" \ . From dc2c9fc58b29d864d33cd4cfef978b1a9b0b3bd7 Mon Sep 17 00:00:00 2001 From: "jorge.marey" Date: Tue, 15 Dec 2015 22:22:58 +0100 Subject: [PATCH 47/82] Fix dns_search_domains option in the documentation. --- website/source/docs/drivers/docker.html.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/source/docs/drivers/docker.html.md b/website/source/docs/drivers/docker.html.md index c9725d0a7..7beaca36e 100644 --- a/website/source/docs/drivers/docker.html.md +++ b/website/source/docs/drivers/docker.html.md @@ -62,7 +62,7 @@ The following options are available for use in the job specification. * `dns_servers` - (Optional) A list of DNS servers for the container to use (e.g. ["8.8.8.8", "8.8.4.4"]). *Docker API v1.10 and above only* -* `search_domains` - (Optional) A list of DNS search domains for the container +* `dns_search_domains` - (Optional) A list of DNS search domains for the container to use. * `port_map` - (Optional) A key/value map of port labels (see below). From 83a5eaa1264b4b9cae11c24089c03b1e2d27eb8f Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Tue, 15 Dec 2015 17:53:02 -0800 Subject: [PATCH 48/82] Updating the version --- CHANGELOG.md | 9 +++++++++ version.go | 4 ++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 481d6b4fe..1eb6eb9f5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,12 @@ +## 0.2.3-rc1 (December 15, 2015) + +BUG FIXES: + * client: Fixes for user lookup to support CoreOS [GH-591] + * discovery: Fixes for service registration when multiple allocations are bin + packed on a node [GH-583] + * discovery: Using a random prefix for nomad managed services [GH-579] + * configuration: Sort configuration files [GH-588] + ## 0.2.2 (December 11, 2015) IMPROVEMENTS: diff --git a/version.go b/version.go index 3361dd423..29bb9b227 100644 --- a/version.go +++ b/version.go @@ -5,9 +5,9 @@ var GitCommit string var GitDescribe string // The main version number that is being run at the moment. -const Version = "0.2.2" +const Version = "0.2.3" // A pre-release marker for the version. If this is "" (empty string) // then it means that it is a final release. Otherwise, this is a pre-release // such as "dev" (in development), "beta", "rc1", etc. 
-const VersionPrerelease = "" +const VersionPrerelease = "rc1" From 17bc13bfe5fa9a0ca7eadd94ac7bc692b98d5cc5 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Mon, 14 Dec 2015 19:20:57 -0800 Subject: [PATCH 49/82] Add garbage collection to jobs --- api/jobs.go | 7 + jobspec/parse.go | 44 ++++++ jobspec/parse_test.go | 16 +++ jobspec/test-fixtures/gc.hcl | 5 + nomad/config.go | 5 + nomad/core_sched.go | 159 +++++++++++++++++----- nomad/core_sched_test.go | 108 +++++++++++++++ nomad/job_endpoint.go | 5 +- nomad/leader.go | 4 + nomad/state/schema.go | 24 ++++ nomad/state/state_store.go | 12 ++ nomad/state/state_store_test.go | 60 ++++++++ nomad/structs/funcs_test.go | 38 +++++- nomad/structs/structs.go | 73 +++++++++- scheduler/context_test.go | 6 + scheduler/rank_test.go | 14 +- website/source/docs/jobspec/index.html.md | 23 ++++ 17 files changed, 561 insertions(+), 42 deletions(-) create mode 100644 jobspec/test-fixtures/gc.hcl diff --git a/api/jobs.go b/api/jobs.go index 17e75daff..f50826ad3 100644 --- a/api/jobs.go +++ b/api/jobs.go @@ -107,6 +107,12 @@ type UpdateStrategy struct { MaxParallel int } +// JobGCConfig configures the garbage collection policy of a job. +type JobGCConfig struct { + Enabled bool + Threshold time.Duration +} + // Job is used to serialize a job. type Job struct { Region string @@ -119,6 +125,7 @@ type Job struct { Constraints []*Constraint TaskGroups []*TaskGroup Update *UpdateStrategy + GC *JobGCConfig Meta map[string]string Status string StatusDescription string diff --git a/jobspec/parse.go b/jobspec/parse.go index 765f58b3a..723fac6a1 100644 --- a/jobspec/parse.go +++ b/jobspec/parse.go @@ -91,6 +91,7 @@ func parseJob(result *structs.Job, list *ast.ObjectList) error { delete(m, "meta") delete(m, "update") delete(m, "periodic") + delete(m, "gc") // Set the ID and name to the object key result.ID = obj.Keys[0].Token.Value().(string) @@ -135,6 +136,13 @@ func parseJob(result *structs.Job, list *ast.ObjectList) error { } } + // If we have a gc config, then parse that + if o := listVal.Filter("gc"); len(o.Items) > 0 { + if err := parseGC(&result.GC, o); err != nil { + return err + } + } + // Parse out meta fields. These are in HCL as a list so we need // to iterate over them and merge them. if metaO := listVal.Filter("meta"); len(metaO.Items) > 0 { @@ -714,3 +722,39 @@ func parsePeriodic(result **structs.PeriodicConfig, list *ast.ObjectList) error *result = &p return nil } + +func parseGC(result **structs.JobGCConfig, list *ast.ObjectList) error { + list = list.Elem() + if len(list.Items) > 1 { + return fmt.Errorf("only one 'gc' block allowed per job") + } + + // Get our resource object + o := list.Items[0] + + var m map[string]interface{} + if err := hcl.DecodeObject(&m, o.Val); err != nil { + return err + } + + // Enabled by default if the gc block exists. 
+ if value, ok := m["enabled"]; !ok { + m["Enabled"] = true + } else { + enabled, err := parseBool(value) + if err != nil { + return fmt.Errorf("gc.enabled should be set to true or false; %v", err) + } + m["Enabled"] = enabled + } + + dec, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{ + DecodeHook: mapstructure.StringToTimeDurationHookFunc(), + WeaklyTypedInput: true, + Result: result, + }) + if err != nil { + return err + } + return dec.Decode(m) +} diff --git a/jobspec/parse_test.go b/jobspec/parse_test.go index 4497348eb..768fa9988 100644 --- a/jobspec/parse_test.go +++ b/jobspec/parse_test.go @@ -246,6 +246,22 @@ func TestParse(t *testing.T) { false, }, + { + "gc.hcl", + &structs.Job{ + ID: "foo", + Name: "foo", + Priority: 50, + Region: "global", + Type: "service", + GC: &structs.JobGCConfig{ + Enabled: true, + Threshold: 2 * time.Hour, + }, + }, + false, + }, + { "specify-job.hcl", &structs.Job{ diff --git a/jobspec/test-fixtures/gc.hcl b/jobspec/test-fixtures/gc.hcl new file mode 100644 index 000000000..cef4bcabd --- /dev/null +++ b/jobspec/test-fixtures/gc.hcl @@ -0,0 +1,5 @@ +job "foo" { + gc { + threshold = "2h" + } +} diff --git a/nomad/config.go b/nomad/config.go index 91986644f..850aa58c2 100644 --- a/nomad/config.go +++ b/nomad/config.go @@ -131,6 +131,10 @@ type Config struct { // for GC. This gives users some time to debug a failed evaluation. EvalGCThreshold time.Duration + // JobGCInterval is how often we dispatch a job to GC jobs that are + // available for garbage collection. + JobGCInterval time.Duration + // NodeGCInterval is how often we dispatch a job to GC failed nodes. NodeGCInterval time.Duration @@ -202,6 +206,7 @@ func DefaultConfig() *Config { ReconcileInterval: 60 * time.Second, EvalGCInterval: 5 * time.Minute, EvalGCThreshold: 1 * time.Hour, + JobGCInterval: 5 * time.Minute, NodeGCInterval: 5 * time.Minute, NodeGCThreshold: 24 * time.Hour, EvalNackTimeout: 60 * time.Second, diff --git a/nomad/core_sched.go b/nomad/core_sched.go index b5ed092f9..27ca5cfcf 100644 --- a/nomad/core_sched.go +++ b/nomad/core_sched.go @@ -33,11 +33,88 @@ func (s *CoreScheduler) Process(eval *structs.Evaluation) error { return s.evalGC(eval) case structs.CoreJobNodeGC: return s.nodeGC(eval) + case structs.CoreJobJobGC: + return s.jobGC(eval) default: return fmt.Errorf("core scheduler cannot handle job '%s'", eval.JobID) } } +// jobGC is used to garbage collect eligible jobs. +func (c *CoreScheduler) jobGC(eval *structs.Evaluation) error { + // Get all the jobs eligible for garbage collection. + jIter, err := c.snap.JobsByGC(true) + if err != nil { + return err + } + + // Get the time table to calculate GC cutoffs. + tt := c.srv.fsm.TimeTable() + + // Collect the allocations and evaluations to GC + var gcAlloc, gcEval, gcJob []string + +OUTER: + for i := jIter.Next(); i != nil; i = jIter.Next() { + job := i.(*structs.Job) + cutoff := time.Now().UTC().Add(-1 * job.GC.Threshold) + oldThreshold := tt.NearestIndex(cutoff) + + // Ignore new jobs. + if job.CreateIndex > oldThreshold { + continue OUTER + } + + evals, err := c.snap.EvalsByJob(job.ID) + if err != nil { + c.srv.logger.Printf("[ERR] sched.core: failed to get evals for job %s: %v", job.ID, err) + continue + } + + for _, eval := range evals { + gc, allocs, err := c.gcEval(eval, oldThreshold) + if err != nil || !gc { + continue OUTER + } + + gcEval = append(gcEval, eval.ID) + gcAlloc = append(gcAlloc, allocs...) 
+ } + + // Job is eligible for garbage collection + gcJob = append(gcJob, job.ID) + } + + // Fast-path the nothing case + if len(gcEval) == 0 && len(gcAlloc) == 0 && len(gcJob) == 0 { + return nil + } + c.srv.logger.Printf("[DEBUG] sched.core: job GC: %d jobs, %d evaluations, %d allocs eligible", + len(gcJob), len(gcEval), len(gcAlloc)) + + // Reap the evals and allocs + if err := c.evalReap(gcEval, gcAlloc); err != nil { + return err + } + + // Call to the leader to deregister the jobs. + for _, job := range gcJob { + req := structs.JobDeregisterRequest{ + JobID: job, + WriteRequest: structs.WriteRequest{ + Region: c.srv.config.Region, + }, + } + var resp structs.JobDeregisterResponse + if err := c.srv.RPC("Job.Deregister", &req, &resp); err != nil { + c.srv.logger.Printf("[ERR] sched.core: job deregister failed: %v", err) + return err + } + } + + return nil +} + // evalGC is used to garbage collect old evaluations func (c *CoreScheduler) evalGC(eval *structs.Evaluation) error { // Iterate over the evaluations @@ -57,39 +134,16 @@ func (c *CoreScheduler) evalGC(eval *structs.Evaluation) error { // Collect the allocations and evaluations to GC var gcAlloc, gcEval []string - -OUTER: - for { - raw := iter.Next() - if raw == nil { - break - } + for raw := iter.Next(); raw != nil; raw = iter.Next() { eval := raw.(*structs.Evaluation) - - // Ignore non-terminal and new evaluations - if !eval.TerminalStatus() || eval.ModifyIndex > oldThreshold { - continue - } - - // Get the allocations by eval - allocs, err := c.snap.AllocsByEval(eval.ID) + gc, allocs, err := c.gcEval(eval, oldThreshold) if err != nil { - c.srv.logger.Printf("[ERR] sched.core: failed to get allocs for eval %s: %v", - eval.ID, err) - continue + return err } - // Scan the allocations to ensure they are terminal and old - for _, alloc := range allocs { - if !alloc.TerminalStatus() || alloc.ModifyIndex > oldThreshold { - continue OUTER - } - } - - // Evaluation is eligible for garbage collection - gcEval = append(gcEval, eval.ID) - for _, alloc := range allocs { - gcAlloc = append(gcAlloc, alloc.ID) + if gc { + gcEval = append(gcEval, eval.ID) + gcAlloc = append(gcAlloc, allocs...) } } @@ -100,10 +154,52 @@ OUTER: c.srv.logger.Printf("[DEBUG] sched.core: eval GC: %d evaluations, %d allocs eligible", len(gcEval), len(gcAlloc)) + return c.evalReap(gcEval, gcAlloc) +} + +// gcEval returns whether the eval should be garbage collected given a raft +// threshold index. The eval disqualifies for garbage collection if it or its +// allocs are not older than the threshold. 
If the eval should be garbage +// collected, the associated alloc ids that should also be removed are also +// returned +func (c *CoreScheduler) gcEval(eval *structs.Evaluation, thresholdIndex uint64) ( + bool, []string, error) { + // Ignore non-terminal and new evaluations + if !eval.TerminalStatus() || eval.ModifyIndex > thresholdIndex { + return false, nil, nil + } + + // Get the allocations by eval + allocs, err := c.snap.AllocsByEval(eval.ID) + if err != nil { + c.srv.logger.Printf("[ERR] sched.core: failed to get allocs for eval %s: %v", + eval.ID, err) + return false, nil, err + } + + // Scan the allocations to ensure they are terminal and old + for _, alloc := range allocs { + if !alloc.TerminalStatus() || alloc.ModifyIndex > thresholdIndex { + return false, nil, nil + } + } + + allocIds := make([]string, len(allocs)) + for i, alloc := range allocs { + allocIds[i] = alloc.ID + } + + // Evaluation is eligible for garbage collection + return true, allocIds, nil +} + +// evalReap contacts the leader and issues a reap on the passed evals and +// allocs. +func (c *CoreScheduler) evalReap(evals, allocs []string) error { // Call to the leader to issue the reap req := structs.EvalDeleteRequest{ - Evals: gcEval, - Allocs: gcAlloc, + Evals: evals, + Allocs: allocs, WriteRequest: structs.WriteRequest{ Region: c.srv.config.Region, }, @@ -113,6 +209,7 @@ OUTER: c.srv.logger.Printf("[ERR] sched.core: eval reap failed: %v", err) return err } + return nil } diff --git a/nomad/core_sched_test.go b/nomad/core_sched_test.go index b8dfae961..2d4057d50 100644 --- a/nomad/core_sched_test.go +++ b/nomad/core_sched_test.go @@ -111,3 +111,111 @@ func TestCoreScheduler_NodeGC(t *testing.T) { t.Fatalf("bad: %v", out) } } + +func TestCoreScheduler_JobGC(t *testing.T) { + tests := []struct { + test, evalStatus, allocStatus string + shouldExist bool + }{ + { + test: "Terminal", + evalStatus: structs.EvalStatusFailed, + allocStatus: structs.AllocDesiredStatusFailed, + shouldExist: false, + }, + { + test: "Has Alloc", + evalStatus: structs.EvalStatusFailed, + allocStatus: structs.AllocDesiredStatusRun, + shouldExist: true, + }, + { + test: "Has Eval", + evalStatus: structs.EvalStatusPending, + allocStatus: structs.AllocDesiredStatusFailed, + shouldExist: true, + }, + } + + for _, test := range tests { + s1 := testServer(t, nil) + defer s1.Shutdown() + testutil.WaitForLeader(t, s1.RPC) + + // Insert job. 
+ state := s1.fsm.State()
+ job := mock.Job()
+ threshold := 1 * time.Hour
+ job.GC = &structs.JobGCConfig{
+ Enabled: true,
+ Threshold: threshold,
+ }
+ err := state.UpsertJob(1000, job)
+ if err != nil {
+ t.Fatalf("test(%s) err: %v", test.test, err)
+ }
+
+ // Insert eval
+ eval := mock.Eval()
+ eval.JobID = job.ID
+ eval.Status = test.evalStatus
+ err = state.UpsertEvals(1001, []*structs.Evaluation{eval})
+ if err != nil {
+ t.Fatalf("test(%s) err: %v", test.test, err)
+ }
+
+ // Insert alloc
+ alloc := mock.Alloc()
+ alloc.JobID = job.ID
+ alloc.EvalID = eval.ID
+ alloc.DesiredStatus = test.allocStatus
+ err = state.UpsertAllocs(1002, []*structs.Allocation{alloc})
+ if err != nil {
+ t.Fatalf("test(%s) err: %v", test.test, err)
+ }
+
+ // Update the time tables to make this work
+ tt := s1.fsm.TimeTable()
+ tt.Witness(2000, time.Now().UTC().Add(-1*threshold))
+
+ // Create a core scheduler
+ snap, err := state.Snapshot()
+ if err != nil {
+ t.Fatalf("test(%s) err: %v", test.test, err)
+ }
+ core := NewCoreScheduler(s1, snap)
+
+ // Attempt the GC
+ gc := s1.coreJobEval(structs.CoreJobJobGC)
+ gc.ModifyIndex = 2000
+ err = core.Process(gc)
+ if err != nil {
+ t.Fatalf("test(%s) err: %v", test.test, err)
+ }
+
+ // Verify the job, eval and alloc exist only when expected
+ out, err := state.JobByID(job.ID)
+ if err != nil {
+ t.Fatalf("test(%s) err: %v", test.test, err)
+ }
+ if (test.shouldExist && out == nil) || (!test.shouldExist && out != nil) {
+ t.Fatalf("test(%s) bad: %v", test.test, out)
+ }
+
+ outE, err := state.EvalByID(eval.ID)
+ if err != nil {
+ t.Fatalf("test(%s) err: %v", test.test, err)
+ }
+ if (test.shouldExist && outE == nil) || (!test.shouldExist && outE != nil) {
+ t.Fatalf("test(%s) bad: %v", test.test, outE)
+ }
+
+ outA, err := state.AllocByID(alloc.ID)
+ if err != nil {
+ t.Fatalf("test(%s) err: %v", test.test, err)
+ }
+ if (test.shouldExist && outA == nil) || (!test.shouldExist && outA != nil) {
+ t.Fatalf("test(%s) bad: %v", test.test, outA)
+ }
+ }
+}
diff --git a/nomad/job_endpoint.go b/nomad/job_endpoint.go
index 4d8cc128d..f693af9c4 100644
--- a/nomad/job_endpoint.go
+++ b/nomad/job_endpoint.go
@@ -29,8 +29,9 @@ func (j *Job) Register(args *structs.JobRegisterRequest, reply *structs.JobRegis
 return err
 }

- // Initialize all the fields of services
- args.Job.InitAllServiceFields()
+ // Initialize the job fields (sets defaults and any other necessary init
+ // work).
+ args.Job.InitFields()

 if args.Job.Type == structs.JobTypeCore {
 return fmt.Errorf("job type cannot be core")
 }
diff --git a/nomad/leader.go b/nomad/leader.go
index 8e0d6be7d..0e267a7dc 100644
--- a/nomad/leader.go
+++ b/nomad/leader.go
@@ -173,6 +173,8 @@ func (s *Server) schedulePeriodic(stopCh chan struct{}) {
 defer evalGC.Stop()
 nodeGC := time.NewTicker(s.config.NodeGCInterval)
 defer nodeGC.Stop()
+ jobGC := time.NewTicker(s.config.JobGCInterval)
+ defer jobGC.Stop()

 for {
 select {
@@ -180,6 +182,8 @@ func (s *Server) schedulePeriodic(stopCh chan struct{}) {
 s.evalBroker.Enqueue(s.coreJobEval(structs.CoreJobEvalGC))
 case <-nodeGC.C:
 s.evalBroker.Enqueue(s.coreJobEval(structs.CoreJobNodeGC))
+ case <-jobGC.C:
+ s.evalBroker.Enqueue(s.coreJobEval(structs.CoreJobJobGC))
 case <-stopCh:
 return
 }
diff --git a/nomad/state/schema.go b/nomad/state/schema.go
index dfd663aba..f795d1ff0 100644
--- a/nomad/state/schema.go
+++ b/nomad/state/schema.go
@@ -4,6 +4,7 @@ import (
 "fmt"

 "github.com/hashicorp/go-memdb"
+ "github.com/hashicorp/nomad/nomad/structs"
 )

 // stateStoreSchema is used to return the schema for the state store
@@ -100,10 +101,33 @@ func jobTableSchema() *memdb.TableSchema {
 Lowercase: false,
 },
 },
+ "gc": &memdb.IndexSchema{
+ Name: "gc",
+ AllowMissing: false,
+ Unique: false,
+ Indexer: &memdb.ConditionalIndex{
+ Conditional: jobIsGCable,
+ },
+ },
 },
 }
}

+// jobIsGCable satisfies the ConditionalIndexFunc interface and creates an index
+// on whether a job is eligible for garbage collection.
+func jobIsGCable(obj interface{}) (bool, error) {
+ j, ok := obj.(*structs.Job)
+ if !ok {
+ return false, fmt.Errorf("Unexpected type: %v", obj)
+ }
+
+ if j.GC == nil || !j.GC.Enabled {
+ return false, nil
+ }
+
+ return true, nil
+}
+
 // evalTableSchema returns the MemDB schema for the eval table.
 // This table is used to store all the evaluations that are pending
 // or recently completed.
diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go
index 30ee87259..c6236183a 100644
--- a/nomad/state/state_store.go
+++ b/nomad/state/state_store.go
@@ -372,6 +372,18 @@ func (s *StateStore) JobsByScheduler(schedulerType string) (memdb.ResultIterator
 return iter, nil
 }

+// JobsByGC returns an iterator over all jobs eligible or ineligible for garbage
+// collection.
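+// The result set is served by the memdb "gc" conditional index defined in
+// schema.go (jobIsGCable), so it stays in sync as jobs are upserted.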
+func (s *StateStore) JobsByGC(gc bool) (memdb.ResultIterator, error) { + txn := s.db.Txn(false) + + iter, err := txn.Get("jobs", "gc", gc) + if err != nil { + return nil, err + } + return iter, nil +} + // UpsertEvaluation is used to upsert an evaluation func (s *StateStore) UpsertEvals(index uint64, evals []*structs.Evaluation) error { txn := s.db.Txn(true) diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index 5e1021e55..aa497cd4a 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -471,6 +471,66 @@ func TestStateStore_JobsByScheduler(t *testing.T) { } } +func TestStateStore_JobsByGC(t *testing.T) { + state := testStateStore(t) + var gc, nonGc []*structs.Job + + for i := 0; i < 10; i++ { + job := mock.Job() + nonGc = append(nonGc, job) + + if err := state.UpsertJob(1000+uint64(i), job); err != nil { + t.Fatalf("err: %v", err) + } + } + + for i := 0; i < 10; i++ { + job := mock.Job() + job.GC = &structs.JobGCConfig{ + Enabled: true, + Threshold: structs.DefaultJobGCThreshold, + } + gc = append(gc, job) + + if err := state.UpsertJob(2000+uint64(i), job); err != nil { + t.Fatalf("err: %v", err) + } + } + + iter, err := state.JobsByGC(true) + if err != nil { + t.Fatalf("err: %v", err) + } + + var outGc []*structs.Job + for i := iter.Next(); i != nil; i = iter.Next() { + outGc = append(outGc, i.(*structs.Job)) + } + + iter, err = state.JobsByGC(false) + if err != nil { + t.Fatalf("err: %v", err) + } + + var outNonGc []*structs.Job + for i := iter.Next(); i != nil; i = iter.Next() { + outNonGc = append(outNonGc, i.(*structs.Job)) + } + + sort.Sort(JobIDSort(gc)) + sort.Sort(JobIDSort(nonGc)) + sort.Sort(JobIDSort(outGc)) + sort.Sort(JobIDSort(outNonGc)) + + if !reflect.DeepEqual(gc, outGc) { + t.Fatalf("bad: %#v %#v", gc, outGc) + } + + if !reflect.DeepEqual(nonGc, outNonGc) { + t.Fatalf("bad: %#v %#v", nonGc, outNonGc) + } +} + func TestStateStore_RestoreJob(t *testing.T) { state := testStateStore(t) job := mock.Job() diff --git a/nomad/structs/funcs_test.go b/nomad/structs/funcs_test.go index d156394dc..a3bf23885 100644 --- a/nomad/structs/funcs_test.go +++ b/nomad/structs/funcs_test.go @@ -22,19 +22,47 @@ func TestRemoveAllocs(t *testing.T) { } } -func TestFilterTerminalALlocs(t *testing.T) { +func TestFilterTerminalAllocs(t *testing.T) { l := []*Allocation{ - &Allocation{ID: "foo", DesiredStatus: AllocDesiredStatusRun}, &Allocation{ID: "bar", DesiredStatus: AllocDesiredStatusEvict}, &Allocation{ID: "baz", DesiredStatus: AllocDesiredStatusStop}, - &Allocation{ID: "zip", DesiredStatus: AllocDesiredStatusRun}, + &Allocation{ + ID: "zip", + DesiredStatus: AllocDesiredStatusRun, + TaskStates: map[string]*TaskState{ + "a": &TaskState{State: TaskStatePending}, + }, + }, + &Allocation{ + ID: "foo", + DesiredStatus: AllocDesiredStatusRun, + TaskStates: map[string]*TaskState{ + "a": &TaskState{State: TaskStatePending}, + }, + }, + &Allocation{ + ID: "bam", + DesiredStatus: AllocDesiredStatusRun, + TaskStates: map[string]*TaskState{ + "a": &TaskState{State: TaskStatePending}, + "b": &TaskState{State: TaskStateDead}, + }, + }, + &Allocation{ + ID: "fizz", + DesiredStatus: AllocDesiredStatusRun, + TaskStates: map[string]*TaskState{ + "a": &TaskState{State: TaskStateDead}, + "b": &TaskState{State: TaskStateDead}, + }, + }, } out := FilterTerminalAllocs(l) - if len(out) != 2 { + if len(out) != 3 { t.Fatalf("bad: %#v", out) } - if out[0].ID != "foo" && out[1].ID != "zip" { + if out[0].ID != "zip" && out[1].ID != "foo" && out[2].ID != "bam" { 
t.Fatalf("bad: %#v", out) } } diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index a86fae843..6d384623e 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -764,6 +764,10 @@ type Job struct { // Periodic is used to define the interval the job is run at. Periodic *PeriodicConfig + // GC is used to mark the job as available for garbage collection after it + // has no outstanding evaluations or allocations. + GC *JobGCConfig + // Meta is used to associate arbitrary metadata with this // job. This is opaque to Nomad. Meta map[string]string @@ -779,6 +783,18 @@ type Job struct { ModifyIndex uint64 } +// InitFields is used to initialize fields in the Job. This should be called +// when registering a Job. +func (j *Job) InitFields() { + // Initialize the service block. + j.InitAllServiceFields() + + // Initalize the GC policy + if j.GC != nil { + j.GC.Init() + } +} + // InitAllServiceFields traverses all Task Groups and makes them // interpolate Job, Task group and Task names in all Service names. // It also generates the check names if they are not set. This method also @@ -854,6 +870,13 @@ func (j *Job) Validate() error { fmt.Errorf("Periodic can only be used with %q scheduler", JobTypeBatch)) } + // Validate the GC config. + if j.GC != nil { + if err := j.GC.Validate(); err != nil { + mErr.Errors = append(mErr.Errors, err) + } + } + return mErr.ErrorOrNil() } @@ -899,6 +922,37 @@ type JobListStub struct { ModifyIndex uint64 } +const ( + // DefaultJobGCThreshold is the default threshold for garbage collecting + // eligible jobs. + DefaultJobGCThreshold = 4 * time.Hour +) + +// JobGCConfig configures the garbage collection policy of a job. +type JobGCConfig struct { + // Enabled determines whether the job is eligible for garbage collection. + Enabled bool + + // Threshold is how old a job must be before it eligible for GC. This gives + // the user time to inspect the job. + Threshold time.Duration +} + +// Init sets the Threshold time to its default value if it is un-specified but +// garbage collection is enabled. +func (gc *JobGCConfig) Init() { + if gc.Enabled && gc.Threshold == 0 { + gc.Threshold = DefaultJobGCThreshold + } +} + +func (gc *JobGCConfig) Validate() error { + if gc.Threshold < 0 { + return fmt.Errorf("job GC threshold must be positive: %v", gc.Threshold) + } + return nil +} + // UpdateStrategy is used to modify how updates are done type UpdateStrategy struct { // Stagger is the amount of time between the updates @@ -1470,14 +1524,21 @@ type Allocation struct { ModifyIndex uint64 } -// TerminalStatus returns if the desired status is terminal and -// will no longer transition. This is not based on the current client status. +// TerminalStatus returns if the desired or actual status is terminal and +// will no longer transition. func (a *Allocation) TerminalStatus() bool { switch a.DesiredStatus { case AllocDesiredStatusStop, AllocDesiredStatusEvict, AllocDesiredStatusFailed: return true default: - return false + // If all tasks are dead, the alloc is terminal. + for _, state := range a.TaskStates { + if state.State != TaskStateDead { + return false + } + } + + return true } } @@ -1656,6 +1717,12 @@ const ( // We periodically scan nodes in a terminal state, and if they have no // corresponding allocations we delete these out of the system. CoreJobNodeGC = "node-gc" + + // CoreJobJobGC is used for the garbage collection of eligible jobs. 
We + // periodically scan garbage collectible jobs and check if both their + // evaluations and allocations are terminal. If so, we delete these out of + // the system. + CoreJobJobGC = "job-gc" ) // Evaluation is used anytime we need to apply business logic as a result diff --git a/scheduler/context_test.go b/scheduler/context_test.go index 914b54b06..1f5730286 100644 --- a/scheduler/context_test.go +++ b/scheduler/context_test.go @@ -61,6 +61,9 @@ func TestEvalContext_ProposedAlloc(t *testing.T) { MemoryMB: 2048, }, DesiredStatus: structs.AllocDesiredStatusRun, + TaskStates: map[string]*structs.TaskState{ + "foo": &structs.TaskState{State: structs.TaskStatePending}, + }, } alloc2 := &structs.Allocation{ ID: structs.GenerateUUID(), @@ -72,6 +75,9 @@ func TestEvalContext_ProposedAlloc(t *testing.T) { MemoryMB: 1024, }, DesiredStatus: structs.AllocDesiredStatusRun, + TaskStates: map[string]*structs.TaskState{ + "foo": &structs.TaskState{State: structs.TaskStatePending}, + }, } noErr(t, state.UpsertAllocs(1000, []*structs.Allocation{alloc1, alloc2})) diff --git a/scheduler/rank_test.go b/scheduler/rank_test.go index 605902ed4..68716ea76 100644 --- a/scheduler/rank_test.go +++ b/scheduler/rank_test.go @@ -203,6 +203,9 @@ func TestBinPackIterator_ExistingAlloc(t *testing.T) { MemoryMB: 2048, }, DesiredStatus: structs.AllocDesiredStatusRun, + TaskStates: map[string]*structs.TaskState{ + "foo": &structs.TaskState{State: structs.TaskStatePending}, + }, } alloc2 := &structs.Allocation{ ID: structs.GenerateUUID(), @@ -214,6 +217,9 @@ func TestBinPackIterator_ExistingAlloc(t *testing.T) { MemoryMB: 1024, }, DesiredStatus: structs.AllocDesiredStatusRun, + TaskStates: map[string]*structs.TaskState{ + "foo": &structs.TaskState{State: structs.TaskStatePending}, + }, } noErr(t, state.UpsertAllocs(1000, []*structs.Allocation{alloc1, alloc2})) @@ -277,6 +283,9 @@ func TestBinPackIterator_ExistingAlloc_PlannedEvict(t *testing.T) { MemoryMB: 2048, }, DesiredStatus: structs.AllocDesiredStatusRun, + TaskStates: map[string]*structs.TaskState{ + "foo": &structs.TaskState{State: structs.TaskStatePending}, + }, } alloc2 := &structs.Allocation{ ID: structs.GenerateUUID(), @@ -288,6 +297,9 @@ func TestBinPackIterator_ExistingAlloc_PlannedEvict(t *testing.T) { MemoryMB: 1024, }, DesiredStatus: structs.AllocDesiredStatusRun, + TaskStates: map[string]*structs.TaskState{ + "foo": &structs.TaskState{State: structs.TaskStatePending}, + }, } noErr(t, state.UpsertAllocs(1000, []*structs.Allocation{alloc1, alloc2})) @@ -317,7 +329,7 @@ func TestBinPackIterator_ExistingAlloc_PlannedEvict(t *testing.T) { t.Fatalf("Bad: %v", out[0]) } if out[1].Score != 18 { - t.Fatalf("Bad: %v", out[0]) + t.Fatalf("Bad: %v", out[1]) } } diff --git a/website/source/docs/jobspec/index.html.md b/website/source/docs/jobspec/index.html.md index 99aefe2b4..8b8d0ef1c 100644 --- a/website/source/docs/jobspec/index.html.md +++ b/website/source/docs/jobspec/index.html.md @@ -156,6 +156,29 @@ The `job` object supports the following keys: and "h" suffix can be used, such as "30s". Both values default to zero, which disables rolling updates. +* `gc` - Specifies the job's garbage collection configuration. This allows jobs + to be garbage collected when all their evaluations and allocations are + terminal. The `gc` block has the following format: + + ``` + "gc" { + // Enabled is set to true by default if the "gc" block is included. + enabled = true + + // Threshold is a duration that configures how old a job must be + // before it is garbage collected. 
+ threshold = "4h" + } + ``` + + * `enabled`: Toggles the eligibility of a job for garbage collection. + + * `threshold`: `threshold` is a string that should be parseable as a + [time.Duration](https://golang.org/pkg/time/#ParseDuration). A job will + only be garbage collected after the job, its evaluations and allocations + are all older than the threshold. + + ### Task Group The `group` object supports the following keys: From 1375d255e9309c25e114911cce5d08f27298ef36 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 15 Dec 2015 13:07:25 -0800 Subject: [PATCH 50/82] Remove user-specifiable gc threshold --- api/jobs.go | 8 +--- jobspec/parse.go | 20 ++++------ jobspec/parse_test.go | 5 +-- jobspec/test-fixtures/gc.hcl | 2 +- nomad/config.go | 5 +++ nomad/core_sched.go | 6 ++- nomad/core_sched_test.go | 6 +-- nomad/state/schema.go | 6 +-- nomad/state/state_store_test.go | 5 +-- nomad/structs/structs.go | 45 +---------------------- website/source/docs/jobspec/index.html.md | 14 ++----- 11 files changed, 27 insertions(+), 95 deletions(-) diff --git a/api/jobs.go b/api/jobs.go index f50826ad3..912bcb972 100644 --- a/api/jobs.go +++ b/api/jobs.go @@ -107,12 +107,6 @@ type UpdateStrategy struct { MaxParallel int } -// JobGCConfig configures the garbage collection policy of a job. -type JobGCConfig struct { - Enabled bool - Threshold time.Duration -} - // Job is used to serialize a job. type Job struct { Region string @@ -125,7 +119,7 @@ type Job struct { Constraints []*Constraint TaskGroups []*TaskGroup Update *UpdateStrategy - GC *JobGCConfig + GC bool Meta map[string]string Status string StatusDescription string diff --git a/jobspec/parse.go b/jobspec/parse.go index 723fac6a1..2573341bd 100644 --- a/jobspec/parse.go +++ b/jobspec/parse.go @@ -723,7 +723,7 @@ func parsePeriodic(result **structs.PeriodicConfig, list *ast.ObjectList) error return nil } -func parseGC(result **structs.JobGCConfig, list *ast.ObjectList) error { +func parseGC(result *bool, list *ast.ObjectList) error { list = list.Elem() if len(list.Items) > 1 { return fmt.Errorf("only one 'gc' block allowed per job") @@ -738,23 +738,17 @@ func parseGC(result **structs.JobGCConfig, list *ast.ObjectList) error { } // Enabled by default if the gc block exists. 
+ enabled := false
 if value, ok := m["enabled"]; !ok {
- m["Enabled"] = true
+ enabled = true
 } else {
- enabled, err := parseBool(value)
+ var err error
+ enabled, err = parseBool(value)
 if err != nil {
 return fmt.Errorf("gc.enabled should be set to true or false; %v", err)
 }
- m["Enabled"] = enabled
 }

- dec, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{
- DecodeHook: mapstructure.StringToTimeDurationHookFunc(),
- WeaklyTypedInput: true,
- Result: result,
- })
- if err != nil {
- return err
- }
- return dec.Decode(m)
+ *result = enabled
+ return nil
}
diff --git a/jobspec/parse_test.go b/jobspec/parse_test.go
index 768fa9988..7a89595f8 100644
--- a/jobspec/parse_test.go
+++ b/jobspec/parse_test.go
@@ -254,10 +254,7 @@ func TestParse(t *testing.T) {
 Priority: 50,
 Region: "global",
 Type: "service",
- GC: &structs.JobGCConfig{
- Enabled: true,
- Threshold: 2 * time.Hour,
- },
+ GC: true,
 },
 false,
 },
diff --git a/jobspec/test-fixtures/gc.hcl b/jobspec/test-fixtures/gc.hcl
index cef4bcabd..5af5de1e1 100644
--- a/jobspec/test-fixtures/gc.hcl
+++ b/jobspec/test-fixtures/gc.hcl
@@ -1,5 +1,5 @@
 job "foo" {
 gc {
- threshold = "2h"
+ enabled = true
 }
}
diff --git a/nomad/config.go b/nomad/config.go
index 850aa58c2..177392198 100644
--- a/nomad/config.go
+++ b/nomad/config.go
@@ -135,6 +135,10 @@ type Config struct {
 // available for garbage collection.
 JobGCInterval time.Duration

+ // JobGCThreshold is how old a job must be before it is eligible for GC. This gives
+ // the user time to inspect the job.
+ JobGCThreshold time.Duration
+
 // NodeGCInterval is how often we dispatch a job to GC failed nodes.
 NodeGCInterval time.Duration

@@ -207,6 +211,7 @@ func DefaultConfig() *Config {
 EvalGCInterval: 5 * time.Minute,
 EvalGCThreshold: 1 * time.Hour,
 JobGCInterval: 5 * time.Minute,
+ JobGCThreshold: 4 * time.Hour,
 NodeGCInterval: 5 * time.Minute,
 NodeGCThreshold: 24 * time.Hour,
 EvalNackTimeout: 60 * time.Second,
diff --git a/nomad/core_sched.go b/nomad/core_sched.go
index 27ca5cfcf..ed096a53f 100644
--- a/nomad/core_sched.go
+++ b/nomad/core_sched.go
@@ -50,6 +50,10 @@ func (c *CoreScheduler) jobGC(eval *structs.Evaluation) error {
 // Get the time table to calculate GC cutoffs.
 tt := c.srv.fsm.TimeTable()
+ cutoff := time.Now().UTC().Add(-1 * c.srv.config.JobGCThreshold)
+ oldThreshold := tt.NearestIndex(cutoff)
+ c.srv.logger.Printf("[DEBUG] sched.core: job GC: scanning before index %d (%v)",
+ oldThreshold, c.srv.config.JobGCThreshold)

 // Collect the allocations and evaluations to GC
 var gcAlloc, gcEval, gcJob []string
@@ -57,8 +61,6 @@ func (c *CoreScheduler) jobGC(eval *structs.Evaluation) error {
OUTER:
 for i := jIter.Next(); i != nil; i = jIter.Next() {
 job := i.(*structs.Job)
- cutoff := time.Now().UTC().Add(-1 * job.GC.Threshold)
- oldThreshold := tt.NearestIndex(cutoff)

 // Ignore new jobs.
 if job.CreateIndex > oldThreshold {
diff --git a/nomad/core_sched_test.go b/nomad/core_sched_test.go
index 2d4057d50..28c0580f1 100644
--- a/nomad/core_sched_test.go
+++ b/nomad/core_sched_test.go
@@ -145,11 +145,7 @@ func TestCoreScheduler_JobGC(t *testing.T) {
 // Insert job.
 state := s1.fsm.State()
 job := mock.Job()
- threshold := 1 * time.Hour
- job.GC = &structs.JobGCConfig{
- Enabled: true,
- Threshold: threshold,
- }
+ job.GC = true
 err := state.UpsertJob(1000, job)
 if err != nil {
 t.Fatalf("test(%s) err: %v", test.test, err)
 }
diff --git a/nomad/state/schema.go b/nomad/state/schema.go
index f795d1ff0..961cb67a7 100644
--- a/nomad/state/schema.go
+++ b/nomad/state/schema.go
@@ -121,11 +121,7 @@ func jobIsGCable(obj interface{}) (bool, error) {
 return false, fmt.Errorf("Unexpected type: %v", obj)
 }

- if j.GC == nil || !j.GC.Enabled {
- return false, nil
- }
-
- return true, nil
+ return j.GC, nil
}

 // evalTableSchema returns the MemDB schema for the eval table.
diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go
index aa497cd4a..0609f3048 100644
--- a/nomad/state/state_store_test.go
+++ b/nomad/state/state_store_test.go
@@ -486,10 +486,7 @@ func TestStateStore_JobsByGC(t *testing.T) {
 for i := 0; i < 10; i++ {
 job := mock.Job()
- job.GC = &structs.JobGCConfig{
- Enabled: true,
- Threshold: structs.DefaultJobGCThreshold,
- }
+ job.GC = true
 gc = append(gc, job)
 if err := state.UpsertJob(2000+uint64(i), job); err != nil {
 t.Fatalf("err: %v", err)
 }
diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go
index 6d384623e..1f45f8842 100644
--- a/nomad/structs/structs.go
+++ b/nomad/structs/structs.go
@@ -766,7 +766,7 @@ type Job struct {
 // GC is used to mark the job as available for garbage collection after it
 // has no outstanding evaluations or allocations.
- GC *JobGCConfig
+ GC bool

 // Meta is used to associate arbitrary metadata with this
 // job. This is opaque to Nomad.
@@ -788,11 +788,6 @@ type Job struct {
 func (j *Job) InitFields() {
 // Initialize the service block.
 j.InitAllServiceFields()
-
- // Initialize the GC policy
- if j.GC != nil {
- j.GC.Init()
- }
}

 // InitAllServiceFields traverses all Task Groups and makes them
@@ -870,13 +865,6 @@ func (j *Job) Validate() error {
 fmt.Errorf("Periodic can only be used with %q scheduler", JobTypeBatch))
 }

- // Validate the GC config.
- if j.GC != nil {
- if err := j.GC.Validate(); err != nil {
- mErr.Errors = append(mErr.Errors, err)
- }
- }
-
 return mErr.ErrorOrNil()
}

@@ -922,37 +910,6 @@ type JobListStub struct {
 ModifyIndex uint64
}

-const (
- // DefaultJobGCThreshold is the default threshold for garbage collecting
- // eligible jobs.
- DefaultJobGCThreshold = 4 * time.Hour
-)
-
-// JobGCConfig configures the garbage collection policy of a job.
-type JobGCConfig struct {
- // Enabled determines whether the job is eligible for garbage collection.
- Enabled bool
-
- // Threshold is how old a job must be before it is eligible for GC. This gives
- // the user time to inspect the job.
- Threshold time.Duration
-}
-
-// Init sets the Threshold time to its default value if it is unspecified but
-// garbage collection is enabled.
-func (gc *JobGCConfig) Init() { - if gc.Enabled && gc.Threshold == 0 { - gc.Threshold = DefaultJobGCThreshold - } -} - -func (gc *JobGCConfig) Validate() error { - if gc.Threshold < 0 { - return fmt.Errorf("job GC threshold must be positive: %v", gc.Threshold) - } - return nil -} - // UpdateStrategy is used to modify how updates are done type UpdateStrategy struct { // Stagger is the amount of time between the updates diff --git a/website/source/docs/jobspec/index.html.md b/website/source/docs/jobspec/index.html.md index 8b8d0ef1c..d3f4ee650 100644 --- a/website/source/docs/jobspec/index.html.md +++ b/website/source/docs/jobspec/index.html.md @@ -156,7 +156,7 @@ The `job` object supports the following keys: and "h" suffix can be used, such as "30s". Both values default to zero, which disables rolling updates. -* `gc` - Specifies the job's garbage collection configuration. This allows jobs +* `gc` - Toggles the job's eligibility for garbage collection. This allows jobs to be garbage collected when all their evaluations and allocations are terminal. The `gc` block has the following format: @@ -164,20 +164,14 @@ The `job` object supports the following keys: "gc" { // Enabled is set to true by default if the "gc" block is included. enabled = true - - // Threshold is a duration that configures how old a job must be - // before it is garbage collected. - threshold = "4h" } + + // Equivalent configuration. + "gc" {} ``` * `enabled`: Toggles the eligibility of a job for garbage collection. - * `threshold`: `threshold` is a string that should be parseable as a - [time.Duration](https://golang.org/pkg/time/#ParseDuration). A job will - only be garbage collected after the job, its evaluations and allocations - are all older than the threshold. - ### Task Group From 4cc880249a057ff238e3726af8268755016131eb Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 15 Dec 2015 13:32:31 -0800 Subject: [PATCH 51/82] Fix test --- nomad/core_sched_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nomad/core_sched_test.go b/nomad/core_sched_test.go index 28c0580f1..54c876051 100644 --- a/nomad/core_sched_test.go +++ b/nomad/core_sched_test.go @@ -172,7 +172,7 @@ func TestCoreScheduler_JobGC(t *testing.T) { // Update the time tables to make this work tt := s1.fsm.TimeTable() - tt.Witness(2000, time.Now().UTC().Add(-1*threshold)) + tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.JobGCThreshold)) // Create a core scheduler snap, err := state.Snapshot() From c496c761b82a2428f9d6ef20556621a94f84afb7 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 15 Dec 2015 17:20:07 -0800 Subject: [PATCH 52/82] Remove from parser --- api/jobs.go | 1 - jobspec/parse.go | 38 ----------------------- jobspec/parse_test.go | 13 -------- jobspec/test-fixtures/gc.hcl | 5 --- website/source/docs/jobspec/index.html.md | 17 ---------- 5 files changed, 74 deletions(-) delete mode 100644 jobspec/test-fixtures/gc.hcl diff --git a/api/jobs.go b/api/jobs.go index 912bcb972..17e75daff 100644 --- a/api/jobs.go +++ b/api/jobs.go @@ -119,7 +119,6 @@ type Job struct { Constraints []*Constraint TaskGroups []*TaskGroup Update *UpdateStrategy - GC bool Meta map[string]string Status string StatusDescription string diff --git a/jobspec/parse.go b/jobspec/parse.go index 2573341bd..765f58b3a 100644 --- a/jobspec/parse.go +++ b/jobspec/parse.go @@ -91,7 +91,6 @@ func parseJob(result *structs.Job, list *ast.ObjectList) error { delete(m, "meta") delete(m, "update") delete(m, "periodic") - delete(m, "gc") // Set the ID and name 
to the object key result.ID = obj.Keys[0].Token.Value().(string) @@ -136,13 +135,6 @@ func parseJob(result *structs.Job, list *ast.ObjectList) error { } } - // If we have a gc config, then parse that - if o := listVal.Filter("gc"); len(o.Items) > 0 { - if err := parseGC(&result.GC, o); err != nil { - return err - } - } - // Parse out meta fields. These are in HCL as a list so we need // to iterate over them and merge them. if metaO := listVal.Filter("meta"); len(metaO.Items) > 0 { @@ -722,33 +714,3 @@ func parsePeriodic(result **structs.PeriodicConfig, list *ast.ObjectList) error *result = &p return nil } - -func parseGC(result *bool, list *ast.ObjectList) error { - list = list.Elem() - if len(list.Items) > 1 { - return fmt.Errorf("only one 'gc' block allowed per job") - } - - // Get our resource object - o := list.Items[0] - - var m map[string]interface{} - if err := hcl.DecodeObject(&m, o.Val); err != nil { - return err - } - - // Enabled by default if the gc block exists. - enabled := false - if value, ok := m["enabled"]; !ok { - enabled = true - } else { - var err error - enabled, err = parseBool(value) - if err != nil { - return fmt.Errorf("gc.enabled should be set to true or false; %v", err) - } - } - - *result = enabled - return nil -} diff --git a/jobspec/parse_test.go b/jobspec/parse_test.go index 7a89595f8..4497348eb 100644 --- a/jobspec/parse_test.go +++ b/jobspec/parse_test.go @@ -246,19 +246,6 @@ func TestParse(t *testing.T) { false, }, - { - "gc.hcl", - &structs.Job{ - ID: "foo", - Name: "foo", - Priority: 50, - Region: "global", - Type: "service", - GC: true, - }, - false, - }, - { "specify-job.hcl", &structs.Job{ diff --git a/jobspec/test-fixtures/gc.hcl b/jobspec/test-fixtures/gc.hcl deleted file mode 100644 index 5af5de1e1..000000000 --- a/jobspec/test-fixtures/gc.hcl +++ /dev/null @@ -1,5 +0,0 @@ -job "foo" { - gc { - enabled = true - } -} diff --git a/website/source/docs/jobspec/index.html.md b/website/source/docs/jobspec/index.html.md index d3f4ee650..99aefe2b4 100644 --- a/website/source/docs/jobspec/index.html.md +++ b/website/source/docs/jobspec/index.html.md @@ -156,23 +156,6 @@ The `job` object supports the following keys: and "h" suffix can be used, such as "30s". Both values default to zero, which disables rolling updates. -* `gc` - Toggles the job's eligibility for garbage collection. This allows jobs - to be garbage collected when all their evaluations and allocations are - terminal. The `gc` block has the following format: - - ``` - "gc" { - // Enabled is set to true by default if the "gc" block is included. - enabled = true - } - - // Equivalent configuration. - "gc" {} - ``` - - * `enabled`: Toggles the eligibility of a job for garbage collection. 
- - ### Task Group The `group` object supports the following keys: From b09440a018ab5014a2d2d1f32081fe7dcf32db61 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 15 Dec 2015 17:30:50 -0800 Subject: [PATCH 53/82] Don't allow users to set gc and make batch gc --- nomad/job_endpoint.go | 21 +++++++++++-- nomad/job_endpoint_test.go | 62 ++++++++++++++++++++++++++++++++++++++ nomad/structs/structs.go | 5 +++ 3 files changed, 85 insertions(+), 3 deletions(-) diff --git a/nomad/job_endpoint.go b/nomad/job_endpoint.go index f693af9c4..18da75268 100644 --- a/nomad/job_endpoint.go +++ b/nomad/job_endpoint.go @@ -1,6 +1,7 @@ package nomad import ( + "errors" "fmt" "time" @@ -25,14 +26,18 @@ func (j *Job) Register(args *structs.JobRegisterRequest, reply *structs.JobRegis if args.Job == nil { return fmt.Errorf("missing job for registration") } - if err := args.Job.Validate(); err != nil { + + if err := j.checkBlacklist(args.Job); err != nil { return err } - // Initialize the job fields (sets defaults and any other necessary init - // work). + // Initialize the job fields (sets defaults and any necessary init work). args.Job.InitFields() + if err := args.Job.Validate(); err != nil { + return err + } + if args.Job.Type == structs.JobTypeCore { return fmt.Errorf("job type cannot be core") } @@ -76,6 +81,16 @@ func (j *Job) Register(args *structs.JobRegisterRequest, reply *structs.JobRegis return nil } +// checkBlacklist returns an error if the user has set any blacklisted field in +// the job. +func (j *Job) checkBlacklist(job *structs.Job) error { + if job.GC { + return errors.New("GC field of a job is used only internally and should not be set by user") + } + + return nil +} + // Evaluate is used to force a job for re-evaluation func (j *Job) Evaluate(args *structs.JobEvaluateRequest, reply *structs.JobRegisterResponse) error { if done, err := j.srv.forward("Job.Evaluate", args, args, reply); done { diff --git a/nomad/job_endpoint_test.go b/nomad/job_endpoint_test.go index 986ebc102..5bc3bb952 100644 --- a/nomad/job_endpoint_test.go +++ b/nomad/job_endpoint_test.go @@ -171,6 +171,68 @@ func TestJobEndpoint_Register_Existing(t *testing.T) { } } +func TestJobEndpoint_Register_Batch(t *testing.T) { + s1 := testServer(t, func(c *Config) { + c.NumSchedulers = 0 // Prevent automatic dequeue + }) + defer s1.Shutdown() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + + // Create the register request + job := mock.Job() + job.Type = structs.JobTypeBatch + req := &structs.JobRegisterRequest{ + Job: job, + WriteRequest: structs.WriteRequest{Region: "global"}, + } + + // Fetch the response + var resp structs.JobRegisterResponse + if err := msgpackrpc.CallWithCodec(codec, "Job.Register", req, &resp); err != nil { + t.Fatalf("err: %v", err) + } + if resp.Index == 0 { + t.Fatalf("bad index: %d", resp.Index) + } + + // Check for the node in the FSM + state := s1.fsm.State() + out, err := state.JobByID(job.ID) + if err != nil { + t.Fatalf("err: %v", err) + } + if out == nil { + t.Fatalf("expected job") + } + if !out.GC { + t.Fatal("expect batch job to be made garbage collectible") + } +} + +func TestJobEndpoint_Register_GC_Set(t *testing.T) { + s1 := testServer(t, func(c *Config) { + c.NumSchedulers = 0 // Prevent automatic dequeue + }) + defer s1.Shutdown() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + + // Create the register request + job := mock.Job() + job.GC = true + req := &structs.JobRegisterRequest{ + Job: job, + WriteRequest: structs.WriteRequest{Region: "global"}, + } 
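+ // Registration should be rejected here: GC is an internal-only field
+ // that checkBlacklist above disallows users from setting.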
+ + // Fetch the response + var resp structs.JobRegisterResponse + if err := msgpackrpc.CallWithCodec(codec, "Job.Register", req, &resp); err == nil { + t.Fatalf("expect err") + } +} + func TestJobEndpoint_Evaluate(t *testing.T) { s1 := testServer(t, func(c *Config) { c.NumSchedulers = 0 // Prevent automatic dequeue diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 1f45f8842..14912dd0c 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -788,6 +788,11 @@ type Job struct { func (j *Job) InitFields() { // Initialize the service block. j.InitAllServiceFields() + + // If the job is batch then make it GC. + if j.Type == JobTypeBatch { + j.GC = true + } } // InitAllServiceFields traverses all Task Groups and makes them From 6cdd01366013cb2a7a8d5c064cc46e4bdd27f9d7 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 16 Dec 2015 14:27:40 -0800 Subject: [PATCH 54/82] Small cleanup --- nomad/core_sched.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nomad/core_sched.go b/nomad/core_sched.go index ed096a53f..f557c9285 100644 --- a/nomad/core_sched.go +++ b/nomad/core_sched.go @@ -43,7 +43,7 @@ func (s *CoreScheduler) Process(eval *structs.Evaluation) error { // jobGC is used to garbage collect eligible jobs. func (c *CoreScheduler) jobGC(eval *structs.Evaluation) error { // Get all the jobs eligible for garbage collection. - jIter, err := c.snap.JobsByGC(true) + iter, err := c.snap.JobsByGC(true) if err != nil { return err } @@ -55,16 +55,16 @@ func (c *CoreScheduler) jobGC(eval *structs.Evaluation) error { c.srv.logger.Printf("[DEBUG] sched.core: job GC: scanning before index %d (%v)", oldThreshold, c.srv.config.JobGCThreshold) - // Collect the allocations and evaluations to GC + // Collect the allocations, evaluations and jobs to GC var gcAlloc, gcEval, gcJob []string OUTER: - for i := jIter.Next(); i != nil; i = jIter.Next() { + for i := iter.Next(); i != nil; i = iter.Next() { job := i.(*structs.Job) // Ignore new jobs. if job.CreateIndex > oldThreshold { - continue OUTER + continue } evals, err := c.snap.EvalsByJob(job.ID) From 2752a951a254f06cf5e11eb448e6ff336d91d5a4 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 16 Dec 2015 14:34:17 -0800 Subject: [PATCH 55/82] Use client state --- nomad/structs/structs.go | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 14912dd0c..ec4f08986 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -1489,18 +1489,19 @@ type Allocation struct { // TerminalStatus returns if the desired or actual status is terminal and // will no longer transition. func (a *Allocation) TerminalStatus() bool { + // First check the desired state and if that isn't terminal, check client + // state. switch a.DesiredStatus { case AllocDesiredStatusStop, AllocDesiredStatusEvict, AllocDesiredStatusFailed: return true default: - // If all tasks are dead, the alloc is terminal. 
- for _, state := range a.TaskStates { - if state.State != TaskStateDead { - return false - } - } + } + switch a.ClientStatus { + case AllocClientStatusDead, AllocClientStatusFailed: return true + default: + return false } } From bc13dcaf48bf66b24ecaea20209848cea1e622b5 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 16 Dec 2015 15:01:15 -0800 Subject: [PATCH 56/82] merge --- nomad/mock/mock.go | 6 ------ nomad/structs/funcs_test.go | 28 ++++------------------------ scheduler/context_test.go | 8 ++------ scheduler/rank_test.go | 16 ++++------------ 4 files changed, 10 insertions(+), 48 deletions(-) diff --git a/nomad/mock/mock.go b/nomad/mock/mock.go index 012677f18..2e0057c8e 100644 --- a/nomad/mock/mock.go +++ b/nomad/mock/mock.go @@ -231,12 +231,6 @@ func Alloc() *structs.Allocation { }, }, }, - Services: map[string]string{"web-frontend": "nomad-registered-task-1234"}, - TaskStates: map[string]*structs.TaskState{ - "web": &structs.TaskState{ - State: structs.TaskStatePending, - }, - }, Job: Job(), DesiredStatus: structs.AllocDesiredStatusRun, ClientStatus: structs.AllocClientStatusPending, diff --git a/nomad/structs/funcs_test.go b/nomad/structs/funcs_test.go index a3bf23885..93ce5cb30 100644 --- a/nomad/structs/funcs_test.go +++ b/nomad/structs/funcs_test.go @@ -26,43 +26,23 @@ func TestFilterTerminalAllocs(t *testing.T) { l := []*Allocation{ &Allocation{ID: "bar", DesiredStatus: AllocDesiredStatusEvict}, &Allocation{ID: "baz", DesiredStatus: AllocDesiredStatusStop}, - &Allocation{ - ID: "zip", - DesiredStatus: AllocDesiredStatusRun, - TaskStates: map[string]*TaskState{ - "a": &TaskState{State: TaskStatePending}, - }, - }, &Allocation{ ID: "foo", DesiredStatus: AllocDesiredStatusRun, - TaskStates: map[string]*TaskState{ - "a": &TaskState{State: TaskStatePending}, - }, + ClientStatus: AllocClientStatusPending, }, &Allocation{ ID: "bam", DesiredStatus: AllocDesiredStatusRun, - TaskStates: map[string]*TaskState{ - "a": &TaskState{State: TaskStatePending}, - "b": &TaskState{State: TaskStateDead}, - }, - }, - &Allocation{ - ID: "fizz", - DesiredStatus: AllocDesiredStatusRun, - TaskStates: map[string]*TaskState{ - "a": &TaskState{State: TaskStateDead}, - "b": &TaskState{State: TaskStateDead}, - }, + ClientStatus: AllocClientStatusDead, }, } out := FilterTerminalAllocs(l) - if len(out) != 3 { + if len(out) != 1 { t.Fatalf("bad: %#v", out) } - if out[0].ID != "zip" && out[1].ID != "foo" && out[2].ID != "bam" { + if out[0].ID != "foo" { t.Fatalf("bad: %#v", out) } } diff --git a/scheduler/context_test.go b/scheduler/context_test.go index 1f5730286..006e1ae99 100644 --- a/scheduler/context_test.go +++ b/scheduler/context_test.go @@ -61,9 +61,7 @@ func TestEvalContext_ProposedAlloc(t *testing.T) { MemoryMB: 2048, }, DesiredStatus: structs.AllocDesiredStatusRun, - TaskStates: map[string]*structs.TaskState{ - "foo": &structs.TaskState{State: structs.TaskStatePending}, - }, + ClientStatus: structs.AllocClientStatusPending, } alloc2 := &structs.Allocation{ ID: structs.GenerateUUID(), @@ -75,9 +73,7 @@ func TestEvalContext_ProposedAlloc(t *testing.T) { MemoryMB: 1024, }, DesiredStatus: structs.AllocDesiredStatusRun, - TaskStates: map[string]*structs.TaskState{ - "foo": &structs.TaskState{State: structs.TaskStatePending}, - }, + ClientStatus: structs.AllocClientStatusPending, } noErr(t, state.UpsertAllocs(1000, []*structs.Allocation{alloc1, alloc2})) diff --git a/scheduler/rank_test.go b/scheduler/rank_test.go index 68716ea76..d53aa996f 100644 --- a/scheduler/rank_test.go +++ 
b/scheduler/rank_test.go @@ -203,9 +203,7 @@ func TestBinPackIterator_ExistingAlloc(t *testing.T) { MemoryMB: 2048, }, DesiredStatus: structs.AllocDesiredStatusRun, - TaskStates: map[string]*structs.TaskState{ - "foo": &structs.TaskState{State: structs.TaskStatePending}, - }, + ClientStatus: structs.AllocClientStatusPending, } alloc2 := &structs.Allocation{ ID: structs.GenerateUUID(), @@ -217,9 +215,7 @@ func TestBinPackIterator_ExistingAlloc(t *testing.T) { MemoryMB: 1024, }, DesiredStatus: structs.AllocDesiredStatusRun, - TaskStates: map[string]*structs.TaskState{ - "foo": &structs.TaskState{State: structs.TaskStatePending}, - }, + ClientStatus: structs.AllocClientStatusPending, } noErr(t, state.UpsertAllocs(1000, []*structs.Allocation{alloc1, alloc2})) @@ -283,9 +279,7 @@ func TestBinPackIterator_ExistingAlloc_PlannedEvict(t *testing.T) { MemoryMB: 2048, }, DesiredStatus: structs.AllocDesiredStatusRun, - TaskStates: map[string]*structs.TaskState{ - "foo": &structs.TaskState{State: structs.TaskStatePending}, - }, + ClientStatus: structs.AllocClientStatusPending, } alloc2 := &structs.Allocation{ ID: structs.GenerateUUID(), @@ -297,9 +291,7 @@ func TestBinPackIterator_ExistingAlloc_PlannedEvict(t *testing.T) { MemoryMB: 1024, }, DesiredStatus: structs.AllocDesiredStatusRun, - TaskStates: map[string]*structs.TaskState{ - "foo": &structs.TaskState{State: structs.TaskStatePending}, - }, + ClientStatus: structs.AllocClientStatusPending, } noErr(t, state.UpsertAllocs(1000, []*structs.Allocation{alloc1, alloc2})) From dd6cc455feeb33e7c97a206bceaaff7a68dd0ebe Mon Sep 17 00:00:00 2001 From: Clint Shryock Date: Thu, 17 Dec 2015 10:25:42 -0600 Subject: [PATCH 57/82] Executors/Linux: Update Executor config struct --- client/driver/executor/exec_linux.go | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/client/driver/executor/exec_linux.go b/client/driver/executor/exec_linux.go index cbd33be80..7b3cf4883 100644 --- a/client/driver/executor/exec_linux.go +++ b/client/driver/executor/exec_linux.go @@ -345,16 +345,17 @@ func (e *LinuxExecutor) cleanTaskDir() error { // cgroup configuration. It returns an error if the resources are invalid. func (e *LinuxExecutor) configureCgroups(resources *structs.Resources) error { e.groups = &cgroupConfig.Cgroup{} + e.groups.Resources = &cgroupConfig.Resources{} e.groups.Name = structs.GenerateUUID() // TODO: verify this is needed for things like network access - e.groups.AllowAllDevices = true + e.groups.Resources.AllowAllDevices = true if resources.MemoryMB > 0 { // Total amount of memory allowed to consume - e.groups.Memory = int64(resources.MemoryMB * 1024 * 1024) + e.groups.Resources.Memory = int64(resources.MemoryMB * 1024 * 1024) // Disable swap to avoid issues on the machine - e.groups.MemorySwap = int64(-1) + e.groups.Resources.MemorySwap = int64(-1) } if resources.CPU < 2 { @@ -362,7 +363,7 @@ func (e *LinuxExecutor) configureCgroups(resources *structs.Resources) error { } // Set the relative CPU shares for this cgroup. - e.groups.CpuShares = int64(resources.CPU) + e.groups.Resources.CpuShares = int64(resources.CPU) if resources.IOPS != 0 { // Validate it is in an acceptable range. 
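 // (The IOPS value is written to the cgroup BlkioWeight field below; the
 // accepted range is 10-1000, which also fits in that field's uint16 type.)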
@@ -370,7 +371,7 @@ func (e *LinuxExecutor) configureCgroups(resources *structs.Resources) error { return fmt.Errorf("resources.IOPS must be between 10 and 1000: %d", resources.IOPS) } - e.groups.BlkioWeight = uint16(resources.IOPS) + e.groups.Resources.BlkioWeight = uint16(resources.IOPS) } return nil From c2f81fe8452ad93e54ca02f620e2960aab51c4cf Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Thu, 17 Dec 2015 13:12:28 -0800 Subject: [PATCH 58/82] Removed a broken dependency --- scripts/deps.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/deps.sh b/scripts/deps.sh index 2c2899ff2..bdd683c1c 100755 --- a/scripts/deps.sh +++ b/scripts/deps.sh @@ -3,7 +3,6 @@ # First get the OS-specific packages GOOS=windows go get $DEP_ARGS github.com/StackExchange/wmi GOOS=windows go get $DEP_ARGS github.com/shirou/w32 -GOOS=linux go get $DEP_ARGS github.com/docker/docker/pkg/units GOOS=linux go get $DEP_ARGS github.com/docker/docker/pkg/mount GOOS=linux go get $DEP_ARGS github.com/opencontainers/runc/libcontainer/cgroups/fs GOOS=linux go get $DEP_ARGS github.com/opencontainers/runc/libcontainer/configs From ad1a161e4d19b724e33bd88c1f5e1fdc8ca2023b Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Thu, 17 Dec 2015 13:23:32 -0800 Subject: [PATCH 59/82] Bumping version to 0.2.3 final --- version.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.go b/version.go index 29bb9b227..f7908dc06 100644 --- a/version.go +++ b/version.go @@ -10,4 +10,4 @@ const Version = "0.2.3" // A pre-release marker for the version. If this is "" (empty string) // then it means that it is a final release. Otherwise, this is a pre-release // such as "dev" (in development), "beta", "rc1", etc. -const VersionPrerelease = "rc1" +const VersionPrerelease = "" From fa655404c493920a0b590a49d6a73bb8a9abd947 Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Thu, 17 Dec 2015 13:29:41 -0800 Subject: [PATCH 60/82] Setting versions to 0.2.3 --- CHANGELOG.md | 2 +- demo/vagrant/Vagrantfile | 2 +- website/config.rb | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1eb6eb9f5..848a2b911 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -## 0.2.3-rc1 (December 15, 2015) +## 0.2.3 (December 17, 2015) BUG FIXES: * client: Fixes for user lookup to support CoreOS [GH-591] diff --git a/demo/vagrant/Vagrantfile b/demo/vagrant/Vagrantfile index 8ed3319f0..5c93f97e6 100644 --- a/demo/vagrant/Vagrantfile +++ b/demo/vagrant/Vagrantfile @@ -9,7 +9,7 @@ sudo apt-get install -y unzip curl wget vim # Download Nomad echo Fetching Nomad... cd /tmp/ -curl -sSL https://releases.hashicorp.com/nomad/0.2.1/nomad_0.2.1_linux_amd64.zip -o nomad.zip +curl -sSL https://releases.hashicorp.com/nomad/0.2.3/nomad_0.2.3_linux_amd64.zip -o nomad.zip echo Installing Nomad... 
unzip nomad.zip diff --git a/website/config.rb b/website/config.rb index 96b57dc74..6afe177d9 100644 --- a/website/config.rb +++ b/website/config.rb @@ -2,7 +2,7 @@ set :base_url, "https://www.nomadproject.io/" activate :hashicorp do |h| h.name = "nomad" - h.version = "0.2.2" + h.version = "0.2.3" h.github_slug = "hashicorp/nomad" h.minify_javascript = false From 24521e525d3103005d3ba03e3c98eb8fb119a83c Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Thu, 17 Dec 2015 13:50:59 -0800 Subject: [PATCH 61/82] Added some more deps into the vagrant box --- Vagrantfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Vagrantfile b/Vagrantfile index a5b251d2f..0fe9ccf9a 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -7,7 +7,7 @@ VAGRANTFILE_API_VERSION = "2" $script = <