From a05862dbdf154c7d046d4462237ce647b2a66d26 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Wed, 15 Nov 2017 17:44:57 -0800 Subject: [PATCH] Destroy partially migrated alloc dirs Test that snapshot errors don't return a valid tar currently fails. --- client/alloc_runner.go | 10 +++- client/allocdir/alloc_dir.go | 1 + command/agent/alloc_endpoint_test.go | 85 ++++++++++++++++++++++++++++ 3 files changed, 95 insertions(+), 1 deletion(-) diff --git a/client/alloc_runner.go b/client/alloc_runner.go index b45a553a6..1595050ac 100644 --- a/client/alloc_runner.go +++ b/client/alloc_runner.go @@ -819,7 +819,7 @@ func (r *AllocRunner) Run() { r.allocDirLock.Unlock() if err != nil { - r.logger.Printf("[ERR] client: failed to build task directories: %v", err) + r.logger.Printf("[ERR] client: alloc %q failed to build task directories: %v", r.allocID, err) r.setStatus(structs.AllocClientStatusFailed, fmt.Sprintf("failed to build task dirs for '%s'", alloc.TaskGroup)) return } @@ -841,6 +841,14 @@ func (r *AllocRunner) Run() { // Soft-fail on migration errors r.logger.Printf("[WARN] client: alloc %q error while migrating data from previous alloc: %v", r.allocID, err) + + // Recreate alloc dir to ensure a clean slate + r.allocDir.Destroy() + if err := r.allocDir.Build(); err != nil { + r.logger.Printf("[ERR] client: alloc %q failed to clean task directories after failed migration: %v", r.allocID, err) + r.setStatus(structs.AllocClientStatusFailed, fmt.Sprintf("failed to rebuild task dirs for '%s'", alloc.TaskGroup)) + return + } } // Check if the allocation is in a terminal status. In this case, we don't diff --git a/client/allocdir/alloc_dir.go b/client/allocdir/alloc_dir.go index 19c53cbcb..16d1dec7d 100644 --- a/client/allocdir/alloc_dir.go +++ b/client/allocdir/alloc_dir.go @@ -246,6 +246,7 @@ func (d *AllocDir) Destroy() error { mErr.Errors = append(mErr.Errors, fmt.Errorf("failed to remove alloc dir %q: %v", d.AllocDir, err)) } + d.built = false return mErr.ErrorOrNil() } diff --git a/command/agent/alloc_endpoint_test.go b/command/agent/alloc_endpoint_test.go index e81c897c9..9ef08d69a 100644 --- a/command/agent/alloc_endpoint_test.go +++ b/command/agent/alloc_endpoint_test.go @@ -1,18 +1,24 @@ package agent import ( + "archive/tar" "fmt" + "io" + "io/ioutil" "net/http" "net/http/httptest" + "os" "reflect" "strings" "testing" "github.com/golang/snappy" "github.com/hashicorp/nomad/acl" + "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/nomad" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/testutil" "github.com/stretchr/testify/assert" ) @@ -379,6 +385,85 @@ func TestHTTP_AllocSnapshot_WithMigrateToken(t *testing.T) { }) } +// TestHTTP_AllocSnapshot_Atomic ensures that when a client encounters an error +// snapshotting a valid tar is not returned. +func TestHTTP_AllocSnapshot_Atomic(t *testing.T) { + t.Parallel() + httpTest(t, nil, func(s *TestAgent) { + // Create an alloc + state := s.server.State() + alloc := mock.Alloc() + alloc.Job.TaskGroups[0].Tasks[0].Driver = "mock_driver" + alloc.Job.TaskGroups[0].Tasks[0].Config["run_for"] = "30s" + alloc.NodeID = s.client.NodeID() + state.UpsertJobSummary(998, mock.JobSummary(alloc.JobID)) + if err := state.UpsertAllocs(1000, []*structs.Allocation{alloc.Copy()}); err != nil { + t.Fatalf("error upserting alloc: %v", err) + } + + // Wait for the client to run it + testutil.WaitForResult(func() (bool, error) { + if _, err := s.client.GetClientAlloc(alloc.ID); err != nil { + return false, err + } + + serverAlloc, err := state.AllocByID(nil, alloc.ID) + if err != nil { + return false, err + } + + return serverAlloc.ClientStatus == structs.AllocClientStatusRunning, fmt.Errorf(serverAlloc.ClientStatus) + }, func(err error) { + t.Fatalf("client not running alloc: %v", err) + }) + + // Now write to its shared dir + allocDirI, err := s.client.GetAllocFS(alloc.ID) + if err != nil { + t.Fatalf("unable to find alloc dir: %v", err) + } + allocDir := allocDirI.(*allocdir.AllocDir) + + // Remove the task dir to break Snapshot + os.RemoveAll(allocDir.TaskDirs["web"].LocalDir) + + // Assert Snapshot fails + if err := allocDir.Snapshot(ioutil.Discard); err == nil { + t.Errorf("expected Snapshot() to fail but it did not") + } else { + s.logger.Printf("[DEBUG] agent.test: snapshot returned error: %v", err) + } + + // Make the HTTP request to ensure the Snapshot error is + // propagated through to the HTTP layer and that a valid tar + // just isn't emitted. + respW := httptest.NewRecorder() + req, err := http.NewRequest("GET", fmt.Sprintf("/v1/client/allocation/%s/snapshot", alloc.ID), nil) + if err != nil { + t.Fatalf("err: %v", err) + } + + // Make the request via the mux to make sure the error returned + // by Snapshot is properly propogated via HTTP + s.Server.mux.ServeHTTP(respW, req) + resp := respW.Result() + t.Logf("HTTP Response Status Code: %d", resp.StatusCode) + r := tar.NewReader(resp.Body) + for { + header, err := r.Next() + if err != nil { + if err == io.EOF { + t.Fatalf("Looks like a valid tar file to me?") + } + t.Logf("Yay! An error: %v", err) + return + } + + t.Logf("Valid file returned: %s", header.Name) + } + }) +} + func TestHTTP_AllocGC(t *testing.T) { t.Parallel() httpTest(t, nil, func(s *TestAgent) {