mirror of
https://github.com/kemko/nomad.git
synced 2026-01-01 16:05:42 +03:00
CNI: migrate from persistent state to ephemeral state during restart (#25093)
In #24650 we switched to using ephemeral state for CNI plugins, so that when a host reboots and we lose all the allocations we don't end up trying to use IPs we created in network namespaces we just destroyed. Unfortunately upgrade testing missed that in a non-reboot scenario, the existing CNI state was being used by plugins like the ipam plugin to hand out the "next available" IP address. So with no state carried over, we might allocate new addresses that conflict with existing allocations. (This can be avoided by draining the node first.) As a compatibility shim, copy the old CNI state directory to the new CNI state directory during agent startup, if the new CNI state directory doesn't already exist. Ref: https://github.com/hashicorp/nomad/pull/24650
This commit is contained in:
3
.changelog/25093.txt
Normal file
3
.changelog/25093.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
```release-note:bug
|
||||
cni: Fixed a bug where CNI state was not migrated after upgrade, resulting in IP collisions
|
||||
```
|
||||
@@ -54,6 +54,7 @@ import (
|
||||
"github.com/hashicorp/nomad/command/agent/consul"
|
||||
"github.com/hashicorp/nomad/helper"
|
||||
"github.com/hashicorp/nomad/helper/envoy"
|
||||
"github.com/hashicorp/nomad/helper/escapingfs"
|
||||
"github.com/hashicorp/nomad/helper/goruntime"
|
||||
"github.com/hashicorp/nomad/helper/group"
|
||||
"github.com/hashicorp/nomad/helper/pointer"
|
||||
@@ -762,6 +763,26 @@ func (c *Client) init() error {
|
||||
// setup the nsd check store
|
||||
c.checkStore = checkstore.NewStore(c.logger, c.stateDB)
|
||||
|
||||
// COMPAT(1.12.0): remove in Nomad 1.12.0
|
||||
oldCNIDir := "/var/lib/cni/networks/nomad"
|
||||
newCNIDir := "/var/run/cni/nomad"
|
||||
if _, err := os.Stat(newCNIDir); os.IsNotExist(err) {
|
||||
if _, err := os.Stat(oldCNIDir); err == nil {
|
||||
err := escapingfs.CopyDir(oldCNIDir, newCNIDir)
|
||||
if err != nil {
|
||||
c.logger.Error("failed to migrate existing CNI state",
|
||||
"error", err, "src", oldCNIDir, "dest", newCNIDir)
|
||||
} else {
|
||||
err := os.RemoveAll(oldCNIDir)
|
||||
if err != nil {
|
||||
c.logger.Error("migrated CNI state but could not remove old state",
|
||||
"error", err, "src", oldCNIDir, "dest", newCNIDir)
|
||||
}
|
||||
c.logger.Info("migrated CNI state", "src", oldCNIDir, "dest", newCNIDir)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
58
helper/escapingfs/copydir.go
Normal file
58
helper/escapingfs/copydir.go
Normal file
@@ -0,0 +1,58 @@
|
||||
// Copyright (c) HashiCorp, Inc.
|
||||
// SPDX-License-Identifier: BUSL-1.1
|
||||
|
||||
package escapingfs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"io/fs"
|
||||
"os"
|
||||
"path/filepath"
|
||||
)
|
||||
|
||||
// CopyDir copies a directory's contents to a new location, returning an error
|
||||
// on symlinks. This implementation is roughly the same as the stdlib os.CopyDir
|
||||
// but with th e important difference that we preserve file modes.
|
||||
func CopyDir(src, dst string) error {
|
||||
srcFs := os.DirFS(src)
|
||||
|
||||
return fs.WalkDir(srcFs, ".", func(oldPath string, d fs.DirEntry, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
newPath := filepath.Join(dst, oldPath)
|
||||
if d.IsDir() {
|
||||
info, err := d.Info()
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not stat directory: %v", err)
|
||||
}
|
||||
return os.MkdirAll(newPath, info.Mode())
|
||||
}
|
||||
if !d.Type().IsRegular() {
|
||||
return fmt.Errorf("copying cannot traverse symlinks")
|
||||
}
|
||||
|
||||
r, err := srcFs.Open(oldPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not open existing file: %v", err)
|
||||
}
|
||||
defer r.Close()
|
||||
info, err := r.Stat()
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not stat file: %v", err)
|
||||
}
|
||||
|
||||
w, err := os.OpenFile(newPath, os.O_CREATE|os.O_EXCL|os.O_WRONLY, info.Mode())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if _, err := io.Copy(w, r); err != nil {
|
||||
w.Close()
|
||||
return fmt.Errorf("could not copy file: %v", err)
|
||||
}
|
||||
return w.Close()
|
||||
})
|
||||
}
|
||||
41
helper/escapingfs/copydir_test.go
Normal file
41
helper/escapingfs/copydir_test.go
Normal file
@@ -0,0 +1,41 @@
|
||||
// Copyright (c) HashiCorp, Inc.
|
||||
// SPDX-License-Identifier: BUSL-1.1
|
||||
|
||||
package escapingfs
|
||||
|
||||
import (
|
||||
"io/fs"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/shoenig/test/must"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
func TestCopyDir(t *testing.T) {
|
||||
|
||||
testDir := t.TempDir()
|
||||
src := filepath.Join(testDir, "src")
|
||||
dst := filepath.Join(testDir, "dst")
|
||||
|
||||
must.NoError(t, os.Mkdir(src, 0700))
|
||||
must.NoError(t, os.WriteFile(filepath.Join(src, "foo"), []byte("foo"), 0770))
|
||||
must.NoError(t, os.WriteFile(filepath.Join(src, "bar"), []byte("bar"), 0555))
|
||||
must.NoError(t, os.Mkdir(filepath.Join(src, "bazDir"), 0700))
|
||||
must.NoError(t, os.WriteFile(filepath.Join(src, "bazDir", "baz"), []byte("baz"), 0555))
|
||||
|
||||
err := CopyDir(src, dst)
|
||||
must.NoError(t, err)
|
||||
|
||||
// This is really how you have to retrieve umask. See `man 2 umask`
|
||||
umask := unix.Umask(0)
|
||||
unix.Umask(umask)
|
||||
|
||||
must.FileContains(t, filepath.Join(dst, "foo"), "foo")
|
||||
must.FileMode(t, filepath.Join(dst, "foo"), fs.FileMode(0o770&(^umask)))
|
||||
must.FileContains(t, filepath.Join(dst, "bar"), "bar")
|
||||
must.FileMode(t, filepath.Join(dst, "bar"), fs.FileMode(0o555&(^umask)))
|
||||
must.FileContains(t, filepath.Join(dst, "bazDir", "baz"), "baz")
|
||||
must.FileMode(t, filepath.Join(dst, "bazDir", "baz"), fs.FileMode(0o555&(^umask)))
|
||||
}
|
||||
Reference in New Issue
Block a user