CNI: migrate from persistent state to ephemeral state during restart (#25093)

In #24650 we switched to using ephemeral state for CNI plugins, so that when a
host reboots and we lose all the allocations we don't end up trying to use IPs
we created in network namespaces we just destroyed. Unfortunately upgrade
testing missed that in a non-reboot scenario, the existing CNI state was being
used by plugins like the ipam plugin to hand out the "next available" IP
address. So with no state carried over, we might allocate new addresses that
conflict with existing allocations. (This can be avoided by draining the node
first.)

As a compatibility shim, copy the old CNI state directory to the new CNI state
directory during agent startup, if the new CNI state directory doesn't already
exist.

Ref: https://github.com/hashicorp/nomad/pull/24650
This commit is contained in:
Tim Gross
2025-02-12 09:25:50 -05:00
committed by GitHub
parent f0d3c2834e
commit 716df52788
4 changed files with 123 additions and 0 deletions

3
.changelog/25093.txt Normal file
View File

@@ -0,0 +1,3 @@
```release-note:bug
cni: Fixed a bug where CNI state was not migrated after upgrade, resulting in IP collisions
```

View File

@@ -54,6 +54,7 @@ import (
"github.com/hashicorp/nomad/command/agent/consul"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/helper/envoy"
"github.com/hashicorp/nomad/helper/escapingfs"
"github.com/hashicorp/nomad/helper/goruntime"
"github.com/hashicorp/nomad/helper/group"
"github.com/hashicorp/nomad/helper/pointer"
@@ -762,6 +763,26 @@ func (c *Client) init() error {
// setup the nsd check store
c.checkStore = checkstore.NewStore(c.logger, c.stateDB)
// COMPAT(1.12.0): remove in Nomad 1.12.0
oldCNIDir := "/var/lib/cni/networks/nomad"
newCNIDir := "/var/run/cni/nomad"
if _, err := os.Stat(newCNIDir); os.IsNotExist(err) {
if _, err := os.Stat(oldCNIDir); err == nil {
err := escapingfs.CopyDir(oldCNIDir, newCNIDir)
if err != nil {
c.logger.Error("failed to migrate existing CNI state",
"error", err, "src", oldCNIDir, "dest", newCNIDir)
} else {
err := os.RemoveAll(oldCNIDir)
if err != nil {
c.logger.Error("migrated CNI state but could not remove old state",
"error", err, "src", oldCNIDir, "dest", newCNIDir)
}
c.logger.Info("migrated CNI state", "src", oldCNIDir, "dest", newCNIDir)
}
}
}
return nil
}

View File

@@ -0,0 +1,58 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1
package escapingfs
import (
"fmt"
"io"
"io/fs"
"os"
"path/filepath"
)
// CopyDir copies a directory's contents to a new location, returning an error
// on symlinks. This implementation is roughly the same as the stdlib os.CopyDir
// but with th e important difference that we preserve file modes.
func CopyDir(src, dst string) error {
srcFs := os.DirFS(src)
return fs.WalkDir(srcFs, ".", func(oldPath string, d fs.DirEntry, err error) error {
if err != nil {
return err
}
newPath := filepath.Join(dst, oldPath)
if d.IsDir() {
info, err := d.Info()
if err != nil {
return fmt.Errorf("could not stat directory: %v", err)
}
return os.MkdirAll(newPath, info.Mode())
}
if !d.Type().IsRegular() {
return fmt.Errorf("copying cannot traverse symlinks")
}
r, err := srcFs.Open(oldPath)
if err != nil {
return fmt.Errorf("could not open existing file: %v", err)
}
defer r.Close()
info, err := r.Stat()
if err != nil {
return fmt.Errorf("could not stat file: %v", err)
}
w, err := os.OpenFile(newPath, os.O_CREATE|os.O_EXCL|os.O_WRONLY, info.Mode())
if err != nil {
return err
}
if _, err := io.Copy(w, r); err != nil {
w.Close()
return fmt.Errorf("could not copy file: %v", err)
}
return w.Close()
})
}

View File

@@ -0,0 +1,41 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1
package escapingfs
import (
"io/fs"
"os"
"path/filepath"
"testing"
"github.com/shoenig/test/must"
"golang.org/x/sys/unix"
)
func TestCopyDir(t *testing.T) {
testDir := t.TempDir()
src := filepath.Join(testDir, "src")
dst := filepath.Join(testDir, "dst")
must.NoError(t, os.Mkdir(src, 0700))
must.NoError(t, os.WriteFile(filepath.Join(src, "foo"), []byte("foo"), 0770))
must.NoError(t, os.WriteFile(filepath.Join(src, "bar"), []byte("bar"), 0555))
must.NoError(t, os.Mkdir(filepath.Join(src, "bazDir"), 0700))
must.NoError(t, os.WriteFile(filepath.Join(src, "bazDir", "baz"), []byte("baz"), 0555))
err := CopyDir(src, dst)
must.NoError(t, err)
// This is really how you have to retrieve umask. See `man 2 umask`
umask := unix.Umask(0)
unix.Umask(umask)
must.FileContains(t, filepath.Join(dst, "foo"), "foo")
must.FileMode(t, filepath.Join(dst, "foo"), fs.FileMode(0o770&(^umask)))
must.FileContains(t, filepath.Join(dst, "bar"), "bar")
must.FileMode(t, filepath.Join(dst, "bar"), fs.FileMode(0o555&(^umask)))
must.FileContains(t, filepath.Join(dst, "bazDir", "baz"), "baz")
must.FileMode(t, filepath.Join(dst, "bazDir", "baz"), fs.FileMode(0o555&(^umask)))
}