mirror of
https://github.com/kemko/nomad.git
synced 2026-01-01 16:05:42 +03:00
Add historical journald and log export flags to operator debug command (#26410)
* Add -log-file-export and -log-lookback commands to add historical log to debug capture * use monitor.PrepFile() helper for other historical log tests
This commit is contained in:
3
.changelog/26410.txt
Normal file
3
.changelog/26410.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
```release-note:improvement
|
||||
command: Add historical log capture to `nomad operator debug` command with `-log-lookback` and `-log-file-export` flags
|
||||
```
|
||||
@@ -454,14 +454,7 @@ func TestMonitor_MonitorExport(t *testing.T) {
|
||||
ci.Parallel(t)
|
||||
|
||||
// Create test file
|
||||
dir := t.TempDir()
|
||||
f, err := os.CreateTemp(dir, "log")
|
||||
must.NoError(t, err)
|
||||
for range 1000 {
|
||||
_, _ = f.WriteString(fmt.Sprintf("%v [INFO] it's log, it's log, it's big it's heavy it's wood", time.Now()))
|
||||
}
|
||||
f.Close()
|
||||
testFilePath := f.Name()
|
||||
testFilePath := monitor.PrepFile(t).Name()
|
||||
testFileContents, err := os.ReadFile(testFilePath)
|
||||
must.NoError(t, err)
|
||||
|
||||
|
||||
@@ -100,14 +100,7 @@ func TestMonitor_Export(t *testing.T) {
|
||||
expectedText = "log log log log log"
|
||||
)
|
||||
|
||||
dir := t.TempDir()
|
||||
f, err := os.CreateTemp(dir, "log")
|
||||
must.NoError(t, err)
|
||||
for range 1000 {
|
||||
_, _ = f.WriteString(fmt.Sprintf("%v [INFO] it's log, it's log, it's big it's heavy it's wood", time.Now()))
|
||||
}
|
||||
f.Close()
|
||||
goldenFilePath := f.Name()
|
||||
goldenFilePath := PrepFile(t).Name()
|
||||
goldenFileContents, err := os.ReadFile(goldenFilePath)
|
||||
must.NoError(t, err)
|
||||
|
||||
|
||||
@@ -17,27 +17,6 @@ import (
|
||||
"github.com/shoenig/test/must"
|
||||
)
|
||||
|
||||
var writeLine = []byte("[INFO] log log log made of wood you are heavy but so good\n")
|
||||
|
||||
func prepFile(t *testing.T) *os.File {
|
||||
const loopCount = 10
|
||||
// Create test file to read from
|
||||
dir := t.TempDir()
|
||||
f, err := os.CreateTemp(dir, "log")
|
||||
must.NoError(t, err)
|
||||
|
||||
for range loopCount {
|
||||
_, _ = f.Write(writeLine)
|
||||
}
|
||||
f.Close()
|
||||
|
||||
// Create test file reader for stream set up
|
||||
goldenFilePath := f.Name()
|
||||
fileReader, err := os.Open(goldenFilePath)
|
||||
must.NoError(t, err)
|
||||
return fileReader
|
||||
}
|
||||
|
||||
func TestClientStreamReader_StreamFixed(t *testing.T) {
|
||||
ci.Parallel(t)
|
||||
|
||||
@@ -80,7 +59,7 @@ func TestClientStreamReader_StreamFixed(t *testing.T) {
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
file := prepFile(t)
|
||||
file := PrepFile(t)
|
||||
goldenFileContents, err := os.ReadFile(file.Name())
|
||||
must.NoError(t, err)
|
||||
|
||||
|
||||
@@ -6,15 +6,19 @@ package monitor
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/go-msgpack/v2/codec"
|
||||
sframer "github.com/hashicorp/nomad/client/lib/streamframer"
|
||||
cstructs "github.com/hashicorp/nomad/client/structs"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
"github.com/shoenig/test/must"
|
||||
)
|
||||
|
||||
// StreamingClient is an interface that implements the StreamingRpcHandler function
|
||||
@@ -22,6 +26,27 @@ type StreamingClient interface {
|
||||
StreamingRpcHandler(string) (structs.StreamingRpcHandler, error)
|
||||
}
|
||||
|
||||
var writeLine = []byte(fmt.Sprintf("[INFO] log log log made of wood you are heavy but so good, %v\n", time.Now()))
|
||||
|
||||
func PrepFile(t *testing.T) *os.File {
|
||||
const loopCount = 100
|
||||
// Create test file to read from
|
||||
dir := t.TempDir()
|
||||
f, err := os.CreateTemp(dir, "log")
|
||||
must.NoError(t, err)
|
||||
|
||||
for range loopCount {
|
||||
_, _ = f.Write(writeLine)
|
||||
}
|
||||
f.Close()
|
||||
|
||||
// Create test file reader for stream set up
|
||||
goldenFilePath := f.Name()
|
||||
fileReader, err := os.Open(goldenFilePath)
|
||||
must.NoError(t, err)
|
||||
return fileReader
|
||||
}
|
||||
|
||||
// ExportMonitorClient_TestHelper consolidates streaming test setup for use in
|
||||
// client and server RPChandler tests
|
||||
func ExportMonitorClient_TestHelper(req cstructs.MonitorExportRequest, c StreamingClient,
|
||||
|
||||
@@ -47,6 +47,8 @@ type OperatorDebugCommand struct {
|
||||
pprofDuration time.Duration
|
||||
logLevel string
|
||||
logIncludeLocation bool
|
||||
logLookback time.Duration
|
||||
logFileExport bool
|
||||
maxNodes int
|
||||
nodeClass string
|
||||
nodeIDs []string
|
||||
@@ -183,6 +185,21 @@ Debug Options:
|
||||
Include file and line information in each log line monitored. The default
|
||||
is true.
|
||||
|
||||
-log-file-export=<bool>
|
||||
Include the contents of agents' Nomad logfiles in the debug capture. The
|
||||
log export monitor runs concurrently with the log monitor and ignores the
|
||||
-log-level and -log-include-location flags used to configure that monitor.
|
||||
Nomad returns an error if the agent does not have file logging configured.
|
||||
Cannot be used with -log-lookback.
|
||||
|
||||
-log-lookback=<duration>
|
||||
Include historical journald logs in the debug capture. The journald
|
||||
export monitor runs concurrently with the log monitor and ignores the
|
||||
-log-level and -log-include-location flags used to configure that monitor.
|
||||
This flag is only available on Linux systems using systemd. Refer to the
|
||||
-log-file-export flag to retrieve historical logs on non-Linux systems, or
|
||||
those without systemd. Cannot be used with -log-file-export.
|
||||
|
||||
-max-nodes=<count>
|
||||
Cap the maximum number of client nodes included in the capture. Defaults
|
||||
to 10, set to 0 for unlimited.
|
||||
@@ -353,8 +370,7 @@ func (c *OperatorDebugCommand) Name() string { return "debug" }
|
||||
func (c *OperatorDebugCommand) Run(args []string) int {
|
||||
flags := c.Meta.FlagSet(c.Name(), FlagSetClient)
|
||||
flags.Usage = func() { c.Ui.Output(c.Help()) }
|
||||
|
||||
var duration, interval, pprofInterval, output, pprofDuration, eventTopic string
|
||||
var duration, interval, pprofInterval, output, pprofDuration, eventTopic, logLookback string
|
||||
var eventIndex int64
|
||||
var nodeIDs, serverIDs string
|
||||
var allowStale bool
|
||||
@@ -365,6 +381,8 @@ func (c *OperatorDebugCommand) Run(args []string) int {
|
||||
flags.StringVar(&interval, "interval", "30s", "")
|
||||
flags.StringVar(&c.logLevel, "log-level", "TRACE", "")
|
||||
flags.BoolVar(&c.logIncludeLocation, "log-include-location", true, "")
|
||||
flags.StringVar(&logLookback, "log-lookback", "", "")
|
||||
flags.BoolVar(&c.logFileExport, "log-file-export", false, "")
|
||||
flags.IntVar(&c.maxNodes, "max-nodes", 10, "")
|
||||
flags.StringVar(&c.nodeClass, "node-class", "", "")
|
||||
flags.StringVar(&nodeIDs, "node-id", "all", "")
|
||||
@@ -400,6 +418,19 @@ func (c *OperatorDebugCommand) Run(args []string) int {
|
||||
return 1
|
||||
}
|
||||
|
||||
// Parse the logLookback duration
|
||||
l, err := time.ParseDuration(logLookback)
|
||||
if err != nil && logLookback != "" {
|
||||
c.Ui.Error(fmt.Sprintf("Error parsing -log-lookback: %s: %s", logLookback, err.Error()))
|
||||
return 1
|
||||
}
|
||||
c.logLookback = l
|
||||
|
||||
if c.logLookback != 0 && c.logFileExport {
|
||||
c.Ui.Error("Error parsing inputs, -log-file-export and -log-lookback cannot be used together.")
|
||||
return 1
|
||||
}
|
||||
|
||||
// Parse the capture duration
|
||||
d, err := time.ParseDuration(duration)
|
||||
if err != nil {
|
||||
@@ -753,6 +784,16 @@ func (c *OperatorDebugCommand) mkdir(paths ...string) error {
|
||||
|
||||
// startMonitors starts go routines for each node and client
|
||||
func (c *OperatorDebugCommand) startMonitors(client *api.Client) {
|
||||
// if requested, start monitor export first
|
||||
if c.logLookback != 0 || c.logFileExport {
|
||||
for _, id := range c.nodeIDs {
|
||||
go c.startMonitorExport(clientDir, "node_id", id, client)
|
||||
}
|
||||
|
||||
for _, id := range c.serverIDs {
|
||||
go c.startMonitorExport(serverDir, "server_id", id, client)
|
||||
}
|
||||
}
|
||||
for _, id := range c.nodeIDs {
|
||||
go c.startMonitor(clientDir, "node_id", id, client)
|
||||
}
|
||||
@@ -801,6 +842,54 @@ func (c *OperatorDebugCommand) startMonitor(path, idKey, nodeID string, client *
|
||||
}
|
||||
}
|
||||
|
||||
// startMonitor starts one monitor api request, writing to a file. It blocks and should be
|
||||
// called in a go routine. Errors are ignored, we want to build the archive even if a node
|
||||
// is unavailable
|
||||
func (c *OperatorDebugCommand) startMonitorExport(path, idKey, nodeID string, client *api.Client) {
|
||||
monitorExportPath := "monitor_export.log"
|
||||
qo := api.QueryOptions{
|
||||
Params: map[string]string{
|
||||
idKey: nodeID,
|
||||
"on_disk": strconv.FormatBool(c.logFileExport),
|
||||
"logs_since": c.logLookback.String(),
|
||||
},
|
||||
AllowStale: c.queryOpts().AllowStale,
|
||||
}
|
||||
|
||||
// serviceName and onDisk cannot be set together, only set servicename if we're sure
|
||||
// loglookback is set and logFileExport is false
|
||||
if lookback := c.logLookback.String(); lookback != "" && !c.logFileExport {
|
||||
qo.Params["service_name"] = "nomad"
|
||||
}
|
||||
|
||||
// prepare output location
|
||||
c.mkdir(path, nodeID)
|
||||
fh, err := os.Create(c.path(path, nodeID, monitorExportPath))
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
defer fh.Close()
|
||||
|
||||
outCh, errCh := client.Agent().MonitorExport(c.ctx.Done(), &qo)
|
||||
for {
|
||||
select {
|
||||
case out := <-outCh:
|
||||
if out == nil {
|
||||
continue
|
||||
}
|
||||
fh.Write(out.Data)
|
||||
|
||||
case err := <-errCh:
|
||||
if err != io.EOF {
|
||||
fh.WriteString(fmt.Sprintf("monitor: %s\n", err.Error()))
|
||||
return
|
||||
}
|
||||
case <-c.ctx.Done():
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// captureEventStream wraps the event stream capture process.
|
||||
func (c *OperatorDebugCommand) startEventStream(client *api.Client) {
|
||||
c.verboseOut("Launching eventstream goroutine...")
|
||||
|
||||
@@ -22,10 +22,12 @@ import (
|
||||
"github.com/hashicorp/nomad/ci"
|
||||
clienttest "github.com/hashicorp/nomad/client/testutil"
|
||||
"github.com/hashicorp/nomad/command/agent"
|
||||
mon "github.com/hashicorp/nomad/command/agent/monitor"
|
||||
"github.com/hashicorp/nomad/helper"
|
||||
"github.com/hashicorp/nomad/helper/pointer"
|
||||
"github.com/hashicorp/nomad/nomad/state"
|
||||
"github.com/hashicorp/nomad/testutil"
|
||||
"github.com/shoenig/test/must"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
@@ -1070,3 +1072,124 @@ func extractArchiveName(captureOutput string) string {
|
||||
|
||||
return file
|
||||
}
|
||||
|
||||
func TestDebug_MonitorExportFiles(t *testing.T) {
|
||||
f := mon.PrepFile(t).Name()
|
||||
setLogFile := func(c *agent.Config) {
|
||||
c.LogFile = f
|
||||
}
|
||||
srv, _, url := testServer(t, true, setLogFile)
|
||||
testutil.WaitForLeader(t, srv.Agent.RPC)
|
||||
logFileContents, err := os.ReadFile(f)
|
||||
must.NoError(t, err)
|
||||
serverNodeName := srv.Config.NodeName
|
||||
region := srv.Config.Region
|
||||
serverName := fmt.Sprintf("%s.%s", serverNodeName, region)
|
||||
clientID := srv.Agent.Client().NodeID()
|
||||
testutil.WaitForClient(t, srv.Agent.Client().RPC, clientID, srv.Agent.Client().Region())
|
||||
|
||||
testDir := t.TempDir()
|
||||
defer os.Remove(testDir)
|
||||
|
||||
duration := 2 * time.Second
|
||||
interval := 750 * time.Millisecond
|
||||
waitTime := 2 * duration
|
||||
|
||||
baseArgs := []string{
|
||||
"-address", url,
|
||||
"-output", testDir,
|
||||
"-server-id", serverName,
|
||||
"-node-id", clientID,
|
||||
"-duration", duration.String(),
|
||||
"-interval", interval.String(),
|
||||
}
|
||||
|
||||
cases := []struct {
|
||||
name string
|
||||
cmdArgs []string
|
||||
errString string
|
||||
runErr bool
|
||||
wantExporter bool
|
||||
}{
|
||||
{
|
||||
name: "exporter",
|
||||
cmdArgs: []string{"-log-file-export"},
|
||||
wantExporter: true,
|
||||
},
|
||||
{
|
||||
name: "no_exporter",
|
||||
wantExporter: false,
|
||||
},
|
||||
{
|
||||
name: "bad_value_for_log_lookback",
|
||||
cmdArgs: []string{"-log-lookback", "blue"},
|
||||
errString: "Error parsing -log-lookback",
|
||||
runErr: true,
|
||||
wantExporter: false,
|
||||
},
|
||||
{
|
||||
name: "set_both_flags",
|
||||
cmdArgs: []string{
|
||||
"-log-lookback", "5h",
|
||||
"-log-file-export",
|
||||
},
|
||||
errString: "Error parsing inputs, -log-file-export and -log-lookback cannot be used together",
|
||||
runErr: true,
|
||||
wantExporter: false,
|
||||
},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
clientFiles := []string{
|
||||
"monitor.log",
|
||||
"monitor_export.log",
|
||||
}
|
||||
args := baseArgs
|
||||
if len(tc.cmdArgs) > 0 {
|
||||
args = append(args, tc.cmdArgs...)
|
||||
}
|
||||
|
||||
serverFiles := []string{
|
||||
"monitor.log",
|
||||
"monitor_export.log",
|
||||
}
|
||||
ui := cli.NewMockUi()
|
||||
cmd := &OperatorDebugCommand{Meta: Meta{Ui: ui}}
|
||||
|
||||
code := cmd.Run(args)
|
||||
if tc.runErr {
|
||||
must.One(t, code)
|
||||
must.StrContains(t, ui.ErrorWriter.String(), tc.errString)
|
||||
return
|
||||
} else {
|
||||
must.Zero(t, code)
|
||||
}
|
||||
|
||||
// Wait until client's monitor.log file is written
|
||||
clientPaths := buildPathSlice(cmd.path(clientDir, clientID), clientFiles)
|
||||
t.Logf("Waiting for client files in path: %s", clientDir)
|
||||
|
||||
testutil.WaitForFilesUntil(t, clientPaths[:0], waitTime)
|
||||
|
||||
// Wait until server's monitor.log file is written
|
||||
serverPaths := buildPathSlice(cmd.path(serverDir, serverName), serverFiles)
|
||||
t.Logf("Waiting for server files in path: %s", serverDir)
|
||||
testutil.WaitForFilesUntil(t, serverPaths[:0], waitTime)
|
||||
|
||||
// Validate historical log files exist and match expected value
|
||||
clientLog, clientReadErr := os.ReadFile(clientPaths[1])
|
||||
serverLog, serverReadErr := os.ReadFile(serverPaths[1])
|
||||
if tc.wantExporter {
|
||||
must.NoError(t, clientReadErr)
|
||||
must.NoError(t, serverReadErr)
|
||||
// Verify monitor export file contents as expected
|
||||
must.Eq(t, logFileContents, serverLog)
|
||||
must.Eq(t, logFileContents, clientLog)
|
||||
} else {
|
||||
must.NotNil(t, clientReadErr)
|
||||
must.NotNil(t, serverReadErr)
|
||||
}
|
||||
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1032,14 +1032,7 @@ func TestMonitor_MonitorExport(t *testing.T) {
|
||||
shortText = "log log log log log"
|
||||
)
|
||||
// Create test file
|
||||
dir := t.TempDir()
|
||||
f, err := os.CreateTemp(dir, "log")
|
||||
must.NoError(t, err)
|
||||
for range 1000 {
|
||||
_, _ = f.WriteString(fmt.Sprintf("%v [INFO] it's log, it's log, it's big it's heavy it's wood", time.Now()))
|
||||
}
|
||||
f.Close()
|
||||
longFilePath := f.Name()
|
||||
longFilePath := monitor.PrepFile(t).Name()
|
||||
longFileContents, err := os.ReadFile(longFilePath)
|
||||
must.NoError(t, err)
|
||||
|
||||
|
||||
@@ -54,6 +54,19 @@ true.
|
||||
- `-log-include-location`: Include file and line information in each log line
|
||||
monitored. The default is `true`.
|
||||
|
||||
- `log-file-export`: Include agents' Nomad logfiles in the debug capture.
|
||||
The historical log export monitor runs concurrently with the log monitor
|
||||
and ignores the `-log-level` and `-log-include-location` flags used to
|
||||
configure that monitor. Nomad will return an error if the agent does not
|
||||
have file logging configured. Cannot be used with `-log-lookback`.
|
||||
|
||||
- `log-lookback`: Include historical journald logs in the debug capture. The
|
||||
journald export monitor runs concurrently with the log monitor and ignores
|
||||
the `-log-level` and `-log-include-location` flags passed to that monitor.
|
||||
This flag is only available on Linux systems using systemd, see the
|
||||
`-log-file-export` flag to retrieve historical logs from non-Linux systems,
|
||||
or those without systemd. Cannot be used with `-log-file-export`.
|
||||
|
||||
- `-max-nodes=<count>`: Cap the maximum number of client nodes included
|
||||
in the capture. Defaults to 10, set to 0 for unlimited.
|
||||
|
||||
|
||||
Reference in New Issue
Block a user