From 9a29df2292a2b862ea566329731f8f3b05ae9f95 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Tue, 1 Jul 2025 10:37:44 -0400 Subject: [PATCH] scheduler: emit structured logs from reconciliation (#26169) Both the cluster reconciler and node reconciler emit a debug-level log line with their results, but these are unstructured multi-line logs that are annoying for operators to parse. Change these to emit structured key-value pairs like we do everywhere else. Ref: https://hashicorp.atlassian.net/browse/NMD-818 Ref: https://go.hashi.co/rfc/nmd-212 --- .changelog/26169.txt | 3 +++ scheduler/generic_sched.go | 4 ++- scheduler/reconciler/reconcile_cluster.go | 33 ++++++++++++++++------- scheduler/reconciler/reconcile_node.go | 16 ++++++++--- scheduler/scheduler_system.go | 4 ++- 5 files changed, 46 insertions(+), 14 deletions(-) create mode 100644 .changelog/26169.txt diff --git a/.changelog/26169.txt b/.changelog/26169.txt new file mode 100644 index 000000000..c79545736 --- /dev/null +++ b/.changelog/26169.txt @@ -0,0 +1,3 @@ +```release-note:improvement +scheduler: Debug-level logs emitted by the scheduler are now single-line structured logs +``` diff --git a/scheduler/generic_sched.go b/scheduler/generic_sched.go index 03259410c..882f3c102 100644 --- a/scheduler/generic_sched.go +++ b/scheduler/generic_sched.go @@ -355,7 +355,9 @@ func (s *GenericScheduler) computeJobAllocs() error { Now: time.Now().UTC(), }) result := r.Compute() - s.logger.Debug("reconciled current state with desired state", "results", log.Fmt("%#v", result)) + if s.logger.IsDebug() { + s.logger.Debug("reconciled current state with desired state", result.Fields()...) + } if s.eval.AnnotatePlan { s.plan.Annotations = &structs.PlanAnnotations{ diff --git a/scheduler/reconciler/reconcile_cluster.go b/scheduler/reconciler/reconcile_cluster.go index c86fdc561..f7843adac 100644 --- a/scheduler/reconciler/reconcile_cluster.go +++ b/scheduler/reconciler/reconcile_cluster.go @@ -202,22 +202,37 @@ type delayedRescheduleInfo struct { rescheduleTime time.Time } -func (r *ReconcileResults) GoString() string { - base := fmt.Sprintf("Total changes: (place %d) (destructive %d) (inplace %d) (stop %d) (disconnect %d) (reconnect %d)", - len(r.Place), len(r.DestructiveUpdate), len(r.InplaceUpdate), len(r.Stop), len(r.DisconnectUpdates), len(r.ReconnectUpdates)) - +func (r *ReconcileResults) Fields() []any { + fields := []any{ + "total_place", len(r.Place), + "total_destructive", len(r.DestructiveUpdate), + "total_inplace", len(r.InplaceUpdate), + "total_stop", len(r.Stop), + "total_disconnect", len(r.DisconnectUpdates), + "total_reconnect", len(r.ReconnectUpdates), + } if r.Deployment != nil { - base += fmt.Sprintf("\nCreated Deployment: %q", r.Deployment.ID) + fields = append(fields, "deployment_created", r.Deployment.ID) } for _, u := range r.DeploymentUpdates { - base += fmt.Sprintf("\nDeployment Update for ID %q: Status %q; Description %q", - u.DeploymentID, u.Status, u.StatusDescription) + fields = append(fields, + "deployment_updated", u.DeploymentID, + "deployment_update", fmt.Sprintf("%s (%s)", u.Status, u.StatusDescription)) } for tg, u := range r.DesiredTGUpdates { - base += fmt.Sprintf("\nDesired Changes for %q: %#v", tg, u) + fields = append(fields, + tg+"_ignore", u.Ignore, + tg+"_place", u.Place, + tg+"_destructive", u.DestructiveUpdate, + tg+"_inplace", u.InPlaceUpdate, + tg+"_stop", u.Stop, + tg+"_migrate", u.Migrate, + tg+"_canary", u.Canary, + tg+"_preempt", u.Preemptions, + ) } - return base + return fields } // ClusterState holds frequently used information about the state of the diff --git a/scheduler/reconciler/reconcile_node.go b/scheduler/reconciler/reconcile_node.go index c0c9e5680..fe57cc3f4 100644 --- a/scheduler/reconciler/reconcile_node.go +++ b/scheduler/reconciler/reconcile_node.go @@ -339,9 +339,19 @@ type NodeReconcileResult struct { Place, Update, Migrate, Stop, Ignore, Lost, Disconnecting, Reconnecting []AllocTuple } -func (d *NodeReconcileResult) GoString() string { - return fmt.Sprintf("allocs: (place %d) (update %d) (migrate %d) (stop %d) (ignore %d) (lost %d) (disconnecting %d) (reconnecting %d)", - len(d.Place), len(d.Update), len(d.Migrate), len(d.Stop), len(d.Ignore), len(d.Lost), len(d.Disconnecting), len(d.Reconnecting)) +func (d *NodeReconcileResult) Fields() []any { + fields := []any{ + "ignore", d.Ignore, + "place", d.Place, + "update", d.Update, + "stop", d.Stop, + "migrate", d.Migrate, + "lost", d.Lost, + "disconnecting", d.Disconnecting, + "reconnecting", d.Reconnecting, + } + + return fields } func (d *NodeReconcileResult) Append(other *NodeReconcileResult) { diff --git a/scheduler/scheduler_system.go b/scheduler/scheduler_system.go index f33fc715f..131cec825 100644 --- a/scheduler/scheduler_system.go +++ b/scheduler/scheduler_system.go @@ -260,7 +260,9 @@ func (s *SystemScheduler) computeJobAllocs() error { // Diff the required and existing allocations r := reconciler.Node(s.job, s.nodes, s.notReadyNodes, tainted, live, term, s.planner.ServersMeetMinimumVersion(minVersionMaxClientDisconnect, true)) - s.logger.Debug("reconciled current state with desired state", "results", log.Fmt("%#v", r)) + if s.logger.IsDebug() { + s.logger.Debug("reconciled current state with desired state", r.Fields()...) + } // Add all the allocs to stop for _, e := range r.Stop {