From 50250b145559616aceeccd8dfe225646334d47c3 Mon Sep 17 00:00:00 2001
From: Diptanu Choudhury
Date: Fri, 29 Apr 2016 13:03:02 -0700
Subject: [PATCH] Added the nomad stats command

---
 api/allocations.go                       |  49 ++++++++
 api/tasks.go                             |  21 ++++
 client/driver/executor/executor_linux.go |  16 ++-
 client/driver/structs/structs.go         |  14 ++-
 command/alloc_status.go                  |  20 ++-
 command/stats.go                         | 149 +++++++++++++++++++++++
 commands.go                              |   5 +
 7 files changed, 262 insertions(+), 12 deletions(-)
 create mode 100644 command/stats.go

diff --git a/api/allocations.go b/api/allocations.go
index 6badd3f69..765a349fc 100644
--- a/api/allocations.go
+++ b/api/allocations.go
@@ -1,6 +1,11 @@
 package api
 
 import (
+	"encoding/json"
+	"fmt"
+	"io/ioutil"
+	"net/http"
+	"net/url"
 	"sort"
 	"time"
 )
@@ -40,6 +45,63 @@ func (a *Allocations) Info(allocID string, q *QueryOptions) (*Allocation, *Query
 	return &resp, qm, nil
 }
 
+// Stats returns the resource usage reported for alloc by the client node it
+// is placed on. The node is looked up by alloc.NodeID and queried directly
+// over plain HTTP at its advertised address; the JSON response is decoded
+// into a map of TaskResourceUsage values. The q parameter is not used.
+func (a *Allocations) Stats(alloc *Allocation, q *QueryOptions) (map[string]*TaskResourceUsage, error) {
+	if alloc == nil {
+		return nil, fmt.Errorf("allocation must not be nil")
+	}
+	node, _, err := a.client.Nodes().Info(alloc.NodeID, &QueryOptions{})
+	if err != nil {
+		return nil, err
+	}
+	if node.HTTPAddr == "" {
+		return nil, fmt.Errorf("http addr of the node where alloc %q is running is not advertised", alloc.ID)
+	}
+	u := &url.URL{
+		Scheme: "http",
+		Host:   node.HTTPAddr,
+		Path:   "/v1/client/stats/",
+	}
+	v := url.Values{}
+	v.Set("allocation", alloc.ID)
+	u.RawQuery = v.Encode()
+	req := &http.Request{
+		Method: "GET",
+		URL:    u,
+	}
+	// Bound the request so an unresponsive node cannot hang the caller.
+	c := http.Client{Timeout: 30 * time.Second}
+	resp, err := c.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	// Release the connection on every return path below.
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		return nil, a.getErrorMsg(resp)
+	}
+	var stats map[string]*TaskResourceUsage
+	if err := json.NewDecoder(resp.Body).Decode(&stats); err != nil {
+		return nil, err
+	}
+	return stats, nil
+}
+
+// getErrorMsg turns the body of an unsuccessful response into an error. If the
+// body cannot be read, the read error is returned instead.
+func (a *Allocations) getErrorMsg(resp *http.Response) error {
+	errMsg, err := ioutil.ReadAll(resp.Body)
+	if err != nil {
+		return err
+	}
+	// Pass the body as an argument, not as the format string: it is
+	// server-supplied text and may contain '%' characters.
+	return fmt.Errorf("%s", errMsg)
+}
+
// Allocation is used for serialization of allocations. type Allocation struct { ID string diff --git a/api/tasks.go b/api/tasks.go index e0fd67bac..1a80212fd 100644 --- a/api/tasks.go +++ b/api/tasks.go @@ -4,6 +4,27 @@ import ( "time" ) +type MemoryStats struct { + RSS uint64 + Cache uint64 + Swap uint64 + MaxUsage uint64 + KernelUsage uint64 + KernelMaxUsage uint64 +} + +type CpuUsage struct { + SystemMode uint64 + UserMode uint64 + ThrottledPeriods uint64 + ThrottledTime uint64 +} + +type TaskResourceUsage struct { + MemoryStats *MemoryStats + CpuStats *CpuUsage +} + // RestartPolicy defines how the Nomad client restarts // tasks in a taskgroup when they fail type RestartPolicy struct { diff --git a/client/driver/executor/executor_linux.go b/client/driver/executor/executor_linux.go index b3721d62e..bb170af55 100644 --- a/client/driver/executor/executor_linux.go +++ b/client/driver/executor/executor_linux.go @@ -131,11 +131,17 @@ func (e *UniversalExecutor) Stats() (*cstructs.TaskResourceUsage, error) { } // Memory Related Stats + swap := stats.MemoryStats.SwapUsage + maxUsage := stats.MemoryStats.Usage.MaxUsage rss := stats.MemoryStats.Stats["rss"] cache := stats.MemoryStats.Stats["cache"] ms := &cstructs.MemoryStats{ - RSS: rss, - Cache: cache, + RSS: rss, + Cache: cache, + Swap: swap.Usage, + MaxUsage: maxUsage, + KernelUsage: stats.MemoryStats.KernelUsage.Usage, + KernelMaxUsage: stats.MemoryStats.KernelUsage.MaxUsage, } // CPU Related Stats @@ -146,8 +152,10 @@ func (e *UniversalExecutor) Stats() (*cstructs.TaskResourceUsage, error) { kmTicks := (kernelModeTime * clockTicks) / nanosecondsInSecond cs := &cstructs.CpuUsage{ - SystemMode: kmTicks, - UserMode: umTicks, + SystemMode: kmTicks, + UserMode: umTicks, + ThrottledPeriods: stats.CpuStats.ThrottlingData.ThrottledPeriods, + ThrottledTime: stats.CpuStats.ThrottlingData.ThrottledTime, } return &cstructs.TaskResourceUsage{MemoryStats: ms, CpuStats: cs}, nil } diff --git a/client/driver/structs/structs.go 
b/client/driver/structs/structs.go index 8f55087ac..5ec4f211d 100644 --- a/client/driver/structs/structs.go +++ b/client/driver/structs/structs.go @@ -86,13 +86,19 @@ type CheckResult struct { } type MemoryStats struct { - RSS uint64 - Cache uint64 + RSS uint64 + Cache uint64 + Swap uint64 + MaxUsage uint64 + KernelUsage uint64 + KernelMaxUsage uint64 } type CpuUsage struct { - SystemMode uint64 - UserMode uint64 + SystemMode uint64 + UserMode uint64 + ThrottledPeriods uint64 + ThrottledTime uint64 } type TaskResourceUsage struct { diff --git a/command/alloc_status.go b/command/alloc_status.go index 700a17546..6f4eeec3c 100644 --- a/command/alloc_status.go +++ b/command/alloc_status.go @@ -3,6 +3,7 @@ package command import ( "fmt" "sort" + "strconv" "strings" "time" @@ -119,6 +120,11 @@ func (c *AllocStatusCommand) Run(args []string) int { return 1 } + stats, err := client.Allocations().Stats(alloc, nil) + if err != nil { + c.Ui.Error(fmt.Sprintf("couldn't retreive stats: %v", err)) + } + // Format the allocation data basic := []string{ fmt.Sprintf("ID|%s", limit(alloc.ID, length)), @@ -140,7 +146,7 @@ func (c *AllocStatusCommand) Run(args []string) int { c.Ui.Output(formatKV(basic)) if !short { - c.taskResources(alloc) + c.taskResources(alloc, stats) } // Print the state of each task. 
@@ -302,7 +308,7 @@ func (c *AllocStatusCommand) allocResources(alloc *api.Allocation) {
 }
 
 // taskResources prints out the tasks current resource usage
-func (c *AllocStatusCommand) taskResources(alloc *api.Allocation) {
+func (c *AllocStatusCommand) taskResources(alloc *api.Allocation, stats map[string]*api.TaskResourceUsage) {
 	if len(alloc.TaskResources) == 0 {
 		return
 	}
@@ -338,9 +344,22 @@ func (c *AllocStatusCommand) taskResources(alloc *api.Allocation) {
 		if len(addr) > 0 {
 			firstAddr = addr[0]
 		}
+		// Default to the allocated amounts and only show "used/allocated"
+		// when the corresponding stats were actually reported; stats may
+		// be nil or partially populated if the client could not be queried.
+		cpuUsage := strconv.Itoa(resource.CPU)
+		memUsage := strconv.Itoa(resource.MemoryMB)
+		if ru := stats[task]; ru != nil {
+			if ru.CpuStats != nil {
+				cpuUsage = fmt.Sprintf("%v/%v", (ru.CpuStats.SystemMode + ru.CpuStats.UserMode), resource.CPU)
+			}
+			if ru.MemoryStats != nil {
+				memUsage = fmt.Sprintf("%v/%v", ru.MemoryStats.RSS/(1024*1024), resource.MemoryMB)
+			}
+		}
 		resourcesOutput = append(resourcesOutput, fmt.Sprintf("%v|%v|%v|%v|%v",
-			resource.CPU,
-			resource.MemoryMB,
+			cpuUsage,
+			memUsage,
 			resource.DiskMB,
 			resource.IOPS,
 			firstAddr))
diff --git a/command/stats.go b/command/stats.go
new file mode 100644
index 000000000..3d395515b
--- /dev/null
+++ b/command/stats.go
@@ -0,0 +1,197 @@
+package command
+
+import (
+	"fmt"
+	"sort"
+	"strings"
+
+	"github.com/hashicorp/nomad/api"
+)
+
+// StatsCommand implements the "stats" CLI command, which prints the resource
+// usage reported for an allocation or for a single task within it.
+type StatsCommand struct {
+	Meta
+}
+
+// Help returns the usage text of the command.
+func (f *StatsCommand) Help() string {
+	helpText := `
+Usage: nomad stats [options] <alloc-id> [<task>]
+
+  Displays stats of an allocation or a task running on a nomad client.
+
+Stats Options:
+
+  -verbose
+    Show full-length identifiers.
+`
+	return strings.TrimSpace(helpText)
+}
+
+// Synopsis returns the one-line description of the command.
+func (f *StatsCommand) Synopsis() string {
+	return "Displays stats of an allocation or a task running on a nomad client"
+}
+
+// Run executes the command. args holds an allocation ID or ID prefix,
+// optionally followed by a task name. It returns 0 when stats were printed or
+// when the prefix matched several allocations and the candidates were listed,
+// and 1 on any error.
+func (f *StatsCommand) Run(args []string) int {
+	var verbose bool
+	flags := f.Meta.FlagSet("stats", FlagSetClient)
+	flags.BoolVar(&verbose, "verbose", false, "")
+	flags.Usage = func() { f.Ui.Output(f.Help()) }
+
+	if err := flags.Parse(args); err != nil {
+		return 1
+	}
+
+	args = flags.Args()
+	if len(args) < 1 {
+		f.Ui.Error("allocation id is a required parameter")
+		return 1
+	}
+	if len(args) > 2 {
+		f.Ui.Error("at most an allocation id and a task name may be given")
+		return 1
+	}
+	client, err := f.Meta.Client()
+	if err != nil {
+		f.Ui.Error(fmt.Sprintf("Error initializing client: %v", err))
+		return 1
+	}
+
+	allocID := strings.TrimSpace(args[0])
+	var task string
+	if len(args) == 2 {
+		task = strings.TrimSpace(args[1])
+	}
+
+	// Truncate the id unless full length is requested
+	length := shortId
+	if verbose {
+		length = fullId
+	}
+
+	// Query the allocation info
+	if len(allocID) < 2 {
+		f.Ui.Error("Alloc ID must contain at least two characters.")
+		return 1
+	}
+	if len(allocID)%2 == 1 {
+		// Identifiers must be of even length, so we strip off the last byte
+		// to provide a consistent user experience.
+		allocID = allocID[:len(allocID)-1]
+	}
+
+	allocs, _, err := client.Allocations().PrefixList(allocID)
+	if err != nil {
+		f.Ui.Error(fmt.Sprintf("Error querying allocation: %v", err))
+		return 1
+	}
+	if len(allocs) == 0 {
+		f.Ui.Error(fmt.Sprintf("No allocation(s) with prefix or id %q found", allocID))
+		return 1
+	}
+	if len(allocs) > 1 {
+		// Format the allocs
+		out := make([]string, len(allocs)+1)
+		out[0] = "ID|Eval ID|Job ID|Task Group|Desired Status|Client Status"
+		for i, alloc := range allocs {
+			out[i+1] = fmt.Sprintf("%s|%s|%s|%s|%s|%s",
+				limit(alloc.ID, length),
+				limit(alloc.EvalID, length),
+				alloc.JobID,
+				alloc.TaskGroup,
+				alloc.DesiredStatus,
+				alloc.ClientStatus,
+			)
+		}
+		f.Ui.Output(fmt.Sprintf("Prefix matched multiple allocations\n\n%s", formatList(out)))
+		return 0
+	}
+	// Prefix lookup matched a single allocation
+	alloc, _, err := client.Allocations().Info(allocs[0].ID, nil)
+	if err != nil {
+		f.Ui.Error(fmt.Sprintf("Error querying allocation: %s", err))
+		return 1
+	}
+
+	stats, err := client.Allocations().Stats(alloc, nil)
+	if err != nil {
+		f.Ui.Error(fmt.Sprintf("unable to get stats: %v", err))
+		return 1
+	}
+	if task == "" {
+		f.printAllocResourceUsage(alloc, stats)
+		return 0
+	}
+	// Report an unknown task rather than exiting successfully with no output.
+	if stats[task] == nil {
+		f.Ui.Error(fmt.Sprintf("no stats found for task %q in allocation %q", task, alloc.ID))
+		return 1
+	}
+	f.printTaskResourceUsage(task, stats)
+	return 0
+}
+
+// printTaskResourceUsage prints the memory and CPU stats of task. Nothing is
+// printed when resourceUsage has no entry for the task, and a section is
+// skipped when its stats are missing. Memory figures are passed through inMB
+// before printing.
+func (f *StatsCommand) printTaskResourceUsage(task string, resourceUsage map[string]*api.TaskResourceUsage) {
+	tu := resourceUsage[task]
+	if tu == nil {
+		return
+	}
+	f.Ui.Output(fmt.Sprintf("===> Task: %q", task))
+	if ms := tu.MemoryStats; ms != nil {
+		f.Ui.Output("Memory Stats")
+		out := []string{
+			"RSS|Cache|Swap|Max Usage|Kernel Usage|KernelMaxUsage",
+			fmt.Sprintf("%v|%v|%v|%v|%v|%v",
+				f.inMB(ms.RSS),
+				f.inMB(ms.Cache),
+				f.inMB(ms.Swap),
+				f.inMB(ms.MaxUsage),
+				f.inMB(ms.KernelUsage),
+				f.inMB(ms.KernelMaxUsage),
+			),
+		}
+		f.Ui.Output(formatList(out))
+		f.Ui.Output("")
+	}
+
+	if cs := tu.CpuStats; cs != nil {
+		f.Ui.Output("CPU Stats")
+		out := []string{
+			"Kernel Mode|User Mode|Throttled Periods|Throttled Time",
+			fmt.Sprintf("%v|%v|%v|%v", cs.SystemMode, cs.UserMode,
+				cs.ThrottledPeriods, cs.ThrottledTime),
+		}
+		f.Ui.Output(formatList(out))
+	}
+}
+
+// printAllocResourceUsage prints the stats of every task named in
+// alloc.TaskStates. Tasks are visited in sorted name order because map
+// iteration order is random and would make the output unstable.
+func (f *StatsCommand) printAllocResourceUsage(alloc *api.Allocation, resourceUsage map[string]*api.TaskResourceUsage) {
+	f.Ui.Output(fmt.Sprintf("Resource Usage of Tasks running in Allocation %q", alloc.ID))
+	tasks := make([]string, 0, len(alloc.TaskStates))
+	for task := range alloc.TaskStates {
+		tasks = append(tasks, task)
+	}
+	sort.Strings(tasks)
+	for _, task := range tasks {
+		f.printTaskResourceUsage(task, resourceUsage)
+	}
+}
+
+// inMB converts a byte count to whole megabytes (1024*1024 bytes), rounding
+// down.
+func (f *StatsCommand) inMB(bytes uint64) uint64 {
+	return bytes / (1024 * 1024)
+}
diff --git a/commands.go b/commands.go
index b402f429b..023a74d7d 100644
--- a/commands.go
+++ b/commands.go
@@ -121,6 +121,11 @@ func Commands(metaPtr *command.Meta) map[string]cli.CommandFactory {
 				Meta: meta,
 			}, nil
 		},
+		"stats": func() (cli.Command, error) {
+			return &command.StatsCommand{
+				Meta: meta,
+			}, nil
+		},
 		"status": func() (cli.Command, error) {
 			return &command.StatusCommand{
 				Meta: meta,