Added the nomad stats command

This commit is contained in:
Diptanu Choudhury
2016-04-29 13:03:02 -07:00
parent a485a38029
commit 50250b1455
7 changed files with 262 additions and 12 deletions

View File

@@ -1,6 +1,11 @@
package api
import (
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
"net/url"
"sort"
"time"
)
@@ -40,6 +45,50 @@ func (a *Allocations) Info(allocID string, q *QueryOptions) (*Allocation, *Query
return &resp, qm, nil
}
// Stats fetches the current resource usage of the tasks in alloc directly
// from the client node the allocation is placed on.
//
// The node is looked up through the Nodes API to discover its advertised
// HTTP address, then a GET is issued to /v1/client/stats/ on that address
// with the allocation ID passed as the "allocation" query parameter. The
// JSON response is decoded into a map of *TaskResourceUsage (callers index
// it by task name).
//
// q is accepted for signature symmetry with the other Allocations methods
// but is not consulted.
//
// An error is returned when the node lookup fails, when the node does not
// advertise an HTTP address, when the request fails, when the node answers
// with a non-200 status (the response body becomes the error text), or when
// the body cannot be decoded.
func (a *Allocations) Stats(alloc *Allocation, q *QueryOptions) (map[string]*TaskResourceUsage, error) {
	node, _, err := a.client.Nodes().Info(alloc.NodeID, &QueryOptions{})
	if err != nil {
		return nil, err
	}
	if node.HTTPAddr == "" {
		return nil, fmt.Errorf("http addr of the node where alloc %q is running is not advertised", alloc.ID)
	}

	u := &url.URL{
		Scheme: "http",
		Host:   node.HTTPAddr,
		Path:   "/v1/client/stats/",
	}
	v := url.Values{}
	v.Set("allocation", alloc.ID)
	u.RawQuery = v.Encode()

	req := &http.Request{
		Method: "GET",
		URL:    u,
	}
	c := http.Client{}
	resp, err := c.Do(req)
	if err != nil {
		return nil, err
	}
	// The body must be closed on every path (error status, decode failure
	// and success) or the underlying connection is leaked.
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, a.getErrorMsg(resp)
	}

	var stats map[string]*TaskResourceUsage
	if err := json.NewDecoder(resp.Body).Decode(&stats); err != nil {
		return nil, err
	}
	return stats, nil
}
// getErrorMsg turns the body of an HTTP response into an error whose text is
// the body content. If the body cannot be read, the read error is returned
// instead. The body is not closed here; that remains the caller's job.
func (a *Allocations) getErrorMsg(resp *http.Response) error {
	errMsg, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return err
	}
	// The body is server-supplied text, so it must be passed as an argument
	// rather than as the format string: a literal '%' in the message would
	// otherwise be interpreted as a formatting verb and garble the error.
	return fmt.Errorf("%s", errMsg)
}
// Allocation is used for serialization of allocations.
type Allocation struct {
ID string

View File

@@ -4,6 +4,27 @@ import (
"time"
)
// MemoryStats holds the memory usage counters reported for a single task.
// NOTE(review): values look like byte counts (the alloc-status command
// divides RSS by 1024*1024 before showing it next to MemoryMB) - confirm.
type MemoryStats struct {
// RSS is presumably the resident set size of the task.
RSS uint64
// Cache is presumably the page cache attributed to the task.
Cache uint64
// Swap is the reported swap usage.
Swap uint64
// MaxUsage is the reported maximum memory usage.
MaxUsage uint64
// KernelUsage is the reported kernel memory usage.
KernelUsage uint64
// KernelMaxUsage is the reported maximum kernel memory usage.
KernelMaxUsage uint64
}
// CpuUsage holds the CPU usage counters reported for a single task.
type CpuUsage struct {
// SystemMode is the CPU time spent in kernel mode.
// NOTE(review): unit appears to be clock ticks on the executor side - confirm.
SystemMode uint64
// UserMode is the CPU time spent in user mode (same unit caveat as
// SystemMode).
UserMode uint64
// ThrottledPeriods is the reported number of throttled periods.
ThrottledPeriods uint64
// ThrottledTime is the reported total throttled time.
// NOTE(review): unit is not visible from here - confirm.
ThrottledTime uint64
}
// TaskResourceUsage groups the memory and CPU usage of a single task. It is
// the value type of the map returned by Allocations.Stats.
type TaskResourceUsage struct {
// MemoryStats is a pointer and may be nil if the node omitted it.
MemoryStats *MemoryStats
// CpuStats is a pointer and may be nil if the node omitted it.
CpuStats *CpuUsage
}
// RestartPolicy defines how the Nomad client restarts
// tasks in a taskgroup when they fail
type RestartPolicy struct {

View File

@@ -131,11 +131,17 @@ func (e *UniversalExecutor) Stats() (*cstructs.TaskResourceUsage, error) {
}
// Memory Related Stats
swap := stats.MemoryStats.SwapUsage
maxUsage := stats.MemoryStats.Usage.MaxUsage
rss := stats.MemoryStats.Stats["rss"]
cache := stats.MemoryStats.Stats["cache"]
ms := &cstructs.MemoryStats{
RSS: rss,
Cache: cache,
RSS: rss,
Cache: cache,
Swap: swap.Usage,
MaxUsage: maxUsage,
KernelUsage: stats.MemoryStats.KernelUsage.Usage,
KernelMaxUsage: stats.MemoryStats.KernelUsage.MaxUsage,
}
// CPU Related Stats
@@ -146,8 +152,10 @@ func (e *UniversalExecutor) Stats() (*cstructs.TaskResourceUsage, error) {
kmTicks := (kernelModeTime * clockTicks) / nanosecondsInSecond
cs := &cstructs.CpuUsage{
SystemMode: kmTicks,
UserMode: umTicks,
SystemMode: kmTicks,
UserMode: umTicks,
ThrottledPeriods: stats.CpuStats.ThrottlingData.ThrottledPeriods,
ThrottledTime: stats.CpuStats.ThrottlingData.ThrottledTime,
}
return &cstructs.TaskResourceUsage{MemoryStats: ms, CpuStats: cs}, nil
}

View File

@@ -86,13 +86,19 @@ type CheckResult struct {
}
type MemoryStats struct {
RSS uint64
Cache uint64
RSS uint64
Cache uint64
Swap uint64
MaxUsage uint64
KernelUsage uint64
KernelMaxUsage uint64
}
type CpuUsage struct {
SystemMode uint64
UserMode uint64
SystemMode uint64
UserMode uint64
ThrottledPeriods uint64
ThrottledTime uint64
}
type TaskResourceUsage struct {

View File

@@ -3,6 +3,7 @@ package command
import (
"fmt"
"sort"
"strconv"
"strings"
"time"
@@ -119,6 +120,11 @@ func (c *AllocStatusCommand) Run(args []string) int {
return 1
}
stats, err := client.Allocations().Stats(alloc, nil)
if err != nil {
c.Ui.Error(fmt.Sprintf("couldn't retreive stats: %v", err))
}
// Format the allocation data
basic := []string{
fmt.Sprintf("ID|%s", limit(alloc.ID, length)),
@@ -140,7 +146,7 @@ func (c *AllocStatusCommand) Run(args []string) int {
c.Ui.Output(formatKV(basic))
if !short {
c.taskResources(alloc)
c.taskResources(alloc, stats)
}
// Print the state of each task.
@@ -302,7 +308,7 @@ func (c *AllocStatusCommand) allocResources(alloc *api.Allocation) {
}
// taskResources prints out the tasks current resource usage
func (c *AllocStatusCommand) taskResources(alloc *api.Allocation) {
func (c *AllocStatusCommand) taskResources(alloc *api.Allocation, stats map[string]*api.TaskResourceUsage) {
if len(alloc.TaskResources) == 0 {
return
}
@@ -338,9 +344,15 @@ func (c *AllocStatusCommand) taskResources(alloc *api.Allocation) {
if len(addr) > 0 {
firstAddr = addr[0]
}
cpuUsage := strconv.Itoa(resource.CPU)
memUsage := strconv.Itoa(resource.MemoryMB)
if ru, ok := stats[task]; ok {
cpuUsage = fmt.Sprintf("%v/%v", (ru.CpuStats.SystemMode + ru.CpuStats.UserMode), resource.CPU)
memUsage = fmt.Sprintf("%v/%v", ru.MemoryStats.RSS/(1024*1024), resource.MemoryMB)
}
resourcesOutput = append(resourcesOutput, fmt.Sprintf("%v|%v|%v|%v|%v",
resource.CPU,
resource.MemoryMB,
cpuUsage,
memUsage,
resource.DiskMB,
resource.IOPS,
firstAddr))

149
command/stats.go Normal file
View File

@@ -0,0 +1,149 @@
package command
import (
"fmt"
"github.com/hashicorp/nomad/api"
"strings"
)
// StatsCommand is the CLI command that prints the resource usage of an
// allocation, or of one task within it. It embeds Meta for the shared flag
// set, API client and Ui handling used by its methods.
type StatsCommand struct {
Meta
}
// Help returns the help text shown for the stats command; Run also prints it
// as the flag set's usage message.
func (f *StatsCommand) Help() string {
	return "Displays stats of an allocation or a task running on a nomad client"
}
// Synopsis returns the one-line description of the stats command.
func (f *StatsCommand) Synopsis() string {
	return "Displays stats of an allocation or a task running on a nomad client"
}
// Run executes the stats command and returns the process exit code.
//
// args holds the command-line arguments after the command name: an optional
// -verbose flag, a required allocation ID (or ID prefix) and an optional task
// name. The allocation is resolved by prefix; when the prefix matches more
// than one allocation the candidates are listed and 0 is returned. Otherwise
// the stats of the allocation are fetched and printed, either for every task
// or only for the named one.
//
// 1 is returned on flag-parsing errors, a missing or too-short allocation ID,
// API failures, an unmatched prefix, or a task name with no stats.
func (f *StatsCommand) Run(args []string) int {
	var verbose bool
	flags := f.Meta.FlagSet("stats", FlagSetClient)
	flags.BoolVar(&verbose, "verbose", false, "")
	flags.Usage = func() { f.Ui.Output(f.Help()) }
	if err := flags.Parse(args); err != nil {
		return 1
	}

	args = flags.Args()
	if len(args) < 1 {
		f.Ui.Error("allocation id is a required parameter")
		return 1
	}

	client, err := f.Meta.Client()
	if err != nil {
		f.Ui.Error(fmt.Sprintf("Error initializing client: %v", err))
		return 1
	}

	var allocID, task string
	allocID = strings.TrimSpace(args[0])
	if len(args) == 2 {
		task = strings.TrimSpace(args[1])
	}

	// Truncate the id unless full length is requested
	length := shortId
	if verbose {
		length = fullId
	}

	// Query the allocation info
	if len(allocID) == 1 {
		f.Ui.Error("Alloc ID must contain at least two characters.")
		return 1
	}
	if len(allocID)%2 == 1 {
		// Identifiers must be of even length, so we strip off the last byte
		// to provide a consistent user experience.
		allocID = allocID[:len(allocID)-1]
	}

	allocs, _, err := client.Allocations().PrefixList(allocID)
	if err != nil {
		f.Ui.Error(fmt.Sprintf("Error querying allocation: %v", err))
		return 1
	}
	if len(allocs) == 0 {
		f.Ui.Error(fmt.Sprintf("No allocation(s) with prefix or id %q found", allocID))
		return 1
	}
	if len(allocs) > 1 {
		// Format the allocs
		out := make([]string, len(allocs)+1)
		out[0] = "ID|Eval ID|Job ID|Task Group|Desired Status|Client Status"
		for i, alloc := range allocs {
			out[i+1] = fmt.Sprintf("%s|%s|%s|%s|%s|%s",
				limit(alloc.ID, length),
				limit(alloc.EvalID, length),
				alloc.JobID,
				alloc.TaskGroup,
				alloc.DesiredStatus,
				alloc.ClientStatus,
			)
		}
		f.Ui.Output(fmt.Sprintf("Prefix matched multiple allocations\n\n%s", formatList(out)))
		return 0
	}

	// Prefix lookup matched a single allocation
	alloc, _, err := client.Allocations().Info(allocs[0].ID, nil)
	if err != nil {
		f.Ui.Error(fmt.Sprintf("Error querying allocation: %s", err))
		return 1
	}

	stats, err := client.Allocations().Stats(alloc, nil)
	if err != nil {
		f.Ui.Error(fmt.Sprintf("unable to get stats: %v", err))
		return 1
	}

	if task == "" {
		f.printAllocResourceUsage(alloc, stats)
		return 0
	}

	// An explicitly requested task that has no stats is a user-visible
	// failure; without this check the command would print nothing and
	// still exit successfully.
	if _, ok := stats[task]; !ok {
		f.Ui.Error(fmt.Sprintf("No stats found for task %q in allocation %q", task, alloc.ID))
		return 1
	}
	f.printTaskResourceUsage(task, stats)
	return 0
}
// printTaskResourceUsage prints the memory and CPU stats of one task.
//
// task is the key looked up in resourceUsage. Nothing is printed when the
// task has no entry (or a nil entry), which lets callers pass every task of
// an allocation without pre-filtering. The memory and CPU sections are
// printed independently, and a section is skipped when its stats pointer is
// nil instead of dereferencing it.
func (f *StatsCommand) printTaskResourceUsage(task string, resourceUsage map[string]*api.TaskResourceUsage) {
	tu, ok := resourceUsage[task]
	if !ok || tu == nil {
		return
	}
	f.Ui.Output(fmt.Sprintf("===> Task: %q", task))

	// The stats are pointers decoded from JSON, so either may be absent.
	if ms := tu.MemoryStats; ms != nil {
		f.Ui.Output("Memory Stats")
		out := make([]string, 2)
		out[0] = "RSS|Cache|Swap|Max Usage|Kernel Usage|KernelMaxUsage"
		out[1] = fmt.Sprintf("%v|%v|%v|%v|%v|%v",
			f.inMB(ms.RSS),
			f.inMB(ms.Cache),
			f.inMB(ms.Swap),
			f.inMB(ms.MaxUsage),
			f.inMB(ms.KernelUsage),
			f.inMB(ms.KernelMaxUsage),
		)
		f.Ui.Output(formatList(out))
		f.Ui.Output("")
	}

	if cs := tu.CpuStats; cs != nil {
		f.Ui.Output("CPU Stats")
		out := make([]string, 2)
		out[0] = "Kernel Mode|User Mode|Throttled Periods|Throttled Time"
		out[1] = fmt.Sprintf("%v|%v|%v|%v", cs.SystemMode, cs.UserMode,
			cs.ThrottledPeriods, cs.ThrottledTime)
		f.Ui.Output(formatList(out))
	}
}
// printAllocResourceUsage prints a heading naming the allocation and then the
// resource usage of each task listed in alloc.TaskStates. Tasks are visited
// in map iteration order, so the order of the output is not fixed.
func (f *StatsCommand) printAllocResourceUsage(alloc *api.Allocation, resourceUsage map[string]*api.TaskResourceUsage) {
	heading := fmt.Sprintf("Resource Usage of Tasks running in Allocation %q", alloc.ID)
	f.Ui.Output(heading)

	for name := range alloc.TaskStates {
		f.printTaskResourceUsage(name, resourceUsage)
	}
}
// inMB converts a byte count into whole mebibytes (1024*1024 bytes),
// truncating any remainder. This matches how the alloc-status command scales
// RSS before displaying it next to the MemoryMB resource.
func (f *StatsCommand) inMB(bytes uint64) uint64 {
	const bytesPerMB = 1024 * 1024
	return bytes / bytesPerMB
}

View File

@@ -121,6 +121,11 @@ func Commands(metaPtr *command.Meta) map[string]cli.CommandFactory {
Meta: meta,
}, nil
},
"stats": func() (cli.Command, error) {
return &command.StatsCommand{
Meta: meta,
}, nil
},
"status": func() (cli.Command, error) {
return &command.StatusCommand{
Meta: meta,