diff --git a/client/alloc_runner.go b/client/alloc_runner.go index 63491ed1d..b796ccbe4 100644 --- a/client/alloc_runner.go +++ b/client/alloc_runner.go @@ -485,7 +485,7 @@ func (r *AllocRunner) StatsReporter() AllocStatsReporter { // AllocStats returns the stats reporter of all the tasks running in the // allocation func (r *AllocRunner) AllocStats() map[string]TaskStatsReporter { - r.taskLock.Lock() + r.taskLock.RLock() defer r.taskLock.RUnlock() res := make(map[string]TaskStatsReporter) for task, tr := range r.tasks { diff --git a/client/driver/executor/executor.go b/client/driver/executor/executor.go index 5beaa9514..c654d3e3e 100644 --- a/client/driver/executor/executor.go +++ b/client/driver/executor/executor.go @@ -742,3 +742,44 @@ func (e *UniversalExecutor) scanPids(parentPid int, allPids []ps.Process) ([]*no } return res, nil } + +// aggregatedResourceUsage aggregates the resource usage of all the pids and +// returns a TaskResourceUsage data point +func (e *UniversalExecutor) aggregatedResourceUsage(pidStats map[string]*cstructs.ResourceUsage) *cstructs.TaskResourceUsage { + ts := time.Now() + var ( + systemModeCPU, userModeCPU, percent float64 + totalRSS, totalSwap uint64 + ) + + for _, pidStat := range pidStats { + systemModeCPU += pidStat.CpuStats.SystemMode + userModeCPU += pidStat.CpuStats.UserMode + percent += pidStat.CpuStats.Percent + + totalRSS += pidStat.MemoryStats.RSS + totalSwap += pidStat.MemoryStats.Swap + } + + totalCPU := &cstructs.CpuStats{ + SystemMode: systemModeCPU, + UserMode: userModeCPU, + Percent: percent, + } + + totalMemory := &cstructs.MemoryStats{ + RSS: totalRSS, + Swap: totalSwap, + } + + resourceUsage := cstructs.ResourceUsage{ + MemoryStats: totalMemory, + CpuStats: totalCPU, + Timestamp: ts, + } + return &cstructs.TaskResourceUsage{ + ResourceUsage: &resourceUsage, + Timestamp: ts, + Pids: pidStats, + } +} diff --git a/client/driver/executor/executor_basic.go b/client/driver/executor/executor_basic.go index a21bb9c64..d66cfb078 100644 --- a/client/driver/executor/executor_basic.go +++ b/client/driver/executor/executor_basic.go @@ -4,7 +4,6 @@ package executor import ( "os" - "time" cstructs "github.com/hashicorp/nomad/client/driver/structs" @@ -37,46 +36,11 @@ func (e *UniversalExecutor) configureIsolation() error { } func (e *UniversalExecutor) Stats() (*cstructs.TaskResourceUsage, error) { - ts := time.Now() pidStats, err := e.pidStats() if err != nil { return nil, err } - var ( - systemModeCPU, userModeCPU, percent float64 - totalRSS, totalSwap uint64 - ) - - for _, pidStat := range pidStats { - systemModeCPU += pidStat.CpuStats.SystemMode - userModeCPU += pidStat.CpuStats.UserMode - percent += pidStat.CpuStats.Percent - - totalRSS += pidStat.MemoryStats.RSS - totalSwap += pidStat.MemoryStats.Swap - } - - totalCPU := &cstructs.CpuStats{ - SystemMode: systemModeCPU, - UserMode: userModeCPU, - Percent: percent, - } - - totalMemory := &cstructs.MemoryStats{ - RSS: totalRSS, - Swap: totalSwap, - } - - resourceUsage := cstructs.ResourceUsage{ - MemoryStats: totalMemory, - CpuStats: totalCPU, - Timestamp: ts, - } - return &cstructs.TaskResourceUsage{ - ResourceUsage: &resourceUsage, - Timestamp: ts, - Pids: pidStats, - }, nil + return e.aggregatedResourceUsage(pidStats), nil } func (e *UniversalExecutor) getAllPids() ([]*nomadPid, error) { diff --git a/client/driver/executor/executor_linux.go b/client/driver/executor/executor_linux.go index e5b0376ad..4020c8e71 100644 --- a/client/driver/executor/executor_linux.go +++ b/client/driver/executor/executor_linux.go @@ -11,6 +11,7 @@ import ( "time" "github.com/hashicorp/go-multierror" + "github.com/mitchellh/go-ps" "github.com/opencontainers/runc/libcontainer/cgroups" cgroupFs "github.com/opencontainers/runc/libcontainer/cgroups/fs" cgroupConfig "github.com/opencontainers/runc/libcontainer/configs" @@ -130,7 +131,11 @@ func (e *UniversalExecutor) configureCgroups(resources *structs.Resources) error // the executor. func (e *UniversalExecutor) Stats() (*cstructs.TaskResourceUsage, error) { if !e.command.ResourceLimits { - return e.resourceUsagePids() + pidStats, err := e.pidStats() + if err != nil { + return nil, err + } + return e.aggregatedResourceUsage(pidStats), nil } ts := time.Now() manager := getCgroupManager(e.groups, e.cgPaths) @@ -178,7 +183,7 @@ func (e *UniversalExecutor) Stats() (*cstructs.TaskResourceUsage, error) { }, Timestamp: ts, } - if pidStats, err := e.PidStats(); err == nil { + if pidStats, err := e.pidStats(); err == nil { taskResUsage.Pids = pidStats } return &taskResUsage, nil @@ -254,20 +259,24 @@ func (e *UniversalExecutor) removeChrootMounts() error { return e.ctx.AllocDir.UnmountAll() } -func (e *UniversalExecutor) getAllPids() ([]*NomadPid, error) { +func (e *UniversalExecutor) getAllPids() ([]*nomadPid, error) { if e.command.ResourceLimits { manager := getCgroupManager(e.groups, e.cgPaths) pids, err := manager.GetAllPids() if err != nil { return nil, err } - np := make([]*NomadPid, len(pids)) + np := make([]*nomadPid, len(pids)) for idx, pid := range pids { - np[idx] = &NomadPid{pid, stats.NewCpuStats()} + np[idx] = &nomadPid{pid, stats.NewCpuStats()} } return np, nil } - return e.scanPids() + allProcesses, err := ps.Processes() + if err != nil { + return nil, err + } + return e.scanPids(os.Getpid(), allProcesses) } // destroyCgroup kills all processes in the cgroup and removes the cgroup diff --git a/command/agent/config.go b/command/agent/config.go index 41b6d4121..be0cd0db2 100644 --- a/command/agent/config.go +++ b/command/agent/config.go @@ -453,6 +453,7 @@ func DefaultConfig() *Config { StatsConfig: &StatsConfig{ DataPoints: 60, CollectionInterval: "1s", + collectionInterval: 1 * time.Second, }, }, Server: &ServerConfig{