Revert libcontainer to 89ab7f2ccc1

This commit is contained in:
Michael Schurter
2017-03-01 10:41:07 -08:00
parent 2b479554fd
commit 1df41cffb8
27 changed files with 264 additions and 713 deletions

View File

@@ -18,7 +18,6 @@ import (
"github.com/hashicorp/nomad/client/stats"
cstructs "github.com/hashicorp/nomad/client/structs"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/nomad/structs"
)
@@ -77,7 +76,7 @@ func (e *UniversalExecutor) configureCgroups(resources *structs.Resources) error
e.resConCtx.groups.Path = filepath.Join("/nomad", cgroupName)
// TODO: verify this is needed for things like network access
e.resConCtx.groups.Resources.AllowAllDevices = helper.BoolToPtr(true)
e.resConCtx.groups.Resources.AllowAllDevices = true
if resources.MemoryMB > 0 {
// Total amount of memory allowed to consume

View File

@@ -27,9 +27,9 @@ type Manager interface {
// Destroys the cgroup set
Destroy() error
// The option func SystemdCgroups() and Cgroupfs() require following attributes:
// NewCgroupManager() and LoadCgroupManager() require following attributes:
// Paths map[string]string
// Cgroups *configs.Cgroup
// Cgroups *cgroups.Cgroup
// Paths maps cgroup subsystem to path at which it is mounted.
// Cgroups specifies specific cgroup settings for the various subsystems
@@ -37,7 +37,7 @@ type Manager interface {
// restore the object later.
GetPaths() map[string]string
// Sets the cgroup as configured.
// Set the cgroup as configured.
Set(container *configs.Config) error
}

View File

@@ -9,6 +9,7 @@ import (
"io/ioutil"
"os"
"path/filepath"
"strconv"
"sync"
"github.com/opencontainers/runc/libcontainer/cgroups"
@@ -32,6 +33,7 @@ var (
&FreezerGroup{},
&NameGroup{GroupName: "name=systemd", Join: true},
}
CgroupProcesses = "cgroup.procs"
HugePageSizes, _ = cgroups.GetHugePageSize()
)
@@ -104,8 +106,6 @@ func (m *Manager) Apply(pid int) (err error) {
if m.Cgroups == nil {
return nil
}
m.mu.Lock()
defer m.mu.Unlock()
var c = m.Cgroups
@@ -114,8 +114,8 @@ func (m *Manager) Apply(pid int) (err error) {
return err
}
m.Paths = make(map[string]string)
if c.Paths != nil {
paths := make(map[string]string)
for name, path := range c.Paths {
_, err := d.path(name)
if err != nil {
@@ -124,30 +124,32 @@ func (m *Manager) Apply(pid int) (err error) {
}
return err
}
m.Paths[name] = path
paths[name] = path
}
m.Paths = paths
return cgroups.EnterPid(m.Paths, pid)
}
m.mu.Lock()
defer m.mu.Unlock()
paths := make(map[string]string)
for _, sys := range subsystems {
if err := sys.Apply(d); err != nil {
return err
}
// TODO: Apply should, ideally, be reentrant or be broken up into a separate
// create and join phase so that the cgroup hierarchy for a container can be
// created then join consists of writing the process pids to cgroup.procs
p, err := d.path(sys.Name())
if err != nil {
// The non-presence of the devices subsystem is
// considered fatal for security reasons.
if cgroups.IsNotFound(err) && sys.Name() != "devices" {
if cgroups.IsNotFound(err) {
continue
}
return err
}
m.Paths[sys.Name()] = p
if err := sys.Apply(d); err != nil {
return err
}
paths[sys.Name()] = p
}
m.Paths = paths
return nil
}
@@ -188,15 +190,18 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) {
}
func (m *Manager) Set(container *configs.Config) error {
// If Paths are set, then we are just joining cgroups paths
// and there is no need to set any values.
if m.Cgroups.Paths != nil {
return nil
}
paths := m.GetPaths()
for _, sys := range subsystems {
path := paths[sys.Name()]
// Generate fake cgroup data.
d, err := getCgroupData(container.Cgroups, -1)
if err != nil {
return err
}
// Get the path, but don't error out if the cgroup wasn't found.
path, err := d.path(sys.Name())
if err != nil && !cgroups.IsNotFound(err) {
return err
}
if err := sys.Set(path, container.Cgroups); err != nil {
return err
}
@@ -213,8 +218,14 @@ func (m *Manager) Set(container *configs.Config) error {
// Freeze toggles the container's freezer cgroup depending on the state
// provided
func (m *Manager) Freeze(state configs.FreezerState) error {
paths := m.GetPaths()
dir := paths["freezer"]
d, err := getCgroupData(m.Cgroups, 0)
if err != nil {
return err
}
dir, err := d.path("freezer")
if err != nil {
return err
}
prevState := m.Cgroups.Resources.Freezer
m.Cgroups.Resources.Freezer = state
freezer, err := subsystems.Get("freezer")
@@ -230,13 +241,28 @@ func (m *Manager) Freeze(state configs.FreezerState) error {
}
func (m *Manager) GetPids() ([]int, error) {
paths := m.GetPaths()
return cgroups.GetPids(paths["devices"])
dir, err := getCgroupPath(m.Cgroups)
if err != nil {
return nil, err
}
return cgroups.GetPids(dir)
}
func (m *Manager) GetAllPids() ([]int, error) {
paths := m.GetPaths()
return cgroups.GetAllPids(paths["devices"])
dir, err := getCgroupPath(m.Cgroups)
if err != nil {
return nil, err
}
return cgroups.GetAllPids(dir)
}
func getCgroupPath(c *configs.Cgroup) (string, error) {
d, err := getCgroupData(c, 0)
if err != nil {
return "", err
}
return d.path("devices")
}
func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) {
@@ -293,7 +319,7 @@ func (raw *cgroupData) path(subsystem string) (string, error) {
// If the cgroup name/path is absolute do not look relative to the cgroup of the init process.
if filepath.IsAbs(raw.innerPath) {
// Sometimes subsystems can be mounted together as 'cpu,cpuacct'.
// Sometimes subsystems can be mounted togethger as 'cpu,cpuacct'.
return filepath.Join(raw.root, filepath.Base(mnt), raw.innerPath), nil
}
@@ -313,7 +339,7 @@ func (raw *cgroupData) join(subsystem string) (string, error) {
if err := os.MkdirAll(path, 0755); err != nil {
return "", err
}
if err := cgroups.WriteCgroupProc(path, raw.pid); err != nil {
if err := writeFile(path, CgroupProcesses, strconv.Itoa(raw.pid)); err != nil {
return "", err
}
return path, nil
@@ -323,7 +349,7 @@ func writeFile(dir, file, data string) error {
// Normally dir should not be empty, one case is that cgroup subsystem
// is not mounted, we will get empty dir, and we want it fail here.
if dir == "" {
return fmt.Errorf("no such directory for %s", file)
return fmt.Errorf("no such directory for %s.", file)
}
if err := ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700); err != nil {
return fmt.Errorf("failed to write %v to %v: %v", data, file, err)

View File

@@ -22,48 +22,10 @@ func (s *CpuGroup) Name() string {
func (s *CpuGroup) Apply(d *cgroupData) error {
// We always want to join the cpu group, to allow fair cpu scheduling
// on a container basis
path, err := d.path("cpu")
_, err := d.join("cpu")
if err != nil && !cgroups.IsNotFound(err) {
return err
}
return s.ApplyDir(path, d.config, d.pid)
}
func (s *CpuGroup) ApplyDir(path string, cgroup *configs.Cgroup, pid int) error {
// This might happen if we have no cpu cgroup mounted.
// Just do nothing and don't fail.
if path == "" {
return nil
}
if err := os.MkdirAll(path, 0755); err != nil {
return err
}
// We should set the real-Time group scheduling settings before moving
// in the process because if the process is already in SCHED_RR mode
// and no RT bandwidth is set, adding it will fail.
if err := s.SetRtSched(path, cgroup); err != nil {
return err
}
// because we are not using d.join we need to place the pid into the procs file
// unlike the other subsystems
if err := cgroups.WriteCgroupProc(path, pid); err != nil {
return err
}
return nil
}
func (s *CpuGroup) SetRtSched(path string, cgroup *configs.Cgroup) error {
if cgroup.Resources.CpuRtPeriod != 0 {
if err := writeFile(path, "cpu.rt_period_us", strconv.FormatInt(cgroup.Resources.CpuRtPeriod, 10)); err != nil {
return err
}
}
if cgroup.Resources.CpuRtRuntime != 0 {
if err := writeFile(path, "cpu.rt_runtime_us", strconv.FormatInt(cgroup.Resources.CpuRtRuntime, 10)); err != nil {
return err
}
}
return nil
}
@@ -83,8 +45,15 @@ func (s *CpuGroup) Set(path string, cgroup *configs.Cgroup) error {
return err
}
}
if err := s.SetRtSched(path, cgroup); err != nil {
return err
if cgroup.Resources.CpuRtPeriod != 0 {
if err := writeFile(path, "cpu.rt_period_us", strconv.FormatInt(cgroup.Resources.CpuRtPeriod, 10)); err != nil {
return err
}
}
if cgroup.Resources.CpuRtRuntime != 0 {
if err := writeFile(path, "cpu.rt_runtime_us", strconv.FormatInt(cgroup.Resources.CpuRtRuntime, 10)); err != nil {
return err
}
}
return nil

View File

@@ -8,6 +8,7 @@ import (
"io/ioutil"
"os"
"path/filepath"
"strconv"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
@@ -61,29 +62,12 @@ func (s *CpusetGroup) ApplyDir(dir string, cgroup *configs.Cgroup, pid int) erro
if err != nil {
return err
}
// 'ensureParent' start with parent because we don't want to
// explicitly inherit from parent, it could conflict with
// 'cpuset.cpu_exclusive'.
if err := s.ensureParent(filepath.Dir(dir), root); err != nil {
if err := s.ensureParent(dir, root); err != nil {
return err
}
if err := os.MkdirAll(dir, 0755); err != nil {
return err
}
// We didn't inherit cpuset configs from parent, but we have
// to ensure cpuset configs are set before moving task into the
// cgroup.
// The logic is, if user specified cpuset configs, use these
// specified configs, otherwise, inherit from parent. This makes
// cpuset configs work correctly with 'cpuset.cpu_exclusive', and
// keep backward compatbility.
if err := s.ensureCpusAndMems(dir, cgroup); err != nil {
return err
}
// because we are not using d.join we need to place the pid into the procs file
// unlike the other subsystems
if err := cgroups.WriteCgroupProc(dir, pid); err != nil {
if err := writeFile(dir, "cgroup.procs", strconv.Itoa(pid)); err != nil {
return err
}
@@ -153,10 +137,3 @@ func (s *CpusetGroup) copyIfNeeded(current, parent string) error {
func (s *CpusetGroup) isEmpty(b []byte) bool {
return len(bytes.Trim(b, "\n")) == 0
}
func (s *CpusetGroup) ensureCpusAndMems(path string, cgroup *configs.Cgroup) error {
if err := s.Set(path, cgroup); err != nil {
return err
}
return s.copyIfNeeded(path, filepath.Dir(path))
}

View File

@@ -43,23 +43,21 @@ func (s *DevicesGroup) Set(path string, cgroup *configs.Cgroup) error {
}
return nil
}
if cgroup.Resources.AllowAllDevices != nil {
if *cgroup.Resources.AllowAllDevices == false {
if err := writeFile(path, "devices.deny", "a"); err != nil {
return err
}
for _, dev := range cgroup.Resources.AllowedDevices {
if err := writeFile(path, "devices.allow", dev.CgroupString()); err != nil {
return err
}
}
return nil
}
if err := writeFile(path, "devices.allow", "a"); err != nil {
if !cgroup.Resources.AllowAllDevices {
if err := writeFile(path, "devices.deny", "a"); err != nil {
return err
}
for _, dev := range cgroup.Resources.AllowedDevices {
if err := writeFile(path, "devices.allow", dev.CgroupString()); err != nil {
return err
}
}
return nil
}
if err := writeFile(path, "devices.allow", "a"); err != nil {
return err
}
for _, dev := range cgroup.Resources.DeniedDevices {

View File

@@ -5,23 +5,15 @@ package fs
import (
"bufio"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"strings"
"syscall"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
)
const (
cgroupKernelMemoryLimit = "memory.kmem.limit_in_bytes"
cgroupMemorySwapLimit = "memory.memsw.limit_in_bytes"
cgroupMemoryLimit = "memory.limit_in_bytes"
)
type MemoryGroup struct {
}
@@ -33,23 +25,20 @@ func (s *MemoryGroup) Apply(d *cgroupData) (err error) {
path, err := d.path("memory")
if err != nil && !cgroups.IsNotFound(err) {
return err
} else if path == "" {
return nil
}
if memoryAssigned(d.config) {
if _, err := os.Stat(path); os.IsNotExist(err) {
if path != "" {
if err := os.MkdirAll(path, 0755); err != nil {
return err
}
// Only enable kernel memory accouting when this cgroup
// is created by libcontainer, otherwise we might get
// error when people use `cgroupsPath` to join an existed
// cgroup whose kernel memory is not initialized.
if err := EnableKernelMemoryAccounting(path); err != nil {
return err
}
}
// We have to set kernel memory here, as we can't change it once
// processes have been attached.
if err := s.SetKernelMemory(path, d.config); err != nil {
return err
}
}
defer func() {
if err != nil {
os.RemoveAll(path)
@@ -65,113 +54,33 @@ func (s *MemoryGroup) Apply(d *cgroupData) (err error) {
return nil
}
func EnableKernelMemoryAccounting(path string) error {
// Check if kernel memory is enabled
// We have to limit the kernel memory here as it won't be accounted at all
// until a limit is set on the cgroup and limit cannot be set once the
// cgroup has children, or if there are already tasks in the cgroup.
for _, i := range []int64{1, -1} {
if err := setKernelMemory(path, i); err != nil {
func (s *MemoryGroup) SetKernelMemory(path string, cgroup *configs.Cgroup) error {
// This has to be done separately because it has special constraints (it
// can't be done after there are processes attached to the cgroup).
if cgroup.Resources.KernelMemory > 0 {
if err := writeFile(path, "memory.kmem.limit_in_bytes", strconv.FormatInt(cgroup.Resources.KernelMemory, 10)); err != nil {
return err
}
}
return nil
}
func setKernelMemory(path string, kernelMemoryLimit int64) error {
if path == "" {
return fmt.Errorf("no such directory for %s", cgroupKernelMemoryLimit)
}
if !cgroups.PathExists(filepath.Join(path, cgroupKernelMemoryLimit)) {
// kernel memory is not enabled on the system so we should do nothing
return nil
}
if err := ioutil.WriteFile(filepath.Join(path, cgroupKernelMemoryLimit), []byte(strconv.FormatInt(kernelMemoryLimit, 10)), 0700); err != nil {
// Check if the error number returned by the syscall is "EBUSY"
// The EBUSY signal is returned on attempts to write to the
// memory.kmem.limit_in_bytes file if the cgroup has children or
// once tasks have been attached to the cgroup
if pathErr, ok := err.(*os.PathError); ok {
if errNo, ok := pathErr.Err.(syscall.Errno); ok {
if errNo == syscall.EBUSY {
return fmt.Errorf("failed to set %s, because either tasks have already joined this cgroup or it has children", cgroupKernelMemoryLimit)
}
}
}
return fmt.Errorf("failed to write %v to %v: %v", kernelMemoryLimit, cgroupKernelMemoryLimit, err)
}
return nil
}
func setMemoryAndSwap(path string, cgroup *configs.Cgroup) error {
// If the memory update is set to -1 we should also set swap to -1
// -1 means unlimited memory
if cgroup.Resources.Memory == -1 {
// Only set swap if it's enbled in kernel
if cgroups.PathExists(filepath.Join(path, cgroupMemorySwapLimit)) {
cgroup.Resources.MemorySwap = -1
}
}
// When memory and swap memory are both set, we need to handle the cases
// for updating container.
if cgroup.Resources.Memory != 0 && cgroup.Resources.MemorySwap != 0 {
memoryUsage, err := getMemoryData(path, "")
if err != nil {
return err
}
// When update memory limit, we should adapt the write sequence
// for memory and swap memory, so it won't fail because the new
// value and the old value don't fit kernel's validation.
if cgroup.Resources.MemorySwap == -1 || memoryUsage.Limit < uint64(cgroup.Resources.MemorySwap) {
if err := writeFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
return err
}
if err := writeFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
return err
}
} else {
if err := writeFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
return err
}
if err := writeFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
return err
}
}
} else {
if cgroup.Resources.Memory != 0 {
if err := writeFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
return err
}
}
if cgroup.Resources.MemorySwap != 0 {
if err := writeFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
return err
}
}
}
return nil
}
func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error {
if err := setMemoryAndSwap(path, cgroup); err != nil {
return err
}
if cgroup.Resources.KernelMemory != 0 {
if err := setKernelMemory(path, cgroup.Resources.KernelMemory); err != nil {
if cgroup.Resources.Memory != 0 {
if err := writeFile(path, "memory.limit_in_bytes", strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
return err
}
}
if cgroup.Resources.MemoryReservation != 0 {
if err := writeFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemoryReservation, 10)); err != nil {
return err
}
}
if cgroup.Resources.MemorySwap > 0 {
if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
return err
}
}
if cgroup.Resources.KernelMemoryTCP != 0 {
if err := writeFile(path, "memory.kmem.tcp.limit_in_bytes", strconv.FormatInt(cgroup.Resources.KernelMemoryTCP, 10)); err != nil {
return err

View File

@@ -3,8 +3,6 @@
package fs
import (
"strconv"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
)
@@ -25,8 +23,8 @@ func (s *NetClsGroup) Apply(d *cgroupData) error {
}
func (s *NetClsGroup) Set(path string, cgroup *configs.Cgroup) error {
if cgroup.Resources.NetClsClassid != 0 {
if err := writeFile(path, "net_cls.classid", strconv.FormatUint(uint64(cgroup.Resources.NetClsClassid), 10)); err != nil {
if cgroup.Resources.NetClsClassid != "" {
if err := writeFile(path, "net_cls.classid", cgroup.Resources.NetClsClassid); err != nil {
return err
}
}

View File

@@ -12,6 +12,7 @@ import (
)
var (
ErrNotSupportStat = errors.New("stats are not supported for subsystem")
ErrNotValidFormat = errors.New("line is not a valid key value format")
)

View File

@@ -11,7 +11,6 @@ type ThrottlingData struct {
ThrottledTime uint64 `json:"throttled_time,omitempty"`
}
// CpuUsage denotes the usage of a CPU.
// All CPU stats are aggregate since container inception.
type CpuUsage struct {
// Total CPU time consumed.

View File

@@ -16,24 +16,37 @@ import (
"github.com/docker/go-units"
)
const (
cgroupNamePrefix = "name="
CgroupProcesses = "cgroup.procs"
)
const cgroupNamePrefix = "name="
// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt
// https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt
func FindCgroupMountpoint(subsystem string) (string, error) {
mnt, _, err := FindCgroupMountpointAndRoot(subsystem)
return mnt, err
}
func FindCgroupMountpointAndRoot(subsystem string) (string, string, error) {
// We are not using mount.GetMounts() because it's super-inefficient,
// parsing it directly sped up x10 times because of not using Sscanf.
// It was one of two major performance drawbacks in container start.
if !isSubsystemAvailable(subsystem) {
return "", "", NewNotFoundError(subsystem)
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return "", err
}
defer f.Close()
scanner := bufio.NewScanner(f)
for scanner.Scan() {
txt := scanner.Text()
fields := strings.Split(txt, " ")
for _, opt := range strings.Split(fields[len(fields)-1], ",") {
if opt == subsystem {
return fields[4], nil
}
}
}
if err := scanner.Err(); err != nil {
return "", err
}
return "", NewNotFoundError(subsystem)
}
func FindCgroupMountpointAndRoot(subsystem string) (string, string, error) {
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return "", "", err
@@ -57,15 +70,6 @@ func FindCgroupMountpointAndRoot(subsystem string) (string, string, error) {
return "", "", NewNotFoundError(subsystem)
}
func isSubsystemAvailable(subsystem string) bool {
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil {
return false
}
_, avail := cgroups[subsystem]
return avail
}
func FindCgroupMountpointDir() (string, error) {
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
@@ -117,17 +121,16 @@ func (m Mount) GetThisCgroupDir(cgroups map[string]string) (string, error) {
return getControllerPath(m.Subsystems[0], cgroups)
}
func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount, error) {
func getCgroupMountsHelper(ss map[string]bool, mi io.Reader) ([]Mount, error) {
res := make([]Mount, 0, len(ss))
scanner := bufio.NewScanner(mi)
numFound := 0
for scanner.Scan() && numFound < len(ss) {
for scanner.Scan() {
txt := scanner.Text()
sepIdx := strings.Index(txt, " - ")
if sepIdx == -1 {
return nil, fmt.Errorf("invalid mountinfo format")
}
if txt[sepIdx+3:sepIdx+10] == "cgroup2" || txt[sepIdx+3:sepIdx+9] != "cgroup" {
if txt[sepIdx+3:sepIdx+9] != "cgroup" {
continue
}
fields := strings.Split(txt, " ")
@@ -136,16 +139,11 @@ func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount,
Root: fields[3],
}
for _, opt := range strings.Split(fields[len(fields)-1], ",") {
if !ss[opt] {
continue
}
if strings.HasPrefix(opt, cgroupNamePrefix) {
m.Subsystems = append(m.Subsystems, opt[len(cgroupNamePrefix):])
} else {
m.Subsystems = append(m.Subsystems, opt)
}
if !all {
numFound++
if ss[opt] {
m.Subsystems = append(m.Subsystems, opt)
}
}
res = append(res, m)
@@ -156,28 +154,26 @@ func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount,
return res, nil
}
// GetCgroupMounts returns the mounts for the cgroup subsystems.
// all indicates whether to return just the first instance or all the mounts.
func GetCgroupMounts(all bool) ([]Mount, error) {
func GetCgroupMounts() ([]Mount, error) {
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return nil, err
}
defer f.Close()
allSubsystems, err := ParseCgroupFile("/proc/self/cgroup")
all, err := GetAllSubsystems()
if err != nil {
return nil, err
}
allMap := make(map[string]bool)
for s := range allSubsystems {
for _, s := range all {
allMap[s] = true
}
return getCgroupMountsHelper(allMap, f, all)
return getCgroupMountsHelper(allMap, f)
}
// GetAllSubsystems returns all the cgroup subsystems supported by the kernel
// Returns all the cgroup subsystems supported by the kernel
func GetAllSubsystems() ([]string, error) {
f, err := os.Open("/proc/cgroups")
if err != nil {
@@ -189,6 +185,9 @@ func GetAllSubsystems() ([]string, error) {
s := bufio.NewScanner(f)
for s.Scan() {
if err := s.Err(); err != nil {
return nil, err
}
text := s.Text()
if text[0] != '#' {
parts := strings.Fields(text)
@@ -197,13 +196,10 @@ func GetAllSubsystems() ([]string, error) {
}
}
}
if err := s.Err(); err != nil {
return nil, err
}
return subsystems, nil
}
// GetThisCgroupDir returns the relative path to the cgroup docker is running in.
// Returns the relative path to the cgroup docker is running in.
func GetThisCgroupDir(subsystem string) (string, error) {
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil {
@@ -224,7 +220,7 @@ func GetInitCgroupDir(subsystem string) (string, error) {
}
func readProcsFile(dir string) ([]int, error) {
f, err := os.Open(filepath.Join(dir, CgroupProcesses))
f, err := os.Open(filepath.Join(dir, "cgroup.procs"))
if err != nil {
return nil, err
}
@@ -247,8 +243,6 @@ func readProcsFile(dir string) ([]int, error) {
return out, nil
}
// ParseCgroupFile parses the given cgroup file, typically from
// /proc/<pid>/cgroup, into a map of subgroups to cgroup names.
func ParseCgroupFile(path string) (map[string]string, error) {
f, err := os.Open(path)
if err != nil {
@@ -256,35 +250,21 @@ func ParseCgroupFile(path string) (map[string]string, error) {
}
defer f.Close()
return parseCgroupFromReader(f)
}
// helper function for ParseCgroupFile to make testing easier
func parseCgroupFromReader(r io.Reader) (map[string]string, error) {
s := bufio.NewScanner(r)
s := bufio.NewScanner(f)
cgroups := make(map[string]string)
for s.Scan() {
text := s.Text()
// from cgroups(7):
// /proc/[pid]/cgroup
// ...
// For each cgroup hierarchy ... there is one entry
// containing three colon-separated fields of the form:
// hierarchy-ID:subsystem-list:cgroup-path
parts := strings.SplitN(text, ":", 3)
if len(parts) < 3 {
return nil, fmt.Errorf("invalid cgroup entry: must contain at least two colons: %v", text)
if err := s.Err(); err != nil {
return nil, err
}
text := s.Text()
parts := strings.Split(text, ":")
for _, subs := range strings.Split(parts[1], ",") {
cgroups[subs] = parts[2]
}
}
if err := s.Err(); err != nil {
return nil, err
}
return cgroups, nil
}
@@ -311,7 +291,8 @@ func PathExists(path string) bool {
func EnterPid(cgroupPaths map[string]string, pid int) error {
for _, path := range cgroupPaths {
if PathExists(path) {
if err := WriteCgroupProc(path, pid); err != nil {
if err := ioutil.WriteFile(filepath.Join(path, "cgroup.procs"),
[]byte(strconv.Itoa(pid)), 0700); err != nil {
return err
}
}
@@ -380,7 +361,7 @@ func GetAllPids(path string) ([]int, error) {
// collect pids from all sub-cgroups
err := filepath.Walk(path, func(p string, info os.FileInfo, iErr error) error {
dir, file := filepath.Split(p)
if file != CgroupProcesses {
if file != "cgroup.procs" {
return nil
}
if iErr != nil {
@@ -395,20 +376,3 @@ func GetAllPids(path string) ([]int, error) {
})
return pids, err
}
// WriteCgroupProc writes the specified pid into the cgroup's cgroup.procs file
func WriteCgroupProc(dir string, pid int) error {
// Normally dir should not be empty, one case is that cgroup subsystem
// is not mounted, we will get empty dir, and we want it fail here.
if dir == "" {
return fmt.Errorf("no such directory for %s", CgroupProcesses)
}
// Dont attach any pid to the cgroup if -1 is specified as a pid
if pid != -1 {
if err := ioutil.WriteFile(filepath.Join(dir, CgroupProcesses), []byte(strconv.Itoa(pid)), 0700); err != nil {
return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err)
}
}
return nil
}

View File

@@ -22,7 +22,7 @@ type Cgroup struct {
// The path is assumed to be relative to the host system cgroup mountpoint.
Path string `json:"path"`
// ScopePrefix describes prefix for the scope name
// ScopePrefix decribes prefix for the scope name
ScopePrefix string `json:"scope_prefix"`
// Paths represent the absolute cgroups paths to join.
@@ -36,7 +36,7 @@ type Cgroup struct {
type Resources struct {
// If this is true allow access to any kind of device within the container. If false, allow access only to devices explicitly listed in the allowed_devices list.
// Deprecated
AllowAllDevices *bool `json:"allow_all_devices,omitempty"`
AllowAllDevices bool `json:"allow_all_devices,omitempty"`
// Deprecated
AllowedDevices []*Device `json:"allowed_devices,omitempty"`
// Deprecated
@@ -69,10 +69,10 @@ type Resources struct {
CpuPeriod int64 `json:"cpu_period"`
// How many time CPU will use in realtime scheduling (in usecs).
CpuRtRuntime int64 `json:"cpu_rt_quota"`
CpuRtRuntime int64 `json:"cpu_quota"`
// CPU period to be used for realtime scheduling (in usecs).
CpuRtPeriod int64 `json:"cpu_rt_period"`
CpuRtPeriod int64 `json:"cpu_period"`
// CPU to use
CpusetCpus string `json:"cpuset_cpus"`
@@ -95,7 +95,7 @@ type Resources struct {
// IO read rate limit per cgroup per device, bytes per second.
BlkioThrottleReadBpsDevice []*ThrottleDevice `json:"blkio_throttle_read_bps_device"`
// IO write rate limit per cgroup per device, bytes per second.
// IO write rate limit per cgroup per divice, bytes per second.
BlkioThrottleWriteBpsDevice []*ThrottleDevice `json:"blkio_throttle_write_bps_device"`
// IO read rate limit per cgroup per device, IO per second.
@@ -120,5 +120,5 @@ type Resources struct {
NetPrioIfpriomap []*IfPrioMap `json:"net_prio_ifpriomap"`
// Set class identifier for container's network packets
NetClsClassid uint32 `json:"net_cls_classid_u"`
NetClsClassid string `json:"net_cls_classid"`
}

View File

@@ -8,7 +8,6 @@ import (
"time"
"github.com/Sirupsen/logrus"
"github.com/opencontainers/runtime-spec/specs-go"
)
type Rlimit struct {
@@ -34,7 +33,7 @@ type Seccomp struct {
Syscalls []*Syscall `json:"syscalls"`
}
// Action is taken upon rule match in Seccomp
// An action to be taken upon rule match in Seccomp
type Action int
const (
@@ -45,7 +44,7 @@ const (
Trace
)
// Operator is a comparison operator to be used when matching syscall arguments in Seccomp
// A comparison operator to be used when matching syscall arguments in Seccomp
type Operator int
const (
@@ -58,7 +57,7 @@ const (
MaskEqualTo
)
// Arg is a rule to match a specific syscall argument in Seccomp
// A rule to match a specific syscall argument in Seccomp
type Arg struct {
Index uint `json:"index"`
Value uint64 `json:"value"`
@@ -66,7 +65,7 @@ type Arg struct {
Op Operator `json:"op"`
}
// Syscall is a rule to match a syscall in Seccomp
// An rule to match a syscall in Seccomp
type Syscall struct {
Name string `json:"name"`
Action Action `json:"action"`
@@ -86,6 +85,11 @@ type Config struct {
// that the parent process dies.
ParentDeathSignal int `json:"parent_death_signal"`
// PivotDir allows a custom directory inside the container's root filesystem to be used as pivot, when NoPivotRoot is not set.
// When a custom PivotDir not set, a temporary dir inside the root filesystem will be used. The pivot dir needs to be writeable.
// This is required when using read only root filesystems. In these cases, a read/writeable path can be (bind) mounted somewhere inside the root filesystem to act as pivot.
PivotDir string `json:"pivot_dir"`
// Path to a directory containing the container's root filesystem.
Rootfs string `json:"rootfs"`
@@ -144,6 +148,10 @@ type Config struct {
// More information about kernel oom score calculation here: https://lwn.net/Articles/317814/
OomScoreAdj int `json:"oom_score_adj"`
// AdditionalGroups specifies the gids that should be added to supplementary groups
// in addition to those that the user belongs to.
AdditionalGroups []string `json:"additional_groups"`
// UidMappings is an array of User ID mappings for User Namespaces
UidMappings []IDMap `json:"uid_mappings"`
@@ -179,10 +187,6 @@ type Config struct {
// Labels are user defined metadata that is stored in the config and populated on the state
Labels []string `json:"labels"`
// NoNewKeyring will not allocated a new session keyring for the container. It will use the
// callers keyring in this case.
NoNewKeyring bool `json:"no_new_keyring"`
}
type Hooks struct {
@@ -244,14 +248,19 @@ func (hooks Hooks) MarshalJSON() ([]byte, error) {
}
// HookState is the payload provided to a hook on execution.
type HookState specs.State
type HookState struct {
Version string `json:"version"`
ID string `json:"id"`
Pid int `json:"pid"`
Root string `json:"root"`
}
type Hook interface {
// Run executes the hook with the provided state.
Run(HookState) error
}
// NewFunctionHook will call the provided function when the hook is run.
// NewFunctionHooks will call the provided function when the hook is run.
func NewFunctionHook(f func(HookState) error) FuncHook {
return FuncHook{
run: f,
@@ -274,7 +283,7 @@ type Command struct {
Timeout *time.Duration `json:"timeout"`
}
// NewCommandHook will execute the provided command when the hook is run.
// NewCommandHooks will execute the provided command when the hook is run.
func NewCommandHook(cmd Command) CommandHook {
return CommandHook{
Command: cmd,
@@ -290,38 +299,25 @@ func (c Command) Run(s HookState) error {
if err != nil {
return err
}
var stdout, stderr bytes.Buffer
cmd := exec.Cmd{
Path: c.Path,
Args: c.Args,
Env: c.Env,
Stdin: bytes.NewReader(b),
Stdout: &stdout,
Stderr: &stderr,
}
if err := cmd.Start(); err != nil {
return err
Path: c.Path,
Args: c.Args,
Env: c.Env,
Stdin: bytes.NewReader(b),
}
errC := make(chan error, 1)
go func() {
err := cmd.Wait()
if err != nil {
err = fmt.Errorf("error running hook: %v, stdout: %s, stderr: %s", err, stdout.String(), stderr.String())
}
errC <- err
errC <- cmd.Run()
}()
var timerCh <-chan time.Time
if c.Timeout != nil {
timer := time.NewTimer(*c.Timeout)
defer timer.Stop()
timerCh = timer.C
}
select {
case err := <-errC:
return err
case <-timerCh:
cmd.Process.Kill()
cmd.Wait()
return fmt.Errorf("hook ran past specified timeout of %.1fs", c.Timeout.Seconds())
select {
case err := <-errC:
return err
case <-time.After(*c.Timeout):
cmd.Process.Kill()
cmd.Wait()
return fmt.Errorf("hook ran past specified timeout of %.1fs", c.Timeout.Seconds())
}
}
return <-errC
}

View File

@@ -4,7 +4,7 @@ package configs
import "fmt"
// HostUID gets the root uid for the process on host which could be non-zero
// Gets the root uid for the process on host which could be non-zero
// when user namespaces are enabled.
func (c Config) HostUID() (int, error) {
if c.Namespaces.Contains(NEWUSER) {
@@ -21,7 +21,7 @@ func (c Config) HostUID() (int, error) {
return 0, nil
}
// HostGID gets the root gid for the process on host which could be non-zero
// Gets the root gid for the process on host which could be non-zero
// when user namespaces are enabled.
func (c Config) HostGID() (int, error) {
if c.Namespaces.Contains(NEWUSER) {

View File

@@ -3,7 +3,7 @@
package configs
var (
// DefaultSimpleDevices are devices that are to be both allowed and created.
// These are devices that are to be both allowed and created.
DefaultSimpleDevices = []*Device{
// /dev/null and zero
{
@@ -107,5 +107,19 @@ var (
Permissions: "rwm",
},
}, DefaultSimpleDevices...)
DefaultAutoCreatedDevices = append([]*Device{}, DefaultSimpleDevices...)
DefaultAutoCreatedDevices = append([]*Device{
{
// /dev/fuse is created but not allowed.
// This is to allow java to work. Because java
// Insists on there being a /dev/fuse
// https://github.com/docker/docker/issues/514
// https://github.com/docker/docker/issues/2393
//
Path: "/dev/fuse",
Type: 'c',
Major: 10,
Minor: 229,
Permissions: "rwm",
},
}, DefaultSimpleDevices...)
)

View File

@@ -1,11 +1,5 @@
package configs
const (
// EXT_COPYUP is a directive to copy up the contents of a directory when
// a tmpfs is mounted over it.
EXT_COPYUP = 1 << iota
)
type Mount struct {
// Source path for the mount.
Source string `json:"source"`
@@ -28,9 +22,6 @@ type Mount struct {
// Relabel source if set, "z" indicates shared, "Z" indicates unshared.
Relabel string `json:"relabel"`
// Extensions are additional flags that are specific to runc.
Extensions int `json:"extensions"`
// Optional Command to be run before Source is mounted.
PremountCmds []Command `json:"premount_cmds"`

View File

@@ -4,10 +4,12 @@ package configs
func (n *Namespace) Syscall() int {
panic("No namespace syscall support")
return 0
}
// CloneFlags parses the container's Namespaces options to set the correct
// flags on clone, unshare. This function returns flags only for new namespaces.
func (n *Namespaces) CloneFlags() uintptr {
panic("No namespace syscall support")
return uintptr(0)
}

View File

@@ -22,8 +22,8 @@ var (
supportedNamespaces = make(map[NamespaceType]bool)
)
// NsName converts the namespace type to its filename
func NsName(ns NamespaceType) string {
// nsToFile converts the namespace type to its filename
func nsToFile(ns NamespaceType) string {
switch ns {
case NEWNET:
return "net"
@@ -50,7 +50,7 @@ func IsNamespaceSupported(ns NamespaceType) bool {
if ok {
return supported
}
nsFile := NsName(ns)
nsFile := nsToFile(ns)
// if the namespace type is unknown, just return false
if nsFile == "" {
return false
@@ -84,7 +84,7 @@ func (n *Namespace) GetPath(pid int) string {
if n.Path != "" {
return n.Path
}
return fmt.Sprintf("/proc/%d/ns/%s", pid, NsName(n.Type))
return fmt.Sprintf("/proc/%d/ns/%s", pid, nsToFile(n.Type))
}
func (n *Namespaces) Remove(t NamespaceType) bool {

View File

@@ -100,12 +100,17 @@ func Setctty() error {
return nil
}
// RunningInUserNS detects whether we are currently running in a user namespace.
// Copied from github.com/lxc/lxd/shared/util.go
/*
* Detect whether we are currently running in a user namespace.
* Copied from github.com/lxc/lxd/shared/util.go
*/
func RunningInUserNS() bool {
file, err := os.Open("/proc/self/uid_map")
if err != nil {
// This kernel-provided file only exists if user namespaces are supported
/*
* This kernel-provided file only exists if user namespaces are
* supported
*/
return false
}
defer file.Close()

View File

@@ -14,10 +14,8 @@ func GetProcessStartTime(pid int) (string, error) {
if err != nil {
return "", err
}
return parseStartTime(string(data))
}
func parseStartTime(stat string) (string, error) {
parts := strings.Split(string(data), " ")
// the starttime is located at pos 22
// from the man page
//
@@ -25,19 +23,5 @@ func parseStartTime(stat string) (string, error) {
// (22) The time the process started after system boot. In kernels before Linux 2.6, this
// value was expressed in jiffies. Since Linux 2.6, the value is expressed in clock ticks
// (divide by sysconf(_SC_CLK_TCK)).
//
// NOTE:
// pos 2 could contain space and is inside `(` and `)`:
// (2) comm %s
// The filename of the executable, in parentheses.
// This is visible whether or not the executable is
// swapped out.
//
// the following is an example:
// 89653 (gunicorn: maste) S 89630 89653 89653 0 -1 4194560 29689 28896 0 3 146 32 76 19 20 0 1 0 2971844 52965376 3920 18446744073709551615 1 1 0 0 0 0 0 16781312 137447943 0 0 0 17 1 0 0 0 0 0 0 0 0 0 0 0 0 0
// get parts after last `)`:
s := strings.Split(stat, ")")
parts := strings.Split(strings.TrimSpace(s[len(s)-1]), " ")
return parts[22-3], nil // starts at 3 (after the filename pos `2`)
return parts[22-1], nil // starts at 1
}

View File

@@ -8,7 +8,7 @@ import (
// Setuid sets the uid of the calling thread to the specified uid.
func Setuid(uid int) (err error) {
_, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID32, uintptr(uid), 0, 0)
_, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID, uintptr(uid), 0, 0)
if e1 != 0 {
err = e1
}

View File

@@ -199,16 +199,18 @@ type ExecUser struct {
// files cannot be opened for any reason, the error is ignored and a nil
// io.Reader is passed instead.
func GetExecUserPath(userSpec string, defaults *ExecUser, passwdPath, groupPath string) (*ExecUser, error) {
var passwd, group io.Reader
if passwdFile, err := os.Open(passwdPath); err == nil {
passwd = passwdFile
defer passwdFile.Close()
passwd, err := os.Open(passwdPath)
if err != nil {
passwd = nil
} else {
defer passwd.Close()
}
if groupFile, err := os.Open(groupPath); err == nil {
group = groupFile
defer groupFile.Close()
group, err := os.Open(groupPath)
if err != nil {
group = nil
} else {
defer group.Close()
}
return GetExecUser(userSpec, defaults, passwd, group)
@@ -341,7 +343,7 @@ func GetExecUser(userSpec string, defaults *ExecUser, passwd, group io.Reader) (
if len(groups) > 0 {
// First match wins, even if there's more than one matching entry.
user.Gid = groups[0].Gid
} else {
} else if groupArg != "" {
// If we can't find a group with the given name, the only other valid
// option is if it's a numeric group name with no associated entry in group.
@@ -431,11 +433,9 @@ func GetAdditionalGroups(additionalGroups []string, group io.Reader) ([]int, err
// that opens the groupPath given and gives it as an argument to
// GetAdditionalGroups.
func GetAdditionalGroupsPath(additionalGroups []string, groupPath string) ([]int, error) {
var group io.Reader
if groupFile, err := os.Open(groupPath); err == nil {
group = groupFile
defer groupFile.Close()
group, err := os.Open(groupPath)
if err == nil {
defer group.Close()
}
return GetAdditionalGroups(additionalGroups, group)
}

View File

@@ -1,148 +0,0 @@
/*
* Copyright 2016 SUSE LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <unistd.h>
#include "cmsg.h"
#define error(fmt, ...) \
({ \
fprintf(stderr, "nsenter: " fmt ": %m\n", ##__VA_ARGS__); \
errno = ECOMM; \
goto err; /* return value */ \
})
/*
* Sends a file descriptor along the sockfd provided. Returns the return
* value of sendmsg(2). Any synchronisation and preparation of state
* should be done external to this (we expect the other side to be in
* recvfd() in the code).
*/
ssize_t sendfd(int sockfd, struct file_t file)
{
struct msghdr msg = {0};
struct iovec iov[1] = {0};
struct cmsghdr *cmsg;
int *fdptr;
int ret;
union {
char buf[CMSG_SPACE(sizeof(file.fd))];
struct cmsghdr align;
} u;
/*
* We need to send some other data along with the ancillary data,
* otherwise the other side won't recieve any data. This is very
* well-hidden in the documentation (and only applies to
* SOCK_STREAM). See the bottom part of unix(7).
*/
iov[0].iov_base = file.name;
iov[0].iov_len = strlen(file.name) + 1;
msg.msg_name = NULL;
msg.msg_namelen = 0;
msg.msg_iov = iov;
msg.msg_iovlen = 1;
msg.msg_control = u.buf;
msg.msg_controllen = sizeof(u.buf);
cmsg = CMSG_FIRSTHDR(&msg);
cmsg->cmsg_level = SOL_SOCKET;
cmsg->cmsg_type = SCM_RIGHTS;
cmsg->cmsg_len = CMSG_LEN(sizeof(int));
fdptr = (int *) CMSG_DATA(cmsg);
memcpy(fdptr, &file.fd, sizeof(int));
return sendmsg(sockfd, &msg, 0);
}
/*
* Receives a file descriptor from the sockfd provided. Returns the file
* descriptor as sent from sendfd(). It will return the file descriptor
* or die (literally) trying. Any synchronisation and preparation of
* state should be done external to this (we expect the other side to be
* in sendfd() in the code).
*/
struct file_t recvfd(int sockfd)
{
struct msghdr msg = {0};
struct iovec iov[1] = {0};
struct cmsghdr *cmsg;
struct file_t file = {0};
int *fdptr;
int olderrno;
union {
char buf[CMSG_SPACE(sizeof(file.fd))];
struct cmsghdr align;
} u;
/* Allocate a buffer. */
/* TODO: Make this dynamic with MSG_PEEK. */
file.name = malloc(TAG_BUFFER);
if (!file.name)
error("recvfd: failed to allocate file.tag buffer\n");
/*
* We need to "recieve" the non-ancillary data even though we don't
* plan to use it at all. Otherwise, things won't work as expected.
* See unix(7) and other well-hidden documentation.
*/
iov[0].iov_base = file.name;
iov[0].iov_len = TAG_BUFFER;
msg.msg_name = NULL;
msg.msg_namelen = 0;
msg.msg_iov = iov;
msg.msg_iovlen = 1;
msg.msg_control = u.buf;
msg.msg_controllen = sizeof(u.buf);
ssize_t ret = recvmsg(sockfd, &msg, 0);
if (ret < 0)
goto err;
cmsg = CMSG_FIRSTHDR(&msg);
if (!cmsg)
error("recvfd: got NULL from CMSG_FIRSTHDR");
if (cmsg->cmsg_level != SOL_SOCKET)
error("recvfd: expected SOL_SOCKET in cmsg: %d", cmsg->cmsg_level);
if (cmsg->cmsg_type != SCM_RIGHTS)
error("recvfd: expected SCM_RIGHTS in cmsg: %d", cmsg->cmsg_type);
if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
error("recvfd: expected correct CMSG_LEN in cmsg: %lu", (unsigned long)cmsg->cmsg_len);
fdptr = (int *) CMSG_DATA(cmsg);
if (!fdptr || *fdptr < 0)
error("recvfd: recieved invalid pointer");
file.fd = *fdptr;
return file;
err:
olderrno = errno;
free(file.name);
errno = olderrno;
return (struct file_t){0};
}

View File

@@ -1,57 +0,0 @@
// +build linux
package utils
/*
* Copyright 2016 SUSE LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
#include <errno.h>
#include <stdlib.h>
#include "cmsg.h"
*/
import "C"
import (
"os"
"unsafe"
)
// RecvFd waits for a file descriptor to be sent over the given AF_UNIX
// socket. The file name of the remote file descriptor will be recreated
// locally (it is sent as non-auxiliary data in the same payload).
func RecvFd(socket *os.File) (*os.File, error) {
file, err := C.recvfd(C.int(socket.Fd()))
if err != nil {
return nil, err
}
defer C.free(unsafe.Pointer(file.name))
return os.NewFile(uintptr(file.fd), C.GoString(file.name)), nil
}
// SendFd sends a file descriptor over the given AF_UNIX socket. In
// addition, the file.Name() of the given file will also be sent as
// non-auxiliary data in the same payload (allowing to send contextual
// information for a file descriptor).
func SendFd(socket, file *os.File) error {
var cfile C.struct_file_t
cfile.fd = C.int(file.Fd())
cfile.name = C.CString(file.Name())
defer C.free(unsafe.Pointer(cfile.name))
_, err := C.sendfd(C.int(socket.Fd()), cfile)
return err
}

View File

@@ -1,36 +0,0 @@
/*
* Copyright 2016 SUSE LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#if !defined(CMSG_H)
#define CMSG_H
#include <sys/types.h>
/* TODO: Implement this properly with MSG_PEEK. */
#define TAG_BUFFER 4096
/* This mirrors Go's (*os.File). */
struct file_t {
char *name;
int fd;
};
struct file_t recvfd(int sockfd);
ssize_t sendfd(int sockfd, struct file_t file);
#endif /* !defined(CMSG_H) */

View File

@@ -7,9 +7,7 @@ import (
"io"
"os"
"path/filepath"
"strings"
"syscall"
"unsafe"
)
const (
@@ -86,41 +84,3 @@ func CleanPath(path string) string {
// Clean the path again for good measure.
return filepath.Clean(path)
}
// SearchLabels searches a list of key-value pairs for the provided key and
// returns the corresponding value. The pairs must be separated with '='.
func SearchLabels(labels []string, query string) string {
for _, l := range labels {
parts := strings.SplitN(l, "=", 2)
if len(parts) < 2 {
continue
}
if parts[0] == query {
return parts[1]
}
}
return ""
}
// Annotations returns the bundle path and user defined annotations from the
// libcontainer state. We need to remove the bundle because that is a label
// added by libcontainer.
func Annotations(labels []string) (bundle string, userAnnotations map[string]string) {
userAnnotations = make(map[string]string)
for _, l := range labels {
parts := strings.SplitN(l, "=", 2)
if len(parts) < 2 {
continue
}
if parts[0] == "bundle" {
bundle = parts[1]
} else {
userAnnotations[parts[0]] = parts[1]
}
}
return
}
func GetIntSize() int {
return int(unsafe.Sizeof(1))
}

36
vendor/vendor.json vendored
View File

@@ -969,45 +969,45 @@
"revisionTime": "2017-01-11T18:16:59Z"
},
{
"checksumSHA1": "IBNE4Zy7Jaz2vjAVWv1QsaNV4xI=",
"checksumSHA1": "UPfYFznGeLEloCkGG8KKgsKjtPU=",
"comment": "v0.0.9-108-g89ab7f2",
"path": "github.com/opencontainers/runc/libcontainer/cgroups",
"revision": "c50d024500f3b7ffaf902d3a7b3001edd50285ae",
"revisionTime": "2017-02-27T19:22:58Z"
"revision": "89ab7f2ccc1e45ddf6485eaa802c35dcf321dfc8",
"revisionTime": "2016-03-31T09:02:02Z"
},
{
"checksumSHA1": "F5X1+hqy63SbX+Ftd/XAmmlc2Pw=",
"checksumSHA1": "xIrNApwB9+yQBhu2p6bt7blV+x4=",
"comment": "v0.0.9-108-g89ab7f2",
"path": "github.com/opencontainers/runc/libcontainer/cgroups/fs",
"revision": "c50d024500f3b7ffaf902d3a7b3001edd50285ae",
"revisionTime": "2017-02-27T19:22:58Z"
"revision": "89ab7f2ccc1e45ddf6485eaa802c35dcf321dfc8",
"revisionTime": "2016-03-31T09:02:02Z"
},
{
"checksumSHA1": "mM7s5vNZbM5T4Lk4TKIG9JqWx0I=",
"checksumSHA1": "CdhRZZmDeY+t3PN4Qpi1ojy50pE=",
"comment": "v0.0.9-108-g89ab7f2",
"path": "github.com/opencontainers/runc/libcontainer/configs",
"revision": "c50d024500f3b7ffaf902d3a7b3001edd50285ae",
"revisionTime": "2017-02-27T19:22:58Z"
"revision": "89ab7f2ccc1e45ddf6485eaa802c35dcf321dfc8",
"revisionTime": "2016-03-31T09:02:02Z"
},
{
"checksumSHA1": "Ssz9STAL5P2+1poiM63klFjhqhE=",
"checksumSHA1": "xo2LfoL27cGF6cChWvew2KfhpNI=",
"comment": "v0.0.9-108-g89ab7f2",
"path": "github.com/opencontainers/runc/libcontainer/system",
"revision": "c50d024500f3b7ffaf902d3a7b3001edd50285ae",
"revisionTime": "2017-02-27T19:22:58Z"
"revision": "89ab7f2ccc1e45ddf6485eaa802c35dcf321dfc8",
"revisionTime": "2016-03-31T09:02:02Z"
},
{
"checksumSHA1": "MA07KaAp1aPVheuopyrFr7pxANs=",
"checksumSHA1": "3AoPMXlmVq2+iWMpsdJZkcUKHB8=",
"path": "github.com/opencontainers/runc/libcontainer/user",
"revision": "c50d024500f3b7ffaf902d3a7b3001edd50285ae",
"revisionTime": "2017-02-27T19:22:58Z"
"revision": "89ab7f2ccc1e45ddf6485eaa802c35dcf321dfc8",
"revisionTime": "2016-03-31T09:02:02Z"
},
{
"checksumSHA1": "tyU2Ly7L5tqKa1702ij4BMYyhdU=",
"checksumSHA1": "PCfoPliJrUhzLP4arNs2KjEb2Bc=",
"comment": "v0.0.9-108-g89ab7f2",
"path": "github.com/opencontainers/runc/libcontainer/utils",
"revision": "c50d024500f3b7ffaf902d3a7b3001edd50285ae",
"revisionTime": "2017-02-27T19:22:58Z"
"revision": "89ab7f2ccc1e45ddf6485eaa802c35dcf321dfc8",
"revisionTime": "2016-03-31T09:02:02Z"
},
{
"checksumSHA1": "fB/0tEPu/K5wbajyToDV8h1okUo=",