mirror of
https://github.com/kemko/nomad.git
synced 2026-01-05 01:45:44 +03:00
docker: back out cgroup v2 OOM detection (#12735)
When shutting down an allocation that ends up needing to be force-killed, we're getting a spurious "OOM Killed (137)" message on the task termination event. The exit-code check that produces this message was introduced as part of cgroups v2 support, because under cgroups v2 the Docker daemon does not report the container's OOM-killed status correctly. Although 137 is the exit code we get for OOM-killed processes, that is only because an OOM kill is delivered as a `SIGKILL` (128 + signal 9), so any process killed with `SIGKILL` gets that same exit code. This commit backs out the exit-code check.
This commit is contained in:
@@ -242,14 +242,11 @@ func (h *taskHandle) run() {
 if ierr != nil {
 	h.logger.Error("failed to inspect container", "error", ierr)
 } else if container.State.OOMKilled {
+	// Note that with cgroups.v2 the cgroup OOM killer is not
+	// observed by docker container status. But we can't test the
+	// exit code, as 137 is used for any SIGKILL
 	oom = true
 	werr = fmt.Errorf("OOM Killed")
-} else if container.State.ExitCode == 137 {
-	// With cgroups.v2 it seems the cgroup OOM killer is not observed by docker
-	// container status. So just fudge the connection for now.
-	// [Mon Mar 21 19:48:21 2022] Memory cgroup out of memory: Killed process 92768 (sh) [...]
-	oom = true
-	werr = fmt.Errorf("OOM Killed (137)")
 }
 
 // Shutdown stats collection
Reference in New Issue
Block a user