diff -Nru runc-1.1.7/debian/changelog runc-1.1.7/debian/changelog --- runc-1.1.7/debian/changelog 2024-01-18 07:06:53.000000000 +0000 +++ runc-1.1.7/debian/changelog 2024-01-24 11:11:08.000000000 +0000 @@ -1,3 +1,21 @@ +runc (1.1.7-0ubuntu2.2) mantic-security; urgency=medium + + * SECURITY UPDATE: container escape vulnerability + - d/p/0001-Fix-File-to-Close.patch: Fix File to Close + - d/p/0002-init-verify-after-chdir-that-cwd-is-inside-the-conta.patch: + init: verify after chdir that cwd is inside the container + - d/p/0003-setns-init-do-explicit-lookup-of-execve-argument-ear.patch: + setns init: do explicit lookup of execve argument early + - d/p/0004-init-close-internal-fds-before-execve.patch: init: close + internal fds before execve + - d/p/0005-cgroup-plug-leaks-of-sys-fs-cgroup-handle.patch: cgroup: + plug leaks of /sys/fs/cgroup handle + - d/p/0006-libcontainer-mark-all-non-stdio-fds-O_CLOEXEC-before.patch: + libcontainer: mark all non-stdio fds O_CLOEXEC before spawning init + - CVE-2024-21626 + + -- Nishit Majithia Wed, 24 Jan 2024 16:41:08 +0530 + runc (1.1.7-0ubuntu2.1) mantic-security; urgency=medium * No change rebuild due to golang-1.20, golang-1.21 updates diff -Nru runc-1.1.7/debian/patches/0001-Fix-File-to-Close.patch runc-1.1.7/debian/patches/0001-Fix-File-to-Close.patch --- runc-1.1.7/debian/patches/0001-Fix-File-to-Close.patch 1970-01-01 00:00:00.000000000 +0000 +++ runc-1.1.7/debian/patches/0001-Fix-File-to-Close.patch 2024-01-19 03:53:29.000000000 +0000 @@ -0,0 +1,35 @@ +From 7362cd5afe9d40131fb62cb075092025c7c71064 Mon Sep 17 00:00:00 2001 +From: "hang.jiang" +Date: Fri, 1 Sep 2023 16:17:13 +0800 +Subject: [runc v1.1.z PATCH 1/6] Fix File to Close + +(This is a cherry-pick of 937ca107c3d22da77eb8e8030f2342253b980980.) 
+ +Signed-off-by: hang.jiang +Fixes: GHSA-xr7r-f8xq-vfvv CVE-2024-21626 +Signed-off-by: Aleksa Sarai +--- + libcontainer/cgroups/fs/paths.go | 1 + + update.go | 1 + + 2 files changed, 2 insertions(+) + +--- runc-1.1.7.orig/libcontainer/cgroups/fs/paths.go ++++ runc-1.1.7/libcontainer/cgroups/fs/paths.go +@@ -83,6 +83,7 @@ func tryDefaultCgroupRoot() string { + if err != nil { + return "" + } ++ defer dir.Close() + names, err := dir.Readdirnames(1) + if err != nil { + return "" +--- runc-1.1.7.orig/update.go ++++ runc-1.1.7/update.go +@@ -174,6 +174,7 @@ other options are ignored. + if err != nil { + return err + } ++ defer f.Close() + } + err = json.NewDecoder(f).Decode(&r) + if err != nil { diff -Nru runc-1.1.7/debian/patches/0002-init-verify-after-chdir-that-cwd-is-inside-the-conta.patch runc-1.1.7/debian/patches/0002-init-verify-after-chdir-that-cwd-is-inside-the-conta.patch --- runc-1.1.7/debian/patches/0002-init-verify-after-chdir-that-cwd-is-inside-the-conta.patch 1970-01-01 00:00:00.000000000 +0000 +++ runc-1.1.7/debian/patches/0002-init-verify-after-chdir-that-cwd-is-inside-the-conta.patch 2024-01-19 03:53:39.000000000 +0000 @@ -0,0 +1,172 @@ +From 2ed79a6c91e56dcd2d1da47f8ffd663066153746 Mon Sep 17 00:00:00 2001 +From: Aleksa Sarai +Date: Tue, 26 Dec 2023 23:53:07 +1100 +Subject: [runc v1.1.z PATCH 2/6] init: verify after chdir that cwd is inside + the container + +If a file descriptor of a directory in the host's mount namespace is +leaked to runc init, a malicious config.json could use /proc/self/fd/... +as a working directory to allow for host filesystem access after the +container runs. This can also be exploited by a container process if it +knows that an administrator will use "runc exec --cwd" and the target +--cwd (the attacker can change that cwd to be a symlink pointing to +/proc/self/fd/... and wait for the process to exec and then snoop on +/proc/$pid/cwd to get access to the host). 
The former issue can lead to +a critical vulnerability in Docker and Kubernetes, while the latter is a +container breakout. + +We can (ab)use the fact that getcwd(2) on Linux detects this exact case, +and getcwd(3) and Go's Getwd() return an error as a result. Thus, if we +just do os.Getwd() after chdir we can easily detect this case and error +out. + +In runc 1.1, a /sys/fs/cgroup handle happens to be leaked to "runc +init", making this exploitable. On runc main it just so happens that the +leaked /sys/fs/cgroup gets clobbered and thus this is only consistently +exploitable for runc 1.1. + +Fixes: GHSA-xr7r-f8xq-vfvv CVE-2024-21626 +Co-developed-by: lifubang +Signed-off-by: lifubang +[refactored the implementation and added more comments] +Signed-off-by: Aleksa Sarai +--- + libcontainer/init_linux.go | 31 ++++++++++++++++++++++++ + libcontainer/integration/seccomp_test.go | 20 +++++++-------- + 2 files changed, 41 insertions(+), 10 deletions(-) + +--- runc-1.1.7.orig/libcontainer/init_linux.go ++++ runc-1.1.7/libcontainer/init_linux.go +@@ -8,6 +8,7 @@ import ( + "io" + "net" + "os" ++ "path/filepath" + "strings" + "unsafe" + +@@ -135,6 +136,32 @@ func populateProcessEnvironment(env []st + return nil + } + ++// verifyCwd ensures that the current directory is actually inside the mount ++// namespace root of the current process. ++func verifyCwd() error { ++ // getcwd(2) on Linux detects if cwd is outside of the rootfs of the ++ // current mount namespace root, and in that case prefixes "(unreachable)" ++ // to the returned string. glibc's getcwd(3) and Go's Getwd() both detect ++ // when this happens and return ENOENT rather than returning a non-absolute ++ // path. In both cases we can therefore easily detect if we have an invalid ++ // cwd by checking the return value of getcwd(3). See getcwd(3) for more ++ // details, and CVE-2024-21626 for the security issue that motivated this ++ // check. 
++ // ++ // We have to use unix.Getwd() here because os.Getwd() has a workaround for ++ // $PWD which involves doing stat(.), which can fail if the current ++ // directory is inaccessible to the container process. ++ if wd, err := unix.Getwd(); err == unix.ENOENT { ++ return errors.New("current working directory is outside of container mount namespace root -- possible container breakout detected") ++ } else if err != nil { ++ return fmt.Errorf("failed to verify if current working directory is safe: %w", err) ++ } else if !filepath.IsAbs(wd) { ++ // We shouldn't ever hit this, but check just in case. ++ return fmt.Errorf("current working directory is not absolute -- possible container breakout detected: cwd is %q", wd) ++ } ++ return nil ++} ++ + // finalizeNamespace drops the caps, sets the correct user + // and working dir, and closes any leaked file descriptors + // before executing the command inside the namespace +@@ -193,6 +220,10 @@ func finalizeNamespace(config *initConfi + return fmt.Errorf("chdir to cwd (%q) set in config.json failed: %w", config.Cwd, err) + } + } ++ // Make sure our final working directory is inside the container. 
++ if err := verifyCwd(); err != nil { ++ return err ++ } + if err := system.ClearKeepCaps(); err != nil { + return fmt.Errorf("unable to clear keep caps: %w", err) + } +--- runc-1.1.7.orig/libcontainer/integration/seccomp_test.go ++++ runc-1.1.7/libcontainer/integration/seccomp_test.go +@@ -13,7 +13,7 @@ import ( + libseccomp "github.com/seccomp/libseccomp-golang" + ) + +-func TestSeccompDenyGetcwdWithErrno(t *testing.T) { ++func TestSeccompDenySyslogWithErrno(t *testing.T) { + if testing.Short() { + return + } +@@ -25,7 +25,7 @@ func TestSeccompDenyGetcwdWithErrno(t *t + DefaultAction: configs.Allow, + Syscalls: []*configs.Syscall{ + { +- Name: "getcwd", ++ Name: "syslog", + Action: configs.Errno, + ErrnoRet: &errnoRet, + }, +@@ -39,7 +39,7 @@ func TestSeccompDenyGetcwdWithErrno(t *t + buffers := newStdBuffers() + pwd := &libcontainer.Process{ + Cwd: "/", +- Args: []string{"pwd"}, ++ Args: []string{"dmesg"}, + Env: standardEnvironment, + Stdin: buffers.Stdin, + Stdout: buffers.Stdout, +@@ -65,17 +65,17 @@ func TestSeccompDenyGetcwdWithErrno(t *t + } + + if exitCode == 0 { +- t.Fatalf("Getcwd should fail with negative exit code, instead got %d!", exitCode) ++ t.Fatalf("dmesg should fail with negative exit code, instead got %d!", exitCode) + } + +- expected := "pwd: getcwd: No such process" ++ expected := "dmesg: klogctl: No such process" + actual := strings.Trim(buffers.Stderr.String(), "\n") + if actual != expected { + t.Fatalf("Expected output %s but got %s\n", expected, actual) + } + } + +-func TestSeccompDenyGetcwd(t *testing.T) { ++func TestSeccompDenySyslog(t *testing.T) { + if testing.Short() { + return + } +@@ -85,7 +85,7 @@ func TestSeccompDenyGetcwd(t *testing.T) + DefaultAction: configs.Allow, + Syscalls: []*configs.Syscall{ + { +- Name: "getcwd", ++ Name: "syslog", + Action: configs.Errno, + }, + }, +@@ -98,7 +98,7 @@ func TestSeccompDenyGetcwd(t *testing.T) + buffers := newStdBuffers() + pwd := &libcontainer.Process{ + Cwd: "/", +- Args: 
[]string{"pwd"}, ++ Args: []string{"dmesg"}, + Env: standardEnvironment, + Stdin: buffers.Stdin, + Stdout: buffers.Stdout, +@@ -124,10 +124,10 @@ func TestSeccompDenyGetcwd(t *testing.T) + } + + if exitCode == 0 { +- t.Fatalf("Getcwd should fail with negative exit code, instead got %d!", exitCode) ++ t.Fatalf("dmesg should fail with negative exit code, instead got %d!", exitCode) + } + +- expected := "pwd: getcwd: Operation not permitted" ++ expected := "dmesg: klogctl: Operation not permitted" + actual := strings.Trim(buffers.Stderr.String(), "\n") + if actual != expected { + t.Fatalf("Expected output %s but got %s\n", expected, actual) diff -Nru runc-1.1.7/debian/patches/0003-setns-init-do-explicit-lookup-of-execve-argument-ear.patch runc-1.1.7/debian/patches/0003-setns-init-do-explicit-lookup-of-execve-argument-ear.patch --- runc-1.1.7/debian/patches/0003-setns-init-do-explicit-lookup-of-execve-argument-ear.patch 1970-01-01 00:00:00.000000000 +0000 +++ runc-1.1.7/debian/patches/0003-setns-init-do-explicit-lookup-of-execve-argument-ear.patch 2024-01-19 03:53:47.000000000 +0000 @@ -0,0 +1,65 @@ +From b2b5754eb34174e032f1048beb1b27db83e77c5a Mon Sep 17 00:00:00 2001 +From: Aleksa Sarai +Date: Fri, 5 Jan 2024 01:42:32 +1100 +Subject: [runc v1.1.z PATCH 3/6] setns init: do explicit lookup of execve + argument early + +(This is a partial backport of a minor change included in commit +dac41717465462b21fab5b5942fe4cb3f47d7e53.) + +This mirrors the logic in standard_init_linux.go, and also ensures that +we do not call exec.LookPath in the final execve step. + +While this is okay for regular binaries, it seems exec.LookPath calls +os.Getenv which tries to emit a log entry to the test harness when +running in "go test" mode. In a future patch (in order to fix +CVE-2024-21626), we will close all of the file descriptors immediately +before execve, which would mean the file descriptor for test harness +logging would be closed at execve time. 
So, moving exec.LookPath earlier +is necessary. + +Ref: dac417174654 ("runc-dmz: reduce memfd binary cloning cost with small C binary") +Signed-off-by: Aleksa Sarai +--- + libcontainer/setns_init_linux.go | 18 +++++++++++++++++- + 1 file changed, 17 insertions(+), 1 deletion(-) + +--- runc-1.1.7.orig/libcontainer/setns_init_linux.go ++++ runc-1.1.7/libcontainer/setns_init_linux.go +@@ -4,6 +4,7 @@ import ( + "errors" + "fmt" + "os" ++ "os/exec" + "strconv" + + "github.com/opencontainers/selinux/go-selinux" +@@ -82,6 +83,21 @@ func (l *linuxSetnsInit) Init() error { + if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil { + return err + } ++ ++ // Check for the arg before waiting to make sure it exists and it is ++ // returned as a create time error. ++ name, err := exec.LookPath(l.config.Args[0]) ++ if err != nil { ++ return err ++ } ++ // exec.LookPath in Go < 1.20 might return no error for an executable ++ // residing on a file system mounted with noexec flag, so perform this ++ // extra check now while we can still return a proper error. ++ // TODO: remove this once go < 1.20 is not supported. ++ if err := eaccess(name); err != nil { ++ return &os.PathError{Op: "eaccess", Path: name, Err: err} ++ } ++ + // Set seccomp as close to execve as possible, so as few syscalls take + // place afterward (reducing the amount of syscalls that users need to + // enable in their seccomp profiles). 
+@@ -101,5 +117,5 @@ func (l *linuxSetnsInit) Init() error { + return &os.PathError{Op: "close log pipe", Path: "fd " + strconv.Itoa(l.logFd), Err: err} + } + +- return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ()) ++ return system.Exec(name, l.config.Args[0:], os.Environ()) + } diff -Nru runc-1.1.7/debian/patches/0004-init-close-internal-fds-before-execve.patch runc-1.1.7/debian/patches/0004-init-close-internal-fds-before-execve.patch --- runc-1.1.7/debian/patches/0004-init-close-internal-fds-before-execve.patch 1970-01-01 00:00:00.000000000 +0000 +++ runc-1.1.7/debian/patches/0004-init-close-internal-fds-before-execve.patch 2024-01-19 03:53:54.000000000 +0000 @@ -0,0 +1,235 @@ +From 996a33c0e468435c70a34a25be1cb023d7554563 Mon Sep 17 00:00:00 2001 +From: Aleksa Sarai +Date: Tue, 2 Jan 2024 14:58:28 +1100 +Subject: [runc v1.1.z PATCH 4/6] init: close internal fds before execve + +If we leak a file descriptor referencing the host filesystem, an +attacker could use a /proc/self/fd magic-link as the source for execve +to execute a host binary in the container. This would allow the binary +itself (or a process inside the container in the 'runc exec' case) to +write to a host binary, leading to a container escape. + +The simple solution is to make sure we close all file descriptors +immediately before the execve(2) step. Doing this earlier can lead to very +serious issues in Go (as file descriptors can be reused, any (*os.File) +reference could start silently operating on a different file) so we have +to do it as late as possible. + +Unfortunately, there are some Go runtime file descriptors that we must +not close (otherwise the Go scheduler panics randomly). The only way of +being sure which file descriptors cannot be closed is to sneakily +go:linkname the runtime internal "internal/poll.IsPollDescriptor" +function. 
This is almost certainly not recommended but there isn't any +other way to be absolutely sure, while also closing any other possible +files. + +In addition, we can keep the logrus forwarding logfd open because you +cannot execve a pipe and the contents of the pipe are so restricted +(JSON-encoded in a format we pick) that it seems unlikely you could even +construct shellcode. Closing the logfd causes issues if there is an +error returned from execve. + +In mainline runc, runc-dmz protects us against this attack because the +intermediate execve(2) closes all of the O_CLOEXEC internal runc file +descriptors and thus runc-dmz cannot access them to attack the host. + +Fixes: GHSA-xr7r-f8xq-vfvv CVE-2024-21626 +Signed-off-by: Aleksa Sarai +--- + libcontainer/logs/logs.go | 9 ++++ + libcontainer/setns_init_linux.go | 19 ++++++++ + libcontainer/standard_init_linux.go | 19 ++++++++ + libcontainer/utils/utils_unix.go | 72 +++++++++++++++++++++++++---- + 4 files changed, 111 insertions(+), 8 deletions(-) + +--- runc-1.1.7.orig/libcontainer/logs/logs.go ++++ runc-1.1.7/libcontainer/logs/logs.go +@@ -4,10 +4,19 @@ import ( + "bufio" + "encoding/json" + "io" ++ "os" + + "github.com/sirupsen/logrus" + ) + ++// IsLogrusFd returns whether the provided fd matches the one that logrus is ++// currently outputting to. This should only ever be called by UnsafeCloseFrom ++// from `runc init`. 
++func IsLogrusFd(fd uintptr) bool { ++ file, ok := logrus.StandardLogger().Out.(*os.File) ++ return ok && file.Fd() == fd ++} ++ + func ForwardLogs(logPipe io.ReadCloser) chan error { + done := make(chan error, 1) + s := bufio.NewScanner(logPipe) +--- runc-1.1.7.orig/libcontainer/setns_init_linux.go ++++ runc-1.1.7/libcontainer/setns_init_linux.go +@@ -15,6 +15,7 @@ import ( + "github.com/opencontainers/runc/libcontainer/keys" + "github.com/opencontainers/runc/libcontainer/seccomp" + "github.com/opencontainers/runc/libcontainer/system" ++ "github.com/opencontainers/runc/libcontainer/utils" + ) + + // linuxSetnsInit performs the container's initialization for running a new process +@@ -117,5 +118,23 @@ func (l *linuxSetnsInit) Init() error { + return &os.PathError{Op: "close log pipe", Path: "fd " + strconv.Itoa(l.logFd), Err: err} + } + ++ // Close all file descriptors we are not passing to the container. This is ++ // necessary because the execve target could use internal runc fds as the ++ // execve path, potentially giving access to binary files from the host ++ // (which can then be opened by container processes, leading to container ++ // escapes). Note that because this operation will close any open file ++ // descriptors that are referenced by (*os.File) handles from underneath ++ // the Go runtime, we must not do any file operations after this point ++ // (otherwise the (*os.File) finaliser could close the wrong file). See ++ // CVE-2024-21626 for more information as to why this protection is ++ // necessary. ++ // ++ // This is not needed for runc-dmz, because the extra execve(2) step means ++ // that all O_CLOEXEC file descriptors have already been closed and thus ++ // the second execve(2) from runc-dmz cannot access internal file ++ // descriptors from runc. 
++ if err := utils.UnsafeCloseFrom(l.config.PassedFilesCount + 3); err != nil { ++ return err ++ } + return system.Exec(name, l.config.Args[0:], os.Environ()) + } +--- runc-1.1.7.orig/libcontainer/standard_init_linux.go ++++ runc-1.1.7/libcontainer/standard_init_linux.go +@@ -17,6 +17,7 @@ import ( + "github.com/opencontainers/runc/libcontainer/keys" + "github.com/opencontainers/runc/libcontainer/seccomp" + "github.com/opencontainers/runc/libcontainer/system" ++ "github.com/opencontainers/runc/libcontainer/utils" + ) + + type linuxStandardInit struct { +@@ -258,5 +259,23 @@ func (l *linuxStandardInit) Init() error + return err + } + ++ // Close all file descriptors we are not passing to the container. This is ++ // necessary because the execve target could use internal runc fds as the ++ // execve path, potentially giving access to binary files from the host ++ // (which can then be opened by container processes, leading to container ++ // escapes). Note that because this operation will close any open file ++ // descriptors that are referenced by (*os.File) handles from underneath ++ // the Go runtime, we must not do any file operations after this point ++ // (otherwise the (*os.File) finaliser could close the wrong file). See ++ // CVE-2024-21626 for more information as to why this protection is ++ // necessary. ++ // ++ // This is not needed for runc-dmz, because the extra execve(2) step means ++ // that all O_CLOEXEC file descriptors have already been closed and thus ++ // the second execve(2) from runc-dmz cannot access internal file ++ // descriptors from runc. 
++ if err := utils.UnsafeCloseFrom(l.config.PassedFilesCount + 3); err != nil { ++ return err ++ } + return system.Exec(name, l.config.Args[0:], os.Environ()) + } +--- runc-1.1.7.orig/libcontainer/utils/utils_unix.go ++++ runc-1.1.7/libcontainer/utils/utils_unix.go +@@ -7,8 +7,11 @@ import ( + "fmt" + "os" + "strconv" ++ _ "unsafe" // for go:linkname + + "golang.org/x/sys/unix" ++ ++ "github.com/opencontainers/runc/libcontainer/logs" + ) + + // EnsureProcHandle returns whether or not the given file handle is on procfs. +@@ -23,9 +26,11 @@ func EnsureProcHandle(fh *os.File) error + return nil + } + +-// CloseExecFrom applies O_CLOEXEC to all file descriptors currently open for +-// the process (except for those below the given fd value). +-func CloseExecFrom(minFd int) error { ++type fdFunc func(fd int) ++ ++// fdRangeFrom calls the passed fdFunc for each file descriptor that is open in ++// the current process. ++func fdRangeFrom(minFd int, fn fdFunc) error { + fdDir, err := os.Open("/proc/self/fd") + if err != nil { + return err +@@ -50,15 +55,66 @@ func CloseExecFrom(minFd int) error { + if fd < minFd { + continue + } +- // Intentionally ignore errors from unix.CloseOnExec -- the cases where +- // this might fail are basically file descriptors that have already +- // been closed (including and especially the one that was created when +- // os.ReadDir did the "opendir" syscall). +- unix.CloseOnExec(fd) ++ // Ignore the file descriptor we used for readdir, as it will be closed ++ // when we return. ++ if uintptr(fd) == fdDir.Fd() { ++ continue ++ } ++ // Run the closure. ++ fn(fd) + } + return nil + } + ++// CloseExecFrom sets the O_CLOEXEC flag on all file descriptors greater or ++// equal to minFd in the current process. 
++func CloseExecFrom(minFd int) error { ++ return fdRangeFrom(minFd, unix.CloseOnExec) ++} ++ ++//go:linkname runtime_IsPollDescriptor internal/poll.IsPollDescriptor ++ ++// In order to make sure we do not close the internal epoll descriptors the Go ++// runtime uses, we need to ensure that we skip descriptors that match ++// "internal/poll".IsPollDescriptor. Yes, this is a Go runtime internal thing, ++// unfortunately there's no other way to be sure we're only keeping the file ++// descriptors the Go runtime needs. Hopefully nothing blows up doing this... ++func runtime_IsPollDescriptor(fd uintptr) bool ++ ++// UnsafeCloseFrom closes all file descriptors greater or equal to minFd in the ++// current process, except for those critical to Go's runtime (such as the ++// netpoll management descriptors). ++// ++// NOTE: That this function is incredibly dangerous to use in most Go code, as ++// closing file descriptors from underneath *os.File handles can lead to very ++// bad behaviour (the closed file descriptor can be re-used and then any ++// *os.File operations would apply to the wrong file). This function is only ++// intended to be called from the last stage of runc init. ++func UnsafeCloseFrom(minFd int) error { ++ // We must not close some file descriptors. ++ return fdRangeFrom(minFd, func(fd int) { ++ if runtime_IsPollDescriptor(uintptr(fd)) { ++ // These are the Go runtimes internal netpoll file descriptors. ++ // These file descriptors are operated on deep in the Go scheduler, ++ // and closing those files from underneath Go can result in panics. ++ // There is no issue with keeping them because they are not ++ // executable and are not useful to an attacker anyway. Also we ++ // don't have any choice. ++ return ++ } ++ if logs.IsLogrusFd(uintptr(fd)) { ++ // Do not close the logrus output fd. We cannot exec a pipe, and ++ // the contents are quite limited (very little attacker control, ++ // JSON-encoded) making shellcode attacks unlikely. 
++ return ++ } ++ // There's nothing we can do about errors from close(2), and the ++ // only likely error to be seen is EBADF which indicates the fd was ++ // already closed (in which case, we got what we wanted). ++ _ = unix.Close(fd) ++ }) ++} ++ + // NewSockPair returns a new unix socket pair + func NewSockPair(name string) (parent *os.File, child *os.File, err error) { + fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0) diff -Nru runc-1.1.7/debian/patches/0005-cgroup-plug-leaks-of-sys-fs-cgroup-handle.patch runc-1.1.7/debian/patches/0005-cgroup-plug-leaks-of-sys-fs-cgroup-handle.patch --- runc-1.1.7/debian/patches/0005-cgroup-plug-leaks-of-sys-fs-cgroup-handle.patch 1970-01-01 00:00:00.000000000 +0000 +++ runc-1.1.7/debian/patches/0005-cgroup-plug-leaks-of-sys-fs-cgroup-handle.patch 2024-01-19 03:54:00.000000000 +0000 @@ -0,0 +1,108 @@ +From f3429eda1a8d478d7dbcb9c07cef943d88e67671 Mon Sep 17 00:00:00 2001 +From: Aleksa Sarai +Date: Tue, 26 Dec 2023 23:36:51 +1100 +Subject: [runc v1.1.z PATCH 5/6] cgroup: plug leaks of /sys/fs/cgroup handle + +We auto-close this file descriptor in the final exec step, but it's +probably a good idea to not possibly leak the file descriptor to "runc +init" (we've had issues like this in the past) especially since it is a +directory handle from the host mount namespace. + +In practice, on runc 1.1 this does leak to "runc init" but on main the +handle has a low enough file descriptor that it gets clobbered by the +ForkExec of "runc init". + +OPEN_TREE_CLONE would let us protect this handle even further, but the +performance impact of creating an anonymous mount namespace is probably +not worth it. + +Also, switch to using an *os.File for the handle so if it goes out of +scope during setup (i.e. an error occurs during setup) it will get +cleaned up by the GC. 
+ +Fixes: GHSA-xr7r-f8xq-vfvv CVE-2024-21626 +Signed-off-by: Aleksa Sarai +--- + libcontainer/cgroups/file.go | 31 ++++++++++++++++--------------- + 1 file changed, 16 insertions(+), 15 deletions(-) + +--- runc-1.1.7.orig/libcontainer/cgroups/file.go ++++ runc-1.1.7/libcontainer/cgroups/file.go +@@ -76,16 +76,16 @@ var ( + // TestMode is set to true by unit tests that need "fake" cgroupfs. + TestMode bool + +- cgroupFd int = -1 +- prepOnce sync.Once +- prepErr error +- resolveFlags uint64 ++ cgroupRootHandle *os.File ++ prepOnce sync.Once ++ prepErr error ++ resolveFlags uint64 + ) + + func prepareOpenat2() error { + prepOnce.Do(func() { + fd, err := unix.Openat2(-1, cgroupfsDir, &unix.OpenHow{ +- Flags: unix.O_DIRECTORY | unix.O_PATH, ++ Flags: unix.O_DIRECTORY | unix.O_PATH | unix.O_CLOEXEC, + }) + if err != nil { + prepErr = &os.PathError{Op: "openat2", Path: cgroupfsDir, Err: err} +@@ -96,15 +96,16 @@ func prepareOpenat2() error { + } + return + } ++ file := os.NewFile(uintptr(fd), cgroupfsDir) ++ + var st unix.Statfs_t +- if err = unix.Fstatfs(fd, &st); err != nil { ++ if err := unix.Fstatfs(int(file.Fd()), &st); err != nil { + prepErr = &os.PathError{Op: "statfs", Path: cgroupfsDir, Err: err} + logrus.Warnf("falling back to securejoin: %s", prepErr) + return + } + +- cgroupFd = fd +- ++ cgroupRootHandle = file + resolveFlags = unix.RESOLVE_BENEATH | unix.RESOLVE_NO_MAGICLINKS + if st.Type == unix.CGROUP2_SUPER_MAGIC { + // cgroupv2 has a single mountpoint and no "cpu,cpuacct" symlinks +@@ -131,7 +132,7 @@ func openFile(dir, file string, flags in + return openFallback(path, flags, mode) + } + +- fd, err := unix.Openat2(cgroupFd, relPath, ++ fd, err := unix.Openat2(int(cgroupRootHandle.Fd()), relPath, + &unix.OpenHow{ + Resolve: resolveFlags, + Flags: uint64(flags) | unix.O_CLOEXEC, +@@ -139,20 +140,20 @@ func openFile(dir, file string, flags in + }) + if err != nil { + err = &os.PathError{Op: "openat2", Path: path, Err: err} +- // Check if cgroupFd is still 
opened to cgroupfsDir ++ // Check if cgroupRootHandle is still opened to cgroupfsDir + // (happens when this package is incorrectly used + // across the chroot/pivot_root/mntns boundary, or + // when /sys/fs/cgroup is remounted). + // + // TODO: if such usage will ever be common, amend this +- // to reopen cgroupFd and retry openat2. +- fdStr := strconv.Itoa(cgroupFd) ++ // to reopen cgroupRootHandle and retry openat2. ++ fdStr := strconv.Itoa(int(cgroupRootHandle.Fd())) + fdDest, _ := os.Readlink("/proc/self/fd/" + fdStr) + if fdDest != cgroupfsDir { +- // Wrap the error so it is clear that cgroupFd ++ // Wrap the error so it is clear that cgroupRootHandle + // is opened to an unexpected/wrong directory. +- err = fmt.Errorf("cgroupFd %s unexpectedly opened to %s != %s: %w", +- fdStr, fdDest, cgroupfsDir, err) ++ err = fmt.Errorf("cgroupRootHandle %d unexpectedly opened to %s != %s: %w", ++ cgroupRootHandle.Fd(), fdDest, cgroupfsDir, err) + } + return nil, err + } diff -Nru runc-1.1.7/debian/patches/0006-libcontainer-mark-all-non-stdio-fds-O_CLOEXEC-before.patch runc-1.1.7/debian/patches/0006-libcontainer-mark-all-non-stdio-fds-O_CLOEXEC-before.patch --- runc-1.1.7/debian/patches/0006-libcontainer-mark-all-non-stdio-fds-O_CLOEXEC-before.patch 1970-01-01 00:00:00.000000000 +0000 +++ runc-1.1.7/debian/patches/0006-libcontainer-mark-all-non-stdio-fds-O_CLOEXEC-before.patch 2024-01-19 03:54:09.000000000 +0000 @@ -0,0 +1,41 @@ +From 4f263985016276c2faf709e9db98fcc8677a8a13 Mon Sep 17 00:00:00 2001 +From: Aleksa Sarai +Date: Thu, 28 Dec 2023 00:42:23 +1100 +Subject: [runc v1.1.z PATCH 6/6] libcontainer: mark all non-stdio fds + O_CLOEXEC before spawning init + +Given the core issue in GHSA-xr7r-f8xq-vfvv was that we were unknowingly +leaking file descriptors to "runc init", it seems prudent to make sure +we proactively prevent this in the future. The solution is to simply +mark all non-stdio file descriptors as O_CLOEXEC before we spawn "runc +init". 
+ +For libcontainer library users, this could result in unrelated files +being marked as O_CLOEXEC -- however (for the same reason we are doing +this for runc), for security reasons those files should've been marked +as O_CLOEXEC anyway. + +Fixes: GHSA-xr7r-f8xq-vfvv CVE-2024-21626 +Signed-off-by: Aleksa Sarai +--- + libcontainer/container_linux.go | 9 +++++++++ + 1 file changed, 9 insertions(+) + +--- runc-1.1.7.orig/libcontainer/container_linux.go ++++ runc-1.1.7/libcontainer/container_linux.go +@@ -351,6 +351,15 @@ func (c *linuxContainer) start(process * + }() + } + ++ // Before starting "runc init", mark all non-stdio open files as O_CLOEXEC ++ // to make sure we don't leak any files into "runc init". Any files to be ++ // passed to "runc init" through ExtraFiles will get dup2'd by the Go ++ // runtime and thus their O_CLOEXEC flag will be cleared. This is some ++ // additional protection against attacks like CVE-2024-21626, by making ++ // sure we never leak files to "runc init" we didn't intend to. ++ if err := utils.CloseExecFrom(3); err != nil { ++ return fmt.Errorf("unable to mark non-stdio fds as cloexec: %w", err) ++ } + if err := parent.start(); err != nil { + return fmt.Errorf("unable to start container process: %w", err) + } diff -Nru runc-1.1.7/debian/patches/series runc-1.1.7/debian/patches/series --- runc-1.1.7/debian/patches/series 2023-06-12 20:39:01.000000000 +0000 +++ runc-1.1.7/debian/patches/series 2024-01-19 03:54:08.000000000 +0000 @@ -1,3 +1,9 @@ test--skip-privileged-test-factory_linux_test.go.patch test--skip-fs-related-cgroups-tests.patch 0011-export-blockIODevice.patch +0001-Fix-File-to-Close.patch +0002-init-verify-after-chdir-that-cwd-is-inside-the-conta.patch +0003-setns-init-do-explicit-lookup-of-execve-argument-ear.patch +0004-init-close-internal-fds-before-execve.patch +0005-cgroup-plug-leaks-of-sys-fs-cgroup-handle.patch +0006-libcontainer-mark-all-non-stdio-fds-O_CLOEXEC-before.patch