Skip to content

Commit

Permalink
Merge pull request #23206 from Luap99/rootless-reexec-userns
Browse files Browse the repository at this point in the history
pkg/rootless: simplify reexec for container code
  • Loading branch information
openshift-merge-bot[bot] committed Jul 8, 2024
2 parents 464a799 + a2c83cb commit abf0350
Show file tree
Hide file tree
Showing 7 changed files with 34 additions and 143 deletions.
21 changes: 11 additions & 10 deletions pkg/domain/infra/abi/system_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,16 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool,
}
}

configureCgroup := cgroupMode != "disabled"
if configureCgroup {
hasCapSysAdmin, err := unshare.HasCapSysAdmin()
if err != nil {
return err
}

// check for both euid == 0 and CAP_SYS_ADMIN because we may be running in a container with CAP_SYS_ADMIN set.
if os.Geteuid() == 0 && hasCapSysAdmin {
// do it only after podman has already re-execed and running with uid==0.
hasCapSysAdmin, err := unshare.HasCapSysAdmin()
if err != nil {
return err
}
// check for both euid == 0 and CAP_SYS_ADMIN because we may be running in a container with CAP_SYS_ADMIN set.
if os.Geteuid() == 0 && hasCapSysAdmin {
configureCgroup := cgroupMode != "disabled"
if configureCgroup {
ownsCgroup, err := cgroups.UserOwnsCurrentSystemdCgroup()
if err != nil {
logrus.Infof("Failed to detect the owner for the current cgroup: %v", err)
Expand All @@ -55,8 +56,8 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool,
}
}
}
return nil
}
return nil
}

pausePidPath, err := util.GetRootlessPauseProcessPidPath()
Expand Down Expand Up @@ -88,7 +89,7 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool,
}

if len(paths) > 0 {
became, ret, err = rootless.TryJoinFromFilePaths(pausePidPath, true, paths)
became, ret, err = rootless.TryJoinFromFilePaths(pausePidPath, paths)
} else {
became, ret, err = rootless.BecomeRootInUserNS(pausePidPath)
if err == nil {
Expand Down
4 changes: 2 additions & 2 deletions pkg/rootless/rootless.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ func TryJoinPauseProcess(pausePidPath string) (bool, int, error) {
return false, -1, err
}

became, ret, err := TryJoinFromFilePaths("", false, []string{pausePidPath})
became, ret, err := TryJoinFromFilePaths("", []string{pausePidPath})
if err == nil {
return became, ret, nil
}
Expand All @@ -45,7 +45,7 @@ func TryJoinPauseProcess(pausePidPath string) (bool, int, error) {
}()

// Now the pause PID file is locked. Try to join once again in case it changed while it was not locked.
became, ret, err = TryJoinFromFilePaths("", false, []string{pausePidPath})
became, ret, err = TryJoinFromFilePaths("", []string{pausePidPath})
if err != nil {
// It is still failing. We can safely remove it.
os.Remove(pausePidPath)
Expand Down
6 changes: 1 addition & 5 deletions pkg/rootless/rootless_freebsd.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,7 @@ func GetRootlessGID() int {
// This is useful when there are already running containers and we
// don't have a pause process yet. We can use the paths to the conmon
// processes to attempt joining their namespaces.
// If needNewNamespace is set, the file is read from a temporary user
// namespace, this is useful for containers that are running with a
// different uidmap and the unprivileged user has no way to read the
// file owned by the root in the container.
func TryJoinFromFilePaths(pausePidPath string, needNewNamespace bool, paths []string) (bool, int, error) {
func TryJoinFromFilePaths(pausePidPath string, paths []string) (bool, int, error) {
return false, -1, errors.New("this function is not supported on this os")
}

Expand Down
50 changes: 1 addition & 49 deletions pkg/rootless/rootless_linux.c
Original file line number Diff line number Diff line change
Expand Up @@ -947,49 +947,8 @@ check_proc_sys_userns_file (const char *path)
}
}

static int
copy_file_to_fd (const char *file_to_read, int outfd)
{
char buf[512];
cleanup_close int fd = -1;

fd = open (file_to_read, O_RDONLY);
if (fd < 0)
{
fprintf (stderr, "open `%s`: %m\n", file_to_read);
return fd;
}

for (;;)
{
ssize_t r, w, t = 0;

r = TEMP_FAILURE_RETRY (read (fd, buf, sizeof buf));
if (r < 0)
{
fprintf (stderr, "read from `%s`: %m\n", file_to_read);
return r;
}

if (r == 0)
break;

while (t < r)
{
w = TEMP_FAILURE_RETRY (write (outfd, &buf[t], r - t));
if (w < 0)
{
fprintf (stderr, "write file to output fd `%s`: %m\n", file_to_read);
return w;
}
t += w;
}
}
return 0;
}

int
reexec_in_user_namespace (int ready, char *pause_pid_file_path, char *file_to_read, int outputfd)
reexec_in_user_namespace (int ready, char *pause_pid_file_path)
{
cleanup_free char **argv = NULL;
cleanup_free char *argv0 = NULL;
Expand Down Expand Up @@ -1138,13 +1097,6 @@ reexec_in_user_namespace (int ready, char *pause_pid_file_path, char *file_to_re
_exit (EXIT_FAILURE);
}

if (file_to_read && file_to_read[0])
{
ret = copy_file_to_fd (file_to_read, outputfd);
close (outputfd);
_exit (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}

execvp ("/proc/self/exe", argv);
fprintf (stderr, "failed to reexec: %m\n");

Expand Down
85 changes: 14 additions & 71 deletions pkg/rootless/rootless_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ import (
#include <sys/types.h>
extern uid_t rootless_uid();
extern uid_t rootless_gid();
extern int reexec_in_user_namespace(int ready, char *pause_pid_file_path, char *file_to_read, int fd);
extern int reexec_in_user_namespace(int ready, char *pause_pid_file_path);
extern int reexec_in_user_namespace_wait(int pid, int options);
extern int reexec_userns_join(int pid, char *pause_pid_file_path);
extern int is_fd_inherited(int fd);
Expand Down Expand Up @@ -213,7 +213,7 @@ func copyMappings(from, to string) error {
return os.WriteFile(to, content, 0600)
}

func becomeRootInUserNS(pausePid, fileToRead string, fileOutput *os.File) (_ bool, _ int, retErr error) {
func becomeRootInUserNS(pausePid string) (_ bool, _ int, retErr error) {
hasCapSysAdmin, err := unshare.HasCapSysAdmin()
if err != nil {
return false, 0, err
Expand Down Expand Up @@ -249,13 +249,6 @@ func becomeRootInUserNS(pausePid, fileToRead string, fileOutput *os.File) (_ boo
cPausePid := C.CString(pausePid)
defer C.free(unsafe.Pointer(cPausePid))

cFileToRead := C.CString(fileToRead)
defer C.free(unsafe.Pointer(cFileToRead))
var fileOutputFD C.int
if fileOutput != nil {
fileOutputFD = C.int(fileOutput.Fd())
}

runtime.LockOSThread()
defer runtime.UnlockOSThread()

Expand Down Expand Up @@ -287,7 +280,7 @@ func becomeRootInUserNS(pausePid, fileToRead string, fileOutput *os.File) (_ boo
}
}()

pidC := C.reexec_in_user_namespace(C.int(r.Fd()), cPausePid, cFileToRead, fileOutputFD)
pidC := C.reexec_in_user_namespace(C.int(r.Fd()), cPausePid)
pid = int(pidC)
if pid < 0 {
return false, -1, fmt.Errorf("cannot re-exec process")
Expand Down Expand Up @@ -361,14 +354,6 @@ func becomeRootInUserNS(pausePid, fileToRead string, fileOutput *os.File) (_ boo
return false, -1, fmt.Errorf("read from sync pipe: %w", err)
}

if fileOutput != nil {
ret := C.reexec_in_user_namespace_wait(pidC, 0)
if ret < 0 {
return false, -1, errors.New("waiting for the re-exec process")
}
return true, 0, nil
}

if b[0] == '2' {
// We have lost the race for writing the PID file, as probably another
// process created a namespace and wrote the PID.
Expand Down Expand Up @@ -434,69 +419,27 @@ func waitAndProxySignalsToChild(pid C.int) (bool, int, error) {
// If podman was re-executed the caller needs to propagate the error code returned by the child
// process.
func BecomeRootInUserNS(pausePid string) (bool, int, error) {
return becomeRootInUserNS(pausePid, "", nil)
return becomeRootInUserNS(pausePid)
}

// TryJoinFromFilePaths attempts to join the namespaces of the pid files in paths.
// This is useful when there are already running containers and we
// don't have a pause process yet. We can use the paths to the conmon
// processes to attempt joining their namespaces.
// If needNewNamespace is set, the file is read from a temporary user
// namespace, this is useful for containers that are running with a
// different uidmap and the unprivileged user has no way to read the
// file owned by the root in the container.
func TryJoinFromFilePaths(pausePidPath string, needNewNamespace bool, paths []string) (bool, int, error) {
func TryJoinFromFilePaths(pausePidPath string, paths []string) (bool, int, error) {
var lastErr error
var pausePid int

for _, path := range paths {
if !needNewNamespace {
data, err := os.ReadFile(path)
if err != nil {
lastErr = err
continue
}

pausePid, err = strconv.Atoi(string(data))
if err != nil {
lastErr = fmt.Errorf("cannot parse file %q: %w", path, err)
continue
}
} else {
r, w, err := os.Pipe()
if err != nil {
lastErr = err
continue
}

defer errorhandling.CloseQuiet(r)

if _, _, err := becomeRootInUserNS("", path, w); err != nil {
w.Close()
lastErr = err
continue
}

if err := w.Close(); err != nil {
return false, 0, err
}
defer func() {
C.reexec_in_user_namespace_wait(-1, 0)
}()

b := make([]byte, 32)

n, err := r.Read(b)
if err != nil {
lastErr = fmt.Errorf("cannot read %q: %w", path, err)
continue
}
data, err := os.ReadFile(path)
if err != nil {
lastErr = err
continue
}

pausePid, err = strconv.Atoi(string(b[:n]))
if err != nil {
lastErr = err
continue
}
pausePid, err := strconv.Atoi(string(data))
if err != nil {
lastErr = fmt.Errorf("cannot parse file %q: %w", path, err)
continue
}

if pausePid > 0 && unix.Kill(pausePid, 0) == nil {
Expand Down
6 changes: 1 addition & 5 deletions pkg/rootless/rootless_unsupported.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,7 @@ func GetRootlessGID() int {
// This is useful when there are already running containers and we
// don't have a pause process yet. We can use the paths to the conmon
// processes to attempt joining their namespaces.
// If needNewNamespace is set, the file is read from a temporary user
// namespace, this is useful for containers that are running with a
// different uidmap and the unprivileged user has no way to read the
// file owned by the root in the container.
func TryJoinFromFilePaths(pausePidPath string, needNewNamespace bool, paths []string) (bool, int, error) {
func TryJoinFromFilePaths(pausePidPath string, paths []string) (bool, int, error) {
return false, -1, errors.New("this function is not supported on this os")
}

Expand Down
5 changes: 4 additions & 1 deletion test/system/550-pause-process.bats
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ function _check_pause_process() {

# First let's run a container in the background to keep the userns active
local cname1=c1_$(random_string)
run_podman run -d --name $cname1 $IMAGE top
run_podman run -d --name $cname1 --uidmap 0:100:100 $IMAGE top

run_podman unshare readlink /proc/self/ns/user
userns="$output"
Expand All @@ -136,6 +136,9 @@ function _check_pause_process() {

_test_sigproxy $cname2 $kidpid

# check pause process again
_check_pause_process

# our container exits 0 so podman should too
wait $kidpid || die "podman run exited $? instead of zero"

Expand Down

1 comment on commit abf0350

@packit-as-a-service
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

podman-next COPR build failed. @containers/packit-build please check.

Please sign in to comment.