2009-10-04 02:42:08 +07:00
|
|
|
/*
|
|
|
|
* unshare(1) - command-line interface for unshare(2)
|
|
|
|
*
|
|
|
|
* Copyright (C) 2009 Mikhail Gusarov <dottedmag@dottedmag.net>
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or modify it
|
|
|
|
* under the terms of the GNU General Public License as published by the
|
|
|
|
* Free Software Foundation; either version 2, or (at your option) any
|
|
|
|
* later version.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful, but
|
|
|
|
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License along
|
|
|
|
* with this program; if not, write to the Free Software Foundation, Inc.,
|
2012-02-22 22:28:48 +01:00
|
|
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
2009-10-04 02:42:08 +07:00
|
|
|
*/
|
|
|
|
|
|
|
|
#include <errno.h>
|
|
|
|
#include <getopt.h>
|
2022-03-07 20:41:24 -08:00
|
|
|
#include <poll.h>
|
2009-10-04 02:42:08 +07:00
|
|
|
#include <sched.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <unistd.h>
|
unshare: Add options to map blocks of user/group IDs
This adds the ability to map multiple user/group IDs when creating a new
user namespace. Regular processes cannot map any user other than the
effective user, so we need to use the setuid helpers newuidmap and
newgidmap, provided by shadow. Typically, users will be assigned blocks
of user/group IDs in /etc/sub{u,g}id, although it is also possible to
use NSS. There is a second advantage in using these helpers: because we
never write to /proc/self/gid_map, we don't have to disable setgroups.
Because the process of mapping IDs is almost identical, whether we are
mapping user IDs or group IDs, we put both in a common "map_range"
structure. These are read in by (ab)using string_to_idarray. In addition
to any map created with --map-users, we still need to handle a map of
size one created with --map-user. This makes constructing the helpers'
command line the trickiest part of the whole process. newuidmap/
newgidmap check to see if any ranges overlap before creating a mapping.
To avoid failing, we carve out a hole in the mapping for the singular
map. In the worst case, we may have three separate maps.
Signed-off-by: Sean Anderson <seanga2@gmail.com>
2021-11-24 13:26:16 -05:00
|
|
|
#include <sys/eventfd.h>
|
2013-06-27 20:04:58 -04:00
|
|
|
#include <sys/wait.h>
|
2013-07-03 12:28:16 +02:00
|
|
|
#include <sys/mount.h>
|
2015-04-09 11:48:07 +02:00
|
|
|
#include <sys/types.h>
|
|
|
|
#include <sys/stat.h>
|
2017-09-19 20:39:00 +02:00
|
|
|
#include <sys/prctl.h>
|
2018-10-05 13:09:30 +02:00
|
|
|
#include <grp.h>
|
2015-04-09 11:48:07 +02:00
|
|
|
|
2014-03-17 11:48:47 +01:00
|
|
|
/* we only need some defines missing in sys/mount.h, no libmount linkage */
|
|
|
|
#include <libmount.h>
|
|
|
|
|
2009-10-04 02:42:08 +07:00
|
|
|
#include "nls.h"
|
2011-01-25 22:44:52 +01:00
|
|
|
#include "c.h"
|
2019-01-17 14:16:54 -08:00
|
|
|
#include "caputils.h"
|
2012-04-04 19:49:40 +02:00
|
|
|
#include "closestream.h"
|
2013-01-16 16:36:32 -08:00
|
|
|
#include "namespace.h"
|
2022-03-07 20:41:24 -08:00
|
|
|
#include "pidfd-utils.h"
|
2013-02-13 21:05:48 -05:00
|
|
|
#include "exec_shell.h"
|
2013-12-27 22:14:48 +01:00
|
|
|
#include "xalloc.h"
|
|
|
|
#include "pathnames.h"
|
|
|
|
#include "all-io.h"
|
2017-10-14 04:31:57 +02:00
|
|
|
#include "signames.h"
|
2018-10-05 13:09:30 +02:00
|
|
|
#include "strutils.h"
|
2020-04-15 23:05:16 +10:00
|
|
|
#include "pwdutils.h"
|
2013-12-27 22:14:48 +01:00
|
|
|
|
2016-01-30 16:18:39 +03:00
|
|
|
/* value passed over the eventfd to synchronize parent and child */
|
|
|
|
#define PIPE_SYNC_BYTE 0x06 /* written by sync_with_child(), expected by fork_and_wait() */
|
|
|
|
|
2015-03-18 15:13:15 +01:00
|
|
|
/* 'private' is kernel default */
|
|
|
|
#define UNSHARE_PROPAGATION_DEFAULT (MS_REC | MS_PRIVATE) /* same flags as the "private" mode in parse_propagation() */
|
|
|
|
|
2015-04-09 11:34:02 +02:00
|
|
|
/* /proc namespace files and mountpoints for binds */
|
|
|
|
static struct namespace_file {
|
|
|
|
int type; /* CLONE_NEW* */
|
|
|
|
const char *name; /* ns/<type> */
|
|
|
|
const char *target; /* user specified target for bind mount */
|
|
|
|
} namespace_files[] = {
|
2016-03-02 17:53:42 -08:00
|
|
|
{ .type = CLONE_NEWUSER, .name = "ns/user" },
|
|
|
|
{ .type = CLONE_NEWCGROUP,.name = "ns/cgroup" },
|
|
|
|
{ .type = CLONE_NEWIPC, .name = "ns/ipc" },
|
|
|
|
{ .type = CLONE_NEWUTS, .name = "ns/uts" },
|
|
|
|
{ .type = CLONE_NEWNET, .name = "ns/net" },
|
2020-04-15 23:16:53 +02:00
|
|
|
{ .type = CLONE_NEWPID, .name = "ns/pid_for_children" },
|
2016-03-02 17:53:42 -08:00
|
|
|
{ .type = CLONE_NEWNS, .name = "ns/mnt" },
|
2020-04-15 23:16:53 +02:00
|
|
|
{ .type = CLONE_NEWTIME, .name = "ns/time_for_children" },
|
2015-04-09 11:34:02 +02:00
|
|
|
{ .name = NULL }
|
|
|
|
};
|
|
|
|
|
|
|
|
static int npersists; /* number of persistent namespaces; incremented by set_ns_target() for each target it records */
|
|
|
|
|
2015-01-08 11:51:58 +01:00
|
|
|
/*
 * Requested handling of the setgroups file. The two non-negative values
 * double as indexes into setgroups_strings[].
 */
enum {
	SETGROUPS_NONE = -1,	/* -1: leave the file alone */
	SETGROUPS_DENY,		/*  0 */
	SETGROUPS_ALLOW		/*  1 */
};
|
|
|
|
|
|
|
|
static const char *setgroups_strings[] =
|
|
|
|
{
|
|
|
|
[SETGROUPS_DENY] = "deny",
|
|
|
|
[SETGROUPS_ALLOW] = "allow"
|
|
|
|
};
|
|
|
|
|
|
|
|
static int setgroups_str2id(const char *str)
|
|
|
|
{
|
|
|
|
size_t i;
|
|
|
|
|
|
|
|
for (i = 0; i < ARRAY_SIZE(setgroups_strings); i++)
|
|
|
|
if (strcmp(str, setgroups_strings[i]) == 0)
|
|
|
|
return i;
|
|
|
|
|
|
|
|
errx(EXIT_FAILURE, _("unsupported --setgroups argument '%s'"), str);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void setgroups_control(int action)
|
2014-12-17 17:06:03 -06:00
|
|
|
{
|
|
|
|
const char *file = _PATH_PROC_SETGROUPS;
|
2015-01-08 11:51:58 +01:00
|
|
|
const char *cmd;
|
2014-12-17 17:06:03 -06:00
|
|
|
int fd;
|
|
|
|
|
2015-01-08 11:51:58 +01:00
|
|
|
if (action < 0 || (size_t) action >= ARRAY_SIZE(setgroups_strings))
|
|
|
|
return;
|
|
|
|
cmd = setgroups_strings[action];
|
|
|
|
|
2014-12-17 17:06:03 -06:00
|
|
|
fd = open(file, O_WRONLY);
|
|
|
|
if (fd < 0) {
|
|
|
|
if (errno == ENOENT)
|
|
|
|
return;
|
2015-08-05 13:22:05 +02:00
|
|
|
err(EXIT_FAILURE, _("cannot open %s"), file);
|
2014-12-17 17:06:03 -06:00
|
|
|
}
|
|
|
|
|
2015-01-08 11:51:58 +01:00
|
|
|
if (write_all(fd, cmd, strlen(cmd)))
|
2014-12-17 17:06:03 -06:00
|
|
|
err(EXIT_FAILURE, _("write failed %s"), file);
|
|
|
|
close(fd);
|
|
|
|
}
|
|
|
|
|
2013-12-27 22:14:48 +01:00
|
|
|
/*
 * Write the one-entry mapping line "<from> <to> 1" to @file. Failing to open
 * or to write the file is fatal.
 */
static void map_id(const char *file, uint32_t from, uint32_t to)
{
	char *entry = NULL;
	const int fd = open(file, O_WRONLY);

	if (fd < 0)
		err(EXIT_FAILURE, _("cannot open %s"), file);

	xasprintf(&entry, "%u %u 1", from, to);
	if (write_all(fd, entry, strlen(entry)) != 0)
		err(EXIT_FAILURE, _("write failed %s"), file);

	close(fd);
	free(entry);
}
|
2009-10-04 02:42:08 +07:00
|
|
|
|
2015-03-18 15:13:15 +01:00
|
|
|
/*
 * Convert a propagation mode name ("slave", "private", "shared" or
 * "unchanged") into the mount flags handed to set_propagation(). "unchanged"
 * yields 0; an unknown name is fatal.
 */
static unsigned long parse_propagation(const char *str)
{
	static const struct prop_opts {
		const char *name;
		unsigned long flag;
	} opts[] = {
		{ "slave",	MS_REC | MS_SLAVE },
		{ "private",	MS_REC | MS_PRIVATE },
		{ "shared",	MS_REC | MS_SHARED },
		{ "unchanged",	0 }
	};
	const struct prop_opts *opt;

	for (opt = opts; opt < opts + ARRAY_SIZE(opts); opt++) {
		if (!strcmp(str, opt->name))
			return opt->flag;
	}

	errx(EXIT_FAILURE, _("unsupported propagation mode: %s"), str);
}
|
|
|
|
|
|
|
|
/*
 * Apply the propagation @flags to the root mount "/". A value of 0 means
 * "leave propagation as it is" and results in no mount() call at all.
 */
static void set_propagation(unsigned long flags)
{
	if (flags != 0 && mount("none", "/", NULL, flags, NULL) != 0)
		err(EXIT_FAILURE, _("cannot change root filesystem propagation"));
}
|
|
|
|
|
2015-04-09 11:34:02 +02:00
|
|
|
|
|
|
|
static int set_ns_target(int type, const char *path)
|
|
|
|
{
|
|
|
|
struct namespace_file *ns;
|
|
|
|
|
|
|
|
for (ns = namespace_files; ns->name; ns++) {
|
|
|
|
if (ns->type != type)
|
|
|
|
continue;
|
|
|
|
ns->target = path;
|
|
|
|
npersists++;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int bind_ns_files(pid_t pid)
|
|
|
|
{
|
|
|
|
struct namespace_file *ns;
|
|
|
|
char src[PATH_MAX];
|
|
|
|
|
|
|
|
for (ns = namespace_files; ns->name; ns++) {
|
|
|
|
if (!ns->target)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
snprintf(src, sizeof(src), "/proc/%u/%s", (unsigned) pid, ns->name);
|
|
|
|
|
|
|
|
if (mount(src, ns->target, NULL, MS_BIND, NULL) != 0)
|
|
|
|
err(EXIT_FAILURE, _("mount %s on %s failed"), src, ns->target);
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-04-09 11:48:07 +02:00
|
|
|
static ino_t get_mnt_ino(pid_t pid)
|
|
|
|
{
|
|
|
|
struct stat st;
|
|
|
|
char path[PATH_MAX];
|
|
|
|
|
|
|
|
snprintf(path, sizeof(path), "/proc/%u/ns/mnt", (unsigned) pid);
|
|
|
|
|
|
|
|
if (stat(path, &st) != 0)
|
2021-06-21 12:40:41 +02:00
|
|
|
err(EXIT_FAILURE, _("stat of %s failed"), path);
|
2015-04-09 11:48:07 +02:00
|
|
|
return st.st_ino;
|
|
|
|
}
|
|
|
|
|
2023-06-30 22:36:06 +02:00
|
|
|
/*
 * Write the line "<clk_id> <offset> 0" to /proc/self/timens_offsets.
 * Failing to open the file, or writing fewer bytes than the line holds,
 * is fatal.
 */
static void settime(int64_t offset, clockid_t clk_id)
{
	char line[sizeof(stringify_value(ULONG_MAX)) * 3];
	int len, fd;

	len = snprintf(line, sizeof(line), "%d %" PRId64 " 0", clk_id, offset);

	fd = open("/proc/self/timens_offsets", O_WRONLY);
	if (fd < 0)
		err(EXIT_FAILURE, _("failed to open /proc/self/timens_offsets"));

	/* a single write() call: a short write counts as failure */
	if (write(fd, line, len) != len)
		err(EXIT_FAILURE, _("failed to write to /proc/self/timens_offsets"));

	close(fd);
}
|
|
|
|
|
2021-11-24 13:26:14 -05:00
|
|
|
/**
 * waitchild() - Wait for a process to exit successfully
 * @pid: PID of the process to wait for
 *
 * Wait for a process to exit successfully. If it exits with a non-zero return
 * code, then exit() with the same status. If it is terminated by a signal it
 * did not exit successfully either, so report that and exit with
 * %EXIT_FAILURE. A waitpid() interrupted by a signal (EINTR) is retried; any
 * other waitpid() error is fatal.
 */
static void waitchild(int pid)
{
	int status;

	while (waitpid(pid, &status, 0) < 0) {
		if (errno != EINTR)
			err(EXIT_FAILURE, _("waitpid failed"));
	}

	if (WIFEXITED(status)) {
		if (WEXITSTATUS(status) != EXIT_SUCCESS)
			exit(WEXITSTATUS(status));
		return;
	}

	/*
	 * The child never reached exit(): whatever work it was supposed to do
	 * for us cannot be assumed to have happened.
	 */
	if (WIFSIGNALED(status))
		errx(EXIT_FAILURE, _("child %d killed by signal %d"),
		     pid, WTERMSIG(status));
}
|
|
|
|
|
2021-11-24 13:26:15 -05:00
|
|
|
/**
|
|
|
|
* sync_with_child() - Tell our child we're ready and wait for it to exit
|
|
|
|
* @pid: The pid of our child
|
|
|
|
* @fd: A file descriptor created with eventfd()
|
|
|
|
*
|
|
|
|
* This tells a child created with fork_and_wait() that we are ready for it to
|
|
|
|
* continue. Once we have done that, wait for our child to exit.
|
|
|
|
*/
|
|
|
|
static void sync_with_child(pid_t pid, int fd)
|
2015-04-09 11:48:07 +02:00
|
|
|
{
|
2021-11-24 13:26:15 -05:00
|
|
|
uint64_t ch = PIPE_SYNC_BYTE;
|
|
|
|
|
|
|
|
write_all(fd, &ch, sizeof(ch));
|
|
|
|
close(fd);
|
2015-04-09 11:48:07 +02:00
|
|
|
|
2021-11-24 13:26:15 -05:00
|
|
|
waitchild(pid);
|
|
|
|
}
|
2016-01-30 16:18:39 +03:00
|
|
|
|
2021-11-24 13:26:15 -05:00
|
|
|
/**
|
|
|
|
* fork_and_wait() - Fork and wait to be sync'd with
|
|
|
|
* @fd - A file descriptor created with eventfd() which should be passed to
|
|
|
|
* sync_with_child()
|
|
|
|
*
|
|
|
|
* This creates an eventfd and forks. The parent process returns immediately,
|
|
|
|
* but the child waits for a %PIPE_SYNC_BYTE on the eventfd before returning.
|
|
|
|
* This allows the parent to perform some tasks before the child starts its
|
|
|
|
* work. The parent should call sync_with_child() once it is ready for the
|
|
|
|
* child to continue.
|
|
|
|
*
|
|
|
|
* Return: The pid from fork()
|
|
|
|
*/
|
|
|
|
static pid_t fork_and_wait(int *fd)
|
|
|
|
{
|
|
|
|
pid_t pid;
|
|
|
|
uint64_t ch;
|
2015-04-09 11:48:07 +02:00
|
|
|
|
2021-11-24 13:26:15 -05:00
|
|
|
*fd = eventfd(0, 0);
|
|
|
|
if (*fd < 0)
|
|
|
|
err(EXIT_FAILURE, _("eventfd failed"));
|
|
|
|
|
|
|
|
pid = fork();
|
|
|
|
if (pid < 0)
|
2015-04-09 11:48:07 +02:00
|
|
|
err(EXIT_FAILURE, _("fork failed"));
|
2016-01-30 16:18:39 +03:00
|
|
|
|
2021-11-24 13:26:15 -05:00
|
|
|
if (!pid) {
|
|
|
|
/* wait for the our parent to tell us to continue */
|
|
|
|
if (read_all(*fd, (char *)&ch, sizeof(ch)) != sizeof(ch) ||
|
|
|
|
ch != PIPE_SYNC_BYTE)
|
|
|
|
err(EXIT_FAILURE, _("failed to read eventfd"));
|
|
|
|
close(*fd);
|
2015-04-09 11:48:07 +02:00
|
|
|
}
|
2021-11-24 13:26:15 -05:00
|
|
|
|
|
|
|
return pid;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Fork a helper (via fork_and_wait()) that bind-mounts the calling process'
 * namespace files with bind_ns_files().
 *
 * In the parent this returns the helper's pid, with the eventfd descriptor
 * stored in @fd. The helper itself never returns. Once released by the
 * parent, it exits with EXIT_FAILURE if the parent's /proc/<pid>/ns/mnt inode
 * is still the one sampled before the fork. Otherwise it performs the bind
 * mounts and exits with EXIT_SUCCESS.
 */
static pid_t bind_ns_files_from_child(int *fd)
{
	const pid_t parent = getpid();
	const ino_t before = get_mnt_ino(parent);
	const pid_t child = fork_and_wait(fd);

	if (child != 0)
		return child;

	/* helper process from here on */
	if (before == get_mnt_ino(parent))
		exit(EXIT_FAILURE);

	bind_ns_files(parent);
	exit(EXIT_SUCCESS);
}
|
|
|
|
|
2020-04-15 23:05:16 +10:00
|
|
|
/*
 * Resolve @s to a user ID. The string is first looked up with xgetpwnam();
 * when that finds no entry it is parsed with strtoul_or_err(), which is
 * given @err as its error message.
 */
static uid_t get_user(const char *s, const char *err)
{
	char *pwbuf = NULL;
	struct passwd *entry = xgetpwnam(s, &pwbuf);
	uid_t uid;

	if (!entry)
		return strtoul_or_err(s, err);

	uid = entry->pw_uid;
	free(entry);
	free(pwbuf);

	return uid;
}
|
|
|
|
|
|
|
|
/*
 * Resolve @s to a group ID. The string is first looked up with xgetgrnam();
 * when that finds no entry it is parsed with strtoul_or_err(), which is
 * given @err as its error message.
 */
static gid_t get_group(const char *s, const char *err)
{
	char *grbuf = NULL;
	struct group *entry = xgetgrnam(s, &grbuf);
	gid_t gid;

	if (!entry)
		return strtoul_or_err(s, err);

	gid = entry->gr_gid;
	free(entry);
	free(grbuf);

	return gid;
}
|
|
|
|
|
unshare: Add options to map blocks of user/group IDs
This adds the ability to map multiple user/group IDs when creating a new
user namespace. Regular processes cannot map any user other than the
effective user, so we need to use the setuid helpers newuidmap and
newgidmap, provided by shadow. Typically, users will be assigned blocks
of user/group IDs in /etc/sub{u,g}id, although it is also possible to
use NSS. There is a second advantage in using these helpers: because we
never write to /proc/self/gid_map, we don't have to disable setgroups.
Because the process of mapping IDs is almost identical, whether we are
mapping user IDs or group IDs, we put both in a common "map_range"
structure. These are read in by (ab)using string_to_idarray. In addition
to any map created with --map-users, we still need to handle a map of
size one created with --map-user. This makes constructing the helpers'
command line the trickiest part of the whole process. newuidmap/
newgidmap check to see if any ranges overlap before creating a mapping.
To avoid failing, we carve out a hole in the mapping for the singular
map. In the worst case, we may have three separate maps.
Signed-off-by: Sean Anderson <seanga2@gmail.com>
2021-11-24 13:26:16 -05:00
|
|
|
/**
 * struct map_range - One contiguous block of IDs to map
 * @outer: First ID of the block as seen outside the namespace
 * @inner: First ID of the block as seen inside the namespace
 * @count: Number of consecutive IDs in the block (same on both sides)
 *
 * Describes a range of uids/gids to be mapped using new[gu]idmap.
 */
struct map_range {
	unsigned int outer;	/* start of range outside the namespace */
	unsigned int inner;	/* start of range inside the namespace */
	unsigned int count;	/* length of the range */
};
|
|
|
|
|
|
|
|
#define UID_BUFSIZ sizeof(stringify_value(ULONG_MAX)) /* size of each string buffer in map_ids()' args[]; presumably the length of ULONG_MAX's textual form -- confirm against stringify_value() */
|
|
|
|
|
|
|
|
/**
|
|
|
|
* get_map_range() - Parse a mapping range from a string
|
2023-01-10 13:58:58 +00:00
|
|
|
* @s: A string of the format inner:outer:count or outer,inner,count
|
unshare: Add options to map blocks of user/group IDs
This adds the ability to map multiple user/group IDs when creating a new
user namespace. Regular processes cannot map any user other than the
effective user, so we need to use the setuid helpers newuidmap and
newgidmap, provided by shadow. Typically, users will be assigned blocks
of user/group IDs in /etc/sub{u,g}id, although it is also possible to
use NSS. There is a second advantage in using these helpers: because we
never write to /proc/self/gid_map, we don't have to disable setgroups.
Because the process of mapping IDs is almost identical, whether we are
mapping user IDs or group IDs, we put both in a common "map_range"
structure. These are read in by (ab)using string_to_idarray. In addition
to any map created with --map-users, we still need to handle a map of
size one created with --map-user. This makes constructing the helpers'
command line the trickiest part of the whole process. newuidmap/
newgidmap check to see if any ranges overlap before creating a mapping.
To avoid failing, we carve out a hole in the mapping for the singular
map. In the worst case, we may have three separate maps.
Signed-off-by: Sean Anderson <seanga2@gmail.com>
2021-11-24 13:26:16 -05:00
|
|
|
*
|
2023-01-10 13:58:58 +00:00
|
|
|
* Parse a string of the form inner:outer:count or outer,inner,count into
|
|
|
|
* a new mapping range.
|
unshare: Add options to map blocks of user/group IDs
This adds the ability to map multiple user/group IDs when creating a new
user namespace. Regular processes cannot map any user other than the
effective user, so we need to use the setuid helpers newuidmap and
newgidmap, provided by shadow. Typically, users will be assigned blocks
of user/group IDs in /etc/sub{u,g}id, although it is also possible to
use NSS. There is a second advantage in using these helpers: because we
never write to /proc/self/gid_map, we don't have to disable setgroups.
Because the process of mapping IDs is almost identical, whether we are
mapping user IDs or group IDs, we put both in a common "map_range"
structure. These are read in by (ab)using string_to_idarray. In addition
to any map created with --map-users, we still need to handle a map of
size one created with --map-user. This makes constructing the helpers'
command line the trickiest part of the whole process. newuidmap/
newgidmap check to see if any ranges overlap before creating a mapping.
To avoid failing, we carve out a hole in the mapping for the singular
map. In the worst case, we may have three separate maps.
Signed-off-by: Sean Anderson <seanga2@gmail.com>
2021-11-24 13:26:16 -05:00
|
|
|
*
|
|
|
|
* Return: A new &struct map_range
|
|
|
|
*/
|
|
|
|
static struct map_range *get_map_range(const char *s)
|
|
|
|
{
|
2023-01-10 13:58:58 +00:00
|
|
|
int end;
|
unshare: Add options to map blocks of user/group IDs
This adds the ability to map multiple user/group IDs when creating a new
user namespace. Regular processes cannot map any user other than the
effective user, so we need to use the setuid helpers newuidmap and
newgidmap, provided by shadow. Typically, users will be assigned blocks
of user/group IDs in /etc/sub{u,g}id, although it is also possible to
use NSS. There is a second advantage in using these helpers: because we
never write to /proc/self/gid_map, we don't have to disable setgroups.
Because the process of mapping IDs is almost identical, whether we are
mapping user IDs or group IDs, we put both in a common "map_range"
structure. These are read in by (ab)using string_to_idarray. In addition
to any map created with --map-users, we still need to handle a map of
size one created with --map-user. This makes constructing the helpers'
command line the trickiest part of the whole process. newuidmap/
newgidmap check to see if any ranges overlap before creating a mapping.
To avoid failing, we carve out a hole in the mapping for the singular
map. In the worst case, we may have three separate maps.
Signed-off-by: Sean Anderson <seanga2@gmail.com>
2021-11-24 13:26:16 -05:00
|
|
|
struct map_range *ret;
|
|
|
|
|
|
|
|
ret = xmalloc(sizeof(*ret));
|
2023-01-10 13:58:58 +00:00
|
|
|
|
|
|
|
if (sscanf(s, "%u:%u:%u%n", &ret->inner, &ret->outer, &ret->count,
|
|
|
|
&end) >= 3 && !s[end])
|
|
|
|
return ret; /* inner:outer:count */
|
|
|
|
|
|
|
|
if (sscanf(s, "%u,%u,%u%n", &ret->outer, &ret->inner, &ret->count,
|
|
|
|
&end) >= 3 && !s[end])
|
|
|
|
return ret; /* outer,inner,count */
|
|
|
|
|
|
|
|
errx(EXIT_FAILURE, _("invalid mapping '%s'"), s);
|
unshare: Add options to map blocks of user/group IDs
This adds the ability to map multiple user/group IDs when creating a new
user namespace. Regular processes cannot map any user other than the
effective user, so we need to use the setuid helpers newuidmap and
newgidmap, provided by shadow. Typically, users will be assigned blocks
of user/group IDs in /etc/sub{u,g}id, although it is also possible to
use NSS. There is a second advantage in using these helpers: because we
never write to /proc/self/gid_map, we don't have to disable setgroups.
Because the process of mapping IDs is almost identical, whether we are
mapping user IDs or group IDs, we put both in a common "map_range"
structure. These are read in by (ab)using string_to_idarray. In addition
to any map created with --map-users, we still need to handle a map of
size one created with --map-user. This makes constructing the helpers'
command line the trickiest part of the whole process. newuidmap/
newgidmap check to see if any ranges overlap before creating a mapping.
To avoid failing, we carve out a hole in the mapping for the singular
map. In the worst case, we may have three separate maps.
Signed-off-by: Sean Anderson <seanga2@gmail.com>
2021-11-24 13:26:16 -05:00
|
|
|
}
|
|
|
|
|
2021-11-24 13:26:17 -05:00
|
|
|
/**
|
|
|
|
* read_subid_range() - Look up a user's sub[gu]id range
|
|
|
|
* @filename: The file to look up the range from. This should be either
|
|
|
|
* ``/etc/subuid`` or ``/etc/subgid``.
|
|
|
|
* @uid: The uid of the user whose range we should look up.
|
|
|
|
*
|
|
|
|
* This finds the first subid range matching @uid in @filename.
|
|
|
|
*/
|
|
|
|
static struct map_range *read_subid_range(char *filename, uid_t uid)
|
|
|
|
{
|
|
|
|
char *line = NULL, *pwbuf;
|
|
|
|
FILE *idmap;
|
2022-02-14 13:00:36 +01:00
|
|
|
size_t n = 0;
|
2021-11-24 13:26:17 -05:00
|
|
|
struct passwd *pw;
|
|
|
|
struct map_range *map;
|
|
|
|
|
|
|
|
map = xmalloc(sizeof(*map));
|
2022-08-24 12:52:55 +01:00
|
|
|
map->inner = -1;
|
2021-11-24 13:26:17 -05:00
|
|
|
|
|
|
|
pw = xgetpwuid(uid, &pwbuf);
|
|
|
|
if (!pw)
|
|
|
|
errx(EXIT_FAILURE, _("you (user %d) don't exist."), uid);
|
|
|
|
|
|
|
|
idmap = fopen(filename, "r");
|
|
|
|
if (!idmap)
|
|
|
|
err(EXIT_FAILURE, _("could not open '%s'"), filename);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Each line in sub[ug]idmap looks like
|
|
|
|
* username:subuid:count
|
|
|
|
* OR
|
|
|
|
* uid:subuid:count
|
|
|
|
*/
|
|
|
|
while (getline(&line, &n, idmap) != -1) {
|
|
|
|
char *rest, *s;
|
|
|
|
|
|
|
|
rest = strchr(line, ':');
|
|
|
|
if (!rest)
|
|
|
|
continue;
|
|
|
|
*rest = '\0';
|
|
|
|
|
|
|
|
if (strcmp(line, pw->pw_name) &&
|
|
|
|
strtoul(line, NULL, 10) != pw->pw_uid)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
s = rest + 1;
|
|
|
|
rest = strchr(s, ':');
|
|
|
|
if (!rest)
|
|
|
|
continue;
|
|
|
|
*rest = '\0';
|
|
|
|
map->outer = strtoul_or_err(s, _("failed to parse subid map"));
|
|
|
|
|
|
|
|
s = rest + 1;
|
|
|
|
rest = strchr(s, '\n');
|
|
|
|
if (rest)
|
|
|
|
*rest = '\0';
|
|
|
|
map->count = strtoul_or_err(s, _("failed to parse subid map"));
|
|
|
|
|
|
|
|
fclose(idmap);
|
2021-12-02 10:24:56 +01:00
|
|
|
free(pw);
|
|
|
|
free(pwbuf);
|
|
|
|
|
2021-11-24 13:26:17 -05:00
|
|
|
return map;
|
|
|
|
}
|
|
|
|
|
2022-12-25 16:34:37 +00:00
|
|
|
errx(EXIT_FAILURE, _("no line matching user \"%s\" in %s"),
|
2021-11-24 13:26:17 -05:00
|
|
|
pw->pw_name, filename);
|
|
|
|
}
|
|
|
|
|
unshare: Add options to map blocks of user/group IDs
This adds the ability to map multiple user/group IDs when creating a new
user namespace. Regular processes cannot map any user other than the
effective user, so we need to use the setuid helpers newuidmap and
newgidmap, provided by shadow. Typically, users will be assigned blocks
of user/group IDs in /etc/sub{u,g}id, although it is also possible to
use NSS. There is a second advantage in using these helpers: because we
never write to /proc/self/gid_map, we don't have to disable setgroups.
Because the process of mapping IDs is almost identical, whether we are
mapping user IDs or group IDs, we put both in a common "map_range"
structure. These are read in by (ab)using string_to_idarray. In addition
to any map created with --map-users, we still need to handle a map of
size one created with --map-user. This makes constructing the helpers'
command line the trickiest part of the whole process. newuidmap/
newgidmap check to see if any ranges overlap before creating a mapping.
To avoid failing, we carve out a hole in the mapping for the singular
map. In the worst case, we may have three separate maps.
Signed-off-by: Sean Anderson <seanga2@gmail.com>
2021-11-24 13:26:16 -05:00
|
|
|
/**
|
|
|
|
* map_ids() - Create a new uid/gid map
|
|
|
|
* @idmapper: Either newuidmap or newgidmap
|
|
|
|
* @ppid: Pid to set the map for
|
|
|
|
* @outer: ID outside the namespace for a single map.
|
|
|
|
* @inner: ID inside the namespace for a single map. May be -1 to only use @map.
|
|
|
|
* @map: A range of IDs to map
|
|
|
|
*
|
|
|
|
* This creates a new uid/gid map for @ppid using @idmapper. The ID @outer in
|
|
|
|
* the parent (our) namespace is mapped to the ID @inner in the child (@ppid's)
|
|
|
|
* namespace. In addition, the range of IDs beginning at @map->outer is mapped
|
|
|
|
* to the range of IDs beginning at @map->inner. The tricky bit is that we
|
|
|
|
* cannot let these mappings overlap. We accomplish this by removing a "hole"
|
|
|
|
* from @map, if @outer or @inner overlap it. This may result in one less than
|
|
|
|
* @map->count IDs being mapped from @map. The unmapped IDs are always the
|
|
|
|
* topmost IDs of the mapping (either in the parent or the child namespace).
|
|
|
|
*
|
|
|
|
* Most of the time, this function will be called with @map->outer as some
|
|
|
|
* large ID, @map->inner as 0, and @map->count as a large number (at least
|
|
|
|
* 1000, but less than @map->outer). Typically, there will be no conflict with
|
|
|
|
* @outer. However, @inner may split the mapping for e.g. --map-current-user.
|
|
|
|
*
|
|
|
|
* This function always exec()s or errors out and does not return.
|
|
|
|
*/
|
|
|
|
static void __attribute__((__noreturn__))
|
|
|
|
map_ids(const char *idmapper, int ppid, unsigned int outer, unsigned int inner,
|
|
|
|
struct map_range *map)
|
|
|
|
{
|
|
|
|
/* idmapper + pid + 4 * map + NULL */
|
|
|
|
char *argv[15];
|
|
|
|
/* argv - idmapper - "1" - NULL */
|
|
|
|
char args[12][UID_BUFSIZ];
|
|
|
|
int i = 0, j = 0;
|
|
|
|
struct map_range lo, mid, hi;
|
|
|
|
unsigned int inner_offset, outer_offset;
|
|
|
|
|
|
|
|
/* Some helper macros to reduce bookkeeping */
|
|
|
|
#define push_str(s) do { \
|
|
|
|
argv[i++] = s; \
|
|
|
|
} while (0)
|
|
|
|
#define push_ul(x) do { \
|
|
|
|
snprintf(args[j], sizeof(args[j]), "%u", x); \
|
|
|
|
push_str(args[j++]); \
|
|
|
|
} while (0)
|
|
|
|
|
|
|
|
push_str(xstrdup(idmapper));
|
|
|
|
push_ul(ppid);
|
|
|
|
if ((int)inner == -1) {
|
|
|
|
/*
|
2022-08-24 12:52:55 +01:00
|
|
|
* If we don't have a "single" mapping, then we can just use map
|
|
|
|
* directly, starting inner IDs from zero for an auto mapping
|
unshare: Add options to map blocks of user/group IDs
This adds the ability to map multiple user/group IDs when creating a new
user namespace. Regular processes cannot map any user other than the
effective user, so we need to use the setuid helpers newuidmap and
newgidmap, provided by shadow. Typically, users will be assigned blocks
of user/group IDs in /etc/sub{u,g}id, although it is also possible to
use NSS. There is a second advantage in using these helpers: because we
never write to /proc/self/gid_map, we don't have to disable setgroups.
Because the process of mapping IDs is almost identical, whether we are
mapping user IDs or group IDs, we put both in a common "map_range"
structure. These are read in by (ab)using string_to_idarray. In addition
to any map created with --map-users, we still need to handle a map of
size one created with --map-user. This makes constructing the helpers'
command line the trickiest part of the whole process. newuidmap/
newgidmap check to see if any ranges overlap before creating a mapping.
To avoid failing, we carve out a hole in the mapping for the singular
map. In the worst case, we may have three separate maps.
Signed-off-by: Sean Anderson <seanga2@gmail.com>
2021-11-24 13:26:16 -05:00
|
|
|
*/
|
2022-08-24 12:52:55 +01:00
|
|
|
push_ul(map->inner + 1 ? map->inner : 0);
|
unshare: Add options to map blocks of user/group IDs
This adds the ability to map multiple user/group IDs when creating a new
user namespace. Regular processes cannot map any user other than the
effective user, so we need to use the setuid helpers newuidmap and
newgidmap, provided by shadow. Typically, users will be assigned blocks
of user/group IDs in /etc/sub{u,g}id, although it is also possible to
use NSS. There is a second advantage in using these helpers: because we
never write to /proc/self/gid_map, we don't have to disable setgroups.
Because the process of mapping IDs is almost identical, whether we are
mapping user IDs or group IDs, we put both in a common "map_range"
structure. These are read in by (ab)using string_to_idarray. In addition
to any map created with --map-users, we still need to handle a map of
size one created with --map-user. This makes constructing the helpers'
command line the trickiest part of the whole process. newuidmap/
newgidmap check to see if any ranges overlap before creating a mapping.
To avoid failing, we carve out a hole in the mapping for the singular
map. In the worst case, we may have three separate maps.
Signed-off-by: Sean Anderson <seanga2@gmail.com>
2021-11-24 13:26:16 -05:00
|
|
|
push_ul(map->outer);
|
|
|
|
push_ul(map->count);
|
|
|
|
push_str(NULL);
|
|
|
|
|
|
|
|
execvp(idmapper, argv);
|
|
|
|
errexec(idmapper);
|
|
|
|
}
|
|
|
|
|
2022-08-24 12:52:55 +01:00
|
|
|
/*
|
|
|
|
* Start inner IDs from zero for an auto mapping; otherwise, if the two
|
|
|
|
* fixed mappings overlap, remove an ID from map
|
|
|
|
*/
|
|
|
|
if (map->inner + 1 == 0)
|
|
|
|
map->inner = 0;
|
|
|
|
else if ((outer >= map->outer && outer <= map->outer + map->count) ||
|
|
|
|
(inner >= map->inner && inner <= map->inner + map->count))
|
unshare: Add options to map blocks of user/group IDs
This adds the ability to map multiple user/group IDs when creating a new
user namespace. Regular processes cannot map any user other than the
effective user, so we need to use the setuid helpers newuidmap and
newgidmap, provided by shadow. Typically, users will be assigned blocks
of user/group IDs in /etc/sub{u,g}id, although it is also possible to
use NSS. There is a second advantage in using these helpers: because we
never write to /proc/self/gid_map, we don't have to disable setgroups.
Because the process of mapping IDs is almost identical, whether we are
mapping user IDs or group IDs, we put both in a common "map_range"
structure. These are read in by (ab)using string_to_idarray. In addition
to any map created with --map-users, we still need to handle a map of
size one created with --map-user. This makes constructing the helpers'
command line the trickiest part of the whole process. newuidmap/
newgidmap check to see if any ranges overlap before creating a mapping.
To avoid failing, we carve out a hole in the mapping for the singular
map. In the worst case, we may have three separate maps.
Signed-off-by: Sean Anderson <seanga2@gmail.com>
2021-11-24 13:26:16 -05:00
|
|
|
map->count--;
|
|
|
|
|
|
|
|
/* Determine where the splits between lo, mid, and hi will be */
|
|
|
|
outer_offset = min(outer > map->outer ? outer - map->outer : 0,
|
|
|
|
map->count);
|
|
|
|
inner_offset = min(inner > map->inner ? inner - map->inner : 0,
|
|
|
|
map->count);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* In the worst case, we need three mappings:
|
|
|
|
* From the bottom of map to either inner or outer
|
|
|
|
*/
|
|
|
|
lo.outer = map->outer;
|
|
|
|
lo.inner = map->inner;
|
|
|
|
lo.count = min(inner_offset, outer_offset);
|
|
|
|
|
|
|
|
/* From the lower of inner or outer to the higher */
|
|
|
|
mid.outer = lo.outer + lo.count;
|
|
|
|
mid.outer += mid.outer == outer;
|
|
|
|
mid.inner = lo.inner + lo.count;
|
|
|
|
mid.inner += mid.inner == inner;
|
|
|
|
mid.count = abs_diff(outer_offset, inner_offset);
|
|
|
|
|
|
|
|
/* And from the higher of inner or outer to the end of the map */
|
|
|
|
hi.outer = mid.outer + mid.count;
|
|
|
|
hi.outer += hi.outer == outer;
|
|
|
|
hi.inner = mid.inner + mid.count;
|
|
|
|
hi.inner += hi.inner == inner;
|
|
|
|
hi.count = map->count - lo.count - mid.count;
|
|
|
|
|
|
|
|
push_ul(inner);
|
|
|
|
push_ul(outer);
|
|
|
|
push_str("1");
|
|
|
|
/* new[gu]idmap doesn't like zero-length mappings, so skip them */
|
|
|
|
if (lo.count) {
|
|
|
|
push_ul(lo.inner);
|
|
|
|
push_ul(lo.outer);
|
|
|
|
push_ul(lo.count);
|
|
|
|
}
|
|
|
|
if (mid.count) {
|
|
|
|
push_ul(mid.inner);
|
|
|
|
push_ul(mid.outer);
|
|
|
|
push_ul(mid.count);
|
|
|
|
}
|
|
|
|
if (hi.count) {
|
|
|
|
push_ul(hi.inner);
|
|
|
|
push_ul(hi.outer);
|
|
|
|
push_ul(hi.count);
|
|
|
|
}
|
|
|
|
push_str(NULL);
|
|
|
|
execvp(idmapper, argv);
|
|
|
|
errexec(idmapper);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * map_ids_from_child() - Set up a new uid/gid map from a helper process
 * @fd: The eventfd to wait on; handed to fork_and_wait()
 * @mapuser: The user to map the current user to (or -1)
 * @usermap: The range of UIDs to map (or %NULL)
 * @mapgroup: The group to map the current group to (or -1)
 * @groupmap: The range of GIDs to map (or %NULL)
 *
 * The caller's PID is recorded first, then a helper is created with
 * fork_and_wait(), which waits for the caller to call sync_with_child() on
 * @fd. Upon receiving the go-ahead, the helper runs newuidmap and/or
 * newgidmap (through map_ids()) against the recorded PID.
 *
 * map_ids() finishes by exec'ing the mapping tool, so when both maps are
 * requested the helper forks once more: the extra process handles the uid
 * map while the helper waits for it and then handles the gid map itself.
 *
 * Return: The pid of the child. Only the original caller returns; the helper
 * ends in exit() (or in the exec performed by map_ids()).
 */
static pid_t map_ids_from_child(int *fd, uid_t mapuser,
				struct map_range *usermap, gid_t mapgroup,
				struct map_range *groupmap)
{
	/* Sample our own PID before forking: the maps are written for it */
	const pid_t target = getpid();
	const pid_t helper = fork_and_wait(fd);
	pid_t uid_worker = 0;

	if (helper != 0)
		return helper;

	/*
	 * From here on we are the helper. A second process is only needed
	 * when both maps have to be written; with a single map the helper
	 * runs the mapping tool directly.
	 */
	if (usermap != NULL && groupmap != NULL) {
		uid_worker = fork();
		if (uid_worker < 0)
			err(EXIT_FAILURE, _("fork failed"));
		if (uid_worker != 0)
			waitchild(uid_worker);
	}

	/* uid_worker stays 0 both in the extra child and when no fork happened */
	if (usermap != NULL && uid_worker == 0)
		map_ids("newuidmap", target, geteuid(), mapuser, usermap);

	if (groupmap != NULL)
		map_ids("newgidmap", target, getegid(), mapgroup, groupmap);

	exit(EXIT_SUCCESS);
}
|
|
|
|
|
2017-06-20 20:30:29 +02:00
|
|
|
static void __attribute__((__noreturn__)) usage(void)
|
2009-10-04 02:42:08 +07:00
|
|
|
{
|
2017-06-20 20:30:29 +02:00
|
|
|
FILE *out = stdout;
|
2009-10-04 02:42:08 +07:00
|
|
|
|
2011-10-30 14:35:06 +01:00
|
|
|
fputs(USAGE_HEADER, out);
|
2017-01-09 12:48:14 +01:00
|
|
|
fprintf(out, _(" %s [options] [<program> [<argument>...]]\n"),
|
2014-09-22 22:15:39 +02:00
|
|
|
program_invocation_short_name);
|
2009-10-04 02:42:08 +07:00
|
|
|
|
2014-12-22 22:57:17 +01:00
|
|
|
fputs(USAGE_SEPARATOR, out);
|
|
|
|
fputs(_("Run a program with some namespaces unshared from the parent.\n"), out);
|
|
|
|
|
2011-10-30 14:35:06 +01:00
|
|
|
fputs(USAGE_OPTIONS, out);
|
2015-04-09 11:34:02 +02:00
|
|
|
fputs(_(" -m, --mount[=<file>] unshare mounts namespace\n"), out);
|
|
|
|
fputs(_(" -u, --uts[=<file>] unshare UTS namespace (hostname etc)\n"), out);
|
|
|
|
fputs(_(" -i, --ipc[=<file>] unshare System V IPC namespace\n"), out);
|
|
|
|
fputs(_(" -n, --net[=<file>] unshare network namespace\n"), out);
|
|
|
|
fputs(_(" -p, --pid[=<file>] unshare pid namespace\n"), out);
|
|
|
|
fputs(_(" -U, --user[=<file>] unshare user namespace\n"), out);
|
2016-03-02 17:53:42 -08:00
|
|
|
fputs(_(" -C, --cgroup[=<file>] unshare cgroup namespace\n"), out);
|
2020-03-09 12:20:51 +00:00
|
|
|
fputs(_(" -T, --time[=<file>] unshare time namespace\n"), out);
|
2018-09-11 12:43:03 +02:00
|
|
|
fputs(USAGE_SEPARATOR, out);
|
2013-07-03 12:28:16 +02:00
|
|
|
fputs(_(" -f, --fork fork before launching <program>\n"), out);
|
2020-04-15 23:05:16 +10:00
|
|
|
fputs(_(" --map-user=<uid>|<name> map current user to uid (implies --user)\n"), out);
|
|
|
|
fputs(_(" --map-group=<gid>|<name> map current group to gid (implies --user)\n"), out);
|
2013-12-27 22:14:48 +01:00
|
|
|
fputs(_(" -r, --map-root-user map current user to root (implies --user)\n"), out);
|
2019-01-17 14:17:54 -08:00
|
|
|
fputs(_(" -c, --map-current-user map current user to itself (implies --user)\n"), out);
|
2021-11-24 13:26:17 -05:00
|
|
|
fputs(_(" --map-auto map users and groups automatically (implies --user)\n"), out);
|
2023-01-10 13:58:58 +00:00
|
|
|
fputs(_(" --map-users=<inneruid>:<outeruid>:<count>\n"
|
unshare: Add options to map blocks of user/group IDs
This adds the ability to map multiple user/group IDs when creating a new
user namespace. Regular processes cannot map any user other than the
effective user, so we need to use the setuid helpers newuidmap and
newgidmap, provided by shadow. Typically, users will be assigned blocks
of user/group IDs in /etc/sub{u,g}id, although it is also possible to
use NSS. There is a second advantage in using these helpers: because we
never write to /proc/self/gid_map, we don't have to disable setgroups.
Because the process of mapping IDs is almost identical, whether we are
mapping user IDs or group IDs, we put both in a common "map_range"
structure. These are read in by (ab)using string_to_idarray. In addition
to any map created with --map-users, we still need to handle a map of
size one created with --map-user. This makes constructing the helpers'
command line the trickiest part of the whole process. newuidmap/
newgidmap check to see if any ranges overlap before creating a mapping.
To avoid failing, we carve out a hole in the mapping for the singular
map. In the worst case, we may have three separate maps.
Signed-off-by: Sean Anderson <seanga2@gmail.com>
2021-11-24 13:26:16 -05:00
|
|
|
" map count users from outeruid to inneruid (implies --user)\n"), out);
|
2023-01-10 13:58:58 +00:00
|
|
|
fputs(_(" --map-groups=<innergid>:<outergid>:<count>\n"
|
unshare: Add options to map blocks of user/group IDs
This adds the ability to map multiple user/group IDs when creating a new
user namespace. Regular processes cannot map any user other than the
effective user, so we need to use the setuid helpers newuidmap and
newgidmap, provided by shadow. Typically, users will be assigned blocks
of user/group IDs in /etc/sub{u,g}id, although it is also possible to
use NSS. There is a second advantage in using these helpers: because we
never write to /proc/self/gid_map, we don't have to disable setgroups.
Because the process of mapping IDs is almost identical, whether we are
mapping user IDs or group IDs, we put both in a common "map_range"
structure. These are read in by (ab)using string_to_idarray. In addition
to any map created with --map-users, we still need to handle a map of
size one created with --map-user. This makes constructing the helpers'
command line the trickiest part of the whole process. newuidmap/
newgidmap check to see if any ranges overlap before creating a mapping.
To avoid failing, we carve out a hole in the mapping for the singular
map. In the worst case, we may have three separate maps.
Signed-off-by: Sean Anderson <seanga2@gmail.com>
2021-11-24 13:26:16 -05:00
|
|
|
" map count groups from outergid to innergid (implies --user)\n"), out);
|
2018-09-11 12:43:03 +02:00
|
|
|
fputs(USAGE_SEPARATOR, out);
|
|
|
|
fputs(_(" --kill-child[=<signame>] when dying, kill the forked child (implies --fork)\n"
|
|
|
|
" defaults to SIGKILL\n"), out);
|
|
|
|
fputs(_(" --mount-proc[=<dir>] mount proc filesystem first (implies --mount)\n"), out);
|
|
|
|
fputs(_(" --propagation slave|shared|private|unchanged\n"
|
2015-03-18 15:13:15 +01:00
|
|
|
" modify mount propagation in mount namespace\n"), out);
|
2018-09-11 12:43:03 +02:00
|
|
|
fputs(_(" --setgroups allow|deny control the setgroups syscall in user namespaces\n"), out);
|
2019-01-17 14:16:54 -08:00
|
|
|
fputs(_(" --keep-caps retain capabilities granted in user namespaces\n"), out);
|
2018-10-05 13:09:29 +02:00
|
|
|
fputs(USAGE_SEPARATOR, out);
|
2020-03-06 12:03:50 +01:00
|
|
|
fputs(_(" -R, --root=<dir> run the command with root directory set to <dir>\n"), out);
|
|
|
|
fputs(_(" -w, --wd=<dir> change working directory to <dir>\n"), out);
|
|
|
|
fputs(_(" -S, --setuid <uid> set uid in entered namespace\n"), out);
|
|
|
|
fputs(_(" -G, --setgid <gid> set gid in entered namespace\n"), out);
|
unshare: support the time namespace
This adds support to unshare for time namespaces. With the newly added
options '-t, --time' and '--monotonic' and '--boottime' it is now
possible to change CLOCK_MONOTONIC and CLOCK_BOOTTIME in a new time
namespace.
The time namespace has been merged in kernel version 5.6 and an easy way
to test it is using CLOCK_BOOTTIME and the uptime command:
# uptime
11:08:26 up 20:28, 1 user, load average: 0.00, 0.00, 0.00
# ./unshare --fork --time --boottime 100000000 uptime
11:08:29 up 1158 days, 6:15, 1 user, load average: 0.00, 0.00, 0.00
Signed-off-by: Adrian Reber <areber@redhat.com>
2020-03-06 12:05:00 +01:00
|
|
|
fputs(_(" --monotonic <offset> set clock monotonic offset (seconds) in time namespaces\n"), out);
|
|
|
|
fputs(_(" --boottime <offset> set clock boottime offset (seconds) in time namespaces\n"), out);
|
2009-10-04 02:42:08 +07:00
|
|
|
|
2011-10-30 14:35:06 +01:00
|
|
|
fputs(USAGE_SEPARATOR, out);
|
2017-06-29 15:52:16 +02:00
|
|
|
printf(USAGE_HELP_OPTIONS(27));
|
|
|
|
printf(USAGE_MAN_TAIL("unshare(1)"));
|
2011-10-30 14:35:06 +01:00
|
|
|
|
2017-06-20 20:30:29 +02:00
|
|
|
exit(EXIT_SUCCESS);
|
2009-10-04 02:42:08 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
int main(int argc, char *argv[])
|
|
|
|
{
|
2013-07-03 12:28:16 +02:00
|
|
|
enum {
|
2015-01-08 11:51:58 +01:00
|
|
|
OPT_MOUNTPROC = CHAR_MAX + 1,
|
2015-03-18 15:13:15 +01:00
|
|
|
OPT_PROPAGATION,
|
2017-09-19 20:39:00 +02:00
|
|
|
OPT_SETGROUPS,
|
2018-10-05 13:09:29 +02:00
|
|
|
OPT_KILLCHILD,
|
2019-01-17 14:16:54 -08:00
|
|
|
OPT_KEEPCAPS,
|
unshare: support the time namespace
This adds support to unshare for time namespaces. With the newly added
options '-t, --time' and '--monotonic' and '--boottime' it is now
possible to change CLOCK_MONOTONIC and CLOCK_BOOTTIME in a new time
namespace.
The time namespace has been merged in kernel version 5.6 and an easy way
to test it is using CLOCK_BOOTTIME and the uptime command:
# uptime
11:08:26 up 20:28, 1 user, load average: 0.00, 0.00, 0.00
# ./unshare --fork --time --boottime 100000000 uptime
11:08:29 up 1158 days, 6:15, 1 user, load average: 0.00, 0.00, 0.00
Signed-off-by: Adrian Reber <areber@redhat.com>
2020-03-06 12:05:00 +01:00
|
|
|
OPT_MONOTONIC,
|
|
|
|
OPT_BOOTTIME,
|
2020-01-04 09:11:30 +11:00
|
|
|
OPT_MAPUSER,
|
unshare: Add options to map blocks of user/group IDs
This adds the ability to map multiple user/group IDs when creating a new
user namespace. Regular processes cannot map any user other than the
effective user, so we need to use the setuid helpers newuidmap and
newgidmap, provided by shadow. Typically, users will be assigned blocks
of user/group IDs in /etc/sub{u,g}id, although it is also possible to
use NSS. There is a second advantage in using these helpers: because we
never write to /proc/self/gid_map, we don't have to disable setgroups.
Because the process of mapping IDs is almost identical, whether we are
mapping user IDs or group IDs, we put both in a common "map_range"
structure. These are read in by (ab)using string_to_idarray. In addition
to any map created with --map-users, we still need to handle a map of
size one created with --map-user. This makes constructing the helpers'
command line the trickiest part of the whole process. newuidmap/
newgidmap check to see if any ranges overlap before creating a mapping.
To avoid failing, we carve out a hole in the mapping for the singular
map. In the worst case, we may have three separate maps.
Signed-off-by: Sean Anderson <seanga2@gmail.com>
2021-11-24 13:26:16 -05:00
|
|
|
OPT_MAPUSERS,
|
2020-01-04 09:11:30 +11:00
|
|
|
OPT_MAPGROUP,
|
unshare: Add options to map blocks of user/group IDs
This adds the ability to map multiple user/group IDs when creating a new
user namespace. Regular processes cannot map any user other than the
effective user, so we need to use the setuid helpers newuidmap and
newgidmap, provided by shadow. Typically, users will be assigned blocks
of user/group IDs in /etc/sub{u,g}id, although it is also possible to
use NSS. There is a second advantage in using these helpers: because we
never write to /proc/self/gid_map, we don't have to disable setgroups.
Because the process of mapping IDs is almost identical, whether we are
mapping user IDs or group IDs, we put both in a common "map_range"
structure. These are read in by (ab)using string_to_idarray. In addition
to any map created with --map-users, we still need to handle a map of
size one created with --map-user. This makes constructing the helpers'
command line the trickiest part of the whole process. newuidmap/
newgidmap check to see if any ranges overlap before creating a mapping.
To avoid failing, we carve out a hole in the mapping for the singular
map. In the worst case, we may have three separate maps.
Signed-off-by: Sean Anderson <seanga2@gmail.com>
2021-11-24 13:26:16 -05:00
|
|
|
OPT_MAPGROUPS,
|
2021-11-24 13:26:17 -05:00
|
|
|
OPT_MAPAUTO,
|
2013-07-03 12:28:16 +02:00
|
|
|
};
|
2011-03-03 15:00:30 +01:00
|
|
|
static const struct option longopts[] = {
|
2017-02-11 20:23:26 +00:00
|
|
|
{ "help", no_argument, NULL, 'h' },
|
|
|
|
{ "version", no_argument, NULL, 'V' },
|
|
|
|
|
|
|
|
{ "mount", optional_argument, NULL, 'm' },
|
|
|
|
{ "uts", optional_argument, NULL, 'u' },
|
|
|
|
{ "ipc", optional_argument, NULL, 'i' },
|
|
|
|
{ "net", optional_argument, NULL, 'n' },
|
|
|
|
{ "pid", optional_argument, NULL, 'p' },
|
|
|
|
{ "user", optional_argument, NULL, 'U' },
|
|
|
|
{ "cgroup", optional_argument, NULL, 'C' },
|
2020-03-09 12:20:51 +00:00
|
|
|
{ "time", optional_argument, NULL, 'T' },
|
2017-02-11 20:23:26 +00:00
|
|
|
|
|
|
|
{ "fork", no_argument, NULL, 'f' },
|
2017-10-14 04:31:57 +02:00
|
|
|
{ "kill-child", optional_argument, NULL, OPT_KILLCHILD },
|
2017-02-11 20:23:26 +00:00
|
|
|
{ "mount-proc", optional_argument, NULL, OPT_MOUNTPROC },
|
2020-01-04 09:11:30 +11:00
|
|
|
{ "map-user", required_argument, NULL, OPT_MAPUSER },
|
unshare: Add options to map blocks of user/group IDs
This adds the ability to map multiple user/group IDs when creating a new
user namespace. Regular processes cannot map any user other than the
effective user, so we need to use the setuid helpers newuidmap and
newgidmap, provided by shadow. Typically, users will be assigned blocks
of user/group IDs in /etc/sub{u,g}id, although it is also possible to
use NSS. There is a second advantage in using these helpers: because we
never write to /proc/self/gid_map, we don't have to disable setgroups.
Because the process of mapping IDs is almost identical, whether we are
mapping user IDs or group IDs, we put both in a common "map_range"
structure. These are read in by (ab)using string_to_idarray. In addition
to any map created with --map-users, we still need to handle a map of
size one created with --map-user. This makes constructing the helpers'
command line the trickiest part of the whole process. newuidmap/
newgidmap check to see if any ranges overlap before creating a mapping.
To avoid failing, we carve out a hole in the mapping for the singular
map. In the worst case, we may have three separate maps.
Signed-off-by: Sean Anderson <seanga2@gmail.com>
2021-11-24 13:26:16 -05:00
|
|
|
{ "map-users", required_argument, NULL, OPT_MAPUSERS },
|
2020-01-04 09:11:30 +11:00
|
|
|
{ "map-group", required_argument, NULL, OPT_MAPGROUP },
|
unshare: Add options to map blocks of user/group IDs
This adds the ability to map multiple user/group IDs when creating a new
user namespace. Regular processes cannot map any user other than the
effective user, so we need to use the setuid helpers newuidmap and
newgidmap, provided by shadow. Typically, users will be assigned blocks
of user/group IDs in /etc/sub{u,g}id, although it is also possible to
use NSS. There is a second advantage in using these helpers: because we
never write to /proc/self/gid_map, we don't have to disable setgroups.
Because the process of mapping IDs is almost identical, whether we are
mapping user IDs or group IDs, we put both in a common "map_range"
structure. These are read in by (ab)using string_to_idarray. In addition
to any map created with --map-users, we still need to handle a map of
size one created with --map-user. This makes constructing the helpers'
command line the trickiest part of the whole process. newuidmap/
newgidmap check to see if any ranges overlap before creating a mapping.
To avoid failing, we carve out a hole in the mapping for the singular
map. In the worst case, we may have three separate maps.
Signed-off-by: Sean Anderson <seanga2@gmail.com>
2021-11-24 13:26:16 -05:00
|
|
|
{ "map-groups", required_argument, NULL, OPT_MAPGROUPS },
|
2017-02-11 20:23:26 +00:00
|
|
|
{ "map-root-user", no_argument, NULL, 'r' },
|
2019-01-17 14:17:54 -08:00
|
|
|
{ "map-current-user", no_argument, NULL, 'c' },
|
2021-11-24 13:26:17 -05:00
|
|
|
{ "map-auto", no_argument, NULL, OPT_MAPAUTO },
|
2017-02-11 20:23:26 +00:00
|
|
|
{ "propagation", required_argument, NULL, OPT_PROPAGATION },
|
|
|
|
{ "setgroups", required_argument, NULL, OPT_SETGROUPS },
|
2019-01-17 14:16:54 -08:00
|
|
|
{ "keep-caps", no_argument, NULL, OPT_KEEPCAPS },
|
2018-10-05 13:09:30 +02:00
|
|
|
{ "setuid", required_argument, NULL, 'S' },
|
|
|
|
{ "setgid", required_argument, NULL, 'G' },
|
2018-10-05 13:09:29 +02:00
|
|
|
{ "root", required_argument, NULL, 'R' },
|
|
|
|
{ "wd", required_argument, NULL, 'w' },
|
unshare: support the time namespace
This adds support to unshare for time namespaces. With the newly added
options '-t, --time' and '--monotonic' and '--boottime' it is now
possible to change CLOCK_MONOTONIC and CLOCK_BOOTTIME in a new time
namespace.
The time namespace has been merged in kernel version 5.6 and an easy way
to test it is using CLOCK_BOOTTIME and the uptime command:
# uptime
11:08:26 up 20:28, 1 user, load average: 0.00, 0.00, 0.00
# ./unshare --fork --time --boottime 100000000 uptime
11:08:29 up 1158 days, 6:15, 1 user, load average: 0.00, 0.00, 0.00
Signed-off-by: Adrian Reber <areber@redhat.com>
2020-03-06 12:05:00 +01:00
|
|
|
{ "monotonic", required_argument, NULL, OPT_MONOTONIC },
|
|
|
|
{ "boottime", required_argument, NULL, OPT_BOOTTIME },
|
2017-02-11 20:23:26 +00:00
|
|
|
{ NULL, 0, NULL, 0 }
|
2009-10-04 02:42:08 +07:00
|
|
|
};
|
|
|
|
|
2015-01-08 11:51:58 +01:00
|
|
|
int setgrpcmd = SETGROUPS_NONE;
|
2009-10-04 02:42:08 +07:00
|
|
|
int unshare_flags = 0;
|
2020-01-04 09:11:30 +11:00
|
|
|
int c, forkit = 0;
|
|
|
|
uid_t mapuser = -1;
|
|
|
|
gid_t mapgroup = -1;
|
unshare: Add options to map blocks of user/group IDs
This adds the ability to map multiple user/group IDs when creating a new
user namespace. Regular processes cannot map any user other than the
effective user, so we need to use the setuid helpers newuidmap and
newgidmap, provided by shadow. Typically, users will be assigned blocks
of user/group IDs in /etc/sub{u,g}id, although it is also possible to
use NSS. There is a second advantage in using these helpers: because we
never write to /proc/self/gid_map, we don't have to disable setgroups.
Because the process of mapping IDs is almost identical, whether we are
mapping user IDs or group IDs, we put both in a common "map_range"
structure. These are read in by (ab)using string_to_idarray. In addition
to any map created with --map-users, we still need to handle a map of
size one created with --map-user. This makes constructing the helpers'
command line the trickiest part of the whole process. newuidmap/
newgidmap check to see if any ranges overlap before creating a mapping.
To avoid failing, we carve out a hole in the mapping for the singular
map. In the worst case, we may have three separate maps.
Signed-off-by: Sean Anderson <seanga2@gmail.com>
2021-11-24 13:26:16 -05:00
|
|
|
struct map_range *usermap = NULL;
|
|
|
|
struct map_range *groupmap = NULL;
|
2017-10-14 04:31:57 +02:00
|
|
|
int kill_child_signo = 0; /* 0 means --kill-child was not used */
|
2013-07-03 12:28:16 +02:00
|
|
|
const char *procmnt = NULL;
|
2018-10-05 13:09:29 +02:00
|
|
|
const char *newroot = NULL;
|
|
|
|
const char *newdir = NULL;
|
unshare: Add options to map blocks of user/group IDs
This adds the ability to map multiple user/group IDs when creating a new
user namespace. Regular processes cannot map any user other than the
effective user, so we need to use the setuid helpers newuidmap and
newgidmap, provided by shadow. Typically, users will be assigned blocks
of user/group IDs in /etc/sub{u,g}id, although it is also possible to
use NSS. There is a second advantage in using these helpers: because we
never write to /proc/self/gid_map, we don't have to disable setgroups.
Because the process of mapping IDs is almost identical, whether we are
mapping user IDs or group IDs, we put both in a common "map_range"
structure. These are read in by (ab)using string_to_idarray. In addition
to any map created with --map-users, we still need to handle a map of
size one created with --map-user. This makes constructing the helpers'
command line the trickiest part of the whole process. newuidmap/
newgidmap check to see if any ranges overlap before creating a mapping.
To avoid failing, we carve out a hole in the mapping for the singular
map. In the worst case, we may have three separate maps.
Signed-off-by: Sean Anderson <seanga2@gmail.com>
2021-11-24 13:26:16 -05:00
|
|
|
pid_t pid_bind = 0, pid_idmap = 0;
|
2022-02-28 11:02:04 +01:00
|
|
|
pid_t pid = 0;
|
2022-03-31 10:55:30 +02:00
|
|
|
#ifdef UL_HAVE_PIDFD
|
2022-03-07 20:41:24 -08:00
|
|
|
int fd_parent_pid = -1;
|
2022-03-31 10:55:30 +02:00
|
|
|
#endif
|
unshare: Add options to map blocks of user/group IDs
This adds the ability to map multiple user/group IDs when creating a new
user namespace. Regular processes cannot map any user other than the
effective user, so we need to use the setuid helpers newuidmap and
newgidmap, provided by shadow. Typically, users will be assigned blocks
of user/group IDs in /etc/sub{u,g}id, although it is also possible to
use NSS. There is a second advantage in using these helpers: because we
never write to /proc/self/gid_map, we don't have to disable setgroups.
Because the process of mapping IDs is almost identical, whether we are
mapping user IDs or group IDs, we put both in a common "map_range"
structure. These are read in by (ab)using string_to_idarray. In addition
to any map created with --map-users, we still need to handle a map of
size one created with --map-user. This makes constructing the helpers'
command line the trickiest part of the whole process. newuidmap/
newgidmap check to see if any ranges overlap before creating a mapping.
To avoid failing, we carve out a hole in the mapping for the singular
map. In the worst case, we may have three separate maps.
Signed-off-by: Sean Anderson <seanga2@gmail.com>
2021-11-24 13:26:16 -05:00
|
|
|
int fd_idmap, fd_bind = -1;
|
2022-01-09 14:01:21 -08:00
|
|
|
sigset_t sigset, oldsigset;
|
2015-04-09 11:48:07 +02:00
|
|
|
int status;
|
2015-03-18 15:13:15 +01:00
|
|
|
unsigned long propagation = UNSHARE_PROPAGATION_DEFAULT;
|
2018-10-05 13:09:30 +02:00
|
|
|
int force_uid = 0, force_gid = 0;
|
|
|
|
uid_t uid = 0, real_euid = geteuid();
|
|
|
|
gid_t gid = 0, real_egid = getegid();
|
2019-01-17 14:16:54 -08:00
|
|
|
int keepcaps = 0;
|
2023-06-30 22:36:06 +02:00
|
|
|
int64_t monotonic = 0;
|
|
|
|
int64_t boottime = 0;
|
unshare: support the time namespace
This adds support to unshare for time namespaces. With the newly added
options '-t, --time' and '--monotonic' and '--boottime' it is now
possible to change CLOCK_MONOTONIC and CLOCK_BOOTTIME in a new time
namespace.
The time namespace has been merged in kernel version 5.6 and an easy way
to test it is using CLOCK_BOOTTIME and the uptime command:
# uptime
11:08:26 up 20:28, 1 user, load average: 0.00, 0.00, 0.00
# ./unshare --fork --time --boottime 100000000 uptime
11:08:29 up 1158 days, 6:15, 1 user, load average: 0.00, 0.00, 0.00
Signed-off-by: Adrian Reber <areber@redhat.com>
2020-03-06 12:05:00 +01:00
|
|
|
int force_monotonic = 0;
|
|
|
|
int force_boottime = 0;
|
2009-10-04 02:42:08 +07:00
|
|
|
|
2013-10-18 10:19:15 +02:00
|
|
|
setlocale(LC_ALL, "");
|
2009-10-04 02:42:08 +07:00
|
|
|
bindtextdomain(PACKAGE, LOCALEDIR);
|
|
|
|
textdomain(PACKAGE);
|
2019-04-16 15:14:13 +02:00
|
|
|
close_stdout_atexit();
|
2009-10-04 02:42:08 +07:00
|
|
|
|
2020-03-09 12:20:51 +00:00
|
|
|
while ((c = getopt_long(argc, argv, "+fhVmuinpCTUrR:w:S:G:c", longopts, NULL)) != -1) {
|
2013-02-28 23:03:02 -05:00
|
|
|
switch (c) {
|
2013-06-27 20:04:58 -04:00
|
|
|
case 'f':
|
|
|
|
forkit = 1;
|
|
|
|
break;
|
2009-10-04 02:42:08 +07:00
|
|
|
case 'm':
|
2013-01-17 13:00:44 +01:00
|
|
|
unshare_flags |= CLONE_NEWNS;
|
2015-04-09 11:34:02 +02:00
|
|
|
if (optarg)
|
|
|
|
set_ns_target(CLONE_NEWNS, optarg);
|
2009-10-04 02:42:08 +07:00
|
|
|
break;
|
|
|
|
case 'u':
|
2013-01-17 13:00:44 +01:00
|
|
|
unshare_flags |= CLONE_NEWUTS;
|
2015-04-09 11:34:02 +02:00
|
|
|
if (optarg)
|
|
|
|
set_ns_target(CLONE_NEWUTS, optarg);
|
2009-10-04 02:42:08 +07:00
|
|
|
break;
|
|
|
|
case 'i':
|
2013-01-17 13:00:44 +01:00
|
|
|
unshare_flags |= CLONE_NEWIPC;
|
2015-04-09 11:34:02 +02:00
|
|
|
if (optarg)
|
|
|
|
set_ns_target(CLONE_NEWIPC, optarg);
|
2009-10-04 02:42:08 +07:00
|
|
|
break;
|
|
|
|
case 'n':
|
2013-01-17 13:00:44 +01:00
|
|
|
unshare_flags |= CLONE_NEWNET;
|
2015-04-09 11:34:02 +02:00
|
|
|
if (optarg)
|
|
|
|
set_ns_target(CLONE_NEWNET, optarg);
|
2009-10-04 02:42:08 +07:00
|
|
|
break;
|
2013-01-11 14:53:34 -08:00
|
|
|
case 'p':
|
|
|
|
unshare_flags |= CLONE_NEWPID;
|
2015-04-09 11:34:02 +02:00
|
|
|
if (optarg)
|
|
|
|
set_ns_target(CLONE_NEWPID, optarg);
|
2013-01-11 14:53:34 -08:00
|
|
|
break;
|
|
|
|
case 'U':
|
|
|
|
unshare_flags |= CLONE_NEWUSER;
|
2015-04-09 11:34:02 +02:00
|
|
|
if (optarg)
|
|
|
|
set_ns_target(CLONE_NEWUSER, optarg);
|
2013-01-11 14:53:34 -08:00
|
|
|
break;
|
2016-03-02 17:53:42 -08:00
|
|
|
case 'C':
|
|
|
|
unshare_flags |= CLONE_NEWCGROUP;
|
|
|
|
if (optarg)
|
|
|
|
set_ns_target(CLONE_NEWCGROUP, optarg);
|
|
|
|
break;
|
2020-03-09 12:20:51 +00:00
|
|
|
case 'T':
|
unshare: support the time namespace
This adds support to unshare for time namespaces. With the newly added
options '-t, --time' and '--monotonic' and '--boottime' it is now
possible to change CLOCK_MONOTONIC and CLOCK_BOOTTIME in a new time
namespace.
The time namespace has been merged in kernel version 5.6 and an easy way
to test it is using CLOCK_BOOTTIME and the uptime command:
# uptime
11:08:26 up 20:28, 1 user, load average: 0.00, 0.00, 0.00
# ./unshare --fork --time --boottime 100000000 uptime
11:08:29 up 1158 days, 6:15, 1 user, load average: 0.00, 0.00, 0.00
Signed-off-by: Adrian Reber <areber@redhat.com>
2020-03-06 12:05:00 +01:00
|
|
|
unshare_flags |= CLONE_NEWTIME;
|
|
|
|
if (optarg)
|
|
|
|
set_ns_target(CLONE_NEWTIME, optarg);
|
|
|
|
break;
|
2013-07-03 12:28:16 +02:00
|
|
|
case OPT_MOUNTPROC:
|
|
|
|
unshare_flags |= CLONE_NEWNS;
|
|
|
|
procmnt = optarg ? optarg : "/proc";
|
|
|
|
break;
|
2020-01-04 09:11:30 +11:00
|
|
|
case OPT_MAPUSER:
|
|
|
|
unshare_flags |= CLONE_NEWUSER;
|
2020-04-15 23:05:16 +10:00
|
|
|
mapuser = get_user(optarg, _("failed to parse uid"));
|
2020-01-04 09:11:30 +11:00
|
|
|
break;
|
|
|
|
case OPT_MAPGROUP:
|
|
|
|
unshare_flags |= CLONE_NEWUSER;
|
2020-04-15 23:05:16 +10:00
|
|
|
mapgroup = get_group(optarg, _("failed to parse gid"));
|
2020-01-04 09:11:30 +11:00
|
|
|
break;
|
2013-12-27 22:14:48 +01:00
|
|
|
case 'r':
|
2019-01-17 14:17:54 -08:00
|
|
|
unshare_flags |= CLONE_NEWUSER;
|
2020-01-04 09:11:30 +11:00
|
|
|
mapuser = 0;
|
|
|
|
mapgroup = 0;
|
2019-01-17 14:17:54 -08:00
|
|
|
break;
|
|
|
|
case 'c':
|
2013-12-27 22:14:48 +01:00
|
|
|
unshare_flags |= CLONE_NEWUSER;
|
2020-01-04 09:11:30 +11:00
|
|
|
mapuser = real_euid;
|
|
|
|
mapgroup = real_egid;
|
2013-12-27 22:14:48 +01:00
|
|
|
break;
|
unshare: Add options to map blocks of user/group IDs
This adds the ability to map multiple user/group IDs when creating a new
user namespace. Regular processes cannot map any user other than the
effective user, so we need to use the setuid helpers newuidmap and
newgidmap, provided by shadow. Typically, users will be assigned blocks
of user/group IDs in /etc/sub{u,g}id, although it is also possible to
use NSS. There is a second advantage in using these helpers: because we
never write to /proc/self/gid_map, we don't have to disable setgroups.
Because the process of mapping IDs is almost identical, whether we are
mapping user IDs or group IDs, we put both in a common "map_range"
structure. These are read in by (ab)using string_to_idarray. In addition
to any map created with --map-users, we still need to handle a map of
size one created with --map-user. This makes constructing the helpers'
command line the trickiest part of the whole process. newuidmap/
newgidmap check to see if any ranges overlap before creating a mapping.
To avoid failing, we carve out a hole in the mapping for the singular
map. In the worst case, we may have three separate maps.
Signed-off-by: Sean Anderson <seanga2@gmail.com>
2021-11-24 13:26:16 -05:00
|
|
|
case OPT_MAPUSERS:
|
|
|
|
unshare_flags |= CLONE_NEWUSER;
|
2021-11-24 13:26:17 -05:00
|
|
|
if (!strcmp(optarg, "auto"))
|
|
|
|
usermap = read_subid_range(_PATH_SUBUID, real_euid);
|
|
|
|
else
|
|
|
|
usermap = get_map_range(optarg);
|
unshare: Add options to map blocks of user/group IDs
This adds the ability to map multiple user/group IDs when creating a new
user namespace. Regular processes cannot map any user other than the
effective user, so we need to use the setuid helpers newuidmap and
newgidmap, provided by shadow. Typically, users will be assigned blocks
of user/group IDs in /etc/sub{u,g}id, although it is also possible to
use NSS. There is a second advantage in using these helpers: because we
never write to /proc/self/gid_map, we don't have to disable setgroups.
Because the process of mapping IDs is almost identical, whether we are
mapping user IDs or group IDs, we put both in a common "map_range"
structure. These are read in by (ab)using string_to_idarray. In addition
to any map created with --map-users, we still need to handle a map of
size one created with --map-user. This makes constructing the helpers'
command line the trickiest part of the whole process. newuidmap/
newgidmap check to see if any ranges overlap before creating a mapping.
To avoid failing, we carve out a hole in the mapping for the singular
map. In the worst case, we may have three separate maps.
Signed-off-by: Sean Anderson <seanga2@gmail.com>
2021-11-24 13:26:16 -05:00
|
|
|
break;
|
|
|
|
case OPT_MAPGROUPS:
|
|
|
|
unshare_flags |= CLONE_NEWUSER;
|
2021-11-24 13:26:17 -05:00
|
|
|
if (!strcmp(optarg, "auto"))
|
2022-07-08 12:09:13 -04:00
|
|
|
groupmap = read_subid_range(_PATH_SUBGID, real_euid);
|
2021-11-24 13:26:17 -05:00
|
|
|
else
|
|
|
|
groupmap = get_map_range(optarg);
|
|
|
|
break;
|
|
|
|
case OPT_MAPAUTO:
|
|
|
|
unshare_flags |= CLONE_NEWUSER;
|
|
|
|
usermap = read_subid_range(_PATH_SUBUID, real_euid);
|
2022-07-08 12:09:13 -04:00
|
|
|
groupmap = read_subid_range(_PATH_SUBGID, real_euid);
|
unshare: Add options to map blocks of user/group IDs
This adds the ability to map multiple user/group IDs when creating a new
user namespace. Regular processes cannot map any user other than the
effective user, so we need to use the setuid helpers newuidmap and
newgidmap, provided by shadow. Typically, users will be assigned blocks
of user/group IDs in /etc/sub{u,g}id, although it is also possible to
use NSS. There is a second advantage in using these helpers: because we
never write to /proc/self/gid_map, we don't have to disable setgroups.
Because the process of mapping IDs is almost identical, whether we are
mapping user IDs or group IDs, we put both in a common "map_range"
structure. These are read in by (ab)using string_to_idarray. In addition
to any map created with --map-users, we still need to handle a map of
size one created with --map-user. This makes constructing the helpers'
command line the trickiest part of the whole process. newuidmap/
newgidmap check to see if any ranges overlap before creating a mapping.
To avoid failing, we carve out a hole in the mapping for the singular
map. In the worst case, we may have three separate maps.
Signed-off-by: Sean Anderson <seanga2@gmail.com>
2021-11-24 13:26:16 -05:00
|
|
|
break;
|
2015-01-08 11:51:58 +01:00
|
|
|
case OPT_SETGROUPS:
|
|
|
|
setgrpcmd = setgroups_str2id(optarg);
|
|
|
|
break;
|
2015-03-18 15:13:15 +01:00
|
|
|
case OPT_PROPAGATION:
|
|
|
|
propagation = parse_propagation(optarg);
|
|
|
|
break;
|
2017-09-19 20:39:00 +02:00
|
|
|
case OPT_KILLCHILD:
|
|
|
|
forkit = 1;
|
2017-10-14 04:31:57 +02:00
|
|
|
if (optarg) {
|
|
|
|
if ((kill_child_signo = signame_to_signum(optarg)) < 0)
|
|
|
|
errx(EXIT_FAILURE, _("unknown signal: %s"),
|
|
|
|
optarg);
|
|
|
|
} else {
|
|
|
|
kill_child_signo = SIGKILL;
|
|
|
|
}
|
2017-09-19 20:39:00 +02:00
|
|
|
break;
|
2019-01-17 14:16:54 -08:00
|
|
|
case OPT_KEEPCAPS:
|
|
|
|
keepcaps = 1;
|
|
|
|
cap_last_cap(); /* Force last cap to be cached before we fork. */
|
|
|
|
break;
|
2018-10-05 13:09:30 +02:00
|
|
|
case 'S':
|
|
|
|
uid = strtoul_or_err(optarg, _("failed to parse uid"));
|
|
|
|
force_uid = 1;
|
|
|
|
break;
|
|
|
|
case 'G':
|
|
|
|
gid = strtoul_or_err(optarg, _("failed to parse gid"));
|
|
|
|
force_gid = 1;
|
|
|
|
break;
|
2018-10-05 13:09:29 +02:00
|
|
|
case 'R':
|
|
|
|
newroot = optarg;
|
|
|
|
break;
|
|
|
|
case 'w':
|
|
|
|
newdir = optarg;
|
|
|
|
break;
|
unshare: support the time namespace
This adds support to unshare for time namespaces. With the newly added
options '-T, --time', '--monotonic' and '--boottime' it is now
possible to change CLOCK_MONOTONIC and CLOCK_BOOTTIME in a new time
namespace.
The time namespace was merged in kernel version 5.6; an easy way
to test it is to use CLOCK_BOOTTIME and the uptime command:
# uptime
11:08:26 up 20:28, 1 user, load average: 0.00, 0.00, 0.00
# ./unshare --fork --time --boottime 100000000 uptime
11:08:29 up 1158 days, 6:15, 1 user, load average: 0.00, 0.00, 0.00
Signed-off-by: Adrian Reber <areber@redhat.com>
2020-03-06 12:05:00 +01:00
|
|
|
case OPT_MONOTONIC:
|
2023-06-30 22:36:06 +02:00
|
|
|
monotonic = strtos64_or_err(optarg, _("failed to parse monotonic offset"));
|
unshare: support the time namespace
This adds support to unshare for time namespaces. With the newly added
options '-T, --time', '--monotonic' and '--boottime' it is now
possible to change CLOCK_MONOTONIC and CLOCK_BOOTTIME in a new time
namespace.
The time namespace was merged in kernel version 5.6; an easy way
to test it is to use CLOCK_BOOTTIME and the uptime command:
# uptime
11:08:26 up 20:28, 1 user, load average: 0.00, 0.00, 0.00
# ./unshare --fork --time --boottime 100000000 uptime
11:08:29 up 1158 days, 6:15, 1 user, load average: 0.00, 0.00, 0.00
Signed-off-by: Adrian Reber <areber@redhat.com>
2020-03-06 12:05:00 +01:00
|
|
|
force_monotonic = 1;
|
|
|
|
break;
|
|
|
|
case OPT_BOOTTIME:
|
2023-06-30 22:36:06 +02:00
|
|
|
boottime = strtos64_or_err(optarg, _("failed to parse boottime offset"));
|
unshare: support the time namespace
This adds support to unshare for time namespaces. With the newly added
options '-T, --time', '--monotonic' and '--boottime' it is now
possible to change CLOCK_MONOTONIC and CLOCK_BOOTTIME in a new time
namespace.
The time namespace was merged in kernel version 5.6; an easy way
to test it is to use CLOCK_BOOTTIME and the uptime command:
# uptime
11:08:26 up 20:28, 1 user, load average: 0.00, 0.00, 0.00
# ./unshare --fork --time --boottime 100000000 uptime
11:08:29 up 1158 days, 6:15, 1 user, load average: 0.00, 0.00, 0.00
Signed-off-by: Adrian Reber <areber@redhat.com>
2020-03-06 12:05:00 +01:00
|
|
|
force_boottime = 1;
|
|
|
|
break;
|
2019-04-16 15:14:13 +02:00
|
|
|
|
|
|
|
case 'h':
|
|
|
|
usage();
|
|
|
|
case 'V':
|
|
|
|
print_version(EXIT_SUCCESS);
|
2009-10-04 02:42:08 +07:00
|
|
|
default:
|
2016-12-19 13:13:34 +01:00
|
|
|
errtryhelp(EXIT_FAILURE);
|
2009-10-04 02:42:08 +07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
unshare: support the time namespace
This adds support to unshare for time namespaces. With the newly added
options '-T, --time', '--monotonic' and '--boottime' it is now
possible to change CLOCK_MONOTONIC and CLOCK_BOOTTIME in a new time
namespace.
The time namespace was merged in kernel version 5.6; an easy way
to test it is to use CLOCK_BOOTTIME and the uptime command:
# uptime
11:08:26 up 20:28, 1 user, load average: 0.00, 0.00, 0.00
# ./unshare --fork --time --boottime 100000000 uptime
11:08:29 up 1158 days, 6:15, 1 user, load average: 0.00, 0.00, 0.00
Signed-off-by: Adrian Reber <areber@redhat.com>
2020-03-06 12:05:00 +01:00
|
|
|
if ((force_monotonic || force_boottime) && !(unshare_flags & CLONE_NEWTIME))
|
|
|
|
errx(EXIT_FAILURE, _("options --monotonic and --boottime require "
|
2023-06-25 13:28:19 +02:00
|
|
|
"unsharing of a time namespace (-T)"));
|
unshare: support the time namespace
This adds support to unshare for time namespaces. With the newly added
options '-T, --time', '--monotonic' and '--boottime' it is now
possible to change CLOCK_MONOTONIC and CLOCK_BOOTTIME in a new time
namespace.
The time namespace was merged in kernel version 5.6; an easy way
to test it is to use CLOCK_BOOTTIME and the uptime command:
# uptime
11:08:26 up 20:28, 1 user, load average: 0.00, 0.00, 0.00
# ./unshare --fork --time --boottime 100000000 uptime
11:08:29 up 1158 days, 6:15, 1 user, load average: 0.00, 0.00, 0.00
Signed-off-by: Adrian Reber <areber@redhat.com>
2020-03-06 12:05:00 +01:00
|
|
|
|
2021-11-15 14:44:02 +01:00
|
|
|
/* clear any inherited settings */
|
|
|
|
signal(SIGCHLD, SIG_DFL);
|
|
|
|
|
2015-04-09 11:48:07 +02:00
|
|
|
if (npersists && (unshare_flags & CLONE_NEWNS))
|
2021-11-24 13:26:15 -05:00
|
|
|
pid_bind = bind_ns_files_from_child(&fd_bind);
|
2015-04-09 11:48:07 +02:00
|
|
|
|
unshare: Add options to map blocks of user/group IDs
This adds the ability to map multiple user/group IDs when creating a new
user namespace. Regular processes cannot map any user other than the
effective user, so we need to use the setuid helpers newuidmap and
newgidmap, provided by shadow. Typically, users will be assigned blocks
of user/group IDs in /etc/sub{u,g}id, although it is also possible to
use NSS. There is a second advantage in using these helpers: because we
never write to /proc/self/gid_map, we don't have to disable setgroups.
Because the process of mapping IDs is almost identical, whether we are
mapping user IDs or group IDs, we put both in a common "map_range"
structure. These are read in by (ab)using string_to_idarray. In addition
to any map created with --map-users, we still need to handle a map of
size one created with --map-user. This makes constructing the helpers'
command line the trickiest part of the whole process. newuidmap/
newgidmap check to see if any ranges overlap before creating a mapping.
To avoid failing, we carve out a hole in the mapping for the singular
map. In the worst case, we may have three separate maps.
Signed-off-by: Sean Anderson <seanga2@gmail.com>
2021-11-24 13:26:16 -05:00
|
|
|
if (usermap || groupmap)
|
|
|
|
pid_idmap = map_ids_from_child(&fd_idmap, mapuser, usermap,
|
|
|
|
mapgroup, groupmap);
|
|
|
|
|
2013-02-28 23:03:02 -05:00
|
|
|
if (-1 == unshare(unshare_flags))
|
2009-10-04 02:42:08 +07:00
|
|
|
err(EXIT_FAILURE, _("unshare failed"));
|
|
|
|
|
unshare: Add options to map blocks of user/group IDs
This adds the ability to map multiple user/group IDs when creating a new
user namespace. Regular processes cannot map any user other than the
effective user, so we need to use the setuid helpers newuidmap and
newgidmap, provided by shadow. Typically, users will be assigned blocks
of user/group IDs in /etc/sub{u,g}id, although it is also possible to
use NSS. There is a second advantage in using these helpers: because we
never write to /proc/self/gid_map, we don't have to disable setgroups.
Because the process of mapping IDs is almost identical, whether we are
mapping user IDs or group IDs, we put both in a common "map_range"
structure. These are read in by (ab)using string_to_idarray. In addition
to any map created with --map-users, we still need to handle a map of
size one created with --map-user. This makes constructing the helpers'
command line the trickiest part of the whole process. newuidmap/
newgidmap check to see if any ranges overlap before creating a mapping.
To avoid failing, we carve out a hole in the mapping for the singular
map. In the worst case, we may have three separate maps.
Signed-off-by: Sean Anderson <seanga2@gmail.com>
2021-11-24 13:26:16 -05:00
|
|
|
/* Tell child we've called unshare() */
|
|
|
|
if (usermap || groupmap)
|
|
|
|
sync_with_child(pid_idmap, fd_idmap);
|
|
|
|
|
2020-04-15 23:16:53 +02:00
|
|
|
if (force_boottime)
|
|
|
|
settime(boottime, CLOCK_BOOTTIME);
|
|
|
|
|
|
|
|
if (force_monotonic)
|
|
|
|
settime(monotonic, CLOCK_MONOTONIC);
|
|
|
|
|
|
|
|
if (forkit) {
|
2022-01-09 14:01:21 -08:00
|
|
|
if (sigemptyset(&sigset) != 0 ||
|
|
|
|
sigaddset(&sigset, SIGINT) != 0 ||
|
|
|
|
sigaddset(&sigset, SIGTERM) != 0 ||
|
|
|
|
sigprocmask(SIG_BLOCK, &sigset, &oldsigset) != 0)
|
|
|
|
err(EXIT_FAILURE, _("sigprocmask block failed"));
|
2022-03-31 10:55:30 +02:00
|
|
|
#ifdef UL_HAVE_PIDFD
|
|
|
|
if (kill_child_signo != 0) {
|
|
|
|
/* make a connection to the original process (parent) */
|
|
|
|
fd_parent_pid = pidfd_open(getpid(), 0);
|
|
|
|
if (0 > fd_parent_pid)
|
|
|
|
err(EXIT_FAILURE, _("pidfd_open failed"));
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
/* force child forking before mountspace binding so
|
|
|
|
* pid_for_children is populated */
|
2020-04-15 23:16:53 +02:00
|
|
|
pid = fork();
|
|
|
|
|
|
|
|
switch(pid) {
|
|
|
|
case -1:
|
|
|
|
err(EXIT_FAILURE, _("fork failed"));
|
|
|
|
case 0: /* child */
|
2022-01-09 14:01:21 -08:00
|
|
|
if (sigprocmask(SIG_SETMASK, &oldsigset, NULL))
|
|
|
|
err(EXIT_FAILURE,
|
|
|
|
_("sigprocmask restore failed"));
|
2021-11-24 13:26:15 -05:00
|
|
|
if (npersists && (unshare_flags & CLONE_NEWNS))
|
|
|
|
close(fd_bind);
|
2020-04-15 23:16:53 +02:00
|
|
|
break;
|
|
|
|
default: /* parent */
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (npersists && (pid || !forkit)) {
|
|
|
|
/* run in parent */
|
2021-11-24 13:26:15 -05:00
|
|
|
if (pid_bind && (unshare_flags & CLONE_NEWNS))
|
|
|
|
sync_with_child(pid_bind, fd_bind);
|
|
|
|
else
|
2015-04-09 11:48:07 +02:00
|
|
|
/* simple way, just bind */
|
|
|
|
bind_ns_files(getpid());
|
|
|
|
}
|
|
|
|
|
2020-04-15 23:16:53 +02:00
|
|
|
if (pid) {
|
|
|
|
if (waitpid(pid, &status, 0) == -1)
|
|
|
|
err(EXIT_FAILURE, _("waitpid failed"));
|
2020-07-02 16:26:39 +02:00
|
|
|
|
2020-04-15 23:16:53 +02:00
|
|
|
if (WIFEXITED(status))
|
|
|
|
return WEXITSTATUS(status);
|
2022-01-09 14:01:21 -08:00
|
|
|
if (WIFSIGNALED(status)) {
|
|
|
|
|
|
|
|
/* Ensure the signal that terminated the child will
|
|
|
|
* also terminate the parent. */
|
|
|
|
|
|
|
|
int termsig = WTERMSIG(status);
|
|
|
|
|
|
|
|
if (signal(termsig, SIG_DFL) == SIG_ERR ||
|
|
|
|
sigemptyset(&sigset) != 0 ||
|
|
|
|
sigaddset(&sigset, termsig) != 0 ||
|
|
|
|
sigprocmask(SIG_UNBLOCK, &sigset, NULL) != 0)
|
|
|
|
err(EXIT_FAILURE,
|
|
|
|
_("sigprocmask unblock failed"));
|
|
|
|
|
|
|
|
kill(getpid(), termsig);
|
|
|
|
}
|
2020-04-15 23:16:53 +02:00
|
|
|
err(EXIT_FAILURE, _("child exit failed"));
|
2013-06-27 20:04:58 -04:00
|
|
|
}
|
|
|
|
|
2022-03-07 20:41:24 -08:00
|
|
|
if (kill_child_signo != 0) {
|
|
|
|
if (prctl(PR_SET_PDEATHSIG, kill_child_signo) < 0)
|
|
|
|
err(EXIT_FAILURE, "prctl failed");
|
2022-03-31 10:55:30 +02:00
|
|
|
#ifdef UL_HAVE_PIDFD
|
|
|
|
/* Use poll() to check that there is still the original parent. */
|
|
|
|
if (fd_parent_pid != -1) {
|
|
|
|
struct pollfd pollfds[1] = {
|
|
|
|
{ .fd = fd_parent_pid, .events = POLLIN }
|
|
|
|
};
|
|
|
|
int nfds = poll(pollfds, 1, 0);
|
|
|
|
|
|
|
|
if (0 > nfds)
|
|
|
|
err(EXIT_FAILURE, "poll parent pidfd failed");
|
|
|
|
|
|
|
|
/* If the child was re-parented before prctl(2) was called, the
|
|
|
|
* new parent will likely not be interested in the precise exit
|
|
|
|
* status of the orphan.
|
|
|
|
*/
|
|
|
|
if (nfds)
|
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
|
|
|
|
close(fd_parent_pid);
|
|
|
|
fd_parent_pid = -1;
|
|
|
|
}
|
|
|
|
#endif
|
2022-03-07 20:41:24 -08:00
|
|
|
}
|
|
|
|
|
unshare: Add options to map blocks of user/group IDs
This adds the ability to map multiple user/group IDs when creating a new
user namespace. Regular processes cannot map any user other than the
effective user, so we need to use the setuid helpers newuidmap and
newgidmap, provided by shadow. Typically, users will be assigned blocks
of user/group IDs in /etc/sub{u,g}id, although it is also possible to
use NSS. There is a second advantage in using these helpers: because we
never write to /proc/self/gid_map, we don't have to disable setgroups.
Because the process of mapping IDs is almost identical, whether we are
mapping user IDs or group IDs, we put both in a common "map_range"
structure. These are read in by (ab)using string_to_idarray. In addition
to any map created with --map-users, we still need to handle a map of
size one created with --map-user. This makes constructing the helpers'
command line the trickiest part of the whole process. newuidmap/
newgidmap check to see if any ranges overlap before creating a mapping.
To avoid failing, we carve out a hole in the mapping for the singular
map. In the worst case, we may have three separate maps.
Signed-off-by: Sean Anderson <seanga2@gmail.com>
2021-11-24 13:26:16 -05:00
|
|
|
if (mapuser != (uid_t) -1 && !usermap)
|
2020-01-04 09:11:30 +11:00
|
|
|
map_id(_PATH_PROC_UIDMAP, mapuser, real_euid);
|
|
|
|
|
2019-01-17 14:17:54 -08:00
|
|
|
/* Since Linux 3.19 unprivileged writing of /proc/self/gid_map
|
|
|
|
* has been disabled unless /proc/self/setgroups is written
|
|
|
|
* first to permanently disable the ability to call setgroups
|
|
|
|
* in that user namespace. */
|
unshare: Add options to map blocks of user/group IDs
This adds the ability to map multiple user/group IDs when creating a new
user namespace. Regular processes cannot map any user other than the
effective user, so we need to use the setuid helpers newuidmap and
newgidmap, provided by shadow. Typically, users will be assigned blocks
of user/group IDs in /etc/sub{u,g}id, although it is also possible to
use NSS. There is a second advantage in using these helpers: because we
never write to /proc/self/gid_map, we don't have to disable setgroups.
Because the process of mapping IDs is almost identical, whether we are
mapping user IDs or group IDs, we put both in a common "map_range"
structure. These are read in by (ab)using string_to_idarray. In addition
to any map created with --map-users, we still need to handle a map of
size one created with --map-user. This makes constructing the helpers'
command line the trickiest part of the whole process. newuidmap/
newgidmap check to see if any ranges overlap before creating a mapping.
To avoid failing, we carve out a hole in the mapping for the singular
map. In the worst case, we may have three separate maps.
Signed-off-by: Sean Anderson <seanga2@gmail.com>
2021-11-24 13:26:16 -05:00
|
|
|
if (mapgroup != (gid_t) -1 && !groupmap) {
|
2015-01-08 11:51:58 +01:00
|
|
|
if (setgrpcmd == SETGROUPS_ALLOW)
|
|
|
|
errx(EXIT_FAILURE, _("options --setgroups=allow and "
|
2020-01-04 09:11:30 +11:00
|
|
|
"--map-group are mutually exclusive"));
|
2015-01-08 11:51:58 +01:00
|
|
|
setgroups_control(SETGROUPS_DENY);
|
2020-01-04 09:11:30 +11:00
|
|
|
map_id(_PATH_PROC_GIDMAP, mapgroup, real_egid);
|
|
|
|
}
|
2015-01-08 11:51:58 +01:00
|
|
|
|
2020-01-04 09:11:30 +11:00
|
|
|
if (setgrpcmd != SETGROUPS_NONE)
|
|
|
|
setgroups_control(setgrpcmd);
|
2013-12-27 22:14:48 +01:00
|
|
|
|
2015-03-18 15:13:15 +01:00
|
|
|
if ((unshare_flags & CLONE_NEWNS) && propagation)
|
|
|
|
set_propagation(propagation);
|
|
|
|
|
2018-10-05 13:09:29 +02:00
|
|
|
if (newroot) {
|
|
|
|
if (chroot(newroot) != 0)
|
|
|
|
err(EXIT_FAILURE,
|
|
|
|
_("cannot change root directory to '%s'"), newroot);
|
|
|
|
newdir = newdir ?: "/";
|
|
|
|
}
|
|
|
|
if (newdir && chdir(newdir))
|
|
|
|
err(EXIT_FAILURE, _("cannot chdir to '%s'"), newdir);
|
|
|
|
|
|
|
|
if (procmnt) {
|
2021-06-04 12:34:52 +00:00
|
|
|
/* When not changing root and using the default propagation flags
|
|
|
|
then the recursive propagation change of root will
|
|
|
|
automatically change that of an existing proc mount. */
|
|
|
|
if (!newroot && propagation != (MS_PRIVATE|MS_REC)) {
|
|
|
|
int rc = mount("none", procmnt, NULL, MS_PRIVATE|MS_REC, NULL);
|
|
|
|
|
|
|
|
/* Custom procmnt means that proc is very likely not mounted, causing EINVAL.
|
|
|
|
Ignoring the error in this specific instance is considered safe. */
|
|
|
|
if(rc != 0 && errno != EINVAL)
|
|
|
|
err(EXIT_FAILURE, _("cannot change %s filesystem propagation"), procmnt);
|
|
|
|
}
|
|
|
|
|
2018-10-05 13:09:29 +02:00
|
|
|
if (mount("proc", procmnt, "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL) != 0)
|
2013-07-03 12:28:16 +02:00
|
|
|
err(EXIT_FAILURE, _("mount %s failed"), procmnt);
|
2018-10-05 13:09:29 +02:00
|
|
|
}
|
2013-07-03 12:28:16 +02:00
|
|
|
|
2018-10-05 13:09:30 +02:00
|
|
|
if (force_gid) {
|
|
|
|
if (setgroups(0, NULL) != 0) /* drop supplementary groups */
|
|
|
|
err(EXIT_FAILURE, _("setgroups failed"));
|
|
|
|
if (setgid(gid) < 0) /* change GID */
|
|
|
|
err(EXIT_FAILURE, _("setgid failed"));
|
|
|
|
}
|
|
|
|
if (force_uid && setuid(uid) < 0) /* change UID */
|
|
|
|
err(EXIT_FAILURE, _("setuid failed"));
|
|
|
|
|
2023-03-29 13:36:15 +11:00
|
|
|
if (keepcaps && (unshare_flags & CLONE_NEWUSER))
|
|
|
|
cap_permitted_to_ambient();
|
2019-01-17 14:16:54 -08:00
|
|
|
|
2013-02-13 21:05:48 -05:00
|
|
|
if (optind < argc) {
|
|
|
|
execvp(argv[optind], argv + optind);
|
2018-02-01 15:44:25 +01:00
|
|
|
errexec(argv[optind]);
|
2013-02-13 21:05:48 -05:00
|
|
|
}
|
|
|
|
exec_shell();
|
2009-10-04 02:42:08 +07:00
|
|
|
}
|