@@ -52,6 +52,10 @@ struct module;
struct bpf_func_state;
struct ftrace_ops;
struct cgroup;
+struct bpf_token;
+struct user_namespace;
+struct super_block;
+struct inode;
extern struct idr btf_idr;
extern spinlock_t btf_idr_lock;
@@ -1621,6 +1625,13 @@ struct bpf_mount_opts {
u64 delegate_attachs;
};
+struct bpf_token {
+ struct work_struct work;
+ atomic64_t refcnt;
+ struct user_namespace *userns;
+ u64 allowed_cmds;
+};
+
struct bpf_struct_ops_value;
struct btf_member;
@@ -2080,6 +2091,7 @@ static inline void bpf_enable_instrumentation(void)
migrate_enable();
}
+extern const struct super_operations bpf_super_ops;
extern const struct file_operations bpf_map_fops;
extern const struct file_operations bpf_prog_fops;
extern const struct file_operations bpf_iter_fops;
@@ -2214,6 +2226,8 @@ static inline void bpf_map_dec_elem_count(struct bpf_map *map)
extern int sysctl_unprivileged_bpf_disabled;
+bool bpf_token_capable(const struct bpf_token *token, int cap);
+
static inline bool bpf_allow_ptr_leaks(void)
{
return perfmon_capable();
@@ -2248,8 +2262,17 @@ int bpf_link_new_fd(struct bpf_link *link);
struct bpf_link *bpf_link_get_from_fd(u32 ufd);
struct bpf_link *bpf_link_get_curr_or_next(u32 *id);
+void bpf_token_inc(struct bpf_token *token);
+void bpf_token_put(struct bpf_token *token);
+int bpf_token_create(union bpf_attr *attr);
+struct bpf_token *bpf_token_get_from_fd(u32 ufd);
+
+bool bpf_token_allow_cmd(const struct bpf_token *token, enum bpf_cmd cmd);
+
int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname);
int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags);
+struct inode *bpf_get_inode(struct super_block *sb, const struct inode *dir,
+ umode_t mode);
#define BPF_ITER_FUNC_PREFIX "bpf_iter_"
#define DEFINE_BPF_ITER_FUNC(target, args...) \
@@ -2609,6 +2632,24 @@ static inline int bpf_obj_get_user(const char __user *pathname, int flags)
return -EOPNOTSUPP;
}
+static inline bool bpf_token_capable(const struct bpf_token *token, int cap)
+{
+ return capable(cap) || (cap != CAP_SYS_ADMIN && capable(CAP_SYS_ADMIN));
+}
+
+static inline void bpf_token_inc(struct bpf_token *token)
+{
+}
+
+static inline void bpf_token_put(struct bpf_token *token)
+{
+}
+
+static inline struct bpf_token *bpf_token_get_from_fd(u32 ufd)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
static inline void __dev_flush(void)
{
}
@@ -847,6 +847,36 @@ union bpf_iter_link_info {
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
+ * BPF_TOKEN_CREATE
+ * Description
+ * Create BPF token with embedded information about what
+ * BPF-related functionality it allows:
+ * - a set of allowed bpf() syscall commands;
+ * - a set of allowed BPF map types to be created with
+ * BPF_MAP_CREATE command, if BPF_MAP_CREATE itself is allowed;
+ * - a set of allowed BPF program types and BPF program attach
+ * types to be loaded with BPF_PROG_LOAD command, if
+ * BPF_PROG_LOAD itself is allowed.
+ *
+ * BPF token is created (derived) from an instance of BPF FS,
+ * assuming it has necessary delegation mount options specified.
+ * This BPF token can be passed as an extra parameter to various
+ * bpf() syscall commands to grant BPF subsystem functionality to
+ * unprivileged processes.
+ *
+ * When created, BPF token is "associated" with the owning
+ * user namespace of BPF FS instance (super block) that it was
+ * derived from, and subsequent BPF operations performed with
+ * BPF token would be performing capabilities checks (i.e.,
+ * CAP_BPF, CAP_PERFMON, CAP_NET_ADMIN, CAP_SYS_ADMIN) within
+ * that user namespace. Without BPF token, such capabilities
+ * have to be granted in init user namespace, making bpf()
+ * syscall incompatible with user namespace, for the most part.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
* NOTES
* eBPF objects (maps and programs) can be shared between processes.
*
@@ -901,6 +931,8 @@ enum bpf_cmd {
BPF_ITER_CREATE,
BPF_LINK_DETACH,
BPF_PROG_BIND_MAP,
+ BPF_TOKEN_CREATE,
+ __MAX_BPF_CMD,
};
enum bpf_map_type {
@@ -1714,6 +1746,11 @@ union bpf_attr {
__u32 flags; /* extra flags */
} prog_bind_map;
+ struct { /* struct used by BPF_TOKEN_CREATE command */
+ __u32 flags;
+ __u32 bpffs_fd;
+ } token_create;
+
} __attribute__((aligned(8)));
/* The description below is an attempt at providing documentation to eBPF
@@ -6,7 +6,7 @@ cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse
endif
CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy)
-obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o
+obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o token.o
obj-$(CONFIG_BPF_SYSCALL) += bpf_iter.o map_iter.o task_iter.o prog_iter.o link_iter.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o
obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
@@ -99,9 +99,9 @@ static const struct inode_operations bpf_prog_iops = { };
static const struct inode_operations bpf_map_iops = { };
static const struct inode_operations bpf_link_iops = { };
-static struct inode *bpf_get_inode(struct super_block *sb,
- const struct inode *dir,
- umode_t mode)
+struct inode *bpf_get_inode(struct super_block *sb,
+ const struct inode *dir,
+ umode_t mode)
{
struct inode *inode;
@@ -603,6 +603,7 @@ static int bpf_show_options(struct seq_file *m, struct dentry *root)
struct inode *inode = d_inode(root);
umode_t mode = inode->i_mode & S_IALLUGO & ~S_ISVTX;
struct bpf_mount_opts *opts = root->d_sb->s_fs_info;
+ u64 mask;
if (!uid_eq(inode->i_uid, GLOBAL_ROOT_UID))
seq_printf(m, ",uid=%u",
@@ -613,7 +614,8 @@ static int bpf_show_options(struct seq_file *m, struct dentry *root)
if (mode != S_IRWXUGO)
seq_printf(m, ",mode=%o", mode);
- if (opts->delegate_cmds == ~0ULL)
+ mask = (1ULL << __MAX_BPF_CMD) - 1;
+ if ((opts->delegate_cmds & mask) == mask)
seq_printf(m, ",delegate_cmds=any");
else if (opts->delegate_cmds)
seq_printf(m, ",delegate_cmds=0x%llx", opts->delegate_cmds);
@@ -646,7 +648,7 @@ static void bpf_free_inode(struct inode *inode)
free_inode_nonrcu(inode);
}
-static const struct super_operations bpf_super_ops = {
+const struct super_operations bpf_super_ops = {
.statfs = simple_statfs,
.drop_inode = generic_delete_inode,
.show_options = bpf_show_options,
@@ -5424,6 +5424,20 @@ static int bpf_prog_bind_map(union bpf_attr *attr)
return ret;
}
+#define BPF_TOKEN_CREATE_LAST_FIELD token_create.bpffs_fd
+
+static int token_create(union bpf_attr *attr)
+{
+ if (CHECK_ATTR(BPF_TOKEN_CREATE))
+ return -EINVAL;
+
+ /* no flags are supported yet */
+ if (attr->token_create.flags)
+ return -EINVAL;
+
+ return bpf_token_create(attr);
+}
+
static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
{
union bpf_attr attr;
@@ -5557,6 +5571,9 @@ static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
case BPF_PROG_BIND_MAP:
err = bpf_prog_bind_map(&attr);
break;
+ case BPF_TOKEN_CREATE:
+ err = token_create(&attr);
+ break;
default:
err = -EINVAL;
break;
new file mode 100644
@@ -0,0 +1,217 @@
+#include <linux/bpf.h>
+#include <linux/vmalloc.h>
+#include <linux/fdtable.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/idr.h>
+#include <linux/namei.h>
+#include <linux/user_namespace.h>
+
+static bool bpf_ns_capable(struct user_namespace *ns, int cap)
+{
+ return ns_capable(ns, cap) || (cap != CAP_SYS_ADMIN && ns_capable(ns, CAP_SYS_ADMIN));
+}
+
+bool bpf_token_capable(const struct bpf_token *token, int cap)
+{
+ struct user_namespace *userns;
+
+ /* BPF token allows ns_capable() level of capabilities */
+ userns = token ? token->userns : &init_user_ns;
+ if (!bpf_ns_capable(userns, cap))
+ return false;
+ return true;
+}
+
+void bpf_token_inc(struct bpf_token *token)
+{
+ atomic64_inc(&token->refcnt);
+}
+
+static void bpf_token_free(struct bpf_token *token)
+{
+ put_user_ns(token->userns);
+ kfree(token);
+}
+
+static void bpf_token_put_deferred(struct work_struct *work)
+{
+ struct bpf_token *token = container_of(work, struct bpf_token, work);
+
+ bpf_token_free(token);
+}
+
+void bpf_token_put(struct bpf_token *token)
+{
+ if (!token)
+ return;
+
+ if (!atomic64_dec_and_test(&token->refcnt))
+ return;
+
+ INIT_WORK(&token->work, bpf_token_put_deferred);
+ schedule_work(&token->work);
+}
+
+static int bpf_token_release(struct inode *inode, struct file *filp)
+{
+ struct bpf_token *token = filp->private_data;
+
+ bpf_token_put(token);
+ return 0;
+}
+
+static void bpf_token_show_fdinfo(struct seq_file *m, struct file *filp)
+{
+ struct bpf_token *token = filp->private_data;
+ u64 mask;
+
+ BUILD_BUG_ON(__MAX_BPF_CMD >= 64);
+ mask = (1ULL << __MAX_BPF_CMD) - 1;
+ if ((token->allowed_cmds & mask) == mask)
+ seq_printf(m, "allowed_cmds:\tany\n");
+ else
+ seq_printf(m, "allowed_cmds:\t0x%llx\n", token->allowed_cmds);
+}
+
+#define BPF_TOKEN_INODE_NAME "bpf-token"
+
+static const struct inode_operations bpf_token_iops = { };
+
+static const struct file_operations bpf_token_fops = {
+ .release = bpf_token_release,
+ .show_fdinfo = bpf_token_show_fdinfo,
+};
+
+int bpf_token_create(union bpf_attr *attr)
+{
+ struct bpf_mount_opts *mnt_opts;
+ struct bpf_token *token = NULL;
+ struct user_namespace *userns;
+ struct inode *inode;
+ struct file *file;
+ struct path path;
+ struct fd f;
+ umode_t mode;
+ int err, fd;
+
+ f = fdget(attr->token_create.bpffs_fd);
+ if (!f.file)
+ return -EBADF;
+
+ path = f.file->f_path;
+ path_get(&path);
+ fdput(f);
+
+ if (path.dentry != path.mnt->mnt_sb->s_root) {
+ err = -EINVAL;
+ goto out_path;
+ }
+ if (path.mnt->mnt_sb->s_op != &bpf_super_ops) {
+ err = -EINVAL;
+ goto out_path;
+ }
+ err = path_permission(&path, MAY_ACCESS);
+ if (err)
+ goto out_path;
+
+ userns = path.dentry->d_sb->s_user_ns;
+ /*
+ * Enforce that creators of BPF tokens are in the same user
+ * namespace as the BPF FS instance. This makes reasoning about
+ * permissions a lot easier and we can always relax this later.
+ */
+ if (current_user_ns() != userns) {
+ err = -EPERM;
+ goto out_path;
+ }
+ if (!ns_capable(userns, CAP_BPF)) {
+ err = -EPERM;
+ goto out_path;
+ }
+
+ /* Creating BPF token in init_user_ns doesn't make much sense. */
+ if (current_user_ns() == &init_user_ns) {
+ err = -EOPNOTSUPP;
+ goto out_path;
+ }
+
+ mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask());
+ inode = bpf_get_inode(path.mnt->mnt_sb, NULL, mode);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ goto out_path;
+ }
+
+ inode->i_op = &bpf_token_iops;
+ inode->i_fop = &bpf_token_fops;
+ clear_nlink(inode); /* make sure it is unlinked */
+
+ file = alloc_file_pseudo(inode, path.mnt, BPF_TOKEN_INODE_NAME, O_RDWR, &bpf_token_fops);
+ if (IS_ERR(file)) {
+ iput(inode);
+ err = PTR_ERR(file);
+ goto out_path;
+ }
+
+ token = kzalloc(sizeof(*token), GFP_USER);
+ if (!token) {
+ err = -ENOMEM;
+ goto out_file;
+ }
+
+ atomic64_set(&token->refcnt, 1);
+
+ /* remember bpffs owning userns for future ns_capable() checks */
+ token->userns = get_user_ns(userns);
+
+ mnt_opts = path.dentry->d_sb->s_fs_info;
+ token->allowed_cmds = mnt_opts->delegate_cmds;
+
+ fd = get_unused_fd_flags(O_CLOEXEC);
+ if (fd < 0) {
+ err = fd;
+ goto out_token;
+ }
+
+ file->private_data = token;
+ fd_install(fd, file);
+
+ path_put(&path);
+ return fd;
+
+out_token:
+ bpf_token_free(token);
+out_file:
+ fput(file);
+out_path:
+ path_put(&path);
+ return err;
+}
+
+struct bpf_token *bpf_token_get_from_fd(u32 ufd)
+{
+ struct fd f = fdget(ufd);
+ struct bpf_token *token;
+
+ if (!f.file)
+ return ERR_PTR(-EBADF);
+ if (f.file->f_op != &bpf_token_fops) {
+ fdput(f);
+ return ERR_PTR(-EINVAL);
+ }
+
+ token = f.file->private_data;
+ bpf_token_inc(token);
+ fdput(f);
+
+ return token;
+}
+
+bool bpf_token_allow_cmd(const struct bpf_token *token, enum bpf_cmd cmd)
+{
+ if (!token)
+ return false;
+ return token->allowed_cmds & (1ULL << cmd);
+}
@@ -847,6 +847,36 @@ union bpf_iter_link_info {
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
+ * BPF_TOKEN_CREATE
+ * Description
+ * Create BPF token with embedded information about what
+ * BPF-related functionality it allows:
+ * - a set of allowed bpf() syscall commands;
+ * - a set of allowed BPF map types to be created with
+ * BPF_MAP_CREATE command, if BPF_MAP_CREATE itself is allowed;
+ * - a set of allowed BPF program types and BPF program attach
+ * types to be loaded with BPF_PROG_LOAD command, if
+ * BPF_PROG_LOAD itself is allowed.
+ *
+ * BPF token is created (derived) from an instance of BPF FS,
+ * assuming it has necessary delegation mount options specified.
+ * This BPF token can be passed as an extra parameter to various
+ * bpf() syscall commands to grant BPF subsystem functionality to
+ * unprivileged processes.
+ *
+ * When created, BPF token is "associated" with the owning
+ * user namespace of BPF FS instance (super block) that it was
+ * derived from, and subsequent BPF operations performed with
+ * BPF token would be performing capabilities checks (i.e.,
+ * CAP_BPF, CAP_PERFMON, CAP_NET_ADMIN, CAP_SYS_ADMIN) within
+ * that user namespace. Without BPF token, such capabilities
+ * have to be granted in init user namespace, making bpf()
+ * syscall incompatible with user namespace, for the most part.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
* NOTES
* eBPF objects (maps and programs) can be shared between processes.
*
@@ -901,6 +931,8 @@ enum bpf_cmd {
BPF_ITER_CREATE,
BPF_LINK_DETACH,
BPF_PROG_BIND_MAP,
+ BPF_TOKEN_CREATE,
+ __MAX_BPF_CMD,
};
enum bpf_map_type {
@@ -1714,6 +1746,11 @@ union bpf_attr {
__u32 flags; /* extra flags */
} prog_bind_map;
+ struct { /* struct used by BPF_TOKEN_CREATE command */
+ __u32 flags;
+ __u32 bpffs_fd;
+ } token_create;
+
} __attribute__((aligned(8)));
/* The description below is an attempt at providing documentation to eBPF