Message ID | 20201118083253.4150-2-mariuszx.dudek@intel.com (mailing list archive) |
---|---|
State | New, archived |
Delegated to: | BPF |
Headers | show |
Series | libbpf: add support for privileged/unprivileged control separation | expand |
Context | Check | Description |
---|---|---|
netdev/cover_letter | success | Link |
netdev/fixes_present | success | Link |
netdev/patch_count | success | Link |
netdev/tree_selection | success | Clearly marked for bpf-next |
netdev/subject_prefix | success | Link |
netdev/source_inline | success | Was 0 now: 0 |
netdev/verify_signedoff | success | Link |
netdev/module_param | success | Was 0 now: 0 |
netdev/build_32bit | success | Errors and warnings before: 0 this patch: 0 |
netdev/kdoc | success | Errors and warnings before: 0 this patch: 0 |
netdev/verify_fixes | success | Link |
netdev/checkpatch | warning | CHECK: Alignment should match open parenthesis |
netdev/build_allmodconfig_warn | success | Errors and warnings before: 0 this patch: 0 |
netdev/header_inline | success | Link |
netdev/stable | success | Stable not CCed |
On Wed, Nov 18, 2020 at 9:34 AM <mariusz.dudek@gmail.com> wrote: > > From: Mariusz Dudek <mariuszx.dudek@intel.com> > > Add support for separation of eBPF program load and xsk socket > creation. > > This is needed for the use case where you want to provide as few > privileges as possible to the data plane application that will > handle xsk socket creation and incoming traffic. > > With this patch the data entity container can be run with only > CAP_NET_RAW capability to fulfill its purpose of creating xsk > socket and handling packets. In case your umem is larger than or > equal to the process limit for MEMLOCK you need to either increase the > limit or add the CAP_IPC_LOCK capability. > > To resolve the privilege issue, two APIs are introduced: > > - xsk_setup_xdp_prog - loads the built in XDP program. It can > also return xsks_map_fd which is needed by unprivileged process > to update xsks_map with AF_XDP socket "fd" > > - xsk_socket__update_xskmap - inserts an AF_XDP socket into an xskmap > for a particular xsk_socket > > Signed-off-by: Mariusz Dudek <mariuszx.dudek@intel.com> > --- > tools/lib/bpf/libbpf.map | 2 + > tools/lib/bpf/xsk.c | 97 ++++++++++++++++++++++++++++++++++++---- > tools/lib/bpf/xsk.h | 5 +++ > 3 files changed, 95 insertions(+), 9 deletions(-) > > diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map > index 29ff4807b909..d939d5ac092e 100644 > --- a/tools/lib/bpf/libbpf.map > +++ b/tools/lib/bpf/libbpf.map > @@ -345,4 +345,6 @@ LIBBPF_0.3.0 { > btf__parse_split; > btf__new_empty_split; > btf__new_split; > + xsk_setup_xdp_prog; > + xsk_socket__update_xskmap; > } LIBBPF_0.2.0; > diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c > index 9bc537d0b92d..e16f920d2ef9 100644 > --- a/tools/lib/bpf/xsk.c > +++ b/tools/lib/bpf/xsk.c > @@ -566,8 +566,42 @@ static int xsk_set_bpf_maps(struct xsk_socket *xsk) > &xsk->fd, 0); > } > > -static int xsk_setup_xdp_prog(struct xsk_socket *xsk) > +static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk) > { > + 
char ifname[IFNAMSIZ]; > + struct xsk_ctx *ctx; > + char *interface; > + int res = -1; No need to set it to -1 anymore, due to the below. > + > + ctx = calloc(1, sizeof(*ctx)); > + if (!ctx) > + goto error_ctx; return an -ENOMEM here directly. > + > + interface = if_indextoname(ifindex, &ifname[0]); > + if (!interface) { > + res = -errno; > + goto error_ifindex; > + } > + > + ctx->ifindex = ifindex; > + strncpy(ctx->ifname, ifname, IFNAMSIZ - 1); > + ctx->ifname[IFNAMSIZ - 1] = 0; > + > + xsk->ctx = ctx; > + > + return 0; > + > +error_ifindex: > + free(ctx); > +error_ctx: And you can get rid of this label. > + return res; > +} > + > +static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp, > + bool force_set_map, force_set_map always seems to be false now. Correct? If it is, then it is not needed anymore. What was the original use case of this boolean? > + int *xsks_map_fd) > +{ > + struct xsk_socket *xsk = _xdp; > struct xsk_ctx *ctx = xsk->ctx; > __u32 prog_id = 0; > int err; > @@ -584,8 +618,7 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk) > > err = xsk_load_xdp_prog(xsk); > if (err) { > - xsk_delete_bpf_maps(xsk); > - return err; > + goto err_load_xdp_prog; > } > } else { > ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id); > @@ -598,15 +631,29 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk) > } > } > > - if (xsk->rx) > + if (xsk->rx || force_set_map) { > err = xsk_set_bpf_maps(xsk); > - if (err) { > - xsk_delete_bpf_maps(xsk); > - close(ctx->prog_fd); > - return err; > + if (err) { > + if (!prog_id) { > + goto err_set_bpf_maps; > + } else { > + close(ctx->prog_fd); > + return err; > + } > + } > } > + if (xsks_map_fd) > + *xsks_map_fd = ctx->xsks_map_fd; > > return 0; > + > +err_set_bpf_maps: > + close(ctx->prog_fd); > + bpf_set_link_xdp_fd(ctx->ifindex, -1, 0); > +err_load_xdp_prog: > + xsk_delete_bpf_maps(xsk); > + > + return err; > } > > static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex, > @@ -689,6 +736,38 @@ static struct 
xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk, > return ctx; > } > > +static void xsk_destroy_xsk_struct(struct xsk_socket *xsk) > +{ > + free(xsk->ctx); > + free(xsk); > +} > + > +int xsk_socket__update_xskmap(struct xsk_socket *xsk, int fd) > +{ > + xsk->ctx->xsks_map_fd = fd; > + return xsk_set_bpf_maps(xsk); > +} > + > +int xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd) > +{ > + struct xsk_socket *xsk; > + int res = -1; > + > + xsk = calloc(1, sizeof(*xsk)); > + if (!xsk) > + return res; > + > + res = xsk_create_xsk_struct(ifindex, xsk); > + if (res) > + return -EINVAL; Here you can now return the error from the function, i.e. return res, as we returned -ENOMEM in that function. You are however leaking the xsk struct you just allocated in case of error. Needs to be deallocated. > + > + res = __xsk_setup_xdp_prog(xsk, false, xsks_map_fd); > + > + xsk_destroy_xsk_struct(xsk); > + > + return res; > +} > + > int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, > const char *ifname, > __u32 queue_id, struct xsk_umem *umem, > @@ -838,7 +917,7 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, > ctx->prog_fd = -1; > > if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) { > - err = xsk_setup_xdp_prog(xsk); > + err = __xsk_setup_xdp_prog(xsk, false, NULL); > if (err) > goto out_mmap_tx; > } > diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h > index 1069c46364ff..5b74c17ed3d4 100644 > --- a/tools/lib/bpf/xsk.h > +++ b/tools/lib/bpf/xsk.h > @@ -201,6 +201,11 @@ struct xsk_umem_config { > __u32 flags; > }; > > +LIBBPF_API int xsk_setup_xdp_prog(int ifindex, > + int *xsks_map_fd); > +LIBBPF_API int xsk_socket__update_xskmap(struct xsk_socket *xsk, > + int xsks_map_fd); > + > /* Flags for the libbpf_flags field. */ > #define XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD (1 << 0) > > -- > 2.20.1 >
On Wed, Nov 25, 2020 at 3:30 PM Magnus Karlsson <magnus.karlsson@gmail.com> wrote: > > On Wed, Nov 18, 2020 at 9:34 AM <mariusz.dudek@gmail.com> wrote: > > > > From: Mariusz Dudek <mariuszx.dudek@intel.com> > > > > Add support for separation of eBPF program load and xsk socket > > creation. > > > > This is needed for the use case where you want to provide as few > > privileges as possible to the data plane application that will > > handle xsk socket creation and incoming traffic. > > > > With this patch the data entity container can be run with only > > CAP_NET_RAW capability to fulfill its purpose of creating xsk > > socket and handling packets. In case your umem is larger than or > > equal to the process limit for MEMLOCK you need to either increase the > > limit or add the CAP_IPC_LOCK capability. > > > > To resolve the privilege issue, two APIs are introduced: > > > > - xsk_setup_xdp_prog - loads the built in XDP program. It can > > also return xsks_map_fd which is needed by unprivileged process > > to update xsks_map with AF_XDP socket "fd" > > > > - xsk_socket__update_xskmap - inserts an AF_XDP socket into an xskmap > > for a particular xsk_socket > > > > Signed-off-by: Mariusz Dudek <mariuszx.dudek@intel.com> > > --- > > tools/lib/bpf/libbpf.map | 2 + > > tools/lib/bpf/xsk.c | 97 ++++++++++++++++++++++++++++++++++++---- > > tools/lib/bpf/xsk.h | 5 +++ > > 3 files changed, 95 insertions(+), 9 deletions(-) > > > > diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map > > index 29ff4807b909..d939d5ac092e 100644 > > --- a/tools/lib/bpf/libbpf.map > > +++ b/tools/lib/bpf/libbpf.map > > @@ -345,4 +345,6 @@ LIBBPF_0.3.0 { > > btf__parse_split; > > btf__new_empty_split; > > btf__new_split; > > + xsk_setup_xdp_prog; > > + xsk_socket__update_xskmap; > > } LIBBPF_0.2.0; > > diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c > > index 9bc537d0b92d..e16f920d2ef9 100644 > > --- a/tools/lib/bpf/xsk.c > > +++ b/tools/lib/bpf/xsk.c > > @@ -566,8 +566,42 @@ static int 
xsk_set_bpf_maps(struct xsk_socket *xsk) > > &xsk->fd, 0); > > } > > > > -static int xsk_setup_xdp_prog(struct xsk_socket *xsk) > > +static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk) > > { > > + char ifname[IFNAMSIZ]; > > + struct xsk_ctx *ctx; > > + char *interface; > > + int res = -1; > > No need to set it to -1 anymore, due to the below. Will fix this > > > + > > + ctx = calloc(1, sizeof(*ctx)); > > + if (!ctx) > > + goto error_ctx; > > return an -ENOMEM here directly. -ENOMEM will be returned > > > + > > + interface = if_indextoname(ifindex, &ifname[0]); > > + if (!interface) { > > + res = -errno; > > + goto error_ifindex; > > + } > > + > > + ctx->ifindex = ifindex; > > + strncpy(ctx->ifname, ifname, IFNAMSIZ - 1); > > + ctx->ifname[IFNAMSIZ - 1] = 0; > > + > > + xsk->ctx = ctx; > > + > > + return 0; > > + > > +error_ifindex: > > + free(ctx); > > +error_ctx: > > And you can get rid of this label. I will get rid of both labels as I can return either -ENOMEM or -errno from both places directly > > > + return res; > > +} > > + > > +static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp, > > + bool force_set_map, > > force_set_map always seems to be false now. Correct? If it is, then it > is not needed anymore. What was the original use case of this boolean? > force_set_map was used before for setting xsk bpf maps, but after code change it is no longer needed. I will remove it. 
> > + int *xsks_map_fd) > > +{ > > + struct xsk_socket *xsk = _xdp; > > struct xsk_ctx *ctx = xsk->ctx; > > __u32 prog_id = 0; > > int err; > > @@ -584,8 +618,7 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk) > > > > err = xsk_load_xdp_prog(xsk); > > if (err) { > > - xsk_delete_bpf_maps(xsk); > > - return err; > > + goto err_load_xdp_prog; > > } > > } else { > > ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id); > > @@ -598,15 +631,29 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk) > > } > > } > > > > - if (xsk->rx) > > + if (xsk->rx || force_set_map) { > > err = xsk_set_bpf_maps(xsk); > > - if (err) { > > - xsk_delete_bpf_maps(xsk); > > - close(ctx->prog_fd); > > - return err; > > + if (err) { > > + if (!prog_id) { > > + goto err_set_bpf_maps; > > + } else { > > + close(ctx->prog_fd); > > + return err; > > + } > > + } > > } > > + if (xsks_map_fd) > > + *xsks_map_fd = ctx->xsks_map_fd; > > > > return 0; > > + > > +err_set_bpf_maps: > > + close(ctx->prog_fd); > > + bpf_set_link_xdp_fd(ctx->ifindex, -1, 0); > > +err_load_xdp_prog: > > + xsk_delete_bpf_maps(xsk); > > + > > + return err; > > } > > > > static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex, > > @@ -689,6 +736,38 @@ static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk, > > return ctx; > > } > > > > +static void xsk_destroy_xsk_struct(struct xsk_socket *xsk) > > +{ > > + free(xsk->ctx); > > + free(xsk); > > +} > > + > > +int xsk_socket__update_xskmap(struct xsk_socket *xsk, int fd) > > +{ > > + xsk->ctx->xsks_map_fd = fd; > > + return xsk_set_bpf_maps(xsk); > > +} > > + > > +int xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd) > > +{ > > + struct xsk_socket *xsk; > > + int res = -1; > > + > > + xsk = calloc(1, sizeof(*xsk)); > > + if (!xsk) > > + return res; > > + > > + res = xsk_create_xsk_struct(ifindex, xsk); > > + if (res) > > + return -EINVAL; > > Here you can now return the error from the function, i.e. return res, > as we returned -ENOMEM in that function. 
You are however leaking the > xsk struct you just allocated in case of error. Needs to be > deallocated. > xsk struct deallocated. -ENOMEM returned in case calloc fails. > > + > > + res = __xsk_setup_xdp_prog(xsk, false, xsks_map_fd); > > + > > + xsk_destroy_xsk_struct(xsk); > > + > > + return res; > > +} > > + > > int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, > > const char *ifname, > > __u32 queue_id, struct xsk_umem *umem, > > @@ -838,7 +917,7 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, > > ctx->prog_fd = -1; > > > > if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) { > > - err = xsk_setup_xdp_prog(xsk); > > + err = __xsk_setup_xdp_prog(xsk, false, NULL); > > if (err) > > goto out_mmap_tx; > > } > > diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h > > index 1069c46364ff..5b74c17ed3d4 100644 > > --- a/tools/lib/bpf/xsk.h > > +++ b/tools/lib/bpf/xsk.h > > @@ -201,6 +201,11 @@ struct xsk_umem_config { > > __u32 flags; > > }; > > > > +LIBBPF_API int xsk_setup_xdp_prog(int ifindex, > > + int *xsks_map_fd); > > +LIBBPF_API int xsk_socket__update_xskmap(struct xsk_socket *xsk, > > + int xsks_map_fd); > > + > > /* Flags for the libbpf_flags field. */ > > #define XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD (1 << 0) > > > > -- > > 2.20.1 > >
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 29ff4807b909..d939d5ac092e 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -345,4 +345,6 @@ LIBBPF_0.3.0 { btf__parse_split; btf__new_empty_split; btf__new_split; + xsk_setup_xdp_prog; + xsk_socket__update_xskmap; } LIBBPF_0.2.0; diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index 9bc537d0b92d..e16f920d2ef9 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -566,8 +566,42 @@ static int xsk_set_bpf_maps(struct xsk_socket *xsk) &xsk->fd, 0); } -static int xsk_setup_xdp_prog(struct xsk_socket *xsk) +static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk) { + char ifname[IFNAMSIZ]; + struct xsk_ctx *ctx; + char *interface; + int res = -1; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) + goto error_ctx; + + interface = if_indextoname(ifindex, &ifname[0]); + if (!interface) { + res = -errno; + goto error_ifindex; + } + + ctx->ifindex = ifindex; + strncpy(ctx->ifname, ifname, IFNAMSIZ - 1); + ctx->ifname[IFNAMSIZ - 1] = 0; + + xsk->ctx = ctx; + + return 0; + +error_ifindex: + free(ctx); +error_ctx: + return res; +} + +static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp, + bool force_set_map, + int *xsks_map_fd) +{ + struct xsk_socket *xsk = _xdp; struct xsk_ctx *ctx = xsk->ctx; __u32 prog_id = 0; int err; @@ -584,8 +618,7 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk) err = xsk_load_xdp_prog(xsk); if (err) { - xsk_delete_bpf_maps(xsk); - return err; + goto err_load_xdp_prog; } } else { ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id); @@ -598,15 +631,29 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk) } } - if (xsk->rx) + if (xsk->rx || force_set_map) { err = xsk_set_bpf_maps(xsk); - if (err) { - xsk_delete_bpf_maps(xsk); - close(ctx->prog_fd); - return err; + if (err) { + if (!prog_id) { + goto err_set_bpf_maps; + } else { + close(ctx->prog_fd); + return err; + } + } } + if (xsks_map_fd) + *xsks_map_fd = ctx->xsks_map_fd; 
return 0; + +err_set_bpf_maps: + close(ctx->prog_fd); + bpf_set_link_xdp_fd(ctx->ifindex, -1, 0); +err_load_xdp_prog: + xsk_delete_bpf_maps(xsk); + + return err; } static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex, @@ -689,6 +736,38 @@ static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk, return ctx; } +static void xsk_destroy_xsk_struct(struct xsk_socket *xsk) +{ + free(xsk->ctx); + free(xsk); +} + +int xsk_socket__update_xskmap(struct xsk_socket *xsk, int fd) +{ + xsk->ctx->xsks_map_fd = fd; + return xsk_set_bpf_maps(xsk); +} + +int xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd) +{ + struct xsk_socket *xsk; + int res = -1; + + xsk = calloc(1, sizeof(*xsk)); + if (!xsk) + return res; + + res = xsk_create_xsk_struct(ifindex, xsk); + if (res) + return -EINVAL; + + res = __xsk_setup_xdp_prog(xsk, false, xsks_map_fd); + + xsk_destroy_xsk_struct(xsk); + + return res; +} + int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, const char *ifname, __u32 queue_id, struct xsk_umem *umem, @@ -838,7 +917,7 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, ctx->prog_fd = -1; if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) { - err = xsk_setup_xdp_prog(xsk); + err = __xsk_setup_xdp_prog(xsk, false, NULL); if (err) goto out_mmap_tx; } diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h index 1069c46364ff..5b74c17ed3d4 100644 --- a/tools/lib/bpf/xsk.h +++ b/tools/lib/bpf/xsk.h @@ -201,6 +201,11 @@ struct xsk_umem_config { __u32 flags; }; +LIBBPF_API int xsk_setup_xdp_prog(int ifindex, + int *xsks_map_fd); +LIBBPF_API int xsk_socket__update_xskmap(struct xsk_socket *xsk, + int xsks_map_fd); + /* Flags for the libbpf_flags field. */ #define XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD (1 << 0)