Message ID | 160200018165.719143.3249298786187115149.stgit@firesoul (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | BPF |
Headers | show |
Series | bpf: New approach for BPF MTU handling and enforcement | expand |
On Tue, 06 Oct 2020 18:03:01 +0200 Jesper Dangaard Brouer <brouer@redhat.com> wrote: > FIXME: add description. Ups, I will obviously send a V2. I still want feedback on whether I should implement another BPF-helper as sketched below: > FIXME: IMHO we can create a better BPF-helper named bpf_mtu_check() > instead of bpf_mtu_lookup(), because a flag can be used for requesting > GRO segment size checking. The ret value of bpf_mtu_check() says > if MTU was violoated, but also return MTU via pointer arg to allow > BPF-progs to do own logic. > > Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com> > --- > include/uapi/linux/bpf.h | 13 +++++++++++ > net/core/filter.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 69 insertions(+) > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index 50ce65e37b16..29b335cb96ef 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -3718,6 +3718,18 @@ union bpf_attr { > * never return NULL. > * Return > * A pointer pointing to the kernel percpu variable on this cpu. > + * > + * int bpf_mtu_lookup(void *ctx, u32 ifindex, u64 flags) > + * Description > + * Lookup MTU of net device based on ifindex. The Linux kernel > + * route table can configure MTUs on a more specific per route > + * level, which is not provided by this helper. For route level > + * MTU checks use the **bpf_fib_lookup**\ () helper. > + * > + * *ctx* is either **struct xdp_md** for XDP programs or > + * **struct sk_buff** tc cls_act programs. > + * Return > + * On success, MTU size is returned. On error, a negative value. > */ > #define __BPF_FUNC_MAPPER(FN) \ > FN(unspec), \ > @@ -3875,6 +3887,7 @@ union bpf_attr { > FN(redirect_neigh), \ > FN(bpf_per_cpu_ptr), \ > FN(bpf_this_cpu_ptr), \ > + FN(mtu_lookup), \ > /* */ > > /* integer value in 'imm' field of BPF_CALL instruction selects which helper > diff --git a/net/core/filter.c b/net/core/filter.c > index d84723f347c0..49ae3b80027b 100644 > --- a/net/core/filter.c > +++ b/net/core/filter.c > @@ -5512,6 +5512,58 @@ static const struct bpf_func_proto bpf_skb_fib_lookup_proto = { > .arg4_type = ARG_ANYTHING, > }; > > +static int bpf_mtu_lookup(struct net *netns, u32 ifindex, u64 flags) > +{ > + struct net_device *dev; > + > + // XXX: Do we even need flags? > + // Flag idea: get ctx dev->mtu for XDP_TX or redir out-same-dev > + if (flags) > + return -EINVAL; > + > + dev = dev_get_by_index_rcu(netns, ifindex); > + if (!dev) > + return -ENODEV; > + > + return dev->mtu; > +} > + > +BPF_CALL_3(bpf_skb_mtu_lookup, struct sk_buff *, skb, > + u32, ifindex, u64, flags) > +{ > + struct net *netns = dev_net(skb->dev); > + > + return bpf_mtu_lookup(netns, ifindex, flags); > +} > + > +BPF_CALL_3(bpf_xdp_mtu_lookup, struct xdp_buff *, xdp, > + u32, ifindex, u64, flags) > +{ > + struct net *netns = dev_net(xdp->rxq->dev); > + // XXX: Handle if this runs in devmap prog (then is rxq invalid?) > + > + return bpf_mtu_lookup(netns, ifindex, flags); > +} > + > +static const struct bpf_func_proto bpf_skb_mtu_lookup_proto = { > + .func = bpf_skb_mtu_lookup, > + .gpl_only = true, > + .ret_type = RET_INTEGER, > + .arg1_type = ARG_PTR_TO_CTX, > + .arg2_type = ARG_ANYTHING, > + .arg3_type = ARG_ANYTHING, > +}; > + > +static const struct bpf_func_proto bpf_xdp_mtu_lookup_proto = { > + .func = bpf_xdp_mtu_lookup, > + .gpl_only = true, > + .ret_type = RET_INTEGER, > + .arg1_type = ARG_PTR_TO_CTX, > + .arg2_type = ARG_ANYTHING, > + .arg3_type = ARG_ANYTHING, > +}; > + > + > #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF) > static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len) > { > @@ -7075,6 +7127,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) > return &bpf_get_socket_uid_proto; > case BPF_FUNC_fib_lookup: > return &bpf_skb_fib_lookup_proto; > + case BPF_FUNC_mtu_lookup: > + return &bpf_skb_mtu_lookup_proto; > case BPF_FUNC_sk_fullsock: > return &bpf_sk_fullsock_proto; > case BPF_FUNC_sk_storage_get: > @@ -7144,6 +7198,8 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) > return &bpf_xdp_adjust_tail_proto; > case BPF_FUNC_fib_lookup: > return &bpf_xdp_fib_lookup_proto; > + case BPF_FUNC_mtu_lookup: > + return &bpf_xdp_mtu_lookup_proto; > #ifdef CONFIG_INET > case BPF_FUNC_sk_lookup_udp: > return &bpf_xdp_sk_lookup_udp_proto; > >
On Tue, 6 Oct 2020 18:33:02 +0200 Jesper Dangaard Brouer wrote: > > +static const struct bpf_func_proto bpf_xdp_mtu_lookup_proto = { > > + .func = bpf_xdp_mtu_lookup, > > + .gpl_only = true, > > + .ret_type = RET_INTEGER, > > + .arg1_type = ARG_PTR_TO_CTX, > > + .arg2_type = ARG_ANYTHING, > > + .arg3_type = ARG_ANYTHING, > > +}; > > + > > + FWIW CHECK: Please don't use multiple blank lines #112: FILE: net/core/filter.c:5566: + +
On Tue, Oct 6, 2020 at 6:19 PM Jakub Kicinski <kuba@kernel.org> wrote: > > On Tue, 6 Oct 2020 18:33:02 +0200 Jesper Dangaard Brouer wrote: > > > +static const struct bpf_func_proto bpf_xdp_mtu_lookup_proto = { > > > + .func = bpf_xdp_mtu_lookup, > > > + .gpl_only = true, > > > + .ret_type = RET_INTEGER, > > > + .arg1_type = ARG_PTR_TO_CTX, > > > + .arg2_type = ARG_ANYTHING, > > > + .arg3_type = ARG_ANYTHING, > > > +}; > > > + > > > + > > FWIW > > CHECK: Please don't use multiple blank lines > #112: FILE: net/core/filter.c:5566: FYI: It would be nice to have a similar function to return a device's L2 header size (ie. 14 for ethernet) and/or hwtype. Also, should this be restricted to gpl only? [I'm not actually sure, I'm actually fed up with non-gpl code atm, and wouldn't be against all bpf code needing to be gpl'ed...]
On Tue, 6 Oct 2020 18:24:28 -0700 Maciej Żenczykowski <maze@google.com> wrote: > On Tue, Oct 6, 2020 at 6:19 PM Jakub Kicinski <kuba@kernel.org> wrote: > > > > On Tue, 6 Oct 2020 18:33:02 +0200 Jesper Dangaard Brouer wrote: > > > > +static const struct bpf_func_proto bpf_xdp_mtu_lookup_proto = { > > > > + .func = bpf_xdp_mtu_lookup, > > > > + .gpl_only = true, > > > > + .ret_type = RET_INTEGER, > > > > + .arg1_type = ARG_PTR_TO_CTX, > > > > + .arg2_type = ARG_ANYTHING, > > > > + .arg3_type = ARG_ANYTHING, > > > > +}; > > > > + > > > > + > > > > FWIW > > > > CHECK: Please don't use multiple blank lines > > #112: FILE: net/core/filter.c:5566: > > FYI: It would be nice to have a similar function to return a device's > L2 header size (ie. 14 for ethernet) and/or hwtype. > > Also, should this be restricted to gpl only? That is mostly because I copy-pasted it from the fib lookup helper, which with good reason is GPL. I would prefer it to be GPL, but given how simple it is I shouldn't. Maybe I could argue that my new mtu_check could be GPL as it does more work. > [I'm not actually sure, I'm actually fed up with non-gpl code atm, and > wouldn't be against all bpf code needing to be gpl'ed...]
On 10/6/20 6:24 PM, Maciej Żenczykowski wrote: > > FYI: It would be nice to have a similar function to return a device's > L2 header size (ie. 14 for ethernet) and/or hwtype. Why does that need to be looked up via a helper? It's a static number for a device and can plumbed to a program in a number of ways.
> > FYI: It would be nice to have a similar function to return a device's > > L2 header size (ie. 14 for ethernet) and/or hwtype. > > Why does that need to be looked up via a helper? It's a static number > for a device and can plumbed to a program in a number of ways. Fair enough.
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 50ce65e37b16..29b335cb96ef 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3718,6 +3718,18 @@ union bpf_attr { * never return NULL. * Return * A pointer pointing to the kernel percpu variable on this cpu. + * + * int bpf_mtu_lookup(void *ctx, u32 ifindex, u64 flags) + * Description + * Lookup MTU of net device based on ifindex. The Linux kernel + * route table can configure MTUs on a more specific per route + * level, which is not provided by this helper. For route level + * MTU checks use the **bpf_fib_lookup**\ () helper. + * + * *ctx* is either **struct xdp_md** for XDP programs or + * **struct sk_buff** tc cls_act programs. + * Return + * On success, MTU size is returned. On error, a negative value. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3875,6 +3887,7 @@ union bpf_attr { FN(redirect_neigh), \ FN(bpf_per_cpu_ptr), \ FN(bpf_this_cpu_ptr), \ + FN(mtu_lookup), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/net/core/filter.c b/net/core/filter.c index d84723f347c0..49ae3b80027b 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5512,6 +5512,58 @@ static const struct bpf_func_proto bpf_skb_fib_lookup_proto = { .arg4_type = ARG_ANYTHING, }; +static int bpf_mtu_lookup(struct net *netns, u32 ifindex, u64 flags) +{ + struct net_device *dev; + + // XXX: Do we even need flags? + // Flag idea: get ctx dev->mtu for XDP_TX or redir out-same-dev + if (flags) + return -EINVAL; + + dev = dev_get_by_index_rcu(netns, ifindex); + if (!dev) + return -ENODEV; + + return dev->mtu; +} + +BPF_CALL_3(bpf_skb_mtu_lookup, struct sk_buff *, skb, + u32, ifindex, u64, flags) +{ + struct net *netns = dev_net(skb->dev); + + return bpf_mtu_lookup(netns, ifindex, flags); +} + +BPF_CALL_3(bpf_xdp_mtu_lookup, struct xdp_buff *, xdp, + u32, ifindex, u64, flags) +{ + struct net *netns = dev_net(xdp->rxq->dev); + // XXX: Handle if this runs in devmap prog (then is rxq invalid?) + + return bpf_mtu_lookup(netns, ifindex, flags); +} + +static const struct bpf_func_proto bpf_skb_mtu_lookup_proto = { + .func = bpf_skb_mtu_lookup, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, +}; + +static const struct bpf_func_proto bpf_xdp_mtu_lookup_proto = { + .func = bpf_xdp_mtu_lookup, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, +}; + + #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF) static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len) { @@ -7075,6 +7127,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_socket_uid_proto; case BPF_FUNC_fib_lookup: return &bpf_skb_fib_lookup_proto; + case BPF_FUNC_mtu_lookup: + return &bpf_skb_mtu_lookup_proto; case BPF_FUNC_sk_fullsock: return &bpf_sk_fullsock_proto; case BPF_FUNC_sk_storage_get: @@ -7144,6 +7198,8 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_xdp_adjust_tail_proto; case BPF_FUNC_fib_lookup: return &bpf_xdp_fib_lookup_proto; + case BPF_FUNC_mtu_lookup: + return &bpf_xdp_mtu_lookup_proto; #ifdef CONFIG_INET case BPF_FUNC_sk_lookup_udp: return &bpf_xdp_sk_lookup_udp_proto;
FIXME: add description. FIXME: IMHO we can create a better BPF-helper named bpf_mtu_check() instead of bpf_mtu_lookup(), because a flag can be used for requesting GRO segment size checking. The ret value of bpf_mtu_check() says if MTU was violoated, but also return MTU via pointer arg to allow BPF-progs to do own logic. Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com> --- include/uapi/linux/bpf.h | 13 +++++++++++ net/core/filter.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+)