| Message ID | 20201216143036.2296568-2-liuhangbin@gmail.com (mailing list archive) |
|---|---|
| State | Superseded |
| Delegated to | BPF |
| Series | xdp: add a new helper for dev map multicast support |
| Context | Check | Description |
|---|---|---|
| netdev/cover_letter | success | Link |
| netdev/fixes_present | success | Link |
| netdev/patch_count | success | Link |
| netdev/tree_selection | success | Clearly marked for bpf-next |
| netdev/subject_prefix | success | Link |
| netdev/source_inline | success | Was 0 now: 0 |
| netdev/verify_signedoff | success | Link |
| netdev/module_param | success | Was 0 now: 0 |
| netdev/build_32bit | success | Errors and warnings before: 1 this patch: 1 |
| netdev/kdoc | success | Errors and warnings before: 0 this patch: 0 |
| netdev/verify_fixes | success | Link |
| netdev/checkpatch | success | total: 0 errors, 0 warnings, 0 checks, 193 lines checked |
| netdev/build_allmodconfig_warn | success | Errors and warnings before: 1 this patch: 1 |
| netdev/header_inline | success | Link |
| netdev/stable | success | Stable not CCed |
On 12/16/20 7:30 AM, Hangbin Liu wrote:
> @@ -327,40 +328,92 @@ bool dev_map_can_have_prog(struct bpf_map *map)
>  	return false;
>  }
>  
> +static int dev_map_bpf_prog_run(struct bpf_prog *xdp_prog,
> +				struct xdp_frame **frames, int n,
> +				struct net_device *dev)
> +{
> +	struct xdp_txq_info txq = { .dev = dev };
> +	struct xdp_buff xdp;
> +	int i, nframes = 0;
> +
> +	for (i = 0; i < n; i++) {
> +		struct xdp_frame *xdpf = frames[i];
> +		u32 act;
> +		int err;
> +
> +		xdp_convert_frame_to_buff(xdpf, &xdp);
> +		xdp.txq = &txq;
> +
> +		act = bpf_prog_run_xdp(xdp_prog, &xdp);
> +		switch (act) {
> +		case XDP_PASS:
> +			err = xdp_update_frame_from_buff(&xdp, xdpf);
> +			if (unlikely(err < 0))
> +				xdp_return_frame_rx_napi(xdpf);
> +			else
> +				frames[nframes++] = xdpf;
> +			break;
> +		default:
> +			bpf_warn_invalid_xdp_action(act);
> +			fallthrough;
> +		case XDP_ABORTED:
> +			trace_xdp_exception(dev, xdp_prog, act);
> +			fallthrough;
> +		case XDP_DROP:
> +			xdp_return_frame_rx_napi(xdpf);
> +			break;
> +		}
> +	}
> +	return n - nframes; /* dropped frames count */

just return nframes here, since ...

> +}
> +
>  static void bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
>  {
>  	struct net_device *dev = bq->dev;
>  	int sent = 0, drops = 0, err = 0;
> +	unsigned int cnt = bq->count;
> +	unsigned int xdp_drop;
>  	int i;
>  
> -	if (unlikely(!bq->count))
> +	if (unlikely(!cnt))
>  		return;
>  
> -	for (i = 0; i < bq->count; i++) {
> +	for (i = 0; i < cnt; i++) {
>  		struct xdp_frame *xdpf = bq->q[i];
>  
>  		prefetch(xdpf);
>  	}
>  
> -	sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q, flags);
> +	if (unlikely(bq->xdp_prog)) {
> +		xdp_drop = dev_map_bpf_prog_run(bq->xdp_prog, bq->q, cnt, dev);
> +		cnt -= xdp_drop;

... that is apparently what you really want.

> +		if (!cnt) {
> +			sent = 0;
> +			drops = xdp_drop;
> +			goto out;
> +		}
> +	}
> +
> +	sent = dev->netdev_ops->ndo_xdp_xmit(dev, cnt, bq->q, flags);
>  	if (sent < 0) {
>  		err = sent;
>  		sent = 0;
>  		goto error;
>  	}
> -	drops = bq->count - sent;
> +	drops = (cnt - sent) + xdp_drop;
>  out:
>  	bq->count = 0;
>  
>  	trace_xdp_devmap_xmit(bq->dev_rx, dev, sent, drops, err);
>  	bq->dev_rx = NULL;
> +	bq->xdp_prog = NULL;
>  	__list_del_clearprev(&bq->flush_node);
>  	return;
>  error:
>  	/* If ndo_xdp_xmit fails with an errno, no frames have been
>  	 * xmit'ed and it's our responsibility to them free all.
>  	 */
> -	for (i = 0; i < bq->count; i++) {
> +	for (i = 0; i < cnt; i++) {
>  		struct xdp_frame *xdpf = bq->q[i];
>  
>  		xdp_return_frame_rx_napi(xdpf);
> @@ -408,7 +461,8 @@ struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
>   * Thus, safe percpu variable access.
>   */
>  static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
> -		       struct net_device *dev_rx)
> +		       struct net_device *dev_rx,
> +		       struct bpf_dtab_netdev *dst)
>  {
>  	struct list_head *flush_list = this_cpu_ptr(&dev_flush_list);
>  	struct xdp_dev_bulk_queue *bq = this_cpu_ptr(dev->xdp_bulkq);
> @@ -423,6 +477,14 @@ static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
>  	if (!bq->dev_rx)
>  		bq->dev_rx = dev_rx;
>  
> +	/* Store (potential) xdp_prog that run before egress to dev as
> +	 * part of bulk_queue. This will be same xdp_prog for all
> +	 * xdp_frame's in bulk_queue, because this per-CPU store must
> +	 * be flushed from net_device drivers NAPI func end.
> +	 */
> +	if (dst && dst->xdp_prog && !bq->xdp_prog)
> +		bq->xdp_prog = dst->xdp_prog;

if you pass in xdp_prog through __xdp_enqueue you can reduce that to just:

	if (!bq->xdp_prog)
		bq->xdp_prog = xdp_prog;

>  	bq->q[bq->count++] = xdpf;
>  
>  	if (!bq->flush_node.prev)
> @@ -430,7 +492,8 @@ static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
>  }
>  
>  static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
> -				struct net_device *dev_rx)
> +				struct net_device *dev_rx,
> +				struct bpf_dtab_netdev *dst)
>  {
>  	struct xdp_frame *xdpf;
>  	int err;
> @@ -446,42 +509,14 @@ static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
>  	if (unlikely(!xdpf))
>  		return -EOVERFLOW;
>  
> -	bq_enqueue(dev, xdpf, dev_rx);
> +	bq_enqueue(dev, xdpf, dev_rx, dst);
>  	return 0;
>  }
>  
> -static struct xdp_buff *dev_map_run_prog(struct net_device *dev,
> -					 struct xdp_buff *xdp,
> -					 struct bpf_prog *xdp_prog)
> -{
> -	struct xdp_txq_info txq = { .dev = dev };
> -	u32 act;
> -
> -	xdp_set_data_meta_invalid(xdp);
> -	xdp->txq = &txq;
> -
> -	act = bpf_prog_run_xdp(xdp_prog, xdp);
> -	switch (act) {
> -	case XDP_PASS:
> -		return xdp;
> -	case XDP_DROP:
> -		break;
> -	default:
> -		bpf_warn_invalid_xdp_action(act);
> -		fallthrough;
> -	case XDP_ABORTED:
> -		trace_xdp_exception(dev, xdp_prog, act);
> -		break;
> -	}
> -
> -	xdp_return_buff(xdp);
> -	return NULL;
> -}
> -
>  int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
>  		    struct net_device *dev_rx)
>  {
> -	return __xdp_enqueue(dev, xdp, dev_rx);
> +	return __xdp_enqueue(dev, xdp, dev_rx, NULL);
>  }
>  
>  int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
> @@ -489,12 +524,7 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
>  {
>  	struct net_device *dev = dst->dev;
>  
> -	if (dst->xdp_prog) {
> -		xdp = dev_map_run_prog(dev, xdp, dst->xdp_prog);
> -		if (!xdp)
> -			return 0;
> -	}
> -	return __xdp_enqueue(dev, xdp, dev_rx);
> +	return __xdp_enqueue(dev, xdp, dev_rx, dst);
>  }
>  
>  int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
>
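For reference, a minimal sketch of what bq_enqueue() could look like if the egress bpf_prog were passed through __xdp_enqueue() directly, as suggested above. This is an illustration based on the review comment, not code from this series; the surrounding lines follow the existing devmap.c logic and the exact signature may differ in the next revision.

```c
/* Sketch only, per the review suggestion: bq_enqueue() takes the egress
 * bpf_prog pointer directly instead of a struct bpf_dtab_netdev. The body
 * mirrors the existing devmap.c code; details may differ in the real patch.
 */
static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
		       struct net_device *dev_rx, struct bpf_prog *xdp_prog)
{
	struct list_head *flush_list = this_cpu_ptr(&dev_flush_list);
	struct xdp_dev_bulk_queue *bq = this_cpu_ptr(dev->xdp_bulkq);

	if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
		bq_xmit_all(bq, 0);

	if (!bq->dev_rx)
		bq->dev_rx = dev_rx;

	/* One program per bulk queue is enough: the per-CPU queue is
	 * flushed at the end of the driver's NAPI poll, so every frame
	 * queued here shares the same egress program.
	 */
	if (!bq->xdp_prog)
		bq->xdp_prog = xdp_prog;

	bq->q[bq->count++] = xdpf;

	if (!bq->flush_node.prev)
		list_add(&bq->flush_node, flush_list);
}
```

Callers such as dev_map_enqueue() would then pass dst->xdp_prog, and dev_xdp_enqueue() would pass NULL, instead of handing the whole struct bpf_dtab_netdev down.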
Hi David,

Thanks for the comment.

On Thu, Dec 17, 2020 at 09:07:03AM -0700, David Ahern wrote:
> > +	return n - nframes; /* dropped frames count */
>
> just return nframes here, since ...
>
> > +		xdp_drop = dev_map_bpf_prog_run(bq->xdp_prog, bq->q, cnt, dev);
> > +		cnt -= xdp_drop;
>
> ... that is apparently what you really want.

I will fix this

> > +	if (dst && dst->xdp_prog && !bq->xdp_prog)
> > +		bq->xdp_prog = dst->xdp_prog;
>
>
> if you pass in xdp_prog through __xdp_enqueue you can reduce that to just:
>
> 	if (!bq->xdp_prog)
> 		bq->xdp_prog = xdp_prog;

And this in the next PATCH version.

Thanks
Hangbin
Hi David,

I just realized that,

On Thu, Dec 17, 2020 at 09:07:03AM -0700, David Ahern wrote:
> > +static int dev_map_bpf_prog_run(struct bpf_prog *xdp_prog,
> > +				struct xdp_frame **frames, int n,
> > +				struct net_device *dev)
> > +{
> > +	struct xdp_txq_info txq = { .dev = dev };
> > +	struct xdp_buff xdp;
> > +	int i, nframes = 0;
> > +
> > +	for (i = 0; i < n; i++) {
> > +		struct xdp_frame *xdpf = frames[i];
> > +		u32 act;
> > +		int err;
> > +
> > +		xdp_convert_frame_to_buff(xdpf, &xdp);
> > +		xdp.txq = &txq;
> > +
> > +		act = bpf_prog_run_xdp(xdp_prog, &xdp);
> > +		switch (act) {
> > +		case XDP_PASS:
> > +			err = xdp_update_frame_from_buff(&xdp, xdpf);
> > +			if (unlikely(err < 0))
> > +				xdp_return_frame_rx_napi(xdpf);
> > +			else
> > +				frames[nframes++] = xdpf;
> > +			break;
> > +		default:
> > +			bpf_warn_invalid_xdp_action(act);
> > +			fallthrough;
> > +		case XDP_ABORTED:
> > +			trace_xdp_exception(dev, xdp_prog, act);
> > +			fallthrough;
> > +		case XDP_DROP:
> > +			xdp_return_frame_rx_napi(xdpf);
> > +			break;
> > +		}
> > +	}
> > +	return n - nframes; /* dropped frames count */
>
> just return nframes here, since ...

If we return nframes here,

>
> > +}
> > +
> >  static void bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
> >  {
> >  	struct net_device *dev = bq->dev;
> >  	int sent = 0, drops = 0, err = 0;
> > +	unsigned int cnt = bq->count;
> > +	unsigned int xdp_drop;
> >  	int i;
> >
> > -	if (unlikely(!bq->count))
> > +	if (unlikely(!cnt))
> >  		return;
> >
> > -	for (i = 0; i < bq->count; i++) {
> > +	for (i = 0; i < cnt; i++) {
> >  		struct xdp_frame *xdpf = bq->q[i];
> >
> >  		prefetch(xdpf);
> >  	}
> >
> > -	sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q, flags);
> > +	if (unlikely(bq->xdp_prog)) {
> > +		xdp_drop = dev_map_bpf_prog_run(bq->xdp_prog, bq->q, cnt, dev);
> > +		cnt -= xdp_drop;
>
> ... that is apparently what you really want.

then this will be

	cnt = dev_map_bpf_prog_run(bq->xdp_prog, bq->q, cnt, dev);
	xdp_drop = bq->count - cnt;

So there is not much difference whether we return passed frames or dropped
frames.

>
> > +		if (!cnt) {
> > +			sent = 0;
> > +			drops = xdp_drop;
> > +			goto out;
> > +		}
> > +	}

Thanks
Hangbin
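To make the comparison concrete, here is a sketch of the same bq_xmit_all() hunk if dev_map_bpf_prog_run() were changed to return the number of frames that passed the program, as the review asks. Illustrative only, not part of this version of the patch.

```c
/* Sketch: assumes dev_map_bpf_prog_run() returns the count of frames that
 * passed the egress program rather than the count of dropped frames.
 */
if (unlikely(bq->xdp_prog)) {
	int nframes;

	nframes = dev_map_bpf_prog_run(bq->xdp_prog, bq->q, cnt, dev);
	xdp_drop = cnt - nframes;	/* frames dropped by the egress program */
	cnt = nframes;
	if (!cnt) {
		sent = 0;
		drops = xdp_drop;
		goto out;
	}
}
```

Either way one subtraction is needed to fill in the trace counters, which is the point being made above.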
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index f6e9c68afdd4..2a83232cf63a 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -57,6 +57,7 @@ struct xdp_dev_bulk_queue {
 	struct list_head flush_node;
 	struct net_device *dev;
 	struct net_device *dev_rx;
+	struct bpf_prog *xdp_prog;
 	unsigned int count;
 };
 
@@ -327,40 +328,92 @@ bool dev_map_can_have_prog(struct bpf_map *map)
 	return false;
 }
 
+static int dev_map_bpf_prog_run(struct bpf_prog *xdp_prog,
+				struct xdp_frame **frames, int n,
+				struct net_device *dev)
+{
+	struct xdp_txq_info txq = { .dev = dev };
+	struct xdp_buff xdp;
+	int i, nframes = 0;
+
+	for (i = 0; i < n; i++) {
+		struct xdp_frame *xdpf = frames[i];
+		u32 act;
+		int err;
+
+		xdp_convert_frame_to_buff(xdpf, &xdp);
+		xdp.txq = &txq;
+
+		act = bpf_prog_run_xdp(xdp_prog, &xdp);
+		switch (act) {
+		case XDP_PASS:
+			err = xdp_update_frame_from_buff(&xdp, xdpf);
+			if (unlikely(err < 0))
+				xdp_return_frame_rx_napi(xdpf);
+			else
+				frames[nframes++] = xdpf;
+			break;
+		default:
+			bpf_warn_invalid_xdp_action(act);
+			fallthrough;
+		case XDP_ABORTED:
+			trace_xdp_exception(dev, xdp_prog, act);
+			fallthrough;
+		case XDP_DROP:
+			xdp_return_frame_rx_napi(xdpf);
+			break;
+		}
+	}
+	return n - nframes; /* dropped frames count */
+}
+
 static void bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
 {
 	struct net_device *dev = bq->dev;
 	int sent = 0, drops = 0, err = 0;
+	unsigned int cnt = bq->count;
+	unsigned int xdp_drop;
 	int i;
 
-	if (unlikely(!bq->count))
+	if (unlikely(!cnt))
 		return;
 
-	for (i = 0; i < bq->count; i++) {
+	for (i = 0; i < cnt; i++) {
 		struct xdp_frame *xdpf = bq->q[i];
 
 		prefetch(xdpf);
 	}
 
-	sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q, flags);
+	if (unlikely(bq->xdp_prog)) {
+		xdp_drop = dev_map_bpf_prog_run(bq->xdp_prog, bq->q, cnt, dev);
+		cnt -= xdp_drop;
+		if (!cnt) {
+			sent = 0;
+			drops = xdp_drop;
+			goto out;
+		}
+	}
+
+	sent = dev->netdev_ops->ndo_xdp_xmit(dev, cnt, bq->q, flags);
 	if (sent < 0) {
 		err = sent;
 		sent = 0;
 		goto error;
 	}
-	drops = bq->count - sent;
+	drops = (cnt - sent) + xdp_drop;
 out:
 	bq->count = 0;
 
 	trace_xdp_devmap_xmit(bq->dev_rx, dev, sent, drops, err);
 	bq->dev_rx = NULL;
+	bq->xdp_prog = NULL;
 	__list_del_clearprev(&bq->flush_node);
 	return;
 error:
 	/* If ndo_xdp_xmit fails with an errno, no frames have been
 	 * xmit'ed and it's our responsibility to them free all.
 	 */
-	for (i = 0; i < bq->count; i++) {
+	for (i = 0; i < cnt; i++) {
 		struct xdp_frame *xdpf = bq->q[i];
 
 		xdp_return_frame_rx_napi(xdpf);
@@ -408,7 +461,8 @@ struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
  * Thus, safe percpu variable access.
  */
 static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
-		       struct net_device *dev_rx)
+		       struct net_device *dev_rx,
+		       struct bpf_dtab_netdev *dst)
 {
 	struct list_head *flush_list = this_cpu_ptr(&dev_flush_list);
 	struct xdp_dev_bulk_queue *bq = this_cpu_ptr(dev->xdp_bulkq);
@@ -423,6 +477,14 @@ static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
 	if (!bq->dev_rx)
 		bq->dev_rx = dev_rx;
 
+	/* Store (potential) xdp_prog that run before egress to dev as
+	 * part of bulk_queue. This will be same xdp_prog for all
+	 * xdp_frame's in bulk_queue, because this per-CPU store must
+	 * be flushed from net_device drivers NAPI func end.
+	 */
+	if (dst && dst->xdp_prog && !bq->xdp_prog)
+		bq->xdp_prog = dst->xdp_prog;
+
 	bq->q[bq->count++] = xdpf;
 
 	if (!bq->flush_node.prev)
@@ -430,7 +492,8 @@ static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
 }
 
 static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
-				struct net_device *dev_rx)
+				struct net_device *dev_rx,
+				struct bpf_dtab_netdev *dst)
 {
 	struct xdp_frame *xdpf;
 	int err;
@@ -446,42 +509,14 @@ static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
 	if (unlikely(!xdpf))
 		return -EOVERFLOW;
 
-	bq_enqueue(dev, xdpf, dev_rx);
+	bq_enqueue(dev, xdpf, dev_rx, dst);
 	return 0;
 }
 
-static struct xdp_buff *dev_map_run_prog(struct net_device *dev,
-					 struct xdp_buff *xdp,
-					 struct bpf_prog *xdp_prog)
-{
-	struct xdp_txq_info txq = { .dev = dev };
-	u32 act;
-
-	xdp_set_data_meta_invalid(xdp);
-	xdp->txq = &txq;
-
-	act = bpf_prog_run_xdp(xdp_prog, xdp);
-	switch (act) {
-	case XDP_PASS:
-		return xdp;
-	case XDP_DROP:
-		break;
-	default:
-		bpf_warn_invalid_xdp_action(act);
-		fallthrough;
-	case XDP_ABORTED:
-		trace_xdp_exception(dev, xdp_prog, act);
-		break;
-	}
-
-	xdp_return_buff(xdp);
-	return NULL;
-}
-
 int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
 		    struct net_device *dev_rx)
 {
-	return __xdp_enqueue(dev, xdp, dev_rx);
+	return __xdp_enqueue(dev, xdp, dev_rx, NULL);
 }
 
 int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
@@ -489,12 +524,7 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
 {
 	struct net_device *dev = dst->dev;
 
-	if (dst->xdp_prog) {
-		xdp = dev_map_run_prog(dev, xdp, dst->xdp_prog);
-		if (!xdp)
-			return 0;
-	}
-	return __xdp_enqueue(dev, xdp, dev_rx);
+	return __xdp_enqueue(dev, xdp, dev_rx, dst);
 }
 
 int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
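For context, the dst->xdp_prog that this patch now runs from bq_xmit_all() comes from a program fd that userspace stores in the devmap value (struct bpf_devmap_val, introduced with the original DEVMAP program support). A rough, hedged illustration of how such an egress program and map entry could be wired up follows; the SEC() naming convention depends on the libbpf version, and none of this is part of the patch itself.

```c
/* devmap_egress.bpf.c - minimal devmap egress program (illustrative).
 * Older libbpf matches a "xdp_devmap/" section prefix; current libbpf
 * uses "xdp/devmap". Adjust the SEC() name to the libbpf in use.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("xdp_devmap")
int xdp_egress_filter(struct xdp_md *ctx)
{
	/* Runs from dev_map_bpf_prog_run() just before ndo_xdp_xmit();
	 * returning XDP_DROP here drops the frame for this egress device
	 * only, without affecting other devmap entries.
	 */
	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";
```

On the userspace side, the program fd is stored next to the ifindex in the map value. A sketch, where map_fd, prog_fd and egress_ifindex are assumed to come from the usual libbpf setup:

```c
/* Assumes map_fd refers to a BPF_MAP_TYPE_DEVMAP created with
 * value_size == sizeof(struct bpf_devmap_val).
 */
struct bpf_devmap_val val = {
	.ifindex = egress_ifindex,	/* device to redirect to */
	.bpf_prog.fd = prog_fd,		/* fd of the egress program above */
};
__u32 key = 0;

bpf_map_update_elem(map_fd, &key, &val, 0);
```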