Message ID | 20221208054045.3600-7-gerhard@engleder-embedded.com (mailing list archive)
---|---
State | Changes Requested
Delegated to: | Netdev Maintainers
Series | tsnep: XDP support
Context | Check | Description |
---|---|---
netdev/tree_selection | success | Clearly marked for net-next, async |
netdev/fixes_present | success | Fixes tag not required for -next series |
netdev/subject_prefix | success | Link |
netdev/cover_letter | success | Series has a cover letter |
netdev/patch_count | success | Link |
netdev/header_inline | success | No static functions without inline keyword in header files |
netdev/build_32bit | success | Errors and warnings before: 0 this patch: 0 |
netdev/cc_maintainers | success | CCed 11 of 11 maintainers |
netdev/build_clang | success | Errors and warnings before: 0 this patch: 0 |
netdev/module_param | success | Was 0 now: 0 |
netdev/verify_signedoff | success | Signed-off-by tag matches author and committer |
netdev/check_selftest | success | No net selftest shell script |
netdev/verify_fixes | success | No Fixes tag |
netdev/build_allmodconfig_warn | success | Errors and warnings before: 0 this patch: 0 |
netdev/checkpatch | warning | WARNING: line length of 95 exceeds 80 columns |
netdev/kdoc | success | Errors and warnings before: 0 this patch: 0 |
netdev/source_inline | success | Was 0 now: 0 |
On Thu, Dec 08, 2022 at 06:40:45AM +0100, Gerhard Engleder wrote:
> If BPF program is set up, then run BPF program for every received frame
> and execute the selected action.
>
> Test results with A53 1.2GHz:
>
> XDP_DROP (samples/bpf/xdp1)
> proto 17:     883878 pkt/s
>
> XDP_TX (samples/bpf/xdp2)
> proto 17:     255693 pkt/s
>
> XDP_REDIRECT (samples/bpf/xdpsock)
> sock0@eth2:0 rxdrop xdp-drv
>                pps            pkts           1.00
> rx             855,582        5,404,523
> tx             0              0
>
> XDP_REDIRECT (samples/bpf/xdp_redirect)
> eth2->eth1     613,267 rx/s   0 err,drop/s   613,272 xmit/s
>
> Signed-off-by: Gerhard Engleder <gerhard@engleder-embedded.com>
> ---
>  drivers/net/ethernet/engleder/tsnep_main.c | 126 +++++++++++++++++++++
>  1 file changed, 126 insertions(+)
>
> diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c
> index 2b662a98b62a..d59cb714c8cd 100644
> --- a/drivers/net/ethernet/engleder/tsnep_main.c
> +++ b/drivers/net/ethernet/engleder/tsnep_main.c
> @@ -27,6 +27,7 @@
>  #include <linux/phy.h>
>  #include <linux/iopoll.h>
>  #include <linux/bpf.h>
> +#include <linux/bpf_trace.h>
>
>  #define TSNEP_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
>  #define TSNEP_HEADROOM ALIGN(max(TSNEP_SKB_PAD, XDP_PACKET_HEADROOM), 4)
> @@ -44,6 +45,9 @@
>  #define TSNEP_COALESCE_USECS_MAX ((ECM_INT_DELAY_MASK >> ECM_INT_DELAY_SHIFT) * \
> 				   ECM_INT_DELAY_BASE_US + ECM_INT_DELAY_BASE_US - 1)
>
> +#define TSNEP_XDP_TX		BIT(0)
> +#define TSNEP_XDP_REDIRECT	BIT(1)
> +
>  enum {
>  	__TSNEP_DOWN,
>  };
> @@ -626,6 +630,33 @@ static void tsnep_xdp_xmit_flush(struct tsnep_tx *tx)
>  	iowrite32(TSNEP_CONTROL_TX_ENABLE, tx->addr + TSNEP_CONTROL);
>  }
>
> +static int tsnep_xdp_xmit_back(struct tsnep_adapter *adapter,
> +			       struct xdp_buff *xdp)
> +{
> +	struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
> +	int cpu = smp_processor_id();
> +	int queue;
> +	struct netdev_queue *nq;
> +	int retval;
> +
> +	if (unlikely(!xdpf))
> +		return -EFAULT;
> +
> +	queue = cpu % adapter->num_tx_queues;
> +	nq = netdev_get_tx_queue(adapter->netdev, queue);
> +
> +	__netif_tx_lock(nq, cpu);
> +
> +	/* Avoid transmit queue timeout since we share it with the slow path */
> +	txq_trans_cond_update(nq);
> +
> +	retval = tsnep_xdp_xmit_frame_ring(xdpf, &adapter->tx[queue], false);
> +
> +	__netif_tx_unlock(nq);
> +
> +	return retval;
> +}
> +
>  static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
>  {
>  	unsigned long flags;
> @@ -792,6 +823,11 @@ static unsigned int tsnep_rx_offset(struct tsnep_rx *rx)
>  	return TSNEP_SKB_PAD;
>  }
>
> +static unsigned int tsnep_rx_offset_xdp(void)
> +{
> +	return XDP_PACKET_HEADROOM;
> +}

I don't see much of a value in this func :P

> +
>  static void tsnep_rx_ring_cleanup(struct tsnep_rx *rx)
>  {
>  	struct device *dmadev = rx->adapter->dmadev;
> @@ -997,6 +1033,67 @@ static int tsnep_rx_refill(struct tsnep_rx *rx, int count, bool reuse)
>  	return i;
>  }
>
> +static bool tsnep_xdp_run_prog(struct tsnep_rx *rx, struct bpf_prog *prog,
> +			       struct xdp_buff *xdp, int *status)
> +{
> +	unsigned int length;
> +	unsigned int sync;
> +	u32 act;
> +
> +	length = xdp->data_end - xdp->data_hard_start - tsnep_rx_offset_xdp();

could this be xdp->data_end - xdp->data - TSNEP_RX_INLINE_METADATA_SIZE ?

Can you tell a bit more about that metadata macro that you have to handle
by yourself all the time? would be good to tell about the impact on
data_meta since you're not configuring it on xdp_prepare_buff().

> +
> +	act = bpf_prog_run_xdp(prog, xdp);
> +
> +	/* Due xdp_adjust_tail: DMA sync for_device cover max len CPU touch */
> +	sync = xdp->data_end - xdp->data_hard_start - tsnep_rx_offset_xdp();
> +	sync = max(sync, length);
> +
> +	switch (act) {
> +	case XDP_PASS:
> +		return false;
> +	case XDP_TX:
> +		if (tsnep_xdp_xmit_back(rx->adapter, xdp) < 0)
> +			goto out_failure;
> +		*status |= TSNEP_XDP_TX;
> +		return true;
> +	case XDP_REDIRECT:
> +		if (xdp_do_redirect(rx->adapter->netdev, xdp, prog) < 0)
> +			goto out_failure;
> +		*status |= TSNEP_XDP_REDIRECT;
> +		return true;
> +	default:
> +		bpf_warn_invalid_xdp_action(rx->adapter->netdev, prog, act);
> +		fallthrough;
> +	case XDP_ABORTED:
> +out_failure:
> +		trace_xdp_exception(rx->adapter->netdev, prog, act);
> +		fallthrough;
> +	case XDP_DROP:
> +		page_pool_put_page(rx->page_pool, virt_to_head_page(xdp->data),
> +				   sync, true);
> +		return true;
> +	}
> +}
> +
> +static void tsnep_finalize_xdp(struct tsnep_adapter *adapter, int status)
> +{
> +	int cpu = smp_processor_id();
> +	int queue;
> +	struct netdev_queue *nq;

do you care about RCT, or?

> +
> +	if (status & TSNEP_XDP_TX) {
> +		queue = cpu % adapter->num_tx_queues;
> +		nq = netdev_get_tx_queue(adapter->netdev, queue);
> +
> +		__netif_tx_lock(nq, cpu);
> +		tsnep_xdp_xmit_flush(&adapter->tx[queue]);
> +		__netif_tx_unlock(nq);
> +	}
> +
> +	if (status & TSNEP_XDP_REDIRECT)
> +		xdp_do_flush();
> +}
> +
>  static struct sk_buff *tsnep_build_skb(struct tsnep_rx *rx, struct page *page,
>  				       int length)

did you consider making tsnep_build_skb() to work on xdp_buff directly?
probably will help you once you'll implement XDP mbuf support here.

>  {
> @@ -1035,12 +1132,16 @@ static int tsnep_rx_poll(struct tsnep_rx *rx, struct napi_struct *napi,
>  	int desc_available;
>  	int done = 0;
>  	enum dma_data_direction dma_dir;
> +	struct bpf_prog *prog;
>  	struct tsnep_rx_entry *entry;
> +	struct xdp_buff xdp;
> +	int xdp_status = 0;
>  	struct sk_buff *skb;
>  	int length;
>
>  	desc_available = tsnep_rx_desc_available(rx);
>  	dma_dir = page_pool_get_dma_dir(rx->page_pool);
> +	prog = READ_ONCE(rx->adapter->xdp_prog);
>
>  	while (likely(done < budget) && (rx->read != rx->write)) {
>  		entry = &rx->entry[rx->read];
> @@ -1084,6 +1185,28 @@ static int tsnep_rx_poll(struct tsnep_rx *rx, struct napi_struct *napi,
>  		rx->read = (rx->read + 1) % TSNEP_RING_SIZE;
>  		desc_available++;
>
> +		if (prog) {
> +			bool consume;
> +
> +			xdp_init_buff(&xdp, PAGE_SIZE, &rx->xdp_rxq);
> +			xdp_prepare_buff(&xdp, page_address(entry->page),
> +					 tsnep_rx_offset_xdp() + TSNEP_RX_INLINE_METADATA_SIZE,
> +					 length - TSNEP_RX_INLINE_METADATA_SIZE,

Would it make sense to subtract TSNEP_RX_INLINE_METADATA_SIZE from length
right after dma_sync_single_range_for_cpu?

> +					 false);
> +
> +			consume = tsnep_xdp_run_prog(rx, prog, &xdp,
> +						     &xdp_status);
> +			if (consume) {
> +				rx->packets++;
> +				rx->bytes +=
> +					length - TSNEP_RX_INLINE_METADATA_SIZE;
> +
> +				entry->page = NULL;
> +
> +				continue;
> +			}
> +		}
> +
>  		skb = tsnep_build_skb(rx, entry->page, length);
>  		if (skb) {
>  			page_pool_release_page(rx->page_pool, entry->page);
> @@ -1102,6 +1225,9 @@ static int tsnep_rx_poll(struct tsnep_rx *rx, struct napi_struct *napi,
>  		entry->page = NULL;
>  	}
>
> +	if (xdp_status)
> +		tsnep_finalize_xdp(rx->adapter, xdp_status);
> +
>  	if (desc_available)
>  		tsnep_rx_refill(rx, desc_available, false);
>
> --
> 2.30.2
>
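The sync-length handling the review above asks about follows a common XDP driver pattern. A minimal sketch of that pattern, not the driver's literal code (the function name and the headroom parameter are made up here for illustration):

#include <linux/filter.h>
#include <net/xdp.h>

/* Run an XDP program and report how many bytes must later be synced
 * for_device if the page is recycled. xdp_adjust_tail() may grow or
 * shrink the frame, so the sync has to cover the maximum of the
 * pre-run and post-run extents: everything the CPU touched.
 */
static u32 run_xdp_get_sync_len(struct bpf_prog *prog, struct xdp_buff *xdp,
				u32 headroom, u32 *act)
{
	/* frame extent before the program ran */
	u32 pre_len = xdp->data_end - xdp->data_hard_start - headroom;
	u32 post_len;

	*act = bpf_prog_run_xdp(prog, xdp);

	/* frame extent after a possible xdp_adjust_tail() */
	post_len = xdp->data_end - xdp->data_hard_start - headroom;

	return max(pre_len, post_len);
}

In the patch, the headroom comes from tsnep_rx_offset_xdp() and the resulting length is passed to page_pool_put_page() on the XDP_DROP/XDP_ABORTED path.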
On 08.12.22 14:40, Maciej Fijalkowski wrote:
>> +static unsigned int tsnep_rx_offset_xdp(void)
>> +{
>> +	return XDP_PACKET_HEADROOM;
>> +}
>
> I don't see much of a value in this func :P

It is a variant of tsnep_rx_offset() for the XDP path to prevent unneeded
calls of tsnep_xdp_is_enabled(). With this function I keep the RX offset
local. But yes, it actually provides no functionality.

>> +
>>  static void tsnep_rx_ring_cleanup(struct tsnep_rx *rx)
>>  {
>>  	struct device *dmadev = rx->adapter->dmadev;
>> @@ -997,6 +1033,67 @@ static int tsnep_rx_refill(struct tsnep_rx *rx, int count, bool reuse)
>>  	return i;
>>  }
>>
>> +static bool tsnep_xdp_run_prog(struct tsnep_rx *rx, struct bpf_prog *prog,
>> +			       struct xdp_buff *xdp, int *status)
>> +{
>> +	unsigned int length;
>> +	unsigned int sync;
>> +	u32 act;
>> +
>> +	length = xdp->data_end - xdp->data_hard_start - tsnep_rx_offset_xdp();
>
> could this be xdp->data_end - xdp->data - TSNEP_RX_INLINE_METADATA_SIZE ?

xdp->data points to the start of the Ethernet frame after
TSNEP_RX_INLINE_METADATA_SIZE, so it would be wrong to subtract the
metadata which is not there. Actually
xdp->data_end - xdp->data + TSNEP_RX_INLINE_METADATA_SIZE would be
equivalent.

TSNEP_RX_INLINE_METADATA_SIZE contains timestamps of received frames. It
is written by DMA at the beginning of the RX buffer. So it extends the
DMA length and needs to be considered for DMA sync.

> Can you tell a bit more about that metadata macro that you have to handle
> by yourself all the time? would be good to tell about the impact on
> data_meta since you're not configuring it on xdp_prepare_buff().

I will add comments.

>> +
>> +	act = bpf_prog_run_xdp(prog, xdp);
>> +
>> +	/* Due xdp_adjust_tail: DMA sync for_device cover max len CPU touch */
>> +	sync = xdp->data_end - xdp->data_hard_start - tsnep_rx_offset_xdp();
>> +	sync = max(sync, length);
>> +
>> +	switch (act) {
>> +	case XDP_PASS:
>> +		return false;
>> +	case XDP_TX:
>> +		if (tsnep_xdp_xmit_back(rx->adapter, xdp) < 0)
>> +			goto out_failure;
>> +		*status |= TSNEP_XDP_TX;
>> +		return true;
>> +	case XDP_REDIRECT:
>> +		if (xdp_do_redirect(rx->adapter->netdev, xdp, prog) < 0)
>> +			goto out_failure;
>> +		*status |= TSNEP_XDP_REDIRECT;
>> +		return true;
>> +	default:
>> +		bpf_warn_invalid_xdp_action(rx->adapter->netdev, prog, act);
>> +		fallthrough;
>> +	case XDP_ABORTED:
>> +out_failure:
>> +		trace_xdp_exception(rx->adapter->netdev, prog, act);
>> +		fallthrough;
>> +	case XDP_DROP:
>> +		page_pool_put_page(rx->page_pool, virt_to_head_page(xdp->data),
>> +				   sync, true);
>> +		return true;
>> +	}
>> +}
>> +
>> +static void tsnep_finalize_xdp(struct tsnep_adapter *adapter, int status)
>> +{
>> +	int cpu = smp_processor_id();
>> +	int queue;
>> +	struct netdev_queue *nq;
>
> do you care about RCT, or?

Do you mean Redundancy Control Trailer (RCT) of PRP? This is new to me.
Do I have to take care about it in the driver? There are no plans to use
redundancy protocols with this device so far.

>> +
>> +	if (status & TSNEP_XDP_TX) {
>> +		queue = cpu % adapter->num_tx_queues;
>> +		nq = netdev_get_tx_queue(adapter->netdev, queue);
>> +
>> +		__netif_tx_lock(nq, cpu);
>> +		tsnep_xdp_xmit_flush(&adapter->tx[queue]);
>> +		__netif_tx_unlock(nq);
>> +	}
>> +
>> +	if (status & TSNEP_XDP_REDIRECT)
>> +		xdp_do_flush();
>> +}
>> +
>>  static struct sk_buff *tsnep_build_skb(struct tsnep_rx *rx, struct page *page,
>>  				       int length)
>
> did you consider making tsnep_build_skb() to work on xdp_buff directly?
> probably will help you once you'll implement XDP mbuf support here.

I saw it in other drivers. I did not consider it, because in my opinion
there was no advantage for this driver. Currently xdp_buff is only
initialized on demand if BPF program is there. So for me there was no
reason to change tsnep_build_skb().

>>  {
>> @@ -1035,12 +1132,16 @@ static int tsnep_rx_poll(struct tsnep_rx *rx, struct napi_struct *napi,
>>  	int desc_available;
>>  	int done = 0;
>>  	enum dma_data_direction dma_dir;
>> +	struct bpf_prog *prog;
>>  	struct tsnep_rx_entry *entry;
>> +	struct xdp_buff xdp;
>> +	int xdp_status = 0;
>>  	struct sk_buff *skb;
>>  	int length;
>>
>>  	desc_available = tsnep_rx_desc_available(rx);
>>  	dma_dir = page_pool_get_dma_dir(rx->page_pool);
>> +	prog = READ_ONCE(rx->adapter->xdp_prog);
>>
>>  	while (likely(done < budget) && (rx->read != rx->write)) {
>>  		entry = &rx->entry[rx->read];
>> @@ -1084,6 +1185,28 @@ static int tsnep_rx_poll(struct tsnep_rx *rx, struct napi_struct *napi,
>>  		rx->read = (rx->read + 1) % TSNEP_RING_SIZE;
>>  		desc_available++;
>>
>> +		if (prog) {
>> +			bool consume;
>> +
>> +			xdp_init_buff(&xdp, PAGE_SIZE, &rx->xdp_rxq);
>> +			xdp_prepare_buff(&xdp, page_address(entry->page),
>> +					 tsnep_rx_offset_xdp() + TSNEP_RX_INLINE_METADATA_SIZE,
>> +					 length - TSNEP_RX_INLINE_METADATA_SIZE,
>
> Would it make sense to subtract TSNEP_RX_INLINE_METADATA_SIZE from length
> right after dma_sync_single_range_for_cpu?

Yes, this could make the code simpler and more readable. I will try it.

Thanks for the review!

Gerhard
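For reference, a hypothetical sketch of the cleanup agreed above, as it could look inside the tsnep_rx_poll() loop; the dma_sync_single_range_for_cpu() argument names (dmadev, entry->dma, offset) are assumed from context and not taken from the posted patch:

	/* The device DMA-writes TSNEP_RX_INLINE_METADATA_SIZE bytes of RX
	 * timestamps in front of the Ethernet frame. The metadata belongs
	 * to the synced region but not to the frame handed to XDP or the
	 * stack, so strip it from 'length' once, right after the sync.
	 */
	dma_sync_single_range_for_cpu(dmadev, entry->dma, offset, length,
				      dma_dir);
	length -= TSNEP_RX_INLINE_METADATA_SIZE;

	if (prog) {
		bool consume;

		xdp_init_buff(&xdp, PAGE_SIZE, &rx->xdp_rxq);
		xdp_prepare_buff(&xdp, page_address(entry->page),
				 XDP_PACKET_HEADROOM +
				 TSNEP_RX_INLINE_METADATA_SIZE,
				 length, false);

		consume = tsnep_xdp_run_prog(rx, prog, &xdp, &xdp_status);
		if (consume) {
			rx->packets++;
			rx->bytes += length;

			entry->page = NULL;

			continue;
		}
	}

With the subtraction done up front, the XDP statistics and the xdp_prepare_buff() call both see the plain frame length. Whether the subsequent tsnep_build_skb() call can consume the adjusted length directly depends on its internals, which this excerpt does not show.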