[net-next,v4,09/10] tsnep: Add XDP RX support

Message ID 20230109191523.12070-10-gerhard@engleder-embedded.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Series tsnep: XDP support

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 5 maintainers not CCed: bpf@vger.kernel.org ast@kernel.org daniel@iogearbox.net john.fastabend@gmail.com hawk@kernel.org
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch warning WARNING: line length of 93 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Gerhard Engleder Jan. 9, 2023, 7:15 p.m. UTC
If a BPF program is set up, then run the BPF program for every received
frame and execute the selected action.

Signed-off-by: Gerhard Engleder <gerhard@engleder-embedded.com>
---
 drivers/net/ethernet/engleder/tsnep_main.c | 122 ++++++++++++++++++++-
 1 file changed, 120 insertions(+), 2 deletions(-)
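
For context, the new RX path can be exercised with any XDP program
attached to the interface. A minimal sketch (not part of this series;
file and section names here are illustrative) that drops every frame and
so hits the driver's new XDP_DROP handling:

// SPDX-License-Identifier: GPL-2.0
/* xdp_drop.c - test sketch: return XDP_DROP for every received frame */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("xdp")
int xdp_drop_all(struct xdp_md *ctx)
{
	return XDP_DROP;
}

char _license[] SEC("license") = "GPL";

Built with "clang -O2 -g -target bpf -c xdp_drop.c -o xdp_drop.o" and
attached with "ip link set dev <iface> xdp obj xdp_drop.o sec xdp", every
received frame then takes tsnep_xdp_run_prog() instead of the
tsnep_build_skb() slow path.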

Comments

Alexander Duyck Jan. 10, 2023, 5:40 p.m. UTC | #1
On Mon, 2023-01-09 at 20:15 +0100, Gerhard Engleder wrote:
> If a BPF program is set up, then run the BPF program for every received
> frame and execute the selected action.
> 
> Signed-off-by: Gerhard Engleder <gerhard@engleder-embedded.com>
> ---
>  drivers/net/ethernet/engleder/tsnep_main.c | 122 ++++++++++++++++++++-
>  1 file changed, 120 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c
> index 451ad1849b9d..002c879639db 100644
> --- a/drivers/net/ethernet/engleder/tsnep_main.c
> +++ b/drivers/net/ethernet/engleder/tsnep_main.c
> @@ -27,6 +27,7 @@
>  #include <linux/phy.h>
>  #include <linux/iopoll.h>
>  #include <linux/bpf.h>
> +#include <linux/bpf_trace.h>
>  
>  #define TSNEP_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
>  #define TSNEP_HEADROOM ALIGN(max(TSNEP_SKB_PAD, XDP_PACKET_HEADROOM), 4)
> @@ -44,6 +45,9 @@
>  #define TSNEP_COALESCE_USECS_MAX     ((ECM_INT_DELAY_MASK >> ECM_INT_DELAY_SHIFT) * \
>  				      ECM_INT_DELAY_BASE_US + ECM_INT_DELAY_BASE_US - 1)
>  
> +#define TSNEP_XDP_TX		BIT(0)
> +#define TSNEP_XDP_REDIRECT	BIT(1)
> +
>  enum {
>  	__TSNEP_DOWN,
>  };
> @@ -625,6 +629,28 @@ static void tsnep_xdp_xmit_flush(struct tsnep_tx *tx)
>  	iowrite32(TSNEP_CONTROL_TX_ENABLE, tx->addr + TSNEP_CONTROL);
>  }
>  
> +static bool tsnep_xdp_xmit_back(struct tsnep_adapter *adapter,
> +				struct xdp_buff *xdp,
> +				struct netdev_queue *tx_nq, struct tsnep_tx *tx)
> +{
> +	struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
> +	bool xmit;
> +
> +	if (unlikely(!xdpf))
> +		return false;
> +
> +	__netif_tx_lock(tx_nq, smp_processor_id());
> +
> +	/* Avoid transmit queue timeout since we share it with the slow path */
> +	txq_trans_cond_update(tx_nq);
> +
> +	xmit = tsnep_xdp_xmit_frame_ring(xdpf, tx, TSNEP_TX_TYPE_XDP_TX);
> +

Again, the trans_cond_update should be after the xmit, and only if it
indicates it completed the transmit.

> +	__netif_tx_unlock(tx_nq);
> +
> +	return xmit;
> +}
> +
>  static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
>  {
>  	struct tsnep_tx_entry *entry;
> @@ -983,6 +1009,62 @@ static int tsnep_rx_refill(struct tsnep_rx *rx, int count, bool reuse)
>  	return i;
>  }
>  
> +static bool tsnep_xdp_run_prog(struct tsnep_rx *rx, struct bpf_prog *prog,
> +			       struct xdp_buff *xdp, int *status,
> +			       struct netdev_queue *tx_nq, struct tsnep_tx *tx)
> +{
> +	unsigned int length;
> +	unsigned int sync;
> +	u32 act;
> +
> +	length = xdp->data_end - xdp->data_hard_start - XDP_PACKET_HEADROOM;
> +
> +	act = bpf_prog_run_xdp(prog, xdp);
> +
> +	/* Due to xdp_adjust_tail: DMA sync for_device covers max len CPU touched */
> +	sync = xdp->data_end - xdp->data_hard_start - XDP_PACKET_HEADROOM;
> +	sync = max(sync, length);
> +
> +	switch (act) {
> +	case XDP_PASS:
> +		return false;
> +	case XDP_TX:
> +		if (!tsnep_xdp_xmit_back(rx->adapter, xdp, tx_nq, tx))
> +			goto out_failure;
> +		*status |= TSNEP_XDP_TX;
> +		return true;
> +	case XDP_REDIRECT:
> +		if (xdp_do_redirect(rx->adapter->netdev, xdp, prog) < 0)
> +			goto out_failure;
> +		*status |= TSNEP_XDP_REDIRECT;
> +		return true;
> +	default:
> +		bpf_warn_invalid_xdp_action(rx->adapter->netdev, prog, act);
> +		fallthrough;
> +	case XDP_ABORTED:
> +out_failure:
> +		trace_xdp_exception(rx->adapter->netdev, prog, act);
> +		fallthrough;
> +	case XDP_DROP:
> +		page_pool_put_page(rx->page_pool, virt_to_head_page(xdp->data),
> +				   sync, true);
> +		return true;
> +	}
> +}
> +
> +static void tsnep_finalize_xdp(struct tsnep_adapter *adapter, int status,
> +			       struct netdev_queue *tx_nq, struct tsnep_tx *tx)
> +{
> +	if (status & TSNEP_XDP_TX) {
> +		__netif_tx_lock(tx_nq, smp_processor_id());
> +		tsnep_xdp_xmit_flush(tx);
> +		__netif_tx_unlock(tx_nq);
> +	}
> +
> +	if (status & TSNEP_XDP_REDIRECT)
> +		xdp_do_flush();
> +}
> +
>  static struct sk_buff *tsnep_build_skb(struct tsnep_rx *rx, struct page *page,
>  				       int length)
>  {
> @@ -1018,15 +1100,29 @@ static int tsnep_rx_poll(struct tsnep_rx *rx, struct napi_struct *napi,
>  			 int budget)
>  {
>  	struct device *dmadev = rx->adapter->dmadev;
> -	int desc_available;
> -	int done = 0;
>  	enum dma_data_direction dma_dir;
>  	struct tsnep_rx_entry *entry;
> +	struct netdev_queue *tx_nq;
> +	struct bpf_prog *prog;
> +	struct xdp_buff xdp;
>  	struct sk_buff *skb;
> +	struct tsnep_tx *tx;
> +	int desc_available;
> +	int xdp_status = 0;
> +	int done = 0;
>  	int length;
>  
>  	desc_available = tsnep_rx_desc_available(rx);
>  	dma_dir = page_pool_get_dma_dir(rx->page_pool);
> +	prog = READ_ONCE(rx->adapter->xdp_prog);
> +	if (prog) {
> +		int queue = smp_processor_id() % rx->adapter->num_tx_queues;
> +

As I mentioned before, take a look at how this was addressed in
skb_tx_hash. The modulus division is really expensive.

Also, does this make sense? I am assuming you have a 1:1 Tx to Rx
mapping for your queues, don't you? If so, it might make more sense to
use the Tx queue that you clean in this queue pair.

> +		tx_nq = netdev_get_tx_queue(rx->adapter->netdev, queue);
> +		tx = &rx->adapter->tx[queue];
> +
> +		xdp_init_buff(&xdp, PAGE_SIZE, &rx->xdp_rxq);
> +	}
>  
>  	while (likely(done < budget) && (rx->read != rx->write)) {
>  		entry = &rx->entry[rx->read];
> @@ -1076,6 +1172,25 @@ static int tsnep_rx_poll(struct tsnep_rx *rx, struct napi_struct *napi,
>  		rx->read = (rx->read + 1) % TSNEP_RING_SIZE;
>  		desc_available++;
>  
> +		if (prog) {
> +			bool consume;
> +
> +			xdp_prepare_buff(&xdp, page_address(entry->page),
> +					 XDP_PACKET_HEADROOM + TSNEP_RX_INLINE_METADATA_SIZE,
> +					 length, false);
> +
> +			consume = tsnep_xdp_run_prog(rx, prog, &xdp,
> +						     &xdp_status, tx_nq, tx);
> +			if (consume) {
> +				rx->packets++;
> +				rx->bytes += length;
> +
> +				entry->page = NULL;
> +
> +				continue;
> +			}
> +		}
> +
>  		skb = tsnep_build_skb(rx, entry->page, length);
>  		if (skb) {
>  			page_pool_release_page(rx->page_pool, entry->page);
> @@ -1094,6 +1209,9 @@ static int tsnep_rx_poll(struct tsnep_rx *rx, struct napi_struct *napi,
>  		entry->page = NULL;
>  	}
>  
> +	if (xdp_status)
> +		tsnep_finalize_xdp(rx->adapter, xdp_status, tx_nq, tx);
> +
>  	if (desc_available)
>  		tsnep_rx_refill(rx, desc_available, false);
>
Gerhard Engleder Jan. 10, 2023, 9:28 p.m. UTC | #2
On 10.01.23 18:40, Alexander H Duyck wrote:
> On Mon, 2023-01-09 at 20:15 +0100, Gerhard Engleder wrote:
>> If a BPF program is set up, then run the BPF program for every received
>> frame and execute the selected action.
>>
>> Signed-off-by: Gerhard Engleder <gerhard@engleder-embedded.com>
>> ---
>>   drivers/net/ethernet/engleder/tsnep_main.c | 122 ++++++++++++++++++++-
>>   1 file changed, 120 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c
>> index 451ad1849b9d..002c879639db 100644
>> --- a/drivers/net/ethernet/engleder/tsnep_main.c
>> +++ b/drivers/net/ethernet/engleder/tsnep_main.c
>> @@ -27,6 +27,7 @@
>>   #include <linux/phy.h>
>>   #include <linux/iopoll.h>
>>   #include <linux/bpf.h>
>> +#include <linux/bpf_trace.h>
>>   
>>   #define TSNEP_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
>>   #define TSNEP_HEADROOM ALIGN(max(TSNEP_SKB_PAD, XDP_PACKET_HEADROOM), 4)
>> @@ -44,6 +45,9 @@
>>   #define TSNEP_COALESCE_USECS_MAX     ((ECM_INT_DELAY_MASK >> ECM_INT_DELAY_SHIFT) * \
>>   				      ECM_INT_DELAY_BASE_US + ECM_INT_DELAY_BASE_US - 1)
>>   
>> +#define TSNEP_XDP_TX		BIT(0)
>> +#define TSNEP_XDP_REDIRECT	BIT(1)
>> +
>>   enum {
>>   	__TSNEP_DOWN,
>>   };
>> @@ -625,6 +629,28 @@ static void tsnep_xdp_xmit_flush(struct tsnep_tx *tx)
>>   	iowrite32(TSNEP_CONTROL_TX_ENABLE, tx->addr + TSNEP_CONTROL);
>>   }
>>   
>> +static bool tsnep_xdp_xmit_back(struct tsnep_adapter *adapter,
>> +				struct xdp_buff *xdp,
>> +				struct netdev_queue *tx_nq, struct tsnep_tx *tx)
>> +{
>> +	struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
>> +	bool xmit;
>> +
>> +	if (unlikely(!xdpf))
>> +		return false;
>> +
>> +	__netif_tx_lock(tx_nq, smp_processor_id());
>> +
>> +	/* Avoid transmit queue timeout since we share it with the slow path */
>> +	txq_trans_cond_update(tx_nq);
>> +
>> +	xmit = tsnep_xdp_xmit_frame_ring(xdpf, tx, TSNEP_TX_TYPE_XDP_TX);
>> +
> 
> Again, the trans_cond_update should be after the xmit, and only if it
> indicates it completed the transmit.

tsnep_xdp_xmit_frame_ring() only adds the xdpf to the descriptor ring, so
it cannot complete the transmit. Therefore, and in line with your previous
comment, trans_cond_update() should be called here only if the xdpf is
successfully placed in the descriptor ring. Is that right?

>> +	__netif_tx_unlock(tx_nq);
>> +
>> +	return xmit;
>> +}
>> +
>>   static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
>>   {
>>   	struct tsnep_tx_entry *entry;
>> @@ -983,6 +1009,62 @@ static int tsnep_rx_refill(struct tsnep_rx *rx, int count, bool reuse)
>>   	return i;
>>   }
>>   
>> +static bool tsnep_xdp_run_prog(struct tsnep_rx *rx, struct bpf_prog *prog,
>> +			       struct xdp_buff *xdp, int *status,
>> +			       struct netdev_queue *tx_nq, struct tsnep_tx *tx)
>> +{
>> +	unsigned int length;
>> +	unsigned int sync;
>> +	u32 act;
>> +
>> +	length = xdp->data_end - xdp->data_hard_start - XDP_PACKET_HEADROOM;
>> +
>> +	act = bpf_prog_run_xdp(prog, xdp);
>> +
>> +	/* Due to xdp_adjust_tail: DMA sync for_device covers max len CPU touched */
>> +	sync = xdp->data_end - xdp->data_hard_start - XDP_PACKET_HEADROOM;
>> +	sync = max(sync, length);
>> +
>> +	switch (act) {
>> +	case XDP_PASS:
>> +		return false;
>> +	case XDP_TX:
>> +		if (!tsnep_xdp_xmit_back(rx->adapter, xdp, tx_nq, tx))
>> +			goto out_failure;
>> +		*status |= TSNEP_XDP_TX;
>> +		return true;
>> +	case XDP_REDIRECT:
>> +		if (xdp_do_redirect(rx->adapter->netdev, xdp, prog) < 0)
>> +			goto out_failure;
>> +		*status |= TSNEP_XDP_REDIRECT;
>> +		return true;
>> +	default:
>> +		bpf_warn_invalid_xdp_action(rx->adapter->netdev, prog, act);
>> +		fallthrough;
>> +	case XDP_ABORTED:
>> +out_failure:
>> +		trace_xdp_exception(rx->adapter->netdev, prog, act);
>> +		fallthrough;
>> +	case XDP_DROP:
>> +		page_pool_put_page(rx->page_pool, virt_to_head_page(xdp->data),
>> +				   sync, true);
>> +		return true;
>> +	}
>> +}
>> +
>> +static void tsnep_finalize_xdp(struct tsnep_adapter *adapter, int status,
>> +			       struct netdev_queue *tx_nq, struct tsnep_tx *tx)
>> +{
>> +	if (status & TSNEP_XDP_TX) {
>> +		__netif_tx_lock(tx_nq, smp_processor_id());
>> +		tsnep_xdp_xmit_flush(tx);
>> +		__netif_tx_unlock(tx_nq);
>> +	}
>> +
>> +	if (status & TSNEP_XDP_REDIRECT)
>> +		xdp_do_flush();
>> +}
>> +
>>   static struct sk_buff *tsnep_build_skb(struct tsnep_rx *rx, struct page *page,
>>   				       int length)
>>   {
>> @@ -1018,15 +1100,29 @@ static int tsnep_rx_poll(struct tsnep_rx *rx, struct napi_struct *napi,
>>   			 int budget)
>>   {
>>   	struct device *dmadev = rx->adapter->dmadev;
>> -	int desc_available;
>> -	int done = 0;
>>   	enum dma_data_direction dma_dir;
>>   	struct tsnep_rx_entry *entry;
>> +	struct netdev_queue *tx_nq;
>> +	struct bpf_prog *prog;
>> +	struct xdp_buff xdp;
>>   	struct sk_buff *skb;
>> +	struct tsnep_tx *tx;
>> +	int desc_available;
>> +	int xdp_status = 0;
>> +	int done = 0;
>>   	int length;
>>   
>>   	desc_available = tsnep_rx_desc_available(rx);
>>   	dma_dir = page_pool_get_dma_dir(rx->page_pool);
>> +	prog = READ_ONCE(rx->adapter->xdp_prog);
>> +	if (prog) {
>> +		int queue = smp_processor_id() % rx->adapter->num_tx_queues;
>> +
> 
> As I mentioned before, take a look at how this was addressed in
> skb_tx_hash. The modulus division is really expensive.
> 
> Also, does this make sense? I am assuming you have a 1:1 Tx to Rx
> mapping for your queues, don't you? If so, it might make more sense to
> use the Tx queue that you clean in this queue pair.

Sounds reasonable. I will work on that.

Gerhard
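
For reference, a sketch of the two approaches the review points at. The
reciprocal_scale() helper below is the kernel's own (it lives in
<linux/kernel.h>); the rx->queue_index field in (b) is hypothetical,
standing in for whatever ties an Rx queue to its Tx pair:

/* (a) skb_tx_hash() avoids '%' by mapping a 32-bit value into [0, n)
 * with a multiply and a shift. Note it feeds a well-distributed hash
 * in; a raw smp_processor_id() would cluster near 0 without hashing
 * first.
 */
static inline u32 reciprocal_scale(u32 val, u32 ep_ro)
{
	return (u32)(((u64)val * ep_ro) >> 32);
}

/* (b) with a 1:1 Rx/Tx queue pairing, drop the modulus entirely and
 * reuse the Tx queue of this Rx queue's pair:
 */
tx_nq = netdev_get_tx_queue(rx->adapter->netdev, rx->queue_index);
tx = &rx->adapter->tx[rx->queue_index];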
Alexander Duyck Jan. 10, 2023, 10:30 p.m. UTC | #3
On Tue, Jan 10, 2023 at 1:28 PM Gerhard Engleder
<gerhard@engleder-embedded.com> wrote:
>
> On 10.01.23 18:40, Alexander H Duyck wrote:
> > On Mon, 2023-01-09 at 20:15 +0100, Gerhard Engleder wrote:
> >> If a BPF program is set up, then run the BPF program for every received
> >> frame and execute the selected action.
> >>
> >> Signed-off-by: Gerhard Engleder <gerhard@engleder-embedded.com>
> >> ---
> >>   drivers/net/ethernet/engleder/tsnep_main.c | 122 ++++++++++++++++++++-
> >>   1 file changed, 120 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c
> >> index 451ad1849b9d..002c879639db 100644
> >> --- a/drivers/net/ethernet/engleder/tsnep_main.c
> >> +++ b/drivers/net/ethernet/engleder/tsnep_main.c
> >> @@ -27,6 +27,7 @@
> >>   #include <linux/phy.h>
> >>   #include <linux/iopoll.h>
> >>   #include <linux/bpf.h>
> >> +#include <linux/bpf_trace.h>
> >>
> >>   #define TSNEP_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
> >>   #define TSNEP_HEADROOM ALIGN(max(TSNEP_SKB_PAD, XDP_PACKET_HEADROOM), 4)
> >> @@ -44,6 +45,9 @@
> >>   #define TSNEP_COALESCE_USECS_MAX     ((ECM_INT_DELAY_MASK >> ECM_INT_DELAY_SHIFT) * \
> >>                                    ECM_INT_DELAY_BASE_US + ECM_INT_DELAY_BASE_US - 1)
> >>
> >> +#define TSNEP_XDP_TX                BIT(0)
> >> +#define TSNEP_XDP_REDIRECT  BIT(1)
> >> +
> >>   enum {
> >>      __TSNEP_DOWN,
> >>   };
> >> @@ -625,6 +629,28 @@ static void tsnep_xdp_xmit_flush(struct tsnep_tx *tx)
> >>      iowrite32(TSNEP_CONTROL_TX_ENABLE, tx->addr + TSNEP_CONTROL);
> >>   }
> >>
> >> +static bool tsnep_xdp_xmit_back(struct tsnep_adapter *adapter,
> >> +                            struct xdp_buff *xdp,
> >> +                            struct netdev_queue *tx_nq, struct tsnep_tx *tx)
> >> +{
> >> +    struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
> >> +    bool xmit;
> >> +
> >> +    if (unlikely(!xdpf))
> >> +            return false;
> >> +
> >> +    __netif_tx_lock(tx_nq, smp_processor_id());
> >> +
> >> +    /* Avoid transmit queue timeout since we share it with the slow path */
> >> +    txq_trans_cond_update(tx_nq);
> >> +
> >> +    xmit = tsnep_xdp_xmit_frame_ring(xdpf, tx, TSNEP_TX_TYPE_XDP_TX);
> >> +
> >
> > Again, the trans_cond_update should be after the xmit, and only if it
> > indicates it completed the transmit.
>
> tsnep_xdp_xmit_frame_ring() only adds the xdpf to the descriptor ring, so
> it cannot complete the transmit. Therefore, and in line with your previous
> comment, trans_cond_update() should be called here only if the xdpf is
> successfully placed in the descriptor ring. Is that right?

Yes, that is what I meant by "complete the transmit": if it places the
xdpf on the descriptor ring, then you can update this. Basically the
idea is that we should be updating the timer any time a frame goes onto
the ring. It shouldn't be an unconditional update, as a stalled ring
could then go undetected.
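
Putting that together, a sketch of the agreed ordering for
tsnep_xdp_xmit_back() (how a follow-up revision might look, not the code
in this patch):

	__netif_tx_lock(tx_nq, smp_processor_id());

	xmit = tsnep_xdp_xmit_frame_ring(xdpf, tx, TSNEP_TX_TYPE_XDP_TX);

	/* Avoid transmit queue timeout since the queue is shared with the
	 * slow path; update only when a frame was actually placed on the
	 * ring, so a stalled ring still trips the watchdog.
	 */
	if (xmit)
		txq_trans_cond_update(tx_nq);

	__netif_tx_unlock(tx_nq);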

Patch

diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c
index 451ad1849b9d..002c879639db 100644
--- a/drivers/net/ethernet/engleder/tsnep_main.c
+++ b/drivers/net/ethernet/engleder/tsnep_main.c
@@ -27,6 +27,7 @@
 #include <linux/phy.h>
 #include <linux/iopoll.h>
 #include <linux/bpf.h>
+#include <linux/bpf_trace.h>
 
 #define TSNEP_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
 #define TSNEP_HEADROOM ALIGN(max(TSNEP_SKB_PAD, XDP_PACKET_HEADROOM), 4)
@@ -44,6 +45,9 @@
 #define TSNEP_COALESCE_USECS_MAX     ((ECM_INT_DELAY_MASK >> ECM_INT_DELAY_SHIFT) * \
 				      ECM_INT_DELAY_BASE_US + ECM_INT_DELAY_BASE_US - 1)
 
+#define TSNEP_XDP_TX		BIT(0)
+#define TSNEP_XDP_REDIRECT	BIT(1)
+
 enum {
 	__TSNEP_DOWN,
 };
@@ -625,6 +629,28 @@ static void tsnep_xdp_xmit_flush(struct tsnep_tx *tx)
 	iowrite32(TSNEP_CONTROL_TX_ENABLE, tx->addr + TSNEP_CONTROL);
 }
 
+static bool tsnep_xdp_xmit_back(struct tsnep_adapter *adapter,
+				struct xdp_buff *xdp,
+				struct netdev_queue *tx_nq, struct tsnep_tx *tx)
+{
+	struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
+	bool xmit;
+
+	if (unlikely(!xdpf))
+		return false;
+
+	__netif_tx_lock(tx_nq, smp_processor_id());
+
+	/* Avoid transmit queue timeout since we share it with the slow path */
+	txq_trans_cond_update(tx_nq);
+
+	xmit = tsnep_xdp_xmit_frame_ring(xdpf, tx, TSNEP_TX_TYPE_XDP_TX);
+
+	__netif_tx_unlock(tx_nq);
+
+	return xmit;
+}
+
 static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
 {
 	struct tsnep_tx_entry *entry;
@@ -983,6 +1009,62 @@ static int tsnep_rx_refill(struct tsnep_rx *rx, int count, bool reuse)
 	return i;
 }
 
+static bool tsnep_xdp_run_prog(struct tsnep_rx *rx, struct bpf_prog *prog,
+			       struct xdp_buff *xdp, int *status,
+			       struct netdev_queue *tx_nq, struct tsnep_tx *tx)
+{
+	unsigned int length;
+	unsigned int sync;
+	u32 act;
+
+	length = xdp->data_end - xdp->data_hard_start - XDP_PACKET_HEADROOM;
+
+	act = bpf_prog_run_xdp(prog, xdp);
+
+	/* Due to xdp_adjust_tail: DMA sync for_device covers max len CPU touched */
+	sync = xdp->data_end - xdp->data_hard_start - XDP_PACKET_HEADROOM;
+	sync = max(sync, length);
+
+	switch (act) {
+	case XDP_PASS:
+		return false;
+	case XDP_TX:
+		if (!tsnep_xdp_xmit_back(rx->adapter, xdp, tx_nq, tx))
+			goto out_failure;
+		*status |= TSNEP_XDP_TX;
+		return true;
+	case XDP_REDIRECT:
+		if (xdp_do_redirect(rx->adapter->netdev, xdp, prog) < 0)
+			goto out_failure;
+		*status |= TSNEP_XDP_REDIRECT;
+		return true;
+	default:
+		bpf_warn_invalid_xdp_action(rx->adapter->netdev, prog, act);
+		fallthrough;
+	case XDP_ABORTED:
+out_failure:
+		trace_xdp_exception(rx->adapter->netdev, prog, act);
+		fallthrough;
+	case XDP_DROP:
+		page_pool_put_page(rx->page_pool, virt_to_head_page(xdp->data),
+				   sync, true);
+		return true;
+	}
+}
+
+static void tsnep_finalize_xdp(struct tsnep_adapter *adapter, int status,
+			       struct netdev_queue *tx_nq, struct tsnep_tx *tx)
+{
+	if (status & TSNEP_XDP_TX) {
+		__netif_tx_lock(tx_nq, smp_processor_id());
+		tsnep_xdp_xmit_flush(tx);
+		__netif_tx_unlock(tx_nq);
+	}
+
+	if (status & TSNEP_XDP_REDIRECT)
+		xdp_do_flush();
+}
+
 static struct sk_buff *tsnep_build_skb(struct tsnep_rx *rx, struct page *page,
 				       int length)
 {
@@ -1018,15 +1100,29 @@ static int tsnep_rx_poll(struct tsnep_rx *rx, struct napi_struct *napi,
 			 int budget)
 {
 	struct device *dmadev = rx->adapter->dmadev;
-	int desc_available;
-	int done = 0;
 	enum dma_data_direction dma_dir;
 	struct tsnep_rx_entry *entry;
+	struct netdev_queue *tx_nq;
+	struct bpf_prog *prog;
+	struct xdp_buff xdp;
 	struct sk_buff *skb;
+	struct tsnep_tx *tx;
+	int desc_available;
+	int xdp_status = 0;
+	int done = 0;
 	int length;
 
 	desc_available = tsnep_rx_desc_available(rx);
 	dma_dir = page_pool_get_dma_dir(rx->page_pool);
+	prog = READ_ONCE(rx->adapter->xdp_prog);
+	if (prog) {
+		int queue = smp_processor_id() % rx->adapter->num_tx_queues;
+
+		tx_nq = netdev_get_tx_queue(rx->adapter->netdev, queue);
+		tx = &rx->adapter->tx[queue];
+
+		xdp_init_buff(&xdp, PAGE_SIZE, &rx->xdp_rxq);
+	}
 
 	while (likely(done < budget) && (rx->read != rx->write)) {
 		entry = &rx->entry[rx->read];
@@ -1076,6 +1172,25 @@ static int tsnep_rx_poll(struct tsnep_rx *rx, struct napi_struct *napi,
 		rx->read = (rx->read + 1) % TSNEP_RING_SIZE;
 		desc_available++;
 
+		if (prog) {
+			bool consume;
+
+			xdp_prepare_buff(&xdp, page_address(entry->page),
+					 XDP_PACKET_HEADROOM + TSNEP_RX_INLINE_METADATA_SIZE,
+					 length, false);
+
+			consume = tsnep_xdp_run_prog(rx, prog, &xdp,
+						     &xdp_status, tx_nq, tx);
+			if (consume) {
+				rx->packets++;
+				rx->bytes += length;
+
+				entry->page = NULL;
+
+				continue;
+			}
+		}
+
 		skb = tsnep_build_skb(rx, entry->page, length);
 		if (skb) {
 			page_pool_release_page(rx->page_pool, entry->page);
@@ -1094,6 +1209,9 @@ static int tsnep_rx_poll(struct tsnep_rx *rx, struct napi_struct *napi,
 		entry->page = NULL;
 	}
 
+	if (xdp_status)
+		tsnep_finalize_xdp(rx->adapter, xdp_status, tx_nq, tx);
+
 	if (desc_available)
 		tsnep_rx_refill(rx, desc_available, false);