diff mbox series

[iwl-next,v4,3/4] igb: add AF_XDP zero-copy Rx support

Message ID 20230804084051.14194-4-sriram.yagnaraman@est.tech (mailing list archive)
State Awaiting Upstream
Delegated to: Netdev Maintainers
Headers show
Series igb: Add support for AF_XDP zero-copy | expand

Checks

Context Check Description
netdev/series_format warning Target tree name not specified in the subject
netdev/tree_selection success Guessed tree name to be net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1328 this patch: 1328
netdev/cc_maintainers warning 1 maintainers not CCed: richardcochran@gmail.com
netdev/build_clang success Errors and warnings before: 1351 this patch: 1351
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1351 this patch: 1351
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 482 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Sriram Yagnaraman Aug. 4, 2023, 8:40 a.m. UTC
Add support for AF_XDP zero-copy receive path.

When AF_XDP zero-copy is enabled, the rx buffers are allocated from the
xsk buff pool using igb_alloc_rx_buffers_zc.

Use xsk_pool_get_rx_frame_size to set SRRCTL rx buf size when zero-copy
is enabled.

Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
---
 drivers/net/ethernet/intel/igb/igb.h      |   4 +
 drivers/net/ethernet/intel/igb/igb_main.c |  92 ++++++--
 drivers/net/ethernet/intel/igb/igb_xsk.c  | 262 +++++++++++++++++++++-
 3 files changed, 336 insertions(+), 22 deletions(-)

Comments

kernel test robot Aug. 5, 2023, 2:54 p.m. UTC | #1
Hi Sriram,

kernel test robot noticed the following build warnings:

[auto build test WARNING on tnguy-next-queue/dev-queue]

url:    https://github.com/intel-lab-lkp/linux/commits/Sriram-Yagnaraman/igb-prepare-for-AF_XDP-zero-copy-support/20230804-164354
base:   https://git.kernel.org/pub/scm/linux/kernel/git/tnguy/next-queue.git dev-queue
patch link:    https://lore.kernel.org/r/20230804084051.14194-4-sriram.yagnaraman%40est.tech
patch subject: [Intel-wired-lan] [PATCH iwl-next v4 3/4] igb: add AF_XDP zero-copy Rx support
config: i386-debian-10.3 (https://download.01.org/0day-ci/archive/20230805/202308052204.SQxxKnNI-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
reproduce: (https://download.01.org/0day-ci/archive/20230805/202308052204.SQxxKnNI-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202308052204.SQxxKnNI-lkp@intel.com/

All warnings (new ones prefixed by >>):

   drivers/net/ethernet/intel/igb/igb_main.c: In function 'igb_dump':
>> drivers/net/ethernet/intel/igb/igb_main.c:505:42: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
     505 |                         dma_addr_t dma = (dma_addr_t)NULL;
         |                                          ^


vim +505 drivers/net/ethernet/intel/igb/igb_main.c

   348	
   349	/* igb_dump - Print registers, Tx-rings and Rx-rings */
   350	static void igb_dump(struct igb_adapter *adapter)
   351	{
   352		struct net_device *netdev = adapter->netdev;
   353		struct e1000_hw *hw = &adapter->hw;
   354		struct igb_reg_info *reginfo;
   355		struct igb_ring *tx_ring;
   356		union e1000_adv_tx_desc *tx_desc;
   357		struct my_u0 { __le64 a; __le64 b; } *u0;
   358		struct igb_ring *rx_ring;
   359		union e1000_adv_rx_desc *rx_desc;
   360		u32 staterr;
   361		u16 i, n;
   362	
   363		if (!netif_msg_hw(adapter))
   364			return;
   365	
   366		/* Print netdevice Info */
   367		if (netdev) {
   368			dev_info(&adapter->pdev->dev, "Net device Info\n");
   369			pr_info("Device Name     state            trans_start\n");
   370			pr_info("%-15s %016lX %016lX\n", netdev->name,
   371				netdev->state, dev_trans_start(netdev));
   372		}
   373	
   374		/* Print Registers */
   375		dev_info(&adapter->pdev->dev, "Register Dump\n");
   376		pr_info(" Register Name   Value\n");
   377		for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
   378		     reginfo->name; reginfo++) {
   379			igb_regdump(hw, reginfo);
   380		}
   381	
   382		/* Print TX Ring Summary */
   383		if (!netdev || !netif_running(netdev))
   384			goto exit;
   385	
   386		dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
   387		pr_info("Queue [NTU] [NTC] [bi(ntc)->dma  ] leng ntw timestamp\n");
   388		for (n = 0; n < adapter->num_tx_queues; n++) {
   389			struct igb_tx_buffer *buffer_info;
   390			tx_ring = adapter->tx_ring[n];
   391			buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
   392			pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n",
   393				n, tx_ring->next_to_use, tx_ring->next_to_clean,
   394				(u64)dma_unmap_addr(buffer_info, dma),
   395				dma_unmap_len(buffer_info, len),
   396				buffer_info->next_to_watch,
   397				(u64)buffer_info->time_stamp);
   398		}
   399	
   400		/* Print TX Rings */
   401		if (!netif_msg_tx_done(adapter))
   402			goto rx_ring_summary;
   403	
   404		dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
   405	
   406		/* Transmit Descriptor Formats
   407		 *
   408		 * Advanced Transmit Descriptor
   409		 *   +--------------------------------------------------------------+
   410		 * 0 |         Buffer Address [63:0]                                |
   411		 *   +--------------------------------------------------------------+
   412		 * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
   413		 *   +--------------------------------------------------------------+
   414		 *   63      46 45    40 39 38 36 35 32 31   24             15       0
   415		 */
   416	
   417		for (n = 0; n < adapter->num_tx_queues; n++) {
   418			tx_ring = adapter->tx_ring[n];
   419			pr_info("------------------------------------\n");
   420			pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index);
   421			pr_info("------------------------------------\n");
   422			pr_info("T [desc]     [address 63:0  ] [PlPOCIStDDM Ln] [bi->dma       ] leng  ntw timestamp        bi->skb\n");
   423	
   424			for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
   425				const char *next_desc;
   426				struct igb_tx_buffer *buffer_info;
   427				tx_desc = IGB_TX_DESC(tx_ring, i);
   428				buffer_info = &tx_ring->tx_buffer_info[i];
   429				u0 = (struct my_u0 *)tx_desc;
   430				if (i == tx_ring->next_to_use &&
   431				    i == tx_ring->next_to_clean)
   432					next_desc = " NTC/U";
   433				else if (i == tx_ring->next_to_use)
   434					next_desc = " NTU";
   435				else if (i == tx_ring->next_to_clean)
   436					next_desc = " NTC";
   437				else
   438					next_desc = "";
   439	
   440				pr_info("T [0x%03X]    %016llX %016llX %016llX %04X  %p %016llX %p%s\n",
   441					i, le64_to_cpu(u0->a),
   442					le64_to_cpu(u0->b),
   443					(u64)dma_unmap_addr(buffer_info, dma),
   444					dma_unmap_len(buffer_info, len),
   445					buffer_info->next_to_watch,
   446					(u64)buffer_info->time_stamp,
   447					buffer_info->skb, next_desc);
   448	
   449				if (netif_msg_pktdata(adapter) && buffer_info->skb)
   450					print_hex_dump(KERN_INFO, "",
   451						DUMP_PREFIX_ADDRESS,
   452						16, 1, buffer_info->skb->data,
   453						dma_unmap_len(buffer_info, len),
   454						true);
   455			}
   456		}
   457	
   458		/* Print RX Rings Summary */
   459	rx_ring_summary:
   460		dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
   461		pr_info("Queue [NTU] [NTC]\n");
   462		for (n = 0; n < adapter->num_rx_queues; n++) {
   463			rx_ring = adapter->rx_ring[n];
   464			pr_info(" %5d %5X %5X\n",
   465				n, rx_ring->next_to_use, rx_ring->next_to_clean);
   466		}
   467	
   468		/* Print RX Rings */
   469		if (!netif_msg_rx_status(adapter))
   470			goto exit;
   471	
   472		dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
   473	
   474		/* Advanced Receive Descriptor (Read) Format
   475		 *    63                                           1        0
   476		 *    +-----------------------------------------------------+
   477		 *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
   478		 *    +----------------------------------------------+------+
   479		 *  8 |       Header Buffer Address [63:1]           |  DD  |
   480		 *    +-----------------------------------------------------+
   481		 *
   482		 *
   483		 * Advanced Receive Descriptor (Write-Back) Format
   484		 *
   485		 *   63       48 47    32 31  30      21 20 17 16   4 3     0
   486		 *   +------------------------------------------------------+
   487		 * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
   488		 *   | Checksum   Ident  |   |           |    | Type | Type |
   489		 *   +------------------------------------------------------+
   490		 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
   491		 *   +------------------------------------------------------+
   492		 *   63       48 47    32 31            20 19               0
   493		 */
   494	
   495		for (n = 0; n < adapter->num_rx_queues; n++) {
   496			rx_ring = adapter->rx_ring[n];
   497			pr_info("------------------------------------\n");
   498			pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index);
   499			pr_info("------------------------------------\n");
   500			pr_info("R  [desc]      [ PktBuf     A0] [  HeadBuf   DD] [bi->dma       ] [bi->skb] <-- Adv Rx Read format\n");
   501			pr_info("RWB[desc]      [PcsmIpSHl PtRs] [vl er S cks ln] ---------------- [bi->skb] <-- Adv Rx Write-Back format\n");
   502	
   503			for (i = 0; i < rx_ring->count; i++) {
   504				const char *next_desc;
 > 505				dma_addr_t dma = (dma_addr_t)NULL;
   506				struct igb_rx_buffer *buffer_info = NULL;
   507				rx_desc = IGB_RX_DESC(rx_ring, i);
   508				u0 = (struct my_u0 *)rx_desc;
   509				staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
   510	
   511				if (!rx_ring->xsk_pool) {
   512					buffer_info = &rx_ring->rx_buffer_info[i];
   513					dma = buffer_info->dma;
   514				}
   515	
   516				if (i == rx_ring->next_to_use)
   517					next_desc = " NTU";
   518				else if (i == rx_ring->next_to_clean)
   519					next_desc = " NTC";
   520				else
   521					next_desc = "";
   522	
   523				if (staterr & E1000_RXD_STAT_DD) {
   524					/* Descriptor Done */
   525					pr_info("%s[0x%03X]     %016llX %016llX ---------------- %s\n",
   526						"RWB", i,
   527						le64_to_cpu(u0->a),
   528						le64_to_cpu(u0->b),
   529						next_desc);
   530				} else {
   531					pr_info("%s[0x%03X]     %016llX %016llX %016llX %s\n",
   532						"R  ", i,
   533						le64_to_cpu(u0->a),
   534						le64_to_cpu(u0->b),
   535						(u64)dma,
   536						next_desc);
   537	
   538					if (netif_msg_pktdata(adapter) &&
   539					    buffer_info && dma && buffer_info->page) {
   540						print_hex_dump(KERN_INFO, "",
   541						  DUMP_PREFIX_ADDRESS,
   542						  16, 1,
   543						  page_address(buffer_info->page) +
   544							      buffer_info->page_offset,
   545						  igb_rx_bufsz(rx_ring), true);
   546					}
   547				}
   548			}
   549		}
   550	
   551	exit:
   552		return;
   553	}
   554
diff mbox series

Patch

diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index 1af1a0423fba..39202c40116a 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -87,6 +87,7 @@  struct igb_adapter;
 #define IGB_XDP_CONSUMED	BIT(0)
 #define IGB_XDP_TX		BIT(1)
 #define IGB_XDP_REDIR		BIT(2)
+#define IGB_XDP_EXIT		BIT(3)
 
 struct vf_data_storage {
 	unsigned char vf_mac_addresses[ETH_ALEN];
@@ -832,6 +833,9 @@  struct xsk_buff_pool *igb_xsk_pool(struct igb_adapter *adapter,
 int igb_xsk_pool_setup(struct igb_adapter *adapter,
 		       struct xsk_buff_pool *pool,
 		       u16 qid);
+bool igb_alloc_rx_buffers_zc(struct igb_ring *rx_ring, u16 count);
+void igb_clean_rx_ring_zc(struct igb_ring *rx_ring);
+int igb_clean_rx_irq_zc(struct igb_q_vector *q_vector, const int budget);
 int igb_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags);
 
 #endif /* _IGB_H_ */
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 2c1e1d70bcf9..b13cc94ac178 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -502,12 +502,17 @@  static void igb_dump(struct igb_adapter *adapter)
 
 		for (i = 0; i < rx_ring->count; i++) {
 			const char *next_desc;
-			struct igb_rx_buffer *buffer_info;
-			buffer_info = &rx_ring->rx_buffer_info[i];
+			dma_addr_t dma = (dma_addr_t)NULL;
+			struct igb_rx_buffer *buffer_info = NULL;
 			rx_desc = IGB_RX_DESC(rx_ring, i);
 			u0 = (struct my_u0 *)rx_desc;
 			staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
 
+			if (!rx_ring->xsk_pool) {
+				buffer_info = &rx_ring->rx_buffer_info[i];
+				dma = buffer_info->dma;
+			}
+
 			if (i == rx_ring->next_to_use)
 				next_desc = " NTU";
 			else if (i == rx_ring->next_to_clean)
@@ -527,11 +532,11 @@  static void igb_dump(struct igb_adapter *adapter)
 					"R  ", i,
 					le64_to_cpu(u0->a),
 					le64_to_cpu(u0->b),
-					(u64)buffer_info->dma,
+					(u64)dma,
 					next_desc);
 
 				if (netif_msg_pktdata(adapter) &&
-				    buffer_info->dma && buffer_info->page) {
+				    buffer_info && dma && buffer_info->page) {
 					print_hex_dump(KERN_INFO, "",
 					  DUMP_PREFIX_ADDRESS,
 					  16, 1,
@@ -2011,7 +2016,10 @@  static void igb_configure(struct igb_adapter *adapter)
 	 */
 	for (i = 0; i < adapter->num_rx_queues; i++) {
 		struct igb_ring *ring = adapter->rx_ring[i];
-		igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
+		if (ring->xsk_pool)
+			igb_alloc_rx_buffers_zc(ring, igb_desc_unused(ring));
+		else
+			igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
 	}
 }
 
@@ -3360,7 +3368,8 @@  static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	netdev->priv_flags |= IFF_SUPP_NOFCS;
 
 	netdev->priv_flags |= IFF_UNICAST_FLT;
-	netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT;
+	netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+			       NETDEV_XDP_ACT_XSK_ZEROCOPY;
 
 	/* MTU range: 68 - 9216 */
 	netdev->min_mtu = ETH_MIN_MTU;
@@ -4740,12 +4749,17 @@  void igb_setup_srrctl(struct igb_adapter *adapter, struct igb_ring *ring)
 	struct e1000_hw *hw = &adapter->hw;
 	int reg_idx = ring->reg_idx;
 	u32 srrctl = 0;
+	u32 buf_size;
 
-	srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
-	if (ring_uses_large_buffer(ring))
-		srrctl |= IGB_RXBUFFER_3072 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
+	if (ring->xsk_pool)
+		buf_size = xsk_pool_get_rx_frame_size(ring->xsk_pool);
+	else if (ring_uses_large_buffer(ring))
+		buf_size = IGB_RXBUFFER_3072;
 	else
-		srrctl |= IGB_RXBUFFER_2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
+		buf_size = IGB_RXBUFFER_2048;
+
+	srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
+	srrctl |= buf_size >> E1000_SRRCTL_BSIZEPKT_SHIFT;
 	srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
 	if (hw->mac.type >= e1000_82580)
 		srrctl |= E1000_SRRCTL_TIMESTAMP;
@@ -4777,9 +4791,17 @@  void igb_configure_rx_ring(struct igb_adapter *adapter,
 	u32 rxdctl = 0;
 
 	xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
-	WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
-					   MEM_TYPE_PAGE_SHARED, NULL));
 	ring->xsk_pool = igb_xsk_pool(adapter, ring);
+	if (ring->xsk_pool) {
+		WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+						   MEM_TYPE_XSK_BUFF_POOL,
+						   NULL));
+		xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq);
+	} else {
+		WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+						   MEM_TYPE_PAGE_SHARED,
+						   NULL));
+	}
 
 	/* disable the queue */
 	wr32(E1000_RXDCTL(reg_idx), 0);
@@ -4806,9 +4828,12 @@  void igb_configure_rx_ring(struct igb_adapter *adapter,
 	rxdctl |= IGB_RX_HTHRESH << 8;
 	rxdctl |= IGB_RX_WTHRESH << 16;
 
-	/* initialize rx_buffer_info */
-	memset(ring->rx_buffer_info, 0,
-	       sizeof(struct igb_rx_buffer) * ring->count);
+	if (ring->xsk_pool)
+		memset(ring->rx_buffer_info_zc, 0,
+		       sizeof(*ring->rx_buffer_info_zc) * ring->count);
+	else
+		memset(ring->rx_buffer_info, 0,
+		       sizeof(*ring->rx_buffer_info) * ring->count);
 
 	/* initialize Rx descriptor 0 */
 	rx_desc = IGB_RX_DESC(ring, 0);
@@ -4992,8 +5017,13 @@  void igb_free_rx_resources(struct igb_ring *rx_ring)
 
 	rx_ring->xdp_prog = NULL;
 	xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
-	vfree(rx_ring->rx_buffer_info);
-	rx_ring->rx_buffer_info = NULL;
+	if (rx_ring->xsk_pool) {
+		vfree(rx_ring->rx_buffer_info_zc);
+		rx_ring->rx_buffer_info_zc = NULL;
+	} else {
+		vfree(rx_ring->rx_buffer_info);
+		rx_ring->rx_buffer_info = NULL;
+	}
 
 	/* if not set, then don't free */
 	if (!rx_ring->desc)
@@ -5031,6 +5061,11 @@  void igb_clean_rx_ring(struct igb_ring *rx_ring)
 	dev_kfree_skb(rx_ring->skb);
 	rx_ring->skb = NULL;
 
+	if (rx_ring->xsk_pool) {
+		igb_clean_rx_ring_zc(rx_ring);
+		goto skip_for_xsk;
+	}
+
 	/* Free all the Rx ring sk_buffs */
 	while (i != rx_ring->next_to_alloc) {
 		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
@@ -5058,6 +5093,7 @@  void igb_clean_rx_ring(struct igb_ring *rx_ring)
 			i = 0;
 	}
 
+skip_for_xsk:
 	rx_ring->next_to_alloc = 0;
 	rx_ring->next_to_clean = 0;
 	rx_ring->next_to_use = 0;
@@ -8226,7 +8262,9 @@  static int igb_poll(struct napi_struct *napi, int budget)
 		clean_complete = igb_clean_tx_irq(q_vector, budget);
 
 	if (q_vector->rx.ring) {
-		int cleaned = igb_clean_rx_irq(q_vector, budget);
+		int cleaned = q_vector->rx.ring->xsk_pool ?
+			igb_clean_rx_irq_zc(q_vector, budget) :
+			igb_clean_rx_irq(q_vector, budget);
 
 		work_done += cleaned;
 		if (cleaned >= budget)
@@ -8634,8 +8672,15 @@  struct sk_buff *igb_run_xdp(struct igb_adapter *adapter,
 		break;
 	case XDP_REDIRECT:
 		err = xdp_do_redirect(adapter->netdev, xdp, xdp_prog);
-		if (err)
+		if (err) {
+			if (rx_ring->xsk_pool &&
+			    xsk_uses_need_wakeup(rx_ring->xsk_pool) &&
+			    err == -ENOBUFS)
+				result = IGB_XDP_EXIT;
+			else
+				result = IGB_XDP_CONSUMED;
 			goto out_failure;
+		}
 		result = IGB_XDP_REDIR;
 		break;
 	default:
@@ -8892,12 +8937,14 @@  static void igb_put_rx_buffer(struct igb_ring *rx_ring,
 
 static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
 {
+	unsigned int total_bytes = 0, total_packets = 0;
 	struct igb_adapter *adapter = q_vector->adapter;
 	struct igb_ring *rx_ring = q_vector->rx.ring;
-	struct sk_buff *skb = rx_ring->skb;
-	unsigned int total_bytes = 0, total_packets = 0;
 	u16 cleaned_count = igb_desc_unused(rx_ring);
+	struct sk_buff *skb = rx_ring->skb;
+	int cpu = smp_processor_id();
 	unsigned int xdp_xmit = 0;
+	struct netdev_queue *nq;
 	struct xdp_buff xdp;
 	u32 frame_sz = 0;
 	int rx_buf_pgcnt;
@@ -9025,7 +9072,10 @@  static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
 	if (xdp_xmit & IGB_XDP_TX) {
 		struct igb_ring *tx_ring = igb_xdp_tx_queue_mapping(adapter);
 
+		nq = txring_txq(tx_ring);
+		__netif_tx_lock(nq, cpu);
 		igb_xdp_ring_update_tail(tx_ring);
+		__netif_tx_unlock(nq);
 	}
 
 	u64_stats_update_begin(&rx_ring->rx_syncp);
diff --git a/drivers/net/ethernet/intel/igb/igb_xsk.c b/drivers/net/ethernet/intel/igb/igb_xsk.c
index 925bf97f7caa..5e0244772914 100644
--- a/drivers/net/ethernet/intel/igb/igb_xsk.c
+++ b/drivers/net/ethernet/intel/igb/igb_xsk.c
@@ -66,7 +66,10 @@  static void igb_txrx_ring_enable(struct igb_adapter *adapter, u16 qid)
 	 * at least 1 descriptor unused to make sure
 	 * next_to_use != next_to_clean
 	 */
-	igb_alloc_rx_buffers(rx_ring, igb_desc_unused(rx_ring));
+	if (rx_ring->xsk_pool)
+		igb_alloc_rx_buffers_zc(rx_ring, igb_desc_unused(rx_ring));
+	else
+		igb_alloc_rx_buffers(rx_ring, igb_desc_unused(rx_ring));
 
 	/* Rx/Tx share the same napi context. */
 	napi_enable(&rx_ring->q_vector->napi);
@@ -172,6 +175,263 @@  int igb_xsk_pool_setup(struct igb_adapter *adapter,
 		igb_xsk_pool_disable(adapter, qid);
 }
 
+static u16 igb_fill_rx_descs(struct xsk_buff_pool *pool, struct xdp_buff **xdp,
+			     union e1000_adv_rx_desc *rx_desc, u16 count)
+{
+	dma_addr_t dma;
+	u16 buffs;
+	int i;
+
+	/* nothing to do */
+	if (!count)
+		return 0;
+
+	buffs = xsk_buff_alloc_batch(pool, xdp, count);
+	for (i = 0; i < buffs; i++) {
+		dma = xsk_buff_xdp_get_dma(*xdp);
+		rx_desc->read.pkt_addr = cpu_to_le64(dma);
+		rx_desc->wb.upper.length = 0;
+
+		rx_desc++;
+		xdp++;
+	}
+
+	return buffs;
+}
+
+bool igb_alloc_rx_buffers_zc(struct igb_ring *rx_ring, u16 count)
+{
+	u32 nb_buffs_extra = 0, nb_buffs = 0;
+	union e1000_adv_rx_desc *rx_desc;
+	u16 ntu = rx_ring->next_to_use;
+	u16 total_count = count;
+	struct xdp_buff **xdp;
+
+	rx_desc = IGB_RX_DESC(rx_ring, ntu);
+	xdp = &rx_ring->rx_buffer_info_zc[ntu];
+
+	if (ntu + count >= rx_ring->count) {
+		nb_buffs_extra = igb_fill_rx_descs(rx_ring->xsk_pool, xdp,
+						   rx_desc,
+						   rx_ring->count - ntu);
+		if (nb_buffs_extra != rx_ring->count - ntu) {
+			ntu += nb_buffs_extra;
+			goto exit;
+		}
+		rx_desc = IGB_RX_DESC(rx_ring, 0);
+		xdp = rx_ring->rx_buffer_info_zc;
+		ntu = 0;
+		count -= nb_buffs_extra;
+	}
+
+	nb_buffs = igb_fill_rx_descs(rx_ring->xsk_pool, xdp, rx_desc, count);
+	ntu += nb_buffs;
+	if (ntu == rx_ring->count)
+		ntu = 0;
+
+	/* clear the length for the next_to_use descriptor */
+	rx_desc = IGB_RX_DESC(rx_ring, ntu);
+	rx_desc->wb.upper.length = 0;
+
+exit:
+	if (rx_ring->next_to_use != ntu) {
+		rx_ring->next_to_use = ntu;
+
+		/* Force memory writes to complete before letting h/w
+		 * know there are new descriptors to fetch.  (Only
+		 * applicable for weak-ordered memory model archs,
+		 * such as IA-64).
+		 */
+		wmb();
+		writel(ntu, rx_ring->tail);
+	}
+
+	return total_count == (nb_buffs + nb_buffs_extra);
+}
+
+void igb_clean_rx_ring_zc(struct igb_ring *rx_ring)
+{
+	u16 ntc = rx_ring->next_to_clean;
+	u16 ntu = rx_ring->next_to_use;
+
+	while (ntc != ntu) {
+		struct xdp_buff *xdp = rx_ring->rx_buffer_info_zc[ntc];
+
+		xsk_buff_free(xdp);
+		ntc++;
+		if (ntc >= rx_ring->count)
+			ntc = 0;
+	}
+}
+
+static struct sk_buff *igb_construct_skb_zc(struct igb_ring *rx_ring,
+					    struct xdp_buff *xdp,
+					    ktime_t timestamp)
+{
+	unsigned int totalsize = xdp->data_end - xdp->data_meta;
+	unsigned int metasize = xdp->data - xdp->data_meta;
+	struct sk_buff *skb;
+
+	net_prefetch(xdp->data_meta);
+
+	/* allocate a skb to store the frags */
+	skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize,
+			       GFP_ATOMIC | __GFP_NOWARN);
+	if (unlikely(!skb))
+		return NULL;
+
+	if (timestamp)
+		skb_hwtstamps(skb)->hwtstamp = timestamp;
+
+	memcpy(__skb_put(skb, totalsize), xdp->data_meta,
+	       ALIGN(totalsize, sizeof(long)));
+
+	if (metasize) {
+		skb_metadata_set(skb, metasize);
+		__skb_pull(skb, metasize);
+	}
+
+	return skb;
+}
+
+static void igb_update_ntc(struct igb_ring *rx_ring)
+{
+	u32 ntc = rx_ring->next_to_clean + 1;
+
+	/* fetch, update, and store next to clean */
+	ntc = (ntc < rx_ring->count) ? ntc : 0;
+	rx_ring->next_to_clean = ntc;
+
+	prefetch(IGB_RX_DESC(rx_ring, ntc));
+}
+
+int igb_clean_rx_irq_zc(struct igb_q_vector *q_vector, const int budget)
+{
+	struct igb_adapter *adapter = q_vector->adapter;
+	unsigned int total_bytes = 0, total_packets = 0;
+	struct igb_ring *rx_ring = q_vector->rx.ring;
+	int cpu = smp_processor_id();
+	unsigned int xdp_xmit = 0;
+	struct netdev_queue *nq;
+	bool failure = false;
+	u16 entries_to_alloc;
+	struct sk_buff *skb;
+
+	while (likely(total_packets < budget)) {
+		union e1000_adv_rx_desc *rx_desc;
+		struct xdp_buff *xdp;
+		ktime_t timestamp = 0;
+		unsigned int size;
+
+		rx_desc = IGB_RX_DESC(rx_ring, rx_ring->next_to_clean);
+		size = le16_to_cpu(rx_desc->wb.upper.length);
+		if (!size)
+			break;
+
+		/* This memory barrier is needed to keep us from reading
+		 * any other fields out of the rx_desc until we know the
+		 * descriptor has been written back
+		 */
+		dma_rmb();
+
+		xdp = rx_ring->rx_buffer_info_zc[rx_ring->next_to_clean];
+		xsk_buff_set_size(xdp, size);
+		xsk_buff_dma_sync_for_cpu(xdp, rx_ring->xsk_pool);
+
+		/* pull rx packet timestamp if available and valid */
+		if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
+			int ts_hdr_len;
+
+			ts_hdr_len = igb_ptp_rx_pktstamp(rx_ring->q_vector,
+							 xdp->data,
+							 &timestamp);
+
+			xdp->data += ts_hdr_len;
+			xdp->data_meta += ts_hdr_len;
+			size -= ts_hdr_len;
+		}
+
+		skb = igb_run_xdp(adapter, rx_ring, xdp);
+
+		if (IS_ERR(skb)) {
+			unsigned int xdp_res = -PTR_ERR(skb);
+
+			if (likely(xdp_res & (IGB_XDP_TX | IGB_XDP_REDIR))) {
+				xdp_xmit |= xdp_res;
+			} else if (xdp_res == IGB_XDP_EXIT) {
+				failure = true;
+				break;
+			} else if (xdp_res == IGB_XDP_CONSUMED) {
+				xsk_buff_free(xdp);
+			}
+
+			total_packets++;
+			total_bytes += size;
+
+			igb_update_ntc(rx_ring);
+			continue;
+		}
+
+		skb = igb_construct_skb_zc(rx_ring, xdp, timestamp);
+
+		/* exit if we failed to retrieve a buffer */
+		if (!skb) {
+			rx_ring->rx_stats.alloc_failed++;
+			break;
+		}
+
+		xsk_buff_free(xdp);
+		igb_update_ntc(rx_ring);
+
+		if (eth_skb_pad(skb))
+			continue;
+
+		/* probably a little skewed due to removing CRC */
+		total_bytes += skb->len;
+
+		/* populate checksum, timestamp, VLAN, and protocol */
+		igb_process_skb_fields(rx_ring, rx_desc, skb);
+
+		napi_gro_receive(&q_vector->napi, skb);
+
+		/* update budget accounting */
+		total_packets++;
+	}
+
+	if (xdp_xmit & IGB_XDP_REDIR)
+		xdp_do_flush();
+
+	if (xdp_xmit & IGB_XDP_TX) {
+		struct igb_ring *tx_ring = igb_xdp_tx_queue_mapping(adapter);
+
+		nq = txring_txq(tx_ring);
+		__netif_tx_lock(nq, cpu);
+		igb_xdp_ring_update_tail(tx_ring);
+		__netif_tx_unlock(nq);
+	}
+
+	u64_stats_update_begin(&rx_ring->rx_syncp);
+	rx_ring->rx_stats.packets += total_packets;
+	rx_ring->rx_stats.bytes += total_bytes;
+	u64_stats_update_end(&rx_ring->rx_syncp);
+	q_vector->rx.total_packets += total_packets;
+	q_vector->rx.total_bytes += total_bytes;
+
+	entries_to_alloc = igb_desc_unused(rx_ring);
+	if (entries_to_alloc >= IGB_RX_BUFFER_WRITE)
+		failure |= !igb_alloc_rx_buffers_zc(rx_ring, entries_to_alloc);
+
+	if (xsk_uses_need_wakeup(rx_ring->xsk_pool)) {
+		if (failure || rx_ring->next_to_clean == rx_ring->next_to_use)
+			xsk_set_rx_need_wakeup(rx_ring->xsk_pool);
+		else
+			xsk_clear_rx_need_wakeup(rx_ring->xsk_pool);
+
+		return (int)total_packets;
+	}
+	return failure ? budget : (int)total_packets;
+}
+
 int igb_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
 {
 	struct igb_adapter *adapter = netdev_priv(dev);