
[net-next,v2,6/6] tsnep: Add XDP socket zero-copy TX support

Message ID 20230415144256.27884-7-gerhard@engleder-embedded.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Series tsnep: XDP socket zero-copy support

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 18 this patch: 18
netdev/cc_maintainers warning 4 maintainers not CCed: ast@kernel.org hawk@kernel.org daniel@iogearbox.net john.fastabend@gmail.com
netdev/build_clang success Errors and warnings before: 18 this patch: 18
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 18 this patch: 18
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 222 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Gerhard Engleder April 15, 2023, 2:42 p.m. UTC
Send and complete XSK pool frames within TX NAPI context. NAPI context
is triggered by ndo_xsk_wakeup.

Test results with A53 1.2GHz:

xdpsock txonly copy mode:
                   pps            pkts           1.00
tx                 284,409        11,398,144
Two CPUs with 100% and 10% utilization.

xdpsock txonly zero-copy mode:
                   pps            pkts           1.00
tx                 511,929        5,890,368
Two CPUs with 100% and 1% utilization.

Packet rate increases and CPU utilization is reduced.

Signed-off-by: Gerhard Engleder <gerhard@engleder-embedded.com>
---
 drivers/net/ethernet/engleder/tsnep.h      |   2 +
 drivers/net/ethernet/engleder/tsnep_main.c | 131 +++++++++++++++++++--
 2 files changed, 123 insertions(+), 10 deletions(-)
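
The commit message relies on the driver's ndo_xsk_wakeup callback to trigger the TX NAPI context. That handler is not part of this diff; the following is only a rough sketch of the usual pattern, with placeholder names (example_*) rather than actual tsnep identifiers:

#include <linux/netdevice.h>

/* Placeholder types; the real driver keeps its NAPI instance per queue. */
struct example_queue {
	struct napi_struct napi;
};

struct example_adapter {
	struct net_device *netdev;
	struct example_queue queue[1];
};

static int example_xsk_wakeup(struct net_device *netdev, u32 queue_id,
			      u32 flags)
{
	struct example_adapter *adapter = netdev_priv(netdev);
	struct example_queue *queue;

	if (!netif_running(netdev))
		return -ENETDOWN;

	queue = &adapter->queue[queue_id];

	/* Kick NAPI unless it is already running; if it is running, mark
	 * it as missed so it reschedules itself after the current poll.
	 * The XSK TX work itself then happens in the NAPI poll function.
	 */
	if (!napi_if_scheduled_mark_missed(&queue->napi))
		napi_schedule(&queue->napi);

	return 0;
}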

Comments

Paolo Abeni April 18, 2023, 8:27 a.m. UTC | #1
On Sat, 2023-04-15 at 16:42 +0200, Gerhard Engleder wrote:
> Send and complete XSK pool frames within TX NAPI context. NAPI context
> is triggered by ndo_xsk_wakeup.
> 
> [...]
> 
> +static void tsnep_tx_enable(struct tsnep_tx *tx)
> +{
> +	struct netdev_queue *nq;
> +
> +	nq = netdev_get_tx_queue(tx->adapter->netdev, tx->queue_index);
> +
> +	local_bh_disable();
> +	__netif_tx_lock(nq, smp_processor_id());

The above 2 statements could be replaced with:

	__netif_tx_lock_bh()

> +	netif_tx_wake_queue(nq);
> +	__netif_tx_unlock(nq);
> +	local_bh_enable();

__netif_tx_unlock_bh()

> +}
> +
> +static void tsnep_tx_disable(struct tsnep_tx *tx, struct napi_struct *napi)
> +{
> +	struct netdev_queue *nq;
> +	u32 val;
> +
> +	nq = netdev_get_tx_queue(tx->adapter->netdev, tx->queue_index);
> +
> +	local_bh_disable();
> +	__netif_tx_lock(nq, smp_processor_id());

Same here.


Thanks!

Paolo
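
A minimal sketch of what tsnep_tx_enable() would look like with the combined helpers suggested above (tsnep_tx_disable() would take and release the lock the same way); this illustrates the suggestion only, not the final driver code:

static void tsnep_tx_enable(struct tsnep_tx *tx)
{
	struct netdev_queue *nq;

	nq = netdev_get_tx_queue(tx->adapter->netdev, tx->queue_index);

	/* __netif_tx_lock_bh() disables bottom halves and takes the
	 * queue lock in one call; __netif_tx_unlock_bh() undoes both.
	 */
	__netif_tx_lock_bh(nq);
	netif_tx_wake_queue(nq);
	__netif_tx_unlock_bh(nq);
}
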
Gerhard Engleder April 18, 2023, 6:25 p.m. UTC | #2
On 18.04.23 10:27, Paolo Abeni wrote:
> On Sat, 2023-04-15 at 16:42 +0200, Gerhard Engleder wrote:
>> Send and complete XSK pool frames within TX NAPI context. NAPI context
>> is triggered by ndo_xsk_wakeup.
>>
>> [...]
>>
>> +static void tsnep_tx_enable(struct tsnep_tx *tx)
>> +{
>> +	struct netdev_queue *nq;
>> +
>> +	nq = netdev_get_tx_queue(tx->adapter->netdev, tx->queue_index);
>> +
>> +	local_bh_disable();
>> +	__netif_tx_lock(nq, smp_processor_id());
> 
> The above 2 statements could be replaced with:
> 
> 	__netif_tx_lock_bh()
> 
>> +	netif_tx_wake_queue(nq);
>> +	__netif_tx_unlock(nq);
>> +	local_bh_enable();
> 
> __netif_tx_unlock_bh()
> 
>> +}
>> +
>> +static void tsnep_tx_disable(struct tsnep_tx *tx, struct napi_struct *napi)
>> +{
>> +	struct netdev_queue *nq;
>> +	u32 val;
>> +
>> +	nq = netdev_get_tx_queue(tx->adapter->netdev, tx->queue_index);
>> +
>> +	local_bh_disable();
>> +	__netif_tx_lock(nq, smp_processor_id());
> 
> Same here.

Will be done.

Thank you!

Gerhard

Patch

diff --git a/drivers/net/ethernet/engleder/tsnep.h b/drivers/net/ethernet/engleder/tsnep.h
index d0bea605a1d1..11b29f56aaf9 100644
--- a/drivers/net/ethernet/engleder/tsnep.h
+++ b/drivers/net/ethernet/engleder/tsnep.h
@@ -70,6 +70,7 @@  struct tsnep_tx_entry {
 	union {
 		struct sk_buff *skb;
 		struct xdp_frame *xdpf;
+		bool zc;
 	};
 	size_t len;
 	DEFINE_DMA_UNMAP_ADDR(dma);
@@ -88,6 +89,7 @@  struct tsnep_tx {
 	int read;
 	u32 owner_counter;
 	int increment_owner_counter;
+	struct xsk_buff_pool *xsk_pool;
 
 	u32 packets;
 	u32 bytes;
diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c
index 13e5d4438082..de51d0cc8935 100644
--- a/drivers/net/ethernet/engleder/tsnep_main.c
+++ b/drivers/net/ethernet/engleder/tsnep_main.c
@@ -54,6 +54,8 @@ 
 #define TSNEP_TX_TYPE_SKB_FRAG	BIT(1)
 #define TSNEP_TX_TYPE_XDP_TX	BIT(2)
 #define TSNEP_TX_TYPE_XDP_NDO	BIT(3)
+#define TSNEP_TX_TYPE_XDP	(TSNEP_TX_TYPE_XDP_TX | TSNEP_TX_TYPE_XDP_NDO)
+#define TSNEP_TX_TYPE_XSK	BIT(4)
 
 #define TSNEP_XDP_TX		BIT(0)
 #define TSNEP_XDP_REDIRECT	BIT(1)
@@ -322,13 +324,51 @@  static void tsnep_tx_init(struct tsnep_tx *tx)
 	tx->increment_owner_counter = TSNEP_RING_SIZE - 1;
 }
 
+static void tsnep_tx_enable(struct tsnep_tx *tx)
+{
+	struct netdev_queue *nq;
+
+	nq = netdev_get_tx_queue(tx->adapter->netdev, tx->queue_index);
+
+	local_bh_disable();
+	__netif_tx_lock(nq, smp_processor_id());
+	netif_tx_wake_queue(nq);
+	__netif_tx_unlock(nq);
+	local_bh_enable();
+}
+
+static void tsnep_tx_disable(struct tsnep_tx *tx, struct napi_struct *napi)
+{
+	struct netdev_queue *nq;
+	u32 val;
+
+	nq = netdev_get_tx_queue(tx->adapter->netdev, tx->queue_index);
+
+	local_bh_disable();
+	__netif_tx_lock(nq, smp_processor_id());
+	netif_tx_stop_queue(nq);
+	__netif_tx_unlock(nq);
+	local_bh_enable();
+
+	/* wait until TX is done in hardware */
+	readx_poll_timeout(ioread32, tx->addr + TSNEP_CONTROL, val,
+			   ((val & TSNEP_CONTROL_TX_ENABLE) == 0), 10000,
+			   1000000);
+
+	/* wait until TX is also done in software */
+	while (READ_ONCE(tx->read) != tx->write) {
+		napi_schedule(napi);
+		napi_synchronize(napi);
+	}
+}
+
 static void tsnep_tx_activate(struct tsnep_tx *tx, int index, int length,
 			      bool last)
 {
 	struct tsnep_tx_entry *entry = &tx->entry[index];
 
 	entry->properties = 0;
-	/* xdpf is union with skb */
+	/* xdpf and zc are union with skb */
 	if (entry->skb) {
 		entry->properties = length & TSNEP_DESC_LENGTH_MASK;
 		entry->properties |= TSNEP_DESC_INTERRUPT_FLAG;
@@ -646,10 +686,69 @@  static bool tsnep_xdp_xmit_back(struct tsnep_adapter *adapter,
 	return xmit;
 }
 
+static int tsnep_xdp_tx_map_zc(struct xdp_desc *xdpd, struct tsnep_tx *tx)
+{
+	struct tsnep_tx_entry *entry;
+	dma_addr_t dma;
+
+	entry = &tx->entry[tx->write];
+	entry->zc = true;
+
+	dma = xsk_buff_raw_get_dma(tx->xsk_pool, xdpd->addr);
+	xsk_buff_raw_dma_sync_for_device(tx->xsk_pool, dma, xdpd->len);
+
+	entry->type = TSNEP_TX_TYPE_XSK;
+	entry->len = xdpd->len;
+
+	entry->desc->tx = __cpu_to_le64(dma);
+
+	return xdpd->len;
+}
+
+static void tsnep_xdp_xmit_frame_ring_zc(struct xdp_desc *xdpd,
+					 struct tsnep_tx *tx)
+{
+	int length;
+
+	length = tsnep_xdp_tx_map_zc(xdpd, tx);
+
+	tsnep_tx_activate(tx, tx->write, length, true);
+	tx->write = (tx->write + 1) & TSNEP_RING_MASK;
+}
+
+static void tsnep_xdp_xmit_zc(struct tsnep_tx *tx)
+{
+	int desc_available = tsnep_tx_desc_available(tx);
+	struct xdp_desc *descs = tx->xsk_pool->tx_descs;
+	int batch, i;
+
+	/* ensure that TX ring is not filled up by XDP, always MAX_SKB_FRAGS
+	 * will be available for normal TX path and queue is stopped there if
+	 * necessary
+	 */
+	if (desc_available <= (MAX_SKB_FRAGS + 1))
+		return;
+	desc_available -= MAX_SKB_FRAGS + 1;
+
+	batch = xsk_tx_peek_release_desc_batch(tx->xsk_pool, desc_available);
+	for (i = 0; i < batch; i++)
+		tsnep_xdp_xmit_frame_ring_zc(&descs[i], tx);
+
+	if (batch) {
+		/* descriptor properties shall be valid before hardware is
+		 * notified
+		 */
+		dma_wmb();
+
+		tsnep_xdp_xmit_flush(tx);
+	}
+}
+
 static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
 {
 	struct tsnep_tx_entry *entry;
 	struct netdev_queue *nq;
+	int xsk_frames = 0;
 	int budget = 128;
 	int length;
 	int count;
@@ -676,7 +775,7 @@  static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
 		if ((entry->type & TSNEP_TX_TYPE_SKB) &&
 		    skb_shinfo(entry->skb)->nr_frags > 0)
 			count += skb_shinfo(entry->skb)->nr_frags;
-		else if (!(entry->type & TSNEP_TX_TYPE_SKB) &&
+		else if ((entry->type & TSNEP_TX_TYPE_XDP) &&
 			 xdp_frame_has_frags(entry->xdpf))
 			count += xdp_get_shared_info_from_frame(entry->xdpf)->nr_frags;
 
@@ -705,9 +804,11 @@  static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
 
 		if (entry->type & TSNEP_TX_TYPE_SKB)
 			napi_consume_skb(entry->skb, napi_budget);
-		else
+		else if (entry->type & TSNEP_TX_TYPE_XDP)
 			xdp_return_frame_rx_napi(entry->xdpf);
-		/* xdpf is union with skb */
+		else
+			xsk_frames++;
+		/* xdpf and zc are union with skb */
 		entry->skb = NULL;
 
 		tx->read = (tx->read + count) & TSNEP_RING_MASK;
@@ -718,6 +819,14 @@  static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
 		budget--;
 	} while (likely(budget));
 
+	if (tx->xsk_pool) {
+		if (xsk_frames)
+			xsk_tx_completed(tx->xsk_pool, xsk_frames);
+		if (xsk_uses_need_wakeup(tx->xsk_pool))
+			xsk_set_tx_need_wakeup(tx->xsk_pool);
+		tsnep_xdp_xmit_zc(tx);
+	}
+
 	if ((tsnep_tx_desc_available(tx) >= ((MAX_SKB_FRAGS + 1) * 2)) &&
 	    netif_tx_queue_stopped(nq)) {
 		netif_tx_wake_queue(nq);
@@ -765,12 +874,6 @@  static int tsnep_tx_open(struct tsnep_tx *tx)
 
 static void tsnep_tx_close(struct tsnep_tx *tx)
 {
-	u32 val;
-
-	readx_poll_timeout(ioread32, tx->addr + TSNEP_CONTROL, val,
-			   ((val & TSNEP_CONTROL_TX_ENABLE) == 0), 10000,
-			   1000000);
-
 	tsnep_tx_ring_cleanup(tx);
 }
 
@@ -1783,12 +1886,18 @@  static void tsnep_queue_enable(struct tsnep_queue *queue)
 	napi_enable(&queue->napi);
 	tsnep_enable_irq(queue->adapter, queue->irq_mask);
 
+	if (queue->tx)
+		tsnep_tx_enable(queue->tx);
+
 	if (queue->rx)
 		tsnep_rx_enable(queue->rx);
 }
 
 static void tsnep_queue_disable(struct tsnep_queue *queue)
 {
+	if (queue->tx)
+		tsnep_tx_disable(queue->tx, &queue->napi);
+
 	napi_disable(&queue->napi);
 	tsnep_disable_irq(queue->adapter, queue->irq_mask);
 
@@ -1905,6 +2014,7 @@  int tsnep_enable_xsk(struct tsnep_queue *queue, struct xsk_buff_pool *pool)
 	if (running)
 		tsnep_queue_disable(queue);
 
+	queue->tx->xsk_pool = pool;
 	queue->rx->xsk_pool = pool;
 
 	if (running) {
@@ -1925,6 +2035,7 @@  void tsnep_disable_xsk(struct tsnep_queue *queue)
 	tsnep_rx_free_zc(queue->rx);
 
 	queue->rx->xsk_pool = NULL;
+	queue->tx->xsk_pool = NULL;
 
 	if (running) {
 		tsnep_rx_reopen(queue->rx);