Message ID | 20230415144256.27884-7-gerhard@engleder-embedded.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | tsnep: XDP socket zero-copy support | expand |
On Sat, 2023-04-15 at 16:42 +0200, Gerhard Engleder wrote: > Send and complete XSK pool frames within TX NAPI context. NAPI context > is triggered by ndo_xsk_wakeup. > > Test results with A53 1.2GHz: > > xdpsock txonly copy mode: > pps pkts 1.00 > tx 284,409 11,398,144 > Two CPUs with 100% and 10% utilization. > > xdpsock txonly zero-copy mode: > pps pkts 1.00 > tx 511,929 5,890,368 > Two CPUs with 100% and 1% utilization. > > Packet rate increases and CPU utilization is reduced. > > Signed-off-by: Gerhard Engleder <gerhard@engleder-embedded.com> > --- > drivers/net/ethernet/engleder/tsnep.h | 2 + > drivers/net/ethernet/engleder/tsnep_main.c | 131 +++++++++++++++++++-- > 2 files changed, 123 insertions(+), 10 deletions(-) > > diff --git a/drivers/net/ethernet/engleder/tsnep.h b/drivers/net/ethernet/engleder/tsnep.h > index d0bea605a1d1..11b29f56aaf9 100644 > --- a/drivers/net/ethernet/engleder/tsnep.h > +++ b/drivers/net/ethernet/engleder/tsnep.h > @@ -70,6 +70,7 @@ struct tsnep_tx_entry { > union { > struct sk_buff *skb; > struct xdp_frame *xdpf; > + bool zc; > }; > size_t len; > DEFINE_DMA_UNMAP_ADDR(dma); > @@ -88,6 +89,7 @@ struct tsnep_tx { > int read; > u32 owner_counter; > int increment_owner_counter; > + struct xsk_buff_pool *xsk_pool; > > u32 packets; > u32 bytes; > diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c > index 13e5d4438082..de51d0cc8935 100644 > --- a/drivers/net/ethernet/engleder/tsnep_main.c > +++ b/drivers/net/ethernet/engleder/tsnep_main.c > @@ -54,6 +54,8 @@ > #define TSNEP_TX_TYPE_SKB_FRAG BIT(1) > #define TSNEP_TX_TYPE_XDP_TX BIT(2) > #define TSNEP_TX_TYPE_XDP_NDO BIT(3) > +#define TSNEP_TX_TYPE_XDP (TSNEP_TX_TYPE_XDP_TX | TSNEP_TX_TYPE_XDP_NDO) > +#define TSNEP_TX_TYPE_XSK BIT(4) > > #define TSNEP_XDP_TX BIT(0) > #define TSNEP_XDP_REDIRECT BIT(1) > @@ -322,13 +324,51 @@ static void tsnep_tx_init(struct tsnep_tx *tx) > tx->increment_owner_counter = TSNEP_RING_SIZE - 1; > } > > +static 
void tsnep_tx_enable(struct tsnep_tx *tx)
> +{
> + struct netdev_queue *nq;
> +
> + nq = netdev_get_tx_queue(tx->adapter->netdev, tx->queue_index);
> +
> + local_bh_disable();
> + __netif_tx_lock(nq, smp_processor_id());

The above 2 statements could be replaced with:

__netif_tx_lock_bh()

> + netif_tx_wake_queue(nq);
> + __netif_tx_unlock(nq);
> + local_bh_enable();

__netif_tx_unlock_bh()

> +}
> +
> +static void tsnep_tx_disable(struct tsnep_tx *tx, struct napi_struct *napi)
> +{
> + struct netdev_queue *nq;
> + u32 val;
> +
> + nq = netdev_get_tx_queue(tx->adapter->netdev, tx->queue_index);
> +
> + local_bh_disable();
> + __netif_tx_lock(nq, smp_processor_id());

Same here.

Thanks!

Paolo
On 18.04.23 10:27, Paolo Abeni wrote: > On Sat, 2023-04-15 at 16:42 +0200, Gerhard Engleder wrote: >> Send and complete XSK pool frames within TX NAPI context. NAPI context >> is triggered by ndo_xsk_wakeup. >> >> Test results with A53 1.2GHz: >> >> xdpsock txonly copy mode: >> pps pkts 1.00 >> tx 284,409 11,398,144 >> Two CPUs with 100% and 10% utilization. >> >> xdpsock txonly zero-copy mode: >> pps pkts 1.00 >> tx 511,929 5,890,368 >> Two CPUs with 100% and 1% utilization. >> >> Packet rate increases and CPU utilization is reduced. >> >> Signed-off-by: Gerhard Engleder <gerhard@engleder-embedded.com> >> --- >> drivers/net/ethernet/engleder/tsnep.h | 2 + >> drivers/net/ethernet/engleder/tsnep_main.c | 131 +++++++++++++++++++-- >> 2 files changed, 123 insertions(+), 10 deletions(-) >> >> diff --git a/drivers/net/ethernet/engleder/tsnep.h b/drivers/net/ethernet/engleder/tsnep.h >> index d0bea605a1d1..11b29f56aaf9 100644 >> --- a/drivers/net/ethernet/engleder/tsnep.h >> +++ b/drivers/net/ethernet/engleder/tsnep.h >> @@ -70,6 +70,7 @@ struct tsnep_tx_entry { >> union { >> struct sk_buff *skb; >> struct xdp_frame *xdpf; >> + bool zc; >> }; >> size_t len; >> DEFINE_DMA_UNMAP_ADDR(dma); >> @@ -88,6 +89,7 @@ struct tsnep_tx { >> int read; >> u32 owner_counter; >> int increment_owner_counter; >> + struct xsk_buff_pool *xsk_pool; >> >> u32 packets; >> u32 bytes; >> diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c >> index 13e5d4438082..de51d0cc8935 100644 >> --- a/drivers/net/ethernet/engleder/tsnep_main.c >> +++ b/drivers/net/ethernet/engleder/tsnep_main.c >> @@ -54,6 +54,8 @@ >> #define TSNEP_TX_TYPE_SKB_FRAG BIT(1) >> #define TSNEP_TX_TYPE_XDP_TX BIT(2) >> #define TSNEP_TX_TYPE_XDP_NDO BIT(3) >> +#define TSNEP_TX_TYPE_XDP (TSNEP_TX_TYPE_XDP_TX | TSNEP_TX_TYPE_XDP_NDO) >> +#define TSNEP_TX_TYPE_XSK BIT(4) >> >> #define TSNEP_XDP_TX BIT(0) >> #define TSNEP_XDP_REDIRECT BIT(1) >> @@ -322,13 +324,51 @@ static void 
tsnep_tx_init(struct tsnep_tx *tx)
>> tx->increment_owner_counter = TSNEP_RING_SIZE - 1;
>> }
>>
>> +static void tsnep_tx_enable(struct tsnep_tx *tx)
>> +{
>> + struct netdev_queue *nq;
>> +
>> + nq = netdev_get_tx_queue(tx->adapter->netdev, tx->queue_index);
>> +
>> + local_bh_disable();
>> + __netif_tx_lock(nq, smp_processor_id());
>
> The above 2 statements could be replaced with:
>
> __netif_tx_lock_bh()
>
>> + netif_tx_wake_queue(nq);
>> + __netif_tx_unlock(nq);
>> + local_bh_enable();
>
> __netif_tx_unlock_bh()
>
>> +}
>> +
>> +static void tsnep_tx_disable(struct tsnep_tx *tx, struct napi_struct *napi)
>> +{
>> + struct netdev_queue *nq;
>> + u32 val;
>> +
>> + nq = netdev_get_tx_queue(tx->adapter->netdev, tx->queue_index);
>> +
>> + local_bh_disable();
>> + __netif_tx_lock(nq, smp_processor_id());
>
> Same here.

Will be done. Thank you!

Gerhard
diff --git a/drivers/net/ethernet/engleder/tsnep.h b/drivers/net/ethernet/engleder/tsnep.h index d0bea605a1d1..11b29f56aaf9 100644 --- a/drivers/net/ethernet/engleder/tsnep.h +++ b/drivers/net/ethernet/engleder/tsnep.h @@ -70,6 +70,7 @@ struct tsnep_tx_entry { union { struct sk_buff *skb; struct xdp_frame *xdpf; + bool zc; }; size_t len; DEFINE_DMA_UNMAP_ADDR(dma); @@ -88,6 +89,7 @@ struct tsnep_tx { int read; u32 owner_counter; int increment_owner_counter; + struct xsk_buff_pool *xsk_pool; u32 packets; u32 bytes; diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c index 13e5d4438082..de51d0cc8935 100644 --- a/drivers/net/ethernet/engleder/tsnep_main.c +++ b/drivers/net/ethernet/engleder/tsnep_main.c @@ -54,6 +54,8 @@ #define TSNEP_TX_TYPE_SKB_FRAG BIT(1) #define TSNEP_TX_TYPE_XDP_TX BIT(2) #define TSNEP_TX_TYPE_XDP_NDO BIT(3) +#define TSNEP_TX_TYPE_XDP (TSNEP_TX_TYPE_XDP_TX | TSNEP_TX_TYPE_XDP_NDO) +#define TSNEP_TX_TYPE_XSK BIT(4) #define TSNEP_XDP_TX BIT(0) #define TSNEP_XDP_REDIRECT BIT(1) @@ -322,13 +324,51 @@ static void tsnep_tx_init(struct tsnep_tx *tx) tx->increment_owner_counter = TSNEP_RING_SIZE - 1; } +static void tsnep_tx_enable(struct tsnep_tx *tx) +{ + struct netdev_queue *nq; + + nq = netdev_get_tx_queue(tx->adapter->netdev, tx->queue_index); + + local_bh_disable(); + __netif_tx_lock(nq, smp_processor_id()); + netif_tx_wake_queue(nq); + __netif_tx_unlock(nq); + local_bh_enable(); +} + +static void tsnep_tx_disable(struct tsnep_tx *tx, struct napi_struct *napi) +{ + struct netdev_queue *nq; + u32 val; + + nq = netdev_get_tx_queue(tx->adapter->netdev, tx->queue_index); + + local_bh_disable(); + __netif_tx_lock(nq, smp_processor_id()); + netif_tx_stop_queue(nq); + __netif_tx_unlock(nq); + local_bh_enable(); + + /* wait until TX is done in hardware */ + readx_poll_timeout(ioread32, tx->addr + TSNEP_CONTROL, val, + ((val & TSNEP_CONTROL_TX_ENABLE) == 0), 10000, + 1000000); + + /* wait until TX is also done 
in software */ + while (READ_ONCE(tx->read) != tx->write) { + napi_schedule(napi); + napi_synchronize(napi); + } +} + static void tsnep_tx_activate(struct tsnep_tx *tx, int index, int length, bool last) { struct tsnep_tx_entry *entry = &tx->entry[index]; entry->properties = 0; - /* xdpf is union with skb */ + /* xdpf and zc are union with skb */ if (entry->skb) { entry->properties = length & TSNEP_DESC_LENGTH_MASK; entry->properties |= TSNEP_DESC_INTERRUPT_FLAG; @@ -646,10 +686,69 @@ static bool tsnep_xdp_xmit_back(struct tsnep_adapter *adapter, return xmit; } +static int tsnep_xdp_tx_map_zc(struct xdp_desc *xdpd, struct tsnep_tx *tx) +{ + struct tsnep_tx_entry *entry; + dma_addr_t dma; + + entry = &tx->entry[tx->write]; + entry->zc = true; + + dma = xsk_buff_raw_get_dma(tx->xsk_pool, xdpd->addr); + xsk_buff_raw_dma_sync_for_device(tx->xsk_pool, dma, xdpd->len); + + entry->type = TSNEP_TX_TYPE_XSK; + entry->len = xdpd->len; + + entry->desc->tx = __cpu_to_le64(dma); + + return xdpd->len; +} + +static void tsnep_xdp_xmit_frame_ring_zc(struct xdp_desc *xdpd, + struct tsnep_tx *tx) +{ + int length; + + length = tsnep_xdp_tx_map_zc(xdpd, tx); + + tsnep_tx_activate(tx, tx->write, length, true); + tx->write = (tx->write + 1) & TSNEP_RING_MASK; +} + +static void tsnep_xdp_xmit_zc(struct tsnep_tx *tx) +{ + int desc_available = tsnep_tx_desc_available(tx); + struct xdp_desc *descs = tx->xsk_pool->tx_descs; + int batch, i; + + /* ensure that TX ring is not filled up by XDP, always MAX_SKB_FRAGS + * will be available for normal TX path and queue is stopped there if + * necessary + */ + if (desc_available <= (MAX_SKB_FRAGS + 1)) + return; + desc_available -= MAX_SKB_FRAGS + 1; + + batch = xsk_tx_peek_release_desc_batch(tx->xsk_pool, desc_available); + for (i = 0; i < batch; i++) + tsnep_xdp_xmit_frame_ring_zc(&descs[i], tx); + + if (batch) { + /* descriptor properties shall be valid before hardware is + * notified + */ + dma_wmb(); + + tsnep_xdp_xmit_flush(tx); + } +} + static 
bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget) { struct tsnep_tx_entry *entry; struct netdev_queue *nq; + int xsk_frames = 0; int budget = 128; int length; int count; @@ -676,7 +775,7 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget) if ((entry->type & TSNEP_TX_TYPE_SKB) && skb_shinfo(entry->skb)->nr_frags > 0) count += skb_shinfo(entry->skb)->nr_frags; - else if (!(entry->type & TSNEP_TX_TYPE_SKB) && + else if ((entry->type & TSNEP_TX_TYPE_XDP) && xdp_frame_has_frags(entry->xdpf)) count += xdp_get_shared_info_from_frame(entry->xdpf)->nr_frags; @@ -705,9 +804,11 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget) if (entry->type & TSNEP_TX_TYPE_SKB) napi_consume_skb(entry->skb, napi_budget); - else + else if (entry->type & TSNEP_TX_TYPE_XDP) xdp_return_frame_rx_napi(entry->xdpf); - /* xdpf is union with skb */ + else + xsk_frames++; + /* xdpf and zc are union with skb */ entry->skb = NULL; tx->read = (tx->read + count) & TSNEP_RING_MASK; @@ -718,6 +819,14 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget) budget--; } while (likely(budget)); + if (tx->xsk_pool) { + if (xsk_frames) + xsk_tx_completed(tx->xsk_pool, xsk_frames); + if (xsk_uses_need_wakeup(tx->xsk_pool)) + xsk_set_tx_need_wakeup(tx->xsk_pool); + tsnep_xdp_xmit_zc(tx); + } + if ((tsnep_tx_desc_available(tx) >= ((MAX_SKB_FRAGS + 1) * 2)) && netif_tx_queue_stopped(nq)) { netif_tx_wake_queue(nq); @@ -765,12 +874,6 @@ static int tsnep_tx_open(struct tsnep_tx *tx) static void tsnep_tx_close(struct tsnep_tx *tx) { - u32 val; - - readx_poll_timeout(ioread32, tx->addr + TSNEP_CONTROL, val, - ((val & TSNEP_CONTROL_TX_ENABLE) == 0), 10000, - 1000000); - tsnep_tx_ring_cleanup(tx); } @@ -1783,12 +1886,18 @@ static void tsnep_queue_enable(struct tsnep_queue *queue) napi_enable(&queue->napi); tsnep_enable_irq(queue->adapter, queue->irq_mask); + if (queue->tx) + tsnep_tx_enable(queue->tx); + if (queue->rx) tsnep_rx_enable(queue->rx); } static void 
tsnep_queue_disable(struct tsnep_queue *queue) { + if (queue->tx) + tsnep_tx_disable(queue->tx, &queue->napi); + napi_disable(&queue->napi); tsnep_disable_irq(queue->adapter, queue->irq_mask); @@ -1905,6 +2014,7 @@ int tsnep_enable_xsk(struct tsnep_queue *queue, struct xsk_buff_pool *pool) if (running) tsnep_queue_disable(queue); + queue->tx->xsk_pool = pool; queue->rx->xsk_pool = pool; if (running) { @@ -1925,6 +2035,7 @@ void tsnep_disable_xsk(struct tsnep_queue *queue) tsnep_rx_free_zc(queue->rx); queue->rx->xsk_pool = NULL; + queue->tx->xsk_pool = NULL; if (running) { tsnep_rx_reopen(queue->rx);
Send and complete XSK pool frames within TX NAPI context. NAPI context
is triggered by ndo_xsk_wakeup.

Test results with A53 1.2GHz:

xdpsock txonly copy mode:
                   pps            pkts           1.00
tx                 284,409        11,398,144
Two CPUs with 100% and 10% utilization.

xdpsock txonly zero-copy mode:
                   pps            pkts           1.00
tx                 511,929        5,890,368
Two CPUs with 100% and 1% utilization.

Packet rate increases and CPU utilization is reduced.

Signed-off-by: Gerhard Engleder <gerhard@engleder-embedded.com>
---
 drivers/net/ethernet/engleder/tsnep.h      |   2 +
 drivers/net/ethernet/engleder/tsnep_main.c | 131 +++++++++++++++++++--
 2 files changed, 123 insertions(+), 10 deletions(-)