Message ID | 1697570274-798170-4-git-send-email-radhey.shyam.pandey@amd.com (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | net: axienet: Introduce dmaengine | expand |
On Wed, 2023-10-18 at 00:47 +0530, Radhey Shyam Pandey wrote: [...] > @@ -727,6 +746,122 @@ static inline int axienet_check_tx_bd_space(struct axienet_local *lp, > return 0; > } > > +/** > + * axienet_dma_tx_cb - DMA engine callback for TX channel. > + * @data: Pointer to the axienet_local structure. > + * @result: error reporting through dmaengine_result. > + * This function is called by dmaengine driver for TX channel to notify > + * that the transmit is done. > + */ > +static void axienet_dma_tx_cb(void *data, const struct dmaengine_result *result) > +{ > + struct axienet_local *lp = data; > + struct skbuf_dma_descriptor *skbuf_dma; Minor nit: please use the reverse x-mas tree order > + > + skbuf_dma = axienet_get_tx_desc(lp, lp->tx_ring_tail++); > + u64_stats_update_begin(&lp->tx_stat_sync); > + u64_stats_add(&lp->tx_bytes, skbuf_dma->skb->len); > + u64_stats_add(&lp->tx_packets, 1); > + u64_stats_update_end(&lp->tx_stat_sync); > + dma_unmap_sg(lp->dev, skbuf_dma->sgl, skbuf_dma->sg_len, DMA_TO_DEVICE); > + dev_consume_skb_any(skbuf_dma->skb); > + if (CIRC_SPACE(lp->tx_ring_head, lp->tx_ring_tail, TX_BD_NUM_MAX) > MAX_SKB_FRAGS + 1) > + netif_wake_queue(lp->ndev); > +} > + > +/** > + * axienet_start_xmit_dmaengine - Starts the transmission. > + * @skb: sk_buff pointer that contains data to be Txed. > + * @ndev: Pointer to net_device structure. > + * > + * Return: NETDEV_TX_OK on success or any non space errors. > + * NETDEV_TX_BUSY when free element in TX skb ring buffer > + * is not available. > + * > + * This function is invoked to initiate transmission. The > + * function sets the skbs, register dma callback API and submit > + * the dma transaction. > + * Additionally if checksum offloading is supported, > + * it populates AXI Stream Control fields with appropriate values. 
> + */ > +static netdev_tx_t > +axienet_start_xmit_dmaengine(struct sk_buff *skb, struct net_device *ndev) > +{ > + struct dma_async_tx_descriptor *dma_tx_desc = NULL; > + struct axienet_local *lp = netdev_priv(ndev); > + u32 app_metadata[DMA_NUM_APP_WORDS] = {0}; > + struct skbuf_dma_descriptor *skbuf_dma; > + struct dma_device *dma_dev; > + u32 csum_start_off; > + u32 csum_index_off; > + int sg_len; > + int ret; > + > + dma_dev = lp->tx_chan->device; > + sg_len = skb_shinfo(skb)->nr_frags + 1; > + if (CIRC_SPACE(lp->tx_ring_head, lp->tx_ring_tail, TX_BD_NUM_MAX) <= sg_len) { > + netif_stop_queue(ndev); > + if (net_ratelimit()) > + netdev_warn(ndev, "TX ring unexpectedly full\n"); > + return NETDEV_TX_BUSY; > + } > + > + skbuf_dma = axienet_get_tx_desc(lp, lp->tx_ring_head); > + if (!skbuf_dma) { > + dev_kfree_skb_any(skb); > + return NETDEV_TX_OK; You can avoid some duplicate code with: goto drop_skb; and adding at the bottom of this function: drop_skb: dev_kfree_skb_any(skb); return NETDEV_TX_OK; > + } > + > + lp->tx_ring_head++; > + sg_init_table(skbuf_dma->sgl, sg_len); > + ret = skb_to_sgvec(skb, skbuf_dma->sgl, 0, skb->len); > + if (ret < 0) { > + dev_kfree_skb_any(skb); > + return NETDEV_TX_OK; Same here and below. 
> + } > + > + ret = dma_map_sg(lp->dev, skbuf_dma->sgl, sg_len, DMA_TO_DEVICE); > + if (!ret) { > + dev_kfree_skb_any(skb); > + return NETDEV_TX_OK; > + } > + > + /* Fill up app fields for checksum */ > + if (skb->ip_summed == CHECKSUM_PARTIAL) { > + if (lp->features & XAE_FEATURE_FULL_TX_CSUM) { > + /* Tx Full Checksum Offload Enabled */ > + app_metadata[0] |= 2; > + } else if (lp->features & XAE_FEATURE_PARTIAL_TX_CSUM) { > + csum_start_off = skb_transport_offset(skb); > + csum_index_off = csum_start_off + skb->csum_offset; > + /* Tx Partial Checksum Offload Enabled */ > + app_metadata[0] |= 1; > + app_metadata[1] = (csum_start_off << 16) | csum_index_off; > + } > + } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) { > + app_metadata[0] |= 2; /* Tx Full Checksum Offload Enabled */ > + } > + > + dma_tx_desc = dma_dev->device_prep_slave_sg(lp->tx_chan, skbuf_dma->sgl, > + sg_len, DMA_MEM_TO_DEV, > + DMA_PREP_INTERRUPT, (void *)app_metadata); > + if (!dma_tx_desc) > + goto xmit_error_unmap_sg; Are you leaking an skb here? You forgot to add the netif_txq_maybe_stop() call, as suggested by Jakub in the previous revision. > + > + skbuf_dma->skb = skb; > + skbuf_dma->sg_len = sg_len; > + dma_tx_desc->callback_param = lp; > + dma_tx_desc->callback_result = axienet_dma_tx_cb; > + dmaengine_submit(dma_tx_desc); > + dma_async_issue_pending(lp->tx_chan); > + > + return NETDEV_TX_OK; > + > +xmit_error_unmap_sg: > + dma_unmap_sg(lp->dev, skbuf_dma->sgl, sg_len, DMA_TO_DEVICE); If you need to drop the skb (as I suspect), you can reuse the drop_skb label here: drop_skb: dev_kfree_skb_any(skb); > + return NETDEV_TX_OK; > +} > + > /** > * axienet_tx_poll - Invoked once a transmit is completed by the > * Axi DMA Tx channel. > @@ -893,6 +1028,42 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev) > return NETDEV_TX_OK; > } > > +/** > + * axienet_dma_rx_cb - DMA engine callback for RX channel. > + * @data: Pointer to the skbuf_dma_descriptor structure. 
> + * @result: error reporting through dmaengine_result. > + * This function is called by dmaengine driver for RX channel to notify > + * that the packet is received. > + */ > +static void axienet_dma_rx_cb(void *data, const struct dmaengine_result *result) > +{ > + struct axienet_local *lp = data; > + struct skbuf_dma_descriptor *skbuf_dma; > + size_t meta_len, meta_max_len, rx_len; > + struct sk_buff *skb; > + u32 *app_metadata; Minor nit: please respect the reverse x-mas tree order > + > + skbuf_dma = axienet_get_rx_desc(lp, lp->rx_ring_tail++); > + skb = skbuf_dma->skb; > + app_metadata = dmaengine_desc_get_metadata_ptr(skbuf_dma->desc, &meta_len, > + &meta_max_len); > + dma_unmap_single(lp->dev, skbuf_dma->dma_address, lp->max_frm_size, > + DMA_FROM_DEVICE); > + /* TODO: Derive app word index programmatically */ > + rx_len = (app_metadata[LEN_APP] & 0xFFFF); > + skb_put(skb, rx_len); > + skb->protocol = eth_type_trans(skb, lp->ndev); > + skb->ip_summed = CHECKSUM_NONE; > + > + __netif_rx(skb); It's a pity you can't leverage NAPI here. I think that could be doable as a follow-up, but I'm unsure if that would fit the DMA engine model: in this callback you could cache the ready dma index (a single range should suffice) and schedule the napi instance. The actual dma processing will be done in napi poll. Another possible follow-up could be introducing a "bulk" RX callback in the DMA engine, to mitigate the indirect call overhead on a burst of RX DMA completion - assuming the DMA engine actually generates such burst. > + u64_stats_update_begin(&lp->rx_stat_sync); > + u64_stats_add(&lp->rx_packets, 1); > + u64_stats_add(&lp->rx_bytes, rx_len); > + u64_stats_update_end(&lp->rx_stat_sync); > + axienet_rx_submit_desc(lp->ndev); > + dma_async_issue_pending(lp->rx_chan); > +} > + > /** > * axienet_rx_poll - Triggered by RX ISR to complete the BD processing. > * @napi: Pointer to NAPI structure. 
> @@ -1126,6 +1297,150 @@ static irqreturn_t axienet_eth_irq(int irq, void *_ndev) > > static void axienet_dma_err_handler(struct work_struct *work); > > +/** > + * axienet_rx_submit_desc - Submit the rx descriptors to dmaengine. > + * allocate skbuff, map the scatterlist and obtain a descriptor > + * and then add the callback information and submit descriptor. > + * > + * @ndev: net_device pointer > + * > + *Return: 0, on success. > + * non-zero error value on failure > + */ > +static int axienet_rx_submit_desc(struct net_device *ndev) > +{ > + struct dma_async_tx_descriptor *dma_rx_desc = NULL; > + struct axienet_local *lp = netdev_priv(ndev); > + struct skbuf_dma_descriptor *skbuf_dma; > + struct sk_buff *skb; > + dma_addr_t addr; > + int ret; > + > + skbuf_dma = axienet_get_rx_desc(lp, lp->rx_ring_head); > + if (!skbuf_dma) > + return -ENOMEM; Minor nit: here a newline would make the code more readable > + lp->rx_ring_head++; > + skb = netdev_alloc_skb(ndev, lp->max_frm_size); > + if (!skb) > + return -ENOMEM; Another possible follow-up: usually the skb header is allocated just before sending it to the network stack (e.g. just before the __netif_rx() call) to be cache friendly. Here you could allocate just the data part and later use e.g. 
build_skb_around() > + > + sg_init_table(skbuf_dma->sgl, 1); > + addr = dma_map_single(lp->dev, skb->data, lp->max_frm_size, DMA_FROM_DEVICE); > + if (unlikely(dma_mapping_error(lp->dev, addr))) { > + if (net_ratelimit()) > + netdev_err(ndev, "DMA mapping error\n"); > + ret = -ENOMEM; > + goto rx_submit_err_free_skb; > + } > + sg_dma_address(skbuf_dma->sgl) = addr; > + sg_dma_len(skbuf_dma->sgl) = lp->max_frm_size; > + dma_rx_desc = dmaengine_prep_slave_sg(lp->rx_chan, skbuf_dma->sgl, > + 1, DMA_DEV_TO_MEM, > + DMA_PREP_INTERRUPT); > + if (!dma_rx_desc) { > + ret = -EINVAL; > + goto rx_submit_err_unmap_skb; > + } > + > + skbuf_dma->skb = skb; > + skbuf_dma->dma_address = sg_dma_address(skbuf_dma->sgl); > + skbuf_dma->desc = dma_rx_desc; > + dma_rx_desc->callback_param = lp; > + dma_rx_desc->callback_result = axienet_dma_rx_cb; > + dmaengine_submit(dma_rx_desc); > + > + return 0; > + > +rx_submit_err_unmap_skb: > + dma_unmap_single(lp->dev, addr, lp->max_frm_size, DMA_FROM_DEVICE); > +rx_submit_err_free_skb: > + dev_kfree_skb(skb); > + return ret; It looks like the error code is ignored by the caller. Possibly you can change this to a 'void' function. Cheers, Paolo
<snip> > Minor nit: please use the reverse x-mas tree order Sure, will fix it in next version. <snip> > You can avoid some duplicate code with: > goto drop_skb; > > and adding at the bottom of this function: > > drop_skb: > dev_kfree_skb_any(skb); > return NETDEV_TX_OK; > Agree, will switch to it in next version. <snip> > You forgot to add the netif_txq_maybe_stop() call, as suggested by Jakub in > the previous revision. I was under the impression that these are multi-queue-specific APIs, so I skipped them. But I revisited the implementation, it seems clear now, and I have modified the driver to use these lockless queue stopping / waking helpers. + txq = skb_get_tx_queue(lp->ndev, skb); + netdev_tx_sent_queue(txq, skb->len); + netif_txq_maybe_stop(txq, CIRC_SPACE(lp->tx_ring_head, lp->tx_ring_tail, + TX_BD_NUM_MAX), MAX_SKB_FRAGS + 1, 2 * MAX_SKB_FRAGS); However, in the netperf benchmark (TCP TX) I am seeing a dip in performance (~35-40Mbps) when switching to these stop/wake helpers. Is this expected, considering the extra logic in maintaining the dynamic queue and these helpers? Also, during throughput benchmarking there was no occurrence of the queue being stopped/woken up. Throughput: (10^6bits/sec) 915.55 (v8 - without using lockless queue stop/wake helpers) ====== Switch to lockless queue stop/wake helpers # netperf -H 192.168.10.20 -t TCP_STREAM MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.10.20 () port 0 AF_INET Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 
10^6bits/sec 131072 16384 16384 10.02 876.94 > > > + > > + skbuf_dma->skb = skb; > > + skbuf_dma->sg_len = sg_len; > > + dma_tx_desc->callback_param = lp; > > + dma_tx_desc->callback_result = axienet_dma_tx_cb; > > + dmaengine_submit(dma_tx_desc); > > + dma_async_issue_pending(lp->tx_chan); > > + > > + return NETDEV_TX_OK; > > + > > +xmit_error_unmap_sg: > > + dma_unmap_sg(lp->dev, skbuf_dma->sgl, sg_len, DMA_TO_DEVICE); > > If you need to drop the skb (as I suspect), you can reuse the drop_skb label > here: > > drop_skb: > dev_kfree_skb_any(skb); > > + return NETDEV_TX_OK; Yes , will make this change and this will also fix skb leak. > > +} > > + > > /** > > * axienet_tx_poll - Invoked once a transmit is completed by the > > * Axi DMA Tx channel. > > @@ -893,6 +1028,42 @@ axienet_start_xmit(struct sk_buff *skb, struct > net_device *ndev) > > return NETDEV_TX_OK; > > } > > > > +/** > > + * axienet_dma_rx_cb - DMA engine callback for RX channel. > > + * @data: Pointer to the skbuf_dma_descriptor structure. > > + * @result: error reporting through dmaengine_result. > > + * This function is called by dmaengine driver for RX channel to > > +notify > > + * that the packet is received. > > + */ > > +static void axienet_dma_rx_cb(void *data, const struct > > +dmaengine_result *result) { > > + struct axienet_local *lp = data; > > + struct skbuf_dma_descriptor *skbuf_dma; > > + size_t meta_len, meta_max_len, rx_len; > > + struct sk_buff *skb; > > + u32 *app_metadata; > > Minor nit: please respect the reverse x-mas tree order Yes, will fix it in next version. 
> > > + > > + skbuf_dma = axienet_get_rx_desc(lp, lp->rx_ring_tail++); > > + skb = skbuf_dma->skb; > > + app_metadata = dmaengine_desc_get_metadata_ptr(skbuf_dma- > >desc, &meta_len, > > + &meta_max_len); > > + dma_unmap_single(lp->dev, skbuf_dma->dma_address, lp- > >max_frm_size, > > + DMA_FROM_DEVICE); > > + /* TODO: Derive app word index programmatically */ > > + rx_len = (app_metadata[LEN_APP] & 0xFFFF); > > + skb_put(skb, rx_len); > > + skb->protocol = eth_type_trans(skb, lp->ndev); > > + skb->ip_summed = CHECKSUM_NONE; > > + > > + __netif_rx(skb); > > It's a pity you can't leverage NAPI here. > > I think that could be doable as a follow-up, but I'm unsure if that would fit > the DMA engine model: in this callback you could cache the ready dma index > (a single range should suffice) and schedule the napi instance. The actual > dma processing will be done in napi poll. > > Another possible follow-up could be introducing a "bulk" RX callback in the > DMA engine, to mitigate the indirect call overhead on a burst of RX DMA > completion - assuming the DMA engine actually generates such burst. Agree , these are possible thoughts and will start working on it once this baseline dmaengine support series is done. > > > + u64_stats_update_begin(&lp->rx_stat_sync); > > + u64_stats_add(&lp->rx_packets, 1); > > + u64_stats_add(&lp->rx_bytes, rx_len); > > + u64_stats_update_end(&lp->rx_stat_sync); > > + axienet_rx_submit_desc(lp->ndev); > > + dma_async_issue_pending(lp->rx_chan); > > +} > > + > > /** > > * axienet_rx_poll - Triggered by RX ISR to complete the BD processing. > > * @napi: Pointer to NAPI structure. > > @@ -1126,6 +1297,150 @@ static irqreturn_t axienet_eth_irq(int irq, > > void *_ndev) > > > > static void axienet_dma_err_handler(struct work_struct *work); > > > > +/** > > + * axienet_rx_submit_desc - Submit the rx descriptors to dmaengine. 
> > + * allocate skbuff, map the scatterlist and obtain a descriptor > > + * and then add the callback information and submit descriptor. > > + * > > + * @ndev: net_device pointer > > + * > > + *Return: 0, on success. > > + * non-zero error value on failure > > + */ > > +static int axienet_rx_submit_desc(struct net_device *ndev) { > > + struct dma_async_tx_descriptor *dma_rx_desc = NULL; > > + struct axienet_local *lp = netdev_priv(ndev); > > + struct skbuf_dma_descriptor *skbuf_dma; > > + struct sk_buff *skb; > > + dma_addr_t addr; > > + int ret; > > + > > + skbuf_dma = axienet_get_rx_desc(lp, lp->rx_ring_head); > > + if (!skbuf_dma) > > + return -ENOMEM; > > Minor nit: here a newline would make the core more readable Accepted , will add in next version. > > > + lp->rx_ring_head++; > > + skb = netdev_alloc_skb(ndev, lp->max_frm_size); > > + if (!skb) > > + return -ENOMEM; > > Another possible follow-up: usually the skb header is allocated just before > sending it to the network stack (e.g. just before the > __netif_rx() call) to be cache friendly. Here you could allocate just the data > part and later use e.g. build_skb_around() Sure, will explore on it. 
> > > + > > + sg_init_table(skbuf_dma->sgl, 1); > > + addr = dma_map_single(lp->dev, skb->data, lp->max_frm_size, > DMA_FROM_DEVICE); > > + if (unlikely(dma_mapping_error(lp->dev, addr))) { > > + if (net_ratelimit()) > > + netdev_err(ndev, "DMA mapping error\n"); > > + ret = -ENOMEM; > > + goto rx_submit_err_free_skb; > > + } > > + sg_dma_address(skbuf_dma->sgl) = addr; > > + sg_dma_len(skbuf_dma->sgl) = lp->max_frm_size; > > + dma_rx_desc = dmaengine_prep_slave_sg(lp->rx_chan, skbuf_dma- > >sgl, > > + 1, DMA_DEV_TO_MEM, > > + DMA_PREP_INTERRUPT); > > + if (!dma_rx_desc) { > > + ret = -EINVAL; > > + goto rx_submit_err_unmap_skb; > > + } > > + > > + skbuf_dma->skb = skb; > > + skbuf_dma->dma_address = sg_dma_address(skbuf_dma->sgl); > > + skbuf_dma->desc = dma_rx_desc; > > + dma_rx_desc->callback_param = lp; > > + dma_rx_desc->callback_result = axienet_dma_rx_cb; > > + dmaengine_submit(dma_rx_desc); > > + > > + return 0; > > + > > +rx_submit_err_unmap_skb: > > + dma_unmap_single(lp->dev, addr, lp->max_frm_size, > DMA_FROM_DEVICE); > > +rx_submit_err_free_skb: > > + dev_kfree_skb(skb); > > + return ret; > > It looks like the error code is ignored by the caller. Possibly you can change > this to a 'void' function. will make it void in next version. Thanks, Radhey
diff --git a/drivers/net/ethernet/xilinx/Kconfig b/drivers/net/ethernet/xilinx/Kconfig index 0014729b8865..35d96c633a33 100644 --- a/drivers/net/ethernet/xilinx/Kconfig +++ b/drivers/net/ethernet/xilinx/Kconfig @@ -26,6 +26,7 @@ config XILINX_EMACLITE config XILINX_AXI_EMAC tristate "Xilinx 10/100/1000 AXI Ethernet support" depends on HAS_IOMEM + depends on XILINX_DMA select PHYLINK help This driver supports the 10/100/1000 Ethernet from Xilinx for the diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h index 3ead0bac597b..807ead678551 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet.h +++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h @@ -14,6 +14,7 @@ #include <linux/interrupt.h> #include <linux/if_vlan.h> #include <linux/phylink.h> +#include <linux/skbuff.h> /* Packet size info */ #define XAE_HDR_SIZE 14 /* Size of Ethernet header */ @@ -378,6 +379,22 @@ struct axidma_bd { #define XAE_NUM_MISC_CLOCKS 3 +/** + * struct skbuf_dma_descriptor - skb for each dma descriptor + * @sgl: Pointer for sglist. + * @desc: Pointer to dma descriptor. + * @dma_address: dma address of sglist. + * @skb: Pointer to SKB transferred using DMA + * @sg_len: number of entries in the sglist. + */ +struct skbuf_dma_descriptor { + struct scatterlist sgl[MAX_SKB_FRAGS + 1]; + struct dma_async_tx_descriptor *desc; + dma_addr_t dma_address; + struct sk_buff *skb; + int sg_len; +}; + /** * struct axienet_local - axienet private per device data * @ndev: Pointer for net_device to which it will be attached. @@ -436,6 +453,14 @@ struct axidma_bd { * @coalesce_count_tx: Store the irq coalesce on TX side. * @coalesce_usec_tx: IRQ coalesce delay for TX * @use_dmaengine: flag to check dmaengine framework usage. + * @tx_chan: TX DMA channel. + * @rx_chan: RX DMA channel. + * @tx_skb_ring: Pointer to TX skb ring buffer array. + * @rx_skb_ring: Pointer to RX skb ring buffer array. + * @tx_ring_head: TX skb ring buffer head index. 
+ * @tx_ring_tail: TX skb ring buffer tail index. + * @rx_ring_head: RX skb ring buffer head index. + * @rx_ring_tail: RX skb ring buffer tail index. */ struct axienet_local { struct net_device *ndev; @@ -501,6 +526,14 @@ struct axienet_local { u32 coalesce_count_tx; u32 coalesce_usec_tx; u8 use_dmaengine; + struct dma_chan *tx_chan; + struct dma_chan *rx_chan; + struct skbuf_dma_descriptor **tx_skb_ring; + struct skbuf_dma_descriptor **rx_skb_ring; + int tx_ring_head; + int tx_ring_tail; + int rx_ring_head; + int rx_ring_tail; }; /** diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 188b03e86263..36c43b555921 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -38,6 +38,10 @@ #include <linux/phy.h> #include <linux/mii.h> #include <linux/ethtool.h> +#include <linux/dmaengine.h> +#include <linux/dma-mapping.h> +#include <linux/dma/xilinx_dma.h> +#include <linux/circ_buf.h> #include "xilinx_axienet.h" @@ -47,6 +51,9 @@ #define TX_BD_NUM_MIN (MAX_SKB_FRAGS + 1) #define TX_BD_NUM_MAX 4096 #define RX_BD_NUM_MAX 4096 +#define DMA_NUM_APP_WORDS 5 +#define LEN_APP 4 +#define RX_BUF_NUM_DEFAULT 128 /* Must be shorter than length of ethtool_drvinfo.driver field to fit */ #define DRIVER_NAME "xaxienet" @@ -55,6 +62,8 @@ #define AXIENET_REGS_N 40 +static int axienet_rx_submit_desc(struct net_device *ndev); + /* Match table for of_platform binding */ static const struct of_device_id axienet_of_match[] = { { .compatible = "xlnx,axi-ethernet-1.00.a", }, @@ -120,6 +129,16 @@ static struct axienet_option axienet_options[] = { {} }; +static struct skbuf_dma_descriptor *axienet_get_rx_desc(struct axienet_local *lp, int i) +{ + return lp->rx_skb_ring[i & (RX_BUF_NUM_DEFAULT - 1)]; +} + +static struct skbuf_dma_descriptor *axienet_get_tx_desc(struct axienet_local *lp, int i) +{ + return lp->tx_skb_ring[i & (TX_BD_NUM_MAX - 1)]; +} + /** * 
axienet_dma_in32 - Memory mapped Axi DMA register read * @lp: Pointer to axienet local structure @@ -727,6 +746,122 @@ static inline int axienet_check_tx_bd_space(struct axienet_local *lp, return 0; } +/** + * axienet_dma_tx_cb - DMA engine callback for TX channel. + * @data: Pointer to the axienet_local structure. + * @result: error reporting through dmaengine_result. + * This function is called by dmaengine driver for TX channel to notify + * that the transmit is done. + */ +static void axienet_dma_tx_cb(void *data, const struct dmaengine_result *result) +{ + struct axienet_local *lp = data; + struct skbuf_dma_descriptor *skbuf_dma; + + skbuf_dma = axienet_get_tx_desc(lp, lp->tx_ring_tail++); + u64_stats_update_begin(&lp->tx_stat_sync); + u64_stats_add(&lp->tx_bytes, skbuf_dma->skb->len); + u64_stats_add(&lp->tx_packets, 1); + u64_stats_update_end(&lp->tx_stat_sync); + dma_unmap_sg(lp->dev, skbuf_dma->sgl, skbuf_dma->sg_len, DMA_TO_DEVICE); + dev_consume_skb_any(skbuf_dma->skb); + if (CIRC_SPACE(lp->tx_ring_head, lp->tx_ring_tail, TX_BD_NUM_MAX) > MAX_SKB_FRAGS + 1) + netif_wake_queue(lp->ndev); +} + +/** + * axienet_start_xmit_dmaengine - Starts the transmission. + * @skb: sk_buff pointer that contains data to be Txed. + * @ndev: Pointer to net_device structure. + * + * Return: NETDEV_TX_OK on success or any non space errors. + * NETDEV_TX_BUSY when free element in TX skb ring buffer + * is not available. + * + * This function is invoked to initiate transmission. The + * function sets the skbs, register dma callback API and submit + * the dma transaction. + * Additionally if checksum offloading is supported, + * it populates AXI Stream Control fields with appropriate values. 
+ */ +static netdev_tx_t +axienet_start_xmit_dmaengine(struct sk_buff *skb, struct net_device *ndev) +{ + struct dma_async_tx_descriptor *dma_tx_desc = NULL; + struct axienet_local *lp = netdev_priv(ndev); + u32 app_metadata[DMA_NUM_APP_WORDS] = {0}; + struct skbuf_dma_descriptor *skbuf_dma; + struct dma_device *dma_dev; + u32 csum_start_off; + u32 csum_index_off; + int sg_len; + int ret; + + dma_dev = lp->tx_chan->device; + sg_len = skb_shinfo(skb)->nr_frags + 1; + if (CIRC_SPACE(lp->tx_ring_head, lp->tx_ring_tail, TX_BD_NUM_MAX) <= sg_len) { + netif_stop_queue(ndev); + if (net_ratelimit()) + netdev_warn(ndev, "TX ring unexpectedly full\n"); + return NETDEV_TX_BUSY; + } + + skbuf_dma = axienet_get_tx_desc(lp, lp->tx_ring_head); + if (!skbuf_dma) { + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + + lp->tx_ring_head++; + sg_init_table(skbuf_dma->sgl, sg_len); + ret = skb_to_sgvec(skb, skbuf_dma->sgl, 0, skb->len); + if (ret < 0) { + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + + ret = dma_map_sg(lp->dev, skbuf_dma->sgl, sg_len, DMA_TO_DEVICE); + if (!ret) { + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + + /* Fill up app fields for checksum */ + if (skb->ip_summed == CHECKSUM_PARTIAL) { + if (lp->features & XAE_FEATURE_FULL_TX_CSUM) { + /* Tx Full Checksum Offload Enabled */ + app_metadata[0] |= 2; + } else if (lp->features & XAE_FEATURE_PARTIAL_TX_CSUM) { + csum_start_off = skb_transport_offset(skb); + csum_index_off = csum_start_off + skb->csum_offset; + /* Tx Partial Checksum Offload Enabled */ + app_metadata[0] |= 1; + app_metadata[1] = (csum_start_off << 16) | csum_index_off; + } + } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) { + app_metadata[0] |= 2; /* Tx Full Checksum Offload Enabled */ + } + + dma_tx_desc = dma_dev->device_prep_slave_sg(lp->tx_chan, skbuf_dma->sgl, + sg_len, DMA_MEM_TO_DEV, + DMA_PREP_INTERRUPT, (void *)app_metadata); + if (!dma_tx_desc) + goto xmit_error_unmap_sg; + + skbuf_dma->skb = skb; + skbuf_dma->sg_len = 
sg_len; + dma_tx_desc->callback_param = lp; + dma_tx_desc->callback_result = axienet_dma_tx_cb; + dmaengine_submit(dma_tx_desc); + dma_async_issue_pending(lp->tx_chan); + + return NETDEV_TX_OK; + +xmit_error_unmap_sg: + dma_unmap_sg(lp->dev, skbuf_dma->sgl, sg_len, DMA_TO_DEVICE); + return NETDEV_TX_OK; +} + /** * axienet_tx_poll - Invoked once a transmit is completed by the * Axi DMA Tx channel. @@ -893,6 +1028,42 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev) return NETDEV_TX_OK; } +/** + * axienet_dma_rx_cb - DMA engine callback for RX channel. + * @data: Pointer to the skbuf_dma_descriptor structure. + * @result: error reporting through dmaengine_result. + * This function is called by dmaengine driver for RX channel to notify + * that the packet is received. + */ +static void axienet_dma_rx_cb(void *data, const struct dmaengine_result *result) +{ + struct axienet_local *lp = data; + struct skbuf_dma_descriptor *skbuf_dma; + size_t meta_len, meta_max_len, rx_len; + struct sk_buff *skb; + u32 *app_metadata; + + skbuf_dma = axienet_get_rx_desc(lp, lp->rx_ring_tail++); + skb = skbuf_dma->skb; + app_metadata = dmaengine_desc_get_metadata_ptr(skbuf_dma->desc, &meta_len, + &meta_max_len); + dma_unmap_single(lp->dev, skbuf_dma->dma_address, lp->max_frm_size, + DMA_FROM_DEVICE); + /* TODO: Derive app word index programmatically */ + rx_len = (app_metadata[LEN_APP] & 0xFFFF); + skb_put(skb, rx_len); + skb->protocol = eth_type_trans(skb, lp->ndev); + skb->ip_summed = CHECKSUM_NONE; + + __netif_rx(skb); + u64_stats_update_begin(&lp->rx_stat_sync); + u64_stats_add(&lp->rx_packets, 1); + u64_stats_add(&lp->rx_bytes, rx_len); + u64_stats_update_end(&lp->rx_stat_sync); + axienet_rx_submit_desc(lp->ndev); + dma_async_issue_pending(lp->rx_chan); +} + /** * axienet_rx_poll - Triggered by RX ISR to complete the BD processing. * @napi: Pointer to NAPI structure. 
@@ -1126,6 +1297,150 @@ static irqreturn_t axienet_eth_irq(int irq, void *_ndev)
 
 static void axienet_dma_err_handler(struct work_struct *work);
 
+/**
+ * axienet_rx_submit_desc - Submit the rx descriptors to dmaengine.
+ * allocate skbuff, map the scatterlist and obtain a descriptor
+ * and then add the callback information and submit descriptor.
+ *
+ * @ndev: net_device pointer
+ *
+ * The ring head is advanced only when a descriptor is actually handed to
+ * the DMA engine, so a failed attempt leaves the slot free for a retry.
+ *
+ * Return: 0, on success.
+ *	    non-zero error value on failure
+ */
+static int axienet_rx_submit_desc(struct net_device *ndev)
+{
+	struct dma_async_tx_descriptor *dma_rx_desc = NULL;
+	struct axienet_local *lp = netdev_priv(ndev);
+	struct skbuf_dma_descriptor *skbuf_dma;
+	struct sk_buff *skb;
+	dma_addr_t addr;
+	int ret;
+
+	skbuf_dma = axienet_get_rx_desc(lp, lp->rx_ring_head);
+	if (!skbuf_dma)
+		return -ENOMEM;
+
+	skb = netdev_alloc_skb(ndev, lp->max_frm_size);
+	if (!skb)
+		return -ENOMEM;
+
+	sg_init_table(skbuf_dma->sgl, 1);
+	addr = dma_map_single(lp->dev, skb->data, lp->max_frm_size, DMA_FROM_DEVICE);
+	if (unlikely(dma_mapping_error(lp->dev, addr))) {
+		if (net_ratelimit())
+			netdev_err(ndev, "DMA mapping error\n");
+		ret = -ENOMEM;
+		goto rx_submit_err_free_skb;
+	}
+	sg_dma_address(skbuf_dma->sgl) = addr;
+	sg_dma_len(skbuf_dma->sgl) = lp->max_frm_size;
+	dma_rx_desc = dmaengine_prep_slave_sg(lp->rx_chan, skbuf_dma->sgl,
+					      1, DMA_DEV_TO_MEM,
+					      DMA_PREP_INTERRUPT);
+	if (!dma_rx_desc) {
+		ret = -EINVAL;
+		goto rx_submit_err_unmap_skb;
+	}
+
+	skbuf_dma->skb = skb;
+	skbuf_dma->dma_address = sg_dma_address(skbuf_dma->sgl);
+	skbuf_dma->desc = dma_rx_desc;
+	/* All fallible steps are done: consume the ring slot now. The RX
+	 * callback advances rx_ring_tail once per completed descriptor, so
+	 * head must only count descriptors that were really submitted.
+	 */
+	lp->rx_ring_head++;
+	dma_rx_desc->callback_param = lp;
+	dma_rx_desc->callback_result = axienet_dma_rx_cb;
+	dmaengine_submit(dma_rx_desc);
+
+	return 0;
+
+rx_submit_err_unmap_skb:
+	dma_unmap_single(lp->dev, addr, lp->max_frm_size, DMA_FROM_DEVICE);
+rx_submit_err_free_skb:
+	dev_kfree_skb(skb);
+	return ret;
+}
+
+/**
+ * axienet_init_dmaengine - init the dmaengine code.
+ * @ndev: Pointer to net_device structure
+ *
+ * Return: 0, on success.
+ *	    non-zero error value on failure
+ *
+ * This is the dmaengine initialization code. It requests the "tx_chan0"
+ * and "rx_chan0" DMA channels, resets the ring indices, allocates the TX
+ * and RX skb descriptor rings, submits RX_BUF_NUM_DEFAULT receive
+ * descriptors and issues them on the RX channel. On failure everything
+ * acquired so far is released in reverse order.
+ */
+static int axienet_init_dmaengine(struct net_device *ndev)
+{
+	struct axienet_local *lp = netdev_priv(ndev);
+	struct skbuf_dma_descriptor *skbuf_dma;
+	int i, ret;
+
+	lp->tx_chan = dma_request_chan(lp->dev, "tx_chan0");
+	if (IS_ERR(lp->tx_chan)) {
+		dev_err(lp->dev, "No Ethernet DMA (TX) channel found\n");
+		return PTR_ERR(lp->tx_chan);
+	}
+
+	lp->rx_chan = dma_request_chan(lp->dev, "rx_chan0");
+	if (IS_ERR(lp->rx_chan)) {
+		ret = PTR_ERR(lp->rx_chan);
+		dev_err(lp->dev, "No Ethernet DMA (RX) channel found\n");
+		goto err_dma_release_tx;
+	}
+
+	lp->tx_ring_tail = 0;
+	lp->tx_ring_head = 0;
+	lp->rx_ring_tail = 0;
+	lp->rx_ring_head = 0;
+	lp->tx_skb_ring = kcalloc(TX_BD_NUM_MAX, sizeof(*lp->tx_skb_ring),
+				  GFP_KERNEL);
+	if (!lp->tx_skb_ring) {
+		ret = -ENOMEM;
+		goto err_dma_release_rx;
+	}
+	for (i = 0; i < TX_BD_NUM_MAX; i++) {
+		skbuf_dma = kzalloc(sizeof(*skbuf_dma), GFP_KERNEL);
+		if (!skbuf_dma) {
+			ret = -ENOMEM;
+			goto err_free_tx_skb_ring;
+		}
+		lp->tx_skb_ring[i] = skbuf_dma;
+	}
+
+	lp->rx_skb_ring = kcalloc(RX_BUF_NUM_DEFAULT, sizeof(*lp->rx_skb_ring),
+				  GFP_KERNEL);
+	if (!lp->rx_skb_ring) {
+		ret = -ENOMEM;
+		goto err_free_tx_skb_ring;
+	}
+	for (i = 0; i < RX_BUF_NUM_DEFAULT; i++) {
+		skbuf_dma = kzalloc(sizeof(*skbuf_dma), GFP_KERNEL);
+		if (!skbuf_dma) {
+			ret = -ENOMEM;
+			goto err_free_rx_skb_ring;
+		}
+		lp->rx_skb_ring[i] = skbuf_dma;
+	}
+	/* TODO: Instead of BD_NUM_DEFAULT use runtime support.
+	 * One receive descriptor is submitted per RX ring slot; the return
+	 * value of axienet_rx_submit_desc() is not checked here.
+	 */
+	for (i = 0; i < RX_BUF_NUM_DEFAULT; i++)
+		axienet_rx_submit_desc(ndev);
+	dma_async_issue_pending(lp->rx_chan);
+
+	return 0;
+
+err_free_rx_skb_ring:
+	for (i = 0; i < RX_BUF_NUM_DEFAULT; i++)
+		kfree(lp->rx_skb_ring[i]); /* unfilled slots are NULL from kcalloc() */
+	kfree(lp->rx_skb_ring);
+err_free_tx_skb_ring:
+	for (i = 0; i < TX_BD_NUM_MAX; i++)
+		kfree(lp->tx_skb_ring[i]); /* unfilled slots are NULL from kcalloc() */
+	kfree(lp->tx_skb_ring);
+err_dma_release_rx:
+	dma_release_channel(lp->rx_chan);
+err_dma_release_tx:
+	dma_release_channel(lp->tx_chan);
+	return ret;
+}
+
 /**
  * axienet_init_legacy_dma - init the dma legacy code.
  * @ndev: Pointer to net_device structure
@@ -1216,7 +1531,19 @@ static int axienet_open(struct net_device *ndev)
 
 	phylink_start(lp->phylink);
 
-	if (!lp->use_dmaengine) {
+	if (lp->use_dmaengine) {
+		/* Enable interrupts for Axi Ethernet core (if defined) */
+		if (lp->eth_irq > 0) {
+			ret = request_irq(lp->eth_irq, axienet_eth_irq, IRQF_SHARED,
+					  ndev->name, ndev);
+			if (ret)
+				goto err_phy;
+		}
+
+		ret = axienet_init_dmaengine(ndev);
+		if (ret < 0)
+			goto err_free_eth_irq;
+	} else {
 		ret = axienet_init_legacy_dma(ndev);
 		if (ret)
 			goto err_phy;
@@ -1224,6 +1551,9 @@ static int axienet_open(struct net_device *ndev)
 
 	return 0;
 
+err_free_eth_irq:
+	if (lp->eth_irq > 0)
+		free_irq(lp->eth_irq, ndev);
 err_phy:
 	phylink_stop(lp->phylink);
 	phylink_disconnect_phy(lp->phylink);
@@ -1243,6 +1573,7 @@ static int axienet_open(struct net_device *ndev)
 static int axienet_stop(struct net_device *ndev)
 {
 	struct axienet_local *lp = netdev_priv(ndev);
+	int i;
 
 	dev_dbg(&ndev->dev, "axienet_close()\n");
 
@@ -1263,6 +1594,21 @@ static int axienet_stop(struct net_device *ndev)
 		free_irq(lp->tx_irq, ndev);
 		free_irq(lp->rx_irq, ndev);
 		axienet_dma_bd_release(ndev);
+	} else {
+		dmaengine_terminate_sync(lp->tx_chan);
+		dmaengine_synchronize(lp->tx_chan);
+		dmaengine_terminate_sync(lp->rx_chan);
+		dmaengine_synchronize(lp->rx_chan);
+
+		for (i = 0; i < TX_BD_NUM_MAX; i++)
+			kfree(lp->tx_skb_ring[i]);
+		kfree(lp->tx_skb_ring);
+		for (i = 0; i < RX_BUF_NUM_DEFAULT; i++)
+			kfree(lp->rx_skb_ring[i]); /* NOTE(review): only the ring entry is freed here, not skbuf_dma->skb - confirm RX skbs are released elsewhere */
+		kfree(lp->rx_skb_ring);
+
+		dma_release_channel(lp->rx_chan);
+		dma_release_channel(lp->tx_chan);
 	}
 
 	axienet_iow(lp, XAE_IE_OFFSET, 0);
@@ -1365,6 +1711,18 @@ static const struct net_device_ops axienet_netdev_ops = {
 #endif
 };
 
+static const struct net_device_ops axienet_netdev_dmaengine_ops = {
+	.ndo_open = axienet_open,
+	.ndo_stop = axienet_stop,
+	.ndo_start_xmit = axienet_start_xmit_dmaengine, /* dmaengine TX path */
+	.ndo_get_stats64 = axienet_get_stats64,
+	.ndo_change_mtu = axienet_change_mtu,
+	.ndo_set_mac_address = netdev_set_mac_address,
+	.ndo_validate_addr = eth_validate_addr,
+	.ndo_eth_ioctl = axienet_ioctl,
+	.ndo_set_rx_mode = axienet_set_multicast_list,
+};
+
 /**
  * axienet_ethtools_get_drvinfo - Get various Axi Ethernet driver information.
  * @ndev: Pointer to net_device structure
@@ -1897,7 +2255,6 @@ static int axienet_probe(struct platform_device *pdev)
 	SET_NETDEV_DEV(ndev, &pdev->dev);
 	ndev->flags &= ~IFF_MULTICAST; /* clear multicast */
 	ndev->features = NETIF_F_SG;
-	ndev->netdev_ops = &axienet_netdev_ops;
 	ndev->ethtool_ops = &axienet_ethtool_ops;
 
 	/* MTU range: 64 - 9000 */
@@ -2118,8 +2475,39 @@ static int axienet_probe(struct platform_device *pdev)
 		}
 		netif_napi_add(ndev, &lp->napi_rx, axienet_rx_poll);
 		netif_napi_add(ndev, &lp->napi_tx, axienet_tx_poll);
+	} else {
+		struct xilinx_vdma_config cfg; /* only .reset is assigned below */
+		struct dma_chan *tx_chan;
+
+		lp->eth_irq = platform_get_irq_optional(pdev, 0);
+		if (lp->eth_irq < 0 && lp->eth_irq != -ENXIO) {
+			ret = lp->eth_irq;
+			goto cleanup_clk;
+		}
+		tx_chan = dma_request_chan(lp->dev, "tx_chan0");
+		if (IS_ERR(tx_chan)) {
+			ret = PTR_ERR(tx_chan);
+			dev_err_probe(lp->dev, ret, "No Ethernet DMA (TX) channel found\n");
+			goto cleanup_clk;
+		}
+
+		cfg.reset = 1;
+		/* As name says VDMA but it has support for DMA channel reset */
+		ret = xilinx_vdma_channel_set_config(tx_chan, &cfg);
+		if (ret < 0) {
+			dev_err(&pdev->dev, "Reset channel failed\n");
+			dma_release_channel(tx_chan);
+			goto cleanup_clk;
+		}
+
+		dma_release_channel(tx_chan); /* channel was only needed for the reset */
+		lp->use_dmaengine = 1;
 	}
 
+	if (lp->use_dmaengine)
+		ndev->netdev_ops = &axienet_netdev_dmaengine_ops;
+	else
+		ndev->netdev_ops = &axienet_netdev_ops;
 	/* Check for Ethernet core IRQ (optional) */
 	if (lp->eth_irq <= 0)
 		dev_info(&pdev->dev, "Ethernet core IRQ not defined\n");
Add dmaengine framework support to communicate with the Xilinx DMA engine
driver (AXIDMA).

The Axi Ethernet driver uses separate channels for transmit and receive.
Add support for these channels to handle TX and RX with skbs and the
appropriate callbacks. Also add the Axi Ethernet core interrupt for
dmaengine framework support.

The dmaengine framework was extended for metadata API support. However, it
still needs further enhancements to make it well suited for Ethernet use
cases. Some Ethernet features, i.e. ethtool set/get of DMA IP properties and
ndo_poll_controller (mentioned in the TODO), are not supported and require
follow-up discussions.

dmaengine support has a dependency on xilinx_dma, as it uses the
xilinx_vdma_channel_set_config() API to reset the DMA IP, which internally
resets the MAC prior to accessing MDIO.

Benchmark with netperf:

xilinx-zcu102-20232:~$ netperf -H 192.168.10.20 -t TCP_STREAM
MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET
to 192.168.10.20 () port 0 AF_INET
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

131072  16384  16384    10.03     915.55

xilinx-zcu102-20232:~$ netperf -H 192.168.10.20 -t UDP_STREAM
MIGRATED UDP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET
to 192.168.10.20 () port 0 AF_INET
Socket  Message  Elapsed      Messages
Size    Size     Time         Okay Errors   Throughput
bytes   bytes    secs            #      #   10^6bits/sec

212992   65507   10.00       18192      0     953.35
212992           10.00       18192            953.35

Signed-off-by: Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>
---
Changes for v8:
- Use dev_consume_skb_any() in transmit callback.
- Fix queue stop logic in _xmit_dmaengine().
- Fix skb leak in _xmit_dmaengine() error path.
- In dmaengine tx path use dma_device pointer to call prep_slave_sg.
- In rx submit use rate limiting for mapping errors and fix error handling.
- Revert dev_err_probe from _init_dmaengine().
- Remove unnecessary new line after call to _init_dmaengine().
- Move free_irq(lp->eth_irq) to error path.
- Have separate netdev_ops for dmaengine flow.
- Improve axienet_dma_tx_cb and axienet_rx_submit_desc documentation.

Changes for v7:
- Fix comment spaces.
- In xmit use correct XAE_FEATURE_PARTIAL_TX_CSUM define.
- Rename app to app_metadata.
- Switch to __netif_rx.
- In axienet_rx_submit_desc() add mapping error handling.

Changes for v6:
- Rename struct axi_skbuff to skbuf_dma_descriptor and removed __packed
  attribute.
- Drop kmem_cache implementation and switch to using ring buffers.
- Remove __inline from axienet_init_dmaengine().
- Name labels after the target.
- Add error check for platform_get_irq_optional().
- Fix double space and no empty lines between call and its error check.

Changes for v5:
- Switch to amd.com email.
- Modified commit description. Remove lore link, mention reset API,
  add performance numbers.
- Fix kmem_cache resource leak on stop.
- Use dmaengine_terminate_sync instead of deprecated
  dmaengine_terminate_all API.

Changes for v4:
- Remove the AXIENET_USE_DMA.
- Add dev_err_probe for dma_request_chan error handling.
- Add kmem_cache_destroy for create in axienet_setup_dma_chan.
- Add XILINX_DMA dependency in ethernet driver Kconfig file.
- Move setup_dma_channel to init_dmaengine func.
- Remove unlikely if (unlikely(ret < 0)).
- if (ret == 0) to if (!ret).
- Rename DMA_MEM_TO_DEV to DMA_TO_DEVICE.
- Remove else check for lp->use_dmaengine = 1; in the probe.

Changes in V3:
- New patch for dmaengine framework support.
---
 drivers/net/ethernet/xilinx/Kconfig                |   1 +
 drivers/net/ethernet/xilinx/xilinx_axienet.h       |  33 ++
 .../net/ethernet/xilinx/xilinx_axienet_main.c      | 392 +++++++++++++++++-
 3 files changed, 424 insertions(+), 2 deletions(-)