diff mbox series

[net-next,v20,06/13] rtase: Implement .ndo_start_xmit function

Message ID 20240607084321.7254-7-justinlai0215@realtek.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series Add Realtek automotive PCIe driver | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 8 this patch: 8
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 5 of 5 maintainers
netdev/build_clang success Errors and warnings before: 8 this patch: 8
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 8 this patch: 8
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 303 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Justin Lai June 7, 2024, 8:43 a.m. UTC
Implement the .ndo_start_xmit function, which fills the information of the
packet to be transmitted into the tx descriptor; the hardware then
transmits the packet based on that descriptor.
In addition, implement the tx_handler function to reclaim completed tx
descriptors so that they can be reused.

Signed-off-by: Justin Lai <justinlai0215@realtek.com>
---
 .../net/ethernet/realtek/rtase/rtase_main.c   | 285 ++++++++++++++++++
 1 file changed, 285 insertions(+)
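
As a quick orientation for the review thread below, here is a minimal sketch of
the ownership handoff the commit message describes. It is illustrative only:
the rtase_tx_desc layout and RTASE_DESC_OWN follow the patch, while the helper
names and the simplified flow are assumptions.

static void sketch_hand_to_hw(struct rtase_tx_desc *txd, dma_addr_t mapping,
                              u32 len, u32 opts2)
{
        /* fill the buffer address and options first ... */
        txd->addr  = cpu_to_le64(mapping);
        txd->opts2 = cpu_to_le32(opts2);
        /* ... then publish the descriptor to the NIC by setting OWN last,
         * ordered by dma_wmb() as in the real rtase_start_xmit()
         */
        dma_wmb();
        txd->opts1 = cpu_to_le32(len | RTASE_DESC_OWN);
}

static bool sketch_tx_done(const struct rtase_tx_desc *txd)
{
        /* the NIC clears OWN once the packet has been sent; tx_handler
         * reclaims (unmaps and frees) such descriptors for reuse
         */
        return !(le32_to_cpu(txd->opts1) & RTASE_DESC_OWN);
}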

Comments

Hariprasad Kelam June 7, 2024, 9:03 a.m. UTC | #1
> Implement .ndo_start_xmit function to fill the information of the packet to be
> transmitted into the tx descriptor, and then the hardware will transmit the
> packet using the information in the tx descriptor.
> In addition, we also implemented the tx_handler function to enable the tx
> descriptor to be reused.
> 
> Signed-off-by: Justin Lai <justinlai0215@realtek.com>
> ---
>  .../net/ethernet/realtek/rtase/rtase_main.c   | 285 ++++++++++++++++++
>  1 file changed, 285 insertions(+)
> 
> diff --git a/drivers/net/ethernet/realtek/rtase/rtase_main.c
> b/drivers/net/ethernet/realtek/rtase/rtase_main.c
> index 23406c195cff..6bdb4edbfbc1 100644
> --- a/drivers/net/ethernet/realtek/rtase/rtase_main.c
> +++ b/drivers/net/ethernet/realtek/rtase/rtase_main.c
> @@ -256,6 +256,68 @@ static void rtase_mark_to_asic(union rtase_rx_desc
> *desc, u32 rx_buf_sz)
>  		   cpu_to_le32(RTASE_DESC_OWN | eor | rx_buf_sz));  }
> 
> +static u32 rtase_tx_avail(struct rtase_ring *ring) {
> +	return READ_ONCE(ring->dirty_idx) + RTASE_NUM_DESC -
> +	       READ_ONCE(ring->cur_idx);
> +}
> +
> +static int tx_handler(struct rtase_ring *ring, int budget) {
> +	const struct rtase_private *tp = ring->ivec->tp;
> +	struct net_device *dev = tp->dev;
> +	u32 dirty_tx, tx_left;
> +	u32 bytes_compl = 0;
> +	u32 pkts_compl = 0;
> +	int workdone = 0;
> +
> +	dirty_tx = ring->dirty_idx;
> +	tx_left = READ_ONCE(ring->cur_idx) - dirty_tx;
> +
> +	while (tx_left > 0) {
> +		u32 entry = dirty_tx % RTASE_NUM_DESC;
> +		struct rtase_tx_desc *desc = ring->desc +
> +				       sizeof(struct rtase_tx_desc) * entry;
> +		u32 status;
> +
> +		status = le32_to_cpu(desc->opts1);
> +
> +		if (status & RTASE_DESC_OWN)
> +			break;
> +
> +		rtase_unmap_tx_skb(tp->pdev, ring->mis.len[entry], desc);
> +		ring->mis.len[entry] = 0;
> +		if (ring->skbuff[entry]) {
> +			pkts_compl++;
> +			bytes_compl += ring->skbuff[entry]->len;
> +			napi_consume_skb(ring->skbuff[entry], budget);
> +			ring->skbuff[entry] = NULL;
> +		}
> +
> +		dirty_tx++;
> +		tx_left--;
> +		workdone++;
> +
> +		if (workdone == RTASE_TX_BUDGET_DEFAULT)
> +			break;
> +	}
> +
> +	if (ring->dirty_idx != dirty_tx) {
> +		dev_sw_netstats_tx_add(dev, pkts_compl, bytes_compl);
> +		WRITE_ONCE(ring->dirty_idx, dirty_tx);
> +
> +		netif_subqueue_completed_wake(dev, ring->index,
> pkts_compl,
> +					      bytes_compl,
> +					      rtase_tx_avail(ring),
> +					      RTASE_TX_START_THRS);
> +
> +		if (ring->cur_idx != dirty_tx)
> +			rtase_w8(tp, RTASE_TPPOLL, BIT(ring->index));
> +	}
> +
> +	return 0;
> +}
> +
>  static void rtase_tx_desc_init(struct rtase_private *tp, u16 idx)  {
>  	struct rtase_ring *ring = &tp->tx_ring[idx]; @@ -1014,6 +1076,228
> @@ static int rtase_close(struct net_device *dev)
>  	return 0;
>  }
> 
> +static u32 rtase_tx_vlan_tag(const struct rtase_private *tp,
> +			     const struct sk_buff *skb)
> +{
> +	return (skb_vlan_tag_present(skb)) ?
> +		(RTASE_TX_VLAN_TAG | swab16(skb_vlan_tag_get(skb))) :
> 0x00; }
> +
               The VLAN protocol can be either 0x8100 or 0x88A8; how does the hardware know which header to insert?
Thanks,
Hariprasad k

> +static u32 rtase_tx_csum(struct sk_buff *skb, const struct net_device
> +*dev) {
> +	u32 csum_cmd = 0;
> +	u8 ip_protocol;
> +
> +	switch (vlan_get_protocol(skb)) {
> +	case htons(ETH_P_IP):
> +		csum_cmd = RTASE_TX_IPCS_C;
> +		ip_protocol = ip_hdr(skb)->protocol;
> +		break;
> +
> +	case htons(ETH_P_IPV6):
> +		csum_cmd = RTASE_TX_IPV6F_C;
> +		ip_protocol = ipv6_hdr(skb)->nexthdr;
> +		break;
> +
> +	default:
> +		ip_protocol = IPPROTO_RAW;
> +		break;
> +	}
> +
> +	if (ip_protocol == IPPROTO_TCP)
> +		csum_cmd |= RTASE_TX_TCPCS_C;
> +	else if (ip_protocol == IPPROTO_UDP)
> +		csum_cmd |= RTASE_TX_UDPCS_C;
> +
> +	csum_cmd |= u32_encode_bits(skb_transport_offset(skb),
> +				    RTASE_TCPHO_MASK);
> +
> +	return csum_cmd;
> +}
> +
> +static int rtase_xmit_frags(struct rtase_ring *ring, struct sk_buff *skb,
> +			    u32 opts1, u32 opts2)
> +{
> +	const struct skb_shared_info *info = skb_shinfo(skb);
> +	const struct rtase_private *tp = ring->ivec->tp;
> +	const u8 nr_frags = info->nr_frags;
> +	struct rtase_tx_desc *txd = NULL;
> +	u32 cur_frag, entry;
> +
> +	entry = ring->cur_idx;
> +	for (cur_frag = 0; cur_frag < nr_frags; cur_frag++) {
> +		const skb_frag_t *frag = &info->frags[cur_frag];
> +		dma_addr_t mapping;
> +		u32 status, len;
> +		void *addr;
> +
> +		entry = (entry + 1) % RTASE_NUM_DESC;
> +
> +		txd = ring->desc + sizeof(struct rtase_tx_desc) * entry;
> +		len = skb_frag_size(frag);
> +		addr = skb_frag_address(frag);
> +		mapping = dma_map_single(&tp->pdev->dev, addr, len,
> +					 DMA_TO_DEVICE);
> +
> +		if (unlikely(dma_mapping_error(&tp->pdev->dev, mapping)))
> {
> +			if (unlikely(net_ratelimit()))
> +				netdev_err(tp->dev,
> +					   "Failed to map TX fragments
> DMA!\n");
> +
> +			goto err_out;
> +		}
> +
> +		if (((entry + 1) % RTASE_NUM_DESC) == 0)
> +			status = (opts1 | len | RTASE_RING_END);
> +		else
> +			status = opts1 | len;
> +
> +		if (cur_frag == (nr_frags - 1)) {
> +			ring->skbuff[entry] = skb;
> +			status |= RTASE_TX_LAST_FRAG;
> +		}
> +
> +		ring->mis.len[entry] = len;
> +		txd->addr = cpu_to_le64(mapping);
> +		txd->opts2 = cpu_to_le32(opts2);
> +
> +		/* make sure the operating fields have been updated */
> +		dma_wmb();
> +		txd->opts1 = cpu_to_le32(status);
> +	}
> +
> +	return cur_frag;
> +
> +err_out:
> +	rtase_tx_clear_range(ring, ring->cur_idx + 1, cur_frag);
> +	return -EIO;
> +}
> +
> +static netdev_tx_t rtase_start_xmit(struct sk_buff *skb,
> +				    struct net_device *dev)
> +{
> +	struct skb_shared_info *shinfo = skb_shinfo(skb);
> +	struct rtase_private *tp = netdev_priv(dev);
> +	u32 q_idx, entry, len, opts1, opts2;
> +	struct netdev_queue *tx_queue;
> +	bool stop_queue, door_bell;
> +	u32 mss = shinfo->gso_size;
> +	struct rtase_tx_desc *txd;
> +	struct rtase_ring *ring;
> +	dma_addr_t mapping;
> +	int frags;
> +
> +	/* multiqueues */
> +	q_idx = skb_get_queue_mapping(skb);
> +	ring = &tp->tx_ring[q_idx];
> +	tx_queue = netdev_get_tx_queue(dev, q_idx);
> +
> +	if (unlikely(!rtase_tx_avail(ring))) {
> +		if (net_ratelimit())
> +			netdev_err(dev, "BUG! Tx Ring full when queue
> awake!\n");
> +		goto err_stop;
> +	}
> +
> +	entry = ring->cur_idx % RTASE_NUM_DESC;
> +	txd = ring->desc + sizeof(struct rtase_tx_desc) * entry;
> +
> +	opts1 = RTASE_DESC_OWN;
> +	opts2 = rtase_tx_vlan_tag(tp, skb);
> +
> +	/* tcp segmentation offload (or tcp large send) */
> +	if (mss) {
> +		if (shinfo->gso_type & SKB_GSO_TCPV4) {
> +			opts1 |= RTASE_GIANT_SEND_V4;
> +		} else if (shinfo->gso_type & SKB_GSO_TCPV6) {
> +			if (skb_cow_head(skb, 0))
> +				goto err_dma_0;
> +
> +			tcp_v6_gso_csum_prep(skb);
> +			opts1 |= RTASE_GIANT_SEND_V6;
> +		} else {
> +			WARN_ON_ONCE(1);
> +		}
> +
> +		opts1 |= u32_encode_bits(skb_transport_offset(skb),
> +					 RTASE_TCPHO_MASK);
> +		opts2 |= u32_encode_bits(mss, RTASE_MSS_MASK);
> +	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
> +		opts2 |= rtase_tx_csum(skb, dev);
> +	}
> +
> +	frags = rtase_xmit_frags(ring, skb, opts1, opts2);
> +	if (unlikely(frags < 0))
> +		goto err_dma_0;
> +
> +	if (frags) {
> +		len = skb_headlen(skb);
> +		opts1 |= RTASE_TX_FIRST_FRAG;
> +	} else {
> +		len = skb->len;
> +		ring->skbuff[entry] = skb;
> +		opts1 |= RTASE_TX_FIRST_FRAG | RTASE_TX_LAST_FRAG;
> +	}
> +
> +	if (((entry + 1) % RTASE_NUM_DESC) == 0)
> +		opts1 |= (len | RTASE_RING_END);
> +	else
> +		opts1 |= len;
> +
> +	mapping = dma_map_single(&tp->pdev->dev, skb->data, len,
> +				 DMA_TO_DEVICE);
> +
> +	if (unlikely(dma_mapping_error(&tp->pdev->dev, mapping))) {
> +		if (unlikely(net_ratelimit()))
> +			netdev_err(dev, "Failed to map TX DMA!\n");
> +
> +		goto err_dma_1;
> +	}
> +
> +	ring->mis.len[entry] = len;
> +	txd->addr = cpu_to_le64(mapping);
> +	txd->opts2 = cpu_to_le32(opts2);
> +	txd->opts1 = cpu_to_le32(opts1 & ~RTASE_DESC_OWN);
> +
> +	/* make sure the operating fields have been updated */
> +	dma_wmb();
> +
> +	door_bell = __netdev_tx_sent_queue(tx_queue, skb->len,
> +					   netdev_xmit_more());
> +
> +	txd->opts1 = cpu_to_le32(opts1);
> +
> +	skb_tx_timestamp(skb);
> +
> +	/* tx needs to see descriptor changes before updated cur_idx */
> +	smp_wmb();
> +
> +	WRITE_ONCE(ring->cur_idx, ring->cur_idx + frags + 1);
> +
> +	stop_queue = !netif_subqueue_maybe_stop(dev, ring->index,
> +						rtase_tx_avail(ring),
> +						RTASE_TX_STOP_THRS,
> +						RTASE_TX_START_THRS);
> +
> +	if (door_bell || stop_queue)
> +		rtase_w8(tp, RTASE_TPPOLL, BIT(ring->index));
> +
> +	return NETDEV_TX_OK;
> +
> +err_dma_1:
> +	ring->skbuff[entry] = NULL;
> +	rtase_tx_clear_range(ring, ring->cur_idx + 1, frags);
> +
> +err_dma_0:
> +	dev->stats.tx_dropped++;
> +	dev_kfree_skb_any(skb);
> +	return NETDEV_TX_OK;
> +
> +err_stop:
> +	netif_stop_queue(dev);
> +	dev->stats.tx_dropped++;
> +	return NETDEV_TX_BUSY;
> +}
> +
>  static void rtase_enable_eem_write(const struct rtase_private *tp)  {
>  	u8 val;
> @@ -1065,6 +1349,7 @@ static void rtase_netpoll(struct net_device *dev)
> static const struct net_device_ops rtase_netdev_ops = {
>  	.ndo_open = rtase_open,
>  	.ndo_stop = rtase_close,
> +	.ndo_start_xmit = rtase_start_xmit,
>  #ifdef CONFIG_NET_POLL_CONTROLLER
>  	.ndo_poll_controller = rtase_netpoll,
>  #endif
> --
> 2.34.1
>
Ratheesh Kannoth June 7, 2024, 3:54 p.m. UTC | #2
On 2024-06-07 at 14:13:14, Justin Lai (justinlai0215@realtek.com) wrote:
> Implement .ndo_start_xmit function to fill the information of the packet
> to be transmitted into the tx descriptor, and then the hardware will
> transmit the packet using the information in the tx descriptor.
> In addition, we also implemented the tx_handler function to enable the
> tx descriptor to be reused.
>
> Signed-off-by: Justin Lai <justinlai0215@realtek.com>
> ---
>  .../net/ethernet/realtek/rtase/rtase_main.c   | 285 ++++++++++++++++++
>  1 file changed, 285 insertions(+)
>
> diff --git a/drivers/net/ethernet/realtek/rtase/rtase_main.c b/drivers/net/ethernet/realtek/rtase/rtase_main.c
> index 23406c195cff..6bdb4edbfbc1 100644
> --- a/drivers/net/ethernet/realtek/rtase/rtase_main.c
> +++ b/drivers/net/ethernet/realtek/rtase/rtase_main.c
> @@ -256,6 +256,68 @@ static void rtase_mark_to_asic(union rtase_rx_desc *desc, u32 rx_buf_sz)
>  		   cpu_to_le32(RTASE_DESC_OWN | eor | rx_buf_sz));
>  }
>
> +static u32 rtase_tx_avail(struct rtase_ring *ring)
> +{
> +	return READ_ONCE(ring->dirty_idx) + RTASE_NUM_DESC -
> +	       READ_ONCE(ring->cur_idx);
> +}
Won't dirty_idx and cur_idx wrap? They are only 32 bits in size.

>
Justin Lai June 12, 2024, 4:20 a.m. UTC | #3
> On 2024-06-07 at 14:13:14, Justin Lai (justinlai0215@realtek.com) wrote:
> > Implement .ndo_start_xmit function to fill the information of the
> > packet to be transmitted into the tx descriptor, and then the hardware
> > will transmit the packet using the information in the tx descriptor.
> > In addition, we also implemented the tx_handler function to enable the
> > tx descriptor to be reused.
> >
> > Signed-off-by: Justin Lai <justinlai0215@realtek.com>
> > ---
> >  .../net/ethernet/realtek/rtase/rtase_main.c   | 285 ++++++++++++++++++
> >  1 file changed, 285 insertions(+)
> >
> > diff --git a/drivers/net/ethernet/realtek/rtase/rtase_main.c
> > b/drivers/net/ethernet/realtek/rtase/rtase_main.c
> > index 23406c195cff..6bdb4edbfbc1 100644
> > --- a/drivers/net/ethernet/realtek/rtase/rtase_main.c
> > +++ b/drivers/net/ethernet/realtek/rtase/rtase_main.c
> > @@ -256,6 +256,68 @@ static void rtase_mark_to_asic(union rtase_rx_desc
> *desc, u32 rx_buf_sz)
> >                  cpu_to_le32(RTASE_DESC_OWN | eor | rx_buf_sz));  }
> >
> > +static u32 rtase_tx_avail(struct rtase_ring *ring) {
> > +     return READ_ONCE(ring->dirty_idx) + RTASE_NUM_DESC -
> > +            READ_ONCE(ring->cur_idx); }
> dirty_idx and cur_idx wont wrap ? its 32bit in size.
> 
> >
cur_idx and dirty_idx may wrap, but all we need is the difference between
them, and unsigned subtraction still yields the correct distance after a
wrap. In addition, the difference between the two will not exceed
RTASE_NUM_DESC, and dirty_idx will never run ahead of cur_idx, so the
calculation won't go wrong.
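
A standalone illustration of the wrap-around arithmetic described above;
NUM_DESC is an arbitrary stand-in for RTASE_NUM_DESC and the index values are
made up to force a wrap:

#include <stdint.h>
#include <stdio.h>

#define NUM_DESC 1024u                  /* stand-in for RTASE_NUM_DESC */

int main(void)
{
        /* cur_idx has wrapped past UINT32_MAX, dirty_idx has not yet */
        uint32_t cur_idx   = 5;                 /* producer index after wrap */
        uint32_t dirty_idx = 0xFFFFFFFBu;       /* consumer index, 10 behind */

        uint32_t in_flight = cur_idx - dirty_idx;               /* == 10   */
        uint32_t tx_avail  = dirty_idx + NUM_DESC - cur_idx;    /* == 1014 */

        printf("in flight: %u, available: %u\n", in_flight, tx_avail);
        return 0;
}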
Justin Lai June 12, 2024, 4:35 a.m. UTC | #4
> > Implement .ndo_start_xmit function to fill the information of the
> > packet to be transmitted into the tx descriptor, and then the hardware
> > will transmit the packet using the information in the tx descriptor.
> > In addition, we also implemented the tx_handler function to enable the
> > tx descriptor to be reused.
> >
> > Signed-off-by: Justin Lai <justinlai0215@realtek.com>
> > ---
> >  .../net/ethernet/realtek/rtase/rtase_main.c   | 285 ++++++++++++++++++
> >  1 file changed, 285 insertions(+)
> >
> > diff --git a/drivers/net/ethernet/realtek/rtase/rtase_main.c
> > b/drivers/net/ethernet/realtek/rtase/rtase_main.c
> > index 23406c195cff..6bdb4edbfbc1 100644
> > --- a/drivers/net/ethernet/realtek/rtase/rtase_main.c
> > +++ b/drivers/net/ethernet/realtek/rtase/rtase_main.c
> > @@ -256,6 +256,68 @@ static void rtase_mark_to_asic(union
> > rtase_rx_desc *desc, u32 rx_buf_sz)
> >                  cpu_to_le32(RTASE_DESC_OWN | eor | rx_buf_sz));  }
> >
> > +static u32 rtase_tx_avail(struct rtase_ring *ring) {
> > +     return READ_ONCE(ring->dirty_idx) + RTASE_NUM_DESC -
> > +            READ_ONCE(ring->cur_idx); }
> > +
> > +static int tx_handler(struct rtase_ring *ring, int budget) {
> > +     const struct rtase_private *tp = ring->ivec->tp;
> > +     struct net_device *dev = tp->dev;
> > +     u32 dirty_tx, tx_left;
> > +     u32 bytes_compl = 0;
> > +     u32 pkts_compl = 0;
> > +     int workdone = 0;
> > +
> > +     dirty_tx = ring->dirty_idx;
> > +     tx_left = READ_ONCE(ring->cur_idx) - dirty_tx;
> > +
> > +     while (tx_left > 0) {
> > +             u32 entry = dirty_tx % RTASE_NUM_DESC;
> > +             struct rtase_tx_desc *desc = ring->desc +
> > +                                    sizeof(struct rtase_tx_desc) *
> entry;
> > +             u32 status;
> > +
> > +             status = le32_to_cpu(desc->opts1);
> > +
> > +             if (status & RTASE_DESC_OWN)
> > +                     break;
> > +
> > +             rtase_unmap_tx_skb(tp->pdev, ring->mis.len[entry], desc);
> > +             ring->mis.len[entry] = 0;
> > +             if (ring->skbuff[entry]) {
> > +                     pkts_compl++;
> > +                     bytes_compl += ring->skbuff[entry]->len;
> > +                     napi_consume_skb(ring->skbuff[entry], budget);
> > +                     ring->skbuff[entry] = NULL;
> > +             }
> > +
> > +             dirty_tx++;
> > +             tx_left--;
> > +             workdone++;
> > +
> > +             if (workdone == RTASE_TX_BUDGET_DEFAULT)
> > +                     break;
> > +     }
> > +
> > +     if (ring->dirty_idx != dirty_tx) {
> > +             dev_sw_netstats_tx_add(dev, pkts_compl, bytes_compl);
> > +             WRITE_ONCE(ring->dirty_idx, dirty_tx);
> > +
> > +             netif_subqueue_completed_wake(dev, ring->index,
> > pkts_compl,
> > +                                           bytes_compl,
> > +                                           rtase_tx_avail(ring),
> > +
> RTASE_TX_START_THRS);
> > +
> > +             if (ring->cur_idx != dirty_tx)
> > +                     rtase_w8(tp, RTASE_TPPOLL, BIT(ring->index));
> > +     }
> > +
> > +     return 0;
> > +}
> > +
> >  static void rtase_tx_desc_init(struct rtase_private *tp, u16 idx)  {
> >       struct rtase_ring *ring = &tp->tx_ring[idx]; @@ -1014,6
> > +1076,228 @@ static int rtase_close(struct net_device *dev)
> >       return 0;
> >  }
> >
> > +static u32 rtase_tx_vlan_tag(const struct rtase_private *tp,
> > +                          const struct sk_buff *skb) {
> > +     return (skb_vlan_tag_present(skb)) ?
> > +             (RTASE_TX_VLAN_TAG | swab16(skb_vlan_tag_get(skb))) :
> > 0x00; }
> > +
>                Vlan protocol can be either 0x8100 or 0x88A8, how does
> hardware know which header to insert?
> Thanks,
> Hariprasad k

We only allow the hardware to insert 0x8100 tags. A packet must carry an
0x8100 tag before it can also carry an 0x88a8 tag, and
skb_vlan_tag_present() indicates that a VLAN tag exists, so at least the
0x8100 VLAN will be present.
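
As a reference for this point, a commented sketch mirroring the quoted
rtase_tx_vlan_tag(): the swab16() byte swap matches the patch and presumably
puts the TCI in the byte order the descriptor expects; only the helper name
here is invented.

static u32 sketch_tx_vlan_opts2(const struct sk_buff *skb)
{
        /* the stack only hands us an accelerated tag for protocols the
         * driver advertises, which is 0x8100 only (see below)
         */
        if (!skb_vlan_tag_present(skb))
                return 0;

        /* RTASE_TX_VLAN_TAG asks the hardware to insert a C-tag;
         * skb_vlan_tag_get() returns the 16-bit TCI in host order
         */
        return RTASE_TX_VLAN_TAG | swab16(skb_vlan_tag_get(skb));
}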
> 
> > +static u32 rtase_tx_csum(struct sk_buff *skb, const struct net_device
> > +*dev) {
> > +     u32 csum_cmd = 0;
> > +     u8 ip_protocol;
> > +
> > +     switch (vlan_get_protocol(skb)) {
> > +     case htons(ETH_P_IP):
> > +             csum_cmd = RTASE_TX_IPCS_C;
> > +             ip_protocol = ip_hdr(skb)->protocol;
> > +             break;
> > +
> > +     case htons(ETH_P_IPV6):
> > +             csum_cmd = RTASE_TX_IPV6F_C;
> > +             ip_protocol = ipv6_hdr(skb)->nexthdr;
> > +             break;
> > +
> > +     default:
> > +             ip_protocol = IPPROTO_RAW;
> > +             break;
> > +     }
> > +
> > +     if (ip_protocol == IPPROTO_TCP)
> > +             csum_cmd |= RTASE_TX_TCPCS_C;
> > +     else if (ip_protocol == IPPROTO_UDP)
> > +             csum_cmd |= RTASE_TX_UDPCS_C;
> > +
> > +     csum_cmd |= u32_encode_bits(skb_transport_offset(skb),
> > +                                 RTASE_TCPHO_MASK);
> > +
> > +     return csum_cmd;
> > +}
> > +
> > +static int rtase_xmit_frags(struct rtase_ring *ring, struct sk_buff *skb,
> > +                         u32 opts1, u32 opts2) {
> > +     const struct skb_shared_info *info = skb_shinfo(skb);
> > +     const struct rtase_private *tp = ring->ivec->tp;
> > +     const u8 nr_frags = info->nr_frags;
> > +     struct rtase_tx_desc *txd = NULL;
> > +     u32 cur_frag, entry;
> > +
> > +     entry = ring->cur_idx;
> > +     for (cur_frag = 0; cur_frag < nr_frags; cur_frag++) {
> > +             const skb_frag_t *frag = &info->frags[cur_frag];
> > +             dma_addr_t mapping;
> > +             u32 status, len;
> > +             void *addr;
> > +
> > +             entry = (entry + 1) % RTASE_NUM_DESC;
> > +
> > +             txd = ring->desc + sizeof(struct rtase_tx_desc) * entry;
> > +             len = skb_frag_size(frag);
> > +             addr = skb_frag_address(frag);
> > +             mapping = dma_map_single(&tp->pdev->dev, addr, len,
> > +                                      DMA_TO_DEVICE);
> > +
> > +             if (unlikely(dma_mapping_error(&tp->pdev->dev,
> > + mapping)))
> > {
> > +                     if (unlikely(net_ratelimit()))
> > +                             netdev_err(tp->dev,
> > +                                        "Failed to map TX
> fragments
> > DMA!\n");
> > +
> > +                     goto err_out;
> > +             }
> > +
> > +             if (((entry + 1) % RTASE_NUM_DESC) == 0)
> > +                     status = (opts1 | len | RTASE_RING_END);
> > +             else
> > +                     status = opts1 | len;
> > +
> > +             if (cur_frag == (nr_frags - 1)) {
> > +                     ring->skbuff[entry] = skb;
> > +                     status |= RTASE_TX_LAST_FRAG;
> > +             }
> > +
> > +             ring->mis.len[entry] = len;
> > +             txd->addr = cpu_to_le64(mapping);
> > +             txd->opts2 = cpu_to_le32(opts2);
> > +
> > +             /* make sure the operating fields have been updated */
> > +             dma_wmb();
> > +             txd->opts1 = cpu_to_le32(status);
> > +     }
> > +
> > +     return cur_frag;
> > +
> > +err_out:
> > +     rtase_tx_clear_range(ring, ring->cur_idx + 1, cur_frag);
> > +     return -EIO;
> > +}
> > +
> > +static netdev_tx_t rtase_start_xmit(struct sk_buff *skb,
> > +                                 struct net_device *dev) {
> > +     struct skb_shared_info *shinfo = skb_shinfo(skb);
> > +     struct rtase_private *tp = netdev_priv(dev);
> > +     u32 q_idx, entry, len, opts1, opts2;
> > +     struct netdev_queue *tx_queue;
> > +     bool stop_queue, door_bell;
> > +     u32 mss = shinfo->gso_size;
> > +     struct rtase_tx_desc *txd;
> > +     struct rtase_ring *ring;
> > +     dma_addr_t mapping;
> > +     int frags;
> > +
> > +     /* multiqueues */
> > +     q_idx = skb_get_queue_mapping(skb);
> > +     ring = &tp->tx_ring[q_idx];
> > +     tx_queue = netdev_get_tx_queue(dev, q_idx);
> > +
> > +     if (unlikely(!rtase_tx_avail(ring))) {
> > +             if (net_ratelimit())
> > +                     netdev_err(dev, "BUG! Tx Ring full when queue
> > awake!\n");
> > +             goto err_stop;
> > +     }
> > +
> > +     entry = ring->cur_idx % RTASE_NUM_DESC;
> > +     txd = ring->desc + sizeof(struct rtase_tx_desc) * entry;
> > +
> > +     opts1 = RTASE_DESC_OWN;
> > +     opts2 = rtase_tx_vlan_tag(tp, skb);
> > +
> > +     /* tcp segmentation offload (or tcp large send) */
> > +     if (mss) {
> > +             if (shinfo->gso_type & SKB_GSO_TCPV4) {
> > +                     opts1 |= RTASE_GIANT_SEND_V4;
> > +             } else if (shinfo->gso_type & SKB_GSO_TCPV6) {
> > +                     if (skb_cow_head(skb, 0))
> > +                             goto err_dma_0;
> > +
> > +                     tcp_v6_gso_csum_prep(skb);
> > +                     opts1 |= RTASE_GIANT_SEND_V6;
> > +             } else {
> > +                     WARN_ON_ONCE(1);
> > +             }
> > +
> > +             opts1 |= u32_encode_bits(skb_transport_offset(skb),
> > +                                      RTASE_TCPHO_MASK);
> > +             opts2 |= u32_encode_bits(mss, RTASE_MSS_MASK);
> > +     } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
> > +             opts2 |= rtase_tx_csum(skb, dev);
> > +     }
> > +
> > +     frags = rtase_xmit_frags(ring, skb, opts1, opts2);
> > +     if (unlikely(frags < 0))
> > +             goto err_dma_0;
> > +
> > +     if (frags) {
> > +             len = skb_headlen(skb);
> > +             opts1 |= RTASE_TX_FIRST_FRAG;
> > +     } else {
> > +             len = skb->len;
> > +             ring->skbuff[entry] = skb;
> > +             opts1 |= RTASE_TX_FIRST_FRAG | RTASE_TX_LAST_FRAG;
> > +     }
> > +
> > +     if (((entry + 1) % RTASE_NUM_DESC) == 0)
> > +             opts1 |= (len | RTASE_RING_END);
> > +     else
> > +             opts1 |= len;
> > +
> > +     mapping = dma_map_single(&tp->pdev->dev, skb->data, len,
> > +                              DMA_TO_DEVICE);
> > +
> > +     if (unlikely(dma_mapping_error(&tp->pdev->dev, mapping))) {
> > +             if (unlikely(net_ratelimit()))
> > +                     netdev_err(dev, "Failed to map TX DMA!\n");
> > +
> > +             goto err_dma_1;
> > +     }
> > +
> > +     ring->mis.len[entry] = len;
> > +     txd->addr = cpu_to_le64(mapping);
> > +     txd->opts2 = cpu_to_le32(opts2);
> > +     txd->opts1 = cpu_to_le32(opts1 & ~RTASE_DESC_OWN);
> > +
> > +     /* make sure the operating fields have been updated */
> > +     dma_wmb();
> > +
> > +     door_bell = __netdev_tx_sent_queue(tx_queue, skb->len,
> > +                                        netdev_xmit_more());
> > +
> > +     txd->opts1 = cpu_to_le32(opts1);
> > +
> > +     skb_tx_timestamp(skb);
> > +
> > +     /* tx needs to see descriptor changes before updated cur_idx */
> > +     smp_wmb();
> > +
> > +     WRITE_ONCE(ring->cur_idx, ring->cur_idx + frags + 1);
> > +
> > +     stop_queue = !netif_subqueue_maybe_stop(dev, ring->index,
> > +                                             rtase_tx_avail(ring),
> > +
> RTASE_TX_STOP_THRS,
> > +
> RTASE_TX_START_THRS);
> > +
> > +     if (door_bell || stop_queue)
> > +             rtase_w8(tp, RTASE_TPPOLL, BIT(ring->index));
> > +
> > +     return NETDEV_TX_OK;
> > +
> > +err_dma_1:
> > +     ring->skbuff[entry] = NULL;
> > +     rtase_tx_clear_range(ring, ring->cur_idx + 1, frags);
> > +
> > +err_dma_0:
> > +     dev->stats.tx_dropped++;
> > +     dev_kfree_skb_any(skb);
> > +     return NETDEV_TX_OK;
> > +
> > +err_stop:
> > +     netif_stop_queue(dev);
> > +     dev->stats.tx_dropped++;
> > +     return NETDEV_TX_BUSY;
> > +}
> > +
> >  static void rtase_enable_eem_write(const struct rtase_private *tp)  {
> >       u8 val;
> > @@ -1065,6 +1349,7 @@ static void rtase_netpoll(struct net_device
> > *dev) static const struct net_device_ops rtase_netdev_ops = {
> >       .ndo_open = rtase_open,
> >       .ndo_stop = rtase_close,
> > +     .ndo_start_xmit = rtase_start_xmit,
> >  #ifdef CONFIG_NET_POLL_CONTROLLER
> >       .ndo_poll_controller = rtase_netpoll,  #endif
> > --
> > 2.34.1
> >
Hariprasad Kelam June 12, 2024, 10:36 a.m. UTC | #5
> > > Implement .ndo_start_xmit function to fill the information of the
> > > packet to be transmitted into the tx descriptor, and then the
> > > hardware will transmit the packet using the information in the tx
> descriptor.
> > > In addition, we also implemented the tx_handler function to enable
> > > the tx descriptor to be reused.
> > >
> > > Signed-off-by: Justin Lai <justinlai0215@realtek.com>
> > > ---
> > >  .../net/ethernet/realtek/rtase/rtase_main.c   | 285 ++++++++++++++++++
> > >  1 file changed, 285 insertions(+)
> > >
> > > diff --git a/drivers/net/ethernet/realtek/rtase/rtase_main.c
> > > b/drivers/net/ethernet/realtek/rtase/rtase_main.c
> > > index 23406c195cff..6bdb4edbfbc1 100644
> > > --- a/drivers/net/ethernet/realtek/rtase/rtase_main.c
> > > +++ b/drivers/net/ethernet/realtek/rtase/rtase_main.c
> > > @@ -256,6 +256,68 @@ static void rtase_mark_to_asic(union
> > > rtase_rx_desc *desc, u32 rx_buf_sz)
> > >                  cpu_to_le32(RTASE_DESC_OWN | eor | rx_buf_sz));  }
> > >
> > > +static u32 rtase_tx_avail(struct rtase_ring *ring) {
> > > +     return READ_ONCE(ring->dirty_idx) + RTASE_NUM_DESC -
> > > +            READ_ONCE(ring->cur_idx); }
> > > +
> > > +static int tx_handler(struct rtase_ring *ring, int budget) {
> > > +     const struct rtase_private *tp = ring->ivec->tp;
> > > +     struct net_device *dev = tp->dev;
> > > +     u32 dirty_tx, tx_left;
> > > +     u32 bytes_compl = 0;
> > > +     u32 pkts_compl = 0;
> > > +     int workdone = 0;
> > > +
> > > +     dirty_tx = ring->dirty_idx;
> > > +     tx_left = READ_ONCE(ring->cur_idx) - dirty_tx;
> > > +
> > > +     while (tx_left > 0) {
> > > +             u32 entry = dirty_tx % RTASE_NUM_DESC;
> > > +             struct rtase_tx_desc *desc = ring->desc +
> > > +                                    sizeof(struct rtase_tx_desc) *
> > entry;
> > > +             u32 status;
> > > +
> > > +             status = le32_to_cpu(desc->opts1);
> > > +
> > > +             if (status & RTASE_DESC_OWN)
> > > +                     break;
> > > +
> > > +             rtase_unmap_tx_skb(tp->pdev, ring->mis.len[entry], desc);
> > > +             ring->mis.len[entry] = 0;
> > > +             if (ring->skbuff[entry]) {
> > > +                     pkts_compl++;
> > > +                     bytes_compl += ring->skbuff[entry]->len;
> > > +                     napi_consume_skb(ring->skbuff[entry], budget);
> > > +                     ring->skbuff[entry] = NULL;
> > > +             }
> > > +
> > > +             dirty_tx++;
> > > +             tx_left--;
> > > +             workdone++;
> > > +
> > > +             if (workdone == RTASE_TX_BUDGET_DEFAULT)
> > > +                     break;
> > > +     }
> > > +
> > > +     if (ring->dirty_idx != dirty_tx) {
> > > +             dev_sw_netstats_tx_add(dev, pkts_compl, bytes_compl);
> > > +             WRITE_ONCE(ring->dirty_idx, dirty_tx);
> > > +
> > > +             netif_subqueue_completed_wake(dev, ring->index,
> > > pkts_compl,
> > > +                                           bytes_compl,
> > > +                                           rtase_tx_avail(ring),
> > > +
> > RTASE_TX_START_THRS);
> > > +
> > > +             if (ring->cur_idx != dirty_tx)
> > > +                     rtase_w8(tp, RTASE_TPPOLL, BIT(ring->index));
> > > +     }
> > > +
> > > +     return 0;
> > > +}
> > > +
> > >  static void rtase_tx_desc_init(struct rtase_private *tp, u16 idx)  {
> > >       struct rtase_ring *ring = &tp->tx_ring[idx]; @@ -1014,6
> > > +1076,228 @@ static int rtase_close(struct net_device *dev)
> > >       return 0;
> > >  }
> > >
> > > +static u32 rtase_tx_vlan_tag(const struct rtase_private *tp,
> > > +                          const struct sk_buff *skb) {
> > > +     return (skb_vlan_tag_present(skb)) ?
> > > +             (RTASE_TX_VLAN_TAG | swab16(skb_vlan_tag_get(skb))) :
> > > 0x00; }
> > > +
> >                Vlan protocol can be either 0x8100 or 0x88A8, how does
> > hardware know which header to insert?
> > Thanks,
> > Hariprasad k
> 
> We only allow the hardware to add 0x8100, the VLAN must at least have
> 0x8100 to potentially have 0x88a8, skb_vlan_tag_present indicates that VLAN
> exists, hence at least the 0x8100 VLAN would exist.
> >
Thanks for the explanation, but one question still bothers me: how does the hardware know the offset within the packet?

For example
Case 1:       DMAC + SMAC + 8100 VLAN_ID + IP
               Here the offset is right after the SMAC.
Case 2:      DMAC + SMAC + 88A8 VLAN_ID + 8100 VLAN_ID + IP
               Here the offset is right after the first VLAN tag.

Thanks,
Hariprasad k
    
> > > +static u32 rtase_tx_csum(struct sk_buff *skb, const struct
> > > +net_device
> > > +*dev) {
> > > +     u32 csum_cmd = 0;
> > > +     u8 ip_protocol;
> > > +
> > > +     switch (vlan_get_protocol(skb)) {
> > > +     case htons(ETH_P_IP):
> > > +             csum_cmd = RTASE_TX_IPCS_C;
> > > +             ip_protocol = ip_hdr(skb)->protocol;
> > > +             break;
> > > +
> > > +     case htons(ETH_P_IPV6):
> > > +             csum_cmd = RTASE_TX_IPV6F_C;
> > > +             ip_protocol = ipv6_hdr(skb)->nexthdr;
> > > +             break;
> > > +
> > > +     default:
> > > +             ip_protocol = IPPROTO_RAW;
> > > +             break;
> > > +     }
> > > +
> > > +     if (ip_protocol == IPPROTO_TCP)
> > > +             csum_cmd |= RTASE_TX_TCPCS_C;
> > > +     else if (ip_protocol == IPPROTO_UDP)
> > > +             csum_cmd |= RTASE_TX_UDPCS_C;
> > > +
> > > +     csum_cmd |= u32_encode_bits(skb_transport_offset(skb),
> > > +                                 RTASE_TCPHO_MASK);
> > > +
> > > +     return csum_cmd;
> > > +}
> > > +
> > > +static int rtase_xmit_frags(struct rtase_ring *ring, struct sk_buff *skb,
> > > +                         u32 opts1, u32 opts2) {
> > > +     const struct skb_shared_info *info = skb_shinfo(skb);
> > > +     const struct rtase_private *tp = ring->ivec->tp;
> > > +     const u8 nr_frags = info->nr_frags;
> > > +     struct rtase_tx_desc *txd = NULL;
> > > +     u32 cur_frag, entry;
> > > +
> > > +     entry = ring->cur_idx;
> > > +     for (cur_frag = 0; cur_frag < nr_frags; cur_frag++) {
> > > +             const skb_frag_t *frag = &info->frags[cur_frag];
> > > +             dma_addr_t mapping;
> > > +             u32 status, len;
> > > +             void *addr;
> > > +
> > > +             entry = (entry + 1) % RTASE_NUM_DESC;
> > > +
> > > +             txd = ring->desc + sizeof(struct rtase_tx_desc) * entry;
> > > +             len = skb_frag_size(frag);
> > > +             addr = skb_frag_address(frag);
> > > +             mapping = dma_map_single(&tp->pdev->dev, addr, len,
> > > +                                      DMA_TO_DEVICE);
> > > +
> > > +             if (unlikely(dma_mapping_error(&tp->pdev->dev,
> > > + mapping)))
> > > {
> > > +                     if (unlikely(net_ratelimit()))
> > > +                             netdev_err(tp->dev,
> > > +                                        "Failed to map TX
> > fragments
> > > DMA!\n");
> > > +
> > > +                     goto err_out;
> > > +             }
> > > +
> > > +             if (((entry + 1) % RTASE_NUM_DESC) == 0)
> > > +                     status = (opts1 | len | RTASE_RING_END);
> > > +             else
> > > +                     status = opts1 | len;
> > > +
> > > +             if (cur_frag == (nr_frags - 1)) {
> > > +                     ring->skbuff[entry] = skb;
> > > +                     status |= RTASE_TX_LAST_FRAG;
> > > +             }
> > > +
> > > +             ring->mis.len[entry] = len;
> > > +             txd->addr = cpu_to_le64(mapping);
> > > +             txd->opts2 = cpu_to_le32(opts2);
> > > +
> > > +             /* make sure the operating fields have been updated */
> > > +             dma_wmb();
> > > +             txd->opts1 = cpu_to_le32(status);
> > > +     }
> > > +
> > > +     return cur_frag;
> > > +
> > > +err_out:
> > > +     rtase_tx_clear_range(ring, ring->cur_idx + 1, cur_frag);
> > > +     return -EIO;
> > > +}
> > > +
> > > +static netdev_tx_t rtase_start_xmit(struct sk_buff *skb,
> > > +                                 struct net_device *dev) {
> > > +     struct skb_shared_info *shinfo = skb_shinfo(skb);
> > > +     struct rtase_private *tp = netdev_priv(dev);
> > > +     u32 q_idx, entry, len, opts1, opts2;
> > > +     struct netdev_queue *tx_queue;
> > > +     bool stop_queue, door_bell;
> > > +     u32 mss = shinfo->gso_size;
> > > +     struct rtase_tx_desc *txd;
> > > +     struct rtase_ring *ring;
> > > +     dma_addr_t mapping;
> > > +     int frags;
> > > +
> > > +     /* multiqueues */
> > > +     q_idx = skb_get_queue_mapping(skb);
> > > +     ring = &tp->tx_ring[q_idx];
> > > +     tx_queue = netdev_get_tx_queue(dev, q_idx);
> > > +
> > > +     if (unlikely(!rtase_tx_avail(ring))) {
> > > +             if (net_ratelimit())
> > > +                     netdev_err(dev, "BUG! Tx Ring full when queue
> > > awake!\n");
> > > +             goto err_stop;
> > > +     }
> > > +
> > > +     entry = ring->cur_idx % RTASE_NUM_DESC;
> > > +     txd = ring->desc + sizeof(struct rtase_tx_desc) * entry;
> > > +
> > > +     opts1 = RTASE_DESC_OWN;
> > > +     opts2 = rtase_tx_vlan_tag(tp, skb);
> > > +
> > > +     /* tcp segmentation offload (or tcp large send) */
> > > +     if (mss) {
> > > +             if (shinfo->gso_type & SKB_GSO_TCPV4) {
> > > +                     opts1 |= RTASE_GIANT_SEND_V4;
> > > +             } else if (shinfo->gso_type & SKB_GSO_TCPV6) {
> > > +                     if (skb_cow_head(skb, 0))
> > > +                             goto err_dma_0;
> > > +
> > > +                     tcp_v6_gso_csum_prep(skb);
> > > +                     opts1 |= RTASE_GIANT_SEND_V6;
> > > +             } else {
> > > +                     WARN_ON_ONCE(1);
> > > +             }
> > > +
> > > +             opts1 |= u32_encode_bits(skb_transport_offset(skb),
> > > +                                      RTASE_TCPHO_MASK);
> > > +             opts2 |= u32_encode_bits(mss, RTASE_MSS_MASK);
> > > +     } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
> > > +             opts2 |= rtase_tx_csum(skb, dev);
> > > +     }
> > > +
> > > +     frags = rtase_xmit_frags(ring, skb, opts1, opts2);
> > > +     if (unlikely(frags < 0))
> > > +             goto err_dma_0;
> > > +
> > > +     if (frags) {
> > > +             len = skb_headlen(skb);
> > > +             opts1 |= RTASE_TX_FIRST_FRAG;
> > > +     } else {
> > > +             len = skb->len;
> > > +             ring->skbuff[entry] = skb;
> > > +             opts1 |= RTASE_TX_FIRST_FRAG | RTASE_TX_LAST_FRAG;
> > > +     }
> > > +
> > > +     if (((entry + 1) % RTASE_NUM_DESC) == 0)
> > > +             opts1 |= (len | RTASE_RING_END);
> > > +     else
> > > +             opts1 |= len;
> > > +
> > > +     mapping = dma_map_single(&tp->pdev->dev, skb->data, len,
> > > +                              DMA_TO_DEVICE);
> > > +
> > > +     if (unlikely(dma_mapping_error(&tp->pdev->dev, mapping))) {
> > > +             if (unlikely(net_ratelimit()))
> > > +                     netdev_err(dev, "Failed to map TX DMA!\n");
> > > +
> > > +             goto err_dma_1;
> > > +     }
> > > +
> > > +     ring->mis.len[entry] = len;
> > > +     txd->addr = cpu_to_le64(mapping);
> > > +     txd->opts2 = cpu_to_le32(opts2);
> > > +     txd->opts1 = cpu_to_le32(opts1 & ~RTASE_DESC_OWN);
> > > +
> > > +     /* make sure the operating fields have been updated */
> > > +     dma_wmb();
> > > +
> > > +     door_bell = __netdev_tx_sent_queue(tx_queue, skb->len,
> > > +                                        netdev_xmit_more());
> > > +
> > > +     txd->opts1 = cpu_to_le32(opts1);
> > > +
> > > +     skb_tx_timestamp(skb);
> > > +
> > > +     /* tx needs to see descriptor changes before updated cur_idx */
> > > +     smp_wmb();
> > > +
> > > +     WRITE_ONCE(ring->cur_idx, ring->cur_idx + frags + 1);
> > > +
> > > +     stop_queue = !netif_subqueue_maybe_stop(dev, ring->index,
> > > +                                             rtase_tx_avail(ring),
> > > +
> > RTASE_TX_STOP_THRS,
> > > +
> > RTASE_TX_START_THRS);
> > > +
> > > +     if (door_bell || stop_queue)
> > > +             rtase_w8(tp, RTASE_TPPOLL, BIT(ring->index));
> > > +
> > > +     return NETDEV_TX_OK;
> > > +
> > > +err_dma_1:
> > > +     ring->skbuff[entry] = NULL;
> > > +     rtase_tx_clear_range(ring, ring->cur_idx + 1, frags);
> > > +
> > > +err_dma_0:
> > > +     dev->stats.tx_dropped++;
> > > +     dev_kfree_skb_any(skb);
> > > +     return NETDEV_TX_OK;
> > > +
> > > +err_stop:
> > > +     netif_stop_queue(dev);
> > > +     dev->stats.tx_dropped++;
> > > +     return NETDEV_TX_BUSY;
> > > +}
> > > +
> > >  static void rtase_enable_eem_write(const struct rtase_private *tp)  {
> > >       u8 val;
> > > @@ -1065,6 +1349,7 @@ static void rtase_netpoll(struct net_device
> > > *dev) static const struct net_device_ops rtase_netdev_ops = {
> > >       .ndo_open = rtase_open,
> > >       .ndo_stop = rtase_close,
> > > +     .ndo_start_xmit = rtase_start_xmit,
> > >  #ifdef CONFIG_NET_POLL_CONTROLLER
> > >       .ndo_poll_controller = rtase_netpoll,  #endif
> > > --
> > > 2.34.1
> > >
Justin Lai June 13, 2024, 3:38 a.m. UTC | #6
> > > > Implement .ndo_start_xmit function to fill the information of the
> > > > packet to be transmitted into the tx descriptor, and then the
> > > > hardware will transmit the packet using the information in the tx
> > descriptor.
> > > > In addition, we also implemented the tx_handler function to enable
> > > > the tx descriptor to be reused.
> > > >
> > > > Signed-off-by: Justin Lai <justinlai0215@realtek.com>
> > > > ---
> > > >  .../net/ethernet/realtek/rtase/rtase_main.c   | 285
> ++++++++++++++++++
> > > >  1 file changed, 285 insertions(+)
> > > >
> > > > diff --git a/drivers/net/ethernet/realtek/rtase/rtase_main.c
> > > > b/drivers/net/ethernet/realtek/rtase/rtase_main.c
> > > > index 23406c195cff..6bdb4edbfbc1 100644
> > > > --- a/drivers/net/ethernet/realtek/rtase/rtase_main.c
> > > > +++ b/drivers/net/ethernet/realtek/rtase/rtase_main.c
> > > > @@ -256,6 +256,68 @@ static void rtase_mark_to_asic(union
> > > > rtase_rx_desc *desc, u32 rx_buf_sz)
> > > >                  cpu_to_le32(RTASE_DESC_OWN | eor | rx_buf_sz));
> > > > }
> > > >
> > > > +static u32 rtase_tx_avail(struct rtase_ring *ring) {
> > > > +     return READ_ONCE(ring->dirty_idx) + RTASE_NUM_DESC -
> > > > +            READ_ONCE(ring->cur_idx); }
> > > > +
> > > > +static int tx_handler(struct rtase_ring *ring, int budget) {
> > > > +     const struct rtase_private *tp = ring->ivec->tp;
> > > > +     struct net_device *dev = tp->dev;
> > > > +     u32 dirty_tx, tx_left;
> > > > +     u32 bytes_compl = 0;
> > > > +     u32 pkts_compl = 0;
> > > > +     int workdone = 0;
> > > > +
> > > > +     dirty_tx = ring->dirty_idx;
> > > > +     tx_left = READ_ONCE(ring->cur_idx) - dirty_tx;
> > > > +
> > > > +     while (tx_left > 0) {
> > > > +             u32 entry = dirty_tx % RTASE_NUM_DESC;
> > > > +             struct rtase_tx_desc *desc = ring->desc +
> > > > +                                    sizeof(struct rtase_tx_desc)
> > > > + *
> > > entry;
> > > > +             u32 status;
> > > > +
> > > > +             status = le32_to_cpu(desc->opts1);
> > > > +
> > > > +             if (status & RTASE_DESC_OWN)
> > > > +                     break;
> > > > +
> > > > +             rtase_unmap_tx_skb(tp->pdev, ring->mis.len[entry],
> desc);
> > > > +             ring->mis.len[entry] = 0;
> > > > +             if (ring->skbuff[entry]) {
> > > > +                     pkts_compl++;
> > > > +                     bytes_compl += ring->skbuff[entry]->len;
> > > > +                     napi_consume_skb(ring->skbuff[entry],
> budget);
> > > > +                     ring->skbuff[entry] = NULL;
> > > > +             }
> > > > +
> > > > +             dirty_tx++;
> > > > +             tx_left--;
> > > > +             workdone++;
> > > > +
> > > > +             if (workdone == RTASE_TX_BUDGET_DEFAULT)
> > > > +                     break;
> > > > +     }
> > > > +
> > > > +     if (ring->dirty_idx != dirty_tx) {
> > > > +             dev_sw_netstats_tx_add(dev, pkts_compl,
> bytes_compl);
> > > > +             WRITE_ONCE(ring->dirty_idx, dirty_tx);
> > > > +
> > > > +             netif_subqueue_completed_wake(dev, ring->index,
> > > > pkts_compl,
> > > > +                                           bytes_compl,
> > > > +
> rtase_tx_avail(ring),
> > > > +
> > > RTASE_TX_START_THRS);
> > > > +
> > > > +             if (ring->cur_idx != dirty_tx)
> > > > +                     rtase_w8(tp, RTASE_TPPOLL,
> BIT(ring->index));
> > > > +     }
> > > > +
> > > > +     return 0;
> > > > +}
> > > > +
> > > >  static void rtase_tx_desc_init(struct rtase_private *tp, u16 idx)  {
> > > >       struct rtase_ring *ring = &tp->tx_ring[idx]; @@ -1014,6
> > > > +1076,228 @@ static int rtase_close(struct net_device *dev)
> > > >       return 0;
> > > >  }
> > > >
> > > > +static u32 rtase_tx_vlan_tag(const struct rtase_private *tp,
> > > > +                          const struct sk_buff *skb) {
> > > > +     return (skb_vlan_tag_present(skb)) ?
> > > > +             (RTASE_TX_VLAN_TAG |
> swab16(skb_vlan_tag_get(skb))) :
> > > > 0x00; }
> > > > +
> > >                Vlan protocol can be either 0x8100 or 0x88A8, how
> > > does hardware know which header to insert?
> > > Thanks,
> > > Hariprasad k
> >
> > We only allow the hardware to add 0x8100, the VLAN must at least have
> > 0x8100 to potentially have 0x88a8, skb_vlan_tag_present indicates that
> > VLAN exists, hence at least the 0x8100 VLAN would exist.
> > >
> Thanks for the explanation, but one question which bothers me is that "how
> hardware knows offset with in the packet"
> 
> For example
> Case 1:       DMAC  + SMAC + 8100 VLAN_ID + IP
>                Here offset is right after the SMAC.
> Case 2:      DMAC + SMAC + 88A8 VLAN_ID + 8100 VLAN_ID + IP
>                Here offset is right after first vlan tag.
> 
> Thanks,
> Hariprasad k

This driver only enables NETIF_F_HW_VLAN_CTAG_TX, and we only support case 1.
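
For context, a sketch of the feature setup this relies on (the helper name is
hypothetical; the real feature assignment lives elsewhere in the series):
advertising only CTAG TX offload means 0x88A8 headers are inserted in software
before the skb reaches .ndo_start_xmit, so only case 1 ever hits the hardware
insertion path.

static void sketch_set_vlan_features(struct net_device *dev)
{
        dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX;
        dev->features    |= NETIF_F_HW_VLAN_CTAG_TX;
        /* NETIF_F_HW_VLAN_STAG_TX is deliberately left unset, so the
         * stack builds 802.1ad (0x88A8) headers in software and only
         * 0x8100 tags reach the driver as accelerated metadata
         */
}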

> 
> > > > +static u32 rtase_tx_csum(struct sk_buff *skb, const struct
> > > > +net_device
> > > > +*dev) {
> > > > +     u32 csum_cmd = 0;
> > > > +     u8 ip_protocol;
> > > > +
> > > > +     switch (vlan_get_protocol(skb)) {
> > > > +     case htons(ETH_P_IP):
> > > > +             csum_cmd = RTASE_TX_IPCS_C;
> > > > +             ip_protocol = ip_hdr(skb)->protocol;
> > > > +             break;
> > > > +
> > > > +     case htons(ETH_P_IPV6):
> > > > +             csum_cmd = RTASE_TX_IPV6F_C;
> > > > +             ip_protocol = ipv6_hdr(skb)->nexthdr;
> > > > +             break;
> > > > +
> > > > +     default:
> > > > +             ip_protocol = IPPROTO_RAW;
> > > > +             break;
> > > > +     }
> > > > +
> > > > +     if (ip_protocol == IPPROTO_TCP)
> > > > +             csum_cmd |= RTASE_TX_TCPCS_C;
> > > > +     else if (ip_protocol == IPPROTO_UDP)
> > > > +             csum_cmd |= RTASE_TX_UDPCS_C;
> > > > +
> > > > +     csum_cmd |= u32_encode_bits(skb_transport_offset(skb),
> > > > +                                 RTASE_TCPHO_MASK);
> > > > +
> > > > +     return csum_cmd;
> > > > +}
> > > > +
> > > > +static int rtase_xmit_frags(struct rtase_ring *ring, struct sk_buff *skb,
> > > > +                         u32 opts1, u32 opts2) {
> > > > +     const struct skb_shared_info *info = skb_shinfo(skb);
> > > > +     const struct rtase_private *tp = ring->ivec->tp;
> > > > +     const u8 nr_frags = info->nr_frags;
> > > > +     struct rtase_tx_desc *txd = NULL;
> > > > +     u32 cur_frag, entry;
> > > > +
> > > > +     entry = ring->cur_idx;
> > > > +     for (cur_frag = 0; cur_frag < nr_frags; cur_frag++) {
> > > > +             const skb_frag_t *frag = &info->frags[cur_frag];
> > > > +             dma_addr_t mapping;
> > > > +             u32 status, len;
> > > > +             void *addr;
> > > > +
> > > > +             entry = (entry + 1) % RTASE_NUM_DESC;
> > > > +
> > > > +             txd = ring->desc + sizeof(struct rtase_tx_desc) * entry;
> > > > +             len = skb_frag_size(frag);
> > > > +             addr = skb_frag_address(frag);
> > > > +             mapping = dma_map_single(&tp->pdev->dev, addr, len,
> > > > +                                      DMA_TO_DEVICE);
> > > > +
> > > > +             if (unlikely(dma_mapping_error(&tp->pdev->dev,
> > > > + mapping)))
> > > > {
> > > > +                     if (unlikely(net_ratelimit()))
> > > > +                             netdev_err(tp->dev,
> > > > +                                        "Failed to map TX
> > > fragments
> > > > DMA!\n");
> > > > +
> > > > +                     goto err_out;
> > > > +             }
> > > > +
> > > > +             if (((entry + 1) % RTASE_NUM_DESC) == 0)
> > > > +                     status = (opts1 | len | RTASE_RING_END);
> > > > +             else
> > > > +                     status = opts1 | len;
> > > > +
> > > > +             if (cur_frag == (nr_frags - 1)) {
> > > > +                     ring->skbuff[entry] = skb;
> > > > +                     status |= RTASE_TX_LAST_FRAG;
> > > > +             }
> > > > +
> > > > +             ring->mis.len[entry] = len;
> > > > +             txd->addr = cpu_to_le64(mapping);
> > > > +             txd->opts2 = cpu_to_le32(opts2);
> > > > +
> > > > +             /* make sure the operating fields have been updated */
> > > > +             dma_wmb();
> > > > +             txd->opts1 = cpu_to_le32(status);
> > > > +     }
> > > > +
> > > > +     return cur_frag;
> > > > +
> > > > +err_out:
> > > > +     rtase_tx_clear_range(ring, ring->cur_idx + 1, cur_frag);
> > > > +     return -EIO;
> > > > +}
> > > > +
> > > > +static netdev_tx_t rtase_start_xmit(struct sk_buff *skb,
> > > > +                                 struct net_device *dev) {
> > > > +     struct skb_shared_info *shinfo = skb_shinfo(skb);
> > > > +     struct rtase_private *tp = netdev_priv(dev);
> > > > +     u32 q_idx, entry, len, opts1, opts2;
> > > > +     struct netdev_queue *tx_queue;
> > > > +     bool stop_queue, door_bell;
> > > > +     u32 mss = shinfo->gso_size;
> > > > +     struct rtase_tx_desc *txd;
> > > > +     struct rtase_ring *ring;
> > > > +     dma_addr_t mapping;
> > > > +     int frags;
> > > > +
> > > > +     /* multiqueues */
> > > > +     q_idx = skb_get_queue_mapping(skb);
> > > > +     ring = &tp->tx_ring[q_idx];
> > > > +     tx_queue = netdev_get_tx_queue(dev, q_idx);
> > > > +
> > > > +     if (unlikely(!rtase_tx_avail(ring))) {
> > > > +             if (net_ratelimit())
> > > > +                     netdev_err(dev, "BUG! Tx Ring full when
> > > > + queue
> > > > awake!\n");
> > > > +             goto err_stop;
> > > > +     }
> > > > +
> > > > +     entry = ring->cur_idx % RTASE_NUM_DESC;
> > > > +     txd = ring->desc + sizeof(struct rtase_tx_desc) * entry;
> > > > +
> > > > +     opts1 = RTASE_DESC_OWN;
> > > > +     opts2 = rtase_tx_vlan_tag(tp, skb);
> > > > +
> > > > +     /* tcp segmentation offload (or tcp large send) */
> > > > +     if (mss) {
> > > > +             if (shinfo->gso_type & SKB_GSO_TCPV4) {
> > > > +                     opts1 |= RTASE_GIANT_SEND_V4;
> > > > +             } else if (shinfo->gso_type & SKB_GSO_TCPV6) {
> > > > +                     if (skb_cow_head(skb, 0))
> > > > +                             goto err_dma_0;
> > > > +
> > > > +                     tcp_v6_gso_csum_prep(skb);
> > > > +                     opts1 |= RTASE_GIANT_SEND_V6;
> > > > +             } else {
> > > > +                     WARN_ON_ONCE(1);
> > > > +             }
> > > > +
> > > > +             opts1 |= u32_encode_bits(skb_transport_offset(skb),
> > > > +                                      RTASE_TCPHO_MASK);
> > > > +             opts2 |= u32_encode_bits(mss, RTASE_MSS_MASK);
> > > > +     } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
> > > > +             opts2 |= rtase_tx_csum(skb, dev);
> > > > +     }
> > > > +
> > > > +     frags = rtase_xmit_frags(ring, skb, opts1, opts2);
> > > > +     if (unlikely(frags < 0))
> > > > +             goto err_dma_0;
> > > > +
> > > > +     if (frags) {
> > > > +             len = skb_headlen(skb);
> > > > +             opts1 |= RTASE_TX_FIRST_FRAG;
> > > > +     } else {
> > > > +             len = skb->len;
> > > > +             ring->skbuff[entry] = skb;
> > > > +             opts1 |= RTASE_TX_FIRST_FRAG |
> RTASE_TX_LAST_FRAG;
> > > > +     }
> > > > +
> > > > +     if (((entry + 1) % RTASE_NUM_DESC) == 0)
> > > > +             opts1 |= (len | RTASE_RING_END);
> > > > +     else
> > > > +             opts1 |= len;
> > > > +
> > > > +     mapping = dma_map_single(&tp->pdev->dev, skb->data, len,
> > > > +                              DMA_TO_DEVICE);
> > > > +
> > > > +     if (unlikely(dma_mapping_error(&tp->pdev->dev, mapping))) {
> > > > +             if (unlikely(net_ratelimit()))
> > > > +                     netdev_err(dev, "Failed to map TX DMA!\n");
> > > > +
> > > > +             goto err_dma_1;
> > > > +     }
> > > > +
> > > > +     ring->mis.len[entry] = len;
> > > > +     txd->addr = cpu_to_le64(mapping);
> > > > +     txd->opts2 = cpu_to_le32(opts2);
> > > > +     txd->opts1 = cpu_to_le32(opts1 & ~RTASE_DESC_OWN);
> > > > +
> > > > +     /* make sure the operating fields have been updated */
> > > > +     dma_wmb();
> > > > +
> > > > +     door_bell = __netdev_tx_sent_queue(tx_queue, skb->len,
> > > > +                                        netdev_xmit_more());
> > > > +
> > > > +     txd->opts1 = cpu_to_le32(opts1);
> > > > +
> > > > +     skb_tx_timestamp(skb);
> > > > +
> > > > +     /* tx needs to see descriptor changes before updated cur_idx */
> > > > +     smp_wmb();
> > > > +
> > > > +     WRITE_ONCE(ring->cur_idx, ring->cur_idx + frags + 1);
> > > > +
> > > > +     stop_queue = !netif_subqueue_maybe_stop(dev, ring->index,
> > > > +
> > > > + rtase_tx_avail(ring),
> > > > +
> > > RTASE_TX_STOP_THRS,
> > > > +
> > > RTASE_TX_START_THRS);
> > > > +
> > > > +     if (door_bell || stop_queue)
> > > > +             rtase_w8(tp, RTASE_TPPOLL, BIT(ring->index));
> > > > +
> > > > +     return NETDEV_TX_OK;
> > > > +
> > > > +err_dma_1:
> > > > +     ring->skbuff[entry] = NULL;
> > > > +     rtase_tx_clear_range(ring, ring->cur_idx + 1, frags);
> > > > +
> > > > +err_dma_0:
> > > > +     dev->stats.tx_dropped++;
> > > > +     dev_kfree_skb_any(skb);
> > > > +     return NETDEV_TX_OK;
> > > > +
> > > > +err_stop:
> > > > +     netif_stop_queue(dev);
> > > > +     dev->stats.tx_dropped++;
> > > > +     return NETDEV_TX_BUSY;
> > > > +}
> > > > +
> > > >  static void rtase_enable_eem_write(const struct rtase_private *tp)  {
> > > >       u8 val;
> > > > @@ -1065,6 +1349,7 @@ static void rtase_netpoll(struct net_device
> > > > *dev) static const struct net_device_ops rtase_netdev_ops = {
> > > >       .ndo_open = rtase_open,
> > > >       .ndo_stop = rtase_close,
> > > > +     .ndo_start_xmit = rtase_start_xmit,
> > > >  #ifdef CONFIG_NET_POLL_CONTROLLER
> > > >       .ndo_poll_controller = rtase_netpoll,  #endif
> > > > --
> > > > 2.34.1
> > > >
Hariprasad Kelam June 13, 2024, 7:24 a.m. UTC | #7
> > > > > Implement .ndo_start_xmit function to fill the information of
> > > > > the packet to be transmitted into the tx descriptor, and then
> > > > > the hardware will transmit the packet using the information in
> > > > > the tx
> > > descriptor.
> > > > > In addition, we also implemented the tx_handler function to
> > > > > enable the tx descriptor to be reused.
> > > > >
> > > > > Signed-off-by: Justin Lai <justinlai0215@realtek.com>
> > > > > ---
> > > > >  .../net/ethernet/realtek/rtase/rtase_main.c   | 285
> > ++++++++++++++++++
> > > > >  1 file changed, 285 insertions(+)
> > > > >
> > > > > diff --git a/drivers/net/ethernet/realtek/rtase/rtase_main.c
> > > > > b/drivers/net/ethernet/realtek/rtase/rtase_main.c
> > > > > index 23406c195cff..6bdb4edbfbc1 100644
> > > > > --- a/drivers/net/ethernet/realtek/rtase/rtase_main.c
> > > > > +++ b/drivers/net/ethernet/realtek/rtase/rtase_main.c
> > > > > @@ -256,6 +256,68 @@ static void rtase_mark_to_asic(union
> > > > > rtase_rx_desc *desc, u32 rx_buf_sz)
> > > > >                  cpu_to_le32(RTASE_DESC_OWN | eor | rx_buf_sz));
> > > > > }
> > > > >
> > > > > +static u32 rtase_tx_avail(struct rtase_ring *ring) {
> > > > > +     return READ_ONCE(ring->dirty_idx) + RTASE_NUM_DESC -
> > > > > +            READ_ONCE(ring->cur_idx); }
> > > > > +
> > > > > +static int tx_handler(struct rtase_ring *ring, int budget) {
> > > > > +     const struct rtase_private *tp = ring->ivec->tp;
> > > > > +     struct net_device *dev = tp->dev;
> > > > > +     u32 dirty_tx, tx_left;
> > > > > +     u32 bytes_compl = 0;
> > > > > +     u32 pkts_compl = 0;
> > > > > +     int workdone = 0;
> > > > > +
> > > > > +     dirty_tx = ring->dirty_idx;
> > > > > +     tx_left = READ_ONCE(ring->cur_idx) - dirty_tx;
> > > > > +
> > > > > +     while (tx_left > 0) {
> > > > > +             u32 entry = dirty_tx % RTASE_NUM_DESC;
> > > > > +             struct rtase_tx_desc *desc = ring->desc +
> > > > > +                                    sizeof(struct
> > > > > + rtase_tx_desc)
> > > > > + *
> > > > entry;
> > > > > +             u32 status;
> > > > > +
> > > > > +             status = le32_to_cpu(desc->opts1);
> > > > > +
> > > > > +             if (status & RTASE_DESC_OWN)
> > > > > +                     break;
> > > > > +
> > > > > +             rtase_unmap_tx_skb(tp->pdev, ring->mis.len[entry],
> > desc);
> > > > > +             ring->mis.len[entry] = 0;
> > > > > +             if (ring->skbuff[entry]) {
> > > > > +                     pkts_compl++;
> > > > > +                     bytes_compl += ring->skbuff[entry]->len;
> > > > > +                     napi_consume_skb(ring->skbuff[entry],
> > budget);
> > > > > +                     ring->skbuff[entry] = NULL;
> > > > > +             }
> > > > > +
> > > > > +             dirty_tx++;
> > > > > +             tx_left--;
> > > > > +             workdone++;
> > > > > +
> > > > > +             if (workdone == RTASE_TX_BUDGET_DEFAULT)
> > > > > +                     break;
> > > > > +     }
> > > > > +
> > > > > +     if (ring->dirty_idx != dirty_tx) {
> > > > > +             dev_sw_netstats_tx_add(dev, pkts_compl, bytes_compl);
> > > > > +             WRITE_ONCE(ring->dirty_idx, dirty_tx);
> > > > > +
> > > > > +             netif_subqueue_completed_wake(dev, ring->index, pkts_compl,
> > > > > +                                           bytes_compl,
> > > > > +                                           rtase_tx_avail(ring),
> > > > > +                                           RTASE_TX_START_THRS);
> > > > > +
> > > > > +             if (ring->cur_idx != dirty_tx)
> > > > > +                     rtase_w8(tp, RTASE_TPPOLL, BIT(ring->index));
> > > > > +     }
> > > > > +
> > > > > +     return 0;
> > > > > +}
> > > > > +
> > > > >  static void rtase_tx_desc_init(struct rtase_private *tp, u16 idx)
> > > > >  {
> > > > >       struct rtase_ring *ring = &tp->tx_ring[idx];
> > > > > @@ -1014,6 +1076,228 @@ static int rtase_close(struct net_device *dev)
> > > > >       return 0;
> > > > >  }
> > > > >
> > > > > +static u32 rtase_tx_vlan_tag(const struct rtase_private *tp,
> > > > > +                          const struct sk_buff *skb)
> > > > > +{
> > > > > +     return (skb_vlan_tag_present(skb)) ?
> > > > > +             (RTASE_TX_VLAN_TAG | swab16(skb_vlan_tag_get(skb))) : 0x00;
> > > > > +}
> > > > > +
> > > >                The VLAN protocol can be either 0x8100 or 0x88A8; how does
> > > > the hardware know which header to insert?
> > > > Thanks,
> > > > Hariprasad k
> > >
> > > We only allow the hardware to insert 0x8100 tags. A frame must carry a
> > > 0x8100 tag before it can also carry 0x88a8, and skb_vlan_tag_present()
> > > indicates that a VLAN tag is present, so at least the 0x8100 VLAN will
> > > exist.
> > > >
> > Thanks for the explanation, but one question still bothers me: how does the
> > hardware know the tag insertion offset within the packet?
> >
> > For example:
> > Case 1:       DMAC + SMAC + 8100 VLAN_ID + IP
> >                Here the offset is right after the SMAC.
> > Case 2:       DMAC + SMAC + 88A8 VLAN_ID + 8100 VLAN_ID + IP
> >                Here the offset is right after the first VLAN tag.
> >
> > Thanks,
> > Hariprasad k
> 
> This driver only enables NETIF_F_HW_VLAN_CTAG_TX, and we only support
> case 1.
> 

   Got it. So the hardware is programmed with a fixed offset.
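
To make the fixed-offset point concrete, below is a minimal sketch of the
feature setup this behaviour implies; the function name
rtase_example_set_vlan_features is purely illustrative and not part of the
driver. Because only NETIF_F_HW_VLAN_CTAG_TX is advertised, the stack asks the
hardware to insert at most one 0x8100 tag, while any 0x88A8 outer tag is built
in software and already sits in skb->data, so an insertion point fixed right
after the source MAC is always correct.

#include <linux/netdevice.h>

/* Illustrative sketch only: advertise hardware insertion of 802.1Q (0x8100)
 * tags and nothing else, so the device never needs to locate an 0x88A8
 * outer tag.
 */
static void rtase_example_set_vlan_features(struct net_device *dev)
{
	dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX;
	dev->features |= NETIF_F_HW_VLAN_CTAG_TX;
	/* NETIF_F_HW_VLAN_STAG_TX is deliberately left unset: QinQ (Case 2)
	 * frames are tagged entirely in software before they reach the driver.
	 */
}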

> >
> > > > > +static u32 rtase_tx_csum(struct sk_buff *skb, const struct net_device *dev)
> > > > > +{
> > > > > +     u32 csum_cmd = 0;
> > > > > +     u8 ip_protocol;
> > > > > +
> > > > > +     switch (vlan_get_protocol(skb)) {
> > > > > +     case htons(ETH_P_IP):
> > > > > +             csum_cmd = RTASE_TX_IPCS_C;
> > > > > +             ip_protocol = ip_hdr(skb)->protocol;
> > > > > +             break;
> > > > > +
> > > > > +     case htons(ETH_P_IPV6):
> > > > > +             csum_cmd = RTASE_TX_IPV6F_C;
> > > > > +             ip_protocol = ipv6_hdr(skb)->nexthdr;
> > > > > +             break;
> > > > > +
> > > > > +     default:
> > > > > +             ip_protocol = IPPROTO_RAW;
> > > > > +             break;
> > > > > +     }
> > > > > +
> > > > > +     if (ip_protocol == IPPROTO_TCP)
> > > > > +             csum_cmd |= RTASE_TX_TCPCS_C;
> > > > > +     else if (ip_protocol == IPPROTO_UDP)
> > > > > +             csum_cmd |= RTASE_TX_UDPCS_C;
> > > > > +
> > > > > +     csum_cmd |= u32_encode_bits(skb_transport_offset(skb),
> > > > > +                                 RTASE_TCPHO_MASK);
> > > > > +
> > > > > +     return csum_cmd;
> > > > > +}
> > > > > +
> > > > > +static int rtase_xmit_frags(struct rtase_ring *ring, struct sk_buff *skb,
> > > > > +                         u32 opts1, u32 opts2)
> > > > > +{
> > > > > +     const struct skb_shared_info *info = skb_shinfo(skb);
> > > > > +     const struct rtase_private *tp = ring->ivec->tp;
> > > > > +     const u8 nr_frags = info->nr_frags;
> > > > > +     struct rtase_tx_desc *txd = NULL;
> > > > > +     u32 cur_frag, entry;
> > > > > +
> > > > > +     entry = ring->cur_idx;
> > > > > +     for (cur_frag = 0; cur_frag < nr_frags; cur_frag++) {
> > > > > +             const skb_frag_t *frag = &info->frags[cur_frag];
> > > > > +             dma_addr_t mapping;
> > > > > +             u32 status, len;
> > > > > +             void *addr;
> > > > > +
> > > > > +             entry = (entry + 1) % RTASE_NUM_DESC;
> > > > > +
> > > > > +             txd = ring->desc + sizeof(struct rtase_tx_desc) * entry;
> > > > > +             len = skb_frag_size(frag);
> > > > > +             addr = skb_frag_address(frag);
> > > > > +             mapping = dma_map_single(&tp->pdev->dev, addr, len,
> > > > > +                                      DMA_TO_DEVICE);
> > > > > +
> > > > > +             if (unlikely(dma_mapping_error(&tp->pdev->dev, mapping))) {
> > > > > +                     if (unlikely(net_ratelimit()))
> > > > > +                             netdev_err(tp->dev,
> > > > > +                                        "Failed to map TX fragments DMA!\n");
> > > > > +
> > > > > +                     goto err_out;
> > > > > +             }
> > > > > +
> > > > > +             if (((entry + 1) % RTASE_NUM_DESC) == 0)
> > > > > +                     status = (opts1 | len | RTASE_RING_END);
> > > > > +             else
> > > > > +                     status = opts1 | len;
> > > > > +
> > > > > +             if (cur_frag == (nr_frags - 1)) {
> > > > > +                     ring->skbuff[entry] = skb;
> > > > > +                     status |= RTASE_TX_LAST_FRAG;
> > > > > +             }
> > > > > +
> > > > > +             ring->mis.len[entry] = len;
> > > > > +             txd->addr = cpu_to_le64(mapping);
> > > > > +             txd->opts2 = cpu_to_le32(opts2);
> > > > > +
> > > > > +             /* make sure the operating fields have been updated */
> > > > > +             dma_wmb();
> > > > > +             txd->opts1 = cpu_to_le32(status);
> > > > > +     }
> > > > > +
> > > > > +     return cur_frag;
> > > > > +
> > > > > +err_out:
> > > > > +     rtase_tx_clear_range(ring, ring->cur_idx + 1, cur_frag);
> > > > > +     return -EIO;
> > > > > +}
> > > > > +
> > > > > +static netdev_tx_t rtase_start_xmit(struct sk_buff *skb,
> > > > > +                                 struct net_device *dev)
> > > > > +{
> > > > > +     struct skb_shared_info *shinfo = skb_shinfo(skb);
> > > > > +     struct rtase_private *tp = netdev_priv(dev);
> > > > > +     u32 q_idx, entry, len, opts1, opts2;
> > > > > +     struct netdev_queue *tx_queue;
> > > > > +     bool stop_queue, door_bell;
> > > > > +     u32 mss = shinfo->gso_size;
> > > > > +     struct rtase_tx_desc *txd;
> > > > > +     struct rtase_ring *ring;
> > > > > +     dma_addr_t mapping;
> > > > > +     int frags;
> > > > > +
> > > > > +     /* multiqueues */
> > > > > +     q_idx = skb_get_queue_mapping(skb);
> > > > > +     ring = &tp->tx_ring[q_idx];
> > > > > +     tx_queue = netdev_get_tx_queue(dev, q_idx);
> > > > > +
> > > > > +     if (unlikely(!rtase_tx_avail(ring))) {
> > > > > +             if (net_ratelimit())
> > > > > +                     netdev_err(dev, "BUG! Tx Ring full when queue awake!\n");
> > > > > +             goto err_stop;
> > > > > +     }
> > > > > +
> > > > > +     entry = ring->cur_idx % RTASE_NUM_DESC;
> > > > > +     txd = ring->desc + sizeof(struct rtase_tx_desc) * entry;
> > > > > +
> > > > > +     opts1 = RTASE_DESC_OWN;
> > > > > +     opts2 = rtase_tx_vlan_tag(tp, skb);
> > > > > +
> > > > > +     /* tcp segmentation offload (or tcp large send) */
> > > > > +     if (mss) {
> > > > > +             if (shinfo->gso_type & SKB_GSO_TCPV4) {
> > > > > +                     opts1 |= RTASE_GIANT_SEND_V4;
> > > > > +             } else if (shinfo->gso_type & SKB_GSO_TCPV6) {
> > > > > +                     if (skb_cow_head(skb, 0))
> > > > > +                             goto err_dma_0;
> > > > > +
> > > > > +                     tcp_v6_gso_csum_prep(skb);
> > > > > +                     opts1 |= RTASE_GIANT_SEND_V6;
> > > > > +             } else {
> > > > > +                     WARN_ON_ONCE(1);
> > > > > +             }
> > > > > +
> > > > > +             opts1 |= u32_encode_bits(skb_transport_offset(skb),
> > > > > +                                      RTASE_TCPHO_MASK);
> > > > > +             opts2 |= u32_encode_bits(mss, RTASE_MSS_MASK);
> > > > > +     } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
> > > > > +             opts2 |= rtase_tx_csum(skb, dev);
> > > > > +     }
> > > > > +
> > > > > +     frags = rtase_xmit_frags(ring, skb, opts1, opts2);
> > > > > +     if (unlikely(frags < 0))
> > > > > +             goto err_dma_0;
> > > > > +
> > > > > +     if (frags) {
> > > > > +             len = skb_headlen(skb);
> > > > > +             opts1 |= RTASE_TX_FIRST_FRAG;
> > > > > +     } else {
> > > > > +             len = skb->len;
> > > > > +             ring->skbuff[entry] = skb;
> > > > > +             opts1 |= RTASE_TX_FIRST_FRAG | RTASE_TX_LAST_FRAG;
> > > > > +     }
> > > > > +
> > > > > +     if (((entry + 1) % RTASE_NUM_DESC) == 0)
> > > > > +             opts1 |= (len | RTASE_RING_END);
> > > > > +     else
> > > > > +             opts1 |= len;
> > > > > +
> > > > > +     mapping = dma_map_single(&tp->pdev->dev, skb->data, len,
> > > > > +                              DMA_TO_DEVICE);
> > > > > +
> > > > > +     if (unlikely(dma_mapping_error(&tp->pdev->dev, mapping))) {
> > > > > +             if (unlikely(net_ratelimit()))
> > > > > +                     netdev_err(dev, "Failed to map TX DMA!\n");
> > > > > +
> > > > > +             goto err_dma_1;
> > > > > +     }
> > > > > +
> > > > > +     ring->mis.len[entry] = len;
> > > > > +     txd->addr = cpu_to_le64(mapping);
> > > > > +     txd->opts2 = cpu_to_le32(opts2);
> > > > > +     txd->opts1 = cpu_to_le32(opts1 & ~RTASE_DESC_OWN);
> > > > > +
> > > > > +     /* make sure the operating fields have been updated */
> > > > > +     dma_wmb();
> > > > > +
> > > > > +     door_bell = __netdev_tx_sent_queue(tx_queue, skb->len,
> > > > > +                                        netdev_xmit_more());
> > > > > +
> > > > > +     txd->opts1 = cpu_to_le32(opts1);
> > > > > +
> > > > > +     skb_tx_timestamp(skb);
> > > > > +
> > > > > +     /* tx needs to see descriptor changes before updated cur_idx */
> > > > > +     smp_wmb();
> > > > > +
> > > > > +     WRITE_ONCE(ring->cur_idx, ring->cur_idx + frags + 1);
> > > > > +
> > > > > +     stop_queue = !netif_subqueue_maybe_stop(dev, ring->index,
> > > > > +                                             rtase_tx_avail(ring),
> > > > > +                                             RTASE_TX_STOP_THRS,
> > > > > +                                             RTASE_TX_START_THRS);
> > > > > +
> > > > > +     if (door_bell || stop_queue)
> > > > > +             rtase_w8(tp, RTASE_TPPOLL, BIT(ring->index));
> > > > > +
> > > > > +     return NETDEV_TX_OK;
> > > > > +
> > > > > +err_dma_1:
> > > > > +     ring->skbuff[entry] = NULL;
> > > > > +     rtase_tx_clear_range(ring, ring->cur_idx + 1, frags);
> > > > > +
> > > > > +err_dma_0:
> > > > > +     dev->stats.tx_dropped++;
> > > > > +     dev_kfree_skb_any(skb);
> > > > > +     return NETDEV_TX_OK;
> > > > > +
> > > > > +err_stop:
> > > > > +     netif_stop_queue(dev);
> > > > > +     dev->stats.tx_dropped++;
> > > > > +     return NETDEV_TX_BUSY;
> > > > > +}
> > > > > +
> > > > >  static void rtase_enable_eem_write(const struct rtase_private *tp)  {
> > > > >       u8 val;
> > > > > @@ -1065,6 +1349,7 @@ static void rtase_netpoll(struct net_device *dev)
> > > > >  static const struct net_device_ops rtase_netdev_ops = {
> > > > >       .ndo_open = rtase_open,
> > > > >       .ndo_stop = rtase_close,
> > > > > +     .ndo_start_xmit = rtase_start_xmit,
> > > > >  #ifdef CONFIG_NET_POLL_CONTROLLER
> > > > >       .ndo_poll_controller = rtase_netpoll,
> > > > >  #endif
> > > > > --
> > > > > 2.34.1
> > > > >
diff mbox series

Patch

diff --git a/drivers/net/ethernet/realtek/rtase/rtase_main.c b/drivers/net/ethernet/realtek/rtase/rtase_main.c
index 23406c195cff..6bdb4edbfbc1 100644
--- a/drivers/net/ethernet/realtek/rtase/rtase_main.c
+++ b/drivers/net/ethernet/realtek/rtase/rtase_main.c
@@ -256,6 +256,68 @@  static void rtase_mark_to_asic(union rtase_rx_desc *desc, u32 rx_buf_sz)
 		   cpu_to_le32(RTASE_DESC_OWN | eor | rx_buf_sz));
 }
 
+static u32 rtase_tx_avail(struct rtase_ring *ring)
+{
+	return READ_ONCE(ring->dirty_idx) + RTASE_NUM_DESC -
+	       READ_ONCE(ring->cur_idx);
+}
+
+static int tx_handler(struct rtase_ring *ring, int budget)
+{
+	const struct rtase_private *tp = ring->ivec->tp;
+	struct net_device *dev = tp->dev;
+	u32 dirty_tx, tx_left;
+	u32 bytes_compl = 0;
+	u32 pkts_compl = 0;
+	int workdone = 0;
+
+	dirty_tx = ring->dirty_idx;
+	tx_left = READ_ONCE(ring->cur_idx) - dirty_tx;
+
+	while (tx_left > 0) {
+		u32 entry = dirty_tx % RTASE_NUM_DESC;
+		struct rtase_tx_desc *desc = ring->desc +
+				       sizeof(struct rtase_tx_desc) * entry;
+		u32 status;
+
+		status = le32_to_cpu(desc->opts1);
+
+		if (status & RTASE_DESC_OWN)
+			break;
+
+		rtase_unmap_tx_skb(tp->pdev, ring->mis.len[entry], desc);
+		ring->mis.len[entry] = 0;
+		if (ring->skbuff[entry]) {
+			pkts_compl++;
+			bytes_compl += ring->skbuff[entry]->len;
+			napi_consume_skb(ring->skbuff[entry], budget);
+			ring->skbuff[entry] = NULL;
+		}
+
+		dirty_tx++;
+		tx_left--;
+		workdone++;
+
+		if (workdone == RTASE_TX_BUDGET_DEFAULT)
+			break;
+	}
+
+	if (ring->dirty_idx != dirty_tx) {
+		dev_sw_netstats_tx_add(dev, pkts_compl, bytes_compl);
+		WRITE_ONCE(ring->dirty_idx, dirty_tx);
+
+		netif_subqueue_completed_wake(dev, ring->index, pkts_compl,
+					      bytes_compl,
+					      rtase_tx_avail(ring),
+					      RTASE_TX_START_THRS);
+
+		if (ring->cur_idx != dirty_tx)
+			rtase_w8(tp, RTASE_TPPOLL, BIT(ring->index));
+	}
+
+	return 0;
+}
+
 static void rtase_tx_desc_init(struct rtase_private *tp, u16 idx)
 {
 	struct rtase_ring *ring = &tp->tx_ring[idx];
@@ -1014,6 +1076,228 @@  static int rtase_close(struct net_device *dev)
 	return 0;
 }
 
+static u32 rtase_tx_vlan_tag(const struct rtase_private *tp,
+			     const struct sk_buff *skb)
+{
+	return (skb_vlan_tag_present(skb)) ?
+		(RTASE_TX_VLAN_TAG | swab16(skb_vlan_tag_get(skb))) : 0x00;
+}
+
+static u32 rtase_tx_csum(struct sk_buff *skb, const struct net_device *dev)
+{
+	u32 csum_cmd = 0;
+	u8 ip_protocol;
+
+	switch (vlan_get_protocol(skb)) {
+	case htons(ETH_P_IP):
+		csum_cmd = RTASE_TX_IPCS_C;
+		ip_protocol = ip_hdr(skb)->protocol;
+		break;
+
+	case htons(ETH_P_IPV6):
+		csum_cmd = RTASE_TX_IPV6F_C;
+		ip_protocol = ipv6_hdr(skb)->nexthdr;
+		break;
+
+	default:
+		ip_protocol = IPPROTO_RAW;
+		break;
+	}
+
+	if (ip_protocol == IPPROTO_TCP)
+		csum_cmd |= RTASE_TX_TCPCS_C;
+	else if (ip_protocol == IPPROTO_UDP)
+		csum_cmd |= RTASE_TX_UDPCS_C;
+
+	csum_cmd |= u32_encode_bits(skb_transport_offset(skb),
+				    RTASE_TCPHO_MASK);
+
+	return csum_cmd;
+}
+
+static int rtase_xmit_frags(struct rtase_ring *ring, struct sk_buff *skb,
+			    u32 opts1, u32 opts2)
+{
+	const struct skb_shared_info *info = skb_shinfo(skb);
+	const struct rtase_private *tp = ring->ivec->tp;
+	const u8 nr_frags = info->nr_frags;
+	struct rtase_tx_desc *txd = NULL;
+	u32 cur_frag, entry;
+
+	entry = ring->cur_idx;
+	for (cur_frag = 0; cur_frag < nr_frags; cur_frag++) {
+		const skb_frag_t *frag = &info->frags[cur_frag];
+		dma_addr_t mapping;
+		u32 status, len;
+		void *addr;
+
+		entry = (entry + 1) % RTASE_NUM_DESC;
+
+		txd = ring->desc + sizeof(struct rtase_tx_desc) * entry;
+		len = skb_frag_size(frag);
+		addr = skb_frag_address(frag);
+		mapping = dma_map_single(&tp->pdev->dev, addr, len,
+					 DMA_TO_DEVICE);
+
+		if (unlikely(dma_mapping_error(&tp->pdev->dev, mapping))) {
+			if (unlikely(net_ratelimit()))
+				netdev_err(tp->dev,
+					   "Failed to map TX fragments DMA!\n");
+
+			goto err_out;
+		}
+
+		if (((entry + 1) % RTASE_NUM_DESC) == 0)
+			status = (opts1 | len | RTASE_RING_END);
+		else
+			status = opts1 | len;
+
+		if (cur_frag == (nr_frags - 1)) {
+			ring->skbuff[entry] = skb;
+			status |= RTASE_TX_LAST_FRAG;
+		}
+
+		ring->mis.len[entry] = len;
+		txd->addr = cpu_to_le64(mapping);
+		txd->opts2 = cpu_to_le32(opts2);
+
+		/* make sure the operating fields have been updated */
+		dma_wmb();
+		txd->opts1 = cpu_to_le32(status);
+	}
+
+	return cur_frag;
+
+err_out:
+	rtase_tx_clear_range(ring, ring->cur_idx + 1, cur_frag);
+	return -EIO;
+}
+
+static netdev_tx_t rtase_start_xmit(struct sk_buff *skb,
+				    struct net_device *dev)
+{
+	struct skb_shared_info *shinfo = skb_shinfo(skb);
+	struct rtase_private *tp = netdev_priv(dev);
+	u32 q_idx, entry, len, opts1, opts2;
+	struct netdev_queue *tx_queue;
+	bool stop_queue, door_bell;
+	u32 mss = shinfo->gso_size;
+	struct rtase_tx_desc *txd;
+	struct rtase_ring *ring;
+	dma_addr_t mapping;
+	int frags;
+
+	/* multiqueues */
+	q_idx = skb_get_queue_mapping(skb);
+	ring = &tp->tx_ring[q_idx];
+	tx_queue = netdev_get_tx_queue(dev, q_idx);
+
+	if (unlikely(!rtase_tx_avail(ring))) {
+		if (net_ratelimit())
+			netdev_err(dev, "BUG! Tx Ring full when queue awake!\n");
+		goto err_stop;
+	}
+
+	entry = ring->cur_idx % RTASE_NUM_DESC;
+	txd = ring->desc + sizeof(struct rtase_tx_desc) * entry;
+
+	opts1 = RTASE_DESC_OWN;
+	opts2 = rtase_tx_vlan_tag(tp, skb);
+
+	/* tcp segmentation offload (or tcp large send) */
+	if (mss) {
+		if (shinfo->gso_type & SKB_GSO_TCPV4) {
+			opts1 |= RTASE_GIANT_SEND_V4;
+		} else if (shinfo->gso_type & SKB_GSO_TCPV6) {
+			if (skb_cow_head(skb, 0))
+				goto err_dma_0;
+
+			tcp_v6_gso_csum_prep(skb);
+			opts1 |= RTASE_GIANT_SEND_V6;
+		} else {
+			WARN_ON_ONCE(1);
+		}
+
+		opts1 |= u32_encode_bits(skb_transport_offset(skb),
+					 RTASE_TCPHO_MASK);
+		opts2 |= u32_encode_bits(mss, RTASE_MSS_MASK);
+	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		opts2 |= rtase_tx_csum(skb, dev);
+	}
+
+	frags = rtase_xmit_frags(ring, skb, opts1, opts2);
+	if (unlikely(frags < 0))
+		goto err_dma_0;
+
+	if (frags) {
+		len = skb_headlen(skb);
+		opts1 |= RTASE_TX_FIRST_FRAG;
+	} else {
+		len = skb->len;
+		ring->skbuff[entry] = skb;
+		opts1 |= RTASE_TX_FIRST_FRAG | RTASE_TX_LAST_FRAG;
+	}
+
+	if (((entry + 1) % RTASE_NUM_DESC) == 0)
+		opts1 |= (len | RTASE_RING_END);
+	else
+		opts1 |= len;
+
+	mapping = dma_map_single(&tp->pdev->dev, skb->data, len,
+				 DMA_TO_DEVICE);
+
+	if (unlikely(dma_mapping_error(&tp->pdev->dev, mapping))) {
+		if (unlikely(net_ratelimit()))
+			netdev_err(dev, "Failed to map TX DMA!\n");
+
+		goto err_dma_1;
+	}
+
+	ring->mis.len[entry] = len;
+	txd->addr = cpu_to_le64(mapping);
+	txd->opts2 = cpu_to_le32(opts2);
+	txd->opts1 = cpu_to_le32(opts1 & ~RTASE_DESC_OWN);
+
+	/* make sure the operating fields have been updated */
+	dma_wmb();
+
+	door_bell = __netdev_tx_sent_queue(tx_queue, skb->len,
+					   netdev_xmit_more());
+
+	txd->opts1 = cpu_to_le32(opts1);
+
+	skb_tx_timestamp(skb);
+
+	/* tx needs to see descriptor changes before updated cur_idx */
+	smp_wmb();
+
+	WRITE_ONCE(ring->cur_idx, ring->cur_idx + frags + 1);
+
+	stop_queue = !netif_subqueue_maybe_stop(dev, ring->index,
+						rtase_tx_avail(ring),
+						RTASE_TX_STOP_THRS,
+						RTASE_TX_START_THRS);
+
+	if (door_bell || stop_queue)
+		rtase_w8(tp, RTASE_TPPOLL, BIT(ring->index));
+
+	return NETDEV_TX_OK;
+
+err_dma_1:
+	ring->skbuff[entry] = NULL;
+	rtase_tx_clear_range(ring, ring->cur_idx + 1, frags);
+
+err_dma_0:
+	dev->stats.tx_dropped++;
+	dev_kfree_skb_any(skb);
+	return NETDEV_TX_OK;
+
+err_stop:
+	netif_stop_queue(dev);
+	dev->stats.tx_dropped++;
+	return NETDEV_TX_BUSY;
+}
+
 static void rtase_enable_eem_write(const struct rtase_private *tp)
 {
 	u8 val;
@@ -1065,6 +1349,7 @@  static void rtase_netpoll(struct net_device *dev)
 static const struct net_device_ops rtase_netdev_ops = {
 	.ndo_open = rtase_open,
 	.ndo_stop = rtase_close,
+	.ndo_start_xmit = rtase_start_xmit,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller = rtase_netpoll,
 #endif