diff mbox series

[1/2] wifi: rtw88: usb: Copy instead of cloning the RX skb

Message ID 28b09f4d-5271-470d-99b6-a0bbe0224739@gmail.com (mailing list archive)
State New
Delegated to: Ping-Ke Shih
Headers show
Series [1/2] wifi: rtw88: usb: Copy instead of cloning the RX skb | expand

Commit Message

Bitterblue Smith Nov. 14, 2024, 11:06 p.m. UTC
"iperf3 -c 192.168.0.1 -R --udp -b 0" shows about 40% of datagrams
are lost. Many torrents don't download faster than 3 MiB/s, probably
because the Bittorrent protocol uses UDP. This is somehow related to
the use of skb_clone() in the RX path.

Don't use skb_clone(). Instead allocate a new skb for each 802.11 frame
received and copy the data from the big (32768 byte) skb.

With this patch, "iperf3 -c 192.168.0.1 -R --udp -b 0" shows only 1-2%
of datagrams are lost, and torrents can reach download speeds of 36
MiB/s.

Tested with RTL8812AU and RTL8822CU.

Signed-off-by: Bitterblue Smith <rtl8821cerfe2@gmail.com>
---
 drivers/net/wireless/realtek/rtw88/usb.c | 52 ++++++++++++++----------
 1 file changed, 31 insertions(+), 21 deletions(-)

Comments

Ping-Ke Shih Nov. 18, 2024, 9:16 a.m. UTC | #1
Bitterblue Smith <rtl8821cerfe2@gmail.com> wrote:
> "iperf3 -c 192.168.0.1 -R --udp -b 0" shows about 40% of datagrams
> are lost. Many torrents don't download faster than 3 MiB/s, probably
> because the Bittorrent protocol uses UDP. This is somehow related to
> the use of skb_clone() in the RX path.

Using skb_clone() can improve throughput is weird to me too. Do you check
top with 100% cpu usage?

> 
> Don't use skb_clone(). Instead allocate a new skb for each 802.11 frame
> received and copy the data from the big (32768 byte) skb.
> 
> With this patch, "iperf3 -c 192.168.0.1 -R --udp -b 0" shows only 1-2%
> of datagrams are lost, and torrents can reach download speeds of 36
> MiB/s.
> 
> Tested with RTL8812AU and RTL8822CU.
> 
> Signed-off-by: Bitterblue Smith <rtl8821cerfe2@gmail.com>
> ---
>  drivers/net/wireless/realtek/rtw88/usb.c | 52 ++++++++++++++----------
>  1 file changed, 31 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/net/wireless/realtek/rtw88/usb.c b/drivers/net/wireless/realtek/rtw88/usb.c
> index 93ac4837e1b5..727569d4ef4b 100644
> --- a/drivers/net/wireless/realtek/rtw88/usb.c
> +++ b/drivers/net/wireless/realtek/rtw88/usb.c
> @@ -7,6 +7,7 @@
>  #include <linux/mutex.h>
>  #include "main.h"
>  #include "debug.h"
> +#include "mac.h"
>  #include "reg.h"
>  #include "tx.h"
>  #include "rx.h"
> @@ -546,49 +547,58 @@ static void rtw_usb_rx_handler(struct work_struct *work)
>  {
>         struct rtw_usb *rtwusb = container_of(work, struct rtw_usb, rx_work);
>         struct rtw_dev *rtwdev = rtwusb->rtwdev;
> -       const struct rtw_chip_info *chip = rtwdev->chip;
> -       u32 pkt_desc_sz = chip->rx_pkt_desc_sz;
>         struct ieee80211_rx_status rx_status;
> -       u32 pkt_offset, next_pkt, urb_len;
>         struct rtw_rx_pkt_stat pkt_stat;
> -       struct sk_buff *next_skb;
> +       struct sk_buff *rx_skb;
>         struct sk_buff *skb;
> +       u32 pkt_desc_sz = rtwdev->chip->rx_pkt_desc_sz;
> +       u32 max_skb_len = pkt_desc_sz + PHY_STATUS_SIZE * 8 +
> +                         IEEE80211_MAX_MPDU_LEN_VHT_11454;
> +       u32 pkt_offset, next_pkt, skb_len;
>         u8 *rx_desc;
>         int limit;
> 
>         for (limit = 0; limit < 200; limit++) {
> -               skb = skb_dequeue(&rtwusb->rx_queue);
> -               if (!skb)
> +               rx_skb = skb_dequeue(&rtwusb->rx_queue);
> +               if (!rx_skb)
>                         break;
> 
>                 if (skb_queue_len(&rtwusb->rx_queue) >= RTW_USB_MAX_RXQ_LEN) {
>                         dev_dbg_ratelimited(rtwdev->dev, "failed to get rx_queue, overflow\n");
> -                       dev_kfree_skb_any(skb);
> +                       dev_kfree_skb_any(rx_skb);
>                         continue;
>                 }
> 
> -               urb_len = skb->len;
> +               rx_desc = rx_skb->data;
> 
>                 do {
> -                       rx_desc = skb->data;
>                         rtw_rx_query_rx_desc(rtwdev, rx_desc, &pkt_stat,
>                                              &rx_status);
>                         pkt_offset = pkt_desc_sz + pkt_stat.drv_info_sz +
>                                      pkt_stat.shift;
> 
> -                       next_pkt = round_up(pkt_stat.pkt_len + pkt_offset, 8);
> +                       skb_len = pkt_stat.pkt_len + pkt_offset;
> +                       if (skb_len > max_skb_len) {

This seems a new rule introduced by this patch. Do you really encounter this
case? Maybe this is the cause of slow download throughput?

> +                               rtw_dbg(rtwdev, RTW_DBG_USB,
> +                                       "skipping too big packet: %u\n",
> +                                       skb_len);
> +                               goto skip_packet;
> +                       }
> 
> -                       if (urb_len >= next_pkt + pkt_desc_sz)
> -                               next_skb = skb_clone(skb, GFP_KERNEL);
> -                       else
> -                               next_skb = NULL;
> +                       skb = alloc_skb(skb_len, GFP_KERNEL);
> +                       if (!skb) {
> +                               rtw_dbg(rtwdev, RTW_DBG_USB,
> +                                       "failed to allocate RX skb of size %u\n",
> +                                       skb_len);
> +                               goto skip_packet;
> +                       }
> +
> +                       skb_put_data(skb, rx_desc, skb_len);
> 
>                         if (pkt_stat.is_c2h) {
> -                               skb_trim(skb, pkt_stat.pkt_len + pkt_offset);
>                                 rtw_fw_c2h_cmd_rx_irqsafe(rtwdev, pkt_offset, skb);
>                         } else {
>                                 skb_pull(skb, pkt_offset);
> -                               skb_trim(skb, pkt_stat.pkt_len);
>                                 rtw_update_rx_freq_for_invalid(rtwdev, skb,
>                                                                &rx_status,
>                                                                &pkt_stat);
> @@ -597,12 +607,12 @@ static void rtw_usb_rx_handler(struct work_struct *work)
>                                 ieee80211_rx_irqsafe(rtwdev->hw, skb);
>                         }
> 
> -                       skb = next_skb;
> -                       if (skb)
> -                               skb_pull(skb, next_pkt);
> +skip_packet:
> +                       next_pkt = round_up(skb_len, 8);
> +                       rx_desc += next_pkt;
> +               } while (rx_desc + pkt_desc_sz < rx_skb->data + rx_skb->len);
> 
> -                       urb_len -= next_pkt;
> -               } while (skb);
> +               dev_kfree_skb_any(rx_skb);
>         }
>  }
> 
> --
> 2.47.0
diff mbox series

Patch

diff --git a/drivers/net/wireless/realtek/rtw88/usb.c b/drivers/net/wireless/realtek/rtw88/usb.c
index 93ac4837e1b5..727569d4ef4b 100644
--- a/drivers/net/wireless/realtek/rtw88/usb.c
+++ b/drivers/net/wireless/realtek/rtw88/usb.c
@@ -7,6 +7,7 @@ 
 #include <linux/mutex.h>
 #include "main.h"
 #include "debug.h"
+#include "mac.h"
 #include "reg.h"
 #include "tx.h"
 #include "rx.h"
@@ -546,49 +547,58 @@  static void rtw_usb_rx_handler(struct work_struct *work)
 {
 	struct rtw_usb *rtwusb = container_of(work, struct rtw_usb, rx_work);
 	struct rtw_dev *rtwdev = rtwusb->rtwdev;
-	const struct rtw_chip_info *chip = rtwdev->chip;
-	u32 pkt_desc_sz = chip->rx_pkt_desc_sz;
 	struct ieee80211_rx_status rx_status;
-	u32 pkt_offset, next_pkt, urb_len;
 	struct rtw_rx_pkt_stat pkt_stat;
-	struct sk_buff *next_skb;
+	struct sk_buff *rx_skb;
 	struct sk_buff *skb;
+	u32 pkt_desc_sz = rtwdev->chip->rx_pkt_desc_sz;
+	u32 max_skb_len = pkt_desc_sz + PHY_STATUS_SIZE * 8 +
+			  IEEE80211_MAX_MPDU_LEN_VHT_11454;
+	u32 pkt_offset, next_pkt, skb_len;
 	u8 *rx_desc;
 	int limit;
 
 	for (limit = 0; limit < 200; limit++) {
-		skb = skb_dequeue(&rtwusb->rx_queue);
-		if (!skb)
+		rx_skb = skb_dequeue(&rtwusb->rx_queue);
+		if (!rx_skb)
 			break;
 
 		if (skb_queue_len(&rtwusb->rx_queue) >= RTW_USB_MAX_RXQ_LEN) {
 			dev_dbg_ratelimited(rtwdev->dev, "failed to get rx_queue, overflow\n");
-			dev_kfree_skb_any(skb);
+			dev_kfree_skb_any(rx_skb);
 			continue;
 		}
 
-		urb_len = skb->len;
+		rx_desc = rx_skb->data;
 
 		do {
-			rx_desc = skb->data;
 			rtw_rx_query_rx_desc(rtwdev, rx_desc, &pkt_stat,
 					     &rx_status);
 			pkt_offset = pkt_desc_sz + pkt_stat.drv_info_sz +
 				     pkt_stat.shift;
 
-			next_pkt = round_up(pkt_stat.pkt_len + pkt_offset, 8);
+			skb_len = pkt_stat.pkt_len + pkt_offset;
+			if (skb_len > max_skb_len) {
+				rtw_dbg(rtwdev, RTW_DBG_USB,
+					"skipping too big packet: %u\n",
+					skb_len);
+				goto skip_packet;
+			}
 
-			if (urb_len >= next_pkt + pkt_desc_sz)
-				next_skb = skb_clone(skb, GFP_KERNEL);
-			else
-				next_skb = NULL;
+			skb = alloc_skb(skb_len, GFP_KERNEL);
+			if (!skb) {
+				rtw_dbg(rtwdev, RTW_DBG_USB,
+					"failed to allocate RX skb of size %u\n",
+					skb_len);
+				goto skip_packet;
+			}
+
+			skb_put_data(skb, rx_desc, skb_len);
 
 			if (pkt_stat.is_c2h) {
-				skb_trim(skb, pkt_stat.pkt_len + pkt_offset);
 				rtw_fw_c2h_cmd_rx_irqsafe(rtwdev, pkt_offset, skb);
 			} else {
 				skb_pull(skb, pkt_offset);
-				skb_trim(skb, pkt_stat.pkt_len);
 				rtw_update_rx_freq_for_invalid(rtwdev, skb,
 							       &rx_status,
 							       &pkt_stat);
@@ -597,12 +607,12 @@  static void rtw_usb_rx_handler(struct work_struct *work)
 				ieee80211_rx_irqsafe(rtwdev->hw, skb);
 			}
 
-			skb = next_skb;
-			if (skb)
-				skb_pull(skb, next_pkt);
+skip_packet:
+			next_pkt = round_up(skb_len, 8);
+			rx_desc += next_pkt;
+		} while (rx_desc + pkt_desc_sz < rx_skb->data + rx_skb->len);
 
-			urb_len -= next_pkt;
-		} while (skb);
+		dev_kfree_skb_any(rx_skb);
 	}
 }