diff mbox series

[RFC,2/3] mt76usb: fix tx/rx stop

Message ID 20190416091305.4218-3-sgruszka@redhat.com (mailing list archive)
State RFC
Delegated to: Felix Fietkau
Headers show
Series mt76: suspend/stop/removal fixes | expand

Commit Message

Stanislaw Gruszka April 16, 2019, 9:13 a.m. UTC
Disabling tasklets on stopping rx/tx is wrong. If blocked tasklet
is scheduled and we remove device we get 100% cpu usage:

  PID USER      PR  NI    VIRT    RES    SHR S  %CPU  %MEM     TIME+ COMMAND
    9 root      20   0       0      0      0 R  93.8   0.0   1:47.19 ksoftirqd/0

by using memory after free and eventually crash:

[ 2068.591964] RIP: 0010:tasklet_action_common.isra.17+0x66/0x100
[ 2068.591966] Code: 41 89 f5 eb 25 f0 48 0f ba 33 00 0f 83 b1 00 00 00 48 8b 7a 20 48 8b 42 18 e8 56 a3 b5 00 f0 80 23 fd 48 89 ea 48 85 ed 74 53 <48> 8b 2a 48 8d 5a 08 f0 48 0f ba 6a 08 01 72 0b 8b 42 10 85 c0 74
[ 2068.591968] RSP: 0018:ffff98758c34be58 EFLAGS: 00010206
[ 2068.591969] RAX: ffff98758e6966d0 RBX: ffff98756e69aef8 RCX: 0000000000000006
[ 2068.591970] RDX: 01060a053d060305 RSI: 0000000000000006 RDI: ffff98758e6966d0
[ 2068.591971] RBP: 01060a053d060305 R08: 0000000000000000 R09: 00000000000203c0
[ 2068.591971] R10: 000003ff65b34f08 R11: 0000000000000001 R12: ffff98758e6966d0
[ 2068.591972] R13: 0000000000000006 R14: 0000000000000040 R15: 0000000000000006
[ 2068.591974] FS:  0000000000000000(0000) GS:ffff98758e680000(0000) knlGS:0000000000000000
[ 2068.591975] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 2068.591975] CR2: 00002c5f73a6cc20 CR3: 00000002f920a001 CR4: 00000000003606e0
[ 2068.591977] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 2068.591978] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 2068.591978] Call Trace:
[ 2068.591985]  __do_softirq+0xe3/0x30a
[ 2068.591989]  ? sort_range+0x20/0x20
[ 2068.591990]  run_ksoftirqd+0x26/0x40
[ 2068.591992]  smpboot_thread_fn+0xc5/0x160
[ 2068.591995]  kthread+0x112/0x130
[ 2068.591997]  ? kthread_create_on_node+0x40/0x40
[ 2068.591998]  ret_from_fork+0x35/0x40
[ 2068.591999] Modules linked in: ccm arc4 fuse rfcomm cmac bnep sunrpc snd_hda_codec_hdmi snd_soc_skl snd_soc_core snd_soc_acpi_intel_match snd_hda_codec_realtek snd_soc_acpi snd_hda_codec_generic snd_soc_skl_ipc snd_soc_sst_ipc snd_soc_sst_dsp snd_hda_ext_core iTCO_wdt snd_hda_intel intel_rapl iTCO_vendor_support x86_pkg_temp_thermal intel_powerclamp btusb mei_wdt coretemp btrtl snd_hda_codec btbcm btintel intel_cstate snd_hwdep intel_uncore uvcvideo snd_hda_core videobuf2_vmalloc videobuf2_memops intel_rapl_perf wmi_bmof videobuf2_v4l2 intel_wmi_thunderbolt snd_seq bluetooth joydev videobuf2_common snd_seq_device snd_pcm videodev media i2c_i801 snd_timer idma64 ecdh_generic intel_lpss_pci intel_lpss mei_me mei ucsi_acpi typec_ucsi processor_thermal_device intel_soc_dts_iosf intel_pch_thermal typec thinkpad_acpi wmi snd soundcore rfkill int3403_thermal int340x_thermal_zone int3400_thermal acpi_thermal_rel acpi_pad pcc_cpufreq uas usb_storage crc32c_intel i915 i2c_algo_bit nvme serio_raw
[ 2068.592033]  drm_kms_helper e1000e nvme_core drm video ipv6 [last unloaded: cfg80211]

Fortunate thing is that this not happen frequently, as scheduling
tasklet on blocked state is very exceptional, though might happen.

Due to different RX/TX tasklet processing fix is different for those.

For RX we have to assure rx_tasklet do fail to resubmit buffers
by poisoning urb's and kill the tasklet.

For TX we need to handle all stop cases properly (suspend, module
unload, device removal).

Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
---
 drivers/net/wireless/mediatek/mt76/mac80211.c |  3 +-
 drivers/net/wireless/mediatek/mt76/mt76.h     |  7 +-
 .../net/wireless/mediatek/mt76/mt76x0/usb.c   | 10 +--
 .../net/wireless/mediatek/mt76/mt76x2/usb.c   |  9 +--
 .../wireless/mediatek/mt76/mt76x2/usb_init.c  |  1 -
 .../wireless/mediatek/mt76/mt76x2/usb_main.c  |  1 +
 drivers/net/wireless/mediatek/mt76/usb.c      | 78 +++++++++++++------
 7 files changed, 67 insertions(+), 42 deletions(-)

Comments

Stanislaw Gruszka April 23, 2019, 7:59 a.m. UTC | #1
On Tue, Apr 16, 2019 at 11:13:04AM +0200, Stanislaw Gruszka wrote:
> -EXPORT_SYMBOL_GPL(mt76u_stop_queues);
> +		tasklet_kill(&dev->usb.tx_tasklet);
> +
> +		/* On phisical device removal we do not get tx complete
> +		 * callbacks, need to complete our skb's manually.
> +		 */

FTR: this comment is wrong, we can have queued skb's on device removal,
but not due to not having urb complete callback, but due to kick fail
to submit urb's.

Stanislaw
diff mbox series

Patch

diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c
index 4b63d061c2a0..613f770ba2f7 100644
--- a/drivers/net/wireless/mediatek/mt76/mac80211.c
+++ b/drivers/net/wireless/mediatek/mt76/mac80211.c
@@ -390,7 +390,7 @@  void mt76_rx(struct mt76_dev *dev, enum mt76_rxq_id q, struct sk_buff *skb)
 }
 EXPORT_SYMBOL_GPL(mt76_rx);
 
-static bool mt76_has_tx_pending(struct mt76_dev *dev)
+bool mt76_has_tx_pending(struct mt76_dev *dev)
 {
 	struct mt76_queue *q;
 	int i;
@@ -403,6 +403,7 @@  static bool mt76_has_tx_pending(struct mt76_dev *dev)
 
 	return false;
 }
+EXPORT_SYMBOL_GPL(mt76_has_tx_pending);
 
 void mt76_set_channel(struct mt76_dev *dev)
 {
diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index f0d34901c825..6250a11af2bf 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -683,6 +683,7 @@  void mt76_release_buffered_frames(struct ieee80211_hw *hw,
 				  u16 tids, int nframes,
 				  enum ieee80211_frame_release_type reason,
 				  bool more_data);
+bool mt76_has_tx_pending(struct mt76_dev *dev);
 void mt76_set_channel(struct mt76_dev *dev);
 int mt76_get_survey(struct ieee80211_hw *hw, int idx,
 		    struct survey_info *survey);
@@ -777,10 +778,10 @@  int mt76u_vendor_request(struct mt76_dev *dev, u8 req,
 void mt76u_single_wr(struct mt76_dev *dev, const u8 req,
 		     const u16 offset, const u32 val);
 int mt76u_init(struct mt76_dev *dev, struct usb_interface *intf);
-int mt76u_submit_rx_buffers(struct mt76_dev *dev);
 int mt76u_alloc_queues(struct mt76_dev *dev);
-void mt76u_stop_queues(struct mt76_dev *dev);
-void mt76u_stop_stat_wk(struct mt76_dev *dev);
+void mt76u_stop_tx(struct mt76_dev *dev);
+void mt76u_stop_rx(struct mt76_dev *dev);
+int mt76u_resume_rx(struct mt76_dev *dev);
 void mt76u_queues_deinit(struct mt76_dev *dev);
 
 struct sk_buff *
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c b/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c
index 3276382b2180..22c10722019d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c
@@ -86,7 +86,7 @@  static void mt76x0u_mac_stop(struct mt76x02_dev *dev)
 	clear_bit(MT76_STATE_RUNNING, &dev->mt76.state);
 	cancel_delayed_work_sync(&dev->cal_work);
 	cancel_delayed_work_sync(&dev->mac_work);
-	mt76u_stop_stat_wk(&dev->mt76);
+	mt76u_stop_tx(&dev->mt76);
 	mt76x02u_exit_beacon_config(dev);
 
 	if (test_bit(MT76_REMOVED, &dev->mt76.state))
@@ -313,7 +313,7 @@  static int __maybe_unused mt76x0_suspend(struct usb_interface *usb_intf,
 {
 	struct mt76x02_dev *dev = usb_get_intfdata(usb_intf);
 
-	mt76u_stop_queues(&dev->mt76);
+	mt76u_stop_rx(&dev->mt76);
 	clear_bit(MT76_STATE_MCU_RUNNING, &dev->mt76.state);
 	mt76x0_chip_onoff(dev, false, false);
 
@@ -323,16 +323,12 @@  static int __maybe_unused mt76x0_suspend(struct usb_interface *usb_intf,
 static int __maybe_unused mt76x0_resume(struct usb_interface *usb_intf)
 {
 	struct mt76x02_dev *dev = usb_get_intfdata(usb_intf);
-	struct mt76_usb *usb = &dev->mt76.usb;
 	int ret;
 
-	ret = mt76u_submit_rx_buffers(&dev->mt76);
+	ret = mt76u_resume_rx(&dev->mt76);
 	if (ret < 0)
 		goto err;
 
-	tasklet_enable(&usb->rx_tasklet);
-	tasklet_enable(&usb->tx_tasklet);
-
 	ret = mt76x0u_init_hardware(dev);
 	if (ret)
 		goto err;
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c b/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c
index af90774cae32..7fd437a0b65b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c
@@ -107,7 +107,7 @@  static int __maybe_unused mt76x2u_suspend(struct usb_interface *intf,
 {
 	struct mt76x02_dev *dev = usb_get_intfdata(intf);
 
-	mt76u_stop_queues(&dev->mt76);
+	mt76u_stop_rx(&dev->mt76);
 
 	return 0;
 }
@@ -118,13 +118,10 @@  static int __maybe_unused mt76x2u_resume(struct usb_interface *intf)
 	struct mt76_usb *usb = &dev->mt76.usb;
 	int err;
 
-	err = mt76u_submit_rx_buffers(&dev->mt76);
-	if (err < 0)
+	err = mt76u_resume_rx(&dev->mt76);
+	if (err)
 		goto err;
 
-	tasklet_enable(&usb->rx_tasklet);
-	tasklet_enable(&usb->tx_tasklet);
-
 	err = mt76x2u_init_hardware(dev);
 	if (err < 0)
 		goto err;
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/usb_init.c b/drivers/net/wireless/mediatek/mt76/mt76x2/usb_init.c
index 35bdf5ffae00..63834941d167 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2/usb_init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2/usb_init.c
@@ -244,7 +244,6 @@  int mt76x2u_register_device(struct mt76x02_dev *dev)
 
 void mt76x2u_stop_hw(struct mt76x02_dev *dev)
 {
-	mt76u_stop_stat_wk(&dev->mt76);
 	cancel_delayed_work_sync(&dev->cal_work);
 	cancel_delayed_work_sync(&dev->mac_work);
 	mt76x2u_mac_stop(dev);
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/usb_main.c b/drivers/net/wireless/mediatek/mt76/mt76x2/usb_main.c
index eb414fb75fb1..0771de210c6a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2/usb_main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2/usb_main.c
@@ -42,6 +42,7 @@  static void mt76x2u_stop(struct ieee80211_hw *hw)
 
 	mutex_lock(&dev->mt76.mutex);
 	clear_bit(MT76_STATE_RUNNING, &dev->mt76.state);
+	mt76u_stop_tx(&dev->mt76);
 	mt76x2u_stop_hw(dev);
 	mutex_unlock(&dev->mt76.mutex);
 }
diff --git a/drivers/net/wireless/mediatek/mt76/usb.c b/drivers/net/wireless/mediatek/mt76/usb.c
index a3acc070063a..689ed7a7656c 100644
--- a/drivers/net/wireless/mediatek/mt76/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/usb.c
@@ -540,7 +540,7 @@  static void mt76u_rx_tasklet(unsigned long data)
 	rcu_read_unlock();
 }
 
-int mt76u_submit_rx_buffers(struct mt76_dev *dev)
+static int mt76u_submit_rx_buffers(struct mt76_dev *dev)
 {
 	struct mt76_queue *q = &dev->q_rx[MT_RXQ_MAIN];
 	unsigned long flags;
@@ -558,7 +558,6 @@  int mt76u_submit_rx_buffers(struct mt76_dev *dev)
 
 	return err;
 }
-EXPORT_SYMBOL_GPL(mt76u_submit_rx_buffers);
 
 static int mt76u_alloc_rx(struct mt76_dev *dev)
 {
@@ -605,14 +604,29 @@  static void mt76u_free_rx(struct mt76_dev *dev)
 	memset(&q->rx_page, 0, sizeof(q->rx_page));
 }
 
-static void mt76u_stop_rx(struct mt76_dev *dev)
+void mt76u_stop_rx(struct mt76_dev *dev)
 {
 	struct mt76_queue *q = &dev->q_rx[MT_RXQ_MAIN];
 	int i;
 
 	for (i = 0; i < q->ndesc; i++)
-		usb_kill_urb(q->entry[i].urb);
+		usb_poison_urb(q->entry[i].urb);
+
+	tasklet_kill(&dev->usb.rx_tasklet);
 }
+EXPORT_SYMBOL_GPL(mt76u_stop_rx);
+
+int mt76u_resume_rx(struct mt76_dev *dev)
+{
+	struct mt76_queue *q = &dev->q_rx[MT_RXQ_MAIN];
+	int i;
+
+	for (i = 0; i < q->ndesc; i++)
+		usb_unpoison_urb(q->entry[i].urb);
+
+	return mt76u_submit_rx_buffers(dev);
+}
+EXPORT_SYMBOL_GPL(mt76u_resume_rx);
 
 static void mt76u_tx_tasklet(unsigned long data)
 {
@@ -827,38 +841,54 @@  static void mt76u_free_tx(struct mt76_dev *dev)
 	}
 }
 
-static void mt76u_stop_tx(struct mt76_dev *dev)
+void mt76u_stop_tx(struct mt76_dev *dev)
 {
+	struct mt76_queue_entry entry;
 	struct mt76_queue *q;
-	int i, j;
+	int i, j, ret;
 
-	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
-		q = dev->q_tx[i].q;
-		for (j = 0; j < q->ndesc; j++)
-			usb_kill_urb(q->entry[j].urb);
-	}
-}
+	ret = wait_event_timeout(dev->tx_wait, !mt76_has_tx_pending(dev), HZ/5);
+	if (!ret) {
+		dev_err(dev->dev, "timed out waiting for pending tx\n");
 
-void mt76u_stop_queues(struct mt76_dev *dev)
-{
-	tasklet_disable(&dev->usb.rx_tasklet);
-	tasklet_disable(&dev->usb.tx_tasklet);
+		for (i = 0; i < IEEE80211_NUM_ACS; i++) {
+			q = dev->q_tx[i].q;
+			for (j = 0; j < q->ndesc; j++)
+				usb_kill_urb(q->entry[j].urb);
+		}
 
-	mt76u_stop_rx(dev);
-	mt76u_stop_tx(dev);
-}
-EXPORT_SYMBOL_GPL(mt76u_stop_queues);
+		tasklet_kill(&dev->usb.tx_tasklet);
+
+		/* On phisical device removal we do not get tx complete
+		 * callbacks, need to complete our skb's manually.
+		 */
+		for (i = 0; i < IEEE80211_NUM_ACS; i++) {
+			q = dev->q_tx[i].q;
+
+			/* Assure we are in sync with killed tasklet. */
+			spin_lock_bh(&q->lock);
+			while (q->queued) {
+				entry = q->entry[q->head];
+				q->head = (q->head + 1) % q->ndesc;
+				q->queued--;
+
+				dev->drv->tx_complete_skb(dev, i, &entry);
+			}
+			spin_unlock_bh(&q->lock);
+		}
+	}
 
-void mt76u_stop_stat_wk(struct mt76_dev *dev)
-{
 	cancel_delayed_work_sync(&dev->usb.stat_work);
 	clear_bit(MT76_READING_STATS, &dev->state);
+
+	mt76_tx_status_check(dev, NULL, true);
 }
-EXPORT_SYMBOL_GPL(mt76u_stop_stat_wk);
+EXPORT_SYMBOL_GPL(mt76u_stop_tx);
 
 void mt76u_queues_deinit(struct mt76_dev *dev)
 {
-	mt76u_stop_queues(dev);
+	mt76u_stop_rx(dev);
+	mt76u_stop_tx(dev);
 
 	mt76u_free_rx(dev);
 	mt76u_free_tx(dev);