diff mbox series

[v2] wifi: ath11k: fix boot failure with one MSI vector

Message ID 20230907015606.16297-1-quic_bqiang@quicinc.com (mailing list archive)
State Accepted
Commit 39564b475ac5a589e6c22c43a08cbd283c295d2c
Delegated to: Kalle Valo
Headers show
Series [v2] wifi: ath11k: fix boot failure with one MSI vector | expand

Commit Message

Baochen Qiang Sept. 7, 2023, 1:56 a.m. UTC
Commit 5b32b6dd96633 ("ath11k: Remove core PCI references from
PCI common code") breaks with one MSI vector because it moves
affinity setting after IRQ request, see below log:

[ 1417.278835] ath11k_pci 0000:02:00.0: failed to receive control response completion, polling..
[ 1418.302829] ath11k_pci 0000:02:00.0: Service connect timeout
[ 1418.302833] ath11k_pci 0000:02:00.0: failed to connect to HTT: -110
[ 1418.303669] ath11k_pci 0000:02:00.0: failed to start core: -110

In detail: if the affinity request is made after the IRQ has been activated,
which happens in request_irq(), the kernel caches that request and
returns success directly. Later, when a subsequent MHI interrupt
fires, the kernel does the real affinity-setting work and, as a result,
changes the MSI vector. However, by that time the host has already configured
the old vector to hardware, so the host never receives CE or DP interrupts.

Fix it by setting the affinity before registering the MHI controller,
which is where the host requests the IRQ for the first time.

Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3

Fixes: 5b32b6dd9663 ("ath11k: Remove core PCI references from PCI common code")
Signed-off-by: Baochen Qiang <quic_bqiang@quicinc.com>
---
v2:
 rebase on latest ath.git due to v1 apply failure.

 drivers/net/wireless/ath/ath11k/pci.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)


base-commit: 0263687f4441d5a5eab8074d56b4693c8f0acf85

Comments

Jeff Johnson Sept. 7, 2023, 5:06 a.m. UTC | #1
On 9/6/2023 6:56 PM, Baochen Qiang wrote:
> Commit 5b32b6dd96633 ("ath11k: Remove core PCI references from
> PCI common code") breaks with one MSI vector because it moves
> affinity setting after IRQ request, see below log:
> 
> [ 1417.278835] ath11k_pci 0000:02:00.0: failed to receive control response completion, polling..
> [ 1418.302829] ath11k_pci 0000:02:00.0: Service connect timeout
> [ 1418.302833] ath11k_pci 0000:02:00.0: failed to connect to HTT: -110
> [ 1418.303669] ath11k_pci 0000:02:00.0: failed to start core: -110
> 
> The detail is, if do affinity request after IRQ activated,
> which is done in request_irq(), kernel caches that request and
> returns success directly. Later when a subsequent MHI interrupt is
> fired, kernel will do the real affinity setting work, as a result,
> changes the MSI vector. However at that time host has configured
> old vector to hardware, so host never receives CE or DP interrupts.
> 
> Fix it by setting affinity before registering MHI controller
> where host is, for the first time, doing IRQ request.
> 
> Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3

Tested-on: WCN6855 hw2.1 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3.6510.23
Tested-on: WCN6750 hw1.0 AHB WLAN.MSL.1.0.1-01160-QCAMSLSWPLZ-1

> 
> Fixes: 5b32b6dd9663 ("ath11k: Remove core PCI references from PCI common code")
> Signed-off-by: Baochen Qiang <quic_bqiang@quicinc.com>

Acked-by: Jeff Johnson <quic_jjohnson@quicinc.com>

> ---
> v2:
>   rebase on latest ath.git due to v1 apply failure.
> 
>   drivers/net/wireless/ath/ath11k/pci.c | 24 ++++++++++++------------
>   1 file changed, 12 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/net/wireless/ath/ath11k/pci.c b/drivers/net/wireless/ath/ath11k/pci.c
> index 9573bd959cac..aa049593f9b5 100644
> --- a/drivers/net/wireless/ath/ath11k/pci.c
> +++ b/drivers/net/wireless/ath/ath11k/pci.c
> @@ -852,10 +852,16 @@ static int ath11k_pci_probe(struct pci_dev *pdev,
>   	if (ret)
>   		goto err_pci_disable_msi;
>   
> +	ret = ath11k_pci_set_irq_affinity_hint(ab_pci, cpumask_of(0));
> +	if (ret) {
> +		ath11k_err(ab, "failed to set irq affinity %d\n", ret);
> +		goto err_pci_disable_msi;
> +	}
> +
>   	ret = ath11k_mhi_register(ab_pci);
>   	if (ret) {
>   		ath11k_err(ab, "failed to register mhi: %d\n", ret);
> -		goto err_pci_disable_msi;
> +		goto err_irq_affinity_cleanup;
>   	}
>   
>   	ret = ath11k_hal_srng_init(ab);
> @@ -876,12 +882,6 @@ static int ath11k_pci_probe(struct pci_dev *pdev,
>   		goto err_ce_free;
>   	}
>   
> -	ret = ath11k_pci_set_irq_affinity_hint(ab_pci, cpumask_of(0));
> -	if (ret) {
> -		ath11k_err(ab, "failed to set irq affinity %d\n", ret);
> -		goto err_free_irq;
> -	}
> -
>   	/* kernel may allocate a dummy vector before request_irq and
>   	 * then allocate a real vector when request_irq is called.
>   	 * So get msi_data here again to avoid spurious interrupt
> @@ -890,20 +890,17 @@ static int ath11k_pci_probe(struct pci_dev *pdev,
>   	ret = ath11k_pci_config_msi_data(ab_pci);
>   	if (ret) {
>   		ath11k_err(ab, "failed to config msi_data: %d\n", ret);
> -		goto err_irq_affinity_cleanup;
> +		goto err_free_irq;
>   	}
>   
>   	ret = ath11k_core_init(ab);
>   	if (ret) {
>   		ath11k_err(ab, "failed to init core: %d\n", ret);
> -		goto err_irq_affinity_cleanup;
> +		goto err_free_irq;
>   	}
>   	ath11k_qmi_fwreset_from_cold_boot(ab);
>   	return 0;
>   
> -err_irq_affinity_cleanup:
> -	ath11k_pci_set_irq_affinity_hint(ab_pci, NULL);
> -
>   err_free_irq:
>   	ath11k_pcic_free_irq(ab);
>   
> @@ -916,6 +913,9 @@ static int ath11k_pci_probe(struct pci_dev *pdev,
>   err_mhi_unregister:
>   	ath11k_mhi_unregister(ab_pci);
>   
> +err_irq_affinity_cleanup:
> +	ath11k_pci_set_irq_affinity_hint(ab_pci, NULL);
> +
>   err_pci_disable_msi:
>   	ath11k_pci_free_msi(ab_pci);
>   
> 
> base-commit: 0263687f4441d5a5eab8074d56b4693c8f0acf85
Kalle Valo Sept. 7, 2023, 5:40 a.m. UTC | #2
Jeff Johnson <quic_jjohnson@quicinc.com> writes:

> On 9/6/2023 6:56 PM, Baochen Qiang wrote:
>> Commit 5b32b6dd96633 ("ath11k: Remove core PCI references from
>> PCI common code") breaks with one MSI vector because it moves
>> affinity setting after IRQ request, see below log:
>> [ 1417.278835] ath11k_pci 0000:02:00.0: failed to receive control
>> response completion, polling..
>> [ 1418.302829] ath11k_pci 0000:02:00.0: Service connect timeout
>> [ 1418.302833] ath11k_pci 0000:02:00.0: failed to connect to HTT: -110
>> [ 1418.303669] ath11k_pci 0000:02:00.0: failed to start core: -110
>> The detail is, if do affinity request after IRQ activated,
>> which is done in request_irq(), kernel caches that request and
>> returns success directly. Later when a subsequent MHI interrupt is
>> fired, kernel will do the real affinity setting work, as a result,
>> changes the MSI vector. However at that time host has configured
>> old vector to hardware, so host never receives CE or DP interrupts.
>> Fix it by setting affinity before registering MHI controller
>> where host is, for the first time, doing IRQ request.
>> Tested-on: WCN6855 hw2.0 PCI
>> WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3
>
> Tested-on: WCN6855 hw2.1 PCI
> WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3.6510.23
> Tested-on: WCN6750 hw1.0 AHB WLAN.MSL.1.0.1-01160-QCAMSLSWPLZ-1

I can fix that in the pending branch.
Kalle Valo Sept. 21, 2023, 8:12 a.m. UTC | #3
Baochen Qiang <quic_bqiang@quicinc.com> wrote:

> Commit 5b32b6dd96633 ("ath11k: Remove core PCI references from
> PCI common code") breaks with one MSI vector because it moves
> affinity setting after IRQ request, see below log:
> 
> [ 1417.278835] ath11k_pci 0000:02:00.0: failed to receive control response completion, polling..
> [ 1418.302829] ath11k_pci 0000:02:00.0: Service connect timeout
> [ 1418.302833] ath11k_pci 0000:02:00.0: failed to connect to HTT: -110
> [ 1418.303669] ath11k_pci 0000:02:00.0: failed to start core: -110
> 
> The detail is, if do affinity request after IRQ activated,
> which is done in request_irq(), kernel caches that request and
> returns success directly. Later when a subsequent MHI interrupt is
> fired, kernel will do the real affinity setting work, as a result,
> changes the MSI vector. However at that time host has configured
> old vector to hardware, so host never receives CE or DP interrupts.
> 
> Fix it by setting affinity before registering MHI controller
> where host is, for the first time, doing IRQ request.
> 
> Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3
> Tested-on: WCN6855 hw2.1 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3.6510.23
> Tested-on: WCN6750 hw1.0 AHB WLAN.MSL.1.0.1-01160-QCAMSLSWPLZ-1
> 
> Fixes: 5b32b6dd9663 ("ath11k: Remove core PCI references from PCI common code")
> Signed-off-by: Baochen Qiang <quic_bqiang@quicinc.com>
> Acked-by: Jeff Johnson <quic_jjohnson@quicinc.com>
> Signed-off-by: Kalle Valo <quic_kvalo@quicinc.com>

Patch applied to ath-next branch of ath.git, thanks.

39564b475ac5 wifi: ath11k: fix boot failure with one MSI vector
diff mbox series

Patch

diff --git a/drivers/net/wireless/ath/ath11k/pci.c b/drivers/net/wireless/ath/ath11k/pci.c
index 9573bd959cac..aa049593f9b5 100644
--- a/drivers/net/wireless/ath/ath11k/pci.c
+++ b/drivers/net/wireless/ath/ath11k/pci.c
@@ -852,10 +852,16 @@  static int ath11k_pci_probe(struct pci_dev *pdev,
 	if (ret)
 		goto err_pci_disable_msi;
 
+	ret = ath11k_pci_set_irq_affinity_hint(ab_pci, cpumask_of(0));
+	if (ret) {
+		ath11k_err(ab, "failed to set irq affinity %d\n", ret);
+		goto err_pci_disable_msi;
+	}
+
 	ret = ath11k_mhi_register(ab_pci);
 	if (ret) {
 		ath11k_err(ab, "failed to register mhi: %d\n", ret);
-		goto err_pci_disable_msi;
+		goto err_irq_affinity_cleanup;
 	}
 
 	ret = ath11k_hal_srng_init(ab);
@@ -876,12 +882,6 @@  static int ath11k_pci_probe(struct pci_dev *pdev,
 		goto err_ce_free;
 	}
 
-	ret = ath11k_pci_set_irq_affinity_hint(ab_pci, cpumask_of(0));
-	if (ret) {
-		ath11k_err(ab, "failed to set irq affinity %d\n", ret);
-		goto err_free_irq;
-	}
-
 	/* kernel may allocate a dummy vector before request_irq and
 	 * then allocate a real vector when request_irq is called.
 	 * So get msi_data here again to avoid spurious interrupt
@@ -890,20 +890,17 @@  static int ath11k_pci_probe(struct pci_dev *pdev,
 	ret = ath11k_pci_config_msi_data(ab_pci);
 	if (ret) {
 		ath11k_err(ab, "failed to config msi_data: %d\n", ret);
-		goto err_irq_affinity_cleanup;
+		goto err_free_irq;
 	}
 
 	ret = ath11k_core_init(ab);
 	if (ret) {
 		ath11k_err(ab, "failed to init core: %d\n", ret);
-		goto err_irq_affinity_cleanup;
+		goto err_free_irq;
 	}
 	ath11k_qmi_fwreset_from_cold_boot(ab);
 	return 0;
 
-err_irq_affinity_cleanup:
-	ath11k_pci_set_irq_affinity_hint(ab_pci, NULL);
-
 err_free_irq:
 	ath11k_pcic_free_irq(ab);
 
@@ -916,6 +913,9 @@  static int ath11k_pci_probe(struct pci_dev *pdev,
 err_mhi_unregister:
 	ath11k_mhi_unregister(ab_pci);
 
+err_irq_affinity_cleanup:
+	ath11k_pci_set_irq_affinity_hint(ab_pci, NULL);
+
 err_pci_disable_msi:
 	ath11k_pci_free_msi(ab_pci);