diff mbox series

[net] r8169: fix deadlock on RTL8125 in jumbo mtu mode

Message ID caf6a487-ef8c-4570-88f9-f47a659faf33@gmail.com (mailing list archive)
State Accepted
Commit 59d395ed606d8df14615712b0cdcdadb2d962175
Delegated to: Netdev Maintainers
Headers show
Series [net] r8169: fix deadlock on RTL8125 in jumbo mtu mode | expand

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/codegen success Generated files up to date
netdev/tree_selection success Clearly marked for net
netdev/fixes_present success Fixes tag present in non-next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1115 this patch: 1115
netdev/cc_maintainers success CCed 6 of 6 maintainers
netdev/build_clang success Errors and warnings before: 1142 this patch: 1142
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 1142 this patch: 1142
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns WARNING: line length of 89 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Heiner Kallweit Nov. 26, 2023, 6:36 p.m. UTC
The original change results in a deadlock if jumbo mtu mode is used.
The reason is that the phydev lock is held when rtl_reset_work() is
called from r8169_phylink_handler(), and rtl_jumbo_config() calls
phy_start_aneg(), which also tries to acquire the phydev lock. Fix this
by calling rtl_reset_work() asynchronously via rtl_schedule_task().

Fixes: 621735f59064 ("r8169: fix rare issue with broken rx after link-down on RTL8125")
Reported-by: Ian Chen <free122448@hotmail.com>
Tested-by: Ian Chen <free122448@hotmail.com>
Cc: stable@vger.kernel.org
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
---
 drivers/net/ethernet/realtek/r8169_main.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

Comments

Heiner Kallweit Nov. 26, 2023, 8:14 p.m. UTC | #1
On 26.11.2023 19:36, Heiner Kallweit wrote:
> The original change results in a deadlock if jumbo mtu mode is used.
> Reason is that the phydev lock is held when rtl_reset_work() is called
> here, and rtl_jumbo_config() calls phy_start_aneg() which also tries
> to acquire the phydev lock. Fix this by calling rtl_reset_work()
> asynchronously.
> 
> Fixes: 621735f59064 ("r8169: fix rare issue with broken rx after link-down on RTL8125")
> Reported-by: Ian Chen <free122448@hotmail.com>
> Tested-by: Ian Chen <free122448@hotmail.com>
> Cc: stable@vger.kernel.org
> Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
> ---
>  drivers/net/ethernet/realtek/r8169_main.c | 5 ++++-
>  1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
> index 0ee3579ce..e32cc3279 100644
> --- a/drivers/net/ethernet/realtek/r8169_main.c
> +++ b/drivers/net/ethernet/realtek/r8169_main.c
> @@ -575,6 +575,7 @@ struct rtl8169_tc_offsets {
>  enum rtl_flag {
>  	RTL_FLAG_TASK_ENABLED = 0,
>  	RTL_FLAG_TASK_RESET_PENDING,
> +	RTL_FLAG_TASK_RESET_NO_QUEUE_WAKE,
>  	RTL_FLAG_TASK_TX_TIMEOUT,
>  	RTL_FLAG_MAX
>  };
> @@ -4494,6 +4495,8 @@ static void rtl_task(struct work_struct *work)
>  reset:
>  		rtl_reset_work(tp);
>  		netif_wake_queue(tp->dev);
> +	} else if (test_and_clear_bit(RTL_FLAG_TASK_RESET_NO_QUEUE_WAKE, tp->wk.flags)) {
> +		rtl_reset_work(tp);
>  	}
>  out_unlock:
>  	rtnl_unlock();
> @@ -4527,7 +4530,7 @@ static void r8169_phylink_handler(struct net_device *ndev)
>  	} else {
>  		/* In few cases rx is broken after link-down otherwise */
>  		if (rtl_is_8125(tp))
> -			rtl_reset_work(tp);
> +			rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_NO_QUEUE_WAKE);
>  		pm_runtime_idle(d);
>  	}
>  

I noticed there's a potential issue with my approach, so I have to
rework this. Please do not apply.
Heiner Kallweit Nov. 26, 2023, 10:03 p.m. UTC | #2
On 26.11.2023 21:14, Heiner Kallweit wrote:
> On 26.11.2023 19:36, Heiner Kallweit wrote:
>> The original change results in a deadlock if jumbo mtu mode is used.
>> Reason is that the phydev lock is held when rtl_reset_work() is called
>> here, and rtl_jumbo_config() calls phy_start_aneg() which also tries
>> to acquire the phydev lock. Fix this by calling rtl_reset_work()
>> asynchronously.
>>
>> Fixes: 621735f59064 ("r8169: fix rare issue with broken rx after link-down on RTL8125")
>> Reported-by: Ian Chen <free122448@hotmail.com>
>> Tested-by: Ian Chen <free122448@hotmail.com>
>> Cc: stable@vger.kernel.org
>> Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
>> ---
>>  drivers/net/ethernet/realtek/r8169_main.c | 5 ++++-
>>  1 file changed, 4 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
>> index 0ee3579ce..e32cc3279 100644
>> --- a/drivers/net/ethernet/realtek/r8169_main.c
>> +++ b/drivers/net/ethernet/realtek/r8169_main.c
>> @@ -575,6 +575,7 @@ struct rtl8169_tc_offsets {
>>  enum rtl_flag {
>>  	RTL_FLAG_TASK_ENABLED = 0,
>>  	RTL_FLAG_TASK_RESET_PENDING,
>> +	RTL_FLAG_TASK_RESET_NO_QUEUE_WAKE,
>>  	RTL_FLAG_TASK_TX_TIMEOUT,
>>  	RTL_FLAG_MAX
>>  };
>> @@ -4494,6 +4495,8 @@ static void rtl_task(struct work_struct *work)
>>  reset:
>>  		rtl_reset_work(tp);
>>  		netif_wake_queue(tp->dev);
>> +	} else if (test_and_clear_bit(RTL_FLAG_TASK_RESET_NO_QUEUE_WAKE, tp->wk.flags)) {
>> +		rtl_reset_work(tp);
>>  	}
>>  out_unlock:
>>  	rtnl_unlock();
>> @@ -4527,7 +4530,7 @@ static void r8169_phylink_handler(struct net_device *ndev)
>>  	} else {
>>  		/* In few cases rx is broken after link-down otherwise */
>>  		if (rtl_is_8125(tp))
>> -			rtl_reset_work(tp);
>> +			rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_NO_QUEUE_WAKE);
>>  		pm_runtime_idle(d);
>>  	}
>>  
> 
> I noticed there's a potential issue with my approach.
> So I have to rework this, please do not apply.
> 
The issue I mentioned affects only the patch "r8169: prevent potential
deadlock in rtl8169_close". The patch here can remain as it is and is
ready to be applied. Sorry for the noise.
patchwork-bot+netdevbpf@kernel.org Nov. 28, 2023, 12:10 p.m. UTC | #3
Hello:

This patch was applied to netdev/net.git (main)
by Paolo Abeni <pabeni@redhat.com>:

On Sun, 26 Nov 2023 19:36:46 +0100 you wrote:
> The original change results in a deadlock if jumbo mtu mode is used.
> Reason is that the phydev lock is held when rtl_reset_work() is called
> here, and rtl_jumbo_config() calls phy_start_aneg() which also tries
> to acquire the phydev lock. Fix this by calling rtl_reset_work()
> asynchronously.
> 
> Fixes: 621735f59064 ("r8169: fix rare issue with broken rx after link-down on RTL8125")
> Reported-by: Ian Chen <free122448@hotmail.com>
> Tested-by: Ian Chen <free122448@hotmail.com>
> Cc: stable@vger.kernel.org
> Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
> 
> [...]

Here is the summary with links:
  - [net] r8169: fix deadlock on RTL8125 in jumbo mtu mode
    https://git.kernel.org/netdev/net/c/59d395ed606d

You are awesome, thank you!
diff mbox series

Patch

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 0ee3579ce..e32cc3279 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -575,6 +575,7 @@  struct rtl8169_tc_offsets {
 enum rtl_flag {
 	RTL_FLAG_TASK_ENABLED = 0,
 	RTL_FLAG_TASK_RESET_PENDING,
+	RTL_FLAG_TASK_RESET_NO_QUEUE_WAKE,
 	RTL_FLAG_TASK_TX_TIMEOUT,
 	RTL_FLAG_MAX
 };
@@ -4494,6 +4495,8 @@  static void rtl_task(struct work_struct *work)
 reset:
 		rtl_reset_work(tp);
 		netif_wake_queue(tp->dev);
+	} else if (test_and_clear_bit(RTL_FLAG_TASK_RESET_NO_QUEUE_WAKE, tp->wk.flags)) {
+		rtl_reset_work(tp);
 	}
 out_unlock:
 	rtnl_unlock();
@@ -4527,7 +4530,7 @@  static void r8169_phylink_handler(struct net_device *ndev)
 	} else {
 		/* In few cases rx is broken after link-down otherwise */
 		if (rtl_is_8125(tp))
-			rtl_reset_work(tp);
+			rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_NO_QUEUE_WAKE);
 		pm_runtime_idle(d);
 	}