diff mbox series

mm/vmscan: Fix hard LOCKUP in function isolate_lru_folios

Message ID 20240814091825.27262-1-liuye@kylinos.cn (mailing list archive)
State New
Headers show
Series mm/vmscan: Fix hard LOCKUP in function isolate_lru_folios | expand

Commit Message

liuye Aug. 14, 2024, 9:18 a.m. UTC
This fixes the following hard lockup in function isolate_lru_folios
when memory reclaim.If the LRU mostly contains ineligible folios
May trigger watchdog.

watchdog: Watchdog detected hard LOCKUP on cpu 173
RIP: 0010:native_queued_spin_lock_slowpath+0x255/0x2a0
Call Trace:
	_raw_spin_lock_irqsave+0x31/0x40
	folio_lruvec_lock_irqsave+0x5f/0x90
	folio_batch_move_lru+0x91/0x150
	lru_add_drain_per_cpu+0x1c/0x40
	process_one_work+0x17d/0x350
	worker_thread+0x27b/0x3a0
	kthread+0xe8/0x120
	ret_from_fork+0x34/0x50
	ret_from_fork_asm+0x1b/0x30

lruvec->lru_lock owner:

PID: 2865     TASK: ffff888139214d40  CPU: 40   COMMAND: "kswapd0"
 #0 [fffffe0000945e60] crash_nmi_callback at ffffffffa567a555
 #1 [fffffe0000945e68] nmi_handle at ffffffffa563b171
 #2 [fffffe0000945eb0] default_do_nmi at ffffffffa6575920
 #3 [fffffe0000945ed0] exc_nmi at ffffffffa6575af4
 #4 [fffffe0000945ef0] end_repeat_nmi at ffffffffa6601dde
    [exception RIP: isolate_lru_folios+403]
    RIP: ffffffffa597df53  RSP: ffffc90006fb7c28  RFLAGS: 00000002
    RAX: 0000000000000001  RBX: ffffc90006fb7c60  RCX: ffffea04a2196f88
    RDX: ffffc90006fb7c60  RSI: ffffc90006fb7c60  RDI: ffffea04a2197048
    RBP: ffff88812cbd3010   R8: ffffea04a2197008   R9: 0000000000000001
    R10: 0000000000000000  R11: 0000000000000001  R12: ffffea04a2197008
    R13: ffffea04a2197048  R14: ffffc90006fb7de8  R15: 0000000003e3e937
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
    <NMI exception stack>
 #5 [ffffc90006fb7c28] isolate_lru_folios at ffffffffa597df53
 #6 [ffffc90006fb7cf8] shrink_active_list at ffffffffa597f788
 #7 [ffffc90006fb7da8] balance_pgdat at ffffffffa5986db0
 #8 [ffffc90006fb7ec0] kswapd at ffffffffa5987354
 #9 [ffffc90006fb7ef8] kthread at ffffffffa5748238
crash>

Fixes: b2e18757f2c9 ("mm, vmscan: begin reclaiming pages on a per-node basis")
Signed-off-by: liuye <liuye@kylinos.cn>
---
 include/linux/swap.h | 1 +
 mm/vmscan.c          | 7 +++++--
 2 files changed, 6 insertions(+), 2 deletions(-)

Comments

Andrew Morton Aug. 14, 2024, 9:27 p.m. UTC | #1
On Wed, 14 Aug 2024 17:18:25 +0800 liuye <liuye@kylinos.cn> wrote:

> This fixes the following hard lockup in function isolate_lru_folios
> when memory reclaim.If the LRU mostly contains ineligible folios
> May trigger watchdog.
> 
> watchdog: Watchdog detected hard LOCKUP on cpu 173
> RIP: 0010:native_queued_spin_lock_slowpath+0x255/0x2a0
> Call Trace:
> 	_raw_spin_lock_irqsave+0x31/0x40
> 	folio_lruvec_lock_irqsave+0x5f/0x90
> 	folio_batch_move_lru+0x91/0x150
> 	lru_add_drain_per_cpu+0x1c/0x40
> 	process_one_work+0x17d/0x350
> 	worker_thread+0x27b/0x3a0
> 	kthread+0xe8/0x120
> 	ret_from_fork+0x34/0x50
> 	ret_from_fork_asm+0x1b/0x30
> 
> lruvec->lru_lock owner:
> 
> PID: 2865     TASK: ffff888139214d40  CPU: 40   COMMAND: "kswapd0"
>  #0 [fffffe0000945e60] crash_nmi_callback at ffffffffa567a555
>  #1 [fffffe0000945e68] nmi_handle at ffffffffa563b171
>  #2 [fffffe0000945eb0] default_do_nmi at ffffffffa6575920
>  #3 [fffffe0000945ed0] exc_nmi at ffffffffa6575af4
>  #4 [fffffe0000945ef0] end_repeat_nmi at ffffffffa6601dde
>     [exception RIP: isolate_lru_folios+403]
>     RIP: ffffffffa597df53  RSP: ffffc90006fb7c28  RFLAGS: 00000002
>     RAX: 0000000000000001  RBX: ffffc90006fb7c60  RCX: ffffea04a2196f88
>     RDX: ffffc90006fb7c60  RSI: ffffc90006fb7c60  RDI: ffffea04a2197048
>     RBP: ffff88812cbd3010   R8: ffffea04a2197008   R9: 0000000000000001
>     R10: 0000000000000000  R11: 0000000000000001  R12: ffffea04a2197008
>     R13: ffffea04a2197048  R14: ffffc90006fb7de8  R15: 0000000003e3e937
>     ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
>     <NMI exception stack>
>  #5 [ffffc90006fb7c28] isolate_lru_folios at ffffffffa597df53
>  #6 [ffffc90006fb7cf8] shrink_active_list at ffffffffa597f788
>  #7 [ffffc90006fb7da8] balance_pgdat at ffffffffa5986db0
>  #8 [ffffc90006fb7ec0] kswapd at ffffffffa5987354
>  #9 [ffffc90006fb7ef8] kthread at ffffffffa5748238
> crash>

Well that's bad.

> Fixes: b2e18757f2c9 ("mm, vmscan: begin reclaiming pages on a per-node basis")

Merged in 2016.

Can you please describe how to reproduce this?  Under what circumstances
does it occur?  Why do you think it took eight years to be discovered?

> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -1655,6 +1655,7 @@ static unsigned long isolate_lru_folios(unsigned long nr_to_scan,
>  	unsigned long nr_skipped[MAX_NR_ZONES] = { 0, };
>  	unsigned long skipped = 0;
>  	unsigned long scan, total_scan, nr_pages;
> +	unsigned long max_nr_skipped = 0;
>  	LIST_HEAD(folios_skipped);
>  
>  	total_scan = 0;
> @@ -1669,10 +1670,12 @@ static unsigned long isolate_lru_folios(unsigned long nr_to_scan,
>  		nr_pages = folio_nr_pages(folio);
>  		total_scan += nr_pages;
>  
> -		if (folio_zonenum(folio) > sc->reclaim_idx ||
> -				skip_cma(folio, sc)) {
> +		/* Using max_nr_skipped to prevent hard LOCKUP*/
> +		if ((max_nr_skipped < SWAP_CLUSTER_MAX_SKIPPED) &&
> +			(folio_zonenum(folio) > sc->reclaim_idx || skip_cma(folio, sc))) {
>  			nr_skipped[folio_zonenum(folio)] += nr_pages;
>  			move_to = &folios_skipped;
> +			max_nr_skipped++;
>  			goto move;
>  		}

It looks like that will fix, but perhaps something more fundamental
needs to be done - we're doing a tremendous amount of pretty pointless
work here.  Answers to my above questions will help us resolve this.

Thanks.
Andrew Morton Sept. 25, 2024, 12:22 a.m. UTC | #2
On Wed, 14 Aug 2024 17:18:25 +0800 liuye <liuye@kylinos.cn> wrote:

> @@ -1669,10 +1670,12 @@ static unsigned long isolate_lru_folios(unsigned long nr_to_scan,
>  		nr_pages = folio_nr_pages(folio);
>  		total_scan += nr_pages;
>  
> -		if (folio_zonenum(folio) > sc->reclaim_idx ||
> -				skip_cma(folio, sc)) {
> +		/* Using max_nr_skipped to prevent hard LOCKUP*/
> +		if ((max_nr_skipped < SWAP_CLUSTER_MAX_SKIPPED) &&
> +			(folio_zonenum(folio) > sc->reclaim_idx || skip_cma(folio, sc))) {
>  			nr_skipped[folio_zonenum(folio)] += nr_pages;
>  			move_to = &folios_skipped;
> +			max_nr_skipped++;
>  			goto move;

This hunk is not applicable to current mainline.
liuye Sept. 25, 2024, 8:37 a.m. UTC | #3
On 2024/9/25 上午8:22, Andrew Morton wrote:
> On Wed, 14 Aug 2024 17:18:25 +0800 liuye <liuye@kylinos.cn> wrote:
> 
>> @@ -1669,10 +1670,12 @@ static unsigned long isolate_lru_folios(unsigned long nr_to_scan,
>>  		nr_pages = folio_nr_pages(folio);
>>  		total_scan += nr_pages;
>>  
>> -		if (folio_zonenum(folio) > sc->reclaim_idx ||
>> -				skip_cma(folio, sc)) {
>> +		/* Using max_nr_skipped to prevent hard LOCKUP*/
>> +		if ((max_nr_skipped < SWAP_CLUSTER_MAX_SKIPPED) &&
>> +			(folio_zonenum(folio) > sc->reclaim_idx || skip_cma(folio, sc))) {
>>  			nr_skipped[folio_zonenum(folio)] += nr_pages;
>>  			move_to = &folios_skipped;
>> +			max_nr_skipped++;
>>  			goto move;
> 
> This hunk is not applicable to current mainline.
> 

Please see the PATCH v2 in link [1], and the related discussion in link [2].
Then please explain why it is not applicable,thank you.

[1]:https://lore.kernel.org/all/20240919021443.9170-1-liuye@kylinos.cn/
[2]:https://lore.kernel.org/all/e878653e-d380-81c2-90a8-fd2d1d4e7287@kylinos.cn/

Thanks,
liuye
Andrew Morton Sept. 25, 2024, 9:29 a.m. UTC | #4
On Wed, 25 Sep 2024 16:37:14 +0800 liuye <liuye@kylinos.cn> wrote:

> 
> 
> On 2024/9/25 上午8:22, Andrew Morton wrote:
> > On Wed, 14 Aug 2024 17:18:25 +0800 liuye <liuye@kylinos.cn> wrote:
> > 
> >> @@ -1669,10 +1670,12 @@ static unsigned long isolate_lru_folios(unsigned long nr_to_scan,
> >>  		nr_pages = folio_nr_pages(folio);
> >>  		total_scan += nr_pages;
> >>  
> >> -		if (folio_zonenum(folio) > sc->reclaim_idx ||
> >> -				skip_cma(folio, sc)) {
> >> +		/* Using max_nr_skipped to prevent hard LOCKUP*/
> >> +		if ((max_nr_skipped < SWAP_CLUSTER_MAX_SKIPPED) &&
> >> +			(folio_zonenum(folio) > sc->reclaim_idx || skip_cma(folio, sc))) {
> >>  			nr_skipped[folio_zonenum(folio)] += nr_pages;
> >>  			move_to = &folios_skipped;
> >> +			max_nr_skipped++;
> >>  			goto move;
> > 
> > This hunk is not applicable to current mainline.
> > 
> 
> Please see the PATCH v2 in link [1], and the related discussion in link [2].
> Then please explain why it is not applicable,thank you.

What I mean is that the patch doesn't apply.

Current mainline has

		if (folio_zonenum(folio) > sc->reclaim_idx) {
			nr_skipped[folio_zonenum(folio)] += nr_pages;
			move_to = &folios_skipped;
			goto move;
		}
liuye Sept. 25, 2024, 9:53 a.m. UTC | #5
On 2024/9/25 下午5:29, Andrew Morton wrote:
> On Wed, 25 Sep 2024 16:37:14 +0800 liuye <liuye@kylinos.cn> wrote:
> 
>>
>>
>> On 2024/9/25 上午8:22, Andrew Morton wrote:
>>> On Wed, 14 Aug 2024 17:18:25 +0800 liuye <liuye@kylinos.cn> wrote:
>>>
>>>> @@ -1669,10 +1670,12 @@ static unsigned long isolate_lru_folios(unsigned long nr_to_scan,
>>>>  		nr_pages = folio_nr_pages(folio);
>>>>  		total_scan += nr_pages;
>>>>  
>>>> -		if (folio_zonenum(folio) > sc->reclaim_idx ||
>>>> -				skip_cma(folio, sc)) {
>>>> +		/* Using max_nr_skipped to prevent hard LOCKUP*/
>>>> +		if ((max_nr_skipped < SWAP_CLUSTER_MAX_SKIPPED) &&
>>>> +			(folio_zonenum(folio) > sc->reclaim_idx || skip_cma(folio, sc))) {
>>>>  			nr_skipped[folio_zonenum(folio)] += nr_pages;
>>>>  			move_to = &folios_skipped;
>>>> +			max_nr_skipped++;
>>>>  			goto move;
>>>
>>> This hunk is not applicable to current mainline.
>>>
>>
>> Please see the PATCH v2 in link [1], and the related discussion in link [2].
>> Then please explain why it is not applicable,thank you.
> 
> What I mean is that the patch doesn't apply.
> 
> Current mainline has
> 
> 		if (folio_zonenum(folio) > sc->reclaim_idx) {
> 			nr_skipped[folio_zonenum(folio)] += nr_pages;
> 			move_to = &folios_skipped;
> 			goto move;
> 		}
> 

PATCH v2 base on current mainline.

@@ -1650,9 +1651,12 @@ static unsigned long isolate_lru_folios(unsigned long nr_to_scan,
 		nr_pages = folio_nr_pages(folio);
 		total_scan += nr_pages;
 
-		if (folio_zonenum(folio) > sc->reclaim_idx) {
+		/* Using max_nr_skipped to prevent hard LOCKUP*/
+		if (max_nr_skipped < SWAP_CLUSTER_MAX_SKIPPED &&
+		    (folio_zonenum(folio) > sc->reclaim_idx)) {
 			nr_skipped[folio_zonenum(folio)] += nr_pages;
 			move_to = &folios_skipped;
+			max_nr_skipped++;
 			goto move;
 		}
diff mbox series

Patch

diff --git a/include/linux/swap.h b/include/linux/swap.h
index ba7ea95d1c57..afb3274c90ef 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -223,6 +223,7 @@  enum {
 };
 
 #define SWAP_CLUSTER_MAX 32UL
+#define SWAP_CLUSTER_MAX_SKIPPED (SWAP_CLUSTER_MAX << 10)
 #define COMPACT_CLUSTER_MAX SWAP_CLUSTER_MAX
 
 /* Bit flag in swap_map */
diff --git a/mm/vmscan.c b/mm/vmscan.c
index cfa839284b92..02a8f86d4883 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1655,6 +1655,7 @@  static unsigned long isolate_lru_folios(unsigned long nr_to_scan,
 	unsigned long nr_skipped[MAX_NR_ZONES] = { 0, };
 	unsigned long skipped = 0;
 	unsigned long scan, total_scan, nr_pages;
+	unsigned long max_nr_skipped = 0;
 	LIST_HEAD(folios_skipped);
 
 	total_scan = 0;
@@ -1669,10 +1670,12 @@  static unsigned long isolate_lru_folios(unsigned long nr_to_scan,
 		nr_pages = folio_nr_pages(folio);
 		total_scan += nr_pages;
 
-		if (folio_zonenum(folio) > sc->reclaim_idx ||
-				skip_cma(folio, sc)) {
+		/* Using max_nr_skipped to prevent hard LOCKUP*/
+		if ((max_nr_skipped < SWAP_CLUSTER_MAX_SKIPPED) &&
+			(folio_zonenum(folio) > sc->reclaim_idx || skip_cma(folio, sc))) {
 			nr_skipped[folio_zonenum(folio)] += nr_pages;
 			move_to = &folios_skipped;
+			max_nr_skipped++;
 			goto move;
 		}