[-next] blk-wbt: don't throttle swap writes in direct reclaim

Message ID 20240604030522.3686177-1-libaokun@huaweicloud.com (mailing list archive)
State New, archived
Series [-next] blk-wbt: don't throttle swap writes in direct reclaim

Commit Message

Baokun Li June 4, 2024, 3:05 a.m. UTC
From: Baokun Li <libaokun1@huawei.com>

Currently we avoid throttling swap writes by checking whether the
current process is kswapd (i.e. current_is_kswapd()), but swap writes
can come from either kswapd or direct reclaim, so swap writes issued
from direct reclaim are still throttled.

When a process holds a lock while allocating a free page and enters
direct reclaim because there is no free memory, wbt throttling of its
swap writes can stall it, which may in turn hang other processes
waiting for the lock.

Both kswapd and direct reclaim set the REQ_SWAP flag, so check
REQ_SWAP instead of current_is_kswapd() to avoid throttling swap
writes. Also rename WBT_KSWAPD to WBT_SWAP and WBT_RWQ_KSWAPD to
WBT_RWQ_SWAP.

Signed-off-by: Baokun Li <libaokun1@huawei.com>
---
 block/blk-wbt.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)
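
For context, current_is_kswapd() keys off the identity of the
submitting task, while REQ_SWAP is a property of the bio itself, set
on the swap write path regardless of which task triggered it. Below is
a minimal standalone sketch (simplified userspace C; the struct, flag
value, and helper names are illustrative stand-ins, not the kernel
code) of why the old check let direct-reclaim swap writes be
throttled:

#include <stdbool.h>
#include <stdio.h>

/* Simplified model: a bio carries op flags; REQ_SWAP marks swap I/O. */
#define REQ_SWAP (1u << 0)

struct bio { unsigned int bi_opf; };

/* Old check: skip throttling only if the *submitting task* is kswapd. */
static bool old_should_skip_throttle(const struct bio *bio, bool task_is_kswapd)
{
	(void)bio;
	return task_is_kswapd;
}

/* New check: skip throttling for any bio flagged REQ_SWAP, no matter
 * which task (kswapd or direct reclaim) submits it. */
static bool new_should_skip_throttle(const struct bio *bio, bool task_is_kswapd)
{
	(void)task_is_kswapd;
	return bio->bi_opf & REQ_SWAP;
}

int main(void)
{
	struct bio swap_write = { .bi_opf = REQ_SWAP };

	/* Swap write from kswapd: both checks skip throttling. */
	printf("kswapd:         old=%d new=%d\n",
	       old_should_skip_throttle(&swap_write, true),
	       new_should_skip_throttle(&swap_write, true));

	/* Swap write from direct reclaim: only the new check skips it. */
	printf("direct reclaim: old=%d new=%d\n",
	       old_should_skip_throttle(&swap_write, false),
	       new_should_skip_throttle(&swap_write, false));
	return 0;
}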

Comments

Yu Kuai June 4, 2024, 7:01 a.m. UTC | #1
On 2024/06/04 11:05, libaokun@huaweicloud.com wrote:
> From: Baokun Li <libaokun1@huawei.com>
> 
> Currently we avoid throttling swap writes by checking whether the
> current process is kswapd (i.e. current_is_kswapd()), but swap writes
> can come from either kswapd or direct reclaim, so swap writes issued
> from direct reclaim are still throttled.
> 
> When a process holds a lock while allocating a free page and enters
> direct reclaim because there is no free memory, wbt throttling of its
> swap writes can stall it, which may in turn hang other processes
> waiting for the lock.
> 
> Both kswapd and direct reclaim set the REQ_SWAP flag, so check
> REQ_SWAP instead of current_is_kswapd() to avoid throttling swap
> writes. Also rename WBT_KSWAPD to WBT_SWAP and WBT_RWQ_KSWAPD to
> WBT_RWQ_SWAP.
> 
> Signed-off-by: Baokun Li <libaokun1@huawei.com>

LGTM
Reviewed-by: Yu Kuai <yukuai3@huawei.com>

Baokun Li July 1, 2024, 6:19 a.m. UTC | #2
Friendly ping...

On 2024/6/4 11:05, libaokun@huaweicloud.com wrote:
> From: Baokun Li <libaokun1@huawei.com>
>
> Currently we avoid throttling swap writes by checking whether the
> current process is kswapd (i.e. current_is_kswapd()), but swap writes
> can come from either kswapd or direct reclaim, so swap writes issued
> from direct reclaim are still throttled.
>
> When a process holds a lock while allocating a free page and enters
> direct reclaim because there is no free memory, wbt throttling of its
> swap writes can stall it, which may in turn hang other processes
> waiting for the lock.
>
> Both kswapd and direct reclaim set the REQ_SWAP flag, so check
> REQ_SWAP instead of current_is_kswapd() to avoid throttling swap
> writes. Also rename WBT_KSWAPD to WBT_SWAP and WBT_RWQ_KSWAPD to
> WBT_RWQ_SWAP.
>
> Signed-off-by: Baokun Li <libaokun1@huawei.com>
>
> [...]
Christoph Hellwig July 1, 2024, 6:38 a.m. UTC | #3
Looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>
Jens Axboe July 1, 2024, 12:53 p.m. UTC | #4
On Tue, 04 Jun 2024 11:05:22 +0800, libaokun@huaweicloud.com wrote:
> Currently we avoid throttling swap writes by checking whether the
> current process is kswapd (i.e. current_is_kswapd()), but swap writes
> can come from either kswapd or direct reclaim, so swap writes issued
> from direct reclaim are still throttled.
> 
> When a process holds a lock while allocating a free page and enters
> direct reclaim because there is no free memory, wbt throttling of its
> swap writes can stall it, which may in turn hang other processes
> waiting for the lock.
> 
> [...]

Applied, thanks!

[1/1] blk-wbt: don't throttle swap writes in direct reclaim
      commit: 4e63aeb5d0101ddada36a2f64f048e2f9d2202fc

Best regards,

Patch

diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 64472134dd26..aaacf2f5b223 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -37,7 +37,7 @@ 
 enum wbt_flags {
 	WBT_TRACKED		= 1,	/* write, tracked for throttling */
 	WBT_READ		= 2,	/* read */
-	WBT_KSWAPD		= 4,	/* write, from kswapd */
+	WBT_SWAP		= 4,	/* write, from swap_writepage() */
 	WBT_DISCARD		= 8,	/* discard */
 
 	WBT_NR_BITS		= 4,	/* number of bits */
@@ -45,7 +45,7 @@  enum wbt_flags {
 
 enum {
 	WBT_RWQ_BG		= 0,
-	WBT_RWQ_KSWAPD,
+	WBT_RWQ_SWAP,
 	WBT_RWQ_DISCARD,
 	WBT_NUM_RWQ,
 };
@@ -172,8 +172,8 @@  static bool wb_recent_wait(struct rq_wb *rwb)
 static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb,
 					  enum wbt_flags wb_acct)
 {
-	if (wb_acct & WBT_KSWAPD)
-		return &rwb->rq_wait[WBT_RWQ_KSWAPD];
+	if (wb_acct & WBT_SWAP)
+		return &rwb->rq_wait[WBT_RWQ_SWAP];
 	else if (wb_acct & WBT_DISCARD)
 		return &rwb->rq_wait[WBT_RWQ_DISCARD];
 
@@ -528,7 +528,7 @@  static bool close_io(struct rq_wb *rwb)
 		time_before(now, rwb->last_comp + HZ / 10);
 }
 
-#define REQ_HIPRIO	(REQ_SYNC | REQ_META | REQ_PRIO)
+#define REQ_HIPRIO	(REQ_SYNC | REQ_META | REQ_PRIO | REQ_SWAP)
 
 static inline unsigned int get_limit(struct rq_wb *rwb, blk_opf_t opf)
 {
@@ -539,13 +539,13 @@  static inline unsigned int get_limit(struct rq_wb *rwb, blk_opf_t opf)
 
 	/*
 	 * At this point we know it's a buffered write. If this is
-	 * kswapd trying to free memory, or REQ_SYNC is set, then
+	 * swap trying to free memory, or REQ_SYNC is set, then
 	 * it's WB_SYNC_ALL writeback, and we'll use the max limit for
 	 * that. If the write is marked as a background write, then use
 	 * the idle limit, or go to normal if we haven't had competing
 	 * IO for a bit.
 	 */
-	if ((opf & REQ_HIPRIO) || wb_recent_wait(rwb) || current_is_kswapd())
+	if ((opf & REQ_HIPRIO) || wb_recent_wait(rwb))
 		limit = rwb->rq_depth.max_depth;
 	else if ((opf & REQ_BACKGROUND) || close_io(rwb)) {
 		/*
@@ -622,8 +622,8 @@  static enum wbt_flags bio_to_wbt_flags(struct rq_wb *rwb, struct bio *bio)
 	if (bio_op(bio) == REQ_OP_READ) {
 		flags = WBT_READ;
 	} else if (wbt_should_throttle(bio)) {
-		if (current_is_kswapd())
-			flags |= WBT_KSWAPD;
+		if (bio->bi_opf & REQ_SWAP)
+			flags |= WBT_SWAP;
 		if (bio_op(bio) == REQ_OP_DISCARD)
 			flags |= WBT_DISCARD;
 		flags |= WBT_TRACKED;
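
As a reading aid, the net behavior of the patched paths above can be
condensed into the following standalone sketch (the flag values and
helper name are simplified stand-ins, not the real kernel
definitions):

#include <stdio.h>

/* Simplified stand-in values; not the real kernel definitions. */
#define REQ_SYNC	(1u << 0)
#define REQ_META	(1u << 1)
#define REQ_PRIO	(1u << 2)
#define REQ_SWAP	(1u << 3)

/* REQ_SWAP now counts as high priority, so get_limit() grants swap
 * writes rq_depth.max_depth without a current_is_kswapd() check. */
#define REQ_HIPRIO	(REQ_SYNC | REQ_META | REQ_PRIO | REQ_SWAP)

enum wbt_flags {
	WBT_TRACKED	= 1,
	WBT_READ	= 2,
	WBT_SWAP	= 4,	/* renamed from WBT_KSWAPD */
	WBT_DISCARD	= 8,
};

/* Mirrors the patched bio_to_wbt_flags(): the bio's flag, not the
 * submitting task, selects WBT_SWAP, so direct-reclaim swap writes
 * are accounted on the same dedicated rq_wait queue (WBT_RWQ_SWAP)
 * as kswapd's. */
static unsigned int classify_write(unsigned int bi_opf)
{
	unsigned int flags = WBT_TRACKED;

	if (bi_opf & REQ_SWAP)
		flags |= WBT_SWAP;
	return flags;
}

int main(void)
{
	/* A swap write from direct reclaim: high priority and WBT_SWAP,
	 * whichever task submitted it. */
	unsigned int opf = REQ_SWAP;

	printf("hiprio=%d wbt_flags=%#x\n",
	       !!(opf & REQ_HIPRIO), classify_write(opf));
	return 0;
}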