Message ID | 20240604030522.3686177-1-libaokun@huaweicloud.com |
---|---|
State | New, archived |
Series | [-next] blk-wbt: don't throttle swap writes in direct reclaim |
On 2024/06/04 11:05, libaokun@huaweicloud.com wrote:
> From: Baokun Li <libaokun1@huawei.com>
>
> Currently we avoid throttling swap writes by checking whether the
> current process is kswapd (i.e. current_is_kswapd()), but swap writes
> can come from either kswapd or direct reclaim, so swap writes from
> direct reclaim will still be throttled.
>
> When a process holds a lock while allocating a page and enters direct
> reclaim because no memory is free, wbt throttling of the resulting
> swap write can stall it long enough that other processes hang waiting
> for the lock.
>
> Both kswapd and direct reclaim set the REQ_SWAP flag, so use REQ_SWAP
> instead of current_is_kswapd() to avoid throttling swap writes. Also
> rename WBT_KSWAPD to WBT_SWAP and WBT_RWQ_KSWAPD to WBT_RWQ_SWAP.
>
> Signed-off-by: Baokun Li <libaokun1@huawei.com>

LGTM

Reviewed-by: Yu Kuai <yukuai3@huawei.com>

> ---
>  block/blk-wbt.c | 18 +++++++++---------
>  1 file changed, 9 insertions(+), 9 deletions(-)
>
> [...]
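To make the reported hang concrete, the dependency chain looks roughly like this (a schematic only; the lock L and the exact call chain are illustrative stand-ins, not actual kernel code):

/*
 *   task A                              other tasks
 *   ------                              -----------
 *   mutex_lock(&L);
 *                                       mutex_lock(&L);  // blocks on A
 *   alloc_page()
 *     -> no free memory
 *     -> direct reclaim
 *        -> swap_writepage()            // bio carries REQ_SWAP
 *           -> wbt throttles the write  // pre-patch: only kswapd exempt
 *              ... long stall ...
 *   mutex_unlock(&L);                   // everyone stuck behind the stall
 */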
Friendly ping...

On 2024/6/4 11:05, libaokun@huaweicloud.com wrote:
> From: Baokun Li <libaokun1@huawei.com>
>
> Currently we avoid throttling swap writes by checking whether the
> current process is kswapd (i.e. current_is_kswapd()), but swap writes
> can come from either kswapd or direct reclaim, so swap writes from
> direct reclaim will still be throttled.
>
> [...]
Looks good:
Reviewed-by: Christoph Hellwig <hch@lst.de>
On Tue, 04 Jun 2024 11:05:22 +0800, libaokun@huaweicloud.com wrote:
> Currently we avoid throttling swap writes by checking whether the
> current process is kswapd (i.e. current_is_kswapd()), but swap writes
> can come from either kswapd or direct reclaim, so swap writes from
> direct reclaim will still be throttled.
>
> When a process holds a lock while allocating a page and enters direct
> reclaim because no memory is free, wbt throttling of the resulting
> swap write can stall it long enough that other processes hang waiting
> for the lock.
>
> [...]

Applied, thanks!

[1/1] blk-wbt: don't throttle swap writes in direct reclaim
      commit: 4e63aeb5d0101ddada36a2f64f048e2f9d2202fc

Best regards,
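The applied diff follows below. As a primer, here is a minimal userspace model of the patched bio classification: the decision now keys off the bio's own REQ_SWAP bit instead of current_is_kswapd(), so swap writes from direct reclaim are binned the same way as those from kswapd. The REQ_SWAP value and the classify() helper are illustrative stand-ins, not the kernel's actual bits or API.

#include <stdbool.h>
#include <stdio.h>

#define REQ_SWAP (1u << 8)      /* stand-in, not the kernel's bit position */

enum wbt_flags {
        WBT_TRACKED = 1,        /* write, tracked for throttling */
        WBT_READ = 2,           /* read */
        WBT_SWAP = 4,           /* write, from swap_writepage() */
        WBT_DISCARD = 8,        /* discard */
};

/* Mirrors the shape of the patched bio_to_wbt_flags(); is_read and
 * is_discard stand in for the bio_op() checks, and the
 * wbt_should_throttle() gate is omitted for brevity. */
static unsigned int classify(unsigned int bi_opf, bool is_read, bool is_discard)
{
        unsigned int flags = 0;

        if (is_read)
                return WBT_READ;
        if (bi_opf & REQ_SWAP)
                flags |= WBT_SWAP;
        if (is_discard)
                flags |= WBT_DISCARD;
        return flags | WBT_TRACKED;
}

int main(void)
{
        /* A swap write from direct reclaim carries REQ_SWAP just like one
         * from kswapd, so both map to the WBT_RWQ_SWAP wait queue. */
        unsigned int flags = classify(REQ_SWAP, false, false);

        printf("WBT_SWAP set: %d, tracked: %d\n",
               !!(flags & WBT_SWAP), !!(flags & WBT_TRACKED));
        return 0;
}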
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 64472134dd26..aaacf2f5b223 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -37,7 +37,7 @@
 enum wbt_flags {
         WBT_TRACKED = 1,        /* write, tracked for throttling */
         WBT_READ = 2,           /* read */
-        WBT_KSWAPD = 4,         /* write, from kswapd */
+        WBT_SWAP = 4,           /* write, from swap_writepage() */
         WBT_DISCARD = 8,        /* discard */
 
         WBT_NR_BITS = 4,        /* number of bits */
@@ -45,7 +45,7 @@ enum wbt_flags {
 
 enum {
         WBT_RWQ_BG = 0,
-        WBT_RWQ_KSWAPD,
+        WBT_RWQ_SWAP,
         WBT_RWQ_DISCARD,
         WBT_NUM_RWQ,
 };
@@ -172,8 +172,8 @@ static bool wb_recent_wait(struct rq_wb *rwb)
 static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb,
                                           enum wbt_flags wb_acct)
 {
-        if (wb_acct & WBT_KSWAPD)
-                return &rwb->rq_wait[WBT_RWQ_KSWAPD];
+        if (wb_acct & WBT_SWAP)
+                return &rwb->rq_wait[WBT_RWQ_SWAP];
         else if (wb_acct & WBT_DISCARD)
                 return &rwb->rq_wait[WBT_RWQ_DISCARD];
 
@@ -528,7 +528,7 @@ static bool close_io(struct rq_wb *rwb)
                 time_before(now, rwb->last_comp + HZ / 10);
 }
 
-#define REQ_HIPRIO      (REQ_SYNC | REQ_META | REQ_PRIO)
+#define REQ_HIPRIO      (REQ_SYNC | REQ_META | REQ_PRIO | REQ_SWAP)
 
 static inline unsigned int get_limit(struct rq_wb *rwb, blk_opf_t opf)
 {
@@ -539,13 +539,13 @@ static inline unsigned int get_limit(struct rq_wb *rwb, blk_opf_t opf)
 
         /*
          * At this point we know it's a buffered write. If this is
-         * kswapd trying to free memory, or REQ_SYNC is set, then
+         * swap trying to free memory, or REQ_SYNC is set, then
          * it's WB_SYNC_ALL writeback, and we'll use the max limit for
          * that. If the write is marked as a background write, then use
          * the idle limit, or go to normal if we haven't had competing
          * IO for a bit.
          */
-        if ((opf & REQ_HIPRIO) || wb_recent_wait(rwb) || current_is_kswapd())
+        if ((opf & REQ_HIPRIO) || wb_recent_wait(rwb))
                 limit = rwb->rq_depth.max_depth;
         else if ((opf & REQ_BACKGROUND) || close_io(rwb)) {
                 /*
@@ -622,8 +622,8 @@ static enum wbt_flags bio_to_wbt_flags(struct rq_wb *rwb, struct bio *bio)
         if (bio_op(bio) == REQ_OP_READ) {
                 flags = WBT_READ;
         } else if (wbt_should_throttle(bio)) {
-                if (current_is_kswapd())
-                        flags |= WBT_KSWAPD;
+                if (bio->bi_opf & REQ_SWAP)
+                        flags |= WBT_SWAP;
                 if (bio_op(bio) == REQ_OP_DISCARD)
                         flags |= WBT_DISCARD;
                 flags |= WBT_TRACKED;
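The REQ_HIPRIO hunk above has a second effect worth noting: since REQ_SWAP is now part of REQ_HIPRIO, get_limit() grants swap writes the maximum queue depth without consulting current_is_kswapd(). A minimal sketch of that decision, with illustrative bit values and depths (the real branches and rq_depth numbers differ):

#include <stdio.h>

#define REQ_SYNC        (1u << 0)
#define REQ_META        (1u << 1)
#define REQ_PRIO        (1u << 2)
#define REQ_SWAP        (1u << 3)
#define REQ_HIPRIO      (REQ_SYNC | REQ_META | REQ_PRIO | REQ_SWAP)

/* Simplified stand-in for the patched get_limit(): high-priority
 * writes (now including swap) get full depth; everything else is
 * collapsed into one reduced-depth branch for brevity. */
static unsigned int get_limit(unsigned int opf, unsigned int max_depth)
{
        if (opf & REQ_HIPRIO)
                return max_depth;       /* swap and sync writeback: full depth */
        return max_depth / 2;           /* simplified: background/normal paths */
}

int main(void)
{
        printf("swap write limit:  %u\n", get_limit(REQ_SWAP, 64));     /* 64 */
        printf("plain write limit: %u\n", get_limit(0, 64));            /* 32 */
        return 0;
}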