@@ -131,7 +131,8 @@ static inline bool preempt_tag(struct blk_mq_alloc_data *data,
struct sbitmap_queue *bt)
{
return data->preemption ||
- atomic_read(&bt->ws_active) <= SBQ_WAIT_QUEUES;
+ atomic_read(&bt->ws_active) <= SBQ_WAIT_QUEUES ||
+ READ_ONCE(bt->force_tag_preemption);
}
unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
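
For review context, here is a minimal userspace sketch of the resulting
check. The sbq_model/alloc_model structs and preempt_tag_model() below are
hypothetical reductions, not the kernel types; only the SBQ_WAIT_QUEUES
value matches include/linux/sbitmap.h. It shows the new term letting the
wakeup path override a disabled preemption setting:

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	#define SBQ_WAIT_QUEUES 8	/* same value as include/linux/sbitmap.h */

	struct sbq_model {
		atomic_int ws_active;		/* wait queues with sleepers */
		bool force_tag_preemption;	/* written by the wakeup path */
	};

	struct alloc_model {
		bool preemption;		/* per-allocation preemption flag */
	};

	static bool preempt_tag_model(struct alloc_model *data,
				      struct sbq_model *bt)
	{
		return data->preemption ||
		       atomic_load(&bt->ws_active) <= SBQ_WAIT_QUEUES ||
		       bt->force_tag_preemption;	/* the new override */
	}

	int main(void)
	{
		struct sbq_model bt = { .force_tag_preemption = true };
		struct alloc_model data = { .preemption = false };

		/* all 8 wait queues active: the old check alone would refuse */
		atomic_store(&bt.ws_active, SBQ_WAIT_QUEUES + 1);
		printf("preempt = %d\n", preempt_tag_model(&data, &bt)); /* 1 */
		return 0;
	}
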
@@ -118,6 +118,12 @@ struct sbitmap_queue {
*/
unsigned int wake_batch;
+ /**
+ * @force_tag_preemption: preempt tag even if tag preemption is
+ * disabled.
+ */
+ bool force_tag_preemption;
+
/**
* @wake_index: Next wait queue in @ws to wake up.
*/
@@ -432,6 +432,7 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
sbq->min_shallow_depth = UINT_MAX;
sbq->wake_batch = sbq_calc_wake_batch(sbq, depth);
+ sbq->force_tag_preemption = false;
atomic_set(&sbq->wake_index, 0);
atomic_set(&sbq->ws_active, 0);
@@ -650,6 +651,10 @@ static bool __sbq_wake_up(struct sbitmap_queue *sbq)
*/
ret = atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wake_batch);
if (ret == wait_cnt) {
+ bool force = (sbq->sb.depth - sbitmap_weight(&sbq->sb) >
+ READ_ONCE(sbq->wake_batch) * 2);
+
+ WRITE_ONCE(sbq->force_tag_preemption, force);
sbq_index_atomic_inc(&sbq->wake_index);
wake_up_nr(&ws->wait, get_wake_nr(ws, &wake_batch));
if (wake_batch)
If tag preemption is disabled and the system is under high I/O pressure,
inflight I/O should use up all tags. Since new I/O will wait directly,
this relies on woken-up threads obtaining at least 'wake_batch' tags.
However, this can be broken if the 8 wait queues are unbalanced.

This patch calculates the number of free tags each time a 'ws' is woken
up, and forces tag preemption if enough free tags are available.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 block/blk-mq-tag.c      | 3 ++-
 include/linux/sbitmap.h | 6 ++++++
 lib/sbitmap.c           | 5 +++++
 3 files changed, 13 insertions(+), 1 deletion(-)
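
As a sanity check on the new trigger, a tiny standalone sketch of the
arithmetic from the __sbq_wake_up() hunk above (the depth/weight numbers
are made up for illustration, not taken from a real trace):

	#include <stdbool.h>
	#include <stdio.h>

	int main(void)
	{
		unsigned int depth = 256;	/* sbq->sb.depth */
		unsigned int weight = 230;	/* sbitmap_weight(): tags in use */
		unsigned int wake_batch = 8;	/* sbq->wake_batch */

		/* free tags exceed two full wake batches -> force preemption */
		bool force = depth - weight > wake_batch * 2;

		/* prints: free=26 threshold=16 force=1 */
		printf("free=%u threshold=%u force=%d\n",
		       depth - weight, wake_batch * 2, force);
		return 0;
	}

Presumably the factor of two leaves headroom so that even if preempting
allocators consume one full batch of the free tags, the woken-up threads
can still obtain their 'wake_batch' tags.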