
[PATCHv3,6/7] block: protect wbt_lat_usec using q->elevator_lock

Message ID 20250224133102.1240146-7-nilay@linux.ibm.com (mailing list archive)
State New
Series block: fix lock order and remove redundant locking

Commit Message

Nilay Shroff Feb. 24, 2025, 1:30 p.m. UTC
The wbt latency and state could be updated while initializing or
exiting the elevator. They could also be updated while configuring
the IO latency QoS parameters through cgroup. The elevator code path
is now protected with q->elevator_lock, so we should protect access
to the sysfs attribute wbt_lat_usec using q->elevator_lock instead
of q->sysfs_lock. While we're at it, also protect ioc_qos_write(),
which configures wbt parameters via cgroup, using q->elevator_lock.

Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
---
 block/blk-iocost.c |  2 ++
 block/blk-sysfs.c  | 20 ++++++++------------
 2 files changed, 10 insertions(+), 12 deletions(-)

Comments

Hannes Reinecke Feb. 25, 2025, 7:53 a.m. UTC | #1
On 2/24/25 14:30, Nilay Shroff wrote:
> The wbt latency and state could be updated while initializing or
> exiting the elevator. They could also be updated while configuring
> the IO latency QoS parameters through cgroup. The elevator code path
> is now protected with q->elevator_lock, so we should protect access
> to the sysfs attribute wbt_lat_usec using q->elevator_lock instead
> of q->sysfs_lock. While we're at it, also protect ioc_qos_write(),
> which configures wbt parameters via cgroup, using q->elevator_lock.
> 
> Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
> ---
>   block/blk-iocost.c |  2 ++
>   block/blk-sysfs.c  | 20 ++++++++------------
>   2 files changed, 10 insertions(+), 12 deletions(-)
> 
> diff --git a/block/blk-iocost.c b/block/blk-iocost.c
> index 65a1d4427ccf..c68373361301 100644
> --- a/block/blk-iocost.c
> +++ b/block/blk-iocost.c
> @@ -3249,6 +3249,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
>   	}
>   
>   	memflags = blk_mq_freeze_queue(disk->queue);
> +	mutex_lock(&disk->queue->elevator_lock);
>   	blk_mq_quiesce_queue(disk->queue);
>   
Ordering?

Do we have a defined order between 'freeze', 'quiesce' and taking the 
respective lock?

Or, to put it the other way round: why do we take the lock after the
'freeze', but before the 'quiesce'?

Isn't that worth a comment somewhere?

Cheers,

Hannes
Nilay Shroff Feb. 25, 2025, 10:05 a.m. UTC | #2
On 2/25/25 1:23 PM, Hannes Reinecke wrote:
> On 2/24/25 14:30, Nilay Shroff wrote:
>> The wbt latency and state could be updated while initializing or
>> exiting the elevator. They could also be updated while configuring
>> the IO latency QoS parameters through cgroup. The elevator code path
>> is now protected with q->elevator_lock, so we should protect access
>> to the sysfs attribute wbt_lat_usec using q->elevator_lock instead
>> of q->sysfs_lock. While we're at it, also protect ioc_qos_write(),
>> which configures wbt parameters via cgroup, using q->elevator_lock.
>>
>> Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
>> ---
>>   block/blk-iocost.c |  2 ++
>>   block/blk-sysfs.c  | 20 ++++++++------------
>>   2 files changed, 10 insertions(+), 12 deletions(-)
>>
>> diff --git a/block/blk-iocost.c b/block/blk-iocost.c
>> index 65a1d4427ccf..c68373361301 100644
>> --- a/block/blk-iocost.c
>> +++ b/block/blk-iocost.c
>> @@ -3249,6 +3249,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
>>       }
>>         memflags = blk_mq_freeze_queue(disk->queue);
>> +    mutex_lock(&disk->queue->elevator_lock);
>>       blk_mq_quiesce_queue(disk->queue);
>>   
> Ordering?
> 
> Do we have a defined order between 'freeze', 'quiesce' and taking the respective lock?
> 
> Or, to put it the other way round: why do we take the lock after the 'freeze', but before the 'quiesce'?

This is the order currently followed in __blk_mq_update_nr_hw_queues(),
where we first freeze the queue and then acquire the other locks. Yes,
this may look a bit unusual, but that's how it is; changing that
ordering would likely require breaking up the
__blk_mq_update_nr_hw_queues() function. So the current locking
sequence wrt ->elevator_lock is: freeze the queue, then acquire
->elevator_lock.

So we try to follow the same locking order at all call sites where we
need to freeze the queue and use ->elevator_lock.
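
(For illustration only, not part of the patch: a minimal sketch of the
resulting ordering at a call site that needs both the queue freeze and
->elevator_lock, assuming the blk_mq_freeze_queue()/
blk_mq_unfreeze_queue() memflags variant used elsewhere in this series.
The function name and the elided update are placeholders.)

	/*
	 * Sketch of the ordering convention discussed above: setup is
	 * freeze -> elevator_lock -> quiesce, teardown is the reverse.
	 */
	static void example_update_wbt_state(struct request_queue *q)
	{
		unsigned int memflags;

		memflags = blk_mq_freeze_queue(q);	/* 1. freeze the queue   */
		mutex_lock(&q->elevator_lock);		/* 2. take elevator_lock */
		blk_mq_quiesce_queue(q);		/* 3. quiesce            */

		/* ... update wbt / QoS state here ... */

		blk_mq_unquiesce_queue(q);		/* 3'. unquiesce         */
		mutex_unlock(&q->elevator_lock);	/* 2'. drop the lock     */
		blk_mq_unfreeze_queue(q, memflags);	/* 1'. unfreeze          */
	}

Keeping the same order at every call site avoids lock-ordering
inversions against __blk_mq_update_nr_hw_queues().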

> 
> Isn't that worth a comment somewhere?
Alright, I'll add a comment about this ordering, probably near the
declaration of this lock in struct request_queue.
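
(Again for illustration only: one possible shape of such a comment next
to the declaration in struct request_queue. The exact wording and
placement are hypothetical, not the text that eventually landed.)

	/*
	 * Serializes elevator switch and the attributes that depend on
	 * elevator/wbt state (e.g. wbt_lat_usec, io.cost.qos). Lock
	 * order: blk_mq_freeze_queue() first, then ->elevator_lock,
	 * then blk_mq_quiesce_queue() if needed.
	 */
	struct mutex elevator_lock;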

Thanks,
--Nilay

Patch

diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 65a1d4427ccf..c68373361301 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -3249,6 +3249,7 @@  static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
 	}
 
 	memflags = blk_mq_freeze_queue(disk->queue);
+	mutex_lock(&disk->queue->elevator_lock);
 	blk_mq_quiesce_queue(disk->queue);
 
 	spin_lock_irq(&ioc->lock);
@@ -3356,6 +3357,7 @@  static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
 	spin_unlock_irq(&ioc->lock);
 
 	blk_mq_unquiesce_queue(disk->queue);
+	mutex_unlock(&disk->queue->elevator_lock);
 	blk_mq_unfreeze_queue(disk->queue, memflags);
 
 	ret = -EINVAL;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 1ca0938b2fd7..8f47d9f30fbf 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -557,7 +557,7 @@  static ssize_t queue_wb_lat_show(struct gendisk *disk, char *page)
 	ssize_t ret;
 	struct request_queue *q = disk->queue;
 
-	mutex_lock(&q->sysfs_lock);
+	mutex_lock(&q->elevator_lock);
 	if (!wbt_rq_qos(q)) {
 		ret = -EINVAL;
 		goto out;
@@ -570,7 +570,7 @@  static ssize_t queue_wb_lat_show(struct gendisk *disk, char *page)
 
 	ret = sysfs_emit(page, "%llu\n", div_u64(wbt_get_min_lat(q), 1000));
 out:
-	mutex_unlock(&q->sysfs_lock);
+	mutex_unlock(&q->elevator_lock);
 	return ret;
 }
 
@@ -589,8 +589,8 @@  static ssize_t queue_wb_lat_store(struct gendisk *disk, const char *page,
 	if (val < -1)
 		return -EINVAL;
 
-	mutex_lock(&q->sysfs_lock);
 	memflags = blk_mq_freeze_queue(q);
+	mutex_lock(&q->elevator_lock);
 
 	rqos = wbt_rq_qos(q);
 	if (!rqos) {
@@ -619,8 +619,8 @@  static ssize_t queue_wb_lat_store(struct gendisk *disk, const char *page,
 
 	blk_mq_unquiesce_queue(q);
 out:
+	mutex_unlock(&q->elevator_lock);
 	blk_mq_unfreeze_queue(q, memflags);
-	mutex_unlock(&q->sysfs_lock);
 
 	return ret;
 }
@@ -689,19 +689,15 @@  static struct attribute *queue_attrs[] = {
 
 /* Request-based queue attributes that are not relevant for bio-based queues. */
 static struct attribute *blk_mq_queue_attrs[] = {
-	/*
-	 * attributes protected with q->sysfs_lock
-	 */
-#ifdef CONFIG_BLK_WBT
-	&queue_wb_lat_entry.attr,
-#endif
 	/*
 	 * attributes which require some form of locking
 	 * other than q->sysfs_lock
 	 */
 	&elv_iosched_entry.attr,
 	&queue_requests_entry.attr,
-
+#ifdef CONFIG_BLK_WBT
+	&queue_wb_lat_entry.attr,
+#endif
 	/*
 	 * attributes which don't require locking
 	 */
@@ -882,10 +878,10 @@  int blk_register_queue(struct gendisk *disk)
 			goto out_crypto_sysfs_unregister;
 		}
 	}
+	wbt_enable_default(disk);
 	mutex_unlock(&q->elevator_lock);
 
 	blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);
-	wbt_enable_default(disk);
 
 	/* Now everything is ready and send out KOBJ_ADD uevent */
 	kobject_uevent(&disk->queue_kobj, KOBJ_ADD);