
[4/4] block: update cached timestamp post schedule/preemption

Message ID 20240126213827.2757115-5-axboe@kernel.dk (mailing list archive)
State New, archived
Series Cache issue side time querying

Commit Message

Jens Axboe Jan. 26, 2024, 9:30 p.m. UTC
Mark the task as having a cached timestamp when we assign it, so we
can efficiently check if it needs updating after being scheduled back in.
This covers both the actual schedule-out case, which would've flushed
the plug, and the preemption case, which doesn't touch the plugged
requests (for many reasons, one of them being that we'd then need to have
preemption disabled around plug state manipulation).

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c       |  2 ++
 block/blk.h            |  4 +++-
 include/linux/blkdev.h | 16 ++++++++++++++++
 include/linux/sched.h  |  2 +-
 kernel/sched/core.c    |  6 ++++--
 5 files changed, 26 insertions(+), 4 deletions(-)
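
For readers skimming the series, here is a condensed, standalone C model of the
scheme this patch implements (editorial illustration only, not kernel code:
simplified types, a fake clock, and a helper named scheduled_back_in() standing
in for the PF_BLOCK_TS handling in sched_update_worker()). The first time query
inside a plugged section caches the value and sets PF_BLOCK_TS; being scheduled
back in clears both, so the next query fetches a fresh timestamp.

#include <stdint.h>
#include <stdio.h>

#define PF_BLOCK_TS (1u << 29)		/* mirrors the new task flag */

struct blk_plug { uint64_t cur_ktime; };
struct task { unsigned int flags; struct blk_plug *plug; };

static uint64_t ktime_get_ns(void)	/* fake clock standing in for the real one */
{
	static uint64_t t;
	return t += 1000;
}

static uint64_t blk_time_get_ns(struct task *tsk)
{
	struct blk_plug *plug = tsk->plug;

	if (!plug)
		return ktime_get_ns();
	if (!plug->cur_ktime) {
		plug->cur_ktime = ktime_get_ns();
		tsk->flags |= PF_BLOCK_TS;	/* remember we hold a cached value */
	}
	return plug->cur_ktime;
}

static void scheduled_back_in(struct task *tsk)
{
	if (tsk->flags & PF_BLOCK_TS) {
		if (tsk->plug)
			tsk->plug->cur_ktime = 0;	/* invalidate the cached timestamp */
		tsk->flags &= ~PF_BLOCK_TS;
	}
}

int main(void)
{
	struct blk_plug plug = { 0 };
	struct task tsk = { 0, &plug };

	printf("%llu\n", (unsigned long long)blk_time_get_ns(&tsk));	/* 1000 */
	printf("%llu\n", (unsigned long long)blk_time_get_ns(&tsk));	/* 1000, served from cache */
	scheduled_back_in(&tsk);					/* e.g. preempted and resumed */
	printf("%llu\n", (unsigned long long)blk_time_get_ns(&tsk));	/* 2000, cache was invalidated */
	return 0;
}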

Comments

Johannes Thumshirn Jan. 29, 2024, 8:01 a.m. UTC | #1
On 26.01.24 22:39, Jens Axboe wrote:
>   static void sched_update_worker(struct task_struct *tsk)
>   {
> -	if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) {
> +	if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER | PF_BLOCK_TS)) {
> +		if (tsk->flags & PF_BLOCK_TS)
> +			blk_plug_invalidate_ts(tsk);
>   		if (tsk->flags & PF_WQ_WORKER)
>   			wq_worker_running(tsk);
> -		else
> +		else if (tsk->flags & PF_IO_WORKER)
>   			io_wq_worker_running(tsk);
>   	}
>   }


Why the nested if? Isn't that more readable:

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9116bcc90346..74beb0126da6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6787,12 +6787,12 @@ static inline void sched_submit_work(struct task_struct *tsk)

  static void sched_update_worker(struct task_struct *tsk)
  {
-       if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) {
-               if (tsk->flags & PF_WQ_WORKER)
-                       wq_worker_running(tsk);
-               else
-                       io_wq_worker_running(tsk);
-       }
+       if (tsk->flags & PF_BLOCK_TS)
+               blk_plug_invalidate_ts(tsk);
+       if (tsk->flags & PF_WQ_WORKER)
+               wq_worker_running(tsk);
+       else if (tsk->flags & PF_IO_WORKER)
+               io_wq_worker_running(tsk);
  }

  static __always_inline void __schedule_loop(unsigned int sched_mode)
Jens Axboe Jan. 29, 2024, 2:02 p.m. UTC | #2
On 1/29/24 1:01 AM, Johannes Thumshirn wrote:
> On 26.01.24 22:39, Jens Axboe wrote:
>>   static void sched_update_worker(struct task_struct *tsk)
>>   {
>> -	if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) {
>> +	if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER | PF_BLOCK_TS)) {
>> +		if (tsk->flags & PF_BLOCK_TS)
>> +			blk_plug_invalidate_ts(tsk);
>>   		if (tsk->flags & PF_WQ_WORKER)
>>   			wq_worker_running(tsk);
>> -		else
>> +		else if (tsk->flags & PF_IO_WORKER)
>>   			io_wq_worker_running(tsk);
>>   	}
>>   }
> 
> 
> Why the nested if? Isn't that more readable:

It's so that we can keep it at a single branch for the fast case of none
of them being true, which is also how it was done before this change.
This one just adds one more flag to check. With your change, it's 3
branches instead of one for the fast case.
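
Editorial aside: the branch-count argument in self-contained form. The PF_*
values below are only illustrative (PF_BLOCK_TS is the value this series picks),
and the two functions are sketches, not the actual scheduler code.

#define PF_IO_WORKER	0x00000010
#define PF_WQ_WORKER	0x00000020
#define PF_BLOCK_TS	0x20000000

/* Patch form: the common "no flags set" case costs one test and one
 * untaken branch; the per-flag checks only run when something is set. */
static int combined(unsigned int flags)
{
	if (flags & (PF_WQ_WORKER | PF_IO_WORKER | PF_BLOCK_TS)) {
		/* rare path: figure out which flag(s) fired */
		return 1;
	}
	return 0;
}

/* Flattened form: every schedule-in evaluates three tests even when
 * none of the flags is set. */
static int flattened(unsigned int flags)
{
	int hits = 0;

	if (flags & PF_BLOCK_TS)		/* test 1 */
		hits++;
	if (flags & PF_WQ_WORKER)		/* test 2 */
		hits++;
	else if (flags & PF_IO_WORKER)		/* test 3 */
		hits++;
	return hits;
}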
Johannes Thumshirn Jan. 29, 2024, 2:06 p.m. UTC | #3
On 29.01.24 15:02, Jens Axboe wrote:
> On 1/29/24 1:01 AM, Johannes Thumshirn wrote:
>> On 26.01.24 22:39, Jens Axboe wrote:
>>>    static void sched_update_worker(struct task_struct *tsk)
>>>    {
>>> -	if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) {
>>> +	if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER | PF_BLOCK_TS)) {
>>> +		if (tsk->flags & PF_BLOCK_TS)
>>> +			blk_plug_invalidate_ts(tsk);
>>>    		if (tsk->flags & PF_WQ_WORKER)
>>>    			wq_worker_running(tsk);
>>> -		else
>>> +		else if (tsk->flags & PF_IO_WORKER)
>>>    			io_wq_worker_running(tsk);
>>>    	}
>>>    }
>>
>>
>> Why the nested if? Isn't that more readable:
> 
> It's so that we can keep it at a single branch for the fast case of none
> of them being true, which is also how it was done before this change.
> This one just adds one more flag to check. With your change, it's 3
> branches instead of one for the fast case.
> 

Although I don't really have hard feelings for it, that could be solved 
as well:

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9116bcc90346..74beb0126da6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6787,12 +6787,12 @@ static inline void sched_submit_work(struct task_struct *tsk)

   static void sched_update_worker(struct task_struct *tsk)
   {
-       if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) {
-               if (tsk->flags & PF_WQ_WORKER)
-                       wq_worker_running(tsk);
-               else
-                       io_wq_worker_running(tsk);
-       }
+	if (tsk->flags & !(PF_WQ_WORKER | PF_IO_WORKER | PF_BLOCK_TS))
+		return;
+       if (tsk->flags & PF_BLOCK_TS)
+               blk_plug_invalidate_ts(tsk);
+       if (tsk->flags & PF_WQ_WORKER)
+               wq_worker_running(tsk);
+       else if (tsk->flags & PF_IO_WORKER)
+               io_wq_worker_running(tsk);
   }

But yep, that's bikeshedding I admit
Jens Axboe Jan. 29, 2024, 2:08 p.m. UTC | #4
On 1/29/24 7:06 AM, Johannes Thumshirn wrote:
> On 29.01.24 15:02, Jens Axboe wrote:
>> On 1/29/24 1:01 AM, Johannes Thumshirn wrote:
>>> On 26.01.24 22:39, Jens Axboe wrote:
>>>>    static void sched_update_worker(struct task_struct *tsk)
>>>>    {
>>>> -	if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) {
>>>> +	if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER | PF_BLOCK_TS)) {
>>>> +		if (tsk->flags & PF_BLOCK_TS)
>>>> +			blk_plug_invalidate_ts(tsk);
>>>>    		if (tsk->flags & PF_WQ_WORKER)
>>>>    			wq_worker_running(tsk);
>>>> -		else
>>>> +		else if (tsk->flags & PF_IO_WORKER)
>>>>    			io_wq_worker_running(tsk);
>>>>    	}
>>>>    }
>>>
>>>
>>> Why the nested if? Isn't that more readable:
>>
>> It's so that we can keep it at a single branch for the fast case of none
>> of them being true, which is also how it was done before this change.
>> This one just adds one more flag to check. With your change, it's 3
>> branches instead of one for the fast case.
>>
> 
> Although I don't really have hard feelings for it, that could be solved 
> as well:
> 
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 9116bcc90346..74beb0126da6 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -6787,12 +6787,12 @@ static inline void sched_submit_work(struct task_struct *tsk)
> 
>    static void sched_update_worker(struct task_struct *tsk)
>    {
> -       if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) {
> -               if (tsk->flags & PF_WQ_WORKER)
> -                       wq_worker_running(tsk);
> -               else
> -                       io_wq_worker_running(tsk);
> -       }
> +	if (tsk->flags & !(PF_WQ_WORKER | PF_IO_WORKER | PF_BLOCK_TS))
> +		return;

Don't think that'd work :-)

> +       if (tsk->flags & PF_BLOCK_TS)
> +               blk_plug_invalidate_ts(tsk);
> +       if (tsk->flags & PF_WQ_WORKER)
> +               wq_worker_running(tsk);
> +       else if (tsk->flags & PF_IO_WORKER)
> +               io_wq_worker_running(tsk);
>    }
> 
> But yep, that's bikeshedding I admit

But agree, it'd accomplish the same. The patch is cleaner just keeping
the existing setup, however, rather than rewriting it like the above.
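
Editorial aside, not part of the thread: the suggested early return above mixes
up logical and bitwise negation. !(PF_WQ_WORKER | PF_IO_WORKER | PF_BLOCK_TS) is
logical NOT of a non-zero constant, i.e. 0, so the flags are ANDed with zero,
the condition is always false, and the function never returns early. With the
guard negating the result of the AND instead, that variant would read:

static void sched_update_worker(struct task_struct *tsk)
{
	/* negate the result of the AND, not the mask itself */
	if (!(tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER | PF_BLOCK_TS)))
		return;
	if (tsk->flags & PF_BLOCK_TS)
		blk_plug_invalidate_ts(tsk);
	if (tsk->flags & PF_WQ_WORKER)
		wq_worker_running(tsk);
	else if (tsk->flags & PF_IO_WORKER)
		io_wq_worker_running(tsk);
}

which keeps the common no-flags case at a single test, just like the posted patch.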

Patch

diff --git a/block/blk-core.c b/block/blk-core.c
index cc4db4d92c75..71c6614a97fe 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1173,6 +1173,8 @@  void __blk_flush_plug(struct blk_plug *plug, bool from_schedule)
 	 */
 	if (unlikely(!rq_list_empty(plug->cached_rq)))
 		blk_mq_free_plug_rqs(plug);
+
+	current->flags &= ~PF_BLOCK_TS;
 }
 
 /**
diff --git a/block/blk.h b/block/blk.h
index 14bbc4b780f2..913c93838a01 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -529,8 +529,10 @@  static inline u64 blk_time_get_ns(void)
 	 * a valid timestamp" separately, just accept that we'll do an extra
 	 * ktime_get_ns() if we just happen to get 0 as the current time.
 	 */
-	if (!plug->cur_ktime)
+	if (!plug->cur_ktime) {
 		plug->cur_ktime = ktime_get_ns();
+		current->flags |= PF_BLOCK_TS;
+	}
 	return plug->cur_ktime;
 }
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 996d2ad756ff..d7cac3de65b3 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -973,6 +973,18 @@  static inline void blk_flush_plug(struct blk_plug *plug, bool async)
 		__blk_flush_plug(plug, async);
 }
 
+/*
+ * tsk == current here
+ */
+static inline void blk_plug_invalidate_ts(struct task_struct *tsk)
+{
+	struct blk_plug *plug = tsk->plug;
+
+	if (plug)
+		plug->cur_ktime = 0;
+	current->flags &= ~PF_BLOCK_TS;
+}
+
 int blkdev_issue_flush(struct block_device *bdev);
 long nr_blockdev_pages(void);
 #else /* CONFIG_BLOCK */
@@ -996,6 +1008,10 @@  static inline void blk_flush_plug(struct blk_plug *plug, bool async)
 {
 }
 
+static inline void blk_plug_invalidate_ts(struct task_struct *tsk)
+{
+}
+
 static inline int blkdev_issue_flush(struct block_device *bdev)
 {
 	return 0;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index cdb8ea53c365..801233cef2fc 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1642,7 +1642,7 @@  extern struct pid *cad_pid;
 #define PF_NO_SETAFFINITY	0x04000000	/* Userland is not allowed to meddle with cpus_mask */
 #define PF_MCE_EARLY		0x08000000      /* Early kill for mce process policy */
 #define PF_MEMALLOC_PIN		0x10000000	/* Allocation context constrained to zones which allow long term pinning. */
-#define PF__HOLE__20000000	0x20000000
+#define PF_BLOCK_TS		0x20000000	/* plug has ts that needs updating */
 #define PF__HOLE__40000000	0x40000000
 #define PF_SUSPEND_TASK		0x80000000      /* This thread called freeze_processes() and should not be frozen */
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9116bcc90346..083f2258182d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6787,10 +6787,12 @@  static inline void sched_submit_work(struct task_struct *tsk)
 
 static void sched_update_worker(struct task_struct *tsk)
 {
-	if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) {
+	if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER | PF_BLOCK_TS)) {
+		if (tsk->flags & PF_BLOCK_TS)
+			blk_plug_invalidate_ts(tsk);
 		if (tsk->flags & PF_WQ_WORKER)
 			wq_worker_running(tsk);
-		else
+		else if (tsk->flags & PF_IO_WORKER)
 			io_wq_worker_running(tsk);
 	}
 }