diff mbox

[1/2] sched/wait: add round robin wakeup mode

Message ID f382714bd4ba5df3589e2e2e8bac114cdd4d7bb3.1423509605.git.jbaron@akamai.com (mailing list archive)
State New, archived
Headers show

Commit Message

Jason Baron Feb. 9, 2015, 8:05 p.m. UTC
The motivation for this flag is to allow the distribution of wakeups from
a shared source in a balanced manner. Currently, we can add threads exclusively
but that often results in the same thread woken up again and again. In the case
where we are trying to balance work across threads this is not desirable.

The WQ_FLAG_ROUND_ROBIN is restricted to being exclusive as well, otherwise we
do not know who is being woken up.

Signed-off-by: Jason Baron <jbaron@akamai.com>
---
 include/linux/wait.h | 11 +++++++++++
 kernel/sched/wait.c  |  5 ++++-
 2 files changed, 15 insertions(+), 1 deletion(-)

Comments

Michael Kerrisk (man-pages) Feb. 9, 2015, 8:26 p.m. UTC | #1
[CC += linux-api@vger.kernel.org]


On Mon, Feb 9, 2015 at 9:05 PM, Jason Baron <jbaron@akamai.com> wrote:
> The motivation for this flag is to allow the distribution of wakeups from
> a shared source in a balanced manner. Currently, we can add threads exclusively
> but that often results in the same thread woken up again and again. In the case
> where we are trying to balance work across threads this is not desirable.
>
> The WQ_FLAG_ROUND_ROBIN is restricted to being exclusive as well, otherwise we
> do not know who is being woken up.
>
> Signed-off-by: Jason Baron <jbaron@akamai.com>
> ---
>  include/linux/wait.h | 11 +++++++++++
>  kernel/sched/wait.c  |  5 ++++-
>  2 files changed, 15 insertions(+), 1 deletion(-)
>
> diff --git a/include/linux/wait.h b/include/linux/wait.h
> index 2232ed1..bbdef98 100644
> --- a/include/linux/wait.h
> +++ b/include/linux/wait.h
> @@ -16,6 +16,7 @@ int default_wake_function(wait_queue_t *wait, unsigned mode, int flags, void *ke
>  /* __wait_queue::flags */
>  #define WQ_FLAG_EXCLUSIVE      0x01
>  #define WQ_FLAG_WOKEN          0x02
> +#define WQ_FLAG_ROUND_ROBIN    0x04
>
>  struct __wait_queue {
>         unsigned int            flags;
> @@ -109,6 +110,16 @@ static inline int waitqueue_active(wait_queue_head_t *q)
>
>  extern void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait);
>  extern void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait);
> +
> +/*
> + * rr relies on exclusive, otherwise we don't know which entry was woken
> + */
> +static inline void add_wait_queue_rr(wait_queue_head_t *q, wait_queue_t *wait)
> +{
> +       wait->flags |= WQ_FLAG_ROUND_ROBIN;
> +       add_wait_queue_exclusive(q, wait);
> +}
> +
>  extern void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait);
>
>  static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new)
> diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
> index 852143a..17d1039 100644
> --- a/kernel/sched/wait.c
> +++ b/kernel/sched/wait.c
> @@ -71,8 +71,11 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
>                 unsigned flags = curr->flags;
>
>                 if (curr->func(curr, mode, wake_flags, key) &&
> -                               (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
> +                              (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive) {
> +                       if (flags & WQ_FLAG_ROUND_ROBIN)
> +                               list_move_tail(&curr->task_list, &q->task_list);
>                         break;
> +               }
>         }
>  }
>
> --
> 1.8.2.rc2
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Peter Zijlstra Feb. 9, 2015, 9:50 p.m. UTC | #2
On Mon, Feb 09, 2015 at 08:05:57PM +0000, Jason Baron wrote:
> diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
> index 852143a..17d1039 100644
> --- a/kernel/sched/wait.c
> +++ b/kernel/sched/wait.c
> @@ -71,8 +71,11 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
>  		unsigned flags = curr->flags;
>  
>  		if (curr->func(curr, mode, wake_flags, key) &&
> -				(flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
> +			       (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive) {
> +			if (flags & WQ_FLAG_ROUND_ROBIN)
> +				list_move_tail(&curr->task_list, &q->task_list);
>  			break;
> +		}
>  	}
>  }

I think you meant to write something like:

	if (curr->func(curr, mode, wake_flags, key) &&
	    (flags & WQ_FLAG_EXCLUSIVE)) {
		if (flag & WQ_FLAG_ROUND_ROBIN)
			list_move_tail(&curr->task_list, &q->task_list);
		if (!--nr_exclusive)
			break;
	}

Otherwise can only work for nr_exclusive==1.
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Baron Feb. 10, 2015, 4:06 a.m. UTC | #3
On 02/09/2015 04:50 PM, Peter Zijlstra wrote:
> On Mon, Feb 09, 2015 at 08:05:57PM +0000, Jason Baron wrote:
>> diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
>> index 852143a..17d1039 100644
>> --- a/kernel/sched/wait.c
>> +++ b/kernel/sched/wait.c
>> @@ -71,8 +71,11 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
>>  		unsigned flags = curr->flags;
>>  
>>  		if (curr->func(curr, mode, wake_flags, key) &&
>> -				(flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
>> +			       (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive) {
>> +			if (flags & WQ_FLAG_ROUND_ROBIN)
>> +				list_move_tail(&curr->task_list, &q->task_list);
>>  			break;
>> +		}
>>  	}
>>  }
> I think you meant to write something like:
>
> 	if (curr->func(curr, mode, wake_flags, key) &&
> 	    (flags & WQ_FLAG_EXCLUSIVE)) {
> 		if (flag & WQ_FLAG_ROUND_ROBIN)
> 			list_move_tail(&curr->task_list, &q->task_list);
> 		if (!--nr_exclusive)
> 			break;
> 	}
>
> Otherwise can only work for nr_exclusive==1.

Indeed. I'm also wondering if its worth avoiding the list_move_tail() for the case where nr_exclusive is initially 0. IE the wake all case, where we are just going to end up doing a bunch of list_move_tail() calls, but end up in the same state.

Thanks,

-Jason



--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Peter Zijlstra Feb. 10, 2015, 9:03 a.m. UTC | #4
On Mon, Feb 09, 2015 at 11:06:17PM -0500, Jason Baron wrote:
> On 02/09/2015 04:50 PM, Peter Zijlstra wrote:
> > On Mon, Feb 09, 2015 at 08:05:57PM +0000, Jason Baron wrote:
> >> diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
> >> index 852143a..17d1039 100644
> >> --- a/kernel/sched/wait.c
> >> +++ b/kernel/sched/wait.c
> >> @@ -71,8 +71,11 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
> >>  		unsigned flags = curr->flags;
> >>  
> >>  		if (curr->func(curr, mode, wake_flags, key) &&
> >> -				(flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
> >> +			       (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive) {
> >> +			if (flags & WQ_FLAG_ROUND_ROBIN)
> >> +				list_move_tail(&curr->task_list, &q->task_list);
> >>  			break;
> >> +		}
> >>  	}
> >>  }
> > I think you meant to write something like:
> >
> > 	if (curr->func(curr, mode, wake_flags, key) &&
> > 	    (flags & WQ_FLAG_EXCLUSIVE)) {
> > 		if (flag & WQ_FLAG_ROUND_ROBIN)
> > 			list_move_tail(&curr->task_list, &q->task_list);
> > 		if (!--nr_exclusive)
> > 			break;
> > 	}
> >
> > Otherwise can only work for nr_exclusive==1.
> 
> Indeed. I'm also wondering if its worth avoiding the list_move_tail()
> for the case where nr_exclusive is initially 0. IE the wake all case,
> where we are just going to end up doing a bunch of list_move_tail()
> calls, but end up in the same state.

After writing this email, it occurred to me that you could probably do
this with a custom wake function.

Where autoremove_wake_function() does a list_del_init() you could do a
rotate_wake_function() that does list_move_tail().

That would avoid the entire WQ flag muckery.
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Baron Feb. 10, 2015, 3:59 p.m. UTC | #5
On 02/10/2015 04:03 AM, Peter Zijlstra wrote:
> On Mon, Feb 09, 2015 at 11:06:17PM -0500, Jason Baron wrote:
>> On 02/09/2015 04:50 PM, Peter Zijlstra wrote:
>>> On Mon, Feb 09, 2015 at 08:05:57PM +0000, Jason Baron wrote:
>>>> diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
>>>> index 852143a..17d1039 100644
>>>> --- a/kernel/sched/wait.c
>>>> +++ b/kernel/sched/wait.c
>>>> @@ -71,8 +71,11 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
>>>>  		unsigned flags = curr->flags;
>>>>  
>>>>  		if (curr->func(curr, mode, wake_flags, key) &&
>>>> -				(flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
>>>> +			       (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive) {
>>>> +			if (flags & WQ_FLAG_ROUND_ROBIN)
>>>> +				list_move_tail(&curr->task_list, &q->task_list);
>>>>  			break;
>>>> +		}
>>>>  	}
>>>>  }
>>> I think you meant to write something like:
>>>
>>> 	if (curr->func(curr, mode, wake_flags, key) &&
>>> 	    (flags & WQ_FLAG_EXCLUSIVE)) {
>>> 		if (flag & WQ_FLAG_ROUND_ROBIN)
>>> 			list_move_tail(&curr->task_list, &q->task_list);
>>> 		if (!--nr_exclusive)
>>> 			break;
>>> 	}
>>>
>>> Otherwise can only work for nr_exclusive==1.
>> Indeed. I'm also wondering if its worth avoiding the list_move_tail()
>> for the case where nr_exclusive is initially 0. IE the wake all case,
>> where we are just going to end up doing a bunch of list_move_tail()
>> calls, but end up in the same state.
> After writing this email, it occurred to me that you could probably do
> this with a custom wake function.
>
> Where autoremove_wake_function() does a list_del_init() you could do a
> rotate_wake_function() that does list_move_tail().
>
> That would avoid the entire WQ flag muckery.

hmmm...but don't we need the head/tail of the list to add it back too?

Further, we can't just append to tail while walking the list b/c
otherwise it can result in multiple wakeups to the same item. So I could
add to a local list, for example, in __wake_up_common(). And then just
add that to the tail once the list_for_each() finishes.

In terms of the flag, maybe another option would be to have the
wait_queue_func_t return a 'ROTATE_ME' value instead
of 1, since I think we currently only make use of 0 and 1?

Thanks,

-Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Peter Zijlstra Feb. 10, 2015, 4:11 p.m. UTC | #6
On Tue, Feb 10, 2015 at 10:59:01AM -0500, Jason Baron wrote:
> hmmm...but don't we need the head/tail of the list to add it back too?

Ah, good point that ;-)

> Further, we can't just append to tail while walking the list b/c
> otherwise it can result in multiple wakeups to the same item. So I could
> add to a local list, for example, in __wake_up_common(). And then just
> add that to the tail once the list_for_each() finishes.

True; you can do horrible things, but I think that is the safest option
indeed.

> In terms of the flag, maybe another option would be to have the
> wait_queue_func_t return a 'ROTATE_ME' value instead
> of 1, since I think we currently only make use of 0 and 1?

Lets stick with the flag then.
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 2232ed1..bbdef98 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -16,6 +16,7 @@  int default_wake_function(wait_queue_t *wait, unsigned mode, int flags, void *ke
 /* __wait_queue::flags */
 #define WQ_FLAG_EXCLUSIVE	0x01
 #define WQ_FLAG_WOKEN		0x02
+#define WQ_FLAG_ROUND_ROBIN	0x04
 
 struct __wait_queue {
 	unsigned int		flags;
@@ -109,6 +110,16 @@  static inline int waitqueue_active(wait_queue_head_t *q)
 
 extern void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait);
 extern void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait);
+
+/*
+ * rr relies on exclusive, otherwise we don't know which entry was woken
+ */
+static inline void add_wait_queue_rr(wait_queue_head_t *q, wait_queue_t *wait)
+{
+	wait->flags |= WQ_FLAG_ROUND_ROBIN;
+	add_wait_queue_exclusive(q, wait);
+}
+
 extern void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait);
 
 static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new)
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 852143a..17d1039 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -71,8 +71,11 @@  static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
 		unsigned flags = curr->flags;
 
 		if (curr->func(curr, mode, wake_flags, key) &&
-				(flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
+			       (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive) {
+			if (flags & WQ_FLAG_ROUND_ROBIN)
+				list_move_tail(&curr->task_list, &q->task_list);
 			break;
+		}
 	}
 }