diff mbox series

[v3] sched/core: Address classes via __begin_sched_classes

Message ID 20220517030024.3388355-1-keescook@chromium.org (mailing list archive)
State Changes Requested
Headers show
Series [v3] sched/core: Address classes via __begin_sched_classes | expand

Commit Message

Kees Cook May 17, 2022, 3 a.m. UTC
GCC 12 is very sensitive about array checking, and views all negative
array accesses as unsafe (a not unreasonable position). Redefine
sched_class_highest in terms of its location from __begin_sched_classes,
and redefine sched_class_lowest to the actual lowest sched class instead
of one lower. This also means the for_class_range() must be redefined to
be inclusive, which means its 1 caller must be adjusted to have its
"lowest" argument bumped up one position. Silences this warning:

In file included from kernel/sched/core.c:81:
kernel/sched/core.c: In function ‘set_rq_online.part.0’:
kernel/sched/sched.h:2197:52: error: array subscript -1 is outside array bounds of ‘struct sched_class[44343134792571037]’
 [-Werror=array-bounds]
 2197 | #define sched_class_lowest  (__begin_sched_classes - 1)
      |                                                    ^
kernel/sched/sched.h:2200:41: note: in definition of macro ‘for_class_range’
 2200 |         for (class = (_from); class != (_to); class--)
      |                                         ^~~
kernel/sched/sched.h:2203:53: note: in expansion of macro ‘sched_class_lowest’
 2203 |for_class_range(class, sched_class_highest, sched_class_lowest)
      |                                            ^~~~~~~~~~~~~~~~~~
kernel/sched/core.c:9115:17: note: in expansion of macro ‘for_each_class’
 9115 |                 for_each_class(class) {
      |                 ^~~~~~~~~~~~~~
kernel/sched/sched.h:2193:27: note: at offset -208 into object ‘__begin_sched_classes’ of size [0, 9223372036854775807]
 2193 | extern struct sched_class __begin_sched_classes[];
      |                           ^~~~~~~~~~~~~~~~~~~~~

The introduce and use of sched_class_higher() could just be a bare "+ 1",
but this code's backwards walking and non-inclusive for loop was weird
enough, it seemed back to explicitly describe the manipulation
happening.

These can't just be object pointers because GCC still sees it as an
address of a single struct.

The resulting instruction output is identical to before except that one
less register is used in set_rq_online(), where an immediate can now
be used, resulting in a small instruction count savings:

│  set_rq_online():
│ -	push   %r12
│  	push   %rbp
│  	push   %rbx
│  	mov    0x9a0(%rdi),%rax
│  	mov    0xa10(%rdi),%edx
│  	lock bts %rdx,0x20(%rax)
│  	movabs $0x0,%rbx
│   R_X86_64_64	__end_sched_classes-0xd0
│ -	movabs $0x0,%r12
│ - R_X86_64_64	__begin_sched_classes-0xd0
│  	movl   $0x1,0xa14(%rdi)
│ -	cmp    %r12,%rbx
│ -	je     31ea <set_rq_online.part.0+0x5a>
│ -	mov    %rdi,%rbp
│ +	cmp    $0x0,%rbx
│ + R_X86_64_32S	__begin_sched_classes
│ +	jb     31e6 <set_rq_online.part.0+0x56>
│ +	mov    %rdi,%rbp
│  	mov    0x70(%rbx),%rax
│  	test   %rax,%rax
│ -	je     31de <set_rq_online.part.0+0x4e>
│ +	je     31d6 <set_rq_online.part.0+0x46>
│  	mov    %rbp,%rdi
│ -	call   31de <set_rq_online.part.0+0x4e>
│ +	call   31d6 <set_rq_online.part.0+0x46>
│   R_X86_64_PLT32	__x86_indirect_thunk_rax-0x4
│  	sub    $0xd0,%rbx
│ -	cmp    %r12,%rbx
│ -	jne    31cd <set_rq_online.part.0+0x3d>
│ +	cmp    $0x0,%rbx
│ + R_X86_64_32S	__begin_sched_classes
│ +	jae    31c5 <set_rq_online.part.0+0x35>
│  	pop    %rbx
│  	pop    %rbp
│ -	pop    %r12
│  	retq

Reported-by: Christophe de Dinechin <dinechin@redhat.com>
Link: https://lore.kernel.org/lkml/20220414150855.2407137-2-dinechin@redhat.com/
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ben Segall <bsegall@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
v1: https://lore.kernel.org/lkml/20220516194241.3064242-1-keescook@chromium.org
v2: https://lore.kernel.org/lkml/20220517000630.3383144-1-keescook@chromium.org
v3:
 - Add missing increment to the one for_class_range() user
 - Provide instruction sequence change analysis in commit log
---
 kernel/sched/core.c  |  3 ++-
 kernel/sched/sched.h | 11 ++++++++---
 2 files changed, 10 insertions(+), 4 deletions(-)

Comments

Kees Cook May 17, 2022, 3:33 a.m. UTC | #1
On Mon, May 16, 2022 at 08:00:23PM -0700, Kees Cook wrote:
> GCC 12 is very sensitive about array checking, and views all negative
> array accesses as unsafe (a not unreasonable position). Redefine
> sched_class_highest in terms of its location from __begin_sched_classes,
> and redefine sched_class_lowest to the actual lowest sched class instead
> of one lower. This also means the for_class_range() must be redefined to
> be inclusive, which means its 1 caller must be adjusted to have its
> "lowest" argument bumped up one position. Silences this warning:
> 
> In file included from kernel/sched/core.c:81:
> kernel/sched/core.c: In function ‘set_rq_online.part.0’:
> kernel/sched/sched.h:2197:52: error: array subscript -1 is outside array bounds of ‘struct sched_class[44343134792571037]’
>  [-Werror=array-bounds]
>  2197 | #define sched_class_lowest  (__begin_sched_classes - 1)
>       |                                                    ^
> kernel/sched/sched.h:2200:41: note: in definition of macro ‘for_class_range’
>  2200 |         for (class = (_from); class != (_to); class--)
>       |                                         ^~~
> kernel/sched/sched.h:2203:53: note: in expansion of macro ‘sched_class_lowest’
>  2203 |for_class_range(class, sched_class_highest, sched_class_lowest)
>       |                                            ^~~~~~~~~~~~~~~~~~
> kernel/sched/core.c:9115:17: note: in expansion of macro ‘for_each_class’
>  9115 |                 for_each_class(class) {
>       |                 ^~~~~~~~~~~~~~
> kernel/sched/sched.h:2193:27: note: at offset -208 into object ‘__begin_sched_classes’ of size [0, 9223372036854775807]
>  2193 | extern struct sched_class __begin_sched_classes[];
>       |                           ^~~~~~~~~~~~~~~~~~~~~
> 
> The introduce and use of sched_class_higher() could just be a bare "+ 1",
> but this code's backwards walking and non-inclusive for loop was weird
> enough, it seemed back to explicitly describe the manipulation
> happening.

I just need to start today over. This should read:

 The introduction and use of sched_class_higher() could just be a bare
 "+ 1", but this code's backwards-walking and non-inclusive for loop was
 weird enough, it seemed best to explicitly describe the manipulation
 happening.
Peter Zijlstra May 17, 2022, 6:42 a.m. UTC | #2
On Mon, May 16, 2022 at 08:00:23PM -0700, Kees Cook wrote:
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index d58c0389eb23..f2bcc7f15381 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -5665,7 +5665,8 @@ static void put_prev_task_balance(struct rq *rq, struct task_struct *prev,
>  	 * We can terminate the balance pass as soon as we know there is
>  	 * a runnable task of @class priority or higher.
>  	 */
> -	for_class_range(class, prev->sched_class, &idle_sched_class) {
> +	for_class_range(class, prev->sched_class,
> +			sched_class_higher(&idle_sched_class)) {
>  		if (class->balance(rq, prev, rf))
>  			break;
>  	}
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 8dccb34eb190..c757bd26b01a 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -2193,11 +2193,16 @@ const struct sched_class name##_sched_class \
>  extern struct sched_class __begin_sched_classes[];
>  extern struct sched_class __end_sched_classes[];
>  
> -#define sched_class_highest (__end_sched_classes - 1)
> -#define sched_class_lowest  (__begin_sched_classes - 1)
> +#define sched_class_higher(class) ((class) + 1)
>  
> +#define sched_class_highest (&__begin_sched_classes[__end_sched_classes     \
> +						    - __begin_sched_classes \
> +						    - 1])
> +#define sched_class_lowest  (&__begin_sched_classes[0])
> +
> +/* For each class, inclusive from _from down to _to. */
>  #define for_class_range(class, _from, _to) \
> -	for (class = (_from); class != (_to); class--)
> +	for (class = (_from); class >= (_to); class--)
>  
>  #define for_each_class(class) \
>  	for_class_range(class, sched_class_highest, sched_class_lowest)


Urgh, and we're back to unreadable garbage just because GCC is insane :/
Peter Zijlstra May 17, 2022, 11:46 a.m. UTC | #3
On Mon, May 16, 2022 at 08:33:25PM -0700, Kees Cook wrote:

> I just need to start today over.

:-)

Also, even with this sorted, there's a ton array bound things left.
Please don't just mangle the code until it stops complaining like in the
previous postings of these patches.

As such, I'm only barely ok with the below patch. Ideally I'd shoot GCC
in the head. Its *really* tedious you cannot just tell it to shut up
already.

---
Subject: sched: Reverse sched_class layout

Because GCC-12 is fully stupid about array bounds and it's just really
hard to get a solid array definition from a linker script, flip the
array order to avoid needing negative offsets :-/

This makes the whole relational pointer magic a little less obvious, but
alas.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 include/asm-generic/vmlinux.lds.h | 12 ++++++------
 kernel/sched/core.c               | 12 ++++++------
 kernel/sched/sched.h              | 15 ++++++++-------
 3 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 69138e9db787..7515a465ec03 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -126,13 +126,13 @@
  */
 #define SCHED_DATA				\
 	STRUCT_ALIGN();				\
-	__begin_sched_classes = .;		\
-	*(__idle_sched_class)			\
-	*(__fair_sched_class)			\
-	*(__rt_sched_class)			\
-	*(__dl_sched_class)			\
+	__sched_class_highest = .;		\
 	*(__stop_sched_class)			\
-	__end_sched_classes = .;
+	*(__dl_sched_class)			\
+	*(__rt_sched_class)			\
+	*(__fair_sched_class)			\
+	*(__idle_sched_class)			\
+	__sched_class_lowest = .;
 
 /* The actual configuration determine if the init/exit sections
  * are handled as text/data or they can be discarded (which
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 95bac3b094b3..a247f8d9d417 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2193,7 +2193,7 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
 {
 	if (p->sched_class == rq->curr->sched_class)
 		rq->curr->sched_class->check_preempt_curr(rq, p, flags);
-	else if (p->sched_class > rq->curr->sched_class)
+	else if (sched_class_above(p->sched_class, rq->curr->sched_class))
 		resched_curr(rq);
 
 	/*
@@ -5692,7 +5692,7 @@ __pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 	 * higher scheduling class, because otherwise those lose the
 	 * opportunity to pull in more work from other CPUs.
 	 */
-	if (likely(prev->sched_class <= &fair_sched_class &&
+	if (likely(!sched_class_above(prev->sched_class, &fair_sched_class) &&
 		   rq->nr_running == rq->cfs.h_nr_running)) {
 
 		p = pick_next_task_fair(rq, prev, rf);
@@ -9472,11 +9472,11 @@ void __init sched_init(void)
 	int i;
 
 	/* Make sure the linker didn't screw up */
-	BUG_ON(&idle_sched_class + 1 != &fair_sched_class ||
-	       &fair_sched_class + 1 != &rt_sched_class ||
-	       &rt_sched_class + 1   != &dl_sched_class);
+	BUG_ON(&idle_sched_class != &fair_sched_class + 1 ||
+	       &fair_sched_class != &rt_sched_class + 1 ||
+	       &rt_sched_class   != &dl_sched_class + 1);
 #ifdef CONFIG_SMP
-	BUG_ON(&dl_sched_class + 1 != &stop_sched_class);
+	BUG_ON(&dl_sched_class != &stop_sched_class + 1);
 #endif
 
 	wait_bit_init();
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index fe4d1acb7e38..2ce18584dca3 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2177,6 +2177,8 @@ static inline void set_next_task(struct rq *rq, struct task_struct *next)
  *
  *   include/asm-generic/vmlinux.lds.h
  *
+ * *CAREFUL* they are laid out in *REVERSE* order!!!
+ *
  * Also enforce alignment on the instance, not the type, to guarantee layout.
  */
 #define DEFINE_SCHED_CLASS(name) \
@@ -2185,17 +2187,16 @@ const struct sched_class name##_sched_class \
 	__section("__" #name "_sched_class")
 
 /* Defined in include/asm-generic/vmlinux.lds.h */
-extern struct sched_class __begin_sched_classes[];
-extern struct sched_class __end_sched_classes[];
-
-#define sched_class_highest (__end_sched_classes - 1)
-#define sched_class_lowest  (__begin_sched_classes - 1)
+extern struct sched_class __sched_class_highest[];
+extern struct sched_class __sched_class_lowest[];
 
 #define for_class_range(class, _from, _to) \
-	for (class = (_from); class != (_to); class--)
+	for (class = (_from); class < (_to); class++)
 
 #define for_each_class(class) \
-	for_class_range(class, sched_class_highest, sched_class_lowest)
+	for_class_range(class, __sched_class_highest, __sched_class_lowest)
+
+#define sched_class_above(_a, _b)	((_a) < (_b))
 
 extern const struct sched_class stop_sched_class;
 extern const struct sched_class dl_sched_class;
Kees Cook May 17, 2022, 5:35 p.m. UTC | #4
On Tue, May 17, 2022 at 01:46:54PM +0200, Peter Zijlstra wrote:
> On Mon, May 16, 2022 at 08:33:25PM -0700, Kees Cook wrote:
> 
> > I just need to start today over.
> 
> :-)
> 
> Also, even with this sorted, there's a ton array bound things left.
> Please don't just mangle the code until it stops complaining like in the
> previous postings of these patches.

Yeah, other code is much simpler. The sched code has been a bit tricky
to figure out.

> As such, I'm only barely ok with the below patch. Ideally I'd shoot GCC
> in the head. Its *really* tedious you cannot just tell it to shut up
> already.

What you've got below is almost exactly what I had in my first attempt
at this (that I never posted). What I was missing and couldn't track
down were the places you used sched_class_above(). I should have sent
_that_ patch and asked where the comparisons were that I couldn't find.
I think what you've got is much cleaner, as it makes the for loop use
the normal iterator idiom.

Thank you!

Reviewed-by: Kees Cook <keescook@chromium.org>
Peter Zijlstra May 17, 2022, 10:22 p.m. UTC | #5
On Tue, May 17, 2022 at 10:35:44AM -0700, Kees Cook wrote:

> What you've got below is almost exactly what I had in my first attempt
> at this (that I never posted). What I was missing and couldn't track
> down were the places you used sched_class_above(). I should have sent
> _that_ patch and asked where the comparisons were that I couldn't find.
> I think what you've got is much cleaner, as it makes the for loop use
> the normal iterator idiom.

Don't feel too bad; I forgot about the comparison in
check_preempt_curr() myself and cursed a while trying to figure out why
it stopped booting.

Anyway; I suppose I'll go queue the thing in sched/core so we can forget
about all this again.
diff mbox series

Patch

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d58c0389eb23..f2bcc7f15381 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5665,7 +5665,8 @@  static void put_prev_task_balance(struct rq *rq, struct task_struct *prev,
 	 * We can terminate the balance pass as soon as we know there is
 	 * a runnable task of @class priority or higher.
 	 */
-	for_class_range(class, prev->sched_class, &idle_sched_class) {
+	for_class_range(class, prev->sched_class,
+			sched_class_higher(&idle_sched_class)) {
 		if (class->balance(rq, prev, rf))
 			break;
 	}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 8dccb34eb190..c757bd26b01a 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2193,11 +2193,16 @@  const struct sched_class name##_sched_class \
 extern struct sched_class __begin_sched_classes[];
 extern struct sched_class __end_sched_classes[];
 
-#define sched_class_highest (__end_sched_classes - 1)
-#define sched_class_lowest  (__begin_sched_classes - 1)
+#define sched_class_higher(class) ((class) + 1)
 
+#define sched_class_highest (&__begin_sched_classes[__end_sched_classes     \
+						    - __begin_sched_classes \
+						    - 1])
+#define sched_class_lowest  (&__begin_sched_classes[0])
+
+/* For each class, inclusive from _from down to _to. */
 #define for_class_range(class, _from, _to) \
-	for (class = (_from); class != (_to); class--)
+	for (class = (_from); class >= (_to); class--)
 
 #define for_each_class(class) \
 	for_class_range(class, sched_class_highest, sched_class_lowest)