diff mbox series

x86/boot: Rationalise stack handling during early boot

Message ID 20200108170040.7728-1-andrew.cooper3@citrix.com (mailing list archive)
State Superseded
Headers show
Series x86/boot: Rationalise stack handling during early boot | expand

Commit Message

Andrew Cooper Jan. 8, 2020, 5 p.m. UTC
The top (numerically higher addresses) of cpu0_stack[] contains the BSP's
cpu_info block.  Logic in Xen expects this to be initialised to 0, but this
area of stack is also used during early boot.

Update the head.S code to avoid using the cpu_info block.  Additionally,
update the stack_start variable to match, which avoids __high_start() and
efi_arch_post_exit_boot() needing to make the adjustment manually.

Finally, leave a big warning by the BIOS BSS initialisation, because it is by
no means obvious that the stack doesn't survive the REP STOS.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Wei Liu <wl@xen.org>
CC: Roger Pau Monné <roger.pau@citrix.com>
---
 xen/arch/x86/boot/head.S    | 10 +++++++---
 xen/arch/x86/boot/x86_64.S  |  3 +--
 xen/arch/x86/efi/efi-boot.h | 13 +++++++------
 xen/arch/x86/smpboot.c      |  2 +-
 4 files changed, 16 insertions(+), 12 deletions(-)

Comments

Jan Beulich Jan. 9, 2020, 10:28 a.m. UTC | #1
On 08.01.2020 18:00, Andrew Cooper wrote:
> --- a/xen/arch/x86/efi/efi-boot.h
> +++ b/xen/arch/x86/efi/efi-boot.h
> @@ -249,23 +249,24 @@ static void __init noreturn efi_arch_post_exit_boot(void)
>                     "or     $"__stringify(X86_CR4_PGE)", %[cr4]\n\t"
>                     "mov    %[cr4], %%cr4\n\t"
>  #endif
> -                   "movabs $__start_xen, %[rip]\n\t"
>                     "lgdt   boot_gdtr(%%rip)\n\t"
> -                   "mov    stack_start(%%rip), %%rsp\n\t"
>                     "mov    %[ds], %%ss\n\t"
>                     "mov    %[ds], %%ds\n\t"
>                     "mov    %[ds], %%es\n\t"
>                     "mov    %[ds], %%fs\n\t"
>                     "mov    %[ds], %%gs\n\t"
> -                   "movl   %[cs], 8(%%rsp)\n\t"
> -                   "mov    %[rip], (%%rsp)\n\t"
> -                   "lretq  %[stkoff]-16"
> +
> +                   /* Jump to higher mappings. */
> +                   "mov    stack_start(%%rip), %%rsp\n\t"
> +                   "movabs $__start_xen, %[rip]\n\t"
> +                   "push   %[cs]\n\t"

Either you need %q[cs] here (assuming q gets ignored for
immediate operands, which I didn't check) or ...

> +                   "push   %[rip]\n\t"
> +                   "lretq"
>                     : [rip] "=&r" (efer/* any dead 64-bit variable */),
>                       [cr4] "+&r" (cr4)
>                     : [cr3] "r" (idle_pg_table),
>                       [cs] "ir" (__HYPERVISOR_CS),

... you need to use just "i" here, for there not being 32-bit
PUSH forms.

> --- a/xen/arch/x86/smpboot.c
> +++ b/xen/arch/x86/smpboot.c
> @@ -554,7 +554,7 @@ static int do_boot_cpu(int apicid, int cpu)
>          printk("Booting processor %d/%d eip %lx\n",
>                 cpu, apicid, start_eip);
>  
> -    stack_start = stack_base[cpu];
> +    stack_start = stack_base[cpu] + STACK_SIZE - sizeof(struct cpu_info);
>  
>      /* This grunge runs the startup process for the targeted processor. */

Further down smp_prepare_cpus() has

    stack_base[0] = stack_start;

which I think you need to also adjust (or replace, e.g. by giving
stack_base[] an initializer).

Jan
Andrew Cooper Jan. 9, 2020, 10:43 a.m. UTC | #2
On 09/01/2020 10:28, Jan Beulich wrote:
> On 08.01.2020 18:00, Andrew Cooper wrote:
>> --- a/xen/arch/x86/efi/efi-boot.h
>> +++ b/xen/arch/x86/efi/efi-boot.h
>> @@ -249,23 +249,24 @@ static void __init noreturn efi_arch_post_exit_boot(void)
>>                     "or     $"__stringify(X86_CR4_PGE)", %[cr4]\n\t"
>>                     "mov    %[cr4], %%cr4\n\t"
>>  #endif
>> -                   "movabs $__start_xen, %[rip]\n\t"
>>                     "lgdt   boot_gdtr(%%rip)\n\t"
>> -                   "mov    stack_start(%%rip), %%rsp\n\t"
>>                     "mov    %[ds], %%ss\n\t"
>>                     "mov    %[ds], %%ds\n\t"
>>                     "mov    %[ds], %%es\n\t"
>>                     "mov    %[ds], %%fs\n\t"
>>                     "mov    %[ds], %%gs\n\t"
>> -                   "movl   %[cs], 8(%%rsp)\n\t"
>> -                   "mov    %[rip], (%%rsp)\n\t"
>> -                   "lretq  %[stkoff]-16"
>> +
>> +                   /* Jump to higher mappings. */
>> +                   "mov    stack_start(%%rip), %%rsp\n\t"
>> +                   "movabs $__start_xen, %[rip]\n\t"
>> +                   "push   %[cs]\n\t"
> Either you need %q[cs] here (assuming q gets ignored for
> immediate operands, which I didn't check) or ...
>
>> +                   "push   %[rip]\n\t"
>> +                   "lretq"
>>                     : [rip] "=&r" (efer/* any dead 64-bit variable */),
>>                       [cr4] "+&r" (cr4)
>>                     : [cr3] "r" (idle_pg_table),
>>                       [cs] "ir" (__HYPERVISOR_CS),
> ... you need to use just "i" here, for there not being 32-bit
> PUSH forms.

Lets just go with i.

"m" is also an option, and clang will probably find some way of pulling
it out of the stringtable, but anything other than an immediate encoding
here is going to be silly.

>
>> --- a/xen/arch/x86/smpboot.c
>> +++ b/xen/arch/x86/smpboot.c
>> @@ -554,7 +554,7 @@ static int do_boot_cpu(int apicid, int cpu)
>>          printk("Booting processor %d/%d eip %lx\n",
>>                 cpu, apicid, start_eip);
>>  
>> -    stack_start = stack_base[cpu];
>> +    stack_start = stack_base[cpu] + STACK_SIZE - sizeof(struct cpu_info);
>>  
>>      /* This grunge runs the startup process for the targeted processor. */
> Further down smp_prepare_cpus() has
>
>     stack_base[0] = stack_start;
>
> which I think you need to also adjust (or replace, e.g. by giving
> stack_base[] an initializer).

In practice, this variable is never used because we never offline the BSP.

However, the code as-is is correct.  The value in stack_start has
changed in this patch, but is still the correct value for the BSP.

It could also be made into an initialiser, but that would cause
stack_base[] to move from BSS into data, and it is a NR_CPUS sized
datastructure.

~Andrew
Jan Beulich Jan. 9, 2020, 10:52 a.m. UTC | #3
On 09.01.2020 11:43, Andrew Cooper wrote:
> On 09/01/2020 10:28, Jan Beulich wrote:
>> On 08.01.2020 18:00, Andrew Cooper wrote:
>>> --- a/xen/arch/x86/efi/efi-boot.h
>>> +++ b/xen/arch/x86/efi/efi-boot.h
>>> @@ -249,23 +249,24 @@ static void __init noreturn efi_arch_post_exit_boot(void)
>>>                     "or     $"__stringify(X86_CR4_PGE)", %[cr4]\n\t"
>>>                     "mov    %[cr4], %%cr4\n\t"
>>>  #endif
>>> -                   "movabs $__start_xen, %[rip]\n\t"
>>>                     "lgdt   boot_gdtr(%%rip)\n\t"
>>> -                   "mov    stack_start(%%rip), %%rsp\n\t"
>>>                     "mov    %[ds], %%ss\n\t"
>>>                     "mov    %[ds], %%ds\n\t"
>>>                     "mov    %[ds], %%es\n\t"
>>>                     "mov    %[ds], %%fs\n\t"
>>>                     "mov    %[ds], %%gs\n\t"
>>> -                   "movl   %[cs], 8(%%rsp)\n\t"
>>> -                   "mov    %[rip], (%%rsp)\n\t"
>>> -                   "lretq  %[stkoff]-16"
>>> +
>>> +                   /* Jump to higher mappings. */
>>> +                   "mov    stack_start(%%rip), %%rsp\n\t"
>>> +                   "movabs $__start_xen, %[rip]\n\t"
>>> +                   "push   %[cs]\n\t"
>> Either you need %q[cs] here (assuming q gets ignored for
>> immediate operands, which I didn't check) or ...
>>
>>> +                   "push   %[rip]\n\t"
>>> +                   "lretq"
>>>                     : [rip] "=&r" (efer/* any dead 64-bit variable */),
>>>                       [cr4] "+&r" (cr4)
>>>                     : [cr3] "r" (idle_pg_table),
>>>                       [cs] "ir" (__HYPERVISOR_CS),
>> ... you need to use just "i" here, for there not being 32-bit
>> PUSH forms.
> 
> Lets just go with i.
> 
> "m" is also an option, and clang will probably find some way of pulling
> it out of the stringtable, but anything other than an immediate encoding
> here is going to be silly.

No, sadly "m" is not an option when the expression is a constant:
Gcc at least is unable to materialize a memory variable in this
case, and will give some funny error message instead.

>>> --- a/xen/arch/x86/smpboot.c
>>> +++ b/xen/arch/x86/smpboot.c
>>> @@ -554,7 +554,7 @@ static int do_boot_cpu(int apicid, int cpu)
>>>          printk("Booting processor %d/%d eip %lx\n",
>>>                 cpu, apicid, start_eip);
>>>  
>>> -    stack_start = stack_base[cpu];
>>> +    stack_start = stack_base[cpu] + STACK_SIZE - sizeof(struct cpu_info);
>>>  
>>>      /* This grunge runs the startup process for the targeted processor. */
>> Further down smp_prepare_cpus() has
>>
>>     stack_base[0] = stack_start;
>>
>> which I think you need to also adjust (or replace, e.g. by giving
>> stack_base[] an initializer).
> 
> In practice, this variable is never used because we never offline the BSP.

traps.c uses stack_base[], for example, and hence it needs to be
correct also for the BSP. Even more important is perhaps
get_cpu_current()'s use.

> However, the code as-is is correct.  The value in stack_start has
> changed in this patch, but is still the correct value for the BSP.

But it's no longer the stack base (which is what stack_base[] holds
for all other CPUs), or else you wouldn't have had a need to change
do_boot_cpu().

> It could also be made into an initialiser, but that would cause
> stack_base[] to move from BSS into data, and it is a NR_CPUS sized
> datastructure.

I do realize this.

Jan
Andrew Cooper Jan. 9, 2020, 10:55 a.m. UTC | #4
On 09/01/2020 10:52, Jan Beulich wrote:
>>>> --- a/xen/arch/x86/smpboot.c
>>>> +++ b/xen/arch/x86/smpboot.c
>>>> @@ -554,7 +554,7 @@ static int do_boot_cpu(int apicid, int cpu)
>>>>          printk("Booting processor %d/%d eip %lx\n",
>>>>                 cpu, apicid, start_eip);
>>>>  
>>>> -    stack_start = stack_base[cpu];
>>>> +    stack_start = stack_base[cpu] + STACK_SIZE - sizeof(struct cpu_info);
>>>>  
>>>>      /* This grunge runs the startup process for the targeted processor. */
>>> Further down smp_prepare_cpus() has
>>>
>>>     stack_base[0] = stack_start;
>>>
>>> which I think you need to also adjust (or replace, e.g. by giving
>>> stack_base[] an initializer).
>> In practice, this variable is never used because we never offline the BSP.
> traps.c uses stack_base[], for example, and hence it needs to be
> correct also for the BSP. Even more important is perhaps
> get_cpu_current()'s use.
>
>> However, the code as-is is correct.  The value in stack_start has
>> changed in this patch, but is still the correct value for the BSP.
> But it's no longer the stack base (which is what stack_base[] holds
> for all other CPUs), or else you wouldn't have had a need to change
> do_boot_cpu().

Oh - of course.  I'm being dense.

To avoid a move into data, lets just go with & ~(STACK_SIZE - 1) here.

~Andrew
diff mbox series

Patch

diff --git a/xen/arch/x86/boot/head.S b/xen/arch/x86/boot/head.S
index 8d0ffbd1b0..2382b61dd4 100644
--- a/xen/arch/x86/boot/head.S
+++ b/xen/arch/x86/boot/head.S
@@ -400,7 +400,7 @@  __pvh_start:
         sub     $sym_offs(1b), %esi
 
         /* Set up stack. */
-        lea     STACK_SIZE + sym_esi(cpu0_stack), %esp
+        lea     STACK_SIZE - CPUINFO_sizeof + sym_esi(cpu0_stack), %esp
 
         mov     %ebx, sym_esi(pvh_start_info_pa)
 
@@ -447,7 +447,7 @@  __start:
         sub     $sym_offs(1b), %esi
 
         /* Set up stack. */
-        lea     STACK_SIZE + sym_esi(cpu0_stack), %esp
+        lea     STACK_SIZE - CPUINFO_sizeof + sym_esi(cpu0_stack), %esp
 
         /* Bootloaders may set multiboot{1,2}.mem_lower to a nonzero value. */
         xor     %edx,%edx
@@ -616,7 +616,11 @@  trampoline_setup:
         cmpb    $0,sym_fs(efi_platform)
         jnz     1f
 
-        /* Initialize BSS (no nasty surprises!). */
+        /*
+         * Initialise the BSS.
+         *
+         * !!! WARNING - also zeroes the current stack !!!
+         */
         mov     $sym_offs(__bss_start),%edi
         mov     $sym_offs(__bss_end),%ecx
         push    %fs
diff --git a/xen/arch/x86/boot/x86_64.S b/xen/arch/x86/boot/x86_64.S
index b54d3aceea..0acf5e860c 100644
--- a/xen/arch/x86/boot/x86_64.S
+++ b/xen/arch/x86/boot/x86_64.S
@@ -16,7 +16,6 @@  ENTRY(__high_start)
         mov     %rcx,%cr4
 
         mov     stack_start(%rip),%rsp
-        or      $(STACK_SIZE-CPUINFO_sizeof),%rsp
 
         /* Reset EFLAGS (subsumes CLI and CLD). */
         pushq   $0
@@ -42,7 +41,7 @@  multiboot_ptr:
         .long   0
 
 GLOBAL(stack_start)
-        .quad   cpu0_stack
+        .quad   cpu0_stack + STACK_SIZE - CPUINFO_sizeof
 
         .section .data.page_aligned, "aw", @progbits
         .align PAGE_SIZE, 0
diff --git a/xen/arch/x86/efi/efi-boot.h b/xen/arch/x86/efi/efi-boot.h
index 676d616ff8..8debdc7ca8 100644
--- a/xen/arch/x86/efi/efi-boot.h
+++ b/xen/arch/x86/efi/efi-boot.h
@@ -249,23 +249,24 @@  static void __init noreturn efi_arch_post_exit_boot(void)
                    "or     $"__stringify(X86_CR4_PGE)", %[cr4]\n\t"
                    "mov    %[cr4], %%cr4\n\t"
 #endif
-                   "movabs $__start_xen, %[rip]\n\t"
                    "lgdt   boot_gdtr(%%rip)\n\t"
-                   "mov    stack_start(%%rip), %%rsp\n\t"
                    "mov    %[ds], %%ss\n\t"
                    "mov    %[ds], %%ds\n\t"
                    "mov    %[ds], %%es\n\t"
                    "mov    %[ds], %%fs\n\t"
                    "mov    %[ds], %%gs\n\t"
-                   "movl   %[cs], 8(%%rsp)\n\t"
-                   "mov    %[rip], (%%rsp)\n\t"
-                   "lretq  %[stkoff]-16"
+
+                   /* Jump to higher mappings. */
+                   "mov    stack_start(%%rip), %%rsp\n\t"
+                   "movabs $__start_xen, %[rip]\n\t"
+                   "push   %[cs]\n\t"
+                   "push   %[rip]\n\t"
+                   "lretq"
                    : [rip] "=&r" (efer/* any dead 64-bit variable */),
                      [cr4] "+&r" (cr4)
                    : [cr3] "r" (idle_pg_table),
                      [cs] "ir" (__HYPERVISOR_CS),
                      [ds] "r" (__HYPERVISOR_DS),
-                     [stkoff] "i" (STACK_SIZE - sizeof(struct cpu_info)),
                      "D" (&mbi)
                    : "memory" );
     unreachable();
diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
index 7e29704080..0d0526e2b2 100644
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -554,7 +554,7 @@  static int do_boot_cpu(int apicid, int cpu)
         printk("Booting processor %d/%d eip %lx\n",
                cpu, apicid, start_eip);
 
-    stack_start = stack_base[cpu];
+    stack_start = stack_base[cpu] + STACK_SIZE - sizeof(struct cpu_info);
 
     /* This grunge runs the startup process for the targeted processor. */