diff mbox series

[RFC,2/7] x86/boot: Remove gratuitous call back into low-memory code

Message ID c252e7b1f675f5fb0df9c0c90423fc7fc0bc5736.1556708226.git.dwmw2@infradead.org (mailing list archive)
State Superseded
Headers show
Series Clean up x86_64 boot code | expand

Commit Message

David Woodhouse May 1, 2019, 11:17 a.m. UTC
From: David Woodhouse <dwmw@amazon.co.uk>

We appear to have implemented a memcpy() in the low-memory trampoline
which we then call into from __start_xen(), for no adequately defined
reason.

Kill it with fire.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 xen/arch/x86/boot/mem.S    | 27 +++++----------------------
 xen/arch/x86/setup.c       | 12 ++++++++++++
 xen/include/asm-x86/e820.h |  5 ++---
 3 files changed, 19 insertions(+), 25 deletions(-)

Comments

Andrew Cooper May 1, 2019, 12:25 p.m. UTC | #1
On 01/05/2019 12:17, David Woodhouse wrote:
> From: David Woodhouse <dwmw@amazon.co.uk>
>
> We appear to have implemented a memcpy() in the low-memory trampoline
> which we then call into from __start_xen(), for no adequately defined
> reason.
>
> Kill it with fire.

Absolutely.

> Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>

With the following fixup folded:

diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index 213a1bd..e607a85 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -673,13 +673,11 @@ static char * __init cmdline_cook(char *p, const char *loader_name)
     return p;
 }
 
-static int copy_bios_e820(struct e820entry *map, unsigned int limit)
+static unsigned int copy_bios_e820(struct e820entry *map, unsigned int limit)
 {
-    unsigned int n = bootsym(bios_e820nr);
-    if (n > limit)
-        n = limit;
+    unsigned int n = min(bootsym(bios_e820nr), limit);
 
-    if (n)
+    if ( n )
         memcpy(map, bootsym(bios_e820map), sizeof(*map) * n);
 
     return n;

Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>

Interestingly, the version with min() results in surprisingly better code:

add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-21 (-21)
Function                                     old     new   delta
__start_xen                                13807   13786     -21
Total: Before=3363289, After=3363268, chg -0.00%
<html>
  <head>
    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
  </head>
  <body text="#000000" bgcolor="#FFFFFF">
    <div class="moz-cite-prefix">On 01/05/2019 12:17, David Woodhouse
      wrote:<br>
    </div>
    <blockquote type="cite"
cite="mid:c252e7b1f675f5fb0df9c0c90423fc7fc0bc5736.1556708226.git.dwmw2@infradead.org">
      <pre class="moz-quote-pre" wrap="">From: David Woodhouse <a class="moz-txt-link-rfc2396E" href="mailto:dwmw@amazon.co.uk">&lt;dwmw@amazon.co.uk&gt;</a>

We appear to have implemented a memcpy() in the low-memory trampoline
which we then call into from __start_xen(), for no adequately defined
reason.

Kill it with fire.</pre>
    </blockquote>
    <br>
    Absolutely.<br>
    <br>
    <blockquote type="cite"
cite="mid:c252e7b1f675f5fb0df9c0c90423fc7fc0bc5736.1556708226.git.dwmw2@infradead.org">
      <pre class="moz-quote-pre" wrap="">Signed-off-by: David Woodhouse <a class="moz-txt-link-rfc2396E" href="mailto:dwmw@amazon.co.uk">&lt;dwmw@amazon.co.uk&gt;</a></pre>
    </blockquote>
    <br>
    With the following fixup folded:<br>
    <br>
    <pre>diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index 213a1bd..e607a85 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -673,13 +673,11 @@ static char * __init cmdline_cook(char *p, const char *loader_name)
     return p;
 }
 
-static int copy_bios_e820(struct e820entry *map, unsigned int limit)
+static unsigned int copy_bios_e820(struct e820entry *map, unsigned int limit)
 {
-    unsigned int n = bootsym(bios_e820nr);
-    if (n &gt; limit)
-        n = limit;
+    unsigned int n = min(bootsym(bios_e820nr), limit);
 
-    if (n)
+    if ( n )
         memcpy(map, bootsym(bios_e820map), sizeof(*map) * n);
 
     return n;

</pre>
    Acked-by: Andrew Cooper <a class="moz-txt-link-rfc2396E" href="mailto:andrew.cooper3@citrix.com">&lt;andrew.cooper3@citrix.com&gt;</a><br>
    <br>
    Interestingly, the version with min() results in surprisingly better
    code:<br>
    <br>
    <pre>add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-21 (-21)
Function                                     old     new   delta
__start_xen                                13807   13786     -21
Total: Before=3363289, After=3363268, chg -0.00%</pre>
  </body>
</html>
Jan Beulich May 2, 2019, 8:14 a.m. UTC | #2
>>> On 01.05.19 at 13:17, <dwmw2@infradead.org> wrote:
> We appear to have implemented a memcpy() in the low-memory trampoline
> which we then call into from __start_xen(), for no adequately defined
> reason.

May I suggest that in cases like this you look at the commit
introducing the function? It supplies a reason:

"Add a new raw e820 table for common purpose and copy the BIOS buffer
 to it. Doing the copying in assembly avoids the need to export the
 symbols for the BIOS E820 buffer and number of entries."

I have to admit that I see value in this, as it avoids introduction
of wrong accesses to these variables. Therefore I'd like to ask
for better justification of the change.

Jan
Andrew Cooper May 2, 2019, 9:23 a.m. UTC | #3
On 02/05/2019 09:14, Jan Beulich wrote:
>>>> On 01.05.19 at 13:17, <dwmw2@infradead.org> wrote:
>> We appear to have implemented a memcpy() in the low-memory trampoline
>> which we then call into from __start_xen(), for no adequately defined
>> reason.
> May I suggest that in cases like this you look at the commit
> introducing the function? It supplies a reason:
>
> "Add a new raw e820 table for common purpose and copy the BIOS buffer
>  to it. Doing the copying in assembly avoids the need to export the
>  symbols for the BIOS E820 buffer and number of entries."

I completely agree with David here.  That description is completely
insufficient for justifying why the logic was done that way in the end,
and I would not have accepted the patch in that form at all.

To be clear.  I understand (and agree) why having our main copy of the
e820 table in the trampoline is a bad thing, and moving the main copy
out of the trampoline is fine.

What isn't fine is why, in the adjusted world, we call back into what
appears to be the trampoline, but isn't, to get access to data which the
calling code can already access.  In particular...

> I have to admit that I see value in this, as it avoids introduction
> of wrong accesses to these variables.

...this reasoning is bogus.  We're either accessing the data itself, or
the memcpy function, but there is no possible way to programatically
avoid "wrong" access into the trampoline, because we're still accessing it.

Therefore, I don't understand what possible benefit not exporting the
data has in the first place, and why complicating it with a call to a
function (which isn't actually executing where it appears to live), is
considered a benefit.

~Andrew
David Woodhouse May 2, 2019, 9:42 a.m. UTC | #4
> On 02/05/2019 09:14, Jan Beulich wrote:
>>>>> On 01.05.19 at 13:17, <dwmw2@infradead.org> wrote:
>>> We appear to have implemented a memcpy() in the low-memory trampoline
>>> which we then call into from __start_xen(), for no adequately defined
>>> reason.
>> May I suggest that in cases like this you look at the commit
>> introducing the function? It supplies a reason:
>>
>> "Add a new raw e820 table for common purpose and copy the BIOS buffer
>>  to it. Doing the copying in assembly avoids the need to export the
>>  symbols for the BIOS E820 buffer and number of entries."
>
> I completely agree with David here.  That description is completely
> insufficient for justifying why the logic was done that way in the end,
> and I would not have accepted the patch in that form at all.
>
> To be clear.  I understand (and agree) why having our main copy of the
> e820 table in the trampoline is a bad thing, and moving the main copy
> out of the trampoline is fine.
>
> What isn't fine is why, in the adjusted world, we call back into what
> appears to be the trampoline, but isn't, to get access to data which the
> calling code can already access.  In particular...
>
>> I have to admit that I see value in this, as it avoids introduction
>> of wrong accesses to these variables.
>
> ...this reasoning is bogus.  We're either accessing the data itself, or
> the memcpy function, but there is no possible way to programatically
> avoid "wrong" access into the trampoline, because we're still accessing
> it.
>
> Therefore, I don't understand what possible benefit not exporting the
> data has in the first place, and why complicating it with a call to a
> function (which isn't actually executing where it appears to live), is
> considered a benefit.

Note that after bringing these two gratuitously differently handled
symbols in line with everything else in the boot trampoline, a later patch
in the series puts all this stuff into its own data section which is
copied back up to the Xen image at the end of the real mode phase of boot,
and all the pointer gymnastics for all of them go away completely

--
dwmw2
Jan Beulich May 2, 2019, 10 a.m. UTC | #5
>>> On 02.05.19 at 11:23, <andrew.cooper3@citrix.com> wrote:
> On 02/05/2019 09:14, Jan Beulich wrote:
>>>>> On 01.05.19 at 13:17, <dwmw2@infradead.org> wrote:
>>> We appear to have implemented a memcpy() in the low-memory trampoline
>>> which we then call into from __start_xen(), for no adequately defined
>>> reason.
>> May I suggest that in cases like this you look at the commit
>> introducing the function? It supplies a reason:
>>
>> "Add a new raw e820 table for common purpose and copy the BIOS buffer
>>  to it. Doing the copying in assembly avoids the need to export the
>>  symbols for the BIOS E820 buffer and number of entries."
> 
> I completely agree with David here.  That description is completely
> insufficient for justifying why the logic was done that way in the end,
> and I would not have accepted the patch in that form at all.
> 
> To be clear.  I understand (and agree) why having our main copy of the
> e820 table in the trampoline is a bad thing, and moving the main copy
> out of the trampoline is fine.
> 
> What isn't fine is why, in the adjusted world, we call back into what
> appears to be the trampoline, but isn't, to get access to data which the
> calling code can already access.  In particular...
> 
>> I have to admit that I see value in this, as it avoids introduction
>> of wrong accesses to these variables.
> 
> ...this reasoning is bogus.  We're either accessing the data itself, or
> the memcpy function, but there is no possible way to programatically
> avoid "wrong" access into the trampoline, because we're still accessing it.
> 
> Therefore, I don't understand what possible benefit not exporting the
> data has in the first place, and why complicating it with a call to a
> function (which isn't actually executing where it appears to live), is
> considered a benefit.

Without having gone back to the old thread, I think part of
the motivation to avoid such direct data accesses was that
an initial version of the patch actually broke things by
introducing such accesses.

Apart from that, despite not being a heavy C++ user, I
very much appreciate the language's fundamental desire to
avoid direct accesses to data elements, providing functions
(methods) to do so instead.

Jan
David Woodhouse May 2, 2019, 11:49 a.m. UTC | #6
On Thu, 2019-05-02 at 10:23 +0100, Andrew Cooper wrote:
> ...this reasoning is bogus.  We're either accessing the data itself, or
> the memcpy function, but there is no possible way to programatically
> avoid "wrong" access into the trampoline, because we're still accessing it.

Just to be clear, now I'm back at a real computer: yes there is a way
to avoid "wrong" access into the trampoline from the 64-bit C code.
It's the 6th patch in my series, for which this one is a necessary
preliminary cleanup.

http://git.infradead.org/users/dwmw2/xen.git/commitdiff/f54044fb3ad5d

It rips out all those horrid bootsym() pointer gymnastics in the 64-bit 
code, except for the limited cases where we really do need to put
things in place for the permanently AP/wakeup trampolines.

It puts all these boot-discovered variables into their own section,
then copies them back into the Xen image when the 16-bit boot code has
finished running, to be accessed from there ever after.

Which also paves the way for the no-real-mode boot not actually putting
*anything* into low memory during boot at all.

This allows us to clean up the EFI code to stop having to do that, too.
diff mbox series

Patch

diff --git a/xen/arch/x86/boot/mem.S b/xen/arch/x86/boot/mem.S
index c6a9bd4d3b..2d61d28835 100644
--- a/xen/arch/x86/boot/mem.S
+++ b/xen/arch/x86/boot/mem.S
@@ -7,7 +7,7 @@  get_memory_map:
 
 .Lmeme820:
         xorl    %ebx, %ebx                      # continuation counter
-        movw    $bootsym(e820map), %di             # point into the whitelist
+        movw    $bootsym(bios_e820map), %di     # point into the whitelist
                                                 # so we can have the bios
                                                 # directly write into it.
 
@@ -22,8 +22,8 @@  get_memory_map:
         cmpl    $SMAP,%eax                      # check the return is `SMAP'
         jne     .Lmem88
 
-        incw    bootsym(e820nr)
-        cmpw    $E820_BIOS_MAX,bootsym(e820nr)  # up to this many entries
+        incw    bootsym(bios_e820nr)
+        cmpw    $E820_BIOS_MAX,bootsym(bios_e820nr) # up to this many entries
         jae     .Lmem88
 
         movw    %di,%ax
@@ -66,27 +66,10 @@  get_memory_map:
 
         ret
 
-/*
- * Copy E820 map obtained from BIOS to a buffer allocated by Xen.
- * Input: %rdi: target address of e820 entry array
- *        %esi: maximum number of entries to copy
- * Output: %eax: number of entries copied
- */
-        .code64
-ENTRY(e820map_copy)
-        mov     %esi, %eax
-        lea     e820map(%rip), %rsi
-        mov     e820nr(%rip), %ecx
-        cmp     %ecx, %eax
-        cmova   %ecx, %eax                      # number of entries to move
-        imul    $5, %eax, %ecx
-        rep movsl                               # do the move
-        ret
-
         .align  4
-e820map:
+GLOBAL(bios_e820map)
         .fill   E820_BIOS_MAX*20,1,0
-e820nr:
+GLOBAL(bios_e820nr)
         .long   0
 GLOBAL(lowmem_kb)
         .long   0
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index a353d76f9a..5fa7d3b79c 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -664,6 +664,18 @@  static char * __init cmdline_cook(char *p, const char *loader_name)
     return p;
 }
 
+static int copy_bios_e820(struct e820entry *map, unsigned int limit)
+{
+    unsigned int n = bootsym(bios_e820nr);
+    if (n > limit)
+        n = limit;
+
+    if (n)
+        memcpy(map, bootsym(bios_e820map), sizeof(*map) * n);
+
+    return n;
+}
+
 void __init noreturn __start_xen(unsigned long mbi_p)
 {
     char *memmap_type = NULL;
diff --git a/xen/include/asm-x86/e820.h b/xen/include/asm-x86/e820.h
index ee317b17aa..52916fb75d 100644
--- a/xen/include/asm-x86/e820.h
+++ b/xen/include/asm-x86/e820.h
@@ -37,8 +37,7 @@  extern struct e820map e820_raw;
 
 /* These symbols live in the boot trampoline. */
 extern unsigned int lowmem_kb, highmem_kb;
-unsigned int e820map_copy(struct e820entry *map, unsigned int limit);
-
-#define copy_bios_e820 bootsym(e820map_copy)
+extern struct e820map bios_e820map[];
+extern unsigned int bios_e820nr;
 
 #endif /*__E820_HEADER*/