diff mbox

[v7,12/14] x86: make Xen early boot code relocatable

Message ID 1474667259-27290-13-git-send-email-daniel.kiper@oracle.com (mailing list archive)
State New, archived
Headers show

Commit Message

Daniel Kiper Sept. 23, 2016, 9:47 p.m. UTC
Every multiboot protocol (regardless of version) compatible image must
specify its load address (in ELF or multiboot header). Multiboot protocol
compatible loader have to load image at specified address. However, there
is no guarantee that the requested memory region (in case of Xen it starts
at 2 MiB and ends at ~5 MiB) where image should be loaded initially is a RAM
and it is free (legacy BIOS platforms are merciful for Xen but I found at
least one EFI platform on which Xen load address conflicts with EFI boot
services; it is Dell PowerEdge R820 with latest firmware). To cope with that
problem we must make Xen early boot code relocatable and help boot loader to
relocate image in proper way by suggesting, not requesting specific load
addresses as it is right now, allowed address ranges. This patch does former.
It does not add multiboot2 protocol interface which is done in "x86: add
multiboot2 protocol support for relocatable images" patch.

This patch changes following things:
  - %esi and %r15d registers are used as a storage for Xen image load base
    address (%r15d shortly because %rsi is used for EFI SystemTable address
    in 64-bit code); both registers are (%esi is mostly) unused in early
    boot code and preserved during C functions calls (%esi in 32-bit code
    and %r15d in 64-bit code),
  - %fs is used as base for Xen data relative addressing in 32-bit code
    if it is possible; %esi is used for that thing during error printing
    because it is not always possible to properly and efficiently
    initialize %fs.

Signed-off-by: Daniel Kiper <daniel.kiper@oracle.com>
---
v6 - suggestions/fixes:
   - leave static mapping of first
     16 MiB in l2_identmap as is
     (suggested by Jan Beulich),
   - use xen_phys_start instead of xen_img_load_base_addr
     (suggested by Daniel Kiper and Jan Beulich),
   - simplify BOOT_FS segment descriptor
     base address initialization
     (suggested by Jan Beulich),
   - fix BOOT_FS segment limit
     (suggested by Jan Beulich),
   - do not rename sym_phys in this patch
     (suggested by Jan Beulich),
   - rename esi_offset/fs_offset to
     sym_esi/sym_fs respectively
     (suggested by Jan Beulich),
   - use add instead of lea in assembly
     error printing code
     (suggested by Jan Beulich),
   - improve comments
     (suggested by Jan Beulich),
   - improve commit message
     (suggested by Jan Beulich),
   - various minor cleanups and fixes
     (suggested by Jan Beulich).

v4 - suggestions/fixes:
   - do not relocate Xen image if boot loader did work for us
     (suggested by Andrew Cooper and Jan Beulich),
   - initialize xen_img_load_base_addr in EFI boot code too,
   - properly initialize trampoline_xen_phys_start,
   - calculate Xen image load base address in
     x86_64 code ourselves,
     (suggested by Jan Beulich),
   - change how and when Xen image base address is printed,
   - use %fs instead of %esi for relative addressing
     (suggested by Andrew Cooper and Jan Beulich),
   - create esi_offset and fs_offset() macros in assembly,
   - calculate <final-exec-addr> mkelf32 argument automatically,
   - optimize and cleanup code,
   - improve comments,
   - improve commit message.

v3 - suggestions/fixes:
   - improve segment registers initialization
     (suggested by Jan Beulich),
   - simplify Xen image load base address calculation
     (suggested by Jan Beulich),
   - use %esi and %r15d instead of %ebp to store
     Xen image load base address,
   - use %esi instead of %fs for relative addressing;
     this way we get shorter and simpler code,
   - rename some variables and constants
     (suggested by Jan Beulich),
   - improve comments
     (suggested by Konrad Rzeszutek Wilk),
   - improve commit message
     (suggested by Jan Beulich).
---
 xen/arch/x86/boot/head.S          |  163 +++++++++++++++++++++++++++++--------
 xen/arch/x86/boot/trampoline.S    |    5 ++
 xen/arch/x86/boot/x86_64.S        |   26 +++---
 xen/arch/x86/setup.c              |   14 ++--
 xen/arch/x86/x86_64/asm-offsets.c |    3 +
 xen/include/asm-x86/page.h        |    2 +-
 6 files changed, 157 insertions(+), 56 deletions(-)

Comments

Jan Beulich Sept. 26, 2016, 3:03 p.m. UTC | #1
>>> On 23.09.16 at 23:47, <daniel.kiper@oracle.com> wrote:
> @@ -426,32 +453,65 @@ trampoline_bios_setup:
>          xor     %cl, %cl
>  
>  trampoline_setup:
> +        /*
> +         * Called on legacy BIOS and EFI platforms.
> +         *
> +         * Initialize 0-15 bits of BOOT_FS segment descriptor base address.
> +         */
> +        mov     %si,BOOT_FS+2+sym_esi(trampoline_gdt)
> +
> +        /* Initialize 16-23 bits of BOOT_FS segment descriptor base address. */
> +        mov     %esi,%edx
> +        shr     $16,%edx

I'd have liked it even better if you had done this with a single
instruction, but anyway.

> @@ -474,23 +534,53 @@ trampoline_setup:
>  
>          /* Stash TSC to calculate a good approximation of time-since-boot */
>          rdtsc
> -        mov     %eax,sym_phys(boot_tsc_stamp)
> -        mov     %edx,sym_phys(boot_tsc_stamp+4)
> +        mov     %eax,sym_fs(boot_tsc_stamp)
> +        mov     %edx,sym_fs(boot_tsc_stamp)+4
> +
> +        /*
> +         * Update frame addresses in page tables excluding l2_identmap
> +         * without its first entry which points to l1_identmap.
> +         */
> +        mov     $((__page_tables_end-__page_tables_start)/8),%ecx
> +        mov     $(((l2_identmap-__page_tables_start)/8)+2),%edx

The +2 instead of +1 here is confusing. Why don't you do the natural
thing here and ...

> +1:      cmp     $((l2_identmap+l2_identmap_sizeof-__page_tables_start)/8),%ecx
> +        cmove   %edx,%ecx
> +        je      2f

... simply drop this branch?

> +        testl   $_PAGE_PRESENT,sym_fs(__page_tables_start)-8(,%ecx,8)
> +        jz      2f
> +        add     %esi,sym_fs(__page_tables_start)-8(,%ecx,8)
> +2:      loop    1b
> +
> +        /* Initialize L2 boot-map/direct map page table entries (14MB). */
> +        lea     sym_esi(start),%ebx
> +        lea     (1<<L2_PAGETABLE_SHIFT)*7+(PAGE_HYPERVISOR|_PAGE_PSE)(%ebx),%eax
> +        shr     $(L2_PAGETABLE_SHIFT-3),%ebx
> +        mov     $8,%ecx

The comment saying 14Mb kind of contradicts this being 8 here.

> +1:      mov     %eax,sym_fs(l2_bootmap)-8(%ebx,%ecx,8)
> +        mov     %eax,sym_fs(l2_identmap)-8(%ebx,%ecx,8)
> +        sub     $(1<<L2_PAGETABLE_SHIFT),%eax
> +        loop    1b
> +
> +        /* Initialize L3 boot-map page directory entry. */
> +        lea     __PAGE_HYPERVISOR+(L2_PAGETABLE_ENTRIES*8)*3+sym_esi(l2_bootmap),%eax
> +        mov     $4,%ecx
> +1:      mov     %eax,sym_fs(l3_bootmap)-8(,%ecx,8)
> +        sub     $(L2_PAGETABLE_ENTRIES*8),%eax
> +        loop    1b
>  
>          /*
>           * During boot, hook 4kB mappings of first 2MB of memory into L2.
> -         * This avoids mixing cachability for the legacy VGA region, and is
> -         * corrected when Xen relocates itself.
> +         * This avoids mixing cachability for the legacy VGA region.
>           */
> -        mov     $sym_phys(l1_identmap)+__PAGE_HYPERVISOR,%edi
> -        mov     %edi,sym_phys(l2_xenmap)
> +        lea     __PAGE_HYPERVISOR+sym_esi(l1_identmap),%edi
> +        mov     %edi,sym_fs(l2_bootmap)

Switching from l2_xenmap to l2_bootmap here?

> @@ -121,8 +127,9 @@ GLOBAL(l2_identmap)
>   * page.
>   */
>  GLOBAL(l2_xenmap)
> -        idx = 0
> -        .rept 8
> +        .quad 0
> +        idx = 1
> +        .rept 7
>          .quad sym_phys(__image_base__) + (idx << L2_PAGETABLE_SHIFT) + (PAGE_HYPERVISOR | _PAGE_PSE)
>          idx = idx + 1
>          .endr

How come the first entry doesn't need filling anymore? I think such
less obvious changes should really get briefly mentioned/explained
in the commit message.

> @@ -674,6 +671,8 @@ void __init noreturn __start_xen(unsigned long mbi_p)
>  
>      printk("Command line: %s\n", cmdline);
>  
> +    printk("Xen image load base address: 0x%08lx\n", xen_phys_start);

Please prefer %#lx in cases like this.

> @@ -1018,6 +1018,8 @@ void __init noreturn __start_xen(unsigned long mbi_p)
>                  : "memory" );
>  
>              bootstrap_map(NULL);
> +
> +            printk("New Xen image base address: %#08lx\n", xen_phys_start);

# and a minimum width generally don't fit together well.

Jan
Daniel Kiper Sept. 27, 2016, 7:55 p.m. UTC | #2
On Mon, Sep 26, 2016 at 09:03:30AM -0600, Jan Beulich wrote:
> >>> On 23.09.16 at 23:47, <daniel.kiper@oracle.com> wrote:
> > @@ -426,32 +453,65 @@ trampoline_bios_setup:
> >          xor     %cl, %cl
> >
> >  trampoline_setup:
> > +        /*
> > +         * Called on legacy BIOS and EFI platforms.
> > +         *
> > +         * Initialize 0-15 bits of BOOT_FS segment descriptor base address.
> > +         */
> > +        mov     %si,BOOT_FS+2+sym_esi(trampoline_gdt)
> > +
> > +        /* Initialize 16-23 bits of BOOT_FS segment descriptor base address. */
> > +        mov     %esi,%edx
> > +        shr     $16,%edx
>
> I'd have liked it even better if you had done this with a single
> instruction, but anyway.

Do you think about "shld $16,%esi,%edx"?

> > @@ -474,23 +534,53 @@ trampoline_setup:
> >
> >          /* Stash TSC to calculate a good approximation of time-since-boot */
> >          rdtsc
> > -        mov     %eax,sym_phys(boot_tsc_stamp)
> > -        mov     %edx,sym_phys(boot_tsc_stamp+4)
> > +        mov     %eax,sym_fs(boot_tsc_stamp)
> > +        mov     %edx,sym_fs(boot_tsc_stamp)+4
> > +
> > +        /*
> > +         * Update frame addresses in page tables excluding l2_identmap
> > +         * without its first entry which points to l1_identmap.
> > +         */
> > +        mov     $((__page_tables_end-__page_tables_start)/8),%ecx
> > +        mov     $(((l2_identmap-__page_tables_start)/8)+2),%edx
>
> The +2 instead of +1 here is confusing. Why don't you do the natural
> thing here and ...
>
> > +1:      cmp     $((l2_identmap+l2_identmap_sizeof-__page_tables_start)/8),%ecx
> > +        cmove   %edx,%ecx
> > +        je      2f
>
> ... simply drop this branch?

Make sense. I will do that.

> > +        testl   $_PAGE_PRESENT,sym_fs(__page_tables_start)-8(,%ecx,8)
> > +        jz      2f
> > +        add     %esi,sym_fs(__page_tables_start)-8(,%ecx,8)
> > +2:      loop    1b
> > +
> > +        /* Initialize L2 boot-map/direct map page table entries (14MB). */
> > +        lea     sym_esi(start),%ebx
> > +        lea     (1<<L2_PAGETABLE_SHIFT)*7+(PAGE_HYPERVISOR|_PAGE_PSE)(%ebx),%eax
> > +        shr     $(L2_PAGETABLE_SHIFT-3),%ebx
> > +        mov     $8,%ecx
>
> The comment saying 14Mb kind of contradicts this being 8 here.

Ahhh... Right, comment is wrong.

> > +1:      mov     %eax,sym_fs(l2_bootmap)-8(%ebx,%ecx,8)
> > +        mov     %eax,sym_fs(l2_identmap)-8(%ebx,%ecx,8)
> > +        sub     $(1<<L2_PAGETABLE_SHIFT),%eax
> > +        loop    1b
> > +
> > +        /* Initialize L3 boot-map page directory entry. */
> > +        lea     __PAGE_HYPERVISOR+(L2_PAGETABLE_ENTRIES*8)*3+sym_esi(l2_bootmap),%eax
> > +        mov     $4,%ecx
> > +1:      mov     %eax,sym_fs(l3_bootmap)-8(,%ecx,8)
> > +        sub     $(L2_PAGETABLE_ENTRIES*8),%eax
> > +        loop    1b
> >
> >          /*
> >           * During boot, hook 4kB mappings of first 2MB of memory into L2.
> > -         * This avoids mixing cachability for the legacy VGA region, and is
> > -         * corrected when Xen relocates itself.
> > +         * This avoids mixing cachability for the legacy VGA region.
> >           */
> > -        mov     $sym_phys(l1_identmap)+__PAGE_HYPERVISOR,%edi
> > -        mov     %edi,sym_phys(l2_xenmap)
> > +        lea     __PAGE_HYPERVISOR+sym_esi(l1_identmap),%edi
> > +        mov     %edi,sym_fs(l2_bootmap)
>
> Switching from l2_xenmap to l2_bootmap here?

Do we need first 2 MiB mapped in Xen image mapping? It looks that we do not.
Am I missing something?

I must initialize l2_bootmap first entry with l1_identmap here because
I removed relevant static initialization from x86_64.S.

> > @@ -121,8 +127,9 @@ GLOBAL(l2_identmap)
> >   * page.
> >   */
> >  GLOBAL(l2_xenmap)
> > -        idx = 0
> > -        .rept 8
> > +        .quad 0
> > +        idx = 1
> > +        .rept 7
> >          .quad sym_phys(__image_base__) + (idx << L2_PAGETABLE_SHIFT) + (PAGE_HYPERVISOR | _PAGE_PSE)
> >          idx = idx + 1
> >          .endr
>
> How come the first entry doesn't need filling anymore? I think such
> less obvious changes should really get briefly mentioned/explained
> in the commit message.

Ditto. I will add a few words about that to commit message.

> > @@ -674,6 +671,8 @@ void __init noreturn __start_xen(unsigned long mbi_p)
> >
> >      printk("Command line: %s\n", cmdline);
> >
> > +    printk("Xen image load base address: 0x%08lx\n", xen_phys_start);
>
> Please prefer %#lx in cases like this.

If I do that then 0 is displayed as 0 instead of 0x00000000. I prefer
latter. If you do not care I can use "%#lx" as you wish.

> > @@ -1018,6 +1018,8 @@ void __init noreturn __start_xen(unsigned long mbi_p)
> >                  : "memory" );
> >
> >              bootstrap_map(NULL);
> > +
> > +            printk("New Xen image base address: %#08lx\n", xen_phys_start);
>
> # and a minimum width generally don't fit together well.

Why?

Daniel
Jan Beulich Sept. 28, 2016, 9:06 a.m. UTC | #3
>>> On 27.09.16 at 21:55, <daniel.kiper@oracle.com> wrote:
> On Mon, Sep 26, 2016 at 09:03:30AM -0600, Jan Beulich wrote:
>> >>> On 23.09.16 at 23:47, <daniel.kiper@oracle.com> wrote:
>> > @@ -426,32 +453,65 @@ trampoline_bios_setup:
>> >          xor     %cl, %cl
>> >
>> >  trampoline_setup:
>> > +        /*
>> > +         * Called on legacy BIOS and EFI platforms.
>> > +         *
>> > +         * Initialize 0-15 bits of BOOT_FS segment descriptor base address.
>> > +         */
>> > +        mov     %si,BOOT_FS+2+sym_esi(trampoline_gdt)
>> > +
>> > +        /* Initialize 16-23 bits of BOOT_FS segment descriptor base address. */
>> > +        mov     %esi,%edx
>> > +        shr     $16,%edx
>>
>> I'd have liked it even better if you had done this with a single
>> instruction, but anyway.
> 
> Do you think about "shld $16,%esi,%edx"?

Yes.

>> > +1:      mov     %eax,sym_fs(l2_bootmap)-8(%ebx,%ecx,8)
>> > +        mov     %eax,sym_fs(l2_identmap)-8(%ebx,%ecx,8)
>> > +        sub     $(1<<L2_PAGETABLE_SHIFT),%eax
>> > +        loop    1b
>> > +
>> > +        /* Initialize L3 boot-map page directory entry. */
>> > +        lea     __PAGE_HYPERVISOR+(L2_PAGETABLE_ENTRIES*8)*3+sym_esi(l2_bootmap),%eax
>> > +        mov     $4,%ecx
>> > +1:      mov     %eax,sym_fs(l3_bootmap)-8(,%ecx,8)
>> > +        sub     $(L2_PAGETABLE_ENTRIES*8),%eax
>> > +        loop    1b
>> >
>> >          /*
>> >           * During boot, hook 4kB mappings of first 2MB of memory into L2.
>> > -         * This avoids mixing cachability for the legacy VGA region, and is
>> > -         * corrected when Xen relocates itself.
>> > +         * This avoids mixing cachability for the legacy VGA region.
>> >           */
>> > -        mov     $sym_phys(l1_identmap)+__PAGE_HYPERVISOR,%edi
>> > -        mov     %edi,sym_phys(l2_xenmap)
>> > +        lea     __PAGE_HYPERVISOR+sym_esi(l1_identmap),%edi
>> > +        mov     %edi,sym_fs(l2_bootmap)
>>
>> Switching from l2_xenmap to l2_bootmap here?
> 
> Do we need first 2 MiB mapped in Xen image mapping? It looks that we do not.
> Am I missing something?

My point here isn't that the change is wrong, but that it's not
immediately obvious and hence should be explained in the commit
message. After all the need for the mapping of these 2Mb does not
- aiui - go away with this patch, but (presumably) with the earlier one
moving the load address up to 2Mb. I.e. it can generally be viewed as
an independent adjustment.

>> > @@ -674,6 +671,8 @@ void __init noreturn __start_xen(unsigned long mbi_p)
>> >
>> >      printk("Command line: %s\n", cmdline);
>> >
>> > +    printk("Xen image load base address: 0x%08lx\n", xen_phys_start);
>>
>> Please prefer %#lx in cases like this.
> 
> If I do that then 0 is displayed as 0 instead of 0x00000000. I prefer
> latter. If you do not care I can use "%#lx" as you wish.

Please do - the base address won't ever be zero anyway, and even if
it was "0" instead of "0x00000000" is quite fine when there are no
other lines to align with..

>> > @@ -1018,6 +1018,8 @@ void __init noreturn __start_xen(unsigned long mbi_p)
>> >                  : "memory" );
>> >
>> >              bootstrap_map(NULL);
>> > +
>> > +            printk("New Xen image base address: %#08lx\n", xen_phys_start);
>>
>> # and a minimum width generally don't fit together well.
> 
> Why?

Because this will result in e.g. 0x000123 instead of the apparently
expected by you 0x00000123, as the 0x contributes to the field
width.

Jan
Daniel Kiper Sept. 28, 2016, 9:56 a.m. UTC | #4
On Wed, Sep 28, 2016 at 03:06:31AM -0600, Jan Beulich wrote:
> >>> On 27.09.16 at 21:55, <daniel.kiper@oracle.com> wrote:
> > On Mon, Sep 26, 2016 at 09:03:30AM -0600, Jan Beulich wrote:
> >> >>> On 23.09.16 at 23:47, <daniel.kiper@oracle.com> wrote:
> >> > @@ -426,32 +453,65 @@ trampoline_bios_setup:
> >> >          xor     %cl, %cl
> >> >
> >> >  trampoline_setup:
> >> > +        /*
> >> > +         * Called on legacy BIOS and EFI platforms.
> >> > +         *
> >> > +         * Initialize 0-15 bits of BOOT_FS segment descriptor base address.
> >> > +         */
> >> > +        mov     %si,BOOT_FS+2+sym_esi(trampoline_gdt)
> >> > +
> >> > +        /* Initialize 16-23 bits of BOOT_FS segment descriptor base address. */
> >> > +        mov     %esi,%edx
> >> > +        shr     $16,%edx
> >>
> >> I'd have liked it even better if you had done this with a single
> >> instruction, but anyway.
> >
> > Do you think about "shld $16,%esi,%edx"?
>
> Yes.

By the way, my 12 years old daughter told me how to do this...
Daddy, just mix all arguments all together. And later when
I showed her final solution she said: I told you... :-)))

> >> > +1:      mov     %eax,sym_fs(l2_bootmap)-8(%ebx,%ecx,8)
> >> > +        mov     %eax,sym_fs(l2_identmap)-8(%ebx,%ecx,8)
> >> > +        sub     $(1<<L2_PAGETABLE_SHIFT),%eax
> >> > +        loop    1b
> >> > +
> >> > +        /* Initialize L3 boot-map page directory entry. */
> >> > +        lea     __PAGE_HYPERVISOR+(L2_PAGETABLE_ENTRIES*8)*3+sym_esi(l2_bootmap),%eax
> >> > +        mov     $4,%ecx
> >> > +1:      mov     %eax,sym_fs(l3_bootmap)-8(,%ecx,8)
> >> > +        sub     $(L2_PAGETABLE_ENTRIES*8),%eax
> >> > +        loop    1b
> >> >
> >> >          /*
> >> >           * During boot, hook 4kB mappings of first 2MB of memory into L2.
> >> > -         * This avoids mixing cachability for the legacy VGA region, and is
> >> > -         * corrected when Xen relocates itself.
> >> > +         * This avoids mixing cachability for the legacy VGA region.
> >> >           */
> >> > -        mov     $sym_phys(l1_identmap)+__PAGE_HYPERVISOR,%edi
> >> > -        mov     %edi,sym_phys(l2_xenmap)
> >> > +        lea     __PAGE_HYPERVISOR+sym_esi(l1_identmap),%edi
> >> > +        mov     %edi,sym_fs(l2_bootmap)
> >>
> >> Switching from l2_xenmap to l2_bootmap here?
> >
> > Do we need first 2 MiB mapped in Xen image mapping? It looks that we do not.
> > Am I missing something?
>
> My point here isn't that the change is wrong, but that it's not
> immediately obvious and hence should be explained in the commit
> message. After all the need for the mapping of these 2Mb does not
> - aiui - go away with this patch, but (presumably) with the earlier one
> moving the load address up to 2Mb. I.e. it can generally be viewed as
> an independent adjustment.

Probably. Should I move this change to patch #10 (x86: change default load
address from 1 MiB to 2 MiB) or leave it here with relevant comment?
Or do you wish separate patch for it?

> >> > @@ -674,6 +671,8 @@ void __init noreturn __start_xen(unsigned long mbi_p)
> >> >
> >> >      printk("Command line: %s\n", cmdline);
> >> >
> >> > +    printk("Xen image load base address: 0x%08lx\n", xen_phys_start);
> >>
> >> Please prefer %#lx in cases like this.
> >
> > If I do that then 0 is displayed as 0 instead of 0x00000000. I prefer
> > latter. If you do not care I can use "%#lx" as you wish.
>
> Please do - the base address won't ever be zero anyway, and even if

Here it can be. It happens if Xen image is loaded by boot loader without
relocation, e.g. via multiboot (v1) protocol.

> it was "0" instead of "0x00000000" is quite fine when there are no
> other lines to align with..
>
> >> > @@ -1018,6 +1018,8 @@ void __init noreturn __start_xen(unsigned long mbi_p)
> >> >                  : "memory" );
> >> >
> >> >              bootstrap_map(NULL);
> >> > +
> >> > +            printk("New Xen image base address: %#08lx\n", xen_phys_start);
> >>
> >> # and a minimum width generally don't fit together well.
> >
> > Why?
>
> Because this will result in e.g. 0x000123 instead of the apparently
> expected by you 0x00000123, as the 0x contributes to the field
> width.

Yep, this is a bit confusing.

Daniel
Jan Beulich Sept. 28, 2016, 10:19 a.m. UTC | #5
>>> On 28.09.16 at 11:56, <daniel.kiper@oracle.com> wrote:
> On Wed, Sep 28, 2016 at 03:06:31AM -0600, Jan Beulich wrote:
>> >>> On 27.09.16 at 21:55, <daniel.kiper@oracle.com> wrote:
>> > On Mon, Sep 26, 2016 at 09:03:30AM -0600, Jan Beulich wrote:
>> >> >>> On 23.09.16 at 23:47, <daniel.kiper@oracle.com> wrote:
>> >> > +1:      mov     %eax,sym_fs(l2_bootmap)-8(%ebx,%ecx,8)
>> >> > +        mov     %eax,sym_fs(l2_identmap)-8(%ebx,%ecx,8)
>> >> > +        sub     $(1<<L2_PAGETABLE_SHIFT),%eax
>> >> > +        loop    1b
>> >> > +
>> >> > +        /* Initialize L3 boot-map page directory entry. */
>> >> > +        lea     __PAGE_HYPERVISOR+(L2_PAGETABLE_ENTRIES*8)*3+sym_esi(l2_bootmap),%eax
>> >> > +        mov     $4,%ecx
>> >> > +1:      mov     %eax,sym_fs(l3_bootmap)-8(,%ecx,8)
>> >> > +        sub     $(L2_PAGETABLE_ENTRIES*8),%eax
>> >> > +        loop    1b
>> >> >
>> >> >          /*
>> >> >           * During boot, hook 4kB mappings of first 2MB of memory into L2.
>> >> > -         * This avoids mixing cachability for the legacy VGA region, and is
>> >> > -         * corrected when Xen relocates itself.
>> >> > +         * This avoids mixing cachability for the legacy VGA region.
>> >> >           */
>> >> > -        mov     $sym_phys(l1_identmap)+__PAGE_HYPERVISOR,%edi
>> >> > -        mov     %edi,sym_phys(l2_xenmap)
>> >> > +        lea     __PAGE_HYPERVISOR+sym_esi(l1_identmap),%edi
>> >> > +        mov     %edi,sym_fs(l2_bootmap)
>> >>
>> >> Switching from l2_xenmap to l2_bootmap here?
>> >
>> > Do we need first 2 MiB mapped in Xen image mapping? It looks that we do not.
>> > Am I missing something?
>>
>> My point here isn't that the change is wrong, but that it's not
>> immediately obvious and hence should be explained in the commit
>> message. After all the need for the mapping of these 2Mb does not
>> - aiui - go away with this patch, but (presumably) with the earlier one
>> moving the load address up to 2Mb. I.e. it can generally be viewed as
>> an independent adjustment.
> 
> Probably. Should I move this change to patch #10 (x86: change default load
> address from 1 MiB to 2 MiB) or leave it here with relevant comment?
> Or do you wish separate patch for it?

I think putting it in the other patch would be best, unless it would
end up as non-obvious as it did here, in which case a separate
patch would probably be better.

Jan
diff mbox

Patch

diff --git a/xen/arch/x86/boot/head.S b/xen/arch/x86/boot/head.S
index c301464..9b1fd0e 100644
--- a/xen/arch/x86/boot/head.S
+++ b/xen/arch/x86/boot/head.S
@@ -13,12 +13,15 @@ 
         .code32
 
 #define sym_phys(sym)     ((sym) - __XEN_VIRT_START)
+#define sym_esi(sym)      sym_phys(sym)(%esi)
+#define sym_fs(sym)       %fs:sym_phys(sym)
 
 #define BOOT_CS32        0x0008
 #define BOOT_CS64        0x0010
 #define BOOT_DS          0x0018
 #define BOOT_PSEUDORM_CS 0x0020
 #define BOOT_PSEUDORM_DS 0x0028
+#define BOOT_FS          0x0030
 
 #define MB2_HT(name)      (MULTIBOOT2_HEADER_TAG_##name)
 #define MB2_TT(name)      (MULTIBOOT2_TAG_TYPE_##name)
@@ -105,7 +108,8 @@  multiboot2_header_start:
 
         .word   0
 gdt_boot_descr:
-        .word   6*8-1
+        .word   7*8-1
+gdt_boot_base:
         .long   sym_phys(trampoline_gdt)
         .long   0 /* Needed for 64-bit lgdt */
 
@@ -127,27 +131,27 @@  vga_text_buffer:
         .section .init.text, "ax", @progbits
 
 bad_cpu:
-        mov     $(sym_phys(.Lbad_cpu_msg)),%esi # Error message
+        add     $sym_phys(.Lbad_cpu_msg),%esi   # Error message
         jmp     .Lget_vtb
 not_multiboot:
-        mov     $(sym_phys(.Lbad_ldr_msg)),%esi # Error message
+        add     $sym_phys(.Lbad_ldr_msg),%esi   # Error message
         jmp     .Lget_vtb
 .Lmb2_no_st:
-        mov     $(sym_phys(.Lbad_ldr_nst)),%esi # Error message
+        add     $sym_phys(.Lbad_ldr_nst),%esi   # Error message
         jmp     .Lget_vtb
 .Lmb2_no_ih:
-        mov     $(sym_phys(.Lbad_ldr_nih)),%esi # Error message
+        add     $sym_phys(.Lbad_ldr_nih),%esi   # Error message
         jmp     .Lget_vtb
 .Lmb2_no_bs:
-        mov     $(sym_phys(.Lbad_ldr_nbs)),%esi # Error message
+        add     $sym_phys(.Lbad_ldr_nbs),%esi   # Error message
         xor     %edi,%edi                       # No VGA text buffer
         jmp     .Lsend_chr
 .Lmb2_efi_ia_32:
-        mov     $(sym_phys(.Lbad_efi_msg)),%esi # Error message
+        add     $sym_phys(.Lbad_efi_msg),%esi   # Error message
         xor     %edi,%edi                       # No VGA text buffer
         jmp     .Lsend_chr
 .Lget_vtb:
-        mov     sym_phys(vga_text_buffer),%edi
+        mov     sym_esi(vga_text_buffer),%edi
 .Lsend_chr:
         mov     (%esi),%bl
         test    %bl,%bl        # Terminate on '\0' sentinel
@@ -176,6 +180,9 @@  __efi64_start:
         /* VGA is not available on EFI platforms. */
         movl   $0,vga_text_buffer(%rip)
 
+        /* Load Xen image load base address. */
+        lea     __image_base__(%rip),%r15d
+
         /* Check for Multiboot2 bootloader. */
         cmp     $MULTIBOOT2_BOOTLOADER_MAGIC,%eax
         je      .Lefi_multiboot2_proto
@@ -299,6 +306,9 @@  run_bs:
 
         pop     %rax
 
+        /* Store Xen image load base address in place accessible for 32-bit code. */
+        mov     %r15d,%esi
+
         /* Jump to trampoline_setup after switching CPU to x86_32 mode. */
         lea     trampoline_setup(%rip),%edi
 
@@ -306,9 +316,11 @@  x86_32_switch:
         cli
 
         /* Initialize GDTR. */
+        add     %esi,gdt_boot_base(%rip)
         lgdt    gdt_boot_descr(%rip)
 
         /* Reload code selector. */
+        add     %esi,cs32_switch_addr(%rip)
         ljmpl   *cs32_switch_addr(%rip)
 
         .code32
@@ -338,12 +350,8 @@  __start:
         cld
         cli
 
-        /* Initialise GDT and basic data segments. */
-        lgdt    %cs:sym_phys(gdt_boot_descr)
-        mov     $BOOT_DS,%ecx
-        mov     %ecx,%ds
-        mov     %ecx,%es
-        mov     %ecx,%ss
+        /* Load default Xen image load base address. */
+        mov     $sym_phys(__image_base__),%esi
 
         /* Bootloaders may set multiboot{1,2}.mem_lower to a nonzero value. */
         xor     %edx,%edx
@@ -399,6 +407,25 @@  __start:
         jmp     0b
 
 trampoline_bios_setup:
+        /*
+         * Called on legacy BIOS platforms only.
+         *
+         * Initialize GDTR and basic data segments.
+         */
+        add     %esi,sym_esi(gdt_boot_base)
+        lgdt    sym_esi(gdt_boot_descr)
+
+        mov     $BOOT_DS,%ecx
+        mov     %ecx,%ds
+        mov     %ecx,%es
+        mov     %ecx,%ss
+        /* %esp is initialized later. */
+
+        /* Load null descriptor to unused segment registers. */
+        xor     %ecx,%ecx
+        mov     %ecx,%fs
+        mov     %ecx,%gs
+
         /* Set up trampoline segment 64k below EBDA */
         movzwl  0x40e,%ecx          /* EBDA segment */
         cmp     $0xa000,%ecx        /* sanity check (high) */
@@ -426,32 +453,65 @@  trampoline_bios_setup:
         xor     %cl, %cl
 
 trampoline_setup:
+        /*
+         * Called on legacy BIOS and EFI platforms.
+         *
+         * Initialize 0-15 bits of BOOT_FS segment descriptor base address.
+         */
+        mov     %si,BOOT_FS+2+sym_esi(trampoline_gdt)
+
+        /* Initialize 16-23 bits of BOOT_FS segment descriptor base address. */
+        mov     %esi,%edx
+        shr     $16,%edx
+        mov     %dl,BOOT_FS+4+sym_esi(trampoline_gdt)
+
+        /* Initialize 24-31 bits of BOOT_FS segment descriptor base address. */
+        mov     %dh,BOOT_FS+7+sym_esi(trampoline_gdt)
+
+        /*
+         * Initialize %fs and later use it to access Xen data if possible.
+         * According to Intel 64 and IA-32 Architectures Software Developer's
+         * Manual it is safe to do that without reloading GDTR before.
+         */
+        mov     $BOOT_FS,%edx
+        mov     %edx,%fs
+
         /* Save trampoline address for later use. */
         shl     $4, %ecx
-        mov     %ecx,sym_phys(trampoline_phys)
+        mov     %ecx,sym_fs(trampoline_phys)
+
+        /* Save Xen image load base address for later use. */
+        mov     %esi,sym_fs(xen_phys_start)
+        mov     %esi,sym_fs(trampoline_xen_phys_start)
+
+        /* Setup stack. %ss was initialized earlier. */
+        lea     1024+sym_esi(cpu0_stack),%esp
 
         /* Save the Multiboot info struct (after relocation) for later use. */
-        mov     $sym_phys(cpu0_stack)+1024,%esp
         push    %ecx                /* Boot trampoline address. */
         push    %ebx                /* Multiboot information address. */
         push    %eax                /* Multiboot magic. */
         call    reloc
-        mov     %eax,sym_phys(multiboot_ptr)
+        mov     %eax,sym_fs(multiboot_ptr)
 
         /*
          * Do not zero BSS on EFI platform here.
          * It was initialized earlier.
          */
-        cmpb    $0,sym_phys(skip_realmode)
+        cmpb    $0,sym_fs(skip_realmode)
         jnz     1f
 
         /* Initialize BSS (no nasty surprises!). */
         mov     $sym_phys(__bss_start),%edi
         mov     $sym_phys(__bss_end),%ecx
+        push    %fs
+        pop     %es
         sub     %edi,%ecx
         xor     %eax,%eax
         shr     $2,%ecx
         rep stosl
+        push    %ds
+        pop     %es
 
 1:
         /* Interrogate CPU extended features via CPUID. */
@@ -465,8 +525,8 @@  trampoline_setup:
         jbe     1f
         mov     $0x80000001,%eax
         cpuid
-1:      mov     %edx,sym_phys(cpuid_ext_features)
-        mov     %edx,sym_phys(boot_cpu_data)+CPUINFO_FEATURE_OFFSET(X86_FEATURE_LM)
+1:      mov     %edx,sym_fs(cpuid_ext_features)
+        mov     %edx,sym_fs(boot_cpu_data)+CPUINFO_FEATURE_OFFSET(X86_FEATURE_LM)
 
         /* Check for availability of long mode. */
         bt      $cpufeat_bit(X86_FEATURE_LM),%edx
@@ -474,23 +534,53 @@  trampoline_setup:
 
         /* Stash TSC to calculate a good approximation of time-since-boot */
         rdtsc
-        mov     %eax,sym_phys(boot_tsc_stamp)
-        mov     %edx,sym_phys(boot_tsc_stamp+4)
+        mov     %eax,sym_fs(boot_tsc_stamp)
+        mov     %edx,sym_fs(boot_tsc_stamp)+4
+
+        /*
+         * Update frame addresses in page tables excluding l2_identmap
+         * without its first entry which points to l1_identmap.
+         */
+        mov     $((__page_tables_end-__page_tables_start)/8),%ecx
+        mov     $(((l2_identmap-__page_tables_start)/8)+2),%edx
+1:      cmp     $((l2_identmap+l2_identmap_sizeof-__page_tables_start)/8),%ecx
+        cmove   %edx,%ecx
+        je      2f
+        testl   $_PAGE_PRESENT,sym_fs(__page_tables_start)-8(,%ecx,8)
+        jz      2f
+        add     %esi,sym_fs(__page_tables_start)-8(,%ecx,8)
+2:      loop    1b
+
+        /* Initialize L2 boot-map/direct map page table entries (14MB). */
+        lea     sym_esi(start),%ebx
+        lea     (1<<L2_PAGETABLE_SHIFT)*7+(PAGE_HYPERVISOR|_PAGE_PSE)(%ebx),%eax
+        shr     $(L2_PAGETABLE_SHIFT-3),%ebx
+        mov     $8,%ecx
+1:      mov     %eax,sym_fs(l2_bootmap)-8(%ebx,%ecx,8)
+        mov     %eax,sym_fs(l2_identmap)-8(%ebx,%ecx,8)
+        sub     $(1<<L2_PAGETABLE_SHIFT),%eax
+        loop    1b
+
+        /* Initialize L3 boot-map page directory entry. */
+        lea     __PAGE_HYPERVISOR+(L2_PAGETABLE_ENTRIES*8)*3+sym_esi(l2_bootmap),%eax
+        mov     $4,%ecx
+1:      mov     %eax,sym_fs(l3_bootmap)-8(,%ecx,8)
+        sub     $(L2_PAGETABLE_ENTRIES*8),%eax
+        loop    1b
 
         /*
          * During boot, hook 4kB mappings of first 2MB of memory into L2.
-         * This avoids mixing cachability for the legacy VGA region, and is
-         * corrected when Xen relocates itself.
+         * This avoids mixing cachability for the legacy VGA region.
          */
-        mov     $sym_phys(l1_identmap)+__PAGE_HYPERVISOR,%edi
-        mov     %edi,sym_phys(l2_xenmap)
+        lea     __PAGE_HYPERVISOR+sym_esi(l1_identmap),%edi
+        mov     %edi,sym_fs(l2_bootmap)
 
         /* Apply relocations to bootstrap trampoline. */
-        mov     sym_phys(trampoline_phys),%edx
+        mov     sym_fs(trampoline_phys),%edx
         mov     $sym_phys(__trampoline_rel_start),%edi
 1:
-        mov     (%edi),%eax
-        add     %edx,(%edi,%eax)
+        mov     %fs:(%edi),%eax
+        add     %edx,%fs:(%edi,%eax)
         add     $4,%edi
         cmp     $sym_phys(__trampoline_rel_stop),%edi
         jb      1b
@@ -499,28 +589,29 @@  trampoline_setup:
         shr     $4,%edx
         mov     $sym_phys(__trampoline_seg_start),%edi
 1:
-        mov     (%edi),%eax
-        mov     %dx,(%edi,%eax)
+        mov     %fs:(%edi),%eax
+        mov     %dx,%fs:(%edi,%eax)
         add     $4,%edi
         cmp     $sym_phys(__trampoline_seg_stop),%edi
         jb      1b
 
         /* Do not parse command line on EFI platform here. */
-        cmpb    $0,sym_phys(skip_realmode)
+        cmpb    $0,sym_fs(skip_realmode)
         jnz     1f
 
         /* Bail if there is no command line to parse. */
-        mov     sym_phys(multiboot_ptr),%ebx
+        mov     sym_fs(multiboot_ptr),%ebx
         testl   $MBI_CMDLINE,MB_flags(%ebx)
         jz      1f
 
-        pushl   $sym_phys(early_boot_opts)
+        lea     sym_esi(early_boot_opts),%eax
+        push    %eax
         pushl   MB_cmdline(%ebx)
         call    cmdline_parse_early
 
 1:
         /* Switch to low-memory stack.  */
-        mov     sym_phys(trampoline_phys),%edi
+        mov     sym_fs(trampoline_phys),%edi
         lea     0x10000(%edi),%esp
         lea     trampoline_boot_cpu_entry-trampoline_start(%edi),%eax
         pushl   $BOOT_CS32
@@ -529,7 +620,7 @@  trampoline_setup:
         /* Copy bootstrap trampoline to low memory, below 1MB. */
         mov     $sym_phys(trampoline_start),%esi
         mov     $((trampoline_end - trampoline_start) / 4),%ecx
-        rep movsl
+        rep movsl %fs:(%esi),%es:(%edi)
 
         /* Jump into the relocated trampoline. */
         lret
diff --git a/xen/arch/x86/boot/trampoline.S b/xen/arch/x86/boot/trampoline.S
index 2715d17..64f8efd 100644
--- a/xen/arch/x86/boot/trampoline.S
+++ b/xen/arch/x86/boot/trampoline.S
@@ -54,6 +54,11 @@  trampoline_gdt:
         /* 0x0028: real-mode data @ BOOT_TRAMPOLINE */
         .long   0x0000ffff
         .long   0x00009200
+        /*
+         * 0x0030: ring 0 Xen data, 16 MiB size, base
+         * address is computed during runtime.
+         */
+        .quad   0x00c0920000000fff
 
         .pushsection .trampoline_rel, "a"
         .long   trampoline_gdt + BOOT_PSEUDORM_CS + 2 - .
diff --git a/xen/arch/x86/boot/x86_64.S b/xen/arch/x86/boot/x86_64.S
index 139b2ca..7392004 100644
--- a/xen/arch/x86/boot/x86_64.S
+++ b/xen/arch/x86/boot/x86_64.S
@@ -81,7 +81,6 @@  GLOBAL(boot_cpu_compat_gdt_table)
         .quad 0x0000910000000000     /* per-CPU entry (limit == cpu)      */
         .align PAGE_SIZE, 0
 
-GLOBAL(__page_tables_start)
 /*
  * Mapping of first 2 megabytes of memory. This is mapped with 4kB mappings
  * to avoid type conflicts with fixed-range MTRRs covering the lowest megabyte
@@ -102,6 +101,13 @@  l1_identmap:
         .size l1_identmap, . - l1_identmap
 
 /*
+ * __page_tables_start does not cover l1_identmap because it (l1_identmap)
+ * contains 1-1 mappings. This means that frame addresses of these mappings
+ * are static and should not be updated during runtime.
+ */
+GLOBAL(__page_tables_start)
+
+/*
  * Space for mapping the first 4GB of memory, with the first 16 megabytes
  * actualy mapped (mostly using superpages).  Uses 4x 4k pages.
  */
@@ -121,8 +127,9 @@  GLOBAL(l2_identmap)
  * page.
  */
 GLOBAL(l2_xenmap)
-        idx = 0
-        .rept 8
+        .quad 0
+        idx = 1
+        .rept 7
         .quad sym_phys(__image_base__) + (idx << L2_PAGETABLE_SHIFT) + (PAGE_HYPERVISOR | _PAGE_PSE)
         idx = idx + 1
         .endr
@@ -185,21 +192,14 @@  GLOBAL(idle_pg_table)
 
 GLOBAL(__page_tables_end)
 
-/* Init pagetables.  Enough page directories to map into the bottom 1GB. */
+/* Init pagetables. Enough page directories to map into 4GB. */
         .section .init.data, "a", @progbits
         .align PAGE_SIZE, 0
 
 GLOBAL(l2_bootmap)
-        .quad sym_phys(l1_identmap) + __PAGE_HYPERVISOR
-        idx = 1
-        .rept 7
-        .quad (idx << L2_PAGETABLE_SHIFT) | __PAGE_HYPERVISOR | _PAGE_PSE
-        idx = idx + 1
-        .endr
-        .fill L2_PAGETABLE_ENTRIES - 8, 8, 0
+        .fill 4 * L2_PAGETABLE_ENTRIES, 8, 0
         .size l2_bootmap, . - l2_bootmap
 
 GLOBAL(l3_bootmap)
-        .quad sym_phys(l2_bootmap) + __PAGE_HYPERVISOR
-        .fill L3_PAGETABLE_ENTRIES - 1, 8, 0
+        .fill L3_PAGETABLE_ENTRIES, 8, 0
         .size l3_bootmap, . - l3_bootmap
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index ca1a97a..e6e05f9 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -286,9 +286,6 @@  static void *__init bootstrap_map(const module_t *mod)
     if ( start >= end )
         return NULL;
 
-    if ( end <= BOOTSTRAP_MAP_BASE )
-        return (void *)(unsigned long)start;
-
     ret = (void *)(map_cur + (unsigned long)(start & mask));
     start &= ~mask;
     end = (end + mask) & ~mask;
@@ -674,6 +671,8 @@  void __init noreturn __start_xen(unsigned long mbi_p)
 
     printk("Command line: %s\n", cmdline);
 
+    printk("Xen image load base address: 0x%08lx\n", xen_phys_start);
+
     printk("Video information:\n");
 
     /* Print VGA display mode information. */
@@ -931,7 +930,7 @@  void __init noreturn __start_xen(unsigned long mbi_p)
                     /* Not present, 1GB mapping, or already relocated? */
                     if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) ||
                          (l3e_get_flags(*pl3e) & _PAGE_PSE) ||
-                         (l3e_get_pfn(*pl3e) > 0x1000) )
+                         (l3e_get_pfn(*pl3e) > PFN_DOWN(xen_phys_start)) )
                         continue;
                     *pl3e = l3e_from_intpte(l3e_get_intpte(*pl3e) +
                                             xen_phys_start);
@@ -941,7 +940,7 @@  void __init noreturn __start_xen(unsigned long mbi_p)
                         /* Not present, PSE, or already relocated? */
                         if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) ||
                              (l2e_get_flags(*pl2e) & _PAGE_PSE) ||
-                             (l2e_get_pfn(*pl2e) > 0x1000) )
+                             (l2e_get_pfn(*pl2e) > PFN_DOWN(xen_phys_start)) )
                             continue;
                         *pl2e = l2e_from_intpte(l2e_get_intpte(*pl2e) +
                                                 xen_phys_start);
@@ -964,7 +963,8 @@  void __init noreturn __start_xen(unsigned long mbi_p)
             {
                 unsigned int flags;
 
-                if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
+                if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) ||
+                     (l2e_get_pfn(*pl2e) > PFN_DOWN(xen_phys_start)) )
                     continue;
 
                 if ( !using_2M_mapping() )
@@ -1018,6 +1018,8 @@  void __init noreturn __start_xen(unsigned long mbi_p)
                 : "memory" );
 
             bootstrap_map(NULL);
+
+            printk("New Xen image base address: %#08lx\n", xen_phys_start);
         }
 
         /* Is the region suitable for relocating the multiboot modules? */
diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c
index 2a22659..5d7a8e5 100644
--- a/xen/arch/x86/x86_64/asm-offsets.c
+++ b/xen/arch/x86/x86_64/asm-offsets.c
@@ -179,5 +179,8 @@  void __dummy__(void)
     OFFSET(MB2_efi64_ih, multiboot2_tag_efi64_ih_t, pointer);
     BLANK();
 
+    DEFINE(l2_identmap_sizeof, sizeof(l2_identmap));
+    BLANK();
+
     OFFSET(DOMAIN_vm_assist, struct domain, vm_assist);
 }
diff --git a/xen/include/asm-x86/page.h b/xen/include/asm-x86/page.h
index af7d3e8..a54fdd1 100644
--- a/xen/include/asm-x86/page.h
+++ b/xen/include/asm-x86/page.h
@@ -288,7 +288,7 @@  extern root_pgentry_t idle_pg_table[ROOT_PAGETABLE_ENTRIES];
 extern l2_pgentry_t  *compat_idle_pg_table_l2;
 extern unsigned int   m2p_compat_vstart;
 extern l2_pgentry_t l2_xenmap[L2_PAGETABLE_ENTRIES],
-    l2_bootmap[L2_PAGETABLE_ENTRIES];
+    l2_bootmap[4*L2_PAGETABLE_ENTRIES];
 extern l3_pgentry_t l3_bootmap[L3_PAGETABLE_ENTRIES];
 extern l2_pgentry_t l2_identmap[4*L2_PAGETABLE_ENTRIES];
 extern l1_pgentry_t l1_fixmap[L1_PAGETABLE_ENTRIES];