
x86: partially revert use of 2M mappings for hypervisor image

Message ID 56E6E2F202000078000DC1A9@prv-mh.provo.novell.com (mailing list archive)
State New, archived

Commit Message

Jan Beulich March 14, 2016, 3:12 p.m. UTC
As explained by Andrew in
http://lists.xenproject.org/archives/html/xen-devel/2016-03/msg01380.html,
that change makes the uncompressed xen.gz image too large for certain
boot environments. As a result, this change makes some of the effects of
commits cf393624ee ("x86: use 2M superpages for text/data/bss
mappings") and 53aa3dde17 ("x86: unilaterally remove .init mappings")
conditional, restoring the previous code as an alternative where necessary. This
is so that xen.efi can still benefit from the new mechanisms, as it is
unaffected by said limitations.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
The first, neater attempt (making the __2M_* symbols weak) failed:
- older gcc doesn't access the weak symbols through .got
- GOTPCREL relocations get treated just like PCREL ones by ld when
  linking xen.efi
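
For reference, a minimal sketch of what that abandoned weak-symbol
variant might have looked like (hypothetical code, not part of this
patch; the real __2M_* symbols are provided by xen.lds.S, and bool_t
comes from Xen's headers):

/* Hypothetical: declare the 2M boundary symbols weak, so that a build
 * whose linker script does not define them resolves them to NULL and
 * the old code path can be selected at run time. */
extern char __2M_init_start[] __attribute__((__weak__));
extern char __2M_init_end[]   __attribute__((__weak__));

static inline bool_t using_2M_mapping(void)
{
    /*
     * An undefined weak symbol resolves to address 0.  This only works
     * if the compiler reaches the symbols through the GOT; older gcc
     * emits plain PC-relative references, and ld folds GOTPCREL into
     * PCREL when linking xen.efi, which is why this variant failed.
     */
    return __2M_init_start != NULL && __2M_init_end != NULL;
}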

--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -497,6 +497,17 @@ static void __init kexec_reserve_area(st
 #endif
 }
 
+static inline bool_t using_2M_mapping(void)
+{
+    return !l1_table_offset((unsigned long)__2M_text_end) &&
+           !l1_table_offset((unsigned long)__2M_rodata_start) &&
+           !l1_table_offset((unsigned long)__2M_rodata_end) &&
+           !l1_table_offset((unsigned long)__2M_init_start) &&
+           !l1_table_offset((unsigned long)__2M_init_end) &&
+           !l1_table_offset((unsigned long)__2M_rwdata_start) &&
+           !l1_table_offset((unsigned long)__2M_rwdata_end);
+}
+
 static void noinline init_done(void)
 {
     void *va;
@@ -509,10 +520,19 @@ static void noinline init_done(void)
     for ( va = __init_begin; va < _p(__init_end); va += PAGE_SIZE )
         clear_page(va);
 
-    /* Destroy Xen's mappings, and reuse the pages. */
-    destroy_xen_mappings((unsigned long)&__2M_init_start,
-                         (unsigned long)&__2M_init_end);
-    init_xenheap_pages(__pa(__2M_init_start), __pa(__2M_init_end));
+    if ( using_2M_mapping() )
+    {
+        /* Destroy Xen's mappings, and reuse the pages. */
+        destroy_xen_mappings((unsigned long)&__2M_init_start,
+                             (unsigned long)&__2M_init_end);
+        init_xenheap_pages(__pa(__2M_init_start), __pa(__2M_init_end));
+    }
+    else
+    {
+        destroy_xen_mappings((unsigned long)&__init_begin,
+                             (unsigned long)&__init_end);
+        init_xenheap_pages(__pa(__init_begin), __pa(__init_end));
+    }
 
     printk("Freed %ldkB init memory.\n", (long)(__init_end-__init_begin)>>10);
 
@@ -922,6 +942,8 @@ void __init noreturn __start_xen(unsigne
              * Undo the temporary-hooking of the l1_identmap.  __2M_text_start
              * is contained in this PTE.
              */
+            BUG_ON(l2_table_offset((unsigned long)_erodata) ==
+                   l2_table_offset((unsigned long)_stext));
             *pl2e++ = l2e_from_pfn(xen_phys_start >> PAGE_SHIFT,
                                    PAGE_HYPERVISOR_RX | _PAGE_PSE);
             for ( i = 1; i < L2_PAGETABLE_ENTRIES; i++, pl2e++ )
@@ -931,6 +953,13 @@ void __init noreturn __start_xen(unsigne
                 if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
                     continue;
 
+                if ( !using_2M_mapping() )
+                {
+                    *pl2e = l2e_from_intpte(l2e_get_intpte(*pl2e) +
+                                            xen_phys_start);
+                    continue;
+                }
+
                 if ( i < l2_table_offset((unsigned long)&__2M_text_end) )
                 {
                     flags = PAGE_HYPERVISOR_RX | _PAGE_PSE;
--- a/xen/arch/x86/xen.lds.S
+++ b/xen/arch/x86/xen.lds.S
@@ -53,11 +53,14 @@ SECTIONS
        _etext = .;             /* End of text section */
   } :text = 0x9090
 
+#ifdef EFI
   . = ALIGN(MB(2));
+#endif
   __2M_text_end = .;
 
   __2M_rodata_start = .;       /* Start of 2M superpages, mapped RO. */
   .rodata : {
+       _srodata = .;
        /* Bug frames table */
        . = ALIGN(4);
        __start_bug_frames = .;
@@ -79,9 +82,12 @@ SECTIONS
        *(.lockprofile.data)
        __lock_profile_end = .;
 #endif
+       _erodata = .;
   } :text
 
+#ifdef EFI
   . = ALIGN(MB(2));
+#endif
   __2M_rodata_end = .;
 
   __2M_init_start = .;         /* Start of 2M superpages, mapped RWX (boot only). */
@@ -148,7 +154,9 @@ SECTIONS
   . = ALIGN(PAGE_SIZE);
   __init_end = .;
 
+#ifdef EFI
   . = ALIGN(MB(2));
+#endif
   __2M_init_end = .;
 
   __2M_rwdata_start = .;       /* Start of 2M superpages, mapped RW. */
@@ -200,7 +208,9 @@ SECTIONS
   } :text
   _end = . ;
 
+#ifdef EFI
   . = ALIGN(MB(2));
+#endif
   __2M_rwdata_end = .;
 
 #ifdef EFI
@@ -250,6 +260,7 @@ ASSERT(kexec_reloc_size - kexec_reloc <=
 #endif
 
 ASSERT(IS_ALIGNED(__2M_text_start,   MB(2)), "__2M_text_start misaligned")
+#ifdef EFI
 ASSERT(IS_ALIGNED(__2M_text_end,     MB(2)), "__2M_text_end misaligned")
 ASSERT(IS_ALIGNED(__2M_rodata_start, MB(2)), "__2M_rodata_start misaligned")
 ASSERT(IS_ALIGNED(__2M_rodata_end,   MB(2)), "__2M_rodata_end misaligned")
@@ -257,6 +268,7 @@ ASSERT(IS_ALIGNED(__2M_init_start,   MB(
 ASSERT(IS_ALIGNED(__2M_init_end,     MB(2)), "__2M_init_end misaligned")
 ASSERT(IS_ALIGNED(__2M_rwdata_start, MB(2)), "__2M_rwdata_start misaligned")
 ASSERT(IS_ALIGNED(__2M_rwdata_end,   MB(2)), "__2M_rwdata_end misaligned")
+#endif
 
 ASSERT(IS_ALIGNED(cpu0_stack, STACK_SIZE), "cpu0_stack misaligned")

Comments

Andrew Cooper March 14, 2016, 3:23 p.m. UTC | #1
On 14/03/16 15:12, Jan Beulich wrote:
> As explained by Andrew in
> http://lists.xenproject.org/archives/html/xen-devel/2016-03/msg01380.html 
> that change makes the uncompressed xen.gz image too large for certain
> boot environments. As a result this change makes some of the effects of
> commits cf393624ee ("x86: use 2M superpages for text/data/bss
> mappings") and 53aa3dde17 ("x86: unilaterally remove .init mappings")
> conditional, restoring alternative previous code where necessary. This
> is so that xen.efi can still benefit from the new mechanisms, as it is
> unaffected by said limitations.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> The first, neater attempt (making the __2M_* symbols weak) failed:
> - older gcc doesn't access the weak symbols through .got
> - GOTPCREL relocations get treated just like PCREL ones by ld when
>   linking xen.efi
>
> --- a/xen/arch/x86/setup.c
> +++ b/xen/arch/x86/setup.c
> @@ -497,6 +497,17 @@ static void __init kexec_reserve_area(st
>  #endif
>  }
>  
> +static inline bool_t using_2M_mapping(void)
> +{
> +    return !l1_table_offset((unsigned long)__2M_text_end) &&
> +           !l1_table_offset((unsigned long)__2M_rodata_start) &&
> +           !l1_table_offset((unsigned long)__2M_rodata_end) &&
> +           !l1_table_offset((unsigned long)__2M_init_start) &&
> +           !l1_table_offset((unsigned long)__2M_init_end) &&
> +           !l1_table_offset((unsigned long)__2M_rwdata_start) &&
> +           !l1_table_offset((unsigned long)__2M_rwdata_end);

I would recommend

#ifdef EFI
return 1;
#else
return 0;
#endif

The compiler is unable to collapse that expression into a constant,
because it can only be evaluated at link time.

> +}
> +
>  static void noinline init_done(void)
>  {
>      void *va;
> @@ -509,10 +520,19 @@ static void noinline init_done(void)
>      for ( va = __init_begin; va < _p(__init_end); va += PAGE_SIZE )
>          clear_page(va);
>  
> -    /* Destroy Xen's mappings, and reuse the pages. */
> -    destroy_xen_mappings((unsigned long)&__2M_init_start,
> -                         (unsigned long)&__2M_init_end);
> -    init_xenheap_pages(__pa(__2M_init_start), __pa(__2M_init_end));
> +    if ( using_2M_mapping() )
> +    {
> +        /* Destroy Xen's mappings, and reuse the pages. */

I would be tempted to leave this comment back outside using_2M_mapping()
which reduces the size of this hunk.

> +        destroy_xen_mappings((unsigned long)&__2M_init_start,
> +                             (unsigned long)&__2M_init_end);
> +        init_xenheap_pages(__pa(__2M_init_start), __pa(__2M_init_end));
> +    }
> +    else
> +    {
> +        destroy_xen_mappings((unsigned long)&__init_begin,
> +                             (unsigned long)&__init_end);
> +        init_xenheap_pages(__pa(__init_begin), __pa(__init_end));
> +    }
>  
>      printk("Freed %ldkB init memory.\n", (long)(__init_end-__init_begin)>>10);
>  
> @@ -922,6 +942,8 @@ void __init noreturn __start_xen(unsigne
>               * Undo the temporary-hooking of the l1_identmap.  __2M_text_start
>               * is contained in this PTE.
>               */
> +            BUG_ON(l2_table_offset((unsigned long)_erodata) ==
> +                   l2_table_offset((unsigned long)_stext));

Is this intentional to stay, or the remnants of debugging?

Irrespective of some of these minor tweaks, Reviewed-by: Andrew Cooper
<andrew.cooper3@citrix.com>
Jan Beulich March 14, 2016, 3:41 p.m. UTC | #2
>>> On 14.03.16 at 16:23, <andrew.cooper3@citrix.com> wrote:
> On 14/03/16 15:12, Jan Beulich wrote:
>> --- a/xen/arch/x86/setup.c
>> +++ b/xen/arch/x86/setup.c
>> @@ -497,6 +497,17 @@ static void __init kexec_reserve_area(st
>>  #endif
>>  }
>>  
>> +static inline bool_t using_2M_mapping(void)
>> +{
>> +    return !l1_table_offset((unsigned long)__2M_text_end) &&
>> +           !l1_table_offset((unsigned long)__2M_rodata_start) &&
>> +           !l1_table_offset((unsigned long)__2M_rodata_end) &&
>> +           !l1_table_offset((unsigned long)__2M_init_start) &&
>> +           !l1_table_offset((unsigned long)__2M_init_end) &&
>> +           !l1_table_offset((unsigned long)__2M_rwdata_start) &&
>> +           !l1_table_offset((unsigned long)__2M_rwdata_end);
> 
> I would recommend
> 
> #ifdef EFI
> return 1;
> #else
> return 0;
> #endif
> 
> The compiler is unable to collapse that expression into a constant,
> because it can only be evaluated at link time.

But that wouldn't work, since setup.c gets compiled just once. The
only usable difference in building is the linking stage. (And
performance here is of no issue - this gets executed exactly twice.)
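
(Purely as an illustration, with made-up link-time addresses:
l1_table_offset() - bits 12..20 of a virtual address on x86-64 - is
zero for a symbol the linker placed on a 2M boundary, and generally
non-zero for the compact non-EFI layout, and which of the two applies
is only decided when the image is linked.)

#include <stdio.h>

#define L1_PAGETABLE_SHIFT    12
#define L1_PAGETABLE_ENTRIES  512
#define l1_table_offset(a) \
    (((a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1))

int main(void)
{
    /* Made-up addresses standing in for a linked __2M_init_start. */
    unsigned long aligned_2M = 0xffff82d080600000UL; /* 2M-aligned */
    unsigned long aligned_4k = 0xffff82d0805f3000UL; /* 4k-aligned */

    printf("2M-aligned layout: offset %lu\n", l1_table_offset(aligned_2M));
    printf("compact layout:    offset %lu\n", l1_table_offset(aligned_4k));
    return 0;
}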

>> @@ -509,10 +520,19 @@ static void noinline init_done(void)
>>      for ( va = __init_begin; va < _p(__init_end); va += PAGE_SIZE )
>>          clear_page(va);
>>  
>> -    /* Destroy Xen's mappings, and reuse the pages. */
>> -    destroy_xen_mappings((unsigned long)&__2M_init_start,
>> -                         (unsigned long)&__2M_init_end);
>> -    init_xenheap_pages(__pa(__2M_init_start), __pa(__2M_init_end));
>> +    if ( using_2M_mapping() )
>> +    {
>> +        /* Destroy Xen's mappings, and reuse the pages. */
> 
> I would be tempted to leave this comment back outside using_2M_mapping()
> which reduces the size of this hunk.

Oh, of course.

>> @@ -922,6 +942,8 @@ void __init noreturn __start_xen(unsigne
>>               * Undo the temporary-hooking of the l1_identmap.  __2M_text_start
>>               * is contained in this PTE.
>>               */
>> +            BUG_ON(l2_table_offset((unsigned long)_erodata) ==
>> +                   l2_table_offset((unsigned long)_stext));
> 
> Is this intentional to stay, or the remnants of debugging?

This is intentional, as it documents the validity of the immediately
following page table write: The change is not undoing the RWX ->
RX change that your original patch did, and using RX past
_erodata would obviously be wrong.

> Irrespective of some of these minor tweaks, Reviewed-by: Andrew Cooper
> <andrew.cooper3@citrix.com>

Thanks.

Jan
Andrew Cooper March 14, 2016, 4:10 p.m. UTC | #3
On 14/03/16 15:41, Jan Beulich wrote:
>>>> On 14.03.16 at 16:23, <andrew.cooper3@citrix.com> wrote:
>> On 14/03/16 15:12, Jan Beulich wrote:
>>> --- a/xen/arch/x86/setup.c
>>> +++ b/xen/arch/x86/setup.c
>>> @@ -497,6 +497,17 @@ static void __init kexec_reserve_area(st
>>>  #endif
>>>  }
>>>  
>>> +static inline bool_t using_2M_mapping(void)
>>> +{
>>> +    return !l1_table_offset((unsigned long)__2M_text_end) &&
>>> +           !l1_table_offset((unsigned long)__2M_rodata_start) &&
>>> +           !l1_table_offset((unsigned long)__2M_rodata_end) &&
>>> +           !l1_table_offset((unsigned long)__2M_init_start) &&
>>> +           !l1_table_offset((unsigned long)__2M_init_end) &&
>>> +           !l1_table_offset((unsigned long)__2M_rwdata_start) &&
>>> +           !l1_table_offset((unsigned long)__2M_rwdata_end);
>> I would recommend
>>
>> #ifdef EFI
>> return 1;
>> #else
>> return 0;
>> #endif
>>
>> The compiler is unable to collapse that expression into a constant,
>> because it can only be evaluated at link time.
> But that wouldn't work, since setup.c gets compiled just once. The
> only usable difference in building is the linking stage. (And
> performance here is of no issue - this gets executed exactly twice.)

Oh yes - that fact keeps on escaping me.  It is only an interim
solution, so is fine to stay.

>>> @@ -922,6 +942,8 @@ void __init noreturn __start_xen(unsigne
>>>               * Undo the temporary-hooking of the l1_identmap.  __2M_text_start
>>>               * is contained in this PTE.
>>>               */
>>> +            BUG_ON(l2_table_offset((unsigned long)_erodata) ==
>>> +                   l2_table_offset((unsigned long)_stext));
>> Is this intentional to stay, or the remnants of debugging?
> This is intentional, as it documents the validity of the immediately
> following page table write: The change is not undoing the RWX ->
> RX change that your original patch did, and using RX past
> _erodata would obviously be wrong.

Ah yes.

~Andrew