Message ID | 1455818622-30625-4-git-send-email-andrew.cooper3@citrix.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
>>> On 18.02.16 at 19:03, <andrew.cooper3@citrix.com> wrote: > --- a/xen/arch/x86/setup.c > +++ b/xen/arch/x86/setup.c > @@ -921,13 +921,51 @@ void __init noreturn __start_xen(unsigned long mbi_p) > /* The only data mappings to be relocated are in the Xen area. */ > pl2e = __va(__pa(l2_xenmap)); > *pl2e++ = l2e_from_pfn(xen_phys_start >> PAGE_SHIFT, > - PAGE_HYPERVISOR_RWX | _PAGE_PSE); > + PAGE_HYPERVISOR_RX | _PAGE_PSE); > for ( i = 1; i < L2_PAGETABLE_ENTRIES; i++, pl2e++ ) > { > + unsigned int flags; > + > if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) ) > continue; > - *pl2e = l2e_from_intpte(l2e_get_intpte(*pl2e) + > - xen_phys_start); > + > + if ( /* > + * Should be: > + * > + * i >= l2_table_offset((unsigned long)&__2M_text_start) && > + * > + * but the EFI build can't manage the relocation. It > + * evaluates to 0, so just use the upper bound. > + */ > + i < l2_table_offset((unsigned long)&__2M_text_end) ) I'll need some more detail about this, not the least because excusing what looks like a hack with EFI, under which we won't ever get here, is suspicious. > + { > + flags = PAGE_HYPERVISOR_RX | _PAGE_PSE; > + } > + else if ( i >= l2_table_offset((unsigned long)&__2M_rodata_start) && > + i < l2_table_offset((unsigned long)&__2M_rodata_end) ) > + { > + flags = PAGE_HYPERVISOR_RO | _PAGE_PSE; > + } > + else if ( (i >= l2_table_offset((unsigned long)&__2M_data_start) && > + i < l2_table_offset((unsigned long)&__2M_data_end)) || > + (i >= l2_table_offset((unsigned long)&__2M_bss_start) && > + i < l2_table_offset((unsigned long)&__2M_bss_end)) ) This is odd - why can't .data and .bss share a (multiple of) 2M region, at once presumably getting the whole image down to 10M again? > --- a/xen/arch/x86/xen.lds.S > +++ b/xen/arch/x86/xen.lds.S > @@ -38,6 +38,9 @@ SECTIONS > . = __XEN_VIRT_START; > __image_base__ = .; > #endif > + > + __2M_text_start = .; /* Start of 2M superpages, mapped RX. */ Is the reason for aforementioned build problem perhaps the fact that this label (and the others too) lives outside of any section? > --- a/xen/include/xen/kernel.h > +++ b/xen/include/xen/kernel.h > @@ -65,6 +65,12 @@ > 1; \ > }) > > +extern unsigned long __2M_text_start[], __2M_text_end[]; > +extern unsigned long __2M_rodata_start[], __2M_rodata_end[]; > +extern unsigned long __2M_data_start[], __2M_data_end[]; > +extern unsigned long __2M_init_start[], __2M_init_end[]; > +extern unsigned long __2M_bss_start[], __2M_bss_end[]; I'd really like to see at least the ones which are reference to r/o sections marked const. Jan
On 19/02/16 14:58, Jan Beulich wrote: >>>> On 18.02.16 at 19:03, <andrew.cooper3@citrix.com> wrote: >> --- a/xen/arch/x86/setup.c >> +++ b/xen/arch/x86/setup.c >> @@ -921,13 +921,51 @@ void __init noreturn __start_xen(unsigned long mbi_p) >> /* The only data mappings to be relocated are in the Xen area. */ >> pl2e = __va(__pa(l2_xenmap)); >> *pl2e++ = l2e_from_pfn(xen_phys_start >> PAGE_SHIFT, >> - PAGE_HYPERVISOR_RWX | _PAGE_PSE); >> + PAGE_HYPERVISOR_RX | _PAGE_PSE); >> for ( i = 1; i < L2_PAGETABLE_ENTRIES; i++, pl2e++ ) >> { >> + unsigned int flags; >> + >> if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) ) >> continue; >> - *pl2e = l2e_from_intpte(l2e_get_intpte(*pl2e) + >> - xen_phys_start); >> + >> + if ( /* >> + * Should be: >> + * >> + * i >= l2_table_offset((unsigned long)&__2M_text_start) && >> + * >> + * but the EFI build can't manage the relocation. It >> + * evaluates to 0, so just use the upper bound. >> + */ >> + i < l2_table_offset((unsigned long)&__2M_text_end) ) > I'll need some more detail about this, not the least because > excusing what looks like a hack with EFI, under which we won't > ever get here, is suspicious. Specifically, the EFI uses i386pep and it objects to a 64bit relocation of 0xfffffffffffffffc. I can't explain why this symbol ends up with that relocation. > >> + { >> + flags = PAGE_HYPERVISOR_RX | _PAGE_PSE; >> + } >> + else if ( i >= l2_table_offset((unsigned long)&__2M_rodata_start) && >> + i < l2_table_offset((unsigned long)&__2M_rodata_end) ) >> + { >> + flags = PAGE_HYPERVISOR_RO | _PAGE_PSE; >> + } >> + else if ( (i >= l2_table_offset((unsigned long)&__2M_data_start) && >> + i < l2_table_offset((unsigned long)&__2M_data_end)) || >> + (i >= l2_table_offset((unsigned long)&__2M_bss_start) && >> + i < l2_table_offset((unsigned long)&__2M_bss_end)) ) > This is odd - why can't .data and .bss share a (multiple of) 2M > region, at once presumably getting the whole image down to 10M > again? .init is between .data and .bss, to allow .bss to be the final section and not included in the result of mkelf32 > >> --- a/xen/arch/x86/xen.lds.S >> +++ b/xen/arch/x86/xen.lds.S >> @@ -38,6 +38,9 @@ SECTIONS >> . = __XEN_VIRT_START; >> __image_base__ = .; >> #endif >> + >> + __2M_text_start = .; /* Start of 2M superpages, mapped RX. */ > Is the reason for aforementioned build problem perhaps the fact > that this label (and the others too) lives outside of any section? I am not sure. It is only this symbol which is a problem. All others are fine. I actually intended this to be an RFC patch, to see if anyone had suggestions. > >> --- a/xen/include/xen/kernel.h >> +++ b/xen/include/xen/kernel.h >> @@ -65,6 +65,12 @@ >> 1; \ >> }) >> >> +extern unsigned long __2M_text_start[], __2M_text_end[]; >> +extern unsigned long __2M_rodata_start[], __2M_rodata_end[]; >> +extern unsigned long __2M_data_start[], __2M_data_end[]; >> +extern unsigned long __2M_init_start[], __2M_init_end[]; >> +extern unsigned long __2M_bss_start[], __2M_bss_end[]; > I'd really like to see at least the ones which are reference to r/o > sections marked const. Ok. I should probably also make them char rather than unsigned long, so pointer arithmetic works sensibly for the length. ~Andrew
>>> On 19.02.16 at 16:51, <andrew.cooper3@citrix.com> wrote: > On 19/02/16 14:58, Jan Beulich wrote: >>>>> On 18.02.16 at 19:03, <andrew.cooper3@citrix.com> wrote: >>> --- a/xen/arch/x86/setup.c >>> +++ b/xen/arch/x86/setup.c >>> @@ -921,13 +921,51 @@ void __init noreturn __start_xen(unsigned long mbi_p) >>> /* The only data mappings to be relocated are in the Xen area. */ >>> pl2e = __va(__pa(l2_xenmap)); >>> *pl2e++ = l2e_from_pfn(xen_phys_start >> PAGE_SHIFT, >>> - PAGE_HYPERVISOR_RWX | _PAGE_PSE); >>> + PAGE_HYPERVISOR_RX | _PAGE_PSE); >>> for ( i = 1; i < L2_PAGETABLE_ENTRIES; i++, pl2e++ ) >>> { >>> + unsigned int flags; >>> + >>> if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) ) >>> continue; >>> - *pl2e = l2e_from_intpte(l2e_get_intpte(*pl2e) + >>> - xen_phys_start); >>> + >>> + if ( /* >>> + * Should be: >>> + * >>> + * i >= l2_table_offset((unsigned long)&__2M_text_start) && >>> + * >>> + * but the EFI build can't manage the relocation. It >>> + * evaluates to 0, so just use the upper bound. >>> + */ >>> + i < l2_table_offset((unsigned long)&__2M_text_end) ) >> I'll need some more detail about this, not the least because >> excusing what looks like a hack with EFI, under which we won't >> ever get here, is suspicious. > > Specifically, the EFI uses i386pep and it objects to a 64bit relocation > of 0xfffffffffffffffc. > > I can't explain why this symbol ends up with that relocation. Interesting. >>> + { >>> + flags = PAGE_HYPERVISOR_RX | _PAGE_PSE; >>> + } >>> + else if ( i >= l2_table_offset((unsigned long)&__2M_rodata_start) && >>> + i < l2_table_offset((unsigned long)&__2M_rodata_end) ) >>> + { >>> + flags = PAGE_HYPERVISOR_RO | _PAGE_PSE; >>> + } >>> + else if ( (i >= l2_table_offset((unsigned long)&__2M_data_start) && >>> + i < l2_table_offset((unsigned long)&__2M_data_end)) || >>> + (i >= l2_table_offset((unsigned long)&__2M_bss_start) && >>> + i < l2_table_offset((unsigned long)&__2M_bss_end)) ) >> This is odd - why can't .data and .bss share a (multiple of) 2M >> region, at once presumably getting the whole image down to 10M >> again? > > .init is between .data and .bss, to allow .bss to be the final section > and not included in the result of mkelf32 But I don't think it needs to remain there? Should be possible to be put between .text and .rodata, or between .rodata and .data ... >>> --- a/xen/arch/x86/xen.lds.S >>> +++ b/xen/arch/x86/xen.lds.S >>> @@ -38,6 +38,9 @@ SECTIONS >>> . = __XEN_VIRT_START; >>> __image_base__ = .; >>> #endif >>> + >>> + __2M_text_start = .; /* Start of 2M superpages, mapped RX. */ >> Is the reason for aforementioned build problem perhaps the fact >> that this label (and the others too) lives outside of any section? > > I am not sure. It is only this symbol which is a problem. All others > are fine. > > I actually intended this to be an RFC patch, to see if anyone had > suggestions. Since you now imply this to be at the image base, I don't see why using e.g. __XEN_VIRT_START (in its place, or via #define) wouldn't be as good an option. >>> --- a/xen/include/xen/kernel.h >>> +++ b/xen/include/xen/kernel.h >>> @@ -65,6 +65,12 @@ >>> 1; \ >>> }) >>> >>> +extern unsigned long __2M_text_start[], __2M_text_end[]; >>> +extern unsigned long __2M_rodata_start[], __2M_rodata_end[]; >>> +extern unsigned long __2M_data_start[], __2M_data_end[]; >>> +extern unsigned long __2M_init_start[], __2M_init_end[]; >>> +extern unsigned long __2M_bss_start[], __2M_bss_end[]; >> I'd really like to see at least the ones which are reference to r/o >> sections marked const. > > Ok. I should probably also make them char rather than unsigned long, so > pointer arithmetic works sensibly for the length. Good idea indeed. Jan
On 22/02/16 09:55, Jan Beulich wrote: > >>>> + { >>>> + flags = PAGE_HYPERVISOR_RX | _PAGE_PSE; >>>> + } >>>> + else if ( i >= l2_table_offset((unsigned long)&__2M_rodata_start) && >>>> + i < l2_table_offset((unsigned long)&__2M_rodata_end) ) >>>> + { >>>> + flags = PAGE_HYPERVISOR_RO | _PAGE_PSE; >>>> + } >>>> + else if ( (i >= l2_table_offset((unsigned long)&__2M_data_start) && >>>> + i < l2_table_offset((unsigned long)&__2M_data_end)) || >>>> + (i >= l2_table_offset((unsigned long)&__2M_bss_start) && >>>> + i < l2_table_offset((unsigned long)&__2M_bss_end)) ) >>> This is odd - why can't .data and .bss share a (multiple of) 2M >>> region, at once presumably getting the whole image down to 10M >>> again? >> .init is between .data and .bss, to allow .bss to be the final section >> and not included in the result of mkelf32 > But I don't think it needs to remain there? Should be possible to be > put between .text and .rodata, or between .rodata and .data ... Actually yes - shifting .init to between .rodata and .data should work fine. > >>>> --- a/xen/arch/x86/xen.lds.S >>>> +++ b/xen/arch/x86/xen.lds.S >>>> @@ -38,6 +38,9 @@ SECTIONS >>>> . = __XEN_VIRT_START; >>>> __image_base__ = .; >>>> #endif >>>> + >>>> + __2M_text_start = .; /* Start of 2M superpages, mapped RX. */ >>> Is the reason for aforementioned build problem perhaps the fact >>> that this label (and the others too) lives outside of any section? >> I am not sure. It is only this symbol which is a problem. All others >> are fine. >> >> I actually intended this to be an RFC patch, to see if anyone had >> suggestions. > Since you now imply this to be at the image base, I don't see > why using e.g. __XEN_VIRT_START (in its place, or via #define) > wouldn't be as good an option. I don't understand what you mean here. If you are suggesting #define __2M_text_start __XEN_VIRT_START then I don't see how that is going to help with the relocation. ~Andrew
>>> On 22.02.16 at 11:24, <andrew.cooper3@citrix.com> wrote: > On 22/02/16 09:55, Jan Beulich wrote: >>>>> --- a/xen/arch/x86/xen.lds.S >>>>> +++ b/xen/arch/x86/xen.lds.S >>>>> @@ -38,6 +38,9 @@ SECTIONS >>>>> . = __XEN_VIRT_START; >>>>> __image_base__ = .; >>>>> #endif >>>>> + >>>>> + __2M_text_start = .; /* Start of 2M superpages, mapped RX. */ >>>> Is the reason for aforementioned build problem perhaps the fact >>>> that this label (and the others too) lives outside of any section? >>> I am not sure. It is only this symbol which is a problem. All others >>> are fine. >>> >>> I actually intended this to be an RFC patch, to see if anyone had >>> suggestions. >> Since you now imply this to be at the image base, I don't see >> why using e.g. __XEN_VIRT_START (in its place, or via #define) >> wouldn't be as good an option. > > I don't understand what you mean here. > > If you are suggesting #define __2M_text_start __XEN_VIRT_START then I > don't see how that is going to help with the relocation. Why not? __XEN_VIRT_START is a constant, i.e. won't result in any relocations to get emitted. Jan
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c index cddf954..c360fbc 100644 --- a/xen/arch/x86/setup.c +++ b/xen/arch/x86/setup.c @@ -921,13 +921,51 @@ void __init noreturn __start_xen(unsigned long mbi_p) /* The only data mappings to be relocated are in the Xen area. */ pl2e = __va(__pa(l2_xenmap)); *pl2e++ = l2e_from_pfn(xen_phys_start >> PAGE_SHIFT, - PAGE_HYPERVISOR_RWX | _PAGE_PSE); + PAGE_HYPERVISOR_RX | _PAGE_PSE); for ( i = 1; i < L2_PAGETABLE_ENTRIES; i++, pl2e++ ) { + unsigned int flags; + if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) ) continue; - *pl2e = l2e_from_intpte(l2e_get_intpte(*pl2e) + - xen_phys_start); + + if ( /* + * Should be: + * + * i >= l2_table_offset((unsigned long)&__2M_text_start) && + * + * but the EFI build can't manage the relocation. It + * evaluates to 0, so just use the upper bound. + */ + i < l2_table_offset((unsigned long)&__2M_text_end) ) + { + flags = PAGE_HYPERVISOR_RX | _PAGE_PSE; + } + else if ( i >= l2_table_offset((unsigned long)&__2M_rodata_start) && + i < l2_table_offset((unsigned long)&__2M_rodata_end) ) + { + flags = PAGE_HYPERVISOR_RO | _PAGE_PSE; + } + else if ( (i >= l2_table_offset((unsigned long)&__2M_data_start) && + i < l2_table_offset((unsigned long)&__2M_data_end)) || + (i >= l2_table_offset((unsigned long)&__2M_bss_start) && + i < l2_table_offset((unsigned long)&__2M_bss_end)) ) + { + flags = PAGE_HYPERVISOR_RW | _PAGE_PSE; + } + else if ( i >= l2_table_offset((unsigned long)&__2M_init_start) && + i < l2_table_offset((unsigned long)&__2M_init_end) ) + { + flags = PAGE_HYPERVISOR_RWX | _PAGE_PSE; + } + else + { + *pl2e = l2e_empty(); + continue; + } + + *pl2e = l2e_from_paddr( + l2e_get_paddr(*pl2e) + xen_phys_start, flags); } /* Re-sync the stack and then switch to relocated pagetables. */ diff --git a/xen/arch/x86/xen.lds.S b/xen/arch/x86/xen.lds.S index 9fde1db..6c23c02 100644 --- a/xen/arch/x86/xen.lds.S +++ b/xen/arch/x86/xen.lds.S @@ -38,6 +38,9 @@ SECTIONS . = __XEN_VIRT_START; __image_base__ = .; #endif + + __2M_text_start = .; /* Start of 2M superpages, mapped RX. */ + . = __XEN_VIRT_START + MB(1); _start = .; .text : { @@ -50,6 +53,10 @@ SECTIONS _etext = .; /* End of text section */ } :text = 0x9090 + . = ALIGN(MB(2)); + __2M_text_end = .; + + __2M_rodata_start = .; /* Start of 2M superpages, mapped RO. */ .rodata : { /* Bug frames table */ . = ALIGN(4); @@ -67,6 +74,10 @@ SECTIONS *(.rodata.*) } :text + . = ALIGN(MB(2)); + __2M_rodata_end = .; + + __2M_data_start = .; /* Start of 2M superpages, mapped RW. */ . = ALIGN(SMP_CACHE_BYTES); .data.read_mostly : { /* Exception table */ @@ -104,6 +115,10 @@ SECTIONS __lock_profile_end = .; #endif + . = ALIGN(MB(2)); + __2M_data_end = .; + + __2M_init_start = .; /* Start of 2M superpages, mapped RWX (boot only). */ . = ALIGN(PAGE_SIZE); /* Init code and data */ __init_begin = .; .init.text : { @@ -166,6 +181,10 @@ SECTIONS . = ALIGN(STACK_SIZE); __init_end = .; + . = ALIGN(MB(2)); + __2M_init_end = .; + + __2M_bss_start = .; /* Start of 2M superpages, mapped RW. */ .bss : { /* BSS */ __bss_start = .; *(.bss.stack_aligned) @@ -183,6 +202,9 @@ SECTIONS } :text _end = . ; + . = ALIGN(MB(2)); + __2M_bss_end = .; + #ifdef EFI . = ALIGN(4); .reloc : { @@ -229,4 +251,15 @@ ASSERT(__image_base__ > XEN_VIRT_START || ASSERT(kexec_reloc_size - kexec_reloc <= PAGE_SIZE, "kexec_reloc is too large") #endif +ASSERT(IS_ALIGNED(__2M_text_start, MB(2)), "__2M_text_start misaligned") +ASSERT(IS_ALIGNED(__2M_text_end, MB(2)), "__2M_text_end misaligned") +ASSERT(IS_ALIGNED(__2M_rodata_start, MB(2)), "__2M_rodata_start misaligned") +ASSERT(IS_ALIGNED(__2M_rodata_end, MB(2)), "__2M_rodata_end misaligned") +ASSERT(IS_ALIGNED(__2M_data_start, MB(2)), "__2M_data_start misaligned") +ASSERT(IS_ALIGNED(__2M_data_end, MB(2)), "__2M_data_end misaligned") +ASSERT(IS_ALIGNED(__2M_init_start, MB(2)), "__2M_init_start misaligned") +ASSERT(IS_ALIGNED(__2M_init_end, MB(2)), "__2M_init_end misaligned") +ASSERT(IS_ALIGNED(__2M_bss_start, MB(2)), "__2M_bss_start misaligned") +ASSERT(IS_ALIGNED(__2M_bss_end, MB(2)), "__2M_bss_end misaligned") + ASSERT(IS_ALIGNED(cpu0_stack, STACK_SIZE), "cpu0_stack misaligned") diff --git a/xen/include/xen/kernel.h b/xen/include/xen/kernel.h index 548b64d..8d08eca 100644 --- a/xen/include/xen/kernel.h +++ b/xen/include/xen/kernel.h @@ -65,6 +65,12 @@ 1; \ }) +extern unsigned long __2M_text_start[], __2M_text_end[]; +extern unsigned long __2M_rodata_start[], __2M_rodata_end[]; +extern unsigned long __2M_data_start[], __2M_data_end[]; +extern unsigned long __2M_init_start[], __2M_init_end[]; +extern unsigned long __2M_bss_start[], __2M_bss_end[]; + extern char _start[], _end[], start[]; #define is_kernel(p) ({ \ char *__p = (char *)(unsigned long)(p); \
This balloons the size of Xen in memory from 3.4MB to 12MB, because of the required alignment adjustments. However * All mappings are 2M superpages. * .text (and .init at boot) are the only sections marked executable. * .text and .rodata are marked read-only. Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> --- CC: Jan Beulich <JBeulich@suse.com> --- xen/arch/x86/setup.c | 44 +++++++++++++++++++++++++++++++++++++++++--- xen/arch/x86/xen.lds.S | 33 +++++++++++++++++++++++++++++++++ xen/include/xen/kernel.h | 6 ++++++ 3 files changed, 80 insertions(+), 3 deletions(-)