Message ID | 20170830025547.30347-2-nicolas.pitre@linaro.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Hi Nicolas, On Wed, Aug 30, 2017 at 4:55 AM, Nicolas Pitre <nicolas.pitre@linaro.org> wrote: > Let's use optimized routines such as memcpy to copy .data and memzero > to clear .bss in the startup code instead of doing it one word at a > time. Those routines don't use any global data so they're safe to use > even if .data and .bss segments are not initialized. > > In the .data copy case a temporary stack is installed in the .bss area > as the actual kernel stack is located within the copied data area. The > XIP kernel linker script ensures a 8 byte alignment for that purpose. > > Finally, make the .data copy and related pointers surrounded by > CONFIG_XIP_KERNEL to make it obvious what it is all about. This will > allow for further cleanups in the non-XIP linker script. > > Signed-off-by: Nicolas Pitre <nico@linaro.org> > Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> This is now commit 9520b1a1b5f7a348 ("ARM: head-common.S: speed up startup code") in arm/for-next. If CONFIG_DEBUG_LOCK_ALLOC=y, the kernel log is spammed with a few hundred identical messages on various Renesas systems: unwind: Unknown symbol address c0800300 unwind: Index not found c0800300 I've bisected this to the aforementioned commit. c0800300 points to the instruction just after the __memzero call, cfr. below. Do you have a clue? Thanks! > --- a/arch/arm/kernel/head-common.S > +++ b/arch/arm/kernel/head-common.S > @@ -79,47 +79,59 @@ ENDPROC(__vet_atags) > */ > __INIT > __mmap_switched: > - adr r3, __mmap_switched_data > - > - ldmia r3!, {r4, r5, r6, r7} > - cmp r4, r5 @ Copy data segment if needed > -1: cmpne r5, r6 > - ldrne fp, [r4], #4 > - strne fp, [r5], #4 > - bne 1b > - > - mov fp, #0 @ Clear BSS (and zero fp) > -1: cmp r6, r7 > - strcc fp, [r6],#4 > - bcc 1b > - > - ARM( ldmia r3, {r4, r5, r6, r7, sp}) > - THUMB( ldmia r3, {r4, r5, r6, r7} ) > - THUMB( ldr sp, [r3, #16] ) > - str r9, [r4] @ Save processor ID > - str r1, [r5] @ Save machine type > - str r2, [r6] @ Save atags pointer > - cmp r7, #0 > - strne r0, [r7] @ Save control register values > + > + mov r7, r1 > + mov r8, r2 > + mov r10, r0 > + > + adr r4, __mmap_switched_data > + mov fp, #0 > + > +#ifdef CONFIG_XIP_KERNEL > + ARM( ldmia r4!, {r0, r1, r2, sp} ) > + THUMB( ldmia r4!, {r0, r1, r2, r3} ) > + THUMB( mov sp, r3 ) > + sub r2, r2, r1 > + bl memcpy @ copy .data to RAM > +#endif > + > + ARM( ldmia r4!, {r0, r1, sp} ) > + THUMB( ldmia r4!, {r0, r1, r3} ) > + THUMB( mov sp, r3 ) > + sub r1, r1, r0 > + bl __memzero @ clear .bss > + c0800300 is the address of the next instruction: > + ldmia r4, {r0, r1, r2, r3} > + str r9, [r0] @ Save processor ID > + str r7, [r1] @ Save machine type > + str r8, [r2] @ Save atags pointer > + cmp r3, #0 > + strne r10, [r3] @ Save control register values > b start_kernel > ENDPROC(__mmap_switched) > > .align 2 > .type __mmap_switched_data, %object > __mmap_switched_data: > - .long __data_loc @ r4 > - .long _sdata @ r5 > - .long __bss_start @ r6 > - .long _end @ r7 > - .long processor_id @ r4 > - .long __machine_arch_type @ r5 > - .long __atags_pointer @ r6 > +#ifdef CONFIG_XIP_KERNEL > + .long _sdata @ r0 > + .long __data_loc @ r1 > + .long _edata_loc @ r2 > + .long __bss_stop @ sp (temporary stack in .bss) > +#endif > + > + .long __bss_start @ r0 > + .long __bss_stop @ r1 > + .long init_thread_union + THREAD_START_SP @ sp > + > + .long processor_id @ r0 > + .long __machine_arch_type @ r1 > + .long __atags_pointer @ r2 > #ifdef CONFIG_CPU_CP15 > - .long cr_alignment @ r7 > + .long cr_alignment @ r3 > #else > - .long 0 @ r7 > + .long 0 @ r3 > #endif > - .long init_thread_union + THREAD_START_SP @ sp > .size __mmap_switched_data, . - __mmap_switched_data > Gr{oetje,eeting}s, Geert -- Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org In personal conversations with technical people, I call myself a hacker. But when I'm talking to journalists I just say "programmer" or something like that. -- Linus Torvalds
On 3 October 2017 at 13:41, Geert Uytterhoeven <geert@linux-m68k.org> wrote: > Hi Nicolas, > > On Wed, Aug 30, 2017 at 4:55 AM, Nicolas Pitre <nicolas.pitre@linaro.org> wrote: >> Let's use optimized routines such as memcpy to copy .data and memzero >> to clear .bss in the startup code instead of doing it one word at a >> time. Those routines don't use any global data so they're safe to use >> even if .data and .bss segments are not initialized. >> >> In the .data copy case a temporary stack is installed in the .bss area >> as the actual kernel stack is located within the copied data area. The >> XIP kernel linker script ensures a 8 byte alignment for that purpose. >> >> Finally, make the .data copy and related pointers surrounded by >> CONFIG_XIP_KERNEL to make it obvious what it is all about. This will >> allow for further cleanups in the non-XIP linker script. >> >> Signed-off-by: Nicolas Pitre <nico@linaro.org> >> Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> > > This is now commit 9520b1a1b5f7a348 ("ARM: head-common.S: speed up startup > code") in arm/for-next. > > If CONFIG_DEBUG_LOCK_ALLOC=y, the kernel log is spammed with a few hundred > identical messages on various Renesas systems: > > unwind: Unknown symbol address c0800300 > unwind: Index not found c0800300 > > I've bisected this to the aforementioned commit. > > c0800300 points to the instruction just after the __memzero call, cfr. below. > > Do you have a clue? Thanks! > Hallo Geert, It looks like this patch results in start_kernel() being entered with a different value for lr than before. Could you please try setting it to zero instead, right before the jump to start_kernel() ? I don't think the patch itself is to blame here, it simply triggers an existing issue in the unwinder (if my analysis is correct, of course)
Hoi Ard, On Tue, Oct 3, 2017 at 3:15 PM, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote: > On 3 October 2017 at 13:41, Geert Uytterhoeven <geert@linux-m68k.org> wrote: >> On Wed, Aug 30, 2017 at 4:55 AM, Nicolas Pitre <nicolas.pitre@linaro.org> wrote: >>> Let's use optimized routines such as memcpy to copy .data and memzero >>> to clear .bss in the startup code instead of doing it one word at a >>> time. Those routines don't use any global data so they're safe to use >>> even if .data and .bss segments are not initialized. >>> >>> In the .data copy case a temporary stack is installed in the .bss area >>> as the actual kernel stack is located within the copied data area. The >>> XIP kernel linker script ensures a 8 byte alignment for that purpose. >>> >>> Finally, make the .data copy and related pointers surrounded by >>> CONFIG_XIP_KERNEL to make it obvious what it is all about. This will >>> allow for further cleanups in the non-XIP linker script. >>> >>> Signed-off-by: Nicolas Pitre <nico@linaro.org> >>> Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> >> >> This is now commit 9520b1a1b5f7a348 ("ARM: head-common.S: speed up startup >> code") in arm/for-next. >> >> If CONFIG_DEBUG_LOCK_ALLOC=y, the kernel log is spammed with a few hundred >> identical messages on various Renesas systems: >> >> unwind: Unknown symbol address c0800300 >> unwind: Index not found c0800300 >> >> I've bisected this to the aforementioned commit. >> >> c0800300 points to the instruction just after the __memzero call, cfr. below. >> >> Do you have a clue? Thanks! > > It looks like this patch results in start_kernel() being entered with > a different value for lr than before. Could you please try setting it > to zero instead, right before the jump to start_kernel() ? > > I don't think the patch itself is to blame here, it simply triggers an > existing issue in the unwinder (if my analysis is correct, of course) Your analysis looks correct to me, thanks! Patch sent. Gr{oetje,eeting}s, Geert -- Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org In personal conversations with technical people, I call myself a hacker. But when I'm talking to journalists I just say "programmer" or something like that. -- Linus Torvalds
diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S index 8733012d23..bf9c4e38ec 100644 --- a/arch/arm/kernel/head-common.S +++ b/arch/arm/kernel/head-common.S @@ -79,47 +79,59 @@ ENDPROC(__vet_atags) */ __INIT __mmap_switched: - adr r3, __mmap_switched_data - - ldmia r3!, {r4, r5, r6, r7} - cmp r4, r5 @ Copy data segment if needed -1: cmpne r5, r6 - ldrne fp, [r4], #4 - strne fp, [r5], #4 - bne 1b - - mov fp, #0 @ Clear BSS (and zero fp) -1: cmp r6, r7 - strcc fp, [r6],#4 - bcc 1b - - ARM( ldmia r3, {r4, r5, r6, r7, sp}) - THUMB( ldmia r3, {r4, r5, r6, r7} ) - THUMB( ldr sp, [r3, #16] ) - str r9, [r4] @ Save processor ID - str r1, [r5] @ Save machine type - str r2, [r6] @ Save atags pointer - cmp r7, #0 - strne r0, [r7] @ Save control register values + + mov r7, r1 + mov r8, r2 + mov r10, r0 + + adr r4, __mmap_switched_data + mov fp, #0 + +#ifdef CONFIG_XIP_KERNEL + ARM( ldmia r4!, {r0, r1, r2, sp} ) + THUMB( ldmia r4!, {r0, r1, r2, r3} ) + THUMB( mov sp, r3 ) + sub r2, r2, r1 + bl memcpy @ copy .data to RAM +#endif + + ARM( ldmia r4!, {r0, r1, sp} ) + THUMB( ldmia r4!, {r0, r1, r3} ) + THUMB( mov sp, r3 ) + sub r1, r1, r0 + bl __memzero @ clear .bss + + ldmia r4, {r0, r1, r2, r3} + str r9, [r0] @ Save processor ID + str r7, [r1] @ Save machine type + str r8, [r2] @ Save atags pointer + cmp r3, #0 + strne r10, [r3] @ Save control register values b start_kernel ENDPROC(__mmap_switched) .align 2 .type __mmap_switched_data, %object __mmap_switched_data: - .long __data_loc @ r4 - .long _sdata @ r5 - .long __bss_start @ r6 - .long _end @ r7 - .long processor_id @ r4 - .long __machine_arch_type @ r5 - .long __atags_pointer @ r6 +#ifdef CONFIG_XIP_KERNEL + .long _sdata @ r0 + .long __data_loc @ r1 + .long _edata_loc @ r2 + .long __bss_stop @ sp (temporary stack in .bss) +#endif + + .long __bss_start @ r0 + .long __bss_stop @ r1 + .long init_thread_union + THREAD_START_SP @ sp + + .long processor_id @ r0 + .long __machine_arch_type @ r1 + .long __atags_pointer @ r2 #ifdef CONFIG_CPU_CP15 - .long cr_alignment @ r7 + .long cr_alignment @ r3 #else - .long 0 @ r7 + .long 0 @ r3 #endif - .long init_thread_union + THREAD_START_SP @ sp .size __mmap_switched_data, . - __mmap_switched_data /* diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S index 8265b11621..1598caada3 100644 --- a/arch/arm/kernel/vmlinux-xip.lds.S +++ b/arch/arm/kernel/vmlinux-xip.lds.S @@ -301,7 +301,7 @@ SECTIONS } #endif - BSS_SECTION(0, 0, 0) + BSS_SECTION(0, 0, 8) _end = .; STABS_DEBUG