| Message ID | 1466012148-7674-3-git-send-email-james.morse@arm.com (mailing list archive) |
|---|---|
| State | Not Applicable, archived |
On Wed, Jun 15, 2016 at 06:35:44PM +0100, James Morse wrote:
> Resume from hibernate needs to clean any text executed by the kernel with
> the MMU off to the PoC. Collect these functions together into a new
> .mmuoff.text section.
>
> This covers booting of secondary cores and the cpu_suspend() path used
> by cpu-idle and suspend-to-ram.
>
> The bulk of head.S is not included, as the primary boot code is only ever
> executed once; the kernel never needs to ensure it is cleaned to a
> particular point in the cache.
>
> Suggested-by: Mark Rutland <mark.rutland@arm.com>
> Signed-off-by: James Morse <james.morse@arm.com>
> Cc: Mark Rutland <mark.rutland@arm.com>
> ---
> This patch is new since v1.
>
>  arch/arm64/include/asm/sections.h | 1 +
>  arch/arm64/kernel/head.S          | 6 ++++--
>  arch/arm64/kernel/sleep.S         | 2 ++
>  arch/arm64/kernel/vmlinux.lds.S   | 3 +++
>  arch/arm64/mm/proc.S              | 4 ++++
>  5 files changed, 14 insertions(+), 2 deletions(-)
>
> diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h
> index cb68eb348566..6f27c3f86a09 100644
> --- a/arch/arm64/include/asm/sections.h
> +++ b/arch/arm64/include/asm/sections.h
> @@ -24,5 +24,6 @@ extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[];
>  extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
>  extern char __hyp_text_start[], __hyp_text_end[];
>  extern char __idmap_text_start[], __idmap_text_end[];
> +extern char __mmuoff_text_start[], __mmuoff_text_end[];
>
>  #endif /* __ASM_SECTIONS_H */
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index 2c6e598a94dc..ff37231e2054 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -477,6 +477,7 @@ ENTRY(kimage_vaddr)
>   * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in x20 if
>   * booted in EL1 or EL2 respectively.
>   */
> +	.pushsection ".mmuoff.text", "ax"
>  ENTRY(el2_setup)
>  	mrs	x0, CurrentEL
>  	cmp	x0, #CurrentEL_EL2
> @@ -604,7 +605,7 @@ install_el2_stub:
>  	mov	w20, #BOOT_CPU_MODE_EL2		// This CPU booted in EL2
>  	eret
>  ENDPROC(el2_setup)
> -
> +	.popsection
>  /*
>   * Sets the __boot_cpu_mode flag depending on the CPU boot mode passed
>   * in x20. See arch/arm64/include/asm/virt.h for more info.
> @@ -656,6 +657,7 @@ ENDPROC(secondary_holding_pen)
>   * Secondary entry point that jumps straight into the kernel. Only to
>   * be used where CPUs are brought online dynamically by the kernel.
>   */
> +	.pushsection ".mmuoff.text", "ax"
>  ENTRY(secondary_entry)
>  	bl	el2_setup			// Drop to EL1
>  	bl	set_cpu_boot_mode_flag
> @@ -687,7 +689,7 @@ __secondary_switched:
>  	mov	x29, #0
>  	b	secondary_start_kernel
>  ENDPROC(__secondary_switched)
> -
> +	.popsection

I think we also need to cover set_cpu_boot_mode_flag and
__boot_cpu_mode.

Likewise secondary_holding_pen and secondary_holding_pen_release, in
case you booted with maxcpus=1, suspended, resumed, then tried to bring
secondaries up with spin-table.

Otherwise, this looks good!

Thanks,
Mark.

>  /*
>   * The booting CPU updates the failed status @__early_cpu_boot_status,
>   * with MMU turned off.
> diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
> index 9a3aec97ac09..e66ce9b7bbde 100644
> --- a/arch/arm64/kernel/sleep.S
> +++ b/arch/arm64/kernel/sleep.S
> @@ -97,6 +97,7 @@ ENTRY(__cpu_suspend_enter)
>  ENDPROC(__cpu_suspend_enter)
>  	.ltorg
>
> +	.pushsection ".mmuoff.text", "ax"
>  ENTRY(cpu_resume)
>  	bl	el2_setup		// if in EL2 drop to EL1 cleanly
>  	/* enable the MMU early - so we can access sleep_save_stash by va */
> @@ -106,6 +107,7 @@ ENTRY(cpu_resume)
>  	adrp	x26, swapper_pg_dir
>  	b	__cpu_setup
>  ENDPROC(cpu_resume)
> +	.popsection
>
>  ENTRY(_cpu_resume)
>  	mrs	x1, mpidr_el1
> diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
> index 435e820e898d..64fe907bcc5f 100644
> --- a/arch/arm64/kernel/vmlinux.lds.S
> +++ b/arch/arm64/kernel/vmlinux.lds.S
> @@ -118,6 +118,9 @@ SECTIONS
>  			__exception_text_end = .;
>  			IRQENTRY_TEXT
>  			SOFTIRQENTRY_TEXT
> +			__mmuoff_text_start = .;
> +			*(.mmuoff.text)
> +			__mmuoff_text_end = .;
>  			TEXT_TEXT
>  			SCHED_TEXT
>  			LOCK_TEXT
> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> index c4317879b938..655ff3ec90f2 100644
> --- a/arch/arm64/mm/proc.S
> +++ b/arch/arm64/mm/proc.S
> @@ -83,6 +83,7 @@ ENDPROC(cpu_do_suspend)
>   *
>   * x0: Address of context pointer
>   */
> +	.pushsection ".mmuoff.text", "ax"
>  ENTRY(cpu_do_resume)
>  	ldp	x2, x3, [x0]
>  	ldp	x4, x5, [x0, #16]
> @@ -111,6 +112,7 @@ ENTRY(cpu_do_resume)
>  	isb
>  	ret
>  ENDPROC(cpu_do_resume)
> +	.popsection
>  #endif
>
>  /*
> @@ -172,6 +174,7 @@ ENDPROC(idmap_cpu_replace_ttbr1)
>   * Initialise the processor for turning the MMU on. Return in x0 the
>   * value of the SCTLR_EL1 register.
>   */
> +	.pushsection ".mmuoff.text", "ax"
>  ENTRY(__cpu_setup)
>  	tlbi	vmalle1				// Invalidate local TLB
>  	dsb	nsh
> @@ -255,3 +258,4 @@ ENDPROC(__cpu_setup)
>  crval:
>  	.word	0xfcffffff			// clear
>  	.word	0x34d5d91d			// set
> +	.popsection
> --
> 2.8.0.rc3
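To make the commit message's requirement concrete: once the section exists, the resume path has to clean everything between the new markers to the Point of Coherency before any of it runs with the MMU off. A minimal sketch of that step, assuming arm64's existing __flush_dcache_area() helper and the section symbols this patch adds; the actual call site in the hibernate code may differ:

```c
/*
 * Sketch only: clean the whole .mmuoff.text section to the PoC so a
 * CPU can safely execute it with the MMU (and therefore caches) off.
 * __flush_dcache_area() cleans and invalidates by VA to the PoC; the
 * bounds come from the linker-script symbols added by this patch.
 */
#include <asm/cacheflush.h>
#include <asm/sections.h>

static void clean_mmuoff_text_to_poc(void)
{
	__flush_dcache_area(__mmuoff_text_start,
			    __mmuoff_text_end - __mmuoff_text_start);
}
```

Collecting the functions into one contiguous section is what allows a single range-based clean like this, rather than cleaning each MMU-off function individually.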
Hi Mark,

On 16/06/16 12:10, Mark Rutland wrote:
> On Wed, Jun 15, 2016 at 06:35:44PM +0100, James Morse wrote:
>> Resume from hibernate needs to clean any text executed by the kernel with
>> the MMU off to the PoC. Collect these functions together into a new
>> .mmuoff.text section.
>>
>> This covers booting of secondary cores and the cpu_suspend() path used
>> by cpu-idle and suspend-to-ram.
>>
>> The bulk of head.S is not included, as the primary boot code is only ever
>> executed once; the kernel never needs to ensure it is cleaned to a
>> particular point in the cache.

>> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
>> index 2c6e598a94dc..ff37231e2054 100644
>> --- a/arch/arm64/kernel/head.S
>> +++ b/arch/arm64/kernel/head.S
>> @@ -656,6 +657,7 @@ ENDPROC(secondary_holding_pen)
>>   * Secondary entry point that jumps straight into the kernel. Only to
>>   * be used where CPUs are brought online dynamically by the kernel.
>>   */
>> +	.pushsection ".mmuoff.text", "ax"
>>  ENTRY(secondary_entry)
>>  	bl	el2_setup			// Drop to EL1
>>  	bl	set_cpu_boot_mode_flag
>> @@ -687,7 +689,7 @@ __secondary_switched:
>>  	mov	x29, #0
>>  	b	secondary_start_kernel
>>  ENDPROC(__secondary_switched)
>> -
>> +	.popsection
>
> I think we also need to cover set_cpu_boot_mode_flag and
> __boot_cpu_mode.

Bother, yes.
How come cpu_resume doesn't call this? I guess we assume
psci_cpu_suspend_enter() will bring the core back at the same EL.

> Likewise secondary_holding_pen and secondary_holding_pen_release, in
> case you booted with maxcpus=1, suspended, resumed, then tried to bring
> secondaries up with spin-table.

Whoa! This must never happen!
With KASLR:
 * relocate to location-1,
 * release the secondary cores from firmware into
   location-1:secondary_holding_pen,
 * resume from hibernate, at which point we are running from location-2,
   as the kaslr values are now from the hibernate kernel.
 * location-2:secondary_holding_pen is empty,
 * location-1:secondary_holding_pen now contains a user-space string, or some
   other horror.

This didn't come up during testing because maxcpus=1 was permanent before v4.7,
and we can't bundle cores back into the secondary_holding_pen. Hibernate without
PSCI (or some other cpu_die() mechanism) fails in the core code:
> [70648.097242] Disabling non-boot CPUs ...
> [70648.117277] Error taking CPU1 down: -95
> [70648.117286] Non-boot CPUs are not disabled

... but if we never tried to boot those cpus, we don't hit this check, and the
cores are left in secondary_holding_pen after smp_prepare_cpus(), called well
before the late_initcall that kicks off resume.

This is broken on systems that load the kernel at a different address over a
reboot (using KASLR or a fancy boot loader), and use spin-table for secondary
cores. (Probably none in practice, but still worth fixing.)


Thinking aloud:
cpus_stuck_in_kernel only indicates cores that we failed to fully bring up
(incompatible translation granule etc.). There could still be cores in the
kernel's secondary holding pen. We should prevent kexec/hibernate in this case
(hibernate because we can't safely resume on such a machine).

kexec[0] currently checks for a cpu_die() call:
> 	if (num_online_cpus() > 1)

Changing this to 'num_possible_cpus() > 1' will cover the above case.
Similar code will need to be added to hibernate.

An alternative is to increase cpus_stuck_in_kernel in
smp_spin_table_cpu_prepare(), but it stops being a counter at this point.

Thoughts? Does this make sense, or do I have the wrong end of the stick
somewhere?


James

[0] http://www.spinics.net/lists/arm-kernel/msg510097.html
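A sketch of the guard James proposes for kexec and hibernate follows. Here cpu_ops_support_die() is a hypothetical stand-in for "the enable-method can really take secondaries offline", not an existing kernel function, and the placement of the check is illustrative:

```c
#include <linux/cpumask.h>
#include <linux/errno.h>

/*
 * Hypothetical helper: true if secondaries can genuinely be taken
 * offline (e.g. PSCI), false for bare spin-table. Not a real kernel
 * API; a stand-in for the cpu_die() check the thread describes.
 */
extern bool cpu_ops_support_die(void);

static int check_secondaries_are_safe(void)
{
	/*
	 * num_online_cpus() misses CPUs that were never brought online
	 * (e.g. maxcpus=1) but may still be spinning in the old kernel's
	 * secondary_holding_pen, so compare against num_possible_cpus().
	 */
	if (!cpu_ops_support_die() && num_possible_cpus() > 1)
		return -EBUSY;

	return 0;
}
```

As Mark notes below, this also rejects CPUs that never even entered the pen; there is no reliable way to distinguish the two cases, so the conservative check is the best available.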
On Thu, Jun 16, 2016 at 02:22:21PM +0100, James Morse wrote:
> Hi Mark,
>
> On 16/06/16 12:10, Mark Rutland wrote:
> > On Wed, Jun 15, 2016 at 06:35:44PM +0100, James Morse wrote:
> >> Resume from hibernate needs to clean any text executed by the kernel with
> >> the MMU off to the PoC. Collect these functions together into a new
> >> .mmuoff.text section.
> >>
> >> This covers booting of secondary cores and the cpu_suspend() path used
> >> by cpu-idle and suspend-to-ram.
> >>
> >> The bulk of head.S is not included, as the primary boot code is only ever
> >> executed once; the kernel never needs to ensure it is cleaned to a
> >> particular point in the cache.
>
> >> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> >> index 2c6e598a94dc..ff37231e2054 100644
> >> --- a/arch/arm64/kernel/head.S
> >> +++ b/arch/arm64/kernel/head.S
> >> @@ -656,6 +657,7 @@ ENDPROC(secondary_holding_pen)
> >>   * Secondary entry point that jumps straight into the kernel. Only to
> >>   * be used where CPUs are brought online dynamically by the kernel.
> >>   */
> >> +	.pushsection ".mmuoff.text", "ax"
> >>  ENTRY(secondary_entry)
> >>  	bl	el2_setup			// Drop to EL1
> >>  	bl	set_cpu_boot_mode_flag
> >> @@ -687,7 +689,7 @@ __secondary_switched:
> >>  	mov	x29, #0
> >>  	b	secondary_start_kernel
> >>  ENDPROC(__secondary_switched)
> >> -
> >> +	.popsection
> >
> > I think we also need to cover set_cpu_boot_mode_flag and
> > __boot_cpu_mode.
>
> Bother, yes.
> How come cpu_resume doesn't call this? I guess we assume
> psci_cpu_suspend_enter() will bring the core back at the same EL.

Hmm. It looks like we only bother to check once at boot time (in
hyp_mode_check as part of smp_cpus_done), and otherwise never inspect
the flag again.

We should probably add checks to the hotplug-on path, and perhaps the
idle cold return path. That's largely orthogonal to this series, though.

I think for consistency we should place them in the mmuoff section
regardless.

> > Likewise secondary_holding_pen and secondary_holding_pen_release, in
> > case you booted with maxcpus=1, suspended, resumed, then tried to bring
> > secondaries up with spin-table.
>
> Whoa! This must never happen!
> With KASLR:
>  * relocate to location-1,
>  * release the secondary cores from firmware into
>    location-1:secondary_holding_pen,
>  * resume from hibernate, at which point we are running from location-2,
>    as the kaslr values are now from the hibernate kernel.
>  * location-2:secondary_holding_pen is empty,
>  * location-1:secondary_holding_pen now contains a user-space string, or some
>    other horror.

:(

> This didn't come up during testing because maxcpus=1 was permanent before v4.7,
> and we can't bundle cores back into the secondary_holding_pen. Hibernate without
> PSCI (or some other cpu_die() mechanism) fails in the core code:
> > [70648.097242] Disabling non-boot CPUs ...
> > [70648.117277] Error taking CPU1 down: -95
> > [70648.117286] Non-boot CPUs are not disabled
>
> ... but if we never tried to boot those cpus, we don't hit this check, and the
> cores are left in secondary_holding_pen after smp_prepare_cpus(), called well
> before the late_initcall that kicks off resume.
>
> This is broken on systems that load the kernel at a different address over a
> reboot (using KASLR or a fancy boot loader), and use spin-table for secondary
> cores. (Probably none in practice, but still worth fixing.)
>
> Thinking aloud:
> cpus_stuck_in_kernel only indicates cores that we failed to fully bring up
> (incompatible translation granule etc.). There could still be cores in the
> kernel's secondary holding pen. We should prevent kexec/hibernate in this case
> (hibernate because we can't safely resume on such a machine).

This sounds sensible to me.

> kexec[0] currently checks for a cpu_die() call:
> > 	if (num_online_cpus() > 1)
>
> Changing this to 'num_possible_cpus() > 1' will cover the above case.
> Similar code will need to be added to hibernate.

That will also catch cases where CPUs failed to even enter the pen, but
I don't think there's any reliable way to distinguish the two, so that's
probably the best we can do.

> An alternative is to increase cpus_stuck_in_kernel in
> smp_spin_table_cpu_prepare(), but it stops being a counter at this point.

I don't think we need it to be a counter. I'm happy to change the
meaning slightly if we update the comment.

> Thoughts?

"Oh no", "aaarargarghargahgasdfsdfs". :(

> Does this make sense, or do I have the wrong end of the stick somewhere?

The above makes sense to me. Thanks for digging into this!

Mark.
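The alternative they settle on, sketched below: account for each CPU released into the pen, so a nonzero cpus_stuck_in_kernel means "some CPU may still be executing kernel text" rather than an exact count of failed bring-ups. The release helper and the extern declaration are illustrative stand-ins for the existing code in smp_spin_table.c and smp.c, not the merged change:

```c
/*
 * Sketch only, not the merged change. Without a cpu_die() mechanism a
 * CPU parked in secondary_holding_pen can never be reclaimed, so record
 * it in cpus_stuck_in_kernel and let kexec/hibernate refuse while the
 * value is nonzero (with the comment on the variable updated to match).
 */
extern unsigned long cpus_stuck_in_kernel;	/* illustrative; lives in arm64's smp.c */

/* Stand-in for the existing release-into-the-pen logic. */
static int spin_table_release_cpu(unsigned int cpu)
{
	return 0;	/* pretend the write to the release address succeeded */
}

static int smp_spin_table_cpu_prepare(unsigned int cpu)
{
	int ret = spin_table_release_cpu(cpu);

	if (!ret)
		cpus_stuck_in_kernel++;	/* CPU now spins in our holding pen */

	return ret;
}
```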
Resume from hibernate needs to clean any text executed by the kernel with
the MMU off to the PoC. Collect these functions together into a new
.mmuoff.text section.

This covers booting of secondary cores and the cpu_suspend() path used
by cpu-idle and suspend-to-ram.

The bulk of head.S is not included, as the primary boot code is only ever
executed once; the kernel never needs to ensure it is cleaned to a
particular point in the cache.

Suggested-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: James Morse <james.morse@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
---
This patch is new since v1.

 arch/arm64/include/asm/sections.h | 1 +
 arch/arm64/kernel/head.S          | 6 ++++--
 arch/arm64/kernel/sleep.S         | 2 ++
 arch/arm64/kernel/vmlinux.lds.S   | 3 +++
 arch/arm64/mm/proc.S              | 4 ++++
 5 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h
index cb68eb348566..6f27c3f86a09 100644
--- a/arch/arm64/include/asm/sections.h
+++ b/arch/arm64/include/asm/sections.h
@@ -24,5 +24,6 @@ extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[];
 extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
 extern char __hyp_text_start[], __hyp_text_end[];
 extern char __idmap_text_start[], __idmap_text_end[];
+extern char __mmuoff_text_start[], __mmuoff_text_end[];
 
 #endif /* __ASM_SECTIONS_H */
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 2c6e598a94dc..ff37231e2054 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -477,6 +477,7 @@ ENTRY(kimage_vaddr)
  * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in x20 if
  * booted in EL1 or EL2 respectively.
  */
+	.pushsection ".mmuoff.text", "ax"
 ENTRY(el2_setup)
 	mrs	x0, CurrentEL
 	cmp	x0, #CurrentEL_EL2
@@ -604,7 +605,7 @@ install_el2_stub:
 	mov	w20, #BOOT_CPU_MODE_EL2		// This CPU booted in EL2
 	eret
 ENDPROC(el2_setup)
-
+	.popsection
 /*
  * Sets the __boot_cpu_mode flag depending on the CPU boot mode passed
  * in x20. See arch/arm64/include/asm/virt.h for more info.
@@ -656,6 +657,7 @@ ENDPROC(secondary_holding_pen)
  * Secondary entry point that jumps straight into the kernel. Only to
  * be used where CPUs are brought online dynamically by the kernel.
  */
+	.pushsection ".mmuoff.text", "ax"
 ENTRY(secondary_entry)
 	bl	el2_setup			// Drop to EL1
 	bl	set_cpu_boot_mode_flag
@@ -687,7 +689,7 @@ __secondary_switched:
 	mov	x29, #0
 	b	secondary_start_kernel
 ENDPROC(__secondary_switched)
-
+	.popsection
 /*
  * The booting CPU updates the failed status @__early_cpu_boot_status,
  * with MMU turned off.
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index 9a3aec97ac09..e66ce9b7bbde 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -97,6 +97,7 @@ ENTRY(__cpu_suspend_enter)
 ENDPROC(__cpu_suspend_enter)
 	.ltorg
 
+	.pushsection ".mmuoff.text", "ax"
 ENTRY(cpu_resume)
 	bl	el2_setup		// if in EL2 drop to EL1 cleanly
 	/* enable the MMU early - so we can access sleep_save_stash by va */
@@ -106,6 +107,7 @@ ENTRY(cpu_resume)
 	adrp	x26, swapper_pg_dir
 	b	__cpu_setup
 ENDPROC(cpu_resume)
+	.popsection
 
 ENTRY(_cpu_resume)
 	mrs	x1, mpidr_el1
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 435e820e898d..64fe907bcc5f 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -118,6 +118,9 @@ SECTIONS
 			__exception_text_end = .;
 			IRQENTRY_TEXT
 			SOFTIRQENTRY_TEXT
+			__mmuoff_text_start = .;
+			*(.mmuoff.text)
+			__mmuoff_text_end = .;
 			TEXT_TEXT
 			SCHED_TEXT
 			LOCK_TEXT
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index c4317879b938..655ff3ec90f2 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -83,6 +83,7 @@ ENDPROC(cpu_do_suspend)
  *
  * x0: Address of context pointer
  */
+	.pushsection ".mmuoff.text", "ax"
 ENTRY(cpu_do_resume)
 	ldp	x2, x3, [x0]
 	ldp	x4, x5, [x0, #16]
@@ -111,6 +112,7 @@ ENTRY(cpu_do_resume)
 	isb
 	ret
 ENDPROC(cpu_do_resume)
+	.popsection
 #endif
 
 /*
@@ -172,6 +174,7 @@ ENDPROC(idmap_cpu_replace_ttbr1)
  * Initialise the processor for turning the MMU on. Return in x0 the
  * value of the SCTLR_EL1 register.
  */
+	.pushsection ".mmuoff.text", "ax"
 ENTRY(__cpu_setup)
 	tlbi	vmalle1				// Invalidate local TLB
 	dsb	nsh
@@ -255,3 +258,4 @@ ENDPROC(__cpu_setup)
 crval:
 	.word	0xfcffffff			// clear
 	.word	0x34d5d91d			// set
+	.popsection