[v2,2/6] arm64: Add .mmuoff.text section

Message ID 1466012148-7674-3-git-send-email-james.morse@arm.com (mailing list archive)
State Not Applicable, archived

Commit Message

James Morse June 15, 2016, 5:35 p.m. UTC
Resume from hibernate needs to clean any text executed by the kernel with
the MMU off to the PoC. Collect these functions together into a new
.mmuoff.text section.

This covers booting of secondary cores and the cpu_suspend() path used
by cpu-idle and suspend-to-ram.

The bulk of head.S is not included, as the primary boot code is only ever
executed once; the kernel never needs to ensure it is cleaned to a
particular point in the cache.

Suggested-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: James Morse <james.morse@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
---
This patch is new since v1.
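
For reference, the intended consumer of the new section markers is the
hibernate resume path later in this series; an illustrative sketch (not
part of this patch) of cleaning the whole range to the PoC in one call:

	__flush_dcache_area(__mmuoff_text_start,
			    __mmuoff_text_end - __mmuoff_text_start);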

 arch/arm64/include/asm/sections.h | 1 +
 arch/arm64/kernel/head.S          | 6 ++++--
 arch/arm64/kernel/sleep.S         | 2 ++
 arch/arm64/kernel/vmlinux.lds.S   | 3 +++
 arch/arm64/mm/proc.S              | 4 ++++
 5 files changed, 14 insertions(+), 2 deletions(-)

Comments

Mark Rutland June 16, 2016, 11:10 a.m. UTC | #1
On Wed, Jun 15, 2016 at 06:35:44PM +0100, James Morse wrote:
> Resume from hibernate needs to clean any text executed by the kernel with
> the MMU off to the PoC. Collect these functions together into a new
> .mmuoff.text section.
> 
> This covers booting of secondary cores and the cpu_suspend() path used
> by cpu-idle and suspend-to-ram.
> 
> The bulk of head.S is not included, as the primary boot code is only ever
> executed once; the kernel never needs to ensure it is cleaned to a
> particular point in the cache.
> 
> Suggested-by: Mark Rutland <mark.rutland@arm.com>
> Signed-off-by: James Morse <james.morse@arm.com>
> Cc: Mark Rutland <mark.rutland@arm.com>
> ---
> This patch is new since v1.
> 
>  arch/arm64/include/asm/sections.h | 1 +
>  arch/arm64/kernel/head.S          | 6 ++++--
>  arch/arm64/kernel/sleep.S         | 2 ++
>  arch/arm64/kernel/vmlinux.lds.S   | 3 +++
>  arch/arm64/mm/proc.S              | 4 ++++
>  5 files changed, 14 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h
> index cb68eb348566..6f27c3f86a09 100644
> --- a/arch/arm64/include/asm/sections.h
> +++ b/arch/arm64/include/asm/sections.h
> @@ -24,5 +24,6 @@ extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[];
>  extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
>  extern char __hyp_text_start[], __hyp_text_end[];
>  extern char __idmap_text_start[], __idmap_text_end[];
> +extern char __mmuoff_text_start[], __mmuoff_text_end[];
>  
>  #endif /* __ASM_SECTIONS_H */
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index 2c6e598a94dc..ff37231e2054 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -477,6 +477,7 @@ ENTRY(kimage_vaddr)
>   * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in x20 if
>   * booted in EL1 or EL2 respectively.
>   */
> +	.pushsection ".mmuoff.text", "ax"
>  ENTRY(el2_setup)
>  	mrs	x0, CurrentEL
>  	cmp	x0, #CurrentEL_EL2
> @@ -604,7 +605,7 @@ install_el2_stub:
>  	mov	w20, #BOOT_CPU_MODE_EL2		// This CPU booted in EL2
>  	eret
>  ENDPROC(el2_setup)
> -
> +	.popsection
>  /*
>   * Sets the __boot_cpu_mode flag depending on the CPU boot mode passed
>   * in x20. See arch/arm64/include/asm/virt.h for more info.
> @@ -656,6 +657,7 @@ ENDPROC(secondary_holding_pen)
>  	 * Secondary entry point that jumps straight into the kernel. Only to
>  	 * be used where CPUs are brought online dynamically by the kernel.
>  	 */
> +	.pushsection ".mmuoff.text", "ax"
>  ENTRY(secondary_entry)
>  	bl	el2_setup			// Drop to EL1
>  	bl	set_cpu_boot_mode_flag
> @@ -687,7 +689,7 @@ __secondary_switched:
>  	mov	x29, #0
>  	b	secondary_start_kernel
>  ENDPROC(__secondary_switched)
> -
> +	.popsection

I think we also need to cover set_cpu_boot_mode_flag and
__boot_cpu_mode.
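
set_cpu_boot_mode_flag can take the same .pushsection/.popsection wrapping
as el2_setup. For __boot_cpu_mode, an untested sketch (the existing
definition, just moved into the new section):

	.pushsection ".mmuoff.text", "ax"
	.align	L1_CACHE_SHIFT
ENTRY(__boot_cpu_mode)
	.long	BOOT_CPU_MODE_EL2
	.long	BOOT_CPU_MODE_EL1
	.popsection

... though mixing data into an executable section like that suggests we
may eventually want a separate mmuoff data section.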

Likewise secondary_holding_pen and secondary_holding_pen_release, in
case you booted with maxcpus=1, suspended, resumed, then tried to bring
secondaries up with spin-table.

Otherwise, this looks good!

Thanks,
Mark.

>  /*
>   * The booting CPU updates the failed status @__early_cpu_boot_status,
>   * with MMU turned off.
> diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
> index 9a3aec97ac09..e66ce9b7bbde 100644
> --- a/arch/arm64/kernel/sleep.S
> +++ b/arch/arm64/kernel/sleep.S
> @@ -97,6 +97,7 @@ ENTRY(__cpu_suspend_enter)
>  ENDPROC(__cpu_suspend_enter)
>  	.ltorg
>  
> +	.pushsection ".mmuoff.text", "ax"
>  ENTRY(cpu_resume)
>  	bl	el2_setup		// if in EL2 drop to EL1 cleanly
>  	/* enable the MMU early - so we can access sleep_save_stash by va */
> @@ -106,6 +107,7 @@ ENTRY(cpu_resume)
>  	adrp	x26, swapper_pg_dir
>  	b	__cpu_setup
>  ENDPROC(cpu_resume)
> +	.popsection
>  
>  ENTRY(_cpu_resume)
>  	mrs	x1, mpidr_el1
> diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
> index 435e820e898d..64fe907bcc5f 100644
> --- a/arch/arm64/kernel/vmlinux.lds.S
> +++ b/arch/arm64/kernel/vmlinux.lds.S
> @@ -118,6 +118,9 @@ SECTIONS
>  			__exception_text_end = .;
>  			IRQENTRY_TEXT
>  			SOFTIRQENTRY_TEXT
> +			__mmuoff_text_start = .;
> +			*(.mmuoff.text)
> +			__mmuoff_text_end = .;
>  			TEXT_TEXT
>  			SCHED_TEXT
>  			LOCK_TEXT
> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> index c4317879b938..655ff3ec90f2 100644
> --- a/arch/arm64/mm/proc.S
> +++ b/arch/arm64/mm/proc.S
> @@ -83,6 +83,7 @@ ENDPROC(cpu_do_suspend)
>   *
>   * x0: Address of context pointer
>   */
> +	.pushsection ".mmuoff.text", "ax"
>  ENTRY(cpu_do_resume)
>  	ldp	x2, x3, [x0]
>  	ldp	x4, x5, [x0, #16]
> @@ -111,6 +112,7 @@ ENTRY(cpu_do_resume)
>  	isb
>  	ret
>  ENDPROC(cpu_do_resume)
> +	.popsection
>  #endif
>  
>  /*
> @@ -172,6 +174,7 @@ ENDPROC(idmap_cpu_replace_ttbr1)
>   *	Initialise the processor for turning the MMU on.  Return in x0 the
>   *	value of the SCTLR_EL1 register.
>   */
> +	.pushsection ".mmuoff.text", "ax"
>  ENTRY(__cpu_setup)
>  	tlbi	vmalle1				// Invalidate local TLB
>  	dsb	nsh
> @@ -255,3 +258,4 @@ ENDPROC(__cpu_setup)
>  crval:
>  	.word	0xfcffffff			// clear
>  	.word	0x34d5d91d			// set
> +	.popsection
> -- 
> 2.8.0.rc3
> 
James Morse June 16, 2016, 1:22 p.m. UTC | #2
Hi Mark,

On 16/06/16 12:10, Mark Rutland wrote:
> On Wed, Jun 15, 2016 at 06:35:44PM +0100, James Morse wrote:
>> Resume from hibernate needs to clean any text executed by the kernel with
>> the MMU off to the PoC. Collect these functions together into a new
>> .mmuoff.text section.
>>
>> This covers booting of secondary cores and the cpu_suspend() path used
>> by cpu-idle and suspend-to-ram.
>>
>> The bulk of head.S is not included, as the primary boot code is only ever
>> executed once; the kernel never needs to ensure it is cleaned to a
>> particular point in the cache.


>> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
>> index 2c6e598a94dc..ff37231e2054 100644
>> --- a/arch/arm64/kernel/head.S
>> +++ b/arch/arm64/kernel/head.S
>> @@ -656,6 +657,7 @@ ENDPROC(secondary_holding_pen)
>>  	 * Secondary entry point that jumps straight into the kernel. Only to
>>  	 * be used where CPUs are brought online dynamically by the kernel.
>>  	 */
>> +	.pushsection ".mmuoff.text", "ax"
>>  ENTRY(secondary_entry)
>>  	bl	el2_setup			// Drop to EL1
>>  	bl	set_cpu_boot_mode_flag
>> @@ -687,7 +689,7 @@ __secondary_switched:
>>  	mov	x29, #0
>>  	b	secondary_start_kernel
>>  ENDPROC(__secondary_switched)
>> -
>> +	.popsection
> 
> I think we also need to cover set_cpu_boot_mode_flag and
> __boot_cpu_mode.

Bother, yes.
How come cpu_resume doesn't call this? I guess we assume
psci_cpu_suspend_enter() will bring the core back at the same EL.


> Likewise secondary_holding_pen and secondary_holding_pen_release, in
> case you booted with maxcpus=1, suspended, resumed, then tried to bring
> secondaries up with spin-table.

Whoa! This must never happen!
With KASLR:
* relocate to location-1,
* release the secondary cores from firmware into
  location-1:secondary_holding_pen,
* resume from hibernate, at which point we are running from location-2,
  as the KASLR values are now from the hibernate kernel.
* location-2:secondary_holding_pen is empty,
* location-1:secondary_holding_pen now contains a user-space string, or some
  other horror.


This didn't come up during testing because maxcpus=1 was permanent before v4.7,
and we can't bundle cores back into the secondary_holding_pen. Hibernate without
PSCI (or some other cpu_die() mechanism) fails in the core code:
> [70648.097242] Disabling non-boot CPUs ...
> [70648.117277] Error taking CPU1 down: -95
> [70648.117286] Non-boot CPUs are not disabled

... but if we never tried to boot those CPUs, we don't hit this check, and the
cores are left in secondary_holding_pen after smp_prepare_cpus(), called well
before the late_initcall that kicks off resume.

This is broken on systems that load the kernel at a different address over a
reboot (using KASLR or a fancy boot loader) and use spin-table for secondary
cores (probably none in practice, but still worth fixing).


Thinking aloud:
cpus_stuck_in_kernel only indicates cores that we failed to fully bring up
(incompatible translation granule, etc.). There could still be cores in the
kernel's secondary_holding_pen. We should prevent kexec/hibernate in this case
(hibernate because we can't safely resume on such a machine).

kexec[0] currently checks for a cpu_die() call:
> if (num_online_cpus() > 1)

Changing this to 'num_possible_cpus() > 1' will cover the above case.
Similar code will need to be added to hibernate.
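
For illustration, the hibernate side could look something like this (sketch
only; exact placement to be decided, and as with kexec it should only fire
when secondaries can't be taken down via cpu_die()):

	/* Secondary cores may still be spinning in the old kernel's pen */
	if (num_possible_cpus() > 1)
		return -EBUSY;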


An alternative is to increment cpus_stuck_in_kernel in
smp_spin_table_cpu_prepare(), but it stops being a counter at this point.
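
i.e. something like (sketch):

	/*
	 * In smp_spin_table_cpu_prepare(): once a core has been released
	 * into the pen we can never hand it back, so flag it as stuck.
	 * cpus_stuck_in_kernel then means "non-zero if any CPU is
	 * stranded" rather than an exact count.
	 */
	cpus_stuck_in_kernel++;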


Thoughts? Does this make sense, or do I have the wrong end of the stick somewhere?



James


[0] http://www.spinics.net/lists/arm-kernel/msg510097.html

Mark Rutland June 16, 2016, 1:55 p.m. UTC | #3
On Thu, Jun 16, 2016 at 02:22:21PM +0100, James Morse wrote:
> Hi Mark,
> 
> On 16/06/16 12:10, Mark Rutland wrote:
> > On Wed, Jun 15, 2016 at 06:35:44PM +0100, James Morse wrote:
> >> Resume from hibernate needs to clean any text executed by the kernel with
> >> the MMU off to the PoC. Collect these functions together into a new
> >> .mmuoff.text section.
> >>
> >> This covers booting of secondary cores and the cpu_suspend() path used
> >> by cpu-idle and suspend-to-ram.
> >>
> >> The bulk of head.S is not included, as the primary boot code is only ever
> >> executed once; the kernel never needs to ensure it is cleaned to a
> >> particular point in the cache.
> 
> 
> >> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> >> index 2c6e598a94dc..ff37231e2054 100644
> >> --- a/arch/arm64/kernel/head.S
> >> +++ b/arch/arm64/kernel/head.S
> >> @@ -656,6 +657,7 @@ ENDPROC(secondary_holding_pen)
> >>  	 * Secondary entry point that jumps straight into the kernel. Only to
> >>  	 * be used where CPUs are brought online dynamically by the kernel.
> >>  	 */
> >> +	.pushsection ".mmuoff.text", "ax"
> >>  ENTRY(secondary_entry)
> >>  	bl	el2_setup			// Drop to EL1
> >>  	bl	set_cpu_boot_mode_flag
> >> @@ -687,7 +689,7 @@ __secondary_switched:
> >>  	mov	x29, #0
> >>  	b	secondary_start_kernel
> >>  ENDPROC(__secondary_switched)
> >> -
> >> +	.popsection
> > 
> > I think we also need to cover set_cpu_boot_mode_flag and
> > __boot_cpu_mode.
> 
> Bother, yes.
> How come cpu_resume doesn't call this? I guess we assume
> psci_cpu_suspend_enter() will bring the core back at the same EL.

Hmm. It looks like we only bother to check once at boot time (in
hyp_mode_check as part of smp_cpus_done), and otherwise never inspect
the flag again.

We should probably add checks to the hotplug-on path, and perhaps the
idle cold return path. That's largely orthogonal to this series, though.

I think for consistency we should place them in the mmuoff
section regardless.

> > Likewise secondary_holding_pen and secondary_holding_pen_release, in
> > case you booted with maxcpus=1, suspended, resumed, then tried to bring
> > secondaries up with spin-table.
> 
> Whoa! This must never happen!
> With KASLR:
> * relocate to location-1,
> * release the secondary cores from firmware into
>   location-1:secondary_holding_pen,
> * resume from hibernate, at which point we are running from location-2,
>   as the KASLR values are now from the hibernate kernel.
> * location-2:secondary_holding_pen is empty,
> * location-1:secondary_holding_pen now contains a user-space string, or some
>   other horror.

:(

> This didn't come up during testing because maxcpus=1 was permanent before v4.7,
> and we can't bundle cores back into the secondary_holding_pen. Hibernate without
> PSCI (or some other cpu_die() mechanism) fails in the core code:
> > [70648.097242] Disabling non-boot CPUs ...
> > [70648.117277] Error taking CPU1 down: -95
> > [70648.117286] Non-boot CPUs are not disabled
> 
> ... but if we never tried to boot those CPUs, we don't hit this check, and the
> cores are left in secondary_holding_pen after smp_prepare_cpus(), called well
> before the late_initcall that kicks off resume.
> 
> This is broken on systems that load the kernel at a different address over a
> reboot (using KASLR or a fancy boot loader) and use spin-table for secondary
> cores (probably none in practice, but still worth fixing).
> 
> 
> Thinking aloud:
> cpus_stuck_in_kernel only indicates cores that we failed to fully bring up
> (incompatible translation granule, etc.). There could still be cores in the
> kernel's secondary_holding_pen. We should prevent kexec/hibernate in this case
> (hibernate because we can't safely resume on such a machine).

This sounds sensible to me.

> kexec[0] currently checks for a cpu_die() call:
> > if (num_online_cpus() > 1)
> 
> Changing this to 'num_possible_cpus() > 1' will cover the above case.
> Similar code will need to be added to hibernate.

That will also catch cases where CPUs failed to even enter the pen, but
I don't think there's any reliable way to distinguish the two, so that's
probably the best we can do.

> An alternative is to increment cpus_stuck_in_kernel in
> smp_spin_table_cpu_prepare(), but it stops being a counter at this point.

I don't think we need it to be a counter. I'm happy to change the
meaning slightly if we update the comment.
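
Say, something along these lines:

	/*
	 * Non-zero if any CPU is, or may be, stuck in the kernel: either it
	 * failed to come up, or it is sitting in a spin-table holding pen
	 * we have no way of taking it back out of. Not an exact count.
	 */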

> Thoughts?

"Oh no", "aaarargarghargahgasdfsdfs". :(

> Does this make sense, or do I have the wrong end of the stick somewhere?

The above makes sense to me. Thanks for digging into this!

Mark.

Patch

diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h
index cb68eb348566..6f27c3f86a09 100644
--- a/arch/arm64/include/asm/sections.h
+++ b/arch/arm64/include/asm/sections.h
@@ -24,5 +24,6 @@  extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[];
 extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
 extern char __hyp_text_start[], __hyp_text_end[];
 extern char __idmap_text_start[], __idmap_text_end[];
+extern char __mmuoff_text_start[], __mmuoff_text_end[];
 
 #endif /* __ASM_SECTIONS_H */
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 2c6e598a94dc..ff37231e2054 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -477,6 +477,7 @@  ENTRY(kimage_vaddr)
  * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in x20 if
  * booted in EL1 or EL2 respectively.
  */
+	.pushsection ".mmuoff.text", "ax"
 ENTRY(el2_setup)
 	mrs	x0, CurrentEL
 	cmp	x0, #CurrentEL_EL2
@@ -604,7 +605,7 @@  install_el2_stub:
 	mov	w20, #BOOT_CPU_MODE_EL2		// This CPU booted in EL2
 	eret
 ENDPROC(el2_setup)
-
+	.popsection
 /*
  * Sets the __boot_cpu_mode flag depending on the CPU boot mode passed
  * in x20. See arch/arm64/include/asm/virt.h for more info.
@@ -656,6 +657,7 @@  ENDPROC(secondary_holding_pen)
 	 * Secondary entry point that jumps straight into the kernel. Only to
 	 * be used where CPUs are brought online dynamically by the kernel.
 	 */
+	.pushsection ".mmuoff.text", "ax"
 ENTRY(secondary_entry)
 	bl	el2_setup			// Drop to EL1
 	bl	set_cpu_boot_mode_flag
@@ -687,7 +689,7 @@  __secondary_switched:
 	mov	x29, #0
 	b	secondary_start_kernel
 ENDPROC(__secondary_switched)
-
+	.popsection
 /*
  * The booting CPU updates the failed status @__early_cpu_boot_status,
  * with MMU turned off.
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index 9a3aec97ac09..e66ce9b7bbde 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -97,6 +97,7 @@  ENTRY(__cpu_suspend_enter)
 ENDPROC(__cpu_suspend_enter)
 	.ltorg
 
+	.pushsection ".mmuoff.text", "ax"
 ENTRY(cpu_resume)
 	bl	el2_setup		// if in EL2 drop to EL1 cleanly
 	/* enable the MMU early - so we can access sleep_save_stash by va */
@@ -106,6 +107,7 @@  ENTRY(cpu_resume)
 	adrp	x26, swapper_pg_dir
 	b	__cpu_setup
 ENDPROC(cpu_resume)
+	.popsection
 
 ENTRY(_cpu_resume)
 	mrs	x1, mpidr_el1
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 435e820e898d..64fe907bcc5f 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -118,6 +118,9 @@  SECTIONS
 			__exception_text_end = .;
 			IRQENTRY_TEXT
 			SOFTIRQENTRY_TEXT
+			__mmuoff_text_start = .;
+			*(.mmuoff.text)
+			__mmuoff_text_end = .;
 			TEXT_TEXT
 			SCHED_TEXT
 			LOCK_TEXT
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index c4317879b938..655ff3ec90f2 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -83,6 +83,7 @@  ENDPROC(cpu_do_suspend)
  *
  * x0: Address of context pointer
  */
+	.pushsection ".mmuoff.text", "ax"
 ENTRY(cpu_do_resume)
 	ldp	x2, x3, [x0]
 	ldp	x4, x5, [x0, #16]
@@ -111,6 +112,7 @@  ENTRY(cpu_do_resume)
 	isb
 	ret
 ENDPROC(cpu_do_resume)
+	.popsection
 #endif
 
 /*
@@ -172,6 +174,7 @@  ENDPROC(idmap_cpu_replace_ttbr1)
  *	Initialise the processor for turning the MMU on.  Return in x0 the
  *	value of the SCTLR_EL1 register.
  */
+	.pushsection ".mmuoff.text", "ax"
 ENTRY(__cpu_setup)
 	tlbi	vmalle1				// Invalidate local TLB
 	dsb	nsh
@@ -255,3 +258,4 @@  ENDPROC(__cpu_setup)
 crval:
 	.word	0xfcffffff			// clear
 	.word	0x34d5d91d			// set
+	.popsection