
[08/16] arm64/kexec: Add core kexec support

Message ID a1c2a702f127d9dade1f9af8ab13decb2ef1c0da.1445297709.git.geoff@infradead.org (mailing list archive)
State New, archived

Commit Message

Geoff Levand Oct. 19, 2015, 11:38 p.m. UTC
Add three new files, kexec.h, machine_kexec.c and relocate_kernel.S, to the
arm64 architecture, adding support for the kexec reboot mechanism
(CONFIG_KEXEC) on arm64 platforms.

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/Kconfig                  |  10 +++
 arch/arm64/include/asm/kexec.h      |  48 +++++++++++
 arch/arm64/kernel/Makefile          |   2 +
 arch/arm64/kernel/cpu-reset.S       |   2 +-
 arch/arm64/kernel/machine_kexec.c   | 141 +++++++++++++++++++++++++++++++
 arch/arm64/kernel/relocate_kernel.S | 163 ++++++++++++++++++++++++++++++++++++
 include/uapi/linux/kexec.h          |   1 +
 7 files changed, 366 insertions(+), 1 deletion(-)
 create mode 100644 arch/arm64/include/asm/kexec.h
 create mode 100644 arch/arm64/kernel/machine_kexec.c
 create mode 100644 arch/arm64/kernel/relocate_kernel.S

Comments

Pratyush Anand Oct. 20, 2015, 8:56 a.m. UTC | #1
Hi Geoff,

Thanks for the patches.

On 19/10/2015:11:38:53 PM, Geoff Levand wrote:
> +static void soft_restart(unsigned long addr)
> +{
> +	setup_mm_for_reboot();
> +	cpu_soft_restart(virt_to_phys(cpu_reset), addr,
> +		is_hyp_mode_available());

So now we do not flush the cache for any memory region. Shouldn't we still
flush at least the kernel and purgatory segments?

kexec-tools loads a new kernel and a purgatory executable. Some of those bits
might still be only in the D-cache, and we disable the D-cache before control
is passed to the purgatory binary. Purgatory and some initial part of the
kernel code are executed with the D-cache disabled. So we might land in a
situation where the correct code is not executed while the D-cache is
disabled, no?
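
Something like the below (just a rough sketch; it walks the loaded
segments and cleans each one with __flush_dcache_area() before the
caches go off) is what I have in mind:

	/* Rough sketch only -- 'image' is the struct kimage being
	 * loaded, as in machine_kexec().  Clean each loaded segment
	 * to PoC so the copied bits are visible once the D-cache is
	 * disabled. */
	unsigned long i;

	for (i = 0; i < image->nr_segments; i++)
		__flush_dcache_area(phys_to_virt(image->segment[i].mem),
				    image->segment[i].memsz);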

~Pratyush
Geoff Levand Oct. 20, 2015, 5:19 p.m. UTC | #2
Hi,

On Tue, 2015-10-20 at 14:26 +0530, Pratyush Anand wrote:
> On 19/10/2015:11:38:53 PM, Geoff Levand wrote:
> > +static void soft_restart(unsigned long addr)
> > +{
> > +	setup_mm_for_reboot();
> > +	cpu_soft_restart(virt_to_phys(cpu_reset), addr,
> > +		is_hyp_mode_available());
> 
> So now we do not flush the cache for any memory region. Shouldn't we still
> flush at least the kernel and purgatory segments?

Relevant pages of the kexec list are flushed by the code following the comment
'Invalidate dest page to PoC' in the arm64_relocate_new_kernel routine:

 The dcache is turned off
 The page is invalidated to PoC
 The new page is written
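
For reference, the relevant lines from arm64_relocate_new_kernel (as in
the patch below):

	/* Invalidate dest page to PoC. */
	mov	x0, x20
	add	x19, x0, #PAGE_SIZE
	sub	x1, x17, #1
	bic	x0, x0, x1
1:	dc	ivac, x0
	add	x0, x0, x17
	cmp	x0, x19
	b.lo	1b
	dsb	sy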

-Geoff
Pratyush Anand Oct. 23, 2015, 7:29 a.m. UTC | #3
On 20/10/2015:10:19:25 AM, Geoff Levand wrote:
> Hi,
> 
> On Tue, 2015-10-20 at 14:26 +0530, Pratyush Anand wrote:
> > On 19/10/2015:11:38:53 PM, Geoff Levand wrote:
> > > +static void soft_restart(unsigned long addr)
> > > +{
> > > +	setup_mm_for_reboot();
> > > +	cpu_soft_restart(virt_to_phys(cpu_reset), addr,
> > > +		is_hyp_mode_available());
> > 
> > So now we do not flush the cache for any memory region. Shouldn't we still
> > flush at least the kernel and purgatory segments?
> 
> Relevant pages of the kexec list are flushed by the code following the comment
> 'Invalidate dest page to PoC' in the arm64_relocate_new_kernel routine:
> 
>  The dcache is turned off
>  The page is invalidated to PoC
>  The new page is written

Thanks for clarifying it.

I tested your kexec-v10.2 with mustang.

Tested-by: Pratyush Anand <panand@redhat.com>
James Morse Oct. 30, 2015, 4:29 p.m. UTC | #4
Hi Geoff,

On 20/10/15 00:38, Geoff Levand wrote:
> Add three new files, kexec.h, machine_kexec.c and relocate_kernel.S, to the
> arm64 architecture, adding support for the kexec reboot mechanism
> (CONFIG_KEXEC) on arm64 platforms.
> 
> Signed-off-by: Geoff Levand <geoff@infradead.org>
> ---
>  arch/arm64/Kconfig                  |  10 +++
>  arch/arm64/include/asm/kexec.h      |  48 +++++++++++
>  arch/arm64/kernel/Makefile          |   2 +
>  arch/arm64/kernel/cpu-reset.S       |   2 +-
>  arch/arm64/kernel/machine_kexec.c   | 141 +++++++++++++++++++++++++++++++
>  arch/arm64/kernel/relocate_kernel.S | 163 ++++++++++++++++++++++++++++++++++++
>  include/uapi/linux/kexec.h          |   1 +
>  7 files changed, 366 insertions(+), 1 deletion(-)
>  create mode 100644 arch/arm64/include/asm/kexec.h
>  create mode 100644 arch/arm64/kernel/machine_kexec.c
>  create mode 100644 arch/arm64/kernel/relocate_kernel.S
> 
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 07d1811..73e8e31 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -491,6 +491,16 @@ config SECCOMP
>  	  and the task is only allowed to execute a few safe syscalls
>  	  defined by each seccomp mode.
>  
> +config KEXEC
> +	depends on (!SMP || PM_SLEEP_SMP)

Commit 4b3dc9679cf7 got rid of '!SMP'.


> +	select KEXEC_CORE
> +	bool "kexec system call"
> +	---help---
> +	  kexec is a system call that implements the ability to shutdown your
> +	  current kernel, and to start another kernel.  It is like a reboot
> +	  but it is independent of the system firmware.   And like a reboot
> +	  you can start any kernel with it, not just Linux.
> +
>  config XEN_DOM0
>  	def_bool y
>  	depends on XEN
> diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S
> index ffc9e385e..7cc7f56 100644
> --- a/arch/arm64/kernel/cpu-reset.S
> +++ b/arch/arm64/kernel/cpu-reset.S
> @@ -3,7 +3,7 @@
>   *
>   * Copyright (C) 2001 Deep Blue Solutions Ltd.
>   * Copyright (C) 2012 ARM Ltd.
> - * Copyright (C) 2015 Huawei Futurewei Technologies.
> + * Copyright (C) Huawei Futurewei Technologies.

Move this hunk into the patch that adds the file?


>   *
>   * This program is free software; you can redistribute it and/or modify
>   * it under the terms of the GNU General Public License version 2 as
> diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
> new file mode 100644
> index 0000000..7b07a16
> --- /dev/null
> +++ b/arch/arm64/kernel/relocate_kernel.S
> @@ -0,0 +1,163 @@
> +/*
> + * kexec for arm64
> + *
> + * Copyright (C) Linaro.
> + * Copyright (C) Huawei Futurewei Technologies.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/kexec.h>
> +
> +#include <asm/assembler.h>
> +#include <asm/kexec.h>
> +#include <asm/memory.h>
> +#include <asm/page.h>
> +
> +
> +/*
> + * arm64_relocate_new_kernel - Put a 2nd stage kernel image in place and boot it.
> + *
> + * The memory that the old kernel occupies may be overwritten when copying the
> + * new image to its final location.  To ensure that the
> + * arm64_relocate_new_kernel routine which does that copy is not overwritten,
> + * all code and data needed by arm64_relocate_new_kernel must be between the
> + * symbols arm64_relocate_new_kernel and arm64_relocate_new_kernel_end.  The
> + * machine_kexec() routine will copy arm64_relocate_new_kernel to the kexec
> + * control_code_page, a special page which has been set up to be preserved
> + * during the copy operation.
> + */
> +.globl arm64_relocate_new_kernel
> +arm64_relocate_new_kernel:
> +
> +	/* Setup the list loop variables. */
> +	ldr	x18, .Lkimage_head		/* x18 = list entry */
> +	dcache_line_size x17, x0		/* x17 = dcache line size */
> +	mov	x16, xzr			/* x16 = segment start */
> +	mov	x15, xzr			/* x15 = entry ptr */
> +	mov	x14, xzr			/* x14 = copy dest */
> +
> +	/* Check if the new image needs relocation. */
> +	cbz	x18, .Ldone
> +	tbnz	x18, IND_DONE_BIT, .Ldone
> +
> +.Lloop:
> +	and	x13, x18, PAGE_MASK		/* x13 = addr */
> +
> +	/* Test the entry flags. */
> +.Ltest_source:
> +	tbz	x18, IND_SOURCE_BIT, .Ltest_indirection
> +
> +	mov x20, x14				/*  x20 = copy dest */
> +	mov x21, x13				/*  x21 = copy src */
> +
> +	/* Invalidate dest page to PoC. */
> +	mov	x0, x20
> +	add	x19, x0, #PAGE_SIZE
> +	sub	x1, x17, #1
> +	bic	x0, x0, x1
> +1:	dc	ivac, x0
> +	add	x0, x0, x17
> +	cmp	x0, x19
> +	b.lo	1b
> +	dsb	sy

If I've followed all this through properly:

With KVM - mmu+caches are configured, but then disabled by 'kvm: allows kvm
cpu hotplug'. This 'arm64_relocate_new_kernel' function then runs at EL2
with M=0, C=0, I=0.

Without KVM - when there is no user of EL2, the mmu+caches are left in
whatever state the bootloader (or efi stub) left them in. From
Documentation/arm64/booting.txt:
> Instruction cache may be on or off.
and
> System caches which respect the architected cache maintenance by VA
> operations must be configured and may be enabled.

So 'arm64_relocate_new_kernel' function could run at EL2 with M=0, C=?, I=?.

I think this means you can't guarantee anything you are copying below
actually makes it through the caches - booting secondary processors may get
stale values.

The EFI stub disables the M and C bits when booted at EL2 with uefi - but
it leaves the instruction cache enabled. You only clean the
reboot_code_buffer from the data cache, so there may be stale values in the
instruction cache.

I think you need to disable the i-cache at EL1. If you jump to EL2, I think
you need to disable the I/C bits there too - as you can't rely on the code
in 'kvm: allows kvm cpu hotplug' to do this in a non-kvm case.


> +
> +	/* Copy page. */
> +1:	ldp	x22, x23, [x21]
> +	ldp	x24, x25, [x21, #16]
> +	ldp	x26, x27, [x21, #32]
> +	ldp	x28, x29, [x21, #48]
> +	add	x21, x21, #64
> +	stnp	x22, x23, [x20]
> +	stnp	x24, x25, [x20, #16]
> +	stnp	x26, x27, [x20, #32]
> +	stnp	x28, x29, [x20, #48]
> +	add	x20, x20, #64
> +	tst	x21, #(PAGE_SIZE - 1)
> +	b.ne	1b
> +
> +	/* dest += PAGE_SIZE */
> +	add	x14, x14, PAGE_SIZE
> +	b	.Lnext
> +
> +.Ltest_indirection:
> +	tbz	x18, IND_INDIRECTION_BIT, .Ltest_destination
> +
> +	/* ptr = addr */
> +	mov	x15, x13
> +	b	.Lnext
> +
> +.Ltest_destination:
> +	tbz	x18, IND_DESTINATION_BIT, .Lnext
> +
> +	mov	x16, x13
> +
> +	/* dest = addr */
> +	mov	x14, x13
> +
> +.Lnext:
> +	/* entry = *ptr++ */
> +	ldr	x18, [x15], #8
> +
> +	/* while (!(entry & DONE)) */
> +	tbz	x18, IND_DONE_BIT, .Lloop
> +
> +.Ldone:
> +	dsb	sy
> +	isb
> +	ic	ialluis
> +	dsb	sy

Why the second dsb?


> +	isb
> +
> +	/* Start new image. */
> +	ldr	x4, .Lkimage_start
> +	mov	x0, xzr
> +	mov	x1, xzr
> +	mov	x2, xzr
> +	mov	x3, xzr

Once the kexec'd kernel is booting, I get:
> WARNING: x1-x3 nonzero in violation of boot protocol:
>         x1: 0000000080008000
>         x2: 0000000000000020
>         x3: 0000000000000020
> This indicates a broken bootloader or old kernel

Presumably this 'kimage_start' isn't pointing to the new kernel, but the
purgatory code (which comes from user-space?). (If so, what are these xzr-s
for?)


> +	br	x4
> +
> +.align 3	/* To keep the 64-bit values below naturally aligned. */
> +
> +/* The machine_kexec routine sets these variables via offsets from
> + * arm64_relocate_new_kernel.
> + */
> +
> +/*
> + * .Lkimage_start - Copy of image->start, the entry point of the new
> + * image.
> + */
> +.Lkimage_start:
> +	.quad	0x0
> +
> +/*
> + * .Lkimage_head - Copy of image->head, the list of kimage entries.
> + */
> +.Lkimage_head:
> +	.quad	0x0
> +

I assume these .quad-s are used because you can't pass the values in via
registers - due to the complicated soft_restart(). Given you are the only
user, couldn't you simplify it to do all the disabling in
arm64_relocate_new_kernel?


> +.Lcopy_end:
> +.org	KEXEC_CONTROL_PAGE_SIZE
> +
> +/*
> + * arm64_relocate_new_kernel_size - Number of bytes to copy to the control_code_page.
> + */
> +.globl arm64_relocate_new_kernel_size
> +arm64_relocate_new_kernel_size:
> +	.quad	.Lcopy_end - arm64_relocate_new_kernel
> +
> +/*
> + * arm64_kexec_kimage_start_offset - Offset for writing .Lkimage_start.
> + */
> +.globl arm64_kexec_kimage_start_offset
> +arm64_kexec_kimage_start_offset:
> +	.quad	.Lkimage_start - arm64_relocate_new_kernel
> +
> +/*
> + * arm64_kexec_kimage_head_offset - Offset for writing .Lkimage_head.
> + */
> +.globl arm64_kexec_kimage_head_offset
> +arm64_kexec_kimage_head_offset:
> +	.quad	.Lkimage_head - arm64_relocate_new_kernel


From 'kexec -e' to the first messages from the new kernel takes ~1 minute
on Juno. Did you see a similar delay? Or should I go looking for what I've
configured wrong!?

(Copying code with the mmu+caches on, then cleaning to PoC was noticeably
faster for hibernate)


I've used this series for kexec-ing between 4K and 64K page_size kernels on
Juno.

Tested-by: James Morse <james.morse@arm.com>



Thanks!

James
Mark Rutland Oct. 30, 2015, 4:54 p.m. UTC | #5
Hi,

> If I've followed all this through properly:
> 
> With KVM - mmu+caches are configured, but then disabled by 'kvm: allows kvm
> cpu hotplug'. This 'arm64_relocate_new_kernel' function then runs at EL2
> with M=0, C=0, I=0.
> 
> Without KVM - when there is no user of EL2, the mmu+caches are left in
> whatever state the bootloader (or efi stub) left them in. From
> Documentation/arm64/booting.txt:
> > Instruction cache may be on or off.
> and
> > System caches which respect the architected cache maintenance by VA
> > operations must be configured and may be enabled.
> 
> So 'arm64_relocate_new_kernel' function could run at EL2 with M=0, C=?, I=?.
> 
> I think this means you can't guarantee anything you are copying below
> actually makes it through the caches - booting secondary processors may get
> stale values.
> 
> The EFI stub disables the M and C bits when booted at EL2 with uefi - but
> it leaves the instruction cache enabled. You only clean the
> reboot_code_buffer from the data cache, so there may be stale values in the
> instruction cache.
> 
> I think you need to disable the i-cache at EL1. If you jump to EL2, I think
> you need to disable the I/C bits there too - as you can't rely on the code
> in 'kvm: allows kvm cpu hotplug' to do this in a non-kvm case.

The SCTLR_ELx.I bit only affects the attributes that the I-cache uses to
fetch with, not whether the I-cache is enabled (it cannot be disabled
architecturally).

It's not necessary to clear the I bit so long as the appropriate
maintenance has occurred, though I believe that when the I bit is set,
instruction fetches may allocate into unified levels of cache, so
additional consideration is required for that case.

> > +	/* Copy page. */
> > +1:	ldp	x22, x23, [x21]
> > +	ldp	x24, x25, [x21, #16]
> > +	ldp	x26, x27, [x21, #32]
> > +	ldp	x28, x29, [x21, #48]
> > +	add	x21, x21, #64
> > +	stnp	x22, x23, [x20]
> > +	stnp	x24, x25, [x20, #16]
> > +	stnp	x26, x27, [x20, #32]
> > +	stnp	x28, x29, [x20, #48]
> > +	add	x20, x20, #64
> > +	tst	x21, #(PAGE_SIZE - 1)
> > +	b.ne	1b
> > +
> > +	/* dest += PAGE_SIZE */
> > +	add	x14, x14, PAGE_SIZE
> > +	b	.Lnext
> > +
> > +.Ltest_indirection:
> > +	tbz	x18, IND_INDIRECTION_BIT, .Ltest_destination
> > +
> > +	/* ptr = addr */
> > +	mov	x15, x13
> > +	b	.Lnext
> > +
> > +.Ltest_destination:
> > +	tbz	x18, IND_DESTINATION_BIT, .Lnext
> > +
> > +	mov	x16, x13
> > +
> > +	/* dest = addr */
> > +	mov	x14, x13
> > +
> > +.Lnext:
> > +	/* entry = *ptr++ */
> > +	ldr	x18, [x15], #8
> > +
> > +	/* while (!(entry & DONE)) */
> > +	tbz	x18, IND_DONE_BIT, .Lloop
> > +
> > +.Ldone:
> > +	dsb	sy
> > +	isb
> > +	ic	ialluis
> > +	dsb	sy
> 
> Why the second dsb?
> 
> 
> > +	isb

The first DSB ensures that the copied data is observable by the
I-caches.

The first ISB is unnecessary.

The second DSB ensures that the I-cache maintenance is completed.

The second ISB ensures that the I-cache maintenance is complete w.r.t.
the current instruction stream. There could be instructions in the
pipeline fetched from the I-cache prior to invalidation which need to be
cleared.
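
So the minimal sequence here would be (a sketch; only the first isb
needs to go):

	dsb	sy	/* make the copied data observable by the I-cache side */
	ic	ialluis	/* invalidate all I-caches to PoU, Inner Shareable */
	dsb	sy	/* wait for the invalidation to complete */
	isb	/* discard any stale instructions already in the pipeline */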

Thanks,
Mark.
Pratyush Anand Nov. 2, 2015, 9:26 a.m. UTC | #6
Hi James,

On 30/10/2015:04:29:01 PM, James Morse wrote:
> 
> From 'kexec -e' to the first messages from the new kernel takes ~1 minute
> on Juno. Did you see a similar delay? Or should I go looking for what I've
> configured wrong!?

I did have similar issues with mustang, where it was taking more than 2 min.

Can you please try with my kexec-tools repo [1], where I have patches to
enable the D-cache for sha verification? Your feedback might help to get these
patches upstream.

Thanks
~Pratyush

[1] https://github.com/pratyushanand/kexec-tools.git : master
Geoff Levand Nov. 3, 2015, 12:30 a.m. UTC | #7
Hi James,

On Fri, 2015-10-30 at 16:29 +0000, James Morse wrote:
> On 20/10/15 00:38, Geoff Levand wrote:
> > +config KEXEC
> > +	depends on (!SMP || PM_SLEEP_SMP)
> 
> Commit 4b3dc9679cf7 got rid of '!SMP'.

Fixed for v11.

> > - * Copyright (C) 2015 Huawei Futurewei Technologies.
> > + * Copyright (C) Huawei Futurewei Technologies.
> 
> Move this hunk into the patch that adds the file?

Was fixed in v10.2.
 

> > +++ b/arch/arm64/kernel/relocate_kernel.S

> If I've followed all this through properly:
> 
> With KVM - mmu+caches are configured, but then disabled by 'kvm: allows kvm
> cpu hotplug'. This 'arm64_relocate_new_kernel' function then runs at EL2
> with M=0, C=0, I=0.
> 
> Without KVM - when there is no user of EL2, the mmu+caches are left in
> whatever state the bootloader (or efi stub) left them in. From
> Documentation/arm64/booting.txt:
> > Instruction cache may be on or off.
> and
> > System caches which respect the architected cache maintenance by VA
> > operations must be configured and may be enabled.
> 
> So 'arm64_relocate_new_kernel' function could run at EL2 with M=0, C=?, I=?.
> 
> I think this means you can't guarantee anything you are copying below
> actually makes it through the caches - booting secondary processors may get
> stale values.
> 
> The EFI stub disables the M and C bits when booted at EL2 with uefi - but
> it leaves the instruction cache enabled. You only clean the
> reboot_code_buffer from the data cache, so there may be stale values in the
> instruction cache.
> 
> I think you need to disable the i-cache at EL1. If you jump to EL2, I think
> you need to disable the I/C bits there too - as you can't rely on the code
> in 'kvm: allows kvm cpu hotplug' to do this in a non-kvm case.

For consistency across all code paths, we could put in something like this:

+       /* Clear SCTLR_ELx_FLAGS. */
+       mrs     x0, CurrentEL
+       cmp     x0, #CurrentEL_EL2
+       b.ne    1f
+       mrs     x0, sctlr_el2
+       ldr     x1, =SCTLR_EL2_FLAGS
+       bic     x0, x0, x1
+       msr     sctlr_el2, x0
+       isb
+       b       2f
+1:     mrs     x0, sctlr_el1
+       ldr     x1, =SCTLR_EL2_FLAGS
+       bic     x0, x0, x1
+       msr     sctlr_el1, x0
+       isb
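
(Loading SCTLR_EL2_FLAGS in the EL1 branch should be fine, since the M, C
and I bits sit at the same bit positions in SCTLR_EL1 and SCTLR_EL2.)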



> > +.Ldone:
> > +	dsb	sy
> > +	isb
> > +	ic	ialluis
> > +	dsb	sy
> 
> Why the second dsb?

I removed the first isb as Mark suggested.


> 
> > +	isb
> > +
> > +	/* Start new image. */
> > +	ldr	x4, .Lkimage_start
> > +	mov	x0, xzr
> > +	mov	x1, xzr
> > +	mov	x2, xzr
> > +	mov	x3, xzr
> 
> Once the kexec'd kernel is booting, I get:
> > WARNING: x1-x3 nonzero in violation of boot protocol:
> >         x1: 0000000080008000
> >         x2: 0000000000000020
> >         x3: 0000000000000020
> > This indicates a broken bootloader or old kernel
> 
> Presumably this 'kimage_start' isn't pointing to the new kernel, but the
> purgatory code, (which comes from user-space?). (If so what are these xzr-s
> for?)

The warning was from the arm64 purgatory in kexec-tools, now fixed.

We don't need to zero the registers anymore.  At one time I had an option
where the kernel found the dtb section and jumped directly to the new
image, as the 32-bit ARM kernel does.

> > +/* The machine_kexec routine sets these variables via offsets from
> > + * arm64_relocate_new_kernel.
> > + */
> > +
> > +/*
> > + * .Lkimage_start - Copy of image->start, the entry point of the new
> > + * image.
> > + */
> > +.Lkimage_start:
> > +	.quad	0x0
> > +
> > +/*
> > + * .Lkimage_head - Copy of image->head, the list of kimage entries.
> > + */
> > +.Lkimage_head:
> > +	.quad	0x0
> > +
> 
> I assume these .quad-s are used because you can't pass the values in via
> registers - due to the complicated soft_restart(). Given you are the only
> user, couldn't you simplify it to do all the disabling in
> arm64_relocate_new_kernel?

I moved some things from cpu_reset to arm64_relocate_new_kernel, but
from what Takahiro has said, to support a modular kvm some of the CPU
shutdown code will be shared.  Maybe we can look into simplifying things
once work on modular kvm is started.


> 
> From 'kexec -e' to the first messages from the new kernel takes ~1 minute
> on Juno. Did you see a similar delay? Or should I go looking for what I've
> configured wrong!?

As Pratyush has mentioned, this is most likely due to the dcaches
being disabled.

> (Copying code with the mmu+caches on, then cleaning to PoC was noticeably
> faster for hibernate)
> 
> 
> I've used this series for kexec-ing between 4K and 64K page_size kernels on
> Juno.

Thanks for testing.

-Geoff

Patch

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 07d1811..73e8e31 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -491,6 +491,16 @@  config SECCOMP
 	  and the task is only allowed to execute a few safe syscalls
 	  defined by each seccomp mode.
 
+config KEXEC
+	depends on (!SMP || PM_SLEEP_SMP)
+	select KEXEC_CORE
+	bool "kexec system call"
+	---help---
+	  kexec is a system call that implements the ability to shutdown your
+	  current kernel, and to start another kernel.  It is like a reboot
+	  but it is independent of the system firmware.   And like a reboot
+	  you can start any kernel with it, not just Linux.
+
 config XEN_DOM0
 	def_bool y
 	depends on XEN
diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
new file mode 100644
index 0000000..46d63cd
--- /dev/null
+++ b/arch/arm64/include/asm/kexec.h
@@ -0,0 +1,48 @@ 
+/*
+ * kexec for arm64
+ *
+ * Copyright (C) Linaro.
+ * Copyright (C) Huawei Futurewei Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#if !defined(_ARM64_KEXEC_H)
+#define _ARM64_KEXEC_H
+
+/* Maximum physical address we can use pages from */
+
+#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
+
+/* Maximum address we can reach in physical address mode */
+
+#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
+
+/* Maximum address we can use for the control code buffer */
+
+#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL)
+
+#define KEXEC_CONTROL_PAGE_SIZE	4096
+
+#define KEXEC_ARCH KEXEC_ARCH_ARM64
+
+#if !defined(__ASSEMBLY__)
+
+/**
+ * crash_setup_regs() - save registers for the panic kernel
+ *
+ * @newregs: registers are saved here
+ * @oldregs: registers to be saved (may be %NULL)
+ */
+
+static inline void crash_setup_regs(struct pt_regs *newregs,
+				    struct pt_regs *oldregs)
+{
+	/* Empty routine needed to avoid build errors. */
+}
+
+#endif /* !defined(__ASSEMBLY__) */
+
+#endif
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 22dc9bc..989ccd7 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -36,6 +36,8 @@  arm64-obj-$(CONFIG_EFI)			+= efi.o efi-stub.o efi-entry.o
 arm64-obj-$(CONFIG_PCI)			+= pci.o
 arm64-obj-$(CONFIG_ARMV8_DEPRECATED)	+= armv8_deprecated.o
 arm64-obj-$(CONFIG_ACPI)		+= acpi.o
+arm64-obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o	\
+					   cpu-reset.o
 
 obj-y					+= $(arm64-obj-y) vdso/
 obj-m					+= $(arm64-obj-m)
diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S
index ffc9e385e..7cc7f56 100644
--- a/arch/arm64/kernel/cpu-reset.S
+++ b/arch/arm64/kernel/cpu-reset.S
@@ -3,7 +3,7 @@ 
  *
  * Copyright (C) 2001 Deep Blue Solutions Ltd.
  * Copyright (C) 2012 ARM Ltd.
- * Copyright (C) 2015 Huawei Futurewei Technologies.
+ * Copyright (C) Huawei Futurewei Technologies.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
new file mode 100644
index 0000000..1fae6ae
--- /dev/null
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -0,0 +1,141 @@ 
+/*
+ * kexec for arm64
+ *
+ * Copyright (C) Linaro.
+ * Copyright (C) Huawei Futurewei Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kexec.h>
+#include <linux/of_fdt.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+#include <asm/cacheflush.h>
+#include <asm/system_misc.h>
+
+#include "cpu-reset.h"
+
+/* Global variables for the relocate_kernel routine. */
+extern const unsigned char arm64_relocate_new_kernel[];
+extern const unsigned long arm64_relocate_new_kernel_size;
+extern unsigned long arm64_kexec_kimage_head_offset;
+extern unsigned long arm64_kexec_kimage_start_offset;
+
+static unsigned long kimage_head;
+static unsigned long kimage_start;
+
+void machine_kexec_cleanup(struct kimage *image)
+{
+	/* Empty routine needed to avoid build errors. */
+}
+
+/**
+ * machine_kexec_prepare - Prepare for a kexec reboot.
+ *
+ * Called from the core kexec code when a kernel image is loaded.
+ */
+int machine_kexec_prepare(struct kimage *image)
+{
+	kimage_start = image->start;
+	return 0;
+}
+
+/**
+ * kexec_list_flush - Helper to flush the kimage list to PoC.
+ */
+static void kexec_list_flush(unsigned long kimage_head)
+{
+	unsigned long *entry;
+
+	for (entry = &kimage_head; ; entry++) {
+		unsigned int flag = *entry & IND_FLAGS;
+		void *addr = phys_to_virt(*entry & PAGE_MASK);
+
+		switch (flag) {
+		case IND_INDIRECTION:
+			entry = (unsigned long *)addr - 1;
+			__flush_dcache_area(addr, PAGE_SIZE);
+			break;
+		case IND_DESTINATION:
+			break;
+		case IND_SOURCE:
+			__flush_dcache_area(addr, PAGE_SIZE);
+			break;
+		case IND_DONE:
+			return;
+		default:
+			BUG();
+		}
+	}
+}
+
+static void soft_restart(unsigned long addr)
+{
+	setup_mm_for_reboot();
+	cpu_soft_restart(virt_to_phys(cpu_reset), addr,
+		is_hyp_mode_available());
+
+	BUG(); /* Should never get here. */
+}
+
+/**
+ * machine_kexec - Do the kexec reboot.
+ *
+ * Called from the core kexec code for a sys_reboot with LINUX_REBOOT_CMD_KEXEC.
+ */
+void machine_kexec(struct kimage *image)
+{
+	phys_addr_t reboot_code_buffer_phys;
+	void *reboot_code_buffer;
+
+	BUG_ON(num_online_cpus() > 1);
+
+	kimage_head = image->head;
+
+	reboot_code_buffer_phys = page_to_phys(image->control_code_page);
+	reboot_code_buffer = phys_to_virt(reboot_code_buffer_phys);
+
+	/*
+	 * Copy arm64_relocate_new_kernel to the reboot_code_buffer for use
+	 * after the kernel is shut down.
+	 */
+	memcpy(reboot_code_buffer, arm64_relocate_new_kernel,
+		arm64_relocate_new_kernel_size);
+
+	/* Set the variables in reboot_code_buffer. */
+
+	memcpy(reboot_code_buffer + arm64_kexec_kimage_start_offset,
+	       &kimage_start, sizeof(kimage_start));
+	memcpy(reboot_code_buffer + arm64_kexec_kimage_head_offset,
+	       &kimage_head, sizeof(kimage_head));
+
+	/* Flush the reboot_code_buffer in preparation for its execution. */
+	__flush_dcache_area(reboot_code_buffer, arm64_relocate_new_kernel_size);
+
+	/* Flush the kimage list. */
+	kexec_list_flush(image->head);
+
+	pr_info("Bye!\n");
+
+	/* Disable all DAIF exceptions. */
+	asm volatile ("msr daifset, #0xf" : : : "memory");
+
+	/*
+	 * soft_restart() will shut down the MMU, disable data caches, then
+	 * transfer control to the reboot_code_buffer which contains a copy of
+	 * the arm64_relocate_new_kernel routine.  arm64_relocate_new_kernel
+	 * will use physical addressing to relocate the new kernel to its final
+	 * position and then will transfer control to the entry point of the new
+	 * kernel.
+	 */
+	soft_restart(reboot_code_buffer_phys);
+}
+
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+	/* Empty routine needed to avoid build errors. */
+}
diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
new file mode 100644
index 0000000..7b07a16
--- /dev/null
+++ b/arch/arm64/kernel/relocate_kernel.S
@@ -0,0 +1,163 @@ 
+/*
+ * kexec for arm64
+ *
+ * Copyright (C) Linaro.
+ * Copyright (C) Huawei Futurewei Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kexec.h>
+
+#include <asm/assembler.h>
+#include <asm/kexec.h>
+#include <asm/memory.h>
+#include <asm/page.h>
+
+
+/*
+ * arm64_relocate_new_kernel - Put a 2nd stage kernel image in place and boot it.
+ *
+ * The memory that the old kernel occupies may be overwritten when copying the
+ * new image to its final location.  To ensure that the
+ * arm64_relocate_new_kernel routine which does that copy is not overwritten,
+ * all code and data needed by arm64_relocate_new_kernel must be between the
+ * symbols arm64_relocate_new_kernel and arm64_relocate_new_kernel_end.  The
+ * machine_kexec() routine will copy arm64_relocate_new_kernel to the kexec
+ * control_code_page, a special page which has been set up to be preserved
+ * during the copy operation.
+ */
+.globl arm64_relocate_new_kernel
+arm64_relocate_new_kernel:
+
+	/* Setup the list loop variables. */
+	ldr	x18, .Lkimage_head		/* x18 = list entry */
+	dcache_line_size x17, x0		/* x17 = dcache line size */
+	mov	x16, xzr			/* x16 = segment start */
+	mov	x15, xzr			/* x15 = entry ptr */
+	mov	x14, xzr			/* x14 = copy dest */
+
+	/* Check if the new image needs relocation. */
+	cbz	x18, .Ldone
+	tbnz	x18, IND_DONE_BIT, .Ldone
+
+.Lloop:
+	and	x13, x18, PAGE_MASK		/* x13 = addr */
+
+	/* Test the entry flags. */
+.Ltest_source:
+	tbz	x18, IND_SOURCE_BIT, .Ltest_indirection
+
+	mov x20, x14				/*  x20 = copy dest */
+	mov x21, x13				/*  x21 = copy src */
+
+	/* Invalidate dest page to PoC. */
+	mov	x0, x20
+	add	x19, x0, #PAGE_SIZE
+	sub	x1, x17, #1
+	bic	x0, x0, x1
+1:	dc	ivac, x0
+	add	x0, x0, x17
+	cmp	x0, x19
+	b.lo	1b
+	dsb	sy
+
+	/* Copy page. */
+1:	ldp	x22, x23, [x21]
+	ldp	x24, x25, [x21, #16]
+	ldp	x26, x27, [x21, #32]
+	ldp	x28, x29, [x21, #48]
+	add	x21, x21, #64
+	stnp	x22, x23, [x20]
+	stnp	x24, x25, [x20, #16]
+	stnp	x26, x27, [x20, #32]
+	stnp	x28, x29, [x20, #48]
+	add	x20, x20, #64
+	tst	x21, #(PAGE_SIZE - 1)
+	b.ne	1b
+
+	/* dest += PAGE_SIZE */
+	add	x14, x14, PAGE_SIZE
+	b	.Lnext
+
+.Ltest_indirection:
+	tbz	x18, IND_INDIRECTION_BIT, .Ltest_destination
+
+	/* ptr = addr */
+	mov	x15, x13
+	b	.Lnext
+
+.Ltest_destination:
+	tbz	x18, IND_DESTINATION_BIT, .Lnext
+
+	mov	x16, x13
+
+	/* dest = addr */
+	mov	x14, x13
+
+.Lnext:
+	/* entry = *ptr++ */
+	ldr	x18, [x15], #8
+
+	/* while (!(entry & DONE)) */
+	tbz	x18, IND_DONE_BIT, .Lloop
+
+.Ldone:
+	dsb	sy
+	isb
+	ic	ialluis
+	dsb	sy
+	isb
+
+	/* Start new image. */
+	ldr	x4, .Lkimage_start
+	mov	x0, xzr
+	mov	x1, xzr
+	mov	x2, xzr
+	mov	x3, xzr
+	br	x4
+
+.align 3	/* To keep the 64-bit values below naturally aligned. */
+
+/* The machine_kexec routine sets these variables via offsets from
+ * arm64_relocate_new_kernel.
+ */
+
+/*
+ * .Lkimage_start - Copy of image->start, the entry point of the new
+ * image.
+ */
+.Lkimage_start:
+	.quad	0x0
+
+/*
+ * .Lkimage_head - Copy of image->head, the list of kimage entries.
+ */
+.Lkimage_head:
+	.quad	0x0
+
+.Lcopy_end:
+.org	KEXEC_CONTROL_PAGE_SIZE
+
+/*
+ * arm64_relocate_new_kernel_size - Number of bytes to copy to the control_code_page.
+ */
+.globl arm64_relocate_new_kernel_size
+arm64_relocate_new_kernel_size:
+	.quad	.Lcopy_end - arm64_relocate_new_kernel
+
+/*
+ * arm64_kexec_kimage_start_offset - Offset for writing .Lkimage_start.
+ */
+.globl arm64_kexec_kimage_start_offset
+arm64_kexec_kimage_start_offset:
+	.quad	.Lkimage_start - arm64_relocate_new_kernel
+
+/*
+ * arm64_kexec_kimage_head_offset - Offset for writing .Lkimage_head.
+ */
+.globl arm64_kexec_kimage_head_offset
+arm64_kexec_kimage_head_offset:
+	.quad	.Lkimage_head - arm64_relocate_new_kernel
diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h
index 99048e5..ccec467 100644
--- a/include/uapi/linux/kexec.h
+++ b/include/uapi/linux/kexec.h
@@ -39,6 +39,7 @@ 
 #define KEXEC_ARCH_SH      (42 << 16)
 #define KEXEC_ARCH_MIPS_LE (10 << 16)
 #define KEXEC_ARCH_MIPS    ( 8 << 16)
+#define KEXEC_ARCH_ARM64   (183 << 16)
 
 /* The artificial cap on the number of segments passed to kexec_load. */
 #define KEXEC_SEGMENT_MAX 16