new file mode 100644
@@ -0,0 +1,109 @@
+/******************************************************************************
+ * kexec.c
+ *
+ * Support of kexec (reboot locally into new mini-os kernel).
+ *
+ * Copyright (c) 2024, Juergen Gross, SUSE Linux GmbH
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifdef CONFIG_KEXEC
+
+#include <mini-os/os.h>
+#include <mini-os/lib.h>
+#include <mini-os/kexec.h>
+
+/*
+ * Final stage of kexec. Processes the action list in order: copies all data
+ * to the final destinations, zeroes memory (e.g. .bss) and activates the new
+ * kernel.
+ *
+ * actions: list of actions to perform. The list is walked until a KEXEC_CALL
+ *          action is found, so the last element must be KEXEC_CALL.
+ * real:    address of the code to jump to for KEXEC_CALL. It is entered with
+ *          %ebx holding the action's src and %edi holding the action's dest.
+ *
+ * Must be called with interrupts off. Stack, code and data must be
+ * accessible via identity mapped virtual addresses (virt == phys). Copying
+ * and zeroing is done using virtual addresses.
+ * No relocations inside the function are allowed, as it is copied to an
+ * allocated page before being executed.
+ */
+static void __attribute__((__section__(".text.kexec")))
+    kexec_final(struct kexec_action *actions, unsigned long real)
+{
+    char *src, *dest;
+    unsigned int a, cnt;
+
+    for ( a = 0; ; a++ )
+    {
+        switch ( actions[a].action )
+        {
+        case KEXEC_COPY:
+            dest = actions[a].dest;
+            src = actions[a].src;
+            for ( cnt = 0; cnt < actions[a].len; cnt++ )
+                *dest++ = *src++;
+            break;
+
+        case KEXEC_ZERO:
+            dest = actions[a].dest;
+            for ( cnt = 0; cnt < actions[a].len; cnt++ )
+                *dest++ = 0;
+            break;
+
+        case KEXEC_CALL:
+            /*
+             * src and dest are only read here, so they are inputs. Letting
+             * the compiler load them into %ebx and %edi itself guarantees
+             * that none of the operands is addressed via a register which
+             * has already been overwritten. Only the low 32 bits are
+             * passed, as the target runs with paging off in 32-bit mode.
+             * The "memory" clobber makes sure all copying and zeroing done
+             * above has been written before control is handed over.
+             */
+            asm volatile("jmp *%2"
+                         :
+                         : "b" ((unsigned int)(unsigned long)actions[a].src),
+                           "D" ((unsigned int)(unsigned long)actions[a].dest),
+                           "r" (real)
+                         : "memory");
+            break;
+        }
+    }
+}
+
+#define KEXEC_STACK_LONGS 8 /* Size of kexec_stack in unsigned longs. */
+static unsigned long __attribute__((__section__(".data.kexec")))
+ kexec_stack[KEXEC_STACK_LONGS]; /* Stack do_kexec() switches to (in copy). */
+
+/*
+ * Translate an address inside the kexec section (starting at _kexec_start)
+ * into the corresponding address inside a copy of the section located at
+ * kexec_page.
+ */
+static unsigned long get_kexec_addr(void *kexec_page, void *addr)
+{
+    unsigned long section_base = (unsigned long)_kexec_start;
+    unsigned long copy_base = (unsigned long)kexec_page;
+
+    return copy_base + ((unsigned long)addr - section_base);
+}
+
+/*
+ * Initiate the final kexec stage.
+ *
+ * Copies the kexec section (_kexec_start, KEXEC_SECSIZE bytes) to kexec_page,
+ * disables interrupts, switches to the stack inside the copy and jumps to the
+ * copy of kexec_final(). kexec_final() is handed the address of the copied
+ * action list as first and the address of the copied kexec_phys() as second
+ * argument. All addresses passed on refer to the copy in kexec_page.
+ *
+ * kexec_page: destination of the copy, needs room for KEXEC_SECSIZE bytes.
+ *
+ * Control is transferred by a jump, not a call.
+ */
+void do_kexec(void *kexec_page)
+{
+    unsigned long actions;
+    unsigned long stack;
+    unsigned long final;
+    unsigned long phys;
+
+    actions = get_kexec_addr(kexec_page, kexec_actions);
+    stack = get_kexec_addr(kexec_page, kexec_stack + KEXEC_STACK_LONGS);
+    final = get_kexec_addr(kexec_page, kexec_final);
+    phys = get_kexec_addr(kexec_page, kexec_phys);
+
+    memcpy(kexec_page, _kexec_start, KEXEC_SECSIZE);
+
+    {
+        /*
+         * Bind the arguments of kexec_final() to the argument registers.
+         * This is done after the last function call, as a call could
+         * overwrite those registers.
+         */
+        register unsigned long arg1 asm(ASM_ARG1) = actions;
+        register unsigned long arg2 asm(ASM_ARG2) = phys;
+
+        /*
+         * All values are inputs and are passed in registers: memory operands
+         * could be addressed relative to the stack pointer, which is replaced
+         * by the first instruction after cli. The "memory" clobber ensures
+         * the copy above is complete before jumping into it.
+         */
+        asm volatile("cli\n\t"
+                     "mov %0, %%"ASM_SP"\n\t"
+                     "jmp *%1"
+                     :
+                     : "r" (stack), "r" (final), "r" (arg1), "r" (arg2)
+                     : "memory");
+    }
+}
+
+#endif /* CONFIG_KEXEC */
@@ -87,6 +87,14 @@ SECTIONS
_edata = .; /* End of data section */
+ . = ALIGN(8);
+ _kexec_start = .; /* Kexec relocatable code/data */
+ .kexec : {
+ *(.text.kexec) /* Code of the final kexec stage */
+ *(.data.kexec) /* Data accessed by the final kexec stage */
+ }
+ _kexec_end = .; /* End of kexec relocatable code/data */
+
__bss_start = .; /* BSS */
.bss : {
*(.bss)
@@ -85,4 +85,50 @@ page_table_l2:
#endif
.align __PAGE_SIZE, 0
+#ifdef CONFIG_KEXEC
+.section .text.kexec, "ax", @progbits
+
+/*
+ * Switch off paging and call new OS for kexec.
+ * %ebx holds the physical address of the start_info structure
+ * %edi holds the physical address of the entry point to call
+ *
+ * %ebx and %edi are not modified by this code. The new OS is entered via an
+ * indirect jump (not a call) through %edi. %eax, %edx and (on x86_64) %ecx
+ * are overwritten.
+ * On x86_64 the code is entered in 64-bit mode and switches to 32-bit code
+ * before paging is turned off.
+ */
+.globl kexec_phys
+kexec_phys:
+ /* Set DS, ES, SS to 0...ffffffff. */
+ mov $(GDTE_DS32_DPL0 * 8), %eax
+ mov %eax, %ds
+ mov %eax, %es
+ mov %eax, %ss
+
+#ifdef __x86_64__
+ /*
+ * Switch to 32-bit mode: push the 32-bit code segment selector and the
+ * address of cs32_switch, then load both via a far return.
+ */
+ pushq $(GDTE_CS32_DPL0 * 8)
+ lea cs32_switch(%rip),%edx
+ push %rdx
+ lretq
+
+ .code32
+cs32_switch:
+#endif
+ /*
+ * Set %cr0 and %cr4 (disables paging): %cr0 is written with only the PE
+ * bit set, %cr4 is cleared completely.
+ */
+ mov $X86_CR0_PE, %eax
+ mov %eax, %cr0
+ mov $0, %eax
+ mov %eax, %cr4
+#ifdef __x86_64__
+ /* Disable 64-bit mode: clear the LME bit in the EFER MSR. */
+ mov $MSR_EFER, %ecx
+ rdmsr
+ btr $_EFER_LME, %eax
+ wrmsr
+#endif
+
+ jmp *%edi /* Enter the new OS at its entry point. */
+
+#ifdef __x86_64__
+ .code64 /* Back to 64-bit code for the rest of the file. */
+#endif
+#endif /* CONFIG_KEXEC */
+
.text
@@ -1,7 +1,34 @@
#ifndef _KEXEC_H
#define _KEXEC_H
+/*
+ * One element of kexec actions (last element must have action KEXEC_CALL):
+ * The final kexec stage processes the elements in array order and does not
+ * check any element count, it stops only at the KEXEC_CALL element.
+ */
+struct kexec_action {
+ enum {
+ KEXEC_COPY, /* Copy len bytes from src to dest. */
+ KEXEC_ZERO, /* Zero len bytes at dest. */
+ KEXEC_CALL /* Call dest with paging turned off, param is src. */
+ } action;
+ unsigned int len; /* Number of bytes (KEXEC_COPY and KEXEC_ZERO). */
+ void *dest; /* Destination address, or entry point for KEXEC_CALL. */
+ void *src; /* Source address, or parameter for KEXEC_CALL. */
+};
+
+#define KEXEC_MAX_ACTIONS 16 /* Number of elements of kexec_actions[]. */
+
+/* Start and end of the kexec section (defined by the linker script). */
+extern char _kexec_start[], _kexec_end[];
+/* Actions to be performed by the final kexec stage. */
+extern struct kexec_action kexec_actions[KEXEC_MAX_ACTIONS];
+
+/*
+ * Append an action to kexec_actions[].
+ * Returns 0 on success, -ENOSPC if all KEXEC_MAX_ACTIONS elements are in use.
+ */
+int kexec_add_action(int action, void *dest, void *src, unsigned int len);
+
+/* Size of the kexec section in bytes. */
+#define KEXEC_SECSIZE ((unsigned long)_kexec_end - (unsigned long)_kexec_start)
+
int kexec(void *kernel, unsigned long kernel_size,
const char *cmdline);
+/*
+ * Initiate final kexec stage: the kexec section (KEXEC_SECSIZE bytes) is
+ * copied to kexec_page and execution continues in that copy.
+ */
+void do_kexec(void *kexec_page);
+
+/* Assembler code for switching off paging and passing execution to new OS. */
+void kexec_phys(void);
+
#endif /* _KEXEC_H */
@@ -27,6 +27,7 @@
#define MSR_EFER 0xc0000080
#define _EFER_LME 8 /* Long mode enable */
+#define X86_CR0_PE 0x00000001 /* Protected mode enable */
#define X86_CR0_WP 0x00010000 /* Write protect */
#define X86_CR0_PG 0x80000000 /* Paging */
#define X86_CR4_PAE 0x00000020 /* enable physical address extensions */
@@ -64,9 +65,13 @@
#if defined(__i386__)
#define __SZ "l"
#define __REG "e"
+#define ASM_ARG1 "eax" /* Register loaded with 1st argument by do_kexec() */
+#define ASM_ARG2 "edx" /* Ditto, 2nd argument. NOTE(review): presumably */
+ /* relies on a regparm build - confirm. */
#else
#define __SZ "q"
#define __REG "r"
+#define ASM_ARG1 "rdi" /* Register loaded with 1st argument by do_kexec() */
+#define ASM_ARG2 "rsi" /* Ditto, 2nd argument */
#endif
#define ASM_SP __REG"sp" /* Name of the stack pointer register */
@@ -60,3 +60,25 @@ int kexec(void *kernel, unsigned long kernel_size,
return ENOSYS;
}
EXPORT_SYMBOL(kexec);
+
+struct kexec_action __attribute__((__section__(".data.kexec")))
+ kexec_actions[KEXEC_MAX_ACTIONS]; /* Filled via kexec_add_action(). */
+static unsigned int act_idx; /* Index of next free kexec_actions[] element. */
+
+/*
+ * Append one action to kexec_actions[].
+ *
+ * action: one of KEXEC_COPY, KEXEC_ZERO or KEXEC_CALL
+ * dest:   destination of the action
+ * src:    source of the action
+ * len:    number of bytes of the action
+ *
+ * Returns 0 on success, or -ENOSPC if all KEXEC_MAX_ACTIONS elements are
+ * already in use.
+ */
+int kexec_add_action(int action, void *dest, void *src, unsigned int len)
+{
+    struct kexec_action *slot;
+
+    /* No free element left? */
+    if ( act_idx == KEXEC_MAX_ACTIONS )
+        return -ENOSPC;
+
+    /* Claim the next free element and fill it. */
+    slot = &kexec_actions[act_idx++];
+    slot->action = action;
+    slot->dest = dest;
+    slot->src = src;
+    slot->len = len;
+
+    return 0;
+}
Add the code and data definitions of the final kexec stage. Put the code and related data into a dedicated section in order to be able to copy it to another location. For this reason there must be no absolute relocations being used in the code or data. Being functionally related, add a function for adding a final kexec action. Signed-off-by: Juergen Gross <jgross@suse.com> --- arch/x86/kexec.c | 109 ++++++++++++++++++++++++++++++++++++++ arch/x86/minios-x86.lds.S | 8 +++ arch/x86/x86_hvm.S | 46 ++++++++++++++++ include/kexec.h | 27 ++++++++++ include/x86/os.h | 5 ++ kexec.c | 22 ++++++++ 6 files changed, 217 insertions(+) create mode 100644 arch/x86/kexec.c