@@ -344,6 +344,16 @@ config KEXEC
The name comes from the similarity to the exec system call.
+config CRASH_DUMP
+ bool "Build kdump crash kernel"
+ help
+ Generate crash dump after being started by kexec. This should
+ normally be set only in special crash dump kernels which are
+ loaded in the main kernel with kexec-tools into a specially
+ reserved region and then later executed after a crash by
+ kdump/kexec.
+
+ For more details see Documentation/admin-guide/kdump/kdump.rst
endmenu
@@ -51,6 +51,6 @@ ifeq ($(CONFIG_RISCV_SBI), y)
obj-$(CONFIG_SMP) += cpu_ops_sbi.o
endif
obj-$(CONFIG_HOTPLUG_CPU) += cpu-hotplug.o
-obj-${CONFIG_KEXEC} += kexec_relocate.o crash_save_regs.o machine_kexec.o
-
+obj-$(CONFIG_KEXEC) += kexec_relocate.o crash_save_regs.o machine_kexec.o
+obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
clean:
new file mode 100644
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This code is taken from arch/arm64/kernel/crash_dump.c
+ * Created by: AKASHI Takahiro <takahiro.akashi@linaro.org>
+ * Copyright (C) 2017 Linaro Limited
+ */
+
+#include <linux/crash_dump.h>
+#include <linux/io.h>
+
+/**
+ * copy_oldmem_page() - read part of one page of the old kernel's memory
+ * @pfn: page frame number of the page to read from
+ * @buf: destination buffer
+ * @csize: number of bytes to copy
+ * @offset: byte offset into the page at which the copy starts
+ * @userbuf: non-zero when @buf is a user-space address
+ *
+ * The page is mapped with memremap(MEMREMAP_WB) for the duration of the
+ * copy. @csize bytes starting at @offset are transferred to @buf with
+ * copy_to_user() when @userbuf is set and with memcpy() otherwise.
+ *
+ * Return: @csize on success (0 when @csize is 0), -ENOMEM if the page
+ * could not be mapped, -EFAULT if the copy to user space failed.
+ */
+ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
+			 size_t csize, unsigned long offset,
+			 int userbuf)
+{
+	ssize_t copied = csize;
+	void *page;
+
+	if (csize == 0)
+		return 0;
+
+	page = memremap(__pfn_to_phys(pfn), PAGE_SIZE, MEMREMAP_WB);
+	if (page == NULL)
+		return -ENOMEM;
+
+	if (!userbuf)
+		memcpy(buf, page + offset, csize);
+	else if (copy_to_user((char __user *)buf, page + offset, csize))
+		copied = -EFAULT;
+
+	/* Single exit: the mapping is dropped on success and failure alike */
+	memunmap(page);
+	return copied;
+}
@@ -64,6 +64,9 @@ static struct resource code_res = { .name = "Kernel code", };
static struct resource data_res = { .name = "Kernel data", };
static struct resource rodata_res = { .name = "Kernel rodata", };
static struct resource bss_res = { .name = "Kernel bss", };
+#ifdef CONFIG_CRASH_DUMP
+static struct resource elfcorehdr_res = { .name = "ELF Core hdr", };
+#endif
static void __init kdump_resource_init(void)
{
@@ -97,6 +100,16 @@ static void __init kdump_resource_init(void)
if (crashk_res.end > crashk_res.start)
insert_resource(&sysram_res, &crashk_res);
#endif
+
+#ifdef CONFIG_CRASH_DUMP
+	/*
+	 * Register the ELF core header region below sysram_res. The bounds
+	 * of a struct resource are inclusive, so the last byte of the
+	 * region is at start + size - 1.
+	 */
+	if (elfcorehdr_size) {
+		elfcorehdr_res.start = elfcorehdr_addr;
+		elfcorehdr_res.end = elfcorehdr_addr + elfcorehdr_size - 1;
+		elfcorehdr_res.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
+		insert_resource(&sysram_res, &elfcorehdr_res);
+	}
+#endif
+
}
void __init parse_dtb(void)
@@ -123,6 +123,80 @@ static void __init setup_initrd(void)
}
#endif /* CONFIG_BLK_DEV_INITRD */
+#ifdef CONFIG_CRASH_DUMP
+/*
+ * These come from arm64
+ */
+/*
+ * early_init_dt_scan_elfcorehdr() - of_scan_flat_dt() callback that reads
+ * the "linux,elfcorehdr" property of the /chosen node.
+ * @node: node currently being visited
+ * @uname: name of that node
+ * @depth: depth of that node
+ * @data: struct memblock_region that receives the base and size
+ *
+ * Only the node named "chosen" at depth 1 is examined. @data is filled in
+ * only when the property exists and is large enough to hold one
+ * (address, size) pair encoded with the root #address-cells/#size-cells.
+ *
+ * Returns 0 for every other node and 1 once /chosen has been looked at,
+ * whether or not a usable property was found.
+ */
+static int __init early_init_dt_scan_elfcorehdr(unsigned long node,
+		const char *uname, int depth, void *data)
+{
+	struct memblock_region *elfcorehdr_mem = data;
+	const __be32 *reg;
+	int min_len;
+	int len;
+
+	if (depth != 1 || strcmp(uname, "chosen") != 0)
+		return 0;
+
+	reg = of_get_flat_dt_prop(node, "linux,elfcorehdr", &len);
+
+	/* len is a byte count, the root cell counts are in 32-bit cells */
+	min_len = (dt_root_addr_cells + dt_root_size_cells) * sizeof(__be32);
+	if (!reg || len < min_len)
+		return 1;
+
+	elfcorehdr_mem->base = dt_mem_next_cell(dt_root_addr_cells, &reg);
+	elfcorehdr_mem->size = dt_mem_next_cell(dt_root_size_cells, &reg);
+
+	return 1;
+}
+
+/*
+ * reserve_elfcorehdr() - reserve the region named by "linux,elfcorehdr".
+ *
+ * Scans the flattened device tree for the property. When it describes a
+ * non-empty region, that region is reserved in memblock and its location
+ * is published through elfcorehdr_addr / elfcorehdr_size. Otherwise
+ * nothing is changed.
+ */
+static void __init reserve_elfcorehdr(void)
+{
+	struct memblock_region elfcorehdr_region = {0};
+
+	of_scan_flat_dt(early_init_dt_scan_elfcorehdr, &elfcorehdr_region);
+
+	/* No usable linux,elfcorehdr in /chosen: nothing to reserve */
+	if (!elfcorehdr_region.size)
+		return;
+
+	memblock_reserve(elfcorehdr_region.base, elfcorehdr_region.size);
+
+	/*
+	 * phys_addr_t can be 32 or 64 bits wide depending on the
+	 * configuration, so widen explicitly to match the specifiers.
+	 */
+	pr_info("Reserving %lluKB of memory at 0x%llx for elfcorehdr\n",
+		(unsigned long long)(elfcorehdr_region.size >> 10),
+		(unsigned long long)elfcorehdr_region.base);
+
+	elfcorehdr_addr = elfcorehdr_region.base;
+	elfcorehdr_size = elfcorehdr_region.size;
+}
+
+/*
+ * early_init_dt_scan_usablemem() - of_scan_flat_dt() callback that reads
+ * the "linux,usable-memory-range" property of the /chosen node.
+ * @node: node currently being visited
+ * @uname: name of that node
+ * @depth: depth of that node
+ * @data: struct memblock_region that receives the base and size
+ *
+ * Only the node named "chosen" at depth 1 is examined. @data is filled in
+ * only when the property exists and is large enough to hold one
+ * (address, size) pair encoded with the root #address-cells/#size-cells.
+ *
+ * Returns 0 for every other node and 1 once /chosen has been looked at,
+ * whether or not a usable property was found.
+ */
+static int __init early_init_dt_scan_usablemem(unsigned long node,
+		const char *uname, int depth, void *data)
+{
+	struct memblock_region *usable_mem = data;
+	const __be32 *reg;
+	int min_len;
+	int len;
+
+	if (depth != 1 || strcmp(uname, "chosen") != 0)
+		return 0;
+
+	reg = of_get_flat_dt_prop(node, "linux,usable-memory-range", &len);
+
+	/* len is a byte count, the root cell counts are in 32-bit cells */
+	min_len = (dt_root_addr_cells + dt_root_size_cells) * sizeof(__be32);
+	if (!reg || len < min_len)
+		return 1;
+
+	usable_mem->base = dt_mem_next_cell(dt_root_addr_cells, &reg);
+	usable_mem->size = dt_mem_next_cell(dt_root_size_cells, &reg);
+
+	return 1;
+}
+
+/*
+ * fdt_enforce_memory_region() - apply "linux,usable-memory-range".
+ *
+ * Scans the flattened device tree for the property and, when it describes
+ * a non-empty range, passes that range to memblock_cap_memory_range().
+ * Without such a range memblock is left as it is.
+ */
+static void __init fdt_enforce_memory_region(void)
+{
+	struct memblock_region usable = { .base = 0, .size = 0 };
+
+	of_scan_flat_dt(early_init_dt_scan_usablemem, &usable);
+
+	if (!usable.size)
+		return;
+
+	memblock_cap_memory_range(usable.base, usable.size);
+}
+#endif
+
static phys_addr_t dtb_early_pa __initdata;
void __init setup_bootmem(void)
@@ -132,6 +206,9 @@ void __init setup_bootmem(void)
phys_addr_t vmlinux_end = __pa_symbol(&_end);
phys_addr_t vmlinux_start = __pa_symbol(&_start);
+#ifdef CONFIG_CRASH_DUMP
+ fdt_enforce_memory_region();
+#endif
/* Find the memory region containing the kernel */
for_each_memblock(memory, reg) {
phys_addr_t end = reg->base + reg->size;
@@ -166,6 +243,13 @@ void __init setup_bootmem(void)
*/
memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va));
+ /*
+ * We need to reserve elfcorehdr here so that it doesn't
+ * get overwritten later on.
+ */
+#ifdef CONFIG_CRASH_DUMP
+ reserve_elfcorehdr();
+#endif
early_init_fdt_scan_reserved_mem();
memblock_allow_resize();
memblock_dump_all();
@@ -559,9 +643,20 @@ static void __init reserve_crashkernel(void)
unsigned long start_pfn = find_min_pfn_with_active_regions();
unsigned long search_start = start_pfn << PAGE_SHIFT;
unsigned long search_end = (unsigned long) PFN_PHYS(max_low_pfn) - 1;
-
int ret = 0;
+ /*
+ * Don't reserve a region for a crash kernel on a crash kernel
+ * since it doesn't make much sense and we have limited memory
+ * resources.
+ */
+#ifdef CONFIG_CRASH_DUMP
+ if (is_kdump_kernel()) {
+ pr_info("crashkernel: ignore reservation request\n");
+ return;
+ }
+#endif
+
ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
&crash_size, &crash_base);
if (ret || !crash_size)
This patch allows Linux to act as a crash kernel for use with kdump. Userspace will let the crash kernel know about the memory region it can use through linux,usable-memory-range property, and about the memory region where the elf core header of the previous kernel is saved, through the linux,elfcorehdr property (both on /chosen). These dtb bindings are also used on arm64. I tested this on riscv64 qemu and it works as expected, you may test it by retrieving the dmesg of the previous kernel through /proc/vmcore, using the vmcore-dmesg utility from kexec-tools. Signed-off-by: Nick Kossifidis <mick@ics.forth.gr> --- arch/riscv/Kconfig | 10 ++++ arch/riscv/kernel/Makefile | 4 +- arch/riscv/kernel/crash_dump.c | 46 ++++++++++++++++ arch/riscv/kernel/setup.c | 13 +++++ arch/riscv/mm/init.c | 97 +++++++++++++++++++++++++++++++++- 5 files changed, 167 insertions(+), 3 deletions(-) create mode 100644 arch/riscv/kernel/crash_dump.c