diff mbox series

[3/3] RISC-V: Add crash kernel support

Message ID 20200424171214.1515457-4-mick@ics.forth.gr (mailing list archive)
State New, archived
Headers show
Series RISC-V: Add kexec/kdump support | expand

Commit Message

Nick Kossifidis April 24, 2020, 5:12 p.m. UTC
This patch allows Linux to act as a crash kernel for use with
kdump. Userspace tells the crash kernel which memory region it
may use through the linux,usable-memory-range property, and
where the ELF core header of the previous kernel is saved
through the linux,elfcorehdr property (both under /chosen).
These device tree bindings are also used on arm64.

I tested this on riscv64 QEMU and it works as expected. You
can test it by retrieving the dmesg of the previous kernel
from /proc/vmcore with the vmcore-dmesg utility from
kexec-tools.

Signed-off-by: Nick Kossifidis <mick@ics.forth.gr>
---
 arch/riscv/Kconfig             | 10 ++++
 arch/riscv/kernel/Makefile     |  4 +-
 arch/riscv/kernel/crash_dump.c | 46 ++++++++++++++++
 arch/riscv/kernel/setup.c      | 13 +++++
 arch/riscv/mm/init.c           | 97 +++++++++++++++++++++++++++++++++-
 5 files changed, 167 insertions(+), 3 deletions(-)
 create mode 100644 arch/riscv/kernel/crash_dump.c
diff mbox series

Patch

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 4934b2f62..3802c8888 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -344,6 +344,16 @@  config KEXEC
 
 	  The name comes from the similarity to the exec system call.
 
+config CRASH_DUMP
+	bool "Build kdump crash kernel"
+	help
+	  Generate a crash dump after being started by kexec. This should
+	  normally only be set in special crash dump kernels, which are
+	  loaded from the main kernel with kexec-tools into a specially
+	  reserved region and later executed after a crash by
+	  kdump/kexec.
+
+	  For more details see Documentation/admin-guide/kdump/kdump.rst
 
 endmenu
 
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index 65f6c9f1d..6374b0db5 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -51,6 +51,6 @@  ifeq ($(CONFIG_RISCV_SBI), y)
 obj-$(CONFIG_SMP) += cpu_ops_sbi.o
 endif
 obj-$(CONFIG_HOTPLUG_CPU)	+= cpu-hotplug.o
-obj-${CONFIG_KEXEC}		+= kexec_relocate.o crash_save_regs.o machine_kexec.o
-
+obj-$(CONFIG_KEXEC)		+= kexec_relocate.o crash_save_regs.o machine_kexec.o
+obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
 clean:
diff --git a/arch/riscv/kernel/crash_dump.c b/arch/riscv/kernel/crash_dump.c
new file mode 100644
index 000000000..81b9d2a71
--- /dev/null
+++ b/arch/riscv/kernel/crash_dump.c
@@ -0,0 +1,46 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This code is taken from arch/arm64/kernel/crash_dump.c
+ * Created by: AKASHI Takahiro <takahiro.akashi@linaro.org>
+ * Copyright (C) 2017 Linaro Limited
+ */
+
+#include <linux/crash_dump.h>
+#include <linux/io.h>
+
+/**
+ * copy_oldmem_page() - read part of a page of the old kernel's memory
+ * @pfn: page frame number of the old-kernel page to read from
+ * @buf: destination buffer
+ * @csize: number of bytes to copy
+ * @offset: byte offset into the page at which copying starts
+ * @userbuf: non-zero if @buf is a userspace address
+ *
+ * Maps the page at @pfn with memremap() for the duration of the call and
+ * copies @csize bytes, starting @offset bytes into it, to @buf. When
+ * @userbuf is set the copy goes through copy_to_user(), otherwise memcpy().
+ *
+ * Returns @csize on success (0 when @csize is 0), -ENOMEM if the page could
+ * not be mapped, or -EFAULT if the copy to userspace failed.
+ */
+ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
+			 size_t csize, unsigned long offset,
+			 int userbuf)
+{
+	ssize_t copied = csize;
+	void *page, *src;
+
+	if (csize == 0)
+		return 0;
+
+	page = memremap(__pfn_to_phys(pfn), PAGE_SIZE, MEMREMAP_WB);
+	if (page == NULL)
+		return -ENOMEM;
+	src = page + offset;
+
+	if (!userbuf)
+		memcpy(buf, src, csize);
+	else if (copy_to_user((char __user *)buf, src, csize))
+		copied = -EFAULT;
+
+	/* Single exit: the mapping is dropped on success and failure alike. */
+	memunmap(page);
+	return copied;
+}
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 52d057bde..cbd8c8ba6 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -64,6 +64,9 @@  static struct resource code_res = { .name = "Kernel code", };
 static struct resource data_res = { .name = "Kernel data", };
 static struct resource rodata_res = { .name = "Kernel rodata", };
 static struct resource bss_res = { .name = "Kernel bss", };
+#ifdef CONFIG_CRASH_DUMP
+static struct resource elfcorehdr_res = { .name = "ELF Core hdr", };
+#endif
 
 static void __init kdump_resource_init(void)
 {
@@ -97,6 +100,16 @@  static void __init kdump_resource_init(void)
 	if (crashk_res.end > crashk_res.start)
 		insert_resource(&sysram_res, &crashk_res);
 #endif
+
+#ifdef CONFIG_CRASH_DUMP
+	/*
+	 * Publish the previous kernel's ELF core header as a busy system RAM
+	 * resource, but only when a header was actually handed over.
+	 */
+	if (elfcorehdr_size) {
+		elfcorehdr_res.start = elfcorehdr_addr;
+		/* Resource ranges are inclusive: end is the last valid byte. */
+		elfcorehdr_res.end = elfcorehdr_addr + elfcorehdr_size - 1;
+		elfcorehdr_res.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
+		insert_resource(&sysram_res, &elfcorehdr_res);
+	}
+#endif
+
 }
 
 void __init parse_dtb(void)
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 1c4461590..93cd434c5 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -123,6 +123,80 @@  static void __init setup_initrd(void)
 }
 #endif /* CONFIG_BLK_DEV_INITRD */
 
+#ifdef CONFIG_CRASH_DUMP
+/*
+ * These come from arm64
+ */
+/*
+ * early_init_dt_scan_elfcorehdr() - of_scan_flat_dt() callback that looks up
+ * the "linux,elfcorehdr" property of the /chosen node.
+ * @node: flat device tree node being visited
+ * @uname: name of that node
+ * @depth: depth of that node in the tree
+ * @data: struct memblock_region that receives the base and size
+ *
+ * Returns 0 for every node other than /chosen, and 1 once /chosen has been
+ * examined, whether or not a usable property was found. @data is written
+ * only when the property exists and is long enough to hold one
+ * <address, size> pair.
+ */
+static int __init early_init_dt_scan_elfcorehdr(unsigned long node,
+		const char *uname, int depth, void *data)
+{
+	struct memblock_region *elfcorehdr_mem = data;
+	int cells = dt_root_addr_cells + dt_root_size_cells;
+	const __be32 *reg;
+	int len;
+
+	if (depth != 1 || strcmp(uname, "chosen") != 0)
+		return 0;
+
+	/*
+	 * The property length is reported in bytes while the cell counts are
+	 * in 32-bit words, so scale before comparing. Otherwise a truncated
+	 * property would pass the check and be read past its end.
+	 */
+	reg = of_get_flat_dt_prop(node, "linux,elfcorehdr", &len);
+	if (!reg || len < cells * (int)sizeof(__be32))
+		return 1;
+
+	elfcorehdr_mem->base = dt_mem_next_cell(dt_root_addr_cells, &reg);
+	elfcorehdr_mem->size = dt_mem_next_cell(dt_root_size_cells, &reg);
+
+	return 1;
+}
+
+/*
+ * reserve_elfcorehdr() - reserve the memory holding the previous kernel's
+ * ELF core header and record its location.
+ *
+ * Scans the flat device tree with early_init_dt_scan_elfcorehdr(). If the
+ * scan yields a non-zero size, the region is reserved in memblock, logged,
+ * and stored in elfcorehdr_addr / elfcorehdr_size. Otherwise nothing is
+ * changed.
+ */
+static void __init reserve_elfcorehdr(void)
+{
+	struct memblock_region elfcorehdr_region = {0};
+
+	of_scan_flat_dt(early_init_dt_scan_elfcorehdr, &elfcorehdr_region);
+	if (!elfcorehdr_region.size)
+		return;
+
+	memblock_reserve(elfcorehdr_region.base, elfcorehdr_region.size);
+
+	/*
+	 * The region fields are phys_addr_t, whose width depends on the
+	 * configuration; cast explicitly so the arguments always match the
+	 * unsigned 64-bit format specifiers.
+	 */
+	pr_info("Reserving %lluKB of memory at 0x%llx for elfcorehdr\n",
+		(unsigned long long)elfcorehdr_region.size >> 10,
+		(unsigned long long)elfcorehdr_region.base);
+
+	elfcorehdr_addr = elfcorehdr_region.base;
+	elfcorehdr_size = elfcorehdr_region.size;
+}
+
+/*
+ * early_init_dt_scan_usablemem() - of_scan_flat_dt() callback that looks up
+ * the "linux,usable-memory-range" property of the /chosen node.
+ * @node: flat device tree node being visited
+ * @uname: name of that node
+ * @depth: depth of that node in the tree
+ * @data: struct memblock_region that receives the base and size
+ *
+ * Returns 0 for every node other than /chosen, and 1 once /chosen has been
+ * examined, whether or not a usable property was found. @data is written
+ * only when the property exists and is long enough to hold one
+ * <address, size> pair.
+ */
+static int __init early_init_dt_scan_usablemem(unsigned long node,
+		const char *uname, int depth, void *data)
+{
+	struct memblock_region *usable_mem = data;
+	int cells = dt_root_addr_cells + dt_root_size_cells;
+	const __be32 *reg;
+	int len;
+
+	if (depth != 1 || strcmp(uname, "chosen") != 0)
+		return 0;
+
+	/*
+	 * The property length is reported in bytes while the cell counts are
+	 * in 32-bit words, so scale before comparing. Otherwise a truncated
+	 * property would pass the check and be read past its end.
+	 */
+	reg = of_get_flat_dt_prop(node, "linux,usable-memory-range", &len);
+	if (!reg || len < cells * (int)sizeof(__be32))
+		return 1;
+
+	usable_mem->base = dt_mem_next_cell(dt_root_addr_cells, &reg);
+	usable_mem->size = dt_mem_next_cell(dt_root_size_cells, &reg);
+
+	return 1;
+}
+
+/*
+ * fdt_enforce_memory_region() - cap memblock memory to the range given by
+ * the /chosen "linux,usable-memory-range" property, if one is present.
+ *
+ * Scans the flat device tree with early_init_dt_scan_usablemem(). When the
+ * scan reports a non-zero size, memblock_cap_memory_range() is called with
+ * that base and size; otherwise memblock is left untouched.
+ */
+static void __init fdt_enforce_memory_region(void)
+{
+	struct memblock_region cap = { .base = 0, .size = 0 };
+
+	of_scan_flat_dt(early_init_dt_scan_usablemem, &cap);
+	if (cap.size == 0)
+		return;
+
+	memblock_cap_memory_range(cap.base, cap.size);
+}
+#endif
+
 static phys_addr_t dtb_early_pa __initdata;
 
 void __init setup_bootmem(void)
@@ -132,6 +206,9 @@  void __init setup_bootmem(void)
 	phys_addr_t vmlinux_end = __pa_symbol(&_end);
 	phys_addr_t vmlinux_start = __pa_symbol(&_start);
 
+#ifdef CONFIG_CRASH_DUMP
+	fdt_enforce_memory_region();
+#endif
 	/* Find the memory region containing the kernel */
 	for_each_memblock(memory, reg) {
 		phys_addr_t end = reg->base + reg->size;
@@ -166,6 +243,13 @@  void __init setup_bootmem(void)
 	 */
 	memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va));
 
+	/*
+	 * We need to reserve elfcorehdr here so that it doesn't
+	 * get overwritten later on.
+	 */
+#ifdef CONFIG_CRASH_DUMP
+	reserve_elfcorehdr();
+#endif
 	early_init_fdt_scan_reserved_mem();
 	memblock_allow_resize();
 	memblock_dump_all();
@@ -559,9 +643,20 @@  static void __init reserve_crashkernel(void)
 	unsigned long start_pfn = find_min_pfn_with_active_regions();
 	unsigned long search_start = start_pfn << PAGE_SHIFT;
 	unsigned long search_end = (unsigned long) PFN_PHYS(max_low_pfn) - 1;
-
 	int ret = 0;
 
+	/*
+	 * Don't reserve a region for a crash kernel on a crash kernel
+	 * since it doesn't make much sense and we have limited memory
+	 * resources.
+	 */
+#ifdef CONFIG_CRASH_DUMP
+	if (is_kdump_kernel()) {
+		pr_info("crashkernel: ignore reservation request\n");
+		return;
+	}
+#endif
+
 	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
 				&crash_size, &crash_base);
 	if (ret || !crash_size)