@@ -3494,6 +3494,81 @@ struct arm64_stackframe {
#define _64BIT_
#define MACHINE_TYPE "RISCV64"
+typedef struct { ulong pgd; } pgd_t;
+typedef struct { ulong p4d; } p4d_t;
+typedef struct { ulong pud; } pud_t;
+typedef struct { ulong pmd; } pmd_t;
+typedef struct { ulong pte; } pte_t;
+typedef signed int s32;
+
+/* arch/riscv/include/asm/pgtable-64.h */
+#define PGD_SHIFT_L3 (30)
+#define PGD_SHIFT_L4 (39)
+#define PGD_SHIFT_L5 (48)
+
+#define P4D_SHIFT (39)
+#define PUD_SHIFT (30)
+#define PMD_SHIFT (21)
+
+#define PTRS_PER_PGD (512)
+#define PTRS_PER_P4D (512)
+#define PTRS_PER_PUD (512)
+#define PTRS_PER_PMD (512)
+#define PTRS_PER_PTE (512)
+
+/*
+ * Mask for PPN and PROT bit53~0 of PTE
+ * 63 6261 60 54 53 10 9 8 7 6 5 4 3 2 1 0
+ * N PBMT Reserved P P N RSW D A G U X W R V
+ */
+#define PTE_PFN_PROT_MASK 0x3FFFFFFFFFFFFF
+
+/*
+ * 3-levels / 4K pages
+ *
+ * sv39
+ * PGD | PMD | PTE | OFFSET |
+ * 9 | 9 | 9 | 12 |
+ */
+#define pgd_index_l3_4k(addr) (((addr) >> PGD_SHIFT_L3) & (PTRS_PER_PGD - 1))
+#define pmd_index_l3_4k(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
+#define pte_index_l3_4k(addr) (((addr) >> PAGESHIFT()) & (PTRS_PER_PTE - 1))
+
+/*
+ * 4-levels / 4K pages
+ *
+ * sv48
+ * PGD | PUD | PMD | PTE | OFFSET |
+ * 9 | 9 | 9 | 9 | 12 |
+ */
+#define pgd_index_l4_4k(addr) (((addr) >> PGD_SHIFT_L4) & (PTRS_PER_PGD - 1))
+#define pud_index_l4_4k(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
+#define pmd_index_l4_4k(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
+#define pte_index_l4_4k(addr) (((addr) >> PAGESHIFT()) & (PTRS_PER_PTE - 1))
+
+/*
+ * 5-levels / 4K pages
+ *
+ * sv48
+ * PGD | P4D | PUD | PMD | PTE | OFFSET |
+ * 9 | 9 | 9 | 9 | 9 | 12 |
+ */
+#define pgd_index_l5_4k(addr) (((addr) >> PGD_SHIFT_L5) & (PTRS_PER_PGD - 1))
+#define p4d_index_l5_4k(addr) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1))
+#define pud_index_l5_4k(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
+#define pmd_index_l5_4k(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
+#define pte_index_l5_4k(addr) (((addr) >> PAGESHIFT()) & (PTRS_PER_PTE - 1))
+
+#define VM_L3_4K (0x2)
+#define VM_L3_2M (0x4)
+#define VM_L3_1G (0x8)
+#define VM_L4_4K (0x10)
+#define VM_L4_2M (0x20)
+#define VM_L4_1G (0x40)
+#define VM_L5_4K (0x80)
+#define VM_L5_2M (0x100)
+#define VM_L5_1G (0x200)
+
/*
* Direct memory mapping
*/
@@ -3545,6 +3620,14 @@ struct arm64_stackframe {
#define PHYS_MASK_SHIFT _MAX_PHYSMEM_BITS
#define PHYS_MASK (((1UL) << PHYS_MASK_SHIFT) - 1)
+#define IS_LAST_P4D_READ(p4d) ((ulong)(p4d) == machdep->machspec->last_p4d_read)
+#define FILL_P4D(P4D, TYPE, SIZE) \
+ if (!IS_LAST_P4D_READ(P4D)) { \
+ readmem((ulonglong)((ulong)(P4D)), TYPE, machdep->machspec->p4d, \
+ SIZE, "p4d page", FAULT_ON_ERROR); \
+ machdep->machspec->last_p4d_read = (ulong)(P4D); \
+ }
+
#endif /* RISCV64 */
#ifdef X86
@@ -6810,6 +6893,10 @@ struct machine_specific {
ulong _page_soft;
ulong _pfn_shift;
+ ulong va_bits;
+ char *p4d;
+ ulong last_p4d_read;
+ ulong struct_page_size;
struct riscv64_register *crash_task_regs;
};
@@ -6833,6 +6920,12 @@ struct machine_specific {
#define _PAGE_PROT_NONE _PAGE_READ
#define _PAGE_PFN_SHIFT 10
+/* from 'struct pt_regs' definitions of RISC-V arch */
+#define RISCV64_REGS_EPC 0
+#define RISCV64_REGS_RA 1
+#define RISCV64_REGS_SP 2
+#define RISCV64_REGS_FP 8
+
#endif /* RISCV64 */
/*
@@ -1531,6 +1531,12 @@ get_diskdump_regs_mips(struct bt_info *bt, ulong *eip, ulong *esp)
machdep->get_stack_frame(bt, eip, esp);
}
+static void
+get_diskdump_regs_riscv64(struct bt_info *bt, ulong *eip, ulong *esp)
+{
+ machdep->get_stack_frame(bt, eip, esp);
+}
+
static void
get_diskdump_regs_sparc64(struct bt_info *bt, ulong *eip, ulong *esp)
{
@@ -1610,6 +1616,10 @@ get_diskdump_regs(struct bt_info *bt, ulong *eip, ulong *esp)
get_diskdump_regs_sparc64(bt, eip, esp);
break;
+ case EM_RISCV:
+ get_diskdump_regs_riscv64(bt, eip, esp);
+ break;
+
default:
error(FATAL, "%s: unsupported machine type: %s\n",
DISKDUMP_VALID() ? "diskdump" : "compressed kdump",
@@ -16,10 +16,304 @@
#include <elf.h>
#include "defs.h"
+#include <math.h>
+
+static ulong riscv64_get_page_size(void);
+static int riscv64_vtop_3level_4k(ulong *pgd, ulong vaddr,
+ physaddr_t *paddr, int verbose);
+static int riscv64_vtop_4level_4k(ulong *pgd, ulong vaddr,
+ physaddr_t *paddr, int verbose);
+static int riscv64_vtop_5level_4k(ulong *pgd, ulong vaddr,
+ physaddr_t *paddr, int verbose);
+static void riscv64_page_type_init(void);
+static int riscv64_is_kvaddr(ulong vaddr);
+static int riscv64_is_uvaddr(ulong vaddr, struct task_context *tc);
+static int riscv64_uvtop(struct task_context *tc, ulong vaddr,
+ physaddr_t *paddr, int verbose);
+static int riscv64_kvtop(struct task_context *tc, ulong kvaddr,
+ physaddr_t *paddr, int verbose);
+static void riscv64_cmd_mach(void);
+static int riscv64_translate_pte(ulong, void *, ulonglong);
+static int riscv64_init_active_task_regs(void);
+static int riscv64_get_crash_notes(void);
+static int riscv64_get_elf_notes(void);
+static void riscv64_get_va_range(struct machine_specific *ms);
+static void riscv64_get_struct_page_size(struct machine_specific *ms);
+
+#define REG_FMT "%016lx"
+#define SZ_2G 0x80000000
+
+/*
+ * Holds registers during the crash.
+ */
+static struct riscv64_register *panic_task_regs;
+
+/* from arch/riscv/include/asm/stacktrace.h */
+struct stackframe {
+ ulong fp;
+ ulong ra;
+};
+
+static struct machine_specific riscv64_machine_specific = {
+ ._page_present = (1 << 0),
+ ._page_read = (1 << 1),
+ ._page_write = (1 << 2),
+ ._page_exec = (1 << 3),
+ ._page_user = (1 << 4),
+ ._page_global = (1 << 5),
+ ._page_accessed = (1 << 6),
+ ._page_dirty = (1 << 7),
+ ._page_soft = (1 << 8),
+
+ .va_bits = 0,
+ .struct_page_size = 0,
+};
+
+static void
+pt_level_alloc(char **lvl, char *name)
+{
+ size_t sz = PAGESIZE();
+ void *pointer = malloc(sz);
+
+ if (!pointer)
+ error(FATAL, name);
+ *lvl = pointer;
+}
+
+static void
+riscv64_get_phys_ram_base(struct machine_specific *ms)
+{
+ char *string;
+
+ if ((string = pc->read_vmcoreinfo("NUMBER(phys_ram_base)"))) {
+ ms->phys_base = atol(string);
+ free(string);
+ } else
+ /*
+ * It can't continue without phys_ram_base. As for qemu rv64
+ * env and hardware platform, phys_ram_base may different.
+ */
+ error(FATAL, "cannot read phys_ram_base\n");
+}
+
+static ulong
+riscv64_get_page_size(void)
+{
+ return memory_page_size();
+}
+
+static ulong
+riscv64_vmalloc_start(void)
+{
+ return ((ulong)VMALLOC_START);
+}
+
+/* Get the size of struct page {} */
+static void riscv64_get_struct_page_size(struct machine_specific *ms)
+{
+ char *string;
+
+ string = pc->read_vmcoreinfo("SIZE(page)");
+ if (string)
+ ms->struct_page_size = atol(string);
+ free(string);
+}
+
+/*
+ * Get the max shift of the size of struct page.
+ * Most of the time, it is 64 bytes, but not sure.
+*/
+static int riscv64_get_struct_page_max_shift(struct machine_specific *ms)
+{
+ return (int)ceil(log2(ms->struct_page_size));
+}
+
+static void
+riscv64_cmd_mach(void)
+{
+ /* TODO: */
+}
+
+static int
+riscv64_verify_symbol(const char *name, ulong value, char type)
+{
+ /* TODO: */
+ return TRUE;
+}
void
riscv64_dump_machdep_table(ulong arg)
{
+ /* TODO: */
+}
+
+static ulong
+riscv64_processor_speed(void)
+{
+ /* TODO: */
+ return 0;
+}
+
+static unsigned long riscv64_get_kernel_version(void)
+{
+ char *string;
+ char buf[BUFSIZE];
+ char *p1, *p2;
+
+ if (THIS_KERNEL_VERSION)
+ return THIS_KERNEL_VERSION;
+
+ string = pc->read_vmcoreinfo("OSRELEASE");
+ if (string) {
+ strcpy(buf, string);
+
+ p1 = p2 = buf;
+ while (*p2 != '.')
+ p2++;
+ *p2 = NULLCHAR;
+ kt->kernel_version[0] = atoi(p1);
+
+ p1 = ++p2;
+ while (*p2 != '.')
+ p2++;
+ *p2 = NULLCHAR;
+ kt->kernel_version[1] = atoi(p1);
+
+ p1 = ++p2;
+ while ((*p2 >= '0') && (*p2 <= '9'))
+ p2++;
+ *p2 = NULLCHAR;
+ kt->kernel_version[2] = atoi(p1);
+ }
+ free(string);
+ return THIS_KERNEL_VERSION;
+}
+
+static void riscv64_get_va_range(struct machine_specific *ms)
+{
+ unsigned long kernel_version = riscv64_get_kernel_version();
+ char *string;
+
+ if ((string = pc->read_vmcoreinfo("NUMBER(VA_BITS)"))) {
+ ms->va_bits = atol(string);
+ free(string);
+ } else
+ goto error;
+ if ((string = pc->read_vmcoreinfo("NUMBER(PAGE_OFFSET)"))) {
+ ms->page_offset = htol(string, QUIET, NULL);
+ free(string);
+ } else
+ goto error;
+
+ if ((string = pc->read_vmcoreinfo("NUMBER(VMALLOC_START)"))) {
+ ms->vmalloc_start_addr = htol(string, QUIET, NULL);
+ free(string);
+ } else
+ goto error;
+
+ if ((string = pc->read_vmcoreinfo("NUMBER(VMALLOC_END)"))) {
+ ms->vmalloc_end = htol(string, QUIET, NULL);
+ free(string);
+ } else
+ goto error;
+
+ if ((string = pc->read_vmcoreinfo("NUMBER(VMEMMAP_START)"))) {
+ ms->vmemmap_vaddr = htol(string, QUIET, NULL);
+ free(string);
+ } else
+ goto error;
+
+ if ((string = pc->read_vmcoreinfo("NUMBER(VMEMMAP_END)"))) {
+ ms->vmemmap_end = htol(string, QUIET, NULL);
+ free(string);
+ } else
+ goto error;
+
+ if ((string = pc->read_vmcoreinfo("NUMBER(KERNEL_LINK_ADDR)"))) {
+ ms->kernel_link_addr = htol(string, QUIET, NULL);
+ free(string);
+ } else
+ goto error;
+
+ if ((string = pc->read_vmcoreinfo("NUMBER(ADDRESS_SPACE_END)"))) {
+ ms->address_space_end = htol(string, QUIET, NULL);
+ free(string);
+ } else
+ goto error;
+
+ /*
+ * From Linux 5.13, the kernel mapping is moved to the last 2GB
+ * of the address space, modules use the 2GB memory range right
+ * before the kernel. Before Linux 5.13, modules area is embedded
+ * in vmalloc area.
+ *
+ * 5.13 = 0x5 << 16 | 0xD << 8
+ */
+ if (kernel_version >= 0x50D00) {
+ if ((string = pc->read_vmcoreinfo("NUMBER(MODULES_VADDR)"))) {
+ ms->modules_vaddr = htol(string, QUIET, NULL);
+ free(string);
+ } else
+ goto error;
+
+ if ((string = pc->read_vmcoreinfo("NUMBER(MODULES_END)"))) {
+ ms->modules_end = htol(string, QUIET, NULL);
+ free(string);
+ } else
+ goto error;
+ } else {
+ ms->modules_vaddr = ms->vmalloc_start_addr;
+ ms->modules_end = ms->vmalloc_end;
+ }
+
+ if (CRASHDEBUG(8)) {
+ fprintf(fp, "va_bits : %ld\n", ms->va_bits);
+ fprintf(fp, "vmemmap : 0x%lx - 0x%lx\n",
+ ms->vmemmap_vaddr, ms->vmemmap_end);
+ fprintf(fp, "vmalloc : 0x%lx - 0x%lx\n",
+ ms->vmalloc_start_addr, ms->vmalloc_end);
+ fprintf(fp, "lowmem : 0x%lx -\n", ms->page_offset);
+ fprintf(fp, "mudules : 0x%lx - 0x%lx\n",
+ ms->modules_vaddr, ms->modules_end);
+ fprintf(fp, "kernel : 0x%lx - 0x%lx\n",
+ ms->kernel_link_addr, ms->address_space_end);
+ }
+ return;
+error:
+ error(FATAL, "cannot get vm layout\n");
+}
+
+static int
+riscv64_is_kvaddr(ulong vaddr)
+{
+ if (IS_VMALLOC_ADDR(vaddr))
+ return TRUE;
+
+ return (vaddr >= machdep->kvbase);
+}
+
+static int
+riscv64_is_uvaddr(ulong vaddr, struct task_context *unused)
+{
+ if (IS_VMALLOC_ADDR(vaddr))
+ return FALSE;
+
+ return (vaddr < machdep->kvbase);
+}
+
+static int
+riscv64_is_task_addr(ulong task)
+{
+ if (tt->flags & THREAD_INFO)
+ return IS_KVADDR(task);
+
+ return (IS_KVADDR(task) && ALIGNED_STACK_OFFSET(task) == 0);
+}
+
+static int
+riscv64_get_smp_cpus(void)
+{
+ return (get_cpus_online() > 0) ? get_cpus_online() : kt->cpus;
}
/*
@@ -33,11 +327,700 @@ riscv64_IS_VMALLOC_ADDR(ulong vaddr)
(vaddr >= MODULES_VADDR && vaddr <= MODULES_END));
}
+/*
+ * Translate a PTE, returning TRUE if the page is present.
+ * If a physaddr pointer is passed in, don't print anything.
+ */
+static int
+riscv64_translate_pte(ulong pte, void *physaddr, ulonglong unused)
+{
+ char ptebuf[BUFSIZE];
+ char physbuf[BUFSIZE];
+ char buf[BUFSIZE];
+ int page_present;
+ int len1, len2, others;
+ ulong paddr;
+
+ paddr = PTOB(pte >> _PAGE_PFN_SHIFT);
+ page_present = !!(pte & _PAGE_PRESENT);
+
+ if (physaddr) {
+ *(ulong *)physaddr = paddr;
+ return page_present;
+ }
+
+ sprintf(ptebuf, "%lx", pte);
+ len1 = MAX(strlen(ptebuf), strlen("PTE"));
+ fprintf(fp, "%s ", mkstring(buf, len1, CENTER | LJUST, "PTE"));
+
+ if (!page_present)
+ return page_present;
+
+ sprintf(physbuf, "%lx", paddr);
+ len2 = MAX(strlen(physbuf), strlen("PHYSICAL"));
+ fprintf(fp, "%s ", mkstring(buf, len2, CENTER | LJUST, "PHYSICAL"));
+
+ fprintf(fp, "FLAGS\n");
+ fprintf(fp, "%s %s ",
+ mkstring(ptebuf, len1, CENTER | RJUST, NULL),
+ mkstring(physbuf, len2, CENTER | RJUST, NULL));
+
+ fprintf(fp, "(");
+ others = 0;
+
+#define CHECK_PAGE_FLAG(flag) \
+ if ((_PAGE_##flag) && (pte & _PAGE_##flag)) \
+ fprintf(fp, "%s" #flag, others++ ? "|" : "")
+ if (pte) {
+ CHECK_PAGE_FLAG(PRESENT);
+ CHECK_PAGE_FLAG(READ);
+ CHECK_PAGE_FLAG(WRITE);
+ CHECK_PAGE_FLAG(EXEC);
+ CHECK_PAGE_FLAG(USER);
+ CHECK_PAGE_FLAG(GLOBAL);
+ CHECK_PAGE_FLAG(ACCESSED);
+ CHECK_PAGE_FLAG(DIRTY);
+ CHECK_PAGE_FLAG(SOFT);
+ } else {
+ fprintf(fp, "no mapping");
+ }
+
+ fprintf(fp, ")\n");
+
+ return page_present;
+}
+
+static void
+riscv64_page_type_init(void)
+{
+ ulong va_bits = machdep->machspec->va_bits;
+
+ /*
+ * For RISCV64 arch, any level of PTE may be a leaf PTE,
+ * so in addition to 4KiB pages,
+ * Sv39 supports 2 MiB megapages, 1 GiB gigapages;
+ * Sv48 supports 2 MiB megapages, 1 GiB gigapages, 512 GiB terapages;
+ * Sv57 supports 2 MiB megapages, 1 GiB gigapages, 512 GiB terapages, and 256 TiB petapages.
+ *
+ * refs to riscv-privileged spec.
+ *
+ * We just support 4KiB, 2MiB, 1GiB now.
+ */
+ switch (machdep->pagesize)
+ {
+ case 0x1000: // 4 KiB
+ machdep->flags |= (va_bits == 57 ? VM_L5_4K :
+ (va_bits == 48 ? VM_L4_4K : VM_L3_4K));
+ break;
+ case 0x200000: // 2 MiB
+ /* TODO: */
+ case 0x40000000: // 1 GiB
+ /* TODO: */
+ default:
+ if (machdep->pagesize)
+ error(FATAL, "invalid/unsupported page size: %d\n",
+ machdep->pagesize);
+ else
+ error(FATAL, "cannot determine page size\n");
+ }
+}
+
+static int
+riscv64_vtop_3level_4k(ulong *pgd, ulong vaddr, physaddr_t *paddr, int verbose)
+{
+ ulong *pgd_ptr, pgd_val;
+ ulong *pmd_ptr, pmd_val;
+ ulong *pte_ptr, pte_val, pte_pfn;
+ ulong pt_phys;
+
+ /* PGD */
+ pgd_ptr = pgd + pgd_index_l3_4k(vaddr);
+ FILL_PGD(pgd, KVADDR, PAGESIZE());
+ pgd_val = ULONG(machdep->pgd + PAGEOFFSET(pgd_ptr));
+ if (verbose)
+ fprintf(fp, " PGD: %lx => %lx\n", (ulong)pgd_ptr, pgd_val);
+ if (!pgd_val)
+ goto no_page;
+ pgd_val &= PTE_PFN_PROT_MASK;
+ pt_phys = (pgd_val >> _PAGE_PFN_SHIFT) << PAGESHIFT();
+
+ /* PMD */
+ FILL_PMD(PAGEBASE(pt_phys), PHYSADDR, PAGESIZE());
+ pmd_val = ULONG(machdep->pmd + PAGEOFFSET(sizeof(pmd_t) *
+ pmd_index_l3_4k(vaddr)));
+ if (verbose)
+ fprintf(fp, " PMD: %016lx => %016lx\n", (ulong)pmd_ptr, pmd_val);
+ if (!pmd_val)
+ goto no_page;
+ pmd_val &= PTE_PFN_PROT_MASK;
+ pt_phys = (pmd_val >> _PAGE_PFN_SHIFT) << PAGESHIFT();
+
+ /* PTE */
+ FILL_PTBL(PAGEBASE(pt_phys), PHYSADDR, PAGESIZE());
+ pte_val = ULONG(machdep->ptbl + PAGEOFFSET(sizeof(pte_t) *
+ pte_index_l3_4k(vaddr)));
+ if (verbose)
+ fprintf(fp, " PTE: %lx => %lx\n", (ulong)pte_ptr, pte_val);
+ if (!pte_val)
+ goto no_page;
+ pte_val &= PTE_PFN_PROT_MASK;
+ pte_pfn = pte_val >> _PAGE_PFN_SHIFT;
+
+ if (!(pte_val & _PAGE_PRESENT)) {
+ if (verbose) {
+ fprintf(fp, "\n");
+ riscv64_translate_pte((ulong)pte_val, 0, 0);
+ }
+ fprintf(fp, " PAGE: %016lx not present\n\n", PAGEBASE(*paddr));
+ return FALSE;
+ }
+
+ *paddr = PTOB(pte_pfn) + PAGEOFFSET(vaddr);
+
+ if (verbose) {
+ fprintf(fp, " PAGE: %016lx\n\n", PAGEBASE(*paddr));
+ riscv64_translate_pte(pte_val, 0, 0);
+ }
+
+ return TRUE;
+no_page:
+ fprintf(fp, "invalid\n");
+ return FALSE;
+}
+
+static int
+riscv64_vtop_4level_4k(ulong *pgd, ulong vaddr, physaddr_t *paddr, int verbose)
+{
+ ulong *pgd_ptr, pgd_val;
+ ulong *pud_ptr, pud_val;
+ ulong *pmd_ptr, pmd_val;
+ ulong *pte_ptr, pte_val, pte_pfn;
+ ulong pt_phys;
+
+ /* PGD */
+ pgd_ptr = pgd + pgd_index_l4_4k(vaddr);
+ FILL_PGD(pgd, KVADDR, PAGESIZE());
+ pgd_val = ULONG(machdep->pgd + PAGEOFFSET(pgd_ptr));
+ if (verbose)
+ fprintf(fp, " PGD: %lx => %lx\n", (ulong)pgd_ptr, pgd_val);
+ if (!pgd_val)
+ goto no_page;
+ pgd_val &= PTE_PFN_PROT_MASK;
+ pt_phys = (pgd_val >> _PAGE_PFN_SHIFT) << PAGESHIFT();
+
+ /* PUD */
+ FILL_PUD(PAGEBASE(pt_phys), PHYSADDR, PAGESIZE());
+ pud_val = ULONG(machdep->pud + PAGEOFFSET(sizeof(pud_t) *
+ pud_index_l4_4k(vaddr)));
+ if (verbose)
+ fprintf(fp, " PUD: %016lx => %016lx\n", (ulong)pud_ptr, pud_val);
+ if (!pud_val)
+ goto no_page;
+ pud_val &= PTE_PFN_PROT_MASK;
+ pt_phys = (pud_val >> _PAGE_PFN_SHIFT) << PAGESHIFT();
+
+ /* PMD */
+ FILL_PMD(PAGEBASE(pt_phys), PHYSADDR, PAGESIZE());
+ pmd_val = ULONG(machdep->pmd + PAGEOFFSET(sizeof(pmd_t) *
+ pmd_index_l4_4k(vaddr)));
+ if (verbose)
+ fprintf(fp, " PMD: %016lx => %016lx\n", (ulong)pmd_ptr, pmd_val);
+ if (!pmd_val)
+ goto no_page;
+ pmd_val &= PTE_PFN_PROT_MASK;
+ pt_phys = (pmd_val >> _PAGE_PFN_SHIFT) << PAGESHIFT();
+
+ /* PTE */
+ FILL_PTBL(PAGEBASE(pt_phys), PHYSADDR, PAGESIZE());
+ pte_val = ULONG(machdep->ptbl + PAGEOFFSET(sizeof(pte_t) *
+ pte_index_l4_4k(vaddr)));
+ if (verbose)
+ fprintf(fp, " PTE: %lx => %lx\n", (ulong)pte_ptr, pte_val);
+ if (!pte_val)
+ goto no_page;
+ pte_val &= PTE_PFN_PROT_MASK;
+ pte_pfn = pte_val >> _PAGE_PFN_SHIFT;
+
+ if (!(pte_val & _PAGE_PRESENT)) {
+ if (verbose) {
+ fprintf(fp, "\n");
+ riscv64_translate_pte((ulong)pte_val, 0, 0);
+ }
+ fprintf(fp, " PAGE: %016lx not present\n\n", PAGEBASE(*paddr));
+ return FALSE;
+ }
+
+ *paddr = PTOB(pte_pfn) + PAGEOFFSET(vaddr);
+
+ if (verbose) {
+ fprintf(fp, " PAGE: %016lx\n\n", PAGEBASE(*paddr));
+ riscv64_translate_pte(pte_val, 0, 0);
+ }
+
+ return TRUE;
+no_page:
+ fprintf(fp, "invalid\n");
+ return FALSE;
+}
+
+static int
+riscv64_vtop_5level_4k(ulong *pgd, ulong vaddr, physaddr_t *paddr, int verbose)
+{
+ ulong *pgd_ptr, pgd_val;
+ ulong *p4d_ptr, p4d_val;
+ ulong *pud_ptr, pud_val;
+ ulong *pmd_ptr, pmd_val;
+ ulong *pte_ptr, pte_val, pte_pfn;
+ ulong pt_phys;
+
+ /* PGD */
+ pgd_ptr = pgd + pgd_index_l5_4k(vaddr);
+ FILL_PGD(pgd, KVADDR, PAGESIZE());
+ pgd_val = ULONG(machdep->pgd + PAGEOFFSET(pgd_ptr));
+ if (verbose)
+ fprintf(fp, " PGD: %lx => %lx\n", (ulong)pgd_ptr, pgd_val);
+ if (!pgd_val)
+ goto no_page;
+ pgd_val &= PTE_PFN_PROT_MASK;
+ pt_phys = (pgd_val >> _PAGE_PFN_SHIFT) << PAGESHIFT();
+
+ /* P4D */
+ FILL_P4D(PAGEBASE(pt_phys), PHYSADDR, PAGESIZE());
+ p4d_val = ULONG(machdep->machspec->p4d + PAGEOFFSET(sizeof(p4d_t) *
+ p4d_index_l5_4k(vaddr)));
+ if (verbose)
+ fprintf(fp, " P4D: %016lx => %016lx\n", (ulong)p4d_ptr, p4d_val);
+ if (!p4d_val)
+ goto no_page;
+ p4d_val &= PTE_PFN_PROT_MASK;
+ pt_phys = (p4d_val >> _PAGE_PFN_SHIFT) << PAGESHIFT();
+
+ /* PUD */
+ FILL_PUD(PAGEBASE(pt_phys), PHYSADDR, PAGESIZE());
+ pud_val = ULONG(machdep->pud + PAGEOFFSET(sizeof(pud_t) *
+ pud_index_l5_4k(vaddr)));
+ if (verbose)
+ fprintf(fp, " PUD: %016lx => %016lx\n", (ulong)pud_ptr, pud_val);
+ if (!pud_val)
+ goto no_page;
+ pud_val &= PTE_PFN_PROT_MASK;
+ pt_phys = (pud_val >> _PAGE_PFN_SHIFT) << PAGESHIFT();
+
+ /* PMD */
+ FILL_PMD(PAGEBASE(pt_phys), PHYSADDR, PAGESIZE());
+ pmd_val = ULONG(machdep->pmd + PAGEOFFSET(sizeof(pmd_t) *
+ pmd_index_l4_4k(vaddr)));
+ if (verbose)
+ fprintf(fp, " PMD: %016lx => %016lx\n", (ulong)pmd_ptr, pmd_val);
+ if (!pmd_val)
+ goto no_page;
+ pmd_val &= PTE_PFN_PROT_MASK;
+ pt_phys = (pmd_val >> _PAGE_PFN_SHIFT) << PAGESHIFT();
+
+ /* PTE */
+ FILL_PTBL(PAGEBASE(pt_phys), PHYSADDR, PAGESIZE());
+ pte_val = ULONG(machdep->ptbl + PAGEOFFSET(sizeof(pte_t) *
+ pte_index_l4_4k(vaddr)));
+ if (verbose)
+ fprintf(fp, " PTE: %lx => %lx\n", (ulong)pte_ptr, pte_val);
+ if (!pte_val)
+ goto no_page;
+ pte_val &= PTE_PFN_PROT_MASK;
+ pte_pfn = pte_val >> _PAGE_PFN_SHIFT;
+
+ if (!(pte_val & _PAGE_PRESENT)) {
+ if (verbose) {
+ fprintf(fp, "\n");
+ riscv64_translate_pte((ulong)pte_val, 0, 0);
+ }
+ printf("!_PAGE_PRESENT\n");
+ return FALSE;
+ }
+
+ *paddr = PTOB(pte_pfn) + PAGEOFFSET(vaddr);
+
+ if (verbose) {
+ fprintf(fp, " PAGE: %016lx\n\n", PAGEBASE(*paddr));
+ riscv64_translate_pte(pte_val, 0, 0);
+ }
+
+ return TRUE;
+no_page:
+ fprintf(fp, "invalid\n");
+ return FALSE;
+}
+
+static int
+riscv64_init_active_task_regs(void)
+{
+ int retval;
+
+ retval = riscv64_get_crash_notes();
+ if (retval == TRUE)
+ return retval;
+
+ return riscv64_get_elf_notes();
+}
+
+/*
+ * Retrieve task registers for the time of the crash.
+ */
+static int
+riscv64_get_crash_notes(void)
+{
+ struct machine_specific *ms = machdep->machspec;
+ ulong crash_notes;
+ Elf64_Nhdr *note;
+ ulong offset;
+ char *buf, *p;
+ ulong *notes_ptrs;
+ ulong i;
+
+ /*
+ * crash_notes contains per cpu memory for storing cpu states
+ * in case of system crash.
+ */
+ if (!symbol_exists("crash_notes"))
+ return FALSE;
+
+ crash_notes = symbol_value("crash_notes");
+
+ notes_ptrs = (ulong *)GETBUF(kt->cpus*sizeof(notes_ptrs[0]));
+
+ /*
+ * Read crash_notes for the first CPU. crash_notes are in standard ELF
+ * note format.
+ */
+ if (!readmem(crash_notes, KVADDR, ¬es_ptrs[kt->cpus-1],
+ sizeof(notes_ptrs[kt->cpus-1]), "crash_notes",
+ RETURN_ON_ERROR)) {
+ error(WARNING, "cannot read crash_notes\n");
+ FREEBUF(notes_ptrs);
+ return FALSE;
+ }
+
+ if (symbol_exists("__per_cpu_offset")) {
+
+ /*
+ * Add __per_cpu_offset for each cpu to form the pointer to the notes
+ */
+ for (i = 0; i < kt->cpus; i++)
+ notes_ptrs[i] = notes_ptrs[kt->cpus-1] + kt->__per_cpu_offset[i];
+ }
+
+ buf = GETBUF(SIZE(note_buf));
+
+ if (!(panic_task_regs = calloc((size_t)kt->cpus, sizeof(*panic_task_regs))))
+ error(FATAL, "cannot calloc panic_task_regs space\n");
+
+ for (i = 0; i < kt->cpus; i++) {
+
+ if (!readmem(notes_ptrs[i], KVADDR, buf, SIZE(note_buf), "note_buf_t",
+ RETURN_ON_ERROR)) {
+ error(WARNING,
+ "cannot find NT_PRSTATUS note for cpu: %d\n", i);
+ goto fail;
+ }
+
+ /*
+ * Do some sanity checks for this note before reading registers from it.
+ */
+ note = (Elf64_Nhdr *)buf;
+ p = buf + sizeof(Elf64_Nhdr);
+
+ /*
+ * dumpfiles created with qemu won't have crash_notes, but there will
+ * be elf notes; dumpfiles created by kdump do not create notes for
+ * offline cpus.
+ */
+ if (note->n_namesz == 0 && (DISKDUMP_DUMPFILE() || KDUMP_DUMPFILE())) {
+ if (DISKDUMP_DUMPFILE())
+ note = diskdump_get_prstatus_percpu(i);
+ else if (KDUMP_DUMPFILE())
+ note = netdump_get_prstatus_percpu(i);
+ if (note) {
+ /*
+ * SIZE(note_buf) accounts for a "final note", which is a
+ * trailing empty elf note header.
+ */
+ long notesz = SIZE(note_buf) - sizeof(Elf64_Nhdr);
+
+ if (sizeof(Elf64_Nhdr) + roundup(note->n_namesz, 4) +
+ note->n_descsz == notesz)
+ BCOPY((char *)note, buf, notesz);
+ } else {
+ error(WARNING,
+ "cannot find NT_PRSTATUS note for cpu: %d\n", i);
+ continue;
+ }
+ }
+
+ /*
+ * Check the sanity of NT_PRSTATUS note only for each online cpu.
+ */
+ if (note->n_type != NT_PRSTATUS) {
+ error(WARNING, "invalid NT_PRSTATUS note (n_type != NT_PRSTATUS)\n");
+ goto fail;
+ }
+ if (!STRNEQ(p, "CORE")) {
+ error(WARNING, "invalid NT_PRSTATUS note (name != \"CORE\"\n");
+ goto fail;
+ }
+
+ /*
+ * Find correct location of note data. This contains elf_prstatus
+ * structure which has registers etc. for the crashed task.
+ */
+ offset = sizeof(Elf64_Nhdr);
+ offset = roundup(offset + note->n_namesz, 4);
+ p = buf + offset; /* start of elf_prstatus */
+
+ BCOPY(p + OFFSET(elf_prstatus_pr_reg), &panic_task_regs[i],
+ sizeof(panic_task_regs[i]));
+ }
+
+ /*
+ * And finally we have the registers for the crashed task. This is
+ * used later on when dumping backtrace.
+ */
+ ms->crash_task_regs = panic_task_regs;
+
+ FREEBUF(buf);
+ FREEBUF(notes_ptrs);
+ return TRUE;
+
+fail:
+ FREEBUF(buf);
+ FREEBUF(notes_ptrs);
+ free(panic_task_regs);
+ return FALSE;
+}
+
+static int
+riscv64_get_elf_notes(void)
+{
+ struct machine_specific *ms = machdep->machspec;
+ int i;
+
+ if (!DISKDUMP_DUMPFILE() && !KDUMP_DUMPFILE())
+ return false;
+
+ panic_task_regs = calloc(kt->cpus, sizeof(*panic_task_regs));
+ if (!panic_task_regs)
+ error(FATAL, "cannot calloc panic_task_regs space\n");
+
+ for (i = 0; i < kt->cpus; i++) {
+ Elf64_Nhdr *note = NULL;
+ size_t len;
+
+ if (DISKDUMP_DUMPFILE())
+ note = diskdump_get_prstatus_percpu(i);
+ else if (KDUMP_DUMPFILE())
+ note = netdump_get_prstatus_percpu(i);
+
+ if (!note) {
+ error(WARNING,
+ "cannot find NT_PRSTATUS note for cpu: %d\n", i);
+ continue;
+ }
+
+ len = sizeof(Elf64_Nhdr);
+ len = roundup(len + note->n_namesz, 4);
+
+ BCOPY((char *)note + len + OFFSET(elf_prstatus_pr_reg),
+ &panic_task_regs[i], sizeof(panic_task_regs[i]));
+ }
+
+ ms->crash_task_regs = panic_task_regs;
+
+ return TRUE;
+}
+
+/*
+ * Translates a user virtual address to its physical address.
+ */
+static int
+riscv64_uvtop(struct task_context *tc, ulong uvaddr, physaddr_t *paddr, int verbose)
+{
+ ulong mm, active_mm;
+ ulong *pgd;
+
+ if (!tc)
+ error(FATAL, "current context invalid\n");
+
+ *paddr = 0;
+
+ if (is_kernel_thread(tc->task) && IS_KVADDR(uvaddr)) {
+ readmem(tc->task + OFFSET(task_struct_active_mm),
+ KVADDR, &active_mm, sizeof(void *),
+ "task active_mm contents", FAULT_ON_ERROR);
+
+ if (!active_mm)
+ error(FATAL,
+ "no active_mm for this kernel thread\n");
+
+ readmem(active_mm + OFFSET(mm_struct_pgd),
+ KVADDR, &pgd, sizeof(long),
+ "mm_struct pgd", FAULT_ON_ERROR);
+ } else {
+ if ((mm = task_mm(tc->task, TRUE)))
+ pgd = ULONG_PTR(tt->mm_struct + OFFSET(mm_struct_pgd));
+ else
+ readmem(tc->mm_struct + OFFSET(mm_struct_pgd),
+ KVADDR, &pgd, sizeof(long), "mm_struct pgd",
+ FAULT_ON_ERROR);
+ }
+
+ switch (machdep->flags & (VM_L3_4K | VM_L4_4K | VM_L5_4K))
+ {
+ case VM_L3_4K:
+ return riscv64_vtop_3level_4k(pgd, uvaddr, paddr, verbose);
+ case VM_L4_4K:
+ return riscv64_vtop_4level_4k(pgd, uvaddr, paddr, verbose);
+ case VM_L5_4K:
+ return riscv64_vtop_5level_4k(pgd, uvaddr, paddr, verbose);
+ default:
+ return FALSE;
+ }
+}
+
+static int
+riscv64_kvtop(struct task_context *tc, ulong kvaddr, physaddr_t *paddr, int verbose)
+{
+ ulong kernel_pgd;
+
+ if (!IS_KVADDR(kvaddr))
+ return FALSE;
+
+ if (!vt->vmalloc_start) {
+ *paddr = VTOP(kvaddr);
+ return TRUE;
+ }
+
+ if (!IS_VMALLOC_ADDR(kvaddr)) {
+ *paddr = VTOP(kvaddr);
+ if (!verbose)
+ return TRUE;
+ }
+
+ kernel_pgd = vt->kernel_pgd[0];
+ *paddr = 0;
+
+ switch (machdep->flags & (VM_L3_4K | VM_L4_4K | VM_L5_4K))
+ {
+ case VM_L3_4K:
+ return riscv64_vtop_3level_4k((ulong *)kernel_pgd, kvaddr, paddr, verbose);
+ case VM_L4_4K:
+ return riscv64_vtop_4level_4k((ulong *)kernel_pgd, kvaddr, paddr, verbose);
+ case VM_L5_4K:
+ return riscv64_vtop_5level_4k((ulong *)kernel_pgd, kvaddr, paddr, verbose);
+ default:
+ return FALSE;
+ }
+}
+
void
riscv64_init(int when)
{
+ switch (when) {
+ case SETUP_ENV:
+ machdep->process_elf_notes = process_elf64_notes;
+ break;
+
+ case PRE_SYMTAB:
+ machdep->verify_symbol = riscv64_verify_symbol;
+ machdep->machspec = &riscv64_machine_specific;
+ if (pc->flags & KERNEL_DEBUG_QUERY)
+ return;
+
+ machdep->verify_paddr = generic_verify_paddr;
+ machdep->ptrs_per_pgd = PTRS_PER_PGD;
+ break;
+
+ case PRE_GDB:
+ machdep->pagesize = riscv64_get_page_size();
+ machdep->pageshift = ffs(machdep->pagesize) - 1;
+ machdep->pageoffset = machdep->pagesize - 1;
+ machdep->pagemask = ~((ulonglong)machdep->pageoffset);
+ machdep->stacksize = machdep->pagesize << THREAD_SIZE_ORDER;
+
+ riscv64_get_phys_ram_base(machdep->machspec);
+ riscv64_get_struct_page_size(machdep->machspec);
+ riscv64_get_va_range(machdep->machspec);
+
+ pt_level_alloc(&machdep->pgd, "cannot malloc pgd space.");
+ pt_level_alloc(&machdep->machspec->p4d, "cannot malloc p4d space.");
+ pt_level_alloc(&machdep->pud, "cannot malloc pud space.");
+ pt_level_alloc(&machdep->pmd, "cannot malloc pmd space.");
+ pt_level_alloc(&machdep->ptbl, "cannot malloc ptbl space.");
+
+ machdep->last_pgd_read = 0;
+ machdep->machspec->last_p4d_read = 0;
+ machdep->last_pud_read = 0;
+ machdep->last_pmd_read = 0;
+ machdep->last_ptbl_read = 0;
+
+ machdep->kvbase = machdep->machspec->page_offset;
+ machdep->identity_map_base = machdep->kvbase;
+ machdep->is_kvaddr = riscv64_is_kvaddr;
+ machdep->is_uvaddr = riscv64_is_uvaddr;
+ machdep->uvtop = riscv64_uvtop;
+ machdep->kvtop = riscv64_kvtop;
+ machdep->cmd_mach = riscv64_cmd_mach;
+
+ machdep->vmalloc_start = riscv64_vmalloc_start;
+ machdep->processor_speed = riscv64_processor_speed;
+ machdep->get_stackbase = generic_get_stackbase;
+ machdep->get_stacktop = generic_get_stacktop;
+ machdep->translate_pte = riscv64_translate_pte;
+ machdep->memory_size = generic_memory_size;
+ machdep->is_task_addr = riscv64_is_task_addr;
+ machdep->get_smp_cpus = riscv64_get_smp_cpus;
+ machdep->value_to_symbol = generic_machdep_value_to_symbol;
+ machdep->show_interrupts = generic_show_interrupts;
+ machdep->get_irq_affinity = generic_get_irq_affinity;
+ machdep->init_kernel_pgd = NULL; /* pgd set by symbol_value("swapper_pg_dir") */
+ break;
+
+ case POST_GDB:
+ machdep->section_size_bits = _SECTION_SIZE_BITS;
+ machdep->max_physmem_bits = _MAX_PHYSMEM_BITS;
+ riscv64_page_type_init();
+
+ if (!machdep->hz)
+ machdep->hz = 250;
+
+ if (symbol_exists("irq_desc"))
+ ARRAY_LENGTH_INIT(machdep->nr_irqs, irq_desc,
+ "irq_desc", NULL, 0);
+ else if (kernel_symbol_exists("nr_irqs"))
+ get_symbol_data("nr_irqs", sizeof(unsigned int),
+ &machdep->nr_irqs);
+
+ MEMBER_OFFSET_INIT(elf_prstatus_pr_reg, "elf_prstatus",
+ "pr_reg");
+
+ STRUCT_SIZE_INIT(note_buf, "note_buf_t");
+ break;
+
+ case POST_VM:
+ /*
+ * crash_notes contains machine specific information about the
+ * crash. In particular, it contains CPU registers at the time
+ * of the crash. We need this information to extract correct
+ * backtraces from the panic task.
+ */
+ if (!ACTIVE() && !riscv64_init_active_task_regs())
+ error(WARNING,
+ "cannot retrieve registers for active task%s\n\n",
+ kt->cpus > 1 ? "s" : "");
+ break;
+ }
}
+/*
+ * 'help -r' command output
+ */
void
riscv64_display_regs_from_elf_notes(int cpu, FILE *ofp)
{
1. Add riscv64_init() implementation, do all necessary machine-specific setup, which will be called multiple times during initialization. 2. Add riscv64 sv39/48/57 pagetable macro definitions, the function of converting virtual address to a physical address via 4k page table. 3. Add the implementation of the vtop command, which is used to convert a virtual address to a physical address(call the functions defined in 2). 4. Add the implementation to get virtual memory layout, va_bits, phys_ram_base from vmcoreinfo. As these configurations changes from time to time, we send a Linux kernel patch to export these configurations, which can simplify the development of crash tool. The Linux patch: https://patchwork.kernel.org/project/linux-riscv/patch/20220714113300.367854-2-xianting.tian@linux.alibaba.com/ 5. Add riscv64_get_smp_cpus() implementation, get the number of online cpus. 6. Add riscv64_get_page_size() implementation, get page size. And so on. With this patch, we can enter crash command line, and run "vtop", "mod", "rd", "*", "p", "kmem" ... Tested on QEMU RISCV64 env and SoC platform of T-head Xuantie 910 CPU. KERNEL: vmlinux DUMPFILE: vmcore CPUS: 1 DATE: Fri Jul 15 10:24:25 CST 2022 UPTIME: 00:00:33 LOAD AVERAGE: 0.05, 0.01, 0.00 TASKS: 41 NODENAME: buildroot RELEASE: 5.18.9 VERSION: #30 SMP Fri Jul 15 09:47:03 CST 2022 MACHINE: riscv64 (unknown Mhz) MEMORY: 1 GB PANIC: "Kernel panic - not syncing: sysrq triggered crash" PID: 113 COMMAND: "sh" TASK: ff60000002269600 [THREAD_INFO: ff60000002269600] CPU: 0 STATE: TASK_RUNNING (PANIC) crash> p mem_map mem_map = $1 = (struct page *) 0xff6000003effbf00 crash> p /x *(struct page *) 0xff6000003effbf00 $5 = { flags = 0x1000, { { { lru = { next = 0xff6000003effbf08, prev = 0xff6000003effbf08 }, { __filler = 0xff6000003effbf08, mlock_count = 0x3effbf08 } }, mapping = 0x0, index = 0x0, private = 0x0 }, crash> mod MODULE NAME BASE SIZE OBJECT FILE ffffffff0113e740 nvme_core ffffffff01133000 98304 (not loaded) [CONFIG_KALLSYMS] ffffffff011542c0 nvme ffffffff0114c000 61440 (not loaded) [CONFIG_KALLSYMS] crash> rd ffffffff0113e740 8 ffffffff0113e740: 0000000000000000 ffffffff810874f8 .........t...... ffffffff0113e750: ffffffff011542c8 726f635f656d766e .B......nvme_cor ffffffff0113e760: 0000000000000065 0000000000000000 e............... ffffffff0113e770: 0000000000000000 0000000000000000 ................ crash> vtop ffffffff0113e740 VIRTUAL PHYSICAL ffffffff0113e740 8254d740 PGD: ffffffff810e9ff8 => 2ffff001 P4D: 0000000000000000 => 000000002fffec01 PUD: 00005605c2957470 => 0000000020949801 PMD: 00007fff7f1750c0 => 0000000020947401 PTE: 0 => 209534e7 PAGE: 000000008254d000 PTE PHYSICAL FLAGS 209534e7 8254d000 (PRESENT|READ|WRITE|GLOBAL|ACCESSED|DIRTY) PAGE PHYSICAL MAPPING INDEX CNT FLAGS ff6000003f0777d8 8254d000 0 0 1 0 Signed-off-by: Xianting Tian <xianting.tian@linux.alibaba.com> --- defs.h | 93 +++++ diskdump.c | 10 + riscv64.c | 983 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 1086 insertions(+)