Message ID | 20220718025346.411758-3-xianting.tian@linux.alibaba.com (mailing list archive)
---|---
State | New, archived
Series | Support RISCV64 arch and common commands
On 2022/07/18 11:53, Xianting Tian wrote: > 1. Add riscv64_init() implementation, do all necessary machine-specific setup, > which will be called multiple times during initialization. > 2. Add riscv64 sv39/48/57 pagetable macro definitions, the function of converting > virtual address to a physical address via 4k page table. > 3. Add the implementation of the vtop command, which is used to convert a > virtual address to a physical address(call the functions defined in 2). > 4. Add the implementation to get virtual memory layout, va_bits, phys_ram_base > from vmcoreinfo. As these configurations changes from time to time, we send > a Linux kernel patch to export these configurations, which can simplify the > development of crash tool. > The Linux patch(patch 3 of the series of patches): > https://lore.kernel.org/linux-riscv/20220717101323.370245-1-xianting.tian@linux.alibaba.com/ > 5. Add riscv64_get_smp_cpus() implementation, get the number of online cpus. > 6. Add riscv64_get_page_size() implementation, get page size. > And so on. > > With this patch, we can enter crash command line, and run "vtop", "mod", "rd", > "*", "p", "kmem" ... > > Tested on QEMU RISCV64 end and SoC platform of T-head Xuantie 910 CPU. > > KERNEL: vmlinux > DUMPFILE: vmcore > CPUS: 1 > DATE: Fri Jul 15 10:24:25 CST 2022 > UPTIME: 00:00:33 > LOAD AVERAGE: 0.05, 0.01, 0.00 > TASKS: 41 > NODENAME: buildroot > RELEASE: 5.18.9 > VERSION: #30 SMP Fri Jul 15 09:47:03 CST 2022 > MACHINE: riscv64 (unknown Mhz) > MEMORY: 1 GB > PANIC: "Kernel panic - not syncing: sysrq triggered crash" > PID: 113 > COMMAND: "sh" > TASK: ff60000002269600 [THREAD_INFO: ff60000002269600] > CPU: 0 > STATE: TASK_RUNNING (PANIC) > > crash> p mem_map > mem_map = $1 = (struct page *) 0xff6000003effbf00 > > crash> p /x *(struct page *) 0xff6000003effbf00 > $5 = { > flags = 0x1000, > { > { > { > lru = { > next = 0xff6000003effbf08, > prev = 0xff6000003effbf08 > }, > { > __filler = 0xff6000003effbf08, > mlock_count = 0x3effbf08 > } > }, > mapping = 0x0, > index = 0x0, > private = 0x0 > }, > > crash> mod > MODULE NAME BASE SIZE OBJECT FILE > ffffffff0113e740 nvme_core ffffffff01133000 98304 (not loaded) [CONFIG_KALLSYMS] > ffffffff011542c0 nvme ffffffff0114c000 61440 (not loaded) [CONFIG_KALLSYMS] > > crash> rd ffffffff0113e740 8 > ffffffff0113e740: 0000000000000000 ffffffff810874f8 .........t...... > ffffffff0113e750: ffffffff011542c8 726f635f656d766e .B......nvme_cor > ffffffff0113e760: 0000000000000065 0000000000000000 e............... > ffffffff0113e770: 0000000000000000 0000000000000000 ................ 
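The sv39 index macros added to defs.h below can be checked with a small standalone sketch. The shift and table-size constants mirror PGD_SHIFT_L3, PMD_SHIFT, PTRS_PER_* and a 4 KiB PAGESHIFT() from the patch; the sample address is the nvme_core module base from the session above, and everything else in the sketch is illustrative only:

/* Sv39, 4 KiB pages: VA low 39 bits = | PGD 9 | PMD 9 | PTE 9 | offset 12 | */
#include <stdio.h>

#define PAGE_SHIFT    12
#define PGD_SHIFT_L3  30
#define PMD_SHIFT     21
#define PTRS_PER_PGD  512
#define PTRS_PER_PMD  512
#define PTRS_PER_PTE  512

int main(void)
{
	unsigned long vaddr = 0xffffffff0113e740UL;   /* nvme_core module base from the session */

	unsigned long pgd_idx = (vaddr >> PGD_SHIFT_L3) & (PTRS_PER_PGD - 1);
	unsigned long pmd_idx = (vaddr >> PMD_SHIFT)    & (PTRS_PER_PMD - 1);
	unsigned long pte_idx = (vaddr >> PAGE_SHIFT)   & (PTRS_PER_PTE - 1);
	unsigned long offset  = vaddr & ((1UL << PAGE_SHIFT) - 1);

	printf("pgd index %lu, pmd index %lu, pte index %lu, page offset 0x%lx\n",
	       pgd_idx, pmd_idx, pte_idx, offset);
	return 0;
}

The printed indices are the per-level slots that the riscv64_vtop_3level_4k() walk in the patch reads from the cached pgd/pmd/ptbl pages.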
> > crash> vtop ffffffff0113e740 > VIRTUAL PHYSICAL > ffffffff0113e740 8254d740 > > PGD: ffffffff810e9ff8 => 2ffff001 > P4D: 0000000000000000 => 000000002fffec01 > PUD: 00005605c2957470 => 0000000020949801 > PMD: 00007fff7f1750c0 => 0000000020947401 > PTE: 0 => 209534e7 > PAGE: 000000008254d000 > > PTE PHYSICAL FLAGS > 209534e7 8254d000 (PRESENT|READ|WRITE|GLOBAL|ACCESSED|DIRTY) > > PAGE PHYSICAL MAPPING INDEX CNT FLAGS > ff6000003f0777d8 8254d000 0 0 1 0 > > Signed-off-by: Xianting Tian <xianting.tian@linux.alibaba.com> > --- > defs.h | 93 +++++ > diskdump.c | 10 + > riscv64.c | 983 +++++++++++++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 1086 insertions(+) > > diff --git a/defs.h b/defs.h > index 42ffba3..be4db94 100644 > --- a/defs.h > +++ b/defs.h > @@ -3494,6 +3494,81 @@ struct arm64_stackframe { > #define _64BIT_ > #define MACHINE_TYPE "RISCV64" > > +typedef struct { ulong pgd; } pgd_t; > +typedef struct { ulong p4d; } p4d_t; > +typedef struct { ulong pud; } pud_t; > +typedef struct { ulong pmd; } pmd_t; > +typedef struct { ulong pte; } pte_t; > +typedef signed int s32; > + > +/* arch/riscv/include/asm/pgtable-64.h */ > +#define PGD_SHIFT_L3 (30) > +#define PGD_SHIFT_L4 (39) > +#define PGD_SHIFT_L5 (48) > + > +#define P4D_SHIFT (39) > +#define PUD_SHIFT (30) > +#define PMD_SHIFT (21) > + > +#define PTRS_PER_PGD (512) > +#define PTRS_PER_P4D (512) > +#define PTRS_PER_PUD (512) > +#define PTRS_PER_PMD (512) > +#define PTRS_PER_PTE (512) > + > +/* > + * Mask for PPN and PROT bit53~0 of PTE > + * 63 6261 60 54 53 10 9 8 7 6 5 4 3 2 1 0 > + * N PBMT Reserved P P N RSW D A G U X W R V > + */ > +#define PTE_PFN_PROT_MASK 0x3FFFFFFFFFFFFF > + > +/* > + * 3-levels / 4K pages > + * > + * sv39 > + * PGD | PMD | PTE | OFFSET | > + * 9 | 9 | 9 | 12 | > + */ > +#define pgd_index_l3_4k(addr) (((addr) >> PGD_SHIFT_L3) & (PTRS_PER_PGD - 1)) > +#define pmd_index_l3_4k(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) > +#define pte_index_l3_4k(addr) (((addr) >> PAGESHIFT()) & (PTRS_PER_PTE - 1)) > + > +/* > + * 4-levels / 4K pages > + * > + * sv48 > + * PGD | PUD | PMD | PTE | OFFSET | > + * 9 | 9 | 9 | 9 | 12 | > + */ > +#define pgd_index_l4_4k(addr) (((addr) >> PGD_SHIFT_L4) & (PTRS_PER_PGD - 1)) > +#define pud_index_l4_4k(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) > +#define pmd_index_l4_4k(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) > +#define pte_index_l4_4k(addr) (((addr) >> PAGESHIFT()) & (PTRS_PER_PTE - 1)) > + > +/* > + * 5-levels / 4K pages > + * > + * sv48 > + * PGD | P4D | PUD | PMD | PTE | OFFSET | > + * 9 | 9 | 9 | 9 | 9 | 12 | > + */ > +#define pgd_index_l5_4k(addr) (((addr) >> PGD_SHIFT_L5) & (PTRS_PER_PGD - 1)) > +#define p4d_index_l5_4k(addr) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1)) > +#define pud_index_l5_4k(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) > +#define pmd_index_l5_4k(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) > +#define pte_index_l5_4k(addr) (((addr) >> PAGESHIFT()) & (PTRS_PER_PTE - 1)) > + > +#define VM_L3_4K (0x2) > +#define VM_L3_2M (0x4) > +#define VM_L3_1G (0x8) > +#define VM_L4_4K (0x10) > +#define VM_L4_2M (0x20) > +#define VM_L4_1G (0x40) > +#define VM_L5_4K (0x80) > +#define VM_L5_2M (0x100) > +#define VM_L5_1G (0x200) > + > /* > * Direct memory mapping > */ > @@ -3545,6 +3620,14 @@ struct arm64_stackframe { > #define PHYS_MASK_SHIFT _MAX_PHYSMEM_BITS > #define PHYS_MASK (((1UL) << PHYS_MASK_SHIFT) - 1) > > +#define IS_LAST_P4D_READ(p4d) ((ulong)(p4d) == machdep->machspec->last_p4d_read) > +#define FILL_P4D(P4D, 
TYPE, SIZE) \ > + if (!IS_LAST_P4D_READ(P4D)) { \ > + readmem((ulonglong)((ulong)(P4D)), TYPE, machdep->machspec->p4d, \ > + SIZE, "p4d page", FAULT_ON_ERROR); \ > + machdep->machspec->last_p4d_read = (ulong)(P4D); \ > + } > + > #endif /* RISCV64 */ > > #ifdef X86 > @@ -6810,6 +6893,10 @@ struct machine_specific { > ulong _page_soft; > > ulong _pfn_shift; > + ulong va_bits; > + char *p4d; > + ulong last_p4d_read; > + ulong struct_page_size; > > struct riscv64_register *crash_task_regs; > }; > @@ -6833,6 +6920,12 @@ struct machine_specific { > #define _PAGE_PROT_NONE _PAGE_READ > #define _PAGE_PFN_SHIFT 10 > > +/* from 'struct pt_regs' definitions of RISC-V arch */ > +#define RISCV64_REGS_EPC 0 > +#define RISCV64_REGS_RA 1 > +#define RISCV64_REGS_SP 2 > +#define RISCV64_REGS_FP 8 > + > #endif /* RISCV64 */ > > /* > diff --git a/diskdump.c b/diskdump.c > index 28503bc..cf5f5d9 100644 > --- a/diskdump.c > +++ b/diskdump.c > @@ -1531,6 +1531,12 @@ get_diskdump_regs_mips(struct bt_info *bt, ulong *eip, ulong *esp) > machdep->get_stack_frame(bt, eip, esp); > } > > +static void > +get_diskdump_regs_riscv64(struct bt_info *bt, ulong *eip, ulong *esp) > +{ > + machdep->get_stack_frame(bt, eip, esp); > +} > + > static void > get_diskdump_regs_sparc64(struct bt_info *bt, ulong *eip, ulong *esp) > { > @@ -1610,6 +1616,10 @@ get_diskdump_regs(struct bt_info *bt, ulong *eip, ulong *esp) > get_diskdump_regs_sparc64(bt, eip, esp); > break; > > + case EM_RISCV: > + get_diskdump_regs_riscv64(bt, eip, esp); > + break; > + > default: > error(FATAL, "%s: unsupported machine type: %s\n", > DISKDUMP_VALID() ? "diskdump" : "compressed kdump", > diff --git a/riscv64.c b/riscv64.c > index c7df857..9d40297 100644 > --- a/riscv64.c > +++ b/riscv64.c > @@ -16,10 +16,304 @@ > > #include <elf.h> > #include "defs.h" > +#include <math.h> > + > +static ulong riscv64_get_page_size(void); > +static int riscv64_vtop_3level_4k(ulong *pgd, ulong vaddr, > + physaddr_t *paddr, int verbose); > +static int riscv64_vtop_4level_4k(ulong *pgd, ulong vaddr, > + physaddr_t *paddr, int verbose); > +static int riscv64_vtop_5level_4k(ulong *pgd, ulong vaddr, > + physaddr_t *paddr, int verbose); > +static void riscv64_page_type_init(void); > +static int riscv64_is_kvaddr(ulong vaddr); > +static int riscv64_is_uvaddr(ulong vaddr, struct task_context *tc); > +static int riscv64_uvtop(struct task_context *tc, ulong vaddr, > + physaddr_t *paddr, int verbose); > +static int riscv64_kvtop(struct task_context *tc, ulong kvaddr, > + physaddr_t *paddr, int verbose); (In whole, you can use about 100 chars in a line, if you like.) > +static void riscv64_cmd_mach(void); > +static int riscv64_translate_pte(ulong, void *, ulonglong); > +static int riscv64_init_active_task_regs(void); > +static int riscv64_get_crash_notes(void); > +static int riscv64_get_elf_notes(void); > +static void riscv64_get_va_range(struct machine_specific *ms); > +static void riscv64_get_struct_page_size(struct machine_specific *ms); > + > +#define REG_FMT "%016lx" > +#define SZ_2G 0x80000000 > + > +/* > + * Holds registers during the crash. 
> + */ > +static struct riscv64_register *panic_task_regs; > + > +/* from arch/riscv/include/asm/stacktrace.h */ > +struct stackframe { > + ulong fp; > + ulong ra; > +}; > + > +static struct machine_specific riscv64_machine_specific = { > + ._page_present = (1 << 0), > + ._page_read = (1 << 1), > + ._page_write = (1 << 2), > + ._page_exec = (1 << 3), > + ._page_user = (1 << 4), > + ._page_global = (1 << 5), > + ._page_accessed = (1 << 6), > + ._page_dirty = (1 << 7), > + ._page_soft = (1 << 8), > + > + .va_bits = 0, > + .struct_page_size = 0, > +}; > + > +static void > +pt_level_alloc(char **lvl, char *name) > +{ > + size_t sz = PAGESIZE(); > + void *pointer = malloc(sz); > + > + if (!pointer) > + error(FATAL, name); > + *lvl = pointer; > +} > + > +static void > +riscv64_get_phys_ram_base(struct machine_specific *ms) > +{ > + char *string; > + > + if ((string = pc->read_vmcoreinfo("NUMBER(phys_ram_base)"))) { > + ms->phys_base = atol(string); > + free(string); > + } else > + /* > + * It can't continue without phys_ram_base. As for qemu rv64 > + * env and hardware platform, phys_ram_base may different. > + */ > + error(FATAL, "cannot read phys_ram_base\n"); > +} > + > +static ulong > +riscv64_get_page_size(void) > +{ > + return memory_page_size(); > +} > + > +static ulong > +riscv64_vmalloc_start(void) > +{ > + return ((ulong)VMALLOC_START); > +} > + > +/* Get the size of struct page {} */ > +static void riscv64_get_struct_page_size(struct machine_specific *ms) > +{ > + char *string; > + > + string = pc->read_vmcoreinfo("SIZE(page)"); > + if (string) > + ms->struct_page_size = atol(string); > + free(string); > +} > + > +/* > + * Get the max shift of the size of struct page. > + * Most of the time, it is 64 bytes, but not sure. > +*/ > +static int riscv64_get_struct_page_max_shift(struct machine_specific *ms) > +{ > + return (int)ceil(log2(ms->struct_page_size)); > +} > + > +static void > +riscv64_cmd_mach(void) > +{ > + /* TODO: */ > +} > + > +static int > +riscv64_verify_symbol(const char *name, ulong value, char type) > +{ > + /* TODO: */ > + return TRUE; > +} > > void > riscv64_dump_machdep_table(ulong arg) > { > + /* TODO: */ > +} > + > +static ulong > +riscv64_processor_speed(void) > +{ > + /* TODO: */ > + return 0; > +} > + > +static unsigned long riscv64_get_kernel_version(void) > +{ > + char *string; > + char buf[BUFSIZE]; > + char *p1, *p2; > + > + if (THIS_KERNEL_VERSION) > + return THIS_KERNEL_VERSION; > + > + string = pc->read_vmcoreinfo("OSRELEASE"); > + if (string) { > + strcpy(buf, string); > + > + p1 = p2 = buf; > + while (*p2 != '.') > + p2++; > + *p2 = NULLCHAR; > + kt->kernel_version[0] = atoi(p1); > + > + p1 = ++p2; > + while (*p2 != '.') > + p2++; > + *p2 = NULLCHAR; > + kt->kernel_version[1] = atoi(p1); > + > + p1 = ++p2; > + while ((*p2 >= '0') && (*p2 <= '9')) > + p2++; > + *p2 = NULLCHAR; > + kt->kernel_version[2] = atoi(p1); > + } > + free(string); > + return THIS_KERNEL_VERSION; > +} > + > +static void riscv64_get_va_range(struct machine_specific *ms) > +{ > + unsigned long kernel_version = riscv64_get_kernel_version(); > + char *string; > + > + if ((string = pc->read_vmcoreinfo("NUMBER(VA_BITS)"))) { > + ms->va_bits = atol(string); > + free(string); > + } else > + goto error; > + if ((string = pc->read_vmcoreinfo("NUMBER(PAGE_OFFSET)"))) { > + ms->page_offset = htol(string, QUIET, NULL); > + free(string); > + } else > + goto error; > + > + if ((string = pc->read_vmcoreinfo("NUMBER(VMALLOC_START)"))) { > + ms->vmalloc_start_addr = htol(string, QUIET, NULL); > + 
free(string); > + } else > + goto error; > + > + if ((string = pc->read_vmcoreinfo("NUMBER(VMALLOC_END)"))) { > + ms->vmalloc_end = htol(string, QUIET, NULL); > + free(string); > + } else > + goto error; > + > + if ((string = pc->read_vmcoreinfo("NUMBER(VMEMMAP_START)"))) { > + ms->vmemmap_vaddr = htol(string, QUIET, NULL); > + free(string); > + } else > + goto error; > + > + if ((string = pc->read_vmcoreinfo("NUMBER(VMEMMAP_END)"))) { > + ms->vmemmap_end = htol(string, QUIET, NULL); > + free(string); > + } else > + goto error; > + > + if ((string = pc->read_vmcoreinfo("NUMBER(KERNEL_LINK_ADDR)"))) { > + ms->kernel_link_addr = htol(string, QUIET, NULL); > + free(string); > + } else > + goto error; > + > + if ((string = pc->read_vmcoreinfo("NUMBER(ADDRESS_SPACE_END)"))) { > + ms->address_space_end = htol(string, QUIET, NULL); > + free(string); > + } else > + goto error; > + > + /* > + * From Linux 5.13, the kernel mapping is moved to the last 2GB > + * of the address space, modules use the 2GB memory range right > + * before the kernel. Before Linux 5.13, modules area is embedded > + * in vmalloc area. > + * > + * 5.13 = 0x5 << 16 | 0xD << 8 > + */ > + if (kernel_version >= 0x50D00) { Please use LINUX() macro. > + if ((string = pc->read_vmcoreinfo("NUMBER(MODULES_VADDR)"))) { > + ms->modules_vaddr = htol(string, QUIET, NULL); > + free(string); > + } else > + goto error; > + > + if ((string = pc->read_vmcoreinfo("NUMBER(MODULES_END)"))) { > + ms->modules_end = htol(string, QUIET, NULL); > + free(string); > + } else > + goto error; > + } else { > + ms->modules_vaddr = ms->vmalloc_start_addr; > + ms->modules_end = ms->vmalloc_end; > + } > + > + if (CRASHDEBUG(8)) { > + fprintf(fp, "va_bits : %ld\n", ms->va_bits); > + fprintf(fp, "vmemmap : 0x%lx - 0x%lx\n", > + ms->vmemmap_vaddr, ms->vmemmap_end); > + fprintf(fp, "vmalloc : 0x%lx - 0x%lx\n", > + ms->vmalloc_start_addr, ms->vmalloc_end); > + fprintf(fp, "lowmem : 0x%lx -\n", ms->page_offset); > + fprintf(fp, "mudules : 0x%lx - 0x%lx\n", > + ms->modules_vaddr, ms->modules_end); > + fprintf(fp, "kernel : 0x%lx - 0x%lx\n", > + ms->kernel_link_addr, ms->address_space_end); These are essential information, I think CRASHDEBUG(1) would be better for checking them. The other patches look good to me. Thanks, Kazu > + } > + return; > +error: > + error(FATAL, "cannot get vm layout\n"); > +} > + > +static int > +riscv64_is_kvaddr(ulong vaddr) > +{ > + if (IS_VMALLOC_ADDR(vaddr)) > + return TRUE; > + > + return (vaddr >= machdep->kvbase); > +} > + > +static int > +riscv64_is_uvaddr(ulong vaddr, struct task_context *unused) > +{ > + if (IS_VMALLOC_ADDR(vaddr)) > + return FALSE; > + > + return (vaddr < machdep->kvbase); > +} > + > +static int > +riscv64_is_task_addr(ulong task) > +{ > + if (tt->flags & THREAD_INFO) > + return IS_KVADDR(task); > + > + return (IS_KVADDR(task) && ALIGNED_STACK_OFFSET(task) == 0); > +} > + > +static int > +riscv64_get_smp_cpus(void) > +{ > + return (get_cpus_online() > 0) ? get_cpus_online() : kt->cpus; > } > > /* > @@ -33,11 +327,700 @@ riscv64_IS_VMALLOC_ADDR(ulong vaddr) > (vaddr >= MODULES_VADDR && vaddr <= MODULES_END)); > } > > +/* > + * Translate a PTE, returning TRUE if the page is present. > + * If a physaddr pointer is passed in, don't print anything. 
> + */ > +static int > +riscv64_translate_pte(ulong pte, void *physaddr, ulonglong unused) > +{ > + char ptebuf[BUFSIZE]; > + char physbuf[BUFSIZE]; > + char buf[BUFSIZE]; > + int page_present; > + int len1, len2, others; > + ulong paddr; > + > + paddr = PTOB(pte >> _PAGE_PFN_SHIFT); > + page_present = !!(pte & _PAGE_PRESENT); > + > + if (physaddr) { > + *(ulong *)physaddr = paddr; > + return page_present; > + } > + > + sprintf(ptebuf, "%lx", pte); > + len1 = MAX(strlen(ptebuf), strlen("PTE")); > + fprintf(fp, "%s ", mkstring(buf, len1, CENTER | LJUST, "PTE")); > + > + if (!page_present) > + return page_present; > + > + sprintf(physbuf, "%lx", paddr); > + len2 = MAX(strlen(physbuf), strlen("PHYSICAL")); > + fprintf(fp, "%s ", mkstring(buf, len2, CENTER | LJUST, "PHYSICAL")); > + > + fprintf(fp, "FLAGS\n"); > + fprintf(fp, "%s %s ", > + mkstring(ptebuf, len1, CENTER | RJUST, NULL), > + mkstring(physbuf, len2, CENTER | RJUST, NULL)); > + > + fprintf(fp, "("); > + others = 0; > + > +#define CHECK_PAGE_FLAG(flag) \ > + if ((_PAGE_##flag) && (pte & _PAGE_##flag)) \ > + fprintf(fp, "%s" #flag, others++ ? "|" : "") > + if (pte) { > + CHECK_PAGE_FLAG(PRESENT); > + CHECK_PAGE_FLAG(READ); > + CHECK_PAGE_FLAG(WRITE); > + CHECK_PAGE_FLAG(EXEC); > + CHECK_PAGE_FLAG(USER); > + CHECK_PAGE_FLAG(GLOBAL); > + CHECK_PAGE_FLAG(ACCESSED); > + CHECK_PAGE_FLAG(DIRTY); > + CHECK_PAGE_FLAG(SOFT); > + } else { > + fprintf(fp, "no mapping"); > + } > + > + fprintf(fp, ")\n"); > + > + return page_present; > +} > + > +static void > +riscv64_page_type_init(void) > +{ > + ulong va_bits = machdep->machspec->va_bits; > + > + /* > + * For RISCV64 arch, any level of PTE may be a leaf PTE, > + * so in addition to 4KiB pages, > + * Sv39 supports 2 MiB megapages, 1 GiB gigapages; > + * Sv48 supports 2 MiB megapages, 1 GiB gigapages, 512 GiB terapages; > + * Sv57 supports 2 MiB megapages, 1 GiB gigapages, 512 GiB terapages, and 256 TiB petapages. > + * > + * refs to riscv-privileged spec. > + * > + * We just support 4KiB, 2MiB, 1GiB now. > + */ > + switch (machdep->pagesize) > + { > + case 0x1000: // 4 KiB > + machdep->flags |= (va_bits == 57 ? VM_L5_4K : > + (va_bits == 48 ? 
VM_L4_4K : VM_L3_4K)); > + break; > + case 0x200000: // 2 MiB > + /* TODO: */ > + case 0x40000000: // 1 GiB > + /* TODO: */ > + default: > + if (machdep->pagesize) > + error(FATAL, "invalid/unsupported page size: %d\n", > + machdep->pagesize); > + else > + error(FATAL, "cannot determine page size\n"); > + } > +} > + > +static int > +riscv64_vtop_3level_4k(ulong *pgd, ulong vaddr, physaddr_t *paddr, int verbose) > +{ > + ulong *pgd_ptr, pgd_val; > + ulong *pmd_ptr, pmd_val; > + ulong *pte_ptr, pte_val, pte_pfn; > + ulong pt_phys; > + > + /* PGD */ > + pgd_ptr = pgd + pgd_index_l3_4k(vaddr); > + FILL_PGD(pgd, KVADDR, PAGESIZE()); > + pgd_val = ULONG(machdep->pgd + PAGEOFFSET(pgd_ptr)); > + if (verbose) > + fprintf(fp, " PGD: %lx => %lx\n", (ulong)pgd_ptr, pgd_val); > + if (!pgd_val) > + goto no_page; > + pgd_val &= PTE_PFN_PROT_MASK; > + pt_phys = (pgd_val >> _PAGE_PFN_SHIFT) << PAGESHIFT(); > + > + /* PMD */ > + FILL_PMD(PAGEBASE(pt_phys), PHYSADDR, PAGESIZE()); > + pmd_val = ULONG(machdep->pmd + PAGEOFFSET(sizeof(pmd_t) * > + pmd_index_l3_4k(vaddr))); > + if (verbose) > + fprintf(fp, " PMD: %016lx => %016lx\n", (ulong)pmd_ptr, pmd_val); > + if (!pmd_val) > + goto no_page; > + pmd_val &= PTE_PFN_PROT_MASK; > + pt_phys = (pmd_val >> _PAGE_PFN_SHIFT) << PAGESHIFT(); > + > + /* PTE */ > + FILL_PTBL(PAGEBASE(pt_phys), PHYSADDR, PAGESIZE()); > + pte_val = ULONG(machdep->ptbl + PAGEOFFSET(sizeof(pte_t) * > + pte_index_l3_4k(vaddr))); > + if (verbose) > + fprintf(fp, " PTE: %lx => %lx\n", (ulong)pte_ptr, pte_val); > + if (!pte_val) > + goto no_page; > + pte_val &= PTE_PFN_PROT_MASK; > + pte_pfn = pte_val >> _PAGE_PFN_SHIFT; > + > + if (!(pte_val & _PAGE_PRESENT)) { > + if (verbose) { > + fprintf(fp, "\n"); > + riscv64_translate_pte((ulong)pte_val, 0, 0); > + } > + fprintf(fp, " PAGE: %016lx not present\n\n", PAGEBASE(*paddr)); > + return FALSE; > + } > + > + *paddr = PTOB(pte_pfn) + PAGEOFFSET(vaddr); > + > + if (verbose) { > + fprintf(fp, " PAGE: %016lx\n\n", PAGEBASE(*paddr)); > + riscv64_translate_pte(pte_val, 0, 0); > + } > + > + return TRUE; > +no_page: > + fprintf(fp, "invalid\n"); > + return FALSE; > +} > + > +static int > +riscv64_vtop_4level_4k(ulong *pgd, ulong vaddr, physaddr_t *paddr, int verbose) > +{ > + ulong *pgd_ptr, pgd_val; > + ulong *pud_ptr, pud_val; > + ulong *pmd_ptr, pmd_val; > + ulong *pte_ptr, pte_val, pte_pfn; > + ulong pt_phys; > + > + /* PGD */ > + pgd_ptr = pgd + pgd_index_l4_4k(vaddr); > + FILL_PGD(pgd, KVADDR, PAGESIZE()); > + pgd_val = ULONG(machdep->pgd + PAGEOFFSET(pgd_ptr)); > + if (verbose) > + fprintf(fp, " PGD: %lx => %lx\n", (ulong)pgd_ptr, pgd_val); > + if (!pgd_val) > + goto no_page; > + pgd_val &= PTE_PFN_PROT_MASK; > + pt_phys = (pgd_val >> _PAGE_PFN_SHIFT) << PAGESHIFT(); > + > + /* PUD */ > + FILL_PUD(PAGEBASE(pt_phys), PHYSADDR, PAGESIZE()); > + pud_val = ULONG(machdep->pud + PAGEOFFSET(sizeof(pud_t) * > + pud_index_l4_4k(vaddr))); > + if (verbose) > + fprintf(fp, " PUD: %016lx => %016lx\n", (ulong)pud_ptr, pud_val); > + if (!pud_val) > + goto no_page; > + pud_val &= PTE_PFN_PROT_MASK; > + pt_phys = (pud_val >> _PAGE_PFN_SHIFT) << PAGESHIFT(); > + > + /* PMD */ > + FILL_PMD(PAGEBASE(pt_phys), PHYSADDR, PAGESIZE()); > + pmd_val = ULONG(machdep->pmd + PAGEOFFSET(sizeof(pmd_t) * > + pmd_index_l4_4k(vaddr))); > + if (verbose) > + fprintf(fp, " PMD: %016lx => %016lx\n", (ulong)pmd_ptr, pmd_val); > + if (!pmd_val) > + goto no_page; > + pmd_val &= PTE_PFN_PROT_MASK; > + pt_phys = (pmd_val >> _PAGE_PFN_SHIFT) << PAGESHIFT(); > + > + /* PTE */ > + 
FILL_PTBL(PAGEBASE(pt_phys), PHYSADDR, PAGESIZE()); > + pte_val = ULONG(machdep->ptbl + PAGEOFFSET(sizeof(pte_t) * > + pte_index_l4_4k(vaddr))); > + if (verbose) > + fprintf(fp, " PTE: %lx => %lx\n", (ulong)pte_ptr, pte_val); > + if (!pte_val) > + goto no_page; > + pte_val &= PTE_PFN_PROT_MASK; > + pte_pfn = pte_val >> _PAGE_PFN_SHIFT; > + > + if (!(pte_val & _PAGE_PRESENT)) { > + if (verbose) { > + fprintf(fp, "\n"); > + riscv64_translate_pte((ulong)pte_val, 0, 0); > + } > + fprintf(fp, " PAGE: %016lx not present\n\n", PAGEBASE(*paddr)); > + return FALSE; > + } > + > + *paddr = PTOB(pte_pfn) + PAGEOFFSET(vaddr); > + > + if (verbose) { > + fprintf(fp, " PAGE: %016lx\n\n", PAGEBASE(*paddr)); > + riscv64_translate_pte(pte_val, 0, 0); > + } > + > + return TRUE; > +no_page: > + fprintf(fp, "invalid\n"); > + return FALSE; > +} > + > +static int > +riscv64_vtop_5level_4k(ulong *pgd, ulong vaddr, physaddr_t *paddr, int verbose) > +{ > + ulong *pgd_ptr, pgd_val; > + ulong *p4d_ptr, p4d_val; > + ulong *pud_ptr, pud_val; > + ulong *pmd_ptr, pmd_val; > + ulong *pte_ptr, pte_val, pte_pfn; > + ulong pt_phys; > + > + /* PGD */ > + pgd_ptr = pgd + pgd_index_l5_4k(vaddr); > + FILL_PGD(pgd, KVADDR, PAGESIZE()); > + pgd_val = ULONG(machdep->pgd + PAGEOFFSET(pgd_ptr)); > + if (verbose) > + fprintf(fp, " PGD: %lx => %lx\n", (ulong)pgd_ptr, pgd_val); > + if (!pgd_val) > + goto no_page; > + pgd_val &= PTE_PFN_PROT_MASK; > + pt_phys = (pgd_val >> _PAGE_PFN_SHIFT) << PAGESHIFT(); > + > + /* P4D */ > + FILL_P4D(PAGEBASE(pt_phys), PHYSADDR, PAGESIZE()); > + p4d_val = ULONG(machdep->machspec->p4d + PAGEOFFSET(sizeof(p4d_t) * > + p4d_index_l5_4k(vaddr))); > + if (verbose) > + fprintf(fp, " P4D: %016lx => %016lx\n", (ulong)p4d_ptr, p4d_val); > + if (!p4d_val) > + goto no_page; > + p4d_val &= PTE_PFN_PROT_MASK; > + pt_phys = (p4d_val >> _PAGE_PFN_SHIFT) << PAGESHIFT(); > + > + /* PUD */ > + FILL_PUD(PAGEBASE(pt_phys), PHYSADDR, PAGESIZE()); > + pud_val = ULONG(machdep->pud + PAGEOFFSET(sizeof(pud_t) * > + pud_index_l5_4k(vaddr))); > + if (verbose) > + fprintf(fp, " PUD: %016lx => %016lx\n", (ulong)pud_ptr, pud_val); > + if (!pud_val) > + goto no_page; > + pud_val &= PTE_PFN_PROT_MASK; > + pt_phys = (pud_val >> _PAGE_PFN_SHIFT) << PAGESHIFT(); > + > + /* PMD */ > + FILL_PMD(PAGEBASE(pt_phys), PHYSADDR, PAGESIZE()); > + pmd_val = ULONG(machdep->pmd + PAGEOFFSET(sizeof(pmd_t) * > + pmd_index_l4_4k(vaddr))); > + if (verbose) > + fprintf(fp, " PMD: %016lx => %016lx\n", (ulong)pmd_ptr, pmd_val); > + if (!pmd_val) > + goto no_page; > + pmd_val &= PTE_PFN_PROT_MASK; > + pt_phys = (pmd_val >> _PAGE_PFN_SHIFT) << PAGESHIFT(); > + > + /* PTE */ > + FILL_PTBL(PAGEBASE(pt_phys), PHYSADDR, PAGESIZE()); > + pte_val = ULONG(machdep->ptbl + PAGEOFFSET(sizeof(pte_t) * > + pte_index_l4_4k(vaddr))); > + if (verbose) > + fprintf(fp, " PTE: %lx => %lx\n", (ulong)pte_ptr, pte_val); > + if (!pte_val) > + goto no_page; > + pte_val &= PTE_PFN_PROT_MASK; > + pte_pfn = pte_val >> _PAGE_PFN_SHIFT; > + > + if (!(pte_val & _PAGE_PRESENT)) { > + if (verbose) { > + fprintf(fp, "\n"); > + riscv64_translate_pte((ulong)pte_val, 0, 0); > + } > + printf("!_PAGE_PRESENT\n"); > + return FALSE; > + } > + > + *paddr = PTOB(pte_pfn) + PAGEOFFSET(vaddr); > + > + if (verbose) { > + fprintf(fp, " PAGE: %016lx\n\n", PAGEBASE(*paddr)); > + riscv64_translate_pte(pte_val, 0, 0); > + } > + > + return TRUE; > +no_page: > + fprintf(fp, "invalid\n"); > + return FALSE; > +} > + > +static int > +riscv64_init_active_task_regs(void) > +{ > + int retval; > + > + retval = 
riscv64_get_crash_notes(); > + if (retval == TRUE) > + return retval; > + > + return riscv64_get_elf_notes(); > +} > + > +/* > + * Retrieve task registers for the time of the crash. > + */ > +static int > +riscv64_get_crash_notes(void) > +{ > + struct machine_specific *ms = machdep->machspec; > + ulong crash_notes; > + Elf64_Nhdr *note; > + ulong offset; > + char *buf, *p; > + ulong *notes_ptrs; > + ulong i; > + > + /* > + * crash_notes contains per cpu memory for storing cpu states > + * in case of system crash. > + */ > + if (!symbol_exists("crash_notes")) > + return FALSE; > + > + crash_notes = symbol_value("crash_notes"); > + > + notes_ptrs = (ulong *)GETBUF(kt->cpus*sizeof(notes_ptrs[0])); > + > + /* > + * Read crash_notes for the first CPU. crash_notes are in standard ELF > + * note format. > + */ > + if (!readmem(crash_notes, KVADDR, ¬es_ptrs[kt->cpus-1], > + sizeof(notes_ptrs[kt->cpus-1]), "crash_notes", > + RETURN_ON_ERROR)) { > + error(WARNING, "cannot read crash_notes\n"); > + FREEBUF(notes_ptrs); > + return FALSE; > + } > + > + if (symbol_exists("__per_cpu_offset")) { > + > + /* > + * Add __per_cpu_offset for each cpu to form the pointer to the notes > + */ > + for (i = 0; i < kt->cpus; i++) > + notes_ptrs[i] = notes_ptrs[kt->cpus-1] + kt->__per_cpu_offset[i]; > + } > + > + buf = GETBUF(SIZE(note_buf)); > + > + if (!(panic_task_regs = calloc((size_t)kt->cpus, sizeof(*panic_task_regs)))) > + error(FATAL, "cannot calloc panic_task_regs space\n"); > + > + for (i = 0; i < kt->cpus; i++) { > + > + if (!readmem(notes_ptrs[i], KVADDR, buf, SIZE(note_buf), "note_buf_t", > + RETURN_ON_ERROR)) { > + error(WARNING, > + "cannot find NT_PRSTATUS note for cpu: %d\n", i); > + goto fail; > + } > + > + /* > + * Do some sanity checks for this note before reading registers from it. > + */ > + note = (Elf64_Nhdr *)buf; > + p = buf + sizeof(Elf64_Nhdr); > + > + /* > + * dumpfiles created with qemu won't have crash_notes, but there will > + * be elf notes; dumpfiles created by kdump do not create notes for > + * offline cpus. > + */ > + if (note->n_namesz == 0 && (DISKDUMP_DUMPFILE() || KDUMP_DUMPFILE())) { > + if (DISKDUMP_DUMPFILE()) > + note = diskdump_get_prstatus_percpu(i); > + else if (KDUMP_DUMPFILE()) > + note = netdump_get_prstatus_percpu(i); > + if (note) { > + /* > + * SIZE(note_buf) accounts for a "final note", which is a > + * trailing empty elf note header. > + */ > + long notesz = SIZE(note_buf) - sizeof(Elf64_Nhdr); > + > + if (sizeof(Elf64_Nhdr) + roundup(note->n_namesz, 4) + > + note->n_descsz == notesz) > + BCOPY((char *)note, buf, notesz); > + } else { > + error(WARNING, > + "cannot find NT_PRSTATUS note for cpu: %d\n", i); > + continue; > + } > + } > + > + /* > + * Check the sanity of NT_PRSTATUS note only for each online cpu. > + */ > + if (note->n_type != NT_PRSTATUS) { > + error(WARNING, "invalid NT_PRSTATUS note (n_type != NT_PRSTATUS)\n"); > + goto fail; > + } > + if (!STRNEQ(p, "CORE")) { > + error(WARNING, "invalid NT_PRSTATUS note (name != \"CORE\"\n"); > + goto fail; > + } > + > + /* > + * Find correct location of note data. This contains elf_prstatus > + * structure which has registers etc. for the crashed task. > + */ > + offset = sizeof(Elf64_Nhdr); > + offset = roundup(offset + note->n_namesz, 4); > + p = buf + offset; /* start of elf_prstatus */ > + > + BCOPY(p + OFFSET(elf_prstatus_pr_reg), &panic_task_regs[i], > + sizeof(panic_task_regs[i])); > + } > + > + /* > + * And finally we have the registers for the crashed task. 
This is > + * used later on when dumping backtrace. > + */ > + ms->crash_task_regs = panic_task_regs; > + > + FREEBUF(buf); > + FREEBUF(notes_ptrs); > + return TRUE; > + > +fail: > + FREEBUF(buf); > + FREEBUF(notes_ptrs); > + free(panic_task_regs); > + return FALSE; > +} > + > +static int > +riscv64_get_elf_notes(void) > +{ > + struct machine_specific *ms = machdep->machspec; > + int i; > + > + if (!DISKDUMP_DUMPFILE() && !KDUMP_DUMPFILE()) > + return false; > + > + panic_task_regs = calloc(kt->cpus, sizeof(*panic_task_regs)); > + if (!panic_task_regs) > + error(FATAL, "cannot calloc panic_task_regs space\n"); > + > + for (i = 0; i < kt->cpus; i++) { > + Elf64_Nhdr *note = NULL; > + size_t len; > + > + if (DISKDUMP_DUMPFILE()) > + note = diskdump_get_prstatus_percpu(i); > + else if (KDUMP_DUMPFILE()) > + note = netdump_get_prstatus_percpu(i); > + > + if (!note) { > + error(WARNING, > + "cannot find NT_PRSTATUS note for cpu: %d\n", i); > + continue; > + } > + > + len = sizeof(Elf64_Nhdr); > + len = roundup(len + note->n_namesz, 4); > + > + BCOPY((char *)note + len + OFFSET(elf_prstatus_pr_reg), > + &panic_task_regs[i], sizeof(panic_task_regs[i])); > + } > + > + ms->crash_task_regs = panic_task_regs; > + > + return TRUE; > +} > + > +/* > + * Translates a user virtual address to its physical address. > + */ > +static int > +riscv64_uvtop(struct task_context *tc, ulong uvaddr, physaddr_t *paddr, int verbose) > +{ > + ulong mm, active_mm; > + ulong *pgd; > + > + if (!tc) > + error(FATAL, "current context invalid\n"); > + > + *paddr = 0; > + > + if (is_kernel_thread(tc->task) && IS_KVADDR(uvaddr)) { > + readmem(tc->task + OFFSET(task_struct_active_mm), > + KVADDR, &active_mm, sizeof(void *), > + "task active_mm contents", FAULT_ON_ERROR); > + > + if (!active_mm) > + error(FATAL, > + "no active_mm for this kernel thread\n"); > + > + readmem(active_mm + OFFSET(mm_struct_pgd), > + KVADDR, &pgd, sizeof(long), > + "mm_struct pgd", FAULT_ON_ERROR); > + } else { > + if ((mm = task_mm(tc->task, TRUE))) > + pgd = ULONG_PTR(tt->mm_struct + OFFSET(mm_struct_pgd)); > + else > + readmem(tc->mm_struct + OFFSET(mm_struct_pgd), > + KVADDR, &pgd, sizeof(long), "mm_struct pgd", > + FAULT_ON_ERROR); > + } > + > + switch (machdep->flags & (VM_L3_4K | VM_L4_4K | VM_L5_4K)) > + { > + case VM_L3_4K: > + return riscv64_vtop_3level_4k(pgd, uvaddr, paddr, verbose); > + case VM_L4_4K: > + return riscv64_vtop_4level_4k(pgd, uvaddr, paddr, verbose); > + case VM_L5_4K: > + return riscv64_vtop_5level_4k(pgd, uvaddr, paddr, verbose); > + default: > + return FALSE; > + } > +} > + > +static int > +riscv64_kvtop(struct task_context *tc, ulong kvaddr, physaddr_t *paddr, int verbose) > +{ > + ulong kernel_pgd; > + > + if (!IS_KVADDR(kvaddr)) > + return FALSE; > + > + if (!vt->vmalloc_start) { > + *paddr = VTOP(kvaddr); > + return TRUE; > + } > + > + if (!IS_VMALLOC_ADDR(kvaddr)) { > + *paddr = VTOP(kvaddr); > + if (!verbose) > + return TRUE; > + } > + > + kernel_pgd = vt->kernel_pgd[0]; > + *paddr = 0; > + > + switch (machdep->flags & (VM_L3_4K | VM_L4_4K | VM_L5_4K)) > + { > + case VM_L3_4K: > + return riscv64_vtop_3level_4k((ulong *)kernel_pgd, kvaddr, paddr, verbose); > + case VM_L4_4K: > + return riscv64_vtop_4level_4k((ulong *)kernel_pgd, kvaddr, paddr, verbose); > + case VM_L5_4K: > + return riscv64_vtop_5level_4k((ulong *)kernel_pgd, kvaddr, paddr, verbose); > + default: > + return FALSE; > + } > +} > + > void > riscv64_init(int when) > { > + switch (when) { > + case SETUP_ENV: > + machdep->process_elf_notes = 
process_elf64_notes; > + break; > + > + case PRE_SYMTAB: > + machdep->verify_symbol = riscv64_verify_symbol; > + machdep->machspec = &riscv64_machine_specific; > + if (pc->flags & KERNEL_DEBUG_QUERY) > + return; > + > + machdep->verify_paddr = generic_verify_paddr; > + machdep->ptrs_per_pgd = PTRS_PER_PGD; > + break; > + > + case PRE_GDB: > + machdep->pagesize = riscv64_get_page_size(); > + machdep->pageshift = ffs(machdep->pagesize) - 1; > + machdep->pageoffset = machdep->pagesize - 1; > + machdep->pagemask = ~((ulonglong)machdep->pageoffset); > + machdep->stacksize = machdep->pagesize << THREAD_SIZE_ORDER; > + > + riscv64_get_phys_ram_base(machdep->machspec); > + riscv64_get_struct_page_size(machdep->machspec); > + riscv64_get_va_range(machdep->machspec); > + > + pt_level_alloc(&machdep->pgd, "cannot malloc pgd space."); > + pt_level_alloc(&machdep->machspec->p4d, "cannot malloc p4d space."); > + pt_level_alloc(&machdep->pud, "cannot malloc pud space."); > + pt_level_alloc(&machdep->pmd, "cannot malloc pmd space."); > + pt_level_alloc(&machdep->ptbl, "cannot malloc ptbl space."); > + > + machdep->last_pgd_read = 0; > + machdep->machspec->last_p4d_read = 0; > + machdep->last_pud_read = 0; > + machdep->last_pmd_read = 0; > + machdep->last_ptbl_read = 0; > + > + machdep->kvbase = machdep->machspec->page_offset; > + machdep->identity_map_base = machdep->kvbase; > + machdep->is_kvaddr = riscv64_is_kvaddr; > + machdep->is_uvaddr = riscv64_is_uvaddr; > + machdep->uvtop = riscv64_uvtop; > + machdep->kvtop = riscv64_kvtop; > + machdep->cmd_mach = riscv64_cmd_mach; > + > + machdep->vmalloc_start = riscv64_vmalloc_start; > + machdep->processor_speed = riscv64_processor_speed; > + machdep->get_stackbase = generic_get_stackbase; > + machdep->get_stacktop = generic_get_stacktop; > + machdep->translate_pte = riscv64_translate_pte; > + machdep->memory_size = generic_memory_size; > + machdep->is_task_addr = riscv64_is_task_addr; > + machdep->get_smp_cpus = riscv64_get_smp_cpus; > + machdep->value_to_symbol = generic_machdep_value_to_symbol; > + machdep->show_interrupts = generic_show_interrupts; > + machdep->get_irq_affinity = generic_get_irq_affinity; > + machdep->init_kernel_pgd = NULL; /* pgd set by symbol_value("swapper_pg_dir") */ > + break; > + > + case POST_GDB: > + machdep->section_size_bits = _SECTION_SIZE_BITS; > + machdep->max_physmem_bits = _MAX_PHYSMEM_BITS; > + riscv64_page_type_init(); > + > + if (!machdep->hz) > + machdep->hz = 250; > + > + if (symbol_exists("irq_desc")) > + ARRAY_LENGTH_INIT(machdep->nr_irqs, irq_desc, > + "irq_desc", NULL, 0); > + else if (kernel_symbol_exists("nr_irqs")) > + get_symbol_data("nr_irqs", sizeof(unsigned int), > + &machdep->nr_irqs); > + > + MEMBER_OFFSET_INIT(elf_prstatus_pr_reg, "elf_prstatus", > + "pr_reg"); > + > + STRUCT_SIZE_INIT(note_buf, "note_buf_t"); > + break; > + > + case POST_VM: > + /* > + * crash_notes contains machine specific information about the > + * crash. In particular, it contains CPU registers at the time > + * of the crash. We need this information to extract correct > + * backtraces from the panic task. > + */ > + if (!ACTIVE() && !riscv64_init_active_task_regs()) > + error(WARNING, > + "cannot retrieve registers for active task%s\n\n", > + kt->cpus > 1 ? "s" : ""); > + break; > + } > } > > +/* > + * 'help -r' command output > + */ > void > riscv64_display_regs_from_elf_notes(int cpu, FILE *ofp) > {
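As a cross-check of the PTE line in the vtop output above, the arithmetic performed by riscv64_translate_pte() can be reproduced standalone. _PAGE_PFN_SHIFT, _PAGE_PRESENT and PTE_PFN_PROT_MASK are the values from the patch, the sample PTE and virtual address come from the session, and the rest of the sketch is illustrative only:

#include <stdio.h>

#define PAGE_SHIFT         12
#define _PAGE_PFN_SHIFT    10
#define _PAGE_PRESENT      (1UL << 0)
#define PTE_PFN_PROT_MASK  0x3FFFFFFFFFFFFFUL   /* PPN + protection bits 53..0 */

int main(void)
{
	unsigned long pte   = 0x209534e7UL;           /* PTE from the vtop example */
	unsigned long vaddr = 0xffffffff0113e740UL;
	unsigned long masked, pfn, phys;

	masked = pte & PTE_PFN_PROT_MASK;             /* drop N/PBMT/reserved bits 63..54 */
	if (!(masked & _PAGE_PRESENT)) {
		printf("page not present\n");
		return 1;
	}
	pfn  = masked >> _PAGE_PFN_SHIFT;
	phys = (pfn << PAGE_SHIFT) | (vaddr & ((1UL << PAGE_SHIFT) - 1));
	printf("PTE %lx -> PHYSICAL %lx\n", pte, phys);   /* prints 8254d740, matching vtop */
	return 0;
}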
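On the "Please use LINUX() macro" comment: crash's defs.h provides LINUX(x,y,z), which packs a version number as (x << 16 | y << 8 | z), the same encoding that the patch's own "5.13 = 0x5 << 16 | 0xD << 8" comment spells out. The open-coded 0x50D00 test in riscv64_get_va_range() could therefore become if (kernel_version >= LINUX(5, 13, 0)). Below is a standalone sketch of that comparison, with the macro restated locally so it compiles on its own; the definition in defs.h remains the authoritative one:

#include <stdio.h>

/* Local restatement of crash's LINUX() helper so the sketch builds standalone. */
#define LINUX(x, y, z) (((unsigned int)(x) << 16) | ((unsigned int)(y) << 8) | (unsigned int)(z))

int main(void)
{
	unsigned long kernel_version = LINUX(5, 18, 9);   /* RELEASE 5.18.9 from the session */

	/* Readable replacement for "kernel_version >= 0x50D00": */
	if (kernel_version >= LINUX(5, 13, 0))
		printf("read MODULES_VADDR/MODULES_END from vmcoreinfo\n");
	else
		printf("fall back to the vmalloc range for modules\n");
	return 0;
}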
diff --git a/defs.h b/defs.h index 42ffba3..be4db94 100644 --- a/defs.h +++ b/defs.h @@ -3494,6 +3494,81 @@ struct arm64_stackframe { #define _64BIT_ #define MACHINE_TYPE "RISCV64" +typedef struct { ulong pgd; } pgd_t; +typedef struct { ulong p4d; } p4d_t; +typedef struct { ulong pud; } pud_t; +typedef struct { ulong pmd; } pmd_t; +typedef struct { ulong pte; } pte_t; +typedef signed int s32; + +/* arch/riscv/include/asm/pgtable-64.h */ +#define PGD_SHIFT_L3 (30) +#define PGD_SHIFT_L4 (39) +#define PGD_SHIFT_L5 (48) + +#define P4D_SHIFT (39) +#define PUD_SHIFT (30) +#define PMD_SHIFT (21) + +#define PTRS_PER_PGD (512) +#define PTRS_PER_P4D (512) +#define PTRS_PER_PUD (512) +#define PTRS_PER_PMD (512) +#define PTRS_PER_PTE (512) + +/* + * Mask for PPN and PROT bit53~0 of PTE + * 63 6261 60 54 53 10 9 8 7 6 5 4 3 2 1 0 + * N PBMT Reserved P P N RSW D A G U X W R V + */ +#define PTE_PFN_PROT_MASK 0x3FFFFFFFFFFFFF + +/* + * 3-levels / 4K pages + * + * sv39 + * PGD | PMD | PTE | OFFSET | + * 9 | 9 | 9 | 12 | + */ +#define pgd_index_l3_4k(addr) (((addr) >> PGD_SHIFT_L3) & (PTRS_PER_PGD - 1)) +#define pmd_index_l3_4k(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) +#define pte_index_l3_4k(addr) (((addr) >> PAGESHIFT()) & (PTRS_PER_PTE - 1)) + +/* + * 4-levels / 4K pages + * + * sv48 + * PGD | PUD | PMD | PTE | OFFSET | + * 9 | 9 | 9 | 9 | 12 | + */ +#define pgd_index_l4_4k(addr) (((addr) >> PGD_SHIFT_L4) & (PTRS_PER_PGD - 1)) +#define pud_index_l4_4k(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) +#define pmd_index_l4_4k(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) +#define pte_index_l4_4k(addr) (((addr) >> PAGESHIFT()) & (PTRS_PER_PTE - 1)) + +/* + * 5-levels / 4K pages + * + * sv48 + * PGD | P4D | PUD | PMD | PTE | OFFSET | + * 9 | 9 | 9 | 9 | 9 | 12 | + */ +#define pgd_index_l5_4k(addr) (((addr) >> PGD_SHIFT_L5) & (PTRS_PER_PGD - 1)) +#define p4d_index_l5_4k(addr) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1)) +#define pud_index_l5_4k(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) +#define pmd_index_l5_4k(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) +#define pte_index_l5_4k(addr) (((addr) >> PAGESHIFT()) & (PTRS_PER_PTE - 1)) + +#define VM_L3_4K (0x2) +#define VM_L3_2M (0x4) +#define VM_L3_1G (0x8) +#define VM_L4_4K (0x10) +#define VM_L4_2M (0x20) +#define VM_L4_1G (0x40) +#define VM_L5_4K (0x80) +#define VM_L5_2M (0x100) +#define VM_L5_1G (0x200) + /* * Direct memory mapping */ @@ -3545,6 +3620,14 @@ struct arm64_stackframe { #define PHYS_MASK_SHIFT _MAX_PHYSMEM_BITS #define PHYS_MASK (((1UL) << PHYS_MASK_SHIFT) - 1) +#define IS_LAST_P4D_READ(p4d) ((ulong)(p4d) == machdep->machspec->last_p4d_read) +#define FILL_P4D(P4D, TYPE, SIZE) \ + if (!IS_LAST_P4D_READ(P4D)) { \ + readmem((ulonglong)((ulong)(P4D)), TYPE, machdep->machspec->p4d, \ + SIZE, "p4d page", FAULT_ON_ERROR); \ + machdep->machspec->last_p4d_read = (ulong)(P4D); \ + } + #endif /* RISCV64 */ #ifdef X86 @@ -6810,6 +6893,10 @@ struct machine_specific { ulong _page_soft; ulong _pfn_shift; + ulong va_bits; + char *p4d; + ulong last_p4d_read; + ulong struct_page_size; struct riscv64_register *crash_task_regs; }; @@ -6833,6 +6920,12 @@ struct machine_specific { #define _PAGE_PROT_NONE _PAGE_READ #define _PAGE_PFN_SHIFT 10 +/* from 'struct pt_regs' definitions of RISC-V arch */ +#define RISCV64_REGS_EPC 0 +#define RISCV64_REGS_RA 1 +#define RISCV64_REGS_SP 2 +#define RISCV64_REGS_FP 8 + #endif /* RISCV64 */ /* diff --git a/diskdump.c b/diskdump.c index 28503bc..cf5f5d9 100644 --- a/diskdump.c +++ b/diskdump.c @@ 
-1531,6 +1531,12 @@ get_diskdump_regs_mips(struct bt_info *bt, ulong *eip, ulong *esp) machdep->get_stack_frame(bt, eip, esp); } +static void +get_diskdump_regs_riscv64(struct bt_info *bt, ulong *eip, ulong *esp) +{ + machdep->get_stack_frame(bt, eip, esp); +} + static void get_diskdump_regs_sparc64(struct bt_info *bt, ulong *eip, ulong *esp) { @@ -1610,6 +1616,10 @@ get_diskdump_regs(struct bt_info *bt, ulong *eip, ulong *esp) get_diskdump_regs_sparc64(bt, eip, esp); break; + case EM_RISCV: + get_diskdump_regs_riscv64(bt, eip, esp); + break; + default: error(FATAL, "%s: unsupported machine type: %s\n", DISKDUMP_VALID() ? "diskdump" : "compressed kdump", diff --git a/riscv64.c b/riscv64.c index c7df857..9d40297 100644 --- a/riscv64.c +++ b/riscv64.c @@ -16,10 +16,304 @@ #include <elf.h> #include "defs.h" +#include <math.h> + +static ulong riscv64_get_page_size(void); +static int riscv64_vtop_3level_4k(ulong *pgd, ulong vaddr, + physaddr_t *paddr, int verbose); +static int riscv64_vtop_4level_4k(ulong *pgd, ulong vaddr, + physaddr_t *paddr, int verbose); +static int riscv64_vtop_5level_4k(ulong *pgd, ulong vaddr, + physaddr_t *paddr, int verbose); +static void riscv64_page_type_init(void); +static int riscv64_is_kvaddr(ulong vaddr); +static int riscv64_is_uvaddr(ulong vaddr, struct task_context *tc); +static int riscv64_uvtop(struct task_context *tc, ulong vaddr, + physaddr_t *paddr, int verbose); +static int riscv64_kvtop(struct task_context *tc, ulong kvaddr, + physaddr_t *paddr, int verbose); +static void riscv64_cmd_mach(void); +static int riscv64_translate_pte(ulong, void *, ulonglong); +static int riscv64_init_active_task_regs(void); +static int riscv64_get_crash_notes(void); +static int riscv64_get_elf_notes(void); +static void riscv64_get_va_range(struct machine_specific *ms); +static void riscv64_get_struct_page_size(struct machine_specific *ms); + +#define REG_FMT "%016lx" +#define SZ_2G 0x80000000 + +/* + * Holds registers during the crash. + */ +static struct riscv64_register *panic_task_regs; + +/* from arch/riscv/include/asm/stacktrace.h */ +struct stackframe { + ulong fp; + ulong ra; +}; + +static struct machine_specific riscv64_machine_specific = { + ._page_present = (1 << 0), + ._page_read = (1 << 1), + ._page_write = (1 << 2), + ._page_exec = (1 << 3), + ._page_user = (1 << 4), + ._page_global = (1 << 5), + ._page_accessed = (1 << 6), + ._page_dirty = (1 << 7), + ._page_soft = (1 << 8), + + .va_bits = 0, + .struct_page_size = 0, +}; + +static void +pt_level_alloc(char **lvl, char *name) +{ + size_t sz = PAGESIZE(); + void *pointer = malloc(sz); + + if (!pointer) + error(FATAL, name); + *lvl = pointer; +} + +static void +riscv64_get_phys_ram_base(struct machine_specific *ms) +{ + char *string; + + if ((string = pc->read_vmcoreinfo("NUMBER(phys_ram_base)"))) { + ms->phys_base = atol(string); + free(string); + } else + /* + * It can't continue without phys_ram_base. As for qemu rv64 + * env and hardware platform, phys_ram_base may different. + */ + error(FATAL, "cannot read phys_ram_base\n"); +} + +static ulong +riscv64_get_page_size(void) +{ + return memory_page_size(); +} + +static ulong +riscv64_vmalloc_start(void) +{ + return ((ulong)VMALLOC_START); +} + +/* Get the size of struct page {} */ +static void riscv64_get_struct_page_size(struct machine_specific *ms) +{ + char *string; + + string = pc->read_vmcoreinfo("SIZE(page)"); + if (string) + ms->struct_page_size = atol(string); + free(string); +} + +/* + * Get the max shift of the size of struct page. 
+ * Most of the time, it is 64 bytes, but not sure. +*/ +static int riscv64_get_struct_page_max_shift(struct machine_specific *ms) +{ + return (int)ceil(log2(ms->struct_page_size)); +} + +static void +riscv64_cmd_mach(void) +{ + /* TODO: */ +} + +static int +riscv64_verify_symbol(const char *name, ulong value, char type) +{ + /* TODO: */ + return TRUE; +} void riscv64_dump_machdep_table(ulong arg) { + /* TODO: */ +} + +static ulong +riscv64_processor_speed(void) +{ + /* TODO: */ + return 0; +} + +static unsigned long riscv64_get_kernel_version(void) +{ + char *string; + char buf[BUFSIZE]; + char *p1, *p2; + + if (THIS_KERNEL_VERSION) + return THIS_KERNEL_VERSION; + + string = pc->read_vmcoreinfo("OSRELEASE"); + if (string) { + strcpy(buf, string); + + p1 = p2 = buf; + while (*p2 != '.') + p2++; + *p2 = NULLCHAR; + kt->kernel_version[0] = atoi(p1); + + p1 = ++p2; + while (*p2 != '.') + p2++; + *p2 = NULLCHAR; + kt->kernel_version[1] = atoi(p1); + + p1 = ++p2; + while ((*p2 >= '0') && (*p2 <= '9')) + p2++; + *p2 = NULLCHAR; + kt->kernel_version[2] = atoi(p1); + } + free(string); + return THIS_KERNEL_VERSION; +} + +static void riscv64_get_va_range(struct machine_specific *ms) +{ + unsigned long kernel_version = riscv64_get_kernel_version(); + char *string; + + if ((string = pc->read_vmcoreinfo("NUMBER(VA_BITS)"))) { + ms->va_bits = atol(string); + free(string); + } else + goto error; + if ((string = pc->read_vmcoreinfo("NUMBER(PAGE_OFFSET)"))) { + ms->page_offset = htol(string, QUIET, NULL); + free(string); + } else + goto error; + + if ((string = pc->read_vmcoreinfo("NUMBER(VMALLOC_START)"))) { + ms->vmalloc_start_addr = htol(string, QUIET, NULL); + free(string); + } else + goto error; + + if ((string = pc->read_vmcoreinfo("NUMBER(VMALLOC_END)"))) { + ms->vmalloc_end = htol(string, QUIET, NULL); + free(string); + } else + goto error; + + if ((string = pc->read_vmcoreinfo("NUMBER(VMEMMAP_START)"))) { + ms->vmemmap_vaddr = htol(string, QUIET, NULL); + free(string); + } else + goto error; + + if ((string = pc->read_vmcoreinfo("NUMBER(VMEMMAP_END)"))) { + ms->vmemmap_end = htol(string, QUIET, NULL); + free(string); + } else + goto error; + + if ((string = pc->read_vmcoreinfo("NUMBER(KERNEL_LINK_ADDR)"))) { + ms->kernel_link_addr = htol(string, QUIET, NULL); + free(string); + } else + goto error; + + if ((string = pc->read_vmcoreinfo("NUMBER(ADDRESS_SPACE_END)"))) { + ms->address_space_end = htol(string, QUIET, NULL); + free(string); + } else + goto error; + + /* + * From Linux 5.13, the kernel mapping is moved to the last 2GB + * of the address space, modules use the 2GB memory range right + * before the kernel. Before Linux 5.13, modules area is embedded + * in vmalloc area. 
+ * + * 5.13 = 0x5 << 16 | 0xD << 8 + */ + if (kernel_version >= 0x50D00) { + if ((string = pc->read_vmcoreinfo("NUMBER(MODULES_VADDR)"))) { + ms->modules_vaddr = htol(string, QUIET, NULL); + free(string); + } else + goto error; + + if ((string = pc->read_vmcoreinfo("NUMBER(MODULES_END)"))) { + ms->modules_end = htol(string, QUIET, NULL); + free(string); + } else + goto error; + } else { + ms->modules_vaddr = ms->vmalloc_start_addr; + ms->modules_end = ms->vmalloc_end; + } + + if (CRASHDEBUG(8)) { + fprintf(fp, "va_bits : %ld\n", ms->va_bits); + fprintf(fp, "vmemmap : 0x%lx - 0x%lx\n", + ms->vmemmap_vaddr, ms->vmemmap_end); + fprintf(fp, "vmalloc : 0x%lx - 0x%lx\n", + ms->vmalloc_start_addr, ms->vmalloc_end); + fprintf(fp, "lowmem : 0x%lx -\n", ms->page_offset); + fprintf(fp, "mudules : 0x%lx - 0x%lx\n", + ms->modules_vaddr, ms->modules_end); + fprintf(fp, "kernel : 0x%lx - 0x%lx\n", + ms->kernel_link_addr, ms->address_space_end); + } + return; +error: + error(FATAL, "cannot get vm layout\n"); +} + +static int +riscv64_is_kvaddr(ulong vaddr) +{ + if (IS_VMALLOC_ADDR(vaddr)) + return TRUE; + + return (vaddr >= machdep->kvbase); +} + +static int +riscv64_is_uvaddr(ulong vaddr, struct task_context *unused) +{ + if (IS_VMALLOC_ADDR(vaddr)) + return FALSE; + + return (vaddr < machdep->kvbase); +} + +static int +riscv64_is_task_addr(ulong task) +{ + if (tt->flags & THREAD_INFO) + return IS_KVADDR(task); + + return (IS_KVADDR(task) && ALIGNED_STACK_OFFSET(task) == 0); +} + +static int +riscv64_get_smp_cpus(void) +{ + return (get_cpus_online() > 0) ? get_cpus_online() : kt->cpus; } /* @@ -33,11 +327,700 @@ riscv64_IS_VMALLOC_ADDR(ulong vaddr) (vaddr >= MODULES_VADDR && vaddr <= MODULES_END)); } +/* + * Translate a PTE, returning TRUE if the page is present. + * If a physaddr pointer is passed in, don't print anything. + */ +static int +riscv64_translate_pte(ulong pte, void *physaddr, ulonglong unused) +{ + char ptebuf[BUFSIZE]; + char physbuf[BUFSIZE]; + char buf[BUFSIZE]; + int page_present; + int len1, len2, others; + ulong paddr; + + paddr = PTOB(pte >> _PAGE_PFN_SHIFT); + page_present = !!(pte & _PAGE_PRESENT); + + if (physaddr) { + *(ulong *)physaddr = paddr; + return page_present; + } + + sprintf(ptebuf, "%lx", pte); + len1 = MAX(strlen(ptebuf), strlen("PTE")); + fprintf(fp, "%s ", mkstring(buf, len1, CENTER | LJUST, "PTE")); + + if (!page_present) + return page_present; + + sprintf(physbuf, "%lx", paddr); + len2 = MAX(strlen(physbuf), strlen("PHYSICAL")); + fprintf(fp, "%s ", mkstring(buf, len2, CENTER | LJUST, "PHYSICAL")); + + fprintf(fp, "FLAGS\n"); + fprintf(fp, "%s %s ", + mkstring(ptebuf, len1, CENTER | RJUST, NULL), + mkstring(physbuf, len2, CENTER | RJUST, NULL)); + + fprintf(fp, "("); + others = 0; + +#define CHECK_PAGE_FLAG(flag) \ + if ((_PAGE_##flag) && (pte & _PAGE_##flag)) \ + fprintf(fp, "%s" #flag, others++ ? 
"|" : "") + if (pte) { + CHECK_PAGE_FLAG(PRESENT); + CHECK_PAGE_FLAG(READ); + CHECK_PAGE_FLAG(WRITE); + CHECK_PAGE_FLAG(EXEC); + CHECK_PAGE_FLAG(USER); + CHECK_PAGE_FLAG(GLOBAL); + CHECK_PAGE_FLAG(ACCESSED); + CHECK_PAGE_FLAG(DIRTY); + CHECK_PAGE_FLAG(SOFT); + } else { + fprintf(fp, "no mapping"); + } + + fprintf(fp, ")\n"); + + return page_present; +} + +static void +riscv64_page_type_init(void) +{ + ulong va_bits = machdep->machspec->va_bits; + + /* + * For RISCV64 arch, any level of PTE may be a leaf PTE, + * so in addition to 4KiB pages, + * Sv39 supports 2 MiB megapages, 1 GiB gigapages; + * Sv48 supports 2 MiB megapages, 1 GiB gigapages, 512 GiB terapages; + * Sv57 supports 2 MiB megapages, 1 GiB gigapages, 512 GiB terapages, and 256 TiB petapages. + * + * refs to riscv-privileged spec. + * + * We just support 4KiB, 2MiB, 1GiB now. + */ + switch (machdep->pagesize) + { + case 0x1000: // 4 KiB + machdep->flags |= (va_bits == 57 ? VM_L5_4K : + (va_bits == 48 ? VM_L4_4K : VM_L3_4K)); + break; + case 0x200000: // 2 MiB + /* TODO: */ + case 0x40000000: // 1 GiB + /* TODO: */ + default: + if (machdep->pagesize) + error(FATAL, "invalid/unsupported page size: %d\n", + machdep->pagesize); + else + error(FATAL, "cannot determine page size\n"); + } +} + +static int +riscv64_vtop_3level_4k(ulong *pgd, ulong vaddr, physaddr_t *paddr, int verbose) +{ + ulong *pgd_ptr, pgd_val; + ulong *pmd_ptr, pmd_val; + ulong *pte_ptr, pte_val, pte_pfn; + ulong pt_phys; + + /* PGD */ + pgd_ptr = pgd + pgd_index_l3_4k(vaddr); + FILL_PGD(pgd, KVADDR, PAGESIZE()); + pgd_val = ULONG(machdep->pgd + PAGEOFFSET(pgd_ptr)); + if (verbose) + fprintf(fp, " PGD: %lx => %lx\n", (ulong)pgd_ptr, pgd_val); + if (!pgd_val) + goto no_page; + pgd_val &= PTE_PFN_PROT_MASK; + pt_phys = (pgd_val >> _PAGE_PFN_SHIFT) << PAGESHIFT(); + + /* PMD */ + FILL_PMD(PAGEBASE(pt_phys), PHYSADDR, PAGESIZE()); + pmd_val = ULONG(machdep->pmd + PAGEOFFSET(sizeof(pmd_t) * + pmd_index_l3_4k(vaddr))); + if (verbose) + fprintf(fp, " PMD: %016lx => %016lx\n", (ulong)pmd_ptr, pmd_val); + if (!pmd_val) + goto no_page; + pmd_val &= PTE_PFN_PROT_MASK; + pt_phys = (pmd_val >> _PAGE_PFN_SHIFT) << PAGESHIFT(); + + /* PTE */ + FILL_PTBL(PAGEBASE(pt_phys), PHYSADDR, PAGESIZE()); + pte_val = ULONG(machdep->ptbl + PAGEOFFSET(sizeof(pte_t) * + pte_index_l3_4k(vaddr))); + if (verbose) + fprintf(fp, " PTE: %lx => %lx\n", (ulong)pte_ptr, pte_val); + if (!pte_val) + goto no_page; + pte_val &= PTE_PFN_PROT_MASK; + pte_pfn = pte_val >> _PAGE_PFN_SHIFT; + + if (!(pte_val & _PAGE_PRESENT)) { + if (verbose) { + fprintf(fp, "\n"); + riscv64_translate_pte((ulong)pte_val, 0, 0); + } + fprintf(fp, " PAGE: %016lx not present\n\n", PAGEBASE(*paddr)); + return FALSE; + } + + *paddr = PTOB(pte_pfn) + PAGEOFFSET(vaddr); + + if (verbose) { + fprintf(fp, " PAGE: %016lx\n\n", PAGEBASE(*paddr)); + riscv64_translate_pte(pte_val, 0, 0); + } + + return TRUE; +no_page: + fprintf(fp, "invalid\n"); + return FALSE; +} + +static int +riscv64_vtop_4level_4k(ulong *pgd, ulong vaddr, physaddr_t *paddr, int verbose) +{ + ulong *pgd_ptr, pgd_val; + ulong *pud_ptr, pud_val; + ulong *pmd_ptr, pmd_val; + ulong *pte_ptr, pte_val, pte_pfn; + ulong pt_phys; + + /* PGD */ + pgd_ptr = pgd + pgd_index_l4_4k(vaddr); + FILL_PGD(pgd, KVADDR, PAGESIZE()); + pgd_val = ULONG(machdep->pgd + PAGEOFFSET(pgd_ptr)); + if (verbose) + fprintf(fp, " PGD: %lx => %lx\n", (ulong)pgd_ptr, pgd_val); + if (!pgd_val) + goto no_page; + pgd_val &= PTE_PFN_PROT_MASK; + pt_phys = (pgd_val >> _PAGE_PFN_SHIFT) << PAGESHIFT(); + + /* 
PUD */ + FILL_PUD(PAGEBASE(pt_phys), PHYSADDR, PAGESIZE()); + pud_val = ULONG(machdep->pud + PAGEOFFSET(sizeof(pud_t) * + pud_index_l4_4k(vaddr))); + if (verbose) + fprintf(fp, " PUD: %016lx => %016lx\n", (ulong)pud_ptr, pud_val); + if (!pud_val) + goto no_page; + pud_val &= PTE_PFN_PROT_MASK; + pt_phys = (pud_val >> _PAGE_PFN_SHIFT) << PAGESHIFT(); + + /* PMD */ + FILL_PMD(PAGEBASE(pt_phys), PHYSADDR, PAGESIZE()); + pmd_val = ULONG(machdep->pmd + PAGEOFFSET(sizeof(pmd_t) * + pmd_index_l4_4k(vaddr))); + if (verbose) + fprintf(fp, " PMD: %016lx => %016lx\n", (ulong)pmd_ptr, pmd_val); + if (!pmd_val) + goto no_page; + pmd_val &= PTE_PFN_PROT_MASK; + pt_phys = (pmd_val >> _PAGE_PFN_SHIFT) << PAGESHIFT(); + + /* PTE */ + FILL_PTBL(PAGEBASE(pt_phys), PHYSADDR, PAGESIZE()); + pte_val = ULONG(machdep->ptbl + PAGEOFFSET(sizeof(pte_t) * + pte_index_l4_4k(vaddr))); + if (verbose) + fprintf(fp, " PTE: %lx => %lx\n", (ulong)pte_ptr, pte_val); + if (!pte_val) + goto no_page; + pte_val &= PTE_PFN_PROT_MASK; + pte_pfn = pte_val >> _PAGE_PFN_SHIFT; + + if (!(pte_val & _PAGE_PRESENT)) { + if (verbose) { + fprintf(fp, "\n"); + riscv64_translate_pte((ulong)pte_val, 0, 0); + } + fprintf(fp, " PAGE: %016lx not present\n\n", PAGEBASE(*paddr)); + return FALSE; + } + + *paddr = PTOB(pte_pfn) + PAGEOFFSET(vaddr); + + if (verbose) { + fprintf(fp, " PAGE: %016lx\n\n", PAGEBASE(*paddr)); + riscv64_translate_pte(pte_val, 0, 0); + } + + return TRUE; +no_page: + fprintf(fp, "invalid\n"); + return FALSE; +} + +static int +riscv64_vtop_5level_4k(ulong *pgd, ulong vaddr, physaddr_t *paddr, int verbose) +{ + ulong *pgd_ptr, pgd_val; + ulong *p4d_ptr, p4d_val; + ulong *pud_ptr, pud_val; + ulong *pmd_ptr, pmd_val; + ulong *pte_ptr, pte_val, pte_pfn; + ulong pt_phys; + + /* PGD */ + pgd_ptr = pgd + pgd_index_l5_4k(vaddr); + FILL_PGD(pgd, KVADDR, PAGESIZE()); + pgd_val = ULONG(machdep->pgd + PAGEOFFSET(pgd_ptr)); + if (verbose) + fprintf(fp, " PGD: %lx => %lx\n", (ulong)pgd_ptr, pgd_val); + if (!pgd_val) + goto no_page; + pgd_val &= PTE_PFN_PROT_MASK; + pt_phys = (pgd_val >> _PAGE_PFN_SHIFT) << PAGESHIFT(); + + /* P4D */ + FILL_P4D(PAGEBASE(pt_phys), PHYSADDR, PAGESIZE()); + p4d_val = ULONG(machdep->machspec->p4d + PAGEOFFSET(sizeof(p4d_t) * + p4d_index_l5_4k(vaddr))); + if (verbose) + fprintf(fp, " P4D: %016lx => %016lx\n", (ulong)p4d_ptr, p4d_val); + if (!p4d_val) + goto no_page; + p4d_val &= PTE_PFN_PROT_MASK; + pt_phys = (p4d_val >> _PAGE_PFN_SHIFT) << PAGESHIFT(); + + /* PUD */ + FILL_PUD(PAGEBASE(pt_phys), PHYSADDR, PAGESIZE()); + pud_val = ULONG(machdep->pud + PAGEOFFSET(sizeof(pud_t) * + pud_index_l5_4k(vaddr))); + if (verbose) + fprintf(fp, " PUD: %016lx => %016lx\n", (ulong)pud_ptr, pud_val); + if (!pud_val) + goto no_page; + pud_val &= PTE_PFN_PROT_MASK; + pt_phys = (pud_val >> _PAGE_PFN_SHIFT) << PAGESHIFT(); + + /* PMD */ + FILL_PMD(PAGEBASE(pt_phys), PHYSADDR, PAGESIZE()); + pmd_val = ULONG(machdep->pmd + PAGEOFFSET(sizeof(pmd_t) * + pmd_index_l4_4k(vaddr))); + if (verbose) + fprintf(fp, " PMD: %016lx => %016lx\n", (ulong)pmd_ptr, pmd_val); + if (!pmd_val) + goto no_page; + pmd_val &= PTE_PFN_PROT_MASK; + pt_phys = (pmd_val >> _PAGE_PFN_SHIFT) << PAGESHIFT(); + + /* PTE */ + FILL_PTBL(PAGEBASE(pt_phys), PHYSADDR, PAGESIZE()); + pte_val = ULONG(machdep->ptbl + PAGEOFFSET(sizeof(pte_t) * + pte_index_l4_4k(vaddr))); + if (verbose) + fprintf(fp, " PTE: %lx => %lx\n", (ulong)pte_ptr, pte_val); + if (!pte_val) + goto no_page; + pte_val &= PTE_PFN_PROT_MASK; + pte_pfn = pte_val >> _PAGE_PFN_SHIFT; + + if (!(pte_val & 
_PAGE_PRESENT)) { + if (verbose) { + fprintf(fp, "\n"); + riscv64_translate_pte((ulong)pte_val, 0, 0); + } + printf("!_PAGE_PRESENT\n"); + return FALSE; + } + + *paddr = PTOB(pte_pfn) + PAGEOFFSET(vaddr); + + if (verbose) { + fprintf(fp, " PAGE: %016lx\n\n", PAGEBASE(*paddr)); + riscv64_translate_pte(pte_val, 0, 0); + } + + return TRUE; +no_page: + fprintf(fp, "invalid\n"); + return FALSE; +} + +static int +riscv64_init_active_task_regs(void) +{ + int retval; + + retval = riscv64_get_crash_notes(); + if (retval == TRUE) + return retval; + + return riscv64_get_elf_notes(); +} + +/* + * Retrieve task registers for the time of the crash. + */ +static int +riscv64_get_crash_notes(void) +{ + struct machine_specific *ms = machdep->machspec; + ulong crash_notes; + Elf64_Nhdr *note; + ulong offset; + char *buf, *p; + ulong *notes_ptrs; + ulong i; + + /* + * crash_notes contains per cpu memory for storing cpu states + * in case of system crash. + */ + if (!symbol_exists("crash_notes")) + return FALSE; + + crash_notes = symbol_value("crash_notes"); + + notes_ptrs = (ulong *)GETBUF(kt->cpus*sizeof(notes_ptrs[0])); + + /* + * Read crash_notes for the first CPU. crash_notes are in standard ELF + * note format. + */ + if (!readmem(crash_notes, KVADDR, ¬es_ptrs[kt->cpus-1], + sizeof(notes_ptrs[kt->cpus-1]), "crash_notes", + RETURN_ON_ERROR)) { + error(WARNING, "cannot read crash_notes\n"); + FREEBUF(notes_ptrs); + return FALSE; + } + + if (symbol_exists("__per_cpu_offset")) { + + /* + * Add __per_cpu_offset for each cpu to form the pointer to the notes + */ + for (i = 0; i < kt->cpus; i++) + notes_ptrs[i] = notes_ptrs[kt->cpus-1] + kt->__per_cpu_offset[i]; + } + + buf = GETBUF(SIZE(note_buf)); + + if (!(panic_task_regs = calloc((size_t)kt->cpus, sizeof(*panic_task_regs)))) + error(FATAL, "cannot calloc panic_task_regs space\n"); + + for (i = 0; i < kt->cpus; i++) { + + if (!readmem(notes_ptrs[i], KVADDR, buf, SIZE(note_buf), "note_buf_t", + RETURN_ON_ERROR)) { + error(WARNING, + "cannot find NT_PRSTATUS note for cpu: %d\n", i); + goto fail; + } + + /* + * Do some sanity checks for this note before reading registers from it. + */ + note = (Elf64_Nhdr *)buf; + p = buf + sizeof(Elf64_Nhdr); + + /* + * dumpfiles created with qemu won't have crash_notes, but there will + * be elf notes; dumpfiles created by kdump do not create notes for + * offline cpus. + */ + if (note->n_namesz == 0 && (DISKDUMP_DUMPFILE() || KDUMP_DUMPFILE())) { + if (DISKDUMP_DUMPFILE()) + note = diskdump_get_prstatus_percpu(i); + else if (KDUMP_DUMPFILE()) + note = netdump_get_prstatus_percpu(i); + if (note) { + /* + * SIZE(note_buf) accounts for a "final note", which is a + * trailing empty elf note header. + */ + long notesz = SIZE(note_buf) - sizeof(Elf64_Nhdr); + + if (sizeof(Elf64_Nhdr) + roundup(note->n_namesz, 4) + + note->n_descsz == notesz) + BCOPY((char *)note, buf, notesz); + } else { + error(WARNING, + "cannot find NT_PRSTATUS note for cpu: %d\n", i); + continue; + } + } + + /* + * Check the sanity of NT_PRSTATUS note only for each online cpu. + */ + if (note->n_type != NT_PRSTATUS) { + error(WARNING, "invalid NT_PRSTATUS note (n_type != NT_PRSTATUS)\n"); + goto fail; + } + if (!STRNEQ(p, "CORE")) { + error(WARNING, "invalid NT_PRSTATUS note (name != \"CORE\"\n"); + goto fail; + } + + /* + * Find correct location of note data. This contains elf_prstatus + * structure which has registers etc. for the crashed task. 
+
+static int
+riscv64_get_elf_notes(void)
+{
+    struct machine_specific *ms = machdep->machspec;
+    int i;
+
+    if (!DISKDUMP_DUMPFILE() && !KDUMP_DUMPFILE())
+        return FALSE;
+
+    panic_task_regs = calloc(kt->cpus, sizeof(*panic_task_regs));
+    if (!panic_task_regs)
+        error(FATAL, "cannot calloc panic_task_regs space\n");
+
+    for (i = 0; i < kt->cpus; i++) {
+        Elf64_Nhdr *note = NULL;
+        size_t len;
+
+        if (DISKDUMP_DUMPFILE())
+            note = diskdump_get_prstatus_percpu(i);
+        else if (KDUMP_DUMPFILE())
+            note = netdump_get_prstatus_percpu(i);
+
+        if (!note) {
+            error(WARNING,
+                "cannot find NT_PRSTATUS note for cpu: %d\n", i);
+            continue;
+        }
+
+        len = sizeof(Elf64_Nhdr);
+        len = roundup(len + note->n_namesz, 4);
+
+        BCOPY((char *)note + len + OFFSET(elf_prstatus_pr_reg),
+            &panic_task_regs[i], sizeof(panic_task_regs[i]));
+    }
+
+    ms->crash_task_regs = panic_task_regs;
+
+    return TRUE;
+}
+
+/*
+ * Translates a user virtual address to its physical address.
+ */
+static int
+riscv64_uvtop(struct task_context *tc, ulong uvaddr, physaddr_t *paddr, int verbose)
+{
+    ulong mm, active_mm;
+    ulong *pgd;
+
+    if (!tc)
+        error(FATAL, "current context invalid\n");
+
+    *paddr = 0;
+
+    if (is_kernel_thread(tc->task) && IS_KVADDR(uvaddr)) {
+        readmem(tc->task + OFFSET(task_struct_active_mm),
+            KVADDR, &active_mm, sizeof(void *),
+            "task active_mm contents", FAULT_ON_ERROR);
+
+        if (!active_mm)
+            error(FATAL,
+                "no active_mm for this kernel thread\n");
+
+        readmem(active_mm + OFFSET(mm_struct_pgd),
+            KVADDR, &pgd, sizeof(long),
+            "mm_struct pgd", FAULT_ON_ERROR);
+    } else {
+        if ((mm = task_mm(tc->task, TRUE)))
+            pgd = ULONG_PTR(tt->mm_struct + OFFSET(mm_struct_pgd));
+        else
+            readmem(tc->mm_struct + OFFSET(mm_struct_pgd),
+                KVADDR, &pgd, sizeof(long), "mm_struct pgd",
+                FAULT_ON_ERROR);
+    }
+
+    switch (machdep->flags & (VM_L3_4K | VM_L4_4K | VM_L5_4K)) {
+    case VM_L3_4K:
+        return riscv64_vtop_3level_4k(pgd, uvaddr, paddr, verbose);
+    case VM_L4_4K:
+        return riscv64_vtop_4level_4k(pgd, uvaddr, paddr, verbose);
+    case VM_L5_4K:
+        return riscv64_vtop_5level_4k(pgd, uvaddr, paddr, verbose);
+    default:
+        return FALSE;
+    }
+}
+
+static int
+riscv64_kvtop(struct task_context *tc, ulong kvaddr, physaddr_t *paddr, int verbose)
+{
+    ulong kernel_pgd;
+
+    if (!IS_KVADDR(kvaddr))
+        return FALSE;
+
+    if (!vt->vmalloc_start) {
+        *paddr = VTOP(kvaddr);
+        return TRUE;
+    }
+
+    if (!IS_VMALLOC_ADDR(kvaddr)) {
+        *paddr = VTOP(kvaddr);
+        if (!verbose)
+            return TRUE;
+    }
+
+    kernel_pgd = vt->kernel_pgd[0];
+    *paddr = 0;
+
+    switch (machdep->flags & (VM_L3_4K | VM_L4_4K | VM_L5_4K)) {
+    case VM_L3_4K:
+        return riscv64_vtop_3level_4k((ulong *)kernel_pgd, kvaddr, paddr, verbose);
+    case VM_L4_4K:
+        return riscv64_vtop_4level_4k((ulong *)kernel_pgd, kvaddr, paddr, verbose);
+    case VM_L5_4K:
+        return riscv64_vtop_5level_4k((ulong *)kernel_pgd, kvaddr, paddr, verbose);
+    default:
+        return FALSE;
+    }
+}
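Both translation entry points above dispatch on a single VM_L*_4K flag in machdep->flags. How that flag is chosen is outside this hunk; it presumably happens in riscv64_page_type_init(), called from the POST_GDB case below, based on the va_bits value exported via vmcoreinfo. As an assumption-labelled sketch (not the patch's code, reusing the VM_L*_4K macros from defs.h), the mapping would look like:

    /*
     * Sketch only: map the exported va_bits value to a paging-mode flag.
     * The real selection is expected to live in riscv64_page_type_init().
     */
    static unsigned long va_bits_to_vm_flag(unsigned long va_bits)
    {
        switch (va_bits) {
        case 39: return VM_L3_4K;   /* Sv39: 3 levels, 4K pages */
        case 48: return VM_L4_4K;   /* Sv48: 4 levels, 4K pages */
        case 57: return VM_L5_4K;   /* Sv57: 5 levels, 4K pages */
        default: return 0;          /* unknown/unsupported mode */
        }
    }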
+
 void riscv64_init(int when)
 {
+    switch (when) {
+    case SETUP_ENV:
+        machdep->process_elf_notes = process_elf64_notes;
+        break;
+
+    case PRE_SYMTAB:
+        machdep->verify_symbol = riscv64_verify_symbol;
+        machdep->machspec = &riscv64_machine_specific;
+        if (pc->flags & KERNEL_DEBUG_QUERY)
+            return;
+
+        machdep->verify_paddr = generic_verify_paddr;
+        machdep->ptrs_per_pgd = PTRS_PER_PGD;
+        break;
+
+    case PRE_GDB:
+        machdep->pagesize = riscv64_get_page_size();
+        machdep->pageshift = ffs(machdep->pagesize) - 1;
+        machdep->pageoffset = machdep->pagesize - 1;
+        machdep->pagemask = ~((ulonglong)machdep->pageoffset);
+        machdep->stacksize = machdep->pagesize << THREAD_SIZE_ORDER;
+
+        riscv64_get_phys_ram_base(machdep->machspec);
+        riscv64_get_struct_page_size(machdep->machspec);
+        riscv64_get_va_range(machdep->machspec);
+
+        pt_level_alloc(&machdep->pgd, "cannot malloc pgd space.");
+        pt_level_alloc(&machdep->machspec->p4d, "cannot malloc p4d space.");
+        pt_level_alloc(&machdep->pud, "cannot malloc pud space.");
+        pt_level_alloc(&machdep->pmd, "cannot malloc pmd space.");
+        pt_level_alloc(&machdep->ptbl, "cannot malloc ptbl space.");
+
+        machdep->last_pgd_read = 0;
+        machdep->machspec->last_p4d_read = 0;
+        machdep->last_pud_read = 0;
+        machdep->last_pmd_read = 0;
+        machdep->last_ptbl_read = 0;
+
+        machdep->kvbase = machdep->machspec->page_offset;
+        machdep->identity_map_base = machdep->kvbase;
+        machdep->is_kvaddr = riscv64_is_kvaddr;
+        machdep->is_uvaddr = riscv64_is_uvaddr;
+        machdep->uvtop = riscv64_uvtop;
+        machdep->kvtop = riscv64_kvtop;
+        machdep->cmd_mach = riscv64_cmd_mach;
+
+        machdep->vmalloc_start = riscv64_vmalloc_start;
+        machdep->processor_speed = riscv64_processor_speed;
+        machdep->get_stackbase = generic_get_stackbase;
+        machdep->get_stacktop = generic_get_stacktop;
+        machdep->translate_pte = riscv64_translate_pte;
+        machdep->memory_size = generic_memory_size;
+        machdep->is_task_addr = riscv64_is_task_addr;
+        machdep->get_smp_cpus = riscv64_get_smp_cpus;
+        machdep->value_to_symbol = generic_machdep_value_to_symbol;
+        machdep->show_interrupts = generic_show_interrupts;
+        machdep->get_irq_affinity = generic_get_irq_affinity;
+        machdep->init_kernel_pgd = NULL; /* pgd set by symbol_value("swapper_pg_dir") */
+        break;
+
+    case POST_GDB:
+        machdep->section_size_bits = _SECTION_SIZE_BITS;
+        machdep->max_physmem_bits = _MAX_PHYSMEM_BITS;
+        riscv64_page_type_init();
+
+        if (!machdep->hz)
+            machdep->hz = 250;
+
+        if (symbol_exists("irq_desc"))
+            ARRAY_LENGTH_INIT(machdep->nr_irqs, irq_desc,
+                "irq_desc", NULL, 0);
+        else if (kernel_symbol_exists("nr_irqs"))
+            get_symbol_data("nr_irqs", sizeof(unsigned int),
+                &machdep->nr_irqs);
+
+        MEMBER_OFFSET_INIT(elf_prstatus_pr_reg, "elf_prstatus", "pr_reg");
+
+        STRUCT_SIZE_INIT(note_buf, "note_buf_t");
+        break;
+
+    case POST_VM:
+        /*
+         * crash_notes contains machine-specific information about the
+         * crash. In particular, it contains CPU registers at the time
+         * of the crash. We need this information to extract correct
+         * backtraces from the panic task.
+         */
+        if (!ACTIVE() && !riscv64_init_active_task_regs())
+            error(WARNING,
+                "cannot retrieve registers for active task%s\n\n",
+                kt->cpus > 1 ? "s" : "");
+        break;
+    }
 }
 
+/*
+ * 'help -r' command output
+ */
 void riscv64_display_regs_from_elf_notes(int cpu, FILE *ofp)
 {