Message ID | 20230202135810.1657792-2-dbarboza@ventanamicro.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | hw/riscv: handle kernel_entry high bits with 32bit CPUs | expand |
On Thu, Feb 2, 2023 at 11:58 PM Daniel Henrique Barboza <dbarboza@ventanamicro.com> wrote: > > load_elf_ram_sym() will sign-extend 32 bit addresses. If a 32 bit QEMU > guest happens to be running in a hypervisor that are using 64 bits to > encode its address, kernel_entry can be padded with '1's and create > problems [1]. > > Using a translate_fn() callback in load_elf_ram_sym() to filter the > padding from the address doesn't work. A more detailed explanation can > be found in [2]. The short version is that glue(load_elf, SZ), from > include/hw/elf_ops.h, will calculate 'pentry' (mapped into the > 'kernel_load_base' var in riscv_load_Kernel()) before using > translate_fn(), and will not recalculate it after executing it. This > means that the callback does not prevent the padding from > kernel_load_base to appear. > > Let's instead use a kernel_low var to capture the 'lowaddr' value from > load_elf_ram_sim(), and return it when we're dealing with 32 bit CPUs. > > [1] https://lists.gnu.org/archive/html/qemu-devel/2023-01/msg02281.html > [2] https://lists.gnu.org/archive/html/qemu-devel/2023-02/msg00099.html > > Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Alistair > --- > hw/riscv/boot.c | 15 +++++++++++---- > hw/riscv/microchip_pfsoc.c | 3 ++- > hw/riscv/opentitan.c | 3 ++- > hw/riscv/sifive_e.c | 3 ++- > hw/riscv/sifive_u.c | 3 ++- > hw/riscv/spike.c | 3 ++- > hw/riscv/virt.c | 3 ++- > include/hw/riscv/boot.h | 1 + > 8 files changed, 24 insertions(+), 10 deletions(-) > > diff --git a/hw/riscv/boot.c b/hw/riscv/boot.c > index c7e0e50bd8..5ec6d32165 100644 > --- a/hw/riscv/boot.c > +++ b/hw/riscv/boot.c > @@ -174,11 +174,12 @@ target_ulong riscv_load_firmware(const char *firmware_filename, > } > > target_ulong riscv_load_kernel(MachineState *machine, > + RISCVHartArrayState *harts, > target_ulong kernel_start_addr, > symbol_fn_t sym_cb) > { > const char *kernel_filename = 
machine->kernel_filename; > - uint64_t kernel_load_base, kernel_entry; > + uint64_t kernel_load_base, kernel_entry, kernel_low; > > g_assert(kernel_filename != NULL); > > @@ -189,10 +190,16 @@ target_ulong riscv_load_kernel(MachineState *machine, > * the (expected) load address load address. This allows kernels to have > * separate SBI and ELF entry points (used by FreeBSD, for example). > */ > - if (load_elf_ram_sym(kernel_filename, NULL, NULL, NULL, > - NULL, &kernel_load_base, NULL, NULL, 0, > + if (load_elf_ram_sym(kernel_filename, NULL, NULL, NULL, NULL, > + &kernel_load_base, &kernel_low, NULL, 0, > EM_RISCV, 1, 0, NULL, true, sym_cb) > 0) { > - return kernel_load_base; > + kernel_entry = kernel_load_base; > + > + if (riscv_is_32bit(harts)) { > + kernel_entry = kernel_low; > + } > + > + return kernel_entry; > } > > if (load_uimage_as(kernel_filename, &kernel_entry, NULL, NULL, > diff --git a/hw/riscv/microchip_pfsoc.c b/hw/riscv/microchip_pfsoc.c > index 2b91e49561..712625d2a4 100644 > --- a/hw/riscv/microchip_pfsoc.c > +++ b/hw/riscv/microchip_pfsoc.c > @@ -629,7 +629,8 @@ static void microchip_icicle_kit_machine_init(MachineState *machine) > kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc.u_cpus, > firmware_end_addr); > > - kernel_entry = riscv_load_kernel(machine, kernel_start_addr, NULL); > + kernel_entry = riscv_load_kernel(machine, &s->soc.u_cpus, > + kernel_start_addr, NULL); > > if (machine->initrd_filename) { > riscv_load_initrd(machine, kernel_entry); > diff --git a/hw/riscv/opentitan.c b/hw/riscv/opentitan.c > index 353f030d80..7fe4fb5628 100644 > --- a/hw/riscv/opentitan.c > +++ b/hw/riscv/opentitan.c > @@ -101,7 +101,8 @@ static void opentitan_board_init(MachineState *machine) > } > > if (machine->kernel_filename) { > - riscv_load_kernel(machine, memmap[IBEX_DEV_RAM].base, NULL); > + riscv_load_kernel(machine, &s->soc.cpus, > + memmap[IBEX_DEV_RAM].base, NULL); > } > } > > diff --git a/hw/riscv/sifive_e.c b/hw/riscv/sifive_e.c > index 
3e3f4b0088..1a7d381514 100644 > --- a/hw/riscv/sifive_e.c > +++ b/hw/riscv/sifive_e.c > @@ -114,7 +114,8 @@ static void sifive_e_machine_init(MachineState *machine) > memmap[SIFIVE_E_DEV_MROM].base, &address_space_memory); > > if (machine->kernel_filename) { > - riscv_load_kernel(machine, memmap[SIFIVE_E_DEV_DTIM].base, NULL); > + riscv_load_kernel(machine, &s->soc.cpus, > + memmap[SIFIVE_E_DEV_DTIM].base, NULL); > } > } > > diff --git a/hw/riscv/sifive_u.c b/hw/riscv/sifive_u.c > index d3ab7a9cda..71be442a50 100644 > --- a/hw/riscv/sifive_u.c > +++ b/hw/riscv/sifive_u.c > @@ -598,7 +598,8 @@ static void sifive_u_machine_init(MachineState *machine) > kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc.u_cpus, > firmware_end_addr); > > - kernel_entry = riscv_load_kernel(machine, kernel_start_addr, NULL); > + kernel_entry = riscv_load_kernel(machine, &s->soc.u_cpus, > + kernel_start_addr, NULL); > > if (machine->initrd_filename) { > riscv_load_initrd(machine, kernel_entry); > diff --git a/hw/riscv/spike.c b/hw/riscv/spike.c > index cc3f6dac17..1fa91167ab 100644 > --- a/hw/riscv/spike.c > +++ b/hw/riscv/spike.c > @@ -305,7 +305,8 @@ static void spike_board_init(MachineState *machine) > kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc[0], > firmware_end_addr); > > - kernel_entry = riscv_load_kernel(machine, kernel_start_addr, > + kernel_entry = riscv_load_kernel(machine, &s->soc[0], > + kernel_start_addr, > htif_symbol_callback); > > if (machine->initrd_filename) { > diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c > index a061151a6f..d0531cc641 100644 > --- a/hw/riscv/virt.c > +++ b/hw/riscv/virt.c > @@ -1277,7 +1277,8 @@ static void virt_machine_done(Notifier *notifier, void *data) > kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc[0], > firmware_end_addr); > > - kernel_entry = riscv_load_kernel(machine, kernel_start_addr, NULL); > + kernel_entry = riscv_load_kernel(machine, &s->soc[0], > + kernel_start_addr, NULL); > > if 
(machine->initrd_filename) { > riscv_load_initrd(machine, kernel_entry); > diff --git a/include/hw/riscv/boot.h b/include/hw/riscv/boot.h > index 511390f60e..6295316afb 100644 > --- a/include/hw/riscv/boot.h > +++ b/include/hw/riscv/boot.h > @@ -44,6 +44,7 @@ target_ulong riscv_load_firmware(const char *firmware_filename, > hwaddr firmware_load_addr, > symbol_fn_t sym_cb); > target_ulong riscv_load_kernel(MachineState *machine, > + RISCVHartArrayState *harts, > target_ulong firmware_end_addr, > symbol_fn_t sym_cb); > void riscv_load_initrd(MachineState *machine, uint64_t kernel_entry); > -- > 2.39.1 > >
On Thu, Feb 2, 2023 at 9:58 PM Daniel Henrique Barboza <dbarboza@ventanamicro.com> wrote: > > load_elf_ram_sym() will sign-extend 32 bit addresses. If a 32 bit QEMU > guest happens to be running in a hypervisor that are using 64 bits to > encode its address, kernel_entry can be padded with '1's and create > problems [1]. Still this commit message is inaccurate. It's not "a 32-bit QEMU guest happens to running in a hypervisor that are using 64 bits to encode tis address" as a 32-bit ELF can only hold a 32-bit address, but it's the QEMU ELF loader that does the sign extension and returns the address as uint64_t. It has nothing to do with hypervisor too. > > Using a translate_fn() callback in load_elf_ram_sym() to filter the > padding from the address doesn't work. A more detailed explanation can > be found in [2]. The short version is that glue(load_elf, SZ), from > include/hw/elf_ops.h, will calculate 'pentry' (mapped into the > 'kernel_load_base' var in riscv_load_Kernel()) before using > translate_fn(), and will not recalculate it after executing it. This > means that the callback does not prevent the padding from > kernel_load_base to appear. > > Let's instead use a kernel_low var to capture the 'lowaddr' value from > load_elf_ram_sim(), and return it when we're dealing with 32 bit CPUs. Looking at the prototype of load_elf_ram_sym() it has ssize_t load_elf_ram_sym(const char *filename, uint64_t (*elf_note_fn)(void *, void *, bool), uint64_t (*translate_fn)(void *, uint64_t), void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, uint64_t *highaddr, uint32_t *pflags, int big_endian, int elf_machine, int clear_lsb, int data_swab, AddressSpace *as, bool load_rom, symbol_fn_t sym_cb) So kernel_low is the "highaddr" parameter, not the 'lowaddr' value. 
> > [1] https://lists.gnu.org/archive/html/qemu-devel/2023-01/msg02281.html > [2] https://lists.gnu.org/archive/html/qemu-devel/2023-02/msg00099.html > > Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> > --- > hw/riscv/boot.c | 15 +++++++++++---- > hw/riscv/microchip_pfsoc.c | 3 ++- > hw/riscv/opentitan.c | 3 ++- > hw/riscv/sifive_e.c | 3 ++- > hw/riscv/sifive_u.c | 3 ++- > hw/riscv/spike.c | 3 ++- > hw/riscv/virt.c | 3 ++- > include/hw/riscv/boot.h | 1 + > 8 files changed, 24 insertions(+), 10 deletions(-) > > diff --git a/hw/riscv/boot.c b/hw/riscv/boot.c > index c7e0e50bd8..5ec6d32165 100644 > --- a/hw/riscv/boot.c > +++ b/hw/riscv/boot.c > @@ -174,11 +174,12 @@ target_ulong riscv_load_firmware(const char *firmware_filename, > } > > target_ulong riscv_load_kernel(MachineState *machine, > + RISCVHartArrayState *harts, > target_ulong kernel_start_addr, > symbol_fn_t sym_cb) > { > const char *kernel_filename = machine->kernel_filename; > - uint64_t kernel_load_base, kernel_entry; > + uint64_t kernel_load_base, kernel_entry, kernel_low; > > g_assert(kernel_filename != NULL); > > @@ -189,10 +190,16 @@ target_ulong riscv_load_kernel(MachineState *machine, > * the (expected) load address load address. This allows kernels to have > * separate SBI and ELF entry points (used by FreeBSD, for example). 
> */ > - if (load_elf_ram_sym(kernel_filename, NULL, NULL, NULL, > - NULL, &kernel_load_base, NULL, NULL, 0, > + if (load_elf_ram_sym(kernel_filename, NULL, NULL, NULL, NULL, > + &kernel_load_base, &kernel_low, NULL, 0, > EM_RISCV, 1, 0, NULL, true, sym_cb) > 0) { > - return kernel_load_base; > + kernel_entry = kernel_load_base; > + > + if (riscv_is_32bit(harts)) { > + kernel_entry = kernel_low; > + } > + > + return kernel_entry; > } > > if (load_uimage_as(kernel_filename, &kernel_entry, NULL, NULL, > diff --git a/hw/riscv/microchip_pfsoc.c b/hw/riscv/microchip_pfsoc.c > index 2b91e49561..712625d2a4 100644 > --- a/hw/riscv/microchip_pfsoc.c > +++ b/hw/riscv/microchip_pfsoc.c > @@ -629,7 +629,8 @@ static void microchip_icicle_kit_machine_init(MachineState *machine) > kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc.u_cpus, > firmware_end_addr); > > - kernel_entry = riscv_load_kernel(machine, kernel_start_addr, NULL); > + kernel_entry = riscv_load_kernel(machine, &s->soc.u_cpus, > + kernel_start_addr, NULL); > > if (machine->initrd_filename) { > riscv_load_initrd(machine, kernel_entry); > diff --git a/hw/riscv/opentitan.c b/hw/riscv/opentitan.c > index 353f030d80..7fe4fb5628 100644 > --- a/hw/riscv/opentitan.c > +++ b/hw/riscv/opentitan.c > @@ -101,7 +101,8 @@ static void opentitan_board_init(MachineState *machine) > } > > if (machine->kernel_filename) { > - riscv_load_kernel(machine, memmap[IBEX_DEV_RAM].base, NULL); > + riscv_load_kernel(machine, &s->soc.cpus, > + memmap[IBEX_DEV_RAM].base, NULL); > } > } > > diff --git a/hw/riscv/sifive_e.c b/hw/riscv/sifive_e.c > index 3e3f4b0088..1a7d381514 100644 > --- a/hw/riscv/sifive_e.c > +++ b/hw/riscv/sifive_e.c > @@ -114,7 +114,8 @@ static void sifive_e_machine_init(MachineState *machine) > memmap[SIFIVE_E_DEV_MROM].base, &address_space_memory); > > if (machine->kernel_filename) { > - riscv_load_kernel(machine, memmap[SIFIVE_E_DEV_DTIM].base, NULL); > + riscv_load_kernel(machine, &s->soc.cpus, > + 
memmap[SIFIVE_E_DEV_DTIM].base, NULL); > } > } > > diff --git a/hw/riscv/sifive_u.c b/hw/riscv/sifive_u.c > index d3ab7a9cda..71be442a50 100644 > --- a/hw/riscv/sifive_u.c > +++ b/hw/riscv/sifive_u.c > @@ -598,7 +598,8 @@ static void sifive_u_machine_init(MachineState *machine) > kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc.u_cpus, > firmware_end_addr); > > - kernel_entry = riscv_load_kernel(machine, kernel_start_addr, NULL); > + kernel_entry = riscv_load_kernel(machine, &s->soc.u_cpus, > + kernel_start_addr, NULL); > > if (machine->initrd_filename) { > riscv_load_initrd(machine, kernel_entry); > diff --git a/hw/riscv/spike.c b/hw/riscv/spike.c > index cc3f6dac17..1fa91167ab 100644 > --- a/hw/riscv/spike.c > +++ b/hw/riscv/spike.c > @@ -305,7 +305,8 @@ static void spike_board_init(MachineState *machine) > kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc[0], > firmware_end_addr); > > - kernel_entry = riscv_load_kernel(machine, kernel_start_addr, > + kernel_entry = riscv_load_kernel(machine, &s->soc[0], > + kernel_start_addr, > htif_symbol_callback); > > if (machine->initrd_filename) { > diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c > index a061151a6f..d0531cc641 100644 > --- a/hw/riscv/virt.c > +++ b/hw/riscv/virt.c > @@ -1277,7 +1277,8 @@ static void virt_machine_done(Notifier *notifier, void *data) > kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc[0], > firmware_end_addr); > > - kernel_entry = riscv_load_kernel(machine, kernel_start_addr, NULL); > + kernel_entry = riscv_load_kernel(machine, &s->soc[0], > + kernel_start_addr, NULL); > > if (machine->initrd_filename) { > riscv_load_initrd(machine, kernel_entry); > diff --git a/include/hw/riscv/boot.h b/include/hw/riscv/boot.h > index 511390f60e..6295316afb 100644 > --- a/include/hw/riscv/boot.h > +++ b/include/hw/riscv/boot.h > @@ -44,6 +44,7 @@ target_ulong riscv_load_firmware(const char *firmware_filename, > hwaddr firmware_load_addr, > symbol_fn_t sym_cb); > target_ulong 
riscv_load_kernel(MachineState *machine, > + RISCVHartArrayState *harts, > target_ulong firmware_end_addr, > symbol_fn_t sym_cb); > void riscv_load_initrd(MachineState *machine, uint64_t kernel_entry); > -- Regards, Bin
On 2/3/23 02:39, Bin Meng wrote: > On Thu, Feb 2, 2023 at 9:58 PM Daniel Henrique Barboza > <dbarboza@ventanamicro.com> wrote: >> >> load_elf_ram_sym() will sign-extend 32 bit addresses. If a 32 bit QEMU >> guest happens to be running in a hypervisor that are using 64 bits to >> encode its address, kernel_entry can be padded with '1's and create >> problems [1]. > > Still this commit message is inaccurate. It's not > > "a 32-bit QEMU guest happens to running in a hypervisor that are using > 64 bits to encode tis address" > > as a 32-bit ELF can only hold a 32-bit address, but it's the QEMU ELF > loader that does the sign extension and returns the address as > uint64_t. It has nothing to do with hypervisor too. Yeah I'm still focusing too much on the use case instead of the root of the problem (sign-extension from QEMU elf). > >> >> Using a translate_fn() callback in load_elf_ram_sym() to filter the >> padding from the address doesn't work. A more detailed explanation can >> be found in [2]. The short version is that glue(load_elf, SZ), from >> include/hw/elf_ops.h, will calculate 'pentry' (mapped into the >> 'kernel_load_base' var in riscv_load_Kernel()) before using >> translate_fn(), and will not recalculate it after executing it. This >> means that the callback does not prevent the padding from >> kernel_load_base to appear. >> >> Let's instead use a kernel_low var to capture the 'lowaddr' value from >> load_elf_ram_sim(), and return it when we're dealing with 32 bit CPUs. 
> > Looking at the prototype of load_elf_ram_sym() it has > > ssize_t load_elf_ram_sym(const char *filename, > uint64_t (*elf_note_fn)(void *, void *, bool), > uint64_t (*translate_fn)(void *, uint64_t), > void *translate_opaque, uint64_t *pentry, > uint64_t *lowaddr, uint64_t *highaddr, > uint32_t *pflags, int big_endian, int elf_machine, > int clear_lsb, int data_swab, > AddressSpace *as, bool load_rom, symbol_fn_t sym_cb) > > So kernel_low is the "highaddr" parameter, not the 'lowaddr' value. And for some reason I thought kernel_base_addr was being used as 'pentry'. kernel_base_addr is already 'lowaddr'. Guess I'll have to rewrite the commit message. And revisit why my test case worked for riscv32 (I probably didn't use an ELF image ...) And the only way out seems to be filtering the bits from lowaddr. I'll do that. Daniel > >> >> [1] https://lists.gnu.org/archive/html/qemu-devel/2023-01/msg02281.html >> [2] https://lists.gnu.org/archive/html/qemu-devel/2023-02/msg00099.html >> >> Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> >> --- >> hw/riscv/boot.c | 15 +++++++++++---- >> hw/riscv/microchip_pfsoc.c | 3 ++- >> hw/riscv/opentitan.c | 3 ++- >> hw/riscv/sifive_e.c | 3 ++- >> hw/riscv/sifive_u.c | 3 ++- >> hw/riscv/spike.c | 3 ++- >> hw/riscv/virt.c | 3 ++- >> include/hw/riscv/boot.h | 1 + >> 8 files changed, 24 insertions(+), 10 deletions(-) >> >> diff --git a/hw/riscv/boot.c b/hw/riscv/boot.c >> index c7e0e50bd8..5ec6d32165 100644 >> --- a/hw/riscv/boot.c >> +++ b/hw/riscv/boot.c >> @@ -174,11 +174,12 @@ target_ulong riscv_load_firmware(const char *firmware_filename, >> } >> >> target_ulong riscv_load_kernel(MachineState *machine, >> + RISCVHartArrayState *harts, >> target_ulong kernel_start_addr, >> symbol_fn_t sym_cb) >> { >> const char *kernel_filename = machine->kernel_filename; >> - uint64_t kernel_load_base, kernel_entry; >> + uint64_t kernel_load_base, kernel_entry, kernel_low; >> >> g_assert(kernel_filename != NULL); >> >> @@ -189,10 
+190,16 @@ target_ulong riscv_load_kernel(MachineState *machine, >> * the (expected) load address load address. This allows kernels to have >> * separate SBI and ELF entry points (used by FreeBSD, for example). >> */ >> - if (load_elf_ram_sym(kernel_filename, NULL, NULL, NULL, >> - NULL, &kernel_load_base, NULL, NULL, 0, >> + if (load_elf_ram_sym(kernel_filename, NULL, NULL, NULL, NULL, >> + &kernel_load_base, &kernel_low, NULL, 0, >> EM_RISCV, 1, 0, NULL, true, sym_cb) > 0) { >> - return kernel_load_base; >> + kernel_entry = kernel_load_base; >> + >> + if (riscv_is_32bit(harts)) { >> + kernel_entry = kernel_low; >> + } >> + >> + return kernel_entry; >> } >> >> if (load_uimage_as(kernel_filename, &kernel_entry, NULL, NULL, >> diff --git a/hw/riscv/microchip_pfsoc.c b/hw/riscv/microchip_pfsoc.c >> index 2b91e49561..712625d2a4 100644 >> --- a/hw/riscv/microchip_pfsoc.c >> +++ b/hw/riscv/microchip_pfsoc.c >> @@ -629,7 +629,8 @@ static void microchip_icicle_kit_machine_init(MachineState *machine) >> kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc.u_cpus, >> firmware_end_addr); >> >> - kernel_entry = riscv_load_kernel(machine, kernel_start_addr, NULL); >> + kernel_entry = riscv_load_kernel(machine, &s->soc.u_cpus, >> + kernel_start_addr, NULL); >> >> if (machine->initrd_filename) { >> riscv_load_initrd(machine, kernel_entry); >> diff --git a/hw/riscv/opentitan.c b/hw/riscv/opentitan.c >> index 353f030d80..7fe4fb5628 100644 >> --- a/hw/riscv/opentitan.c >> +++ b/hw/riscv/opentitan.c >> @@ -101,7 +101,8 @@ static void opentitan_board_init(MachineState *machine) >> } >> >> if (machine->kernel_filename) { >> - riscv_load_kernel(machine, memmap[IBEX_DEV_RAM].base, NULL); >> + riscv_load_kernel(machine, &s->soc.cpus, >> + memmap[IBEX_DEV_RAM].base, NULL); >> } >> } >> >> diff --git a/hw/riscv/sifive_e.c b/hw/riscv/sifive_e.c >> index 3e3f4b0088..1a7d381514 100644 >> --- a/hw/riscv/sifive_e.c >> +++ b/hw/riscv/sifive_e.c >> @@ -114,7 +114,8 @@ static void 
sifive_e_machine_init(MachineState *machine) >> memmap[SIFIVE_E_DEV_MROM].base, &address_space_memory); >> >> if (machine->kernel_filename) { >> - riscv_load_kernel(machine, memmap[SIFIVE_E_DEV_DTIM].base, NULL); >> + riscv_load_kernel(machine, &s->soc.cpus, >> + memmap[SIFIVE_E_DEV_DTIM].base, NULL); >> } >> } >> >> diff --git a/hw/riscv/sifive_u.c b/hw/riscv/sifive_u.c >> index d3ab7a9cda..71be442a50 100644 >> --- a/hw/riscv/sifive_u.c >> +++ b/hw/riscv/sifive_u.c >> @@ -598,7 +598,8 @@ static void sifive_u_machine_init(MachineState *machine) >> kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc.u_cpus, >> firmware_end_addr); >> >> - kernel_entry = riscv_load_kernel(machine, kernel_start_addr, NULL); >> + kernel_entry = riscv_load_kernel(machine, &s->soc.u_cpus, >> + kernel_start_addr, NULL); >> >> if (machine->initrd_filename) { >> riscv_load_initrd(machine, kernel_entry); >> diff --git a/hw/riscv/spike.c b/hw/riscv/spike.c >> index cc3f6dac17..1fa91167ab 100644 >> --- a/hw/riscv/spike.c >> +++ b/hw/riscv/spike.c >> @@ -305,7 +305,8 @@ static void spike_board_init(MachineState *machine) >> kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc[0], >> firmware_end_addr); >> >> - kernel_entry = riscv_load_kernel(machine, kernel_start_addr, >> + kernel_entry = riscv_load_kernel(machine, &s->soc[0], >> + kernel_start_addr, >> htif_symbol_callback); >> >> if (machine->initrd_filename) { >> diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c >> index a061151a6f..d0531cc641 100644 >> --- a/hw/riscv/virt.c >> +++ b/hw/riscv/virt.c >> @@ -1277,7 +1277,8 @@ static void virt_machine_done(Notifier *notifier, void *data) >> kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc[0], >> firmware_end_addr); >> >> - kernel_entry = riscv_load_kernel(machine, kernel_start_addr, NULL); >> + kernel_entry = riscv_load_kernel(machine, &s->soc[0], >> + kernel_start_addr, NULL); >> >> if (machine->initrd_filename) { >> riscv_load_initrd(machine, kernel_entry); >> diff --git 
a/include/hw/riscv/boot.h b/include/hw/riscv/boot.h >> index 511390f60e..6295316afb 100644 >> --- a/include/hw/riscv/boot.h >> +++ b/include/hw/riscv/boot.h >> @@ -44,6 +44,7 @@ target_ulong riscv_load_firmware(const char *firmware_filename, >> hwaddr firmware_load_addr, >> symbol_fn_t sym_cb); >> target_ulong riscv_load_kernel(MachineState *machine, >> + RISCVHartArrayState *harts, >> target_ulong firmware_end_addr, >> symbol_fn_t sym_cb); >> void riscv_load_initrd(MachineState *machine, uint64_t kernel_entry); >> -- > > Regards, > Bin
Hi Daniel, On Fri, Feb 3, 2023 at 6:31 PM Daniel Henrique Barboza <dbarboza@ventanamicro.com> wrote: > > > > On 2/3/23 02:39, Bin Meng wrote: > > On Thu, Feb 2, 2023 at 9:58 PM Daniel Henrique Barboza > > <dbarboza@ventanamicro.com> wrote: > >> > >> load_elf_ram_sym() will sign-extend 32 bit addresses. If a 32 bit QEMU > >> guest happens to be running in a hypervisor that are using 64 bits to > >> encode its address, kernel_entry can be padded with '1's and create > >> problems [1]. > > > > Still this commit message is inaccurate. It's not > > > > "a 32-bit QEMU guest happens to running in a hypervisor that are using > > 64 bits to encode tis address" > > > > as a 32-bit ELF can only hold a 32-bit address, but it's the QEMU ELF > > loader that does the sign extension and returns the address as > > uint64_t. It has nothing to do with hypervisor too. > > > Yeah I'm still focusing too much on the use case instead of the root of the > problem (sign-extension from QEMU elf). > > > > >> > >> Using a translate_fn() callback in load_elf_ram_sym() to filter the > >> padding from the address doesn't work. A more detailed explanation can > >> be found in [2]. The short version is that glue(load_elf, SZ), from > >> include/hw/elf_ops.h, will calculate 'pentry' (mapped into the > >> 'kernel_load_base' var in riscv_load_Kernel()) before using > >> translate_fn(), and will not recalculate it after executing it. This > >> means that the callback does not prevent the padding from > >> kernel_load_base to appear. > >> > >> Let's instead use a kernel_low var to capture the 'lowaddr' value from > >> load_elf_ram_sim(), and return it when we're dealing with 32 bit CPUs. 
> > > > Looking at the prototype of load_elf_ram_sym() it has > > > > ssize_t load_elf_ram_sym(const char *filename, > > uint64_t (*elf_note_fn)(void *, void *, bool), > > uint64_t (*translate_fn)(void *, uint64_t), > > void *translate_opaque, uint64_t *pentry, > > uint64_t *lowaddr, uint64_t *highaddr, > > uint32_t *pflags, int big_endian, int elf_machine, > > int clear_lsb, int data_swab, > > AddressSpace *as, bool load_rom, symbol_fn_t sym_cb) > > > > So kernel_low is the "highaddr" parameter, not the 'lowaddr' value. > > And for some reason I thought kernel_base_addr was being used as 'pentry'. kernel_base_addr > is already 'lowaddr'. Guess I'll have to rewrite the commit message. And revisit why my > test case worked for riscv32 (I probably didn't use an ELF image ...) > > And the only way out seems to be filtering the bits from lowaddr. I'll do that. > Can you check as to why QEMU ELF loader does the sign extension? I personally don't know why. Maybe the ELF loader does something wrong. Regards, Bin
Hey, On 2/3/23 07:45, Bin Meng wrote: > Hi Daniel, > > On Fri, Feb 3, 2023 at 6:31 PM Daniel Henrique Barboza > <dbarboza@ventanamicro.com> wrote: >> >> >> >> On 2/3/23 02:39, Bin Meng wrote: >>> On Thu, Feb 2, 2023 at 9:58 PM Daniel Henrique Barboza >>> <dbarboza@ventanamicro.com> wrote: >>>> >>>> load_elf_ram_sym() will sign-extend 32 bit addresses. If a 32 bit QEMU >>>> guest happens to be running in a hypervisor that are using 64 bits to >>>> encode its address, kernel_entry can be padded with '1's and create >>>> problems [1]. >>> >>> Still this commit message is inaccurate. It's not >>> >>> "a 32-bit QEMU guest happens to running in a hypervisor that are using >>> 64 bits to encode tis address" >>> >>> as a 32-bit ELF can only hold a 32-bit address, but it's the QEMU ELF >>> loader that does the sign extension and returns the address as >>> uint64_t. It has nothing to do with hypervisor too. >> >> >> Yeah I'm still focusing too much on the use case instead of the root of the >> problem (sign-extension from QEMU elf). >> >>> >>>> >>>> Using a translate_fn() callback in load_elf_ram_sym() to filter the >>>> padding from the address doesn't work. A more detailed explanation can >>>> be found in [2]. The short version is that glue(load_elf, SZ), from >>>> include/hw/elf_ops.h, will calculate 'pentry' (mapped into the >>>> 'kernel_load_base' var in riscv_load_Kernel()) before using >>>> translate_fn(), and will not recalculate it after executing it. This >>>> means that the callback does not prevent the padding from >>>> kernel_load_base to appear. >>>> >>>> Let's instead use a kernel_low var to capture the 'lowaddr' value from >>>> load_elf_ram_sim(), and return it when we're dealing with 32 bit CPUs. 
>>> >>> Looking at the prototype of load_elf_ram_sym() it has >>> >>> ssize_t load_elf_ram_sym(const char *filename, >>> uint64_t (*elf_note_fn)(void *, void *, bool), >>> uint64_t (*translate_fn)(void *, uint64_t), >>> void *translate_opaque, uint64_t *pentry, >>> uint64_t *lowaddr, uint64_t *highaddr, >>> uint32_t *pflags, int big_endian, int elf_machine, >>> int clear_lsb, int data_swab, >>> AddressSpace *as, bool load_rom, symbol_fn_t sym_cb) >>> >>> So kernel_low is the "highaddr" parameter, not the 'lowaddr' value. >> >> And for some reason I thought kernel_base_addr was being used as 'pentry'. kernel_base_addr >> is already 'lowaddr'. Guess I'll have to rewrite the commit message. And revisit why my >> test case worked for riscv32 (I probably didn't use an ELF image ...) >> >> And the only way out seems to be filtering the bits from lowaddr. I'll do that. >> > > Can you check as to why QEMU ELF loader does the sign extension? > > I personally don't know why. Maybe the ELF loader does something wrong. I took a look and didn't find out why. I checked that in the ELF specification some file headers can indicate a sign extension. E.g. R_X86_64_32S for example is described as "Direct 32 bit zero extended". Note that the use of the tags are dependent on how the ELF was built, so we would need the exact ELF to check for that. All I can say is that there is a sign extension going on, in the 'lowaddr' field, and that translate_fn() wasn't enough to filter it out. I can't say whether this is intended or a bug. But going back a little, this whole situation happened in v5 because, in the next patch, riscv_load_initrd() started to receive an uint64_t (kernel_entry) instead of receiving a target_ulong like it was doing before. This behavior change, which was accidental, not only revealed this sign-extension behavior but also potentially changed what riscv_load_initrd() is receiving from load_uimage_as() the same way it's impacting load_elf_ram_sym(). 
The third load option, load_image_targphys_as(), is already using a target_ulong (kernel_start_addr) as return value so it's not impacted. I believe Alistair suggested to clear bits instead of just doing a target_ulong cast for a reason (I guess we're trying to gate all 32/64 bit CPU logic using a direct approach such as checking the CPU directly), but I also think we should clear bits for all 'kernel_entry' possibilities, not just the one that comes from load_elf_ram_sym(), to be consistent in all three cases. We might be even avoiding a future headache from load_uimage_as(). Thoughts? Daniel [1] https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc > > Regards, > Bin
On Sat, Feb 4, 2023 at 7:01 AM Daniel Henrique Barboza <dbarboza@ventanamicro.com> wrote: > > Hey, > > On 2/3/23 07:45, Bin Meng wrote: > > Hi Daniel, > > > > On Fri, Feb 3, 2023 at 6:31 PM Daniel Henrique Barboza > > <dbarboza@ventanamicro.com> wrote: > >> > >> > >> > >> On 2/3/23 02:39, Bin Meng wrote: > >>> On Thu, Feb 2, 2023 at 9:58 PM Daniel Henrique Barboza > >>> <dbarboza@ventanamicro.com> wrote: > >>>> > >>>> load_elf_ram_sym() will sign-extend 32 bit addresses. If a 32 bit QEMU > >>>> guest happens to be running in a hypervisor that are using 64 bits to > >>>> encode its address, kernel_entry can be padded with '1's and create > >>>> problems [1]. > >>> > >>> Still this commit message is inaccurate. It's not > >>> > >>> "a 32-bit QEMU guest happens to running in a hypervisor that are using > >>> 64 bits to encode tis address" > >>> > >>> as a 32-bit ELF can only hold a 32-bit address, but it's the QEMU ELF > >>> loader that does the sign extension and returns the address as > >>> uint64_t. It has nothing to do with hypervisor too. > >> > >> > >> Yeah I'm still focusing too much on the use case instead of the root of the > >> problem (sign-extension from QEMU elf). > >> > >>> > >>>> > >>>> Using a translate_fn() callback in load_elf_ram_sym() to filter the > >>>> padding from the address doesn't work. A more detailed explanation can > >>>> be found in [2]. The short version is that glue(load_elf, SZ), from > >>>> include/hw/elf_ops.h, will calculate 'pentry' (mapped into the > >>>> 'kernel_load_base' var in riscv_load_Kernel()) before using > >>>> translate_fn(), and will not recalculate it after executing it. This > >>>> means that the callback does not prevent the padding from > >>>> kernel_load_base to appear. > >>>> > >>>> Let's instead use a kernel_low var to capture the 'lowaddr' value from > >>>> load_elf_ram_sim(), and return it when we're dealing with 32 bit CPUs. 
> >>> > >>> Looking at the prototype of load_elf_ram_sym() it has > >>> > >>> ssize_t load_elf_ram_sym(const char *filename, > >>> uint64_t (*elf_note_fn)(void *, void *, bool), > >>> uint64_t (*translate_fn)(void *, uint64_t), > >>> void *translate_opaque, uint64_t *pentry, > >>> uint64_t *lowaddr, uint64_t *highaddr, > >>> uint32_t *pflags, int big_endian, int elf_machine, > >>> int clear_lsb, int data_swab, > >>> AddressSpace *as, bool load_rom, symbol_fn_t sym_cb) > >>> > >>> So kernel_low is the "highaddr" parameter, not the 'lowaddr' value. > >> > >> And for some reason I thought kernel_base_addr was being used as 'pentry'. kernel_base_addr > >> is already 'lowaddr'. Guess I'll have to rewrite the commit message. And revisit why my > >> test case worked for riscv32 (I probably didn't use an ELF image ...) > >> > >> And the only way out seems to be filtering the bits from lowaddr. I'll do that. > >> > > > > Can you check as to why QEMU ELF loader does the sign extension? > > > > I personally don't know why. Maybe the ELF loader does something wrong. > > > I took a look and didn't find out why. I checked that in the ELF specification some > file headers can indicate a sign extension. E.g. R_X86_64_32S for example is described as > "Direct 32 bit zero extended". Note that the use of the tags are dependent on how the > ELF was built, so we would need the exact ELF to check for that. All I can say is that > there is a sign extension going on, in the 'lowaddr' field, and that translate_fn() > wasn't enough to filter it out. I can't say whether this is intended or a bug. > > > But going back a little, this whole situation happened in v5 because, in the next > patch, riscv_load_initrd() started to receive an uint64_t (kernel_entry) instead of > receiving a target_ulong like it was doing before. 
This behavior change, which was > accidental, not only revealed this sign-extension behavior but also potentially changed > what riscv_load_initrd() is receiving from load_uimage_as() the same way it's > impacting load_elf_ram_sym(). The third load option, load_image_targphys_as(), is > already using a target_ulong (kernel_start_addr) as return value so it's not > impacted. > > I believe Alistair suggested to clear bits instead of just doing a target_ulong > cast for a reason (I guess we're trying to gate all 32/64 bit CPU logic using a > direct approach such as checking the CPU directly), but I also think we should > clear bits for all 'kernel_entry' possibilities, not just the one that comes from > load_elf_ram_sym(), to be consistent in all three cases. We might be even avoiding > a future headache from load_uimage_as(). That seems like the approach to take. That way we aren't changing behaviour and it continues along with improving/adding RV32 support on 64-bit bit targets. If we want to change the behaviour in the future, we can add a patch that does that with a description of why. Alistair > > > > Thoughts? > > > Daniel > > > [1] https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc > > > > > Regards, > > Bin >
diff --git a/hw/riscv/boot.c b/hw/riscv/boot.c index c7e0e50bd8..5ec6d32165 100644 --- a/hw/riscv/boot.c +++ b/hw/riscv/boot.c @@ -174,11 +174,12 @@ target_ulong riscv_load_firmware(const char *firmware_filename, } target_ulong riscv_load_kernel(MachineState *machine, + RISCVHartArrayState *harts, target_ulong kernel_start_addr, symbol_fn_t sym_cb) { const char *kernel_filename = machine->kernel_filename; - uint64_t kernel_load_base, kernel_entry; + uint64_t kernel_load_base, kernel_entry, kernel_low; g_assert(kernel_filename != NULL); @@ -189,10 +190,16 @@ target_ulong riscv_load_kernel(MachineState *machine, * the (expected) load address load address. This allows kernels to have * separate SBI and ELF entry points (used by FreeBSD, for example). */ - if (load_elf_ram_sym(kernel_filename, NULL, NULL, NULL, - NULL, &kernel_load_base, NULL, NULL, 0, + if (load_elf_ram_sym(kernel_filename, NULL, NULL, NULL, NULL, + &kernel_load_base, &kernel_low, NULL, 0, EM_RISCV, 1, 0, NULL, true, sym_cb) > 0) { - return kernel_load_base; + kernel_entry = kernel_load_base; + + if (riscv_is_32bit(harts)) { + kernel_entry = kernel_low; + } + + return kernel_entry; } if (load_uimage_as(kernel_filename, &kernel_entry, NULL, NULL, diff --git a/hw/riscv/microchip_pfsoc.c b/hw/riscv/microchip_pfsoc.c index 2b91e49561..712625d2a4 100644 --- a/hw/riscv/microchip_pfsoc.c +++ b/hw/riscv/microchip_pfsoc.c @@ -629,7 +629,8 @@ static void microchip_icicle_kit_machine_init(MachineState *machine) kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc.u_cpus, firmware_end_addr); - kernel_entry = riscv_load_kernel(machine, kernel_start_addr, NULL); + kernel_entry = riscv_load_kernel(machine, &s->soc.u_cpus, + kernel_start_addr, NULL); if (machine->initrd_filename) { riscv_load_initrd(machine, kernel_entry); diff --git a/hw/riscv/opentitan.c b/hw/riscv/opentitan.c index 353f030d80..7fe4fb5628 100644 --- a/hw/riscv/opentitan.c +++ b/hw/riscv/opentitan.c @@ -101,7 +101,8 @@ static void 
opentitan_board_init(MachineState *machine) } if (machine->kernel_filename) { - riscv_load_kernel(machine, memmap[IBEX_DEV_RAM].base, NULL); + riscv_load_kernel(machine, &s->soc.cpus, + memmap[IBEX_DEV_RAM].base, NULL); } } diff --git a/hw/riscv/sifive_e.c b/hw/riscv/sifive_e.c index 3e3f4b0088..1a7d381514 100644 --- a/hw/riscv/sifive_e.c +++ b/hw/riscv/sifive_e.c @@ -114,7 +114,8 @@ static void sifive_e_machine_init(MachineState *machine) memmap[SIFIVE_E_DEV_MROM].base, &address_space_memory); if (machine->kernel_filename) { - riscv_load_kernel(machine, memmap[SIFIVE_E_DEV_DTIM].base, NULL); + riscv_load_kernel(machine, &s->soc.cpus, + memmap[SIFIVE_E_DEV_DTIM].base, NULL); } } diff --git a/hw/riscv/sifive_u.c b/hw/riscv/sifive_u.c index d3ab7a9cda..71be442a50 100644 --- a/hw/riscv/sifive_u.c +++ b/hw/riscv/sifive_u.c @@ -598,7 +598,8 @@ static void sifive_u_machine_init(MachineState *machine) kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc.u_cpus, firmware_end_addr); - kernel_entry = riscv_load_kernel(machine, kernel_start_addr, NULL); + kernel_entry = riscv_load_kernel(machine, &s->soc.u_cpus, + kernel_start_addr, NULL); if (machine->initrd_filename) { riscv_load_initrd(machine, kernel_entry); diff --git a/hw/riscv/spike.c b/hw/riscv/spike.c index cc3f6dac17..1fa91167ab 100644 --- a/hw/riscv/spike.c +++ b/hw/riscv/spike.c @@ -305,7 +305,8 @@ static void spike_board_init(MachineState *machine) kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc[0], firmware_end_addr); - kernel_entry = riscv_load_kernel(machine, kernel_start_addr, + kernel_entry = riscv_load_kernel(machine, &s->soc[0], + kernel_start_addr, htif_symbol_callback); if (machine->initrd_filename) { diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index a061151a6f..d0531cc641 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -1277,7 +1277,8 @@ static void virt_machine_done(Notifier *notifier, void *data) kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc[0], firmware_end_addr); - 
kernel_entry = riscv_load_kernel(machine, kernel_start_addr, NULL); + kernel_entry = riscv_load_kernel(machine, &s->soc[0], + kernel_start_addr, NULL); if (machine->initrd_filename) { riscv_load_initrd(machine, kernel_entry); diff --git a/include/hw/riscv/boot.h b/include/hw/riscv/boot.h index 511390f60e..6295316afb 100644 --- a/include/hw/riscv/boot.h +++ b/include/hw/riscv/boot.h @@ -44,6 +44,7 @@ target_ulong riscv_load_firmware(const char *firmware_filename, hwaddr firmware_load_addr, symbol_fn_t sym_cb); target_ulong riscv_load_kernel(MachineState *machine, + RISCVHartArrayState *harts, target_ulong firmware_end_addr, symbol_fn_t sym_cb); void riscv_load_initrd(MachineState *machine, uint64_t kernel_entry);
load_elf_ram_sym() will sign-extend 32 bit addresses. If a 32 bit QEMU guest happens to be running in a hypervisor that is using 64 bits to encode its address, kernel_entry can be padded with '1's and create problems [1]. Using a translate_fn() callback in load_elf_ram_sym() to filter the padding from the address doesn't work. A more detailed explanation can be found in [2]. The short version is that glue(load_elf, SZ), from include/hw/elf_ops.h, will calculate 'pentry' (mapped into the 'kernel_load_base' var in riscv_load_kernel()) before using translate_fn(), and will not recalculate it after executing it. This means that the callback does not prevent the padding in kernel_load_base from appearing. Let's instead use a kernel_low var to capture the 'lowaddr' value from load_elf_ram_sym(), and return it when we're dealing with 32 bit CPUs. [1] https://lists.gnu.org/archive/html/qemu-devel/2023-01/msg02281.html [2] https://lists.gnu.org/archive/html/qemu-devel/2023-02/msg00099.html Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> --- hw/riscv/boot.c | 15 +++++++++++---- hw/riscv/microchip_pfsoc.c | 3 ++- hw/riscv/opentitan.c | 3 ++- hw/riscv/sifive_e.c | 3 ++- hw/riscv/sifive_u.c | 3 ++- hw/riscv/spike.c | 3 ++- hw/riscv/virt.c | 3 ++- include/hw/riscv/boot.h | 1 + 8 files changed, 24 insertions(+), 10 deletions(-)