Message ID | 20230627222152.177716-2-charlie@rivosinc.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Make SV39 the default address space | expand |
On 27 Jun 2023, at 23:21, Charlie Jenkins <charlie@rivosinc.com> wrote: > > Make sv39 the default address space for mmap as some applications > currently depend on this assumption. They are just plain wrong too. Sv48 was in even Priv v1.10 (the first spec where satp was named as such and contained the mode, rather than requiring M-mode’s help in configuring virtual memory), predating the ratified v1.11 spec. A 39-bit address space is pathetic and has implications for ASLR. I strongly suggest applications be forced to support at least Sv48, which is totally reasonable given the address space sizes used by other architectures. Sv57 is more disruptive to some runtimes, though ideally even that would be free for the kernel to use rather than committing to not using it for the default uABI. Jess > The RISC-V specification enforces > that bits outside of the virtual address range are not used, so > restricting the size of the default address space as such should be > temporary. A hint address passed to mmap will cause the largest address > space that fits entirely into the hint to be used. If the hint is less > than or equal to 1<<38, a 39-bit address will be used. After an address > space is completely full, the next smallest address space will be used. > > Signed-off-by: Charlie Jenkins <charlie@rivosinc.com> > --- > arch/riscv/include/asm/elf.h | 2 +- > arch/riscv/include/asm/pgtable.h | 13 +++++++++- > arch/riscv/include/asm/processor.h | 41 +++++++++++++++++++++++++----- > 3 files changed, 47 insertions(+), 9 deletions(-) > > diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h > index 30e7d2455960..1b57f13a1afd 100644 > --- a/arch/riscv/include/asm/elf.h > +++ b/arch/riscv/include/asm/elf.h > @@ -49,7 +49,7 @@ extern bool compat_elf_check_arch(Elf32_Ehdr *hdr); > * the loader. We need to make sure that it is out of the way of the program > * that it will "exec", and that there is sufficient room for the brk. > */ > -#define ELF_ET_DYN_BASE ((TASK_SIZE / 3) * 2) > +#define ELF_ET_DYN_BASE ((DEFAULT_MAP_WINDOW / 3) * 2) > > #ifdef CONFIG_64BIT > #ifdef CONFIG_COMPAT > diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h > index 75970ee2bda2..e83912e97870 100644 > --- a/arch/riscv/include/asm/pgtable.h > +++ b/arch/riscv/include/asm/pgtable.h > @@ -57,18 +57,29 @@ > #define MODULES_END (PFN_ALIGN((unsigned long)&_start)) > #endif > > + > /* > * Roughly size the vmemmap space to be large enough to fit enough > * struct pages to map half the virtual address space. Then > * position vmemmap directly below the VMALLOC region. > */ > #ifdef CONFIG_64BIT > +#define VA_BITS_SV39 39 > +#define VA_BITS_SV48 48 > +#define VA_BITS_SV57 57 > + > +#define VA_USER_SV39 (UL(1) << (VA_BITS_SV39 - 1)) > +#define VA_USER_SV48 (UL(1) << (VA_BITS_SV48 - 1)) > +#define VA_USER_SV57 (UL(1) << (VA_BITS_SV57 - 1)) > + > #define VA_BITS (pgtable_l5_enabled ? \ > - 57 : (pgtable_l4_enabled ? 48 : 39)) > + VA_BITS_SV57 : (pgtable_l4_enabled ? VA_BITS_SV48 : VA_BITS_SV39)) > #else > #define VA_BITS 32 > #endif > > +#define DEFAULT_VA_BITS ((VA_BITS >= VA_BITS_SV39) ? VA_BITS_SV39 : VA_BITS) > + > #define VMEMMAP_SHIFT \ > (VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT) > #define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT) > diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h > index 6fb8bbec8459..019dcd4ecae4 100644 > --- a/arch/riscv/include/asm/processor.h > +++ b/arch/riscv/include/asm/processor.h > @@ -12,20 +12,47 @@ > > #include <asm/ptrace.h> > > -/* > - * This decides where the kernel will search for a free chunk of vm > - * space during mmap's. > - */ > -#define TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE / 3) > - > -#define STACK_TOP TASK_SIZE > #ifdef CONFIG_64BIT > +#define DEFAULT_MAP_WINDOW (UL(1) << (DEFAULT_VA_BITS - 1)) > #define STACK_TOP_MAX TASK_SIZE_64 > + > +#define arch_get_mmap_end(addr, len, flags) \ > + ((addr) == 0 || (addr) >= VA_USER_SV57 ? STACK_TOP_MAX : \ > + (((addr) >= VA_USER_SV48) && (VA_BITS >= VA_BITS_SV48)) ? \ > + VA_USER_SV48 : \ > + VA_USER_SV39) > + > +#define arch_get_mmap_base(addr, base) \ > + (((addr >= VA_USER_SV57) && (VA_BITS >= VA_BITS_SV57)) ? \ > + base + STACK_TOP_MAX - DEFAULT_MAP_WINDOW : \ > + (((addr) >= VA_USER_SV48) && (VA_BITS >= VA_BITS_SV48)) ? \ > + base + VA_USER_SV48 - DEFAULT_MAP_WINDOW : \ > + base) > + > #else > +#define DEFAULT_MAP_WINDOW TASK_SIZE > #define STACK_TOP_MAX TASK_SIZE > + > +#define arch_get_mmap_end(addr, len, flags) \ > + ((addr) > DEFAULT_MAP_WINDOW ? STACK_TOP_MAX : DEFAULT_MAP_WINDOW) > + > +#define arch_get_mmap_base(addr, base) \ > + ((addr > DEFAULT_MAP_WINDOW) ? \ > + base + STACK_TOP_MAX - DEFAULT_MAP_WINDOW : \ > + base) > + > #endif > #define STACK_ALIGN 16 > > + > +#define STACK_TOP DEFAULT_MAP_WINDOW > + > +/* > + * This decides where the kernel will search for a free chunk of vm > + * space during mmap's. > + */ > +#define TASK_UNMAPPED_BASE PAGE_ALIGN(DEFAULT_MAP_WINDOW / 3) > + > #ifndef __ASSEMBLY__ > > struct task_struct; > -- > 2.34.1 > > > _______________________________________________ > linux-riscv mailing list > linux-riscv@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-riscv
On Tue, 27 Jun 2023 15:32:36 PDT (-0700), jrtc27@jrtc27.com wrote: > On 27 Jun 2023, at 23:21, Charlie Jenkins <charlie@rivosinc.com> wrote: >> >> Make sv39 the default address space for mmap as some applications >> currently depend on this assumption. > > They are just plain wrong too. Sv48 was in even Priv v1.10 (the first > spec where satp was named as such and contained the mode, rather than > requiring M-mode’s help in configuring virtual memory), predating the > ratified v1.11 spec. A 39-bit address space is pathetic and has > implications for ASLR. > > I strongly suggest applications be forced to support at least Sv48, > which is totally reasonable given the address space sizes used by other > architectures. Sv57 is more disruptive to some runtimes, though ideally > even that would be free for the kernel to use rather than committing to > not using it for the default uABI. Go and OpenJDK both broke when we expanded the VA width. I don't like it either, but if the change breaks userspace then it's a regression and we have to live with the bug. > Jess > >> The RISC-V specification enforces >> that bits outside of the virtual address range are not used, so >> restricting the size of the default address space as such should be >> temporary. A hint address passed to mmap will cause the largest address >> space that fits entirely into the hint to be used. If the hint is less >> than or equal to 1<<38, a 39-bit address will be used. After an address >> space is completely full, the next smallest address space will be used. >> >> Signed-off-by: Charlie Jenkins <charlie@rivosinc.com> >> --- >> arch/riscv/include/asm/elf.h | 2 +- >> arch/riscv/include/asm/pgtable.h | 13 +++++++++- >> arch/riscv/include/asm/processor.h | 41 +++++++++++++++++++++++++----- >> 3 files changed, 47 insertions(+), 9 deletions(-) >> >> diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h >> index 30e7d2455960..1b57f13a1afd 100644 >> --- a/arch/riscv/include/asm/elf.h >> +++ b/arch/riscv/include/asm/elf.h >> @@ -49,7 +49,7 @@ extern bool compat_elf_check_arch(Elf32_Ehdr *hdr); >> * the loader. We need to make sure that it is out of the way of the program >> * that it will "exec", and that there is sufficient room for the brk. >> */ >> -#define ELF_ET_DYN_BASE ((TASK_SIZE / 3) * 2) >> +#define ELF_ET_DYN_BASE ((DEFAULT_MAP_WINDOW / 3) * 2) >> >> #ifdef CONFIG_64BIT >> #ifdef CONFIG_COMPAT >> diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h >> index 75970ee2bda2..e83912e97870 100644 >> --- a/arch/riscv/include/asm/pgtable.h >> +++ b/arch/riscv/include/asm/pgtable.h >> @@ -57,18 +57,29 @@ >> #define MODULES_END (PFN_ALIGN((unsigned long)&_start)) >> #endif >> >> + >> /* >> * Roughly size the vmemmap space to be large enough to fit enough >> * struct pages to map half the virtual address space. Then >> * position vmemmap directly below the VMALLOC region. >> */ >> #ifdef CONFIG_64BIT >> +#define VA_BITS_SV39 39 >> +#define VA_BITS_SV48 48 >> +#define VA_BITS_SV57 57 >> + >> +#define VA_USER_SV39 (UL(1) << (VA_BITS_SV39 - 1)) >> +#define VA_USER_SV48 (UL(1) << (VA_BITS_SV48 - 1)) >> +#define VA_USER_SV57 (UL(1) << (VA_BITS_SV57 - 1)) >> + >> #define VA_BITS (pgtable_l5_enabled ? \ >> - 57 : (pgtable_l4_enabled ? 48 : 39)) >> + VA_BITS_SV57 : (pgtable_l4_enabled ? VA_BITS_SV48 : VA_BITS_SV39)) >> #else >> #define VA_BITS 32 >> #endif >> >> +#define DEFAULT_VA_BITS ((VA_BITS >= VA_BITS_SV39) ? VA_BITS_SV39 : VA_BITS) >> + >> #define VMEMMAP_SHIFT \ >> (VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT) >> #define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT) >> diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h >> index 6fb8bbec8459..019dcd4ecae4 100644 >> --- a/arch/riscv/include/asm/processor.h >> +++ b/arch/riscv/include/asm/processor.h >> @@ -12,20 +12,47 @@ >> >> #include <asm/ptrace.h> >> >> -/* >> - * This decides where the kernel will search for a free chunk of vm >> - * space during mmap's. >> - */ >> -#define TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE / 3) >> - >> -#define STACK_TOP TASK_SIZE >> #ifdef CONFIG_64BIT >> +#define DEFAULT_MAP_WINDOW (UL(1) << (DEFAULT_VA_BITS - 1)) >> #define STACK_TOP_MAX TASK_SIZE_64 >> + >> +#define arch_get_mmap_end(addr, len, flags) \ >> + ((addr) == 0 || (addr) >= VA_USER_SV57 ? STACK_TOP_MAX : \ >> + (((addr) >= VA_USER_SV48) && (VA_BITS >= VA_BITS_SV48)) ? \ >> + VA_USER_SV48 : \ >> + VA_USER_SV39) >> + >> +#define arch_get_mmap_base(addr, base) \ >> + (((addr >= VA_USER_SV57) && (VA_BITS >= VA_BITS_SV57)) ? \ >> + base + STACK_TOP_MAX - DEFAULT_MAP_WINDOW : \ >> + (((addr) >= VA_USER_SV48) && (VA_BITS >= VA_BITS_SV48)) ? \ >> + base + VA_USER_SV48 - DEFAULT_MAP_WINDOW : \ >> + base) >> + >> #else >> +#define DEFAULT_MAP_WINDOW TASK_SIZE >> #define STACK_TOP_MAX TASK_SIZE >> + >> +#define arch_get_mmap_end(addr, len, flags) \ >> + ((addr) > DEFAULT_MAP_WINDOW ? STACK_TOP_MAX : DEFAULT_MAP_WINDOW) >> + >> +#define arch_get_mmap_base(addr, base) \ >> + ((addr > DEFAULT_MAP_WINDOW) ? \ >> + base + STACK_TOP_MAX - DEFAULT_MAP_WINDOW : \ >> + base) >> + >> #endif >> #define STACK_ALIGN 16 >> >> + >> +#define STACK_TOP DEFAULT_MAP_WINDOW >> + >> +/* >> + * This decides where the kernel will search for a free chunk of vm >> + * space during mmap's. >> + */ >> +#define TASK_UNMAPPED_BASE PAGE_ALIGN(DEFAULT_MAP_WINDOW / 3) >> + >> #ifndef __ASSEMBLY__ >> >> struct task_struct; >> -- >> 2.34.1 >> >> >> _______________________________________________ >> linux-riscv mailing list >> linux-riscv@lists.infradead.org >> http://lists.infradead.org/mailman/listinfo/linux-riscv
Yes it is small to have a default of 38-bits of userspace. I would be interesting in the opinions of other people on whether it would be acceptable to have the default be sv48 and require applications that prefer fewer bits to specify so with the given mmap hinting.
On 6/28/23 02:36, Palmer Dabbelt wrote: > On Tue, 27 Jun 2023 15:32:36 PDT (-0700), jrtc27@jrtc27.com wrote: >> On 27 Jun 2023, at 23:21, Charlie Jenkins <charlie@rivosinc.com> wrote: >>> >>> Make sv39 the default address space for mmap as some applications >>> currently depend on this assumption. >> >> They are just plain wrong too. Sv48 was in even Priv v1.10 (the first >> spec where satp was named as such and contained the mode, rather than >> requiring M-mode’s help in configuring virtual memory), predating the >> ratified v1.11 spec. A 39-bit address space is pathetic and has >> implications for ASLR. >> >> I strongly suggest applications be forced to support at least Sv48, >> which is totally reasonable given the address space sizes used by other >> architectures. Sv57 is more disruptive to some runtimes, though ideally >> even that would be free for the kernel to use rather than committing to >> not using it for the default uABI. > > Go and OpenJDK both broke when we expanded the VA width. I don't like > it either, but if the change breaks userspace then it's a regression and > we have to live with the bug. > Have we debugged this ? do we at least know why they break ? Just disabling Sv48/57 by default for everyone because some userspace apps break doesn't seem the correct approach, it seems more like a bug in userspace IMHO.
On Wed, Jun 28, 2023 at 5:09 AM Charlie Jenkins <charlie@rivosinc.com> wrote: > > Yes it is small to have a default of 38-bits of userspace. I would be > interesting in the opinions of other people on whether it would be > acceptable to have the default be sv48 and require applications that > prefer fewer bits to specify so with the given mmap hinting. I think sv48 is a reasonable default instead of sv39. We should fallback to sv39 only if the underlying host does not support sv48. Regards, Anup
On Wed, Jun 28, 2023 at 06:04:41PM +0530, Anup Patel wrote: > On Wed, Jun 28, 2023 at 5:09 AM Charlie Jenkins <charlie@rivosinc.com> wrote: > > > > Yes it is small to have a default of 38-bits of userspace. I would be > > interesting in the opinions of other people on whether it would be > > acceptable to have the default be sv48 and require applications that > > prefer fewer bits to specify so with the given mmap hinting. > > I think sv48 is a reasonable default instead of sv39. We should fallback > to sv39 only if the underlying host does not support sv48. > > Regards, > Anup I did some research and it appears that Java does work on sv48, but not on sv57. Using the v6.4 kernel I was able to successfully run OpenJDK on both sv38 and sv48, but on sv57 there is a SIGSEGV error on QEMU. Relevant JDK discussion can be seen here https://mail.openjdk.org/pipermail/hotspot-dev/2022-November/067298.html. Go similarly appears to work even on sv57 according to https://go-review.googlesource.com/c/go/+/409055. I have not tried Go myself. The point of contention here I believe is that in v6.4, the highest address space available will be used, causing all of these applications that do not work properly in sv57 to fail when testing in sv57 environments. Given that these applications seem to work in sv48, it seems reasonable to default to sv48, unless there are an abundance of additional applications that are unhappy with this. Using the hint mechanism to mmap will then allow users to change the address space to sv57 if required. It should be possible to allow users to use sv38 if they need it using the same mechanism, but reducing the address space instead of growing it will require more thought from me to implement. Thanks, Charlie
diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h index 30e7d2455960..1b57f13a1afd 100644 --- a/arch/riscv/include/asm/elf.h +++ b/arch/riscv/include/asm/elf.h @@ -49,7 +49,7 @@ extern bool compat_elf_check_arch(Elf32_Ehdr *hdr); * the loader. We need to make sure that it is out of the way of the program * that it will "exec", and that there is sufficient room for the brk. */ -#define ELF_ET_DYN_BASE ((TASK_SIZE / 3) * 2) +#define ELF_ET_DYN_BASE ((DEFAULT_MAP_WINDOW / 3) * 2) #ifdef CONFIG_64BIT #ifdef CONFIG_COMPAT diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 75970ee2bda2..e83912e97870 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -57,18 +57,29 @@ #define MODULES_END (PFN_ALIGN((unsigned long)&_start)) #endif + /* * Roughly size the vmemmap space to be large enough to fit enough * struct pages to map half the virtual address space. Then * position vmemmap directly below the VMALLOC region. */ #ifdef CONFIG_64BIT +#define VA_BITS_SV39 39 +#define VA_BITS_SV48 48 +#define VA_BITS_SV57 57 + +#define VA_USER_SV39 (UL(1) << (VA_BITS_SV39 - 1)) +#define VA_USER_SV48 (UL(1) << (VA_BITS_SV48 - 1)) +#define VA_USER_SV57 (UL(1) << (VA_BITS_SV57 - 1)) + #define VA_BITS (pgtable_l5_enabled ? \ - 57 : (pgtable_l4_enabled ? 48 : 39)) + VA_BITS_SV57 : (pgtable_l4_enabled ? VA_BITS_SV48 : VA_BITS_SV39)) #else #define VA_BITS 32 #endif +#define DEFAULT_VA_BITS ((VA_BITS >= VA_BITS_SV39) ? VA_BITS_SV39 : VA_BITS) + #define VMEMMAP_SHIFT \ (VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT) #define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT) diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index 6fb8bbec8459..019dcd4ecae4 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -12,20 +12,47 @@ #include <asm/ptrace.h> -/* - * This decides where the kernel will search for a free chunk of vm - * space during mmap's. - */ -#define TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE / 3) - -#define STACK_TOP TASK_SIZE #ifdef CONFIG_64BIT +#define DEFAULT_MAP_WINDOW (UL(1) << (DEFAULT_VA_BITS - 1)) #define STACK_TOP_MAX TASK_SIZE_64 + +#define arch_get_mmap_end(addr, len, flags) \ + ((addr) == 0 || (addr) >= VA_USER_SV57 ? STACK_TOP_MAX : \ + (((addr) >= VA_USER_SV48) && (VA_BITS >= VA_BITS_SV48)) ? \ + VA_USER_SV48 : \ + VA_USER_SV39) + +#define arch_get_mmap_base(addr, base) \ + (((addr >= VA_USER_SV57) && (VA_BITS >= VA_BITS_SV57)) ? \ + base + STACK_TOP_MAX - DEFAULT_MAP_WINDOW : \ + (((addr) >= VA_USER_SV48) && (VA_BITS >= VA_BITS_SV48)) ? \ + base + VA_USER_SV48 - DEFAULT_MAP_WINDOW : \ + base) + #else +#define DEFAULT_MAP_WINDOW TASK_SIZE #define STACK_TOP_MAX TASK_SIZE + +#define arch_get_mmap_end(addr, len, flags) \ + ((addr) > DEFAULT_MAP_WINDOW ? STACK_TOP_MAX : DEFAULT_MAP_WINDOW) + +#define arch_get_mmap_base(addr, base) \ + ((addr > DEFAULT_MAP_WINDOW) ? \ + base + STACK_TOP_MAX - DEFAULT_MAP_WINDOW : \ + base) + #endif #define STACK_ALIGN 16 + +#define STACK_TOP DEFAULT_MAP_WINDOW + +/* + * This decides where the kernel will search for a free chunk of vm + * space during mmap's. + */ +#define TASK_UNMAPPED_BASE PAGE_ALIGN(DEFAULT_MAP_WINDOW / 3) + #ifndef __ASSEMBLY__ struct task_struct;
Make sv39 the default address space for mmap as some applications currently depend on this assumption. The RISC-V specification enforces that bits outside of the virtual address range are not used, so restricting the size of the default address space as such should be temporary. A hint address passed to mmap will cause the largest address space that fits entirely into the hint to be used. If the hint is less than or equal to 1<<38, a 39-bit address will be used. After an address space is completely full, the next smallest address space will be used. Signed-off-by: Charlie Jenkins <charlie@rivosinc.com> --- arch/riscv/include/asm/elf.h | 2 +- arch/riscv/include/asm/pgtable.h | 13 +++++++++- arch/riscv/include/asm/processor.h | 41 +++++++++++++++++++++++++----- 3 files changed, 47 insertions(+), 9 deletions(-)