Message ID | 20220214163452.1568807-9-arnd@kernel.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | clean up asm/uaccess.h, kill set_fs for good | expand |
On 2022-02-14 16:34, Arnd Bergmann wrote: > From: Arnd Bergmann <arnd@arndb.de> > > arm64 has an inline asm implementation of access_ok() that is derived from > the 32-bit arm version and optimized for the case that both the limit and > the size are variable. With set_fs() gone, the limit is always constant, > and the size usually is as well, so just using the default implementation > reduces the check into a comparison against a constant that can be > scheduled by the compiler. Aww, I still vividly remember the birth of this madness, sat with my phone on a Saturday morning waiting for my bike to be MOT'd, staring at the 7-instruction sequence that Mark and I had come up with and certain that it could be shortened still. Kinda sad to see it go, but at the same time, glad that it can. Acked-by: Robin Murphy <robin.murphy@arm.com> > On a defconfig build, this saves over 28KB of .text. Not to mention saving those "WTF is going on there... oh yeah, access_ok()" moments when looking through disassembly :) Cheers, Robin. > Signed-off-by: Arnd Bergmann <arnd@arndb.de> > --- > arch/arm64/include/asm/uaccess.h | 28 +++++----------------------- > 1 file changed, 5 insertions(+), 23 deletions(-) > > diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h > index 357f7bd9c981..e8dce0cc5eaa 100644 > --- a/arch/arm64/include/asm/uaccess.h > +++ b/arch/arm64/include/asm/uaccess.h > @@ -26,6 +26,8 @@ > #include <asm/memory.h> > #include <asm/extable.h> > > +static inline int __access_ok(const void __user *ptr, unsigned long size); > + > /* > * Test whether a block of memory is a valid user space address. > * Returns 1 if the range is valid, 0 otherwise. 
> @@ -33,10 +35,8 @@ > * This is equivalent to the following test: > * (u65)addr + (u65)size <= (u65)TASK_SIZE_MAX > */ > -static inline unsigned long __access_ok(const void __user *addr, unsigned long size) > +static inline int access_ok(const void __user *addr, unsigned long size) > { > - unsigned long ret, limit = TASK_SIZE_MAX - 1; > - > /* > * Asynchronous I/O running in a kernel thread does not have the > * TIF_TAGGED_ADDR flag of the process owning the mm, so always untag > @@ -46,27 +46,9 @@ static inline unsigned long __access_ok(const void __user *addr, unsigned long s > (current->flags & PF_KTHREAD || test_thread_flag(TIF_TAGGED_ADDR))) > addr = untagged_addr(addr); > > - __chk_user_ptr(addr); > - asm volatile( > - // A + B <= C + 1 for all A,B,C, in four easy steps: > - // 1: X = A + B; X' = X % 2^64 > - " adds %0, %3, %2\n" > - // 2: Set C = 0 if X > 2^64, to guarantee X' > C in step 4 > - " csel %1, xzr, %1, hi\n" > - // 3: Set X' = ~0 if X >= 2^64. For X == 2^64, this decrements X' > - // to compensate for the carry flag being set in step 4. For > - // X > 2^64, X' merely has to remain nonzero, which it does. > - " csinv %0, %0, xzr, cc\n" > - // 4: For X < 2^64, this gives us X' - C - 1 <= 0, where the -1 > - // comes from the carry in being clear. Otherwise, we are > - // testing X' - C == 0, subject to the previous adjustments. > - " sbcs xzr, %0, %1\n" > - " cset %0, ls\n" > - : "=&r" (ret), "+r" (limit) : "Ir" (size), "0" (addr) : "cc"); > - > - return ret; > + return likely(__access_ok(addr, size)); > } > -#define __access_ok __access_ok > +#define access_ok access_ok > > #include <asm-generic/access_ok.h> >
On Mon, 14 Feb 2022 at 17:37, Arnd Bergmann <arnd@kernel.org> wrote: > > From: Arnd Bergmann <arnd@arndb.de> > > arm64 has an inline asm implementation of access_ok() that is derived from > the 32-bit arm version and optimized for the case that both the limit and > the size are variable. With set_fs() gone, the limit is always constant, > and the size usually is as well, so just using the default implementation > reduces the check into a comparison against a constant that can be > scheduled by the compiler. > > On a defconfig build, this saves over 28KB of .text. > > Signed-off-by: Arnd Bergmann <arnd@arndb.de> > --- > arch/arm64/include/asm/uaccess.h | 28 +++++----------------------- > 1 file changed, 5 insertions(+), 23 deletions(-) > > diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h > index 357f7bd9c981..e8dce0cc5eaa 100644 > --- a/arch/arm64/include/asm/uaccess.h > +++ b/arch/arm64/include/asm/uaccess.h > @@ -26,6 +26,8 @@ > #include <asm/memory.h> > #include <asm/extable.h> > > +static inline int __access_ok(const void __user *ptr, unsigned long size); > + > /* > * Test whether a block of memory is a valid user space address. > * Returns 1 if the range is valid, 0 otherwise. 
> @@ -33,10 +35,8 @@ > * This is equivalent to the following test: > * (u65)addr + (u65)size <= (u65)TASK_SIZE_MAX > */ > -static inline unsigned long __access_ok(const void __user *addr, unsigned long size) > +static inline int access_ok(const void __user *addr, unsigned long size) > { > - unsigned long ret, limit = TASK_SIZE_MAX - 1; > - > /* > * Asynchronous I/O running in a kernel thread does not have the > * TIF_TAGGED_ADDR flag of the process owning the mm, so always untag > @@ -46,27 +46,9 @@ static inline unsigned long __access_ok(const void __user *addr, unsigned long s > (current->flags & PF_KTHREAD || test_thread_flag(TIF_TAGGED_ADDR))) > addr = untagged_addr(addr); > > - __chk_user_ptr(addr); > - asm volatile( > - // A + B <= C + 1 for all A,B,C, in four easy steps: > - // 1: X = A + B; X' = X % 2^64 > - " adds %0, %3, %2\n" > - // 2: Set C = 0 if X > 2^64, to guarantee X' > C in step 4 > - " csel %1, xzr, %1, hi\n" > - // 3: Set X' = ~0 if X >= 2^64. For X == 2^64, this decrements X' > - // to compensate for the carry flag being set in step 4. For > - // X > 2^64, X' merely has to remain nonzero, which it does. > - " csinv %0, %0, xzr, cc\n" > - // 4: For X < 2^64, this gives us X' - C - 1 <= 0, where the -1 > - // comes from the carry in being clear. Otherwise, we are > - // testing X' - C == 0, subject to the previous adjustments. > - " sbcs xzr, %0, %1\n" > - " cset %0, ls\n" > - : "=&r" (ret), "+r" (limit) : "Ir" (size), "0" (addr) : "cc"); > - > - return ret; > + return likely(__access_ok(addr, size)); > } > -#define __access_ok __access_ok > +#define access_ok access_ok > > #include <asm-generic/access_ok.h> > > -- > 2.29.2 > With set_fs() out of the picture, wouldn't it be sufficient to check that bit #55 is clear? (the bit that selects between TTBR0 and TTBR1) That would also remove the need to strip the tag from the address. 
Something like asm goto("tbnz %0, #55, %2 \n" "tbnz %1, #55, %2 \n" :: "r"(addr), "r"(addr + size - 1) :: notok); return 1; notok: return 0; with an additional sanity check on the size which the compiler could eliminate for compile-time constant values.
On Tue, Feb 15, 2022 at 9:17 AM Ard Biesheuvel <ardb@kernel.org> wrote: > On Mon, 14 Feb 2022 at 17:37, Arnd Bergmann <arnd@kernel.org> wrote: > > From: Arnd Bergmann <arnd@arndb.de> > > > > With set_fs() out of the picture, wouldn't it be sufficient to check > that bit #55 is clear? (the bit that selects between TTBR0 and TTBR1) > That would also remove the need to strip the tag from the address. > > Something like > > asm goto("tbnz %0, #55, %2 \n" > "tbnz %1, #55, %2 \n" > :: "r"(addr), "r"(addr + size - 1) :: notok); > return 1; > notok: > return 0; > > with an additional sanity check on the size which the compiler could > eliminate for compile-time constant values. That should work, but I don't see it as a clear enough advantage to have a custom implementation. For the constant-size case, it probably isn't better than a compiler-scheduled comparison against a constant limit, but it does hurt maintainability when the next person wants to change the behavior of access_ok() globally. If we want to get into micro-optimizing uaccess, I think a better target would be a CONFIG_CC_HAS_ASM_GOTO_OUTPUT version of __get_user()/__put_user as we have on x86 and powerpc. Arnd
On Tue, 15 Feb 2022 at 10:13, Arnd Bergmann <arnd@kernel.org> wrote: > > On Tue, Feb 15, 2022 at 9:17 AM Ard Biesheuvel <ardb@kernel.org> wrote: > > On Mon, 14 Feb 2022 at 17:37, Arnd Bergmann <arnd@kernel.org> wrote: > > > From: Arnd Bergmann <arnd@arndb.de> > > > > > > > With set_fs() out of the picture, wouldn't it be sufficient to check > > that bit #55 is clear? (the bit that selects between TTBR0 and TTBR1) > > That would also remove the need to strip the tag from the address. > > > > Something like > > > > asm goto("tbnz %0, #55, %2 \n" > > "tbnz %1, #55, %2 \n" > > :: "r"(addr), "r"(addr + size - 1) :: notok); > > return 1; > > notok: > > return 0; > > > > with an additional sanity check on the size which the compiler could > > eliminate for compile-time constant values. > > That should work, but I don't see it as a clear enough advantage to > have a custom implementation. For the constant-size case, it probably > isn't better than a compiler-scheduled comparison against a > constant limit, but it does hurt maintainability when the next person > wants to change the behavior of access_ok() globally. > arm64 also has this leading up to the range check, and I think we'd no longer need it: if (IS_ENABLED(CONFIG_ARM64_TAGGED_ADDR_ABI) && (current->flags & PF_KTHREAD || test_thread_flag(TIF_TAGGED_ADDR))) addr = untagged_addr(addr); > If we want to get into micro-optimizing uaccess, I think a better target > would be a CONFIG_CC_HAS_ASM_GOTO_OUTPUT version > of __get_user()/__put_user as we have on x86 and powerpc. > > Arnd
From: Ard Biesheuvel > Sent: 15 February 2022 08:18 > > On Mon, 14 Feb 2022 at 17:37, Arnd Bergmann <arnd@kernel.org> wrote: > > > > From: Arnd Bergmann <arnd@arndb.de> > > > > arm64 has an inline asm implementation of access_ok() that is derived from > > the 32-bit arm version and optimized for the case that both the limit and > > the size are variable. With set_fs() gone, the limit is always constant, > > and the size usually is as well, so just using the default implementation > > reduces the check into a comparison against a constant that can be > > scheduled by the compiler. > > > > On a defconfig build, this saves over 28KB of .text. > > > > Signed-off-by: Arnd Bergmann <arnd@arndb.de> > > --- > > arch/arm64/include/asm/uaccess.h | 28 +++++----------------------- > > 1 file changed, 5 insertions(+), 23 deletions(-) > > > > diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h > > index 357f7bd9c981..e8dce0cc5eaa 100644 > > --- a/arch/arm64/include/asm/uaccess.h > > +++ b/arch/arm64/include/asm/uaccess.h > > @@ -26,6 +26,8 @@ > > #include <asm/memory.h> > > #include <asm/extable.h> > > > > +static inline int __access_ok(const void __user *ptr, unsigned long size); > > + > > /* > > * Test whether a block of memory is a valid user space address. > > * Returns 1 if the range is valid, 0 otherwise. 
> > @@ -33,10 +35,8 @@ > > * This is equivalent to the following test: > > * (u65)addr + (u65)size <= (u65)TASK_SIZE_MAX > > */ > > -static inline unsigned long __access_ok(const void __user *addr, unsigned long size) > > +static inline int access_ok(const void __user *addr, unsigned long size) > > { > > - unsigned long ret, limit = TASK_SIZE_MAX - 1; > > - > > /* > > * Asynchronous I/O running in a kernel thread does not have the > > * TIF_TAGGED_ADDR flag of the process owning the mm, so always untag > > @@ -46,27 +46,9 @@ static inline unsigned long __access_ok(const void __user *addr, unsigned long s > > (current->flags & PF_KTHREAD || test_thread_flag(TIF_TAGGED_ADDR))) > > addr = untagged_addr(addr); > > > > - __chk_user_ptr(addr); > > - asm volatile( > > - // A + B <= C + 1 for all A,B,C, in four easy steps: > > - // 1: X = A + B; X' = X % 2^64 > > - " adds %0, %3, %2\n" > > - // 2: Set C = 0 if X > 2^64, to guarantee X' > C in step 4 > > - " csel %1, xzr, %1, hi\n" > > - // 3: Set X' = ~0 if X >= 2^64. For X == 2^64, this decrements X' > > - // to compensate for the carry flag being set in step 4. For > > - // X > 2^64, X' merely has to remain nonzero, which it does. > > - " csinv %0, %0, xzr, cc\n" > > - // 4: For X < 2^64, this gives us X' - C - 1 <= 0, where the -1 > > - // comes from the carry in being clear. Otherwise, we are > > - // testing X' - C == 0, subject to the previous adjustments. > > - " sbcs xzr, %0, %1\n" > > - " cset %0, ls\n" > > - : "=&r" (ret), "+r" (limit) : "Ir" (size), "0" (addr) : "cc"); > > - > > - return ret; > > + return likely(__access_ok(addr, size)); > > } > > -#define __access_ok __access_ok > > +#define access_ok access_ok > > > > #include <asm-generic/access_ok.h> > > > > -- > > 2.29.2 > > > > With set_fs() out of the picture, wouldn't it be sufficient to check > that bit #55 is clear? (the bit that selects between TTBR0 and TTBR1) > That would also remove the need to strip the tag from the address. 
> > Something like > > asm goto("tbnz %0, #55, %2 \n" > "tbnz %1, #55, %2 \n" > :: "r"(addr), "r"(addr + size - 1) :: notok); > return 1; > notok: > return 0; > > with an additional sanity check on the size which the compiler could > eliminate for compile-time constant values. Is there a reason not to just use: size < 1u << 48 && !((addr | (addr + size - 1)) & 1u << 55) (The -1 can be removed if the last user page is never mapped) Ugg, is arm64 addressing as horrid as it looks - with the 'kernel' bit in the middle of the virtual address space? It seems to be: <zero:4><tag:4><kernel:1><ignored:7><address:48> Although I found some references to 44 bit VA and to code using the 'ignored' bits as tags - relying on the hardware ignoring them. There might be some feature that uses the top 4 bits as well. Another option is assuming that accesses are 'reasonably sequential', removing the length check and ensuring there is an unmapped page between valid user and kernel addresses. That probably requires an unmapped page at the bottom of kernel space which may not be achievable. David - Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK Registration No: 1397386 (Wales)
On Tue, Feb 15, 2022 at 10:21 AM Ard Biesheuvel <ardb@kernel.org> wrote: > On Tue, 15 Feb 2022 at 10:13, Arnd Bergmann <arnd@kernel.org> wrote: > > arm64 also has this leading up to the range check, and I think we'd no > longer need it: > > if (IS_ENABLED(CONFIG_ARM64_TAGGED_ADDR_ABI) && > (current->flags & PF_KTHREAD || test_thread_flag(TIF_TAGGED_ADDR))) > addr = untagged_addr(addr); I suspect the expensive part here is checking the two flags, as untagged_addr() seems to always just add a sbfx instruction. Would this work? #ifdef CONFIG_ARM64_TAGGED_ADDR_ABI #define access_ok(ptr, size) __access_ok(untagged_addr(ptr), (size)) #else // the else path is the default, this can be left out. #define access_ok(ptr, size) __access_ok((ptr), (size)) #endif Arnd
On Tue, Feb 15, 2022 at 10:21:16AM +0100, Ard Biesheuvel wrote: > On Tue, 15 Feb 2022 at 10:13, Arnd Bergmann <arnd@kernel.org> wrote: > > > > On Tue, Feb 15, 2022 at 9:17 AM Ard Biesheuvel <ardb@kernel.org> wrote: > > > On Mon, 14 Feb 2022 at 17:37, Arnd Bergmann <arnd@kernel.org> wrote: > > > > From: Arnd Bergmann <arnd@arndb.de> > > > > > > > > > > With set_fs() out of the picture, wouldn't it be sufficient to check > > > that bit #55 is clear? (the bit that selects between TTBR0 and TTBR1) > > > That would also remove the need to strip the tag from the address. > > > > > > Something like > > > > > > asm goto("tbnz %0, #55, %2 \n" > > > "tbnz %1, #55, %2 \n" > > > :: "r"(addr), "r"(addr + size - 1) :: notok); > > > return 1; > > > notok: > > > return 0; > > > > > > with an additional sanity check on the size which the compiler could > > > eliminate for compile-time constant values. > > > > That should work, but I don't see it as a clear enough advantage to > > have a custom implementation. For the constant-size case, it probably > > isn't better than a compiler-scheduled comparison against a > > constant limit, but it does hurt maintainability when the next person > > wants to change the behavior of access_ok() globally. > > > > arm64 also has this leading up to the range check, and I think we'd no > longer need it: > > if (IS_ENABLED(CONFIG_ARM64_TAGGED_ADDR_ABI) && > (current->flags & PF_KTHREAD || test_thread_flag(TIF_TAGGED_ADDR))) > addr = untagged_addr(addr); > ABI-wise, we aim to *reject* tagged pointers unless the task is using the tagged addr ABI, so we need to retain both the untagging logic and the full pointer check (to actually check the tag bits) unless we relax that ABI decision generally (or go context-switch the TCR_EL1.TBI* bits). Since that has subtle ABI implications, I don't think we should change that within this series. 
If we *did* relax things, we could just check bit 55 here, and unconditionally clear that in uaccess_mask_ptr(), since LDTR/STTR should fault on kernel memory. On parts with meltdown those might not fault until committed, and so we need masking to avoid speculative access to a kernel pointer, and that requires the prior explicit check. Thanks, Mark.
On Tue, Feb 15, 2022 at 10:39:46AM +0100, Arnd Bergmann wrote: > On Tue, Feb 15, 2022 at 10:21 AM Ard Biesheuvel <ardb@kernel.org> wrote: > > On Tue, 15 Feb 2022 at 10:13, Arnd Bergmann <arnd@kernel.org> wrote: > > > > arm64 also has this leading up to the range check, and I think we'd no > > longer need it: > > > > if (IS_ENABLED(CONFIG_ARM64_TAGGED_ADDR_ABI) && > > (current->flags & PF_KTHREAD || test_thread_flag(TIF_TAGGED_ADDR))) > > addr = untagged_addr(addr); > > I suspect the expensive part here is checking the two flags, as untagged_addr() > seems to always just add a sbfx instruction. Would this work? > > #ifdef CONFIG_ARM64_TAGGED_ADDR_ABI > #define access_ok(ptr, size) __access_ok(untagged_addr(ptr), (size)) > #else // the else path is the default, this can be left out. > #define access_ok(ptr, size) __access_ok((ptr), (size)) > #endif This would be an ABI change, e.g. for tasks without TIF_TAGGED_ADDR. I don't think we should change this as part of this series. Thanks, Mark.
On Mon, Feb 14, 2022 at 05:34:46PM +0100, Arnd Bergmann wrote: > From: Arnd Bergmann <arnd@arndb.de> > > arm64 has an inline asm implementation of access_ok() that is derived from > the 32-bit arm version and optimized for the case that both the limit and > the size are variable. With set_fs() gone, the limit is always constant, > and the size usually is as well, so just using the default implementation > reduces the check into a comparison against a constant that can be > scheduled by the compiler. > > On a defconfig build, this saves over 28KB of .text. > > Signed-off-by: Arnd Bergmann <arnd@arndb.de> I had a play around with this and a number of alternative options that had previously been discussed (e.g. using uint128_t for the check to allow the compiler to use the carry flag), and: * Any sequences which were significantly simpler involved an ABI change (e.g. not checking tags for tasks not using the relaxed tag ABI), or didn't interact well with the uaccess pointer masking we do for speculation hardening. * For all constant-size cases, this was joint-best for codegen. * For variable-size cases the difference between options (which did not change ABI or break pointer masking) fell in the noise and really depended on what you were optimizing for. This patch itself is clear, I believe the logic is sound and does not result in a behavioural change, so for this as-is: Acked-by: Mark Rutland <mark.rutland@arm.com> As on other replies, I think that if we want to make further changes to this, we should do that as follow-ups, since there are a number of subtleties in this area w.r.t. tag management and speculation with potential ABI implications. Thanks, Mark. 
> --- > arch/arm64/include/asm/uaccess.h | 28 +++++----------------------- > 1 file changed, 5 insertions(+), 23 deletions(-) > > diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h > index 357f7bd9c981..e8dce0cc5eaa 100644 > --- a/arch/arm64/include/asm/uaccess.h > +++ b/arch/arm64/include/asm/uaccess.h > @@ -26,6 +26,8 @@ > #include <asm/memory.h> > #include <asm/extable.h> > > +static inline int __access_ok(const void __user *ptr, unsigned long size); > + > /* > * Test whether a block of memory is a valid user space address. > * Returns 1 if the range is valid, 0 otherwise. > @@ -33,10 +35,8 @@ > * This is equivalent to the following test: > * (u65)addr + (u65)size <= (u65)TASK_SIZE_MAX > */ > -static inline unsigned long __access_ok(const void __user *addr, unsigned long size) > +static inline int access_ok(const void __user *addr, unsigned long size) > { > - unsigned long ret, limit = TASK_SIZE_MAX - 1; > - > /* > * Asynchronous I/O running in a kernel thread does not have the > * TIF_TAGGED_ADDR flag of the process owning the mm, so always untag > @@ -46,27 +46,9 @@ static inline unsigned long __access_ok(const void __user *addr, unsigned long s > (current->flags & PF_KTHREAD || test_thread_flag(TIF_TAGGED_ADDR))) > addr = untagged_addr(addr); > > - __chk_user_ptr(addr); > - asm volatile( > - // A + B <= C + 1 for all A,B,C, in four easy steps: > - // 1: X = A + B; X' = X % 2^64 > - " adds %0, %3, %2\n" > - // 2: Set C = 0 if X > 2^64, to guarantee X' > C in step 4 > - " csel %1, xzr, %1, hi\n" > - // 3: Set X' = ~0 if X >= 2^64. For X == 2^64, this decrements X' > - // to compensate for the carry flag being set in step 4. For > - // X > 2^64, X' merely has to remain nonzero, which it does. > - " csinv %0, %0, xzr, cc\n" > - // 4: For X < 2^64, this gives us X' - C - 1 <= 0, where the -1 > - // comes from the carry in being clear. Otherwise, we are > - // testing X' - C == 0, subject to the previous adjustments. 
> - " sbcs xzr, %0, %1\n" > - " cset %0, ls\n" > - : "=&r" (ret), "+r" (limit) : "Ir" (size), "0" (addr) : "cc"); > - > - return ret; > + return likely(__access_ok(addr, size)); > } > -#define __access_ok __access_ok > +#define access_ok access_ok > > #include <asm-generic/access_ok.h> > > -- > 2.29.2 >
On Tue, Feb 15, 2022 at 09:30:41AM +0000, David Laight wrote: > From: Ard Biesheuvel > > Sent: 15 February 2022 08:18 > > > > On Mon, 14 Feb 2022 at 17:37, Arnd Bergmann <arnd@kernel.org> wrote: > > > > > > From: Arnd Bergmann <arnd@arndb.de> > > > > > > arm64 has an inline asm implementation of access_ok() that is derived from > > > the 32-bit arm version and optimized for the case that both the limit and > > > the size are variable. With set_fs() gone, the limit is always constant, > > > and the size usually is as well, so just using the default implementation > > > reduces the check into a comparison against a constant that can be > > > scheduled by the compiler. > > > > > > On a defconfig build, this saves over 28KB of .text. > > > > > > Signed-off-by: Arnd Bergmann <arnd@arndb.de> > > > --- > > > arch/arm64/include/asm/uaccess.h | 28 +++++----------------------- > > > 1 file changed, 5 insertions(+), 23 deletions(-) > > > > > > diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h > > > index 357f7bd9c981..e8dce0cc5eaa 100644 > > > --- a/arch/arm64/include/asm/uaccess.h > > > +++ b/arch/arm64/include/asm/uaccess.h > > > @@ -26,6 +26,8 @@ > > > #include <asm/memory.h> > > > #include <asm/extable.h> > > > > > > +static inline int __access_ok(const void __user *ptr, unsigned long size); > > > + > > > /* > > > * Test whether a block of memory is a valid user space address. > > > * Returns 1 if the range is valid, 0 otherwise. 
> > > @@ -33,10 +35,8 @@ > > > * This is equivalent to the following test: > > > * (u65)addr + (u65)size <= (u65)TASK_SIZE_MAX > > > */ > > > -static inline unsigned long __access_ok(const void __user *addr, unsigned long size) > > > +static inline int access_ok(const void __user *addr, unsigned long size) > > > { > > > - unsigned long ret, limit = TASK_SIZE_MAX - 1; > > > - > > > /* > > > * Asynchronous I/O running in a kernel thread does not have the > > > * TIF_TAGGED_ADDR flag of the process owning the mm, so always untag > > > @@ -46,27 +46,9 @@ static inline unsigned long __access_ok(const void __user *addr, unsigned long s > > > (current->flags & PF_KTHREAD || test_thread_flag(TIF_TAGGED_ADDR))) > > > addr = untagged_addr(addr); > > > > > > - __chk_user_ptr(addr); > > > - asm volatile( > > > - // A + B <= C + 1 for all A,B,C, in four easy steps: > > > - // 1: X = A + B; X' = X % 2^64 > > > - " adds %0, %3, %2\n" > > > - // 2: Set C = 0 if X > 2^64, to guarantee X' > C in step 4 > > > - " csel %1, xzr, %1, hi\n" > > > - // 3: Set X' = ~0 if X >= 2^64. For X == 2^64, this decrements X' > > > - // to compensate for the carry flag being set in step 4. For > > > - // X > 2^64, X' merely has to remain nonzero, which it does. > > > - " csinv %0, %0, xzr, cc\n" > > > - // 4: For X < 2^64, this gives us X' - C - 1 <= 0, where the -1 > > > - // comes from the carry in being clear. Otherwise, we are > > > - // testing X' - C == 0, subject to the previous adjustments. > > > - " sbcs xzr, %0, %1\n" > > > - " cset %0, ls\n" > > > - : "=&r" (ret), "+r" (limit) : "Ir" (size), "0" (addr) : "cc"); > > > - > > > - return ret; > > > + return likely(__access_ok(addr, size)); > > > } > > > -#define __access_ok __access_ok > > > +#define access_ok access_ok > > > > > > #include <asm-generic/access_ok.h> > > > > > > -- > > > 2.29.2 > > > > > > > With set_fs() out of the picture, wouldn't it be sufficient to check > > that bit #55 is clear? 
(the bit that selects between TTBR0 and TTBR1) > > That would also remove the need to strip the tag from the address. > > > > Something like > > > > asm goto("tbnz %0, #55, %2 \n" > > "tbnz %1, #55, %2 \n" > > :: "r"(addr), "r"(addr + size - 1) :: notok); > > return 1; > > notok: > > return 0; > > > > with an additional sanity check on the size which the compiler could > > eliminate for compile-time constant values. > > Is there are reason not to just use: > size < 1u << 48 && !((addr | (addr + size - 1)) & 1u << 55) That has a few problems, including being an ABI change for tasks not using the relaxed tag ABI and not working for 52-bit VAs. If we really want to relax the tag checking aspect, there are simpler options, including variations on Ard's approach above. > Ugg, is arm64 addressing as horrid as it looks - with the 'kernel' > bit in the middle of the virtual address space? It's just sign-extension/canonical addressing, except bits [63:56] are configurable between a few uses, so the architecture says bit 55 is the one to look at in all configurations to figure out if an address is high/low (in addition to checking the remaining bits are canonical). Thanks, Mark.
Le 15/02/2022 à 10:12, Arnd Bergmann a écrit : > On Tue, Feb 15, 2022 at 9:17 AM Ard Biesheuvel <ardb@kernel.org> wrote: >> On Mon, 14 Feb 2022 at 17:37, Arnd Bergmann <arnd@kernel.org> wrote: >>> From: Arnd Bergmann <arnd@arndb.de> >>> >> >> With set_fs() out of the picture, wouldn't it be sufficient to check >> that bit #55 is clear? (the bit that selects between TTBR0 and TTBR1) >> That would also remove the need to strip the tag from the address. >> >> Something like >> >> asm goto("tbnz %0, #55, %2 \n" >> "tbnz %1, #55, %2 \n" >> :: "r"(addr), "r"(addr + size - 1) :: notok); >> return 1; >> notok: >> return 0; >> >> with an additional sanity check on the size which the compiler could >> eliminate for compile-time constant values. > > That should work, but I don't see it as a clear enough advantage to > have a custom implementation. For the constant-size case, it probably > isn't better than a compiler-scheduled comparison against a > constant limit, but it does hurt maintainability when the next person > wants to change the behavior of access_ok() globally. > > If we want to get into micro-optimizing uaccess, I think a better target > would be a CONFIG_CC_HAS_ASM_GOTO_OUTPUT version > of __get_user()/__put_user as we have on x86 and powerpc. > There is also the user block accesses with user_access_begin()/user_access_end() together with unsafe_put_user() and unsafe_get_user() which allowed us to optimise user accesses on powerpc, especially in the signal code. Christophe
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 357f7bd9c981..e8dce0cc5eaa 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -26,6 +26,8 @@ #include <asm/memory.h> #include <asm/extable.h> +static inline int __access_ok(const void __user *ptr, unsigned long size); + /* * Test whether a block of memory is a valid user space address. * Returns 1 if the range is valid, 0 otherwise. @@ -33,10 +35,8 @@ * This is equivalent to the following test: * (u65)addr + (u65)size <= (u65)TASK_SIZE_MAX */ -static inline unsigned long __access_ok(const void __user *addr, unsigned long size) +static inline int access_ok(const void __user *addr, unsigned long size) { - unsigned long ret, limit = TASK_SIZE_MAX - 1; - /* * Asynchronous I/O running in a kernel thread does not have the * TIF_TAGGED_ADDR flag of the process owning the mm, so always untag @@ -46,27 +46,9 @@ static inline unsigned long __access_ok(const void __user *addr, unsigned long s (current->flags & PF_KTHREAD || test_thread_flag(TIF_TAGGED_ADDR))) addr = untagged_addr(addr); - __chk_user_ptr(addr); - asm volatile( - // A + B <= C + 1 for all A,B,C, in four easy steps: - // 1: X = A + B; X' = X % 2^64 - " adds %0, %3, %2\n" - // 2: Set C = 0 if X > 2^64, to guarantee X' > C in step 4 - " csel %1, xzr, %1, hi\n" - // 3: Set X' = ~0 if X >= 2^64. For X == 2^64, this decrements X' - // to compensate for the carry flag being set in step 4. For - // X > 2^64, X' merely has to remain nonzero, which it does. - " csinv %0, %0, xzr, cc\n" - // 4: For X < 2^64, this gives us X' - C - 1 <= 0, where the -1 - // comes from the carry in being clear. Otherwise, we are - // testing X' - C == 0, subject to the previous adjustments. 
- " sbcs xzr, %0, %1\n" - " cset %0, ls\n" - : "=&r" (ret), "+r" (limit) : "Ir" (size), "0" (addr) : "cc"); - - return ret; + return likely(__access_ok(addr, size)); } -#define __access_ok __access_ok +#define access_ok access_ok #include <asm-generic/access_ok.h>