Message ID | 20230807123323.090897260@infradead.org (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | futex: More futex2 bits | expand |
On Mon, Aug 07 2023 at 14:18, Peter Zijlstra wrote: > To complement sys_futex_waitv() add sys_futex_wake(). This syscall > implements what was previously known as FUTEX_WAKE_BITSET except it > uses 'unsigned long' for the bitmask and takes FUTEX2 flags. > > The 'unsigned long' allows FUTEX2_SIZE_U64 on 64bit platforms. > > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> > Acked-by: Geert Uytterhoeven <geert@linux-m68k.org> Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Hi Peter, Em 07/08/2023 09:18, Peter Zijlstra escreveu: > To complement sys_futex_waitv() add sys_futex_wake(). This syscall > implements what was previously known as FUTEX_WAKE_BITSET except it > uses 'unsigned long' for the bitmask and takes FUTEX2 flags. > > The 'unsigned long' allows FUTEX2_SIZE_U64 on 64bit platforms. > > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> > Acked-by: Geert Uytterhoeven <geert@linux-m68k.org> > --- [...] > +/* > + * sys_futex_wake - Wake a number of futexes > + * @uaddr: Address of the futex(es) to wake > + * @mask: bitmask > + * @nr: Number of the futexes to wake > + * @flags: FUTEX2 flags > + * > + * Identical to the traditional FUTEX_WAKE_BITSET op, except it is part of the > + * futex2 family of calls. > + */ > + > +SYSCALL_DEFINE4(futex_wake, > + void __user *, uaddr, > + unsigned long, mask, > + int, nr, > + unsigned int, flags) > +{ Do you think we could have a if (!nr) return 0; here? Otherwise, calling futex_wake(&f, 0, flags) will wake 1 futex (if available), which is a strange undocumented behavior in my opinion. > + if (flags & ~FUTEX2_VALID_MASK) > + return -EINVAL; > + > + flags = futex2_to_flags(flags); > + if (!futex_flags_valid(flags)) > + return -EINVAL; > + > + if (!futex_validate_input(flags, mask)) > + return -EINVAL; > + > + return futex_wake(uaddr, flags, nr, mask); > +} > + > #ifdef CONFIG_COMPAT > COMPAT_SYSCALL_DEFINE2(set_robust_list, > struct compat_robust_list_head __user *, head, > --- a/kernel/sys_ni.c > +++ b/kernel/sys_ni.c > @@ -87,6 +87,7 @@ COND_SYSCALL_COMPAT(set_robust_list); > COND_SYSCALL(get_robust_list); > COND_SYSCALL_COMPAT(get_robust_list); > COND_SYSCALL(futex_waitv); > +COND_SYSCALL(futex_wake); > COND_SYSCALL(kexec_load); > COND_SYSCALL_COMPAT(kexec_load); > COND_SYSCALL(init_module); > >
On Wed, Aug 09, 2023 at 07:25:19PM -0300, André Almeida wrote: > Hi Peter, > > Em 07/08/2023 09:18, Peter Zijlstra escreveu: > > To complement sys_futex_waitv() add sys_futex_wake(). This syscall > > implements what was previously known as FUTEX_WAKE_BITSET except it > > uses 'unsigned long' for the bitmask and takes FUTEX2 flags. > > > > The 'unsigned long' allows FUTEX2_SIZE_U64 on 64bit platforms. > > > > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> > > Acked-by: Geert Uytterhoeven <geert@linux-m68k.org> > > --- > > [...] > > > +/* > > + * sys_futex_wake - Wake a number of futexes > > + * @uaddr: Address of the futex(es) to wake > > + * @mask: bitmask > > + * @nr: Number of the futexes to wake > > + * @flags: FUTEX2 flags > > + * > > + * Identical to the traditional FUTEX_WAKE_BITSET op, except it is part of the > > + * futex2 family of calls. > > + */ > > + > > +SYSCALL_DEFINE4(futex_wake, > > + void __user *, uaddr, > > + unsigned long, mask, > > + int, nr, > > + unsigned int, flags) > > +{ > > Do you think we could have a > > if (!nr) > return 0; > > here? Otherwise, calling futex_wake(&f, 0, flags) will wake 1 futex (if > available), which is a strange undocumented behavior in my opinion. Oh 'cute' that.. yeah, but how about I put it ... > > + if (flags & ~FUTEX2_VALID_MASK) > > + return -EINVAL; > > + > > + flags = futex2_to_flags(flags); > > + if (!futex_flags_valid(flags)) > > + return -EINVAL; > > + > > + if (!futex_validate_input(flags, mask)) > > + return -EINVAL; here, because otherwise we get: sys_futex_wake(&f, 0xFFFF, 0, FUTEX2_SIZE_U8) to return 0, even though that is 'obviously' nonsensical and should return -EINVAL. Or even garbage flags would be 'accepted'. (because 0xFFFF is larger than U8 can accommodate) > > + > > + return futex_wake(uaddr, flags, nr, mask); > > +}
Em 10/08/2023 09:13, Peter Zijlstra escreveu: > On Wed, Aug 09, 2023 at 07:25:19PM -0300, André Almeida wrote: >> Hi Peter, >> >> Em 07/08/2023 09:18, Peter Zijlstra escreveu: >>> To complement sys_futex_waitv() add sys_futex_wake(). This syscall >>> implements what was previously known as FUTEX_WAKE_BITSET except it >>> uses 'unsigned long' for the bitmask and takes FUTEX2 flags. >>> >>> The 'unsigned long' allows FUTEX2_SIZE_U64 on 64bit platforms. >>> >>> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> >>> Acked-by: Geert Uytterhoeven <geert@linux-m68k.org> >>> --- >> >> [...] >> >>> +/* >>> + * sys_futex_wake - Wake a number of futexes >>> + * @uaddr: Address of the futex(es) to wake >>> + * @mask: bitmask >>> + * @nr: Number of the futexes to wake >>> + * @flags: FUTEX2 flags >>> + * >>> + * Identical to the traditional FUTEX_WAKE_BITSET op, except it is part of the >>> + * futex2 family of calls. >>> + */ >>> + >>> +SYSCALL_DEFINE4(futex_wake, >>> + void __user *, uaddr, >>> + unsigned long, mask, >>> + int, nr, >>> + unsigned int, flags) >>> +{ >> >> Do you think we could have a >> >> if (!nr) >> return 0; >> >> here? Otherwise, calling futex_wake(&f, 0, flags) will wake 1 futex (if >> available), which is a strange undocumented behavior in my opinion. > > Oh 'cute' that.. yeah, but how about I put it ... > >>> + if (flags & ~FUTEX2_VALID_MASK) >>> + return -EINVAL; >>> + >>> + flags = futex2_to_flags(flags); >>> + if (!futex_flags_valid(flags)) >>> + return -EINVAL; >>> + >>> + if (!futex_validate_input(flags, mask)) >>> + return -EINVAL; > > here, because otherwise we get: > > sys_futex_wake(&f, 0xFFFF, 0, FUTEX2_SIZE_U8) > > to return 0, even though that is 'obviously' nonsensical and should > return -EINVAL. Or even garbage flags would be 'accepted'. > > (because 0xFFFF is larger than U8 can accommodate) > That makes sense to me, but we would also want to validate the value of f, if it's NULL or something strange to return -EINVAL... 
but this happens only inside get_futex_key()... To make this right, I think we would need to move this verification to the syscall validation part: if (unlikely((address % sizeof(u32)) != 0)) return -EINVAL; if (unlikely(!access_ok(uaddr, sizeof(u32)))) return -EFAULT; And have u32 replaced with the proper size being used. >>> + >>> + return futex_wake(uaddr, flags, nr, mask); >>> +}
--- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -491,3 +491,4 @@ 559 common futex_waitv sys_futex_waitv 560 common set_mempolicy_home_node sys_ni_syscall 561 common cachestat sys_cachestat +562 common futex_wake sys_futex_wake --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -465,3 +465,4 @@ 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat +452 common futex_wake sys_futex_wake --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -39,7 +39,7 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 452 +#define __NR_compat_syscalls 453 #endif #define __ARCH_WANT_SYS_CLONE --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -909,6 +909,8 @@ __SYSCALL(__NR_futex_waitv, sys_futex_wa __SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node) #define __NR_cachestat 451 __SYSCALL(__NR_cachestat, sys_cachestat) +#define __NR_futex_wake 452 +__SYSCALL(__NR_futex_wake, sys_futex_wake) /* * Please add new compat syscalls above this comment and update --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -372,3 +372,4 @@ 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat +452 common futex_wake sys_futex_wake --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -451,3 +451,4 @@ 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat +452 common futex_wake sys_futex_wake --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -457,3 +457,4 @@ 449 common futex_waitv sys_futex_waitv 450 
common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat +452 common futex_wake sys_futex_wake --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -390,3 +390,4 @@ 449 n32 futex_waitv sys_futex_waitv 450 n32 set_mempolicy_home_node sys_set_mempolicy_home_node 451 n32 cachestat sys_cachestat +452 n32 futex_wake sys_futex_wake --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -366,3 +366,4 @@ 449 n64 futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 n64 cachestat sys_cachestat +452 n64 futex_wake sys_futex_wake --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -439,3 +439,4 @@ 449 o32 futex_waitv sys_futex_waitv 450 o32 set_mempolicy_home_node sys_set_mempolicy_home_node 451 o32 cachestat sys_cachestat +452 o32 futex_wake sys_futex_wake --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -450,3 +450,4 @@ 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat +452 common futex_wake sys_futex_wake --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -538,3 +538,4 @@ 449 common futex_waitv sys_futex_waitv 450 nospu set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat +452 common futex_wake sys_futex_wake --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -454,3 +454,4 @@ 449 common futex_waitv sys_futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat sys_cachestat +452 common futex_wake sys_futex_wake sys_futex_wake --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -454,3 
+454,4 @@ 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat +452 common futex_wake sys_futex_wake --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -497,3 +497,4 @@ 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat +452 common futex_wake sys_futex_wake --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -456,3 +456,4 @@ 449 i386 futex_waitv sys_futex_waitv 450 i386 set_mempolicy_home_node sys_set_mempolicy_home_node 451 i386 cachestat sys_cachestat +452 i386 futex_wake sys_futex_wake --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -373,6 +373,7 @@ 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat +452 common futex_wake sys_futex_wake # # Due to a historical design error, certain syscalls are numbered differently --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -422,3 +422,4 @@ 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat +452 common futex_wake sys_futex_wake --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -563,6 +563,9 @@ asmlinkage long sys_set_robust_list(stru asmlinkage long sys_futex_waitv(struct futex_waitv *waiters, unsigned int nr_futexes, unsigned int flags, struct __kernel_timespec __user *timeout, clockid_t clockid); + +asmlinkage long sys_futex_wake(void __user *uaddr, unsigned long mask, int nr, unsigned int flags); + asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp, struct __kernel_timespec __user *rmtp); asmlinkage long sys_nanosleep_time32(struct old_timespec32 __user *rqtp, --- 
a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -816,12 +816,13 @@ __SYSCALL(__NR_process_mrelease, sys_pro __SYSCALL(__NR_futex_waitv, sys_futex_waitv) #define __NR_set_mempolicy_home_node 450 __SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node) - #define __NR_cachestat 451 __SYSCALL(__NR_cachestat, sys_cachestat) +#define __NR_futex_wake 452 +__SYSCALL(__NR_futex_wake, sys_futex_wake) #undef __NR_syscalls -#define __NR_syscalls 452 +#define __NR_syscalls 453 /* * 32 bit systems traditionally used different --- a/kernel/futex/syscalls.c +++ b/kernel/futex/syscalls.c @@ -306,6 +306,36 @@ SYSCALL_DEFINE5(futex_waitv, struct fute return ret; } +/* + * sys_futex_wake - Wake a number of futexes + * @uaddr: Address of the futex(es) to wake + * @mask: bitmask + * @nr: Number of the futexes to wake + * @flags: FUTEX2 flags + * + * Identical to the traditional FUTEX_WAKE_BITSET op, except it is part of the + * futex2 family of calls. + */ + +SYSCALL_DEFINE4(futex_wake, + void __user *, uaddr, + unsigned long, mask, + int, nr, + unsigned int, flags) +{ + if (flags & ~FUTEX2_VALID_MASK) + return -EINVAL; + + flags = futex2_to_flags(flags); + if (!futex_flags_valid(flags)) + return -EINVAL; + + if (!futex_validate_input(flags, mask)) + return -EINVAL; + + return futex_wake(uaddr, flags, nr, mask); +} + #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE2(set_robust_list, struct compat_robust_list_head __user *, head, --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -87,6 +87,7 @@ COND_SYSCALL_COMPAT(set_robust_list); COND_SYSCALL(get_robust_list); COND_SYSCALL_COMPAT(get_robust_list); COND_SYSCALL(futex_waitv); +COND_SYSCALL(futex_wake); COND_SYSCALL(kexec_load); COND_SYSCALL_COMPAT(kexec_load); COND_SYSCALL(init_module);