Message ID | 20240717145018.3972922-2-yu.ma@intel.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | fs/file.c: optimize the critical section of file_lock in | expand |
Hello, kernel test robot noticed a 1.2% improvement of will-it-scale.per_process_ops on: commit: f1139c8e66d5c618aad04a93a2378ad9586464f9 ("[PATCH v5 1/3] fs/file.c: remove sanity_check and add likely/unlikely in alloc_fd()") url: https://github.com/intel-lab-lkp/linux/commits/Yu-Ma/fs-file-c-remove-sanity_check-and-add-likely-unlikely-in-alloc_fd/20240717-224830 base: https://git.kernel.org/cgit/linux/kernel/git/vfs/vfs.git vfs.all patch link: https://lore.kernel.org/all/20240717145018.3972922-2-yu.ma@intel.com/ patch subject: [PATCH v5 1/3] fs/file.c: remove sanity_check and add likely/unlikely in alloc_fd() testcase: will-it-scale test machine: 256 threads 2 sockets GENUINE INTEL(R) XEON(R) (Sierra Forest) with 128G memory parameters: nr_task: 100% mode: process test: dup1 cpufreq_governor: performance Details are as below: --------------------------------------------------------------------------------------------------> The kernel config and materials to reproduce are available at: https://download.01.org/0day-ci/archive/20240806/202408062146.832faa23-oliver.sang@intel.com ========================================================================================= compiler/cpufreq_governor/kconfig/mode/nr_task/rootfs/tbox_group/test/testcase: gcc-13/performance/x86_64-rhel-8.3/process/100%/debian-12-x86_64-20240206.cgz/lkp-srf-2sp1/dup1/will-it-scale commit: 5f30e082ab ("Merge branch 'vfs.iomap' into vfs.all") f1139c8e66 ("fs/file.c: remove sanity_check and add likely/unlikely in alloc_fd()") 5f30e082ab8b3431 f1139c8e66d5c618aad04a93a23 ---------------- --------------------------- %stddev %change %stddev \ | \ 377983 ± 69% +74.1% 658036 ± 17% numa-meminfo.node0.AnonPages 18.17 ± 10% -48.6% 9.33 ± 35% perf-c2c.DRAM.local 8.796e+08 +1.2% 8.903e+08 will-it-scale.256.processes 3436082 +1.2% 3477810 will-it-scale.per_process_ops 8.796e+08 +1.2% 8.903e+08 will-it-scale.workload 1.517e+11 -4.3% 1.452e+11 perf-stat.i.branch-instructions 0.03 ± 8% +0.0 0.04 ± 36% perf-stat.i.branch-miss-rate% 0.93 +3.9% 0.96 perf-stat.i.cpi 7.13e+11 -3.5% 6.88e+11 perf-stat.i.instructions 1.08 -3.4% 1.04 perf-stat.i.ipc 0.93 +3.4% 0.96 perf-stat.overall.cpi 1.08 -3.3% 1.04 perf-stat.overall.ipc 245130 -4.4% 234451 perf-stat.overall.path-length 1.512e+11 -4.3% 1.447e+11 perf-stat.ps.branch-instructions 7.106e+11 -3.5% 6.857e+11 perf-stat.ps.instructions 2.156e+14 -3.2% 2.087e+14 perf-stat.total.instructions 14.90 -0.7 14.20 perf-profile.calltrace.cycles-pp.do_syscall_64.entry_SYSCALL_64_after_hwframe.dup 12.01 -0.7 11.32 perf-profile.calltrace.cycles-pp.__x64_sys_dup.do_syscall_64.entry_SYSCALL_64_after_hwframe.dup 16.54 -0.7 15.88 perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_after_hwframe.dup 6.44 -0.6 5.89 perf-profile.calltrace.cycles-pp.alloc_fd.__x64_sys_dup.do_syscall_64.entry_SYSCALL_64_after_hwframe.dup 2.86 -0.0 2.82 perf-profile.calltrace.cycles-pp.entry_SYSRETQ_unsafe_stack.__close 8.94 -0.0 8.90 perf-profile.calltrace.cycles-pp.filp_flush.__x64_sys_close.do_syscall_64.entry_SYSCALL_64_after_hwframe.__close 7.76 -0.0 7.72 perf-profile.calltrace.cycles-pp.locks_remove_posix.filp_flush.__x64_sys_close.do_syscall_64.entry_SYSCALL_64_after_hwframe 2.58 -0.0 2.54 perf-profile.calltrace.cycles-pp.__fput_sync.__x64_sys_close.do_syscall_64.entry_SYSCALL_64_after_hwframe.__close 1.11 -0.0 1.10 perf-profile.calltrace.cycles-pp.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.__close 1.33 +0.0 1.35 perf-profile.calltrace.cycles-pp.testcase 0.54 +0.0 0.56 perf-profile.calltrace.cycles-pp.x64_sys_call.do_syscall_64.entry_SYSCALL_64_after_hwframe.__close 0.79 +0.0 0.82 perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_safe_stack.dup 1.33 +0.0 1.37 perf-profile.calltrace.cycles-pp.close@plt 2.73 +0.1 2.78 perf-profile.calltrace.cycles-pp._raw_spin_lock.file_close_fd.__x64_sys_close.do_syscall_64.entry_SYSCALL_64_after_hwframe 1.05 +0.1 1.11 perf-profile.calltrace.cycles-pp.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.dup 4.35 +0.1 4.42 perf-profile.calltrace.cycles-pp.file_close_fd.__x64_sys_close.do_syscall_64.entry_SYSCALL_64_after_hwframe.__close 22.18 +0.3 22.51 perf-profile.calltrace.cycles-pp.entry_SYSCALL_64.__close 21.50 ± 2% +1.5 23.02 perf-profile.calltrace.cycles-pp.entry_SYSCALL_64.dup 12.10 -0.7 11.39 perf-profile.children.cycles-pp.__x64_sys_dup 34.79 -0.7 34.12 perf-profile.children.cycles-pp.do_syscall_64 38.04 -0.6 37.42 perf-profile.children.cycles-pp.entry_SYSCALL_64_after_hwframe 6.48 -0.6 5.90 perf-profile.children.cycles-pp.alloc_fd 1.86 -0.5 1.41 perf-profile.children.cycles-pp.syscall_return_via_sysret 0.57 -0.1 0.47 perf-profile.children.cycles-pp.fd_install 9.11 -0.0 9.07 perf-profile.children.cycles-pp.filp_flush 7.93 -0.0 7.89 perf-profile.children.cycles-pp.locks_remove_posix 2.61 -0.0 2.58 perf-profile.children.cycles-pp.__fput_sync 1.16 +0.0 1.18 perf-profile.children.cycles-pp.x64_sys_call 0.05 +0.0 0.07 ± 13% perf-profile.children.cycles-pp.clockevents_program_event 0.51 +0.0 0.53 perf-profile.children.cycles-pp.syscall_exit_to_user_mode_prepare 2.17 +0.0 2.20 perf-profile.children.cycles-pp.syscall_exit_to_user_mode 5.72 +0.0 5.75 perf-profile.children.cycles-pp._raw_spin_lock 2.10 +0.0 2.13 perf-profile.children.cycles-pp.testcase 2.02 +0.0 2.06 perf-profile.children.cycles-pp.entry_SYSCALL_64_safe_stack 0.13 ± 2% +0.0 0.17 perf-profile.children.cycles-pp.dup@plt 4.38 +0.1 4.46 perf-profile.children.cycles-pp.file_close_fd 23.00 +0.1 23.11 perf-profile.children.cycles-pp.entry_SYSRETQ_unsafe_stack 59.27 +0.5 59.73 perf-profile.children.cycles-pp.__close 28.73 +1.1 29.80 perf-profile.children.cycles-pp.entry_SYSCALL_64 1.86 -0.5 1.41 perf-profile.self.cycles-pp.syscall_return_via_sysret 2.28 -0.2 2.12 perf-profile.self.cycles-pp.alloc_fd 0.54 -0.1 0.43 perf-profile.self.cycles-pp.fd_install 7.87 -0.0 7.83 perf-profile.self.cycles-pp.locks_remove_posix 2.47 -0.0 2.44 perf-profile.self.cycles-pp.__fput_sync 1.23 +0.0 1.24 perf-profile.self.cycles-pp.file_close_fd_locked 1.09 +0.0 1.11 perf-profile.self.cycles-pp.x64_sys_call 0.51 +0.0 0.53 perf-profile.self.cycles-pp.syscall_exit_to_user_mode_prepare 1.29 +0.0 1.32 perf-profile.self.cycles-pp.testcase 5.66 +0.0 5.69 perf-profile.self.cycles-pp._raw_spin_lock 1.95 +0.0 1.99 perf-profile.self.cycles-pp.entry_SYSCALL_64_safe_stack 2.85 +0.0 2.90 perf-profile.self.cycles-pp.entry_SYSCALL_64_after_hwframe 0.02 ±141% +0.0 0.06 ± 13% perf-profile.self.cycles-pp.ktime_get 0.00 +0.1 0.07 perf-profile.self.cycles-pp.dup@plt 22.93 +0.1 23.05 perf-profile.self.cycles-pp.entry_SYSRETQ_unsafe_stack 10.11 +0.2 10.34 perf-profile.self.cycles-pp.dup 13.70 +0.3 13.98 perf-profile.self.cycles-pp.entry_SYSCALL_64 9.84 ± 3% +0.7 10.51 perf-profile.self.cycles-pp.__close Disclaimer: Results have been estimated based on internal Intel analysis and are provided for informational purposes only. Any difference in system hardware or software design or configuration may affect actual performance.
On Wed, Jul 17, 2024 at 10:50:16AM -0400, Yu Ma wrote: > alloc_fd() has a sanity check inside to make sure the struct file mapping to the > allocated fd is NULL. Remove this sanity check since it can be assured by > exisitng zero initilization and NULL set when recycling fd. Meanwhile, add > likely/unlikely and expand_file() call avoidance to reduce the work under > file_lock. > + if (unlikely(fd >= fdt->max_fds)) { > + error = expand_files(files, fd); > + if (error < 0) > + goto out; > > - /* > - * If we needed to expand the fs array we > - * might have blocked - try again. > - */ > - if (error) > - goto repeat; > + /* > + * If we needed to expand the fs array we > + * might have blocked - try again. > + */ > + if (error) > + goto repeat; With that change you can't get 0 from expand_files() here, so the last goto should be unconditional. The only case when expand_files() returns 0 is when it has found the descriptor already being covered by fdt; since fdt->max_fds is stabilized by ->files_lock we are holding here, comparison in expand_files() will give the same result as it just had. IOW, that goto repeat should be unconditional. The fun part here is that this was the only caller that distinguished between 0 and 1...
On 8/15/2024 5:38 AM, Al Viro wrote: > On Wed, Jul 17, 2024 at 10:50:16AM -0400, Yu Ma wrote: >> alloc_fd() has a sanity check inside to make sure the struct file mapping to the >> allocated fd is NULL. Remove this sanity check since it can be assured by >> exisitng zero initilization and NULL set when recycling fd. Meanwhile, add >> likely/unlikely and expand_file() call avoidance to reduce the work under >> file_lock. >> + if (unlikely(fd >= fdt->max_fds)) { >> + error = expand_files(files, fd); >> + if (error < 0) >> + goto out; >> >> - /* >> - * If we needed to expand the fs array we >> - * might have blocked - try again. >> - */ >> - if (error) >> - goto repeat; >> + /* >> + * If we needed to expand the fs array we >> + * might have blocked - try again. >> + */ >> + if (error) >> + goto repeat; > With that change you can't get 0 from expand_files() here, so the > last goto should be unconditional. The only case when expand_files() > returns 0 is when it has found the descriptor already being covered > by fdt; since fdt->max_fds is stabilized by ->files_lock we are > holding here, comparison in expand_files() will give the same > result as it just had. > > IOW, that goto repeat should be unconditional. The fun part here is > that this was the only caller that distinguished between 0 and 1... Yes, thanks Al, fully agree with you. The if (error) could be removed here as the above unlikely would make sure no 0 return here. Should I submit another version of patch set to update it, or you may help to update it directly during merge? I'm not very familiar with the rules, please let me know if I'd update and I'll take action soon. Thanks again for your careful check here.
On Thu, Aug 15, 2024 at 10:49:40AM +0800, Ma, Yu wrote: > Yes, thanks Al, fully agree with you. The if (error) could be removed here > as the above unlikely would make sure no 0 return here. Should I submit > another version of patch set to update it, or you may help to update it > directly during merge? I'm not very familiar with the rules, please let me > know if I'd update and I'll take action soon. Thanks again for your careful > check here. See the current work.fdtable...
On 8/15/2024 11:45 AM, Al Viro wrote: > On Thu, Aug 15, 2024 at 10:49:40AM +0800, Ma, Yu wrote: > >> Yes, thanks Al, fully agree with you. The if (error) could be removed here >> as the above unlikely would make sure no 0 return here. Should I submit >> another version of patch set to update it, or you may help to update it >> directly during merge? I'm not very familiar with the rules, please let me >> know if I'd update and I'll take action soon. Thanks again for your careful >> check here. > See the current work.fdtable... Saw it, thanks :)
On Thu, Aug 15, 2024 at 5:45 AM Al Viro <viro@zeniv.linux.org.uk> wrote: > > On Thu, Aug 15, 2024 at 10:49:40AM +0800, Ma, Yu wrote: > > > Yes, thanks Al, fully agree with you. The if (error) could be removed here > > as the above unlikely would make sure no 0 return here. Should I submit > > another version of patch set to update it, or you may help to update it > > directly during merge? I'm not very familiar with the rules, please let me > > know if I'd update and I'll take action soon. Thanks again for your careful > > check here. > > See the current work.fdtable... this patchset seems to have fallen through the cracks. anything holding up the merge?
On Thu, Oct 31, 2024 at 08:42:18AM +0100, Mateusz Guzik wrote: > On Thu, Aug 15, 2024 at 5:45 AM Al Viro <viro@zeniv.linux.org.uk> wrote: > > > > On Thu, Aug 15, 2024 at 10:49:40AM +0800, Ma, Yu wrote: > > > > > Yes, thanks Al, fully agree with you. The if (error) could be removed here > > > as the above unlikely would make sure no 0 return here. Should I submit > > > another version of patch set to update it, or you may help to update it > > > directly during merge? I'm not very familiar with the rules, please let me > > > know if I'd update and I'll take action soon. Thanks again for your careful > > > check here. > > > > See the current work.fdtable... > > this patchset seems to have fallen through the cracks. anything > holding up the merge? It's in next for this merge window.
diff --git a/fs/file.c b/fs/file.c index a3b72aa64f11..e1b9d6df7941 100644 --- a/fs/file.c +++ b/fs/file.c @@ -515,7 +515,7 @@ static int alloc_fd(unsigned start, unsigned end, unsigned flags) if (fd < files->next_fd) fd = files->next_fd; - if (fd < fdt->max_fds) + if (likely(fd < fdt->max_fds)) fd = find_next_fd(fdt, fd); /* @@ -523,19 +523,21 @@ static int alloc_fd(unsigned start, unsigned end, unsigned flags) * will limit the total number of files that can be opened. */ error = -EMFILE; - if (fd >= end) + if (unlikely(fd >= end)) goto out; - error = expand_files(files, fd); - if (error < 0) - goto out; + if (unlikely(fd >= fdt->max_fds)) { + error = expand_files(files, fd); + if (error < 0) + goto out; - /* - * If we needed to expand the fs array we - * might have blocked - try again. - */ - if (error) - goto repeat; + /* + * If we needed to expand the fs array we + * might have blocked - try again. + */ + if (error) + goto repeat; + } if (start <= files->next_fd) files->next_fd = fd + 1; @@ -546,13 +548,6 @@ static int alloc_fd(unsigned start, unsigned end, unsigned flags) else __clear_close_on_exec(fd, fdt); error = fd; -#if 1 - /* Sanity check */ - if (rcu_access_pointer(fdt->fd[fd]) != NULL) { - printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd); - rcu_assign_pointer(fdt->fd[fd], NULL); - } -#endif out: spin_unlock(&files->file_lock); @@ -618,7 +613,7 @@ void fd_install(unsigned int fd, struct file *file) rcu_read_unlock_sched(); spin_lock(&files->file_lock); fdt = files_fdtable(files); - BUG_ON(fdt->fd[fd] != NULL); + WARN_ON(fdt->fd[fd] != NULL); rcu_assign_pointer(fdt->fd[fd], file); spin_unlock(&files->file_lock); return;