@@ -65,21 +65,21 @@ static void file_free_rcu(struct rcu_head *head)
{
struct file *f = container_of(head, struct file, f_rcuhead);
- put_cred(f->f_cred);
- if (unlikely(f->f_mode & FMODE_BACKING))
- kfree(backing_file(f));
- else
- kmem_cache_free(filp_cachep, f);
+ kfree(backing_file(f));
}
static inline void file_free(struct file *f)
{
security_file_free(f);
- if (unlikely(f->f_mode & FMODE_BACKING))
- path_put(backing_file_real_path(f));
if (likely(!(f->f_mode & FMODE_NOACCOUNT)))
percpu_counter_dec(&nr_files);
- call_rcu(&f->f_rcuhead, file_free_rcu);
+ put_cred(f->f_cred);
+ if (unlikely(f->f_mode & FMODE_BACKING)) {
+ path_put(backing_file_real_path(f));
+ call_rcu(&f->f_rcuhead, file_free_rcu);
+ } else {
+ kmem_cache_free(filp_cachep, f);
+ }
}
/*
@@ -471,7 +471,8 @@ EXPORT_SYMBOL(__fput_sync);
void __init files_init(void)
{
filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
- SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT, NULL);
+ SLAB_TYPESAFE_BY_RCU | SLAB_HWCACHE_ALIGN
+ | SLAB_PANIC | SLAB_ACCOUNT, NULL);
percpu_counter_init(&nr_files, 0, GFP_KERNEL);
}
Sapphire Rapids, open1_processes -t 1 from will-it-scale + tmpfs on
/tmp (ops/s):
before: 1539109
after: 1785908 (+16%)
There was also a speedup for negative entries, but the above should be
enough for the commit message, and I don't want to duplicate the testcase
between them.
Below is my rebased patch + rewritten commit message with updated bench
results. I decided to stick to the fput_badopen name because with your patch
it legitimately has to unref. Naming that "release_empty_file" or
whatever would be rather misleading imho.
===================== cut here =====================
vfs: avoid delegating to task_work when cleaning up failed open
Failed opens (mostly ENOENT) legitimately happen a lot; for example, here
are stats from stracing a kernel build for a few seconds (strace -fc make):
% time seconds usecs/call calls errors syscall
------ ----------- ----------- --------- --------- ------------------
0.76 0.076233 5 15040 3688 openat
(this is tons of header files tried in different paths)
Normally these are closed from task_work machinery, but getting there is
very expensive (see 021a160abf62 ("fs: use __fput_sync in close(2)")) and
in the common case trivially avoidable.
Benchmarked with will-it-scale with a custom testcase based on
tests/open1.c, stuffed into tests/openneg.c:
[snip]
while (1) {
int fd = open("/tmp/nonexistent", O_RDONLY);
assert(fd == -1);
(*iterations)++;
}
[/snip]
Sapphire Rapids, openneg_processes -t 1 (ops/s):
before: 2299006
after: 2986226 (+29%)
v3:
- rebase on top of the patch which dodges RCU freeing altogether. This
patch is no longer applicable on top of a stock kernel.
v2:
- unexport fput_badopen and move to fs/internal.h
- handle the refcount with cmpxchg, adjust commentary accordingly
- tweak the commit message
Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
---
fs/file_table.c | 22 ++++++++++++++++++++++
fs/internal.h | 2 ++
fs/namei.c | 2 +-
3 files changed, 25 insertions(+), 1 deletion(-)
@@ -468,6 +468,28 @@ void __fput_sync(struct file *file)
EXPORT_SYMBOL(fput);
EXPORT_SYMBOL(__fput_sync);
+/*
+ * Clean up after failing to open (e.g., open(2) returns with -ENOENT).
+ *
+ * In the common case this avoids delegating the free to task_work.
+ */
+void fput_badopen(struct file *file)
+{
+ if (unlikely(file->f_mode & FMODE_OPENED)) {
+ fput(file);
+ return;
+ }
+
+ /*
+ * While we did not expose the file to anyone, we may be racing against
+ * __fget_files_rcu refing a stale object. Should this happen it is
+ * going to backpedal with fput, but it means we have to unref with an
+ * atomic to synchronize against it.
+ */
+ if (atomic_long_dec_and_test(&file->f_count))
+ file_free(file);
+}
+
void __init files_init(void)
{
filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
@@ -95,6 +95,8 @@ struct file *alloc_empty_file(int flags, const struct cred *cred);
struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred);
struct file *alloc_empty_backing_file(int flags, const struct cred *cred);
+void fput_badopen(struct file *);
+
static inline void put_file_access(struct file *file)
{
if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) {
@@ -3802,7 +3802,7 @@ static struct file *path_openat(struct nameidata *nd,
WARN_ON(1);
error = -EINVAL;
}
- fput(file);
+ fput_badopen(file);
if (error == -EOPENSTALE) {
if (flags & LOOKUP_RCU)
error = -ECHILD;