[v1] shmem: stable directory cookies

Message ID 168175931561.2843.16288612382874559384.stgit@manet.1015granger.net
State New

Commit Message

Chuck Lever April 17, 2023, 7:23 p.m. UTC
From: Chuck Lever <chuck.lever@oracle.com>

The current cursor-based directory cookie mechanism doesn't work
when a tmpfs filesystem is exported via NFS. This is because NFS
clients do not open directories: each READDIR operation has to open
the directory on the server, read it, then close it. The cursor
state for that directory, being associated strictly with the opened
struct file, is then discarded.

Directory cookies are cached not only by NFS clients, but also by
user space libraries on those clients. There is essentially no way
to invalidate those caches once the offset-to-dentry mapping changes
on the NFS server.

The solution we've come up with is to make the directory cookie for
each file in a tmpfs filesystem stable for the life of the directory
entry it represents.

Add a per-directory xarray. shmem_readdir() uses this to map each
directory offset (an loff_t integer) to the memory address of a
struct dentry.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/shmem_fs.h |    2 
 mm/shmem.c               |  213 +++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 201 insertions(+), 14 deletions(-)

Changes since RFC:
- Destroy xarray in shmem_destroy_inode() instead of free_in_core_inode()
- A few cosmetic updates

Comments

Jeff Layton April 20, 2023, 6:52 p.m. UTC | #1
On Mon, 2023-04-17 at 15:23 -0400, Chuck Lever wrote:
> From: Chuck Lever <chuck.lever@oracle.com>
> 
> The current cursor-based directory cookie mechanism doesn't work
> when a tmpfs filesystem is exported via NFS. This is because NFS
> clients do not open directories: each READDIR operation has to open
> the directory on the server, read it, then close it. The cursor
> state for that directory, being associated strictly with the opened
> struct file, is then discarded.
> 
> Directory cookies are cached not only by NFS clients, but also by
> user space libraries on those clients. Essentially there is no way
> to invalidate those caches when directory offsets have changed on
> an NFS server after the offset-to-dentry mapping changes.
> 
> The solution we've come up with is to make the directory cookie for
> each file in a tmpfs filesystem stable for the life of the directory
> entry it represents.
> 
> Add a per-directory xarray. shmem_readdir() uses this to map each
> directory offset (an loff_t integer) to the memory address of a
> struct dentry.
> 
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
>  include/linux/shmem_fs.h |    2 
>  mm/shmem.c               |  213 +++++++++++++++++++++++++++++++++++++++++++---
>  2 files changed, 201 insertions(+), 14 deletions(-)
> 
> Changes since RFC:
> - Destroy xarray in shmem_destroy_inode() instead of free_in_core_inode()
> - A few cosmetic updates
> 
> diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
> index 103d1000a5a2..682ef885aa89 100644
> --- a/include/linux/shmem_fs.h
> +++ b/include/linux/shmem_fs.h
> @@ -26,6 +26,8 @@ struct shmem_inode_info {
>  	atomic_t		stop_eviction;	/* hold when working on inode */
>  	struct timespec64	i_crtime;	/* file creation time */
>  	unsigned int		fsflags;	/* flags for FS_IOC_[SG]ETFLAGS */
> +	struct xarray		doff_map;	/* dir offset to entry mapping */
> +	u32			next_doff;
>  	struct inode		vfs_inode;
>  };
>  
> diff --git a/mm/shmem.c b/mm/shmem.c
> index 448f393d8ab2..ba4176499e5c 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -40,6 +40,8 @@
>  #include <linux/fs_parser.h>
>  #include <linux/swapfile.h>
>  #include <linux/iversion.h>
> +#include <linux/xarray.h>
> +
>  #include "swap.h"
>  
>  static struct vfsmount *shm_mnt;
> @@ -234,6 +236,7 @@ static const struct super_operations shmem_ops;
>  const struct address_space_operations shmem_aops;
>  static const struct file_operations shmem_file_operations;
>  static const struct inode_operations shmem_inode_operations;
> +static const struct file_operations shmem_dir_operations;
>  static const struct inode_operations shmem_dir_inode_operations;
>  static const struct inode_operations shmem_special_inode_operations;
>  static const struct vm_operations_struct shmem_vm_ops;
> @@ -2397,7 +2400,9 @@ static struct inode *shmem_get_inode(struct mnt_idmap *idmap, struct super_block
>  			/* Some things misbehave if size == 0 on a directory */
>  			inode->i_size = 2 * BOGO_DIRENT_SIZE;
>  			inode->i_op = &shmem_dir_inode_operations;
> -			inode->i_fop = &simple_dir_operations;
> +			inode->i_fop = &shmem_dir_operations;
> +			xa_init_flags(&info->doff_map, XA_FLAGS_ALLOC1);
> +			info->next_doff = 0;
>  			break;
>  		case S_IFLNK:
>  			/*
> @@ -2917,6 +2922,71 @@ static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
>  	return 0;
>  }
>  
> +static struct xarray *shmem_doff_map(struct inode *dir)
> +{
> +	return &SHMEM_I(dir)->doff_map;
> +}
> +
> +static int shmem_doff_add(struct inode *dir, struct dentry *dentry)
> +{
> +	struct shmem_inode_info *info = SHMEM_I(dir);
> +	struct xa_limit limit = XA_LIMIT(2, U32_MAX);
> +	u32 offset;
> +	int ret;
> +
> +	if (dentry->d_fsdata)
> +		return -EBUSY;
> +
> +	offset = 0;
> +	ret = xa_alloc_cyclic(shmem_doff_map(dir), &offset, dentry, limit,
> +			      &info->next_doff, GFP_KERNEL);
> +	if (ret < 0)
> +		return ret;
> +
> +	dentry->d_fsdata = (void *)(unsigned long)offset;
> +	return 0;
> +}
> +
> +static struct dentry *shmem_doff_find_after(struct dentry *dir,
> +					    unsigned long *offset)
> +{
> +	struct xarray *xa = shmem_doff_map(d_inode(dir));
> +	struct dentry *d, *found = NULL;
> +
> +	spin_lock(&dir->d_lock);
> +	d = xa_find_after(xa, offset, ULONG_MAX, XA_PRESENT);
> +	if (d) {
> +		spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
> +		if (simple_positive(d))
> +			found = dget_dlock(d);
> +		spin_unlock(&d->d_lock);
> +	}
> +	spin_unlock(&dir->d_lock);

This part is kind of gross, but I think I get it now...

You have to take dir->d_lock to ensure that "d" doesn't go away when you
don't hold a ref on it, and you need the child's d_lock to ensure that
simple_positive result is stable while you take a reference (because
doing a dput there could be problematic). If that's right, then that's a
bit subtle, and might deserve a nice comment.

I do wonder if there is some way to do this with RCU instead, but this
seems to work well enough.
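
Something like this, maybe, just to sketch the comment I have in mind
(not from the patch itself):

/*
 * The parent's d_lock keeps the child dentry returned by
 * xa_find_after() from being freed while we hold no reference on it.
 * The child's d_lock keeps the simple_positive() result stable until
 * dget_dlock() has taken the reference, so we never need to dput()
 * under these locks.
 */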

> +	return found;
> +}
> +
> +static void shmem_doff_remove(struct inode *dir, struct dentry *dentry)
> +{
> +	u32 offset = (u32)(unsigned long)dentry->d_fsdata;
> +
> +	if (!offset)
> +		return;
> +
> +	xa_erase(shmem_doff_map(dir), offset);
> +	dentry->d_fsdata = NULL;
> +}
> +
> +/*
> + * During fs teardown (eg. umount), a directory's doff_map might still
> + * contain entries. xa_destroy() cleans out anything that remains.
> + */
> +static void shmem_doff_map_destroy(struct inode *inode)
> +{
> +	struct xarray *xa = shmem_doff_map(inode);
> +
> +	xa_destroy(xa);
> +}
> +
>  /*
>   * File creation. Allocate an inode, and we're done..
>   */
> @@ -2938,6 +3008,10 @@ shmem_mknod(struct mnt_idmap *idmap, struct inode *dir,
>  		if (error && error != -EOPNOTSUPP)
>  			goto out_iput;
>  
> +		error = shmem_doff_add(dir, dentry);
> +		if (error)
> +			goto out_iput;
> +
>  		error = 0;
>  		dir->i_size += BOGO_DIRENT_SIZE;
>  		dir->i_ctime = dir->i_mtime = current_time(dir);
> @@ -3015,6 +3089,10 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentr
>  			goto out;
>  	}
>  
> +	ret = shmem_doff_add(dir, dentry);
> +	if (ret)
> +		goto out;
> +
>  	dir->i_size += BOGO_DIRENT_SIZE;
>  	inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
>  	inode_inc_iversion(dir);
> @@ -3033,6 +3111,8 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry)
>  	if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode))
>  		shmem_free_inode(inode->i_sb);
>  
> +	shmem_doff_remove(dir, dentry);
> +
>  	dir->i_size -= BOGO_DIRENT_SIZE;
>  	inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
>  	inode_inc_iversion(dir);
> @@ -3091,24 +3171,37 @@ static int shmem_rename2(struct mnt_idmap *idmap,
>  {
>  	struct inode *inode = d_inode(old_dentry);
>  	int they_are_dirs = S_ISDIR(inode->i_mode);
> +	int error;
>  
>  	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
>  		return -EINVAL;
>  
> -	if (flags & RENAME_EXCHANGE)
> +	if (flags & RENAME_EXCHANGE) {
> +		shmem_doff_remove(old_dir, old_dentry);
> +		shmem_doff_remove(new_dir, new_dentry);
> +		error = shmem_doff_add(new_dir, old_dentry);
> +		if (error)
> +			return error;
> +		error = shmem_doff_add(old_dir, new_dentry);
> +		if (error)
> +			return error;
>  		return simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry);
> +	}
>  
>  	if (!simple_empty(new_dentry))
>  		return -ENOTEMPTY;
>  
>  	if (flags & RENAME_WHITEOUT) {
> -		int error;
> -
>  		error = shmem_whiteout(idmap, old_dir, old_dentry);
>  		if (error)
>  			return error;
>  	}
>  
> +	shmem_doff_remove(old_dir, old_dentry);
> +	error = shmem_doff_add(new_dir, old_dentry);
> +	if (error)
> +		return error;
> +
>  	if (d_really_is_positive(new_dentry)) {
>  		(void) shmem_unlink(new_dir, new_dentry);
>  		if (they_are_dirs) {
> @@ -3149,26 +3242,22 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
>  
>  	error = security_inode_init_security(inode, dir, &dentry->d_name,
>  					     shmem_initxattrs, NULL);
> -	if (error && error != -EOPNOTSUPP) {
> -		iput(inode);
> -		return error;
> -	}
> +	if (error && error != -EOPNOTSUPP)
> +		goto out_iput;
>  
>  	inode->i_size = len-1;
>  	if (len <= SHORT_SYMLINK_LEN) {
>  		inode->i_link = kmemdup(symname, len, GFP_KERNEL);
>  		if (!inode->i_link) {
> -			iput(inode);
> -			return -ENOMEM;
> +			error = -ENOMEM;
> +			goto out_iput;
>  		}
>  		inode->i_op = &shmem_short_symlink_operations;
>  	} else {
>  		inode_nohighmem(inode);
>  		error = shmem_get_folio(inode, 0, &folio, SGP_WRITE);
> -		if (error) {
> -			iput(inode);
> -			return error;
> -		}
> +		if (error)
> +			goto out_iput;
>  		inode->i_mapping->a_ops = &shmem_aops;
>  		inode->i_op = &shmem_symlink_inode_operations;
>  		memcpy(folio_address(folio), symname, len);
> @@ -3177,12 +3266,20 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
>  		folio_unlock(folio);
>  		folio_put(folio);
>  	}
> +
> +	error = shmem_doff_add(dir, dentry);
> +	if (error)
> +		goto out_iput;
> +
>  	dir->i_size += BOGO_DIRENT_SIZE;
>  	dir->i_ctime = dir->i_mtime = current_time(dir);
>  	inode_inc_iversion(dir);
>  	d_instantiate(dentry, inode);
>  	dget(dentry);
>  	return 0;
> +out_iput:
> +	iput(inode);
> +	return error;
>  }
>  
>  static void shmem_put_link(void *arg)
> @@ -3224,6 +3321,77 @@ static const char *shmem_get_link(struct dentry *dentry,
>  	return folio_address(folio);
>  }
>  
> +static loff_t shmem_dir_llseek(struct file *file, loff_t offset, int whence)
> +{
> +	switch (whence) {
> +	case SEEK_CUR:
> +		offset += file->f_pos;
> +		fallthrough;
> +	case SEEK_SET:
> +		if (offset >= 0)
> +			break;
> +		fallthrough;
> +	default:
> +		return -EINVAL;
> +	}
> +	return vfs_setpos(file, offset, U32_MAX);
> +}
> +
> +static bool shmem_dir_emit(struct dir_context *ctx, struct dentry *dentry)
> +{
> +	struct inode *inode = d_inode(dentry);
> +
> +	return ctx->actor(ctx, dentry->d_name.name, dentry->d_name.len,
> +			  (loff_t)dentry->d_fsdata, inode->i_ino,
> +			  fs_umode_to_dtype(inode->i_mode));
> +}
> +
> +/**
> + * shmem_readdir - Emit entries starting at offset @ctx->pos
> + * @file: an open directory to iterate over
> + * @ctx: directory iteration context
> + *
> + * Caller must hold @file's i_rwsem to prevent insertion or removal of
> + * entries during this call.
> + *
> + * On entry, @ctx->pos contains an offset that represents the first entry
> + * to be read from the directory.
> + *
> + * The operation continues until there are no more entries to read, or
> + * until the ctx->actor indicates there is no more space in the caller's
> + * output buffer.
> + *
> + * On return, @ctx->pos contains an offset that will read the next entry
> + * in this directory when shmem_readdir() is called again with @ctx.
> + *
> + * Return values:
> + *   %0 - Complete
> + */
> +static int shmem_readdir(struct file *file, struct dir_context *ctx)
> +{
> +	struct dentry *dentry, *dir = file->f_path.dentry;
> +	unsigned long offset;
> +
> +	lockdep_assert_held(&d_inode(dir)->i_rwsem);

You probably don't need the above. This is called via ->iterate_shared
so the lock had _better_ be held.

 
> +
> +	if (!dir_emit_dots(file, ctx))
> +		goto out;
> +	for (offset = ctx->pos - 1; offset < ULONG_MAX - 1;) {
> +		dentry = shmem_doff_find_after(dir, &offset);
> +		if (!dentry)
> +			break;
> +		if (!shmem_dir_emit(ctx, dentry)) {
> +			dput(dentry);
> +			break;
> +		}
> +		ctx->pos = offset + 1;
> +		dput(dentry);
> +	}
> +
> +out:
> +	return 0;
> +}
> +
>  #ifdef CONFIG_TMPFS_XATTR
>  
>  static int shmem_fileattr_get(struct dentry *dentry, struct fileattr *fa)
> @@ -3742,6 +3910,12 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root)
>  	return 0;
>  }
>  
> +#else /* CONFIG_TMPFS */
> +
> +static inline void shmem_doff_map_destroy(struct inode *dir)
> +{
> +}
> +
>  #endif /* CONFIG_TMPFS */
>  
>  static void shmem_put_super(struct super_block *sb)
> @@ -3888,6 +4062,8 @@ static void shmem_destroy_inode(struct inode *inode)
>  {
>  	if (S_ISREG(inode->i_mode))
>  		mpol_free_shared_policy(&SHMEM_I(inode)->policy);
> +	if (S_ISDIR(inode->i_mode))
> +		shmem_doff_map_destroy(inode);
>  }
>  
>  static void shmem_init_inode(void *foo)
> @@ -3955,6 +4131,15 @@ static const struct inode_operations shmem_inode_operations = {
>  #endif
>  };
>  
> +static const struct file_operations shmem_dir_operations = {
> +#ifdef CONFIG_TMPFS
> +	.llseek		= shmem_dir_llseek,
> +	.iterate_shared	= shmem_readdir,
> +#endif
> +	.read		= generic_read_dir,
> +	.fsync		= noop_fsync,
> +};
> +
>  static const struct inode_operations shmem_dir_inode_operations = {
>  #ifdef CONFIG_TMPFS
>  	.getattr	= shmem_getattr,
> 
> 

Other than the nits above, this all looks fine to me. I've done some
testing with this series too and it all seems to work as expected, and
fixes some nasty problems when trying to recursively remove directories
via nfsd.

Have you done any performance testing? My expectation would be that
you'd have roughly similar (or even faster) performance with this set,
but at the expense of a bit of memory (for the xarrays).

One thing we could consider is lifting the bulk of this code into libfs,
so other shmem-like filesystems can take advantage of it, but that work
could be done later too when we have another proposed consumer.

Chuck Lever April 20, 2023, 8:12 p.m. UTC | #2
> On Apr 20, 2023, at 2:52 PM, Jeff Layton <jlayton@kernel.org> wrote:
> 
> On Mon, 2023-04-17 at 15:23 -0400, Chuck Lever wrote:
>> From: Chuck Lever <chuck.lever@oracle.com>
>> 
>> The current cursor-based directory cookie mechanism doesn't work
>> when a tmpfs filesystem is exported via NFS. This is because NFS
>> clients do not open directories: each READDIR operation has to open
>> the directory on the server, read it, then close it. The cursor
>> state for that directory, being associated strictly with the opened
>> struct file, is then discarded.
>> 
>> Directory cookies are cached not only by NFS clients, but also by
>> user space libraries on those clients. Essentially there is no way
>> to invalidate those caches when directory offsets have changed on
>> an NFS server after the offset-to-dentry mapping changes.
>> 
>> The solution we've come up with is to make the directory cookie for
>> each file in a tmpfs filesystem stable for the life of the directory
>> entry it represents.
>> 
>> Add a per-directory xarray. shmem_readdir() uses this to map each
>> directory offset (an loff_t integer) to the memory address of a
>> struct dentry.
>> 
>> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
>> ---
>> include/linux/shmem_fs.h |    2 
>> mm/shmem.c               |  213 +++++++++++++++++++++++++++++++++++++++++++---
>> 2 files changed, 201 insertions(+), 14 deletions(-)
>> 
>> Changes since RFC:
>> - Destroy xarray in shmem_destroy_inode() instead of free_in_core_inode()
>> - A few cosmetic updates
>> 
>> diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
>> index 103d1000a5a2..682ef885aa89 100644
>> --- a/include/linux/shmem_fs.h
>> +++ b/include/linux/shmem_fs.h
>> @@ -26,6 +26,8 @@ struct shmem_inode_info {
>> atomic_t stop_eviction; /* hold when working on inode */
>> struct timespec64 i_crtime; /* file creation time */
>> unsigned int fsflags; /* flags for FS_IOC_[SG]ETFLAGS */
>> + struct xarray doff_map; /* dir offset to entry mapping */
>> + u32 next_doff;
>> struct inode vfs_inode;
>> };
>> 
>> diff --git a/mm/shmem.c b/mm/shmem.c
>> index 448f393d8ab2..ba4176499e5c 100644
>> --- a/mm/shmem.c
>> +++ b/mm/shmem.c
>> @@ -40,6 +40,8 @@
>> #include <linux/fs_parser.h>
>> #include <linux/swapfile.h>
>> #include <linux/iversion.h>
>> +#include <linux/xarray.h>
>> +
>> #include "swap.h"
>> 
>> static struct vfsmount *shm_mnt;
>> @@ -234,6 +236,7 @@ static const struct super_operations shmem_ops;
>> const struct address_space_operations shmem_aops;
>> static const struct file_operations shmem_file_operations;
>> static const struct inode_operations shmem_inode_operations;
>> +static const struct file_operations shmem_dir_operations;
>> static const struct inode_operations shmem_dir_inode_operations;
>> static const struct inode_operations shmem_special_inode_operations;
>> static const struct vm_operations_struct shmem_vm_ops;
>> @@ -2397,7 +2400,9 @@ static struct inode *shmem_get_inode(struct mnt_idmap *idmap, struct super_block
>> /* Some things misbehave if size == 0 on a directory */
>> inode->i_size = 2 * BOGO_DIRENT_SIZE;
>> inode->i_op = &shmem_dir_inode_operations;
>> - inode->i_fop = &simple_dir_operations;
>> + inode->i_fop = &shmem_dir_operations;
>> + xa_init_flags(&info->doff_map, XA_FLAGS_ALLOC1);
>> + info->next_doff = 0;
>> break;
>> case S_IFLNK:
>> /*
>> @@ -2917,6 +2922,71 @@ static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
>> return 0;
>> }
>> 
>> +static struct xarray *shmem_doff_map(struct inode *dir)
>> +{
>> + return &SHMEM_I(dir)->doff_map;
>> +}
>> +
>> +static int shmem_doff_add(struct inode *dir, struct dentry *dentry)
>> +{
>> + struct shmem_inode_info *info = SHMEM_I(dir);
>> + struct xa_limit limit = XA_LIMIT(2, U32_MAX);
>> + u32 offset;
>> + int ret;
>> +
>> + if (dentry->d_fsdata)
>> + return -EBUSY;
>> +
>> + offset = 0;
>> + ret = xa_alloc_cyclic(shmem_doff_map(dir), &offset, dentry, limit,
>> +       &info->next_doff, GFP_KERNEL);
>> + if (ret < 0)
>> + return ret;
>> +
>> + dentry->d_fsdata = (void *)(unsigned long)offset;
>> + return 0;
>> +}
>> +
>> +static struct dentry *shmem_doff_find_after(struct dentry *dir,
>> +     unsigned long *offset)
>> +{
>> + struct xarray *xa = shmem_doff_map(d_inode(dir));
>> + struct dentry *d, *found = NULL;
>> +
>> + spin_lock(&dir->d_lock);
>> + d = xa_find_after(xa, offset, ULONG_MAX, XA_PRESENT);
>> + if (d) {
>> + spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
>> + if (simple_positive(d))
>> + found = dget_dlock(d);
>> + spin_unlock(&d->d_lock);
>> + }
>> + spin_unlock(&dir->d_lock);
> 
> This part is kind of gross, but I think I get it now...
> 
> You have to take dir->d_lock to ensure that "d" doesn't go away when you
> don't hold a ref on it, and you need the child's d_lock to ensure that
> simple_positive result is stable while you take a reference (because
> doing a dput there could be problematic). If that's right, then that's a
> bit subtle, and might deserve a nice comment.
> 
> I do wonder if there is some way to do this with RCU instead, but this
> seems to work well enough.

I lifted this from fs/libfs.c, fwiw.


>> + return found;
>> +}
>> +
>> +static void shmem_doff_remove(struct inode *dir, struct dentry *dentry)
>> +{
>> + u32 offset = (u32)(unsigned long)dentry->d_fsdata;
>> +
>> + if (!offset)
>> + return;
>> +
>> + xa_erase(shmem_doff_map(dir), offset);
>> + dentry->d_fsdata = NULL;
>> +}
>> +
>> +/*
>> + * During fs teardown (eg. umount), a directory's doff_map might still
>> + * contain entries. xa_destroy() cleans out anything that remains.
>> + */
>> +static void shmem_doff_map_destroy(struct inode *inode)
>> +{
>> + struct xarray *xa = shmem_doff_map(inode);
>> +
>> + xa_destroy(xa);
>> +}
>> +
>> /*
>>  * File creation. Allocate an inode, and we're done..
>>  */
>> @@ -2938,6 +3008,10 @@ shmem_mknod(struct mnt_idmap *idmap, struct inode *dir,
>> if (error && error != -EOPNOTSUPP)
>> goto out_iput;
>> 
>> + error = shmem_doff_add(dir, dentry);
>> + if (error)
>> + goto out_iput;
>> +
>> error = 0;
>> dir->i_size += BOGO_DIRENT_SIZE;
>> dir->i_ctime = dir->i_mtime = current_time(dir);
>> @@ -3015,6 +3089,10 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentr
>> goto out;
>> }
>> 
>> + ret = shmem_doff_add(dir, dentry);
>> + if (ret)
>> + goto out;
>> +
>> dir->i_size += BOGO_DIRENT_SIZE;
>> inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
>> inode_inc_iversion(dir);
>> @@ -3033,6 +3111,8 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry)
>> if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode))
>> shmem_free_inode(inode->i_sb);
>> 
>> + shmem_doff_remove(dir, dentry);
>> +
>> dir->i_size -= BOGO_DIRENT_SIZE;
>> inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
>> inode_inc_iversion(dir);
>> @@ -3091,24 +3171,37 @@ static int shmem_rename2(struct mnt_idmap *idmap,
>> {
>> struct inode *inode = d_inode(old_dentry);
>> int they_are_dirs = S_ISDIR(inode->i_mode);
>> + int error;
>> 
>> if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
>> return -EINVAL;
>> 
>> - if (flags & RENAME_EXCHANGE)
>> + if (flags & RENAME_EXCHANGE) {
>> + shmem_doff_remove(old_dir, old_dentry);
>> + shmem_doff_remove(new_dir, new_dentry);
>> + error = shmem_doff_add(new_dir, old_dentry);
>> + if (error)
>> + return error;
>> + error = shmem_doff_add(old_dir, new_dentry);
>> + if (error)
>> + return error;
>> return simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry);
>> + }
>> 
>> if (!simple_empty(new_dentry))
>> return -ENOTEMPTY;
>> 
>> if (flags & RENAME_WHITEOUT) {
>> - int error;
>> -
>> error = shmem_whiteout(idmap, old_dir, old_dentry);
>> if (error)
>> return error;
>> }
>> 
>> + shmem_doff_remove(old_dir, old_dentry);
>> + error = shmem_doff_add(new_dir, old_dentry);
>> + if (error)
>> + return error;
>> +
>> if (d_really_is_positive(new_dentry)) {
>> (void) shmem_unlink(new_dir, new_dentry);
>> if (they_are_dirs) {
>> @@ -3149,26 +3242,22 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
>> 
>> error = security_inode_init_security(inode, dir, &dentry->d_name,
>>      shmem_initxattrs, NULL);
>> - if (error && error != -EOPNOTSUPP) {
>> - iput(inode);
>> - return error;
>> - }
>> + if (error && error != -EOPNOTSUPP)
>> + goto out_iput;
>> 
>> inode->i_size = len-1;
>> if (len <= SHORT_SYMLINK_LEN) {
>> inode->i_link = kmemdup(symname, len, GFP_KERNEL);
>> if (!inode->i_link) {
>> - iput(inode);
>> - return -ENOMEM;
>> + error = -ENOMEM;
>> + goto out_iput;
>> }
>> inode->i_op = &shmem_short_symlink_operations;
>> } else {
>> inode_nohighmem(inode);
>> error = shmem_get_folio(inode, 0, &folio, SGP_WRITE);
>> - if (error) {
>> - iput(inode);
>> - return error;
>> - }
>> + if (error)
>> + goto out_iput;
>> inode->i_mapping->a_ops = &shmem_aops;
>> inode->i_op = &shmem_symlink_inode_operations;
>> memcpy(folio_address(folio), symname, len);
>> @@ -3177,12 +3266,20 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
>> folio_unlock(folio);
>> folio_put(folio);
>> }
>> +
>> + error = shmem_doff_add(dir, dentry);
>> + if (error)
>> + goto out_iput;
>> +
>> dir->i_size += BOGO_DIRENT_SIZE;
>> dir->i_ctime = dir->i_mtime = current_time(dir);
>> inode_inc_iversion(dir);
>> d_instantiate(dentry, inode);
>> dget(dentry);
>> return 0;
>> +out_iput:
>> + iput(inode);
>> + return error;
>> }
>> 
>> static void shmem_put_link(void *arg)
>> @@ -3224,6 +3321,77 @@ static const char *shmem_get_link(struct dentry *dentry,
>> return folio_address(folio);
>> }
>> 
>> +static loff_t shmem_dir_llseek(struct file *file, loff_t offset, int whence)
>> +{
>> + switch (whence) {
>> + case SEEK_CUR:
>> + offset += file->f_pos;
>> + fallthrough;
>> + case SEEK_SET:
>> + if (offset >= 0)
>> + break;
>> + fallthrough;
>> + default:
>> + return -EINVAL;
>> + }
>> + return vfs_setpos(file, offset, U32_MAX);
>> +}
>> +
>> +static bool shmem_dir_emit(struct dir_context *ctx, struct dentry *dentry)
>> +{
>> + struct inode *inode = d_inode(dentry);
>> +
>> + return ctx->actor(ctx, dentry->d_name.name, dentry->d_name.len,
>> +   (loff_t)dentry->d_fsdata, inode->i_ino,
>> +   fs_umode_to_dtype(inode->i_mode));
>> +}
>> +
>> +/**
>> + * shmem_readdir - Emit entries starting at offset @ctx->pos
>> + * @file: an open directory to iterate over
>> + * @ctx: directory iteration context
>> + *
>> + * Caller must hold @file's i_rwsem to prevent insertion or removal of
>> + * entries during this call.
>> + *
>> + * On entry, @ctx->pos contains an offset that represents the first entry
>> + * to be read from the directory.
>> + *
>> + * The operation continues until there are no more entries to read, or
>> + * until the ctx->actor indicates there is no more space in the caller's
>> + * output buffer.
>> + *
>> + * On return, @ctx->pos contains an offset that will read the next entry
>> + * in this directory when shmem_readdir() is called again with @ctx.
>> + *
>> + * Return values:
>> + *   %0 - Complete
>> + */
>> +static int shmem_readdir(struct file *file, struct dir_context *ctx)
>> +{
>> + struct dentry *dentry, *dir = file->f_path.dentry;
>> + unsigned long offset;
>> +
>> + lockdep_assert_held(&d_inode(dir)->i_rwsem);
> 
> You probably don't need the above. This is called via ->iterate_shared
> so the lock had _better_ be held.

True, it's not 100% necessary.

I was trying to document the API contract, part of which is
"caller needs to hold dir->i_rwsem". This seemed like the most
crisp way to do that.


>> +
>> + if (!dir_emit_dots(file, ctx))
>> + goto out;
>> + for (offset = ctx->pos - 1; offset < ULONG_MAX - 1;) {
>> + dentry = shmem_doff_find_after(dir, &offset);
>> + if (!dentry)
>> + break;
>> + if (!shmem_dir_emit(ctx, dentry)) {
>> + dput(dentry);
>> + break;
>> + }
>> + ctx->pos = offset + 1;
>> + dput(dentry);
>> + }
>> +
>> +out:
>> + return 0;
>> +}
>> +
>> #ifdef CONFIG_TMPFS_XATTR
>> 
>> static int shmem_fileattr_get(struct dentry *dentry, struct fileattr *fa)
>> @@ -3742,6 +3910,12 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root)
>> return 0;
>> }
>> 
>> +#else /* CONFIG_TMPFS */
>> +
>> +static inline void shmem_doff_map_destroy(struct inode *dir)
>> +{
>> +}
>> +
>> #endif /* CONFIG_TMPFS */
>> 
>> static void shmem_put_super(struct super_block *sb)
>> @@ -3888,6 +4062,8 @@ static void shmem_destroy_inode(struct inode *inode)
>> {
>> if (S_ISREG(inode->i_mode))
>> mpol_free_shared_policy(&SHMEM_I(inode)->policy);
>> + if (S_ISDIR(inode->i_mode))
>> + shmem_doff_map_destroy(inode);
>> }
>> 
>> static void shmem_init_inode(void *foo)
>> @@ -3955,6 +4131,15 @@ static const struct inode_operations shmem_inode_operations = {
>> #endif
>> };
>> 
>> +static const struct file_operations shmem_dir_operations = {
>> +#ifdef CONFIG_TMPFS
>> + .llseek = shmem_dir_llseek,
>> + .iterate_shared = shmem_readdir,
>> +#endif
>> + .read = generic_read_dir,
>> + .fsync = noop_fsync,
>> +};
>> +
>> static const struct inode_operations shmem_dir_inode_operations = {
>> #ifdef CONFIG_TMPFS
>> .getattr = shmem_getattr,
>> 
>> 
> 
> Other than the nits above, this all looks fine to me. I've done some
> testing with this series too and it all seems to work as expected, and
> fixes some nasty problems when trying to recursively remove directories
> via nfsd.

Thanks for your review, testing, and suggestions.


> Have you done any performance testing? My expectation would be that
> you'd have roughly similar (or even faster) performance with this set,
> but at the expense of a bit of memory (for the xarrays).

I don't have any directory microbenchmarks. I suppose I could
do something like timing large software builds.
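
Even a dumb create/unlink loop on a tmpfs mount would probably show
the xarray cost. Something like this, hypothetical and untested:

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>

/* create, then unlink, N files in the current directory and time it */
int main(int argc, char **argv)
{
	int i, n = argc > 1 ? atoi(argv[1]) : 100000;
	struct timespec start, end;
	char name[32];

	clock_gettime(CLOCK_MONOTONIC, &start);
	for (i = 0; i < n; i++) {
		snprintf(name, sizeof(name), "f%08d", i);
		close(open(name, O_CREAT | O_WRONLY, 0600));
	}
	for (i = 0; i < n; i++) {
		snprintf(name, sizeof(name), "f%08d", i);
		unlink(name);
	}
	clock_gettime(CLOCK_MONOTONIC, &end);
	printf("%d creates+unlinks: %.3f seconds\n", n,
	       (end.tv_sec - start.tv_sec) +
	       (end.tv_nsec - start.tv_nsec) / 1e9);
	return 0;
}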


> One thing we could consider is lifting the bulk of this code into libfs,
> so other shmem-like filesystems can take advantage of it, but that work
> could be done later too when we have another proposed consumer.

Eg. autofs.


--
Chuck Lever

Andrew Morton May 3, 2023, 12:12 a.m. UTC | #3
On Mon, 17 Apr 2023 15:23:10 -0400 Chuck Lever <cel@kernel.org> wrote:

> From: Chuck Lever <chuck.lever@oracle.com>
> 
> The current cursor-based directory cookie mechanism doesn't work
> when a tmpfs filesystem is exported via NFS. This is because NFS
> clients do not open directories: each READDIR operation has to open
> the directory on the server, read it, then close it. The cursor
> state for that directory, being associated strictly with the opened
> struct file, is then discarded.
> 
> Directory cookies are cached not only by NFS clients, but also by
> user space libraries on those clients. Essentially there is no way
> to invalidate those caches when directory offsets have changed on
> an NFS server after the offset-to-dentry mapping changes.
> 
> The solution we've come up with is to make the directory cookie for
> each file in a tmpfs filesystem stable for the life of the directory
> entry it represents.
> 
> Add a per-directory xarray. shmem_readdir() uses this to map each
> directory offset (an loff_t integer) to the memory address of a
> struct dentry.
> 

How have people survived for this long with this problem?

It's a lot of new code - can we get away with simply disallowing
exports of tmpfs?

How can we maintain this?  Is it possible to come up with a test
harness for inclusion in kernel selftests?

Chuck Lever May 3, 2023, 12:43 a.m. UTC | #4
> On May 2, 2023, at 8:12 PM, Andrew Morton <akpm@linux-foundation.org> wrote:
> 
> On Mon, 17 Apr 2023 15:23:10 -0400 Chuck Lever <cel@kernel.org> wrote:
> 
>> From: Chuck Lever <chuck.lever@oracle.com>
>> 
>> The current cursor-based directory cookie mechanism doesn't work
>> when a tmpfs filesystem is exported via NFS. This is because NFS
>> clients do not open directories: each READDIR operation has to open
>> the directory on the server, read it, then close it. The cursor
>> state for that directory, being associated strictly with the opened
>> struct file, is then discarded.
>> 
>> Directory cookies are cached not only by NFS clients, but also by
>> user space libraries on those clients. Essentially there is no way
>> to invalidate those caches when directory offsets have changed on
>> an NFS server after the offset-to-dentry mapping changes.
>> 
>> The solution we've come up with is to make the directory cookie for
>> each file in a tmpfs filesystem stable for the life of the directory
>> entry it represents.
>> 
>> Add a per-directory xarray. shmem_readdir() uses this to map each
>> directory offset (an loff_t integer) to the memory address of a
>> struct dentry.
>> 
> 
> How have people survived for this long with this problem?

It's less of a problem without NFS in the picture; local
applications can hold the directory open, and that preserves
the seek cursor. But you can still trigger it.

Also, a plurality of applications are well-behaved in this
regard. It's just the more complex and more useful ones
(like git) that seem to trigger issues.

It became less bearable for NFS because of a recent change
on the Linux NFS client to optimize directory read behavior:

85aa8ddc3818 ("NFS: Trigger the "ls -l" readdir heuristic sooner")

Trond argued that tmpfs directory cookie behavior has always
been problematic (eg broken) therefore this commit does not
count as a regression. However, it does make tmpfs exports
less usable, breaking some tests that have always worked.


> It's a lot of new code -

I don't feel that this is a lot of new code:

include/linux/shmem_fs.h |    2 
mm/shmem.c               |  213 +++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 201 insertions(+), 14 deletions(-)

But I agree it might look a little daunting on first review.
I am happy to try to break this single patch up or consider
other approaches.

We could, for instance, tuck a little more of this into
lib/fs. Copying the readdir and directory seeking
implementation from simplefs to tmpfs is one reason
the insertion count is worrisome.


> can we get away with simply disallowing
> exports of tmpfs?

I think the bottom line is that you /can/ trigger this
behavior without NFS, just not as quickly. The threshold
is high enough that most use cases aren't bothered by
this right now.

We'd rather not disallow exporting tmpfs. It's a very
good testing platform for us, and disallowing it would
be a noticeable regression for some folks.


> How can we maintain this?  Is it possible to come up with a test
> harness for inclusion in kernel selftests?

There is very little directory cookie testing that I know of
in the obvious place: fstests. That would be where this stuff
should be unit tested, IMO.


--
Chuck Lever

Jeff Layton May 4, 2023, 5:21 p.m. UTC | #5
On Wed, 2023-05-03 at 00:43 +0000, Chuck Lever III wrote:
> 
> > On May 2, 2023, at 8:12 PM, Andrew Morton <akpm@linux-foundation.org> wrote:
> > 
> > On Mon, 17 Apr 2023 15:23:10 -0400 Chuck Lever <cel@kernel.org> wrote:
> > 
> > > From: Chuck Lever <chuck.lever@oracle.com>
> > > 
> > > The current cursor-based directory cookie mechanism doesn't work
> > > when a tmpfs filesystem is exported via NFS. This is because NFS
> > > clients do not open directories: each READDIR operation has to open
> > > the directory on the server, read it, then close it. The cursor
> > > state for that directory, being associated strictly with the opened
> > > struct file, is then discarded.
> > > 
> > > Directory cookies are cached not only by NFS clients, but also by
> > > user space libraries on those clients. Essentially there is no way
> > > to invalidate those caches when directory offsets have changed on
> > > an NFS server after the offset-to-dentry mapping changes.
> > > 
> > > The solution we've come up with is to make the directory cookie for
> > > each file in a tmpfs filesystem stable for the life of the directory
> > > entry it represents.
> > > 
> > > Add a per-directory xarray. shmem_readdir() uses this to map each
> > > directory offset (an loff_t integer) to the memory address of a
> > > struct dentry.
> > > 
> > 
> > How have people survived for this long with this problem?
> 
> It's less of a problem without NFS in the picture; local
> applications can hold the directory open, and that preserves
> the seek cursor. But you can still trigger it.
> 
> Also, a plurality of applications are well-behaved in this
> regard. It's just the more complex and more useful ones
> (like git) that seem to trigger issues.
> 
> It became less bearable for NFS because of a recent change
> on the Linux NFS client to optimize directory read behavior:
> 
> 85aa8ddc3818 ("NFS: Trigger the "ls -l" readdir heuristic sooner")
> 
> Trond argued that tmpfs directory cookie behavior has always
> been problematic (eg broken) therefore this commit does not
> count as a regression. However, it does make tmpfs exports
> less usable, breaking some tests that have always worked.
> 
> 
> > It's a lot of new code -
> 
> I don't feel that this is a lot of new code:
> 
> include/linux/shmem_fs.h |    2 
> mm/shmem.c               |  213 +++++++++++++++++++++++++++++++++++++++++++---
> 2 files changed, 201 insertions(+), 14 deletions(-)
> 
> But I agree it might look a little daunting on first review.
> I am happy to try to break this single patch up or consider
> other approaches.
> 

I wonder whether you really need an xarray here?

dcache_readdir walks the d_subdirs list. We add things to d_subdirs at
d_alloc time (and in d_move). If you were to assign its dirindex when
the dentry gets added to d_subdirs (maybe in ->d_init?) then you'd have
a list already ordered by index, and could deal with missing indexes
easily.

It's not as efficient as the xarray if you have to seek through a big
dir, but if keeping the changes tiny is a goal then that might be
another way to do this.

> We could, for instance, tuck a little more of this into
> lib/fs. Copying the readdir and directory seeking
> implementation from simplefs to tmpfs is one reason
> the insertion count is worrisome.
> 
> 
> > can we get away with simply disallowing
> > exports of tmpfs?
> 
> I think the bottom line is that you /can/ trigger this
> behavior without NFS, just not as quickly. The threshold
> is high enough that most use cases aren't bothered by
> this right now.
> 
> We'd rather not disallow exporting tmpfs. It's a very
> good testing platform for us, and disallowing it would
> be a noticeable regression for some folks.
> 
> 

Yeah, I'd not be in favor of that either. We've had an exportable tmpfs
for a long time. It's a good way to do testing of the entire NFS server
stack, without having to deal with underlying storage.

> > How can we maintain this?  Is it possible to come up with a test
> > harness for inclusion in kernel selftests?
> 
> There is very little directory cookie testing that I know of
> in the obvious place: fstests. That would be where this stuff
> should be unit tested, IMO.
> 

I'd like to see this too. It's easy for programs to get this wrong. In
this case, could we emulate the NFS behavior by doing this in a loop
over a large directory?

opendir
seekdir (to result of last telldir)
readdir
unlink
telldir
closedir

At the end of it, check whether there are any entries left over.
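
Something like this, roughly (untested sketch; the function name is
just made up):

/* needs <dirent.h>, <string.h>, <unistd.h> */
static void nfs_style_remove_all(const char *path)
{
	long pos = 0;

	for (;;) {
		/* reopen the directory for every entry, like nfsd does */
		DIR *dirp = opendir(path);
		struct dirent *de;

		if (!dirp)
			return;
		/* reuse the cookie saved from the previous open */
		seekdir(dirp, pos);
		de = readdir(dirp);
		if (!de) {
			closedir(dirp);
			return;
		}
		if (strcmp(de->d_name, ".") && strcmp(de->d_name, ".."))
			unlinkat(dirfd(dirp), de->d_name, 0);
		pos = telldir(dirp);
		closedir(dirp);
	}
}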

Benjamin Coddington May 4, 2023, 8:21 p.m. UTC | #6
On 2 May 2023, at 20:43, Chuck Lever III wrote:

>> On May 2, 2023, at 8:12 PM, Andrew Morton <akpm@linux-foundation.org> wrote:
>>
>> On Mon, 17 Apr 2023 15:23:10 -0400 Chuck Lever <cel@kernel.org> wrote:
>>
>>> From: Chuck Lever <chuck.lever@oracle.com>
>>>
>>> The current cursor-based directory cookie mechanism doesn't work
>>> when a tmpfs filesystem is exported via NFS. This is because NFS
>>> clients do not open directories: each READDIR operation has to open
>>> the directory on the server, read it, then close it. The cursor
>>> state for that directory, being associated strictly with the opened
>>> struct file, is then discarded.
>>>
>>> Directory cookies are cached not only by NFS clients, but also by
>>> user space libraries on those clients. Essentially there is no way
>>> to invalidate those caches when directory offsets have changed on
>>> an NFS server after the offset-to-dentry mapping changes.
>>>
>>> The solution we've come up with is to make the directory cookie for
>>> each file in a tmpfs filesystem stable for the life of the directory
>>> entry it represents.
>>>
>>> Add a per-directory xarray. shmem_readdir() uses this to map each
>>> directory offset (an loff_t integer) to the memory address of a
>>> struct dentry.
>>>
>>
>> How have people survived for this long with this problem?

They survived this long by not considering their current directory offset to
be a stationary position in the stream after removing chunks of that stream,
as per some POSIX.  However, git does this:

opendir
while getdents
    unlink(dentries)
closedir
assert(directory empty)

This pattern isn't guaranteed to always produce an empty directory, and
filesystems aren't wrong when it doesn't, but they could probably do better.

Libfs, on the other hand, conservatively closes and re-opens the directory
after removing some entries in order to ensure none are skipped.
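
A removal loop that doesn't depend on cookie stability looks more like
this (again just a sketch; "path" is whatever directory you're
clearing out):

/* needs <dirent.h>, <string.h>, <unistd.h> */
DIR *dirp = opendir(path);
struct dirent *de;

while (dirp && (de = readdir(dirp)) != NULL) {
	if (!strcmp(de->d_name, ".") || !strcmp(de->d_name, ".."))
		continue;
	unlinkat(dirfd(dirp), de->d_name, 0);
	/* restart the stream rather than trust a cookie after unlink */
	rewinddir(dirp);
}
if (dirp)
	closedir(dirp);

More readdir() passes, but it never reuses a position across an unlink.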

> It's less of a problem without NFS in the picture; local
> applications can hold the directory open, and that preserves
> the seek cursor. But you can still trigger it.
>
> Also, a plurality of applications are well-behaved in this
> regard. It's just the more complex and more useful ones
> (like git) that seem to trigger issues.
>
> It became less bearable for NFS because of a recent change
> on the Linux NFS client to optimize directory read behavior:
>
> 85aa8ddc3818 ("NFS: Trigger the "ls -l" readdir heuristic sooner")

My ears burn again.

> Trond argued that tmpfs directory cookie behavior has always
> been problematic (eg broken) therefore this commit does not
> count as a regression. However, it does make tmpfs exports
> less usable, breaking some tests that have always worked.

As luck would have it, on NFS the breakage also depends on the length
of the filenames.

It's also possible to fix git's remove_dir_recurse(), but making tmpfs have
stable directory offsets would be an improvement for everyone, and especially
for NFS.

>> It's a lot of new code -
>
> I don't feel that this is a lot of new code:
>
> include/linux/shmem_fs.h |    2
> mm/shmem.c               |  213 +++++++++++++++++++++++++++++++++++++++++++---
> 2 files changed, 201 insertions(+), 14 deletions(-)
>
> But I agree it might look a little daunting on first review.
> I am happy to try to break this single patch up or consider
> other approaches.
>
> We could, for instance, tuck a little more of this into
> lib/fs. Copying the readdir and directory seeking
> implementation from simplefs to tmpfs is one reason
> the insertion count is worrisome.
>
>
>> can we get away with simply disallowing
>> exports of tmpfs?
>
> I think the bottom line is that you /can/ trigger this
> behavior without NFS, just not as quickly. The threshold
> is high enough that most use cases aren't bothered by
> this right now.

Yes, you can run into this problem directly on tmpfs.

> We'd rather not disallow exporting tmpfs. It's a very
> good testing platform for us, and disallowing it would
> be a noticeable regression for some folks.
>
>
>> How can we maintain this?  Is it possible to come up with a test
>> harness for inclusion in kernel selftests?
>
> There is very little directory cookie testing that I know of
> in the obvious place: fstests. That would be where this stuff
> should be unit tested, IMO.

Yes, we could write a test, but a test failure shouldn't mean the
filesystem is wrong or broken.

Ben

Yujie Liu May 5, 2023, 5:06 a.m. UTC | #7
Hello,

kernel test robot noticed a -18.7% regression of aim9.disk_src.ops_per_sec on:

commit: 2976e2b93abcbf19811dc7a444b6df85a520468e ("[PATCH v1] shmem: stable directory cookies")
url: https://github.com/intel-lab-lkp/linux/commits/Chuck-Lever/shmem-stable-directory-cookies/20230418-032350
base: https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git 6a8f57ae2eb07ab39a6f0ccad60c760743051026
patch link: https://lore.kernel.org/all/168175931561.2843.16288612382874559384.stgit@manet.1015granger.net/
patch subject: [PATCH v1] shmem: stable directory cookies

testcase: aim9
test machine: 48 threads 2 sockets Intel(R) Xeon(R) CPU E5-2697 v2 @ 2.70GHz (Ivy Bridge-EP) with 112G memory
parameters:

	testtime: 300s
	test: disk_src
	cpufreq_governor: performance

In addition to that, the commit also has significant impact on the following tests:

+------------------+----------------------------------------------------------+
| testcase: change | aim9: aim9.disk_src.ops_per_sec -21.1% regression        |
| test machine     | 224 threads 2 sockets (Sapphire Rapids) with 256G memory |
| test parameters  | cpufreq_governor=performance                             |
|                  | test=disk_src                                            |
|                  | testtime=300s                                            |
+------------------+----------------------------------------------------------+


If you fix the issue, kindly add following tag
| Reported-by: kernel test robot <yujie.liu@intel.com>
| Link: https://lore.kernel.org/oe-lkp/202305051223.8ef7d7ae-yujie.liu@intel.com


Details are as below:

=========================================================================================
compiler/cpufreq_governor/kconfig/rootfs/tbox_group/test/testcase/testtime:
  gcc-11/performance/x86_64-rhel-8.3/debian-11.1-x86_64-20220510.cgz/lkp-ivb-2ep1/disk_src/aim9/300s

commit: 
  v6.3-rc7
  2976e2b93a ("shmem: stable directory cookies")

        v6.3-rc7 2976e2b93abcbf19811dc7a444b 
---------------- --------------------------- 
         %stddev     %change         %stddev
             \          |                \  
      0.25 ±  7%      +0.1        0.36 ±  3%  mpstat.cpu.all.soft%
      0.61            -0.1        0.52        mpstat.cpu.all.usr%
    198823           -18.7%     161675        aim9.disk_src.ops_per_sec
     21274 ± 61%     -94.8%       1112 ± 13%  aim9.time.involuntary_context_switches
     95.00            -4.2%      91.00        aim9.time.percent_of_cpu_this_job_got
     72.93           -16.7%      60.78        aim9.time.user_time
     23420            +6.0%      24832        proc-vmstat.nr_slab_reclaimable
   1374766 ± 27%    +400.7%    6883519        proc-vmstat.numa_hit
   1331644 ± 28%    +413.6%    6839273        proc-vmstat.numa_local
   4415141 ± 38%    +507.5%   26821043        proc-vmstat.pgalloc_normal
   4392173 ± 38%    +508.9%   26743904        proc-vmstat.pgfree
     10.80           +23.1%      13.29        perf-stat.i.MPKI
      2.58            +0.2        2.79 ±  2%  perf-stat.i.branch-miss-rate%
  19666784            +2.5%   20148875        perf-stat.i.branch-misses
     17.41            -2.6       14.86        perf-stat.i.cache-miss-rate%
  40153603           +17.7%   47241220        perf-stat.i.cache-references
      1.69            +4.7%       1.77        perf-stat.i.cpi
 1.073e+09            -7.6%  9.923e+08 ±  9%  perf-stat.i.dTLB-loads
      0.19 ±  3%      -0.0        0.17 ±  4%  perf-stat.i.dTLB-store-miss-rate%
   1651558 ±  3%      -8.7%    1508543 ±  4%  perf-stat.i.dTLB-store-misses
 8.547e+08            +4.4%  8.927e+08        perf-stat.i.dTLB-stores
      0.59            -4.2%       0.57        perf-stat.i.ipc
     47.69            -1.4       46.24        perf-stat.i.node-load-miss-rate%
     31.75 ±  9%      -5.3       26.42 ± 12%  perf-stat.i.node-store-miss-rate%
    103277 ±  5%     +17.5%     121395 ±  5%  perf-stat.i.node-stores
     10.52           +22.3%      12.86        perf-stat.overall.MPKI
      2.68            +0.2        2.88        perf-stat.overall.branch-miss-rate%
     17.41            -2.5       14.87        perf-stat.overall.cache-miss-rate%
      1.66            +4.3%       1.74        perf-stat.overall.cpi
      0.19 ±  3%      -0.0        0.17 ±  4%  perf-stat.overall.dTLB-store-miss-rate%
      0.60            -4.1%       0.58        perf-stat.overall.ipc
     45.06            -1.5       43.55        perf-stat.overall.node-load-miss-rate%
  40019034           +17.7%   47083587        perf-stat.ps.cache-references
  1.07e+09            -7.6%  9.891e+08 ±  9%  perf-stat.ps.dTLB-loads
   1645993 ±  3%      -8.7%    1503507 ±  4%  perf-stat.ps.dTLB-store-misses
 8.519e+08            +4.4%  8.898e+08        perf-stat.ps.dTLB-stores
    102926 ±  5%     +17.6%     121068 ±  5%  perf-stat.ps.node-stores
      0.00            +0.9        0.94 ± 27%  perf-profile.calltrace.cycles-pp.kmem_cache_alloc_lru.xas_alloc.xas_create.xas_store.__xa_alloc
      0.00            +1.0        1.01 ± 25%  perf-profile.calltrace.cycles-pp.xas_alloc.xas_create.xas_store.__xa_alloc.__xa_alloc_cyclic
      0.00            +1.0        1.05 ± 26%  perf-profile.calltrace.cycles-pp.kmem_cache_alloc_lru.xas_alloc.xas_expand.xas_create.xas_store
      0.00            +1.1        1.12 ± 16%  perf-profile.calltrace.cycles-pp.xas_store.__xa_erase.xa_erase.shmem_unlink.vfs_unlink
      0.00            +1.1        1.14 ± 24%  perf-profile.calltrace.cycles-pp.xas_alloc.xas_expand.xas_create.xas_store.__xa_alloc
      0.00            +1.1        1.15 ± 16%  perf-profile.calltrace.cycles-pp.__xa_erase.xa_erase.shmem_unlink.vfs_unlink.do_unlinkat
      0.00            +1.2        1.21 ± 16%  perf-profile.calltrace.cycles-pp.xa_erase.shmem_unlink.vfs_unlink.do_unlinkat.__x64_sys_unlink
      1.24 ± 25%      +1.2        2.49 ± 15%  perf-profile.calltrace.cycles-pp.vfs_unlink.do_unlinkat.__x64_sys_unlink.do_syscall_64.entry_SYSCALL_64_after_hwframe
      0.00            +1.3        1.32 ± 21%  perf-profile.calltrace.cycles-pp.xas_expand.xas_create.xas_store.__xa_alloc.__xa_alloc_cyclic
      0.00            +1.5        1.49 ± 17%  perf-profile.calltrace.cycles-pp.shmem_unlink.vfs_unlink.do_unlinkat.__x64_sys_unlink.do_syscall_64
      0.00            +2.5        2.49 ± 16%  perf-profile.calltrace.cycles-pp.xas_create.xas_store.__xa_alloc.__xa_alloc_cyclic.shmem_doff_add
      0.00            +2.6        2.57 ± 16%  perf-profile.calltrace.cycles-pp.xas_store.__xa_alloc.__xa_alloc_cyclic.shmem_doff_add.shmem_mknod
      0.00            +2.8        2.81 ± 16%  perf-profile.calltrace.cycles-pp.__xa_alloc.__xa_alloc_cyclic.shmem_doff_add.shmem_mknod.lookup_open
      0.00            +2.8        2.84 ± 15%  perf-profile.calltrace.cycles-pp.__xa_alloc_cyclic.shmem_doff_add.shmem_mknod.lookup_open.open_last_lookups
      5.77 ± 19%      +2.9        8.72 ± 13%  perf-profile.calltrace.cycles-pp.open_last_lookups.path_openat.do_filp_open.do_sys_openat2.__x64_sys_creat
      0.00            +3.0        2.96 ± 16%  perf-profile.calltrace.cycles-pp.shmem_doff_add.shmem_mknod.lookup_open.open_last_lookups.path_openat
      2.76 ± 18%      +3.0        5.73 ± 14%  perf-profile.calltrace.cycles-pp.shmem_mknod.lookup_open.open_last_lookups.path_openat.do_filp_open
      5.11 ± 19%      +3.0        8.09 ± 13%  perf-profile.calltrace.cycles-pp.lookup_open.open_last_lookups.path_openat.do_filp_open.do_sys_openat2
      0.05 ± 65%      +0.1        0.12 ± 22%  perf-profile.children.cycles-pp.rcu_nocb_try_bypass
      0.02 ±141%      +0.1        0.11 ± 32%  perf-profile.children.cycles-pp.__unfreeze_partials
      0.01 ±223%      +0.1        0.11 ± 23%  perf-profile.children.cycles-pp.rmqueue
      0.09 ± 32%      +0.1        0.23 ± 24%  perf-profile.children.cycles-pp.rcu_segcblist_enqueue
      0.05 ± 77%      +0.2        0.20 ± 17%  perf-profile.children.cycles-pp.get_page_from_freelist
      0.08 ± 33%      +0.2        0.26 ± 15%  perf-profile.children.cycles-pp.__alloc_pages
      0.31 ± 20%      +0.5        0.78 ± 21%  perf-profile.children.cycles-pp.__slab_free
      0.47 ± 22%      +0.6        1.08 ± 16%  perf-profile.children.cycles-pp.__call_rcu_common
      0.00            +0.7        0.67 ± 20%  perf-profile.children.cycles-pp.radix_tree_node_rcu_free
      0.00            +1.0        1.00 ± 18%  perf-profile.children.cycles-pp.radix_tree_node_ctor
      0.17 ± 40%      +1.1        1.29 ± 15%  perf-profile.children.cycles-pp.setup_object
      0.00            +1.2        1.15 ± 16%  perf-profile.children.cycles-pp.__xa_erase
      0.00            +1.2        1.21 ± 16%  perf-profile.children.cycles-pp.xa_erase
      0.28 ± 41%      +1.2        1.50 ± 17%  perf-profile.children.cycles-pp.shmem_unlink
      0.21 ± 39%      +1.2        1.46 ± 15%  perf-profile.children.cycles-pp.shuffle_freelist
      1.25 ± 25%      +1.3        2.50 ± 15%  perf-profile.children.cycles-pp.vfs_unlink
      0.00            +1.3        1.32 ± 21%  perf-profile.children.cycles-pp.xas_expand
      0.28 ± 42%      +1.5        1.77 ± 14%  perf-profile.children.cycles-pp.allocate_slab
      0.42 ± 26%      +1.6        1.98 ± 13%  perf-profile.children.cycles-pp.___slab_alloc
      1.23 ± 16%      +1.7        2.93 ± 18%  perf-profile.children.cycles-pp.rcu_core
      2.70 ±  8%      +1.7        4.42 ±  9%  perf-profile.children.cycles-pp.__do_softirq
      1.09 ± 12%      +1.7        2.83 ± 19%  perf-profile.children.cycles-pp.rcu_do_batch
      1.53 ± 20%      +2.1        3.60 ± 13%  perf-profile.children.cycles-pp.kmem_cache_alloc_lru
      2.52 ± 11%      +2.1        4.65 ±  8%  perf-profile.children.cycles-pp.__irq_exit_rcu
      0.00            +2.2        2.16 ± 16%  perf-profile.children.cycles-pp.xas_alloc
     11.75 ± 10%      +2.4       14.15 ±  7%  perf-profile.children.cycles-pp.asm_sysvec_apic_timer_interrupt
     11.06 ± 10%      +2.4       13.48 ±  6%  perf-profile.children.cycles-pp.sysvec_apic_timer_interrupt
      0.00            +2.5        2.50 ± 16%  perf-profile.children.cycles-pp.xas_create
      0.00            +2.8        2.81 ± 16%  perf-profile.children.cycles-pp.__xa_alloc
      0.00            +2.8        2.84 ± 15%  perf-profile.children.cycles-pp.__xa_alloc_cyclic
      5.80 ± 19%      +3.0        8.76 ± 13%  perf-profile.children.cycles-pp.open_last_lookups
      2.78 ± 18%      +3.0        5.74 ± 14%  perf-profile.children.cycles-pp.shmem_mknod
      0.00            +3.0        2.96 ± 16%  perf-profile.children.cycles-pp.shmem_doff_add
      5.14 ± 19%      +3.0        8.12 ± 13%  perf-profile.children.cycles-pp.lookup_open
      0.00            +3.7        3.70 ± 15%  perf-profile.children.cycles-pp.xas_store
      0.08 ± 29%      +0.1        0.17 ± 22%  perf-profile.self.cycles-pp.xas_load
      0.03 ±100%      +0.1        0.14 ± 17%  perf-profile.self.cycles-pp.shuffle_freelist
      0.00            +0.1        0.13 ± 19%  perf-profile.self.cycles-pp.xas_alloc
      0.09 ± 35%      +0.1        0.22 ± 23%  perf-profile.self.cycles-pp.rcu_segcblist_enqueue
      0.00            +0.2        0.15 ± 24%  perf-profile.self.cycles-pp.xas_create
      0.00            +0.2        0.17 ± 31%  perf-profile.self.cycles-pp.xas_expand
      0.00            +0.3        0.27 ± 15%  perf-profile.self.cycles-pp.xas_store
      0.31 ± 18%      +0.3        0.62 ± 14%  perf-profile.self.cycles-pp.__call_rcu_common
      0.17 ± 27%      +0.3        0.50 ± 16%  perf-profile.self.cycles-pp.kmem_cache_alloc_lru
      0.31 ± 19%      +0.5        0.76 ± 22%  perf-profile.self.cycles-pp.__slab_free
      0.00            +0.7        0.66 ± 20%  perf-profile.self.cycles-pp.radix_tree_node_rcu_free
      0.00            +0.9        0.91 ± 17%  perf-profile.self.cycles-pp.radix_tree_node_ctor


***************************************************************************************************
lkp-spr-r02: 224 threads 2 sockets (Sapphire Rapids) with 256G memory
=========================================================================================
compiler/cpufreq_governor/kconfig/rootfs/tbox_group/test/testcase/testtime:
  gcc-11/performance/x86_64-rhel-8.3/debian-11.1-x86_64-20220510.cgz/lkp-spr-r02/disk_src/aim9/300s

commit: 
  v6.3-rc7
  2976e2b93a ("shmem: stable directory cookies")

        v6.3-rc7 2976e2b93abcbf19811dc7a444b 
---------------- --------------------------- 
         %stddev     %change         %stddev
             \          |                \  
      0.54            -8.5%       0.49        turbostat.IPC
      2819            +4.4%       2944        vmstat.system.cs
      0.07            +0.0        0.09        mpstat.cpu.all.soft%
      0.09            -0.0        0.08        mpstat.cpu.all.usr%
    412523           -21.1%     325484        aim9.disk_src.ops_per_sec
    816.00 ±  7%    +578.0%       5532 ±  4%  aim9.time.involuntary_context_switches
     92.20            -5.6%      87.00        aim9.time.percent_of_cpu_this_job_got
    229.15            -2.7%     222.99        aim9.time.system_time
     49.41           -17.9%      40.56        aim9.time.user_time
     39515            +5.7%      41749        proc-vmstat.nr_slab_reclaimable
   2514403          +186.2%    7196969        proc-vmstat.numa_hit
   2311707          +202.6%    6994204        proc-vmstat.numa_local
      5423            +7.9%       5849        proc-vmstat.pgactivate
  12367005          +303.9%   49944845        proc-vmstat.pgalloc_normal
  12324581          +304.3%   49832694        proc-vmstat.pgfree
     29.62 ± 31%     -29.9%      20.77 ± 15%  sched_debug.cfs_rq:/.load_avg.avg
    200823 ± 17%     -38.0%     124525 ±  9%  sched_debug.cfs_rq:/.min_vruntime.max
     19592 ± 32%     -46.2%      10535 ± 13%  sched_debug.cfs_rq:/.min_vruntime.stddev
     62.57 ± 12%     -17.9%      51.39 ±  9%  sched_debug.cfs_rq:/.runnable_avg.avg
    102694 ± 30%     -63.7%      37282 ± 25%  sched_debug.cfs_rq:/.spread0.max
     19593 ± 32%     -46.2%      10538 ± 13%  sched_debug.cfs_rq:/.spread0.stddev
     62.54 ± 12%     -18.0%      51.27 ±  8%  sched_debug.cfs_rq:/.util_avg.avg
      1.80          +157.0%       4.63        perf-stat.i.MPKI
 1.571e+09            -6.9%  1.462e+09        perf-stat.i.branch-instructions
      0.41            +0.0        0.43        perf-stat.i.branch-miss-rate%
   6982520            -1.8%    6857934        perf-stat.i.branch-misses
      2.64 ±  2%      -1.2        1.49        perf-stat.i.cache-miss-rate%
    445268           +28.9%     573872 ±  2%  perf-stat.i.cache-misses
  14884982          +138.0%   35429555        perf-stat.i.cache-references
      2696            +5.0%       2830        perf-stat.i.context-switches
      0.77           +10.4%       0.85        perf-stat.i.cpi
 6.327e+09            +3.6%  6.554e+09        perf-stat.i.cpu-cycles
    235.21            +3.5%     243.42        perf-stat.i.cpu-migrations
     18266 ±  3%     -21.6%      14320        perf-stat.i.cycles-between-cache-misses
 2.266e+09            -6.8%  2.113e+09        perf-stat.i.dTLB-loads
 1.403e+09            -6.4%  1.313e+09        perf-stat.i.dTLB-stores
 8.193e+09            -6.0%  7.702e+09        perf-stat.i.instructions
      1.29            -9.4%       1.17        perf-stat.i.ipc
      0.03            +3.7%       0.03        perf-stat.i.metric.GHz
     66.52          +138.0%     158.31        perf-stat.i.metric.K/sec
     23.39            -6.7%      21.82        perf-stat.i.metric.M/sec
    153573 ±  2%     +40.6%     215954 ±  2%  perf-stat.i.node-load-misses
      8731 ± 12%     +24.8%      10895 ±  8%  perf-stat.i.node-loads
      1.82          +153.2%       4.60        perf-stat.overall.MPKI
      0.44            +0.0        0.47        perf-stat.overall.branch-miss-rate%
      2.99            -1.4        1.62 ±  2%  perf-stat.overall.cache-miss-rate%
      0.77           +10.2%       0.85        perf-stat.overall.cpi
     14209           -19.6%      11428 ±  2%  perf-stat.overall.cycles-between-cache-misses
      1.30            -9.3%       1.18        perf-stat.overall.ipc
 1.565e+09            -6.9%  1.457e+09        perf-stat.ps.branch-instructions
   6959348            -1.8%    6835053        perf-stat.ps.branch-misses
    443804           +28.8%     571841 ±  2%  perf-stat.ps.cache-misses
  14834967          +138.0%   35309612        perf-stat.ps.cache-references
      2687            +5.0%       2821        perf-stat.ps.context-switches
 6.305e+09            +3.6%  6.532e+09        perf-stat.ps.cpu-cycles
    234.41            +3.5%     242.58        perf-stat.ps.cpu-migrations
 2.259e+09            -6.8%  2.106e+09        perf-stat.ps.dTLB-loads
 1.398e+09            -6.4%  1.309e+09        perf-stat.ps.dTLB-stores
 8.166e+09            -6.0%  7.676e+09        perf-stat.ps.instructions
    153047 ±  2%     +40.6%     215202 ±  2%  perf-stat.ps.node-load-misses
      8703 ± 12%     +24.7%      10853 ±  8%  perf-stat.ps.node-loads
 2.454e+12            -6.0%  2.307e+12        perf-stat.total.instructions
      7.47            -1.7        5.77 ±  5%  perf-profile.calltrace.cycles-pp.__xstat64
      6.81            -1.6        5.26 ±  4%  perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_after_hwframe.__xstat64
      6.54            -1.5        4.99 ±  5%  perf-profile.calltrace.cycles-pp.__do_sys_newstat.do_syscall_64.entry_SYSCALL_64_after_hwframe.__xstat64
      6.72            -1.5        5.19 ±  4%  perf-profile.calltrace.cycles-pp.do_syscall_64.entry_SYSCALL_64_after_hwframe.__xstat64
      5.77            -1.4        4.41 ±  4%  perf-profile.calltrace.cycles-pp.vfs_fstatat.__do_sys_newstat.do_syscall_64.entry_SYSCALL_64_after_hwframe.__xstat64
     12.55            -1.2       11.32 ±  5%  perf-profile.calltrace.cycles-pp.unlink
     11.87            -1.0       10.88 ±  5%  perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_after_hwframe.unlink
     11.66            -1.0       10.70 ±  5%  perf-profile.calltrace.cycles-pp.__x64_sys_unlink.do_syscall_64.entry_SYSCALL_64_after_hwframe.unlink
     11.80            -1.0       10.84 ±  5%  perf-profile.calltrace.cycles-pp.do_syscall_64.entry_SYSCALL_64_after_hwframe.unlink
      4.18            -1.0        3.22 ±  5%  perf-profile.calltrace.cycles-pp.vfs_statx.vfs_fstatat.__do_sys_newstat.do_syscall_64.entry_SYSCALL_64_after_hwframe
      3.34            -0.9        2.48 ±  6%  perf-profile.calltrace.cycles-pp.__close
      3.37            -0.8        2.61 ±  5%  perf-profile.calltrace.cycles-pp.filename_lookup.vfs_statx.vfs_fstatat.__do_sys_newstat.do_syscall_64
      3.34            -0.7        2.60 ±  6%  perf-profile.calltrace.cycles-pp.shmem_get_inode.shmem_mknod.lookup_open.open_last_lookups.path_openat
      2.50            -0.7        1.78 ±  5%  perf-profile.calltrace.cycles-pp.filename_parentat.do_unlinkat.__x64_sys_unlink.do_syscall_64.entry_SYSCALL_64_after_hwframe
      3.24            -0.7        2.53 ±  5%  perf-profile.calltrace.cycles-pp.path_lookupat.filename_lookup.vfs_statx.vfs_fstatat.__do_sys_newstat
      2.40 ±  2%      -0.7        1.70 ±  6%  perf-profile.calltrace.cycles-pp.path_parentat.filename_parentat.do_unlinkat.__x64_sys_unlink.do_syscall_64
      2.71            -0.7        2.06 ±  5%  perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_after_hwframe.__close
      2.65            -0.6        2.02 ±  5%  perf-profile.calltrace.cycles-pp.do_syscall_64.entry_SYSCALL_64_after_hwframe.__close
      1.91 ±  2%      -0.6        1.34 ±  6%  perf-profile.calltrace.cycles-pp.link_path_walk.path_parentat.filename_parentat.do_unlinkat.__x64_sys_unlink
      2.59 ±  2%      -0.6        2.03 ±  6%  perf-profile.calltrace.cycles-pp.new_inode.shmem_get_inode.shmem_mknod.lookup_open.open_last_lookups
      2.05 ±  2%      -0.5        1.54 ±  7%  perf-profile.calltrace.cycles-pp.alloc_inode.new_inode.shmem_get_inode.shmem_mknod.lookup_open
      1.96 ±  2%      -0.5        1.45 ±  4%  perf-profile.calltrace.cycles-pp.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.__close
      1.90            -0.5        1.41 ±  4%  perf-profile.calltrace.cycles-pp.exit_to_user_mode_loop.exit_to_user_mode_prepare.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe
      1.92 ±  2%      -0.5        1.44 ±  4%  perf-profile.calltrace.cycles-pp.exit_to_user_mode_prepare.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.__close
      2.26 ±  2%      -0.5        1.78 ±  6%  perf-profile.calltrace.cycles-pp.link_path_walk.path_lookupat.filename_lookup.vfs_statx.vfs_fstatat
      1.91 ±  2%      -0.5        1.44 ±  8%  perf-profile.calltrace.cycles-pp.link_path_walk.path_openat.do_filp_open.do_sys_openat2.__x64_sys_creat
      1.78 ±  2%      -0.5        1.33 ±  7%  perf-profile.calltrace.cycles-pp.shmem_alloc_inode.alloc_inode.new_inode.shmem_get_inode.shmem_mknod
      1.77 ±  2%      -0.4        1.32 ±  8%  perf-profile.calltrace.cycles-pp.kmem_cache_alloc_lru.shmem_alloc_inode.alloc_inode.new_inode.shmem_get_inode
      1.70 ±  2%      -0.4        1.27 ±  5%  perf-profile.calltrace.cycles-pp.task_work_run.exit_to_user_mode_loop.exit_to_user_mode_prepare.syscall_exit_to_user_mode.do_syscall_64
      1.81 ±  2%      -0.4        1.41 ±  5%  perf-profile.calltrace.cycles-pp.d_alloc_parallel.lookup_open.open_last_lookups.path_openat.do_filp_open
      1.82 ±  2%      -0.4        1.42 ±  6%  perf-profile.calltrace.cycles-pp.alloc_empty_file.path_openat.do_filp_open.do_sys_openat2.__x64_sys_creat
      0.74 ±  6%      -0.4        0.34 ± 81%  perf-profile.calltrace.cycles-pp.strncpy_from_user.getname_flags.do_sys_openat2.__x64_sys_creat.do_syscall_64
      1.71            -0.4        1.33 ±  7%  perf-profile.calltrace.cycles-pp.__alloc_file.alloc_empty_file.path_openat.do_filp_open.do_sys_openat2
      0.69 ±  7%      -0.4        0.32 ± 82%  perf-profile.calltrace.cycles-pp.strncpy_from_user.getname_flags.__x64_sys_unlink.do_syscall_64.entry_SYSCALL_64_after_hwframe
      1.63 ±  2%      -0.4        1.26 ±  6%  perf-profile.calltrace.cycles-pp.dput.do_unlinkat.__x64_sys_unlink.do_syscall_64.entry_SYSCALL_64_after_hwframe
      1.64 ±  2%      -0.4        1.28 ±  6%  perf-profile.calltrace.cycles-pp.evict.do_unlinkat.__x64_sys_unlink.do_syscall_64.entry_SYSCALL_64_after_hwframe
      1.33 ±  3%      -0.3        1.00 ±  5%  perf-profile.calltrace.cycles-pp.getname_flags.vfs_fstatat.__do_sys_newstat.do_syscall_64.entry_SYSCALL_64_after_hwframe
      1.50 ±  3%      -0.3        1.19 ±  7%  perf-profile.calltrace.cycles-pp.d_alloc.d_alloc_parallel.lookup_open.open_last_lookups.path_openat
      1.06 ±  6%      -0.3        0.77 ±  8%  perf-profile.calltrace.cycles-pp.getname_flags.do_sys_openat2.__x64_sys_creat.do_syscall_64.entry_SYSCALL_64_after_hwframe
      1.18 ±  5%      -0.3        0.91 ±  6%  perf-profile.calltrace.cycles-pp.do_open.path_openat.do_filp_open.do_sys_openat2.__x64_sys_creat
      0.94            -0.3        0.67 ±  8%  perf-profile.calltrace.cycles-pp.__fput.task_work_run.exit_to_user_mode_loop.exit_to_user_mode_prepare.syscall_exit_to_user_mode
      0.58 ±  3%      -0.3        0.32 ± 81%  perf-profile.calltrace.cycles-pp.walk_component.link_path_walk.path_lookupat.filename_lookup.vfs_statx
      0.70 ±  3%      -0.3        0.44 ± 50%  perf-profile.calltrace.cycles-pp.cp_new_stat.__do_sys_newstat.do_syscall_64.entry_SYSCALL_64_after_hwframe.__xstat64
      1.11 ±  4%      -0.2        0.87 ±  6%  perf-profile.calltrace.cycles-pp.__d_alloc.d_alloc.d_alloc_parallel.lookup_open.open_last_lookups
      1.01 ±  4%      -0.2        0.78 ±  6%  perf-profile.calltrace.cycles-pp.getname_flags.__x64_sys_unlink.do_syscall_64.entry_SYSCALL_64_after_hwframe.unlink
      0.79 ±  6%      -0.2        0.56 ±  7%  perf-profile.calltrace.cycles-pp.simple_lookup.lookup_open.open_last_lookups.path_openat.do_filp_open
      0.93 ±  5%      -0.2        0.70 ±  5%  perf-profile.calltrace.cycles-pp.strncpy_from_user.getname_flags.vfs_fstatat.__do_sys_newstat.do_syscall_64
      1.05 ±  4%      -0.2        0.83 ±  9%  perf-profile.calltrace.cycles-pp.shmem_evict_inode.evict.do_unlinkat.__x64_sys_unlink.do_syscall_64
      1.00 ±  4%      -0.2        0.80 ±  6%  perf-profile.calltrace.cycles-pp.kmem_cache_alloc_lru.__d_alloc.d_alloc.d_alloc_parallel.lookup_open
      0.98 ±  5%      -0.2        0.79 ±  4%  perf-profile.calltrace.cycles-pp.dentry_kill.dput.do_unlinkat.__x64_sys_unlink.do_syscall_64
      0.89 ±  4%      -0.2        0.70 ± 10%  perf-profile.calltrace.cycles-pp.kmem_cache_alloc.__alloc_file.alloc_empty_file.path_openat.do_filp_open
      0.81 ±  4%      -0.2        0.63 ±  7%  perf-profile.calltrace.cycles-pp.do_dentry_open.do_open.path_openat.do_filp_open.do_sys_openat2
      0.82 ±  5%      -0.2        0.67 ±  6%  perf-profile.calltrace.cycles-pp.__dentry_kill.dentry_kill.dput.do_unlinkat.__x64_sys_unlink
      0.56 ±  2%      +0.2        0.74 ±  9%  perf-profile.calltrace.cycles-pp._raw_spin_lock_irq.__hrtimer_run_queues.hrtimer_interrupt.__sysvec_apic_timer_interrupt.sysvec_apic_timer_interrupt
      0.87 ±  9%      +0.2        1.08 ±  8%  perf-profile.calltrace.cycles-pp.rcu_pending.rcu_sched_clock_irq.update_process_times.tick_sched_handle.tick_sched_timer
      1.04 ±  6%      +0.3        1.30 ±  6%  perf-profile.calltrace.cycles-pp.rcu_sched_clock_irq.update_process_times.tick_sched_handle.tick_sched_timer.__hrtimer_run_queues
      0.59 ±  7%      +0.3        0.86 ± 12%  perf-profile.calltrace.cycles-pp.ret_from_fork
      0.59 ±  7%      +0.3        0.86 ± 12%  perf-profile.calltrace.cycles-pp.kthread.ret_from_fork
      1.24 ±  9%      +0.5        1.70 ±  6%  perf-profile.calltrace.cycles-pp._raw_spin_lock.scheduler_tick.update_process_times.tick_sched_handle.tick_sched_timer
      0.00            +0.6        0.58 ±  5%  perf-profile.calltrace.cycles-pp.rcu_do_batch.rcu_core.__do_softirq.__irq_exit_rcu.sysvec_apic_timer_interrupt
      3.79 ±  5%      +0.7        4.47 ±  4%  perf-profile.calltrace.cycles-pp.scheduler_tick.update_process_times.tick_sched_handle.tick_sched_timer.__hrtimer_run_queues
      0.45 ± 50%      +0.7        1.19 ±  5%  perf-profile.calltrace.cycles-pp.rcu_core.__do_softirq.__irq_exit_rcu.sysvec_apic_timer_interrupt.asm_sysvec_apic_timer_interrupt
      1.86 ±  3%      +0.8        2.68 ±  4%  perf-profile.calltrace.cycles-pp.ct_kernel_exit_state.ct_kernel_enter.ct_idle_exit.cpuidle_enter_state.cpuidle_enter
      2.02 ±  3%      +0.8        2.87 ±  4%  perf-profile.calltrace.cycles-pp.ct_idle_exit.cpuidle_enter_state.cpuidle_enter.cpuidle_idle_call.do_idle
      2.00 ±  4%      +0.9        2.85 ±  4%  perf-profile.calltrace.cycles-pp.ct_kernel_enter.ct_idle_exit.cpuidle_enter_state.cpuidle_enter.cpuidle_idle_call
      0.00            +1.0        0.97 ± 16%  perf-profile.calltrace.cycles-pp.allocate_slab.___slab_alloc.kmem_cache_alloc_lru.xas_alloc.xas_create
      5.77 ±  4%      +1.0        6.78 ±  4%  perf-profile.calltrace.cycles-pp.update_process_times.tick_sched_handle.tick_sched_timer.__hrtimer_run_queues.hrtimer_interrupt
      5.86 ±  4%      +1.0        6.88 ±  4%  perf-profile.calltrace.cycles-pp.tick_sched_handle.tick_sched_timer.__hrtimer_run_queues.hrtimer_interrupt.__sysvec_apic_timer_interrupt
      0.00            +1.0        1.04 ± 16%  perf-profile.calltrace.cycles-pp.___slab_alloc.kmem_cache_alloc_lru.xas_alloc.xas_create.xas_store
      2.49 ±  3%      +1.3        3.79 ±  5%  perf-profile.calltrace.cycles-pp.vfs_unlink.do_unlinkat.__x64_sys_unlink.do_syscall_64.entry_SYSCALL_64_after_hwframe
      0.00            +1.3        1.31 ±  5%  perf-profile.calltrace.cycles-pp.__call_rcu_common.xas_store.__xa_erase.xa_erase.shmem_unlink
      6.69 ±  6%      +1.3        8.01 ±  7%  perf-profile.calltrace.cycles-pp.tick_sched_timer.__hrtimer_run_queues.hrtimer_interrupt.__sysvec_apic_timer_interrupt.sysvec_apic_timer_interrupt
      0.00            +1.5        1.45 ± 11%  perf-profile.calltrace.cycles-pp.kmem_cache_alloc_lru.xas_alloc.xas_create.xas_store.__xa_alloc
      0.00            +1.5        1.47 ± 12%  perf-profile.calltrace.cycles-pp.allocate_slab.___slab_alloc.kmem_cache_alloc_lru.xas_alloc.xas_expand
      9.60 ±  4%      +1.6       11.16 ±  6%  perf-profile.calltrace.cycles-pp.__hrtimer_run_queues.hrtimer_interrupt.__sysvec_apic_timer_interrupt.sysvec_apic_timer_interrupt.asm_sysvec_apic_timer_interrupt
      0.00            +1.6        1.57 ± 10%  perf-profile.calltrace.cycles-pp.xas_alloc.xas_create.xas_store.__xa_alloc.__xa_alloc_cyclic
      0.00            +1.6        1.59 ± 11%  perf-profile.calltrace.cycles-pp.___slab_alloc.kmem_cache_alloc_lru.xas_alloc.xas_expand.xas_create
      0.56 ±  2%      +1.6        2.19 ±  5%  perf-profile.calltrace.cycles-pp.setup_object.shuffle_freelist.allocate_slab.___slab_alloc.kmem_cache_alloc_lru
      0.57            +1.8        2.34 ±  6%  perf-profile.calltrace.cycles-pp.shmem_unlink.vfs_unlink.do_unlinkat.__x64_sys_unlink.do_syscall_64
      0.00            +1.8        1.81 ±  6%  perf-profile.calltrace.cycles-pp.xas_store.__xa_erase.xa_erase.shmem_unlink.vfs_unlink
     12.59 ±  3%      +1.8       14.41 ±  6%  perf-profile.calltrace.cycles-pp.hrtimer_interrupt.__sysvec_apic_timer_interrupt.sysvec_apic_timer_interrupt.asm_sysvec_apic_timer_interrupt.cpuidle_enter_state
     13.38 ±  3%      +1.8       15.23 ±  5%  perf-profile.calltrace.cycles-pp.__sysvec_apic_timer_interrupt.sysvec_apic_timer_interrupt.asm_sysvec_apic_timer_interrupt.cpuidle_enter_state.cpuidle_enter
      0.00            +1.9        1.85 ±  6%  perf-profile.calltrace.cycles-pp.__xa_erase.xa_erase.shmem_unlink.vfs_unlink.do_unlinkat
      0.00            +1.9        1.92 ±  6%  perf-profile.calltrace.cycles-pp.xa_erase.shmem_unlink.vfs_unlink.do_unlinkat.__x64_sys_unlink
      0.00            +2.0        1.95 ± 10%  perf-profile.calltrace.cycles-pp.kmem_cache_alloc_lru.xas_alloc.xas_expand.xas_create.xas_store
      0.00            +2.1        2.08 ± 10%  perf-profile.calltrace.cycles-pp.xas_alloc.xas_expand.xas_create.xas_store.__xa_alloc
      0.00            +2.1        2.12 ±  5%  perf-profile.calltrace.cycles-pp.radix_tree_node_ctor.setup_object.shuffle_freelist.allocate_slab.___slab_alloc
      0.00            +2.3        2.29 ±  9%  perf-profile.calltrace.cycles-pp.xas_expand.xas_create.xas_store.__xa_alloc.__xa_alloc_cyclic
      0.00            +2.3        2.33 ±  4%  perf-profile.calltrace.cycles-pp.shuffle_freelist.allocate_slab.___slab_alloc.kmem_cache_alloc_lru.xas_alloc
      9.30            +2.4       11.67 ±  5%  perf-profile.calltrace.cycles-pp.open_last_lookups.path_openat.do_filp_open.do_sys_openat2.__x64_sys_creat
     19.70            +2.4       22.13 ±  4%  perf-profile.calltrace.cycles-pp.sysvec_apic_timer_interrupt.asm_sysvec_apic_timer_interrupt.cpuidle_enter_state.cpuidle_enter.cpuidle_idle_call
     24.00            +2.4       26.44 ±  4%  perf-profile.calltrace.cycles-pp.asm_sysvec_apic_timer_interrupt.cpuidle_enter_state.cpuidle_enter.cpuidle_idle_call.do_idle
      7.90            +2.7       10.60 ±  5%  perf-profile.calltrace.cycles-pp.lookup_open.open_last_lookups.path_openat.do_filp_open.do_sys_openat2
      4.60            +3.5        8.14 ±  5%  perf-profile.calltrace.cycles-pp.shmem_mknod.lookup_open.open_last_lookups.path_openat.do_filp_open
     50.91            +3.8       54.75 ±  3%  perf-profile.calltrace.cycles-pp.secondary_startup_64_no_verify
     50.64            +3.9       54.50 ±  4%  perf-profile.calltrace.cycles-pp.do_idle.cpu_startup_entry.start_secondary.secondary_startup_64_no_verify
     50.66            +3.9       54.52 ±  3%  perf-profile.calltrace.cycles-pp.cpu_startup_entry.start_secondary.secondary_startup_64_no_verify
     50.66            +3.9       54.52 ±  3%  perf-profile.calltrace.cycles-pp.start_secondary.secondary_startup_64_no_verify
     47.04            +3.9       50.90 ±  3%  perf-profile.calltrace.cycles-pp.cpuidle_enter.cpuidle_idle_call.do_idle.cpu_startup_entry.start_secondary
     50.20            +3.9       54.08 ±  3%  perf-profile.calltrace.cycles-pp.cpuidle_idle_call.do_idle.cpu_startup_entry.start_secondary.secondary_startup_64_no_verify
     46.16            +3.9       50.04 ±  3%  perf-profile.calltrace.cycles-pp.cpuidle_enter_state.cpuidle_enter.cpuidle_idle_call.do_idle.cpu_startup_entry
      0.00            +4.1        4.08 ±  5%  perf-profile.calltrace.cycles-pp.xas_create.xas_store.__xa_alloc.__xa_alloc_cyclic.shmem_doff_add
      0.00            +4.2        4.16 ±  5%  perf-profile.calltrace.cycles-pp.xas_store.__xa_alloc.__xa_alloc_cyclic.shmem_doff_add.shmem_mknod
      0.00            +4.3        4.31 ±  4%  perf-profile.calltrace.cycles-pp.__xa_alloc.__xa_alloc_cyclic.shmem_doff_add.shmem_mknod.lookup_open
      0.00            +4.3        4.34 ±  4%  perf-profile.calltrace.cycles-pp.__xa_alloc_cyclic.shmem_doff_add.shmem_mknod.lookup_open.open_last_lookups
      0.00            +4.5        4.49 ±  5%  perf-profile.calltrace.cycles-pp.shmem_doff_add.shmem_mknod.lookup_open.open_last_lookups.path_openat
      7.97            -1.8        6.14 ±  5%  perf-profile.children.cycles-pp.__xstat64
      6.42            -1.6        4.83 ±  6%  perf-profile.children.cycles-pp.link_path_walk
      6.69            -1.5        5.15 ±  5%  perf-profile.children.cycles-pp.__do_sys_newstat
      5.93            -1.4        4.57 ±  4%  perf-profile.children.cycles-pp.vfs_fstatat
     12.81            -1.3       11.52 ±  5%  perf-profile.children.cycles-pp.unlink
     11.68            -1.0       10.71 ±  5%  perf-profile.children.cycles-pp.__x64_sys_unlink
      4.32            -1.0        3.37 ±  5%  perf-profile.children.cycles-pp.vfs_statx
      3.72 ±  2%      -0.9        2.79 ±  5%  perf-profile.children.cycles-pp.__close
      3.56 ±  3%      -0.9        2.68 ±  4%  perf-profile.children.cycles-pp.getname_flags
      3.58            -0.8        2.80 ±  5%  perf-profile.children.cycles-pp.filename_lookup
      2.96            -0.7        2.22 ±  6%  perf-profile.children.cycles-pp.dput
      3.49            -0.7        2.74 ±  5%  perf-profile.children.cycles-pp.path_lookupat
      3.36            -0.7        2.62 ±  6%  perf-profile.children.cycles-pp.shmem_get_inode
      2.50            -0.7        1.78 ±  5%  perf-profile.children.cycles-pp.filename_parentat
      2.43 ±  2%      -0.7        1.72 ±  6%  perf-profile.children.cycles-pp.path_parentat
      2.44 ±  3%      -0.6        1.80 ±  3%  perf-profile.children.cycles-pp.strncpy_from_user
      2.62 ±  2%      -0.6        2.05 ±  6%  perf-profile.children.cycles-pp.new_inode
      2.31 ±  2%      -0.6        1.75 ±  5%  perf-profile.children.cycles-pp.syscall_exit_to_user_mode
      2.17 ±  2%      -0.5        1.65 ±  5%  perf-profile.children.cycles-pp.exit_to_user_mode_prepare
      2.08 ±  2%      -0.5        1.57 ±  7%  perf-profile.children.cycles-pp.alloc_inode
      2.17 ±  4%      -0.5        1.66 ±  5%  perf-profile.children.cycles-pp.inode_permission
      2.08 ±  2%      -0.5        1.59 ±  5%  perf-profile.children.cycles-pp.exit_to_user_mode_loop
      1.61 ±  2%      -0.5        1.14 ±  4%  perf-profile.children.cycles-pp.__might_resched
      1.79 ±  2%      -0.5        1.33 ±  8%  perf-profile.children.cycles-pp.shmem_alloc_inode
      2.07 ±  2%      -0.4        1.62 ±  7%  perf-profile.children.cycles-pp.walk_component
      2.20 ±  3%      -0.4        1.75 ±  5%  perf-profile.children.cycles-pp.kmem_cache_alloc
      1.88 ±  2%      -0.4        1.43 ±  6%  perf-profile.children.cycles-pp.task_work_run
      1.56 ±  4%      -0.4        1.11 ±  8%  perf-profile.children.cycles-pp.__entry_text_start
      1.86 ±  3%      -0.4        1.45 ±  5%  perf-profile.children.cycles-pp.d_alloc_parallel
      1.88 ±  2%      -0.4        1.48 ±  6%  perf-profile.children.cycles-pp.alloc_empty_file
      1.78 ±  2%      -0.4        1.40 ±  7%  perf-profile.children.cycles-pp.__alloc_file
      1.67 ±  2%      -0.4        1.31 ±  6%  perf-profile.children.cycles-pp.evict
      1.56            -0.3        1.23 ±  8%  perf-profile.children.cycles-pp.lookup_fast
      1.53 ±  3%      -0.3        1.22 ±  7%  perf-profile.children.cycles-pp.d_alloc
      0.96 ±  4%      -0.3        0.67 ± 10%  perf-profile.children.cycles-pp.try_to_unlazy
      1.25 ±  4%      -0.3        0.97 ±  6%  perf-profile.children.cycles-pp.do_open
      1.11 ±  2%      -0.3        0.83 ±  8%  perf-profile.children.cycles-pp.__fput
      1.16 ±  3%      -0.3        0.88 ±  6%  perf-profile.children.cycles-pp.generic_permission
      1.19 ±  3%      -0.3        0.92 ±  7%  perf-profile.children.cycles-pp.__d_lookup_rcu
      0.85 ±  3%      -0.2        0.60 ±  8%  perf-profile.children.cycles-pp.__check_object_size
      0.73 ±  2%      -0.2        0.49 ± 11%  perf-profile.children.cycles-pp.complete_walk
      0.94 ±  5%      -0.2        0.71 ±  8%  perf-profile.children.cycles-pp.__cond_resched
      1.13 ±  4%      -0.2        0.91 ±  6%  perf-profile.children.cycles-pp.__d_alloc
      1.07 ±  3%      -0.2        0.85 ±  8%  perf-profile.children.cycles-pp.shmem_evict_inode
      0.79 ±  6%      -0.2        0.57 ±  6%  perf-profile.children.cycles-pp.simple_lookup
      0.77 ±  3%      -0.2        0.55 ±  8%  perf-profile.children.cycles-pp.lockref_put_or_lock
      0.75 ±  6%      -0.2        0.53 ± 11%  perf-profile.children.cycles-pp.__legitimize_path
      0.74 ±  8%      -0.2        0.53 ±  6%  perf-profile.children.cycles-pp.__d_add
      0.80 ±  9%      -0.2        0.59 ±  2%  perf-profile.children.cycles-pp.step_into
      0.88 ±  4%      -0.2        0.68 ±  8%  perf-profile.children.cycles-pp.do_dentry_open
      1.01 ±  5%      -0.2        0.82 ±  4%  perf-profile.children.cycles-pp.dentry_kill
      0.55 ±  3%      -0.2        0.38 ± 13%  perf-profile.children.cycles-pp.inode_init_once
      0.74 ±  4%      -0.2        0.57 ±  8%  perf-profile.children.cycles-pp.cp_new_stat
      0.69 ±  3%      -0.2        0.52 ± 11%  perf-profile.children.cycles-pp.entry_SYSRETQ_unsafe_stack
      0.65 ± 12%      -0.2        0.49 ± 11%  perf-profile.children.cycles-pp.security_file_alloc
      0.87 ±  5%      -0.2        0.71 ±  6%  perf-profile.children.cycles-pp.__dentry_kill
      0.59 ±  5%      -0.1        0.45 ±  3%  perf-profile.children.cycles-pp.down_write
      0.70 ±  5%      -0.1        0.56 ± 12%  perf-profile.children.cycles-pp.scramble
      0.46 ±  5%      -0.1        0.31 ±  8%  perf-profile.children.cycles-pp.make_vfsuid
      0.66 ±  6%      -0.1        0.52 ± 10%  perf-profile.children.cycles-pp._IO_fgets
      0.49 ±  7%      -0.1        0.36 ±  8%  perf-profile.children.cycles-pp.rcu_all_qs
      0.53 ±  4%      -0.1        0.39 ±  8%  perf-profile.children.cycles-pp._IO_getline_info
      0.51 ±  6%      -0.1        0.37 ±  9%  perf-profile.children.cycles-pp.fsnotify_destroy_marks
      0.54 ±  6%      -0.1        0.41 ±  6%  perf-profile.children.cycles-pp.fsnotify
      0.46 ±  5%      -0.1        0.34 ±  9%  perf-profile.children.cycles-pp.security_inode_permission
      0.63 ±  6%      -0.1        0.51 ± 10%  perf-profile.children.cycles-pp.shmem_undo_range
      0.46 ±  7%      -0.1        0.34 ±  9%  perf-profile.children.cycles-pp.fsnotify_grab_connector
      0.65 ±  2%      -0.1        0.53 ±  6%  perf-profile.children.cycles-pp.__might_sleep
      0.45 ±  4%      -0.1        0.33 ±  9%  perf-profile.children.cycles-pp.dentry_unlink_inode
      0.46 ± 12%      -0.1        0.35 ± 16%  perf-profile.children.cycles-pp.mnt_want_write
      0.44 ±  5%      -0.1        0.34 ± 10%  perf-profile.children.cycles-pp.iput
      0.46 ±  9%      -0.1        0.36 ±  7%  perf-profile.children.cycles-pp.destroy_inode
      0.48 ±  8%      -0.1        0.38 ±  9%  perf-profile.children.cycles-pp._copy_to_user
      0.42 ±  4%      -0.1        0.31 ± 12%  perf-profile.children.cycles-pp.terminate_walk
      0.38 ± 10%      -0.1        0.27 ± 15%  perf-profile.children.cycles-pp.__legitimize_mnt
      0.32 ±  6%      -0.1        0.23 ±  9%  perf-profile.children.cycles-pp.lockref_get_not_dead
      0.43 ±  8%      -0.1        0.34 ± 15%  perf-profile.children.cycles-pp.filp_close
      0.41 ±  8%      -0.1        0.32 ±  7%  perf-profile.children.cycles-pp.__destroy_inode
      0.35 ±  2%      -0.1        0.26 ±  4%  perf-profile.children.cycles-pp.__might_fault
      0.35 ±  9%      -0.1        0.26 ± 18%  perf-profile.children.cycles-pp.__mnt_want_write
      0.39 ±  3%      -0.1        0.31 ± 14%  perf-profile.children.cycles-pp.obj_cgroup_charge
      0.31 ±  5%      -0.1        0.22 ± 21%  perf-profile.children.cycles-pp.memcg_list_lru_alloc
      0.36 ±  6%      -0.1        0.27 ±  9%  perf-profile.children.cycles-pp._atomic_dec_and_lock
      0.35 ± 10%      -0.1        0.27 ±  6%  perf-profile.children.cycles-pp.path_init
      0.32 ±  8%      -0.1        0.25 ± 10%  perf-profile.children.cycles-pp.alloc_fd
      0.22 ± 12%      -0.1        0.15 ± 21%  perf-profile.children.cycles-pp.__virt_addr_valid
      0.44 ±  9%      -0.1        0.36 ± 13%  perf-profile.children.cycles-pp.copy_user_enhanced_fast_string
      0.37 ±  8%      -0.1        0.29 ±  6%  perf-profile.children.cycles-pp.simple_acl_create
      0.23 ±  9%      -0.1        0.16 ±  7%  perf-profile.children.cycles-pp.set_cached_acl
      0.23 ± 12%      -0.1        0.16 ±  9%  perf-profile.children.cycles-pp.shmem_getattr
      0.43 ±  6%      -0.1        0.36 ±  9%  perf-profile.children.cycles-pp.inode_maybe_inc_iversion
      0.30 ±  6%      -0.1        0.23 ±  6%  perf-profile.children.cycles-pp.refill_obj_stock
      0.17 ± 12%      -0.1        0.10 ±  8%  perf-profile.children.cycles-pp.__d_rehash
      0.30 ±  9%      -0.1        0.23 ± 14%  perf-profile.children.cycles-pp.up_write
      0.23 ± 10%      -0.1        0.16 ± 11%  perf-profile.children.cycles-pp.d_delete
      0.21 ±  7%      -0.1        0.15 ± 16%  perf-profile.children.cycles-pp.xa_load
      0.26 ± 10%      -0.1        0.19 ± 13%  perf-profile.children.cycles-pp.__filemap_get_folio
      0.22 ± 13%      -0.1        0.16 ±  9%  perf-profile.children.cycles-pp.__srcu_read_unlock
      0.18 ± 13%      -0.1        0.12 ± 19%  perf-profile.children.cycles-pp.map_id_up
      0.25 ±  7%      -0.1        0.19 ± 15%  perf-profile.children.cycles-pp.apparmor_file_alloc_security
      0.29 ± 12%      -0.1        0.22 ±  6%  perf-profile.children.cycles-pp.d_lookup
      0.26 ±  7%      -0.1        0.20 ±  5%  perf-profile.children.cycles-pp.__lookup_hash
      0.25 ±  6%      -0.1        0.19 ± 19%  perf-profile.children.cycles-pp.path_put
      0.23 ± 10%      -0.1        0.17 ± 13%  perf-profile.children.cycles-pp.__d_instantiate
      0.63 ±  5%      -0.1        0.57 ±  5%  perf-profile.children.cycles-pp.dsearch
      0.17 ±  6%      -0.1        0.12 ±  8%  perf-profile.children.cycles-pp._get_random_bytes
      0.13 ± 12%      -0.1        0.08 ±  9%  perf-profile.children.cycles-pp.find_lock_entries
      0.23 ±  8%      -0.0        0.18 ±  2%  perf-profile.children.cycles-pp.lookup_dcache
      0.16 ±  8%      -0.0        0.11 ±  3%  perf-profile.children.cycles-pp.syscall_return_via_sysret
      0.19 ±  9%      -0.0        0.14 ±  5%  perf-profile.children.cycles-pp.shmem_file_read_iter
      0.14 ± 16%      -0.0        0.09 ±  4%  perf-profile.children.cycles-pp.generic_fillattr
      0.21 ±  9%      -0.0        0.17 ±  8%  perf-profile.children.cycles-pp.chdir
      0.19 ±  6%      -0.0        0.15 ± 18%  perf-profile.children.cycles-pp.security_file_open
      0.10 ± 14%      -0.0        0.06 ± 12%  perf-profile.children.cycles-pp.crng_make_state
      0.24 ±  9%      -0.0        0.20 ±  7%  perf-profile.children.cycles-pp.may_delete
      0.18 ±  8%      -0.0        0.14 ± 16%  perf-profile.children.cycles-pp.inode_wait_for_writeback
      0.12 ±  9%      -0.0        0.08 ± 14%  perf-profile.children.cycles-pp.entry_SYSCALL_64_safe_stack
      0.19 ± 11%      -0.0        0.16 ± 13%  perf-profile.children.cycles-pp.__fsnotify_parent
      0.17 ±  5%      -0.0        0.13 ± 10%  perf-profile.children.cycles-pp.ihold
      0.14 ±  3%      -0.0        0.11 ±  7%  perf-profile.children.cycles-pp.chacha_block_generic
      0.13 ± 14%      -0.0        0.09 ± 12%  perf-profile.children.cycles-pp.tsc_verify_tsc_adjust
      0.13 ±  3%      -0.0        0.09 ± 10%  perf-profile.children.cycles-pp.chacha_permute
      0.09 ±  4%      -0.0        0.06 ± 12%  perf-profile.children.cycles-pp.putname
      0.09 ± 16%      -0.0        0.06 ± 14%  perf-profile.children.cycles-pp.crng_fast_key_erasure
      0.15 ± 11%      -0.0        0.12 ±  6%  perf-profile.children.cycles-pp.mntput_no_expire
      0.08 ± 13%      -0.0        0.05 ± 52%  perf-profile.children.cycles-pp.getcwd
      0.11 ± 11%      -0.0        0.08 ± 12%  perf-profile.children.cycles-pp.security_path_mknod
      0.18 ±  9%      -0.0        0.15 ±  4%  perf-profile.children.cycles-pp.___d_drop
      0.09 ± 10%      -0.0        0.07 ±  5%  perf-profile.children.cycles-pp.security_file_free
      0.10 ±  4%      -0.0        0.08 ± 13%  perf-profile.children.cycles-pp.security_path_unlink
      0.16 ±  4%      -0.0        0.14 ±  7%  perf-profile.children.cycles-pp._find_next_and_bit
      0.09 ±  8%      -0.0        0.07 ± 11%  perf-profile.children.cycles-pp.drop_nlink
      0.08 ±  7%      -0.0        0.06 ± 12%  perf-profile.children.cycles-pp.apparmor_path_unlink
      0.13 ±  8%      +0.0        0.15 ±  6%  perf-profile.children.cycles-pp.idle_cpu
      0.08 ± 13%      +0.0        0.10 ±  9%  perf-profile.children.cycles-pp.map__process_kallsym_symbol
      0.04 ± 51%      +0.0        0.07 ± 11%  perf-profile.children.cycles-pp.do_read_fault
      0.03 ± 82%      +0.0        0.07 ±  7%  perf-profile.children.cycles-pp.write_cache
      0.05 ± 50%      +0.0        0.08 ± 13%  perf-profile.children.cycles-pp.update_wall_time
      0.05 ± 50%      +0.0        0.08 ± 13%  perf-profile.children.cycles-pp.timekeeping_advance
      0.18 ± 11%      +0.0        0.22 ±  7%  perf-profile.children.cycles-pp.update_curr
      0.18 ±  6%      +0.0        0.22 ±  8%  perf-profile.children.cycles-pp.force_qs_rnp
      0.16 ± 15%      +0.0        0.20 ±  7%  perf-profile.children.cycles-pp._raw_spin_unlock_irqrestore
      0.13 ± 17%      +0.0        0.18 ±  9%  perf-profile.children.cycles-pp.task_tick_fair
      0.03 ± 82%      +0.1        0.09 ±  9%  perf-profile.children.cycles-pp.rcu_segcblist_pend_cbs
      0.00            +0.1        0.06 ± 12%  perf-profile.children.cycles-pp.rmqueue_bulk
      0.00            +0.1        0.08 ± 13%  perf-profile.children.cycles-pp.free_pcppages_bulk
      0.67 ±  7%      +0.1        0.75 ±  4%  perf-profile.children.cycles-pp.sched_clock_cpu
      0.86 ±  5%      +0.1        0.94        perf-profile.children.cycles-pp.native_sched_clock
      0.06 ± 12%      +0.1        0.14 ± 16%  perf-profile.children.cycles-pp.__unfreeze_partials
      0.00            +0.1        0.08 ± 19%  perf-profile.children.cycles-pp.rmqueue
      0.06 ± 15%      +0.1        0.15 ± 11%  perf-profile.children.cycles-pp.free_unref_page
      0.00            +0.1        0.09 ± 14%  perf-profile.children.cycles-pp.inc_slabs_node
      0.00            +0.1        0.09 ± 22%  perf-profile.children.cycles-pp.xas_clear_mark
      0.01 ±200%      +0.1        0.11 ± 17%  perf-profile.children.cycles-pp.get_page_from_freelist
      0.82 ±  3%      +0.1        0.92 ±  5%  perf-profile.children.cycles-pp._raw_spin_lock_irq
      0.02 ±123%      +0.1        0.13 ± 17%  perf-profile.children.cycles-pp.__alloc_pages
      0.40 ±  5%      +0.2        0.56 ±  7%  perf-profile.children.cycles-pp.note_gp_changes
      0.94 ±  6%      +0.2        1.10 ±  6%  perf-profile.children.cycles-pp._raw_spin_lock_irqsave
      0.15 ± 11%      +0.2        0.31 ±  5%  perf-profile.children.cycles-pp.rcu_nocb_try_bypass
      0.28 ± 11%      +0.2        0.48 ± 12%  perf-profile.children.cycles-pp.rcu_segcblist_enqueue
      0.90 ±  9%      +0.2        1.10 ±  7%  perf-profile.children.cycles-pp.rcu_pending
      0.24 ± 21%      +0.2        0.46 ± 22%  perf-profile.children.cycles-pp.smpboot_thread_fn
      0.01 ±200%      +0.2        0.26 ± 36%  perf-profile.children.cycles-pp.run_ksoftirqd
      1.07 ±  6%      +0.3        1.32 ±  6%  perf-profile.children.cycles-pp.rcu_sched_clock_irq
      0.60 ±  7%      +0.3        0.87 ± 12%  perf-profile.children.cycles-pp.ret_from_fork
      0.59 ±  7%      +0.3        0.86 ± 12%  perf-profile.children.cycles-pp.kthread
      0.00            +0.7        0.66 ±  5%  perf-profile.children.cycles-pp.radix_tree_node_rcu_free
      4.01 ±  6%      +0.7        4.74 ±  4%  perf-profile.children.cycles-pp.scheduler_tick
      1.93 ±  3%      +0.8        2.77 ±  4%  perf-profile.children.cycles-pp.ct_kernel_exit_state
      2.04 ±  4%      +0.8        2.89 ±  4%  perf-profile.children.cycles-pp.ct_idle_exit
      2.01 ±  4%      +0.8        2.86 ±  4%  perf-profile.children.cycles-pp.ct_kernel_enter
      5.98 ±  4%      +1.1        7.05 ±  3%  perf-profile.children.cycles-pp.update_process_times
      6.05 ±  4%      +1.1        7.13 ±  3%  perf-profile.children.cycles-pp.tick_sched_handle
      1.04 ±  6%      +1.2        2.21 ±  3%  perf-profile.children.cycles-pp.__call_rcu_common
      2.54 ±  3%      +1.3        3.83 ±  5%  perf-profile.children.cycles-pp.vfs_unlink
      6.92 ±  6%      +1.4        8.29 ±  7%  perf-profile.children.cycles-pp.tick_sched_timer
      9.90 ±  4%      +1.6       11.52 ±  6%  perf-profile.children.cycles-pp.__hrtimer_run_queues
      1.09 ±  6%      +1.7        2.79 ±  2%  perf-profile.children.cycles-pp.__slab_free
      0.58 ±  2%      +1.8        2.35 ±  6%  perf-profile.children.cycles-pp.shmem_unlink
      0.00            +1.9        1.86 ±  7%  perf-profile.children.cycles-pp.__xa_erase
     12.88 ±  3%      +1.9       14.77 ±  6%  perf-profile.children.cycles-pp.hrtimer_interrupt
     13.66 ±  3%      +1.9       15.57 ±  5%  perf-profile.children.cycles-pp.__sysvec_apic_timer_interrupt
      0.00            +1.9        1.93 ±  6%  perf-profile.children.cycles-pp.xa_erase
      0.57 ±  3%      +2.0        2.62 ±  5%  perf-profile.children.cycles-pp.setup_object
      0.00            +2.1        2.14 ±  6%  perf-profile.children.cycles-pp.radix_tree_node_ctor
      0.61 ±  3%      +2.2        2.77 ±  5%  perf-profile.children.cycles-pp.shuffle_freelist
      0.65 ±  3%      +2.3        2.91 ±  5%  perf-profile.children.cycles-pp.allocate_slab
      0.00            +2.3        2.30 ±  9%  perf-profile.children.cycles-pp.xas_expand
      9.38            +2.4       11.73 ±  5%  perf-profile.children.cycles-pp.open_last_lookups
      7.58 ±  5%      +2.4        9.98 ±  2%  perf-profile.children.cycles-pp.__irq_exit_rcu
      0.81 ±  5%      +2.4        3.23 ±  4%  perf-profile.children.cycles-pp.___slab_alloc
      7.16 ±  6%      +2.6        9.76 ±  2%  perf-profile.children.cycles-pp.__do_softirq
      3.16 ±  3%      +2.6        5.78 ±  2%  perf-profile.children.cycles-pp.rcu_do_batch
      7.96            +2.7       10.65 ±  5%  perf-profile.children.cycles-pp.lookup_open
      3.62 ±  3%      +2.8        6.41        perf-profile.children.cycles-pp.rcu_core
      2.81            +2.8        5.61 ±  5%  perf-profile.children.cycles-pp.kmem_cache_alloc_lru
      4.64            +3.5        8.16 ±  5%  perf-profile.children.cycles-pp.shmem_mknod
      0.00            +3.7        3.68 ±  5%  perf-profile.children.cycles-pp.xas_alloc
     50.91            +3.8       54.75 ±  3%  perf-profile.children.cycles-pp.secondary_startup_64_no_verify
     50.91            +3.8       54.75 ±  3%  perf-profile.children.cycles-pp.cpu_startup_entry
     50.91            +3.8       54.75 ±  3%  perf-profile.children.cycles-pp.do_idle
     50.66            +3.9       54.52 ±  3%  perf-profile.children.cycles-pp.start_secondary
     47.29            +3.9       51.14 ±  3%  perf-profile.children.cycles-pp.cpuidle_enter
     47.24            +3.9       51.10 ±  3%  perf-profile.children.cycles-pp.cpuidle_enter_state
     50.49            +3.9       54.37 ±  3%  perf-profile.children.cycles-pp.cpuidle_idle_call
      0.00            +4.1        4.10 ±  5%  perf-profile.children.cycles-pp.xas_create
      0.00            +4.3        4.32 ±  5%  perf-profile.children.cycles-pp.__xa_alloc
      0.00            +4.4        4.35 ±  4%  perf-profile.children.cycles-pp.__xa_alloc_cyclic
     23.08            +4.4       27.48 ±  3%  perf-profile.children.cycles-pp.sysvec_apic_timer_interrupt
     25.36            +4.4       29.77 ±  3%  perf-profile.children.cycles-pp.asm_sysvec_apic_timer_interrupt
      0.00            +4.5        4.51 ±  5%  perf-profile.children.cycles-pp.shmem_doff_add
      0.00            +6.0        6.01 ±  5%  perf-profile.children.cycles-pp.xas_store
      1.77            -0.6        1.22 ±  8%  perf-profile.self.cycles-pp.link_path_walk
      1.34 ±  2%      -0.4        0.90 ±  6%  perf-profile.self.cycles-pp.__might_resched
      1.29 ±  4%      -0.4        0.92 ±  3%  perf-profile.self.cycles-pp.strncpy_from_user
      0.97 ±  3%      -0.3        0.69 ±  6%  perf-profile.self.cycles-pp.__d_lookup_rcu
      0.62 ±  3%      -0.2        0.41 ± 11%  perf-profile.self.cycles-pp.lockref_put_or_lock
      0.68 ±  8%      -0.2        0.47 ±  3%  perf-profile.self.cycles-pp.step_into
      0.84 ±  5%      -0.2        0.64 ±  9%  perf-profile.self.cycles-pp.mod_objcg_state
      0.75 ±  4%      -0.2        0.56 ±  8%  perf-profile.self.cycles-pp.kmem_cache_alloc
      0.78 ±  4%      -0.2        0.58 ±  9%  perf-profile.self.cycles-pp.generic_permission
      0.40 ±  5%      -0.2        0.22 ± 14%  perf-profile.self.cycles-pp.inode_init_once
      0.79 ±  5%      -0.2        0.61 ±  7%  perf-profile.self.cycles-pp.inode_permission
      0.68 ±  2%      -0.2        0.51 ± 10%  perf-profile.self.cycles-pp.entry_SYSRETQ_unsafe_stack
      0.60 ±  5%      -0.1        0.45 ± 12%  perf-profile.self.cycles-pp.scramble
      0.49 ±  3%      -0.1        0.35 ±  6%  perf-profile.self.cycles-pp._IO_getline_info
      0.50 ±  6%      -0.1        0.36 ± 15%  perf-profile.self.cycles-pp.walk_component
      0.59 ±  5%      -0.1        0.45 ± 11%  perf-profile.self.cycles-pp._IO_fgets
      0.40 ±  5%      -0.1        0.27 ±  7%  perf-profile.self.cycles-pp.creat64
      0.35 ±  9%      -0.1        0.22 ±  4%  perf-profile.self.cycles-pp.__close
      0.49 ±  6%      -0.1        0.36 ±  5%  perf-profile.self.cycles-pp.fsnotify
      0.40 ±  5%      -0.1        0.27 ± 10%  perf-profile.self.cycles-pp.__xstat64
      0.34 ± 10%      -0.1        0.22 ±  6%  perf-profile.self.cycles-pp.make_vfsuid
      0.34 ±  9%      -0.1        0.23 ± 16%  perf-profile.self.cycles-pp.unlink
      0.46 ±  5%      -0.1        0.35 ± 10%  perf-profile.self.cycles-pp.__cond_resched
      0.32 ±  8%      -0.1        0.21 ± 12%  perf-profile.self.cycles-pp.do_dentry_open
      0.32 ±  5%      -0.1        0.21 ±  9%  perf-profile.self.cycles-pp.__check_object_size
      0.51 ±  3%      -0.1        0.40 ±  5%  perf-profile.self.cycles-pp.__might_sleep
      0.33 ± 11%      -0.1        0.22 ± 19%  perf-profile.self.cycles-pp.__legitimize_mnt
      0.37 ±  2%      -0.1        0.28 ± 10%  perf-profile.self.cycles-pp.security_inode_permission
      0.30 ± 11%      -0.1        0.20 ±  7%  perf-profile.self.cycles-pp.path_init
      0.39 ±  7%      -0.1        0.30 ± 10%  perf-profile.self.cycles-pp.inode_maybe_inc_iversion
      0.30 ±  5%      -0.1        0.21 ±  5%  perf-profile.self.cycles-pp.rcu_all_qs
      0.29 ±  3%      -0.1        0.20 ± 14%  perf-profile.self.cycles-pp.entry_SYSCALL_64_after_hwframe
      0.31 ± 10%      -0.1        0.23 ± 16%  perf-profile.self.cycles-pp.__mnt_want_write
      0.31 ±  8%      -0.1        0.22 ±  7%  perf-profile.self.cycles-pp.dput
      0.41 ± 10%      -0.1        0.33 ± 12%  perf-profile.self.cycles-pp.copy_user_enhanced_fast_string
      0.27 ± 11%      -0.1        0.19 ±  6%  perf-profile.self.cycles-pp.d_alloc_parallel
      0.26 ±  5%      -0.1        0.18 ±  9%  perf-profile.self.cycles-pp.lockref_get_not_dead
      0.21 ±  6%      -0.1        0.14 ±  6%  perf-profile.self.cycles-pp.lookup_open
      0.21 ±  9%      -0.1        0.14 ±  9%  perf-profile.self.cycles-pp.set_cached_acl
      0.28 ±  8%      -0.1        0.21 ±  5%  perf-profile.self.cycles-pp.refill_obj_stock
      0.26 ±  8%      -0.1        0.19 ± 14%  perf-profile.self.cycles-pp.up_write
      0.20 ± 14%      -0.1        0.13 ± 14%  perf-profile.self.cycles-pp.shmem_get_inode
      0.15 ± 14%      -0.1        0.09 ± 11%  perf-profile.self.cycles-pp.__d_rehash
      0.18 ± 10%      -0.1        0.11 ± 17%  perf-profile.self.cycles-pp.__virt_addr_valid
      0.15 ± 13%      -0.1        0.08 ± 14%  perf-profile.self.cycles-pp.map_id_up
      0.17 ± 10%      -0.1        0.11 ± 21%  perf-profile.self.cycles-pp.do_syscall_64
      0.22 ±  8%      -0.1        0.16 ± 10%  perf-profile.self.cycles-pp._atomic_dec_and_lock
      0.23 ± 12%      -0.1        0.17 ±  5%  perf-profile.self.cycles-pp._IO_default_xsputn
      0.16 ± 10%      -0.1        0.10 ± 16%  perf-profile.self.cycles-pp.shmem_evict_inode
      0.16 ±  8%      -0.1        0.11 ± 12%  perf-profile.self.cycles-pp.__filemap_get_folio
      0.19 ± 16%      -0.1        0.14 ± 13%  perf-profile.self.cycles-pp.__srcu_read_unlock
      0.16 ± 13%      -0.1        0.11 ±  6%  perf-profile.self.cycles-pp.vfs_unlink
      0.16 ±  8%      -0.1        0.11 ±  4%  perf-profile.self.cycles-pp.syscall_return_via_sysret
      0.23 ± 10%      -0.1        0.18 ± 12%  perf-profile.self.cycles-pp.getname_flags
      0.17 ±  6%      -0.0        0.12 ± 19%  perf-profile.self.cycles-pp.apparmor_file_open
      0.17 ± 13%      -0.0        0.12 ± 10%  perf-profile.self.cycles-pp.__srcu_read_lock
      0.12 ± 13%      -0.0        0.07 ± 25%  perf-profile.self.cycles-pp.fput
      0.08 ±  7%      -0.0        0.03 ± 82%  perf-profile.self.cycles-pp.putname
      0.20 ±  7%      -0.0        0.16 ±  3%  perf-profile.self.cycles-pp.__fput
      0.15 ± 12%      -0.0        0.11 ± 22%  perf-profile.self.cycles-pp.apparmor_file_alloc_security
      0.18 ±  9%      -0.0        0.14 ± 11%  perf-profile.self.cycles-pp.__alloc_file
      0.17 ±  8%      -0.0        0.13 ± 16%  perf-profile.self.cycles-pp.lockref_get
      0.08 ±  9%      -0.0        0.04 ± 82%  perf-profile.self.cycles-pp.may_delete
      0.13 ± 12%      -0.0        0.09 ± 15%  perf-profile.self.cycles-pp.cp_new_stat
      0.10 ± 13%      -0.0        0.06 ± 15%  perf-profile.self.cycles-pp.do_sys_openat2
      0.12 ±  9%      -0.0        0.08 ± 14%  perf-profile.self.cycles-pp.entry_SYSCALL_64_safe_stack
      0.12 ± 15%      -0.0        0.08 ± 19%  perf-profile.self.cycles-pp.open_last_lookups
      0.17 ±  8%      -0.0        0.13 ± 10%  perf-profile.self.cycles-pp.__fsnotify_parent
      0.16 ±  9%      -0.0        0.12 ±  8%  perf-profile.self.cycles-pp.___d_drop
      0.14 ±  7%      -0.0        0.10 ± 17%  perf-profile.self.cycles-pp.alloc_fd
      0.12 ± 17%      -0.0        0.08 ±  9%  perf-profile.self.cycles-pp.try_to_unlazy
      0.10 ±  5%      -0.0        0.06 ± 52%  perf-profile.self.cycles-pp.syscall_exit_to_user_mode
      0.08 ± 14%      -0.0        0.05 ± 52%  perf-profile.self.cycles-pp.exit_to_user_mode_prepare
      0.10 ± 15%      -0.0        0.07 ±  9%  perf-profile.self.cycles-pp.__d_add
      0.15 ±  7%      -0.0        0.12 ± 11%  perf-profile.self.cycles-pp.security_inode_init_security
      0.13 ±  3%      -0.0        0.09 ± 10%  perf-profile.self.cycles-pp.chacha_permute
      0.12 ±  8%      -0.0        0.09 ± 10%  perf-profile.self.cycles-pp.__d_lookup_unhash
      0.09 ±  8%      -0.0        0.06 ± 10%  perf-profile.self.cycles-pp.generic_fillattr
      0.09 ± 14%      -0.0        0.06 ± 18%  perf-profile.self.cycles-pp.shmem_mknod
      0.11 ± 13%      -0.0        0.08 ±  9%  perf-profile.self.cycles-pp.exit_to_user_mode_loop
      0.11 ± 15%      -0.0        0.08 ±  7%  perf-profile.self.cycles-pp.tsc_verify_tsc_adjust
      0.10 ±  9%      -0.0        0.07 ± 16%  perf-profile.self.cycles-pp.get_obj_cgroup_from_current
      0.13 ± 12%      -0.0        0.10 ± 12%  perf-profile.self.cycles-pp.mntput_no_expire
      0.09 ±  4%      -0.0        0.06 ± 12%  perf-profile.self.cycles-pp.drop_nlink
      0.15 ±  3%      -0.0        0.13 ±  5%  perf-profile.self.cycles-pp._find_next_and_bit
      0.08 ± 13%      -0.0        0.05 ± 14%  perf-profile.self.cycles-pp.path_openat
      0.09 ±  8%      -0.0        0.07 ± 14%  perf-profile.self.cycles-pp.__d_instantiate
      0.07 ± 16%      -0.0        0.05 ±  9%  perf-profile.self.cycles-pp.__might_fault
      0.08 ±  9%      -0.0        0.06 ±  6%  perf-profile.self.cycles-pp.do_open
      0.06            +0.0        0.08 ± 10%  perf-profile.self.cycles-pp.hrtimer_next_event_without
      0.10 ± 14%      +0.0        0.13 ± 19%  perf-profile.self.cycles-pp.___slab_alloc
      0.01 ±200%      +0.1        0.07 ± 11%  perf-profile.self.cycles-pp.rcu_segcblist_pend_cbs
      0.25 ±  4%      +0.1        0.32 ±  8%  perf-profile.self.cycles-pp.slab_pre_alloc_hook
      0.83 ±  5%      +0.1        0.90        perf-profile.self.cycles-pp.native_sched_clock
      0.00            +0.1        0.07 ± 18%  perf-profile.self.cycles-pp.xas_clear_mark
      0.00            +0.1        0.08 ± 14%  perf-profile.self.cycles-pp.setup_object
      0.00            +0.1        0.09 ± 13%  perf-profile.self.cycles-pp.inc_slabs_node
      0.24 ± 10%      +0.1        0.32 ± 10%  perf-profile.self.cycles-pp.note_gp_changes
      0.08 ± 17%      +0.1        0.18 ±  8%  perf-profile.self.cycles-pp.rcu_do_batch
      0.79 ±  2%      +0.1        0.91 ±  5%  perf-profile.self.cycles-pp._raw_spin_lock_irq
      0.11 ± 14%      +0.1        0.23 ±  8%  perf-profile.self.cycles-pp.rcu_nocb_try_bypass
      0.00            +0.1        0.13 ±  9%  perf-profile.self.cycles-pp.shuffle_freelist
      0.90 ±  6%      +0.2        1.05 ±  6%  perf-profile.self.cycles-pp._raw_spin_lock_irqsave
      0.26 ± 12%      +0.2        0.44 ± 14%  perf-profile.self.cycles-pp.rcu_segcblist_enqueue
      0.00            +0.2        0.18 ±  5%  perf-profile.self.cycles-pp.xas_create
      0.58 ± 14%      +0.2        0.77 ± 11%  perf-profile.self.cycles-pp.rcu_pending
      0.00            +0.2        0.19 ± 15%  perf-profile.self.cycles-pp.xas_alloc
      0.00            +0.2        0.19 ± 15%  perf-profile.self.cycles-pp.xas_expand
      0.00            +0.3        0.35 ±  5%  perf-profile.self.cycles-pp.xas_store
      0.21 ±  5%      +0.5        0.67 ±  9%  perf-profile.self.cycles-pp.kmem_cache_alloc_lru
      0.56 ±  8%      +0.6        1.16 ±  4%  perf-profile.self.cycles-pp.__call_rcu_common
      0.00            +0.6        0.64 ±  6%  perf-profile.self.cycles-pp.radix_tree_node_rcu_free
      1.92 ±  3%      +0.8        2.76 ±  4%  perf-profile.self.cycles-pp.ct_kernel_exit_state
      1.08 ±  6%      +1.6        2.71 ±  3%  perf-profile.self.cycles-pp.__slab_free
      0.00            +1.8        1.80 ±  6%  perf-profile.self.cycles-pp.radix_tree_node_ctor



Disclaimer:
Results have been estimated based on internal Intel analysis and are provided
for informational purposes only. Any difference in system hardware or software
design or configuration may affect actual performance.
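
The added cycles in the profile above are concentrated in the new per-directory xarray paths introduced by the patch below: file creation now goes through shmem_doff_add() -> __xa_alloc_cyclic() -> xas_store() (including radix-tree node allocation via xas_alloc()/allocate_slab()), and unlink goes through xa_erase(), whose deferred node frees show up as radix_tree_node_rcu_free() under rcu_do_batch(). What follows is a minimal, illustrative sketch of that cyclic-allocation pattern in a stand-alone structure; doff_dir, doff_dir_init(), doff_add() and doff_remove() are made-up names, and only the XArray calls match what the patch actually does:

#include <linux/xarray.h>

/*
 * Sketch only: stable directory-offset allocation with an XArray.
 * Offsets 0 and 1 are reserved for "." and "..", so allocation
 * starts at 2; an entry keeps its offset until it is erased.
 */
struct doff_dir {
	struct xarray	doff_map;
	u32		next_doff;
};

static void doff_dir_init(struct doff_dir *d)
{
	xa_init_flags(&d->doff_map, XA_FLAGS_ALLOC1);	/* index 0 is never handed out */
	d->next_doff = 0;
}

static int doff_add(struct doff_dir *d, void *entry, u32 *offset)
{
	struct xa_limit limit = XA_LIMIT(2, U32_MAX);

	/*
	 * May allocate radix-tree nodes: the xas_alloc()/allocate_slab()
	 * cycles in the profile above come from this path.
	 */
	return xa_alloc_cyclic(&d->doff_map, offset, entry, limit,
			       &d->next_doff, GFP_KERNEL);
}

static void doff_remove(struct doff_dir *d, u32 offset)
{
	/* Node frees are deferred through RCU (radix_tree_node_rcu_free). */
	xa_erase(&d->doff_map, offset);
}

In the patch itself the stored entry is the struct dentry and the allocated offset is stashed in dentry->d_fsdata, as shown in the diff below.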

Patch

diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 103d1000a5a2..682ef885aa89 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -26,6 +26,8 @@  struct shmem_inode_info {
 	atomic_t		stop_eviction;	/* hold when working on inode */
 	struct timespec64	i_crtime;	/* file creation time */
 	unsigned int		fsflags;	/* flags for FS_IOC_[SG]ETFLAGS */
+	struct xarray		doff_map;	/* dir offset to entry mapping */
+	u32			next_doff;
 	struct inode		vfs_inode;
 };
 
diff --git a/mm/shmem.c b/mm/shmem.c
index 448f393d8ab2..ba4176499e5c 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -40,6 +40,8 @@ 
 #include <linux/fs_parser.h>
 #include <linux/swapfile.h>
 #include <linux/iversion.h>
+#include <linux/xarray.h>
+
 #include "swap.h"
 
 static struct vfsmount *shm_mnt;
@@ -234,6 +236,7 @@  static const struct super_operations shmem_ops;
 const struct address_space_operations shmem_aops;
 static const struct file_operations shmem_file_operations;
 static const struct inode_operations shmem_inode_operations;
+static const struct file_operations shmem_dir_operations;
 static const struct inode_operations shmem_dir_inode_operations;
 static const struct inode_operations shmem_special_inode_operations;
 static const struct vm_operations_struct shmem_vm_ops;
@@ -2397,7 +2400,9 @@  static struct inode *shmem_get_inode(struct mnt_idmap *idmap, struct super_block
 			/* Some things misbehave if size == 0 on a directory */
 			inode->i_size = 2 * BOGO_DIRENT_SIZE;
 			inode->i_op = &shmem_dir_inode_operations;
-			inode->i_fop = &simple_dir_operations;
+			inode->i_fop = &shmem_dir_operations;
+			xa_init_flags(&info->doff_map, XA_FLAGS_ALLOC1);
+			info->next_doff = 0;
 			break;
 		case S_IFLNK:
 			/*
@@ -2917,6 +2922,71 @@  static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
 	return 0;
 }
 
+static struct xarray *shmem_doff_map(struct inode *dir)
+{
+	return &SHMEM_I(dir)->doff_map;
+}
+
+static int shmem_doff_add(struct inode *dir, struct dentry *dentry)
+{
+	struct shmem_inode_info *info = SHMEM_I(dir);
+	struct xa_limit limit = XA_LIMIT(2, U32_MAX);
+	u32 offset;
+	int ret;
+
+	if (dentry->d_fsdata)
+		return -EBUSY;
+
+	offset = 0;
+	ret = xa_alloc_cyclic(shmem_doff_map(dir), &offset, dentry, limit,
+			      &info->next_doff, GFP_KERNEL);
+	if (ret < 0)
+		return ret;
+
+	dentry->d_fsdata = (void *)(unsigned long)offset;
+	return 0;
+}
+
+static struct dentry *shmem_doff_find_after(struct dentry *dir,
+					    unsigned long *offset)
+{
+	struct xarray *xa = shmem_doff_map(d_inode(dir));
+	struct dentry *d, *found = NULL;
+
+	spin_lock(&dir->d_lock);
+	d = xa_find_after(xa, offset, ULONG_MAX, XA_PRESENT);
+	if (d) {
+		spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
+		if (simple_positive(d))
+			found = dget_dlock(d);
+		spin_unlock(&d->d_lock);
+	}
+	spin_unlock(&dir->d_lock);
+	return found;
+}
+
+static void shmem_doff_remove(struct inode *dir, struct dentry *dentry)
+{
+	u32 offset = (u32)(unsigned long)dentry->d_fsdata;
+
+	if (!offset)
+		return;
+
+	xa_erase(shmem_doff_map(dir), offset);
+	dentry->d_fsdata = NULL;
+}
+
+/*
+ * During fs teardown (eg. umount), a directory's doff_map might still
+ * contain entries. xa_destroy() cleans out anything that remains.
+ */
+static void shmem_doff_map_destroy(struct inode *inode)
+{
+	struct xarray *xa = shmem_doff_map(inode);
+
+	xa_destroy(xa);
+}
+
 /*
  * File creation. Allocate an inode, and we're done..
  */
@@ -2938,6 +3008,10 @@  shmem_mknod(struct mnt_idmap *idmap, struct inode *dir,
 		if (error && error != -EOPNOTSUPP)
 			goto out_iput;
 
+		error = shmem_doff_add(dir, dentry);
+		if (error)
+			goto out_iput;
+
 		error = 0;
 		dir->i_size += BOGO_DIRENT_SIZE;
 		dir->i_ctime = dir->i_mtime = current_time(dir);
@@ -3015,6 +3089,10 @@  static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentr
 			goto out;
 	}
 
+	ret = shmem_doff_add(dir, dentry);
+	if (ret)
+		goto out;
+
 	dir->i_size += BOGO_DIRENT_SIZE;
 	inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
 	inode_inc_iversion(dir);
@@ -3033,6 +3111,8 @@  static int shmem_unlink(struct inode *dir, struct dentry *dentry)
 	if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode))
 		shmem_free_inode(inode->i_sb);
 
+	shmem_doff_remove(dir, dentry);
+
 	dir->i_size -= BOGO_DIRENT_SIZE;
 	inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
 	inode_inc_iversion(dir);
@@ -3091,24 +3171,37 @@  static int shmem_rename2(struct mnt_idmap *idmap,
 {
 	struct inode *inode = d_inode(old_dentry);
 	int they_are_dirs = S_ISDIR(inode->i_mode);
+	int error;
 
 	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
 		return -EINVAL;
 
-	if (flags & RENAME_EXCHANGE)
+	if (flags & RENAME_EXCHANGE) {
+		shmem_doff_remove(old_dir, old_dentry);
+		shmem_doff_remove(new_dir, new_dentry);
+		error = shmem_doff_add(new_dir, old_dentry);
+		if (error)
+			return error;
+		error = shmem_doff_add(old_dir, new_dentry);
+		if (error)
+			return error;
 		return simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry);
+	}
 
 	if (!simple_empty(new_dentry))
 		return -ENOTEMPTY;
 
 	if (flags & RENAME_WHITEOUT) {
-		int error;
-
 		error = shmem_whiteout(idmap, old_dir, old_dentry);
 		if (error)
 			return error;
 	}
 
+	shmem_doff_remove(old_dir, old_dentry);
+	error = shmem_doff_add(new_dir, old_dentry);
+	if (error)
+		return error;
+
 	if (d_really_is_positive(new_dentry)) {
 		(void) shmem_unlink(new_dir, new_dentry);
 		if (they_are_dirs) {
@@ -3149,26 +3242,22 @@  static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
 
 	error = security_inode_init_security(inode, dir, &dentry->d_name,
 					     shmem_initxattrs, NULL);
-	if (error && error != -EOPNOTSUPP) {
-		iput(inode);
-		return error;
-	}
+	if (error && error != -EOPNOTSUPP)
+		goto out_iput;
 
 	inode->i_size = len-1;
 	if (len <= SHORT_SYMLINK_LEN) {
 		inode->i_link = kmemdup(symname, len, GFP_KERNEL);
 		if (!inode->i_link) {
-			iput(inode);
-			return -ENOMEM;
+			error = -ENOMEM;
+			goto out_iput;
 		}
 		inode->i_op = &shmem_short_symlink_operations;
 	} else {
 		inode_nohighmem(inode);
 		error = shmem_get_folio(inode, 0, &folio, SGP_WRITE);
-		if (error) {
-			iput(inode);
-			return error;
-		}
+		if (error)
+			goto out_iput;
 		inode->i_mapping->a_ops = &shmem_aops;
 		inode->i_op = &shmem_symlink_inode_operations;
 		memcpy(folio_address(folio), symname, len);
@@ -3177,12 +3266,20 @@  static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
 		folio_unlock(folio);
 		folio_put(folio);
 	}
+
+	error = shmem_doff_add(dir, dentry);
+	if (error)
+		goto out_iput;
+
 	dir->i_size += BOGO_DIRENT_SIZE;
 	dir->i_ctime = dir->i_mtime = current_time(dir);
 	inode_inc_iversion(dir);
 	d_instantiate(dentry, inode);
 	dget(dentry);
 	return 0;
+out_iput:
+	iput(inode);
+	return error;
 }
 
 static void shmem_put_link(void *arg)
@@ -3224,6 +3321,77 @@  static const char *shmem_get_link(struct dentry *dentry,
 	return folio_address(folio);
 }
 
+static loff_t shmem_dir_llseek(struct file *file, loff_t offset, int whence)
+{
+	switch (whence) {
+	case SEEK_CUR:
+		offset += file->f_pos;
+		fallthrough;
+	case SEEK_SET:
+		if (offset >= 0)
+			break;
+		fallthrough;
+	default:
+		return -EINVAL;
+	}
+	return vfs_setpos(file, offset, U32_MAX);
+}
+
+static bool shmem_dir_emit(struct dir_context *ctx, struct dentry *dentry)
+{
+	struct inode *inode = d_inode(dentry);
+
+	return ctx->actor(ctx, dentry->d_name.name, dentry->d_name.len,
+			  (loff_t)dentry->d_fsdata, inode->i_ino,
+			  fs_umode_to_dtype(inode->i_mode));
+}
+
+/**
+ * shmem_readdir - Emit entries starting at offset @ctx->pos
+ * @file: an open directory to iterate over
+ * @ctx: directory iteration context
+ *
+ * Caller must hold @file's i_rwsem to prevent insertion or removal of
+ * entries during this call.
+ *
+ * On entry, @ctx->pos contains an offset that represents the first entry
+ * to be read from the directory.
+ *
+ * The operation continues until there are no more entries to read, or
+ * until the ctx->actor indicates there is no more space in the caller's
+ * output buffer.
+ *
+ * On return, @ctx->pos contains an offset that will read the next entry
+ * in this directory when shmem_readdir() is called again with @ctx.
+ *
+ * Return values:
+ *   %0 - Complete
+ */
+static int shmem_readdir(struct file *file, struct dir_context *ctx)
+{
+	struct dentry *dentry, *dir = file->f_path.dentry;
+	unsigned long offset;
+
+	lockdep_assert_held(&d_inode(dir)->i_rwsem);
+
+	if (!dir_emit_dots(file, ctx))
+		goto out;
+	for (offset = ctx->pos - 1; offset < ULONG_MAX - 1;) {
+		dentry = shmem_doff_find_after(dir, &offset);
+		if (!dentry)
+			break;
+		if (!shmem_dir_emit(ctx, dentry)) {
+			dput(dentry);
+			break;
+		}
+		ctx->pos = offset + 1;
+		dput(dentry);
+	}
+
+out:
+	return 0;
+}
+
 #ifdef CONFIG_TMPFS_XATTR
 
 static int shmem_fileattr_get(struct dentry *dentry, struct fileattr *fa)
@@ -3742,6 +3910,12 @@  static int shmem_show_options(struct seq_file *seq, struct dentry *root)
 	return 0;
 }
 
+#else /* CONFIG_TMPFS */
+
+static inline void shmem_doff_map_destroy(struct inode *dir)
+{
+}
+
 #endif /* CONFIG_TMPFS */
 
 static void shmem_put_super(struct super_block *sb)
@@ -3888,6 +4062,8 @@  static void shmem_destroy_inode(struct inode *inode)
 {
 	if (S_ISREG(inode->i_mode))
 		mpol_free_shared_policy(&SHMEM_I(inode)->policy);
+	if (S_ISDIR(inode->i_mode))
+		shmem_doff_map_destroy(inode);
 }
 
 static void shmem_init_inode(void *foo)
@@ -3955,6 +4131,15 @@  static const struct inode_operations shmem_inode_operations = {
 #endif
 };
 
+static const struct file_operations shmem_dir_operations = {
+#ifdef CONFIG_TMPFS
+	.llseek		= shmem_dir_llseek,
+	.iterate_shared	= shmem_readdir,
+#endif
+	.read		= generic_read_dir,
+	.fsync		= noop_fsync,
+};
+
 static const struct inode_operations shmem_dir_inode_operations = {
 #ifdef CONFIG_TMPFS
 	.getattr	= shmem_getattr,