Message ID | 20230411143702.64495-4-jlayton@kernel.org (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | fs: opportunistic high-res file timestamps | expand |
On Tue, Apr 11, 2023 at 10:37:02AM -0400, Jeff Layton wrote: > When the mtime or ctime is being queried via getattr, ensure that we > mark the inode for a high-res timestamp update on the next pass. Also, > switch to current_cmtime for other c/mtime updates. > > With this change, we're better off having the NFS server just ignore > the i_version field and have it use the ctime instead, so clear the > STATX_CHANGE_COOKIE flag in the result mask in ->getattr. > > Signed-off-by: Jeff Layton <jlayton@kernel.org> > --- > fs/xfs/libxfs/xfs_trans_inode.c | 2 +- > fs/xfs/xfs_acl.c | 2 +- > fs/xfs/xfs_inode.c | 2 +- > fs/xfs/xfs_iops.c | 15 ++++++++++++--- > 4 files changed, 15 insertions(+), 6 deletions(-) > > diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c > index 8b5547073379..9ad7c229c617 100644 > --- a/fs/xfs/libxfs/xfs_trans_inode.c > +++ b/fs/xfs/libxfs/xfs_trans_inode.c > @@ -63,7 +63,7 @@ xfs_trans_ichgtime( > ASSERT(tp); > ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); > > - tv = current_time(inode); > + tv = current_cmtime(inode); > > if (flags & XFS_ICHGTIME_MOD) > inode->i_mtime = tv; > diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c > index 791db7d9c849..461adc58cf8c 100644 > --- a/fs/xfs/xfs_acl.c > +++ b/fs/xfs/xfs_acl.c > @@ -233,7 +233,7 @@ xfs_acl_set_mode( > xfs_ilock(ip, XFS_ILOCK_EXCL); > xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); > inode->i_mode = mode; > - inode->i_ctime = current_time(inode); > + inode->i_ctime = current_cmtime(inode); Hmm, now we're adding a spinlock to all these updates. Does lockdep have anything exciting to say about this? (I don't think it will, just wondering aloud...) > xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); > > if (xfs_has_wsync(mp)) > diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c > index 5808abab786c..80f9d731e261 100644 > --- a/fs/xfs/xfs_inode.c > +++ b/fs/xfs/xfs_inode.c > @@ -843,7 +843,7 @@ xfs_init_new_inode( > ip->i_df.if_nextents = 0; > ASSERT(ip->i_nblocks == 0); > > - tv = current_time(inode); > + tv = current_cmtime(inode); > inode->i_mtime = tv; > inode->i_atime = tv; > inode->i_ctime = tv; > diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c > index 24718adb3c16..a0b07f90e16c 100644 > --- a/fs/xfs/xfs_iops.c > +++ b/fs/xfs/xfs_iops.c > @@ -565,6 +565,15 @@ xfs_vn_getattr( > if (xfs_is_shutdown(mp)) > return -EIO; > > + /* > + * XFS uses the i_version infrastructure to track any change to > + * the inode, including atime updates. This means that the i_version > + * returned by getattr doesn't conform to what the callers expect. > + * Clear it here so that nfsd will fake up a change cookie from the > + * ctime instead. > + */ > + stat->result_mask &= ~STATX_CHANGE_COOKIE; > + > stat->size = XFS_ISIZE(ip); > stat->dev = inode->i_sb->s_dev; > stat->mode = inode->i_mode; > @@ -573,8 +582,8 @@ xfs_vn_getattr( > stat->gid = vfsgid_into_kgid(vfsgid); > stat->ino = ip->i_ino; > stat->atime = inode->i_atime; > - stat->mtime = inode->i_mtime; > - stat->ctime = inode->i_ctime; > + if (request_mask & (STATX_CTIME|STATX_MTIME)) > + fill_cmtime_and_mark(inode, stat); Should we be setting STATX_[CM]TIME in the result_mask, just in case the caller asked for ctime and not mtime? --D > stat->blocks = XFS_FSB_TO_BB(mp, ip->i_nblocks + ip->i_delayed_blks); > > if (xfs_has_v3inodes(mp)) { > @@ -917,7 +926,7 @@ xfs_setattr_size( > if (newsize != oldsize && > !(iattr->ia_valid & (ATTR_CTIME | ATTR_MTIME))) { > iattr->ia_ctime = iattr->ia_mtime = > - current_time(inode); > + current_cmtime(inode); > iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME; > } > > -- > 2.39.2 >
On Tue, Apr 11, 2023 at 07:54:46AM -0700, Darrick J. Wong wrote: > On Tue, Apr 11, 2023 at 10:37:02AM -0400, Jeff Layton wrote: > > When the mtime or ctime is being queried via getattr, ensure that we > > mark the inode for a high-res timestamp update on the next pass. Also, > > switch to current_cmtime for other c/mtime updates. > > > > With this change, we're better off having the NFS server just ignore > > the i_version field and have it use the ctime instead, so clear the > > STATX_CHANGE_COOKIE flag in the result mask in ->getattr. > > > > Signed-off-by: Jeff Layton <jlayton@kernel.org> > > --- > > fs/xfs/libxfs/xfs_trans_inode.c | 2 +- > > fs/xfs/xfs_acl.c | 2 +- > > fs/xfs/xfs_inode.c | 2 +- > > fs/xfs/xfs_iops.c | 15 ++++++++++++--- > > 4 files changed, 15 insertions(+), 6 deletions(-) > > > > diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c > > index 8b5547073379..9ad7c229c617 100644 > > --- a/fs/xfs/libxfs/xfs_trans_inode.c > > +++ b/fs/xfs/libxfs/xfs_trans_inode.c > > @@ -63,7 +63,7 @@ xfs_trans_ichgtime( > > ASSERT(tp); > > ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); > > > > - tv = current_time(inode); > > + tv = current_cmtime(inode); > > > > if (flags & XFS_ICHGTIME_MOD) > > inode->i_mtime = tv; > > diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c > > index 791db7d9c849..461adc58cf8c 100644 > > --- a/fs/xfs/xfs_acl.c > > +++ b/fs/xfs/xfs_acl.c > > @@ -233,7 +233,7 @@ xfs_acl_set_mode( > > xfs_ilock(ip, XFS_ILOCK_EXCL); > > xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); > > inode->i_mode = mode; > > - inode->i_ctime = current_time(inode); > > + inode->i_ctime = current_cmtime(inode); > > Hmm, now we're adding a spinlock to all these updates. > Does lockdep have anything exciting to say about this? > > (I don't think it will, just wondering aloud...) > > > xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); > > > > if (xfs_has_wsync(mp)) > > diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c > > index 5808abab786c..80f9d731e261 100644 > > --- a/fs/xfs/xfs_inode.c > > +++ b/fs/xfs/xfs_inode.c > > @@ -843,7 +843,7 @@ xfs_init_new_inode( > > ip->i_df.if_nextents = 0; > > ASSERT(ip->i_nblocks == 0); > > > > - tv = current_time(inode); > > + tv = current_cmtime(inode); > > inode->i_mtime = tv; > > inode->i_atime = tv; > > inode->i_ctime = tv; > > diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c > > index 24718adb3c16..a0b07f90e16c 100644 > > --- a/fs/xfs/xfs_iops.c > > +++ b/fs/xfs/xfs_iops.c > > @@ -565,6 +565,15 @@ xfs_vn_getattr( > > if (xfs_is_shutdown(mp)) > > return -EIO; > > > > + /* > > + * XFS uses the i_version infrastructure to track any change to > > + * the inode, including atime updates. This means that the i_version > > + * returned by getattr doesn't conform to what the callers expect. > > + * Clear it here so that nfsd will fake up a change cookie from the > > + * ctime instead. > > + */ > > + stat->result_mask &= ~STATX_CHANGE_COOKIE; > > + > > stat->size = XFS_ISIZE(ip); > > stat->dev = inode->i_sb->s_dev; > > stat->mode = inode->i_mode; > > @@ -573,8 +582,8 @@ xfs_vn_getattr( > > stat->gid = vfsgid_into_kgid(vfsgid); > > stat->ino = ip->i_ino; > > stat->atime = inode->i_atime; > > - stat->mtime = inode->i_mtime; > > - stat->ctime = inode->i_ctime; > > + if (request_mask & (STATX_CTIME|STATX_MTIME)) > > + fill_cmtime_and_mark(inode, stat); > > Should we be setting STATX_[CM]TIME in the result_mask, just in case the > caller asked for ctime and not mtime? I think the expectation is that everything in STATX_BASIC_MASK is always returned to allow equivalence between stat() and statx(). So requesting STATX_CTIME separately from STATX_MTIME isn't implemented widely, maybe even not at atll?, yet.
On Tue, 2023-04-11 at 07:54 -0700, Darrick J. Wong wrote: > On Tue, Apr 11, 2023 at 10:37:02AM -0400, Jeff Layton wrote: > > When the mtime or ctime is being queried via getattr, ensure that we > > mark the inode for a high-res timestamp update on the next pass. Also, > > switch to current_cmtime for other c/mtime updates. > > > > With this change, we're better off having the NFS server just ignore > > the i_version field and have it use the ctime instead, so clear the > > STATX_CHANGE_COOKIE flag in the result mask in ->getattr. > > > > Signed-off-by: Jeff Layton <jlayton@kernel.org> > > --- > > fs/xfs/libxfs/xfs_trans_inode.c | 2 +- > > fs/xfs/xfs_acl.c | 2 +- > > fs/xfs/xfs_inode.c | 2 +- > > fs/xfs/xfs_iops.c | 15 ++++++++++++--- > > 4 files changed, 15 insertions(+), 6 deletions(-) > > > > diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c > > index 8b5547073379..9ad7c229c617 100644 > > --- a/fs/xfs/libxfs/xfs_trans_inode.c > > +++ b/fs/xfs/libxfs/xfs_trans_inode.c > > @@ -63,7 +63,7 @@ xfs_trans_ichgtime( > > ASSERT(tp); > > ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); > > > > - tv = current_time(inode); > > + tv = current_cmtime(inode); > > > > if (flags & XFS_ICHGTIME_MOD) > > inode->i_mtime = tv; > > diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c > > index 791db7d9c849..461adc58cf8c 100644 > > --- a/fs/xfs/xfs_acl.c > > +++ b/fs/xfs/xfs_acl.c > > @@ -233,7 +233,7 @@ xfs_acl_set_mode( > > xfs_ilock(ip, XFS_ILOCK_EXCL); > > xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); > > inode->i_mode = mode; > > - inode->i_ctime = current_time(inode); > > + inode->i_ctime = current_cmtime(inode); > > Hmm, now we're adding a spinlock to all these updates. > Does lockdep have anything exciting to say about this? > > (I don't think it will, just wondering aloud...) > I haven't seen anything, but I haven't done any extensive testing yet either. Still, that might be a good reason to avoid locking in this scheme if we can. > > xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); > > > > if (xfs_has_wsync(mp)) > > diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c > > index 5808abab786c..80f9d731e261 100644 > > --- a/fs/xfs/xfs_inode.c > > +++ b/fs/xfs/xfs_inode.c > > @@ -843,7 +843,7 @@ xfs_init_new_inode( > > ip->i_df.if_nextents = 0; > > ASSERT(ip->i_nblocks == 0); > > > > - tv = current_time(inode); > > + tv = current_cmtime(inode); > > inode->i_mtime = tv; > > inode->i_atime = tv; > > inode->i_ctime = tv; > > diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c > > index 24718adb3c16..a0b07f90e16c 100644 > > --- a/fs/xfs/xfs_iops.c > > +++ b/fs/xfs/xfs_iops.c > > @@ -565,6 +565,15 @@ xfs_vn_getattr( > > if (xfs_is_shutdown(mp)) > > return -EIO; > > > > + /* > > + * XFS uses the i_version infrastructure to track any change to > > + * the inode, including atime updates. This means that the i_version > > + * returned by getattr doesn't conform to what the callers expect. > > + * Clear it here so that nfsd will fake up a change cookie from the > > + * ctime instead. > > + */ > > + stat->result_mask &= ~STATX_CHANGE_COOKIE; > > + > > stat->size = XFS_ISIZE(ip); > > stat->dev = inode->i_sb->s_dev; > > stat->mode = inode->i_mode; > > @@ -573,8 +582,8 @@ xfs_vn_getattr( > > stat->gid = vfsgid_into_kgid(vfsgid); > > stat->ino = ip->i_ino; > > stat->atime = inode->i_atime; > > - stat->mtime = inode->i_mtime; > > - stat->ctime = inode->i_ctime; > > + if (request_mask & (STATX_CTIME|STATX_MTIME)) > > + fill_cmtime_and_mark(inode, stat); > > Should we be setting STATX_[CM]TIME in the result_mask, just in case the > caller asked for ctime and not mtime? > > We already set the result_mask in vfs_getattr_nosec: stat->result_mask |= STATX_BASIC_STATS; ...and it's OK to return more attrs than were requested. The bigger issue here is that it's not setting the ctime and mtime fields while leaving the STATX_CTIME and STATX_MTIME flags set. I'll plan to fix that up in some fashion in the next post. Thanks! > > stat->blocks = XFS_FSB_TO_BB(mp, ip->i_nblocks + ip->i_delayed_blks); > > > > if (xfs_has_v3inodes(mp)) { > > @@ -917,7 +926,7 @@ xfs_setattr_size( > > if (newsize != oldsize && > > !(iattr->ia_valid & (ATTR_CTIME | ATTR_MTIME))) { > > iattr->ia_ctime = iattr->ia_mtime = > > - current_time(inode); > > + current_cmtime(inode); > > iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME; > > } > > > > -- > > 2.39.2 > >
On Tue, 2023-04-11 at 17:15 +0200, Christian Brauner wrote: > On Tue, Apr 11, 2023 at 07:54:46AM -0700, Darrick J. Wong wrote: > > On Tue, Apr 11, 2023 at 10:37:02AM -0400, Jeff Layton wrote: > > > When the mtime or ctime is being queried via getattr, ensure that we > > > mark the inode for a high-res timestamp update on the next pass. Also, > > > switch to current_cmtime for other c/mtime updates. > > > > > > With this change, we're better off having the NFS server just ignore > > > the i_version field and have it use the ctime instead, so clear the > > > STATX_CHANGE_COOKIE flag in the result mask in ->getattr. > > > > > > Signed-off-by: Jeff Layton <jlayton@kernel.org> > > > --- > > > fs/xfs/libxfs/xfs_trans_inode.c | 2 +- > > > fs/xfs/xfs_acl.c | 2 +- > > > fs/xfs/xfs_inode.c | 2 +- > > > fs/xfs/xfs_iops.c | 15 ++++++++++++--- > > > 4 files changed, 15 insertions(+), 6 deletions(-) > > > > > > diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c > > > index 8b5547073379..9ad7c229c617 100644 > > > --- a/fs/xfs/libxfs/xfs_trans_inode.c > > > +++ b/fs/xfs/libxfs/xfs_trans_inode.c > > > @@ -63,7 +63,7 @@ xfs_trans_ichgtime( > > > ASSERT(tp); > > > ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); > > > > > > - tv = current_time(inode); > > > + tv = current_cmtime(inode); > > > > > > if (flags & XFS_ICHGTIME_MOD) > > > inode->i_mtime = tv; > > > diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c > > > index 791db7d9c849..461adc58cf8c 100644 > > > --- a/fs/xfs/xfs_acl.c > > > +++ b/fs/xfs/xfs_acl.c > > > @@ -233,7 +233,7 @@ xfs_acl_set_mode( > > > xfs_ilock(ip, XFS_ILOCK_EXCL); > > > xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); > > > inode->i_mode = mode; > > > - inode->i_ctime = current_time(inode); > > > + inode->i_ctime = current_cmtime(inode); > > > > Hmm, now we're adding a spinlock to all these updates. > > Does lockdep have anything exciting to say about this? > > > > (I don't think it will, just wondering aloud...) > > > > > xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); > > > > > > if (xfs_has_wsync(mp)) > > > diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c > > > index 5808abab786c..80f9d731e261 100644 > > > --- a/fs/xfs/xfs_inode.c > > > +++ b/fs/xfs/xfs_inode.c > > > @@ -843,7 +843,7 @@ xfs_init_new_inode( > > > ip->i_df.if_nextents = 0; > > > ASSERT(ip->i_nblocks == 0); > > > > > > - tv = current_time(inode); > > > + tv = current_cmtime(inode); > > > inode->i_mtime = tv; > > > inode->i_atime = tv; > > > inode->i_ctime = tv; > > > diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c > > > index 24718adb3c16..a0b07f90e16c 100644 > > > --- a/fs/xfs/xfs_iops.c > > > +++ b/fs/xfs/xfs_iops.c > > > @@ -565,6 +565,15 @@ xfs_vn_getattr( > > > if (xfs_is_shutdown(mp)) > > > return -EIO; > > > > > > + /* > > > + * XFS uses the i_version infrastructure to track any change to > > > + * the inode, including atime updates. This means that the i_version > > > + * returned by getattr doesn't conform to what the callers expect. > > > + * Clear it here so that nfsd will fake up a change cookie from the > > > + * ctime instead. > > > + */ > > > + stat->result_mask &= ~STATX_CHANGE_COOKIE; > > > + > > > stat->size = XFS_ISIZE(ip); > > > stat->dev = inode->i_sb->s_dev; > > > stat->mode = inode->i_mode; > > > @@ -573,8 +582,8 @@ xfs_vn_getattr( > > > stat->gid = vfsgid_into_kgid(vfsgid); > > > stat->ino = ip->i_ino; > > > stat->atime = inode->i_atime; > > > - stat->mtime = inode->i_mtime; > > > - stat->ctime = inode->i_ctime; > > > + if (request_mask & (STATX_CTIME|STATX_MTIME)) > > > + fill_cmtime_and_mark(inode, stat); > > > > Should we be setting STATX_[CM]TIME in the result_mask, just in case the > > caller asked for ctime and not mtime? > > I think the expectation is that everything in STATX_BASIC_MASK is always > returned to allow equivalence between stat() and statx(). So requesting > STATX_CTIME separately from STATX_MTIME isn't implemented widely, maybe > even not at atll?, yet. Right. Probably we ought to be more selective with how the result_mask gets set in vfs_getattr_nosec. Applications that use statx() effectively are still pretty rare, so most calls will fetch both times anyway.
Hello, kernel test robot noticed a -4.8% regression of fsmark.files_per_sec on: commit: 219c0ed2b1f5d2ad921dc206b7b23a0e29e5c516 ("[RFC PATCH 3/3][RESEND] xfs: mark the inode for high-res timestamp update in getattr") url: https://github.com/intel-lab-lkp/linux/commits/Jeff-Layton/fs-add-infrastructure-for-opportunistic-high-res-ctime-mtime-updates/20230411-223803 base: https://git.kernel.org/cgit/fs/xfs/xfs-linux.git for-next patch link: https://lore.kernel.org/all/20230411143702.64495-4-jlayton@kernel.org/ patch subject: [RFC PATCH 3/3][RESEND] xfs: mark the inode for high-res timestamp update in getattr testcase: fsmark test machine: 96 threads 2 sockets (Ice Lake) with 256G memory parameters: iterations: 1x nr_threads: 1t disk: 1BRD_32G fs: xfs fs2: nfsv4 filesize: 4K test_size: 4G sync_method: fsyncBeforeClose nr_files_per_directory: 1fpd cpufreq_governor: performance test-description: The fsmark is a file system benchmark to test synchronous write workloads, for example, mail servers workload. test-url: https://sourceforge.net/projects/fsmark/ If you fix the issue, kindly add following tag | Reported-by: kernel test robot <oliver.sang@intel.com> | Link: https://lore.kernel.org/oe-lkp/202304210920.e679570a-oliver.sang@intel.com Details are as below: --------------------------------------------------------------------------------------------------> To reproduce: git clone https://github.com/intel/lkp-tests.git cd lkp-tests sudo bin/lkp install job.yaml # job file is attached in this email bin/lkp split-job --compatible job.yaml # generate the yaml file for lkp run sudo bin/lkp run generated-yaml-file # if come across any failure that blocks the test, # please remove ~/.lkp and /lkp dir to run from a clean state. ========================================================================================= compiler/cpufreq_governor/disk/filesize/fs2/fs/iterations/kconfig/nr_files_per_directory/nr_threads/rootfs/sync_method/tbox_group/test_size/testcase: gcc-11/performance/1BRD_32G/4K/nfsv4/xfs/1x/x86_64-rhel-8.3/1fpd/1t/debian-11.1-x86_64-20220510.cgz/fsyncBeforeClose/lkp-icl-2sp1/4G/fsmark commit: 1aa3422f44 ("shmem: mark for high-res timestamps on next update after getattr") 219c0ed2b1 ("xfs: mark the inode for high-res timestamp update in getattr") 1aa3422f44c3001f 219c0ed2b1f5d2ad921dc206b7b ---------------- --------------------------- %stddev %change %stddev \ | \ 3993 -4.8% 3802 fsmark.files_per_sec 250.44 +5.0% 263.03 fsmark.time.elapsed_time 250.44 +5.0% 263.03 fsmark.time.elapsed_time.max 3182 ± 3% +23.7% 3935 ± 6% fsmark.time.involuntary_context_switches 5761785 +12.9% 6502395 fsmark.time.voluntary_context_switches 4682497 ± 28% -49.1% 2381683 ± 51% numa-meminfo.node0.FilePages 1170661 ± 28% -49.1% 595420 ± 51% numa-vmstat.node0.nr_file_pages 1771594 ± 7% -19.5% 1425973 ± 6% sched_debug.cpu.nr_switches.max 126209 -6.2% 118428 vmstat.io.bo 349577 +2.1% 356924 vmstat.system.cs 1342752 ± 4% +8.3% 1454235 ± 4% turbostat.C1 0.27 -0.0 0.25 ± 2% turbostat.C1% 42.83 ± 3% +36.6% 58.50 ± 21% turbostat.CoreTmp 42.83 ± 3% +36.6% 58.50 ± 21% turbostat.PkgTmp 219.76 +3.5% 227.36 ± 2% turbostat.PkgWatt 167.86 +2.1% 171.31 turbostat.RAMWatt 12018871 +6.4% 12790775 proc-vmstat.numa_hit 11930268 +6.4% 12699729 proc-vmstat.numa_local 313977 +2.1% 320644 proc-vmstat.pgactivate 18057605 +4.3% 18838501 proc-vmstat.pgalloc_normal 781753 +4.3% 815507 proc-vmstat.pgfault 12737726 +6.1% 13516747 proc-vmstat.pgfree 1980800 +4.2% 2064896 proc-vmstat.unevictable_pgs_scanned 1.061e+09 +2.9% 1.092e+09 perf-stat.i.branch-instructions 6265739 ± 7% +16.1% 7274779 ± 7% perf-stat.i.cache-misses 1.751e+08 +2.5% 1.795e+08 perf-stat.i.cache-references 353475 +2.1% 361037 perf-stat.i.context-switches 8.402e+08 +2.0% 8.568e+08 perf-stat.i.dTLB-stores 5.501e+09 +1.8% 5.6e+09 perf-stat.i.instructions 38.10 +2.2% 38.93 perf-stat.i.metric.M/sec 3.58 ± 7% +0.5 4.06 ± 7% perf-stat.overall.cache-miss-rate% 1409 ± 7% -12.8% 1228 ± 6% perf-stat.overall.cycles-between-cache-misses 1.057e+09 +2.9% 1.088e+09 perf-stat.ps.branch-instructions 6241536 ± 7% +16.2% 7250504 ± 7% perf-stat.ps.cache-misses 1.744e+08 +2.5% 1.788e+08 perf-stat.ps.cache-references 352052 +2.2% 359651 perf-stat.ps.context-switches 8.369e+08 +2.0% 8.535e+08 perf-stat.ps.dTLB-stores 5.479e+09 +1.8% 5.579e+09 perf-stat.ps.instructions 1.377e+12 +7.0% 1.474e+12 perf-stat.total.instructions 1.34 ± 12% -0.2 1.10 ± 10% perf-profile.calltrace.cycles-pp.nfs_do_access.nfs_permission.inode_permission.link_path_walk.path_openat 1.35 ± 12% -0.2 1.11 ± 10% perf-profile.calltrace.cycles-pp.nfs_permission.inode_permission.link_path_walk.path_openat.do_filp_open 1.36 ± 12% -0.2 1.12 ± 10% perf-profile.calltrace.cycles-pp.inode_permission.link_path_walk.path_openat.do_filp_open.do_sys_openat2 1.20 ± 12% -0.2 0.96 ± 11% perf-profile.calltrace.cycles-pp.rpc_run_task.nfs4_call_sync_sequence._nfs4_proc_access.nfs4_proc_access.nfs_do_access 1.26 ± 12% -0.2 1.02 ± 11% perf-profile.calltrace.cycles-pp._nfs4_proc_access.nfs4_proc_access.nfs_do_access.nfs_permission.inode_permission 1.26 ± 13% -0.2 1.03 ± 11% perf-profile.calltrace.cycles-pp.nfs4_proc_access.nfs_do_access.nfs_permission.inode_permission.link_path_walk 1.21 ± 12% -0.2 0.98 ± 11% perf-profile.calltrace.cycles-pp.nfs4_call_sync_sequence._nfs4_proc_access.nfs4_proc_access.nfs_do_access.nfs_permission 1.16 ± 12% -0.2 0.94 ± 11% perf-profile.calltrace.cycles-pp.rpc_execute.rpc_run_task.nfs4_call_sync_sequence._nfs4_proc_access.nfs4_proc_access 1.15 ± 12% -0.2 0.93 ± 11% perf-profile.calltrace.cycles-pp.__rpc_execute.rpc_execute.rpc_run_task.nfs4_call_sync_sequence._nfs4_proc_access 1.84 ± 3% -0.2 1.66 ± 5% perf-profile.calltrace.cycles-pp.xlog_cil_push_work.process_one_work.worker_thread.kthread.ret_from_fork 1.39 ± 9% -0.2 1.21 ± 7% perf-profile.calltrace.cycles-pp.nfsd_create_setattr.nfsd_create_locked.nfsd_create.nfsd4_create.nfsd4_proc_compound 1.00 ± 10% -0.2 0.84 ± 9% perf-profile.calltrace.cycles-pp.xlog_state_release_iclog.xlog_force_lsn.xfs_log_force_seq.nfsd_create_setattr.nfsd_create_locked 1.03 ± 10% -0.2 0.86 ± 9% perf-profile.calltrace.cycles-pp.xlog_force_lsn.xfs_log_force_seq.nfsd_create_setattr.nfsd_create_locked.nfsd_create 2.02 ± 4% +0.2 2.27 ± 5% perf-profile.calltrace.cycles-pp.tcp_write_xmit.__tcp_push_pending_frames.tcp_sock_set_cork.svc_tcp_sendto.svc_send 2.04 ± 4% +0.3 2.29 ± 5% perf-profile.calltrace.cycles-pp.__tcp_push_pending_frames.tcp_sock_set_cork.svc_tcp_sendto.svc_send.nfsd 2.20 ± 3% +0.3 2.46 ± 5% perf-profile.calltrace.cycles-pp.tcp_sock_set_cork.svc_tcp_sendto.svc_send.nfsd.kthread 2.29 ± 9% +0.6 2.85 ± 8% perf-profile.calltrace.cycles-pp.path_openat.do_filp_open.do_sys_openat2.__x64_sys_openat.do_syscall_64 2.29 ± 9% +0.6 2.85 ± 8% perf-profile.calltrace.cycles-pp.do_filp_open.do_sys_openat2.__x64_sys_openat.do_syscall_64.entry_SYSCALL_64_after_hwframe 2.34 ± 9% +0.6 2.90 ± 7% perf-profile.calltrace.cycles-pp.do_sys_openat2.__x64_sys_openat.do_syscall_64.entry_SYSCALL_64_after_hwframe.open64 2.35 ± 9% +0.6 2.92 ± 8% perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_after_hwframe.open64 2.34 ± 9% +0.6 2.90 ± 7% perf-profile.calltrace.cycles-pp.__x64_sys_openat.do_syscall_64.entry_SYSCALL_64_after_hwframe.open64 2.39 ± 9% +0.6 2.96 ± 7% perf-profile.calltrace.cycles-pp.open64 2.35 ± 9% +0.6 2.92 ± 8% perf-profile.calltrace.cycles-pp.do_syscall_64.entry_SYSCALL_64_after_hwframe.open64 1.41 ± 12% +0.6 2.04 ± 8% perf-profile.calltrace.cycles-pp.link_path_walk.path_openat.do_filp_open.do_sys_openat2.__x64_sys_openat 0.00 +0.8 0.77 ± 8% perf-profile.calltrace.cycles-pp.__rpc_execute.rpc_execute.rpc_run_task.nfs4_do_call_sync._nfs4_proc_getattr 0.00 +0.8 0.77 ± 9% perf-profile.calltrace.cycles-pp.rpc_execute.rpc_run_task.nfs4_do_call_sync._nfs4_proc_getattr.nfs4_proc_getattr 0.00 +0.8 0.79 ± 8% perf-profile.calltrace.cycles-pp.rpc_run_task.nfs4_do_call_sync._nfs4_proc_getattr.nfs4_proc_getattr.__nfs_revalidate_inode 0.00 +0.8 0.80 ± 9% perf-profile.calltrace.cycles-pp.nfs4_do_call_sync._nfs4_proc_getattr.nfs4_proc_getattr.__nfs_revalidate_inode.nfs_check_verifier 0.00 +0.8 0.80 ± 9% perf-profile.calltrace.cycles-pp._nfs4_proc_getattr.nfs4_proc_getattr.__nfs_revalidate_inode.nfs_check_verifier.nfs_do_lookup_revalidate 0.00 +0.8 0.81 ± 9% perf-profile.calltrace.cycles-pp.nfs4_proc_getattr.__nfs_revalidate_inode.nfs_check_verifier.nfs_do_lookup_revalidate.__nfs_lookup_revalidate 0.00 +0.8 0.84 ± 9% perf-profile.calltrace.cycles-pp.__nfs_revalidate_inode.nfs_check_verifier.nfs_do_lookup_revalidate.__nfs_lookup_revalidate.lookup_fast 0.00 +0.8 0.85 ± 8% perf-profile.calltrace.cycles-pp.nfs_check_verifier.nfs_do_lookup_revalidate.__nfs_lookup_revalidate.lookup_fast.walk_component 0.00 +0.9 0.85 ± 8% perf-profile.calltrace.cycles-pp.nfs_do_lookup_revalidate.__nfs_lookup_revalidate.lookup_fast.walk_component.link_path_walk 0.00 +0.9 0.87 ± 8% perf-profile.calltrace.cycles-pp.__nfs_lookup_revalidate.lookup_fast.walk_component.link_path_walk.path_openat 0.00 +0.9 0.88 ± 9% perf-profile.calltrace.cycles-pp.lookup_fast.walk_component.link_path_walk.path_openat.do_filp_open 0.00 +0.9 0.88 ± 8% perf-profile.calltrace.cycles-pp.walk_component.link_path_walk.path_openat.do_filp_open.do_sys_openat2 1.37 ± 11% -0.2 1.13 ± 9% perf-profile.children.cycles-pp.nfs_do_access 1.46 ± 11% -0.2 1.22 ± 8% perf-profile.children.cycles-pp.inode_permission 1.40 ± 11% -0.2 1.16 ± 8% perf-profile.children.cycles-pp.nfs_permission 1.27 ± 12% -0.2 1.03 ± 11% perf-profile.children.cycles-pp.nfs4_proc_access 1.26 ± 12% -0.2 1.02 ± 11% perf-profile.children.cycles-pp._nfs4_proc_access 1.87 ± 3% -0.2 1.68 ± 5% perf-profile.children.cycles-pp.xlog_cil_push_work 0.08 ± 9% -0.0 0.06 ± 13% perf-profile.children.cycles-pp.tick_program_event 0.08 ± 11% +0.0 0.10 ± 10% perf-profile.children.cycles-pp.refcount_dec_not_one 0.26 ± 8% +0.0 0.30 ± 8% perf-profile.children.cycles-pp.update_curr 0.11 ± 9% +0.0 0.15 ± 11% perf-profile.children.cycles-pp.tcp_rcv_space_adjust 0.24 ± 7% +0.0 0.28 ± 10% perf-profile.children.cycles-pp.skb_release_data 0.17 ± 12% +0.0 0.21 ± 8% perf-profile.children.cycles-pp.napi_consume_skb 0.44 ± 5% +0.0 0.49 ± 3% perf-profile.children.cycles-pp.__might_resched 0.00 +0.1 0.05 ± 8% perf-profile.children.cycles-pp.prepare_to_wait 0.26 ± 8% +0.1 0.32 ± 12% perf-profile.children.cycles-pp.nfsd4_encode_fattr 0.26 ± 8% +0.1 0.32 ± 12% perf-profile.children.cycles-pp.nfsd4_encode_getattr 0.36 ± 5% +0.1 0.46 ± 8% perf-profile.children.cycles-pp.put_cred_rcu 1.49 ± 4% +0.1 1.60 ± 5% perf-profile.children.cycles-pp.inet6_recvmsg 1.47 ± 4% +0.1 1.58 ± 5% perf-profile.children.cycles-pp.tcp_recvmsg 1.72 ± 3% +0.1 1.85 ± 5% perf-profile.children.cycles-pp.sock_recvmsg 1.69 ± 3% +0.2 1.88 ± 3% perf-profile.children.cycles-pp.tcp_rcv_established 1.72 ± 3% +0.2 1.92 ± 3% perf-profile.children.cycles-pp.tcp_v6_do_rcv 2.26 ± 4% +0.2 2.46 ± 4% perf-profile.children.cycles-pp.__netif_receive_skb_one_core 2.15 ± 3% +0.2 2.36 ± 4% perf-profile.children.cycles-pp.ip6_protocol_deliver_rcu 2.09 ± 3% +0.2 2.30 ± 4% perf-profile.children.cycles-pp.tcp_v6_rcv 2.16 ± 3% +0.2 2.36 ± 4% perf-profile.children.cycles-pp.ip6_input_finish 2.32 ± 4% +0.2 2.54 ± 4% perf-profile.children.cycles-pp.__napi_poll 2.32 ± 4% +0.2 2.54 ± 4% perf-profile.children.cycles-pp.process_backlog 2.82 ± 4% +0.3 3.12 ± 3% perf-profile.children.cycles-pp.do_softirq 2.70 ± 4% +0.3 3.00 ± 3% perf-profile.children.cycles-pp.net_rx_action 2.89 ± 4% +0.3 3.20 ± 3% perf-profile.children.cycles-pp.__local_bh_enable_ip 3.43 ± 3% +0.3 3.77 ± 3% perf-profile.children.cycles-pp.ip6_xmit 3.28 ± 4% +0.4 3.64 ± 3% perf-profile.children.cycles-pp.ip6_finish_output2 3.85 ± 2% +0.4 4.22 ± 4% perf-profile.children.cycles-pp.__tcp_transmit_skb 3.50 ± 3% +0.4 3.87 ± 3% perf-profile.children.cycles-pp.inet6_csk_xmit 4.23 ± 2% +0.4 4.62 ± 4% perf-profile.children.cycles-pp.tcp_write_xmit 4.26 ± 2% +0.4 4.66 ± 4% perf-profile.children.cycles-pp.__tcp_push_pending_frames 4.34 ± 2% +0.4 4.75 ± 4% perf-profile.children.cycles-pp.tcp_sock_set_cork 5.24 ± 3% +0.4 5.66 perf-profile.children.cycles-pp.__do_softirq 2.66 ± 9% +0.5 3.12 ± 9% perf-profile.children.cycles-pp.rpc_execute 2.34 ± 9% +0.6 2.89 ± 8% perf-profile.children.cycles-pp.do_filp_open 2.33 ± 9% +0.6 2.89 ± 8% perf-profile.children.cycles-pp.path_openat 2.40 ± 9% +0.6 2.96 ± 7% perf-profile.children.cycles-pp.open64 2.38 ± 9% +0.6 2.95 ± 7% perf-profile.children.cycles-pp.__x64_sys_openat 2.38 ± 8% +0.6 2.95 ± 7% perf-profile.children.cycles-pp.do_sys_openat2 1.52 ± 10% +0.6 2.14 ± 8% perf-profile.children.cycles-pp.link_path_walk 0.00 +0.8 0.80 ± 9% perf-profile.children.cycles-pp.nfs4_do_call_sync 0.00 +0.8 0.80 ± 9% perf-profile.children.cycles-pp._nfs4_proc_getattr 0.00 +0.8 0.81 ± 9% perf-profile.children.cycles-pp.nfs4_proc_getattr 0.00 +0.8 0.84 ± 8% perf-profile.children.cycles-pp.__nfs_revalidate_inode 0.00 +0.8 0.85 ± 8% perf-profile.children.cycles-pp.nfs_check_verifier 0.00 +0.9 0.86 ± 9% perf-profile.children.cycles-pp.nfs_do_lookup_revalidate 0.06 ± 46% +0.9 0.92 ± 8% perf-profile.children.cycles-pp.walk_component 0.04 ± 71% +0.9 0.91 ± 9% perf-profile.children.cycles-pp.lookup_fast 0.00 +0.9 0.87 ± 8% perf-profile.children.cycles-pp.__nfs_lookup_revalidate 0.12 ± 8% +0.0 0.13 ± 8% perf-profile.self.cycles-pp.nfsd_setuser 0.08 ± 14% +0.0 0.10 ± 7% perf-profile.self.cycles-pp.refcount_dec_not_one 0.16 ± 10% +0.0 0.19 ± 6% perf-profile.self.cycles-pp.__might_sleep 0.16 ± 8% +0.0 0.20 ± 10% perf-profile.self.cycles-pp.call_cpuidle 0.06 ± 47% +0.0 0.10 ± 17% perf-profile.self.cycles-pp.put_cred_rcu 0.40 ± 5% +0.0 0.45 ± 3% perf-profile.self.cycles-pp.__might_resched 0.02 ±142% +0.0 0.06 ± 11% perf-profile.self.cycles-pp.apparmor_cred_free Disclaimer: Results have been estimated based on internal Intel analysis and are provided for informational purposes only. Any difference in system hardware or software design or configuration may affect actual performance.
diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c index 8b5547073379..9ad7c229c617 100644 --- a/fs/xfs/libxfs/xfs_trans_inode.c +++ b/fs/xfs/libxfs/xfs_trans_inode.c @@ -63,7 +63,7 @@ xfs_trans_ichgtime( ASSERT(tp); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - tv = current_time(inode); + tv = current_cmtime(inode); if (flags & XFS_ICHGTIME_MOD) inode->i_mtime = tv; diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 791db7d9c849..461adc58cf8c 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -233,7 +233,7 @@ xfs_acl_set_mode( xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); inode->i_mode = mode; - inode->i_ctime = current_time(inode); + inode->i_ctime = current_cmtime(inode); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); if (xfs_has_wsync(mp)) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 5808abab786c..80f9d731e261 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -843,7 +843,7 @@ xfs_init_new_inode( ip->i_df.if_nextents = 0; ASSERT(ip->i_nblocks == 0); - tv = current_time(inode); + tv = current_cmtime(inode); inode->i_mtime = tv; inode->i_atime = tv; inode->i_ctime = tv; diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 24718adb3c16..a0b07f90e16c 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -565,6 +565,15 @@ xfs_vn_getattr( if (xfs_is_shutdown(mp)) return -EIO; + /* + * XFS uses the i_version infrastructure to track any change to + * the inode, including atime updates. This means that the i_version + * returned by getattr doesn't conform to what the callers expect. + * Clear it here so that nfsd will fake up a change cookie from the + * ctime instead. + */ + stat->result_mask &= ~STATX_CHANGE_COOKIE; + stat->size = XFS_ISIZE(ip); stat->dev = inode->i_sb->s_dev; stat->mode = inode->i_mode; @@ -573,8 +582,8 @@ xfs_vn_getattr( stat->gid = vfsgid_into_kgid(vfsgid); stat->ino = ip->i_ino; stat->atime = inode->i_atime; - stat->mtime = inode->i_mtime; - stat->ctime = inode->i_ctime; + if (request_mask & (STATX_CTIME|STATX_MTIME)) + fill_cmtime_and_mark(inode, stat); stat->blocks = XFS_FSB_TO_BB(mp, ip->i_nblocks + ip->i_delayed_blks); if (xfs_has_v3inodes(mp)) { @@ -917,7 +926,7 @@ xfs_setattr_size( if (newsize != oldsize && !(iattr->ia_valid & (ATTR_CTIME | ATTR_MTIME))) { iattr->ia_ctime = iattr->ia_mtime = - current_time(inode); + current_cmtime(inode); iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME; }
When the mtime or ctime is being queried via getattr, ensure that we mark the inode for a high-res timestamp update on the next pass. Also, switch to current_cmtime for other c/mtime updates. With this change, we're better off having the NFS server just ignore the i_version field and have it use the ctime instead, so clear the STATX_CHANGE_COOKIE flag in the result mask in ->getattr. Signed-off-by: Jeff Layton <jlayton@kernel.org> --- fs/xfs/libxfs/xfs_trans_inode.c | 2 +- fs/xfs/xfs_acl.c | 2 +- fs/xfs/xfs_inode.c | 2 +- fs/xfs/xfs_iops.c | 15 ++++++++++++--- 4 files changed, 15 insertions(+), 6 deletions(-)