Message ID | 20241001-mgtime-v8-4-903343d91bc3@kernel.org (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | fs: multigrain timestamp redux | expand |
On Tue 01-10-24 06:58:58, Jeff Layton wrote:
> When updating the ctime on an inode for a SETATTR with a multigrain
> filesystem, we usually want to take the latest time we can get for the
> ctime. The exception to this rule is when there is a nfsd write
> delegation and the server is proxying timestamps from the client.
>
> When nfsd gets a CB_GETATTR response, we want to update the timestamp
> value in the inode to the values that the client is tracking. The client
> doesn't send a ctime value (since that's always determined by the
> exported filesystem), but it can send a mtime value. In the case where
> it does, then we may need to update the ctime to a value commensurate
> with that instead of the current time.
>
> If ATTR_DELEG is set, then use ia_ctime value instead of setting the
> timestamp to the current time.
>
> With the addition of delegated timestamps we can also receive a request
> to update only the atime, but we may not need to set the ctime. Trust
> the ATTR_CTIME flag in the update and only update the ctime when it's
> set.
>
> Tested-by: Randy Dunlap <rdunlap@infradead.org> # documentation bits
> Signed-off-by: Jeff Layton <jlayton@kernel.org>

Looks good. Feel free to add:

Reviewed-by: Jan Kara <jack@suse.cz>

Honza

> ---
>  fs/attr.c          | 28 +++++++++++++--------
>  fs/inode.c         | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  include/linux/fs.h |  2 ++
>  3 files changed, 92 insertions(+), 10 deletions(-)
>
> diff --git a/fs/attr.c b/fs/attr.c
> index 3bcbc45708a3..392eb62aa609 100644
> --- a/fs/attr.c
> +++ b/fs/attr.c
> @@ -286,16 +286,20 @@ static void setattr_copy_mgtime(struct inode *inode, const struct iattr *attr)
>  	unsigned int ia_valid = attr->ia_valid;
>  	struct timespec64 now;
>
> -	/*
> -	 * If the ctime isn't being updated then nothing else should be
> -	 * either.
> -	 */
> -	if (!(ia_valid & ATTR_CTIME)) {
> -		WARN_ON_ONCE(ia_valid & (ATTR_ATIME|ATTR_MTIME));
> -		return;
> +	if (ia_valid & ATTR_CTIME) {
> +		/*
> +		 * In the case of an update for a write delegation, we must respect
> +		 * the value in ia_ctime and not use the current time.
> +		 */
> +		if (ia_valid & ATTR_DELEG)
> +			now = inode_set_ctime_deleg(inode, attr->ia_ctime);
> +		else
> +			now = inode_set_ctime_current(inode);
> +	} else {
> +		/* If ATTR_CTIME isn't set, then ATTR_MTIME shouldn't be either. */
> +		WARN_ON_ONCE(ia_valid & ATTR_MTIME);
>  	}
>
> -	now = inode_set_ctime_current(inode);
>  	if (ia_valid & ATTR_ATIME_SET)
>  		inode_set_atime_to_ts(inode, attr->ia_atime);
>  	else if (ia_valid & ATTR_ATIME)
> @@ -354,8 +358,12 @@ void setattr_copy(struct mnt_idmap *idmap, struct inode *inode,
>  		inode_set_atime_to_ts(inode, attr->ia_atime);
>  	if (ia_valid & ATTR_MTIME)
>  		inode_set_mtime_to_ts(inode, attr->ia_mtime);
> -	if (ia_valid & ATTR_CTIME)
> -		inode_set_ctime_to_ts(inode, attr->ia_ctime);
> +	if (ia_valid & ATTR_CTIME) {
> +		if (ia_valid & ATTR_DELEG)
> +			inode_set_ctime_deleg(inode, attr->ia_ctime);
> +		else
> +			inode_set_ctime_to_ts(inode, attr->ia_ctime);
> +	}
>  }
>  EXPORT_SYMBOL(setattr_copy);
>
> diff --git a/fs/inode.c b/fs/inode.c
> index 4ec1e71e9a9d..7a324d999816 100644
> --- a/fs/inode.c
> +++ b/fs/inode.c
> @@ -2751,6 +2751,78 @@ struct timespec64 inode_set_ctime_current(struct inode *inode)
>  }
>  EXPORT_SYMBOL(inode_set_ctime_current);
>
> +/**
> + * inode_set_ctime_deleg - try to update the ctime on a delegated inode
> + * @inode: inode to update
> + * @update: timespec64 to set the ctime
> + *
> + * Attempt to atomically update the ctime on behalf of a delegation holder.
> + *
> + * The nfs server can call back the holder of a delegation to get updated
> + * inode attributes, including the mtime. When updating the mtime we may
> + * need to update the ctime to a value at least equal to that.
> + *
> + * This can race with concurrent updates to the inode, in which
> + * case we just don't do the update.
> + *
> + * Note that this works even when multigrain timestamps are not enabled,
> + * so use it in either case.
> + */
> +struct timespec64 inode_set_ctime_deleg(struct inode *inode, struct timespec64 update)
> +{
> +	struct timespec64 now, cur_ts;
> +	u32 cur, old;
> +
> +	/* pairs with try_cmpxchg below */
> +	cur = smp_load_acquire(&inode->i_ctime_nsec);
> +	cur_ts.tv_nsec = cur & ~I_CTIME_QUERIED;
> +	cur_ts.tv_sec = inode->i_ctime_sec;
> +
> +	/* If the update is older than the existing value, skip it. */
> +	if (timespec64_compare(&update, &cur_ts) <= 0)
> +		return cur_ts;
> +
> +	ktime_get_coarse_real_ts64_mg(&now);
> +
> +	/* Clamp the update to "now" if it's in the future */
> +	if (timespec64_compare(&update, &now) > 0)
> +		update = now;
> +
> +	update = timestamp_truncate(update, inode);
> +
> +	/* No need to update if the values are already the same */
> +	if (timespec64_equal(&update, &cur_ts))
> +		return cur_ts;
> +
> +	/*
> +	 * Try to swap the nsec value into place. If it fails, that means
> +	 * we raced with an update due to a write or similar activity. That
> +	 * stamp takes precedence, so just skip the update.
> +	 */
> +retry:
> +	old = cur;
> +	if (try_cmpxchg(&inode->i_ctime_nsec, &cur, update.tv_nsec)) {
> +		inode->i_ctime_sec = update.tv_sec;
> +		mgtime_counter_inc(mg_ctime_swaps);
> +		return update;
> +	}
> +
> +	/*
> +	 * Was the change due to someone marking the old ctime QUERIED?
> +	 * If so then retry the swap. This can only happen once since
> +	 * the only way to clear I_CTIME_QUERIED is to stamp the inode
> +	 * with a new ctime.
> +	 */
> +	if (!(old & I_CTIME_QUERIED) && (cur == (old | I_CTIME_QUERIED)))
> +		goto retry;
> +
> +	/* Otherwise, it was a new timestamp. */
> +	cur_ts.tv_sec = inode->i_ctime_sec;
> +	cur_ts.tv_nsec = cur & ~I_CTIME_QUERIED;
> +	return cur_ts;
> +}
> +EXPORT_SYMBOL(inode_set_ctime_deleg);
> +
>  /**
>   * in_group_or_capable - check whether caller is CAP_FSETID privileged
>   * @idmap: idmap of the mount @inode was found from
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 23908bad166c..b1a3bd07711b 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1584,6 +1584,8 @@ static inline bool fsuidgid_has_mapping(struct super_block *sb,
>
>  struct timespec64 current_time(struct inode *inode);
>  struct timespec64 inode_set_ctime_current(struct inode *inode);
> +struct timespec64 inode_set_ctime_deleg(struct inode *inode,
> +					 struct timespec64 update);
>
>  static inline time64_t inode_get_atime_sec(const struct inode *inode)
>  {
>
> --
> 2.46.2
>
diff --git a/fs/attr.c b/fs/attr.c
index 3bcbc45708a3..392eb62aa609 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -286,16 +286,20 @@ static void setattr_copy_mgtime(struct inode *inode, const struct iattr *attr)
 	unsigned int ia_valid = attr->ia_valid;
 	struct timespec64 now;
 
-	/*
-	 * If the ctime isn't being updated then nothing else should be
-	 * either.
-	 */
-	if (!(ia_valid & ATTR_CTIME)) {
-		WARN_ON_ONCE(ia_valid & (ATTR_ATIME|ATTR_MTIME));
-		return;
+	if (ia_valid & ATTR_CTIME) {
+		/*
+		 * In the case of an update for a write delegation, we must respect
+		 * the value in ia_ctime and not use the current time.
+		 */
+		if (ia_valid & ATTR_DELEG)
+			now = inode_set_ctime_deleg(inode, attr->ia_ctime);
+		else
+			now = inode_set_ctime_current(inode);
+	} else {
+		/* If ATTR_CTIME isn't set, then ATTR_MTIME shouldn't be either. */
+		WARN_ON_ONCE(ia_valid & ATTR_MTIME);
 	}
 
-	now = inode_set_ctime_current(inode);
 	if (ia_valid & ATTR_ATIME_SET)
 		inode_set_atime_to_ts(inode, attr->ia_atime);
 	else if (ia_valid & ATTR_ATIME)
@@ -354,8 +358,12 @@ void setattr_copy(struct mnt_idmap *idmap, struct inode *inode,
 		inode_set_atime_to_ts(inode, attr->ia_atime);
 	if (ia_valid & ATTR_MTIME)
 		inode_set_mtime_to_ts(inode, attr->ia_mtime);
-	if (ia_valid & ATTR_CTIME)
-		inode_set_ctime_to_ts(inode, attr->ia_ctime);
+	if (ia_valid & ATTR_CTIME) {
+		if (ia_valid & ATTR_DELEG)
+			inode_set_ctime_deleg(inode, attr->ia_ctime);
+		else
+			inode_set_ctime_to_ts(inode, attr->ia_ctime);
+	}
 }
 EXPORT_SYMBOL(setattr_copy);
 
diff --git a/fs/inode.c b/fs/inode.c
index 4ec1e71e9a9d..7a324d999816 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -2751,6 +2751,78 @@ struct timespec64 inode_set_ctime_current(struct inode *inode)
 }
 EXPORT_SYMBOL(inode_set_ctime_current);
 
+/**
+ * inode_set_ctime_deleg - try to update the ctime on a delegated inode
+ * @inode: inode to update
+ * @update: timespec64 to set the ctime
+ *
+ * Attempt to atomically update the ctime on behalf of a delegation holder.
+ *
+ * The nfs server can call back the holder of a delegation to get updated
+ * inode attributes, including the mtime. When updating the mtime we may
+ * need to update the ctime to a value at least equal to that.
+ *
+ * This can race with concurrent updates to the inode, in which
+ * case we just don't do the update.
+ *
+ * Note that this works even when multigrain timestamps are not enabled,
+ * so use it in either case.
+ */
+struct timespec64 inode_set_ctime_deleg(struct inode *inode, struct timespec64 update)
+{
+	struct timespec64 now, cur_ts;
+	u32 cur, old;
+
+	/* pairs with try_cmpxchg below */
+	cur = smp_load_acquire(&inode->i_ctime_nsec);
+	cur_ts.tv_nsec = cur & ~I_CTIME_QUERIED;
+	cur_ts.tv_sec = inode->i_ctime_sec;
+
+	/* If the update is older than the existing value, skip it. */
+	if (timespec64_compare(&update, &cur_ts) <= 0)
+		return cur_ts;
+
+	ktime_get_coarse_real_ts64_mg(&now);
+
+	/* Clamp the update to "now" if it's in the future */
+	if (timespec64_compare(&update, &now) > 0)
+		update = now;
+
+	update = timestamp_truncate(update, inode);
+
+	/* No need to update if the values are already the same */
+	if (timespec64_equal(&update, &cur_ts))
+		return cur_ts;
+
+	/*
+	 * Try to swap the nsec value into place. If it fails, that means
+	 * we raced with an update due to a write or similar activity. That
+	 * stamp takes precedence, so just skip the update.
+	 */
+retry:
+	old = cur;
+	if (try_cmpxchg(&inode->i_ctime_nsec, &cur, update.tv_nsec)) {
+		inode->i_ctime_sec = update.tv_sec;
+		mgtime_counter_inc(mg_ctime_swaps);
+		return update;
+	}
+
+	/*
+	 * Was the change due to someone marking the old ctime QUERIED?
+	 * If so then retry the swap. This can only happen once since
+	 * the only way to clear I_CTIME_QUERIED is to stamp the inode
+	 * with a new ctime.
+	 */
+	if (!(old & I_CTIME_QUERIED) && (cur == (old | I_CTIME_QUERIED)))
+		goto retry;
+
+	/* Otherwise, it was a new timestamp. */
+	cur_ts.tv_sec = inode->i_ctime_sec;
+	cur_ts.tv_nsec = cur & ~I_CTIME_QUERIED;
+	return cur_ts;
+}
+EXPORT_SYMBOL(inode_set_ctime_deleg);
+
 /**
  * in_group_or_capable - check whether caller is CAP_FSETID privileged
  * @idmap: idmap of the mount @inode was found from
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 23908bad166c..b1a3bd07711b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1584,6 +1584,8 @@ static inline bool fsuidgid_has_mapping(struct super_block *sb,
 
 struct timespec64 current_time(struct inode *inode);
 struct timespec64 inode_set_ctime_current(struct inode *inode);
+struct timespec64 inode_set_ctime_deleg(struct inode *inode,
+					 struct timespec64 update);
 
 static inline time64_t inode_get_atime_sec(const struct inode *inode)
 {