Message ID | 1606776378-22381-1-git-send-email-bfields@fieldses.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [1/5] nfsd: only call inode_query_iversion in the I_VERSION case | expand |
On Mon, 2020-11-30 at 17:46 -0500, J. Bruce Fields wrote: > From: "J. Bruce Fields" <bfields@redhat.com> > > inode_query_iversion() can modify i_version. Depending on the exported > filesystem, that may not be safe. For example, if you're re-exporting > NFS, NFS stores the server's change attribute in i_version and does not > expect it to be modified locally. This has been observed causing > unnecessary cache invalidations. > > The way a filesystem indicates that it's OK to call > inode_query_iversion() is by setting SB_I_VERSION. > > So, move the I_VERSION check out of encode_change(), where it's used > only in FATTR responses, to nfsd4_changeattr(), which is also called for > pre- and post- operation attributes. > "only in FATTR responses, to nfsd4_change_attribute()," > (Note we could also pull the NFSEXP_V4ROOT case into > nfsd4_change_attribute as well. That would actually be a no-op, since > pre/post attrs are only used for metadata-modifying operations, and > V4ROOT exports are read-only. But we might make the change in the > future just for simplicity.) > > Reported-by: Daire Byrne <daire@dneg.com> > Signed-off-by: J. 
Bruce Fields <bfields@redhat.com> > --- > fs/nfsd/nfs3xdr.c | 5 ++--- > fs/nfsd/nfs4xdr.c | 6 +----- > fs/nfsd/nfsfh.h | 14 ++++++++++---- > 3 files changed, 13 insertions(+), 12 deletions(-) > > diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c > index 2277f83da250..dfbf390ff40c 100644 > --- a/fs/nfsd/nfs3xdr.c > +++ b/fs/nfsd/nfs3xdr.c > @@ -291,14 +291,13 @@ void fill_post_wcc(struct svc_fh *fhp) > printk("nfsd: inode locked twice during operation.\n"); > > > > > err = fh_getattr(fhp, &fhp->fh_post_attr); > - fhp->fh_post_change = nfsd4_change_attribute(&fhp->fh_post_attr, > - d_inode(fhp->fh_dentry)); > if (err) { > fhp->fh_post_saved = false; > - /* Grab the ctime anyway - set_change_info might use it */ > fhp->fh_post_attr.ctime = d_inode(fhp->fh_dentry)->i_ctime; > } else > fhp->fh_post_saved = true; > + fhp->fh_post_change = nfsd4_change_attribute(&fhp->fh_post_attr, > + d_inode(fhp->fh_dentry)); > } > > > > > /* > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c > index 833a2c64dfe8..56fd5f6d5c44 100644 > --- a/fs/nfsd/nfs4xdr.c > +++ b/fs/nfsd/nfs4xdr.c > @@ -2298,12 +2298,8 @@ static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode, > if (exp->ex_flags & NFSEXP_V4ROOT) { > *p++ = cpu_to_be32(convert_to_wallclock(exp->cd->flush_time)); > *p++ = 0; > - } else if (IS_I_VERSION(inode)) { > + } else > p = xdr_encode_hyper(p, nfsd4_change_attribute(stat, inode)); > - } else { > - *p++ = cpu_to_be32(stat->ctime.tv_sec); > - *p++ = cpu_to_be32(stat->ctime.tv_nsec); > - } > return p; > } > > > > > diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h > index 56cfbc361561..39d764b129fa 100644 > --- a/fs/nfsd/nfsfh.h > +++ b/fs/nfsd/nfsfh.h > @@ -261,10 +261,16 @@ static inline u64 nfsd4_change_attribute(struct kstat *stat, > { > u64 chattr; > > > > > - chattr = stat->ctime.tv_sec; > - chattr <<= 30; > - chattr += stat->ctime.tv_nsec; > - chattr += inode_query_iversion(inode); > + if (IS_I_VERSION(inode)) { > + chattr = stat->ctime.tv_sec; > + 
chattr <<= 30; > + chattr += stat->ctime.tv_nsec; > + chattr += inode_query_iversion(inode); > + } else { > + chattr = stat->ctime.tv_sec; > + chattr <<= 32; > + chattr += stat->ctime.tv_nsec; > + } > return chattr; > } > > > >
On Mon, 2020-11-30 at 17:46 -0500, J. Bruce Fields wrote: > From: "J. Bruce Fields" <bfields@redhat.com> > > inode_query_iversion() can modify i_version. Depending on the exported > filesystem, that may not be safe. For example, if you're re-exporting > NFS, NFS stores the server's change attribute in i_version and does not > expect it to be modified locally. This has been observed causing > unnecessary cache invalidations. > > The way a filesystem indicates that it's OK to call > inode_query_iversion() is by setting SB_I_VERSION. > > So, move the I_VERSION check out of encode_change(), where it's used > only in FATTR responses, to nfsd4_changeattr(), which is also called for > pre- and post- operation attributes. > > (Note we could also pull the NFSEXP_V4ROOT case into > nfsd4_change_attribute as well. That would actually be a no-op, since > pre/post attrs are only used for metadata-modifying operations, and > V4ROOT exports are read-only. But we might make the change in the > future just for simplicity.) > > Reported-by: Daire Byrne <daire@dneg.com> > Signed-off-by: J. 
Bruce Fields <bfields@redhat.com> > --- > fs/nfsd/nfs3xdr.c | 5 ++--- > fs/nfsd/nfs4xdr.c | 6 +----- > fs/nfsd/nfsfh.h | 14 ++++++++++---- > 3 files changed, 13 insertions(+), 12 deletions(-) > > diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c > index 2277f83da250..dfbf390ff40c 100644 > --- a/fs/nfsd/nfs3xdr.c > +++ b/fs/nfsd/nfs3xdr.c > @@ -291,14 +291,13 @@ void fill_post_wcc(struct svc_fh *fhp) > printk("nfsd: inode locked twice during operation.\n"); > > > > > err = fh_getattr(fhp, &fhp->fh_post_attr); > - fhp->fh_post_change = nfsd4_change_attribute(&fhp->fh_post_attr, > - d_inode(fhp->fh_dentry)); > if (err) { > fhp->fh_post_saved = false; > - /* Grab the ctime anyway - set_change_info might use it */ > fhp->fh_post_attr.ctime = d_inode(fhp->fh_dentry)->i_ctime; > } else > fhp->fh_post_saved = true; > + fhp->fh_post_change = nfsd4_change_attribute(&fhp->fh_post_attr, > + d_inode(fhp->fh_dentry)); > } > > > > > /* > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c > index 833a2c64dfe8..56fd5f6d5c44 100644 > --- a/fs/nfsd/nfs4xdr.c > +++ b/fs/nfsd/nfs4xdr.c > @@ -2298,12 +2298,8 @@ static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode, > if (exp->ex_flags & NFSEXP_V4ROOT) { > *p++ = cpu_to_be32(convert_to_wallclock(exp->cd->flush_time)); > *p++ = 0; > - } else if (IS_I_VERSION(inode)) { > + } else > p = xdr_encode_hyper(p, nfsd4_change_attribute(stat, inode)); > - } else { > - *p++ = cpu_to_be32(stat->ctime.tv_sec); > - *p++ = cpu_to_be32(stat->ctime.tv_nsec); > - } > return p; > } > > > > > diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h > index 56cfbc361561..39d764b129fa 100644 > --- a/fs/nfsd/nfsfh.h > +++ b/fs/nfsd/nfsfh.h > @@ -261,10 +261,16 @@ static inline u64 nfsd4_change_attribute(struct kstat *stat, > { > u64 chattr; > > > > > - chattr = stat->ctime.tv_sec; > - chattr <<= 30; > - chattr += stat->ctime.tv_nsec; > - chattr += inode_query_iversion(inode); > + if (IS_I_VERSION(inode)) { > + chattr = stat->ctime.tv_sec; > + 
chattr <<= 30; > + chattr += stat->ctime.tv_nsec; > + chattr += inode_query_iversion(inode); > + } else { > + chattr = stat->ctime.tv_sec; > + chattr <<= 32; Might be nice to annotate the shifts above and maybe make them named constants. I'm not sure where those values come from, tbh. > + chattr += stat->ctime.tv_nsec; > + } > return chattr; > } > > > > Other than some very minor nits, the set itself looks great. Are you planning to follow up with the series to add the fetch_iversion op, or have you decided not to do that for now? Reviewed-by: Jeff Layton <jlayton@kernel.org>
On Tue, Dec 01, 2020 at 11:30:12AM -0500, Jeff Layton wrote: > On Mon, 2020-11-30 at 17:46 -0500, J. Bruce Fields wrote: > > From: "J. Bruce Fields" <bfields@redhat.com> > > > > inode_query_iversion() can modify i_version. Depending on the exported > > filesystem, that may not be safe. For example, if you're re-exporting > > NFS, NFS stores the server's change attribute in i_version and does not > > expect it to be modified locally. This has been observed causing > > unnecessary cache invalidations. > > > > The way a filesystem indicates that it's OK to call > > inode_query_iversion() is by setting SB_I_VERSION. > > > > So, move the I_VERSION check out of encode_change(), where it's used > > only in FATTR responses, to nfsd4_changeattr(), which is also called for > > pre- and post- operation attributes. > > > > "only in FATTR responses, to nfsd4_change_attribute()," Whoops, and also FATTR should have been GETATTR. Fixed locally, I assume Chuck will just want to fix that up in his tree (let me know if not). --b.
On Tue, Dec 01, 2020 at 11:43:31AM -0500, Jeff Layton wrote: > On Mon, 2020-11-30 at 17:46 -0500, J. Bruce Fields wrote: > > From: "J. Bruce Fields" <bfields@redhat.com> > > > > inode_query_iversion() can modify i_version. Depending on the exported > > filesystem, that may not be safe. For example, if you're re-exporting > > NFS, NFS stores the server's change attribute in i_version and does not > > expect it to be modified locally. This has been observed causing > > unnecessary cache invalidations. > > > > The way a filesystem indicates that it's OK to call > > inode_query_iversion() is by setting SB_I_VERSION. > > > > So, move the I_VERSION check out of encode_change(), where it's used > > only in FATTR responses, to nfsd4_changeattr(), which is also called for > > pre- and post- operation attributes. > > > > (Note we could also pull the NFSEXP_V4ROOT case into > > nfsd4_change_attribute as well. That would actually be a no-op, since > > pre/post attrs are only used for metadata-modifying operations, and > > V4ROOT exports are read-only. But we might make the change in the > > future just for simplicity.) > > > > Reported-by: Daire Byrne <daire@dneg.com> > > Signed-off-by: J. 
Bruce Fields <bfields@redhat.com> > > --- > > fs/nfsd/nfs3xdr.c | 5 ++--- > > fs/nfsd/nfs4xdr.c | 6 +----- > > fs/nfsd/nfsfh.h | 14 ++++++++++---- > > 3 files changed, 13 insertions(+), 12 deletions(-) > > > > diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c > > index 2277f83da250..dfbf390ff40c 100644 > > --- a/fs/nfsd/nfs3xdr.c > > +++ b/fs/nfsd/nfs3xdr.c > > @@ -291,14 +291,13 @@ void fill_post_wcc(struct svc_fh *fhp) > > printk("nfsd: inode locked twice during operation.\n"); > > > > > > > > > > err = fh_getattr(fhp, &fhp->fh_post_attr); > > - fhp->fh_post_change = nfsd4_change_attribute(&fhp->fh_post_attr, > > - d_inode(fhp->fh_dentry)); > > if (err) { > > fhp->fh_post_saved = false; > > - /* Grab the ctime anyway - set_change_info might use it */ > > fhp->fh_post_attr.ctime = d_inode(fhp->fh_dentry)->i_ctime; > > } else > > fhp->fh_post_saved = true; > > + fhp->fh_post_change = nfsd4_change_attribute(&fhp->fh_post_attr, > > + d_inode(fhp->fh_dentry)); > > } > > > > > > > > > > /* > > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c > > index 833a2c64dfe8..56fd5f6d5c44 100644 > > --- a/fs/nfsd/nfs4xdr.c > > +++ b/fs/nfsd/nfs4xdr.c > > @@ -2298,12 +2298,8 @@ static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode, > > if (exp->ex_flags & NFSEXP_V4ROOT) { > > *p++ = cpu_to_be32(convert_to_wallclock(exp->cd->flush_time)); > > *p++ = 0; > > - } else if (IS_I_VERSION(inode)) { > > + } else > > p = xdr_encode_hyper(p, nfsd4_change_attribute(stat, inode)); > > - } else { > > - *p++ = cpu_to_be32(stat->ctime.tv_sec); > > - *p++ = cpu_to_be32(stat->ctime.tv_nsec); > > - } > > return p; > > } > > > > > > > > > > diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h > > index 56cfbc361561..39d764b129fa 100644 > > --- a/fs/nfsd/nfsfh.h > > +++ b/fs/nfsd/nfsfh.h > > @@ -261,10 +261,16 @@ static inline u64 nfsd4_change_attribute(struct kstat *stat, > > { > > u64 chattr; > > > > > > > > > > - chattr = stat->ctime.tv_sec; > > - chattr <<= 30; > > - chattr 
+= stat->ctime.tv_nsec; > > - chattr += inode_query_iversion(inode); > > + if (IS_I_VERSION(inode)) { > > + chattr = stat->ctime.tv_sec; > > + chattr <<= 30; > > + chattr += stat->ctime.tv_nsec; > > + chattr += inode_query_iversion(inode); > > + } else { > > + chattr = stat->ctime.tv_sec; > > + chattr <<= 32; > > Might be nice to annotate the shifts above and maybe make them named > constants. I'm not sure where those values come from, tbh. This shouldn't be changing the on-the-wire results at all; so for example the latter is just doing exactly what: > > - *p++ = cpu_to_be32(stat->ctime.tv_sec); > > - *p++ = cpu_to_be32(stat->ctime.tv_nsec); did (after xdr-encoding of the 64-bit chattr). But yeah maybe it could use some explanation. The 30-bit shift in the IS_I_VERSION case is weirder, I honestly don't remember what I was thinking. Maybe just that nanoseconds really only need 30 bits, and the shift would save a couple high bits in case we wanted to change the format later without making the change attribute go backwards. Probably overthinking it! --b.
On Tue, Dec 01, 2020 at 11:43:31AM -0500, Jeff Layton wrote: > Are you planning to follow up with the series to add the fetch_iversion > op, or have you decided not to do that for now? I'm still interested. I just figured it's not urgent, and it'll be easier to ask for fs-devel review after this other stuff is in, so it may as well wait till after the merge window. --b.
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 2277f83da250..dfbf390ff40c 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -291,14 +291,13 @@ void fill_post_wcc(struct svc_fh *fhp) printk("nfsd: inode locked twice during operation.\n"); err = fh_getattr(fhp, &fhp->fh_post_attr); - fhp->fh_post_change = nfsd4_change_attribute(&fhp->fh_post_attr, - d_inode(fhp->fh_dentry)); if (err) { fhp->fh_post_saved = false; - /* Grab the ctime anyway - set_change_info might use it */ fhp->fh_post_attr.ctime = d_inode(fhp->fh_dentry)->i_ctime; } else fhp->fh_post_saved = true; + fhp->fh_post_change = nfsd4_change_attribute(&fhp->fh_post_attr, + d_inode(fhp->fh_dentry)); } /* diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 833a2c64dfe8..56fd5f6d5c44 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2298,12 +2298,8 @@ static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode, if (exp->ex_flags & NFSEXP_V4ROOT) { *p++ = cpu_to_be32(convert_to_wallclock(exp->cd->flush_time)); *p++ = 0; - } else if (IS_I_VERSION(inode)) { + } else p = xdr_encode_hyper(p, nfsd4_change_attribute(stat, inode)); - } else { - *p++ = cpu_to_be32(stat->ctime.tv_sec); - *p++ = cpu_to_be32(stat->ctime.tv_nsec); - } return p; } diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index 56cfbc361561..39d764b129fa 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -261,10 +261,16 @@ static inline u64 nfsd4_change_attribute(struct kstat *stat, { u64 chattr; - chattr = stat->ctime.tv_sec; - chattr <<= 30; - chattr += stat->ctime.tv_nsec; - chattr += inode_query_iversion(inode); + if (IS_I_VERSION(inode)) { + chattr = stat->ctime.tv_sec; + chattr <<= 30; + chattr += stat->ctime.tv_nsec; + chattr += inode_query_iversion(inode); + } else { + chattr = stat->ctime.tv_sec; + chattr <<= 32; + chattr += stat->ctime.tv_nsec; + } return chattr; }