Message ID | 1440069440-27454-16-git-send-email-jeff.layton@primarydata.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Thu, Aug 20, 2015 at 4:17 AM, Jeff Layton <jlayton@poochiereds.net> wrote: > ...when there are open files to be closed. > > When knfsd does an fput(), it gets queued to a list and a workqueue job > is then scheduled to do the actual __fput work. In the case of knfsd > closing down the file prior to a REMOVE or RENAME, we really want to > ensure that those files are closed prior to returning. When there are > files to be closed, call flush_delayed_fput to ensure this. > > There are deadlock possibilities if you call flush_delayed_fput while > holding locks, however. In the case of nfsd_rename, we don't even do the > lookups of the dentries to be renamed until we've locked for rename. > > Once we've figured out what the target dentry is for a rename, check to > see whether there are cached open files associated with it. If there > are, then unwind all of the locking, close them all, and then reattempt > the rename. > > Signed-off-by: Jeff Layton <jeff.layton@primarydata.com> > --- > fs/file_table.c | 1 + > fs/nfsd/filecache.c | 33 ++++++++++++++++++++++++++++++++- > fs/nfsd/filecache.h | 1 + > fs/nfsd/trace.h | 10 +++++++++- > fs/nfsd/vfs.c | 47 +++++++++++++++++++++++++++++++++++++++-------- > 5 files changed, 82 insertions(+), 10 deletions(-) > > diff --git a/fs/file_table.c b/fs/file_table.c > index 7f9d407c7595..33898e72618c 100644 > --- a/fs/file_table.c > +++ b/fs/file_table.c > @@ -257,6 +257,7 @@ void flush_delayed_fput(void) > { > delayed_fput(NULL); > } > +EXPORT_SYMBOL_GPL(flush_delayed_fput); > > static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput); > > diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c > index 4bd683f03b6e..b62942ba6e7b 100644 > --- a/fs/nfsd/filecache.c > +++ b/fs/nfsd/filecache.c > @@ -284,6 +284,34 @@ nfsd_file_find_locked(struct inode *inode, unsigned int may_flags, > } > > /** > + * nfsd_file_is_cached - are there any cached open files for this fh? 
> + * @inode: inode of the file to check > + * > + * Scan the hashtable for open files that match this fh. Returns true if there > + * are any, and false if not. > + */ > +bool > +nfsd_file_is_cached(struct inode *inode) > +{ > + bool ret = false; > + struct nfsd_file *nf; > + unsigned int hashval = (unsigned int)hash_ptr(inode, NFSD_FILE_HASH_BITS); > + > + rcu_read_lock(); > + hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head, > + nf_node) { > + if (inode == nf->nf_inode) { > + ret = true; > + break; > + } > + } > + rcu_read_unlock(); > + trace_nfsd_file_is_cached(hashval, inode, (int)ret); > + return ret; > +} > + > + > +/** > * nfsd_file_close_inode - attempt to forcibly close a nfsd_file > * @inode: inode of the file to attempt to remove > * > @@ -305,7 +333,10 @@ nfsd_file_close_inode(struct inode *inode) > } > spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); > trace_nfsd_file_close_inode(hashval, inode, !list_empty(&dispose)); > - nfsd_file_dispose_list(&dispose); > + if (!list_empty(&dispose)) { > + nfsd_file_dispose_list(&dispose); > + flush_delayed_fput(); It looks like flush_delayed_fput() is not an exported symbol? And if flush_delayed_fput() is acceptable, it looks like __fput_sync() is a better fit, because knfsd would not try to do all the delayed fput() work, just the dispose list... 
Cheers, Tao > + } > } > > __be32 > diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h > index 191cdb25aa66..4a873efb7953 100644 > --- a/fs/nfsd/filecache.h > +++ b/fs/nfsd/filecache.h > @@ -27,6 +27,7 @@ void nfsd_file_cache_shutdown(void); > void nfsd_file_put(struct nfsd_file *nf); > struct nfsd_file *nfsd_file_get(struct nfsd_file *nf); > void nfsd_file_close_inode(struct inode *inode); > +bool nfsd_file_is_cached(struct inode *inode); > __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, > unsigned int may_flags, struct nfsd_file **nfp); > #endif /* _FS_NFSD_FILECACHE_H */ > diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h > index 95af3b9c7b66..fc6d8ee51a00 100644 > --- a/fs/nfsd/trace.h > +++ b/fs/nfsd/trace.h > @@ -140,7 +140,7 @@ TRACE_EVENT(nfsd_file_acquire, > be32_to_cpu(__entry->status)) > ); > > -TRACE_EVENT(nfsd_file_close_inode, > +DECLARE_EVENT_CLASS(nfsd_file_search_class, > TP_PROTO(unsigned int hash, struct inode *inode, int found), > TP_ARGS(hash, inode, found), > TP_STRUCT__entry( > @@ -156,6 +156,14 @@ TRACE_EVENT(nfsd_file_close_inode, > TP_printk("hash=0x%x inode=0x%p found=%d", __entry->hash, > __entry->inode, __entry->found) > ); > + > +#define DEFINE_NFSD_FILE_SEARCH_EVENT(name) \ > +DEFINE_EVENT(nfsd_file_search_class, name, \ > + TP_PROTO(unsigned int hash, struct inode *inode, int found), \ > + TP_ARGS(hash, inode, found)) > + > +DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode); > +DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_is_cached); > #endif /* _NFSD_TRACE_H */ > > #undef TRACE_INCLUDE_PATH > diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c > index 98d3b9d96480..4cc78a4ec694 100644 > --- a/fs/nfsd/vfs.c > +++ b/fs/nfsd/vfs.c > @@ -1592,6 +1592,17 @@ nfsd_close_cached_files(struct dentry *dentry) > nfsd_file_close_inode(inode); > } > > +static bool > +nfsd_has_cached_files(struct dentry *dentry) > +{ > + bool ret = false; > + struct inode *inode = d_inode(dentry); > + > + if (inode && S_ISREG(inode->i_mode)) > + ret = 
nfsd_file_is_cached(inode); > + return ret; > +} > + > /* > * Rename a file > * N.B. After this call _both_ ffhp and tfhp need an fh_put > @@ -1604,6 +1615,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, > struct inode *fdir, *tdir; > __be32 err; > int host_err; > + bool has_cached = false; > > err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE); > if (err) > @@ -1622,6 +1634,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, > if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen)) > goto out; > > +retry: > host_err = fh_want_write(ffhp); > if (host_err) { > err = nfserrno(host_err); > @@ -1661,12 +1674,16 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, > if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry) > goto out_dput_new; > > - nfsd_close_cached_files(ndentry); > - host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0); > - if (!host_err) { > - host_err = commit_metadata(tfhp); > - if (!host_err) > - host_err = commit_metadata(ffhp); > + if (nfsd_has_cached_files(ndentry)) { > + has_cached = true; > + goto out_dput_old; > + } else { > + host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0); > + if (!host_err) { > + host_err = commit_metadata(tfhp); > + if (!host_err) > + host_err = commit_metadata(ffhp); > + } > } > out_dput_new: > dput(ndentry); > @@ -1679,12 +1696,26 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, > * as that would do the wrong thing if the two directories > * were the same, so again we do it by hand. > */ > - fill_post_wcc(ffhp); > - fill_post_wcc(tfhp); > + if (!has_cached) { > + fill_post_wcc(ffhp); > + fill_post_wcc(tfhp); > + } > unlock_rename(tdentry, fdentry); > ffhp->fh_locked = tfhp->fh_locked = 0; > fh_drop_write(ffhp); > > + /* > + * If the target dentry has cached open files, then we need to try to > + * close them prior to doing the rename. 
Flushing delayed fput > + * shouldn't be done with locks held however, so we delay it until this > + * point and then reattempt the whole shebang. > + */ > + if (has_cached) { > + has_cached = false; > + nfsd_close_cached_files(ndentry); > + dput(ndentry); > + goto retry; > + } > out: > return err; > } > -- > 2.4.3 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-nfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Aug 20, 2015 at 6:01 PM, Peng Tao <bergwolf@primarydata.com> wrote: > On Thu, Aug 20, 2015 at 4:17 AM, Jeff Layton <jlayton@poochiereds.net> wrote: >> ...when there are open files to be closed. >> >> When knfsd does an fput(), it gets queued to a list and a workqueue job >> is then scheduled to do the actual __fput work. In the case of knfsd >> closing down the file prior to a REMOVE or RENAME, we really want to >> ensure that those files are closed prior to returning. When there are >> files to be closed, call flush_delayed_fput to ensure this. >> >> There are deadlock possibilities if you call flush_delayed_fput while >> holding locks, however. In the case of nfsd_rename, we don't even do the >> lookups of the dentries to be renamed until we've locked for rename. >> >> Once we've figured out what the target dentry is for a rename, check to >> see whether there are cached open files associated with it. If there >> are, then unwind all of the locking, close them all, and then reattempt >> the rename. 
>> >> Signed-off-by: Jeff Layton <jeff.layton@primarydata.com> >> --- >> fs/file_table.c | 1 + >> fs/nfsd/filecache.c | 33 ++++++++++++++++++++++++++++++++- >> fs/nfsd/filecache.h | 1 + >> fs/nfsd/trace.h | 10 +++++++++- >> fs/nfsd/vfs.c | 47 +++++++++++++++++++++++++++++++++++++++-------- >> 5 files changed, 82 insertions(+), 10 deletions(-) >> >> diff --git a/fs/file_table.c b/fs/file_table.c >> index 7f9d407c7595..33898e72618c 100644 >> --- a/fs/file_table.c >> +++ b/fs/file_table.c >> @@ -257,6 +257,7 @@ void flush_delayed_fput(void) >> { >> delayed_fput(NULL); >> } >> +EXPORT_SYMBOL_GPL(flush_delayed_fput); >> >> static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput); >> >> diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c >> index 4bd683f03b6e..b62942ba6e7b 100644 >> --- a/fs/nfsd/filecache.c >> +++ b/fs/nfsd/filecache.c >> @@ -284,6 +284,34 @@ nfsd_file_find_locked(struct inode *inode, unsigned int may_flags, >> } >> >> /** >> + * nfsd_file_is_cached - are there any cached open files for this fh? >> + * @inode: inode of the file to check >> + * >> + * Scan the hashtable for open files that match this fh. Returns true if there >> + * are any, and false if not. 
>> + */ >> +bool >> +nfsd_file_is_cached(struct inode *inode) >> +{ >> + bool ret = false; >> + struct nfsd_file *nf; >> + unsigned int hashval = (unsigned int)hash_ptr(inode, NFSD_FILE_HASH_BITS); >> + >> + rcu_read_lock(); >> + hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head, >> + nf_node) { >> + if (inode == nf->nf_inode) { >> + ret = true; >> + break; >> + } >> + } >> + rcu_read_unlock(); >> + trace_nfsd_file_is_cached(hashval, inode, (int)ret); >> + return ret; >> +} >> + >> + >> +/** >> * nfsd_file_close_inode - attempt to forcibly close a nfsd_file >> * @inode: inode of the file to attempt to remove >> * >> @@ -305,7 +333,10 @@ nfsd_file_close_inode(struct inode *inode) >> } >> spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); >> trace_nfsd_file_close_inode(hashval, inode, !list_empty(&dispose)); >> - nfsd_file_dispose_list(&dispose); >> + if (!list_empty(&dispose)) { >> + nfsd_file_dispose_list(&dispose); >> + flush_delayed_fput(); > It looks like flush_delayed_fput() is not exported symbol? > > And if flush_delayed_fput() is acceptable, it looks like __fput_sync() > is a better fit, because knfsd would not try to do all the delayed > fput() work, just the dispose list... Oh, I just saw that flush_delayed_fput() is exported in this patch! Sorry for the noise. But I still think __fput_sync() might be a better fit, despite the assertion there... I'm fine with settling on flush_delayed_fput() though since calling __fput_sync() from a kernel thread might get more objections. 
Cheers, Tao > > Cheers, > Tao > >> + } >> } >> >> __be32 >> diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h >> index 191cdb25aa66..4a873efb7953 100644 >> --- a/fs/nfsd/filecache.h >> +++ b/fs/nfsd/filecache.h >> @@ -27,6 +27,7 @@ void nfsd_file_cache_shutdown(void); >> void nfsd_file_put(struct nfsd_file *nf); >> struct nfsd_file *nfsd_file_get(struct nfsd_file *nf); >> void nfsd_file_close_inode(struct inode *inode); >> +bool nfsd_file_is_cached(struct inode *inode); >> __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, >> unsigned int may_flags, struct nfsd_file **nfp); >> #endif /* _FS_NFSD_FILECACHE_H */ >> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h >> index 95af3b9c7b66..fc6d8ee51a00 100644 >> --- a/fs/nfsd/trace.h >> +++ b/fs/nfsd/trace.h >> @@ -140,7 +140,7 @@ TRACE_EVENT(nfsd_file_acquire, >> be32_to_cpu(__entry->status)) >> ); >> >> -TRACE_EVENT(nfsd_file_close_inode, >> +DECLARE_EVENT_CLASS(nfsd_file_search_class, >> TP_PROTO(unsigned int hash, struct inode *inode, int found), >> TP_ARGS(hash, inode, found), >> TP_STRUCT__entry( >> @@ -156,6 +156,14 @@ TRACE_EVENT(nfsd_file_close_inode, >> TP_printk("hash=0x%x inode=0x%p found=%d", __entry->hash, >> __entry->inode, __entry->found) >> ); >> + >> +#define DEFINE_NFSD_FILE_SEARCH_EVENT(name) \ >> +DEFINE_EVENT(nfsd_file_search_class, name, \ >> + TP_PROTO(unsigned int hash, struct inode *inode, int found), \ >> + TP_ARGS(hash, inode, found)) >> + >> +DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode); >> +DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_is_cached); >> #endif /* _NFSD_TRACE_H */ >> >> #undef TRACE_INCLUDE_PATH >> diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c >> index 98d3b9d96480..4cc78a4ec694 100644 >> --- a/fs/nfsd/vfs.c >> +++ b/fs/nfsd/vfs.c >> @@ -1592,6 +1592,17 @@ nfsd_close_cached_files(struct dentry *dentry) >> nfsd_file_close_inode(inode); >> } >> >> +static bool >> +nfsd_has_cached_files(struct dentry *dentry) >> +{ >> + bool ret = false; >> + struct inode *inode 
= d_inode(dentry); >> + >> + if (inode && S_ISREG(inode->i_mode)) >> + ret = nfsd_file_is_cached(inode); >> + return ret; >> +} >> + >> /* >> * Rename a file >> * N.B. After this call _both_ ffhp and tfhp need an fh_put >> @@ -1604,6 +1615,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, >> struct inode *fdir, *tdir; >> __be32 err; >> int host_err; >> + bool has_cached = false; >> >> err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE); >> if (err) >> @@ -1622,6 +1634,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, >> if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen)) >> goto out; >> >> +retry: >> host_err = fh_want_write(ffhp); >> if (host_err) { >> err = nfserrno(host_err); >> @@ -1661,12 +1674,16 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, >> if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry) >> goto out_dput_new; >> >> - nfsd_close_cached_files(ndentry); >> - host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0); >> - if (!host_err) { >> - host_err = commit_metadata(tfhp); >> - if (!host_err) >> - host_err = commit_metadata(ffhp); >> + if (nfsd_has_cached_files(ndentry)) { >> + has_cached = true; >> + goto out_dput_old; >> + } else { >> + host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0); >> + if (!host_err) { >> + host_err = commit_metadata(tfhp); >> + if (!host_err) >> + host_err = commit_metadata(ffhp); >> + } >> } >> out_dput_new: >> dput(ndentry); >> @@ -1679,12 +1696,26 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, >> * as that would do the wrong thing if the two directories >> * were the same, so again we do it by hand. 
>> */ >> - fill_post_wcc(ffhp); >> - fill_post_wcc(tfhp); >> + if (!has_cached) { >> + fill_post_wcc(ffhp); >> + fill_post_wcc(tfhp); >> + } >> unlock_rename(tdentry, fdentry); >> ffhp->fh_locked = tfhp->fh_locked = 0; >> fh_drop_write(ffhp); >> >> + /* >> + * If the target dentry has cached open files, then we need to try to >> + * close them prior to doing the rename. Flushing delayed fput >> + * shouldn't be done with locks held however, so we delay it until this >> + * point and then reattempt the whole shebang. >> + */ >> + if (has_cached) { >> + has_cached = false; >> + nfsd_close_cached_files(ndentry); >> + dput(ndentry); >> + goto retry; >> + } >> out: >> return err; >> } >> -- >> 2.4.3 >> >> -- >> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in >> the body of a message to majordomo@vger.kernel.org >> More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, 20 Aug 2015 19:18:25 -0700 Peng Tao <bergwolf@primarydata.com> wrote: > On Thu, Aug 20, 2015 at 6:01 PM, Peng Tao <bergwolf@primarydata.com> wrote: > > On Thu, Aug 20, 2015 at 4:17 AM, Jeff Layton <jlayton@poochiereds.net> wrote: > >> ...when there are open files to be closed. > >> > >> When knfsd does an fput(), it gets queued to a list and a workqueue job > >> is then scheduled to do the actual __fput work. In the case of knfsd > >> closing down the file prior to a REMOVE or RENAME, we really want to > >> ensure that those files are closed prior to returning. When there are > >> files to be closed, call flush_delayed_fput to ensure this. > >> > >> There are deadlock possibilities if you call flush_delayed_fput while > >> holding locks, however. In the case of nfsd_rename, we don't even do the > >> lookups of the dentries to be renamed until we've locked for rename. > >> > >> Once we've figured out what the target dentry is for a rename, check to > >> see whether there are cached open files associated with it. If there > >> are, then unwind all of the locking, close them all, and then reattempt > >> the rename. 
> >> > >> Signed-off-by: Jeff Layton <jeff.layton@primarydata.com> > >> --- > >> fs/file_table.c | 1 + > >> fs/nfsd/filecache.c | 33 ++++++++++++++++++++++++++++++++- > >> fs/nfsd/filecache.h | 1 + > >> fs/nfsd/trace.h | 10 +++++++++- > >> fs/nfsd/vfs.c | 47 +++++++++++++++++++++++++++++++++++++++-------- > >> 5 files changed, 82 insertions(+), 10 deletions(-) > >> > >> diff --git a/fs/file_table.c b/fs/file_table.c > >> index 7f9d407c7595..33898e72618c 100644 > >> --- a/fs/file_table.c > >> +++ b/fs/file_table.c > >> @@ -257,6 +257,7 @@ void flush_delayed_fput(void) > >> { > >> delayed_fput(NULL); > >> } > >> +EXPORT_SYMBOL_GPL(flush_delayed_fput); > >> > >> static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput); > >> > >> diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c > >> index 4bd683f03b6e..b62942ba6e7b 100644 > >> --- a/fs/nfsd/filecache.c > >> +++ b/fs/nfsd/filecache.c > >> @@ -284,6 +284,34 @@ nfsd_file_find_locked(struct inode *inode, unsigned int may_flags, > >> } > >> > >> /** > >> + * nfsd_file_is_cached - are there any cached open files for this fh? > >> + * @inode: inode of the file to check > >> + * > >> + * Scan the hashtable for open files that match this fh. Returns true if there > >> + * are any, and false if not. 
> >> + */ > >> +bool > >> +nfsd_file_is_cached(struct inode *inode) > >> +{ > >> + bool ret = false; > >> + struct nfsd_file *nf; > >> + unsigned int hashval = (unsigned int)hash_ptr(inode, NFSD_FILE_HASH_BITS); > >> + > >> + rcu_read_lock(); > >> + hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head, > >> + nf_node) { > >> + if (inode == nf->nf_inode) { > >> + ret = true; > >> + break; > >> + } > >> + } > >> + rcu_read_unlock(); > >> + trace_nfsd_file_is_cached(hashval, inode, (int)ret); > >> + return ret; > >> +} > >> + > >> + > >> +/** > >> * nfsd_file_close_inode - attempt to forcibly close a nfsd_file > >> * @inode: inode of the file to attempt to remove > >> * > >> @@ -305,7 +333,10 @@ nfsd_file_close_inode(struct inode *inode) > >> } > >> spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); > >> trace_nfsd_file_close_inode(hashval, inode, !list_empty(&dispose)); > >> - nfsd_file_dispose_list(&dispose); > >> + if (!list_empty(&dispose)) { > >> + nfsd_file_dispose_list(&dispose); > >> + flush_delayed_fput(); > > It looks like flush_delayed_fput() is not exported symbol? > > > > And if flush_delayed_fput() is acceptable, it looks like __fput_sync() > > is a better fit, because knfsd would not try to do all the delayed > > fput() work, just the dispose list... > oh, just saw that flush_delayed_fput() is exported in this patch! > sorry for the noise. But I still think __fput_sync() might be a better > fit, despite the assertion there... I'm fine with settling with > flush_delayed_fput() though since calling __fput_sync() from a kernel > thread might get more objections. > > Cheers, > Tao > I looked at __fput_sync when I first rolled this patch, but it's a little less convenient to use. __fput_sync is a synchronous analogue to fput -- so you have to ensure that you use it instead of fput. To make that work here, we'd need a separate set of destruction routines that uses __fput_sync instead of fput. 
Certainly we can do that if necessary, but I don't think it's really worth it. The downside of course is that we might end up with this thread doing a little extra __fput work if there happened to be other things queued onto the delayed_fput_list, but I'm not too concerned about that. > > > > Cheers, > > Tao > > > >> + } > >> } > >> > >> __be32 > >> diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h > >> index 191cdb25aa66..4a873efb7953 100644 > >> --- a/fs/nfsd/filecache.h > >> +++ b/fs/nfsd/filecache.h > >> @@ -27,6 +27,7 @@ void nfsd_file_cache_shutdown(void); > >> void nfsd_file_put(struct nfsd_file *nf); > >> struct nfsd_file *nfsd_file_get(struct nfsd_file *nf); > >> void nfsd_file_close_inode(struct inode *inode); > >> +bool nfsd_file_is_cached(struct inode *inode); > >> __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, > >> unsigned int may_flags, struct nfsd_file **nfp); > >> #endif /* _FS_NFSD_FILECACHE_H */ > >> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h > >> index 95af3b9c7b66..fc6d8ee51a00 100644 > >> --- a/fs/nfsd/trace.h > >> +++ b/fs/nfsd/trace.h > >> @@ -140,7 +140,7 @@ TRACE_EVENT(nfsd_file_acquire, > >> be32_to_cpu(__entry->status)) > >> ); > >> > >> -TRACE_EVENT(nfsd_file_close_inode, > >> +DECLARE_EVENT_CLASS(nfsd_file_search_class, > >> TP_PROTO(unsigned int hash, struct inode *inode, int found), > >> TP_ARGS(hash, inode, found), > >> TP_STRUCT__entry( > >> @@ -156,6 +156,14 @@ TRACE_EVENT(nfsd_file_close_inode, > >> TP_printk("hash=0x%x inode=0x%p found=%d", __entry->hash, > >> __entry->inode, __entry->found) > >> ); > >> + > >> +#define DEFINE_NFSD_FILE_SEARCH_EVENT(name) \ > >> +DEFINE_EVENT(nfsd_file_search_class, name, \ > >> + TP_PROTO(unsigned int hash, struct inode *inode, int found), \ > >> + TP_ARGS(hash, inode, found)) > >> + > >> +DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode); > >> +DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_is_cached); > >> #endif /* _NFSD_TRACE_H */ > >> > >> #undef 
TRACE_INCLUDE_PATH > >> diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c > >> index 98d3b9d96480..4cc78a4ec694 100644 > >> --- a/fs/nfsd/vfs.c > >> +++ b/fs/nfsd/vfs.c > >> @@ -1592,6 +1592,17 @@ nfsd_close_cached_files(struct dentry *dentry) > >> nfsd_file_close_inode(inode); > >> } > >> > >> +static bool > >> +nfsd_has_cached_files(struct dentry *dentry) > >> +{ > >> + bool ret = false; > >> + struct inode *inode = d_inode(dentry); > >> + > >> + if (inode && S_ISREG(inode->i_mode)) > >> + ret = nfsd_file_is_cached(inode); > >> + return ret; > >> +} > >> + > >> /* > >> * Rename a file > >> * N.B. After this call _both_ ffhp and tfhp need an fh_put > >> @@ -1604,6 +1615,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, > >> struct inode *fdir, *tdir; > >> __be32 err; > >> int host_err; > >> + bool has_cached = false; > >> > >> err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE); > >> if (err) > >> @@ -1622,6 +1634,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, > >> if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen)) > >> goto out; > >> > >> +retry: > >> host_err = fh_want_write(ffhp); > >> if (host_err) { > >> err = nfserrno(host_err); > >> @@ -1661,12 +1674,16 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, > >> if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry) > >> goto out_dput_new; > >> > >> - nfsd_close_cached_files(ndentry); > >> - host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0); > >> - if (!host_err) { > >> - host_err = commit_metadata(tfhp); > >> - if (!host_err) > >> - host_err = commit_metadata(ffhp); > >> + if (nfsd_has_cached_files(ndentry)) { > >> + has_cached = true; > >> + goto out_dput_old; > >> + } else { > >> + host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0); > >> + if (!host_err) { > >> + host_err = commit_metadata(tfhp); > >> + if (!host_err) > >> + host_err = 
commit_metadata(ffhp); > >> + } > >> } > >> out_dput_new: > >> dput(ndentry); > >> @@ -1679,12 +1696,26 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, > >> * as that would do the wrong thing if the two directories > >> * were the same, so again we do it by hand. > >> */ > >> - fill_post_wcc(ffhp); > >> - fill_post_wcc(tfhp); > >> + if (!has_cached) { > >> + fill_post_wcc(ffhp); > >> + fill_post_wcc(tfhp); > >> + } > >> unlock_rename(tdentry, fdentry); > >> ffhp->fh_locked = tfhp->fh_locked = 0; > >> fh_drop_write(ffhp); > >> > >> + /* > >> + * If the target dentry has cached open files, then we need to try to > >> + * close them prior to doing the rename. Flushing delayed fput > >> + * shouldn't be done with locks held however, so we delay it until this > >> + * point and then reattempt the whole shebang. > >> + */ > >> + if (has_cached) { > >> + has_cached = false; > >> + nfsd_close_cached_files(ndentry); > >> + dput(ndentry); > >> + goto retry; > >> + } > >> out: > >> return err; > >> } > >> -- > >> 2.4.3 > >> > >> -- > >> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in > >> the body of a message to majordomo@vger.kernel.org > >> More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/fs/file_table.c b/fs/file_table.c index 7f9d407c7595..33898e72618c 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -257,6 +257,7 @@ void flush_delayed_fput(void) { delayed_fput(NULL); } +EXPORT_SYMBOL_GPL(flush_delayed_fput); static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput); diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 4bd683f03b6e..b62942ba6e7b 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -284,6 +284,34 @@ nfsd_file_find_locked(struct inode *inode, unsigned int may_flags, } /** + * nfsd_file_is_cached - are there any cached open files for this fh? + * @inode: inode of the file to check + * + * Scan the hashtable for open files that match this fh. Returns true if there + * are any, and false if not. + */ +bool +nfsd_file_is_cached(struct inode *inode) +{ + bool ret = false; + struct nfsd_file *nf; + unsigned int hashval = (unsigned int)hash_ptr(inode, NFSD_FILE_HASH_BITS); + + rcu_read_lock(); + hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head, + nf_node) { + if (inode == nf->nf_inode) { + ret = true; + break; + } + } + rcu_read_unlock(); + trace_nfsd_file_is_cached(hashval, inode, (int)ret); + return ret; +} + + +/** * nfsd_file_close_inode - attempt to forcibly close a nfsd_file * @inode: inode of the file to attempt to remove * @@ -305,7 +333,10 @@ nfsd_file_close_inode(struct inode *inode) } spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); trace_nfsd_file_close_inode(hashval, inode, !list_empty(&dispose)); - nfsd_file_dispose_list(&dispose); + if (!list_empty(&dispose)) { + nfsd_file_dispose_list(&dispose); + flush_delayed_fput(); + } } __be32 diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index 191cdb25aa66..4a873efb7953 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -27,6 +27,7 @@ void nfsd_file_cache_shutdown(void); void nfsd_file_put(struct nfsd_file *nf); struct nfsd_file *nfsd_file_get(struct nfsd_file *nf); void nfsd_file_close_inode(struct inode 
*inode); +bool nfsd_file_is_cached(struct inode *inode); __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **nfp); #endif /* _FS_NFSD_FILECACHE_H */ diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 95af3b9c7b66..fc6d8ee51a00 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -140,7 +140,7 @@ TRACE_EVENT(nfsd_file_acquire, be32_to_cpu(__entry->status)) ); -TRACE_EVENT(nfsd_file_close_inode, +DECLARE_EVENT_CLASS(nfsd_file_search_class, TP_PROTO(unsigned int hash, struct inode *inode, int found), TP_ARGS(hash, inode, found), TP_STRUCT__entry( @@ -156,6 +156,14 @@ TRACE_EVENT(nfsd_file_close_inode, TP_printk("hash=0x%x inode=0x%p found=%d", __entry->hash, __entry->inode, __entry->found) ); + +#define DEFINE_NFSD_FILE_SEARCH_EVENT(name) \ +DEFINE_EVENT(nfsd_file_search_class, name, \ + TP_PROTO(unsigned int hash, struct inode *inode, int found), \ + TP_ARGS(hash, inode, found)) + +DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode); +DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_is_cached); #endif /* _NFSD_TRACE_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 98d3b9d96480..4cc78a4ec694 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1592,6 +1592,17 @@ nfsd_close_cached_files(struct dentry *dentry) nfsd_file_close_inode(inode); } +static bool +nfsd_has_cached_files(struct dentry *dentry) +{ + bool ret = false; + struct inode *inode = d_inode(dentry); + + if (inode && S_ISREG(inode->i_mode)) + ret = nfsd_file_is_cached(inode); + return ret; +} + /* * Rename a file * N.B. 
After this call _both_ ffhp and tfhp need an fh_put @@ -1604,6 +1615,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, struct inode *fdir, *tdir; __be32 err; int host_err; + bool has_cached = false; err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE); if (err) @@ -1622,6 +1634,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen)) goto out; +retry: host_err = fh_want_write(ffhp); if (host_err) { err = nfserrno(host_err); @@ -1661,12 +1674,16 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry) goto out_dput_new; - nfsd_close_cached_files(ndentry); - host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0); - if (!host_err) { - host_err = commit_metadata(tfhp); - if (!host_err) - host_err = commit_metadata(ffhp); + if (nfsd_has_cached_files(ndentry)) { + has_cached = true; + goto out_dput_old; + } else { + host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0); + if (!host_err) { + host_err = commit_metadata(tfhp); + if (!host_err) + host_err = commit_metadata(ffhp); + } } out_dput_new: dput(ndentry); @@ -1679,12 +1696,26 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, * as that would do the wrong thing if the two directories * were the same, so again we do it by hand. */ - fill_post_wcc(ffhp); - fill_post_wcc(tfhp); + if (!has_cached) { + fill_post_wcc(ffhp); + fill_post_wcc(tfhp); + } unlock_rename(tdentry, fdentry); ffhp->fh_locked = tfhp->fh_locked = 0; fh_drop_write(ffhp); + /* + * If the target dentry has cached open files, then we need to try to + * close them prior to doing the rename. Flushing delayed fput + * shouldn't be done with locks held however, so we delay it until this + * point and then reattempt the whole shebang. 
+ */ + if (has_cached) { + has_cached = false; + nfsd_close_cached_files(ndentry); + dput(ndentry); + goto retry; + } out: return err; }
...when there are open files to be closed. When knfsd does an fput(), it gets queued to a list and a workqueue job is then scheduled to do the actual __fput work. In the case of knfsd closing down the file prior to a REMOVE or RENAME, we really want to ensure that those files are closed prior to returning. When there are files to be closed, call flush_delayed_fput to ensure this. There are deadlock possibilities if you call flush_delayed_fput while holding locks, however. In the case of nfsd_rename, we don't even do the lookups of the dentries to be renamed until we've locked for rename. Once we've figured out what the target dentry is for a rename, check to see whether there are cached open files associated with it. If there are, then unwind all of the locking, close them all, and then reattempt the rename. Signed-off-by: Jeff Layton <jeff.layton@primarydata.com> --- fs/file_table.c | 1 + fs/nfsd/filecache.c | 33 ++++++++++++++++++++++++++++++++- fs/nfsd/filecache.h | 1 + fs/nfsd/trace.h | 10 +++++++++- fs/nfsd/vfs.c | 47 +++++++++++++++++++++++++++++++++++++++-------- 5 files changed, 82 insertions(+), 10 deletions(-)