Message ID | 166507323630.1802.8998394314935628609.stgit@manet.1015granger.net (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | A course adjustment, maybe... | expand |
On Fri, 07 Oct 2022, Chuck Lever wrote: > NFSv4 operations manage the lifetime of nfsd_file items they use by > means of NFSv4 OPEN and CLOSE. Hence there's no need for them to be > garbage collected. > > Introduce a mechanism to enable garbage collection for nfsd_file > items used only by NFSv2/3 callers. > > Note that the change in nfsd_file_put() ensures that both CLOSE and > DELEGRETURN will actually close out and free an nfsd_file on last > reference of a non-garbage-collected file. > > Link: https://bugzilla.linux-nfs.org/show_bug.cgi?id=394 > Suggested-by: Trond Myklebust <trond.myklebust@hammerspace.com> > Signed-off-by: Chuck Lever <chuck.lever@oracle.com> > --- > fs/nfsd/filecache.c | 61 +++++++++++++++++++++++++++++++++++++++++++++------ > fs/nfsd/filecache.h | 3 +++ > fs/nfsd/nfs3proc.c | 4 ++- > fs/nfsd/trace.h | 3 ++- > fs/nfsd/vfs.c | 4 ++- > 5 files changed, 63 insertions(+), 12 deletions(-) > > diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c > index b7aa523c2010..01c27deabf83 100644 > --- a/fs/nfsd/filecache.c > +++ b/fs/nfsd/filecache.c > @@ -63,6 +63,7 @@ struct nfsd_file_lookup_key { > struct net *net; > const struct cred *cred; > unsigned char need; > + unsigned char gc:1; > enum nfsd_file_lookup_type type; > }; > > @@ -162,6 +163,8 @@ static int nfsd_file_obj_cmpfn(struct rhashtable_compare_arg *arg, > return 1; > if (!nfsd_match_cred(nf->nf_cred, key->cred)) > return 1; > + if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc) > + return 1; > if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) > return 1; > break; > @@ -297,6 +300,8 @@ nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may) > nf->nf_flags = 0; > __set_bit(NFSD_FILE_HASHED, &nf->nf_flags); > __set_bit(NFSD_FILE_PENDING, &nf->nf_flags); > + if (key->gc) > + __set_bit(NFSD_FILE_GC, &nf->nf_flags); > nf->nf_inode = key->inode; > /* nf_ref is pre-incremented for hash table */ > refcount_set(&nf->nf_ref, 2); > @@ -428,16 +433,27 @@ nfsd_file_put_noref(struct nfsd_file *nf) > } > } > > +static void > +nfsd_file_unhash_and_put(struct nfsd_file *nf) > +{ > + if (nfsd_file_unhash(nf)) > + nfsd_file_put_noref(nf); > +} > + > void > nfsd_file_put(struct nfsd_file *nf) > { > might_sleep(); > > - nfsd_file_lru_add(nf); > + if (test_bit(NFSD_FILE_GC, &nf->nf_flags) == 1) > + nfsd_file_lru_add(nf); > + else if (refcount_read(&nf->nf_ref) == 2) > + nfsd_file_unhash_and_put(nf); > + > if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) { > nfsd_file_flush(nf); > nfsd_file_put_noref(nf); > - } else if (nf->nf_file) { > + } else if (nf->nf_file && test_bit(NFSD_FILE_GC, &nf->nf_flags) == 1) { > nfsd_file_put_noref(nf); > nfsd_file_schedule_laundrette(); > } else > @@ -1017,12 +1033,14 @@ nfsd_file_is_cached(struct inode *inode) > > static __be32 > nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, > - unsigned int may_flags, struct nfsd_file **pnf, bool open) > + unsigned int may_flags, struct nfsd_file **pnf, > + bool open, int want_gc) > { > struct nfsd_file_lookup_key key = { > .type = NFSD_FILE_KEY_FULL, > .need = may_flags & NFSD_FILE_MAY_MASK, > .net = SVC_NET(rqstp), > + .gc = want_gc, > }; > bool open_retry = true; > struct nfsd_file *nf; > @@ -1117,14 +1135,35 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, > * then unhash. > */ > if (status != nfs_ok || key.inode->i_nlink == 0) > - if (nfsd_file_unhash(nf)) > - nfsd_file_put_noref(nf); > + nfsd_file_unhash_and_put(nf); > clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags); > smp_mb__after_atomic(); > wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING); > goto out; > } > > +/** > + * nfsd_file_acquire_gc - Get a struct nfsd_file with an open file > + * @rqstp: the RPC transaction being executed > + * @fhp: the NFS filehandle of the file to be opened > + * @may_flags: NFSD_MAY_ settings for the file > + * @pnf: OUT: new or found "struct nfsd_file" object > + * > + * The nfsd_file object returned by this API is reference-counted > + * and garbage-collected. The object is retained for a few > + * seconds after the final nfsd_file_put() in case the caller > + * wants to re-use it. > + * > + * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in > + * network byte order is returned. > + */ > +__be32 > +nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, > + unsigned int may_flags, struct nfsd_file **pnf) > +{ > + return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true, 1); > +} > + > /** > * nfsd_file_acquire - Get a struct nfsd_file with an open file > * @rqstp: the RPC transaction being executed > @@ -1132,6 +1171,10 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, > * @may_flags: NFSD_MAY_ settings for the file > * @pnf: OUT: new or found "struct nfsd_file" object > * > + * The nfsd_file_object returned by this API is reference-counted > + * but not garbage-collected. The object is released one RCU grace > + * period after the final nfsd_file_put(). > + * > * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in > * network byte order is returned. > */ > @@ -1139,7 +1182,7 @@ __be32 > nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, > unsigned int may_flags, struct nfsd_file **pnf) > { > - return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true); > + return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true, 0); > } > > /** > @@ -1149,6 +1192,10 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, > * @may_flags: NFSD_MAY_ settings for the file > * @pnf: OUT: new or found "struct nfsd_file" object > * > + * The nfsd_file_object returned by this API is reference-counted > + * but not garbage-collected. The object is released immediately > + * one RCU grace period after the final nfsd_file_put(). While that last sentence is correct, I think it is missing the point. The reference to grace periods is really an internal implementation details. The important point is that on final nfsd_file_put(), the object is removed from the hash table. This contrasts with the other version where on final nfsd_file_put(), the object is added to the lru. The code all seems good, just the comment seemed odd. Thanks, NeilBrown > + * > * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in > * network byte order is returned. > */ > @@ -1156,7 +1203,7 @@ __be32 > nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, > unsigned int may_flags, struct nfsd_file **pnf) > { > - return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, false); > + return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, false, 0); > } > > /* > diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h > index f81c198f4ed6..0f6546bcd3e0 100644 > --- a/fs/nfsd/filecache.h > +++ b/fs/nfsd/filecache.h > @@ -38,6 +38,7 @@ struct nfsd_file { > #define NFSD_FILE_HASHED (0) > #define NFSD_FILE_PENDING (1) > #define NFSD_FILE_REFERENCED (2) > +#define NFSD_FILE_GC (3) > unsigned long nf_flags; > struct inode *nf_inode; /* don't deref */ > refcount_t nf_ref; > @@ -55,6 +56,8 @@ void nfsd_file_put(struct nfsd_file *nf); > struct nfsd_file *nfsd_file_get(struct nfsd_file *nf); > void nfsd_file_close_inode_sync(struct inode *inode); > bool nfsd_file_is_cached(struct inode *inode); > +__be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, > + unsigned int may_flags, struct nfsd_file **nfp); > __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, > unsigned int may_flags, struct nfsd_file **nfp); > __be32 nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, > diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c > index d12823371857..6a5ad6c91d8e 100644 > --- a/fs/nfsd/nfs3proc.c > +++ b/fs/nfsd/nfs3proc.c > @@ -779,8 +779,8 @@ nfsd3_proc_commit(struct svc_rqst *rqstp) > (unsigned long long) argp->offset); > > fh_copy(&resp->fh, &argp->fh); > - resp->status = nfsd_file_acquire(rqstp, &resp->fh, NFSD_MAY_WRITE | > - NFSD_MAY_NOT_BREAK_LEASE, &nf); > + resp->status = nfsd_file_acquire_gc(rqstp, &resp->fh, NFSD_MAY_WRITE | > + NFSD_MAY_NOT_BREAK_LEASE, &nf); > if (resp->status) > goto out; > resp->status = nfsd_commit(rqstp, &resp->fh, nf, argp->offset, > diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h > index 9ebd67d461f9..4921144880d3 100644 > --- a/fs/nfsd/trace.h > +++ b/fs/nfsd/trace.h > @@ -742,7 +742,8 @@ DEFINE_CLID_EVENT(confirmed_r); > __print_flags(val, "|", \ > { 1 << NFSD_FILE_HASHED, "HASHED" }, \ > { 1 << NFSD_FILE_PENDING, "PENDING" }, \ > - { 1 << NFSD_FILE_REFERENCED, "REFERENCED"}) > + { 1 << NFSD_FILE_REFERENCED, "REFERENCED"}, \ > + { 1 << NFSD_FILE_GC, "GC"}) > > DECLARE_EVENT_CLASS(nfsd_file_class, > TP_PROTO(struct nfsd_file *nf), > diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c > index 44f210ba17cc..89d682a56fc4 100644 > --- a/fs/nfsd/vfs.c > +++ b/fs/nfsd/vfs.c > @@ -1060,7 +1060,7 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, > __be32 err; > > trace_nfsd_read_start(rqstp, fhp, offset, *count); > - err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf); > + err = nfsd_file_acquire_gc(rqstp, fhp, NFSD_MAY_READ, &nf); > if (err) > return err; > > @@ -1092,7 +1092,7 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, > > trace_nfsd_write_start(rqstp, fhp, offset, *cnt); > > - err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_WRITE, &nf); > + err = nfsd_file_acquire_gc(rqstp, fhp, NFSD_MAY_WRITE, &nf); > if (err) > goto out; > > > >
> On Oct 10, 2022, at 7:50 PM, NeilBrown <neilb@suse.de> wrote: > > On Fri, 07 Oct 2022, Chuck Lever wrote: >> NFSv4 operations manage the lifetime of nfsd_file items they use by >> means of NFSv4 OPEN and CLOSE. Hence there's no need for them to be >> garbage collected. >> >> Introduce a mechanism to enable garbage collection for nfsd_file >> items used only by NFSv2/3 callers. >> >> Note that the change in nfsd_file_put() ensures that both CLOSE and >> DELEGRETURN will actually close out and free an nfsd_file on last >> reference of a non-garbage-collected file. >> >> Link: https://bugzilla.linux-nfs.org/show_bug.cgi?id=394 >> Suggested-by: Trond Myklebust <trond.myklebust@hammerspace.com> >> Signed-off-by: Chuck Lever <chuck.lever@oracle.com> >> --- >> fs/nfsd/filecache.c | 61 +++++++++++++++++++++++++++++++++++++++++++++------ >> fs/nfsd/filecache.h | 3 +++ >> fs/nfsd/nfs3proc.c | 4 ++- >> fs/nfsd/trace.h | 3 ++- >> fs/nfsd/vfs.c | 4 ++- >> 5 files changed, 63 insertions(+), 12 deletions(-) >> >> diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c >> index b7aa523c2010..01c27deabf83 100644 >> --- a/fs/nfsd/filecache.c >> +++ b/fs/nfsd/filecache.c >> @@ -63,6 +63,7 @@ struct nfsd_file_lookup_key { >> struct net *net; >> const struct cred *cred; >> unsigned char need; >> + unsigned char gc:1; >> enum nfsd_file_lookup_type type; >> }; >> >> @@ -162,6 +163,8 @@ static int nfsd_file_obj_cmpfn(struct rhashtable_compare_arg *arg, >> return 1; >> if (!nfsd_match_cred(nf->nf_cred, key->cred)) >> return 1; >> + if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc) >> + return 1; >> if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) >> return 1; >> break; >> @@ -297,6 +300,8 @@ nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may) >> nf->nf_flags = 0; >> __set_bit(NFSD_FILE_HASHED, &nf->nf_flags); >> __set_bit(NFSD_FILE_PENDING, &nf->nf_flags); >> + if (key->gc) >> + __set_bit(NFSD_FILE_GC, &nf->nf_flags); >> nf->nf_inode = key->inode; >> /* nf_ref is pre-incremented for hash table */ >> refcount_set(&nf->nf_ref, 2); >> @@ -428,16 +433,27 @@ nfsd_file_put_noref(struct nfsd_file *nf) >> } >> } >> >> +static void >> +nfsd_file_unhash_and_put(struct nfsd_file *nf) >> +{ >> + if (nfsd_file_unhash(nf)) >> + nfsd_file_put_noref(nf); >> +} >> + >> void >> nfsd_file_put(struct nfsd_file *nf) >> { >> might_sleep(); >> >> - nfsd_file_lru_add(nf); >> + if (test_bit(NFSD_FILE_GC, &nf->nf_flags) == 1) >> + nfsd_file_lru_add(nf); >> + else if (refcount_read(&nf->nf_ref) == 2) >> + nfsd_file_unhash_and_put(nf); >> + >> if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) { >> nfsd_file_flush(nf); >> nfsd_file_put_noref(nf); >> - } else if (nf->nf_file) { >> + } else if (nf->nf_file && test_bit(NFSD_FILE_GC, &nf->nf_flags) == 1) { >> nfsd_file_put_noref(nf); >> nfsd_file_schedule_laundrette(); >> } else >> @@ -1017,12 +1033,14 @@ nfsd_file_is_cached(struct inode *inode) >> >> static __be32 >> nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, >> - unsigned int may_flags, struct nfsd_file **pnf, bool open) >> + unsigned int may_flags, struct nfsd_file **pnf, >> + bool open, int want_gc) >> { >> struct nfsd_file_lookup_key key = { >> .type = NFSD_FILE_KEY_FULL, >> .need = may_flags & NFSD_FILE_MAY_MASK, >> .net = SVC_NET(rqstp), >> + .gc = want_gc, >> }; >> bool open_retry = true; >> struct nfsd_file *nf; >> @@ -1117,14 +1135,35 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, >> * then unhash. >> */ >> if (status != nfs_ok || key.inode->i_nlink == 0) >> - if (nfsd_file_unhash(nf)) >> - nfsd_file_put_noref(nf); >> + nfsd_file_unhash_and_put(nf); >> clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags); >> smp_mb__after_atomic(); >> wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING); >> goto out; >> } >> >> +/** >> + * nfsd_file_acquire_gc - Get a struct nfsd_file with an open file >> + * @rqstp: the RPC transaction being executed >> + * @fhp: the NFS filehandle of the file to be opened >> + * @may_flags: NFSD_MAY_ settings for the file >> + * @pnf: OUT: new or found "struct nfsd_file" object >> + * >> + * The nfsd_file object returned by this API is reference-counted >> + * and garbage-collected. The object is retained for a few >> + * seconds after the final nfsd_file_put() in case the caller >> + * wants to re-use it. >> + * >> + * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in >> + * network byte order is returned. >> + */ >> +__be32 >> +nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, >> + unsigned int may_flags, struct nfsd_file **pnf) >> +{ >> + return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true, 1); >> +} >> + >> /** >> * nfsd_file_acquire - Get a struct nfsd_file with an open file >> * @rqstp: the RPC transaction being executed >> @@ -1132,6 +1171,10 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, >> * @may_flags: NFSD_MAY_ settings for the file >> * @pnf: OUT: new or found "struct nfsd_file" object >> * >> + * The nfsd_file_object returned by this API is reference-counted >> + * but not garbage-collected. The object is released one RCU grace >> + * period after the final nfsd_file_put(). >> + * >> * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in >> * network byte order is returned. >> */ >> @@ -1139,7 +1182,7 @@ __be32 >> nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, >> unsigned int may_flags, struct nfsd_file **pnf) >> { >> - return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true); >> + return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true, 0); >> } >> >> /** >> @@ -1149,6 +1192,10 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, >> * @may_flags: NFSD_MAY_ settings for the file >> * @pnf: OUT: new or found "struct nfsd_file" object >> * >> + * The nfsd_file_object returned by this API is reference-counted >> + * but not garbage-collected. The object is released immediately >> + * one RCU grace period after the final nfsd_file_put(). > > While that last sentence is correct, I think it is missing the point. > The reference to grace periods is really an internal implementation > details. Fair enough, document policy and not implementation. I'll rework this based on your suggestion below. > The important point is that on final nfsd_file_put(), the object is > removed from the hash table. This contrasts with the other version > where on final nfsd_file_put(), the object is added to the lru. > > The code all seems good, just the comment seemed odd. > > Thanks, > NeilBrown > > >> + * >> * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in >> * network byte order is returned. >> */ >> @@ -1156,7 +1203,7 @@ __be32 >> nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, >> unsigned int may_flags, struct nfsd_file **pnf) >> { >> - return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, false); >> + return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, false, 0); >> } >> >> /* >> diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h >> index f81c198f4ed6..0f6546bcd3e0 100644 >> --- a/fs/nfsd/filecache.h >> +++ b/fs/nfsd/filecache.h >> @@ -38,6 +38,7 @@ struct nfsd_file { >> #define NFSD_FILE_HASHED (0) >> #define NFSD_FILE_PENDING (1) >> #define NFSD_FILE_REFERENCED (2) >> +#define NFSD_FILE_GC (3) >> unsigned long nf_flags; >> struct inode *nf_inode; /* don't deref */ >> refcount_t nf_ref; >> @@ -55,6 +56,8 @@ void nfsd_file_put(struct nfsd_file *nf); >> struct nfsd_file *nfsd_file_get(struct nfsd_file *nf); >> void nfsd_file_close_inode_sync(struct inode *inode); >> bool nfsd_file_is_cached(struct inode *inode); >> +__be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, >> + unsigned int may_flags, struct nfsd_file **nfp); >> __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, >> unsigned int may_flags, struct nfsd_file **nfp); >> __be32 nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, >> diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c >> index d12823371857..6a5ad6c91d8e 100644 >> --- a/fs/nfsd/nfs3proc.c >> +++ b/fs/nfsd/nfs3proc.c >> @@ -779,8 +779,8 @@ nfsd3_proc_commit(struct svc_rqst *rqstp) >> (unsigned long long) argp->offset); >> >> fh_copy(&resp->fh, &argp->fh); >> - resp->status = nfsd_file_acquire(rqstp, &resp->fh, NFSD_MAY_WRITE | >> - NFSD_MAY_NOT_BREAK_LEASE, &nf); >> + resp->status = nfsd_file_acquire_gc(rqstp, &resp->fh, NFSD_MAY_WRITE | >> + NFSD_MAY_NOT_BREAK_LEASE, &nf); >> if (resp->status) >> goto out; >> resp->status = nfsd_commit(rqstp, &resp->fh, nf, argp->offset, >> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h >> index 9ebd67d461f9..4921144880d3 100644 >> --- a/fs/nfsd/trace.h >> +++ b/fs/nfsd/trace.h >> @@ -742,7 +742,8 @@ DEFINE_CLID_EVENT(confirmed_r); >> __print_flags(val, "|", \ >> { 1 << NFSD_FILE_HASHED, "HASHED" }, \ >> { 1 << NFSD_FILE_PENDING, "PENDING" }, \ >> - { 1 << NFSD_FILE_REFERENCED, "REFERENCED"}) >> + { 1 << NFSD_FILE_REFERENCED, "REFERENCED"}, \ >> + { 1 << NFSD_FILE_GC, "GC"}) >> >> DECLARE_EVENT_CLASS(nfsd_file_class, >> TP_PROTO(struct nfsd_file *nf), >> diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c >> index 44f210ba17cc..89d682a56fc4 100644 >> --- a/fs/nfsd/vfs.c >> +++ b/fs/nfsd/vfs.c >> @@ -1060,7 +1060,7 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, >> __be32 err; >> >> trace_nfsd_read_start(rqstp, fhp, offset, *count); >> - err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf); >> + err = nfsd_file_acquire_gc(rqstp, fhp, NFSD_MAY_READ, &nf); >> if (err) >> return err; >> >> @@ -1092,7 +1092,7 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, >> >> trace_nfsd_write_start(rqstp, fhp, offset, *cnt); >> >> - err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_WRITE, &nf); >> + err = nfsd_file_acquire_gc(rqstp, fhp, NFSD_MAY_WRITE, &nf); >> if (err) >> goto out; -- Chuck Lever
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index b7aa523c2010..01c27deabf83 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -63,6 +63,7 @@ struct nfsd_file_lookup_key { struct net *net; const struct cred *cred; unsigned char need; + unsigned char gc:1; enum nfsd_file_lookup_type type; }; @@ -162,6 +163,8 @@ static int nfsd_file_obj_cmpfn(struct rhashtable_compare_arg *arg, return 1; if (!nfsd_match_cred(nf->nf_cred, key->cred)) return 1; + if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc) + return 1; if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) return 1; break; @@ -297,6 +300,8 @@ nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may) nf->nf_flags = 0; __set_bit(NFSD_FILE_HASHED, &nf->nf_flags); __set_bit(NFSD_FILE_PENDING, &nf->nf_flags); + if (key->gc) + __set_bit(NFSD_FILE_GC, &nf->nf_flags); nf->nf_inode = key->inode; /* nf_ref is pre-incremented for hash table */ refcount_set(&nf->nf_ref, 2); @@ -428,16 +433,27 @@ nfsd_file_put_noref(struct nfsd_file *nf) } } +static void +nfsd_file_unhash_and_put(struct nfsd_file *nf) +{ + if (nfsd_file_unhash(nf)) + nfsd_file_put_noref(nf); +} + void nfsd_file_put(struct nfsd_file *nf) { might_sleep(); - nfsd_file_lru_add(nf); + if (test_bit(NFSD_FILE_GC, &nf->nf_flags) == 1) + nfsd_file_lru_add(nf); + else if (refcount_read(&nf->nf_ref) == 2) + nfsd_file_unhash_and_put(nf); + if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) { nfsd_file_flush(nf); nfsd_file_put_noref(nf); - } else if (nf->nf_file) { + } else if (nf->nf_file && test_bit(NFSD_FILE_GC, &nf->nf_flags) == 1) { nfsd_file_put_noref(nf); nfsd_file_schedule_laundrette(); } else @@ -1017,12 +1033,14 @@ nfsd_file_is_cached(struct inode *inode) static __be32 nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, - unsigned int may_flags, struct nfsd_file **pnf, bool open) + unsigned int may_flags, struct nfsd_file **pnf, + bool open, int want_gc) { struct nfsd_file_lookup_key key = { .type = NFSD_FILE_KEY_FULL, .need = may_flags & NFSD_FILE_MAY_MASK, .net = SVC_NET(rqstp), + .gc = want_gc, }; bool open_retry = true; struct nfsd_file *nf; @@ -1117,14 +1135,35 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, * then unhash. */ if (status != nfs_ok || key.inode->i_nlink == 0) - if (nfsd_file_unhash(nf)) - nfsd_file_put_noref(nf); + nfsd_file_unhash_and_put(nf); clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags); smp_mb__after_atomic(); wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING); goto out; } +/** + * nfsd_file_acquire_gc - Get a struct nfsd_file with an open file + * @rqstp: the RPC transaction being executed + * @fhp: the NFS filehandle of the file to be opened + * @may_flags: NFSD_MAY_ settings for the file + * @pnf: OUT: new or found "struct nfsd_file" object + * + * The nfsd_file object returned by this API is reference-counted + * and garbage-collected. The object is retained for a few + * seconds after the final nfsd_file_put() in case the caller + * wants to re-use it. + * + * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in + * network byte order is returned. + */ +__be32 +nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, + unsigned int may_flags, struct nfsd_file **pnf) +{ + return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true, 1); +} + /** * nfsd_file_acquire - Get a struct nfsd_file with an open file * @rqstp: the RPC transaction being executed @@ -1132,6 +1171,10 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, * @may_flags: NFSD_MAY_ settings for the file * @pnf: OUT: new or found "struct nfsd_file" object * + * The nfsd_file_object returned by this API is reference-counted + * but not garbage-collected. The object is released one RCU grace + * period after the final nfsd_file_put(). + * * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in * network byte order is returned. */ @@ -1139,7 +1182,7 @@ __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **pnf) { - return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true); + return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true, 0); } /** @@ -1149,6 +1192,10 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, * @may_flags: NFSD_MAY_ settings for the file * @pnf: OUT: new or found "struct nfsd_file" object * + * The nfsd_file_object returned by this API is reference-counted + * but not garbage-collected. The object is released immediately + * one RCU grace period after the final nfsd_file_put(). + * * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in * network byte order is returned. */ @@ -1156,7 +1203,7 @@ __be32 nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **pnf) { - return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, false); + return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, false, 0); } /* diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index f81c198f4ed6..0f6546bcd3e0 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -38,6 +38,7 @@ struct nfsd_file { #define NFSD_FILE_HASHED (0) #define NFSD_FILE_PENDING (1) #define NFSD_FILE_REFERENCED (2) +#define NFSD_FILE_GC (3) unsigned long nf_flags; struct inode *nf_inode; /* don't deref */ refcount_t nf_ref; @@ -55,6 +56,8 @@ void nfsd_file_put(struct nfsd_file *nf); struct nfsd_file *nfsd_file_get(struct nfsd_file *nf); void nfsd_file_close_inode_sync(struct inode *inode); bool nfsd_file_is_cached(struct inode *inode); +__be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, + unsigned int may_flags, struct nfsd_file **nfp); __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **nfp); __be32 nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index d12823371857..6a5ad6c91d8e 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -779,8 +779,8 @@ nfsd3_proc_commit(struct svc_rqst *rqstp) (unsigned long long) argp->offset); fh_copy(&resp->fh, &argp->fh); - resp->status = nfsd_file_acquire(rqstp, &resp->fh, NFSD_MAY_WRITE | - NFSD_MAY_NOT_BREAK_LEASE, &nf); + resp->status = nfsd_file_acquire_gc(rqstp, &resp->fh, NFSD_MAY_WRITE | + NFSD_MAY_NOT_BREAK_LEASE, &nf); if (resp->status) goto out; resp->status = nfsd_commit(rqstp, &resp->fh, nf, argp->offset, diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 9ebd67d461f9..4921144880d3 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -742,7 +742,8 @@ DEFINE_CLID_EVENT(confirmed_r); __print_flags(val, "|", \ { 1 << NFSD_FILE_HASHED, "HASHED" }, \ { 1 << NFSD_FILE_PENDING, "PENDING" }, \ - { 1 << NFSD_FILE_REFERENCED, "REFERENCED"}) + { 1 << NFSD_FILE_REFERENCED, "REFERENCED"}, \ + { 1 << NFSD_FILE_GC, "GC"}) DECLARE_EVENT_CLASS(nfsd_file_class, TP_PROTO(struct nfsd_file *nf), diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 44f210ba17cc..89d682a56fc4 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1060,7 +1060,7 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, __be32 err; trace_nfsd_read_start(rqstp, fhp, offset, *count); - err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf); + err = nfsd_file_acquire_gc(rqstp, fhp, NFSD_MAY_READ, &nf); if (err) return err; @@ -1092,7 +1092,7 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, trace_nfsd_write_start(rqstp, fhp, offset, *cnt); - err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_WRITE, &nf); + err = nfsd_file_acquire_gc(rqstp, fhp, NFSD_MAY_WRITE, &nf); if (err) goto out;
NFSv4 operations manage the lifetime of nfsd_file items they use by means of NFSv4 OPEN and CLOSE. Hence there's no need for them to be garbage collected. Introduce a mechanism to enable garbage collection for nfsd_file items used only by NFSv2/3 callers. Note that the change in nfsd_file_put() ensures that both CLOSE and DELEGRETURN will actually close out and free an nfsd_file on last reference of a non-garbage-collected file. Link: https://bugzilla.linux-nfs.org/show_bug.cgi?id=394 Suggested-by: Trond Myklebust <trond.myklebust@hammerspace.com> Signed-off-by: Chuck Lever <chuck.lever@oracle.com> --- fs/nfsd/filecache.c | 61 +++++++++++++++++++++++++++++++++++++++++++++------ fs/nfsd/filecache.h | 3 +++ fs/nfsd/nfs3proc.c | 4 ++- fs/nfsd/trace.h | 3 ++- fs/nfsd/vfs.c | 4 ++- 5 files changed, 63 insertions(+), 12 deletions(-)