Message ID | 165953745991.1658.5781306176717145818.stgit@manet.1015granger.net (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Wait for DELEGRETURN before returning NFS4ERR_DELAY | expand |
On Wed, 2022-08-03 at 10:37 -0400, Chuck Lever wrote: > nfsd_setattr() can kick off a CB_RECALL (via > notify_change() -> break_lease()) if a delegation is present. Before > returning NFS4ERR_DELAY, give the client holding that delegation a > chance to return it and then retry the nfsd_setattr() again, once. > > Signed-off-by: Chuck Lever <chuck.lever@oracle.com> > --- > fs/nfsd/nfs4proc.c | 18 +++++++++++++++--- > fs/nfsd/nfs4state.c | 17 +++++++++++++++++ > fs/nfsd/nfsd.h | 1 + > fs/nfsd/trace.h | 19 +++++++++++++++++++ > fs/nfsd/xdr4.h | 2 ++ > 5 files changed, 54 insertions(+), 3 deletions(-) > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c > index 42bfe0d769ec..62a267bb2ce5 100644 > --- a/fs/nfsd/nfs4proc.c > +++ b/fs/nfsd/nfs4proc.c > @@ -1142,7 +1142,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > { > struct nfsd4_setattr *setattr = &u->setattr; > __be32 status = nfs_ok; > - int err; > + int err, retries; > > if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { > status = nfs4_preprocess_stateid_op(rqstp, cstate, > @@ -1173,8 +1173,20 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > &setattr->sa_label); > if (status) > goto out; > - status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr, > - 0, (time64_t)0); > + > + retries = 1; > + do { > + status = nfsd_setattr(rqstp, &cstate->current_fh, > + &setattr->sa_iattr, 0, (time64_t)0); > + if (status != nfserr_jukebox) > + break; > + if (!retries--) > + break; > + > + fh_clear_pre_post_attrs(&cstate->current_fh); > + nfsd4_wait_for_delegreturn(rqstp, &cstate->current_fh); > + } while (1); > + > out: > fh_drop_write(&cstate->current_fh); > return status; > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c > index 0cf5a4bb36df..e3ac89d4a859 100644 > --- a/fs/nfsd/nfs4state.c > +++ b/fs/nfsd/nfs4state.c > @@ -4689,6 +4689,23 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) > return ret; > } > > +/** > + * nfsd4_wait_for_delegreturn - wait for delegations to be returned > + * @rqstp: the RPC transaction being executed > + * @fhp: filehandle of file being waited for > + * > + * A better approach would wait for the DELEGRETURN operation, and > + * retry just as soon as it was done. > + * > + * The timeout prevents deadlock if all nfsd threads happen to be > + * tied up waiting for returning delegations. > + */ > +void nfsd4_wait_for_delegreturn(struct svc_rqst *rqstp, struct svc_fh *fhp) > +{ > + trace_nfsd_delegreturn_wait(rqstp, fhp); > + msleep(NFSD_DELEGRETURN_TIMEOUT); Like you mentioned in the cover letter, this is pretty nasty. You could use wait_var_event_timeout here on the inode, paired with a wake_up_var when a delegation is returned. For the condition, you could use something like this: !inode->i_flctx || list_empty(&inode->i_flctx->flc_lease) Maybe even a similar lockless check as the one in break_deleg? > +} > + > static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb) > { > struct nfs4_delegation *dp = cb_to_delegation(cb); > diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h > index 9a8b09afc173..0b800a154828 100644 > --- a/fs/nfsd/nfsd.h > +++ b/fs/nfsd/nfsd.h > @@ -341,6 +341,7 @@ void nfsd_lockd_shutdown(void); > > #define NFSD_LAUNDROMAT_MINTIMEOUT 1 /* seconds */ > #define NFSD_COURTESY_CLIENT_TIMEOUT (24 * 60 * 60) /* seconds */ > +#define NFSD_DELEGRETURN_TIMEOUT (30) /* milliseconds */ > > /* > * The following attributes are currently not supported by the NFSv4 server: > diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h > index 8c3d5f88072f..dd2654cac132 100644 > --- a/fs/nfsd/trace.h > +++ b/fs/nfsd/trace.h > @@ -443,6 +443,25 @@ DEFINE_NFSD_COPY_ERR_EVENT(clone_file_range_err); > #include "filecache.h" > #include "vfs.h" > > +TRACE_EVENT(nfsd_delegreturn_wait, > + TP_PROTO( > + const struct svc_rqst *rqstp, > + const struct svc_fh *fhp > + ), > + TP_ARGS(rqstp, fhp), > + TP_STRUCT__entry( > + __field(u32, xid) > + __field(u32, fh_hash) > + ), > + TP_fast_assign( > + __entry->xid = be32_to_cpu(rqstp->rq_xid); > + __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle); > + ), > + TP_printk("xid=0x%08x fh_hash=0x%08x", > + __entry->xid, __entry->fh_hash > + ) > +); > + > DECLARE_EVENT_CLASS(nfsd_stateid_class, > TP_PROTO(stateid_t *stp), > TP_ARGS(stp), > diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h > index 7b744011f2d3..5b9213076e95 100644 > --- a/fs/nfsd/xdr4.h > +++ b/fs/nfsd/xdr4.h > @@ -788,6 +788,8 @@ extern __be32 nfsd4_destroy_clientid(struct svc_rqst *, struct nfsd4_compound_st > union nfsd4_op_u *u); > __be32 nfsd4_reclaim_complete(struct svc_rqst *, struct nfsd4_compound_state *, > union nfsd4_op_u *u); > +extern void nfsd4_wait_for_delegreturn(struct svc_rqst *rqstp, > + struct svc_fh *fhp); > extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *, > struct nfsd4_open *open, struct nfsd_net *nn); > extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp, > >
> On Aug 3, 2022, at 3:47 PM, Jeff Layton <jlayton@kernel.org> wrote: > > On Wed, 2022-08-03 at 10:37 -0400, Chuck Lever wrote: >> nfsd_setattr() can kick off a CB_RECALL (via >> notify_change() -> break_lease()) if a delegation is present. Before >> returning NFS4ERR_DELAY, give the client holding that delegation a >> chance to return it and then retry the nfsd_setattr() again, once. >> >> Signed-off-by: Chuck Lever <chuck.lever@oracle.com> >> --- >> fs/nfsd/nfs4proc.c | 18 +++++++++++++++--- >> fs/nfsd/nfs4state.c | 17 +++++++++++++++++ >> fs/nfsd/nfsd.h | 1 + >> fs/nfsd/trace.h | 19 +++++++++++++++++++ >> fs/nfsd/xdr4.h | 2 ++ >> 5 files changed, 54 insertions(+), 3 deletions(-) >> >> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c >> index 42bfe0d769ec..62a267bb2ce5 100644 >> --- a/fs/nfsd/nfs4proc.c >> +++ b/fs/nfsd/nfs4proc.c >> @@ -1142,7 +1142,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, >> { >> struct nfsd4_setattr *setattr = &u->setattr; >> __be32 status = nfs_ok; >> - int err; >> + int err, retries; >> >> if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { >> status = nfs4_preprocess_stateid_op(rqstp, cstate, >> @@ -1173,8 +1173,20 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, >> &setattr->sa_label); >> if (status) >> goto out; >> - status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr, >> - 0, (time64_t)0); >> + >> + retries = 1; >> + do { >> + status = nfsd_setattr(rqstp, &cstate->current_fh, >> + &setattr->sa_iattr, 0, (time64_t)0); >> + if (status != nfserr_jukebox) >> + break; >> + if (!retries--) >> + break; >> + >> + fh_clear_pre_post_attrs(&cstate->current_fh); >> + nfsd4_wait_for_delegreturn(rqstp, &cstate->current_fh); >> + } while (1); >> + >> out: >> fh_drop_write(&cstate->current_fh); >> return status; >> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c >> index 0cf5a4bb36df..e3ac89d4a859 100644 >> --- a/fs/nfsd/nfs4state.c >> +++ b/fs/nfsd/nfs4state.c >> @@ -4689,6 +4689,23 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) >> return ret; >> } >> >> +/** >> + * nfsd4_wait_for_delegreturn - wait for delegations to be returned >> + * @rqstp: the RPC transaction being executed >> + * @fhp: filehandle of file being waited for >> + * >> + * A better approach would wait for the DELEGRETURN operation, and >> + * retry just as soon as it was done. >> + * >> + * The timeout prevents deadlock if all nfsd threads happen to be >> + * tied up waiting for returning delegations. >> + */ >> +void nfsd4_wait_for_delegreturn(struct svc_rqst *rqstp, struct svc_fh *fhp) >> +{ >> + trace_nfsd_delegreturn_wait(rqstp, fhp); >> + msleep(NFSD_DELEGRETURN_TIMEOUT); > > Like you mentioned in the cover letter, this is pretty nasty. Right, it's proof-of-concept stuff. > You could use wait_var_event_timeout here on the inode, paired with a > wake_up_var when a delegation is returned. I was looking for an NFSD-specific data structure to add a completion to, but yeah, I guess the inode itself could work. I'll have a look at that for the next version of this series. Thanks for the suggestion! > For the condition, you could use something like this: > > !inode->i_flctx || list_empty(&inode->i_flctx->flc_lease) > > Maybe even a similar lockless check as the one in break_deleg? > >> +} >> + >> static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb) >> { >> struct nfs4_delegation *dp = cb_to_delegation(cb); >> diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h >> index 9a8b09afc173..0b800a154828 100644 >> --- a/fs/nfsd/nfsd.h >> +++ b/fs/nfsd/nfsd.h >> @@ -341,6 +341,7 @@ void nfsd_lockd_shutdown(void); >> >> #define NFSD_LAUNDROMAT_MINTIMEOUT 1 /* seconds */ >> #define NFSD_COURTESY_CLIENT_TIMEOUT (24 * 60 * 60) /* seconds */ >> +#define NFSD_DELEGRETURN_TIMEOUT (30) /* milliseconds */ >> >> /* >> * The following attributes are currently not supported by the NFSv4 server: >> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h >> index 8c3d5f88072f..dd2654cac132 100644 >> --- a/fs/nfsd/trace.h >> +++ b/fs/nfsd/trace.h >> @@ -443,6 +443,25 @@ DEFINE_NFSD_COPY_ERR_EVENT(clone_file_range_err); >> #include "filecache.h" >> #include "vfs.h" >> >> +TRACE_EVENT(nfsd_delegreturn_wait, >> + TP_PROTO( >> + const struct svc_rqst *rqstp, >> + const struct svc_fh *fhp >> + ), >> + TP_ARGS(rqstp, fhp), >> + TP_STRUCT__entry( >> + __field(u32, xid) >> + __field(u32, fh_hash) >> + ), >> + TP_fast_assign( >> + __entry->xid = be32_to_cpu(rqstp->rq_xid); >> + __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle); >> + ), >> + TP_printk("xid=0x%08x fh_hash=0x%08x", >> + __entry->xid, __entry->fh_hash >> + ) >> +); >> + >> DECLARE_EVENT_CLASS(nfsd_stateid_class, >> TP_PROTO(stateid_t *stp), >> TP_ARGS(stp), >> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h >> index 7b744011f2d3..5b9213076e95 100644 >> --- a/fs/nfsd/xdr4.h >> +++ b/fs/nfsd/xdr4.h >> @@ -788,6 +788,8 @@ extern __be32 nfsd4_destroy_clientid(struct svc_rqst *, struct nfsd4_compound_st >> union nfsd4_op_u *u); >> __be32 nfsd4_reclaim_complete(struct svc_rqst *, struct nfsd4_compound_state *, >> union nfsd4_op_u *u); >> +extern void nfsd4_wait_for_delegreturn(struct svc_rqst *rqstp, >> + struct svc_fh *fhp); >> extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *, >> struct nfsd4_open *open, struct nfsd_net *nn); >> extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp, >> >> > > -- > Jeff Layton <jlayton@kernel.org> -- Chuck Lever
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 42bfe0d769ec..62a267bb2ce5 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1142,7 +1142,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { struct nfsd4_setattr *setattr = &u->setattr; __be32 status = nfs_ok; - int err; + int err, retries; if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { status = nfs4_preprocess_stateid_op(rqstp, cstate, @@ -1173,8 +1173,20 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, &setattr->sa_label); if (status) goto out; - status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr, - 0, (time64_t)0); + + retries = 1; + do { + status = nfsd_setattr(rqstp, &cstate->current_fh, + &setattr->sa_iattr, 0, (time64_t)0); + if (status != nfserr_jukebox) + break; + if (!retries--) + break; + + fh_clear_pre_post_attrs(&cstate->current_fh); + nfsd4_wait_for_delegreturn(rqstp, &cstate->current_fh); + } while (1); + out: fh_drop_write(&cstate->current_fh); return status; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 0cf5a4bb36df..e3ac89d4a859 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4689,6 +4689,23 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) return ret; } +/** + * nfsd4_wait_for_delegreturn - wait for delegations to be returned + * @rqstp: the RPC transaction being executed + * @fhp: filehandle of file being waited for + * + * A better approach would wait for the DELEGRETURN operation, and + * retry just as soon as it was done. + * + * The timeout prevents deadlock if all nfsd threads happen to be + * tied up waiting for returning delegations. + */ +void nfsd4_wait_for_delegreturn(struct svc_rqst *rqstp, struct svc_fh *fhp) +{ + trace_nfsd_delegreturn_wait(rqstp, fhp); + msleep(NFSD_DELEGRETURN_TIMEOUT); +} + static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb) { struct nfs4_delegation *dp = cb_to_delegation(cb); diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 9a8b09afc173..0b800a154828 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -341,6 +341,7 @@ void nfsd_lockd_shutdown(void); #define NFSD_LAUNDROMAT_MINTIMEOUT 1 /* seconds */ #define NFSD_COURTESY_CLIENT_TIMEOUT (24 * 60 * 60) /* seconds */ +#define NFSD_DELEGRETURN_TIMEOUT (30) /* milliseconds */ /* * The following attributes are currently not supported by the NFSv4 server: diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 8c3d5f88072f..dd2654cac132 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -443,6 +443,25 @@ DEFINE_NFSD_COPY_ERR_EVENT(clone_file_range_err); #include "filecache.h" #include "vfs.h" +TRACE_EVENT(nfsd_delegreturn_wait, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct svc_fh *fhp + ), + TP_ARGS(rqstp, fhp), + TP_STRUCT__entry( + __field(u32, xid) + __field(u32, fh_hash) + ), + TP_fast_assign( + __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle); + ), + TP_printk("xid=0x%08x fh_hash=0x%08x", + __entry->xid, __entry->fh_hash + ) +); + DECLARE_EVENT_CLASS(nfsd_stateid_class, TP_PROTO(stateid_t *stp), TP_ARGS(stp), diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 7b744011f2d3..5b9213076e95 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -788,6 +788,8 @@ extern __be32 nfsd4_destroy_clientid(struct svc_rqst *, struct nfsd4_compound_st union nfsd4_op_u *u); __be32 nfsd4_reclaim_complete(struct svc_rqst *, struct nfsd4_compound_state *, union nfsd4_op_u *u); +extern void nfsd4_wait_for_delegreturn(struct svc_rqst *rqstp, + struct svc_fh *fhp); extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *, struct nfsd4_open *open, struct nfsd_net *nn); extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp,
nfsd_setattr() can kick off a CB_RECALL (via notify_change() -> break_lease()) if a delegation is present. Before returning NFS4ERR_DELAY, give the client holding that delegation a chance to return it and then retry the nfsd_setattr() again, once. Signed-off-by: Chuck Lever <chuck.lever@oracle.com> --- fs/nfsd/nfs4proc.c | 18 +++++++++++++++--- fs/nfsd/nfs4state.c | 17 +++++++++++++++++ fs/nfsd/nfsd.h | 1 + fs/nfsd/trace.h | 19 +++++++++++++++++++ fs/nfsd/xdr4.h | 2 ++ 5 files changed, 54 insertions(+), 3 deletions(-)