Message ID | 1684618595-4178-4-git-send-email-dai.ngo@oracle.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | NFSD: add support for NFSv4 write delegation | expand |
> On May 20, 2023, at 5:36 PM, Dai Ngo <dai.ngo@oracle.com> wrote: > > If the GETATTR request on a file that has write delegation in effect > and the request attributes include the change info and size attribute > then the write delegation is recalled and NFS4ERR_DELAY is returned > for the GETATTR. > > Signed-off-by: Dai Ngo <dai.ngo@oracle.com> > --- > fs/nfsd/nfs4xdr.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 45 insertions(+) > > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c > index 76db2fe29624..e069b970f136 100644 > --- a/fs/nfsd/nfs4xdr.c > +++ b/fs/nfsd/nfs4xdr.c > @@ -2920,6 +2920,46 @@ nfsd4_encode_bitmap(struct xdr_stream *xdr, u32 bmval0, u32 bmval1, u32 bmval2) > return nfserr_resource; > } > > +static struct file_lock * > +nfs4_wrdeleg_filelock(struct svc_rqst *rqstp, struct inode *inode) > +{ > + struct file_lock_context *ctx; > + struct file_lock *fl; > + > + ctx = locks_inode_context(inode); > + if (!ctx) > + return NULL; > + spin_lock(&ctx->flc_lock); > + if (!list_empty(&ctx->flc_lease)) { > + fl = list_first_entry(&ctx->flc_lease, > + struct file_lock, fl_list); > + if (fl->fl_type == F_WRLCK) { > + spin_unlock(&ctx->flc_lock); > + return fl; > + } > + } > + spin_unlock(&ctx->flc_lock); > + return NULL; > +} > + > +static __be32 > +nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode *inode) > +{ > + __be32 status; > + struct file_lock *fl; > + struct nfs4_delegation *dp; > + > + fl = nfs4_wrdeleg_filelock(rqstp, inode); > + if (!fl) > + return 0; > + dp = fl->fl_owner; > + if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker)) > + return 0; > + refcount_inc(&dp->dl_stid.sc_count); > + status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ)); > + return status; > +} > + fs/nfsd/nfs4state.c seems more appropriate for these. I'll move them as I apply this patch. > /* > * Note: @fhp can be NULL; in this case, we might have to compose the filehandle > * ourselves. > @@ -2966,6 +3006,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, > if (status) > goto out; > } > + if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) { > + status = nfs4_handle_wrdeleg_conflict(rqstp, d_inode(dentry)); > + if (status) > + goto out; > + } > > err = vfs_getattr(&path, &stat, > STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE, > -- > 2.9.5 > -- Chuck Lever
On Sat, 2023-05-20 at 14:36 -0700, Dai Ngo wrote: > If the GETATTR request on a file that has write delegation in effect > and the request attributes include the change info and size attribute > then the write delegation is recalled and NFS4ERR_DELAY is returned > for the GETATTR. > > Signed-off-by: Dai Ngo <dai.ngo@oracle.com> > --- > fs/nfsd/nfs4xdr.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 45 insertions(+) > > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c > index 76db2fe29624..e069b970f136 100644 > --- a/fs/nfsd/nfs4xdr.c > +++ b/fs/nfsd/nfs4xdr.c > @@ -2920,6 +2920,46 @@ nfsd4_encode_bitmap(struct xdr_stream *xdr, u32 bmval0, u32 bmval1, u32 bmval2) > return nfserr_resource; > } > > +static struct file_lock * > +nfs4_wrdeleg_filelock(struct svc_rqst *rqstp, struct inode *inode) > +{ > + struct file_lock_context *ctx; > + struct file_lock *fl; > + > + ctx = locks_inode_context(inode); > + if (!ctx) > + return NULL; > + spin_lock(&ctx->flc_lock); > + if (!list_empty(&ctx->flc_lease)) { > + fl = list_first_entry(&ctx->flc_lease, > + struct file_lock, fl_list); > + if (fl->fl_type == F_WRLCK) { > + spin_unlock(&ctx->flc_lock); > + return fl; > + } > + } > + spin_unlock(&ctx->flc_lock); > + return NULL; > +} > + > +static __be32 > +nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode *inode) > +{ > + __be32 status; > + struct file_lock *fl; > + struct nfs4_delegation *dp; > + > + fl = nfs4_wrdeleg_filelock(rqstp, inode); > + if (!fl) > + return 0; > + dp = fl->fl_owner; One problem here is that you don't know whether the owner was set by nfsd. This owner could be a struct file from a userland lease holder, and that that point it's not safe to dereference it below like you are. The q&d way we check for this in other places is to validate that the fl_lmops is correct. In this case you'd want to make sure it's set to &nfsd_lease_mng_ops. Beyond that, you also don't know whether that owner or file_lock still _exists_ after you drop the flc_lock. You need to either do these checks while holding that lock, or take a reference to the owner before you start dereferencing things. Probably, you're better off here just doing it all under the flc_lock. > + if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker)) > + return 0; > + refcount_inc(&dp->dl_stid.sc_count); Another problem: the sc_count might already have gone to zero here. You don't already hold a reference to dl_stid at this point, so you can't just increment it without taking the cl_lock for that client. You may be able to do this safely with refcount_inc_not_zero, and just ignore the delegation if it's already at zero. > + status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ)); > + return status; > +} > + > /* > * Note: @fhp can be NULL; in this case, we might have to compose the filehandle > * ourselves. > @@ -2966,6 +3006,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, > if (status) > goto out; > } > + if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) { > + status = nfs4_handle_wrdeleg_conflict(rqstp, d_inode(dentry)); > + if (status) > + goto out; > + } > > err = vfs_getattr(&path, &stat, > STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
On 5/21/23 9:49 AM, Jeff Layton wrote: > On Sat, 2023-05-20 at 14:36 -0700, Dai Ngo wrote: >> If the GETATTR request on a file that has write delegation in effect >> and the request attributes include the change info and size attribute >> then the write delegation is recalled and NFS4ERR_DELAY is returned >> for the GETATTR. >> >> Signed-off-by: Dai Ngo <dai.ngo@oracle.com> >> --- >> fs/nfsd/nfs4xdr.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ >> 1 file changed, 45 insertions(+) >> >> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c >> index 76db2fe29624..e069b970f136 100644 >> --- a/fs/nfsd/nfs4xdr.c >> +++ b/fs/nfsd/nfs4xdr.c >> @@ -2920,6 +2920,46 @@ nfsd4_encode_bitmap(struct xdr_stream *xdr, u32 bmval0, u32 bmval1, u32 bmval2) >> return nfserr_resource; >> } >> >> +static struct file_lock * >> +nfs4_wrdeleg_filelock(struct svc_rqst *rqstp, struct inode *inode) >> +{ >> + struct file_lock_context *ctx; >> + struct file_lock *fl; >> + >> + ctx = locks_inode_context(inode); >> + if (!ctx) >> + return NULL; >> + spin_lock(&ctx->flc_lock); >> + if (!list_empty(&ctx->flc_lease)) { >> + fl = list_first_entry(&ctx->flc_lease, >> + struct file_lock, fl_list); >> + if (fl->fl_type == F_WRLCK) { >> + spin_unlock(&ctx->flc_lock); >> + return fl; >> + } >> + } >> + spin_unlock(&ctx->flc_lock); >> + return NULL; >> +} >> + >> +static __be32 >> +nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode *inode) >> +{ >> + __be32 status; >> + struct file_lock *fl; >> + struct nfs4_delegation *dp; >> + >> + fl = nfs4_wrdeleg_filelock(rqstp, inode); >> + if (!fl) >> + return 0; >> + dp = fl->fl_owner; > One problem here is that you don't know whether the owner was set by > nfsd. This owner could be a struct file from a userland lease holder, > and that that point it's not safe to dereference it below like you are. > > The q&d way we check for this in other places is to validate that the > fl_lmops is correct. In this case you'd want to make sure it's set to > &nfsd_lease_mng_ops. Thanks Jeff, fix in v5. > > Beyond that, you also don't know whether that owner or file_lock still > _exists_ after you drop the flc_lock. You need to either do these checks > while holding that lock, or take a reference to the owner before you > start dereferencing things. > > Probably, you're better off here just doing it all under the flc_lock. fix in v5. > >> + if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker)) >> + return 0; >> + refcount_inc(&dp->dl_stid.sc_count); > Another problem: the sc_count might already have gone to zero here. You > don't already hold a reference to dl_stid at this point, so you can't > just increment it without taking the cl_lock for that client. > > You may be able to do this safely with refcount_inc_not_zero, and just > ignore the delegation if it's already at zero. Fix in v5. Chuck, I can do v5 to address feedback from you and Jeff. Thanks, -Dai > >> + status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ)); >> + return status; >> +} >> + >> /* >> * Note: @fhp can be NULL; in this case, we might have to compose the filehandle >> * ourselves. >> @@ -2966,6 +3006,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, >> if (status) >> goto out; >> } >> + if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) { >> + status = nfs4_handle_wrdeleg_conflict(rqstp, d_inode(dentry)); >> + if (status) >> + goto out; >> + } >> >> err = vfs_getattr(&path, &stat, >> STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
On Sat, 2023-05-20 at 14:36 -0700, Dai Ngo wrote: > If the GETATTR request on a file that has write delegation in effect > and the request attributes include the change info and size attribute > then the write delegation is recalled and NFS4ERR_DELAY is returned > for the GETATTR. > > Signed-off-by: Dai Ngo <dai.ngo@oracle.com> > --- > fs/nfsd/nfs4xdr.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 45 insertions(+) > > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c > index 76db2fe29624..e069b970f136 100644 > --- a/fs/nfsd/nfs4xdr.c > +++ b/fs/nfsd/nfs4xdr.c > @@ -2920,6 +2920,46 @@ nfsd4_encode_bitmap(struct xdr_stream *xdr, u32 bmval0, u32 bmval1, u32 bmval2) > return nfserr_resource; > } > > +static struct file_lock * > +nfs4_wrdeleg_filelock(struct svc_rqst *rqstp, struct inode *inode) > +{ > + struct file_lock_context *ctx; > + struct file_lock *fl; > + > + ctx = locks_inode_context(inode); > + if (!ctx) > + return NULL; > + spin_lock(&ctx->flc_lock); > + if (!list_empty(&ctx->flc_lease)) { > + fl = list_first_entry(&ctx->flc_lease, > + struct file_lock, fl_list); > + if (fl->fl_type == F_WRLCK) { > + spin_unlock(&ctx->flc_lock); > + return fl; > + } > + } > + spin_unlock(&ctx->flc_lock); > + return NULL; > +} > + > +static __be32 > +nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode *inode) > +{ > + __be32 status; > + struct file_lock *fl; > + struct nfs4_delegation *dp; > + > + fl = nfs4_wrdeleg_filelock(rqstp, inode); > + if (!fl) > + return 0; > + dp = fl->fl_owner; > + if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker)) > + return 0; > + refcount_inc(&dp->dl_stid.sc_count); Another question: Why are you taking a reference here at all? AFAICT, you don't even look at the delegation again after that point, so it's not clear to me who's responsible for putting that reference. > + status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ)); > + return status; > +} > + > /* > * Note: @fhp can be NULL; in this case, we might have to compose the filehandle > * ourselves. > @@ -2966,6 +3006,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, > if (status) > goto out; > } > + if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) { > + status = nfs4_handle_wrdeleg_conflict(rqstp, d_inode(dentry)); > + if (status) > + goto out; > + } > > err = vfs_getattr(&path, &stat, > STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
On Sun, May 21, 2023 at 07:08:43PM -0400, Jeff Layton wrote: > On Sat, 2023-05-20 at 14:36 -0700, Dai Ngo wrote: > > If the GETATTR request on a file that has write delegation in effect > > and the request attributes include the change info and size attribute > > then the write delegation is recalled and NFS4ERR_DELAY is returned > > for the GETATTR. > > > > Signed-off-by: Dai Ngo <dai.ngo@oracle.com> > > --- > > fs/nfsd/nfs4xdr.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ > > 1 file changed, 45 insertions(+) > > > > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c > > index 76db2fe29624..e069b970f136 100644 > > --- a/fs/nfsd/nfs4xdr.c > > +++ b/fs/nfsd/nfs4xdr.c > > @@ -2920,6 +2920,46 @@ nfsd4_encode_bitmap(struct xdr_stream *xdr, u32 bmval0, u32 bmval1, u32 bmval2) > > return nfserr_resource; > > } > > > > +static struct file_lock * > > +nfs4_wrdeleg_filelock(struct svc_rqst *rqstp, struct inode *inode) > > +{ > > + struct file_lock_context *ctx; > > + struct file_lock *fl; > > + > > + ctx = locks_inode_context(inode); > > + if (!ctx) > > + return NULL; > > + spin_lock(&ctx->flc_lock); > > + if (!list_empty(&ctx->flc_lease)) { > > + fl = list_first_entry(&ctx->flc_lease, > > + struct file_lock, fl_list); > > + if (fl->fl_type == F_WRLCK) { > > + spin_unlock(&ctx->flc_lock); > > + return fl; > > + } > > + } > > + spin_unlock(&ctx->flc_lock); > > + return NULL; > > +} > > + > > +static __be32 > > +nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode *inode) > > +{ > > + __be32 status; > > + struct file_lock *fl; > > + struct nfs4_delegation *dp; > > + > > + fl = nfs4_wrdeleg_filelock(rqstp, inode); > > + if (!fl) > > + return 0; > > + dp = fl->fl_owner; > > + if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker)) > > + return 0; > > + refcount_inc(&dp->dl_stid.sc_count); > > Another question: Why are you taking a reference here at all? AFAICT, > you don't even look at the delegation again after that point, so it's > not clear to me who's responsible for putting that reference. I had the same thought, but I assumed I was missing something. > > + status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ)); > > + return status; > > +} > > + > > /* > > * Note: @fhp can be NULL; in this case, we might have to compose the filehandle > > * ourselves. > > @@ -2966,6 +3006,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, > > if (status) > > goto out; > > } > > + if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) { > > + status = nfs4_handle_wrdeleg_conflict(rqstp, d_inode(dentry)); > > + if (status) > > + goto out; > > + } > > > > err = vfs_getattr(&path, &stat, > > STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE, > > -- > Jeff Layton <jlayton@kernel.org>
On 5/21/23 4:08 PM, Jeff Layton wrote: > On Sat, 2023-05-20 at 14:36 -0700, Dai Ngo wrote: >> If the GETATTR request on a file that has write delegation in effect >> and the request attributes include the change info and size attribute >> then the write delegation is recalled and NFS4ERR_DELAY is returned >> for the GETATTR. >> >> Signed-off-by: Dai Ngo <dai.ngo@oracle.com> >> --- >> fs/nfsd/nfs4xdr.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ >> 1 file changed, 45 insertions(+) >> >> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c >> index 76db2fe29624..e069b970f136 100644 >> --- a/fs/nfsd/nfs4xdr.c >> +++ b/fs/nfsd/nfs4xdr.c >> @@ -2920,6 +2920,46 @@ nfsd4_encode_bitmap(struct xdr_stream *xdr, u32 bmval0, u32 bmval1, u32 bmval2) >> return nfserr_resource; >> } >> >> +static struct file_lock * >> +nfs4_wrdeleg_filelock(struct svc_rqst *rqstp, struct inode *inode) >> +{ >> + struct file_lock_context *ctx; >> + struct file_lock *fl; >> + >> + ctx = locks_inode_context(inode); >> + if (!ctx) >> + return NULL; >> + spin_lock(&ctx->flc_lock); >> + if (!list_empty(&ctx->flc_lease)) { >> + fl = list_first_entry(&ctx->flc_lease, >> + struct file_lock, fl_list); >> + if (fl->fl_type == F_WRLCK) { >> + spin_unlock(&ctx->flc_lock); >> + return fl; >> + } >> + } >> + spin_unlock(&ctx->flc_lock); >> + return NULL; >> +} >> + >> +static __be32 >> +nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode *inode) >> +{ >> + __be32 status; >> + struct file_lock *fl; >> + struct nfs4_delegation *dp; >> + >> + fl = nfs4_wrdeleg_filelock(rqstp, inode); >> + if (!fl) >> + return 0; >> + dp = fl->fl_owner; >> + if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker)) >> + return 0; >> + refcount_inc(&dp->dl_stid.sc_count); > Another question: Why are you taking a reference here at all? This is same as in nfsd_break_one_deleg and revoke_delegation. I think it is to prevent the delegation to be freed while delegation is being recalled. > AFAICT, > you don't even look at the delegation again after that point, so it's > not clear to me who's responsible for putting that reference. In v2, the sc_count is decrement by nfs4_put_stid. I forgot to do that in V4. I'll add it back in v5. Thanks, -Dai > >> + status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ)); >> + return status; >> +} >> + >> /* >> * Note: @fhp can be NULL; in this case, we might have to compose the filehandle >> * ourselves. >> @@ -2966,6 +3006,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, >> if (status) >> goto out; >> } >> + if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) { >> + status = nfs4_handle_wrdeleg_conflict(rqstp, d_inode(dentry)); >> + if (status) >> + goto out; >> + } >> >> err = vfs_getattr(&path, &stat, >> STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
On 5/21/23 7:56 PM, dai.ngo@oracle.com wrote: > > On 5/21/23 4:08 PM, Jeff Layton wrote: >> On Sat, 2023-05-20 at 14:36 -0700, Dai Ngo wrote: >>> If the GETATTR request on a file that has write delegation in effect >>> and the request attributes include the change info and size attribute >>> then the write delegation is recalled and NFS4ERR_DELAY is returned >>> for the GETATTR. >>> >>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com> >>> --- >>> fs/nfsd/nfs4xdr.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ >>> 1 file changed, 45 insertions(+) >>> >>> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c >>> index 76db2fe29624..e069b970f136 100644 >>> --- a/fs/nfsd/nfs4xdr.c >>> +++ b/fs/nfsd/nfs4xdr.c >>> @@ -2920,6 +2920,46 @@ nfsd4_encode_bitmap(struct xdr_stream *xdr, >>> u32 bmval0, u32 bmval1, u32 bmval2) >>> return nfserr_resource; >>> } >>> +static struct file_lock * >>> +nfs4_wrdeleg_filelock(struct svc_rqst *rqstp, struct inode *inode) >>> +{ >>> + struct file_lock_context *ctx; >>> + struct file_lock *fl; >>> + >>> + ctx = locks_inode_context(inode); >>> + if (!ctx) >>> + return NULL; >>> + spin_lock(&ctx->flc_lock); >>> + if (!list_empty(&ctx->flc_lease)) { >>> + fl = list_first_entry(&ctx->flc_lease, >>> + struct file_lock, fl_list); >>> + if (fl->fl_type == F_WRLCK) { >>> + spin_unlock(&ctx->flc_lock); >>> + return fl; >>> + } >>> + } >>> + spin_unlock(&ctx->flc_lock); >>> + return NULL; >>> +} >>> + >>> +static __be32 >>> +nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode >>> *inode) >>> +{ >>> + __be32 status; >>> + struct file_lock *fl; >>> + struct nfs4_delegation *dp; >>> + >>> + fl = nfs4_wrdeleg_filelock(rqstp, inode); >>> + if (!fl) >>> + return 0; >>> + dp = fl->fl_owner; >>> + if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker)) >>> + return 0; >>> + refcount_inc(&dp->dl_stid.sc_count); >> Another question: Why are you taking a reference here at all? > > This is same as in nfsd_break_one_deleg and revoke_delegation. > I think it is to prevent the delegation to be freed while delegation > is being recalled. > >> AFAICT, >> you don't even look at the delegation again after that point, so it's >> not clear to me who's responsible for putting that reference. > > In v2, the sc_count is decrement by nfs4_put_stid. I forgot to do that > in V4. I'll add it back in v5. Actually the refcount is decremented after the CB_RECALL is done by nfs4_put_stid in nfsd4_cb_recall_release. So we don't have to decrement it here. This is to prevent the delegation to be free while the recall is being sent. -Dai > > Thanks, > -Dai > >> >>> + status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ)); >>> + return status; >>> +} >>> + >>> /* >>> * Note: @fhp can be NULL; in this case, we might have to compose >>> the filehandle >>> * ourselves. >>> @@ -2966,6 +3006,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, >>> struct svc_fh *fhp, >>> if (status) >>> goto out; >>> } >>> + if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) { >>> + status = nfs4_handle_wrdeleg_conflict(rqstp, d_inode(dentry)); >>> + if (status) >>> + goto out; >>> + } >>> err = vfs_getattr(&path, &stat, >>> STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
> On May 21, 2023, at 11:56 PM, Dai Ngo <dai.ngo@oracle.com> wrote: > > > On 5/21/23 7:56 PM, dai.ngo@oracle.com wrote: >> >> On 5/21/23 4:08 PM, Jeff Layton wrote: >>> On Sat, 2023-05-20 at 14:36 -0700, Dai Ngo wrote: >>>> If the GETATTR request on a file that has write delegation in effect >>>> and the request attributes include the change info and size attribute >>>> then the write delegation is recalled and NFS4ERR_DELAY is returned >>>> for the GETATTR. >>>> >>>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com> >>>> --- >>>> fs/nfsd/nfs4xdr.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ >>>> 1 file changed, 45 insertions(+) >>>> >>>> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c >>>> index 76db2fe29624..e069b970f136 100644 >>>> --- a/fs/nfsd/nfs4xdr.c >>>> +++ b/fs/nfsd/nfs4xdr.c >>>> @@ -2920,6 +2920,46 @@ nfsd4_encode_bitmap(struct xdr_stream *xdr, u32 bmval0, u32 bmval1, u32 bmval2) >>>> return nfserr_resource; >>>> } >>>> +static struct file_lock * >>>> +nfs4_wrdeleg_filelock(struct svc_rqst *rqstp, struct inode *inode) >>>> +{ >>>> + struct file_lock_context *ctx; >>>> + struct file_lock *fl; >>>> + >>>> + ctx = locks_inode_context(inode); >>>> + if (!ctx) >>>> + return NULL; >>>> + spin_lock(&ctx->flc_lock); >>>> + if (!list_empty(&ctx->flc_lease)) { >>>> + fl = list_first_entry(&ctx->flc_lease, >>>> + struct file_lock, fl_list); >>>> + if (fl->fl_type == F_WRLCK) { >>>> + spin_unlock(&ctx->flc_lock); >>>> + return fl; >>>> + } >>>> + } >>>> + spin_unlock(&ctx->flc_lock); >>>> + return NULL; >>>> +} >>>> + >>>> +static __be32 >>>> +nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode *inode) >>>> +{ >>>> + __be32 status; >>>> + struct file_lock *fl; >>>> + struct nfs4_delegation *dp; >>>> + >>>> + fl = nfs4_wrdeleg_filelock(rqstp, inode); >>>> + if (!fl) >>>> + return 0; >>>> + dp = fl->fl_owner; >>>> + if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker)) >>>> + return 0; >>>> + refcount_inc(&dp->dl_stid.sc_count); >>> Another question: Why are you taking a reference here at all? >> >> This is same as in nfsd_break_one_deleg and revoke_delegation. >> I think it is to prevent the delegation to be freed while delegation >> is being recalled. >> >>> AFAICT, >>> you don't even look at the delegation again after that point, so it's >>> not clear to me who's responsible for putting that reference. >> >> In v2, the sc_count is decrement by nfs4_put_stid. I forgot to do that >> in V4. I'll add it back in v5. > > Actually the refcount is decremented after the CB_RECALL is done > by nfs4_put_stid in nfsd4_cb_recall_release. So we don't have to > decrement it here. This is to prevent the delegation to be free > while the recall is being sent. For v5, please add this good information to the documenting comment for this function. > -Dai > >> >> Thanks, >> -Dai >> >>> >>>> + status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ)); >>>> + return status; >>>> +} >>>> + >>>> /* >>>> * Note: @fhp can be NULL; in this case, we might have to compose the filehandle >>>> * ourselves. >>>> @@ -2966,6 +3006,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, >>>> if (status) >>>> goto out; >>>> } >>>> + if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) { >>>> + status = nfs4_handle_wrdeleg_conflict(rqstp, d_inode(dentry)); >>>> + if (status) >>>> + goto out; >>>> + } >>>> err = vfs_getattr(&path, &stat, >>>> STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE, -- Chuck Lever
On Sun, 2023-05-21 at 20:56 -0700, dai.ngo@oracle.com wrote: > On 5/21/23 7:56 PM, dai.ngo@oracle.com wrote: > > > > On 5/21/23 4:08 PM, Jeff Layton wrote: > > > On Sat, 2023-05-20 at 14:36 -0700, Dai Ngo wrote: > > > > If the GETATTR request on a file that has write delegation in effect > > > > and the request attributes include the change info and size attribute > > > > then the write delegation is recalled and NFS4ERR_DELAY is returned > > > > for the GETATTR. > > > > > > > > Signed-off-by: Dai Ngo <dai.ngo@oracle.com> > > > > --- > > > > fs/nfsd/nfs4xdr.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ > > > > 1 file changed, 45 insertions(+) > > > > > > > > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c > > > > index 76db2fe29624..e069b970f136 100644 > > > > --- a/fs/nfsd/nfs4xdr.c > > > > +++ b/fs/nfsd/nfs4xdr.c > > > > @@ -2920,6 +2920,46 @@ nfsd4_encode_bitmap(struct xdr_stream *xdr, > > > > u32 bmval0, u32 bmval1, u32 bmval2) > > > > return nfserr_resource; > > > > } > > > > +static struct file_lock * > > > > +nfs4_wrdeleg_filelock(struct svc_rqst *rqstp, struct inode *inode) > > > > +{ > > > > + struct file_lock_context *ctx; > > > > + struct file_lock *fl; > > > > + > > > > + ctx = locks_inode_context(inode); > > > > + if (!ctx) > > > > + return NULL; > > > > + spin_lock(&ctx->flc_lock); > > > > + if (!list_empty(&ctx->flc_lease)) { > > > > + fl = list_first_entry(&ctx->flc_lease, > > > > + struct file_lock, fl_list); > > > > + if (fl->fl_type == F_WRLCK) { > > > > + spin_unlock(&ctx->flc_lock); > > > > + return fl; > > > > + } One more issue here too. FL_LAYOUT file_locks live on this list too. They shouldn't conflict with leases or delegations, so you probably just want to skip them. Longer term, I wonder if we ought to add a new list in the file_lock_context for layouts? There's no reason to keep them all on the same list. > > > > + } > > > > + spin_unlock(&ctx->flc_lock); > > > > + return NULL; > > > > +} > > > > + > > > > +static __be32 > > > > +nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode > > > > *inode) > > > > +{ > > > > + __be32 status; > > > > + struct file_lock *fl; > > > > + struct nfs4_delegation *dp; > > > > + > > > > + fl = nfs4_wrdeleg_filelock(rqstp, inode); > > > > + if (!fl) > > > > + return 0; > > > > + dp = fl->fl_owner; > > > > + if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker)) > > > > + return 0; > > > > + refcount_inc(&dp->dl_stid.sc_count); > > > Another question: Why are you taking a reference here at all? > > > > This is same as in nfsd_break_one_deleg and revoke_delegation. > > I think it is to prevent the delegation to be freed while delegation > > is being recalled. > > > > > AFAICT, > > > you don't even look at the delegation again after that point, so it's > > > not clear to me who's responsible for putting that reference. > > > > In v2, the sc_count is decrement by nfs4_put_stid. I forgot to do that > > in V4. I'll add it back in v5. > > Actually the refcount is decremented after the CB_RECALL is done > by nfs4_put_stid in nfsd4_cb_recall_release. So we don't have to > decrement it here. This is to prevent the delegation to be free > while the recall is being sent. > That's the put for the increment in nfsd_break_one_deleg. You don't need to take an extra reference to a delegation to call nfsd_open_break_lease. You might not even know which delegation is being broken. There could even be more than one, after all. I think that extra refcount_inc is likely to cause a leak.
On 5/22/23 6:49 AM, Jeff Layton wrote: > On Sun, 2023-05-21 at 20:56 -0700, dai.ngo@oracle.com wrote: >> On 5/21/23 7:56 PM, dai.ngo@oracle.com wrote: >>> On 5/21/23 4:08 PM, Jeff Layton wrote: >>>> On Sat, 2023-05-20 at 14:36 -0700, Dai Ngo wrote: >>>>> If the GETATTR request on a file that has write delegation in effect >>>>> and the request attributes include the change info and size attribute >>>>> then the write delegation is recalled and NFS4ERR_DELAY is returned >>>>> for the GETATTR. >>>>> >>>>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com> >>>>> --- >>>>> fs/nfsd/nfs4xdr.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ >>>>> 1 file changed, 45 insertions(+) >>>>> >>>>> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c >>>>> index 76db2fe29624..e069b970f136 100644 >>>>> --- a/fs/nfsd/nfs4xdr.c >>>>> +++ b/fs/nfsd/nfs4xdr.c >>>>> @@ -2920,6 +2920,46 @@ nfsd4_encode_bitmap(struct xdr_stream *xdr, >>>>> u32 bmval0, u32 bmval1, u32 bmval2) >>>>> return nfserr_resource; >>>>> } >>>>> +static struct file_lock * >>>>> +nfs4_wrdeleg_filelock(struct svc_rqst *rqstp, struct inode *inode) >>>>> +{ >>>>> + struct file_lock_context *ctx; >>>>> + struct file_lock *fl; >>>>> + >>>>> + ctx = locks_inode_context(inode); >>>>> + if (!ctx) >>>>> + return NULL; >>>>> + spin_lock(&ctx->flc_lock); >>>>> + if (!list_empty(&ctx->flc_lease)) { >>>>> + fl = list_first_entry(&ctx->flc_lease, >>>>> + struct file_lock, fl_list); >>>>> + if (fl->fl_type == F_WRLCK) { >>>>> + spin_unlock(&ctx->flc_lock); >>>>> + return fl; >>>>> + } > One more issue here too. FL_LAYOUT file_locks live on this list too. > They shouldn't conflict with leases or delegations, so you probably just > want to skip them. oh ok, that means we have to scan the list and skip the FL_LAYOUT file_locks. > > Longer term, I wonder if we ought to add a new list in the > file_lock_context for layouts? There's no reason to keep them all on the > same list. Yes, that would be nice. Thanks Jeff, -Dai > >>>>> + } >>>>> + spin_unlock(&ctx->flc_lock); >>>>> + return NULL; >>>>> +} >>>>> + >>>>> +static __be32 >>>>> +nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode >>>>> *inode) >>>>> +{ >>>>> + __be32 status; >>>>> + struct file_lock *fl; >>>>> + struct nfs4_delegation *dp; >>>>> + >>>>> + fl = nfs4_wrdeleg_filelock(rqstp, inode); >>>>> + if (!fl) >>>>> + return 0; >>>>> + dp = fl->fl_owner; >>>>> + if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker)) >>>>> + return 0; >>>>> + refcount_inc(&dp->dl_stid.sc_count); >>>> Another question: Why are you taking a reference here at all? >>> This is same as in nfsd_break_one_deleg and revoke_delegation. >>> I think it is to prevent the delegation to be freed while delegation >>> is being recalled. >>> >>>> AFAICT, >>>> you don't even look at the delegation again after that point, so it's >>>> not clear to me who's responsible for putting that reference. >>> In v2, the sc_count is decrement by nfs4_put_stid. I forgot to do that >>> in V4. I'll add it back in v5. >> Actually the refcount is decremented after the CB_RECALL is done >> by nfs4_put_stid in nfsd4_cb_recall_release. So we don't have to >> decrement it here. This is to prevent the delegation to be free >> while the recall is being sent. >> > That's the put for the increment in nfsd_break_one_deleg. > > You don't need to take an extra reference to a delegation to call > nfsd_open_break_lease. You might not even know which delegation is being > broken. There could even be more than one, after all. > > I think that extra refcount_inc is likely to cause a leak.
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 76db2fe29624..e069b970f136 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2920,6 +2920,46 @@ nfsd4_encode_bitmap(struct xdr_stream *xdr, u32 bmval0, u32 bmval1, u32 bmval2) return nfserr_resource; } +static struct file_lock * +nfs4_wrdeleg_filelock(struct svc_rqst *rqstp, struct inode *inode) +{ + struct file_lock_context *ctx; + struct file_lock *fl; + + ctx = locks_inode_context(inode); + if (!ctx) + return NULL; + spin_lock(&ctx->flc_lock); + if (!list_empty(&ctx->flc_lease)) { + fl = list_first_entry(&ctx->flc_lease, + struct file_lock, fl_list); + if (fl->fl_type == F_WRLCK) { + spin_unlock(&ctx->flc_lock); + return fl; + } + } + spin_unlock(&ctx->flc_lock); + return NULL; +} + +static __be32 +nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode *inode) +{ + __be32 status; + struct file_lock *fl; + struct nfs4_delegation *dp; + + fl = nfs4_wrdeleg_filelock(rqstp, inode); + if (!fl) + return 0; + dp = fl->fl_owner; + if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker)) + return 0; + refcount_inc(&dp->dl_stid.sc_count); + status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ)); + return status; +} + /* * Note: @fhp can be NULL; in this case, we might have to compose the filehandle * ourselves. @@ -2966,6 +3006,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, if (status) goto out; } + if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) { + status = nfs4_handle_wrdeleg_conflict(rqstp, d_inode(dentry)); + if (status) + goto out; + } err = vfs_getattr(&path, &stat, STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
If the GETATTR request on a file that has write delegation in effect and the request attributes include the change info and size attribute then the write delegation is recalled and NFS4ERR_DELAY is returned for the GETATTR. Signed-off-by: Dai Ngo <dai.ngo@oracle.com> --- fs/nfsd/nfs4xdr.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+)