diff mbox series

[v4,3/4] NFSD: handle GETATTR conflict with write delegation

Message ID 1684618595-4178-4-git-send-email-dai.ngo@oracle.com (mailing list archive)
State New, archived
Headers show
Series NFSD: add support for NFSv4 write delegation | expand

Commit Message

Dai Ngo May 20, 2023, 9:36 p.m. UTC
If the GETATTR request on a file that has write delegation in effect
and the request attributes include the change info and size attribute
then the write delegation is recalled and NFS4ERR_DELAY is returned
for the GETATTR.

Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
---
 fs/nfsd/nfs4xdr.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

Comments

Chuck Lever III May 21, 2023, 4:30 p.m. UTC | #1
> On May 20, 2023, at 5:36 PM, Dai Ngo <dai.ngo@oracle.com> wrote:
> 
> If the GETATTR request on a file that has write delegation in effect
> and the request attributes include the change info and size attribute
> then the write delegation is recalled and NFS4ERR_DELAY is returned
> for the GETATTR.
> 
> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> ---
> fs/nfsd/nfs4xdr.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 45 insertions(+)
> 
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index 76db2fe29624..e069b970f136 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -2920,6 +2920,46 @@ nfsd4_encode_bitmap(struct xdr_stream *xdr, u32 bmval0, u32 bmval1, u32 bmval2)
> return nfserr_resource;
> }
> 
> +static struct file_lock *
> +nfs4_wrdeleg_filelock(struct svc_rqst *rqstp, struct inode *inode)
> +{
> + struct file_lock_context *ctx;
> + struct file_lock *fl;
> +
> + ctx = locks_inode_context(inode);
> + if (!ctx)
> + return NULL;
> + spin_lock(&ctx->flc_lock);
> + if (!list_empty(&ctx->flc_lease)) {
> + fl = list_first_entry(&ctx->flc_lease,
> + struct file_lock, fl_list);
> + if (fl->fl_type == F_WRLCK) {
> + spin_unlock(&ctx->flc_lock);
> + return fl;
> + }
> + }
> + spin_unlock(&ctx->flc_lock);
> + return NULL;
> +}
> +
> +static __be32
> +nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode *inode)
> +{
> + __be32 status;
> + struct file_lock *fl;
> + struct nfs4_delegation *dp;
> +
> + fl = nfs4_wrdeleg_filelock(rqstp, inode);
> + if (!fl)
> + return 0;
> + dp = fl->fl_owner;
> + if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker))
> + return 0;
> + refcount_inc(&dp->dl_stid.sc_count);
> + status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
> + return status;
> +}
> +

fs/nfsd/nfs4state.c seems more appropriate for these.
I'll move them as I apply this patch.


> /*
>  * Note: @fhp can be NULL; in this case, we might have to compose the filehandle
>  * ourselves.
> @@ -2966,6 +3006,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
> if (status)
> goto out;
> }
> + if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
> + status = nfs4_handle_wrdeleg_conflict(rqstp, d_inode(dentry));
> + if (status)
> + goto out;
> + }
> 
> err = vfs_getattr(&path, &stat,
>  STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
> -- 
> 2.9.5
> 

--
Chuck Lever
Jeff Layton May 21, 2023, 4:49 p.m. UTC | #2
On Sat, 2023-05-20 at 14:36 -0700, Dai Ngo wrote:
> If the GETATTR request on a file that has write delegation in effect
> and the request attributes include the change info and size attribute
> then the write delegation is recalled and NFS4ERR_DELAY is returned
> for the GETATTR.
> 
> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> ---
>  fs/nfsd/nfs4xdr.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 45 insertions(+)
> 
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index 76db2fe29624..e069b970f136 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -2920,6 +2920,46 @@ nfsd4_encode_bitmap(struct xdr_stream *xdr, u32 bmval0, u32 bmval1, u32 bmval2)
>  	return nfserr_resource;
>  }
>  
> +static struct file_lock *
> +nfs4_wrdeleg_filelock(struct svc_rqst *rqstp, struct inode *inode)
> +{
> +	struct file_lock_context *ctx;
> +	struct file_lock *fl;
> +
> +	ctx = locks_inode_context(inode);
> +	if (!ctx)
> +		return NULL;
> +	spin_lock(&ctx->flc_lock);
> +	if (!list_empty(&ctx->flc_lease)) {
> +		fl = list_first_entry(&ctx->flc_lease,
> +					struct file_lock, fl_list);
> +		if (fl->fl_type == F_WRLCK) {
> +			spin_unlock(&ctx->flc_lock);
> +			return fl;
> +		}
> +	}
> +	spin_unlock(&ctx->flc_lock);
> +	return NULL;
> +}
> +
> +static __be32
> +nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode *inode)
> +{
> +	__be32 status;
> +	struct file_lock *fl;
> +	struct nfs4_delegation *dp;
> +
> +	fl = nfs4_wrdeleg_filelock(rqstp, inode);
> +	if (!fl)
> +		return 0;
> +	dp = fl->fl_owner;

One problem here is that you don't know whether the owner was set by
nfsd. This owner could be a struct file from a userland lease holder,
and that that point it's not safe to dereference it below like you are.

The q&d way we check for this in other places is to validate that the
fl_lmops is correct. In this case you'd want to make sure it's set to
&nfsd_lease_mng_ops.

Beyond that, you also don't know whether that owner or file_lock still
_exists_ after you drop the flc_lock. You need to either do these checks
while holding that lock, or take a reference to the owner before you
start dereferencing things.

Probably, you're better off here just doing it all under the flc_lock.

> +	if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker))
> +		return 0;
> +	refcount_inc(&dp->dl_stid.sc_count);

Another problem: the sc_count might already have gone to zero here. You
don't already hold a reference to dl_stid at this point, so you can't
just increment it without taking the cl_lock for that client.

You may be able to do this safely with refcount_inc_not_zero, and just
ignore the delegation if it's already at zero.

> +	status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
> +	return status;
> +}
> +
>  /*
>   * Note: @fhp can be NULL; in this case, we might have to compose the filehandle
>   * ourselves.
> @@ -2966,6 +3006,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
>  		if (status)
>  			goto out;
>  	}
> +	if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
> +		status = nfs4_handle_wrdeleg_conflict(rqstp, d_inode(dentry));
> +		if (status)
> +			goto out;
> +	}
>  
>  	err = vfs_getattr(&path, &stat,
>  			  STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
Dai Ngo May 21, 2023, 6:48 p.m. UTC | #3
On 5/21/23 9:49 AM, Jeff Layton wrote:
> On Sat, 2023-05-20 at 14:36 -0700, Dai Ngo wrote:
>> If the GETATTR request on a file that has write delegation in effect
>> and the request attributes include the change info and size attribute
>> then the write delegation is recalled and NFS4ERR_DELAY is returned
>> for the GETATTR.
>>
>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>> ---
>>   fs/nfsd/nfs4xdr.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
>>   1 file changed, 45 insertions(+)
>>
>> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
>> index 76db2fe29624..e069b970f136 100644
>> --- a/fs/nfsd/nfs4xdr.c
>> +++ b/fs/nfsd/nfs4xdr.c
>> @@ -2920,6 +2920,46 @@ nfsd4_encode_bitmap(struct xdr_stream *xdr, u32 bmval0, u32 bmval1, u32 bmval2)
>>   	return nfserr_resource;
>>   }
>>   
>> +static struct file_lock *
>> +nfs4_wrdeleg_filelock(struct svc_rqst *rqstp, struct inode *inode)
>> +{
>> +	struct file_lock_context *ctx;
>> +	struct file_lock *fl;
>> +
>> +	ctx = locks_inode_context(inode);
>> +	if (!ctx)
>> +		return NULL;
>> +	spin_lock(&ctx->flc_lock);
>> +	if (!list_empty(&ctx->flc_lease)) {
>> +		fl = list_first_entry(&ctx->flc_lease,
>> +					struct file_lock, fl_list);
>> +		if (fl->fl_type == F_WRLCK) {
>> +			spin_unlock(&ctx->flc_lock);
>> +			return fl;
>> +		}
>> +	}
>> +	spin_unlock(&ctx->flc_lock);
>> +	return NULL;
>> +}
>> +
>> +static __be32
>> +nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode *inode)
>> +{
>> +	__be32 status;
>> +	struct file_lock *fl;
>> +	struct nfs4_delegation *dp;
>> +
>> +	fl = nfs4_wrdeleg_filelock(rqstp, inode);
>> +	if (!fl)
>> +		return 0;
>> +	dp = fl->fl_owner;
> One problem here is that you don't know whether the owner was set by
> nfsd. This owner could be a struct file from a userland lease holder,
> and that that point it's not safe to dereference it below like you are.
>
> The q&d way we check for this in other places is to validate that the
> fl_lmops is correct. In this case you'd want to make sure it's set to
> &nfsd_lease_mng_ops.

Thanks Jeff, fix in v5.

>
> Beyond that, you also don't know whether that owner or file_lock still
> _exists_ after you drop the flc_lock. You need to either do these checks
> while holding that lock, or take a reference to the owner before you
> start dereferencing things.
>
> Probably, you're better off here just doing it all under the flc_lock.

fix in v5.

>
>> +	if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker))
>> +		return 0;
>> +	refcount_inc(&dp->dl_stid.sc_count);
> Another problem: the sc_count might already have gone to zero here. You
> don't already hold a reference to dl_stid at this point, so you can't
> just increment it without taking the cl_lock for that client.
>
> You may be able to do this safely with refcount_inc_not_zero, and just
> ignore the delegation if it's already at zero.

Fix in v5.

Chuck, I can do v5 to address feedback from you and Jeff.

Thanks,
-Dai

>
>> +	status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
>> +	return status;
>> +}
>> +
>>   /*
>>    * Note: @fhp can be NULL; in this case, we might have to compose the filehandle
>>    * ourselves.
>> @@ -2966,6 +3006,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
>>   		if (status)
>>   			goto out;
>>   	}
>> +	if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
>> +		status = nfs4_handle_wrdeleg_conflict(rqstp, d_inode(dentry));
>> +		if (status)
>> +			goto out;
>> +	}
>>   
>>   	err = vfs_getattr(&path, &stat,
>>   			  STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
Jeff Layton May 21, 2023, 11:08 p.m. UTC | #4
On Sat, 2023-05-20 at 14:36 -0700, Dai Ngo wrote:
> If the GETATTR request on a file that has write delegation in effect
> and the request attributes include the change info and size attribute
> then the write delegation is recalled and NFS4ERR_DELAY is returned
> for the GETATTR.
> 
> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> ---
>  fs/nfsd/nfs4xdr.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 45 insertions(+)
> 
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index 76db2fe29624..e069b970f136 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -2920,6 +2920,46 @@ nfsd4_encode_bitmap(struct xdr_stream *xdr, u32 bmval0, u32 bmval1, u32 bmval2)
>  	return nfserr_resource;
>  }
>  
> +static struct file_lock *
> +nfs4_wrdeleg_filelock(struct svc_rqst *rqstp, struct inode *inode)
> +{
> +	struct file_lock_context *ctx;
> +	struct file_lock *fl;
> +
> +	ctx = locks_inode_context(inode);
> +	if (!ctx)
> +		return NULL;
> +	spin_lock(&ctx->flc_lock);
> +	if (!list_empty(&ctx->flc_lease)) {
> +		fl = list_first_entry(&ctx->flc_lease,
> +					struct file_lock, fl_list);
> +		if (fl->fl_type == F_WRLCK) {
> +			spin_unlock(&ctx->flc_lock);
> +			return fl;
> +		}
> +	}
> +	spin_unlock(&ctx->flc_lock);
> +	return NULL;
> +}
> +
> +static __be32
> +nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode *inode)
> +{
> +	__be32 status;
> +	struct file_lock *fl;
> +	struct nfs4_delegation *dp;
> +
> +	fl = nfs4_wrdeleg_filelock(rqstp, inode);
> +	if (!fl)
> +		return 0;
> +	dp = fl->fl_owner;
> +	if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker))
> +		return 0;
> +	refcount_inc(&dp->dl_stid.sc_count);

Another question: Why are you taking a reference here at all? AFAICT,
you don't even look at the delegation again after that point, so it's
not clear to me who's responsible for putting that reference.

> +	status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
> +	return status;
> +}
> +
>  /*
>   * Note: @fhp can be NULL; in this case, we might have to compose the filehandle
>   * ourselves.
> @@ -2966,6 +3006,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
>  		if (status)
>  			goto out;
>  	}
> +	if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
> +		status = nfs4_handle_wrdeleg_conflict(rqstp, d_inode(dentry));
> +		if (status)
> +			goto out;
> +	}
>  
>  	err = vfs_getattr(&path, &stat,
>  			  STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
Chuck Lever May 22, 2023, 2:31 a.m. UTC | #5
On Sun, May 21, 2023 at 07:08:43PM -0400, Jeff Layton wrote:
> On Sat, 2023-05-20 at 14:36 -0700, Dai Ngo wrote:
> > If the GETATTR request on a file that has write delegation in effect
> > and the request attributes include the change info and size attribute
> > then the write delegation is recalled and NFS4ERR_DELAY is returned
> > for the GETATTR.
> > 
> > Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> > ---
> >  fs/nfsd/nfs4xdr.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 45 insertions(+)
> > 
> > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> > index 76db2fe29624..e069b970f136 100644
> > --- a/fs/nfsd/nfs4xdr.c
> > +++ b/fs/nfsd/nfs4xdr.c
> > @@ -2920,6 +2920,46 @@ nfsd4_encode_bitmap(struct xdr_stream *xdr, u32 bmval0, u32 bmval1, u32 bmval2)
> >  	return nfserr_resource;
> >  }
> >  
> > +static struct file_lock *
> > +nfs4_wrdeleg_filelock(struct svc_rqst *rqstp, struct inode *inode)
> > +{
> > +	struct file_lock_context *ctx;
> > +	struct file_lock *fl;
> > +
> > +	ctx = locks_inode_context(inode);
> > +	if (!ctx)
> > +		return NULL;
> > +	spin_lock(&ctx->flc_lock);
> > +	if (!list_empty(&ctx->flc_lease)) {
> > +		fl = list_first_entry(&ctx->flc_lease,
> > +					struct file_lock, fl_list);
> > +		if (fl->fl_type == F_WRLCK) {
> > +			spin_unlock(&ctx->flc_lock);
> > +			return fl;
> > +		}
> > +	}
> > +	spin_unlock(&ctx->flc_lock);
> > +	return NULL;
> > +}
> > +
> > +static __be32
> > +nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode *inode)
> > +{
> > +	__be32 status;
> > +	struct file_lock *fl;
> > +	struct nfs4_delegation *dp;
> > +
> > +	fl = nfs4_wrdeleg_filelock(rqstp, inode);
> > +	if (!fl)
> > +		return 0;
> > +	dp = fl->fl_owner;
> > +	if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker))
> > +		return 0;
> > +	refcount_inc(&dp->dl_stid.sc_count);
> 
> Another question: Why are you taking a reference here at all? AFAICT,
> you don't even look at the delegation again after that point, so it's
> not clear to me who's responsible for putting that reference.

I had the same thought, but I assumed I was missing something.


> > +	status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
> > +	return status;
> > +}
> > +
> >  /*
> >   * Note: @fhp can be NULL; in this case, we might have to compose the filehandle
> >   * ourselves.
> > @@ -2966,6 +3006,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
> >  		if (status)
> >  			goto out;
> >  	}
> > +	if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
> > +		status = nfs4_handle_wrdeleg_conflict(rqstp, d_inode(dentry));
> > +		if (status)
> > +			goto out;
> > +	}
> >  
> >  	err = vfs_getattr(&path, &stat,
> >  			  STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
> 
> -- 
> Jeff Layton <jlayton@kernel.org>
Dai Ngo May 22, 2023, 2:56 a.m. UTC | #6
On 5/21/23 4:08 PM, Jeff Layton wrote:
> On Sat, 2023-05-20 at 14:36 -0700, Dai Ngo wrote:
>> If the GETATTR request on a file that has write delegation in effect
>> and the request attributes include the change info and size attribute
>> then the write delegation is recalled and NFS4ERR_DELAY is returned
>> for the GETATTR.
>>
>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>> ---
>>   fs/nfsd/nfs4xdr.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
>>   1 file changed, 45 insertions(+)
>>
>> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
>> index 76db2fe29624..e069b970f136 100644
>> --- a/fs/nfsd/nfs4xdr.c
>> +++ b/fs/nfsd/nfs4xdr.c
>> @@ -2920,6 +2920,46 @@ nfsd4_encode_bitmap(struct xdr_stream *xdr, u32 bmval0, u32 bmval1, u32 bmval2)
>>   	return nfserr_resource;
>>   }
>>   
>> +static struct file_lock *
>> +nfs4_wrdeleg_filelock(struct svc_rqst *rqstp, struct inode *inode)
>> +{
>> +	struct file_lock_context *ctx;
>> +	struct file_lock *fl;
>> +
>> +	ctx = locks_inode_context(inode);
>> +	if (!ctx)
>> +		return NULL;
>> +	spin_lock(&ctx->flc_lock);
>> +	if (!list_empty(&ctx->flc_lease)) {
>> +		fl = list_first_entry(&ctx->flc_lease,
>> +					struct file_lock, fl_list);
>> +		if (fl->fl_type == F_WRLCK) {
>> +			spin_unlock(&ctx->flc_lock);
>> +			return fl;
>> +		}
>> +	}
>> +	spin_unlock(&ctx->flc_lock);
>> +	return NULL;
>> +}
>> +
>> +static __be32
>> +nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode *inode)
>> +{
>> +	__be32 status;
>> +	struct file_lock *fl;
>> +	struct nfs4_delegation *dp;
>> +
>> +	fl = nfs4_wrdeleg_filelock(rqstp, inode);
>> +	if (!fl)
>> +		return 0;
>> +	dp = fl->fl_owner;
>> +	if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker))
>> +		return 0;
>> +	refcount_inc(&dp->dl_stid.sc_count);
> Another question: Why are you taking a reference here at all?

This is same as in nfsd_break_one_deleg and revoke_delegation.
I think it is to prevent the delegation to be freed while delegation
is being recalled.

>   AFAICT,
> you don't even look at the delegation again after that point, so it's
> not clear to me who's responsible for putting that reference.

In v2, the sc_count is decrement by nfs4_put_stid. I forgot to do that
in V4. I'll add it back in v5.

Thanks,
-Dai

>
>> +	status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
>> +	return status;
>> +}
>> +
>>   /*
>>    * Note: @fhp can be NULL; in this case, we might have to compose the filehandle
>>    * ourselves.
>> @@ -2966,6 +3006,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
>>   		if (status)
>>   			goto out;
>>   	}
>> +	if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
>> +		status = nfs4_handle_wrdeleg_conflict(rqstp, d_inode(dentry));
>> +		if (status)
>> +			goto out;
>> +	}
>>   
>>   	err = vfs_getattr(&path, &stat,
>>   			  STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
Dai Ngo May 22, 2023, 3:56 a.m. UTC | #7
On 5/21/23 7:56 PM, dai.ngo@oracle.com wrote:
>
> On 5/21/23 4:08 PM, Jeff Layton wrote:
>> On Sat, 2023-05-20 at 14:36 -0700, Dai Ngo wrote:
>>> If the GETATTR request on a file that has write delegation in effect
>>> and the request attributes include the change info and size attribute
>>> then the write delegation is recalled and NFS4ERR_DELAY is returned
>>> for the GETATTR.
>>>
>>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>>> ---
>>>   fs/nfsd/nfs4xdr.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
>>>   1 file changed, 45 insertions(+)
>>>
>>> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
>>> index 76db2fe29624..e069b970f136 100644
>>> --- a/fs/nfsd/nfs4xdr.c
>>> +++ b/fs/nfsd/nfs4xdr.c
>>> @@ -2920,6 +2920,46 @@ nfsd4_encode_bitmap(struct xdr_stream *xdr, 
>>> u32 bmval0, u32 bmval1, u32 bmval2)
>>>       return nfserr_resource;
>>>   }
>>>   +static struct file_lock *
>>> +nfs4_wrdeleg_filelock(struct svc_rqst *rqstp, struct inode *inode)
>>> +{
>>> +    struct file_lock_context *ctx;
>>> +    struct file_lock *fl;
>>> +
>>> +    ctx = locks_inode_context(inode);
>>> +    if (!ctx)
>>> +        return NULL;
>>> +    spin_lock(&ctx->flc_lock);
>>> +    if (!list_empty(&ctx->flc_lease)) {
>>> +        fl = list_first_entry(&ctx->flc_lease,
>>> +                    struct file_lock, fl_list);
>>> +        if (fl->fl_type == F_WRLCK) {
>>> +            spin_unlock(&ctx->flc_lock);
>>> +            return fl;
>>> +        }
>>> +    }
>>> +    spin_unlock(&ctx->flc_lock);
>>> +    return NULL;
>>> +}
>>> +
>>> +static __be32
>>> +nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode 
>>> *inode)
>>> +{
>>> +    __be32 status;
>>> +    struct file_lock *fl;
>>> +    struct nfs4_delegation *dp;
>>> +
>>> +    fl = nfs4_wrdeleg_filelock(rqstp, inode);
>>> +    if (!fl)
>>> +        return 0;
>>> +    dp = fl->fl_owner;
>>> +    if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker))
>>> +        return 0;
>>> +    refcount_inc(&dp->dl_stid.sc_count);
>> Another question: Why are you taking a reference here at all?
>
> This is same as in nfsd_break_one_deleg and revoke_delegation.
> I think it is to prevent the delegation to be freed while delegation
> is being recalled.
>
>>   AFAICT,
>> you don't even look at the delegation again after that point, so it's
>> not clear to me who's responsible for putting that reference.
>
> In v2, the sc_count is decrement by nfs4_put_stid. I forgot to do that
> in V4. I'll add it back in v5.

Actually the refcount is decremented after the CB_RECALL is done
by nfs4_put_stid in nfsd4_cb_recall_release. So we don't have to
decrement it here. This is to prevent the delegation to be free
while the recall is being sent.

-Dai

>
> Thanks,
> -Dai
>
>>
>>> +    status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
>>> +    return status;
>>> +}
>>> +
>>>   /*
>>>    * Note: @fhp can be NULL; in this case, we might have to compose 
>>> the filehandle
>>>    * ourselves.
>>> @@ -2966,6 +3006,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, 
>>> struct svc_fh *fhp,
>>>           if (status)
>>>               goto out;
>>>       }
>>> +    if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
>>> +        status = nfs4_handle_wrdeleg_conflict(rqstp, d_inode(dentry));
>>> +        if (status)
>>> +            goto out;
>>> +    }
>>>         err = vfs_getattr(&path, &stat,
>>>                 STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
Chuck Lever III May 22, 2023, 1:16 p.m. UTC | #8
> On May 21, 2023, at 11:56 PM, Dai Ngo <dai.ngo@oracle.com> wrote:
> 
> 
> On 5/21/23 7:56 PM, dai.ngo@oracle.com wrote:
>> 
>> On 5/21/23 4:08 PM, Jeff Layton wrote:
>>> On Sat, 2023-05-20 at 14:36 -0700, Dai Ngo wrote:
>>>> If the GETATTR request on a file that has write delegation in effect
>>>> and the request attributes include the change info and size attribute
>>>> then the write delegation is recalled and NFS4ERR_DELAY is returned
>>>> for the GETATTR.
>>>> 
>>>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>>>> ---
>>>>   fs/nfsd/nfs4xdr.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
>>>>   1 file changed, 45 insertions(+)
>>>> 
>>>> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
>>>> index 76db2fe29624..e069b970f136 100644
>>>> --- a/fs/nfsd/nfs4xdr.c
>>>> +++ b/fs/nfsd/nfs4xdr.c
>>>> @@ -2920,6 +2920,46 @@ nfsd4_encode_bitmap(struct xdr_stream *xdr, u32 bmval0, u32 bmval1, u32 bmval2)
>>>>       return nfserr_resource;
>>>>   }
>>>>   +static struct file_lock *
>>>> +nfs4_wrdeleg_filelock(struct svc_rqst *rqstp, struct inode *inode)
>>>> +{
>>>> +    struct file_lock_context *ctx;
>>>> +    struct file_lock *fl;
>>>> +
>>>> +    ctx = locks_inode_context(inode);
>>>> +    if (!ctx)
>>>> +        return NULL;
>>>> +    spin_lock(&ctx->flc_lock);
>>>> +    if (!list_empty(&ctx->flc_lease)) {
>>>> +        fl = list_first_entry(&ctx->flc_lease,
>>>> +                    struct file_lock, fl_list);
>>>> +        if (fl->fl_type == F_WRLCK) {
>>>> +            spin_unlock(&ctx->flc_lock);
>>>> +            return fl;
>>>> +        }
>>>> +    }
>>>> +    spin_unlock(&ctx->flc_lock);
>>>> +    return NULL;
>>>> +}
>>>> +
>>>> +static __be32
>>>> +nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode *inode)
>>>> +{
>>>> +    __be32 status;
>>>> +    struct file_lock *fl;
>>>> +    struct nfs4_delegation *dp;
>>>> +
>>>> +    fl = nfs4_wrdeleg_filelock(rqstp, inode);
>>>> +    if (!fl)
>>>> +        return 0;
>>>> +    dp = fl->fl_owner;
>>>> +    if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker))
>>>> +        return 0;
>>>> +    refcount_inc(&dp->dl_stid.sc_count);
>>> Another question: Why are you taking a reference here at all?
>> 
>> This is same as in nfsd_break_one_deleg and revoke_delegation.
>> I think it is to prevent the delegation to be freed while delegation
>> is being recalled.
>> 
>>>   AFAICT,
>>> you don't even look at the delegation again after that point, so it's
>>> not clear to me who's responsible for putting that reference.
>> 
>> In v2, the sc_count is decrement by nfs4_put_stid. I forgot to do that
>> in V4. I'll add it back in v5.
> 
> Actually the refcount is decremented after the CB_RECALL is done
> by nfs4_put_stid in nfsd4_cb_recall_release. So we don't have to
> decrement it here. This is to prevent the delegation to be free
> while the recall is being sent.

For v5, please add this good information to the documenting comment
for this function.


> -Dai
> 
>> 
>> Thanks,
>> -Dai
>> 
>>> 
>>>> +    status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
>>>> +    return status;
>>>> +}
>>>> +
>>>>   /*
>>>>    * Note: @fhp can be NULL; in this case, we might have to compose the filehandle
>>>>    * ourselves.
>>>> @@ -2966,6 +3006,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
>>>>           if (status)
>>>>               goto out;
>>>>       }
>>>> +    if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
>>>> +        status = nfs4_handle_wrdeleg_conflict(rqstp, d_inode(dentry));
>>>> +        if (status)
>>>> +            goto out;
>>>> +    }
>>>>         err = vfs_getattr(&path, &stat,
>>>>                 STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,

--
Chuck Lever
Jeff Layton May 22, 2023, 1:49 p.m. UTC | #9
On Sun, 2023-05-21 at 20:56 -0700, dai.ngo@oracle.com wrote:
> On 5/21/23 7:56 PM, dai.ngo@oracle.com wrote:
> > 
> > On 5/21/23 4:08 PM, Jeff Layton wrote:
> > > On Sat, 2023-05-20 at 14:36 -0700, Dai Ngo wrote:
> > > > If the GETATTR request on a file that has write delegation in effect
> > > > and the request attributes include the change info and size attribute
> > > > then the write delegation is recalled and NFS4ERR_DELAY is returned
> > > > for the GETATTR.
> > > > 
> > > > Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> > > > ---
> > > >   fs/nfsd/nfs4xdr.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
> > > >   1 file changed, 45 insertions(+)
> > > > 
> > > > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> > > > index 76db2fe29624..e069b970f136 100644
> > > > --- a/fs/nfsd/nfs4xdr.c
> > > > +++ b/fs/nfsd/nfs4xdr.c
> > > > @@ -2920,6 +2920,46 @@ nfsd4_encode_bitmap(struct xdr_stream *xdr, 
> > > > u32 bmval0, u32 bmval1, u32 bmval2)
> > > >       return nfserr_resource;
> > > >   }
> > > >   +static struct file_lock *
> > > > +nfs4_wrdeleg_filelock(struct svc_rqst *rqstp, struct inode *inode)
> > > > +{
> > > > +    struct file_lock_context *ctx;
> > > > +    struct file_lock *fl;
> > > > +
> > > > +    ctx = locks_inode_context(inode);
> > > > +    if (!ctx)
> > > > +        return NULL;
> > > > +    spin_lock(&ctx->flc_lock);
> > > > +    if (!list_empty(&ctx->flc_lease)) {
> > > > +        fl = list_first_entry(&ctx->flc_lease,
> > > > +                    struct file_lock, fl_list);
> > > > +        if (fl->fl_type == F_WRLCK) {
> > > > +            spin_unlock(&ctx->flc_lock);
> > > > +            return fl;
> > > > +        }

One more issue here too. FL_LAYOUT file_locks live on this list too.
They shouldn't conflict with leases or delegations, so you probably just
want to skip them.

Longer term, I wonder if we ought to add a new list in the
file_lock_context for layouts? There's no reason to keep them all on the
same list.

> > > > +    }
> > > > +    spin_unlock(&ctx->flc_lock);
> > > > +    return NULL;
> > > > +}
> > > > +
> > > > +static __be32
> > > > +nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode 
> > > > *inode)
> > > > +{
> > > > +    __be32 status;
> > > > +    struct file_lock *fl;
> > > > +    struct nfs4_delegation *dp;
> > > > +
> > > > +    fl = nfs4_wrdeleg_filelock(rqstp, inode);
> > > > +    if (!fl)
> > > > +        return 0;
> > > > +    dp = fl->fl_owner;
> > > > +    if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker))
> > > > +        return 0;
> > > > +    refcount_inc(&dp->dl_stid.sc_count);
> > > Another question: Why are you taking a reference here at all?
> > 
> > This is same as in nfsd_break_one_deleg and revoke_delegation.
> > I think it is to prevent the delegation to be freed while delegation
> > is being recalled.
> > 
> > >   AFAICT,
> > > you don't even look at the delegation again after that point, so it's
> > > not clear to me who's responsible for putting that reference.
> > 
> > In v2, the sc_count is decrement by nfs4_put_stid. I forgot to do that
> > in V4. I'll add it back in v5.
> 
> Actually the refcount is decremented after the CB_RECALL is done
> by nfs4_put_stid in nfsd4_cb_recall_release. So we don't have to
> decrement it here. This is to prevent the delegation to be free
> while the recall is being sent.
> 

That's the put for the increment in nfsd_break_one_deleg.

You don't need to take an extra reference to a delegation to call
nfsd_open_break_lease. You might not even know which delegation is being
broken. There could even be more than one, after all.

I think that extra refcount_inc is likely to cause a leak.
Dai Ngo May 22, 2023, 5:10 p.m. UTC | #10
On 5/22/23 6:49 AM, Jeff Layton wrote:
> On Sun, 2023-05-21 at 20:56 -0700, dai.ngo@oracle.com wrote:
>> On 5/21/23 7:56 PM, dai.ngo@oracle.com wrote:
>>> On 5/21/23 4:08 PM, Jeff Layton wrote:
>>>> On Sat, 2023-05-20 at 14:36 -0700, Dai Ngo wrote:
>>>>> If the GETATTR request on a file that has write delegation in effect
>>>>> and the request attributes include the change info and size attribute
>>>>> then the write delegation is recalled and NFS4ERR_DELAY is returned
>>>>> for the GETATTR.
>>>>>
>>>>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>>>>> ---
>>>>>    fs/nfsd/nfs4xdr.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
>>>>>    1 file changed, 45 insertions(+)
>>>>>
>>>>> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
>>>>> index 76db2fe29624..e069b970f136 100644
>>>>> --- a/fs/nfsd/nfs4xdr.c
>>>>> +++ b/fs/nfsd/nfs4xdr.c
>>>>> @@ -2920,6 +2920,46 @@ nfsd4_encode_bitmap(struct xdr_stream *xdr,
>>>>> u32 bmval0, u32 bmval1, u32 bmval2)
>>>>>        return nfserr_resource;
>>>>>    }
>>>>>    +static struct file_lock *
>>>>> +nfs4_wrdeleg_filelock(struct svc_rqst *rqstp, struct inode *inode)
>>>>> +{
>>>>> +    struct file_lock_context *ctx;
>>>>> +    struct file_lock *fl;
>>>>> +
>>>>> +    ctx = locks_inode_context(inode);
>>>>> +    if (!ctx)
>>>>> +        return NULL;
>>>>> +    spin_lock(&ctx->flc_lock);
>>>>> +    if (!list_empty(&ctx->flc_lease)) {
>>>>> +        fl = list_first_entry(&ctx->flc_lease,
>>>>> +                    struct file_lock, fl_list);
>>>>> +        if (fl->fl_type == F_WRLCK) {
>>>>> +            spin_unlock(&ctx->flc_lock);
>>>>> +            return fl;
>>>>> +        }
> One more issue here too. FL_LAYOUT file_locks live on this list too.
> They shouldn't conflict with leases or delegations, so you probably just
> want to skip them.

oh ok, that means we have to scan the list and skip the FL_LAYOUT file_locks.

>
> Longer term, I wonder if we ought to add a new list in the
> file_lock_context for layouts? There's no reason to keep them all on the
> same list.

Yes, that would be nice.

Thanks Jeff,
-Dai

>
>>>>> +    }
>>>>> +    spin_unlock(&ctx->flc_lock);
>>>>> +    return NULL;
>>>>> +}
>>>>> +
>>>>> +static __be32
>>>>> +nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode
>>>>> *inode)
>>>>> +{
>>>>> +    __be32 status;
>>>>> +    struct file_lock *fl;
>>>>> +    struct nfs4_delegation *dp;
>>>>> +
>>>>> +    fl = nfs4_wrdeleg_filelock(rqstp, inode);
>>>>> +    if (!fl)
>>>>> +        return 0;
>>>>> +    dp = fl->fl_owner;
>>>>> +    if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker))
>>>>> +        return 0;
>>>>> +    refcount_inc(&dp->dl_stid.sc_count);
>>>> Another question: Why are you taking a reference here at all?
>>> This is same as in nfsd_break_one_deleg and revoke_delegation.
>>> I think it is to prevent the delegation to be freed while delegation
>>> is being recalled.
>>>
>>>>    AFAICT,
>>>> you don't even look at the delegation again after that point, so it's
>>>> not clear to me who's responsible for putting that reference.
>>> In v2, the sc_count is decrement by nfs4_put_stid. I forgot to do that
>>> in V4. I'll add it back in v5.
>> Actually the refcount is decremented after the CB_RECALL is done
>> by nfs4_put_stid in nfsd4_cb_recall_release. So we don't have to
>> decrement it here. This is to prevent the delegation to be free
>> while the recall is being sent.
>>
> That's the put for the increment in nfsd_break_one_deleg.
>
> You don't need to take an extra reference to a delegation to call
> nfsd_open_break_lease. You might not even know which delegation is being
> broken. There could even be more than one, after all.
>
> I think that extra refcount_inc is likely to cause a leak.
diff mbox series

Patch

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 76db2fe29624..e069b970f136 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2920,6 +2920,46 @@  nfsd4_encode_bitmap(struct xdr_stream *xdr, u32 bmval0, u32 bmval1, u32 bmval2)
 	return nfserr_resource;
 }
 
+static struct file_lock *
+nfs4_wrdeleg_filelock(struct svc_rqst *rqstp, struct inode *inode)
+{
+	struct file_lock_context *ctx;
+	struct file_lock *fl;
+
+	ctx = locks_inode_context(inode);
+	if (!ctx)
+		return NULL;
+	spin_lock(&ctx->flc_lock);
+	if (!list_empty(&ctx->flc_lease)) {
+		fl = list_first_entry(&ctx->flc_lease,
+					struct file_lock, fl_list);
+		if (fl->fl_type == F_WRLCK) {
+			spin_unlock(&ctx->flc_lock);
+			return fl;
+		}
+	}
+	spin_unlock(&ctx->flc_lock);
+	return NULL;
+}
+
+static __be32
+nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode *inode)
+{
+	__be32 status;
+	struct file_lock *fl;
+	struct nfs4_delegation *dp;
+
+	fl = nfs4_wrdeleg_filelock(rqstp, inode);
+	if (!fl)
+		return 0;
+	dp = fl->fl_owner;
+	if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker))
+		return 0;
+	refcount_inc(&dp->dl_stid.sc_count);
+	status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
+	return status;
+}
+
 /*
  * Note: @fhp can be NULL; in this case, we might have to compose the filehandle
  * ourselves.
@@ -2966,6 +3006,11 @@  nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
 		if (status)
 			goto out;
 	}
+	if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
+		status = nfs4_handle_wrdeleg_conflict(rqstp, d_inode(dentry));
+		if (status)
+			goto out;
+	}
 
 	err = vfs_getattr(&path, &stat,
 			  STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,