diff mbox series

[v2,2/3] nfsd: Offer write delegations for o_wronly opens

Message ID 20240728204104.519041-3-sagi@grimberg.me (mailing list archive)
State New
Headers show
Series Offer write delegations for O_WRONLY opens | expand

Commit Message

Sagi Grimberg July 28, 2024, 8:41 p.m. UTC
In order to support write delegations with O_WRONLY opens, we need to
allow the clients to read using the write delegation stateid (Per RFC
8881 section 9.1.2. Use of the Stateid and Locking).

Hence, we check for NFS4_SHARE_ACCESS_WRITE set in open request, and
in case the share access flag does not set NFS4_SHARE_ACCESS_READ as
well, we'll open the file locally with O_RDWR in order to allow the
client to use the write delegation stateid when issuing a read in case
it may choose to.

Plus, find_rw_file singular call-site is now removed, remove it altogether.

Note: reads using special stateids that conflict with pending write
delegations are undetected, and will be covered in a follow on patch.

Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 fs/nfsd/nfs4proc.c  | 18 +++++++++++++++++-
 fs/nfsd/nfs4state.c | 42 ++++++++++++++++++++----------------------
 fs/nfsd/xdr4.h      |  2 ++
 3 files changed, 39 insertions(+), 23 deletions(-)

Comments

Jeff Layton July 29, 2024, 12:10 p.m. UTC | #1
On Sun, 2024-07-28 at 23:41 +0300, Sagi Grimberg wrote:
> In order to support write delegations with O_WRONLY opens, we need to
> allow the clients to read using the write delegation stateid (Per RFC
> 8881 section 9.1.2. Use of the Stateid and Locking).
> 
> Hence, we check for NFS4_SHARE_ACCESS_WRITE set in open request, and
> in case the share access flag does not set NFS4_SHARE_ACCESS_READ as
> well, we'll open the file locally with O_RDWR in order to allow the
> client to use the write delegation stateid when issuing a read in
> case
> it may choose to.
> 
> Plus, find_rw_file singular call-site is now removed, remove it
> altogether.
> 
> Note: reads using special stateids that conflict with pending write
> delegations are undetected, and will be covered in a follow on patch.
> 
> Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
> ---
>  fs/nfsd/nfs4proc.c  | 18 +++++++++++++++++-
>  fs/nfsd/nfs4state.c | 42 ++++++++++++++++++++----------------------
>  fs/nfsd/xdr4.h      |  2 ++
>  3 files changed, 39 insertions(+), 23 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index 7b70309ad8fb..041bcc3ab5d7 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -979,8 +979,22 @@ nfsd4_read(struct svc_rqst *rqstp, struct
> nfsd4_compound_state *cstate,
>  	/* check stateid */
>  	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate-
> >current_fh,
>  					&read->rd_stateid, RD_STATE,
> -					&read->rd_nf, NULL);
> +					&read->rd_nf, &read-
> >rd_wd_stid);
>  
> +	/*
> +	 * rd_wd_stid is needed for nfsd4_encode_read to allow write
> +	 * delegation stateid used for read. Its refcount is
> decremented
> +	 * by nfsd4_read_release when read is done.
> +	 */
> +	if (!status) {
> +		if (read->rd_wd_stid &&
> +		    (read->rd_wd_stid->sc_type != SC_TYPE_DELEG ||
> +		     delegstateid(read->rd_wd_stid)->dl_type !=
> +					NFS4_OPEN_DELEGATE_WRITE)) {
> +			nfs4_put_stid(read->rd_wd_stid);
> +			read->rd_wd_stid = NULL;
> +		}
> +	}
>  	read->rd_rqstp = rqstp;
>  	read->rd_fhp = &cstate->current_fh;
>  	return status;
> @@ -990,6 +1004,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct
> nfsd4_compound_state *cstate,
>  static void
>  nfsd4_read_release(union nfsd4_op_u *u)
>  {
> +	if (u->read.rd_wd_stid)
> +		nfs4_put_stid(u->read.rd_wd_stid);
>  	if (u->read.rd_nf)
>  		nfsd_file_put(u->read.rd_nf);
>  	trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp,
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 0645fccbf122..538b6e1127a2 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -639,18 +639,6 @@ find_readable_file(struct nfs4_file *f)
>  	return ret;
>  }
>  
> -static struct nfsd_file *
> -find_rw_file(struct nfs4_file *f)
> -{
> -	struct nfsd_file *ret;
> -
> -	spin_lock(&f->fi_lock);
> -	ret = nfsd_file_get(f->fi_fds[O_RDWR]);
> -	spin_unlock(&f->fi_lock);
> -
> -	return ret;
> -}
> -
>  struct nfsd_file *
>  find_any_file(struct nfs4_file *f)
>  {
> @@ -5784,15 +5772,11 @@ nfs4_set_delegation(struct nfsd4_open *open,
> struct nfs4_ol_stateid *stp,
>  	 *  "An OPEN_DELEGATE_WRITE delegation allows the client to
> handle,
>  	 *   on its own, all opens."
>  	 *
> -	 * Furthermore the client can use a write delegation for
> most READ
> -	 * operations as well, so we require a O_RDWR file here.
> -	 *
> -	 * Offer a write delegation in the case of a BOTH open, and
> ensure
> -	 * we get the O_RDWR descriptor.
> +	 * Offer a write delegation for WRITE or BOTH access
>  	 */
> -	if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH) ==
> NFS4_SHARE_ACCESS_BOTH) {
> -		nf = find_rw_file(fp);
> +	if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) {
>  		dl_type = NFS4_OPEN_DELEGATE_WRITE;
> +		nf = find_writeable_file(fp);
>  	}
>  
>  	/*
> @@ -5934,8 +5918,8 @@ static void nfsd4_open_deleg_none_ext(struct
> nfsd4_open *open, int status)
>   * open or lock state.
>   */
>  static void
> -nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid
> *stp,
> -		     struct svc_fh *currentfh)
> +nfs4_open_delegation(struct svc_rqst *rqstp, struct nfsd4_open
> *open,
> +		struct nfs4_ol_stateid *stp, struct svc_fh
> *currentfh)
>  {
>  	struct nfs4_delegation *dp;
>  	struct nfs4_openowner *oo = openowner(stp->st_stateowner);
> @@ -5994,6 +5978,20 @@ nfs4_open_delegation(struct nfsd4_open *open,
> struct nfs4_ol_stateid *stp,
>  		dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
>  		dp->dl_cb_fattr.ncf_initial_cinfo =
>  			nfsd4_change_attribute(&stat,
> d_inode(currentfh->fh_dentry));
> +		if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH)
> != NFS4_SHARE_ACCESS_BOTH) {
> +			struct nfsd_file *nf = NULL;
> +
> +			/* make sure the file is opened locally for
> O_RDWR */
> +			status = nfsd_file_acquire_opened(rqstp,
> currentfh,
> +				nfs4_access_to_access(NFS4_SHARE_ACC
> ESS_BOTH),
> +				open->op_filp, &nf);
> +			if (status) {
> +				nfs4_put_stid(&dp->dl_stid);
> +				destroy_delegation(dp);
> +				goto out_no_deleg;
> +			}
> +			stp->st_stid.sc_file->fi_fds[O_RDWR] = nf;

I have a bit of a concern here. When we go to put access references to
the fi_fds, that's done according to the st_access_bmap. Here though,
you're adding an extra reference for the O_RDWR fd, but I don't see
where you're calling set_access for that on the delegation stateid? Am
I missing where that happens? Not doing that may lead to fd leaks if it
was missed.

> +		}
>  	} else {
>  		open->op_delegate_type = NFS4_OPEN_DELEGATE_READ;
>  		trace_nfsd_deleg_read(&dp->dl_stid.sc_stateid);
> @@ -6123,7 +6121,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp,
> struct svc_fh *current_fh, struct nf
>  	* Attempt to hand out a delegation. No error return, because
> the
>  	* OPEN succeeds even if we fail.
>  	*/
> -	nfs4_open_delegation(open, stp, &resp->cstate.current_fh);
> +	nfs4_open_delegation(rqstp, open, stp, &resp-
> >cstate.current_fh);
>  nodeleg:
>  	status = nfs_ok;
>  	trace_nfsd_open(&stp->st_stid.sc_stateid);
> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> index fbdd42cde1fa..434973a6a8b1 100644
> --- a/fs/nfsd/xdr4.h
> +++ b/fs/nfsd/xdr4.h
> @@ -426,6 +426,8 @@ struct nfsd4_read {
>  	struct svc_rqst		*rd_rqstp;          /*
> response */
>  	struct svc_fh		*rd_fhp;            /* response */
>  	u32			rd_eof;             /* response */
> +
> +	struct nfs4_stid	*rd_wd_stid;		/* internal
> */
>  };
>  
>  struct nfsd4_readdir {
Jeff Layton July 29, 2024, 12:26 p.m. UTC | #2
On Sun, 2024-07-28 at 23:41 +0300, Sagi Grimberg wrote:
> In order to support write delegations with O_WRONLY opens, we need to
> allow the clients to read using the write delegation stateid (Per RFC
> 8881 section 9.1.2. Use of the Stateid and Locking).
> 
> Hence, we check for NFS4_SHARE_ACCESS_WRITE set in open request, and
> in case the share access flag does not set NFS4_SHARE_ACCESS_READ as
> well, we'll open the file locally with O_RDWR in order to allow the
> client to use the write delegation stateid when issuing a read in case
> it may choose to.
> 
> Plus, find_rw_file singular call-site is now removed, remove it altogether.
> 
> Note: reads using special stateids that conflict with pending write
> delegations are undetected, and will be covered in a follow on patch.
> 
> Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
> ---
>  fs/nfsd/nfs4proc.c  | 18 +++++++++++++++++-
>  fs/nfsd/nfs4state.c | 42 ++++++++++++++++++++----------------------
>  fs/nfsd/xdr4.h      |  2 ++
>  3 files changed, 39 insertions(+), 23 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index 7b70309ad8fb..041bcc3ab5d7 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -979,8 +979,22 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	/* check stateid */
>  	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
>  					&read->rd_stateid, RD_STATE,
> -					&read->rd_nf, NULL);
> +					&read->rd_nf, &read->rd_wd_stid);
>  
> +	/*
> +	 * rd_wd_stid is needed for nfsd4_encode_read to allow write
> +	 * delegation stateid used for read. Its refcount is decremented
> +	 * by nfsd4_read_release when read is done.
> +	 */
> +	if (!status) {
> +		if (read->rd_wd_stid &&
> +		    (read->rd_wd_stid->sc_type != SC_TYPE_DELEG ||
> +		     delegstateid(read->rd_wd_stid)->dl_type !=
> +					NFS4_OPEN_DELEGATE_WRITE)) {
> +			nfs4_put_stid(read->rd_wd_stid);
> +			read->rd_wd_stid = NULL;
> +		}
> +	}
>  	read->rd_rqstp = rqstp;
>  	read->rd_fhp = &cstate->current_fh;
>  	return status;
> @@ -990,6 +1004,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  static void
>  nfsd4_read_release(union nfsd4_op_u *u)
>  {
> +	if (u->read.rd_wd_stid)
> +		nfs4_put_stid(u->read.rd_wd_stid);
>  	if (u->read.rd_nf)
>  		nfsd_file_put(u->read.rd_nf);
>  	trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp,
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 0645fccbf122..538b6e1127a2 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -639,18 +639,6 @@ find_readable_file(struct nfs4_file *f)
>  	return ret;
>  }
>  
> -static struct nfsd_file *
> -find_rw_file(struct nfs4_file *f)
> -{
> -	struct nfsd_file *ret;
> -
> -	spin_lock(&f->fi_lock);
> -	ret = nfsd_file_get(f->fi_fds[O_RDWR]);
> -	spin_unlock(&f->fi_lock);
> -
> -	return ret;
> -}
> -
>  struct nfsd_file *
>  find_any_file(struct nfs4_file *f)
>  {
> @@ -5784,15 +5772,11 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
>  	 *  "An OPEN_DELEGATE_WRITE delegation allows the client to handle,
>  	 *   on its own, all opens."
>  	 *
> -	 * Furthermore the client can use a write delegation for most READ
> -	 * operations as well, so we require a O_RDWR file here.
> -	 *
> -	 * Offer a write delegation in the case of a BOTH open, and ensure
> -	 * we get the O_RDWR descriptor.
> +	 * Offer a write delegation for WRITE or BOTH access
>  	 */
> -	if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH) == NFS4_SHARE_ACCESS_BOTH) {
> -		nf = find_rw_file(fp);
> +	if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) {
>  		dl_type = NFS4_OPEN_DELEGATE_WRITE;
> +		nf = find_writeable_file(fp);
>  	}
>  
>  	/*
> @@ -5934,8 +5918,8 @@ static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status)
>   * open or lock state.
>   */
>  static void
> -nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
> -		     struct svc_fh *currentfh)
> +nfs4_open_delegation(struct svc_rqst *rqstp, struct nfsd4_open *open,
> +		struct nfs4_ol_stateid *stp, struct svc_fh *currentfh)
>  {
>  	struct nfs4_delegation *dp;
>  	struct nfs4_openowner *oo = openowner(stp->st_stateowner);
> @@ -5994,6 +5978,20 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
>  		dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
>  		dp->dl_cb_fattr.ncf_initial_cinfo =
>  			nfsd4_change_attribute(&stat, d_inode(currentfh->fh_dentry));
> +		if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH) != NFS4_SHARE_ACCESS_BOTH) {

More comments on this part:


nit: You've already tested for NFS4_SHARE_ACCESS_WRITE here, and this
seems easier to read:

		if (!(open->op_share_access & NFS4_SHARE_ACCESS_READ))



> +			struct nfsd_file *nf = NULL;
> +
> +			/* make sure the file is opened locally for O_RDWR */
> +			status = nfsd_file_acquire_opened(rqstp, currentfh,
> +				nfs4_access_to_access(NFS4_SHARE_ACCESS_BOTH),
> +				open->op_filp, &nf);
> +			if (status) {
> +				nfs4_put_stid(&dp->dl_stid);
> +				destroy_delegation(dp);
> +				goto out_no_deleg;
> +			}
> +			stp->st_stid.sc_file->fi_fds[O_RDWR] = nf;

How do you know that this fd isn't already set? Also, this field is
protected by the sc_file->fi_lock and that's not held here.

> +		}
>  	} else {
>  		open->op_delegate_type = NFS4_OPEN_DELEGATE_READ;
>  		trace_nfsd_deleg_read(&dp->dl_stid.sc_stateid);
> @@ -6123,7 +6121,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
>  	* Attempt to hand out a delegation. No error return, because the
>  	* OPEN succeeds even if we fail.
>  	*/
> -	nfs4_open_delegation(open, stp, &resp->cstate.current_fh);
> +	nfs4_open_delegation(rqstp, open, stp, &resp->cstate.current_fh);
>  nodeleg:
>  	status = nfs_ok;
>  	trace_nfsd_open(&stp->st_stid.sc_stateid);
> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> index fbdd42cde1fa..434973a6a8b1 100644
> --- a/fs/nfsd/xdr4.h
> +++ b/fs/nfsd/xdr4.h
> @@ -426,6 +426,8 @@ struct nfsd4_read {
>  	struct svc_rqst		*rd_rqstp;          /* response */
>  	struct svc_fh		*rd_fhp;            /* response */
>  	u32			rd_eof;             /* response */
> +
> +	struct nfs4_stid	*rd_wd_stid;		/* internal */
>  };
>  
>  struct nfsd4_readdir {
Sagi Grimberg July 29, 2024, 12:58 p.m. UTC | #3
On 29/07/2024 15:10, Jeff Layton wrote:
> On Sun, 2024-07-28 at 23:41 +0300, Sagi Grimberg wrote:
>> In order to support write delegations with O_WRONLY opens, we need to
>> allow the clients to read using the write delegation stateid (Per RFC
>> 8881 section 9.1.2. Use of the Stateid and Locking).
>>
>> Hence, we check for NFS4_SHARE_ACCESS_WRITE set in open request, and
>> in case the share access flag does not set NFS4_SHARE_ACCESS_READ as
>> well, we'll open the file locally with O_RDWR in order to allow the
>> client to use the write delegation stateid when issuing a read in
>> case
>> it may choose to.
>>
>> Plus, find_rw_file singular call-site is now removed, remove it
>> altogether.
>>
>> Note: reads using special stateids that conflict with pending write
>> delegations are undetected, and will be covered in a follow on patch.
>>
>> Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
>> ---
>>   fs/nfsd/nfs4proc.c  | 18 +++++++++++++++++-
>>   fs/nfsd/nfs4state.c | 42 ++++++++++++++++++++----------------------
>>   fs/nfsd/xdr4.h      |  2 ++
>>   3 files changed, 39 insertions(+), 23 deletions(-)
>>
>> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
>> index 7b70309ad8fb..041bcc3ab5d7 100644
>> --- a/fs/nfsd/nfs4proc.c
>> +++ b/fs/nfsd/nfs4proc.c
>> @@ -979,8 +979,22 @@ nfsd4_read(struct svc_rqst *rqstp, struct
>> nfsd4_compound_state *cstate,
>>   	/* check stateid */
>>   	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate-
>>> current_fh,
>>   					&read->rd_stateid, RD_STATE,
>> -					&read->rd_nf, NULL);
>> +					&read->rd_nf, &read-
>>> rd_wd_stid);
>>   
>> +	/*
>> +	 * rd_wd_stid is needed for nfsd4_encode_read to allow write
>> +	 * delegation stateid used for read. Its refcount is
>> decremented
>> +	 * by nfsd4_read_release when read is done.
>> +	 */
>> +	if (!status) {
>> +		if (read->rd_wd_stid &&
>> +		    (read->rd_wd_stid->sc_type != SC_TYPE_DELEG ||
>> +		     delegstateid(read->rd_wd_stid)->dl_type !=
>> +					NFS4_OPEN_DELEGATE_WRITE)) {
>> +			nfs4_put_stid(read->rd_wd_stid);
>> +			read->rd_wd_stid = NULL;
>> +		}
>> +	}
>>   	read->rd_rqstp = rqstp;
>>   	read->rd_fhp = &cstate->current_fh;
>>   	return status;
>> @@ -990,6 +1004,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct
>> nfsd4_compound_state *cstate,
>>   static void
>>   nfsd4_read_release(union nfsd4_op_u *u)
>>   {
>> +	if (u->read.rd_wd_stid)
>> +		nfs4_put_stid(u->read.rd_wd_stid);
>>   	if (u->read.rd_nf)
>>   		nfsd_file_put(u->read.rd_nf);
>>   	trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp,
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index 0645fccbf122..538b6e1127a2 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -639,18 +639,6 @@ find_readable_file(struct nfs4_file *f)
>>   	return ret;
>>   }
>>   
>> -static struct nfsd_file *
>> -find_rw_file(struct nfs4_file *f)
>> -{
>> -	struct nfsd_file *ret;
>> -
>> -	spin_lock(&f->fi_lock);
>> -	ret = nfsd_file_get(f->fi_fds[O_RDWR]);
>> -	spin_unlock(&f->fi_lock);
>> -
>> -	return ret;
>> -}
>> -
>>   struct nfsd_file *
>>   find_any_file(struct nfs4_file *f)
>>   {
>> @@ -5784,15 +5772,11 @@ nfs4_set_delegation(struct nfsd4_open *open,
>> struct nfs4_ol_stateid *stp,
>>   	 *  "An OPEN_DELEGATE_WRITE delegation allows the client to
>> handle,
>>   	 *   on its own, all opens."
>>   	 *
>> -	 * Furthermore the client can use a write delegation for
>> most READ
>> -	 * operations as well, so we require a O_RDWR file here.
>> -	 *
>> -	 * Offer a write delegation in the case of a BOTH open, and
>> ensure
>> -	 * we get the O_RDWR descriptor.
>> +	 * Offer a write delegation for WRITE or BOTH access
>>   	 */
>> -	if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH) ==
>> NFS4_SHARE_ACCESS_BOTH) {
>> -		nf = find_rw_file(fp);
>> +	if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) {
>>   		dl_type = NFS4_OPEN_DELEGATE_WRITE;
>> +		nf = find_writeable_file(fp);
>>   	}
>>   
>>   	/*
>> @@ -5934,8 +5918,8 @@ static void nfsd4_open_deleg_none_ext(struct
>> nfsd4_open *open, int status)
>>    * open or lock state.
>>    */
>>   static void
>> -nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid
>> *stp,
>> -		     struct svc_fh *currentfh)
>> +nfs4_open_delegation(struct svc_rqst *rqstp, struct nfsd4_open
>> *open,
>> +		struct nfs4_ol_stateid *stp, struct svc_fh
>> *currentfh)
>>   {
>>   	struct nfs4_delegation *dp;
>>   	struct nfs4_openowner *oo = openowner(stp->st_stateowner);
>> @@ -5994,6 +5978,20 @@ nfs4_open_delegation(struct nfsd4_open *open,
>> struct nfs4_ol_stateid *stp,
>>   		dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
>>   		dp->dl_cb_fattr.ncf_initial_cinfo =
>>   			nfsd4_change_attribute(&stat,
>> d_inode(currentfh->fh_dentry));
>> +		if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH)
>> != NFS4_SHARE_ACCESS_BOTH) {
>> +			struct nfsd_file *nf = NULL;
>> +
>> +			/* make sure the file is opened locally for
>> O_RDWR */
>> +			status = nfsd_file_acquire_opened(rqstp,
>> currentfh,
>> +				nfs4_access_to_access(NFS4_SHARE_ACC
>> ESS_BOTH),
>> +				open->op_filp, &nf);
>> +			if (status) {
>> +				nfs4_put_stid(&dp->dl_stid);
>> +				destroy_delegation(dp);
>> +				goto out_no_deleg;
>> +			}
>> +			stp->st_stid.sc_file->fi_fds[O_RDWR] = nf;
> I have a bit of a concern here. When we go to put access references to
> the fi_fds, that's done according to the st_access_bmap. Here though,
> you're adding an extra reference for the O_RDWR fd, but I don't see
> where you're calling set_access for that on the delegation stateid? Am
> I missing where that happens? Not doing that may lead to fd leaks if it
> was missed.

Ah, this is something that I did not fully understand...
However it looks like st_access_bmap is not something that is
accounted on the delegation stateid...

Can I simply set it on the open stateid (stp)?
Sagi Grimberg July 29, 2024, 12:59 p.m. UTC | #4
On 29/07/2024 15:26, Jeff Layton wrote:
> On Sun, 2024-07-28 at 23:41 +0300, Sagi Grimberg wrote:
>> In order to support write delegations with O_WRONLY opens, we need to
>> allow the clients to read using the write delegation stateid (Per RFC
>> 8881 section 9.1.2. Use of the Stateid and Locking).
>>
>> Hence, we check for NFS4_SHARE_ACCESS_WRITE set in open request, and
>> in case the share access flag does not set NFS4_SHARE_ACCESS_READ as
>> well, we'll open the file locally with O_RDWR in order to allow the
>> client to use the write delegation stateid when issuing a read in case
>> it may choose to.
>>
>> Plus, find_rw_file singular call-site is now removed, remove it altogether.
>>
>> Note: reads using special stateids that conflict with pending write
>> delegations are undetected, and will be covered in a follow on patch.
>>
>> Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
>> ---
>>   fs/nfsd/nfs4proc.c  | 18 +++++++++++++++++-
>>   fs/nfsd/nfs4state.c | 42 ++++++++++++++++++++----------------------
>>   fs/nfsd/xdr4.h      |  2 ++
>>   3 files changed, 39 insertions(+), 23 deletions(-)
>>
>> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
>> index 7b70309ad8fb..041bcc3ab5d7 100644
>> --- a/fs/nfsd/nfs4proc.c
>> +++ b/fs/nfsd/nfs4proc.c
>> @@ -979,8 +979,22 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>>   	/* check stateid */
>>   	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
>>   					&read->rd_stateid, RD_STATE,
>> -					&read->rd_nf, NULL);
>> +					&read->rd_nf, &read->rd_wd_stid);
>>   
>> +	/*
>> +	 * rd_wd_stid is needed for nfsd4_encode_read to allow write
>> +	 * delegation stateid used for read. Its refcount is decremented
>> +	 * by nfsd4_read_release when read is done.
>> +	 */
>> +	if (!status) {
>> +		if (read->rd_wd_stid &&
>> +		    (read->rd_wd_stid->sc_type != SC_TYPE_DELEG ||
>> +		     delegstateid(read->rd_wd_stid)->dl_type !=
>> +					NFS4_OPEN_DELEGATE_WRITE)) {
>> +			nfs4_put_stid(read->rd_wd_stid);
>> +			read->rd_wd_stid = NULL;
>> +		}
>> +	}
>>   	read->rd_rqstp = rqstp;
>>   	read->rd_fhp = &cstate->current_fh;
>>   	return status;
>> @@ -990,6 +1004,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>>   static void
>>   nfsd4_read_release(union nfsd4_op_u *u)
>>   {
>> +	if (u->read.rd_wd_stid)
>> +		nfs4_put_stid(u->read.rd_wd_stid);
>>   	if (u->read.rd_nf)
>>   		nfsd_file_put(u->read.rd_nf);
>>   	trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp,
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index 0645fccbf122..538b6e1127a2 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -639,18 +639,6 @@ find_readable_file(struct nfs4_file *f)
>>   	return ret;
>>   }
>>   
>> -static struct nfsd_file *
>> -find_rw_file(struct nfs4_file *f)
>> -{
>> -	struct nfsd_file *ret;
>> -
>> -	spin_lock(&f->fi_lock);
>> -	ret = nfsd_file_get(f->fi_fds[O_RDWR]);
>> -	spin_unlock(&f->fi_lock);
>> -
>> -	return ret;
>> -}
>> -
>>   struct nfsd_file *
>>   find_any_file(struct nfs4_file *f)
>>   {
>> @@ -5784,15 +5772,11 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
>>   	 *  "An OPEN_DELEGATE_WRITE delegation allows the client to handle,
>>   	 *   on its own, all opens."
>>   	 *
>> -	 * Furthermore the client can use a write delegation for most READ
>> -	 * operations as well, so we require a O_RDWR file here.
>> -	 *
>> -	 * Offer a write delegation in the case of a BOTH open, and ensure
>> -	 * we get the O_RDWR descriptor.
>> +	 * Offer a write delegation for WRITE or BOTH access
>>   	 */
>> -	if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH) == NFS4_SHARE_ACCESS_BOTH) {
>> -		nf = find_rw_file(fp);
>> +	if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) {
>>   		dl_type = NFS4_OPEN_DELEGATE_WRITE;
>> +		nf = find_writeable_file(fp);
>>   	}
>>   
>>   	/*
>> @@ -5934,8 +5918,8 @@ static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status)
>>    * open or lock state.
>>    */
>>   static void
>> -nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
>> -		     struct svc_fh *currentfh)
>> +nfs4_open_delegation(struct svc_rqst *rqstp, struct nfsd4_open *open,
>> +		struct nfs4_ol_stateid *stp, struct svc_fh *currentfh)
>>   {
>>   	struct nfs4_delegation *dp;
>>   	struct nfs4_openowner *oo = openowner(stp->st_stateowner);
>> @@ -5994,6 +5978,20 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
>>   		dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
>>   		dp->dl_cb_fattr.ncf_initial_cinfo =
>>   			nfsd4_change_attribute(&stat, d_inode(currentfh->fh_dentry));
>> +		if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH) != NFS4_SHARE_ACCESS_BOTH) {
> More comments on this part:
>
>
> nit: You've already tested for NFS4_SHARE_ACCESS_WRITE here, and this
> seems easier to read:
>
> 		if (!(open->op_share_access & NFS4_SHARE_ACCESS_READ))
>
>
>
>> +			struct nfsd_file *nf = NULL;
>> +
>> +			/* make sure the file is opened locally for O_RDWR */
>> +			status = nfsd_file_acquire_opened(rqstp, currentfh,
>> +				nfs4_access_to_access(NFS4_SHARE_ACCESS_BOTH),
>> +				open->op_filp, &nf);
>> +			if (status) {
>> +				nfs4_put_stid(&dp->dl_stid);
>> +				destroy_delegation(dp);
>> +				goto out_no_deleg;
>> +			}
>> +			stp->st_stid.sc_file->fi_fds[O_RDWR] = nf;
> How do you know that this fd isn't already set? Also, this field is
> protected by the sc_file->fi_lock and that's not held here.

Something like this?
--
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index a6c6d813c59c..ee0c65ff1940 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -5978,19 +5978,24 @@ nfs4_open_delegation(struct svc_rqst *rqstp, 
struct nfsd4_open *open,
                 dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
                 dp->dl_cb_fattr.ncf_initial_cinfo =
                         nfsd4_change_attribute(&stat, 
d_inode(currentfh->fh_dentry));
-               if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH) != 
NFS4_SHARE_ACCESS_BOTH) {
+               if (!(open->op_share_access & NFS4_SHARE_ACCESS_READ)) {
+                       u32 access = 
nfs4_access_to_access(NFS4_SHARE_ACCESS_BOTH);
+                       struct nfs4_file *fp = stp->st_stid.sc_file;
                         struct nfsd_file *nf = NULL;

                         /* make sure the file is opened locally for 
O_RDWR */
+                       set_access(access, stp);
                         status = nfsd_file_acquire_opened(rqstp, currentfh,
- nfs4_access_to_access(NFS4_SHARE_ACCESS_BOTH),
-                               open->op_filp, &nf);
+                                       access, open->op_filp, &nf);
                         if (status) {
                                 nfs4_put_stid(&dp->dl_stid);
                                 destroy_delegation(dp);
                                 goto out_no_deleg;
                         }
-                       stp->st_stid.sc_file->fi_fds[O_RDWR] = nf;
+                       spin_lock(&fp->fi_lock);
+                       if (!fp->fi_fds[O_RDWR])
+                               fp->fi_fds[O_RDWR] = nf;
+                       spin_lock(&fp->fi_lock);
                 }
         } else {
--
Jeff Layton July 29, 2024, 1:10 p.m. UTC | #5
On Mon, 2024-07-29 at 15:58 +0300, Sagi Grimberg wrote:
> 
> 
> 
> On 29/07/2024 15:10, Jeff Layton wrote:
> > On Sun, 2024-07-28 at 23:41 +0300, Sagi Grimberg wrote:
> > > In order to support write delegations with O_WRONLY opens, we
> > > need to
> > > allow the clients to read using the write delegation stateid (Per
> > > RFC
> > > 8881 section 9.1.2. Use of the Stateid and Locking).
> > > 
> > > Hence, we check for NFS4_SHARE_ACCESS_WRITE set in open request,
> > > and
> > > in case the share access flag does not set NFS4_SHARE_ACCESS_READ
> > > as
> > > well, we'll open the file locally with O_RDWR in order to allow
> > > the
> > > client to use the write delegation stateid when issuing a read in
> > > case
> > > it may choose to.
> > > 
> > > Plus, find_rw_file singular call-site is now removed, remove it
> > > altogether.
> > > 
> > > Note: reads using special stateids that conflict with pending
> > > write
> > > delegations are undetected, and will be covered in a follow on
> > > patch.
> > > 
> > > Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
> > > ---
> > >   fs/nfsd/nfs4proc.c  | 18 +++++++++++++++++-
> > >   fs/nfsd/nfs4state.c | 42 ++++++++++++++++++++------------------
> > > ----
> > >   fs/nfsd/xdr4.h      |  2 ++
> > >   3 files changed, 39 insertions(+), 23 deletions(-)
> > > 
> > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> > > index 7b70309ad8fb..041bcc3ab5d7 100644
> > > --- a/fs/nfsd/nfs4proc.c
> > > +++ b/fs/nfsd/nfs4proc.c
> > > @@ -979,8 +979,22 @@ nfsd4_read(struct svc_rqst *rqstp, struct
> > > nfsd4_compound_state *cstate,
> > >   	/* check stateid */
> > >   	status = nfs4_preprocess_stateid_op(rqstp, cstate,
> > > &cstate-
> > > > current_fh,
> > >   					&read->rd_stateid,
> > > RD_STATE,
> > > -					&read->rd_nf, NULL);
> > > +					&read->rd_nf, &read-
> > > > rd_wd_stid);
> > >   
> > > +	/*
> > > +	 * rd_wd_stid is needed for nfsd4_encode_read to allow
> > > write
> > > +	 * delegation stateid used for read. Its refcount is
> > > decremented
> > > +	 * by nfsd4_read_release when read is done.
> > > +	 */
> > > +	if (!status) {
> > > +		if (read->rd_wd_stid &&
> > > +		    (read->rd_wd_stid->sc_type != SC_TYPE_DELEG
> > > ||
> > > +		     delegstateid(read->rd_wd_stid)->dl_type !=
> > > +					NFS4_OPEN_DELEGATE_WRITE
> > > )) {
> > > +			nfs4_put_stid(read->rd_wd_stid);
> > > +			read->rd_wd_stid = NULL;
> > > +		}
> > > +	}
> > >   	read->rd_rqstp = rqstp;
> > >   	read->rd_fhp = &cstate->current_fh;
> > >   	return status;
> > > @@ -990,6 +1004,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct
> > > nfsd4_compound_state *cstate,
> > >   static void
> > >   nfsd4_read_release(union nfsd4_op_u *u)
> > >   {
> > > +	if (u->read.rd_wd_stid)
> > > +		nfs4_put_stid(u->read.rd_wd_stid);
> > >   	if (u->read.rd_nf)
> > >   		nfsd_file_put(u->read.rd_nf);
> > >   	trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp,
> > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > > index 0645fccbf122..538b6e1127a2 100644
> > > --- a/fs/nfsd/nfs4state.c
> > > +++ b/fs/nfsd/nfs4state.c
> > > @@ -639,18 +639,6 @@ find_readable_file(struct nfs4_file *f)
> > >   	return ret;
> > >   }
> > >   
> > > -static struct nfsd_file *
> > > -find_rw_file(struct nfs4_file *f)
> > > -{
> > > -	struct nfsd_file *ret;
> > > -
> > > -	spin_lock(&f->fi_lock);
> > > -	ret = nfsd_file_get(f->fi_fds[O_RDWR]);
> > > -	spin_unlock(&f->fi_lock);
> > > -
> > > -	return ret;
> > > -}
> > > -
> > >   struct nfsd_file *
> > >   find_any_file(struct nfs4_file *f)
> > >   {
> > > @@ -5784,15 +5772,11 @@ nfs4_set_delegation(struct nfsd4_open
> > > *open,
> > > struct nfs4_ol_stateid *stp,
> > >   	 *  "An OPEN_DELEGATE_WRITE delegation allows the client
> > > to
> > > handle,
> > >   	 *   on its own, all opens."
> > >   	 *
> > > -	 * Furthermore the client can use a write delegation for
> > > most READ
> > > -	 * operations as well, so we require a O_RDWR file here.
> > > -	 *
> > > -	 * Offer a write delegation in the case of a BOTH open,
> > > and
> > > ensure
> > > -	 * we get the O_RDWR descriptor.
> > > +	 * Offer a write delegation for WRITE or BOTH access
> > >   	 */
> > > -	if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH) ==
> > > NFS4_SHARE_ACCESS_BOTH) {
> > > -		nf = find_rw_file(fp);
> > > +	if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) {
> > >   		dl_type = NFS4_OPEN_DELEGATE_WRITE;
> > > +		nf = find_writeable_file(fp);
> > >   	}
> > >   
> > >   	/*
> > > @@ -5934,8 +5918,8 @@ static void
> > > nfsd4_open_deleg_none_ext(struct
> > > nfsd4_open *open, int status)
> > >    * open or lock state.
> > >    */
> > >   static void
> > > -nfs4_open_delegation(struct nfsd4_open *open, struct
> > > nfs4_ol_stateid
> > > *stp,
> > > -		     struct svc_fh *currentfh)
> > > +nfs4_open_delegation(struct svc_rqst *rqstp, struct nfsd4_open
> > > *open,
> > > +		struct nfs4_ol_stateid *stp, struct svc_fh
> > > *currentfh)
> > >   {
> > >   	struct nfs4_delegation *dp;
> > >   	struct nfs4_openowner *oo = openowner(stp-
> > > >st_stateowner);
> > > @@ -5994,6 +5978,20 @@ nfs4_open_delegation(struct nfsd4_open
> > > *open,
> > > struct nfs4_ol_stateid *stp,
> > >   		dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
> > >   		dp->dl_cb_fattr.ncf_initial_cinfo =
> > >   			nfsd4_change_attribute(&stat,
> > > d_inode(currentfh->fh_dentry));
> > > +		if ((open->op_share_access &
> > > NFS4_SHARE_ACCESS_BOTH)
> > > != NFS4_SHARE_ACCESS_BOTH) {
> > > +			struct nfsd_file *nf = NULL;
> > > +
> > > +			/* make sure the file is opened locally
> > > for
> > > O_RDWR */
> > > +			status = nfsd_file_acquire_opened(rqstp,
> > > currentfh,
> > > +				nfs4_access_to_access(NFS4_SHARE
> > > _ACC
> > > ESS_BOTH),
> > > +				open->op_filp, &nf);
> > > +			if (status) {
> > > +				nfs4_put_stid(&dp->dl_stid);
> > > +				destroy_delegation(dp);
> > > +				goto out_no_deleg;
> > > +			}
> > > +			stp->st_stid.sc_file->fi_fds[O_RDWR] =
> > > nf;
> > I have a bit of a concern here. When we go to put access references
> > to
> > the fi_fds, that's done according to the st_access_bmap. Here
> > though,
> > you're adding an extra reference for the O_RDWR fd, but I don't see
> > where you're calling set_access for that on the delegation stateid?
> > Am
> > I missing where that happens? Not doing that may lead to fd leaks
> > if it
> > was missed.
> 
> Ah, this is something that I did not fully understand...
> However it looks like st_access_bmap is not something that is
> accounted on the delegation stateid...
> 
> Can I simply set it on the open stateid (stp)?

That would likely fix the leak, but I'm not sure that's the best
approach. You have an NFS4_SHARE_ACCESS_WRITE-only stateid here, and
that would turn it a NFS4_SHARE_ACCESS_BOTH one, wouldn't it?

It wouldn't surprise me if that might break a testcase or two.
Jeff Layton July 29, 2024, 1:17 p.m. UTC | #6
On Mon, 2024-07-29 at 15:59 +0300, Sagi Grimberg wrote:
> 
> 
> 
> On 29/07/2024 15:26, Jeff Layton wrote:
> > On Sun, 2024-07-28 at 23:41 +0300, Sagi Grimberg wrote:
> > > In order to support write delegations with O_WRONLY opens, we
> > > need to
> > > allow the clients to read using the write delegation stateid (Per
> > > RFC
> > > 8881 section 9.1.2. Use of the Stateid and Locking).
> > > 
> > > Hence, we check for NFS4_SHARE_ACCESS_WRITE set in open request,
> > > and
> > > in case the share access flag does not set NFS4_SHARE_ACCESS_READ
> > > as
> > > well, we'll open the file locally with O_RDWR in order to allow
> > > the
> > > client to use the write delegation stateid when issuing a read in
> > > case
> > > it may choose to.
> > > 
> > > Plus, find_rw_file singular call-site is now removed, remove it
> > > altogether.
> > > 
> > > Note: reads using special stateids that conflict with pending
> > > write
> > > delegations are undetected, and will be covered in a follow on
> > > patch.
> > > 
> > > Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
> > > ---
> > >   fs/nfsd/nfs4proc.c  | 18 +++++++++++++++++-
> > >   fs/nfsd/nfs4state.c | 42 ++++++++++++++++++++------------------
> > > ----
> > >   fs/nfsd/xdr4.h      |  2 ++
> > >   3 files changed, 39 insertions(+), 23 deletions(-)
> > > 
> > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> > > index 7b70309ad8fb..041bcc3ab5d7 100644
> > > --- a/fs/nfsd/nfs4proc.c
> > > +++ b/fs/nfsd/nfs4proc.c
> > > @@ -979,8 +979,22 @@ nfsd4_read(struct svc_rqst *rqstp, struct
> > > nfsd4_compound_state *cstate,
> > >   	/* check stateid */
> > >   	status = nfs4_preprocess_stateid_op(rqstp, cstate,
> > > &cstate->current_fh,
> > >   					&read->rd_stateid,
> > > RD_STATE,
> > > -					&read->rd_nf, NULL);
> > > +					&read->rd_nf, &read-
> > > >rd_wd_stid);
> > >   
> > > +	/*
> > > +	 * rd_wd_stid is needed for nfsd4_encode_read to allow
> > > write
> > > +	 * delegation stateid used for read. Its refcount is
> > > decremented
> > > +	 * by nfsd4_read_release when read is done.
> > > +	 */
> > > +	if (!status) {
> > > +		if (read->rd_wd_stid &&
> > > +		    (read->rd_wd_stid->sc_type != SC_TYPE_DELEG
> > > ||
> > > +		     delegstateid(read->rd_wd_stid)->dl_type !=
> > > +					NFS4_OPEN_DELEGATE_WRITE
> > > )) {
> > > +			nfs4_put_stid(read->rd_wd_stid);
> > > +			read->rd_wd_stid = NULL;
> > > +		}
> > > +	}
> > >   	read->rd_rqstp = rqstp;
> > >   	read->rd_fhp = &cstate->current_fh;
> > >   	return status;
> > > @@ -990,6 +1004,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct
> > > nfsd4_compound_state *cstate,
> > >   static void
> > >   nfsd4_read_release(union nfsd4_op_u *u)
> > >   {
> > > +	if (u->read.rd_wd_stid)
> > > +		nfs4_put_stid(u->read.rd_wd_stid);
> > >   	if (u->read.rd_nf)
> > >   		nfsd_file_put(u->read.rd_nf);
> > >   	trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp,
> > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > > index 0645fccbf122..538b6e1127a2 100644
> > > --- a/fs/nfsd/nfs4state.c
> > > +++ b/fs/nfsd/nfs4state.c
> > > @@ -639,18 +639,6 @@ find_readable_file(struct nfs4_file *f)
> > >   	return ret;
> > >   }
> > >   
> > > -static struct nfsd_file *
> > > -find_rw_file(struct nfs4_file *f)
> > > -{
> > > -	struct nfsd_file *ret;
> > > -
> > > -	spin_lock(&f->fi_lock);
> > > -	ret = nfsd_file_get(f->fi_fds[O_RDWR]);
> > > -	spin_unlock(&f->fi_lock);
> > > -
> > > -	return ret;
> > > -}
> > > -
> > >   struct nfsd_file *
> > >   find_any_file(struct nfs4_file *f)
> > >   {
> > > @@ -5784,15 +5772,11 @@ nfs4_set_delegation(struct nfsd4_open
> > > *open, struct nfs4_ol_stateid *stp,
> > >   	 *  "An OPEN_DELEGATE_WRITE delegation allows the client
> > > to handle,
> > >   	 *   on its own, all opens."
> > >   	 *
> > > -	 * Furthermore the client can use a write delegation for
> > > most READ
> > > -	 * operations as well, so we require a O_RDWR file here.
> > > -	 *
> > > -	 * Offer a write delegation in the case of a BOTH open,
> > > and ensure
> > > -	 * we get the O_RDWR descriptor.
> > > +	 * Offer a write delegation for WRITE or BOTH access
> > >   	 */
> > > -	if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH) ==
> > > NFS4_SHARE_ACCESS_BOTH) {
> > > -		nf = find_rw_file(fp);
> > > +	if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) {
> > >   		dl_type = NFS4_OPEN_DELEGATE_WRITE;
> > > +		nf = find_writeable_file(fp);
> > >   	}
> > >   
> > >   	/*
> > > @@ -5934,8 +5918,8 @@ static void
> > > nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status)
> > >    * open or lock state.
> > >    */
> > >   static void
> > > -nfs4_open_delegation(struct nfsd4_open *open, struct
> > > nfs4_ol_stateid *stp,
> > > -		     struct svc_fh *currentfh)
> > > +nfs4_open_delegation(struct svc_rqst *rqstp, struct nfsd4_open
> > > *open,
> > > +		struct nfs4_ol_stateid *stp, struct svc_fh
> > > *currentfh)
> > >   {
> > >   	struct nfs4_delegation *dp;
> > >   	struct nfs4_openowner *oo = openowner(stp-
> > > >st_stateowner);
> > > @@ -5994,6 +5978,20 @@ nfs4_open_delegation(struct nfsd4_open
> > > *open, struct nfs4_ol_stateid *stp,
> > >   		dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
> > >   		dp->dl_cb_fattr.ncf_initial_cinfo =
> > >   			nfsd4_change_attribute(&stat,
> > > d_inode(currentfh->fh_dentry));
> > > +		if ((open->op_share_access &
> > > NFS4_SHARE_ACCESS_BOTH) != NFS4_SHARE_ACCESS_BOTH) {
> > More comments on this part:
> > 
> > 
> > nit: You've already tested for NFS4_SHARE_ACCESS_WRITE here, and
> > this
> > seems easier to read:
> > 
> > 		if (!(open->op_share_access &
> > NFS4_SHARE_ACCESS_READ))
> > 
> > 
> > 
> > > +			struct nfsd_file *nf = NULL;
> > > +
> > > +			/* make sure the file is opened locally
> > > for O_RDWR */
> > > +			status = nfsd_file_acquire_opened(rqstp,
> > > currentfh,
> > > +				nfs4_access_to_access(NFS4_SHARE
> > > _ACCESS_BOTH),
> > > +				open->op_filp, &nf);
> > > +			if (status) {
> > > +				nfs4_put_stid(&dp->dl_stid);
> > > +				destroy_delegation(dp);
> > > +				goto out_no_deleg;
> > > +			}
> > > +			stp->st_stid.sc_file->fi_fds[O_RDWR] =
> > > nf;
> > How do you know that this fd isn't already set? Also, this field is
> > protected by the sc_file->fi_lock and that's not held here.
> 
> Something like this?
> --
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index a6c6d813c59c..ee0c65ff1940 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -5978,19 +5978,24 @@ nfs4_open_delegation(struct svc_rqst *rqstp, 
> struct nfsd4_open *open,
>                  dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
>                  dp->dl_cb_fattr.ncf_initial_cinfo =
>                          nfsd4_change_attribute(&stat, 
> d_inode(currentfh->fh_dentry));
> -               if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH)
> != 
> NFS4_SHARE_ACCESS_BOTH) {
> +               if (!(open->op_share_access &
> NFS4_SHARE_ACCESS_READ)) {
> +                       u32 access = 
> nfs4_access_to_access(NFS4_SHARE_ACCESS_BOTH);
> +                       struct nfs4_file *fp = stp->st_stid.sc_file;
>                          struct nfsd_file *nf = NULL;
> 
>                          /* make sure the file is opened locally for 
> O_RDWR */
> +                       set_access(access, stp);
>                          status = nfsd_file_acquire_opened(rqstp,
> currentfh,
> - nfs4_access_to_access(NFS4_SHARE_ACCESS_BOTH),
> -                               open->op_filp, &nf);
> +                                       access, open->op_filp, &nf);
>                          if (status) {
>                                  nfs4_put_stid(&dp->dl_stid);
>                                  destroy_delegation(dp);
>                                  goto out_no_deleg;
>                          }
> -                       stp->st_stid.sc_file->fi_fds[O_RDWR] = nf;
> +                       spin_lock(&fp->fi_lock);
> +                       if (!fp->fi_fds[O_RDWR])
> +                               fp->fi_fds[O_RDWR] = nf;
> +                       spin_lock(&fp->fi_lock);
>                  }
>          } else {
> --
> 

Your MUA mangled it a bit, but that probably would work. You do also
need to put the nf reference though if you don't assign
fp->fi_fds[O_RDWR] here.
Sagi Grimberg July 29, 2024, 1:37 p.m. UTC | #7
On 29/07/2024 16:17, Jeff Layton wrote:
> On Mon, 2024-07-29 at 15:59 +0300, Sagi Grimberg wrote:
>>
>>
>> On 29/07/2024 15:26, Jeff Layton wrote:
>>> On Sun, 2024-07-28 at 23:41 +0300, Sagi Grimberg wrote:
>>>> In order to support write delegations with O_WRONLY opens, we
>>>> need to
>>>> allow the clients to read using the write delegation stateid (Per
>>>> RFC
>>>> 8881 section 9.1.2. Use of the Stateid and Locking).
>>>>
>>>> Hence, we check for NFS4_SHARE_ACCESS_WRITE set in open request,
>>>> and
>>>> in case the share access flag does not set NFS4_SHARE_ACCESS_READ
>>>> as
>>>> well, we'll open the file locally with O_RDWR in order to allow
>>>> the
>>>> client to use the write delegation stateid when issuing a read in
>>>> case
>>>> it may choose to.
>>>>
>>>> Plus, find_rw_file singular call-site is now removed, remove it
>>>> altogether.
>>>>
>>>> Note: reads using special stateids that conflict with pending
>>>> write
>>>> delegations are undetected, and will be covered in a follow on
>>>> patch.
>>>>
>>>> Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
>>>> ---
>>>>    fs/nfsd/nfs4proc.c  | 18 +++++++++++++++++-
>>>>    fs/nfsd/nfs4state.c | 42 ++++++++++++++++++++------------------
>>>> ----
>>>>    fs/nfsd/xdr4.h      |  2 ++
>>>>    3 files changed, 39 insertions(+), 23 deletions(-)
>>>>
>>>> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
>>>> index 7b70309ad8fb..041bcc3ab5d7 100644
>>>> --- a/fs/nfsd/nfs4proc.c
>>>> +++ b/fs/nfsd/nfs4proc.c
>>>> @@ -979,8 +979,22 @@ nfsd4_read(struct svc_rqst *rqstp, struct
>>>> nfsd4_compound_state *cstate,
>>>>    	/* check stateid */
>>>>    	status = nfs4_preprocess_stateid_op(rqstp, cstate,
>>>> &cstate->current_fh,
>>>>    					&read->rd_stateid,
>>>> RD_STATE,
>>>> -					&read->rd_nf, NULL);
>>>> +					&read->rd_nf, &read-
>>>>> rd_wd_stid);
>>>>    
>>>> +	/*
>>>> +	 * rd_wd_stid is needed for nfsd4_encode_read to allow
>>>> write
>>>> +	 * delegation stateid used for read. Its refcount is
>>>> decremented
>>>> +	 * by nfsd4_read_release when read is done.
>>>> +	 */
>>>> +	if (!status) {
>>>> +		if (read->rd_wd_stid &&
>>>> +		    (read->rd_wd_stid->sc_type != SC_TYPE_DELEG
>>>> ||
>>>> +		     delegstateid(read->rd_wd_stid)->dl_type !=
>>>> +					NFS4_OPEN_DELEGATE_WRITE
>>>> )) {
>>>> +			nfs4_put_stid(read->rd_wd_stid);
>>>> +			read->rd_wd_stid = NULL;
>>>> +		}
>>>> +	}
>>>>    	read->rd_rqstp = rqstp;
>>>>    	read->rd_fhp = &cstate->current_fh;
>>>>    	return status;
>>>> @@ -990,6 +1004,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct
>>>> nfsd4_compound_state *cstate,
>>>>    static void
>>>>    nfsd4_read_release(union nfsd4_op_u *u)
>>>>    {
>>>> +	if (u->read.rd_wd_stid)
>>>> +		nfs4_put_stid(u->read.rd_wd_stid);
>>>>    	if (u->read.rd_nf)
>>>>    		nfsd_file_put(u->read.rd_nf);
>>>>    	trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp,
>>>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>>>> index 0645fccbf122..538b6e1127a2 100644
>>>> --- a/fs/nfsd/nfs4state.c
>>>> +++ b/fs/nfsd/nfs4state.c
>>>> @@ -639,18 +639,6 @@ find_readable_file(struct nfs4_file *f)
>>>>    	return ret;
>>>>    }
>>>>    
>>>> -static struct nfsd_file *
>>>> -find_rw_file(struct nfs4_file *f)
>>>> -{
>>>> -	struct nfsd_file *ret;
>>>> -
>>>> -	spin_lock(&f->fi_lock);
>>>> -	ret = nfsd_file_get(f->fi_fds[O_RDWR]);
>>>> -	spin_unlock(&f->fi_lock);
>>>> -
>>>> -	return ret;
>>>> -}
>>>> -
>>>>    struct nfsd_file *
>>>>    find_any_file(struct nfs4_file *f)
>>>>    {
>>>> @@ -5784,15 +5772,11 @@ nfs4_set_delegation(struct nfsd4_open
>>>> *open, struct nfs4_ol_stateid *stp,
>>>>    	 *  "An OPEN_DELEGATE_WRITE delegation allows the client
>>>> to handle,
>>>>    	 *   on its own, all opens."
>>>>    	 *
>>>> -	 * Furthermore the client can use a write delegation for
>>>> most READ
>>>> -	 * operations as well, so we require a O_RDWR file here.
>>>> -	 *
>>>> -	 * Offer a write delegation in the case of a BOTH open,
>>>> and ensure
>>>> -	 * we get the O_RDWR descriptor.
>>>> +	 * Offer a write delegation for WRITE or BOTH access
>>>>    	 */
>>>> -	if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH) ==
>>>> NFS4_SHARE_ACCESS_BOTH) {
>>>> -		nf = find_rw_file(fp);
>>>> +	if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) {
>>>>    		dl_type = NFS4_OPEN_DELEGATE_WRITE;
>>>> +		nf = find_writeable_file(fp);
>>>>    	}
>>>>    
>>>>    	/*
>>>> @@ -5934,8 +5918,8 @@ static void
>>>> nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status)
>>>>     * open or lock state.
>>>>     */
>>>>    static void
>>>> -nfs4_open_delegation(struct nfsd4_open *open, struct
>>>> nfs4_ol_stateid *stp,
>>>> -		     struct svc_fh *currentfh)
>>>> +nfs4_open_delegation(struct svc_rqst *rqstp, struct nfsd4_open
>>>> *open,
>>>> +		struct nfs4_ol_stateid *stp, struct svc_fh
>>>> *currentfh)
>>>>    {
>>>>    	struct nfs4_delegation *dp;
>>>>    	struct nfs4_openowner *oo = openowner(stp-
>>>>> st_stateowner);
>>>> @@ -5994,6 +5978,20 @@ nfs4_open_delegation(struct nfsd4_open
>>>> *open, struct nfs4_ol_stateid *stp,
>>>>    		dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
>>>>    		dp->dl_cb_fattr.ncf_initial_cinfo =
>>>>    			nfsd4_change_attribute(&stat,
>>>> d_inode(currentfh->fh_dentry));
>>>> +		if ((open->op_share_access &
>>>> NFS4_SHARE_ACCESS_BOTH) != NFS4_SHARE_ACCESS_BOTH) {
>>> More comments on this part:
>>>
>>>
>>> nit: You've already tested for NFS4_SHARE_ACCESS_WRITE here, and
>>> this
>>> seems easier to read:
>>>
>>> 		if (!(open->op_share_access &
>>> NFS4_SHARE_ACCESS_READ))
>>>
>>>
>>>
>>>> +			struct nfsd_file *nf = NULL;
>>>> +
>>>> +			/* make sure the file is opened locally
>>>> for O_RDWR */
>>>> +			status = nfsd_file_acquire_opened(rqstp,
>>>> currentfh,
>>>> +				nfs4_access_to_access(NFS4_SHARE
>>>> _ACCESS_BOTH),
>>>> +				open->op_filp, &nf);
>>>> +			if (status) {
>>>> +				nfs4_put_stid(&dp->dl_stid);
>>>> +				destroy_delegation(dp);
>>>> +				goto out_no_deleg;
>>>> +			}
>>>> +			stp->st_stid.sc_file->fi_fds[O_RDWR] =
>>>> nf;
>>> How do you know that this fd isn't already set? Also, this field is
>>> protected by the sc_file->fi_lock and that's not held here.
>> Something like this?
>> --
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index a6c6d813c59c..ee0c65ff1940 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -5978,19 +5978,24 @@ nfs4_open_delegation(struct svc_rqst *rqstp,
>> struct nfsd4_open *open,
>>                   dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
>>                   dp->dl_cb_fattr.ncf_initial_cinfo =
>>                           nfsd4_change_attribute(&stat,
>> d_inode(currentfh->fh_dentry));
>> -               if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH)
>> !=
>> NFS4_SHARE_ACCESS_BOTH) {
>> +               if (!(open->op_share_access &
>> NFS4_SHARE_ACCESS_READ)) {
>> +                       u32 access =
>> nfs4_access_to_access(NFS4_SHARE_ACCESS_BOTH);
>> +                       struct nfs4_file *fp = stp->st_stid.sc_file;
>>                           struct nfsd_file *nf = NULL;
>>
>>                           /* make sure the file is opened locally for
>> O_RDWR */
>> +                       set_access(access, stp);
>>                           status = nfsd_file_acquire_opened(rqstp,
>> currentfh,
>> - nfs4_access_to_access(NFS4_SHARE_ACCESS_BOTH),
>> -                               open->op_filp, &nf);
>> +                                       access, open->op_filp, &nf);
>>                           if (status) {
>>                                   nfs4_put_stid(&dp->dl_stid);
>>                                   destroy_delegation(dp);
>>                                   goto out_no_deleg;
>>                           }
>> -                       stp->st_stid.sc_file->fi_fds[O_RDWR] = nf;
>> +                       spin_lock(&fp->fi_lock);
>> +                       if (!fp->fi_fds[O_RDWR])
>> +                               fp->fi_fds[O_RDWR] = nf;
>> +                       spin_lock(&fp->fi_lock);
>>                   }
>>           } else {
>> --
>>
> Your MUA mangled it a bit, but that probably would work. You do also
> need to put the nf reference though if you don't assign
> fp->fi_fds[O_RDWR] here.

 From the amount of non-trivial work this hunk is doing, I'm starting to 
think perhaps
we need something simpler?

Maybe call nfs4_get_vfs_file() instead? and modify the 
open->op_share_access before calling it?
Although I'm not fully sure if things will blow up or not...
Sagi Grimberg July 29, 2024, 1:39 p.m. UTC | #8
On 29/07/2024 16:10, Jeff Layton wrote:
> On Mon, 2024-07-29 at 15:58 +0300, Sagi Grimberg wrote:
>>
>>
>> On 29/07/2024 15:10, Jeff Layton wrote:
>>> On Sun, 2024-07-28 at 23:41 +0300, Sagi Grimberg wrote:
>>>> In order to support write delegations with O_WRONLY opens, we
>>>> need to
>>>> allow the clients to read using the write delegation stateid (Per
>>>> RFC
>>>> 8881 section 9.1.2. Use of the Stateid and Locking).
>>>>
>>>> Hence, we check for NFS4_SHARE_ACCESS_WRITE set in open request,
>>>> and
>>>> in case the share access flag does not set NFS4_SHARE_ACCESS_READ
>>>> as
>>>> well, we'll open the file locally with O_RDWR in order to allow
>>>> the
>>>> client to use the write delegation stateid when issuing a read in
>>>> case
>>>> it may choose to.
>>>>
>>>> Plus, find_rw_file singular call-site is now removed, remove it
>>>> altogether.
>>>>
>>>> Note: reads using special stateids that conflict with pending
>>>> write
>>>> delegations are undetected, and will be covered in a follow on
>>>> patch.
>>>>
>>>> Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
>>>> ---
>>>>    fs/nfsd/nfs4proc.c  | 18 +++++++++++++++++-
>>>>    fs/nfsd/nfs4state.c | 42 ++++++++++++++++++++------------------
>>>> ----
>>>>    fs/nfsd/xdr4.h      |  2 ++
>>>>    3 files changed, 39 insertions(+), 23 deletions(-)
>>>>
>>>> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
>>>> index 7b70309ad8fb..041bcc3ab5d7 100644
>>>> --- a/fs/nfsd/nfs4proc.c
>>>> +++ b/fs/nfsd/nfs4proc.c
>>>> @@ -979,8 +979,22 @@ nfsd4_read(struct svc_rqst *rqstp, struct
>>>> nfsd4_compound_state *cstate,
>>>>    	/* check stateid */
>>>>    	status = nfs4_preprocess_stateid_op(rqstp, cstate,
>>>> &cstate-
>>>>> current_fh,
>>>>    					&read->rd_stateid,
>>>> RD_STATE,
>>>> -					&read->rd_nf, NULL);
>>>> +					&read->rd_nf, &read-
>>>>> rd_wd_stid);
>>>>    
>>>> +	/*
>>>> +	 * rd_wd_stid is needed for nfsd4_encode_read to allow
>>>> write
>>>> +	 * delegation stateid used for read. Its refcount is
>>>> decremented
>>>> +	 * by nfsd4_read_release when read is done.
>>>> +	 */
>>>> +	if (!status) {
>>>> +		if (read->rd_wd_stid &&
>>>> +		    (read->rd_wd_stid->sc_type != SC_TYPE_DELEG
>>>> ||
>>>> +		     delegstateid(read->rd_wd_stid)->dl_type !=
>>>> +					NFS4_OPEN_DELEGATE_WRITE
>>>> )) {
>>>> +			nfs4_put_stid(read->rd_wd_stid);
>>>> +			read->rd_wd_stid = NULL;
>>>> +		}
>>>> +	}
>>>>    	read->rd_rqstp = rqstp;
>>>>    	read->rd_fhp = &cstate->current_fh;
>>>>    	return status;
>>>> @@ -990,6 +1004,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct
>>>> nfsd4_compound_state *cstate,
>>>>    static void
>>>>    nfsd4_read_release(union nfsd4_op_u *u)
>>>>    {
>>>> +	if (u->read.rd_wd_stid)
>>>> +		nfs4_put_stid(u->read.rd_wd_stid);
>>>>    	if (u->read.rd_nf)
>>>>    		nfsd_file_put(u->read.rd_nf);
>>>>    	trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp,
>>>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>>>> index 0645fccbf122..538b6e1127a2 100644
>>>> --- a/fs/nfsd/nfs4state.c
>>>> +++ b/fs/nfsd/nfs4state.c
>>>> @@ -639,18 +639,6 @@ find_readable_file(struct nfs4_file *f)
>>>>    	return ret;
>>>>    }
>>>>    
>>>> -static struct nfsd_file *
>>>> -find_rw_file(struct nfs4_file *f)
>>>> -{
>>>> -	struct nfsd_file *ret;
>>>> -
>>>> -	spin_lock(&f->fi_lock);
>>>> -	ret = nfsd_file_get(f->fi_fds[O_RDWR]);
>>>> -	spin_unlock(&f->fi_lock);
>>>> -
>>>> -	return ret;
>>>> -}
>>>> -
>>>>    struct nfsd_file *
>>>>    find_any_file(struct nfs4_file *f)
>>>>    {
>>>> @@ -5784,15 +5772,11 @@ nfs4_set_delegation(struct nfsd4_open
>>>> *open,
>>>> struct nfs4_ol_stateid *stp,
>>>>    	 *  "An OPEN_DELEGATE_WRITE delegation allows the client
>>>> to
>>>> handle,
>>>>    	 *   on its own, all opens."
>>>>    	 *
>>>> -	 * Furthermore the client can use a write delegation for
>>>> most READ
>>>> -	 * operations as well, so we require a O_RDWR file here.
>>>> -	 *
>>>> -	 * Offer a write delegation in the case of a BOTH open,
>>>> and
>>>> ensure
>>>> -	 * we get the O_RDWR descriptor.
>>>> +	 * Offer a write delegation for WRITE or BOTH access
>>>>    	 */
>>>> -	if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH) ==
>>>> NFS4_SHARE_ACCESS_BOTH) {
>>>> -		nf = find_rw_file(fp);
>>>> +	if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) {
>>>>    		dl_type = NFS4_OPEN_DELEGATE_WRITE;
>>>> +		nf = find_writeable_file(fp);
>>>>    	}
>>>>    
>>>>    	/*
>>>> @@ -5934,8 +5918,8 @@ static void
>>>> nfsd4_open_deleg_none_ext(struct
>>>> nfsd4_open *open, int status)
>>>>     * open or lock state.
>>>>     */
>>>>    static void
>>>> -nfs4_open_delegation(struct nfsd4_open *open, struct
>>>> nfs4_ol_stateid
>>>> *stp,
>>>> -		     struct svc_fh *currentfh)
>>>> +nfs4_open_delegation(struct svc_rqst *rqstp, struct nfsd4_open
>>>> *open,
>>>> +		struct nfs4_ol_stateid *stp, struct svc_fh
>>>> *currentfh)
>>>>    {
>>>>    	struct nfs4_delegation *dp;
>>>>    	struct nfs4_openowner *oo = openowner(stp-
>>>>> st_stateowner);
>>>> @@ -5994,6 +5978,20 @@ nfs4_open_delegation(struct nfsd4_open
>>>> *open,
>>>> struct nfs4_ol_stateid *stp,
>>>>    		dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
>>>>    		dp->dl_cb_fattr.ncf_initial_cinfo =
>>>>    			nfsd4_change_attribute(&stat,
>>>> d_inode(currentfh->fh_dentry));
>>>> +		if ((open->op_share_access &
>>>> NFS4_SHARE_ACCESS_BOTH)
>>>> != NFS4_SHARE_ACCESS_BOTH) {
>>>> +			struct nfsd_file *nf = NULL;
>>>> +
>>>> +			/* make sure the file is opened locally
>>>> for
>>>> O_RDWR */
>>>> +			status = nfsd_file_acquire_opened(rqstp,
>>>> currentfh,
>>>> +				nfs4_access_to_access(NFS4_SHARE
>>>> _ACC
>>>> ESS_BOTH),
>>>> +				open->op_filp, &nf);
>>>> +			if (status) {
>>>> +				nfs4_put_stid(&dp->dl_stid);
>>>> +				destroy_delegation(dp);
>>>> +				goto out_no_deleg;
>>>> +			}
>>>> +			stp->st_stid.sc_file->fi_fds[O_RDWR] =
>>>> nf;
>>> I have a bit of a concern here. When we go to put access references
>>> to
>>> the fi_fds, that's done according to the st_access_bmap. Here
>>> though,
>>> you're adding an extra reference for the O_RDWR fd, but I don't see
>>> where you're calling set_access for that on the delegation stateid?
>>> Am
>>> I missing where that happens? Not doing that may lead to fd leaks
>>> if it
>>> was missed.
>> Ah, this is something that I did not fully understand...
>> However it looks like st_access_bmap is not something that is
>> accounted on the delegation stateid...
>>
>> Can I simply set it on the open stateid (stp)?
> That would likely fix the leak, but I'm not sure that's the best
> approach. You have an NFS4_SHARE_ACCESS_WRITE-only stateid here, and
> that would turn it a NFS4_SHARE_ACCESS_BOTH one, wouldn't it?
>
> It wouldn't surprise me if that might break a testcase or two.

Well, if the server handed out a write delegation, isn't it effectively 
equivalent to
NFS4_SHARE_ACCESS_BOTH open?
Chuck Lever July 29, 2024, 1:52 p.m. UTC | #9
On Mon, Jul 29, 2024 at 04:39:07PM +0300, Sagi Grimberg wrote:
> 
> 
> 
> On 29/07/2024 16:10, Jeff Layton wrote:
> > On Mon, 2024-07-29 at 15:58 +0300, Sagi Grimberg wrote:
> > > 
> > > 
> > > On 29/07/2024 15:10, Jeff Layton wrote:
> > > > On Sun, 2024-07-28 at 23:41 +0300, Sagi Grimberg wrote:
> > > > > In order to support write delegations with O_WRONLY opens, we
> > > > > need to
> > > > > allow the clients to read using the write delegation stateid (Per
> > > > > RFC
> > > > > 8881 section 9.1.2. Use of the Stateid and Locking).
> > > > > 
> > > > > Hence, we check for NFS4_SHARE_ACCESS_WRITE set in open request,
> > > > > and
> > > > > in case the share access flag does not set NFS4_SHARE_ACCESS_READ
> > > > > as
> > > > > well, we'll open the file locally with O_RDWR in order to allow
> > > > > the
> > > > > client to use the write delegation stateid when issuing a read in
> > > > > case
> > > > > it may choose to.
> > > > > 
> > > > > Plus, find_rw_file singular call-site is now removed, remove it
> > > > > altogether.
> > > > > 
> > > > > Note: reads using special stateids that conflict with pending
> > > > > write
> > > > > delegations are undetected, and will be covered in a follow on
> > > > > patch.
> > > > > 
> > > > > Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
> > > > > ---
> > > > >    fs/nfsd/nfs4proc.c  | 18 +++++++++++++++++-
> > > > >    fs/nfsd/nfs4state.c | 42 ++++++++++++++++++++------------------
> > > > > ----
> > > > >    fs/nfsd/xdr4.h      |  2 ++
> > > > >    3 files changed, 39 insertions(+), 23 deletions(-)
> > > > > 
> > > > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> > > > > index 7b70309ad8fb..041bcc3ab5d7 100644
> > > > > --- a/fs/nfsd/nfs4proc.c
> > > > > +++ b/fs/nfsd/nfs4proc.c
> > > > > @@ -979,8 +979,22 @@ nfsd4_read(struct svc_rqst *rqstp, struct
> > > > > nfsd4_compound_state *cstate,
> > > > >    	/* check stateid */
> > > > >    	status = nfs4_preprocess_stateid_op(rqstp, cstate,
> > > > > &cstate-
> > > > > > current_fh,
> > > > >    					&read->rd_stateid,
> > > > > RD_STATE,
> > > > > -					&read->rd_nf, NULL);
> > > > > +					&read->rd_nf, &read-
> > > > > > rd_wd_stid);
> > > > > +	/*
> > > > > +	 * rd_wd_stid is needed for nfsd4_encode_read to allow
> > > > > write
> > > > > +	 * delegation stateid used for read. Its refcount is
> > > > > decremented
> > > > > +	 * by nfsd4_read_release when read is done.
> > > > > +	 */
> > > > > +	if (!status) {
> > > > > +		if (read->rd_wd_stid &&
> > > > > +		    (read->rd_wd_stid->sc_type != SC_TYPE_DELEG
> > > > > ||
> > > > > +		     delegstateid(read->rd_wd_stid)->dl_type !=
> > > > > +					NFS4_OPEN_DELEGATE_WRITE
> > > > > )) {
> > > > > +			nfs4_put_stid(read->rd_wd_stid);
> > > > > +			read->rd_wd_stid = NULL;
> > > > > +		}
> > > > > +	}
> > > > >    	read->rd_rqstp = rqstp;
> > > > >    	read->rd_fhp = &cstate->current_fh;
> > > > >    	return status;
> > > > > @@ -990,6 +1004,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct
> > > > > nfsd4_compound_state *cstate,
> > > > >    static void
> > > > >    nfsd4_read_release(union nfsd4_op_u *u)
> > > > >    {
> > > > > +	if (u->read.rd_wd_stid)
> > > > > +		nfs4_put_stid(u->read.rd_wd_stid);
> > > > >    	if (u->read.rd_nf)
> > > > >    		nfsd_file_put(u->read.rd_nf);
> > > > >    	trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp,
> > > > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > > > > index 0645fccbf122..538b6e1127a2 100644
> > > > > --- a/fs/nfsd/nfs4state.c
> > > > > +++ b/fs/nfsd/nfs4state.c
> > > > > @@ -639,18 +639,6 @@ find_readable_file(struct nfs4_file *f)
> > > > >    	return ret;
> > > > >    }
> > > > > -static struct nfsd_file *
> > > > > -find_rw_file(struct nfs4_file *f)
> > > > > -{
> > > > > -	struct nfsd_file *ret;
> > > > > -
> > > > > -	spin_lock(&f->fi_lock);
> > > > > -	ret = nfsd_file_get(f->fi_fds[O_RDWR]);
> > > > > -	spin_unlock(&f->fi_lock);
> > > > > -
> > > > > -	return ret;
> > > > > -}
> > > > > -
> > > > >    struct nfsd_file *
> > > > >    find_any_file(struct nfs4_file *f)
> > > > >    {
> > > > > @@ -5784,15 +5772,11 @@ nfs4_set_delegation(struct nfsd4_open
> > > > > *open,
> > > > > struct nfs4_ol_stateid *stp,
> > > > >    	 *  "An OPEN_DELEGATE_WRITE delegation allows the client
> > > > > to
> > > > > handle,
> > > > >    	 *   on its own, all opens."
> > > > >    	 *
> > > > > -	 * Furthermore the client can use a write delegation for
> > > > > most READ
> > > > > -	 * operations as well, so we require a O_RDWR file here.
> > > > > -	 *
> > > > > -	 * Offer a write delegation in the case of a BOTH open,
> > > > > and
> > > > > ensure
> > > > > -	 * we get the O_RDWR descriptor.
> > > > > +	 * Offer a write delegation for WRITE or BOTH access
> > > > >    	 */
> > > > > -	if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH) ==
> > > > > NFS4_SHARE_ACCESS_BOTH) {
> > > > > -		nf = find_rw_file(fp);
> > > > > +	if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) {
> > > > >    		dl_type = NFS4_OPEN_DELEGATE_WRITE;
> > > > > +		nf = find_writeable_file(fp);
> > > > >    	}
> > > > >    	/*
> > > > > @@ -5934,8 +5918,8 @@ static void
> > > > > nfsd4_open_deleg_none_ext(struct
> > > > > nfsd4_open *open, int status)
> > > > >     * open or lock state.
> > > > >     */
> > > > >    static void
> > > > > -nfs4_open_delegation(struct nfsd4_open *open, struct
> > > > > nfs4_ol_stateid
> > > > > *stp,
> > > > > -		     struct svc_fh *currentfh)
> > > > > +nfs4_open_delegation(struct svc_rqst *rqstp, struct nfsd4_open
> > > > > *open,
> > > > > +		struct nfs4_ol_stateid *stp, struct svc_fh
> > > > > *currentfh)
> > > > >    {
> > > > >    	struct nfs4_delegation *dp;
> > > > >    	struct nfs4_openowner *oo = openowner(stp-
> > > > > > st_stateowner);
> > > > > @@ -5994,6 +5978,20 @@ nfs4_open_delegation(struct nfsd4_open
> > > > > *open,
> > > > > struct nfs4_ol_stateid *stp,
> > > > >    		dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
> > > > >    		dp->dl_cb_fattr.ncf_initial_cinfo =
> > > > >    			nfsd4_change_attribute(&stat,
> > > > > d_inode(currentfh->fh_dentry));
> > > > > +		if ((open->op_share_access &
> > > > > NFS4_SHARE_ACCESS_BOTH)
> > > > > != NFS4_SHARE_ACCESS_BOTH) {
> > > > > +			struct nfsd_file *nf = NULL;
> > > > > +
> > > > > +			/* make sure the file is opened locally
> > > > > for
> > > > > O_RDWR */
> > > > > +			status = nfsd_file_acquire_opened(rqstp,
> > > > > currentfh,
> > > > > +				nfs4_access_to_access(NFS4_SHARE
> > > > > _ACC
> > > > > ESS_BOTH),
> > > > > +				open->op_filp, &nf);
> > > > > +			if (status) {
> > > > > +				nfs4_put_stid(&dp->dl_stid);
> > > > > +				destroy_delegation(dp);
> > > > > +				goto out_no_deleg;
> > > > > +			}
> > > > > +			stp->st_stid.sc_file->fi_fds[O_RDWR] =
> > > > > nf;
> > > > I have a bit of a concern here. When we go to put access references
> > > > to
> > > > the fi_fds, that's done according to the st_access_bmap. Here
> > > > though,
> > > > you're adding an extra reference for the O_RDWR fd, but I don't see
> > > > where you're calling set_access for that on the delegation stateid?
> > > > Am
> > > > I missing where that happens? Not doing that may lead to fd leaks
> > > > if it
> > > > was missed.
> > > Ah, this is something that I did not fully understand...
> > > However it looks like st_access_bmap is not something that is
> > > accounted on the delegation stateid...
> > > 
> > > Can I simply set it on the open stateid (stp)?
> > That would likely fix the leak, but I'm not sure that's the best
> > approach. You have an NFS4_SHARE_ACCESS_WRITE-only stateid here, and
> > that would turn it a NFS4_SHARE_ACCESS_BOTH one, wouldn't it?
> > 
> > It wouldn't surprise me if that might break a testcase or two.
> 
> Well, if the server handed out a write delegation, isn't it effectively
> equivalent to
> NFS4_SHARE_ACCESS_BOTH open?

It has to be equivalent, since the write delegation gives the client
carte blanche to perform any open it wants to, locally. The server
does not know about those local client-side opens, and it has a
notification set up to fire if anyone else wants to open that file.

In nfs4_set_delegation(), we have this comment:

>       /*                                                                          
>        * Try for a write delegation first. RFC8881 section 10.4 says:             
>        *                                                                          
>        *  "An OPEN_DELEGATE_WRITE delegation allows the client to handle,         
>        *   on its own, all opens."                                                
>        *                                                                          
>        * Furthermore the client can use a write delegation for most READ          
>        * operations as well, so we require a O_RDWR file here.                    
>        *                                                                          
>        * Offer a write delegation in the case of a BOTH open, and ensure          
>        * we get the O_RDWR descriptor.                                            
>        */   

I think we did it this way only to circumvent the broken test cases.

A write delegation stateid uses a local O_RDWR OPEN, yes? If so, why
can't it be used for READ operations already? All that has to be
done is hand out the write delegation in the correct cases.

Instead of gating the delegation on the intent presented by the OPEN
operation, gate it on whether the user who is opening the file has
both read and write access to the file.
Sagi Grimberg July 29, 2024, 2:05 p.m. UTC | #10
On 29/07/2024 16:52, Chuck Lever wrote:
> On Mon, Jul 29, 2024 at 04:39:07PM +0300, Sagi Grimberg wrote:
>>
>>
>> On 29/07/2024 16:10, Jeff Layton wrote:
>>> On Mon, 2024-07-29 at 15:58 +0300, Sagi Grimberg wrote:
>>>>
>>>> On 29/07/2024 15:10, Jeff Layton wrote:
>>>>> On Sun, 2024-07-28 at 23:41 +0300, Sagi Grimberg wrote:
>>>>>> In order to support write delegations with O_WRONLY opens, we
>>>>>> need to
>>>>>> allow the clients to read using the write delegation stateid (Per
>>>>>> RFC
>>>>>> 8881 section 9.1.2. Use of the Stateid and Locking).
>>>>>>
>>>>>> Hence, we check for NFS4_SHARE_ACCESS_WRITE set in open request,
>>>>>> and
>>>>>> in case the share access flag does not set NFS4_SHARE_ACCESS_READ
>>>>>> as
>>>>>> well, we'll open the file locally with O_RDWR in order to allow
>>>>>> the
>>>>>> client to use the write delegation stateid when issuing a read in
>>>>>> case
>>>>>> it may choose to.
>>>>>>
>>>>>> Plus, find_rw_file singular call-site is now removed, remove it
>>>>>> altogether.
>>>>>>
>>>>>> Note: reads using special stateids that conflict with pending
>>>>>> write
>>>>>> delegations are undetected, and will be covered in a follow on
>>>>>> patch.
>>>>>>
>>>>>> Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
>>>>>> ---
>>>>>>     fs/nfsd/nfs4proc.c  | 18 +++++++++++++++++-
>>>>>>     fs/nfsd/nfs4state.c | 42 ++++++++++++++++++++------------------
>>>>>> ----
>>>>>>     fs/nfsd/xdr4.h      |  2 ++
>>>>>>     3 files changed, 39 insertions(+), 23 deletions(-)
>>>>>>
>>>>>> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
>>>>>> index 7b70309ad8fb..041bcc3ab5d7 100644
>>>>>> --- a/fs/nfsd/nfs4proc.c
>>>>>> +++ b/fs/nfsd/nfs4proc.c
>>>>>> @@ -979,8 +979,22 @@ nfsd4_read(struct svc_rqst *rqstp, struct
>>>>>> nfsd4_compound_state *cstate,
>>>>>>     	/* check stateid */
>>>>>>     	status = nfs4_preprocess_stateid_op(rqstp, cstate,
>>>>>> &cstate-
>>>>>>> current_fh,
>>>>>>     					&read->rd_stateid,
>>>>>> RD_STATE,
>>>>>> -					&read->rd_nf, NULL);
>>>>>> +					&read->rd_nf, &read-
>>>>>>> rd_wd_stid);
>>>>>> +	/*
>>>>>> +	 * rd_wd_stid is needed for nfsd4_encode_read to allow
>>>>>> write
>>>>>> +	 * delegation stateid used for read. Its refcount is
>>>>>> decremented
>>>>>> +	 * by nfsd4_read_release when read is done.
>>>>>> +	 */
>>>>>> +	if (!status) {
>>>>>> +		if (read->rd_wd_stid &&
>>>>>> +		    (read->rd_wd_stid->sc_type != SC_TYPE_DELEG
>>>>>> ||
>>>>>> +		     delegstateid(read->rd_wd_stid)->dl_type !=
>>>>>> +					NFS4_OPEN_DELEGATE_WRITE
>>>>>> )) {
>>>>>> +			nfs4_put_stid(read->rd_wd_stid);
>>>>>> +			read->rd_wd_stid = NULL;
>>>>>> +		}
>>>>>> +	}
>>>>>>     	read->rd_rqstp = rqstp;
>>>>>>     	read->rd_fhp = &cstate->current_fh;
>>>>>>     	return status;
>>>>>> @@ -990,6 +1004,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct
>>>>>> nfsd4_compound_state *cstate,
>>>>>>     static void
>>>>>>     nfsd4_read_release(union nfsd4_op_u *u)
>>>>>>     {
>>>>>> +	if (u->read.rd_wd_stid)
>>>>>> +		nfs4_put_stid(u->read.rd_wd_stid);
>>>>>>     	if (u->read.rd_nf)
>>>>>>     		nfsd_file_put(u->read.rd_nf);
>>>>>>     	trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp,
>>>>>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>>>>>> index 0645fccbf122..538b6e1127a2 100644
>>>>>> --- a/fs/nfsd/nfs4state.c
>>>>>> +++ b/fs/nfsd/nfs4state.c
>>>>>> @@ -639,18 +639,6 @@ find_readable_file(struct nfs4_file *f)
>>>>>>     	return ret;
>>>>>>     }
>>>>>> -static struct nfsd_file *
>>>>>> -find_rw_file(struct nfs4_file *f)
>>>>>> -{
>>>>>> -	struct nfsd_file *ret;
>>>>>> -
>>>>>> -	spin_lock(&f->fi_lock);
>>>>>> -	ret = nfsd_file_get(f->fi_fds[O_RDWR]);
>>>>>> -	spin_unlock(&f->fi_lock);
>>>>>> -
>>>>>> -	return ret;
>>>>>> -}
>>>>>> -
>>>>>>     struct nfsd_file *
>>>>>>     find_any_file(struct nfs4_file *f)
>>>>>>     {
>>>>>> @@ -5784,15 +5772,11 @@ nfs4_set_delegation(struct nfsd4_open
>>>>>> *open,
>>>>>> struct nfs4_ol_stateid *stp,
>>>>>>     	 *  "An OPEN_DELEGATE_WRITE delegation allows the client
>>>>>> to
>>>>>> handle,
>>>>>>     	 *   on its own, all opens."
>>>>>>     	 *
>>>>>> -	 * Furthermore the client can use a write delegation for
>>>>>> most READ
>>>>>> -	 * operations as well, so we require a O_RDWR file here.
>>>>>> -	 *
>>>>>> -	 * Offer a write delegation in the case of a BOTH open,
>>>>>> and
>>>>>> ensure
>>>>>> -	 * we get the O_RDWR descriptor.
>>>>>> +	 * Offer a write delegation for WRITE or BOTH access
>>>>>>     	 */
>>>>>> -	if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH) ==
>>>>>> NFS4_SHARE_ACCESS_BOTH) {
>>>>>> -		nf = find_rw_file(fp);
>>>>>> +	if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) {
>>>>>>     		dl_type = NFS4_OPEN_DELEGATE_WRITE;
>>>>>> +		nf = find_writeable_file(fp);
>>>>>>     	}
>>>>>>     	/*
>>>>>> @@ -5934,8 +5918,8 @@ static void
>>>>>> nfsd4_open_deleg_none_ext(struct
>>>>>> nfsd4_open *open, int status)
>>>>>>      * open or lock state.
>>>>>>      */
>>>>>>     static void
>>>>>> -nfs4_open_delegation(struct nfsd4_open *open, struct
>>>>>> nfs4_ol_stateid
>>>>>> *stp,
>>>>>> -		     struct svc_fh *currentfh)
>>>>>> +nfs4_open_delegation(struct svc_rqst *rqstp, struct nfsd4_open
>>>>>> *open,
>>>>>> +		struct nfs4_ol_stateid *stp, struct svc_fh
>>>>>> *currentfh)
>>>>>>     {
>>>>>>     	struct nfs4_delegation *dp;
>>>>>>     	struct nfs4_openowner *oo = openowner(stp-
>>>>>>> st_stateowner);
>>>>>> @@ -5994,6 +5978,20 @@ nfs4_open_delegation(struct nfsd4_open
>>>>>> *open,
>>>>>> struct nfs4_ol_stateid *stp,
>>>>>>     		dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
>>>>>>     		dp->dl_cb_fattr.ncf_initial_cinfo =
>>>>>>     			nfsd4_change_attribute(&stat,
>>>>>> d_inode(currentfh->fh_dentry));
>>>>>> +		if ((open->op_share_access &
>>>>>> NFS4_SHARE_ACCESS_BOTH)
>>>>>> != NFS4_SHARE_ACCESS_BOTH) {
>>>>>> +			struct nfsd_file *nf = NULL;
>>>>>> +
>>>>>> +			/* make sure the file is opened locally
>>>>>> for
>>>>>> O_RDWR */
>>>>>> +			status = nfsd_file_acquire_opened(rqstp,
>>>>>> currentfh,
>>>>>> +				nfs4_access_to_access(NFS4_SHARE
>>>>>> _ACC
>>>>>> ESS_BOTH),
>>>>>> +				open->op_filp, &nf);
>>>>>> +			if (status) {
>>>>>> +				nfs4_put_stid(&dp->dl_stid);
>>>>>> +				destroy_delegation(dp);
>>>>>> +				goto out_no_deleg;
>>>>>> +			}
>>>>>> +			stp->st_stid.sc_file->fi_fds[O_RDWR] =
>>>>>> nf;
>>>>> I have a bit of a concern here. When we go to put access references
>>>>> to
>>>>> the fi_fds, that's done according to the st_access_bmap. Here
>>>>> though,
>>>>> you're adding an extra reference for the O_RDWR fd, but I don't see
>>>>> where you're calling set_access for that on the delegation stateid?
>>>>> Am
>>>>> I missing where that happens? Not doing that may lead to fd leaks
>>>>> if it
>>>>> was missed.
>>>> Ah, this is something that I did not fully understand...
>>>> However it looks like st_access_bmap is not something that is
>>>> accounted on the delegation stateid...
>>>>
>>>> Can I simply set it on the open stateid (stp)?
>>> That would likely fix the leak, but I'm not sure that's the best
>>> approach. You have an NFS4_SHARE_ACCESS_WRITE-only stateid here, and
>>> that would turn it a NFS4_SHARE_ACCESS_BOTH one, wouldn't it?
>>>
>>> It wouldn't surprise me if that might break a testcase or two.
>> Well, if the server handed out a write delegation, isn't it effectively
>> equivalent to
>> NFS4_SHARE_ACCESS_BOTH open?
> It has to be equivalent, since the write delegation gives the client
> carte blanche to perform any open it wants to, locally. The server
> does not know about those local client-side opens, and it has a
> notification set up to fire if anyone else wants to open that file.
>
> In nfs4_set_delegation(), we have this comment:
>
>>        /*
>>         * Try for a write delegation first. RFC8881 section 10.4 says:
>>         *
>>         *  "An OPEN_DELEGATE_WRITE delegation allows the client to handle,
>>         *   on its own, all opens."
>>         *
>>         * Furthermore the client can use a write delegation for most READ
>>         * operations as well, so we require a O_RDWR file here.
>>         *
>>         * Offer a write delegation in the case of a BOTH open, and ensure
>>         * we get the O_RDWR descriptor.
>>         */
> I think we did it this way only to circumvent the broken test cases.
>
> A write delegation stateid uses a local O_RDWR OPEN, yes? If so, why
> can't it be used for READ operations already? All that has to be
> done is hand out the write delegation in the correct cases.
>
> Instead of gating the delegation on the intent presented by the OPEN
> operation, gate it on whether the user who is opening the file has
> both read and write access to the file.

Umm, I'm a little unclear on what is the suggestion here Chuck. You mean
checking the openowner permissions to access the file? If so, won't buffered
read-modify-write be a problem ?
Chuck Lever July 29, 2024, 2:10 p.m. UTC | #11
On Mon, Jul 29, 2024 at 05:05:13PM +0300, Sagi Grimberg wrote:
> 
> 
> 
> On 29/07/2024 16:52, Chuck Lever wrote:
> > On Mon, Jul 29, 2024 at 04:39:07PM +0300, Sagi Grimberg wrote:
> > > 
> > > 
> > > On 29/07/2024 16:10, Jeff Layton wrote:
> > > > On Mon, 2024-07-29 at 15:58 +0300, Sagi Grimberg wrote:
> > > > > 
> > > > > On 29/07/2024 15:10, Jeff Layton wrote:
> > > > > > On Sun, 2024-07-28 at 23:41 +0300, Sagi Grimberg wrote:
> > > > > > > In order to support write delegations with O_WRONLY opens, we
> > > > > > > need to
> > > > > > > allow the clients to read using the write delegation stateid (Per
> > > > > > > RFC
> > > > > > > 8881 section 9.1.2. Use of the Stateid and Locking).
> > > > > > > 
> > > > > > > Hence, we check for NFS4_SHARE_ACCESS_WRITE set in open request,
> > > > > > > and
> > > > > > > in case the share access flag does not set NFS4_SHARE_ACCESS_READ
> > > > > > > as
> > > > > > > well, we'll open the file locally with O_RDWR in order to allow
> > > > > > > the
> > > > > > > client to use the write delegation stateid when issuing a read in
> > > > > > > case
> > > > > > > it may choose to.
> > > > > > > 
> > > > > > > Plus, find_rw_file singular call-site is now removed, remove it
> > > > > > > altogether.
> > > > > > > 
> > > > > > > Note: reads using special stateids that conflict with pending
> > > > > > > write
> > > > > > > delegations are undetected, and will be covered in a follow on
> > > > > > > patch.
> > > > > > > 
> > > > > > > Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
> > > > > > > ---
> > > > > > >     fs/nfsd/nfs4proc.c  | 18 +++++++++++++++++-
> > > > > > >     fs/nfsd/nfs4state.c | 42 ++++++++++++++++++++------------------
> > > > > > > ----
> > > > > > >     fs/nfsd/xdr4.h      |  2 ++
> > > > > > >     3 files changed, 39 insertions(+), 23 deletions(-)
> > > > > > > 
> > > > > > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> > > > > > > index 7b70309ad8fb..041bcc3ab5d7 100644
> > > > > > > --- a/fs/nfsd/nfs4proc.c
> > > > > > > +++ b/fs/nfsd/nfs4proc.c
> > > > > > > @@ -979,8 +979,22 @@ nfsd4_read(struct svc_rqst *rqstp, struct
> > > > > > > nfsd4_compound_state *cstate,
> > > > > > >     	/* check stateid */
> > > > > > >     	status = nfs4_preprocess_stateid_op(rqstp, cstate,
> > > > > > > &cstate-
> > > > > > > > current_fh,
> > > > > > >     					&read->rd_stateid,
> > > > > > > RD_STATE,
> > > > > > > -					&read->rd_nf, NULL);
> > > > > > > +					&read->rd_nf, &read-
> > > > > > > > rd_wd_stid);
> > > > > > > +	/*
> > > > > > > +	 * rd_wd_stid is needed for nfsd4_encode_read to allow
> > > > > > > write
> > > > > > > +	 * delegation stateid used for read. Its refcount is
> > > > > > > decremented
> > > > > > > +	 * by nfsd4_read_release when read is done.
> > > > > > > +	 */
> > > > > > > +	if (!status) {
> > > > > > > +		if (read->rd_wd_stid &&
> > > > > > > +		    (read->rd_wd_stid->sc_type != SC_TYPE_DELEG
> > > > > > > ||
> > > > > > > +		     delegstateid(read->rd_wd_stid)->dl_type !=
> > > > > > > +					NFS4_OPEN_DELEGATE_WRITE
> > > > > > > )) {
> > > > > > > +			nfs4_put_stid(read->rd_wd_stid);
> > > > > > > +			read->rd_wd_stid = NULL;
> > > > > > > +		}
> > > > > > > +	}
> > > > > > >     	read->rd_rqstp = rqstp;
> > > > > > >     	read->rd_fhp = &cstate->current_fh;
> > > > > > >     	return status;
> > > > > > > @@ -990,6 +1004,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct
> > > > > > > nfsd4_compound_state *cstate,
> > > > > > >     static void
> > > > > > >     nfsd4_read_release(union nfsd4_op_u *u)
> > > > > > >     {
> > > > > > > +	if (u->read.rd_wd_stid)
> > > > > > > +		nfs4_put_stid(u->read.rd_wd_stid);
> > > > > > >     	if (u->read.rd_nf)
> > > > > > >     		nfsd_file_put(u->read.rd_nf);
> > > > > > >     	trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp,
> > > > > > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > > > > > > index 0645fccbf122..538b6e1127a2 100644
> > > > > > > --- a/fs/nfsd/nfs4state.c
> > > > > > > +++ b/fs/nfsd/nfs4state.c
> > > > > > > @@ -639,18 +639,6 @@ find_readable_file(struct nfs4_file *f)
> > > > > > >     	return ret;
> > > > > > >     }
> > > > > > > -static struct nfsd_file *
> > > > > > > -find_rw_file(struct nfs4_file *f)
> > > > > > > -{
> > > > > > > -	struct nfsd_file *ret;
> > > > > > > -
> > > > > > > -	spin_lock(&f->fi_lock);
> > > > > > > -	ret = nfsd_file_get(f->fi_fds[O_RDWR]);
> > > > > > > -	spin_unlock(&f->fi_lock);
> > > > > > > -
> > > > > > > -	return ret;
> > > > > > > -}
> > > > > > > -
> > > > > > >     struct nfsd_file *
> > > > > > >     find_any_file(struct nfs4_file *f)
> > > > > > >     {
> > > > > > > @@ -5784,15 +5772,11 @@ nfs4_set_delegation(struct nfsd4_open
> > > > > > > *open,
> > > > > > > struct nfs4_ol_stateid *stp,
> > > > > > >     	 *  "An OPEN_DELEGATE_WRITE delegation allows the client
> > > > > > > to
> > > > > > > handle,
> > > > > > >     	 *   on its own, all opens."
> > > > > > >     	 *
> > > > > > > -	 * Furthermore the client can use a write delegation for
> > > > > > > most READ
> > > > > > > -	 * operations as well, so we require a O_RDWR file here.
> > > > > > > -	 *
> > > > > > > -	 * Offer a write delegation in the case of a BOTH open,
> > > > > > > and
> > > > > > > ensure
> > > > > > > -	 * we get the O_RDWR descriptor.
> > > > > > > +	 * Offer a write delegation for WRITE or BOTH access
> > > > > > >     	 */
> > > > > > > -	if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH) ==
> > > > > > > NFS4_SHARE_ACCESS_BOTH) {
> > > > > > > -		nf = find_rw_file(fp);
> > > > > > > +	if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) {
> > > > > > >     		dl_type = NFS4_OPEN_DELEGATE_WRITE;
> > > > > > > +		nf = find_writeable_file(fp);
> > > > > > >     	}
> > > > > > >     	/*
> > > > > > > @@ -5934,8 +5918,8 @@ static void
> > > > > > > nfsd4_open_deleg_none_ext(struct
> > > > > > > nfsd4_open *open, int status)
> > > > > > >      * open or lock state.
> > > > > > >      */
> > > > > > >     static void
> > > > > > > -nfs4_open_delegation(struct nfsd4_open *open, struct
> > > > > > > nfs4_ol_stateid
> > > > > > > *stp,
> > > > > > > -		     struct svc_fh *currentfh)
> > > > > > > +nfs4_open_delegation(struct svc_rqst *rqstp, struct nfsd4_open
> > > > > > > *open,
> > > > > > > +		struct nfs4_ol_stateid *stp, struct svc_fh
> > > > > > > *currentfh)
> > > > > > >     {
> > > > > > >     	struct nfs4_delegation *dp;
> > > > > > >     	struct nfs4_openowner *oo = openowner(stp-
> > > > > > > > st_stateowner);
> > > > > > > @@ -5994,6 +5978,20 @@ nfs4_open_delegation(struct nfsd4_open
> > > > > > > *open,
> > > > > > > struct nfs4_ol_stateid *stp,
> > > > > > >     		dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
> > > > > > >     		dp->dl_cb_fattr.ncf_initial_cinfo =
> > > > > > >     			nfsd4_change_attribute(&stat,
> > > > > > > d_inode(currentfh->fh_dentry));
> > > > > > > +		if ((open->op_share_access &
> > > > > > > NFS4_SHARE_ACCESS_BOTH)
> > > > > > > != NFS4_SHARE_ACCESS_BOTH) {
> > > > > > > +			struct nfsd_file *nf = NULL;
> > > > > > > +
> > > > > > > +			/* make sure the file is opened locally
> > > > > > > for
> > > > > > > O_RDWR */
> > > > > > > +			status = nfsd_file_acquire_opened(rqstp,
> > > > > > > currentfh,
> > > > > > > +				nfs4_access_to_access(NFS4_SHARE
> > > > > > > _ACC
> > > > > > > ESS_BOTH),
> > > > > > > +				open->op_filp, &nf);
> > > > > > > +			if (status) {
> > > > > > > +				nfs4_put_stid(&dp->dl_stid);
> > > > > > > +				destroy_delegation(dp);
> > > > > > > +				goto out_no_deleg;
> > > > > > > +			}
> > > > > > > +			stp->st_stid.sc_file->fi_fds[O_RDWR] =
> > > > > > > nf;
> > > > > > I have a bit of a concern here. When we go to put access references
> > > > > > to
> > > > > > the fi_fds, that's done according to the st_access_bmap. Here
> > > > > > though,
> > > > > > you're adding an extra reference for the O_RDWR fd, but I don't see
> > > > > > where you're calling set_access for that on the delegation stateid?
> > > > > > Am
> > > > > > I missing where that happens? Not doing that may lead to fd leaks
> > > > > > if it
> > > > > > was missed.
> > > > > Ah, this is something that I did not fully understand...
> > > > > However it looks like st_access_bmap is not something that is
> > > > > accounted on the delegation stateid...
> > > > > 
> > > > > Can I simply set it on the open stateid (stp)?
> > > > That would likely fix the leak, but I'm not sure that's the best
> > > > approach. You have an NFS4_SHARE_ACCESS_WRITE-only stateid here, and
> > > > that would turn it a NFS4_SHARE_ACCESS_BOTH one, wouldn't it?
> > > > 
> > > > It wouldn't surprise me if that might break a testcase or two.
> > > Well, if the server handed out a write delegation, isn't it effectively
> > > equivalent to
> > > NFS4_SHARE_ACCESS_BOTH open?
> > It has to be equivalent, since the write delegation gives the client
> > carte blanche to perform any open it wants to, locally. The server
> > does not know about those local client-side opens, and it has a
> > notification set up to fire if anyone else wants to open that file.
> > 
> > In nfs4_set_delegation(), we have this comment:
> > 
> > >        /*
> > >         * Try for a write delegation first. RFC8881 section 10.4 says:
> > >         *
> > >         *  "An OPEN_DELEGATE_WRITE delegation allows the client to handle,
> > >         *   on its own, all opens."
> > >         *
> > >         * Furthermore the client can use a write delegation for most READ
> > >         * operations as well, so we require a O_RDWR file here.
> > >         *
> > >         * Offer a write delegation in the case of a BOTH open, and ensure
> > >         * we get the O_RDWR descriptor.
> > >         */
> > I think we did it this way only to circumvent the broken test cases.
> > 
> > A write delegation stateid uses a local O_RDWR OPEN, yes? If so, why
> > can't it be used for READ operations already? All that has to be
> > done is hand out the write delegation in the correct cases.
> > 
> > Instead of gating the delegation on the intent presented by the OPEN
> > operation, gate it on whether the user who is opening the file has
> > both read and write access to the file.
> 
> Umm, I'm a little unclear on what is the suggestion here Chuck. You mean
> checking the openowner permissions to access the file?

Yes. OPEN already does that. Can this user access the file for read
and for write? If yes, this OPEN is eligible for a write delegation.


> If so, won't buffered read-modify-write be a problem ?

I'm not sure what problem you're referring to.
Jeff Layton July 29, 2024, 2:12 p.m. UTC | #12
On Mon, 2024-07-29 at 16:39 +0300, Sagi Grimberg wrote:
> 
> 
> 
> On 29/07/2024 16:10, Jeff Layton wrote:
> > On Mon, 2024-07-29 at 15:58 +0300, Sagi Grimberg wrote:
> > > 
> > > 
> > > On 29/07/2024 15:10, Jeff Layton wrote:
> > > > On Sun, 2024-07-28 at 23:41 +0300, Sagi Grimberg wrote:
> > > > > In order to support write delegations with O_WRONLY opens, we
> > > > > need to
> > > > > allow the clients to read using the write delegation stateid
> > > > > (Per
> > > > > RFC
> > > > > 8881 section 9.1.2. Use of the Stateid and Locking).
> > > > > 
> > > > > Hence, we check for NFS4_SHARE_ACCESS_WRITE set in open
> > > > > request,
> > > > > and
> > > > > in case the share access flag does not set
> > > > > NFS4_SHARE_ACCESS_READ
> > > > > as
> > > > > well, we'll open the file locally with O_RDWR in order to
> > > > > allow
> > > > > the
> > > > > client to use the write delegation stateid when issuing a
> > > > > read in
> > > > > case
> > > > > it may choose to.
> > > > > 
> > > > > Plus, find_rw_file singular call-site is now removed, remove
> > > > > it
> > > > > altogether.
> > > > > 
> > > > > Note: reads using special stateids that conflict with pending
> > > > > write
> > > > > delegations are undetected, and will be covered in a follow
> > > > > on
> > > > > patch.
> > > > > 
> > > > > Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
> > > > > ---
> > > > >    fs/nfsd/nfs4proc.c  | 18 +++++++++++++++++-
> > > > >    fs/nfsd/nfs4state.c | 42 ++++++++++++++++++++-------------
> > > > > -----
> > > > > ----
> > > > >    fs/nfsd/xdr4.h      |  2 ++
> > > > >    3 files changed, 39 insertions(+), 23 deletions(-)
> > > > > 
> > > > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> > > > > index 7b70309ad8fb..041bcc3ab5d7 100644
> > > > > --- a/fs/nfsd/nfs4proc.c
> > > > > +++ b/fs/nfsd/nfs4proc.c
> > > > > @@ -979,8 +979,22 @@ nfsd4_read(struct svc_rqst *rqstp,
> > > > > struct
> > > > > nfsd4_compound_state *cstate,
> > > > >    	/* check stateid */
> > > > >    	status = nfs4_preprocess_stateid_op(rqstp, cstate,
> > > > > &cstate-
> > > > > > current_fh,
> > > > >    					&read->rd_stateid,
> > > > > RD_STATE,
> > > > > -					&read->rd_nf, NULL);
> > > > > +					&read->rd_nf, &read-
> > > > > > rd_wd_stid);
> > > > >    
> > > > > +	/*
> > > > > +	 * rd_wd_stid is needed for nfsd4_encode_read to
> > > > > allow
> > > > > write
> > > > > +	 * delegation stateid used for read. Its refcount is
> > > > > decremented
> > > > > +	 * by nfsd4_read_release when read is done.
> > > > > +	 */
> > > > > +	if (!status) {
> > > > > +		if (read->rd_wd_stid &&
> > > > > +		    (read->rd_wd_stid->sc_type !=
> > > > > SC_TYPE_DELEG
> > > > > > > 
> > > > > +		     delegstateid(read->rd_wd_stid)->dl_type
> > > > > !=
> > > > > +					NFS4_OPEN_DELEGATE_W
> > > > > RITE
> > > > > )) {
> > > > > +			nfs4_put_stid(read->rd_wd_stid);
> > > > > +			read->rd_wd_stid = NULL;
> > > > > +		}
> > > > > +	}
> > > > >    	read->rd_rqstp = rqstp;
> > > > >    	read->rd_fhp = &cstate->current_fh;
> > > > >    	return status;
> > > > > @@ -990,6 +1004,8 @@ nfsd4_read(struct svc_rqst *rqstp,
> > > > > struct
> > > > > nfsd4_compound_state *cstate,
> > > > >    static void
> > > > >    nfsd4_read_release(union nfsd4_op_u *u)
> > > > >    {
> > > > > +	if (u->read.rd_wd_stid)
> > > > > +		nfs4_put_stid(u->read.rd_wd_stid);
> > > > >    	if (u->read.rd_nf)
> > > > >    		nfsd_file_put(u->read.rd_nf);
> > > > >    	trace_nfsd_read_done(u->read.rd_rqstp, u-
> > > > > >read.rd_fhp,
> > > > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > > > > index 0645fccbf122..538b6e1127a2 100644
> > > > > --- a/fs/nfsd/nfs4state.c
> > > > > +++ b/fs/nfsd/nfs4state.c
> > > > > @@ -639,18 +639,6 @@ find_readable_file(struct nfs4_file *f)
> > > > >    	return ret;
> > > > >    }
> > > > >    
> > > > > -static struct nfsd_file *
> > > > > -find_rw_file(struct nfs4_file *f)
> > > > > -{
> > > > > -	struct nfsd_file *ret;
> > > > > -
> > > > > -	spin_lock(&f->fi_lock);
> > > > > -	ret = nfsd_file_get(f->fi_fds[O_RDWR]);
> > > > > -	spin_unlock(&f->fi_lock);
> > > > > -
> > > > > -	return ret;
> > > > > -}
> > > > > -
> > > > >    struct nfsd_file *
> > > > >    find_any_file(struct nfs4_file *f)
> > > > >    {
> > > > > @@ -5784,15 +5772,11 @@ nfs4_set_delegation(struct nfsd4_open
> > > > > *open,
> > > > > struct nfs4_ol_stateid *stp,
> > > > >    	 *  "An OPEN_DELEGATE_WRITE delegation allows the
> > > > > client
> > > > > to
> > > > > handle,
> > > > >    	 *   on its own, all opens."
> > > > >    	 *
> > > > > -	 * Furthermore the client can use a write delegation
> > > > > for
> > > > > most READ
> > > > > -	 * operations as well, so we require a O_RDWR file
> > > > > here.
> > > > > -	 *
> > > > > -	 * Offer a write delegation in the case of a BOTH
> > > > > open,
> > > > > and
> > > > > ensure
> > > > > -	 * we get the O_RDWR descriptor.
> > > > > +	 * Offer a write delegation for WRITE or BOTH access
> > > > >    	 */
> > > > > -	if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH)
> > > > > ==
> > > > > NFS4_SHARE_ACCESS_BOTH) {
> > > > > -		nf = find_rw_file(fp);
> > > > > +	if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
> > > > > {
> > > > >    		dl_type = NFS4_OPEN_DELEGATE_WRITE;
> > > > > +		nf = find_writeable_file(fp);
> > > > >    	}
> > > > >    
> > > > >    	/*
> > > > > @@ -5934,8 +5918,8 @@ static void
> > > > > nfsd4_open_deleg_none_ext(struct
> > > > > nfsd4_open *open, int status)
> > > > >     * open or lock state.
> > > > >     */
> > > > >    static void
> > > > > -nfs4_open_delegation(struct nfsd4_open *open, struct
> > > > > nfs4_ol_stateid
> > > > > *stp,
> > > > > -		     struct svc_fh *currentfh)
> > > > > +nfs4_open_delegation(struct svc_rqst *rqstp, struct
> > > > > nfsd4_open
> > > > > *open,
> > > > > +		struct nfs4_ol_stateid *stp, struct svc_fh
> > > > > *currentfh)
> > > > >    {
> > > > >    	struct nfs4_delegation *dp;
> > > > >    	struct nfs4_openowner *oo = openowner(stp-
> > > > > > st_stateowner);
> > > > > @@ -5994,6 +5978,20 @@ nfs4_open_delegation(struct nfsd4_open
> > > > > *open,
> > > > > struct nfs4_ol_stateid *stp,
> > > > >    		dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
> > > > >    		dp->dl_cb_fattr.ncf_initial_cinfo =
> > > > >    			nfsd4_change_attribute(&stat,
> > > > > d_inode(currentfh->fh_dentry));
> > > > > +		if ((open->op_share_access &
> > > > > NFS4_SHARE_ACCESS_BOTH)
> > > > > != NFS4_SHARE_ACCESS_BOTH) {
> > > > > +			struct nfsd_file *nf = NULL;
> > > > > +
> > > > > +			/* make sure the file is opened
> > > > > locally
> > > > > for
> > > > > O_RDWR */
> > > > > +			status =
> > > > > nfsd_file_acquire_opened(rqstp,
> > > > > currentfh,
> > > > > +				nfs4_access_to_access(NFS4_S
> > > > > HARE
> > > > > _ACC
> > > > > ESS_BOTH),
> > > > > +				open->op_filp, &nf);
> > > > > +			if (status) {
> > > > > +				nfs4_put_stid(&dp->dl_stid);
> > > > > +				destroy_delegation(dp);
> > > > > +				goto out_no_deleg;
> > > > > +			}
> > > > > +			stp->st_stid.sc_file->fi_fds[O_RDWR]
> > > > > =
> > > > > nf;
> > > > I have a bit of a concern here. When we go to put access
> > > > references
> > > > to
> > > > the fi_fds, that's done according to the st_access_bmap. Here
> > > > though,
> > > > you're adding an extra reference for the O_RDWR fd, but I don't
> > > > see
> > > > where you're calling set_access for that on the delegation
> > > > stateid?
> > > > Am
> > > > I missing where that happens? Not doing that may lead to fd
> > > > leaks
> > > > if it
> > > > was missed.
> > > Ah, this is something that I did not fully understand...
> > > However it looks like st_access_bmap is not something that is
> > > accounted on the delegation stateid...
> > > 
> > > Can I simply set it on the open stateid (stp)?
> > That would likely fix the leak, but I'm not sure that's the best
> > approach. You have an NFS4_SHARE_ACCESS_WRITE-only stateid here,
> > and
> > that would turn it a NFS4_SHARE_ACCESS_BOTH one, wouldn't it?
> > 
> > It wouldn't surprise me if that might break a testcase or two.
> 
> Well, if the server handed out a write delegation, isn't it
> effectively 
> equivalent to
> NFS4_SHARE_ACCESS_BOTH open?

For the delegation, yes, but you're proposing to change an open stateid
that was explicitly requested to be ACCESS_WRITE into ACCESS_BOTH.

Given that you're doing this for a write delegation, that sort of
implies that no other client has it open. Maybe it's OK in that case,
but I think that if you do that then you'd need to convert the open
stateid back into being ACCESS_WRITE when the delegation goes away.

Otherwise you wouldn't (for instance) be able to set a DENY_READ on the
file after doing an O_WRONLY open on it.

Thinking about this a bit more though, I wonder if we ought to consider
reworking the nfs4_file access altogether, especially in light of the
recent delstid draft. Today, the open stateids hold all of the access
refs, so if those go away while there is still an outstanding
delegation, there's no guarantee we'll consider the file open at all
anymore (AFAICS).

Eventually we want to implement delstid support, in which case we'll
need to support the situation where we may give out a delegation with
no open stateid. It might be better to rework things along those lines
instead.
Jeff Layton July 29, 2024, 2:21 p.m. UTC | #13
On Mon, 2024-07-29 at 09:52 -0400, Chuck Lever wrote:
> On Mon, Jul 29, 2024 at 04:39:07PM +0300, Sagi Grimberg wrote:
> > 
> > 
> > 
> > On 29/07/2024 16:10, Jeff Layton wrote:
> > > On Mon, 2024-07-29 at 15:58 +0300, Sagi Grimberg wrote:
> > > > 
> > > > 
> > > > On 29/07/2024 15:10, Jeff Layton wrote:
> > > > > On Sun, 2024-07-28 at 23:41 +0300, Sagi Grimberg wrote:
> > > > > > In order to support write delegations with O_WRONLY opens,
> > > > > > we
> > > > > > need to
> > > > > > allow the clients to read using the write delegation
> > > > > > stateid (Per
> > > > > > RFC
> > > > > > 8881 section 9.1.2. Use of the Stateid and Locking).
> > > > > > 
> > > > > > Hence, we check for NFS4_SHARE_ACCESS_WRITE set in open
> > > > > > request,
> > > > > > and
> > > > > > in case the share access flag does not set
> > > > > > NFS4_SHARE_ACCESS_READ
> > > > > > as
> > > > > > well, we'll open the file locally with O_RDWR in order to
> > > > > > allow
> > > > > > the
> > > > > > client to use the write delegation stateid when issuing a
> > > > > > read in
> > > > > > case
> > > > > > it may choose to.
> > > > > > 
> > > > > > Plus, find_rw_file singular call-site is now removed,
> > > > > > remove it
> > > > > > altogether.
> > > > > > 
> > > > > > Note: reads using special stateids that conflict with
> > > > > > pending
> > > > > > write
> > > > > > delegations are undetected, and will be covered in a follow
> > > > > > on
> > > > > > patch.
> > > > > > 
> > > > > > Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
> > > > > > ---
> > > > > >    fs/nfsd/nfs4proc.c  | 18 +++++++++++++++++-
> > > > > >    fs/nfsd/nfs4state.c | 42 ++++++++++++++++++++-----------
> > > > > > -------
> > > > > > ----
> > > > > >    fs/nfsd/xdr4.h      |  2 ++
> > > > > >    3 files changed, 39 insertions(+), 23 deletions(-)
> > > > > > 
> > > > > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> > > > > > index 7b70309ad8fb..041bcc3ab5d7 100644
> > > > > > --- a/fs/nfsd/nfs4proc.c
> > > > > > +++ b/fs/nfsd/nfs4proc.c
> > > > > > @@ -979,8 +979,22 @@ nfsd4_read(struct svc_rqst *rqstp,
> > > > > > struct
> > > > > > nfsd4_compound_state *cstate,
> > > > > >    	/* check stateid */
> > > > > >    	status = nfs4_preprocess_stateid_op(rqstp, cstate,
> > > > > > &cstate-
> > > > > > > current_fh,
> > > > > >    					&read->rd_stateid,
> > > > > > RD_STATE,
> > > > > > -					&read->rd_nf,
> > > > > > NULL);
> > > > > > +					&read->rd_nf,
> > > > > > &read-
> > > > > > > rd_wd_stid);
> > > > > > +	/*
> > > > > > +	 * rd_wd_stid is needed for nfsd4_encode_read to
> > > > > > allow
> > > > > > write
> > > > > > +	 * delegation stateid used for read. Its refcount
> > > > > > is
> > > > > > decremented
> > > > > > +	 * by nfsd4_read_release when read is done.
> > > > > > +	 */
> > > > > > +	if (!status) {
> > > > > > +		if (read->rd_wd_stid &&
> > > > > > +		    (read->rd_wd_stid->sc_type !=
> > > > > > SC_TYPE_DELEG
> > > > > > > > 
> > > > > > +		     delegstateid(read->rd_wd_stid)-
> > > > > > >dl_type !=
> > > > > > +					NFS4_OPEN_DELEGATE
> > > > > > _WRITE
> > > > > > )) {
> > > > > > +			nfs4_put_stid(read->rd_wd_stid);
> > > > > > +			read->rd_wd_stid = NULL;
> > > > > > +		}
> > > > > > +	}
> > > > > >    	read->rd_rqstp = rqstp;
> > > > > >    	read->rd_fhp = &cstate->current_fh;
> > > > > >    	return status;
> > > > > > @@ -990,6 +1004,8 @@ nfsd4_read(struct svc_rqst *rqstp,
> > > > > > struct
> > > > > > nfsd4_compound_state *cstate,
> > > > > >    static void
> > > > > >    nfsd4_read_release(union nfsd4_op_u *u)
> > > > > >    {
> > > > > > +	if (u->read.rd_wd_stid)
> > > > > > +		nfs4_put_stid(u->read.rd_wd_stid);
> > > > > >    	if (u->read.rd_nf)
> > > > > >    		nfsd_file_put(u->read.rd_nf);
> > > > > >    	trace_nfsd_read_done(u->read.rd_rqstp, u-
> > > > > > >read.rd_fhp,
> > > > > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > > > > > index 0645fccbf122..538b6e1127a2 100644
> > > > > > --- a/fs/nfsd/nfs4state.c
> > > > > > +++ b/fs/nfsd/nfs4state.c
> > > > > > @@ -639,18 +639,6 @@ find_readable_file(struct nfs4_file
> > > > > > *f)
> > > > > >    	return ret;
> > > > > >    }
> > > > > > -static struct nfsd_file *
> > > > > > -find_rw_file(struct nfs4_file *f)
> > > > > > -{
> > > > > > -	struct nfsd_file *ret;
> > > > > > -
> > > > > > -	spin_lock(&f->fi_lock);
> > > > > > -	ret = nfsd_file_get(f->fi_fds[O_RDWR]);
> > > > > > -	spin_unlock(&f->fi_lock);
> > > > > > -
> > > > > > -	return ret;
> > > > > > -}
> > > > > > -
> > > > > >    struct nfsd_file *
> > > > > >    find_any_file(struct nfs4_file *f)
> > > > > >    {
> > > > > > @@ -5784,15 +5772,11 @@ nfs4_set_delegation(struct
> > > > > > nfsd4_open
> > > > > > *open,
> > > > > > struct nfs4_ol_stateid *stp,
> > > > > >    	 *  "An OPEN_DELEGATE_WRITE delegation allows the
> > > > > > client
> > > > > > to
> > > > > > handle,
> > > > > >    	 *   on its own, all opens."
> > > > > >    	 *
> > > > > > -	 * Furthermore the client can use a write
> > > > > > delegation for
> > > > > > most READ
> > > > > > -	 * operations as well, so we require a O_RDWR file
> > > > > > here.
> > > > > > -	 *
> > > > > > -	 * Offer a write delegation in the case of a BOTH
> > > > > > open,
> > > > > > and
> > > > > > ensure
> > > > > > -	 * we get the O_RDWR descriptor.
> > > > > > +	 * Offer a write delegation for WRITE or BOTH
> > > > > > access
> > > > > >    	 */
> > > > > > -	if ((open->op_share_access &
> > > > > > NFS4_SHARE_ACCESS_BOTH) ==
> > > > > > NFS4_SHARE_ACCESS_BOTH) {
> > > > > > -		nf = find_rw_file(fp);
> > > > > > +	if (open->op_share_access &
> > > > > > NFS4_SHARE_ACCESS_WRITE) {
> > > > > >    		dl_type = NFS4_OPEN_DELEGATE_WRITE;
> > > > > > +		nf = find_writeable_file(fp);
> > > > > >    	}
> > > > > >    	/*
> > > > > > @@ -5934,8 +5918,8 @@ static void
> > > > > > nfsd4_open_deleg_none_ext(struct
> > > > > > nfsd4_open *open, int status)
> > > > > >     * open or lock state.
> > > > > >     */
> > > > > >    static void
> > > > > > -nfs4_open_delegation(struct nfsd4_open *open, struct
> > > > > > nfs4_ol_stateid
> > > > > > *stp,
> > > > > > -		     struct svc_fh *currentfh)
> > > > > > +nfs4_open_delegation(struct svc_rqst *rqstp, struct
> > > > > > nfsd4_open
> > > > > > *open,
> > > > > > +		struct nfs4_ol_stateid *stp, struct svc_fh
> > > > > > *currentfh)
> > > > > >    {
> > > > > >    	struct nfs4_delegation *dp;
> > > > > >    	struct nfs4_openowner *oo = openowner(stp-
> > > > > > > st_stateowner);
> > > > > > @@ -5994,6 +5978,20 @@ nfs4_open_delegation(struct
> > > > > > nfsd4_open
> > > > > > *open,
> > > > > > struct nfs4_ol_stateid *stp,
> > > > > >    		dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
> > > > > >    		dp->dl_cb_fattr.ncf_initial_cinfo =
> > > > > >    			nfsd4_change_attribute(&stat,
> > > > > > d_inode(currentfh->fh_dentry));
> > > > > > +		if ((open->op_share_access &
> > > > > > NFS4_SHARE_ACCESS_BOTH)
> > > > > > != NFS4_SHARE_ACCESS_BOTH) {
> > > > > > +			struct nfsd_file *nf = NULL;
> > > > > > +
> > > > > > +			/* make sure the file is opened
> > > > > > locally
> > > > > > for
> > > > > > O_RDWR */
> > > > > > +			status =
> > > > > > nfsd_file_acquire_opened(rqstp,
> > > > > > currentfh,
> > > > > > +				nfs4_access_to_access(NFS4
> > > > > > _SHARE
> > > > > > _ACC
> > > > > > ESS_BOTH),
> > > > > > +				open->op_filp, &nf);
> > > > > > +			if (status) {
> > > > > > +				nfs4_put_stid(&dp-
> > > > > > >dl_stid);
> > > > > > +				destroy_delegation(dp);
> > > > > > +				goto out_no_deleg;
> > > > > > +			}
> > > > > > +			stp->st_stid.sc_file-
> > > > > > >fi_fds[O_RDWR] =
> > > > > > nf;
> > > > > I have a bit of a concern here. When we go to put access
> > > > > references
> > > > > to
> > > > > the fi_fds, that's done according to the st_access_bmap. Here
> > > > > though,
> > > > > you're adding an extra reference for the O_RDWR fd, but I
> > > > > don't see
> > > > > where you're calling set_access for that on the delegation
> > > > > stateid?
> > > > > Am
> > > > > I missing where that happens? Not doing that may lead to fd
> > > > > leaks
> > > > > if it
> > > > > was missed.
> > > > Ah, this is something that I did not fully understand...
> > > > However it looks like st_access_bmap is not something that is
> > > > accounted on the delegation stateid...
> > > > 
> > > > Can I simply set it on the open stateid (stp)?
> > > That would likely fix the leak, but I'm not sure that's the best
> > > approach. You have an NFS4_SHARE_ACCESS_WRITE-only stateid here,
> > > and
> > > that would turn it a NFS4_SHARE_ACCESS_BOTH one, wouldn't it?
> > > 
> > > It wouldn't surprise me if that might break a testcase or two.
> > 
> > Well, if the server handed out a write delegation, isn't it
> > effectively
> > equivalent to
> > NFS4_SHARE_ACCESS_BOTH open?
> 
> It has to be equivalent, since the write delegation gives the client
> carte blanche to perform any open it wants to, locally. The server
> does not know about those local client-side opens, and it has a
> notification set up to fire if anyone else wants to open that file.
> 
> In nfs4_set_delegation(), we have this comment:
> 
> >      
> > /*                                                                 
> >          
> >        * Try for a write delegation first. RFC8881 section 10.4
> > says:             
> >       
> > *                                                                  
> >         
> >        *  "An OPEN_DELEGATE_WRITE delegation allows the client to
> > handle,         
> >        *   on its own, all
> > opens."                                                
> >       
> > *                                                                  
> >         
> >        * Furthermore the client can use a write delegation for most
> > READ          
> >        * operations as well, so we require a O_RDWR file
> > here.                    
> >       
> > *                                                                  
> >         
> >        * Offer a write delegation in the case of a BOTH open, and
> > ensure          
> >        * we get the O_RDWR
> > descriptor.                                            
> >        */   
> 
> I think we did it this way only to circumvent the broken test cases.
> 
> A write delegation stateid uses a local O_RDWR OPEN, yes? If so, why
> can't it be used for READ operations already? All that has to be
> done is hand out the write delegation in the correct cases.
> 

It currently doesn't hold a reference to the fi_fds array at all. The
delegation is dependent on the open stateid holding the fi_fds array
open.

Maybe we should have delegations hold references to those too? That
might help us implement the delstid RFC later as well.

> Instead of gating the delegation on the intent presented by the OPEN
> operation, gate it on whether the user who is opening the file has
> both read and write access to the file.
> 

This seems like a reasonable approach.
Dai Ngo July 29, 2024, 4:12 p.m. UTC | #14
On 7/29/24 7:12 AM, Jeff Layton wrote:
> On Mon, 2024-07-29 at 16:39 +0300, Sagi Grimberg wrote:
>>
>>
>> On 29/07/2024 16:10, Jeff Layton wrote:
>>> On Mon, 2024-07-29 at 15:58 +0300, Sagi Grimberg wrote:
>>>>
>>>> On 29/07/2024 15:10, Jeff Layton wrote:
>>>>> On Sun, 2024-07-28 at 23:41 +0300, Sagi Grimberg wrote:
>>>>>> In order to support write delegations with O_WRONLY opens, we
>>>>>> need to
>>>>>> allow the clients to read using the write delegation stateid
>>>>>> (Per
>>>>>> RFC
>>>>>> 8881 section 9.1.2. Use of the Stateid and Locking).
>>>>>>
>>>>>> Hence, we check for NFS4_SHARE_ACCESS_WRITE set in open
>>>>>> request,
>>>>>> and
>>>>>> in case the share access flag does not set
>>>>>> NFS4_SHARE_ACCESS_READ
>>>>>> as
>>>>>> well, we'll open the file locally with O_RDWR in order to
>>>>>> allow
>>>>>> the
>>>>>> client to use the write delegation stateid when issuing a
>>>>>> read in
>>>>>> case
>>>>>> it may choose to.
>>>>>>
>>>>>> Plus, find_rw_file singular call-site is now removed, remove
>>>>>> it
>>>>>> altogether.
>>>>>>
>>>>>> Note: reads using special stateids that conflict with pending
>>>>>> write
>>>>>> delegations are undetected, and will be covered in a follow
>>>>>> on
>>>>>> patch.
>>>>>>
>>>>>> Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
>>>>>> ---
>>>>>>     fs/nfsd/nfs4proc.c  | 18 +++++++++++++++++-
>>>>>>     fs/nfsd/nfs4state.c | 42 ++++++++++++++++++++-------------
>>>>>> -----
>>>>>> ----
>>>>>>     fs/nfsd/xdr4.h      |  2 ++
>>>>>>     3 files changed, 39 insertions(+), 23 deletions(-)
>>>>>>
>>>>>> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
>>>>>> index 7b70309ad8fb..041bcc3ab5d7 100644
>>>>>> --- a/fs/nfsd/nfs4proc.c
>>>>>> +++ b/fs/nfsd/nfs4proc.c
>>>>>> @@ -979,8 +979,22 @@ nfsd4_read(struct svc_rqst *rqstp,
>>>>>> struct
>>>>>> nfsd4_compound_state *cstate,
>>>>>>     	/* check stateid */
>>>>>>     	status = nfs4_preprocess_stateid_op(rqstp, cstate,
>>>>>> &cstate-
>>>>>>> current_fh,
>>>>>>     					&read->rd_stateid,
>>>>>> RD_STATE,
>>>>>> -					&read->rd_nf, NULL);
>>>>>> +					&read->rd_nf, &read-
>>>>>>> rd_wd_stid);
>>>>>>     
>>>>>> +	/*
>>>>>> +	 * rd_wd_stid is needed for nfsd4_encode_read to
>>>>>> allow
>>>>>> write
>>>>>> +	 * delegation stateid used for read. Its refcount is
>>>>>> decremented
>>>>>> +	 * by nfsd4_read_release when read is done.
>>>>>> +	 */
>>>>>> +	if (!status) {
>>>>>> +		if (read->rd_wd_stid &&
>>>>>> +		    (read->rd_wd_stid->sc_type !=
>>>>>> SC_TYPE_DELEG
>>>>>> +		     delegstateid(read->rd_wd_stid)->dl_type
>>>>>> !=
>>>>>> +					NFS4_OPEN_DELEGATE_W
>>>>>> RITE
>>>>>> )) {
>>>>>> +			nfs4_put_stid(read->rd_wd_stid);
>>>>>> +			read->rd_wd_stid = NULL;
>>>>>> +		}
>>>>>> +	}
>>>>>>     	read->rd_rqstp = rqstp;
>>>>>>     	read->rd_fhp = &cstate->current_fh;
>>>>>>     	return status;
>>>>>> @@ -990,6 +1004,8 @@ nfsd4_read(struct svc_rqst *rqstp,
>>>>>> struct
>>>>>> nfsd4_compound_state *cstate,
>>>>>>     static void
>>>>>>     nfsd4_read_release(union nfsd4_op_u *u)
>>>>>>     {
>>>>>> +	if (u->read.rd_wd_stid)
>>>>>> +		nfs4_put_stid(u->read.rd_wd_stid);
>>>>>>     	if (u->read.rd_nf)
>>>>>>     		nfsd_file_put(u->read.rd_nf);
>>>>>>     	trace_nfsd_read_done(u->read.rd_rqstp, u-
>>>>>>> read.rd_fhp,
>>>>>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>>>>>> index 0645fccbf122..538b6e1127a2 100644
>>>>>> --- a/fs/nfsd/nfs4state.c
>>>>>> +++ b/fs/nfsd/nfs4state.c
>>>>>> @@ -639,18 +639,6 @@ find_readable_file(struct nfs4_file *f)
>>>>>>     	return ret;
>>>>>>     }
>>>>>>     
>>>>>> -static struct nfsd_file *
>>>>>> -find_rw_file(struct nfs4_file *f)
>>>>>> -{
>>>>>> -	struct nfsd_file *ret;
>>>>>> -
>>>>>> -	spin_lock(&f->fi_lock);
>>>>>> -	ret = nfsd_file_get(f->fi_fds[O_RDWR]);
>>>>>> -	spin_unlock(&f->fi_lock);
>>>>>> -
>>>>>> -	return ret;
>>>>>> -}
>>>>>> -
>>>>>>     struct nfsd_file *
>>>>>>     find_any_file(struct nfs4_file *f)
>>>>>>     {
>>>>>> @@ -5784,15 +5772,11 @@ nfs4_set_delegation(struct nfsd4_open
>>>>>> *open,
>>>>>> struct nfs4_ol_stateid *stp,
>>>>>>     	 *  "An OPEN_DELEGATE_WRITE delegation allows the
>>>>>> client
>>>>>> to
>>>>>> handle,
>>>>>>     	 *   on its own, all opens."
>>>>>>     	 *
>>>>>> -	 * Furthermore the client can use a write delegation
>>>>>> for
>>>>>> most READ
>>>>>> -	 * operations as well, so we require a O_RDWR file
>>>>>> here.
>>>>>> -	 *
>>>>>> -	 * Offer a write delegation in the case of a BOTH
>>>>>> open,
>>>>>> and
>>>>>> ensure
>>>>>> -	 * we get the O_RDWR descriptor.
>>>>>> +	 * Offer a write delegation for WRITE or BOTH access
>>>>>>     	 */
>>>>>> -	if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH)
>>>>>> ==
>>>>>> NFS4_SHARE_ACCESS_BOTH) {
>>>>>> -		nf = find_rw_file(fp);
>>>>>> +	if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
>>>>>> {
>>>>>>     		dl_type = NFS4_OPEN_DELEGATE_WRITE;
>>>>>> +		nf = find_writeable_file(fp);
>>>>>>     	}
>>>>>>     
>>>>>>     	/*
>>>>>> @@ -5934,8 +5918,8 @@ static void
>>>>>> nfsd4_open_deleg_none_ext(struct
>>>>>> nfsd4_open *open, int status)
>>>>>>      * open or lock state.
>>>>>>      */
>>>>>>     static void
>>>>>> -nfs4_open_delegation(struct nfsd4_open *open, struct
>>>>>> nfs4_ol_stateid
>>>>>> *stp,
>>>>>> -		     struct svc_fh *currentfh)
>>>>>> +nfs4_open_delegation(struct svc_rqst *rqstp, struct
>>>>>> nfsd4_open
>>>>>> *open,
>>>>>> +		struct nfs4_ol_stateid *stp, struct svc_fh
>>>>>> *currentfh)
>>>>>>     {
>>>>>>     	struct nfs4_delegation *dp;
>>>>>>     	struct nfs4_openowner *oo = openowner(stp-
>>>>>>> st_stateowner);
>>>>>> @@ -5994,6 +5978,20 @@ nfs4_open_delegation(struct nfsd4_open
>>>>>> *open,
>>>>>> struct nfs4_ol_stateid *stp,
>>>>>>     		dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
>>>>>>     		dp->dl_cb_fattr.ncf_initial_cinfo =
>>>>>>     			nfsd4_change_attribute(&stat,
>>>>>> d_inode(currentfh->fh_dentry));
>>>>>> +		if ((open->op_share_access &
>>>>>> NFS4_SHARE_ACCESS_BOTH)
>>>>>> != NFS4_SHARE_ACCESS_BOTH) {
>>>>>> +			struct nfsd_file *nf = NULL;
>>>>>> +
>>>>>> +			/* make sure the file is opened
>>>>>> locally
>>>>>> for
>>>>>> O_RDWR */
>>>>>> +			status =
>>>>>> nfsd_file_acquire_opened(rqstp,
>>>>>> currentfh,
>>>>>> +				nfs4_access_to_access(NFS4_S
>>>>>> HARE
>>>>>> _ACC
>>>>>> ESS_BOTH),
>>>>>> +				open->op_filp, &nf);
>>>>>> +			if (status) {
>>>>>> +				nfs4_put_stid(&dp->dl_stid);
>>>>>> +				destroy_delegation(dp);
>>>>>> +				goto out_no_deleg;
>>>>>> +			}
>>>>>> +			stp->st_stid.sc_file->fi_fds[O_RDWR]
>>>>>> =
>>>>>> nf;
>>>>> I have a bit of a concern here. When we go to put access
>>>>> references
>>>>> to
>>>>> the fi_fds, that's done according to the st_access_bmap. Here
>>>>> though,
>>>>> you're adding an extra reference for the O_RDWR fd, but I don't
>>>>> see
>>>>> where you're calling set_access for that on the delegation
>>>>> stateid?
>>>>> Am
>>>>> I missing where that happens? Not doing that may lead to fd
>>>>> leaks
>>>>> if it
>>>>> was missed.
>>>> Ah, this is something that I did not fully understand...
>>>> However it looks like st_access_bmap is not something that is
>>>> accounted on the delegation stateid...
>>>>
>>>> Can I simply set it on the open stateid (stp)?
>>> That would likely fix the leak, but I'm not sure that's the best
>>> approach. You have an NFS4_SHARE_ACCESS_WRITE-only stateid here,
>>> and
>>> that would turn it a NFS4_SHARE_ACCESS_BOTH one, wouldn't it?
>>>
>>> It wouldn't surprise me if that might break a testcase or two.
>> Well, if the server handed out a write delegation, isn't it
>> effectively
>> equivalent to
>> NFS4_SHARE_ACCESS_BOTH open?
> For the delegation, yes, but you're proposing to change an open stateid
> that was explicitly requested to be ACCESS_WRITE into ACCESS_BOTH.
>
> Given that you're doing this for a write delegation, that sort of
> implies that no other client has it open. Maybe it's OK in that case,
> but I think that if you do that then you'd need to convert the open
> stateid back into being ACCESS_WRITE when the delegation goes away.

Yes, this is my concern too, delegation can be recalled.

-Dai

>
> Otherwise you wouldn't (for instance) be able to set a DENY_READ on the
> file after doing an O_WRONLY open on it.
>
> Thinking about this a bit more though, I wonder if we ought to consider
> reworking the nfs4_file access altogether, especially in light of the
> recent delstid draft. Today, the open stateids hold all of the access
> refs, so if those go away while there is still an outstanding
> delegation, there's no guarantee we'll consider the file open at all
> anymore (AFAICS).
>
> Eventually we want to implement delstid support, in which case we'll
> need to support the situation where we may give out a delegation with
> no open stateid. It might be better to rework things along those lines
> instead.
kernel test robot July 29, 2024, 10:41 p.m. UTC | #15
Hi Sagi,

kernel test robot noticed the following build warnings:

[auto build test WARNING on linus/master]
[also build test WARNING on v6.11-rc1 next-20240729]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Sagi-Grimberg/nfsd-don-t-assume-copy-notify-when-preprocessing-the-stateid/20240729-044834
base:   linus/master
patch link:    https://lore.kernel.org/r/20240728204104.519041-3-sagi%40grimberg.me
patch subject: [PATCH v2 2/3] nfsd: Offer write delegations for o_wronly opens
config: sparc64-randconfig-r121-20240729 (https://download.01.org/0day-ci/archive/20240730/202407300630.V7R20Aao-lkp@intel.com/config)
compiler: sparc64-linux-gcc (GCC) 14.1.0
reproduce: (https://download.01.org/0day-ci/archive/20240730/202407300630.V7R20Aao-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202407300630.V7R20Aao-lkp@intel.com/

sparse warnings: (new ones prefixed by >>)
>> fs/nfsd/nfs4state.c:5985:32: sparse: sparse: incorrect type in assignment (different base types) @@     expected int status @@     got restricted __be32 @@
   fs/nfsd/nfs4state.c:5985:32: sparse:     expected int status
   fs/nfsd/nfs4state.c:5985:32: sparse:     got restricted __be32
   fs/nfsd/nfs4state.c: note: in included file (through include/linux/wait.h, include/linux/wait_bit.h, include/linux/fs.h):
   include/linux/list.h:218:19: sparse: sparse: context imbalance in 'put_clnt_odstate' - unexpected unlock
   fs/nfsd/nfs4state.c:1197:9: sparse: sparse: context imbalance in 'nfs4_put_stid' - unexpected unlock

vim +5985 fs/nfsd/nfs4state.c

  5895	
  5896	/*
  5897	 * The Linux NFS server does not offer write delegations to NFSv4.0
  5898	 * clients in order to avoid conflicts between write delegations and
  5899	 * GETATTRs requesting CHANGE or SIZE attributes.
  5900	 *
  5901	 * With NFSv4.1 and later minorversions, the SEQUENCE operation that
  5902	 * begins each COMPOUND contains a client ID. Delegation recall can
  5903	 * be avoided when the server recognizes the client sending a
  5904	 * GETATTR also holds write delegation it conflicts with.
  5905	 *
  5906	 * However, the NFSv4.0 protocol does not enable a server to
  5907	 * determine that a GETATTR originated from the client holding the
  5908	 * conflicting delegation versus coming from some other client. Per
  5909	 * RFC 7530 Section 16.7.5, the server must recall or send a
  5910	 * CB_GETATTR even when the GETATTR originates from the client that
  5911	 * holds the conflicting delegation.
  5912	 *
  5913	 * An NFSv4.0 client can trigger a pathological situation if it
  5914	 * always sends a DELEGRETURN preceded by a conflicting GETATTR in
  5915	 * the same COMPOUND. COMPOUND execution will always stop at the
  5916	 * GETATTR and the DELEGRETURN will never get executed. The server
  5917	 * eventually revokes the delegation, which can result in loss of
  5918	 * open or lock state.
  5919	 */
  5920	static void
  5921	nfs4_open_delegation(struct svc_rqst *rqstp, struct nfsd4_open *open,
  5922			struct nfs4_ol_stateid *stp, struct svc_fh *currentfh)
  5923	{
  5924		struct nfs4_delegation *dp;
  5925		struct nfs4_openowner *oo = openowner(stp->st_stateowner);
  5926		struct nfs4_client *clp = stp->st_stid.sc_client;
  5927		struct svc_fh *parent = NULL;
  5928		int cb_up;
  5929		int status = 0;
  5930		struct kstat stat;
  5931		struct path path;
  5932	
  5933		cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client);
  5934		open->op_recall = false;
  5935		switch (open->op_claim_type) {
  5936			case NFS4_OPEN_CLAIM_PREVIOUS:
  5937				if (!cb_up)
  5938					open->op_recall = true;
  5939				break;
  5940			case NFS4_OPEN_CLAIM_NULL:
  5941				parent = currentfh;
  5942				fallthrough;
  5943			case NFS4_OPEN_CLAIM_FH:
  5944				/*
  5945				 * Let's not give out any delegations till everyone's
  5946				 * had the chance to reclaim theirs, *and* until
  5947				 * NLM locks have all been reclaimed:
  5948				 */
  5949				if (locks_in_grace(clp->net))
  5950					goto out_no_deleg;
  5951				if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED))
  5952					goto out_no_deleg;
  5953				if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE &&
  5954						!clp->cl_minorversion)
  5955					goto out_no_deleg;
  5956				break;
  5957			default:
  5958				goto out_no_deleg;
  5959		}
  5960		dp = nfs4_set_delegation(open, stp, parent);
  5961		if (IS_ERR(dp))
  5962			goto out_no_deleg;
  5963	
  5964		memcpy(&open->op_delegate_stateid, &dp->dl_stid.sc_stateid, sizeof(dp->dl_stid.sc_stateid));
  5965	
  5966		if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) {
  5967			open->op_delegate_type = NFS4_OPEN_DELEGATE_WRITE;
  5968			trace_nfsd_deleg_write(&dp->dl_stid.sc_stateid);
  5969			path.mnt = currentfh->fh_export->ex_path.mnt;
  5970			path.dentry = currentfh->fh_dentry;
  5971			if (vfs_getattr(&path, &stat,
  5972					(STATX_SIZE | STATX_CTIME | STATX_CHANGE_COOKIE),
  5973					AT_STATX_SYNC_AS_STAT)) {
  5974				nfs4_put_stid(&dp->dl_stid);
  5975				destroy_delegation(dp);
  5976				goto out_no_deleg;
  5977			}
  5978			dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
  5979			dp->dl_cb_fattr.ncf_initial_cinfo =
  5980				nfsd4_change_attribute(&stat, d_inode(currentfh->fh_dentry));
  5981			if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH) != NFS4_SHARE_ACCESS_BOTH) {
  5982				struct nfsd_file *nf = NULL;
  5983	
  5984				/* make sure the file is opened locally for O_RDWR */
> 5985				status = nfsd_file_acquire_opened(rqstp, currentfh,
  5986					nfs4_access_to_access(NFS4_SHARE_ACCESS_BOTH),
  5987					open->op_filp, &nf);
  5988				if (status) {
  5989					nfs4_put_stid(&dp->dl_stid);
  5990					destroy_delegation(dp);
  5991					goto out_no_deleg;
  5992				}
  5993				stp->st_stid.sc_file->fi_fds[O_RDWR] = nf;
  5994			}
  5995		} else {
  5996			open->op_delegate_type = NFS4_OPEN_DELEGATE_READ;
  5997			trace_nfsd_deleg_read(&dp->dl_stid.sc_stateid);
  5998		}
  5999		nfs4_put_stid(&dp->dl_stid);
  6000		return;
  6001	out_no_deleg:
  6002		open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE;
  6003		if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS &&
  6004		    open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE) {
  6005			dprintk("NFSD: WARNING: refusing delegation reclaim\n");
  6006			open->op_recall = true;
  6007		}
  6008	
  6009		/* 4.1 client asking for a delegation? */
  6010		if (open->op_deleg_want)
  6011			nfsd4_open_deleg_none_ext(open, status);
  6012		return;
  6013	}
  6014
Chuck Lever July 30, 2024, 6:45 p.m. UTC | #16
On Mon, Jul 29, 2024 at 10:12:46AM -0400, Jeff Layton wrote:
> On Mon, 2024-07-29 at 16:39 +0300, Sagi Grimberg wrote:
> > 
> > 
> > 
> > On 29/07/2024 16:10, Jeff Layton wrote:
> > > On Mon, 2024-07-29 at 15:58 +0300, Sagi Grimberg wrote:
> > > > 
> > > > 
> > > > On 29/07/2024 15:10, Jeff Layton wrote:
> > > > > On Sun, 2024-07-28 at 23:41 +0300, Sagi Grimberg wrote:
> > > > > > In order to support write delegations with O_WRONLY opens, we
> > > > > > need to
> > > > > > allow the clients to read using the write delegation stateid
> > > > > > (Per
> > > > > > RFC
> > > > > > 8881 section 9.1.2. Use of the Stateid and Locking).
> > > > > > 
> > > > > > Hence, we check for NFS4_SHARE_ACCESS_WRITE set in open
> > > > > > request,
> > > > > > and
> > > > > > in case the share access flag does not set
> > > > > > NFS4_SHARE_ACCESS_READ
> > > > > > as
> > > > > > well, we'll open the file locally with O_RDWR in order to
> > > > > > allow
> > > > > > the
> > > > > > client to use the write delegation stateid when issuing a
> > > > > > read in
> > > > > > case
> > > > > > it may choose to.
> > > > > > 
> > > > > > Plus, find_rw_file singular call-site is now removed, remove
> > > > > > it
> > > > > > altogether.
> > > > > > 
> > > > > > Note: reads using special stateids that conflict with pending
> > > > > > write
> > > > > > delegations are undetected, and will be covered in a follow
> > > > > > on
> > > > > > patch.
> > > > > > 
> > > > > > Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
> > > > > > ---
> > > > > >    fs/nfsd/nfs4proc.c  | 18 +++++++++++++++++-
> > > > > >    fs/nfsd/nfs4state.c | 42 ++++++++++++++++++++-------------
> > > > > > -----
> > > > > > ----
> > > > > >    fs/nfsd/xdr4.h      |  2 ++
> > > > > >    3 files changed, 39 insertions(+), 23 deletions(-)
> > > > > > 
> > > > > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> > > > > > index 7b70309ad8fb..041bcc3ab5d7 100644
> > > > > > --- a/fs/nfsd/nfs4proc.c
> > > > > > +++ b/fs/nfsd/nfs4proc.c
> > > > > > @@ -979,8 +979,22 @@ nfsd4_read(struct svc_rqst *rqstp,
> > > > > > struct
> > > > > > nfsd4_compound_state *cstate,
> > > > > >    	/* check stateid */
> > > > > >    	status = nfs4_preprocess_stateid_op(rqstp, cstate,
> > > > > > &cstate-
> > > > > > > current_fh,
> > > > > >    					&read->rd_stateid,
> > > > > > RD_STATE,
> > > > > > -					&read->rd_nf, NULL);
> > > > > > +					&read->rd_nf, &read-
> > > > > > > rd_wd_stid);
> > > > > >    
> > > > > > +	/*
> > > > > > +	 * rd_wd_stid is needed for nfsd4_encode_read to
> > > > > > allow
> > > > > > write
> > > > > > +	 * delegation stateid used for read. Its refcount is
> > > > > > decremented
> > > > > > +	 * by nfsd4_read_release when read is done.
> > > > > > +	 */
> > > > > > +	if (!status) {
> > > > > > +		if (read->rd_wd_stid &&
> > > > > > +		    (read->rd_wd_stid->sc_type !=
> > > > > > SC_TYPE_DELEG
> > > > > > > > 
> > > > > > +		     delegstateid(read->rd_wd_stid)->dl_type
> > > > > > !=
> > > > > > +					NFS4_OPEN_DELEGATE_W
> > > > > > RITE
> > > > > > )) {
> > > > > > +			nfs4_put_stid(read->rd_wd_stid);
> > > > > > +			read->rd_wd_stid = NULL;
> > > > > > +		}
> > > > > > +	}
> > > > > >    	read->rd_rqstp = rqstp;
> > > > > >    	read->rd_fhp = &cstate->current_fh;
> > > > > >    	return status;
> > > > > > @@ -990,6 +1004,8 @@ nfsd4_read(struct svc_rqst *rqstp,
> > > > > > struct
> > > > > > nfsd4_compound_state *cstate,
> > > > > >    static void
> > > > > >    nfsd4_read_release(union nfsd4_op_u *u)
> > > > > >    {
> > > > > > +	if (u->read.rd_wd_stid)
> > > > > > +		nfs4_put_stid(u->read.rd_wd_stid);
> > > > > >    	if (u->read.rd_nf)
> > > > > >    		nfsd_file_put(u->read.rd_nf);
> > > > > >    	trace_nfsd_read_done(u->read.rd_rqstp, u-
> > > > > > >read.rd_fhp,
> > > > > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > > > > > index 0645fccbf122..538b6e1127a2 100644
> > > > > > --- a/fs/nfsd/nfs4state.c
> > > > > > +++ b/fs/nfsd/nfs4state.c
> > > > > > @@ -639,18 +639,6 @@ find_readable_file(struct nfs4_file *f)
> > > > > >    	return ret;
> > > > > >    }
> > > > > >    
> > > > > > -static struct nfsd_file *
> > > > > > -find_rw_file(struct nfs4_file *f)
> > > > > > -{
> > > > > > -	struct nfsd_file *ret;
> > > > > > -
> > > > > > -	spin_lock(&f->fi_lock);
> > > > > > -	ret = nfsd_file_get(f->fi_fds[O_RDWR]);
> > > > > > -	spin_unlock(&f->fi_lock);
> > > > > > -
> > > > > > -	return ret;
> > > > > > -}
> > > > > > -
> > > > > >    struct nfsd_file *
> > > > > >    find_any_file(struct nfs4_file *f)
> > > > > >    {
> > > > > > @@ -5784,15 +5772,11 @@ nfs4_set_delegation(struct nfsd4_open
> > > > > > *open,
> > > > > > struct nfs4_ol_stateid *stp,
> > > > > >    	 *  "An OPEN_DELEGATE_WRITE delegation allows the
> > > > > > client
> > > > > > to
> > > > > > handle,
> > > > > >    	 *   on its own, all opens."
> > > > > >    	 *
> > > > > > -	 * Furthermore the client can use a write delegation
> > > > > > for
> > > > > > most READ
> > > > > > -	 * operations as well, so we require a O_RDWR file
> > > > > > here.
> > > > > > -	 *
> > > > > > -	 * Offer a write delegation in the case of a BOTH
> > > > > > open,
> > > > > > and
> > > > > > ensure
> > > > > > -	 * we get the O_RDWR descriptor.
> > > > > > +	 * Offer a write delegation for WRITE or BOTH access
> > > > > >    	 */
> > > > > > -	if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH)
> > > > > > ==
> > > > > > NFS4_SHARE_ACCESS_BOTH) {
> > > > > > -		nf = find_rw_file(fp);
> > > > > > +	if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
> > > > > > {
> > > > > >    		dl_type = NFS4_OPEN_DELEGATE_WRITE;
> > > > > > +		nf = find_writeable_file(fp);
> > > > > >    	}
> > > > > >    
> > > > > >    	/*
> > > > > > @@ -5934,8 +5918,8 @@ static void
> > > > > > nfsd4_open_deleg_none_ext(struct
> > > > > > nfsd4_open *open, int status)
> > > > > >     * open or lock state.
> > > > > >     */
> > > > > >    static void
> > > > > > -nfs4_open_delegation(struct nfsd4_open *open, struct
> > > > > > nfs4_ol_stateid
> > > > > > *stp,
> > > > > > -		     struct svc_fh *currentfh)
> > > > > > +nfs4_open_delegation(struct svc_rqst *rqstp, struct
> > > > > > nfsd4_open
> > > > > > *open,
> > > > > > +		struct nfs4_ol_stateid *stp, struct svc_fh
> > > > > > *currentfh)
> > > > > >    {
> > > > > >    	struct nfs4_delegation *dp;
> > > > > >    	struct nfs4_openowner *oo = openowner(stp-
> > > > > > > st_stateowner);
> > > > > > @@ -5994,6 +5978,20 @@ nfs4_open_delegation(struct nfsd4_open
> > > > > > *open,
> > > > > > struct nfs4_ol_stateid *stp,
> > > > > >    		dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
> > > > > >    		dp->dl_cb_fattr.ncf_initial_cinfo =
> > > > > >    			nfsd4_change_attribute(&stat,
> > > > > > d_inode(currentfh->fh_dentry));
> > > > > > +		if ((open->op_share_access &
> > > > > > NFS4_SHARE_ACCESS_BOTH)
> > > > > > != NFS4_SHARE_ACCESS_BOTH) {
> > > > > > +			struct nfsd_file *nf = NULL;
> > > > > > +
> > > > > > +			/* make sure the file is opened
> > > > > > locally
> > > > > > for
> > > > > > O_RDWR */
> > > > > > +			status =
> > > > > > nfsd_file_acquire_opened(rqstp,
> > > > > > currentfh,
> > > > > > +				nfs4_access_to_access(NFS4_S
> > > > > > HARE
> > > > > > _ACC
> > > > > > ESS_BOTH),
> > > > > > +				open->op_filp, &nf);
> > > > > > +			if (status) {
> > > > > > +				nfs4_put_stid(&dp->dl_stid);
> > > > > > +				destroy_delegation(dp);
> > > > > > +				goto out_no_deleg;
> > > > > > +			}
> > > > > > +			stp->st_stid.sc_file->fi_fds[O_RDWR]
> > > > > > =
> > > > > > nf;
> > > > > I have a bit of a concern here. When we go to put access
> > > > > references
> > > > > to
> > > > > the fi_fds, that's done according to the st_access_bmap. Here
> > > > > though,
> > > > > you're adding an extra reference for the O_RDWR fd, but I don't
> > > > > see
> > > > > where you're calling set_access for that on the delegation
> > > > > stateid?
> > > > > Am
> > > > > I missing where that happens? Not doing that may lead to fd
> > > > > leaks
> > > > > if it
> > > > > was missed.
> > > > Ah, this is something that I did not fully understand...
> > > > However it looks like st_access_bmap is not something that is
> > > > accounted on the delegation stateid...
> > > > 
> > > > Can I simply set it on the open stateid (stp)?
> > > That would likely fix the leak, but I'm not sure that's the best
> > > approach. You have an NFS4_SHARE_ACCESS_WRITE-only stateid here,
> > > and
> > > that would turn it a NFS4_SHARE_ACCESS_BOTH one, wouldn't it?
> > > 
> > > It wouldn't surprise me if that might break a testcase or two.
> > 
> > Well, if the server handed out a write delegation, isn't it
> > effectively 
> > equivalent to
> > NFS4_SHARE_ACCESS_BOTH open?
> 
> For the delegation, yes, but you're proposing to change an open stateid
> that was explicitly requested to be ACCESS_WRITE into ACCESS_BOTH.
> 
> Given that you're doing this for a write delegation, that sort of
> implies that no other client has it open. Maybe it's OK in that case,
> but I think that if you do that then you'd need to convert the open
> stateid back into being ACCESS_WRITE when the delegation goes away.
> 
> Otherwise you wouldn't (for instance) be able to set a DENY_READ on the
> file after doing an O_WRONLY open on it.
> 
> Thinking about this a bit more though, I wonder if we ought to consider
> reworking the nfs4_file access altogether, especially in light of the
> recent delstid draft. Today, the open stateids hold all of the access
> refs, so if those go away while there is still an outstanding
> delegation, there's no guarantee we'll consider the file open at all
> anymore (AFAICS).
> 
> Eventually we want to implement delstid support, in which case we'll
> need to support the situation where we may give out a delegation with
> no open stateid. It might be better to rework things along those lines
> instead.

Dai tells me that I assumed incorrectly that each delegation
stateid is backed by its own open file descriptor on the server.

So, if it's the case that delegation stateids are just references to
the file descriptor backing the open stateid, maybe we need to
address that first. Once a write delegation stateid is always backed
by a local O_RDWR open, I think READ with a write delegation stateid
will fall out naturally.

And, what you said here above suggests there could be additional
benefits to that approach.
diff mbox series

Patch

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 7b70309ad8fb..041bcc3ab5d7 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -979,8 +979,22 @@  nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	/* check stateid */
 	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
 					&read->rd_stateid, RD_STATE,
-					&read->rd_nf, NULL);
+					&read->rd_nf, &read->rd_wd_stid);
 
+	/*
+	 * rd_wd_stid is needed for nfsd4_encode_read to allow write
+	 * delegation stateid used for read. Its refcount is decremented
+	 * by nfsd4_read_release when read is done.
+	 */
+	if (!status) {
+		if (read->rd_wd_stid &&
+		    (read->rd_wd_stid->sc_type != SC_TYPE_DELEG ||
+		     delegstateid(read->rd_wd_stid)->dl_type !=
+					NFS4_OPEN_DELEGATE_WRITE)) {
+			nfs4_put_stid(read->rd_wd_stid);
+			read->rd_wd_stid = NULL;
+		}
+	}
 	read->rd_rqstp = rqstp;
 	read->rd_fhp = &cstate->current_fh;
 	return status;
@@ -990,6 +1004,8 @@  nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 static void
 nfsd4_read_release(union nfsd4_op_u *u)
 {
+	if (u->read.rd_wd_stid)
+		nfs4_put_stid(u->read.rd_wd_stid);
 	if (u->read.rd_nf)
 		nfsd_file_put(u->read.rd_nf);
 	trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp,
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 0645fccbf122..538b6e1127a2 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -639,18 +639,6 @@  find_readable_file(struct nfs4_file *f)
 	return ret;
 }
 
-static struct nfsd_file *
-find_rw_file(struct nfs4_file *f)
-{
-	struct nfsd_file *ret;
-
-	spin_lock(&f->fi_lock);
-	ret = nfsd_file_get(f->fi_fds[O_RDWR]);
-	spin_unlock(&f->fi_lock);
-
-	return ret;
-}
-
 struct nfsd_file *
 find_any_file(struct nfs4_file *f)
 {
@@ -5784,15 +5772,11 @@  nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
 	 *  "An OPEN_DELEGATE_WRITE delegation allows the client to handle,
 	 *   on its own, all opens."
 	 *
-	 * Furthermore the client can use a write delegation for most READ
-	 * operations as well, so we require a O_RDWR file here.
-	 *
-	 * Offer a write delegation in the case of a BOTH open, and ensure
-	 * we get the O_RDWR descriptor.
+	 * Offer a write delegation for WRITE or BOTH access
 	 */
-	if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH) == NFS4_SHARE_ACCESS_BOTH) {
-		nf = find_rw_file(fp);
+	if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) {
 		dl_type = NFS4_OPEN_DELEGATE_WRITE;
+		nf = find_writeable_file(fp);
 	}
 
 	/*
@@ -5934,8 +5918,8 @@  static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status)
  * open or lock state.
  */
 static void
-nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
-		     struct svc_fh *currentfh)
+nfs4_open_delegation(struct svc_rqst *rqstp, struct nfsd4_open *open,
+		struct nfs4_ol_stateid *stp, struct svc_fh *currentfh)
 {
 	struct nfs4_delegation *dp;
 	struct nfs4_openowner *oo = openowner(stp->st_stateowner);
@@ -5994,6 +5978,20 @@  nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
 		dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
 		dp->dl_cb_fattr.ncf_initial_cinfo =
 			nfsd4_change_attribute(&stat, d_inode(currentfh->fh_dentry));
+		if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH) != NFS4_SHARE_ACCESS_BOTH) {
+			struct nfsd_file *nf = NULL;
+
+			/* make sure the file is opened locally for O_RDWR */
+			status = nfsd_file_acquire_opened(rqstp, currentfh,
+				nfs4_access_to_access(NFS4_SHARE_ACCESS_BOTH),
+				open->op_filp, &nf);
+			if (status) {
+				nfs4_put_stid(&dp->dl_stid);
+				destroy_delegation(dp);
+				goto out_no_deleg;
+			}
+			stp->st_stid.sc_file->fi_fds[O_RDWR] = nf;
+		}
 	} else {
 		open->op_delegate_type = NFS4_OPEN_DELEGATE_READ;
 		trace_nfsd_deleg_read(&dp->dl_stid.sc_stateid);
@@ -6123,7 +6121,7 @@  nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 	* Attempt to hand out a delegation. No error return, because the
 	* OPEN succeeds even if we fail.
 	*/
-	nfs4_open_delegation(open, stp, &resp->cstate.current_fh);
+	nfs4_open_delegation(rqstp, open, stp, &resp->cstate.current_fh);
 nodeleg:
 	status = nfs_ok;
 	trace_nfsd_open(&stp->st_stid.sc_stateid);
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index fbdd42cde1fa..434973a6a8b1 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -426,6 +426,8 @@  struct nfsd4_read {
 	struct svc_rqst		*rd_rqstp;          /* response */
 	struct svc_fh		*rd_fhp;            /* response */
 	u32			rd_eof;             /* response */
+
+	struct nfs4_stid	*rd_wd_stid;		/* internal */
 };
 
 struct nfsd4_readdir {