Message ID | 20240724170138.1942307-2-sagi@grimberg.me (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | [rfc,1/2] nfsd: don't assume copy notify when preprocessing the stateid | expand |
On Wed, Jul 24, 2024 at 1:01 PM Sagi Grimberg <sagi@grimberg.me> wrote: > > Based on a patch fom Dai Ngo, allow NFSv4 client to use write delegation > stateid for READ operation. Per RFC 8881 section 9.1.2. Use of the > Stateid and Locking. > > In addition, for anonymous stateids, check for pending delegations by > the filehandle and client_id, and if a conflict found, recall the delegation > before allowing the read to take place. > > Suggested-by: Dai Ngo <dai.ngo@oracle.com> > Signed-off-by: Sagi Grimberg <sagi@grimberg.me> > --- > fs/nfsd/nfs4proc.c | 22 +++++++++++++++++++-- > fs/nfsd/nfs4state.c | 47 +++++++++++++++++++++++++++++++++++++++++++++ > fs/nfsd/nfs4xdr.c | 9 +++++++++ > fs/nfsd/state.h | 2 ++ > fs/nfsd/xdr4.h | 2 ++ > 5 files changed, 80 insertions(+), 2 deletions(-) > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c > index 7b70309ad8fb..324984ec70c6 100644 > --- a/fs/nfsd/nfs4proc.c > +++ b/fs/nfsd/nfs4proc.c > @@ -979,8 +979,24 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > /* check stateid */ > status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, > &read->rd_stateid, RD_STATE, > - &read->rd_nf, NULL); > - > + &read->rd_nf, &read->rd_wd_stid); Now I can see that this patch wants to leverage the "returned stateid of the nfs4_preprocess_stateid_op() but the logic in the previous patch was in the way because it distinguished the copy_notify by the non-null passed in stateid. So I would suggest that in order to not break the copy_notify and help with this functionality something else is sent into nfs4_proprocess_staetid_op() to allow for the stateid to be passed and then distinguish between copy_notify and read. > + /* > + * rd_wd_stid is needed for nfsd4_encode_read to allow write > + * delegation stateid used for read. Its refcount is decremented > + * by nfsd4_read_release when read is done. > + */ > + if (!status) { > + if (!read->rd_wd_stid) { > + /* special stateid? */ > + status = nfsd4_deleg_read_conflict(rqstp, cstate->clp, > + &cstate->current_fh); > + } else if (read->rd_wd_stid->sc_type != SC_TYPE_DELEG || > + delegstateid(read->rd_wd_stid)->dl_type != > + NFS4_OPEN_DELEGATE_WRITE) { > + nfs4_put_stid(read->rd_wd_stid); > + read->rd_wd_stid = NULL; > + } > + } > read->rd_rqstp = rqstp; > read->rd_fhp = &cstate->current_fh; > return status; > @@ -990,6 +1006,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > static void > nfsd4_read_release(union nfsd4_op_u *u) > { > + if (u->read.rd_wd_stid) > + nfs4_put_stid(u->read.rd_wd_stid); > if (u->read.rd_nf) > nfsd_file_put(u->read.rd_nf); > trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp, > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c > index dc61a8adfcd4..7e6b9fb31a4c 100644 > --- a/fs/nfsd/nfs4state.c > +++ b/fs/nfsd/nfs4state.c > @@ -8805,6 +8805,53 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate, > get_stateid(cstate, &u->write.wr_stateid); > } > > +/** > + * nfsd4_deleg_read_conflict - Recall if read causes conflict > + * @rqstp: RPC transaction context > + * @clp: nfs client > + * @fhp: nfs file handle > + * @inode: file to be checked for a conflict > + * @modified: return true if file was modified > + * @size: new size of file if modified is true > + * > + * This function is called when there is a conflict between a write > + * delegation and a read that is using a special stateid where the > + * we cannot derive the client stateid exsistence. The server > + * must recall a conflicting delegation before allowing the read > + * to continue. > + * > + * Returns 0 if there is no conflict; otherwise an nfs_stat > + * code is returned. > + */ > +__be32 nfsd4_deleg_read_conflict(struct svc_rqst *rqstp, > + struct nfs4_client *clp, struct svc_fh *fhp) > +{ > + struct nfs4_file *fp; > + __be32 status = 0; > + > + fp = nfsd4_file_hash_lookup(fhp); > + if (!fp) > + return nfs_ok; > + > + spin_lock(&state_lock); > + spin_lock(&fp->fi_lock); > + if (!list_empty(&fp->fi_delegations) && > + !nfs4_delegation_exists(clp, fp)) { > + /* conflict, recall deleg */ > + status = nfserrno(nfsd_open_break_lease(fp->fi_inode, > + NFSD_MAY_READ)); > + if (status) > + goto out; > + if (!nfsd_wait_for_delegreturn(rqstp, fp->fi_inode)) > + status = nfserr_jukebox; > + } > +out: > + spin_unlock(&fp->fi_lock); > + spin_unlock(&state_lock); > + return status; > +} > + > + > /** > * nfsd4_deleg_getattr_conflict - Recall if GETATTR causes conflict > * @rqstp: RPC transaction context > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c > index c7bfd2180e3f..f0fe526fac3c 100644 > --- a/fs/nfsd/nfs4xdr.c > +++ b/fs/nfsd/nfs4xdr.c > @@ -4418,6 +4418,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, > unsigned long maxcount; > struct file *file; > __be32 *p; > + fmode_t o_fmode = 0; > > if (nfserr) > return nfserr; > @@ -4437,10 +4438,18 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, > maxcount = min_t(unsigned long, read->rd_length, > (xdr->buf->buflen - xdr->buf->len)); > > + if (read->rd_wd_stid) { > + /* allow READ using write delegation stateid */ > + o_fmode = file->f_mode; > + file->f_mode |= FMODE_READ; > + } > if (file->f_op->splice_read && splice_ok) > nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount); > else > nfserr = nfsd4_encode_readv(resp, read, file, maxcount); > + if (o_fmode) > + file->f_mode = o_fmode; > + > if (nfserr) { > xdr_truncate_encode(xdr, starting_len); > return nfserr; > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h > index ffc217099d19..c1f13b5877c6 100644 > --- a/fs/nfsd/state.h > +++ b/fs/nfsd/state.h > @@ -780,6 +780,8 @@ static inline bool try_to_expire_client(struct nfs4_client *clp) > return clp->cl_state == NFSD4_EXPIRABLE; > } > > +extern __be32 nfsd4_deleg_read_conflict(struct svc_rqst *rqstp, > + struct nfs4_client *clp, struct svc_fh *fhp); > extern __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, > struct inode *inode, bool *file_modified, u64 *size); > #endif /* NFSD4_STATE_H */ > diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h > index fbdd42cde1fa..434973a6a8b1 100644 > --- a/fs/nfsd/xdr4.h > +++ b/fs/nfsd/xdr4.h > @@ -426,6 +426,8 @@ struct nfsd4_read { > struct svc_rqst *rd_rqstp; /* response */ > struct svc_fh *rd_fhp; /* response */ > u32 rd_eof; /* response */ > + > + struct nfs4_stid *rd_wd_stid; /* internal */ > }; > > struct nfsd4_readdir { > -- > 2.43.0 > >
On 24/07/2024 22:29, Olga Kornievskaia wrote: > On Wed, Jul 24, 2024 at 1:01 PM Sagi Grimberg <sagi@grimberg.me> wrote: >> Based on a patch fom Dai Ngo, allow NFSv4 client to use write delegation >> stateid for READ operation. Per RFC 8881 section 9.1.2. Use of the >> Stateid and Locking. >> >> In addition, for anonymous stateids, check for pending delegations by >> the filehandle and client_id, and if a conflict found, recall the delegation >> before allowing the read to take place. >> >> Suggested-by: Dai Ngo <dai.ngo@oracle.com> >> Signed-off-by: Sagi Grimberg <sagi@grimberg.me> >> --- >> fs/nfsd/nfs4proc.c | 22 +++++++++++++++++++-- >> fs/nfsd/nfs4state.c | 47 +++++++++++++++++++++++++++++++++++++++++++++ >> fs/nfsd/nfs4xdr.c | 9 +++++++++ >> fs/nfsd/state.h | 2 ++ >> fs/nfsd/xdr4.h | 2 ++ >> 5 files changed, 80 insertions(+), 2 deletions(-) >> >> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c >> index 7b70309ad8fb..324984ec70c6 100644 >> --- a/fs/nfsd/nfs4proc.c >> +++ b/fs/nfsd/nfs4proc.c >> @@ -979,8 +979,24 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, >> /* check stateid */ >> status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, >> &read->rd_stateid, RD_STATE, >> - &read->rd_nf, NULL); >> - >> + &read->rd_nf, &read->rd_wd_stid); > Now I can see that this patch wants to leverage the "returned stateid > of the nfs4_preprocess_stateid_op() but the logic in the previous > patch was in the way because it distinguished the copy_notify by the > non-null passed in stateid. So I would suggest that in order to not > break the copy_notify and help with this functionality something else > is sent into nfs4_proprocess_staetid_op() to allow for the stateid to > be passed and then distinguish between copy_notify and read. My thinking is that instead having the generic stateid pre-processing be aware which proc may accept a special stateid and which may not, we'd want to have the call-sites enforce that... But I am not the expert here, and will be happy to change based on your preference...
On Wed, 2024-07-24 at 10:01 -0700, Sagi Grimberg wrote: > Based on a patch fom Dai Ngo, allow NFSv4 client to use write delegation > stateid for READ operation. Per RFC 8881 section 9.1.2. Use of the > Stateid and Locking. > > In addition, for anonymous stateids, check for pending delegations by > the filehandle and client_id, and if a conflict found, recall the delegation > before allowing the read to take place. > > Suggested-by: Dai Ngo <dai.ngo@oracle.com> > Signed-off-by: Sagi Grimberg <sagi@grimberg.me> > --- > fs/nfsd/nfs4proc.c | 22 +++++++++++++++++++-- > fs/nfsd/nfs4state.c | 47 +++++++++++++++++++++++++++++++++++++++++++++ > fs/nfsd/nfs4xdr.c | 9 +++++++++ > fs/nfsd/state.h | 2 ++ > fs/nfsd/xdr4.h | 2 ++ > 5 files changed, 80 insertions(+), 2 deletions(-) > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c > index 7b70309ad8fb..324984ec70c6 100644 > --- a/fs/nfsd/nfs4proc.c > +++ b/fs/nfsd/nfs4proc.c > @@ -979,8 +979,24 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > /* check stateid */ > status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, > &read->rd_stateid, RD_STATE, > - &read->rd_nf, NULL); > - > + &read->rd_nf, &read->rd_wd_stid); In the case where we have multiple stateids for this client, are we guaranteed to get back the delegation stateid here? What if we get back the open stateid for the O_WRONLY open instead? > + /* > + * rd_wd_stid is needed for nfsd4_encode_read to allow write > + * delegation stateid used for read. Its refcount is decremented > + * by nfsd4_read_release when read is done. > + */ > + if (!status) { > + if (!read->rd_wd_stid) { > + /* special stateid? */ > + status = nfsd4_deleg_read_conflict(rqstp, cstate->clp, > + &cstate->current_fh); > + } else if (read->rd_wd_stid->sc_type != SC_TYPE_DELEG || > + delegstateid(read->rd_wd_stid)->dl_type != > + NFS4_OPEN_DELEGATE_WRITE) { > + nfs4_put_stid(read->rd_wd_stid); > + read->rd_wd_stid = NULL; > + } > + } > read->rd_rqstp = rqstp; > read->rd_fhp = &cstate->current_fh; > return status; > @@ -990,6 +1006,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > static void > nfsd4_read_release(union nfsd4_op_u *u) > { > + if (u->read.rd_wd_stid) > + nfs4_put_stid(u->read.rd_wd_stid); > if (u->read.rd_nf) > nfsd_file_put(u->read.rd_nf); > trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp, > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c > index dc61a8adfcd4..7e6b9fb31a4c 100644 > --- a/fs/nfsd/nfs4state.c > +++ b/fs/nfsd/nfs4state.c > @@ -8805,6 +8805,53 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate, > get_stateid(cstate, &u->write.wr_stateid); > } > > +/** > + * nfsd4_deleg_read_conflict - Recall if read causes conflict > + * @rqstp: RPC transaction context > + * @clp: nfs client > + * @fhp: nfs file handle > + * @inode: file to be checked for a conflict > + * @modified: return true if file was modified > + * @size: new size of file if modified is true > + * > + * This function is called when there is a conflict between a write > + * delegation and a read that is using a special stateid where the > + * we cannot derive the client stateid exsistence. The server > + * must recall a conflicting delegation before allowing the read > + * to continue. > + * > + * Returns 0 if there is no conflict; otherwise an nfs_stat > + * code is returned. > + */ > +__be32 nfsd4_deleg_read_conflict(struct svc_rqst *rqstp, > + struct nfs4_client *clp, struct svc_fh *fhp) > +{ > + struct nfs4_file *fp; > + __be32 status = 0; > + > + fp = nfsd4_file_hash_lookup(fhp); > + if (!fp) > + return nfs_ok; > + > + spin_lock(&state_lock); > + spin_lock(&fp->fi_lock); > + if (!list_empty(&fp->fi_delegations) && > + !nfs4_delegation_exists(clp, fp)) { > + /* conflict, recall deleg */ > + status = nfserrno(nfsd_open_break_lease(fp->fi_inode, > + NFSD_MAY_READ)); > + if (status) > + goto out; > + if (!nfsd_wait_for_delegreturn(rqstp, fp->fi_inode)) > + status = nfserr_jukebox; > + } > +out: > + spin_unlock(&fp->fi_lock); > + spin_unlock(&state_lock); > + return status; > +} > + > + > /** > * nfsd4_deleg_getattr_conflict - Recall if GETATTR causes conflict > * @rqstp: RPC transaction context > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c > index c7bfd2180e3f..f0fe526fac3c 100644 > --- a/fs/nfsd/nfs4xdr.c > +++ b/fs/nfsd/nfs4xdr.c > @@ -4418,6 +4418,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, > unsigned long maxcount; > struct file *file; > __be32 *p; > + fmode_t o_fmode = 0; > > if (nfserr) > return nfserr; > @@ -4437,10 +4438,18 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, > maxcount = min_t(unsigned long, read->rd_length, > (xdr->buf->buflen - xdr->buf->len)); > > + if (read->rd_wd_stid) { > + /* allow READ using write delegation stateid */ > + o_fmode = file->f_mode; > + file->f_mode |= FMODE_READ; > + } > if (file->f_op->splice_read && splice_ok) > nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount); > else > nfserr = nfsd4_encode_readv(resp, read, file, maxcount); > + if (o_fmode) > + file->f_mode = o_fmode; > + > if (nfserr) { > xdr_truncate_encode(xdr, starting_len); > return nfserr; > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h > index ffc217099d19..c1f13b5877c6 100644 > --- a/fs/nfsd/state.h > +++ b/fs/nfsd/state.h > @@ -780,6 +780,8 @@ static inline bool try_to_expire_client(struct nfs4_client *clp) > return clp->cl_state == NFSD4_EXPIRABLE; > } > > +extern __be32 nfsd4_deleg_read_conflict(struct svc_rqst *rqstp, > + struct nfs4_client *clp, struct svc_fh *fhp); > extern __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, > struct inode *inode, bool *file_modified, u64 *size); > #endif /* NFSD4_STATE_H */ > diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h > index fbdd42cde1fa..434973a6a8b1 100644 > --- a/fs/nfsd/xdr4.h > +++ b/fs/nfsd/xdr4.h > @@ -426,6 +426,8 @@ struct nfsd4_read { > struct svc_rqst *rd_rqstp; /* response */ > struct svc_fh *rd_fhp; /* response */ > u32 rd_eof; /* response */ > + > + struct nfs4_stid *rd_wd_stid; /* internal */ > }; > > struct nfsd4_readdir {
On Wed, 2024-07-24 at 14:47 -0700, Sagi Grimberg wrote: > > > > On 24/07/2024 22:29, Olga Kornievskaia wrote: > > On Wed, Jul 24, 2024 at 1:01 PM Sagi Grimberg <sagi@grimberg.me> > > wrote: > > > Based on a patch fom Dai Ngo, allow NFSv4 client to use write > > > delegation > > > stateid for READ operation. Per RFC 8881 section 9.1.2. Use of > > > the > > > Stateid and Locking. > > > > > > In addition, for anonymous stateids, check for pending > > > delegations by > > > the filehandle and client_id, and if a conflict found, recall the > > > delegation > > > before allowing the read to take place. > > > > > > Suggested-by: Dai Ngo <dai.ngo@oracle.com> > > > Signed-off-by: Sagi Grimberg <sagi@grimberg.me> > > > --- > > > fs/nfsd/nfs4proc.c | 22 +++++++++++++++++++-- > > > fs/nfsd/nfs4state.c | 47 > > > +++++++++++++++++++++++++++++++++++++++++++++ > > > fs/nfsd/nfs4xdr.c | 9 +++++++++ > > > fs/nfsd/state.h | 2 ++ > > > fs/nfsd/xdr4.h | 2 ++ > > > 5 files changed, 80 insertions(+), 2 deletions(-) > > > > > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c > > > index 7b70309ad8fb..324984ec70c6 100644 > > > --- a/fs/nfsd/nfs4proc.c > > > +++ b/fs/nfsd/nfs4proc.c > > > @@ -979,8 +979,24 @@ nfsd4_read(struct svc_rqst *rqstp, struct > > > nfsd4_compound_state *cstate, > > > /* check stateid */ > > > status = nfs4_preprocess_stateid_op(rqstp, cstate, > > > &cstate->current_fh, > > > &read->rd_stateid, > > > RD_STATE, > > > - &read->rd_nf, NULL); > > > - > > > + &read->rd_nf, &read- > > > >rd_wd_stid); > > Now I can see that this patch wants to leverage the "returned > > stateid > > of the nfs4_preprocess_stateid_op() but the logic in the previous > > patch was in the way because it distinguished the copy_notify by > > the > > non-null passed in stateid. So I would suggest that in order to not > > break the copy_notify and help with this functionality something > > else > > is sent into nfs4_proprocess_staetid_op() to allow for the stateid > > to > > be passed and then distinguish between copy_notify and read. > > My thinking is that instead having the generic stateid pre-processing > be aware which proc may accept a special stateid and which may not, > we'd want to have the call-sites enforce that... > > But I am not the expert here, and will be happy to change based on > your preference... I tend to agree with Sagi here. Keeping this sort of operation-specific check in the operation-specific function makes more sense to me too.
On Wed, Jul 24, 2024 at 10:01:38AM -0700, Sagi Grimberg wrote: > Based on a patch fom Dai Ngo, allow NFSv4 client to use write delegation > stateid for READ operation. Per RFC 8881 section 9.1.2. Use of the > Stateid and Locking. > > In addition, for anonymous stateids, check for pending delegations by > the filehandle and client_id, and if a conflict found, recall the delegation > before allowing the read to take place. > > Suggested-by: Dai Ngo <dai.ngo@oracle.com> > Signed-off-by: Sagi Grimberg <sagi@grimberg.me> > --- > fs/nfsd/nfs4proc.c | 22 +++++++++++++++++++-- > fs/nfsd/nfs4state.c | 47 +++++++++++++++++++++++++++++++++++++++++++++ > fs/nfsd/nfs4xdr.c | 9 +++++++++ > fs/nfsd/state.h | 2 ++ > fs/nfsd/xdr4.h | 2 ++ > 5 files changed, 80 insertions(+), 2 deletions(-) > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c > index 7b70309ad8fb..324984ec70c6 100644 > --- a/fs/nfsd/nfs4proc.c > +++ b/fs/nfsd/nfs4proc.c > @@ -979,8 +979,24 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > /* check stateid */ > status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, > &read->rd_stateid, RD_STATE, > - &read->rd_nf, NULL); > - > + &read->rd_nf, &read->rd_wd_stid); > + /* > + * rd_wd_stid is needed for nfsd4_encode_read to allow write > + * delegation stateid used for read. Its refcount is decremented > + * by nfsd4_read_release when read is done. > + */ > + if (!status) { > + if (!read->rd_wd_stid) { > + /* special stateid? */ > + status = nfsd4_deleg_read_conflict(rqstp, cstate->clp, > + &cstate->current_fh); > + } else if (read->rd_wd_stid->sc_type != SC_TYPE_DELEG || > + delegstateid(read->rd_wd_stid)->dl_type != > + NFS4_OPEN_DELEGATE_WRITE) { > + nfs4_put_stid(read->rd_wd_stid); > + read->rd_wd_stid = NULL; > + } > + } > read->rd_rqstp = rqstp; > read->rd_fhp = &cstate->current_fh; > return status; > @@ -990,6 +1006,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > static void > nfsd4_read_release(union nfsd4_op_u *u) > { > + if (u->read.rd_wd_stid) > + nfs4_put_stid(u->read.rd_wd_stid); > if (u->read.rd_nf) > nfsd_file_put(u->read.rd_nf); > trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp, > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c > index dc61a8adfcd4..7e6b9fb31a4c 100644 > --- a/fs/nfsd/nfs4state.c > +++ b/fs/nfsd/nfs4state.c > @@ -8805,6 +8805,53 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate, > get_stateid(cstate, &u->write.wr_stateid); > } > > +/** > + * nfsd4_deleg_read_conflict - Recall if read causes conflict > + * @rqstp: RPC transaction context > + * @clp: nfs client > + * @fhp: nfs file handle > + * @inode: file to be checked for a conflict > + * @modified: return true if file was modified > + * @size: new size of file if modified is true > + * > + * This function is called when there is a conflict between a write > + * delegation and a read that is using a special stateid where the > + * we cannot derive the client stateid exsistence. The server > + * must recall a conflicting delegation before allowing the read > + * to continue. > + * > + * Returns 0 if there is no conflict; otherwise an nfs_stat > + * code is returned. > + */ > +__be32 nfsd4_deleg_read_conflict(struct svc_rqst *rqstp, > + struct nfs4_client *clp, struct svc_fh *fhp) > +{ > + struct nfs4_file *fp; > + __be32 status = 0; > + > + fp = nfsd4_file_hash_lookup(fhp); > + if (!fp) > + return nfs_ok; > + > + spin_lock(&state_lock); > + spin_lock(&fp->fi_lock); > + if (!list_empty(&fp->fi_delegations) && > + !nfs4_delegation_exists(clp, fp)) { > + /* conflict, recall deleg */ > + status = nfserrno(nfsd_open_break_lease(fp->fi_inode, > + NFSD_MAY_READ)); > + if (status) > + goto out; > + if (!nfsd_wait_for_delegreturn(rqstp, fp->fi_inode)) > + status = nfserr_jukebox; > + } > +out: > + spin_unlock(&fp->fi_lock); > + spin_unlock(&state_lock); > + return status; > +} > + > + > /** > * nfsd4_deleg_getattr_conflict - Recall if GETATTR causes conflict > * @rqstp: RPC transaction context > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c > index c7bfd2180e3f..f0fe526fac3c 100644 > --- a/fs/nfsd/nfs4xdr.c > +++ b/fs/nfsd/nfs4xdr.c > @@ -4418,6 +4418,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, > unsigned long maxcount; > struct file *file; > __be32 *p; > + fmode_t o_fmode = 0; > > if (nfserr) > return nfserr; > @@ -4437,10 +4438,18 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, > maxcount = min_t(unsigned long, read->rd_length, > (xdr->buf->buflen - xdr->buf->len)); > > + if (read->rd_wd_stid) { > + /* allow READ using write delegation stateid */ > + o_fmode = file->f_mode; > + file->f_mode |= FMODE_READ; > + } nfsd4_encode_read_plus() needs to handle write delegation stateids as well. I'm not too sure about modifying the f_mode on an nfsd_file you just got from a cache of shared nfsd_files. I think I'd prefer if preprocess_stateid returned an nfsd_file that was already open for read. IIUC that would mean that no changes would be needed here or in nfsd4_encode_read_plus(). Would that be difficult? > if (file->f_op->splice_read && splice_ok) > nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount); > else > nfserr = nfsd4_encode_readv(resp, read, file, maxcount); > + if (o_fmode) > + file->f_mode = o_fmode; > + > if (nfserr) { > xdr_truncate_encode(xdr, starting_len); > return nfserr; > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h > index ffc217099d19..c1f13b5877c6 100644 > --- a/fs/nfsd/state.h > +++ b/fs/nfsd/state.h > @@ -780,6 +780,8 @@ static inline bool try_to_expire_client(struct nfs4_client *clp) > return clp->cl_state == NFSD4_EXPIRABLE; > } > > +extern __be32 nfsd4_deleg_read_conflict(struct svc_rqst *rqstp, > + struct nfs4_client *clp, struct svc_fh *fhp); > extern __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, > struct inode *inode, bool *file_modified, u64 *size); > #endif /* NFSD4_STATE_H */ > diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h > index fbdd42cde1fa..434973a6a8b1 100644 > --- a/fs/nfsd/xdr4.h > +++ b/fs/nfsd/xdr4.h > @@ -426,6 +426,8 @@ struct nfsd4_read { > struct svc_rqst *rd_rqstp; /* response */ > struct svc_fh *rd_fhp; /* response */ > u32 rd_eof; /* response */ > + > + struct nfs4_stid *rd_wd_stid; /* internal */ > }; > > struct nfsd4_readdir { > -- > 2.43.0 >
On Fri, 2024-07-26 at 09:54 -0400, Jeff Layton wrote: > On Wed, 2024-07-24 at 10:01 -0700, Sagi Grimberg wrote: > > Based on a patch fom Dai Ngo, allow NFSv4 client to use write > > delegation > > stateid for READ operation. Per RFC 8881 section 9.1.2. Use of the > > Stateid and Locking. > > > > In addition, for anonymous stateids, check for pending delegations > > by > > the filehandle and client_id, and if a conflict found, recall the > > delegation > > before allowing the read to take place. > > > > Suggested-by: Dai Ngo <dai.ngo@oracle.com> > > Signed-off-by: Sagi Grimberg <sagi@grimberg.me> > > --- > > fs/nfsd/nfs4proc.c | 22 +++++++++++++++++++-- > > fs/nfsd/nfs4state.c | 47 > > +++++++++++++++++++++++++++++++++++++++++++++ > > fs/nfsd/nfs4xdr.c | 9 +++++++++ > > fs/nfsd/state.h | 2 ++ > > fs/nfsd/xdr4.h | 2 ++ > > 5 files changed, 80 insertions(+), 2 deletions(-) > > > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c > > index 7b70309ad8fb..324984ec70c6 100644 > > --- a/fs/nfsd/nfs4proc.c > > +++ b/fs/nfsd/nfs4proc.c > > @@ -979,8 +979,24 @@ nfsd4_read(struct svc_rqst *rqstp, struct > > nfsd4_compound_state *cstate, > > /* check stateid */ > > status = nfs4_preprocess_stateid_op(rqstp, cstate, > > &cstate->current_fh, > > &read->rd_stateid, > > RD_STATE, > > - &read->rd_nf, NULL); > > - > > + &read->rd_nf, &read- > > >rd_wd_stid); > > In the case where we have multiple stateids for this client, are we > guaranteed to get back the delegation stateid here? What if we get > back > the open stateid for the O_WRONLY open instead? > Oh, duh -- disregard this. We're looking for a specific stateid here, so if the client sends a delegation stateid, that's what we'll find. > > > + /* > > + * rd_wd_stid is needed for nfsd4_encode_read to allow > > write > > + * delegation stateid used for read. Its refcount is > > decremented > > + * by nfsd4_read_release when read is done. > > + */ > > + if (!status) { > > + if (!read->rd_wd_stid) { > > + /* special stateid? */ > > + status = nfsd4_deleg_read_conflict(rqstp, > > cstate->clp, > > + &cstate->current_fh); > > + } else if (read->rd_wd_stid->sc_type != > > SC_TYPE_DELEG || > > + delegstateid(read->rd_wd_stid)->dl_type > > != > > + NFS4_OPEN_DELEGATE > > _WRITE) { > > + nfs4_put_stid(read->rd_wd_stid); > > + read->rd_wd_stid = NULL; > > + } > > + } > > read->rd_rqstp = rqstp; > > read->rd_fhp = &cstate->current_fh; > > return status; > > @@ -990,6 +1006,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct > > nfsd4_compound_state *cstate, > > static void > > nfsd4_read_release(union nfsd4_op_u *u) > > { > > + if (u->read.rd_wd_stid) > > + nfs4_put_stid(u->read.rd_wd_stid); > > if (u->read.rd_nf) > > nfsd_file_put(u->read.rd_nf); > > trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp, > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c > > index dc61a8adfcd4..7e6b9fb31a4c 100644 > > --- a/fs/nfsd/nfs4state.c > > +++ b/fs/nfsd/nfs4state.c > > @@ -8805,6 +8805,53 @@ nfsd4_get_writestateid(struct > > nfsd4_compound_state *cstate, > > get_stateid(cstate, &u->write.wr_stateid); > > } > > > > +/** > > + * nfsd4_deleg_read_conflict - Recall if read causes conflict > > + * @rqstp: RPC transaction context > > + * @clp: nfs client > > + * @fhp: nfs file handle > > + * @inode: file to be checked for a conflict > > + * @modified: return true if file was modified > > + * @size: new size of file if modified is true > > + * > > + * This function is called when there is a conflict between a > > write > > + * delegation and a read that is using a special stateid where the > > + * we cannot derive the client stateid exsistence. The server > > + * must recall a conflicting delegation before allowing the read > > + * to continue. > > + * > > + * Returns 0 if there is no conflict; otherwise an nfs_stat > > + * code is returned. > > + */ > > +__be32 nfsd4_deleg_read_conflict(struct svc_rqst *rqstp, > > + struct nfs4_client *clp, struct svc_fh *fhp) > > +{ > > + struct nfs4_file *fp; > > + __be32 status = 0; > > + > > + fp = nfsd4_file_hash_lookup(fhp); > > + if (!fp) > > + return nfs_ok; > > + > > + spin_lock(&state_lock); > > + spin_lock(&fp->fi_lock); > > + if (!list_empty(&fp->fi_delegations) && > > + !nfs4_delegation_exists(clp, fp)) { > > + /* conflict, recall deleg */ > > + status = nfserrno(nfsd_open_break_lease(fp- > > >fi_inode, > > + NFSD_MAY_READ)); > > + if (status) > > + goto out; > > + if (!nfsd_wait_for_delegreturn(rqstp, fp- > > >fi_inode)) > > + status = nfserr_jukebox; > > + } > > +out: > > + spin_unlock(&fp->fi_lock); > > + spin_unlock(&state_lock); > > + return status; > > +} > > + > > + > > /** > > * nfsd4_deleg_getattr_conflict - Recall if GETATTR causes > > conflict > > * @rqstp: RPC transaction context > > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c > > index c7bfd2180e3f..f0fe526fac3c 100644 > > --- a/fs/nfsd/nfs4xdr.c > > +++ b/fs/nfsd/nfs4xdr.c > > @@ -4418,6 +4418,7 @@ nfsd4_encode_read(struct nfsd4_compoundres > > *resp, __be32 nfserr, > > unsigned long maxcount; > > struct file *file; > > __be32 *p; > > + fmode_t o_fmode = 0; > > > > if (nfserr) > > return nfserr; > > @@ -4437,10 +4438,18 @@ nfsd4_encode_read(struct nfsd4_compoundres > > *resp, __be32 nfserr, > > maxcount = min_t(unsigned long, read->rd_length, > > (xdr->buf->buflen - xdr->buf->len)); > > > > + if (read->rd_wd_stid) { > > + /* allow READ using write delegation stateid */ > > + o_fmode = file->f_mode; > > + file->f_mode |= FMODE_READ; > > + } ^^^ I do agree with Chuck that this looks sketchy. > > if (file->f_op->splice_read && splice_ok) > > nfserr = nfsd4_encode_splice_read(resp, read, > > file, maxcount); > > else > > nfserr = nfsd4_encode_readv(resp, read, file, > > maxcount); > > + if (o_fmode) > > + file->f_mode = o_fmode; > > + > > if (nfserr) { > > xdr_truncate_encode(xdr, starting_len); > > return nfserr; > > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h > > index ffc217099d19..c1f13b5877c6 100644 > > --- a/fs/nfsd/state.h > > +++ b/fs/nfsd/state.h > > @@ -780,6 +780,8 @@ static inline bool try_to_expire_client(struct > > nfs4_client *clp) > > return clp->cl_state == NFSD4_EXPIRABLE; > > } > > > > +extern __be32 nfsd4_deleg_read_conflict(struct svc_rqst *rqstp, > > + struct nfs4_client *clp, struct svc_fh *fhp); > > extern __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, > > struct inode *inode, bool *file_modified, u64 > > *size); > > #endif /* NFSD4_STATE_H */ > > diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h > > index fbdd42cde1fa..434973a6a8b1 100644 > > --- a/fs/nfsd/xdr4.h > > +++ b/fs/nfsd/xdr4.h > > @@ -426,6 +426,8 @@ struct nfsd4_read { > > struct svc_rqst *rd_rqstp; /* > > response */ > > struct svc_fh *rd_fhp; /* response */ > > u32 rd_eof; /* response */ > > + > > + struct nfs4_stid *rd_wd_stid; /* > > internal */ > > }; > > > > struct nfsd4_readdir { >
On 7/24/24 10:01 AM, Sagi Grimberg wrote: > Based on a patch fom Dai Ngo, allow NFSv4 client to use write delegation > stateid for READ operation. Per RFC 8881 section 9.1.2. Use of the > Stateid and Locking. > > In addition, for anonymous stateids, check for pending delegations by > the filehandle and client_id, and if a conflict found, recall the delegation > before allowing the read to take place. > > Suggested-by: Dai Ngo <dai.ngo@oracle.com> > Signed-off-by: Sagi Grimberg <sagi@grimberg.me> > --- > fs/nfsd/nfs4proc.c | 22 +++++++++++++++++++-- > fs/nfsd/nfs4state.c | 47 +++++++++++++++++++++++++++++++++++++++++++++ > fs/nfsd/nfs4xdr.c | 9 +++++++++ > fs/nfsd/state.h | 2 ++ > fs/nfsd/xdr4.h | 2 ++ > 5 files changed, 80 insertions(+), 2 deletions(-) > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c > index 7b70309ad8fb..324984ec70c6 100644 > --- a/fs/nfsd/nfs4proc.c > +++ b/fs/nfsd/nfs4proc.c > @@ -979,8 +979,24 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > /* check stateid */ > status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, > &read->rd_stateid, RD_STATE, > - &read->rd_nf, NULL); > - > + &read->rd_nf, &read->rd_wd_stid); > + /* > + * rd_wd_stid is needed for nfsd4_encode_read to allow write > + * delegation stateid used for read. Its refcount is decremented > + * by nfsd4_read_release when read is done. > + */ > + if (!status) { > + if (!read->rd_wd_stid) { > + /* special stateid? */ > + status = nfsd4_deleg_read_conflict(rqstp, cstate->clp, > + &cstate->current_fh); > + } else if (read->rd_wd_stid->sc_type != SC_TYPE_DELEG || > + delegstateid(read->rd_wd_stid)->dl_type != > + NFS4_OPEN_DELEGATE_WRITE) { > + nfs4_put_stid(read->rd_wd_stid); > + read->rd_wd_stid = NULL; > + } > + } > read->rd_rqstp = rqstp; > read->rd_fhp = &cstate->current_fh; > return status; > @@ -990,6 +1006,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > static void > nfsd4_read_release(union nfsd4_op_u *u) > { > + if (u->read.rd_wd_stid) > + nfs4_put_stid(u->read.rd_wd_stid); > if (u->read.rd_nf) > nfsd_file_put(u->read.rd_nf); > trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp, > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c > index dc61a8adfcd4..7e6b9fb31a4c 100644 > --- a/fs/nfsd/nfs4state.c > +++ b/fs/nfsd/nfs4state.c > @@ -8805,6 +8805,53 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate, > get_stateid(cstate, &u->write.wr_stateid); > } > > +/** > + * nfsd4_deleg_read_conflict - Recall if read causes conflict > + * @rqstp: RPC transaction context > + * @clp: nfs client > + * @fhp: nfs file handle > + * @inode: file to be checked for a conflict > + * @modified: return true if file was modified > + * @size: new size of file if modified is true > + * > + * This function is called when there is a conflict between a write > + * delegation and a read that is using a special stateid where the > + * we cannot derive the client stateid exsistence. The server > + * must recall a conflicting delegation before allowing the read > + * to continue. > + * > + * Returns 0 if there is no conflict; otherwise an nfs_stat > + * code is returned. > + */ > +__be32 nfsd4_deleg_read_conflict(struct svc_rqst *rqstp, > + struct nfs4_client *clp, struct svc_fh *fhp) > +{ > + struct nfs4_file *fp; > + __be32 status = 0; > + > + fp = nfsd4_file_hash_lookup(fhp); > + if (!fp) > + return nfs_ok; > + > + spin_lock(&state_lock); > + spin_lock(&fp->fi_lock); > + if (!list_empty(&fp->fi_delegations) && > + !nfs4_delegation_exists(clp, fp)) { > + /* conflict, recall deleg */ I don't see how we can have a delegation conflict here. If a client has a write delegation then there should not be any delegations from other clients. -Dai > + status = nfserrno(nfsd_open_break_lease(fp->fi_inode, > + NFSD_MAY_READ)); > + if (status) > + goto out; > + if (!nfsd_wait_for_delegreturn(rqstp, fp->fi_inode)) > + status = nfserr_jukebox; > + } > +out: > + spin_unlock(&fp->fi_lock); > + spin_unlock(&state_lock); > + return status; > +} > + > + > /** > * nfsd4_deleg_getattr_conflict - Recall if GETATTR causes conflict > * @rqstp: RPC transaction context > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c > index c7bfd2180e3f..f0fe526fac3c 100644 > --- a/fs/nfsd/nfs4xdr.c > +++ b/fs/nfsd/nfs4xdr.c > @@ -4418,6 +4418,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, > unsigned long maxcount; > struct file *file; > __be32 *p; > + fmode_t o_fmode = 0; > > if (nfserr) > return nfserr; > @@ -4437,10 +4438,18 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, > maxcount = min_t(unsigned long, read->rd_length, > (xdr->buf->buflen - xdr->buf->len)); > > + if (read->rd_wd_stid) { > + /* allow READ using write delegation stateid */ > + o_fmode = file->f_mode; > + file->f_mode |= FMODE_READ; > + } > if (file->f_op->splice_read && splice_ok) > nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount); > else > nfserr = nfsd4_encode_readv(resp, read, file, maxcount); > + if (o_fmode) > + file->f_mode = o_fmode; > + > if (nfserr) { > xdr_truncate_encode(xdr, starting_len); > return nfserr; > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h > index ffc217099d19..c1f13b5877c6 100644 > --- a/fs/nfsd/state.h > +++ b/fs/nfsd/state.h > @@ -780,6 +780,8 @@ static inline bool try_to_expire_client(struct nfs4_client *clp) > return clp->cl_state == NFSD4_EXPIRABLE; > } > > +extern __be32 nfsd4_deleg_read_conflict(struct svc_rqst *rqstp, > + struct nfs4_client *clp, struct svc_fh *fhp); > extern __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, > struct inode *inode, bool *file_modified, u64 *size); > #endif /* NFSD4_STATE_H */ > diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h > index fbdd42cde1fa..434973a6a8b1 100644 > --- a/fs/nfsd/xdr4.h > +++ b/fs/nfsd/xdr4.h > @@ -426,6 +426,8 @@ struct nfsd4_read { > struct svc_rqst *rd_rqstp; /* response */ > struct svc_fh *rd_fhp; /* response */ > u32 rd_eof; /* response */ > + > + struct nfs4_stid *rd_wd_stid; /* internal */ > }; > > struct nfsd4_readdir {
On Sat, Jul 27, 2024 at 09:46:34AM -0700, Dai Ngo wrote: > > On 7/24/24 10:01 AM, Sagi Grimberg wrote: > > Based on a patch fom Dai Ngo, allow NFSv4 client to use write delegation > > stateid for READ operation. Per RFC 8881 section 9.1.2. Use of the > > Stateid and Locking. > > > > In addition, for anonymous stateids, check for pending delegations by > > the filehandle and client_id, and if a conflict found, recall the delegation > > before allowing the read to take place. > > > > Suggested-by: Dai Ngo <dai.ngo@oracle.com> > > Signed-off-by: Sagi Grimberg <sagi@grimberg.me> > > --- > > fs/nfsd/nfs4proc.c | 22 +++++++++++++++++++-- > > fs/nfsd/nfs4state.c | 47 +++++++++++++++++++++++++++++++++++++++++++++ > > fs/nfsd/nfs4xdr.c | 9 +++++++++ > > fs/nfsd/state.h | 2 ++ > > fs/nfsd/xdr4.h | 2 ++ > > 5 files changed, 80 insertions(+), 2 deletions(-) > > > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c > > index 7b70309ad8fb..324984ec70c6 100644 > > --- a/fs/nfsd/nfs4proc.c > > +++ b/fs/nfsd/nfs4proc.c > > @@ -979,8 +979,24 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > > /* check stateid */ > > status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, > > &read->rd_stateid, RD_STATE, > > - &read->rd_nf, NULL); > > - > > + &read->rd_nf, &read->rd_wd_stid); > > + /* > > + * rd_wd_stid is needed for nfsd4_encode_read to allow write > > + * delegation stateid used for read. Its refcount is decremented > > + * by nfsd4_read_release when read is done. > > + */ > > + if (!status) { > > + if (!read->rd_wd_stid) { > > + /* special stateid? */ > > + status = nfsd4_deleg_read_conflict(rqstp, cstate->clp, > > + &cstate->current_fh); > > + } else if (read->rd_wd_stid->sc_type != SC_TYPE_DELEG || > > + delegstateid(read->rd_wd_stid)->dl_type != > > + NFS4_OPEN_DELEGATE_WRITE) { > > + nfs4_put_stid(read->rd_wd_stid); > > + read->rd_wd_stid = NULL; > > + } > > + } > > read->rd_rqstp = rqstp; > > read->rd_fhp = &cstate->current_fh; > > return status; > > @@ -990,6 +1006,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > > static void > > nfsd4_read_release(union nfsd4_op_u *u) > > { > > + if (u->read.rd_wd_stid) > > + nfs4_put_stid(u->read.rd_wd_stid); > > if (u->read.rd_nf) > > nfsd_file_put(u->read.rd_nf); > > trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp, > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c > > index dc61a8adfcd4..7e6b9fb31a4c 100644 > > --- a/fs/nfsd/nfs4state.c > > +++ b/fs/nfsd/nfs4state.c > > @@ -8805,6 +8805,53 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate, > > get_stateid(cstate, &u->write.wr_stateid); > > } > > +/** > > + * nfsd4_deleg_read_conflict - Recall if read causes conflict > > + * @rqstp: RPC transaction context > > + * @clp: nfs client > > + * @fhp: nfs file handle > > + * @inode: file to be checked for a conflict > > + * @modified: return true if file was modified > > + * @size: new size of file if modified is true > > + * > > + * This function is called when there is a conflict between a write > > + * delegation and a read that is using a special stateid where the > > + * we cannot derive the client stateid exsistence. The server > > + * must recall a conflicting delegation before allowing the read > > + * to continue. > > + * > > + * Returns 0 if there is no conflict; otherwise an nfs_stat > > + * code is returned. > > + */ > > +__be32 nfsd4_deleg_read_conflict(struct svc_rqst *rqstp, > > + struct nfs4_client *clp, struct svc_fh *fhp) > > +{ > > + struct nfs4_file *fp; > > + __be32 status = 0; > > + > > + fp = nfsd4_file_hash_lookup(fhp); > > + if (!fp) > > + return nfs_ok; > > + > > + spin_lock(&state_lock); > > + spin_lock(&fp->fi_lock); > > + if (!list_empty(&fp->fi_delegations) && > > + !nfs4_delegation_exists(clp, fp)) { > > + /* conflict, recall deleg */ > > I don't see how we can have a delegation conflict here. If a client > has a write delegation then there should not be any delegations from > other clients. A delegation conflict is possible if the client is using an anonymous stateid to perform the READ. One thing we could perhaps do is add support for the use of anonymous stateids as a separate patch. Sagi, how necessary is support for "READ with anonymous stateid" for supporting WR_ONLY write delegation? > > + status = nfserrno(nfsd_open_break_lease(fp->fi_inode, > > + NFSD_MAY_READ)); > > + if (status) > > + goto out; > > + if (!nfsd_wait_for_delegreturn(rqstp, fp->fi_inode)) > > + status = nfserr_jukebox; > > + } > > +out: > > + spin_unlock(&fp->fi_lock); > > + spin_unlock(&state_lock); > > + return status; > > +} > > + > > + > > /** > > * nfsd4_deleg_getattr_conflict - Recall if GETATTR causes conflict > > * @rqstp: RPC transaction context > > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c > > index c7bfd2180e3f..f0fe526fac3c 100644 > > --- a/fs/nfsd/nfs4xdr.c > > +++ b/fs/nfsd/nfs4xdr.c > > @@ -4418,6 +4418,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, > > unsigned long maxcount; > > struct file *file; > > __be32 *p; > > + fmode_t o_fmode = 0; > > if (nfserr) > > return nfserr; > > @@ -4437,10 +4438,18 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, > > maxcount = min_t(unsigned long, read->rd_length, > > (xdr->buf->buflen - xdr->buf->len)); > > + if (read->rd_wd_stid) { > > + /* allow READ using write delegation stateid */ > > + o_fmode = file->f_mode; > > + file->f_mode |= FMODE_READ; > > + } > > if (file->f_op->splice_read && splice_ok) > > nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount); > > else > > nfserr = nfsd4_encode_readv(resp, read, file, maxcount); > > + if (o_fmode) > > + file->f_mode = o_fmode; > > + > > if (nfserr) { > > xdr_truncate_encode(xdr, starting_len); > > return nfserr; > > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h > > index ffc217099d19..c1f13b5877c6 100644 > > --- a/fs/nfsd/state.h > > +++ b/fs/nfsd/state.h > > @@ -780,6 +780,8 @@ static inline bool try_to_expire_client(struct nfs4_client *clp) > > return clp->cl_state == NFSD4_EXPIRABLE; > > } > > +extern __be32 nfsd4_deleg_read_conflict(struct svc_rqst *rqstp, > > + struct nfs4_client *clp, struct svc_fh *fhp); > > extern __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, > > struct inode *inode, bool *file_modified, u64 *size); > > #endif /* NFSD4_STATE_H */ > > diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h > > index fbdd42cde1fa..434973a6a8b1 100644 > > --- a/fs/nfsd/xdr4.h > > +++ b/fs/nfsd/xdr4.h > > @@ -426,6 +426,8 @@ struct nfsd4_read { > > struct svc_rqst *rd_rqstp; /* response */ > > struct svc_fh *rd_fhp; /* response */ > > u32 rd_eof; /* response */ > > + > > + struct nfs4_stid *rd_wd_stid; /* internal */ > > }; > > struct nfsd4_readdir {
On 7/26/24 7:06 AM, Chuck Lever wrote: > On Wed, Jul 24, 2024 at 10:01:38AM -0700, Sagi Grimberg wrote: >> Based on a patch fom Dai Ngo, allow NFSv4 client to use write delegation >> stateid for READ operation. Per RFC 8881 section 9.1.2. Use of the >> Stateid and Locking. >> >> In addition, for anonymous stateids, check for pending delegations by >> the filehandle and client_id, and if a conflict found, recall the delegation >> before allowing the read to take place. >> >> Suggested-by: Dai Ngo <dai.ngo@oracle.com> >> Signed-off-by: Sagi Grimberg <sagi@grimberg.me> >> --- >> fs/nfsd/nfs4proc.c | 22 +++++++++++++++++++-- >> fs/nfsd/nfs4state.c | 47 +++++++++++++++++++++++++++++++++++++++++++++ >> fs/nfsd/nfs4xdr.c | 9 +++++++++ >> fs/nfsd/state.h | 2 ++ >> fs/nfsd/xdr4.h | 2 ++ >> 5 files changed, 80 insertions(+), 2 deletions(-) >> >> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c >> index 7b70309ad8fb..324984ec70c6 100644 >> --- a/fs/nfsd/nfs4proc.c >> +++ b/fs/nfsd/nfs4proc.c >> @@ -979,8 +979,24 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, >> /* check stateid */ >> status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, >> &read->rd_stateid, RD_STATE, >> - &read->rd_nf, NULL); >> - >> + &read->rd_nf, &read->rd_wd_stid); >> + /* >> + * rd_wd_stid is needed for nfsd4_encode_read to allow write >> + * delegation stateid used for read. Its refcount is decremented >> + * by nfsd4_read_release when read is done. >> + */ >> + if (!status) { >> + if (!read->rd_wd_stid) { >> + /* special stateid? */ >> + status = nfsd4_deleg_read_conflict(rqstp, cstate->clp, >> + &cstate->current_fh); >> + } else if (read->rd_wd_stid->sc_type != SC_TYPE_DELEG || >> + delegstateid(read->rd_wd_stid)->dl_type != >> + NFS4_OPEN_DELEGATE_WRITE) { >> + nfs4_put_stid(read->rd_wd_stid); >> + read->rd_wd_stid = NULL; >> + } >> + } >> read->rd_rqstp = rqstp; >> read->rd_fhp = &cstate->current_fh; >> return status; >> @@ -990,6 +1006,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, >> static void >> nfsd4_read_release(union nfsd4_op_u *u) >> { >> + if (u->read.rd_wd_stid) >> + nfs4_put_stid(u->read.rd_wd_stid); >> if (u->read.rd_nf) >> nfsd_file_put(u->read.rd_nf); >> trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp, >> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c >> index dc61a8adfcd4..7e6b9fb31a4c 100644 >> --- a/fs/nfsd/nfs4state.c >> +++ b/fs/nfsd/nfs4state.c >> @@ -8805,6 +8805,53 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate, >> get_stateid(cstate, &u->write.wr_stateid); >> } >> >> +/** >> + * nfsd4_deleg_read_conflict - Recall if read causes conflict >> + * @rqstp: RPC transaction context >> + * @clp: nfs client >> + * @fhp: nfs file handle >> + * @inode: file to be checked for a conflict >> + * @modified: return true if file was modified >> + * @size: new size of file if modified is true >> + * >> + * This function is called when there is a conflict between a write >> + * delegation and a read that is using a special stateid where the >> + * we cannot derive the client stateid exsistence. The server >> + * must recall a conflicting delegation before allowing the read >> + * to continue. >> + * >> + * Returns 0 if there is no conflict; otherwise an nfs_stat >> + * code is returned. >> + */ >> +__be32 nfsd4_deleg_read_conflict(struct svc_rqst *rqstp, >> + struct nfs4_client *clp, struct svc_fh *fhp) >> +{ >> + struct nfs4_file *fp; >> + __be32 status = 0; >> + >> + fp = nfsd4_file_hash_lookup(fhp); >> + if (!fp) >> + return nfs_ok; >> + >> + spin_lock(&state_lock); >> + spin_lock(&fp->fi_lock); >> + if (!list_empty(&fp->fi_delegations) && >> + !nfs4_delegation_exists(clp, fp)) { >> + /* conflict, recall deleg */ >> + status = nfserrno(nfsd_open_break_lease(fp->fi_inode, >> + NFSD_MAY_READ)); >> + if (status) >> + goto out; >> + if (!nfsd_wait_for_delegreturn(rqstp, fp->fi_inode)) >> + status = nfserr_jukebox; >> + } >> +out: >> + spin_unlock(&fp->fi_lock); >> + spin_unlock(&state_lock); >> + return status; >> +} >> + >> + >> /** >> * nfsd4_deleg_getattr_conflict - Recall if GETATTR causes conflict >> * @rqstp: RPC transaction context >> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c >> index c7bfd2180e3f..f0fe526fac3c 100644 >> --- a/fs/nfsd/nfs4xdr.c >> +++ b/fs/nfsd/nfs4xdr.c >> @@ -4418,6 +4418,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, >> unsigned long maxcount; >> struct file *file; >> __be32 *p; >> + fmode_t o_fmode = 0; >> >> if (nfserr) >> return nfserr; >> @@ -4437,10 +4438,18 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, >> maxcount = min_t(unsigned long, read->rd_length, >> (xdr->buf->buflen - xdr->buf->len)); >> >> + if (read->rd_wd_stid) { >> + /* allow READ using write delegation stateid */ >> + o_fmode = file->f_mode; >> + file->f_mode |= FMODE_READ; >> + } > nfsd4_encode_read_plus() needs to handle write delegation stateids > as well. > > I'm not too sure about modifying the f_mode on an nfsd_file you > just got from a cache of shared nfsd_files. > > I think I'd prefer if preprocess_stateid returned an nfsd_file that > was already open for read. There would not be any nfsd_file that was already open for read since the file still has a write delegation on it, unless the open for read was from the same client. I'm also not sure if a client would send an open for read to the server when it already owned the write delegation of the file. -Dai > IIUC that would mean that no changes > would be needed here or in nfsd4_encode_read_plus(). > > Would that be difficult? > > >> if (file->f_op->splice_read && splice_ok) >> nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount); >> else >> nfserr = nfsd4_encode_readv(resp, read, file, maxcount); >> + if (o_fmode) >> + file->f_mode = o_fmode; >> + >> if (nfserr) { >> xdr_truncate_encode(xdr, starting_len); >> return nfserr; >> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h >> index ffc217099d19..c1f13b5877c6 100644 >> --- a/fs/nfsd/state.h >> +++ b/fs/nfsd/state.h >> @@ -780,6 +780,8 @@ static inline bool try_to_expire_client(struct nfs4_client *clp) >> return clp->cl_state == NFSD4_EXPIRABLE; >> } >> >> +extern __be32 nfsd4_deleg_read_conflict(struct svc_rqst *rqstp, >> + struct nfs4_client *clp, struct svc_fh *fhp); >> extern __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, >> struct inode *inode, bool *file_modified, u64 *size); >> #endif /* NFSD4_STATE_H */ >> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h >> index fbdd42cde1fa..434973a6a8b1 100644 >> --- a/fs/nfsd/xdr4.h >> +++ b/fs/nfsd/xdr4.h >> @@ -426,6 +426,8 @@ struct nfsd4_read { >> struct svc_rqst *rd_rqstp; /* response */ >> struct svc_fh *rd_fhp; /* response */ >> u32 rd_eof; /* response */ >> + >> + struct nfs4_stid *rd_wd_stid; /* internal */ >> }; >> >> struct nfsd4_readdir { >> -- >> 2.43.0 >>
On 7/27/24 9:52 AM, Chuck Lever wrote: > On Sat, Jul 27, 2024 at 09:46:34AM -0700, Dai Ngo wrote: >> On 7/24/24 10:01 AM, Sagi Grimberg wrote: >>> Based on a patch fom Dai Ngo, allow NFSv4 client to use write delegation >>> stateid for READ operation. Per RFC 8881 section 9.1.2. Use of the >>> Stateid and Locking. >>> >>> In addition, for anonymous stateids, check for pending delegations by >>> the filehandle and client_id, and if a conflict found, recall the delegation >>> before allowing the read to take place. >>> >>> Suggested-by: Dai Ngo <dai.ngo@oracle.com> >>> Signed-off-by: Sagi Grimberg <sagi@grimberg.me> >>> --- >>> fs/nfsd/nfs4proc.c | 22 +++++++++++++++++++-- >>> fs/nfsd/nfs4state.c | 47 +++++++++++++++++++++++++++++++++++++++++++++ >>> fs/nfsd/nfs4xdr.c | 9 +++++++++ >>> fs/nfsd/state.h | 2 ++ >>> fs/nfsd/xdr4.h | 2 ++ >>> 5 files changed, 80 insertions(+), 2 deletions(-) >>> >>> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c >>> index 7b70309ad8fb..324984ec70c6 100644 >>> --- a/fs/nfsd/nfs4proc.c >>> +++ b/fs/nfsd/nfs4proc.c >>> @@ -979,8 +979,24 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, >>> /* check stateid */ >>> status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, >>> &read->rd_stateid, RD_STATE, >>> - &read->rd_nf, NULL); >>> - >>> + &read->rd_nf, &read->rd_wd_stid); >>> + /* >>> + * rd_wd_stid is needed for nfsd4_encode_read to allow write >>> + * delegation stateid used for read. Its refcount is decremented >>> + * by nfsd4_read_release when read is done. >>> + */ >>> + if (!status) { >>> + if (!read->rd_wd_stid) { >>> + /* special stateid? */ >>> + status = nfsd4_deleg_read_conflict(rqstp, cstate->clp, >>> + &cstate->current_fh); >>> + } else if (read->rd_wd_stid->sc_type != SC_TYPE_DELEG || >>> + delegstateid(read->rd_wd_stid)->dl_type != >>> + NFS4_OPEN_DELEGATE_WRITE) { >>> + nfs4_put_stid(read->rd_wd_stid); >>> + read->rd_wd_stid = NULL; >>> + } >>> + } >>> read->rd_rqstp = rqstp; >>> read->rd_fhp = &cstate->current_fh; >>> return status; >>> @@ -990,6 +1006,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, >>> static void >>> nfsd4_read_release(union nfsd4_op_u *u) >>> { >>> + if (u->read.rd_wd_stid) >>> + nfs4_put_stid(u->read.rd_wd_stid); >>> if (u->read.rd_nf) >>> nfsd_file_put(u->read.rd_nf); >>> trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp, >>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c >>> index dc61a8adfcd4..7e6b9fb31a4c 100644 >>> --- a/fs/nfsd/nfs4state.c >>> +++ b/fs/nfsd/nfs4state.c >>> @@ -8805,6 +8805,53 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate, >>> get_stateid(cstate, &u->write.wr_stateid); >>> } >>> +/** >>> + * nfsd4_deleg_read_conflict - Recall if read causes conflict >>> + * @rqstp: RPC transaction context >>> + * @clp: nfs client >>> + * @fhp: nfs file handle >>> + * @inode: file to be checked for a conflict >>> + * @modified: return true if file was modified >>> + * @size: new size of file if modified is true >>> + * >>> + * This function is called when there is a conflict between a write >>> + * delegation and a read that is using a special stateid where the >>> + * we cannot derive the client stateid exsistence. The server >>> + * must recall a conflicting delegation before allowing the read >>> + * to continue. >>> + * >>> + * Returns 0 if there is no conflict; otherwise an nfs_stat >>> + * code is returned. >>> + */ >>> +__be32 nfsd4_deleg_read_conflict(struct svc_rqst *rqstp, >>> + struct nfs4_client *clp, struct svc_fh *fhp) >>> +{ >>> + struct nfs4_file *fp; >>> + __be32 status = 0; >>> + >>> + fp = nfsd4_file_hash_lookup(fhp); >>> + if (!fp) >>> + return nfs_ok; >>> + >>> + spin_lock(&state_lock); >>> + spin_lock(&fp->fi_lock); >>> + if (!list_empty(&fp->fi_delegations) && >>> + !nfs4_delegation_exists(clp, fp)) { >>> + /* conflict, recall deleg */ >> I don't see how we can have a delegation conflict here. If a client >> has a write delegation then there should not be any delegations from >> other clients. > A delegation conflict is possible if the client is using an > anonymous stateid to perform the READ. Then we should not detect any delegation conflict from this function. > > One thing we could perhaps do is add support for the use of > anonymous stateids as a separate patch. This would be a separate issue from allowing write delegation stateid to read. This would be to detect the scenario where a client using special stateid to read while another client has a write delegation on the file. -Dai > Sagi, how necessary is > support for "READ with anonymous stateid" for supporting WR_ONLY > write delegation? > > >>> + status = nfserrno(nfsd_open_break_lease(fp->fi_inode, >>> + NFSD_MAY_READ)); >>> + if (status) >>> + goto out; >>> + if (!nfsd_wait_for_delegreturn(rqstp, fp->fi_inode)) >>> + status = nfserr_jukebox; >>> + } >>> +out: >>> + spin_unlock(&fp->fi_lock); >>> + spin_unlock(&state_lock); >>> + return status; >>> +} >>> + >>> + >>> /** >>> * nfsd4_deleg_getattr_conflict - Recall if GETATTR causes conflict >>> * @rqstp: RPC transaction context >>> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c >>> index c7bfd2180e3f..f0fe526fac3c 100644 >>> --- a/fs/nfsd/nfs4xdr.c >>> +++ b/fs/nfsd/nfs4xdr.c >>> @@ -4418,6 +4418,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, >>> unsigned long maxcount; >>> struct file *file; >>> __be32 *p; >>> + fmode_t o_fmode = 0; >>> if (nfserr) >>> return nfserr; >>> @@ -4437,10 +4438,18 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, >>> maxcount = min_t(unsigned long, read->rd_length, >>> (xdr->buf->buflen - xdr->buf->len)); >>> + if (read->rd_wd_stid) { >>> + /* allow READ using write delegation stateid */ >>> + o_fmode = file->f_mode; >>> + file->f_mode |= FMODE_READ; >>> + } >>> if (file->f_op->splice_read && splice_ok) >>> nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount); >>> else >>> nfserr = nfsd4_encode_readv(resp, read, file, maxcount); >>> + if (o_fmode) >>> + file->f_mode = o_fmode; >>> + >>> if (nfserr) { >>> xdr_truncate_encode(xdr, starting_len); >>> return nfserr; >>> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h >>> index ffc217099d19..c1f13b5877c6 100644 >>> --- a/fs/nfsd/state.h >>> +++ b/fs/nfsd/state.h >>> @@ -780,6 +780,8 @@ static inline bool try_to_expire_client(struct nfs4_client *clp) >>> return clp->cl_state == NFSD4_EXPIRABLE; >>> } >>> +extern __be32 nfsd4_deleg_read_conflict(struct svc_rqst *rqstp, >>> + struct nfs4_client *clp, struct svc_fh *fhp); >>> extern __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, >>> struct inode *inode, bool *file_modified, u64 *size); >>> #endif /* NFSD4_STATE_H */ >>> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h >>> index fbdd42cde1fa..434973a6a8b1 100644 >>> --- a/fs/nfsd/xdr4.h >>> +++ b/fs/nfsd/xdr4.h >>> @@ -426,6 +426,8 @@ struct nfsd4_read { >>> struct svc_rqst *rd_rqstp; /* response */ >>> struct svc_fh *rd_fhp; /* response */ >>> u32 rd_eof; /* response */ >>> + >>> + struct nfs4_stid *rd_wd_stid; /* internal */ >>> }; >>> struct nfsd4_readdir {
>> nfsd4_encode_read_plus() needs to handle write delegation stateids >> as well. >> >> I'm not too sure about modifying the f_mode on an nfsd_file you >> just got from a cache of shared nfsd_files. >> >> I think I'd prefer if preprocess_stateid returned an nfsd_file that >> was already open for read. > > There would not be any nfsd_file that was already open for read since > the file still has a write delegation on it, unless the open for read > was from the same client. > > I'm also not sure if a client would send an open for read to the server > when it already owned the write delegation of the file. It doesn't have to, IIUC. So this needs to be addressed. One thing that we could do is open the file for RW to begin with if we hand out a write delegation? BTW, the patch is missing the part that actually offers the write delegation, which was sent initially.
On 26/07/2024 17:06, Chuck Lever wrote: > On Wed, Jul 24, 2024 at 10:01:38AM -0700, Sagi Grimberg wrote: >> Based on a patch fom Dai Ngo, allow NFSv4 client to use write delegation >> stateid for READ operation. Per RFC 8881 section 9.1.2. Use of the >> Stateid and Locking. >> >> In addition, for anonymous stateids, check for pending delegations by >> the filehandle and client_id, and if a conflict found, recall the delegation >> before allowing the read to take place. >> >> Suggested-by: Dai Ngo <dai.ngo@oracle.com> >> Signed-off-by: Sagi Grimberg <sagi@grimberg.me> >> --- >> fs/nfsd/nfs4proc.c | 22 +++++++++++++++++++-- >> fs/nfsd/nfs4state.c | 47 +++++++++++++++++++++++++++++++++++++++++++++ >> fs/nfsd/nfs4xdr.c | 9 +++++++++ >> fs/nfsd/state.h | 2 ++ >> fs/nfsd/xdr4.h | 2 ++ >> 5 files changed, 80 insertions(+), 2 deletions(-) >> >> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c >> index 7b70309ad8fb..324984ec70c6 100644 >> --- a/fs/nfsd/nfs4proc.c >> +++ b/fs/nfsd/nfs4proc.c >> @@ -979,8 +979,24 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, >> /* check stateid */ >> status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, >> &read->rd_stateid, RD_STATE, >> - &read->rd_nf, NULL); >> - >> + &read->rd_nf, &read->rd_wd_stid); >> + /* >> + * rd_wd_stid is needed for nfsd4_encode_read to allow write >> + * delegation stateid used for read. Its refcount is decremented >> + * by nfsd4_read_release when read is done. >> + */ >> + if (!status) { >> + if (!read->rd_wd_stid) { >> + /* special stateid? */ >> + status = nfsd4_deleg_read_conflict(rqstp, cstate->clp, >> + &cstate->current_fh); >> + } else if (read->rd_wd_stid->sc_type != SC_TYPE_DELEG || >> + delegstateid(read->rd_wd_stid)->dl_type != >> + NFS4_OPEN_DELEGATE_WRITE) { >> + nfs4_put_stid(read->rd_wd_stid); >> + read->rd_wd_stid = NULL; >> + } >> + } >> read->rd_rqstp = rqstp; >> read->rd_fhp = &cstate->current_fh; >> return status; >> @@ -990,6 +1006,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, >> static void >> nfsd4_read_release(union nfsd4_op_u *u) >> { >> + if (u->read.rd_wd_stid) >> + nfs4_put_stid(u->read.rd_wd_stid); >> if (u->read.rd_nf) >> nfsd_file_put(u->read.rd_nf); >> trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp, >> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c >> index dc61a8adfcd4..7e6b9fb31a4c 100644 >> --- a/fs/nfsd/nfs4state.c >> +++ b/fs/nfsd/nfs4state.c >> @@ -8805,6 +8805,53 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate, >> get_stateid(cstate, &u->write.wr_stateid); >> } >> >> +/** >> + * nfsd4_deleg_read_conflict - Recall if read causes conflict >> + * @rqstp: RPC transaction context >> + * @clp: nfs client >> + * @fhp: nfs file handle >> + * @inode: file to be checked for a conflict >> + * @modified: return true if file was modified >> + * @size: new size of file if modified is true >> + * >> + * This function is called when there is a conflict between a write >> + * delegation and a read that is using a special stateid where the >> + * we cannot derive the client stateid exsistence. The server >> + * must recall a conflicting delegation before allowing the read >> + * to continue. >> + * >> + * Returns 0 if there is no conflict; otherwise an nfs_stat >> + * code is returned. >> + */ >> +__be32 nfsd4_deleg_read_conflict(struct svc_rqst *rqstp, >> + struct nfs4_client *clp, struct svc_fh *fhp) >> +{ >> + struct nfs4_file *fp; >> + __be32 status = 0; >> + >> + fp = nfsd4_file_hash_lookup(fhp); >> + if (!fp) >> + return nfs_ok; >> + >> + spin_lock(&state_lock); >> + spin_lock(&fp->fi_lock); >> + if (!list_empty(&fp->fi_delegations) && >> + !nfs4_delegation_exists(clp, fp)) { >> + /* conflict, recall deleg */ >> + status = nfserrno(nfsd_open_break_lease(fp->fi_inode, >> + NFSD_MAY_READ)); >> + if (status) >> + goto out; >> + if (!nfsd_wait_for_delegreturn(rqstp, fp->fi_inode)) >> + status = nfserr_jukebox; >> + } >> +out: >> + spin_unlock(&fp->fi_lock); >> + spin_unlock(&state_lock); >> + return status; >> +} >> + >> + >> /** >> * nfsd4_deleg_getattr_conflict - Recall if GETATTR causes conflict >> * @rqstp: RPC transaction context >> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c >> index c7bfd2180e3f..f0fe526fac3c 100644 >> --- a/fs/nfsd/nfs4xdr.c >> +++ b/fs/nfsd/nfs4xdr.c >> @@ -4418,6 +4418,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, >> unsigned long maxcount; >> struct file *file; >> __be32 *p; >> + fmode_t o_fmode = 0; >> >> if (nfserr) >> return nfserr; >> @@ -4437,10 +4438,18 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, >> maxcount = min_t(unsigned long, read->rd_length, >> (xdr->buf->buflen - xdr->buf->len)); >> >> + if (read->rd_wd_stid) { >> + /* allow READ using write delegation stateid */ >> + o_fmode = file->f_mode; >> + file->f_mode |= FMODE_READ; >> + } > nfsd4_encode_read_plus() needs to handle write delegation stateids > as well. Yes, missed that one. > > I'm not too sure about modifying the f_mode on an nfsd_file you > just got from a cache of shared nfsd_files. If that is a problem, nfsd can open the file with rw access to begin with if a write deleg was given? > > I think I'd prefer if preprocess_stateid returned an nfsd_file that > was already open for read. IIUC that would mean that no changes > would be needed here or in nfsd4_encode_read_plus(). > > Would that be difficult? preprocess_stateif to return a nfsd_file? not sure. But Nai correctly pointed out that the file may have not been opened for read.
>> I don't see how we can have a delegation conflict here. If a client >> has a write delegation then there should not be any delegations from >> other clients. > A delegation conflict is possible if the client is using an > anonymous stateid to perform the READ. > > One thing we could perhaps do is add support for the use of > anonymous stateids as a separate patch. Sagi, how necessary is > support for "READ with anonymous stateid" for supporting WR_ONLY > write delegation? Without this in the patch, there is a pynfs tests that breaks (read with anon stateid). I can separate it if you want, but didn't want to introduce a patch that causes a regression on its own.
>>>> id(cstate, &u->write.wr_stateid); >>>> } >>>> +/** >>>> + * nfsd4_deleg_read_conflict - Recall if read causes conflict >>>> + * @rqstp: RPC transaction context >>>> + * @clp: nfs client >>>> + * @fhp: nfs file handle >>>> + * @inode: file to be checked for a conflict >>>> + * @modified: return true if file was modified >>>> + * @size: new size of file if modified is true >>>> + * >>>> + * This function is called when there is a conflict between a write >>>> + * delegation and a read that is using a special stateid where the >>>> + * we cannot derive the client stateid exsistence. The server >>>> + * must recall a conflicting delegation before allowing the read >>>> + * to continue. >>>> + * >>>> + * Returns 0 if there is no conflict; otherwise an nfs_stat >>>> + * code is returned. >>>> + */ >>>> +__be32 nfsd4_deleg_read_conflict(struct svc_rqst *rqstp, >>>> + struct nfs4_client *clp, struct svc_fh *fhp) >>>> +{ >>>> + struct nfs4_file *fp; >>>> + __be32 status = 0; >>>> + >>>> + fp = nfsd4_file_hash_lookup(fhp); >>>> + if (!fp) >>>> + return nfs_ok; >>>> + >>>> + spin_lock(&state_lock); >>>> + spin_lock(&fp->fi_lock); >>>> + if (!list_empty(&fp->fi_delegations) && >>>> + !nfs4_delegation_exists(clp, fp)) { >>>> + /* conflict, recall deleg */ >>> I don't see how we can have a delegation conflict here. If a client >>> has a write delegation then there should not be any delegations from >>> other clients. >> A delegation conflict is possible if the client is using an >> anonymous stateid to perform the READ. > > Then we should not detect any delegation conflict from this > function. Can you explain why? If the client sent a raed with anon stateid, this function checks the pending delegations (fi_delegations), and not from the same client (!nfs4_delegation_exists), then it should detect a conflict. > >> >> One thing we could perhaps do is add support for the use of >> anonymous stateids as a separate patch. > > This would be a separate issue from allowing write delegation > stateid to read. This would be to detect the scenario where a > client using special stateid to read while another client has > a write delegation on the file. As mentioned, I can separate it, but this would make this patch break a pynfs test.
On Sat, Jul 27, 2024 at 10:26:24PM +0300, Sagi Grimberg wrote: > > > > On 26/07/2024 17:06, Chuck Lever wrote: > > On Wed, Jul 24, 2024 at 10:01:38AM -0700, Sagi Grimberg wrote: > > > Based on a patch fom Dai Ngo, allow NFSv4 client to use write delegation > > > stateid for READ operation. Per RFC 8881 section 9.1.2. Use of the > > > Stateid and Locking. > > > > > > In addition, for anonymous stateids, check for pending delegations by > > > the filehandle and client_id, and if a conflict found, recall the delegation > > > before allowing the read to take place. > > > > > > Suggested-by: Dai Ngo <dai.ngo@oracle.com> > > > Signed-off-by: Sagi Grimberg <sagi@grimberg.me> > > > --- > > > fs/nfsd/nfs4proc.c | 22 +++++++++++++++++++-- > > > fs/nfsd/nfs4state.c | 47 +++++++++++++++++++++++++++++++++++++++++++++ > > > fs/nfsd/nfs4xdr.c | 9 +++++++++ > > > fs/nfsd/state.h | 2 ++ > > > fs/nfsd/xdr4.h | 2 ++ > > > 5 files changed, 80 insertions(+), 2 deletions(-) > > > > > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c > > > index 7b70309ad8fb..324984ec70c6 100644 > > > --- a/fs/nfsd/nfs4proc.c > > > +++ b/fs/nfsd/nfs4proc.c > > > @@ -979,8 +979,24 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > > > /* check stateid */ > > > status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, > > > &read->rd_stateid, RD_STATE, > > > - &read->rd_nf, NULL); > > > - > > > + &read->rd_nf, &read->rd_wd_stid); > > > + /* > > > + * rd_wd_stid is needed for nfsd4_encode_read to allow write > > > + * delegation stateid used for read. Its refcount is decremented > > > + * by nfsd4_read_release when read is done. > > > + */ > > > + if (!status) { > > > + if (!read->rd_wd_stid) { > > > + /* special stateid? */ > > > + status = nfsd4_deleg_read_conflict(rqstp, cstate->clp, > > > + &cstate->current_fh); > > > + } else if (read->rd_wd_stid->sc_type != SC_TYPE_DELEG || > > > + delegstateid(read->rd_wd_stid)->dl_type != > > > + NFS4_OPEN_DELEGATE_WRITE) { > > > + nfs4_put_stid(read->rd_wd_stid); > > > + read->rd_wd_stid = NULL; > > > + } > > > + } > > > read->rd_rqstp = rqstp; > > > read->rd_fhp = &cstate->current_fh; > > > return status; > > > @@ -990,6 +1006,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > > > static void > > > nfsd4_read_release(union nfsd4_op_u *u) > > > { > > > + if (u->read.rd_wd_stid) > > > + nfs4_put_stid(u->read.rd_wd_stid); > > > if (u->read.rd_nf) > > > nfsd_file_put(u->read.rd_nf); > > > trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp, > > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c > > > index dc61a8adfcd4..7e6b9fb31a4c 100644 > > > --- a/fs/nfsd/nfs4state.c > > > +++ b/fs/nfsd/nfs4state.c > > > @@ -8805,6 +8805,53 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate, > > > get_stateid(cstate, &u->write.wr_stateid); > > > } > > > +/** > > > + * nfsd4_deleg_read_conflict - Recall if read causes conflict > > > + * @rqstp: RPC transaction context > > > + * @clp: nfs client > > > + * @fhp: nfs file handle > > > + * @inode: file to be checked for a conflict > > > + * @modified: return true if file was modified > > > + * @size: new size of file if modified is true > > > + * > > > + * This function is called when there is a conflict between a write > > > + * delegation and a read that is using a special stateid where the > > > + * we cannot derive the client stateid exsistence. The server > > > + * must recall a conflicting delegation before allowing the read > > > + * to continue. > > > + * > > > + * Returns 0 if there is no conflict; otherwise an nfs_stat > > > + * code is returned. > > > + */ > > > +__be32 nfsd4_deleg_read_conflict(struct svc_rqst *rqstp, > > > + struct nfs4_client *clp, struct svc_fh *fhp) > > > +{ > > > + struct nfs4_file *fp; > > > + __be32 status = 0; > > > + > > > + fp = nfsd4_file_hash_lookup(fhp); > > > + if (!fp) > > > + return nfs_ok; > > > + > > > + spin_lock(&state_lock); > > > + spin_lock(&fp->fi_lock); > > > + if (!list_empty(&fp->fi_delegations) && > > > + !nfs4_delegation_exists(clp, fp)) { > > > + /* conflict, recall deleg */ > > > + status = nfserrno(nfsd_open_break_lease(fp->fi_inode, > > > + NFSD_MAY_READ)); > > > + if (status) > > > + goto out; > > > + if (!nfsd_wait_for_delegreturn(rqstp, fp->fi_inode)) > > > + status = nfserr_jukebox; > > > + } > > > +out: > > > + spin_unlock(&fp->fi_lock); > > > + spin_unlock(&state_lock); > > > + return status; > > > +} > > > + > > > + > > > /** > > > * nfsd4_deleg_getattr_conflict - Recall if GETATTR causes conflict > > > * @rqstp: RPC transaction context > > > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c > > > index c7bfd2180e3f..f0fe526fac3c 100644 > > > --- a/fs/nfsd/nfs4xdr.c > > > +++ b/fs/nfsd/nfs4xdr.c > > > @@ -4418,6 +4418,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, > > > unsigned long maxcount; > > > struct file *file; > > > __be32 *p; > > > + fmode_t o_fmode = 0; > > > if (nfserr) > > > return nfserr; > > > @@ -4437,10 +4438,18 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, > > > maxcount = min_t(unsigned long, read->rd_length, > > > (xdr->buf->buflen - xdr->buf->len)); > > > + if (read->rd_wd_stid) { > > > + /* allow READ using write delegation stateid */ > > > + o_fmode = file->f_mode; > > > + file->f_mode |= FMODE_READ; > > > + } > > nfsd4_encode_read_plus() needs to handle write delegation stateids > > as well. > > Yes, missed that one. > > > > > I'm not too sure about modifying the f_mode on an nfsd_file you > > just got from a cache of shared nfsd_files. > > If that is a problem, nfsd can open the file with rw access to begin with > if a write deleg was given? IMO, it's not a question of whether there is a write delegation, but rather what intent the client told the server during the OPEN. The server would need to open the local file for R/W when it gets a O_WRONLY open. Not saying this is the right thing to do, just thinking out loud... > > I think I'd prefer if preprocess_stateid returned an nfsd_file that > > was already open for read. IIUC that would mean that no changes > > would be needed here or in nfsd4_encode_read_plus(). > > > > Would that be difficult? > > preprocess_stateif to return a nfsd_file? not sure. But Nai correctly > pointed out > that the file may have not been opened for read. My impression is that nfsd_file_acquire(), which underpins preprocess_stateid, can open a file itself. I believe that is what happens in the NFSv3 case, for instance. So if the caller requests an nfsd_file that is open for read and there isn't already one on hand, wouldn't preprocess_stateid (and nfsd_file_acquire) just open the file on demand using the given mode and credentials, and return the new nfsd_file? For NFSv4, that would mean the subsequent nfsd_file_put() would close that file immediately rather than cache it. So maybe not as efficient as we could be.
On Sat, Jul 27, 2024 at 03:53:41PM -0400, Chuck Lever wrote: > On Sat, Jul 27, 2024 at 10:26:24PM +0300, Sagi Grimberg wrote: > > > > > > > > On 26/07/2024 17:06, Chuck Lever wrote: > > > On Wed, Jul 24, 2024 at 10:01:38AM -0700, Sagi Grimberg wrote: > > > > Based on a patch fom Dai Ngo, allow NFSv4 client to use write delegation > > > > stateid for READ operation. Per RFC 8881 section 9.1.2. Use of the > > > > Stateid and Locking. > > > > > > > > In addition, for anonymous stateids, check for pending delegations by > > > > the filehandle and client_id, and if a conflict found, recall the delegation > > > > before allowing the read to take place. > > > > > > > > Suggested-by: Dai Ngo <dai.ngo@oracle.com> > > > > Signed-off-by: Sagi Grimberg <sagi@grimberg.me> > > > > --- > > > > fs/nfsd/nfs4proc.c | 22 +++++++++++++++++++-- > > > > fs/nfsd/nfs4state.c | 47 +++++++++++++++++++++++++++++++++++++++++++++ > > > > fs/nfsd/nfs4xdr.c | 9 +++++++++ > > > > fs/nfsd/state.h | 2 ++ > > > > fs/nfsd/xdr4.h | 2 ++ > > > > 5 files changed, 80 insertions(+), 2 deletions(-) > > > > > > > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c > > > > index 7b70309ad8fb..324984ec70c6 100644 > > > > --- a/fs/nfsd/nfs4proc.c > > > > +++ b/fs/nfsd/nfs4proc.c > > > > @@ -979,8 +979,24 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > > > > /* check stateid */ > > > > status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, > > > > &read->rd_stateid, RD_STATE, > > > > - &read->rd_nf, NULL); > > > > - > > > > + &read->rd_nf, &read->rd_wd_stid); > > > > + /* > > > > + * rd_wd_stid is needed for nfsd4_encode_read to allow write > > > > + * delegation stateid used for read. Its refcount is decremented > > > > + * by nfsd4_read_release when read is done. > > > > + */ > > > > + if (!status) { > > > > + if (!read->rd_wd_stid) { > > > > + /* special stateid? */ > > > > + status = nfsd4_deleg_read_conflict(rqstp, cstate->clp, > > > > + &cstate->current_fh); > > > > + } else if (read->rd_wd_stid->sc_type != SC_TYPE_DELEG || > > > > + delegstateid(read->rd_wd_stid)->dl_type != > > > > + NFS4_OPEN_DELEGATE_WRITE) { > > > > + nfs4_put_stid(read->rd_wd_stid); > > > > + read->rd_wd_stid = NULL; > > > > + } > > > > + } > > > > read->rd_rqstp = rqstp; > > > > read->rd_fhp = &cstate->current_fh; > > > > return status; > > > > @@ -990,6 +1006,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > > > > static void > > > > nfsd4_read_release(union nfsd4_op_u *u) > > > > { > > > > + if (u->read.rd_wd_stid) > > > > + nfs4_put_stid(u->read.rd_wd_stid); > > > > if (u->read.rd_nf) > > > > nfsd_file_put(u->read.rd_nf); > > > > trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp, > > > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c > > > > index dc61a8adfcd4..7e6b9fb31a4c 100644 > > > > --- a/fs/nfsd/nfs4state.c > > > > +++ b/fs/nfsd/nfs4state.c > > > > @@ -8805,6 +8805,53 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate, > > > > get_stateid(cstate, &u->write.wr_stateid); > > > > } > > > > +/** > > > > + * nfsd4_deleg_read_conflict - Recall if read causes conflict > > > > + * @rqstp: RPC transaction context > > > > + * @clp: nfs client > > > > + * @fhp: nfs file handle > > > > + * @inode: file to be checked for a conflict > > > > + * @modified: return true if file was modified > > > > + * @size: new size of file if modified is true > > > > + * > > > > + * This function is called when there is a conflict between a write > > > > + * delegation and a read that is using a special stateid where the > > > > + * we cannot derive the client stateid exsistence. The server > > > > + * must recall a conflicting delegation before allowing the read > > > > + * to continue. > > > > + * > > > > + * Returns 0 if there is no conflict; otherwise an nfs_stat > > > > + * code is returned. > > > > + */ > > > > +__be32 nfsd4_deleg_read_conflict(struct svc_rqst *rqstp, > > > > + struct nfs4_client *clp, struct svc_fh *fhp) > > > > +{ > > > > + struct nfs4_file *fp; > > > > + __be32 status = 0; > > > > + > > > > + fp = nfsd4_file_hash_lookup(fhp); > > > > + if (!fp) > > > > + return nfs_ok; > > > > + > > > > + spin_lock(&state_lock); > > > > + spin_lock(&fp->fi_lock); > > > > + if (!list_empty(&fp->fi_delegations) && > > > > + !nfs4_delegation_exists(clp, fp)) { > > > > + /* conflict, recall deleg */ > > > > + status = nfserrno(nfsd_open_break_lease(fp->fi_inode, > > > > + NFSD_MAY_READ)); > > > > + if (status) > > > > + goto out; > > > > + if (!nfsd_wait_for_delegreturn(rqstp, fp->fi_inode)) > > > > + status = nfserr_jukebox; > > > > + } > > > > +out: > > > > + spin_unlock(&fp->fi_lock); > > > > + spin_unlock(&state_lock); > > > > + return status; > > > > +} > > > > + > > > > + > > > > /** > > > > * nfsd4_deleg_getattr_conflict - Recall if GETATTR causes conflict > > > > * @rqstp: RPC transaction context > > > > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c > > > > index c7bfd2180e3f..f0fe526fac3c 100644 > > > > --- a/fs/nfsd/nfs4xdr.c > > > > +++ b/fs/nfsd/nfs4xdr.c > > > > @@ -4418,6 +4418,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, > > > > unsigned long maxcount; > > > > struct file *file; > > > > __be32 *p; > > > > + fmode_t o_fmode = 0; > > > > if (nfserr) > > > > return nfserr; > > > > @@ -4437,10 +4438,18 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, > > > > maxcount = min_t(unsigned long, read->rd_length, > > > > (xdr->buf->buflen - xdr->buf->len)); > > > > + if (read->rd_wd_stid) { > > > > + /* allow READ using write delegation stateid */ > > > > + o_fmode = file->f_mode; > > > > + file->f_mode |= FMODE_READ; > > > > + } > > > nfsd4_encode_read_plus() needs to handle write delegation stateids > > > as well. > > > > Yes, missed that one. > > > > > > > > I'm not too sure about modifying the f_mode on an nfsd_file you > > > just got from a cache of shared nfsd_files. > > > > If that is a problem, nfsd can open the file with rw access to begin with > > if a write deleg was given? > > IMO, it's not a question of whether there is a write delegation, but > rather what intent the client told the server during the OPEN. The > server would need to open the local file for R/W when it gets a > O_WRONLY open. Not saying this is the right thing to do, just > thinking out loud... OK, thinking about this again... The nfsd_file that is associated with a write delegation is probably going to need to be a local O_RDWR open, if a write delegation state ID can be used for READ or WRITE.
On 7/27/24 12:33 PM, Sagi Grimberg wrote: >>>>> id(cstate, &u->write.wr_stateid); >>>>> } >>>>> +/** >>>>> + * nfsd4_deleg_read_conflict - Recall if read causes conflict >>>>> + * @rqstp: RPC transaction context >>>>> + * @clp: nfs client >>>>> + * @fhp: nfs file handle >>>>> + * @inode: file to be checked for a conflict >>>>> + * @modified: return true if file was modified >>>>> + * @size: new size of file if modified is true >>>>> + * >>>>> + * This function is called when there is a conflict between a write >>>>> + * delegation and a read that is using a special stateid where the >>>>> + * we cannot derive the client stateid exsistence. The server >>>>> + * must recall a conflicting delegation before allowing the read >>>>> + * to continue. >>>>> + * >>>>> + * Returns 0 if there is no conflict; otherwise an nfs_stat >>>>> + * code is returned. >>>>> + */ >>>>> +__be32 nfsd4_deleg_read_conflict(struct svc_rqst *rqstp, >>>>> + struct nfs4_client *clp, struct svc_fh *fhp) >>>>> +{ >>>>> + struct nfs4_file *fp; >>>>> + __be32 status = 0; >>>>> + >>>>> + fp = nfsd4_file_hash_lookup(fhp); >>>>> + if (!fp) >>>>> + return nfs_ok; >>>>> + >>>>> + spin_lock(&state_lock); >>>>> + spin_lock(&fp->fi_lock); >>>>> + if (!list_empty(&fp->fi_delegations) && >>>>> + !nfs4_delegation_exists(clp, fp)) { >>>>> + /* conflict, recall deleg */ >>>> I don't see how we can have a delegation conflict here. If a client >>>> has a write delegation then there should not be any delegations from >>>> other clients. >>> A delegation conflict is possible if the client is using an >>> anonymous stateid to perform the READ. >> >> Then we should not detect any delegation conflict from this >> function. > > Can you explain why? It was my mistake. I missed the part that the nfsd4_deleg_read_conflict was called only from the context of the client sending the read with the special stateid. It's probably clearer to separate this patch into 2 patches: one to allow a write delegation (opened with wr-only) to read the file and the other is to detect write delegation conflict, regardless of whether the delegation was from an open with rw or wr-only, when a special stateid is used to do the read. -Dai > > If the client sent a raed with anon stateid, this function checks the > pending > delegations (fi_delegations), and not from the same client > (!nfs4_delegation_exists), > then it should detect a conflict. > >> >>> >>> One thing we could perhaps do is add support for the use of >>> anonymous stateids as a separate patch. >> >> This would be a separate issue from allowing write delegation >> stateid to read. This would be to detect the scenario where a >> client using special stateid to read while another client has >> a write delegation on the file. > > As mentioned, I can separate it, but this would make this patch > break a pynfs test.
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 7b70309ad8fb..324984ec70c6 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -979,8 +979,24 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, /* check stateid */ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, &read->rd_stateid, RD_STATE, - &read->rd_nf, NULL); - + &read->rd_nf, &read->rd_wd_stid); + /* + * rd_wd_stid is needed for nfsd4_encode_read to allow write + * delegation stateid used for read. Its refcount is decremented + * by nfsd4_read_release when read is done. + */ + if (!status) { + if (!read->rd_wd_stid) { + /* special stateid? */ + status = nfsd4_deleg_read_conflict(rqstp, cstate->clp, + &cstate->current_fh); + } else if (read->rd_wd_stid->sc_type != SC_TYPE_DELEG || + delegstateid(read->rd_wd_stid)->dl_type != + NFS4_OPEN_DELEGATE_WRITE) { + nfs4_put_stid(read->rd_wd_stid); + read->rd_wd_stid = NULL; + } + } read->rd_rqstp = rqstp; read->rd_fhp = &cstate->current_fh; return status; @@ -990,6 +1006,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, static void nfsd4_read_release(union nfsd4_op_u *u) { + if (u->read.rd_wd_stid) + nfs4_put_stid(u->read.rd_wd_stid); if (u->read.rd_nf) nfsd_file_put(u->read.rd_nf); trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp, diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index dc61a8adfcd4..7e6b9fb31a4c 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -8805,6 +8805,53 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate, get_stateid(cstate, &u->write.wr_stateid); } +/** + * nfsd4_deleg_read_conflict - Recall if read causes conflict + * @rqstp: RPC transaction context + * @clp: nfs client + * @fhp: nfs file handle + * @inode: file to be checked for a conflict + * @modified: return true if file was modified + * @size: new size of file if modified is true + * + * This function is called when there is a conflict between a write + * delegation and a read that is using a special stateid where the + * we cannot derive the client stateid exsistence. The server + * must recall a conflicting delegation before allowing the read + * to continue. + * + * Returns 0 if there is no conflict; otherwise an nfs_stat + * code is returned. + */ +__be32 nfsd4_deleg_read_conflict(struct svc_rqst *rqstp, + struct nfs4_client *clp, struct svc_fh *fhp) +{ + struct nfs4_file *fp; + __be32 status = 0; + + fp = nfsd4_file_hash_lookup(fhp); + if (!fp) + return nfs_ok; + + spin_lock(&state_lock); + spin_lock(&fp->fi_lock); + if (!list_empty(&fp->fi_delegations) && + !nfs4_delegation_exists(clp, fp)) { + /* conflict, recall deleg */ + status = nfserrno(nfsd_open_break_lease(fp->fi_inode, + NFSD_MAY_READ)); + if (status) + goto out; + if (!nfsd_wait_for_delegreturn(rqstp, fp->fi_inode)) + status = nfserr_jukebox; + } +out: + spin_unlock(&fp->fi_lock); + spin_unlock(&state_lock); + return status; +} + + /** * nfsd4_deleg_getattr_conflict - Recall if GETATTR causes conflict * @rqstp: RPC transaction context diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index c7bfd2180e3f..f0fe526fac3c 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -4418,6 +4418,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, unsigned long maxcount; struct file *file; __be32 *p; + fmode_t o_fmode = 0; if (nfserr) return nfserr; @@ -4437,10 +4438,18 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, maxcount = min_t(unsigned long, read->rd_length, (xdr->buf->buflen - xdr->buf->len)); + if (read->rd_wd_stid) { + /* allow READ using write delegation stateid */ + o_fmode = file->f_mode; + file->f_mode |= FMODE_READ; + } if (file->f_op->splice_read && splice_ok) nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount); else nfserr = nfsd4_encode_readv(resp, read, file, maxcount); + if (o_fmode) + file->f_mode = o_fmode; + if (nfserr) { xdr_truncate_encode(xdr, starting_len); return nfserr; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index ffc217099d19..c1f13b5877c6 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -780,6 +780,8 @@ static inline bool try_to_expire_client(struct nfs4_client *clp) return clp->cl_state == NFSD4_EXPIRABLE; } +extern __be32 nfsd4_deleg_read_conflict(struct svc_rqst *rqstp, + struct nfs4_client *clp, struct svc_fh *fhp); extern __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode, bool *file_modified, u64 *size); #endif /* NFSD4_STATE_H */ diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index fbdd42cde1fa..434973a6a8b1 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -426,6 +426,8 @@ struct nfsd4_read { struct svc_rqst *rd_rqstp; /* response */ struct svc_fh *rd_fhp; /* response */ u32 rd_eof; /* response */ + + struct nfs4_stid *rd_wd_stid; /* internal */ }; struct nfsd4_readdir {
Based on a patch fom Dai Ngo, allow NFSv4 client to use write delegation stateid for READ operation. Per RFC 8881 section 9.1.2. Use of the Stateid and Locking. In addition, for anonymous stateids, check for pending delegations by the filehandle and client_id, and if a conflict found, recall the delegation before allowing the read to take place. Suggested-by: Dai Ngo <dai.ngo@oracle.com> Signed-off-by: Sagi Grimberg <sagi@grimberg.me> --- fs/nfsd/nfs4proc.c | 22 +++++++++++++++++++-- fs/nfsd/nfs4state.c | 47 +++++++++++++++++++++++++++++++++++++++++++++ fs/nfsd/nfs4xdr.c | 9 +++++++++ fs/nfsd/state.h | 2 ++ fs/nfsd/xdr4.h | 2 ++ 5 files changed, 80 insertions(+), 2 deletions(-)