Message ID | 1688089960-24568-3-git-send-email-dai.ngo@oracle.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | NFSD: add support for NFSv4.1+ write delegation | expand |
On Thu, 2023-06-29 at 18:52 -0700, Dai Ngo wrote: > Allow NFSv4 client to use write delegation stateid for READ operation. > Per RFC 8881 section 9.1.2. Use of the Stateid and Locking. > > Signed-off-by: Dai Ngo <dai.ngo@oracle.com> > --- > fs/nfsd/nfs4proc.c | 16 ++++++++++++++-- > fs/nfsd/nfs4xdr.c | 9 +++++++++ > fs/nfsd/xdr4.h | 2 ++ > 3 files changed, 25 insertions(+), 2 deletions(-) > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c > index 5ae670807449..3fa66cb38780 100644 > --- a/fs/nfsd/nfs4proc.c > +++ b/fs/nfsd/nfs4proc.c > @@ -942,8 +942,18 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > /* check stateid */ > status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, > &read->rd_stateid, RD_STATE, > - &read->rd_nf, NULL); > - > + &read->rd_nf, &read->rd_wd_stid); I think this patch is causing breakage with pynfs. WRT3 sends a READ operation with the zero-stateid. On earlier kernels, this works, but a linux-next kernel rejects this with BAD_STATEID. nfs4_preprocess_stateid_op seems to assume that if cstid is set, then it's a copy operation and anonymous stateids aren't allowed. Maybe that test should be something besides checking cstid == NULL? > + /* > + * rd_wd_stid is needed for nfsd4_encode_read to allow write > + * delegation stateid used for read. Its refcount is decremented > + * by nfsd4_read_release when read is done. 
> + */ > + if (!status && (read->rd_wd_stid->sc_type != NFS4_DELEG_STID || > + delegstateid(read->rd_wd_stid)->dl_type != > + NFS4_OPEN_DELEGATE_WRITE)) { > + nfs4_put_stid(read->rd_wd_stid); > + read->rd_wd_stid = NULL; > + } > read->rd_rqstp = rqstp; > read->rd_fhp = &cstate->current_fh; > return status; > @@ -953,6 +963,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > static void > nfsd4_read_release(union nfsd4_op_u *u) > { > + if (u->read.rd_wd_stid) > + nfs4_put_stid(u->read.rd_wd_stid); > if (u->read.rd_nf) > nfsd_file_put(u->read.rd_nf); > trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp, > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c > index 76db2fe29624..e0640b31d041 100644 > --- a/fs/nfsd/nfs4xdr.c > +++ b/fs/nfsd/nfs4xdr.c > @@ -4120,6 +4120,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, > struct file *file; > int starting_len = xdr->buf->len; > __be32 *p; > + fmode_t o_fmode = 0; > > if (nfserr) > return nfserr; > @@ -4139,10 +4140,18 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, > maxcount = min_t(unsigned long, read->rd_length, > (xdr->buf->buflen - xdr->buf->len)); > > + if (read->rd_wd_stid) { > + /* allow READ using write delegation stateid */ > + o_fmode = file->f_mode; > + file->f_mode |= FMODE_READ; > + } > if (file->f_op->splice_read && splice_ok) > nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount); > else > nfserr = nfsd4_encode_readv(resp, read, file, maxcount); > + if (o_fmode) > + file->f_mode = o_fmode; > + > if (nfserr) { > xdr_truncate_encode(xdr, starting_len); > return nfserr; > diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h > index 510978e602da..3ccc40f9274a 100644 > --- a/fs/nfsd/xdr4.h > +++ b/fs/nfsd/xdr4.h > @@ -307,6 +307,8 @@ struct nfsd4_read { > struct svc_rqst *rd_rqstp; /* response */ > struct svc_fh *rd_fhp; /* response */ > u32 rd_eof; /* response */ > + > + struct nfs4_stid *rd_wd_stid; /* internal */ > }; > > struct 
nfsd4_readdir {
Hi Jeff, I have not looked at this carefully, but since now we only grant write delegation for OPEN with both read and write access then perhaps this patch is no longer needed? -Dai On 8/11/23 10:22 AM, Jeff Layton wrote: > On Thu, 2023-06-29 at 18:52 -0700, Dai Ngo wrote: >> Allow NFSv4 client to use write delegation stateid for READ operation. >> Per RFC 8881 section 9.1.2. Use of the Stateid and Locking. >> >> Signed-off-by: Dai Ngo <dai.ngo@oracle.com> >> --- >> fs/nfsd/nfs4proc.c | 16 ++++++++++++++-- >> fs/nfsd/nfs4xdr.c | 9 +++++++++ >> fs/nfsd/xdr4.h | 2 ++ >> 3 files changed, 25 insertions(+), 2 deletions(-) >> >> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c >> index 5ae670807449..3fa66cb38780 100644 >> --- a/fs/nfsd/nfs4proc.c >> +++ b/fs/nfsd/nfs4proc.c >> @@ -942,8 +942,18 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, >> /* check stateid */ >> status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, >> &read->rd_stateid, RD_STATE, >> - &read->rd_nf, NULL); >> - >> + &read->rd_nf, &read->rd_wd_stid); > I think this patch is causing breakage with pynfs. WRT3 sends a READ > operation with the zero-stateid. On earlier kernels, this works, but a > linux-next kernel rejects this with BAD_STATEID. > > nfs4_preprocess_stateid_op seems to assume that if cstid is set, then > it's a copy operation and anonymous stateids aren't allowed. Maybe that > test should be something besides checking cstid == NULL? > >> + /* >> + * rd_wd_stid is needed for nfsd4_encode_read to allow write >> + * delegation stateid used for read. Its refcount is decremented >> + * by nfsd4_read_release when read is done. 
>> + */ >> + if (!status && (read->rd_wd_stid->sc_type != NFS4_DELEG_STID || >> + delegstateid(read->rd_wd_stid)->dl_type != >> + NFS4_OPEN_DELEGATE_WRITE)) { >> + nfs4_put_stid(read->rd_wd_stid); >> + read->rd_wd_stid = NULL; >> + } >> read->rd_rqstp = rqstp; >> read->rd_fhp = &cstate->current_fh; >> return status; >> @@ -953,6 +963,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, >> static void >> nfsd4_read_release(union nfsd4_op_u *u) >> { >> + if (u->read.rd_wd_stid) >> + nfs4_put_stid(u->read.rd_wd_stid); >> if (u->read.rd_nf) >> nfsd_file_put(u->read.rd_nf); >> trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp, >> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c >> index 76db2fe29624..e0640b31d041 100644 >> --- a/fs/nfsd/nfs4xdr.c >> +++ b/fs/nfsd/nfs4xdr.c >> @@ -4120,6 +4120,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, >> struct file *file; >> int starting_len = xdr->buf->len; >> __be32 *p; >> + fmode_t o_fmode = 0; >> >> if (nfserr) >> return nfserr; >> @@ -4139,10 +4140,18 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, >> maxcount = min_t(unsigned long, read->rd_length, >> (xdr->buf->buflen - xdr->buf->len)); >> >> + if (read->rd_wd_stid) { >> + /* allow READ using write delegation stateid */ >> + o_fmode = file->f_mode; >> + file->f_mode |= FMODE_READ; >> + } >> if (file->f_op->splice_read && splice_ok) >> nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount); >> else >> nfserr = nfsd4_encode_readv(resp, read, file, maxcount); >> + if (o_fmode) >> + file->f_mode = o_fmode; >> + >> if (nfserr) { >> xdr_truncate_encode(xdr, starting_len); >> return nfserr; >> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h >> index 510978e602da..3ccc40f9274a 100644 >> --- a/fs/nfsd/xdr4.h >> +++ b/fs/nfsd/xdr4.h >> @@ -307,6 +307,8 @@ struct nfsd4_read { >> struct svc_rqst *rd_rqstp; /* response */ >> struct svc_fh *rd_fhp; /* response */ >> u32 rd_eof; /* response */ >> + >> + struct 
nfs4_stid *rd_wd_stid; /* internal */ >> }; >> >> struct nfsd4_readdir {
Yep. Reverting that patch seemed to fix the problem. Chuck, mind just dropping this patch from nfsd-next? Thanks, Jeff On Fri, 2023-08-11 at 10:47 -0700, dai.ngo@oracle.com wrote: > Hi Jeff, > > I have not looked at this carefully, but since now we only grant > write delegation for OPEN with both read and write access then > perhaps this patch is no longer needed? > > -Dai > > On 8/11/23 10:22 AM, Jeff Layton wrote: > > On Thu, 2023-06-29 at 18:52 -0700, Dai Ngo wrote: > > > Allow NFSv4 client to use write delegation stateid for READ operation. > > > Per RFC 8881 section 9.1.2. Use of the Stateid and Locking. > > > > > > Signed-off-by: Dai Ngo <dai.ngo@oracle.com> > > > --- > > > fs/nfsd/nfs4proc.c | 16 ++++++++++++++-- > > > fs/nfsd/nfs4xdr.c | 9 +++++++++ > > > fs/nfsd/xdr4.h | 2 ++ > > > 3 files changed, 25 insertions(+), 2 deletions(-) > > > > > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c > > > index 5ae670807449..3fa66cb38780 100644 > > > --- a/fs/nfsd/nfs4proc.c > > > +++ b/fs/nfsd/nfs4proc.c > > > @@ -942,8 +942,18 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > > > /* check stateid */ > > > status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, > > > &read->rd_stateid, RD_STATE, > > > - &read->rd_nf, NULL); > > > - > > > + &read->rd_nf, &read->rd_wd_stid); > > I think this patch is causing breakage with pynfs. WRT3 sends a READ > > operation with the zero-stateid. On earlier kernels, this works, but a > > linux-next kernel rejects this with BAD_STATEID. > > > > nfs4_preprocess_stateid_op seems to assume that if cstid is set, then > > it's a copy operation and anonymous stateids aren't allowed. Maybe that > > test should be something besides checking cstid == NULL? > > > > > + /* > > > + * rd_wd_stid is needed for nfsd4_encode_read to allow write > > > + * delegation stateid used for read. Its refcount is decremented > > > + * by nfsd4_read_release when read is done. 
> > > + */ > > > + if (!status && (read->rd_wd_stid->sc_type != NFS4_DELEG_STID || > > > + delegstateid(read->rd_wd_stid)->dl_type != > > > + NFS4_OPEN_DELEGATE_WRITE)) { > > > + nfs4_put_stid(read->rd_wd_stid); > > > + read->rd_wd_stid = NULL; > > > + } > > > read->rd_rqstp = rqstp; > > > read->rd_fhp = &cstate->current_fh; > > > return status; > > > @@ -953,6 +963,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > > > static void > > > nfsd4_read_release(union nfsd4_op_u *u) > > > { > > > + if (u->read.rd_wd_stid) > > > + nfs4_put_stid(u->read.rd_wd_stid); > > > if (u->read.rd_nf) > > > nfsd_file_put(u->read.rd_nf); > > > trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp, > > > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c > > > index 76db2fe29624..e0640b31d041 100644 > > > --- a/fs/nfsd/nfs4xdr.c > > > +++ b/fs/nfsd/nfs4xdr.c > > > @@ -4120,6 +4120,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, > > > struct file *file; > > > int starting_len = xdr->buf->len; > > > __be32 *p; > > > + fmode_t o_fmode = 0; > > > > > > if (nfserr) > > > return nfserr; > > > @@ -4139,10 +4140,18 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, > > > maxcount = min_t(unsigned long, read->rd_length, > > > (xdr->buf->buflen - xdr->buf->len)); > > > > > > + if (read->rd_wd_stid) { > > > + /* allow READ using write delegation stateid */ > > > + o_fmode = file->f_mode; > > > + file->f_mode |= FMODE_READ; > > > + } > > > if (file->f_op->splice_read && splice_ok) > > > nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount); > > > else > > > nfserr = nfsd4_encode_readv(resp, read, file, maxcount); > > > + if (o_fmode) > > > + file->f_mode = o_fmode; > > > + > > > if (nfserr) { > > > xdr_truncate_encode(xdr, starting_len); > > > return nfserr; > > > diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h > > > index 510978e602da..3ccc40f9274a 100644 > > > --- a/fs/nfsd/xdr4.h > > > +++ b/fs/nfsd/xdr4.h > > > @@ -307,6 
+307,8 @@ struct nfsd4_read { > > > struct svc_rqst *rd_rqstp; /* response */ > > > struct svc_fh *rd_fhp; /* response */ > > > u32 rd_eof; /* response */ > > > + > > > + struct nfs4_stid *rd_wd_stid; /* internal */ > > > }; > > > > > > struct nfsd4_readdir {
> On Aug 11, 2023, at 3:06 PM, Jeff Layton <jlayton@kernel.org> wrote: > > Yep. Reverting that patch seemed to fix the problem. Chuck, mind just > dropping this patch from nfsd-next? I can, but let's make sure that doesn't break anything else first. Send me any test results, and I'll run some tests here too. -- Chuck Lever
On Fri, 2023-08-11 at 19:08 +0000, Chuck Lever III wrote: > > > On Aug 11, 2023, at 3:06 PM, Jeff Layton <jlayton@kernel.org> wrote: > > > > Yep. Reverting that patch seemed to fix the problem. Chuck, mind just > > dropping this patch from nfsd-next? > > I can, but let's make sure that doesn't break anything else first. > Send me any test results, and I'll run some tests here too. > > Sure, results attached from several runs. These are running "all" tests in pynfs for both v4.0 and v4.1. The kernels are: 6.5.0-rc5-00050-gb1e667caad15: your nfsd-next branch as of today 6.5.0-rc5+: gb1e667caad15, but with this patch reverted In summary: [jlayton@tleilax pynfs-results]$ grep '"failures":' * 6.4.0-v4.0.json: "failures": 0, 6.4.0-v4.1.json: "failures": 0, 6.5.0-rc5-00050-gb1e667caad15-v4.0.json: "failures": 17, 6.5.0-rc5-00050-gb1e667caad15-v4.1.json: "failures": 1, 6.5.0-rc5-v4.0.json: "failures": 3, 6.5.0-rc5+-v4.0.json: "failures": 0, 6.5.0-rc5-v4.1.json: "failures": 0, 6.5.0-rc5+-v4.1.json: "failures": 0, So, reverting this patch makes things look good in nfsd-next. The more worrisome problem is that 6.5.0-rc5-v4.0.json shows 3 regressions in current mainline. It looks like some sort of data corruption at first glance. I'm still looking at that one.
> On Aug 11, 2023, at 4:33 PM, Jeff Layton <jlayton@kernel.org> wrote: > > On Fri, 2023-08-11 at 19:08 +0000, Chuck Lever III wrote: >> >>> On Aug 11, 2023, at 3:06 PM, Jeff Layton <jlayton@kernel.org> wrote: >>> >>> Yep. Reverting that patch seemed to fix the problem. Chuck, mind just >>> dropping this patch from nfsd-next? >> >> I can, but let's make sure that doesn't break anything else first. >> Send me any test results, and I'll run some tests here too. > > Sure, results attached from several runs. These are running "all" tests > in pynfs for both v4.0 and v4.1. The kernels are: > > 6.5.0-rc5-00050-gb1e667caad15: your nfsd-next branch as of today > 6.5.0-rc5+: gb1e667caad15, but with this patch reverted I haven't found any problems either, so I've dropped the patch and refreshed nfsd-next and topic-sunrpc-thread-scheduling. -- Chuck Lever
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 5ae670807449..3fa66cb38780 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -942,8 +942,18 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, /* check stateid */ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, &read->rd_stateid, RD_STATE, - &read->rd_nf, NULL); - + &read->rd_nf, &read->rd_wd_stid); + /* + * rd_wd_stid is needed for nfsd4_encode_read to allow write + * delegation stateid used for read. Its refcount is decremented + * by nfsd4_read_release when read is done. + */ + if (!status && (read->rd_wd_stid->sc_type != NFS4_DELEG_STID || + delegstateid(read->rd_wd_stid)->dl_type != + NFS4_OPEN_DELEGATE_WRITE)) { + nfs4_put_stid(read->rd_wd_stid); + read->rd_wd_stid = NULL; + } read->rd_rqstp = rqstp; read->rd_fhp = &cstate->current_fh; return status; @@ -953,6 +963,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, static void nfsd4_read_release(union nfsd4_op_u *u) { + if (u->read.rd_wd_stid) + nfs4_put_stid(u->read.rd_wd_stid); if (u->read.rd_nf) nfsd_file_put(u->read.rd_nf); trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp, diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 76db2fe29624..e0640b31d041 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -4120,6 +4120,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, struct file *file; int starting_len = xdr->buf->len; __be32 *p; + fmode_t o_fmode = 0; if (nfserr) return nfserr; @@ -4139,10 +4140,18 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, maxcount = min_t(unsigned long, read->rd_length, (xdr->buf->buflen - xdr->buf->len)); + if (read->rd_wd_stid) { + /* allow READ using write delegation stateid */ + o_fmode = file->f_mode; + file->f_mode |= FMODE_READ; + } if (file->f_op->splice_read && splice_ok) nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount); else nfserr = nfsd4_encode_readv(resp, read, 
file, maxcount); + if (o_fmode) + file->f_mode = o_fmode; + if (nfserr) { xdr_truncate_encode(xdr, starting_len); return nfserr; diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 510978e602da..3ccc40f9274a 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -307,6 +307,8 @@ struct nfsd4_read { struct svc_rqst *rd_rqstp; /* response */ struct svc_fh *rd_fhp; /* response */ u32 rd_eof; /* response */ + + struct nfs4_stid *rd_wd_stid; /* internal */ }; struct nfsd4_readdir {
Allow an NFSv4 client to use a write delegation stateid for a READ operation, per RFC 8881 section 9.1.2, "Use of the Stateid and Locking". Signed-off-by: Dai Ngo <dai.ngo@oracle.com> --- fs/nfsd/nfs4proc.c | 16 ++++++++++++++-- fs/nfsd/nfs4xdr.c | 9 +++++++++ fs/nfsd/xdr4.h | 2 ++ 3 files changed, 25 insertions(+), 2 deletions(-)