Message ID | 20200908162559.509113-3-Anna.Schumaker@Netapp.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | NFSD: Add support for the v4.2 READ_PLUS operation | expand |
On Tue, Sep 08, 2020 at 12:25:56PM -0400, schumaker.anna@gmail.com wrote: > From: Anna Schumaker <Anna.Schumaker@Netapp.com> > > This patch adds READ_PLUS support for returning a single > NFS4_CONTENT_DATA segment to the client. This is basically the same as > the READ operation, only with the extra information about data segments. > > Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com> > > --- > v5: Fix up nfsd4_read_plus_rsize() calculation > --- > fs/nfsd/nfs4proc.c | 20 +++++++++++ > fs/nfsd/nfs4xdr.c | 83 ++++++++++++++++++++++++++++++++++++++++++++-- > 2 files changed, 101 insertions(+), 2 deletions(-) > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c > index eaf50eafa935..0a3df5f10501 100644 > --- a/fs/nfsd/nfs4proc.c > +++ b/fs/nfsd/nfs4proc.c > @@ -2591,6 +2591,19 @@ static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) > return (op_encode_hdr_size + 2 + XDR_QUADLEN(rlen)) * sizeof(__be32); > } > > +static inline u32 nfsd4_read_plus_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) > +{ > + u32 maxcount = svc_max_payload(rqstp); > + u32 rlen = min(op->u.read.rd_length, maxcount); > + /* > + * Worst case is a single large data segment, so make > + * sure we have enough room to encode that > + */ After the last patch we allow an unlimited number of segments. So a zillion 1-byte segments is also possible, and is a worse case. Possible ways to avoid that kind of thing: - when encoding, stop and return a short read if the xdr-encoded result would exceed the limit calculated here. - round SEEK_HOLE results up to the nearest (say) 512 bytes, and SEEK_DATA result down to the nearest 512 bytes. May also need some logic to avoid encoding 0-length segments. - talk to linux-fsdevel, see if we can get a minimum hole alignment guarantee from filesystems. I'm thinking #1 is simplest. --b. > + u32 seg_len = 1 + 2 + 1; > + > + return (op_encode_hdr_size + 2 + seg_len + XDR_QUADLEN(rlen)) * sizeof(__be32); > +} > + > static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) > { > u32 maxcount = 0, rlen = 0; > @@ -3163,6 +3176,13 @@ static const struct nfsd4_operation nfsd4_ops[] = { > .op_name = "OP_COPY", > .op_rsize_bop = nfsd4_copy_rsize, > }, > + [OP_READ_PLUS] = { > + .op_func = nfsd4_read, > + .op_release = nfsd4_read_release, > + .op_name = "OP_READ_PLUS", > + .op_rsize_bop = nfsd4_read_plus_rsize, > + .op_get_currentstateid = nfsd4_get_readstateid, > + }, > [OP_SEEK] = { > .op_func = nfsd4_seek, > .op_name = "OP_SEEK", > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c > index 0be194de4888..26d12e3edf33 100644 > --- a/fs/nfsd/nfs4xdr.c > +++ b/fs/nfsd/nfs4xdr.c > @@ -2173,7 +2173,7 @@ static const nfsd4_dec nfsd4_dec_ops[] = { > [OP_LAYOUTSTATS] = (nfsd4_dec)nfsd4_decode_notsupp, > [OP_OFFLOAD_CANCEL] = (nfsd4_dec)nfsd4_decode_offload_status, > [OP_OFFLOAD_STATUS] = (nfsd4_dec)nfsd4_decode_offload_status, > - [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_notsupp, > + [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_read, > [OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek, > [OP_WRITE_SAME] = (nfsd4_dec)nfsd4_decode_notsupp, > [OP_CLONE] = (nfsd4_dec)nfsd4_decode_clone, > @@ -4597,6 +4597,85 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr, > return nfserr_resource; > p = xdr_encode_hyper(p, os->count); > *p++ = cpu_to_be32(0); > + return nfserr; > +} > + > +static __be32 > +nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp, > + struct nfsd4_read *read, > + unsigned long maxcount, u32 *eof) > +{ > + struct xdr_stream *xdr = &resp->xdr; > + struct file *file = read->rd_nf->nf_file; > + int starting_len = xdr->buf->len; > + __be32 nfserr; > + __be32 *p, tmp; > + __be64 tmp64; > + > + /* Content type, offset, byte count */ > + p = xdr_reserve_space(xdr, 4 + 8 + 4); > + if (!p) > + return nfserr_resource; > + > + read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, maxcount); > + if (read->rd_vlen < 0) > + return nfserr_resource; > + > + nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset, > + resp->rqstp->rq_vec, read->rd_vlen, &maxcount, eof); > + if (nfserr) > + return nfserr; > + > + tmp = htonl(NFS4_CONTENT_DATA); > + write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4); > + tmp64 = cpu_to_be64(read->rd_offset); > + write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp64, 8); > + tmp = htonl(maxcount); > + write_bytes_to_xdr_buf(xdr->buf, starting_len + 12, &tmp, 4); > + return nfs_ok; > +} > + > +static __be32 > +nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr, > + struct nfsd4_read *read) > +{ > + unsigned long maxcount; > + struct xdr_stream *xdr = &resp->xdr; > + struct file *file; > + int starting_len = xdr->buf->len; > + int segments = 0; > + __be32 *p, tmp; > + u32 eof; > + > + if (nfserr) > + return nfserr; > + file = read->rd_nf->nf_file; > + > + /* eof flag, segment count */ > + p = xdr_reserve_space(xdr, 4 + 4); > + if (!p) > + return nfserr_resource; > + xdr_commit_encode(xdr); > + > + maxcount = svc_max_payload(resp->rqstp); > + maxcount = min_t(unsigned long, maxcount, > + (xdr->buf->buflen - xdr->buf->len)); > + maxcount = min_t(unsigned long, maxcount, read->rd_length); > + > + eof = read->rd_offset >= i_size_read(file_inode(file)); > + if (!eof) { > + nfserr = nfsd4_encode_read_plus_data(resp, read, maxcount, &eof); > + segments++; > + } > + > + if (nfserr) > + xdr_truncate_encode(xdr, starting_len); > + else { > + tmp = htonl(eof); > + write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4); > + tmp = htonl(segments); > + write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4); > + } > > return nfserr; > } > @@ -4974,7 +5053,7 @@ static const nfsd4_enc nfsd4_enc_ops[] = { > [OP_LAYOUTSTATS] = (nfsd4_enc)nfsd4_encode_noop, > [OP_OFFLOAD_CANCEL] = (nfsd4_enc)nfsd4_encode_noop, > [OP_OFFLOAD_STATUS] = (nfsd4_enc)nfsd4_encode_offload_status, > - [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_noop, > + [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_read_plus, > [OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek, > [OP_WRITE_SAME] = (nfsd4_enc)nfsd4_encode_noop, > [OP_CLONE] = (nfsd4_enc)nfsd4_encode_noop, > -- > 2.28.0 >
On Tue, Sep 8, 2020 at 3:42 PM J. Bruce Fields <bfields@redhat.com> wrote: > > On Tue, Sep 08, 2020 at 12:25:56PM -0400, schumaker.anna@gmail.com wrote: > > From: Anna Schumaker <Anna.Schumaker@Netapp.com> > > > > This patch adds READ_PLUS support for returning a single > > NFS4_CONTENT_DATA segment to the client. This is basically the same as > > the READ operation, only with the extra information about data segments. > > > > Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com> > > > > --- > > v5: Fix up nfsd4_read_plus_rsize() calculation > > --- > > fs/nfsd/nfs4proc.c | 20 +++++++++++ > > fs/nfsd/nfs4xdr.c | 83 ++++++++++++++++++++++++++++++++++++++++++++-- > > 2 files changed, 101 insertions(+), 2 deletions(-) > > > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c > > index eaf50eafa935..0a3df5f10501 100644 > > --- a/fs/nfsd/nfs4proc.c > > +++ b/fs/nfsd/nfs4proc.c > > @@ -2591,6 +2591,19 @@ static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) > > return (op_encode_hdr_size + 2 + XDR_QUADLEN(rlen)) * sizeof(__be32); > > } > > > > +static inline u32 nfsd4_read_plus_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) > > +{ > > + u32 maxcount = svc_max_payload(rqstp); > > + u32 rlen = min(op->u.read.rd_length, maxcount); > > + /* > > + * Worst case is a single large data segment, so make > > + * sure we have enough room to encode that > > + */ > > After the last patch we allow an unlimited number of segments. So a > zillion 1-byte segments is also possible, and is a worse case. > > Possible ways to avoid that kind of thing: > > - when encoding, stop and return a short read if the xdr-encoded > result would exceed the limit calculated here. Doesn't this happen automatically through calls to xdr_reserve_space()? > - round SEEK_HOLE results up to the nearest (say) 512 bytes, and > SEEK_DATA result down to the nearest 512 bytes. May also need > some logic to avoid encoding 0-length segments. > - talk to linux-fsdevel, see if we can get a minimum hole > alignment guarantee from filesystems. > > I'm thinking #1 is simplest. > > --b. > > > + u32 seg_len = 1 + 2 + 1; > > + > > + return (op_encode_hdr_size + 2 + seg_len + XDR_QUADLEN(rlen)) * sizeof(__be32); > > +} > > + > > static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) > > { > > u32 maxcount = 0, rlen = 0; > > @@ -3163,6 +3176,13 @@ static const struct nfsd4_operation nfsd4_ops[] = { > > .op_name = "OP_COPY", > > .op_rsize_bop = nfsd4_copy_rsize, > > }, > > + [OP_READ_PLUS] = { > > + .op_func = nfsd4_read, > > + .op_release = nfsd4_read_release, > > + .op_name = "OP_READ_PLUS", > > + .op_rsize_bop = nfsd4_read_plus_rsize, > > + .op_get_currentstateid = nfsd4_get_readstateid, > > + }, > > [OP_SEEK] = { > > .op_func = nfsd4_seek, > > .op_name = "OP_SEEK", > > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c > > index 0be194de4888..26d12e3edf33 100644 > > --- a/fs/nfsd/nfs4xdr.c > > +++ b/fs/nfsd/nfs4xdr.c > > @@ -2173,7 +2173,7 @@ static const nfsd4_dec nfsd4_dec_ops[] = { > > [OP_LAYOUTSTATS] = (nfsd4_dec)nfsd4_decode_notsupp, > > [OP_OFFLOAD_CANCEL] = (nfsd4_dec)nfsd4_decode_offload_status, > > [OP_OFFLOAD_STATUS] = (nfsd4_dec)nfsd4_decode_offload_status, > > - [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_notsupp, > > + [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_read, > > [OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek, > > [OP_WRITE_SAME] = (nfsd4_dec)nfsd4_decode_notsupp, > > [OP_CLONE] = (nfsd4_dec)nfsd4_decode_clone, > > @@ -4597,6 +4597,85 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr, > > return nfserr_resource; > > p = xdr_encode_hyper(p, os->count); > > *p++ = cpu_to_be32(0); > > + return nfserr; > > +} > > + > > +static __be32 > > +nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp, > > + struct nfsd4_read *read, > > + unsigned long maxcount, u32 *eof) > > +{ > > + struct xdr_stream *xdr = &resp->xdr; > > + struct file *file = read->rd_nf->nf_file; > > + int starting_len = xdr->buf->len; > > + __be32 nfserr; > > + __be32 *p, tmp; > > + __be64 tmp64; > > + > > + /* Content type, offset, byte count */ > > + p = xdr_reserve_space(xdr, 4 + 8 + 4); > > + if (!p) > > + return nfserr_resource; > > + > > + read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, maxcount); > > + if (read->rd_vlen < 0) > > + return nfserr_resource; > > + > > + nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset, > > + resp->rqstp->rq_vec, read->rd_vlen, &maxcount, eof); > > + if (nfserr) > > + return nfserr; > > + > > + tmp = htonl(NFS4_CONTENT_DATA); > > + write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4); > > + tmp64 = cpu_to_be64(read->rd_offset); > > + write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp64, 8); > > + tmp = htonl(maxcount); > > + write_bytes_to_xdr_buf(xdr->buf, starting_len + 12, &tmp, 4); > > + return nfs_ok; > > +} > > + > > +static __be32 > > +nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr, > > + struct nfsd4_read *read) > > +{ > > + unsigned long maxcount; > > + struct xdr_stream *xdr = &resp->xdr; > > + struct file *file; > > + int starting_len = xdr->buf->len; > > + int segments = 0; > > + __be32 *p, tmp; > > + u32 eof; > > + > > + if (nfserr) > > + return nfserr; > > + file = read->rd_nf->nf_file; > > + > > + /* eof flag, segment count */ > > + p = xdr_reserve_space(xdr, 4 + 4); > > + if (!p) > > + return nfserr_resource; > > + xdr_commit_encode(xdr); > > + > > + maxcount = svc_max_payload(resp->rqstp); > > + maxcount = min_t(unsigned long, maxcount, > > + (xdr->buf->buflen - xdr->buf->len)); > > + maxcount = min_t(unsigned long, maxcount, read->rd_length); > > + > > + eof = read->rd_offset >= i_size_read(file_inode(file)); > > + if (!eof) { > > + nfserr = nfsd4_encode_read_plus_data(resp, read, maxcount, &eof); > > + segments++; > > + } > > + > > + if (nfserr) > > + xdr_truncate_encode(xdr, starting_len); > > + else { > > + tmp = htonl(eof); > > + write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4); > > + tmp = htonl(segments); > > + write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4); > > + } > > > > return nfserr; > > } > > @@ -4974,7 +5053,7 @@ static const nfsd4_enc nfsd4_enc_ops[] = { > > [OP_LAYOUTSTATS] = (nfsd4_enc)nfsd4_encode_noop, > > [OP_OFFLOAD_CANCEL] = (nfsd4_enc)nfsd4_encode_noop, > > [OP_OFFLOAD_STATUS] = (nfsd4_enc)nfsd4_encode_offload_status, > > - [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_noop, > > + [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_read_plus, > > [OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek, > > [OP_WRITE_SAME] = (nfsd4_enc)nfsd4_encode_noop, > > [OP_CLONE] = (nfsd4_enc)nfsd4_encode_noop, > > -- > > 2.28.0 > > >
On Wed, Sep 09, 2020 at 12:53:18PM -0400, Anna Schumaker wrote: > On Tue, Sep 8, 2020 at 3:42 PM J. Bruce Fields <bfields@redhat.com> wrote: > > > > On Tue, Sep 08, 2020 at 12:25:56PM -0400, schumaker.anna@gmail.com wrote: > > > From: Anna Schumaker <Anna.Schumaker@Netapp.com> > > > > > > This patch adds READ_PLUS support for returning a single > > > NFS4_CONTENT_DATA segment to the client. This is basically the same as > > > the READ operation, only with the extra information about data segments. > > > > > > Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com> > > > > > > --- > > > v5: Fix up nfsd4_read_plus_rsize() calculation > > > --- > > > fs/nfsd/nfs4proc.c | 20 +++++++++++ > > > fs/nfsd/nfs4xdr.c | 83 ++++++++++++++++++++++++++++++++++++++++++++-- > > > 2 files changed, 101 insertions(+), 2 deletions(-) > > > > > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c > > > index eaf50eafa935..0a3df5f10501 100644 > > > --- a/fs/nfsd/nfs4proc.c > > > +++ b/fs/nfsd/nfs4proc.c > > > @@ -2591,6 +2591,19 @@ static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) > > > return (op_encode_hdr_size + 2 + XDR_QUADLEN(rlen)) * sizeof(__be32); > > > } > > > > > > +static inline u32 nfsd4_read_plus_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) > > > +{ > > > + u32 maxcount = svc_max_payload(rqstp); > > > + u32 rlen = min(op->u.read.rd_length, maxcount); > > > + /* > > > + * Worst case is a single large data segment, so make > > > + * sure we have enough room to encode that > > > + */ > > > > After the last patch we allow an unlimited number of segments. So a > > zillion 1-byte segments is also possible, and is a worse case. > > > > Possible ways to avoid that kind of thing: > > > > - when encoding, stop and return a short read if the xdr-encoded > > result would exceed the limit calculated here. > > Doesn't this happen automatically through calls to xdr_reserve_space()? No, xdr_reserve_space() will keep us from running out of buffer completely, but it won't check that ops come in under the estimates made in read_plus_rsize(). --b. > > > - round SEEK_HOLE results up to the nearest (say) 512 bytes, and > > SEEK_DATA result down to the nearest 512 bytes. May also need > > some logic to avoid encoding 0-length segments. > > - talk to linux-fsdevel, see if we can get a minimum hole > > alignment guarantee from filesystems. > > > > I'm thinking #1 is simplest. > > > > --b. > > > > > + u32 seg_len = 1 + 2 + 1; > > > + > > > + return (op_encode_hdr_size + 2 + seg_len + XDR_QUADLEN(rlen)) * sizeof(__be32); > > > +} > > > + > > > static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) > > > { > > > u32 maxcount = 0, rlen = 0; > > > @@ -3163,6 +3176,13 @@ static const struct nfsd4_operation nfsd4_ops[] = { > > > .op_name = "OP_COPY", > > > .op_rsize_bop = nfsd4_copy_rsize, > > > }, > > > + [OP_READ_PLUS] = { > > > + .op_func = nfsd4_read, > > > + .op_release = nfsd4_read_release, > > > + .op_name = "OP_READ_PLUS", > > > + .op_rsize_bop = nfsd4_read_plus_rsize, > > > + .op_get_currentstateid = nfsd4_get_readstateid, > > > + }, > > > [OP_SEEK] = { > > > .op_func = nfsd4_seek, > > > .op_name = "OP_SEEK", > > > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c > > > index 0be194de4888..26d12e3edf33 100644 > > > --- a/fs/nfsd/nfs4xdr.c > > > +++ b/fs/nfsd/nfs4xdr.c > > > @@ -2173,7 +2173,7 @@ static const nfsd4_dec nfsd4_dec_ops[] = { > > > [OP_LAYOUTSTATS] = (nfsd4_dec)nfsd4_decode_notsupp, > > > [OP_OFFLOAD_CANCEL] = (nfsd4_dec)nfsd4_decode_offload_status, > > > [OP_OFFLOAD_STATUS] = (nfsd4_dec)nfsd4_decode_offload_status, > > > - [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_notsupp, > > > + [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_read, > > > [OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek, > > > [OP_WRITE_SAME] = (nfsd4_dec)nfsd4_decode_notsupp, > > > [OP_CLONE] = (nfsd4_dec)nfsd4_decode_clone, > > > @@ -4597,6 +4597,85 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr, > > > return nfserr_resource; > > > p = xdr_encode_hyper(p, os->count); > > > *p++ = cpu_to_be32(0); > > > + return nfserr; > > > +} > > > + > > > +static __be32 > > > +nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp, > > > + struct nfsd4_read *read, > > > + unsigned long maxcount, u32 *eof) > > > +{ > > > + struct xdr_stream *xdr = &resp->xdr; > > > + struct file *file = read->rd_nf->nf_file; > > > + int starting_len = xdr->buf->len; > > > + __be32 nfserr; > > > + __be32 *p, tmp; > > > + __be64 tmp64; > > > + > > > + /* Content type, offset, byte count */ > > > + p = xdr_reserve_space(xdr, 4 + 8 + 4); > > > + if (!p) > > > + return nfserr_resource; > > > + > > > + read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, maxcount); > > > + if (read->rd_vlen < 0) > > > + return nfserr_resource; > > > + > > > + nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset, > > > + resp->rqstp->rq_vec, read->rd_vlen, &maxcount, eof); > > > + if (nfserr) > > > + return nfserr; > > > + > > > + tmp = htonl(NFS4_CONTENT_DATA); > > > + write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4); > > > + tmp64 = cpu_to_be64(read->rd_offset); > > > + write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp64, 8); > > > + tmp = htonl(maxcount); > > > + write_bytes_to_xdr_buf(xdr->buf, starting_len + 12, &tmp, 4); > > > + return nfs_ok; > > > +} > > > + > > > +static __be32 > > > +nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr, > > > + struct nfsd4_read *read) > > > +{ > > > + unsigned long maxcount; > > > + struct xdr_stream *xdr = &resp->xdr; > > > + struct file *file; > > > + int starting_len = xdr->buf->len; > > > + int segments = 0; > > > + __be32 *p, tmp; > > > + u32 eof; > > > + > > > + if (nfserr) > > > + return nfserr; > > > + file = read->rd_nf->nf_file; > > > + > > > + /* eof flag, segment count */ > > > + p = xdr_reserve_space(xdr, 4 + 4); > > > + if (!p) > > > + return nfserr_resource; > > > + xdr_commit_encode(xdr); > > > + > > > + maxcount = svc_max_payload(resp->rqstp); > > > + maxcount = min_t(unsigned long, maxcount, > > > + (xdr->buf->buflen - xdr->buf->len)); > > > + maxcount = min_t(unsigned long, maxcount, read->rd_length); > > > + > > > + eof = read->rd_offset >= i_size_read(file_inode(file)); > > > + if (!eof) { > > > + nfserr = nfsd4_encode_read_plus_data(resp, read, maxcount, &eof); > > > + segments++; > > > + } > > > + > > > + if (nfserr) > > > + xdr_truncate_encode(xdr, starting_len); > > > + else { > > > + tmp = htonl(eof); > > > + write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4); > > > + tmp = htonl(segments); > > > + write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4); > > > + } > > > > > > return nfserr; > > > } > > > @@ -4974,7 +5053,7 @@ static const nfsd4_enc nfsd4_enc_ops[] = { > > > [OP_LAYOUTSTATS] = (nfsd4_enc)nfsd4_encode_noop, > > > [OP_OFFLOAD_CANCEL] = (nfsd4_enc)nfsd4_encode_noop, > > > [OP_OFFLOAD_STATUS] = (nfsd4_enc)nfsd4_encode_offload_status, > > > - [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_noop, > > > + [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_read_plus, > > > [OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek, > > > [OP_WRITE_SAME] = (nfsd4_enc)nfsd4_encode_noop, > > > [OP_CLONE] = (nfsd4_enc)nfsd4_encode_noop, > > > -- > > > 2.28.0 > > > > >
On Wed, Sep 09, 2020 at 04:25:34PM -0400, bfields wrote: > On Wed, Sep 09, 2020 at 12:53:18PM -0400, Anna Schumaker wrote: > > On Tue, Sep 8, 2020 at 3:42 PM J. Bruce Fields <bfields@redhat.com> wrote: > > > > > > On Tue, Sep 08, 2020 at 12:25:56PM -0400, schumaker.anna@gmail.com wrote: > > > > From: Anna Schumaker <Anna.Schumaker@Netapp.com> > > > > > > > > This patch adds READ_PLUS support for returning a single > > > > NFS4_CONTENT_DATA segment to the client. This is basically the same as > > > > the READ operation, only with the extra information about data segments. > > > > > > > > Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com> > > > > > > > > --- > > > > v5: Fix up nfsd4_read_plus_rsize() calculation > > > > --- > > > > fs/nfsd/nfs4proc.c | 20 +++++++++++ > > > > fs/nfsd/nfs4xdr.c | 83 ++++++++++++++++++++++++++++++++++++++++++++-- > > > > 2 files changed, 101 insertions(+), 2 deletions(-) > > > > > > > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c > > > > index eaf50eafa935..0a3df5f10501 100644 > > > > --- a/fs/nfsd/nfs4proc.c > > > > +++ b/fs/nfsd/nfs4proc.c > > > > @@ -2591,6 +2591,19 @@ static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) > > > > return (op_encode_hdr_size + 2 + XDR_QUADLEN(rlen)) * sizeof(__be32); > > > > } > > > > > > > > +static inline u32 nfsd4_read_plus_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) > > > > +{ > > > > + u32 maxcount = svc_max_payload(rqstp); > > > > + u32 rlen = min(op->u.read.rd_length, maxcount); > > > > + /* > > > > + * Worst case is a single large data segment, so make > > > > + * sure we have enough room to encode that > > > > + */ > > > > > > After the last patch we allow an unlimited number of segments. So a > > > zillion 1-byte segments is also possible, and is a worse case. > > > > > > Possible ways to avoid that kind of thing: > > > > > > - when encoding, stop and return a short read if the xdr-encoded > > > result would exceed the limit calculated here. > > > > Doesn't this happen automatically through calls to xdr_reserve_space()? > > No, xdr_reserve_space() will keep us from running out of buffer > completely, but it won't check that ops come in under the estimates made > in read_plus_rsize(). If it's easier, another option might be just: if we ever get a "small" hole (say, less than 512 bytes), just give up and encode the rest of the result as a single big data segment. --b.
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index eaf50eafa935..0a3df5f10501 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -2591,6 +2591,19 @@ static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) return (op_encode_hdr_size + 2 + XDR_QUADLEN(rlen)) * sizeof(__be32); } +static inline u32 nfsd4_read_plus_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + u32 maxcount = svc_max_payload(rqstp); + u32 rlen = min(op->u.read.rd_length, maxcount); + /* + * Worst case is a single large data segment, so make + * sure we have enough room to encode that + */ + u32 seg_len = 1 + 2 + 1; + + return (op_encode_hdr_size + 2 + seg_len + XDR_QUADLEN(rlen)) * sizeof(__be32); +} + static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { u32 maxcount = 0, rlen = 0; @@ -3163,6 +3176,13 @@ static const struct nfsd4_operation nfsd4_ops[] = { .op_name = "OP_COPY", .op_rsize_bop = nfsd4_copy_rsize, }, + [OP_READ_PLUS] = { + .op_func = nfsd4_read, + .op_release = nfsd4_read_release, + .op_name = "OP_READ_PLUS", + .op_rsize_bop = nfsd4_read_plus_rsize, + .op_get_currentstateid = nfsd4_get_readstateid, + }, [OP_SEEK] = { .op_func = nfsd4_seek, .op_name = "OP_SEEK", diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 0be194de4888..26d12e3edf33 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2173,7 +2173,7 @@ static const nfsd4_dec nfsd4_dec_ops[] = { [OP_LAYOUTSTATS] = (nfsd4_dec)nfsd4_decode_notsupp, [OP_OFFLOAD_CANCEL] = (nfsd4_dec)nfsd4_decode_offload_status, [OP_OFFLOAD_STATUS] = (nfsd4_dec)nfsd4_decode_offload_status, - [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_read, [OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek, [OP_WRITE_SAME] = (nfsd4_dec)nfsd4_decode_notsupp, [OP_CLONE] = (nfsd4_dec)nfsd4_decode_clone, @@ -4597,6 +4597,85 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr, return nfserr_resource; p = xdr_encode_hyper(p, os->count); *p++ = cpu_to_be32(0); + return nfserr; +} + +static __be32 +nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp, + struct nfsd4_read *read, + unsigned long maxcount, u32 *eof) +{ + struct xdr_stream *xdr = &resp->xdr; + struct file *file = read->rd_nf->nf_file; + int starting_len = xdr->buf->len; + __be32 nfserr; + __be32 *p, tmp; + __be64 tmp64; + + /* Content type, offset, byte count */ + p = xdr_reserve_space(xdr, 4 + 8 + 4); + if (!p) + return nfserr_resource; + + read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, maxcount); + if (read->rd_vlen < 0) + return nfserr_resource; + + nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset, + resp->rqstp->rq_vec, read->rd_vlen, &maxcount, eof); + if (nfserr) + return nfserr; + + tmp = htonl(NFS4_CONTENT_DATA); + write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4); + tmp64 = cpu_to_be64(read->rd_offset); + write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp64, 8); + tmp = htonl(maxcount); + write_bytes_to_xdr_buf(xdr->buf, starting_len + 12, &tmp, 4); + return nfs_ok; +} + +static __be32 +nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr, + struct nfsd4_read *read) +{ + unsigned long maxcount; + struct xdr_stream *xdr = &resp->xdr; + struct file *file; + int starting_len = xdr->buf->len; + int segments = 0; + __be32 *p, tmp; + u32 eof; + + if (nfserr) + return nfserr; + file = read->rd_nf->nf_file; + + /* eof flag, segment count */ + p = xdr_reserve_space(xdr, 4 + 4); + if (!p) + return nfserr_resource; + xdr_commit_encode(xdr); + + maxcount = svc_max_payload(resp->rqstp); + maxcount = min_t(unsigned long, maxcount, + (xdr->buf->buflen - xdr->buf->len)); + maxcount = min_t(unsigned long, maxcount, read->rd_length); + + eof = read->rd_offset >= i_size_read(file_inode(file)); + if (!eof) { + nfserr = nfsd4_encode_read_plus_data(resp, read, maxcount, &eof); + segments++; + } + + if (nfserr) + xdr_truncate_encode(xdr, starting_len); + else { + tmp = htonl(eof); + write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4); + tmp = htonl(segments); + write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4); + } return nfserr; } @@ -4974,7 +5053,7 @@ static const nfsd4_enc nfsd4_enc_ops[] = { [OP_LAYOUTSTATS] = (nfsd4_enc)nfsd4_encode_noop, [OP_OFFLOAD_CANCEL] = (nfsd4_enc)nfsd4_encode_noop, [OP_OFFLOAD_STATUS] = (nfsd4_enc)nfsd4_encode_offload_status, - [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_noop, + [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_read_plus, [OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek, [OP_WRITE_SAME] = (nfsd4_enc)nfsd4_encode_noop, [OP_CLONE] = (nfsd4_enc)nfsd4_encode_noop,