diff mbox series

[v1] NFSD: add IO_ADVISE operation

Message ID 20230110184726.13380-1-mora@netapp.com (mailing list archive)
State New, archived
Headers show
Series [v1] NFSD: add IO_ADVISE operation | expand

Commit Message

Jorge Mora Jan. 10, 2023, 6:47 p.m. UTC
If multiple bits are set, select just one using a predetermined
order of precedence. If there are no bits which correspond to
any of the advice values (POSIX_FADV_*), the server simply
replies with NFS4ERR_UNION_NOTSUPP.

If a client sends a bitmap mask with multiple words, ignore all
but the first word in the bitmap. The response is always the
same first word of the bitmap mask given in the request.

Signed-off-by: Jorge Mora <mora@netapp.com>
---
 fs/nfsd/nfs4proc.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++
 fs/nfsd/nfs4xdr.c  | 27 +++++++++++++++++++--
 fs/nfsd/xdr4.h     | 11 +++++++++
 3 files changed, 95 insertions(+), 2 deletions(-)

Comments

Chuck Lever Jan. 10, 2023, 7 p.m. UTC | #1
> On Jan 10, 2023, at 1:47 PM, Jorge Mora <jmora1300@gmail.com> wrote:
> 
> If multiple bits are set, select just one using a predetermined
> order of precedence. If there are no bits which correspond to
> any of the advice values (POSIX_FADV_*), the server simply
> replies with NFS4ERR_UNION_NOTSUPP.
> 
> If a client sends a bitmap mask with multiple words, ignore all
> but the first word in the bitmap. The response is always the
> same first word of the bitmap mask given in the request.

Hi Jorge-

I'd rather not add this operation just because it happens to be
missing. Is there a reason you need it? Does it provide some
kind of performance benefit, for instance? The patch description
really does need to provide this kind of rationale, and hopefully
some performance measurements.

Do the POSIX_FADV_* settings map to behavior that a client can
expect in other server implementations? That is, do you expect
the proposed implementation below to interoperate properly?


> Signed-off-by: Jorge Mora <mora@netapp.com>
> ---
> fs/nfsd/nfs4proc.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++
> fs/nfsd/nfs4xdr.c  | 27 +++++++++++++++++++--
> fs/nfsd/xdr4.h     | 11 +++++++++
> 3 files changed, 95 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index 8beb2bc4c328..65179a3946f5 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -35,6 +35,7 @@
> #include <linux/fs_struct.h>
> #include <linux/file.h>
> #include <linux/falloc.h>
> +#include <linux/fadvise.h>
> #include <linux/slab.h>
> #include <linux/kthread.h>
> #include <linux/namei.h>
> @@ -1995,6 +1996,53 @@ nfsd4_deallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> 			       FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE);
> }
> 
> +static __be32
> +nfsd4_io_advise(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> +		union nfsd4_op_u *u)
> +{
> +	struct nfsd4_io_advise *io_advise = &u->io_advise;
> +	struct nfsd_file *nf;
> +	__be32 status;
> +	int advice;
> +	int ret;
> +
> +	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
> +					    &io_advise->stateid,
> +					    RD_STATE, &nf, NULL);
> +	if (status) {
> +		dprintk("NFSD: %s: couldn't process stateid!\n", __func__);
> +		return status;
> +	}
> +
> +	/*
> +	 * If multiple bits are set, select just one using the following
> +	 * order of precedence
> +	 */
> +	if (io_advise->hints & NFS_IO_ADVISE4_NORMAL)
> +		advice = POSIX_FADV_NORMAL;
> +	else if (io_advise->hints & NFS_IO_ADVISE4_RANDOM)
> +		advice = POSIX_FADV_RANDOM;
> +	else if (io_advise->hints & NFS_IO_ADVISE4_SEQUENTIAL)
> +		advice = POSIX_FADV_SEQUENTIAL;
> +	else if (io_advise->hints & NFS_IO_ADVISE4_WILLNEED)
> +		advice = POSIX_FADV_WILLNEED;
> +	else if (io_advise->hints & NFS_IO_ADVISE4_DONTNEED)
> +		advice = POSIX_FADV_DONTNEED;
> +	else if (io_advise->hints & NFS_IO_ADVISE4_NOREUSE)
> +		advice = POSIX_FADV_NOREUSE;
> +	else {
> +		status = nfserr_union_notsupp;
> +		goto out;
> +	}
> +
> +	ret = vfs_fadvise(nf->nf_file, io_advise->offset, io_advise->count, advice);
> +	if (ret < 0)
> +		status = nfserrno(ret);
> +out:
> +	nfsd_file_put(nf);
> +	return status;
> +}
> +
> static __be32
> nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> 	   union nfsd4_op_u *u)
> @@ -3096,6 +3144,12 @@ static u32 nfsd4_layoutreturn_rsize(const struct svc_rqst *rqstp,
> #endif /* CONFIG_NFSD_PNFS */
> 
> 
> +static u32 nfsd4_io_advise_rsize(const struct svc_rqst *rqstp,
> +				 const struct nfsd4_op *op)
> +{
> +	return (op_encode_hdr_size + 2) * sizeof(__be32);
> +}
> +
> static u32 nfsd4_seek_rsize(const struct svc_rqst *rqstp,
> 			    const struct nfsd4_op *op)
> {
> @@ -3479,6 +3533,11 @@ static const struct nfsd4_operation nfsd4_ops[] = {
> 		.op_name = "OP_DEALLOCATE",
> 		.op_rsize_bop = nfsd4_only_status_rsize,
> 	},
> +	[OP_IO_ADVISE] = {
> +		.op_func = nfsd4_io_advise,
> +		.op_name = "OP_IO_ADVISE",
> +		.op_rsize_bop = nfsd4_io_advise_rsize,
> +	},
> 	[OP_CLONE] = {
> 		.op_func = nfsd4_clone,
> 		.op_flags = OP_MODIFIES_SOMETHING,
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index bcfeb1a922c0..8814c24047ff 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -1882,6 +1882,22 @@ nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp,
> 	return nfs_ok;
> }
> 
> +static __be32
> +nfsd4_decode_io_advise(struct nfsd4_compoundargs *argp,
> +		       struct nfsd4_io_advise *io_advise)
> +{
> +	__be32 status;
> +
> +	status = nfsd4_decode_stateid4(argp, &io_advise->stateid);
> +	if (status)
> +		return status;
> +	if (xdr_stream_decode_u64(argp->xdr, &io_advise->offset) < 0)
> +		return nfserr_bad_xdr;
> +	if (xdr_stream_decode_u64(argp->xdr, &io_advise->count) < 0)
> +		return nfserr_bad_xdr;
> +	return nfsd4_decode_bitmap4(argp, &io_advise->hints, 1);
> +}
> +
> static __be32 nfsd4_decode_nl4_server(struct nfsd4_compoundargs *argp,
> 				      struct nl4_server *ns)
> {
> @@ -2338,7 +2354,7 @@ static const nfsd4_dec nfsd4_dec_ops[] = {
> 	[OP_COPY]		= (nfsd4_dec)nfsd4_decode_copy,
> 	[OP_COPY_NOTIFY]	= (nfsd4_dec)nfsd4_decode_copy_notify,
> 	[OP_DEALLOCATE]		= (nfsd4_dec)nfsd4_decode_fallocate,
> -	[OP_IO_ADVISE]		= (nfsd4_dec)nfsd4_decode_notsupp,
> +	[OP_IO_ADVISE]		= (nfsd4_dec)nfsd4_decode_io_advise,
> 	[OP_LAYOUTERROR]	= (nfsd4_dec)nfsd4_decode_notsupp,
> 	[OP_LAYOUTSTATS]	= (nfsd4_dec)nfsd4_decode_notsupp,
> 	[OP_OFFLOAD_CANCEL]	= (nfsd4_dec)nfsd4_decode_offload_status,
> @@ -4948,6 +4964,13 @@ nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr,
> 	return nfserr;
> }
> 
> +static __be32
> +nfsd4_encode_io_advise(struct nfsd4_compoundres *resp, __be32 nfserr,
> +		       struct nfsd4_io_advise *io_advise)
> +{
> +	return nfsd4_encode_bitmap(resp->xdr, io_advise->hints, 0, 0);
> +}
> +
> static __be32
> nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr,
> 		  struct nfsd4_seek *seek)
> @@ -5282,7 +5305,7 @@ static const nfsd4_enc nfsd4_enc_ops[] = {
> 	[OP_COPY]		= (nfsd4_enc)nfsd4_encode_copy,
> 	[OP_COPY_NOTIFY]	= (nfsd4_enc)nfsd4_encode_copy_notify,
> 	[OP_DEALLOCATE]		= (nfsd4_enc)nfsd4_encode_noop,
> -	[OP_IO_ADVISE]		= (nfsd4_enc)nfsd4_encode_noop,
> +	[OP_IO_ADVISE]		= (nfsd4_enc)nfsd4_encode_io_advise,
> 	[OP_LAYOUTERROR]	= (nfsd4_enc)nfsd4_encode_noop,
> 	[OP_LAYOUTSTATS]	= (nfsd4_enc)nfsd4_encode_noop,
> 	[OP_OFFLOAD_CANCEL]	= (nfsd4_enc)nfsd4_encode_noop,
> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> index 0eb00105d845..9b8ba4d6e3ab 100644
> --- a/fs/nfsd/xdr4.h
> +++ b/fs/nfsd/xdr4.h
> @@ -518,6 +518,16 @@ struct nfsd4_fallocate {
> 	u64		falloc_length;
> };
> 
> +struct nfsd4_io_advise {
> +	/* request */
> +	stateid_t	stateid;
> +	loff_t		offset;
> +	u64		count;
> +
> +	/* both */
> +	u32		hints;
> +};
> +
> struct nfsd4_clone {
> 	/* request */
> 	stateid_t	cl_src_stateid;
> @@ -688,6 +698,7 @@ struct nfsd4_op {
> 		/* NFSv4.2 */
> 		struct nfsd4_fallocate		allocate;
> 		struct nfsd4_fallocate		deallocate;
> +		struct nfsd4_io_advise		io_advise;
> 		struct nfsd4_clone		clone;
> 		struct nfsd4_copy		copy;
> 		struct nfsd4_offload_status	offload_status;
> -- 
> 2.31.1
> 

--
Chuck Lever
Olga Kornievskaia Jan. 11, 2023, 2:15 p.m. UTC | #2
On Tue, Jan 10, 2023 at 2:04 PM Chuck Lever III <chuck.lever@oracle.com> wrote:
>
>
>
> > On Jan 10, 2023, at 1:47 PM, Jorge Mora <jmora1300@gmail.com> wrote:
> >
> > If multiple bits are set, select just one using a predetermined
> > order of precedence. If there are no bits which correspond to
> > any of the advice values (POSIX_FADV_*), the server simply
> > replies with NFS4ERR_UNION_NOTSUPP.
> >
> > If a client sends a bitmap mask with multiple words, ignore all
> > but the first word in the bitmap. The response is always the
> > same first word of the bitmap mask given in the request.
>
> Hi Jorge-
>
> I'd rather not add this operation just because it happens to be
> missing. Is there a reason you need it?

The motivation was to implement the not implement and hope to serve as
a foundation for others to expand on and do something. Also because
typically if we put in a client piece support there is supposed to be
server side as well.

> Does it provide some
> kind of performance benefit, for instance? The patch description
> really does need to provide this kind of rationale, and hopefully
> some performance measurements.
>
> Do the POSIX_FADV_* settings map to behavior that a client can
> expect in other server implementations?

I thought the purpose of IO_ADVISE is to advise but not expect. If the
server wants to do something with the knowledge it can.

> That is, do you expect
>  the proposed implementation below to interoperate properly?
>
>
> > Signed-off-by: Jorge Mora <mora@netapp.com>
> > ---
> > fs/nfsd/nfs4proc.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++
> > fs/nfsd/nfs4xdr.c  | 27 +++++++++++++++++++--
> > fs/nfsd/xdr4.h     | 11 +++++++++
> > 3 files changed, 95 insertions(+), 2 deletions(-)
> >
> > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> > index 8beb2bc4c328..65179a3946f5 100644
> > --- a/fs/nfsd/nfs4proc.c
> > +++ b/fs/nfsd/nfs4proc.c
> > @@ -35,6 +35,7 @@
> > #include <linux/fs_struct.h>
> > #include <linux/file.h>
> > #include <linux/falloc.h>
> > +#include <linux/fadvise.h>
> > #include <linux/slab.h>
> > #include <linux/kthread.h>
> > #include <linux/namei.h>
> > @@ -1995,6 +1996,53 @@ nfsd4_deallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >                              FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE);
> > }
> >
> > +static __be32
> > +nfsd4_io_advise(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> > +             union nfsd4_op_u *u)
> > +{
> > +     struct nfsd4_io_advise *io_advise = &u->io_advise;
> > +     struct nfsd_file *nf;
> > +     __be32 status;
> > +     int advice;
> > +     int ret;
> > +
> > +     status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
> > +                                         &io_advise->stateid,
> > +                                         RD_STATE, &nf, NULL);
> > +     if (status) {
> > +             dprintk("NFSD: %s: couldn't process stateid!\n", __func__);
> > +             return status;
> > +     }
> > +
> > +     /*
> > +      * If multiple bits are set, select just one using the following
> > +      * order of precedence
> > +      */
> > +     if (io_advise->hints & NFS_IO_ADVISE4_NORMAL)
> > +             advice = POSIX_FADV_NORMAL;
> > +     else if (io_advise->hints & NFS_IO_ADVISE4_RANDOM)
> > +             advice = POSIX_FADV_RANDOM;
> > +     else if (io_advise->hints & NFS_IO_ADVISE4_SEQUENTIAL)
> > +             advice = POSIX_FADV_SEQUENTIAL;
> > +     else if (io_advise->hints & NFS_IO_ADVISE4_WILLNEED)
> > +             advice = POSIX_FADV_WILLNEED;
> > +     else if (io_advise->hints & NFS_IO_ADVISE4_DONTNEED)
> > +             advice = POSIX_FADV_DONTNEED;
> > +     else if (io_advise->hints & NFS_IO_ADVISE4_NOREUSE)
> > +             advice = POSIX_FADV_NOREUSE;
> > +     else {
> > +             status = nfserr_union_notsupp;
> > +             goto out;
> > +     }
> > +
> > +     ret = vfs_fadvise(nf->nf_file, io_advise->offset, io_advise->count, advice);
> > +     if (ret < 0)
> > +             status = nfserrno(ret);
> > +out:
> > +     nfsd_file_put(nf);
> > +     return status;
> > +}
> > +
> > static __be32
> > nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >          union nfsd4_op_u *u)
> > @@ -3096,6 +3144,12 @@ static u32 nfsd4_layoutreturn_rsize(const struct svc_rqst *rqstp,
> > #endif /* CONFIG_NFSD_PNFS */
> >
> >
> > +static u32 nfsd4_io_advise_rsize(const struct svc_rqst *rqstp,
> > +                              const struct nfsd4_op *op)
> > +{
> > +     return (op_encode_hdr_size + 2) * sizeof(__be32);
> > +}
> > +
> > static u32 nfsd4_seek_rsize(const struct svc_rqst *rqstp,
> >                           const struct nfsd4_op *op)
> > {
> > @@ -3479,6 +3533,11 @@ static const struct nfsd4_operation nfsd4_ops[] = {
> >               .op_name = "OP_DEALLOCATE",
> >               .op_rsize_bop = nfsd4_only_status_rsize,
> >       },
> > +     [OP_IO_ADVISE] = {
> > +             .op_func = nfsd4_io_advise,
> > +             .op_name = "OP_IO_ADVISE",
> > +             .op_rsize_bop = nfsd4_io_advise_rsize,
> > +     },
> >       [OP_CLONE] = {
> >               .op_func = nfsd4_clone,
> >               .op_flags = OP_MODIFIES_SOMETHING,
> > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> > index bcfeb1a922c0..8814c24047ff 100644
> > --- a/fs/nfsd/nfs4xdr.c
> > +++ b/fs/nfsd/nfs4xdr.c
> > @@ -1882,6 +1882,22 @@ nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp,
> >       return nfs_ok;
> > }
> >
> > +static __be32
> > +nfsd4_decode_io_advise(struct nfsd4_compoundargs *argp,
> > +                    struct nfsd4_io_advise *io_advise)
> > +{
> > +     __be32 status;
> > +
> > +     status = nfsd4_decode_stateid4(argp, &io_advise->stateid);
> > +     if (status)
> > +             return status;
> > +     if (xdr_stream_decode_u64(argp->xdr, &io_advise->offset) < 0)
> > +             return nfserr_bad_xdr;
> > +     if (xdr_stream_decode_u64(argp->xdr, &io_advise->count) < 0)
> > +             return nfserr_bad_xdr;
> > +     return nfsd4_decode_bitmap4(argp, &io_advise->hints, 1);
> > +}
> > +
> > static __be32 nfsd4_decode_nl4_server(struct nfsd4_compoundargs *argp,
> >                                     struct nl4_server *ns)
> > {
> > @@ -2338,7 +2354,7 @@ static const nfsd4_dec nfsd4_dec_ops[] = {
> >       [OP_COPY]               = (nfsd4_dec)nfsd4_decode_copy,
> >       [OP_COPY_NOTIFY]        = (nfsd4_dec)nfsd4_decode_copy_notify,
> >       [OP_DEALLOCATE]         = (nfsd4_dec)nfsd4_decode_fallocate,
> > -     [OP_IO_ADVISE]          = (nfsd4_dec)nfsd4_decode_notsupp,
> > +     [OP_IO_ADVISE]          = (nfsd4_dec)nfsd4_decode_io_advise,
> >       [OP_LAYOUTERROR]        = (nfsd4_dec)nfsd4_decode_notsupp,
> >       [OP_LAYOUTSTATS]        = (nfsd4_dec)nfsd4_decode_notsupp,
> >       [OP_OFFLOAD_CANCEL]     = (nfsd4_dec)nfsd4_decode_offload_status,
> > @@ -4948,6 +4964,13 @@ nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr,
> >       return nfserr;
> > }
> >
> > +static __be32
> > +nfsd4_encode_io_advise(struct nfsd4_compoundres *resp, __be32 nfserr,
> > +                    struct nfsd4_io_advise *io_advise)
> > +{
> > +     return nfsd4_encode_bitmap(resp->xdr, io_advise->hints, 0, 0);
> > +}
> > +
> > static __be32
> > nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr,
> >                 struct nfsd4_seek *seek)
> > @@ -5282,7 +5305,7 @@ static const nfsd4_enc nfsd4_enc_ops[] = {
> >       [OP_COPY]               = (nfsd4_enc)nfsd4_encode_copy,
> >       [OP_COPY_NOTIFY]        = (nfsd4_enc)nfsd4_encode_copy_notify,
> >       [OP_DEALLOCATE]         = (nfsd4_enc)nfsd4_encode_noop,
> > -     [OP_IO_ADVISE]          = (nfsd4_enc)nfsd4_encode_noop,
> > +     [OP_IO_ADVISE]          = (nfsd4_enc)nfsd4_encode_io_advise,
> >       [OP_LAYOUTERROR]        = (nfsd4_enc)nfsd4_encode_noop,
> >       [OP_LAYOUTSTATS]        = (nfsd4_enc)nfsd4_encode_noop,
> >       [OP_OFFLOAD_CANCEL]     = (nfsd4_enc)nfsd4_encode_noop,
> > diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> > index 0eb00105d845..9b8ba4d6e3ab 100644
> > --- a/fs/nfsd/xdr4.h
> > +++ b/fs/nfsd/xdr4.h
> > @@ -518,6 +518,16 @@ struct nfsd4_fallocate {
> >       u64             falloc_length;
> > };
> >
> > +struct nfsd4_io_advise {
> > +     /* request */
> > +     stateid_t       stateid;
> > +     loff_t          offset;
> > +     u64             count;
> > +
> > +     /* both */
> > +     u32             hints;
> > +};
> > +
> > struct nfsd4_clone {
> >       /* request */
> >       stateid_t       cl_src_stateid;
> > @@ -688,6 +698,7 @@ struct nfsd4_op {
> >               /* NFSv4.2 */
> >               struct nfsd4_fallocate          allocate;
> >               struct nfsd4_fallocate          deallocate;
> > +             struct nfsd4_io_advise          io_advise;
> >               struct nfsd4_clone              clone;
> >               struct nfsd4_copy               copy;
> >               struct nfsd4_offload_status     offload_status;
> > --
> > 2.31.1
> >
>
> --
> Chuck Lever
>
>
>
Chuck Lever Jan. 12, 2023, 2:04 a.m. UTC | #3
> On Jan 11, 2023, at 9:15 AM, Olga Kornievskaia <aglo@umich.edu> wrote:
> 
> On Tue, Jan 10, 2023 at 2:04 PM Chuck Lever III <chuck.lever@oracle.com> wrote:
>> 
>>> On Jan 10, 2023, at 1:47 PM, Jorge Mora <jmora1300@gmail.com> wrote:
>>> 
>>> If multiple bits are set, select just one using a predetermined
>>> order of precedence. If there are no bits which correspond to
>>> any of the advice values (POSIX_FADV_*), the server simply
>>> replies with NFS4ERR_UNION_NOTSUPP.
>>> 
>>> If a client sends a bitmap mask with multiple words, ignore all
>>> but the first word in the bitmap. The response is always the
>>> same first word of the bitmap mask given in the request.

>> Does it provide some
>> kind of performance benefit, for instance? The patch description
>> really does need to provide this kind of rationale, and hopefully
>> some performance measurements.
>> 
>> Do the POSIX_FADV_* settings map to behavior that a client can
>> expect in other server implementations?
> 
> I thought the purpose of IO_ADVISE is to advise but not expect. If the
> server wants to do something with the knowledge it can.

Fair enough: what should the Linux server do with these hints?
Show some data that demonstrates a specific benefit to application
workloads for the server implementation choices that Jorge proposed.

I'm not saying NAK, more like "not yet." This work doesn't look ready
to be merged without a way to evaluate whether the proposed design
choices are reasonable and will do little or no harm. There's no
discussion of that in either a cover letter or the patch descriptions,
so it's really hard for me to tell whether this has been thought
through.

In the meantime, the client side is supposed to work correctly
whether the server implements IO_ADVISE or not... so I don't see
a pressing need to merge a server IO_ADVISE implementation until we
have a sensible architectural direction and one or more use cases
that can actually benefit.


--
Chuck Lever
Chuck Lever Jan. 13, 2023, 5:39 p.m. UTC | #4
> On Jan 12, 2023, at 11:47 AM, Jorge Mora <jmora1300@gmail.com> wrote:
> 
> Hello Chuck,
> 
> Thank you for your comments, I will look into these issues and get some data to justify the implementation.

Awesome. That will be great!


--
Chuck Lever
diff mbox series

Patch

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 8beb2bc4c328..65179a3946f5 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -35,6 +35,7 @@ 
 #include <linux/fs_struct.h>
 #include <linux/file.h>
 #include <linux/falloc.h>
+#include <linux/fadvise.h>
 #include <linux/slab.h>
 #include <linux/kthread.h>
 #include <linux/namei.h>
@@ -1995,6 +1996,53 @@  nfsd4_deallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 			       FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE);
 }
 
+static __be32
+nfsd4_io_advise(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
+{
+	struct nfsd4_io_advise *io_advise = &u->io_advise;
+	struct nfsd_file *nf;
+	__be32 status;
+	int advice;
+	int ret;
+
+	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
+					    &io_advise->stateid,
+					    RD_STATE, &nf, NULL);
+	if (status) {
+		dprintk("NFSD: %s: couldn't process stateid!\n", __func__);
+		return status;
+	}
+
+	/*
+	 * If multiple bits are set, select just one using the following
+	 * order of precedence
+	 */
+	if (io_advise->hints & NFS_IO_ADVISE4_NORMAL)
+		advice = POSIX_FADV_NORMAL;
+	else if (io_advise->hints & NFS_IO_ADVISE4_RANDOM)
+		advice = POSIX_FADV_RANDOM;
+	else if (io_advise->hints & NFS_IO_ADVISE4_SEQUENTIAL)
+		advice = POSIX_FADV_SEQUENTIAL;
+	else if (io_advise->hints & NFS_IO_ADVISE4_WILLNEED)
+		advice = POSIX_FADV_WILLNEED;
+	else if (io_advise->hints & NFS_IO_ADVISE4_DONTNEED)
+		advice = POSIX_FADV_DONTNEED;
+	else if (io_advise->hints & NFS_IO_ADVISE4_NOREUSE)
+		advice = POSIX_FADV_NOREUSE;
+	else {
+		status = nfserr_union_notsupp;
+		goto out;
+	}
+
+	ret = vfs_fadvise(nf->nf_file, io_advise->offset, io_advise->count, advice);
+	if (ret < 0)
+		status = nfserrno(ret);
+out:
+	nfsd_file_put(nf);
+	return status;
+}
+
 static __be32
 nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	   union nfsd4_op_u *u)
@@ -3096,6 +3144,12 @@  static u32 nfsd4_layoutreturn_rsize(const struct svc_rqst *rqstp,
 #endif /* CONFIG_NFSD_PNFS */
 
 
+static u32 nfsd4_io_advise_rsize(const struct svc_rqst *rqstp,
+				 const struct nfsd4_op *op)
+{
+	return (op_encode_hdr_size + 2) * sizeof(__be32);
+}
+
 static u32 nfsd4_seek_rsize(const struct svc_rqst *rqstp,
 			    const struct nfsd4_op *op)
 {
@@ -3479,6 +3533,11 @@  static const struct nfsd4_operation nfsd4_ops[] = {
 		.op_name = "OP_DEALLOCATE",
 		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
+	[OP_IO_ADVISE] = {
+		.op_func = nfsd4_io_advise,
+		.op_name = "OP_IO_ADVISE",
+		.op_rsize_bop = nfsd4_io_advise_rsize,
+	},
 	[OP_CLONE] = {
 		.op_func = nfsd4_clone,
 		.op_flags = OP_MODIFIES_SOMETHING,
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index bcfeb1a922c0..8814c24047ff 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1882,6 +1882,22 @@  nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp,
 	return nfs_ok;
 }
 
+static __be32
+nfsd4_decode_io_advise(struct nfsd4_compoundargs *argp,
+		       struct nfsd4_io_advise *io_advise)
+{
+	__be32 status;
+
+	status = nfsd4_decode_stateid4(argp, &io_advise->stateid);
+	if (status)
+		return status;
+	if (xdr_stream_decode_u64(argp->xdr, &io_advise->offset) < 0)
+		return nfserr_bad_xdr;
+	if (xdr_stream_decode_u64(argp->xdr, &io_advise->count) < 0)
+		return nfserr_bad_xdr;
+	return nfsd4_decode_bitmap4(argp, &io_advise->hints, 1);
+}
+
 static __be32 nfsd4_decode_nl4_server(struct nfsd4_compoundargs *argp,
 				      struct nl4_server *ns)
 {
@@ -2338,7 +2354,7 @@  static const nfsd4_dec nfsd4_dec_ops[] = {
 	[OP_COPY]		= (nfsd4_dec)nfsd4_decode_copy,
 	[OP_COPY_NOTIFY]	= (nfsd4_dec)nfsd4_decode_copy_notify,
 	[OP_DEALLOCATE]		= (nfsd4_dec)nfsd4_decode_fallocate,
-	[OP_IO_ADVISE]		= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_IO_ADVISE]		= (nfsd4_dec)nfsd4_decode_io_advise,
 	[OP_LAYOUTERROR]	= (nfsd4_dec)nfsd4_decode_notsupp,
 	[OP_LAYOUTSTATS]	= (nfsd4_dec)nfsd4_decode_notsupp,
 	[OP_OFFLOAD_CANCEL]	= (nfsd4_dec)nfsd4_decode_offload_status,
@@ -4948,6 +4964,13 @@  nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr,
 	return nfserr;
 }
 
+static __be32
+nfsd4_encode_io_advise(struct nfsd4_compoundres *resp, __be32 nfserr,
+		       struct nfsd4_io_advise *io_advise)
+{
+	return nfsd4_encode_bitmap(resp->xdr, io_advise->hints, 0, 0);
+}
+
 static __be32
 nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr,
 		  struct nfsd4_seek *seek)
@@ -5282,7 +5305,7 @@  static const nfsd4_enc nfsd4_enc_ops[] = {
 	[OP_COPY]		= (nfsd4_enc)nfsd4_encode_copy,
 	[OP_COPY_NOTIFY]	= (nfsd4_enc)nfsd4_encode_copy_notify,
 	[OP_DEALLOCATE]		= (nfsd4_enc)nfsd4_encode_noop,
-	[OP_IO_ADVISE]		= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_IO_ADVISE]		= (nfsd4_enc)nfsd4_encode_io_advise,
 	[OP_LAYOUTERROR]	= (nfsd4_enc)nfsd4_encode_noop,
 	[OP_LAYOUTSTATS]	= (nfsd4_enc)nfsd4_encode_noop,
 	[OP_OFFLOAD_CANCEL]	= (nfsd4_enc)nfsd4_encode_noop,
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 0eb00105d845..9b8ba4d6e3ab 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -518,6 +518,16 @@  struct nfsd4_fallocate {
 	u64		falloc_length;
 };
 
+struct nfsd4_io_advise {
+	/* request */
+	stateid_t	stateid;
+	loff_t		offset;
+	u64		count;
+
+	/* both */
+	u32		hints;
+};
+
 struct nfsd4_clone {
 	/* request */
 	stateid_t	cl_src_stateid;
@@ -688,6 +698,7 @@  struct nfsd4_op {
 		/* NFSv4.2 */
 		struct nfsd4_fallocate		allocate;
 		struct nfsd4_fallocate		deallocate;
+		struct nfsd4_io_advise		io_advise;
 		struct nfsd4_clone		clone;
 		struct nfsd4_copy		copy;
 		struct nfsd4_offload_status	offload_status;