diff mbox series

[RFC,2/9] NFSD: Free async copy information in nfsd4_cb_offload_release()

Message ID 20241008134719.116825-13-cel@kernel.org (mailing list archive)
State New
Headers show
Series async COPY fixes | expand

Commit Message

Chuck Lever Oct. 8, 2024, 1:47 p.m. UTC
From: Chuck Lever <chuck.lever@oracle.com>

RFC 7862 Section 4.8 states:

> A copy offload stateid will be valid until either (A) the client
> or server restarts or (B) the client returns the resource by
> issuing an OFFLOAD_CANCEL operation or the client replies to a
> CB_OFFLOAD operation.

Currently, NFSD purges the metadata for an async COPY operation as
soon as the CB_OFFLOAD callback has been sent. It does not wait for
even the client's CB_OFFLOAD response, as the paragraph above
suggests that it should.

This makes the OFFLOAD_STATUS operation ineffective in the window
between the completion of the COPY and the server's receipt of the
CB_OFFLOAD response. This is important if, for example, the client
responds with NFS4ERR_DELAY, or the transport is lost before the
server receives the response. A client might use OFFLOAD_STATUS to
query the server about the missing CB_OFFLOAD, but NFSD will respond
to OFFLOAD_STATUS as if it had never heard of the presented copy
stateid.

This patch starts to address this issue by extending the lifetime of
struct nfsd4_copy at least until the server has seen the CB_OFFLOAD
response, or its CB_OFFLOAD operation has timed out.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4proc.c | 18 +++++++++++-------
 fs/nfsd/xdr4.h     |  3 +++
 2 files changed, 14 insertions(+), 7 deletions(-)

Comments

Chuck Lever Oct. 9, 2024, 3:14 p.m. UTC | #1
On Tue, Oct 08, 2024 at 09:47:21AM -0400, cel@kernel.org wrote:
> From: Chuck Lever <chuck.lever@oracle.com>
> 
> RFC 7862 Section 4.8 states:
> 
> > A copy offload stateid will be valid until either (A) the client
> > or server restarts or (B) the client returns the resource by
> > issuing an OFFLOAD_CANCEL operation or the client replies to a
> > CB_OFFLOAD operation.
> 
> Currently, NFSD purges the metadata for an async COPY operation as
> soon as the CB_OFFLOAD callback has been sent. It does not wait for
> even the client's CB_OFFLOAD response, as the paragraph above
> suggests that it should.
> 
> This makes the OFFLOAD_STATUS operation ineffective in the window
> between the completion of the COPY and the server's receipt of the
> CB_OFFLOAD response. This is important if, for example, the client
> responds with NFS4ERR_DELAY, or the transport is lost before the
> server receives the response. A client might use OFFLOAD_STATUS to
> query the server about the missing CB_OFFLOAD, but NFSD will respond
> to OFFLOAD_STATUS as if it had never heard of the presented copy
> stateid.
> 
> This patch starts to address this issue by extending the lifetime of
> struct nfsd4_copy at least until the server has seen the CB_OFFLOAD
> response, or its CB_OFFLOAD operation has timed out.

An OFFLOAD_CANCEL that is sent after the COPY completes but before
the CB_OFFLOAD reply is received will free the struct nfsd4_copy.

Won't that cause a UAF in nfsd4_cb_offload_release or
nfsd4_cb_offload_done ?

If I bump the struct's reference count, would that help?


> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
>  fs/nfsd/nfs4proc.c | 18 +++++++++++-------
>  fs/nfsd/xdr4.h     |  3 +++
>  2 files changed, 14 insertions(+), 7 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index b5a6bf4f459f..a3c564a9596c 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -57,6 +57,8 @@ module_param(inter_copy_offload_enable, bool, 0644);
>  MODULE_PARM_DESC(inter_copy_offload_enable,
>  		 "Enable inter server to server copy offload. Default: false");
>  
> +static void cleanup_async_copy(struct nfsd4_copy *copy);
> +
>  #ifdef CONFIG_NFSD_V4_2_INTER_SSC
>  static int nfsd4_ssc_umount_timeout = 900000;		/* default to 15 mins */
>  module_param(nfsd4_ssc_umount_timeout, int, 0644);
> @@ -1598,8 +1600,10 @@ static void nfsd4_cb_offload_release(struct nfsd4_callback *cb)
>  {
>  	struct nfsd4_cb_offload *cbo =
>  		container_of(cb, struct nfsd4_cb_offload, co_cb);
> +	struct nfsd4_copy *copy =
> +		container_of(cbo, struct nfsd4_copy, cp_cb_offload);
>  
> -	kfree(cbo);
> +	cleanup_async_copy(copy);
>  }
>  
>  static int nfsd4_cb_offload_done(struct nfsd4_callback *cb,
> @@ -1730,13 +1734,10 @@ static void cleanup_async_copy(struct nfsd4_copy *copy)
>  	nfs4_put_copy(copy);
>  }
>  
> +/* Kick off a CB_OFFLOAD callback, but don't wait for the response */
>  static void nfsd4_send_cb_offload(struct nfsd4_copy *copy)
>  {
> -	struct nfsd4_cb_offload *cbo;
> -
> -	cbo = kzalloc(sizeof(*cbo), GFP_KERNEL);
> -	if (!cbo)
> -		return;
> +	struct nfsd4_cb_offload *cbo = &copy->cp_cb_offload;
>  
>  	memcpy(&cbo->co_res, &copy->cp_res, sizeof(copy->cp_res));
>  	memcpy(&cbo->co_fh, &copy->fh, sizeof(copy->fh));
> @@ -1786,10 +1787,13 @@ static int nfsd4_do_async_copy(void *data)
>  	}
>  
>  do_callback:
> +	/* The kthread exits forthwith. Ensure that a subsequent
> +	 * OFFLOAD_CANCEL won't try to kill it again. */
> +	set_bit(NFSD4_COPY_F_STOPPED, &copy->cp_flags);
> +
>  	set_bit(NFSD4_COPY_F_COMPLETED, &copy->cp_flags);
>  	trace_nfsd_copy_async_done(copy);
>  	nfsd4_send_cb_offload(copy);
> -	cleanup_async_copy(copy);
>  	return 0;
>  }
>  
> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> index 2a21a7662e03..dec29afa43f3 100644
> --- a/fs/nfsd/xdr4.h
> +++ b/fs/nfsd/xdr4.h
> @@ -699,6 +699,9 @@ struct nfsd4_copy {
>  	struct nfsd42_write_res	cp_res;
>  	struct knfsd_fh		fh;
>  
> +	/* offload callback */
> +	struct nfsd4_cb_offload	cp_cb_offload;
> +
>  	struct nfs4_client      *cp_clp;
>  
>  	struct nfsd_file        *nf_src;
> -- 
> 2.46.2
>
diff mbox series

Patch

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index b5a6bf4f459f..a3c564a9596c 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -57,6 +57,8 @@  module_param(inter_copy_offload_enable, bool, 0644);
 MODULE_PARM_DESC(inter_copy_offload_enable,
 		 "Enable inter server to server copy offload. Default: false");
 
+static void cleanup_async_copy(struct nfsd4_copy *copy);
+
 #ifdef CONFIG_NFSD_V4_2_INTER_SSC
 static int nfsd4_ssc_umount_timeout = 900000;		/* default to 15 mins */
 module_param(nfsd4_ssc_umount_timeout, int, 0644);
@@ -1598,8 +1600,10 @@  static void nfsd4_cb_offload_release(struct nfsd4_callback *cb)
 {
 	struct nfsd4_cb_offload *cbo =
 		container_of(cb, struct nfsd4_cb_offload, co_cb);
+	struct nfsd4_copy *copy =
+		container_of(cbo, struct nfsd4_copy, cp_cb_offload);
 
-	kfree(cbo);
+	cleanup_async_copy(copy);
 }
 
 static int nfsd4_cb_offload_done(struct nfsd4_callback *cb,
@@ -1730,13 +1734,10 @@  static void cleanup_async_copy(struct nfsd4_copy *copy)
 	nfs4_put_copy(copy);
 }
 
+/* Kick off a CB_OFFLOAD callback, but don't wait for the response */
 static void nfsd4_send_cb_offload(struct nfsd4_copy *copy)
 {
-	struct nfsd4_cb_offload *cbo;
-
-	cbo = kzalloc(sizeof(*cbo), GFP_KERNEL);
-	if (!cbo)
-		return;
+	struct nfsd4_cb_offload *cbo = &copy->cp_cb_offload;
 
 	memcpy(&cbo->co_res, &copy->cp_res, sizeof(copy->cp_res));
 	memcpy(&cbo->co_fh, &copy->fh, sizeof(copy->fh));
@@ -1786,10 +1787,13 @@  static int nfsd4_do_async_copy(void *data)
 	}
 
 do_callback:
+	/* The kthread exits forthwith. Ensure that a subsequent
+	 * OFFLOAD_CANCEL won't try to kill it again. */
+	set_bit(NFSD4_COPY_F_STOPPED, &copy->cp_flags);
+
 	set_bit(NFSD4_COPY_F_COMPLETED, &copy->cp_flags);
 	trace_nfsd_copy_async_done(copy);
 	nfsd4_send_cb_offload(copy);
-	cleanup_async_copy(copy);
 	return 0;
 }
 
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 2a21a7662e03..dec29afa43f3 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -699,6 +699,9 @@  struct nfsd4_copy {
 	struct nfsd42_write_res	cp_res;
 	struct knfsd_fh		fh;
 
+	/* offload callback */
+	struct nfsd4_cb_offload	cp_cb_offload;
+
 	struct nfs4_client      *cp_clp;
 
 	struct nfsd_file        *nf_src;