diff mbox series

[V3] nfsdcld: fix cld pipe read size

Message ID 20250227005530.3358455-1-zhangjian496@huawei.com (mailing list archive)
State Handled Elsewhere
Headers show
Series [V3] nfsdcld: fix cld pipe read size | expand

Commit Message

zhangjian Feb. 27, 2025, 12:55 a.m. UTC
When nfsd inits failed for detecting cld version in
nfsd4_client_tracking_init, kernel may assume nfsdcld support version 1
message format and try to upcall with v1 message size to nfsdcld.
There exists one error case in the following process, causeing nfsd
hunging for nfsdcld replay:

kernel write to pipe->msgs (v1 msg length)
    |--------- first msg --------|-------- second message -------|

nfsdcld read from pipe->msgs (v2 msg length)
    |------------ first msg --------------|---second message-----|
    |  valid message             | ignore |     wrong message    |

When two nfsd kernel thread add two upcall messages to cld pipe with v1
version cld_msg (size == 1034) concurrently,but nfsdcld reads with v2
version size(size == 1067), 33 bytes of the second message will be read
and merged with first message. The 33 bytes in second message will be
ignored. Nfsdcld will then read 1001 bytes in second message, which cause
FATAL in cld_messaged_size checking. Nfsd kernel thread will hang for
it forever until nfs server restarts.

Signed-off-by: zhangjian <zhangjian496@huawei.com>
Reviewed-by: Scott Mayhew <smayhew@redhat.com>
---
 utils/nfsdcld/nfsdcld.c | 65 ++++++++++++++++++++++++++++-------------
 1 file changed, 45 insertions(+), 20 deletions(-)

Comments

Scott Mayhew Feb. 27, 2025, 9:25 p.m. UTC | #1
On Thu, 27 Feb 2025, zhangjian wrote:

> When nfsd inits failed for detecting cld version in
> nfsd4_client_tracking_init, kernel may assume nfsdcld support version 1
> message format and try to upcall with v1 message size to nfsdcld.
> There exists one error case in the following process, causeing nfsd
> hunging for nfsdcld replay:
> 
> kernel write to pipe->msgs (v1 msg length)
>     |--------- first msg --------|-------- second message -------|
> 
> nfsdcld read from pipe->msgs (v2 msg length)
>     |------------ first msg --------------|---second message-----|
>     |  valid message             | ignore |     wrong message    |
> 
> When two nfsd kernel thread add two upcall messages to cld pipe with v1
> version cld_msg (size == 1034) concurrently,but nfsdcld reads with v2
> version size(size == 1067), 33 bytes of the second message will be read
> and merged with first message. The 33 bytes in second message will be
> ignored. Nfsdcld will then read 1001 bytes in second message, which cause
> FATAL in cld_messaged_size checking. Nfsd kernel thread will hang for
> it forever until nfs server restarts.
> 
> Signed-off-by: zhangjian <zhangjian496@huawei.com>
> Reviewed-by: Scott Mayhew <smayhew@redhat.com>
> ---
>  utils/nfsdcld/nfsdcld.c | 65 ++++++++++++++++++++++++++++-------------
>  1 file changed, 45 insertions(+), 20 deletions(-)
> 
> diff --git a/utils/nfsdcld/nfsdcld.c b/utils/nfsdcld/nfsdcld.c
> index dbc7a57..005d1ea 100644
> --- a/utils/nfsdcld/nfsdcld.c
> +++ b/utils/nfsdcld/nfsdcld.c
> @@ -716,35 +716,60 @@ reply:
>  	}
>  }
>  
> -static void
> -cldcb(int UNUSED(fd), short which, void *data)
> +static int
> +cld_pipe_read_msg(struct cld_client *clnt)
>  {
> -	ssize_t len;
> -	struct cld_client *clnt = data;
> -#if UPCALL_VERSION >= 2
> -	struct cld_msg_v2 *cmsg = &clnt->cl_u.cl_msg_v2;
> -#else
> -	struct cld_msg *cmsg = &clnt->cl_u.cl_msg;
> -#endif
> +	ssize_t len, left_len;
> +	ssize_t hdr_len = sizeof(struct cld_msg_hdr);
> +	struct cld_msg_hdr *hdr = (struct cld_msg_hdr *)&clnt->cl_u;
>  
> -	if (which != EV_READ)
> -		goto out;
> +	len = atomicio(read, clnt->cl_fd, hdr, hdr_len);
>  
> -	len = atomicio(read, clnt->cl_fd, cmsg, sizeof(*cmsg));
>  	if (len <= 0) {
>  		xlog(L_ERROR, "%s: pipe read failed: %m", __func__);
> -		cld_pipe_open(clnt);
> -		goto out;
> +		goto fail_read;
>  	}

We probably also want to fail if len != hdr_len.
>  
> -	if (cmsg->cm_vers > UPCALL_VERSION) {
> +	switch (hdr->cm_vers) {
> +	case 1:
> +		left_len = sizeof(struct cld_msg) - hdr_len;
> +		break;
> +	case 2:
> +		left_len = sizeof(struct cld_msg_v2) - hdr_len;
> +		break;
> +	default:
>  		xlog(L_ERROR, "%s: unsupported upcall version: %hu",
> -				__func__, cmsg->cm_vers);
> -		cld_pipe_open(clnt);
> -		goto out;
> +			__func__, hdr->cm_vers);
> +		goto fail_read;
>  	}
>  
> -	switch(cmsg->cm_cmd) {
> +	len = atomicio(read, clnt->cl_fd, hdr, left_len);

This is reading into the beginning of the message and overwriting the
header.  In the original version of this patch you had hdr + 1 as the
to read the data into.

-Scott

> +
> +	if (len <= 0) {
> +		xlog(L_ERROR, "%s: pipe read failed: %m", __func__);
> +		goto fail_read;
> +	}
> +
> +	return 0;
> +
> +fail_read:
> +	cld_pipe_open(clnt);
> +	return -1;
> +}
> +
> +static void
> +cldcb(int UNUSED(fd), short which, void *data)
> +{
> +	struct cld_client *clnt = data;
> +	struct cld_msg_hdr *hdr = (struct cld_msg_hdr *)&clnt->cl_u;
> +
> +	if (which != EV_READ)
> +		goto out;
> +
> +	if (cld_pipe_read_msg(clnt) < 0)
> +		goto out;
> +
> +	switch (hdr->cm_cmd) {
>  	case Cld_Create:
>  		cld_create(clnt);
>  		break;
> @@ -765,7 +790,7 @@ cldcb(int UNUSED(fd), short which, void *data)
>  		break;
>  	default:
>  		xlog(L_WARNING, "%s: command %u is not yet implemented",
> -				__func__, cmsg->cm_cmd);
> +				__func__, hdr->cm_cmd);
>  		cld_not_implemented(clnt);
>  	}
>  out:
> -- 
> 2.33.0
>
zhangjian Feb. 28, 2025, 3:43 a.m. UTC | #2
It's really that you say. I will fix it at once.

On 2025/2/28 5:25, Scott Mayhew wrote:
> On Thu, 27 Feb 2025, zhangjian wrote:
> 
>> When nfsd inits failed for detecting cld version in
>> nfsd4_client_tracking_init, kernel may assume nfsdcld support version 1
>> message format and try to upcall with v1 message size to nfsdcld.
>> There exists one error case in the following process, causeing nfsd
>> hunging for nfsdcld replay:
>>
>> kernel write to pipe->msgs (v1 msg length)
>>     |--------- first msg --------|-------- second message -------|
>>
>> nfsdcld read from pipe->msgs (v2 msg length)
>>     |------------ first msg --------------|---second message-----|
>>     |  valid message             | ignore |     wrong message    |
>>
>> When two nfsd kernel thread add two upcall messages to cld pipe with v1
>> version cld_msg (size == 1034) concurrently,but nfsdcld reads with v2
>> version size(size == 1067), 33 bytes of the second message will be read
>> and merged with first message. The 33 bytes in second message will be
>> ignored. Nfsdcld will then read 1001 bytes in second message, which cause
>> FATAL in cld_messaged_size checking. Nfsd kernel thread will hang for
>> it forever until nfs server restarts.
>>
>> Signed-off-by: zhangjian <zhangjian496@huawei.com>
>> Reviewed-by: Scott Mayhew <smayhew@redhat.com>
>> ---
>>  utils/nfsdcld/nfsdcld.c | 65 ++++++++++++++++++++++++++++-------------
>>  1 file changed, 45 insertions(+), 20 deletions(-)
>>
>> diff --git a/utils/nfsdcld/nfsdcld.c b/utils/nfsdcld/nfsdcld.c
>> index dbc7a57..005d1ea 100644
>> --- a/utils/nfsdcld/nfsdcld.c
>> +++ b/utils/nfsdcld/nfsdcld.c
>> @@ -716,35 +716,60 @@ reply:
>>  	}
>>  }
>>  
>> -static void
>> -cldcb(int UNUSED(fd), short which, void *data)
>> +static int
>> +cld_pipe_read_msg(struct cld_client *clnt)
>>  {
>> -	ssize_t len;
>> -	struct cld_client *clnt = data;
>> -#if UPCALL_VERSION >= 2
>> -	struct cld_msg_v2 *cmsg = &clnt->cl_u.cl_msg_v2;
>> -#else
>> -	struct cld_msg *cmsg = &clnt->cl_u.cl_msg;
>> -#endif
>> +	ssize_t len, left_len;
>> +	ssize_t hdr_len = sizeof(struct cld_msg_hdr);
>> +	struct cld_msg_hdr *hdr = (struct cld_msg_hdr *)&clnt->cl_u;
>>  
>> -	if (which != EV_READ)
>> -		goto out;
>> +	len = atomicio(read, clnt->cl_fd, hdr, hdr_len);
>>  
>> -	len = atomicio(read, clnt->cl_fd, cmsg, sizeof(*cmsg));
>>  	if (len <= 0) {
>>  		xlog(L_ERROR, "%s: pipe read failed: %m", __func__);
>> -		cld_pipe_open(clnt);
>> -		goto out;
>> +		goto fail_read;
>>  	}
> 
> We probably also want to fail if len != hdr_len.
>>  
>> -	if (cmsg->cm_vers > UPCALL_VERSION) {
>> +	switch (hdr->cm_vers) {
>> +	case 1:
>> +		left_len = sizeof(struct cld_msg) - hdr_len;
>> +		break;
>> +	case 2:
>> +		left_len = sizeof(struct cld_msg_v2) - hdr_len;
>> +		break;
>> +	default:
>>  		xlog(L_ERROR, "%s: unsupported upcall version: %hu",
>> -				__func__, cmsg->cm_vers);
>> -		cld_pipe_open(clnt);
>> -		goto out;
>> +			__func__, hdr->cm_vers);
>> +		goto fail_read;
>>  	}
>>  
>> -	switch(cmsg->cm_cmd) {
>> +	len = atomicio(read, clnt->cl_fd, hdr, left_len);
> 
> This is reading into the beginning of the message and overwriting the
> header.  In the original version of this patch you had hdr + 1 as the
> to read the data into.
> 
> -Scott
> 
>> +
>> +	if (len <= 0) {
>> +		xlog(L_ERROR, "%s: pipe read failed: %m", __func__);
>> +		goto fail_read;
>> +	}
>> +
>> +	return 0;
>> +
>> +fail_read:
>> +	cld_pipe_open(clnt);
>> +	return -1;
>> +}
>> +
>> +static void
>> +cldcb(int UNUSED(fd), short which, void *data)
>> +{
>> +	struct cld_client *clnt = data;
>> +	struct cld_msg_hdr *hdr = (struct cld_msg_hdr *)&clnt->cl_u;
>> +
>> +	if (which != EV_READ)
>> +		goto out;
>> +
>> +	if (cld_pipe_read_msg(clnt) < 0)
>> +		goto out;
>> +
>> +	switch (hdr->cm_cmd) {
>>  	case Cld_Create:
>>  		cld_create(clnt);
>>  		break;
>> @@ -765,7 +790,7 @@ cldcb(int UNUSED(fd), short which, void *data)
>>  		break;
>>  	default:
>>  		xlog(L_WARNING, "%s: command %u is not yet implemented",
>> -				__func__, cmsg->cm_cmd);
>> +				__func__, hdr->cm_cmd);
>>  		cld_not_implemented(clnt);
>>  	}
>>  out:
>> -- 
>> 2.33.0
>>
> 
>
diff mbox series

Patch

diff --git a/utils/nfsdcld/nfsdcld.c b/utils/nfsdcld/nfsdcld.c
index dbc7a57..005d1ea 100644
--- a/utils/nfsdcld/nfsdcld.c
+++ b/utils/nfsdcld/nfsdcld.c
@@ -716,35 +716,60 @@  reply:
 	}
 }
 
-static void
-cldcb(int UNUSED(fd), short which, void *data)
+static int
+cld_pipe_read_msg(struct cld_client *clnt)
 {
-	ssize_t len;
-	struct cld_client *clnt = data;
-#if UPCALL_VERSION >= 2
-	struct cld_msg_v2 *cmsg = &clnt->cl_u.cl_msg_v2;
-#else
-	struct cld_msg *cmsg = &clnt->cl_u.cl_msg;
-#endif
+	ssize_t len, left_len;
+	ssize_t hdr_len = sizeof(struct cld_msg_hdr);
+	struct cld_msg_hdr *hdr = (struct cld_msg_hdr *)&clnt->cl_u;
 
-	if (which != EV_READ)
-		goto out;
+	len = atomicio(read, clnt->cl_fd, hdr, hdr_len);
 
-	len = atomicio(read, clnt->cl_fd, cmsg, sizeof(*cmsg));
 	if (len <= 0) {
 		xlog(L_ERROR, "%s: pipe read failed: %m", __func__);
-		cld_pipe_open(clnt);
-		goto out;
+		goto fail_read;
 	}
 
-	if (cmsg->cm_vers > UPCALL_VERSION) {
+	switch (hdr->cm_vers) {
+	case 1:
+		left_len = sizeof(struct cld_msg) - hdr_len;
+		break;
+	case 2:
+		left_len = sizeof(struct cld_msg_v2) - hdr_len;
+		break;
+	default:
 		xlog(L_ERROR, "%s: unsupported upcall version: %hu",
-				__func__, cmsg->cm_vers);
-		cld_pipe_open(clnt);
-		goto out;
+			__func__, hdr->cm_vers);
+		goto fail_read;
 	}
 
-	switch(cmsg->cm_cmd) {
+	len = atomicio(read, clnt->cl_fd, hdr, left_len);
+
+	if (len <= 0) {
+		xlog(L_ERROR, "%s: pipe read failed: %m", __func__);
+		goto fail_read;
+	}
+
+	return 0;
+
+fail_read:
+	cld_pipe_open(clnt);
+	return -1;
+}
+
+static void
+cldcb(int UNUSED(fd), short which, void *data)
+{
+	struct cld_client *clnt = data;
+	struct cld_msg_hdr *hdr = (struct cld_msg_hdr *)&clnt->cl_u;
+
+	if (which != EV_READ)
+		goto out;
+
+	if (cld_pipe_read_msg(clnt) < 0)
+		goto out;
+
+	switch (hdr->cm_cmd) {
 	case Cld_Create:
 		cld_create(clnt);
 		break;
@@ -765,7 +790,7 @@  cldcb(int UNUSED(fd), short which, void *data)
 		break;
 	default:
 		xlog(L_WARNING, "%s: command %u is not yet implemented",
-				__func__, cmsg->cm_cmd);
+				__func__, hdr->cm_cmd);
 		cld_not_implemented(clnt);
 	}
 out: