diff mbox series

[v11,02/10] btrfs-progs: receive: dynamically allocate sctx->read_buf

Message ID 01efd9dd3a70c1a765549b16d6aa5c4cec8a67e4.1630515568.git.osandov@fb.com (mailing list archive)
State New, archived
Headers show
Series None | expand

Commit Message

Omar Sandoval Sept. 1, 2021, 5:01 p.m. UTC
From: Boris Burkov <boris@bur.io>

In send stream v2, write commands can now be an arbitrary size. For that
reason, we can no longer allocate a fixed array in sctx for read_cmd.
Instead, read_cmd dynamically allocates sctx->read_buf. To avoid
needless reallocations, we reuse read_buf between read_cmd calls by also
keeping track of the size of the allocated buffer in sctx->read_buf_sz.

We do the first allocation of the old default size at the start of
processing the stream, and we only reallocate if we encounter a command
that needs a larger buffer.

Signed-off-by: Boris Burkov <boris@bur.io>
---
 common/send-stream.c | 55 ++++++++++++++++++++++++++++----------------
 send.h               |  2 +-
 2 files changed, 36 insertions(+), 21 deletions(-)

Comments

Nikolay Borisov Oct. 20, 2021, 2:09 p.m. UTC | #1
On 1.09.21 г. 20:01, Omar Sandoval wrote:
> From: Boris Burkov <boris@bur.io>
> 
> In send stream v2, write commands can now be an arbitrary size. For that
> reason, we can no longer allocate a fixed array in sctx for read_cmd.
> Instead, read_cmd dynamically allocates sctx->read_buf. To avoid
> needless reallocations, we reuse read_buf between read_cmd calls by also
> keeping track of the size of the allocated buffer in sctx->read_buf_sz.
> 
> We do the first allocation of the old default size at the start of
> processing the stream, and we only reallocate if we encounter a command
> that needs a larger buffer.
> 
> Signed-off-by: Boris Burkov <boris@bur.io>
> ---
>  common/send-stream.c | 55 ++++++++++++++++++++++++++++----------------
>  send.h               |  2 +-
>  2 files changed, 36 insertions(+), 21 deletions(-)
> 

<snip>

> @@ -124,18 +125,22 @@ static int read_cmd(struct btrfs_send_stream *sctx)
>  		goto out;
>  	}
>  
> -	sctx->cmd_hdr = (struct btrfs_cmd_header *)sctx->read_buf;
> -	cmd = le16_to_cpu(sctx->cmd_hdr->cmd);
> -	cmd_len = le32_to_cpu(sctx->cmd_hdr->len);
> -
> -	if (cmd_len + sizeof(*sctx->cmd_hdr) >= sizeof(sctx->read_buf)) {
> -		ret = -EINVAL;
> -		error("command length %u too big for buffer %zu",
> -				cmd_len, sizeof(sctx->read_buf));
> -		goto out;
> +	cmd_hdr = (struct btrfs_cmd_header *)sctx->read_buf;
> +	cmd_len = le32_to_cpu(cmd_hdr->len);
> +	cmd = le16_to_cpu(cmd_hdr->cmd);
> +	buf_len = sizeof(*cmd_hdr) + cmd_len;
> +	if (sctx->read_buf_sz < buf_len) {
> +		sctx->read_buf = realloc(sctx->read_buf, buf_len);
> +		if (!sctx->read_buf) {

nit: This is prone to a memory leak, because according to
https://en.cppreference.com/w/c/memory/realloc

If there is not enough memory, the old memory block is not freed and
null pointer is returned.


This means if realloc fails it will overwrite sctx->read_buf with NULL,
yet the old memory won't be freed which will cause a memory leak. It can
be argued that's not critical since we'll very quickly terminate the
program afterwards but still.


<snip>
Nikolay Borisov Oct. 20, 2021, 2:35 p.m. UTC | #2
On 20.10.21 г. 17:09, Nikolay Borisov wrote:
> 
> 
> On 1.09.21 г. 20:01, Omar Sandoval wrote:
>> From: Boris Burkov <boris@bur.io>
>>
>> In send stream v2, write commands can now be an arbitrary size. For that

nit: Actually can't commands really be up-to BTRFS_MAX_COMPRESSED + 16K
really  or are we going to leave this as an implementation detail? I'm
fine either way but looking at the changelog of patch 12 in the kernel
series doesn't really mention of arbitrary size, instead it explicitly
is talking about sending the max compressed extent size (128K) + some
space for metadata (the 16K above).

>> reason, we can no longer allocate a fixed array in sctx for read_cmd.
>> Instead, read_cmd dynamically allocates sctx->read_buf. To avoid
>> needless reallocations, we reuse read_buf between read_cmd calls by also
>> keeping track of the size of the allocated buffer in sctx->read_buf_sz.
>>
>> We do the first allocation of the old default size at the start of
>> processing the stream, and we only reallocate if we encounter a command
>> that needs a larger buffer.
>>
>> Signed-off-by: Boris Burkov <boris@bur.io>
>> ---
>>  common/send-stream.c | 55 ++++++++++++++++++++++++++++----------------
>>  send.h               |  2 +-
>>  2 files changed, 36 insertions(+), 21 deletions(-)
>>
> 
> <snip>
> 
>> @@ -124,18 +125,22 @@ static int read_cmd(struct btrfs_send_stream *sctx)
>>  		goto out;
>>  	}
>>  
>> -	sctx->cmd_hdr = (struct btrfs_cmd_header *)sctx->read_buf;
>> -	cmd = le16_to_cpu(sctx->cmd_hdr->cmd);
>> -	cmd_len = le32_to_cpu(sctx->cmd_hdr->len);
>> -
>> -	if (cmd_len + sizeof(*sctx->cmd_hdr) >= sizeof(sctx->read_buf)) {
>> -		ret = -EINVAL;
>> -		error("command length %u too big for buffer %zu",
>> -				cmd_len, sizeof(sctx->read_buf));
>> -		goto out;
>> +	cmd_hdr = (struct btrfs_cmd_header *)sctx->read_buf;
>> +	cmd_len = le32_to_cpu(cmd_hdr->len);
>> +	cmd = le16_to_cpu(cmd_hdr->cmd);
>> +	buf_len = sizeof(*cmd_hdr) + cmd_len;
>> +	if (sctx->read_buf_sz < buf_len) {
>> +		sctx->read_buf = realloc(sctx->read_buf, buf_len);
>> +		if (!sctx->read_buf) {
> 
> nit: This is prone to a memory leak, because according to
> https://en.cppreference.com/w/c/memory/realloc
> 
> If there is not enough memory, the old memory block is not freed and
> null pointer is returned.
> 
> 
> This means if realloc fails it will overwrite sctx->read_buf with NULL,
> yet the old memory won't be freed which will cause a memory leak. It can
> be argued that's not critical since we'll very quickly terminate the
> program afterwards but still.
> 
> 
> <snip>
>
Omar Sandoval Oct. 20, 2021, 5:44 p.m. UTC | #3
On Wed, Oct 20, 2021 at 05:35:42PM +0300, Nikolay Borisov wrote:
> 
> 
> On 20.10.21 г. 17:09, Nikolay Borisov wrote:
> > 
> > 
> > On 1.09.21 г. 20:01, Omar Sandoval wrote:
> >> From: Boris Burkov <boris@bur.io>
> >>
> >> In send stream v2, write commands can now be an arbitrary size. For that
> 
> nit: Actually can't commands really be up-to BTRFS_MAX_COMPRESSED + 16K
> really  or are we going to leave this as an implementation detail? I'm
> fine either way but looking at the changelog of patch 12 in the kernel
> series doesn't really mention of arbitrary size, instead it explicitly
> is talking about sending the max compressed extent size (128K) + some
> space for metadata (the 16K above).

Patch 10 mentions it in the changelog: "It also documents two changes to the
send stream format in v2: the receiver shouldn't assume a maximum command size,
and the DATA attribute is encoded differently to allow for writes larger than
64k".

And in send.h:

-#define BTRFS_SEND_BUF_SIZE SZ_64K
+/*
+ * In send stream v1, no command is larger than 64k. In send stream v2, no limit
+ * should be assumed.
+ */
+#define BTRFS_SEND_BUF_SIZE_V1 SZ_64K

You're correct that right now the limit is BTRFS_MAX_COMPRESSED + 16k,
but I think it's better if userspace doesn't make any assumptions about
that in case we want to send larger commands in the future.

> >> reason, we can no longer allocate a fixed array in sctx for read_cmd.
> >> Instead, read_cmd dynamically allocates sctx->read_buf. To avoid
> >> needless reallocations, we reuse read_buf between read_cmd calls by also
> >> keeping track of the size of the allocated buffer in sctx->read_buf_sz.
> >>
> >> We do the first allocation of the old default size at the start of
> >> processing the stream, and we only reallocate if we encounter a command
> >> that needs a larger buffer.
> >>
> >> Signed-off-by: Boris Burkov <boris@bur.io>
> >> ---
> >>  common/send-stream.c | 55 ++++++++++++++++++++++++++++----------------
> >>  send.h               |  2 +-
> >>  2 files changed, 36 insertions(+), 21 deletions(-)
> >>
> > 
> > <snip>
> > 
> >> @@ -124,18 +125,22 @@ static int read_cmd(struct btrfs_send_stream *sctx)
> >>  		goto out;
> >>  	}
> >>  
> >> -	sctx->cmd_hdr = (struct btrfs_cmd_header *)sctx->read_buf;
> >> -	cmd = le16_to_cpu(sctx->cmd_hdr->cmd);
> >> -	cmd_len = le32_to_cpu(sctx->cmd_hdr->len);
> >> -
> >> -	if (cmd_len + sizeof(*sctx->cmd_hdr) >= sizeof(sctx->read_buf)) {
> >> -		ret = -EINVAL;
> >> -		error("command length %u too big for buffer %zu",
> >> -				cmd_len, sizeof(sctx->read_buf));
> >> -		goto out;
> >> +	cmd_hdr = (struct btrfs_cmd_header *)sctx->read_buf;
> >> +	cmd_len = le32_to_cpu(cmd_hdr->len);
> >> +	cmd = le16_to_cpu(cmd_hdr->cmd);
> >> +	buf_len = sizeof(*cmd_hdr) + cmd_len;
> >> +	if (sctx->read_buf_sz < buf_len) {
> >> +		sctx->read_buf = realloc(sctx->read_buf, buf_len);
> >> +		if (!sctx->read_buf) {
> > 
> > nit: This is prone to a memory leak, because according to
> > https://en.cppreference.com/w/c/memory/realloc
> > 
> > If there is not enough memory, the old memory block is not freed and
> > null pointer is returned.
> > 
> > 
> > This means if realloc fails it will overwrite sctx->read_buf with NULL,
> > yet the old memory won't be freed which will cause a memory leak. It can
> > be argued that's not critical since we'll very quickly terminate the
> > program afterwards but still.

Good catch, I'll fix that.
diff mbox series

Patch

diff --git a/common/send-stream.c b/common/send-stream.c
index cd5aa311..3d3585c3 100644
--- a/common/send-stream.c
+++ b/common/send-stream.c
@@ -35,11 +35,11 @@  struct btrfs_send_attribute {
 };
 
 struct btrfs_send_stream {
-	char read_buf[BTRFS_SEND_BUF_SIZE];
+	char *read_buf;
+	size_t read_buf_sz;
 	int fd;
 
 	int cmd;
-	struct btrfs_cmd_header *cmd_hdr;
 	struct btrfs_send_attribute cmd_attrs[BTRFS_SEND_A_MAX + 1];
 	u32 version;
 
@@ -111,11 +111,12 @@  static int read_cmd(struct btrfs_send_stream *sctx)
 	u32 pos;
 	u32 crc;
 	u32 crc2;
+	struct btrfs_cmd_header *cmd_hdr;
+	size_t buf_len;
 
 	memset(sctx->cmd_attrs, 0, sizeof(sctx->cmd_attrs));
 
-	ASSERT(sizeof(*sctx->cmd_hdr) <= sizeof(sctx->read_buf));
-	ret = read_buf(sctx, sctx->read_buf, sizeof(*sctx->cmd_hdr));
+	ret = read_buf(sctx, sctx->read_buf, sizeof(*cmd_hdr));
 	if (ret < 0)
 		goto out;
 	if (ret) {
@@ -124,18 +125,22 @@  static int read_cmd(struct btrfs_send_stream *sctx)
 		goto out;
 	}
 
-	sctx->cmd_hdr = (struct btrfs_cmd_header *)sctx->read_buf;
-	cmd = le16_to_cpu(sctx->cmd_hdr->cmd);
-	cmd_len = le32_to_cpu(sctx->cmd_hdr->len);
-
-	if (cmd_len + sizeof(*sctx->cmd_hdr) >= sizeof(sctx->read_buf)) {
-		ret = -EINVAL;
-		error("command length %u too big for buffer %zu",
-				cmd_len, sizeof(sctx->read_buf));
-		goto out;
+	cmd_hdr = (struct btrfs_cmd_header *)sctx->read_buf;
+	cmd_len = le32_to_cpu(cmd_hdr->len);
+	cmd = le16_to_cpu(cmd_hdr->cmd);
+	buf_len = sizeof(*cmd_hdr) + cmd_len;
+	if (sctx->read_buf_sz < buf_len) {
+		sctx->read_buf = realloc(sctx->read_buf, buf_len);
+		if (!sctx->read_buf) {
+			ret = -ENOMEM;
+			error("failed to reallocate read buffer for cmd");
+			goto out;
+		}
+		sctx->read_buf_sz = buf_len;
+		/* We need to reset cmd_hdr after realloc of sctx->read_buf */
+		cmd_hdr = (struct btrfs_cmd_header *)sctx->read_buf;
 	}
-
-	data = sctx->read_buf + sizeof(*sctx->cmd_hdr);
+	data = sctx->read_buf + sizeof(*cmd_hdr);
 	ret = read_buf(sctx, data, cmd_len);
 	if (ret < 0)
 		goto out;
@@ -145,11 +150,12 @@  static int read_cmd(struct btrfs_send_stream *sctx)
 		goto out;
 	}
 
-	crc = le32_to_cpu(sctx->cmd_hdr->crc);
-	sctx->cmd_hdr->crc = 0;
+	crc = le32_to_cpu(cmd_hdr->crc);
+	/* in send, crc is computed with header crc = 0, replicate that */
+	cmd_hdr->crc = 0;
 
 	crc2 = crc32c(0, (unsigned char*)sctx->read_buf,
-			sizeof(*sctx->cmd_hdr) + cmd_len);
+			sizeof(*cmd_hdr) + cmd_len);
 
 	if (crc != crc2) {
 		ret = -EINVAL;
@@ -524,19 +530,28 @@  int btrfs_read_and_process_send_stream(int fd,
 		goto out;
 	}
 
+	sctx.read_buf = malloc(BTRFS_SEND_BUF_SIZE_V1);
+	if (!sctx.read_buf) {
+		ret = -ENOMEM;
+		error("unable to allocate send stream read buffer");
+		goto out;
+	}
+	sctx.read_buf_sz = BTRFS_SEND_BUF_SIZE_V1;
+
 	while (1) {
 		ret = read_and_process_cmd(&sctx);
 		if (ret < 0) {
 			last_err = ret;
 			errors++;
 			if (max_errors > 0 && errors >= max_errors)
-				goto out;
+				break;
 		} else if (ret > 0) {
 			if (!honor_end_cmd)
 				ret = 0;
-			goto out;
+			break;
 		}
 	}
+	free(sctx.read_buf);
 
 out:
 	if (last_err && !ret)
diff --git a/send.h b/send.h
index 8dd865ec..228928a0 100644
--- a/send.h
+++ b/send.h
@@ -33,7 +33,7 @@  extern "C" {
 #define BTRFS_SEND_STREAM_MAGIC "btrfs-stream"
 #define BTRFS_SEND_STREAM_VERSION 1
 
-#define BTRFS_SEND_BUF_SIZE SZ_64K
+#define BTRFS_SEND_BUF_SIZE_V1 SZ_64K
 #define BTRFS_SEND_READ_SIZE (1024 * 48)
 
 enum btrfs_tlv_type {