[v2,4/6] virtiofs: support bounce buffer backed by scattered pages

Message ID 20240228144126.2864064-5-houtao@huaweicloud.com (mailing list archive)
State New, archived
Series: virtiofs: fix the warning for ITER_KVEC dio

Commit Message

Hou Tao Feb. 28, 2024, 2:41 p.m. UTC
From: Hou Tao <houtao1@huawei.com>

When reading a file kept in virtiofs from the kernel (e.g., when
insmod-ing a kernel module), if the cache of virtiofs is disabled, the
read buffer will be passed to virtiofs through out_args[0].value
instead of pages. Because virtiofs can't get the pages for the read
buffer, virtio_fs_argbuf_new() will create a bounce buffer for the
read buffer by using kmalloc() and copy the read buffer into the
bounce buffer. If the read buffer is large (e.g., 1MB), the
allocation will incur significant stress on the memory subsystem.

So instead of allocating the bounce buffer by using kmalloc(),
allocate a bounce buffer which is backed by scattered pages. The
original idea was to use vmap(), but GFP_ATOMIC allocation is not
possible with vmap(). To simplify the copy operations in the bounce
buffer, use a bio_vec flex array to represent the argbuf. Also add an
is_flat field in struct virtio_fs_argbuf to distinguish between
kmalloc-ed and scattered bounce buffers.
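
For reference, a condensed view of the argbuf layout this patch
introduces (the full definitions are in the diff below):

	struct virtio_fs_argbuf {
		bool is_flat;
		union {
			struct virtio_fs_flat_argbuf f;      /* f.buf: contiguous */
			struct virtio_fs_scattered_argbuf s; /* s.bvec[]: per-page */
		};
	};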

Signed-off-by: Hou Tao <houtao1@huawei.com>
---
 fs/fuse/virtio_fs.c | 163 ++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 149 insertions(+), 14 deletions(-)

Comments

Brian Foster Feb. 29, 2024, 3:01 p.m. UTC | #1
On Wed, Feb 28, 2024 at 10:41:24PM +0800, Hou Tao wrote:
> From: Hou Tao <houtao1@huawei.com>
> 
> When reading a file kept in virtiofs from the kernel (e.g., when
> insmod-ing a kernel module), if the cache of virtiofs is disabled, the
> read buffer will be passed to virtiofs through out_args[0].value
> instead of pages. Because virtiofs can't get the pages for the read
> buffer, virtio_fs_argbuf_new() will create a bounce buffer for the
> read buffer by using kmalloc() and copy the read buffer into the
> bounce buffer. If the read buffer is large (e.g., 1MB), the
> allocation will incur significant stress on the memory subsystem.
> 
> So instead of allocating the bounce buffer by using kmalloc(),
> allocate a bounce buffer which is backed by scattered pages. The
> original idea was to use vmap(), but GFP_ATOMIC allocation is not
> possible with vmap(). To simplify the copy operations in the bounce
> buffer, use a bio_vec flex array to represent the argbuf. Also add an
> is_flat field in struct virtio_fs_argbuf to distinguish between
> kmalloc-ed and scattered bounce buffers.
> 
> Signed-off-by: Hou Tao <houtao1@huawei.com>
> ---
>  fs/fuse/virtio_fs.c | 163 ++++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 149 insertions(+), 14 deletions(-)
> 
> diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
> index f10fff7f23a0f..ffea684bd100d 100644
> --- a/fs/fuse/virtio_fs.c
> +++ b/fs/fuse/virtio_fs.c
...
> @@ -408,42 +425,143 @@ static void virtio_fs_request_dispatch_work(struct work_struct *work)
>  	}
>  }
>  
...  
>  static void virtio_fs_argbuf_copy_from_in_arg(struct virtio_fs_argbuf *argbuf,
>  					      unsigned int offset,
>  					      const void *src, unsigned int len)
>  {
> -	memcpy(argbuf->buf + offset, src, len);
> +	struct iov_iter iter;
> +	unsigned int copied;
> +
> +	if (argbuf->is_flat) {
> +		memcpy(argbuf->f.buf + offset, src, len);
> +		return;
> +	}
> +
> +	iov_iter_bvec(&iter, ITER_DEST, argbuf->s.bvec,
> +		      argbuf->s.nr, argbuf->s.size);
> +	iov_iter_advance(&iter, offset);

Hi Hou,

Just a random comment, but it seems a little inefficient to reinit and
re-advance the iter like this on every copy/call. It looks like offset
is already incremented in the callers of the argbuf copy helpers.
Perhaps the iov_iter could be lifted into the callers and passed down,
or even just included in the argbuf structure and initialized at alloc
time?

Brian

> +
> +	copied = _copy_to_iter(src, len, &iter);
> +	WARN_ON_ONCE(copied != len);
>  }
>  
>  static unsigned int
> @@ -451,15 +569,32 @@ virtio_fs_argbuf_out_args_offset(struct virtio_fs_argbuf *argbuf,
>  				 const struct fuse_args *args)
>  {
>  	unsigned int num_in = args->in_numargs - args->in_pages;
> +	unsigned int offset = fuse_len_args(num_in,
> +					    (struct fuse_arg *)args->in_args);
>  
> -	return fuse_len_args(num_in, (struct fuse_arg *)args->in_args);
> +	if (argbuf->is_flat)
> +		return offset;
> +	return round_up(offset, PAGE_SIZE);
>  }
>  
>  static void virtio_fs_argbuf_copy_to_out_arg(struct virtio_fs_argbuf *argbuf,
>  					     unsigned int offset, void *dst,
>  					     unsigned int len)
>  {
> -	memcpy(dst, argbuf->buf + offset, len);
> +	struct iov_iter iter;
> +	unsigned int copied;
> +
> +	if (argbuf->is_flat) {
> +		memcpy(dst, argbuf->f.buf + offset, len);
> +		return;
> +	}
> +
> +	iov_iter_bvec(&iter, ITER_SOURCE, argbuf->s.bvec,
> +		      argbuf->s.nr, argbuf->s.size);
> +	iov_iter_advance(&iter, offset);
> +
> +	copied = _copy_from_iter(dst, len, &iter);
> +	WARN_ON_ONCE(copied != len);
>  }
>  
>  /*
> @@ -1154,7 +1289,7 @@ static unsigned int sg_init_fuse_args(struct scatterlist *sg,
>  	len = fuse_len_args(numargs - argpages, args);
>  	if (len)
>  		total_sgs += virtio_fs_argbuf_setup_sg(req->argbuf, *len_used,
> -						       len, &sg[total_sgs]);
> +						       &len, &sg[total_sgs]);
>  
>  	if (argpages)
>  		total_sgs += sg_init_fuse_pages(&sg[total_sgs],
> @@ -1199,7 +1334,7 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
>  	}
>  
>  	/* Use a bounce buffer since stack args cannot be mapped */
> -	req->argbuf = virtio_fs_argbuf_new(args, GFP_ATOMIC);
> +	req->argbuf = virtio_fs_argbuf_new(args, GFP_ATOMIC, true);
>  	if (!req->argbuf) {
>  		ret = -ENOMEM;
>  		goto out;
> -- 
> 2.29.2
> 
>
Hou Tao March 9, 2024, 4:14 a.m. UTC | #2
Hi,

On 2/29/2024 11:01 PM, Brian Foster wrote:
> On Wed, Feb 28, 2024 at 10:41:24PM +0800, Hou Tao wrote:
>> From: Hou Tao <houtao1@huawei.com>
>>
>> When reading a file kept in virtiofs from the kernel (e.g., when
>> insmod-ing a kernel module), if the cache of virtiofs is disabled, the
>> read buffer will be passed to virtiofs through out_args[0].value
>> instead of pages. Because virtiofs can't get the pages for the read
>> buffer, virtio_fs_argbuf_new() will create a bounce buffer for the
>> read buffer by using kmalloc() and copy the read buffer into the
>> bounce buffer. If the read buffer is large (e.g., 1MB), the
>> allocation will incur significant stress on the memory subsystem.
>>
>> So instead of allocating the bounce buffer by using kmalloc(),
>> allocate a bounce buffer which is backed by scattered pages. The
>> original idea was to use vmap(), but GFP_ATOMIC allocation is not
>> possible with vmap(). To simplify the copy operations in the bounce
>> buffer, use a bio_vec flex array to represent the argbuf. Also add an
>> is_flat field in struct virtio_fs_argbuf to distinguish between
>> kmalloc-ed and scattered bounce buffers.
>>
>> Signed-off-by: Hou Tao <houtao1@huawei.com>
>> ---
>>  fs/fuse/virtio_fs.c | 163 ++++++++++++++++++++++++++++++++++++++++----
>>  1 file changed, 149 insertions(+), 14 deletions(-)
>>
>> diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
>> index f10fff7f23a0f..ffea684bd100d 100644
>> --- a/fs/fuse/virtio_fs.c
>> +++ b/fs/fuse/virtio_fs.c
> ...
>> @@ -408,42 +425,143 @@ static void virtio_fs_request_dispatch_work(struct work_struct *work)
>>  	}
>>  }
>>  
> ...  
>>  static void virtio_fs_argbuf_copy_from_in_arg(struct virtio_fs_argbuf *argbuf,
>>  					      unsigned int offset,
>>  					      const void *src, unsigned int len)
>>  {
>> -	memcpy(argbuf->buf + offset, src, len);
>> +	struct iov_iter iter;
>> +	unsigned int copied;
>> +
>> +	if (argbuf->is_flat) {
>> +		memcpy(argbuf->f.buf + offset, src, len);
>> +		return;
>> +	}
>> +
>> +	iov_iter_bvec(&iter, ITER_DEST, argbuf->s.bvec,
>> +		      argbuf->s.nr, argbuf->s.size);
>> +	iov_iter_advance(&iter, offset);
> Hi Hou,
>
> Just a random comment, but it seems a little inefficient to reinit and
> re-advance the iter like this on every copy/call. It looks like offset
> is already incremented in the callers of the argbuf copy helpers.
> Perhaps the iov_iter could be lifted into the callers and passed down,
> or even just included in the argbuf structure and initialized at alloc
> time?

Sorry for the late reply. I have been busy with an off-site workshop these days.

I tried a similar idea before in which the iov_iter was saved directly
in the argbuf struct, but it didn't work out well. The reason is that
an iov_iter is needed for copying both in_args and out_args, but the
direction is different. A bi-directional iov_iter is currently not
supported, so the code has to initialize the iov_iter twice: once for
copying from in_args and once for copying to out_args.
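
A minimal sketch of what that means with the bvec-backed argbuf from
this patch (two separate iters over the same bvec array, one per
direction):

	struct iov_iter in_iter, out_iter;

	/* for copying in_args into the scattered bounce buffer */
	iov_iter_bvec(&in_iter, ITER_DEST, argbuf->s.bvec,
		      argbuf->s.nr, argbuf->s.size);
	/* for copying out_args back out of the bounce buffer */
	iov_iter_bvec(&out_iter, ITER_SOURCE, argbuf->s.bvec,
		      argbuf->s.nr, argbuf->s.size);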

For a dio read initiated from the kernel, both in_numargs and
out_numargs are 1, so there will be only one iov_iter_advance() in
virtio_fs_argbuf_copy_to_out_arg(), with an offset of 64, so I think
the overhead will be fine. For a dio write initiated from the kernel,
in_numargs is 2 and out_numargs is 1, so there will be two invocations
of iov_iter_advance(): the first with offset=64, and the other with
offset=round_up(64 + write_size, PAGE_SIZE), so the latter may
introduce extra overhead. But compared with the overhead of the data
copy itself, I still think the overhead of calling iov_iter_advance()
is fine.
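
To make those numbers concrete, a sketch of the two offsets for the
dio write case above (write_size and the variable names are purely
illustrative; 64 is the total length of the in-arg headers):

	/* hypothetical 1MB dio write initiated from the kernel */
	unsigned int write_size = 1024 * 1024;

	/* first advance: the data payload follows the 64-byte headers */
	unsigned int data_off = 64;
	/* second advance: the out arg is page-aligned in the argbuf */
	unsigned int out_off = round_up(64 + write_size, PAGE_SIZE);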

> Brian
>
>> +
>> +	copied = _copy_to_iter(src, len, &iter);
>> +	WARN_ON_ONCE(copied != len);
>>  }
>>  
>>  static unsigned int
>> @@ -451,15 +569,32 @@ virtio_fs_argbuf_out_args_offset(struct virtio_fs_argbuf *argbuf,
>>  				 const struct fuse_args *args)
>>  {
>>  	unsigned int num_in = args->in_numargs - args->in_pages;
>> +	unsigned int offset = fuse_len_args(num_in,
>> +					    (struct fuse_arg *)args->in_args);
>>  
>> -	return fuse_len_args(num_in, (struct fuse_arg *)args->in_args);
>> +	if (argbuf->is_flat)
>> +		return offset;
>> +	return round_up(offset, PAGE_SIZE);
>>  }
>>  
>>  static void virtio_fs_argbuf_copy_to_out_arg(struct virtio_fs_argbuf *argbuf,
>>  					     unsigned int offset, void *dst,
>>  					     unsigned int len)
>>  {
>> -	memcpy(dst, argbuf->buf + offset, len);
>> +	struct iov_iter iter;
>> +	unsigned int copied;
>> +
>> +	if (argbuf->is_flat) {
>> +		memcpy(dst, argbuf->f.buf + offset, len);
>> +		return;
>> +	}
>> +
>> +	iov_iter_bvec(&iter, ITER_SOURCE, argbuf->s.bvec,
>> +		      argbuf->s.nr, argbuf->s.size);
>> +	iov_iter_advance(&iter, offset);
>> +
>> +	copied = _copy_from_iter(dst, len, &iter);
>> +	WARN_ON_ONCE(copied != len);
>>  }
>>  
>>  /*
>> @@ -1154,7 +1289,7 @@ static unsigned int sg_init_fuse_args(struct scatterlist *sg,
>>  	len = fuse_len_args(numargs - argpages, args);
>>  	if (len)
>>  		total_sgs += virtio_fs_argbuf_setup_sg(req->argbuf, *len_used,
>> -						       len, &sg[total_sgs]);
>> +						       &len, &sg[total_sgs]);
>>  
>>  	if (argpages)
>>  		total_sgs += sg_init_fuse_pages(&sg[total_sgs],
>> @@ -1199,7 +1334,7 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
>>  	}
>>  
>>  	/* Use a bounce buffer since stack args cannot be mapped */
>> -	req->argbuf = virtio_fs_argbuf_new(args, GFP_ATOMIC);
>> +	req->argbuf = virtio_fs_argbuf_new(args, GFP_ATOMIC, true);
>>  	if (!req->argbuf) {
>>  		ret = -ENOMEM;
>>  		goto out;
>> -- 
>> 2.29.2
>>
>>
Brian Foster March 13, 2024, 12:14 p.m. UTC | #3
On Sat, Mar 09, 2024 at 12:14:23PM +0800, Hou Tao wrote:
> Hi,
> 
> On 2/29/2024 11:01 PM, Brian Foster wrote:
> > On Wed, Feb 28, 2024 at 10:41:24PM +0800, Hou Tao wrote:
> >> From: Hou Tao <houtao1@huawei.com>
> >>
> >> When reading a file kept in virtiofs from the kernel (e.g., when
> >> insmod-ing a kernel module), if the cache of virtiofs is disabled, the
> >> read buffer will be passed to virtiofs through out_args[0].value
> >> instead of pages. Because virtiofs can't get the pages for the read
> >> buffer, virtio_fs_argbuf_new() will create a bounce buffer for the
> >> read buffer by using kmalloc() and copy the read buffer into the
> >> bounce buffer. If the read buffer is large (e.g., 1MB), the
> >> allocation will incur significant stress on the memory subsystem.
> >>
> >> So instead of allocating the bounce buffer by using kmalloc(),
> >> allocate a bounce buffer which is backed by scattered pages. The
> >> original idea was to use vmap(), but GFP_ATOMIC allocation is not
> >> possible with vmap(). To simplify the copy operations in the bounce
> >> buffer, use a bio_vec flex array to represent the argbuf. Also add an
> >> is_flat field in struct virtio_fs_argbuf to distinguish between
> >> kmalloc-ed and scattered bounce buffers.
> >>
> >> Signed-off-by: Hou Tao <houtao1@huawei.com>
> >> ---
> >>  fs/fuse/virtio_fs.c | 163 ++++++++++++++++++++++++++++++++++++++++----
> >>  1 file changed, 149 insertions(+), 14 deletions(-)
> >>
> >> diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
> >> index f10fff7f23a0f..ffea684bd100d 100644
> >> --- a/fs/fuse/virtio_fs.c
> >> +++ b/fs/fuse/virtio_fs.c
> > ...
> >> @@ -408,42 +425,143 @@ static void virtio_fs_request_dispatch_work(struct work_struct *work)
> >>  	}
> >>  }
> >>  
> > ...  
> >>  static void virtio_fs_argbuf_copy_from_in_arg(struct virtio_fs_argbuf *argbuf,
> >>  					      unsigned int offset,
> >>  					      const void *src, unsigned int len)
> >>  {
> >> -	memcpy(argbuf->buf + offset, src, len);
> >> +	struct iov_iter iter;
> >> +	unsigned int copied;
> >> +
> >> +	if (argbuf->is_flat) {
> >> +		memcpy(argbuf->f.buf + offset, src, len);
> >> +		return;
> >> +	}
> >> +
> >> +	iov_iter_bvec(&iter, ITER_DEST, argbuf->s.bvec,
> >> +		      argbuf->s.nr, argbuf->s.size);
> >> +	iov_iter_advance(&iter, offset);
> > Hi Hou,
> >
> > Just a random comment, but it seems a little inefficient to reinit and
> > re-advance the iter like this on every copy/call. It looks like offset
> > is already incremented in the callers of the argbuf copy helpers.
> > Perhaps the iov_iter could be lifted into the callers and passed down,
> > or even just included in the argbuf structure and initialized at alloc
> > time?
> 
> Sorry for the late reply. I have been busy with an off-site workshop these days.
> 

No worries.

> I tried a similar idea before in which the iov_iter was saved directly
> in the argbuf struct, but it didn't work out well. The reason is that
> an iov_iter is needed for copying both in_args and out_args, but the
> direction is different. A bi-directional iov_iter is currently not
> supported, so the code has to initialize the iov_iter twice: once for
> copying from in_args and once for copying to out_args.
> 

Ok, seems reasonable enough.

> For a dio read initiated from the kernel, both in_numargs and
> out_numargs are 1, so there will be only one iov_iter_advance() in
> virtio_fs_argbuf_copy_to_out_arg(), with an offset of 64, so I think
> the overhead will be fine. For a dio write initiated from the kernel,
> in_numargs is 2 and out_numargs is 1, so there will be two invocations
> of iov_iter_advance(): the first with offset=64, and the other with
> offset=round_up(64 + write_size, PAGE_SIZE), so the latter may
> introduce extra overhead. But compared with the overhead of the data
> copy itself, I still think the overhead of calling iov_iter_advance()
> is fine.
> 

I'm not claiming the overhead is some practical problem here, but
rather that we shouldn't need to be concerned with it in the first
place with some cleaner code. It's been a bit since I first looked at
this. I was originally wondering about defining the iov_iter in the
caller and passing it down as a param, but on another look, do the
lowest-level helpers really need to exist?

If you don't anticipate further users, IMO something like the diff
below is a bit cleaner (compile tested only, but no reinits and less
code overall). But that's just my .02, feel free to use it or not.

Brian

--- 8< ---

diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 9ee71051c89f..9de477e9ccd5 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -544,26 +544,6 @@ static unsigned int virtio_fs_argbuf_setup_sg(struct virtio_fs_argbuf *argbuf,
 	return cur - sg;
 }
 
-static void virtio_fs_argbuf_copy_from_in_arg(struct virtio_fs_argbuf *argbuf,
-					      unsigned int offset,
-					      const void *src, unsigned int len)
-{
-	struct iov_iter iter;
-	unsigned int copied;
-
-	if (argbuf->is_flat) {
-		memcpy(argbuf->f.buf + offset, src, len);
-		return;
-	}
-
-	iov_iter_bvec(&iter, ITER_DEST, argbuf->s.bvec,
-		      argbuf->s.nr, argbuf->s.size);
-	iov_iter_advance(&iter, offset);
-
-	copied = _copy_to_iter(src, len, &iter);
-	WARN_ON_ONCE(copied != len);
-}
-
 static unsigned int
 virtio_fs_argbuf_out_args_offset(struct virtio_fs_argbuf *argbuf,
 				 const struct fuse_args *args)
@@ -577,26 +557,6 @@ virtio_fs_argbuf_out_args_offset(struct virtio_fs_argbuf *argbuf,
 	return round_up(offset, PAGE_SIZE);
 }
 
-static void virtio_fs_argbuf_copy_to_out_arg(struct virtio_fs_argbuf *argbuf,
-					     unsigned int offset, void *dst,
-					     unsigned int len)
-{
-	struct iov_iter iter;
-	unsigned int copied;
-
-	if (argbuf->is_flat) {
-		memcpy(dst, argbuf->f.buf + offset, len);
-		return;
-	}
-
-	iov_iter_bvec(&iter, ITER_SOURCE, argbuf->s.bvec,
-		      argbuf->s.nr, argbuf->s.size);
-	iov_iter_advance(&iter, offset);
-
-	copied = _copy_from_iter(dst, len, &iter);
-	WARN_ON_ONCE(copied != len);
-}
-
 /*
  * Returns 1 if queue is full and sender should wait a bit before sending
  * next request, 0 otherwise.
@@ -684,23 +644,41 @@ static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
 static void copy_args_to_argbuf(struct fuse_req *req)
 {
 	struct fuse_args *args = req->args;
+	struct virtio_fs_argbuf *argbuf = req->argbuf;
+	struct iov_iter iter;
+	unsigned int copied;
 	unsigned int offset = 0;
 	unsigned int num_in;
 	unsigned int i;
 
+	if (!argbuf->is_flat) {
+		iov_iter_bvec(&iter, ITER_DEST, argbuf->s.bvec, argbuf->s.nr,
+			argbuf->s.size);
+	}
+
 	num_in = args->in_numargs - args->in_pages;
 	for (i = 0; i < num_in; i++) {
-		virtio_fs_argbuf_copy_from_in_arg(req->argbuf, offset,
-						  args->in_args[i].value,
-						  args->in_args[i].size);
-		offset += args->in_args[i].size;
+		const void *src = args->in_args[i].value;
+		unsigned int len = args->in_args[i].size;
+
+		if (argbuf->is_flat) {
+			memcpy(argbuf->f.buf + offset, src, len);
+			offset += len;
+			continue;
+		}
+
+		/* _copy_to_iter() advances the iter past each arg */
+		copied = _copy_to_iter(src, len, &iter);
+		WARN_ON_ONCE(copied != len);
 	}
 }
 
 /* Copy args out of req->argbuf */
 static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req)
 {
-	struct virtio_fs_argbuf *argbuf;
+	struct virtio_fs_argbuf *argbuf = req->argbuf;
+	struct iov_iter iter;
+	unsigned int copied;
 	unsigned int remaining;
 	unsigned int offset;
 	unsigned int num_out;
@@ -711,10 +689,17 @@ static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req)
 	if (!num_out)
 		goto out;
 
-	argbuf = req->argbuf;
 	offset = virtio_fs_argbuf_out_args_offset(argbuf, args);
+	if (!argbuf->is_flat) {
+		iov_iter_bvec(&iter, ITER_SOURCE, argbuf->s.bvec,
+			      argbuf->s.nr, argbuf->s.size);
+		/* position the iter at the start of the out args area */
+		iov_iter_advance(&iter, offset);
+	}
+
 	for (i = 0; i < num_out; i++) {
 		unsigned int argsize = args->out_args[i].size;
+		void *dst = args->out_args[i].value;
 
 		if (args->out_argvar &&
 		    i == args->out_numargs - 1 &&
@@ -722,10 +707,14 @@ static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req)
 			argsize = remaining;
 		}
 
-		virtio_fs_argbuf_copy_to_out_arg(argbuf, offset,
-						 args->out_args[i].value,
-						 argsize);
-		offset += argsize;
+		if (argbuf->is_flat) {
+			memcpy(dst, argbuf->f.buf + offset, argsize);
+			offset += argsize;
+		} else {
+			/* _copy_from_iter() advances the iter itself */
+			copied = _copy_from_iter(dst, argsize, &iter);
+			WARN_ON_ONCE(copied != argsize);
+		}
 
 		if (i != args->out_numargs - 1)
 			remaining -= argsize;

Patch

diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index f10fff7f23a0f..ffea684bd100d 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -86,10 +86,27 @@  struct virtio_fs_req_work {
 	struct work_struct done_work;
 };
 
-struct virtio_fs_argbuf {
+struct virtio_fs_flat_argbuf {
 	DECLARE_FLEX_ARRAY(u8, buf);
 };
 
+struct virtio_fs_scattered_argbuf {
+	unsigned int size;
+	unsigned int nr;
+	DECLARE_FLEX_ARRAY(struct bio_vec, bvec);
+};
+
+struct virtio_fs_argbuf {
+	bool is_flat;
+	/* Both of these structs end in a flexible array, so the
+	 * union must be the last field.
+	 */
+	union {
+		struct virtio_fs_flat_argbuf f;
+		struct virtio_fs_scattered_argbuf s;
+	};
+};
+
 static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
 				 struct fuse_req *req, bool in_flight);
 
@@ -408,42 +425,143 @@  static void virtio_fs_request_dispatch_work(struct work_struct *work)
 	}
 }
 
+static unsigned int virtio_fs_argbuf_len(unsigned int in_args_len,
+					 unsigned int out_args_len,
+					 bool is_flat)
+{
+	if (is_flat)
+		return in_args_len + out_args_len;
+
+	/*
+	 * Align in_args_len with PAGE_SIZE to reduce the total number of
+	 * sg entries when the value of out_args_len (e.g., the length of
+	 * the read buffer) is page-aligned.
+	 */
+	return round_up(in_args_len, PAGE_SIZE) +
+	       round_up(out_args_len, PAGE_SIZE);
+}
+
 static void virtio_fs_argbuf_free(struct virtio_fs_argbuf *argbuf)
 {
+	unsigned int i;
+
+	if (!argbuf)
+		return;
+
+	if (argbuf->is_flat)
+		goto free_argbuf;
+
+	for (i = 0; i < argbuf->s.nr; i++)
+		__free_page(argbuf->s.bvec[i].bv_page);
+
+free_argbuf:
 	kfree(argbuf);
 }
 
 static struct virtio_fs_argbuf *virtio_fs_argbuf_new(struct fuse_args *args,
-						     gfp_t gfp)
+						     gfp_t gfp, bool is_flat)
 {
 	struct virtio_fs_argbuf *argbuf;
 	unsigned int numargs;
-	unsigned int len;
+	unsigned int in_len, out_len, len;
+	unsigned int i, nr;
 
 	numargs = args->in_numargs - args->in_pages;
-	len = fuse_len_args(numargs, (struct fuse_arg *) args->in_args);
+	in_len = fuse_len_args(numargs, (struct fuse_arg *) args->in_args);
 	numargs = args->out_numargs - args->out_pages;
-	len += fuse_len_args(numargs, args->out_args);
+	out_len = fuse_len_args(numargs, args->out_args);
+	len = virtio_fs_argbuf_len(in_len, out_len, is_flat);
+
+	if (is_flat) {
+		argbuf = kmalloc(struct_size(argbuf, f.buf, len), gfp);
+		if (argbuf)
+			argbuf->is_flat = true;
+
+		return argbuf;
+	}
+
+	nr = len >> PAGE_SHIFT;
+	argbuf = kmalloc(struct_size(argbuf, s.bvec, nr), gfp);
+	if (!argbuf)
+		return NULL;
+
+	argbuf->is_flat = false;
+	argbuf->s.size = len;
+	argbuf->s.nr = 0;
+	for (i = 0; i < nr; i++) {
+		struct page *page;
+
+		page = alloc_page(gfp);
+		if (!page) {
+			virtio_fs_argbuf_free(argbuf);
+			return NULL;
+		}
+		bvec_set_page(&argbuf->s.bvec[i], page, PAGE_SIZE, 0);
+		argbuf->s.nr++;
+	}
+
+	/* Zero the unused space for in_args */
+	if (in_len & ~PAGE_MASK) {
+		struct iov_iter iter;
+		unsigned int to_zero;
+
+		iov_iter_bvec(&iter, ITER_DEST, argbuf->s.bvec, argbuf->s.nr,
+			      argbuf->s.size);
+		iov_iter_advance(&iter, in_len);
 
-	argbuf = kmalloc(struct_size(argbuf, buf, len), gfp);
+		to_zero = PAGE_SIZE - (in_len & ~PAGE_MASK);
+		iov_iter_zero(to_zero, &iter);
+	}
 
 	return argbuf;
 }
 
 static unsigned int virtio_fs_argbuf_setup_sg(struct virtio_fs_argbuf *argbuf,
 					      unsigned int offset,
-					      unsigned int len,
+					      unsigned int *len,
 					      struct scatterlist *sg)
 {
-	sg_init_one(sg, argbuf->buf + offset, len);
-	return 1;
+	struct bvec_iter bi = {
+		.bi_size = offset + *len,
+	};
+	struct scatterlist *cur;
+	struct bio_vec bv;
+
+	if (argbuf->is_flat) {
+		sg_init_one(sg, argbuf->f.buf + offset, *len);
+		return 1;
+	}
+
+	cur = sg;
+	bvec_iter_advance(argbuf->s.bvec, &bi, offset);
+	for_each_bvec(bv, argbuf->s.bvec, bi, bi) {
+		sg_init_table(cur, 1);
+		sg_set_page(cur, bv.bv_page, bv.bv_len, bv.bv_offset);
+		cur++;
+	}
+	*len = round_up(*len, PAGE_SIZE);
+
+	return cur - sg;
 }
 
 static void virtio_fs_argbuf_copy_from_in_arg(struct virtio_fs_argbuf *argbuf,
 					      unsigned int offset,
 					      const void *src, unsigned int len)
 {
-	memcpy(argbuf->buf + offset, src, len);
+	struct iov_iter iter;
+	unsigned int copied;
+
+	if (argbuf->is_flat) {
+		memcpy(argbuf->f.buf + offset, src, len);
+		return;
+	}
+
+	iov_iter_bvec(&iter, ITER_DEST, argbuf->s.bvec,
+		      argbuf->s.nr, argbuf->s.size);
+	iov_iter_advance(&iter, offset);
+
+	copied = _copy_to_iter(src, len, &iter);
+	WARN_ON_ONCE(copied != len);
 }
 
 static unsigned int
@@ -451,15 +569,32 @@  virtio_fs_argbuf_out_args_offset(struct virtio_fs_argbuf *argbuf,
 				 const struct fuse_args *args)
 {
 	unsigned int num_in = args->in_numargs - args->in_pages;
+	unsigned int offset = fuse_len_args(num_in,
+					    (struct fuse_arg *)args->in_args);
 
-	return fuse_len_args(num_in, (struct fuse_arg *)args->in_args);
+	if (argbuf->is_flat)
+		return offset;
+	return round_up(offset, PAGE_SIZE);
 }
 
 static void virtio_fs_argbuf_copy_to_out_arg(struct virtio_fs_argbuf *argbuf,
 					     unsigned int offset, void *dst,
 					     unsigned int len)
 {
-	memcpy(dst, argbuf->buf + offset, len);
+	struct iov_iter iter;
+	unsigned int copied;
+
+	if (argbuf->is_flat) {
+		memcpy(dst, argbuf->f.buf + offset, len);
+		return;
+	}
+
+	iov_iter_bvec(&iter, ITER_SOURCE, argbuf->s.bvec,
+		      argbuf->s.nr, argbuf->s.size);
+	iov_iter_advance(&iter, offset);
+
+	copied = _copy_from_iter(dst, len, &iter);
+	WARN_ON_ONCE(copied != len);
 }
 
 /*
@@ -1154,7 +1289,7 @@  static unsigned int sg_init_fuse_args(struct scatterlist *sg,
 	len = fuse_len_args(numargs - argpages, args);
 	if (len)
 		total_sgs += virtio_fs_argbuf_setup_sg(req->argbuf, *len_used,
-						       len, &sg[total_sgs]);
+						       &len, &sg[total_sgs]);
 
 	if (argpages)
 		total_sgs += sg_init_fuse_pages(&sg[total_sgs],
@@ -1199,7 +1334,7 @@  static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
 	}
 
 	/* Use a bounce buffer since stack args cannot be mapped */
-	req->argbuf = virtio_fs_argbuf_new(args, GFP_ATOMIC);
+	req->argbuf = virtio_fs_argbuf_new(args, GFP_ATOMIC, true);
 	if (!req->argbuf) {
 		ret = -ENOMEM;
 		goto out;