[RFC,v2,2/8] vhost/vsock: rework packet allocation logic

Message ID 72ae7f76-ffee-3e64-d445-7a0f4261d891@sberdevices.ru (mailing list archive)
State RFC
Series: virtio/vsock: experimental zerocopy receive

Checks

Context Check Description
netdev/tree_selection success Guessed tree name to be net-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success
netdev/cover_letter success Series has a cover letter
netdev/patch_count success
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 5 this patch: 5
netdev/cc_maintainers success CCed 7 of 7 maintainers
netdev/build_clang fail Errors and warnings before: 6 this patch: 6
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 5 this patch: 5
netdev/checkpatch warning CHECK: Alignment should match open parenthesis CHECK: Comparison to NULL could be written "!buf_page" CHECK: Comparison to NULL could be written "!mapped"
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Arseniy Krasnov June 3, 2022, 5:33 a.m. UTC
For packets received from the virtio RX queue, use the
buddy allocator instead of 'kmalloc()' so that such
pages can be inserted into a user-provided vma. The
single call to 'copy_from_iter()' is replaced with a
per-page loop.

Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
---
 drivers/vhost/vsock.c | 81 ++++++++++++++++++++++++++++++++++++-------
 1 file changed, 69 insertions(+), 12 deletions(-)
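
For context on the switch above: slab memory from kmalloc() has no
per-page refcounting guarantees and cannot be handed to
vm_insert_page(), while buddy-allocator pages can, which is what lets
the receive side map them into a user-provided vma. A minimal sketch of
that mapping step, assuming the series ends up using vm_insert_page()
(the helper name here is illustrative, not part of this patch):

/* Sketch: map a packet's buddy-allocated pages into a user vma.
 * Assumes the vma spans at least PAGE_ALIGN(len) bytes.
 */
static int vsock_map_pkt_pages(struct vm_area_struct *vma,
			       struct page *buf_page, u32 len)
{
	unsigned long uaddr = vma->vm_start;
	u32 i, nr_pages = PAGE_ALIGN(len) >> PAGE_SHIFT;

	for (i = 0; i < nr_pages; i++) {
		/* vm_insert_page() only accepts refcounted pages from
		 * the page allocator, hence alloc_pages() in this patch.
		 */
		int err = vm_insert_page(vma, uaddr + i * PAGE_SIZE,
					 buf_page + i);
		if (err)
			return err;
	}

	return 0;
}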

Comments

Stefano Garzarella June 9, 2022, 8:38 a.m. UTC | #1
On Fri, Jun 03, 2022 at 05:33:04AM +0000, Arseniy Krasnov wrote:
>For packets received from the virtio RX queue, use the
>buddy allocator instead of 'kmalloc()' so that such
>pages can be inserted into a user-provided vma. The
>single call to 'copy_from_iter()' is replaced with a
>per-page loop.
>
>Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
>---
> drivers/vhost/vsock.c | 81 ++++++++++++++++++++++++++++++++++++-------
> 1 file changed, 69 insertions(+), 12 deletions(-)
>
>diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
>index e6c9d41db1de..0dc2229f18f7 100644
>--- a/drivers/vhost/vsock.c
>+++ b/drivers/vhost/vsock.c
>@@ -58,6 +58,7 @@ struct vhost_vsock {
>
> 	u32 guest_cid;
> 	bool seqpacket_allow;
>+	bool zerocopy_rx_on;

This is per-device, so a single socket can change the behaviour of all
the sockets on this device.

Can we do something better?

Maybe we can allocate the header, copy it, find the socket, and check
whether zero-copy is enabled for that socket.

Of course we should change or extend virtio_transport_recv_pkt() to
avoid looking up the socket again.
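
A rough sketch of that flow, reusing the existing lookup helpers from
af_vsock (the per-socket 'zerocopy_rx' flag and handing the socket down
to virtio_transport_recv_pkt() are assumptions of this illustration,
not existing code):

	struct sockaddr_vm src, dst;
	struct sock *sk;

	/* The header was already copied into pkt->hdr above; derive
	 * the addresses from it as virtio_transport_recv_pkt() does.
	 */
	vsock_addr_init(&src, le64_to_cpu(pkt->hdr.src_cid),
			le32_to_cpu(pkt->hdr.src_port));
	vsock_addr_init(&dst, le64_to_cpu(pkt->hdr.dst_cid),
			le32_to_cpu(pkt->hdr.dst_port));

	/* Both helpers take a reference on the returned socket. */
	sk = vsock_find_connected_socket(&src, &dst);
	if (!sk)
		sk = vsock_find_bound_socket(&dst);

	if (sk && vsock_sk(sk)->zerocopy_rx) {	/* hypothetical flag */
		struct page *buf_page = alloc_pages(GFP_KERNEL,
						    get_order(pkt->len));

		pkt->buf = buf_page ? page_to_virt(buf_page) : NULL;
	} else {
		pkt->buf = kmalloc(pkt->len, GFP_KERNEL);
	}

	if (!pkt->buf) {
		if (sk)
			sock_put(sk);
		kfree(pkt);
		return NULL;
	}

	/* Keep 'sk' and pass it down so the lookup is not repeated. */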


> };
>
> static u32 vhost_transport_get_local_cid(void)
>@@ -357,6 +358,7 @@ vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
> 		      unsigned int out, unsigned int in)
> {
> 	struct virtio_vsock_pkt *pkt;
>+	struct vhost_vsock *vsock;
> 	struct iov_iter iov_iter;
> 	size_t nbytes;
> 	size_t len;
>@@ -393,20 +395,75 @@ vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
> 		return NULL;
> 	}
>
>-	pkt->buf = kmalloc(pkt->len, GFP_KERNEL);
>-	if (!pkt->buf) {
>-		kfree(pkt);
>-		return NULL;
>-	}
>-
> 	pkt->buf_len = pkt->len;
>+	vsock = container_of(vq->dev, struct vhost_vsock, dev);
>
>-	nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter);
>-	if (nbytes != pkt->len) {
>-		vq_err(vq, "Expected %u byte payload, got %zu bytes\n",
>-		       pkt->len, nbytes);
>-		virtio_transport_free_pkt(pkt);
>-		return NULL;
>+	if (!vsock->zerocopy_rx_on) {
>+		pkt->buf = kmalloc(pkt->len, GFP_KERNEL);
>+
>+		if (!pkt->buf) {
>+			kfree(pkt);
>+			return NULL;
>+		}
>+
>+		pkt->slab_buf = true;
>+		nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter);
>+		if (nbytes != pkt->len) {
>+			vq_err(vq, "Expected %u byte payload, got %zu bytes\n",
>+				pkt->len, nbytes);
>+			virtio_transport_free_pkt(pkt);
>+			return NULL;
>+		}
>+	} else {
>+		struct page *buf_page;
>+		ssize_t pkt_len;
>+		int page_idx;
>+
>+		/* This may over-allocate memory, since we always
>+		 * allocate at least one page per packet.
>+		 */
>+		buf_page = alloc_pages(GFP_KERNEL, get_order(pkt->len));
>+
>+		if (buf_page == NULL) {
>+			kfree(pkt);
>+			return NULL;
>+		}
>+
>+		pkt->buf = page_to_virt(buf_page);
>+
>+		page_idx = 0;
>+		pkt_len = pkt->len;
>+
>+		/* The allocated pages may not be permanently mapped
>+		 * (e.g. highmem), so map and copy them one at a time.
>+		 */
>+		while (pkt_len > 0) {
>+			void *mapped;
>+			size_t to_copy;
>+
>+			mapped = kmap(buf_page + page_idx);
>+
>+			if (mapped == NULL) {
>+				virtio_transport_free_pkt(pkt);
>+				return NULL;
>+			}
>+
>+			to_copy = min(pkt_len, ((ssize_t)PAGE_SIZE));
>+
>+			nbytes = copy_from_iter(mapped, to_copy, &iov_iter);
>+			if (nbytes != to_copy) {
>+				vq_err(vq, "Expected %zu byte payload, got %zu bytes\n",
>+				       to_copy, nbytes);
>+				kunmap(buf_page + page_idx);
>+				virtio_transport_free_pkt(pkt);
>+				return NULL;
>+			}
>+
>+			kunmap(buf_page + page_idx);
>+
>+			pkt_len -= to_copy;
>+			page_idx++;
>+		}
> 	}
>
> 	return pkt;
>-- 
>2.25.1
Arseniy Krasnov June 9, 2022, 12:09 p.m. UTC | #2
On 09.06.2022 11:38, Stefano Garzarella wrote:
> On Fri, Jun 03, 2022 at 05:33:04AM +0000, Arseniy Krasnov wrote:
>> For packets received from the virtio RX queue, use the
>> buddy allocator instead of 'kmalloc()' so that such
>> pages can be inserted into a user-provided vma. The
>> single call to 'copy_from_iter()' is replaced with a
>> per-page loop.
>>
>> Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
>> ---
>> drivers/vhost/vsock.c | 81 ++++++++++++++++++++++++++++++++++++-------
>> 1 file changed, 69 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
>> index e6c9d41db1de..0dc2229f18f7 100644
>> --- a/drivers/vhost/vsock.c
>> +++ b/drivers/vhost/vsock.c
>> @@ -58,6 +58,7 @@ struct vhost_vsock {
>>
>>     u32 guest_cid;
>>     bool seqpacket_allow;
>> +    bool zerocopy_rx_on;
> 
> This is per-device, so a single socket can change the behaviour of all the sockets on this device.

Sure, my mistake

> 
> Can we do something better?
> 
> Maybe we can allocate the header, copy it, find the socket, and check whether zero-copy is enabled for that socket.
> 
> Of course we should change or extend virtio_transport_recv_pkt() to avoid looking up the socket again.

I think yes
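
One possible shape for that change (illustrative only; the extra
parameter is an assumption, not part of this series):

	/* Callers that already resolved the destination socket pass it
	 * in; NULL preserves the current behaviour of looking it up
	 * inside virtio_transport_recv_pkt() itself.
	 */
	void virtio_transport_recv_pkt(struct virtio_transport *t,
				       struct virtio_vsock_pkt *pkt,
				       struct sock *sk);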

> [...]
Patch

diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index e6c9d41db1de..0dc2229f18f7 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -58,6 +58,7 @@  struct vhost_vsock {
 
 	u32 guest_cid;
 	bool seqpacket_allow;
+	bool zerocopy_rx_on;
 };
 
 static u32 vhost_transport_get_local_cid(void)
@@ -357,6 +358,7 @@  vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
 		      unsigned int out, unsigned int in)
 {
 	struct virtio_vsock_pkt *pkt;
+	struct vhost_vsock *vsock;
 	struct iov_iter iov_iter;
 	size_t nbytes;
 	size_t len;
@@ -393,20 +395,75 @@  vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
 		return NULL;
 	}
 
-	pkt->buf = kmalloc(pkt->len, GFP_KERNEL);
-	if (!pkt->buf) {
-		kfree(pkt);
-		return NULL;
-	}
-
 	pkt->buf_len = pkt->len;
+	vsock = container_of(vq->dev, struct vhost_vsock, dev);
 
-	nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter);
-	if (nbytes != pkt->len) {
-		vq_err(vq, "Expected %u byte payload, got %zu bytes\n",
-		       pkt->len, nbytes);
-		virtio_transport_free_pkt(pkt);
-		return NULL;
+	if (!vsock->zerocopy_rx_on) {
+		pkt->buf = kmalloc(pkt->len, GFP_KERNEL);
+
+		if (!pkt->buf) {
+			kfree(pkt);
+			return NULL;
+		}
+
+		pkt->slab_buf = true;
+		nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter);
+		if (nbytes != pkt->len) {
+			vq_err(vq, "Expected %u byte payload, got %zu bytes\n",
+				pkt->len, nbytes);
+			virtio_transport_free_pkt(pkt);
+			return NULL;
+		}
+	} else {
+		struct page *buf_page;
+		ssize_t pkt_len;
+		int page_idx;
+
+		/* This may over-allocate memory, since we always
+		 * allocate at least one page per packet.
+		 */
+		buf_page = alloc_pages(GFP_KERNEL, get_order(pkt->len));
+
+		if (buf_page == NULL) {
+			kfree(pkt);
+			return NULL;
+		}
+
+		pkt->buf = page_to_virt(buf_page);
+
+		page_idx = 0;
+		pkt_len = pkt->len;
+
+		/* The allocated pages may not be permanently mapped
+		 * (e.g. highmem), so map and copy them one at a time.
+		 */
+		while (pkt_len > 0) {
+			void *mapped;
+			size_t to_copy;
+
+			mapped = kmap(buf_page + page_idx);
+
+			if (mapped == NULL) {
+				virtio_transport_free_pkt(pkt);
+				return NULL;
+			}
+
+			to_copy = min(pkt_len, ((ssize_t)PAGE_SIZE));
+
+			nbytes = copy_from_iter(mapped, to_copy, &iov_iter);
+			if (nbytes != to_copy) {
+				vq_err(vq, "Expected %zu byte payload, got %zu bytes\n",
+				       to_copy, nbytes);
+				kunmap(buf_page + page_idx);
+				virtio_transport_free_pkt(pkt);
+				return NULL;
+			}
+
+			kunmap(buf_page + page_idx);
+
+			pkt_len -= to_copy;
+			page_idx++;
+		}
 	}
 
 	return pkt;