diff mbox series

[RFC,v3,01/11] virtio/vsock: rework packet allocation logic

Message ID f896b8fd-50d2-2512-3966-3775245e9b96@sberdevices.ru (mailing list archive)
State RFC
Headers show
Series virtio/vsock: experimental zerocopy receive | expand

Checks

Context Check Description
netdev/tree_selection success Guessed tree name to be net-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers success CCed 11 of 11 maintainers
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 58 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Arseniy Krasnov Nov. 6, 2022, 7:36 p.m. UTC
To support zerocopy receive, packet's buffer allocation is changed: for
buffers which could be mapped to user's vma we can't use 'kmalloc()'(as
kernel restricts to map slab pages to user's vma) and raw buddy
allocator now called. But, for tx packets(such packets won't be mapped
to user), previous 'kmalloc()' way is used, but with special flag in
packet's structure which allows to distinguish between 'kmalloc()' and
raw pages buffers.

Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
---
 drivers/vhost/vsock.c                   |  1 +
 include/linux/virtio_vsock.h            |  1 +
 net/vmw_vsock/virtio_transport.c        |  8 ++++++--
 net/vmw_vsock/virtio_transport_common.c | 10 +++++++++-
 4 files changed, 17 insertions(+), 3 deletions(-)

Comments

Christophe JAILLET Nov. 6, 2022, 7:50 p.m. UTC | #1
Le 06/11/2022 à 20:36, Arseniy Krasnov a écrit :
> To support zerocopy receive, packet's buffer allocation is changed: for
> buffers which could be mapped to user's vma we can't use 'kmalloc()'(as
> kernel restricts to map slab pages to user's vma) and raw buddy
> allocator now called. But, for tx packets(such packets won't be mapped
> to user), previous 'kmalloc()' way is used, but with special flag in
> packet's structure which allows to distinguish between 'kmalloc()' and
> raw pages buffers.
> 
> Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
> ---
>   drivers/vhost/vsock.c                   |  1 +
>   include/linux/virtio_vsock.h            |  1 +
>   net/vmw_vsock/virtio_transport.c        |  8 ++++++--
>   net/vmw_vsock/virtio_transport_common.c | 10 +++++++++-
>   4 files changed, 17 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> index 5703775af129..65475d128a1d 100644
> --- a/drivers/vhost/vsock.c
> +++ b/drivers/vhost/vsock.c
> @@ -399,6 +399,7 @@ vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
>   		return NULL;
>   	}
>   
> +	pkt->slab_buf = true;
>   	pkt->buf_len = pkt->len;
>   
>   	nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter);
> diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
> index 35d7eedb5e8e..d02cb7aa922f 100644
> --- a/include/linux/virtio_vsock.h
> +++ b/include/linux/virtio_vsock.h
> @@ -50,6 +50,7 @@ struct virtio_vsock_pkt {
>   	u32 off;
>   	bool reply;
>   	bool tap_delivered;
> +	bool slab_buf;
>   };
>   
>   struct virtio_vsock_pkt_info {
> diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
> index ad64f403536a..19909c1e9ba3 100644
> --- a/net/vmw_vsock/virtio_transport.c
> +++ b/net/vmw_vsock/virtio_transport.c
> @@ -255,16 +255,20 @@ static void virtio_vsock_rx_fill(struct virtio_vsock *vsock)
>   	vq = vsock->vqs[VSOCK_VQ_RX];
>   
>   	do {
> +		struct page *buf_page;
> +
>   		pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
>   		if (!pkt)
>   			break;
>   
> -		pkt->buf = kmalloc(buf_len, GFP_KERNEL);
> -		if (!pkt->buf) {
> +		buf_page = alloc_page(GFP_KERNEL);
> +
> +		if (!buf_page) {
>   			virtio_transport_free_pkt(pkt);
>   			break;
>   		}
>   
> +		pkt->buf = page_to_virt(buf_page);
>   		pkt->buf_len = buf_len;
>   		pkt->len = buf_len;
>   
> diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
> index a9980e9b9304..37e8dbfe2f5d 100644
> --- a/net/vmw_vsock/virtio_transport_common.c
> +++ b/net/vmw_vsock/virtio_transport_common.c
> @@ -69,6 +69,7 @@ virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info,
>   		if (!pkt->buf)
>   			goto out_pkt;
>   
> +		pkt->slab_buf = true;
>   		pkt->buf_len = len;
>   
>   		err = memcpy_from_msg(pkt->buf, info->msg, len);
> @@ -1339,7 +1340,14 @@ EXPORT_SYMBOL_GPL(virtio_transport_recv_pkt);
>   
>   void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt)
>   {
> -	kvfree(pkt->buf);
> +	if (pkt->buf_len) {
> +		if (pkt->slab_buf)
> +			kvfree(pkt->buf);

Hi,

kfree()? (according to virtio_transport_alloc_pkt() in -next) or 
something else need to be changed.

> +		else
> +			free_pages((unsigned long)pkt->buf,
> +				   get_order(pkt->buf_len));

In virtio_vsock_rx_fill(), only alloc_page() is used.

Should this be alloc_pages(.., get_order(buf_len)) or free_page() 
(without an 's') here?

Just my 2c,

CJ

> +	}
> +
>   	kfree(pkt);
>   }
>   EXPORT_SYMBOL_GPL(virtio_transport_free_pkt);
Arseniy Krasnov Nov. 7, 2022, 5:23 a.m. UTC | #2
On 06.11.2022 22:50, Christophe JAILLET wrote:
> Le 06/11/2022 à 20:36, Arseniy Krasnov a écrit :
>> To support zerocopy receive, packet's buffer allocation is changed: for
>> buffers which could be mapped to user's vma we can't use 'kmalloc()'(as
>> kernel restricts to map slab pages to user's vma) and raw buddy
>> allocator now called. But, for tx packets(such packets won't be mapped
>> to user), previous 'kmalloc()' way is used, but with special flag in
>> packet's structure which allows to distinguish between 'kmalloc()' and
>> raw pages buffers.
>>
>> Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
>> ---
>>   drivers/vhost/vsock.c                   |  1 +
>>   include/linux/virtio_vsock.h            |  1 +
>>   net/vmw_vsock/virtio_transport.c        |  8 ++++++--
>>   net/vmw_vsock/virtio_transport_common.c | 10 +++++++++-
>>   4 files changed, 17 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
>> index 5703775af129..65475d128a1d 100644
>> --- a/drivers/vhost/vsock.c
>> +++ b/drivers/vhost/vsock.c
>> @@ -399,6 +399,7 @@ vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
>>           return NULL;
>>       }
>>   +    pkt->slab_buf = true;
>>       pkt->buf_len = pkt->len;
>>         nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter);
>> diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
>> index 35d7eedb5e8e..d02cb7aa922f 100644
>> --- a/include/linux/virtio_vsock.h
>> +++ b/include/linux/virtio_vsock.h
>> @@ -50,6 +50,7 @@ struct virtio_vsock_pkt {
>>       u32 off;
>>       bool reply;
>>       bool tap_delivered;
>> +    bool slab_buf;
>>   };
>>     struct virtio_vsock_pkt_info {
>> diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
>> index ad64f403536a..19909c1e9ba3 100644
>> --- a/net/vmw_vsock/virtio_transport.c
>> +++ b/net/vmw_vsock/virtio_transport.c
>> @@ -255,16 +255,20 @@ static void virtio_vsock_rx_fill(struct virtio_vsock *vsock)
>>       vq = vsock->vqs[VSOCK_VQ_RX];
>>         do {
>> +        struct page *buf_page;
>> +
>>           pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
>>           if (!pkt)
>>               break;
>>   -        pkt->buf = kmalloc(buf_len, GFP_KERNEL);
>> -        if (!pkt->buf) {
>> +        buf_page = alloc_page(GFP_KERNEL);
>> +
>> +        if (!buf_page) {
>>               virtio_transport_free_pkt(pkt);
>>               break;
>>           }
>>   +        pkt->buf = page_to_virt(buf_page);
>>           pkt->buf_len = buf_len;
>>           pkt->len = buf_len;
>>   diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
>> index a9980e9b9304..37e8dbfe2f5d 100644
>> --- a/net/vmw_vsock/virtio_transport_common.c
>> +++ b/net/vmw_vsock/virtio_transport_common.c
>> @@ -69,6 +69,7 @@ virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info,
>>           if (!pkt->buf)
>>               goto out_pkt;
>>   +        pkt->slab_buf = true;
>>           pkt->buf_len = len;
>>             err = memcpy_from_msg(pkt->buf, info->msg, len);
>> @@ -1339,7 +1340,14 @@ EXPORT_SYMBOL_GPL(virtio_transport_recv_pkt);
>>     void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt)
>>   {
>> -    kvfree(pkt->buf);
>> +    if (pkt->buf_len) {
>> +        if (pkt->slab_buf)
>> +            kvfree(pkt->buf);
> 
> Hi,
> 
> kfree()? (according to virtio_transport_alloc_pkt() in -next) or something else need to be changed.
> 
Hello Cristophe,

I think, 'kvfree()' is still needed here, because buffer for packet could be allocated by 'kvmalloc()'
in drivers/vhost/vsock.c. Correct me if i'm wrong.

>> +        else
>> +            free_pages((unsigned long)pkt->buf,
>> +                   get_order(pkt->buf_len));
> 
> In virtio_vsock_rx_fill(), only alloc_page() is used.
> 
> Should this be alloc_pages(.., get_order(buf_len)) or free_page() (without an 's') here?
This function frees packets which were allocated in vhost path also. In vhost, for zerocopy
packets alloc_pageS() is used.

Thank You, Arseniy
> 
> Just my 2c,
> 
> CJ
> 
>> +    }
>> +
>>       kfree(pkt);
>>   }
>>   EXPORT_SYMBOL_GPL(virtio_transport_free_pkt);
>
Christophe JAILLET Nov. 7, 2022, 9:24 p.m. UTC | #3
Le 07/11/2022 à 06:23, Arseniy Krasnov a écrit :
> On 06.11.2022 22:50, Christophe JAILLET wrote:
>> Le 06/11/2022 à 20:36, Arseniy Krasnov a écrit :
>>> To support zerocopy receive, packet's buffer allocation is changed: for
>>> buffers which could be mapped to user's vma we can't use 'kmalloc()'(as
>>> kernel restricts to map slab pages to user's vma) and raw buddy
>>> allocator now called. But, for tx packets(such packets won't be mapped
>>> to user), previous 'kmalloc()' way is used, but with special flag in
>>> packet's structure which allows to distinguish between 'kmalloc()' and
>>> raw pages buffers.
>>>
>>> Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
>>> ---
>>>    drivers/vhost/vsock.c                   |  1 +
>>>    include/linux/virtio_vsock.h            |  1 +
>>>    net/vmw_vsock/virtio_transport.c        |  8 ++++++--
>>>    net/vmw_vsock/virtio_transport_common.c | 10 +++++++++-
>>>    4 files changed, 17 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
>>> index 5703775af129..65475d128a1d 100644
>>> --- a/drivers/vhost/vsock.c
>>> +++ b/drivers/vhost/vsock.c
>>> @@ -399,6 +399,7 @@ vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
>>>            return NULL;
>>>        }
>>>    +    pkt->slab_buf = true;
>>>        pkt->buf_len = pkt->len;
>>>          nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter);
>>> diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
>>> index 35d7eedb5e8e..d02cb7aa922f 100644
>>> --- a/include/linux/virtio_vsock.h
>>> +++ b/include/linux/virtio_vsock.h
>>> @@ -50,6 +50,7 @@ struct virtio_vsock_pkt {
>>>        u32 off;
>>>        bool reply;
>>>        bool tap_delivered;
>>> +    bool slab_buf;
>>>    };
>>>      struct virtio_vsock_pkt_info {
>>> diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
>>> index ad64f403536a..19909c1e9ba3 100644
>>> --- a/net/vmw_vsock/virtio_transport.c
>>> +++ b/net/vmw_vsock/virtio_transport.c
>>> @@ -255,16 +255,20 @@ static void virtio_vsock_rx_fill(struct virtio_vsock *vsock)
>>>        vq = vsock->vqs[VSOCK_VQ_RX];
>>>          do {
>>> +        struct page *buf_page;
>>> +
>>>            pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
>>>            if (!pkt)
>>>                break;
>>>    -        pkt->buf = kmalloc(buf_len, GFP_KERNEL);
>>> -        if (!pkt->buf) {
>>> +        buf_page = alloc_page(GFP_KERNEL);
>>> +
>>> +        if (!buf_page) {
>>>                virtio_transport_free_pkt(pkt);
>>>                break;
>>>            }
>>>    +        pkt->buf = page_to_virt(buf_page);
>>>            pkt->buf_len = buf_len;
>>>            pkt->len = buf_len;
>>>    diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
>>> index a9980e9b9304..37e8dbfe2f5d 100644
>>> --- a/net/vmw_vsock/virtio_transport_common.c
>>> +++ b/net/vmw_vsock/virtio_transport_common.c
>>> @@ -69,6 +69,7 @@ virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info,
>>>            if (!pkt->buf)
>>>                goto out_pkt;
>>>    +        pkt->slab_buf = true;
>>>            pkt->buf_len = len;
>>>              err = memcpy_from_msg(pkt->buf, info->msg, len);
>>> @@ -1339,7 +1340,14 @@ EXPORT_SYMBOL_GPL(virtio_transport_recv_pkt);
>>>      void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt)
>>>    {
>>> -    kvfree(pkt->buf);
>>> +    if (pkt->buf_len) {
>>> +        if (pkt->slab_buf)
>>> +            kvfree(pkt->buf);
>>
>> Hi,
>>
>> kfree()? (according to virtio_transport_alloc_pkt() in -next) or something else need to be changed.
>>
> Hello Cristophe,
> 
> I think, 'kvfree()' is still needed here, because buffer for packet could be allocated by 'kvmalloc()'
> in drivers/vhost/vsock.c. Correct me if i'm wrong.

Agreed.

> 
>>> +        else
>>> +            free_pages((unsigned long)pkt->buf,
>>> +                   get_order(pkt->buf_len));
>>
>> In virtio_vsock_rx_fill(), only alloc_page() is used.
>>
>> Should this be alloc_pages(.., get_order(buf_len)) or free_page() (without an 's') here?
> This function frees packets which were allocated in vhost path also. In vhost, for zerocopy
> packets alloc_pageS() is used.

Ok. Seen in patch 5/11.

But wouldn't it be cleaner and future-proof to also have alloc_pageS() 
in virtio_vsock_rx_fill(), even if get_order(buf->len) is kwown to be 0?

What, if for some reason a PAGE_SIZE was < 4kb for a given arch, or 
VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE increased?

CJ

> 
> Thank You, Arseniy
>>
>> Just my 2c,
>>
>> CJ
>>
>>> +    }
>>> +
>>>        kfree(pkt);
>>>    }
>>>    EXPORT_SYMBOL_GPL(virtio_transport_free_pkt);
>>
>
Arseniy Krasnov Nov. 7, 2022, 9:31 p.m. UTC | #4
On 08.11.2022 00:24, Christophe JAILLET wrote:
> Le 07/11/2022 à 06:23, Arseniy Krasnov a écrit :
>> On 06.11.2022 22:50, Christophe JAILLET wrote:
>>> Le 06/11/2022 à 20:36, Arseniy Krasnov a écrit :
>>>> To support zerocopy receive, packet's buffer allocation is changed: for
>>>> buffers which could be mapped to user's vma we can't use 'kmalloc()'(as
>>>> kernel restricts to map slab pages to user's vma) and raw buddy
>>>> allocator now called. But, for tx packets(such packets won't be mapped
>>>> to user), previous 'kmalloc()' way is used, but with special flag in
>>>> packet's structure which allows to distinguish between 'kmalloc()' and
>>>> raw pages buffers.
>>>>
>>>> Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
>>>> ---
>>>>    drivers/vhost/vsock.c                   |  1 +
>>>>    include/linux/virtio_vsock.h            |  1 +
>>>>    net/vmw_vsock/virtio_transport.c        |  8 ++++++--
>>>>    net/vmw_vsock/virtio_transport_common.c | 10 +++++++++-
>>>>    4 files changed, 17 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
>>>> index 5703775af129..65475d128a1d 100644
>>>> --- a/drivers/vhost/vsock.c
>>>> +++ b/drivers/vhost/vsock.c
>>>> @@ -399,6 +399,7 @@ vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
>>>>            return NULL;
>>>>        }
>>>>    +    pkt->slab_buf = true;
>>>>        pkt->buf_len = pkt->len;
>>>>          nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter);
>>>> diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
>>>> index 35d7eedb5e8e..d02cb7aa922f 100644
>>>> --- a/include/linux/virtio_vsock.h
>>>> +++ b/include/linux/virtio_vsock.h
>>>> @@ -50,6 +50,7 @@ struct virtio_vsock_pkt {
>>>>        u32 off;
>>>>        bool reply;
>>>>        bool tap_delivered;
>>>> +    bool slab_buf;
>>>>    };
>>>>      struct virtio_vsock_pkt_info {
>>>> diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
>>>> index ad64f403536a..19909c1e9ba3 100644
>>>> --- a/net/vmw_vsock/virtio_transport.c
>>>> +++ b/net/vmw_vsock/virtio_transport.c
>>>> @@ -255,16 +255,20 @@ static void virtio_vsock_rx_fill(struct virtio_vsock *vsock)
>>>>        vq = vsock->vqs[VSOCK_VQ_RX];
>>>>          do {
>>>> +        struct page *buf_page;
>>>> +
>>>>            pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
>>>>            if (!pkt)
>>>>                break;
>>>>    -        pkt->buf = kmalloc(buf_len, GFP_KERNEL);
>>>> -        if (!pkt->buf) {
>>>> +        buf_page = alloc_page(GFP_KERNEL);
>>>> +
>>>> +        if (!buf_page) {
>>>>                virtio_transport_free_pkt(pkt);
>>>>                break;
>>>>            }
>>>>    +        pkt->buf = page_to_virt(buf_page);
>>>>            pkt->buf_len = buf_len;
>>>>            pkt->len = buf_len;
>>>>    diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
>>>> index a9980e9b9304..37e8dbfe2f5d 100644
>>>> --- a/net/vmw_vsock/virtio_transport_common.c
>>>> +++ b/net/vmw_vsock/virtio_transport_common.c
>>>> @@ -69,6 +69,7 @@ virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info,
>>>>            if (!pkt->buf)
>>>>                goto out_pkt;
>>>>    +        pkt->slab_buf = true;
>>>>            pkt->buf_len = len;
>>>>              err = memcpy_from_msg(pkt->buf, info->msg, len);
>>>> @@ -1339,7 +1340,14 @@ EXPORT_SYMBOL_GPL(virtio_transport_recv_pkt);
>>>>      void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt)
>>>>    {
>>>> -    kvfree(pkt->buf);
>>>> +    if (pkt->buf_len) {
>>>> +        if (pkt->slab_buf)
>>>> +            kvfree(pkt->buf);
>>>
>>> Hi,
>>>
>>> kfree()? (according to virtio_transport_alloc_pkt() in -next) or something else need to be changed.
>>>
>> Hello Cristophe,
>>
>> I think, 'kvfree()' is still needed here, because buffer for packet could be allocated by 'kvmalloc()'
>> in drivers/vhost/vsock.c. Correct me if i'm wrong.
> 
> Agreed.
> 
>>
>>>> +        else
>>>> +            free_pages((unsigned long)pkt->buf,
>>>> +                   get_order(pkt->buf_len));
>>>
>>> In virtio_vsock_rx_fill(), only alloc_page() is used.
>>>
>>> Should this be alloc_pages(.., get_order(buf_len)) or free_page() (without an 's') here?
>> This function frees packets which were allocated in vhost path also. In vhost, for zerocopy
>> packets alloc_pageS() is used.
> 
> Ok. Seen in patch 5/11.
> 
> But wouldn't it be cleaner and future-proof to also have alloc_pageS() in virtio_vsock_rx_fill(), even if get_order(buf->len) is kwown to be 0?
> 
> What, if for some reason a PAGE_SIZE was < 4kb for a given arch, or VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE increased?
Yes, i see. You're right. It will be correct to use alloc_pages(get_order(buf->len)), because in current version, real length of
packet buffer(e.g. single page) is totally unrelated with VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE. I'll fix it in next version.

Thank You
> 
> CJ
> 
>>
>> Thank You, Arseniy
>>>
>>> Just my 2c,
>>>
>>> CJ
>>>
>>>> +    }
>>>> +
>>>>        kfree(pkt);
>>>>    }
>>>>    EXPORT_SYMBOL_GPL(virtio_transport_free_pkt);
>>>
>>
>
Bobby Eshleman Nov. 11, 2022, 8:35 p.m. UTC | #5
On Sun, Nov 06, 2022 at 07:36:02PM +0000, Arseniy Krasnov wrote:
> To support zerocopy receive, packet's buffer allocation is changed: for
> buffers which could be mapped to user's vma we can't use 'kmalloc()'(as
> kernel restricts to map slab pages to user's vma) and raw buddy
> allocator now called. But, for tx packets(such packets won't be mapped
> to user), previous 'kmalloc()' way is used, but with special flag in
> packet's structure which allows to distinguish between 'kmalloc()' and
> raw pages buffers.
> 
> Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru>

Hey Arseniy, great work here!

> ---
>  drivers/vhost/vsock.c                   |  1 +
>  include/linux/virtio_vsock.h            |  1 +
>  net/vmw_vsock/virtio_transport.c        |  8 ++++++--
>  net/vmw_vsock/virtio_transport_common.c | 10 +++++++++-
>  4 files changed, 17 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> index 5703775af129..65475d128a1d 100644
> --- a/drivers/vhost/vsock.c
> +++ b/drivers/vhost/vsock.c
> @@ -399,6 +399,7 @@ vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
>  		return NULL;
>  	}
>  
> +	pkt->slab_buf = true;
>  	pkt->buf_len = pkt->len;
>  
>  	nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter);
> diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
> index 35d7eedb5e8e..d02cb7aa922f 100644
> --- a/include/linux/virtio_vsock.h
> +++ b/include/linux/virtio_vsock.h
> @@ -50,6 +50,7 @@ struct virtio_vsock_pkt {
>  	u32 off;
>  	bool reply;
>  	bool tap_delivered;
> +	bool slab_buf;
>  };

WRT the sk_buff series, I bet this bool will not be needed after the
rebase.

>  
>  struct virtio_vsock_pkt_info {
> diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
> index ad64f403536a..19909c1e9ba3 100644
> --- a/net/vmw_vsock/virtio_transport.c
> +++ b/net/vmw_vsock/virtio_transport.c
> @@ -255,16 +255,20 @@ static void virtio_vsock_rx_fill(struct virtio_vsock *vsock)
>  	vq = vsock->vqs[VSOCK_VQ_RX];
>  
>  	do {
> +		struct page *buf_page;
> +
>  		pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
>  		if (!pkt)
>  			break;
>  
> -		pkt->buf = kmalloc(buf_len, GFP_KERNEL);
> -		if (!pkt->buf) {
> +		buf_page = alloc_page(GFP_KERNEL);
> +
> +		if (!buf_page) {

I think it should not be too hard to use build_skb() around the page
here after rebasing onto the sk_buff series.

>  			virtio_transport_free_pkt(pkt);
>  			break;
>  		}
>  
> +		pkt->buf = page_to_virt(buf_page);
>  		pkt->buf_len = buf_len;
>  		pkt->len = buf_len;
>  
> diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
> index a9980e9b9304..37e8dbfe2f5d 100644
> --- a/net/vmw_vsock/virtio_transport_common.c
> +++ b/net/vmw_vsock/virtio_transport_common.c
> @@ -69,6 +69,7 @@ virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info,
>  		if (!pkt->buf)
>  			goto out_pkt;
>  
> +		pkt->slab_buf = true;
>  		pkt->buf_len = len;
>  
>  		err = memcpy_from_msg(pkt->buf, info->msg, len);
> @@ -1339,7 +1340,14 @@ EXPORT_SYMBOL_GPL(virtio_transport_recv_pkt);
>  
>  void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt)
>  {
> -	kvfree(pkt->buf);
> +	if (pkt->buf_len) {
> +		if (pkt->slab_buf)
> +			kvfree(pkt->buf);
> +		else
> +			free_pages((unsigned long)pkt->buf,
> +				   get_order(pkt->buf_len));
> +	}
> +
>  	kfree(pkt);
>  }
>  EXPORT_SYMBOL_GPL(virtio_transport_free_pkt);
> -- 
> 2.35.0
diff mbox series

Patch

diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 5703775af129..65475d128a1d 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -399,6 +399,7 @@  vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
 		return NULL;
 	}
 
+	pkt->slab_buf = true;
 	pkt->buf_len = pkt->len;
 
 	nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter);
diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
index 35d7eedb5e8e..d02cb7aa922f 100644
--- a/include/linux/virtio_vsock.h
+++ b/include/linux/virtio_vsock.h
@@ -50,6 +50,7 @@  struct virtio_vsock_pkt {
 	u32 off;
 	bool reply;
 	bool tap_delivered;
+	bool slab_buf;
 };
 
 struct virtio_vsock_pkt_info {
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index ad64f403536a..19909c1e9ba3 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -255,16 +255,20 @@  static void virtio_vsock_rx_fill(struct virtio_vsock *vsock)
 	vq = vsock->vqs[VSOCK_VQ_RX];
 
 	do {
+		struct page *buf_page;
+
 		pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
 		if (!pkt)
 			break;
 
-		pkt->buf = kmalloc(buf_len, GFP_KERNEL);
-		if (!pkt->buf) {
+		buf_page = alloc_page(GFP_KERNEL);
+
+		if (!buf_page) {
 			virtio_transport_free_pkt(pkt);
 			break;
 		}
 
+		pkt->buf = page_to_virt(buf_page);
 		pkt->buf_len = buf_len;
 		pkt->len = buf_len;
 
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index a9980e9b9304..37e8dbfe2f5d 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -69,6 +69,7 @@  virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info,
 		if (!pkt->buf)
 			goto out_pkt;
 
+		pkt->slab_buf = true;
 		pkt->buf_len = len;
 
 		err = memcpy_from_msg(pkt->buf, info->msg, len);
@@ -1339,7 +1340,14 @@  EXPORT_SYMBOL_GPL(virtio_transport_recv_pkt);
 
 void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt)
 {
-	kvfree(pkt->buf);
+	if (pkt->buf_len) {
+		if (pkt->slab_buf)
+			kvfree(pkt->buf);
+		else
+			free_pages((unsigned long)pkt->buf,
+				   get_order(pkt->buf_len));
+	}
+
 	kfree(pkt);
 }
 EXPORT_SYMBOL_GPL(virtio_transport_free_pkt);