diff mbox series

[net-next,v4,11/27] tcp: support externally provided ubufs

Message ID 7ee05f644e3b3626b693973738364bcb23cf905d.1657194434.git.asml.silence@gmail.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series io_uring zerocopy send | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next, async
netdev/apply fail Patch does not apply to net-next

Commit Message

Pavel Begunkov July 7, 2022, 11:49 a.m. UTC
Teach ipv4/udp how to use external ubuf_info provided in msghdr and
also prepare it for managed frags by sprinkling
skb_zcopy_downgrade_managed() when it could mix managed and not managed
frags.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 net/ipv4/tcp.c | 32 ++++++++++++++++++++------------
 1 file changed, 20 insertions(+), 12 deletions(-)

Comments

David Ahern July 8, 2022, 4:06 a.m. UTC | #1
On 7/7/22 5:49 AM, Pavel Begunkov wrote:
> Teach ipv4/udp how to use external ubuf_info provided in msghdr and

ipv4/tcp?

> also prepare it for managed frags by sprinkling
> skb_zcopy_downgrade_managed() when it could mix managed and not managed
> frags.
> 
> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
> ---
>  net/ipv4/tcp.c | 32 ++++++++++++++++++++------------
>  1 file changed, 20 insertions(+), 12 deletions(-)
> 
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index 390eb3dc53bd..a81f694af5e9 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -1223,17 +1223,23 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
>  
>  	flags = msg->msg_flags;
>  
> -	if (flags & MSG_ZEROCOPY && size && sock_flag(sk, SOCK_ZEROCOPY)) {
> +	if ((flags & MSG_ZEROCOPY) && size) {
>  		skb = tcp_write_queue_tail(sk);
> -		uarg = msg_zerocopy_realloc(sk, size, skb_zcopy(skb));
> -		if (!uarg) {
> -			err = -ENOBUFS;
> -			goto out_err;
> -		}
>  
> -		zc = sk->sk_route_caps & NETIF_F_SG;
> -		if (!zc)
> -			uarg->zerocopy = 0;
> +		if (msg->msg_ubuf) {
> +			uarg = msg->msg_ubuf;
> +			net_zcopy_get(uarg);
> +			zc = sk->sk_route_caps & NETIF_F_SG;
> +		} else if (sock_flag(sk, SOCK_ZEROCOPY)) {
> +			uarg = msg_zerocopy_realloc(sk, size, skb_zcopy(skb));
> +			if (!uarg) {
> +				err = -ENOBUFS;
> +				goto out_err;
> +			}
> +			zc = sk->sk_route_caps & NETIF_F_SG;
> +			if (!zc)
> +				uarg->zerocopy = 0;
> +		}
>  	}
>  
>  	if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect) &&
> @@ -1356,9 +1362,11 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
>  
>  			copy = min_t(int, copy, pfrag->size - pfrag->offset);
>  
> -			if (tcp_downgrade_zcopy_pure(sk, skb))
> -				goto wait_for_space;
> -
> +			if (unlikely(skb_zcopy_pure(skb) || skb_zcopy_managed(skb))) {
> +				if (tcp_downgrade_zcopy_pure(sk, skb))
> +					goto wait_for_space;
> +				skb_zcopy_downgrade_managed(skb);
> +			}
>  			copy = tcp_wmem_schedule(sk, copy);
>  			if (!copy)
>  				goto wait_for_space;

You dropped the msg->msg_ubuf checks on jump labels. Removing the one
you had at 'out_nopush' I agree with based on my tests (i.e, it is not
needed).

The one at 'out_err' seems like it is needed - but it has been a few
weeks since I debugged that case. I believe the error path I was hitting
was sk_stream_wait_memory with MSG_DONTWAIT flag set meaning timeout is
0 and it jumps there with EPIPE.
Pavel Begunkov July 8, 2022, 2:03 p.m. UTC | #2
On 7/8/22 05:06, David Ahern wrote:
> On 7/7/22 5:49 AM, Pavel Begunkov wrote:
>> Teach ipv4/udp how to use external ubuf_info provided in msghdr and
> 
> ipv4/tcp?

Ehh, just tcp. Thanks, I updated the branches


>> also prepare it for managed frags by sprinkling
>> skb_zcopy_downgrade_managed() when it could mix managed and not managed
>> frags.
>>
>> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
>> ---
>>   net/ipv4/tcp.c | 32 ++++++++++++++++++++------------
>>   1 file changed, 20 insertions(+), 12 deletions(-)
>>
>> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
>> index 390eb3dc53bd..a81f694af5e9 100644
>> --- a/net/ipv4/tcp.c
>> +++ b/net/ipv4/tcp.c
>> @@ -1223,17 +1223,23 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
>>   
>>   	flags = msg->msg_flags;
>>   
>> -	if (flags & MSG_ZEROCOPY && size && sock_flag(sk, SOCK_ZEROCOPY)) {
>> +	if ((flags & MSG_ZEROCOPY) && size) {
>>   		skb = tcp_write_queue_tail(sk);
>> -		uarg = msg_zerocopy_realloc(sk, size, skb_zcopy(skb));
>> -		if (!uarg) {
>> -			err = -ENOBUFS;
>> -			goto out_err;
>> -		}
>>   
>> -		zc = sk->sk_route_caps & NETIF_F_SG;
>> -		if (!zc)
>> -			uarg->zerocopy = 0;
>> +		if (msg->msg_ubuf) {
>> +			uarg = msg->msg_ubuf;
>> +			net_zcopy_get(uarg);
>> +			zc = sk->sk_route_caps & NETIF_F_SG;
>> +		} else if (sock_flag(sk, SOCK_ZEROCOPY)) {
>> +			uarg = msg_zerocopy_realloc(sk, size, skb_zcopy(skb));
>> +			if (!uarg) {
>> +				err = -ENOBUFS;
>> +				goto out_err;
>> +			}
>> +			zc = sk->sk_route_caps & NETIF_F_SG;
>> +			if (!zc)
>> +				uarg->zerocopy = 0;
>> +		}
>>   	}
>>   
>>   	if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect) &&
>> @@ -1356,9 +1362,11 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
>>   
>>   			copy = min_t(int, copy, pfrag->size - pfrag->offset);
>>   
>> -			if (tcp_downgrade_zcopy_pure(sk, skb))
>> -				goto wait_for_space;
>> -
>> +			if (unlikely(skb_zcopy_pure(skb) || skb_zcopy_managed(skb))) {
>> +				if (tcp_downgrade_zcopy_pure(sk, skb))
>> +					goto wait_for_space;
>> +				skb_zcopy_downgrade_managed(skb);
>> +			}
>>   			copy = tcp_wmem_schedule(sk, copy);
>>   			if (!copy)
>>   				goto wait_for_space;
> 
> You dropped the msg->msg_ubuf checks on jump labels. Removing the one
> you had at 'out_nopush' I agree with based on my tests (i.e, it is not
> needed).

It was an optimisation, which I dropped for simplicity. Will be sending it
and couple more afterwards.


> The one at 'out_err' seems like it is needed - but it has been a few
> weeks since I debugged that case. I believe the error path I was hitting
> was sk_stream_wait_memory with MSG_DONTWAIT flag set meaning timeout is
> 0 and it jumps there with EPIPE.

Currently, it's consistent with MSG_ZEROCOPY ubuf_info, we grab a ubuf_info
reference at the beginning (msg_zerocopy_realloc() for MSG_ZEROCOPY and
net_zcopy_get() for msg_ubuf), and then release it at the end
with net_zcopy_put() or net_zcopy_put_abort().

All users, e.g. skb_zerocopy_iter_stream(), have to grab a new reference,
skb_zcopy_set() -> net_zcopy_get().

Not sure I see any issue, and if there is it sounds that it should also
be affecting MSG_ZEROCOPY.
David Ahern July 13, 2022, 11:38 p.m. UTC | #3
On 7/8/22 7:03 AM, Pavel Begunkov wrote:
>>> @@ -1356,9 +1362,11 @@ int tcp_sendmsg_locked(struct sock *sk, struct
>>> msghdr *msg, size_t size)
>>>                 copy = min_t(int, copy, pfrag->size - pfrag->offset);
>>>   -            if (tcp_downgrade_zcopy_pure(sk, skb))
>>> -                goto wait_for_space;
>>> -
>>> +            if (unlikely(skb_zcopy_pure(skb) ||
>>> skb_zcopy_managed(skb))) {
>>> +                if (tcp_downgrade_zcopy_pure(sk, skb))
>>> +                    goto wait_for_space;
>>> +                skb_zcopy_downgrade_managed(skb);
>>> +            }
>>>               copy = tcp_wmem_schedule(sk, copy);
>>>               if (!copy)
>>>                   goto wait_for_space;
>>
>> You dropped the msg->msg_ubuf checks on jump labels. Removing the one
>> you had at 'out_nopush' I agree with based on my tests (i.e, it is not
>> needed).
> 
> It was an optimisation, which I dropped for simplicity. Will be sending it
> and couple more afterwards.
> 
> 
>> The one at 'out_err' seems like it is needed - but it has been a few
>> weeks since I debugged that case. I believe the error path I was hitting
>> was sk_stream_wait_memory with MSG_DONTWAIT flag set meaning timeout is
>> 0 and it jumps there with EPIPE.
> 
> Currently, it's consistent with MSG_ZEROCOPY ubuf_info, we grab a ubuf_info
> reference at the beginning (msg_zerocopy_realloc() for MSG_ZEROCOPY and
> net_zcopy_get() for msg_ubuf), and then release it at the end
> with net_zcopy_put() or net_zcopy_put_abort().
> 

my fault; I somehow dropped a line in the port to the 5.13 kernel.
diff mbox series

Patch

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 390eb3dc53bd..a81f694af5e9 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1223,17 +1223,23 @@  int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 
 	flags = msg->msg_flags;
 
-	if (flags & MSG_ZEROCOPY && size && sock_flag(sk, SOCK_ZEROCOPY)) {
+	if ((flags & MSG_ZEROCOPY) && size) {
 		skb = tcp_write_queue_tail(sk);
-		uarg = msg_zerocopy_realloc(sk, size, skb_zcopy(skb));
-		if (!uarg) {
-			err = -ENOBUFS;
-			goto out_err;
-		}
 
-		zc = sk->sk_route_caps & NETIF_F_SG;
-		if (!zc)
-			uarg->zerocopy = 0;
+		if (msg->msg_ubuf) {
+			uarg = msg->msg_ubuf;
+			net_zcopy_get(uarg);
+			zc = sk->sk_route_caps & NETIF_F_SG;
+		} else if (sock_flag(sk, SOCK_ZEROCOPY)) {
+			uarg = msg_zerocopy_realloc(sk, size, skb_zcopy(skb));
+			if (!uarg) {
+				err = -ENOBUFS;
+				goto out_err;
+			}
+			zc = sk->sk_route_caps & NETIF_F_SG;
+			if (!zc)
+				uarg->zerocopy = 0;
+		}
 	}
 
 	if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect) &&
@@ -1356,9 +1362,11 @@  int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 
 			copy = min_t(int, copy, pfrag->size - pfrag->offset);
 
-			if (tcp_downgrade_zcopy_pure(sk, skb))
-				goto wait_for_space;
-
+			if (unlikely(skb_zcopy_pure(skb) || skb_zcopy_managed(skb))) {
+				if (tcp_downgrade_zcopy_pure(sk, skb))
+					goto wait_for_space;
+				skb_zcopy_downgrade_managed(skb);
+			}
 			copy = tcp_wmem_schedule(sk, copy);
 			if (!copy)
 				goto wait_for_space;