diff mbox series

[net-next,v3,11/25] tcp: support zc with managed data

Message ID cd46ef115fb6ee38b2b5e873777c26b0332138e9.1656318994.git.asml.silence@gmail.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series io_uring zerocopy send | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next, async
netdev/apply fail Patch does not apply to net-next

Commit Message

Pavel Begunkov July 5, 2022, 3:01 p.m. UTC
Also make tcp to use managed data and propagate SKBFL_MANAGED_FRAG_REFS
to optimise frag pages referencing.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 net/ipv4/tcp.c | 52 ++++++++++++++++++++++++++++++++------------------
 1 file changed, 33 insertions(+), 19 deletions(-)
diff mbox series

Patch

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 390eb3dc53bd..05e2f6271f65 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1223,17 +1223,23 @@  int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 
 	flags = msg->msg_flags;
 
-	if (flags & MSG_ZEROCOPY && size && sock_flag(sk, SOCK_ZEROCOPY)) {
+	if ((flags & MSG_ZEROCOPY) && size) {
 		skb = tcp_write_queue_tail(sk);
-		uarg = msg_zerocopy_realloc(sk, size, skb_zcopy(skb));
-		if (!uarg) {
-			err = -ENOBUFS;
-			goto out_err;
-		}
 
-		zc = sk->sk_route_caps & NETIF_F_SG;
-		if (!zc)
-			uarg->zerocopy = 0;
+		if (msg->msg_ubuf) {
+			uarg = msg->msg_ubuf;
+			net_zcopy_get(uarg);
+			zc = sk->sk_route_caps & NETIF_F_SG;
+		} else if (sock_flag(sk, SOCK_ZEROCOPY)) {
+			uarg = msg_zerocopy_realloc(sk, size, skb_zcopy(skb));
+			if (!uarg) {
+				err = -ENOBUFS;
+				goto out_err;
+			}
+			zc = sk->sk_route_caps & NETIF_F_SG;
+			if (!zc)
+				uarg->zerocopy = 0;
+		}
 	}
 
 	if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect) &&
@@ -1356,9 +1362,11 @@  int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 
 			copy = min_t(int, copy, pfrag->size - pfrag->offset);
 
-			if (tcp_downgrade_zcopy_pure(sk, skb))
-				goto wait_for_space;
-
+			if (unlikely(skb_zcopy_pure(skb) || skb_zcopy_managed(skb))) {
+				if (tcp_downgrade_zcopy_pure(sk, skb))
+					goto wait_for_space;
+				skb_zcopy_downgrade_managed(skb);
+			}
 			copy = tcp_wmem_schedule(sk, copy);
 			if (!copy)
 				goto wait_for_space;
@@ -1381,15 +1389,21 @@  int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 			pfrag->offset += copy;
 		} else {
 			/* First append to a fragless skb builds initial
-			 * pure zerocopy skb
+			 * zerocopy skb
 			 */
-			if (!skb->len)
+			if (!skb->len) {
+				if (msg->msg_managed_data)
+					skb_shinfo(skb)->flags |= SKBFL_MANAGED_FRAG_REFS;
 				skb_shinfo(skb)->flags |= SKBFL_PURE_ZEROCOPY;
-
-			if (!skb_zcopy_pure(skb)) {
-				copy = tcp_wmem_schedule(sk, copy);
-				if (!copy)
-					goto wait_for_space;
+			} else {
+				/* appending, don't mix managed and unmanaged */
+				if (!msg->msg_managed_data)
+					skb_zcopy_downgrade_managed(skb);
+				if (!skb_zcopy_pure(skb)) {
+					copy = tcp_wmem_schedule(sk, copy);
+					if (!copy)
+						goto wait_for_space;
+				}
 			}
 
 			err = skb_zerocopy_iter_stream(sk, skb, msg, copy, uarg);