diff mbox series

[v3,17/55] ip6, udp6: Support MSG_SPLICE_PAGES

Message ID 20230331160914.1608208-18-dhowells@redhat.com (mailing list archive)
State New
Headers show
Series splice, net: Replace sendpage with sendmsg(MSG_SPLICE_PAGES) | expand

Commit Message

David Howells March 31, 2023, 4:08 p.m. UTC
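Make IP6/UDP6 sendmsg() support MSG_SPLICE_PAGES.  This causes pages to be
spliced from the source iterator if possible, copying the data if not (for
example, when the output device does not support NETIF_F_SG).

To share the implementation with IPv4, __ip_splice_alloc() and
__ip_splice_pages() are made non-static, declared in include/net/ip.h and
exported so that __ip6_append_data() can call them.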

This allows ->sendpage() to be replaced by something that can handle
multiple multipage folios in a single transaction.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
cc: "David S. Miller" <davem@davemloft.net>
cc: Eric Dumazet <edumazet@google.com>
cc: Jakub Kicinski <kuba@kernel.org>
cc: Paolo Abeni <pabeni@redhat.com>
cc: Jens Axboe <axboe@kernel.dk>
cc: Matthew Wilcox <willy@infradead.org>
cc: netdev@vger.kernel.org
---
 include/net/ip.h      |  4 ++++
 net/ipv4/ip_output.c  | 11 ++++++-----
 net/ipv6/ip6_output.c | 28 +++++++++++++++++++++++++---
 3 files changed, 35 insertions(+), 8 deletions(-)
diff mbox series

Patch

diff --git a/include/net/ip.h b/include/net/ip.h
index c3fffaa92d6e..e27d2ceffcfa 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -211,6 +211,10 @@  int ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb);
 int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
 		    __u8 tos);
 void ip_init(void);
+int __ip_splice_alloc(struct sock *sk, struct sk_buff **pskb,
+		      unsigned int fragheaderlen, unsigned int maxfraglen,
+		      unsigned int hh_len);
+int __ip_splice_pages(struct sock *sk, struct sk_buff *skb, void *from, int *pcopy);
 int ip_append_data(struct sock *sk, struct flowi4 *fl4,
 		   int getfrag(void *from, char *to, int offset, int len,
 			       int odd, struct sk_buff *skb),
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 41a954ac9e1a..fa2546d944bc 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -959,9 +959,9 @@  csum_page(struct page *page, int offset, int copy)
 /*
  * Allocate a packet for MSG_SPLICE_PAGES.
  */
-static int __ip_splice_alloc(struct sock *sk, struct sk_buff **pskb,
-			     unsigned int fragheaderlen, unsigned int maxfraglen,
-			     unsigned int hh_len)
+int __ip_splice_alloc(struct sock *sk, struct sk_buff **pskb,
+		      unsigned int fragheaderlen, unsigned int maxfraglen,
+		      unsigned int hh_len)
 {
 	struct sk_buff *skb_prev = *pskb, *skb;
 	unsigned int fraggap = skb_prev->len - maxfraglen;
@@ -993,12 +993,12 @@  static int __ip_splice_alloc(struct sock *sk, struct sk_buff **pskb,
 	*pskb = skb;
 	return 0;
 }
+EXPORT_SYMBOL_GPL(__ip_splice_alloc);
 
 /*
  * Add (or copy) data pages for MSG_SPLICE_PAGES.
  */
-static int __ip_splice_pages(struct sock *sk, struct sk_buff *skb,
-			     void *from, int *pcopy)
+int __ip_splice_pages(struct sock *sk, struct sk_buff *skb, void *from, int *pcopy)
 {
 	struct msghdr *msg = from;
 	struct page *page = NULL, **pages = &page;
@@ -1047,6 +1047,7 @@  static int __ip_splice_pages(struct sock *sk, struct sk_buff *skb,
 	*pcopy = copy;
 	return 0;
 }
+EXPORT_SYMBOL_GPL(__ip_splice_pages);
 
 static int __ip_append_data(struct sock *sk,
 			    struct flowi4 *fl4,
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index c314fdde0097..c95d034cb45a 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1486,7 +1486,7 @@  static int __ip6_append_data(struct sock *sk,
 	struct rt6_info *rt = (struct rt6_info *)cork->dst;
 	struct ipv6_txoptions *opt = v6_cork->opt;
 	int csummode = CHECKSUM_NONE;
-	unsigned int maxnonfragsize, headersize;
+	unsigned int maxnonfragsize, headersize, initial_length;
 	unsigned int wmem_alloc_delta = 0;
 	bool paged, extra_uref = false;
 
@@ -1559,6 +1559,7 @@  static int __ip6_append_data(struct sock *sk,
 	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
 		csummode = CHECKSUM_PARTIAL;
 
+	initial_length = length;
 	if ((flags & MSG_ZEROCOPY) && length) {
 		struct msghdr *msg = from;
 
@@ -1589,6 +1590,14 @@  static int __ip6_append_data(struct sock *sk,
 				skb_zcopy_set(skb, uarg, &extra_uref);
 			}
 		}
+	} else if ((flags & MSG_SPLICE_PAGES) && length) {
+		if (inet_sk(sk)->hdrincl)
+			return -EPERM;
+		if (rt->dst.dev->features & NETIF_F_SG)
+			/* We need an empty buffer to attach stuff to */
+			initial_length = transhdrlen;
+		else
+			flags &= ~MSG_SPLICE_PAGES;
 	}
 
 	/*
@@ -1624,6 +1633,15 @@  static int __ip6_append_data(struct sock *sk,
 			unsigned int fraggap;
 			unsigned int alloclen, alloc_extra;
 			unsigned int pagedlen;
+
+			if (unlikely(flags & MSG_SPLICE_PAGES)) {
+				err = __ip_splice_alloc(sk, &skb, fragheaderlen,
+							maxfraglen, hh_len);
+				if (err < 0)
+					goto error;
+				continue;
+			}
+			initial_length = length;
 alloc_new_skb:
 			/* There's no room in the current skb */
 			if (skb)
@@ -1642,7 +1660,7 @@  static int __ip6_append_data(struct sock *sk,
 			 * If remaining data exceeds the mtu,
 			 * we know we need more fragment(s).
 			 */
-			datalen = length + fraggap;
+			datalen = initial_length + fraggap;
 
 			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
 				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
@@ -1672,7 +1690,7 @@  static int __ip6_append_data(struct sock *sk,
 			}
 			alloclen += alloc_extra;
 
-			if (datalen != length + fraggap) {
+			if (datalen != initial_length + fraggap) {
 				/*
 				 * this is not the last fragment, the trailer
 				 * space is regarded as data space.
@@ -1778,6 +1796,10 @@  static int __ip6_append_data(struct sock *sk,
 				err = -EFAULT;
 				goto error;
 			}
+		} else if (flags & MSG_SPLICE_PAGES) {
+			err = __ip_splice_pages(sk, skb, from, &copy);
+			if (err < 0)
+				goto error;
 		} else if (!zc) {
 			int i = skb_shinfo(skb)->nr_frags;