diff mbox series

[net-next,v5,06/19] tcp: Make sendmsg(MSG_SPLICE_PAGES) copy unspliceable data

Message ID 20230406094245.3633290-7-dhowells@redhat.com (mailing list archive)
State New
Headers show
Series splice, net: Replace sendpage with sendmsg(MSG_SPLICE_PAGES), part 1 | expand

Commit Message

David Howells April 6, 2023, 9:42 a.m. UTC
If sendmsg() with MSG_SPLICE_PAGES encounters a page that shouldn't be
spliced - a slab page, for instance, or one with a zero count - make
tcp_sendmsg() copy it.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Eric Dumazet <edumazet@google.com>
cc: "David S. Miller" <davem@davemloft.net>
cc: David Ahern <dsahern@kernel.org>
cc: Jakub Kicinski <kuba@kernel.org>
cc: Paolo Abeni <pabeni@redhat.com>
cc: Jens Axboe <axboe@kernel.dk>
cc: Matthew Wilcox <willy@infradead.org>
cc: netdev@vger.kernel.org
---
 net/ipv4/tcp.c | 28 +++++++++++++++++++++++++---
 1 file changed, 25 insertions(+), 3 deletions(-)

Comments

Willem de Bruijn April 7, 2023, 2:01 a.m. UTC | #1
On Thu, Apr 6, 2023 at 5:43 AM David Howells <dhowells@redhat.com> wrote:
>
> If sendmsg() with MSG_SPLICE_PAGES encounters a page that shouldn't be
> spliced - a slab page, for instance, or one with a zero count - make
> tcp_sendmsg() copy it.
>
> Signed-off-by: David Howells <dhowells@redhat.com>
> cc: Eric Dumazet <edumazet@google.com>
> cc: "David S. Miller" <davem@davemloft.net>
> cc: David Ahern <dsahern@kernel.org>
> cc: Jakub Kicinski <kuba@kernel.org>
> cc: Paolo Abeni <pabeni@redhat.com>
> cc: Jens Axboe <axboe@kernel.dk>
> cc: Matthew Wilcox <willy@infradead.org>
> cc: netdev@vger.kernel.org
> ---
>  net/ipv4/tcp.c | 28 +++++++++++++++++++++++++---
>  1 file changed, 25 insertions(+), 3 deletions(-)
>
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index 510bacc7ce7b..238a8ad6527c 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -1418,10 +1418,10 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
>                                 goto do_error;
>                         copy = err;
>                 } else if (zc == 2) {
> -                       /* Splice in data. */
> +                       /* Splice in data if we can; copy if we can't. */
>                         struct page *page = NULL, **pages = &page;
>                         size_t off = 0, part;
> -                       bool can_coalesce;
> +                       bool can_coalesce, put = false;
>                         int i = skb_shinfo(skb)->nr_frags;
>
>                         copy = iov_iter_extract_pages(&msg->msg_iter, &pages,
> @@ -1448,12 +1448,34 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
>                                 goto wait_for_space;
>                         copy = part;
>
> +                       if (!sendpage_ok(page)) {
> +                               const void *p = kmap_local_page(page);
> +                               void *q;
> +
> +                               q = page_frag_memdup(NULL, p + off, copy,
> +                                                    sk->sk_allocation, ULONG_MAX);
> +                               kunmap_local(p);
> +                               if (!q) {
> +                                       iov_iter_revert(&msg->msg_iter, copy);
> +                                       err = copy ?: -ENOMEM;
> +                                       goto do_error;
> +                               }
> +                               page = virt_to_page(q);
> +                               off = offset_in_page(q);
> +                               put = true;
> +                               can_coalesce = false;
> +                       }
> +

This is almost identical in the later UDP and Unix implementations.
Could this be a wrapper, something like:

    page = sendpage_copy_if_needed(&page, &off, copy, gfp, &put);

(it seems page is never needed if it would return NULL)
diff mbox series

Patch

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 510bacc7ce7b..238a8ad6527c 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1418,10 +1418,10 @@  int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 				goto do_error;
 			copy = err;
 		} else if (zc == 2) {
-			/* Splice in data. */
+			/* Splice in data if we can; copy if we can't. */
 			struct page *page = NULL, **pages = &page;
 			size_t off = 0, part;
-			bool can_coalesce;
+			bool can_coalesce, put = false;
 			int i = skb_shinfo(skb)->nr_frags;
 
 			copy = iov_iter_extract_pages(&msg->msg_iter, &pages,
@@ -1448,12 +1448,34 @@  int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 				goto wait_for_space;
 			copy = part;
 
+			if (!sendpage_ok(page)) {
+				const void *p = kmap_local_page(page);
+				void *q;
+
+				q = page_frag_memdup(NULL, p + off, copy,
+						     sk->sk_allocation, ULONG_MAX);
+				kunmap_local(p);
+				if (!q) {
+					iov_iter_revert(&msg->msg_iter, copy);
+					err = copy ?: -ENOMEM;
+					goto do_error;
+				}
+				page = virt_to_page(q);
+				off = offset_in_page(q);
+				put = true;
+				can_coalesce = false;
+			}
+
 			if (can_coalesce) {
 				skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
 			} else {
-				get_page(page);
+				if (!put)
+					get_page(page);
+				put = false;
 				skb_fill_page_desc_noacc(skb, i, page, off, copy);
 			}
+			if (put)
+				put_page(page);
 			page = NULL;
 
 			if (!(flags & MSG_NO_SHARED_FRAGS))