diff mbox series

[bpf-next,v6,08/12] udp: implement ->read_sock() for sockmap

Message ID 20210323003808.16074-9-xiyou.wangcong@gmail.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series sockmap: introduce BPF_SK_SKB_VERDICT and support UDP | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for bpf-next
netdev/subject_prefix success Link
netdev/cc_maintainers warning 10 maintainers not CCed: dsahern@kernel.org yhs@fb.com kpsingh@kernel.org yoshfuji@linux-ipv6.org andrii@kernel.org kafai@fb.com ast@kernel.org songliubraving@fb.com davem@davemloft.net kuba@kernel.org
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 306 this patch: 306
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 63 lines checked
netdev/build_allmodconfig_warn success Errors and warnings before: 414 this patch: 414
netdev/header_inline success Link

Commit Message

Cong Wang March 23, 2021, 12:38 a.m. UTC
From: Cong Wang <cong.wang@bytedance.com>

This is similar to tcp_read_sock(), except we do not need
to worry about connections, we just need to retrieve skb
from UDP receive queue.

Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jakub Sitnicki <jakub@cloudflare.com>
Cc: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
---
 include/net/udp.h   |  2 ++
 net/ipv4/af_inet.c  |  1 +
 net/ipv4/udp.c      | 35 +++++++++++++++++++++++++++++++++++
 net/ipv6/af_inet6.c |  1 +
 4 files changed, 39 insertions(+)

Comments

Yunsheng Lin March 23, 2021, 6:31 a.m. UTC | #1
On 2021/3/23 8:38, Cong Wang wrote:
> From: Cong Wang <cong.wang@bytedance.com>
> 
> This is similar to tcp_read_sock(), except we do not need
> to worry about connections, we just need to retrieve skb
> from UDP receive queue.
> 
> Cc: John Fastabend <john.fastabend@gmail.com>
> Cc: Daniel Borkmann <daniel@iogearbox.net>
> Cc: Jakub Sitnicki <jakub@cloudflare.com>
> Cc: Lorenz Bauer <lmb@cloudflare.com>
> Signed-off-by: Cong Wang <cong.wang@bytedance.com>
> ---
>  include/net/udp.h   |  2 ++
>  net/ipv4/af_inet.c  |  1 +
>  net/ipv4/udp.c      | 35 +++++++++++++++++++++++++++++++++++
>  net/ipv6/af_inet6.c |  1 +
>  4 files changed, 39 insertions(+)
> 
> diff --git a/include/net/udp.h b/include/net/udp.h
> index df7cc1edc200..347b62a753c3 100644
> --- a/include/net/udp.h
> +++ b/include/net/udp.h
> @@ -329,6 +329,8 @@ struct sock *__udp6_lib_lookup(struct net *net,
>  			       struct sk_buff *skb);
>  struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb,
>  				 __be16 sport, __be16 dport);
> +int udp_read_sock(struct sock *sk, read_descriptor_t *desc,
> +		  sk_read_actor_t recv_actor);
>  
>  /* UDP uses skb->dev_scratch to cache as much information as possible and avoid
>   * possibly multiple cache miss on dequeue()
> diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
> index 1355e6c0d567..f17870ee558b 100644
> --- a/net/ipv4/af_inet.c
> +++ b/net/ipv4/af_inet.c
> @@ -1070,6 +1070,7 @@ const struct proto_ops inet_dgram_ops = {
>  	.setsockopt	   = sock_common_setsockopt,
>  	.getsockopt	   = sock_common_getsockopt,
>  	.sendmsg	   = inet_sendmsg,
> +	.read_sock	   = udp_read_sock,
>  	.recvmsg	   = inet_recvmsg,
>  	.mmap		   = sock_no_mmap,
>  	.sendpage	   = inet_sendpage,
> diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> index 38952aaee3a1..a0adee3b1af4 100644
> --- a/net/ipv4/udp.c
> +++ b/net/ipv4/udp.c
> @@ -1782,6 +1782,41 @@ struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,
>  }
>  EXPORT_SYMBOL(__skb_recv_udp);
>  
> +int udp_read_sock(struct sock *sk, read_descriptor_t *desc,
> +		  sk_read_actor_t recv_actor)
> +{
> +	int copied = 0;
> +
> +	while (1) {
> +		int offset = 0, err;
> +		struct sk_buff *skb;
> +
> +		skb = __skb_recv_udp(sk, 0, 1, &offset, &err);
> +		if (!skb)
> +			break;

Does the error handling above need the additional handling below?
It seems __skb_recv_udp() returns the error through the "err" parameter;
if "copied == 0", should that error be returned?

if (!skb) {
	if (!copied)
		copied = err;

	break;
}

> +		if (offset < skb->len) {
> +			int used;
> +			size_t len;
> +
> +			len = skb->len - offset;
> +			used = recv_actor(desc, skb, offset, len);
> +			if (used <= 0) {
> +				if (!copied)
> +					copied = used;
> +				break;

Here, by comparison, the "copied == 0" error case does seem to be handled.

> +			} else if (used <= len) {
> +				copied += used;
> +				offset += used;
> +			}
> +		}
> +		if (!desc->count)
> +			break;
> +	}
> +
> +	return copied;
> +}
> +EXPORT_SYMBOL(udp_read_sock);
> +
>  /*
>   * 	This should be easy, if there is something there we
>   * 	return it, otherwise we block.
> diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
> index 802f5111805a..71de739b4a9e 100644
> --- a/net/ipv6/af_inet6.c
> +++ b/net/ipv6/af_inet6.c
> @@ -714,6 +714,7 @@ const struct proto_ops inet6_dgram_ops = {
>  	.getsockopt	   = sock_common_getsockopt,	/* ok		*/
>  	.sendmsg	   = inet6_sendmsg,		/* retpoline's sake */
>  	.recvmsg	   = inet6_recvmsg,		/* retpoline's sake */
> +	.read_sock	   = udp_read_sock,
>  	.mmap		   = sock_no_mmap,
>  	.sendpage	   = sock_no_sendpage,
>  	.set_peek_off	   = sk_set_peek_off,
>
Cong Wang March 24, 2021, 8:04 p.m. UTC | #2
On Mon, Mar 22, 2021 at 11:31 PM Yunsheng Lin <linyunsheng@huawei.com> wrote:
>
> On 2021/3/23 8:38, Cong Wang wrote:
> > From: Cong Wang <cong.wang@bytedance.com>
> >
> > This is similar to tcp_read_sock(), except we do not need
> > to worry about connections, we just need to retrieve skb
> > from UDP receive queue.
> >
> > Cc: John Fastabend <john.fastabend@gmail.com>
> > Cc: Daniel Borkmann <daniel@iogearbox.net>
> > Cc: Jakub Sitnicki <jakub@cloudflare.com>
> > Cc: Lorenz Bauer <lmb@cloudflare.com>
> > Signed-off-by: Cong Wang <cong.wang@bytedance.com>
> > ---
> >  include/net/udp.h   |  2 ++
> >  net/ipv4/af_inet.c  |  1 +
> >  net/ipv4/udp.c      | 35 +++++++++++++++++++++++++++++++++++
> >  net/ipv6/af_inet6.c |  1 +
> >  4 files changed, 39 insertions(+)
> >
> > diff --git a/include/net/udp.h b/include/net/udp.h
> > index df7cc1edc200..347b62a753c3 100644
> > --- a/include/net/udp.h
> > +++ b/include/net/udp.h
> > @@ -329,6 +329,8 @@ struct sock *__udp6_lib_lookup(struct net *net,
> >                              struct sk_buff *skb);
> >  struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb,
> >                                __be16 sport, __be16 dport);
> > +int udp_read_sock(struct sock *sk, read_descriptor_t *desc,
> > +               sk_read_actor_t recv_actor);
> >
> >  /* UDP uses skb->dev_scratch to cache as much information as possible and avoid
> >   * possibly multiple cache miss on dequeue()
> > diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
> > index 1355e6c0d567..f17870ee558b 100644
> > --- a/net/ipv4/af_inet.c
> > +++ b/net/ipv4/af_inet.c
> > @@ -1070,6 +1070,7 @@ const struct proto_ops inet_dgram_ops = {
> >       .setsockopt        = sock_common_setsockopt,
> >       .getsockopt        = sock_common_getsockopt,
> >       .sendmsg           = inet_sendmsg,
> > +     .read_sock         = udp_read_sock,
> >       .recvmsg           = inet_recvmsg,
> >       .mmap              = sock_no_mmap,
> >       .sendpage          = inet_sendpage,
> > diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> > index 38952aaee3a1..a0adee3b1af4 100644
> > --- a/net/ipv4/udp.c
> > +++ b/net/ipv4/udp.c
> > @@ -1782,6 +1782,41 @@ struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,
> >  }
> >  EXPORT_SYMBOL(__skb_recv_udp);
> >
> > +int udp_read_sock(struct sock *sk, read_descriptor_t *desc,
> > +               sk_read_actor_t recv_actor)
> > +{
> > +     int copied = 0;
> > +
> > +     while (1) {
> > +             int offset = 0, err;
> > +             struct sk_buff *skb;
> > +
> > +             skb = __skb_recv_udp(sk, 0, 1, &offset, &err);
> > +             if (!skb)
> > +                     break;
>
> Does above error handling need the below additional handling?
> It seems __skb_recv_udp() will return the error by parameter "err",
> if "copied == 0", does it need to return the error?

Not for the skmsg case, because the return value is simply unused there:

static void sk_psock_verdict_data_ready(struct sock *sk)
{
        struct socket *sock = sk->sk_socket;
        read_descriptor_t desc;

        if (unlikely(!sock || !sock->ops || !sock->ops->read_sock))
                return;

        desc.arg.data = sk;
        desc.error = 0;
        desc.count = 1;

        sock->ops->read_sock(sk, &desc, sk_psock_verdict_recv);
}

Thanks.
diff mbox series

Patch

diff --git a/include/net/udp.h b/include/net/udp.h
index df7cc1edc200..347b62a753c3 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -329,6 +329,8 @@  struct sock *__udp6_lib_lookup(struct net *net,
 			       struct sk_buff *skb);
 struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb,
 				 __be16 sport, __be16 dport);
+int udp_read_sock(struct sock *sk, read_descriptor_t *desc,
+		  sk_read_actor_t recv_actor);
 
 /* UDP uses skb->dev_scratch to cache as much information as possible and avoid
  * possibly multiple cache miss on dequeue()
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 1355e6c0d567..f17870ee558b 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1070,6 +1070,7 @@  const struct proto_ops inet_dgram_ops = {
 	.setsockopt	   = sock_common_setsockopt,
 	.getsockopt	   = sock_common_getsockopt,
 	.sendmsg	   = inet_sendmsg,
+	.read_sock	   = udp_read_sock,
 	.recvmsg	   = inet_recvmsg,
 	.mmap		   = sock_no_mmap,
 	.sendpage	   = inet_sendpage,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 38952aaee3a1..a0adee3b1af4 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1782,6 +1782,41 @@  struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,
 }
 EXPORT_SYMBOL(__skb_recv_udp);
 
+int udp_read_sock(struct sock *sk, read_descriptor_t *desc,
+		  sk_read_actor_t recv_actor)
+{
+	int copied = 0;
+
+	while (1) {
+		int offset = 0, err;
+		struct sk_buff *skb;
+
+		skb = __skb_recv_udp(sk, 0, 1, &offset, &err);
+		if (!skb)
+			break;
+		if (offset < skb->len) {
+			int used;
+			size_t len;
+
+			len = skb->len - offset;
+			used = recv_actor(desc, skb, offset, len);
+			if (used <= 0) {
+				if (!copied)
+					copied = used;
+				break;
+			} else if (used <= len) {
+				copied += used;
+				offset += used;
+			}
+		}
+		if (!desc->count)
+			break;
+	}
+
+	return copied;
+}
+EXPORT_SYMBOL(udp_read_sock);
+
 /*
  * 	This should be easy, if there is something there we
  * 	return it, otherwise we block.
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 802f5111805a..71de739b4a9e 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -714,6 +714,7 @@  const struct proto_ops inet6_dgram_ops = {
 	.getsockopt	   = sock_common_getsockopt,	/* ok		*/
 	.sendmsg	   = inet6_sendmsg,		/* retpoline's sake */
 	.recvmsg	   = inet6_recvmsg,		/* retpoline's sake */
+	.read_sock	   = udp_read_sock,
 	.mmap		   = sock_no_mmap,
 	.sendpage	   = sock_no_sendpage,
 	.set_peek_off	   = sk_set_peek_off,