diff mbox series

[RFC,net-next,v6,09/14] virtio/vsock: add common datagram recv path

Message ID 20240710212555.1617795-10-amery.hung@bytedance.com (mailing list archive)
State RFC
Delegated to: Netdev Maintainers
Headers show
Series virtio/vsock: support datagrams | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 821 this patch: 821
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers warning 1 maintainers not CCed: virtualization@lists.linux.dev
netdev/build_clang fail Errors and warnings before: 826 this patch: 826
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 826 this patch: 826
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 148 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Amery Hung July 10, 2024, 9:25 p.m. UTC
From: Bobby Eshleman <bobby.eshleman@bytedance.com>

This commit adds the common datagram receive functionality for virtio
transports. It does not add the vhost/virtio users of that
functionality.

This functionality includes:
- changes to the virtio_transport_recv_pkt() path for finding the
  bound socket receiver for incoming packets
- virtio_transport_recv_pkt() saves the source cid and port to the
  control buffer for recvmsg() to initialize sockaddr_vm structure
  when using datagram

Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com>
Signed-off-by: Amery Hung <amery.hung@bytedance.com>
---
 net/vmw_vsock/virtio_transport_common.c | 79 +++++++++++++++++++++----
 1 file changed, 66 insertions(+), 13 deletions(-)

Comments

Stefano Garzarella July 23, 2024, 2:42 p.m. UTC | #1
On Wed, Jul 10, 2024 at 09:25:50PM GMT, Amery Hung wrote:
>From: Bobby Eshleman <bobby.eshleman@bytedance.com>
>
>This commit adds the common datagram receive functionality for virtio
>transports. It does not add the vhost/virtio users of that
>functionality.
>
>This functionality includes:
>- changes to the virtio_transport_recv_pkt() path for finding the
>  bound socket receiver for incoming packets
>- virtio_transport_recv_pkt() saves the source cid and port to the
>  control buffer for recvmsg() to initialize sockaddr_vm structure
>  when using datagram
>
>Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com>
>Signed-off-by: Amery Hung <amery.hung@bytedance.com>
>---
> net/vmw_vsock/virtio_transport_common.c | 79 +++++++++++++++++++++----
> 1 file changed, 66 insertions(+), 13 deletions(-)
>
>diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
>index 46cd1807f8e3..a571b575fde9 100644
>--- a/net/vmw_vsock/virtio_transport_common.c
>+++ b/net/vmw_vsock/virtio_transport_common.c
>@@ -235,7 +235,9 @@ EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
>
> static u16 virtio_transport_get_type(struct sock *sk)
> {
>-	if (sk->sk_type == SOCK_STREAM)
>+	if (sk->sk_type == SOCK_DGRAM)
>+		return VIRTIO_VSOCK_TYPE_DGRAM;
>+	else if (sk->sk_type == SOCK_STREAM)
> 		return VIRTIO_VSOCK_TYPE_STREAM;
> 	else
> 		return VIRTIO_VSOCK_TYPE_SEQPACKET;
>@@ -1422,6 +1424,33 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk,
> 		kfree_skb(skb);
> }
>
>+static void
>+virtio_transport_dgram_kfree_skb(struct sk_buff *skb, int err)
>+{
>+	if (err == -ENOMEM)
>+		kfree_skb_reason(skb, SKB_DROP_REASON_SOCKET_RCVBUFF);
>+	else if (err == -ENOBUFS)
>+		kfree_skb_reason(skb, SKB_DROP_REASON_PROTO_MEM);
>+	else
>+		kfree_skb(skb);
>+}
>+
>+/* This function takes ownership of the skb.
>+ *
>+ * It either places the skb on the sk_receive_queue or frees it.
>+ */
>+static void
>+virtio_transport_recv_dgram(struct sock *sk, struct sk_buff *skb)
>+{
>+	int err;
>+
>+	err = sock_queue_rcv_skb(sk, skb);
>+	if (err) {
>+		virtio_transport_dgram_kfree_skb(skb, err);
>+		return;
>+	}
>+}
>+
> static int
> virtio_transport_recv_connected(struct sock *sk,
> 				struct sk_buff *skb)
>@@ -1591,7 +1620,8 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb,
> static bool virtio_transport_valid_type(u16 type)
> {
> 	return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
>-	       (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
>+	       (type == VIRTIO_VSOCK_TYPE_SEQPACKET) ||
>+	       (type == VIRTIO_VSOCK_TYPE_DGRAM);
> }
>
> /* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex
>@@ -1601,44 +1631,57 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
> 			       struct sk_buff *skb)
> {
> 	struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
>+	struct vsock_skb_cb *vsock_cb;

This can be defined in the block where it's used.

> 	struct sockaddr_vm src, dst;
> 	struct vsock_sock *vsk;
> 	struct sock *sk;
> 	bool space_available;
>+	u16 type;
>
> 	vsock_addr_init(&src, le64_to_cpu(hdr->src_cid),
> 			le32_to_cpu(hdr->src_port));
> 	vsock_addr_init(&dst, le64_to_cpu(hdr->dst_cid),
> 			le32_to_cpu(hdr->dst_port));
>
>+	type = le16_to_cpu(hdr->type);
>+
> 	trace_virtio_transport_recv_pkt(src.svm_cid, src.svm_port,
> 					dst.svm_cid, dst.svm_port,
> 					le32_to_cpu(hdr->len),
>-					le16_to_cpu(hdr->type),
>+					type,
> 					le16_to_cpu(hdr->op),
> 					le32_to_cpu(hdr->flags),
> 					le32_to_cpu(hdr->buf_alloc),
> 					le32_to_cpu(hdr->fwd_cnt));
>
>-	if (!virtio_transport_valid_type(le16_to_cpu(hdr->type))) {
>+	if (!virtio_transport_valid_type(type)) {
> 		(void)virtio_transport_reset_no_sock(t, skb);
> 		goto free_pkt;
> 	}
>
>-	/* The socket must be in connected or bound table
>-	 * otherwise send reset back
>+	/* For stream/seqpacket, the socket must be in connected or bound table
>+	 * otherwise send reset back.
>+	 *
>+	 * For datagrams, no reset is sent back.
> 	 */
> 	sk = vsock_find_connected_socket(&src, &dst);
> 	if (!sk) {
>-		sk = vsock_find_bound_socket(&dst);
>-		if (!sk) {
>-			(void)virtio_transport_reset_no_sock(t, skb);
>-			goto free_pkt;
>+		if (type == VIRTIO_VSOCK_TYPE_DGRAM) {
>+			sk = vsock_find_bound_dgram_socket(&dst);
>+			if (!sk)
>+				goto free_pkt;
>+		} else {
>+			sk = vsock_find_bound_socket(&dst);
>+			if (!sk) {
>+				(void)virtio_transport_reset_no_sock(t, skb);
>+				goto free_pkt;
>+			}
> 		}
> 	}
>
>-	if (virtio_transport_get_type(sk) != le16_to_cpu(hdr->type)) {
>-		(void)virtio_transport_reset_no_sock(t, skb);
>+	if (virtio_transport_get_type(sk) != type) {
>+		if (type != VIRTIO_VSOCK_TYPE_DGRAM)
>+			(void)virtio_transport_reset_no_sock(t, skb);
> 		sock_put(sk);
> 		goto free_pkt;
> 	}
>@@ -1654,12 +1697,21 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
>
> 	/* Check if sk has been closed before lock_sock */
> 	if (sock_flag(sk, SOCK_DONE)) {
>-		(void)virtio_transport_reset_no_sock(t, skb);
>+		if (type != VIRTIO_VSOCK_TYPE_DGRAM)
>+			(void)virtio_transport_reset_no_sock(t, skb);
> 		release_sock(sk);
> 		sock_put(sk);
> 		goto free_pkt;
> 	}
>
>+	if (sk->sk_type == SOCK_DGRAM) {
>+		vsock_cb = vsock_skb_cb(skb);
>+		vsock_cb->src_cid = src.svm_cid;
>+		vsock_cb->src_port = src.svm_port;
>+		virtio_transport_recv_dgram(sk, skb);


What about adding an API that transports can use to hide this?

I mean something that hide vsock_cb creation and queue packet in the 
socket receive queue. I'd also not expose vsock_skb_cb in an header, but 
I'd handle it internally in af_vsock.c. So I'd just expose API to 
queue/dequeue them.

Also why VMCI is using sk_receive_skb(), while we are using 
sock_queue_rcv_skb()?

Thanks,
Stefano

>+		goto out;
>+	}
>+
> 	space_available = virtio_transport_space_update(sk, skb);
>
> 	/* Update CID in case it has changed after a transport reset event */
>@@ -1691,6 +1743,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
> 		break;
> 	}
>
>+out:
> 	release_sock(sk);
>
> 	/* Release refcnt obtained when we fetched this socket out of the
>-- 
>2.20.1
>
Amery Hung July 30, 2024, 12:35 a.m. UTC | #2
On Tue, Jul 23, 2024 at 7:42 AM Stefano Garzarella <sgarzare@redhat.com> wrote:
>
> On Wed, Jul 10, 2024 at 09:25:50PM GMT, Amery Hung wrote:
> >From: Bobby Eshleman <bobby.eshleman@bytedance.com>
> >
> >This commit adds the common datagram receive functionality for virtio
> >transports. It does not add the vhost/virtio users of that
> >functionality.
> >
> >This functionality includes:
> >- changes to the virtio_transport_recv_pkt() path for finding the
> >  bound socket receiver for incoming packets
> >- virtio_transport_recv_pkt() saves the source cid and port to the
> >  control buffer for recvmsg() to initialize sockaddr_vm structure
> >  when using datagram
> >
> >Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com>
> >Signed-off-by: Amery Hung <amery.hung@bytedance.com>
> >---
> > net/vmw_vsock/virtio_transport_common.c | 79 +++++++++++++++++++++----
> > 1 file changed, 66 insertions(+), 13 deletions(-)
> >
> >diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
> >index 46cd1807f8e3..a571b575fde9 100644
> >--- a/net/vmw_vsock/virtio_transport_common.c
> >+++ b/net/vmw_vsock/virtio_transport_common.c
> >@@ -235,7 +235,9 @@ EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
> >
> > static u16 virtio_transport_get_type(struct sock *sk)
> > {
> >-      if (sk->sk_type == SOCK_STREAM)
> >+      if (sk->sk_type == SOCK_DGRAM)
> >+              return VIRTIO_VSOCK_TYPE_DGRAM;
> >+      else if (sk->sk_type == SOCK_STREAM)
> >               return VIRTIO_VSOCK_TYPE_STREAM;
> >       else
> >               return VIRTIO_VSOCK_TYPE_SEQPACKET;
> >@@ -1422,6 +1424,33 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk,
> >               kfree_skb(skb);
> > }
> >
> >+static void
> >+virtio_transport_dgram_kfree_skb(struct sk_buff *skb, int err)
> >+{
> >+      if (err == -ENOMEM)
> >+              kfree_skb_reason(skb, SKB_DROP_REASON_SOCKET_RCVBUFF);
> >+      else if (err == -ENOBUFS)
> >+              kfree_skb_reason(skb, SKB_DROP_REASON_PROTO_MEM);
> >+      else
> >+              kfree_skb(skb);
> >+}
> >+
> >+/* This function takes ownership of the skb.
> >+ *
> >+ * It either places the skb on the sk_receive_queue or frees it.
> >+ */
> >+static void
> >+virtio_transport_recv_dgram(struct sock *sk, struct sk_buff *skb)
> >+{
> >+      int err;
> >+
> >+      err = sock_queue_rcv_skb(sk, skb);
> >+      if (err) {
> >+              virtio_transport_dgram_kfree_skb(skb, err);
> >+              return;
> >+      }
> >+}
> >+
> > static int
> > virtio_transport_recv_connected(struct sock *sk,
> >                               struct sk_buff *skb)
> >@@ -1591,7 +1620,8 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb,
> > static bool virtio_transport_valid_type(u16 type)
> > {
> >       return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
> >-             (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
> >+             (type == VIRTIO_VSOCK_TYPE_SEQPACKET) ||
> >+             (type == VIRTIO_VSOCK_TYPE_DGRAM);
> > }
> >
> > /* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex
> >@@ -1601,44 +1631,57 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
> >                              struct sk_buff *skb)
> > {
> >       struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
> >+      struct vsock_skb_cb *vsock_cb;
>
> This can be defined in the block where it's used.
>

Got it.

> >       struct sockaddr_vm src, dst;
> >       struct vsock_sock *vsk;
> >       struct sock *sk;
> >       bool space_available;
> >+      u16 type;
> >
> >       vsock_addr_init(&src, le64_to_cpu(hdr->src_cid),
> >                       le32_to_cpu(hdr->src_port));
> >       vsock_addr_init(&dst, le64_to_cpu(hdr->dst_cid),
> >                       le32_to_cpu(hdr->dst_port));
> >
> >+      type = le16_to_cpu(hdr->type);
> >+
> >       trace_virtio_transport_recv_pkt(src.svm_cid, src.svm_port,
> >                                       dst.svm_cid, dst.svm_port,
> >                                       le32_to_cpu(hdr->len),
> >-                                      le16_to_cpu(hdr->type),
> >+                                      type,
> >                                       le16_to_cpu(hdr->op),
> >                                       le32_to_cpu(hdr->flags),
> >                                       le32_to_cpu(hdr->buf_alloc),
> >                                       le32_to_cpu(hdr->fwd_cnt));
> >
> >-      if (!virtio_transport_valid_type(le16_to_cpu(hdr->type))) {
> >+      if (!virtio_transport_valid_type(type)) {
> >               (void)virtio_transport_reset_no_sock(t, skb);
> >               goto free_pkt;
> >       }
> >
> >-      /* The socket must be in connected or bound table
> >-       * otherwise send reset back
> >+      /* For stream/seqpacket, the socket must be in connected or bound table
> >+       * otherwise send reset back.
> >+       *
> >+       * For datagrams, no reset is sent back.
> >        */
> >       sk = vsock_find_connected_socket(&src, &dst);
> >       if (!sk) {
> >-              sk = vsock_find_bound_socket(&dst);
> >-              if (!sk) {
> >-                      (void)virtio_transport_reset_no_sock(t, skb);
> >-                      goto free_pkt;
> >+              if (type == VIRTIO_VSOCK_TYPE_DGRAM) {
> >+                      sk = vsock_find_bound_dgram_socket(&dst);
> >+                      if (!sk)
> >+                              goto free_pkt;
> >+              } else {
> >+                      sk = vsock_find_bound_socket(&dst);
> >+                      if (!sk) {
> >+                              (void)virtio_transport_reset_no_sock(t, skb);
> >+                              goto free_pkt;
> >+                      }
> >               }
> >       }
> >
> >-      if (virtio_transport_get_type(sk) != le16_to_cpu(hdr->type)) {
> >-              (void)virtio_transport_reset_no_sock(t, skb);
> >+      if (virtio_transport_get_type(sk) != type) {
> >+              if (type != VIRTIO_VSOCK_TYPE_DGRAM)
> >+                      (void)virtio_transport_reset_no_sock(t, skb);
> >               sock_put(sk);
> >               goto free_pkt;
> >       }
> >@@ -1654,12 +1697,21 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
> >
> >       /* Check if sk has been closed before lock_sock */
> >       if (sock_flag(sk, SOCK_DONE)) {
> >-              (void)virtio_transport_reset_no_sock(t, skb);
> >+              if (type != VIRTIO_VSOCK_TYPE_DGRAM)
> >+                      (void)virtio_transport_reset_no_sock(t, skb);
> >               release_sock(sk);
> >               sock_put(sk);
> >               goto free_pkt;
> >       }
> >
> >+      if (sk->sk_type == SOCK_DGRAM) {
> >+              vsock_cb = vsock_skb_cb(skb);
> >+              vsock_cb->src_cid = src.svm_cid;
> >+              vsock_cb->src_port = src.svm_port;
> >+              virtio_transport_recv_dgram(sk, skb);
>
>
> What about adding an API that transports can use to hide this?
>
> I mean something that hide vsock_cb creation and queue packet in the
> socket receive queue. I'd also not expose vsock_skb_cb in an header, but
> I'd handle it internally in af_vsock.c. So I'd just expose API to
> queue/dequeue them.
>

Got it. I will move vsock_skb_cb to af_vsock.c and create an API:

vsock_dgram_skb_save_src_addr(struct sk_buff *skb, u32 cid, u32 port)

Different dgram implementations will call this API instead of the code
block above to save the source address information into the control
buffer.

A side note on why this is a vsock API instead of a member function in
transport: As we move to support multi-transport dgram, different
transport implementations can place skb into the sk->sk_receive_queue.
Therefore, we cannot call transport-specific function in
vsock_dgram_recvmsg() to initialize struct sockaddr_vm. Hence, the
receiving paths of different transports need to call this API to save
source address.

> Also why VMCI is using sk_receive_skb(), while we are using
> sock_queue_rcv_skb()?
>

I _think_ originally we referred to UDP and UDS when designing virtio
dgram, and ended up with placing skb into sk_receive_queue directly. I
will look into this to provide better justification.

Thank you,
Amery

> Thanks,
> Stefano
>
> >+              goto out;
> >+      }
> >+
> >       space_available = virtio_transport_space_update(sk, skb);
> >
> >       /* Update CID in case it has changed after a transport reset event */
> >@@ -1691,6 +1743,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
> >               break;
> >       }
> >
> >+out:
> >       release_sock(sk);
> >
> >       /* Release refcnt obtained when we fetched this socket out of the
> >--
> >2.20.1
> >
>
Stefano Garzarella July 30, 2024, 8:32 a.m. UTC | #3
On Mon, Jul 29, 2024 at 05:35:01PM GMT, Amery Hung wrote:
>On Tue, Jul 23, 2024 at 7:42 AM Stefano Garzarella <sgarzare@redhat.com> wrote:
>>
>> On Wed, Jul 10, 2024 at 09:25:50PM GMT, Amery Hung wrote:
>> >From: Bobby Eshleman <bobby.eshleman@bytedance.com>
>> >
>> >This commit adds the common datagram receive functionality for virtio
>> >transports. It does not add the vhost/virtio users of that
>> >functionality.
>> >
>> >This functionality includes:
>> >- changes to the virtio_transport_recv_pkt() path for finding the
>> >  bound socket receiver for incoming packets
>> >- virtio_transport_recv_pkt() saves the source cid and port to the
>> >  control buffer for recvmsg() to initialize sockaddr_vm structure
>> >  when using datagram
>> >
>> >Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com>
>> >Signed-off-by: Amery Hung <amery.hung@bytedance.com>
>> >---
>> > net/vmw_vsock/virtio_transport_common.c | 79 +++++++++++++++++++++----
>> > 1 file changed, 66 insertions(+), 13 deletions(-)
>> >
>> >diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
>> >index 46cd1807f8e3..a571b575fde9 100644
>> >--- a/net/vmw_vsock/virtio_transport_common.c
>> >+++ b/net/vmw_vsock/virtio_transport_common.c
>> >@@ -235,7 +235,9 @@ EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
>> >
>> > static u16 virtio_transport_get_type(struct sock *sk)
>> > {
>> >-      if (sk->sk_type == SOCK_STREAM)
>> >+      if (sk->sk_type == SOCK_DGRAM)
>> >+              return VIRTIO_VSOCK_TYPE_DGRAM;
>> >+      else if (sk->sk_type == SOCK_STREAM)
>> >               return VIRTIO_VSOCK_TYPE_STREAM;
>> >       else
>> >               return VIRTIO_VSOCK_TYPE_SEQPACKET;
>> >@@ -1422,6 +1424,33 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk,
>> >               kfree_skb(skb);
>> > }
>> >
>> >+static void
>> >+virtio_transport_dgram_kfree_skb(struct sk_buff *skb, int err)
>> >+{
>> >+      if (err == -ENOMEM)
>> >+              kfree_skb_reason(skb, SKB_DROP_REASON_SOCKET_RCVBUFF);
>> >+      else if (err == -ENOBUFS)
>> >+              kfree_skb_reason(skb, SKB_DROP_REASON_PROTO_MEM);
>> >+      else
>> >+              kfree_skb(skb);
>> >+}
>> >+
>> >+/* This function takes ownership of the skb.
>> >+ *
>> >+ * It either places the skb on the sk_receive_queue or frees it.
>> >+ */
>> >+static void
>> >+virtio_transport_recv_dgram(struct sock *sk, struct sk_buff *skb)
>> >+{
>> >+      int err;
>> >+
>> >+      err = sock_queue_rcv_skb(sk, skb);
>> >+      if (err) {
>> >+              virtio_transport_dgram_kfree_skb(skb, err);
>> >+              return;
>> >+      }
>> >+}
>> >+
>> > static int
>> > virtio_transport_recv_connected(struct sock *sk,
>> >                               struct sk_buff *skb)
>> >@@ -1591,7 +1620,8 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb,
>> > static bool virtio_transport_valid_type(u16 type)
>> > {
>> >       return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
>> >-             (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
>> >+             (type == VIRTIO_VSOCK_TYPE_SEQPACKET) ||
>> >+             (type == VIRTIO_VSOCK_TYPE_DGRAM);
>> > }
>> >
>> > /* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex
>> >@@ -1601,44 +1631,57 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
>> >                              struct sk_buff *skb)
>> > {
>> >       struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
>> >+      struct vsock_skb_cb *vsock_cb;
>>
>> This can be defined in the block where it's used.
>>
>
>Got it.
>
>> >       struct sockaddr_vm src, dst;
>> >       struct vsock_sock *vsk;
>> >       struct sock *sk;
>> >       bool space_available;
>> >+      u16 type;
>> >
>> >       vsock_addr_init(&src, le64_to_cpu(hdr->src_cid),
>> >                       le32_to_cpu(hdr->src_port));
>> >       vsock_addr_init(&dst, le64_to_cpu(hdr->dst_cid),
>> >                       le32_to_cpu(hdr->dst_port));
>> >
>> >+      type = le16_to_cpu(hdr->type);
>> >+
>> >       trace_virtio_transport_recv_pkt(src.svm_cid, src.svm_port,
>> >                                       dst.svm_cid, dst.svm_port,
>> >                                       le32_to_cpu(hdr->len),
>> >-                                      le16_to_cpu(hdr->type),
>> >+                                      type,
>> >                                       le16_to_cpu(hdr->op),
>> >                                       le32_to_cpu(hdr->flags),
>> >                                       le32_to_cpu(hdr->buf_alloc),
>> >                                       le32_to_cpu(hdr->fwd_cnt));
>> >
>> >-      if (!virtio_transport_valid_type(le16_to_cpu(hdr->type))) {
>> >+      if (!virtio_transport_valid_type(type)) {
>> >               (void)virtio_transport_reset_no_sock(t, skb);
>> >               goto free_pkt;
>> >       }
>> >
>> >-      /* The socket must be in connected or bound table
>> >-       * otherwise send reset back
>> >+      /* For stream/seqpacket, the socket must be in connected or bound table
>> >+       * otherwise send reset back.
>> >+       *
>> >+       * For datagrams, no reset is sent back.
>> >        */
>> >       sk = vsock_find_connected_socket(&src, &dst);
>> >       if (!sk) {
>> >-              sk = vsock_find_bound_socket(&dst);
>> >-              if (!sk) {
>> >-                      (void)virtio_transport_reset_no_sock(t, skb);
>> >-                      goto free_pkt;
>> >+              if (type == VIRTIO_VSOCK_TYPE_DGRAM) {
>> >+                      sk = vsock_find_bound_dgram_socket(&dst);
>> >+                      if (!sk)
>> >+                              goto free_pkt;
>> >+              } else {
>> >+                      sk = vsock_find_bound_socket(&dst);
>> >+                      if (!sk) {
>> >+                              (void)virtio_transport_reset_no_sock(t, skb);
>> >+                              goto free_pkt;
>> >+                      }
>> >               }
>> >       }
>> >
>> >-      if (virtio_transport_get_type(sk) != le16_to_cpu(hdr->type)) {
>> >-              (void)virtio_transport_reset_no_sock(t, skb);
>> >+      if (virtio_transport_get_type(sk) != type) {
>> >+              if (type != VIRTIO_VSOCK_TYPE_DGRAM)
>> >+                      (void)virtio_transport_reset_no_sock(t, skb);
>> >               sock_put(sk);
>> >               goto free_pkt;
>> >       }
>> >@@ -1654,12 +1697,21 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
>> >
>> >       /* Check if sk has been closed before lock_sock */
>> >       if (sock_flag(sk, SOCK_DONE)) {
>> >-              (void)virtio_transport_reset_no_sock(t, skb);
>> >+              if (type != VIRTIO_VSOCK_TYPE_DGRAM)
>> >+                      (void)virtio_transport_reset_no_sock(t, skb);
>> >               release_sock(sk);
>> >               sock_put(sk);
>> >               goto free_pkt;
>> >       }
>> >
>> >+      if (sk->sk_type == SOCK_DGRAM) {
>> >+              vsock_cb = vsock_skb_cb(skb);
>> >+              vsock_cb->src_cid = src.svm_cid;
>> >+              vsock_cb->src_port = src.svm_port;
>> >+              virtio_transport_recv_dgram(sk, skb);
>>
>>
>> What about adding an API that transports can use to hide this?
>>
>> I mean something that hide vsock_cb creation and queue packet in the
>> socket receive queue. I'd also not expose vsock_skb_cb in an header, but
>> I'd handle it internally in af_vsock.c. So I'd just expose API to
>> queue/dequeue them.
>>
>
>Got it. I will move vsock_skb_cb to af_vsock.c and create an API:
>
>vsock_dgram_skb_save_src_addr(struct sk_buff *skb, u32 cid, u32 port)

This is okay, but I would try to go further by directly adding an API to 
queue dgrams in af_vsock.c (if it's feasible).

>
>Different dgram implementations will call this API instead of the code
>block above to save the source address information into the control
>buffer.
>
>A side note on why this is a vsock API instead of a member function in
>transport: As we move to support multi-transport dgram, different
>transport implementations can place skb into the sk->sk_receive_queue.
>Therefore, we cannot call transport-specific function in
>vsock_dgram_recvmsg() to initialize struct sockaddr_vm. Hence, the
>receiving paths of different transports need to call this API to save
>source address.

What I meant is: why can't virtio_transport_recv_dgram() be exposed by
af_vsock.c as vsock_recv_dgram() and handle everything internally, e.g.
populating vsock_cb, calling sock_queue_rcv_skb(), etc.?

>
>> Also why VMCI is using sk_receive_skb(), while we are using
>> sock_queue_rcv_skb()?
>>
>
>I _think_ originally we referred to UDP and UDS when designing virtio
>dgram, and ended up with placing skb into sk_receive_queue directly. I
>will look into this to provide better justification.

Great, thanks.

Maybe we can also ping the VMCI maintainers to understand if they can switch
to sock_queue_rcv_skb(). But first we should better understand the difference.

Thanks,
Stefano
diff mbox series

Patch

diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 46cd1807f8e3..a571b575fde9 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -235,7 +235,9 @@  EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
 
 static u16 virtio_transport_get_type(struct sock *sk)
 {
-	if (sk->sk_type == SOCK_STREAM)
+	if (sk->sk_type == SOCK_DGRAM)
+		return VIRTIO_VSOCK_TYPE_DGRAM;
+	else if (sk->sk_type == SOCK_STREAM)
 		return VIRTIO_VSOCK_TYPE_STREAM;
 	else
 		return VIRTIO_VSOCK_TYPE_SEQPACKET;
@@ -1422,6 +1424,33 @@  virtio_transport_recv_enqueue(struct vsock_sock *vsk,
 		kfree_skb(skb);
 }
 
+static void
+virtio_transport_dgram_kfree_skb(struct sk_buff *skb, int err)
+{
+	if (err == -ENOMEM)
+		kfree_skb_reason(skb, SKB_DROP_REASON_SOCKET_RCVBUFF);
+	else if (err == -ENOBUFS)
+		kfree_skb_reason(skb, SKB_DROP_REASON_PROTO_MEM);
+	else
+		kfree_skb(skb);
+}
+
+/* This function takes ownership of the skb.
+ *
+ * It either places the skb on the sk_receive_queue or frees it.
+ */
+static void
+virtio_transport_recv_dgram(struct sock *sk, struct sk_buff *skb)
+{
+	int err;
+
+	err = sock_queue_rcv_skb(sk, skb);
+	if (err) {
+		virtio_transport_dgram_kfree_skb(skb, err);
+		return;
+	}
+}
+
 static int
 virtio_transport_recv_connected(struct sock *sk,
 				struct sk_buff *skb)
@@ -1591,7 +1620,8 @@  virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb,
 static bool virtio_transport_valid_type(u16 type)
 {
 	return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
-	       (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
+	       (type == VIRTIO_VSOCK_TYPE_SEQPACKET) ||
+	       (type == VIRTIO_VSOCK_TYPE_DGRAM);
 }
 
 /* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex
@@ -1601,44 +1631,57 @@  void virtio_transport_recv_pkt(struct virtio_transport *t,
 			       struct sk_buff *skb)
 {
 	struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
+	struct vsock_skb_cb *vsock_cb;
 	struct sockaddr_vm src, dst;
 	struct vsock_sock *vsk;
 	struct sock *sk;
 	bool space_available;
+	u16 type;
 
 	vsock_addr_init(&src, le64_to_cpu(hdr->src_cid),
 			le32_to_cpu(hdr->src_port));
 	vsock_addr_init(&dst, le64_to_cpu(hdr->dst_cid),
 			le32_to_cpu(hdr->dst_port));
 
+	type = le16_to_cpu(hdr->type);
+
 	trace_virtio_transport_recv_pkt(src.svm_cid, src.svm_port,
 					dst.svm_cid, dst.svm_port,
 					le32_to_cpu(hdr->len),
-					le16_to_cpu(hdr->type),
+					type,
 					le16_to_cpu(hdr->op),
 					le32_to_cpu(hdr->flags),
 					le32_to_cpu(hdr->buf_alloc),
 					le32_to_cpu(hdr->fwd_cnt));
 
-	if (!virtio_transport_valid_type(le16_to_cpu(hdr->type))) {
+	if (!virtio_transport_valid_type(type)) {
 		(void)virtio_transport_reset_no_sock(t, skb);
 		goto free_pkt;
 	}
 
-	/* The socket must be in connected or bound table
-	 * otherwise send reset back
+	/* For stream/seqpacket, the socket must be in connected or bound table
+	 * otherwise send reset back.
+	 *
+	 * For datagrams, no reset is sent back.
 	 */
 	sk = vsock_find_connected_socket(&src, &dst);
 	if (!sk) {
-		sk = vsock_find_bound_socket(&dst);
-		if (!sk) {
-			(void)virtio_transport_reset_no_sock(t, skb);
-			goto free_pkt;
+		if (type == VIRTIO_VSOCK_TYPE_DGRAM) {
+			sk = vsock_find_bound_dgram_socket(&dst);
+			if (!sk)
+				goto free_pkt;
+		} else {
+			sk = vsock_find_bound_socket(&dst);
+			if (!sk) {
+				(void)virtio_transport_reset_no_sock(t, skb);
+				goto free_pkt;
+			}
 		}
 	}
 
-	if (virtio_transport_get_type(sk) != le16_to_cpu(hdr->type)) {
-		(void)virtio_transport_reset_no_sock(t, skb);
+	if (virtio_transport_get_type(sk) != type) {
+		if (type != VIRTIO_VSOCK_TYPE_DGRAM)
+			(void)virtio_transport_reset_no_sock(t, skb);
 		sock_put(sk);
 		goto free_pkt;
 	}
@@ -1654,12 +1697,21 @@  void virtio_transport_recv_pkt(struct virtio_transport *t,
 
 	/* Check if sk has been closed before lock_sock */
 	if (sock_flag(sk, SOCK_DONE)) {
-		(void)virtio_transport_reset_no_sock(t, skb);
+		if (type != VIRTIO_VSOCK_TYPE_DGRAM)
+			(void)virtio_transport_reset_no_sock(t, skb);
 		release_sock(sk);
 		sock_put(sk);
 		goto free_pkt;
 	}
 
+	if (sk->sk_type == SOCK_DGRAM) {
+		vsock_cb = vsock_skb_cb(skb);
+		vsock_cb->src_cid = src.svm_cid;
+		vsock_cb->src_port = src.svm_port;
+		virtio_transport_recv_dgram(sk, skb);
+		goto out;
+	}
+
 	space_available = virtio_transport_space_update(sk, skb);
 
 	/* Update CID in case it has changed after a transport reset event */
@@ -1691,6 +1743,7 @@  void virtio_transport_recv_pkt(struct virtio_transport *t,
 		break;
 	}
 
+out:
 	release_sock(sk);
 
 	/* Release refcnt obtained when we fetched this socket out of the