Message ID | 20240710212555.1617795-10-amery.hung@bytedance.com (mailing list archive) |
---|---|
State | RFC |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | virtio/vsock: support datagrams | expand |
On Wed, Jul 10, 2024 at 09:25:50PM GMT, Amery Hung wrote: >From: Bobby Eshleman <bobby.eshleman@bytedance.com> > >This commit adds the common datagram receive functionality for virtio >transports. It does not add the vhost/virtio users of that >functionality. > >This functionality includes: >- changes to the virtio_transport_recv_pkt() path for finding the > bound socket receiver for incoming packets >- virtio_transport_recv_pkt() saves the source cid and port to the > control buffer for recvmsg() to initialize sockaddr_vm structure > when using datagram > >Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com> >Signed-off-by: Amery Hung <amery.hung@bytedance.com> >--- > net/vmw_vsock/virtio_transport_common.c | 79 +++++++++++++++++++++---- > 1 file changed, 66 insertions(+), 13 deletions(-) > >diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c >index 46cd1807f8e3..a571b575fde9 100644 >--- a/net/vmw_vsock/virtio_transport_common.c >+++ b/net/vmw_vsock/virtio_transport_common.c >@@ -235,7 +235,9 @@ EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt); > > static u16 virtio_transport_get_type(struct sock *sk) > { >- if (sk->sk_type == SOCK_STREAM) >+ if (sk->sk_type == SOCK_DGRAM) >+ return VIRTIO_VSOCK_TYPE_DGRAM; >+ else if (sk->sk_type == SOCK_STREAM) > return VIRTIO_VSOCK_TYPE_STREAM; > else > return VIRTIO_VSOCK_TYPE_SEQPACKET; >@@ -1422,6 +1424,33 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk, > kfree_skb(skb); > } > >+static void >+virtio_transport_dgram_kfree_skb(struct sk_buff *skb, int err) >+{ >+ if (err == -ENOMEM) >+ kfree_skb_reason(skb, SKB_DROP_REASON_SOCKET_RCVBUFF); >+ else if (err == -ENOBUFS) >+ kfree_skb_reason(skb, SKB_DROP_REASON_PROTO_MEM); >+ else >+ kfree_skb(skb); >+} >+ >+/* This function takes ownership of the skb. >+ * >+ * It either places the skb on the sk_receive_queue or frees it. 
>+ */ >+static void >+virtio_transport_recv_dgram(struct sock *sk, struct sk_buff *skb) >+{ >+ int err; >+ >+ err = sock_queue_rcv_skb(sk, skb); >+ if (err) { >+ virtio_transport_dgram_kfree_skb(skb, err); >+ return; >+ } >+} >+ > static int > virtio_transport_recv_connected(struct sock *sk, > struct sk_buff *skb) >@@ -1591,7 +1620,8 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb, > static bool virtio_transport_valid_type(u16 type) > { > return (type == VIRTIO_VSOCK_TYPE_STREAM) || >- (type == VIRTIO_VSOCK_TYPE_SEQPACKET); >+ (type == VIRTIO_VSOCK_TYPE_SEQPACKET) || >+ (type == VIRTIO_VSOCK_TYPE_DGRAM); > } > > /* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex >@@ -1601,44 +1631,57 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, > struct sk_buff *skb) > { > struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb); >+ struct vsock_skb_cb *vsock_cb; This can be defined in the block where it's used. > struct sockaddr_vm src, dst; > struct vsock_sock *vsk; > struct sock *sk; > bool space_available; >+ u16 type; > > vsock_addr_init(&src, le64_to_cpu(hdr->src_cid), > le32_to_cpu(hdr->src_port)); > vsock_addr_init(&dst, le64_to_cpu(hdr->dst_cid), > le32_to_cpu(hdr->dst_port)); > >+ type = le16_to_cpu(hdr->type); >+ > trace_virtio_transport_recv_pkt(src.svm_cid, src.svm_port, > dst.svm_cid, dst.svm_port, > le32_to_cpu(hdr->len), >- le16_to_cpu(hdr->type), >+ type, > le16_to_cpu(hdr->op), > le32_to_cpu(hdr->flags), > le32_to_cpu(hdr->buf_alloc), > le32_to_cpu(hdr->fwd_cnt)); > >- if (!virtio_transport_valid_type(le16_to_cpu(hdr->type))) { >+ if (!virtio_transport_valid_type(type)) { > (void)virtio_transport_reset_no_sock(t, skb); > goto free_pkt; > } > >- /* The socket must be in connected or bound table >- * otherwise send reset back >+ /* For stream/seqpacket, the socket must be in connected or bound table >+ * otherwise send reset back. >+ * >+ * For datagrams, no reset is sent back. 
> */ > sk = vsock_find_connected_socket(&src, &dst); > if (!sk) { >- sk = vsock_find_bound_socket(&dst); >- if (!sk) { >- (void)virtio_transport_reset_no_sock(t, skb); >- goto free_pkt; >+ if (type == VIRTIO_VSOCK_TYPE_DGRAM) { >+ sk = vsock_find_bound_dgram_socket(&dst); >+ if (!sk) >+ goto free_pkt; >+ } else { >+ sk = vsock_find_bound_socket(&dst); >+ if (!sk) { >+ (void)virtio_transport_reset_no_sock(t, skb); >+ goto free_pkt; >+ } > } > } > >- if (virtio_transport_get_type(sk) != le16_to_cpu(hdr->type)) { >- (void)virtio_transport_reset_no_sock(t, skb); >+ if (virtio_transport_get_type(sk) != type) { >+ if (type != VIRTIO_VSOCK_TYPE_DGRAM) >+ (void)virtio_transport_reset_no_sock(t, skb); > sock_put(sk); > goto free_pkt; > } >@@ -1654,12 +1697,21 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, > > /* Check if sk has been closed before lock_sock */ > if (sock_flag(sk, SOCK_DONE)) { >- (void)virtio_transport_reset_no_sock(t, skb); >+ if (type != VIRTIO_VSOCK_TYPE_DGRAM) >+ (void)virtio_transport_reset_no_sock(t, skb); > release_sock(sk); > sock_put(sk); > goto free_pkt; > } > >+ if (sk->sk_type == SOCK_DGRAM) { >+ vsock_cb = vsock_skb_cb(skb); >+ vsock_cb->src_cid = src.svm_cid; >+ vsock_cb->src_port = src.svm_port; >+ virtio_transport_recv_dgram(sk, skb); What about adding an API that transports can use to hide this? I mean something that hide vsock_cb creation and queue packet in the socket receive queue. I'd also not expose vsock_skb_cb in an header, but I'd handle it internally in af_vsock.c. So I'd just expose API to queue/dequeue them. Also why VMCI is using sk_receive_skb(), while we are using sock_queue_rcv_skb()? 
Thanks, Stefano >+ goto out; >+ } >+ > space_available = virtio_transport_space_update(sk, skb); > > /* Update CID in case it has changed after a transport reset event */ >@@ -1691,6 +1743,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, > break; > } > >+out: > release_sock(sk); > > /* Release refcnt obtained when we fetched this socket out of the >-- >2.20.1 >
On Tue, Jul 23, 2024 at 7:42 AM Stefano Garzarella <sgarzare@redhat.com> wrote: > > On Wed, Jul 10, 2024 at 09:25:50PM GMT, Amery Hung wrote: > >From: Bobby Eshleman <bobby.eshleman@bytedance.com> > > > >This commit adds the common datagram receive functionality for virtio > >transports. It does not add the vhost/virtio users of that > >functionality. > > > >This functionality includes: > >- changes to the virtio_transport_recv_pkt() path for finding the > > bound socket receiver for incoming packets > >- virtio_transport_recv_pkt() saves the source cid and port to the > > control buffer for recvmsg() to initialize sockaddr_vm structure > > when using datagram > > > >Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com> > >Signed-off-by: Amery Hung <amery.hung@bytedance.com> > >--- > > net/vmw_vsock/virtio_transport_common.c | 79 +++++++++++++++++++++---- > > 1 file changed, 66 insertions(+), 13 deletions(-) > > > >diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c > >index 46cd1807f8e3..a571b575fde9 100644 > >--- a/net/vmw_vsock/virtio_transport_common.c > >+++ b/net/vmw_vsock/virtio_transport_common.c > >@@ -235,7 +235,9 @@ EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt); > > > > static u16 virtio_transport_get_type(struct sock *sk) > > { > >- if (sk->sk_type == SOCK_STREAM) > >+ if (sk->sk_type == SOCK_DGRAM) > >+ return VIRTIO_VSOCK_TYPE_DGRAM; > >+ else if (sk->sk_type == SOCK_STREAM) > > return VIRTIO_VSOCK_TYPE_STREAM; > > else > > return VIRTIO_VSOCK_TYPE_SEQPACKET; > >@@ -1422,6 +1424,33 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk, > > kfree_skb(skb); > > } > > > >+static void > >+virtio_transport_dgram_kfree_skb(struct sk_buff *skb, int err) > >+{ > >+ if (err == -ENOMEM) > >+ kfree_skb_reason(skb, SKB_DROP_REASON_SOCKET_RCVBUFF); > >+ else if (err == -ENOBUFS) > >+ kfree_skb_reason(skb, SKB_DROP_REASON_PROTO_MEM); > >+ else > >+ kfree_skb(skb); > >+} > >+ > >+/* This function takes 
ownership of the skb. > >+ * > >+ * It either places the skb on the sk_receive_queue or frees it. > >+ */ > >+static void > >+virtio_transport_recv_dgram(struct sock *sk, struct sk_buff *skb) > >+{ > >+ int err; > >+ > >+ err = sock_queue_rcv_skb(sk, skb); > >+ if (err) { > >+ virtio_transport_dgram_kfree_skb(skb, err); > >+ return; > >+ } > >+} > >+ > > static int > > virtio_transport_recv_connected(struct sock *sk, > > struct sk_buff *skb) > >@@ -1591,7 +1620,8 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb, > > static bool virtio_transport_valid_type(u16 type) > > { > > return (type == VIRTIO_VSOCK_TYPE_STREAM) || > >- (type == VIRTIO_VSOCK_TYPE_SEQPACKET); > >+ (type == VIRTIO_VSOCK_TYPE_SEQPACKET) || > >+ (type == VIRTIO_VSOCK_TYPE_DGRAM); > > } > > > > /* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex > >@@ -1601,44 +1631,57 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, > > struct sk_buff *skb) > > { > > struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb); > >+ struct vsock_skb_cb *vsock_cb; > > This can be defined in the block where it's used. > Got it. 
> > struct sockaddr_vm src, dst; > > struct vsock_sock *vsk; > > struct sock *sk; > > bool space_available; > >+ u16 type; > > > > vsock_addr_init(&src, le64_to_cpu(hdr->src_cid), > > le32_to_cpu(hdr->src_port)); > > vsock_addr_init(&dst, le64_to_cpu(hdr->dst_cid), > > le32_to_cpu(hdr->dst_port)); > > > >+ type = le16_to_cpu(hdr->type); > >+ > > trace_virtio_transport_recv_pkt(src.svm_cid, src.svm_port, > > dst.svm_cid, dst.svm_port, > > le32_to_cpu(hdr->len), > >- le16_to_cpu(hdr->type), > >+ type, > > le16_to_cpu(hdr->op), > > le32_to_cpu(hdr->flags), > > le32_to_cpu(hdr->buf_alloc), > > le32_to_cpu(hdr->fwd_cnt)); > > > >- if (!virtio_transport_valid_type(le16_to_cpu(hdr->type))) { > >+ if (!virtio_transport_valid_type(type)) { > > (void)virtio_transport_reset_no_sock(t, skb); > > goto free_pkt; > > } > > > >- /* The socket must be in connected or bound table > >- * otherwise send reset back > >+ /* For stream/seqpacket, the socket must be in connected or bound table > >+ * otherwise send reset back. > >+ * > >+ * For datagrams, no reset is sent back. 
> > */ > > sk = vsock_find_connected_socket(&src, &dst); > > if (!sk) { > >- sk = vsock_find_bound_socket(&dst); > >- if (!sk) { > >- (void)virtio_transport_reset_no_sock(t, skb); > >- goto free_pkt; > >+ if (type == VIRTIO_VSOCK_TYPE_DGRAM) { > >+ sk = vsock_find_bound_dgram_socket(&dst); > >+ if (!sk) > >+ goto free_pkt; > >+ } else { > >+ sk = vsock_find_bound_socket(&dst); > >+ if (!sk) { > >+ (void)virtio_transport_reset_no_sock(t, skb); > >+ goto free_pkt; > >+ } > > } > > } > > > >- if (virtio_transport_get_type(sk) != le16_to_cpu(hdr->type)) { > >- (void)virtio_transport_reset_no_sock(t, skb); > >+ if (virtio_transport_get_type(sk) != type) { > >+ if (type != VIRTIO_VSOCK_TYPE_DGRAM) > >+ (void)virtio_transport_reset_no_sock(t, skb); > > sock_put(sk); > > goto free_pkt; > > } > >@@ -1654,12 +1697,21 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, > > > > /* Check if sk has been closed before lock_sock */ > > if (sock_flag(sk, SOCK_DONE)) { > >- (void)virtio_transport_reset_no_sock(t, skb); > >+ if (type != VIRTIO_VSOCK_TYPE_DGRAM) > >+ (void)virtio_transport_reset_no_sock(t, skb); > > release_sock(sk); > > sock_put(sk); > > goto free_pkt; > > } > > > >+ if (sk->sk_type == SOCK_DGRAM) { > >+ vsock_cb = vsock_skb_cb(skb); > >+ vsock_cb->src_cid = src.svm_cid; > >+ vsock_cb->src_port = src.svm_port; > >+ virtio_transport_recv_dgram(sk, skb); > > > What about adding an API that transports can use to hide this? > > I mean something that hide vsock_cb creation and queue packet in the > socket receive queue. I'd also not expose vsock_skb_cb in an header, but > I'd handle it internally in af_vsock.c. So I'd just expose API to > queue/dequeue them. > Got it. I will move vsock_skb_cb to af_vsock.c and create an API: vsock_dgram_skb_save_src_addr(struct sk_buff *skb, u32 cid, u32 port) Different dgram implementations will call this API instead of the code block above to save the source address information into the control buffer. 
A side note on why this is a vsock API instead of a member function in transport: As we move to support multi-transport dgram, different transport implementations can place skb into the sk->sk_receive_queue. Therefore, we cannot call transport-specific function in vsock_dgram_recvmsg() to initialize struct sockaddr_vm. Hence, the receiving paths of different transports need to call this API to save source address. > Also why VMCI is using sk_receive_skb(), while we are using > sock_queue_rcv_skb()? > I _think_ originally we referred to UDP and UDS when designing virtio dgram, and ended up with placing skb into sk_receive_queue directly. I will look into this to provide better justification. Thank you, Amery > Thanks, > Stefano > > >+ goto out; > >+ } > >+ > > space_available = virtio_transport_space_update(sk, skb); > > > > /* Update CID in case it has changed after a transport reset event */ > >@@ -1691,6 +1743,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, > > break; > > } > > > >+out: > > release_sock(sk); > > > > /* Release refcnt obtained when we fetched this socket out of the > >-- > >2.20.1 > > >
On Mon, Jul 29, 2024 at 05:35:01PM GMT, Amery Hung wrote: >On Tue, Jul 23, 2024 at 7:42 AM Stefano Garzarella <sgarzare@redhat.com> wrote: >> >> On Wed, Jul 10, 2024 at 09:25:50PM GMT, Amery Hung wrote: >> >From: Bobby Eshleman <bobby.eshleman@bytedance.com> >> > >> >This commit adds the common datagram receive functionality for virtio >> >transports. It does not add the vhost/virtio users of that >> >functionality. >> > >> >This functionality includes: >> >- changes to the virtio_transport_recv_pkt() path for finding the >> > bound socket receiver for incoming packets >> >- virtio_transport_recv_pkt() saves the source cid and port to the >> > control buffer for recvmsg() to initialize sockaddr_vm structure >> > when using datagram >> > >> >Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com> >> >Signed-off-by: Amery Hung <amery.hung@bytedance.com> >> >--- >> > net/vmw_vsock/virtio_transport_common.c | 79 +++++++++++++++++++++---- >> > 1 file changed, 66 insertions(+), 13 deletions(-) >> > >> >diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c >> >index 46cd1807f8e3..a571b575fde9 100644 >> >--- a/net/vmw_vsock/virtio_transport_common.c >> >+++ b/net/vmw_vsock/virtio_transport_common.c >> >@@ -235,7 +235,9 @@ EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt); >> > >> > static u16 virtio_transport_get_type(struct sock *sk) >> > { >> >- if (sk->sk_type == SOCK_STREAM) >> >+ if (sk->sk_type == SOCK_DGRAM) >> >+ return VIRTIO_VSOCK_TYPE_DGRAM; >> >+ else if (sk->sk_type == SOCK_STREAM) >> > return VIRTIO_VSOCK_TYPE_STREAM; >> > else >> > return VIRTIO_VSOCK_TYPE_SEQPACKET; >> >@@ -1422,6 +1424,33 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk, >> > kfree_skb(skb); >> > } >> > >> >+static void >> >+virtio_transport_dgram_kfree_skb(struct sk_buff *skb, int err) >> >+{ >> >+ if (err == -ENOMEM) >> >+ kfree_skb_reason(skb, SKB_DROP_REASON_SOCKET_RCVBUFF); >> >+ else if (err == -ENOBUFS) >> >+ 
kfree_skb_reason(skb, SKB_DROP_REASON_PROTO_MEM); >> >+ else >> >+ kfree_skb(skb); >> >+} >> >+ >> >+/* This function takes ownership of the skb. >> >+ * >> >+ * It either places the skb on the sk_receive_queue or frees it. >> >+ */ >> >+static void >> >+virtio_transport_recv_dgram(struct sock *sk, struct sk_buff *skb) >> >+{ >> >+ int err; >> >+ >> >+ err = sock_queue_rcv_skb(sk, skb); >> >+ if (err) { >> >+ virtio_transport_dgram_kfree_skb(skb, err); >> >+ return; >> >+ } >> >+} >> >+ >> > static int >> > virtio_transport_recv_connected(struct sock *sk, >> > struct sk_buff *skb) >> >@@ -1591,7 +1620,8 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb, >> > static bool virtio_transport_valid_type(u16 type) >> > { >> > return (type == VIRTIO_VSOCK_TYPE_STREAM) || >> >- (type == VIRTIO_VSOCK_TYPE_SEQPACKET); >> >+ (type == VIRTIO_VSOCK_TYPE_SEQPACKET) || >> >+ (type == VIRTIO_VSOCK_TYPE_DGRAM); >> > } >> > >> > /* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex >> >@@ -1601,44 +1631,57 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, >> > struct sk_buff *skb) >> > { >> > struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb); >> >+ struct vsock_skb_cb *vsock_cb; >> >> This can be defined in the block where it's used. >> > >Got it. 
> >> > struct sockaddr_vm src, dst; >> > struct vsock_sock *vsk; >> > struct sock *sk; >> > bool space_available; >> >+ u16 type; >> > >> > vsock_addr_init(&src, le64_to_cpu(hdr->src_cid), >> > le32_to_cpu(hdr->src_port)); >> > vsock_addr_init(&dst, le64_to_cpu(hdr->dst_cid), >> > le32_to_cpu(hdr->dst_port)); >> > >> >+ type = le16_to_cpu(hdr->type); >> >+ >> > trace_virtio_transport_recv_pkt(src.svm_cid, src.svm_port, >> > dst.svm_cid, dst.svm_port, >> > le32_to_cpu(hdr->len), >> >- le16_to_cpu(hdr->type), >> >+ type, >> > le16_to_cpu(hdr->op), >> > le32_to_cpu(hdr->flags), >> > le32_to_cpu(hdr->buf_alloc), >> > le32_to_cpu(hdr->fwd_cnt)); >> > >> >- if (!virtio_transport_valid_type(le16_to_cpu(hdr->type))) { >> >+ if (!virtio_transport_valid_type(type)) { >> > (void)virtio_transport_reset_no_sock(t, skb); >> > goto free_pkt; >> > } >> > >> >- /* The socket must be in connected or bound table >> >- * otherwise send reset back >> >+ /* For stream/seqpacket, the socket must be in connected or bound table >> >+ * otherwise send reset back. >> >+ * >> >+ * For datagrams, no reset is sent back. 
>> > */ >> > sk = vsock_find_connected_socket(&src, &dst); >> > if (!sk) { >> >- sk = vsock_find_bound_socket(&dst); >> >- if (!sk) { >> >- (void)virtio_transport_reset_no_sock(t, skb); >> >- goto free_pkt; >> >+ if (type == VIRTIO_VSOCK_TYPE_DGRAM) { >> >+ sk = vsock_find_bound_dgram_socket(&dst); >> >+ if (!sk) >> >+ goto free_pkt; >> >+ } else { >> >+ sk = vsock_find_bound_socket(&dst); >> >+ if (!sk) { >> >+ (void)virtio_transport_reset_no_sock(t, skb); >> >+ goto free_pkt; >> >+ } >> > } >> > } >> > >> >- if (virtio_transport_get_type(sk) != le16_to_cpu(hdr->type)) { >> >- (void)virtio_transport_reset_no_sock(t, skb); >> >+ if (virtio_transport_get_type(sk) != type) { >> >+ if (type != VIRTIO_VSOCK_TYPE_DGRAM) >> >+ (void)virtio_transport_reset_no_sock(t, skb); >> > sock_put(sk); >> > goto free_pkt; >> > } >> >@@ -1654,12 +1697,21 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, >> > >> > /* Check if sk has been closed before lock_sock */ >> > if (sock_flag(sk, SOCK_DONE)) { >> >- (void)virtio_transport_reset_no_sock(t, skb); >> >+ if (type != VIRTIO_VSOCK_TYPE_DGRAM) >> >+ (void)virtio_transport_reset_no_sock(t, skb); >> > release_sock(sk); >> > sock_put(sk); >> > goto free_pkt; >> > } >> > >> >+ if (sk->sk_type == SOCK_DGRAM) { >> >+ vsock_cb = vsock_skb_cb(skb); >> >+ vsock_cb->src_cid = src.svm_cid; >> >+ vsock_cb->src_port = src.svm_port; >> >+ virtio_transport_recv_dgram(sk, skb); >> >> >> What about adding an API that transports can use to hide this? >> >> I mean something that hide vsock_cb creation and queue packet in the >> socket receive queue. I'd also not expose vsock_skb_cb in an header, but >> I'd handle it internally in af_vsock.c. So I'd just expose API to >> queue/dequeue them. >> > >Got it. 
I will move vsock_skb_cb to af_vsock.c and create an API: > >vsock_dgram_skb_save_src_addr(struct sk_buff *skb, u32 cid, u32 port) This is okay, but I would try to go further by directly adding an API to queue dgrams in af_vsock.c (if it's feasible). > >Different dgram implementations will call this API instead of the code >block above to save the source address information into the control >buffer. > >A side note on why this is a vsock API instead of a member function in >transport: As we move to support multi-transport dgram, different >transport implementations can place skb into the sk->sk_receive_queue. >Therefore, we cannot call transport-specific function in >vsock_dgram_recvmsg() to initialize struct sockaddr_vm. Hence, the >receiving paths of different transports need to call this API to save >source address. What I meant is, why virtio_transport_recv_dgram() can't be exposed by af_vsock.c as vsock_recv_dgram() and handle all internally, like populate vsock_cb, call sock_queue_rcv_skb(), etc. > >> Also why VMCI is using sk_receive_skb(), while we are using >> sock_queue_rcv_skb()? >> > >I _think_ originally we referred to UDP and UDS when designing virtio >dgram, and ended up with placing skb into sk_receive_queue directly. I >will look into this to provide better justification. Great, thanks. Maybe we can also ping VMCI maintainers to understand if they can switch to sock_queue_rcv_skb(). But we should understand better the difference. Thanks, Stefano
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 46cd1807f8e3..a571b575fde9 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -235,7 +235,9 @@ EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt); static u16 virtio_transport_get_type(struct sock *sk) { - if (sk->sk_type == SOCK_STREAM) + if (sk->sk_type == SOCK_DGRAM) + return VIRTIO_VSOCK_TYPE_DGRAM; + else if (sk->sk_type == SOCK_STREAM) return VIRTIO_VSOCK_TYPE_STREAM; else return VIRTIO_VSOCK_TYPE_SEQPACKET; @@ -1422,6 +1424,33 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk, kfree_skb(skb); } +static void +virtio_transport_dgram_kfree_skb(struct sk_buff *skb, int err) +{ + if (err == -ENOMEM) + kfree_skb_reason(skb, SKB_DROP_REASON_SOCKET_RCVBUFF); + else if (err == -ENOBUFS) + kfree_skb_reason(skb, SKB_DROP_REASON_PROTO_MEM); + else + kfree_skb(skb); +} + +/* This function takes ownership of the skb. + * + * It either places the skb on the sk_receive_queue or frees it. 
+ */ +static void +virtio_transport_recv_dgram(struct sock *sk, struct sk_buff *skb) +{ + int err; + + err = sock_queue_rcv_skb(sk, skb); + if (err) { + virtio_transport_dgram_kfree_skb(skb, err); + return; + } +} + static int virtio_transport_recv_connected(struct sock *sk, struct sk_buff *skb) @@ -1591,7 +1620,8 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb, static bool virtio_transport_valid_type(u16 type) { return (type == VIRTIO_VSOCK_TYPE_STREAM) || - (type == VIRTIO_VSOCK_TYPE_SEQPACKET); + (type == VIRTIO_VSOCK_TYPE_SEQPACKET) || + (type == VIRTIO_VSOCK_TYPE_DGRAM); } /* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex @@ -1601,44 +1631,57 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, struct sk_buff *skb) { struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb); + struct vsock_skb_cb *vsock_cb; struct sockaddr_vm src, dst; struct vsock_sock *vsk; struct sock *sk; bool space_available; + u16 type; vsock_addr_init(&src, le64_to_cpu(hdr->src_cid), le32_to_cpu(hdr->src_port)); vsock_addr_init(&dst, le64_to_cpu(hdr->dst_cid), le32_to_cpu(hdr->dst_port)); + type = le16_to_cpu(hdr->type); + trace_virtio_transport_recv_pkt(src.svm_cid, src.svm_port, dst.svm_cid, dst.svm_port, le32_to_cpu(hdr->len), - le16_to_cpu(hdr->type), + type, le16_to_cpu(hdr->op), le32_to_cpu(hdr->flags), le32_to_cpu(hdr->buf_alloc), le32_to_cpu(hdr->fwd_cnt)); - if (!virtio_transport_valid_type(le16_to_cpu(hdr->type))) { + if (!virtio_transport_valid_type(type)) { (void)virtio_transport_reset_no_sock(t, skb); goto free_pkt; } - /* The socket must be in connected or bound table - * otherwise send reset back + /* For stream/seqpacket, the socket must be in connected or bound table + * otherwise send reset back. + * + * For datagrams, no reset is sent back. 
*/ sk = vsock_find_connected_socket(&src, &dst); if (!sk) { - sk = vsock_find_bound_socket(&dst); - if (!sk) { - (void)virtio_transport_reset_no_sock(t, skb); - goto free_pkt; + if (type == VIRTIO_VSOCK_TYPE_DGRAM) { + sk = vsock_find_bound_dgram_socket(&dst); + if (!sk) + goto free_pkt; + } else { + sk = vsock_find_bound_socket(&dst); + if (!sk) { + (void)virtio_transport_reset_no_sock(t, skb); + goto free_pkt; + } } } - if (virtio_transport_get_type(sk) != le16_to_cpu(hdr->type)) { - (void)virtio_transport_reset_no_sock(t, skb); + if (virtio_transport_get_type(sk) != type) { + if (type != VIRTIO_VSOCK_TYPE_DGRAM) + (void)virtio_transport_reset_no_sock(t, skb); sock_put(sk); goto free_pkt; } @@ -1654,12 +1697,21 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, /* Check if sk has been closed before lock_sock */ if (sock_flag(sk, SOCK_DONE)) { - (void)virtio_transport_reset_no_sock(t, skb); + if (type != VIRTIO_VSOCK_TYPE_DGRAM) + (void)virtio_transport_reset_no_sock(t, skb); release_sock(sk); sock_put(sk); goto free_pkt; } + if (sk->sk_type == SOCK_DGRAM) { + vsock_cb = vsock_skb_cb(skb); + vsock_cb->src_cid = src.svm_cid; + vsock_cb->src_port = src.svm_port; + virtio_transport_recv_dgram(sk, skb); + goto out; + } + space_available = virtio_transport_space_update(sk, skb); /* Update CID in case it has changed after a transport reset event */ @@ -1691,6 +1743,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, break; } +out: release_sock(sk); /* Release refcnt obtained when we fetched this socket out of the