@@ -31,7 +31,8 @@
enum {
VHOST_VSOCK_FEATURES = VHOST_FEATURES |
- (1ULL << VIRTIO_F_ACCESS_PLATFORM)
+ (1ULL << VIRTIO_F_ACCESS_PLATFORM) |
+ (1ULL << VIRTIO_VSOCK_F_SEQPACKET)
};
enum {
@@ -56,6 +57,7 @@ struct vhost_vsock {
atomic_t queued_replies;
u32 guest_cid;
+ bool seqpacket_allow;
};
static u32 vhost_transport_get_local_cid(void)
@@ -112,6 +114,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
size_t nbytes;
size_t iov_len, payload_len;
int head;
+ bool restore_flag = false;
spin_lock_bh(&vsock->send_pkt_list_lock);
if (list_empty(&vsock->send_pkt_list)) {
@@ -168,9 +171,26 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
/* If the packet is greater than the space available in the
* buffer, we split it using multiple buffers.
*/
- if (payload_len > iov_len - sizeof(pkt->hdr))
+ if (payload_len > iov_len - sizeof(pkt->hdr)) {
payload_len = iov_len - sizeof(pkt->hdr);
+ /* As we are copying pieces of a large packet's buffer into
+ * small rx buffers, the headers of the packets in the rx
+ * queue are created dynamically and initialized from the
+ * header of the current packet (except for the length). But
+ * for SOCK_SEQPACKET, we must also clear the record
+ * delimiter bit (VIRTIO_VSOCK_SEQ_EOR). Otherwise, instead
+ * of one packet with the delimiter (which marks the end of
+ * a record), there would be a sequence of packets with the
+ * delimiter bit set. After the initialized header has been
+ * copied to the rx buffer, this bit will be restored.
+ */
+ if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR) {
+ pkt->hdr.flags &= ~cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
+ restore_flag = true;
+ }
+ }
+
/* Set the correct length in the header */
pkt->hdr.len = cpu_to_le32(payload_len);
@@ -204,6 +224,9 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
* to send it with the next available buffer.
*/
if (pkt->off < pkt->len) {
+ if (restore_flag)
+ pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
+
/* We are queueing the same virtio_vsock_pkt to handle
* the remaining bytes, and we want to deliver it
* to monitoring devices in the next iteration.
@@ -354,8 +377,7 @@ vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
return NULL;
}
- if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_STREAM)
- pkt->len = le32_to_cpu(pkt->hdr.len);
+ pkt->len = le32_to_cpu(pkt->hdr.len);
/* No payload */
if (!pkt->len)
@@ -398,6 +420,8 @@ static bool vhost_vsock_more_replies(struct vhost_vsock *vsock)
return val < vq->num;
}
+static bool vhost_transport_seqpacket_allow(u32 remote_cid);
+
static struct virtio_transport vhost_transport = {
.transport = {
.module = THIS_MODULE,
@@ -424,6 +448,11 @@ static struct virtio_transport vhost_transport = {
.stream_is_active = virtio_transport_stream_is_active,
.stream_allow = virtio_transport_stream_allow,
+ .seqpacket_dequeue = virtio_transport_seqpacket_dequeue,
+ .seqpacket_enqueue = virtio_transport_seqpacket_enqueue,
+ .seqpacket_allow = vhost_transport_seqpacket_allow,
+ .seqpacket_has_data = virtio_transport_seqpacket_has_data,
+
.notify_poll_in = virtio_transport_notify_poll_in,
.notify_poll_out = virtio_transport_notify_poll_out,
.notify_recv_init = virtio_transport_notify_recv_init,
@@ -441,6 +470,22 @@ static struct virtio_transport vhost_transport = {
.send_pkt = vhost_transport_send_pkt,
};
+static bool vhost_transport_seqpacket_allow(u32 remote_cid)
+{
+ struct vhost_vsock *vsock;
+ bool seqpacket_allow = false;
+
+ rcu_read_lock();
+ vsock = vhost_vsock_get(remote_cid);
+
+ if (vsock)
+ seqpacket_allow = vsock->seqpacket_allow;
+
+ rcu_read_unlock();
+
+ return seqpacket_allow;
+}
+
static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
{
struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
@@ -785,6 +830,9 @@ static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features)
goto err;
}
+ if (features & (1ULL << VIRTIO_VSOCK_F_SEQPACKET))
+ vsock->seqpacket_allow = true;
+
for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
vq = &vsock->vqs[i];
mutex_lock(&vq->mutex);
When a received packet is copied to the guest's rx queue, the data
buffers of the rx queue may be smaller than the data buffer of the
input packet, so the input packet's data is copied to several rx
buffers, and each rx buffer becomes a packet with a dynamically
created header. The fields of such a header are initialized from the
header of the input packet (except for the length field, whose value
depends on the number of bytes copied to the rx buffer). But in the
SEQPACKET case, we also need to take care of the record delimiter
bit: if the input packet has this bit set, we do not copy it to the
header of the packet in the rx buffer, except when that rx buffer
carries the last part of the input packet. Otherwise, we would get a
sequence of packets with the delimiter bit set, breaking the record
bounds.

Also stop ignoring non-stream packet types and handle the SEQPACKET
feature bit.

Signed-off-by: Arseny Krasnov <arseny.krasnov@kaspersky.com>
---
v10 -> v11:
1) Large comment added to describe the idea of the patch.
2) Commit message updated.
3) 'seqpacket_has_data()' callback set.

 drivers/vhost/vsock.c | 56 +++++++++++++++++++++++++++++++++++++++----
 1 file changed, 52 insertions(+), 4 deletions(-)
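
For reference, a minimal guest-side usage sketch (not part of this
patch): it assumes both ends negotiated VIRTIO_VSOCK_F_SEQPACKET, and
the destination CID and port below are arbitrary placeholders.

#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/vm_sockets.h>

int main(void)
{
	struct sockaddr_vm addr = {
		.svm_family = AF_VSOCK,
		.svm_cid = VMADDR_CID_HOST,	/* placeholder CID */
		.svm_port = 1234,		/* placeholder port */
	};
	char msg[] = "one record";
	int fd;

	fd = socket(AF_VSOCK, SOCK_SEQPACKET, 0);
	if (fd < 0) {
		perror("socket");
		return 1;
	}

	if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
		perror("connect");
		close(fd);
		return 1;
	}

	/* Each send() is one record: even if vhost splits it across
	 * several rx buffers, only the last fragment keeps
	 * VIRTIO_VSOCK_SEQ_EOR, so the receiver sees exactly one
	 * record boundary per send().
	 */
	if (send(fd, msg, sizeof(msg), 0) < 0)
		perror("send");

	close(fd);
	return 0;
}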