Message ID | 20240802112138.46831-4-sahilcdq@proton.me (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Add packed virtqueue to shadow virtqueue | expand |
On Fri, Aug 2, 2024 at 1:22 PM Sahil Siddiq <icegambit91@gmail.com> wrote: > > Allocate memory for the packed vq format and support > packed vq in the SVQ "start" and "stop" operations. > > Signed-off-by: Sahil Siddiq <sahilcdq@proton.me> > --- > Changes v2 -> v3: > * vhost-shadow-virtqueue.c > (vhost_svq_memory_packed): New function > (vhost_svq_start): > - Remove common variables out of if-else branch. > (vhost_svq_stop): > - Add support for packed vq. > (vhost_svq_get_vring_addr): Revert changes > (vhost_svq_get_vring_addr_packed): Likwise. > * vhost-shadow-virtqueue.h > - Revert changes made to "vhost_svq_get_vring_addr*" > functions. > * vhost-vdpa.c: Revert changes. > > hw/virtio/vhost-shadow-virtqueue.c | 56 +++++++++++++++++++++++------- > hw/virtio/vhost-shadow-virtqueue.h | 4 +++ > 2 files changed, 47 insertions(+), 13 deletions(-) > > diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c > index 4c308ee53d..f4285db2b4 100644 > --- a/hw/virtio/vhost-shadow-virtqueue.c > +++ b/hw/virtio/vhost-shadow-virtqueue.c > @@ -645,6 +645,8 @@ void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd) > > /** > * Get the shadow vq vring address. > + * This is used irrespective of whether the > + * split or packed vq format is used. > * @svq: Shadow virtqueue > * @addr: Destination to store address > */ > @@ -672,6 +674,16 @@ size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq) > return ROUND_UP(used_size, qemu_real_host_page_size()); > } > > +size_t vhost_svq_memory_packed(const VhostShadowVirtqueue *svq) > +{ > + size_t desc_size = sizeof(struct vring_packed_desc) * svq->num_free; > + size_t driver_event_suppression = sizeof(struct vring_packed_desc_event); > + size_t device_event_suppression = sizeof(struct vring_packed_desc_event); > + > + return ROUND_UP(desc_size + driver_event_suppression + device_event_suppression, > + qemu_real_host_page_size()); > +} > + > /** > * Set a new file descriptor for the guest to kick the SVQ and notify for avail > * > @@ -726,17 +738,30 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, > > svq->vring.num = virtio_queue_get_num(vdev, virtio_get_queue_index(vq)); > svq->num_free = svq->vring.num; > - svq->vring.desc = mmap(NULL, vhost_svq_driver_area_size(svq), > - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, > - -1, 0); > - desc_size = sizeof(vring_desc_t) * svq->vring.num; > - svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size); > - svq->vring.used = mmap(NULL, vhost_svq_device_area_size(svq), > - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, > - -1, 0); > - svq->desc_state = g_new0(SVQDescState, svq->vring.num); > - svq->desc_next = g_new0(uint16_t, svq->vring.num); > - for (unsigned i = 0; i < svq->vring.num - 1; i++) { > + svq->is_packed = virtio_vdev_has_feature(svq->vdev, VIRTIO_F_RING_PACKED); > + > + if (virtio_vdev_has_feature(svq->vdev, VIRTIO_F_RING_PACKED)) { > + svq->vring_packed.vring.desc = mmap(NULL, vhost_svq_memory_packed(svq), > + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, > + -1, 0); > + desc_size = sizeof(struct vring_packed_desc) * svq->vring.num; > + svq->vring_packed.vring.driver = (void *)((char *)svq->vring_packed.vring.desc + desc_size); > + svq->vring_packed.vring.device = (void *)((char *)svq->vring_packed.vring.driver + > + sizeof(struct vring_packed_desc_event)); This is a great start but it will be problematic when you start mapping the areas to the vdpa device. The driver area should be read only for the device, but it is placed in the same page as a RW one. More on this later. > + } else { > + svq->vring.desc = mmap(NULL, vhost_svq_driver_area_size(svq), > + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, > + -1, 0); > + desc_size = sizeof(vring_desc_t) * svq->vring.num; > + svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size); > + svq->vring.used = mmap(NULL, vhost_svq_device_area_size(svq), > + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, > + -1, 0); > + } I think it will be beneficial to avoid "if (packed)" conditionals on the exposed functions that give information about the memory maps. These need to be replicated at hw/virtio/vhost-vdpa.c:vhost_vdpa_svq_map_rings. However, the current one depends on the driver area to live in the same page as the descriptor area, so it is not suitable for this. So what about this action plan: 1) Make the avail ring (or driver area) independent of the descriptor ring. 2) Return the mapping permissions of the descriptor area (not needed here, but needed for hw/virtio/vhost-vdpa.c:vhost_vdpa_svq_map_rings > + > + svq->desc_state = g_new0(SVQDescState, svq->num_free); > + svq->desc_next = g_new0(uint16_t, svq->num_free); > + for (unsigned i = 0; i < svq->num_free - 1; i++) { > svq->desc_next[i] = cpu_to_le16(i + 1); > } > } > @@ -776,8 +801,13 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) > svq->vq = NULL; > g_free(svq->desc_next); > g_free(svq->desc_state); > - munmap(svq->vring.desc, vhost_svq_driver_area_size(svq)); > - munmap(svq->vring.used, vhost_svq_device_area_size(svq)); > + > + if (svq->is_packed) { > + munmap(svq->vring_packed.vring.desc, vhost_svq_memory_packed(svq)); > + } else { > + munmap(svq->vring.desc, vhost_svq_driver_area_size(svq)); > + munmap(svq->vring.used, vhost_svq_device_area_size(svq)); > + } > event_notifier_set_handler(&svq->hdev_call, NULL); > } > > diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h > index ee1a87f523..03b722a186 100644 > --- a/hw/virtio/vhost-shadow-virtqueue.h > +++ b/hw/virtio/vhost-shadow-virtqueue.h > @@ -67,6 +67,9 @@ struct vring_packed { > > /* Shadow virtqueue to relay notifications */ > typedef struct VhostShadowVirtqueue { > + /* True if packed virtqueue */ > + bool is_packed; > + > /* Virtio queue shadowing */ > VirtQueue *vq; > > @@ -150,6 +153,7 @@ void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq, > struct vhost_vring_addr *addr); > size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq); > size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq); > +size_t vhost_svq_memory_packed(const VhostShadowVirtqueue *svq); > > void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, > VirtQueue *vq, VhostIOVATree *iova_tree); > -- > 2.45.2 >
Hi, Thank you for your reply. On Wednesday, August 7, 2024 10:11:52 PM GMT+5:30 you wrote: > On Fri, Aug 2, 2024 at 1:22 PM Sahil Siddiq <icegambit91@gmail.com> wrote: > [...] > > I'll also test these changes out by following the > > suggestions given in response to v1. I'll have more > > confidence once I know these changes work. > > Please let me know if you need help with the testing! > Sure, I'll let you know if I run into any issues. On Wednesday, August 7, 2024 10:10:51 PM GMT+5:30 you wrote: > On Fri, Aug 2, 2024 at 1:22 PM Sahil Siddiq <icegambit91@gmail.com> wrote: > [...] > As a suggestion, we can split it into: > 1) Refactor in vhost_svq_translate_addr to support out_num+in_num. No > functional change. > 2) Refactor vhost_svq_add_split to extract common code into > vhost_svq_add. No functional change. > 3) Adding packed code. > > How to split or merge the patches is not a well-defined thing, so I'm > happy with this patch if you think the reactor is not worth it. I think your suggestion will make the changeset neater. If feasible, I'll refactor it after testing my changes and clearing up the page mapping issue. Thanks, Sahil
Hi, On Wednesday, August 7, 2024 9:52:10 PM GMT+5:30 Eugenio Perez Martin wrote: > On Fri, Aug 2, 2024 at 1:22 PM Sahil Siddiq <icegambit91@gmail.com> wrote: > > [...] > > @@ -726,17 +738,30 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, > > svq->vring.num = virtio_queue_get_num(vdev, > > virtio_get_queue_index(vq)); > > svq->num_free = svq->vring.num; > > > > - svq->vring.desc = mmap(NULL, vhost_svq_driver_area_size(svq), > > - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, > > - -1, 0); > > - desc_size = sizeof(vring_desc_t) * svq->vring.num; > > - svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size); > > - svq->vring.used = mmap(NULL, vhost_svq_device_area_size(svq), > > - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, > > - -1, 0); > > - svq->desc_state = g_new0(SVQDescState, svq->vring.num); > > - svq->desc_next = g_new0(uint16_t, svq->vring.num); > > - for (unsigned i = 0; i < svq->vring.num - 1; i++) { > > + svq->is_packed = virtio_vdev_has_feature(svq->vdev, VIRTIO_F_RING_PACKED); > > + > > + if (virtio_vdev_has_feature(svq->vdev, VIRTIO_F_RING_PACKED)) { > > + svq->vring_packed.vring.desc = mmap(NULL, vhost_svq_memory_packed(svq), > > + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, > > + -1, 0); > > + desc_size = sizeof(struct vring_packed_desc) * svq->vring.num; > > + svq->vring_packed.vring.driver = (void *)((char *)svq->vring_packed.vring.desc + desc_size); > > + svq->vring_packed.vring.device = (void *)((char *)svq->vring_packed.vring.driver + > > + sizeof(struct vring_packed_desc_event)); > > This is a great start but it will be problematic when you start > mapping the areas to the vdpa device. The driver area should be read > only for the device, but it is placed in the same page as a RW one. > > More on this later. > > > + } else { > > + svq->vring.desc = mmap(NULL, vhost_svq_driver_area_size(svq), > > + PROT_READ | PROT_WRITE, MAP_SHARED |MAP_ANONYMOUS, > > + -1, 0); > > + desc_size = sizeof(vring_desc_t) * svq->vring.num; > > + svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size); > > + svq->vring.used = mmap(NULL, vhost_svq_device_area_size(svq), > > + PROT_READ | PROT_WRITE, MAP_SHARED |MAP_ANONYMOUS, > > + -1, 0); > > + } > > I think it will be beneficial to avoid "if (packed)" conditionals on > the exposed functions that give information about the memory maps. > These need to be replicated at > hw/virtio/vhost-vdpa.c:vhost_vdpa_svq_map_rings. > > However, the current one depends on the driver area to live in the > same page as the descriptor area, so it is not suitable for this. I haven't really understood this. In split vqs the descriptor, driver and device areas are mapped to RW pages. In vhost_vdpa.c:vhost_vdpa_svq_map_rings, the regions are mapped with the appropriate "perm" field that sets the R/W permissions in the DMAMap object. Is this problematic for the split vq format because the avail ring is anyway mapped to a RW page in "vhost_svq_start"? For packed vqs, the "Driver Event Suppression" data structure should be read-only for the device. Similar to split vqs, this is mapped to a RW page in "vhost_svq_start" but it is then mapped to a DMAMap object with read- only perms in "vhost_vdpa_svq_map_rings". I am a little confused about where the issue lies. Thanks, Sahil
On Sun, Aug 11, 2024 at 7:20 PM Sahil <icegambit91@gmail.com> wrote: > > Hi, > > On Wednesday, August 7, 2024 9:52:10 PM GMT+5:30 Eugenio Perez Martin wrote: > > On Fri, Aug 2, 2024 at 1:22 PM Sahil Siddiq <icegambit91@gmail.com> wrote: > > > [...] > > > @@ -726,17 +738,30 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, > > > svq->vring.num = virtio_queue_get_num(vdev, > > > virtio_get_queue_index(vq)); > > > svq->num_free = svq->vring.num; > > > > > > - svq->vring.desc = mmap(NULL, vhost_svq_driver_area_size(svq), > > > - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, > > > - -1, 0); > > > - desc_size = sizeof(vring_desc_t) * svq->vring.num; > > > - svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size); > > > - svq->vring.used = mmap(NULL, vhost_svq_device_area_size(svq), > > > - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, > > > - -1, 0); > > > - svq->desc_state = g_new0(SVQDescState, svq->vring.num); > > > - svq->desc_next = g_new0(uint16_t, svq->vring.num); > > > - for (unsigned i = 0; i < svq->vring.num - 1; i++) { > > > + svq->is_packed = virtio_vdev_has_feature(svq->vdev, VIRTIO_F_RING_PACKED); > > > + > > > + if (virtio_vdev_has_feature(svq->vdev, VIRTIO_F_RING_PACKED)) { > > > + svq->vring_packed.vring.desc = mmap(NULL, vhost_svq_memory_packed(svq), > > > + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, > > > + -1, 0); > > > + desc_size = sizeof(struct vring_packed_desc) * svq->vring.num; > > > + svq->vring_packed.vring.driver = (void *)((char *)svq->vring_packed.vring.desc + desc_size); > > > + svq->vring_packed.vring.device = (void *)((char *)svq->vring_packed.vring.driver + > > > + sizeof(struct vring_packed_desc_event)); > > > > This is a great start but it will be problematic when you start > > mapping the areas to the vdpa device. The driver area should be read > > only for the device, but it is placed in the same page as a RW one. > > > > More on this later. > > > > > + } else { > > > + svq->vring.desc = mmap(NULL, vhost_svq_driver_area_size(svq), > > > + PROT_READ | PROT_WRITE, MAP_SHARED |MAP_ANONYMOUS, > > > + -1, 0); > > > + desc_size = sizeof(vring_desc_t) * svq->vring.num; > > > + svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size); > > > + svq->vring.used = mmap(NULL, vhost_svq_device_area_size(svq), > > > + PROT_READ | PROT_WRITE, MAP_SHARED |MAP_ANONYMOUS, > > > + -1, 0); > > > + } > > > > I think it will be beneficial to avoid "if (packed)" conditionals on > > the exposed functions that give information about the memory maps. > > These need to be replicated at > > hw/virtio/vhost-vdpa.c:vhost_vdpa_svq_map_rings. > > > > However, the current one depends on the driver area to live in the > > same page as the descriptor area, so it is not suitable for this. > > I haven't really understood this. > > In split vqs the descriptor, driver and device areas are mapped to RW pages. > In vhost_vdpa.c:vhost_vdpa_svq_map_rings, the regions are mapped with > the appropriate "perm" field that sets the R/W permissions in the DMAMap > object. Is this problematic for the split vq format because the avail ring is > anyway mapped to a RW page in "vhost_svq_start"? > Ok so maybe the map word is misleading here. The pages needs to be allocated for the QEMU process with both PROT_READ | PROT_WRITE, as QEMU needs to write into it. They are mapped to the device with vhost_vdpa_dma_map, and the last bool parameter indicates if the device needs write permissions or not. You can see how hw/virtio/vhost-vdpa.c:vhost_vdpa_svq_map_ring checks the needle permission for this, and the needle permissions are stored at hw/virtio/vhost-vdpa.c:vhost_vdpa_svq_map_rings. This is the function that needs to check for the maps permissions. > For packed vqs, the "Driver Event Suppression" data structure should be > read-only for the device. Similar to split vqs, this is mapped to a RW page > in "vhost_svq_start" but it is then mapped to a DMAMap object with read- > only perms in "vhost_vdpa_svq_map_rings". > > I am a little confused about where the issue lies. > > Thanks, > Sahil > >
Hi, On Monday, August 12, 2024 12:01:00 PM GMT+5:30 you wrote: > On Sun, Aug 11, 2024 at 7:20 PM Sahil <icegambit91@gmail.com> wrote: > > On Wednesday, August 7, 2024 9:52:10 PM GMT+5:30 Eugenio Perez Martin wrote: > > > On Fri, Aug 2, 2024 at 1:22 PM Sahil Siddiq <icegambit91@gmail.com> wrote: > > > > [...] > > > > @@ -726,17 +738,30 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, > > > > svq->vring.num = virtio_queue_get_num(vdev, > > > > virtio_get_queue_index(vq)); > > > > svq->num_free = svq->vring.num; > > > > > > > > - svq->vring.desc = mmap(NULL, vhost_svq_driver_area_size(svq), > > > > - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, > > > > - -1, 0); > > > > - desc_size = sizeof(vring_desc_t) * svq->vring.num; > > > > - svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size); > > > > - svq->vring.used = mmap(NULL, vhost_svq_device_area_size(svq), > > > > - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, > > > > - -1, 0); > > > > - svq->desc_state = g_new0(SVQDescState, svq->vring.num); > > > > - svq->desc_next = g_new0(uint16_t, svq->vring.num); > > > > - for (unsigned i = 0; i < svq->vring.num - 1; i++) { > > > > + svq->is_packed = virtio_vdev_has_feature(svq->vdev, VIRTIO_F_RING_PACKED); > > > > + > > > > + if (virtio_vdev_has_feature(svq->vdev, VIRTIO_F_RING_PACKED)) { > > > > + svq->vring_packed.vring.desc = mmap(NULL, vhost_svq_memory_packed(svq), > > > > + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, > > > > + -1, 0); > > > > + desc_size = sizeof(struct vring_packed_desc) * svq->vring.num; > > > > + svq->vring_packed.vring.driver = (void *)((char *)svq->vring_packed.vring.desc + desc_size); > > > > + svq->vring_packed.vring.device = (void *)((char *)svq->vring_packed.vring.driver + > > > > + sizeof(struct vring_packed_desc_event)); > > > > > > This is a great start but it will be problematic when you start > > > mapping the areas to the vdpa device. The driver area should be read > > > only for the device, but it is placed in the same page as a RW one. > > > > > > More on this later. > > > > > > > + } else { > > > > + svq->vring.desc = mmap(NULL, vhost_svq_driver_area_size(svq), > > > > + PROT_READ | PROT_WRITE, MAP_SHARED |MAP_ANONYMOUS, > > > > + -1, 0); > > > > + desc_size = sizeof(vring_desc_t) * svq->vring.num; > > > > + svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size); > > > > + svq->vring.used = mmap(NULL, vhost_svq_device_area_size(svq), > > > > + PROT_READ | PROT_WRITE, MAP_SHARED |MAP_ANONYMOUS, > > > > + -1, 0); > > > > + } > > > > > > I think it will be beneficial to avoid "if (packed)" conditionals on > > > the exposed functions that give information about the memory maps. > > > These need to be replicated at > > > hw/virtio/vhost-vdpa.c:vhost_vdpa_svq_map_rings. > > > > > > However, the current one depends on the driver area to live in the > > > same page as the descriptor area, so it is not suitable for this. > > > > I haven't really understood this. > > > > In split vqs the descriptor, driver and device areas are mapped to RW pages. > > In vhost_vdpa.c:vhost_vdpa_svq_map_rings, the regions are mapped with > > the appropriate "perm" field that sets the R/W permissions in the DMAMap > > object. Is this problematic for the split vq format because the avail ring is > > anyway mapped to a RW page in "vhost_svq_start"? > > > > Ok so maybe the map word is misleading here. The pages needs to be > allocated for the QEMU process with both PROT_READ | PROT_WRITE, as > QEMU needs to write into it. > > They are mapped to the device with vhost_vdpa_dma_map, and the last > bool parameter indicates if the device needs write permissions or not. > You can see how hw/virtio/vhost-vdpa.c:vhost_vdpa_svq_map_ring checks > the needle permission for this, and the needle permissions are stored > at hw/virtio/vhost-vdpa.c:vhost_vdpa_svq_map_rings. This is the > function that needs to check for the maps permissions. > I think I have understood what's going on in "vhost_vdpa_svq_map_rings", "vhost_vdpa_svq_map_ring" and "vhost_vdpa_dma_map". But based on what I have understood it looks like the driver area is getting mapped to an iova which is read-only for vhost_vdpa. Please let me know where I am going wrong. Consider the following implementation in hw/virtio/vhost_vdpa.c: > size_t device_size = vhost_svq_device_area_size(svq); > size_t driver_size = vhost_svq_driver_area_size(svq); The driver size includes the descriptor area and the driver area. For packed vq, the driver area is the "driver event suppression" structure which should be read-only for the device according to the virtio spec (section 2.8.10) [1]. > size_t avail_offset; > bool ok; > > vhost_svq_get_vring_addr(svq, &svq_addr); Over here "svq_addr.desc_user_addr" will point to the descriptor area while "svq_addr.avail_user_addr" will point to the driver area/driver event suppression structure. > driver_region = (DMAMap) { > .translated_addr = svq_addr.desc_user_addr, > .size = driver_size - 1, > .perm = IOMMU_RO, > }; This region points to the descriptor area and its size encompasses the driver area as well with RO permission. > ok = vhost_vdpa_svq_map_ring(v, &driver_region, errp); The above function checks the value of needle->perm and sees that it is RO. It then calls "vhost_vdpa_dma_map" with the following arguments: > r = vhost_vdpa_dma_map(v->shared, v->address_space_id, needle->iova, > needle->size + 1, > (void *)(uintptr_t)needle->translated_addr, > needle->perm == IOMMU_RO); Since needle->size includes the driver area as well, the driver area will be mapped to a RO page in the device's address space, right? > if (unlikely(!ok)) { > error_prepend(errp, "Cannot create vq driver region: "); > return false; > } > addr->desc_user_addr = driver_region.iova; > avail_offset = svq_addr.avail_user_addr - svq_addr.desc_user_addr; > addr->avail_user_addr = driver_region.iova + avail_offset; I think "addr->desc_user_addr" and "addr->avail_user_addr" will both be mapped to a RO page in the device's address space. > device_region = (DMAMap) { > .translated_addr = svq_addr.used_user_addr, > .size = device_size - 1, > .perm = IOMMU_RW, > }; The device area/device event suppression structure on the other hand will be mapped to a RW page. I also think there are other issues with the current state of the patch. According to the virtio spec (section 2.8.10) [1], the "device event suppression" structure needs to be write-only for the device but is mapped to a RW page. Another concern I have is regarding the driver area size for packed vq. In "hw/virtio/vhost-shadow-virtqueue.c" of the current patch: > size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq) > { > size_t desc_size = sizeof(vring_desc_t) * svq->vring.num; > size_t avail_size = offsetof(vring_avail_t, ring[svq->vring.num]) + > sizeof(uint16_t); > > return ROUND_UP(desc_size + avail_size, qemu_real_host_page_size()); > } > > [...] > > size_t vhost_svq_memory_packed(const VhostShadowVirtqueue *svq) > { > size_t desc_size = sizeof(struct vring_packed_desc) * svq->num_free; > size_t driver_event_suppression = sizeof(struct vring_packed_desc_event); > size_t device_event_suppression = sizeof(struct vring_packed_desc_event); > > return ROUND_UP(desc_size + driver_event_suppression + device_event_suppression, > qemu_real_host_page_size()); > } The size returned by "vhost_svq_driver_area_size" might not be the actual driver size which is given by desc_size + driver_event_suppression, right? Will this have to be changed too? Thanks, Sahil [1] https://docs.oasis-open.org/virtio/virtio/v1.3/csd01/virtio-v1.3-csd01.html#x1-720008
On Mon, Aug 12, 2024 at 9:32 PM Sahil <icegambit91@gmail.com> wrote: > > Hi, > > On Monday, August 12, 2024 12:01:00 PM GMT+5:30 you wrote: > > On Sun, Aug 11, 2024 at 7:20 PM Sahil <icegambit91@gmail.com> wrote: > > > On Wednesday, August 7, 2024 9:52:10 PM GMT+5:30 Eugenio Perez Martin wrote: > > > > On Fri, Aug 2, 2024 at 1:22 PM Sahil Siddiq <icegambit91@gmail.com> wrote: > > > > > [...] > > > > > @@ -726,17 +738,30 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, > > > > > svq->vring.num = virtio_queue_get_num(vdev, > > > > > virtio_get_queue_index(vq)); > > > > > svq->num_free = svq->vring.num; > > > > > > > > > > - svq->vring.desc = mmap(NULL, vhost_svq_driver_area_size(svq), > > > > > - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, > > > > > - -1, 0); > > > > > - desc_size = sizeof(vring_desc_t) * svq->vring.num; > > > > > - svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size); > > > > > - svq->vring.used = mmap(NULL, vhost_svq_device_area_size(svq), > > > > > - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, > > > > > - -1, 0); > > > > > - svq->desc_state = g_new0(SVQDescState, svq->vring.num); > > > > > - svq->desc_next = g_new0(uint16_t, svq->vring.num); > > > > > - for (unsigned i = 0; i < svq->vring.num - 1; i++) { > > > > > + svq->is_packed = virtio_vdev_has_feature(svq->vdev, VIRTIO_F_RING_PACKED); > > > > > + > > > > > + if (virtio_vdev_has_feature(svq->vdev, VIRTIO_F_RING_PACKED)) { > > > > > + svq->vring_packed.vring.desc = mmap(NULL, vhost_svq_memory_packed(svq), > > > > > + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, > > > > > + -1, 0); > > > > > + desc_size = sizeof(struct vring_packed_desc) * svq->vring.num; > > > > > + svq->vring_packed.vring.driver = (void *)((char *)svq->vring_packed.vring.desc + desc_size); > > > > > + svq->vring_packed.vring.device = (void *)((char *)svq->vring_packed.vring.driver + > > > > > + sizeof(struct vring_packed_desc_event)); > > > > > > > > This is a great start but it will be problematic when you start > > > > mapping the areas to the vdpa device. The driver area should be read > > > > only for the device, but it is placed in the same page as a RW one. > > > > > > > > More on this later. > > > > > > > > > + } else { > > > > > + svq->vring.desc = mmap(NULL, vhost_svq_driver_area_size(svq), > > > > > + PROT_READ | PROT_WRITE, MAP_SHARED |MAP_ANONYMOUS, > > > > > + -1, 0); > > > > > + desc_size = sizeof(vring_desc_t) * svq->vring.num; > > > > > + svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size); > > > > > + svq->vring.used = mmap(NULL, vhost_svq_device_area_size(svq), > > > > > + PROT_READ | PROT_WRITE, MAP_SHARED |MAP_ANONYMOUS, > > > > > + -1, 0); > > > > > + } > > > > > > > > I think it will be beneficial to avoid "if (packed)" conditionals on > > > > the exposed functions that give information about the memory maps. > > > > These need to be replicated at > > > > hw/virtio/vhost-vdpa.c:vhost_vdpa_svq_map_rings. > > > > > > > > However, the current one depends on the driver area to live in the > > > > same page as the descriptor area, so it is not suitable for this. > > > > > > I haven't really understood this. > > > > > > In split vqs the descriptor, driver and device areas are mapped to RW pages. > > > In vhost_vdpa.c:vhost_vdpa_svq_map_rings, the regions are mapped with > > > the appropriate "perm" field that sets the R/W permissions in the DMAMap > > > object. Is this problematic for the split vq format because the avail ring is > > > anyway mapped to a RW page in "vhost_svq_start"? > > > > > > > Ok so maybe the map word is misleading here. The pages needs to be > > allocated for the QEMU process with both PROT_READ | PROT_WRITE, as > > QEMU needs to write into it. > > > > They are mapped to the device with vhost_vdpa_dma_map, and the last > > bool parameter indicates if the device needs write permissions or not. > > You can see how hw/virtio/vhost-vdpa.c:vhost_vdpa_svq_map_ring checks > > the needle permission for this, and the needle permissions are stored > > at hw/virtio/vhost-vdpa.c:vhost_vdpa_svq_map_rings. This is the > > function that needs to check for the maps permissions. > > > > I think I have understood what's going on in "vhost_vdpa_svq_map_rings", > "vhost_vdpa_svq_map_ring" and "vhost_vdpa_dma_map". But based on > what I have understood it looks like the driver area is getting mapped to > an iova which is read-only for vhost_vdpa. Please let me know where I am > going wrong. > You're not going wrong there. The device does not need to write into this area, so we map it read only. > Consider the following implementation in hw/virtio/vhost_vdpa.c: > > > size_t device_size = vhost_svq_device_area_size(svq); > > size_t driver_size = vhost_svq_driver_area_size(svq); > > The driver size includes the descriptor area and the driver area. For > packed vq, the driver area is the "driver event suppression" structure > which should be read-only for the device according to the virtio spec > (section 2.8.10) [1]. > > > size_t avail_offset; > > bool ok; > > > > vhost_svq_get_vring_addr(svq, &svq_addr); > > Over here "svq_addr.desc_user_addr" will point to the descriptor area > while "svq_addr.avail_user_addr" will point to the driver area/driver > event suppression structure. > > > driver_region = (DMAMap) { > > .translated_addr = svq_addr.desc_user_addr, > > .size = driver_size - 1, > > .perm = IOMMU_RO, > > }; > > This region points to the descriptor area and its size encompasses the > driver area as well with RO permission. > > > ok = vhost_vdpa_svq_map_ring(v, &driver_region, errp); > > The above function checks the value of needle->perm and sees that it is RO. > It then calls "vhost_vdpa_dma_map" with the following arguments: > > > r = vhost_vdpa_dma_map(v->shared, v->address_space_id, needle->iova, > > needle->size + 1, > > (void *)(uintptr_t)needle->translated_addr, > > needle->perm == IOMMU_RO); > > Since needle->size includes the driver area as well, the driver area will be > mapped to a RO page in the device's address space, right? > Yes, the device does not need to write into the descriptor area in the supported split virtqueue case. So the descriptor area is also mapped RO at this moment. This change in the packed virtqueue case, so we need to map it RW. > > if (unlikely(!ok)) { > > error_prepend(errp, "Cannot create vq driver region: "); > > return false; > > } > > addr->desc_user_addr = driver_region.iova; > > avail_offset = svq_addr.avail_user_addr - svq_addr.desc_user_addr; > > addr->avail_user_addr = driver_region.iova + avail_offset; > > I think "addr->desc_user_addr" and "addr->avail_user_addr" will both be > mapped to a RO page in the device's address space. > > > device_region = (DMAMap) { > > .translated_addr = svq_addr.used_user_addr, > > .size = device_size - 1, > > .perm = IOMMU_RW, > > }; > > The device area/device event suppression structure on the other hand will > be mapped to a RW page. > > I also think there are other issues with the current state of the patch. According > to the virtio spec (section 2.8.10) [1], the "device event suppression" structure > needs to be write-only for the device but is mapped to a RW page. > Yes, I'm not sure if all IOMMU supports write-only maps to be honest. > Another concern I have is regarding the driver area size for packed vq. In > "hw/virtio/vhost-shadow-virtqueue.c" of the current patch: > > size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq) > > { > > size_t desc_size = sizeof(vring_desc_t) * svq->vring.num; > > size_t avail_size = offsetof(vring_avail_t, ring[svq->vring.num]) + > > sizeof(uint16_t); > > > > return ROUND_UP(desc_size + avail_size, qemu_real_host_page_size()); > > } > > > > [...] > > > > size_t vhost_svq_memory_packed(const VhostShadowVirtqueue *svq) > > { > > size_t desc_size = sizeof(struct vring_packed_desc) * svq->num_free; > > size_t driver_event_suppression = sizeof(struct vring_packed_desc_event); > > size_t device_event_suppression = sizeof(struct vring_packed_desc_event); > > > > return ROUND_UP(desc_size + driver_event_suppression + device_event_suppression, > > qemu_real_host_page_size()); > > } > > The size returned by "vhost_svq_driver_area_size" might not be the actual driver > size which is given by desc_size + driver_event_suppression, right? Will this have to > be changed too? > Yes, you're right this needs to be changed too. > Thanks, > Sahil > > [1] https://docs.oasis-open.org/virtio/virtio/v1.3/csd01/virtio-v1.3-csd01.html#x1-720008 > > >
Hi, Sorry for the late reply. On Tuesday, August 13, 2024 12:23:55 PM GMT+5:30 Eugenio Perez Martin wrote: > [...] > > I think I have understood what's going on in "vhost_vdpa_svq_map_rings", > > "vhost_vdpa_svq_map_ring" and "vhost_vdpa_dma_map". But based on > > what I have understood it looks like the driver area is getting mapped to > > an iova which is read-only for vhost_vdpa. Please let me know where I am > > going wrong. > > You're not going wrong there. The device does not need to write into > this area, so we map it read only. > > > Consider the following implementation in hw/virtio/vhost_vdpa.c: > > > size_t device_size = vhost_svq_device_area_size(svq); > > > size_t driver_size = vhost_svq_driver_area_size(svq); > > > > The driver size includes the descriptor area and the driver area. For > > packed vq, the driver area is the "driver event suppression" structure > > which should be read-only for the device according to the virtio spec > > (section 2.8.10) [1]. > > > > > size_t avail_offset; > > > bool ok; > > > > > > vhost_svq_get_vring_addr(svq, &svq_addr); > > > > Over here "svq_addr.desc_user_addr" will point to the descriptor area > > while "svq_addr.avail_user_addr" will point to the driver area/driver > > event suppression structure. > > > > > driver_region = (DMAMap) { > > > > > > .translated_addr = svq_addr.desc_user_addr, > > > .size = driver_size - 1, > > > .perm = IOMMU_RO, > > > > > > }; > > > > This region points to the descriptor area and its size encompasses the > > driver area as well with RO permission. > > > > > ok = vhost_vdpa_svq_map_ring(v, &driver_region, errp); > > > > The above function checks the value of needle->perm and sees that it is > > RO. > > > > It then calls "vhost_vdpa_dma_map" with the following arguments: > > > r = vhost_vdpa_dma_map(v->shared, v->address_space_id, needle->iova, > > > > > > needle->size + 1, > > > (void > > > *)(uintptr_t)needle->tra > > > nslated_addr, > > > needle->perm == > > > IOMMU_RO); > > > > Since needle->size includes the driver area as well, the driver area will > > be mapped to a RO page in the device's address space, right? > > Yes, the device does not need to write into the descriptor area in the > supported split virtqueue case. So the descriptor area is also mapped > RO at this moment. > > This change in the packed virtqueue case, so we need to map it RW. I understand this now. I'll see how the implementation can be modified to take this into account. I'll see if making the driver area and descriptor ring helps. > > > if (unlikely(!ok)) { > > > > > > error_prepend(errp, "Cannot create vq driver region: "); > > > return false; > > > > > > } > > > addr->desc_user_addr = driver_region.iova; > > > avail_offset = svq_addr.avail_user_addr - svq_addr.desc_user_addr; > > > addr->avail_user_addr = driver_region.iova + avail_offset; > > > > I think "addr->desc_user_addr" and "addr->avail_user_addr" will both be > > mapped to a RO page in the device's address space. > > > > > device_region = (DMAMap) { > > > > > > .translated_addr = svq_addr.used_user_addr, > > > .size = device_size - 1, > > > .perm = IOMMU_RW, > > > > > > }; > > > > The device area/device event suppression structure on the other hand will > > be mapped to a RW page. > > > > I also think there are other issues with the current state of the patch. > > According to the virtio spec (section 2.8.10) [1], the "device event > > suppression" structure needs to be write-only for the device but is > > mapped to a RW page. > > Yes, I'm not sure if all IOMMU supports write-only maps to be honest. Got it. I think it should be alright to defer this issue until later. > > Another concern I have is regarding the driver area size for packed vq. In > > > > "hw/virtio/vhost-shadow-virtqueue.c" of the current patch: > > > size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq) > > > { > > > > > > size_t desc_size = sizeof(vring_desc_t) * svq->vring.num; > > > size_t avail_size = offsetof(vring_avail_t, ring[svq->vring.num]) + > > > > > > sizeof(uin > > > t16_t); > > > > > > return ROUND_UP(desc_size + avail_size, qemu_real_host_page_size()); > > > > > > } > > > > > > [...] > > > > > > size_t vhost_svq_memory_packed(const VhostShadowVirtqueue *svq) > > > { > > > > > > size_t desc_size = sizeof(struct vring_packed_desc) * svq->num_free; > > > size_t driver_event_suppression = sizeof(struct > > > vring_packed_desc_event); > > > size_t device_event_suppression = sizeof(struct > > > vring_packed_desc_event); > > > > > > return ROUND_UP(desc_size + driver_event_suppression + > > > device_event_suppression,> > > > > qemu_real_host_page_size()); > > > > > > } > > > > The size returned by "vhost_svq_driver_area_size" might not be the actual > > driver size which is given by desc_size + driver_event_suppression, > > right? Will this have to be changed too? > > Yes, you're right this needs to be changed too. Understood. I'll modify this too. I have been trying to test my changes so far as well. I am not very clear on a few things. Q1. I built QEMU from source with my changes and followed the vdpa_sim + vhost_vdpa tutorial [1]. The VM seems to be running fine. How do I check if the packed format is being used instead of the split vq format for shadow virtqueues? I know the packed format is used when virtio_vdev has got the VIRTIO_F_RING_PACKED bit enabled. Is there a way of checking that this is the case? Q2. What's the recommended way to see what's going on under the hood? I tried using the -D option so QEMU's logs are written to a file but the file was empty. Would using qemu with -monitor stdio or attaching gdb to the QEMU VM be worthwhile? Thanks, Sahil [1] https://www.redhat.com/en/blog/hands-vdpa-what-do-you-do-when-you-aint-got-hardware-part-1
On Wed, Aug 21, 2024 at 2:20 PM Sahil <icegambit91@gmail.com> wrote: > > Hi, > > Sorry for the late reply. > > On Tuesday, August 13, 2024 12:23:55 PM GMT+5:30 Eugenio Perez Martin wrote: > > [...] > > > I think I have understood what's going on in "vhost_vdpa_svq_map_rings", > > > "vhost_vdpa_svq_map_ring" and "vhost_vdpa_dma_map". But based on > > > what I have understood it looks like the driver area is getting mapped to > > > an iova which is read-only for vhost_vdpa. Please let me know where I am > > > going wrong. > > > > You're not going wrong there. The device does not need to write into > > this area, so we map it read only. > > > > > Consider the following implementation in hw/virtio/vhost_vdpa.c: > > > > size_t device_size = vhost_svq_device_area_size(svq); > > > > size_t driver_size = vhost_svq_driver_area_size(svq); > > > > > > The driver size includes the descriptor area and the driver area. For > > > packed vq, the driver area is the "driver event suppression" structure > > > which should be read-only for the device according to the virtio spec > > > (section 2.8.10) [1]. > > > > > > > size_t avail_offset; > > > > bool ok; > > > > > > > > vhost_svq_get_vring_addr(svq, &svq_addr); > > > > > > Over here "svq_addr.desc_user_addr" will point to the descriptor area > > > while "svq_addr.avail_user_addr" will point to the driver area/driver > > > event suppression structure. > > > > > > > driver_region = (DMAMap) { > > > > > > > > .translated_addr = svq_addr.desc_user_addr, > > > > .size = driver_size - 1, > > > > .perm = IOMMU_RO, > > > > > > > > }; > > > > > > This region points to the descriptor area and its size encompasses the > > > driver area as well with RO permission. > > > > > > > ok = vhost_vdpa_svq_map_ring(v, &driver_region, errp); > > > > > > The above function checks the value of needle->perm and sees that it is > > > RO. > > > > > > It then calls "vhost_vdpa_dma_map" with the following arguments: > > > > r = vhost_vdpa_dma_map(v->shared, v->address_space_id, needle->iova, > > > > > > > > needle->size + 1, > > > > (void > > > > *)(uintptr_t)needle->tra > > > > nslated_addr, > > > > needle->perm == > > > > IOMMU_RO); > > > > > > Since needle->size includes the driver area as well, the driver area will > > > be mapped to a RO page in the device's address space, right? > > > > Yes, the device does not need to write into the descriptor area in the > > supported split virtqueue case. So the descriptor area is also mapped > > RO at this moment. > > > > This change in the packed virtqueue case, so we need to map it RW. > > I understand this now. I'll see how the implementation can be modified to take > this into account. I'll see if making the driver area and descriptor ring helps. > > > > > if (unlikely(!ok)) { > > > > > > > > error_prepend(errp, "Cannot create vq driver region: "); > > > > return false; > > > > > > > > } > > > > addr->desc_user_addr = driver_region.iova; > > > > avail_offset = svq_addr.avail_user_addr - svq_addr.desc_user_addr; > > > > addr->avail_user_addr = driver_region.iova + avail_offset; > > > > > > I think "addr->desc_user_addr" and "addr->avail_user_addr" will both be > > > mapped to a RO page in the device's address space. > > > > > > > device_region = (DMAMap) { > > > > > > > > .translated_addr = svq_addr.used_user_addr, > > > > .size = device_size - 1, > > > > .perm = IOMMU_RW, > > > > > > > > }; > > > > > > The device area/device event suppression structure on the other hand will > > > be mapped to a RW page. > > > > > > I also think there are other issues with the current state of the patch. > > > According to the virtio spec (section 2.8.10) [1], the "device event > > > suppression" structure needs to be write-only for the device but is > > > mapped to a RW page. > > > > Yes, I'm not sure if all IOMMU supports write-only maps to be honest. > > Got it. I think it should be alright to defer this issue until later. > > > > Another concern I have is regarding the driver area size for packed vq. In > > > > > > "hw/virtio/vhost-shadow-virtqueue.c" of the current patch: > > > > size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq) > > > > { > > > > > > > > size_t desc_size = sizeof(vring_desc_t) * svq->vring.num; > > > > size_t avail_size = offsetof(vring_avail_t, ring[svq->vring.num]) + > > > > > > > > sizeof(uin > > > > t16_t); > > > > > > > > return ROUND_UP(desc_size + avail_size, qemu_real_host_page_size()); > > > > > > > > } > > > > > > > > [...] > > > > > > > > size_t vhost_svq_memory_packed(const VhostShadowVirtqueue *svq) > > > > { > > > > > > > > size_t desc_size = sizeof(struct vring_packed_desc) * svq->num_free; > > > > size_t driver_event_suppression = sizeof(struct > > > > vring_packed_desc_event); > > > > size_t device_event_suppression = sizeof(struct > > > > vring_packed_desc_event); > > > > > > > > return ROUND_UP(desc_size + driver_event_suppression + > > > > device_event_suppression,> > > > > > qemu_real_host_page_size()); > > > > > > > > } > > > > > > The size returned by "vhost_svq_driver_area_size" might not be the actual > > > driver size which is given by desc_size + driver_event_suppression, > > > right? Will this have to be changed too? > > > > Yes, you're right this needs to be changed too. > > Understood. I'll modify this too. > > I have been trying to test my changes so far as well. I am not very clear on > a few things. > > Q1. > I built QEMU from source with my changes and followed the vdpa_sim + > vhost_vdpa tutorial [1]. The VM seems to be running fine. How do I check > if the packed format is being used instead of the split vq format for shadow > virtqueues? I know the packed format is used when virtio_vdev has got the > VIRTIO_F_RING_PACKED bit enabled. Is there a way of checking that this is > the case? > You can see the features that the driver acked from the guest by checking sysfs. Once you know the PCI BFN from lspci: # lspci -nn|grep '\[1af4:1041\]' 01:00.0 Ethernet controller [0200]: Red Hat, Inc. Virtio 1.0 network device [1af4:1041] (rev 01) # cut -c 35 /sys/devices/pci0000:00/0000:00:02.0/0000:01:00.0/virtio0/features 0 Also, you can check from QEMU by simply tracing if your functions are being called. > Q2. > What's the recommended way to see what's going on under the hood? I tried > using the -D option so QEMU's logs are written to a file but the file was empty. > Would using qemu with -monitor stdio or attaching gdb to the QEMU VM be > worthwhile? > You need to add --trace options with the regex you want to get to enable any output. For example, --trace 'vhost_vdpa_*' print all the trace_vhost_vdpa_* functions. If you want to speed things up, you can just replace the interesting trace_... functions with fprintf(stderr, ...). We can add the trace ones afterwards.
Hi, On Tuesday, August 27, 2024 9:00:36 PM GMT+5:30 Eugenio Perez Martin wrote: > On Wed, Aug 21, 2024 at 2:20 PM Sahil <icegambit91@gmail.com> wrote: > > [...] > > I have been trying to test my changes so far as well. I am not very clear > > on a few things. > > > > Q1. > > I built QEMU from source with my changes and followed the vdpa_sim + > > vhost_vdpa tutorial [1]. The VM seems to be running fine. How do I check > > if the packed format is being used instead of the split vq format for > > shadow virtqueues? I know the packed format is used when virtio_vdev has > > got the VIRTIO_F_RING_PACKED bit enabled. Is there a way of checking that > > this is the case? > > You can see the features that the driver acked from the guest by > checking sysfs. Once you know the PCI BFN from lspci: > # lspci -nn|grep '\[1af4:1041\]' > 01:00.0 Ethernet controller [0200]: Red Hat, Inc. Virtio 1.0 network > device [1af4:1041] (rev 01) > # cut -c 35 > /sys/devices/pci0000:00/0000:00:02.0/0000:01:00.0/virtio0/features 0 > > Also, you can check from QEMU by simply tracing if your functions are > being called. > > > Q2. > > What's the recommended way to see what's going on under the hood? I tried > > using the -D option so QEMU's logs are written to a file but the file was > > empty. Would using qemu with -monitor stdio or attaching gdb to the QEMU > > VM be worthwhile? > > You need to add --trace options with the regex you want to get to > enable any output. For example, --trace 'vhost_vdpa_*' print all the > trace_vhost_vdpa_* functions. > > If you want to speed things up, you can just replace the interesting > trace_... functions with fprintf(stderr, ...). We can add the trace > ones afterwards. Understood. I am able to trace the functions that are being called with fprintf. I'll stick with fprintf for now. I realized that packed vqs are not being used in the test environment. I see that in "hw/virtio/vhost-shadow-virtqueue.c", svq->is_packed is set to 0 and that calls vhost_svq_add_split(). I am not sure how one enables the packed feature bit. I don't know if this is an environment issue. I built qemu from the latest source with my changes on top of it. I followed this article [1] to set up the environment. On the host machine: $ uname -a Linux fedora 6.10.5-100.fc39.x86_64 #1 SMP PREEMPT_DYNAMIC Wed Aug 14 15:49:25 UTC 2024 x86_64 GNU/Linux $ ./qemu/build/qemu-system-x86_64 --version QEMU emulator version 9.0.91 $ vdpa -V vdpa utility, iproute2-6.4.0 All the relevant vdpa modules have been loaded in accordance with [1]. $ lsmod | grep -iE "(vdpa|virtio)" vdpa_sim_net 12288 0 vdpa_sim 24576 1 vdpa_sim_net vringh 32768 2 vdpa_sim,vdpa_sim_net vhost_vdpa 32768 2 vhost 65536 1 vhost_vdpa vhost_iotlb 16384 4 vdpa_sim,vringh,vhost_vdpa,vhost vdpa 36864 3 vdpa_sim,vhost_vdpa,vdpa_sim_net $ ls -l /sys/bus/vdpa/devices/vdpa0/driver lrwxrwxrwx. 1 root root 0 Aug 30 11:25 /sys/bus/vdpa/devices/vdpa0/driver -> ../../bus/vdpa/drivers/vhost_vdpa In the output of the following command, I see ANY_LAYOUT is supported. According to virtio_config.h [2] in the linux kernel, this represents the layout of descriptors. This refers to split and packed vqs, right? $ vdpa mgmtdev show vdpasim_net: supported_classes net max_supported_vqs 3 dev_features MTU MAC STATUS CTRL_VQ CTRL_MAC_ADDR ANY_LAYOUT VERSION_1 ACCESS_PLATFORM $ vdpa dev show -jp { "dev": { "vdpa0": { "type": "network", "mgmtdev": "vdpasim_net", "vendor_id": 0, "max_vqs": 3, "max_vq_size": 256 } } } I started the VM by running: $ sudo ./qemu/build/qemu-system-x86_64 \ -enable-kvm \ -drive file=//home/ig91/fedora_qemu_test_vm/L1.qcow2,media=disk,if=virtio \ -net nic,model=virtio \ -net user,hostfwd=tcp::2226-:22 \ -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-0,id=vhost-vdpa0 \ -device virtio-net-pci,netdev=vhost-vdpa0,bus=pci.0,addr=0x7,disable-legacy=on,disable-modern=off,page-per-vq=on,event_idx=off,packed=on \ -nographic \ -m 2G \ -smp 2 \ -cpu host \ 2>&1 | tee vm.log I added the packed=on option to -device virtio-net-pci. In the VM: # uname -a Linux fedora 6.8.5-201.fc39.x86_64 #1 SMP PREEMPT_DYNAMIC Thu Apr 11 18:25:26 UTC 2024 x86_64 GNU/Linux # lspci -nn | grep -i -A15 "\[1af4:1041\]" 00:07.0 Ethernet controller [0200]: Red Hat, Inc. Virtio 1.0 network device [1af4:1041] (rev 01) # cut -c 35 /sys/devices/pci0000:00/0000:00:07.0/virtio1/features 0 The packed vq feature bit hasn't been set. Am I missing something here? Thanks, Sahil [1] https://www.redhat.com/en/blog/hands-vdpa-what-do-you-do-when-you-aint-got-hardware-part-1 [2] https://github.com/torvalds/linux/blob/master/include/uapi/linux/virtio_config.h#L63
On Fri, Aug 30, 2024 at 12:20 PM Sahil <icegambit91@gmail.com> wrote: > > Hi, > > On Tuesday, August 27, 2024 9:00:36 PM GMT+5:30 Eugenio Perez Martin wrote: > > On Wed, Aug 21, 2024 at 2:20 PM Sahil <icegambit91@gmail.com> wrote: > > > [...] > > > I have been trying to test my changes so far as well. I am not very clear > > > on a few things. > > > > > > Q1. > > > I built QEMU from source with my changes and followed the vdpa_sim + > > > vhost_vdpa tutorial [1]. The VM seems to be running fine. How do I check > > > if the packed format is being used instead of the split vq format for > > > shadow virtqueues? I know the packed format is used when virtio_vdev has > > > got the VIRTIO_F_RING_PACKED bit enabled. Is there a way of checking that > > > this is the case? > > > > You can see the features that the driver acked from the guest by > > checking sysfs. Once you know the PCI BFN from lspci: > > # lspci -nn|grep '\[1af4:1041\]' > > 01:00.0 Ethernet controller [0200]: Red Hat, Inc. Virtio 1.0 network > > device [1af4:1041] (rev 01) > > # cut -c 35 > > /sys/devices/pci0000:00/0000:00:02.0/0000:01:00.0/virtio0/features 0 > > > > Also, you can check from QEMU by simply tracing if your functions are > > being called. > > > > > Q2. > > > What's the recommended way to see what's going on under the hood? I tried > > > using the -D option so QEMU's logs are written to a file but the file was > > > empty. Would using qemu with -monitor stdio or attaching gdb to the QEMU > > > VM be worthwhile? > > > > You need to add --trace options with the regex you want to get to > > enable any output. For example, --trace 'vhost_vdpa_*' print all the > > trace_vhost_vdpa_* functions. > > > > If you want to speed things up, you can just replace the interesting > > trace_... functions with fprintf(stderr, ...). We can add the trace > > ones afterwards. > > Understood. I am able to trace the functions that are being called with > fprintf. I'll stick with fprintf for now. > > I realized that packed vqs are not being used in the test environment. I > see that in "hw/virtio/vhost-shadow-virtqueue.c", svq->is_packed is set > to 0 and that calls vhost_svq_add_split(). I am not sure how one enables > the packed feature bit. I don't know if this is an environment issue. > > I built qemu from the latest source with my changes on top of it. I followed > this article [1] to set up the environment. > > On the host machine: > > $ uname -a > Linux fedora 6.10.5-100.fc39.x86_64 #1 SMP PREEMPT_DYNAMIC Wed Aug 14 15:49:25 UTC 2024 x86_64 GNU/Linux > > $ ./qemu/build/qemu-system-x86_64 --version > QEMU emulator version 9.0.91 > > $ vdpa -V > vdpa utility, iproute2-6.4.0 > > All the relevant vdpa modules have been loaded in accordance with [1]. > > $ lsmod | grep -iE "(vdpa|virtio)" > vdpa_sim_net 12288 0 > vdpa_sim 24576 1 vdpa_sim_net > vringh 32768 2 vdpa_sim,vdpa_sim_net > vhost_vdpa 32768 2 > vhost 65536 1 vhost_vdpa > vhost_iotlb 16384 4 vdpa_sim,vringh,vhost_vdpa,vhost > vdpa 36864 3 vdpa_sim,vhost_vdpa,vdpa_sim_net > > $ ls -l /sys/bus/vdpa/devices/vdpa0/driver > lrwxrwxrwx. 1 root root 0 Aug 30 11:25 /sys/bus/vdpa/devices/vdpa0/driver -> ../../bus/vdpa/drivers/vhost_vdpa > > In the output of the following command, I see ANY_LAYOUT is supported. > According to virtio_config.h [2] in the linux kernel, this represents the > layout of descriptors. This refers to split and packed vqs, right? > > $ vdpa mgmtdev show > vdpasim_net: > supported_classes net > max_supported_vqs 3 > dev_features MTU MAC STATUS CTRL_VQ CTRL_MAC_ADDR ANY_LAYOUT VERSION_1 ACCESS_PLATFORM > > $ vdpa dev show -jp > { > "dev": { > "vdpa0": { > "type": "network", > "mgmtdev": "vdpasim_net", > "vendor_id": 0, > "max_vqs": 3, > "max_vq_size": 256 > } > } > } > > I started the VM by running: > > $ sudo ./qemu/build/qemu-system-x86_64 \ > -enable-kvm \ > -drive file=//home/ig91/fedora_qemu_test_vm/L1.qcow2,media=disk,if=virtio \ > -net nic,model=virtio \ > -net user,hostfwd=tcp::2226-:22 \ > -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-0,id=vhost-vdpa0 \ > -device virtio-net-pci,netdev=vhost-vdpa0,bus=pci.0,addr=0x7,disable-legacy=on,disable-modern=off,page-per-vq=on,event_idx=off,packed=on \ > -nographic \ > -m 2G \ > -smp 2 \ > -cpu host \ > 2>&1 | tee vm.log > > I added the packed=on option to -device virtio-net-pci. > > In the VM: > > # uname -a > Linux fedora 6.8.5-201.fc39.x86_64 #1 SMP PREEMPT_DYNAMIC Thu Apr 11 18:25:26 UTC 2024 x86_64 GNU/Linux > > # lspci -nn | grep -i -A15 "\[1af4:1041\]" > 00:07.0 Ethernet controller [0200]: Red Hat, Inc. Virtio 1.0 network device [1af4:1041] (rev 01) > > # cut -c 35 /sys/devices/pci0000:00/0000:00:07.0/virtio1/features > 0 > > The packed vq feature bit hasn't been set. Am I missing something here? > vdpa_sim does not support packed vq at the moment. You need to build the use case #3 of the second part of that blog [1]. It's good that you build the vdpa_sim earlier as it is a simpler setup. If you have problems with the vp_vdpa environment please let me know so we can find alternative setups. Thanks! [1] https://www.redhat.com/en/blog/hands-vdpa-what-do-you-do-when-you-aint-got-hardware-part-2 > Thanks, > Sahil > > [1] https://www.redhat.com/en/blog/hands-vdpa-what-do-you-do-when-you-aint-got-hardware-part-1 > [2] https://github.com/torvalds/linux/blob/master/include/uapi/linux/virtio_config.h#L63 > > >
Hi, On Friday, August 30, 2024 4:18:31 PM GMT+5:30 Eugenio Perez Martin wrote: > On Fri, Aug 30, 2024 at 12:20 PM Sahil <icegambit91@gmail.com> wrote: > > Hi, > > > > On Tuesday, August 27, 2024 9:00:36 PM GMT+5:30 Eugenio Perez Martin wrote: > > > On Wed, Aug 21, 2024 at 2:20 PM Sahil <icegambit91@gmail.com> wrote: > > > > [...] > > > > I have been trying to test my changes so far as well. I am not very > > > > clear > > > > on a few things. > > > > > > > > Q1. > > > > I built QEMU from source with my changes and followed the vdpa_sim + > > > > vhost_vdpa tutorial [1]. The VM seems to be running fine. How do I > > > > check > > > > if the packed format is being used instead of the split vq format for > > > > shadow virtqueues? I know the packed format is used when virtio_vdev > > > > has > > > > got the VIRTIO_F_RING_PACKED bit enabled. Is there a way of checking > > > > that > > > > this is the case? > > > > > > You can see the features that the driver acked from the guest by > > > checking sysfs. Once you know the PCI BFN from lspci: > > > # lspci -nn|grep '\[1af4:1041\]' > > > 01:00.0 Ethernet controller [0200]: Red Hat, Inc. Virtio 1.0 network > > > device [1af4:1041] (rev 01) > > > # cut -c 35 > > > /sys/devices/pci0000:00/0000:00:02.0/0000:01:00.0/virtio0/features 0 > > > > > > Also, you can check from QEMU by simply tracing if your functions are > > > being called. > > > > > > > Q2. > > > > What's the recommended way to see what's going on under the hood? I > > > > tried > > > > using the -D option so QEMU's logs are written to a file but the file > > > > was > > > > empty. Would using qemu with -monitor stdio or attaching gdb to the > > > > QEMU > > > > VM be worthwhile? > > > > > > You need to add --trace options with the regex you want to get to > > > enable any output. For example, --trace 'vhost_vdpa_*' print all the > > > trace_vhost_vdpa_* functions. > > > > > > If you want to speed things up, you can just replace the interesting > > > trace_... functions with fprintf(stderr, ...). We can add the trace > > > ones afterwards. > > > > Understood. I am able to trace the functions that are being called with > > fprintf. I'll stick with fprintf for now. > > > > I realized that packed vqs are not being used in the test environment. I > > see that in "hw/virtio/vhost-shadow-virtqueue.c", svq->is_packed is set > > to 0 and that calls vhost_svq_add_split(). I am not sure how one enables > > the packed feature bit. I don't know if this is an environment issue. > > > > I built qemu from the latest source with my changes on top of it. I > > followed this article [1] to set up the environment. > > > > On the host machine: > > > > $ uname -a > > Linux fedora 6.10.5-100.fc39.x86_64 #1 SMP PREEMPT_DYNAMIC Wed Aug 14 > > 15:49:25 UTC 2024 x86_64 GNU/Linux > > > > $ ./qemu/build/qemu-system-x86_64 --version > > QEMU emulator version 9.0.91 > > > > $ vdpa -V > > vdpa utility, iproute2-6.4.0 > > > > All the relevant vdpa modules have been loaded in accordance with [1]. > > > > $ lsmod | grep -iE "(vdpa|virtio)" > > vdpa_sim_net 12288 0 > > vdpa_sim 24576 1 vdpa_sim_net > > vringh 32768 2 vdpa_sim,vdpa_sim_net > > vhost_vdpa 32768 2 > > vhost 65536 1 vhost_vdpa > > vhost_iotlb 16384 4 vdpa_sim,vringh,vhost_vdpa,vhost > > vdpa 36864 3 vdpa_sim,vhost_vdpa,vdpa_sim_net > > > > $ ls -l /sys/bus/vdpa/devices/vdpa0/driver > > lrwxrwxrwx. 1 root root 0 Aug 30 11:25 /sys/bus/vdpa/devices/vdpa0/driver > > -> ../../bus/vdpa/drivers/vhost_vdpa > > > > In the output of the following command, I see ANY_LAYOUT is supported. > > According to virtio_config.h [2] in the linux kernel, this represents the > > layout of descriptors. This refers to split and packed vqs, right? > > > > $ vdpa mgmtdev show > > > > vdpasim_net: > > supported_classes net > > max_supported_vqs 3 > > dev_features MTU MAC STATUS CTRL_VQ CTRL_MAC_ADDR ANY_LAYOUT VERSION_1 > > ACCESS_PLATFORM> > > $ vdpa dev show -jp > > { > > > > "dev": { > > > > "vdpa0": { > > > > "type": "network", > > "mgmtdev": "vdpasim_net", > > "vendor_id": 0, > > "max_vqs": 3, > > "max_vq_size": 256 > > > > } > > > > } > > > > } > > > > I started the VM by running: > > > > $ sudo ./qemu/build/qemu-system-x86_64 \ > > -enable-kvm \ > > -drive file=//home/ig91/fedora_qemu_test_vm/L1.qcow2,media=disk,if=virtio > > \ > > -net nic,model=virtio \ > > -net user,hostfwd=tcp::2226-:22 \ > > -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-0,id=vhost-vdpa0 \ > > -device > > virtio-net-pci,netdev=vhost-vdpa0,bus=pci.0,addr=0x7,disable-legacy=on,di > > sable-modern=off,page-per-vq=on,event_idx=off,packed=on \ -nographic \ > > -m 2G \ > > -smp 2 \ > > -cpu host \ > > 2>&1 | tee vm.log > > > > I added the packed=on option to -device virtio-net-pci. > > > > In the VM: > > > > # uname -a > > Linux fedora 6.8.5-201.fc39.x86_64 #1 SMP PREEMPT_DYNAMIC Thu Apr 11 > > 18:25:26 UTC 2024 x86_64 GNU/Linux > > > > # lspci -nn | grep -i -A15 "\[1af4:1041\]" > > 00:07.0 Ethernet controller [0200]: Red Hat, Inc. Virtio 1.0 network > > device [1af4:1041] (rev 01) > > > > # cut -c 35 /sys/devices/pci0000:00/0000:00:07.0/virtio1/features > > 0 > > > > The packed vq feature bit hasn't been set. Am I missing something here? > > vdpa_sim does not support packed vq at the moment. You need to build > the use case #3 of the second part of that blog [1]. It's good that > you build the vdpa_sim earlier as it is a simpler setup. > > If you have problems with the vp_vdpa environment please let me know > so we can find alternative setups. Thank you for the clarification. I tried setting up the vp_vdpa environment (scenario 3) but I ended up running into a problem in the L1 VM. I verified that nesting is enabled in KVM (L0): $ grep -oE "(vmx|svm)" /proc/cpuinfo | sort | uniq vmx $ cat /sys/module/kvm_intel/parameters/nested Y There are no issues when booting L1. I start the VM by running: $ sudo ./qemu/build/qemu-system-x86_64 \ -enable-kvm \ -drive file=//home/ig91/fedora_qemu_test_vm/L1.qcow2,media=disk,if=virtio \ -net nic,model=virtio \ -net user,hostfwd=tcp::2222-:22 \ -device intel-iommu,snoop-control=on \ -device virtio-net-pci,netdev=net0,disable-legacy=on,disable-modern=off,iommu_platform=on,event_idx=off,packed=on,bus=pcie.0,addr=0x4 \ -netdev tap,id=net0,script=no,downscript=no \ -nographic \ -m 2G \ -smp 2 \ -M q35 \ -cpu host \ 2>&1 | tee vm.log Kernel version in L1: # uname -a Linux fedora 6.8.5-201.fc39.x86_64 #1 SMP PREEMPT_DYNAMIC Thu Apr 11 18:25:26 UTC 2024 x86_64 GNU/Linux The following variables are set in the kernel's config as described in the blog [1]: CONFIG_VIRTIO_VDPA=m CONFIG_VDPA=m CONFIG_VP_VDPA=m CONFIG_VHOST_VDPA=m The vDPA tool also satisfies the version criterion. # vdpa -V vdpa utility, iproute2-6.10.0 I built QEMU from source with my changes on top of it. # ./qemu/build/qemu-system-x86_64 --version QEMU emulator version 9.0.91 The relevant vdpa modules are loaded successfully as explained in the blog. # lsmod | grep -i vdpa vp_vdpa 20480 0 vhost_vdpa 32768 0 vhost 65536 1 vhost_vdpa vhost_iotlb 16384 2 vhost_vdpa,vhost vdpa 36864 2 vp_vdpa,vhost_vdpa irqbypass 12288 2 vhost_vdpa,kvm # lspci | grep -i virtio 00:03.0 SCSI storage controller: Red Hat, Inc. Virtio block device 00:04.0 Ethernet controller: Red Hat, Inc. Virtio 1.0 network device (rev 01) # lspci -n |grep 00:04.0 00:04.0 0200: 1af4:1041 (rev 01) I then unbind the virtio-pci device from the virtio-pci driver and bind it to the vp_vdpa driver. # echo 0000:00:04.0 > /sys/bus/pci/drivers/virtio-pci/unbind # echo 1af4 1041 > /sys/bus/pci/drivers/vp-vdpa/new_id I then create the vDPA device without any issues. # vdpa mgmtdev show pci/0000:00:04.0: supported_classes net max_supported_vqs 3 dev_features CSUM GUEST_CSUM CTRL_GUEST_OFFLOADS MAC GUEST_TSO4 GUEST_TSO6 GUEST_ECN GUEST_UFO HOST_TSO4 HOST_TSO6 HOST_ECN HOST_UFO MRG_RXBUF STATUS CTRL_VQ CTRL_RX CTRL_VLAN CTRL_RX_EXTRA GUEST_ANNOUNCE CTRL_MAC_ADDR RING_INDIRECT_DE6 # vdpa dev add name vdpa0 mgmtdev pci/0000:00:04.0 # vdpa dev show -jp { "dev": { "vdpa0": { "type": "network", "mgmtdev": "pci/0000:00:04.0", "vendor_id": 6900, "max_vqs": 3, "max_vq_size": 256 } } } # ls -l /sys/bus/vdpa/devices/vdpa0/driver lrwxrwxrwx. 1 root root 0 Sep 8 18:58 /sys/bus/vdpa/devices/vdpa0/driver -> ../../../../bus/vdpa/drivers/vhost_vdpa # ls -l /dev/ |grep vdpa crw-------. 1 root root 239, 0 Sep 8 18:58 vhost-vdpa-0 # driverctl -b vdpa list-devices vdpa0 vhost_vdpa I have a qcow2 image L2.qcow in L1. QEMU throws an error when trying to launch L2. # sudo ./qemu/build/qemu-system-x86_64 \ -enable-kvm \ -drive file=//root/L2.qcow2,media=disk,if=virtio \ -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-0,id=vhost-vdpa0 \ -device virtio-net-pci,netdev=vhost-vdpa0,bus=pcie.0,addr=0x7,disable-legacy=on,disable-modern=off,event_idx=off,packed=on \ -nographic \ -m 2G \ -smp 2 \ -M q35 \ -cpu host \ 2>&1 | tee vm.log qemu-system-x86_64: -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-0,id=vhost-vdpa0: Could not open '/dev/vhost-vdpa-0': Unknown error 524 I get the same error when trying to launch L2 with qemu-kvm which I installed using "dnf install". # qemu-kvm --version QEMU emulator version 8.1.3 (qemu-8.1.3-5.fc39) The minimum version of QEMU required is v7.0.0-rc4. According to "include/linux/errno.h" [2], errno 524 is ENOTSUPP (operation is not supported). I am not sure where I am going wrong. However, I managed to set up scenario 4 successfully and I see that packed vq is enabled in this case. # cut -c 35 /sys/devices/pci0000:00/0000:00:04.0/virtio1/features 1 For the time being, shall I simply continue testing with scenario 4? > Thanks! > > [1] > https://www.redhat.com/en/blog/hands-vdpa-what-do-you-do-when-you-aint-got- > hardware-part-2 > > Thanks, > > Sahil > > > > [1] > > https://www.redhat.com/en/blog/hands-vdpa-what-do-you-do-when-you-aint-go > > t-hardware-part-1 [2] > > https://github.com/torvalds/linux/blob/master/include/uapi/linux/virtio_c > > onfig.h#L63 Thanks, Sahil [1] https://www.redhat.com/en/blog/hands-vdpa-what-do-you-do-when-you-aint-got-hardware-part-2 [2] https://github.com/torvalds/linux/blob/master/include/linux/errno.h#L27
On Sun, Sep 8, 2024 at 9:47 PM Sahil <icegambit91@gmail.com> wrote: > > Hi, > > On Friday, August 30, 2024 4:18:31 PM GMT+5:30 Eugenio Perez Martin wrote: > > On Fri, Aug 30, 2024 at 12:20 PM Sahil <icegambit91@gmail.com> wrote: > > > Hi, > > > > > > On Tuesday, August 27, 2024 9:00:36 PM GMT+5:30 Eugenio Perez Martin wrote: > > > > On Wed, Aug 21, 2024 at 2:20 PM Sahil <icegambit91@gmail.com> wrote: > > > > > [...] > > > > > I have been trying to test my changes so far as well. I am not very > > > > > clear > > > > > on a few things. > > > > > > > > > > Q1. > > > > > I built QEMU from source with my changes and followed the vdpa_sim + > > > > > vhost_vdpa tutorial [1]. The VM seems to be running fine. How do I > > > > > check > > > > > if the packed format is being used instead of the split vq format for > > > > > shadow virtqueues? I know the packed format is used when virtio_vdev > > > > > has > > > > > got the VIRTIO_F_RING_PACKED bit enabled. Is there a way of checking > > > > > that > > > > > this is the case? > > > > > > > > You can see the features that the driver acked from the guest by > > > > checking sysfs. Once you know the PCI BFN from lspci: > > > > # lspci -nn|grep '\[1af4:1041\]' > > > > 01:00.0 Ethernet controller [0200]: Red Hat, Inc. Virtio 1.0 network > > > > device [1af4:1041] (rev 01) > > > > # cut -c 35 > > > > /sys/devices/pci0000:00/0000:00:02.0/0000:01:00.0/virtio0/features 0 > > > > > > > > Also, you can check from QEMU by simply tracing if your functions are > > > > being called. > > > > > > > > > Q2. > > > > > What's the recommended way to see what's going on under the hood? I > > > > > tried > > > > > using the -D option so QEMU's logs are written to a file but the file > > > > > was > > > > > empty. Would using qemu with -monitor stdio or attaching gdb to the > > > > > QEMU > > > > > VM be worthwhile? > > > > > > > > You need to add --trace options with the regex you want to get to > > > > enable any output. For example, --trace 'vhost_vdpa_*' print all the > > > > trace_vhost_vdpa_* functions. > > > > > > > > If you want to speed things up, you can just replace the interesting > > > > trace_... functions with fprintf(stderr, ...). We can add the trace > > > > ones afterwards. > > > > > > Understood. I am able to trace the functions that are being called with > > > fprintf. I'll stick with fprintf for now. > > > > > > I realized that packed vqs are not being used in the test environment. I > > > see that in "hw/virtio/vhost-shadow-virtqueue.c", svq->is_packed is set > > > to 0 and that calls vhost_svq_add_split(). I am not sure how one enables > > > the packed feature bit. I don't know if this is an environment issue. > > > > > > I built qemu from the latest source with my changes on top of it. I > > > followed this article [1] to set up the environment. > > > > > > On the host machine: > > > > > > $ uname -a > > > Linux fedora 6.10.5-100.fc39.x86_64 #1 SMP PREEMPT_DYNAMIC Wed Aug 14 > > > 15:49:25 UTC 2024 x86_64 GNU/Linux > > > > > > $ ./qemu/build/qemu-system-x86_64 --version > > > QEMU emulator version 9.0.91 > > > > > > $ vdpa -V > > > vdpa utility, iproute2-6.4.0 > > > > > > All the relevant vdpa modules have been loaded in accordance with [1]. > > > > > > $ lsmod | grep -iE "(vdpa|virtio)" > > > vdpa_sim_net 12288 0 > > > vdpa_sim 24576 1 vdpa_sim_net > > > vringh 32768 2 vdpa_sim,vdpa_sim_net > > > vhost_vdpa 32768 2 > > > vhost 65536 1 vhost_vdpa > > > vhost_iotlb 16384 4 vdpa_sim,vringh,vhost_vdpa,vhost > > > vdpa 36864 3 vdpa_sim,vhost_vdpa,vdpa_sim_net > > > > > > $ ls -l /sys/bus/vdpa/devices/vdpa0/driver > > > lrwxrwxrwx. 1 root root 0 Aug 30 11:25 /sys/bus/vdpa/devices/vdpa0/driver > > > -> ../../bus/vdpa/drivers/vhost_vdpa > > > > > > In the output of the following command, I see ANY_LAYOUT is supported. > > > According to virtio_config.h [2] in the linux kernel, this represents the > > > layout of descriptors. This refers to split and packed vqs, right? > > > > > > $ vdpa mgmtdev show > > > > > > vdpasim_net: > > > supported_classes net > > > max_supported_vqs 3 > > > dev_features MTU MAC STATUS CTRL_VQ CTRL_MAC_ADDR ANY_LAYOUT VERSION_1 > > > ACCESS_PLATFORM> > > > $ vdpa dev show -jp > > > { > > > > > > "dev": { > > > > > > "vdpa0": { > > > > > > "type": "network", > > > "mgmtdev": "vdpasim_net", > > > "vendor_id": 0, > > > "max_vqs": 3, > > > "max_vq_size": 256 > > > > > > } > > > > > > } > > > > > > } > > > > > > I started the VM by running: > > > > > > $ sudo ./qemu/build/qemu-system-x86_64 \ > > > -enable-kvm \ > > > -drive file=//home/ig91/fedora_qemu_test_vm/L1.qcow2,media=disk,if=virtio > > > \ > > > -net nic,model=virtio \ > > > -net user,hostfwd=tcp::2226-:22 \ > > > -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-0,id=vhost-vdpa0 \ > > > -device > > > virtio-net-pci,netdev=vhost-vdpa0,bus=pci.0,addr=0x7,disable-legacy=on,di > > > sable-modern=off,page-per-vq=on,event_idx=off,packed=on \ -nographic \ > > > -m 2G \ > > > -smp 2 \ > > > -cpu host \ > > > 2>&1 | tee vm.log > > > > > > I added the packed=on option to -device virtio-net-pci. > > > > > > In the VM: > > > > > > # uname -a > > > Linux fedora 6.8.5-201.fc39.x86_64 #1 SMP PREEMPT_DYNAMIC Thu Apr 11 > > > 18:25:26 UTC 2024 x86_64 GNU/Linux > > > > > > # lspci -nn | grep -i -A15 "\[1af4:1041\]" > > > 00:07.0 Ethernet controller [0200]: Red Hat, Inc. Virtio 1.0 network > > > device [1af4:1041] (rev 01) > > > > > > # cut -c 35 /sys/devices/pci0000:00/0000:00:07.0/virtio1/features > > > 0 > > > > > > The packed vq feature bit hasn't been set. Am I missing something here? > > > > vdpa_sim does not support packed vq at the moment. You need to build > > the use case #3 of the second part of that blog [1]. It's good that > > you build the vdpa_sim earlier as it is a simpler setup. > > > > If you have problems with the vp_vdpa environment please let me know > > so we can find alternative setups. > > Thank you for the clarification. I tried setting up the vp_vdpa > environment (scenario 3) but I ended up running into a problem > in the L1 VM. > > I verified that nesting is enabled in KVM (L0): > > $ grep -oE "(vmx|svm)" /proc/cpuinfo | sort | uniq > vmx > > $ cat /sys/module/kvm_intel/parameters/nested > Y > > There are no issues when booting L1. I start the VM by running: > > $ sudo ./qemu/build/qemu-system-x86_64 \ > -enable-kvm \ > -drive file=//home/ig91/fedora_qemu_test_vm/L1.qcow2,media=disk,if=virtio \ > -net nic,model=virtio \ > -net user,hostfwd=tcp::2222-:22 \ > -device intel-iommu,snoop-control=on \ > -device virtio-net-pci,netdev=net0,disable-legacy=on,disable-modern=off,iommu_platform=on,event_idx=off,packed=on,bus=pcie.0,addr=0x4 \ > -netdev tap,id=net0,script=no,downscript=no \ > -nographic \ > -m 2G \ > -smp 2 \ > -M q35 \ > -cpu host \ > 2>&1 | tee vm.log > > Kernel version in L1: > > # uname -a > Linux fedora 6.8.5-201.fc39.x86_64 #1 SMP PREEMPT_DYNAMIC Thu Apr 11 18:25:26 UTC 2024 x86_64 GNU/Linux > Did you run the kernels with the arguments "iommu=pt intel_iommu=on"? You can print them with cat /proc/cmdline. > The following variables are set in the kernel's config as > described in the blog [1]: > > CONFIG_VIRTIO_VDPA=m > CONFIG_VDPA=m > CONFIG_VP_VDPA=m > CONFIG_VHOST_VDPA=m > > The vDPA tool also satisfies the version criterion. > > # vdpa -V > vdpa utility, iproute2-6.10.0 > > I built QEMU from source with my changes on top of it. > > # ./qemu/build/qemu-system-x86_64 --version > QEMU emulator version 9.0.91 > > The relevant vdpa modules are loaded successfully as > explained in the blog. > > # lsmod | grep -i vdpa > vp_vdpa 20480 0 > vhost_vdpa 32768 0 > vhost 65536 1 vhost_vdpa > vhost_iotlb 16384 2 vhost_vdpa,vhost > vdpa 36864 2 vp_vdpa,vhost_vdpa > irqbypass 12288 2 vhost_vdpa,kvm > > # lspci | grep -i virtio > 00:03.0 SCSI storage controller: Red Hat, Inc. Virtio block device > 00:04.0 Ethernet controller: Red Hat, Inc. Virtio 1.0 network device (rev 01) > > # lspci -n |grep 00:04.0 > 00:04.0 0200: 1af4:1041 (rev 01) > > I then unbind the virtio-pci device from the virtio-pci > driver and bind it to the vp_vdpa driver. > > # echo 0000:00:04.0 > /sys/bus/pci/drivers/virtio-pci/unbind > # echo 1af4 1041 > /sys/bus/pci/drivers/vp-vdpa/new_id > > I then create the vDPA device without any issues. > > # vdpa mgmtdev show > pci/0000:00:04.0: > supported_classes net > max_supported_vqs 3 > dev_features CSUM GUEST_CSUM CTRL_GUEST_OFFLOADS MAC GUEST_TSO4 GUEST_TSO6 GUEST_ECN GUEST_UFO HOST_TSO4 HOST_TSO6 HOST_ECN HOST_UFO MRG_RXBUF STATUS CTRL_VQ CTRL_RX CTRL_VLAN CTRL_RX_EXTRA GUEST_ANNOUNCE CTRL_MAC_ADDR RING_INDIRECT_DE6 > > # vdpa dev add name vdpa0 mgmtdev pci/0000:00:04.0 > # vdpa dev show -jp > { > "dev": { > "vdpa0": { > "type": "network", > "mgmtdev": "pci/0000:00:04.0", > "vendor_id": 6900, > "max_vqs": 3, > "max_vq_size": 256 > } > } > } > > # ls -l /sys/bus/vdpa/devices/vdpa0/driver > lrwxrwxrwx. 1 root root 0 Sep 8 18:58 /sys/bus/vdpa/devices/vdpa0/driver -> ../../../../bus/vdpa/drivers/vhost_vdpa > > # ls -l /dev/ |grep vdpa > crw-------. 1 root root 239, 0 Sep 8 18:58 vhost-vdpa-0 > > # driverctl -b vdpa list-devices > vdpa0 vhost_vdpa > > I have a qcow2 image L2.qcow in L1. QEMU throws an error > when trying to launch L2. > > # sudo ./qemu/build/qemu-system-x86_64 \ > -enable-kvm \ > -drive file=//root/L2.qcow2,media=disk,if=virtio \ > -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-0,id=vhost-vdpa0 \ > -device virtio-net-pci,netdev=vhost-vdpa0,bus=pcie.0,addr=0x7,disable-legacy=on,disable-modern=off,event_idx=off,packed=on \ > -nographic \ > -m 2G \ > -smp 2 \ > -M q35 \ > -cpu host \ > 2>&1 | tee vm.log > > qemu-system-x86_64: -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-0,id=vhost-vdpa0: Could not open '/dev/vhost-vdpa-0': Unknown error 524 > > I get the same error when trying to launch L2 with qemu-kvm > which I installed using "dnf install". > > # qemu-kvm --version > QEMU emulator version 8.1.3 (qemu-8.1.3-5.fc39) > > The minimum version of QEMU required is v7.0.0-rc4. > > According to "include/linux/errno.h" [2], errno 524 is > ENOTSUPP (operation is not supported). I am not sure > where I am going wrong. > > However, I managed to set up scenario 4 successfully > and I see that packed vq is enabled in this case. > > # cut -c 35 /sys/devices/pci0000:00/0000:00:04.0/virtio1/features > 1 > > For the time being, shall I simply continue testing with > scenario 4? > > > Thanks! > > > > [1] > > https://www.redhat.com/en/blog/hands-vdpa-what-do-you-do-when-you-aint-got- > > hardware-part-2 > > > Thanks, > > > Sahil > > > > > > [1] > > > https://www.redhat.com/en/blog/hands-vdpa-what-do-you-do-when-you-aint-go > > > t-hardware-part-1 [2] > > > https://github.com/torvalds/linux/blob/master/include/uapi/linux/virtio_c > > > onfig.h#L63 > > Thanks, > Sahil > > [1] https://www.redhat.com/en/blog/hands-vdpa-what-do-you-do-when-you-aint-got-hardware-part-2 > [2] https://github.com/torvalds/linux/blob/master/include/linux/errno.h#L27 > >
Hi, On Monday, September 9, 2024 6:04:45 PM GMT+5:30 Eugenio Perez Martin wrote: > On Sun, Sep 8, 2024 at 9:47 PM Sahil <icegambit91@gmail.com> wrote: > > On Friday, August 30, 2024 4:18:31 PM GMT+5:30 Eugenio Perez Martin wrote: > > > On Fri, Aug 30, 2024 at 12:20 PM Sahil <icegambit91@gmail.com> wrote: > > > [...] > > > vdpa_sim does not support packed vq at the moment. You need to build > > > the use case #3 of the second part of that blog [1]. It's good that > > > you build the vdpa_sim earlier as it is a simpler setup. > > > > > > If you have problems with the vp_vdpa environment please let me know > > > so we can find alternative setups. > > > > Thank you for the clarification. I tried setting up the vp_vdpa > > environment (scenario 3) but I ended up running into a problem > > in the L1 VM. > > > > I verified that nesting is enabled in KVM (L0): > > > > $ grep -oE "(vmx|svm)" /proc/cpuinfo | sort | uniq > > vmx > > > > $ cat /sys/module/kvm_intel/parameters/nested > > Y > > > > There are no issues when booting L1. I start the VM by running: > > > > $ sudo ./qemu/build/qemu-system-x86_64 \ > > -enable-kvm \ > > -drive file=//home/ig91/fedora_qemu_test_vm/L1.qcow2,media=disk,if=virtio > > \ > > -net nic,model=virtio \ > > -net user,hostfwd=tcp::2222-:22 \ > > -device intel-iommu,snoop-control=on \ > > -device > > virtio-net-pci,netdev=net0,disable-legacy=on,disable-modern=off,iommu_pla > > tform=on,event_idx=off,packed=on,bus=pcie.0,addr=0x4 \ -netdev > > tap,id=net0,script=no,downscript=no \ > > -nographic \ > > -m 2G \ > > -smp 2 \ > > -M q35 \ > > -cpu host \ > > 2>&1 | tee vm.log > > > > Kernel version in L1: > > > > # uname -a > > Linux fedora 6.8.5-201.fc39.x86_64 #1 SMP PREEMPT_DYNAMIC Thu Apr 11 > > 18:25:26 UTC 2024 x86_64 GNU/Linux > Did you run the kernels with the arguments "iommu=pt intel_iommu=on"? > You can print them with cat /proc/cmdline. I missed this while setting up the environment. After setting the kernel params I managed to move past this issue but my environment in virtualbox was very unstable and it kept crashing. I managed to get L1 to run on my host OS, so scenario 3 is now up and running. However, the packed bit seems to be disabled in this scenario too. L0 (host machine) specs: - kernel version: 6.6.46-1-lts - QEMU version: 9.0.50 (v8.2.0-5536-g16514611dc) - vDPA version: iproute2-6.10.0 L1 specs: - kernel version: 6.8.5-201.fc39.x86_64 - QEMU version: 9.0.91 - vDPA version: iproute2-6.10.0 L2 specs: - kernel version 6.8.7-200.fc39.x86_64 I followed the following steps to set up scenario 3: ==== In L0 ==== $ grep -oE "(vmx|svm)" /proc/cpuinfo | sort | uniq vmx $ cat /sys/module/kvm_intel/parameters/nested Y $ sudo ./qemu/build/qemu-system-x86_64 \ -enable-kvm \ -drive file=//home/valdaarhun/valdaarhun/qcow2_img/L1.qcow2,media=disk,if=virtio \ -net nic,model=virtio \ -net user,hostfwd=tcp::2222-:22 \ -device intel-iommu,snoop-control=on \ -device virtio-net-pci,netdev=net0,disable-legacy=on,disable-modern=off,iommu_platform=on,event_idx=off,packed=on,bus=pcie.0,addr=0x4 \ -netdev tap,id=net0,script=no,downscript=no \ -nographic \ -m 8G \ -smp 4 \ -M q35 \ -cpu host \ 2>&1 | tee vm.log ==== In L1 ==== I verified that the following config variables are set as decribed in the blog [1]. CONFIG_VIRTIO_VDPA=m CONFIG_VDPA=m CONFIG_VP_VDPA=m CONFIG_VHOST_VDPA=m # modprobe vdpa # modprobe vhost_vdpa # modprobe vp_vdpa # lsmod | grep -i vdpa vp_vdpa 20480 0 vhost_vdpa 32768 0 vhost 65536 1 vhost_vdpa vhost_iotlb 16384 2 vhost_vdpa,vhost vdpa 36864 2 vp_vdpa,vhost_vdpa irqbypass 12288 2 vhost_vdpa,kvm # lspci | grep -i ethernet 00:04.0 Ethernet controller: Red Hat, Inc. Virtio 1.0 network device (rev 01) # lspci -nn | grep 00:04.0 00:04.0 Ethernet controller [0200]: Red Hat, Inc. Virtio 1.0 network device [1af4:1041] (rev 01) # echo 0000:00:04.0 > /sys/bus/pci/drivers/virtio-pci/unbind # echo 1af4 1041 > /sys/bus/pci/drivers/vp-vdpa/new_id # vdpa mgmtdev show pci/0000:00:04.0: supported_classes net < unknown class > max_supported_vqs 3 dev_features CSUM GUEST_CSUM CTRL_GUEST_OFFLOADS MAC GUEST_TSO4 GUEST_TSO6 GUEST_ECN GUEST_UFO HOST_TSO4 HOST_TSO6 HOST_ECN HOST_UFO MRG_RXBUF STATUS CTRL_VQ CTRL_RX CTRL_VLAN CTRL_RX_EXTRA GUEST_ANNOUNCE CTRL_MAC_ADDR RING_INDIRECT_DESC RING_EVENT_IDX VERSION_1 ACCESS_PLATFORM bit_40 bit_54 bit_55 bit_56 # vdpa dev add name vdpa0 mgmtdev pci/0000:00:04.0 # vdpa dev show -jp { "dev": { "vdpa0": { "type": "network", "mgmtdev": "pci/0000:00:04.0", "vendor_id": 6900, "max_vqs": 3, "max_vq_size": 256 } } } # ls -l /sys/bus/vdpa/devices/vdpa0/driver lrwxrwxrwx. 1 root root 0 Sep 11 17:07 /sys/bus/vdpa/devices/vdpa0/driver -> ../../../../bus/vdpa/drivers/vhost_vdpa # ls -l /dev/ |grep vdpa crw-------. 1 root root 239, 0 Sep 11 17:07 vhost-vdpa-0 # driverctl -b vdpa vdpa0 vhost_vdpa [*] Booting L2: # ./qemu/build/qemu-system-x86_64 \ -nographic \ -m 4G \ -enable-kvm \ -M q35 \ -drive file=//root/L2.qcow2,media=disk,if=virtio \ -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-0,id=vhost-vdpa0 \ -device virtio-net-pci,netdev=vhost-vdpa0,disable-legacy=on,disable-modern=off,event_idx=off,packed=on,bus=pcie.0,addr=0x7 \ -smp 4 \ -cpu host \ 2>&1 | tee vm.log ==== In L2 ==== # cut -c 35 /sys/devices/pci0000:00/0000:00:07.0/virtio1/features 0 Based on what I have understood from the kernel's source, vhost-vdpa and vp-vdpa both support packed vqs and v6.6.46 is more recent than the minimum required kernel version. I am not sure where I am going wrong. > > However, I managed to set up scenario 4 successfully > > and I see that packed vq is enabled in this case. > > > > # cut -c 35 /sys/devices/pci0000:00/0000:00:04.0/virtio1/features > > 1 So far scenario 4 is the only scenario where the virtio-net device has the packed feature enabled. The difference between scenarios 3 and 4 in terms of the kernel modules loaded is that scenario 4 uses virtio_vdpa while scenario 3 uses vdpa and vhost_vdpa. Thanks, Sahil [1] https://www.redhat.com/en/blog/hands-vdpa-what-do-you-do-when-you-aint-got-hardware-part-2
On Wed, Sep 11, 2024 at 9:36 PM Sahil <icegambit91@gmail.com> wrote: > > Hi, > > On Monday, September 9, 2024 6:04:45 PM GMT+5:30 Eugenio Perez Martin wrote: > > On Sun, Sep 8, 2024 at 9:47 PM Sahil <icegambit91@gmail.com> wrote: > > > On Friday, August 30, 2024 4:18:31 PM GMT+5:30 Eugenio Perez Martin wrote: > > > > On Fri, Aug 30, 2024 at 12:20 PM Sahil <icegambit91@gmail.com> wrote: > > > > [...] > > > > vdpa_sim does not support packed vq at the moment. You need to build > > > > the use case #3 of the second part of that blog [1]. It's good that > > > > you build the vdpa_sim earlier as it is a simpler setup. > > > > > > > > If you have problems with the vp_vdpa environment please let me know > > > > so we can find alternative setups. > > > > > > Thank you for the clarification. I tried setting up the vp_vdpa > > > environment (scenario 3) but I ended up running into a problem > > > in the L1 VM. > > > > > > I verified that nesting is enabled in KVM (L0): > > > > > > $ grep -oE "(vmx|svm)" /proc/cpuinfo | sort | uniq > > > vmx > > > > > > $ cat /sys/module/kvm_intel/parameters/nested > > > Y > > > > > > There are no issues when booting L1. I start the VM by running: > > > > > > $ sudo ./qemu/build/qemu-system-x86_64 \ > > > -enable-kvm \ > > > -drive file=//home/ig91/fedora_qemu_test_vm/L1.qcow2,media=disk,if=virtio > > > \ > > > -net nic,model=virtio \ > > > -net user,hostfwd=tcp::2222-:22 \ > > > -device intel-iommu,snoop-control=on \ > > > -device > > > virtio-net-pci,netdev=net0,disable-legacy=on,disable-modern=off,iommu_pla > > > tform=on,event_idx=off,packed=on,bus=pcie.0,addr=0x4 \ -netdev > > > tap,id=net0,script=no,downscript=no \ > > > -nographic \ > > > -m 2G \ > > > -smp 2 \ > > > -M q35 \ > > > -cpu host \ > > > 2>&1 | tee vm.log > > > > > > Kernel version in L1: > > > > > > # uname -a > > > Linux fedora 6.8.5-201.fc39.x86_64 #1 SMP PREEMPT_DYNAMIC Thu Apr 11 > > > 18:25:26 UTC 2024 x86_64 GNU/Linux > > Did you run the kernels with the arguments "iommu=pt intel_iommu=on"? > > You can print them with cat /proc/cmdline. > > I missed this while setting up the environment. After setting the kernel > params I managed to move past this issue but my environment in virtualbox > was very unstable and it kept crashing. > I've no experience with virtualbox+vdpa, sorry :). Why not use QEMU also for L1? > I managed to get L1 to run on my host OS, so scenario 3 is now up and > running. However, the packed bit seems to be disabled in this scenario too. > > L0 (host machine) specs: > - kernel version: > 6.6.46-1-lts > > - QEMU version: > 9.0.50 (v8.2.0-5536-g16514611dc) > > - vDPA version: > iproute2-6.10.0 > > L1 specs: > > - kernel version: > 6.8.5-201.fc39.x86_64 > > - QEMU version: > 9.0.91 > > - vDPA version: > iproute2-6.10.0 > > L2 specs: > - kernel version > 6.8.7-200.fc39.x86_64 > > I followed the following steps to set up scenario 3: > > ==== In L0 ==== > > $ grep -oE "(vmx|svm)" /proc/cpuinfo | sort | uniq > vmx > > $ cat /sys/module/kvm_intel/parameters/nested > Y > > $ sudo ./qemu/build/qemu-system-x86_64 \ > -enable-kvm \ > -drive file=//home/valdaarhun/valdaarhun/qcow2_img/L1.qcow2,media=disk,if=virtio \ > -net nic,model=virtio \ > -net user,hostfwd=tcp::2222-:22 \ > -device intel-iommu,snoop-control=on \ > -device virtio-net-pci,netdev=net0,disable-legacy=on,disable-modern=off,iommu_platform=on,event_idx=off,packed=on,bus=pcie.0,addr=0x4 \ > -netdev tap,id=net0,script=no,downscript=no \ > -nographic \ > -m 8G \ > -smp 4 \ > -M q35 \ > -cpu host \ > 2>&1 | tee vm.log > > ==== In L1 ==== > > I verified that the following config variables are set as decribed in the blog [1]. > > CONFIG_VIRTIO_VDPA=m > CONFIG_VDPA=m > CONFIG_VP_VDPA=m > CONFIG_VHOST_VDPA=m > > # modprobe vdpa > # modprobe vhost_vdpa > # modprobe vp_vdpa > > # lsmod | grep -i vdpa > vp_vdpa 20480 0 > vhost_vdpa 32768 0 > vhost 65536 1 vhost_vdpa > vhost_iotlb 16384 2 vhost_vdpa,vhost > vdpa 36864 2 vp_vdpa,vhost_vdpa > irqbypass 12288 2 vhost_vdpa,kvm > > # lspci | grep -i ethernet > 00:04.0 Ethernet controller: Red Hat, Inc. Virtio 1.0 network device (rev 01) > > # lspci -nn | grep 00:04.0 > 00:04.0 Ethernet controller [0200]: Red Hat, Inc. Virtio 1.0 network device [1af4:1041] (rev 01) > > # echo 0000:00:04.0 > /sys/bus/pci/drivers/virtio-pci/unbind > # echo 1af4 1041 > /sys/bus/pci/drivers/vp-vdpa/new_id > > # vdpa mgmtdev show > pci/0000:00:04.0: > supported_classes net < unknown class > > max_supported_vqs 3 > dev_features CSUM GUEST_CSUM CTRL_GUEST_OFFLOADS MAC GUEST_TSO4 > GUEST_TSO6 GUEST_ECN GUEST_UFO HOST_TSO4 HOST_TSO6 HOST_ECN > HOST_UFO MRG_RXBUF STATUS CTRL_VQ CTRL_RX CTRL_VLAN CTRL_RX_EXTRA > GUEST_ANNOUNCE CTRL_MAC_ADDR RING_INDIRECT_DESC RING_EVENT_IDX > VERSION_1 ACCESS_PLATFORM bit_40 bit_54 bit_55 bit_56 > > # vdpa dev add name vdpa0 mgmtdev pci/0000:00:04.0 > # vdpa dev show -jp > { > "dev": { > "vdpa0": { > "type": "network", > "mgmtdev": "pci/0000:00:04.0", > "vendor_id": 6900, > "max_vqs": 3, > "max_vq_size": 256 > } > } > } > > # ls -l /sys/bus/vdpa/devices/vdpa0/driver > lrwxrwxrwx. 1 root root 0 Sep 11 17:07 /sys/bus/vdpa/devices/vdpa0/driver -> ../../../../bus/vdpa/drivers/vhost_vdpa > > # ls -l /dev/ |grep vdpa > crw-------. 1 root root 239, 0 Sep 11 17:07 vhost-vdpa-0 > > # driverctl -b vdpa > vdpa0 vhost_vdpa [*] > > Booting L2: > > # ./qemu/build/qemu-system-x86_64 \ > -nographic \ > -m 4G \ > -enable-kvm \ > -M q35 \ > -drive file=//root/L2.qcow2,media=disk,if=virtio \ > -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-0,id=vhost-vdpa0 \ > -device virtio-net-pci,netdev=vhost-vdpa0,disable-legacy=on,disable-modern=off,event_idx=off,packed=on,bus=pcie.0,addr=0x7 \ > -smp 4 \ > -cpu host \ > 2>&1 | tee vm.log > > ==== In L2 ==== > > # cut -c 35 /sys/devices/pci0000:00/0000:00:07.0/virtio1/features > 0 > > Based on what I have understood from the kernel's source, vhost-vdpa and > vp-vdpa both support packed vqs and v6.6.46 is more recent than the > minimum required kernel version. I am not sure where I am going wrong. > > > > However, I managed to set up scenario 4 successfully > > > and I see that packed vq is enabled in this case. > > > > > > # cut -c 35 /sys/devices/pci0000:00/0000:00:04.0/virtio1/features > > > 1 > > So far scenario 4 is the only scenario where the virtio-net device has the packed > feature enabled. The difference between scenarios 3 and 4 in terms of the kernel > modules loaded is that scenario 4 uses virtio_vdpa while scenario 3 uses vdpa and > vhost_vdpa. > We're pretty close then :). But I don't see anything obviously wrong here. I guess it is the same L1 VM in both cases, isn't it? The function that gets the features from vhost-vdpa in QEMU is hw/virtio/vhost-vdpa.c:vhost_vdpa_get_features. Can you check that it returns bit 34 (offset starts with 0 here)? If it returns it, can you keep debugging until you see what clears it? If it comes clear, then we need to check the kernel. If you don't see anything wrong there, we can keep debugging the function that handles getting the features through L2 guest PCI read, hw/virtio/virtio-pci.c:virtio_pci_common_read. VIRTIO_PCI_COMMON_DF case when proxy->dfselect==1. Let me know what you find. Thanks!
Hi, On Thursday, September 12, 2024 3:24:27 PM GMT+5:30 Eugenio Perez Martin wrote: > On Wed, Sep 11, 2024 at 9:36 PM Sahil <icegambit91@gmail.com> wrote: > > Hi, > > > > On Monday, September 9, 2024 6:04:45 PM GMT+5:30 Eugenio Perez Martin wrote: > > > On Sun, Sep 8, 2024 at 9:47 PM Sahil <icegambit91@gmail.com> wrote: > > > > On Friday, August 30, 2024 4:18:31 PM GMT+5:30 Eugenio Perez Martin wrote: > > > > > On Fri, Aug 30, 2024 at 12:20 PM Sahil <icegambit91@gmail.com> > > > > > wrote: > > > > > [...] > > > > > vdpa_sim does not support packed vq at the moment. You need to build > > > > > the use case #3 of the second part of that blog [1]. It's good that > > > > > you build the vdpa_sim earlier as it is a simpler setup. > > > > > > > > > > If you have problems with the vp_vdpa environment please let me know > > > > > so we can find alternative setups. > > > > > > > > Thank you for the clarification. I tried setting up the vp_vdpa > > > > environment (scenario 3) but I ended up running into a problem > > > > in the L1 VM. > > > > > > > > I verified that nesting is enabled in KVM (L0): > > > > > > > > $ grep -oE "(vmx|svm)" /proc/cpuinfo | sort | uniq > > > > vmx > > > > > > > > $ cat /sys/module/kvm_intel/parameters/nested > > > > Y > > > > > > > > There are no issues when booting L1. I start the VM by running: > > > > > > > > $ sudo ./qemu/build/qemu-system-x86_64 \ > > > > -enable-kvm \ > > > > -drive > > > > file=//home/ig91/fedora_qemu_test_vm/L1.qcow2,media=disk,if=virtio > > > > \ > > > > -net nic,model=virtio \ > > > > -net user,hostfwd=tcp::2222-:22 \ > > > > -device intel-iommu,snoop-control=on \ > > > > -device > > > > virtio-net-pci,netdev=net0,disable-legacy=on,disable-modern=off,iommu_ > > > > pla > > > > tform=on,event_idx=off,packed=on,bus=pcie.0,addr=0x4 \ -netdev > > > > tap,id=net0,script=no,downscript=no \ > > > > -nographic \ > > > > -m 2G \ > > > > -smp 2 \ > > > > -M q35 \ > > > > -cpu host \ > > > > 2>&1 | tee vm.log > > > > > > > > Kernel version in L1: > > > > > > > > # uname -a > > > > Linux fedora 6.8.5-201.fc39.x86_64 #1 SMP PREEMPT_DYNAMIC Thu Apr 11 > > > > 18:25:26 UTC 2024 x86_64 GNU/Linux > > > > > > Did you run the kernels with the arguments "iommu=pt intel_iommu=on"? > > > You can print them with cat /proc/cmdline. > > > > I missed this while setting up the environment. After setting the kernel > > params I managed to move past this issue but my environment in virtualbox > > was very unstable and it kept crashing. > > I've no experience with virtualbox+vdpa, sorry :). Why not use QEMU also for > L1? No, I was using virtualbox for L0. I wasn't able to create L1.qcow2 on my host OS (Arch Linux). "Virt-sysprep" was giving me "resolve.conf is a dangling symlink" issues. I had a Fedora VM set up on virtualbox and I managed to create L1.qcow2 and L2.qcow2 there. So my environment looked something like this: Host OS (Bare metal) -> L0 (Fedora on virtualbox) -> L1 (QEMU) -> L2 (QEMU) I learnt that shared folders can be set up between the host and virtualbox and so I managed to move L1.qcow2 from the Fedora VM to my host OS. So now my environment looks like this: L0 (Arch Linux on bare metal) -> L1 (QEMU) -> L2 (QEMU) > > I managed to get L1 to run on my host OS, so scenario 3 is now up and > > running. However, the packed bit seems to be disabled in this scenario > > too. > > > > L0 (host machine) specs: > > > > - kernel version: > > 6.6.46-1-lts > > > > - QEMU version: > > 9.0.50 (v8.2.0-5536-g16514611dc) > > > > - vDPA version: > > iproute2-6.10.0 > > > > L1 specs: > > > > - kernel version: > > 6.8.5-201.fc39.x86_64 > > > > - QEMU version: > > 9.0.91 > > > > - vDPA version: > > iproute2-6.10.0 > > > > L2 specs: > > - kernel version > > > > 6.8.7-200.fc39.x86_64 > > > > I followed the following steps to set up scenario 3: > > > > ==== In L0 ==== > > > > $ grep -oE "(vmx|svm)" /proc/cpuinfo | sort | uniq > > vmx > > > > $ cat /sys/module/kvm_intel/parameters/nested > > Y > > > > $ sudo ./qemu/build/qemu-system-x86_64 \ > > -enable-kvm \ > > -drive > > file=//home/valdaarhun/valdaarhun/qcow2_img/L1.qcow2,media=disk,if=virtio > > \ -net nic,model=virtio \ > > -net user,hostfwd=tcp::2222-:22 \ > > -device intel-iommu,snoop-control=on \ > > -device > > virtio-net-pci,netdev=net0,disable-legacy=on,disable-modern=off,iommu_pla > > tform=on,event_idx=off,packed=on,bus=pcie.0,addr=0x4 \ -netdev > > tap,id=net0,script=no,downscript=no \ > > -nographic \ > > -m 8G \ > > -smp 4 \ > > -M q35 \ > > -cpu host \ > > 2>&1 | tee vm.log > > > > ==== In L1 ==== > > > > I verified that the following config variables are set as decribed in the > > blog [1]. > > > > CONFIG_VIRTIO_VDPA=m > > CONFIG_VDPA=m > > CONFIG_VP_VDPA=m > > CONFIG_VHOST_VDPA=m > > > > # modprobe vdpa > > # modprobe vhost_vdpa > > # modprobe vp_vdpa > > > > # lsmod | grep -i vdpa > > vp_vdpa 20480 0 > > vhost_vdpa 32768 0 > > vhost 65536 1 vhost_vdpa > > vhost_iotlb 16384 2 vhost_vdpa,vhost > > vdpa 36864 2 vp_vdpa,vhost_vdpa > > irqbypass 12288 2 vhost_vdpa,kvm > > > > # lspci | grep -i ethernet > > 00:04.0 Ethernet controller: Red Hat, Inc. Virtio 1.0 network device (rev > > 01) > > > > # lspci -nn | grep 00:04.0 > > 00:04.0 Ethernet controller [0200]: Red Hat, Inc. Virtio 1.0 network > > device [1af4:1041] (rev 01) > > > > # echo 0000:00:04.0 > /sys/bus/pci/drivers/virtio-pci/unbind > > # echo 1af4 1041 > /sys/bus/pci/drivers/vp-vdpa/new_id > > > > # vdpa mgmtdev show > > > > pci/0000:00:04.0: > > supported_classes net < unknown class > > > max_supported_vqs 3 > > dev_features CSUM GUEST_CSUM CTRL_GUEST_OFFLOADS MAC GUEST_TSO4 > > GUEST_TSO6 GUEST_ECN GUEST_UFO HOST_TSO4 HOST_TSO6 HOST_ECN > > HOST_UFO MRG_RXBUF STATUS CTRL_VQ CTRL_RX CTRL_VLAN CTRL_RX_EXTRA > > GUEST_ANNOUNCE CTRL_MAC_ADDR RING_INDIRECT_DESC RING_EVENT_IDX > > VERSION_1 ACCESS_PLATFORM bit_40 bit_54 bit_55 bit_56 > > > > # vdpa dev add name vdpa0 mgmtdev pci/0000:00:04.0 > > # vdpa dev show -jp > > { > > > > "dev": { > > > > "vdpa0": { > > > > "type": "network", > > "mgmtdev": "pci/0000:00:04.0", > > "vendor_id": 6900, > > "max_vqs": 3, > > "max_vq_size": 256 > > > > } > > > > } > > > > } > > > > # ls -l /sys/bus/vdpa/devices/vdpa0/driver > > lrwxrwxrwx. 1 root root 0 Sep 11 17:07 /sys/bus/vdpa/devices/vdpa0/driver > > -> ../../../../bus/vdpa/drivers/vhost_vdpa > > > > # ls -l /dev/ |grep vdpa > > crw-------. 1 root root 239, 0 Sep 11 17:07 vhost-vdpa-0 > > > > # driverctl -b vdpa > > vdpa0 vhost_vdpa [*] > > > > Booting L2: > > > > # ./qemu/build/qemu-system-x86_64 \ > > -nographic \ > > -m 4G \ > > -enable-kvm \ > > -M q35 \ > > -drive file=//root/L2.qcow2,media=disk,if=virtio \ > > -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-0,id=vhost-vdpa0 \ > > -device > > virtio-net-pci,netdev=vhost-vdpa0,disable-legacy=on,disable-modern=off,ev > > ent_idx=off,packed=on,bus=pcie.0,addr=0x7 \ -smp 4 \ > > -cpu host \ > > 2>&1 | tee vm.log > > > > ==== In L2 ==== > > > > # cut -c 35 /sys/devices/pci0000:00/0000:00:07.0/virtio1/features > > 0 > > > > Based on what I have understood from the kernel's source, vhost-vdpa and > > vp-vdpa both support packed vqs and v6.6.46 is more recent than the > > minimum required kernel version. I am not sure where I am going wrong. > > > > > > However, I managed to set up scenario 4 successfully > > > > and I see that packed vq is enabled in this case. > > > > > > > > # cut -c 35 /sys/devices/pci0000:00/0000:00:04.0/virtio1/features > > > > 1 > > > > So far scenario 4 is the only scenario where the virtio-net device has the > > packed feature enabled. The difference between scenarios 3 and 4 in terms > > of the kernel modules loaded is that scenario 4 uses virtio_vdpa while > > scenario 3 uses vdpa and vhost_vdpa. > > We're pretty close then :). But I don't see anything obviously wrong here. > > I guess it is the same L1 VM in both cases, isn't it? Right, I am using the same L1.qcow2 image in both scenarios. > The function that gets the features from vhost-vdpa in QEMU is > hw/virtio/vhost-vdpa.c:vhost_vdpa_get_features. Can you check that it > returns bit 34 (offset starts with 0 here)? If it returns it, can you > keep debugging until you see what clears it? > > If it comes clear, then we need to check the kernel. Got it. I'll start debugging from here. > If you don't see anything wrong there, we can keep debugging the > function that handles getting the features through L2 guest PCI read, > hw/virtio/virtio-pci.c:virtio_pci_common_read. VIRTIO_PCI_COMMON_DF > case when proxy->dfselect==1. > > Let me know what you find. I'll do that. Thank you for the suggestions. Thanks, Sahil
Hi, I have a small update. On Monday, September 16, 2024 10:04:28 AM GMT+5:30 Sahil wrote: > On Thursday, September 12, 2024 3:24:27 PM GMT+5:30 Eugenio Perez Martin wrote: > [...] > > The function that gets the features from vhost-vdpa in QEMU is > > hw/virtio/vhost-vdpa.c:vhost_vdpa_get_features. Can you check that it > > returns bit 34 (offset starts with 0 here)? If it returns it, can you > > keep debugging until you see what clears it? > > > > If it comes clear, then we need to check the kernel. > > Got it. I'll start debugging from here. I am printing the value of "*features & (1ULL << 34)" in hw/virtio/vhost-vdpa.c:vhost_vdpa_get_features and I see it is 1. I guess that means the vhost device has the packed feature bit turned on in L1. I am also printing out the values of "host_features", "guest_features" and "backend_features" set in "VirtIODevice vdev" in hw/virtio/virtio-pci.c:virtio_pci_common_read under "case VIRTIO_PCI_COMMON_DF". I observed the following values: dev name: virtio-net host features: 0x10150bfffa7 guest features: 0x0 backend features: 0x10150bfffa7 The host features and backend features match but guest features is 0. Is this because the value of guest features has not been set yet or is it because the driver hasn't selected any of the features? I am not entirely sure but I think it's the former considering that the value of /sys/devices/pci0000:00/0000:00:07.0/virtio1/features is 0x10110afffa7. Please let me know if I am wrong. I found a few other issues as well. When I shut down the L2 VM, I get the following errors just after shutdown: qemu-system-x86_64: vhost VQ 0 ring restore failed: -1: Operation not permitted (1) qemu-system-x86_64: vhost VQ 1 ring restore failed: -1: Operation not permitted (1) qemu-system-x86_64: vhost VQ 2 ring restore failed: -1: Operation not permitted (1) This is printed in hw/virtio/vhost.c:vhost_virtqueue_stop. According to the comments, this is because the connection to the backend is broken. I booted L1 by running: $ ./qemu/build/qemu-system-x86_64 -enable-kvm \ -drive file=//home/valdaarhun/valdaarhun/qcow2_img/L1.qcow2,media=disk,if=virtio -net nic,model=virtio -net user,hostfwd=tcp::2222-:22 \ -device intel-iommu,snoop-control=on \ -device virtio-net-pci,netdev=net0,disable-legacy=on,disable-modern=off,iommu_platform=on,event_idx=off,packed=on,bus=pcie.0,addr=0x4 \ -netdev tap,id=net0,script=no,downscript=no \ -nographic \ -m 8G \ -smp 4 \ -M q35 \ -cpu host 2>&1 | tee vm.log And I booted L2 by running: # ./qemu/build/qemu-system-x86_64 \ -nographic \ -m 4G \ -enable-kvm \ -M q35 \ -drive file=//root/L2.qcow2,media=disk,if=virtio \ -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-0,id=vhost-vdpa0 \ -device virtio-net-pci,netdev=vhost-vdpa0,disable-legacy=on,disable-modern=off,event_idx=off,bus=pcie.0,addr=0x7 \ -smp 4 \ -cpu host \ 2>&1 | tee vm.log Am I missing something here? When booting L2, I also noticed that the control flow does not enter the following "if" block in hw/virtio/vhost-vdpa.c:vhost_vdpa_init. if (dev->migration_blocker == NULL && !v->shadow_vqs_enabled) { ... vhost_svq_valid_features(features, &dev->migration_blocker); } So "vhost_svq_valid_features" is never called. According to the comments this is because the device was not started with x-svq=on. Could this be a result (or reason) of the broken connection to the backend? Is there a way to manually set this option? Thanks, Sahil
On Tue, Sep 24, 2024 at 7:31 AM Sahil <icegambit91@gmail.com> wrote: > > Hi, > > I have a small update. > > On Monday, September 16, 2024 10:04:28 AM GMT+5:30 Sahil wrote: > > On Thursday, September 12, 2024 3:24:27 PM GMT+5:30 Eugenio Perez Martin wrote: > > [...] > > > The function that gets the features from vhost-vdpa in QEMU is > > > hw/virtio/vhost-vdpa.c:vhost_vdpa_get_features. Can you check that it > > > returns bit 34 (offset starts with 0 here)? If it returns it, can you > > > keep debugging until you see what clears it? > > > > > > If it comes clear, then we need to check the kernel. > > > > Got it. I'll start debugging from here. > > I am printing the value of "*features & (1ULL << 34)" in > hw/virtio/vhost-vdpa.c:vhost_vdpa_get_features and I see it is 1. > I guess that means the vhost device has the packed feature bit > turned on in L1. > Not quite. That means the device exports the _F_PACKED feature bit. Now we need to check if the guest acks it. You can look to vhost_vdpa_set_features for that. > I am also printing out the values of "host_features", "guest_features" > and "backend_features" set in "VirtIODevice vdev" in > hw/virtio/virtio-pci.c:virtio_pci_common_read under "case > VIRTIO_PCI_COMMON_DF". I observed the following values: > > dev name: virtio-net > host features: 0x10150bfffa7 > guest features: 0x0 > backend features: 0x10150bfffa7 > > The host features and backend features match but guest features > is 0. Is this because the value of guest features has not been set yet > or is it because the driver hasn't selected any of the features? > > I am not entirely sure but I think it's the former considering that the > value of /sys/devices/pci0000:00/0000:00:07.0/virtio1/features is > 0x10110afffa7. Please let me know if I am wrong. > Right, it's the former. _DF is the guest selecting on what 32-bit half of the 64-bit features willread. You can check the virtio_pci_common_write function, VIRTIO_PCI_COMMON_GF case when proxy->gfselect == 1. But it should end in vhost_vdpa_set_features when the driver writes DRIVER_OK, so maybe that one is easier. > I found a few other issues as well. When I shut down the L2 VM, > I get the following errors just after shutdown: > > qemu-system-x86_64: vhost VQ 0 ring restore failed: -1: Operation not permitted (1) > qemu-system-x86_64: vhost VQ 1 ring restore failed: -1: Operation not permitted (1) > qemu-system-x86_64: vhost VQ 2 ring restore failed: -1: Operation not permitted (1) > > This is printed in hw/virtio/vhost.c:vhost_virtqueue_stop. According to the comments, > this is because the connection to the backend is broken. > Can you trace vhost_vdpa_get_vring_base to be sure? In particular, if dev->fd is valid. > I booted L1 by running: > > $ ./qemu/build/qemu-system-x86_64 -enable-kvm \ > -drive file=//home/valdaarhun/valdaarhun/qcow2_img/L1.qcow2,media=disk,if=virtio -net nic,model=virtio -net user,hostfwd=tcp::2222-:22 \ > -device intel-iommu,snoop-control=on \ > -device virtio-net-pci,netdev=net0,disable-legacy=on,disable-modern=off,iommu_platform=on,event_idx=off,packed=on,bus=pcie.0,addr=0x4 \ > -netdev tap,id=net0,script=no,downscript=no \ > -nographic \ > -m 8G \ > -smp 4 \ > -M q35 \ > -cpu host 2>&1 | tee vm.log > > And I booted L2 by running: > > # ./qemu/build/qemu-system-x86_64 \ > -nographic \ > -m 4G \ > -enable-kvm \ > -M q35 \ > -drive file=//root/L2.qcow2,media=disk,if=virtio \ > -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-0,id=vhost-vdpa0 \ > -device virtio-net-pci,netdev=vhost-vdpa0,disable-legacy=on,disable-modern=off,event_idx=off,bus=pcie.0,addr=0x7 \ > -smp 4 \ > -cpu host \ > 2>&1 | tee vm.log > > Am I missing something here? > > When booting L2, I also noticed that the control flow does not enter the > following "if" block in hw/virtio/vhost-vdpa.c:vhost_vdpa_init. > > if (dev->migration_blocker == NULL && !v->shadow_vqs_enabled) { > ... > vhost_svq_valid_features(features, &dev->migration_blocker); > } > > So "vhost_svq_valid_features" is never called. According to the comments > this is because the device was not started with x-svq=on. Could this be a > result (or reason) of the broken connection to the backend? Is there a way > to manually set this option? > Yes, you can set it in -netdev vhost-vdpa,x-svq=on.
Hi, On Tuesday, September 24, 2024 4:16:26 PM GMT+5:30 Eugenio Perez Martin wrote: > On Tue, Sep 24, 2024 at 7:31 AM Sahil <icegambit91@gmail.com> wrote: > > Hi, > > > > I have a small update. > > > > On Monday, September 16, 2024 10:04:28 AM GMT+5:30 Sahil wrote: > > > On Thursday, September 12, 2024 3:24:27 PM GMT+5:30 Eugenio Perez Martin > > > wrote: [...] > > > > > > > The function that gets the features from vhost-vdpa in QEMU is > > > > hw/virtio/vhost-vdpa.c:vhost_vdpa_get_features. Can you check that it > > > > returns bit 34 (offset starts with 0 here)? If it returns it, can you > > > > keep debugging until you see what clears it? > > > > > > > > If it comes clear, then we need to check the kernel. > > > > > > Got it. I'll start debugging from here. > > > > I am printing the value of "*features & (1ULL << 34)" in > > hw/virtio/vhost-vdpa.c:vhost_vdpa_get_features and I see it is 1. > > I guess that means the vhost device has the packed feature bit > > turned on in L1. > > Not quite. That means the device exports the _F_PACKED feature bit. > Now we need to check if the guest acks it. You can look to > vhost_vdpa_set_features for that. Thank you for your reply. I have understood this now. I realized that the guest was not acking the packed feature bit. > > I am also printing out the values of "host_features", "guest_features" > > and "backend_features" set in "VirtIODevice vdev" in > > hw/virtio/virtio-pci.c:virtio_pci_common_read under "case > > VIRTIO_PCI_COMMON_DF". I observed the following values: > > > > dev name: virtio-net > > host features: 0x10150bfffa7 > > guest features: 0x0 > > backend features: 0x10150bfffa7 > > > > The host features and backend features match but guest features > > is 0. Is this because the value of guest features has not been set yet > > or is it because the driver hasn't selected any of the features? > > > > I am not entirely sure but I think it's the former considering that the > > value of /sys/devices/pci0000:00/0000:00:07.0/virtio1/features is > > 0x10110afffa7. Please let me know if I am wrong. > > Right, it's the former. _DF is the guest selecting on what 32-bit half > of the 64-bit features willread. > > You can check the virtio_pci_common_write function, > VIRTIO_PCI_COMMON_GF case when proxy->gfselect == 1. But it should end > in vhost_vdpa_set_features when the driver writes DRIVER_OK, so maybe > that one is easier. > > > I found a few other issues as well. When I shut down the L2 VM, > > I get the following errors just after shutdown: > > > > qemu-system-x86_64: vhost VQ 0 ring restore failed: -1: Operation not > > permitted (1) qemu-system-x86_64: vhost VQ 1 ring restore failed: -1: > > Operation not permitted (1) qemu-system-x86_64: vhost VQ 2 ring restore > > failed: -1: Operation not permitted (1) > > > > This is printed in hw/virtio/vhost.c:vhost_virtqueue_stop. According to > > the comments, this is because the connection to the backend is broken. > > Can you trace vhost_vdpa_get_vring_base to be sure? In particular, if > dev->fd is valid. > > > I booted L1 by running: > > > > $ ./qemu/build/qemu-system-x86_64 -enable-kvm \ > > -drive > > file=//home/valdaarhun/valdaarhun/qcow2_img/L1.qcow2,media=disk,if=virtio > > -net nic,model=virtio -net user,hostfwd=tcp::2222-:22 \ -device > > intel-iommu,snoop-control=on \ > > -device > > virtio-net-pci,netdev=net0,disable-legacy=on,disable-modern=off,iommu_pla > > tform=on,event_idx=off,packed=on,bus=pcie.0,addr=0x4 \ -netdev > > tap,id=net0,script=no,downscript=no \ > > -nographic \ > > -m 8G \ > > -smp 4 \ > > -M q35 \ > > -cpu host 2>&1 | tee vm.log > > > > And I booted L2 by running: > > > > # ./qemu/build/qemu-system-x86_64 \ > > -nographic \ > > -m 4G \ > > -enable-kvm \ > > -M q35 \ > > -drive file=//root/L2.qcow2,media=disk,if=virtio \ > > -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-0,id=vhost-vdpa0 \ > > -device > > virtio-net-pci,netdev=vhost-vdpa0,disable-legacy=on,disable-modern=off,ev > > ent_idx=off,bus=pcie.0,addr=0x7 \ -smp 4 \ > > -cpu host \ > > 2>&1 | tee vm.log > > > > Am I missing something here? > > > > When booting L2, I also noticed that the control flow does not enter the > > following "if" block in hw/virtio/vhost-vdpa.c:vhost_vdpa_init. > > > > if (dev->migration_blocker == NULL && !v->shadow_vqs_enabled) { > > > > ... > > vhost_svq_valid_features(features, &dev->migration_blocker); > > > > } > > > > So "vhost_svq_valid_features" is never called. According to the comments > > this is because the device was not started with x-svq=on. Could this be a > > result (or reason) of the broken connection to the backend? Is there a way > > to manually set this option? > > Yes, you can set it in -netdev vhost-vdpa,x-svq=on. I haven't checked that dev->fd is valid yet. I started debugging with the x-svq=on option, and I realized I was booting the L2 VM without setting packed=on. On Tue, Sep 24, 2024 at 7:31 AM Sahil <icegambit91@gmail.com> wrote: > And I booted L2 by running: > > # ./qemu/build/qemu-system-x86_64 \ > -nographic \ > -m 4G \ > -enable-kvm \ > -M q35 \ > -drive file=//root/L2.qcow2,media=disk,if=virtio \ > -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-0,id=vhost-vdpa0 \ > -device virtio-net-pci,netdev=vhost-vdpa0,disable-legacy=on,disable-modern=off,event_idx=off,bus=pcie.0,addr=0x7 \ > -smp 4 \ > -cpu host \ > 2>&1 | tee vm.log > With packed=on in the device option, I see that the packed feature bit is set in L2 :) However, I see that vhost shadow virtqueues are still not being used. I am currently trying to find the reason behind this. I have narrowed down the issue to hw/virtio/vhost-vdpa.c [1]. The "vhost_vdpa_svqs_start" function is being called but in the loop, vhost_svq_start is never called. I think it might be because there's an issue with "vhost_vdpa_svq_setup". I'll send an update once I find something. Thanks, Sahil [1] https://github.com/qemu/qemu/blob/master/hw/virtio/vhost-vdpa.c#L1243
Hi, It's been a while since I gave my last update. I have one more update that I would like to give. > On Tue, Sep 24, 2024 at 7:31 AM Sahil <icegambit91@gmail.com> wrote: > > And I booted L2 by running: > > > > # ./qemu/build/qemu-system-x86_64 \ > > -nographic \ > > -m 4G \ > > -enable-kvm \ > > -M q35 \ > > -drive file=//root/L2.qcow2,media=disk,if=virtio \ > > -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-0,id=vhost-vdpa0 \ > > -device > > virtio-net-pci,netdev=vhost-vdpa0,disable-legacy=on,disable-modern=off,ev > > ent_idx=off,bus=pcie.0,addr=0x7 \ -smp 4 \ > > -cpu host \ > > 2>&1 | tee vm.log > > With packed=on in the device option, I see that the packed feature bit is > set in L2 :) > > However, I see that vhost shadow virtqueues are still not being used. I am > currently trying to find the reason behind this. I have narrowed down the > issue to hw/virtio/vhost-vdpa.c [1]. The "vhost_vdpa_svqs_start" function > is being called but in the loop, vhost_svq_start is never called. I think it > might be because there's an issue with "vhost_vdpa_svq_setup". > > I'll send an update once I find something. > > Thanks, > Sahil > > [1] https://github.com/qemu/qemu/blob/master/hw/virtio/vhost-vdpa.c#L1243 I spent some time tinkering with the L0-L1-L2 test environment setup, and understanding QEMU's hw/virtio/vhost-vdpa.c [1] as well as Linux's drivers/vhost/vdpa.c [2] and /drivers/vhost/vhost.c [3]. I don't think there is an issue with the environment itself. When I boot L2 with the following combinations of "x-svq" and "packed", this is what I observe: 1. x-svq=on and packed=off The virtio device in L2 has the packed feature bit turned off. Vhost shadow virtqueues are used as expected. 2. x-svq=off and packed=on The virtio device in L2 has the packed feature bit turned on. Vhost shadow virtqueues are not used. I don't see any issues in either of the above environment configurations. 3. x-svq=on and packed=on This is the configuration that I need for testing. The virtio device in L2 has the packed feature bit turned on. However, vhost shadow virtqueues are not being used. This is due to the VHOST_SET_VRING_BASE ioctl call returning a EOPNOTSUPP in hw/virtio/vhost-vdpa.c:vhost_vdpa_set_dev_vring_base() [4]. I spent some time going through the ioctl's implementation in Linux. I used ftrace to trace the functions that were being called in the kernel. With x-svq=on (regardless of whether split virtqueues are used or packed virtqueues), I got the following trace: [...] qemu-system-x86-1737 [001] ...1. 3613.371358: vhost_vdpa_unlocked_ioctl <-__x64_sys_ioctl qemu-system-x86-1737 [001] ...1. 3613.371358: vhost_vring_ioctl <-vhost_vdpa_unlocked_ioctl qemu-system-x86-1737 [001] ...1. 3613.371362: vp_vdpa_set_vq_state <-vhost_vdpa_unlocked_ioctl [...] There are 3 virtqueues that the vdpa device offers in L1. There were no issues when using split virtqueues and the trace shown above appears 3 times. With packed virtqueues, the first call to VHOST_SET_VRING_BASE fails because drivers/vdpa/virtio_pci/vp_vdpa.c:vp_vdpa_set_vq_state_packed [5] returns EOPNOTSUPP. The payload that VHOST_SET_VRING_BASE accepts depends on whether split virtqueues or packed virtqueues are used [6]. In hw/virtio/vhost- vdpa.c:vhost_vdpa_svq_setup() [7], the following payload is used which is not suitable for packed virtqueues: struct vhost_vring_state s = { .index = vq_index, }; Based on the implementation in the linux kernel, the payload needs to be as shown below for the ioctl to succeed for packed virtqueues: struct vhost_vring_state s = { .index = vq_index, .num = 0x80008000, }; After making these changes, it looks like QEMU is able to set up the virtqueues and shadow virtqueues are enabled as well. Unfortunately, before the L2 VM can finish booting the kernel crashes. The reason is that even though packed virtqueues are to be used, the kernel tries to run drivers/virtio/virtio_ring.c:virtqueue_get_buf_ctx_split() [8] (instead of virtqueue_get_buf_ctx_packed) and throws an "invalid vring head" error. I am still investigating this issue. I'll send an update once I resolve this issue. I'll also send a patch that crafts the payload correctly based on the format of the virtqueue in vhost_vdpa_svq_setup(). Thanks, Sahil [1] https://gitlab.com/qemu-project/qemu/-/blob/master/hw/virtio/vhost-vdpa.c [2] https://github.com/torvalds/linux/blob/master/drivers/vhost/vdpa.c [3] https://github.com/torvalds/linux/blob/master/drivers/vhost/vhost.c [4] https://gitlab.com/qemu-project/qemu/-/blob/master/hw/virtio/vhost-vdpa.c#L1002 [5] https://github.com/torvalds/linux/blob/master/drivers/vdpa/virtio_pci/vp_vdpa.c#L278 [6] https://qemu-project.gitlab.io/qemu/interop/vhost-user.html#front-end-message-types [7] https://gitlab.com/qemu-project/qemu/-/blob/master/hw/virtio/vhost-vdpa.c#L1223 [8] https://github.com/torvalds/linux/blob/master/drivers/virtio/virtio_ring.c#L823
On Mon, Oct 28, 2024 at 6:38 AM Sahil Siddiq <icegambit91@gmail.com> wrote: > > Hi, > > It's been a while since I gave my last update. I have one more update > that I would like to give. > > > On Tue, Sep 24, 2024 at 7:31 AM Sahil <icegambit91@gmail.com> wrote: > > > And I booted L2 by running: > > > > > > # ./qemu/build/qemu-system-x86_64 \ > > > -nographic \ > > > -m 4G \ > > > -enable-kvm \ > > > -M q35 \ > > > -drive file=//root/L2.qcow2,media=disk,if=virtio \ > > > -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-0,id=vhost-vdpa0 \ > > > -device > > > virtio-net-pci,netdev=vhost-vdpa0,disable-legacy=on,disable-modern=off,ev > > > ent_idx=off,bus=pcie.0,addr=0x7 \ -smp 4 \ > > > -cpu host \ > > > 2>&1 | tee vm.log > > > > With packed=on in the device option, I see that the packed feature bit is > > set in L2 :) > > > > However, I see that vhost shadow virtqueues are still not being used. I am > > currently trying to find the reason behind this. I have narrowed down the > > issue to hw/virtio/vhost-vdpa.c [1]. The "vhost_vdpa_svqs_start" function > > is being called but in the loop, vhost_svq_start is never called. I think it > > might be because there's an issue with "vhost_vdpa_svq_setup". > > > > I'll send an update once I find something. > > > > Thanks, > > Sahil > > > > [1] https://github.com/qemu/qemu/blob/master/hw/virtio/vhost-vdpa.c#L1243 > > I spent some time tinkering with the L0-L1-L2 test environment setup, > and understanding QEMU's hw/virtio/vhost-vdpa.c [1] as well as Linux's > drivers/vhost/vdpa.c [2] and /drivers/vhost/vhost.c [3]. I don't think there > is an issue with the environment itself. > > When I boot L2 with the following combinations of "x-svq" and > "packed", this is what I observe: > > 1. x-svq=on and packed=off > > The virtio device in L2 has the packed feature bit turned off. Vhost > shadow virtqueues are used as expected. > > 2. x-svq=off and packed=on > > The virtio device in L2 has the packed feature bit turned on. Vhost > shadow virtqueues are not used. > > I don't see any issues in either of the above environment > configurations. > > 3. x-svq=on and packed=on > > This is the configuration that I need for testing. The virtio device in > L2 has the packed feature bit turned on. However, vhost shadow > virtqueues are not being used. This is due to the > VHOST_SET_VRING_BASE ioctl call returning a EOPNOTSUPP in > hw/virtio/vhost-vdpa.c:vhost_vdpa_set_dev_vring_base() [4]. > > I spent some time going through the ioctl's implementation in Linux. > I used ftrace to trace the functions that were being called in the kernel. > With x-svq=on (regardless of whether split virtqueues are used or packed > virtqueues), I got the following trace: > > [...] > qemu-system-x86-1737 [001] ...1. 3613.371358: > vhost_vdpa_unlocked_ioctl <-__x64_sys_ioctl > qemu-system-x86-1737 [001] ...1. 3613.371358: vhost_vring_ioctl > <-vhost_vdpa_unlocked_ioctl > qemu-system-x86-1737 [001] ...1. 3613.371362: > vp_vdpa_set_vq_state <-vhost_vdpa_unlocked_ioctl > [...] > > There are 3 virtqueues that the vdpa device offers in L1. There were no > issues when using split virtqueues and the trace shown above appears > 3 times. With packed virtqueues, the first call to VHOST_SET_VRING_BASE > fails because drivers/vdpa/virtio_pci/vp_vdpa.c:vp_vdpa_set_vq_state_packed > [5] returns EOPNOTSUPP. > > The payload that VHOST_SET_VRING_BASE accepts depends on whether > split virtqueues or packed virtqueues are used [6]. In hw/virtio/vhost- > vdpa.c:vhost_vdpa_svq_setup() [7], the following payload is used which is > not suitable for packed virtqueues: > > struct vhost_vring_state s = { > .index = vq_index, > }; > > Based on the implementation in the linux kernel, the payload needs to > be as shown below for the ioctl to succeed for packed virtqueues: > > struct vhost_vring_state s = { > .index = vq_index, > .num = 0x80008000, > }; > Wow, that's a great analysis, very good catch! > After making these changes, it looks like QEMU is able to set up the > virtqueues > and shadow virtqueues are enabled as well. > > Unfortunately, before the L2 VM can finish booting the kernel crashes. > The reason is that even though packed virtqueues are to be used, the > kernel tries to run > drivers/virtio/virtio_ring.c:virtqueue_get_buf_ctx_split() [8] > (instead of virtqueue_get_buf_ctx_packed) and throws an "invalid vring > head" error. I am still investigating this issue. > That's interesting. It's been a while since I haven't tested that code, maybe you also discovered a regression here :). > I'll send an update once I resolve this issue. I'll also send a patch that > crafts the payload correctly based on the format of the virtqueue in > vhost_vdpa_svq_setup(). > The QEMU's vhost_vdpa_svq_setup is a valid patch so if you have the bandwith please send it ASAP and we move it forward :). Thanks! > Thanks, > Sahil > > [1] https://gitlab.com/qemu-project/qemu/-/blob/master/hw/virtio/vhost-vdpa.c > [2] https://github.com/torvalds/linux/blob/master/drivers/vhost/vdpa.c > [3] https://github.com/torvalds/linux/blob/master/drivers/vhost/vhost.c > [4] https://gitlab.com/qemu-project/qemu/-/blob/master/hw/virtio/vhost-vdpa.c#L1002 > [5] https://github.com/torvalds/linux/blob/master/drivers/vdpa/virtio_pci/vp_vdpa.c#L278 > [6] https://qemu-project.gitlab.io/qemu/interop/vhost-user.html#front-end-message-types > [7] https://gitlab.com/qemu-project/qemu/-/blob/master/hw/virtio/vhost-vdpa.c#L1223 > [8] https://github.com/torvalds/linux/blob/master/drivers/virtio/virtio_ring.c#L823 >
Hi, Thank you for your reply. On 10/28/24 1:40 PM, Eugenio Perez Martin wrote: > On Mon, Oct 28, 2024 at 6:38 AM Sahil Siddiq <icegambit91@gmail.com> wrote: >> [...] >> I spent some time tinkering with the L0-L1-L2 test environment setup, >> and understanding QEMU's hw/virtio/vhost-vdpa.c [1] as well as Linux's >> drivers/vhost/vdpa.c [2] and /drivers/vhost/vhost.c [3]. I don't think there >> is an issue with the environment itself. >> >> When I boot L2 with the following combinations of "x-svq" and >> "packed", this is what I observe: >> >> 1. x-svq=on and packed=off >> >> The virtio device in L2 has the packed feature bit turned off. Vhost >> shadow virtqueues are used as expected. >> >> 2. x-svq=off and packed=on >> >> The virtio device in L2 has the packed feature bit turned on. Vhost >> shadow virtqueues are not used. >> >> I don't see any issues in either of the above environment >> configurations. >> >> 3. x-svq=on and packed=on >> >> This is the configuration that I need for testing. The virtio device in >> L2 has the packed feature bit turned on. However, vhost shadow >> virtqueues are not being used. This is due to the >> VHOST_SET_VRING_BASE ioctl call returning a EOPNOTSUPP in >> hw/virtio/vhost-vdpa.c:vhost_vdpa_set_dev_vring_base() [4]. >> >> I spent some time going through the ioctl's implementation in Linux. >> I used ftrace to trace the functions that were being called in the kernel. >> With x-svq=on (regardless of whether split virtqueues are used or packed >> virtqueues), I got the following trace: >> >> [...] >> qemu-system-x86-1737 [001] ...1. 3613.371358: >> vhost_vdpa_unlocked_ioctl <-__x64_sys_ioctl >> qemu-system-x86-1737 [001] ...1. 3613.371358: vhost_vring_ioctl >> <-vhost_vdpa_unlocked_ioctl >> qemu-system-x86-1737 [001] ...1. 3613.371362: >> vp_vdpa_set_vq_state <-vhost_vdpa_unlocked_ioctl >> [...] >> >> There are 3 virtqueues that the vdpa device offers in L1. There were no >> issues when using split virtqueues and the trace shown above appears >> 3 times. With packed virtqueues, the first call to VHOST_SET_VRING_BASE >> fails because drivers/vdpa/virtio_pci/vp_vdpa.c:vp_vdpa_set_vq_state_packed >> [5] returns EOPNOTSUPP. >> >> The payload that VHOST_SET_VRING_BASE accepts depends on whether >> split virtqueues or packed virtqueues are used [6]. In hw/virtio/vhost- >> vdpa.c:vhost_vdpa_svq_setup() [7], the following payload is used which is >> not suitable for packed virtqueues: >> >> struct vhost_vring_state s = { >> .index = vq_index, >> }; >> >> Based on the implementation in the linux kernel, the payload needs to >> be as shown below for the ioctl to succeed for packed virtqueues: >> >> struct vhost_vring_state s = { >> .index = vq_index, >> .num = 0x80008000, >> }; >> > > Wow, that's a great analysis, very good catch! > > [...] >> I'll send an update once I resolve this issue. I'll also send a patch that >> crafts the payload correctly based on the format of the virtqueue in >> vhost_vdpa_svq_setup(). >> > > The QEMU's vhost_vdpa_svq_setup is a valid patch so if you have the > bandwith please send it ASAP and we move it forward :). > Sure thing. I'll do that while debugging the kernel in parallel. Thanks, Sahil
Hi, On 10/28/24 11:07 AM, Sahil Siddiq wrote: > [...] > The payload that VHOST_SET_VRING_BASE accepts depends on whether > split virtqueues or packed virtqueues are used [6]. In hw/virtio/vhost- > vdpa.c:vhost_vdpa_svq_setup() [7], the following payload is used which is > not suitable for packed virtqueues: > > struct vhost_vring_state s = { > .index = vq_index, > }; > > Based on the implementation in the linux kernel, the payload needs to > be as shown below for the ioctl to succeed for packed virtqueues: > > struct vhost_vring_state s = { > .index = vq_index, > .num = 0x80008000, > }; > > After making these changes, it looks like QEMU is able to set up the > virtqueues and shadow virtqueues are enabled as well. > > Unfortunately, before the L2 VM can finish booting the kernel crashes. > The reason is that even though packed virtqueues are to be used, the > kernel tries to run > drivers/virtio/virtio_ring.c:virtqueue_get_buf_ctx_split() [8] > (instead of virtqueue_get_buf_ctx_packed) and throws an "invalid vring > head" error. I am still investigating this issue. I made a mistake here. "virtqueue_get_buf_ctx_packed" [1] in the linux kernel also throws the same error. I think the issue might be because hw/virtio/vhost-vdpa.c:vhost_vdpa_svq_map_rings [2] does not handle mapping packed virtqueues at the moment. Probably because of this, vq->packed.desc_state[id].data [1] is NULL in the kernel. Regarding one of the earlier reviews in the same thread [3]: On 8/7/24 9:52 PM, Eugenio Perez Martin wrote: > On Fri, Aug 2, 2024 at 1:22 PM Sahil Siddiq <icegambit91@gmail.com> wrote: >> >> Allocate memory for the packed vq format and support >> packed vq in the SVQ "start" and "stop" operations. >> >> Signed-off-by: Sahil Siddiq <sahilcdq@proton.me> >> --- >> [...] >> >> diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c >> index 4c308ee53d..f4285db2b4 100644 >> --- a/hw/virtio/vhost-shadow-virtqueue.c >> +++ b/hw/virtio/vhost-shadow-virtqueue.c >> [...] >> @@ -672,6 +674,16 @@ size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq) >> return ROUND_UP(used_size, qemu_real_host_page_size()); >> } >> >> +size_t vhost_svq_memory_packed(const VhostShadowVirtqueue *svq) >> +{ >> + size_t desc_size = sizeof(struct vring_packed_desc) * svq->num_free; >> + size_t driver_event_suppression = sizeof(struct vring_packed_desc_event); >> + size_t device_event_suppression = sizeof(struct vring_packed_desc_event); >> + >> + return ROUND_UP(desc_size + driver_event_suppression + device_event_suppression, >> + qemu_real_host_page_size()); >> +} >> + >> /** >> * Set a new file descriptor for the guest to kick the SVQ and notify for avail >> * >> @@ -726,17 +738,30 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, >> >> svq->vring.num = virtio_queue_get_num(vdev, virtio_get_queue_index(vq)); >> svq->num_free = svq->vring.num; >> - svq->vring.desc = mmap(NULL, vhost_svq_driver_area_size(svq), >> - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, >> - -1, 0); >> - desc_size = sizeof(vring_desc_t) * svq->vring.num; >> - svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size); >> - svq->vring.used = mmap(NULL, vhost_svq_device_area_size(svq), >> - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, >> - -1, 0); >> - svq->desc_state = g_new0(SVQDescState, svq->vring.num); >> - svq->desc_next = g_new0(uint16_t, svq->vring.num); >> - for (unsigned i = 0; i < svq->vring.num - 1; i++) { >> + svq->is_packed = virtio_vdev_has_feature(svq->vdev, VIRTIO_F_RING_PACKED); >> + >> + if (virtio_vdev_has_feature(svq->vdev, VIRTIO_F_RING_PACKED)) { >> + svq->vring_packed.vring.desc = mmap(NULL, vhost_svq_memory_packed(svq), >> + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, >> + -1, 0); >> + desc_size = sizeof(struct vring_packed_desc) * svq->vring.num; >> + svq->vring_packed.vring.driver = (void *)((char *)svq->vring_packed.vring.desc + desc_size); >> + svq->vring_packed.vring.device = (void *)((char *)svq->vring_packed.vring.driver + >> + sizeof(struct vring_packed_desc_event)); > > This is a great start but it will be problematic when you start > mapping the areas to the vdpa device. The driver area should be read > only for the device, but it is placed in the same page as a RW one. > > More on this later. > >> + } else { >> + svq->vring.desc = mmap(NULL, vhost_svq_driver_area_size(svq), >> + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, >> + -1, 0); >> + desc_size = sizeof(vring_desc_t) * svq->vring.num; >> + svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size); >> + svq->vring.used = mmap(NULL, vhost_svq_device_area_size(svq), >> + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, >> + -1, 0); >> + } > > I think it will be beneficial to avoid "if (packed)" conditionals on > the exposed functions that give information about the memory maps. > These need to be replicated at > hw/virtio/vhost-vdpa.c:vhost_vdpa_svq_map_rings. Based on what I have understood, I'll need to have an if(packed) condition in vhost_vdpa_svq_map_rings() because the mappings will differ in the packed and split cases. > However, the current one depends on the driver area to live in the > same page as the descriptor area, so it is not suitable for this. Right, for the split case, svq->vring.desc and svq->vring.avail are mapped to the same page. svq->vring.desc = mmap(NULL, vhost_svq_driver_area_size(svq), PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); desc_size = sizeof(vring_desc_t) * svq->vring.num; svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size); vhost_svq_driver_area_size() encompasses the descriptor area and avail ring. > So what about this action plan: > 1) Make the avail ring (or driver area) independent of the descriptor ring. > 2) Return the mapping permissions of the descriptor area (not needed > here, but needed for hw/virtio/vhost-vdpa.c:vhost_vdpa_svq_map_rings Does point 1 refer to mapping the descriptor and avail rings to separate pages for both split and packed cases. I am not sure if my understanding is correct. I believe, however, that this approach will make it easier to map the rings in the vdpa device. It might also help in removing the if(packed) condition in vhost_svq_start(). Thanks, Sahil [1] https://github.com/torvalds/linux/blob/master/drivers/virtio/virtio_ring.c#L1708 [2] https://gitlab.com/qemu-project/qemu/-/blob/master/hw/virtio/vhost-vdpa.c#L1178 [3] https://lists.nongnu.org/archive/html/qemu-devel/2024-08/msg01145.html
On Wed, Nov 13, 2024 at 6:11 AM Sahil Siddiq <icegambit91@gmail.com> wrote: > > Hi, > > On 10/28/24 11:07 AM, Sahil Siddiq wrote: > > [...] > > The payload that VHOST_SET_VRING_BASE accepts depends on whether > > split virtqueues or packed virtqueues are used [6]. In hw/virtio/vhost- > > vdpa.c:vhost_vdpa_svq_setup() [7], the following payload is used which is > > not suitable for packed virtqueues: > > > > struct vhost_vring_state s = { > > .index = vq_index, > > }; > > > > Based on the implementation in the linux kernel, the payload needs to > > be as shown below for the ioctl to succeed for packed virtqueues: > > > > struct vhost_vring_state s = { > > .index = vq_index, > > .num = 0x80008000, > > }; > > > > After making these changes, it looks like QEMU is able to set up the > > virtqueues and shadow virtqueues are enabled as well. > > > > Unfortunately, before the L2 VM can finish booting the kernel crashes. > > The reason is that even though packed virtqueues are to be used, the > > kernel tries to run > > drivers/virtio/virtio_ring.c:virtqueue_get_buf_ctx_split() [8] > > (instead of virtqueue_get_buf_ctx_packed) and throws an "invalid vring > > head" error. I am still investigating this issue. > > I made a mistake here. "virtqueue_get_buf_ctx_packed" [1] in the linux > kernel also throws the same error. I think the issue might be because > hw/virtio/vhost-vdpa.c:vhost_vdpa_svq_map_rings [2] does not handle > mapping packed virtqueues at the moment. > > Probably because of this, vq->packed.desc_state[id].data [1] is NULL in the > kernel. > > Regarding one of the earlier reviews in the same thread [3]: > I think it is a good first step, yes. Looking forward to your findings! > On 8/7/24 9:52 PM, Eugenio Perez Martin wrote: > > On Fri, Aug 2, 2024 at 1:22 PM Sahil Siddiq <icegambit91@gmail.com> wrote: > >> > >> Allocate memory for the packed vq format and support > >> packed vq in the SVQ "start" and "stop" operations. > >> > >> Signed-off-by: Sahil Siddiq <sahilcdq@proton.me> > >> --- > >> [...] > >> > >> diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c > >> index 4c308ee53d..f4285db2b4 100644 > >> --- a/hw/virtio/vhost-shadow-virtqueue.c > >> +++ b/hw/virtio/vhost-shadow-virtqueue.c > >> [...] > >> @@ -672,6 +674,16 @@ size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq) > >> return ROUND_UP(used_size, qemu_real_host_page_size()); > >> } > >> > >> +size_t vhost_svq_memory_packed(const VhostShadowVirtqueue *svq) > >> +{ > >> + size_t desc_size = sizeof(struct vring_packed_desc) * svq->num_free; > >> + size_t driver_event_suppression = sizeof(struct vring_packed_desc_event); > >> + size_t device_event_suppression = sizeof(struct vring_packed_desc_event); > >> + > >> + return ROUND_UP(desc_size + driver_event_suppression + device_event_suppression, > >> + qemu_real_host_page_size()); > >> +} > >> + > >> /** > >> * Set a new file descriptor for the guest to kick the SVQ and notify for avail > >> * > >> @@ -726,17 +738,30 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, > >> > >> svq->vring.num = virtio_queue_get_num(vdev, virtio_get_queue_index(vq)); > >> svq->num_free = svq->vring.num; > >> - svq->vring.desc = mmap(NULL, vhost_svq_driver_area_size(svq), > >> - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, > >> - -1, 0); > >> - desc_size = sizeof(vring_desc_t) * svq->vring.num; > >> - svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size); > >> - svq->vring.used = mmap(NULL, vhost_svq_device_area_size(svq), > >> - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, > >> - -1, 0); > >> - svq->desc_state = g_new0(SVQDescState, svq->vring.num); > >> - svq->desc_next = g_new0(uint16_t, svq->vring.num); > >> - for (unsigned i = 0; i < svq->vring.num - 1; i++) { > >> + svq->is_packed = virtio_vdev_has_feature(svq->vdev, VIRTIO_F_RING_PACKED); > >> + > >> + if (virtio_vdev_has_feature(svq->vdev, VIRTIO_F_RING_PACKED)) { > >> + svq->vring_packed.vring.desc = mmap(NULL, vhost_svq_memory_packed(svq), > >> + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, > >> + -1, 0); > >> + desc_size = sizeof(struct vring_packed_desc) * svq->vring.num; > >> + svq->vring_packed.vring.driver = (void *)((char *)svq->vring_packed.vring.desc + desc_size); > >> + svq->vring_packed.vring.device = (void *)((char *)svq->vring_packed.vring.driver + > >> + sizeof(struct vring_packed_desc_event)); > > > > This is a great start but it will be problematic when you start > > mapping the areas to the vdpa device. The driver area should be read > > only for the device, but it is placed in the same page as a RW one. > > > > More on this later. > > > >> + } else { > >> + svq->vring.desc = mmap(NULL, vhost_svq_driver_area_size(svq), > >> + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, > >> + -1, 0); > >> + desc_size = sizeof(vring_desc_t) * svq->vring.num; > >> + svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size); > >> + svq->vring.used = mmap(NULL, vhost_svq_device_area_size(svq), > >> + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, > >> + -1, 0); > >> + } > > > > I think it will be beneficial to avoid "if (packed)" conditionals on > > the exposed functions that give information about the memory maps. > > These need to be replicated at > > hw/virtio/vhost-vdpa.c:vhost_vdpa_svq_map_rings. > > Based on what I have understood, I'll need to have an if(packed) > condition in vhost_vdpa_svq_map_rings() because the mappings will > differ in the packed and split cases. > > > However, the current one depends on the driver area to live in the > > same page as the descriptor area, so it is not suitable for this. > > Right, for the split case, svq->vring.desc and svq->vring.avail are > mapped to the same page. > > svq->vring.desc = mmap(NULL, vhost_svq_driver_area_size(svq), > PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, > -1, 0); > desc_size = sizeof(vring_desc_t) * svq->vring.num; > svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size); > > vhost_svq_driver_area_size() encompasses the descriptor area and avail ring. > > > So what about this action plan: > > 1) Make the avail ring (or driver area) independent of the descriptor ring. > > 2) Return the mapping permissions of the descriptor area (not needed > > here, but needed for hw/virtio/vhost-vdpa.c:vhost_vdpa_svq_map_rings > > Does point 1 refer to mapping the descriptor and avail rings to separate > pages for both split and packed cases. I am not sure if my understanding > is correct. > > I believe, however, that this approach will make it easier to map the > rings in the vdpa device. It might also help in removing the if(packed) > condition in vhost_svq_start(). > That's right, you understood it perfectly :). It's not a big deal but it makes the code simpler in my opinion. Thanks!
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c index 4c308ee53d..f4285db2b4 100644 --- a/hw/virtio/vhost-shadow-virtqueue.c +++ b/hw/virtio/vhost-shadow-virtqueue.c @@ -645,6 +645,8 @@ void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd) /** * Get the shadow vq vring address. + * This is used irrespective of whether the + * split or packed vq format is used. * @svq: Shadow virtqueue * @addr: Destination to store address */ @@ -672,6 +674,16 @@ size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq) return ROUND_UP(used_size, qemu_real_host_page_size()); } +size_t vhost_svq_memory_packed(const VhostShadowVirtqueue *svq) +{ + size_t desc_size = sizeof(struct vring_packed_desc) * svq->num_free; + size_t driver_event_suppression = sizeof(struct vring_packed_desc_event); + size_t device_event_suppression = sizeof(struct vring_packed_desc_event); + + return ROUND_UP(desc_size + driver_event_suppression + device_event_suppression, + qemu_real_host_page_size()); +} + /** * Set a new file descriptor for the guest to kick the SVQ and notify for avail * @@ -726,17 +738,30 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, svq->vring.num = virtio_queue_get_num(vdev, virtio_get_queue_index(vq)); svq->num_free = svq->vring.num; - svq->vring.desc = mmap(NULL, vhost_svq_driver_area_size(svq), - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, - -1, 0); - desc_size = sizeof(vring_desc_t) * svq->vring.num; - svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size); - svq->vring.used = mmap(NULL, vhost_svq_device_area_size(svq), - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, - -1, 0); - svq->desc_state = g_new0(SVQDescState, svq->vring.num); - svq->desc_next = g_new0(uint16_t, svq->vring.num); - for (unsigned i = 0; i < svq->vring.num - 1; i++) { + svq->is_packed = virtio_vdev_has_feature(svq->vdev, VIRTIO_F_RING_PACKED); + + if (virtio_vdev_has_feature(svq->vdev, VIRTIO_F_RING_PACKED)) { + svq->vring_packed.vring.desc = mmap(NULL, vhost_svq_memory_packed(svq), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, + -1, 0); + desc_size = sizeof(struct vring_packed_desc) * svq->vring.num; + svq->vring_packed.vring.driver = (void *)((char *)svq->vring_packed.vring.desc + desc_size); + svq->vring_packed.vring.device = (void *)((char *)svq->vring_packed.vring.driver + + sizeof(struct vring_packed_desc_event)); + } else { + svq->vring.desc = mmap(NULL, vhost_svq_driver_area_size(svq), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, + -1, 0); + desc_size = sizeof(vring_desc_t) * svq->vring.num; + svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size); + svq->vring.used = mmap(NULL, vhost_svq_device_area_size(svq), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, + -1, 0); + } + + svq->desc_state = g_new0(SVQDescState, svq->num_free); + svq->desc_next = g_new0(uint16_t, svq->num_free); + for (unsigned i = 0; i < svq->num_free - 1; i++) { svq->desc_next[i] = cpu_to_le16(i + 1); } } @@ -776,8 +801,13 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) svq->vq = NULL; g_free(svq->desc_next); g_free(svq->desc_state); - munmap(svq->vring.desc, vhost_svq_driver_area_size(svq)); - munmap(svq->vring.used, vhost_svq_device_area_size(svq)); + + if (svq->is_packed) { + munmap(svq->vring_packed.vring.desc, vhost_svq_memory_packed(svq)); + } else { + munmap(svq->vring.desc, vhost_svq_driver_area_size(svq)); + munmap(svq->vring.used, vhost_svq_device_area_size(svq)); + } event_notifier_set_handler(&svq->hdev_call, NULL); } diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h index ee1a87f523..03b722a186 100644 --- a/hw/virtio/vhost-shadow-virtqueue.h +++ b/hw/virtio/vhost-shadow-virtqueue.h @@ -67,6 +67,9 @@ struct vring_packed { /* Shadow virtqueue to relay notifications */ typedef struct VhostShadowVirtqueue { + /* True if packed virtqueue */ + bool is_packed; + /* Virtio queue shadowing */ VirtQueue *vq; @@ -150,6 +153,7 @@ void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq, struct vhost_vring_addr *addr); size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq); size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq); +size_t vhost_svq_memory_packed(const VhostShadowVirtqueue *svq); void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, VirtQueue *vq, VhostIOVATree *iova_tree);
Allocate memory for the packed vq format and support packed vq in the SVQ "start" and "stop" operations. Signed-off-by: Sahil Siddiq <sahilcdq@proton.me> --- Changes v2 -> v3: * vhost-shadow-virtqueue.c (vhost_svq_memory_packed): New function (vhost_svq_start): - Remove common variables out of if-else branch. (vhost_svq_stop): - Add support for packed vq. (vhost_svq_get_vring_addr): Revert changes (vhost_svq_get_vring_addr_packed): Likwise. * vhost-shadow-virtqueue.h - Revert changes made to "vhost_svq_get_vring_addr*" functions. * vhost-vdpa.c: Revert changes. hw/virtio/vhost-shadow-virtqueue.c | 56 +++++++++++++++++++++++------- hw/virtio/vhost-shadow-virtqueue.h | 4 +++ 2 files changed, 47 insertions(+), 13 deletions(-)