diff mbox series

[v2,6/7] vdpa: move iova_tree allocation to net_vhost_vdpa_init

Message ID 20240201180924.487579-7-eperezma@redhat.com (mailing list archive)
State New, archived
Headers show
Series Move memory listener register to vhost_vdpa_init | expand

Commit Message

Eugenio Perez Martin Feb. 1, 2024, 6:09 p.m. UTC
As we are moving to keep the mapping through the whole vdpa device
lifetime instead of resetting it at VirtIO reset, we need to move all of
its dependencies to the initialization too.  In particular, devices with
x-svq=on need a valid iova_tree from the beginning.

Simplify the code by also consolidating the two creation points: the
first data vq when SVQ is active, and CVQ start when only CVQ uses it.

Suggested-by: Si-Wei Liu <si-wei.liu@oracle.com>
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
---
 include/hw/virtio/vhost-vdpa.h | 16 ++++++++++++++-
 net/vhost-vdpa.c               | 36 +++-------------------------------
 2 files changed, 18 insertions(+), 34 deletions(-)

Comments

Si-Wei Liu Feb. 6, 2024, 1:10 a.m. UTC | #1
Hi Eugenio,

This new code looks good to me, and the original issue I saw with
x-svq=on should be gone. However, after rebasing my tree on top of this,
I found a new failure around setting up guest mappings at early boot;
please see the specific QEMU config and corresponding event traces
attached below. I haven't checked into the details yet, but thought you
would want to be aware of it ahead of time.

Regards,
-Siwei

On 2/1/2024 10:09 AM, Eugenio Pérez wrote:
> As we are moving to keep the mapping through all the vdpa device life
> instead of resetting it at VirtIO reset, we need to move all its
> dependencies to the initialization too.  In particular devices with
> x-svq=on need a valid iova_tree from the beginning.
>
> Simplify the code also consolidating the two creation points: the first
> data vq in case of SVQ active and CVQ start in case only CVQ uses it.
>
> Suggested-by: Si-Wei Liu <si-wei.liu@oracle.com>
> Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> ---
>   include/hw/virtio/vhost-vdpa.h | 16 ++++++++++++++-
>   net/vhost-vdpa.c               | 36 +++-------------------------------
>   2 files changed, 18 insertions(+), 34 deletions(-)
>
> diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
> index 03ed2f2be3..ad754eb803 100644
> --- a/include/hw/virtio/vhost-vdpa.h
> +++ b/include/hw/virtio/vhost-vdpa.h
> @@ -37,7 +37,21 @@ typedef struct vhost_vdpa_shared {
>       struct vhost_vdpa_iova_range iova_range;
>       QLIST_HEAD(, vdpa_iommu) iommu_list;
>   
> -    /* IOVA mapping used by the Shadow Virtqueue */
> +    /*
> +     * IOVA mapping used by the Shadow Virtqueue
> +     *
> +     * It is shared among all ASID for simplicity, whether CVQ shares ASID with
> +     * guest or not:
> +     * - Memory listener need access to guest's memory addresses allocated in
> +     *   the IOVA tree.
> +     * - There should be plenty of IOVA address space for both ASID not to
> +     *   worry about collisions between them.  Guest's translations are still
> +     *   validated with virtio virtqueue_pop so there is no risk for the guest
> +     *   to access memory that it shouldn't.
> +     *
> +     * To allocate a iova tree per ASID is doable but it complicates the code
> +     * and it is not worth it for the moment.
> +     */
>       VhostIOVATree *iova_tree;
>   
>       /* Copy of backend features */
> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> index cc589dd148..57edcf34d0 100644
> --- a/net/vhost-vdpa.c
> +++ b/net/vhost-vdpa.c
> @@ -232,6 +232,7 @@ static void vhost_vdpa_cleanup(NetClientState *nc)
>           return;
>       }
>       qemu_close(s->vhost_vdpa.shared->device_fd);
> +    g_clear_pointer(&s->vhost_vdpa.shared->iova_tree, vhost_iova_tree_delete);
>       g_free(s->vhost_vdpa.shared);
>   }
>   
> @@ -329,16 +330,8 @@ static void vdpa_net_migration_state_notifier(Notifier *notifier, void *data)
>   
>   static void vhost_vdpa_net_data_start_first(VhostVDPAState *s)
>   {
> -    struct vhost_vdpa *v = &s->vhost_vdpa;
> -
>       migration_add_notifier(&s->migration_state,
>                              vdpa_net_migration_state_notifier);
> -
> -    /* iova_tree may be initialized by vhost_vdpa_net_load_setup */
> -    if (v->shadow_vqs_enabled && !v->shared->iova_tree) {
> -        v->shared->iova_tree = vhost_iova_tree_new(v->shared->iova_range.first,
> -                                                   v->shared->iova_range.last);
> -    }
>   }
>   
>   static int vhost_vdpa_net_data_start(NetClientState *nc)
> @@ -383,19 +376,12 @@ static int vhost_vdpa_net_data_load(NetClientState *nc)
>   static void vhost_vdpa_net_client_stop(NetClientState *nc)
>   {
>       VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> -    struct vhost_dev *dev;
>   
>       assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
>   
>       if (s->vhost_vdpa.index == 0) {
>           migration_remove_notifier(&s->migration_state);
>       }
> -
> -    dev = s->vhost_vdpa.dev;
> -    if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
> -        g_clear_pointer(&s->vhost_vdpa.shared->iova_tree,
> -                        vhost_iova_tree_delete);
> -    }
>   }
>   
>   static NetClientInfo net_vhost_vdpa_info = {
> @@ -557,24 +543,6 @@ out:
>           return 0;
>       }
>   
> -    /*
> -     * If other vhost_vdpa already have an iova_tree, reuse it for simplicity,
> -     * whether CVQ shares ASID with guest or not, because:
> -     * - Memory listener need access to guest's memory addresses allocated in
> -     *   the IOVA tree.
> -     * - There should be plenty of IOVA address space for both ASID not to
> -     *   worry about collisions between them.  Guest's translations are still
> -     *   validated with virtio virtqueue_pop so there is no risk for the guest
> -     *   to access memory that it shouldn't.
> -     *
> -     * To allocate a iova tree per ASID is doable but it complicates the code
> -     * and it is not worth it for the moment.
> -     */
> -    if (!v->shared->iova_tree) {
> -        v->shared->iova_tree = vhost_iova_tree_new(v->shared->iova_range.first,
> -                                                   v->shared->iova_range.last);
> -    }
> -
>       r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer,
>                                  vhost_vdpa_net_cvq_cmd_page_len(), false);
>       if (unlikely(r < 0)) {
> @@ -1674,6 +1642,8 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
>           s->vhost_vdpa.shared->device_fd = vdpa_device_fd;
>           s->vhost_vdpa.shared->iova_range = iova_range;
>           s->vhost_vdpa.shared->shadow_data = svq;
> +        s->vhost_vdpa.shared->iova_tree = vhost_iova_tree_new(iova_range.first,
> +                                                              iova_range.last);
>       } else if (!is_datapath) {
>           s->cvq_cmd_out_buffer = mmap(NULL, vhost_vdpa_net_cvq_cmd_page_len(),
>                                        PROT_READ | PROT_WRITE,
+ qemu-system-x86_64 -no-user-config -nodefaults -machine type=pc-q35-8.0,dump-guest-core=off,accel=kvm,kernel_irqchip=split -cpu EPYC -smp 8,sockets=1,cores=8,threads=1 -m size=128G,slots=8,maxmem=256G -qmp unix:/tmp/q,server,nowait -serial mon:stdio -display vnc=:37 -drive file=/usr/share/OVMF/OVMF_CODE.pure-efi.fd,index=0,if=pflash,format=raw,readonly=on -drive file=/usr/share/OVMF/OVMF_VARS.pure-efi.fd,index=1,if=pflash,format=raw -drive id=disk0,if=none,format=qcow2,file=/root/vdpa/tools/images/VM-el7.9-uefi-x86_64.qcow2 -device virtio-blk-pci,drive=disk0 -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-0,id=vhost-vdpa0,x-svq=on -device virtio-net-pci,netdev=vhost-vdpa0,id=vdpa0,bootindex=-1,page-per-vq=on,mac=e4:11:c6:d3:45:f0,ctrl_vq=on,mq=on,ctrl_vlan=off,vectors=6,host_mtu=9000,disable-legacy=on -msg timestamp=on --trace events=/tmp/e
55674@1706854838.133877:vhost_vdpa_init dev: 0x556d459e6e00, common dev: 0x556d45c75140 vdpa: 0x7f991c860190
55674@1706854838.133901:vhost_vdpa_add_status dev: 0x556d459e6e00 status: 0x3
55674@1706854838.133906:vhost_vdpa_set_owner dev: 0x556d459e6e00
55674@1706854838.133922:vhost_vdpa_get_features dev: 0x556d459e6e00 features: 0x300c3182b
55674@1706854838.133924:vhost_vdpa_memslots_limit dev: 0x556d459e6e00 = 0x7fffffff
55674@1706854838.133927:vhost_vdpa_get_vq_index dev: 0x556d459e6e00 idx: 0 vq idx: 0
55674@1706854838.133933:vhost_vdpa_get_vq_index dev: 0x556d459e6e00 idx: 1 vq idx: 1
55674@1706854838.133939:vhost_vdpa_get_device_id dev: 0x556d459e6e00 device_id 1
55674@1706854838.133954:vhost_vdpa_init dev: 0x556d45c5e670, common dev: 0x556d45c75140 vdpa: 0x7f991c6d7190
55674@1706854838.133959:vhost_vdpa_get_features dev: 0x556d45c5e670 features: 0x300c3182b
55674@1706854838.133961:vhost_vdpa_memslots_limit dev: 0x556d45c5e670 = 0x7fffffff
55674@1706854838.133963:vhost_vdpa_get_vq_index dev: 0x556d45c5e670 idx: 0 vq idx: 0
55674@1706854838.133967:vhost_vdpa_get_vq_index dev: 0x556d45c5e670 idx: 1 vq idx: 1
55674@1706854838.133972:vhost_vdpa_get_device_id dev: 0x556d45c5e670 device_id 1
55674@1706854838.144782:vhost_vdpa_init dev: 0x556d45c5eb90, common dev: 0x556d45c75140 vdpa: 0x7f991c68e190
55674@1706854838.144791:vhost_vdpa_get_features dev: 0x556d45c5eb90 features: 0x300c3182b
55674@1706854838.144793:vhost_vdpa_memslots_limit dev: 0x556d45c5eb90 = 0x7fffffff
55674@1706854838.144796:vhost_vdpa_get_vq_index dev: 0x556d45c5eb90 idx: 0 vq idx: 0
55674@1706854838.144802:vhost_vdpa_get_device_id dev: 0x556d45c5eb90 device_id 1
55674@1706854838.154394:vhost_vdpa_listener_region_add vdpa: 0x556d45c75140 iova 0x0 llend 0x80000000 vaddr: 0x7f7903e00000 read-only: 0
55674@1706854838.154419:vhost_vdpa_listener_begin_batch vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 type: 5
55674@1706854838.154427:vhost_vdpa_dma_map vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 asid: 0 iova: 0x1000 size: 0x80000000 uaddr: 0x7f7903e00000 perm: 0x3 type: 2
55674@1706854838.320783:vhost_vdpa_listener_commit vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 type: 6
55674@1706854838.322834:vhost_vdpa_listener_region_add vdpa: 0x556d45c75140 iova 0x100000000 llend 0x2080000000 vaddr: 0x7f7983e00000 read-only: 0
55674@1706854838.322844:vhost_vdpa_listener_begin_batch vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 type: 5
55674@1706854838.322848:vhost_vdpa_dma_map vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 asid: 0 iova: 0x80001000 size: 0x1f80000000 uaddr: 0x7f7983e00000 perm: 0x3 type: 2
55674@1706854848.782709:vhost_vdpa_listener_commit vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 type: 6
55674@1706854848.864173:vhost_vdpa_listener_region_add vdpa: 0x556d45c75140 iova 0xfeda0000 llend 0xfedc0000 vaddr: 0x7f7903ea0000 read-only: 0
55674@1706854848.864197:vhost_vdpa_listener_begin_batch vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 type: 5
55674@1706854848.864203:vhost_vdpa_dma_map vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 asid: 0 iova: 0x2000001000 size: 0x20000 uaddr: 0x7f7903ea0000 perm: 0x3 type: 2
55674@1706854848.864221:vhost_vdpa_listener_commit vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 type: 6
55674@1706854848.964846:vhost_vdpa_listener_region_del vdpa: 0x556d45c75140 iova 0xfeda0000 llend 0xfedbffff
55674@1706854848.964873:vhost_vdpa_listener_begin_batch vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 type: 5
55674@1706854848.964878:vhost_vdpa_dma_unmap vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 asid: 0 iova: 0x1000 size: 0x20000 type: 3
55674@1706854848.964932:vhost_vdpa_listener_commit vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 type: 6
55674@1706854849.068555:vhost_vdpa_set_config dev: 0x556d459e6e00 offset: 0 size: 6 flags: 0x0
55674@1706854849.068579:vhost_vdpa_dump_config dev: 0x556d459e6e00 0000:  e4 11 c6 d3  45 f0                                
55674@1706854849.080452:vhost_vdpa_listener_region_del vdpa: 0x556d45c75140 iova 0x0 llend 0x7fffffff
55674@1706854849.080489:vhost_vdpa_listener_begin_batch vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 type: 5
55674@1706854849.080497:vhost_vdpa_dma_unmap vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 asid: 0 iova: 0x2000001000 size: 0x80000000 type: 3
55674@1706854849.080513:vhost_vdpa_listener_region_add vdpa: 0x556d45c75140 iova 0x0 llend 0xc0000 vaddr: 0x7f7903e00000 read-only: 0
55674@1706854849.080517:vhost_vdpa_dma_map vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 asid: 0 iova: 0x1000 size: 0xc0000 uaddr: 0x7f7903e00000 perm: 0x3 type: 2
55674@1706854849.080532:vhost_vdpa_listener_region_add vdpa: 0x556d45c75140 iova 0xc0000 llend 0xe0000 vaddr: 0x7f7902e00000 read-only: 1
55674@1706854849.080536:vhost_vdpa_dma_map vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 asid: 0 iova: 0xc1000 size: 0x20000 uaddr: 0x7f7902e00000 perm: 0x1 type: 2
55674@1706854849.080669:vhost_vdpa_listener_region_add vdpa: 0x556d45c75140 iova 0xe0000 llend 0x100000 vaddr: 0x7f9908200000 read-only: 1
55674@1706854849.080673:vhost_vdpa_dma_map vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 asid: 0 iova: 0xe1000 size: 0x20000 uaddr: 0x7f9908200000 perm: 0x1 type: 2
55674@1706854849.080697:vhost_vdpa_listener_region_add vdpa: 0x556d45c75140 iova 0x100000 llend 0x80000000 vaddr: 0x7f7903f00000 read-only: 0
55674@1706854849.080700:vhost_vdpa_dma_map vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 asid: 0 iova: 0x101000 size: 0x7ff00000 uaddr: 0x7f7903f00000 perm: 0x3 type: 2
2024-02-02T06:20:49.080706Z qemu-system-x86_64: failed to write, fd=16, errno=14 (Bad address)
2024-02-02T06:20:49.080732Z qemu-system-x86_64: vhost vdpa map fail!
2024-02-02T06:20:49.080742Z qemu-system-x86_64: vhost-vdpa: DMA mapping failed, unable to continue
55674@1706854849.080757:vhost_vdpa_listener_commit vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 type: 6
55680@1706854851.484584:vhost_vdpa_listener_region_add vdpa: 0x556d45c75140 iova 0xc0000000 llend 0xc0040000 vaddr: 0x7f7902c00000 read-only: 1
55680@1706854851.484612:vhost_vdpa_listener_begin_batch vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 type: 5
55680@1706854851.484620:vhost_vdpa_dma_map vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 asid: 0 iova: 0x101000 size: 0x40000 uaddr: 0x7f7902c00000 perm: 0x1 type: 2
55680@1706854851.484666:vhost_vdpa_listener_commit vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 type: 6
55680@1706854851.582299:vhost_vdpa_listener_region_del vdpa: 0x556d45c75140 iova 0xc0000000 llend 0xc003ffff
55680@1706854851.582329:vhost_vdpa_listener_begin_batch vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 type: 5
55680@1706854851.582335:vhost_vdpa_dma_unmap vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 asid: 0 iova: 0x101000 size: 0x40000 type: 3
55680@1706854851.582349:vhost_vdpa_listener_commit vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 type: 6
55680@1706854851.790802:vhost_vdpa_get_config dev: 0x556d459e6e00 config: 0x7f990bb79460 config_len: 12
55680@1706854851.790828:vhost_vdpa_dump_config dev: 0x556d459e6e00 0000:  e4 11 c6 d3  45 f0 01 00  02 00 28 23             
55680@1706854851.790836:vhost_vdpa_get_config dev: 0x556d459e6e00 config: 0x7f990bb79460 config_len: 12
55680@1706854851.790841:vhost_vdpa_dump_config dev: 0x556d459e6e00 0000:  e4 11 c6 d3  45 f0 01 00  02 00 28 23             
55680@1706854851.790855:vhost_vdpa_get_config dev: 0x556d459e6e00 config: 0x7f990bb79460 config_len: 12
55680@1706854851.790860:vhost_vdpa_dump_config dev: 0x556d459e6e00 0000:  e4 11 c6 d3  45 f0 01 00  02 00 28 23             
55680@1706854851.790867:vhost_vdpa_get_config dev: 0x556d459e6e00 config: 0x7f990bb79460 config_len: 12
55680@1706854851.790872:vhost_vdpa_dump_config dev: 0x556d459e6e00 0000:  e4 11 c6 d3  45 f0 01 00  02 00 28 23             
55680@1706854851.790879:vhost_vdpa_get_config dev: 0x556d459e6e00 config: 0x7f990bb79460 config_len: 12
55680@1706854851.790883:vhost_vdpa_dump_config dev: 0x556d459e6e00 0000:  e4 11 c6 d3  45 f0 01 00  02 00 28 23             
55680@1706854851.790890:vhost_vdpa_get_config dev: 0x556d459e6e00 config: 0x7f990bb79460 config_len: 12
55680@1706854851.790895:vhost_vdpa_dump_config dev: 0x556d459e6e00 0000:  e4 11 c6 d3  45 f0 01 00  02 00 28 23             
55680@1706854851.790902:vhost_vdpa_get_config dev: 0x556d459e6e00 config: 0x7f990bb79460 config_len: 12
55680@1706854851.790906:vhost_vdpa_dump_config dev: 0x556d459e6e00 0000:  e4 11 c6 d3  45 f0 01 00  02 00 28 23             
55680@1706854851.790917:vhost_vdpa_get_config dev: 0x556d459e6e00 config: 0x7f990bb79460 config_len: 12
55680@1706854851.790924:vhost_vdpa_dump_config dev: 0x556d459e6e00 0000:  e4 11 c6 d3  45 f0 01 00  02 00 28 23             
55680@1706854851.793467:vhost_vdpa_get_vq_index dev: 0x556d459e6e00 idx: 0 vq idx: 0
55680@1706854851.793480:vhost_vdpa_get_vq_index dev: 0x556d459e6e00 idx: 1 vq idx: 1
55680@1706854851.793578:vhost_vdpa_set_features dev: 0x556d459e6e00 features: 0x300000008
55680@1706854851.793585:vhost_vdpa_add_status dev: 0x556d459e6e00 status: 0x8
55680@1706854851.793590:vhost_vdpa_set_mem_table dev: 0x556d459e6e00 nregions: 3 padding: 0x0
55680@1706854851.793593:vhost_vdpa_dump_regions dev: 0x556d459e6e00 0: guest_phys_addr: 0x0 memory_size: 0xc0000 userspace_addr: 0x7f7903e00000 flags_padding: 0x0
55680@1706854851.793596:vhost_vdpa_dump_regions dev: 0x556d459e6e00 1: guest_phys_addr: 0x100000 memory_size: 0x7ff00000 userspace_addr: 0x7f7903f00000 flags_padding: 0x0
55680@1706854851.793598:vhost_vdpa_dump_regions dev: 0x556d459e6e00 2: guest_phys_addr: 0x100000000 memory_size: 0x1f80000000 userspace_addr: 0x7f7983e00000 flags_padding: 0x0
55680@1706854851.793601:vhost_vdpa_get_vq_index dev: 0x556d459e6e00 idx: 0 vq idx: 0
55680@1706854851.793604:vhost_vdpa_set_vring_num dev: 0x556d459e6e00 index: 0 num: 256
55680@1706854851.793610:vhost_vdpa_vq_get_addr dev: 0x556d459e6e00 vq: 0x556d459e7080 desc_user_addr: 0x7e459000 avail_user_addr: 0x7e45a000 used_user_addr: 0x7e45b000
55680@1706854851.793616:vhost_vdpa_get_vq_index dev: 0x556d459e6e00 idx: 1 vq idx: 1
55680@1706854851.793619:vhost_vdpa_set_vring_num dev: 0x556d459e6e00 index: 1 num: 256
55680@1706854851.793623:vhost_vdpa_vq_get_addr dev: 0x556d459e6e00 vq: 0x556d459e7100 desc_user_addr: 0x7e45d000 avail_user_addr: 0x7e45e000 used_user_addr: 0x7e45f000
55680@1706854851.793630:vhost_vdpa_dev_start dev: 0x556d459e6e00 started: 1
55680@1706854851.793632:vhost_vdpa_set_vring_base dev: 0x556d459e6e00 index: 0 num: 0
55680@1706854851.793638:vhost_vdpa_set_vring_kick dev: 0x556d459e6e00 index: 0 fd: 49
55680@1706854851.793643:vhost_vdpa_set_vring_call dev: 0x556d459e6e00 index: 0 fd: 50
55680@1706854851.793675:vhost_vdpa_dma_map vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 asid: 0 iova: 0x101000 size: 0x2000 uaddr: 0x7f991c831000 perm: 0x1 type: 2
55680@1706854851.983932:vhost_vdpa_dma_map vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 asid: 0 iova: 0x103000 size: 0x1000 uaddr: 0x7f991ca0b000 perm: 0x3 type: 2
55680@1706854852.078146:vhost_vdpa_set_vring_addr dev: 0x556d459e6e00 index: 0 flags: 0x0 desc_user_addr: 0x101000 used_user_addr: 0x103000 avail_user_addr: 0x102000 log_guest_addr: 0x0
55680@1706854852.078176:vhost_vdpa_set_vring_base dev: 0x556d459e6e00 index: 1 num: 0
55680@1706854852.078190:vhost_vdpa_set_vring_kick dev: 0x556d459e6e00 index: 1 fd: 51
55680@1706854852.078204:vhost_vdpa_set_vring_call dev: 0x556d459e6e00 index: 1 fd: 52
55680@1706854852.078248:vhost_vdpa_dma_map vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 asid: 0 iova: 0x104000 size: 0x2000 uaddr: 0x7f991c82f000 perm: 0x1 type: 2
55680@1706854852.268200:vhost_vdpa_dma_map vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 asid: 0 iova: 0x106000 size: 0x1000 uaddr: 0x7f991c82e000 perm: 0x3 type: 2
55680@1706854852.363654:vhost_vdpa_set_vring_addr dev: 0x556d459e6e00 index: 1 flags: 0x0 desc_user_addr: 0x104000 used_user_addr: 0x106000 avail_user_addr: 0x105000 log_guest_addr: 0x0
55680@1706854852.363687:vhost_vdpa_add_status dev: 0x556d459e6e00 status: 0x4
55680@1706854852.403258:vhost_vdpa_set_config_call dev: 0x556d459e6e00 fd: 45
55680@1706854852.403420:vhost_vdpa_set_vring_ready dev: 0x556d459e6e00, idx: 0, r: 0
55680@1706854852.403499:vhost_vdpa_set_vring_ready dev: 0x556d459e6e00, idx: 1, r: 0
55680@1706854854.591372:vhost_vdpa_dev_start dev: 0x556d459e6e00 started: 0
55680@1706854854.591409:vhost_vdpa_suspend dev: 0x556d459e6e00
55680@1706854854.603127:vhost_vdpa_dma_unmap vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 asid: 0 iova: 0x101000 size: 0x2000 type: 3
55680@1706854854.737565:vhost_vdpa_dma_unmap vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 asid: 0 iova: 0x103000 size: 0x1000 type: 3
55680@1706854854.833983:vhost_vdpa_dma_unmap vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 asid: 0 iova: 0x104000 size: 0x2000 type: 3
55680@1706854854.929203:vhost_vdpa_dma_unmap vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 asid: 0 iova: 0x106000 size: 0x1000 type: 3
55680@1706854855.024140:vhost_vdpa_get_vq_index dev: 0x556d459e6e00 idx: 0 vq idx: 0
55680@1706854855.024161:vhost_vdpa_get_vq_index dev: 0x556d459e6e00 idx: 1 vq idx: 1
55680@1706854855.064848:vhost_vdpa_reset_device dev: 0x556d459e6e00
55680@1706854855.064858:vhost_vdpa_add_status dev: 0x556d459e6e00 status: 0x3
55680@1706854855.064866:vhost_vdpa_listener_region_del vdpa: 0x556d45c75140 iova 0x0 llend 0xbffff
55680@1706854855.064875:vhost_vdpa_listener_begin_batch vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 type: 5
55680@1706854855.064879:vhost_vdpa_dma_unmap vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 asid: 0 iova: 0x1000 size: 0xc0000 type: 3
55680@1706854855.064902:vhost_vdpa_listener_region_del vdpa: 0x556d45c75140 iova 0xc0000 llend 0xdffff
55680@1706854855.064905:vhost_vdpa_dma_unmap vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 asid: 0 iova: 0xc1000 size: 0x20000 type: 3
55680@1706854855.064916:vhost_vdpa_listener_region_del vdpa: 0x556d45c75140 iova 0xe0000 llend 0xfffff
55680@1706854855.064920:vhost_vdpa_dma_unmap vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 asid: 0 iova: 0xe1000 size: 0x20000 type: 3
55680@1706854855.064933:vhost_vdpa_listener_region_del vdpa: 0x556d45c75140 iova 0x100000 llend 0x7fffffff
55680@1706854855.064936:vhost_vdpa_listener_region_del vdpa: 0x556d45c75140 iova 0x100000000 llend 0x207fffffff
55680@1706854855.064939:vhost_vdpa_dma_unmap vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 asid: 0 iova: 0x80001000 size: 0x1f80000000 type: 3
55680@1706854857.740395:vhost_vdpa_listener_commit vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 type: 6
55680@1706854857.742202:vhost_vdpa_set_config_call dev: 0x556d459e6e00 fd: -1
55680@1706854857.742366:vhost_vdpa_get_vq_index dev: 0x556d459e6e00 idx: 0 vq idx: 0
55680@1706854857.742374:vhost_vdpa_get_vq_index dev: 0x556d459e6e00 idx: 1 vq idx: 1
55686@1706854858.995952:vhost_vdpa_get_config dev: 0x556d459e6e00 config: 0x7f9908b73460 config_len: 12
55686@1706854858.996010:vhost_vdpa_dump_config dev: 0x556d459e6e00 0000:  e4 11 c6 d3  45 f0 01 00  02 00 28 23             
55686@1706854858.996036:vhost_vdpa_get_config dev: 0x556d459e6e00 config: 0x7f9908b73460 config_len: 12
55686@1706854858.996042:vhost_vdpa_dump_config dev: 0x556d459e6e00 0000:  e4 11 c6 d3  45 f0 01 00  02 00 28 23             
55686@1706854858.996061:vhost_vdpa_get_config dev: 0x556d459e6e00 config: 0x7f9908b73460 config_len: 12
55686@1706854858.996067:vhost_vdpa_dump_config dev: 0x556d459e6e00 0000:  e4 11 c6 d3  45 f0 01 00  02 00 28 23             
55686@1706854858.996086:vhost_vdpa_get_config dev: 0x556d459e6e00 config: 0x7f9908b73460 config_len: 12
55686@1706854858.996092:vhost_vdpa_dump_config dev: 0x556d459e6e00 0000:  e4 11 c6 d3  45 f0 01 00  02 00 28 23             
55686@1706854858.996101:vhost_vdpa_get_config dev: 0x556d459e6e00 config: 0x7f9908b73460 config_len: 12
55686@1706854858.996106:vhost_vdpa_dump_config dev: 0x556d459e6e00 0000:  e4 11 c6 d3  45 f0 01 00  02 00 28 23             
55686@1706854858.996114:vhost_vdpa_get_config dev: 0x556d459e6e00 config: 0x7f9908b73460 config_len: 12
55686@1706854858.996120:vhost_vdpa_dump_config dev: 0x556d459e6e00 0000:  e4 11 c6 d3  45 f0 01 00  02 00 28 23             
55686@1706854858.996128:vhost_vdpa_get_config dev: 0x556d459e6e00 config: 0x7f9908b73460 config_len: 12
55686@1706854858.996134:vhost_vdpa_dump_config dev: 0x556d459e6e00 0000:  e4 11 c6 d3  45 f0 01 00  02 00 28 23             
55686@1706854858.996142:vhost_vdpa_get_config dev: 0x556d459e6e00 config: 0x7f9908b73460 config_len: 12
55686@1706854858.996148:vhost_vdpa_dump_config dev: 0x556d459e6e00 0000:  e4 11 c6 d3  45 f0 01 00  02 00 28 23             
55686@1706854858.996162:vhost_vdpa_get_config dev: 0x556d459e6e00 config: 0x7f9908b73460 config_len: 12
55686@1706854858.996168:vhost_vdpa_dump_config dev: 0x556d459e6e00 0000:  e4 11 c6 d3  45 f0 01 00  02 00 28 23             
55686@1706854858.998252:vhost_vdpa_get_vq_index dev: 0x556d459e6e00 idx: 0 vq idx: 0
55686@1706854858.998300:vhost_vdpa_get_vq_index dev: 0x556d459e6e00 idx: 1 vq idx: 1
55686@1706854858.998336:vhost_vdpa_get_vq_index dev: 0x556d45c5e670 idx: 2 vq idx: 2
55686@1706854858.998370:vhost_vdpa_get_vq_index dev: 0x556d45c5e670 idx: 3 vq idx: 3
55686@1706854858.998472:vhost_vdpa_set_features dev: 0x556d459e6e00 features: 0x300c3180b
55686@1706854858.998479:vhost_vdpa_add_status dev: 0x556d459e6e00 status: 0x8
55686@1706854858.998487:vhost_vdpa_set_mem_table dev: 0x556d459e6e00 nregions: 3 padding: 0x0
55686@1706854858.998490:vhost_vdpa_dump_regions dev: 0x556d459e6e00 0: guest_phys_addr: 0x0 memory_size: 0xc0000 userspace_addr: 0x7f7903e00000 flags_padding: 0x0
55686@1706854858.998494:vhost_vdpa_dump_regions dev: 0x556d459e6e00 1: guest_phys_addr: 0x100000 memory_size: 0x7ff00000 userspace_addr: 0x7f7903f00000 flags_padding: 0x0
55686@1706854858.998498:vhost_vdpa_dump_regions dev: 0x556d459e6e00 2: guest_phys_addr: 0x100000000 memory_size: 0x1f80000000 userspace_addr: 0x7f7983e00000 flags_padding: 0x0
55686@1706854858.998501:vhost_vdpa_get_vq_index dev: 0x556d459e6e00 idx: 0 vq idx: 0
55686@1706854858.998504:vhost_vdpa_set_vring_num dev: 0x556d459e6e00 index: 0 num: 256
55686@1706854858.998511:vhost_vdpa_vq_get_addr dev: 0x556d459e6e00 vq: 0x556d459e7080 desc_user_addr: 0x1ed88d2000 avail_user_addr: 0x1ed88d3000 used_user_addr: 0x1ed88d3240
55686@1706854858.998527:vhost_vdpa_get_vq_index dev: 0x556d459e6e00 idx: 1 vq idx: 1
55686@1706854858.998530:vhost_vdpa_set_vring_num dev: 0x556d459e6e00 index: 1 num: 256
55686@1706854858.998534:vhost_vdpa_vq_get_addr dev: 0x556d459e6e00 vq: 0x556d459e7100 desc_user_addr: 0x1ed88d4000 avail_user_addr: 0x1ed88d5000 used_user_addr: 0x1ed88d5240
55686@1706854858.998543:vhost_vdpa_dev_start dev: 0x556d459e6e00 started: 1
55686@1706854858.998546:vhost_vdpa_set_vring_base dev: 0x556d459e6e00 index: 0 num: 0
55686@1706854858.998553:vhost_vdpa_set_vring_kick dev: 0x556d459e6e00 index: 0 fd: 69
55686@1706854858.998563:vhost_vdpa_set_vring_call dev: 0x556d459e6e00 index: 0 fd: 70
55686@1706854858.998598:vhost_vdpa_dma_map vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 asid: 0 iova: 0x1000 size: 0x2000 uaddr: 0x7f991c831000 perm: 0x1 type: 2
55686@1706854859.003385:vhost_vdpa_dma_map vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 asid: 0 iova: 0x3000 size: 0x1000 uaddr: 0x7f991ca0b000 perm: 0x3 type: 2
55686@1706854859.006753:vhost_vdpa_set_vring_addr dev: 0x556d459e6e00 index: 0 flags: 0x0 desc_user_addr: 0x1000 used_user_addr: 0x3000 avail_user_addr: 0x2000 log_guest_addr: 0x0
55686@1706854859.006774:vhost_vdpa_set_vring_base dev: 0x556d459e6e00 index: 1 num: 0
55686@1706854859.006782:vhost_vdpa_set_vring_kick dev: 0x556d459e6e00 index: 1 fd: 71
55686@1706854859.006788:vhost_vdpa_set_vring_call dev: 0x556d459e6e00 index: 1 fd: 72
55686@1706854859.006815:vhost_vdpa_dma_map vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 asid: 0 iova: 0x4000 size: 0x2000 uaddr: 0x7f991c82f000 perm: 0x1 type: 2
55686@1706854859.012971:vhost_vdpa_dma_map vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 asid: 0 iova: 0x6000 size: 0x1000 uaddr: 0x7f991c82e000 perm: 0x3 type: 2
55686@1706854859.016453:vhost_vdpa_set_vring_addr dev: 0x556d459e6e00 index: 1 flags: 0x0 desc_user_addr: 0x4000 used_user_addr: 0x6000 avail_user_addr: 0x5000 log_guest_addr: 0x0
55686@1706854859.016468:vhost_vdpa_set_config_call dev: 0x556d459e6e00 fd: 65
55686@1706854859.016559:vhost_vdpa_get_vq_index dev: 0x556d45c5e670 idx: 2 vq idx: 2
55686@1706854859.016562:vhost_vdpa_set_vring_num dev: 0x556d45c5e670 index: 2 num: 256
55686@1706854859.016567:vhost_vdpa_vq_get_addr dev: 0x556d45c5e670 vq: 0x556d45c5e8f0 desc_user_addr: 0x1ed88d6000 avail_user_addr: 0x1ed88d7000 used_user_addr: 0x1ed88d7240
55686@1706854859.016574:vhost_vdpa_get_vq_index dev: 0x556d45c5e670 idx: 3 vq idx: 3
55686@1706854859.016577:vhost_vdpa_set_vring_num dev: 0x556d45c5e670 index: 3 num: 256
55686@1706854859.016580:vhost_vdpa_vq_get_addr dev: 0x556d45c5e670 vq: 0x556d45c5e970 desc_user_addr: 0x1ed8920000 avail_user_addr: 0x1ed8921000 used_user_addr: 0x1ed8921240
55686@1706854859.016587:vhost_vdpa_dev_start dev: 0x556d45c5e670 started: 1
55686@1706854859.016589:vhost_vdpa_set_vring_base dev: 0x556d45c5e670 index: 2 num: 0
55686@1706854859.016595:vhost_vdpa_set_vring_kick dev: 0x556d45c5e670 index: 2 fd: 76
55686@1706854859.016598:vhost_vdpa_set_vring_call dev: 0x556d45c5e670 index: 2 fd: 77
55686@1706854859.016625:vhost_vdpa_dma_map vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 asid: 0 iova: 0x7000 size: 0x2000 uaddr: 0x7f991c82c000 perm: 0x1 type: 2
55686@1706854859.020454:vhost_vdpa_dma_map vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 asid: 0 iova: 0x9000 size: 0x1000 uaddr: 0x7f991c82b000 perm: 0x3 type: 2
55686@1706854859.025091:vhost_vdpa_set_vring_addr dev: 0x556d45c5e670 index: 2 flags: 0x0 desc_user_addr: 0x7000 used_user_addr: 0x9000 avail_user_addr: 0x8000 log_guest_addr: 0x0
55686@1706854859.025104:vhost_vdpa_set_vring_base dev: 0x556d45c5e670 index: 3 num: 0
55686@1706854859.025111:vhost_vdpa_set_vring_kick dev: 0x556d45c5e670 index: 3 fd: 78
55686@1706854859.025114:vhost_vdpa_set_vring_call dev: 0x556d45c5e670 index: 3 fd: 79
55686@1706854859.025141:vhost_vdpa_dma_map vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 asid: 0 iova: 0xa000 size: 0x2000 uaddr: 0x7f991c829000 perm: 0x1 type: 2
55686@1706854859.034514:vhost_vdpa_dma_map vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 asid: 0 iova: 0xc000 size: 0x1000 uaddr: 0x7f991c828000 perm: 0x3 type: 2
55686@1706854859.039516:vhost_vdpa_set_vring_addr dev: 0x556d45c5e670 index: 3 flags: 0x0 desc_user_addr: 0xa000 used_user_addr: 0xc000 avail_user_addr: 0xb000 log_guest_addr: 0x0
55686@1706854859.039532:vhost_vdpa_set_config_call dev: 0x556d45c5e670 fd: 65
55686@1706854859.039541:vhost_vdpa_dma_map vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 asid: 0 iova: 0xd000 size: 0x1000 uaddr: 0x7f991ca19000 perm: 0x1 type: 2
55686@1706854859.045025:vhost_vdpa_dma_map vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 asid: 0 iova: 0xe000 size: 0x1000 uaddr: 0x7f991ca18000 perm: 0x3 type: 2
55686@1706854859.050923:vhost_vdpa_get_vq_index dev: 0x556d45c5eb90 idx: 4 vq idx: 4
55686@1706854859.050934:vhost_vdpa_set_vring_num dev: 0x556d45c5eb90 index: 4 num: 64
55686@1706854859.050938:vhost_vdpa_vq_get_addr dev: 0x556d45c5eb90 vq: 0x556d45c5ee10 desc_user_addr: 0x1ed9feb000 avail_user_addr: 0x1ed9feb400 used_user_addr: 0x1ed9feb4c0
55686@1706854859.050948:vhost_vdpa_dev_start dev: 0x556d45c5eb90 started: 1
55686@1706854859.050951:vhost_vdpa_set_vring_base dev: 0x556d45c5eb90 index: 4 num: 0
55686@1706854859.050956:vhost_vdpa_set_vring_kick dev: 0x556d45c5eb90 index: 4 fd: 82
55686@1706854859.050966:vhost_vdpa_set_vring_call dev: 0x556d45c5eb90 index: 4 fd: 83
55686@1706854859.050992:vhost_vdpa_dma_map vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 asid: 0 iova: 0xf000 size: 0x1000 uaddr: 0x7f991c827000 perm: 0x1 type: 2
55686@1706854859.057284:vhost_vdpa_dma_map vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 asid: 0 iova: 0x10000 size: 0x1000 uaddr: 0x7f991c826000 perm: 0x3 type: 2
55686@1706854859.064081:vhost_vdpa_set_vring_addr dev: 0x556d45c5eb90 index: 4 flags: 0x0 desc_user_addr: 0xf000 used_user_addr: 0x10000 avail_user_addr: 0xf400 log_guest_addr: 0x0
55686@1706854859.064096:vhost_vdpa_listener_region_add vdpa: 0x556d45c75140 iova 0x0 llend 0xc0000 vaddr: 0x7f7903e00000 read-only: 0
55686@1706854859.064101:vhost_vdpa_listener_begin_batch vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 type: 5
55686@1706854859.064105:vhost_vdpa_dma_map vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 asid: 0 iova: 0x11000 size: 0xc0000 uaddr: 0x7f7903e00000 perm: 0x3 type: 2
55686@1706854859.064114:vhost_vdpa_listener_region_add vdpa: 0x556d45c75140 iova 0xc0000 llend 0xe0000 vaddr: 0x7f7902e00000 read-only: 1
55686@1706854859.064117:vhost_vdpa_dma_map vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 asid: 0 iova: 0xd1000 size: 0x20000 uaddr: 0x7f7902e00000 perm: 0x1 type: 2
55686@1706854859.064130:vhost_vdpa_listener_region_add vdpa: 0x556d45c75140 iova 0xe0000 llend 0x100000 vaddr: 0x7f9908200000 read-only: 1
55686@1706854859.064133:vhost_vdpa_dma_map vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 asid: 0 iova: 0xf1000 size: 0x20000 uaddr: 0x7f9908200000 perm: 0x1 type: 2
55686@1706854859.064157:vhost_vdpa_listener_region_add vdpa: 0x556d45c75140 iova 0x100000 llend 0x80000000 vaddr: 0x7f7903f00000 read-only: 0
55686@1706854859.064160:vhost_vdpa_dma_map vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 asid: 0 iova: 0x111000 size: 0x7ff00000 uaddr: 0x7f7903f00000 perm: 0x3 type: 2
2024-02-02T06:20:59.064167Z qemu-system-x86_64: failed to write, fd=16, errno=14 (Bad address)
2024-02-02T06:20:59.064187Z qemu-system-x86_64: vhost vdpa map fail!
2024-02-02T06:20:59.064198Z qemu-system-x86_64: vhost-vdpa: DMA mapping failed, unable to continue
55686@1706854859.064208:vhost_vdpa_listener_region_add vdpa: 0x556d45c75140 iova 0x100000000 llend 0x2080000000 vaddr: 0x7f7983e00000 read-only: 0
55686@1706854859.064211:vhost_vdpa_dma_map vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 asid: 0 iova: 0x111000 size: 0x1f80000000 uaddr: 0x7f7983e00000 perm: 0x3 type: 2
2024-02-02T06:20:59.064215Z qemu-system-x86_64: failed to write, fd=16, errno=14 (Bad address)
2024-02-02T06:20:59.064232Z qemu-system-x86_64: vhost vdpa map fail!
2024-02-02T06:20:59.064244Z qemu-system-x86_64: vhost-vdpa: DMA mapping failed, unable to continue
55686@1706854859.064261:vhost_vdpa_listener_commit vdpa_shared:0x556d45c75140 fd: 16 msg_type: 2 type: 6
55686@1706854859.071961:vhost_vdpa_add_status dev: 0x556d45c5eb90 status: 0x4
55686@1706854859.122365:vhost_vdpa_set_config_call dev: 0x556d45c5eb90 fd: 65
55686@1706854859.122382:vhost_vdpa_set_vring_ready dev: 0x556d45c5eb90, idx: 4, r: 0
55686@1706854859.122520:vhost_vdpa_set_vring_ready dev: 0x556d45c5eb90, idx: 0, r: 0
55686@1706854859.122600:vhost_vdpa_set_vring_ready dev: 0x556d45c5eb90, idx: 1, r: 0
55686@1706854859.122684:vhost_vdpa_set_vring_ready dev: 0x556d45c5eb90, idx: 2, r: 0
55686@1706854859.122760:vhost_vdpa_set_vring_ready dev: 0x556d45c5eb90, idx: 3, r: 0
55686@1706854859.123300:vhost_vdpa_get_config dev: 0x556d459e6e00 config: 0x7f9908b73460 config_len: 12
55686@1706854859.123317:vhost_vdpa_dump_config dev: 0x556d459e6e00 0000:  e4 11 c6 d3  45 f0 01 00  02 00 28 23
Michael S. Tsirkin Feb. 13, 2024, 10:22 a.m. UTC | #2
On Mon, Feb 05, 2024 at 05:10:36PM -0800, Si-Wei Liu wrote:
> Hi Eugenio,
> 
> I thought this new code looks good to me and the original issue I saw with
> x-svq=on should be gone. However, after rebase my tree on top of this,
> there's a new failure I found around setting up guest mappings at early
> boot, please see attached the specific QEMU config and corresponding event
> traces. Haven't checked into the detail yet, thinking you would need to be
> aware of ahead.
> 
> Regards,
> -Siwei

Eugenio were you able to reproduce? Siwei did you have time to
look into this? Can't merge patches which are known to break things ...
Eugenio Perez Martin Feb. 13, 2024, 4:26 p.m. UTC | #3
On Tue, Feb 13, 2024 at 11:22 AM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Mon, Feb 05, 2024 at 05:10:36PM -0800, Si-Wei Liu wrote:
> > Hi Eugenio,
> >
> > I thought this new code looks good to me and the original issue I saw with
> > x-svq=on should be gone. However, after rebase my tree on top of this,
> > there's a new failure I found around setting up guest mappings at early
> > boot, please see attached the specific QEMU config and corresponding event
> > traces. Haven't checked into the detail yet, thinking you would need to be
> > aware of ahead.
> >
> > Regards,
> > -Siwei
>
> Eugenio were you able to reproduce? Siwei did you have time to
> look into this? Can't merge patches which are known to break things ...
>

Sorry for the lack of news, I'll try to reproduce this week. Meanwhile
this patch should not be merged, as you mention.

Thanks!
Si-Wei Liu Feb. 14, 2024, 6:29 p.m. UTC | #4
Hi Michael,

On 2/13/2024 2:22 AM, Michael S. Tsirkin wrote:
> On Mon, Feb 05, 2024 at 05:10:36PM -0800, Si-Wei Liu wrote:
>> Hi Eugenio,
>>
>> I thought this new code looks good to me and the original issue I saw with
>> x-svq=on should be gone. However, after rebase my tree on top of this,
>> there's a new failure I found around setting up guest mappings at early
>> boot, please see attached the specific QEMU config and corresponding event
>> traces. Haven't checked into the detail yet, thinking you would need to be
>> aware of ahead.
>>
>> Regards,
>> -Siwei
> Eugenio were you able to reproduce? Siwei did you have time to
> look into this?
I didn't get a chance to look into the details in the past week, but I
think it may have something to do with the (internals of the) iova tree
range allocation and the lookup routine. It starts to fall apart at the
first vhost_vdpa_dma_unmap call showing up in the trace events, where
the lookup should have returned IOVA=0x2000001000, but an incorrect IOVA
address, 0x1000, ended up being returned from the iova tree lookup
routine.

HVA                                 GPA                            IOVA
------------------------------------------------------------------------------------------------
Map
[0x7f7903e00000, 0x7f7983e00000)    [0x0, 0x80000000)              [0x1000, 0x80000000)
[0x7f7983e00000, 0x7f9903e00000)    [0x100000000, 0x2080000000)    [0x80001000, 0x2000001000)
[0x7f7903ea0000, 0x7f7903ec0000)    [0xfeda0000, 0xfedc0000)       [0x2000001000, 0x2000021000)

Unmap
[0x7f7903ea0000, 0x7f7903ec0000)    [0xfeda0000, 0xfedc0000)       [0x1000, 0x20000) ???
                                                                   shouldn't it be [0x2000001000, 0x2000021000) ???
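
For reference, this is roughly the lookup the unmap path does (a
simplified sketch of the vhost_vdpa_listener_region_del() code in this
series; variable and field names are approximate): the needle is built
only from the section's HVA and size, the GPA never participates in the
lookup.

    /*
     * Sketch: how _region_del() asks the iova tree for the IOVA to unmap.
     * Only the HVA (translated_addr) and the size are part of the needle.
     */
    const void *vaddr = memory_region_get_ram_ptr(section->mr) +
                        section->offset_within_region;
    DMAMap mem_region = {
        .translated_addr = (hwaddr)(uintptr_t)vaddr,      /* e.g. 0x7f7903ea0000 */
        .size = int128_get64(section->size) - 1,          /* e.g. 0x20000 - 1 */
    };
    const DMAMap *result = vhost_iova_tree_find_iova(s->iova_tree, &mem_region);
    /* expected: result->iova == 0x2000001000; observed: 0x1000 */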

PS, I will be taking off from today and for the next two weeks. Will try 
to help out looking more closely after I get back.

-Siwei
>   Can't merge patches which are known to break things ...
Si-Wei Liu Feb. 14, 2024, 6:37 p.m. UTC | #5
Hi Eugenio,

Just to answer the question you had in the sync meeting: I've just
tried, and it seems the issue is also reproducible even with the VGA
device and VNC display removed, and also reproducible with 8G mem size.
As you already know, I can only repro it with x-svq=on.

Regards,
-Siwei

On 2/13/2024 8:26 AM, Eugenio Perez Martin wrote:
> On Tue, Feb 13, 2024 at 11:22 AM Michael S. Tsirkin <mst@redhat.com> wrote:
>> On Mon, Feb 05, 2024 at 05:10:36PM -0800, Si-Wei Liu wrote:
>>> Hi Eugenio,
>>>
>>> I thought this new code looks good to me and the original issue I saw with
>>> x-svq=on should be gone. However, after rebase my tree on top of this,
>>> there's a new failure I found around setting up guest mappings at early
>>> boot, please see attached the specific QEMU config and corresponding event
>>> traces. Haven't checked into the detail yet, thinking you would need to be
>>> aware of ahead.
>>>
>>> Regards,
>>> -Siwei
>> Eugenio were you able to reproduce? Siwei did you have time to
>> look into this? Can't merge patches which are known to break things ...
>>
> Sorry for the lack of news, I'll try to reproduce this week. Meanwhile
> this patch should not be merged, as you mention.
>
> Thanks!
>
Eugenio Perez Martin Feb. 14, 2024, 7:11 p.m. UTC | #6
On Wed, Feb 14, 2024 at 7:29 PM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>
> Hi Michael,
>
> On 2/13/2024 2:22 AM, Michael S. Tsirkin wrote:
> > On Mon, Feb 05, 2024 at 05:10:36PM -0800, Si-Wei Liu wrote:
> >> Hi Eugenio,
> >>
> >> I thought this new code looks good to me and the original issue I saw with
> >> x-svq=on should be gone. However, after rebase my tree on top of this,
> >> there's a new failure I found around setting up guest mappings at early
> >> boot, please see attached the specific QEMU config and corresponding event
> >> traces. Haven't checked into the detail yet, thinking you would need to be
> >> aware of ahead.
> >>
> >> Regards,
> >> -Siwei
> > Eugenio were you able to reproduce? Siwei did you have time to
> > look into this?
> Didn't get a chance to look into the detail yet in the past week, but
> thought it may have something to do with the (internals of) iova tree
> range allocation and the lookup routine. It started to fall apart at the
> first vhost_vdpa_dma_unmap call showing up in the trace events, where it
> should've gotten IOVA=0x2000001000,  but an incorrect IOVA address
> 0x1000 was ended up returning from the iova tree lookup routine.
>
> HVA                    GPA                IOVA
> -------------------------------------------------------------------------------------------------------------------------
> Map
> [0x7f7903e00000, 0x7f7983e00000)    [0x0, 0x80000000) [0x1000, 0x80000000)
> [0x7f7983e00000, 0x7f9903e00000)    [0x100000000, 0x2080000000)
> [0x80001000, 0x2000001000)
> [0x7f7903ea0000, 0x7f7903ec0000)    [0xfeda0000, 0xfedc0000)
> [0x2000001000, 0x2000021000)
>
> Unmap
> [0x7f7903ea0000, 0x7f7903ec0000)    [0xfeda0000, 0xfedc0000) [0x1000,
> 0x20000) ???
>                                      shouldn't it be [0x2000001000,
> 0x2000021000) ???
>

Yes, I'm still not able to reproduce. In particular, I don't understand
how the memory listener adds a region and then releases a region with a
different size. I'm talking about these log entries:

1706854838.154394:vhost_vdpa_listener_region_add vdpa: 0x556d45c75140 iova 0x0 llend 0x80000000 vaddr: 0x7f7903e00000 read-only: 0
452:vhost_vdpa_listener_region_del vdpa: 0x556d45c75140 iova 0x0 llend 0x7fffffff

Is it possible for you to also trace the skipped regions? We should
add a debug trace there too...
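
Something like the below would do, I think (just a sketch, not a real
patch: the event name vhost_vdpa_listener_region_skip is made up here
and would need a matching line in hw/virtio/trace-events; variable names
are approximate):

/* hw/virtio/trace-events (hypothetical event, for illustration only) */
vhost_vdpa_listener_region_skip(void *s, uint64_t iova, uint64_t llend) "vdpa_shared: %p iova 0x%"PRIx64" llend 0x%"PRIx64

/*
 * hw/virtio/vhost-vdpa.c: right before the early return taken when
 * vhost_vdpa_listener_skipped_section() says to skip the section, in
 * both vhost_vdpa_listener_region_add() and _del().
 */
trace_vhost_vdpa_listener_region_skip(s,
        section->offset_within_address_space,
        section->offset_within_address_space +
        int128_get64(section->size) - 1);
return;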

Thanks!

> PS, I will be taking off from today and for the next two weeks. Will try
> to help out looking more closely after I get back.
>
> -Siwei
> >   Can't merge patches which are known to break things ...
>
Si-Wei Liu April 2, 2024, 6:19 a.m. UTC | #7
On 2/14/2024 11:11 AM, Eugenio Perez Martin wrote:
> On Wed, Feb 14, 2024 at 7:29 PM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>> Hi Michael,
>>
>> On 2/13/2024 2:22 AM, Michael S. Tsirkin wrote:
>>> On Mon, Feb 05, 2024 at 05:10:36PM -0800, Si-Wei Liu wrote:
>>>> Hi Eugenio,
>>>>
>>>> I thought this new code looks good to me and the original issue I saw with
>>>> x-svq=on should be gone. However, after rebase my tree on top of this,
>>>> there's a new failure I found around setting up guest mappings at early
>>>> boot, please see attached the specific QEMU config and corresponding event
>>>> traces. Haven't checked into the detail yet, thinking you would need to be
>>>> aware of ahead.
>>>>
>>>> Regards,
>>>> -Siwei
>>> Eugenio were you able to reproduce? Siwei did you have time to
>>> look into this?
>> Didn't get a chance to look into the detail yet in the past week, but
>> thought it may have something to do with the (internals of) iova tree
>> range allocation and the lookup routine. It started to fall apart at the
>> first vhost_vdpa_dma_unmap call showing up in the trace events, where it
>> should've gotten IOVA=0x2000001000,  but an incorrect IOVA address
>> 0x1000 was ended up returning from the iova tree lookup routine.
>>
>> HVA                    GPA                IOVA
>> -------------------------------------------------------------------------------------------------------------------------
>> Map
>> [0x7f7903e00000, 0x7f7983e00000)    [0x0, 0x80000000) [0x1000, 0x80000000)
>> [0x7f7983e00000, 0x7f9903e00000)    [0x100000000, 0x2080000000)
>> [0x80001000, 0x2000001000)
>> [0x7f7903ea0000, 0x7f7903ec0000)    [0xfeda0000, 0xfedc0000)
>> [0x2000001000, 0x2000021000)
>>
>> Unmap
>> [0x7f7903ea0000, 0x7f7903ec0000)    [0xfeda0000, 0xfedc0000) [0x1000,
>> 0x20000) ???
>>                                       shouldn't it be [0x2000001000,
>> 0x2000021000) ???
>>
It looks like the SVQ iova tree lookup routine vhost_iova_tree_find_iova(),
which is called from vhost_vdpa_listener_region_del(), can't properly
deal with overlapped regions. Specifically, q35's mch_realize() has the
following:

579     memory_region_init_alias(&mch->open_high_smram, OBJECT(mch), "smram-open-high",
580                              mch->ram_memory, MCH_HOST_BRIDGE_SMRAM_C_BASE,
581                              MCH_HOST_BRIDGE_SMRAM_C_SIZE);
582     memory_region_add_subregion_overlap(mch->system_memory, 0xfeda0000,
583                                         &mch->open_high_smram, 1);
584     memory_region_set_enabled(&mch->open_high_smram, false);

#0  0x0000564c30bf6980 in iova_tree_find_address_iterator (key=0x564c331cf8e0, value=0x564c331cf8e0, data=0x7fffb6d749b0) at ../util/iova-tree.c:96
#1  0x00007f5f66479654 in g_tree_foreach () at /lib64/libglib-2.0.so.0
#2  0x0000564c30bf6b53 in iova_tree_find_iova (tree=<optimized out>, map=map@entry=0x7fffb6d74a00) at ../util/iova-tree.c:114
#3  0x0000564c309da0a9 in vhost_iova_tree_find_iova (tree=<optimized out>, map=map@entry=0x7fffb6d74a00) at ../hw/virtio/vhost-iova-tree.c:70
#4  0x0000564c3085e49d in vhost_vdpa_listener_region_del (listener=0x564c331024c8, section=0x7fffb6d74aa0) at ../hw/virtio/vhost-vdpa.c:444
#5  0x0000564c309f4931 in address_space_update_topology_pass (as=as@entry=0x564c31ab1840 <address_space_memory>, old_view=old_view@entry=0x564c33364cc0, new_view=new_view@entry=0x564c333640f0, adding=adding@entry=false) at ../system/memory.c:977
#6  0x0000564c309f4dcd in address_space_set_flatview (as=0x564c31ab1840 <address_space_memory>) at ../system/memory.c:1079
#7  0x0000564c309f86d0 in memory_region_transaction_commit () at ../system/memory.c:1132
#8  0x0000564c309f86d0 in memory_region_transaction_commit () at ../system/memory.c:1117
#9  0x0000564c307cce64 in mch_realize (d=<optimized out>, errp=<optimized out>) at ../hw/pci-host/q35.c:584

However, it looks like iova_tree_find_address_iterator() only checks
whether the translated address (HVA) falls into the range when trying to
locate the desired IOVA, causing the first DMAMap that happens to
overlap in the translated address (HVA) space to be returned
prematurely:

  89 static gboolean iova_tree_find_address_iterator(gpointer key, gpointer value,
  90                                                 gpointer data)
  91 {
   :
   :
  99     if (map->translated_addr + map->size < needle->translated_addr ||
 100         needle->translated_addr + needle->size < map->translated_addr) {
 101         return false;
 102     }
 103
 104     args->result = map;
 105     return true;
 106 }

In the QEMU trace file, it reveals that the first DMAMap below gets
returned incorrectly instead of the second, the latter of which is the
one the actual IOVA corresponds to:

HVA                                 GPA                            IOVA
[0x7f7903e00000, 0x7f7983e00000)    [0x0, 0x80000000)              [0x1000, 0x80001000)
[0x7f7903ea0000, 0x7f7903ec0000)    [0xfeda0000, 0xfedc0000)       [0x2000001000, 0x2000021000)


Maybe, other than checking the HVA range, we should also match the GPA,
or at least require the size to match exactly?
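
For illustration, a minimal sketch of the "size must match exactly"
variant could look like the below (names follow util/iova-tree.c as
quoted above, with the helper struct name written from memory; this is
only a sketch to show the idea, not a tested fix):

static gboolean iova_tree_find_address_iterator(gpointer key, gpointer value,
                                                gpointer data)
{
    const DMAMap *map = key;
    IOVATreeFindIOVAArgs *args = data;
    const DMAMap *needle = args->needle;

    /*
     * Sketch only: instead of accepting any HVA overlap, require an exact
     * HVA + size match, so the [0xfeda0000, 0xfedc0000) alias cannot
     * resolve to the DMAMap that covers the whole [0x0, 0x80000000) range.
     */
    if (map->translated_addr != needle->translated_addr ||
        map->size != needle->size) {
        return false;
    }

    args->result = map;
    return true;
}

Note that callers which deliberately look up a sub-range of a mapping
(e.g. the SVQ address translation path) would need a different rule, so
matching on GPA may turn out to be the more robust direction.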

> Yes, I'm still not able to reproduce. In particular, I don't know how
> how the memory listener adds a region and then release a region with a
> different size. I'm talking about these log entries:
>
> 1706854838.154394:vhost_vdpa_listener_region_add vdpa: 0x556d45c75140
> iova 0x0 llend 0x80000000 vaddr: 0x7f7903e00000 read-only: 0
> 452:vhost_vdpa_listener_region_del vdpa: 0x556d45c75140 iova 0x0 llend
> 0x7fffffff
I didn't see a different size here, though if you are referring to the
discrepancy in the llend values between the _add() and _del() traces,
the two have to be interpreted differently due to:

3d1e4d34 "vhost_vdpa: fix the input in trace_vhost_vdpa_listener_region_del()"

Regards,
-Siwei
> Is it possible for you to also trace the skipped regions? We should
> add a debug trace there too...
>
> Thanks!
>
>> PS, I will be taking off from today and for the next two weeks. Will try
>> to help out looking more closely after I get back.
>>
>> -Siwei
>>>    Can't merge patches which are known to break things ...
Eugenio Perez Martin April 2, 2024, 12:01 p.m. UTC | #8
On Tue, Apr 2, 2024 at 8:19 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>
>
>
> On 2/14/2024 11:11 AM, Eugenio Perez Martin wrote:
> > On Wed, Feb 14, 2024 at 7:29 PM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
> >> Hi Michael,
> >>
> >> On 2/13/2024 2:22 AM, Michael S. Tsirkin wrote:
> >>> On Mon, Feb 05, 2024 at 05:10:36PM -0800, Si-Wei Liu wrote:
> >>>> Hi Eugenio,
> >>>>
> >>>> I thought this new code looks good to me and the original issue I saw with
> >>>> x-svq=on should be gone. However, after rebase my tree on top of this,
> >>>> there's a new failure I found around setting up guest mappings at early
> >>>> boot, please see attached the specific QEMU config and corresponding event
> >>>> traces. Haven't checked into the detail yet, thinking you would need to be
> >>>> aware of ahead.
> >>>>
> >>>> Regards,
> >>>> -Siwei
> >>> Eugenio were you able to reproduce? Siwei did you have time to
> >>> look into this?
> >> Didn't get a chance to look into the detail yet in the past week, but
> >> thought it may have something to do with the (internals of) iova tree
> >> range allocation and the lookup routine. It started to fall apart at the
> >> first vhost_vdpa_dma_unmap call showing up in the trace events, where it
> >> should've gotten IOVA=0x2000001000,  but an incorrect IOVA address
> >> 0x1000 was ended up returning from the iova tree lookup routine.
> >>
> >> HVA                    GPA                IOVA
> >> -------------------------------------------------------------------------------------------------------------------------
> >> Map
> >> [0x7f7903e00000, 0x7f7983e00000)    [0x0, 0x80000000) [0x1000, 0x80000000)
> >> [0x7f7983e00000, 0x7f9903e00000)    [0x100000000, 0x2080000000)
> >> [0x80001000, 0x2000001000)
> >> [0x7f7903ea0000, 0x7f7903ec0000)    [0xfeda0000, 0xfedc0000)
> >> [0x2000001000, 0x2000021000)
> >>
> >> Unmap
> >> [0x7f7903ea0000, 0x7f7903ec0000)    [0xfeda0000, 0xfedc0000) [0x1000,
> >> 0x20000) ???
> >>                                       shouldn't it be [0x2000001000,
> >> 0x2000021000) ???
> >>
> It looks the SVQ iova tree lookup routine vhost_iova_tree_find_iova(),
> which is called from vhost_vdpa_listener_region_del(), can't properly
> deal with overlapped region. Specifically, q35's mch_realize() has the
> following:
>
> 579     memory_region_init_alias(&mch->open_high_smram, OBJECT(mch),
> "smram-open-high",
> 580                              mch->ram_memory,
> MCH_HOST_BRIDGE_SMRAM_C_BASE,
> 581                              MCH_HOST_BRIDGE_SMRAM_C_SIZE);
> 582     memory_region_add_subregion_overlap(mch->system_memory, 0xfeda0000,
> 583 &mch->open_high_smram, 1);
> 584     memory_region_set_enabled(&mch->open_high_smram, false);
>
> #0  0x0000564c30bf6980 in iova_tree_find_address_iterator
> (key=0x564c331cf8e0, value=0x564c331cf8e0, data=0x7fffb6d749b0) at
> ../util/iova-tree.c:96
> #1  0x00007f5f66479654 in g_tree_foreach () at /lib64/libglib-2.0.so.0
> #2  0x0000564c30bf6b53 in iova_tree_find_iova (tree=<optimized out>,
> map=map@entry=0x7fffb6d74a00) at ../util/iova-tree.c:114
> #3  0x0000564c309da0a9 in vhost_iova_tree_find_iova (tree=<optimized
> out>, map=map@entry=0x7fffb6d74a00) at ../hw/virtio/vhost-iova-tree.c:70
> #4  0x0000564c3085e49d in vhost_vdpa_listener_region_del
> (listener=0x564c331024c8, section=0x7fffb6d74aa0) at
> ../hw/virtio/vhost-vdpa.c:444
> #5  0x0000564c309f4931 in address_space_update_topology_pass
> (as=as@entry=0x564c31ab1840 <address_space_memory>,
> old_view=old_view@entry=0x564c33364cc0,
> new_view=new_view@entry=0x564c333640f0, adding=adding@entry=false) at
> ../system/memory.c:977
> #6  0x0000564c309f4dcd in address_space_set_flatview (as=0x564c31ab1840
> <address_space_memory>) at ../system/memory.c:1079
> #7  0x0000564c309f86d0 in memory_region_transaction_commit () at
> ../system/memory.c:1132
> #8  0x0000564c309f86d0 in memory_region_transaction_commit () at
> ../system/memory.c:1117
> #9  0x0000564c307cce64 in mch_realize (d=<optimized out>,
> errp=<optimized out>) at ../hw/pci-host/q35.c:584
>
> However, it looks like iova_tree_find_address_iterator() only check if
> the translated address (HVA) falls in to the range when trying to locate
> the desired IOVA, causing the first DMAMap that happens to overlap in
> the translated address (HVA) space to be returned prematurely:
>
>   89 static gboolean iova_tree_find_address_iterator(gpointer key,
> gpointer value,
>   90                                                 gpointer data)
>   91 {
>   :
>   :
>   99     if (map->translated_addr + map->size < needle->translated_addr ||
> 100         needle->translated_addr + needle->size < map->translated_addr) {
> 101         return false;
> 102     }
> 103
> 104     args->result = map;
> 105     return true;
> 106 }
>
> In the QEMU trace file, it reveals that the first DMAMap as below gets
> returned incorrectly instead the second, the latter of which is what the
> actual IOVA corresponds to:
>
> HVA                                                             GPA                                             IOVA
> [0x7f7903e00000, 0x7f7983e00000)        [0x0, 0x80000000)                       [0x1000, 0x80001000)
> [0x7f7903ea0000, 0x7f7903ec0000)        [0xfeda0000, 0xfedc0000)        [0x2000001000, 0x2000021000)
>

I think the analysis is totally accurate, as no code expects to unmap /
map overlapping regions. In particular, the vdpa kernel does not expect
it either.

Since it is issued at _realize, it should be ok to unmap the whole
region range and then map the right range again, even if that implies a
lot of unpinning / pinning.

>
> Maybe other than check the HVA range, we should also match GPA, or at
> least the size should exactly match?
>

The safe action here would be to unmap the whole memory chunk and then
map the overlapping memory? Or am I missing something?

Another thing I don't get: is this reproducible in previous versions? As
far as I understand, this bug was never found before. I guess this path
in q35's mch_realize is recent?

Thanks!

> > Yes, I'm still not able to reproduce. In particular, I don't know how
> > how the memory listener adds a region and then release a region with a
> > different size. I'm talking about these log entries:
> >
> > 1706854838.154394:vhost_vdpa_listener_region_add vdpa: 0x556d45c75140
> > iova 0x0 llend 0x80000000 vaddr: 0x7f7903e00000 read-only: 0
> > 452:vhost_vdpa_listener_region_del vdpa: 0x556d45c75140 iova 0x0 llend
> > 0x7fffffff
> Didn't see a different size here, though if you referred to the
> discrepancy in the traces around llend, I thought the two between _add()
> and _del() would have to be interpreted differently due to:
>
> 3d1e4d34 "vhost_vdpa: fix the input in
> trace_vhost_vdpa_listener_region_del()"
>
> Regards,
> -Siwei
> > Is it possible for you to also trace the skipped regions? We should
> > add a debug trace there too...
> >
> > Thanks!
> >
> >> PS, I will be taking off from today and for the next two weeks. Will try
> >> to help out looking more closely after I get back.
> >>
> >> -Siwei
> >>>    Can't merge patches which are known to break things ...
>
Si-Wei Liu April 3, 2024, 6:53 a.m. UTC | #9
On 4/2/2024 5:01 AM, Eugenio Perez Martin wrote:
> On Tue, Apr 2, 2024 at 8:19 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>>
>>
>> On 2/14/2024 11:11 AM, Eugenio Perez Martin wrote:
>>> On Wed, Feb 14, 2024 at 7:29 PM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>>>> Hi Michael,
>>>>
>>>> On 2/13/2024 2:22 AM, Michael S. Tsirkin wrote:
>>>>> On Mon, Feb 05, 2024 at 05:10:36PM -0800, Si-Wei Liu wrote:
>>>>>> Hi Eugenio,
>>>>>>
>>>>>> I thought this new code looks good to me and the original issue I saw with
>>>>>> x-svq=on should be gone. However, after rebase my tree on top of this,
>>>>>> there's a new failure I found around setting up guest mappings at early
>>>>>> boot, please see attached the specific QEMU config and corresponding event
>>>>>> traces. Haven't checked into the detail yet, thinking you would need to be
>>>>>> aware of ahead.
>>>>>>
>>>>>> Regards,
>>>>>> -Siwei
>>>>> Eugenio were you able to reproduce? Siwei did you have time to
>>>>> look into this?
>>>> Didn't get a chance to look into the detail yet in the past week, but
>>>> thought it may have something to do with the (internals of) iova tree
>>>> range allocation and the lookup routine. It started to fall apart at the
>>>> first vhost_vdpa_dma_unmap call showing up in the trace events, where it
>>>> should've gotten IOVA=0x2000001000,  but an incorrect IOVA address
>>>> 0x1000 was ended up returning from the iova tree lookup routine.
>>>>
>>>> HVA                    GPA                IOVA
>>>> -------------------------------------------------------------------------------------------------------------------------
>>>> Map
>>>> [0x7f7903e00000, 0x7f7983e00000)    [0x0, 0x80000000) [0x1000, 0x80000000)
>>>> [0x7f7983e00000, 0x7f9903e00000)    [0x100000000, 0x2080000000)
>>>> [0x80001000, 0x2000001000)
>>>> [0x7f7903ea0000, 0x7f7903ec0000)    [0xfeda0000, 0xfedc0000)
>>>> [0x2000001000, 0x2000021000)
>>>>
>>>> Unmap
>>>> [0x7f7903ea0000, 0x7f7903ec0000)    [0xfeda0000, 0xfedc0000) [0x1000,
>>>> 0x20000) ???
>>>>                                        shouldn't it be [0x2000001000,
>>>> 0x2000021000) ???
>>>>
>> It looks the SVQ iova tree lookup routine vhost_iova_tree_find_iova(),
>> which is called from vhost_vdpa_listener_region_del(), can't properly
>> deal with overlapped region. Specifically, q35's mch_realize() has the
>> following:
>>
>> 579     memory_region_init_alias(&mch->open_high_smram, OBJECT(mch),
>> "smram-open-high",
>> 580                              mch->ram_memory,
>> MCH_HOST_BRIDGE_SMRAM_C_BASE,
>> 581                              MCH_HOST_BRIDGE_SMRAM_C_SIZE);
>> 582     memory_region_add_subregion_overlap(mch->system_memory, 0xfeda0000,
>> 583 &mch->open_high_smram, 1);
>> 584     memory_region_set_enabled(&mch->open_high_smram, false);
>>
>> #0  0x0000564c30bf6980 in iova_tree_find_address_iterator
>> (key=0x564c331cf8e0, value=0x564c331cf8e0, data=0x7fffb6d749b0) at
>> ../util/iova-tree.c:96
>> #1  0x00007f5f66479654 in g_tree_foreach () at /lib64/libglib-2.0.so.0
>> #2  0x0000564c30bf6b53 in iova_tree_find_iova (tree=<optimized out>,
>> map=map@entry=0x7fffb6d74a00) at ../util/iova-tree.c:114
>> #3  0x0000564c309da0a9 in vhost_iova_tree_find_iova (tree=<optimized
>> out>, map=map@entry=0x7fffb6d74a00) at ../hw/virtio/vhost-iova-tree.c:70
>> #4  0x0000564c3085e49d in vhost_vdpa_listener_region_del
>> (listener=0x564c331024c8, section=0x7fffb6d74aa0) at
>> ../hw/virtio/vhost-vdpa.c:444
>> #5  0x0000564c309f4931 in address_space_update_topology_pass
>> (as=as@entry=0x564c31ab1840 <address_space_memory>,
>> old_view=old_view@entry=0x564c33364cc0,
>> new_view=new_view@entry=0x564c333640f0, adding=adding@entry=false) at
>> ../system/memory.c:977
>> #6  0x0000564c309f4dcd in address_space_set_flatview (as=0x564c31ab1840
>> <address_space_memory>) at ../system/memory.c:1079
>> #7  0x0000564c309f86d0 in memory_region_transaction_commit () at
>> ../system/memory.c:1132
>> #8  0x0000564c309f86d0 in memory_region_transaction_commit () at
>> ../system/memory.c:1117
>> #9  0x0000564c307cce64 in mch_realize (d=<optimized out>,
>> errp=<optimized out>) at ../hw/pci-host/q35.c:584
>>
>> However, it looks like iova_tree_find_address_iterator() only check if
>> the translated address (HVA) falls in to the range when trying to locate
>> the desired IOVA, causing the first DMAMap that happens to overlap in
>> the translated address (HVA) space to be returned prematurely:
>>
>>    89 static gboolean iova_tree_find_address_iterator(gpointer key,
>> gpointer value,
>>    90                                                 gpointer data)
>>    91 {
>>    :
>>    :
>>    99     if (map->translated_addr + map->size < needle->translated_addr ||
>> 100         needle->translated_addr + needle->size < map->translated_addr) {
>> 101         return false;
>> 102     }
>> 103
>> 104     args->result = map;
>> 105     return true;
>> 106 }
>>
>> In the QEMU trace file, it reveals that the first DMAMap as below gets
>> returned incorrectly instead the second, the latter of which is what the
>> actual IOVA corresponds to:
>>
>> HVA                                                             GPA                                             IOVA
>> [0x7f7903e00000, 0x7f7983e00000)        [0x0, 0x80000000)                       [0x1000, 0x80001000)
>> [0x7f7903ea0000, 0x7f7903ec0000)        [0xfeda0000, 0xfedc0000)        [0x2000001000, 0x2000021000)
>>
> I think the analysis is totally accurate as no code expects to unmap /
> map overlapping regions. In particular, vdpa kernel does not expect it
> either.

Maybe I missed something, but I don't see how the vdpa kernel prohibits
this overlapping case? The same HVA getting remapped under a different
GPA or IOVA range should be a valid use case, it seems. For example, I
don't see a mapping failure when x-svq=on is removed from QEMU's
vhost-vdpa arguments (you can see in the attached trace that
mch_realize, with exactly the same overlapping region, got it remapped
successfully and then unmapped right after).

> > Since it is issued at _realize, it should be ok to unmap the whole
> > region range and then map the right range again, even if that implies
> > a lot of unpinning / pinning.
You'll find that mch_realize() already does that: line 582 creates a 
mapping that overlaps the entire range, while line 584 disables (unmaps) 
this same mapping shortly after.

582     memory_region_add_subregion_overlap(mch->system_memory, 0xfeda0000,
583 &mch->open_high_smram, 1);
584     memory_region_set_enabled(&mch->open_high_smram, false);

So we don't have to add any special case to deal with these overlapping 
regions (just use the GPA as a hint to help identify which IOVA range it 
is trying to unmap). Now that the vdpa memory listener moves upfront to 
the earliest point of machine boot, the relevant SVQ code needs to be 
more flexible and tolerant of the extra churn while the rest of QEMU's 
subsystems stabilize the guest memory layout.
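
To illustrate what "match GPA, or at least the size should exactly match" could look like, here is a rough sketch of iova_tree_find_address_iterator() with only an extra size check added. This is a sketch, not a tested patch: DMAMap carries no GPA member in this tree, and the surrounding struct/field names are written from memory, so they may differ slightly from the actual code.

static gboolean iova_tree_find_address_iterator(gpointer key, gpointer value,
                                                gpointer data)
{
    const DMAMap *map = key;
    IOVATreeFindIOVAArgs *args = data;
    const DMAMap *needle = args->needle;

    if (map->translated_addr + map->size < needle->translated_addr ||
        needle->translated_addr + needle->size < map->translated_addr) {
        return false;
    }

    /* Overlapping HVA alone is not enough: skip maps of a different size,
     * so the big RAM mapping no longer shadows the small SMRAM alias. */
    if (map->size != needle->size) {
        return false;
    }

    args->result = map;
    return true;
}

Carrying the GPA in the tree (or passing the IOVA as a hint from the caller) would make the lookup unambiguous even for same-size aliases, but that needs a DMAMap / VhostIOVATree change rather than a one-line check.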
>> Maybe other than check the HVA range, we should also match GPA, or at
>> least the size should exactly match?
>>
> The safe action here would be to unmap the whole memory chunk and then
> map the overlapping memory again? Or am I missing something?
>
> Another thing I don't get, is this reproducible in previous versions?
It is not reproducible without this series. I think that may be because 
previously the vhost-vdpa memory listener was registered at a later 
point in time, after all the overlapped regions are gone and the memory 
layout has completely stabilized. Now that we move registration to init 
time, all the devils come out biting us.

With previous versions of this patch series we didn't get this far yet 
when x-svq=on is specified.

https://lore.kernel.org/qemu-devel/92ecfd90-8d06-4669-b260-a7a3b106277e@oracle.com/
> As far as I understand, this bug was never found before. I guess this
> path of q35's mch_realize is recent?
No, it's been there for quite a while.

Cheers,
-Siwei
>
> Thanks!
>
>>> Yes, I'm still not able to reproduce. In particular, I don't know how
>>> how the memory listener adds a region and then release a region with a
>>> different size. I'm talking about these log entries:
>>>
>>> 1706854838.154394:vhost_vdpa_listener_region_add vdpa: 0x556d45c75140
>>> iova 0x0 llend 0x80000000 vaddr: 0x7f7903e00000 read-only: 0
>>> 452:vhost_vdpa_listener_region_del vdpa: 0x556d45c75140 iova 0x0 llend
>>> 0x7fffffff
>> Didn't see a different size here, though if you referred to the
>> discrepancy in the traces around llend, I thought the two between _add()
>> and _del() would have to be interpreted differently due to:
>>
>> 3d1e4d34 "vhost_vdpa: fix the input in
>> trace_vhost_vdpa_listener_region_del()"
>>
>> Regards,
>> -Siwei
>>> Is it possible for you to also trace the skipped regions? We should
>>> add a debug trace there too...
>>>
>>> Thanks!
>>>
>>>> PS, I will be taking off from today and for the next two weeks. Will try
>>>> to help out looking more closely after I get back.
>>>>
>>>> -Siwei
>>>>>     Can't merge patches which are known to break things ...
+ qemu-system-x86_64 -no-user-config -nodefaults -machine type=pc-q35-8.0,dump-guest-core=off,accel=kvm,kernel_irqchip=split -cpu EPYC -smp 8,sockets=1,cores=8,threads=1 -m size=8G,slots=8,maxmem=256G -qmp unix:/tmp/q,server,nowait -serial mon:stdio -drive file=/usr/share/OVMF/OVMF_CODE.pure-efi.fd,index=0,if=pflash,format=raw,readonly=on -drive file=/usr/share/OVMF/OVMF_VARS.pure-efi.fd,index=1,if=pflash,format=raw -drive id=disk0,if=none,format=qcow2,file=/root/vdpa/tools/images/VM-el7.9-uefi-x86_64.qcow2 -device virtio-blk-pci,drive=disk0 -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-0,id=vhost-vdpa0, -device virtio-net-pci,netdev=vhost-vdpa0,id=vdpa0,bootindex=-1,page-per-vq=on,mac=e4:11:c6:d3:45:f0,ctrl_vq=on,mq=on,ctrl_vlan=off,vectors=6,host_mtu=9000,mrg_rxbuf=on,rx_queue_size=1024,disable-legacy=on -msg timestamp=on --trace events=/tmp/e
9371@1712120096.231743:vhost_vdpa_init dev: 0x5641ebbea650, common dev: 0x5641eb972fd0 vdpa: 0x7f6a968d7190
9371@1712120096.231769:vhost_vdpa_get_features dev: 0x5641ebbea650 features: 0x300c3982b
9371@1712120096.231774:vhost_vdpa_add_status dev: 0x5641ebbea650 status: 0x3
9371@1712120096.231781:vhost_vdpa_set_owner dev: 0x5641ebbea650
9371@1712120096.231788:vhost_vdpa_get_features dev: 0x5641ebbea650 features: 0x300c3982b
9371@1712120096.231791:vhost_vdpa_memslots_limit dev: 0x5641ebbea650 = 0x7fffffff
9371@1712120096.231795:vhost_vdpa_get_vq_index dev: 0x5641ebbea650 idx: 0 vq idx: 0
9371@1712120096.231802:vhost_vdpa_set_vring_call dev: 0x5641ebbea650 index: 0 fd: 17
9371@1712120096.231807:vhost_vdpa_get_vq_index dev: 0x5641ebbea650 idx: 1 vq idx: 1
9371@1712120096.231812:vhost_vdpa_set_vring_call dev: 0x5641ebbea650 index: 1 fd: 18
9371@1712120096.231819:vhost_vdpa_get_device_id dev: 0x5641ebbea650 device_id 1
9371@1712120096.231832:vhost_vdpa_init dev: 0x5641ebbeab70, common dev: 0x5641eb972fd0 vdpa: 0x7f6a9688e190
9371@1712120096.231837:vhost_vdpa_get_features dev: 0x5641ebbeab70 features: 0x300c3982b
9371@1712120096.231840:vhost_vdpa_memslots_limit dev: 0x5641ebbeab70 = 0x7fffffff
9371@1712120096.231843:vhost_vdpa_get_vq_index dev: 0x5641ebbeab70 idx: 0 vq idx: 0
9371@1712120096.231847:vhost_vdpa_set_vring_call dev: 0x5641ebbeab70 index: 0 fd: 19
9371@1712120096.231851:vhost_vdpa_get_vq_index dev: 0x5641ebbeab70 idx: 1 vq idx: 1
9371@1712120096.231856:vhost_vdpa_set_vring_call dev: 0x5641ebbeab70 index: 1 fd: 20
9371@1712120096.231860:vhost_vdpa_get_device_id dev: 0x5641ebbeab70 device_id 1
9371@1712120096.242481:vhost_vdpa_init dev: 0x5641ebbeb090, common dev: 0x5641eb972fd0 vdpa: 0x7f6a96845190
9371@1712120096.242491:vhost_vdpa_get_features dev: 0x5641ebbeb090 features: 0x300c3982b
9371@1712120096.242494:vhost_vdpa_memslots_limit dev: 0x5641ebbeb090 = 0x7fffffff
9371@1712120096.242497:vhost_vdpa_get_vq_index dev: 0x5641ebbeb090 idx: 0 vq idx: 0
9371@1712120096.242502:vhost_vdpa_set_vring_call dev: 0x5641ebbeb090 index: 0 fd: 21
9371@1712120096.242507:vhost_vdpa_get_device_id dev: 0x5641ebbeb090 device_id 1
9371@1712120096.246335:vhost_vdpa_listener_region_add vdpa: 0x5641eb972fd0 iova 0x0 llend 0x80000000 vaddr: 0x7f687fe00000 read-only: 0
9371@1712120096.246346:vhost_vdpa_listener_begin_batch vdpa_shared:0x5641eb972fd0 fd: 16 msg_type: 2 type: 5
9371@1712120096.246352:vhost_vdpa_dma_map vdpa_shared:0x5641eb972fd0 fd: 16 msg_type: 2 asid: 0 iova: 0x0 size: 0x80000000 uaddr: 0x7f687fe00000 perm: 0x3 type: 2
9371@1712120096.422415:vhost_vdpa_listener_commit vdpa_shared:0x5641eb972fd0 fd: 16 msg_type: 2 type: 6
9371@1712120096.424414:vhost_vdpa_listener_region_add vdpa: 0x5641eb972fd0 iova 0x100000000 llend 0x280000000 vaddr: 0x7f68ffe00000 read-only: 0
9371@1712120096.424424:vhost_vdpa_listener_begin_batch vdpa_shared:0x5641eb972fd0 fd: 16 msg_type: 2 type: 5
9371@1712120096.424429:vhost_vdpa_dma_map vdpa_shared:0x5641eb972fd0 fd: 16 msg_type: 2 asid: 0 iova: 0x100000000 size: 0x180000000 uaddr: 0x7f68ffe00000 perm: 0x3 type: 2
9371@1712120096.959565:vhost_vdpa_listener_commit vdpa_shared:0x5641eb972fd0 fd: 16 msg_type: 2 type: 6
9371@1712120096.969950:vhost_vdpa_listener_region_add vdpa: 0x5641eb972fd0 iova 0xfeda0000 llend 0xfedc0000 vaddr: 0x7f687fea0000 read-only: 0
9371@1712120096.969962:vhost_vdpa_listener_begin_batch vdpa_shared:0x5641eb972fd0 fd: 16 msg_type: 2 type: 5
9371@1712120096.969967:vhost_vdpa_dma_map vdpa_shared:0x5641eb972fd0 fd: 16 msg_type: 2 asid: 0 iova: 0xfeda0000 size: 0x20000 uaddr: 0x7f687fea0000 perm: 0x3 type: 2
9371@1712120096.969992:vhost_vdpa_listener_commit vdpa_shared:0x5641eb972fd0 fd: 16 msg_type: 2 type: 6
9371@1712120096.976797:vhost_vdpa_listener_region_del vdpa: 0x5641eb972fd0 iova 0xfeda0000 llend 0xfedbffff
9371@1712120096.976807:vhost_vdpa_listener_begin_batch vdpa_shared:0x5641eb972fd0 fd: 16 msg_type: 2 type: 5
9371@1712120096.976811:vhost_vdpa_dma_unmap vdpa_shared:0x5641eb972fd0 fd: 16 msg_type: 2 asid: 0 iova: 0xfeda0000 size: 0x20000 type: 3
9371@1712120096.976833:vhost_vdpa_listener_commit vdpa_shared:0x5641eb972fd0 fd: 16 msg_type: 2 type: 6
9371@1712120096.991764:vhost_vdpa_set_config dev: 0x5641ebbea650 offset: 0 size: 6 flags: 0x0
9371@1712120096.991779:vhost_vdpa_dump_config dev: 0x5641ebbea650 0000:  e4 11 c6 d3  45 f0                                
9371@1712120096.997715:vhost_vdpa_listener_region_del vdpa: 0x5641eb972fd0 iova 0x0 llend 0x7fffffff
9371@1712120096.997728:vhost_vdpa_listener_begin_batch vdpa_shared:0x5641eb972fd0 fd: 16 msg_type: 2 type: 5
9371@1712120096.997734:vhost_vdpa_dma_unmap vdpa_shared:0x5641eb972fd0 fd: 16 msg_type: 2 asid: 0 iova: 0x0 size: 0x80000000 type: 3
9371@1712120097.053665:vhost_vdpa_listener_region_add vdpa: 0x5641eb972fd0 iova 0x0 llend 0xc0000 vaddr: 0x7f687fe00000 read-only: 0
9371@1712120097.053708:vhost_vdpa_dma_map vdpa_shared:0x5641eb972fd0 fd: 16 msg_type: 2 asid: 0 iova: 0x0 size: 0xc0000 uaddr: 0x7f687fe00000 perm: 0x3 type: 2
9371@1712120097.053758:vhost_vdpa_listener_region_add vdpa: 0x5641eb972fd0 iova 0x100000 llend 0x80000000 vaddr: 0x7f687ff00000 read-only: 0
9371@1712120097.053761:vhost_vdpa_dma_map vdpa_shared:0x5641eb972fd0 fd: 16 msg_type: 2 asid: 0 iova: 0x100000 size: 0x7ff00000 uaddr: 0x7f687ff00000 perm: 0x3 type: 2
9371@1712120097.065117:vhost_vdpa_listener_commit vdpa_shared:0x5641eb972fd0 fd: 16 msg_type: 2 type: 6
9377@1712120099.655612:vhost_vdpa_get_config dev: 0x5641ebbea650 config: 0x7f6a851ff460 config_len: 12
9377@1712120099.655656:vhost_vdpa_dump_config dev: 0x5641ebbea650 0000:  e4 11 c6 d3  45 f0 01 00  02 00 28 23             
9377@1712120099.655666:vhost_vdpa_get_config dev: 0x5641ebbea650 config: 0x7f6a851ff460 config_len: 12
9377@1712120099.655672:vhost_vdpa_dump_config dev: 0x5641ebbea650 0000:  e4 11 c6 d3  45 f0 01 00  02 00 28 23             
9377@1712120099.655681:vhost_vdpa_get_config dev: 0x5641ebbea650 config: 0x7f6a851ff460 config_len: 12
9377@1712120099.655686:vhost_vdpa_dump_config dev: 0x5641ebbea650 0000:  e4 11 c6 d3  45 f0 01 00  02 00 28 23             
9377@1712120099.655695:vhost_vdpa_get_config dev: 0x5641ebbea650 config: 0x7f6a851ff460 config_len: 12
9377@1712120099.655700:vhost_vdpa_dump_config dev: 0x5641ebbea650 0000:  e4 11 c6 d3  45 f0 01 00  02 00 28 23             
9377@1712120099.655709:vhost_vdpa_get_config dev: 0x5641ebbea650 config: 0x7f6a851ff460 config_len: 12
9377@1712120099.655714:vhost_vdpa_dump_config dev: 0x5641ebbea650 0000:  e4 11 c6 d3  45 f0 01 00  02 00 28 23             
9377@1712120099.655723:vhost_vdpa_get_config dev: 0x5641ebbea650 config: 0x7f6a851ff460 config_len: 12
9377@1712120099.655728:vhost_vdpa_dump_config dev: 0x5641ebbea650 0000:  e4 11 c6 d3  45 f0 01 00  02 00 28 23             
9377@1712120099.655736:vhost_vdpa_get_config dev: 0x5641ebbea650 config: 0x7f6a851ff460 config_len: 12
9377@1712120099.655742:vhost_vdpa_dump_config dev: 0x5641ebbea650 0000:  e4 11 c6 d3  45 f0 01 00  02 00 28 23             
9377@1712120099.655750:vhost_vdpa_get_config dev: 0x5641ebbea650 config: 0x7f6a851ff460 config_len: 12
9377@1712120099.655756:vhost_vdpa_dump_config dev: 0x5641ebbea650 0000:  e4 11 c6 d3  45 f0 01 00  02 00 28 23             
9377@1712120099.658600:vhost_vdpa_get_vq_index dev: 0x5641ebbea650 idx: 0 vq idx: 0
9377@1712120099.658612:vhost_vdpa_set_vring_call dev: 0x5641ebbea650 index: 0 fd: 43
9377@1712120099.658631:vhost_vdpa_get_vq_index dev: 0x5641ebbea650 idx: 1 vq idx: 1
9377@1712120099.658635:vhost_vdpa_set_vring_call dev: 0x5641ebbea650 index: 1 fd: 44
9377@1712120099.658753:vhost_vdpa_set_features dev: 0x5641ebbea650 features: 0x300000008
9377@1712120099.658758:vhost_vdpa_add_status dev: 0x5641ebbea650 status: 0x8
9377@1712120099.658764:vhost_vdpa_set_mem_table dev: 0x5641ebbea650 nregions: 3 padding: 0x0
9377@1712120099.658767:vhost_vdpa_dump_regions dev: 0x5641ebbea650 0: guest_phys_addr: 0x0 memory_size: 0xc0000 userspace_addr: 0x7f687fe00000 flags_padding: 0x0
9377@1712120099.658771:vhost_vdpa_dump_regions dev: 0x5641ebbea650 1: guest_phys_addr: 0x100000 memory_size: 0x7ff00000 userspace_addr: 0x7f687ff00000 flags_padding: 0x0
9377@1712120099.658774:vhost_vdpa_dump_regions dev: 0x5641ebbea650 2: guest_phys_addr: 0x100000000 memory_size: 0x180000000 userspace_addr: 0x7f68ffe00000 flags_padding: 0x0
9377@1712120099.658777:vhost_vdpa_get_vq_index dev: 0x5641ebbea650 idx: 0 vq idx: 0
9377@1712120099.658781:vhost_vdpa_set_vring_num dev: 0x5641ebbea650 index: 0 num: 256
9377@1712120099.658786:vhost_vdpa_set_vring_base dev: 0x5641ebbea650 index: 0 num: 0
9377@1712120099.658792:vhost_vdpa_vq_get_addr dev: 0x5641ebbea650 vq: 0x5641ebbea8d0 desc_user_addr: 0x7e45b000 avail_user_addr: 0x7e45c000 used_user_addr: 0x7e45d000
9377@1712120099.658796:vhost_vdpa_set_vring_addr dev: 0x5641ebbea650 index: 0 flags: 0x0 desc_user_addr: 0x7e45b000 used_user_addr: 0x7e45d000 avail_user_addr: 0x7e45c000 log_guest_addr: 0x7e45d000
9377@1712120099.658800:vhost_vdpa_set_vring_kick dev: 0x5641ebbea650 index: 0 fd: 46
9377@1712120099.658807:vhost_vdpa_get_vq_index dev: 0x5641ebbea650 idx: 1 vq idx: 1
9377@1712120099.658810:vhost_vdpa_set_vring_num dev: 0x5641ebbea650 index: 1 num: 256
9377@1712120099.658814:vhost_vdpa_set_vring_base dev: 0x5641ebbea650 index: 1 num: 0
9377@1712120099.658818:vhost_vdpa_vq_get_addr dev: 0x5641ebbea650 vq: 0x5641ebbea950 desc_user_addr: 0x7e45f000 avail_user_addr: 0x7e460000 used_user_addr: 0x7e461000
9377@1712120099.658821:vhost_vdpa_set_vring_addr dev: 0x5641ebbea650 index: 1 flags: 0x0 desc_user_addr: 0x7e45f000 used_user_addr: 0x7e461000 avail_user_addr: 0x7e460000 log_guest_addr: 0x7e461000
9377@1712120099.658825:vhost_vdpa_set_vring_kick dev: 0x5641ebbea650 index: 1 fd: 47
9377@1712120099.658833:vhost_vdpa_dev_start dev: 0x5641ebbea650 started: 1
9377@1712120099.659208:vhost_vdpa_set_mem_table dev: 0x5641ebbea650 nregions: 5 padding: 0x0
9377@1712120099.659217:vhost_vdpa_dump_regions dev: 0x5641ebbea650 0: guest_phys_addr: 0x0 memory_size: 0xc0000 userspace_addr: 0x7f687fe00000 flags_padding: 0x0
9377@1712120099.659220:vhost_vdpa_dump_regions dev: 0x5641ebbea650 1: guest_phys_addr: 0x100000 memory_size: 0x7ff00000 userspace_addr: 0x7f687ff00000 flags_padding: 0x0
9377@1712120099.659224:vhost_vdpa_dump_regions dev: 0x5641ebbea650 2: guest_phys_addr: 0x100000000 memory_size: 0x180000000 userspace_addr: 0x7f68ffe00000 flags_padding: 0x0
9377@1712120099.659227:vhost_vdpa_dump_regions dev: 0x5641ebbea650 3: guest_phys_addr: 0x4800003000 memory_size: 0x1000 userspace_addr: 0x7f6a96aa7000 flags_padding: 0x0
9377@1712120099.659230:vhost_vdpa_dump_regions dev: 0x5641ebbea650 4: guest_phys_addr: 0x4800004000 memory_size: 0x1000 userspace_addr: 0x7f6a96a39000 flags_padding: 0x0
9377@1712120099.659312:vhost_vdpa_add_status dev: 0x5641ebbea650 status: 0x4
9377@1712120099.696381:vhost_vdpa_set_config_call dev: 0x5641ebbea650 fd: 45
9377@1712120099.696551:vhost_vdpa_set_vring_ready dev: 0x5641ebbea650, idx: 0, r: 0
9377@1712120099.696644:vhost_vdpa_set_vring_ready dev: 0x5641ebbea650, idx: 1, r: 0
Eugenio Perez Martin April 3, 2024, 8:46 a.m. UTC | #10
On Wed, Apr 3, 2024 at 8:53 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>
>
>
> On 4/2/2024 5:01 AM, Eugenio Perez Martin wrote:
> > On Tue, Apr 2, 2024 at 8:19 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
> >>
> >>
> >> On 2/14/2024 11:11 AM, Eugenio Perez Martin wrote:
> >>> On Wed, Feb 14, 2024 at 7:29 PM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
> >>>> Hi Michael,
> >>>>
> >>>> On 2/13/2024 2:22 AM, Michael S. Tsirkin wrote:
> >>>>> On Mon, Feb 05, 2024 at 05:10:36PM -0800, Si-Wei Liu wrote:
> >>>>>> Hi Eugenio,
> >>>>>>
> >>>>>> I thought this new code looks good to me and the original issue I saw with
> >>>>>> x-svq=on should be gone. However, after rebase my tree on top of this,
> >>>>>> there's a new failure I found around setting up guest mappings at early
> >>>>>> boot, please see attached the specific QEMU config and corresponding event
> >>>>>> traces. Haven't checked into the detail yet, thinking you would need to be
> >>>>>> aware of ahead.
> >>>>>>
> >>>>>> Regards,
> >>>>>> -Siwei
> >>>>> Eugenio were you able to reproduce? Siwei did you have time to
> >>>>> look into this?
> >>>> Didn't get a chance to look into the detail yet in the past week, but
> >>>> thought it may have something to do with the (internals of) iova tree
> >>>> range allocation and the lookup routine. It started to fall apart at the
> >>>> first vhost_vdpa_dma_unmap call showing up in the trace events, where it
> >>>> should've gotten IOVA=0x2000001000,  but an incorrect IOVA address
> >>>> 0x1000 was ended up returning from the iova tree lookup routine.
> >>>>
> >>>> HVA                    GPA                IOVA
> >>>> -------------------------------------------------------------------------------------------------------------------------
> >>>> Map
> >>>> [0x7f7903e00000, 0x7f7983e00000)    [0x0, 0x80000000) [0x1000, 0x80000000)
> >>>> [0x7f7983e00000, 0x7f9903e00000)    [0x100000000, 0x2080000000)
> >>>> [0x80001000, 0x2000001000)
> >>>> [0x7f7903ea0000, 0x7f7903ec0000)    [0xfeda0000, 0xfedc0000)
> >>>> [0x2000001000, 0x2000021000)
> >>>>
> >>>> Unmap
> >>>> [0x7f7903ea0000, 0x7f7903ec0000)    [0xfeda0000, 0xfedc0000) [0x1000,
> >>>> 0x20000) ???
> >>>>                                        shouldn't it be [0x2000001000,
> >>>> 0x2000021000) ???
> >>>>
> >> It looks the SVQ iova tree lookup routine vhost_iova_tree_find_iova(),
> >> which is called from vhost_vdpa_listener_region_del(), can't properly
> >> deal with overlapped region. Specifically, q35's mch_realize() has the
> >> following:
> >>
> >> 579     memory_region_init_alias(&mch->open_high_smram, OBJECT(mch),
> >> "smram-open-high",
> >> 580                              mch->ram_memory,
> >> MCH_HOST_BRIDGE_SMRAM_C_BASE,
> >> 581                              MCH_HOST_BRIDGE_SMRAM_C_SIZE);
> >> 582     memory_region_add_subregion_overlap(mch->system_memory, 0xfeda0000,
> >> 583 &mch->open_high_smram, 1);
> >> 584     memory_region_set_enabled(&mch->open_high_smram, false);
> >>
> >> #0  0x0000564c30bf6980 in iova_tree_find_address_iterator
> >> (key=0x564c331cf8e0, value=0x564c331cf8e0, data=0x7fffb6d749b0) at
> >> ../util/iova-tree.c:96
> >> #1  0x00007f5f66479654 in g_tree_foreach () at /lib64/libglib-2.0.so.0
> >> #2  0x0000564c30bf6b53 in iova_tree_find_iova (tree=<optimized out>,
> >> map=map@entry=0x7fffb6d74a00) at ../util/iova-tree.c:114
> >> #3  0x0000564c309da0a9 in vhost_iova_tree_find_iova (tree=<optimized
> >> out>, map=map@entry=0x7fffb6d74a00) at ../hw/virtio/vhost-iova-tree.c:70
> >> #4  0x0000564c3085e49d in vhost_vdpa_listener_region_del
> >> (listener=0x564c331024c8, section=0x7fffb6d74aa0) at
> >> ../hw/virtio/vhost-vdpa.c:444
> >> #5  0x0000564c309f4931 in address_space_update_topology_pass
> >> (as=as@entry=0x564c31ab1840 <address_space_memory>,
> >> old_view=old_view@entry=0x564c33364cc0,
> >> new_view=new_view@entry=0x564c333640f0, adding=adding@entry=false) at
> >> ../system/memory.c:977
> >> #6  0x0000564c309f4dcd in address_space_set_flatview (as=0x564c31ab1840
> >> <address_space_memory>) at ../system/memory.c:1079
> >> #7  0x0000564c309f86d0 in memory_region_transaction_commit () at
> >> ../system/memory.c:1132
> >> #8  0x0000564c309f86d0 in memory_region_transaction_commit () at
> >> ../system/memory.c:1117
> >> #9  0x0000564c307cce64 in mch_realize (d=<optimized out>,
> >> errp=<optimized out>) at ../hw/pci-host/q35.c:584
> >>
> >> However, it looks like iova_tree_find_address_iterator() only check if
> >> the translated address (HVA) falls in to the range when trying to locate
> >> the desired IOVA, causing the first DMAMap that happens to overlap in
> >> the translated address (HVA) space to be returned prematurely:
> >>
> >>    89 static gboolean iova_tree_find_address_iterator(gpointer key,
> >> gpointer value,
> >>    90                                                 gpointer data)
> >>    91 {
> >>    :
> >>    :
> >>    99     if (map->translated_addr + map->size < needle->translated_addr ||
> >> 100         needle->translated_addr + needle->size < map->translated_addr) {
> >> 101         return false;
> >> 102     }
> >> 103
> >> 104     args->result = map;
> >> 105     return true;
> >> 106 }
> >>
> >> In the QEMU trace file, it reveals that the first DMAMap as below gets
> >> returned incorrectly instead the second, the latter of which is what the
> >> actual IOVA corresponds to:
> >>
> >> HVA                                                             GPA                                             IOVA
> >> [0x7f7903e00000, 0x7f7983e00000)        [0x0, 0x80000000)                       [0x1000, 0x80001000)
> >> [0x7f7903ea0000, 0x7f7903ec0000)        [0xfeda0000, 0xfedc0000)        [0x2000001000, 0x2000021000)
> >>
> > I think the analysis is totally accurate as no code expects to unmap /
> > map overlapping regions. In particular, vdpa kernel does not expect it
> > either.
>
> Maybe I missed something, but I don't see how the vdpa kernel prohibits
> this overlapping case? The same HVA getting remapped under a different
> GPA or IOVA range should be a valid use case, it seems. For example, I
> don't see a mapping failure when x-svq=on is removed from QEMU's
> vhost-vdpa arguments (you can see in the attached trace that
> mch_realize, with exactly the same overlapping region, got it remapped
> successfully and then unmapped right after).
>

I meant removing part of a map. You can see that vhost_vdpa_pa_unmap /
vhost_vdpa_va_unmap removes the whole map, not only a portion of it,
much like iova_tree does.
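
For reference, the kernel side behaves roughly like the loop below. This is a paraphrase from memory, not the exact driver source; unpin_map_pages() is a placeholder for the real per-page unpin code.

struct vhost_iotlb_map *map;

/* Every map intersecting [start, last] is dropped in full, so a request
 * to unmap only part of a map still removes (and unpins) the whole map. */
while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
    unpin_map_pages(map);             /* placeholder: unpin all pages of this map */
    vhost_iotlb_map_free(iotlb, map); /* frees the entire map, not a slice of it */
}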

> > Since it is issued at _realize, it should be ok to unmap the whole
> > region range and then map the right range again, even if that implies
> > a lot of unpinning / pinning.
> You'll find that mch_realize() already does that: line 582 creates a
> mapping that overlaps the entire range, while line 584 disables (unmaps)
> this same mapping shortly after.
>
> 582     memory_region_add_subregion_overlap(mch->system_memory, 0xfeda0000,
> 583 &mch->open_high_smram, 1);
> 584     memory_region_set_enabled(&mch->open_high_smram, false);
>
> So we don't have to add any special case to deal with these overlapping
> regions (just use the GPA as a hint to help identify which IOVA range it
> is trying to unmap). Now that the vdpa memory listener moves upfront to
> the earliest point of machine boot, the relevant SVQ code needs to be
> more flexible and tolerant of the extra churn while the rest of QEMU's
> subsystems stabilize the guest memory layout.

Yes I think the same.

> >> Maybe other than check the HVA range, we should also match GPA, or at
> >> least the size should exactly match?
> >>
> > The safe action here would be to unmap the whole memory chunk and then
> > map the overlapping memory again? Or am I missing something?
> >
> > Another thing I don't get, is this reproducible in previous versions?
> It is not reproducible without this series. I think that may be because
> previously the vhost-vdpa memory listener was registered at a later
> point in time, after all the overlapped regions are gone and the memory
> layout has completely stabilized. Now that we move registration to init
> time, all the devils come out biting us.
>
> With previous versions of this patch series we didn't get this far yet
> when x-svq=on is specified.
>

Right, good point.

Thanks!

> https://lore.kernel.org/qemu-devel/92ecfd90-8d06-4669-b260-a7a3b106277e@oracle.com/
> > As far as I understand, this bug was never found before. I guess this
> > path of q35's mch_realize is recent?
> No, it's been there for quite a while.
>
> Cheers,
> -Siwei
> >
> > Thanks!
> >
> >>> Yes, I'm still not able to reproduce. In particular, I don't know how
> >>> how the memory listener adds a region and then release a region with a
> >>> different size. I'm talking about these log entries:
> >>>
> >>> 1706854838.154394:vhost_vdpa_listener_region_add vdpa: 0x556d45c75140
> >>> iova 0x0 llend 0x80000000 vaddr: 0x7f7903e00000 read-only: 0
> >>> 452:vhost_vdpa_listener_region_del vdpa: 0x556d45c75140 iova 0x0 llend
> >>> 0x7fffffff
> >> Didn't see a different size here, though if you referred to the
> >> discrepancy in the traces around llend, I thought the two between _add()
> >> and _del() would have to be interpreted differently due to:
> >>
> >> 3d1e4d34 "vhost_vdpa: fix the input in
> >> trace_vhost_vdpa_listener_region_del()"
> >>
> >> Regards,
> >> -Siwei
> >>> Is it possible for you to also trace the skipped regions? We should
> >>> add a debug trace there too...
> >>>
> >>> Thanks!
> >>>
> >>>> PS, I will be taking off from today and for the next two weeks. Will try
> >>>> to help out looking more closely after I get back.
> >>>>
> >>>> -Siwei
> >>>>>     Can't merge patches which are known to break things ...
diff mbox series

Patch

diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
index 03ed2f2be3..ad754eb803 100644
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -37,7 +37,21 @@  typedef struct vhost_vdpa_shared {
     struct vhost_vdpa_iova_range iova_range;
     QLIST_HEAD(, vdpa_iommu) iommu_list;
 
-    /* IOVA mapping used by the Shadow Virtqueue */
+    /*
+     * IOVA mapping used by the Shadow Virtqueue
+     *
+     * It is shared among all ASID for simplicity, whether CVQ shares ASID with
+     * guest or not:
+     * - Memory listener need access to guest's memory addresses allocated in
+     *   the IOVA tree.
+     * - There should be plenty of IOVA address space for both ASID not to
+     *   worry about collisions between them.  Guest's translations are still
+     *   validated with virtio virtqueue_pop so there is no risk for the guest
+     *   to access memory that it shouldn't.
+     *
+     * To allocate a iova tree per ASID is doable but it complicates the code
+     * and it is not worth it for the moment.
+     */
     VhostIOVATree *iova_tree;
 
     /* Copy of backend features */
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index cc589dd148..57edcf34d0 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -232,6 +232,7 @@  static void vhost_vdpa_cleanup(NetClientState *nc)
         return;
     }
     qemu_close(s->vhost_vdpa.shared->device_fd);
+    g_clear_pointer(&s->vhost_vdpa.shared->iova_tree, vhost_iova_tree_delete);
     g_free(s->vhost_vdpa.shared);
 }
 
@@ -329,16 +330,8 @@  static void vdpa_net_migration_state_notifier(Notifier *notifier, void *data)
 
 static void vhost_vdpa_net_data_start_first(VhostVDPAState *s)
 {
-    struct vhost_vdpa *v = &s->vhost_vdpa;
-
     migration_add_notifier(&s->migration_state,
                            vdpa_net_migration_state_notifier);
-
-    /* iova_tree may be initialized by vhost_vdpa_net_load_setup */
-    if (v->shadow_vqs_enabled && !v->shared->iova_tree) {
-        v->shared->iova_tree = vhost_iova_tree_new(v->shared->iova_range.first,
-                                                   v->shared->iova_range.last);
-    }
 }
 
 static int vhost_vdpa_net_data_start(NetClientState *nc)
@@ -383,19 +376,12 @@  static int vhost_vdpa_net_data_load(NetClientState *nc)
 static void vhost_vdpa_net_client_stop(NetClientState *nc)
 {
     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
-    struct vhost_dev *dev;
 
     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
 
     if (s->vhost_vdpa.index == 0) {
         migration_remove_notifier(&s->migration_state);
     }
-
-    dev = s->vhost_vdpa.dev;
-    if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
-        g_clear_pointer(&s->vhost_vdpa.shared->iova_tree,
-                        vhost_iova_tree_delete);
-    }
 }
 
 static NetClientInfo net_vhost_vdpa_info = {
@@ -557,24 +543,6 @@  out:
         return 0;
     }
 
-    /*
-     * If other vhost_vdpa already have an iova_tree, reuse it for simplicity,
-     * whether CVQ shares ASID with guest or not, because:
-     * - Memory listener need access to guest's memory addresses allocated in
-     *   the IOVA tree.
-     * - There should be plenty of IOVA address space for both ASID not to
-     *   worry about collisions between them.  Guest's translations are still
-     *   validated with virtio virtqueue_pop so there is no risk for the guest
-     *   to access memory that it shouldn't.
-     *
-     * To allocate a iova tree per ASID is doable but it complicates the code
-     * and it is not worth it for the moment.
-     */
-    if (!v->shared->iova_tree) {
-        v->shared->iova_tree = vhost_iova_tree_new(v->shared->iova_range.first,
-                                                   v->shared->iova_range.last);
-    }
-
     r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer,
                                vhost_vdpa_net_cvq_cmd_page_len(), false);
     if (unlikely(r < 0)) {
@@ -1674,6 +1642,8 @@  static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
         s->vhost_vdpa.shared->device_fd = vdpa_device_fd;
         s->vhost_vdpa.shared->iova_range = iova_range;
         s->vhost_vdpa.shared->shadow_data = svq;
+        s->vhost_vdpa.shared->iova_tree = vhost_iova_tree_new(iova_range.first,
+                                                              iova_range.last);
     } else if (!is_datapath) {
         s->cvq_cmd_out_buffer = mmap(NULL, vhost_vdpa_net_cvq_cmd_page_len(),
                                      PROT_READ | PROT_WRITE,