diff mbox series

[for,8.1,v2,5/6] vdpa: move CVQ isolation check to net_init_vhost_vdpa

Message ID 20230323195404.1247326-6-eperezma@redhat.com (mailing list archive)
State New, archived
Headers show
Series Move ASID test to vhost-vdpa net initialization | expand

Commit Message

Eugenio Perez Martin March 23, 2023, 7:54 p.m. UTC
Evaluating it at start time instead of initialization time may make the
guest capable of dynamically adding or removing migration blockers.

Also, moving to initialization reduces the number of ioctls in the
migration, reducing failure possibilities.

As a drawback we need to check for CVQ isolation twice: one time with no
MQ negotiated and another one acking it, as long as the device supports
it.  This is because Vring ASID / group management is based on vq
indexes, but we don't know the index of CVQ before negotiating MQ.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
---
v2: Take out the reset of the device from vhost_vdpa_cvq_is_isolated
---
 net/vhost-vdpa.c | 194 ++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 151 insertions(+), 43 deletions(-)

Comments

Stefano Garzarella March 27, 2023, 9:36 a.m. UTC | #1
On Thu, Mar 23, 2023 at 08:54:03PM +0100, Eugenio Pérez wrote:
>Evaluating it at start time instead of initialization time may make the
>guest capable of dynamically adding or removing migration blockers.
>
>Also, moving to initialization reduces the number of ioctls in the
>migration, reducing failure possibilities.
>
>As a drawback we need to check for CVQ isolation twice: one time with no
>MQ negotiated and another one acking it, as long as the device supports
>it.  This is because Vring ASID / group management is based on vq
>indexes, but we don't know the index of CVQ before negotiating MQ.

I don't know this code sufficiently to do a review, but now I understand
the motivation behind it ;-)

Thanks,
Stefano

>
>Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
>---
>v2: Take out the reset of the device from vhost_vdpa_cvq_is_isolated
>---
> net/vhost-vdpa.c | 194 ++++++++++++++++++++++++++++++++++++-----------
> 1 file changed, 151 insertions(+), 43 deletions(-)
>
>diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
>index 4397c0d4b3..db2c9afcb3 100644
>--- a/net/vhost-vdpa.c
>+++ b/net/vhost-vdpa.c
>@@ -43,6 +43,13 @@ typedef struct VhostVDPAState {
>
>     /* The device always have SVQ enabled */
>     bool always_svq;
>+
>+    /* The device can isolate CVQ in its own ASID if MQ is negotiated */
>+    bool cvq_isolated_mq;
>+
>+    /* The device can isolate CVQ in its own ASID if MQ is not negotiated */
>+    bool cvq_isolated;
>+
>     bool started;
> } VhostVDPAState;
>
>@@ -361,15 +368,8 @@ static NetClientInfo net_vhost_vdpa_info = {
>         .check_peer_type = vhost_vdpa_check_peer_type,
> };
>
>-/**
>- * Get vring virtqueue group
>- *
>- * @device_fd  vdpa device fd
>- * @vq_index   Virtqueue index
>- *
>- * Return -errno in case of error, or vq group if success.
>- */
>-static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index)
>+static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index,
>+                                          Error **errp)
> {
>     struct vhost_vring_state state = {
>         .index = vq_index,
>@@ -378,8 +378,7 @@ static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index)
>
>     if (unlikely(r < 0)) {
>         r = -errno;
>-        error_report("Cannot get VQ %u group: %s", vq_index,
>-                     g_strerror(errno));
>+        error_setg_errno(errp, errno, "Cannot get VQ %u group", vq_index);
>         return r;
>     }
>
>@@ -479,9 +478,9 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc)
> {
>     VhostVDPAState *s, *s0;
>     struct vhost_vdpa *v;
>-    uint64_t backend_features;
>     int64_t cvq_group;
>-    int cvq_index, r;
>+    int r;
>+    Error *err = NULL;
>
>     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
>
>@@ -501,42 +500,29 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc)
>     /*
>      * If we early return in these cases SVQ will not be enabled. The migration
>      * will be blocked as long as vhost-vdpa backends will not offer _F_LOG.
>-     *
>-     * Calling VHOST_GET_BACKEND_FEATURES as they are not available in v->dev
>-     * yet.
>      */
>-    r = ioctl(v->device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features);
>-    if (unlikely(r < 0)) {
>-        error_report("Cannot get vdpa backend_features: %s(%d)",
>-            g_strerror(errno), errno);
>-        return -1;
>-    }
>-    if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)) ||
>-        !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) {
>+    if (!vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) {
>         return 0;
>     }
>
>-    /*
>-     * Check if all the virtqueues of the virtio device are in a different vq
>-     * than the last vq. VQ group of last group passed in cvq_group.
>-     */
>-    cvq_index = v->dev->vq_index_end - 1;
>-    cvq_group = vhost_vdpa_get_vring_group(v->device_fd, cvq_index);
>-    if (unlikely(cvq_group < 0)) {
>-        return cvq_group;
>-    }
>-    for (int i = 0; i < cvq_index; ++i) {
>-        int64_t group = vhost_vdpa_get_vring_group(v->device_fd, i);
>-
>-        if (unlikely(group < 0)) {
>-            return group;
>+    if (v->dev->features & BIT_ULL(VIRTIO_NET_F_MQ)) {
>+        if (!s->cvq_isolated_mq) {
>+            return 0;
>         }
>-
>-        if (group == cvq_group) {
>+    } else {
>+        if (!s->cvq_isolated) {
>             return 0;
>         }
>     }
>
>+    cvq_group = vhost_vdpa_get_vring_group(v->device_fd,
>+                                           v->dev->vq_index_end - 1,
>+                                           &err);
>+    if (unlikely(cvq_group < 0)) {
>+        error_report_err(err);
>+        return cvq_group;
>+    }
>+
>     r = vhost_vdpa_set_address_space_id(v, cvq_group, VHOST_VDPA_NET_CVQ_ASID);
>     if (unlikely(r < 0)) {
>         return r;
>@@ -798,6 +784,116 @@ static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = {
>     .avail_handler = vhost_vdpa_net_handle_ctrl_avail,
> };
>
>+/**
>+ * Probe the device to check control virtqueue is isolated.
>+ *
>+ * @device_fd vhost-vdpa file descriptor
>+ * @features features to negotiate
>+ * @cvq_index Control vq index
>+ *
>+ * Returns -1 in case of error, 0 if false and 1 if true
>+ */
>+static int vhost_vdpa_cvq_is_isolated(int device_fd, uint64_t features,
>+                                      unsigned cvq_index, Error **errp)
>+{
>+    int64_t cvq_group;
>+    int r;
>+
>+    r = vhost_vdpa_set_dev_features_fd(device_fd, features);
>+    if (unlikely(r < 0)) {
>+        error_setg_errno(errp, -r, "Cannot set device features");
>+        return r;
>+    }
>+
>+    cvq_group = vhost_vdpa_get_vring_group(device_fd, cvq_index, errp);
>+    if (unlikely(cvq_group < 0)) {
>+        return cvq_group;
>+    }
>+
>+    for (int i = 0; i < cvq_index; ++i) {
>+        int64_t group = vhost_vdpa_get_vring_group(device_fd, i, errp);
>+
>+        if (unlikely(group < 0)) {
>+            return group;
>+        }
>+
>+        if (group == (int64_t)cvq_group) {
>+            return 0;
>+        }
>+    }
>+
>+    return 1;
>+}
>+
>+/**
>+ * Probe if CVQ is isolated when the device is MQ and when it is not MQ
>+ *
>+ * @device_fd         The vdpa device fd
>+ * @features          Features offered by the device.
>+ * @cvq_index         The control vq index if mq is negotiated. Ignored
>+ *                    otherwise.
>+ * @cvq_isolated      It'll be set to true if cvq is isolated if mq is not
>+ *                    negotiated.
>+ * @cvq_isolated_mq   It'll be set to true if cvq is isolated if mq is
>+ *                    negotiated.
>+ *
>+ * Returns -1 in case of failure
>+ */
>+static int vhost_vdpa_probe_cvq_isolation(int device_fd, uint64_t features,
>+                                          int cvq_index, bool *cvq_isolated,
>+                                          bool *cvq_isolated_mq, Error **errp)
>+{
>+    uint64_t backend_features;
>+    int r;
>+
>+    ERRP_GUARD();
>+
>+    *cvq_isolated = false;
>+    *cvq_isolated_mq = false;
>+    r = ioctl(device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features);
>+    if (unlikely(r < 0)) {
>+        error_setg_errno(errp, errno, "Cannot get vdpa backend_features");
>+        return r;
>+    }
>+
>+    if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID))) {
>+        return 0;
>+    }
>+
>+    r = vhost_vdpa_cvq_is_isolated(device_fd,
>+                                   features & ~BIT_ULL(VIRTIO_NET_F_MQ), 2,
>+                                   errp);
>+    if (unlikely(r < 0)) {
>+        if (r == -ENOTSUP) {
>+            /*
>+             * The kernel report VHOST_BACKEND_F_IOTLB_ASID if the vdpa
>+             * frontend support ASID but the parent driver does not.  The CVQ
>+             * cannot be isolated in this case.
>+             */
>+            error_free(*errp);
>+            *errp = NULL;
>+            return 0;
>+        }
>+
>+        return r;
>+    }
>+
>+    *cvq_isolated = r == 1;
>+    vhost_vdpa_reset_status_fd(device_fd);
>+    if (!(features & BIT_ULL(VIRTIO_NET_F_MQ))) {
>+        return 0;
>+    }
>+
>+    r = vhost_vdpa_cvq_is_isolated(device_fd, features, cvq_index * 2, errp);
>+    if (unlikely(r < 0)) {
>+        return r;
>+    }
>+
>+    *cvq_isolated_mq = r == 1;
>+    vhost_vdpa_reset_status_fd(device_fd);
>+    return 0;
>+}
>+
> static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
>                                        const char *device,
>                                        const char *name,
>@@ -807,16 +903,26 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
>                                        bool is_datapath,
>                                        bool svq,
>                                        struct vhost_vdpa_iova_range iova_range,
>-                                       uint64_t features)
>+                                       uint64_t features,
>+                                       Error **errp)
> {
>     NetClientState *nc = NULL;
>     VhostVDPAState *s;
>     int ret = 0;
>     assert(name);
>+    bool cvq_isolated, cvq_isolated_mq;
>+
>     if (is_datapath) {
>         nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device,
>                                  name);
>     } else {
>+        ret = vhost_vdpa_probe_cvq_isolation(vdpa_device_fd, features,
>+                                             queue_pair_index, &cvq_isolated,
>+                                             &cvq_isolated_mq, errp);
>+        if (unlikely(ret)) {
>+            return NULL;
>+        }
>+
>         nc = qemu_new_net_control_client(&net_vhost_vdpa_cvq_info, peer,
>                                          device, name);
>     }
>@@ -843,6 +949,8 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
>
>         s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
>         s->vhost_vdpa.shadow_vq_ops_opaque = s;
>+        s->cvq_isolated = cvq_isolated;
>+        s->cvq_isolated_mq = cvq_isolated_mq;
>
>         /*
>          * TODO: We cannot migrate devices with CVQ as there is no way to set
>@@ -971,7 +1079,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
>     for (i = 0; i < queue_pairs; i++) {
>         ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
>                                      vdpa_device_fd, i, 2, true, opts->x_svq,
>-                                     iova_range, features);
>+                                     iova_range, features, errp);
>         if (!ncs[i])
>             goto err;
>     }
>@@ -979,7 +1087,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
>     if (has_cvq) {
>         nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
>                                  vdpa_device_fd, i, 1, false,
>-                                 opts->x_svq, iova_range, features);
>+                                 opts->x_svq, iova_range, features, errp);
>         if (!nc)
>             goto err;
>     }
>-- 
>2.31.1
>
Jason Wang March 30, 2023, 6:20 a.m. UTC | #2
On Fri, Mar 24, 2023 at 3:54 AM Eugenio Pérez <eperezma@redhat.com> wrote:
>
> Evaluating it at start time instead of initialization time may make the
> guest capable of dynamically adding or removing migration blockers.
>
> Also, moving to initialization reduces the number of ioctls in the
> migration, reducing failure possibilities.
>
> As a drawback we need to check for CVQ isolation twice: one time with no
> MQ negotiated and another one acking it, as long as the device supports
> it.  This is because Vring ASID / group management is based on vq
> indexes, but we don't know the index of CVQ before negotiating MQ.

We need to fail if we see a device that can isolate cvq without MQ but
not with MQ.

>
> Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> ---
> v2: Take out the reset of the device from vhost_vdpa_cvq_is_isolated
> ---
>  net/vhost-vdpa.c | 194 ++++++++++++++++++++++++++++++++++++-----------
>  1 file changed, 151 insertions(+), 43 deletions(-)
>
> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> index 4397c0d4b3..db2c9afcb3 100644
> --- a/net/vhost-vdpa.c
> +++ b/net/vhost-vdpa.c
> @@ -43,6 +43,13 @@ typedef struct VhostVDPAState {
>
>      /* The device always have SVQ enabled */
>      bool always_svq;
> +
> +    /* The device can isolate CVQ in its own ASID if MQ is negotiated */
> +    bool cvq_isolated_mq;
> +
> +    /* The device can isolate CVQ in its own ASID if MQ is not negotiated */
> +    bool cvq_isolated;

As stated above, if we need a device that cvq_isolated_mq^cvq_isolated
== true, we need to fail. This may reduce the complexity of the code?

Thanks

> +
>      bool started;
>  } VhostVDPAState;
>
> @@ -361,15 +368,8 @@ static NetClientInfo net_vhost_vdpa_info = {
>          .check_peer_type = vhost_vdpa_check_peer_type,
>  };
>
> -/**
> - * Get vring virtqueue group
> - *
> - * @device_fd  vdpa device fd
> - * @vq_index   Virtqueue index
> - *
> - * Return -errno in case of error, or vq group if success.
> - */
> -static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index)
> +static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index,
> +                                          Error **errp)
>  {
>      struct vhost_vring_state state = {
>          .index = vq_index,
> @@ -378,8 +378,7 @@ static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index)
>
>      if (unlikely(r < 0)) {
>          r = -errno;
> -        error_report("Cannot get VQ %u group: %s", vq_index,
> -                     g_strerror(errno));
> +        error_setg_errno(errp, errno, "Cannot get VQ %u group", vq_index);
>          return r;
>      }
>
> @@ -479,9 +478,9 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc)
>  {
>      VhostVDPAState *s, *s0;
>      struct vhost_vdpa *v;
> -    uint64_t backend_features;
>      int64_t cvq_group;
> -    int cvq_index, r;
> +    int r;
> +    Error *err = NULL;
>
>      assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
>
> @@ -501,42 +500,29 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc)
>      /*
>       * If we early return in these cases SVQ will not be enabled. The migration
>       * will be blocked as long as vhost-vdpa backends will not offer _F_LOG.
> -     *
> -     * Calling VHOST_GET_BACKEND_FEATURES as they are not available in v->dev
> -     * yet.
>       */
> -    r = ioctl(v->device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features);
> -    if (unlikely(r < 0)) {
> -        error_report("Cannot get vdpa backend_features: %s(%d)",
> -            g_strerror(errno), errno);
> -        return -1;
> -    }
> -    if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)) ||
> -        !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) {
> +    if (!vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) {
>          return 0;
>      }
>
> -    /*
> -     * Check if all the virtqueues of the virtio device are in a different vq
> -     * than the last vq. VQ group of last group passed in cvq_group.
> -     */
> -    cvq_index = v->dev->vq_index_end - 1;
> -    cvq_group = vhost_vdpa_get_vring_group(v->device_fd, cvq_index);
> -    if (unlikely(cvq_group < 0)) {
> -        return cvq_group;
> -    }
> -    for (int i = 0; i < cvq_index; ++i) {
> -        int64_t group = vhost_vdpa_get_vring_group(v->device_fd, i);
> -
> -        if (unlikely(group < 0)) {
> -            return group;
> +    if (v->dev->features & BIT_ULL(VIRTIO_NET_F_MQ)) {
> +        if (!s->cvq_isolated_mq) {
> +            return 0;
>          }
> -
> -        if (group == cvq_group) {
> +    } else {
> +        if (!s->cvq_isolated) {
>              return 0;
>          }
>      }
>
> +    cvq_group = vhost_vdpa_get_vring_group(v->device_fd,
> +                                           v->dev->vq_index_end - 1,
> +                                           &err);
> +    if (unlikely(cvq_group < 0)) {
> +        error_report_err(err);
> +        return cvq_group;
> +    }
> +
>      r = vhost_vdpa_set_address_space_id(v, cvq_group, VHOST_VDPA_NET_CVQ_ASID);
>      if (unlikely(r < 0)) {
>          return r;
> @@ -798,6 +784,116 @@ static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = {
>      .avail_handler = vhost_vdpa_net_handle_ctrl_avail,
>  };
>
> +/**
> + * Probe the device to check control virtqueue is isolated.
> + *
> + * @device_fd vhost-vdpa file descriptor
> + * @features features to negotiate
> + * @cvq_index Control vq index
> + *
> + * Returns -1 in case of error, 0 if false and 1 if true
> + */
> +static int vhost_vdpa_cvq_is_isolated(int device_fd, uint64_t features,
> +                                      unsigned cvq_index, Error **errp)
> +{
> +    int64_t cvq_group;
> +    int r;
> +
> +    r = vhost_vdpa_set_dev_features_fd(device_fd, features);
> +    if (unlikely(r < 0)) {
> +        error_setg_errno(errp, -r, "Cannot set device features");
> +        return r;
> +    }
> +
> +    cvq_group = vhost_vdpa_get_vring_group(device_fd, cvq_index, errp);
> +    if (unlikely(cvq_group < 0)) {
> +        return cvq_group;
> +    }
> +
> +    for (int i = 0; i < cvq_index; ++i) {
> +        int64_t group = vhost_vdpa_get_vring_group(device_fd, i, errp);
> +
> +        if (unlikely(group < 0)) {
> +            return group;
> +        }
> +
> +        if (group == (int64_t)cvq_group) {
> +            return 0;
> +        }
> +    }
> +
> +    return 1;
> +}
> +
> +/**
> + * Probe if CVQ is isolated when the device is MQ and when it is not MQ
> + *
> + * @device_fd         The vdpa device fd
> + * @features          Features offered by the device.
> + * @cvq_index         The control vq index if mq is negotiated. Ignored
> + *                    otherwise.
> + * @cvq_isolated      It'll be set to true if cvq is isolated if mq is not
> + *                    negotiated.
> + * @cvq_isolated_mq   It'll be set to true if cvq is isolated if mq is
> + *                    negotiated.
> + *
> + * Returns -1 in case of failure
> + */
> +static int vhost_vdpa_probe_cvq_isolation(int device_fd, uint64_t features,
> +                                          int cvq_index, bool *cvq_isolated,
> +                                          bool *cvq_isolated_mq, Error **errp)
> +{
> +    uint64_t backend_features;
> +    int r;
> +
> +    ERRP_GUARD();
> +
> +    *cvq_isolated = false;
> +    *cvq_isolated_mq = false;
> +    r = ioctl(device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features);
> +    if (unlikely(r < 0)) {
> +        error_setg_errno(errp, errno, "Cannot get vdpa backend_features");
> +        return r;
> +    }
> +
> +    if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID))) {
> +        return 0;
> +    }
> +
> +    r = vhost_vdpa_cvq_is_isolated(device_fd,
> +                                   features & ~BIT_ULL(VIRTIO_NET_F_MQ), 2,
> +                                   errp);
> +    if (unlikely(r < 0)) {
> +        if (r == -ENOTSUP) {
> +            /*
> +             * The kernel report VHOST_BACKEND_F_IOTLB_ASID if the vdpa
> +             * frontend support ASID but the parent driver does not.  The CVQ
> +             * cannot be isolated in this case.
> +             */
> +            error_free(*errp);
> +            *errp = NULL;
> +            return 0;
> +        }
> +
> +        return r;
> +    }
> +
> +    *cvq_isolated = r == 1;
> +    vhost_vdpa_reset_status_fd(device_fd);
> +    if (!(features & BIT_ULL(VIRTIO_NET_F_MQ))) {
> +        return 0;
> +    }
> +
> +    r = vhost_vdpa_cvq_is_isolated(device_fd, features, cvq_index * 2, errp);
> +    if (unlikely(r < 0)) {
> +        return r;
> +    }
> +
> +    *cvq_isolated_mq = r == 1;
> +    vhost_vdpa_reset_status_fd(device_fd);
> +    return 0;
> +}
> +
>  static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
>                                         const char *device,
>                                         const char *name,
> @@ -807,16 +903,26 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
>                                         bool is_datapath,
>                                         bool svq,
>                                         struct vhost_vdpa_iova_range iova_range,
> -                                       uint64_t features)
> +                                       uint64_t features,
> +                                       Error **errp)
>  {
>      NetClientState *nc = NULL;
>      VhostVDPAState *s;
>      int ret = 0;
>      assert(name);
> +    bool cvq_isolated, cvq_isolated_mq;
> +
>      if (is_datapath) {
>          nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device,
>                                   name);
>      } else {
> +        ret = vhost_vdpa_probe_cvq_isolation(vdpa_device_fd, features,
> +                                             queue_pair_index, &cvq_isolated,
> +                                             &cvq_isolated_mq, errp);
> +        if (unlikely(ret)) {
> +            return NULL;
> +        }
> +
>          nc = qemu_new_net_control_client(&net_vhost_vdpa_cvq_info, peer,
>                                           device, name);
>      }
> @@ -843,6 +949,8 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
>
>          s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
>          s->vhost_vdpa.shadow_vq_ops_opaque = s;
> +        s->cvq_isolated = cvq_isolated;
> +        s->cvq_isolated_mq = cvq_isolated_mq;
>
>          /*
>           * TODO: We cannot migrate devices with CVQ as there is no way to set
> @@ -971,7 +1079,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
>      for (i = 0; i < queue_pairs; i++) {
>          ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
>                                       vdpa_device_fd, i, 2, true, opts->x_svq,
> -                                     iova_range, features);
> +                                     iova_range, features, errp);
>          if (!ncs[i])
>              goto err;
>      }
> @@ -979,7 +1087,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
>      if (has_cvq) {
>          nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
>                                   vdpa_device_fd, i, 1, false,
> -                                 opts->x_svq, iova_range, features);
> +                                 opts->x_svq, iova_range, features, errp);
>          if (!nc)
>              goto err;
>      }
> --
> 2.31.1
>
Jason Wang March 30, 2023, 6:23 a.m. UTC | #3
On Thu, Mar 30, 2023 at 2:20 PM Jason Wang <jasowang@redhat.com> wrote:
>
> On Fri, Mar 24, 2023 at 3:54 AM Eugenio Pérez <eperezma@redhat.com> wrote:
> >
> > Evaluating it at start time instead of initialization time may make the
> > guest capable of dynamically adding or removing migration blockers.
> >
> > Also, moving to initialization reduces the number of ioctls in the
> > migration, reducing failure possibilities.
> >
> > As a drawback we need to check for CVQ isolation twice: one time with no
> > MQ negotiated and another one acking it, as long as the device supports
> > it.  This is because Vring ASID / group management is based on vq
> > indexes, but we don't know the index of CVQ before negotiating MQ.
>
> We need to fail if we see a device that can isolate cvq without MQ but
> not with MQ.
>
> >
> > Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> > ---
> > v2: Take out the reset of the device from vhost_vdpa_cvq_is_isolated
> > ---
> >  net/vhost-vdpa.c | 194 ++++++++++++++++++++++++++++++++++++-----------
> >  1 file changed, 151 insertions(+), 43 deletions(-)
> >
> > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > index 4397c0d4b3..db2c9afcb3 100644
> > --- a/net/vhost-vdpa.c
> > +++ b/net/vhost-vdpa.c
> > @@ -43,6 +43,13 @@ typedef struct VhostVDPAState {
> >
> >      /* The device always have SVQ enabled */
> >      bool always_svq;
> > +
> > +    /* The device can isolate CVQ in its own ASID if MQ is negotiated */
> > +    bool cvq_isolated_mq;
> > +
> > +    /* The device can isolate CVQ in its own ASID if MQ is not negotiated */
> > +    bool cvq_isolated;
>
> As stated above, if we need a device that cvq_isolated_mq^cvq_isolated
> == true, we need to fail. This may reduce the complexity of the code?
>
> Thanks

Since we are the mediation layer, Qemu can alway choose to negotiate
MQ regardless whether or not it is supported by the guest. In this
way, we can have a stable virtqueue index for cvq.

Thanks
Eugenio Perez Martin March 30, 2023, 10:42 a.m. UTC | #4
On Thu, Mar 30, 2023 at 8:23 AM Jason Wang <jasowang@redhat.com> wrote:
>
> On Thu, Mar 30, 2023 at 2:20 PM Jason Wang <jasowang@redhat.com> wrote:
> >
> > On Fri, Mar 24, 2023 at 3:54 AM Eugenio Pérez <eperezma@redhat.com> wrote:
> > >
> > > Evaluating it at start time instead of initialization time may make the
> > > guest capable of dynamically adding or removing migration blockers.
> > >
> > > Also, moving to initialization reduces the number of ioctls in the
> > > migration, reducing failure possibilities.
> > >
> > > As a drawback we need to check for CVQ isolation twice: one time with no
> > > MQ negotiated and another one acking it, as long as the device supports
> > > it.  This is because Vring ASID / group management is based on vq
> > > indexes, but we don't know the index of CVQ before negotiating MQ.
> >
> > We need to fail if we see a device that can isolate cvq without MQ but
> > not with MQ.
> >
> > >
> > > Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> > > ---
> > > v2: Take out the reset of the device from vhost_vdpa_cvq_is_isolated
> > > ---
> > >  net/vhost-vdpa.c | 194 ++++++++++++++++++++++++++++++++++++-----------
> > >  1 file changed, 151 insertions(+), 43 deletions(-)
> > >
> > > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > > index 4397c0d4b3..db2c9afcb3 100644
> > > --- a/net/vhost-vdpa.c
> > > +++ b/net/vhost-vdpa.c
> > > @@ -43,6 +43,13 @@ typedef struct VhostVDPAState {
> > >
> > >      /* The device always have SVQ enabled */
> > >      bool always_svq;
> > > +
> > > +    /* The device can isolate CVQ in its own ASID if MQ is negotiated */
> > > +    bool cvq_isolated_mq;
> > > +
> > > +    /* The device can isolate CVQ in its own ASID if MQ is not negotiated */
> > > +    bool cvq_isolated;
> >
> > As stated above, if we need a device that cvq_isolated_mq^cvq_isolated
> > == true, we need to fail. This may reduce the complexity of the code?
> >
> > Thanks
>
> Since we are the mediation layer, Qemu can alway choose to negotiate
> MQ regardless whether or not it is supported by the guest. In this
> way, we can have a stable virtqueue index for cvq.
>

I think it is a great idea and it simplifies this patch somehow.
However, we need something like the queue mapping [1] to do so :).

To double confirm:
* If the device supports MQ, only probe MQ. If not, only probe !MQ.
* Only store cvq_isolated in VhostVDPAState.

Now, if the device does not negotiate MQ but the device supports MQ:
* All the requests to queue 3 must be redirected to the last queue in
the device. That includes set_vq_address, notifiers regions, etc.

I'm totally ok to go this route but it's not immediate.

Thanks!

[1] https://lists.gnu.org/archive/html/qemu-devel/2023-01/msg07157.html
Jason Wang March 31, 2023, 8 a.m. UTC | #5
在 2023/3/30 18:42, Eugenio Perez Martin 写道:
> On Thu, Mar 30, 2023 at 8:23 AM Jason Wang <jasowang@redhat.com> wrote:
>> On Thu, Mar 30, 2023 at 2:20 PM Jason Wang <jasowang@redhat.com> wrote:
>>> On Fri, Mar 24, 2023 at 3:54 AM Eugenio Pérez <eperezma@redhat.com> wrote:
>>>> Evaluating it at start time instead of initialization time may make the
>>>> guest capable of dynamically adding or removing migration blockers.
>>>>
>>>> Also, moving to initialization reduces the number of ioctls in the
>>>> migration, reducing failure possibilities.
>>>>
>>>> As a drawback we need to check for CVQ isolation twice: one time with no
>>>> MQ negotiated and another one acking it, as long as the device supports
>>>> it.  This is because Vring ASID / group management is based on vq
>>>> indexes, but we don't know the index of CVQ before negotiating MQ.
>>> We need to fail if we see a device that can isolate cvq without MQ but
>>> not with MQ.
>>>
>>>> Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
>>>> ---
>>>> v2: Take out the reset of the device from vhost_vdpa_cvq_is_isolated
>>>> ---
>>>>   net/vhost-vdpa.c | 194 ++++++++++++++++++++++++++++++++++++-----------
>>>>   1 file changed, 151 insertions(+), 43 deletions(-)
>>>>
>>>> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
>>>> index 4397c0d4b3..db2c9afcb3 100644
>>>> --- a/net/vhost-vdpa.c
>>>> +++ b/net/vhost-vdpa.c
>>>> @@ -43,6 +43,13 @@ typedef struct VhostVDPAState {
>>>>
>>>>       /* The device always have SVQ enabled */
>>>>       bool always_svq;
>>>> +
>>>> +    /* The device can isolate CVQ in its own ASID if MQ is negotiated */
>>>> +    bool cvq_isolated_mq;
>>>> +
>>>> +    /* The device can isolate CVQ in its own ASID if MQ is not negotiated */
>>>> +    bool cvq_isolated;
>>> As stated above, if we need a device that cvq_isolated_mq^cvq_isolated
>>> == true, we need to fail. This may reduce the complexity of the code?
>>>
>>> Thanks
>> Since we are the mediation layer, Qemu can alway choose to negotiate
>> MQ regardless whether or not it is supported by the guest. In this
>> way, we can have a stable virtqueue index for cvq.
>>
> I think it is a great idea and it simplifies this patch somehow.
> However, we need something like the queue mapping [1] to do so :).
>
> To double confirm:
> * If the device supports MQ, only probe MQ. If not, only probe !MQ.
> * Only store cvq_isolated in VhostVDPAState.
>
> Now, if the device does not negotiate MQ but the device supports MQ:


I'm not sure I understand here, if device supports MQ it should accepts 
MQ or we can fail the initialization here.


> * All the requests to queue 3 must be redirected to the last queue in
> the device. That includes set_vq_address, notifiers regions, etc.


This also means we will only mediate the case:

1) Qemu emulated virtio-net has 1 queue but device support multiple queue

but not

2) Qemu emulated virtio-net has M queue but device support N queue (N>M)


>
> I'm totally ok to go this route but it's not immediate.


Yes but I mean, we can start from failing the device if 
cvq_isolated_mq^cvq_isolated == true (or I wonder if we can meet this 
condition for any existing parents).

Thanks


>
> Thanks!
>
> [1] https://lists.gnu.org/archive/html/qemu-devel/2023-01/msg07157.html
>
Eugenio Perez Martin March 31, 2023, 10:11 a.m. UTC | #6
On Fri, Mar 31, 2023 at 10:00 AM Jason Wang <jasowang@redhat.com> wrote:
>
>
> 在 2023/3/30 18:42, Eugenio Perez Martin 写道:
> > On Thu, Mar 30, 2023 at 8:23 AM Jason Wang <jasowang@redhat.com> wrote:
> >> On Thu, Mar 30, 2023 at 2:20 PM Jason Wang <jasowang@redhat.com> wrote:
> >>> On Fri, Mar 24, 2023 at 3:54 AM Eugenio Pérez <eperezma@redhat.com> wrote:
> >>>> Evaluating it at start time instead of initialization time may make the
> >>>> guest capable of dynamically adding or removing migration blockers.
> >>>>
> >>>> Also, moving to initialization reduces the number of ioctls in the
> >>>> migration, reducing failure possibilities.
> >>>>
> >>>> As a drawback we need to check for CVQ isolation twice: one time with no
> >>>> MQ negotiated and another one acking it, as long as the device supports
> >>>> it.  This is because Vring ASID / group management is based on vq
> >>>> indexes, but we don't know the index of CVQ before negotiating MQ.
> >>> We need to fail if we see a device that can isolate cvq without MQ but
> >>> not with MQ.
> >>>
> >>>> Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> >>>> ---
> >>>> v2: Take out the reset of the device from vhost_vdpa_cvq_is_isolated
> >>>> ---
> >>>>   net/vhost-vdpa.c | 194 ++++++++++++++++++++++++++++++++++++-----------
> >>>>   1 file changed, 151 insertions(+), 43 deletions(-)
> >>>>
> >>>> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> >>>> index 4397c0d4b3..db2c9afcb3 100644
> >>>> --- a/net/vhost-vdpa.c
> >>>> +++ b/net/vhost-vdpa.c
> >>>> @@ -43,6 +43,13 @@ typedef struct VhostVDPAState {
> >>>>
> >>>>       /* The device always have SVQ enabled */
> >>>>       bool always_svq;
> >>>> +
> >>>> +    /* The device can isolate CVQ in its own ASID if MQ is negotiated */
> >>>> +    bool cvq_isolated_mq;
> >>>> +
> >>>> +    /* The device can isolate CVQ in its own ASID if MQ is not negotiated */
> >>>> +    bool cvq_isolated;
> >>> As stated above, if we need a device that cvq_isolated_mq^cvq_isolated
> >>> == true, we need to fail. This may reduce the complexity of the code?
> >>>
> >>> Thanks
> >> Since we are the mediation layer, Qemu can alway choose to negotiate
> >> MQ regardless whether or not it is supported by the guest. In this
> >> way, we can have a stable virtqueue index for cvq.
> >>
> > I think it is a great idea and it simplifies this patch somehow.
> > However, we need something like the queue mapping [1] to do so :).
> >
> > To double confirm:
> > * If the device supports MQ, only probe MQ. If not, only probe !MQ.
> > * Only store cvq_isolated in VhostVDPAState.
> >
> > Now, if the device does not negotiate MQ but the device supports MQ:
>
>
> I'm not sure I understand here, if device supports MQ it should accepts
> MQ or we can fail the initialization here.
>

My fault, I wanted to say "if the device offers MQ but the driver does
not acks it".

>
> > * All the requests to queue 3 must be redirected to the last queue in
> > the device. That includes set_vq_address, notifiers regions, etc.
>
>
> This also means we will only mediate the case:
>
> 1) Qemu emulated virtio-net has 1 queue but device support multiple queue
>
> but not
>
> 2) Qemu emulated virtio-net has M queue but device support N queue (N>M)
>

Right.

>
> >
> > I'm totally ok to go this route but it's not immediate.
>
>
> Yes but I mean, we can start from failing the device if
> cvq_isolated_mq^cvq_isolated == true
>

So probe the two cases but set VhostVDPAState->cvq_isolated =
cvq_isolated && cvq_mq_isolated then? No map involved that way, and
all parents should behave that way.

> (or I wonder if we can meet this condition for any existing parents).

I don't think so, but I think we need to probe the two anyway.
Otherwise we may change the dataplane asid too.

Thanks!
Jason Wang April 3, 2023, 5:32 a.m. UTC | #7
On Fri, Mar 31, 2023 at 6:12 PM Eugenio Perez Martin
<eperezma@redhat.com> wrote:
>
> On Fri, Mar 31, 2023 at 10:00 AM Jason Wang <jasowang@redhat.com> wrote:
> >
> >
> > 在 2023/3/30 18:42, Eugenio Perez Martin 写道:
> > > On Thu, Mar 30, 2023 at 8:23 AM Jason Wang <jasowang@redhat.com> wrote:
> > >> On Thu, Mar 30, 2023 at 2:20 PM Jason Wang <jasowang@redhat.com> wrote:
> > >>> On Fri, Mar 24, 2023 at 3:54 AM Eugenio Pérez <eperezma@redhat.com> wrote:
> > >>>> Evaluating it at start time instead of initialization time may make the
> > >>>> guest capable of dynamically adding or removing migration blockers.
> > >>>>
> > >>>> Also, moving to initialization reduces the number of ioctls in the
> > >>>> migration, reducing failure possibilities.
> > >>>>
> > >>>> As a drawback we need to check for CVQ isolation twice: one time with no
> > >>>> MQ negotiated and another one acking it, as long as the device supports
> > >>>> it.  This is because Vring ASID / group management is based on vq
> > >>>> indexes, but we don't know the index of CVQ before negotiating MQ.
> > >>> We need to fail if we see a device that can isolate cvq without MQ but
> > >>> not with MQ.
> > >>>
> > >>>> Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> > >>>> ---
> > >>>> v2: Take out the reset of the device from vhost_vdpa_cvq_is_isolated
> > >>>> ---
> > >>>>   net/vhost-vdpa.c | 194 ++++++++++++++++++++++++++++++++++++-----------
> > >>>>   1 file changed, 151 insertions(+), 43 deletions(-)
> > >>>>
> > >>>> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > >>>> index 4397c0d4b3..db2c9afcb3 100644
> > >>>> --- a/net/vhost-vdpa.c
> > >>>> +++ b/net/vhost-vdpa.c
> > >>>> @@ -43,6 +43,13 @@ typedef struct VhostVDPAState {
> > >>>>
> > >>>>       /* The device always have SVQ enabled */
> > >>>>       bool always_svq;
> > >>>> +
> > >>>> +    /* The device can isolate CVQ in its own ASID if MQ is negotiated */
> > >>>> +    bool cvq_isolated_mq;
> > >>>> +
> > >>>> +    /* The device can isolate CVQ in its own ASID if MQ is not negotiated */
> > >>>> +    bool cvq_isolated;
> > >>> As stated above, if we need a device that cvq_isolated_mq^cvq_isolated
> > >>> == true, we need to fail. This may reduce the complexity of the code?
> > >>>
> > >>> Thanks
> > >> Since we are the mediation layer, Qemu can alway choose to negotiate
> > >> MQ regardless whether or not it is supported by the guest. In this
> > >> way, we can have a stable virtqueue index for cvq.
> > >>
> > > I think it is a great idea and it simplifies this patch somehow.
> > > However, we need something like the queue mapping [1] to do so :).
> > >
> > > To double confirm:
> > > * If the device supports MQ, only probe MQ. If not, only probe !MQ.
> > > * Only store cvq_isolated in VhostVDPAState.
> > >
> > > Now, if the device does not negotiate MQ but the device supports MQ:
> >
> >
> > I'm not sure I understand here, if device supports MQ it should accepts
> > MQ or we can fail the initialization here.
> >
>
> My fault, I wanted to say "if the device offers MQ but the driver does
> not acks it".
>
> >
> > > * All the requests to queue 3 must be redirected to the last queue in
> > > the device. That includes set_vq_address, notifiers regions, etc.
> >
> >
> > This also means we will only mediate the case:
> >
> > 1) Qemu emulated virtio-net has 1 queue but device support multiple queue
> >
> > but not
> >
> > 2) Qemu emulated virtio-net has M queue but device support N queue (N>M)
> >
>
> Right.
>
> >
> > >
> > > I'm totally ok to go this route but it's not immediate.
> >
> >
> > Yes but I mean, we can start from failing the device if
> > cvq_isolated_mq^cvq_isolated == true
> >
>
> So probe the two cases but set VhostVDPAState->cvq_isolated =
> cvq_isolated && cvq_mq_isolated then? No map involved that way, and
> all parents should behave that way.
>
> > (or I wonder if we can meet this condition for any existing parents).
>
> I don't think so, but I think we need to probe the two anyway.
> Otherwise we may change the dataplane asid too.

Just to make sure we are at the same page, I meant we could fail the
initialization of vhost-vDPA is the device:

1) can isolate cvq in the case of singqueue but not multiqueue

or

2) can isolate cvq in the case of multiqueue but not single queue

Because I don't think there are any parents that have such a buggy
implementation.

Thanks

>
> Thanks!
>
Eugenio Perez Martin April 3, 2023, 5:21 p.m. UTC | #8
On Mon, Apr 3, 2023 at 7:32 AM Jason Wang <jasowang@redhat.com> wrote:
>
> On Fri, Mar 31, 2023 at 6:12 PM Eugenio Perez Martin
> <eperezma@redhat.com> wrote:
> >
> > On Fri, Mar 31, 2023 at 10:00 AM Jason Wang <jasowang@redhat.com> wrote:
> > >
> > >
> > > 在 2023/3/30 18:42, Eugenio Perez Martin 写道:
> > > > On Thu, Mar 30, 2023 at 8:23 AM Jason Wang <jasowang@redhat.com> wrote:
> > > >> On Thu, Mar 30, 2023 at 2:20 PM Jason Wang <jasowang@redhat.com> wrote:
> > > >>> On Fri, Mar 24, 2023 at 3:54 AM Eugenio Pérez <eperezma@redhat.com> wrote:
> > > >>>> Evaluating it at start time instead of initialization time may make the
> > > >>>> guest capable of dynamically adding or removing migration blockers.
> > > >>>>
> > > >>>> Also, moving to initialization reduces the number of ioctls in the
> > > >>>> migration, reducing failure possibilities.
> > > >>>>
> > > >>>> As a drawback we need to check for CVQ isolation twice: one time with no
> > > >>>> MQ negotiated and another one acking it, as long as the device supports
> > > >>>> it.  This is because Vring ASID / group management is based on vq
> > > >>>> indexes, but we don't know the index of CVQ before negotiating MQ.
> > > >>> We need to fail if we see a device that can isolate cvq without MQ but
> > > >>> not with MQ.
> > > >>>
> > > >>>> Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> > > >>>> ---
> > > >>>> v2: Take out the reset of the device from vhost_vdpa_cvq_is_isolated
> > > >>>> ---
> > > >>>>   net/vhost-vdpa.c | 194 ++++++++++++++++++++++++++++++++++++-----------
> > > >>>>   1 file changed, 151 insertions(+), 43 deletions(-)
> > > >>>>
> > > >>>> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > > >>>> index 4397c0d4b3..db2c9afcb3 100644
> > > >>>> --- a/net/vhost-vdpa.c
> > > >>>> +++ b/net/vhost-vdpa.c
> > > >>>> @@ -43,6 +43,13 @@ typedef struct VhostVDPAState {
> > > >>>>
> > > >>>>       /* The device always have SVQ enabled */
> > > >>>>       bool always_svq;
> > > >>>> +
> > > >>>> +    /* The device can isolate CVQ in its own ASID if MQ is negotiated */
> > > >>>> +    bool cvq_isolated_mq;
> > > >>>> +
> > > >>>> +    /* The device can isolate CVQ in its own ASID if MQ is not negotiated */
> > > >>>> +    bool cvq_isolated;
> > > >>> As stated above, if we need a device that cvq_isolated_mq^cvq_isolated
> > > >>> == true, we need to fail. This may reduce the complexity of the code?
> > > >>>
> > > >>> Thanks
> > > >> Since we are the mediation layer, Qemu can alway choose to negotiate
> > > >> MQ regardless whether or not it is supported by the guest. In this
> > > >> way, we can have a stable virtqueue index for cvq.
> > > >>
> > > > I think it is a great idea and it simplifies this patch somehow.
> > > > However, we need something like the queue mapping [1] to do so :).
> > > >
> > > > To double confirm:
> > > > * If the device supports MQ, only probe MQ. If not, only probe !MQ.
> > > > * Only store cvq_isolated in VhostVDPAState.
> > > >
> > > > Now, if the device does not negotiate MQ but the device supports MQ:
> > >
> > >
> > > I'm not sure I understand here, if device supports MQ it should accepts
> > > MQ or we can fail the initialization here.
> > >
> >
> > My fault, I wanted to say "if the device offers MQ but the driver does
> > not acks it".
> >
> > >
> > > > * All the requests to queue 3 must be redirected to the last queue in
> > > > the device. That includes set_vq_address, notifiers regions, etc.
> > >
> > >
> > > This also means we will only mediate the case:
> > >
> > > 1) Qemu emulated virtio-net has 1 queue but device support multiple queue
> > >
> > > but not
> > >
> > > 2) Qemu emulated virtio-net has M queue but device support N queue (N>M)
> > >
> >
> > Right.
> >
> > >
> > > >
> > > > I'm totally ok to go this route but it's not immediate.
> > >
> > >
> > > Yes but I mean, we can start from failing the device if
> > > cvq_isolated_mq^cvq_isolated == true
> > >
> >
> > So probe the two cases but set VhostVDPAState->cvq_isolated =
> > cvq_isolated && cvq_mq_isolated then? No map involved that way, and
> > all parents should behave that way.
> >
> > > (or I wonder if we can meet this condition for any existing parents).
> >
> > I don't think so, but I think we need to probe the two anyway.
> > Otherwise we may change the dataplane asid too.
>
> Just to make sure we are at the same page, I meant we could fail the
> initialization of vhost-vDPA is the device:
>
> 1) can isolate cvq in the case of singqueue but not multiqueue
>
> or
>
> 2) can isolate cvq in the case of multiqueue but not single queue
>
> Because I don't think there are any parents that have such a buggy
> implementation.
>

Got it.

Leaving out the queue multiplex for the moment, as it adds complexity
and we can add it on top.

Thanks!
diff mbox series

Patch

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 4397c0d4b3..db2c9afcb3 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -43,6 +43,13 @@  typedef struct VhostVDPAState {
 
     /* The device always have SVQ enabled */
     bool always_svq;
+
+    /* The device can isolate CVQ in its own ASID if MQ is negotiated */
+    bool cvq_isolated_mq;
+
+    /* The device can isolate CVQ in its own ASID if MQ is not negotiated */
+    bool cvq_isolated;
+
     bool started;
 } VhostVDPAState;
 
@@ -361,15 +368,8 @@  static NetClientInfo net_vhost_vdpa_info = {
         .check_peer_type = vhost_vdpa_check_peer_type,
 };
 
-/**
- * Get vring virtqueue group
- *
- * @device_fd  vdpa device fd
- * @vq_index   Virtqueue index
- *
- * Return -errno in case of error, or vq group if success.
- */
-static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index)
+static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index,
+                                          Error **errp)
 {
     struct vhost_vring_state state = {
         .index = vq_index,
@@ -378,8 +378,7 @@  static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index)
 
     if (unlikely(r < 0)) {
         r = -errno;
-        error_report("Cannot get VQ %u group: %s", vq_index,
-                     g_strerror(errno));
+        error_setg_errno(errp, errno, "Cannot get VQ %u group", vq_index);
         return r;
     }
 
@@ -479,9 +478,9 @@  static int vhost_vdpa_net_cvq_start(NetClientState *nc)
 {
     VhostVDPAState *s, *s0;
     struct vhost_vdpa *v;
-    uint64_t backend_features;
     int64_t cvq_group;
-    int cvq_index, r;
+    int r;
+    Error *err = NULL;
 
     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
 
@@ -501,42 +500,29 @@  static int vhost_vdpa_net_cvq_start(NetClientState *nc)
     /*
      * If we early return in these cases SVQ will not be enabled. The migration
      * will be blocked as long as vhost-vdpa backends will not offer _F_LOG.
-     *
-     * Calling VHOST_GET_BACKEND_FEATURES as they are not available in v->dev
-     * yet.
      */
-    r = ioctl(v->device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features);
-    if (unlikely(r < 0)) {
-        error_report("Cannot get vdpa backend_features: %s(%d)",
-            g_strerror(errno), errno);
-        return -1;
-    }
-    if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)) ||
-        !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) {
+    if (!vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) {
         return 0;
     }
 
-    /*
-     * Check if all the virtqueues of the virtio device are in a different vq
-     * than the last vq. VQ group of last group passed in cvq_group.
-     */
-    cvq_index = v->dev->vq_index_end - 1;
-    cvq_group = vhost_vdpa_get_vring_group(v->device_fd, cvq_index);
-    if (unlikely(cvq_group < 0)) {
-        return cvq_group;
-    }
-    for (int i = 0; i < cvq_index; ++i) {
-        int64_t group = vhost_vdpa_get_vring_group(v->device_fd, i);
-
-        if (unlikely(group < 0)) {
-            return group;
+    if (v->dev->features & BIT_ULL(VIRTIO_NET_F_MQ)) {
+        if (!s->cvq_isolated_mq) {
+            return 0;
         }
-
-        if (group == cvq_group) {
+    } else {
+        if (!s->cvq_isolated) {
             return 0;
         }
     }
 
+    cvq_group = vhost_vdpa_get_vring_group(v->device_fd,
+                                           v->dev->vq_index_end - 1,
+                                           &err);
+    if (unlikely(cvq_group < 0)) {
+        error_report_err(err);
+        return cvq_group;
+    }
+
     r = vhost_vdpa_set_address_space_id(v, cvq_group, VHOST_VDPA_NET_CVQ_ASID);
     if (unlikely(r < 0)) {
         return r;
@@ -798,6 +784,116 @@  static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = {
     .avail_handler = vhost_vdpa_net_handle_ctrl_avail,
 };
 
+/**
+ * Probe the device to check control virtqueue is isolated.
+ *
+ * @device_fd vhost-vdpa file descriptor
+ * @features features to negotiate
+ * @cvq_index Control vq index
+ *
+ * Returns -1 in case of error, 0 if false and 1 if true
+ */
+static int vhost_vdpa_cvq_is_isolated(int device_fd, uint64_t features,
+                                      unsigned cvq_index, Error **errp)
+{
+    int64_t cvq_group;
+    int r;
+
+    r = vhost_vdpa_set_dev_features_fd(device_fd, features);
+    if (unlikely(r < 0)) {
+        error_setg_errno(errp, -r, "Cannot set device features");
+        return r;
+    }
+
+    cvq_group = vhost_vdpa_get_vring_group(device_fd, cvq_index, errp);
+    if (unlikely(cvq_group < 0)) {
+        return cvq_group;
+    }
+
+    for (int i = 0; i < cvq_index; ++i) {
+        int64_t group = vhost_vdpa_get_vring_group(device_fd, i, errp);
+
+        if (unlikely(group < 0)) {
+            return group;
+        }
+
+        if (group == (int64_t)cvq_group) {
+            return 0;
+        }
+    }
+
+    return 1;
+}
+
+/**
+ * Probe if CVQ is isolated when the device is MQ and when it is not MQ
+ *
+ * @device_fd         The vdpa device fd
+ * @features          Features offered by the device.
+ * @cvq_index         The control vq index if mq is negotiated. Ignored
+ *                    otherwise.
+ * @cvq_isolated      It'll be set to true if cvq is isolated if mq is not
+ *                    negotiated.
+ * @cvq_isolated_mq   It'll be set to true if cvq is isolated if mq is
+ *                    negotiated.
+ *
+ * Returns -1 in case of failure
+ */
+static int vhost_vdpa_probe_cvq_isolation(int device_fd, uint64_t features,
+                                          int cvq_index, bool *cvq_isolated,
+                                          bool *cvq_isolated_mq, Error **errp)
+{
+    uint64_t backend_features;
+    int r;
+
+    ERRP_GUARD();
+
+    *cvq_isolated = false;
+    *cvq_isolated_mq = false;
+    r = ioctl(device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features);
+    if (unlikely(r < 0)) {
+        error_setg_errno(errp, errno, "Cannot get vdpa backend_features");
+        return r;
+    }
+
+    if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID))) {
+        return 0;
+    }
+
+    r = vhost_vdpa_cvq_is_isolated(device_fd,
+                                   features & ~BIT_ULL(VIRTIO_NET_F_MQ), 2,
+                                   errp);
+    if (unlikely(r < 0)) {
+        if (r == -ENOTSUP) {
+            /*
+             * The kernel report VHOST_BACKEND_F_IOTLB_ASID if the vdpa
+             * frontend support ASID but the parent driver does not.  The CVQ
+             * cannot be isolated in this case.
+             */
+            error_free(*errp);
+            *errp = NULL;
+            return 0;
+        }
+
+        return r;
+    }
+
+    *cvq_isolated = r == 1;
+    vhost_vdpa_reset_status_fd(device_fd);
+    if (!(features & BIT_ULL(VIRTIO_NET_F_MQ))) {
+        return 0;
+    }
+
+    r = vhost_vdpa_cvq_is_isolated(device_fd, features, cvq_index * 2, errp);
+    if (unlikely(r < 0)) {
+        return r;
+    }
+
+    *cvq_isolated_mq = r == 1;
+    vhost_vdpa_reset_status_fd(device_fd);
+    return 0;
+}
+
 static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
                                        const char *device,
                                        const char *name,
@@ -807,16 +903,26 @@  static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
                                        bool is_datapath,
                                        bool svq,
                                        struct vhost_vdpa_iova_range iova_range,
-                                       uint64_t features)
+                                       uint64_t features,
+                                       Error **errp)
 {
     NetClientState *nc = NULL;
     VhostVDPAState *s;
     int ret = 0;
     assert(name);
+    bool cvq_isolated, cvq_isolated_mq;
+
     if (is_datapath) {
         nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device,
                                  name);
     } else {
+        ret = vhost_vdpa_probe_cvq_isolation(vdpa_device_fd, features,
+                                             queue_pair_index, &cvq_isolated,
+                                             &cvq_isolated_mq, errp);
+        if (unlikely(ret)) {
+            return NULL;
+        }
+
         nc = qemu_new_net_control_client(&net_vhost_vdpa_cvq_info, peer,
                                          device, name);
     }
@@ -843,6 +949,8 @@  static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
 
         s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
         s->vhost_vdpa.shadow_vq_ops_opaque = s;
+        s->cvq_isolated = cvq_isolated;
+        s->cvq_isolated_mq = cvq_isolated_mq;
 
         /*
          * TODO: We cannot migrate devices with CVQ as there is no way to set
@@ -971,7 +1079,7 @@  int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
     for (i = 0; i < queue_pairs; i++) {
         ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
                                      vdpa_device_fd, i, 2, true, opts->x_svq,
-                                     iova_range, features);
+                                     iova_range, features, errp);
         if (!ncs[i])
             goto err;
     }
@@ -979,7 +1087,7 @@  int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
     if (has_cvq) {
         nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
                                  vdpa_device_fd, i, 1, false,
-                                 opts->x_svq, iova_range, features);
+                                 opts->x_svq, iova_range, features, errp);
         if (!nc)
             goto err;
     }