Message ID | 20230509154435.1410162-6-eperezma@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Move ASID test to vhost-vdpa net initialization | expand |
On Tue, May 9, 2023 at 11:44 PM Eugenio Pérez <eperezma@redhat.com> wrote: > > Evaluating it at start time instead of initialization time may make the > guest capable of dynamically adding or removing migration blockers. > > Also, moving to initialization reduces the number of ioctls in the > migration, reducing failure possibilities. > > As a drawback we need to check for CVQ isolation twice: one time with no > MQ negotiated and another one acking it, as long as the device supports > it. This is because Vring ASID / group management is based on vq > indexes, but we don't know the index of CVQ before negotiating MQ. > > Signed-off-by: Eugenio Pérez <eperezma@redhat.com> > --- > v2: Take out the reset of the device from vhost_vdpa_cvq_is_isolated > v3: Only record cvq_isolated, true if the device have cvq isolated in > both !MQ and MQ configurations. > --- > net/vhost-vdpa.c | 178 +++++++++++++++++++++++++++++++++++------------ > 1 file changed, 135 insertions(+), 43 deletions(-) > > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c > index 3fb833fe76..29054b77a9 100644 > --- a/net/vhost-vdpa.c > +++ b/net/vhost-vdpa.c > @@ -43,6 +43,10 @@ typedef struct VhostVDPAState { > > /* The device always have SVQ enabled */ > bool always_svq; > + > + /* The device can isolate CVQ in its own ASID */ > + bool cvq_isolated; > + > bool started; > } VhostVDPAState; > > @@ -362,15 +366,8 @@ static NetClientInfo net_vhost_vdpa_info = { > .check_peer_type = vhost_vdpa_check_peer_type, > }; > > -/** > - * Get vring virtqueue group > - * > - * @device_fd vdpa device fd > - * @vq_index Virtqueue index > - * > - * Return -errno in case of error, or vq group if success. 
> - */ > -static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) > +static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index, > + Error **errp) > { > struct vhost_vring_state state = { > .index = vq_index, > @@ -379,8 +376,7 @@ static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) > > if (unlikely(r < 0)) { > r = -errno; > - error_report("Cannot get VQ %u group: %s", vq_index, > - g_strerror(errno)); > + error_setg_errno(errp, errno, "Cannot get VQ %u group", vq_index); > return r; > } > > @@ -480,9 +476,9 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) > { > VhostVDPAState *s, *s0; > struct vhost_vdpa *v; > - uint64_t backend_features; > int64_t cvq_group; > - int cvq_index, r; > + int r; > + Error *err = NULL; > > assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); > > @@ -502,41 +498,22 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) > /* > * If we early return in these cases SVQ will not be enabled. The migration > * will be blocked as long as vhost-vdpa backends will not offer _F_LOG. > - * > - * Calling VHOST_GET_BACKEND_FEATURES as they are not available in v->dev > - * yet. > */ > - r = ioctl(v->device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features); > - if (unlikely(r < 0)) { > - error_report("Cannot get vdpa backend_features: %s(%d)", > - g_strerror(errno), errno); > - return -1; > + if (!vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) { > + return 0; > } > - if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)) || > - !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) { > + > + if (!s->cvq_isolated) { > return 0; > } > > - /* > - * Check if all the virtqueues of the virtio device are in a different vq > - * than the last vq. VQ group of last group passed in cvq_group. 
> - */ > - cvq_index = v->dev->vq_index_end - 1; > - cvq_group = vhost_vdpa_get_vring_group(v->device_fd, cvq_index); > + cvq_group = vhost_vdpa_get_vring_group(v->device_fd, > + v->dev->vq_index_end - 1, > + &err); > if (unlikely(cvq_group < 0)) { > + error_report_err(err); > return cvq_group; > } > - for (int i = 0; i < cvq_index; ++i) { > - int64_t group = vhost_vdpa_get_vring_group(v->device_fd, i); > - > - if (unlikely(group < 0)) { > - return group; > - } > - > - if (group == cvq_group) { > - return 0; > - } > - } > > r = vhost_vdpa_set_address_space_id(v, cvq_group, VHOST_VDPA_NET_CVQ_ASID); > if (unlikely(r < 0)) { > @@ -799,6 +776,111 @@ static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = { > .avail_handler = vhost_vdpa_net_handle_ctrl_avail, > }; > > +/** > + * Probe the device to check control virtqueue is isolated. > + * > + * @device_fd vhost-vdpa file descriptor > + * @features features to negotiate > + * @cvq_index Control vq index > + * > + * Returns -1 in case of error, 0 if false and 1 if true > + */ > +static int vhost_vdpa_cvq_is_isolated(int device_fd, uint64_t features, > + unsigned cvq_index, Error **errp) > +{ > + int64_t cvq_group; > + int r; > + > + r = vhost_vdpa_set_dev_features_fd(device_fd, features); > + if (unlikely(r < 0)) { > + error_setg_errno(errp, -r, "Cannot set device features"); > + return r; > + } > + > + cvq_group = vhost_vdpa_get_vring_group(device_fd, cvq_index, errp); > + if (unlikely(cvq_group < 0)) { > + return cvq_group; > + } > + > + for (int i = 0; i < cvq_index; ++i) { > + int64_t group = vhost_vdpa_get_vring_group(device_fd, i, errp); > + > + if (unlikely(group < 0)) { > + return group; > + } > + > + if (group == (int64_t)cvq_group) { > + return 0; > + } > + } > + > + return 1; > +} > + > +/** > + * Probe if CVQ is isolated when the device is MQ and when it is not MQ > + * > + * @device_fd The vdpa device fd > + * @features Features offered by the device. 
> + * @cvq_index The control vq index if mq is negotiated. Ignored > + * otherwise. > + * > + * Returns <0 in case of failure, 0 if false and 1 if true. > + */ > +static int vhost_vdpa_probe_cvq_isolation(int device_fd, uint64_t features, > + int cvq_index, Error **errp) > +{ > + uint64_t backend_features; > + int r; > + > + ERRP_GUARD(); > + > + r = ioctl(device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features); > + if (unlikely(r < 0)) { > + error_setg_errno(errp, errno, "Cannot get vdpa backend_features"); > + return r; > + } > + > + if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID))) { > + return 0; > + } > + > + r = vhost_vdpa_cvq_is_isolated(device_fd, > + features & ~BIT_ULL(VIRTIO_NET_F_MQ), 2, > + errp); > + if (unlikely(r < 0)) { > + if (r != -ENOTSUP) { > + return r; > + } > + > + /* > + * The kernel report VHOST_BACKEND_F_IOTLB_ASID if the vdpa frontend > + * support ASID even if the parent driver does not. The CVQ cannot be > + * isolated in this case. > + */ > + error_free(*errp); > + *errp = NULL; > + return 0; > + } > + > + if (r == 0) { > + return 0; > + } > + > + vhost_vdpa_reset_status_fd(device_fd); > + if (!(features & BIT_ULL(VIRTIO_NET_F_MQ))) { > + return 0; > + } > + > + r = vhost_vdpa_cvq_is_isolated(device_fd, features, cvq_index * 2, errp); I think checking this once should be sufficient. That is to say, it should be a bug if there's hardware that puts cvq in a dedicated group in MQ but not in SQ. 
Thanks > + if (unlikely(r < 0)) { > + return r; > + } > + > + vhost_vdpa_reset_status_fd(device_fd); > + return r; > +} > + > static NetClientState *net_vhost_vdpa_init(NetClientState *peer, > const char *device, > const char *name, > @@ -808,16 +890,25 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, > bool is_datapath, > bool svq, > struct vhost_vdpa_iova_range iova_range, > - uint64_t features) > + uint64_t features, > + Error **errp) > { > NetClientState *nc = NULL; > VhostVDPAState *s; > int ret = 0; > assert(name); > + int cvq_isolated; > + > if (is_datapath) { > nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device, > name); > } else { > + cvq_isolated = vhost_vdpa_probe_cvq_isolation(vdpa_device_fd, features, > + queue_pair_index, errp); > + if (unlikely(cvq_isolated < 0)) { > + return NULL; > + } > + > nc = qemu_new_net_control_client(&net_vhost_vdpa_cvq_info, peer, > device, name); > } > @@ -844,6 +935,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, > > s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; > s->vhost_vdpa.shadow_vq_ops_opaque = s; > + s->cvq_isolated = cvq_isolated; > > /* > * TODO: We cannot migrate devices with CVQ as there is no way to set > @@ -972,7 +1064,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, > for (i = 0; i < queue_pairs; i++) { > ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, > vdpa_device_fd, i, 2, true, opts->x_svq, > - iova_range, features); > + iova_range, features, errp); > if (!ncs[i]) > goto err; > } > @@ -980,7 +1072,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, > if (has_cvq) { > nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, > vdpa_device_fd, i, 1, false, > - opts->x_svq, iova_range, features); > + opts->x_svq, iova_range, features, errp); > if (!nc) > goto err; > } > -- > 2.31.1 >
On Wed, May 17, 2023 at 5:59 AM Jason Wang <jasowang@redhat.com> wrote: > > On Tue, May 9, 2023 at 11:44 PM Eugenio Pérez <eperezma@redhat.com> wrote: > > > > Evaluating it at start time instead of initialization time may make the > > guest capable of dynamically adding or removing migration blockers. > > > > Also, moving to initialization reduces the number of ioctls in the > > migration, reducing failure possibilities. > > > > As a drawback we need to check for CVQ isolation twice: one time with no > > MQ negotiated and another one acking it, as long as the device supports > > it. This is because Vring ASID / group management is based on vq > > indexes, but we don't know the index of CVQ before negotiating MQ. > > > > Signed-off-by: Eugenio Pérez <eperezma@redhat.com> > > --- > > v2: Take out the reset of the device from vhost_vdpa_cvq_is_isolated > > v3: Only record cvq_isolated, true if the device have cvq isolated in > > both !MQ and MQ configurations. > > --- > > net/vhost-vdpa.c | 178 +++++++++++++++++++++++++++++++++++------------ > > 1 file changed, 135 insertions(+), 43 deletions(-) > > > > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c > > index 3fb833fe76..29054b77a9 100644 > > --- a/net/vhost-vdpa.c > > +++ b/net/vhost-vdpa.c > > @@ -43,6 +43,10 @@ typedef struct VhostVDPAState { > > > > /* The device always have SVQ enabled */ > > bool always_svq; > > + > > + /* The device can isolate CVQ in its own ASID */ > > + bool cvq_isolated; > > + > > bool started; > > } VhostVDPAState; > > > > @@ -362,15 +366,8 @@ static NetClientInfo net_vhost_vdpa_info = { > > .check_peer_type = vhost_vdpa_check_peer_type, > > }; > > > > -/** > > - * Get vring virtqueue group > > - * > > - * @device_fd vdpa device fd > > - * @vq_index Virtqueue index > > - * > > - * Return -errno in case of error, or vq group if success. 
> > - */ > > -static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) > > +static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index, > > + Error **errp) > > { > > struct vhost_vring_state state = { > > .index = vq_index, > > @@ -379,8 +376,7 @@ static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) > > > > if (unlikely(r < 0)) { > > r = -errno; > > - error_report("Cannot get VQ %u group: %s", vq_index, > > - g_strerror(errno)); > > + error_setg_errno(errp, errno, "Cannot get VQ %u group", vq_index); > > return r; > > } > > > > @@ -480,9 +476,9 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) > > { > > VhostVDPAState *s, *s0; > > struct vhost_vdpa *v; > > - uint64_t backend_features; > > int64_t cvq_group; > > - int cvq_index, r; > > + int r; > > + Error *err = NULL; > > > > assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); > > > > @@ -502,41 +498,22 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) > > /* > > * If we early return in these cases SVQ will not be enabled. The migration > > * will be blocked as long as vhost-vdpa backends will not offer _F_LOG. > > - * > > - * Calling VHOST_GET_BACKEND_FEATURES as they are not available in v->dev > > - * yet. > > */ > > - r = ioctl(v->device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features); > > - if (unlikely(r < 0)) { > > - error_report("Cannot get vdpa backend_features: %s(%d)", > > - g_strerror(errno), errno); > > - return -1; > > + if (!vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) { > > + return 0; > > } > > - if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)) || > > - !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) { > > + > > + if (!s->cvq_isolated) { > > return 0; > > } > > > > - /* > > - * Check if all the virtqueues of the virtio device are in a different vq > > - * than the last vq. VQ group of last group passed in cvq_group. 
> > - */ > > - cvq_index = v->dev->vq_index_end - 1; > > - cvq_group = vhost_vdpa_get_vring_group(v->device_fd, cvq_index); > > + cvq_group = vhost_vdpa_get_vring_group(v->device_fd, > > + v->dev->vq_index_end - 1, > > + &err); > > if (unlikely(cvq_group < 0)) { > > + error_report_err(err); > > return cvq_group; > > } > > - for (int i = 0; i < cvq_index; ++i) { > > - int64_t group = vhost_vdpa_get_vring_group(v->device_fd, i); > > - > > - if (unlikely(group < 0)) { > > - return group; > > - } > > - > > - if (group == cvq_group) { > > - return 0; > > - } > > - } > > > > r = vhost_vdpa_set_address_space_id(v, cvq_group, VHOST_VDPA_NET_CVQ_ASID); > > if (unlikely(r < 0)) { > > @@ -799,6 +776,111 @@ static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = { > > .avail_handler = vhost_vdpa_net_handle_ctrl_avail, > > }; > > > > +/** > > + * Probe the device to check control virtqueue is isolated. > > + * > > + * @device_fd vhost-vdpa file descriptor > > + * @features features to negotiate > > + * @cvq_index Control vq index > > + * > > + * Returns -1 in case of error, 0 if false and 1 if true > > + */ > > +static int vhost_vdpa_cvq_is_isolated(int device_fd, uint64_t features, > > + unsigned cvq_index, Error **errp) > > +{ > > + int64_t cvq_group; > > + int r; > > + > > + r = vhost_vdpa_set_dev_features_fd(device_fd, features); > > + if (unlikely(r < 0)) { > > + error_setg_errno(errp, -r, "Cannot set device features"); > > + return r; > > + } > > + > > + cvq_group = vhost_vdpa_get_vring_group(device_fd, cvq_index, errp); > > + if (unlikely(cvq_group < 0)) { > > + return cvq_group; > > + } > > + > > + for (int i = 0; i < cvq_index; ++i) { > > + int64_t group = vhost_vdpa_get_vring_group(device_fd, i, errp); > > + > > + if (unlikely(group < 0)) { > > + return group; > > + } > > + > > + if (group == (int64_t)cvq_group) { > > + return 0; > > + } > > + } > > + > > + return 1; > > +} > > + > > +/** > > + * Probe if CVQ is isolated when the device is MQ and when it is not 
MQ > > + * > > + * @device_fd The vdpa device fd > > + * @features Features offered by the device. > > + * @cvq_index The control vq index if mq is negotiated. Ignored > > + * otherwise. > > + * > > + * Returns <0 in case of failure, 0 if false and 1 if true. > > + */ > > +static int vhost_vdpa_probe_cvq_isolation(int device_fd, uint64_t features, > > + int cvq_index, Error **errp) > > +{ > > + uint64_t backend_features; > > + int r; > > + > > + ERRP_GUARD(); > > + > > + r = ioctl(device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features); > > + if (unlikely(r < 0)) { > > + error_setg_errno(errp, errno, "Cannot get vdpa backend_features"); > > + return r; > > + } > > + > > + if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID))) { > > + return 0; > > + } > > + > > + r = vhost_vdpa_cvq_is_isolated(device_fd, > > + features & ~BIT_ULL(VIRTIO_NET_F_MQ), 2, > > + errp); > > + if (unlikely(r < 0)) { > > + if (r != -ENOTSUP) { > > + return r; > > + } > > + > > + /* > > + * The kernel report VHOST_BACKEND_F_IOTLB_ASID if the vdpa frontend > > + * support ASID even if the parent driver does not. The CVQ cannot be > > + * isolated in this case. > > + */ > > + error_free(*errp); > > + *errp = NULL; > > + return 0; > > + } > > + > > + if (r == 0) { > > + return 0; > > + } > > + > > + vhost_vdpa_reset_status_fd(device_fd); > > + if (!(features & BIT_ULL(VIRTIO_NET_F_MQ))) { > > + return 0; > > + } > > + > > + r = vhost_vdpa_cvq_is_isolated(device_fd, features, cvq_index * 2, errp); > > I think checking this once should be sufficient. That is to say, it > should be a bug if there's hardware that puts cvq in a dedicated group > in MQ but not in SQ. > This is checking the NIC is not buggy :). Otherwise, we're giving access to the guest to the CVQ shadow vring. And, currently, SVQ code assumes only QEMU can access it. But maybe this made more sense in previous versions, where the series also cached the cvq group here. 
If I understand you correctly, it is enough to check that CVQ is isolated in SQ, and assume it will be isolated also in MQ, right? I can modify the patch that way if you confirm this. Thanks! > Thanks > > > + if (unlikely(r < 0)) { > > + return r; > > + } > > + > > + vhost_vdpa_reset_status_fd(device_fd); > > + return r; > > +} > > + > > static NetClientState *net_vhost_vdpa_init(NetClientState *peer, > > const char *device, > > const char *name, > > @@ -808,16 +890,25 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, > > bool is_datapath, > > bool svq, > > struct vhost_vdpa_iova_range iova_range, > > - uint64_t features) > > + uint64_t features, > > + Error **errp) > > { > > NetClientState *nc = NULL; > > VhostVDPAState *s; > > int ret = 0; > > assert(name); > > + int cvq_isolated; > > + > > if (is_datapath) { > > nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device, > > name); > > } else { > > + cvq_isolated = vhost_vdpa_probe_cvq_isolation(vdpa_device_fd, features, > > + queue_pair_index, errp); > > + if (unlikely(cvq_isolated < 0)) { > > + return NULL; > > + } > > + > > nc = qemu_new_net_control_client(&net_vhost_vdpa_cvq_info, peer, > > device, name); > > } > > @@ -844,6 +935,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, > > > > s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; > > s->vhost_vdpa.shadow_vq_ops_opaque = s; > > + s->cvq_isolated = cvq_isolated; > > > > /* > > * TODO: We cannot migrate devices with CVQ as there is no way to set > > @@ -972,7 +1064,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, > > for (i = 0; i < queue_pairs; i++) { > > ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, > > vdpa_device_fd, i, 2, true, opts->x_svq, > > - iova_range, features); > > + iova_range, features, errp); > > if (!ncs[i]) > > goto err; > > } > > @@ -980,7 +1072,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, > > if (has_cvq) { > > nc = 
net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, > > vdpa_device_fd, i, 1, false, > > - opts->x_svq, iova_range, features); > > + opts->x_svq, iova_range, features, errp); > > if (!nc) > > goto err; > > } > > -- > > 2.31.1 > > >
On Wed, May 17, 2023 at 2:30 PM Eugenio Perez Martin <eperezma@redhat.com> wrote: > > On Wed, May 17, 2023 at 5:59 AM Jason Wang <jasowang@redhat.com> wrote: > > > > On Tue, May 9, 2023 at 11:44 PM Eugenio Pérez <eperezma@redhat.com> wrote: > > > > > > Evaluating it at start time instead of initialization time may make the > > > guest capable of dynamically adding or removing migration blockers. > > > > > > Also, moving to initialization reduces the number of ioctls in the > > > migration, reducing failure possibilities. > > > > > > As a drawback we need to check for CVQ isolation twice: one time with no > > > MQ negotiated and another one acking it, as long as the device supports > > > it. This is because Vring ASID / group management is based on vq > > > indexes, but we don't know the index of CVQ before negotiating MQ. > > > > > > Signed-off-by: Eugenio Pérez <eperezma@redhat.com> > > > --- > > > v2: Take out the reset of the device from vhost_vdpa_cvq_is_isolated > > > v3: Only record cvq_isolated, true if the device have cvq isolated in > > > both !MQ and MQ configurations. 
> > > --- > > > net/vhost-vdpa.c | 178 +++++++++++++++++++++++++++++++++++------------ > > > 1 file changed, 135 insertions(+), 43 deletions(-) > > > > > > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c > > > index 3fb833fe76..29054b77a9 100644 > > > --- a/net/vhost-vdpa.c > > > +++ b/net/vhost-vdpa.c > > > @@ -43,6 +43,10 @@ typedef struct VhostVDPAState { > > > > > > /* The device always have SVQ enabled */ > > > bool always_svq; > > > + > > > + /* The device can isolate CVQ in its own ASID */ > > > + bool cvq_isolated; > > > + > > > bool started; > > > } VhostVDPAState; > > > > > > @@ -362,15 +366,8 @@ static NetClientInfo net_vhost_vdpa_info = { > > > .check_peer_type = vhost_vdpa_check_peer_type, > > > }; > > > > > > -/** > > > - * Get vring virtqueue group > > > - * > > > - * @device_fd vdpa device fd > > > - * @vq_index Virtqueue index > > > - * > > > - * Return -errno in case of error, or vq group if success. > > > - */ > > > -static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) > > > +static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index, > > > + Error **errp) > > > { > > > struct vhost_vring_state state = { > > > .index = vq_index, > > > @@ -379,8 +376,7 @@ static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) > > > > > > if (unlikely(r < 0)) { > > > r = -errno; > > > - error_report("Cannot get VQ %u group: %s", vq_index, > > > - g_strerror(errno)); > > > + error_setg_errno(errp, errno, "Cannot get VQ %u group", vq_index); > > > return r; > > > } > > > > > > @@ -480,9 +476,9 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) > > > { > > > VhostVDPAState *s, *s0; > > > struct vhost_vdpa *v; > > > - uint64_t backend_features; > > > int64_t cvq_group; > > > - int cvq_index, r; > > > + int r; > > > + Error *err = NULL; > > > > > > assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); > > > > > > @@ -502,41 +498,22 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) > > > /* > 
> > * If we early return in these cases SVQ will not be enabled. The migration > > > * will be blocked as long as vhost-vdpa backends will not offer _F_LOG. > > > - * > > > - * Calling VHOST_GET_BACKEND_FEATURES as they are not available in v->dev > > > - * yet. > > > */ > > > - r = ioctl(v->device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features); > > > - if (unlikely(r < 0)) { > > > - error_report("Cannot get vdpa backend_features: %s(%d)", > > > - g_strerror(errno), errno); > > > - return -1; > > > + if (!vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) { > > > + return 0; > > > } > > > - if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)) || > > > - !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) { > > > + > > > + if (!s->cvq_isolated) { > > > return 0; > > > } > > > > > > - /* > > > - * Check if all the virtqueues of the virtio device are in a different vq > > > - * than the last vq. VQ group of last group passed in cvq_group. > > > - */ > > > - cvq_index = v->dev->vq_index_end - 1; > > > - cvq_group = vhost_vdpa_get_vring_group(v->device_fd, cvq_index); > > > + cvq_group = vhost_vdpa_get_vring_group(v->device_fd, > > > + v->dev->vq_index_end - 1, > > > + &err); > > > if (unlikely(cvq_group < 0)) { > > > + error_report_err(err); > > > return cvq_group; > > > } > > > - for (int i = 0; i < cvq_index; ++i) { > > > - int64_t group = vhost_vdpa_get_vring_group(v->device_fd, i); > > > - > > > - if (unlikely(group < 0)) { > > > - return group; > > > - } > > > - > > > - if (group == cvq_group) { > > > - return 0; > > > - } > > > - } > > > > > > r = vhost_vdpa_set_address_space_id(v, cvq_group, VHOST_VDPA_NET_CVQ_ASID); > > > if (unlikely(r < 0)) { > > > @@ -799,6 +776,111 @@ static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = { > > > .avail_handler = vhost_vdpa_net_handle_ctrl_avail, > > > }; > > > > > > +/** > > > + * Probe the device to check control virtqueue is isolated. 
> > > + * > > > + * @device_fd vhost-vdpa file descriptor > > > + * @features features to negotiate > > > + * @cvq_index Control vq index > > > + * > > > + * Returns -1 in case of error, 0 if false and 1 if true > > > + */ > > > +static int vhost_vdpa_cvq_is_isolated(int device_fd, uint64_t features, > > > + unsigned cvq_index, Error **errp) > > > +{ > > > + int64_t cvq_group; > > > + int r; > > > + > > > + r = vhost_vdpa_set_dev_features_fd(device_fd, features); > > > + if (unlikely(r < 0)) { > > > + error_setg_errno(errp, -r, "Cannot set device features"); > > > + return r; > > > + } > > > + > > > + cvq_group = vhost_vdpa_get_vring_group(device_fd, cvq_index, errp); > > > + if (unlikely(cvq_group < 0)) { > > > + return cvq_group; > > > + } > > > + > > > + for (int i = 0; i < cvq_index; ++i) { > > > + int64_t group = vhost_vdpa_get_vring_group(device_fd, i, errp); > > > + > > > + if (unlikely(group < 0)) { > > > + return group; > > > + } > > > + > > > + if (group == (int64_t)cvq_group) { > > > + return 0; > > > + } > > > + } > > > + > > > + return 1; > > > +} > > > + > > > +/** > > > + * Probe if CVQ is isolated when the device is MQ and when it is not MQ > > > + * > > > + * @device_fd The vdpa device fd > > > + * @features Features offered by the device. > > > + * @cvq_index The control vq index if mq is negotiated. Ignored > > > + * otherwise. > > > + * > > > + * Returns <0 in case of failure, 0 if false and 1 if true. 
> > > + */ > > > +static int vhost_vdpa_probe_cvq_isolation(int device_fd, uint64_t features, > > > + int cvq_index, Error **errp) > > > +{ > > > + uint64_t backend_features; > > > + int r; > > > + > > > + ERRP_GUARD(); > > > + > > > + r = ioctl(device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features); > > > + if (unlikely(r < 0)) { > > > + error_setg_errno(errp, errno, "Cannot get vdpa backend_features"); > > > + return r; > > > + } > > > + > > > + if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID))) { > > > + return 0; > > > + } > > > + > > > + r = vhost_vdpa_cvq_is_isolated(device_fd, > > > + features & ~BIT_ULL(VIRTIO_NET_F_MQ), 2, > > > + errp); > > > + if (unlikely(r < 0)) { > > > + if (r != -ENOTSUP) { > > > + return r; > > > + } > > > + > > > + /* > > > + * The kernel report VHOST_BACKEND_F_IOTLB_ASID if the vdpa frontend > > > + * support ASID even if the parent driver does not. The CVQ cannot be > > > + * isolated in this case. > > > + */ > > > + error_free(*errp); > > > + *errp = NULL; > > > + return 0; > > > + } > > > + > > > + if (r == 0) { > > > + return 0; > > > + } > > > + > > > + vhost_vdpa_reset_status_fd(device_fd); > > > + if (!(features & BIT_ULL(VIRTIO_NET_F_MQ))) { > > > + return 0; > > > + } > > > + > > > + r = vhost_vdpa_cvq_is_isolated(device_fd, features, cvq_index * 2, errp); > > > > I think checking this once should be sufficient. That is to say, it > > should be a bug if there's hardware that puts cvq in a dedicated group > > in MQ but not in SQ. > > > > This is checking the NIC is not buggy :). Otherwise, we're giving > access to the guest to the CVQ shadow vring. And, currently, SVQ code > assumes only QEMU can access it. Just to make sure we are at the same page, I meant, the hardware should be buggy if the isolation of cvq is not consistent between single and multiqueue. > > But maybe this made more sense in previous versions, where the series > also cached the cvq group here. 
If I understand you correctly, it is > enough to check that CVQ is isolated in SQ, and assume it will be > isolated also in MQ, right? I can modify the patch that way if you > confirm this. I think so, or just negotiate with what hardware provides us and check. Thanks > > Thanks! > > > Thanks > > > > > + if (unlikely(r < 0)) { > > > + return r; > > > + } > > > + > > > + vhost_vdpa_reset_status_fd(device_fd); > > > + return r; > > > +} > > > + > > > static NetClientState *net_vhost_vdpa_init(NetClientState *peer, > > > const char *device, > > > const char *name, > > > @@ -808,16 +890,25 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, > > > bool is_datapath, > > > bool svq, > > > struct vhost_vdpa_iova_range iova_range, > > > - uint64_t features) > > > + uint64_t features, > > > + Error **errp) > > > { > > > NetClientState *nc = NULL; > > > VhostVDPAState *s; > > > int ret = 0; > > > assert(name); > > > + int cvq_isolated; > > > + > > > if (is_datapath) { > > > nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device, > > > name); > > > } else { > > > + cvq_isolated = vhost_vdpa_probe_cvq_isolation(vdpa_device_fd, features, > > > + queue_pair_index, errp); > > > + if (unlikely(cvq_isolated < 0)) { > > > + return NULL; > > > + } > > > + > > > nc = qemu_new_net_control_client(&net_vhost_vdpa_cvq_info, peer, > > > device, name); > > > } > > > @@ -844,6 +935,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, > > > > > > s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; > > > s->vhost_vdpa.shadow_vq_ops_opaque = s; > > > + s->cvq_isolated = cvq_isolated; > > > > > > /* > > > * TODO: We cannot migrate devices with CVQ as there is no way to set > > > @@ -972,7 +1064,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, > > > for (i = 0; i < queue_pairs; i++) { > > > ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, > > > vdpa_device_fd, i, 2, true, opts->x_svq, > > > - iova_range, features); > > > + 
iova_range, features, errp); > > > if (!ncs[i]) > > > goto err; > > > } > > > @@ -980,7 +1072,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, > > > if (has_cvq) { > > > nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, > > > vdpa_device_fd, i, 1, false, > > > - opts->x_svq, iova_range, features); > > > + opts->x_svq, iova_range, features, errp); > > > if (!nc) > > > goto err; > > > } > > > -- > > > 2.31.1 > > > > > >
On Thu, May 18, 2023 at 7:50 AM Jason Wang <jasowang@redhat.com> wrote: > > On Wed, May 17, 2023 at 2:30 PM Eugenio Perez Martin > <eperezma@redhat.com> wrote: > > > > On Wed, May 17, 2023 at 5:59 AM Jason Wang <jasowang@redhat.com> wrote: > > > > > > On Tue, May 9, 2023 at 11:44 PM Eugenio Pérez <eperezma@redhat.com> wrote: > > > > > > > > Evaluating it at start time instead of initialization time may make the > > > > guest capable of dynamically adding or removing migration blockers. > > > > > > > > Also, moving to initialization reduces the number of ioctls in the > > > > migration, reducing failure possibilities. > > > > > > > > As a drawback we need to check for CVQ isolation twice: one time with no > > > > MQ negotiated and another one acking it, as long as the device supports > > > > it. This is because Vring ASID / group management is based on vq > > > > indexes, but we don't know the index of CVQ before negotiating MQ. > > > > > > > > Signed-off-by: Eugenio Pérez <eperezma@redhat.com> > > > > --- > > > > v2: Take out the reset of the device from vhost_vdpa_cvq_is_isolated > > > > v3: Only record cvq_isolated, true if the device have cvq isolated in > > > > both !MQ and MQ configurations. 
> > > > --- > > > > net/vhost-vdpa.c | 178 +++++++++++++++++++++++++++++++++++------------ > > > > 1 file changed, 135 insertions(+), 43 deletions(-) > > > > > > > > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c > > > > index 3fb833fe76..29054b77a9 100644 > > > > --- a/net/vhost-vdpa.c > > > > +++ b/net/vhost-vdpa.c > > > > @@ -43,6 +43,10 @@ typedef struct VhostVDPAState { > > > > > > > > /* The device always have SVQ enabled */ > > > > bool always_svq; > > > > + > > > > + /* The device can isolate CVQ in its own ASID */ > > > > + bool cvq_isolated; > > > > + > > > > bool started; > > > > } VhostVDPAState; > > > > > > > > @@ -362,15 +366,8 @@ static NetClientInfo net_vhost_vdpa_info = { > > > > .check_peer_type = vhost_vdpa_check_peer_type, > > > > }; > > > > > > > > -/** > > > > - * Get vring virtqueue group > > > > - * > > > > - * @device_fd vdpa device fd > > > > - * @vq_index Virtqueue index > > > > - * > > > > - * Return -errno in case of error, or vq group if success. > > > > - */ > > > > -static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) > > > > +static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index, > > > > + Error **errp) > > > > { > > > > struct vhost_vring_state state = { > > > > .index = vq_index, > > > > @@ -379,8 +376,7 @@ static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) > > > > > > > > if (unlikely(r < 0)) { > > > > r = -errno; > > > > - error_report("Cannot get VQ %u group: %s", vq_index, > > > > - g_strerror(errno)); > > > > + error_setg_errno(errp, errno, "Cannot get VQ %u group", vq_index); > > > > return r; > > > > } > > > > > > > > @@ -480,9 +476,9 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) > > > > { > > > > VhostVDPAState *s, *s0; > > > > struct vhost_vdpa *v; > > > > - uint64_t backend_features; > > > > int64_t cvq_group; > > > > - int cvq_index, r; > > > > + int r; > > > > + Error *err = NULL; > > > > > > > > assert(nc->info->type == 
NET_CLIENT_DRIVER_VHOST_VDPA); > > > > > > > > @@ -502,41 +498,22 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) > > > > /* > > > > * If we early return in these cases SVQ will not be enabled. The migration > > > > * will be blocked as long as vhost-vdpa backends will not offer _F_LOG. > > > > - * > > > > - * Calling VHOST_GET_BACKEND_FEATURES as they are not available in v->dev > > > > - * yet. > > > > */ > > > > - r = ioctl(v->device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features); > > > > - if (unlikely(r < 0)) { > > > > - error_report("Cannot get vdpa backend_features: %s(%d)", > > > > - g_strerror(errno), errno); > > > > - return -1; > > > > + if (!vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) { > > > > + return 0; > > > > } > > > > - if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)) || > > > > - !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) { > > > > + > > > > + if (!s->cvq_isolated) { > > > > return 0; > > > > } > > > > > > > > - /* > > > > - * Check if all the virtqueues of the virtio device are in a different vq > > > > - * than the last vq. VQ group of last group passed in cvq_group. 
> > > > - */ > > > > - cvq_index = v->dev->vq_index_end - 1; > > > > - cvq_group = vhost_vdpa_get_vring_group(v->device_fd, cvq_index); > > > > + cvq_group = vhost_vdpa_get_vring_group(v->device_fd, > > > > + v->dev->vq_index_end - 1, > > > > + &err); > > > > if (unlikely(cvq_group < 0)) { > > > > + error_report_err(err); > > > > return cvq_group; > > > > } > > > > - for (int i = 0; i < cvq_index; ++i) { > > > > - int64_t group = vhost_vdpa_get_vring_group(v->device_fd, i); > > > > - > > > > - if (unlikely(group < 0)) { > > > > - return group; > > > > - } > > > > - > > > > - if (group == cvq_group) { > > > > - return 0; > > > > - } > > > > - } > > > > > > > > r = vhost_vdpa_set_address_space_id(v, cvq_group, VHOST_VDPA_NET_CVQ_ASID); > > > > if (unlikely(r < 0)) { > > > > @@ -799,6 +776,111 @@ static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = { > > > > .avail_handler = vhost_vdpa_net_handle_ctrl_avail, > > > > }; > > > > > > > > +/** > > > > + * Probe the device to check control virtqueue is isolated. 
> > > > + * > > > > + * @device_fd vhost-vdpa file descriptor > > > > + * @features features to negotiate > > > > + * @cvq_index Control vq index > > > > + * > > > > + * Returns -1 in case of error, 0 if false and 1 if true > > > > + */ > > > > +static int vhost_vdpa_cvq_is_isolated(int device_fd, uint64_t features, > > > > + unsigned cvq_index, Error **errp) > > > > +{ > > > > + int64_t cvq_group; > > > > + int r; > > > > + > > > > + r = vhost_vdpa_set_dev_features_fd(device_fd, features); > > > > + if (unlikely(r < 0)) { > > > > + error_setg_errno(errp, -r, "Cannot set device features"); > > > > + return r; > > > > + } > > > > + > > > > + cvq_group = vhost_vdpa_get_vring_group(device_fd, cvq_index, errp); > > > > + if (unlikely(cvq_group < 0)) { > > > > + return cvq_group; > > > > + } > > > > + > > > > + for (int i = 0; i < cvq_index; ++i) { > > > > + int64_t group = vhost_vdpa_get_vring_group(device_fd, i, errp); > > > > + > > > > + if (unlikely(group < 0)) { > > > > + return group; > > > > + } > > > > + > > > > + if (group == (int64_t)cvq_group) { > > > > + return 0; > > > > + } > > > > + } > > > > + > > > > + return 1; > > > > +} > > > > + > > > > +/** > > > > + * Probe if CVQ is isolated when the device is MQ and when it is not MQ > > > > + * > > > > + * @device_fd The vdpa device fd > > > > + * @features Features offered by the device. > > > > + * @cvq_index The control vq index if mq is negotiated. Ignored > > > > + * otherwise. > > > > + * > > > > + * Returns <0 in case of failure, 0 if false and 1 if true. 
> > > > + */ > > > > +static int vhost_vdpa_probe_cvq_isolation(int device_fd, uint64_t features, > > > > + int cvq_index, Error **errp) > > > > +{ > > > > + uint64_t backend_features; > > > > + int r; > > > > + > > > > + ERRP_GUARD(); > > > > + > > > > + r = ioctl(device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features); > > > > + if (unlikely(r < 0)) { > > > > + error_setg_errno(errp, errno, "Cannot get vdpa backend_features"); > > > > + return r; > > > > + } > > > > + > > > > + if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID))) { > > > > + return 0; > > > > + } > > > > + > > > > + r = vhost_vdpa_cvq_is_isolated(device_fd, > > > > + features & ~BIT_ULL(VIRTIO_NET_F_MQ), 2, > > > > + errp); > > > > + if (unlikely(r < 0)) { > > > > + if (r != -ENOTSUP) { > > > > + return r; > > > > + } > > > > + > > > > + /* > > > > + * The kernel report VHOST_BACKEND_F_IOTLB_ASID if the vdpa frontend > > > > + * support ASID even if the parent driver does not. The CVQ cannot be > > > > + * isolated in this case. > > > > + */ > > > > + error_free(*errp); > > > > + *errp = NULL; > > > > + return 0; > > > > + } > > > > + > > > > + if (r == 0) { > > > > + return 0; > > > > + } > > > > + > > > > + vhost_vdpa_reset_status_fd(device_fd); > > > > + if (!(features & BIT_ULL(VIRTIO_NET_F_MQ))) { > > > > + return 0; > > > > + } > > > > + > > > > + r = vhost_vdpa_cvq_is_isolated(device_fd, features, cvq_index * 2, errp); > > > > > > I think checking this once should be sufficient. That is to say, it > > > should be a bug if there's hardware that puts cvq in a dedicated group > > > in MQ but not in SQ. > > > > > > > This is checking the NIC is not buggy :). Otherwise, we're giving > > access to the guest to the CVQ shadow vring. And, currently, SVQ code > > assumes only QEMU can access it. > > Just to make sure we are at the same page, I meant, the hardware > should be buggy if the isolation of cvq is not consistent between > single and multiqueue. > Yes, I got you. 
The problem with that particular bug is that we will handle guest's vring with the bad IOVA tree. Since QEMU is not sanitizing those descriptors anymore, the device can be used to write to QEMU memory. At this time only SVQ vring and in buffers should be writable by this, so it's not a big deal. This can also happen if the device is buggy in other ways. For example, reporting that CVQ is isolated at VHOST_VDPA_GET_VRING_GROUP but then handling maps ignoring the ASID parameter. There is no protection for that, so I agree this double check makes little sense. > > > > But maybe this made more sense in previous versions, where the series > > also cached the cvq group here. If I understand you correctly, it is > > enough to check that CVQ is isolated in SQ, and assume it will be > > isolated also in MQ, right? I can modify the patch that way if you > > confirm this. > > I think so, or just negotiate with what hardware provides us and check. > Always probing with SQ makes the code simpler, but let me know if you think there are advantages to probing otherwise. Thanks!
On Thu, May 18, 2023 at 2:37 PM Eugenio Perez Martin <eperezma@redhat.com> wrote: > > On Thu, May 18, 2023 at 7:50 AM Jason Wang <jasowang@redhat.com> wrote: > > > > On Wed, May 17, 2023 at 2:30 PM Eugenio Perez Martin > > <eperezma@redhat.com> wrote: > > > > > > On Wed, May 17, 2023 at 5:59 AM Jason Wang <jasowang@redhat.com> wrote: > > > > > > > > On Tue, May 9, 2023 at 11:44 PM Eugenio Pérez <eperezma@redhat.com> wrote: > > > > > > > > > > Evaluating it at start time instead of initialization time may make the > > > > > guest capable of dynamically adding or removing migration blockers. > > > > > > > > > > Also, moving to initialization reduces the number of ioctls in the > > > > > migration, reducing failure possibilities. > > > > > > > > > > As a drawback we need to check for CVQ isolation twice: one time with no > > > > > MQ negotiated and another one acking it, as long as the device supports > > > > > it. This is because Vring ASID / group management is based on vq > > > > > indexes, but we don't know the index of CVQ before negotiating MQ. > > > > > > > > > > Signed-off-by: Eugenio Pérez <eperezma@redhat.com> > > > > > --- > > > > > v2: Take out the reset of the device from vhost_vdpa_cvq_is_isolated > > > > > v3: Only record cvq_isolated, true if the device have cvq isolated in > > > > > both !MQ and MQ configurations. 
> > > > > --- > > > > > net/vhost-vdpa.c | 178 +++++++++++++++++++++++++++++++++++------------ > > > > > 1 file changed, 135 insertions(+), 43 deletions(-) > > > > > > > > > > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c > > > > > index 3fb833fe76..29054b77a9 100644 > > > > > --- a/net/vhost-vdpa.c > > > > > +++ b/net/vhost-vdpa.c > > > > > @@ -43,6 +43,10 @@ typedef struct VhostVDPAState { > > > > > > > > > > /* The device always have SVQ enabled */ > > > > > bool always_svq; > > > > > + > > > > > + /* The device can isolate CVQ in its own ASID */ > > > > > + bool cvq_isolated; > > > > > + > > > > > bool started; > > > > > } VhostVDPAState; > > > > > > > > > > @@ -362,15 +366,8 @@ static NetClientInfo net_vhost_vdpa_info = { > > > > > .check_peer_type = vhost_vdpa_check_peer_type, > > > > > }; > > > > > > > > > > -/** > > > > > - * Get vring virtqueue group > > > > > - * > > > > > - * @device_fd vdpa device fd > > > > > - * @vq_index Virtqueue index > > > > > - * > > > > > - * Return -errno in case of error, or vq group if success. 
> > > > > - */ > > > > > -static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) > > > > > +static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index, > > > > > + Error **errp) > > > > > { > > > > > struct vhost_vring_state state = { > > > > > .index = vq_index, > > > > > @@ -379,8 +376,7 @@ static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) > > > > > > > > > > if (unlikely(r < 0)) { > > > > > r = -errno; > > > > > - error_report("Cannot get VQ %u group: %s", vq_index, > > > > > - g_strerror(errno)); > > > > > + error_setg_errno(errp, errno, "Cannot get VQ %u group", vq_index); > > > > > return r; > > > > > } > > > > > > > > > > @@ -480,9 +476,9 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) > > > > > { > > > > > VhostVDPAState *s, *s0; > > > > > struct vhost_vdpa *v; > > > > > - uint64_t backend_features; > > > > > int64_t cvq_group; > > > > > - int cvq_index, r; > > > > > + int r; > > > > > + Error *err = NULL; > > > > > > > > > > assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); > > > > > > > > > > @@ -502,41 +498,22 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) > > > > > /* > > > > > * If we early return in these cases SVQ will not be enabled. The migration > > > > > * will be blocked as long as vhost-vdpa backends will not offer _F_LOG. > > > > > - * > > > > > - * Calling VHOST_GET_BACKEND_FEATURES as they are not available in v->dev > > > > > - * yet. 
> > > > > */ > > > > > - r = ioctl(v->device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features); > > > > > - if (unlikely(r < 0)) { > > > > > - error_report("Cannot get vdpa backend_features: %s(%d)", > > > > > - g_strerror(errno), errno); > > > > > - return -1; > > > > > + if (!vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) { > > > > > + return 0; > > > > > } > > > > > - if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)) || > > > > > - !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) { > > > > > + > > > > > + if (!s->cvq_isolated) { > > > > > return 0; > > > > > } > > > > > > > > > > - /* > > > > > - * Check if all the virtqueues of the virtio device are in a different vq > > > > > - * than the last vq. VQ group of last group passed in cvq_group. > > > > > - */ > > > > > - cvq_index = v->dev->vq_index_end - 1; > > > > > - cvq_group = vhost_vdpa_get_vring_group(v->device_fd, cvq_index); > > > > > + cvq_group = vhost_vdpa_get_vring_group(v->device_fd, > > > > > + v->dev->vq_index_end - 1, > > > > > + &err); > > > > > if (unlikely(cvq_group < 0)) { > > > > > + error_report_err(err); > > > > > return cvq_group; > > > > > } > > > > > - for (int i = 0; i < cvq_index; ++i) { > > > > > - int64_t group = vhost_vdpa_get_vring_group(v->device_fd, i); > > > > > - > > > > > - if (unlikely(group < 0)) { > > > > > - return group; > > > > > - } > > > > > - > > > > > - if (group == cvq_group) { > > > > > - return 0; > > > > > - } > > > > > - } > > > > > > > > > > r = vhost_vdpa_set_address_space_id(v, cvq_group, VHOST_VDPA_NET_CVQ_ASID); > > > > > if (unlikely(r < 0)) { > > > > > @@ -799,6 +776,111 @@ static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = { > > > > > .avail_handler = vhost_vdpa_net_handle_ctrl_avail, > > > > > }; > > > > > > > > > > +/** > > > > > + * Probe the device to check control virtqueue is isolated. 
> > > > > + * > > > > > + * @device_fd vhost-vdpa file descriptor > > > > > + * @features features to negotiate > > > > > + * @cvq_index Control vq index > > > > > + * > > > > > + * Returns -1 in case of error, 0 if false and 1 if true > > > > > + */ > > > > > +static int vhost_vdpa_cvq_is_isolated(int device_fd, uint64_t features, > > > > > + unsigned cvq_index, Error **errp) > > > > > +{ > > > > > + int64_t cvq_group; > > > > > + int r; > > > > > + > > > > > + r = vhost_vdpa_set_dev_features_fd(device_fd, features); > > > > > + if (unlikely(r < 0)) { > > > > > + error_setg_errno(errp, -r, "Cannot set device features"); > > > > > + return r; > > > > > + } > > > > > + > > > > > + cvq_group = vhost_vdpa_get_vring_group(device_fd, cvq_index, errp); > > > > > + if (unlikely(cvq_group < 0)) { > > > > > + return cvq_group; > > > > > + } > > > > > + > > > > > + for (int i = 0; i < cvq_index; ++i) { > > > > > + int64_t group = vhost_vdpa_get_vring_group(device_fd, i, errp); > > > > > + > > > > > + if (unlikely(group < 0)) { > > > > > + return group; > > > > > + } > > > > > + > > > > > + if (group == (int64_t)cvq_group) { > > > > > + return 0; > > > > > + } > > > > > + } > > > > > + > > > > > + return 1; > > > > > +} > > > > > + > > > > > +/** > > > > > + * Probe if CVQ is isolated when the device is MQ and when it is not MQ > > > > > + * > > > > > + * @device_fd The vdpa device fd > > > > > + * @features Features offered by the device. > > > > > + * @cvq_index The control vq index if mq is negotiated. Ignored > > > > > + * otherwise. > > > > > + * > > > > > + * Returns <0 in case of failure, 0 if false and 1 if true. 
> > > > > + */ > > > > > +static int vhost_vdpa_probe_cvq_isolation(int device_fd, uint64_t features, > > > > > + int cvq_index, Error **errp) > > > > > +{ > > > > > + uint64_t backend_features; > > > > > + int r; > > > > > + > > > > > + ERRP_GUARD(); > > > > > + > > > > > + r = ioctl(device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features); > > > > > + if (unlikely(r < 0)) { > > > > > + error_setg_errno(errp, errno, "Cannot get vdpa backend_features"); > > > > > + return r; > > > > > + } > > > > > + > > > > > + if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID))) { > > > > > + return 0; > > > > > + } > > > > > + > > > > > + r = vhost_vdpa_cvq_is_isolated(device_fd, > > > > > + features & ~BIT_ULL(VIRTIO_NET_F_MQ), 2, > > > > > + errp); > > > > > + if (unlikely(r < 0)) { > > > > > + if (r != -ENOTSUP) { > > > > > + return r; > > > > > + } > > > > > + > > > > > + /* > > > > > + * The kernel report VHOST_BACKEND_F_IOTLB_ASID if the vdpa frontend > > > > > + * support ASID even if the parent driver does not. The CVQ cannot be > > > > > + * isolated in this case. > > > > > + */ > > > > > + error_free(*errp); > > > > > + *errp = NULL; > > > > > + return 0; > > > > > + } > > > > > + > > > > > + if (r == 0) { > > > > > + return 0; > > > > > + } > > > > > + > > > > > + vhost_vdpa_reset_status_fd(device_fd); > > > > > + if (!(features & BIT_ULL(VIRTIO_NET_F_MQ))) { > > > > > + return 0; > > > > > + } > > > > > + > > > > > + r = vhost_vdpa_cvq_is_isolated(device_fd, features, cvq_index * 2, errp); > > > > > > > > I think checking this once should be sufficient. That is to say, it > > > > should be a bug if there's hardware that puts cvq in a dedicated group > > > > in MQ but not in SQ. > > > > > > > > > > This is checking the NIC is not buggy :). Otherwise, we're giving > > > access to the guest to the CVQ shadow vring. And, currently, SVQ code > > > assumes only QEMU can access it. 
> > > > Just to make sure we are at the same page, I meant, the hardware > > should be buggy if the isolation of cvq is not consistent between > > single and multiqueue. > > > > Yes, I got you. > > The problem with that particular bug is that we will handle guest's > vring with the bad IOVA tree. Since QEMU is not sanitizing that > descriptors anymore, the device can be used to write at qemu memory. > At this time only SVQ vring and in buffers should be writable by this, > so it's not a big deal. > > This can also happen if the device is buggy in other ways. For > example, reporting that CVQ is isolated at VHOST_VDPA_GET_VRING_GROUP > but then handling maps ignoring the ASID parameter. There is no > protection for that, so I agree this double check makes little sense. > > > > > > > But maybe this made more sense in previous versions, where the series > > > also cached the cvq group here. If I understand you correctly, it is > > > enough to check that CVQ is isolated in SQ, and assume it will be > > > isolated also in MQ, right? I can modify the patch that way if you > > > confirm this. > > > > I think so, or just negotiate with what hardware provides us and check. > > > > To always probe with SQ makes the code simpler, but let me know if you > think there are advantages to probing otherwise. It should be fine. Thanks > > Thanks! >
On Thu, May 18, 2023 at 08:36:22AM +0200, Eugenio Perez Martin wrote: > On Thu, May 18, 2023 at 7:50 AM Jason Wang <jasowang@redhat.com> wrote: > > > > On Wed, May 17, 2023 at 2:30 PM Eugenio Perez Martin > > <eperezma@redhat.com> wrote: > > > > > > On Wed, May 17, 2023 at 5:59 AM Jason Wang <jasowang@redhat.com> wrote: > > > > > > > > On Tue, May 9, 2023 at 11:44 PM Eugenio Pérez <eperezma@redhat.com> wrote: > > > > > > > > > > Evaluating it at start time instead of initialization time may make the > > > > > guest capable of dynamically adding or removing migration blockers. > > > > > > > > > > Also, moving to initialization reduces the number of ioctls in the > > > > > migration, reducing failure possibilities. > > > > > > > > > > As a drawback we need to check for CVQ isolation twice: one time with no > > > > > MQ negotiated and another one acking it, as long as the device supports > > > > > it. This is because Vring ASID / group management is based on vq > > > > > indexes, but we don't know the index of CVQ before negotiating MQ. > > > > > > > > > > Signed-off-by: Eugenio Pérez <eperezma@redhat.com> > > > > > --- > > > > > v2: Take out the reset of the device from vhost_vdpa_cvq_is_isolated > > > > > v3: Only record cvq_isolated, true if the device have cvq isolated in > > > > > both !MQ and MQ configurations. 
> > > > > --- > > > > > net/vhost-vdpa.c | 178 +++++++++++++++++++++++++++++++++++------------ > > > > > 1 file changed, 135 insertions(+), 43 deletions(-) > > > > > > > > > > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c > > > > > index 3fb833fe76..29054b77a9 100644 > > > > > --- a/net/vhost-vdpa.c > > > > > +++ b/net/vhost-vdpa.c > > > > > @@ -43,6 +43,10 @@ typedef struct VhostVDPAState { > > > > > > > > > > /* The device always have SVQ enabled */ > > > > > bool always_svq; > > > > > + > > > > > + /* The device can isolate CVQ in its own ASID */ > > > > > + bool cvq_isolated; > > > > > + > > > > > bool started; > > > > > } VhostVDPAState; > > > > > > > > > > @@ -362,15 +366,8 @@ static NetClientInfo net_vhost_vdpa_info = { > > > > > .check_peer_type = vhost_vdpa_check_peer_type, > > > > > }; > > > > > > > > > > -/** > > > > > - * Get vring virtqueue group > > > > > - * > > > > > - * @device_fd vdpa device fd > > > > > - * @vq_index Virtqueue index > > > > > - * > > > > > - * Return -errno in case of error, or vq group if success. 
> > > > > - */ > > > > > -static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) > > > > > +static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index, > > > > > + Error **errp) > > > > > { > > > > > struct vhost_vring_state state = { > > > > > .index = vq_index, > > > > > @@ -379,8 +376,7 @@ static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) > > > > > > > > > > if (unlikely(r < 0)) { > > > > > r = -errno; > > > > > - error_report("Cannot get VQ %u group: %s", vq_index, > > > > > - g_strerror(errno)); > > > > > + error_setg_errno(errp, errno, "Cannot get VQ %u group", vq_index); > > > > > return r; > > > > > } > > > > > > > > > > @@ -480,9 +476,9 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) > > > > > { > > > > > VhostVDPAState *s, *s0; > > > > > struct vhost_vdpa *v; > > > > > - uint64_t backend_features; > > > > > int64_t cvq_group; > > > > > - int cvq_index, r; > > > > > + int r; > > > > > + Error *err = NULL; > > > > > > > > > > assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); > > > > > > > > > > @@ -502,41 +498,22 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) > > > > > /* > > > > > * If we early return in these cases SVQ will not be enabled. The migration > > > > > * will be blocked as long as vhost-vdpa backends will not offer _F_LOG. > > > > > - * > > > > > - * Calling VHOST_GET_BACKEND_FEATURES as they are not available in v->dev > > > > > - * yet. 
> > > > > */ > > > > > - r = ioctl(v->device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features); > > > > > - if (unlikely(r < 0)) { > > > > > - error_report("Cannot get vdpa backend_features: %s(%d)", > > > > > - g_strerror(errno), errno); > > > > > - return -1; > > > > > + if (!vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) { > > > > > + return 0; > > > > > } > > > > > - if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)) || > > > > > - !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) { > > > > > + > > > > > + if (!s->cvq_isolated) { > > > > > return 0; > > > > > } > > > > > > > > > > - /* > > > > > - * Check if all the virtqueues of the virtio device are in a different vq > > > > > - * than the last vq. VQ group of last group passed in cvq_group. > > > > > - */ > > > > > - cvq_index = v->dev->vq_index_end - 1; > > > > > - cvq_group = vhost_vdpa_get_vring_group(v->device_fd, cvq_index); > > > > > + cvq_group = vhost_vdpa_get_vring_group(v->device_fd, > > > > > + v->dev->vq_index_end - 1, > > > > > + &err); > > > > > if (unlikely(cvq_group < 0)) { > > > > > + error_report_err(err); > > > > > return cvq_group; > > > > > } > > > > > - for (int i = 0; i < cvq_index; ++i) { > > > > > - int64_t group = vhost_vdpa_get_vring_group(v->device_fd, i); > > > > > - > > > > > - if (unlikely(group < 0)) { > > > > > - return group; > > > > > - } > > > > > - > > > > > - if (group == cvq_group) { > > > > > - return 0; > > > > > - } > > > > > - } > > > > > > > > > > r = vhost_vdpa_set_address_space_id(v, cvq_group, VHOST_VDPA_NET_CVQ_ASID); > > > > > if (unlikely(r < 0)) { > > > > > @@ -799,6 +776,111 @@ static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = { > > > > > .avail_handler = vhost_vdpa_net_handle_ctrl_avail, > > > > > }; > > > > > > > > > > +/** > > > > > + * Probe the device to check control virtqueue is isolated. 
> > > > > + * > > > > > + * @device_fd vhost-vdpa file descriptor > > > > > + * @features features to negotiate > > > > > + * @cvq_index Control vq index > > > > > + * > > > > > + * Returns -1 in case of error, 0 if false and 1 if true > > > > > + */ > > > > > +static int vhost_vdpa_cvq_is_isolated(int device_fd, uint64_t features, > > > > > + unsigned cvq_index, Error **errp) > > > > > +{ > > > > > + int64_t cvq_group; > > > > > + int r; > > > > > + > > > > > + r = vhost_vdpa_set_dev_features_fd(device_fd, features); > > > > > + if (unlikely(r < 0)) { > > > > > + error_setg_errno(errp, -r, "Cannot set device features"); > > > > > + return r; > > > > > + } > > > > > + > > > > > + cvq_group = vhost_vdpa_get_vring_group(device_fd, cvq_index, errp); > > > > > + if (unlikely(cvq_group < 0)) { > > > > > + return cvq_group; > > > > > + } > > > > > + > > > > > + for (int i = 0; i < cvq_index; ++i) { > > > > > + int64_t group = vhost_vdpa_get_vring_group(device_fd, i, errp); > > > > > + > > > > > + if (unlikely(group < 0)) { > > > > > + return group; > > > > > + } > > > > > + > > > > > + if (group == (int64_t)cvq_group) { > > > > > + return 0; > > > > > + } > > > > > + } > > > > > + > > > > > + return 1; > > > > > +} > > > > > + > > > > > +/** > > > > > + * Probe if CVQ is isolated when the device is MQ and when it is not MQ > > > > > + * > > > > > + * @device_fd The vdpa device fd > > > > > + * @features Features offered by the device. > > > > > + * @cvq_index The control vq index if mq is negotiated. Ignored > > > > > + * otherwise. > > > > > + * > > > > > + * Returns <0 in case of failure, 0 if false and 1 if true. 
> > > > > + */ > > > > > +static int vhost_vdpa_probe_cvq_isolation(int device_fd, uint64_t features, > > > > > + int cvq_index, Error **errp) > > > > > +{ > > > > > + uint64_t backend_features; > > > > > + int r; > > > > > + > > > > > + ERRP_GUARD(); > > > > > + > > > > > + r = ioctl(device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features); > > > > > + if (unlikely(r < 0)) { > > > > > + error_setg_errno(errp, errno, "Cannot get vdpa backend_features"); > > > > > + return r; > > > > > + } > > > > > + > > > > > + if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID))) { > > > > > + return 0; > > > > > + } > > > > > + > > > > > + r = vhost_vdpa_cvq_is_isolated(device_fd, > > > > > + features & ~BIT_ULL(VIRTIO_NET_F_MQ), 2, > > > > > + errp); > > > > > + if (unlikely(r < 0)) { > > > > > + if (r != -ENOTSUP) { > > > > > + return r; > > > > > + } > > > > > + > > > > > + /* > > > > > + * The kernel report VHOST_BACKEND_F_IOTLB_ASID if the vdpa frontend > > > > > + * support ASID even if the parent driver does not. The CVQ cannot be > > > > > + * isolated in this case. > > > > > + */ > > > > > + error_free(*errp); > > > > > + *errp = NULL; > > > > > + return 0; > > > > > + } > > > > > + > > > > > + if (r == 0) { > > > > > + return 0; > > > > > + } > > > > > + > > > > > + vhost_vdpa_reset_status_fd(device_fd); > > > > > + if (!(features & BIT_ULL(VIRTIO_NET_F_MQ))) { > > > > > + return 0; > > > > > + } > > > > > + > > > > > + r = vhost_vdpa_cvq_is_isolated(device_fd, features, cvq_index * 2, errp); > > > > > > > > I think checking this once should be sufficient. That is to say, it > > > > should be a bug if there's hardware that puts cvq in a dedicated group > > > > in MQ but not in SQ. > > > > > > > > > > This is checking the NIC is not buggy :). Otherwise, we're giving > > > access to the guest to the CVQ shadow vring. And, currently, SVQ code > > > assumes only QEMU can access it. 
> > > > Just to make sure we are at the same page, I meant, the hardware > > should be buggy if the isolation of cvq is not consistent between > > single and multiqueue. > > > > Yes, I got you. > > The problem with that particular bug is that we will handle guest's > vring with the bad IOVA tree. Since QEMU is not sanitizing that > descriptors anymore, the device can be used to write at qemu memory. > At this time only SVQ vring and in buffers should be writable by this, > so it's not a big deal. > > This can also happen if the device is buggy in other ways. For > example, reporting that CVQ is isolated at VHOST_VDPA_GET_VRING_GROUP > but then handling maps ignoring the ASID parameter. There is no > protection for that, so I agree this double check makes little sense. Ok so you will repost with this check removed? > > > > > > But maybe this made more sense in previous versions, where the series > > > also cached the cvq group here. If I understand you correctly, it is > > > enough to check that CVQ is isolated in SQ, and assume it will be > > > isolated also in MQ, right? I can modify the patch that way if you > > > confirm this. > > > > I think so, or just negotiate with what hardware provides us and check. > > > > To always probe with SQ makes the code simpler, but let me know if you > think there are advantages to probing otherwise. > > Thanks!
On Thu, May 18, 2023 at 11:23 PM Michael S. Tsirkin <mst@redhat.com> wrote: > > On Thu, May 18, 2023 at 08:36:22AM +0200, Eugenio Perez Martin wrote: > > On Thu, May 18, 2023 at 7:50 AM Jason Wang <jasowang@redhat.com> wrote: > > > > > > On Wed, May 17, 2023 at 2:30 PM Eugenio Perez Martin > > > <eperezma@redhat.com> wrote: > > > > > > > > On Wed, May 17, 2023 at 5:59 AM Jason Wang <jasowang@redhat.com> wrote: > > > > > > > > > > On Tue, May 9, 2023 at 11:44 PM Eugenio Pérez <eperezma@redhat.com> wrote: > > > > > > > > > > > > Evaluating it at start time instead of initialization time may make the > > > > > > guest capable of dynamically adding or removing migration blockers. > > > > > > > > > > > > Also, moving to initialization reduces the number of ioctls in the > > > > > > migration, reducing failure possibilities. > > > > > > > > > > > > As a drawback we need to check for CVQ isolation twice: one time with no > > > > > > MQ negotiated and another one acking it, as long as the device supports > > > > > > it. This is because Vring ASID / group management is based on vq > > > > > > indexes, but we don't know the index of CVQ before negotiating MQ. > > > > > > > > > > > > Signed-off-by: Eugenio Pérez <eperezma@redhat.com> > > > > > > --- > > > > > > v2: Take out the reset of the device from vhost_vdpa_cvq_is_isolated > > > > > > v3: Only record cvq_isolated, true if the device have cvq isolated in > > > > > > both !MQ and MQ configurations. 
> > > > > > --- > > > > > > net/vhost-vdpa.c | 178 +++++++++++++++++++++++++++++++++++------------ > > > > > > 1 file changed, 135 insertions(+), 43 deletions(-) > > > > > > > > > > > > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c > > > > > > index 3fb833fe76..29054b77a9 100644 > > > > > > --- a/net/vhost-vdpa.c > > > > > > +++ b/net/vhost-vdpa.c > > > > > > @@ -43,6 +43,10 @@ typedef struct VhostVDPAState { > > > > > > > > > > > > /* The device always have SVQ enabled */ > > > > > > bool always_svq; > > > > > > + > > > > > > + /* The device can isolate CVQ in its own ASID */ > > > > > > + bool cvq_isolated; > > > > > > + > > > > > > bool started; > > > > > > } VhostVDPAState; > > > > > > > > > > > > @@ -362,15 +366,8 @@ static NetClientInfo net_vhost_vdpa_info = { > > > > > > .check_peer_type = vhost_vdpa_check_peer_type, > > > > > > }; > > > > > > > > > > > > -/** > > > > > > - * Get vring virtqueue group > > > > > > - * > > > > > > - * @device_fd vdpa device fd > > > > > > - * @vq_index Virtqueue index > > > > > > - * > > > > > > - * Return -errno in case of error, or vq group if success. 
> > > > > > - */ > > > > > > -static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) > > > > > > +static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index, > > > > > > + Error **errp) > > > > > > { > > > > > > struct vhost_vring_state state = { > > > > > > .index = vq_index, > > > > > > @@ -379,8 +376,7 @@ static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) > > > > > > > > > > > > if (unlikely(r < 0)) { > > > > > > r = -errno; > > > > > > - error_report("Cannot get VQ %u group: %s", vq_index, > > > > > > - g_strerror(errno)); > > > > > > + error_setg_errno(errp, errno, "Cannot get VQ %u group", vq_index); > > > > > > return r; > > > > > > } > > > > > > > > > > > > @@ -480,9 +476,9 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) > > > > > > { > > > > > > VhostVDPAState *s, *s0; > > > > > > struct vhost_vdpa *v; > > > > > > - uint64_t backend_features; > > > > > > int64_t cvq_group; > > > > > > - int cvq_index, r; > > > > > > + int r; > > > > > > + Error *err = NULL; > > > > > > > > > > > > assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); > > > > > > > > > > > > @@ -502,41 +498,22 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) > > > > > > /* > > > > > > * If we early return in these cases SVQ will not be enabled. The migration > > > > > > * will be blocked as long as vhost-vdpa backends will not offer _F_LOG. > > > > > > - * > > > > > > - * Calling VHOST_GET_BACKEND_FEATURES as they are not available in v->dev > > > > > > - * yet. 
> > > > > > */ > > > > > > - r = ioctl(v->device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features); > > > > > > - if (unlikely(r < 0)) { > > > > > > - error_report("Cannot get vdpa backend_features: %s(%d)", > > > > > > - g_strerror(errno), errno); > > > > > > - return -1; > > > > > > + if (!vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) { > > > > > > + return 0; > > > > > > } > > > > > > - if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)) || > > > > > > - !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) { > > > > > > + > > > > > > + if (!s->cvq_isolated) { > > > > > > return 0; > > > > > > } > > > > > > > > > > > > - /* > > > > > > - * Check if all the virtqueues of the virtio device are in a different vq > > > > > > - * than the last vq. VQ group of last group passed in cvq_group. > > > > > > - */ > > > > > > - cvq_index = v->dev->vq_index_end - 1; > > > > > > - cvq_group = vhost_vdpa_get_vring_group(v->device_fd, cvq_index); > > > > > > + cvq_group = vhost_vdpa_get_vring_group(v->device_fd, > > > > > > + v->dev->vq_index_end - 1, > > > > > > + &err); > > > > > > if (unlikely(cvq_group < 0)) { > > > > > > + error_report_err(err); > > > > > > return cvq_group; > > > > > > } > > > > > > - for (int i = 0; i < cvq_index; ++i) { > > > > > > - int64_t group = vhost_vdpa_get_vring_group(v->device_fd, i); > > > > > > - > > > > > > - if (unlikely(group < 0)) { > > > > > > - return group; > > > > > > - } > > > > > > - > > > > > > - if (group == cvq_group) { > > > > > > - return 0; > > > > > > - } > > > > > > - } > > > > > > > > > > > > r = vhost_vdpa_set_address_space_id(v, cvq_group, VHOST_VDPA_NET_CVQ_ASID); > > > > > > if (unlikely(r < 0)) { > > > > > > @@ -799,6 +776,111 @@ static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = { > > > > > > .avail_handler = vhost_vdpa_net_handle_ctrl_avail, > > > > > > }; > > > > > > > > > > > > +/** > > > > > > + * Probe the device to check control virtqueue is isolated. 
> > > > > > + * > > > > > > + * @device_fd vhost-vdpa file descriptor > > > > > > + * @features features to negotiate > > > > > > + * @cvq_index Control vq index > > > > > > + * > > > > > > + * Returns -1 in case of error, 0 if false and 1 if true > > > > > > + */ > > > > > > +static int vhost_vdpa_cvq_is_isolated(int device_fd, uint64_t features, > > > > > > + unsigned cvq_index, Error **errp) > > > > > > +{ > > > > > > + int64_t cvq_group; > > > > > > + int r; > > > > > > + > > > > > > + r = vhost_vdpa_set_dev_features_fd(device_fd, features); > > > > > > + if (unlikely(r < 0)) { > > > > > > + error_setg_errno(errp, -r, "Cannot set device features"); > > > > > > + return r; > > > > > > + } > > > > > > + > > > > > > + cvq_group = vhost_vdpa_get_vring_group(device_fd, cvq_index, errp); > > > > > > + if (unlikely(cvq_group < 0)) { > > > > > > + return cvq_group; > > > > > > + } > > > > > > + > > > > > > + for (int i = 0; i < cvq_index; ++i) { > > > > > > + int64_t group = vhost_vdpa_get_vring_group(device_fd, i, errp); > > > > > > + > > > > > > + if (unlikely(group < 0)) { > > > > > > + return group; > > > > > > + } > > > > > > + > > > > > > + if (group == (int64_t)cvq_group) { > > > > > > + return 0; > > > > > > + } > > > > > > + } > > > > > > + > > > > > > + return 1; > > > > > > +} > > > > > > + > > > > > > +/** > > > > > > + * Probe if CVQ is isolated when the device is MQ and when it is not MQ > > > > > > + * > > > > > > + * @device_fd The vdpa device fd > > > > > > + * @features Features offered by the device. > > > > > > + * @cvq_index The control vq index if mq is negotiated. Ignored > > > > > > + * otherwise. > > > > > > + * > > > > > > + * Returns <0 in case of failure, 0 if false and 1 if true. 
> > > > > > + */ > > > > > > +static int vhost_vdpa_probe_cvq_isolation(int device_fd, uint64_t features, > > > > > > + int cvq_index, Error **errp) > > > > > > +{ > > > > > > + uint64_t backend_features; > > > > > > + int r; > > > > > > + > > > > > > + ERRP_GUARD(); > > > > > > + > > > > > > + r = ioctl(device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features); > > > > > > + if (unlikely(r < 0)) { > > > > > > + error_setg_errno(errp, errno, "Cannot get vdpa backend_features"); > > > > > > + return r; > > > > > > + } > > > > > > + > > > > > > + if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID))) { > > > > > > + return 0; > > > > > > + } > > > > > > + > > > > > > + r = vhost_vdpa_cvq_is_isolated(device_fd, > > > > > > + features & ~BIT_ULL(VIRTIO_NET_F_MQ), 2, > > > > > > + errp); > > > > > > + if (unlikely(r < 0)) { > > > > > > + if (r != -ENOTSUP) { > > > > > > + return r; > > > > > > + } > > > > > > + > > > > > > + /* > > > > > > + * The kernel report VHOST_BACKEND_F_IOTLB_ASID if the vdpa frontend > > > > > > + * support ASID even if the parent driver does not. The CVQ cannot be > > > > > > + * isolated in this case. > > > > > > + */ > > > > > > + error_free(*errp); > > > > > > + *errp = NULL; > > > > > > + return 0; > > > > > > + } > > > > > > + > > > > > > + if (r == 0) { > > > > > > + return 0; > > > > > > + } > > > > > > + > > > > > > + vhost_vdpa_reset_status_fd(device_fd); > > > > > > + if (!(features & BIT_ULL(VIRTIO_NET_F_MQ))) { > > > > > > + return 0; > > > > > > + } > > > > > > + > > > > > > + r = vhost_vdpa_cvq_is_isolated(device_fd, features, cvq_index * 2, errp); > > > > > > > > > > I think checking this once should be sufficient. That is to say, it > > > > > should be a bug if there's hardware that puts cvq in a dedicated group > > > > > in MQ but not in SQ. > > > > > > > > > > > > > This is checking the NIC is not buggy :). Otherwise, we're giving > > > > access to the guest to the CVQ shadow vring. 
And, currently, SVQ code > > > > assumes only QEMU can access it. > > > > > > Just to make sure we are at the same page, I meant, the hardware > > > should be buggy if the isolation of cvq is not consistent between > > > single and multiqueue. > > > > > > > Yes, I got you. > > > > The problem with that particular bug is that we will handle guest's > > vring with the bad IOVA tree. Since QEMU is not sanitizing that > > descriptors anymore, the device can be used to write at qemu memory. > > At this time only SVQ vring and in buffers should be writable by this, > > so it's not a big deal. > > > > This can also happen if the device is buggy in other ways. For > > example, reporting that CVQ is isolated at VHOST_VDPA_GET_VRING_GROUP > > but then handling maps ignoring the ASID parameter. There is no > > protection for that, so I agree this double check makes little sense. > > Ok so you will repost with this check removed? > Yes, I'll repost it. Thanks! > > > > > > > > But maybe this made more sense in previous versions, where the series > > > > also cached the cvq group here. If I understand you correctly, it is > > > > enough to check that CVQ is isolated in SQ, and assume it will be > > > > isolated also in MQ, right? I can modify the patch that way if you > > > > confirm this. > > > > > > I think so, or just negotiate with what hardware provides us and check. > > > > > > > To always probe with SQ makes the code simpler, but let me know if you > > think there are advantages to probing otherwise. > > > > Thanks! >
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 3fb833fe76..29054b77a9 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -43,6 +43,10 @@ typedef struct VhostVDPAState { /* The device always have SVQ enabled */ bool always_svq; + + /* The device can isolate CVQ in its own ASID */ + bool cvq_isolated; + bool started; } VhostVDPAState; @@ -362,15 +366,8 @@ static NetClientInfo net_vhost_vdpa_info = { .check_peer_type = vhost_vdpa_check_peer_type, }; -/** - * Get vring virtqueue group - * - * @device_fd vdpa device fd - * @vq_index Virtqueue index - * - * Return -errno in case of error, or vq group if success. - */ -static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) +static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index, + Error **errp) { struct vhost_vring_state state = { .index = vq_index, @@ -379,8 +376,7 @@ static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) if (unlikely(r < 0)) { r = -errno; - error_report("Cannot get VQ %u group: %s", vq_index, - g_strerror(errno)); + error_setg_errno(errp, errno, "Cannot get VQ %u group", vq_index); return r; } @@ -480,9 +476,9 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) { VhostVDPAState *s, *s0; struct vhost_vdpa *v; - uint64_t backend_features; int64_t cvq_group; - int cvq_index, r; + int r; + Error *err = NULL; assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); @@ -502,41 +498,22 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) /* * If we early return in these cases SVQ will not be enabled. The migration * will be blocked as long as vhost-vdpa backends will not offer _F_LOG. - * - * Calling VHOST_GET_BACKEND_FEATURES as they are not available in v->dev - * yet. 
*/ - r = ioctl(v->device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features); - if (unlikely(r < 0)) { - error_report("Cannot get vdpa backend_features: %s(%d)", - g_strerror(errno), errno); - return -1; + if (!vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) { + return 0; } - if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)) || - !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) { + + if (!s->cvq_isolated) { return 0; } - /* - * Check if all the virtqueues of the virtio device are in a different vq - * than the last vq. VQ group of last group passed in cvq_group. - */ - cvq_index = v->dev->vq_index_end - 1; - cvq_group = vhost_vdpa_get_vring_group(v->device_fd, cvq_index); + cvq_group = vhost_vdpa_get_vring_group(v->device_fd, + v->dev->vq_index_end - 1, + &err); if (unlikely(cvq_group < 0)) { + error_report_err(err); return cvq_group; } - for (int i = 0; i < cvq_index; ++i) { - int64_t group = vhost_vdpa_get_vring_group(v->device_fd, i); - - if (unlikely(group < 0)) { - return group; - } - - if (group == cvq_group) { - return 0; - } - } r = vhost_vdpa_set_address_space_id(v, cvq_group, VHOST_VDPA_NET_CVQ_ASID); if (unlikely(r < 0)) { @@ -799,6 +776,111 @@ static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = { .avail_handler = vhost_vdpa_net_handle_ctrl_avail, }; +/** + * Probe the device to check control virtqueue is isolated. 
+ * + * @device_fd vhost-vdpa file descriptor + * @features features to negotiate + * @cvq_index Control vq index + * + * Returns -1 in case of error, 0 if false and 1 if true + */ +static int vhost_vdpa_cvq_is_isolated(int device_fd, uint64_t features, + unsigned cvq_index, Error **errp) +{ + int64_t cvq_group; + int r; + + r = vhost_vdpa_set_dev_features_fd(device_fd, features); + if (unlikely(r < 0)) { + error_setg_errno(errp, -r, "Cannot set device features"); + return r; + } + + cvq_group = vhost_vdpa_get_vring_group(device_fd, cvq_index, errp); + if (unlikely(cvq_group < 0)) { + return cvq_group; + } + + for (int i = 0; i < cvq_index; ++i) { + int64_t group = vhost_vdpa_get_vring_group(device_fd, i, errp); + + if (unlikely(group < 0)) { + return group; + } + + if (group == (int64_t)cvq_group) { + return 0; + } + } + + return 1; +} + +/** + * Probe if CVQ is isolated when the device is MQ and when it is not MQ + * + * @device_fd The vdpa device fd + * @features Features offered by the device. + * @cvq_index The control vq index if mq is negotiated. Ignored + * otherwise. + * + * Returns <0 in case of failure, 0 if false and 1 if true. + */ +static int vhost_vdpa_probe_cvq_isolation(int device_fd, uint64_t features, + int cvq_index, Error **errp) +{ + uint64_t backend_features; + int r; + + ERRP_GUARD(); + + r = ioctl(device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features); + if (unlikely(r < 0)) { + error_setg_errno(errp, errno, "Cannot get vdpa backend_features"); + return r; + } + + if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID))) { + return 0; + } + + r = vhost_vdpa_cvq_is_isolated(device_fd, + features & ~BIT_ULL(VIRTIO_NET_F_MQ), 2, + errp); + if (unlikely(r < 0)) { + if (r != -ENOTSUP) { + return r; + } + + /* + * The kernel report VHOST_BACKEND_F_IOTLB_ASID if the vdpa frontend + * support ASID even if the parent driver does not. The CVQ cannot be + * isolated in this case. 
+ */ + error_free(*errp); + *errp = NULL; + return 0; + } + + if (r == 0) { + return 0; + } + + vhost_vdpa_reset_status_fd(device_fd); + if (!(features & BIT_ULL(VIRTIO_NET_F_MQ))) { + return 0; + } + + r = vhost_vdpa_cvq_is_isolated(device_fd, features, cvq_index * 2, errp); + if (unlikely(r < 0)) { + return r; + } + + vhost_vdpa_reset_status_fd(device_fd); + return r; +} + static NetClientState *net_vhost_vdpa_init(NetClientState *peer, const char *device, const char *name, @@ -808,16 +890,25 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, bool is_datapath, bool svq, struct vhost_vdpa_iova_range iova_range, - uint64_t features) + uint64_t features, + Error **errp) { NetClientState *nc = NULL; VhostVDPAState *s; int ret = 0; assert(name); + int cvq_isolated; + if (is_datapath) { nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device, name); } else { + cvq_isolated = vhost_vdpa_probe_cvq_isolation(vdpa_device_fd, features, + queue_pair_index, errp); + if (unlikely(cvq_isolated < 0)) { + return NULL; + } + nc = qemu_new_net_control_client(&net_vhost_vdpa_cvq_info, peer, device, name); } @@ -844,6 +935,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; s->vhost_vdpa.shadow_vq_ops_opaque = s; + s->cvq_isolated = cvq_isolated; /* * TODO: We cannot migrate devices with CVQ as there is no way to set @@ -972,7 +1064,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, for (i = 0; i < queue_pairs; i++) { ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, vdpa_device_fd, i, 2, true, opts->x_svq, - iova_range, features); + iova_range, features, errp); if (!ncs[i]) goto err; } @@ -980,7 +1072,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, if (has_cvq) { nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, vdpa_device_fd, i, 1, false, - opts->x_svq, iova_range, features); + opts->x_svq, iova_range, features, errp); if (!nc) goto err; }
Evaluating it at start time instead of initialization time may make the guest capable of dynamically adding or removing migration blockers. Also, moving to initialization reduces the number of ioctls in the migration, reducing failure possibilities. As a drawback we need to check for CVQ isolation twice: one time with no MQ negotiated and another one acking it, as long as the device supports it. This is because Vring ASID / group management is based on vq indexes, but we don't know the index of CVQ before negotiating MQ. Signed-off-by: Eugenio Pérez <eperezma@redhat.com> --- v2: Take out the reset of the device from vhost_vdpa_cvq_is_isolated v3: Only record cvq_isolated, true if the device has CVQ isolated in both !MQ and MQ configurations. --- net/vhost-vdpa.c | 178 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 135 insertions(+), 43 deletions(-)