diff mbox series

vhost-user: add support for VHOST_USER_SET_STATUS

Message ID 20200514073332.1434576-1-maxime.coquelin@redhat.com (mailing list archive)
State New, archived
Headers show
Series vhost-user: add support for VHOST_USER_SET_STATUS | expand

Commit Message

Maxime Coquelin May 14, 2020, 7:33 a.m. UTC
It is usefull for the Vhost-user backend to know
about about the Virtio device status updates,
especially when the driver sets the DRIVER_OK bit.

With that information, no more need to do hazardous
assumptions on when the driver is done with the
device configuration.

Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---

This patch applies on top of Cindy's "vDPA support in qemu"
series, which introduces the .vhost_set_state vhost-backend
ops.

 docs/interop/vhost-user.rst | 12 ++++++++++++
 hw/net/vhost_net.c          | 10 +++++-----
 hw/virtio/vhost-user.c      | 35 +++++++++++++++++++++++++++++++++++
 3 files changed, 52 insertions(+), 5 deletions(-)

Comments

Jason Wang May 14, 2020, 7:53 a.m. UTC | #1
On 2020/5/14 下午3:33, Maxime Coquelin wrote:
> It is usefull for the Vhost-user backend to know
> about about the Virtio device status updates,
> especially when the driver sets the DRIVER_OK bit.
>
> With that information, no more need to do hazardous
> assumptions on when the driver is done with the
> device configuration.
>
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
>
> This patch applies on top of Cindy's "vDPA support in qemu"
> series, which introduces the .vhost_set_state vhost-backend
> ops.
>
>   docs/interop/vhost-user.rst | 12 ++++++++++++
>   hw/net/vhost_net.c          | 10 +++++-----
>   hw/virtio/vhost-user.c      | 35 +++++++++++++++++++++++++++++++++++
>   3 files changed, 52 insertions(+), 5 deletions(-)
>
> diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst
> index 3b1b6602c7..f108de7458 100644
> --- a/docs/interop/vhost-user.rst
> +++ b/docs/interop/vhost-user.rst
> @@ -815,6 +815,7 @@ Protocol features
>     #define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD       12
>     #define VHOST_USER_PROTOCOL_F_RESET_DEVICE         13
>     #define VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS 14
> +  #define VHOST_USER_PROTOCOL_F_STATUS               15
>   
>   Master message types
>   --------------------
> @@ -1263,6 +1264,17 @@ Master message types
>   
>     The state.num field is currently reserved and must be set to 0.
>   
> +``VHOST_USER_SET_STATUS``
> +  :id: 36
> +  :equivalent ioctl: VHOST_VDPA_SET_STATUS
> +  :slave payload: N/A
> +  :master payload: ``u64``
> +
> +  When the ``VHOST_USER_PROTOCOL_F_STATUS`` protocol feature has been
> +  successfully negotiated, this message is submitted by the master to
> +  notify the backend with updated device status as defined in the Virtio
> +  specification.
> +
>   Slave message types
>   -------------------
>   
> diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> index 463e333531..37f3156dbc 100644
> --- a/hw/net/vhost_net.c
> +++ b/hw/net/vhost_net.c
> @@ -517,10 +517,10 @@ int vhost_set_state(NetClientState *nc, int state)
>   {
>       struct vhost_net *net = get_vhost_net(nc);
>       struct vhost_dev *hdev = &net->dev;
> -    if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
> -        if (hdev->vhost_ops->vhost_set_state) {
> -                return hdev->vhost_ops->vhost_set_state(hdev, state);
> -             }
> -        }
> +
> +    if (hdev->vhost_ops->vhost_set_state) {
> +        return hdev->vhost_ops->vhost_set_state(hdev, state);
> +    }
> +
>       return 0;
>   }
> diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
> index ec21e8fbe8..b7e52d97fc 100644
> --- a/hw/virtio/vhost-user.c
> +++ b/hw/virtio/vhost-user.c
> @@ -59,6 +59,7 @@ enum VhostUserProtocolFeature {
>       VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
>       VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
>       VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
> +    VHOST_USER_PROTOCOL_F_STATUS = 15,
>       VHOST_USER_PROTOCOL_F_MAX
>   };
>   
> @@ -100,6 +101,7 @@ typedef enum VhostUserRequest {
>       VHOST_USER_SET_INFLIGHT_FD = 32,
>       VHOST_USER_GPU_SET_SOCKET = 33,
>       VHOST_USER_RESET_DEVICE = 34,
> +    VHOST_USER_SET_STATUS = 36,
>       VHOST_USER_MAX
>   } VhostUserRequest;
>   
> @@ -1886,6 +1888,38 @@ static int vhost_user_set_inflight_fd(struct vhost_dev *dev,
>       return 0;
>   }
>   
> +static int vhost_user_set_state(struct vhost_dev *dev, int state)
> +{
> +    bool reply_supported = virtio_has_feature(dev->protocol_features,
> +                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
> +
> +    VhostUserMsg msg = {
> +        .hdr.request = VHOST_USER_SET_STATUS,
> +        .hdr.flags = VHOST_USER_VERSION,
> +        .hdr.size = sizeof(msg.payload.u64),
> +        .payload.u64 = (uint64_t)state,
> +    };
> +
> +    if (!virtio_has_feature(dev->protocol_features,
> +                VHOST_USER_PROTOCOL_F_STATUS)) {
> +        return -1;
> +    }
> +
> +    if (reply_supported) {
> +        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
> +    }
> +
> +    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
> +        return -1;
> +    }
> +
> +    if (reply_supported) {
> +        return process_message_reply(dev, &msg);
> +    }
> +
> +    return 0;
> +}


Interesting, I wonder how vm stop will be handled in this case.

In the case of vDPA kernel, we probable don't want to mirror the virtio 
device status to vdpa device status directly. Since qemu may stop 
vhost-vdpa device through e.g resting vdpa device, but in the mean time, 
guest should not detect such condition in virtio device status.

So in the new version of vDPA support, we probably need to do:

static int vhost_vdpa_set_state(struct vhost_dev *dev, bool started)
{
     if (started) {
         uint8_t status = 0;

         vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
         vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &status);

         return !(status & VIRTIO_CONFIG_S_DRIVER_OK);
     } else {
         vhost_vdpa_reset_device(dev);
         vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                                    VIRTIO_CONFIG_S_DRIVER);
         return 0;
     }
}

And vhost_set_state() will be called from vhost_dev_start()/stop().

Does this work for vhost-user as well?

Thanks


> +
>   bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
>   {
>       if (user->chr) {
> @@ -1947,4 +1981,5 @@ const VhostOps user_ops = {
>           .vhost_backend_mem_section_filter = vhost_user_mem_section_filter,
>           .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
>           .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
> +        .vhost_set_state = vhost_user_set_state,
>   };
Maxime Coquelin May 14, 2020, 10:14 a.m. UTC | #2
On 5/14/20 9:53 AM, Jason Wang wrote:
> 
> On 2020/5/14 下午3:33, Maxime Coquelin wrote:
>> It is usefull for the Vhost-user backend to know
>> about about the Virtio device status updates,
>> especially when the driver sets the DRIVER_OK bit.
>>
>> With that information, no more need to do hazardous
>> assumptions on when the driver is done with the
>> device configuration.
>>
>> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
>> ---
>>
>> This patch applies on top of Cindy's "vDPA support in qemu"
>> series, which introduces the .vhost_set_state vhost-backend
>> ops.
>>
>>   docs/interop/vhost-user.rst | 12 ++++++++++++
>>   hw/net/vhost_net.c          | 10 +++++-----
>>   hw/virtio/vhost-user.c      | 35 +++++++++++++++++++++++++++++++++++
>>   3 files changed, 52 insertions(+), 5 deletions(-)
>>
>> diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst
>> index 3b1b6602c7..f108de7458 100644
>> --- a/docs/interop/vhost-user.rst
>> +++ b/docs/interop/vhost-user.rst
>> @@ -815,6 +815,7 @@ Protocol features
>>     #define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD       12
>>     #define VHOST_USER_PROTOCOL_F_RESET_DEVICE         13
>>     #define VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS 14
>> +  #define VHOST_USER_PROTOCOL_F_STATUS               15
>>     Master message types
>>   --------------------
>> @@ -1263,6 +1264,17 @@ Master message types
>>       The state.num field is currently reserved and must be set to 0.
>>   +``VHOST_USER_SET_STATUS``
>> +  :id: 36
>> +  :equivalent ioctl: VHOST_VDPA_SET_STATUS
>> +  :slave payload: N/A
>> +  :master payload: ``u64``
>> +
>> +  When the ``VHOST_USER_PROTOCOL_F_STATUS`` protocol feature has been
>> +  successfully negotiated, this message is submitted by the master to
>> +  notify the backend with updated device status as defined in the Virtio
>> +  specification.
>> +
>>   Slave message types
>>   -------------------
>>   diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
>> index 463e333531..37f3156dbc 100644
>> --- a/hw/net/vhost_net.c
>> +++ b/hw/net/vhost_net.c
>> @@ -517,10 +517,10 @@ int vhost_set_state(NetClientState *nc, int state)
>>   {
>>       struct vhost_net *net = get_vhost_net(nc);
>>       struct vhost_dev *hdev = &net->dev;
>> -    if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
>> -        if (hdev->vhost_ops->vhost_set_state) {
>> -                return hdev->vhost_ops->vhost_set_state(hdev, state);
>> -             }
>> -        }
>> +
>> +    if (hdev->vhost_ops->vhost_set_state) {
>> +        return hdev->vhost_ops->vhost_set_state(hdev, state);
>> +    }
>> +
>>       return 0;
>>   }
>> diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
>> index ec21e8fbe8..b7e52d97fc 100644
>> --- a/hw/virtio/vhost-user.c
>> +++ b/hw/virtio/vhost-user.c
>> @@ -59,6 +59,7 @@ enum VhostUserProtocolFeature {
>>       VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
>>       VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
>>       VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
>> +    VHOST_USER_PROTOCOL_F_STATUS = 15,
>>       VHOST_USER_PROTOCOL_F_MAX
>>   };
>>   @@ -100,6 +101,7 @@ typedef enum VhostUserRequest {
>>       VHOST_USER_SET_INFLIGHT_FD = 32,
>>       VHOST_USER_GPU_SET_SOCKET = 33,
>>       VHOST_USER_RESET_DEVICE = 34,
>> +    VHOST_USER_SET_STATUS = 36,
>>       VHOST_USER_MAX
>>   } VhostUserRequest;
>>   @@ -1886,6 +1888,38 @@ static int vhost_user_set_inflight_fd(struct
>> vhost_dev *dev,
>>       return 0;
>>   }
>>   +static int vhost_user_set_state(struct vhost_dev *dev, int state)
>> +{
>> +    bool reply_supported = virtio_has_feature(dev->protocol_features,
>> +                                             
>> VHOST_USER_PROTOCOL_F_REPLY_ACK);
>> +
>> +    VhostUserMsg msg = {
>> +        .hdr.request = VHOST_USER_SET_STATUS,
>> +        .hdr.flags = VHOST_USER_VERSION,
>> +        .hdr.size = sizeof(msg.payload.u64),
>> +        .payload.u64 = (uint64_t)state,
>> +    };
>> +
>> +    if (!virtio_has_feature(dev->protocol_features,
>> +                VHOST_USER_PROTOCOL_F_STATUS)) {
>> +        return -1;
>> +    }
>> +
>> +    if (reply_supported) {
>> +        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
>> +    }
>> +
>> +    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
>> +        return -1;
>> +    }
>> +
>> +    if (reply_supported) {
>> +        return process_message_reply(dev, &msg);
>> +    }
>> +
>> +    return 0;
>> +}
> 
> 
> Interesting, I wonder how vm stop will be handled in this case.

For now, my DPDK series only use DRIVER_OK to help determine when the
driver is done with the initialization. For VM stop, it still relies on
GET_VRING_BASE.

GET_VRING_BASE arrives before DRIVER_OK bit is cleared is the tests I've
done (logs from backend side):

VHOST_CONFIG: read message VHOST_USER_GET_VRING_BASE

destroy port /tmp/vhost-user1, did: 0
VHOST_CONFIG: vring base idx:0 file:41
VHOST_CONFIG: read message VHOST_USER_GET_VRING_BASE
VHOST_CONFIG: vring base idx:1 file:0
VHOST_CONFIG: read message VHOST_USER_SET_STATUS
VHOST_CONFIG: New device status(0x0000000b):
	-ACKNOWLEDGE: 1
	-DRIVER: 1
	-FEATURES_OK: 1
	-DRIVER_OK: 0
	-DEVICE_NEED_RESET: 0
	-FAILED: 0

> In the case of vDPA kernel, we probable don't want to mirror the virtio
> device status to vdpa device status directly.

In vDPA DPDK, we don't mirror the Virtio device status either. It could
make sense to do that, but would require some API changes.

> Since qemu may stop
> vhost-vdpa device through e.g resting vdpa device, but in the mean time,
> guest should not detect such condition in virtio device status.



> So in the new version of vDPA support, we probably need to do:
> 
> static int vhost_vdpa_set_state(struct vhost_dev *dev, bool started)
> {
>     if (started) {
>         uint8_t status = 0;
> 
>         vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
>         vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &status);
> 
>         return !(status & VIRTIO_CONFIG_S_DRIVER_OK);
>     } else {
>         vhost_vdpa_reset_device(dev);
>         vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
>                                    VIRTIO_CONFIG_S_DRIVER);
>         return 0;
>     }
> }

IIUC, you have another copy of the status register not matching 1:1 what
the guest sets/sees.

Is vhost_vdpa_add_status() sending VHOST_VDPA_SET_STATUS to the backend?

And why reading back the status from the backend? Just to confirm the
change is taken into account?

> And vhost_set_state() will be called from vhost_dev_start()/stop().
> 
> Does this work for vhost-user as well?

IIUC what you did above, I think it would work. And we won't need
GET_STATUS request, but just rely on the REPLY_ACK.

Thanks,
Maxime

> Thanks
> 
> 
>> +
>>   bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error
>> **errp)
>>   {
>>       if (user->chr) {
>> @@ -1947,4 +1981,5 @@ const VhostOps user_ops = {
>>           .vhost_backend_mem_section_filter =
>> vhost_user_mem_section_filter,
>>           .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
>>           .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
>> +        .vhost_set_state = vhost_user_set_state,
>>   };
Michael S. Tsirkin May 14, 2020, 10:44 a.m. UTC | #3
On Thu, May 14, 2020 at 09:33:32AM +0200, Maxime Coquelin wrote:
> It is usefull for the Vhost-user backend to know
> about about the Virtio device status updates,
> especially when the driver sets the DRIVER_OK bit.
> 
> With that information, no more need to do hazardous
> assumptions on when the driver is done with the
> device configuration.
> 
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
> 
> This patch applies on top of Cindy's "vDPA support in qemu"
> series, which introduces the .vhost_set_state vhost-backend
> ops.
> 
>  docs/interop/vhost-user.rst | 12 ++++++++++++
>  hw/net/vhost_net.c          | 10 +++++-----
>  hw/virtio/vhost-user.c      | 35 +++++++++++++++++++++++++++++++++++
>  3 files changed, 52 insertions(+), 5 deletions(-)
> 
> diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst
> index 3b1b6602c7..f108de7458 100644
> --- a/docs/interop/vhost-user.rst
> +++ b/docs/interop/vhost-user.rst
> @@ -815,6 +815,7 @@ Protocol features
>    #define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD       12
>    #define VHOST_USER_PROTOCOL_F_RESET_DEVICE         13
>    #define VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS 14
> +  #define VHOST_USER_PROTOCOL_F_STATUS               15
>  
>  Master message types
>  --------------------
> @@ -1263,6 +1264,17 @@ Master message types
>  
>    The state.num field is currently reserved and must be set to 0.
>  
> +``VHOST_USER_SET_STATUS``
> +  :id: 36
> +  :equivalent ioctl: VHOST_VDPA_SET_STATUS
> +  :slave payload: N/A
> +  :master payload: ``u64``
> +
> +  When the ``VHOST_USER_PROTOCOL_F_STATUS`` protocol feature has been
> +  successfully negotiated, this message is submitted by the master to
> +  notify the backend with updated device status as defined in the Virtio
> +  specification.
> +
>  Slave message types
>  -------------------
>  
> diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> index 463e333531..37f3156dbc 100644
> --- a/hw/net/vhost_net.c
> +++ b/hw/net/vhost_net.c
> @@ -517,10 +517,10 @@ int vhost_set_state(NetClientState *nc, int state)
>  {
>      struct vhost_net *net = get_vhost_net(nc);
>      struct vhost_dev *hdev = &net->dev;
> -    if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
> -        if (hdev->vhost_ops->vhost_set_state) {
> -                return hdev->vhost_ops->vhost_set_state(hdev, state);
> -             }
> -        }
> +
> +    if (hdev->vhost_ops->vhost_set_state) {
> +        return hdev->vhost_ops->vhost_set_state(hdev, state);
> +    }
> +
>      return 0;
>  }
> diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
> index ec21e8fbe8..b7e52d97fc 100644
> --- a/hw/virtio/vhost-user.c
> +++ b/hw/virtio/vhost-user.c
> @@ -59,6 +59,7 @@ enum VhostUserProtocolFeature {
>      VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
>      VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
>      VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
> +    VHOST_USER_PROTOCOL_F_STATUS = 15,
>      VHOST_USER_PROTOCOL_F_MAX
>  };
>  
> @@ -100,6 +101,7 @@ typedef enum VhostUserRequest {
>      VHOST_USER_SET_INFLIGHT_FD = 32,
>      VHOST_USER_GPU_SET_SOCKET = 33,
>      VHOST_USER_RESET_DEVICE = 34,
> +    VHOST_USER_SET_STATUS = 36,
>      VHOST_USER_MAX
>  } VhostUserRequest;
>  
> @@ -1886,6 +1888,38 @@ static int vhost_user_set_inflight_fd(struct vhost_dev *dev,
>      return 0;
>  }
>  
> +static int vhost_user_set_state(struct vhost_dev *dev, int state)
> +{
> +    bool reply_supported = virtio_has_feature(dev->protocol_features,
> +                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
> +
> +    VhostUserMsg msg = {
> +        .hdr.request = VHOST_USER_SET_STATUS,
> +        .hdr.flags = VHOST_USER_VERSION,
> +        .hdr.size = sizeof(msg.payload.u64),
> +        .payload.u64 = (uint64_t)state,
> +    };
> +
> +    if (!virtio_has_feature(dev->protocol_features,
> +                VHOST_USER_PROTOCOL_F_STATUS)) {
> +        return -1;
> +    }
> +
> +    if (reply_supported) {
> +        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
> +    }
> +
> +    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
> +        return -1;
> +    }
> +
> +    if (reply_supported) {
> +        return process_message_reply(dev, &msg);
> +    }

So since we are doing this anyway, let's give backend the
opportunity to validate features and fail FEATURES_OK?



> +
> +    return 0;
> +}
> +
>  bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
>  {
>      if (user->chr) {
> @@ -1947,4 +1981,5 @@ const VhostOps user_ops = {
>          .vhost_backend_mem_section_filter = vhost_user_mem_section_filter,
>          .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
>          .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
> +        .vhost_set_state = vhost_user_set_state,
>  };
> -- 
> 2.25.4
Michael S. Tsirkin May 14, 2020, 10:51 a.m. UTC | #4
On Thu, May 14, 2020 at 12:14:32PM +0200, Maxime Coquelin wrote:
> 
> 
> On 5/14/20 9:53 AM, Jason Wang wrote:
> > 
> > On 2020/5/14 下午3:33, Maxime Coquelin wrote:
> >> It is usefull for the Vhost-user backend to know
> >> about about the Virtio device status updates,
> >> especially when the driver sets the DRIVER_OK bit.
> >>
> >> With that information, no more need to do hazardous
> >> assumptions on when the driver is done with the
> >> device configuration.
> >>
> >> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> >> ---
> >>
> >> This patch applies on top of Cindy's "vDPA support in qemu"
> >> series, which introduces the .vhost_set_state vhost-backend
> >> ops.
> >>
> >>   docs/interop/vhost-user.rst | 12 ++++++++++++
> >>   hw/net/vhost_net.c          | 10 +++++-----
> >>   hw/virtio/vhost-user.c      | 35 +++++++++++++++++++++++++++++++++++
> >>   3 files changed, 52 insertions(+), 5 deletions(-)
> >>
> >> diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst
> >> index 3b1b6602c7..f108de7458 100644
> >> --- a/docs/interop/vhost-user.rst
> >> +++ b/docs/interop/vhost-user.rst
> >> @@ -815,6 +815,7 @@ Protocol features
> >>     #define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD       12
> >>     #define VHOST_USER_PROTOCOL_F_RESET_DEVICE         13
> >>     #define VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS 14
> >> +  #define VHOST_USER_PROTOCOL_F_STATUS               15
> >>     Master message types
> >>   --------------------
> >> @@ -1263,6 +1264,17 @@ Master message types
> >>       The state.num field is currently reserved and must be set to 0.
> >>   +``VHOST_USER_SET_STATUS``
> >> +  :id: 36
> >> +  :equivalent ioctl: VHOST_VDPA_SET_STATUS
> >> +  :slave payload: N/A
> >> +  :master payload: ``u64``
> >> +
> >> +  When the ``VHOST_USER_PROTOCOL_F_STATUS`` protocol feature has been
> >> +  successfully negotiated, this message is submitted by the master to
> >> +  notify the backend with updated device status as defined in the Virtio
> >> +  specification.
> >> +
> >>   Slave message types
> >>   -------------------
> >>   diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> >> index 463e333531..37f3156dbc 100644
> >> --- a/hw/net/vhost_net.c
> >> +++ b/hw/net/vhost_net.c
> >> @@ -517,10 +517,10 @@ int vhost_set_state(NetClientState *nc, int state)
> >>   {
> >>       struct vhost_net *net = get_vhost_net(nc);
> >>       struct vhost_dev *hdev = &net->dev;
> >> -    if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
> >> -        if (hdev->vhost_ops->vhost_set_state) {
> >> -                return hdev->vhost_ops->vhost_set_state(hdev, state);
> >> -             }
> >> -        }
> >> +
> >> +    if (hdev->vhost_ops->vhost_set_state) {
> >> +        return hdev->vhost_ops->vhost_set_state(hdev, state);
> >> +    }
> >> +
> >>       return 0;
> >>   }
> >> diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
> >> index ec21e8fbe8..b7e52d97fc 100644
> >> --- a/hw/virtio/vhost-user.c
> >> +++ b/hw/virtio/vhost-user.c
> >> @@ -59,6 +59,7 @@ enum VhostUserProtocolFeature {
> >>       VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
> >>       VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
> >>       VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
> >> +    VHOST_USER_PROTOCOL_F_STATUS = 15,
> >>       VHOST_USER_PROTOCOL_F_MAX
> >>   };
> >>   @@ -100,6 +101,7 @@ typedef enum VhostUserRequest {
> >>       VHOST_USER_SET_INFLIGHT_FD = 32,
> >>       VHOST_USER_GPU_SET_SOCKET = 33,
> >>       VHOST_USER_RESET_DEVICE = 34,
> >> +    VHOST_USER_SET_STATUS = 36,
> >>       VHOST_USER_MAX
> >>   } VhostUserRequest;
> >>   @@ -1886,6 +1888,38 @@ static int vhost_user_set_inflight_fd(struct
> >> vhost_dev *dev,
> >>       return 0;
> >>   }
> >>   +static int vhost_user_set_state(struct vhost_dev *dev, int state)
> >> +{
> >> +    bool reply_supported = virtio_has_feature(dev->protocol_features,
> >> +                                             
> >> VHOST_USER_PROTOCOL_F_REPLY_ACK);
> >> +
> >> +    VhostUserMsg msg = {
> >> +        .hdr.request = VHOST_USER_SET_STATUS,
> >> +        .hdr.flags = VHOST_USER_VERSION,
> >> +        .hdr.size = sizeof(msg.payload.u64),
> >> +        .payload.u64 = (uint64_t)state,
> >> +    };
> >> +
> >> +    if (!virtio_has_feature(dev->protocol_features,
> >> +                VHOST_USER_PROTOCOL_F_STATUS)) {
> >> +        return -1;
> >> +    }
> >> +
> >> +    if (reply_supported) {
> >> +        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
> >> +    }
> >> +
> >> +    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
> >> +        return -1;
> >> +    }
> >> +
> >> +    if (reply_supported) {
> >> +        return process_message_reply(dev, &msg);
> >> +    }
> >> +
> >> +    return 0;
> >> +}
> > 
> > 
> > Interesting, I wonder how vm stop will be handled in this case.
> 
> For now, my DPDK series only use DRIVER_OK to help determine when the
> driver is done with the initialization. For VM stop, it still relies on
> GET_VRING_BASE.

Sounds like a good fit.
GET_VRING_BASE is transparent to guest, as is vmstop.
This is more for driver 


> 
> GET_VRING_BASE arrives before DRIVER_OK bit is cleared is the tests I've
> done (logs from backend side):


One problem is with legacy guests, for these you can't rely
on DRIVER_OK, they often kick before that, and sometimes
expect buffers to be used too (e.g. for scsi).





> 
> VHOST_CONFIG: read message VHOST_USER_GET_VRING_BASE
> 
> destroy port /tmp/vhost-user1, did: 0
> VHOST_CONFIG: vring base idx:0 file:41
> VHOST_CONFIG: read message VHOST_USER_GET_VRING_BASE
> VHOST_CONFIG: vring base idx:1 file:0
> VHOST_CONFIG: read message VHOST_USER_SET_STATUS
> VHOST_CONFIG: New device status(0x0000000b):
> 	-ACKNOWLEDGE: 1
> 	-DRIVER: 1
> 	-FEATURES_OK: 1
> 	-DRIVER_OK: 0
> 	-DEVICE_NEED_RESET: 0
> 	-FAILED: 0
> 
> > In the case of vDPA kernel, we probable don't want to mirror the virtio
> > device status to vdpa device status directly.
> 
> In vDPA DPDK, we don't mirror the Virtio device status either. It could
> make sense to do that, but would require some API changes.
> 
> > Since qemu may stop
> > vhost-vdpa device through e.g resting vdpa device, but in the mean time,
> > guest should not detect such condition in virtio device status.
> 
> 
> 
> > So in the new version of vDPA support, we probably need to do:
> > 
> > static int vhost_vdpa_set_state(struct vhost_dev *dev, bool started)
> > {
> >     if (started) {
> >         uint8_t status = 0;
> > 
> >         vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
> >         vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &status);
> > 
> >         return !(status & VIRTIO_CONFIG_S_DRIVER_OK);
> >     } else {
> >         vhost_vdpa_reset_device(dev);
> >         vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
> >                                    VIRTIO_CONFIG_S_DRIVER);
> >         return 0;
> >     }
> > }
> 
> IIUC, you have another copy of the status register not matching 1:1 what
> the guest sets/sees.
> 
> Is vhost_vdpa_add_status() sending VHOST_VDPA_SET_STATUS to the backend?
> 
> And why reading back the status from the backend? Just to confirm the
> change is taken into account?

Making sure features have been acked makes sense IMHO.


> > And vhost_set_state() will be called from vhost_dev_start()/stop().
> > 
> > Does this work for vhost-user as well?
> 
> IIUC what you did above, I think it would work. And we won't need
> GET_STATUS request, but just rely on the REPLY_ACK.

Right. Need to document that though.


> 
> Thanks,
> Maxime
> 
> > Thanks
> > 
> > 
> >> +
> >>   bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error
> >> **errp)
> >>   {
> >>       if (user->chr) {
> >> @@ -1947,4 +1981,5 @@ const VhostOps user_ops = {
> >>           .vhost_backend_mem_section_filter =
> >> vhost_user_mem_section_filter,
> >>           .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
> >>           .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
> >> +        .vhost_set_state = vhost_user_set_state,
> >>   };
Maxime Coquelin May 14, 2020, 2:12 p.m. UTC | #5
On 5/14/20 12:44 PM, Michael S. Tsirkin wrote:
> On Thu, May 14, 2020 at 09:33:32AM +0200, Maxime Coquelin wrote:
>> It is usefull for the Vhost-user backend to know
>> about about the Virtio device status updates,
>> especially when the driver sets the DRIVER_OK bit.
>>
>> With that information, no more need to do hazardous
>> assumptions on when the driver is done with the
>> device configuration.
>>
>> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
>> ---
>>
>> This patch applies on top of Cindy's "vDPA support in qemu"
>> series, which introduces the .vhost_set_state vhost-backend
>> ops.
>>
>>  docs/interop/vhost-user.rst | 12 ++++++++++++
>>  hw/net/vhost_net.c          | 10 +++++-----
>>  hw/virtio/vhost-user.c      | 35 +++++++++++++++++++++++++++++++++++
>>  3 files changed, 52 insertions(+), 5 deletions(-)
>>
>> diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst
>> index 3b1b6602c7..f108de7458 100644
>> --- a/docs/interop/vhost-user.rst
>> +++ b/docs/interop/vhost-user.rst
>> @@ -815,6 +815,7 @@ Protocol features
>>    #define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD       12
>>    #define VHOST_USER_PROTOCOL_F_RESET_DEVICE         13
>>    #define VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS 14
>> +  #define VHOST_USER_PROTOCOL_F_STATUS               15
>>  
>>  Master message types
>>  --------------------
>> @@ -1263,6 +1264,17 @@ Master message types
>>  
>>    The state.num field is currently reserved and must be set to 0.
>>  
>> +``VHOST_USER_SET_STATUS``
>> +  :id: 36
>> +  :equivalent ioctl: VHOST_VDPA_SET_STATUS
>> +  :slave payload: N/A
>> +  :master payload: ``u64``
>> +
>> +  When the ``VHOST_USER_PROTOCOL_F_STATUS`` protocol feature has been
>> +  successfully negotiated, this message is submitted by the master to
>> +  notify the backend with updated device status as defined in the Virtio
>> +  specification.
>> +
>>  Slave message types
>>  -------------------
>>  
>> diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
>> index 463e333531..37f3156dbc 100644
>> --- a/hw/net/vhost_net.c
>> +++ b/hw/net/vhost_net.c
>> @@ -517,10 +517,10 @@ int vhost_set_state(NetClientState *nc, int state)
>>  {
>>      struct vhost_net *net = get_vhost_net(nc);
>>      struct vhost_dev *hdev = &net->dev;
>> -    if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
>> -        if (hdev->vhost_ops->vhost_set_state) {
>> -                return hdev->vhost_ops->vhost_set_state(hdev, state);
>> -             }
>> -        }
>> +
>> +    if (hdev->vhost_ops->vhost_set_state) {
>> +        return hdev->vhost_ops->vhost_set_state(hdev, state);
>> +    }
>> +
>>      return 0;
>>  }
>> diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
>> index ec21e8fbe8..b7e52d97fc 100644
>> --- a/hw/virtio/vhost-user.c
>> +++ b/hw/virtio/vhost-user.c
>> @@ -59,6 +59,7 @@ enum VhostUserProtocolFeature {
>>      VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
>>      VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
>>      VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
>> +    VHOST_USER_PROTOCOL_F_STATUS = 15,
>>      VHOST_USER_PROTOCOL_F_MAX
>>  };
>>  
>> @@ -100,6 +101,7 @@ typedef enum VhostUserRequest {
>>      VHOST_USER_SET_INFLIGHT_FD = 32,
>>      VHOST_USER_GPU_SET_SOCKET = 33,
>>      VHOST_USER_RESET_DEVICE = 34,
>> +    VHOST_USER_SET_STATUS = 36,
>>      VHOST_USER_MAX
>>  } VhostUserRequest;
>>  
>> @@ -1886,6 +1888,38 @@ static int vhost_user_set_inflight_fd(struct vhost_dev *dev,
>>      return 0;
>>  }
>>  
>> +static int vhost_user_set_state(struct vhost_dev *dev, int state)
>> +{
>> +    bool reply_supported = virtio_has_feature(dev->protocol_features,
>> +                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
>> +
>> +    VhostUserMsg msg = {
>> +        .hdr.request = VHOST_USER_SET_STATUS,
>> +        .hdr.flags = VHOST_USER_VERSION,
>> +        .hdr.size = sizeof(msg.payload.u64),
>> +        .payload.u64 = (uint64_t)state,
>> +    };
>> +
>> +    if (!virtio_has_feature(dev->protocol_features,
>> +                VHOST_USER_PROTOCOL_F_STATUS)) {
>> +        return -1;
>> +    }
>> +
>> +    if (reply_supported) {
>> +        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
>> +    }
>> +
>> +    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
>> +        return -1;
>> +    }
>> +
>> +    if (reply_supported) {
>> +        return process_message_reply(dev, &msg);
>> +    }
> 
> So since we are doing this anyway, let's give backend the opportunity
> to validate features and fail FEATURES_OK?

Just to be sure I understand, the feature negotiation happens,
then when the backend receives FEATURES_OK, it uses the REPLY_ACK
protocol feature to NACK?

In DPDK backend, we already check the feature set by SET_FEATURES are
supported by the backend. If it is not the case, either the master did
set NEED_REPLY flag and error would be reported to the master, or the
NEED_REPLY flag isn't set in the message and the backend disconnects.

Looking at Qemu side, it does not seem to set the NEED_REPLY flag on
SET_FEATURES, so basically the backend close the sockets if it happened.

I wonder whether it would be better for Qemu to set the NEED_REPLY flag
on SET_FEATURES, so that when the backend is running and VHOST_F_LOG_ALL
feature bit is set, we are the sure the master starts the live-migration
only once the SET_FEATURES is fully handled on backend side.

What do you think?

>> +
>> +    return 0;
>> +}
>> +
>>  bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
>>  {
>>      if (user->chr) {
>> @@ -1947,4 +1981,5 @@ const VhostOps user_ops = {
>>          .vhost_backend_mem_section_filter = vhost_user_mem_section_filter,
>>          .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
>>          .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
>> +        .vhost_set_state = vhost_user_set_state,
>>  };
>> -- 
>> 2.25.4
>
Michael S. Tsirkin May 14, 2020, 2:20 p.m. UTC | #6
On Thu, May 14, 2020 at 04:12:32PM +0200, Maxime Coquelin wrote:
> 
> 
> 
> On 5/14/20 12:44 PM, Michael S. Tsirkin wrote:
> > On Thu, May 14, 2020 at 09:33:32AM +0200, Maxime Coquelin wrote:
> >> It is usefull for the Vhost-user backend to know
> >> about about the Virtio device status updates,
> >> especially when the driver sets the DRIVER_OK bit.
> >>
> >> With that information, no more need to do hazardous
> >> assumptions on when the driver is done with the
> >> device configuration.
> >>
> >> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> >> ---
> >>
> >> This patch applies on top of Cindy's "vDPA support in qemu"
> >> series, which introduces the .vhost_set_state vhost-backend
> >> ops.
> >>
> >>  docs/interop/vhost-user.rst | 12 ++++++++++++
> >>  hw/net/vhost_net.c          | 10 +++++-----
> >>  hw/virtio/vhost-user.c      | 35 +++++++++++++++++++++++++++++++++++
> >>  3 files changed, 52 insertions(+), 5 deletions(-)
> >>
> >> diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst
> >> index 3b1b6602c7..f108de7458 100644
> >> --- a/docs/interop/vhost-user.rst
> >> +++ b/docs/interop/vhost-user.rst
> >> @@ -815,6 +815,7 @@ Protocol features
> >>    #define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD       12
> >>    #define VHOST_USER_PROTOCOL_F_RESET_DEVICE         13
> >>    #define VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS 14
> >> +  #define VHOST_USER_PROTOCOL_F_STATUS               15
> >>  
> >>  Master message types
> >>  --------------------
> >> @@ -1263,6 +1264,17 @@ Master message types
> >>  
> >>    The state.num field is currently reserved and must be set to 0.
> >>  
> >> +``VHOST_USER_SET_STATUS``
> >> +  :id: 36
> >> +  :equivalent ioctl: VHOST_VDPA_SET_STATUS
> >> +  :slave payload: N/A
> >> +  :master payload: ``u64``
> >> +
> >> +  When the ``VHOST_USER_PROTOCOL_F_STATUS`` protocol feature has been
> >> +  successfully negotiated, this message is submitted by the master to
> >> +  notify the backend with updated device status as defined in the Virtio
> >> +  specification.
> >> +
> >>  Slave message types
> >>  -------------------
> >>  
> >> diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> >> index 463e333531..37f3156dbc 100644
> >> --- a/hw/net/vhost_net.c
> >> +++ b/hw/net/vhost_net.c
> >> @@ -517,10 +517,10 @@ int vhost_set_state(NetClientState *nc, int state)
> >>  {
> >>      struct vhost_net *net = get_vhost_net(nc);
> >>      struct vhost_dev *hdev = &net->dev;
> >> -    if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
> >> -        if (hdev->vhost_ops->vhost_set_state) {
> >> -                return hdev->vhost_ops->vhost_set_state(hdev, state);
> >> -             }
> >> -        }
> >> +
> >> +    if (hdev->vhost_ops->vhost_set_state) {
> >> +        return hdev->vhost_ops->vhost_set_state(hdev, state);
> >> +    }
> >> +
> >>      return 0;
> >>  }
> >> diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
> >> index ec21e8fbe8..b7e52d97fc 100644
> >> --- a/hw/virtio/vhost-user.c
> >> +++ b/hw/virtio/vhost-user.c
> >> @@ -59,6 +59,7 @@ enum VhostUserProtocolFeature {
> >>      VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
> >>      VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
> >>      VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
> >> +    VHOST_USER_PROTOCOL_F_STATUS = 15,
> >>      VHOST_USER_PROTOCOL_F_MAX
> >>  };
> >>  
> >> @@ -100,6 +101,7 @@ typedef enum VhostUserRequest {
> >>      VHOST_USER_SET_INFLIGHT_FD = 32,
> >>      VHOST_USER_GPU_SET_SOCKET = 33,
> >>      VHOST_USER_RESET_DEVICE = 34,
> >> +    VHOST_USER_SET_STATUS = 36,
> >>      VHOST_USER_MAX
> >>  } VhostUserRequest;
> >>  
> >> @@ -1886,6 +1888,38 @@ static int vhost_user_set_inflight_fd(struct vhost_dev *dev,
> >>      return 0;
> >>  }
> >>  
> >> +static int vhost_user_set_state(struct vhost_dev *dev, int state)
> >> +{
> >> +    bool reply_supported = virtio_has_feature(dev->protocol_features,
> >> +                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
> >> +
> >> +    VhostUserMsg msg = {
> >> +        .hdr.request = VHOST_USER_SET_STATUS,
> >> +        .hdr.flags = VHOST_USER_VERSION,
> >> +        .hdr.size = sizeof(msg.payload.u64),
> >> +        .payload.u64 = (uint64_t)state,
> >> +    };
> >> +
> >> +    if (!virtio_has_feature(dev->protocol_features,
> >> +                VHOST_USER_PROTOCOL_F_STATUS)) {
> >> +        return -1;
> >> +    }
> >> +
> >> +    if (reply_supported) {
> >> +        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
> >> +    }
> >> +
> >> +    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
> >> +        return -1;
> >> +    }
> >> +
> >> +    if (reply_supported) {
> >> +        return process_message_reply(dev, &msg);
> >> +    }
> > 
> > So since we are doing this anyway, let's give backend the opportunity
> > to validate features and fail FEATURES_OK?
> 
> Just to be sure I understand, the feature negotiation happens,
> then when the backend receives FEATURES_OK, it uses the REPLY_ACK
> protocol feature to NACK?

Or ack but clear FEATURES_OK in the response.

> 
> In DPDK backend, we already check the feature set by SET_FEATURES are
> supported by the backend.

But that assumes all possible combinations of features in the bitmap
always work. That might not be the case.

> If it is not the case, either the master did
> set NEED_REPLY flag and error would be reported to the master, or the
> NEED_REPLY flag isn't set in the message and the backend disconnects.
> 
> Looking at Qemu side, it does not seem to set the NEED_REPLY flag on
> SET_FEATURES, so basically the backend close the sockets if it happened.

That is not the best ay to handle that imho.

> 
> I wonder whether it would be better for Qemu to set the NEED_REPLY flag
> on SET_FEATURES, so that when the backend is running and VHOST_F_LOG_ALL
> feature bit is set, we are the sure the master starts the live-migration
> only once the SET_FEATURES is fully handled on backend side.
> 
> What do you think?

features are set before backend is started so there's nothing to
migrate really.

> 
> >> +
> >> +    return 0;
> >> +}
> >> +
> >>  bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
> >>  {
> >>      if (user->chr) {
> >> @@ -1947,4 +1981,5 @@ const VhostOps user_ops = {
> >>          .vhost_backend_mem_section_filter = vhost_user_mem_section_filter,
> >>          .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
> >>          .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
> >> +        .vhost_set_state = vhost_user_set_state,
> >>  };
> >> -- 
> >> 2.25.4
> >
Maxime Coquelin May 14, 2020, 2:39 p.m. UTC | #7
On 5/14/20 12:51 PM, Michael S. Tsirkin wrote:
> On Thu, May 14, 2020 at 12:14:32PM +0200, Maxime Coquelin wrote:
>>
>>
>> On 5/14/20 9:53 AM, Jason Wang wrote:
>>>
>>> On 2020/5/14 下午3:33, Maxime Coquelin wrote:
>>>> It is usefull for the Vhost-user backend to know
>>>> about about the Virtio device status updates,
>>>> especially when the driver sets the DRIVER_OK bit.
>>>>
>>>> With that information, no more need to do hazardous
>>>> assumptions on when the driver is done with the
>>>> device configuration.
>>>>
>>>> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
>>>> ---
>>>>
>>>> This patch applies on top of Cindy's "vDPA support in qemu"
>>>> series, which introduces the .vhost_set_state vhost-backend
>>>> ops.
>>>>
>>>>   docs/interop/vhost-user.rst | 12 ++++++++++++
>>>>   hw/net/vhost_net.c          | 10 +++++-----
>>>>   hw/virtio/vhost-user.c      | 35 +++++++++++++++++++++++++++++++++++
>>>>   3 files changed, 52 insertions(+), 5 deletions(-)
>>>>
>>>> diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst
>>>> index 3b1b6602c7..f108de7458 100644
>>>> --- a/docs/interop/vhost-user.rst
>>>> +++ b/docs/interop/vhost-user.rst
>>>> @@ -815,6 +815,7 @@ Protocol features
>>>>     #define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD       12
>>>>     #define VHOST_USER_PROTOCOL_F_RESET_DEVICE         13
>>>>     #define VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS 14
>>>> +  #define VHOST_USER_PROTOCOL_F_STATUS               15
>>>>     Master message types
>>>>   --------------------
>>>> @@ -1263,6 +1264,17 @@ Master message types
>>>>       The state.num field is currently reserved and must be set to 0.
>>>>   +``VHOST_USER_SET_STATUS``
>>>> +  :id: 36
>>>> +  :equivalent ioctl: VHOST_VDPA_SET_STATUS
>>>> +  :slave payload: N/A
>>>> +  :master payload: ``u64``
>>>> +
>>>> +  When the ``VHOST_USER_PROTOCOL_F_STATUS`` protocol feature has been
>>>> +  successfully negotiated, this message is submitted by the master to
>>>> +  notify the backend with updated device status as defined in the Virtio
>>>> +  specification.
>>>> +
>>>>   Slave message types
>>>>   -------------------
>>>>   diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
>>>> index 463e333531..37f3156dbc 100644
>>>> --- a/hw/net/vhost_net.c
>>>> +++ b/hw/net/vhost_net.c
>>>> @@ -517,10 +517,10 @@ int vhost_set_state(NetClientState *nc, int state)
>>>>   {
>>>>       struct vhost_net *net = get_vhost_net(nc);
>>>>       struct vhost_dev *hdev = &net->dev;
>>>> -    if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
>>>> -        if (hdev->vhost_ops->vhost_set_state) {
>>>> -                return hdev->vhost_ops->vhost_set_state(hdev, state);
>>>> -             }
>>>> -        }
>>>> +
>>>> +    if (hdev->vhost_ops->vhost_set_state) {
>>>> +        return hdev->vhost_ops->vhost_set_state(hdev, state);
>>>> +    }
>>>> +
>>>>       return 0;
>>>>   }
>>>> diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
>>>> index ec21e8fbe8..b7e52d97fc 100644
>>>> --- a/hw/virtio/vhost-user.c
>>>> +++ b/hw/virtio/vhost-user.c
>>>> @@ -59,6 +59,7 @@ enum VhostUserProtocolFeature {
>>>>       VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
>>>>       VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
>>>>       VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
>>>> +    VHOST_USER_PROTOCOL_F_STATUS = 15,
>>>>       VHOST_USER_PROTOCOL_F_MAX
>>>>   };
>>>>   @@ -100,6 +101,7 @@ typedef enum VhostUserRequest {
>>>>       VHOST_USER_SET_INFLIGHT_FD = 32,
>>>>       VHOST_USER_GPU_SET_SOCKET = 33,
>>>>       VHOST_USER_RESET_DEVICE = 34,
>>>> +    VHOST_USER_SET_STATUS = 36,
>>>>       VHOST_USER_MAX
>>>>   } VhostUserRequest;
>>>>   @@ -1886,6 +1888,38 @@ static int vhost_user_set_inflight_fd(struct
>>>> vhost_dev *dev,
>>>>       return 0;
>>>>   }
>>>>   +static int vhost_user_set_state(struct vhost_dev *dev, int state)
>>>> +{
>>>> +    bool reply_supported = virtio_has_feature(dev->protocol_features,
>>>> +                                             
>>>> VHOST_USER_PROTOCOL_F_REPLY_ACK);
>>>> +
>>>> +    VhostUserMsg msg = {
>>>> +        .hdr.request = VHOST_USER_SET_STATUS,
>>>> +        .hdr.flags = VHOST_USER_VERSION,
>>>> +        .hdr.size = sizeof(msg.payload.u64),
>>>> +        .payload.u64 = (uint64_t)state,
>>>> +    };
>>>> +
>>>> +    if (!virtio_has_feature(dev->protocol_features,
>>>> +                VHOST_USER_PROTOCOL_F_STATUS)) {
>>>> +        return -1;
>>>> +    }
>>>> +
>>>> +    if (reply_supported) {
>>>> +        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
>>>> +    }
>>>> +
>>>> +    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
>>>> +        return -1;
>>>> +    }
>>>> +
>>>> +    if (reply_supported) {
>>>> +        return process_message_reply(dev, &msg);
>>>> +    }
>>>> +
>>>> +    return 0;
>>>> +}
>>>
>>>
>>> Interesting, I wonder how vm stop will be handled in this case.
>>
>> For now, my DPDK series only use DRIVER_OK to help determine when the
>> driver is done with the initialization. For VM stop, it still relies on
>> GET_VRING_BASE.
> 
> Sounds like a good fit.
> GET_VRING_BASE is transparent to guest, as is vmstop.
> This is more for driver 
> 
> 
>>
>> GET_VRING_BASE arrives before DRIVER_OK bit is cleared is the tests I've
>> done (logs from backend side):
> 
> 
> One problem is with legacy guests, for these you can't rely
> on DRIVER_OK, they often kick before that, and sometimes
> expect buffers to be used too (e.g. for scsi).

Ok, I remember this case now.
Any idea on how the backend would detect such legacy guests?

If I'm not mistaken, we discussed the idea to poll on the kick to detect
the rings are ready to be processed. But the problem is that Qemu writes
a kick at eventfd creation time:

vhost_user_backend_start():
-> vhost_dev_enable_notifiers()
	->virtio_bus_set_host_notifier()
		->event_notifier_init(, 1); //1 means active
->vhost_dev_start();

We could change the behavior in Qemu, but the backend won't know if
Qemu has the fix or not, so won't know if it can rely on the kick.

>>
>> VHOST_CONFIG: read message VHOST_USER_GET_VRING_BASE
>>
>> destroy port /tmp/vhost-user1, did: 0
>> VHOST_CONFIG: vring base idx:0 file:41
>> VHOST_CONFIG: read message VHOST_USER_GET_VRING_BASE
>> VHOST_CONFIG: vring base idx:1 file:0
>> VHOST_CONFIG: read message VHOST_USER_SET_STATUS
>> VHOST_CONFIG: New device status(0x0000000b):
>> 	-ACKNOWLEDGE: 1
>> 	-DRIVER: 1
>> 	-FEATURES_OK: 1
>> 	-DRIVER_OK: 0
>> 	-DEVICE_NEED_RESET: 0
>> 	-FAILED: 0
>>
>>> In the case of vDPA kernel, we probable don't want to mirror the virtio
>>> device status to vdpa device status directly.
>>
>> In vDPA DPDK, we don't mirror the Virtio device status either. It could
>> make sense to do that, but would require some API changes.
>>
>>> Since qemu may stop
>>> vhost-vdpa device through e.g resting vdpa device, but in the mean time,
>>> guest should not detect such condition in virtio device status.
>>
>>
>>
>>> So in the new version of vDPA support, we probably need to do:
>>>
>>> static int vhost_vdpa_set_state(struct vhost_dev *dev, bool started)
>>> {
>>>     if (started) {
>>>         uint8_t status = 0;
>>>
>>>         vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
>>>         vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &status);
>>>
>>>         return !(status & VIRTIO_CONFIG_S_DRIVER_OK);
>>>     } else {
>>>         vhost_vdpa_reset_device(dev);
>>>         vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
>>>                                    VIRTIO_CONFIG_S_DRIVER);
>>>         return 0;
>>>     }
>>> }
>>
>> IIUC, you have another copy of the status register not matching 1:1 what
>> the guest sets/sees.
>>
>> Is vhost_vdpa_add_status() sending VHOST_VDPA_SET_STATUS to the backend?
>>
>> And why reading back the status from the backend? Just to confirm the
>> change is taken into account?
> 
> Making sure features have been acked makes sense IMHO.
> 
> 
>>> And vhost_set_state() will be called from vhost_dev_start()/stop().
>>>
>>> Does this work for vhost-user as well?
>>
>> IIUC what you did above, I think it would work. And we won't need
>> GET_STATUS request, but just rely on the REPLY_ACK.
> 
> Right. Need to document that though.

Ok, will do in v2.

Thanks,
Maxime

> 
>>
>> Thanks,
>> Maxime
>>
>>> Thanks
>>>
>>>
>>>> +
>>>>   bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error
>>>> **errp)
>>>>   {
>>>>       if (user->chr) {
>>>> @@ -1947,4 +1981,5 @@ const VhostOps user_ops = {
>>>>           .vhost_backend_mem_section_filter =
>>>> vhost_user_mem_section_filter,
>>>>           .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
>>>>           .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
>>>> +        .vhost_set_state = vhost_user_set_state,
>>>>   };
>
Michael S. Tsirkin May 14, 2020, 2:48 p.m. UTC | #8
On Thu, May 14, 2020 at 04:39:26PM +0200, Maxime Coquelin wrote:
> 
> 
> On 5/14/20 12:51 PM, Michael S. Tsirkin wrote:
> > On Thu, May 14, 2020 at 12:14:32PM +0200, Maxime Coquelin wrote:
> >>
> >>
> >> On 5/14/20 9:53 AM, Jason Wang wrote:
> >>>
> >>> On 2020/5/14 下午3:33, Maxime Coquelin wrote:
> >>>> It is usefull for the Vhost-user backend to know
> >>>> about about the Virtio device status updates,
> >>>> especially when the driver sets the DRIVER_OK bit.
> >>>>
> >>>> With that information, no more need to do hazardous
> >>>> assumptions on when the driver is done with the
> >>>> device configuration.
> >>>>
> >>>> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> >>>> ---
> >>>>
> >>>> This patch applies on top of Cindy's "vDPA support in qemu"
> >>>> series, which introduces the .vhost_set_state vhost-backend
> >>>> ops.
> >>>>
> >>>>   docs/interop/vhost-user.rst | 12 ++++++++++++
> >>>>   hw/net/vhost_net.c          | 10 +++++-----
> >>>>   hw/virtio/vhost-user.c      | 35 +++++++++++++++++++++++++++++++++++
> >>>>   3 files changed, 52 insertions(+), 5 deletions(-)
> >>>>
> >>>> diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst
> >>>> index 3b1b6602c7..f108de7458 100644
> >>>> --- a/docs/interop/vhost-user.rst
> >>>> +++ b/docs/interop/vhost-user.rst
> >>>> @@ -815,6 +815,7 @@ Protocol features
> >>>>     #define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD       12
> >>>>     #define VHOST_USER_PROTOCOL_F_RESET_DEVICE         13
> >>>>     #define VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS 14
> >>>> +  #define VHOST_USER_PROTOCOL_F_STATUS               15
> >>>>     Master message types
> >>>>   --------------------
> >>>> @@ -1263,6 +1264,17 @@ Master message types
> >>>>       The state.num field is currently reserved and must be set to 0.
> >>>>   +``VHOST_USER_SET_STATUS``
> >>>> +  :id: 36
> >>>> +  :equivalent ioctl: VHOST_VDPA_SET_STATUS
> >>>> +  :slave payload: N/A
> >>>> +  :master payload: ``u64``
> >>>> +
> >>>> +  When the ``VHOST_USER_PROTOCOL_F_STATUS`` protocol feature has been
> >>>> +  successfully negotiated, this message is submitted by the master to
> >>>> +  notify the backend with updated device status as defined in the Virtio
> >>>> +  specification.
> >>>> +
> >>>>   Slave message types
> >>>>   -------------------
> >>>>   diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> >>>> index 463e333531..37f3156dbc 100644
> >>>> --- a/hw/net/vhost_net.c
> >>>> +++ b/hw/net/vhost_net.c
> >>>> @@ -517,10 +517,10 @@ int vhost_set_state(NetClientState *nc, int state)
> >>>>   {
> >>>>       struct vhost_net *net = get_vhost_net(nc);
> >>>>       struct vhost_dev *hdev = &net->dev;
> >>>> -    if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
> >>>> -        if (hdev->vhost_ops->vhost_set_state) {
> >>>> -                return hdev->vhost_ops->vhost_set_state(hdev, state);
> >>>> -             }
> >>>> -        }
> >>>> +
> >>>> +    if (hdev->vhost_ops->vhost_set_state) {
> >>>> +        return hdev->vhost_ops->vhost_set_state(hdev, state);
> >>>> +    }
> >>>> +
> >>>>       return 0;
> >>>>   }
> >>>> diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
> >>>> index ec21e8fbe8..b7e52d97fc 100644
> >>>> --- a/hw/virtio/vhost-user.c
> >>>> +++ b/hw/virtio/vhost-user.c
> >>>> @@ -59,6 +59,7 @@ enum VhostUserProtocolFeature {
> >>>>       VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
> >>>>       VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
> >>>>       VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
> >>>> +    VHOST_USER_PROTOCOL_F_STATUS = 15,
> >>>>       VHOST_USER_PROTOCOL_F_MAX
> >>>>   };
> >>>>   @@ -100,6 +101,7 @@ typedef enum VhostUserRequest {
> >>>>       VHOST_USER_SET_INFLIGHT_FD = 32,
> >>>>       VHOST_USER_GPU_SET_SOCKET = 33,
> >>>>       VHOST_USER_RESET_DEVICE = 34,
> >>>> +    VHOST_USER_SET_STATUS = 36,
> >>>>       VHOST_USER_MAX
> >>>>   } VhostUserRequest;
> >>>>   @@ -1886,6 +1888,38 @@ static int vhost_user_set_inflight_fd(struct
> >>>> vhost_dev *dev,
> >>>>       return 0;
> >>>>   }
> >>>>   +static int vhost_user_set_state(struct vhost_dev *dev, int state)
> >>>> +{
> >>>> +    bool reply_supported = virtio_has_feature(dev->protocol_features,
> >>>> +                                             
> >>>> VHOST_USER_PROTOCOL_F_REPLY_ACK);
> >>>> +
> >>>> +    VhostUserMsg msg = {
> >>>> +        .hdr.request = VHOST_USER_SET_STATUS,
> >>>> +        .hdr.flags = VHOST_USER_VERSION,
> >>>> +        .hdr.size = sizeof(msg.payload.u64),
> >>>> +        .payload.u64 = (uint64_t)state,
> >>>> +    };
> >>>> +
> >>>> +    if (!virtio_has_feature(dev->protocol_features,
> >>>> +                VHOST_USER_PROTOCOL_F_STATUS)) {
> >>>> +        return -1;
> >>>> +    }
> >>>> +
> >>>> +    if (reply_supported) {
> >>>> +        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
> >>>> +    }
> >>>> +
> >>>> +    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
> >>>> +        return -1;
> >>>> +    }
> >>>> +
> >>>> +    if (reply_supported) {
> >>>> +        return process_message_reply(dev, &msg);
> >>>> +    }
> >>>> +
> >>>> +    return 0;
> >>>> +}
> >>>
> >>>
> >>> Interesting, I wonder how vm stop will be handled in this case.
> >>
> >> For now, my DPDK series only use DRIVER_OK to help determine when the
> >> driver is done with the initialization. For VM stop, it still relies on
> >> GET_VRING_BASE.
> > 
> > Sounds like a good fit.
> > GET_VRING_BASE is transparent to guest, as is vmstop.
> > This is more for driver 
> > 
> > 
> >>
> >> GET_VRING_BASE arrives before DRIVER_OK bit is cleared is the tests I've
> >> done (logs from backend side):
> > 
> > 
> > One problem is with legacy guests, for these you can't rely
> > on DRIVER_OK, they often kick before that, and sometimes
> > expect buffers to be used too (e.g. for scsi).
> 
> Ok, I remember this case now.
> Any idea on how the backend would detect such legacy guests?

It's mostly safe to assume that it's only the case for pre-1.0 since 1.0
spec says you must not.  A small window after 1.0 has the bug too but I
think it's safe to just ask that these guests are fixed.


> If I'm not mistaken, we discussed the idea to poll on the kick to detect
> the rings are ready to be processed. But the problem is that Qemu writes
> a kick at eventfd creation time:
> 
> vhost_user_backend_start():
> -> vhost_dev_enable_notifiers()
> 	->virtio_bus_set_host_notifier()
> 		->event_notifier_init(, 1); //1 means active
> ->vhost_dev_start();
> 
> We could change the behavior in Qemu, but the backend won't know if
> Qemu has the fix or not, so won't know if it can rely on the kick.

eventfd counts kicks. So you can read the counter and check whether
there was another kick or not. The difficulty is around migration, we
should migrate the "ring kicked" state but we don't. We really should
just fix that, it's an old bug affecting not just vhost-user but
vhost too.


> >>
> >> VHOST_CONFIG: read message VHOST_USER_GET_VRING_BASE
> >>
> >> destroy port /tmp/vhost-user1, did: 0
> >> VHOST_CONFIG: vring base idx:0 file:41
> >> VHOST_CONFIG: read message VHOST_USER_GET_VRING_BASE
> >> VHOST_CONFIG: vring base idx:1 file:0
> >> VHOST_CONFIG: read message VHOST_USER_SET_STATUS
> >> VHOST_CONFIG: New device status(0x0000000b):
> >> 	-ACKNOWLEDGE: 1
> >> 	-DRIVER: 1
> >> 	-FEATURES_OK: 1
> >> 	-DRIVER_OK: 0
> >> 	-DEVICE_NEED_RESET: 0
> >> 	-FAILED: 0
> >>
> >>> In the case of vDPA kernel, we probable don't want to mirror the virtio
> >>> device status to vdpa device status directly.
> >>
> >> In vDPA DPDK, we don't mirror the Virtio device status either. It could
> >> make sense to do that, but would require some API changes.
> >>
> >>> Since qemu may stop
> >>> vhost-vdpa device through e.g resting vdpa device, but in the mean time,
> >>> guest should not detect such condition in virtio device status.
> >>
> >>
> >>
> >>> So in the new version of vDPA support, we probably need to do:
> >>>
> >>> static int vhost_vdpa_set_state(struct vhost_dev *dev, bool started)
> >>> {
> >>>     if (started) {
> >>>         uint8_t status = 0;
> >>>
> >>>         vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
> >>>         vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &status);
> >>>
> >>>         return !(status & VIRTIO_CONFIG_S_DRIVER_OK);
> >>>     } else {
> >>>         vhost_vdpa_reset_device(dev);
> >>>         vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
> >>>                                    VIRTIO_CONFIG_S_DRIVER);
> >>>         return 0;
> >>>     }
> >>> }
> >>
> >> IIUC, you have another copy of the status register not matching 1:1 what
> >> the guest sets/sees.
> >>
> >> Is vhost_vdpa_add_status() sending VHOST_VDPA_SET_STATUS to the backend?
> >>
> >> And why reading back the status from the backend? Just to confirm the
> >> change is taken into account?
> > 
> > Making sure features have been acked makes sense IMHO.
> > 
> > 
> >>> And vhost_set_state() will be called from vhost_dev_start()/stop().
> >>>
> >>> Does this work for vhost-user as well?
> >>
> >> IIUC what you did above, I think it would work. And we won't need
> >> GET_STATUS request, but just rely on the REPLY_ACK.
> > 
> > Right. Need to document that though.
> 
> Ok, will do in v2.
> 
> Thanks,
> Maxime
> 
> > 
> >>
> >> Thanks,
> >> Maxime
> >>
> >>> Thanks
> >>>
> >>>
> >>>> +
> >>>>   bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error
> >>>> **errp)
> >>>>   {
> >>>>       if (user->chr) {
> >>>> @@ -1947,4 +1981,5 @@ const VhostOps user_ops = {
> >>>>           .vhost_backend_mem_section_filter =
> >>>> vhost_user_mem_section_filter,
> >>>>           .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
> >>>>           .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
> >>>> +        .vhost_set_state = vhost_user_set_state,
> >>>>   };
> >
Maxime Coquelin May 14, 2020, 3:38 p.m. UTC | #9
On 5/14/20 4:20 PM, Michael S. Tsirkin wrote:
> On Thu, May 14, 2020 at 04:12:32PM +0200, Maxime Coquelin wrote:
>>
>>
>>
>> On 5/14/20 12:44 PM, Michael S. Tsirkin wrote:
>>> On Thu, May 14, 2020 at 09:33:32AM +0200, Maxime Coquelin wrote:
>>>> It is usefull for the Vhost-user backend to know
>>>> about about the Virtio device status updates,
>>>> especially when the driver sets the DRIVER_OK bit.
>>>>
>>>> With that information, no more need to do hazardous
>>>> assumptions on when the driver is done with the
>>>> device configuration.
>>>>
>>>> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
>>>> ---
>>>>
>>>> This patch applies on top of Cindy's "vDPA support in qemu"
>>>> series, which introduces the .vhost_set_state vhost-backend
>>>> ops.
>>>>
>>>>  docs/interop/vhost-user.rst | 12 ++++++++++++
>>>>  hw/net/vhost_net.c          | 10 +++++-----
>>>>  hw/virtio/vhost-user.c      | 35 +++++++++++++++++++++++++++++++++++
>>>>  3 files changed, 52 insertions(+), 5 deletions(-)
>>>>
>>>> diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst
>>>> index 3b1b6602c7..f108de7458 100644
>>>> --- a/docs/interop/vhost-user.rst
>>>> +++ b/docs/interop/vhost-user.rst
>>>> @@ -815,6 +815,7 @@ Protocol features
>>>>    #define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD       12
>>>>    #define VHOST_USER_PROTOCOL_F_RESET_DEVICE         13
>>>>    #define VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS 14
>>>> +  #define VHOST_USER_PROTOCOL_F_STATUS               15
>>>>  
>>>>  Master message types
>>>>  --------------------
>>>> @@ -1263,6 +1264,17 @@ Master message types
>>>>  
>>>>    The state.num field is currently reserved and must be set to 0.
>>>>  
>>>> +``VHOST_USER_SET_STATUS``
>>>> +  :id: 36
>>>> +  :equivalent ioctl: VHOST_VDPA_SET_STATUS
>>>> +  :slave payload: N/A
>>>> +  :master payload: ``u64``
>>>> +
>>>> +  When the ``VHOST_USER_PROTOCOL_F_STATUS`` protocol feature has been
>>>> +  successfully negotiated, this message is submitted by the master to
>>>> +  notify the backend with updated device status as defined in the Virtio
>>>> +  specification.
>>>> +
>>>>  Slave message types
>>>>  -------------------
>>>>  
>>>> diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
>>>> index 463e333531..37f3156dbc 100644
>>>> --- a/hw/net/vhost_net.c
>>>> +++ b/hw/net/vhost_net.c
>>>> @@ -517,10 +517,10 @@ int vhost_set_state(NetClientState *nc, int state)
>>>>  {
>>>>      struct vhost_net *net = get_vhost_net(nc);
>>>>      struct vhost_dev *hdev = &net->dev;
>>>> -    if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
>>>> -        if (hdev->vhost_ops->vhost_set_state) {
>>>> -                return hdev->vhost_ops->vhost_set_state(hdev, state);
>>>> -             }
>>>> -        }
>>>> +
>>>> +    if (hdev->vhost_ops->vhost_set_state) {
>>>> +        return hdev->vhost_ops->vhost_set_state(hdev, state);
>>>> +    }
>>>> +
>>>>      return 0;
>>>>  }
>>>> diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
>>>> index ec21e8fbe8..b7e52d97fc 100644
>>>> --- a/hw/virtio/vhost-user.c
>>>> +++ b/hw/virtio/vhost-user.c
>>>> @@ -59,6 +59,7 @@ enum VhostUserProtocolFeature {
>>>>      VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
>>>>      VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
>>>>      VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
>>>> +    VHOST_USER_PROTOCOL_F_STATUS = 15,
>>>>      VHOST_USER_PROTOCOL_F_MAX
>>>>  };
>>>>  
>>>> @@ -100,6 +101,7 @@ typedef enum VhostUserRequest {
>>>>      VHOST_USER_SET_INFLIGHT_FD = 32,
>>>>      VHOST_USER_GPU_SET_SOCKET = 33,
>>>>      VHOST_USER_RESET_DEVICE = 34,
>>>> +    VHOST_USER_SET_STATUS = 36,
>>>>      VHOST_USER_MAX
>>>>  } VhostUserRequest;
>>>>  
>>>> @@ -1886,6 +1888,38 @@ static int vhost_user_set_inflight_fd(struct vhost_dev *dev,
>>>>      return 0;
>>>>  }
>>>>  
>>>> +static int vhost_user_set_state(struct vhost_dev *dev, int state)
>>>> +{
>>>> +    bool reply_supported = virtio_has_feature(dev->protocol_features,
>>>> +                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
>>>> +
>>>> +    VhostUserMsg msg = {
>>>> +        .hdr.request = VHOST_USER_SET_STATUS,
>>>> +        .hdr.flags = VHOST_USER_VERSION,
>>>> +        .hdr.size = sizeof(msg.payload.u64),
>>>> +        .payload.u64 = (uint64_t)state,
>>>> +    };
>>>> +
>>>> +    if (!virtio_has_feature(dev->protocol_features,
>>>> +                VHOST_USER_PROTOCOL_F_STATUS)) {
>>>> +        return -1;
>>>> +    }
>>>> +
>>>> +    if (reply_supported) {
>>>> +        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
>>>> +    }
>>>> +
>>>> +    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
>>>> +        return -1;
>>>> +    }
>>>> +
>>>> +    if (reply_supported) {
>>>> +        return process_message_reply(dev, &msg);
>>>> +    }
>>>
>>> So since we are doing this anyway, let's give backend the opportunity
>>> to validate features and fail FEATURES_OK?
>>
>> Just to be sure I understand, the feature negotiation happens,
>> then when the backend receives FEATURES_OK, it uses the REPLY_ACK
>> protocol feature to NACK?
> 
> Or ack but clear FEATURES_OK in the response.

Ok, so it means instead of using reply-ack, either the SET_STATUS
request expects the status as reply, or we add a GET_STATUS request as
Jason proposes for the vDPA backend.

Not directly related to this, but we could also think of adding the
SET_STATUS request on the slave channel, so that the backend can set the
DEVICE_NEEDS_RESET bit if something goes wrong on backend side that
cannot be recovered. Not sure this is useful, but if we want to support
it, we may want to do it now, so that it is backed by the same
VHOST_USER_PROTOCOL_F_STATUS bit.

> 
>>
>> In DPDK backend, we already check the feature set by SET_FEATURES are
>> supported by the backend.
> 
> But that assumes all possible combinations of features in the bitmap
> always work. That might not be the case.
Yes, this is a (too) simple check, we should do better.

>> If it is not the case, either the master did
>> set NEED_REPLY flag and error would be reported to the master, or the
>> NEED_REPLY flag isn't set in the message and the backend disconnects.
>>
>> Looking at Qemu side, it does not seem to set the NEED_REPLY flag on
>> SET_FEATURES, so basically the backend close the sockets if it happened.
> 
> That is not the best ay to handle that imho.

I agree, but there's not much the backend can do if the master does not
set NEED_REPLY on SET_FEATURES.

>>
>> I wonder whether it would be better for Qemu to set the NEED_REPLY flag
>> on SET_FEATURES, so that when the backend is running and VHOST_F_LOG_ALL
>> feature bit is set, we are the sure the master starts the live-migration
>> only once the SET_FEATURES is fully handled on backend side.
>>
>> What do you think?
> 
> features are set before backend is started so there's nothing to
> migrate really.

Sorry, I was not clear. I meant why not setting NEED_REPLY in
SET_FEATURES as it would have two advantages:
1. Ability for the backend to nack if features being set aren't
   compatible.
2. On live-migration, reply-ack with SET_FEATURES would ensure that
   VHOST_F_LOG_ALL bit set has been taken into account by the backend
   (SET_FEATURES is send to set VHOST_F_LOG_ALL while the backend is
   started).

>>
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>>  bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
>>>>  {
>>>>      if (user->chr) {
>>>> @@ -1947,4 +1981,5 @@ const VhostOps user_ops = {
>>>>          .vhost_backend_mem_section_filter = vhost_user_mem_section_filter,
>>>>          .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
>>>>          .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
>>>> +        .vhost_set_state = vhost_user_set_state,
>>>>  };
>>>> -- 
>>>> 2.25.4
>>>
>
Maxime Coquelin May 14, 2020, 4:29 p.m. UTC | #10
On 5/14/20 4:48 PM, Michael S. Tsirkin wrote:
> On Thu, May 14, 2020 at 04:39:26PM +0200, Maxime Coquelin wrote:
>>
>>
>> On 5/14/20 12:51 PM, Michael S. Tsirkin wrote:
>>> On Thu, May 14, 2020 at 12:14:32PM +0200, Maxime Coquelin wrote:
>>>>
>>>>
>>>> On 5/14/20 9:53 AM, Jason Wang wrote:
>>>>>
>>>>> On 2020/5/14 下午3:33, Maxime Coquelin wrote:
>>>>>> It is usefull for the Vhost-user backend to know
>>>>>> about about the Virtio device status updates,
>>>>>> especially when the driver sets the DRIVER_OK bit.
>>>>>>
>>>>>> With that information, no more need to do hazardous
>>>>>> assumptions on when the driver is done with the
>>>>>> device configuration.
>>>>>>
>>>>>> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
>>>>>> ---
>>>>>>
>>>>>> This patch applies on top of Cindy's "vDPA support in qemu"
>>>>>> series, which introduces the .vhost_set_state vhost-backend
>>>>>> ops.
>>>>>>
>>>>>>   docs/interop/vhost-user.rst | 12 ++++++++++++
>>>>>>   hw/net/vhost_net.c          | 10 +++++-----
>>>>>>   hw/virtio/vhost-user.c      | 35 +++++++++++++++++++++++++++++++++++
>>>>>>   3 files changed, 52 insertions(+), 5 deletions(-)
>>>>>>
>>>>>> diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst
>>>>>> index 3b1b6602c7..f108de7458 100644
>>>>>> --- a/docs/interop/vhost-user.rst
>>>>>> +++ b/docs/interop/vhost-user.rst
>>>>>> @@ -815,6 +815,7 @@ Protocol features
>>>>>>     #define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD       12
>>>>>>     #define VHOST_USER_PROTOCOL_F_RESET_DEVICE         13
>>>>>>     #define VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS 14
>>>>>> +  #define VHOST_USER_PROTOCOL_F_STATUS               15
>>>>>>     Master message types
>>>>>>   --------------------
>>>>>> @@ -1263,6 +1264,17 @@ Master message types
>>>>>>       The state.num field is currently reserved and must be set to 0.
>>>>>>   +``VHOST_USER_SET_STATUS``
>>>>>> +  :id: 36
>>>>>> +  :equivalent ioctl: VHOST_VDPA_SET_STATUS
>>>>>> +  :slave payload: N/A
>>>>>> +  :master payload: ``u64``
>>>>>> +
>>>>>> +  When the ``VHOST_USER_PROTOCOL_F_STATUS`` protocol feature has been
>>>>>> +  successfully negotiated, this message is submitted by the master to
>>>>>> +  notify the backend with updated device status as defined in the Virtio
>>>>>> +  specification.
>>>>>> +
>>>>>>   Slave message types
>>>>>>   -------------------
>>>>>>   diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
>>>>>> index 463e333531..37f3156dbc 100644
>>>>>> --- a/hw/net/vhost_net.c
>>>>>> +++ b/hw/net/vhost_net.c
>>>>>> @@ -517,10 +517,10 @@ int vhost_set_state(NetClientState *nc, int state)
>>>>>>   {
>>>>>>       struct vhost_net *net = get_vhost_net(nc);
>>>>>>       struct vhost_dev *hdev = &net->dev;
>>>>>> -    if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
>>>>>> -        if (hdev->vhost_ops->vhost_set_state) {
>>>>>> -                return hdev->vhost_ops->vhost_set_state(hdev, state);
>>>>>> -             }
>>>>>> -        }
>>>>>> +
>>>>>> +    if (hdev->vhost_ops->vhost_set_state) {
>>>>>> +        return hdev->vhost_ops->vhost_set_state(hdev, state);
>>>>>> +    }
>>>>>> +
>>>>>>       return 0;
>>>>>>   }
>>>>>> diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
>>>>>> index ec21e8fbe8..b7e52d97fc 100644
>>>>>> --- a/hw/virtio/vhost-user.c
>>>>>> +++ b/hw/virtio/vhost-user.c
>>>>>> @@ -59,6 +59,7 @@ enum VhostUserProtocolFeature {
>>>>>>       VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
>>>>>>       VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
>>>>>>       VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
>>>>>> +    VHOST_USER_PROTOCOL_F_STATUS = 15,
>>>>>>       VHOST_USER_PROTOCOL_F_MAX
>>>>>>   };
>>>>>>   @@ -100,6 +101,7 @@ typedef enum VhostUserRequest {
>>>>>>       VHOST_USER_SET_INFLIGHT_FD = 32,
>>>>>>       VHOST_USER_GPU_SET_SOCKET = 33,
>>>>>>       VHOST_USER_RESET_DEVICE = 34,
>>>>>> +    VHOST_USER_SET_STATUS = 36,
>>>>>>       VHOST_USER_MAX
>>>>>>   } VhostUserRequest;
>>>>>>   @@ -1886,6 +1888,38 @@ static int vhost_user_set_inflight_fd(struct
>>>>>> vhost_dev *dev,
>>>>>>       return 0;
>>>>>>   }
>>>>>>   +static int vhost_user_set_state(struct vhost_dev *dev, int state)
>>>>>> +{
>>>>>> +    bool reply_supported = virtio_has_feature(dev->protocol_features,
>>>>>> +                                             
>>>>>> VHOST_USER_PROTOCOL_F_REPLY_ACK);
>>>>>> +
>>>>>> +    VhostUserMsg msg = {
>>>>>> +        .hdr.request = VHOST_USER_SET_STATUS,
>>>>>> +        .hdr.flags = VHOST_USER_VERSION,
>>>>>> +        .hdr.size = sizeof(msg.payload.u64),
>>>>>> +        .payload.u64 = (uint64_t)state,
>>>>>> +    };
>>>>>> +
>>>>>> +    if (!virtio_has_feature(dev->protocol_features,
>>>>>> +                VHOST_USER_PROTOCOL_F_STATUS)) {
>>>>>> +        return -1;
>>>>>> +    }
>>>>>> +
>>>>>> +    if (reply_supported) {
>>>>>> +        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
>>>>>> +    }
>>>>>> +
>>>>>> +    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
>>>>>> +        return -1;
>>>>>> +    }
>>>>>> +
>>>>>> +    if (reply_supported) {
>>>>>> +        return process_message_reply(dev, &msg);
>>>>>> +    }
>>>>>> +
>>>>>> +    return 0;
>>>>>> +}
>>>>>
>>>>>
>>>>> Interesting, I wonder how vm stop will be handled in this case.
>>>>
>>>> For now, my DPDK series only use DRIVER_OK to help determine when the
>>>> driver is done with the initialization. For VM stop, it still relies on
>>>> GET_VRING_BASE.
>>>
>>> Sounds like a good fit.
>>> GET_VRING_BASE is transparent to guest, as is vmstop.
>>> This is more for driver 
>>>
>>>
>>>>
>>>> GET_VRING_BASE arrives before DRIVER_OK bit is cleared is the tests I've
>>>> done (logs from backend side):
>>>
>>>
>>> One problem is with legacy guests, for these you can't rely
>>> on DRIVER_OK, they often kick before that, and sometimes
>>> expect buffers to be used too (e.g. for scsi).
>>
>> Ok, I remember this case now.
>> Any idea on how the backend would detect such legacy guests?
> 
> It's mostly safe to assume that it's only the case for pre-1.0 since 1.0
> spec says you must not.  A small window after 1.0 has the bug too but I
> think it's safe to just ask that these guests are fixed.

Good, so I will make starting on DRIVER_OK depending on VERSION_1 being
negotiated.

>> If I'm not mistaken, we discussed the idea to poll on the kick to detect
>> the rings are ready to be processed. But the problem is that Qemu writes
>> a kick at eventfd creation time:
>>
>> vhost_user_backend_start():
>> -> vhost_dev_enable_notifiers()
>> 	->virtio_bus_set_host_notifier()
>> 		->event_notifier_init(, 1); //1 means active
>> ->vhost_dev_start();
>>
>> We could change the behavior in Qemu, but the backend won't know if
>> Qemu has the fix or not, so won't know if it can rely on the kick.
> 
> eventfd counts kicks. So you can read the counter and check whether
> there was another kick or not.

OK. But if the vhost-user lib reads the counter, won't the backend
implementation (e.g. SPDK) that uses the Vhost-user lib miss the kick?

> The difficulty is around migration, we
> should migrate the "ring kicked" state but we don't. We really should
> just fix that, it's an old bug affecting not just vhost-user but
> vhost too.

But if we fix that in Qemu, the backend won't be able to know whether
it should wait for one, or for two kicks.

Maxime
>>>>
>>>> VHOST_CONFIG: read message VHOST_USER_GET_VRING_BASE
>>>>
>>>> destroy port /tmp/vhost-user1, did: 0
>>>> VHOST_CONFIG: vring base idx:0 file:41
>>>> VHOST_CONFIG: read message VHOST_USER_GET_VRING_BASE
>>>> VHOST_CONFIG: vring base idx:1 file:0
>>>> VHOST_CONFIG: read message VHOST_USER_SET_STATUS
>>>> VHOST_CONFIG: New device status(0x0000000b):
>>>> 	-ACKNOWLEDGE: 1
>>>> 	-DRIVER: 1
>>>> 	-FEATURES_OK: 1
>>>> 	-DRIVER_OK: 0
>>>> 	-DEVICE_NEED_RESET: 0
>>>> 	-FAILED: 0
>>>>
>>>>> In the case of vDPA kernel, we probable don't want to mirror the virtio
>>>>> device status to vdpa device status directly.
>>>>
>>>> In vDPA DPDK, we don't mirror the Virtio device status either. It could
>>>> make sense to do that, but would require some API changes.
>>>>
>>>>> Since qemu may stop
>>>>> vhost-vdpa device through e.g resting vdpa device, but in the mean time,
>>>>> guest should not detect such condition in virtio device status.
>>>>
>>>>
>>>>
>>>>> So in the new version of vDPA support, we probably need to do:
>>>>>
>>>>> static int vhost_vdpa_set_state(struct vhost_dev *dev, bool started)
>>>>> {
>>>>>     if (started) {
>>>>>         uint8_t status = 0;
>>>>>
>>>>>         vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
>>>>>         vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &status);
>>>>>
>>>>>         return !(status & VIRTIO_CONFIG_S_DRIVER_OK);
>>>>>     } else {
>>>>>         vhost_vdpa_reset_device(dev);
>>>>>         vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
>>>>>                                    VIRTIO_CONFIG_S_DRIVER);
>>>>>         return 0;
>>>>>     }
>>>>> }
>>>>
>>>> IIUC, you have another copy of the status register not matching 1:1 what
>>>> the guest sets/sees.
>>>>
>>>> Is vhost_vdpa_add_status() sending VHOST_VDPA_SET_STATUS to the backend?
>>>>
>>>> And why reading back the status from the backend? Just to confirm the
>>>> change is taken into account?
>>>
>>> Making sure features have been acked makes sense IMHO.
>>>
>>>
>>>>> And vhost_set_state() will be called from vhost_dev_start()/stop().
>>>>>
>>>>> Does this work for vhost-user as well?
>>>>
>>>> IIUC what you did above, I think it would work. And we won't need
>>>> GET_STATUS request, but just rely on the REPLY_ACK.
>>>
>>> Right. Need to document that though.
>>
>> Ok, will do in v2.
>>
>> Thanks,
>> Maxime
>>
>>>
>>>>
>>>> Thanks,
>>>> Maxime
>>>>
>>>>> Thanks
>>>>>
>>>>>
>>>>>> +
>>>>>>   bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error
>>>>>> **errp)
>>>>>>   {
>>>>>>       if (user->chr) {
>>>>>> @@ -1947,4 +1981,5 @@ const VhostOps user_ops = {
>>>>>>           .vhost_backend_mem_section_filter =
>>>>>> vhost_user_mem_section_filter,
>>>>>>           .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
>>>>>>           .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
>>>>>> +        .vhost_set_state = vhost_user_set_state,
>>>>>>   };
>>>
>
Jason Wang May 15, 2020, 3:53 a.m. UTC | #11
On 2020/5/14 下午6:14, Maxime Coquelin wrote:
>
> On 5/14/20 9:53 AM, Jason Wang wrote:
>> On 2020/5/14 下午3:33, Maxime Coquelin wrote:
>>> It is usefull for the Vhost-user backend to know
>>> about about the Virtio device status updates,
>>> especially when the driver sets the DRIVER_OK bit.
>>>
>>> With that information, no more need to do hazardous
>>> assumptions on when the driver is done with the
>>> device configuration.
>>>
>>> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
>>> ---
>>>
>>> This patch applies on top of Cindy's "vDPA support in qemu"
>>> series, which introduces the .vhost_set_state vhost-backend
>>> ops.
>>>
>>>    docs/interop/vhost-user.rst | 12 ++++++++++++
>>>    hw/net/vhost_net.c          | 10 +++++-----
>>>    hw/virtio/vhost-user.c      | 35 +++++++++++++++++++++++++++++++++++
>>>    3 files changed, 52 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst
>>> index 3b1b6602c7..f108de7458 100644
>>> --- a/docs/interop/vhost-user.rst
>>> +++ b/docs/interop/vhost-user.rst
>>> @@ -815,6 +815,7 @@ Protocol features
>>>      #define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD       12
>>>      #define VHOST_USER_PROTOCOL_F_RESET_DEVICE         13
>>>      #define VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS 14
>>> +  #define VHOST_USER_PROTOCOL_F_STATUS               15
>>>      Master message types
>>>    --------------------
>>> @@ -1263,6 +1264,17 @@ Master message types
>>>        The state.num field is currently reserved and must be set to 0.
>>>    +``VHOST_USER_SET_STATUS``
>>> +  :id: 36
>>> +  :equivalent ioctl: VHOST_VDPA_SET_STATUS
>>> +  :slave payload: N/A
>>> +  :master payload: ``u64``
>>> +
>>> +  When the ``VHOST_USER_PROTOCOL_F_STATUS`` protocol feature has been
>>> +  successfully negotiated, this message is submitted by the master to
>>> +  notify the backend with updated device status as defined in the Virtio
>>> +  specification.
>>> +
>>>    Slave message types
>>>    -------------------
>>>    diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
>>> index 463e333531..37f3156dbc 100644
>>> --- a/hw/net/vhost_net.c
>>> +++ b/hw/net/vhost_net.c
>>> @@ -517,10 +517,10 @@ int vhost_set_state(NetClientState *nc, int state)
>>>    {
>>>        struct vhost_net *net = get_vhost_net(nc);
>>>        struct vhost_dev *hdev = &net->dev;
>>> -    if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
>>> -        if (hdev->vhost_ops->vhost_set_state) {
>>> -                return hdev->vhost_ops->vhost_set_state(hdev, state);
>>> -             }
>>> -        }
>>> +
>>> +    if (hdev->vhost_ops->vhost_set_state) {
>>> +        return hdev->vhost_ops->vhost_set_state(hdev, state);
>>> +    }
>>> +
>>>        return 0;
>>>    }
>>> diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
>>> index ec21e8fbe8..b7e52d97fc 100644
>>> --- a/hw/virtio/vhost-user.c
>>> +++ b/hw/virtio/vhost-user.c
>>> @@ -59,6 +59,7 @@ enum VhostUserProtocolFeature {
>>>        VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
>>>        VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
>>>        VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
>>> +    VHOST_USER_PROTOCOL_F_STATUS = 15,
>>>        VHOST_USER_PROTOCOL_F_MAX
>>>    };
>>>    @@ -100,6 +101,7 @@ typedef enum VhostUserRequest {
>>>        VHOST_USER_SET_INFLIGHT_FD = 32,
>>>        VHOST_USER_GPU_SET_SOCKET = 33,
>>>        VHOST_USER_RESET_DEVICE = 34,
>>> +    VHOST_USER_SET_STATUS = 36,
>>>        VHOST_USER_MAX
>>>    } VhostUserRequest;
>>>    @@ -1886,6 +1888,38 @@ static int vhost_user_set_inflight_fd(struct
>>> vhost_dev *dev,
>>>        return 0;
>>>    }
>>>    +static int vhost_user_set_state(struct vhost_dev *dev, int state)
>>> +{
>>> +    bool reply_supported = virtio_has_feature(dev->protocol_features,
>>> +
>>> VHOST_USER_PROTOCOL_F_REPLY_ACK);
>>> +
>>> +    VhostUserMsg msg = {
>>> +        .hdr.request = VHOST_USER_SET_STATUS,
>>> +        .hdr.flags = VHOST_USER_VERSION,
>>> +        .hdr.size = sizeof(msg.payload.u64),
>>> +        .payload.u64 = (uint64_t)state,
>>> +    };
>>> +
>>> +    if (!virtio_has_feature(dev->protocol_features,
>>> +                VHOST_USER_PROTOCOL_F_STATUS)) {
>>> +        return -1;
>>> +    }
>>> +
>>> +    if (reply_supported) {
>>> +        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
>>> +    }
>>> +
>>> +    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
>>> +        return -1;
>>> +    }
>>> +
>>> +    if (reply_supported) {
>>> +        return process_message_reply(dev, &msg);
>>> +    }
>>> +
>>> +    return 0;
>>> +}
>>
>> Interesting, I wonder how vm stop will be handled in this case.
> For now, my DPDK series only use DRIVER_OK to help determine when the
> driver is done with the initialization. For VM stop, it still relies on
> GET_VRING_BASE.
>
> GET_VRING_BASE arrives before DRIVER_OK bit is cleared is the tests I've
> done (logs from backend side):
>
> VHOST_CONFIG: read message VHOST_USER_GET_VRING_BASE
>
> destroy port /tmp/vhost-user1, did: 0
> VHOST_CONFIG: vring base idx:0 file:41
> VHOST_CONFIG: read message VHOST_USER_GET_VRING_BASE
> VHOST_CONFIG: vring base idx:1 file:0
> VHOST_CONFIG: read message VHOST_USER_SET_STATUS
> VHOST_CONFIG: New device status(0x0000000b):
> 	-ACKNOWLEDGE: 1
> 	-DRIVER: 1
> 	-FEATURES_OK: 1
> 	-DRIVER_OK: 0
> 	-DEVICE_NEED_RESET: 0
> 	-FAILED: 0


Then it looks like a function duplication, e.g backend could be stopped 
either via GET_VRING_BASE or VHOST_USER_SET_STATUS(0).

And I guess virtio-net vDPA driver, dpdk needs to reset the device when 
it accepts GET_VRING_BASE when DRIVER_OK is set.


>
>> In the case of vDPA kernel, we probable don't want to mirror the virtio
>> device status to vdpa device status directly.
> In vDPA DPDK, we don't mirror the Virtio device status either. It could
> make sense to do that, but would require some API changes.
>
>> Since qemu may stop
>> vhost-vdpa device through e.g resting vdpa device, but in the mean time,
>> guest should not detect such condition in virtio device status.
>
>
>> So in the new version of vDPA support, we probably need to do:
>>
>> static int vhost_vdpa_set_state(struct vhost_dev *dev, bool started)
>> {
>>      if (started) {
>>          uint8_t status = 0;
>>
>>          vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
>>          vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &status);
>>
>>          return !(status & VIRTIO_CONFIG_S_DRIVER_OK);
>>      } else {
>>          vhost_vdpa_reset_device(dev);
>>          vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
>>                                     VIRTIO_CONFIG_S_DRIVER);
>>          return 0;
>>      }
>> }
> IIUC, you have another copy of the status register not matching 1:1 what
> the guest sets/sees.


Yes, the status here is not the one visible by guest. We try to make 
vDPA device status behave like a virtio device status. The hardware 
driver will start and stop the hardware according to that.

Since the driver may serve for kernel subsystem, have an implicit API 
like GET_VRING_BASE/SET_VRING_BASE seem redundant.


>
> Is vhost_vdpa_add_status() sending VHOST_VDPA_SET_STATUS to the backend?


Yes.


>
> And why reading back the status from the backend? Just to confirm the
> change is taken into account?


Yes, and we use that in kernel driver as well.


>
>> And vhost_set_state() will be called from vhost_dev_start()/stop().
>>
>> Does this work for vhost-user as well?
> IIUC what you did above, I think it would work. And we won't need
> GET_STATUS request, but just rely on the REPLY_ACK.


Right.

Thanks


>
> Thanks,
> Maxime
>
>> Thanks
>>
>>
>>> +
>>>    bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error
>>> **errp)
>>>    {
>>>        if (user->chr) {
>>> @@ -1947,4 +1981,5 @@ const VhostOps user_ops = {
>>>            .vhost_backend_mem_section_filter =
>>> vhost_user_mem_section_filter,
>>>            .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
>>>            .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
>>> +        .vhost_set_state = vhost_user_set_state,
>>>    };
diff mbox series

Patch

diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst
index 3b1b6602c7..f108de7458 100644
--- a/docs/interop/vhost-user.rst
+++ b/docs/interop/vhost-user.rst
@@ -815,6 +815,7 @@  Protocol features
   #define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD       12
   #define VHOST_USER_PROTOCOL_F_RESET_DEVICE         13
   #define VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS 14
+  #define VHOST_USER_PROTOCOL_F_STATUS               15
 
 Master message types
 --------------------
@@ -1263,6 +1264,17 @@  Master message types
 
   The state.num field is currently reserved and must be set to 0.
 
+``VHOST_USER_SET_STATUS``
+  :id: 36
+  :equivalent ioctl: VHOST_VDPA_SET_STATUS
+  :slave payload: N/A
+  :master payload: ``u64``
+
+  When the ``VHOST_USER_PROTOCOL_F_STATUS`` protocol feature has been
+  successfully negotiated, this message is submitted by the master to
+  notify the backend with updated device status as defined in the Virtio
+  specification.
+
 Slave message types
 -------------------
 
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index 463e333531..37f3156dbc 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -517,10 +517,10 @@  int vhost_set_state(NetClientState *nc, int state)
 {
     struct vhost_net *net = get_vhost_net(nc);
     struct vhost_dev *hdev = &net->dev;
-    if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
-        if (hdev->vhost_ops->vhost_set_state) {
-                return hdev->vhost_ops->vhost_set_state(hdev, state);
-             }
-        }
+
+    if (hdev->vhost_ops->vhost_set_state) {
+        return hdev->vhost_ops->vhost_set_state(hdev, state);
+    }
+
     return 0;
 }
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index ec21e8fbe8..b7e52d97fc 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -59,6 +59,7 @@  enum VhostUserProtocolFeature {
     VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
     VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
     VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
+    VHOST_USER_PROTOCOL_F_STATUS = 15,
     VHOST_USER_PROTOCOL_F_MAX
 };
 
@@ -100,6 +101,7 @@  typedef enum VhostUserRequest {
     VHOST_USER_SET_INFLIGHT_FD = 32,
     VHOST_USER_GPU_SET_SOCKET = 33,
     VHOST_USER_RESET_DEVICE = 34,
+    VHOST_USER_SET_STATUS = 36,
     VHOST_USER_MAX
 } VhostUserRequest;
 
@@ -1886,6 +1888,38 @@  static int vhost_user_set_inflight_fd(struct vhost_dev *dev,
     return 0;
 }
 
+static int vhost_user_set_state(struct vhost_dev *dev, int state)
+{
+    bool reply_supported = virtio_has_feature(dev->protocol_features,
+                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
+
+    VhostUserMsg msg = {
+        .hdr.request = VHOST_USER_SET_STATUS,
+        .hdr.flags = VHOST_USER_VERSION,
+        .hdr.size = sizeof(msg.payload.u64),
+        .payload.u64 = (uint64_t)state,
+    };
+
+    if (!virtio_has_feature(dev->protocol_features,
+                VHOST_USER_PROTOCOL_F_STATUS)) {
+        return -1;
+    }
+
+    if (reply_supported) {
+        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
+    }
+
+    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
+        return -1;
+    }
+
+    if (reply_supported) {
+        return process_message_reply(dev, &msg);
+    }
+
+    return 0;
+}
+
 bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
 {
     if (user->chr) {
@@ -1947,4 +1981,5 @@  const VhostOps user_ops = {
         .vhost_backend_mem_section_filter = vhost_user_mem_section_filter,
         .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
         .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
+        .vhost_set_state = vhost_user_set_state,
 };