diff mbox series

[v3,4/5] qapi: introduce device-sync-config

Message ID 20240329183758.3360733-5-vsementsov@yandex-team.ru (mailing list archive)
State New, archived
Headers show
Series vhost-user-blk: live resize additional APIs | expand

Commit Message

Vladimir Sementsov-Ogievskiy March 29, 2024, 6:37 p.m. UTC
Add a command to sync config from the vhost-user backend to the device. It
may be helpful when VHOST_USER_SLAVE_CONFIG_CHANGE_MSG failed, did not
trigger an interrupt to the guest, or is simply not available (not
supported by the vhost-user server).

The command result is racy if it is allowed during migration. Let's allow
the sync only in RUNNING state.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
---
 hw/block/vhost-user-blk.c | 27 ++++++++++++++++------
 hw/virtio/virtio-pci.c    |  9 ++++++++
 include/hw/qdev-core.h    |  3 +++
 include/sysemu/runstate.h |  1 +
 qapi/qdev.json            | 21 +++++++++++++++++
 system/qdev-monitor.c     | 47 +++++++++++++++++++++++++++++++++++++++
 system/runstate.c         |  5 +++++
 7 files changed, 106 insertions(+), 7 deletions(-)

Comments

Markus Armbruster April 24, 2024, 11:48 a.m. UTC | #1
Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> writes:

> Add command to sync config from vhost-user backend to the device. It
> may be helpful when VHOST_USER_SLAVE_CONFIG_CHANGE_MSG failed or not
> triggered interrupt to the guest or just not available (not supported
> by vhost-user server).
>
> Command result is racy if allow it during migration. Let's allow the
> sync only in RUNNING state.
>
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
> ---
>  hw/block/vhost-user-blk.c | 27 ++++++++++++++++------
>  hw/virtio/virtio-pci.c    |  9 ++++++++
>  include/hw/qdev-core.h    |  3 +++
>  include/sysemu/runstate.h |  1 +
>  qapi/qdev.json            | 21 +++++++++++++++++
>  system/qdev-monitor.c     | 47 +++++++++++++++++++++++++++++++++++++++
>  system/runstate.c         |  5 +++++
>  7 files changed, 106 insertions(+), 7 deletions(-)
>
> diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
> index 9e6bbc6950..2f301f380c 100644
> --- a/hw/block/vhost-user-blk.c
> +++ b/hw/block/vhost-user-blk.c
> @@ -88,27 +88,39 @@ static void vhost_user_blk_set_config(VirtIODevice *vdev, const uint8_t *config)
>      s->blkcfg.wce = blkcfg->wce;
>  }
>  
> +static int vhost_user_blk_sync_config(DeviceState *dev, Error **errp)
> +{
> +    int ret;
> +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> +    VHostUserBlk *s = VHOST_USER_BLK(vdev);
> +
> +    ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg,
> +                               vdev->config_len, errp);
> +    if (ret < 0) {
> +        return ret;
> +    }
> +
> +    memcpy(vdev->config, &s->blkcfg, vdev->config_len);
> +    virtio_notify_config(vdev);
> +
> +    return 0;
> +}
> +
>  static int vhost_user_blk_handle_config_change(struct vhost_dev *dev)
>  {
>      int ret;
> -    VirtIODevice *vdev = dev->vdev;
> -    VHostUserBlk *s = VHOST_USER_BLK(dev->vdev);
>      Error *local_err = NULL;
>  
>      if (!dev->started) {
>          return 0;
>      }
>  
> -    ret = vhost_dev_get_config(dev, (uint8_t *)&s->blkcfg,
> -                               vdev->config_len, &local_err);
> +    ret = vhost_user_blk_sync_config(DEVICE(dev->vdev), &local_err);
>      if (ret < 0) {
>          error_report_err(local_err);
>          return ret;
>      }
>  
> -    memcpy(dev->vdev->config, &s->blkcfg, vdev->config_len);
> -    virtio_notify_config(dev->vdev);
> -
>      return 0;
>  }

This factors vhost_user_blk_sync_config() out of
vhost_user_blk_handle_config_change() for reuse.  Correct?

>  
> @@ -576,6 +588,7 @@ static void vhost_user_blk_class_init(ObjectClass *klass, void *data)
>  
>      device_class_set_props(dc, vhost_user_blk_properties);
>      dc->vmsd = &vmstate_vhost_user_blk;
> +    dc->sync_config = vhost_user_blk_sync_config;
>      set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
>      vdc->realize = vhost_user_blk_device_realize;
>      vdc->unrealize = vhost_user_blk_device_unrealize;
> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> index eaaf86402c..92afbae71c 100644
> --- a/hw/virtio/virtio-pci.c
> +++ b/hw/virtio/virtio-pci.c
> @@ -2501,6 +2501,14 @@ static void virtio_pci_dc_realize(DeviceState *qdev, Error **errp)
>      vpciklass->parent_dc_realize(qdev, errp);
>  }
>  
> +static int virtio_pci_sync_config(DeviceState *dev, Error **errp)
> +{
> +    VirtIOPCIProxy *proxy = VIRTIO_PCI(dev);
> +    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
> +
> +    return qdev_sync_config(DEVICE(vdev), errp);
> +}
> +
>  static void virtio_pci_class_init(ObjectClass *klass, void *data)
>  {
>      DeviceClass *dc = DEVICE_CLASS(klass);
> @@ -2517,6 +2525,7 @@ static void virtio_pci_class_init(ObjectClass *klass, void *data)
>      device_class_set_parent_realize(dc, virtio_pci_dc_realize,
>                                      &vpciklass->parent_dc_realize);
>      rc->phases.hold = virtio_pci_bus_reset_hold;
> +    dc->sync_config = virtio_pci_sync_config;
>  }
>  

I tried to follow the callbacks, but quickly gave up.  Leaving to a
reviewer who understands virtio.

>  static const TypeInfo virtio_pci_info = {
> diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
> index 9228e96c87..87135bdcdf 100644
> --- a/include/hw/qdev-core.h
> +++ b/include/hw/qdev-core.h
> @@ -95,6 +95,7 @@ typedef void (*DeviceUnrealize)(DeviceState *dev);
>  typedef void (*DeviceReset)(DeviceState *dev);
>  typedef void (*BusRealize)(BusState *bus, Error **errp);
>  typedef void (*BusUnrealize)(BusState *bus);
> +typedef int (*DeviceSyncConfig)(DeviceState *dev, Error **errp);
>  
>  /**
>   * struct DeviceClass - The base class for all devices.
> @@ -162,6 +163,7 @@ struct DeviceClass {
>      DeviceReset reset;
>      DeviceRealize realize;
>      DeviceUnrealize unrealize;
> +    DeviceSyncConfig sync_config;
>  
>      /**
>       * @vmsd: device state serialisation description for
> @@ -546,6 +548,7 @@ bool qdev_hotplug_allowed(DeviceState *dev, Error **errp);
>   */
>  HotplugHandler *qdev_get_hotplug_handler(DeviceState *dev);
>  void qdev_unplug(DeviceState *dev, Error **errp);
> +int qdev_sync_config(DeviceState *dev, Error **errp);
>  void qdev_simple_device_unplug_cb(HotplugHandler *hotplug_dev,
>                                    DeviceState *dev, Error **errp);
>  void qdev_machine_creation_done(void);
> diff --git a/include/sysemu/runstate.h b/include/sysemu/runstate.h
> index 0117d243c4..296af52322 100644
> --- a/include/sysemu/runstate.h
> +++ b/include/sysemu/runstate.h
> @@ -5,6 +5,7 @@
>  #include "qemu/notify.h"
>  
>  bool runstate_check(RunState state);
> +const char *current_run_state_str(void);
>  void runstate_set(RunState new_state);
>  RunState runstate_get(void);
>  bool runstate_is_running(void);
> diff --git a/qapi/qdev.json b/qapi/qdev.json
> index facaa0bc6a..e8be79c3d5 100644
> --- a/qapi/qdev.json
> +++ b/qapi/qdev.json
> @@ -161,3 +161,24 @@
>  ##
>  { 'event': 'DEVICE_UNPLUG_GUEST_ERROR',
>    'data': { '*device': 'str', 'path': 'str' } }
> +
> +##
> +# @device-sync-config:
> +#
> +# Synchronize config from backend to the guest. The command notifies
> +# re-read the device config from the backend and notifies the guest
> +# to re-read the config. The command may be used to notify the guest
> +# about block device capcity change. Currently only vhost-user-blk
> +# device supports this.

I'm not sure I understand this.  To work towards an understanding, I
rephrase it, and you point out the errors.

     Synchronize device configuration from host to guest part.  First,
     copy the configuration from the host part (backend) to the guest
     part (frontend).  Then notify guest software that device
     configuration changed.

I wonder how configuration can get out of sync.  Can you explain?

> +#
> +# @id: the device's ID or QOM path
> +#
> +# Features:
> +#
> +# @unstable: The command is experimental.
> +#
> +# Since: 9.1
> +##
> +{ 'command': 'device-sync-config',
> +  'features': [ 'unstable' ],
> +  'data': {'id': 'str'} }
> diff --git a/system/qdev-monitor.c b/system/qdev-monitor.c
> index 7e075d91c1..cb35ea0b86 100644
> --- a/system/qdev-monitor.c
> +++ b/system/qdev-monitor.c
> @@ -23,6 +23,7 @@
>  #include "monitor/monitor.h"
>  #include "monitor/qdev.h"
>  #include "sysemu/arch_init.h"
> +#include "sysemu/runstate.h"
>  #include "qapi/error.h"
>  #include "qapi/qapi-commands-qdev.h"
>  #include "qapi/qmp/dispatch.h"
> @@ -969,6 +970,52 @@ void qmp_device_del(const char *id, Error **errp)
>      }
>  }
>  
> +int qdev_sync_config(DeviceState *dev, Error **errp)
> +{
> +    DeviceClass *dc = DEVICE_GET_CLASS(dev);
> +
> +    if (!dc->sync_config) {
> +        error_setg(errp, "device-sync-config is not supported for '%s'",
> +                   object_get_typename(OBJECT(dev)));
> +        return -ENOTSUP;
> +    }
> +
> +    return dc->sync_config(dev, errp);
> +}
> +
> +void qmp_device_sync_config(const char *id, Error **errp)
> +{
> +    DeviceState *dev;
> +
> +    /*
> +     * During migration there is a race between syncing`config and
> +     * migrating it, so let's just not allow it.

Can you briefly explain the race?

> +     *
> +     * Moreover, let's not rely on setting up interrupts in paused
> +     * state, which may be a part of migration process.

What dependence exactly are you avoiding?  Config synchronization
depending on guest interrupt delivery?

> +     */
> +
> +    if (migration_is_running()) {
> +        error_setg(errp, "Config synchronization is not allowed "
> +                   "during migration.");

qapi/error.h:

     * The resulting message should be a single phrase, with no newline or
     * trailing punctuation.

Drop the period, please.

> +        return;
> +    }
> +
> +    if (!runstate_is_running()) {
> +        error_setg(errp, "Config synchronization allowed only in '%s' state, "
> +                   "current state is '%s'", RunState_str(RUN_STATE_RUNNING),
> +                   current_run_state_str());
> +        return;
> +    }
> +
> +    dev = find_device_state(id, true, errp);
> +    if (!dev) {
> +        return;
> +    }
> +
> +    qdev_sync_config(dev, errp);
> +}
> +
>  void hmp_device_add(Monitor *mon, const QDict *qdict)
>  {
>      Error *err = NULL;
> diff --git a/system/runstate.c b/system/runstate.c
> index d6ab860eca..8fd89172ae 100644
> --- a/system/runstate.c
> +++ b/system/runstate.c
> @@ -189,6 +189,11 @@ bool runstate_check(RunState state)
>      return current_run_state == state;
>  }
>  
> +const char *current_run_state_str(void)
> +{
> +    return RunState_str(current_run_state);
> +}
> +

This helper is used just once.  Suggest to use
RunState_str(runstate_get()) instead.

>  static void runstate_init(void)
>  {
>      const RunStateTransition *p;
Markus Armbruster April 24, 2024, 12:05 p.m. UTC | #2
Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> writes:

> Add command to sync config from vhost-user backend to the device. It
> may be helpful when VHOST_USER_SLAVE_CONFIG_CHANGE_MSG failed or not
> triggered interrupt to the guest or just not available (not supported
> by vhost-user server).
>
> Command result is racy if allow it during migration. Let's allow the
> sync only in RUNNING state.
>
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>

[...]

> diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
> index 9228e96c87..87135bdcdf 100644
> --- a/include/hw/qdev-core.h
> +++ b/include/hw/qdev-core.h
> @@ -95,6 +95,7 @@ typedef void (*DeviceUnrealize)(DeviceState *dev);
>  typedef void (*DeviceReset)(DeviceState *dev);
>  typedef void (*BusRealize)(BusState *bus, Error **errp);
>  typedef void (*BusUnrealize)(BusState *bus);
> +typedef int (*DeviceSyncConfig)(DeviceState *dev, Error **errp);
>  
>  /**
>   * struct DeviceClass - The base class for all devices.
> @@ -162,6 +163,7 @@ struct DeviceClass {
>      DeviceReset reset;
>      DeviceRealize realize;
>      DeviceUnrealize unrealize;
> +    DeviceSyncConfig sync_config;

I get

    include/hw/qdev-core.h:179: warning: Function parameter or member 'sync_config' not described in 'DeviceClass'

To fix this, cover the new member in the doc comment.

>  
>      /**
>       * @vmsd: device state serialisation description for

[...]
Vladimir Sementsov-Ogievskiy April 29, 2024, 8:18 a.m. UTC | #3
On 24.04.24 14:48, Markus Armbruster wrote:
> Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> writes:
> 
>> Add command to sync config from vhost-user backend to the device. It
>> may be helpful when VHOST_USER_SLAVE_CONFIG_CHANGE_MSG failed or not
>> triggered interrupt to the guest or just not available (not supported
>> by vhost-user server).
>>
>> Command result is racy if allow it during migration. Let's allow the
>> sync only in RUNNING state.
>>
>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
>> ---
>>   hw/block/vhost-user-blk.c | 27 ++++++++++++++++------
>>   hw/virtio/virtio-pci.c    |  9 ++++++++
>>   include/hw/qdev-core.h    |  3 +++
>>   include/sysemu/runstate.h |  1 +
>>   qapi/qdev.json            | 21 +++++++++++++++++
>>   system/qdev-monitor.c     | 47 +++++++++++++++++++++++++++++++++++++++
>>   system/runstate.c         |  5 +++++
>>   7 files changed, 106 insertions(+), 7 deletions(-)
>>
>> diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
>> index 9e6bbc6950..2f301f380c 100644
>> --- a/hw/block/vhost-user-blk.c
>> +++ b/hw/block/vhost-user-blk.c
>> @@ -88,27 +88,39 @@ static void vhost_user_blk_set_config(VirtIODevice *vdev, const uint8_t *config)
>>       s->blkcfg.wce = blkcfg->wce;
>>   }
>>   
>> +static int vhost_user_blk_sync_config(DeviceState *dev, Error **errp)
>> +{
>> +    int ret;
>> +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>> +    VHostUserBlk *s = VHOST_USER_BLK(vdev);
>> +
>> +    ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg,
>> +                               vdev->config_len, errp);
>> +    if (ret < 0) {
>> +        return ret;
>> +    }
>> +
>> +    memcpy(vdev->config, &s->blkcfg, vdev->config_len);
>> +    virtio_notify_config(vdev);
>> +
>> +    return 0;
>> +}
>> +
>>   static int vhost_user_blk_handle_config_change(struct vhost_dev *dev)
>>   {
>>       int ret;
>> -    VirtIODevice *vdev = dev->vdev;
>> -    VHostUserBlk *s = VHOST_USER_BLK(dev->vdev);
>>       Error *local_err = NULL;
>>   
>>       if (!dev->started) {
>>           return 0;
>>       }
>>   
>> -    ret = vhost_dev_get_config(dev, (uint8_t *)&s->blkcfg,
>> -                               vdev->config_len, &local_err);
>> +    ret = vhost_user_blk_sync_config(DEVICE(dev->vdev), &local_err);
>>       if (ret < 0) {
>>           error_report_err(local_err);
>>           return ret;
>>       }
>>   
>> -    memcpy(dev->vdev->config, &s->blkcfg, vdev->config_len);
>> -    virtio_notify_config(dev->vdev);
>> -
>>       return 0;
>>   }
> 
> This factors vhost_user_blk_sync_config() out of
> vhost_user_blk_handle_config_change() for reuse.  Correct?

Yes. Will split to a separate patch in v4

> 
>>   
>> @@ -576,6 +588,7 @@ static void vhost_user_blk_class_init(ObjectClass *klass, void *data)
>>   
>>       device_class_set_props(dc, vhost_user_blk_properties);
>>       dc->vmsd = &vmstate_vhost_user_blk;
>> +    dc->sync_config = vhost_user_blk_sync_config;
>>       set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
>>       vdc->realize = vhost_user_blk_device_realize;
>>       vdc->unrealize = vhost_user_blk_device_unrealize;
>> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
>> index eaaf86402c..92afbae71c 100644
>> --- a/hw/virtio/virtio-pci.c
>> +++ b/hw/virtio/virtio-pci.c
>> @@ -2501,6 +2501,14 @@ static void virtio_pci_dc_realize(DeviceState *qdev, Error **errp)
>>       vpciklass->parent_dc_realize(qdev, errp);
>>   }
>>   
>> +static int virtio_pci_sync_config(DeviceState *dev, Error **errp)
>> +{
>> +    VirtIOPCIProxy *proxy = VIRTIO_PCI(dev);
>> +    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
>> +
>> +    return qdev_sync_config(DEVICE(vdev), errp);
>> +}
>> +
>>   static void virtio_pci_class_init(ObjectClass *klass, void *data)
>>   {
>>       DeviceClass *dc = DEVICE_CLASS(klass);
>> @@ -2517,6 +2525,7 @@ static void virtio_pci_class_init(ObjectClass *klass, void *data)
>>       device_class_set_parent_realize(dc, virtio_pci_dc_realize,
>>                                       &vpciklass->parent_dc_realize);
>>       rc->phases.hold = virtio_pci_bus_reset_hold;
>> +    dc->sync_config = virtio_pci_sync_config;
>>   }
>>   
> 
> I tried to follow the callbacks, but quickly gave up.  Leaving to a
> reviewer who understands virtio.
> 
>>   static const TypeInfo virtio_pci_info = {
>> diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
>> index 9228e96c87..87135bdcdf 100644
>> --- a/include/hw/qdev-core.h
>> +++ b/include/hw/qdev-core.h
>> @@ -95,6 +95,7 @@ typedef void (*DeviceUnrealize)(DeviceState *dev);
>>   typedef void (*DeviceReset)(DeviceState *dev);
>>   typedef void (*BusRealize)(BusState *bus, Error **errp);
>>   typedef void (*BusUnrealize)(BusState *bus);
>> +typedef int (*DeviceSyncConfig)(DeviceState *dev, Error **errp);
>>   
>>   /**
>>    * struct DeviceClass - The base class for all devices.
>> @@ -162,6 +163,7 @@ struct DeviceClass {
>>       DeviceReset reset;
>>       DeviceRealize realize;
>>       DeviceUnrealize unrealize;
>> +    DeviceSyncConfig sync_config;
>>   
>>       /**
>>        * @vmsd: device state serialisation description for
>> @@ -546,6 +548,7 @@ bool qdev_hotplug_allowed(DeviceState *dev, Error **errp);
>>    */
>>   HotplugHandler *qdev_get_hotplug_handler(DeviceState *dev);
>>   void qdev_unplug(DeviceState *dev, Error **errp);
>> +int qdev_sync_config(DeviceState *dev, Error **errp);
>>   void qdev_simple_device_unplug_cb(HotplugHandler *hotplug_dev,
>>                                     DeviceState *dev, Error **errp);
>>   void qdev_machine_creation_done(void);
>> diff --git a/include/sysemu/runstate.h b/include/sysemu/runstate.h
>> index 0117d243c4..296af52322 100644
>> --- a/include/sysemu/runstate.h
>> +++ b/include/sysemu/runstate.h
>> @@ -5,6 +5,7 @@
>>   #include "qemu/notify.h"
>>   
>>   bool runstate_check(RunState state);
>> +const char *current_run_state_str(void);
>>   void runstate_set(RunState new_state);
>>   RunState runstate_get(void);
>>   bool runstate_is_running(void);
>> diff --git a/qapi/qdev.json b/qapi/qdev.json
>> index facaa0bc6a..e8be79c3d5 100644
>> --- a/qapi/qdev.json
>> +++ b/qapi/qdev.json
>> @@ -161,3 +161,24 @@
>>   ##
>>   { 'event': 'DEVICE_UNPLUG_GUEST_ERROR',
>>     'data': { '*device': 'str', 'path': 'str' } }
>> +
>> +##
>> +# @device-sync-config:
>> +#
>> +# Synchronize config from backend to the guest. The command notifies
>> +# re-read the device config from the backend and notifies the guest
>> +# to re-read the config. The command may be used to notify the guest
>> +# about block device capcity change. Currently only vhost-user-blk
>> +# device supports this.
> 
> I'm not sure I understand this.  To work towards an understanding, I
> rephrase it, and you point out the errors.
> 
>       Synchronize device configuration from host to guest part.  First,
>       copy the configuration from the host part (backend) to the guest
>       part (frontend).  Then notify guest software that device
>       configuration changed.

Correct, thanks

> 
> I wonder how configuration can get out of sync.  Can you explain?
> 

The example (and the original feature that triggered developing this) is vhost disk resize. If the vhost server (backend) doesn't support VHOST_USER_SLAVE_CONFIG_CHANGE_MSG, neither QEMU nor the guest will know that the disk capacity changed.

>> +#
>> +# @id: the device's ID or QOM path
>> +#
>> +# Features:
>> +#
>> +# @unstable: The command is experimental.
>> +#
>> +# Since: 9.1
>> +##
>> +{ 'command': 'device-sync-config',
>> +  'features': [ 'unstable' ],
>> +  'data': {'id': 'str'} }
>> diff --git a/system/qdev-monitor.c b/system/qdev-monitor.c
>> index 7e075d91c1..cb35ea0b86 100644
>> --- a/system/qdev-monitor.c
>> +++ b/system/qdev-monitor.c
>> @@ -23,6 +23,7 @@
>>   #include "monitor/monitor.h"
>>   #include "monitor/qdev.h"
>>   #include "sysemu/arch_init.h"
>> +#include "sysemu/runstate.h"
>>   #include "qapi/error.h"
>>   #include "qapi/qapi-commands-qdev.h"
>>   #include "qapi/qmp/dispatch.h"
>> @@ -969,6 +970,52 @@ void qmp_device_del(const char *id, Error **errp)
>>       }
>>   }
>>   
>> +int qdev_sync_config(DeviceState *dev, Error **errp)
>> +{
>> +    DeviceClass *dc = DEVICE_GET_CLASS(dev);
>> +
>> +    if (!dc->sync_config) {
>> +        error_setg(errp, "device-sync-config is not supported for '%s'",
>> +                   object_get_typename(OBJECT(dev)));
>> +        return -ENOTSUP;
>> +    }
>> +
>> +    return dc->sync_config(dev, errp);
>> +}
>> +
>> +void qmp_device_sync_config(const char *id, Error **errp)
>> +{
>> +    DeviceState *dev;
>> +
>> +    /*
>> +     * During migration there is a race between syncing`config and
>> +     * migrating it, so let's just not allow it.
> 
> Can you briefly explain the race?

If, at the moment of the QMP command, the corresponding config has already been migrated to the target, we'll change only the config on the source, while the target will still have the outdated config.

> 
>> +     *
>> +     * Moreover, let's not rely on setting up interrupts in paused
>> +     * state, which may be a part of migration process.
> 
> What dependence exactly are you avoiding?  Config synchronization
> depending on guest interrupt delivery?

Right, guest is notified by pci_set_irq.

> 
>> +     */
>> +
>> +    if (migration_is_running()) {
>> +        error_setg(errp, "Config synchronization is not allowed "
>> +                   "during migration.");
> 
> qapi/error.h:
> 
>       * The resulting message should be a single phrase, with no newline or
>       * trailing punctuation.
> 
> Drop the period, please.

Will do

> 
>> +        return;
>> +    }
>> +
>> +    if (!runstate_is_running()) {
>> +        error_setg(errp, "Config synchronization allowed only in '%s' state, "
>> +                   "current state is '%s'", RunState_str(RUN_STATE_RUNNING),
>> +                   current_run_state_str());
>> +        return;
>> +    }
>> +
>> +    dev = find_device_state(id, true, errp);
>> +    if (!dev) {
>> +        return;
>> +    }
>> +
>> +    qdev_sync_config(dev, errp);
>> +}
>> +
>>   void hmp_device_add(Monitor *mon, const QDict *qdict)
>>   {
>>       Error *err = NULL;
>> diff --git a/system/runstate.c b/system/runstate.c
>> index d6ab860eca..8fd89172ae 100644
>> --- a/system/runstate.c
>> +++ b/system/runstate.c
>> @@ -189,6 +189,11 @@ bool runstate_check(RunState state)
>>       return current_run_state == state;
>>   }
>>   
>> +const char *current_run_state_str(void)
>> +{
>> +    return RunState_str(current_run_state);
>> +}
>> +
> 
> This helper is used just once.  Suggest to use
> RunState_str(runstate_get()) instead.

OK

> 
>>   static void runstate_init(void)
>>   {
>>       const RunStateTransition *p;
>
Markus Armbruster April 29, 2024, 10:51 a.m. UTC | #4
Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> writes:

> On 24.04.24 14:48, Markus Armbruster wrote:
>> Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> writes:
>> 
>>> Add command to sync config from vhost-user backend to the device. It
>>> may be helpful when VHOST_USER_SLAVE_CONFIG_CHANGE_MSG failed or not
>>> triggered interrupt to the guest or just not available (not supported
>>> by vhost-user server).
>>>
>>> Command result is racy if allow it during migration. Let's allow the
>>> sync only in RUNNING state.
>>>
>>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>

[...]

>>> diff --git a/include/sysemu/runstate.h b/include/sysemu/runstate.h
>>> index 0117d243c4..296af52322 100644
>>> --- a/include/sysemu/runstate.h
>>> +++ b/include/sysemu/runstate.h
>>> @@ -5,6 +5,7 @@
>>>   #include "qemu/notify.h"
>>>   
>>>   bool runstate_check(RunState state);
>>> +const char *current_run_state_str(void);
>>>   void runstate_set(RunState new_state);
>>>   RunState runstate_get(void);
>>>   bool runstate_is_running(void);
>>> diff --git a/qapi/qdev.json b/qapi/qdev.json
>>> index facaa0bc6a..e8be79c3d5 100644
>>> --- a/qapi/qdev.json
>>> +++ b/qapi/qdev.json
>>> @@ -161,3 +161,24 @@
>>>   ##
>>>   { 'event': 'DEVICE_UNPLUG_GUEST_ERROR',
>>>     'data': { '*device': 'str', 'path': 'str' } }
>>> +
>>> +##
>>> +# @device-sync-config:
>>> +#
>>> +# Synchronize config from backend to the guest. The command notifies
>>> +# re-read the device config from the backend and notifies the guest
>>> +# to re-read the config. The command may be used to notify the guest
>>> +# about block device capcity change. Currently only vhost-user-blk
>>> +# device supports this.
>> 
>> I'm not sure I understand this.  To work towards an understanding, I
>> rephrase it, and you point out the errors.
>> 
>>       Synchronize device configuration from host to guest part.  First,
>>       copy the configuration from the host part (backend) to the guest
>>       part (frontend).  Then notify guest software that device
>>       configuration changed.
>
> Correct, thanks

Perhaps

  Synchronize guest-visible device configuration with the backend's
  configuration, and notify guest software that device configuration
  changed.

  This may be useful to notify the guest of a block device capacity
  change.  Currently, only vhost-user-blk devices support this.

Next question: what happens when the device *doesn't* support this?

>> I wonder how configuration can get out of sync.  Can you explain?
>> 
>
> The example (and the original feature, which triggered developing this) is vhost disk resize. If vhost-server (backend) doesn't support VHOST_USER_SLAVE_CONFIG_CHANGE_MSG, neither QEMU nor guest will know that disk capacity changed.

Sounds like we wouldn't need this command if we could make the
vhost-server support VHOST_USER_SLAVE_CONFIG_CHANGE_MSG.  Is making it
support it impractical?  Or are there other uses for this command?

>>> +#
>>> +# @id: the device's ID or QOM path
>>> +#
>>> +# Features:
>>> +#
>>> +# @unstable: The command is experimental.
>>> +#
>>> +# Since: 9.1
>>> +##
>>> +{ 'command': 'device-sync-config',
>>> +  'features': [ 'unstable' ],
>>> +  'data': {'id': 'str'} }
>>> diff --git a/system/qdev-monitor.c b/system/qdev-monitor.c
>>> index 7e075d91c1..cb35ea0b86 100644
>>> --- a/system/qdev-monitor.c
>>> +++ b/system/qdev-monitor.c
>>> @@ -23,6 +23,7 @@
>>>  #include "monitor/monitor.h"
>>>  #include "monitor/qdev.h"
>>>  #include "sysemu/arch_init.h"
>>> +#include "sysemu/runstate.h"
>>>  #include "qapi/error.h"
>>>  #include "qapi/qapi-commands-qdev.h"
>>>  #include "qapi/qmp/dispatch.h"
>>> @@ -969,6 +970,52 @@ void qmp_device_del(const char *id, Error **errp)
>>>       }
>>>   }
>>>   
>>> +int qdev_sync_config(DeviceState *dev, Error **errp)
>>> +{
>>> +    DeviceClass *dc = DEVICE_GET_CLASS(dev);
>>> +
>>> +    if (!dc->sync_config) {
>>> +        error_setg(errp, "device-sync-config is not supported for '%s'",
>>> +                   object_get_typename(OBJECT(dev)));
>>> +        return -ENOTSUP;
>>> +    }
>>> +
>>> +    return dc->sync_config(dev, errp);
>>> +}
>>> +
>>> +void qmp_device_sync_config(const char *id, Error **errp)
>>> +{
>>> +    DeviceState *dev;
>>> +
>>> +    /*
>>> +     * During migration there is a race between syncing`config and
>>> +     * migrating it, so let's just not allow it.
>> 
>> Can you briefly explain the race?
>
> If at the moment of qmp command, corresponding config already migrated to the target, we'll change only the config on source, but on the target we'll still have outdated config.

For RAM, dirty tracking ensures the change gets sent.  But this is
device memory.  Correct?

>>> +     *
>>> +     * Moreover, let's not rely on setting up interrupts in paused
>>> +     * state, which may be a part of migration process.
>> 
>> What dependence exactly are you avoiding?  Config synchronization
>> depending on guest interrupt delivery?
>
> Right, guest is notified by pci_set_irq.

If we allowed it in paused state, the delivery of the interrupt would be
delayed until the guest resumes running.  Correct?

>>> +     */
>>> +
>>> +    if (migration_is_running()) {
>>> +        error_setg(errp, "Config synchronization is not allowed "
>>> +                   "during migration.");
>> 
>> qapi/error.h:
>> 
>>       * The resulting message should be a single phrase, with no newline or
>>       * trailing punctuation.
>> 
>> Drop the period, please.
>
> Will do
>
>> 
>>> +        return;
>>> +    }
>>> +
>>> +    if (!runstate_is_running()) {
>>> +        error_setg(errp, "Config synchronization allowed only in '%s' state, "
>>> +                   "current state is '%s'", RunState_str(RUN_STATE_RUNNING),
>>> +                   current_run_state_str());
>>> +        return;
>>> +    }
>>> +
>>> +    dev = find_device_state(id, true, errp);
>>> +    if (!dev) {
>>> +        return;
>>> +    }
>>> +
>>> +    qdev_sync_config(dev, errp);
>>> +}
>>> +
>>>   void hmp_device_add(Monitor *mon, const QDict *qdict)
>>>   {
>>>       Error *err = NULL;

[...]
Vladimir Sementsov-Ogievskiy April 29, 2024, 12:22 p.m. UTC | #5
On 29.04.24 13:51, Markus Armbruster wrote:
> Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> writes:
> 
>> On 24.04.24 14:48, Markus Armbruster wrote:
>>> Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> writes:
>>>
>>>> Add command to sync config from vhost-user backend to the device. It
>>>> may be helpful when VHOST_USER_SLAVE_CONFIG_CHANGE_MSG failed or not
>>>> triggered interrupt to the guest or just not available (not supported
>>>> by vhost-user server).
>>>>
>>>> Command result is racy if allow it during migration. Let's allow the
>>>> sync only in RUNNING state.
>>>>
>>>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
> 
> [...]
> 
>>>> diff --git a/include/sysemu/runstate.h b/include/sysemu/runstate.h
>>>> index 0117d243c4..296af52322 100644
>>>> --- a/include/sysemu/runstate.h
>>>> +++ b/include/sysemu/runstate.h
>>>> @@ -5,6 +5,7 @@
>>>>    #include "qemu/notify.h"
>>>>    
>>>>    bool runstate_check(RunState state);
>>>> +const char *current_run_state_str(void);
>>>>    void runstate_set(RunState new_state);
>>>>    RunState runstate_get(void);
>>>>    bool runstate_is_running(void);
>>>> diff --git a/qapi/qdev.json b/qapi/qdev.json
>>>> index facaa0bc6a..e8be79c3d5 100644
>>>> --- a/qapi/qdev.json
>>>> +++ b/qapi/qdev.json
>>>> @@ -161,3 +161,24 @@
>>>>    ##
>>>>    { 'event': 'DEVICE_UNPLUG_GUEST_ERROR',
>>>>      'data': { '*device': 'str', 'path': 'str' } }
>>>> +
>>>> +##
>>>> +# @device-sync-config:
>>>> +#
>>>> +# Synchronize config from backend to the guest. The command notifies
>>>> +# re-read the device config from the backend and notifies the guest
>>>> +# to re-read the config. The command may be used to notify the guest
>>>> +# about block device capcity change. Currently only vhost-user-blk
>>>> +# device supports this.
>>>
>>> I'm not sure I understand this.  To work towards an understanding, I
>>> rephrase it, and you point out the errors.
>>>
>>>        Synchronize device configuration from host to guest part.  First,
>>>        copy the configuration from the host part (backend) to the guest
>>>        part (frontend).  Then notify guest software that device
>>>        configuration changed.
>>
>> Correct, thanks
> 
> Perhaps
> 
>    Synchronize guest-visible device configuration with the backend's
>    configuration, and notify guest software that device configuration
>    changed.
> 
>    This may be useful to notify the guest of a block device capacity
>    change.  Currently, only vhost-user-blk devices support this.

Sounds good

> 
> Next question: what happens when the device *doesn't* support this?

An error "device-sync-config is not supported ..."

> 
>>> I wonder how configuration can get out of sync.  Can you explain?
>>>
>>
>> The example (and the original feature, which triggered developing this) is vhost disk resize. If vhost-server (backend) doesn't support VHOST_USER_SLAVE_CONFIG_CHANGE_MSG, neither QEMU nor guest will know that disk capacity changed.
> 
> Sounds like we wouldn't need this command if we could make the
> vhost-server support VHOST_USER_SLAVE_CONFIG_CHANGE_MSG.  Is making it
> support it impractical?  Or are there other uses for this command?

QEMU's internal vhost-server does support it, but that's not the only vhost-user server. So the command is useful for those servers which don't support VHOST_USER_SLAVE_CONFIG_CHANGE_MSG. Note that this message requires setting up an additional channel for server -> client communication. That was the reason why the "change-msg" solution was rejected in our downstream: it's safer to reuse an existing channel (QMP) than to add and support an additional channel.

Also, the command may help to debug the system, when VHOST_USER_SLAVE_CONFIG_CHANGE_MSG doesn't work for some reason.

> 
>>>> +#
>>>> +# @id: the device's ID or QOM path
>>>> +#
>>>> +# Features:
>>>> +#
>>>> +# @unstable: The command is experimental.
>>>> +#
>>>> +# Since: 9.1
>>>> +##
>>>> +{ 'command': 'device-sync-config',
>>>> +  'features': [ 'unstable' ],
>>>> +  'data': {'id': 'str'} }
>>>> diff --git a/system/qdev-monitor.c b/system/qdev-monitor.c
>>>> index 7e075d91c1..cb35ea0b86 100644
>>>> --- a/system/qdev-monitor.c
>>>> +++ b/system/qdev-monitor.c
>>>> @@ -23,6 +23,7 @@
>>>>   #include "monitor/monitor.h"
>>>>   #include "monitor/qdev.h"
>>>>   #include "sysemu/arch_init.h"
>>>> +#include "sysemu/runstate.h"
>>>>   #include "qapi/error.h"
>>>>   #include "qapi/qapi-commands-qdev.h"
>>>>   #include "qapi/qmp/dispatch.h"
>>>> @@ -969,6 +970,52 @@ void qmp_device_del(const char *id, Error **errp)
>>>>        }
>>>>    }
>>>>    
>>>> +int qdev_sync_config(DeviceState *dev, Error **errp)
>>>> +{
>>>> +    DeviceClass *dc = DEVICE_GET_CLASS(dev);
>>>> +
>>>> +    if (!dc->sync_config) {
>>>> +        error_setg(errp, "device-sync-config is not supported for '%s'",
>>>> +                   object_get_typename(OBJECT(dev)));
>>>> +        return -ENOTSUP;
>>>> +    }
>>>> +
>>>> +    return dc->sync_config(dev, errp);
>>>> +}
>>>> +
>>>> +void qmp_device_sync_config(const char *id, Error **errp)
>>>> +{
>>>> +    DeviceState *dev;
>>>> +
>>>> +    /*
>>>> +     * During migration there is a race between syncing`config and
>>>> +     * migrating it, so let's just not allow it.
>>>
>>> Can you briefly explain the race?
>>
>> If at the moment of qmp command, corresponding config already migrated to the target, we'll change only the config on source, but on the target we'll still have outdated config.
> 
> For RAM, dirty tracking ensures the change gets sent.  But this is
> device memory.  Correct?

Yes. It's stored in the malloc'ed buffer VirtIODevice::config, and accessed through the handlers virtio_pci_config_read()/virtio_pci_config_write(). As far as I understand, there is no dirty tracking of any kind here.

And I see, it's migrated in virtio_save():
...
     qemu_put_be32(f, vdev->config_len);
     qemu_put_buffer(f, vdev->config, vdev->config_len);
...

> 
>>>> +     *
>>>> +     * Moreover, let's not rely on setting up interrupts in paused
>>>> +     * state, which may be a part of migration process.
>>>
>>> What dependence exactly are you avoiding?  Config synchronization
>>> depending on guest interrupt delivery?
>>
>> Right, guest is notified by pci_set_irq.
> 
> If we allowed it in paused state, the delivery of the interrupt would be
> delayed until the guest resumes running.  Correct?

I think so. But this will not work if we do an offline migration like pause -> migrate -> resume on target. So I decided it's better to be on the safe side. The restrictions may be relaxed in the future if needed.

> 
>>>> +     */
>>>> +
>>>> +    if (migration_is_running()) {
>>>> +        error_setg(errp, "Config synchronization is not allowed "
>>>> +                   "during migration.");
>>>
>>> qapi/error.h:
>>>
>>>        * The resulting message should be a single phrase, with no newline or
>>>        * trailing punctuation.
>>>
>>> Drop the period, please.
>>
>> Will do
>>
>>>
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    if (!runstate_is_running()) {
>>>> +        error_setg(errp, "Config synchronization allowed only in '%s' state, "
>>>> +                   "current state is '%s'", RunState_str(RUN_STATE_RUNNING),
>>>> +                   current_run_state_str());
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    dev = find_device_state(id, true, errp);
>>>> +    if (!dev) {
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    qdev_sync_config(dev, errp);
>>>> +}
>>>> +
>>>>    void hmp_device_add(Monitor *mon, const QDict *qdict)
>>>>    {
>>>>        Error *err = NULL;
> 
> [...]
>
Markus Armbruster April 29, 2024, 1:04 p.m. UTC | #6
Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> writes:

> On 29.04.24 13:51, Markus Armbruster wrote:
>> Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> writes:
>> 
>>> On 24.04.24 14:48, Markus Armbruster wrote:
>>>> Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> writes:
>>>>
>>>>> Add command to sync config from vhost-user backend to the device. It
>>>>> may be helpful when VHOST_USER_SLAVE_CONFIG_CHANGE_MSG failed or not
>>>>> triggered interrupt to the guest or just not available (not supported
>>>>> by vhost-user server).
>>>>>
>>>>> Command result is racy if allow it during migration. Let's allow the
>>>>> sync only in RUNNING state.
>>>>>
>>>>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
>> 
>> [...]
>> 
>>>>> diff --git a/include/sysemu/runstate.h b/include/sysemu/runstate.h
>>>>> index 0117d243c4..296af52322 100644
>>>>> --- a/include/sysemu/runstate.h
>>>>> +++ b/include/sysemu/runstate.h
>>>>> @@ -5,6 +5,7 @@
>>>>>    #include "qemu/notify.h"
>>>>>    
>>>>>    bool runstate_check(RunState state);
>>>>> +const char *current_run_state_str(void);
>>>>>    void runstate_set(RunState new_state);
>>>>>    RunState runstate_get(void);
>>>>>    bool runstate_is_running(void);
>>>>> diff --git a/qapi/qdev.json b/qapi/qdev.json
>>>>> index facaa0bc6a..e8be79c3d5 100644
>>>>> --- a/qapi/qdev.json
>>>>> +++ b/qapi/qdev.json
>>>>> @@ -161,3 +161,24 @@
>>>>>    ##
>>>>>    { 'event': 'DEVICE_UNPLUG_GUEST_ERROR',
>>>>>      'data': { '*device': 'str', 'path': 'str' } }
>>>>> +
>>>>> +##
>>>>> +# @device-sync-config:
>>>>> +#
>>>>> +# Synchronize config from backend to the guest. The command notifies
>>>>> +# re-read the device config from the backend and notifies the guest
>>>>> +# to re-read the config. The command may be used to notify the guest
>>>>> +# about block device capcity change. Currently only vhost-user-blk
>>>>> +# device supports this.
>>>>
>>>> I'm not sure I understand this.  To work towards an understanding, I
>>>> rephrase it, and you point out the errors.
>>>>
>>>>        Synchronize device configuration from host to guest part.  First,
>>>>        copy the configuration from the host part (backend) to the guest
>>>>        part (frontend).  Then notify guest software that device
>>>>        configuration changed.
>>>
>>> Correct, thanks
>> 
>> Perhaps
>> 
>>    Synchronize guest-visible device configuration with the backend's
>>    configuration, and notify guest software that device configuration
>>    changed.
>> 
>>    This may be useful to notify the guest of a block device capacity
>>    change.  Currenrly, only vhost-user-blk devices support this.
>
> Sounds good

Except I fat-fingered "Currently".

>> 
>> Next question: what happens when the device *doesn't* support this?
>
> An error "device-sync-config is not supported ..."

Okay.

>>>> I wonder how configuration can get out of sync.  Can you explain?
>>>>
>>>
>>> The example (and the original feature, which triggered developing this) is vhost disk resize. If vhost-server (backend) doesn't support VHOST_USER_SLAVE_CONFIG_CHANGE_MSG, neither QEMU nor guest will know that disk capacity changed.
>> 
>> Sounds like we wouldn't need this command if we could make the
>> vhost-server support VHOST_USER_SLAVE_CONFIG_CHANGE_MSG.  Is making it
>> support it impractical?  Or are there other uses for this command?
>
> Qemu's internal vhost-server do support it. But that's not the only vhost-user server) So the command is useful for those servers which doesn't support VHOST_USER_SLAVE_CONFIG_CHANGE_MSG. Note, that this message requires setting up additional channel of server -> client communication. That was the reason, why the "change-msg" solution was rejected in our downstream: it's safer to reuse existing channel (QMP), than to add and support an additional channel.
>
> Also, the command may help to debug the system, when VHOST_USER_SLAVE_CONFIG_CHANGE_MSG doesn't work for some reason.

Suggest to work this into the commit message.

>>>>> +#
>>>>> +# @id: the device's ID or QOM path
>>>>> +#
>>>>> +# Features:
>>>>> +#
>>>>> +# @unstable: The command is experimental.
>>>>> +#
>>>>> +# Since: 9.1
>>>>> +##
>>>>> +{ 'command': 'device-sync-config',
>>>>> +  'features': [ 'unstable' ],
>>>>> +  'data': {'id': 'str'} }
>>>>> diff --git a/system/qdev-monitor.c b/system/qdev-monitor.c
>>>>> index 7e075d91c1..cb35ea0b86 100644
>>>>> --- a/system/qdev-monitor.c
>>>>> +++ b/system/qdev-monitor.c
>>>>> @@ -23,6 +23,7 @@
>>>>>   #include "monitor/monitor.h"
>>>>>   #include "monitor/qdev.h"
>>>>>   #include "sysemu/arch_init.h"
>>>>> +#include "sysemu/runstate.h"
>>>>>   #include "qapi/error.h"
>>>>>   #include "qapi/qapi-commands-qdev.h"
>>>>>   #include "qapi/qmp/dispatch.h"
>>>>> @@ -969,6 +970,52 @@ void qmp_device_del(const char *id, Error **errp)
>>>>>        }
>>>>>    }
>>>>>    
>>>>> +int qdev_sync_config(DeviceState *dev, Error **errp)
>>>>> +{
>>>>> +    DeviceClass *dc = DEVICE_GET_CLASS(dev);
>>>>> +
>>>>> +    if (!dc->sync_config) {
>>>>> +        error_setg(errp, "device-sync-config is not supported for '%s'",
>>>>> +                   object_get_typename(OBJECT(dev)));
>>>>> +        return -ENOTSUP;
>>>>> +    }
>>>>> +
>>>>> +    return dc->sync_config(dev, errp);
>>>>> +}
>>>>> +
>>>>> +void qmp_device_sync_config(const char *id, Error **errp)
>>>>> +{
>>>>> +    DeviceState *dev;
>>>>> +
>>>>> +    /*
>>>>> +     * During migration there is a race between syncing`config and
>>>>> +     * migrating it, so let's just not allow it.
>>>>
>>>> Can you briefly explain the race?
>>>
>>> If at the moment of qmp command, corresponding config already migrated to the target, we'll change only the config on source, but on the target we'll still have outdated config.
>> 
>> For RAM, dirty tracking ensures the change gets sent.  But this is
>> device memory.  Correct?
>
> Yes. It's stored in malloced buffer VirtIIODevice::config, and accessed through handlers virtio_pci_config_read()/virtio_pci_config_write(). As I understand, no kind of dirty tracking here..
>
> And I see, it's migrated in virtio_save():
> ...
>      qemu_put_be32(f, vdev->config_len);
>      qemu_put_buffer(f, vdev->config, vdev->config_len);
> ...

Suggest to explain the race in the comment.  Perhaps like this:

  Guest-visible configuration is stored in device memory.  There is a
  race between updating and migrating it: if we update it before we
  migrate it, it's migrated fine, but any later updates are lost.

>>>>> +     *
>>>>> +     * Moreover, let's not rely on setting up interrupts in paused
>>>>> +     * state, which may be a part of migration process.
>>>>
>>>> What dependence exactly are you avoiding?  Config synchronization
>>>> depending on guest interrupt delivery?
>>>
>>> Right, guest is notified by pci_set_irq.
>> 
>> If we allowed it in paused state, the delivery of the interrupt would be
>> delayed until the guest resumes running.  Correct?
>
> I think so. But this will not work, if we do offline migration like pause -> migrate -> resume on target. So I decided that better be more safe. The restrictions may be relaxed in future if needed.

Sounds like we'd make an interrupt pending on the source, but migration
failed to make it pending on the target as well, so it gets lost.

Is that the case?

If yes, question for migration experts: why isn't this a problem
elsewhere, too?

[...]

Thanks!
Vladimir Sementsov-Ogievskiy April 29, 2024, 2:49 p.m. UTC | #7
On 29.04.24 16:04, Markus Armbruster wrote:
> Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> writes:
> 
>> On 29.04.24 13:51, Markus Armbruster wrote:
>>> Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> writes:
>>>
>>>> On 24.04.24 14:48, Markus Armbruster wrote:
>>>>> Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> writes:
>>>>>
>>>>>> Add command to sync config from vhost-user backend to the device. It
>>>>>> may be helpful when VHOST_USER_SLAVE_CONFIG_CHANGE_MSG failed or not
>>>>>> triggered interrupt to the guest or just not available (not supported
>>>>>> by vhost-user server).
>>>>>>
>>>>>> Command result is racy if allow it during migration. Let's allow the
>>>>>> sync only in RUNNING state.
>>>>>>
>>>>>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
>>>
>>> [...]
>>>
>>>>>> diff --git a/include/sysemu/runstate.h b/include/sysemu/runstate.h
>>>>>> index 0117d243c4..296af52322 100644
>>>>>> --- a/include/sysemu/runstate.h
>>>>>> +++ b/include/sysemu/runstate.h
>>>>>> @@ -5,6 +5,7 @@
>>>>>>     #include "qemu/notify.h"
>>>>>>     
>>>>>>     bool runstate_check(RunState state);
>>>>>> +const char *current_run_state_str(void);
>>>>>>     void runstate_set(RunState new_state);
>>>>>>     RunState runstate_get(void);
>>>>>>     bool runstate_is_running(void);
>>>>>> diff --git a/qapi/qdev.json b/qapi/qdev.json
>>>>>> index facaa0bc6a..e8be79c3d5 100644
>>>>>> --- a/qapi/qdev.json
>>>>>> +++ b/qapi/qdev.json
>>>>>> @@ -161,3 +161,24 @@
>>>>>>     ##
>>>>>>     { 'event': 'DEVICE_UNPLUG_GUEST_ERROR',
>>>>>>       'data': { '*device': 'str', 'path': 'str' } }
>>>>>> +
>>>>>> +##
>>>>>> +# @device-sync-config:
>>>>>> +#
>>>>>> +# Synchronize config from backend to the guest. The command notifies
>>>>>> +# re-read the device config from the backend and notifies the guest
>>>>>> +# to re-read the config. The command may be used to notify the guest
>>>>>> +# about block device capcity change. Currently only vhost-user-blk
>>>>>> +# device supports this.
>>>>>
>>>>> I'm not sure I understand this.  To work towards an understanding, I
>>>>> rephrase it, and you point out the errors.
>>>>>
>>>>>         Synchronize device configuration from host to guest part.  First,
>>>>>         copy the configuration from the host part (backend) to the guest
>>>>>         part (frontend).  Then notify guest software that device
>>>>>         configuration changed.
>>>>
>>>> Correct, thanks
>>>
>>> Perhaps
>>>
>>>     Synchronize guest-visible device configuration with the backend's
>>>     configuration, and notify guest software that device configuration
>>>     changed.
>>>
>>>     This may be useful to notify the guest of a block device capacity
>>>     change.  Currenrly, only vhost-user-blk devices support this.
>>
>> Sounds good
> 
> Except I fat-fingered "Currently".
> 
>>>
>>> Next question: what happens when the device *doesn't* support this?
>>
>> An error "device-sync-config is not supported ..."
> 
> Okay.
> 
>>>>> I wonder how configuration can get out of sync.  Can you explain?
>>>>>
>>>>
>>>> The example (and the original feature, which triggered developing this) is vhost disk resize. If vhost-server (backend) doesn't support VHOST_USER_SLAVE_CONFIG_CHANGE_MSG, neither QEMU nor guest will know that disk capacity changed.
>>>
>>> Sounds like we wouldn't need this command if we could make the
>>> vhost-server support VHOST_USER_SLAVE_CONFIG_CHANGE_MSG.  Is making it
>>> support it impractical?  Or are there other uses for this command?
>>
>> Qemu's internal vhost-server do support it. But that's not the only vhost-user server) So the command is useful for those servers which doesn't support VHOST_USER_SLAVE_CONFIG_CHANGE_MSG. Note, that this message requires setting up additional channel of server -> client communication. That was the reason, why the "change-msg" solution was rejected in our downstream: it's safer to reuse existing channel (QMP), than to add and support an additional channel.
>>
>> Also, the command may help to debug the system, when VHOST_USER_SLAVE_CONFIG_CHANGE_MSG doesn't work for some reason.
> 
> Suggest to work this into the commit message.
> 
>>>>>> +#
>>>>>> +# @id: the device's ID or QOM path
>>>>>> +#
>>>>>> +# Features:
>>>>>> +#
>>>>>> +# @unstable: The command is experimental.
>>>>>> +#
>>>>>> +# Since: 9.1
>>>>>> +##
>>>>>> +{ 'command': 'device-sync-config',
>>>>>> +  'features': [ 'unstable' ],
>>>>>> +  'data': {'id': 'str'} }
>>>>>> diff --git a/system/qdev-monitor.c b/system/qdev-monitor.c
>>>>>> index 7e075d91c1..cb35ea0b86 100644
>>>>>> --- a/system/qdev-monitor.c
>>>>>> +++ b/system/qdev-monitor.c
>>>>>> @@ -23,6 +23,7 @@
>>>>>>    #include "monitor/monitor.h"
>>>>>>    #include "monitor/qdev.h"
>>>>>>    #include "sysemu/arch_init.h"
>>>>>> +#include "sysemu/runstate.h"
>>>>>>    #include "qapi/error.h"
>>>>>>    #include "qapi/qapi-commands-qdev.h"
>>>>>>    #include "qapi/qmp/dispatch.h"
>>>>>> @@ -969,6 +970,52 @@ void qmp_device_del(const char *id, Error **errp)
>>>>>>         }
>>>>>>     }
>>>>>>     
>>>>>> +int qdev_sync_config(DeviceState *dev, Error **errp)
>>>>>> +{
>>>>>> +    DeviceClass *dc = DEVICE_GET_CLASS(dev);
>>>>>> +
>>>>>> +    if (!dc->sync_config) {
>>>>>> +        error_setg(errp, "device-sync-config is not supported for '%s'",
>>>>>> +                   object_get_typename(OBJECT(dev)));
>>>>>> +        return -ENOTSUP;
>>>>>> +    }
>>>>>> +
>>>>>> +    return dc->sync_config(dev, errp);
>>>>>> +}
>>>>>> +
>>>>>> +void qmp_device_sync_config(const char *id, Error **errp)
>>>>>> +{
>>>>>> +    DeviceState *dev;
>>>>>> +
>>>>>> +    /*
>>>>>> +     * During migration there is a race between syncing`config and
>>>>>> +     * migrating it, so let's just not allow it.
>>>>>
>>>>> Can you briefly explain the race?
>>>>
>>>> If at the moment of qmp command, corresponding config already migrated to the target, we'll change only the config on source, but on the target we'll still have outdated config.
>>>
>>> For RAM, dirty tracking ensures the change gets sent.  But this is
>>> device memory.  Correct?
>>
>> Yes. It's stored in malloced buffer VirtIIODevice::config, and accessed through handlers virtio_pci_config_read()/virtio_pci_config_write(). As I understand, no kind of dirty tracking here..
>>
>> And I see, it's migrated in virtio_save():
>> ...
>>       qemu_put_be32(f, vdev->config_len);
>>       qemu_put_buffer(f, vdev->config, vdev->config_len);
>> ...
> 
> Suggest to explain the race in the comment.  Perhaps like this:
> 
>    Guest-visible configuration is stored in device memory.  There is a
>    race between updating and migrating it: if we update it before we
>    migrate it, it's migrated fine, but if any later updates are lost.
> 
>>>>>> +     *
>>>>>> +     * Moreover, let's not rely on setting up interrupts in paused
>>>>>> +     * state, which may be a part of migration process.
>>>>>
>>>>> What dependence exactly are you avoiding?  Config synchronization
>>>>> depending on guest interrupt delivery?
>>>>
>>>> Right, guest is notified by pci_set_irq.
>>>
>>> If we allowed it in paused state, the delivery of the interrupt would be
>>> delayed until the guest resumes running.  Correct?
>>
>> I think so. But this will not work, if we do offline migration like pause -> migrate -> resume on target. So I decided that better be more safe. The restrictions may be relaxed in future if needed.
> 
> Sounds like we'd make an interrupt pending on the source, but migration
> failed to make it pending on the target as well, so it gets lost.
> 
> Is that the case?

Yes, that is exactly what I mean. But my doubt was probably wrong, as queued interrupts do migrate:

virtio_save() {
...
qemu_put_8s(f, &vdev->isr);
...


Hmm. And I don't remember now whether it was just a theoretical doubt or a real problem. I'll check.

> 
> If yes, question for migration experts: why isn't this a problem
> elsewhere, too?
> 
> [...]
> 
> Thanks!
>
diff mbox series

Patch

diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index 9e6bbc6950..2f301f380c 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -88,27 +88,39 @@  static void vhost_user_blk_set_config(VirtIODevice *vdev, const uint8_t *config)
     s->blkcfg.wce = blkcfg->wce;
 }
 
+static int vhost_user_blk_sync_config(DeviceState *dev, Error **errp)
+{
+    int ret;
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VHostUserBlk *s = VHOST_USER_BLK(vdev);
+
+    ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg,
+                               vdev->config_len, errp);
+    if (ret < 0) {
+        return ret;
+    }
+
+    memcpy(vdev->config, &s->blkcfg, vdev->config_len);
+    virtio_notify_config(vdev);
+
+    return 0;
+}
+
 static int vhost_user_blk_handle_config_change(struct vhost_dev *dev)
 {
     int ret;
-    VirtIODevice *vdev = dev->vdev;
-    VHostUserBlk *s = VHOST_USER_BLK(dev->vdev);
     Error *local_err = NULL;
 
     if (!dev->started) {
         return 0;
     }
 
-    ret = vhost_dev_get_config(dev, (uint8_t *)&s->blkcfg,
-                               vdev->config_len, &local_err);
+    ret = vhost_user_blk_sync_config(DEVICE(dev->vdev), &local_err);
     if (ret < 0) {
         error_report_err(local_err);
         return ret;
     }
 
-    memcpy(dev->vdev->config, &s->blkcfg, vdev->config_len);
-    virtio_notify_config(dev->vdev);
-
     return 0;
 }
 
@@ -576,6 +588,7 @@  static void vhost_user_blk_class_init(ObjectClass *klass, void *data)
 
     device_class_set_props(dc, vhost_user_blk_properties);
     dc->vmsd = &vmstate_vhost_user_blk;
+    dc->sync_config = vhost_user_blk_sync_config;
     set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
     vdc->realize = vhost_user_blk_device_realize;
     vdc->unrealize = vhost_user_blk_device_unrealize;
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index eaaf86402c..92afbae71c 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -2501,6 +2501,14 @@  static void virtio_pci_dc_realize(DeviceState *qdev, Error **errp)
     vpciklass->parent_dc_realize(qdev, errp);
 }
 
+static int virtio_pci_sync_config(DeviceState *dev, Error **errp)
+{
+    VirtIOPCIProxy *proxy = VIRTIO_PCI(dev);
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+
+    return qdev_sync_config(DEVICE(vdev), errp);
+}
+
 static void virtio_pci_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
@@ -2517,6 +2525,7 @@  static void virtio_pci_class_init(ObjectClass *klass, void *data)
     device_class_set_parent_realize(dc, virtio_pci_dc_realize,
                                     &vpciklass->parent_dc_realize);
     rc->phases.hold = virtio_pci_bus_reset_hold;
+    dc->sync_config = virtio_pci_sync_config;
 }
 
 static const TypeInfo virtio_pci_info = {
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index 9228e96c87..87135bdcdf 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -95,6 +95,7 @@  typedef void (*DeviceUnrealize)(DeviceState *dev);
 typedef void (*DeviceReset)(DeviceState *dev);
 typedef void (*BusRealize)(BusState *bus, Error **errp);
 typedef void (*BusUnrealize)(BusState *bus);
+typedef int (*DeviceSyncConfig)(DeviceState *dev, Error **errp);
 
 /**
  * struct DeviceClass - The base class for all devices.
@@ -162,6 +163,7 @@  struct DeviceClass {
     DeviceReset reset;
     DeviceRealize realize;
     DeviceUnrealize unrealize;
+    DeviceSyncConfig sync_config;
 
     /**
      * @vmsd: device state serialisation description for
@@ -546,6 +548,7 @@  bool qdev_hotplug_allowed(DeviceState *dev, Error **errp);
  */
 HotplugHandler *qdev_get_hotplug_handler(DeviceState *dev);
 void qdev_unplug(DeviceState *dev, Error **errp);
+int qdev_sync_config(DeviceState *dev, Error **errp);
 void qdev_simple_device_unplug_cb(HotplugHandler *hotplug_dev,
                                   DeviceState *dev, Error **errp);
 void qdev_machine_creation_done(void);
diff --git a/include/sysemu/runstate.h b/include/sysemu/runstate.h
index 0117d243c4..296af52322 100644
--- a/include/sysemu/runstate.h
+++ b/include/sysemu/runstate.h
@@ -5,6 +5,7 @@ 
 #include "qemu/notify.h"
 
 bool runstate_check(RunState state);
+const char *current_run_state_str(void);
 void runstate_set(RunState new_state);
 RunState runstate_get(void);
 bool runstate_is_running(void);
diff --git a/qapi/qdev.json b/qapi/qdev.json
index facaa0bc6a..e8be79c3d5 100644
--- a/qapi/qdev.json
+++ b/qapi/qdev.json
@@ -161,3 +161,24 @@ 
 ##
 { 'event': 'DEVICE_UNPLUG_GUEST_ERROR',
   'data': { '*device': 'str', 'path': 'str' } }
+
+##
+# @device-sync-config:
+#
+# Synchronize config from backend to the guest. The command
+# re-reads the device config from the backend and notifies the guest
+# to re-read the config. The command may be used to notify the guest
+# about a block device capacity change. Currently only vhost-user-blk
+# device supports this.
+#
+# @id: the device's ID or QOM path
+#
+# Features:
+#
+# @unstable: The command is experimental.
+#
+# Since: 9.1
+##
+{ 'command': 'device-sync-config',
+  'features': [ 'unstable' ],
+  'data': {'id': 'str'} }
diff --git a/system/qdev-monitor.c b/system/qdev-monitor.c
index 7e075d91c1..cb35ea0b86 100644
--- a/system/qdev-monitor.c
+++ b/system/qdev-monitor.c
@@ -23,6 +23,7 @@ 
 #include "monitor/monitor.h"
 #include "monitor/qdev.h"
 #include "sysemu/arch_init.h"
+#include "sysemu/runstate.h"
 #include "qapi/error.h"
 #include "qapi/qapi-commands-qdev.h"
 #include "qapi/qmp/dispatch.h"
@@ -969,6 +970,52 @@  void qmp_device_del(const char *id, Error **errp)
     }
 }
 
+int qdev_sync_config(DeviceState *dev, Error **errp)
+{
+    DeviceClass *dc = DEVICE_GET_CLASS(dev);
+
+    if (!dc->sync_config) {
+        error_setg(errp, "device-sync-config is not supported for '%s'",
+                   object_get_typename(OBJECT(dev)));
+        return -ENOTSUP;
+    }
+
+    return dc->sync_config(dev, errp);
+}
+
+void qmp_device_sync_config(const char *id, Error **errp)
+{
+    DeviceState *dev;
+
+    /*
+     * During migration there is a race between syncing config and
+     * migrating it, so let's just not allow it.
+     *
+     * Moreover, let's not rely on setting up interrupts in paused
+     * state, which may be a part of migration process.
+     */
+
+    if (migration_is_running()) {
+        error_setg(errp, "Config synchronization is not allowed "
+                   "during migration.");
+        return;
+    }
+
+    if (!runstate_is_running()) {
+        error_setg(errp, "Config synchronization allowed only in '%s' state, "
+                   "current state is '%s'", RunState_str(RUN_STATE_RUNNING),
+                   current_run_state_str());
+        return;
+    }
+
+    dev = find_device_state(id, true, errp);
+    if (!dev) {
+        return;
+    }
+
+    qdev_sync_config(dev, errp);
+}
+
 void hmp_device_add(Monitor *mon, const QDict *qdict)
 {
     Error *err = NULL;
diff --git a/system/runstate.c b/system/runstate.c
index d6ab860eca..8fd89172ae 100644
--- a/system/runstate.c
+++ b/system/runstate.c
@@ -189,6 +189,11 @@  bool runstate_check(RunState state)
     return current_run_state == state;
 }
 
+const char *current_run_state_str(void)
+{
+    return RunState_str(current_run_state);
+}
+
 static void runstate_init(void)
 {
     const RunStateTransition *p;