diff mbox series

[v3,1/1] vhost-user-fs: add migration type property

Message ID 20230217170038.1273710-2-antonkuchin@yandex-team.ru (mailing list archive)
State New, archived
Headers show
Series virtio-fs: implement option for stateless migration. | expand

Commit Message

Anton Kuchin Feb. 17, 2023, 5 p.m. UTC
Migration of vhost-user-fs device requires transfer of FUSE internal state
from backend. There is no standard way to do it now so by default migration
must be blocked. But if this state can be externally transferred by
orchestrator give it an option to explicitly allow migration.

Signed-off-by: Anton Kuchin <antonkuchin@yandex-team.ru>
---
 hw/core/qdev-properties-system.c    | 10 +++++++++
 hw/virtio/vhost-user-fs.c           | 32 ++++++++++++++++++++++++++++-
 include/hw/qdev-properties-system.h |  1 +
 include/hw/virtio/vhost-user-fs.h   |  2 ++
 qapi/migration.json                 | 16 +++++++++++++++
 5 files changed, 60 insertions(+), 1 deletion(-)

Comments

Stefan Hajnoczi Feb. 21, 2023, 8:45 p.m. UTC | #1
On Fri, Feb 17, 2023 at 07:00:38PM +0200, Anton Kuchin wrote:
> Migration of vhost-user-fs device requires transfer of FUSE internal state
> from backend. There is no standard way to do it now so by default migration
> must be blocked. But if this state can be externally transferred by
> orchestrator give it an option to explicitly allow migration.
> 
> Signed-off-by: Anton Kuchin <antonkuchin@yandex-team.ru>
> ---
>  hw/core/qdev-properties-system.c    | 10 +++++++++
>  hw/virtio/vhost-user-fs.c           | 32 ++++++++++++++++++++++++++++-
>  include/hw/qdev-properties-system.h |  1 +
>  include/hw/virtio/vhost-user-fs.h   |  2 ++
>  qapi/migration.json                 | 16 +++++++++++++++
>  5 files changed, 60 insertions(+), 1 deletion(-)

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Vladimir Sementsov-Ogievskiy Feb. 22, 2023, 12:20 p.m. UTC | #2
On 17.02.23 20:00, Anton Kuchin wrote:
> Migration of vhost-user-fs device requires transfer of FUSE internal state
> from backend. There is no standard way to do it now so by default migration
> must be blocked. But if this state can be externally transferred by
> orchestrator give it an option to explicitly allow migration.
> 
> Signed-off-by: Anton Kuchin <antonkuchin@yandex-team.ru>
> ---
>   hw/core/qdev-properties-system.c    | 10 +++++++++
>   hw/virtio/vhost-user-fs.c           | 32 ++++++++++++++++++++++++++++-
>   include/hw/qdev-properties-system.h |  1 +
>   include/hw/virtio/vhost-user-fs.h   |  2 ++
>   qapi/migration.json                 | 16 +++++++++++++++
>   5 files changed, 60 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
> index d42493f630..d9b1aa2a5d 100644
> --- a/hw/core/qdev-properties-system.c
> +++ b/hw/core/qdev-properties-system.c
> @@ -1143,3 +1143,13 @@ const PropertyInfo qdev_prop_uuid = {
>       .set   = set_uuid,
>       .set_default_value = set_default_uuid_auto,
>   };
> +
> +const PropertyInfo qdev_prop_vhost_user_migration_type = {
> +    .name = "VhostUserMigrationType",
> +    .description = "none/external",
> +    .enum_table = &VhostUserMigrationType_lookup,
> +    .get = qdev_propinfo_get_enum,
> +    .set = qdev_propinfo_set_enum,
> +    .set_default_value = qdev_propinfo_set_default_value_enum,
> +    .realized_set_allowed = true,
> +};
> diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c
> index 83fc20e49e..7deb9df5ec 100644
> --- a/hw/virtio/vhost-user-fs.c
> +++ b/hw/virtio/vhost-user-fs.c
> @@ -298,9 +298,35 @@ static struct vhost_dev *vuf_get_vhost(VirtIODevice *vdev)
>       return &fs->vhost_dev;
>   }
>   
> +static int vhost_user_fs_pre_save(void *opaque)
> +{
> +    VHostUserFS *fs = opaque;
> +    g_autofree char *path = object_get_canonical_path(OBJECT(fs));
> +
> +    switch (fs->migration_type) {
> +    case VHOST_USER_MIGRATION_TYPE_NONE:
> +        error_report("Migration is blocked by device %s", path);
> +        break;
> +    case VHOST_USER_MIGRATION_TYPE_EXTERNAL:
> +        return 0;
> +    default:
> +        error_report("Migration type '%s' is not supported by device %s",
> +                     VhostUserMigrationType_str(fs->migration_type), path);
> +        break;
> +    }
> +
> +    return -1;
> +}

Should we also add this as .pre_load, to force the user to select the correct migration_type on the target too?

> +
>   static const VMStateDescription vuf_vmstate = {
>       .name = "vhost-user-fs",
> -    .unmigratable = 1,
> +    .minimum_version_id = 0,
> +    .version_id = 0,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_VIRTIO_DEVICE,
> +        VMSTATE_END_OF_LIST()
> +    },
> +   .pre_save = vhost_user_fs_pre_save,
>   };
>   
>   static Property vuf_properties[] = {
> @@ -309,6 +335,10 @@ static Property vuf_properties[] = {
>       DEFINE_PROP_UINT16("num-request-queues", VHostUserFS,
>                          conf.num_request_queues, 1),
>       DEFINE_PROP_UINT16("queue-size", VHostUserFS, conf.queue_size, 128),
> +    DEFINE_PROP_UNSIGNED("migration", VHostUserFS, migration_type,
> +                         VHOST_USER_MIGRATION_TYPE_NONE,
> +                         qdev_prop_vhost_user_migration_type,
> +                         VhostUserMigrationType),

1. I see, other similar qdev_prop_* use DEFINE_PROP_SIGNED
2. All of them except only qdev_prop_fdc_drive_type, define also a convenient macro in include/hw/qdev-properties-system.h

should we follow these patterns?
Michael S. Tsirkin Feb. 22, 2023, 12:43 p.m. UTC | #3
On Wed, Feb 22, 2023 at 03:20:00PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> On 17.02.23 20:00, Anton Kuchin wrote:
> > Migration of vhost-user-fs device requires transfer of FUSE internal state
> > from backend. There is no standard way to do it now so by default migration
> > must be blocked. But if this state can be externally transferred by
> > orchestrator give it an option to explicitly allow migration.
> > 
> > Signed-off-by: Anton Kuchin <antonkuchin@yandex-team.ru>
> > ---
> >   hw/core/qdev-properties-system.c    | 10 +++++++++
> >   hw/virtio/vhost-user-fs.c           | 32 ++++++++++++++++++++++++++++-
> >   include/hw/qdev-properties-system.h |  1 +
> >   include/hw/virtio/vhost-user-fs.h   |  2 ++
> >   qapi/migration.json                 | 16 +++++++++++++++
> >   5 files changed, 60 insertions(+), 1 deletion(-)
> > 
> > diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
> > index d42493f630..d9b1aa2a5d 100644
> > --- a/hw/core/qdev-properties-system.c
> > +++ b/hw/core/qdev-properties-system.c
> > @@ -1143,3 +1143,13 @@ const PropertyInfo qdev_prop_uuid = {
> >       .set   = set_uuid,
> >       .set_default_value = set_default_uuid_auto,
> >   };
> > +
> > +const PropertyInfo qdev_prop_vhost_user_migration_type = {
> > +    .name = "VhostUserMigrationType",
> > +    .description = "none/external",
> > +    .enum_table = &VhostUserMigrationType_lookup,
> > +    .get = qdev_propinfo_get_enum,
> > +    .set = qdev_propinfo_set_enum,
> > +    .set_default_value = qdev_propinfo_set_default_value_enum,
> > +    .realized_set_allowed = true,
> > +};
> > diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c
> > index 83fc20e49e..7deb9df5ec 100644
> > --- a/hw/virtio/vhost-user-fs.c
> > +++ b/hw/virtio/vhost-user-fs.c
> > @@ -298,9 +298,35 @@ static struct vhost_dev *vuf_get_vhost(VirtIODevice *vdev)
> >       return &fs->vhost_dev;
> >   }
> > +static int vhost_user_fs_pre_save(void *opaque)
> > +{
> > +    VHostUserFS *fs = opaque;
> > +    g_autofree char *path = object_get_canonical_path(OBJECT(fs));
> > +
> > +    switch (fs->migration_type) {
> > +    case VHOST_USER_MIGRATION_TYPE_NONE:
> > +        error_report("Migration is blocked by device %s", path);
> > +        break;
> > +    case VHOST_USER_MIGRATION_TYPE_EXTERNAL:
> > +        return 0;
> > +    default:
> > +        error_report("Migration type '%s' is not supported by device %s",
> > +                     VhostUserMigrationType_str(fs->migration_type), path);
> > +        break;
> > +    }
> > +
> > +    return -1;
> > +}
> 
> Should we also add this as .pre_load, to force user select correct migration_type on target too?

In fact, I would claim we only want pre_load.
When qemu is started on destination we know where it's migrated
from so this flag can be set.
When qemu is started on source we generally do not yet know so
we don't know whether it's safe to set this flag.


> > +
> >   static const VMStateDescription vuf_vmstate = {
> >       .name = "vhost-user-fs",
> > -    .unmigratable = 1,
> > +    .minimum_version_id = 0,
> > +    .version_id = 0,
> > +    .fields = (VMStateField[]) {
> > +        VMSTATE_VIRTIO_DEVICE,
> > +        VMSTATE_END_OF_LIST()
> > +    },
> > +   .pre_save = vhost_user_fs_pre_save,
> >   };
> >   static Property vuf_properties[] = {
> > @@ -309,6 +335,10 @@ static Property vuf_properties[] = {
> >       DEFINE_PROP_UINT16("num-request-queues", VHostUserFS,
> >                          conf.num_request_queues, 1),
> >       DEFINE_PROP_UINT16("queue-size", VHostUserFS, conf.queue_size, 128),
> > +    DEFINE_PROP_UNSIGNED("migration", VHostUserFS, migration_type,
> > +                         VHOST_USER_MIGRATION_TYPE_NONE,
> > +                         qdev_prop_vhost_user_migration_type,
> > +                         VhostUserMigrationType),
> 
> 1. I see, other similar qdev_prop_* use DEFINE_PROP_SIGNED
> 2. All of them except only qdev_prop_fdc_drive_type, define also a convenient macro in include/hw/qdev-properties-system.h
> 
> should we follow these patterns?
> 
> 
> -- 
> Best regards,
> Vladimir
Anton Kuchin Feb. 22, 2023, 2:21 p.m. UTC | #4
On 22/02/2023 14:20, Vladimir Sementsov-Ogievskiy wrote:
> On 17.02.23 20:00, Anton Kuchin wrote:
>> Migration of vhost-user-fs device requires transfer of FUSE internal 
>> state
>> from backend. There is no standard way to do it now so by default 
>> migration
>> must be blocked. But if this state can be externally transferred by
>> orchestrator give it an option to explicitly allow migration.
>>
>> Signed-off-by: Anton Kuchin <antonkuchin@yandex-team.ru>
>> ---
>>   hw/core/qdev-properties-system.c    | 10 +++++++++
>>   hw/virtio/vhost-user-fs.c           | 32 ++++++++++++++++++++++++++++-
>>   include/hw/qdev-properties-system.h |  1 +
>>   include/hw/virtio/vhost-user-fs.h   |  2 ++
>>   qapi/migration.json                 | 16 +++++++++++++++
>>   5 files changed, 60 insertions(+), 1 deletion(-)
>>
>> diff --git a/hw/core/qdev-properties-system.c 
>> b/hw/core/qdev-properties-system.c
>> index d42493f630..d9b1aa2a5d 100644
>> --- a/hw/core/qdev-properties-system.c
>> +++ b/hw/core/qdev-properties-system.c
>> @@ -1143,3 +1143,13 @@ const PropertyInfo qdev_prop_uuid = {
>>       .set   = set_uuid,
>>       .set_default_value = set_default_uuid_auto,
>>   };
>> +
>> +const PropertyInfo qdev_prop_vhost_user_migration_type = {
>> +    .name = "VhostUserMigrationType",
>> +    .description = "none/external",
>> +    .enum_table = &VhostUserMigrationType_lookup,
>> +    .get = qdev_propinfo_get_enum,
>> +    .set = qdev_propinfo_set_enum,
>> +    .set_default_value = qdev_propinfo_set_default_value_enum,
>> +    .realized_set_allowed = true,
>> +};
>> diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c
>> index 83fc20e49e..7deb9df5ec 100644
>> --- a/hw/virtio/vhost-user-fs.c
>> +++ b/hw/virtio/vhost-user-fs.c
>> @@ -298,9 +298,35 @@ static struct vhost_dev 
>> *vuf_get_vhost(VirtIODevice *vdev)
>>       return &fs->vhost_dev;
>>   }
>>   +static int vhost_user_fs_pre_save(void *opaque)
>> +{
>> +    VHostUserFS *fs = opaque;
>> +    g_autofree char *path = object_get_canonical_path(OBJECT(fs));
>> +
>> +    switch (fs->migration_type) {
>> +    case VHOST_USER_MIGRATION_TYPE_NONE:
>> +        error_report("Migration is blocked by device %s", path);
>> +        break;
>> +    case VHOST_USER_MIGRATION_TYPE_EXTERNAL:
>> +        return 0;
>> +    default:
>> +        error_report("Migration type '%s' is not supported by device 
>> %s",
>> + VhostUserMigrationType_str(fs->migration_type), path);
>> +        break;
>> +    }
>> +
>> +    return -1;
>> +}
>
> Should we also add this as .pre_load, to force user select correct 
> migration_type on target too?

Why do we need it? The enum forces the user to select at least one of the
sane options, and I believe this is enough. As this property affects only
save, I don't know what we could do at load.

>
>> +
>>   static const VMStateDescription vuf_vmstate = {
>>       .name = "vhost-user-fs",
>> -    .unmigratable = 1,
>> +    .minimum_version_id = 0,
>> +    .version_id = 0,
>> +    .fields = (VMStateField[]) {
>> +        VMSTATE_VIRTIO_DEVICE,
>> +        VMSTATE_END_OF_LIST()
>> +    },
>> +   .pre_save = vhost_user_fs_pre_save,
>>   };
>>     static Property vuf_properties[] = {
>> @@ -309,6 +335,10 @@ static Property vuf_properties[] = {
>>       DEFINE_PROP_UINT16("num-request-queues", VHostUserFS,
>>                          conf.num_request_queues, 1),
>>       DEFINE_PROP_UINT16("queue-size", VHostUserFS, conf.queue_size, 
>> 128),
>> +    DEFINE_PROP_UNSIGNED("migration", VHostUserFS, migration_type,
>> +                         VHOST_USER_MIGRATION_TYPE_NONE,
>> +                         qdev_prop_vhost_user_migration_type,
>> +                         VhostUserMigrationType),
>
> 1. I see, other similar qdev_prop_* use DEFINE_PROP_SIGNED

I don't think this should be signed. Enum values are non-negative, so
compilers (at least gcc and clang, which I checked) evaluate the underlying
enum type to be unsigned int. I don't know why other property types use
signed; maybe they have reasons, or this is just how they were initially
implemented.

> 2. All of them except only qdev_prop_fdc_drive_type, define also a 
> convenient macro in include/hw/qdev-properties-system.h

This makes sense if the property is used in more than one place; in this
case I don't see any benefit from writing more code to handle this specific
case. Maybe if the property finds use in other devices this can be done.

>
> should we follow these patterns?
>
>
Anton Kuchin Feb. 22, 2023, 2:25 p.m. UTC | #5
On 22/02/2023 14:43, Michael S. Tsirkin wrote:
> On Wed, Feb 22, 2023 at 03:20:00PM +0300, Vladimir Sementsov-Ogievskiy wrote:
>> On 17.02.23 20:00, Anton Kuchin wrote:
>>> Migration of vhost-user-fs device requires transfer of FUSE internal state
>>> from backend. There is no standard way to do it now so by default migration
>>> must be blocked. But if this state can be externally transferred by
>>> orchestrator give it an option to explicitly allow migration.
>>>
>>> Signed-off-by: Anton Kuchin <antonkuchin@yandex-team.ru>
>>> ---
>>>    hw/core/qdev-properties-system.c    | 10 +++++++++
>>>    hw/virtio/vhost-user-fs.c           | 32 ++++++++++++++++++++++++++++-
>>>    include/hw/qdev-properties-system.h |  1 +
>>>    include/hw/virtio/vhost-user-fs.h   |  2 ++
>>>    qapi/migration.json                 | 16 +++++++++++++++
>>>    5 files changed, 60 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
>>> index d42493f630..d9b1aa2a5d 100644
>>> --- a/hw/core/qdev-properties-system.c
>>> +++ b/hw/core/qdev-properties-system.c
>>> @@ -1143,3 +1143,13 @@ const PropertyInfo qdev_prop_uuid = {
>>>        .set   = set_uuid,
>>>        .set_default_value = set_default_uuid_auto,
>>>    };
>>> +
>>> +const PropertyInfo qdev_prop_vhost_user_migration_type = {
>>> +    .name = "VhostUserMigrationType",
>>> +    .description = "none/external",
>>> +    .enum_table = &VhostUserMigrationType_lookup,
>>> +    .get = qdev_propinfo_get_enum,
>>> +    .set = qdev_propinfo_set_enum,
>>> +    .set_default_value = qdev_propinfo_set_default_value_enum,
>>> +    .realized_set_allowed = true,
>>> +};
>>> diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c
>>> index 83fc20e49e..7deb9df5ec 100644
>>> --- a/hw/virtio/vhost-user-fs.c
>>> +++ b/hw/virtio/vhost-user-fs.c
>>> @@ -298,9 +298,35 @@ static struct vhost_dev *vuf_get_vhost(VirtIODevice *vdev)
>>>        return &fs->vhost_dev;
>>>    }
>>> +static int vhost_user_fs_pre_save(void *opaque)
>>> +{
>>> +    VHostUserFS *fs = opaque;
>>> +    g_autofree char *path = object_get_canonical_path(OBJECT(fs));
>>> +
>>> +    switch (fs->migration_type) {
>>> +    case VHOST_USER_MIGRATION_TYPE_NONE:
>>> +        error_report("Migration is blocked by device %s", path);
>>> +        break;
>>> +    case VHOST_USER_MIGRATION_TYPE_EXTERNAL:
>>> +        return 0;
>>> +    default:
>>> +        error_report("Migration type '%s' is not supported by device %s",
>>> +                     VhostUserMigrationType_str(fs->migration_type), path);
>>> +        break;
>>> +    }
>>> +
>>> +    return -1;
>>> +}
>> Should we also add this as .pre_load, to force user select correct migration_type on target too?
> In fact, I would claim we only want pre_load.
> When qemu is started on destination we know where it's migrated
> from so this flag can be set.
> When qemu is started on source we generally do not yet know so
> we don't know whether it's safe to set this flag.

This property selects whether the VM can migrate and, if it can, what qemu
should put into the migration stream. So we select on the source what type
of migration is allowed for this VM; the destination can't check anything
at load time.

>
>
>>> +
>>>    static const VMStateDescription vuf_vmstate = {
>>>        .name = "vhost-user-fs",
>>> -    .unmigratable = 1,
>>> +    .minimum_version_id = 0,
>>> +    .version_id = 0,
>>> +    .fields = (VMStateField[]) {
>>> +        VMSTATE_VIRTIO_DEVICE,
>>> +        VMSTATE_END_OF_LIST()
>>> +    },
>>> +   .pre_save = vhost_user_fs_pre_save,
>>>    };
>>>    static Property vuf_properties[] = {
>>> @@ -309,6 +335,10 @@ static Property vuf_properties[] = {
>>>        DEFINE_PROP_UINT16("num-request-queues", VHostUserFS,
>>>                           conf.num_request_queues, 1),
>>>        DEFINE_PROP_UINT16("queue-size", VHostUserFS, conf.queue_size, 128),
>>> +    DEFINE_PROP_UNSIGNED("migration", VHostUserFS, migration_type,
>>> +                         VHOST_USER_MIGRATION_TYPE_NONE,
>>> +                         qdev_prop_vhost_user_migration_type,
>>> +                         VhostUserMigrationType),
>> 1. I see, other similar qdev_prop_* use DEFINE_PROP_SIGNED
>> 2. All of them except only qdev_prop_fdc_drive_type, define also a convenient macro in include/hw/qdev-properties-system.h
>>
>> should we follow these patterns?
>>
>>
>> -- 
>> Best regards,
>> Vladimir
Vladimir Sementsov-Ogievskiy Feb. 22, 2023, 3:14 p.m. UTC | #6
On 22.02.23 17:25, Anton Kuchin wrote:
>>>> +static int vhost_user_fs_pre_save(void *opaque)
>>>> +{
>>>> +    VHostUserFS *fs = opaque;
>>>> +    g_autofree char *path = object_get_canonical_path(OBJECT(fs));
>>>> +
>>>> +    switch (fs->migration_type) {
>>>> +    case VHOST_USER_MIGRATION_TYPE_NONE:
>>>> +        error_report("Migration is blocked by device %s", path);
>>>> +        break;
>>>> +    case VHOST_USER_MIGRATION_TYPE_EXTERNAL:
>>>> +        return 0;
>>>> +    default:
>>>> +        error_report("Migration type '%s' is not supported by device %s",
>>>> +                     VhostUserMigrationType_str(fs->migration_type), path);
>>>> +        break;
>>>> +    }
>>>> +
>>>> +    return -1;
>>>> +}
>>> Should we also add this as .pre_load, to force user select correct migration_type on target too?
>> In fact, I would claim we only want pre_load.
>> When qemu is started on destination we know where it's migrated
>> from so this flag can be set.
>> When qemu is started on source we generally do not yet know so
>> we don't know whether it's safe to set this flag.

But the destination is a "source" for the next migration, so there shouldn't be a real difference.
The new property has ".realized_set_allowed = true", so, as I understand, it may be changed at any time, so that's not a problem.

> 
> This property selects if VM can migrate and if it can what should qemu put
> to the migration stream. So we select on source what type of migration is
> allowed for this VM, destination can't check anything at load time.

OK, so the new field "migration" regulates only outgoing migration and does nothing for incoming. On incoming migration the migration stream itself defines the type of device migration.
Worth mentioning in the docs?
Vladimir Sementsov-Ogievskiy Feb. 22, 2023, 3:15 p.m. UTC | #7
On 22.02.23 17:21, Anton Kuchin wrote:
>>>
>>
>> 1. I see, other similar qdev_prop_* use DEFINE_PROP_SIGNED
> 
> I don't think this should be signed. Enum values are non-negative so compilers
> (at least gcc and clang that I checked) evaluate underlying enum type to be unsigned int.
> I don't know why other property types use signed, may be they have reasons or just this
> is how they were initially implemented.
> 
>> 2. All of them except only qdev_prop_fdc_drive_type, define also a convenient macro in include/hw/qdev-properties-system.h
> 
> This makes sense if property is used in more than one place, in this case I don't see any
> benefit from writing more code to handle this specific case. Maybe if property finds its
> usage in other devices this can be done.

Reasonable, thanks!
Vladimir Sementsov-Ogievskiy Feb. 22, 2023, 3:20 p.m. UTC | #8
On 17.02.23 20:00, Anton Kuchin wrote:
> Migration of vhost-user-fs device requires transfer of FUSE internal state
> from backend. There is no standard way to do it now so by default migration
> must be blocked. But if this state can be externally transferred by
> orchestrator give it an option to explicitly allow migration.
> 
> Signed-off-by: Anton Kuchin<antonkuchin@yandex-team.ru>

Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Michael S. Tsirkin Feb. 22, 2023, 4:43 p.m. UTC | #9
On Wed, Feb 22, 2023 at 06:14:31PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> On 22.02.23 17:25, Anton Kuchin wrote:
> > > > > +static int vhost_user_fs_pre_save(void *opaque)
> > > > > +{
> > > > > +    VHostUserFS *fs = opaque;
> > > > > +    g_autofree char *path = object_get_canonical_path(OBJECT(fs));
> > > > > +
> > > > > +    switch (fs->migration_type) {
> > > > > +    case VHOST_USER_MIGRATION_TYPE_NONE:
> > > > > +        error_report("Migration is blocked by device %s", path);
> > > > > +        break;
> > > > > +    case VHOST_USER_MIGRATION_TYPE_EXTERNAL:
> > > > > +        return 0;
> > > > > +    default:
> > > > > +        error_report("Migration type '%s' is not supported by device %s",
> > > > > +                     VhostUserMigrationType_str(fs->migration_type), path);
> > > > > +        break;
> > > > > +    }
> > > > > +
> > > > > +    return -1;
> > > > > +}
> > > > Should we also add this as .pre_load, to force user select correct migration_type on target too?
> > > In fact, I would claim we only want pre_load.
> > > When qemu is started on destination we know where it's migrated
> > > from so this flag can be set.
> > > When qemu is started on source we generally do not yet know so
> > > we don't know whether it's safe to set this flag.
> 
> But destination is a "source" for next migration, so there shouldn't be real difference.

And whether to allow that migration should be decided by destination of
that migration.


> The new property has ".realized_set_allowed = true", so, as I understand it may be changed at any time, so that's not a problem.
> 
> > 
> > This property selects if VM can migrate and if it can what should qemu put
> > to the migration stream. So we select on source what type of migration is
> > allowed for this VM, destination can't check anything at load time.
> 
> OK, so the new field "migration" regulates only outgoing migration and do nothing for incoming. On incoming migration the migration stream itself defines the type of device migration.
> Worth mentioning in doc?
> 
> -- 
> Best regards,
> Vladimir
Anton Kuchin Feb. 22, 2023, 4:49 p.m. UTC | #10
On 22/02/2023 17:14, Vladimir Sementsov-Ogievskiy wrote:
> On 22.02.23 17:25, Anton Kuchin wrote:
>>>>> +static int vhost_user_fs_pre_save(void *opaque)
>>>>> +{
>>>>> +    VHostUserFS *fs = opaque;
>>>>> +    g_autofree char *path = object_get_canonical_path(OBJECT(fs));
>>>>> +
>>>>> +    switch (fs->migration_type) {
>>>>> +    case VHOST_USER_MIGRATION_TYPE_NONE:
>>>>> +        error_report("Migration is blocked by device %s", path);
>>>>> +        break;
>>>>> +    case VHOST_USER_MIGRATION_TYPE_EXTERNAL:
>>>>> +        return 0;
>>>>> +    default:
>>>>> +        error_report("Migration type '%s' is not supported by 
>>>>> device %s",
>>>>> + VhostUserMigrationType_str(fs->migration_type), path);
>>>>> +        break;
>>>>> +    }
>>>>> +
>>>>> +    return -1;
>>>>> +}
>>>> Should we also add this as .pre_load, to force user select correct 
>>>> migration_type on target too?
>>> In fact, I would claim we only want pre_load.
>>> When qemu is started on destination we know where it's migrated
>>> from so this flag can be set.
>>> When qemu is started on source we generally do not yet know so
>>> we don't know whether it's safe to set this flag.
>
> But destination is a "source" for next migration, so there shouldn't 
> be real difference.
> The new property has ".realized_set_allowed = true", so, as I 
> understand it may be changed at any time, so that's not a problem.

Yes, exactly. So the destination's property sets not how it will handle this
incoming migration but how it will handle the future outgoing one.

>
>>
>> This property selects if VM can migrate and if it can what should 
>> qemu put
>> to the migration stream. So we select on source what type of 
>> migration is
>> allowed for this VM, destination can't check anything at load time.
>
> OK, so the new field "migration" regulates only outgoing migration and 
> do nothing for incoming. On incoming migration the migration stream 
> itself defines the type of device migration.
> Worth mentioning in doc?

Good point. I don't think this deserves a respin but if I have to send 
v4 I'll include
clarification in it.
Michael S. Tsirkin Feb. 22, 2023, 4:51 p.m. UTC | #11
On Wed, Feb 22, 2023 at 06:49:10PM +0200, Anton Kuchin wrote:
> On 22/02/2023 17:14, Vladimir Sementsov-Ogievskiy wrote:
> > On 22.02.23 17:25, Anton Kuchin wrote:
> > > > > > +static int vhost_user_fs_pre_save(void *opaque)
> > > > > > +{
> > > > > > +    VHostUserFS *fs = opaque;
> > > > > > +    g_autofree char *path = object_get_canonical_path(OBJECT(fs));
> > > > > > +
> > > > > > +    switch (fs->migration_type) {
> > > > > > +    case VHOST_USER_MIGRATION_TYPE_NONE:
> > > > > > +        error_report("Migration is blocked by device %s", path);
> > > > > > +        break;
> > > > > > +    case VHOST_USER_MIGRATION_TYPE_EXTERNAL:
> > > > > > +        return 0;
> > > > > > +    default:
> > > > > > +        error_report("Migration type '%s' is not
> > > > > > supported by device %s",
> > > > > > + VhostUserMigrationType_str(fs->migration_type), path);
> > > > > > +        break;
> > > > > > +    }
> > > > > > +
> > > > > > +    return -1;
> > > > > > +}
> > > > > Should we also add this as .pre_load, to force user select
> > > > > correct migration_type on target too?
> > > > In fact, I would claim we only want pre_load.
> > > > When qemu is started on destination we know where it's migrated
> > > > from so this flag can be set.
> > > > When qemu is started on source we generally do not yet know so
> > > > we don't know whether it's safe to set this flag.
> > 
> > But destination is a "source" for next migration, so there shouldn't be
> > real difference.
> > The new property has ".realized_set_allowed = true", so, as I understand
> > it may be changed at any time, so that's not a problem.
> 
> Yes, exactly. So destination's property sets not how it will handle this
> incoming
> migration but the future outgoing one.


How do you know where you are going to migrate though?
I think you don't.
Setting it on source is better since we know where we
are migrating from.

> > 
> > > 
> > > This property selects if VM can migrate and if it can what should
> > > qemu put
> > > to the migration stream. So we select on source what type of
> > > migration is
> > > allowed for this VM, destination can't check anything at load time.
> > 
> > OK, so the new field "migration" regulates only outgoing migration and
> > do nothing for incoming. On incoming migration the migration stream
> > itself defines the type of device migration.
> > Worth mentioning in doc?
> 
> Good point. I don't think this deserves a respin but if I have to send v4
> I'll include
> clarification in it.
Anton Kuchin Feb. 22, 2023, 5:05 p.m. UTC | #12
On 22/02/2023 18:51, Michael S. Tsirkin wrote:
> On Wed, Feb 22, 2023 at 06:49:10PM +0200, Anton Kuchin wrote:
>> On 22/02/2023 17:14, Vladimir Sementsov-Ogievskiy wrote:
>>> On 22.02.23 17:25, Anton Kuchin wrote:
>>>>>>> +static int vhost_user_fs_pre_save(void *opaque)
>>>>>>> +{
>>>>>>> +    VHostUserFS *fs = opaque;
>>>>>>> +    g_autofree char *path = object_get_canonical_path(OBJECT(fs));
>>>>>>> +
>>>>>>> +    switch (fs->migration_type) {
>>>>>>> +    case VHOST_USER_MIGRATION_TYPE_NONE:
>>>>>>> +        error_report("Migration is blocked by device %s", path);
>>>>>>> +        break;
>>>>>>> +    case VHOST_USER_MIGRATION_TYPE_EXTERNAL:
>>>>>>> +        return 0;
>>>>>>> +    default:
>>>>>>> +        error_report("Migration type '%s' is not
>>>>>>> supported by device %s",
>>>>>>> + VhostUserMigrationType_str(fs->migration_type), path);
>>>>>>> +        break;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    return -1;
>>>>>>> +}
>>>>>> Should we also add this as .pre_load, to force user select
>>>>>> correct migration_type on target too?
>>>>> In fact, I would claim we only want pre_load.
>>>>> When qemu is started on destination we know where it's migrated
>>>>> from so this flag can be set.
>>>>> When qemu is started on source we generally do not yet know so
>>>>> we don't know whether it's safe to set this flag.
>>> But destination is a "source" for next migration, so there shouldn't be
>>> real difference.
>>> The new property has ".realized_set_allowed = true", so, as I understand
>>> it may be changed at any time, so that's not a problem.
>> Yes, exactly. So destination's property sets not how it will handle this
>> incoming
>> migration but the future outgoing one.
>
> How do you know where you are going to migrate though?
> I think you don't.
> Setting it on source is better since we know where we
> are migrating from.

Yes, I don't know where I'm going to migrate to. This is why the property
affects only how the source saves state on outgoing migration.

>
>>>> This property selects if VM can migrate and if it can what should
>>>> qemu put
>>>> to the migration stream. So we select on source what type of
>>>> migration is
>>>> allowed for this VM, destination can't check anything at load time.
>>> OK, so the new field "migration" regulates only outgoing migration and
>>> do nothing for incoming. On incoming migration the migration stream
>>> itself defines the type of device migration.
>>> Worth mentioning in doc?
>> Good point. I don't think this deserves a respin but if I have to send v4
>> I'll include
>> clarification in it.
Michael S. Tsirkin Feb. 22, 2023, 5:12 p.m. UTC | #13
On Wed, Feb 22, 2023 at 07:05:47PM +0200, Anton Kuchin wrote:
> On 22/02/2023 18:51, Michael S. Tsirkin wrote:
> > On Wed, Feb 22, 2023 at 06:49:10PM +0200, Anton Kuchin wrote:
> > > On 22/02/2023 17:14, Vladimir Sementsov-Ogievskiy wrote:
> > > > On 22.02.23 17:25, Anton Kuchin wrote:
> > > > > > > > +static int vhost_user_fs_pre_save(void *opaque)
> > > > > > > > +{
> > > > > > > > +    VHostUserFS *fs = opaque;
> > > > > > > > +    g_autofree char *path = object_get_canonical_path(OBJECT(fs));
> > > > > > > > +
> > > > > > > > +    switch (fs->migration_type) {
> > > > > > > > +    case VHOST_USER_MIGRATION_TYPE_NONE:
> > > > > > > > +        error_report("Migration is blocked by device %s", path);
> > > > > > > > +        break;
> > > > > > > > +    case VHOST_USER_MIGRATION_TYPE_EXTERNAL:
> > > > > > > > +        return 0;
> > > > > > > > +    default:
> > > > > > > > +        error_report("Migration type '%s' is not
> > > > > > > > supported by device %s",
> > > > > > > > + VhostUserMigrationType_str(fs->migration_type), path);
> > > > > > > > +        break;
> > > > > > > > +    }
> > > > > > > > +
> > > > > > > > +    return -1;
> > > > > > > > +}
> > > > > > > Should we also add this as .pre_load, to force user select
> > > > > > > correct migration_type on target too?
> > > > > > In fact, I would claim we only want pre_load.
> > > > > > When qemu is started on destination we know where it's migrated
> > > > > > from so this flag can be set.
> > > > > > When qemu is started on source we generally do not yet know so
> > > > > > we don't know whether it's safe to set this flag.
> > > > But destination is a "source" for next migration, so there shouldn't be
> > > > real difference.
> > > > The new property has ".realized_set_allowed = true", so, as I understand
> > > > it may be changed at any time, so that's not a problem.
> > > Yes, exactly. So destination's property sets not how it will handle this
> > > incoming
> > > migration but the future outgoing one.
> > 
> > How do you know where you are going to migrate though?
> > I think you don't.
> > Setting it on source is better since we know where we
> > are migrating from.
> 
> Yes, I don't know where I'm going to migrate to. This is why property
> affects only how source saves state on outgoing migration.

Um. I don't get the logic.


> > 
> > > > > This property selects if VM can migrate and if it can what should
> > > > > qemu put
> > > > > to the migration stream. So we select on source what type of
> > > > > migration is
> > > > > allowed for this VM, destination can't check anything at load time.
> > > > OK, so the new field "migration" regulates only outgoing migration and
> > > > do nothing for incoming. On incoming migration the migration stream
> > > > itself defines the type of device migration.
> > > > Worth mentioning in doc?
> > > Good point. I don't think this deserves a respin but if I have to send v4
> > > I'll include
> > > clarification in it.
Anton Kuchin Feb. 22, 2023, 5:15 p.m. UTC | #14
On 22/02/2023 18:43, Michael S. Tsirkin wrote:
> On Wed, Feb 22, 2023 at 06:14:31PM +0300, Vladimir Sementsov-Ogievskiy wrote:
>> On 22.02.23 17:25, Anton Kuchin wrote:
>>>>>> +static int vhost_user_fs_pre_save(void *opaque)
>>>>>> +{
>>>>>> +    VHostUserFS *fs = opaque;
>>>>>> +    g_autofree char *path = object_get_canonical_path(OBJECT(fs));
>>>>>> +
>>>>>> +    switch (fs->migration_type) {
>>>>>> +    case VHOST_USER_MIGRATION_TYPE_NONE:
>>>>>> +        error_report("Migration is blocked by device %s", path);
>>>>>> +        break;
>>>>>> +    case VHOST_USER_MIGRATION_TYPE_EXTERNAL:
>>>>>> +        return 0;
>>>>>> +    default:
>>>>>> +        error_report("Migration type '%s' is not supported by device %s",
>>>>>> +                     VhostUserMigrationType_str(fs->migration_type), path);
>>>>>> +        break;
>>>>>> +    }
>>>>>> +
>>>>>> +    return -1;
>>>>>> +}
>>>>> Should we also add this as .pre_load, to force user select correct migration_type on target too?
>>>> In fact, I would claim we only want pre_load.
>>>> When qemu is started on destination we know where it's migrated
>>>> from so this flag can be set.
>>>> When qemu is started on source we generally do not yet know so
>>>> we don't know whether it's safe to set this flag.
>> But destination is a "source" for next migration, so there shouldn't be real difference.
> And whether to allow that migration should be decided by destination of
> that migration.

The destination can just refuse to load unsupported state. But this happens
automatically if the migration code finds an unknown subsection, so it needs
no explicit check by the device's .pre_load.

>
>
>> The new property has ".realized_set_allowed = true", so, as I understand it may be changed at any time, so that's not a problem.
>>
>>> This property selects if VM can migrate and if it can what should qemu put
>>> to the migration stream. So we select on source what type of migration is
>>> allowed for this VM, destination can't check anything at load time.
>> OK, so the new field "migration" regulates only outgoing migration and do nothing for incoming. On incoming migration the migration stream itself defines the type of device migration.
>> Worth mentioning in doc?
>>
>> -- 
>> Best regards,
>> Vladimir
Michael S. Tsirkin Feb. 22, 2023, 5:30 p.m. UTC | #15
On Wed, Feb 22, 2023 at 07:15:40PM +0200, Anton Kuchin wrote:
> On 22/02/2023 18:43, Michael S. Tsirkin wrote:
> > On Wed, Feb 22, 2023 at 06:14:31PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> > > On 22.02.23 17:25, Anton Kuchin wrote:
> > > > > > > +static int vhost_user_fs_pre_save(void *opaque)
> > > > > > > +{
> > > > > > > +    VHostUserFS *fs = opaque;
> > > > > > > +    g_autofree char *path = object_get_canonical_path(OBJECT(fs));
> > > > > > > +
> > > > > > > +    switch (fs->migration_type) {
> > > > > > > +    case VHOST_USER_MIGRATION_TYPE_NONE:
> > > > > > > +        error_report("Migration is blocked by device %s", path);
> > > > > > > +        break;
> > > > > > > +    case VHOST_USER_MIGRATION_TYPE_EXTERNAL:
> > > > > > > +        return 0;
> > > > > > > +    default:
> > > > > > > +        error_report("Migration type '%s' is not supported by device %s",
> > > > > > > +                     VhostUserMigrationType_str(fs->migration_type), path);
> > > > > > > +        break;
> > > > > > > +    }
> > > > > > > +
> > > > > > > +    return -1;
> > > > > > > +}
> > > > > > Should we also add this as .pre_load, to force user select correct migration_type on target too?
> > > > > In fact, I would claim we only want pre_load.
> > > > > When qemu is started on destination we know where it's migrated
> > > > > from so this flag can be set.
> > > > > When qemu is started on source we generally do not yet know so
> > > > > we don't know whether it's safe to set this flag.
> > > But destination is a "source" for next migration, so there shouldn't be real difference.
> > And whether to allow that migration should be decided by destination of
> > that migration.
> 
> Destination can just refuse to load unsupported state. But this happens
> automatically if migration code finds unknown subsection and needs no
> explicit check by device .pre_load.

Does it happen with the patch in question?
What is this unknown subsection you are talking about?


> > 
> > 
> > > The new property has ".realized_set_allowed = true", so, as I understand it may be changed at any time, so that's not a problem.
> > > 
> > > > This property selects if VM can migrate and if it can what should qemu put
> > > > to the migration stream. So we select on source what type of migration is
> > > > allowed for this VM, destination can't check anything at load time.
> > > OK, so the new field "migration" regulates only outgoing migration and do nothing for incoming. On incoming migration the migration stream itself defines the type of device migration.
> > > Worth mentioning in doc?
> > > 
> > > -- 
> > > Best regards,
> > > Vladimir
Anton Kuchin Feb. 22, 2023, 6:25 p.m. UTC | #16
On 22/02/2023 19:12, Michael S. Tsirkin wrote:
> On Wed, Feb 22, 2023 at 07:05:47PM +0200, Anton Kuchin wrote:
>> On 22/02/2023 18:51, Michael S. Tsirkin wrote:
>>> On Wed, Feb 22, 2023 at 06:49:10PM +0200, Anton Kuchin wrote:
>>>> On 22/02/2023 17:14, Vladimir Sementsov-Ogievskiy wrote:
>>>>> On 22.02.23 17:25, Anton Kuchin wrote:
>>>>>>>>> +static int vhost_user_fs_pre_save(void *opaque)
>>>>>>>>> +{
>>>>>>>>> +    VHostUserFS *fs = opaque;
>>>>>>>>> +    g_autofree char *path = object_get_canonical_path(OBJECT(fs));
>>>>>>>>> +
>>>>>>>>> +    switch (fs->migration_type) {
>>>>>>>>> +    case VHOST_USER_MIGRATION_TYPE_NONE:
>>>>>>>>> +        error_report("Migration is blocked by device %s", path);
>>>>>>>>> +        break;
>>>>>>>>> +    case VHOST_USER_MIGRATION_TYPE_EXTERNAL:
>>>>>>>>> +        return 0;
>>>>>>>>> +    default:
>>>>>>>>> +        error_report("Migration type '%s' is not
>>>>>>>>> supported by device %s",
>>>>>>>>> + VhostUserMigrationType_str(fs->migration_type), path);
>>>>>>>>> +        break;
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +    return -1;
>>>>>>>>> +}
>>>>>>>> Should we also add this as .pre_load, to force user select
>>>>>>>> correct migration_type on target too?
>>>>>>> In fact, I would claim we only want pre_load.
>>>>>>> When qemu is started on destination we know where it's migrated
>>>>>>> from so this flag can be set.
>>>>>>> When qemu is started on source we generally do not yet know so
>>>>>>> we don't know whether it's safe to set this flag.
>>>>> But destination is a "source" for next migration, so there shouldn't be
>>>>> real difference.
>>>>> The new property has ".realized_set_allowed = true", so, as I understand
>>>>> it may be changed at any time, so that's not a problem.
>>>> Yes, exactly. So destination's property sets not how it will handle this
>>>> incoming
>>>> migration but the future outgoing one.
>>> How do you know where you are going to migrate though?
>>> I think you don't.
>>> Setting it on source is better since we know where we
>>> are migrating from.
>> Yes, I don't know where I'm going to migrate to. This is why property
>> affects only how source saves state on outgoing migration.
> Um. I don't get the logic.

For this feature to work we need an orchestrator to manage the migration,
and we generally assume that it is the responsibility of the orchestrator
to ensure matching properties on source and destination.
As the orchestrator manages both sides of the migration, it can set the
option (and we can check it) on either source or destination. Right now it's
not important which side we select, because the option is essentially a
binary allow/deny (though IMHO it is much better to refuse migration on the
source than to find out later that the state can't be loaded by the
destination; in the case of file migration this becomes especially
painful).

But there are plans to add an internal migration option (extract the FUSE
state from the backend and transfer it in the QEMU migration stream), and
that's where setting/checking on the source becomes important, because it
will rely on this property to decide whether the extra state from the
backend needs to be put in the migration stream subsection.

If you are concerned about the orchestrator breaking the assumption of
matching properties on source and destination, this is not really supported
AFAIK, but I don't think we need to punish it for this — maybe it has its
reasons: I can imagine a scenario where the orchestrator wants to migrate
from a source with 'migration=external' to a destination with
'migration=none' to ensure that the destination can't be
migrated further.

>
>
>>>>>> This property selects if VM can migrate and if it can what should
>>>>>> qemu put
>>>>>> to the migration stream. So we select on source what type of
>>>>>> migration is
>>>>>> allowed for this VM, destination can't check anything at load time.
>>>>> OK, so the new field "migration" regulates only outgoing migration and
>>>>> do nothing for incoming. On incoming migration the migration stream
>>>>> itself defines the type of device migration.
>>>>> Worth mentioning in doc?
>>>> Good point. I don't think this deserves a respin but if I have to send v4
>>>> I'll include
>>>> clarification in it.
Michael S. Tsirkin Feb. 22, 2023, 8:21 p.m. UTC | #17
On Wed, Feb 22, 2023 at 08:25:19PM +0200, Anton Kuchin wrote:
> On 22/02/2023 19:12, Michael S. Tsirkin wrote:
> > On Wed, Feb 22, 2023 at 07:05:47PM +0200, Anton Kuchin wrote:
> > > On 22/02/2023 18:51, Michael S. Tsirkin wrote:
> > > > On Wed, Feb 22, 2023 at 06:49:10PM +0200, Anton Kuchin wrote:
> > > > > On 22/02/2023 17:14, Vladimir Sementsov-Ogievskiy wrote:
> > > > > > On 22.02.23 17:25, Anton Kuchin wrote:
> > > > > > > > > > +static int vhost_user_fs_pre_save(void *opaque)
> > > > > > > > > > +{
> > > > > > > > > > +    VHostUserFS *fs = opaque;
> > > > > > > > > > +    g_autofree char *path = object_get_canonical_path(OBJECT(fs));
> > > > > > > > > > +
> > > > > > > > > > +    switch (fs->migration_type) {
> > > > > > > > > > +    case VHOST_USER_MIGRATION_TYPE_NONE:
> > > > > > > > > > +        error_report("Migration is blocked by device %s", path);
> > > > > > > > > > +        break;
> > > > > > > > > > +    case VHOST_USER_MIGRATION_TYPE_EXTERNAL:
> > > > > > > > > > +        return 0;
> > > > > > > > > > +    default:
> > > > > > > > > > +        error_report("Migration type '%s' is not
> > > > > > > > > > supported by device %s",
> > > > > > > > > > + VhostUserMigrationType_str(fs->migration_type), path);
> > > > > > > > > > +        break;
> > > > > > > > > > +    }
> > > > > > > > > > +
> > > > > > > > > > +    return -1;
> > > > > > > > > > +}
> > > > > > > > > Should we also add this as .pre_load, to force user select
> > > > > > > > > correct migration_type on target too?
> > > > > > > > In fact, I would claim we only want pre_load.
> > > > > > > > When qemu is started on destination we know where it's migrated
> > > > > > > > from so this flag can be set.
> > > > > > > > When qemu is started on source we generally do not yet know so
> > > > > > > > we don't know whether it's safe to set this flag.
> > > > > > But destination is a "source" for next migration, so there shouldn't be
> > > > > > real difference.
> > > > > > The new property has ".realized_set_allowed = true", so, as I understand
> > > > > > it may be changed at any time, so that's not a problem.
> > > > > Yes, exactly. So destination's property sets not how it will handle this
> > > > > incoming
> > > > > migration but the future outgoing one.
> > > > How do you know where you are going to migrate though?
> > > > I think you don't.
> > > > Setting it on source is better since we know where we
> > > > are migrating from.
> > > Yes, I don't know where I'm going to migrate to. This is why property
> > > affects only how source saves state on outgoing migration.
> > Um. I don't get the logic.
> 
> For this feature to work we need orchestrator to manage the migration. And
> we
> generally assume that it is responsibility of orchestrator to ensure
> matching
> properties on source and destination.
> As orchestrator manages both sides of migration it can set option (and we
> can
> check it) on either source or destination. Now it's not important which side
> we
> select, because now the option is essentially binary allow/deny (but IMHO it
> is much better to refuse source to migrate than find later that state can't
> be
> loaded by destination, in case of file migration this becomes especially
> painful).
> 
> But there are plans to add internal migration option (extract FUSE state
> from
> backend and transfer it in QEMU migration stream), and that's where
> setting/checking
> on source becomes important because it will rely on this property to decide
> if
> extra state form backend needs to be put in the migration stream subsection.


If we do internal migration that will be a different property
which has to match on source *and* destination.


> If you are concerned about orchestrator breaking assumption of matching
> properties
> on source and destination this is not really supported AFAIK but I don't
> think we
> need to punish it for this, maybe it has its reasons: I can imagine scenario
> where orchestrator could want to migrate from source with
> 'migration=external'
> to destination with 'migration=none' to ensure that destination can't be
> migrated further.

No. I am concerned about a simple practical matter:
- I decide to restart qemu on the same host - so I need to enable
  migration
- Later I decide to migrate qemu to another host - this should be
  blocked


Property on source does not satisfy both at the same time.
Property on destination does.



> > 
> > 
> > > > > > > This property selects if VM can migrate and if it can what should
> > > > > > > qemu put
> > > > > > > to the migration stream. So we select on source what type of
> > > > > > > migration is
> > > > > > > allowed for this VM, destination can't check anything at load time.
> > > > > > OK, so the new field "migration" regulates only outgoing migration and
> > > > > > do nothing for incoming. On incoming migration the migration stream
> > > > > > itself defines the type of device migration.
> > > > > > Worth mentioning in doc?
> > > > > Good point. I don't think this deserves a respin but if I have to send v4
> > > > > I'll include
> > > > > clarification in it.
Anton Kuchin Feb. 22, 2023, 8:50 p.m. UTC | #18
On 22/02/2023 22:21, Michael S. Tsirkin wrote:
> On Wed, Feb 22, 2023 at 08:25:19PM +0200, Anton Kuchin wrote:
>> On 22/02/2023 19:12, Michael S. Tsirkin wrote:
>>> On Wed, Feb 22, 2023 at 07:05:47PM +0200, Anton Kuchin wrote:
>>>> On 22/02/2023 18:51, Michael S. Tsirkin wrote:
>>>>> On Wed, Feb 22, 2023 at 06:49:10PM +0200, Anton Kuchin wrote:
>>>>>> On 22/02/2023 17:14, Vladimir Sementsov-Ogievskiy wrote:
>>>>>>> On 22.02.23 17:25, Anton Kuchin wrote:
>>>>>>>>>>> +static int vhost_user_fs_pre_save(void *opaque)
>>>>>>>>>>> +{
>>>>>>>>>>> +    VHostUserFS *fs = opaque;
>>>>>>>>>>> +    g_autofree char *path = object_get_canonical_path(OBJECT(fs));
>>>>>>>>>>> +
>>>>>>>>>>> +    switch (fs->migration_type) {
>>>>>>>>>>> +    case VHOST_USER_MIGRATION_TYPE_NONE:
>>>>>>>>>>> +        error_report("Migration is blocked by device %s", path);
>>>>>>>>>>> +        break;
>>>>>>>>>>> +    case VHOST_USER_MIGRATION_TYPE_EXTERNAL:
>>>>>>>>>>> +        return 0;
>>>>>>>>>>> +    default:
>>>>>>>>>>> +        error_report("Migration type '%s' is not
>>>>>>>>>>> supported by device %s",
>>>>>>>>>>> + VhostUserMigrationType_str(fs->migration_type), path);
>>>>>>>>>>> +        break;
>>>>>>>>>>> +    }
>>>>>>>>>>> +
>>>>>>>>>>> +    return -1;
>>>>>>>>>>> +}
>>>>>>>>>> Should we also add this as .pre_load, to force user select
>>>>>>>>>> correct migration_type on target too?
>>>>>>>>> In fact, I would claim we only want pre_load.
>>>>>>>>> When qemu is started on destination we know where it's migrated
>>>>>>>>> from so this flag can be set.
>>>>>>>>> When qemu is started on source we generally do not yet know so
>>>>>>>>> we don't know whether it's safe to set this flag.
>>>>>>> But destination is a "source" for next migration, so there shouldn't be
>>>>>>> real difference.
>>>>>>> The new property has ".realized_set_allowed = true", so, as I understand
>>>>>>> it may be changed at any time, so that's not a problem.
>>>>>> Yes, exactly. So destination's property sets not how it will handle this
>>>>>> incoming
>>>>>> migration but the future outgoing one.
>>>>> How do you know where you are going to migrate though?
>>>>> I think you don't.
>>>>> Setting it on source is better since we know where we
>>>>> are migrating from.
>>>> Yes, I don't know where I'm going to migrate to. This is why property
>>>> affects only how source saves state on outgoing migration.
>>> Um. I don't get the logic.
>> For this feature to work we need orchestrator to manage the migration. And
>> we
>> generally assume that it is responsibility of orchestrator to ensure
>> matching
>> properties on source and destination.
>> As orchestrator manages both sides of migration it can set option (and we
>> can
>> check it) on either source or destination. Now it's not important which side
>> we
>> select, because now the option is essentially binary allow/deny (but IMHO it
>> is much better to refuse source to migrate than find later that state can't
>> be
>> loaded by destination, in case of file migration this becomes especially
>> painful).
>>
>> But there are plans to add internal migration option (extract FUSE state
>> from
>> backend and transfer it in QEMU migration stream), and that's where
>> setting/checking
>> on source becomes important because it will rely on this property to decide
>> if
>> extra state form backend needs to be put in the migration stream subsection.
>
> If we do internal migration that will be a different property
> which has to match on source *and* destination.

I'm not sure we need another property. The initial idea was to let the
orchestrator set up which part of the state qemu should put into the stream
so that it is sufficient to restore the VM on the destination.
But this depends on how external migration will be implemented.

>
>
>> If you are concerned about orchestrator breaking assumption of matching
>> properties
>> on source and destination this is not really supported AFAIK but I don't
>> think we
>> need to punish it for this, maybe it has its reasons: I can imagine scenario
>> where orchestrator could want to migrate from source with
>> 'migration=external'
>> to destination with 'migration=none' to ensure that destination can't be
>> migrated further.
> No. I am concerned about a simple practical matter:
> - I decide to restart qemu on the same host - so I need to enable
>    migration
> - Later I decide to migrate qemu to another host - this should be
>    blocked
>
>
> Property on source does not satisfy both at the same time.
> Property on destination does.

If the destination QEMUs on the local and remote hosts have the same
properties, how can we write a check that passes on the same host and fails
on a remote one?
Sorry, I don't understand how qemu can help handle this. It knows nothing
about the hosts, so it is the responsibility of the management software to
know where it can migrate and to configure it appropriately.

Maybe I didn't understand your scenario or what you propose to check on
destination. Could you explain a bit more?

>
>
>
>>>
>>>>>>>> This property selects if VM can migrate and if it can what should
>>>>>>>> qemu put
>>>>>>>> to the migration stream. So we select on source what type of
>>>>>>>> migration is
>>>>>>>> allowed for this VM, destination can't check anything at load time.
>>>>>>> OK, so the new field "migration" regulates only outgoing migration and
>>>>>>> do nothing for incoming. On incoming migration the migration stream
>>>>>>> itself defines the type of device migration.
>>>>>>> Worth mentioning in doc?
>>>>>> Good point. I don't think this deserves a respin but if I have to send v4
>>>>>> I'll include
>>>>>> clarification in it.
Michael S. Tsirkin Feb. 23, 2023, 7:36 a.m. UTC | #19
On Wed, Feb 22, 2023 at 03:21:42PM -0500, Michael S. Tsirkin wrote:
> On Wed, Feb 22, 2023 at 08:25:19PM +0200, Anton Kuchin wrote:
> > On 22/02/2023 19:12, Michael S. Tsirkin wrote:
> > > On Wed, Feb 22, 2023 at 07:05:47PM +0200, Anton Kuchin wrote:
> > > > On 22/02/2023 18:51, Michael S. Tsirkin wrote:
> > > > > On Wed, Feb 22, 2023 at 06:49:10PM +0200, Anton Kuchin wrote:
> > > > > > On 22/02/2023 17:14, Vladimir Sementsov-Ogievskiy wrote:
> > > > > > > On 22.02.23 17:25, Anton Kuchin wrote:
> > > > > > > > > > > +static int vhost_user_fs_pre_save(void *opaque)
> > > > > > > > > > > +{
> > > > > > > > > > > +    VHostUserFS *fs = opaque;
> > > > > > > > > > > +    g_autofree char *path = object_get_canonical_path(OBJECT(fs));
> > > > > > > > > > > +
> > > > > > > > > > > +    switch (fs->migration_type) {
> > > > > > > > > > > +    case VHOST_USER_MIGRATION_TYPE_NONE:
> > > > > > > > > > > +        error_report("Migration is blocked by device %s", path);
> > > > > > > > > > > +        break;
> > > > > > > > > > > +    case VHOST_USER_MIGRATION_TYPE_EXTERNAL:
> > > > > > > > > > > +        return 0;
> > > > > > > > > > > +    default:
> > > > > > > > > > > +        error_report("Migration type '%s' is not
> > > > > > > > > > > supported by device %s",
> > > > > > > > > > > + VhostUserMigrationType_str(fs->migration_type), path);
> > > > > > > > > > > +        break;
> > > > > > > > > > > +    }
> > > > > > > > > > > +
> > > > > > > > > > > +    return -1;
> > > > > > > > > > > +}
> > > > > > > > > > Should we also add this as .pre_load, to force user select
> > > > > > > > > > correct migration_type on target too?
> > > > > > > > > In fact, I would claim we only want pre_load.
> > > > > > > > > When qemu is started on destination we know where it's migrated
> > > > > > > > > from so this flag can be set.
> > > > > > > > > When qemu is started on source we generally do not yet know so
> > > > > > > > > we don't know whether it's safe to set this flag.
> > > > > > > But destination is a "source" for next migration, so there shouldn't be
> > > > > > > real difference.
> > > > > > > The new property has ".realized_set_allowed = true", so, as I understand
> > > > > > > it may be changed at any time, so that's not a problem.
> > > > > > Yes, exactly. So destination's property sets not how it will handle this
> > > > > > incoming
> > > > > > migration but the future outgoing one.
> > > > > How do you know where you are going to migrate though?
> > > > > I think you don't.
> > > > > Setting it on source is better since we know where we
> > > > > are migrating from.
> > > > Yes, I don't know where I'm going to migrate to. This is why property
> > > > affects only how source saves state on outgoing migration.
> > > Um. I don't get the logic.
> > 
> > For this feature to work we need orchestrator to manage the migration. And
> > we
> > generally assume that it is responsibility of orchestrator to ensure
> > matching
> > properties on source and destination.
> > As orchestrator manages both sides of migration it can set option (and we
> > can
> > check it) on either source or destination. Now it's not important which side
> > we
> > select, because now the option is essentially binary allow/deny (but IMHO it
> > is much better to refuse source to migrate than find later that state can't
> > be
> > loaded by destination, in case of file migration this becomes especially
> > painful).
> > 
> > But there are plans to add internal migration option (extract FUSE state
> > from
> > backend and transfer it in QEMU migration stream), and that's where
> > setting/checking
> > on source becomes important because it will rely on this property to decide
> > if
> > extra state form backend needs to be put in the migration stream subsection.
> 
> 
> If we do internal migration that will be a different property
> which has to match on source *and* destination.
> 
> 
> > If you are concerned about orchestrator breaking assumption of matching
> > properties
> > on source and destination this is not really supported AFAIK but I don't
> > think we
> > need to punish it for this, maybe it has its reasons: I can imagine scenario
> > where orchestrator could want to migrate from source with
> > 'migration=external'
> > to destination with 'migration=none' to ensure that destination can't be
> > migrated further.
> 
> No. I am concerned about a simple practical matter:
> - I decide to restart qemu on the same host - so I need to enable
>   migration
> - Later I decide to migrate qemu to another host - this should be
>   blocked
> 
> 
> Property on source does not satisfy both at the same time.
> Property on destination does.


Stefan what's your take on this? Should we move this from
save to load hook?

> 
> 
> > > 
> > > 
> > > > > > > > This property selects if VM can migrate and if it can what should
> > > > > > > > qemu put
> > > > > > > > to the migration stream. So we select on source what type of
> > > > > > > > migration is
> > > > > > > > allowed for this VM, destination can't check anything at load time.
> > > > > > > OK, so the new field "migration" regulates only outgoing migration and
> > > > > > > do nothing for incoming. On incoming migration the migration stream
> > > > > > > itself defines the type of device migration.
> > > > > > > Worth mentioning in doc?
> > > > > > Good point. I don't think this deserves a respin but if I have to send v4
> > > > > > I'll include
> > > > > > clarification in it.
Stefan Hajnoczi Feb. 23, 2023, 9:24 p.m. UTC | #20
On Thu, Feb 23, 2023 at 02:36:33AM -0500, Michael S. Tsirkin wrote:
> On Wed, Feb 22, 2023 at 03:21:42PM -0500, Michael S. Tsirkin wrote:
> > On Wed, Feb 22, 2023 at 08:25:19PM +0200, Anton Kuchin wrote:
> > > On 22/02/2023 19:12, Michael S. Tsirkin wrote:
> > > > On Wed, Feb 22, 2023 at 07:05:47PM +0200, Anton Kuchin wrote:
> > > > > On 22/02/2023 18:51, Michael S. Tsirkin wrote:
> > > > > > On Wed, Feb 22, 2023 at 06:49:10PM +0200, Anton Kuchin wrote:
> > > > > > > On 22/02/2023 17:14, Vladimir Sementsov-Ogievskiy wrote:
> > > > > > > > On 22.02.23 17:25, Anton Kuchin wrote:
> > > > > > > > > > > > +static int vhost_user_fs_pre_save(void *opaque)
> > > > > > > > > > > > +{
> > > > > > > > > > > > +    VHostUserFS *fs = opaque;
> > > > > > > > > > > > +    g_autofree char *path = object_get_canonical_path(OBJECT(fs));
> > > > > > > > > > > > +
> > > > > > > > > > > > +    switch (fs->migration_type) {
> > > > > > > > > > > > +    case VHOST_USER_MIGRATION_TYPE_NONE:
> > > > > > > > > > > > +        error_report("Migration is blocked by device %s", path);
> > > > > > > > > > > > +        break;
> > > > > > > > > > > > +    case VHOST_USER_MIGRATION_TYPE_EXTERNAL:
> > > > > > > > > > > > +        return 0;
> > > > > > > > > > > > +    default:
> > > > > > > > > > > > +        error_report("Migration type '%s' is not
> > > > > > > > > > > > supported by device %s",
> > > > > > > > > > > > + VhostUserMigrationType_str(fs->migration_type), path);
> > > > > > > > > > > > +        break;
> > > > > > > > > > > > +    }
> > > > > > > > > > > > +
> > > > > > > > > > > > +    return -1;
> > > > > > > > > > > > +}
> > > > > > > > > > > Should we also add this as .pre_load, to force user select
> > > > > > > > > > > correct migration_type on target too?
> > > > > > > > > > In fact, I would claim we only want pre_load.
> > > > > > > > > > When qemu is started on destination we know where it's migrated
> > > > > > > > > > from so this flag can be set.
> > > > > > > > > > When qemu is started on source we generally do not yet know so
> > > > > > > > > > we don't know whether it's safe to set this flag.
> > > > > > > > But destination is a "source" for next migration, so there shouldn't be
> > > > > > > > real difference.
> > > > > > > > The new property has ".realized_set_allowed = true", so, as I understand
> > > > > > > > it may be changed at any time, so that's not a problem.
> > > > > > > Yes, exactly. So destination's property sets not how it will handle this
> > > > > > > incoming
> > > > > > > migration but the future outgoing one.
> > > > > > How do you know where you are going to migrate though?
> > > > > > I think you don't.
> > > > > > Setting it on source is better since we know where we
> > > > > > are migrating from.
> > > > > Yes, I don't know where I'm going to migrate to. This is why property
> > > > > affects only how source saves state on outgoing migration.
> > > > Um. I don't get the logic.
> > > 
> > > For this feature to work we need orchestrator to manage the migration. And
> > > we
> > > generally assume that it is responsibility of orchestrator to ensure
> > > matching
> > > properties on source and destination.
> > > As orchestrator manages both sides of migration it can set option (and we
> > > can
> > > check it) on either source or destination. Now it's not important which side
> > > we
> > > select, because now the option is essentially binary allow/deny (but IMHO it
> > > is much better to refuse source to migrate than find later that state can't
> > > be
> > > loaded by destination, in case of file migration this becomes especially
> > > painful).
> > > 
> > > But there are plans to add internal migration option (extract FUSE state
> > > from
> > > backend and transfer it in QEMU migration stream), and that's where
> > > setting/checking
> > > on source becomes important because it will rely on this property to decide
> > > if
> > > extra state form backend needs to be put in the migration stream subsection.
> > 
> > 
> > If we do internal migration that will be a different property
> > which has to match on source *and* destination.
> > 
> > 
> > > If you are concerned about orchestrator breaking assumption of matching
> > > properties
> > > on source and destination this is not really supported AFAIK but I don't
> > > think we
> > > need to punish it for this, maybe it has its reasons: I can imagine scenario
> > > where orchestrator could want to migrate from source with
> > > 'migration=external'
> > > to destination with 'migration=none' to ensure that destination can't be
> > > migrated further.
> > 
> > No. I am concerned about a simple practical matter:
> > - I decide to restart qemu on the same host - so I need to enable
> >   migration
> > - Later I decide to migrate qemu to another host - this should be
> >   blocked
> > 
> > 
> > Property on source does not satisfy both at the same time.
> > Property on destination does.
> 
> 
> Stefan what's your take on this? Should we move this from
> save to load hook?

This property can be changed on the source at runtime via qom-set, so
you don't need to predict the future. The device can be started from an
incoming migration with "external" and then set to "stateful" migration
to migrate to another host later on.

Anton, can you share the virtiofsd patches so we have a full picture of
how "external" migration works? I'd like to understand the workflow and
also how it can be extended when "stateful" migration is added.

> 
> > 
> > 
> > > > 
> > > > 
> > > > > > > > > This property selects if VM can migrate and if it can what should
> > > > > > > > > qemu put
> > > > > > > > > to the migration stream. So we select on source what type of
> > > > > > > > > migration is
> > > > > > > > > allowed for this VM, destination can't check anything at load time.
> > > > > > > > OK, so the new field "migration" regulates only outgoing migration and
> > > > > > > > do nothing for incoming. On incoming migration the migration stream
> > > > > > > > itself defines the type of device migration.
> > > > > > > > Worth mentioning in doc?
> > > > > > > Good point. I don't think this deserves a respin but if I have to send v4
> > > > > > > I'll include
> > > > > > > clarification in it.
>
Anton Kuchin Feb. 24, 2023, 4:14 a.m. UTC | #21
On 23/02/2023 23:24, Stefan Hajnoczi wrote:
> On Thu, Feb 23, 2023 at 02:36:33AM -0500, Michael S. Tsirkin wrote:
>> On Wed, Feb 22, 2023 at 03:21:42PM -0500, Michael S. Tsirkin wrote:
>>> On Wed, Feb 22, 2023 at 08:25:19PM +0200, Anton Kuchin wrote:
>>>> On 22/02/2023 19:12, Michael S. Tsirkin wrote:
>>>>> On Wed, Feb 22, 2023 at 07:05:47PM +0200, Anton Kuchin wrote:
>>>>>> On 22/02/2023 18:51, Michael S. Tsirkin wrote:
>>>>>>> On Wed, Feb 22, 2023 at 06:49:10PM +0200, Anton Kuchin wrote:
>>>>>>>> On 22/02/2023 17:14, Vladimir Sementsov-Ogievskiy wrote:
>>>>>>>>> On 22.02.23 17:25, Anton Kuchin wrote:
>>>>>>>>>>>>> +static int vhost_user_fs_pre_save(void *opaque)
>>>>>>>>>>>>> +{
>>>>>>>>>>>>> +    VHostUserFS *fs = opaque;
>>>>>>>>>>>>> +    g_autofree char *path = object_get_canonical_path(OBJECT(fs));
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +    switch (fs->migration_type) {
>>>>>>>>>>>>> +    case VHOST_USER_MIGRATION_TYPE_NONE:
>>>>>>>>>>>>> +        error_report("Migration is blocked by device %s", path);
>>>>>>>>>>>>> +        break;
>>>>>>>>>>>>> +    case VHOST_USER_MIGRATION_TYPE_EXTERNAL:
>>>>>>>>>>>>> +        return 0;
>>>>>>>>>>>>> +    default:
>>>>>>>>>>>>> +        error_report("Migration type '%s' is not
>>>>>>>>>>>>> supported by device %s",
>>>>>>>>>>>>> + VhostUserMigrationType_str(fs->migration_type), path);
>>>>>>>>>>>>> +        break;
>>>>>>>>>>>>> +    }
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +    return -1;
>>>>>>>>>>>>> +}
>>>>>>>>>>>> Should we also add this as .pre_load, to force user select
>>>>>>>>>>>> correct migration_type on target too?
>>>>>>>>>>> In fact, I would claim we only want pre_load.
>>>>>>>>>>> When qemu is started on destination we know where it's migrated
>>>>>>>>>>> from so this flag can be set.
>>>>>>>>>>> When qemu is started on source we generally do not yet know so
>>>>>>>>>>> we don't know whether it's safe to set this flag.
>>>>>>>>> But destination is a "source" for next migration, so there shouldn't be
>>>>>>>>> real difference.
>>>>>>>>> The new property has ".realized_set_allowed = true", so, as I understand
>>>>>>>>> it may be changed at any time, so that's not a problem.
>>>>>>>> Yes, exactly. So destination's property sets not how it will handle this
>>>>>>>> incoming
>>>>>>>> migration but the future outgoing one.
>>>>>>> How do you know where you are going to migrate though?
>>>>>>> I think you don't.
>>>>>>> Setting it on source is better since we know where we
>>>>>>> are migrating from.
>>>>>> Yes, I don't know where I'm going to migrate to. This is why property
>>>>>> affects only how source saves state on outgoing migration.
>>>>> Um. I don't get the logic.
>>>> For this feature to work we need orchestrator to manage the migration. And
>>>> we
>>>> generally assume that it is responsibility of orchestrator to ensure
>>>> matching
>>>> properties on source and destination.
>>>> As orchestrator manages both sides of migration it can set option (and we
>>>> can
>>>> check it) on either source or destination. Now it's not important which side
>>>> we
>>>> select, because now the option is essentially binary allow/deny (but IMHO it
>>>> is much better to refuse source to migrate than find later that state can't
>>>> be
>>>> loaded by destination, in case of file migration this becomes especially
>>>> painful).
>>>>
>>>> But there are plans to add internal migration option (extract FUSE state
>>>> from
>>>> backend and transfer it in QEMU migration stream), and that's where
>>>> setting/checking
>>>> on source becomes important because it will rely on this property to decide
>>>> if
>>>> extra state form backend needs to be put in the migration stream subsection.
>>>
>>> If we do internal migration that will be a different property
>>> which has to match on source *and* destination.
>>>
>>>
>>>> If you are concerned about orchestrator breaking assumption of matching
>>>> properties
>>>> on source and destination this is not really supported AFAIK but I don't
>>>> think we
>>>> need to punish it for this, maybe it has its reasons: I can imagine scenario
>>>> where orchestrator could want to migrate from source with
>>>> 'migration=external'
>>>> to destination with 'migration=none' to ensure that destination can't be
>>>> migrated further.
>>> No. I am concerned about a simple practical matter:
>>> - I decide to restart qemu on the same host - so I need to enable
>>>    migration
>>> - Later I decide to migrate qemu to another host - this should be
>>>    blocked
>>>
>>>
>>> Property on source does not satisfy both at the same time.
>>> Property on destination does.
>>
>> Stefan what's your take on this? Should we move this from
>> save to load hook?
> This property can be changed on the source at runtime via qom-set, so
> you don't need to predict the future. The device can be started from an
> incoming migration with "external" and then set to "stateful" migration
> to migrate to another host later on.
>
> Anton, can you share the virtiofsd patches so we have a full picture of
> how "external" migration works? I'd like to understand the workflow and
> also how it can be extended when "stateful" migration is added.

Unfortunately the internal implementation relies heavily on our
infrastructure, and rust virtiofsd still lacks dirty tracking, so it is
not ready yet. But I did have a PoC for the now-deprecated C virtiofsd
that I didn't bother to prepare for upstreaming because the C version
was declared unsupported. It essentially adds reconnect support, and
this was the only thing required from virtiofsd to support migration
via file.

If this helps I'll try to find the patches or recreate them and will be
happy to share.

>
>>>
>>>>>
>>>>>>>>>> This property selects if VM can migrate and if it can what should
>>>>>>>>>> qemu put
>>>>>>>>>> to the migration stream. So we select on source what type of
>>>>>>>>>> migration is
>>>>>>>>>> allowed for this VM, destination can't check anything at load time.
>>>>>>>>> OK, so the new field "migration" regulates only outgoing migration and
>>>>>>>>> do nothing for incoming. On incoming migration the migration stream
>>>>>>>>> itself defines the type of device migration.
>>>>>>>>> Worth mentioning in doc?
>>>>>>>> Good point. I don't think this deserves a respin but if I have to send v4
>>>>>>>> I'll include
>>>>>>>> clarification in it.
Michael S. Tsirkin Feb. 24, 2023, 8:47 a.m. UTC | #22
On Thu, Feb 23, 2023 at 04:24:43PM -0500, Stefan Hajnoczi wrote:
> On Thu, Feb 23, 2023 at 02:36:33AM -0500, Michael S. Tsirkin wrote:
> > On Wed, Feb 22, 2023 at 03:21:42PM -0500, Michael S. Tsirkin wrote:
> > > On Wed, Feb 22, 2023 at 08:25:19PM +0200, Anton Kuchin wrote:
> > > > On 22/02/2023 19:12, Michael S. Tsirkin wrote:
> > > > > On Wed, Feb 22, 2023 at 07:05:47PM +0200, Anton Kuchin wrote:
> > > > > > On 22/02/2023 18:51, Michael S. Tsirkin wrote:
> > > > > > > On Wed, Feb 22, 2023 at 06:49:10PM +0200, Anton Kuchin wrote:
> > > > > > > > On 22/02/2023 17:14, Vladimir Sementsov-Ogievskiy wrote:
> > > > > > > > > On 22.02.23 17:25, Anton Kuchin wrote:
> > > > > > > > > > > > > +static int vhost_user_fs_pre_save(void *opaque)
> > > > > > > > > > > > > +{
> > > > > > > > > > > > > +    VHostUserFS *fs = opaque;
> > > > > > > > > > > > > +    g_autofree char *path = object_get_canonical_path(OBJECT(fs));
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +    switch (fs->migration_type) {
> > > > > > > > > > > > > +    case VHOST_USER_MIGRATION_TYPE_NONE:
> > > > > > > > > > > > > +        error_report("Migration is blocked by device %s", path);
> > > > > > > > > > > > > +        break;
> > > > > > > > > > > > > +    case VHOST_USER_MIGRATION_TYPE_EXTERNAL:
> > > > > > > > > > > > > +        return 0;
> > > > > > > > > > > > > +    default:
> > > > > > > > > > > > > +        error_report("Migration type '%s' is not
> > > > > > > > > > > > > supported by device %s",
> > > > > > > > > > > > > + VhostUserMigrationType_str(fs->migration_type), path);
> > > > > > > > > > > > > +        break;
> > > > > > > > > > > > > +    }
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +    return -1;
> > > > > > > > > > > > > +}
> > > > > > > > > > > > Should we also add this as .pre_load, to force user select
> > > > > > > > > > > > correct migration_type on target too?
> > > > > > > > > > > In fact, I would claim we only want pre_load.
> > > > > > > > > > > When qemu is started on destination we know where it's migrated
> > > > > > > > > > > from so this flag can be set.
> > > > > > > > > > > When qemu is started on source we generally do not yet know so
> > > > > > > > > > > we don't know whether it's safe to set this flag.
> > > > > > > > > But destination is a "source" for next migration, so there shouldn't be
> > > > > > > > > real difference.
> > > > > > > > > The new property has ".realized_set_allowed = true", so, as I understand
> > > > > > > > > it may be changed at any time, so that's not a problem.
> > > > > > > > Yes, exactly. So destination's property sets not how it will handle this
> > > > > > > > incoming
> > > > > > > > migration but the future outgoing one.
> > > > > > > How do you know where you are going to migrate though?
> > > > > > > I think you don't.
> > > > > > > Setting it on source is better since we know where we
> > > > > > > are migrating from.
> > > > > > Yes, I don't know where I'm going to migrate to. This is why property
> > > > > > affects only how source saves state on outgoing migration.
> > > > > Um. I don't get the logic.
> > > > 
> > > > For this feature to work we need orchestrator to manage the migration. And
> > > > we
> > > > generally assume that it is responsibility of orchestrator to ensure
> > > > matching
> > > > properties on source and destination.
> > > > As orchestrator manages both sides of migration it can set option (and we
> > > > can
> > > > check it) on either source or destination. Now it's not important which side
> > > > we
> > > > select, because now the option is essentially binary allow/deny (but IMHO it
> > > > is much better to refuse source to migrate than find later that state can't
> > > > be
> > > > loaded by destination, in case of file migration this becomes especially
> > > > painful).
> > > > 
> > > > But there are plans to add internal migration option (extract FUSE state
> > > > from
> > > > backend and transfer it in QEMU migration stream), and that's where
> > > > setting/checking
> > > > on source becomes important because it will rely on this property to decide
> > > > if
> > > > extra state form backend needs to be put in the migration stream subsection.
> > > 
> > > 
> > > If we do internal migration that will be a different property
> > > which has to match on source *and* destination.
> > > 
> > > 
> > > > If you are concerned about orchestrator breaking assumption of matching
> > > > properties
> > > > on source and destination this is not really supported AFAIK but I don't
> > > > think we
> > > > need to punish it for this, maybe it has its reasons: I can imagine scenario
> > > > where orchestrator could want to migrate from source with
> > > > 'migration=external'
> > > > to destination with 'migration=none' to ensure that destination can't be
> > > > migrated further.
> > > 
> > > No. I am concerned about a simple practical matter:
> > > - I decide to restart qemu on the same host - so I need to enable
> > >   migration
> > > - Later I decide to migrate qemu to another host - this should be
> > >   blocked
> > > 
> > > 
> > > Property on source does not satisfy both at the same time.
> > > Property on destination does.
> > 
> > 
> > Stefan what's your take on this? Should we move this from
> > save to load hook?
> 
> This property can be changed on the source at runtime via qom-set, so
> you don't need to predict the future. The device can be started from an
> incoming migration with "external" and then set to "stateful" migration
> to migrate to another host later on.

And then you are supposed to change it back if migration fails?
External tool failures have to be handled ...
How likely is all this fragile dance not to have bugs?

And all of this effort for what? Just to make the case of a buggy
management tool fail a bit faster - is it really worth it?

Compare to setting it on destination where you can set it
on command line or through qom and forget about it.
No?

> Anton, can you share the virtiofsd patches so we have a full picture of
> how "external" migration works? I'd like to understand the workflow and
> also how it can be extended when "stateful" migration is added.
> 
> > 
> > > 
> > > 
> > > > > 
> > > > > 
> > > > > > > > > > This property selects if VM can migrate and if it can what should
> > > > > > > > > > qemu put
> > > > > > > > > > to the migration stream. So we select on source what type of
> > > > > > > > > > migration is
> > > > > > > > > > allowed for this VM, destination can't check anything at load time.
> > > > > > > > > OK, so the new field "migration" regulates only outgoing migration and
> > > > > > > > > do nothing for incoming. On incoming migration the migration stream
> > > > > > > > > itself defines the type of device migration.
> > > > > > > > > Worth mentioning in doc?
> > > > > > > > Good point. I don't think this deserves a respin but if I have to send v4
> > > > > > > > I'll include
> > > > > > > > clarification in it.
> >
Anton Kuchin Feb. 27, 2023, 10:19 a.m. UTC | #23
On 24/02/2023 06:14, Anton Kuchin wrote:
> On 23/02/2023 23:24, Stefan Hajnoczi wrote:
>> On Thu, Feb 23, 2023 at 02:36:33AM -0500, Michael S. Tsirkin wrote:
>>> On Wed, Feb 22, 2023 at 03:21:42PM -0500, Michael S. Tsirkin wrote:
>>>> On Wed, Feb 22, 2023 at 08:25:19PM +0200, Anton Kuchin wrote:
>>>>> On 22/02/2023 19:12, Michael S. Tsirkin wrote:
>>>>>> On Wed, Feb 22, 2023 at 07:05:47PM +0200, Anton Kuchin wrote:
>>>>>>> On 22/02/2023 18:51, Michael S. Tsirkin wrote:
>>>>>>>> On Wed, Feb 22, 2023 at 06:49:10PM +0200, Anton Kuchin wrote:
>>>>>>>>> On 22/02/2023 17:14, Vladimir Sementsov-Ogievskiy wrote:
>>>>>>>>>> On 22.02.23 17:25, Anton Kuchin wrote:
>>>>>>>>>>>>>> +static int vhost_user_fs_pre_save(void *opaque)
>>>>>>>>>>>>>> +{
>>>>>>>>>>>>>> +    VHostUserFS *fs = opaque;
>>>>>>>>>>>>>> +    g_autofree char *path = 
>>>>>>>>>>>>>> object_get_canonical_path(OBJECT(fs));
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +    switch (fs->migration_type) {
>>>>>>>>>>>>>> +    case VHOST_USER_MIGRATION_TYPE_NONE:
>>>>>>>>>>>>>> +        error_report("Migration is blocked by device 
>>>>>>>>>>>>>> %s", path);
>>>>>>>>>>>>>> +        break;
>>>>>>>>>>>>>> +    case VHOST_USER_MIGRATION_TYPE_EXTERNAL:
>>>>>>>>>>>>>> +        return 0;
>>>>>>>>>>>>>> +    default:
>>>>>>>>>>>>>> +        error_report("Migration type '%s' is not
>>>>>>>>>>>>>> supported by device %s",
>>>>>>>>>>>>>> + VhostUserMigrationType_str(fs->migration_type), path);
>>>>>>>>>>>>>> +        break;
>>>>>>>>>>>>>> +    }
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +    return -1;
>>>>>>>>>>>>>> +}
>>>>>>>>>>>>> Should we also add this as .pre_load, to force user select
>>>>>>>>>>>>> correct migration_type on target too?
>>>>>>>>>>>> In fact, I would claim we only want pre_load.
>>>>>>>>>>>> When qemu is started on destination we know where it's 
>>>>>>>>>>>> migrated
>>>>>>>>>>>> from so this flag can be set.
>>>>>>>>>>>> When qemu is started on source we generally do not yet know so
>>>>>>>>>>>> we don't know whether it's safe to set this flag.
>>>>>>>>>> But destination is a "source" for next migration, so there 
>>>>>>>>>> shouldn't be
>>>>>>>>>> real difference.
>>>>>>>>>> The new property has ".realized_set_allowed = true", so, as I 
>>>>>>>>>> understand
>>>>>>>>>> it may be changed at any time, so that's not a problem.
>>>>>>>>> Yes, exactly. So destination's property sets not how it will 
>>>>>>>>> handle this
>>>>>>>>> incoming
>>>>>>>>> migration but the future outgoing one.
>>>>>>>> How do you know where you are going to migrate though?
>>>>>>>> I think you don't.
>>>>>>>> Setting it on source is better since we know where we
>>>>>>>> are migrating from.
>>>>>>> Yes, I don't know where I'm going to migrate to. This is why 
>>>>>>> property
>>>>>>> affects only how source saves state on outgoing migration.
>>>>>> Um. I don't get the logic.
>>>>> For this feature to work we need orchestrator to manage the 
>>>>> migration. And
>>>>> we
>>>>> generally assume that it is responsibility of orchestrator to ensure
>>>>> matching
>>>>> properties on source and destination.
>>>>> As orchestrator manages both sides of migration it can set option 
>>>>> (and we
>>>>> can
>>>>> check it) on either source or destination. Now it's not important 
>>>>> which side
>>>>> we
>>>>> select, because now the option is essentially binary allow/deny 
>>>>> (but IMHO it
>>>>> is much better to refuse source to migrate than find later that 
>>>>> state can't
>>>>> be
>>>>> loaded by destination, in case of file migration this becomes 
>>>>> especially
>>>>> painful).
>>>>>
>>>>> But there are plans to add internal migration option (extract FUSE 
>>>>> state
>>>>> from
>>>>> backend and transfer it in QEMU migration stream), and that's where
>>>>> setting/checking
>>>>> on source becomes important because it will rely on this property 
>>>>> to decide
>>>>> if
>>>>> extra state form backend needs to be put in the migration stream 
>>>>> subsection.
>>>>
>>>> If we do internal migration that will be a different property
>>>> which has to match on source *and* destination.
>>>>
>>>>
>>>>> If you are concerned about orchestrator breaking assumption of 
>>>>> matching
>>>>> properties
>>>>> on source and destination this is not really supported AFAIK but I 
>>>>> don't
>>>>> think we
>>>>> need to punish it for this, maybe it has its reasons: I can 
>>>>> imagine scenario
>>>>> where orchestrator could want to migrate from source with
>>>>> 'migration=external'
>>>>> to destination with 'migration=none' to ensure that destination 
>>>>> can't be
>>>>> migrated further.
>>>> No. I am concerned about a simple practical matter:
>>>> - I decide to restart qemu on the same host - so I need to enable
>>>>    migration
>>>> - Later I decide to migrate qemu to another host - this should be
>>>>    blocked
>>>>
>>>>
>>>> Property on source does not satisfy both at the same time.
>>>> Property on destination does.
>>>
>>> Stefan what's your take on this? Should we move this from
>>> save to load hook?
>> This property can be changed on the source at runtime via qom-set, so
>> you don't need to predict the future. The device can be started from an
>> incoming migration with "external" and then set to "stateful" migration
>> to migrate to another host later on.
>>
>> Anton, can you share the virtiofsd patches so we have a full picture of
>> how "external" migration works? I'd like to understand the workflow and
>> also how it can be extended when "stateful" migration is added.
>
> Unfortunately internal implementation is relying heavily on our 
> infrastructure,
> and rust virtiofsd still lacks dirty tracking so it is not ready yet. 
> But I did
> have a PoC for deprecated now C virtiofsd that I didn't bother to prepare
> for upstreaming because C version was declared unsupported. It 
> essentially adds
> reconnect and this was the only thing required from virtiofsd to support
> migration via file.
>
> If this helps I'll try to find patches or recreate then and will be 
> happy to share.


I found my stash with the PoC. It has some TODOs and needs more work to
become submission quality, but I hope it can be helpful to show the idea.

I checked it with file migration on the same host to update the qemu
binary like this:
  1. Run tools/virtiofsd/virtiofsd with "--reconnect" flag
  2. Run src qemu VM with vhost-user-fs device
  3. Mount fs in guest and run fio with verification on test file in 
shared directory
  4. Command via QMP to migrate VM to file
  5. Run dst qemu VM with vhost-user-fs device and "-incoming defer" flag
  6. Command via QMP to load migration stream from file and continue VM
  7. Check that fio keeps running in guest with no errors

[PATCH] tools/virtiofsd: reconnect PoC

Keep the daemon listening after a disconnect so the session can be
continued after the VM is restored.

Signed-off-by: Anton Kuchin <antonkuchin@yandex-team.ru>
---
  tools/virtiofsd/fuse_lowlevel.h       |  1 +
  tools/virtiofsd/fuse_virtio.c         | 59 +++++++++++++++++----------
  tools/virtiofsd/helper.c              |  1 +
  tools/virtiofsd/passthrough_ll.c      | 49 ++++++++++++++++------
  tools/virtiofsd/passthrough_seccomp.c | 12 +++++-
  tools/virtiofsd/passthrough_seccomp.h |  2 +-
  6 files changed, 87 insertions(+), 37 deletions(-)

diff --git a/tools/virtiofsd/fuse_lowlevel.h 
b/tools/virtiofsd/fuse_lowlevel.h
index b889dae4de..d5f3bf05ba 100644
--- a/tools/virtiofsd/fuse_lowlevel.h
+++ b/tools/virtiofsd/fuse_lowlevel.h
@@ -1795,6 +1795,7 @@ struct fuse_cmdline_opts {
      int log_level;
      unsigned int max_idle_threads;
      unsigned long rlimit_nofile;
+    int reconnect;
  };

  /**
diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
index 9368e292e4..64782cd78d 100644
--- a/tools/virtiofsd/fuse_virtio.c
+++ b/tools/virtiofsd/fuse_virtio.c
@@ -837,6 +837,7 @@ static const VuDevIface fv_iface = {
  int virtio_loop(struct fuse_session *se)
  {
      fuse_log(FUSE_LOG_INFO, "%s: Entry\n", __func__);
+    int ret = 0;

      while (!fuse_session_exited(se)) {
          struct pollfd pf[1];
@@ -858,7 +859,13 @@ int virtio_loop(struct fuse_session *se)
              break;
          }
          assert(poll_res == 1);
-        if (pf[0].revents & (POLLERR | POLLHUP | POLLNVAL)) {
+        if (pf[0].revents & POLLHUP) {
+            fuse_log(FUSE_LOG_ERR, "%s: Client disconnected: %x\n", 
__func__,
+                     pf[0].revents);
+            ret = -1;
+            break;
+        }
+        if (pf[0].revents & (POLLERR | POLLNVAL)) {
              fuse_log(FUSE_LOG_ERR, "%s: Unexpected poll revents %x\n", 
__func__,
                       pf[0].revents);
              break;
@@ -885,7 +892,7 @@ int virtio_loop(struct fuse_session *se)
      stop_all_queues(se->virtio_dev);
      fuse_log(FUSE_LOG_INFO, "%s: Exit\n", __func__);

-    return 0;
+    return ret;
  }

  static void strreplace(char *s, char old, char new)
@@ -1015,25 +1022,31 @@ int virtio_session_mount(struct fuse_session *se)
  {
      int ret;

-    /*
-     * Test that unshare(CLONE_FS) works. fv_queue_worker() will need 
it. It's
-     * an unprivileged system call but some Docker/Moby versions are 
known to
-     * reject it via seccomp when CAP_SYS_ADMIN is not given.
-     *
-     * Note that the program is single-threaded here so this syscall has no
-     * visible effect and is safe to make.
-     */
-    ret = unshare(CLONE_FS);
-    if (ret == -1 && errno == EPERM) {
-        fuse_log(FUSE_LOG_ERR, "unshare(CLONE_FS) failed with EPERM. If "
-                "running in a container please check that the container "
-                "runtime seccomp policy allows unshare.\n");
-        return -1;
-    }

-    ret = fv_create_listen_socket(se);
-    if (ret < 0) {
-        return ret;
+    if (se->vu_listen_fd == -1) {
+        fuse_log(FUSE_LOG_INFO, "listenfd is closed. set it up\n");
+        /*
+         * Test that unshare(CLONE_FS) works. fv_queue_worker() will 
need it. It's
+         * an unprivileged system call but some Docker/Moby versions 
are known to
+         * reject it via seccomp when CAP_SYS_ADMIN is not given.
+         *
+         * Note that the program is single-threaded here so this 
syscall has no
+         * visible effect and is safe to make.
+         */
+        ret = unshare(CLONE_FS);
+        if (ret == -1 && errno == EPERM) {
+            fuse_log(FUSE_LOG_ERR, "unshare(CLONE_FS) failed with 
EPERM. If "
+                    "running in a container please check that the 
container "
+                    "runtime seccomp policy allows unshare.\n");
+            return -1;
+        }
+
+        ret = fv_create_listen_socket(se);
+        if (ret < 0) {
+            return ret;
+        }
+    } else {
+        fuse_log(FUSE_LOG_INFO, "listenfd is open. reconnecting\n");
      }

      se->fd = -1;
@@ -1046,8 +1059,8 @@ int virtio_session_mount(struct fuse_session *se)
          close(se->vu_listen_fd);
          return -1;
      }
-    close(se->vu_listen_fd);
-    se->vu_listen_fd = -1;
+
+    /* TODO: close vu_listen_fd here if not reconnect */
      fuse_log(FUSE_LOG_INFO, "%s: Received vhost-user socket connection\n",
               __func__);

@@ -1068,6 +1081,8 @@ int virtio_session_mount(struct fuse_session *se)

  void virtio_session_close(struct fuse_session *se)
  {
+    /* TODO: vu_listen_fd can be closed in session_mount */
+    close(se->vu_listen_fd);
      close(se->vu_socketfd);

      if (!se->virtio_dev) {
diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c
index f5f66f292c..236a58f109 100644
--- a/tools/virtiofsd/helper.c
+++ b/tools/virtiofsd/helper.c
@@ -53,6 +53,7 @@ static const struct fuse_opt fuse_helper_opts[] = {
      FUSE_HELPER_OPT_VALUE("log_level=info", log_level, FUSE_LOG_INFO),
      FUSE_HELPER_OPT_VALUE("log_level=warn", log_level, FUSE_LOG_WARNING),
      FUSE_HELPER_OPT_VALUE("log_level=err", log_level, FUSE_LOG_ERR),
+    FUSE_HELPER_OPT_VALUE("--reconnect", reconnect, 0),
      FUSE_OPT_END
  };

diff --git a/tools/virtiofsd/passthrough_ll.c 
b/tools/virtiofsd/passthrough_ll.c
index 40ea2ed27f..487d320c70 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -3819,7 +3819,7 @@ static void setup_wait_parent_capabilities(void)
  /*
   * Move to a new mount, net, and pid namespaces to isolate this process.
   */
-static void setup_namespaces(struct lo_data *lo, struct fuse_session *se)
+static void setup_namespaces(struct lo_data *lo, struct fuse_session 
*se, bool enable_reconnect)
  {
      pid_t child;

@@ -3829,12 +3829,21 @@ static void setup_namespaces(struct lo_data *lo, 
struct fuse_session *se)
       * is also needed so that we can remount /proc for the new pid
       * namespace.
       *
-     * Our UNIX domain sockets have been created.  Now we can move to
-     * an empty network namespace to prevent TCP/IP and other network
-     * activity in case this process is compromised.
+     * Our UNIX domain sockets have been created. If we don't need 
reconnect
+     * now we can move to an empty network namespace to prevent TCP/IP and
+     * other network activity in case this process is compromised.
+     *
+     * TODO: Need to double-check if this is necessary. Looks like unix 
sockets
+     * can be shared across network namespaces. Maybe it is better if 
socket can
+     * be created in new namespace (but before we forbid listen with 
seccomp).
       */
-    if (unshare(CLONE_NEWPID | CLONE_NEWNS | CLONE_NEWNET) != 0) {
-        fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWPID | CLONE_NEWNS): 
%m\n");
+    int unshare_flags = CLONE_NEWPID | CLONE_NEWNS;
+    if (!enable_reconnect) {
+        unshare_flags |= CLONE_NEWNET;
+    }
+    if (unshare(unshare_flags) != 0) {
+        fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWPID | CLONE_NEWNS%s): 
%m\n",
+                 enable_reconnect ? " | CLONE_NEWNET" : "");
          exit(1);
      }

@@ -4146,16 +4155,16 @@ static void setup_chroot(struct lo_data *lo)
   * source directory.  This reduces the impact of arbitrary code 
execution bugs.
   */
  static void setup_sandbox(struct lo_data *lo, struct fuse_session *se,
-                          bool enable_syslog)
+                          bool enable_syslog, bool enable_reconnect)
  {
      if (lo->sandbox == SANDBOX_NAMESPACE) {
-        setup_namespaces(lo, se);
+        setup_namespaces(lo, se, enable_reconnect);
          setup_mounts(lo->source);
      } else {
          setup_chroot(lo);
      }

-    setup_seccomp(enable_syslog);
+    setup_seccomp(enable_syslog, enable_reconnect);
      setup_capabilities(g_strdup(lo->modcaps));
  }

@@ -4500,13 +4509,27 @@ int main(int argc, char *argv[])
      /* Must be before sandbox since it wants /proc */
      setup_capng();

-    setup_sandbox(&lo, se, opts.syslog);
+    setup_sandbox(&lo, se, opts.syslog, opts.reconnect);

      setup_root(&lo, &lo.root);
-    /* Block until ctrl+c or fusermount -u */
-    ret = virtio_loop(se);
+    do {
+        /* TODO: this loop should be like {mount-loop-unmount} but setup of
+         * listen descriptor happens in mount and will fail after 
sandboxing.
+         * Need to extract setup to virtio_session_new where we can create
+         * socket before sandboxing and in session_mount only accept 
client.*/
+        /* Block until ctrl+c, fusermount -u or client disconnect */
+        ret = virtio_loop(se);
+        fuse_log(FUSE_LOG_ERR, "ret is %d, %s\n", ret,
+                 ret ? "reconnecting" : "terminating");
+        fuse_session_unmount(se);
+        if (ret) {
+            if (fuse_session_mount(se) != 0) {
+                fuse_log(FUSE_LOG_ERR, "failed to remount\n");
+                goto err_out3;
+            }
+        }
+    } while (opts.reconnect && !ret);

-    fuse_session_unmount(se);
      cleanup_capng();
  err_out3:
      fuse_remove_signal_handlers(se);
diff --git a/tools/virtiofsd/passthrough_seccomp.c 
b/tools/virtiofsd/passthrough_seccomp.c
index 0033dab493..8dd773a062 100644
--- a/tools/virtiofsd/passthrough_seccomp.c
+++ b/tools/virtiofsd/passthrough_seccomp.c
@@ -129,6 +129,11 @@ static const int syscall_allowlist_syslog[] = {
      SCMP_SYS(sendto),
  };

+/* Allow accept to handle client reconnect */
+static const int syscall_allowlist_reconnect[] = {
+    SCMP_SYS(accept),
+};
+
  static void add_allowlist(scmp_filter_ctx ctx, const int syscalls[], 
size_t len)
  {
      size_t i;
@@ -142,7 +147,7 @@ static void add_allowlist(scmp_filter_ctx ctx, const 
int syscalls[], size_t len)
      }
  }

-void setup_seccomp(bool enable_syslog)
+void setup_seccomp(bool enable_syslog, bool enable_reconnect)
  {
      scmp_filter_ctx ctx;

@@ -166,6 +171,11 @@ void setup_seccomp(bool enable_syslog)
                        G_N_ELEMENTS(syscall_allowlist_syslog));
      }

+    if (enable_reconnect) {
+        add_allowlist(ctx, syscall_allowlist_reconnect,
+ G_N_ELEMENTS(syscall_allowlist_reconnect));
+    }
+
      /* libvhost-user calls this for post-copy migration, we don't need 
it */
      if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOSYS),
                           SCMP_SYS(userfaultfd), 0) != 0) {
diff --git a/tools/virtiofsd/passthrough_seccomp.h 
b/tools/virtiofsd/passthrough_seccomp.h
index 12674fc050..bec47b114c 100644
--- a/tools/virtiofsd/passthrough_seccomp.h
+++ b/tools/virtiofsd/passthrough_seccomp.h
@@ -9,6 +9,6 @@
  #ifndef VIRTIOFSD_PASSTHROUGH_SECCOMP_H
  #define VIRTIOFSD_PASSTHROUGH_SECCOMP_H

-void setup_seccomp(bool enable_syslog);
+void setup_seccomp(bool enable_syslog, bool enable_reconnect);

  #endif /* VIRTIOFSD_PASSTHROUGH_SECCOMP_H */
Anton Kuchin Feb. 28, 2023, 2:30 p.m. UTC | #24
On 24/02/2023 10:47, Michael S. Tsirkin wrote:
> On Thu, Feb 23, 2023 at 04:24:43PM -0500, Stefan Hajnoczi wrote:
>> On Thu, Feb 23, 2023 at 02:36:33AM -0500, Michael S. Tsirkin wrote:
>>> On Wed, Feb 22, 2023 at 03:21:42PM -0500, Michael S. Tsirkin wrote:
>>>> On Wed, Feb 22, 2023 at 08:25:19PM +0200, Anton Kuchin wrote:
>>>>> On 22/02/2023 19:12, Michael S. Tsirkin wrote:
>>>>>> On Wed, Feb 22, 2023 at 07:05:47PM +0200, Anton Kuchin wrote:
>>>>>>> On 22/02/2023 18:51, Michael S. Tsirkin wrote:
>>>>>>>> On Wed, Feb 22, 2023 at 06:49:10PM +0200, Anton Kuchin wrote:
>>>>>>>>> On 22/02/2023 17:14, Vladimir Sementsov-Ogievskiy wrote:
>>>>>>>>>> On 22.02.23 17:25, Anton Kuchin wrote:
>>>>>>>>>>>>>> +static int vhost_user_fs_pre_save(void *opaque)
>>>>>>>>>>>>>> +{
>>>>>>>>>>>>>> +    VHostUserFS *fs = opaque;
>>>>>>>>>>>>>> +    g_autofree char *path = object_get_canonical_path(OBJECT(fs));
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +    switch (fs->migration_type) {
>>>>>>>>>>>>>> +    case VHOST_USER_MIGRATION_TYPE_NONE:
>>>>>>>>>>>>>> +        error_report("Migration is blocked by device %s", path);
>>>>>>>>>>>>>> +        break;
>>>>>>>>>>>>>> +    case VHOST_USER_MIGRATION_TYPE_EXTERNAL:
>>>>>>>>>>>>>> +        return 0;
>>>>>>>>>>>>>> +    default:
>>>>>>>>>>>>>> +        error_report("Migration type '%s' is not
>>>>>>>>>>>>>> supported by device %s",
>>>>>>>>>>>>>> + VhostUserMigrationType_str(fs->migration_type), path);
>>>>>>>>>>>>>> +        break;
>>>>>>>>>>>>>> +    }
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +    return -1;
>>>>>>>>>>>>>> +}
>>>>>>>>>>>>> Should we also add this as .pre_load, to force user select
>>>>>>>>>>>>> correct migration_type on target too?
>>>>>>>>>>>> In fact, I would claim we only want pre_load.
>>>>>>>>>>>> When qemu is started on destination we know where it's migrated
>>>>>>>>>>>> from so this flag can be set.
>>>>>>>>>>>> When qemu is started on source we generally do not yet know so
>>>>>>>>>>>> we don't know whether it's safe to set this flag.
>>>>>>>>>> But destination is a "source" for next migration, so there shouldn't be
>>>>>>>>>> real difference.
>>>>>>>>>> The new property has ".realized_set_allowed = true", so, as I understand
>>>>>>>>>> it may be changed at any time, so that's not a problem.
>>>>>>>>> Yes, exactly. So destination's property sets not how it will handle this
>>>>>>>>> incoming
>>>>>>>>> migration but the future outgoing one.
>>>>>>>> How do you know where you are going to migrate though?
>>>>>>>> I think you don't.
>>>>>>>> Setting it on source is better since we know where we
>>>>>>>> are migrating from.
>>>>>>> Yes, I don't know where I'm going to migrate to. This is why property
>>>>>>> affects only how source saves state on outgoing migration.
>>>>>> Um. I don't get the logic.
>>>>> For this feature to work we need orchestrator to manage the migration. And
>>>>> we
>>>>> generally assume that it is responsibility of orchestrator to ensure
>>>>> matching
>>>>> properties on source and destination.
>>>>> As orchestrator manages both sides of migration it can set option (and we
>>>>> can
>>>>> check it) on either source or destination. Now it's not important which side
>>>>> we
>>>>> select, because now the option is essentially binary allow/deny (but IMHO it
>>>>> is much better to refuse source to migrate than find later that state can't
>>>>> be
>>>>> loaded by destination, in case of file migration this becomes especially
>>>>> painful).
>>>>>
>>>>> But there are plans to add internal migration option (extract FUSE state
>>>>> from
>>>>> backend and transfer it in QEMU migration stream), and that's where
>>>>> setting/checking
>>>>> on source becomes important because it will rely on this property to decide
>>>>> if
>>>>> extra state form backend needs to be put in the migration stream subsection.
>>>>
>>>> If we do internal migration that will be a different property
>>>> which has to match on source *and* destination.
>>>>
>>>>
>>>>> If you are concerned about orchestrator breaking assumption of matching
>>>>> properties
>>>>> on source and destination this is not really supported AFAIK but I don't
>>>>> think we
>>>>> need to punish it for this, maybe it has its reasons: I can imagine scenario
>>>>> where orchestrator could want to migrate from source with
>>>>> 'migration=external'
>>>>> to destination with 'migration=none' to ensure that destination can't be
>>>>> migrated further.
>>>> No. I am concerned about a simple practical matter:
>>>> - I decide to restart qemu on the same host - so I need to enable
>>>>    migration
>>>> - Later I decide to migrate qemu to another host - this should be
>>>>    blocked
>>>>
>>>>
>>>> Property on source does not satisfy both at the same time.
>>>> Property on destination does.
>>>
>>> Stefan what's your take on this? Should we move this from
>>> save to load hook?
>> This property can be changed on the source at runtime via qom-set, so
>> you don't need to predict the future. The device can be started from an
>> incoming migration with "external" and then set to "stateful" migration
>> to migrate to another host later on.
> And then you are supposed to change it back if migration fails?
> External tool failures have to be handled ...
> How likely is all this fragile dance not to have bugs?
>
> And all of this effort for what? Just to make the case of a buggy
> management tool fail a bit faster - is it really worth it?
>
> Compare to setting it on destination where you can set it
> on command line or through qom and forget about it.
> No?

I really don't understand why and what do you want to check on
destination. This option is supposed to control outgoing migration
only, just to be compatible and extensible for future migration
implementations.
It doesn't affect in any way the ability to load the migration stream on
the destination.
Destination can decide if the stream can be loaded based only on the
information present in the migration stream and not on the device
properties.

So qemu with this patch can load any stream generated by qemu with
this patch because there is only one supported mode "external" and
I don't see why we shouldn't allow this.

And when "internal" mode is added newer qemu will support loading
both full and partial types of streams depending on what data is
present in the stream (and this is controlled by orchestrator that
is setting option on source side).
Future "internal" option is not planned to deprecate "external"
but they both will be valid and coexist simultaneously allowing
management to pick one or the other.

So having faster fail on source if something is misconfigured is
just a good side-effect we get for free, but not the real purpose.

There can be some benefit in creating all devices with "migration=none"
by default and enabling required type of migration just before it
is about to happen if orchestrator wants to be protected from
wrong migrations. This may be a little too much but if orchestrator
wants it can force such type of double-check and avoid guessing future
migration type when there will be more than one type supported.

>
>> Anton, can you share the virtiofsd patches so we have a full picture of
>> how "external" migration works? I'd like to understand the workflow and
>> also how it can be extended when "stateful" migration is added.
>>
>>>>
>>>>>>
>>>>>>>>>>> This property selects if VM can migrate and if it can what should
>>>>>>>>>>> qemu put
>>>>>>>>>>> to the migration stream. So we select on source what type of
>>>>>>>>>>> migration is
>>>>>>>>>>> allowed for this VM, destination can't check anything at load time.
>>>>>>>>>> OK, so the new field "migration" regulates only outgoing migration and
>>>>>>>>>> do nothing for incoming. On incoming migration the migration stream
>>>>>>>>>> itself defines the type of device migration.
>>>>>>>>>> Worth mentioning in doc?
>>>>>>>>> Good point. I don't think this deserves a respin but if I have to send v4
>>>>>>>>> I'll include
>>>>>>>>> clarification in it.
>
Michael S. Tsirkin Feb. 28, 2023, 2:57 p.m. UTC | #25
On Tue, Feb 28, 2023 at 04:30:36PM +0200, Anton Kuchin wrote:
> I really don't understand why and what do you want to check on
> destination.

Yes I understand your patch controls source. Let me try to rephrase
why I think it's better on destination.
Here's my understanding
- With vhost-user-fs state lives inside an external daemon.
A- If after load you connect to the same daemon you can get migration mostly
  for free.
B- If you connect to a different daemon then that daemon will need
  to pass information from original one.

Is this a fair summary?

Current solution is to set flag on the source meaning "I have an
orchestration tool that will make sure that either A or B is correct".

However both A and B can only be known when destination is known.
Especially as long as what we are really trying to do is just allow qemu
restarts, checking the flag on load will thus achieve it in a cleaner
way, in that orchestration tool can reasonably keep the flag
clear normally and only set it if restarting qemu locally.


By comparison, with your approach orchestration tool will have
to either always set the flag (risky since then we lose the
extra check that we coded) or keep it clear and set before migration
(complex).

I hope I explained what and why I want to check.

I am far from a vhost-user-fs expert so maybe I am wrong but
I wanted to make sure I got the point across even if other
disagree.
Anton Kuchin Feb. 28, 2023, 5:59 p.m. UTC | #26
On 28/02/2023 16:57, Michael S. Tsirkin wrote:
> On Tue, Feb 28, 2023 at 04:30:36PM +0200, Anton Kuchin wrote:
>> I really don't understand why and what do you want to check on
>> destination.
> Yes I understand your patch controls source. Let me try to rephrase
> why I think it's better on destination.
> Here's my understanding
> - With vhost-user-fs state lives inside an external daemon.
> A- If after load you connect to the same daemon you can get migration mostly
>    for free.
> B- If you connect to a different daemon then that daemon will need
>    to pass information from original one.
>
> Is this a fair summary?
>
> Current solution is to set flag on the source meaning "I have an
> orchestration tool that will make sure that either A or B is correct".
>
> However both A and B can only be known when destination is known.
> Especially as long as what we are really trying to do is just allow qemu
> restarts, Checking the flag on load will thus achive it in a cleaner
> way, in that orchestration tool can reasonably keep the flag
> clear normally and only set it if restarting qemu locally.
>
>
> By comparison, with your approach orchestration tool will have
> to either always set the flag (risky since then we lose the
> extra check that we coded) or keep it clear and set before migration
> (complex).
>
> I hope I explained what and why I want to check.
>
> I am far from a vhost-user-fs expert so maybe I am wrong but
> I wanted to make sure I got the point across even if other
> disagree.
>

Thank you for the explanation. Now I understand your concerns.

You are right about this mechanism being a bit risky if orchestrator is
not using it properly or clunky if it is used in a safest possible way.
That's why first attempt of this feature was with migration capability
to let orchestrator choose behavior right at the moment of migration.
But it has its own problems.

We can't move this check only to destination because one of main goals
was to prevent orchestrators that are unaware of vhost-user-fs specifics
from accidentally migrating such VMs. We can't rely here entirely on
destination to block this because if VM is migrated to file and then
can't be loaded by destination there is no way to fallback and resume
the source so we need to have some kind of blocker on source by default.

Said that checking on destination would need another flag and the safe
way of using this feature would require managing two flags instead of one
making it even more fragile. So I'd prefer not to make it more complex.

In my opinion the best way to use this property by orchestrator is to
leave default unmigratable behavior at start and just before migration when
destination is known enumerate all vhost-user-fs devices and set properties
according to their backends capability with QMP like you mentioned. This
gives us single point of making the decision for each device and avoids
guessing future at VM start.
But allowing setup via command-line is valid too because some backends may
always be capable of external migration independent of hosts and don't need
the manipulations with QMP before migration at all.
Stefan Hajnoczi Feb. 28, 2023, 7:18 p.m. UTC | #27
On Tue, 28 Feb 2023 at 09:58, Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Tue, Feb 28, 2023 at 04:30:36PM +0200, Anton Kuchin wrote:
> > I really don't understand why and what do you want to check on
> > destination.
>
> Yes I understand your patch controls source. Let me try to rephrase
> why I think it's better on destination.
> Here's my understanding
> - With vhost-user-fs state lives inside an external daemon.
> A- If after load you connect to the same daemon you can get migration mostly
>   for free.
> B- If you connect to a different daemon then that daemon will need
>   to pass information from original one.
>
> Is this a fair summary?
>
> Current solution is to set flag on the source meaning "I have an
> orchestration tool that will make sure that either A or B is correct".
>
> However both A and B can only be known when destination is known.
> Especially as long as what we are really trying to do is just allow qemu
> restarts, Checking the flag on load will thus achive it in a cleaner
> way, in that orchestration tool can reasonably keep the flag
> clear normally and only set it if restarting qemu locally.
>
>
> By comparison, with your approach orchestration tool will have
> to either always set the flag (risky since then we lose the
> extra check that we coded) or keep it clear and set before migration
> (complex).
>
> I hope I explained what and why I want to check.
>
> I am far from a vhost-user-fs expert so maybe I am wrong but
> I wanted to make sure I got the point across even if other
> disagree.

How do the source QEMU and virtiofsd know which migration mode to use?
The virtiofsd implementation might support both external and internal
migration. Source QEMU needs to know whether to ask source virtiofsd
for device state so it can be included in the migration stream or not.

Stefan
Michael S. Tsirkin Feb. 28, 2023, 9:24 p.m. UTC | #28
On Tue, Feb 28, 2023 at 07:59:54PM +0200, Anton Kuchin wrote:
> On 28/02/2023 16:57, Michael S. Tsirkin wrote:
> > On Tue, Feb 28, 2023 at 04:30:36PM +0200, Anton Kuchin wrote:
> > > I really don't understand why and what do you want to check on
> > > destination.
> > Yes I understand your patch controls source. Let me try to rephrase
> > why I think it's better on destination.
> > Here's my understanding
> > - With vhost-user-fs state lives inside an external daemon.
> > A- If after load you connect to the same daemon you can get migration mostly
> >    for free.
> > B- If you connect to a different daemon then that daemon will need
> >    to pass information from original one.
> > 
> > Is this a fair summary?
> > 
> > Current solution is to set flag on the source meaning "I have an
> > orchestration tool that will make sure that either A or B is correct".
> > 
> > However both A and B can only be known when destination is known.
> > Especially as long as what we are really trying to do is just allow qemu
> > restarts, Checking the flag on load will thus achive it in a cleaner
> > way, in that orchestration tool can reasonably keep the flag
> > clear normally and only set it if restarting qemu locally.
> > 
> > 
> > By comparison, with your approach orchestration tool will have
> > to either always set the flag (risky since then we lose the
> > extra check that we coded) or keep it clear and set before migration
> > (complex).
> > 
> > I hope I explained what and why I want to check.
> > 
> > I am far from a vhost-user-fs expert so maybe I am wrong but
> > I wanted to make sure I got the point across even if other
> > disagree.
> > 
> 
> Thank you for the explanation. Now I understand your concerns.
> 
> You are right about this mechanism being a bit risky if orchestrator is
> not using it properly or clunky if it is used in a safest possible way.
> That's why first attempt of this feature was with migration capability
> to let orchestrator choose behavior right at the moment of migration.
> But it has its own problems.
> 
> We can't move this check only to destination because one of main goals
> was to prevent orchestrators that are unaware of vhost-user-fs specifics
> from accidentally migrating such VMs. We can't rely here entirely on
> destination to block this because if VM is migrated to file and then
> can't be loaded by destination there is no way to fallback and resume
> the source so we need to have some kind of blocker on source by default.

Interesting.  Why is there no way? Just load it back on source? Isn't
this how any other load failure is managed? Because for sure you
need to manage these, they will happen.

> Said that checking on destination would need another flag and the safe
> way of using this feature would require managing two flags instead of one
> making it even more fragile. So I'd prefer not to make it more complex.
>
> In my opinion the best way to use this property by orchestrator is to
> leave default unmigratable behavior at start and just before migration when
> destination is known enumerate all vhost-user-fs devices and set properties
> according to their backends capability with QMP like you mentioned. This
> gives us single point of making the decision for each device and avoids
> guessing future at VM start.

this means that you need to remember what the values were and then
any failure on destination requires you to go back and set them
to original values. With possibility of crashes on the orchestrator
you also need to recall the temporary values in some file ...
This is huge complexity much worse than two flags.

Assuming we need two let's see whether just reload on source is good
enough.

> But allowing setup via command-line is valid too because some backends may
> always be capable of external migration independent of hosts and don't need
> the manipulations with QMP before migration at all.

I am much more worried that the realistic schenario is hard to manage
safely than about theoretical state migrating backends that don't exist.
Michael S. Tsirkin Feb. 28, 2023, 9:29 p.m. UTC | #29
On Tue, Feb 28, 2023 at 02:18:25PM -0500, Stefan Hajnoczi wrote:
> On Tue, 28 Feb 2023 at 09:58, Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Tue, Feb 28, 2023 at 04:30:36PM +0200, Anton Kuchin wrote:
> > > I really don't understand why and what do you want to check on
> > > destination.
> >
> > Yes I understand your patch controls source. Let me try to rephrase
> > why I think it's better on destination.
> > Here's my understanding
> > - With vhost-user-fs state lives inside an external daemon.
> > A- If after load you connect to the same daemon you can get migration mostly
> >   for free.
> > B- If you connect to a different daemon then that daemon will need
> >   to pass information from original one.
> >
> > Is this a fair summary?
> >
> > Current solution is to set flag on the source meaning "I have an
> > orchestration tool that will make sure that either A or B is correct".
> >
> > However both A and B can only be known when destination is known.
> > Especially as long as what we are really trying to do is just allow qemu
> > restarts, Checking the flag on load will thus achive it in a cleaner
> > way, in that orchestration tool can reasonably keep the flag
> > clear normally and only set it if restarting qemu locally.
> >
> >
> > By comparison, with your approach orchestration tool will have
> > to either always set the flag (risky since then we lose the
> > extra check that we coded) or keep it clear and set before migration
> > (complex).
> >
> > I hope I explained what and why I want to check.
> >
> > I am far from a vhost-user-fs expert so maybe I am wrong but
> > I wanted to make sure I got the point across even if other
> > disagree.
> 
> How do the source QEMU and virtiofsd know which migration mode to use?
> The virtiofsd implementation might support both external and internal
> migration. Source QEMU needs to know whether to ask source virtiofsd
> for device state so it can be included in the migration stream or not.
> 
> Stefan

Well internal migration does not exist yet.  So it is simple.
When it does maybe then we worry about how to support it?
When it does will there be a reason to do external one at all?
Why?  We do internal for everything else after all.

Or maybe we tie it to machine property? We'll know more when
it actually exists, if it ever does.
Michael S. Tsirkin Feb. 28, 2023, 9:54 p.m. UTC | #30
On Tue, Feb 28, 2023 at 04:29:22PM -0500, Michael S. Tsirkin wrote:
> On Tue, Feb 28, 2023 at 02:18:25PM -0500, Stefan Hajnoczi wrote:
> > On Tue, 28 Feb 2023 at 09:58, Michael S. Tsirkin <mst@redhat.com> wrote:
> > >
> > > On Tue, Feb 28, 2023 at 04:30:36PM +0200, Anton Kuchin wrote:
> > > > I really don't understand why and what do you want to check on
> > > > destination.
> > >
> > > Yes I understand your patch controls source. Let me try to rephrase
> > > why I think it's better on destination.
> > > Here's my understanding
> > > - With vhost-user-fs state lives inside an external daemon.
> > > A- If after load you connect to the same daemon you can get migration mostly
> > >   for free.
> > > B- If you connect to a different daemon then that daemon will need
> > >   to pass information from original one.
> > >
> > > Is this a fair summary?
> > >
> > > Current solution is to set flag on the source meaning "I have an
> > > orchestration tool that will make sure that either A or B is correct".
> > >
> > > However both A and B can only be known when destination is known.
> > > Especially as long as what we are really trying to do is just allow qemu
> > > restarts, Checking the flag on load will thus achive it in a cleaner
> > > way, in that orchestration tool can reasonably keep the flag
> > > clear normally and only set it if restarting qemu locally.
> > >
> > >
> > > By comparison, with your approach orchestration tool will have
> > > to either always set the flag (risky since then we lose the
> > > extra check that we coded) or keep it clear and set before migration
> > > (complex).
> > >
> > > I hope I explained what and why I want to check.
> > >
> > > I am far from a vhost-user-fs expert so maybe I am wrong but
> > > I wanted to make sure I got the point across even if other
> > > disagree.
> > 
> > How do the source QEMU and virtiofsd know which migration mode to use?
> > The virtiofsd implementation might support both external and internal
> > migration. Source QEMU needs to know whether to ask source virtiofsd
> > for device state so it can be included in the migration stream or not.
> > 
> > Stefan
> 
> Well internal migration does not exist yet.  So it is simple.
> When it does maybe then we worry about how to support it?
> When it does will there be a reason to do external one at all?
> Why?  We do internal for everything else after all.
> 
> Or maybe we tie it to machine property? We'll know more when
> it actually exists, if it ever does.

Or maybe there's a way to ask virtiofsd what does it support:
it has to be supported on both source and destination.
In short - we don't know, let's engineer for what we know and
not guess.

> 
> -- 
> MST
Vladimir Sementsov-Ogievskiy March 1, 2023, 2:03 p.m. UTC | #31
On 01.03.23 00:24, Michael S. Tsirkin wrote:
>> Said that checking on destination would need another flag and the safe
>> way of using this feature would require managing two flags instead of one
>> making it even more fragile. So I'd prefer not to make it more complex.
>>
>> In my opinion the best way to use this property by orchestrator is to
>> leave default unmigratable behavior at start and just before migration when
>> destination is known enumerate all vhost-user-fs devices and set properties
>> according to their backends capability with QMP like you mentioned. This
>> gives us single point of making the decision for each device and avoids
>> guessing future at VM start.
> this means that you need to remember what the values were and then
> any failure on destination requires you to go back and set them
> to original values. 

Why do we need to restore old values?

For me, this new property is a kind of per-device migration capability. Do we care to restore migration capabilities to the values that they had before setting them for failed migration? We don't need it, as we just always set capabilities as we want before each migration. Same thing for this new property: just set it properly before migration and you don't need to care about restoring it after failed migration attempt.

> With possibility of crashes on the orchestrator
> you also need to recall the temporary values in some file ...
> This is huge complexity much worse than two flags.
> 
> Assuming we need two let's see whether just reload on source is good
> enough.
>
Michael S. Tsirkin March 1, 2023, 2:46 p.m. UTC | #32
On Wed, Mar 01, 2023 at 05:03:03PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> On 01.03.23 00:24, Michael S. Tsirkin wrote:
> > > Said that checking on destination would need another flag and the safe
> > > way of using this feature would require managing two flags instead of one
> > > making it even more fragile. So I'd prefer not to make it more complex.
> > > 
> > > In my opinion the best way to use this property by orchestrator is to
> > > leave default unmigratable behavior at start and just before migration when
> > > destination is known enumerate all vhost-user-fs devices and set properties
> > > according to their backends capability with QMP like you mentioned. This
> > > gives us single point of making the decision for each device and avoids
> > > guessing future at VM start.
> > this means that you need to remember what the values were and then
> > any failure on destination requires you to go back and set them
> > to original values.
> 
> Why do we need to restore old values?

To get back to where you were before you started the migration.

> For me, this new property is a kind of per-device migration
> capability. Do we care to restore migration capabilities to the values
> that they had before setting them for failed migration? We don't need
> it, as we just always set capabilities as we want before each
> migration. Same thing for this new property: just set it properly
> before migration and you don't need to care about restoring it after
> failed migration attempt.

If you really trust your management then we can just remove the
migration blocker and be done with it. All this song and dance
with changing properties is to catch errors. If one has to
carefully play with QOM to achieve the desired result then
IMHO we failed in this.


> > With possibility of crashes on the orchestrator
> > you also need to recall the temporary values in some file ...
> > This is huge complexity much worse than two flags.
> > 
> > Assuming we need two let's see whether just reload on source is good
> > enough.
> > 
> 
> -- 
> Best regards,
> Vladimir
Anton Kuchin March 1, 2023, 3:07 p.m. UTC | #33
On 28/02/2023 23:24, Michael S. Tsirkin wrote:
> On Tue, Feb 28, 2023 at 07:59:54PM +0200, Anton Kuchin wrote:
>> On 28/02/2023 16:57, Michael S. Tsirkin wrote:
>>> On Tue, Feb 28, 2023 at 04:30:36PM +0200, Anton Kuchin wrote:
>>>> I really don't understand why and what do you want to check on
>>>> destination.
>>> Yes I understand your patch controls source. Let me try to rephrase
>>> why I think it's better on destination.
>>> Here's my understanding
>>> - With vhost-user-fs state lives inside an external daemon.
>>> A- If after load you connect to the same daemon you can get migration mostly
>>>     for free.
>>> B- If you connect to a different daemon then that daemon will need
>>>     to pass information from original one.
>>>
>>> Is this a fair summary?
>>>
>>> Current solution is to set flag on the source meaning "I have an
>>> orchestration tool that will make sure that either A or B is correct".
>>>
>>> However both A and B can only be known when destination is known.
>>> Especially as long as what we are really trying to do is just allow qemu
>>> restarts, Checking the flag on load will thus achive it in a cleaner
>>> way, in that orchestration tool can reasonably keep the flag
>>> clear normally and only set it if restarting qemu locally.
>>>
>>>
>>> By comparison, with your approach orchestration tool will have
>>> to either always set the flag (risky since then we lose the
>>> extra check that we coded) or keep it clear and set before migration
>>> (complex).
>>>
>>> I hope I explained what and why I want to check.
>>>
>>> I am far from a vhost-user-fs expert so maybe I am wrong but
>>> I wanted to make sure I got the point across even if other
>>> disagree.
>>>
>> Thank you for the explanation. Now I understand your concerns.
>>
>> You are right about this mechanism being a bit risky if orchestrator is
>> not using it properly or clunky if it is used in a safest possible way.
>> That's why first attempt of this feature was with migration capability
>> to let orchestrator choose behavior right at the moment of migration.
>> But it has its own problems.
>>
>> We can't move this check only to destination because one of main goals
>> was to prevent orchestrators that are unaware of vhost-user-fs specifics
>> from accidentally migrating such VMs. We can't rely here entirely on
>> destination to block this because if VM is migrated to file and then
>> can't be loaded by destination there is no way to fallback and resume
>> the source so we need to have some kind of blocker on source by default.
> Interesting.  Why is there no way? Just load it back on source? Isn't
> this how any other load failure is managed? Because for sure you
> need to manage these, they will happen.

Because the source can already be terminated, and if load is not supported
by the orchestrator and backend, the stream can't be loaded on the source
either. So we need to ensure that only orchestrators that know what they
are doing and explicitly enable the feature are allowed to start migration.

>
>> Said that checking on destination would need another flag and the safe
>> way of using this feature would require managing two flags instead of one
>> making it even more fragile. So I'd prefer not to make it more complex.
>>
>> In my opinion the best way to use this property by orchestrator is to
>> leave default unmigratable behavior at start and just before migration when
>> destination is known enumerate all vhost-user-fs devices and set properties
>> according to their backends capability with QMP like you mentioned. This
>> gives us single point of making the decision for each device and avoids
>> guessing future at VM start.
> this means that you need to remember what the values were and then
> any failure on destination requires you to go back and set them
> to original values. With possibility of crashes on the orchestrator
> you also need to recall the temporary values in some file ...
> This is huge complexity much worse than two flags.
>
> Assuming we need two let's see whether just reload on source is good
> enough.

Reload on source can't be guaranteed to work either. And even if we could
guarantee it to work, we would also need to set up its incoming migration
type in case the outgoing migration fails.
If orchestrator crashes and restarts it can revert flags for all devices
or can rely on next migration code to setup them correctly because they have
no effect between migrations anyway.

Reverting migration that failed on destination is not an easy task too.
It seems to be much more complicated than refusing to migrate on source.

I believe we should perform sanity checks if we have the data, but engineering
additional checks and putting extra restrictions just to prevent the
orchestrator from doing wrong things is overkill.

>> But allowing setup via command-line is valid too because some backends may
>> always be capable of external migration independent of hosts and don't need
>> the manipulations with QMP before migration at all.
> I am much more worried that the realistic schenario is hard to manage
> safely than about theoretical state migrating backends that don't exist.
>
>
Michael S. Tsirkin March 1, 2023, 3:24 p.m. UTC | #34
On Wed, Mar 01, 2023 at 05:07:28PM +0200, Anton Kuchin wrote:
> On 28/02/2023 23:24, Michael S. Tsirkin wrote:
> > On Tue, Feb 28, 2023 at 07:59:54PM +0200, Anton Kuchin wrote:
> > > On 28/02/2023 16:57, Michael S. Tsirkin wrote:
> > > > On Tue, Feb 28, 2023 at 04:30:36PM +0200, Anton Kuchin wrote:
> > > > > I really don't understand why and what do you want to check on
> > > > > destination.
> > > > Yes I understand your patch controls source. Let me try to rephrase
> > > > why I think it's better on destination.
> > > > Here's my understanding
> > > > - With vhost-user-fs state lives inside an external daemon.
> > > > A- If after load you connect to the same daemon you can get migration mostly
> > > >     for free.
> > > > B- If you connect to a different daemon then that daemon will need
> > > >     to pass information from original one.
> > > > 
> > > > Is this a fair summary?
> > > > 
> > > > Current solution is to set flag on the source meaning "I have an
> > > > orchestration tool that will make sure that either A or B is correct".
> > > > 
> > > > However both A and B can only be known when destination is known.
> > > > Especially as long as what we are really trying to do is just allow qemu
> > > >     restarts, Checking the flag on load will thus achieve it in a cleaner
> > > > way, in that orchestration tool can reasonably keep the flag
> > > > clear normally and only set it if restarting qemu locally.
> > > > 
> > > > 
> > > > By comparison, with your approach orchestration tool will have
> > > > to either always set the flag (risky since then we lose the
> > > > extra check that we coded) or keep it clear and set before migration
> > > > (complex).
> > > > 
> > > > I hope I explained what and why I want to check.
> > > > 
> > > > I am far from a vhost-user-fs expert so maybe I am wrong but
> > > > I wanted to make sure I got the point across even if other
> > > > disagree.
> > > > 
> > > Thank you for the explanation. Now I understand your concerns.
> > > 
> > > You are right about this mechanism being a bit risky if orchestrator is
> > > not using it properly or clunky if it is used in a safest possible way.
> > > That's why first attempt of this feature was with migration capability
> > > to let orchestrator choose behavior right at the moment of migration.
> > > But it has its own problems.
> > > 
> > > We can't move this check only to destination because one of main goals
> > > was to prevent orchestrators that are unaware of vhost-user-fs specifics
> > > from accidentally migrating such VMs. We can't rely here entirely on
> > > destination to block this because if VM is migrated to file and then
> > > can't be loaded by destination there is no way to fallback and resume
> > > the source so we need to have some kind of blocker on source by default.
> > Interesting.  Why is there no way? Just load it back on source? Isn't
> > this how any other load failure is managed? Because for sure you
> > need to manage these, they will happen.
> 
> Because source can be already terminated

So start it again.

> and if load is not supported by
> orchestrator and backend stream can't be loaded on source too.

How can an orchestrator not support load but support migration?

> So we need to
> ensure that only orchestrators that know what they are doing explicitly
> enable
> the feature are allowed to start migration.

that seems par for the course - if you want to use a feature you better
have an idea about how to do it.

If orchestrator is doing things like migrating to file
then scp that file, then it better be prepared to
restart VM on source because sometimes it will fail
on destination.

And an orchestrator that is not clever enough to do it, then it
just should not come up with funky ways to do migration.


> > 
> > > Said that checking on destination would need another flag and the safe
> > > way of using this feature would require managing two flags instead of one
> > > making it even more fragile. So I'd prefer not to make it more complex.
> > > 
> > > In my opinion the best way to use this property by orchestrator is to
> > > leave default unmigratable behavior at start and just before migration when
> > > destination is known enumerate all vhost-user-fs devices and set properties
> > > according to their backends capability with QMP like you mentioned. This
> > > gives us single point of making the decision for each device and avoids
> > > guessing future at VM start.
> > this means that you need to remember what the values were and then
> > any failure on destination requires you to go back and set them
> > to original values. With possibility of crashes on the orchestrator
> > you also need to recall the temporary values in some file ...
> > This is huge complexity much worse than two flags.
> > 
> > Assuming we need two let's see whether just reload on source is good
> > enough.
> 
> Reload on source can't be guaranteed to work too. And even if we could
> guarantee it to work then we would also need to setup its incoming migration
> type in case outgoing migration fails.

Since it's local you naturally just set it to allow load. It's trivial - just
a command line property no games with QOM and no state.


> If orchestrator crashes and restarts it can revert flags for all devices

revert to what?

> or can rely on next migration code to setup them correctly because they have
> no effect between migrations anyway.

but the whole reason we have this stuff is to protect against
an orchestrator that forgets to do it.

> Reverting migration that failed on destination is not an easy task too.
> It seems to be much more complicated than refusing to migrate on source.

It is only more complicated because you do not consider that
migration can fail even if QEMU allows it.

Imagine that you start playing with features through QOM.
Now you start migration, it fails for some reason (e.g. a network
issue), and you are left with a misconfigured feature.

Your answer is basically that we don't need this protection at all,
we can trust orchestrators to do the right thing.
In that case just drop the blocker and be done with it.


> I believe we should perform sanity checks if we have data but engineering
> additional checks and putting extra restrictions just to prevent
> orchestrator
> from doing wrong things is an overkill.

Exactly. The check on source is such an overkill - your problem
is not on source, source has no issue sending the VM. Your problem is
on destination - it can not get the data from daemon since the daemon
is not local.


> > > But allowing setup via command-line is valid too because some backends may
> > > always be capable of external migration independent of hosts and don't need
> > > the manipulations with QMP before migration at all.
> > I am much more worried that the realistic scenario is hard to manage
> > safely than about theoretical state migrating backends that don't exist.
> > 
> >
Michael S. Tsirkin March 1, 2023, 3:33 p.m. UTC | #35
On Tue, Feb 28, 2023 at 07:59:54PM +0200, Anton Kuchin wrote:
> We can't rely here entirely on
> destination to block this because if VM is migrated to file and then
> can't be loaded by destination there is no way to fallback and resume
> the source so we need to have some kind of blocker on source by default.

So I commented about a fallback. IMO it's just orchestrator being silly:
don't kill source qemu until you have made sure destination loaded the
file, or re-load it, and all will be well.


But a bigger issue that this highlights is simply that if migrating to a
file you have no idea at all where the file will be loaded.  Maybe some
orchestrators know, but I don't see how we can be sure all of them know.
The only time where we know whether the file is loaded on the same host
where it was saved is when we actually load it.
Anton Kuchin March 1, 2023, 3:40 p.m. UTC | #36
On 01/03/2023 16:46, Michael S. Tsirkin wrote:
> On Wed, Mar 01, 2023 at 05:03:03PM +0300, Vladimir Sementsov-Ogievskiy wrote:
>> On 01.03.23 00:24, Michael S. Tsirkin wrote:
>>>> Said that checking on destination would need another flag and the safe
>>>> way of using this feature would require managing two flags instead of one
>>>> making it even more fragile. So I'd prefer not to make it more complex.
>>>>
>>>> In my opinion the best way to use this property by orchestrator is to
>>>> leave default unmigratable behavior at start and just before migration when
>>>> destination is known enumerate all vhost-user-fs devices and set properties
>>>> according to their backends capability with QMP like you mentioned. This
>>>> gives us single point of making the decision for each device and avoids
>>>> guessing future at VM start.
>>> this means that you need to remember what the values were and then
>>> any failure on destination requires you to go back and set them
>>> to original values.
>> Why do we need to restore old values?
> To get back to where you were before you were starting migration.
>
>> For me, this new property is a kind of per-device migration
>> capability. Do we care to restore migration capabilities to the values
>> that they had before setting them for failed migration? We don't need
>> it, as we just always set capabilities as we want before each
>> migration. Same thing for this new property: just set it properly
>> before migration and you don't need to care about restoring it after
>> failed migration attempt.
> If you really trust your management then we can just remove the
> migration blocker and be done with it. All this song and dance
> with changing properties is to catch errors. If one has to
> carefully play with QOM to achieve the desired result then
> IMHO we failed in this.

To be honest I would prefer just removing the blocker, because if the
orchestrator doesn't know what it is doing it has lots of different ways
to break things and we can't do anything about it.
Just like vhost-user-scsi always allows migration since the day it was
introduced without additional checks and relies on the orchestrator.
But migration was blocked for vhost-user-fs when it was initially merged
and it is bad to change this contract now.

This property here is not only to block migration by default and catch
errors but really to select migration type. External migration can be
sometimes preferred even after internal is implemented because it requires
fewer calls to the backend to extract internal state and less code to execute
in order to save and restore the daemon state.
And this also will allow compatibility with old VMs that support only
external migration to move to internal migration without reboot someday
when it is implemented.

So catching errors is not the only purpose of this property, but it
definitely
allows us to catch some obvious ones.

>
>>> With possibility of crashes on the orchestrator
>>> you also need to recall the temporary values in some file ...
>>> This is huge complexity much worse than two flags.
>>>
>>> Assuming we need two let's see whether just reload on source is good
>>> enough.
>>>
>> -- 
>> Best regards,
>> Vladimir
Michael S. Tsirkin March 1, 2023, 3:40 p.m. UTC | #37
On Wed, Feb 22, 2023 at 10:50:02PM +0200, Anton Kuchin wrote:
> > Property on source does not satisfy both at the same time.
> > Property on destination does.
> 
> If destination QEMUs on local and remote hosts have same properties

I'd say just make an exception from this rule.



> how can
> we
> write check that passes on the same host and fails on remote?
> Sorry, I don't understand how qemu can help to handle this. It knows nothing
> about the hosts so this is responsibility of management to software to know
> where it can migrate and configure it appropriately.
> 
> Maybe I didn't understand your scenario or what you propose to check on
> destination. Could you explain a bit more?

Basically you would add "migration=external" to device before loading.
Pre load check fails migration of vhost-user-fs device if migration
has not been set.
Michael S. Tsirkin March 1, 2023, 3:52 p.m. UTC | #38
On Wed, Mar 01, 2023 at 05:40:09PM +0200, Anton Kuchin wrote:
> So catching errors is not the only purpose of this property, but it
> definitely
> allows us to catch some obvious ones.

OK let's say this. If migration=external then migration just works.
If migration=internal it fails for now. We are agreed here right?

Our argument is whether to check on load or save?

I propose this compromize: two properties:
	migration-load and migration-save

	migration-load : how is incoming migration handled.
			 internal - through qemu
			 external - through the daemon

			  checked in pre-load

	migration-save : how is outgoing migration handled.
			 internal - through qemu
			 external - through the daemon

			  checked in post-save

This way whether the check is on source or destination or both
is up to the user.

Hmm?
Anton Kuchin March 1, 2023, 4:04 p.m. UTC | #39
On 01/03/2023 17:24, Michael S. Tsirkin wrote:
> On Wed, Mar 01, 2023 at 05:07:28PM +0200, Anton Kuchin wrote:
>> On 28/02/2023 23:24, Michael S. Tsirkin wrote:
>>> On Tue, Feb 28, 2023 at 07:59:54PM +0200, Anton Kuchin wrote:
>>>> On 28/02/2023 16:57, Michael S. Tsirkin wrote:
>>>>> On Tue, Feb 28, 2023 at 04:30:36PM +0200, Anton Kuchin wrote:
>>>>>> I really don't understand why and what do you want to check on
>>>>>> destination.
>>>>> Yes I understand your patch controls source. Let me try to rephrase
>>>>> why I think it's better on destination.
>>>>> Here's my understanding
>>>>> - With vhost-user-fs state lives inside an external daemon.
>>>>> A- If after load you connect to the same daemon you can get migration mostly
>>>>>      for free.
>>>>> B- If you connect to a different daemon then that daemon will need
>>>>>      to pass information from original one.
>>>>>
>>>>> Is this a fair summary?
>>>>>
>>>>> Current solution is to set flag on the source meaning "I have an
>>>>> orchestration tool that will make sure that either A or B is correct".
>>>>>
>>>>> However both A and B can only be known when destination is known.
>>>>> Especially as long as what we are really trying to do is just allow qemu
>>>>> restarts, Checking the flag on load will thus achieve it in a cleaner
>>>>> way, in that orchestration tool can reasonably keep the flag
>>>>> clear normally and only set it if restarting qemu locally.
>>>>>
>>>>>
>>>>> By comparison, with your approach orchestration tool will have
>>>>> to either always set the flag (risky since then we lose the
>>>>> extra check that we coded) or keep it clear and set before migration
>>>>> (complex).
>>>>>
>>>>> I hope I explained what and why I want to check.
>>>>>
>>>>> I am far from a vhost-user-fs expert so maybe I am wrong but
>>>>> I wanted to make sure I got the point across even if other
>>>>> disagree.
>>>>>
>>>> Thank you for the explanation. Now I understand your concerns.
>>>>
>>>> You are right about this mechanism being a bit risky if orchestrator is
>>>> not using it properly or clunky if it is used in a safest possible way.
>>>> That's why first attempt of this feature was with migration capability
>>>> to let orchestrator choose behavior right at the moment of migration.
>>>> But it has its own problems.
>>>>
>>>> We can't move this check only to destination because one of main goals
>>>> was to prevent orchestrators that are unaware of vhost-user-fs specifics
>>>> from accidentally migrating such VMs. We can't rely here entirely on
>>>> destination to block this because if VM is migrated to file and then
>>>> can't be loaded by destination there is no way to fallback and resume
>>>> the source so we need to have some kind of blocker on source by default.
>>> Interesting.  Why is there no way? Just load it back on source? Isn't
>>> this how any other load failure is managed? Because for sure you
>>> need to manage these, they will happen.
>> Because source can be already terminated
> So start it again.

What is the difference between restarting the source and restarting
the destination to retry migration? If the stream is correct it can be
loaded by the destination; if it is broken it won't be accepted at the source either.

>> and if load is not supported by
>> orchestrator and backend stream can't be loaded on source too.
> How can an orchestrator not support load but support migration?

I was talking about orchestrators that rely on the old device behavior
of blocking migration. They could attempt migration anyway and check if
it was blocked — that is far from ideal but was OK and safe — and now this
becomes dangerous because state can be lost and the VM becomes unloadable.

>
>> So we need to
>> ensure that only orchestrators that know what they are doing explicitly
>> enable
>> the feature are allowed to start migration.
> that seems par for the course - if you want to use a feature you better
> have an idea about how to do it.
>
> If orchestrator is doing things like migrating to file
> then scp that file, then it better be prepared to
> restart VM on source because sometimes it will fail
> on destination.
>
> And an orchestrator that is not clever enough to do it, then it
> just should not come up with funky ways to do migration.
>
>
>>>> Said that checking on destination would need another flag and the safe
>>>> way of using this feature would require managing two flags instead of one
>>>> making it even more fragile. So I'd prefer not to make it more complex.
>>>>
>>>> In my opinion the best way to use this property by orchestrator is to
>>>> leave default unmigratable behavior at start and just before migration when
>>>> destination is known enumerate all vhost-user-fs devices and set properties
>>>> according to their backends capability with QMP like you mentioned. This
>>>> gives us single point of making the decision for each device and avoids
>>>> guessing future at VM start.
>>> this means that you need to remember what the values were and then
>>> any failure on destination requires you to go back and set them
>>> to original values. With possibility of crashes on the orchestrator
>>> you also need to recall the temporary values in some file ...
>>> This is huge complexity much worse than two flags.
>>>
>>> Assuming we need two let's see whether just reload on source is good
>>> enough.
>> Reload on source can't be guaranteed to work too. And even if we could
>> guarantee it to work then we would also need to setup its incoming migration
>> type in case outgoing migration fails.
> Since it's local you naturally just set it to allow load. It's trivial - just
> a command line property no games with QOM and no state.

It is not too hard but it adds complexity

>
>> If orchestrator crashes and restarts it can revert flags for all devices
> revert to what?

To default migration=none, and set correct value before next migration 
attempt.

>> or can rely on next migration code to setup them correctly because they have
>> no effect between migrations anyway.
> but the whole reason we have this stuff is to protect against
> an orchestrator that forgets to do it.

No, it is to protect orchestrators that don't even know this feature
exists.

>> Reverting migration that failed on destination is not an easy task too.
>> It seems to be much more complicated than refusing to migrate on source.
> It is only more complicated because you do not consider that
> migration can fail even if QEMU allows it.
>
> Imagine that you start playing with features through QOM.
> Now you start migration, it fails for some reason (e.g. a network
> issue), and you are left with a misconfigured feature.
>
> Your answer is basically that we don't need this protection at all,
> we can trust orchestrators to do the right thing.
> In that case just drop the blocker and be done with it.

Yes, we don't need to protect from orchestrators. But we need to protect 
unaware orchestrators.

>
>> I believe we should perform sanity checks if we have data but engineering
>> additional checks and putting extra restrictions just to prevent
>> orchestrator
>> from doing wrong things is an overkill.
> Exactly. The check on source is such an overkill - your problem
> is not on source, source has no issue sending the VM. Your problem is
> on destination - it can not get the data from daemon since the daemon
> is not local.
>
>
>>>> But allowing setup via command-line is valid too because some backends may
>>>> always be capable of external migration independent of hosts and don't need
>>>> the manipulations with QMP before migration at all.
>>> I am much more worried that the realistic schenario is hard to manage
>>> safely than about theoretical state migrating backends that don't exist.
>>>
>>>
Anton Kuchin March 1, 2023, 4:29 p.m. UTC | #40
On 01/03/2023 17:52, Michael S. Tsirkin wrote:
> On Wed, Mar 01, 2023 at 05:40:09PM +0200, Anton Kuchin wrote:
>> So catching errors is not the only purpose of this property, but it
>> definitely
>> allows us to catch some obvious ones.
> OK let's say this. If migration=external then migration just works.
> If migration=internal it fails for now. We are agreed here right?
>
> Our argument is whether to check on load or save?
>
> I propose this compromize: two properties:
> 	migration-load and migration-save
>
> 	migration-load : how is incoming migration handled.
> 			 internal - through qemu
> 			 external - through the daemon
>
> 			  checked in pre-load
>
> 	migration-save : how is outgoing migration handled.
> 			 internal - through qemu
> 			 external - through the daemon
>
> 			  checked in post-save
>
> This way whether the check is on source or destination or both
> is up to the user.
>
> Hmm?

Hmm, sounds interesting, this can be a really good compromise.

So migration-save will be "none" by default to protect unaware
orchestrators.
What do you think — should migration-load be "none" too, to force the
orchestrator to set the proper incoming migration type?
Michael S. Tsirkin March 1, 2023, 5:17 p.m. UTC | #41
On Wed, Mar 01, 2023 at 06:04:31PM +0200, Anton Kuchin wrote:
> On 01/03/2023 17:24, Michael S. Tsirkin wrote:
> > On Wed, Mar 01, 2023 at 05:07:28PM +0200, Anton Kuchin wrote:
> > > On 28/02/2023 23:24, Michael S. Tsirkin wrote:
> > > > On Tue, Feb 28, 2023 at 07:59:54PM +0200, Anton Kuchin wrote:
> > > > > On 28/02/2023 16:57, Michael S. Tsirkin wrote:
> > > > > > On Tue, Feb 28, 2023 at 04:30:36PM +0200, Anton Kuchin wrote:
> > > > > > > I really don't understand why and what do you want to check on
> > > > > > > destination.
> > > > > > Yes I understand your patch controls source. Let me try to rephrase
> > > > > > why I think it's better on destination.
> > > > > > Here's my understanding
> > > > > > - With vhost-user-fs state lives inside an external daemon.
> > > > > > A- If after load you connect to the same daemon you can get migration mostly
> > > > > >      for free.
> > > > > > B- If you connect to a different daemon then that daemon will need
> > > > > >      to pass information from original one.
> > > > > > 
> > > > > > Is this a fair summary?
> > > > > > 
> > > > > > Current solution is to set flag on the source meaning "I have an
> > > > > > orchestration tool that will make sure that either A or B is correct".
> > > > > > 
> > > > > > However both A and B can only be known when destination is known.
> > > > > > Especially as long as what we are really trying to do is just allow qemu
> > > > > > restarts, Checking the flag on load will thus achieve it in a cleaner
> > > > > > way, in that orchestration tool can reasonably keep the flag
> > > > > > clear normally and only set it if restarting qemu locally.
> > > > > > 
> > > > > > 
> > > > > > By comparison, with your approach orchestration tool will have
> > > > > > to either always set the flag (risky since then we lose the
> > > > > > extra check that we coded) or keep it clear and set before migration
> > > > > > (complex).
> > > > > > 
> > > > > > I hope I explained what and why I want to check.
> > > > > > 
> > > > > > I am far from a vhost-user-fs expert so maybe I am wrong but
> > > > > > I wanted to make sure I got the point across even if other
> > > > > > disagree.
> > > > > > 
> > > > > Thank you for the explanation. Now I understand your concerns.
> > > > > 
> > > > > You are right about this mechanism being a bit risky if orchestrator is
> > > > > not using it properly or clunky if it is used in a safest possible way.
> > > > > That's why first attempt of this feature was with migration capability
> > > > > to let orchestrator choose behavior right at the moment of migration.
> > > > > But it has its own problems.
> > > > > 
> > > > > We can't move this check only to destination because one of main goals
> > > > > was to prevent orchestrators that are unaware of vhost-user-fs specifics
> > > > > from accidentally migrating such VMs. We can't rely here entirely on
> > > > > destination to block this because if VM is migrated to file and then
> > > > > can't be loaded by destination there is no way to fallback and resume
> > > > > the source so we need to have some kind of blocker on source by default.
> > > > Interesting.  Why is there no way? Just load it back on source? Isn't
> > > > this how any other load failure is managed? Because for sure you
> > > > need to manage these, they will happen.
> > > Because source can be already terminated
> > So start it again.
> 
> What is the difference between restarting the source and restarting
> the destination to retry migration? If stream is correct it can be
> loaded by destination if it is broken it won't be accepted at source too.

No.  First, the destination has a different qemu version. Second, the file
can be corrupted in transfer. Third, the transfer can fail. Etc ...



> > > and if load is not supported by
> > > orchestrator and backend stream can't be loaded on source too.
> > How can an orchestrator not support load but support migration?
> 
> I was talking about orchestrators that rely on old device behavior
> of blocking migration. They could attempt migration anyway and check if
> it was blocked that is far from ideal but was OK and safe, and now this
> becomes dangerous because state can be lost and VM becomes unloadable.
> 
> > 
> > > So we need to
> > > ensure that only orchestrators that know what they are doing explicitly
> > > enable
> > > the feature are allowed to start migration.
> > that seems par for the course - if you want to use a feature you better
> > have an idea about how to do it.
> > 
> > If orchestrator is doing things like migrating to file
> > then scp that file, then it better be prepared to
> > restart VM on source because sometimes it will fail
> > on destination.
> > 
> > And an orchestrator that is not clever enough to do it, then it
> > just should not come up with funky ways to do migration.
> > 
> > 
> > > > > Said that checking on destination would need another flag and the safe
> > > > > way of using this feature would require managing two flags instead of one
> > > > > making it even more fragile. So I'd prefer not to make it more complex.
> > > > > 
> > > > > In my opinion the best way to use this property by orchestrator is to
> > > > > leave default unmigratable behavior at start and just before migration when
> > > > > destination is known enumerate all vhost-user-fs devices and set properties
> > > > > according to their backends capability with QMP like you mentioned. This
> > > > > gives us single point of making the decision for each device and avoids
> > > > > guessing future at VM start.
> > > > this means that you need to remember what the values were and then
> > > > any failure on destination requires you to go back and set them
> > > > to original values. With possibility of crashes on the orchestrator
> > > > you also need to recall the temporary values in some file ...
> > > > This is huge complexity much worse than two flags.
> > > > 
> > > > Assuming we need two let's see whether just reload on source is good
> > > > enough.
> > > Reload on source can't be guaranteed to work too. And even if we could
> > > guarantee it to work then we would also need to setup its incoming migration
> > > type in case outgoing migration fails.
> > Since it's local you naturally just set it to allow load. It's trivial - just
> > a command line property no games with QOM and no state.
> 
> It is not too hard but it adds complexity
> 
> > 
> > > If orchestrator crashes and restarts it can revert flags for all devices
> > revert to what?
> 
> To default migration=none, and set correct value before next migration
> attempt.
> 
> > > or can rely on next migration code to setup them correctly because they have
> > > no effect between migrations anyway.
> > but the whole reason we have this stuff is to protect against
> > an orchestrator that forgets to do it.
> 
> No, it is to protect orchestrators that doesn't even know this feature
> exists.
> 
> > > Reverting migration that failed on destination is not an easy task too.
> > > It seems to be much more complicated than refusing to migrate on source.
> > It is only more complicated because you do not consider that
> > migration can fail even if QEMU allows it.
> > 
> > Imagine that you start playing with features through QOM.
> > Now you start migration, it fails for some reason (e.g. a network
> > issue), and you are left with a misconfigured feature.
> > 
> > Your answer is basically that we don't need this protection at all,
> > we can trust orchestrators to do the right thing.
> > In that case just drop the blocker and be done with it.
> 
> Yes, we don't need to protect from orchestrators. But we need to protect
> unaware orchestrators.

Right. You just trust orchestrators to do the right thing more than I do :)
I feel they will blindly set flag and then we are back to square one.
I feel it's less likely with load because load already has a slightly
different command line.


In fact, if we wanted to we could fail qemu if the property is set
but VM is started and not migrated.




> > 
> > > I believe we should perform sanity checks if we have data but engineering
> > > additional checks and putting extra restrictions just to prevent
> > > orchestrator
> > > from doing wrong things is an overkill.
> > Exactly. The check on source is such an overkill - your problem
> > is not on source, source has no issue sending the VM. Your problem is
> > on destination - it can not get the data from daemon since the daemon
> > is not local.
> > 
> > 
> > > > > But allowing setup via command-line is valid too because some backends may
> > > > > always be capable of external migration independent of hosts and don't need
> > > > > the manipulations with QMP before migration at all.
> > > > I am much more worried that the realistic scenario is hard to manage
> > > > safely than about theoretical state migrating backends that don't exist.
> > > > 
> > > >
Michael S. Tsirkin March 1, 2023, 5:19 p.m. UTC | #42
On Wed, Mar 01, 2023 at 06:29:50PM +0200, Anton Kuchin wrote:
> On 01/03/2023 17:52, Michael S. Tsirkin wrote:
> > On Wed, Mar 01, 2023 at 05:40:09PM +0200, Anton Kuchin wrote:
> > > So catching errors is not the only purpose of this property, but it
> > > definitely
> > > allows us to catch some obvious ones.
> > OK let's say this. If migration=external then migration just works.
> > If migration=internal it fails for now. We are agreed here right?
> > 
> > Our argument is whether to check on load or save?
> > 
> > I propose this compromize: two properties:
> > 	migration-load and migration-save
> > 
> > 	migration-load : how is incoming migration handled.
> > 			 internal - through qemu
> > 			 external - through the daemon
> > 
> > 			  checked in pre-load
> > 
> > 	migration-save : how is outgoing migration handled.
> > 			 internal - through qemu
> > 			 external - through the daemon
> > 
> > 			  checked in post-save
> > 
> > This way whether the check is on source or destination or both
> > is up to the user.
> > 
> > Hmm?
> 
> Hmm, sounds interesting, this can be a really good compromise.
> 
> So migration-save will be "none" by default to protect unaware
> orchestrators.
> What do you think should migration-load be "none" too to force orchestrator
> set proper incoming migration type?

Yes.
I am also thinking whether we should block setting migration-load
if qemu is starting a VM as opposed to loading it.
Thoughts?
Anton Kuchin March 1, 2023, 7:35 p.m. UTC | #43
On 01/03/2023 19:17, Michael S. Tsirkin wrote:
> On Wed, Mar 01, 2023 at 06:04:31PM +0200, Anton Kuchin wrote:
>> On 01/03/2023 17:24, Michael S. Tsirkin wrote:
>>> On Wed, Mar 01, 2023 at 05:07:28PM +0200, Anton Kuchin wrote:
>>>> On 28/02/2023 23:24, Michael S. Tsirkin wrote:
>>>>> On Tue, Feb 28, 2023 at 07:59:54PM +0200, Anton Kuchin wrote:
>>>>>> On 28/02/2023 16:57, Michael S. Tsirkin wrote:
>>>>>>> On Tue, Feb 28, 2023 at 04:30:36PM +0200, Anton Kuchin wrote:
>>>>>>>> I really don't understand why and what do you want to check on
>>>>>>>> destination.
>>>>>>> Yes I understand your patch controls source. Let me try to rephrase
>>>>>>> why I think it's better on destination.
>>>>>>> Here's my understanding
>>>>>>> - With vhost-user-fs state lives inside an external daemon.
>>>>>>> A- If after load you connect to the same daemon you can get migration mostly
>>>>>>>       for free.
>>>>>>> B- If you connect to a different daemon then that daemon will need
>>>>>>>       to pass information from original one.
>>>>>>>
>>>>>>> Is this a fair summary?
>>>>>>>
>>>>>>> Current solution is to set flag on the source meaning "I have an
>>>>>>> orchestration tool that will make sure that either A or B is correct".
>>>>>>>
>>>>>>> However both A and B can only be known when destination is known.
>>>>>>> Especially as long as what we are really trying to do is just allow qemu
>>>>>>> restarts, Checking the flag on load will thus achive it in a cleaner
>>>>>>> way, in that orchestration tool can reasonably keep the flag
>>>>>>> clear normally and only set it if restarting qemu locally.
>>>>>>>
>>>>>>>
>>>>>>> By comparison, with your approach orchestration tool will have
>>>>>>> to either always set the flag (risky since then we lose the
>>>>>>> extra check that we coded) or keep it clear and set before migration
>>>>>>> (complex).
>>>>>>>
>>>>>>> I hope I explained what and why I want to check.
>>>>>>>
>>>>>>> I am far from a vhost-user-fs expert so maybe I am wrong but
>>>>>>> I wanted to make sure I got the point across even if other
>>>>>>> disagree.
>>>>>>>
>>>>>> Thank you for the explanation. Now I understand your concerns.
>>>>>>
>>>>>> You are right about this mechanism being a bit risky if orchestrator is
>>>>>> not using it properly or clunky if it is used in a safest possible way.
>>>>>> That's why first attempt of this feature was with migration capability
>>>>>> to let orchestrator choose behavior right at the moment of migration.
>>>>>> But it has its own problems.
>>>>>>
>>>>>> We can't move this check only to destination because one of main goals
>>>>>> was to prevent orchestrators that are unaware of vhost-user-fs specifics
>>>>>> from accidentally migrating such VMs. We can't rely here entirely on
>>>>>> destination to block this because if VM is migrated to file and then
>>>>>> can't be loaded by destination there is no way to fallback and resume
>>>>>> the source so we need to have some kind of blocker on source by default.
>>>>> Interesting.  Why is there no way? Just load it back on source? Isn't
>>>>> this how any other load failure is managed? Because for sure you
>>>>> need to manage these, they will happen.
>>>> Because source can be already terminated
>>> So start it again.
>> What is the difference between restarting the source and restarting
>> the destination to retry migration? If stream is correct it can be
>> loaded by destination if it is broken it won't be accepted at source too.
> No.  First, destination has a different qemu version. Second file
> can be corrupted in transfer. Third transfer can fail. Etc ...
>
>
>
>>>> and if load is not supported by
>>>> orchestrator and backend stream can't be loaded on source too.
>>> How can an orchestrator not support load but support migration?
>> I was talking about orchestrators that rely on old device behavior
>> of blocking migration. They could attempt migration anyway and check if
>> it was blocked that is far from ideal but was OK and safe, and now this
>> becomes dangerous because state can be lost and VM becomes unloadable.
>>
>>>> So we need to
>>>> ensure that only orchestrators that know what they are doing and explicitly
>>>> enable the feature are allowed to start migration.
>>> that seems par for the course - if you want to use a feature you better
>>> have an idea about how to do it.
>>>
>>> If orchestrator is doing things like migrating to file
>>> then scp that file, then it better be prepared to
>>> restart VM on source because sometimes it will fail
>>> on destination.
>>>
>>> And an orchestrator that is not clever enough to do it, then it
>>> just should not come up with funky ways to do migration.
>>>
>>>
>>>>>> Said that checking on destination would need another flag and the safe
>>>>>> way of using this feature would require managing two flags instead of one
>>>>>> making it even more fragile. So I'd prefer not to make it more complex.
>>>>>>
>>>>>> In my opinion the best way to use this property by orchestrator is to
>>>>>> leave default unmigratable behavior at start and just before migration when
>>>>>> destination is known enumerate all vhost-user-fs devices and set properties
>>>>>> according to their backends capability with QMP like you mentioned. This
>>>>>> gives us single point of making the decision for each device and avoids
>>>>>> guessing future at VM start.
>>>>> this means that you need to remember what the values were and then
>>>>> any failure on destination requires you to go back and set them
>>>>> to original values. With possibility of crashes on the orchestrator
>>>>> you also need to recall the temporary values in some file ...
>>>>> This is huge complexity much worse than two flags.
>>>>>
>>>>> Assuming we need two let's see whether just reload on source is good
>>>>> enough.
>>>> Reload on source can't be guaranteed to work too. And even if we could
>>>> guarantee it to work then we would also need to setup its incoming migration
>>>> type in case outgoing migration fails.
>>> Since it's local you naturally just set it to allow load. It's trivial - just
>>> a command line property no games with QOM and no state.
>> It is not too hard but it adds complexity
>>
>>>> If orchestrator crashes and restarts it can revert flags for all devices
>>> revert to what?
>> To default migration=none, and set correct value before next migration
>> attempt.
>>
>>>> or can rely on next migration code to setup them correctly because they have
>>>> no effect between migrations anyway.
>>> but the whole reason we have this stuff is to protect against
>>> an orchestrator that forgets to do it.
>> No, it is to protect orchestrators that doesn't even know this feature
>> exists.
>>
>>>> Reverting migration that failed on destination is not an easy task too.
>>>> It seems to be much more complicated than refusing to migrate on source.
>>> It is only more complicated because you do not consider that
>>> migration can fail even if QEMU allows it.
>>>
>>> Imagine that you start playing with features through QOM.
>>> Now you start migration, it fails for some reason (e.g. a network
>>> issue), and you are left with a misconfigured feature.
>>>
>>> Your answer is basically that we don't need this protection at all,
>>> we can trust orchestrators to do the right thing.
>>> In that case just drop the blocker and be done with it.
>> Yes, we don't need to protect from orchestrators. But we need to protect
>> unaware orchestrators.
> Right. You just trust orchestrators to do the right thing more than I do :)
> I feel they will blindly set the flag and then we are back to square one.
> I feel it's less likely with load because load already has a slightly
> different command line.

I do trust them :)
I have to, otherwise we would need to pack all the properties and
flags of qemu to the migration stream and validate them at
destination or entire migration ends up broken beyond repair if
orchestrators tend to do stupid things and need babysitting.

Virtio-fs is not an easy-to-miss case when implementing its
migration in the orchestrator. It is not the one that usually
works but sometimes breaks because of inflight request in unlucky
state of processing. If destination daemon doesn't have context
from the source mounted directory will fail all IO-requests
and will transition to a broken state immediately at the first request,
spamming dmesg with errors. Not delayed or probabilistic errors.

>
> In fact, if we wanted to we could fail qemu if the property is set
> but VM is started and not migrated.

I don't know. This still feels like taking away too much responsibility
from the orchestrator and putting additional restrictions just not
to let the management software screw up in a weird way.
And forcing to immediately migrate if migration flag is set is even
more restrictive.

>
>
>
>>>> I believe we should perform sanity checks if we have data but engineering
>>>> additional checks and putting extra restrictions just to prevent
>>>> orchestrator
>>>> from doing wrong things is an overkill.
>>> Exactly. The check on source is such an overkill - your problem
>>> is not on source, source has no issue sending the VM. Your problem is
>>> on destination - it can not get the data from daemon since the daemon
>>> is not local.
>>>
>>>
>>>>>> But allowing setup via command-line is valid too because some backends may
>>>>>> always be capable of external migration independent of hosts and don't need
>>>>>> the manipulations with QMP before migration at all.
> >>>>> I am much more worried that the realistic scenario is hard to manage
>>>>> safely than about theoretical state migrating backends that don't exist.
>>>>>
>>>>>
Anton Kuchin March 1, 2023, 7:42 p.m. UTC | #44
On 01/03/2023 17:52, Michael S. Tsirkin wrote:
> On Wed, Mar 01, 2023 at 05:40:09PM +0200, Anton Kuchin wrote:
> >> So catching errors is not the only purpose of this property, but it
>> definitely
>> allows us to catch some obvious ones.
> OK let's say this. If migration=external then migration just works.
> If migration=internal it fails for now. We are agreed here right?
>
> Our argument is whether to check on load or save?
>
> I propose this compromize: two properties:
> 	migration-load and migration-save
>
> 	migration-load : how is incoming migration handled.
> 			 internal - through qemu
> 			 external - through the daemon
>
> 			  checked in pre-load
>
> 	migration-save : how is outgoing migration handled.
> 			 internal - through qemu
> 			 external - through the daemon
>
> 			  checked in post-save
>
> This way whether the check is on source or destination or both
> is up to the user.
>
> Hmm?

Stefan, what do you think about this idea?
Michael S. Tsirkin March 1, 2023, 8:22 p.m. UTC | #45
On Wed, Mar 01, 2023 at 09:35:56PM +0200, Anton Kuchin wrote:
> I do trust them :)
> I have to, otherwise we would need to pack all the properties and
> flags of qemu to the migration stream and validate them at
> destination or entire migration ends up broken beyond repair if
> orchestrators tend to do stupid things and need babysitting.

This is not at all a crazy idea. It has some disadvantages
too esp around cross version migration, which is why we
don't do this yet.
Anton Kuchin March 6, 2023, 8:55 p.m. UTC | #46
On 01/03/2023 22:22, Michael S. Tsirkin wrote:
> On Wed, Mar 01, 2023 at 09:35:56PM +0200, Anton Kuchin wrote:
>> I do trust them :)
>> I have to, otherwise we would need to pack all the properties and
>> flags of qemu to the migration stream and validate them at
>> destination or entire migration ends up broken beyond repair if
>> orchestrators tend to do stupid things and need babysitting.
> This is not at all a crazy idea. It has some disadvantages
> too esp around cross version migration, which is why we
> don't do this yet.
>

Indeed. I know VMMs that chose this path. But as long as
qemu decided to trust orchestrators I think we'd better
keep this consistent across the codebase.

Only ivshmem_pre_load callback bothers to check device
property and virtio_net_tx_waiting_pre_load checks that
number of queue pairs doesn't exceed allowed maximum, all
other *_pre_load functions generally just initialize some
parts of state that need to be set before stream starts
loading.
Michael S. Tsirkin March 6, 2023, 9:53 p.m. UTC | #47
On Mon, Mar 06, 2023 at 10:55:29PM +0200, Anton Kuchin wrote:
> On 01/03/2023 22:22, Michael S. Tsirkin wrote:
> > On Wed, Mar 01, 2023 at 09:35:56PM +0200, Anton Kuchin wrote:
> > > I do trust them :)
> > > I have to, otherwise we would need to pack all the properties and
> > > flags of qemu to the migration stream and validate them at
> > > destination or entire migration ends up broken beyond repair if
> > > orchestrators tend to do stupid things and need babysitting.
> > This is not at all a crazy idea. It has some disadvantages
> > too esp around cross version migration, which is why we
> > don't do this yet.
> > 
> 
> Indeed. I know VMMs that chose this path. But as long as
> qemu decided to trust orchestrators I think we'd better
> keep this consistent across the codebase.
> 
> Only ivshmem_pre_load callback bothers to check device
> property and virtio_net_tx_waiting_pre_load checks that
> number of queue pairs doesn't exceed allowed maximum, all
> other *_pre_load functions generally just initialize some
> parts of state that need to be set before stream starts
> loading.

We do validate things by necessity; we just try to do
as much as we can table-driven so it's not open-coded
and less visible. We used to have a lot more callbacks;
nowadays we try to keep it table-driven.
But e.g. pci streams RO state and validates that it's the same
on destination.
Anton Kuchin March 17, 2023, 6:04 p.m. UTC | #48
On 06/03/2023 23:53, Michael S. Tsirkin wrote:
> On Mon, Mar 06, 2023 at 10:55:29PM +0200, Anton Kuchin wrote:
>> On 01/03/2023 22:22, Michael S. Tsirkin wrote:
>>> On Wed, Mar 01, 2023 at 09:35:56PM +0200, Anton Kuchin wrote:
>>>> I do trust them :)
>>>> I have to, otherwise we would need to pack all the properties and
>>>> flags of qemu to the migration stream and validate them at
>>>> destination or entire migration ends up broken beyond repair if
>>>> orchestrators tend to do stupid things and need babysitting.
>>> This is not at all a crazy idea. It has some disadvantages
>>> too esp around cross version migration, which is why we
>>> don't do this yet.
>>>
>> Indeed. I know VMMs that chose this path. But as long as
>> qemu decided to trust orchestrators I think we'd better
>> keep this consistent across the codebase.
>>
>> Only ivshmem_pre_load callback bothers to check device
>> property and virtio_net_tx_waiting_pre_load checks that
>> number of queue pairs doesn't exceed allowed maximum, all
>> other *_pre_load functions generally just initialize some
>> parts of state that need to be set before stream starts
>> loading.
> We do validate things by necessity; we just try to do
> as much as we can table-driven so it's not open-coded
> and less visible. We used to have a lot more callbacks;
> nowadays we try to keep it table-driven.
> But e.g. pci streams RO state and validates that it's the same
> on destination.
>

Sorry for late reply. I agree that checks should be done if
we have data at hand.
But in my opinion the situation here is a little different:
pci is emulated by qemu and RO state affects how emulation
will work on destination, the point of vhost-user is to
outsource device emulation logic to daemons outside qemu and
allow orchestrator manage both qemu and daemons.
So engineering additional flags in qemu that don't affect
device operation and serve only to check orchestrator correctness
is excessive in my opinion.
Anton Kuchin March 17, 2023, 7:02 p.m. UTC | #49
On 01/03/2023 17:33, Michael S. Tsirkin wrote:
> On Tue, Feb 28, 2023 at 07:59:54PM +0200, Anton Kuchin wrote:
>> We can't rely here entirely on
>> destination to block this because if VM is migrated to file and then
>> can't be loaded by destination there is no way to fallback and resume
>> the source so we need to have some kind of blocker on source by default.
> So I commented about a fallback. IMO it's just orchestrator being silly:
> don't kill source qemu until you have made sure destination loaded the
> file, or re-load it, and all will be well.

I agree that it is always better to keep source alive until destination
is loaded and ready to take-off. But in some cases resources are limited
or strictly partitioned so we can't keep two VMs alive at the same time
so the best option is to check all we need on the source to make sure
destination will run.
Off the top of my head, a host-sized VM would need an entire additional host to
migrate properly and lots of memory streaming that can cause huge downtime,
but if memory is in shmem local migration to update qemu can take as much
as just saving device state to file and running new qemu to load devices,
take over memory and continue running the guest.

>
> But a bigger issue that this highlights is simply that if migrating to
> file you have no idea at all where will the file be loaded.  Maybe some
> orchestrators know but I don't see how we can be sure all of them know.
> The only time where we know whether the file is loaded on the same host
> where it was saved is when we actually load it.
>

Yes. Migration to file requires orchestrator to be well aware of what
it is doing because file always contains only part of the state. This
is hard but sometimes there are no other good options.
diff mbox series

Patch

diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index d42493f630..d9b1aa2a5d 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -1143,3 +1143,13 @@  const PropertyInfo qdev_prop_uuid = {
     .set   = set_uuid,
     .set_default_value = set_default_uuid_auto,
 };
+
+const PropertyInfo qdev_prop_vhost_user_migration_type = {
+    .name = "VhostUserMigrationType",
+    .description = "none/external",
+    .enum_table = &VhostUserMigrationType_lookup,
+    .get = qdev_propinfo_get_enum,
+    .set = qdev_propinfo_set_enum,
+    .set_default_value = qdev_propinfo_set_default_value_enum,
+    .realized_set_allowed = true,
+};
diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c
index 83fc20e49e..7deb9df5ec 100644
--- a/hw/virtio/vhost-user-fs.c
+++ b/hw/virtio/vhost-user-fs.c
@@ -298,9 +298,35 @@  static struct vhost_dev *vuf_get_vhost(VirtIODevice *vdev)
     return &fs->vhost_dev;
 }
 
+static int vhost_user_fs_pre_save(void *opaque)
+{
+    VHostUserFS *fs = opaque;
+    g_autofree char *path = object_get_canonical_path(OBJECT(fs));
+
+    switch (fs->migration_type) {
+    case VHOST_USER_MIGRATION_TYPE_NONE:
+        error_report("Migration is blocked by device %s", path);
+        break;
+    case VHOST_USER_MIGRATION_TYPE_EXTERNAL:
+        return 0;
+    default:
+        error_report("Migration type '%s' is not supported by device %s",
+                     VhostUserMigrationType_str(fs->migration_type), path);
+        break;
+    }
+
+    return -1;
+}
+
 static const VMStateDescription vuf_vmstate = {
     .name = "vhost-user-fs",
-    .unmigratable = 1,
+    .minimum_version_id = 0,
+    .version_id = 0,
+    .fields = (VMStateField[]) {
+        VMSTATE_VIRTIO_DEVICE,
+        VMSTATE_END_OF_LIST()
+    },
+   .pre_save = vhost_user_fs_pre_save,
 };
 
 static Property vuf_properties[] = {
@@ -309,6 +335,10 @@  static Property vuf_properties[] = {
     DEFINE_PROP_UINT16("num-request-queues", VHostUserFS,
                        conf.num_request_queues, 1),
     DEFINE_PROP_UINT16("queue-size", VHostUserFS, conf.queue_size, 128),
+    DEFINE_PROP_UNSIGNED("migration", VHostUserFS, migration_type,
+                         VHOST_USER_MIGRATION_TYPE_NONE,
+                         qdev_prop_vhost_user_migration_type,
+                         VhostUserMigrationType),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/include/hw/qdev-properties-system.h b/include/hw/qdev-properties-system.h
index 0ac327ae60..1a67591590 100644
--- a/include/hw/qdev-properties-system.h
+++ b/include/hw/qdev-properties-system.h
@@ -22,6 +22,7 @@  extern const PropertyInfo qdev_prop_audiodev;
 extern const PropertyInfo qdev_prop_off_auto_pcibar;
 extern const PropertyInfo qdev_prop_pcie_link_speed;
 extern const PropertyInfo qdev_prop_pcie_link_width;
+extern const PropertyInfo qdev_prop_vhost_user_migration_type;
 
 #define DEFINE_PROP_PCI_DEVFN(_n, _s, _f, _d)                   \
     DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_pci_devfn, int32_t)
diff --git a/include/hw/virtio/vhost-user-fs.h b/include/hw/virtio/vhost-user-fs.h
index 94c3aaa84e..821b2a121c 100644
--- a/include/hw/virtio/vhost-user-fs.h
+++ b/include/hw/virtio/vhost-user-fs.h
@@ -19,6 +19,7 @@ 
 #include "hw/virtio/vhost-user.h"
 #include "chardev/char-fe.h"
 #include "qom/object.h"
+#include "qapi/qapi-types-migration.h"
 
 #define TYPE_VHOST_USER_FS "vhost-user-fs-device"
 OBJECT_DECLARE_SIMPLE_TYPE(VHostUserFS, VHOST_USER_FS)
@@ -40,6 +41,7 @@  struct VHostUserFS {
     VirtQueue **req_vqs;
     VirtQueue *hiprio_vq;
     int32_t bootindex;
+    VhostUserMigrationType migration_type;
 
     /*< public >*/
 };
diff --git a/qapi/migration.json b/qapi/migration.json
index c84fa10e86..78feb20ffc 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -2178,3 +2178,19 @@ 
   'data': { 'job-id': 'str',
             'tag': 'str',
             'devices': ['str'] } }
+
+##
+# @VhostUserMigrationType:
+#
+# Type of vhost-user device migration.
+#
+# @none: Migration is not supported, attempts to migrate with this device
+#        will be blocked.
+#
+# @external: Migration stream contains only virtio device state,
+#            daemon state should be transferred externally by orchestrator.
+#
+# Since: 8.0
+##
+{ 'enum': 'VhostUserMigrationType',
+  'data': [ 'none', 'external' ] }