diff mbox series

[V2,02/11] migration: cpr-state

Message ID 1719776434-435013-3-git-send-email-steven.sistare@oracle.com (mailing list archive)
State New, archived
Headers show
Series Live update: cpr-exec | expand

Commit Message

Steven Sistare June 30, 2024, 7:40 p.m. UTC
CPR must save state that is needed after QEMU is restarted, when devices
are realized.  Thus the extra state cannot be saved in the migration stream,
as objects must already exist before that stream can be loaded.  Instead,
define auxilliary state structures and vmstate descriptions, not associated
with any registered object, and serialize the aux state to a cpr-specific
stream in cpr_state_save.  Deserialize in cpr_state_load after QEMU
restarts, before devices are realized.

Provide accessors for clients to register file descriptors for saving.
The mechanism for passing the fd's to the new process will be specific
to each migration mode, and added in subsequent patches.

Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
---
 include/migration/cpr.h |  21 ++++++
 migration/cpr.c         | 188 ++++++++++++++++++++++++++++++++++++++++++++++++
 migration/meson.build   |   1 +
 migration/migration.c   |   6 ++
 migration/trace-events  |   5 ++
 system/vl.c             |   3 +
 6 files changed, 224 insertions(+)
 create mode 100644 include/migration/cpr.h
 create mode 100644 migration/cpr.c

Comments

Fabiano Rosas July 17, 2024, 6:39 p.m. UTC | #1
Steve Sistare <steven.sistare@oracle.com> writes:

> CPR must save state that is needed after QEMU is restarted, when devices
> are realized.  Thus the extra state cannot be saved in the migration stream,
> as objects must already exist before that stream can be loaded.  Instead,
> define auxilliary state structures and vmstate descriptions, not associated
> with any registered object, and serialize the aux state to a cpr-specific
> stream in cpr_state_save.  Deserialize in cpr_state_load after QEMU
> restarts, before devices are realized.
>
> Provide accessors for clients to register file descriptors for saving.
> The mechanism for passing the fd's to the new process will be specific
> to each migration mode, and added in subsequent patches.
>
> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>

Reviewed-by: Fabiano Rosas <farosas@suse.de>
Peter Xu July 19, 2024, 3:03 p.m. UTC | #2
On Sun, Jun 30, 2024 at 12:40:25PM -0700, Steve Sistare wrote:
> CPR must save state that is needed after QEMU is restarted, when devices
> are realized.  Thus the extra state cannot be saved in the migration stream,
> as objects must already exist before that stream can be loaded.  Instead,
> define auxilliary state structures and vmstate descriptions, not associated
> with any registered object, and serialize the aux state to a cpr-specific
> stream in cpr_state_save.  Deserialize in cpr_state_load after QEMU
> restarts, before devices are realized.
> 
> Provide accessors for clients to register file descriptors for saving.
> The mechanism for passing the fd's to the new process will be specific
> to each migration mode, and added in subsequent patches.
> 
> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
> ---
>  include/migration/cpr.h |  21 ++++++
>  migration/cpr.c         | 188 ++++++++++++++++++++++++++++++++++++++++++++++++
>  migration/meson.build   |   1 +
>  migration/migration.c   |   6 ++
>  migration/trace-events  |   5 ++
>  system/vl.c             |   3 +
>  6 files changed, 224 insertions(+)
>  create mode 100644 include/migration/cpr.h
>  create mode 100644 migration/cpr.c
> 
> diff --git a/include/migration/cpr.h b/include/migration/cpr.h
> new file mode 100644
> index 0000000..8e7e705
> --- /dev/null
> +++ b/include/migration/cpr.h
> @@ -0,0 +1,21 @@
> +/*
> + * Copyright (c) 2021, 2024 Oracle and/or its affiliates.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + */
> +
> +#ifndef MIGRATION_CPR_H
> +#define MIGRATION_CPR_H
> +
> +typedef int (*cpr_walk_fd_cb)(int fd);
> +void cpr_save_fd(const char *name, int id, int fd);
> +void cpr_delete_fd(const char *name, int id);
> +int cpr_find_fd(const char *name, int id);
> +int cpr_walk_fd(cpr_walk_fd_cb cb);
> +void cpr_resave_fd(const char *name, int id, int fd);
> +
> +int cpr_state_save(Error **errp);
> +int cpr_state_load(Error **errp);
> +
> +#endif
> diff --git a/migration/cpr.c b/migration/cpr.c
> new file mode 100644
> index 0000000..313e74e
> --- /dev/null
> +++ b/migration/cpr.c
> @@ -0,0 +1,188 @@
> +/*
> + * Copyright (c) 2021-2024 Oracle and/or its affiliates.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qapi/error.h"
> +#include "migration/cpr.h"
> +#include "migration/misc.h"
> +#include "migration/qemu-file.h"
> +#include "migration/savevm.h"
> +#include "migration/vmstate.h"
> +#include "sysemu/runstate.h"
> +#include "trace.h"
> +
> +/*************************************************************************/
> +/* cpr state container for all information to be saved. */
> +
> +typedef QLIST_HEAD(CprFdList, CprFd) CprFdList;
> +
> +typedef struct CprState {
> +    CprFdList fds;
> +} CprState;
> +
> +static CprState cpr_state;
> +
> +/****************************************************************************/
> +
> +typedef struct CprFd {
> +    char *name;
> +    unsigned int namelen;
> +    int id;
> +    int fd;

[1]

> +    QLIST_ENTRY(CprFd) next;
> +} CprFd;
> +
> +static const VMStateDescription vmstate_cpr_fd = {
> +    .name = "cpr fd",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_UINT32(namelen, CprFd),
> +        VMSTATE_VBUFFER_ALLOC_UINT32(name, CprFd, 0, NULL, namelen),
> +        VMSTATE_INT32(id, CprFd),
> +        VMSTATE_INT32(fd, CprFd),
> +        VMSTATE_END_OF_LIST()
> +    }
> +};
> +
> +void cpr_save_fd(const char *name, int id, int fd)
> +{
> +    CprFd *elem = g_new0(CprFd, 1);
> +
> +    trace_cpr_save_fd(name, id, fd);
> +    elem->name = g_strdup(name);
> +    elem->namelen = strlen(name) + 1;
> +    elem->id = id;
> +    elem->fd = fd;
> +    QLIST_INSERT_HEAD(&cpr_state.fds, elem, next);
> +}
> +
> +static CprFd *find_fd(CprFdList *head, const char *name, int id)
> +{
> +    CprFd *elem;
> +
> +    QLIST_FOREACH(elem, head, next) {
> +        if (!strcmp(elem->name, name) && elem->id == id) {
> +            return elem;
> +        }
> +    }
> +    return NULL;
> +}
> +
> +void cpr_delete_fd(const char *name, int id)
> +{
> +    CprFd *elem = find_fd(&cpr_state.fds, name, id);
> +
> +    if (elem) {
> +        QLIST_REMOVE(elem, next);
> +        g_free(elem->name);
> +        g_free(elem);
> +    }
> +
> +    trace_cpr_delete_fd(name, id);
> +}
> +
> +int cpr_find_fd(const char *name, int id)
> +{
> +    CprFd *elem = find_fd(&cpr_state.fds, name, id);
> +    int fd = elem ? elem->fd : -1;
> +
> +    trace_cpr_find_fd(name, id, fd);
> +    return fd;
> +}
> +
> +int cpr_walk_fd(cpr_walk_fd_cb cb)
> +{
> +    CprFd *elem;
> +
> +    QLIST_FOREACH(elem, &cpr_state.fds, next) {
> +        if (elem->fd >= 0 && cb(elem->fd)) {
> +            return 1;
> +        }
> +    }
> +    return 0;
> +}
> +
> +void cpr_resave_fd(const char *name, int id, int fd)
> +{
> +    CprFd *elem = find_fd(&cpr_state.fds, name, id);
> +    int old_fd = elem ? elem->fd : -1;
> +
> +    if (old_fd < 0) {
> +        cpr_save_fd(name, id, fd);

I don't think I know well on when old_fd<0 would happen yet, as this series
doesn't look like to use this function at all.  From that POV, maybe nice
to add a comment above [1] for "fd" field.

Meanwhile, do we need to remove the old_fd<0 element here, or is it
intended to keep that and the new CprFD?

> +    } else if (old_fd != fd) {
> +        error_setg(&error_fatal,
> +                   "internal error: cpr fd '%s' id %d value %d "
> +                   "already saved with a different value %d",
> +                   name, id, fd, old_fd);
> +    }
> +}
> +/*************************************************************************/
> +#define CPR_STATE "CprState"
> +
> +static const VMStateDescription vmstate_cpr_state = {
> +    .name = CPR_STATE,
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_QLIST_V(fds, CprState, 1, vmstate_cpr_fd, CprFd, next),
> +        VMSTATE_END_OF_LIST()
> +    }
> +};
> +/*************************************************************************/
> +
> +int cpr_state_save(Error **errp)
> +{
> +    int ret;
> +    QEMUFile *f;
> +
> +    /* set f based on mode in a later patch in this series */
> +    return 0;
> +
> +    qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
> +    qemu_put_be32(f, QEMU_VM_FILE_VERSION);

Having magic/version makes sense to me, though I'd suggest we use CPR new
magic/versions, so that if we see an binary dump we know what it is, and we
don't mixup a CPR image against a migration stream image.

> +
> +    ret = vmstate_save_state(f, &vmstate_cpr_state, &cpr_state, 0);

s/0/NULL/

> +    if (ret) {
> +        error_setg(errp, "vmstate_save_state error %d", ret);
> +    }

Can consider using vmstate_save_state_with_err().

> +
> +    qemu_fclose(f);
> +    return ret;
> +}
> +
> +int cpr_state_load(Error **errp)
> +{
> +    int ret;
> +    uint32_t v;
> +    QEMUFile *f;
> +
> +    /* set f based on mode in a later patch in this series */
> +    return 0;
> +
> +    v = qemu_get_be32(f);
> +    if (v != QEMU_VM_FILE_MAGIC) {
> +        error_setg(errp, "Not a migration stream (bad magic %x)", v);
> +        qemu_fclose(f);
> +        return -EINVAL;
> +    }
> +    v = qemu_get_be32(f);
> +    if (v != QEMU_VM_FILE_VERSION) {
> +        error_setg(errp, "Unsupported migration stream version %d", v);
> +        qemu_fclose(f);
> +        return -ENOTSUP;
> +    }
> +
> +    ret = vmstate_load_state(f, &vmstate_cpr_state, &cpr_state, 1);
> +    if (ret) {
> +        error_setg(errp, "vmstate_load_state error %d", ret);
> +    }

Simiarly, can use vmstate_save_state_with_err().

> +
> +    qemu_fclose(f);
> +    return ret;
> +}
> +
> diff --git a/migration/meson.build b/migration/meson.build
> index 5ce2acb4..87feb4c 100644
> --- a/migration/meson.build
> +++ b/migration/meson.build
> @@ -13,6 +13,7 @@ system_ss.add(files(
>    'block-dirty-bitmap.c',
>    'channel.c',
>    'channel-block.c',
> +  'cpr.c',
>    'dirtyrate.c',
>    'exec.c',
>    'fd.c',
> diff --git a/migration/migration.c b/migration/migration.c
> index 3dea06d..e394ad7 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -27,6 +27,7 @@
>  #include "sysemu/cpu-throttle.h"
>  #include "rdma.h"
>  #include "ram.h"
> +#include "migration/cpr.h"
>  #include "migration/global_state.h"
>  #include "migration/misc.h"
>  #include "migration.h"
> @@ -2118,6 +2119,10 @@ void qmp_migrate(const char *uri, bool has_channels,
>          }
>      }
>  
> +    if (cpr_state_save(&local_err)) {
> +        goto out;
> +    }
> +
>      if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) {
>          SocketAddress *saddr = &addr->u.socket;
>          if (saddr->type == SOCKET_ADDRESS_TYPE_INET ||
> @@ -2142,6 +2147,7 @@ void qmp_migrate(const char *uri, bool has_channels,
>                            MIGRATION_STATUS_FAILED);
>      }
>  
> +out:
>      if (local_err) {
>          if (!resume_requested) {
>              yank_unregister_instance(MIGRATION_YANK_INSTANCE);
> diff --git a/migration/trace-events b/migration/trace-events
> index 0b7c332..173f2c0 100644
> --- a/migration/trace-events
> +++ b/migration/trace-events
> @@ -340,6 +340,11 @@ colo_receive_message(const char *msg) "Receive '%s' message"
>  # colo-failover.c
>  colo_failover_set_state(const char *new_state) "new state %s"
>  
> +# cpr.c
> +cpr_save_fd(const char *name, int id, int fd) "%s, id %d, fd %d"
> +cpr_delete_fd(const char *name, int id) "%s, id %d"
> +cpr_find_fd(const char *name, int id, int fd) "%s, id %d returns %d"
> +
>  # block-dirty-bitmap.c
>  send_bitmap_header_enter(void) ""
>  send_bitmap_bits(uint32_t flags, uint64_t start_sector, uint32_t nr_sectors, uint64_t data_size) "flags: 0x%x, start_sector: %" PRIu64 ", nr_sectors: %" PRIu32 ", data_size: %" PRIu64
> diff --git a/system/vl.c b/system/vl.c
> index 03951be..6521ee3 100644
> --- a/system/vl.c
> +++ b/system/vl.c
> @@ -77,6 +77,7 @@
>  #include "hw/block/block.h"
>  #include "hw/i386/x86.h"
>  #include "hw/i386/pc.h"
> +#include "migration/cpr.h"
>  #include "migration/misc.h"
>  #include "migration/snapshot.h"
>  #include "sysemu/tpm.h"
> @@ -3713,6 +3714,8 @@ void qemu_init(int argc, char **argv)
>  
>      qemu_create_machine(machine_opts_dict);
>  
> +    cpr_state_load(&error_fatal);

Might be good to add a rich comment here explaining the decision on why
loading here; I think most of the tricks lie here.  E.g., it needs to be
before XXX and it needs to be after YYY.

Thanks,

> +
>      suspend_mux_open();
>  
>      qemu_disable_default_devices();
> -- 
> 1.8.3.1
>
Steven Sistare July 20, 2024, 7:53 p.m. UTC | #3
On 7/19/2024 11:03 AM, Peter Xu wrote:
> On Sun, Jun 30, 2024 at 12:40:25PM -0700, Steve Sistare wrote:
>> CPR must save state that is needed after QEMU is restarted, when devices
>> are realized.  Thus the extra state cannot be saved in the migration stream,
>> as objects must already exist before that stream can be loaded.  Instead,
>> define auxilliary state structures and vmstate descriptions, not associated
>> with any registered object, and serialize the aux state to a cpr-specific
>> stream in cpr_state_save.  Deserialize in cpr_state_load after QEMU
>> restarts, before devices are realized.
>>
>> Provide accessors for clients to register file descriptors for saving.
>> The mechanism for passing the fd's to the new process will be specific
>> to each migration mode, and added in subsequent patches.
>>
>> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
>> ---
>>   include/migration/cpr.h |  21 ++++++
>>   migration/cpr.c         | 188 ++++++++++++++++++++++++++++++++++++++++++++++++
>>   migration/meson.build   |   1 +
>>   migration/migration.c   |   6 ++
>>   migration/trace-events  |   5 ++
>>   system/vl.c             |   3 +
>>   6 files changed, 224 insertions(+)
>>   create mode 100644 include/migration/cpr.h
>>   create mode 100644 migration/cpr.c
>>
>> diff --git a/include/migration/cpr.h b/include/migration/cpr.h
>> new file mode 100644
>> index 0000000..8e7e705
>> --- /dev/null
>> +++ b/include/migration/cpr.h
>> @@ -0,0 +1,21 @@
>> +/*
>> + * Copyright (c) 2021, 2024 Oracle and/or its affiliates.
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
>> + * See the COPYING file in the top-level directory.
>> + */
>> +
>> +#ifndef MIGRATION_CPR_H
>> +#define MIGRATION_CPR_H
>> +
>> +typedef int (*cpr_walk_fd_cb)(int fd);
>> +void cpr_save_fd(const char *name, int id, int fd);
>> +void cpr_delete_fd(const char *name, int id);
>> +int cpr_find_fd(const char *name, int id);
>> +int cpr_walk_fd(cpr_walk_fd_cb cb);
>> +void cpr_resave_fd(const char *name, int id, int fd);
>> +
>> +int cpr_state_save(Error **errp);
>> +int cpr_state_load(Error **errp);
>> +
>> +#endif
>> diff --git a/migration/cpr.c b/migration/cpr.c
>> new file mode 100644
>> index 0000000..313e74e
>> --- /dev/null
>> +++ b/migration/cpr.c
>> @@ -0,0 +1,188 @@
>> +/*
>> + * Copyright (c) 2021-2024 Oracle and/or its affiliates.
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
>> + * See the COPYING file in the top-level directory.
>> + */
>> +
>> +#include "qemu/osdep.h"
>> +#include "qapi/error.h"
>> +#include "migration/cpr.h"
>> +#include "migration/misc.h"
>> +#include "migration/qemu-file.h"
>> +#include "migration/savevm.h"
>> +#include "migration/vmstate.h"
>> +#include "sysemu/runstate.h"
>> +#include "trace.h"
>> +
>> +/*************************************************************************/
>> +/* cpr state container for all information to be saved. */
>> +
>> +typedef QLIST_HEAD(CprFdList, CprFd) CprFdList;
>> +
>> +typedef struct CprState {
>> +    CprFdList fds;
>> +} CprState;
>> +
>> +static CprState cpr_state;
>> +
>> +/****************************************************************************/
>> +
>> +typedef struct CprFd {
>> +    char *name;
>> +    unsigned int namelen;
>> +    int id;
>> +    int fd;
> 
> [1]
> 
>> +    QLIST_ENTRY(CprFd) next;
>> +} CprFd;
>> +
>> +static const VMStateDescription vmstate_cpr_fd = {
>> +    .name = "cpr fd",
>> +    .version_id = 1,
>> +    .minimum_version_id = 1,
>> +    .fields = (VMStateField[]) {
>> +        VMSTATE_UINT32(namelen, CprFd),
>> +        VMSTATE_VBUFFER_ALLOC_UINT32(name, CprFd, 0, NULL, namelen),
>> +        VMSTATE_INT32(id, CprFd),
>> +        VMSTATE_INT32(fd, CprFd),
>> +        VMSTATE_END_OF_LIST()
>> +    }
>> +};
>> +
>> +void cpr_save_fd(const char *name, int id, int fd)
>> +{
>> +    CprFd *elem = g_new0(CprFd, 1);
>> +
>> +    trace_cpr_save_fd(name, id, fd);
>> +    elem->name = g_strdup(name);
>> +    elem->namelen = strlen(name) + 1;
>> +    elem->id = id;
>> +    elem->fd = fd;
>> +    QLIST_INSERT_HEAD(&cpr_state.fds, elem, next);
>> +}
>> +
>> +static CprFd *find_fd(CprFdList *head, const char *name, int id)
>> +{
>> +    CprFd *elem;
>> +
>> +    QLIST_FOREACH(elem, head, next) {
>> +        if (!strcmp(elem->name, name) && elem->id == id) {
>> +            return elem;
>> +        }
>> +    }
>> +    return NULL;
>> +}
>> +
>> +void cpr_delete_fd(const char *name, int id)
>> +{
>> +    CprFd *elem = find_fd(&cpr_state.fds, name, id);
>> +
>> +    if (elem) {
>> +        QLIST_REMOVE(elem, next);
>> +        g_free(elem->name);
>> +        g_free(elem);
>> +    }
>> +
>> +    trace_cpr_delete_fd(name, id);
>> +}
>> +
>> +int cpr_find_fd(const char *name, int id)
>> +{
>> +    CprFd *elem = find_fd(&cpr_state.fds, name, id);
>> +    int fd = elem ? elem->fd : -1;
>> +
>> +    trace_cpr_find_fd(name, id, fd);
>> +    return fd;
>> +}
>> +
>> +int cpr_walk_fd(cpr_walk_fd_cb cb)
>> +{
>> +    CprFd *elem;
>> +
>> +    QLIST_FOREACH(elem, &cpr_state.fds, next) {
>> +        if (elem->fd >= 0 && cb(elem->fd)) {
>> +            return 1;
>> +        }
>> +    }
>> +    return 0;
>> +}
>> +
>> +void cpr_resave_fd(const char *name, int id, int fd)
>> +{
>> +    CprFd *elem = find_fd(&cpr_state.fds, name, id);
>> +    int old_fd = elem ? elem->fd : -1;
>> +
>> +    if (old_fd < 0) {
>> +        cpr_save_fd(name, id, fd);
> 
> I don't think I know well on when old_fd<0 would happen yet, as this series
> doesn't look like to use this function at all.  From that POV, maybe nice
> to add a comment above [1] for "fd" field.
cpr_resave_fd can simplify client logic.  It allows the same name,fd,id triplet to
be (re) saved without creating a duplicate entry.  The vfio series uses it.  Yes,
I should add some brief API docs in the header file.

> Meanwhile, do we need to remove the old_fd<0 element here, or is it
> intended to keep that and the new CprFD?

old_fd < 0 is not an entry, it means no entry was found.

>> +    } else if (old_fd != fd) {
>> +        error_setg(&error_fatal,
>> +                   "internal error: cpr fd '%s' id %d value %d "
>> +                   "already saved with a different value %d",
>> +                   name, id, fd, old_fd);
>> +    }
>> +}
>> +/*************************************************************************/
>> +#define CPR_STATE "CprState"
>> +
>> +static const VMStateDescription vmstate_cpr_state = {
>> +    .name = CPR_STATE,
>> +    .version_id = 1,
>> +    .minimum_version_id = 1,
>> +    .fields = (VMStateField[]) {
>> +        VMSTATE_QLIST_V(fds, CprState, 1, vmstate_cpr_fd, CprFd, next),
>> +        VMSTATE_END_OF_LIST()
>> +    }
>> +};
>> +/*************************************************************************/
>> +
>> +int cpr_state_save(Error **errp)
>> +{
>> +    int ret;
>> +    QEMUFile *f;
>> +
>> +    /* set f based on mode in a later patch in this series */
>> +    return 0;
>> +
>> +    qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
>> +    qemu_put_be32(f, QEMU_VM_FILE_VERSION);
> 
> Having magic/version makes sense to me, though I'd suggest we use CPR new
> magic/versions, so that if we see an binary dump we know what it is, and we
> don't mixup a CPR image against a migration stream image.

Will do.

>> +
>> +    ret = vmstate_save_state(f, &vmstate_cpr_state, &cpr_state, 0);
> 
> s/0/NULL/

Will do.

>> +    if (ret) {
>> +        error_setg(errp, "vmstate_save_state error %d", ret);
>> +    }
> 
> Can consider using vmstate_save_state_with_err().

Cool, will do.

>> +
>> +    qemu_fclose(f);
>> +    return ret;
>> +}
>> +
>> +int cpr_state_load(Error **errp)
>> +{
>> +    int ret;
>> +    uint32_t v;
>> +    QEMUFile *f;
>> +
>> +    /* set f based on mode in a later patch in this series */
>> +    return 0;
>> +
>> +    v = qemu_get_be32(f);
>> +    if (v != QEMU_VM_FILE_MAGIC) {
>> +        error_setg(errp, "Not a migration stream (bad magic %x)", v);
>> +        qemu_fclose(f);
>> +        return -EINVAL;
>> +    }
>> +    v = qemu_get_be32(f);
>> +    if (v != QEMU_VM_FILE_VERSION) {
>> +        error_setg(errp, "Unsupported migration stream version %d", v);
>> +        qemu_fclose(f);
>> +        return -ENOTSUP;
>> +    }
>> +
>> +    ret = vmstate_load_state(f, &vmstate_cpr_state, &cpr_state, 1);
>> +    if (ret) {
>> +        error_setg(errp, "vmstate_load_state error %d", ret);
>> +    }
> 
> Simiarly, can use vmstate_save_state_with_err().

Hmm, vmstate_load_state_with_err does not exist.

>> +
>> +    qemu_fclose(f);
>> +    return ret;
>> +}
>> +
>> diff --git a/migration/meson.build b/migration/meson.build
>> index 5ce2acb4..87feb4c 100644
>> --- a/migration/meson.build
>> +++ b/migration/meson.build
>> @@ -13,6 +13,7 @@ system_ss.add(files(
>>     'block-dirty-bitmap.c',
>>     'channel.c',
>>     'channel-block.c',
>> +  'cpr.c',
>>     'dirtyrate.c',
>>     'exec.c',
>>     'fd.c',
>> diff --git a/migration/migration.c b/migration/migration.c
>> index 3dea06d..e394ad7 100644
>> --- a/migration/migration.c
>> +++ b/migration/migration.c
>> @@ -27,6 +27,7 @@
>>   #include "sysemu/cpu-throttle.h"
>>   #include "rdma.h"
>>   #include "ram.h"
>> +#include "migration/cpr.h"
>>   #include "migration/global_state.h"
>>   #include "migration/misc.h"
>>   #include "migration.h"
>> @@ -2118,6 +2119,10 @@ void qmp_migrate(const char *uri, bool has_channels,
>>           }
>>       }
>>   
>> +    if (cpr_state_save(&local_err)) {
>> +        goto out;
>> +    }
>> +
>>       if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) {
>>           SocketAddress *saddr = &addr->u.socket;
>>           if (saddr->type == SOCKET_ADDRESS_TYPE_INET ||
>> @@ -2142,6 +2147,7 @@ void qmp_migrate(const char *uri, bool has_channels,
>>                             MIGRATION_STATUS_FAILED);
>>       }
>>   
>> +out:
>>       if (local_err) {
>>           if (!resume_requested) {
>>               yank_unregister_instance(MIGRATION_YANK_INSTANCE);
>> diff --git a/migration/trace-events b/migration/trace-events
>> index 0b7c332..173f2c0 100644
>> --- a/migration/trace-events
>> +++ b/migration/trace-events
>> @@ -340,6 +340,11 @@ colo_receive_message(const char *msg) "Receive '%s' message"
>>   # colo-failover.c
>>   colo_failover_set_state(const char *new_state) "new state %s"
>>   
>> +# cpr.c
>> +cpr_save_fd(const char *name, int id, int fd) "%s, id %d, fd %d"
>> +cpr_delete_fd(const char *name, int id) "%s, id %d"
>> +cpr_find_fd(const char *name, int id, int fd) "%s, id %d returns %d"
>> +
>>   # block-dirty-bitmap.c
>>   send_bitmap_header_enter(void) ""
>>   send_bitmap_bits(uint32_t flags, uint64_t start_sector, uint32_t nr_sectors, uint64_t data_size) "flags: 0x%x, start_sector: %" PRIu64 ", nr_sectors: %" PRIu32 ", data_size: %" PRIu64
>> diff --git a/system/vl.c b/system/vl.c
>> index 03951be..6521ee3 100644
>> --- a/system/vl.c
>> +++ b/system/vl.c
>> @@ -77,6 +77,7 @@
>>   #include "hw/block/block.h"
>>   #include "hw/i386/x86.h"
>>   #include "hw/i386/pc.h"
>> +#include "migration/cpr.h"
>>   #include "migration/misc.h"
>>   #include "migration/snapshot.h"
>>   #include "sysemu/tpm.h"
>> @@ -3713,6 +3714,8 @@ void qemu_init(int argc, char **argv)
>>   
>>       qemu_create_machine(machine_opts_dict);
>>   
>> +    cpr_state_load(&error_fatal);
> 
> Might be good to add a rich comment here explaining the decision on why
> loading here; I think most of the tricks lie here.  E.g., it needs to be
> before XXX and it needs to be after YYY.

Will do.

- Steve
diff mbox series

Patch

diff --git a/include/migration/cpr.h b/include/migration/cpr.h
new file mode 100644
index 0000000..8e7e705
--- /dev/null
+++ b/include/migration/cpr.h
@@ -0,0 +1,21 @@ 
+/*
+ * Copyright (c) 2021, 2024 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef MIGRATION_CPR_H
+#define MIGRATION_CPR_H
+
+typedef int (*cpr_walk_fd_cb)(int fd);
+void cpr_save_fd(const char *name, int id, int fd);
+void cpr_delete_fd(const char *name, int id);
+int cpr_find_fd(const char *name, int id);
+int cpr_walk_fd(cpr_walk_fd_cb cb);
+void cpr_resave_fd(const char *name, int id, int fd);
+
+int cpr_state_save(Error **errp);
+int cpr_state_load(Error **errp);
+
+#endif
diff --git a/migration/cpr.c b/migration/cpr.c
new file mode 100644
index 0000000..313e74e
--- /dev/null
+++ b/migration/cpr.c
@@ -0,0 +1,188 @@ 
+/*
+ * Copyright (c) 2021-2024 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "migration/cpr.h"
+#include "migration/misc.h"
+#include "migration/qemu-file.h"
+#include "migration/savevm.h"
+#include "migration/vmstate.h"
+#include "sysemu/runstate.h"
+#include "trace.h"
+
+/*************************************************************************/
+/* cpr state container for all information to be saved. */
+
+typedef QLIST_HEAD(CprFdList, CprFd) CprFdList;
+
+typedef struct CprState {
+    CprFdList fds;
+} CprState;
+
+static CprState cpr_state;
+
+/****************************************************************************/
+
+typedef struct CprFd {
+    char *name;
+    unsigned int namelen;
+    int id;
+    int fd;
+    QLIST_ENTRY(CprFd) next;
+} CprFd;
+
+static const VMStateDescription vmstate_cpr_fd = {
+    .name = "cpr fd",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(namelen, CprFd),
+        VMSTATE_VBUFFER_ALLOC_UINT32(name, CprFd, 0, NULL, namelen),
+        VMSTATE_INT32(id, CprFd),
+        VMSTATE_INT32(fd, CprFd),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+void cpr_save_fd(const char *name, int id, int fd)
+{
+    CprFd *elem = g_new0(CprFd, 1);
+
+    trace_cpr_save_fd(name, id, fd);
+    elem->name = g_strdup(name);
+    elem->namelen = strlen(name) + 1;
+    elem->id = id;
+    elem->fd = fd;
+    QLIST_INSERT_HEAD(&cpr_state.fds, elem, next);
+}
+
+static CprFd *find_fd(CprFdList *head, const char *name, int id)
+{
+    CprFd *elem;
+
+    QLIST_FOREACH(elem, head, next) {
+        if (!strcmp(elem->name, name) && elem->id == id) {
+            return elem;
+        }
+    }
+    return NULL;
+}
+
+void cpr_delete_fd(const char *name, int id)
+{
+    CprFd *elem = find_fd(&cpr_state.fds, name, id);
+
+    if (elem) {
+        QLIST_REMOVE(elem, next);
+        g_free(elem->name);
+        g_free(elem);
+    }
+
+    trace_cpr_delete_fd(name, id);
+}
+
+int cpr_find_fd(const char *name, int id)
+{
+    CprFd *elem = find_fd(&cpr_state.fds, name, id);
+    int fd = elem ? elem->fd : -1;
+
+    trace_cpr_find_fd(name, id, fd);
+    return fd;
+}
+
+int cpr_walk_fd(cpr_walk_fd_cb cb)
+{
+    CprFd *elem;
+
+    QLIST_FOREACH(elem, &cpr_state.fds, next) {
+        if (elem->fd >= 0 && cb(elem->fd)) {
+            return 1;
+        }
+    }
+    return 0;
+}
+
+void cpr_resave_fd(const char *name, int id, int fd)
+{
+    CprFd *elem = find_fd(&cpr_state.fds, name, id);
+    int old_fd = elem ? elem->fd : -1;
+
+    if (old_fd < 0) {
+        cpr_save_fd(name, id, fd);
+    } else if (old_fd != fd) {
+        error_setg(&error_fatal,
+                   "internal error: cpr fd '%s' id %d value %d "
+                   "already saved with a different value %d",
+                   name, id, fd, old_fd);
+    }
+}
+/*************************************************************************/
+#define CPR_STATE "CprState"
+
+static const VMStateDescription vmstate_cpr_state = {
+    .name = CPR_STATE,
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_QLIST_V(fds, CprState, 1, vmstate_cpr_fd, CprFd, next),
+        VMSTATE_END_OF_LIST()
+    }
+};
+/*************************************************************************/
+
+int cpr_state_save(Error **errp)
+{
+    int ret;
+    QEMUFile *f;
+
+    /* set f based on mode in a later patch in this series */
+    return 0;
+
+    qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
+    qemu_put_be32(f, QEMU_VM_FILE_VERSION);
+
+    ret = vmstate_save_state(f, &vmstate_cpr_state, &cpr_state, 0);
+    if (ret) {
+        error_setg(errp, "vmstate_save_state error %d", ret);
+    }
+
+    qemu_fclose(f);
+    return ret;
+}
+
+int cpr_state_load(Error **errp)
+{
+    int ret;
+    uint32_t v;
+    QEMUFile *f;
+
+    /* set f based on mode in a later patch in this series */
+    return 0;
+
+    v = qemu_get_be32(f);
+    if (v != QEMU_VM_FILE_MAGIC) {
+        error_setg(errp, "Not a migration stream (bad magic %x)", v);
+        qemu_fclose(f);
+        return -EINVAL;
+    }
+    v = qemu_get_be32(f);
+    if (v != QEMU_VM_FILE_VERSION) {
+        error_setg(errp, "Unsupported migration stream version %d", v);
+        qemu_fclose(f);
+        return -ENOTSUP;
+    }
+
+    ret = vmstate_load_state(f, &vmstate_cpr_state, &cpr_state, 1);
+    if (ret) {
+        error_setg(errp, "vmstate_load_state error %d", ret);
+    }
+
+    qemu_fclose(f);
+    return ret;
+}
+
diff --git a/migration/meson.build b/migration/meson.build
index 5ce2acb4..87feb4c 100644
--- a/migration/meson.build
+++ b/migration/meson.build
@@ -13,6 +13,7 @@  system_ss.add(files(
   'block-dirty-bitmap.c',
   'channel.c',
   'channel-block.c',
+  'cpr.c',
   'dirtyrate.c',
   'exec.c',
   'fd.c',
diff --git a/migration/migration.c b/migration/migration.c
index 3dea06d..e394ad7 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -27,6 +27,7 @@ 
 #include "sysemu/cpu-throttle.h"
 #include "rdma.h"
 #include "ram.h"
+#include "migration/cpr.h"
 #include "migration/global_state.h"
 #include "migration/misc.h"
 #include "migration.h"
@@ -2118,6 +2119,10 @@  void qmp_migrate(const char *uri, bool has_channels,
         }
     }
 
+    if (cpr_state_save(&local_err)) {
+        goto out;
+    }
+
     if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) {
         SocketAddress *saddr = &addr->u.socket;
         if (saddr->type == SOCKET_ADDRESS_TYPE_INET ||
@@ -2142,6 +2147,7 @@  void qmp_migrate(const char *uri, bool has_channels,
                           MIGRATION_STATUS_FAILED);
     }
 
+out:
     if (local_err) {
         if (!resume_requested) {
             yank_unregister_instance(MIGRATION_YANK_INSTANCE);
diff --git a/migration/trace-events b/migration/trace-events
index 0b7c332..173f2c0 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -340,6 +340,11 @@  colo_receive_message(const char *msg) "Receive '%s' message"
 # colo-failover.c
 colo_failover_set_state(const char *new_state) "new state %s"
 
+# cpr.c
+cpr_save_fd(const char *name, int id, int fd) "%s, id %d, fd %d"
+cpr_delete_fd(const char *name, int id) "%s, id %d"
+cpr_find_fd(const char *name, int id, int fd) "%s, id %d returns %d"
+
 # block-dirty-bitmap.c
 send_bitmap_header_enter(void) ""
 send_bitmap_bits(uint32_t flags, uint64_t start_sector, uint32_t nr_sectors, uint64_t data_size) "flags: 0x%x, start_sector: %" PRIu64 ", nr_sectors: %" PRIu32 ", data_size: %" PRIu64
diff --git a/system/vl.c b/system/vl.c
index 03951be..6521ee3 100644
--- a/system/vl.c
+++ b/system/vl.c
@@ -77,6 +77,7 @@ 
 #include "hw/block/block.h"
 #include "hw/i386/x86.h"
 #include "hw/i386/pc.h"
+#include "migration/cpr.h"
 #include "migration/misc.h"
 #include "migration/snapshot.h"
 #include "sysemu/tpm.h"
@@ -3713,6 +3714,8 @@  void qemu_init(int argc, char **argv)
 
     qemu_create_machine(machine_opts_dict);
 
+    cpr_state_load(&error_fatal);
+
     suspend_mux_open();
 
     qemu_disable_default_devices();