diff mbox series

[V2,02/13] migration: cpr-state

Message ID 1727725244-105198-3-git-send-email-steven.sistare@oracle.com (mailing list archive)
State New, archived
Headers show
Series Live update: cpr-transfer | expand

Commit Message

Steven Sistare Sept. 30, 2024, 7:40 p.m. UTC
CPR must save state that is needed after QEMU is restarted, when devices
are realized.  Thus the extra state cannot be saved in the migration stream,
as objects must already exist before that stream can be loaded.  Instead,
define auxilliary state structures and vmstate descriptions, not associated
with any registered object, and serialize the aux state to a cpr-specific
stream in cpr_state_save.  Deserialize in cpr_state_load after QEMU
restarts, before devices are realized.

Provide accessors for clients to register file descriptors for saving.
The mechanism for passing the fd's to the new process will be specific
to each migration mode, and added in subsequent patches.

Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
Reviewed-by: Fabiano Rosas <farosas@suse.de>
---
 include/migration/cpr.h |  26 ++++++
 migration/cpr.c         | 217 ++++++++++++++++++++++++++++++++++++++++++++++++
 migration/meson.build   |   1 +
 migration/migration.c   |   6 ++
 migration/trace-events  |   5 ++
 system/vl.c             |   7 ++
 6 files changed, 262 insertions(+)
 create mode 100644 include/migration/cpr.h
 create mode 100644 migration/cpr.c

Comments

Peter Xu Oct. 7, 2024, 2:14 p.m. UTC | #1
On Mon, Sep 30, 2024 at 12:40:33PM -0700, Steve Sistare wrote:
> CPR must save state that is needed after QEMU is restarted, when devices
> are realized.  Thus the extra state cannot be saved in the migration stream,
> as objects must already exist before that stream can be loaded.  Instead,
> define auxilliary state structures and vmstate descriptions, not associated
> with any registered object, and serialize the aux state to a cpr-specific
> stream in cpr_state_save.  Deserialize in cpr_state_load after QEMU
> restarts, before devices are realized.
> 
> Provide accessors for clients to register file descriptors for saving.
> The mechanism for passing the fd's to the new process will be specific
> to each migration mode, and added in subsequent patches.
> 
> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
> Reviewed-by: Fabiano Rosas <farosas@suse.de>

Only two trivial comments below.

> ---
>  include/migration/cpr.h |  26 ++++++
>  migration/cpr.c         | 217 ++++++++++++++++++++++++++++++++++++++++++++++++
>  migration/meson.build   |   1 +
>  migration/migration.c   |   6 ++
>  migration/trace-events  |   5 ++
>  system/vl.c             |   7 ++
>  6 files changed, 262 insertions(+)
>  create mode 100644 include/migration/cpr.h
>  create mode 100644 migration/cpr.c
> 
> diff --git a/include/migration/cpr.h b/include/migration/cpr.h
> new file mode 100644
> index 0000000..e7b898b
> --- /dev/null
> +++ b/include/migration/cpr.h
> @@ -0,0 +1,26 @@
> +/*
> + * Copyright (c) 2021, 2024 Oracle and/or its affiliates.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + */
> +
> +#ifndef MIGRATION_CPR_H
> +#define MIGRATION_CPR_H
> +
> +#define QEMU_CPR_FILE_MAGIC     0x51435052
> +#define QEMU_CPR_FILE_VERSION   0x00000001
> +
> +typedef int (*cpr_walk_fd_cb)(int fd);
> +void cpr_save_fd(const char *name, int id, int fd);
> +void cpr_delete_fd(const char *name, int id);
> +int cpr_find_fd(const char *name, int id);
> +int cpr_walk_fd(cpr_walk_fd_cb cb);
> +void cpr_resave_fd(const char *name, int id, int fd);
> +
> +int cpr_state_save(Error **errp);
> +int cpr_state_load(Error **errp);
> +void cpr_state_close(void);
> +struct QIOChannel *cpr_state_ioc(void);
> +
> +#endif
> diff --git a/migration/cpr.c b/migration/cpr.c
> new file mode 100644
> index 0000000..e50fc75
> --- /dev/null
> +++ b/migration/cpr.c
> @@ -0,0 +1,217 @@
> +/*
> + * Copyright (c) 2021-2024 Oracle and/or its affiliates.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qapi/error.h"
> +#include "migration/cpr.h"
> +#include "migration/misc.h"
> +#include "migration/qemu-file.h"
> +#include "migration/savevm.h"
> +#include "migration/vmstate.h"
> +#include "sysemu/runstate.h"
> +#include "trace.h"
> +
> +/*************************************************************************/
> +/* cpr state container for all information to be saved. */
> +
> +typedef QLIST_HEAD(CprFdList, CprFd) CprFdList;
> +
> +typedef struct CprState {
> +    CprFdList fds;
> +} CprState;
> +
> +static CprState cpr_state;
> +
> +/****************************************************************************/
> +
> +typedef struct CprFd {
> +    char *name;
> +    unsigned int namelen;
> +    int id;
> +    int fd;
> +    QLIST_ENTRY(CprFd) next;
> +} CprFd;
> +
> +static const VMStateDescription vmstate_cpr_fd = {
> +    .name = "cpr fd",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_UINT32(namelen, CprFd),
> +        VMSTATE_VBUFFER_ALLOC_UINT32(name, CprFd, 0, NULL, namelen),
> +        VMSTATE_INT32(id, CprFd),
> +        VMSTATE_INT32(fd, CprFd),
> +        VMSTATE_END_OF_LIST()
> +    }
> +};
> +
> +void cpr_save_fd(const char *name, int id, int fd)
> +{
> +    CprFd *elem = g_new0(CprFd, 1);
> +
> +    trace_cpr_save_fd(name, id, fd);
> +    elem->name = g_strdup(name);
> +    elem->namelen = strlen(name) + 1;
> +    elem->id = id;
> +    elem->fd = fd;
> +    QLIST_INSERT_HEAD(&cpr_state.fds, elem, next);
> +}
> +
> +static CprFd *find_fd(CprFdList *head, const char *name, int id)
> +{
> +    CprFd *elem;
> +
> +    QLIST_FOREACH(elem, head, next) {
> +        if (!strcmp(elem->name, name) && elem->id == id) {
> +            return elem;
> +        }
> +    }
> +    return NULL;
> +}
> +
> +void cpr_delete_fd(const char *name, int id)
> +{
> +    CprFd *elem = find_fd(&cpr_state.fds, name, id);
> +
> +    if (elem) {
> +        QLIST_REMOVE(elem, next);
> +        g_free(elem->name);
> +        g_free(elem);
> +    }
> +
> +    trace_cpr_delete_fd(name, id);
> +}
> +
> +int cpr_find_fd(const char *name, int id)
> +{
> +    CprFd *elem = find_fd(&cpr_state.fds, name, id);
> +    int fd = elem ? elem->fd : -1;
> +
> +    trace_cpr_find_fd(name, id, fd);
> +    return fd;
> +}
> +
> +int cpr_walk_fd(cpr_walk_fd_cb cb)
> +{
> +    CprFd *elem;
> +
> +    QLIST_FOREACH(elem, &cpr_state.fds, next) {
> +        if (elem->fd >= 0 && cb(elem->fd)) {
> +            return 1;
> +        }
> +    }
> +    return 0;
> +}
> +
> +void cpr_resave_fd(const char *name, int id, int fd)
> +{
> +    CprFd *elem = find_fd(&cpr_state.fds, name, id);
> +    int old_fd = elem ? elem->fd : -1;
> +
> +    if (old_fd < 0) {
> +        cpr_save_fd(name, id, fd);
> +    } else if (old_fd != fd) {
> +        error_setg(&error_fatal,
> +                   "internal error: cpr fd '%s' id %d value %d "
> +                   "already saved with a different value %d",
> +                   name, id, fd, old_fd);
> +    }
> +}

I remember I commented this, maybe not.. cpr_walk_fd() and cpr_resave_fd()
are not used in this series.  Suggest introduce them only when they're
used.

> +/*************************************************************************/
> +#define CPR_STATE "CprState"
> +
> +static const VMStateDescription vmstate_cpr_state = {
> +    .name = CPR_STATE,
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_QLIST_V(fds, CprState, 1, vmstate_cpr_fd, CprFd, next),
> +        VMSTATE_END_OF_LIST()
> +    }
> +};
> +/*************************************************************************/
> +
> +static QEMUFile *cpr_state_file;
> +
> +QIOChannel *cpr_state_ioc(void)
> +{
> +    return qemu_file_get_ioc(cpr_state_file);
> +}
> +
> +int cpr_state_save(Error **errp)
> +{
> +    int ret;
> +    QEMUFile *f;
> +
> +    /* set f based on mode in a later patch in this series */
> +    return 0;
> +
> +    qemu_put_be32(f, QEMU_CPR_FILE_MAGIC);
> +    qemu_put_be32(f, QEMU_CPR_FILE_VERSION);
> +
> +    ret = vmstate_save_state(f, &vmstate_cpr_state, &cpr_state, 0);
> +    if (ret) {
> +        error_setg(errp, "vmstate_save_state error %d", ret);
> +        qemu_fclose(f);
> +        return ret;
> +    }
> +
> +    /*
> +     * Close the socket only partially so we can later detect when the other
> +     * end closes by getting a HUP event.
> +     */
> +    qemu_fflush(f);
> +    qio_channel_shutdown(qemu_file_get_ioc(f), QIO_CHANNEL_SHUTDOWN_WRITE,
> +                         NULL);

What happens if we send everything and close immediately?

I didn't see how this cached file is used later throughout the whole
series.  Is it used in some follow up series?

> +    cpr_state_file = f;
> +    return 0;
> +}
> +
> +int cpr_state_load(Error **errp)
> +{
> +    int ret;
> +    uint32_t v;
> +    QEMUFile *f;
> +
> +    /* set f based on mode in a later patch in this series */
> +    return 0;
> +
> +    v = qemu_get_be32(f);
> +    if (v != QEMU_CPR_FILE_MAGIC) {
> +        error_setg(errp, "Not a migration stream (bad magic %x)", v);
> +        qemu_fclose(f);
> +        return -EINVAL;
> +    }
> +    v = qemu_get_be32(f);
> +    if (v != QEMU_CPR_FILE_VERSION) {
> +        error_setg(errp, "Unsupported migration stream version %d", v);
> +        qemu_fclose(f);
> +        return -ENOTSUP;
> +    }
> +
> +    ret = vmstate_load_state(f, &vmstate_cpr_state, &cpr_state, 1);
> +    if (ret) {
> +        error_setg(errp, "vmstate_load_state error %d", ret);
> +        qemu_fclose(f);
> +        return ret;
> +    }
> +
> +    /*
> +     * Let the caller decide when to close the socket (and generate a HUP event
> +     * for the sending side).
> +     */
> +    cpr_state_file = f;
> +    return ret;
> +}
> +
> +void cpr_state_close(void)
> +{
> +    if (cpr_state_file) {
> +        qemu_fclose(cpr_state_file);
> +        cpr_state_file = NULL;
> +    }
> +}
> diff --git a/migration/meson.build b/migration/meson.build
> index 66d3de8..e5f4211 100644
> --- a/migration/meson.build
> +++ b/migration/meson.build
> @@ -13,6 +13,7 @@ system_ss.add(files(
>    'block-dirty-bitmap.c',
>    'channel.c',
>    'channel-block.c',
> +  'cpr.c',
>    'dirtyrate.c',
>    'exec.c',
>    'fd.c',
> diff --git a/migration/migration.c b/migration/migration.c
> index ae2be31..834b0a2 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -27,6 +27,7 @@
>  #include "sysemu/cpu-throttle.h"
>  #include "rdma.h"
>  #include "ram.h"
> +#include "migration/cpr.h"
>  #include "migration/global_state.h"
>  #include "migration/misc.h"
>  #include "migration.h"
> @@ -2123,6 +2124,10 @@ void qmp_migrate(const char *uri, bool has_channels,
>          }
>      }
>  
> +    if (cpr_state_save(&local_err)) {
> +        goto out;
> +    }
> +
>      if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) {
>          SocketAddress *saddr = &addr->u.socket;
>          if (saddr->type == SOCKET_ADDRESS_TYPE_INET ||
> @@ -2147,6 +2152,7 @@ void qmp_migrate(const char *uri, bool has_channels,
>                            MIGRATION_STATUS_FAILED);
>      }
>  
> +out:
>      if (local_err) {
>          if (!resume_requested) {
>              yank_unregister_instance(MIGRATION_YANK_INSTANCE);
> diff --git a/migration/trace-events b/migration/trace-events
> index c65902f..5356fb5 100644
> --- a/migration/trace-events
> +++ b/migration/trace-events
> @@ -341,6 +341,11 @@ colo_receive_message(const char *msg) "Receive '%s' message"
>  # colo-failover.c
>  colo_failover_set_state(const char *new_state) "new state %s"
>  
> +# cpr.c
> +cpr_save_fd(const char *name, int id, int fd) "%s, id %d, fd %d"
> +cpr_delete_fd(const char *name, int id) "%s, id %d"
> +cpr_find_fd(const char *name, int id, int fd) "%s, id %d returns %d"
> +
>  # block-dirty-bitmap.c
>  send_bitmap_header_enter(void) ""
>  send_bitmap_bits(uint32_t flags, uint64_t start_sector, uint32_t nr_sectors, uint64_t data_size) "flags: 0x%x, start_sector: %" PRIu64 ", nr_sectors: %" PRIu32 ", data_size: %" PRIu64
> diff --git a/system/vl.c b/system/vl.c
> index 752a1da..565d932 100644
> --- a/system/vl.c
> +++ b/system/vl.c
> @@ -77,6 +77,7 @@
>  #include "hw/block/block.h"
>  #include "hw/i386/x86.h"
>  #include "hw/i386/pc.h"
> +#include "migration/cpr.h"
>  #include "migration/misc.h"
>  #include "migration/snapshot.h"
>  #include "sysemu/tpm.h"
> @@ -3720,6 +3721,12 @@ void qemu_init(int argc, char **argv)
>  
>      qemu_create_machine(machine_opts_dict);
>  
> +    /*
> +     * Load incoming CPR state before any devices are created, because it
> +     * contains file descriptors that are needed in device initialization code.
> +     */
> +    cpr_state_load(&error_fatal);
> +
>      suspend_mux_open();
>  
>      qemu_disable_default_devices();
> -- 
> 1.8.3.1
>
Steven Sistare Oct. 7, 2024, 7:30 p.m. UTC | #2
On 10/7/2024 10:14 AM, Peter Xu wrote:
> On Mon, Sep 30, 2024 at 12:40:33PM -0700, Steve Sistare wrote:
>> CPR must save state that is needed after QEMU is restarted, when devices
>> are realized.  Thus the extra state cannot be saved in the migration stream,
>> as objects must already exist before that stream can be loaded.  Instead,
>> define auxilliary state structures and vmstate descriptions, not associated
>> with any registered object, and serialize the aux state to a cpr-specific
>> stream in cpr_state_save.  Deserialize in cpr_state_load after QEMU
>> restarts, before devices are realized.
>>
>> Provide accessors for clients to register file descriptors for saving.
>> The mechanism for passing the fd's to the new process will be specific
>> to each migration mode, and added in subsequent patches.
>>
>> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
>> Reviewed-by: Fabiano Rosas <farosas@suse.de>
> 
> Only two trivial comments below.
> 
>> ---
>>   include/migration/cpr.h |  26 ++++++
>>   migration/cpr.c         | 217 ++++++++++++++++++++++++++++++++++++++++++++++++
>>   migration/meson.build   |   1 +
>>   migration/migration.c   |   6 ++
>>   migration/trace-events  |   5 ++
>>   system/vl.c             |   7 ++
>>   6 files changed, 262 insertions(+)
>>   create mode 100644 include/migration/cpr.h
>>   create mode 100644 migration/cpr.c
>>
>> diff --git a/include/migration/cpr.h b/include/migration/cpr.h
>> new file mode 100644
>> index 0000000..e7b898b
>> --- /dev/null
>> +++ b/include/migration/cpr.h
>> @@ -0,0 +1,26 @@
>> +/*
>> + * Copyright (c) 2021, 2024 Oracle and/or its affiliates.
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
>> + * See the COPYING file in the top-level directory.
>> + */
>> +
>> +#ifndef MIGRATION_CPR_H
>> +#define MIGRATION_CPR_H
>> +
>> +#define QEMU_CPR_FILE_MAGIC     0x51435052
>> +#define QEMU_CPR_FILE_VERSION   0x00000001
>> +
>> +typedef int (*cpr_walk_fd_cb)(int fd);
>> +void cpr_save_fd(const char *name, int id, int fd);
>> +void cpr_delete_fd(const char *name, int id);
>> +int cpr_find_fd(const char *name, int id);
>> +int cpr_walk_fd(cpr_walk_fd_cb cb);
>> +void cpr_resave_fd(const char *name, int id, int fd);
>> +
>> +int cpr_state_save(Error **errp);
>> +int cpr_state_load(Error **errp);
>> +void cpr_state_close(void);
>> +struct QIOChannel *cpr_state_ioc(void);
>> +
>> +#endif
>> diff --git a/migration/cpr.c b/migration/cpr.c
>> new file mode 100644
>> index 0000000..e50fc75
>> --- /dev/null
>> +++ b/migration/cpr.c
>> @@ -0,0 +1,217 @@
>> +/*
>> + * Copyright (c) 2021-2024 Oracle and/or its affiliates.
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
>> + * See the COPYING file in the top-level directory.
>> + */
>> +
>> +#include "qemu/osdep.h"
>> +#include "qapi/error.h"
>> +#include "migration/cpr.h"
>> +#include "migration/misc.h"
>> +#include "migration/qemu-file.h"
>> +#include "migration/savevm.h"
>> +#include "migration/vmstate.h"
>> +#include "sysemu/runstate.h"
>> +#include "trace.h"
>> +
>> +/*************************************************************************/
>> +/* cpr state container for all information to be saved. */
>> +
>> +typedef QLIST_HEAD(CprFdList, CprFd) CprFdList;
>> +
>> +typedef struct CprState {
>> +    CprFdList fds;
>> +} CprState;
>> +
>> +static CprState cpr_state;
>> +
>> +/****************************************************************************/
>> +
>> +typedef struct CprFd {
>> +    char *name;
>> +    unsigned int namelen;
>> +    int id;
>> +    int fd;
>> +    QLIST_ENTRY(CprFd) next;
>> +} CprFd;
>> +
>> +static const VMStateDescription vmstate_cpr_fd = {
>> +    .name = "cpr fd",
>> +    .version_id = 1,
>> +    .minimum_version_id = 1,
>> +    .fields = (VMStateField[]) {
>> +        VMSTATE_UINT32(namelen, CprFd),
>> +        VMSTATE_VBUFFER_ALLOC_UINT32(name, CprFd, 0, NULL, namelen),
>> +        VMSTATE_INT32(id, CprFd),
>> +        VMSTATE_INT32(fd, CprFd),
>> +        VMSTATE_END_OF_LIST()
>> +    }
>> +};
>> +
>> +void cpr_save_fd(const char *name, int id, int fd)
>> +{
>> +    CprFd *elem = g_new0(CprFd, 1);
>> +
>> +    trace_cpr_save_fd(name, id, fd);
>> +    elem->name = g_strdup(name);
>> +    elem->namelen = strlen(name) + 1;
>> +    elem->id = id;
>> +    elem->fd = fd;
>> +    QLIST_INSERT_HEAD(&cpr_state.fds, elem, next);
>> +}
>> +
>> +static CprFd *find_fd(CprFdList *head, const char *name, int id)
>> +{
>> +    CprFd *elem;
>> +
>> +    QLIST_FOREACH(elem, head, next) {
>> +        if (!strcmp(elem->name, name) && elem->id == id) {
>> +            return elem;
>> +        }
>> +    }
>> +    return NULL;
>> +}
>> +
>> +void cpr_delete_fd(const char *name, int id)
>> +{
>> +    CprFd *elem = find_fd(&cpr_state.fds, name, id);
>> +
>> +    if (elem) {
>> +        QLIST_REMOVE(elem, next);
>> +        g_free(elem->name);
>> +        g_free(elem);
>> +    }
>> +
>> +    trace_cpr_delete_fd(name, id);
>> +}
>> +
>> +int cpr_find_fd(const char *name, int id)
>> +{
>> +    CprFd *elem = find_fd(&cpr_state.fds, name, id);
>> +    int fd = elem ? elem->fd : -1;
>> +
>> +    trace_cpr_find_fd(name, id, fd);
>> +    return fd;
>> +}
>> +
>> +int cpr_walk_fd(cpr_walk_fd_cb cb)
>> +{
>> +    CprFd *elem;
>> +
>> +    QLIST_FOREACH(elem, &cpr_state.fds, next) {
>> +        if (elem->fd >= 0 && cb(elem->fd)) {
>> +            return 1;
>> +        }
>> +    }
>> +    return 0;
>> +}
>> +
>> +void cpr_resave_fd(const char *name, int id, int fd)
>> +{
>> +    CprFd *elem = find_fd(&cpr_state.fds, name, id);
>> +    int old_fd = elem ? elem->fd : -1;
>> +
>> +    if (old_fd < 0) {
>> +        cpr_save_fd(name, id, fd);
>> +    } else if (old_fd != fd) {
>> +        error_setg(&error_fatal,
>> +                   "internal error: cpr fd '%s' id %d value %d "
>> +                   "already saved with a different value %d",
>> +                   name, id, fd, old_fd);
>> +    }
>> +}
> 
> I remember I commented this, maybe not.. cpr_walk_fd() and cpr_resave_fd()
> are not used in this series.  Suggest introduce them only when they're
> used.

Thanks, you probably did.  I just edited my tree, so I will not forget again.

>> +/*************************************************************************/
>> +#define CPR_STATE "CprState"
>> +
>> +static const VMStateDescription vmstate_cpr_state = {
>> +    .name = CPR_STATE,
>> +    .version_id = 1,
>> +    .minimum_version_id = 1,
>> +    .fields = (VMStateField[]) {
>> +        VMSTATE_QLIST_V(fds, CprState, 1, vmstate_cpr_fd, CprFd, next),
>> +        VMSTATE_END_OF_LIST()
>> +    }
>> +};
>> +/*************************************************************************/
>> +
>> +static QEMUFile *cpr_state_file;
>> +
>> +QIOChannel *cpr_state_ioc(void)
>> +{
>> +    return qemu_file_get_ioc(cpr_state_file);
>> +}
>> +
>> +int cpr_state_save(Error **errp)
>> +{
>> +    int ret;
>> +    QEMUFile *f;
>> +
>> +    /* set f based on mode in a later patch in this series */
>> +    return 0;
>> +
>> +    qemu_put_be32(f, QEMU_CPR_FILE_MAGIC);
>> +    qemu_put_be32(f, QEMU_CPR_FILE_VERSION);
>> +
>> +    ret = vmstate_save_state(f, &vmstate_cpr_state, &cpr_state, 0);
>> +    if (ret) {
>> +        error_setg(errp, "vmstate_save_state error %d", ret);
>> +        qemu_fclose(f);
>> +        return ret;
>> +    }
>> +
>> +    /*
>> +     * Close the socket only partially so we can later detect when the other
>> +     * end closes by getting a HUP event.
>> +     */
>> +    qemu_fflush(f);
>> +    qio_channel_shutdown(qemu_file_get_ioc(f), QIO_CHANNEL_SHUTDOWN_WRITE,
>> +                         NULL);
> 
> What happens if we send everything and close immediately?
> 
> I didn't see how this cached file is used later throughout the whole
> series.  Is it used in some follow up series?

The complete usage and rationale is in the last patch, "migration: cpr-transfer mode"

- Steve

>> +    cpr_state_file = f;
>> +    return 0;
>> +}
>> +
>> +int cpr_state_load(Error **errp)
>> +{
>> +    int ret;
>> +    uint32_t v;
>> +    QEMUFile *f;
>> +
>> +    /* set f based on mode in a later patch in this series */
>> +    return 0;
>> +
>> +    v = qemu_get_be32(f);
>> +    if (v != QEMU_CPR_FILE_MAGIC) {
>> +        error_setg(errp, "Not a migration stream (bad magic %x)", v);
>> +        qemu_fclose(f);
>> +        return -EINVAL;
>> +    }
>> +    v = qemu_get_be32(f);
>> +    if (v != QEMU_CPR_FILE_VERSION) {
>> +        error_setg(errp, "Unsupported migration stream version %d", v);
>> +        qemu_fclose(f);
>> +        return -ENOTSUP;
>> +    }
>> +
>> +    ret = vmstate_load_state(f, &vmstate_cpr_state, &cpr_state, 1);
>> +    if (ret) {
>> +        error_setg(errp, "vmstate_load_state error %d", ret);
>> +        qemu_fclose(f);
>> +        return ret;
>> +    }
>> +
>> +    /*
>> +     * Let the caller decide when to close the socket (and generate a HUP event
>> +     * for the sending side).
>> +     */
>> +    cpr_state_file = f;
>> +    return ret;
>> +}
>> +
>> +void cpr_state_close(void)
>> +{
>> +    if (cpr_state_file) {
>> +        qemu_fclose(cpr_state_file);
>> +        cpr_state_file = NULL;
>> +    }
>> +}
>> diff --git a/migration/meson.build b/migration/meson.build
>> index 66d3de8..e5f4211 100644
>> --- a/migration/meson.build
>> +++ b/migration/meson.build
>> @@ -13,6 +13,7 @@ system_ss.add(files(
>>     'block-dirty-bitmap.c',
>>     'channel.c',
>>     'channel-block.c',
>> +  'cpr.c',
>>     'dirtyrate.c',
>>     'exec.c',
>>     'fd.c',
>> diff --git a/migration/migration.c b/migration/migration.c
>> index ae2be31..834b0a2 100644
>> --- a/migration/migration.c
>> +++ b/migration/migration.c
>> @@ -27,6 +27,7 @@
>>   #include "sysemu/cpu-throttle.h"
>>   #include "rdma.h"
>>   #include "ram.h"
>> +#include "migration/cpr.h"
>>   #include "migration/global_state.h"
>>   #include "migration/misc.h"
>>   #include "migration.h"
>> @@ -2123,6 +2124,10 @@ void qmp_migrate(const char *uri, bool has_channels,
>>           }
>>       }
>>   
>> +    if (cpr_state_save(&local_err)) {
>> +        goto out;
>> +    }
>> +
>>       if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) {
>>           SocketAddress *saddr = &addr->u.socket;
>>           if (saddr->type == SOCKET_ADDRESS_TYPE_INET ||
>> @@ -2147,6 +2152,7 @@ void qmp_migrate(const char *uri, bool has_channels,
>>                             MIGRATION_STATUS_FAILED);
>>       }
>>   
>> +out:
>>       if (local_err) {
>>           if (!resume_requested) {
>>               yank_unregister_instance(MIGRATION_YANK_INSTANCE);
>> diff --git a/migration/trace-events b/migration/trace-events
>> index c65902f..5356fb5 100644
>> --- a/migration/trace-events
>> +++ b/migration/trace-events
>> @@ -341,6 +341,11 @@ colo_receive_message(const char *msg) "Receive '%s' message"
>>   # colo-failover.c
>>   colo_failover_set_state(const char *new_state) "new state %s"
>>   
>> +# cpr.c
>> +cpr_save_fd(const char *name, int id, int fd) "%s, id %d, fd %d"
>> +cpr_delete_fd(const char *name, int id) "%s, id %d"
>> +cpr_find_fd(const char *name, int id, int fd) "%s, id %d returns %d"
>> +
>>   # block-dirty-bitmap.c
>>   send_bitmap_header_enter(void) ""
>>   send_bitmap_bits(uint32_t flags, uint64_t start_sector, uint32_t nr_sectors, uint64_t data_size) "flags: 0x%x, start_sector: %" PRIu64 ", nr_sectors: %" PRIu32 ", data_size: %" PRIu64
>> diff --git a/system/vl.c b/system/vl.c
>> index 752a1da..565d932 100644
>> --- a/system/vl.c
>> +++ b/system/vl.c
>> @@ -77,6 +77,7 @@
>>   #include "hw/block/block.h"
>>   #include "hw/i386/x86.h"
>>   #include "hw/i386/pc.h"
>> +#include "migration/cpr.h"
>>   #include "migration/misc.h"
>>   #include "migration/snapshot.h"
>>   #include "sysemu/tpm.h"
>> @@ -3720,6 +3721,12 @@ void qemu_init(int argc, char **argv)
>>   
>>       qemu_create_machine(machine_opts_dict);
>>   
>> +    /*
>> +     * Load incoming CPR state before any devices are created, because it
>> +     * contains file descriptors that are needed in device initialization code.
>> +     */
>> +    cpr_state_load(&error_fatal);
>> +
>>       suspend_mux_open();
>>   
>>       qemu_disable_default_devices();
>> -- 
>> 1.8.3.1
>>
>
diff mbox series

Patch

diff --git a/include/migration/cpr.h b/include/migration/cpr.h
new file mode 100644
index 0000000..e7b898b
--- /dev/null
+++ b/include/migration/cpr.h
@@ -0,0 +1,26 @@ 
+/*
+ * Copyright (c) 2021, 2024 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef MIGRATION_CPR_H
+#define MIGRATION_CPR_H
+
+#define QEMU_CPR_FILE_MAGIC     0x51435052
+#define QEMU_CPR_FILE_VERSION   0x00000001
+
+typedef int (*cpr_walk_fd_cb)(int fd);
+void cpr_save_fd(const char *name, int id, int fd);
+void cpr_delete_fd(const char *name, int id);
+int cpr_find_fd(const char *name, int id);
+int cpr_walk_fd(cpr_walk_fd_cb cb);
+void cpr_resave_fd(const char *name, int id, int fd);
+
+int cpr_state_save(Error **errp);
+int cpr_state_load(Error **errp);
+void cpr_state_close(void);
+struct QIOChannel *cpr_state_ioc(void);
+
+#endif
diff --git a/migration/cpr.c b/migration/cpr.c
new file mode 100644
index 0000000..e50fc75
--- /dev/null
+++ b/migration/cpr.c
@@ -0,0 +1,217 @@ 
+/*
+ * Copyright (c) 2021-2024 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "migration/cpr.h"
+#include "migration/misc.h"
+#include "migration/qemu-file.h"
+#include "migration/savevm.h"
+#include "migration/vmstate.h"
+#include "sysemu/runstate.h"
+#include "trace.h"
+
+/*************************************************************************/
+/* cpr state container for all information to be saved. */
+
+typedef QLIST_HEAD(CprFdList, CprFd) CprFdList;
+
+typedef struct CprState {
+    CprFdList fds;
+} CprState;
+
+static CprState cpr_state;
+
+/****************************************************************************/
+
+typedef struct CprFd {
+    char *name;
+    unsigned int namelen;
+    int id;
+    int fd;
+    QLIST_ENTRY(CprFd) next;
+} CprFd;
+
+static const VMStateDescription vmstate_cpr_fd = {
+    .name = "cpr fd",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(namelen, CprFd),
+        VMSTATE_VBUFFER_ALLOC_UINT32(name, CprFd, 0, NULL, namelen),
+        VMSTATE_INT32(id, CprFd),
+        VMSTATE_INT32(fd, CprFd),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+void cpr_save_fd(const char *name, int id, int fd)
+{
+    CprFd *elem = g_new0(CprFd, 1);
+
+    trace_cpr_save_fd(name, id, fd);
+    elem->name = g_strdup(name);
+    elem->namelen = strlen(name) + 1;
+    elem->id = id;
+    elem->fd = fd;
+    QLIST_INSERT_HEAD(&cpr_state.fds, elem, next);
+}
+
+static CprFd *find_fd(CprFdList *head, const char *name, int id)
+{
+    CprFd *elem;
+
+    QLIST_FOREACH(elem, head, next) {
+        if (!strcmp(elem->name, name) && elem->id == id) {
+            return elem;
+        }
+    }
+    return NULL;
+}
+
+void cpr_delete_fd(const char *name, int id)
+{
+    CprFd *elem = find_fd(&cpr_state.fds, name, id);
+
+    if (elem) {
+        QLIST_REMOVE(elem, next);
+        g_free(elem->name);
+        g_free(elem);
+    }
+
+    trace_cpr_delete_fd(name, id);
+}
+
+int cpr_find_fd(const char *name, int id)
+{
+    CprFd *elem = find_fd(&cpr_state.fds, name, id);
+    int fd = elem ? elem->fd : -1;
+
+    trace_cpr_find_fd(name, id, fd);
+    return fd;
+}
+
+int cpr_walk_fd(cpr_walk_fd_cb cb)
+{
+    CprFd *elem;
+
+    QLIST_FOREACH(elem, &cpr_state.fds, next) {
+        if (elem->fd >= 0 && cb(elem->fd)) {
+            return 1;
+        }
+    }
+    return 0;
+}
+
+void cpr_resave_fd(const char *name, int id, int fd)
+{
+    CprFd *elem = find_fd(&cpr_state.fds, name, id);
+    int old_fd = elem ? elem->fd : -1;
+
+    if (old_fd < 0) {
+        cpr_save_fd(name, id, fd);
+    } else if (old_fd != fd) {
+        error_setg(&error_fatal,
+                   "internal error: cpr fd '%s' id %d value %d "
+                   "already saved with a different value %d",
+                   name, id, fd, old_fd);
+    }
+}
+/*************************************************************************/
+#define CPR_STATE "CprState"
+
+static const VMStateDescription vmstate_cpr_state = {
+    .name = CPR_STATE,
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_QLIST_V(fds, CprState, 1, vmstate_cpr_fd, CprFd, next),
+        VMSTATE_END_OF_LIST()
+    }
+};
+/*************************************************************************/
+
+static QEMUFile *cpr_state_file;
+
+QIOChannel *cpr_state_ioc(void)
+{
+    return qemu_file_get_ioc(cpr_state_file);
+}
+
+int cpr_state_save(Error **errp)
+{
+    int ret;
+    QEMUFile *f;
+
+    /* set f based on mode in a later patch in this series */
+    return 0;
+
+    qemu_put_be32(f, QEMU_CPR_FILE_MAGIC);
+    qemu_put_be32(f, QEMU_CPR_FILE_VERSION);
+
+    ret = vmstate_save_state(f, &vmstate_cpr_state, &cpr_state, 0);
+    if (ret) {
+        error_setg(errp, "vmstate_save_state error %d", ret);
+        qemu_fclose(f);
+        return ret;
+    }
+
+    /*
+     * Close the socket only partially so we can later detect when the other
+     * end closes by getting a HUP event.
+     */
+    qemu_fflush(f);
+    qio_channel_shutdown(qemu_file_get_ioc(f), QIO_CHANNEL_SHUTDOWN_WRITE,
+                         NULL);
+    cpr_state_file = f;
+    return 0;
+}
+
+int cpr_state_load(Error **errp)
+{
+    int ret;
+    uint32_t v;
+    QEMUFile *f;
+
+    /* set f based on mode in a later patch in this series */
+    return 0;
+
+    v = qemu_get_be32(f);
+    if (v != QEMU_CPR_FILE_MAGIC) {
+        error_setg(errp, "Not a migration stream (bad magic %x)", v);
+        qemu_fclose(f);
+        return -EINVAL;
+    }
+    v = qemu_get_be32(f);
+    if (v != QEMU_CPR_FILE_VERSION) {
+        error_setg(errp, "Unsupported migration stream version %d", v);
+        qemu_fclose(f);
+        return -ENOTSUP;
+    }
+
+    ret = vmstate_load_state(f, &vmstate_cpr_state, &cpr_state, 1);
+    if (ret) {
+        error_setg(errp, "vmstate_load_state error %d", ret);
+        qemu_fclose(f);
+        return ret;
+    }
+
+    /*
+     * Let the caller decide when to close the socket (and generate a HUP event
+     * for the sending side).
+     */
+    cpr_state_file = f;
+    return ret;
+}
+
+void cpr_state_close(void)
+{
+    if (cpr_state_file) {
+        qemu_fclose(cpr_state_file);
+        cpr_state_file = NULL;
+    }
+}
diff --git a/migration/meson.build b/migration/meson.build
index 66d3de8..e5f4211 100644
--- a/migration/meson.build
+++ b/migration/meson.build
@@ -13,6 +13,7 @@  system_ss.add(files(
   'block-dirty-bitmap.c',
   'channel.c',
   'channel-block.c',
+  'cpr.c',
   'dirtyrate.c',
   'exec.c',
   'fd.c',
diff --git a/migration/migration.c b/migration/migration.c
index ae2be31..834b0a2 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -27,6 +27,7 @@ 
 #include "sysemu/cpu-throttle.h"
 #include "rdma.h"
 #include "ram.h"
+#include "migration/cpr.h"
 #include "migration/global_state.h"
 #include "migration/misc.h"
 #include "migration.h"
@@ -2123,6 +2124,10 @@  void qmp_migrate(const char *uri, bool has_channels,
         }
     }
 
+    if (cpr_state_save(&local_err)) {
+        goto out;
+    }
+
     if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) {
         SocketAddress *saddr = &addr->u.socket;
         if (saddr->type == SOCKET_ADDRESS_TYPE_INET ||
@@ -2147,6 +2152,7 @@  void qmp_migrate(const char *uri, bool has_channels,
                           MIGRATION_STATUS_FAILED);
     }
 
+out:
     if (local_err) {
         if (!resume_requested) {
             yank_unregister_instance(MIGRATION_YANK_INSTANCE);
diff --git a/migration/trace-events b/migration/trace-events
index c65902f..5356fb5 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -341,6 +341,11 @@  colo_receive_message(const char *msg) "Receive '%s' message"
 # colo-failover.c
 colo_failover_set_state(const char *new_state) "new state %s"
 
+# cpr.c
+cpr_save_fd(const char *name, int id, int fd) "%s, id %d, fd %d"
+cpr_delete_fd(const char *name, int id) "%s, id %d"
+cpr_find_fd(const char *name, int id, int fd) "%s, id %d returns %d"
+
 # block-dirty-bitmap.c
 send_bitmap_header_enter(void) ""
 send_bitmap_bits(uint32_t flags, uint64_t start_sector, uint32_t nr_sectors, uint64_t data_size) "flags: 0x%x, start_sector: %" PRIu64 ", nr_sectors: %" PRIu32 ", data_size: %" PRIu64
diff --git a/system/vl.c b/system/vl.c
index 752a1da..565d932 100644
--- a/system/vl.c
+++ b/system/vl.c
@@ -77,6 +77,7 @@ 
 #include "hw/block/block.h"
 #include "hw/i386/x86.h"
 #include "hw/i386/pc.h"
+#include "migration/cpr.h"
 #include "migration/misc.h"
 #include "migration/snapshot.h"
 #include "sysemu/tpm.h"
@@ -3720,6 +3721,12 @@  void qemu_init(int argc, char **argv)
 
     qemu_create_machine(machine_opts_dict);
 
+    /*
+     * Load incoming CPR state before any devices are created, because it
+     * contains file descriptors that are needed in device initialization code.
+     */
+    cpr_state_load(&error_fatal);
+
     suspend_mux_open();
 
     qemu_disable_default_devices();