diff mbox series

[v7,1/8] Introduce yank feature

Message ID 0092d4fe5f64078a18db3790c46c508416dbdb6b.1596528468.git.lukasstraub2@web.de (mailing list archive)
State New, archived
Headers show
Series Introduce 'yank' oob qmp command to recover from hanging qemu | expand

Commit Message

Lukas Straub Aug. 4, 2020, 8:11 a.m. UTC
The yank feature allows to recover from hanging qemu by "yanking"
at various parts. Other qemu systems can register themselves and
multiple yank functions. Then all yank functions for selected
instances can be called by the 'yank' out-of-band qmp command.
Available instances can be queried by a 'query-yank' oob command.

Signed-off-by: Lukas Straub <lukasstraub2@web.de>
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 include/qemu/yank.h |  80 +++++++++++++++++++
 qapi/misc.json      |  45 +++++++++++
 util/Makefile.objs  |   1 +
 util/yank.c         | 184 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 310 insertions(+)
 create mode 100644 include/qemu/yank.h
 create mode 100644 util/yank.c

--
2.20.1

Comments

Daniel P. Berrangé Aug. 27, 2020, 10:31 a.m. UTC | #1
On Tue, Aug 04, 2020 at 10:11:34AM +0200, Lukas Straub wrote:
> The yank feature allows to recover from hanging qemu by "yanking"
> at various parts. Other qemu systems can register themselves and
> multiple yank functions. Then all yank functions for selected
> instances can be called by the 'yank' out-of-band qmp command.
> Available instances can be queried by a 'query-yank' oob command.
> 
> Signed-off-by: Lukas Straub <lukasstraub2@web.de>
> Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
>  include/qemu/yank.h |  80 +++++++++++++++++++
>  qapi/misc.json      |  45 +++++++++++
>  util/Makefile.objs  |   1 +
>  util/yank.c         | 184 ++++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 310 insertions(+)
>  create mode 100644 include/qemu/yank.h
>  create mode 100644 util/yank.c

Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>


Regards,
Daniel
Markus Armbruster Aug. 27, 2020, 12:37 p.m. UTC | #2
I apologize for not reviewing this much earlier.

Lukas Straub <lukasstraub2@web.de> writes:

> The yank feature allows to recover from hanging qemu by "yanking"
> at various parts. Other qemu systems can register themselves and
> multiple yank functions. Then all yank functions for selected
> instances can be called by the 'yank' out-of-band qmp command.
> Available instances can be queried by a 'query-yank' oob command.
>
> Signed-off-by: Lukas Straub <lukasstraub2@web.de>
> Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
>  include/qemu/yank.h |  80 +++++++++++++++++++
>  qapi/misc.json      |  45 +++++++++++
>  util/Makefile.objs  |   1 +
>  util/yank.c         | 184 ++++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 310 insertions(+)
>  create mode 100644 include/qemu/yank.h
>  create mode 100644 util/yank.c
>
> diff --git a/include/qemu/yank.h b/include/qemu/yank.h
> new file mode 100644
> index 0000000000..cd184fcd05
> --- /dev/null
> +++ b/include/qemu/yank.h
> @@ -0,0 +1,80 @@
> +/*
> + * QEMU yank feature
> + *
> + * Copyright (c) Lukas Straub <lukasstraub2@web.de>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + */
> +
> +#ifndef YANK_H
> +#define YANK_H
> +
> +typedef void (YankFn) (void *opaque);

No space before parameter lists, please.

> +
> +/**
> + * yank_register_instance: Register a new instance.
> + *
> + * This registers a new instance for yanking. Must be called before any yank
> + * function is registered for this instance.
> + *
> + * This function is thread-safe.
> + *
> + * @instance_name: The globally unique name of the instance.
> + * @errp: ...
> + */
> +void yank_register_instance(const char *instance_name, Error **errp);
> +
> +/**
> + * yank_unregister_instance: Unregister a instance.
> + *
> + * This unregisters a instance. Must be called only after every yank function
> + * of the instance has been unregistered.
> + *
> + * This function is thread-safe.
> + *
> + * @instance_name: The name of the instance.
> + */
> +void yank_unregister_instance(const char *instance_name);
> +
> +/**
> + * yank_register_function: Register a yank function
> + *
> + * This registers a yank function. All limitations of qmp oob commands apply
> + * to the yank function as well.

The restrictions are documented in docs/devel/qapi-code-gen.txt under
"An OOB-capable command handler must satisfy the following conditions".
Let's point there,

> + *
> + * This function is thread-safe.
> + *
> + * @instance_name: The name of the instance
> + * @func: The yank function
> + * @opaque: Will be passed to the yank function
> + */
> +void yank_register_function(const char *instance_name,
> +                            YankFn *func,
> +                            void *opaque);

Pardon my ignorance... can you explain to me why a yank instance may
have multiple functions?

> +
> +/**
> + * yank_unregister_function: Unregister a yank function
> + *
> + * This unregisters a yank function.
> + *
> + * This function is thread-safe.
> + *
> + * @instance_name: The name of the instance
> + * @func: func that was passed to yank_register_function
> + * @opaque: opaque that was passed to yank_register_function
> + */
> +void yank_unregister_function(const char *instance_name,
> +                              YankFn *func,
> +                              void *opaque);
> +
> +/**
> + * yank_unregister_function: Generic yank function for iochannel

Pasto, should be

    * yank_generic_iochannel: ...

> + *
> + * This is a generic yank function which will call qio_channel_shutdown on the
> + * provided QIOChannel.
> + *
> + * @opaque: QIOChannel to shutdown
> + */
> +void yank_generic_iochannel(void *opaque);
> +#endif
> diff --git a/qapi/misc.json b/qapi/misc.json
> index 9d32820dc1..0d6a8f20b7 100644
> --- a/qapi/misc.json
> +++ b/qapi/misc.json
> @@ -1615,3 +1615,48 @@
>  ##
>  { 'command': 'query-vm-generation-id', 'returns': 'GuidInfo' }
>
> +##
> +# @YankInstances:
> +#
> +# @instances: List of yank instances.
> +#
> +# Yank instances are named after the following schema:
> +# "blockdev:<node-name>", "chardev:<chardev-name>" and "migration"
> +#
> +# Since: 5.1
> +##
> +{ 'struct': 'YankInstances', 'data': {'instances': ['str'] } }

I'm afraid this is a problematic QMP interface.

By making YankInstances a struct, you keep the door open to adding more
members, which is good.

But by making its 'instances' member a ['str'], you close the door to
using anything but a single string for the individual instances.  Not so
good.

The single string encodes information which QMP client will need to
parse from the string.  We frown on that in QMP.  Use QAPI complex types
capabilities for structured data.

Could you use something like this instead?

{ 'enum': 'YankInstanceType',
  'data': { 'block-node', 'chardev', 'migration' } }

{ 'struct': 'YankInstanceBlockNode',
  'data': { 'node-name': 'str' } }

{ 'struct': 'YankInstanceChardev',
  'data' { 'label': 'str' } }

{ 'union': 'YankInstance',
  'base': { 'type': 'YankInstanceType' },
  'discriminator': 'type',
  'data': {
      'block-node': 'YankInstanceBlockNode',
      'chardev': 'YankInstanceChardev' } }

{ 'command': 'yank',
  'data': { 'instances': ['YankInstance'] },
  'allow-oob': true }

If you're confident nothing will ever be added to YankInstanceBlockNode
and YankInstanceChardev, you could use str instead.

> +
> +##
> +# @yank:
> +#
> +# Recover from hanging qemu by yanking the specified instances.

What's an "instance", and what does it mean to "yank" it?

The documentation of YankInstances above gives a clue on what an
"instance" is: presumably a block node, a character device or the
migration job.

I guess a YankInstance is whatever the code chooses to make one, and the
current code makes these three kinds.

Does it make every block node a YankInstance?  If not, which ones?

Does it make every character device a YankInstance?  If not, which ones?

Does it make migration always a YankInstance?  If not, when?

> +#
> +# Takes @YankInstances as argument.
> +#
> +# Returns: nothing.
> +#
> +# Example:
> +#
> +# -> { "execute": "yank", "arguments": { "instances": ["blockdev:nbd0"] } }
> +# <- { "return": {} }
> +#
> +# Since: 5.1
> +##
> +{ 'command': 'yank', 'data': 'YankInstances', 'allow-oob': true }
> +
> +##
> +# @query-yank:
> +#
> +# Query yank instances.
> +#
> +# Returns: @YankInstances
> +#
> +# Example:
> +#
> +# -> { "execute": "query-yank" }
> +# <- { "return": { "instances": ["blockdev:nbd0"] } }
> +#
> +# Since: 5.1
> +##
> +{ 'command': 'query-yank', 'returns': 'YankInstances', 'allow-oob': true }
> diff --git a/util/Makefile.objs b/util/Makefile.objs
> index cc5e37177a..13faa98425 100644
> --- a/util/Makefile.objs
> +++ b/util/Makefile.objs
> @@ -45,6 +45,7 @@ util-obj-$(CONFIG_GIO) += dbus.o
>  dbus.o-cflags = $(GIO_CFLAGS)
>  dbus.o-libs = $(GIO_LIBS)
>  util-obj-$(CONFIG_USER_ONLY) += selfmap.o
> +util-obj-y += yank.o
>
>  #######################################################################
>  # code used by both qemu system emulation and qemu-img
> diff --git a/util/yank.c b/util/yank.c
> new file mode 100644
> index 0000000000..b0cd27728b
> --- /dev/null
> +++ b/util/yank.c
> @@ -0,0 +1,184 @@
> +/*
> + * QEMU yank feature
> + *
> + * Copyright (c) Lukas Straub <lukasstraub2@web.de>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qapi/error.h"
> +#include "qemu/thread.h"
> +#include "qemu/queue.h"
> +#include "qapi/qapi-commands-misc.h"
> +#include "io/channel.h"
> +#include "qemu/yank.h"
> +
> +struct YankFuncAndParam {
> +    YankFn *func;
> +    void *opaque;
> +    QLIST_ENTRY(YankFuncAndParam) next;
> +};
> +
> +struct YankInstance {
> +    char *name;
> +    QLIST_HEAD(, YankFuncAndParam) yankfns;
> +    QLIST_ENTRY(YankInstance) next;
> +};
> +
> +static QemuMutex lock;

Please give the variable a more telling name, such as @yank_lock, and
document what exactly the lock protects.  I can guess it's just the list
that immediately follows, but I prefer to be explicit when it comes to
locking.

> +static QLIST_HEAD(yankinst_list, YankInstance) head
> +    = QLIST_HEAD_INITIALIZER(head);

Please give the variable a more telling name, such as
@yank_instance_list.

I doubt there is a need to name the struct.  This should do:

   static QLIST_HEAD(, YankInstance) yank_instance_list

> +
> +static struct YankInstance *yank_find_instance(const char *name)
> +{
> +    struct YankInstance *tmp, *instance;
> +    instance = NULL;
> +    QLIST_FOREACH(tmp, &head, next) {
> +        if (!strcmp(tmp->name, name)) {
> +            instance = tmp;
> +        }
> +    }
> +    return instance;
> +}

Suggest

   static struct YankInstance *yank_find_instance(const char *name)
   {
       struct YankInstance *instance;

       QLIST_FOREACH(instance, &head, next) {
           if (!strcmp(instance->name, name)) {
               return instance;
           }
       }
       return NULL;
   }

> +
> +void yank_register_instance(const char *instance_name, Error **errp)
> +{
> +    struct YankInstance *instance;
> +
> +    qemu_mutex_lock(&lock);
> +
> +    if (yank_find_instance(instance_name)) {
> +        error_setg(errp, "duplicate yank instance name: '%s'", instance_name);

Rather long line, suggest to wrap before the last argument.

> +        qemu_mutex_unlock(&lock);
> +        return;
> +    }
> +
> +    instance = g_slice_new(struct YankInstance);
> +    instance->name = g_strdup(instance_name);
> +    QLIST_INIT(&instance->yankfns);
> +    QLIST_INSERT_HEAD(&head, instance, next);
> +
> +    qemu_mutex_unlock(&lock);
> +}
> +
> +void yank_unregister_instance(const char *instance_name)
> +{
> +    struct YankInstance *instance;
> +
> +    qemu_mutex_lock(&lock);
> +    instance = yank_find_instance(instance_name);
> +    assert(instance);
> +
> +    assert(QLIST_EMPTY(&instance->yankfns));
> +    QLIST_REMOVE(instance, next);
> +    g_free(instance->name);
> +    g_slice_free(struct YankInstance, instance);
> +
> +    qemu_mutex_unlock(&lock);
> +}
> +
> +void yank_register_function(const char *instance_name,
> +                            YankFn *func,
> +                            void *opaque)
> +{
> +    struct YankInstance *instance;
> +    struct YankFuncAndParam *entry;
> +
> +    qemu_mutex_lock(&lock);
> +    instance = yank_find_instance(instance_name);
> +    assert(instance);
> +
> +    entry = g_slice_new(struct YankFuncAndParam);
> +    entry->func = func;
> +    entry->opaque = opaque;
> +
> +    QLIST_INSERT_HEAD(&instance->yankfns, entry, next);
> +    qemu_mutex_unlock(&lock);
> +}
> +
> +void yank_unregister_function(const char *instance_name,
> +                              YankFn *func,
> +                              void *opaque)
> +{
> +    struct YankInstance *instance;
> +    struct YankFuncAndParam *entry;
> +
> +    qemu_mutex_lock(&lock);
> +    instance = yank_find_instance(instance_name);
> +    assert(instance);
> +
> +    QLIST_FOREACH(entry, &instance->yankfns, next) {
> +        if (entry->func == func && entry->opaque == opaque) {
> +            QLIST_REMOVE(entry, next);
> +            g_slice_free(struct YankFuncAndParam, entry);
> +            qemu_mutex_unlock(&lock);
> +            return;
> +        }
> +    }
> +
> +    abort();
> +}
> +
> +void yank_generic_iochannel(void *opaque)
> +{
> +    QIOChannel *ioc = QIO_CHANNEL(opaque);
> +
> +    qio_channel_shutdown(ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
> +}
> +
> +void qmp_yank(strList *instances,
> +              Error **errp)
> +{
> +    strList *tmp;
> +    struct YankInstance *instance;
> +    struct YankFuncAndParam *entry;
> +
> +    qemu_mutex_lock(&lock);
> +    tmp = instances;
> +    for (; tmp; tmp = tmp->next) {

Make that

       for (tail = instances; tail; tail = tail->next) {

> +        instance = yank_find_instance(tmp->value);
> +        if (!instance) {
> +            error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
> +                      "Instance '%s' not found", tmp->value);
> +            qemu_mutex_unlock(&lock);
> +            return;
> +        }
> +    }
> +    tmp = instances;
> +    for (; tmp; tmp = tmp->next) {

Likewise.

> +        instance = yank_find_instance(tmp->value);
> +        assert(instance);
> +        QLIST_FOREACH(entry, &instance->yankfns, next) {
> +            entry->func(entry->opaque);
> +        }
> +    }
> +    qemu_mutex_unlock(&lock);
> +}
> +
> +YankInstances *qmp_query_yank(Error **errp)
> +{
> +    struct YankInstance *instance;
> +    YankInstances *ret;
> +
> +    ret = g_new0(YankInstances, 1);
> +    ret->instances = NULL;
> +
> +    qemu_mutex_lock(&lock);
> +    QLIST_FOREACH(instance, &head, next) {
> +        strList *entry;
> +        entry = g_new0(strList, 1);
> +        entry->value = g_strdup(instance->name);
> +        entry->next = ret->instances;
> +        ret->instances = entry;
> +    }
> +    qemu_mutex_unlock(&lock);
> +
> +    return ret;
> +}
> +
> +static void __attribute__((__constructor__)) yank_init(void)
> +{
> +    qemu_mutex_init(&lock);
> +}
> --
> 2.20.1

The two QMP commands permit out-of-band execution ('allow-oob': true).
OOB is easy to get wrong, but I figure you have a legitimate use case.
Let's review the restrictions documented in
docs/devel/qapi-code-gen.txt:

    An OOB-capable command handler must satisfy the following conditions:

    - It terminates quickly.
    - It does not invoke system calls that may block.
    - It does not access guest RAM that may block when userfaultfd is
      enabled for postcopy live migration.
    - It takes only "fast" locks, i.e. all critical sections protected by
      any lock it takes also satisfy the conditions for OOB command
      handler code.

Since the command handlers take &lock, the restrictions apply to the
other critical sections protected by &lock as well.  I believe these are
all okay: they do nothing but allocate, initialize and free memory.

The restrictions also apply to the YankFn callbacks, but you documented
that.  Okay.

The one such callback included in this patch is
yank_generic_iochannel(), which is a thin wrapper around
qio_channel_shutdown(), which in turn runs the io_shutdown method.
Thus, the restructions also apply to all the io_shutdown methods.
That's not documented.

Daniel, should it be documented?
Daniel P. Berrangé Aug. 27, 2020, 2:28 p.m. UTC | #3
On Thu, Aug 27, 2020 at 02:37:00PM +0200, Markus Armbruster wrote:
> I apologize for not reviewing this much earlier.
> 
> Lukas Straub <lukasstraub2@web.de> writes:
> 
> > The yank feature allows to recover from hanging qemu by "yanking"
> > at various parts. Other qemu systems can register themselves and
> > multiple yank functions. Then all yank functions for selected
> > instances can be called by the 'yank' out-of-band qmp command.
> > Available instances can be queried by a 'query-yank' oob command.
> >
> > Signed-off-by: Lukas Straub <lukasstraub2@web.de>
> > Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
> > ---
> >  include/qemu/yank.h |  80 +++++++++++++++++++
> >  qapi/misc.json      |  45 +++++++++++
> >  util/Makefile.objs  |   1 +
> >  util/yank.c         | 184 ++++++++++++++++++++++++++++++++++++++++++++
> >  4 files changed, 310 insertions(+)
> >  create mode 100644 include/qemu/yank.h
> >  create mode 100644 util/yank.c


> > + *
> > + * This function is thread-safe.
> > + *
> > + * @instance_name: The name of the instance
> > + * @func: The yank function
> > + * @opaque: Will be passed to the yank function
> > + */
> > +void yank_register_function(const char *instance_name,
> > +                            YankFn *func,
> > +                            void *opaque);
> 
> Pardon my ignorance... can you explain to me why a yank instance may
> have multiple functions?

multifd migration - there's a single migration "instance", which
has multiple FDs open, each of which has a func registered.


> > diff --git a/qapi/misc.json b/qapi/misc.json
> > index 9d32820dc1..0d6a8f20b7 100644
> > --- a/qapi/misc.json
> > +++ b/qapi/misc.json
> > @@ -1615,3 +1615,48 @@
> >  ##
> >  { 'command': 'query-vm-generation-id', 'returns': 'GuidInfo' }
> >
> > +##
> > +# @YankInstances:
> > +#
> > +# @instances: List of yank instances.
> > +#
> > +# Yank instances are named after the following schema:
> > +# "blockdev:<node-name>", "chardev:<chardev-name>" and "migration"
> > +#
> > +# Since: 5.1
> > +##
> > +{ 'struct': 'YankInstances', 'data': {'instances': ['str'] } }
> 
> I'm afraid this is a problematic QMP interface.
> 
> By making YankInstances a struct, you keep the door open to adding more
> members, which is good.
> 
> But by making its 'instances' member a ['str'], you close the door to
> using anything but a single string for the individual instances.  Not so
> good.
> 
> The single string encodes information which QMP client will need to
> parse from the string.  We frown on that in QMP.  Use QAPI complex types
> capabilities for structured data.
> 
> Could you use something like this instead?
> 
> { 'enum': 'YankInstanceType',
>   'data': { 'block-node', 'chardev', 'migration' } }
> 
> { 'struct': 'YankInstanceBlockNode',
>   'data': { 'node-name': 'str' } }
> 
> { 'struct': 'YankInstanceChardev',
>   'data' { 'label': 'str' } }
> 
> { 'union': 'YankInstance',
>   'base': { 'type': 'YankInstanceType' },
>   'discriminator': 'type',
>   'data': {
>       'block-node': 'YankInstanceBlockNode',
>       'chardev': 'YankInstanceChardev' } }
> 
> { 'command': 'yank',
>   'data': { 'instances': ['YankInstance'] },
>   'allow-oob': true }
> 
> If you're confident nothing will ever be added to YankInstanceBlockNode
> and YankInstanceChardev, you could use str instead.

I raised this idea back in the v1 posting. There's a thread starting
at this:

  https://lists.gnu.org/archive/html/qemu-devel/2020-05/msg02881.html

which eventually concluded that plain string is best.

I think that's right because the yank feature is providing generic
infrastructure with zero knowledge of backends that are using it.
The only interaction between the yank functionality and its callers
is via an opaque function callback. So there's no conceptual place
at which the yank infra would want to know about the backends nor
pass any backend specific config params to it.

> > +##
> > +# @yank:
> > +#
> > +# Recover from hanging qemu by yanking the specified instances.
> 
> What's an "instance", and what does it mean to "yank" it?
> 
> The documentation of YankInstances above gives a clue on what an
> "instance" is: presumably a block node, a character device or the
> migration job.
> 
> I guess a YankInstance is whatever the code chooses to make one, and the
> current code makes these three kinds.
> 
> Does it make every block node a YankInstance?  If not, which ones?
> 
> Does it make every character device a YankInstance?  If not, which ones?
> 
> Does it make migration always a YankInstance?  If not, when?

From the POV of the yank code, the "instance" is intentionally opaque.
Whatever the instance wants todo with its callback is acceptable, as
long as it isn't violating the rules for the callback by doing stuff
which can block. In practice right now, an instance is anything which
has a network connection associated with it, but it doesn't have to be
restricted to just networking. Anything which is talking to a service
which can get "stuck" is in scope for supporting yanking.

eg I could imagine an instance having some external helper process and
the yank callback would kill the process 



> > +static void __attribute__((__constructor__)) yank_init(void)
> > +{
> > +    qemu_mutex_init(&lock);
> > +}
> > --
> > 2.20.1
> 
> The two QMP commands permit out-of-band execution ('allow-oob': true).
> OOB is easy to get wrong, but I figure you have a legitimate use case.
> Let's review the restrictions documented in
> docs/devel/qapi-code-gen.txt:
> 
>     An OOB-capable command handler must satisfy the following conditions:
> 
>     - It terminates quickly.
>     - It does not invoke system calls that may block.
>     - It does not access guest RAM that may block when userfaultfd is
>       enabled for postcopy live migration.
>     - It takes only "fast" locks, i.e. all critical sections protected by
>       any lock it takes also satisfy the conditions for OOB command
>       handler code.
> 
> Since the command handlers take &lock, the restrictions apply to the
> other critical sections protected by &lock as well.  I believe these are
> all okay: they do nothing but allocate, initialize and free memory.
> 
> The restrictions also apply to the YankFn callbacks, but you documented
> that.  Okay.
> 
> The one such callback included in this patch is
> yank_generic_iochannel(), which is a thin wrapper around
> qio_channel_shutdown(), which in turn runs the io_shutdown method.
> Thus, the restructions also apply to all the io_shutdown methods.
> That's not documented.
> 
> Daniel, should it be documented?

Patch 6 documents that the qio_channel_shutdown method must be
thread safe but perhaps the doc could be more explicit

Regards,
Daniel
Lukas Straub Aug. 28, 2020, 2:21 p.m. UTC | #4
On Thu, 27 Aug 2020 14:37:00 +0200
Markus Armbruster <armbru@redhat.com> wrote:

> I apologize for not reviewing this much earlier.
> 
> Lukas Straub <lukasstraub2@web.de> writes:
> 
> > The yank feature allows to recover from hanging qemu by "yanking"
> > at various parts. Other qemu systems can register themselves and
> > multiple yank functions. Then all yank functions for selected
> > instances can be called by the 'yank' out-of-band qmp command.
> > Available instances can be queried by a 'query-yank' oob command.
> >
> > Signed-off-by: Lukas Straub <lukasstraub2@web.de>
> > Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
> > ---
> >  include/qemu/yank.h |  80 +++++++++++++++++++
> >  qapi/misc.json      |  45 +++++++++++
> >  util/Makefile.objs  |   1 +
> >  util/yank.c         | 184 ++++++++++++++++++++++++++++++++++++++++++++
> >  4 files changed, 310 insertions(+)
> >  create mode 100644 include/qemu/yank.h
> >  create mode 100644 util/yank.c
> >
> > diff --git a/include/qemu/yank.h b/include/qemu/yank.h
> > new file mode 100644
> > index 0000000000..cd184fcd05
> > --- /dev/null
> > +++ b/include/qemu/yank.h
> > @@ -0,0 +1,80 @@
> > +/*
> > + * QEMU yank feature
> > + *
> > + * Copyright (c) Lukas Straub <lukasstraub2@web.de>
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> > + * See the COPYING file in the top-level directory.
> > + */
> > +
> > +#ifndef YANK_H
> > +#define YANK_H
> > +
> > +typedef void (YankFn) (void *opaque);  
> 
> No space before parameter lists, please.

Ok, I will fix this in the next version.

> > +
> > +/**
> > + * yank_register_instance: Register a new instance.
> > + *
> > + * This registers a new instance for yanking. Must be called before any yank
> > + * function is registered for this instance.
> > + *
> > + * This function is thread-safe.
> > + *
> > + * @instance_name: The globally unique name of the instance.
> > + * @errp: ...
> > + */
> > +void yank_register_instance(const char *instance_name, Error **errp);
> > +
> > +/**
> > + * yank_unregister_instance: Unregister a instance.
> > + *
> > + * This unregisters a instance. Must be called only after every yank function
> > + * of the instance has been unregistered.
> > + *
> > + * This function is thread-safe.
> > + *
> > + * @instance_name: The name of the instance.
> > + */
> > +void yank_unregister_instance(const char *instance_name);
> > +
> > +/**
> > + * yank_register_function: Register a yank function
> > + *
> > + * This registers a yank function. All limitations of qmp oob commands apply
> > + * to the yank function as well.  
> 
> The restrictions are documented in docs/devel/qapi-code-gen.txt under
> "An OOB-capable command handler must satisfy the following conditions".
> Let's point there,

I will fix this in the next version.

> > + *
> > + * This function is thread-safe.
> > + *
> > + * @instance_name: The name of the instance
> > + * @func: The yank function
> > + * @opaque: Will be passed to the yank function
> > + */
> > +void yank_register_function(const char *instance_name,
> > +                            YankFn *func,
> > +                            void *opaque);  
> 
> Pardon my ignorance... can you explain to me why a yank instance may
> have multiple functions?
>
> > +
> > +/**
> > + * yank_unregister_function: Unregister a yank function
> > + *
> > + * This unregisters a yank function.
> > + *
> > + * This function is thread-safe.
> > + *
> > + * @instance_name: The name of the instance
> > + * @func: func that was passed to yank_register_function
> > + * @opaque: opaque that was passed to yank_register_function
> > + */
> > +void yank_unregister_function(const char *instance_name,
> > +                              YankFn *func,
> > +                              void *opaque);
> > +
> > +/**
> > + * yank_unregister_function: Generic yank function for iochannel  
> 
> Pasto, should be
> 
>     * yank_generic_iochannel: ...

I will fix this in the next version.

> > + *
> > + * This is a generic yank function which will call qio_channel_shutdown on the
> > + * provided QIOChannel.
> > + *
> > + * @opaque: QIOChannel to shutdown
> > + */
> > +void yank_generic_iochannel(void *opaque);
> > +#endif
> > diff --git a/qapi/misc.json b/qapi/misc.json
> > index 9d32820dc1..0d6a8f20b7 100644
> > --- a/qapi/misc.json
> > +++ b/qapi/misc.json
> > @@ -1615,3 +1615,48 @@
> >  ##
> >  { 'command': 'query-vm-generation-id', 'returns': 'GuidInfo' }
> >
> > +##
> > +# @YankInstances:
> > +#
> > +# @instances: List of yank instances.
> > +#
> > +# Yank instances are named after the following schema:
> > +# "blockdev:<node-name>", "chardev:<chardev-name>" and "migration"
> > +#
> > +# Since: 5.1
> > +##
> > +{ 'struct': 'YankInstances', 'data': {'instances': ['str'] } }  
> 
> I'm afraid this is a problematic QMP interface.
> 
> By making YankInstances a struct, you keep the door open to adding more
> members, which is good.
> 
> But by making its 'instances' member a ['str'], you close the door to
> using anything but a single string for the individual instances.  Not so
> good.
> 
> The single string encodes information which QMP client will need to
> parse from the string.  We frown on that in QMP.  Use QAPI complex types
> capabilities for structured data.
> 
> Could you use something like this instead?
> 
> { 'enum': 'YankInstanceType',
>   'data': { 'block-node', 'chardev', 'migration' } }
> 
> { 'struct': 'YankInstanceBlockNode',
>   'data': { 'node-name': 'str' } }
> 
> { 'struct': 'YankInstanceChardev',
>   'data' { 'label': 'str' } }
> 
> { 'union': 'YankInstance',
>   'base': { 'type': 'YankInstanceType' },
>   'discriminator': 'type',
>   'data': {
>       'block-node': 'YankInstanceBlockNode',
>       'chardev': 'YankInstanceChardev' } }
> 
> { 'command': 'yank',
>   'data': { 'instances': ['YankInstance'] },
>   'allow-oob': true }
> 
> If you're confident nothing will ever be added to YankInstanceBlockNode
> and YankInstanceChardev, you could use str instead.

As Daniel said, this has already been discussed.

> > +
> > +##
> > +# @yank:
> > +#
> > +# Recover from hanging qemu by yanking the specified instances.  
> 
> What's an "instance", and what does it mean to "yank" it?
> 
> The documentation of YankInstances above gives a clue on what an
> "instance" is: presumably a block node, a character device or the
> migration job.
> 
> I guess a YankInstance is whatever the code chooses to make one, and the
> current code makes these three kinds.
> 
> Does it make every block node a YankInstance?  If not, which ones?
> 
> Does it make every character device a YankInstance?  If not, which ones?
> 
> Does it make migration always a YankInstance?  If not, when?

The yank function is generic so anything more specific than "instance"
doesn't fit. But you are right, I'll improve the documentation here and
list what is currently implemented for yanking and what yanking does in
each case.

> > +#
> > +# Takes @YankInstances as argument.
> > +#
> > +# Returns: nothing.
> > +#
> > +# Example:
> > +#
> > +# -> { "execute": "yank", "arguments": { "instances": ["blockdev:nbd0"] } }
> > +# <- { "return": {} }
> > +#
> > +# Since: 5.1
> > +##
> > +{ 'command': 'yank', 'data': 'YankInstances', 'allow-oob': true }
> > +
> > +##
> > +# @query-yank:
> > +#
> > +# Query yank instances.
> > +#
> > +# Returns: @YankInstances
> > +#
> > +# Example:
> > +#
> > +# -> { "execute": "query-yank" }
> > +# <- { "return": { "instances": ["blockdev:nbd0"] } }
> > +#
> > +# Since: 5.1
> > +##
> > +{ 'command': 'query-yank', 'returns': 'YankInstances', 'allow-oob': true }
> > diff --git a/util/Makefile.objs b/util/Makefile.objs
> > index cc5e37177a..13faa98425 100644
> > --- a/util/Makefile.objs
> > +++ b/util/Makefile.objs
> > @@ -45,6 +45,7 @@ util-obj-$(CONFIG_GIO) += dbus.o
> >  dbus.o-cflags = $(GIO_CFLAGS)
> >  dbus.o-libs = $(GIO_LIBS)
> >  util-obj-$(CONFIG_USER_ONLY) += selfmap.o
> > +util-obj-y += yank.o
> >
> >  #######################################################################
> >  # code used by both qemu system emulation and qemu-img
> > diff --git a/util/yank.c b/util/yank.c
> > new file mode 100644
> > index 0000000000..b0cd27728b
> > --- /dev/null
> > +++ b/util/yank.c
> > @@ -0,0 +1,184 @@
> > +/*
> > + * QEMU yank feature
> > + *
> > + * Copyright (c) Lukas Straub <lukasstraub2@web.de>
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> > + * See the COPYING file in the top-level directory.
> > + */
> > +
> > +#include "qemu/osdep.h"
> > +#include "qapi/error.h"
> > +#include "qemu/thread.h"
> > +#include "qemu/queue.h"
> > +#include "qapi/qapi-commands-misc.h"
> > +#include "io/channel.h"
> > +#include "qemu/yank.h"
> > +
> > +struct YankFuncAndParam {
> > +    YankFn *func;
> > +    void *opaque;
> > +    QLIST_ENTRY(YankFuncAndParam) next;
> > +};
> > +
> > +struct YankInstance {
> > +    char *name;
> > +    QLIST_HEAD(, YankFuncAndParam) yankfns;
> > +    QLIST_ENTRY(YankInstance) next;
> > +};
> > +
> > +static QemuMutex lock;  
> 
> Please give the variable a more telling name, such as @yank_lock, and
> document what exactly the lock protects.  I can guess it's just the list
> that immediately follows, but I prefer to be explicit when it comes to
> locking.

I will fix this in the next version.

> > +static QLIST_HEAD(yankinst_list, YankInstance) head
> > +    = QLIST_HEAD_INITIALIZER(head);  
> 
> Please give the variable a more telling name, such as
> @yank_instance_list.
> 
> I doubt there is a need to name the struct.  This should do:
> 
>    static QLIST_HEAD(, YankInstance) yank_instance_list

I will fix this in the next version.

> > +
> > +static struct YankInstance *yank_find_instance(const char *name)
> > +{
> > +    struct YankInstance *tmp, *instance;
> > +    instance = NULL;
> > +    QLIST_FOREACH(tmp, &head, next) {
> > +        if (!strcmp(tmp->name, name)) {
> > +            instance = tmp;
> > +        }
> > +    }
> > +    return instance;
> > +}  
> 
> Suggest
> 
>    static struct YankInstance *yank_find_instance(const char *name)
>    {
>        struct YankInstance *instance;
> 
>        QLIST_FOREACH(instance, &head, next) {
>            if (!strcmp(instance->name, name)) {
>                return instance;
>            }
>        }
>        return NULL;
>    }

I will fix this in the next version.

> > +
> > +void yank_register_instance(const char *instance_name, Error **errp)
> > +{
> > +    struct YankInstance *instance;
> > +
> > +    qemu_mutex_lock(&lock);
> > +
> > +    if (yank_find_instance(instance_name)) {
> > +        error_setg(errp, "duplicate yank instance name: '%s'", instance_name);  
> 
> Rather long line, suggest to wrap before the last argument.

I will fix this in the next version.

> > +        qemu_mutex_unlock(&lock);
> > +        return;
> > +    }
> > +
> > +    instance = g_slice_new(struct YankInstance);
> > +    instance->name = g_strdup(instance_name);
> > +    QLIST_INIT(&instance->yankfns);
> > +    QLIST_INSERT_HEAD(&head, instance, next);
> > +
> > +    qemu_mutex_unlock(&lock);
> > +}
> > +
> > +void yank_unregister_instance(const char *instance_name)
> > +{
> > +    struct YankInstance *instance;
> > +
> > +    qemu_mutex_lock(&lock);
> > +    instance = yank_find_instance(instance_name);
> > +    assert(instance);
> > +
> > +    assert(QLIST_EMPTY(&instance->yankfns));
> > +    QLIST_REMOVE(instance, next);
> > +    g_free(instance->name);
> > +    g_slice_free(struct YankInstance, instance);
> > +
> > +    qemu_mutex_unlock(&lock);
> > +}
> > +
> > +void yank_register_function(const char *instance_name,
> > +                            YankFn *func,
> > +                            void *opaque)
> > +{
> > +    struct YankInstance *instance;
> > +    struct YankFuncAndParam *entry;
> > +
> > +    qemu_mutex_lock(&lock);
> > +    instance = yank_find_instance(instance_name);
> > +    assert(instance);
> > +
> > +    entry = g_slice_new(struct YankFuncAndParam);
> > +    entry->func = func;
> > +    entry->opaque = opaque;
> > +
> > +    QLIST_INSERT_HEAD(&instance->yankfns, entry, next);
> > +    qemu_mutex_unlock(&lock);
> > +}
> > +
> > +void yank_unregister_function(const char *instance_name,
> > +                              YankFn *func,
> > +                              void *opaque)
> > +{
> > +    struct YankInstance *instance;
> > +    struct YankFuncAndParam *entry;
> > +
> > +    qemu_mutex_lock(&lock);
> > +    instance = yank_find_instance(instance_name);
> > +    assert(instance);
> > +
> > +    QLIST_FOREACH(entry, &instance->yankfns, next) {
> > +        if (entry->func == func && entry->opaque == opaque) {
> > +            QLIST_REMOVE(entry, next);
> > +            g_slice_free(struct YankFuncAndParam, entry);
> > +            qemu_mutex_unlock(&lock);
> > +            return;
> > +        }
> > +    }
> > +
> > +    abort();
> > +}
> > +
> > +void yank_generic_iochannel(void *opaque)
> > +{
> > +    QIOChannel *ioc = QIO_CHANNEL(opaque);
> > +
> > +    qio_channel_shutdown(ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
> > +}
> > +
> > +void qmp_yank(strList *instances,
> > +              Error **errp)
> > +{
> > +    strList *tmp;
> > +    struct YankInstance *instance;
> > +    struct YankFuncAndParam *entry;
> > +
> > +    qemu_mutex_lock(&lock);
> > +    tmp = instances;
> > +    for (; tmp; tmp = tmp->next) {  
> 
> Make that
> 
>        for (tail = instances; tail; tail = tail->next) {

I will fix this in the next version.

> > +        instance = yank_find_instance(tmp->value);
> > +        if (!instance) {
> > +            error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
> > +                      "Instance '%s' not found", tmp->value);
> > +            qemu_mutex_unlock(&lock);
> > +            return;
> > +        }
> > +    }
> > +    tmp = instances;
> > +    for (; tmp; tmp = tmp->next) {  
> 
> Likewise.
> 
> > +        instance = yank_find_instance(tmp->value);
> > +        assert(instance);
> > +        QLIST_FOREACH(entry, &instance->yankfns, next) {
> > +            entry->func(entry->opaque);
> > +        }
> > +    }
> > +    qemu_mutex_unlock(&lock);
> > +}
> > +
> > +YankInstances *qmp_query_yank(Error **errp)
> > +{
> > +    struct YankInstance *instance;
> > +    YankInstances *ret;
> > +
> > +    ret = g_new0(YankInstances, 1);
> > +    ret->instances = NULL;
> > +
> > +    qemu_mutex_lock(&lock);
> > +    QLIST_FOREACH(instance, &head, next) {
> > +        strList *entry;
> > +        entry = g_new0(strList, 1);
> > +        entry->value = g_strdup(instance->name);
> > +        entry->next = ret->instances;
> > +        ret->instances = entry;
> > +    }
> > +    qemu_mutex_unlock(&lock);
> > +
> > +    return ret;
> > +}
> > +
> > +static void __attribute__((__constructor__)) yank_init(void)
> > +{
> > +    qemu_mutex_init(&lock);
> > +}
> > --
> > 2.20.1  
> 
> The two QMP commands permit out-of-band execution ('allow-oob': true).
> OOB is easy to get wrong, but I figure you have a legitimate use case.
> Let's review the restrictions documented in
> docs/devel/qapi-code-gen.txt:
> 
>     An OOB-capable command handler must satisfy the following conditions:
> 
>     - It terminates quickly.
>     - It does not invoke system calls that may block.
>     - It does not access guest RAM that may block when userfaultfd is
>       enabled for postcopy live migration.
>     - It takes only "fast" locks, i.e. all critical sections protected by
>       any lock it takes also satisfy the conditions for OOB command
>       handler code.
> 
> Since the command handlers take &lock, the restrictions apply to the
> other critical sections protected by &lock as well.  I believe these are
> all okay: they do nothing but allocate, initialize and free memory.
> 
> The restrictions also apply to the YankFn callbacks, but you documented
> that.  Okay.
> 
> The one such callback included in this patch is
> yank_generic_iochannel(), which is a thin wrapper around
> qio_channel_shutdown(), which in turn runs the io_shutdown method.
> Thus, the restructions also apply to all the io_shutdown methods.
> That's not documented.
> 
> Daniel, should it be documented?
> 
This is already done in patch 6.

Thank you for you review.

Regards,
Lukas Straub
Markus Armbruster Aug. 31, 2020, 7:47 a.m. UTC | #5
Lukas Straub <lukasstraub2@web.de> writes:

> On Thu, 27 Aug 2020 14:37:00 +0200
> Markus Armbruster <armbru@redhat.com> wrote:
>
>> I apologize for not reviewing this much earlier.
>> 
>> Lukas Straub <lukasstraub2@web.de> writes:
>> 
>> > The yank feature allows to recover from hanging qemu by "yanking"
>> > at various parts. Other qemu systems can register themselves and
>> > multiple yank functions. Then all yank functions for selected
>> > instances can be called by the 'yank' out-of-band qmp command.
>> > Available instances can be queried by a 'query-yank' oob command.
>> >
>> > Signed-off-by: Lukas Straub <lukasstraub2@web.de>
>> > Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
[...]
>> > diff --git a/qapi/misc.json b/qapi/misc.json
>> > index 9d32820dc1..0d6a8f20b7 100644
>> > --- a/qapi/misc.json
>> > +++ b/qapi/misc.json
>> > @@ -1615,3 +1615,48 @@
>> >  ##
>> >  { 'command': 'query-vm-generation-id', 'returns': 'GuidInfo' }
>> >
>> > +##
>> > +# @YankInstances:
>> > +#
>> > +# @instances: List of yank instances.
>> > +#
>> > +# Yank instances are named after the following schema:
>> > +# "blockdev:<node-name>", "chardev:<chardev-name>" and "migration"
>> > +#
>> > +# Since: 5.1
>> > +##
>> > +{ 'struct': 'YankInstances', 'data': {'instances': ['str'] } }  
>> 
>> I'm afraid this is a problematic QMP interface.
>> 
>> By making YankInstances a struct, you keep the door open to adding more
>> members, which is good.
>> 
>> But by making its 'instances' member a ['str'], you close the door to
>> using anything but a single string for the individual instances.  Not so
>> good.
>> 
>> The single string encodes information which QMP client will need to
>> parse from the string.  We frown on that in QMP.  Use QAPI complex types
>> capabilities for structured data.
>> 
>> Could you use something like this instead?
>> 
>> { 'enum': 'YankInstanceType',
>>   'data': { 'block-node', 'chardev', 'migration' } }
>> 
>> { 'struct': 'YankInstanceBlockNode',
>>   'data': { 'node-name': 'str' } }
>> 
>> { 'struct': 'YankInstanceChardev',
>>   'data' { 'label': 'str' } }
>> 
>> { 'union': 'YankInstance',
>>   'base': { 'type': 'YankInstanceType' },
>>   'discriminator': 'type',
>>   'data': {
>>       'block-node': 'YankInstanceBlockNode',
>>       'chardev': 'YankInstanceChardev' } }
>> 
>> { 'command': 'yank',
>>   'data': { 'instances': ['YankInstance'] },
>>   'allow-oob': true }
>> 
>> If you're confident nothing will ever be added to YankInstanceBlockNode
>> and YankInstanceChardev, you could use str instead.
>
> As Daniel said, this has already been discussed.

I'll look up that discussion.

[...]
>> The two QMP commands permit out-of-band execution ('allow-oob': true).
>> OOB is easy to get wrong, but I figure you have a legitimate use case.
>> Let's review the restrictions documented in
>> docs/devel/qapi-code-gen.txt:
>> 
>>     An OOB-capable command handler must satisfy the following conditions:
>> 
>>     - It terminates quickly.
>>     - It does not invoke system calls that may block.
>>     - It does not access guest RAM that may block when userfaultfd is
>>       enabled for postcopy live migration.
>>     - It takes only "fast" locks, i.e. all critical sections protected by
>>       any lock it takes also satisfy the conditions for OOB command
>>       handler code.
>> 
>> Since the command handlers take &lock, the restrictions apply to the
>> other critical sections protected by &lock as well.  I believe these are
>> all okay: they do nothing but allocate, initialize and free memory.
>> 
>> The restrictions also apply to the YankFn callbacks, but you documented
>> that.  Okay.
>> 
>> The one such callback included in this patch is
>> yank_generic_iochannel(), which is a thin wrapper around
>> qio_channel_shutdown(), which in turn runs the io_shutdown method.
>> Thus, the restructions also apply to all the io_shutdown methods.
>> That's not documented.
>> 
>> Daniel, should it be documented?
>> 
> This is already done in patch 6.

Patch 6 adds "This function is thread-safe" to its contract.  The
restrictions on OOB-capable handler code are much more severe than
ordinary thread safety.  For instance, blocking system calls outside
critical sections are thread safe, but not permitted in OOB-capable
handler code.  The contract needs to be more specific.

> Thank you for you review.

Better late than never...  you're welcome!
Lukas Straub Sept. 4, 2020, 12:33 p.m. UTC | #6
On Thu, 27 Aug 2020 14:37:00 +0200
Markus Armbruster <armbru@redhat.com> wrote:

> I apologize for not reviewing this much earlier.
> 
> Lukas Straub <lukasstraub2@web.de> writes:
> 
> > The yank feature allows to recover from hanging qemu by "yanking"
> > at various parts. Other qemu systems can register themselves and
> > multiple yank functions. Then all yank functions for selected
> > instances can be called by the 'yank' out-of-band qmp command.
> > Available instances can be queried by a 'query-yank' oob command.
> >
> > Signed-off-by: Lukas Straub <lukasstraub2@web.de>
> > Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
> > ---
> ...
> > diff --git a/qapi/misc.json b/qapi/misc.json
> > index 9d32820dc1..0d6a8f20b7 100644
> > --- a/qapi/misc.json
> > +++ b/qapi/misc.json
> > @@ -1615,3 +1615,48 @@
> >  ##
> >  { 'command': 'query-vm-generation-id', 'returns': 'GuidInfo' }
> >
> > +##
> > +# @YankInstances:
> > +#
> > +# @instances: List of yank instances.
> > +#
> > +# Yank instances are named after the following schema:
> > +# "blockdev:<node-name>", "chardev:<chardev-name>" and "migration"
> > +#
> > +# Since: 5.1
> > +##
> > +{ 'struct': 'YankInstances', 'data': {'instances': ['str'] } }  
> 
> I'm afraid this is a problematic QMP interface.
> 
> By making YankInstances a struct, you keep the door open to adding more
> members, which is good.
> 
> But by making its 'instances' member a ['str'], you close the door to
> using anything but a single string for the individual instances.  Not so
> good.
> 
> The single string encodes information which QMP client will need to
> parse from the string.  We frown on that in QMP.  Use QAPI complex types
> capabilities for structured data.
> 
> Could you use something like this instead?
> 
> { 'enum': 'YankInstanceType',
>   'data': { 'block-node', 'chardev', 'migration' } }
> 
> { 'struct': 'YankInstanceBlockNode',
>   'data': { 'node-name': 'str' } }
> 
> { 'struct': 'YankInstanceChardev',
>   'data' { 'label': 'str' } }
> 
> { 'union': 'YankInstance',
>   'base': { 'type': 'YankInstanceType' },
>   'discriminator': 'type',
>   'data': {
>       'block-node': 'YankInstanceBlockNode',
>       'chardev': 'YankInstanceChardev' } }
> 
> { 'command': 'yank',
>   'data': { 'instances': ['YankInstance'] },
>   'allow-oob': true }

This proposal looks good to me. Does everyone agree?

Regards,
Lukas Straub

> If you're confident nothing will ever be added to YankInstanceBlockNode
> and YankInstanceChardev, you could use str instead.
> 
> > +
> > +##
> > +# @yank:
> > +#
> > +# Recover from hanging qemu by yanking the specified instances.  
> 
> What's an "instance", and what does it mean to "yank" it?
> 
> The documentation of YankInstances above gives a clue on what an
> "instance" is: presumably a block node, a character device or the
> migration job.
> 
> I guess a YankInstance is whatever the code chooses to make one, and the
> current code makes these three kinds.
> 
> Does it make every block node a YankInstance?  If not, which ones?
> 
> Does it make every character device a YankInstance?  If not, which ones?
> 
> Does it make migration always a YankInstance?  If not, when?
> 
> > +#
> > +# Takes @YankInstances as argument.
> > +#
> > +# Returns: nothing.
> > +#
> > +# Example:
> > +#
> > +# -> { "execute": "yank", "arguments": { "instances": ["blockdev:nbd0"] } }
> > +# <- { "return": {} }
> > +#
> > +# Since: 5.1
> > +##
> > +{ 'command': 'yank', 'data': 'YankInstances', 'allow-oob': true }
> > +
> > +##
> > +# @query-yank:
> > +#
> > +# Query yank instances.
> > +#
> > +# Returns: @YankInstances
> > +#
> > +# Example:
> > +#
> > +# -> { "execute": "query-yank" }
> > +# <- { "return": { "instances": ["blockdev:nbd0"] } }
> > +#
> > +# Since: 5.1
> > +##
> > +{ 'command': 'query-yank', 'returns': 'YankInstances', 'allow-oob': true }
> ...
Eric Blake Sept. 4, 2020, 12:47 p.m. UTC | #7
On 9/4/20 7:33 AM, Lukas Straub wrote:

>>> +##
>>> +# @YankInstances:
>>> +#
>>> +# @instances: List of yank instances.
>>> +#
>>> +# Yank instances are named after the following schema:
>>> +# "blockdev:<node-name>", "chardev:<chardev-name>" and "migration"
>>> +#
>>> +# Since: 5.1
>>> +##
>>> +{ 'struct': 'YankInstances', 'data': {'instances': ['str'] } }
>>
>> I'm afraid this is a problematic QMP interface.
>>
>> By making YankInstances a struct, you keep the door open to adding more
>> members, which is good.
>>
>> But by making its 'instances' member a ['str'], you close the door to
>> using anything but a single string for the individual instances.  Not so
>> good.
>>
>> The single string encodes information which QMP client will need to
>> parse from the string.  We frown on that in QMP.  Use QAPI complex types
>> capabilities for structured data.
>>
>> Could you use something like this instead?
>>
>> { 'enum': 'YankInstanceType',
>>    'data': { 'block-node', 'chardev', 'migration' } }
>>
>> { 'struct': 'YankInstanceBlockNode',
>>    'data': { 'node-name': 'str' } }
>>
>> { 'struct': 'YankInstanceChardev',
>>    'data' { 'label': 'str' } }
>>
>> { 'union': 'YankInstance',
>>    'base': { 'type': 'YankInstanceType' },
>>    'discriminator': 'type',
>>    'data': {
>>        'block-node': 'YankInstanceBlockNode',
>>        'chardev': 'YankInstanceChardev' } }
>>
>> { 'command': 'yank',
>>    'data': { 'instances': ['YankInstance'] },
>>    'allow-oob': true }
> 
> This proposal looks good to me. Does everyone agree?

Yes; this is also more introspectible, so that if we add more yank 
instances down the road, or even more optional features to existing yank 
instances, it becomes easier to detect whether a particular qemu has 
those additions.
diff mbox series

Patch

diff --git a/include/qemu/yank.h b/include/qemu/yank.h
new file mode 100644
index 0000000000..cd184fcd05
--- /dev/null
+++ b/include/qemu/yank.h
@@ -0,0 +1,80 @@ 
+/*
+ * QEMU yank feature
+ *
+ * Copyright (c) Lukas Straub <lukasstraub2@web.de>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef YANK_H
+#define YANK_H
+
+typedef void (YankFn) (void *opaque);
+
+/**
+ * yank_register_instance: Register a new instance.
+ *
+ * This registers a new instance for yanking. Must be called before any yank
+ * function is registered for this instance.
+ *
+ * This function is thread-safe.
+ *
+ * @instance_name: The globally unique name of the instance.
+ * @errp: ...
+ */
+void yank_register_instance(const char *instance_name, Error **errp);
+
+/**
+ * yank_unregister_instance: Unregister a instance.
+ *
+ * This unregisters a instance. Must be called only after every yank function
+ * of the instance has been unregistered.
+ *
+ * This function is thread-safe.
+ *
+ * @instance_name: The name of the instance.
+ */
+void yank_unregister_instance(const char *instance_name);
+
+/**
+ * yank_register_function: Register a yank function
+ *
+ * This registers a yank function. All limitations of qmp oob commands apply
+ * to the yank function as well.
+ *
+ * This function is thread-safe.
+ *
+ * @instance_name: The name of the instance
+ * @func: The yank function
+ * @opaque: Will be passed to the yank function
+ */
+void yank_register_function(const char *instance_name,
+                            YankFn *func,
+                            void *opaque);
+
+/**
+ * yank_unregister_function: Unregister a yank function
+ *
+ * This unregisters a yank function.
+ *
+ * This function is thread-safe.
+ *
+ * @instance_name: The name of the instance
+ * @func: func that was passed to yank_register_function
+ * @opaque: opaque that was passed to yank_register_function
+ */
+void yank_unregister_function(const char *instance_name,
+                              YankFn *func,
+                              void *opaque);
+
+/**
+ * yank_unregister_function: Generic yank function for iochannel
+ *
+ * This is a generic yank function which will call qio_channel_shutdown on the
+ * provided QIOChannel.
+ *
+ * @opaque: QIOChannel to shutdown
+ */
+void yank_generic_iochannel(void *opaque);
+#endif
diff --git a/qapi/misc.json b/qapi/misc.json
index 9d32820dc1..0d6a8f20b7 100644
--- a/qapi/misc.json
+++ b/qapi/misc.json
@@ -1615,3 +1615,48 @@ 
 ##
 { 'command': 'query-vm-generation-id', 'returns': 'GuidInfo' }

+##
+# @YankInstances:
+#
+# @instances: List of yank instances.
+#
+# Yank instances are named after the following schema:
+# "blockdev:<node-name>", "chardev:<chardev-name>" and "migration"
+#
+# Since: 5.1
+##
+{ 'struct': 'YankInstances', 'data': {'instances': ['str'] } }
+
+##
+# @yank:
+#
+# Recover from hanging qemu by yanking the specified instances.
+#
+# Takes @YankInstances as argument.
+#
+# Returns: nothing.
+#
+# Example:
+#
+# -> { "execute": "yank", "arguments": { "instances": ["blockdev:nbd0"] } }
+# <- { "return": {} }
+#
+# Since: 5.1
+##
+{ 'command': 'yank', 'data': 'YankInstances', 'allow-oob': true }
+
+##
+# @query-yank:
+#
+# Query yank instances.
+#
+# Returns: @YankInstances
+#
+# Example:
+#
+# -> { "execute": "query-yank" }
+# <- { "return": { "instances": ["blockdev:nbd0"] } }
+#
+# Since: 5.1
+##
+{ 'command': 'query-yank', 'returns': 'YankInstances', 'allow-oob': true }
diff --git a/util/Makefile.objs b/util/Makefile.objs
index cc5e37177a..13faa98425 100644
--- a/util/Makefile.objs
+++ b/util/Makefile.objs
@@ -45,6 +45,7 @@  util-obj-$(CONFIG_GIO) += dbus.o
 dbus.o-cflags = $(GIO_CFLAGS)
 dbus.o-libs = $(GIO_LIBS)
 util-obj-$(CONFIG_USER_ONLY) += selfmap.o
+util-obj-y += yank.o

 #######################################################################
 # code used by both qemu system emulation and qemu-img
diff --git a/util/yank.c b/util/yank.c
new file mode 100644
index 0000000000..b0cd27728b
--- /dev/null
+++ b/util/yank.c
@@ -0,0 +1,184 @@ 
+/*
+ * QEMU yank feature
+ *
+ * Copyright (c) Lukas Straub <lukasstraub2@web.de>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/thread.h"
+#include "qemu/queue.h"
+#include "qapi/qapi-commands-misc.h"
+#include "io/channel.h"
+#include "qemu/yank.h"
+
+struct YankFuncAndParam {
+    YankFn *func;
+    void *opaque;
+    QLIST_ENTRY(YankFuncAndParam) next;
+};
+
+struct YankInstance {
+    char *name;
+    QLIST_HEAD(, YankFuncAndParam) yankfns;
+    QLIST_ENTRY(YankInstance) next;
+};
+
+static QemuMutex lock;
+static QLIST_HEAD(yankinst_list, YankInstance) head
+    = QLIST_HEAD_INITIALIZER(head);
+
+static struct YankInstance *yank_find_instance(const char *name)
+{
+    struct YankInstance *tmp, *instance;
+    instance = NULL;
+    QLIST_FOREACH(tmp, &head, next) {
+        if (!strcmp(tmp->name, name)) {
+            instance = tmp;
+        }
+    }
+    return instance;
+}
+
+void yank_register_instance(const char *instance_name, Error **errp)
+{
+    struct YankInstance *instance;
+
+    qemu_mutex_lock(&lock);
+
+    if (yank_find_instance(instance_name)) {
+        error_setg(errp, "duplicate yank instance name: '%s'", instance_name);
+        qemu_mutex_unlock(&lock);
+        return;
+    }
+
+    instance = g_slice_new(struct YankInstance);
+    instance->name = g_strdup(instance_name);
+    QLIST_INIT(&instance->yankfns);
+    QLIST_INSERT_HEAD(&head, instance, next);
+
+    qemu_mutex_unlock(&lock);
+}
+
+void yank_unregister_instance(const char *instance_name)
+{
+    struct YankInstance *instance;
+
+    qemu_mutex_lock(&lock);
+    instance = yank_find_instance(instance_name);
+    assert(instance);
+
+    assert(QLIST_EMPTY(&instance->yankfns));
+    QLIST_REMOVE(instance, next);
+    g_free(instance->name);
+    g_slice_free(struct YankInstance, instance);
+
+    qemu_mutex_unlock(&lock);
+}
+
+void yank_register_function(const char *instance_name,
+                            YankFn *func,
+                            void *opaque)
+{
+    struct YankInstance *instance;
+    struct YankFuncAndParam *entry;
+
+    qemu_mutex_lock(&lock);
+    instance = yank_find_instance(instance_name);
+    assert(instance);
+
+    entry = g_slice_new(struct YankFuncAndParam);
+    entry->func = func;
+    entry->opaque = opaque;
+
+    QLIST_INSERT_HEAD(&instance->yankfns, entry, next);
+    qemu_mutex_unlock(&lock);
+}
+
+void yank_unregister_function(const char *instance_name,
+                              YankFn *func,
+                              void *opaque)
+{
+    struct YankInstance *instance;
+    struct YankFuncAndParam *entry;
+
+    qemu_mutex_lock(&lock);
+    instance = yank_find_instance(instance_name);
+    assert(instance);
+
+    QLIST_FOREACH(entry, &instance->yankfns, next) {
+        if (entry->func == func && entry->opaque == opaque) {
+            QLIST_REMOVE(entry, next);
+            g_slice_free(struct YankFuncAndParam, entry);
+            qemu_mutex_unlock(&lock);
+            return;
+        }
+    }
+
+    abort();
+}
+
+void yank_generic_iochannel(void *opaque)
+{
+    QIOChannel *ioc = QIO_CHANNEL(opaque);
+
+    qio_channel_shutdown(ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
+}
+
+void qmp_yank(strList *instances,
+              Error **errp)
+{
+    strList *tmp;
+    struct YankInstance *instance;
+    struct YankFuncAndParam *entry;
+
+    qemu_mutex_lock(&lock);
+    tmp = instances;
+    for (; tmp; tmp = tmp->next) {
+        instance = yank_find_instance(tmp->value);
+        if (!instance) {
+            error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+                      "Instance '%s' not found", tmp->value);
+            qemu_mutex_unlock(&lock);
+            return;
+        }
+    }
+    tmp = instances;
+    for (; tmp; tmp = tmp->next) {
+        instance = yank_find_instance(tmp->value);
+        assert(instance);
+        QLIST_FOREACH(entry, &instance->yankfns, next) {
+            entry->func(entry->opaque);
+        }
+    }
+    qemu_mutex_unlock(&lock);
+}
+
+YankInstances *qmp_query_yank(Error **errp)
+{
+    struct YankInstance *instance;
+    YankInstances *ret;
+
+    ret = g_new0(YankInstances, 1);
+    ret->instances = NULL;
+
+    qemu_mutex_lock(&lock);
+    QLIST_FOREACH(instance, &head, next) {
+        strList *entry;
+        entry = g_new0(strList, 1);
+        entry->value = g_strdup(instance->name);
+        entry->next = ret->instances;
+        ret->instances = entry;
+    }
+    qemu_mutex_unlock(&lock);
+
+    return ret;
+}
+
+static void __attribute__((__constructor__)) yank_init(void)
+{
+    qemu_mutex_init(&lock);
+}