diff mbox series

[v4,5/6] net/colo-compare.c, softmmu/vl.c: Check that colo-compare is active

Message ID f6cbde747d78ff080f680c710e2793867a3cf1fa.1588587700.git.lukasstraub2@web.de (mailing list archive)
State New, archived
Headers show
Series colo-compare bugfixes | expand

Commit Message

Lukas Straub May 4, 2020, 10:28 a.m. UTC
If the colo-compare object is removed before failover and a
checkpoint happens, qemu crashes because it tries to lock
the destroyed event_mtx in colo_notify_compares_event.

Fix this by checking if everything is initialized by
introducing a new variable colo_compare_active which
is protected by a new mutex colo_compare_mutex. The new mutex
also protects against concurrent access of the net_compares
list and makes sure that colo_notify_compares_event isn't
active while we destroy event_mtx and event_complete_cond.

With this it also is again possible to use colo without
colo-compare (periodic mode) and to use multiple colo-compare
for multiple network interfaces.

Signed-off-by: Lukas Straub <lukasstraub2@web.de>
---
 net/colo-compare.c | 35 +++++++++++++++++++++++++++++------
 net/colo-compare.h |  1 +
 softmmu/vl.c       |  2 ++
 3 files changed, 32 insertions(+), 6 deletions(-)

Comments

Zhang Chen May 7, 2020, 11:38 a.m. UTC | #1
> -----Original Message-----
> From: Lukas Straub <lukasstraub2@web.de>
> Sent: Monday, May 4, 2020 6:28 PM
> To: qemu-devel <qemu-devel@nongnu.org>
> Cc: Zhang, Chen <chen.zhang@intel.com>; Li Zhijian
> <lizhijian@cn.fujitsu.com>; Jason Wang <jasowang@redhat.com>; Marc-
> André Lureau <marcandre.lureau@redhat.com>; Paolo Bonzini
> <pbonzini@redhat.com>
> Subject: [PATCH v4 5/6] net/colo-compare.c, softmmu/vl.c: Check that colo-
> compare is active
> 
> If the colo-compare object is removed before failover and a checkpoint
> happens, qemu crashes because it tries to lock the destroyed event_mtx in
> colo_notify_compares_event.
> 
> Fix this by checking if everything is initialized by introducing a new variable
> colo_compare_active which is protected by a new mutex
> colo_compare_mutex. The new mutex also protects against concurrent
> access of the net_compares list and makes sure that
> colo_notify_compares_event isn't active while we destroy event_mtx and
> event_complete_cond.
> 
> With this it also is again possible to use colo without colo-compare (periodic
> mode) and to use multiple colo-compare for multiple network interfaces.
> 

Hi Lukas,

For this case I think we don't need to touch vl.c code, we can solve this issue from another perspective:
How to remove colo-compare?
User will use qemu-monitor or QMP command to disable an object, so we just need return operation failed
When user try to remove colo-compare object while COLO is running.

Thanks
Zhang Chen

> Signed-off-by: Lukas Straub <lukasstraub2@web.de>
> ---
>  net/colo-compare.c | 35 +++++++++++++++++++++++++++++------
>  net/colo-compare.h |  1 +
>  softmmu/vl.c       |  2 ++
>  3 files changed, 32 insertions(+), 6 deletions(-)
> 
> diff --git a/net/colo-compare.c b/net/colo-compare.c index
> 56db3d3bfc..c7572d75e9 100644
> --- a/net/colo-compare.c
> +++ b/net/colo-compare.c
> @@ -54,6 +54,8 @@ static NotifierList colo_compare_notifiers =  #define
> REGULAR_PACKET_CHECK_MS 3000  #define DEFAULT_TIME_OUT_MS 3000
> 
> +static QemuMutex colo_compare_mutex;
> +static bool colo_compare_active;
>  static QemuMutex event_mtx;
>  static QemuCond event_complete_cond;
>  static int event_unhandled_count;
> @@ -906,6 +908,12 @@ static void check_old_packet_regular(void *opaque)
> void colo_notify_compares_event(void *opaque, int event, Error **errp)  {
>      CompareState *s;
> +    qemu_mutex_lock(&colo_compare_mutex);
> +
> +    if (!colo_compare_active) {
> +        qemu_mutex_unlock(&colo_compare_mutex);
> +        return;
> +    }
> 
>      qemu_mutex_lock(&event_mtx);
>      QTAILQ_FOREACH(s, &net_compares, next) { @@ -919,6 +927,7 @@ void
> colo_notify_compares_event(void *opaque, int event, Error **errp)
>      }
> 
>      qemu_mutex_unlock(&event_mtx);
> +    qemu_mutex_unlock(&colo_compare_mutex);
>  }
> 
>  static void colo_compare_timer_init(CompareState *s) @@ -1274,7 +1283,14
> @@ static void colo_compare_complete(UserCreatable *uc, Error **errp)
>                             s->vnet_hdr);
>      }
> 
> +    qemu_mutex_lock(&colo_compare_mutex);
> +    if (!colo_compare_active) {
> +        qemu_mutex_init(&event_mtx);
> +        qemu_cond_init(&event_complete_cond);
> +        colo_compare_active = true;
> +    }
>      QTAILQ_INSERT_TAIL(&net_compares, s, next);
> +    qemu_mutex_unlock(&colo_compare_mutex);
> 
>      s->out_sendco.s = s;
>      s->out_sendco.chr = &s->chr_out;
> @@ -1290,9 +1306,6 @@ static void colo_compare_complete(UserCreatable
> *uc, Error **errp)
> 
>      g_queue_init(&s->conn_list);
> 
> -    qemu_mutex_init(&event_mtx);
> -    qemu_cond_init(&event_complete_cond);
> -
>      s->connection_track_table =
> g_hash_table_new_full(connection_key_hash,
>                                                        connection_key_equal,
>                                                        g_free, @@ -1384,12 +1397,19 @@ static void
> colo_compare_finalize(Object *obj)
> 
>      qemu_bh_delete(s->event_bh);
> 
> +    qemu_mutex_lock(&colo_compare_mutex);
>      QTAILQ_FOREACH(tmp, &net_compares, next) {
>          if (tmp == s) {
>              QTAILQ_REMOVE(&net_compares, s, next);
>              break;
>          }
>      }
> +    if (QTAILQ_EMPTY(&net_compares)) {
> +        colo_compare_active = false;
> +        qemu_mutex_destroy(&event_mtx);
> +        qemu_cond_destroy(&event_complete_cond);
> +    }
> +    qemu_mutex_unlock(&colo_compare_mutex);
> 
>      AioContext *ctx = iothread_get_aio_context(s->iothread);
>      aio_context_acquire(ctx);
> @@ -1413,15 +1433,18 @@ static void colo_compare_finalize(Object *obj)
>          object_unref(OBJECT(s->iothread));
>      }
> 
> -    qemu_mutex_destroy(&event_mtx);
> -    qemu_cond_destroy(&event_complete_cond);
> -
>      g_free(s->pri_indev);
>      g_free(s->sec_indev);
>      g_free(s->outdev);
>      g_free(s->notify_dev);
>  }
> 
> +void colo_compare_init_globals(void)
> +{
> +    colo_compare_active = false;
> +    qemu_mutex_init(&colo_compare_mutex);
> +}
> +
>  static const TypeInfo colo_compare_info = {
>      .name = TYPE_COLO_COMPARE,
>      .parent = TYPE_OBJECT,
> diff --git a/net/colo-compare.h b/net/colo-compare.h index
> 22ddd512e2..eb483ac586 100644
> --- a/net/colo-compare.h
> +++ b/net/colo-compare.h
> @@ -17,6 +17,7 @@
>  #ifndef QEMU_COLO_COMPARE_H
>  #define QEMU_COLO_COMPARE_H
> 
> +void colo_compare_init_globals(void);
>  void colo_notify_compares_event(void *opaque, int event, Error **errp);
> void colo_compare_register_notifier(Notifier *notify);  void
> colo_compare_unregister_notifier(Notifier *notify); diff --git a/softmmu/vl.c
> b/softmmu/vl.c index 32c0047889..a913ed5469 100644
> --- a/softmmu/vl.c
> +++ b/softmmu/vl.c
> @@ -112,6 +112,7 @@
>  #include "qapi/qmp/qerror.h"
>  #include "sysemu/iothread.h"
>  #include "qemu/guest-random.h"
> +#include "net/colo-compare.h"
> 
>  #define MAX_VIRTIO_CONSOLES 1
> 
> @@ -2906,6 +2907,7 @@ void qemu_init(int argc, char **argv, char **envp)
>      precopy_infrastructure_init();
>      postcopy_infrastructure_init();
>      monitor_init_globals();
> +    colo_compare_init_globals();
> 
>      if (qcrypto_init(&err) < 0) {
>          error_reportf_err(err, "cannot initialize crypto: ");
> --
> 2.20.1
Lukas Straub May 7, 2020, 3:54 p.m. UTC | #2
On Thu, 7 May 2020 11:38:04 +0000
"Zhang, Chen" <chen.zhang@intel.com> wrote:

> > -----Original Message-----
> > From: Lukas Straub <lukasstraub2@web.de>
> > Sent: Monday, May 4, 2020 6:28 PM
> > To: qemu-devel <qemu-devel@nongnu.org>
> > Cc: Zhang, Chen <chen.zhang@intel.com>; Li Zhijian
> > <lizhijian@cn.fujitsu.com>; Jason Wang <jasowang@redhat.com>; Marc-
> > André Lureau <marcandre.lureau@redhat.com>; Paolo Bonzini
> > <pbonzini@redhat.com>
> > Subject: [PATCH v4 5/6] net/colo-compare.c, softmmu/vl.c: Check that colo-
> > compare is active
> > 
> > If the colo-compare object is removed before failover and a checkpoint
> > happens, qemu crashes because it tries to lock the destroyed event_mtx in
> > colo_notify_compares_event.
> > 
> > Fix this by checking if everything is initialized by introducing a new variable
> > colo_compare_active which is protected by a new mutex
> > colo_compare_mutex. The new mutex also protects against concurrent
> > access of the net_compares list and makes sure that
> > colo_notify_compares_event isn't active while we destroy event_mtx and
> > event_complete_cond.
> > 
> > With this it also is again possible to use colo without colo-compare (periodic
> > mode) and to use multiple colo-compare for multiple network interfaces.
> >   
> 
> Hi Lukas,
> 
> For this case I think we don't need to touch vl.c code, we can solve this issue from another perspective:
> How to remove colo-compare?
> User will use qemu-monitor or QMP command to disable an object, so we just need return operation failed
> When user try to remove colo-compare object while COLO is running.

Yeah, but that still leaves the other problem that colo can't be used without colo-compare (qemu crashes then).

Regards,
Lukas Straub

> Thanks
> Zhang Chen
> 
> > Signed-off-by: Lukas Straub <lukasstraub2@web.de>
> > ---
> >  net/colo-compare.c | 35 +++++++++++++++++++++++++++++------
> >  net/colo-compare.h |  1 +
> >  softmmu/vl.c       |  2 ++
> >  3 files changed, 32 insertions(+), 6 deletions(-)
> > 
> > diff --git a/net/colo-compare.c b/net/colo-compare.c index
> > 56db3d3bfc..c7572d75e9 100644
> > --- a/net/colo-compare.c
> > +++ b/net/colo-compare.c
> > @@ -54,6 +54,8 @@ static NotifierList colo_compare_notifiers =  #define
> > REGULAR_PACKET_CHECK_MS 3000  #define DEFAULT_TIME_OUT_MS 3000
> > 
> > +static QemuMutex colo_compare_mutex;
> > +static bool colo_compare_active;
> >  static QemuMutex event_mtx;
> >  static QemuCond event_complete_cond;
> >  static int event_unhandled_count;
> > @@ -906,6 +908,12 @@ static void check_old_packet_regular(void *opaque)
> > void colo_notify_compares_event(void *opaque, int event, Error **errp)  {
> >      CompareState *s;
> > +    qemu_mutex_lock(&colo_compare_mutex);
> > +
> > +    if (!colo_compare_active) {
> > +        qemu_mutex_unlock(&colo_compare_mutex);
> > +        return;
> > +    }
> > 
> >      qemu_mutex_lock(&event_mtx);
> >      QTAILQ_FOREACH(s, &net_compares, next) { @@ -919,6 +927,7 @@ void
> > colo_notify_compares_event(void *opaque, int event, Error **errp)
> >      }
> > 
> >      qemu_mutex_unlock(&event_mtx);
> > +    qemu_mutex_unlock(&colo_compare_mutex);
> >  }
> > 
> >  static void colo_compare_timer_init(CompareState *s) @@ -1274,7 +1283,14
> > @@ static void colo_compare_complete(UserCreatable *uc, Error **errp)
> >                             s->vnet_hdr);
> >      }
> > 
> > +    qemu_mutex_lock(&colo_compare_mutex);
> > +    if (!colo_compare_active) {
> > +        qemu_mutex_init(&event_mtx);
> > +        qemu_cond_init(&event_complete_cond);
> > +        colo_compare_active = true;
> > +    }
> >      QTAILQ_INSERT_TAIL(&net_compares, s, next);
> > +    qemu_mutex_unlock(&colo_compare_mutex);
> > 
> >      s->out_sendco.s = s;
> >      s->out_sendco.chr = &s->chr_out;
> > @@ -1290,9 +1306,6 @@ static void colo_compare_complete(UserCreatable
> > *uc, Error **errp)
> > 
> >      g_queue_init(&s->conn_list);
> > 
> > -    qemu_mutex_init(&event_mtx);
> > -    qemu_cond_init(&event_complete_cond);
> > -
> >      s->connection_track_table =
> > g_hash_table_new_full(connection_key_hash,
> >                                                        connection_key_equal,
> >                                                        g_free, @@ -1384,12 +1397,19 @@ static void
> > colo_compare_finalize(Object *obj)
> > 
> >      qemu_bh_delete(s->event_bh);
> > 
> > +    qemu_mutex_lock(&colo_compare_mutex);
> >      QTAILQ_FOREACH(tmp, &net_compares, next) {
> >          if (tmp == s) {
> >              QTAILQ_REMOVE(&net_compares, s, next);
> >              break;
> >          }
> >      }
> > +    if (QTAILQ_EMPTY(&net_compares)) {
> > +        colo_compare_active = false;
> > +        qemu_mutex_destroy(&event_mtx);
> > +        qemu_cond_destroy(&event_complete_cond);
> > +    }
> > +    qemu_mutex_unlock(&colo_compare_mutex);
> > 
> >      AioContext *ctx = iothread_get_aio_context(s->iothread);
> >      aio_context_acquire(ctx);
> > @@ -1413,15 +1433,18 @@ static void colo_compare_finalize(Object *obj)
> >          object_unref(OBJECT(s->iothread));
> >      }
> > 
> > -    qemu_mutex_destroy(&event_mtx);
> > -    qemu_cond_destroy(&event_complete_cond);
> > -
> >      g_free(s->pri_indev);
> >      g_free(s->sec_indev);
> >      g_free(s->outdev);
> >      g_free(s->notify_dev);
> >  }
> > 
> > +void colo_compare_init_globals(void)
> > +{
> > +    colo_compare_active = false;
> > +    qemu_mutex_init(&colo_compare_mutex);
> > +}
> > +
> >  static const TypeInfo colo_compare_info = {
> >      .name = TYPE_COLO_COMPARE,
> >      .parent = TYPE_OBJECT,
> > diff --git a/net/colo-compare.h b/net/colo-compare.h index
> > 22ddd512e2..eb483ac586 100644
> > --- a/net/colo-compare.h
> > +++ b/net/colo-compare.h
> > @@ -17,6 +17,7 @@
> >  #ifndef QEMU_COLO_COMPARE_H
> >  #define QEMU_COLO_COMPARE_H
> > 
> > +void colo_compare_init_globals(void);
> >  void colo_notify_compares_event(void *opaque, int event, Error **errp);
> > void colo_compare_register_notifier(Notifier *notify);  void
> > colo_compare_unregister_notifier(Notifier *notify); diff --git a/softmmu/vl.c
> > b/softmmu/vl.c index 32c0047889..a913ed5469 100644
> > --- a/softmmu/vl.c
> > +++ b/softmmu/vl.c
> > @@ -112,6 +112,7 @@
> >  #include "qapi/qmp/qerror.h"
> >  #include "sysemu/iothread.h"
> >  #include "qemu/guest-random.h"
> > +#include "net/colo-compare.h"
> > 
> >  #define MAX_VIRTIO_CONSOLES 1
> > 
> > @@ -2906,6 +2907,7 @@ void qemu_init(int argc, char **argv, char **envp)
> >      precopy_infrastructure_init();
> >      postcopy_infrastructure_init();
> >      monitor_init_globals();
> > +    colo_compare_init_globals();
> > 
> >      if (qcrypto_init(&err) < 0) {
> >          error_reportf_err(err, "cannot initialize crypto: ");
> > --
> > 2.20.1  
>
Zhang Chen May 8, 2020, 2:26 a.m. UTC | #3
> -----Original Message-----
> From: Lukas Straub <lukasstraub2@web.de>
> Sent: Thursday, May 7, 2020 11:54 PM
> To: Zhang, Chen <chen.zhang@intel.com>
> Cc: qemu-devel <qemu-devel@nongnu.org>; Li Zhijian
> <lizhijian@cn.fujitsu.com>; Jason Wang <jasowang@redhat.com>; Marc-
> André Lureau <marcandre.lureau@redhat.com>; Paolo Bonzini
> <pbonzini@redhat.com>
> Subject: Re: [PATCH v4 5/6] net/colo-compare.c, softmmu/vl.c: Check that
> colo-compare is active
> 
> On Thu, 7 May 2020 11:38:04 +0000
> "Zhang, Chen" <chen.zhang@intel.com> wrote:
> 
> > > -----Original Message-----
> > > From: Lukas Straub <lukasstraub2@web.de>
> > > Sent: Monday, May 4, 2020 6:28 PM
> > > To: qemu-devel <qemu-devel@nongnu.org>
> > > Cc: Zhang, Chen <chen.zhang@intel.com>; Li Zhijian
> > > <lizhijian@cn.fujitsu.com>; Jason Wang <jasowang@redhat.com>; Marc-
> > > André Lureau <marcandre.lureau@redhat.com>; Paolo Bonzini
> > > <pbonzini@redhat.com>
> > > Subject: [PATCH v4 5/6] net/colo-compare.c, softmmu/vl.c: Check that
> > > colo- compare is active
> > >
> > > If the colo-compare object is removed before failover and a
> > > checkpoint happens, qemu crashes because it tries to lock the
> > > destroyed event_mtx in colo_notify_compares_event.
> > >
> > > Fix this by checking if everything is initialized by introducing a
> > > new variable colo_compare_active which is protected by a new mutex
> > > colo_compare_mutex. The new mutex also protects against concurrent
> > > access of the net_compares list and makes sure that
> > > colo_notify_compares_event isn't active while we destroy event_mtx
> > > and event_complete_cond.
> > >
> > > With this it also is again possible to use colo without colo-compare
> > > (periodic
> > > mode) and to use multiple colo-compare for multiple network interfaces.
> > >
> >
> > Hi Lukas,
> >
> > For this case I think we don't need to touch vl.c code, we can solve this
> issue from another perspective:
> > How to remove colo-compare?
> > User will use qemu-monitor or QMP command to disable an object, so we
> > just need return operation failed When user try to remove colo-compare
> object while COLO is running.
> 
> Yeah, but that still leaves the other problem that colo can't be used without
> colo-compare (qemu crashes then).

Yes, the COLO-compare is necessary module in COLO original design.
At most cases, user need it do dynamic sync.
For rare cases, maybe we can add a new colo-compare parameter to bypass all the network workload.

Thanks
Zhang Chen 

> 
> Regards,
> Lukas Straub
> 
> > Thanks
> > Zhang Chen
> >
> > > Signed-off-by: Lukas Straub <lukasstraub2@web.de>
> > > ---
> > >  net/colo-compare.c | 35 +++++++++++++++++++++++++++++------
> > >  net/colo-compare.h |  1 +
> > >  softmmu/vl.c       |  2 ++
> > >  3 files changed, 32 insertions(+), 6 deletions(-)
> > >
> > > diff --git a/net/colo-compare.c b/net/colo-compare.c index
> > > 56db3d3bfc..c7572d75e9 100644
> > > --- a/net/colo-compare.c
> > > +++ b/net/colo-compare.c
> > > @@ -54,6 +54,8 @@ static NotifierList colo_compare_notifiers =
> > > #define REGULAR_PACKET_CHECK_MS 3000  #define
> DEFAULT_TIME_OUT_MS
> > > 3000
> > >
> > > +static QemuMutex colo_compare_mutex; static bool
> > > +colo_compare_active;
> > >  static QemuMutex event_mtx;
> > >  static QemuCond event_complete_cond;  static int
> > > event_unhandled_count; @@ -906,6 +908,12 @@ static void
> > > check_old_packet_regular(void *opaque) void
> > > colo_notify_compares_event(void *opaque, int event, Error **errp)  {
> > >      CompareState *s;
> > > +    qemu_mutex_lock(&colo_compare_mutex);
> > > +
> > > +    if (!colo_compare_active) {
> > > +        qemu_mutex_unlock(&colo_compare_mutex);
> > > +        return;
> > > +    }
> > >
> > >      qemu_mutex_lock(&event_mtx);
> > >      QTAILQ_FOREACH(s, &net_compares, next) { @@ -919,6 +927,7 @@
> > > void colo_notify_compares_event(void *opaque, int event, Error **errp)
> > >      }
> > >
> > >      qemu_mutex_unlock(&event_mtx);
> > > +    qemu_mutex_unlock(&colo_compare_mutex);
> > >  }
> > >
> > >  static void colo_compare_timer_init(CompareState *s) @@ -1274,7
> > > +1283,14 @@ static void colo_compare_complete(UserCreatable *uc,
> Error **errp)
> > >                             s->vnet_hdr);
> > >      }
> > >
> > > +    qemu_mutex_lock(&colo_compare_mutex);
> > > +    if (!colo_compare_active) {
> > > +        qemu_mutex_init(&event_mtx);
> > > +        qemu_cond_init(&event_complete_cond);
> > > +        colo_compare_active = true;
> > > +    }
> > >      QTAILQ_INSERT_TAIL(&net_compares, s, next);
> > > +    qemu_mutex_unlock(&colo_compare_mutex);
> > >
> > >      s->out_sendco.s = s;
> > >      s->out_sendco.chr = &s->chr_out; @@ -1290,9 +1306,6 @@ static
> > > void colo_compare_complete(UserCreatable
> > > *uc, Error **errp)
> > >
> > >      g_queue_init(&s->conn_list);
> > >
> > > -    qemu_mutex_init(&event_mtx);
> > > -    qemu_cond_init(&event_complete_cond);
> > > -
> > >      s->connection_track_table =
> > > g_hash_table_new_full(connection_key_hash,
> > >                                                        connection_key_equal,
> > >                                                        g_free, @@
> > > -1384,12 +1397,19 @@ static void colo_compare_finalize(Object *obj)
> > >
> > >      qemu_bh_delete(s->event_bh);
> > >
> > > +    qemu_mutex_lock(&colo_compare_mutex);
> > >      QTAILQ_FOREACH(tmp, &net_compares, next) {
> > >          if (tmp == s) {
> > >              QTAILQ_REMOVE(&net_compares, s, next);
> > >              break;
> > >          }
> > >      }
> > > +    if (QTAILQ_EMPTY(&net_compares)) {
> > > +        colo_compare_active = false;
> > > +        qemu_mutex_destroy(&event_mtx);
> > > +        qemu_cond_destroy(&event_complete_cond);
> > > +    }
> > > +    qemu_mutex_unlock(&colo_compare_mutex);
> > >
> > >      AioContext *ctx = iothread_get_aio_context(s->iothread);
> > >      aio_context_acquire(ctx);
> > > @@ -1413,15 +1433,18 @@ static void colo_compare_finalize(Object *obj)
> > >          object_unref(OBJECT(s->iothread));
> > >      }
> > >
> > > -    qemu_mutex_destroy(&event_mtx);
> > > -    qemu_cond_destroy(&event_complete_cond);
> > > -
> > >      g_free(s->pri_indev);
> > >      g_free(s->sec_indev);
> > >      g_free(s->outdev);
> > >      g_free(s->notify_dev);
> > >  }
> > >
> > > +void colo_compare_init_globals(void) {
> > > +    colo_compare_active = false;
> > > +    qemu_mutex_init(&colo_compare_mutex);
> > > +}
> > > +
> > >  static const TypeInfo colo_compare_info = {
> > >      .name = TYPE_COLO_COMPARE,
> > >      .parent = TYPE_OBJECT,
> > > diff --git a/net/colo-compare.h b/net/colo-compare.h index
> > > 22ddd512e2..eb483ac586 100644
> > > --- a/net/colo-compare.h
> > > +++ b/net/colo-compare.h
> > > @@ -17,6 +17,7 @@
> > >  #ifndef QEMU_COLO_COMPARE_H
> > >  #define QEMU_COLO_COMPARE_H
> > >
> > > +void colo_compare_init_globals(void);
> > >  void colo_notify_compares_event(void *opaque, int event, Error
> > > **errp); void colo_compare_register_notifier(Notifier *notify);
> > > void colo_compare_unregister_notifier(Notifier *notify); diff --git
> > > a/softmmu/vl.c b/softmmu/vl.c index 32c0047889..a913ed5469 100644
> > > --- a/softmmu/vl.c
> > > +++ b/softmmu/vl.c
> > > @@ -112,6 +112,7 @@
> > >  #include "qapi/qmp/qerror.h"
> > >  #include "sysemu/iothread.h"
> > >  #include "qemu/guest-random.h"
> > > +#include "net/colo-compare.h"
> > >
> > >  #define MAX_VIRTIO_CONSOLES 1
> > >
> > > @@ -2906,6 +2907,7 @@ void qemu_init(int argc, char **argv, char
> **envp)
> > >      precopy_infrastructure_init();
> > >      postcopy_infrastructure_init();
> > >      monitor_init_globals();
> > > +    colo_compare_init_globals();
> > >
> > >      if (qcrypto_init(&err) < 0) {
> > >          error_reportf_err(err, "cannot initialize crypto: ");
> > > --
> > > 2.20.1
> >
Derek Su May 8, 2020, 3:55 a.m. UTC | #4
On 2020/5/8 上午10:26, Zhang, Chen wrote:
> 
> 
>> -----Original Message-----
>> From: Lukas Straub <lukasstraub2@web.de>
>> Sent: Thursday, May 7, 2020 11:54 PM
>> To: Zhang, Chen <chen.zhang@intel.com>
>> Cc: qemu-devel <qemu-devel@nongnu.org>; Li Zhijian
>> <lizhijian@cn.fujitsu.com>; Jason Wang <jasowang@redhat.com>; Marc-
>> André Lureau <marcandre.lureau@redhat.com>; Paolo Bonzini
>> <pbonzini@redhat.com>
>> Subject: Re: [PATCH v4 5/6] net/colo-compare.c, softmmu/vl.c: Check that
>> colo-compare is active
>>
>> On Thu, 7 May 2020 11:38:04 +0000
>> "Zhang, Chen" <chen.zhang@intel.com> wrote:
>>
>>>> -----Original Message-----
>>>> From: Lukas Straub <lukasstraub2@web.de>
>>>> Sent: Monday, May 4, 2020 6:28 PM
>>>> To: qemu-devel <qemu-devel@nongnu.org>
>>>> Cc: Zhang, Chen <chen.zhang@intel.com>; Li Zhijian
>>>> <lizhijian@cn.fujitsu.com>; Jason Wang <jasowang@redhat.com>; Marc-
>>>> André Lureau <marcandre.lureau@redhat.com>; Paolo Bonzini
>>>> <pbonzini@redhat.com>
>>>> Subject: [PATCH v4 5/6] net/colo-compare.c, softmmu/vl.c: Check that
>>>> colo- compare is active
>>>>
>>>> If the colo-compare object is removed before failover and a
>>>> checkpoint happens, qemu crashes because it tries to lock the
>>>> destroyed event_mtx in colo_notify_compares_event.
>>>>
>>>> Fix this by checking if everything is initialized by introducing a
>>>> new variable colo_compare_active which is protected by a new mutex
>>>> colo_compare_mutex. The new mutex also protects against concurrent
>>>> access of the net_compares list and makes sure that
>>>> colo_notify_compares_event isn't active while we destroy event_mtx
>>>> and event_complete_cond.
>>>>
>>>> With this it also is again possible to use colo without colo-compare
>>>> (periodic
>>>> mode) and to use multiple colo-compare for multiple network interfaces.
>>>>
>>>
>>> Hi Lukas,
>>>
>>> For this case I think we don't need to touch vl.c code, we can solve this
>> issue from another perspective:
>>> How to remove colo-compare?
>>> User will use qemu-monitor or QMP command to disable an object, so we
>>> just need return operation failed When user try to remove colo-compare
>> object while COLO is running.
>>
>> Yeah, but that still leaves the other problem that colo can't be used without
>> colo-compare (qemu crashes then).
> 
> Yes, the COLO-compare is necessary module in COLO original design.
> At most cases, user need it do dynamic sync.
> For rare cases, maybe we can add a new colo-compare parameter to bypass all the network workload.

Hi, Chen

In our application, we only need "periodical mode" because of the 
performance issue, and have internal patch now. Is it OK to send the 
internal patch for review?

Thanks.

Regards,
Derek

> 
> Thanks
> Zhang Chen
> 
>>
>> Regards,
>> Lukas Straub
>>
>>> Thanks
>>> Zhang Chen
>>>
>>>> Signed-off-by: Lukas Straub <lukasstraub2@web.de>
>>>> ---
>>>>   net/colo-compare.c | 35 +++++++++++++++++++++++++++++------
>>>>   net/colo-compare.h |  1 +
>>>>   softmmu/vl.c       |  2 ++
>>>>   3 files changed, 32 insertions(+), 6 deletions(-)
>>>>
>>>> diff --git a/net/colo-compare.c b/net/colo-compare.c index
>>>> 56db3d3bfc..c7572d75e9 100644
>>>> --- a/net/colo-compare.c
>>>> +++ b/net/colo-compare.c
>>>> @@ -54,6 +54,8 @@ static NotifierList colo_compare_notifiers =
>>>> #define REGULAR_PACKET_CHECK_MS 3000  #define
>> DEFAULT_TIME_OUT_MS
>>>> 3000
>>>>
>>>> +static QemuMutex colo_compare_mutex; static bool
>>>> +colo_compare_active;
>>>>   static QemuMutex event_mtx;
>>>>   static QemuCond event_complete_cond;  static int
>>>> event_unhandled_count; @@ -906,6 +908,12 @@ static void
>>>> check_old_packet_regular(void *opaque) void
>>>> colo_notify_compares_event(void *opaque, int event, Error **errp)  {
>>>>       CompareState *s;
>>>> +    qemu_mutex_lock(&colo_compare_mutex);
>>>> +
>>>> +    if (!colo_compare_active) {
>>>> +        qemu_mutex_unlock(&colo_compare_mutex);
>>>> +        return;
>>>> +    }
>>>>
>>>>       qemu_mutex_lock(&event_mtx);
>>>>       QTAILQ_FOREACH(s, &net_compares, next) { @@ -919,6 +927,7 @@
>>>> void colo_notify_compares_event(void *opaque, int event, Error **errp)
>>>>       }
>>>>
>>>>       qemu_mutex_unlock(&event_mtx);
>>>> +    qemu_mutex_unlock(&colo_compare_mutex);
>>>>   }
>>>>
>>>>   static void colo_compare_timer_init(CompareState *s) @@ -1274,7
>>>> +1283,14 @@ static void colo_compare_complete(UserCreatable *uc,
>> Error **errp)
>>>>                              s->vnet_hdr);
>>>>       }
>>>>
>>>> +    qemu_mutex_lock(&colo_compare_mutex);
>>>> +    if (!colo_compare_active) {
>>>> +        qemu_mutex_init(&event_mtx);
>>>> +        qemu_cond_init(&event_complete_cond);
>>>> +        colo_compare_active = true;
>>>> +    }
>>>>       QTAILQ_INSERT_TAIL(&net_compares, s, next);
>>>> +    qemu_mutex_unlock(&colo_compare_mutex);
>>>>
>>>>       s->out_sendco.s = s;
>>>>       s->out_sendco.chr = &s->chr_out; @@ -1290,9 +1306,6 @@ static
>>>> void colo_compare_complete(UserCreatable
>>>> *uc, Error **errp)
>>>>
>>>>       g_queue_init(&s->conn_list);
>>>>
>>>> -    qemu_mutex_init(&event_mtx);
>>>> -    qemu_cond_init(&event_complete_cond);
>>>> -
>>>>       s->connection_track_table =
>>>> g_hash_table_new_full(connection_key_hash,
>>>>                                                         connection_key_equal,
>>>>                                                         g_free, @@
>>>> -1384,12 +1397,19 @@ static void colo_compare_finalize(Object *obj)
>>>>
>>>>       qemu_bh_delete(s->event_bh);
>>>>
>>>> +    qemu_mutex_lock(&colo_compare_mutex);
>>>>       QTAILQ_FOREACH(tmp, &net_compares, next) {
>>>>           if (tmp == s) {
>>>>               QTAILQ_REMOVE(&net_compares, s, next);
>>>>               break;
>>>>           }
>>>>       }
>>>> +    if (QTAILQ_EMPTY(&net_compares)) {
>>>> +        colo_compare_active = false;
>>>> +        qemu_mutex_destroy(&event_mtx);
>>>> +        qemu_cond_destroy(&event_complete_cond);
>>>> +    }
>>>> +    qemu_mutex_unlock(&colo_compare_mutex);
>>>>
>>>>       AioContext *ctx = iothread_get_aio_context(s->iothread);
>>>>       aio_context_acquire(ctx);
>>>> @@ -1413,15 +1433,18 @@ static void colo_compare_finalize(Object *obj)
>>>>           object_unref(OBJECT(s->iothread));
>>>>       }
>>>>
>>>> -    qemu_mutex_destroy(&event_mtx);
>>>> -    qemu_cond_destroy(&event_complete_cond);
>>>> -
>>>>       g_free(s->pri_indev);
>>>>       g_free(s->sec_indev);
>>>>       g_free(s->outdev);
>>>>       g_free(s->notify_dev);
>>>>   }
>>>>
>>>> +void colo_compare_init_globals(void) {
>>>> +    colo_compare_active = false;
>>>> +    qemu_mutex_init(&colo_compare_mutex);
>>>> +}
>>>> +
>>>>   static const TypeInfo colo_compare_info = {
>>>>       .name = TYPE_COLO_COMPARE,
>>>>       .parent = TYPE_OBJECT,
>>>> diff --git a/net/colo-compare.h b/net/colo-compare.h index
>>>> 22ddd512e2..eb483ac586 100644
>>>> --- a/net/colo-compare.h
>>>> +++ b/net/colo-compare.h
>>>> @@ -17,6 +17,7 @@
>>>>   #ifndef QEMU_COLO_COMPARE_H
>>>>   #define QEMU_COLO_COMPARE_H
>>>>
>>>> +void colo_compare_init_globals(void);
>>>>   void colo_notify_compares_event(void *opaque, int event, Error
>>>> **errp); void colo_compare_register_notifier(Notifier *notify);
>>>> void colo_compare_unregister_notifier(Notifier *notify); diff --git
>>>> a/softmmu/vl.c b/softmmu/vl.c index 32c0047889..a913ed5469 100644
>>>> --- a/softmmu/vl.c
>>>> +++ b/softmmu/vl.c
>>>> @@ -112,6 +112,7 @@
>>>>   #include "qapi/qmp/qerror.h"
>>>>   #include "sysemu/iothread.h"
>>>>   #include "qemu/guest-random.h"
>>>> +#include "net/colo-compare.h"
>>>>
>>>>   #define MAX_VIRTIO_CONSOLES 1
>>>>
>>>> @@ -2906,6 +2907,7 @@ void qemu_init(int argc, char **argv, char
>> **envp)
>>>>       precopy_infrastructure_init();
>>>>       postcopy_infrastructure_init();
>>>>       monitor_init_globals();
>>>> +    colo_compare_init_globals();
>>>>
>>>>       if (qcrypto_init(&err) < 0) {
>>>>           error_reportf_err(err, "cannot initialize crypto: ");
>>>> --
>>>> 2.20.1
>>>
>
Lukas Straub May 8, 2020, 6:10 a.m. UTC | #5
On Fri, 8 May 2020 02:26:21 +0000
"Zhang, Chen" <chen.zhang@intel.com> wrote:

> > -----Original Message-----
> > From: Lukas Straub <lukasstraub2@web.de>
> > Sent: Thursday, May 7, 2020 11:54 PM
> > To: Zhang, Chen <chen.zhang@intel.com>
> > Cc: qemu-devel <qemu-devel@nongnu.org>; Li Zhijian
> > <lizhijian@cn.fujitsu.com>; Jason Wang <jasowang@redhat.com>; Marc-
> > André Lureau <marcandre.lureau@redhat.com>; Paolo Bonzini
> > <pbonzini@redhat.com>
> > Subject: Re: [PATCH v4 5/6] net/colo-compare.c, softmmu/vl.c: Check that
> > colo-compare is active
> > 
> > On Thu, 7 May 2020 11:38:04 +0000
> > "Zhang, Chen" <chen.zhang@intel.com> wrote:
> >   
> > > > -----Original Message-----
> > > > From: Lukas Straub <lukasstraub2@web.de>
> > > > Sent: Monday, May 4, 2020 6:28 PM
> > > > To: qemu-devel <qemu-devel@nongnu.org>
> > > > Cc: Zhang, Chen <chen.zhang@intel.com>; Li Zhijian
> > > > <lizhijian@cn.fujitsu.com>; Jason Wang <jasowang@redhat.com>; Marc-
> > > > André Lureau <marcandre.lureau@redhat.com>; Paolo Bonzini
> > > > <pbonzini@redhat.com>
> > > > Subject: [PATCH v4 5/6] net/colo-compare.c, softmmu/vl.c: Check that
> > > > colo- compare is active
> > > >
> > > > If the colo-compare object is removed before failover and a
> > > > checkpoint happens, qemu crashes because it tries to lock the
> > > > destroyed event_mtx in colo_notify_compares_event.
> > > >
> > > > Fix this by checking if everything is initialized by introducing a
> > > > new variable colo_compare_active which is protected by a new mutex
> > > > colo_compare_mutex. The new mutex also protects against concurrent
> > > > access of the net_compares list and makes sure that
> > > > colo_notify_compares_event isn't active while we destroy event_mtx
> > > > and event_complete_cond.
> > > >
> > > > With this it also is again possible to use colo without colo-compare
> > > > (periodic
> > > > mode) and to use multiple colo-compare for multiple network interfaces.
> > > >  
> > >
> > > Hi Lukas,
> > >
> > > For this case I think we don't need to touch vl.c code, we can solve this  
> > issue from another perspective:  
> > > How to remove colo-compare?
> > > User will use qemu-monitor or QMP command to disable an object, so we
> > > just need return operation failed When user try to remove colo-compare  
> > object while COLO is running.
> > 
> > Yeah, but that still leaves the other problem that colo can't be used without
> > colo-compare (qemu crashes then).  
> 
> Yes, the COLO-compare is necessary module in COLO original design.
> At most cases, user need it do dynamic sync.
> For rare cases, maybe we can add a new colo-compare parameter to bypass all the network workload.

I think such an parameter would only be a workaround instead of a real solution like this patch.

Regards,
Lukas Straub

> Thanks
> Zhang Chen 
> 
> > 
> > Regards,
> > Lukas Straub
> >   
> > > Thanks
> > > Zhang Chen
> > >  
> > > > Signed-off-by: Lukas Straub <lukasstraub2@web.de>
> > > > ---
> > > >  net/colo-compare.c | 35 +++++++++++++++++++++++++++++------
> > > >  net/colo-compare.h |  1 +
> > > >  softmmu/vl.c       |  2 ++
> > > >  3 files changed, 32 insertions(+), 6 deletions(-)
> > > >
> > > > diff --git a/net/colo-compare.c b/net/colo-compare.c index
> > > > 56db3d3bfc..c7572d75e9 100644
> > > > --- a/net/colo-compare.c
> > > > +++ b/net/colo-compare.c
> > > > @@ -54,6 +54,8 @@ static NotifierList colo_compare_notifiers =
> > > > #define REGULAR_PACKET_CHECK_MS 3000  #define  
> > DEFAULT_TIME_OUT_MS  
> > > > 3000
> > > >
> > > > +static QemuMutex colo_compare_mutex; static bool
> > > > +colo_compare_active;
> > > >  static QemuMutex event_mtx;
> > > >  static QemuCond event_complete_cond;  static int
> > > > event_unhandled_count; @@ -906,6 +908,12 @@ static void
> > > > check_old_packet_regular(void *opaque) void
> > > > colo_notify_compares_event(void *opaque, int event, Error **errp)  {
> > > >      CompareState *s;
> > > > +    qemu_mutex_lock(&colo_compare_mutex);
> > > > +
> > > > +    if (!colo_compare_active) {
> > > > +        qemu_mutex_unlock(&colo_compare_mutex);
> > > > +        return;
> > > > +    }
> > > >
> > > >      qemu_mutex_lock(&event_mtx);
> > > >      QTAILQ_FOREACH(s, &net_compares, next) { @@ -919,6 +927,7 @@
> > > > void colo_notify_compares_event(void *opaque, int event, Error **errp)
> > > >      }
> > > >
> > > >      qemu_mutex_unlock(&event_mtx);
> > > > +    qemu_mutex_unlock(&colo_compare_mutex);
> > > >  }
> > > >
> > > >  static void colo_compare_timer_init(CompareState *s) @@ -1274,7
> > > > +1283,14 @@ static void colo_compare_complete(UserCreatable *uc,  
> > Error **errp)  
> > > >                             s->vnet_hdr);
> > > >      }
> > > >
> > > > +    qemu_mutex_lock(&colo_compare_mutex);
> > > > +    if (!colo_compare_active) {
> > > > +        qemu_mutex_init(&event_mtx);
> > > > +        qemu_cond_init(&event_complete_cond);
> > > > +        colo_compare_active = true;
> > > > +    }
> > > >      QTAILQ_INSERT_TAIL(&net_compares, s, next);
> > > > +    qemu_mutex_unlock(&colo_compare_mutex);
> > > >
> > > >      s->out_sendco.s = s;
> > > >      s->out_sendco.chr = &s->chr_out; @@ -1290,9 +1306,6 @@ static
> > > > void colo_compare_complete(UserCreatable
> > > > *uc, Error **errp)
> > > >
> > > >      g_queue_init(&s->conn_list);
> > > >
> > > > -    qemu_mutex_init(&event_mtx);
> > > > -    qemu_cond_init(&event_complete_cond);
> > > > -
> > > >      s->connection_track_table =
> > > > g_hash_table_new_full(connection_key_hash,
> > > >                                                        connection_key_equal,
> > > >                                                        g_free, @@
> > > > -1384,12 +1397,19 @@ static void colo_compare_finalize(Object *obj)
> > > >
> > > >      qemu_bh_delete(s->event_bh);
> > > >
> > > > +    qemu_mutex_lock(&colo_compare_mutex);
> > > >      QTAILQ_FOREACH(tmp, &net_compares, next) {
> > > >          if (tmp == s) {
> > > >              QTAILQ_REMOVE(&net_compares, s, next);
> > > >              break;
> > > >          }
> > > >      }
> > > > +    if (QTAILQ_EMPTY(&net_compares)) {
> > > > +        colo_compare_active = false;
> > > > +        qemu_mutex_destroy(&event_mtx);
> > > > +        qemu_cond_destroy(&event_complete_cond);
> > > > +    }
> > > > +    qemu_mutex_unlock(&colo_compare_mutex);
> > > >
> > > >      AioContext *ctx = iothread_get_aio_context(s->iothread);
> > > >      aio_context_acquire(ctx);
> > > > @@ -1413,15 +1433,18 @@ static void colo_compare_finalize(Object *obj)
> > > >          object_unref(OBJECT(s->iothread));
> > > >      }
> > > >
> > > > -    qemu_mutex_destroy(&event_mtx);
> > > > -    qemu_cond_destroy(&event_complete_cond);
> > > > -
> > > >      g_free(s->pri_indev);
> > > >      g_free(s->sec_indev);
> > > >      g_free(s->outdev);
> > > >      g_free(s->notify_dev);
> > > >  }
> > > >
> > > > +void colo_compare_init_globals(void) {
> > > > +    colo_compare_active = false;
> > > > +    qemu_mutex_init(&colo_compare_mutex);
> > > > +}
> > > > +
> > > >  static const TypeInfo colo_compare_info = {
> > > >      .name = TYPE_COLO_COMPARE,
> > > >      .parent = TYPE_OBJECT,
> > > > diff --git a/net/colo-compare.h b/net/colo-compare.h index
> > > > 22ddd512e2..eb483ac586 100644
> > > > --- a/net/colo-compare.h
> > > > +++ b/net/colo-compare.h
> > > > @@ -17,6 +17,7 @@
> > > >  #ifndef QEMU_COLO_COMPARE_H
> > > >  #define QEMU_COLO_COMPARE_H
> > > >
> > > > +void colo_compare_init_globals(void);
> > > >  void colo_notify_compares_event(void *opaque, int event, Error
> > > > **errp); void colo_compare_register_notifier(Notifier *notify);
> > > > void colo_compare_unregister_notifier(Notifier *notify); diff --git
> > > > a/softmmu/vl.c b/softmmu/vl.c index 32c0047889..a913ed5469 100644
> > > > --- a/softmmu/vl.c
> > > > +++ b/softmmu/vl.c
> > > > @@ -112,6 +112,7 @@
> > > >  #include "qapi/qmp/qerror.h"
> > > >  #include "sysemu/iothread.h"
> > > >  #include "qemu/guest-random.h"
> > > > +#include "net/colo-compare.h"
> > > >
> > > >  #define MAX_VIRTIO_CONSOLES 1
> > > >
> > > > @@ -2906,6 +2907,7 @@ void qemu_init(int argc, char **argv, char  
> > **envp)  
> > > >      precopy_infrastructure_init();
> > > >      postcopy_infrastructure_init();
> > > >      monitor_init_globals();
> > > > +    colo_compare_init_globals();
> > > >
> > > >      if (qcrypto_init(&err) < 0) {
> > > >          error_reportf_err(err, "cannot initialize crypto: ");
> > > > --
> > > > 2.20.1  
> > >  
>
Zhang Chen May 8, 2020, 6:50 a.m. UTC | #6
> -----Original Message-----
> From: Lukas Straub <lukasstraub2@web.de>
> Sent: Friday, May 8, 2020 2:11 PM
> To: Zhang, Chen <chen.zhang@intel.com>
> Cc: qemu-devel <qemu-devel@nongnu.org>; Li Zhijian
> <lizhijian@cn.fujitsu.com>; Jason Wang <jasowang@redhat.com>; Marc-
> André Lureau <marcandre.lureau@redhat.com>; Paolo Bonzini
> <pbonzini@redhat.com>
> Subject: Re: [PATCH v4 5/6] net/colo-compare.c, softmmu/vl.c: Check that
> colo-compare is active
> 
> On Fri, 8 May 2020 02:26:21 +0000
> "Zhang, Chen" <chen.zhang@intel.com> wrote:
> 
> > > -----Original Message-----
> > > From: Lukas Straub <lukasstraub2@web.de>
> > > Sent: Thursday, May 7, 2020 11:54 PM
> > > To: Zhang, Chen <chen.zhang@intel.com>
> > > Cc: qemu-devel <qemu-devel@nongnu.org>; Li Zhijian
> > > <lizhijian@cn.fujitsu.com>; Jason Wang <jasowang@redhat.com>; Marc-
> > > André Lureau <marcandre.lureau@redhat.com>; Paolo Bonzini
> > > <pbonzini@redhat.com>
> > > Subject: Re: [PATCH v4 5/6] net/colo-compare.c, softmmu/vl.c: Check
> > > that colo-compare is active
> > >
> > > On Thu, 7 May 2020 11:38:04 +0000
> > > "Zhang, Chen" <chen.zhang@intel.com> wrote:
> > >
> > > > > -----Original Message-----
> > > > > From: Lukas Straub <lukasstraub2@web.de>
> > > > > Sent: Monday, May 4, 2020 6:28 PM
> > > > > To: qemu-devel <qemu-devel@nongnu.org>
> > > > > Cc: Zhang, Chen <chen.zhang@intel.com>; Li Zhijian
> > > > > <lizhijian@cn.fujitsu.com>; Jason Wang <jasowang@redhat.com>;
> > > > > Marc- André Lureau <marcandre.lureau@redhat.com>; Paolo Bonzini
> > > > > <pbonzini@redhat.com>
> > > > > Subject: [PATCH v4 5/6] net/colo-compare.c, softmmu/vl.c: Check
> > > > > that
> > > > > colo- compare is active
> > > > >
> > > > > If the colo-compare object is removed before failover and a
> > > > > checkpoint happens, qemu crashes because it tries to lock the
> > > > > destroyed event_mtx in colo_notify_compares_event.
> > > > >
> > > > > Fix this by checking if everything is initialized by introducing
> > > > > a new variable colo_compare_active which is protected by a new
> > > > > mutex colo_compare_mutex. The new mutex also protects against
> > > > > concurrent access of the net_compares list and makes sure that
> > > > > colo_notify_compares_event isn't active while we destroy
> > > > > event_mtx and event_complete_cond.
> > > > >
> > > > > With this it also is again possible to use colo without
> > > > > colo-compare (periodic
> > > > > mode) and to use multiple colo-compare for multiple network
> interfaces.
> > > > >
> > > >
> > > > Hi Lukas,
> > > >
> > > > For this case I think we don't need to touch vl.c code, we can
> > > > solve this
> > > issue from another perspective:
> > > > How to remove colo-compare?
> > > > User will use qemu-monitor or QMP command to disable an object, so
> > > > we just need return operation failed When user try to remove
> > > > colo-compare
> > > object while COLO is running.
> > >
> > > Yeah, but that still leaves the other problem that colo can't be
> > > used without colo-compare (qemu crashes then).
> >
> > Yes, the COLO-compare is necessary module in COLO original design.
> > At most cases, user need it do dynamic sync.
> > For rare cases, maybe we can add a new colo-compare parameter to
> bypass all the network workload.
> 
> I think such an parameter would only be a workaround instead of a real
> solution like this patch.

The root problem is why COLO-compare is necessary.
Yes, maybe someone want to use pure periodic synchronization mode,
But it means it will lost all guest network support(without colo-compare/filter-mirror/filter-redirector/filter-rewriter).
The secondary guest just a solid backup for the primary guest, when occur failover the new build stateful connection (like TCP)
will crashed, need userspace to handle this status. It lost the original meaning for COLO FT/HA solution, no need use do HA in application layer.
it looks like normal/remote periodic VM snapshot here. 
Dave or Jason have any comments here? 

Thanks
Zhang Chen

> 
> Regards,
> Lukas Straub
> 
> > Thanks
> > Zhang Chen
> >
> > >
> > > Regards,
> > > Lukas Straub
> > >
> > > > Thanks
> > > > Zhang Chen
> > > >
> > > > > Signed-off-by: Lukas Straub <lukasstraub2@web.de>
> > > > > ---
> > > > >  net/colo-compare.c | 35 +++++++++++++++++++++++++++++------
> > > > >  net/colo-compare.h |  1 +
> > > > >  softmmu/vl.c       |  2 ++
> > > > >  3 files changed, 32 insertions(+), 6 deletions(-)
> > > > >
> > > > > diff --git a/net/colo-compare.c b/net/colo-compare.c index
> > > > > 56db3d3bfc..c7572d75e9 100644
> > > > > --- a/net/colo-compare.c
> > > > > +++ b/net/colo-compare.c
> > > > > @@ -54,6 +54,8 @@ static NotifierList colo_compare_notifiers =
> > > > > #define REGULAR_PACKET_CHECK_MS 3000  #define
> > > DEFAULT_TIME_OUT_MS
> > > > > 3000
> > > > >
> > > > > +static QemuMutex colo_compare_mutex; static bool
> > > > > +colo_compare_active;
> > > > >  static QemuMutex event_mtx;
> > > > >  static QemuCond event_complete_cond;  static int
> > > > > event_unhandled_count; @@ -906,6 +908,12 @@ static void
> > > > > check_old_packet_regular(void *opaque) void
> > > > > colo_notify_compares_event(void *opaque, int event, Error **errp)
> {
> > > > >      CompareState *s;
> > > > > +    qemu_mutex_lock(&colo_compare_mutex);
> > > > > +
> > > > > +    if (!colo_compare_active) {
> > > > > +        qemu_mutex_unlock(&colo_compare_mutex);
> > > > > +        return;
> > > > > +    }
> > > > >
> > > > >      qemu_mutex_lock(&event_mtx);
> > > > >      QTAILQ_FOREACH(s, &net_compares, next) { @@ -919,6 +927,7
> > > > > @@ void colo_notify_compares_event(void *opaque, int event,
> Error **errp)
> > > > >      }
> > > > >
> > > > >      qemu_mutex_unlock(&event_mtx);
> > > > > +    qemu_mutex_unlock(&colo_compare_mutex);
> > > > >  }
> > > > >
> > > > >  static void colo_compare_timer_init(CompareState *s) @@ -1274,7
> > > > > +1283,14 @@ static void colo_compare_complete(UserCreatable *uc,
> > > Error **errp)
> > > > >                             s->vnet_hdr);
> > > > >      }
> > > > >
> > > > > +    qemu_mutex_lock(&colo_compare_mutex);
> > > > > +    if (!colo_compare_active) {
> > > > > +        qemu_mutex_init(&event_mtx);
> > > > > +        qemu_cond_init(&event_complete_cond);
> > > > > +        colo_compare_active = true;
> > > > > +    }
> > > > >      QTAILQ_INSERT_TAIL(&net_compares, s, next);
> > > > > +    qemu_mutex_unlock(&colo_compare_mutex);
> > > > >
> > > > >      s->out_sendco.s = s;
> > > > >      s->out_sendco.chr = &s->chr_out; @@ -1290,9 +1306,6 @@
> > > > > static void colo_compare_complete(UserCreatable
> > > > > *uc, Error **errp)
> > > > >
> > > > >      g_queue_init(&s->conn_list);
> > > > >
> > > > > -    qemu_mutex_init(&event_mtx);
> > > > > -    qemu_cond_init(&event_complete_cond);
> > > > > -
> > > > >      s->connection_track_table =
> > > > > g_hash_table_new_full(connection_key_hash,
> > > > >                                                        connection_key_equal,
> > > > >                                                        g_free,
> > > > > @@
> > > > > -1384,12 +1397,19 @@ static void colo_compare_finalize(Object
> > > > > *obj)
> > > > >
> > > > >      qemu_bh_delete(s->event_bh);
> > > > >
> > > > > +    qemu_mutex_lock(&colo_compare_mutex);
> > > > >      QTAILQ_FOREACH(tmp, &net_compares, next) {
> > > > >          if (tmp == s) {
> > > > >              QTAILQ_REMOVE(&net_compares, s, next);
> > > > >              break;
> > > > >          }
> > > > >      }
> > > > > +    if (QTAILQ_EMPTY(&net_compares)) {
> > > > > +        colo_compare_active = false;
> > > > > +        qemu_mutex_destroy(&event_mtx);
> > > > > +        qemu_cond_destroy(&event_complete_cond);
> > > > > +    }
> > > > > +    qemu_mutex_unlock(&colo_compare_mutex);
> > > > >
> > > > >      AioContext *ctx = iothread_get_aio_context(s->iothread);
> > > > >      aio_context_acquire(ctx);
> > > > > @@ -1413,15 +1433,18 @@ static void colo_compare_finalize(Object
> *obj)
> > > > >          object_unref(OBJECT(s->iothread));
> > > > >      }
> > > > >
> > > > > -    qemu_mutex_destroy(&event_mtx);
> > > > > -    qemu_cond_destroy(&event_complete_cond);
> > > > > -
> > > > >      g_free(s->pri_indev);
> > > > >      g_free(s->sec_indev);
> > > > >      g_free(s->outdev);
> > > > >      g_free(s->notify_dev);
> > > > >  }
> > > > >
> > > > > +void colo_compare_init_globals(void) {
> > > > > +    colo_compare_active = false;
> > > > > +    qemu_mutex_init(&colo_compare_mutex);
> > > > > +}
> > > > > +
> > > > >  static const TypeInfo colo_compare_info = {
> > > > >      .name = TYPE_COLO_COMPARE,
> > > > >      .parent = TYPE_OBJECT,
> > > > > diff --git a/net/colo-compare.h b/net/colo-compare.h index
> > > > > 22ddd512e2..eb483ac586 100644
> > > > > --- a/net/colo-compare.h
> > > > > +++ b/net/colo-compare.h
> > > > > @@ -17,6 +17,7 @@
> > > > >  #ifndef QEMU_COLO_COMPARE_H
> > > > >  #define QEMU_COLO_COMPARE_H
> > > > >
> > > > > +void colo_compare_init_globals(void);
> > > > >  void colo_notify_compares_event(void *opaque, int event, Error
> > > > > **errp); void colo_compare_register_notifier(Notifier *notify);
> > > > > void colo_compare_unregister_notifier(Notifier *notify); diff
> > > > > --git a/softmmu/vl.c b/softmmu/vl.c index 32c0047889..a913ed5469
> > > > > 100644
> > > > > --- a/softmmu/vl.c
> > > > > +++ b/softmmu/vl.c
> > > > > @@ -112,6 +112,7 @@
> > > > >  #include "qapi/qmp/qerror.h"
> > > > >  #include "sysemu/iothread.h"
> > > > >  #include "qemu/guest-random.h"
> > > > > +#include "net/colo-compare.h"
> > > > >
> > > > >  #define MAX_VIRTIO_CONSOLES 1
> > > > >
> > > > > @@ -2906,6 +2907,7 @@ void qemu_init(int argc, char **argv, char
> > > **envp)
> > > > >      precopy_infrastructure_init();
> > > > >      postcopy_infrastructure_init();
> > > > >      monitor_init_globals();
> > > > > +    colo_compare_init_globals();
> > > > >
> > > > >      if (qcrypto_init(&err) < 0) {
> > > > >          error_reportf_err(err, "cannot initialize crypto: ");
> > > > > --
> > > > > 2.20.1
> > > >
> >
Lukas Straub May 9, 2020, 12:21 p.m. UTC | #7
On Fri, 8 May 2020 06:50:39 +0000
"Zhang, Chen" <chen.zhang@intel.com> wrote:

> > -----Original Message-----
> > From: Lukas Straub <lukasstraub2@web.de>
> > Sent: Friday, May 8, 2020 2:11 PM
> > To: Zhang, Chen <chen.zhang@intel.com>
> > Cc: qemu-devel <qemu-devel@nongnu.org>; Li Zhijian
> > <lizhijian@cn.fujitsu.com>; Jason Wang <jasowang@redhat.com>; Marc-
> > André Lureau <marcandre.lureau@redhat.com>; Paolo Bonzini
> > <pbonzini@redhat.com>
> > Subject: Re: [PATCH v4 5/6] net/colo-compare.c, softmmu/vl.c: Check that
> > colo-compare is active
> > 
> > On Fri, 8 May 2020 02:26:21 +0000
> > "Zhang, Chen" <chen.zhang@intel.com> wrote:
> >   
> > > > -----Original Message-----
> > > > From: Lukas Straub <lukasstraub2@web.de>
> > > > Sent: Thursday, May 7, 2020 11:54 PM
> > > > To: Zhang, Chen <chen.zhang@intel.com>
> > > > Cc: qemu-devel <qemu-devel@nongnu.org>; Li Zhijian
> > > > <lizhijian@cn.fujitsu.com>; Jason Wang <jasowang@redhat.com>; Marc-
> > > > André Lureau <marcandre.lureau@redhat.com>; Paolo Bonzini
> > > > <pbonzini@redhat.com>
> > > > Subject: Re: [PATCH v4 5/6] net/colo-compare.c, softmmu/vl.c: Check
> > > > that colo-compare is active
> > > >
> > > > On Thu, 7 May 2020 11:38:04 +0000
> > > > "Zhang, Chen" <chen.zhang@intel.com> wrote:
> > > >  
> > > > > > -----Original Message-----
> > > > > > From: Lukas Straub <lukasstraub2@web.de>
> > > > > > Sent: Monday, May 4, 2020 6:28 PM
> > > > > > To: qemu-devel <qemu-devel@nongnu.org>
> > > > > > Cc: Zhang, Chen <chen.zhang@intel.com>; Li Zhijian
> > > > > > <lizhijian@cn.fujitsu.com>; Jason Wang <jasowang@redhat.com>;
> > > > > > Marc- André Lureau <marcandre.lureau@redhat.com>; Paolo Bonzini
> > > > > > <pbonzini@redhat.com>
> > > > > > Subject: [PATCH v4 5/6] net/colo-compare.c, softmmu/vl.c: Check
> > > > > > that
> > > > > > colo- compare is active
> > > > > >
> > > > > > If the colo-compare object is removed before failover and a
> > > > > > checkpoint happens, qemu crashes because it tries to lock the
> > > > > > destroyed event_mtx in colo_notify_compares_event.
> > > > > >
> > > > > > Fix this by checking if everything is initialized by introducing
> > > > > > a new variable colo_compare_active which is protected by a new
> > > > > > mutex colo_compare_mutex. The new mutex also protects against
> > > > > > concurrent access of the net_compares list and makes sure that
> > > > > > colo_notify_compares_event isn't active while we destroy
> > > > > > event_mtx and event_complete_cond.
> > > > > >
> > > > > > With this it also is again possible to use colo without
> > > > > > colo-compare (periodic
> > > > > > mode) and to use multiple colo-compare for multiple network  
> > interfaces.  
> > > > > >  
> > > > >
> > > > > Hi Lukas,
> > > > >
> > > > > For this case I think we don't need to touch vl.c code, we can
> > > > > solve this  
> > > > issue from another perspective:  
> > > > > How to remove colo-compare?
> > > > > User will use qemu-monitor or QMP command to disable an object, so
> > > > > we just need return operation failed When user try to remove
> > > > > colo-compare  
> > > > object while COLO is running.
> > > >
> > > > Yeah, but that still leaves the other problem that colo can't be
> > > > used without colo-compare (qemu crashes then).  
> > >
> > > Yes, the COLO-compare is necessary module in COLO original design.
> > > At most cases, user need it do dynamic sync.
> > > For rare cases, maybe we can add a new colo-compare parameter to  
> > bypass all the network workload.
> > 
> > I think such an parameter would only be a workaround instead of a real
> > solution like this patch.  
> 
> The root problem is why COLO-compare is necessary.
> Yes, maybe someone want to use pure periodic synchronization mode,
> But it means it will lost all guest network support(without colo-compare/filter-mirror/filter-redirector/filter-rewriter).
> The secondary guest just a solid backup for the primary guest, when occur failover the new build stateful connection (like TCP)
> will crashed, need userspace to handle this status. It lost the original meaning for COLO FT/HA solution, no need use do HA in application layer.
> it looks like normal/remote periodic VM snapshot here. 

Sure, but maybe the user doesn't need (reliable) network on failover. Also proper network support with periodic mode can easily be implemented by modifying filter-buffer to buffer packets until checkpoint. Being able to use colo without colo-compare gives more flexibility to the user.

Regards,
Lukas Straub

> Dave or Jason have any comments here? 
> 
> Thanks
> Zhang Chen
> 
> > 
> > Regards,
> > Lukas Straub
> >   
> > > Thanks
> > > Zhang Chen
> > >  
> > > >
> > > > Regards,
> > > > Lukas Straub
> > > >  
> > > > > Thanks
> > > > > Zhang Chen
> > > > >  
> > > > > > Signed-off-by: Lukas Straub <lukasstraub2@web.de>
> > > > > > ---
> > > > > >  net/colo-compare.c | 35 +++++++++++++++++++++++++++++------
> > > > > >  net/colo-compare.h |  1 +
> > > > > >  softmmu/vl.c       |  2 ++
> > > > > >  3 files changed, 32 insertions(+), 6 deletions(-)
> > > > > >
> > > > > > diff --git a/net/colo-compare.c b/net/colo-compare.c index
> > > > > > 56db3d3bfc..c7572d75e9 100644
> > > > > > --- a/net/colo-compare.c
> > > > > > +++ b/net/colo-compare.c
> > > > > > @@ -54,6 +54,8 @@ static NotifierList colo_compare_notifiers =
> > > > > > #define REGULAR_PACKET_CHECK_MS 3000  #define  
> > > > DEFAULT_TIME_OUT_MS  
> > > > > > 3000
> > > > > >
> > > > > > +static QemuMutex colo_compare_mutex; static bool
> > > > > > +colo_compare_active;
> > > > > >  static QemuMutex event_mtx;
> > > > > >  static QemuCond event_complete_cond;  static int
> > > > > > event_unhandled_count; @@ -906,6 +908,12 @@ static void
> > > > > > check_old_packet_regular(void *opaque) void
> > > > > > colo_notify_compares_event(void *opaque, int event, Error **errp)  
> > {  
> > > > > >      CompareState *s;
> > > > > > +    qemu_mutex_lock(&colo_compare_mutex);
> > > > > > +
> > > > > > +    if (!colo_compare_active) {
> > > > > > +        qemu_mutex_unlock(&colo_compare_mutex);
> > > > > > +        return;
> > > > > > +    }
> > > > > >
> > > > > >      qemu_mutex_lock(&event_mtx);
> > > > > >      QTAILQ_FOREACH(s, &net_compares, next) { @@ -919,6 +927,7
> > > > > > @@ void colo_notify_compares_event(void *opaque, int event,  
> > Error **errp)  
> > > > > >      }
> > > > > >
> > > > > >      qemu_mutex_unlock(&event_mtx);
> > > > > > +    qemu_mutex_unlock(&colo_compare_mutex);
> > > > > >  }
> > > > > >
> > > > > >  static void colo_compare_timer_init(CompareState *s) @@ -1274,7
> > > > > > +1283,14 @@ static void colo_compare_complete(UserCreatable *uc,  
> > > > Error **errp)  
> > > > > >                             s->vnet_hdr);
> > > > > >      }
> > > > > >
> > > > > > +    qemu_mutex_lock(&colo_compare_mutex);
> > > > > > +    if (!colo_compare_active) {
> > > > > > +        qemu_mutex_init(&event_mtx);
> > > > > > +        qemu_cond_init(&event_complete_cond);
> > > > > > +        colo_compare_active = true;
> > > > > > +    }
> > > > > >      QTAILQ_INSERT_TAIL(&net_compares, s, next);
> > > > > > +    qemu_mutex_unlock(&colo_compare_mutex);
> > > > > >
> > > > > >      s->out_sendco.s = s;
> > > > > >      s->out_sendco.chr = &s->chr_out; @@ -1290,9 +1306,6 @@
> > > > > > static void colo_compare_complete(UserCreatable
> > > > > > *uc, Error **errp)
> > > > > >
> > > > > >      g_queue_init(&s->conn_list);
> > > > > >
> > > > > > -    qemu_mutex_init(&event_mtx);
> > > > > > -    qemu_cond_init(&event_complete_cond);
> > > > > > -
> > > > > >      s->connection_track_table =
> > > > > > g_hash_table_new_full(connection_key_hash,
> > > > > >                                                        connection_key_equal,
> > > > > >                                                        g_free,
> > > > > > @@
> > > > > > -1384,12 +1397,19 @@ static void colo_compare_finalize(Object
> > > > > > *obj)
> > > > > >
> > > > > >      qemu_bh_delete(s->event_bh);
> > > > > >
> > > > > > +    qemu_mutex_lock(&colo_compare_mutex);
> > > > > >      QTAILQ_FOREACH(tmp, &net_compares, next) {
> > > > > >          if (tmp == s) {
> > > > > >              QTAILQ_REMOVE(&net_compares, s, next);
> > > > > >              break;
> > > > > >          }
> > > > > >      }
> > > > > > +    if (QTAILQ_EMPTY(&net_compares)) {
> > > > > > +        colo_compare_active = false;
> > > > > > +        qemu_mutex_destroy(&event_mtx);
> > > > > > +        qemu_cond_destroy(&event_complete_cond);
> > > > > > +    }
> > > > > > +    qemu_mutex_unlock(&colo_compare_mutex);
> > > > > >
> > > > > >      AioContext *ctx = iothread_get_aio_context(s->iothread);
> > > > > >      aio_context_acquire(ctx);
> > > > > > @@ -1413,15 +1433,18 @@ static void colo_compare_finalize(Object  
> > *obj)  
> > > > > >          object_unref(OBJECT(s->iothread));
> > > > > >      }
> > > > > >
> > > > > > -    qemu_mutex_destroy(&event_mtx);
> > > > > > -    qemu_cond_destroy(&event_complete_cond);
> > > > > > -
> > > > > >      g_free(s->pri_indev);
> > > > > >      g_free(s->sec_indev);
> > > > > >      g_free(s->outdev);
> > > > > >      g_free(s->notify_dev);
> > > > > >  }
> > > > > >
> > > > > > +void colo_compare_init_globals(void) {
> > > > > > +    colo_compare_active = false;
> > > > > > +    qemu_mutex_init(&colo_compare_mutex);
> > > > > > +}
> > > > > > +
> > > > > >  static const TypeInfo colo_compare_info = {
> > > > > >      .name = TYPE_COLO_COMPARE,
> > > > > >      .parent = TYPE_OBJECT,
> > > > > > diff --git a/net/colo-compare.h b/net/colo-compare.h index
> > > > > > 22ddd512e2..eb483ac586 100644
> > > > > > --- a/net/colo-compare.h
> > > > > > +++ b/net/colo-compare.h
> > > > > > @@ -17,6 +17,7 @@
> > > > > >  #ifndef QEMU_COLO_COMPARE_H
> > > > > >  #define QEMU_COLO_COMPARE_H
> > > > > >
> > > > > > +void colo_compare_init_globals(void);
> > > > > >  void colo_notify_compares_event(void *opaque, int event, Error
> > > > > > **errp); void colo_compare_register_notifier(Notifier *notify);
> > > > > > void colo_compare_unregister_notifier(Notifier *notify); diff
> > > > > > --git a/softmmu/vl.c b/softmmu/vl.c index 32c0047889..a913ed5469
> > > > > > 100644
> > > > > > --- a/softmmu/vl.c
> > > > > > +++ b/softmmu/vl.c
> > > > > > @@ -112,6 +112,7 @@
> > > > > >  #include "qapi/qmp/qerror.h"
> > > > > >  #include "sysemu/iothread.h"
> > > > > >  #include "qemu/guest-random.h"
> > > > > > +#include "net/colo-compare.h"
> > > > > >
> > > > > >  #define MAX_VIRTIO_CONSOLES 1
> > > > > >
> > > > > > @@ -2906,6 +2907,7 @@ void qemu_init(int argc, char **argv, char  
> > > > **envp)  
> > > > > >      precopy_infrastructure_init();
> > > > > >      postcopy_infrastructure_init();
> > > > > >      monitor_init_globals();
> > > > > > +    colo_compare_init_globals();
> > > > > >
> > > > > >      if (qcrypto_init(&err) < 0) {
> > > > > >          error_reportf_err(err, "cannot initialize crypto: ");
> > > > > > --
> > > > > > 2.20.1  
> > > > >  
> > >  
>
Zhang Chen May 11, 2020, 8:49 a.m. UTC | #8
> -----Original Message-----
> From: Lukas Straub <lukasstraub2@web.de>
> Sent: Saturday, May 9, 2020 8:21 PM
> To: Zhang, Chen <chen.zhang@intel.com>
> Cc: qemu-devel <qemu-devel@nongnu.org>; Li Zhijian
> <lizhijian@cn.fujitsu.com>; Jason Wang <jasowang@redhat.com>; Marc-
> André Lureau <marcandre.lureau@redhat.com>; Paolo Bonzini
> <pbonzini@redhat.com>; Dr . David Alan Gilbert <dgilbert@redhat.com>
> Subject: Re: [PATCH v4 5/6] net/colo-compare.c, softmmu/vl.c: Check that
> colo-compare is active
> 
> On Fri, 8 May 2020 06:50:39 +0000
> "Zhang, Chen" <chen.zhang@intel.com> wrote:
> 
> > > -----Original Message-----
> > > From: Lukas Straub <lukasstraub2@web.de>
> > > Sent: Friday, May 8, 2020 2:11 PM
> > > To: Zhang, Chen <chen.zhang@intel.com>
> > > Cc: qemu-devel <qemu-devel@nongnu.org>; Li Zhijian
> > > <lizhijian@cn.fujitsu.com>; Jason Wang <jasowang@redhat.com>; Marc-
> > > André Lureau <marcandre.lureau@redhat.com>; Paolo Bonzini
> > > <pbonzini@redhat.com>
> > > Subject: Re: [PATCH v4 5/6] net/colo-compare.c, softmmu/vl.c: Check
> > > that colo-compare is active
> > >
> > > On Fri, 8 May 2020 02:26:21 +0000
> > > "Zhang, Chen" <chen.zhang@intel.com> wrote:
> > >
> > > > > -----Original Message-----
> > > > > From: Lukas Straub <lukasstraub2@web.de>
> > > > > Sent: Thursday, May 7, 2020 11:54 PM
> > > > > To: Zhang, Chen <chen.zhang@intel.com>
> > > > > Cc: qemu-devel <qemu-devel@nongnu.org>; Li Zhijian
> > > > > <lizhijian@cn.fujitsu.com>; Jason Wang <jasowang@redhat.com>;
> > > > > Marc- André Lureau <marcandre.lureau@redhat.com>; Paolo Bonzini
> > > > > <pbonzini@redhat.com>
> > > > > Subject: Re: [PATCH v4 5/6] net/colo-compare.c, softmmu/vl.c:
> > > > > Check that colo-compare is active
> > > > >
> > > > > On Thu, 7 May 2020 11:38:04 +0000 "Zhang, Chen"
> > > > > <chen.zhang@intel.com> wrote:
> > > > >
> > > > > > > -----Original Message-----
> > > > > > > From: Lukas Straub <lukasstraub2@web.de>
> > > > > > > Sent: Monday, May 4, 2020 6:28 PM
> > > > > > > To: qemu-devel <qemu-devel@nongnu.org>
> > > > > > > Cc: Zhang, Chen <chen.zhang@intel.com>; Li Zhijian
> > > > > > > <lizhijian@cn.fujitsu.com>; Jason Wang
> > > > > > > <jasowang@redhat.com>;
> > > > > > > Marc- André Lureau <marcandre.lureau@redhat.com>; Paolo
> > > > > > > Bonzini <pbonzini@redhat.com>
> > > > > > > Subject: [PATCH v4 5/6] net/colo-compare.c, softmmu/vl.c:
> > > > > > > Check that
> > > > > > > colo- compare is active
> > > > > > >
> > > > > > > If the colo-compare object is removed before failover and a
> > > > > > > checkpoint happens, qemu crashes because it tries to lock
> > > > > > > the destroyed event_mtx in colo_notify_compares_event.
> > > > > > >
> > > > > > > Fix this by checking if everything is initialized by
> > > > > > > introducing a new variable colo_compare_active which is
> > > > > > > protected by a new mutex colo_compare_mutex. The new mutex
> > > > > > > also protects against concurrent access of the net_compares
> > > > > > > list and makes sure that colo_notify_compares_event isn't
> > > > > > > active while we destroy event_mtx and event_complete_cond.
> > > > > > >
> > > > > > > With this it also is again possible to use colo without
> > > > > > > colo-compare (periodic
> > > > > > > mode) and to use multiple colo-compare for multiple network
> > > interfaces.
> > > > > > >
> > > > > >
> > > > > > Hi Lukas,
> > > > > >
> > > > > > For this case I think we don't need to touch vl.c code, we can
> > > > > > solve this
> > > > > issue from another perspective:
> > > > > > How to remove colo-compare?
> > > > > > User will use qemu-monitor or QMP command to disable an
> > > > > > object, so we just need return operation failed When user try
> > > > > > to remove colo-compare
> > > > > object while COLO is running.
> > > > >
> > > > > Yeah, but that still leaves the other problem that colo can't be
> > > > > used without colo-compare (qemu crashes then).
> > > >
> > > > Yes, the COLO-compare is necessary module in COLO original design.
> > > > At most cases, user need it do dynamic sync.
> > > > For rare cases, maybe we can add a new colo-compare parameter to
> > > bypass all the network workload.
> > >
> > > I think such an parameter would only be a workaround instead of a
> > > real solution like this patch.
> >
> > The root problem is why COLO-compare is necessary.
> > Yes, maybe someone want to use pure periodic synchronization mode, But
> > it means it will lost all guest network support(without colo-compare/filter-
> mirror/filter-redirector/filter-rewriter).
> > The secondary guest just a solid backup for the primary guest, when
> > occur failover the new build stateful connection (like TCP) will crashed,
> need userspace to handle this status. It lost the original meaning for COLO
> FT/HA solution, no need use do HA in application layer.
> > it looks like normal/remote periodic VM snapshot here.
> 
> Sure, but maybe the user doesn't need (reliable) network on failover. Also
> proper network support with periodic mode can easily be implemented by
> modifying filter-buffer to buffer packets until checkpoint. Being able to use
> colo without colo-compare gives more flexibility to the user.

OK, use the filter-buffer instead of colo-compare is a new use case here.
If other maintainers have no comments about touch the vl.c for COLO , I think it's OK here.

Thanks
Zhang Chen 


> 
> Regards,
> Lukas Straub
> 
> > Dave or Jason have any comments here?
> >
> > Thanks
> > Zhang Chen
> >
> > >
> > > Regards,
> > > Lukas Straub
> > >
> > > > Thanks
> > > > Zhang Chen
> > > >
> > > > >
> > > > > Regards,
> > > > > Lukas Straub
> > > > >
> > > > > > Thanks
> > > > > > Zhang Chen
> > > > > >
> > > > > > > Signed-off-by: Lukas Straub <lukasstraub2@web.de>
> > > > > > > ---
> > > > > > >  net/colo-compare.c | 35 +++++++++++++++++++++++++++++---
> ---
> > > > > > >  net/colo-compare.h |  1 +
> > > > > > >  softmmu/vl.c       |  2 ++
> > > > > > >  3 files changed, 32 insertions(+), 6 deletions(-)
> > > > > > >
> > > > > > > diff --git a/net/colo-compare.c b/net/colo-compare.c index
> > > > > > > 56db3d3bfc..c7572d75e9 100644
> > > > > > > --- a/net/colo-compare.c
> > > > > > > +++ b/net/colo-compare.c
> > > > > > > @@ -54,6 +54,8 @@ static NotifierList colo_compare_notifiers
> > > > > > > = #define REGULAR_PACKET_CHECK_MS 3000  #define
> > > > > DEFAULT_TIME_OUT_MS
> > > > > > > 3000
> > > > > > >
> > > > > > > +static QemuMutex colo_compare_mutex; static bool
> > > > > > > +colo_compare_active;
> > > > > > >  static QemuMutex event_mtx;  static QemuCond
> > > > > > > event_complete_cond;  static int event_unhandled_count; @@
> > > > > > > -906,6 +908,12 @@ static void check_old_packet_regular(void
> > > > > > > *opaque) void colo_notify_compares_event(void *opaque, int
> > > > > > > event, Error **errp)
> > > {
> > > > > > >      CompareState *s;
> > > > > > > +    qemu_mutex_lock(&colo_compare_mutex);
> > > > > > > +
> > > > > > > +    if (!colo_compare_active) {
> > > > > > > +        qemu_mutex_unlock(&colo_compare_mutex);
> > > > > > > +        return;
> > > > > > > +    }
> > > > > > >
> > > > > > >      qemu_mutex_lock(&event_mtx);
> > > > > > >      QTAILQ_FOREACH(s, &net_compares, next) { @@ -919,6
> > > > > > > +927,7 @@ void colo_notify_compares_event(void *opaque, int
> > > > > > > event,
> > > Error **errp)
> > > > > > >      }
> > > > > > >
> > > > > > >      qemu_mutex_unlock(&event_mtx);
> > > > > > > +    qemu_mutex_unlock(&colo_compare_mutex);
> > > > > > >  }
> > > > > > >
> > > > > > >  static void colo_compare_timer_init(CompareState *s) @@
> > > > > > > -1274,7
> > > > > > > +1283,14 @@ static void colo_compare_complete(UserCreatable
> > > > > > > +*uc,
> > > > > Error **errp)
> > > > > > >                             s->vnet_hdr);
> > > > > > >      }
> > > > > > >
> > > > > > > +    qemu_mutex_lock(&colo_compare_mutex);
> > > > > > > +    if (!colo_compare_active) {
> > > > > > > +        qemu_mutex_init(&event_mtx);
> > > > > > > +        qemu_cond_init(&event_complete_cond);
> > > > > > > +        colo_compare_active = true;
> > > > > > > +    }
> > > > > > >      QTAILQ_INSERT_TAIL(&net_compares, s, next);
> > > > > > > +    qemu_mutex_unlock(&colo_compare_mutex);
> > > > > > >
> > > > > > >      s->out_sendco.s = s;
> > > > > > >      s->out_sendco.chr = &s->chr_out; @@ -1290,9 +1306,6 @@
> > > > > > > static void colo_compare_complete(UserCreatable
> > > > > > > *uc, Error **errp)
> > > > > > >
> > > > > > >      g_queue_init(&s->conn_list);
> > > > > > >
> > > > > > > -    qemu_mutex_init(&event_mtx);
> > > > > > > -    qemu_cond_init(&event_complete_cond);
> > > > > > > -
> > > > > > >      s->connection_track_table =
> > > > > > > g_hash_table_new_full(connection_key_hash,
> > > > > > >                                                        connection_key_equal,
> > > > > > >
> > > > > > > g_free, @@
> > > > > > > -1384,12 +1397,19 @@ static void
> > > > > > > colo_compare_finalize(Object
> > > > > > > *obj)
> > > > > > >
> > > > > > >      qemu_bh_delete(s->event_bh);
> > > > > > >
> > > > > > > +    qemu_mutex_lock(&colo_compare_mutex);
> > > > > > >      QTAILQ_FOREACH(tmp, &net_compares, next) {
> > > > > > >          if (tmp == s) {
> > > > > > >              QTAILQ_REMOVE(&net_compares, s, next);
> > > > > > >              break;
> > > > > > >          }
> > > > > > >      }
> > > > > > > +    if (QTAILQ_EMPTY(&net_compares)) {
> > > > > > > +        colo_compare_active = false;
> > > > > > > +        qemu_mutex_destroy(&event_mtx);
> > > > > > > +        qemu_cond_destroy(&event_complete_cond);
> > > > > > > +    }
> > > > > > > +    qemu_mutex_unlock(&colo_compare_mutex);
> > > > > > >
> > > > > > >      AioContext *ctx = iothread_get_aio_context(s->iothread);
> > > > > > >      aio_context_acquire(ctx); @@ -1413,15 +1433,18 @@
> > > > > > > static void colo_compare_finalize(Object
> > > *obj)
> > > > > > >          object_unref(OBJECT(s->iothread));
> > > > > > >      }
> > > > > > >
> > > > > > > -    qemu_mutex_destroy(&event_mtx);
> > > > > > > -    qemu_cond_destroy(&event_complete_cond);
> > > > > > > -
> > > > > > >      g_free(s->pri_indev);
> > > > > > >      g_free(s->sec_indev);
> > > > > > >      g_free(s->outdev);
> > > > > > >      g_free(s->notify_dev);
> > > > > > >  }
> > > > > > >
> > > > > > > +void colo_compare_init_globals(void) {
> > > > > > > +    colo_compare_active = false;
> > > > > > > +    qemu_mutex_init(&colo_compare_mutex);
> > > > > > > +}
> > > > > > > +
> > > > > > >  static const TypeInfo colo_compare_info = {
> > > > > > >      .name = TYPE_COLO_COMPARE,
> > > > > > >      .parent = TYPE_OBJECT,
> > > > > > > diff --git a/net/colo-compare.h b/net/colo-compare.h index
> > > > > > > 22ddd512e2..eb483ac586 100644
> > > > > > > --- a/net/colo-compare.h
> > > > > > > +++ b/net/colo-compare.h
> > > > > > > @@ -17,6 +17,7 @@
> > > > > > >  #ifndef QEMU_COLO_COMPARE_H  #define
> QEMU_COLO_COMPARE_H
> > > > > > >
> > > > > > > +void colo_compare_init_globals(void);
> > > > > > >  void colo_notify_compares_event(void *opaque, int event,
> > > > > > > Error **errp); void colo_compare_register_notifier(Notifier
> > > > > > > *notify); void colo_compare_unregister_notifier(Notifier
> > > > > > > *notify); diff --git a/softmmu/vl.c b/softmmu/vl.c index
> > > > > > > 32c0047889..a913ed5469
> > > > > > > 100644
> > > > > > > --- a/softmmu/vl.c
> > > > > > > +++ b/softmmu/vl.c
> > > > > > > @@ -112,6 +112,7 @@
> > > > > > >  #include "qapi/qmp/qerror.h"
> > > > > > >  #include "sysemu/iothread.h"
> > > > > > >  #include "qemu/guest-random.h"
> > > > > > > +#include "net/colo-compare.h"
> > > > > > >
> > > > > > >  #define MAX_VIRTIO_CONSOLES 1
> > > > > > >
> > > > > > > @@ -2906,6 +2907,7 @@ void qemu_init(int argc, char **argv,
> > > > > > > char
> > > > > **envp)
> > > > > > >      precopy_infrastructure_init();
> > > > > > >      postcopy_infrastructure_init();
> > > > > > >      monitor_init_globals();
> > > > > > > +    colo_compare_init_globals();
> > > > > > >
> > > > > > >      if (qcrypto_init(&err) < 0) {
> > > > > > >          error_reportf_err(err, "cannot initialize crypto:
> > > > > > > ");
> > > > > > > --
> > > > > > > 2.20.1
> > > > > >
> > > >
> >
Dr. David Alan Gilbert May 12, 2020, 5:28 p.m. UTC | #9
* Zhang, Chen (chen.zhang@intel.com) wrote:
> 
> 
> > -----Original Message-----
> > From: Lukas Straub <lukasstraub2@web.de>
> > Sent: Friday, May 8, 2020 2:11 PM
> > To: Zhang, Chen <chen.zhang@intel.com>
> > Cc: qemu-devel <qemu-devel@nongnu.org>; Li Zhijian
> > <lizhijian@cn.fujitsu.com>; Jason Wang <jasowang@redhat.com>; Marc-
> > André Lureau <marcandre.lureau@redhat.com>; Paolo Bonzini
> > <pbonzini@redhat.com>
> > Subject: Re: [PATCH v4 5/6] net/colo-compare.c, softmmu/vl.c: Check that
> > colo-compare is active
> > 
> > On Fri, 8 May 2020 02:26:21 +0000
> > "Zhang, Chen" <chen.zhang@intel.com> wrote:
> > 
> > > > -----Original Message-----
> > > > From: Lukas Straub <lukasstraub2@web.de>
> > > > Sent: Thursday, May 7, 2020 11:54 PM
> > > > To: Zhang, Chen <chen.zhang@intel.com>
> > > > Cc: qemu-devel <qemu-devel@nongnu.org>; Li Zhijian
> > > > <lizhijian@cn.fujitsu.com>; Jason Wang <jasowang@redhat.com>; Marc-
> > > > André Lureau <marcandre.lureau@redhat.com>; Paolo Bonzini
> > > > <pbonzini@redhat.com>
> > > > Subject: Re: [PATCH v4 5/6] net/colo-compare.c, softmmu/vl.c: Check
> > > > that colo-compare is active
> > > >
> > > > On Thu, 7 May 2020 11:38:04 +0000
> > > > "Zhang, Chen" <chen.zhang@intel.com> wrote:
> > > >
> > > > > > -----Original Message-----
> > > > > > From: Lukas Straub <lukasstraub2@web.de>
> > > > > > Sent: Monday, May 4, 2020 6:28 PM
> > > > > > To: qemu-devel <qemu-devel@nongnu.org>
> > > > > > Cc: Zhang, Chen <chen.zhang@intel.com>; Li Zhijian
> > > > > > <lizhijian@cn.fujitsu.com>; Jason Wang <jasowang@redhat.com>;
> > > > > > Marc- André Lureau <marcandre.lureau@redhat.com>; Paolo Bonzini
> > > > > > <pbonzini@redhat.com>
> > > > > > Subject: [PATCH v4 5/6] net/colo-compare.c, softmmu/vl.c: Check
> > > > > > that
> > > > > > colo- compare is active
> > > > > >
> > > > > > If the colo-compare object is removed before failover and a
> > > > > > checkpoint happens, qemu crashes because it tries to lock the
> > > > > > destroyed event_mtx in colo_notify_compares_event.
> > > > > >
> > > > > > Fix this by checking if everything is initialized by introducing
> > > > > > a new variable colo_compare_active which is protected by a new
> > > > > > mutex colo_compare_mutex. The new mutex also protects against
> > > > > > concurrent access of the net_compares list and makes sure that
> > > > > > colo_notify_compares_event isn't active while we destroy
> > > > > > event_mtx and event_complete_cond.
> > > > > >
> > > > > > With this it also is again possible to use colo without
> > > > > > colo-compare (periodic
> > > > > > mode) and to use multiple colo-compare for multiple network
> > interfaces.
> > > > > >
> > > > >
> > > > > Hi Lukas,
> > > > >
> > > > > For this case I think we don't need to touch vl.c code, we can
> > > > > solve this
> > > > issue from another perspective:
> > > > > How to remove colo-compare?
> > > > > User will use qemu-monitor or QMP command to disable an object, so
> > > > > we just need return operation failed When user try to remove
> > > > > colo-compare
> > > > object while COLO is running.
> > > >
> > > > Yeah, but that still leaves the other problem that colo can't be
> > > > used without colo-compare (qemu crashes then).
> > >
> > > Yes, the COLO-compare is necessary module in COLO original design.
> > > At most cases, user need it do dynamic sync.
> > > For rare cases, maybe we can add a new colo-compare parameter to
> > bypass all the network workload.
> > 
> > I think such an parameter would only be a workaround instead of a real
> > solution like this patch.
> 
> The root problem is why COLO-compare is necessary.
> Yes, maybe someone want to use pure periodic synchronization mode,
> But it means it will lost all guest network support(without colo-compare/filter-mirror/filter-redirector/filter-rewriter).
> The secondary guest just a solid backup for the primary guest, when occur failover the new build stateful connection (like TCP)
> will crashed, need userspace to handle this status. It lost the original meaning for COLO FT/HA solution, no need use do HA in application layer.
> it looks like normal/remote periodic VM snapshot here. 
> Dave or Jason have any comments here? 

People have done fixed-rate VM synchronisation in the past;
and there are workloads where the packet stream is particularly random
so you almost always get comparison miscompares.

But in those setups, the rest of the configuration is very different -
since the destination isn't running at the same time as the source, the
block mirroring also gets simpler in those scenarious.

One thing we played with in the past was dynamically turning
comparison on and off; switching back to simpler synchronisation
if the workload turns out to suit it.

Dave

> Thanks
> Zhang Chen
> 
> > 
> > Regards,
> > Lukas Straub
> > 
> > > Thanks
> > > Zhang Chen
> > >
> > > >
> > > > Regards,
> > > > Lukas Straub
> > > >
> > > > > Thanks
> > > > > Zhang Chen
> > > > >
> > > > > > Signed-off-by: Lukas Straub <lukasstraub2@web.de>
> > > > > > ---
> > > > > >  net/colo-compare.c | 35 +++++++++++++++++++++++++++++------
> > > > > >  net/colo-compare.h |  1 +
> > > > > >  softmmu/vl.c       |  2 ++
> > > > > >  3 files changed, 32 insertions(+), 6 deletions(-)
> > > > > >
> > > > > > diff --git a/net/colo-compare.c b/net/colo-compare.c index
> > > > > > 56db3d3bfc..c7572d75e9 100644
> > > > > > --- a/net/colo-compare.c
> > > > > > +++ b/net/colo-compare.c
> > > > > > @@ -54,6 +54,8 @@ static NotifierList colo_compare_notifiers =
> > > > > > #define REGULAR_PACKET_CHECK_MS 3000  #define
> > > > DEFAULT_TIME_OUT_MS
> > > > > > 3000
> > > > > >
> > > > > > +static QemuMutex colo_compare_mutex; static bool
> > > > > > +colo_compare_active;
> > > > > >  static QemuMutex event_mtx;
> > > > > >  static QemuCond event_complete_cond;  static int
> > > > > > event_unhandled_count; @@ -906,6 +908,12 @@ static void
> > > > > > check_old_packet_regular(void *opaque) void
> > > > > > colo_notify_compares_event(void *opaque, int event, Error **errp)
> > {
> > > > > >      CompareState *s;
> > > > > > +    qemu_mutex_lock(&colo_compare_mutex);
> > > > > > +
> > > > > > +    if (!colo_compare_active) {
> > > > > > +        qemu_mutex_unlock(&colo_compare_mutex);
> > > > > > +        return;
> > > > > > +    }
> > > > > >
> > > > > >      qemu_mutex_lock(&event_mtx);
> > > > > >      QTAILQ_FOREACH(s, &net_compares, next) { @@ -919,6 +927,7
> > > > > > @@ void colo_notify_compares_event(void *opaque, int event,
> > Error **errp)
> > > > > >      }
> > > > > >
> > > > > >      qemu_mutex_unlock(&event_mtx);
> > > > > > +    qemu_mutex_unlock(&colo_compare_mutex);
> > > > > >  }
> > > > > >
> > > > > >  static void colo_compare_timer_init(CompareState *s) @@ -1274,7
> > > > > > +1283,14 @@ static void colo_compare_complete(UserCreatable *uc,
> > > > Error **errp)
> > > > > >                             s->vnet_hdr);
> > > > > >      }
> > > > > >
> > > > > > +    qemu_mutex_lock(&colo_compare_mutex);
> > > > > > +    if (!colo_compare_active) {
> > > > > > +        qemu_mutex_init(&event_mtx);
> > > > > > +        qemu_cond_init(&event_complete_cond);
> > > > > > +        colo_compare_active = true;
> > > > > > +    }
> > > > > >      QTAILQ_INSERT_TAIL(&net_compares, s, next);
> > > > > > +    qemu_mutex_unlock(&colo_compare_mutex);
> > > > > >
> > > > > >      s->out_sendco.s = s;
> > > > > >      s->out_sendco.chr = &s->chr_out; @@ -1290,9 +1306,6 @@
> > > > > > static void colo_compare_complete(UserCreatable
> > > > > > *uc, Error **errp)
> > > > > >
> > > > > >      g_queue_init(&s->conn_list);
> > > > > >
> > > > > > -    qemu_mutex_init(&event_mtx);
> > > > > > -    qemu_cond_init(&event_complete_cond);
> > > > > > -
> > > > > >      s->connection_track_table =
> > > > > > g_hash_table_new_full(connection_key_hash,
> > > > > >                                                        connection_key_equal,
> > > > > >                                                        g_free,
> > > > > > @@
> > > > > > -1384,12 +1397,19 @@ static void colo_compare_finalize(Object
> > > > > > *obj)
> > > > > >
> > > > > >      qemu_bh_delete(s->event_bh);
> > > > > >
> > > > > > +    qemu_mutex_lock(&colo_compare_mutex);
> > > > > >      QTAILQ_FOREACH(tmp, &net_compares, next) {
> > > > > >          if (tmp == s) {
> > > > > >              QTAILQ_REMOVE(&net_compares, s, next);
> > > > > >              break;
> > > > > >          }
> > > > > >      }
> > > > > > +    if (QTAILQ_EMPTY(&net_compares)) {
> > > > > > +        colo_compare_active = false;
> > > > > > +        qemu_mutex_destroy(&event_mtx);
> > > > > > +        qemu_cond_destroy(&event_complete_cond);
> > > > > > +    }
> > > > > > +    qemu_mutex_unlock(&colo_compare_mutex);
> > > > > >
> > > > > >      AioContext *ctx = iothread_get_aio_context(s->iothread);
> > > > > >      aio_context_acquire(ctx);
> > > > > > @@ -1413,15 +1433,18 @@ static void colo_compare_finalize(Object
> > *obj)
> > > > > >          object_unref(OBJECT(s->iothread));
> > > > > >      }
> > > > > >
> > > > > > -    qemu_mutex_destroy(&event_mtx);
> > > > > > -    qemu_cond_destroy(&event_complete_cond);
> > > > > > -
> > > > > >      g_free(s->pri_indev);
> > > > > >      g_free(s->sec_indev);
> > > > > >      g_free(s->outdev);
> > > > > >      g_free(s->notify_dev);
> > > > > >  }
> > > > > >
> > > > > > +void colo_compare_init_globals(void) {
> > > > > > +    colo_compare_active = false;
> > > > > > +    qemu_mutex_init(&colo_compare_mutex);
> > > > > > +}
> > > > > > +
> > > > > >  static const TypeInfo colo_compare_info = {
> > > > > >      .name = TYPE_COLO_COMPARE,
> > > > > >      .parent = TYPE_OBJECT,
> > > > > > diff --git a/net/colo-compare.h b/net/colo-compare.h index
> > > > > > 22ddd512e2..eb483ac586 100644
> > > > > > --- a/net/colo-compare.h
> > > > > > +++ b/net/colo-compare.h
> > > > > > @@ -17,6 +17,7 @@
> > > > > >  #ifndef QEMU_COLO_COMPARE_H
> > > > > >  #define QEMU_COLO_COMPARE_H
> > > > > >
> > > > > > +void colo_compare_init_globals(void);
> > > > > >  void colo_notify_compares_event(void *opaque, int event, Error
> > > > > > **errp); void colo_compare_register_notifier(Notifier *notify);
> > > > > > void colo_compare_unregister_notifier(Notifier *notify); diff
> > > > > > --git a/softmmu/vl.c b/softmmu/vl.c index 32c0047889..a913ed5469
> > > > > > 100644
> > > > > > --- a/softmmu/vl.c
> > > > > > +++ b/softmmu/vl.c
> > > > > > @@ -112,6 +112,7 @@
> > > > > >  #include "qapi/qmp/qerror.h"
> > > > > >  #include "sysemu/iothread.h"
> > > > > >  #include "qemu/guest-random.h"
> > > > > > +#include "net/colo-compare.h"
> > > > > >
> > > > > >  #define MAX_VIRTIO_CONSOLES 1
> > > > > >
> > > > > > @@ -2906,6 +2907,7 @@ void qemu_init(int argc, char **argv, char
> > > > **envp)
> > > > > >      precopy_infrastructure_init();
> > > > > >      postcopy_infrastructure_init();
> > > > > >      monitor_init_globals();
> > > > > > +    colo_compare_init_globals();
> > > > > >
> > > > > >      if (qcrypto_init(&err) < 0) {
> > > > > >          error_reportf_err(err, "cannot initialize crypto: ");
> > > > > > --
> > > > > > 2.20.1
> > > > >
> > >
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
diff mbox series

Patch

diff --git a/net/colo-compare.c b/net/colo-compare.c
index 56db3d3bfc..c7572d75e9 100644
--- a/net/colo-compare.c
+++ b/net/colo-compare.c
@@ -54,6 +54,8 @@  static NotifierList colo_compare_notifiers =
 #define REGULAR_PACKET_CHECK_MS 3000
 #define DEFAULT_TIME_OUT_MS 3000
 
+static QemuMutex colo_compare_mutex;
+static bool colo_compare_active;
 static QemuMutex event_mtx;
 static QemuCond event_complete_cond;
 static int event_unhandled_count;
@@ -906,6 +908,12 @@  static void check_old_packet_regular(void *opaque)
 void colo_notify_compares_event(void *opaque, int event, Error **errp)
 {
     CompareState *s;
+    qemu_mutex_lock(&colo_compare_mutex);
+
+    if (!colo_compare_active) {
+        qemu_mutex_unlock(&colo_compare_mutex);
+        return;
+    }
 
     qemu_mutex_lock(&event_mtx);
     QTAILQ_FOREACH(s, &net_compares, next) {
@@ -919,6 +927,7 @@  void colo_notify_compares_event(void *opaque, int event, Error **errp)
     }
 
     qemu_mutex_unlock(&event_mtx);
+    qemu_mutex_unlock(&colo_compare_mutex);
 }
 
 static void colo_compare_timer_init(CompareState *s)
@@ -1274,7 +1283,14 @@  static void colo_compare_complete(UserCreatable *uc, Error **errp)
                            s->vnet_hdr);
     }
 
+    qemu_mutex_lock(&colo_compare_mutex);
+    if (!colo_compare_active) {
+        qemu_mutex_init(&event_mtx);
+        qemu_cond_init(&event_complete_cond);
+        colo_compare_active = true;
+    }
     QTAILQ_INSERT_TAIL(&net_compares, s, next);
+    qemu_mutex_unlock(&colo_compare_mutex);
 
     s->out_sendco.s = s;
     s->out_sendco.chr = &s->chr_out;
@@ -1290,9 +1306,6 @@  static void colo_compare_complete(UserCreatable *uc, Error **errp)
 
     g_queue_init(&s->conn_list);
 
-    qemu_mutex_init(&event_mtx);
-    qemu_cond_init(&event_complete_cond);
-
     s->connection_track_table = g_hash_table_new_full(connection_key_hash,
                                                       connection_key_equal,
                                                       g_free,
@@ -1384,12 +1397,19 @@  static void colo_compare_finalize(Object *obj)
 
     qemu_bh_delete(s->event_bh);
 
+    qemu_mutex_lock(&colo_compare_mutex);
     QTAILQ_FOREACH(tmp, &net_compares, next) {
         if (tmp == s) {
             QTAILQ_REMOVE(&net_compares, s, next);
             break;
         }
     }
+    if (QTAILQ_EMPTY(&net_compares)) {
+        colo_compare_active = false;
+        qemu_mutex_destroy(&event_mtx);
+        qemu_cond_destroy(&event_complete_cond);
+    }
+    qemu_mutex_unlock(&colo_compare_mutex);
 
     AioContext *ctx = iothread_get_aio_context(s->iothread);
     aio_context_acquire(ctx);
@@ -1413,15 +1433,18 @@  static void colo_compare_finalize(Object *obj)
         object_unref(OBJECT(s->iothread));
     }
 
-    qemu_mutex_destroy(&event_mtx);
-    qemu_cond_destroy(&event_complete_cond);
-
     g_free(s->pri_indev);
     g_free(s->sec_indev);
     g_free(s->outdev);
     g_free(s->notify_dev);
 }
 
+void colo_compare_init_globals(void)
+{
+    colo_compare_active = false;
+    qemu_mutex_init(&colo_compare_mutex);
+}
+
 static const TypeInfo colo_compare_info = {
     .name = TYPE_COLO_COMPARE,
     .parent = TYPE_OBJECT,
diff --git a/net/colo-compare.h b/net/colo-compare.h
index 22ddd512e2..eb483ac586 100644
--- a/net/colo-compare.h
+++ b/net/colo-compare.h
@@ -17,6 +17,7 @@ 
 #ifndef QEMU_COLO_COMPARE_H
 #define QEMU_COLO_COMPARE_H
 
+void colo_compare_init_globals(void);
 void colo_notify_compares_event(void *opaque, int event, Error **errp);
 void colo_compare_register_notifier(Notifier *notify);
 void colo_compare_unregister_notifier(Notifier *notify);
diff --git a/softmmu/vl.c b/softmmu/vl.c
index 32c0047889..a913ed5469 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -112,6 +112,7 @@ 
 #include "qapi/qmp/qerror.h"
 #include "sysemu/iothread.h"
 #include "qemu/guest-random.h"
+#include "net/colo-compare.h"
 
 #define MAX_VIRTIO_CONSOLES 1
 
@@ -2906,6 +2907,7 @@  void qemu_init(int argc, char **argv, char **envp)
     precopy_infrastructure_init();
     postcopy_infrastructure_init();
     monitor_init_globals();
+    colo_compare_init_globals();
 
     if (qcrypto_init(&err) < 0) {
         error_reportf_err(err, "cannot initialize crypto: ");