diff mbox

[2/3] COLO: Shutdown related socket fd while do failover

Message ID 1484657864-21708-3-git-send-email-zhang.zhanghailiang@huawei.com (mailing list archive)
State New, archived
Headers show

Commit Message

Zhanghailiang Jan. 17, 2017, 12:57 p.m. UTC
If the net connection between primary host and secondary host breaks
while COLO/COLO incoming threads are doing read() or write().
It will block until connection is timeout, and the failover process
will be blocked because of it.

So it is necessary to shutdown all the socket fds used by COLO
to avoid this situation. Besides, we should close the corresponding
file descriptors after failvoer BH shutdown them,
Or there will be an error.

Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
---
 include/migration/migration.h |  3 +++
 migration/colo.c              | 43 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+)

Comments

Dr. David Alan Gilbert Jan. 18, 2017, 11:01 a.m. UTC | #1
* zhanghailiang (zhang.zhanghailiang@huawei.com) wrote:
> If the net connection between primary host and secondary host breaks
> while COLO/COLO incoming threads are doing read() or write().
> It will block until connection is timeout, and the failover process
> will be blocked because of it.
> 
> So it is necessary to shutdown all the socket fds used by COLO
> to avoid this situation. Besides, we should close the corresponding
> file descriptors after failvoer BH shutdown them,
> Or there will be an error.

Hi,
  There are two parts to this patch:
   a) Add some semaphores to sequence failover
   b) Use shutdown()

  At first I wondered if perhaps they should be split; but I see
the reason for the semaphores is mostly to stop the race between
the fd's getting closed and the shutdown() calls; so I think it's
OK.

Do you have any problems with these semaphores during powering off the
guest?

Dave




> Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
> Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
> ---
>  include/migration/migration.h |  3 +++
>  migration/colo.c              | 43 +++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 46 insertions(+)
> 
> diff --git a/include/migration/migration.h b/include/migration/migration.h
> index 487ac1e..7cac877 100644
> --- a/include/migration/migration.h
> +++ b/include/migration/migration.h
> @@ -113,6 +113,7 @@ struct MigrationIncomingState {
>      QemuThread colo_incoming_thread;
>      /* The coroutine we should enter (back) after failover */
>      Coroutine *migration_incoming_co;
> +    QemuSemaphore colo_incoming_sem;
>  
>      /* See savevm.c */
>      LoadStateEntry_Head loadvm_handlers;
> @@ -182,6 +183,8 @@ struct MigrationState
>      QSIMPLEQ_HEAD(src_page_requests, MigrationSrcPageRequest) src_page_requests;
>      /* The RAMBlock used in the last src_page_request */
>      RAMBlock *last_req_rb;
> +    /* The semaphore is used to notify COLO thread that failover is finished */
> +    QemuSemaphore colo_exit_sem;
>  
>      /* The semaphore is used to notify COLO thread to do checkpoint */
>      QemuSemaphore colo_checkpoint_sem;
> diff --git a/migration/colo.c b/migration/colo.c
> index 08b2e46..3222812 100644
> --- a/migration/colo.c
> +++ b/migration/colo.c
> @@ -59,6 +59,18 @@ static void secondary_vm_do_failover(void)
>          /* recover runstate to normal migration finish state */
>          autostart = true;
>      }
> +    /*
> +     * Make sure COLO incoming thread not block in recv or send,
> +     * If mis->from_src_file and mis->to_src_file use the same fd,
> +     * The second shutdown() will return -1, we ignore this value,
> +     * It is harmless.
> +     */
> +    if (mis->from_src_file) {
> +        qemu_file_shutdown(mis->from_src_file);
> +    }
> +    if (mis->to_src_file) {
> +        qemu_file_shutdown(mis->to_src_file);
> +    }
>  
>      old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
>                                     FAILOVER_STATUS_COMPLETED);
> @@ -67,6 +79,8 @@ static void secondary_vm_do_failover(void)
>                       "secondary VM", FailoverStatus_lookup[old_state]);
>          return;
>      }
> +    /* Notify COLO incoming thread that failover work is finished */
> +    qemu_sem_post(&mis->colo_incoming_sem);
>      /* For Secondary VM, jump to incoming co */
>      if (mis->migration_incoming_co) {
>          qemu_coroutine_enter(mis->migration_incoming_co);
> @@ -81,6 +95,18 @@ static void primary_vm_do_failover(void)
>      migrate_set_state(&s->state, MIGRATION_STATUS_COLO,
>                        MIGRATION_STATUS_COMPLETED);
>  
> +    /*
> +     * Wake up COLO thread which may blocked in recv() or send(),
> +     * The s->rp_state.from_dst_file and s->to_dst_file may use the
> +     * same fd, but we still shutdown the fd for twice, it is harmless.
> +     */
> +    if (s->to_dst_file) {
> +        qemu_file_shutdown(s->to_dst_file);
> +    }
> +    if (s->rp_state.from_dst_file) {
> +        qemu_file_shutdown(s->rp_state.from_dst_file);
> +    }
> +
>      old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
>                                     FAILOVER_STATUS_COMPLETED);
>      if (old_state != FAILOVER_STATUS_ACTIVE) {
> @@ -88,6 +114,8 @@ static void primary_vm_do_failover(void)
>                       FailoverStatus_lookup[old_state]);
>          return;
>      }
> +    /* Notify COLO thread that failover work is finished */
> +    qemu_sem_post(&s->colo_exit_sem);
>  }
>  
>  void colo_do_failover(MigrationState *s)
> @@ -361,6 +389,14 @@ out:
>  
>      timer_del(s->colo_delay_timer);
>  
> +    /* Hope this not to be too long to wait here */
> +    qemu_sem_wait(&s->colo_exit_sem);
> +    qemu_sem_destroy(&s->colo_exit_sem);
> +    /*
> +     * Must be called after failover BH is completed,
> +     * Or the failover BH may shutdown the wrong fd that
> +     * re-used by other threads after we release here.
> +     */
>      if (s->rp_state.from_dst_file) {
>          qemu_fclose(s->rp_state.from_dst_file);
>      }
> @@ -385,6 +421,7 @@ void migrate_start_colo_process(MigrationState *s)
>      s->colo_delay_timer =  timer_new_ms(QEMU_CLOCK_HOST,
>                                  colo_checkpoint_notify, s);
>  
> +    qemu_sem_init(&s->colo_exit_sem, 0);
>      migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
>                        MIGRATION_STATUS_COLO);
>      colo_process_checkpoint(s);
> @@ -423,6 +460,8 @@ void *colo_process_incoming_thread(void *opaque)
>      uint64_t value;
>      Error *local_err = NULL;
>  
> +    qemu_sem_init(&mis->colo_incoming_sem, 0);
> +
>      migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
>                        MIGRATION_STATUS_COLO);
>  
> @@ -533,6 +572,10 @@ out:
>          qemu_fclose(fb);
>      }
>  
> +    /* Hope this not to be too long to loop here */
> +    qemu_sem_wait(&mis->colo_incoming_sem);
> +    qemu_sem_destroy(&mis->colo_incoming_sem);
> +    /* Must be called after failover BH is completed */
>      if (mis->to_src_file) {
>          qemu_fclose(mis->to_src_file);
>      }
> -- 
> 1.8.3.1
> 
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Zhanghailiang Feb. 8, 2017, 11:14 a.m. UTC | #2
On 2017/1/18 19:01, Dr. David Alan Gilbert wrote:
> * zhanghailiang (zhang.zhanghailiang@huawei.com) wrote:
>> If the net connection between primary host and secondary host breaks
>> while COLO/COLO incoming threads are doing read() or write().
>> It will block until connection is timeout, and the failover process
>> will be blocked because of it.
>>
>> So it is necessary to shutdown all the socket fds used by COLO
>> to avoid this situation. Besides, we should close the corresponding
>> file descriptors after failvoer BH shutdown them,
>> Or there will be an error.
>
> Hi,
>    There are two parts to this patch:
>     a) Add some semaphores to sequence failover
>     b) Use shutdown()
>
>    At first I wondered if perhaps they should be split; but I see
> the reason for the semaphores is mostly to stop the race between
> the fd's getting closed and the shutdown() calls; so I think it's
> OK.
>

Hi,

Yes, you are right, maybe i should add some comments about that.
Will do in next version.

> Do you have any problems with these semaphores during powering off the
> guest?
>

No, we didn't encounter any problems or trigger any bugs in our test
with this semaphores. In what places do you doubt it may has problems ? :)

Thanks,
Hailiang

> Dave
>
>
>
>
>> Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
>> Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
>> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
>> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
>> ---
>>   include/migration/migration.h |  3 +++
>>   migration/colo.c              | 43 +++++++++++++++++++++++++++++++++++++++++++
>>   2 files changed, 46 insertions(+)
>>
>> diff --git a/include/migration/migration.h b/include/migration/migration.h
>> index 487ac1e..7cac877 100644
>> --- a/include/migration/migration.h
>> +++ b/include/migration/migration.h
>> @@ -113,6 +113,7 @@ struct MigrationIncomingState {
>>       QemuThread colo_incoming_thread;
>>       /* The coroutine we should enter (back) after failover */
>>       Coroutine *migration_incoming_co;
>> +    QemuSemaphore colo_incoming_sem;
>>
>>       /* See savevm.c */
>>       LoadStateEntry_Head loadvm_handlers;
>> @@ -182,6 +183,8 @@ struct MigrationState
>>       QSIMPLEQ_HEAD(src_page_requests, MigrationSrcPageRequest) src_page_requests;
>>       /* The RAMBlock used in the last src_page_request */
>>       RAMBlock *last_req_rb;
>> +    /* The semaphore is used to notify COLO thread that failover is finished */
>> +    QemuSemaphore colo_exit_sem;
>>
>>       /* The semaphore is used to notify COLO thread to do checkpoint */
>>       QemuSemaphore colo_checkpoint_sem;
>> diff --git a/migration/colo.c b/migration/colo.c
>> index 08b2e46..3222812 100644
>> --- a/migration/colo.c
>> +++ b/migration/colo.c
>> @@ -59,6 +59,18 @@ static void secondary_vm_do_failover(void)
>>           /* recover runstate to normal migration finish state */
>>           autostart = true;
>>       }
>> +    /*
>> +     * Make sure COLO incoming thread not block in recv or send,
>> +     * If mis->from_src_file and mis->to_src_file use the same fd,
>> +     * The second shutdown() will return -1, we ignore this value,
>> +     * It is harmless.
>> +     */
>> +    if (mis->from_src_file) {
>> +        qemu_file_shutdown(mis->from_src_file);
>> +    }
>> +    if (mis->to_src_file) {
>> +        qemu_file_shutdown(mis->to_src_file);
>> +    }
>>
>>       old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
>>                                      FAILOVER_STATUS_COMPLETED);
>> @@ -67,6 +79,8 @@ static void secondary_vm_do_failover(void)
>>                        "secondary VM", FailoverStatus_lookup[old_state]);
>>           return;
>>       }
>> +    /* Notify COLO incoming thread that failover work is finished */
>> +    qemu_sem_post(&mis->colo_incoming_sem);
>>       /* For Secondary VM, jump to incoming co */
>>       if (mis->migration_incoming_co) {
>>           qemu_coroutine_enter(mis->migration_incoming_co);
>> @@ -81,6 +95,18 @@ static void primary_vm_do_failover(void)
>>       migrate_set_state(&s->state, MIGRATION_STATUS_COLO,
>>                         MIGRATION_STATUS_COMPLETED);
>>
>> +    /*
>> +     * Wake up COLO thread which may blocked in recv() or send(),
>> +     * The s->rp_state.from_dst_file and s->to_dst_file may use the
>> +     * same fd, but we still shutdown the fd for twice, it is harmless.
>> +     */
>> +    if (s->to_dst_file) {
>> +        qemu_file_shutdown(s->to_dst_file);
>> +    }
>> +    if (s->rp_state.from_dst_file) {
>> +        qemu_file_shutdown(s->rp_state.from_dst_file);
>> +    }
>> +
>>       old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
>>                                      FAILOVER_STATUS_COMPLETED);
>>       if (old_state != FAILOVER_STATUS_ACTIVE) {
>> @@ -88,6 +114,8 @@ static void primary_vm_do_failover(void)
>>                        FailoverStatus_lookup[old_state]);
>>           return;
>>       }
>> +    /* Notify COLO thread that failover work is finished */
>> +    qemu_sem_post(&s->colo_exit_sem);
>>   }
>>
>>   void colo_do_failover(MigrationState *s)
>> @@ -361,6 +389,14 @@ out:
>>
>>       timer_del(s->colo_delay_timer);
>>
>> +    /* Hope this not to be too long to wait here */
>> +    qemu_sem_wait(&s->colo_exit_sem);
>> +    qemu_sem_destroy(&s->colo_exit_sem);
>> +    /*
>> +     * Must be called after failover BH is completed,
>> +     * Or the failover BH may shutdown the wrong fd that
>> +     * re-used by other threads after we release here.
>> +     */
>>       if (s->rp_state.from_dst_file) {
>>           qemu_fclose(s->rp_state.from_dst_file);
>>       }
>> @@ -385,6 +421,7 @@ void migrate_start_colo_process(MigrationState *s)
>>       s->colo_delay_timer =  timer_new_ms(QEMU_CLOCK_HOST,
>>                                   colo_checkpoint_notify, s);
>>
>> +    qemu_sem_init(&s->colo_exit_sem, 0);
>>       migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
>>                         MIGRATION_STATUS_COLO);
>>       colo_process_checkpoint(s);
>> @@ -423,6 +460,8 @@ void *colo_process_incoming_thread(void *opaque)
>>       uint64_t value;
>>       Error *local_err = NULL;
>>
>> +    qemu_sem_init(&mis->colo_incoming_sem, 0);
>> +
>>       migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
>>                         MIGRATION_STATUS_COLO);
>>
>> @@ -533,6 +572,10 @@ out:
>>           qemu_fclose(fb);
>>       }
>>
>> +    /* Hope this not to be too long to loop here */
>> +    qemu_sem_wait(&mis->colo_incoming_sem);
>> +    qemu_sem_destroy(&mis->colo_incoming_sem);
>> +    /* Must be called after failover BH is completed */
>>       if (mis->to_src_file) {
>>           qemu_fclose(mis->to_src_file);
>>       }
>> --
>> 1.8.3.1
>>
>>
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
>
> .
>
Dr. David Alan Gilbert Feb. 8, 2017, 7:53 p.m. UTC | #3
* Hailiang Zhang (zhang.zhanghailiang@huawei.com) wrote:
> On 2017/1/18 19:01, Dr. David Alan Gilbert wrote:
> > * zhanghailiang (zhang.zhanghailiang@huawei.com) wrote:
> > > If the net connection between primary host and secondary host breaks
> > > while COLO/COLO incoming threads are doing read() or write().
> > > It will block until connection is timeout, and the failover process
> > > will be blocked because of it.
> > > 
> > > So it is necessary to shutdown all the socket fds used by COLO
> > > to avoid this situation. Besides, we should close the corresponding
> > > file descriptors after failvoer BH shutdown them,
> > > Or there will be an error.
> > 
> > Hi,
> >    There are two parts to this patch:
> >     a) Add some semaphores to sequence failover
> >     b) Use shutdown()
> > 
> >    At first I wondered if perhaps they should be split; but I see
> > the reason for the semaphores is mostly to stop the race between
> > the fd's getting closed and the shutdown() calls; so I think it's
> > OK.
> > 
> 
> Hi,
> 
> Yes, you are right, maybe i should add some comments about that.
> Will do in next version.
> 
> > Do you have any problems with these semaphores during powering off the
> > guest?
> > 
> 
> No, we didn't encounter any problems or trigger any bugs in our test
> with this semaphores. In what places do you doubt it may has problems ? :)

I just wondered about other exit cases other than failover; e.g. what
if the guest shutdown or something like that, would it get stuck
waiting for the colo_incoming_sem.

Dave

> Thanks,
> Hailiang
> 
> > Dave
> > 
> > 
> > 
> > 
> > > Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
> > > Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
> > > Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> > > Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
> > > ---
> > >   include/migration/migration.h |  3 +++
> > >   migration/colo.c              | 43 +++++++++++++++++++++++++++++++++++++++++++
> > >   2 files changed, 46 insertions(+)
> > > 
> > > diff --git a/include/migration/migration.h b/include/migration/migration.h
> > > index 487ac1e..7cac877 100644
> > > --- a/include/migration/migration.h
> > > +++ b/include/migration/migration.h
> > > @@ -113,6 +113,7 @@ struct MigrationIncomingState {
> > >       QemuThread colo_incoming_thread;
> > >       /* The coroutine we should enter (back) after failover */
> > >       Coroutine *migration_incoming_co;
> > > +    QemuSemaphore colo_incoming_sem;
> > > 
> > >       /* See savevm.c */
> > >       LoadStateEntry_Head loadvm_handlers;
> > > @@ -182,6 +183,8 @@ struct MigrationState
> > >       QSIMPLEQ_HEAD(src_page_requests, MigrationSrcPageRequest) src_page_requests;
> > >       /* The RAMBlock used in the last src_page_request */
> > >       RAMBlock *last_req_rb;
> > > +    /* The semaphore is used to notify COLO thread that failover is finished */
> > > +    QemuSemaphore colo_exit_sem;
> > > 
> > >       /* The semaphore is used to notify COLO thread to do checkpoint */
> > >       QemuSemaphore colo_checkpoint_sem;
> > > diff --git a/migration/colo.c b/migration/colo.c
> > > index 08b2e46..3222812 100644
> > > --- a/migration/colo.c
> > > +++ b/migration/colo.c
> > > @@ -59,6 +59,18 @@ static void secondary_vm_do_failover(void)
> > >           /* recover runstate to normal migration finish state */
> > >           autostart = true;
> > >       }
> > > +    /*
> > > +     * Make sure COLO incoming thread not block in recv or send,
> > > +     * If mis->from_src_file and mis->to_src_file use the same fd,
> > > +     * The second shutdown() will return -1, we ignore this value,
> > > +     * It is harmless.
> > > +     */
> > > +    if (mis->from_src_file) {
> > > +        qemu_file_shutdown(mis->from_src_file);
> > > +    }
> > > +    if (mis->to_src_file) {
> > > +        qemu_file_shutdown(mis->to_src_file);
> > > +    }
> > > 
> > >       old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
> > >                                      FAILOVER_STATUS_COMPLETED);
> > > @@ -67,6 +79,8 @@ static void secondary_vm_do_failover(void)
> > >                        "secondary VM", FailoverStatus_lookup[old_state]);
> > >           return;
> > >       }
> > > +    /* Notify COLO incoming thread that failover work is finished */
> > > +    qemu_sem_post(&mis->colo_incoming_sem);
> > >       /* For Secondary VM, jump to incoming co */
> > >       if (mis->migration_incoming_co) {
> > >           qemu_coroutine_enter(mis->migration_incoming_co);
> > > @@ -81,6 +95,18 @@ static void primary_vm_do_failover(void)
> > >       migrate_set_state(&s->state, MIGRATION_STATUS_COLO,
> > >                         MIGRATION_STATUS_COMPLETED);
> > > 
> > > +    /*
> > > +     * Wake up COLO thread which may blocked in recv() or send(),
> > > +     * The s->rp_state.from_dst_file and s->to_dst_file may use the
> > > +     * same fd, but we still shutdown the fd for twice, it is harmless.
> > > +     */
> > > +    if (s->to_dst_file) {
> > > +        qemu_file_shutdown(s->to_dst_file);
> > > +    }
> > > +    if (s->rp_state.from_dst_file) {
> > > +        qemu_file_shutdown(s->rp_state.from_dst_file);
> > > +    }
> > > +
> > >       old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
> > >                                      FAILOVER_STATUS_COMPLETED);
> > >       if (old_state != FAILOVER_STATUS_ACTIVE) {
> > > @@ -88,6 +114,8 @@ static void primary_vm_do_failover(void)
> > >                        FailoverStatus_lookup[old_state]);
> > >           return;
> > >       }
> > > +    /* Notify COLO thread that failover work is finished */
> > > +    qemu_sem_post(&s->colo_exit_sem);
> > >   }
> > > 
> > >   void colo_do_failover(MigrationState *s)
> > > @@ -361,6 +389,14 @@ out:
> > > 
> > >       timer_del(s->colo_delay_timer);
> > > 
> > > +    /* Hope this not to be too long to wait here */
> > > +    qemu_sem_wait(&s->colo_exit_sem);
> > > +    qemu_sem_destroy(&s->colo_exit_sem);
> > > +    /*
> > > +     * Must be called after failover BH is completed,
> > > +     * Or the failover BH may shutdown the wrong fd that
> > > +     * re-used by other threads after we release here.
> > > +     */
> > >       if (s->rp_state.from_dst_file) {
> > >           qemu_fclose(s->rp_state.from_dst_file);
> > >       }
> > > @@ -385,6 +421,7 @@ void migrate_start_colo_process(MigrationState *s)
> > >       s->colo_delay_timer =  timer_new_ms(QEMU_CLOCK_HOST,
> > >                                   colo_checkpoint_notify, s);
> > > 
> > > +    qemu_sem_init(&s->colo_exit_sem, 0);
> > >       migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
> > >                         MIGRATION_STATUS_COLO);
> > >       colo_process_checkpoint(s);
> > > @@ -423,6 +460,8 @@ void *colo_process_incoming_thread(void *opaque)
> > >       uint64_t value;
> > >       Error *local_err = NULL;
> > > 
> > > +    qemu_sem_init(&mis->colo_incoming_sem, 0);
> > > +
> > >       migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
> > >                         MIGRATION_STATUS_COLO);
> > > 
> > > @@ -533,6 +572,10 @@ out:
> > >           qemu_fclose(fb);
> > >       }
> > > 
> > > +    /* Hope this not to be too long to loop here */
> > > +    qemu_sem_wait(&mis->colo_incoming_sem);
> > > +    qemu_sem_destroy(&mis->colo_incoming_sem);
> > > +    /* Must be called after failover BH is completed */
> > >       if (mis->to_src_file) {
> > >           qemu_fclose(mis->to_src_file);
> > >       }
> > > --
> > > 1.8.3.1
> > > 
> > > 
> > --
> > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
> > 
> > .
> > 
> 
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Zhanghailiang Feb. 13, 2017, 4:13 a.m. UTC | #4
On 2017/2/9 3:53, Dr. David Alan Gilbert wrote:
> * Hailiang Zhang (zhang.zhanghailiang@huawei.com) wrote:
>> On 2017/1/18 19:01, Dr. David Alan Gilbert wrote:
>>> * zhanghailiang (zhang.zhanghailiang@huawei.com) wrote:
>>>> If the net connection between primary host and secondary host breaks
>>>> while COLO/COLO incoming threads are doing read() or write().
>>>> It will block until connection is timeout, and the failover process
>>>> will be blocked because of it.
>>>>
>>>> So it is necessary to shutdown all the socket fds used by COLO
>>>> to avoid this situation. Besides, we should close the corresponding
>>>> file descriptors after failvoer BH shutdown them,
>>>> Or there will be an error.
>>>
>>> Hi,
>>>     There are two parts to this patch:
>>>      a) Add some semaphores to sequence failover
>>>      b) Use shutdown()
>>>
>>>     At first I wondered if perhaps they should be split; but I see
>>> the reason for the semaphores is mostly to stop the race between
>>> the fd's getting closed and the shutdown() calls; so I think it's
>>> OK.
>>>
>>
>> Hi,
>>
>> Yes, you are right, maybe i should add some comments about that.
>> Will do in next version.
>>
>>> Do you have any problems with these semaphores during powering off the
>>> guest?
>>>
>>
>> No, we didn't encounter any problems or trigger any bugs in our test
>> with this semaphores. In what places do you doubt it may has problems ? :)
>
> I just wondered about other exit cases other than failover; e.g. what
> if the guest shutdown or something like that, would it get stuck
> waiting for the colo_incoming_sem.
>

Sorry for the late reply, too busy with our project these days ...

Your worry makes sense, we have processed the shutdown case specially,
Let qemu does a checkpoint process (It seems that, we should kick colo
thread which might be waiting for colo_checkpoint_sem.) before exit colo therad.
And for the secondary sides, if it receives shutdown request, it will exit
colo incoming thread after some cleanup works.


Thanks.
Hailiang


> Dave
>
>> Thanks,
>> Hailiang
>>
>>> Dave
>>>
>>>
>>>
>>>
>>>> Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
>>>> Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
>>>> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
>>>> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
>>>> ---
>>>>    include/migration/migration.h |  3 +++
>>>>    migration/colo.c              | 43 +++++++++++++++++++++++++++++++++++++++++++
>>>>    2 files changed, 46 insertions(+)
>>>>
>>>> diff --git a/include/migration/migration.h b/include/migration/migration.h
>>>> index 487ac1e..7cac877 100644
>>>> --- a/include/migration/migration.h
>>>> +++ b/include/migration/migration.h
>>>> @@ -113,6 +113,7 @@ struct MigrationIncomingState {
>>>>        QemuThread colo_incoming_thread;
>>>>        /* The coroutine we should enter (back) after failover */
>>>>        Coroutine *migration_incoming_co;
>>>> +    QemuSemaphore colo_incoming_sem;
>>>>
>>>>        /* See savevm.c */
>>>>        LoadStateEntry_Head loadvm_handlers;
>>>> @@ -182,6 +183,8 @@ struct MigrationState
>>>>        QSIMPLEQ_HEAD(src_page_requests, MigrationSrcPageRequest) src_page_requests;
>>>>        /* The RAMBlock used in the last src_page_request */
>>>>        RAMBlock *last_req_rb;
>>>> +    /* The semaphore is used to notify COLO thread that failover is finished */
>>>> +    QemuSemaphore colo_exit_sem;
>>>>
>>>>        /* The semaphore is used to notify COLO thread to do checkpoint */
>>>>        QemuSemaphore colo_checkpoint_sem;
>>>> diff --git a/migration/colo.c b/migration/colo.c
>>>> index 08b2e46..3222812 100644
>>>> --- a/migration/colo.c
>>>> +++ b/migration/colo.c
>>>> @@ -59,6 +59,18 @@ static void secondary_vm_do_failover(void)
>>>>            /* recover runstate to normal migration finish state */
>>>>            autostart = true;
>>>>        }
>>>> +    /*
>>>> +     * Make sure COLO incoming thread not block in recv or send,
>>>> +     * If mis->from_src_file and mis->to_src_file use the same fd,
>>>> +     * The second shutdown() will return -1, we ignore this value,
>>>> +     * It is harmless.
>>>> +     */
>>>> +    if (mis->from_src_file) {
>>>> +        qemu_file_shutdown(mis->from_src_file);
>>>> +    }
>>>> +    if (mis->to_src_file) {
>>>> +        qemu_file_shutdown(mis->to_src_file);
>>>> +    }
>>>>
>>>>        old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
>>>>                                       FAILOVER_STATUS_COMPLETED);
>>>> @@ -67,6 +79,8 @@ static void secondary_vm_do_failover(void)
>>>>                         "secondary VM", FailoverStatus_lookup[old_state]);
>>>>            return;
>>>>        }
>>>> +    /* Notify COLO incoming thread that failover work is finished */
>>>> +    qemu_sem_post(&mis->colo_incoming_sem);
>>>>        /* For Secondary VM, jump to incoming co */
>>>>        if (mis->migration_incoming_co) {
>>>>            qemu_coroutine_enter(mis->migration_incoming_co);
>>>> @@ -81,6 +95,18 @@ static void primary_vm_do_failover(void)
>>>>        migrate_set_state(&s->state, MIGRATION_STATUS_COLO,
>>>>                          MIGRATION_STATUS_COMPLETED);
>>>>
>>>> +    /*
>>>> +     * Wake up COLO thread which may blocked in recv() or send(),
>>>> +     * The s->rp_state.from_dst_file and s->to_dst_file may use the
>>>> +     * same fd, but we still shutdown the fd for twice, it is harmless.
>>>> +     */
>>>> +    if (s->to_dst_file) {
>>>> +        qemu_file_shutdown(s->to_dst_file);
>>>> +    }
>>>> +    if (s->rp_state.from_dst_file) {
>>>> +        qemu_file_shutdown(s->rp_state.from_dst_file);
>>>> +    }
>>>> +
>>>>        old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
>>>>                                       FAILOVER_STATUS_COMPLETED);
>>>>        if (old_state != FAILOVER_STATUS_ACTIVE) {
>>>> @@ -88,6 +114,8 @@ static void primary_vm_do_failover(void)
>>>>                         FailoverStatus_lookup[old_state]);
>>>>            return;
>>>>        }
>>>> +    /* Notify COLO thread that failover work is finished */
>>>> +    qemu_sem_post(&s->colo_exit_sem);
>>>>    }
>>>>
>>>>    void colo_do_failover(MigrationState *s)
>>>> @@ -361,6 +389,14 @@ out:
>>>>
>>>>        timer_del(s->colo_delay_timer);
>>>>
>>>> +    /* Hope this not to be too long to wait here */
>>>> +    qemu_sem_wait(&s->colo_exit_sem);
>>>> +    qemu_sem_destroy(&s->colo_exit_sem);
>>>> +    /*
>>>> +     * Must be called after failover BH is completed,
>>>> +     * Or the failover BH may shutdown the wrong fd that
>>>> +     * re-used by other threads after we release here.
>>>> +     */
>>>>        if (s->rp_state.from_dst_file) {
>>>>            qemu_fclose(s->rp_state.from_dst_file);
>>>>        }
>>>> @@ -385,6 +421,7 @@ void migrate_start_colo_process(MigrationState *s)
>>>>        s->colo_delay_timer =  timer_new_ms(QEMU_CLOCK_HOST,
>>>>                                    colo_checkpoint_notify, s);
>>>>
>>>> +    qemu_sem_init(&s->colo_exit_sem, 0);
>>>>        migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
>>>>                          MIGRATION_STATUS_COLO);
>>>>        colo_process_checkpoint(s);
>>>> @@ -423,6 +460,8 @@ void *colo_process_incoming_thread(void *opaque)
>>>>        uint64_t value;
>>>>        Error *local_err = NULL;
>>>>
>>>> +    qemu_sem_init(&mis->colo_incoming_sem, 0);
>>>> +
>>>>        migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
>>>>                          MIGRATION_STATUS_COLO);
>>>>
>>>> @@ -533,6 +572,10 @@ out:
>>>>            qemu_fclose(fb);
>>>>        }
>>>>
>>>> +    /* Hope this not to be too long to loop here */
>>>> +    qemu_sem_wait(&mis->colo_incoming_sem);
>>>> +    qemu_sem_destroy(&mis->colo_incoming_sem);
>>>> +    /* Must be called after failover BH is completed */
>>>>        if (mis->to_src_file) {
>>>>            qemu_fclose(mis->to_src_file);
>>>>        }
>>>> --
>>>> 1.8.3.1
>>>>
>>>>
>>> --
>>> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
>>>
>>> .
>>>
>>
>>
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
>
> .
>
diff mbox

Patch

diff --git a/include/migration/migration.h b/include/migration/migration.h
index 487ac1e..7cac877 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -113,6 +113,7 @@  struct MigrationIncomingState {
     QemuThread colo_incoming_thread;
     /* The coroutine we should enter (back) after failover */
     Coroutine *migration_incoming_co;
+    QemuSemaphore colo_incoming_sem;
 
     /* See savevm.c */
     LoadStateEntry_Head loadvm_handlers;
@@ -182,6 +183,8 @@  struct MigrationState
     QSIMPLEQ_HEAD(src_page_requests, MigrationSrcPageRequest) src_page_requests;
     /* The RAMBlock used in the last src_page_request */
     RAMBlock *last_req_rb;
+    /* The semaphore is used to notify COLO thread that failover is finished */
+    QemuSemaphore colo_exit_sem;
 
     /* The semaphore is used to notify COLO thread to do checkpoint */
     QemuSemaphore colo_checkpoint_sem;
diff --git a/migration/colo.c b/migration/colo.c
index 08b2e46..3222812 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -59,6 +59,18 @@  static void secondary_vm_do_failover(void)
         /* recover runstate to normal migration finish state */
         autostart = true;
     }
+    /*
+     * Make sure COLO incoming thread not block in recv or send,
+     * If mis->from_src_file and mis->to_src_file use the same fd,
+     * The second shutdown() will return -1, we ignore this value,
+     * It is harmless.
+     */
+    if (mis->from_src_file) {
+        qemu_file_shutdown(mis->from_src_file);
+    }
+    if (mis->to_src_file) {
+        qemu_file_shutdown(mis->to_src_file);
+    }
 
     old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
                                    FAILOVER_STATUS_COMPLETED);
@@ -67,6 +79,8 @@  static void secondary_vm_do_failover(void)
                      "secondary VM", FailoverStatus_lookup[old_state]);
         return;
     }
+    /* Notify COLO incoming thread that failover work is finished */
+    qemu_sem_post(&mis->colo_incoming_sem);
     /* For Secondary VM, jump to incoming co */
     if (mis->migration_incoming_co) {
         qemu_coroutine_enter(mis->migration_incoming_co);
@@ -81,6 +95,18 @@  static void primary_vm_do_failover(void)
     migrate_set_state(&s->state, MIGRATION_STATUS_COLO,
                       MIGRATION_STATUS_COMPLETED);
 
+    /*
+     * Wake up COLO thread which may blocked in recv() or send(),
+     * The s->rp_state.from_dst_file and s->to_dst_file may use the
+     * same fd, but we still shutdown the fd for twice, it is harmless.
+     */
+    if (s->to_dst_file) {
+        qemu_file_shutdown(s->to_dst_file);
+    }
+    if (s->rp_state.from_dst_file) {
+        qemu_file_shutdown(s->rp_state.from_dst_file);
+    }
+
     old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
                                    FAILOVER_STATUS_COMPLETED);
     if (old_state != FAILOVER_STATUS_ACTIVE) {
@@ -88,6 +114,8 @@  static void primary_vm_do_failover(void)
                      FailoverStatus_lookup[old_state]);
         return;
     }
+    /* Notify COLO thread that failover work is finished */
+    qemu_sem_post(&s->colo_exit_sem);
 }
 
 void colo_do_failover(MigrationState *s)
@@ -361,6 +389,14 @@  out:
 
     timer_del(s->colo_delay_timer);
 
+    /* Hope this not to be too long to wait here */
+    qemu_sem_wait(&s->colo_exit_sem);
+    qemu_sem_destroy(&s->colo_exit_sem);
+    /*
+     * Must be called after failover BH is completed,
+     * Or the failover BH may shutdown the wrong fd that
+     * re-used by other threads after we release here.
+     */
     if (s->rp_state.from_dst_file) {
         qemu_fclose(s->rp_state.from_dst_file);
     }
@@ -385,6 +421,7 @@  void migrate_start_colo_process(MigrationState *s)
     s->colo_delay_timer =  timer_new_ms(QEMU_CLOCK_HOST,
                                 colo_checkpoint_notify, s);
 
+    qemu_sem_init(&s->colo_exit_sem, 0);
     migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
                       MIGRATION_STATUS_COLO);
     colo_process_checkpoint(s);
@@ -423,6 +460,8 @@  void *colo_process_incoming_thread(void *opaque)
     uint64_t value;
     Error *local_err = NULL;
 
+    qemu_sem_init(&mis->colo_incoming_sem, 0);
+
     migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                       MIGRATION_STATUS_COLO);
 
@@ -533,6 +572,10 @@  out:
         qemu_fclose(fb);
     }
 
+    /* Hope this not to be too long to loop here */
+    qemu_sem_wait(&mis->colo_incoming_sem);
+    qemu_sem_destroy(&mis->colo_incoming_sem);
+    /* Must be called after failover BH is completed */
     if (mis->to_src_file) {
         qemu_fclose(mis->to_src_file);
     }