Message ID | 1456109555-28299-23-git-send-email-wency@cn.fujitsu.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Mon, Feb 22, 2016 at 10:52:26AM +0800, Wen Congyang wrote: [...] > + if (libxl_defbool_val(info->colo)) { > + if (libxl_defbool_val(info->compression)) { This can be simplified as if (libxl_defbool_val(xxx) && libxl_defbool_val(yyy)) > + LOG(ERROR, "cannot use memory checkpoint compression in COLO mode"); > + rc = ERROR_FAIL; > + goto out; > + } > + } > + > if (!libxl_defbool_val(info->allow_unsafe) && > (libxl_defbool_val(info->blackhole) || > !libxl_defbool_val(info->netbuf) || > @@ -876,7 +892,10 @@ int libxl_domain_remus_start(libxl_ctx *ctx, libxl_domain_remus_info *info, > dss->live = 1; > dss->debug = 0; > dss->remus = info; > - dss->checkpointed_stream = LIBXL_CHECKPOINTED_STREAM_REMUS; > + if (libxl_defbool_val(info->colo)) > + dss->checkpointed_stream = LIBXL_CHECKPOINTED_STREAM_COLO; > + else > + dss->checkpointed_stream = LIBXL_CHECKPOINTED_STREAM_REMUS; > > assert(info); > > diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c > index df7268b..0dc7220 100644 > --- a/tools/libxl/xl_cmdimpl.c > +++ b/tools/libxl/xl_cmdimpl.c > @@ -4440,6 +4440,8 @@ static void migrate_receive(int debug, int daemonize, int monitor, > char rc_buf; > char *migration_domname; > struct domain_create dom_info; > + const char *ha = checkpointed == LIBXL_CHECKPOINTED_STREAM_COLO ? > + "COLO" : "Remus"; > > signal(SIGPIPE, SIG_IGN); > /* if we get SIGPIPE we'd rather just have it as an error */ > @@ -4460,6 +4462,9 @@ static void migrate_receive(int debug, int daemonize, int monitor, > dom_info.send_back_fd = send_fd; > dom_info.migration_domname_r = &migration_domname; > dom_info.checkpointed_stream = checkpointed; > + if (checkpointed == LIBXL_CHECKPOINTED_STREAM_COLO) > + /* COLO uses stdout to send control message to master */ > + dom_info.quiet = 1; > It seems that dom_info->quiet affects stderr, not stdout. See the only place that checks this in xl_cmdimpl.c. > rc = create_domain(&dom_info); > if (rc < 0) { > @@ -4472,11 +4477,12 @@ static void migrate_receive(int debug, int daemonize, int monitor, > > switch (checkpointed) { > case LIBXL_CHECKPOINTED_STREAM_REMUS: > + case LIBXL_CHECKPOINTED_STREAM_COLO: > /* If we are here, it means that the sender (primary) has crashed. > * TODO: Split-Brain Check. > */ > - fprintf(stderr, "migration target: Remus Failover for domain %u\n", > - domid); > + fprintf(stderr, "migration target: %s Failover for domain %u\n", > + ha, domid); > > /* > * If domain renaming fails, lets just continue (as we need the domain > @@ -4492,16 +4498,20 @@ static void migrate_receive(int debug, int daemonize, int monitor, > rc = libxl_domain_rename(ctx, domid, migration_domname, > common_domname); > if (rc) > - fprintf(stderr, "migration target (Remus): " > + fprintf(stderr, "migration target (%s): " > "Failed to rename domain from %s to %s:%d\n", > - migration_domname, common_domname, rc); > + ha, migration_domname, common_domname, rc); > } > > + if (checkpointed == LIBXL_CHECKPOINTED_STREAM_COLO) > + /* The guest is running after failover in COLO mode */ > + exit(rc ? -ERROR_FAIL: 0); > + > rc = libxl_domain_unpause(ctx, domid); > if (rc) > - fprintf(stderr, "migration target (Remus): " > + fprintf(stderr, "migration target (%s): " > "Failed to unpause domain %s (id: %u):%d\n", > - common_domname, domid, rc); > + ha, common_domname, domid, rc); > > exit(rc ? -ERROR_FAIL: 0); > default: > @@ -4649,7 +4659,7 @@ int main_migrate_receive(int argc, char **argv) > libxl_checkpointed_stream checkpointed = LIBXL_CHECKPOINTED_STREAM_NONE; > int opt; > > - SWITCH_FOREACH_OPT(opt, "Fedr", NULL, "migrate-receive", 0) { > + SWITCH_FOREACH_OPT(opt, "Fedrc", NULL, "migrate-receive", 0) { > case 'F': > daemonize = 0; > break; > @@ -4663,6 +4673,9 @@ int main_migrate_receive(int argc, char **argv) > case 'r': > checkpointed = LIBXL_CHECKPOINTED_STREAM_REMUS; > break; > + case 'c': > + checkpointed = LIBXL_CHECKPOINTED_STREAM_COLO; > + break; > } > > if (argc-optind != 0) { > @@ -8032,11 +8045,8 @@ int main_remus(int argc, char **argv) > int config_len; > > memset(&r_info, 0, sizeof(libxl_domain_remus_info)); > - /* Defaults */ > - r_info.interval = 200; > - libxl_defbool_setdefault(&r_info.blackhole, false); > > - SWITCH_FOREACH_OPT(opt, "Fbundi:s:N:e", NULL, "remus", 2) { > + SWITCH_FOREACH_OPT(opt, "Fbundi:s:N:ec", NULL, "remus", 2) { > case 'i': > r_info.interval = atoi(optarg); > break; > @@ -8064,11 +8074,32 @@ int main_remus(int argc, char **argv) > case 'e': > daemonize = 0; > break; > + case 'c': > + libxl_defbool_set(&r_info.colo, true); > } > > domid = find_domain(argv[optind]); > host = argv[optind + 1]; > > + /* Defaults */ > + libxl_defbool_setdefault(&r_info.blackhole, false); > + libxl_defbool_setdefault(&r_info.colo, false); > + if (!libxl_defbool_val(r_info.colo) && !r_info.interval) > + r_info.interval = 200; > + > + if (libxl_defbool_val(r_info.colo)) { > + if (r_info.interval || libxl_defbool_val(r_info.blackhole)) { > + perror("Option -c conflicts with -i or -b"); > + exit(-1); > + } > + > + if (libxl_defbool_is_default(r_info.compression)) { > + perror("COLO can't be used with memory compression. " > + "Disable memory checkpoint compression now..."); > + libxl_defbool_set(&r_info.compression, false); > + } > + } > + I don't think I'm entirely happy with how these things are arranged. Remus and COLO don't seem to have a set of consistent APIs that arbitrary users can call. But for the sake of not growing this series any longer let's leave it like this for the moment. I think COLO at best is going to be (as you stated in manpage) experimental at this stage. > if (!r_info.netbufscript) > r_info.netbufscript = default_remus_netbufscript; > > @@ -8083,8 +8114,9 @@ int main_remus(int argc, char **argv) > if (!ssh_command[0]) { > rune = host; > } else { > - xasprintf(&rune, "exec %s %s xl migrate-receive -r %s", > + xasprintf(&rune, "exec %s %s xl migrate-receive %s %s", > ssh_command, host, > + libxl_defbool_val(r_info.colo) ? "-c" : "-r", > daemonize ? "" : " -e"); > } > > @@ -8112,7 +8144,8 @@ int main_remus(int argc, char **argv) > * domain to force failover > */ > if (libxl_domain_info(ctx, 0, domid)) { > - fprintf(stderr, "Remus: Primary domain has been destroyed.\n"); > + fprintf(stderr, "%s: Primary domain has been destroyed.\n", > + libxl_defbool_val(r_info.colo) ? "COLO" : "Remus"); > close(send_fd); > return 0; > } > @@ -8124,7 +8157,8 @@ int main_remus(int argc, char **argv) > if (rc == ERROR_GUEST_TIMEDOUT) > fprintf(stderr, "Failed to suspend domain at primary.\n"); > else { > - fprintf(stderr, "Remus: Backup failed? resuming domain at primary.\n"); > + fprintf(stderr, "%s: Backup failed? resuming domain at primary.\n", > + libxl_defbool_val(r_info.colo) ? "COLO" : "Remus"); > libxl_domain_resume(ctx, domid, 1, 0); > } > > diff --git a/tools/libxl/xl_cmdtable.c b/tools/libxl/xl_cmdtable.c > index fdc1ac6..b6b630c 100644 > --- a/tools/libxl/xl_cmdtable.c > +++ b/tools/libxl/xl_cmdtable.c > @@ -499,7 +499,9 @@ struct cmd_spec cmd_table[] = { > "-b Replicate memory checkpoints to /dev/null (blackhole).\n" > " Works only in unsafe mode.\n" > "-n Disable network output buffering. Works only in unsafe mode.\n" > - "-d Disable disk replication. Works only in unsafe mode." > + "-d Disable disk replication. Works only in unsafe mode.\n" > + "-c Enable COLO HA. It is conflict with -i and -b, and memory\n" > + " checkpoint must be disabled" > }, > #endif > { "devd", > -- > 2.5.0 > > > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel
On 03/02/2016 11:03 PM, Wei Liu wrote: > On Mon, Feb 22, 2016 at 10:52:26AM +0800, Wen Congyang wrote: > [...] >> + if (libxl_defbool_val(info->colo)) { >> + if (libxl_defbool_val(info->compression)) { > > This can be simplified as > > if (libxl_defbool_val(xxx) && libxl_defbool_val(yyy)) OK. will fix it in the next version. > >> + LOG(ERROR, "cannot use memory checkpoint compression in COLO mode"); >> + rc = ERROR_FAIL; >> + goto out; >> + } >> + } >> + >> if (!libxl_defbool_val(info->allow_unsafe) && >> (libxl_defbool_val(info->blackhole) || >> !libxl_defbool_val(info->netbuf) || >> @@ -876,7 +892,10 @@ int libxl_domain_remus_start(libxl_ctx *ctx, libxl_domain_remus_info *info, >> dss->live = 1; >> dss->debug = 0; >> dss->remus = info; >> - dss->checkpointed_stream = LIBXL_CHECKPOINTED_STREAM_REMUS; >> + if (libxl_defbool_val(info->colo)) >> + dss->checkpointed_stream = LIBXL_CHECKPOINTED_STREAM_COLO; >> + else >> + dss->checkpointed_stream = LIBXL_CHECKPOINTED_STREAM_REMUS; >> >> assert(info); >> >> diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c >> index df7268b..0dc7220 100644 >> --- a/tools/libxl/xl_cmdimpl.c >> +++ b/tools/libxl/xl_cmdimpl.c >> @@ -4440,6 +4440,8 @@ static void migrate_receive(int debug, int daemonize, int monitor, >> char rc_buf; >> char *migration_domname; >> struct domain_create dom_info; >> + const char *ha = checkpointed == LIBXL_CHECKPOINTED_STREAM_COLO ? >> + "COLO" : "Remus"; >> >> signal(SIGPIPE, SIG_IGN); >> /* if we get SIGPIPE we'd rather just have it as an error */ >> @@ -4460,6 +4462,9 @@ static void migrate_receive(int debug, int daemonize, int monitor, >> dom_info.send_back_fd = send_fd; >> dom_info.migration_domname_r = &migration_domname; >> dom_info.checkpointed_stream = checkpointed; >> + if (checkpointed == LIBXL_CHECKPOINTED_STREAM_COLO) >> + /* COLO uses stdout to send control message to master */ >> + dom_info.quiet = 1; >> > > It seems that dom_info->quiet affects stderr, not stdout. See the only > place that checks this in xl_cmdimpl.c. > >> rc = create_domain(&dom_info); >> if (rc < 0) { >> @@ -4472,11 +4477,12 @@ static void migrate_receive(int debug, int daemonize, int monitor, >> >> switch (checkpointed) { >> case LIBXL_CHECKPOINTED_STREAM_REMUS: >> + case LIBXL_CHECKPOINTED_STREAM_COLO: >> /* If we are here, it means that the sender (primary) has crashed. >> * TODO: Split-Brain Check. >> */ >> - fprintf(stderr, "migration target: Remus Failover for domain %u\n", >> - domid); >> + fprintf(stderr, "migration target: %s Failover for domain %u\n", >> + ha, domid); >> >> /* >> * If domain renaming fails, lets just continue (as we need the domain >> @@ -4492,16 +4498,20 @@ static void migrate_receive(int debug, int daemonize, int monitor, >> rc = libxl_domain_rename(ctx, domid, migration_domname, >> common_domname); >> if (rc) >> - fprintf(stderr, "migration target (Remus): " >> + fprintf(stderr, "migration target (%s): " >> "Failed to rename domain from %s to %s:%d\n", >> - migration_domname, common_domname, rc); >> + ha, migration_domname, common_domname, rc); >> } >> >> + if (checkpointed == LIBXL_CHECKPOINTED_STREAM_COLO) >> + /* The guest is running after failover in COLO mode */ >> + exit(rc ? -ERROR_FAIL: 0); >> + >> rc = libxl_domain_unpause(ctx, domid); >> if (rc) >> - fprintf(stderr, "migration target (Remus): " >> + fprintf(stderr, "migration target (%s): " >> "Failed to unpause domain %s (id: %u):%d\n", >> - common_domname, domid, rc); >> + ha, common_domname, domid, rc); >> >> exit(rc ? -ERROR_FAIL: 0); >> default: >> @@ -4649,7 +4659,7 @@ int main_migrate_receive(int argc, char **argv) >> libxl_checkpointed_stream checkpointed = LIBXL_CHECKPOINTED_STREAM_NONE; >> int opt; >> >> - SWITCH_FOREACH_OPT(opt, "Fedr", NULL, "migrate-receive", 0) { >> + SWITCH_FOREACH_OPT(opt, "Fedrc", NULL, "migrate-receive", 0) { >> case 'F': >> daemonize = 0; >> break; >> @@ -4663,6 +4673,9 @@ int main_migrate_receive(int argc, char **argv) >> case 'r': >> checkpointed = LIBXL_CHECKPOINTED_STREAM_REMUS; >> break; >> + case 'c': >> + checkpointed = LIBXL_CHECKPOINTED_STREAM_COLO; >> + break; >> } >> >> if (argc-optind != 0) { >> @@ -8032,11 +8045,8 @@ int main_remus(int argc, char **argv) >> int config_len; >> >> memset(&r_info, 0, sizeof(libxl_domain_remus_info)); >> - /* Defaults */ >> - r_info.interval = 200; >> - libxl_defbool_setdefault(&r_info.blackhole, false); >> >> - SWITCH_FOREACH_OPT(opt, "Fbundi:s:N:e", NULL, "remus", 2) { >> + SWITCH_FOREACH_OPT(opt, "Fbundi:s:N:ec", NULL, "remus", 2) { >> case 'i': >> r_info.interval = atoi(optarg); >> break; >> @@ -8064,11 +8074,32 @@ int main_remus(int argc, char **argv) >> case 'e': >> daemonize = 0; >> break; >> + case 'c': >> + libxl_defbool_set(&r_info.colo, true); >> } >> >> domid = find_domain(argv[optind]); >> host = argv[optind + 1]; >> >> + /* Defaults */ >> + libxl_defbool_setdefault(&r_info.blackhole, false); >> + libxl_defbool_setdefault(&r_info.colo, false); >> + if (!libxl_defbool_val(r_info.colo) && !r_info.interval) >> + r_info.interval = 200; >> + >> + if (libxl_defbool_val(r_info.colo)) { >> + if (r_info.interval || libxl_defbool_val(r_info.blackhole)) { >> + perror("Option -c conflicts with -i or -b"); >> + exit(-1); >> + } >> + >> + if (libxl_defbool_is_default(r_info.compression)) { >> + perror("COLO can't be used with memory compression. " >> + "Disable memory checkpoint compression now..."); >> + libxl_defbool_set(&r_info.compression, false); >> + } >> + } >> + > > I don't think I'm entirely happy with how these things are arranged. > Remus and COLO don't seem to have a set of consistent APIs that > arbitrary users can call. > > But for the sake of not growing this series any longer let's leave it > like this for the moment. I think COLO at best is going to be (as you > stated in manpage) experimental at this stage. Yes, it is experimental now. Thanks Wen Congyang > > >> if (!r_info.netbufscript) >> r_info.netbufscript = default_remus_netbufscript; >> >> @@ -8083,8 +8114,9 @@ int main_remus(int argc, char **argv) >> if (!ssh_command[0]) { >> rune = host; >> } else { >> - xasprintf(&rune, "exec %s %s xl migrate-receive -r %s", >> + xasprintf(&rune, "exec %s %s xl migrate-receive %s %s", >> ssh_command, host, >> + libxl_defbool_val(r_info.colo) ? "-c" : "-r", >> daemonize ? "" : " -e"); >> } >> >> @@ -8112,7 +8144,8 @@ int main_remus(int argc, char **argv) >> * domain to force failover >> */ >> if (libxl_domain_info(ctx, 0, domid)) { >> - fprintf(stderr, "Remus: Primary domain has been destroyed.\n"); >> + fprintf(stderr, "%s: Primary domain has been destroyed.\n", >> + libxl_defbool_val(r_info.colo) ? "COLO" : "Remus"); >> close(send_fd); >> return 0; >> } >> @@ -8124,7 +8157,8 @@ int main_remus(int argc, char **argv) >> if (rc == ERROR_GUEST_TIMEDOUT) >> fprintf(stderr, "Failed to suspend domain at primary.\n"); >> else { >> - fprintf(stderr, "Remus: Backup failed? resuming domain at primary.\n"); >> + fprintf(stderr, "%s: Backup failed? resuming domain at primary.\n", >> + libxl_defbool_val(r_info.colo) ? "COLO" : "Remus"); >> libxl_domain_resume(ctx, domid, 1, 0); >> } >> >> diff --git a/tools/libxl/xl_cmdtable.c b/tools/libxl/xl_cmdtable.c >> index fdc1ac6..b6b630c 100644 >> --- a/tools/libxl/xl_cmdtable.c >> +++ b/tools/libxl/xl_cmdtable.c >> @@ -499,7 +499,9 @@ struct cmd_spec cmd_table[] = { >> "-b Replicate memory checkpoints to /dev/null (blackhole).\n" >> " Works only in unsafe mode.\n" >> "-n Disable network output buffering. Works only in unsafe mode.\n" >> - "-d Disable disk replication. Works only in unsafe mode." >> + "-d Disable disk replication. Works only in unsafe mode.\n" >> + "-c Enable COLO HA. It is conflict with -i and -b, and memory\n" >> + " checkpoint must be disabled" >> }, >> #endif >> { "devd", >> -- >> 2.5.0 >> >> >> >> >> _______________________________________________ >> Xen-devel mailing list >> Xen-devel@lists.xen.org >> http://lists.xen.org/xen-devel > > > . >
diff --git a/docs/man/xl.pod.1 b/docs/man/xl.pod.1 index 4279c7c..1c6dd87 100644 --- a/docs/man/xl.pod.1 +++ b/docs/man/xl.pod.1 @@ -447,12 +447,15 @@ Print huge (!) amount of debug during the migration process. =item B<remus> [I<OPTIONS>] I<domain-id> I<host> -Enable Remus HA for domain. By default B<xl> relies on ssh as a transport -mechanism between the two hosts. +Enable Remus HA or COLO HA for domain. By default B<xl> relies on ssh as a +transport mechanism between the two hosts. N.B: Remus support in xl is still in experimental (proof-of-concept) phase. Disk replication support is limited to DRBD disks. + COLO support in xl is still in experimental (proof-of-concept) phase. + There is no support for network or disk at the moment. + B<OPTIONS> =over 4 @@ -498,6 +501,11 @@ Disable network output buffering. Requires enabling unsafe mode. Disable disk replication. Requires enabling unsafe mode. +=item B<-c> + +Enable COLO HA. This conflicts with B<-i> and B<-b>, and memory +checkpoint compression must be disabled. + =back =item B<pause> I<domain-id> diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c index 7236175..12df81a 100644 --- a/tools/libxl/libxl.c +++ b/tools/libxl/libxl.c @@ -849,12 +849,28 @@ int libxl_domain_remus_start(libxl_ctx *ctx, libxl_domain_remus_info *info, goto out; } + /* The caller must set this defbool */ + if (libxl_defbool_is_default(info->colo)) { + LOG(ERROR, "colo mode must be enabled/disabled"); + rc = ERROR_FAIL; + goto out; + } + libxl_defbool_setdefault(&info->allow_unsafe, false); libxl_defbool_setdefault(&info->blackhole, false); - libxl_defbool_setdefault(&info->compression, true); + libxl_defbool_setdefault(&info->compression, + !libxl_defbool_val(info->colo)); libxl_defbool_setdefault(&info->netbuf, true); libxl_defbool_setdefault(&info->diskbuf, true); + if (libxl_defbool_val(info->colo)) { + if (libxl_defbool_val(info->compression)) { + LOG(ERROR, "cannot use memory checkpoint compression in COLO mode"); + rc = ERROR_FAIL; + goto out; + } + } + if (!libxl_defbool_val(info->allow_unsafe) && (libxl_defbool_val(info->blackhole) || !libxl_defbool_val(info->netbuf) || @@ -876,7 +892,10 @@ int libxl_domain_remus_start(libxl_ctx *ctx, libxl_domain_remus_info *info, dss->live = 1; dss->debug = 0; dss->remus = info; - dss->checkpointed_stream = LIBXL_CHECKPOINTED_STREAM_REMUS; + if (libxl_defbool_val(info->colo)) + dss->checkpointed_stream = LIBXL_CHECKPOINTED_STREAM_COLO; + else + dss->checkpointed_stream = LIBXL_CHECKPOINTED_STREAM_REMUS; assert(info); diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c index df7268b..0dc7220 100644 --- a/tools/libxl/xl_cmdimpl.c +++ b/tools/libxl/xl_cmdimpl.c @@ -4440,6 +4440,8 @@ static void migrate_receive(int debug, int daemonize, int monitor, char rc_buf; char *migration_domname; struct domain_create dom_info; + const char *ha = checkpointed == LIBXL_CHECKPOINTED_STREAM_COLO ? + "COLO" : "Remus"; signal(SIGPIPE, SIG_IGN); /* if we get SIGPIPE we'd rather just have it as an error */ @@ -4460,6 +4462,9 @@ static void migrate_receive(int debug, int daemonize, int monitor, dom_info.send_back_fd = send_fd; dom_info.migration_domname_r = &migration_domname; dom_info.checkpointed_stream = checkpointed; + if (checkpointed == LIBXL_CHECKPOINTED_STREAM_COLO) + /* COLO uses stdout to send control message to master */ + dom_info.quiet = 1; rc = create_domain(&dom_info); if (rc < 0) { @@ -4472,11 +4477,12 @@ static void migrate_receive(int debug, int daemonize, int monitor, switch (checkpointed) { case LIBXL_CHECKPOINTED_STREAM_REMUS: + case LIBXL_CHECKPOINTED_STREAM_COLO: /* If we are here, it means that the sender (primary) has crashed. * TODO: Split-Brain Check. */ - fprintf(stderr, "migration target: Remus Failover for domain %u\n", - domid); + fprintf(stderr, "migration target: %s Failover for domain %u\n", + ha, domid); /* * If domain renaming fails, lets just continue (as we need the domain @@ -4492,16 +4498,20 @@ static void migrate_receive(int debug, int daemonize, int monitor, rc = libxl_domain_rename(ctx, domid, migration_domname, common_domname); if (rc) - fprintf(stderr, "migration target (Remus): " + fprintf(stderr, "migration target (%s): " "Failed to rename domain from %s to %s:%d\n", - migration_domname, common_domname, rc); + ha, migration_domname, common_domname, rc); } + if (checkpointed == LIBXL_CHECKPOINTED_STREAM_COLO) + /* The guest is running after failover in COLO mode */ + exit(rc ? -ERROR_FAIL: 0); + rc = libxl_domain_unpause(ctx, domid); if (rc) - fprintf(stderr, "migration target (Remus): " + fprintf(stderr, "migration target (%s): " "Failed to unpause domain %s (id: %u):%d\n", - common_domname, domid, rc); + ha, common_domname, domid, rc); exit(rc ? -ERROR_FAIL: 0); default: @@ -4649,7 +4659,7 @@ int main_migrate_receive(int argc, char **argv) libxl_checkpointed_stream checkpointed = LIBXL_CHECKPOINTED_STREAM_NONE; int opt; - SWITCH_FOREACH_OPT(opt, "Fedr", NULL, "migrate-receive", 0) { + SWITCH_FOREACH_OPT(opt, "Fedrc", NULL, "migrate-receive", 0) { case 'F': daemonize = 0; break; @@ -4663,6 +4673,9 @@ int main_migrate_receive(int argc, char **argv) case 'r': checkpointed = LIBXL_CHECKPOINTED_STREAM_REMUS; break; + case 'c': + checkpointed = LIBXL_CHECKPOINTED_STREAM_COLO; + break; } if (argc-optind != 0) { @@ -8032,11 +8045,8 @@ int main_remus(int argc, char **argv) int config_len; memset(&r_info, 0, sizeof(libxl_domain_remus_info)); - /* Defaults */ - r_info.interval = 200; - libxl_defbool_setdefault(&r_info.blackhole, false); - SWITCH_FOREACH_OPT(opt, "Fbundi:s:N:e", NULL, "remus", 2) { + SWITCH_FOREACH_OPT(opt, "Fbundi:s:N:ec", NULL, "remus", 2) { case 'i': r_info.interval = atoi(optarg); break; @@ -8064,11 +8074,32 @@ int main_remus(int argc, char **argv) case 'e': daemonize = 0; break; + case 'c': + libxl_defbool_set(&r_info.colo, true); } domid = find_domain(argv[optind]); host = argv[optind + 1]; + /* Defaults */ + libxl_defbool_setdefault(&r_info.blackhole, false); + libxl_defbool_setdefault(&r_info.colo, false); + if (!libxl_defbool_val(r_info.colo) && !r_info.interval) + r_info.interval = 200; + + if (libxl_defbool_val(r_info.colo)) { + if (r_info.interval || libxl_defbool_val(r_info.blackhole)) { + perror("Option -c conflicts with -i or -b"); + exit(-1); + } + + if (libxl_defbool_is_default(r_info.compression)) { + perror("COLO can't be used with memory compression. " + "Disable memory checkpoint compression now..."); + libxl_defbool_set(&r_info.compression, false); + } + } + if (!r_info.netbufscript) r_info.netbufscript = default_remus_netbufscript; @@ -8083,8 +8114,9 @@ int main_remus(int argc, char **argv) if (!ssh_command[0]) { rune = host; } else { - xasprintf(&rune, "exec %s %s xl migrate-receive -r %s", + xasprintf(&rune, "exec %s %s xl migrate-receive %s %s", ssh_command, host, + libxl_defbool_val(r_info.colo) ? "-c" : "-r", daemonize ? "" : " -e"); } @@ -8112,7 +8144,8 @@ int main_remus(int argc, char **argv) * domain to force failover */ if (libxl_domain_info(ctx, 0, domid)) { - fprintf(stderr, "Remus: Primary domain has been destroyed.\n"); + fprintf(stderr, "%s: Primary domain has been destroyed.\n", + libxl_defbool_val(r_info.colo) ? "COLO" : "Remus"); close(send_fd); return 0; } @@ -8124,7 +8157,8 @@ int main_remus(int argc, char **argv) if (rc == ERROR_GUEST_TIMEDOUT) fprintf(stderr, "Failed to suspend domain at primary.\n"); else { - fprintf(stderr, "Remus: Backup failed? resuming domain at primary.\n"); + fprintf(stderr, "%s: Backup failed? resuming domain at primary.\n", + libxl_defbool_val(r_info.colo) ? "COLO" : "Remus"); libxl_domain_resume(ctx, domid, 1, 0); } diff --git a/tools/libxl/xl_cmdtable.c b/tools/libxl/xl_cmdtable.c index fdc1ac6..b6b630c 100644 --- a/tools/libxl/xl_cmdtable.c +++ b/tools/libxl/xl_cmdtable.c @@ -499,7 +499,9 @@ struct cmd_spec cmd_table[] = { "-b Replicate memory checkpoints to /dev/null (blackhole).\n" " Works only in unsafe mode.\n" "-n Disable network output buffering. Works only in unsafe mode.\n" - "-d Disable disk replication. Works only in unsafe mode." + "-d Disable disk replication. Works only in unsafe mode.\n" + "-c Enable COLO HA. It is conflict with -i and -b, and memory\n" + " checkpoint must be disabled" }, #endif { "devd",