Message ID | 1612339311-114805-11-git-send-email-zhengchuan@huawei.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Support Multifd for RDMA migration | expand |
* Chuan Zheng (zhengchuan@huawei.com) wrote: > We still don't transmit anything through them, and we only build > the RDMA connections. > > Signed-off-by: Zhimin Feng <fengzhimin1@huawei.com> > Signed-off-by: Chuan Zheng <zhengchuan@huawei.com> > --- > migration/rdma.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-- > 1 file changed, 67 insertions(+), 2 deletions(-) > > diff --git a/migration/rdma.c b/migration/rdma.c > index 996afb0..ed8a015 100644 > --- a/migration/rdma.c > +++ b/migration/rdma.c > @@ -3267,6 +3267,40 @@ static void rdma_cm_poll_handler(void *opaque) > } > } > > +static bool qemu_rdma_accept_setup(RDMAContext *rdma) > +{ > + RDMAContext *multifd_rdma = NULL; > + int thread_count; > + int i; > + MultiFDRecvParams *multifd_recv_param; > + thread_count = migrate_multifd_channels(); > + /* create the multifd channels for RDMA */ > + for (i = 0; i < thread_count; i++) { > + if (get_multifd_recv_param(i, &multifd_recv_param) < 0) { > + error_report("rdma: error getting multifd_recv_param(%d)", i); > + return false; > + } > + > + multifd_rdma = (RDMAContext *) multifd_recv_param->rdma; > + if (multifd_rdma->cm_id == NULL) { > + break; > + } else { > + multifd_rdma = NULL; > + } I'm confused by what this if is doing - what are the two cases? 
> + } > + > + if (multifd_rdma) { > + qemu_set_fd_handler(rdma->channel->fd, > + rdma_accept_incoming_migration, > + NULL, (void *)(intptr_t)multifd_rdma); > + } else { > + qemu_set_fd_handler(rdma->channel->fd, rdma_cm_poll_handler, > + NULL, rdma); > + } > + > + return true; > +} > + > static int qemu_rdma_accept(RDMAContext *rdma) > { > RDMACapabilities cap; > @@ -3366,6 +3400,10 @@ static int qemu_rdma_accept(RDMAContext *rdma) > qemu_set_fd_handler(rdma->channel->fd, rdma_accept_incoming_migration, > NULL, > (void *)(intptr_t)rdma->return_path); > + } else if (migrate_use_multifd()) { > + if (!qemu_rdma_accept_setup(rdma)) { > + goto err_rdma_dest_wait; > + } > } else { > qemu_set_fd_handler(rdma->channel->fd, rdma_cm_poll_handler, > NULL, rdma); > @@ -3976,6 +4014,34 @@ static QEMUFile *qemu_fopen_rdma(RDMAContext *rdma, const char *mode) > return rioc->file; > } > > +static void migration_rdma_process_incoming(QEMUFile *f, > + RDMAContext *rdma, Error **errp) > +{ > + MigrationIncomingState *mis = migration_incoming_get_current(); > + QIOChannel *ioc = NULL; > + bool start_migration = false; > + > + if (!migrate_use_multifd()) { > + rdma->migration_started_on_destination = 1; > + migration_fd_process_incoming(f, errp); > + return; > + } > + > + if (!mis->from_src_file) { > + mis->from_src_file = f; > + qemu_file_set_blocking(f, false); > + } else { > + ioc = QIO_CHANNEL(getQIOChannel(f)); > + /* Multiple connections */ > + assert(migrate_use_multifd()); Are you sure that's never triggerable by something trying to connect badly? If it was it would be better to error than abort. > + start_migration = multifd_recv_new_channel(ioc, errp); And what does 'start_migration' mean here - is that meaning that we have a full set of connections? 
Dave > + } > + > + if (start_migration) { > + migration_incoming_process(); > + } > +} > + > static void rdma_accept_incoming_migration(void *opaque) > { > RDMAContext *rdma = opaque; > @@ -4004,8 +4070,7 @@ static void rdma_accept_incoming_migration(void *opaque) > return; > } > > - rdma->migration_started_on_destination = 1; > - migration_fd_process_incoming(f, &local_err); > + migration_rdma_process_incoming(f, rdma, &local_err); > if (local_err) { > error_reportf_err(local_err, "RDMA ERROR:"); > } > -- > 1.8.3.1 >
On 2021/2/4 2:59, Dr. David Alan Gilbert wrote: > * Chuan Zheng (zhengchuan@huawei.com) wrote: >> We still don't transmit anything through them, and we only build >> the RDMA connections. >> >> Signed-off-by: Zhimin Feng <fengzhimin1@huawei.com> >> Signed-off-by: Chuan Zheng <zhengchuan@huawei.com> >> --- >> migration/rdma.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-- >> 1 file changed, 67 insertions(+), 2 deletions(-) >> >> diff --git a/migration/rdma.c b/migration/rdma.c >> index 996afb0..ed8a015 100644 >> --- a/migration/rdma.c >> +++ b/migration/rdma.c >> @@ -3267,6 +3267,40 @@ static void rdma_cm_poll_handler(void *opaque) >> } >> } >> >> +static bool qemu_rdma_accept_setup(RDMAContext *rdma) >> +{ >> + RDMAContext *multifd_rdma = NULL; >> + int thread_count; >> + int i; >> + MultiFDRecvParams *multifd_recv_param; >> + thread_count = migrate_multifd_channels(); >> + /* create the multifd channels for RDMA */ >> + for (i = 0; i < thread_count; i++) { >> + if (get_multifd_recv_param(i, &multifd_recv_param) < 0) { >> + error_report("rdma: error getting multifd_recv_param(%d)", i); >> + return false; >> + } >> + >> + multifd_rdma = (RDMAContext *) multifd_recv_param->rdma; >> + if (multifd_rdma->cm_id == NULL) { >> + break; >> + } else { >> + multifd_rdma = NULL; >> + } > > I'm confused by what this if is doing - what are the two cases? > Since we share the CM channel and CM id with the main thread, we assign the cm_id through the callback rdma_accept_incoming_migration() for a multifd thread if its cm_id is still NULL. Once every multifd channel has its cm_id assigned, we fall back to installing the normal rdma_cm_poll_handler() as the fd handler. 
>> + } >> + >> + if (multifd_rdma) { >> + qemu_set_fd_handler(rdma->channel->fd, >> + rdma_accept_incoming_migration, >> + NULL, (void *)(intptr_t)multifd_rdma); >> + } else { >> + qemu_set_fd_handler(rdma->channel->fd, rdma_cm_poll_handler, >> + NULL, rdma); >> + } >> + >> + return true; >> +} >> + >> static int qemu_rdma_accept(RDMAContext *rdma) >> { >> RDMACapabilities cap; >> @@ -3366,6 +3400,10 @@ static int qemu_rdma_accept(RDMAContext *rdma) >> qemu_set_fd_handler(rdma->channel->fd, rdma_accept_incoming_migration, >> NULL, >> (void *)(intptr_t)rdma->return_path); >> + } else if (migrate_use_multifd()) { >> + if (!qemu_rdma_accept_setup(rdma)) { >> + goto err_rdma_dest_wait; >> + } >> } else { >> qemu_set_fd_handler(rdma->channel->fd, rdma_cm_poll_handler, >> NULL, rdma); >> @@ -3976,6 +4014,34 @@ static QEMUFile *qemu_fopen_rdma(RDMAContext *rdma, const char *mode) >> return rioc->file; >> } >> >> +static void migration_rdma_process_incoming(QEMUFile *f, >> + RDMAContext *rdma, Error **errp) >> +{ >> + MigrationIncomingState *mis = migration_incoming_get_current(); >> + QIOChannel *ioc = NULL; >> + bool start_migration = false; >> + >> + if (!migrate_use_multifd()) { >> + rdma->migration_started_on_destination = 1; >> + migration_fd_process_incoming(f, errp); >> + return; >> + } >> + >> + if (!mis->from_src_file) { >> + mis->from_src_file = f; >> + qemu_file_set_blocking(f, false); >> + } else { >> + ioc = QIO_CHANNEL(getQIOChannel(f)); >> + /* Multiple connections */ >> + assert(migrate_use_multifd()); > > Are you sure that's never triggerable by something trying to connect > badly? If it was it would be better to error than abort. > This is similar to what TCP multifd does, which was introduced by a429e7f4887313370. However, we will never get there if migrate_use_multifd() is false, because of the return at the first check in the function, so we could either drop the assert or just replace it with a warning. 
>> + start_migration = multifd_recv_new_channel(ioc, errp); > > And what does 'start_migration' mean here - is that meaning that we have > a full set of connections? > Yes, multifd_recv_new_channel returns true when correctly receiving all channels. > Dave > >> + } >> + >> + if (start_migration) { >> + migration_incoming_process(); >> + } >> +} >> + >> static void rdma_accept_incoming_migration(void *opaque) >> { >> RDMAContext *rdma = opaque; >> @@ -4004,8 +4070,7 @@ static void rdma_accept_incoming_migration(void *opaque) >> return; >> } >> >> - rdma->migration_started_on_destination = 1; >> - migration_fd_process_incoming(f, &local_err); >> + migration_rdma_process_incoming(f, rdma, &local_err); >> if (local_err) { >> error_reportf_err(local_err, "RDMA ERROR:"); >> } >> -- >> 1.8.3.1 >>
diff --git a/migration/rdma.c b/migration/rdma.c index 996afb0..ed8a015 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -3267,6 +3267,40 @@ static void rdma_cm_poll_handler(void *opaque) } } +static bool qemu_rdma_accept_setup(RDMAContext *rdma) +{ + RDMAContext *multifd_rdma = NULL; + int thread_count; + int i; + MultiFDRecvParams *multifd_recv_param; + thread_count = migrate_multifd_channels(); + /* create the multifd channels for RDMA */ + for (i = 0; i < thread_count; i++) { + if (get_multifd_recv_param(i, &multifd_recv_param) < 0) { + error_report("rdma: error getting multifd_recv_param(%d)", i); + return false; + } + + multifd_rdma = (RDMAContext *) multifd_recv_param->rdma; + if (multifd_rdma->cm_id == NULL) { + break; + } else { + multifd_rdma = NULL; + } + } + + if (multifd_rdma) { + qemu_set_fd_handler(rdma->channel->fd, + rdma_accept_incoming_migration, + NULL, (void *)(intptr_t)multifd_rdma); + } else { + qemu_set_fd_handler(rdma->channel->fd, rdma_cm_poll_handler, + NULL, rdma); + } + + return true; +} + static int qemu_rdma_accept(RDMAContext *rdma) { RDMACapabilities cap; @@ -3366,6 +3400,10 @@ static int qemu_rdma_accept(RDMAContext *rdma) qemu_set_fd_handler(rdma->channel->fd, rdma_accept_incoming_migration, NULL, (void *)(intptr_t)rdma->return_path); + } else if (migrate_use_multifd()) { + if (!qemu_rdma_accept_setup(rdma)) { + goto err_rdma_dest_wait; + } } else { qemu_set_fd_handler(rdma->channel->fd, rdma_cm_poll_handler, NULL, rdma); @@ -3976,6 +4014,34 @@ static QEMUFile *qemu_fopen_rdma(RDMAContext *rdma, const char *mode) return rioc->file; } +static void migration_rdma_process_incoming(QEMUFile *f, + RDMAContext *rdma, Error **errp) +{ + MigrationIncomingState *mis = migration_incoming_get_current(); + QIOChannel *ioc = NULL; + bool start_migration = false; + + if (!migrate_use_multifd()) { + rdma->migration_started_on_destination = 1; + migration_fd_process_incoming(f, errp); + return; + } + + if (!mis->from_src_file) { + 
mis->from_src_file = f; + qemu_file_set_blocking(f, false); + } else { + ioc = QIO_CHANNEL(getQIOChannel(f)); + /* Multiple connections */ + assert(migrate_use_multifd()); + start_migration = multifd_recv_new_channel(ioc, errp); + } + + if (start_migration) { + migration_incoming_process(); + } +} + static void rdma_accept_incoming_migration(void *opaque) { RDMAContext *rdma = opaque; @@ -4004,8 +4070,7 @@ static void rdma_accept_incoming_migration(void *opaque) return; } - rdma->migration_started_on_destination = 1; - migration_fd_process_incoming(f, &local_err); + migration_rdma_process_incoming(f, rdma, &local_err); if (local_err) { error_reportf_err(local_err, "RDMA ERROR:"); }