Message ID | 20210525080552.28259-4-lizhijian@cn.fujitsu.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v2,1/4] migration/rdma: cleanup rdma in rdma_start_incoming_migration error path | expand |
* Li Zhijian (lizhijian@cn.fujitsu.com) wrote: > source side always blocks if postcopy is only enabled at source side. > users are not able to cancel this migration in this case. > > Let source side have chance to cancel this migration > > Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com> > --- > V2: utilize poll to check cm event > --- > migration/rdma.c | 42 ++++++++++++++++++++++++++++++++++++++---- > 1 file changed, 38 insertions(+), 4 deletions(-) > > diff --git a/migration/rdma.c b/migration/rdma.c > index d829d08d076..f67e21b4f54 100644 > --- a/migration/rdma.c > +++ b/migration/rdma.c > @@ -36,6 +36,7 @@ > #include <rdma/rdma_cma.h> > #include "trace.h" > #include "qom/object.h" > +#include <poll.h> > > /* > * Print and error on both the Monitor and the Log file. > @@ -2460,7 +2461,36 @@ err_rdma_source_init: > return -1; > } > > -static int qemu_rdma_connect(RDMAContext *rdma, Error **errp) > +static int qemu_get_cm_event_timeout(RDMAContext *rdma, > + struct rdma_cm_event **cm_event, > + long msec, Error **errp) > +{ > + int ret; > + struct pollfd poll_fd = { > + .fd = rdma->channel->fd, > + .events = POLLIN, > + .revents = 0 > + }; > + > + do { > + ret = poll(&poll_fd, 1, msec); > + } while (ret < 0 && errno == EINTR); > + > + if (ret == 0) { > + ERROR(errp, "poll cm event timeout"); > + return -1; > + } else if (ret < 0) { > + ERROR(errp, "failed to pull cm event, errno=%i", errno); Typo: 'poll' - I can fix that. 
> + return -1; > + } else if (poll_fd.revents & POLLIN) { > + return rdma_get_cm_event(rdma->channel, cm_event); > + } else { > + ERROR(errp, "no POLLIN event, revent=%x", poll_fd.revents); > + return -1; > + } > +} > + > +static int qemu_rdma_connect(RDMAContext *rdma, Error **errp, bool return_path) > { > RDMACapabilities cap = { > .version = RDMA_CONTROL_VERSION_CURRENT, > @@ -2498,7 +2528,11 @@ static int qemu_rdma_connect(RDMAContext *rdma, Error **errp) > goto err_rdma_source_connect; > } > > - ret = rdma_get_cm_event(rdma->channel, &cm_event); > + if (return_path) { > + ret = qemu_get_cm_event_timeout(rdma, &cm_event, 5000, errp); Fixed timeouts are not a great fix; but I can't think of anything better; the only alternative would be to register the fd on the main thread's poll and get it to be called back when the event happened. But for now; Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> > + } else { > + ret = rdma_get_cm_event(rdma->channel, &cm_event); > + } > if (ret) { > perror("rdma_get_cm_event after rdma_connect"); > ERROR(errp, "connecting to destination!"); > @@ -4111,7 +4145,7 @@ void rdma_start_outgoing_migration(void *opaque, > } > > trace_rdma_start_outgoing_migration_after_rdma_source_init(); > - ret = qemu_rdma_connect(rdma, errp); > + ret = qemu_rdma_connect(rdma, errp, false); > > if (ret) { > goto err; > @@ -4132,7 +4166,7 @@ void rdma_start_outgoing_migration(void *opaque, > goto return_path_err; > } > > - ret = qemu_rdma_connect(rdma_return_path, errp); > + ret = qemu_rdma_connect(rdma_return_path, errp, true); > > if (ret) { > goto return_path_err; > -- > 2.30.2 > > >
diff --git a/migration/rdma.c b/migration/rdma.c index d829d08d076..f67e21b4f54 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -36,6 +36,7 @@ #include <rdma/rdma_cma.h> #include "trace.h" #include "qom/object.h" +#include <poll.h> /* * Print and error on both the Monitor and the Log file. @@ -2460,7 +2461,36 @@ err_rdma_source_init: return -1; } -static int qemu_rdma_connect(RDMAContext *rdma, Error **errp) +static int qemu_get_cm_event_timeout(RDMAContext *rdma, + struct rdma_cm_event **cm_event, + long msec, Error **errp) +{ + int ret; + struct pollfd poll_fd = { + .fd = rdma->channel->fd, + .events = POLLIN, + .revents = 0 + }; + + do { + ret = poll(&poll_fd, 1, msec); + } while (ret < 0 && errno == EINTR); + + if (ret == 0) { + ERROR(errp, "poll cm event timeout"); + return -1; + } else if (ret < 0) { + ERROR(errp, "failed to pull cm event, errno=%i", errno); + return -1; + } else if (poll_fd.revents & POLLIN) { + return rdma_get_cm_event(rdma->channel, cm_event); + } else { + ERROR(errp, "no POLLIN event, revent=%x", poll_fd.revents); + return -1; + } +} + +static int qemu_rdma_connect(RDMAContext *rdma, Error **errp, bool return_path) { RDMACapabilities cap = { .version = RDMA_CONTROL_VERSION_CURRENT, @@ -2498,7 +2528,11 @@ static int qemu_rdma_connect(RDMAContext *rdma, Error **errp) goto err_rdma_source_connect; } - ret = rdma_get_cm_event(rdma->channel, &cm_event); + if (return_path) { + ret = qemu_get_cm_event_timeout(rdma, &cm_event, 5000, errp); + } else { + ret = rdma_get_cm_event(rdma->channel, &cm_event); + } if (ret) { perror("rdma_get_cm_event after rdma_connect"); ERROR(errp, "connecting to destination!"); @@ -4111,7 +4145,7 @@ void rdma_start_outgoing_migration(void *opaque, } trace_rdma_start_outgoing_migration_after_rdma_source_init(); - ret = qemu_rdma_connect(rdma, errp); + ret = qemu_rdma_connect(rdma, errp, false); if (ret) { goto err; @@ -4132,7 +4166,7 @@ void rdma_start_outgoing_migration(void *opaque, goto return_path_err; } - 
ret = qemu_rdma_connect(rdma_return_path, errp); + ret = qemu_rdma_connect(rdma_return_path, errp, true); if (ret) { goto return_path_err;
The source side always blocks if postcopy is enabled only on the source side, and users are not able to cancel the migration in this case. Give the source side a chance to cancel the migration. Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com> --- V2: utilize poll to check cm event --- migration/rdma.c | 42 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-)