@@ -2458,7 +2458,54 @@ err_rdma_source_init:
return -1;
}
-static int qemu_rdma_connect(RDMAContext *rdma, Error **errp)
+#define RDMA_GET_EVENT_INTERVAL 100000 /* 100ms, in microseconds */
+/*
+ * Wait up to @sec seconds for a CM event on rdma->channel by polling it in
+ * non-blocking mode. Returns 0 with *cm_event set, or non-zero on failure.
+ * The channel's original file status flags are restored on every exit path.
+ */
+static int qemu_get_cm_event_timeout(RDMAContext *rdma,
+                                     struct rdma_cm_event **cm_event,
+                                     long sec, Error **errp)
+{
+    long waited_us = 0;
+    int ret;
+    int save_flags = fcntl(rdma->channel->fd, F_GETFL);
+
+    if (save_flags == -1) {
+        perror("failed to get file flags");
+        return save_flags;
+    }
+    ret = fcntl(rdma->channel->fd, F_SETFL, save_flags | O_NONBLOCK);
+    if (ret) {
+        perror("failed to set file flags nonblocking");
+        return ret;
+    }
+
+    /* EAGAIN only means "no event yet": retry quietly until the timeout */
+    while ((ret = rdma_get_cm_event(rdma->channel, cm_event)) &&
+           errno == EAGAIN && waited_us < sec * 1000000) {
+        waited_us += RDMA_GET_EVENT_INTERVAL;
+        usleep(RDMA_GET_EVENT_INTERVAL);
+    }
+    if (ret) {
+        perror("rdma_get_cm_event after rdma_connect");
+        ERROR(errp, "connecting to destination!");
+    }
+
+    /* restore flags even on failure so the fd is not left non-blocking */
+    if (fcntl(rdma->channel->fd, F_SETFL, save_flags)) {
+        perror("failed to restore file flags");
+        if (!ret) {
+            rdma_ack_cm_event(*cm_event);
+            *cm_event = NULL; /* the acked event must not be reused */
+            ret = -1;
+        }
+    }
+    return ret;
+}
+
+static int qemu_rdma_connect(RDMAContext *rdma, Error **errp, bool return_path)
{
RDMACapabilities cap = {
.version = RDMA_CONTROL_VERSION_CURRENT,
@@ -2496,7 +2543,11 @@ static int qemu_rdma_connect(RDMAContext *rdma, Error **errp)
goto err_rdma_source_connect;
}
- ret = rdma_get_cm_event(rdma->channel, &cm_event);
+ if (return_path) {
+ ret = qemu_get_cm_event_timeout(rdma, &cm_event, 2, errp);
+ } else {
+ ret = rdma_get_cm_event(rdma->channel, &cm_event);
+ }
if (ret) {
perror("rdma_get_cm_event after rdma_connect");
ERROR(errp, "connecting to destination!");
@@ -4108,7 +4159,7 @@ void rdma_start_outgoing_migration(void *opaque,
}
trace_rdma_start_outgoing_migration_after_rdma_source_init();
- ret = qemu_rdma_connect(rdma, errp);
+ ret = qemu_rdma_connect(rdma, errp, false);
if (ret) {
goto err;
@@ -4129,7 +4180,7 @@ void rdma_start_outgoing_migration(void *opaque,
goto return_path_err;
}
- ret = qemu_rdma_connect(rdma_return_path, errp);
+ ret = qemu_rdma_connect(rdma_return_path, errp, true);
if (ret) {
goto return_path_err;
The source side always blocks if postcopy is only enabled on the source side, and users are not able to cancel the migration in this case. Here we try to get the cm_event every 100ms until the timeout expires. Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com> --- migration/rdma.c | 59 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 55 insertions(+), 4 deletions(-)