diff mbox series

[v4,10/18] migration/rdma: Create the multifd recv channels for RDMA

Message ID 1612339311-114805-11-git-send-email-zhengchuan@huawei.com (mailing list archive)
State New, archived
Headers show
Series Support Multifd for RDMA migration | expand

Commit Message

Zheng Chuan Feb. 3, 2021, 8:01 a.m. UTC
We still don't transmit anything through them, and we only build
the RDMA connections.

Signed-off-by: Zhimin Feng <fengzhimin1@huawei.com>
Signed-off-by: Chuan Zheng <zhengchuan@huawei.com>
---
 migration/rdma.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 67 insertions(+), 2 deletions(-)

Comments

Dr. David Alan Gilbert Feb. 3, 2021, 6:59 p.m. UTC | #1
* Chuan Zheng (zhengchuan@huawei.com) wrote:
> We still don't transmit anything through them, and we only build
> the RDMA connections.
> 
> Signed-off-by: Zhimin Feng <fengzhimin1@huawei.com>
> Signed-off-by: Chuan Zheng <zhengchuan@huawei.com>
> ---
>  migration/rdma.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 67 insertions(+), 2 deletions(-)
> 
> diff --git a/migration/rdma.c b/migration/rdma.c
> index 996afb0..ed8a015 100644
> --- a/migration/rdma.c
> +++ b/migration/rdma.c
> @@ -3267,6 +3267,40 @@ static void rdma_cm_poll_handler(void *opaque)
>      }
>  }
>  
> +static bool qemu_rdma_accept_setup(RDMAContext *rdma)
> +{
> +    RDMAContext *multifd_rdma = NULL;
> +    int thread_count;
> +    int i;
> +    MultiFDRecvParams *multifd_recv_param;
> +    thread_count = migrate_multifd_channels();
> +    /* create the multifd channels for RDMA */
> +    for (i = 0; i < thread_count; i++) {
> +        if (get_multifd_recv_param(i, &multifd_recv_param) < 0) {
> +            error_report("rdma: error getting multifd_recv_param(%d)", i);
> +            return false;
> +        }
> +
> +        multifd_rdma = (RDMAContext *) multifd_recv_param->rdma;
> +        if (multifd_rdma->cm_id == NULL) {
> +            break;
> +        } else {
> +            multifd_rdma = NULL;
> +        }

I'm confused by what this if is doing - what are the two cases?

> +    }
> +
> +    if (multifd_rdma) {
> +        qemu_set_fd_handler(rdma->channel->fd,
> +                            rdma_accept_incoming_migration,
> +                            NULL, (void *)(intptr_t)multifd_rdma);
> +    } else {
> +        qemu_set_fd_handler(rdma->channel->fd, rdma_cm_poll_handler,
> +                            NULL, rdma);
> +    }
> +
> +    return true;
> +}
> +
>  static int qemu_rdma_accept(RDMAContext *rdma)
>  {
>      RDMACapabilities cap;
> @@ -3366,6 +3400,10 @@ static int qemu_rdma_accept(RDMAContext *rdma)
>          qemu_set_fd_handler(rdma->channel->fd, rdma_accept_incoming_migration,
>                              NULL,
>                              (void *)(intptr_t)rdma->return_path);
> +    } else if (migrate_use_multifd()) {
> +        if (!qemu_rdma_accept_setup(rdma)) {
> +            goto err_rdma_dest_wait;
> +        }
>      } else {
>          qemu_set_fd_handler(rdma->channel->fd, rdma_cm_poll_handler,
>                              NULL, rdma);
> @@ -3976,6 +4014,34 @@ static QEMUFile *qemu_fopen_rdma(RDMAContext *rdma, const char *mode)
>      return rioc->file;
>  }
>  
> +static void migration_rdma_process_incoming(QEMUFile *f,
> +                                            RDMAContext *rdma, Error **errp)
> +{
> +    MigrationIncomingState *mis = migration_incoming_get_current();
> +    QIOChannel *ioc = NULL;
> +    bool start_migration = false;
> +
> +    if (!migrate_use_multifd()) {
> +        rdma->migration_started_on_destination = 1;
> +        migration_fd_process_incoming(f, errp);
> +        return;
> +    }
> +
> +    if (!mis->from_src_file) {
> +        mis->from_src_file = f;
> +        qemu_file_set_blocking(f, false);
> +    } else {
> +        ioc = QIO_CHANNEL(getQIOChannel(f));
> +        /* Multiple connections */
> +        assert(migrate_use_multifd());

Are you sure that's never triggerable by something trying to connect
badly? If it was it would be better to error than abort.

> +        start_migration = multifd_recv_new_channel(ioc, errp);

And what does 'start_migration' mean here - is that meaning that we have
a full set of connections?

Dave

> +    }
> +
> +    if (start_migration) {
> +        migration_incoming_process();
> +    }
> +}
> +
>  static void rdma_accept_incoming_migration(void *opaque)
>  {
>      RDMAContext *rdma = opaque;
> @@ -4004,8 +4070,7 @@ static void rdma_accept_incoming_migration(void *opaque)
>          return;
>      }
>  
> -    rdma->migration_started_on_destination = 1;
> -    migration_fd_process_incoming(f, &local_err);
> +    migration_rdma_process_incoming(f, rdma, &local_err);
>      if (local_err) {
>          error_reportf_err(local_err, "RDMA ERROR:");
>      }
> -- 
> 1.8.3.1
>
Zheng Chuan March 6, 2021, 8:45 a.m. UTC | #2
On 2021/2/4 2:59, Dr. David Alan Gilbert wrote:
> * Chuan Zheng (zhengchuan@huawei.com) wrote:
>> We still don't transmit anything through them, and we only build
>> the RDMA connections.
>>
>> Signed-off-by: Zhimin Feng <fengzhimin1@huawei.com>
>> Signed-off-by: Chuan Zheng <zhengchuan@huawei.com>
>> ---
>>  migration/rdma.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
>>  1 file changed, 67 insertions(+), 2 deletions(-)
>>
>> diff --git a/migration/rdma.c b/migration/rdma.c
>> index 996afb0..ed8a015 100644
>> --- a/migration/rdma.c
>> +++ b/migration/rdma.c
>> @@ -3267,6 +3267,40 @@ static void rdma_cm_poll_handler(void *opaque)
>>      }
>>  }
>>  
>> +static bool qemu_rdma_accept_setup(RDMAContext *rdma)
>> +{
>> +    RDMAContext *multifd_rdma = NULL;
>> +    int thread_count;
>> +    int i;
>> +    MultiFDRecvParams *multifd_recv_param;
>> +    thread_count = migrate_multifd_channels();
>> +    /* create the multifd channels for RDMA */
>> +    for (i = 0; i < thread_count; i++) {
>> +        if (get_multifd_recv_param(i, &multifd_recv_param) < 0) {
>> +            error_report("rdma: error getting multifd_recv_param(%d)", i);
>> +            return false;
>> +        }
>> +
>> +        multifd_rdma = (RDMAContext *) multifd_recv_param->rdma;
>> +        if (multifd_rdma->cm_id == NULL) {
>> +            break;
>> +        } else {
>> +            multifd_rdma = NULL;
>> +        }
> 
> I'm confused by what this if is doing - what are the two cases?
> 
Since we share the CM channel and CM id with the main thread,
we assign the cm_id through the rdma_accept_incoming_migration() callback for the first multifd channel whose cm_id is still NULL.
Once every multifd channel has its cm_id assigned, we install the normal rdma_cm_poll_handler() as the handler instead.

>> +    }
>> +
>> +    if (multifd_rdma) {
>> +        qemu_set_fd_handler(rdma->channel->fd,
>> +                            rdma_accept_incoming_migration,
>> +                            NULL, (void *)(intptr_t)multifd_rdma);
>> +    } else {
>> +        qemu_set_fd_handler(rdma->channel->fd, rdma_cm_poll_handler,
>> +                            NULL, rdma);
>> +    }
>> +
>> +    return true;
>> +}
>> +
>>  static int qemu_rdma_accept(RDMAContext *rdma)
>>  {
>>      RDMACapabilities cap;
>> @@ -3366,6 +3400,10 @@ static int qemu_rdma_accept(RDMAContext *rdma)
>>          qemu_set_fd_handler(rdma->channel->fd, rdma_accept_incoming_migration,
>>                              NULL,
>>                              (void *)(intptr_t)rdma->return_path);
>> +    } else if (migrate_use_multifd()) {
>> +        if (!qemu_rdma_accept_setup(rdma)) {
>> +            goto err_rdma_dest_wait;
>> +        }
>>      } else {
>>          qemu_set_fd_handler(rdma->channel->fd, rdma_cm_poll_handler,
>>                              NULL, rdma);
>> @@ -3976,6 +4014,34 @@ static QEMUFile *qemu_fopen_rdma(RDMAContext *rdma, const char *mode)
>>      return rioc->file;
>>  }
>>  
>> +static void migration_rdma_process_incoming(QEMUFile *f,
>> +                                            RDMAContext *rdma, Error **errp)
>> +{
>> +    MigrationIncomingState *mis = migration_incoming_get_current();
>> +    QIOChannel *ioc = NULL;
>> +    bool start_migration = false;
>> +
>> +    if (!migrate_use_multifd()) {
>> +        rdma->migration_started_on_destination = 1;
>> +        migration_fd_process_incoming(f, errp);
>> +        return;
>> +    }
>> +
>> +    if (!mis->from_src_file) {
>> +        mis->from_src_file = f;
>> +        qemu_file_set_blocking(f, false);
>> +    } else {
>> +        ioc = QIO_CHANNEL(getQIOChannel(f));
>> +        /* Multiple connections */
>> +        assert(migrate_use_multifd());
> 
> Are you sure that's never triggerable by something trying to connect
> badly? If it was it would be better to error than abort.
> 
This is similar to what TCP multifd does, which was introduced by a429e7f4887313370.
However, we can never get there when migrate_use_multifd() is false, because the function returns at its first check; so we could either drop the assert or just replace it with a warning.

>> +        start_migration = multifd_recv_new_channel(ioc, errp);
> 
> And what does 'start_migration' mean here - is that meaning that we have
> a full set of connections?
> 
Yes, multifd_recv_new_channel() returns true once all channels have been received correctly.

> Dave
> 
>> +    }
>> +
>> +    if (start_migration) {
>> +        migration_incoming_process();
>> +    }
>> +}
>> +
>>  static void rdma_accept_incoming_migration(void *opaque)
>>  {
>>      RDMAContext *rdma = opaque;
>> @@ -4004,8 +4070,7 @@ static void rdma_accept_incoming_migration(void *opaque)
>>          return;
>>      }
>>  
>> -    rdma->migration_started_on_destination = 1;
>> -    migration_fd_process_incoming(f, &local_err);
>> +    migration_rdma_process_incoming(f, rdma, &local_err);
>>      if (local_err) {
>>          error_reportf_err(local_err, "RDMA ERROR:");
>>      }
>> -- 
>> 1.8.3.1
>>
diff mbox series

Patch

diff --git a/migration/rdma.c b/migration/rdma.c
index 996afb0..ed8a015 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -3267,6 +3267,40 @@  static void rdma_cm_poll_handler(void *opaque)
     }
 }
 
+static bool qemu_rdma_accept_setup(RDMAContext *rdma)
+{
+    RDMAContext *multifd_rdma = NULL;
+    int thread_count;
+    int i;
+    MultiFDRecvParams *multifd_recv_param;
+    thread_count = migrate_multifd_channels();
+    /* create the multifd channels for RDMA */
+    for (i = 0; i < thread_count; i++) {
+        if (get_multifd_recv_param(i, &multifd_recv_param) < 0) {
+            error_report("rdma: error getting multifd_recv_param(%d)", i);
+            return false;
+        }
+
+        multifd_rdma = (RDMAContext *) multifd_recv_param->rdma;
+        if (multifd_rdma->cm_id == NULL) {
+            break;
+        } else {
+            multifd_rdma = NULL;
+        }
+    }
+
+    if (multifd_rdma) {
+        qemu_set_fd_handler(rdma->channel->fd,
+                            rdma_accept_incoming_migration,
+                            NULL, (void *)(intptr_t)multifd_rdma);
+    } else {
+        qemu_set_fd_handler(rdma->channel->fd, rdma_cm_poll_handler,
+                            NULL, rdma);
+    }
+
+    return true;
+}
+
 static int qemu_rdma_accept(RDMAContext *rdma)
 {
     RDMACapabilities cap;
@@ -3366,6 +3400,10 @@  static int qemu_rdma_accept(RDMAContext *rdma)
         qemu_set_fd_handler(rdma->channel->fd, rdma_accept_incoming_migration,
                             NULL,
                             (void *)(intptr_t)rdma->return_path);
+    } else if (migrate_use_multifd()) {
+        if (!qemu_rdma_accept_setup(rdma)) {
+            goto err_rdma_dest_wait;
+        }
     } else {
         qemu_set_fd_handler(rdma->channel->fd, rdma_cm_poll_handler,
                             NULL, rdma);
@@ -3976,6 +4014,34 @@  static QEMUFile *qemu_fopen_rdma(RDMAContext *rdma, const char *mode)
     return rioc->file;
 }
 
+static void migration_rdma_process_incoming(QEMUFile *f,
+                                            RDMAContext *rdma, Error **errp)
+{
+    MigrationIncomingState *mis = migration_incoming_get_current();
+    QIOChannel *ioc = NULL;
+    bool start_migration = false;
+
+    if (!migrate_use_multifd()) {
+        rdma->migration_started_on_destination = 1;
+        migration_fd_process_incoming(f, errp);
+        return;
+    }
+
+    if (!mis->from_src_file) {
+        mis->from_src_file = f;
+        qemu_file_set_blocking(f, false);
+    } else {
+        ioc = QIO_CHANNEL(getQIOChannel(f));
+        /* Multiple connections */
+        assert(migrate_use_multifd());
+        start_migration = multifd_recv_new_channel(ioc, errp);
+    }
+
+    if (start_migration) {
+        migration_incoming_process();
+    }
+}
+
 static void rdma_accept_incoming_migration(void *opaque)
 {
     RDMAContext *rdma = opaque;
@@ -4004,8 +4070,7 @@  static void rdma_accept_incoming_migration(void *opaque)
         return;
     }
 
-    rdma->migration_started_on_destination = 1;
-    migration_fd_process_incoming(f, &local_err);
+    migration_rdma_process_incoming(f, rdma, &local_err);
     if (local_err) {
         error_reportf_err(local_err, "RDMA ERROR:");
     }