Message ID | 20240104004452.324068-4-hao.xiang@bytedance.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Use Intel DSA accelerator to offload zero page checking in multifd live migration. | expand |
> On 04-Jan-2024, at 6:14 AM, Hao Xiang <hao.xiang@bytedance.com> wrote: > > From: Juan Quintela <quintela@redhat.com> > > This implements the zero page dection and handling. > > Signed-off-by: Juan Quintela <quintela@redhat.com> > --- > migration/multifd.c | 41 +++++++++++++++++++++++++++++++++++++++-- > migration/multifd.h | 5 +++++ > 2 files changed, 44 insertions(+), 2 deletions(-) > > diff --git a/migration/multifd.c b/migration/multifd.c > index 5a1f50c7e8..756673029d 100644 > --- a/migration/multifd.c > +++ b/migration/multifd.c > @@ -11,6 +11,7 @@ > */ > > #include "qemu/osdep.h" > +#include "qemu/cutils.h" > #include "qemu/rcu.h" > #include "exec/target_page.h" > #include "sysemu/sysemu.h" > @@ -279,6 +280,12 @@ static void multifd_send_fill_packet(MultiFDSendParams *p) > > packet->offset[i] = cpu_to_be64(temp); > } > + for (i = 0; i < p->zero_num; i++) { > + /* there are architectures where ram_addr_t is 32 bit */ > + uint64_t temp = p->zero[i]; > + > + packet->offset[p->normal_num + i] = cpu_to_be64(temp); > + } > } > > static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) > @@ -361,6 +368,18 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) > p->normal[i] = offset; > } > > + for (i = 0; i < p->zero_num; i++) { > + uint64_t offset = be64_to_cpu(packet->offset[p->normal_num + i]); > + > + if (offset > (p->block->used_length - p->page_size)) { > + error_setg(errp, "multifd: offset too long %" PRIu64 > + " (max " RAM_ADDR_FMT ")", > + offset, p->block->used_length); > + return -1; > + } > + p->zero[i] = offset; > + } > + > return 0; > } > > @@ -664,6 +683,8 @@ static void *multifd_send_thread(void *opaque) > MultiFDSendParams *p = opaque; > MigrationThread *thread = NULL; > Error *local_err = NULL; > + /* qemu older than 8.2 don't understand zero page on multifd channel */ > + bool use_zero_page = !migrate_use_main_zero_page(); > int ret = 0; > bool use_zero_copy_send = migrate_zero_copy_send(); > > @@ -689,6 
+710,7 @@ static void *multifd_send_thread(void *opaque) > qemu_mutex_lock(&p->mutex); > > if (p->pending_job) { > + RAMBlock *rb = p->pages->block; > uint64_t packet_num = p->packet_num; > uint32_t flags; > p->normal_num = 0; > @@ -701,8 +723,16 @@ static void *multifd_send_thread(void *opaque) > } > > for (int i = 0; i < p->pages->num; i++) { > - p->normal[p->normal_num] = p->pages->offset[i]; > - p->normal_num++; > + uint64_t offset = p->pages->offset[i]; > + if (use_zero_page && > + buffer_is_zero(rb->host + offset, p->page_size)) { > + p->zero[p->zero_num] = offset; > + p->zero_num++; > + ram_release_page(rb->idstr, offset); > + } else { > + p->normal[p->normal_num] = offset; > + p->normal_num++; > + } > } > > if (p->normal_num) { > @@ -1155,6 +1185,13 @@ static void *multifd_recv_thread(void *opaque) > } > } > > + for (int i = 0; i < p->zero_num; i++) { > + void *page = p->host + p->zero[i]; > + if (!buffer_is_zero(page, p->page_size)) { > + memset(page, 0, p->page_size); > + } > + } > + I am wondering if zeroing the zero page on the destination can also be offloaded to DSA. Can it help in reducing cpu consumption on the destination in case of multifd-based migration? > if (flags & MULTIFD_FLAG_SYNC) { > qemu_sem_post(&multifd_recv_state->sem_sync); > qemu_sem_wait(&p->sem_sync); > diff --git a/migration/multifd.h b/migration/multifd.h > index d587b0e19c..13762900d4 100644 > --- a/migration/multifd.h > +++ b/migration/multifd.h > @@ -53,6 +53,11 @@ typedef struct { > uint32_t unused32[1]; /* Reserved for future use */ > uint64_t unused64[3]; /* Reserved for future use */ > char ramblock[256]; > + /* > + * This array contains the pointers to: > + * - normal pages (initial normal_pages entries) > + * - zero pages (following zero_pages entries) > + */ > uint64_t offset[]; > } __attribute__((packed)) MultiFDPacket_t; > > -- > 2.30.2 > > >
On Sun, Jan 14, 2024 at 11:01 PM Shivam Kumar <shivam.kumar1@nutanix.com> wrote: > > > > > On 04-Jan-2024, at 6:14 AM, Hao Xiang <hao.xiang@bytedance.com> wrote: > > > > From: Juan Quintela <quintela@redhat.com> > > > > This implements the zero page dection and handling. > > > > Signed-off-by: Juan Quintela <quintela@redhat.com> > > --- > > migration/multifd.c | 41 +++++++++++++++++++++++++++++++++++++++-- > > migration/multifd.h | 5 +++++ > > 2 files changed, 44 insertions(+), 2 deletions(-) > > > > diff --git a/migration/multifd.c b/migration/multifd.c > > index 5a1f50c7e8..756673029d 100644 > > --- a/migration/multifd.c > > +++ b/migration/multifd.c > > @@ -11,6 +11,7 @@ > > */ > > > > #include "qemu/osdep.h" > > +#include "qemu/cutils.h" > > #include "qemu/rcu.h" > > #include "exec/target_page.h" > > #include "sysemu/sysemu.h" > > @@ -279,6 +280,12 @@ static void multifd_send_fill_packet(MultiFDSendParams *p) > > > > packet->offset[i] = cpu_to_be64(temp); > > } > > + for (i = 0; i < p->zero_num; i++) { > > + /* there are architectures where ram_addr_t is 32 bit */ > > + uint64_t temp = p->zero[i]; > > + > > + packet->offset[p->normal_num + i] = cpu_to_be64(temp); > > + } > > } > > > > static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) > > @@ -361,6 +368,18 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) > > p->normal[i] = offset; > > } > > > > + for (i = 0; i < p->zero_num; i++) { > > + uint64_t offset = be64_to_cpu(packet->offset[p->normal_num + i]); > > + > > + if (offset > (p->block->used_length - p->page_size)) { > > + error_setg(errp, "multifd: offset too long %" PRIu64 > > + " (max " RAM_ADDR_FMT ")", > > + offset, p->block->used_length); > > + return -1; > > + } > > + p->zero[i] = offset; > > + } > > + > > return 0; > > } > > > > @@ -664,6 +683,8 @@ static void *multifd_send_thread(void *opaque) > > MultiFDSendParams *p = opaque; > > MigrationThread *thread = NULL; > > Error *local_err = NULL; > > + /* 
qemu older than 8.2 don't understand zero page on multifd channel */ > > + bool use_zero_page = !migrate_use_main_zero_page(); > > int ret = 0; > > bool use_zero_copy_send = migrate_zero_copy_send(); > > > > @@ -689,6 +710,7 @@ static void *multifd_send_thread(void *opaque) > > qemu_mutex_lock(&p->mutex); > > > > if (p->pending_job) { > > + RAMBlock *rb = p->pages->block; > > uint64_t packet_num = p->packet_num; > > uint32_t flags; > > p->normal_num = 0; > > @@ -701,8 +723,16 @@ static void *multifd_send_thread(void *opaque) > > } > > > > for (int i = 0; i < p->pages->num; i++) { > > - p->normal[p->normal_num] = p->pages->offset[i]; > > - p->normal_num++; > > + uint64_t offset = p->pages->offset[i]; > > + if (use_zero_page && > > + buffer_is_zero(rb->host + offset, p->page_size)) { > > + p->zero[p->zero_num] = offset; > > + p->zero_num++; > > + ram_release_page(rb->idstr, offset); > > + } else { > > + p->normal[p->normal_num] = offset; > > + p->normal_num++; > > + } > > } > > > > if (p->normal_num) { > > @@ -1155,6 +1185,13 @@ static void *multifd_recv_thread(void *opaque) > > } > > } > > > > + for (int i = 0; i < p->zero_num; i++) { > > + void *page = p->host + p->zero[i]; > > + if (!buffer_is_zero(page, p->page_size)) { > > + memset(page, 0, p->page_size); > > + } > > + } > > + > I am wondering if zeroing the zero page on the destination can also be offloaded to DSA. Can it help in reducing cpu consumption on the destination in case of multifd-based migration? I have that change coded up and I have tested for performance. It's not saving as much CPU cycles as I hoped. The problem is that on the sender side, we run zero page detection on every single page but on the receiver side, we only zero-ing the pages if the sender tells us so. For instance, if a multifd packet with 128 pages are all zero pages, we can offload the zero-ing pages on the entire 128 pages in a single DSA operation and that's the best case. 
In a worse case, if a multifd packet with 128 pages only has say 10 zero pages, we can offload the zero-ing pages on only the 10 pages instead of the entire 128 pages. Considering DSA software overhead, that is not a good deal. In the future, if we refactor the multifd path to separate zero pages in its own packet, it will be a different story. For instance, if there are 1024 non-contiguous zero pages detected, they will be sent across multiple packets. With a new packet format, those pages can be put into the same packet (and we can put more than 1024 zero pages in a packet) and DSA offloading can become much more effective. We can add that function after we have the new packet format implemented. > > if (flags & MULTIFD_FLAG_SYNC) { > > qemu_sem_post(&multifd_recv_state->sem_sync); > > qemu_sem_wait(&p->sem_sync); > > diff --git a/migration/multifd.h b/migration/multifd.h > > index d587b0e19c..13762900d4 100644 > > --- a/migration/multifd.h > > +++ b/migration/multifd.h > > @@ -53,6 +53,11 @@ typedef struct { > > uint32_t unused32[1]; /* Reserved for future use */ > > uint64_t unused64[3]; /* Reserved for future use */ > > char ramblock[256]; > > + /* > > + * This array contains the pointers to: > > + * - normal pages (initial normal_pages entries) > > + * - zero pages (following zero_pages entries) > > + */ > > uint64_t offset[]; > > } __attribute__((packed)) MultiFDPacket_t; > > > > -- > > 2.30.2 > > > > > > >
On Thu, Jan 04, 2024 at 12:44:35AM +0000, Hao Xiang wrote: > From: Juan Quintela <quintela@redhat.com> > > This implements the zero page dection and handling. > > Signed-off-by: Juan Quintela <quintela@redhat.com> > --- > migration/multifd.c | 41 +++++++++++++++++++++++++++++++++++++++-- > migration/multifd.h | 5 +++++ > 2 files changed, 44 insertions(+), 2 deletions(-) > > diff --git a/migration/multifd.c b/migration/multifd.c > index 5a1f50c7e8..756673029d 100644 > --- a/migration/multifd.c > +++ b/migration/multifd.c > @@ -11,6 +11,7 @@ > */ > > #include "qemu/osdep.h" > +#include "qemu/cutils.h" > #include "qemu/rcu.h" > #include "exec/target_page.h" > #include "sysemu/sysemu.h" > @@ -279,6 +280,12 @@ static void multifd_send_fill_packet(MultiFDSendParams *p) > > packet->offset[i] = cpu_to_be64(temp); > } > + for (i = 0; i < p->zero_num; i++) { > + /* there are architectures where ram_addr_t is 32 bit */ > + uint64_t temp = p->zero[i]; > + > + packet->offset[p->normal_num + i] = cpu_to_be64(temp); > + } > } I think changes like this need to be moved into the previous patch. I got quite confused when reading the previous one and only understood what happens until now. Fabiano, if you're going to pick these ones out and post separately, please also consider. Perhaps squashing them together?
On Wed, Jan 31, 2024 at 9:22 PM Peter Xu <peterx@redhat.com> wrote: > > On Thu, Jan 04, 2024 at 12:44:35AM +0000, Hao Xiang wrote: > > From: Juan Quintela <quintela@redhat.com> > > > > This implements the zero page dection and handling. > > > > Signed-off-by: Juan Quintela <quintela@redhat.com> > > --- > > migration/multifd.c | 41 +++++++++++++++++++++++++++++++++++++++-- > > migration/multifd.h | 5 +++++ > > 2 files changed, 44 insertions(+), 2 deletions(-) > > > > diff --git a/migration/multifd.c b/migration/multifd.c > > index 5a1f50c7e8..756673029d 100644 > > --- a/migration/multifd.c > > +++ b/migration/multifd.c > > @@ -11,6 +11,7 @@ > > */ > > > > #include "qemu/osdep.h" > > +#include "qemu/cutils.h" > > #include "qemu/rcu.h" > > #include "exec/target_page.h" > > #include "sysemu/sysemu.h" > > @@ -279,6 +280,12 @@ static void multifd_send_fill_packet(MultiFDSendParams *p) > > > > packet->offset[i] = cpu_to_be64(temp); > > } > > + for (i = 0; i < p->zero_num; i++) { > > + /* there are architectures where ram_addr_t is 32 bit */ > > + uint64_t temp = p->zero[i]; > > + > > + packet->offset[p->normal_num + i] = cpu_to_be64(temp); > > + } > > } > > I think changes like this needs to be moved into the previous patch. I got > quite confused when reading previous one and only understood what happens > until now. Fabiano, if you're going to pick these ones out and post > separately, please also consider. Perhaps squashing them together? > Discussed with Fabiano on a separate thread here https://lore.kernel.org/all/CAAYibXi=WB5wfvLFM0b=d9oJf66Lb7FTGoNzzZ-tvK4RbBXxDw@mail.gmail.com/ I am moving the original multifd zero page checking changes into a separate patchset. There is some necessary refactoring work on top of the original series. I will send that out this week. > -- > Peter Xu >
diff --git a/migration/multifd.c b/migration/multifd.c index 5a1f50c7e8..756673029d 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -11,6 +11,7 @@ */ #include "qemu/osdep.h" +#include "qemu/cutils.h" #include "qemu/rcu.h" #include "exec/target_page.h" #include "sysemu/sysemu.h" @@ -279,6 +280,12 @@ static void multifd_send_fill_packet(MultiFDSendParams *p) packet->offset[i] = cpu_to_be64(temp); } + for (i = 0; i < p->zero_num; i++) { + /* there are architectures where ram_addr_t is 32 bit */ + uint64_t temp = p->zero[i]; + + packet->offset[p->normal_num + i] = cpu_to_be64(temp); + } } static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) @@ -361,6 +368,18 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) p->normal[i] = offset; } + for (i = 0; i < p->zero_num; i++) { + uint64_t offset = be64_to_cpu(packet->offset[p->normal_num + i]); + + if (offset > (p->block->used_length - p->page_size)) { + error_setg(errp, "multifd: offset too long %" PRIu64 + " (max " RAM_ADDR_FMT ")", + offset, p->block->used_length); + return -1; + } + p->zero[i] = offset; + } + return 0; } @@ -664,6 +683,8 @@ static void *multifd_send_thread(void *opaque) MultiFDSendParams *p = opaque; MigrationThread *thread = NULL; Error *local_err = NULL; + /* qemu older than 8.2 don't understand zero page on multifd channel */ + bool use_zero_page = !migrate_use_main_zero_page(); int ret = 0; bool use_zero_copy_send = migrate_zero_copy_send(); @@ -689,6 +710,7 @@ static void *multifd_send_thread(void *opaque) qemu_mutex_lock(&p->mutex); if (p->pending_job) { + RAMBlock *rb = p->pages->block; uint64_t packet_num = p->packet_num; uint32_t flags; p->normal_num = 0; @@ -701,8 +723,16 @@ static void *multifd_send_thread(void *opaque) } for (int i = 0; i < p->pages->num; i++) { - p->normal[p->normal_num] = p->pages->offset[i]; - p->normal_num++; + uint64_t offset = p->pages->offset[i]; + if (use_zero_page && + buffer_is_zero(rb->host + offset, 
p->page_size)) { + p->zero[p->zero_num] = offset; + p->zero_num++; + ram_release_page(rb->idstr, offset); + } else { + p->normal[p->normal_num] = offset; + p->normal_num++; + } } if (p->normal_num) { @@ -1155,6 +1185,13 @@ static void *multifd_recv_thread(void *opaque) } } + for (int i = 0; i < p->zero_num; i++) { + void *page = p->host + p->zero[i]; + if (!buffer_is_zero(page, p->page_size)) { + memset(page, 0, p->page_size); + } + } + if (flags & MULTIFD_FLAG_SYNC) { qemu_sem_post(&multifd_recv_state->sem_sync); qemu_sem_wait(&p->sem_sync); diff --git a/migration/multifd.h b/migration/multifd.h index d587b0e19c..13762900d4 100644 --- a/migration/multifd.h +++ b/migration/multifd.h @@ -53,6 +53,11 @@ typedef struct { uint32_t unused32[1]; /* Reserved for future use */ uint64_t unused64[3]; /* Reserved for future use */ char ramblock[256]; + /* + * This array contains the pointers to: + * - normal pages (initial normal_pages entries) + * - zero pages (following zero_pages entries) + */ uint64_t offset[]; } __attribute__((packed)) MultiFDPacket_t;