Message ID | 20180207073331.14158-8-haozhong.zhang@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
* Haozhong Zhang (haozhong.zhang@intel.com) wrote: > When loading a compressed page to persistent memory, flush CPU cache > after the data is decompressed. Combined with a call to pmem_drain() > at the end of memory loading, we can guarantee those compressed pages > are persistently loaded to PMEM. Can you explain why this can use the flush and doesn't need the special memset? Dave > Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com> > --- > include/qemu/pmem.h | 4 ++++ > migration/ram.c | 16 +++++++++++----- > 2 files changed, 15 insertions(+), 5 deletions(-) > > diff --git a/include/qemu/pmem.h b/include/qemu/pmem.h > index 77ee1fc4eb..20e3f6e71d 100644 > --- a/include/qemu/pmem.h > +++ b/include/qemu/pmem.h > @@ -37,6 +37,10 @@ static inline void *pmem_memset_nodrain(void *pmemdest, int c, size_t len) > return memset(pmemdest, c, len); > } > > +static inline void pmem_flush(const void *addr, size_t len) > +{ > +} > + > static inline void pmem_drain(void) > { > } > diff --git a/migration/ram.c b/migration/ram.c > index 5a79bbff64..924d2b9537 100644 > --- a/migration/ram.c > +++ b/migration/ram.c > @@ -274,6 +274,7 @@ struct DecompressParam { > void *des; > uint8_t *compbuf; > int len; > + bool is_pmem; > }; > typedef struct DecompressParam DecompressParam; > > @@ -2502,7 +2503,7 @@ static void *do_data_decompress(void *opaque) > DecompressParam *param = opaque; > unsigned long pagesize; > uint8_t *des; > - int len; > + int len, rc; > > qemu_mutex_lock(¶m->mutex); > while (!param->quit) { > @@ -2518,8 +2519,11 @@ static void *do_data_decompress(void *opaque) > * not a problem because the dirty page will be retransferred > * and uncompress() won't break the data in other pages. > */ > - uncompress((Bytef *)des, &pagesize, > - (const Bytef *)param->compbuf, len); > + rc = uncompress((Bytef *)des, &pagesize, > + (const Bytef *)param->compbuf, len); > + if (rc == Z_OK && param->is_pmem) { > + pmem_flush(des, len); > + } > > qemu_mutex_lock(&decomp_done_lock); > param->done = true; > @@ -2605,7 +2609,8 @@ static void compress_threads_load_cleanup(void) > } > > static void decompress_data_with_multi_threads(QEMUFile *f, > - void *host, int len) > + void *host, int len, > + bool is_pmem) > { > int idx, thread_count; > > @@ -2619,6 +2624,7 @@ static void decompress_data_with_multi_threads(QEMUFile *f, > qemu_get_buffer(f, decomp_param[idx].compbuf, len); > decomp_param[idx].des = host; > decomp_param[idx].len = len; > + decomp_param[idx].is_pmem = is_pmem; > qemu_cond_signal(&decomp_param[idx].cond); > qemu_mutex_unlock(&decomp_param[idx].mutex); > break; > @@ -2964,7 +2970,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) > ret = -EINVAL; > break; > } > - decompress_data_with_multi_threads(f, host, len); > + decompress_data_with_multi_threads(f, host, len, is_pmem); > break; > > case RAM_SAVE_FLAG_XBZRLE: > -- > 2.14.1 > -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
On 02/07/18 11:54 +0000, Dr. David Alan Gilbert wrote: > * Haozhong Zhang (haozhong.zhang@intel.com) wrote: > > When loading a compressed page to persistent memory, flush CPU cache > > after the data is decompressed. Combined with a call to pmem_drain() > > at the end of memory loading, we can guarantee those compressed pages > > are persistently loaded to PMEM. > > Can you explain why this can use the flush and doesn't need the special > memset? The best approach to ensure the write persistence is to operate pmem all via libpmem, e.g., pmem_memcpy_nodrain() + pmem_drain(). However, the write to pmem in this case is performed by uncompress() which is implemented out of QEMU and libpmem. It may or may not use libpmem, which is not controlled by QEMU. Therefore, we have to use the less optimal approach, that is to flush cache for all pmem addresses that uncompress() may have written, i.e.,/e.g., memcpy() and/or memset() in uncompress(), and pmem_flush() + pmem_drain() in QEMU. Haozhong > > Dave > > > Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com> > > --- > > include/qemu/pmem.h | 4 ++++ > > migration/ram.c | 16 +++++++++++----- > > 2 files changed, 15 insertions(+), 5 deletions(-) > > > > diff --git a/include/qemu/pmem.h b/include/qemu/pmem.h > > index 77ee1fc4eb..20e3f6e71d 100644 > > --- a/include/qemu/pmem.h > > +++ b/include/qemu/pmem.h > > @@ -37,6 +37,10 @@ static inline void *pmem_memset_nodrain(void *pmemdest, int c, size_t len) > > return memset(pmemdest, c, len); > > } > > > > +static inline void pmem_flush(const void *addr, size_t len) > > +{ > > +} > > + > > static inline void pmem_drain(void) > > { > > } > > diff --git a/migration/ram.c b/migration/ram.c > > index 5a79bbff64..924d2b9537 100644 > > --- a/migration/ram.c > > +++ b/migration/ram.c > > @@ -274,6 +274,7 @@ struct DecompressParam { > > void *des; > > uint8_t *compbuf; > > int len; > > + bool is_pmem; > > }; > > typedef struct DecompressParam DecompressParam; > > > > @@ -2502,7 +2503,7 @@ static void *do_data_decompress(void *opaque) > > DecompressParam *param = opaque; > > unsigned long pagesize; > > uint8_t *des; > > - int len; > > + int len, rc; > > > > qemu_mutex_lock(¶m->mutex); > > while (!param->quit) { > > @@ -2518,8 +2519,11 @@ static void *do_data_decompress(void *opaque) > > * not a problem because the dirty page will be retransferred > > * and uncompress() won't break the data in other pages. > > */ > > - uncompress((Bytef *)des, &pagesize, > > - (const Bytef *)param->compbuf, len); > > + rc = uncompress((Bytef *)des, &pagesize, > > + (const Bytef *)param->compbuf, len); > > + if (rc == Z_OK && param->is_pmem) { > > + pmem_flush(des, len); > > + } > > > > qemu_mutex_lock(&decomp_done_lock); > > param->done = true; > > @@ -2605,7 +2609,8 @@ static void compress_threads_load_cleanup(void) > > } > > > > static void decompress_data_with_multi_threads(QEMUFile *f, > > - void *host, int len) > > + void *host, int len, > > + bool is_pmem) > > { > > int idx, thread_count; > > > > @@ -2619,6 +2624,7 @@ static void decompress_data_with_multi_threads(QEMUFile *f, > > qemu_get_buffer(f, decomp_param[idx].compbuf, len); > > decomp_param[idx].des = host; > > decomp_param[idx].len = len; > > + decomp_param[idx].is_pmem = is_pmem; > > qemu_cond_signal(&decomp_param[idx].cond); > > qemu_mutex_unlock(&decomp_param[idx].mutex); > > break; > > @@ -2964,7 +2970,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) > > ret = -EINVAL; > > break; > > } > > - decompress_data_with_multi_threads(f, host, len); > > + decompress_data_with_multi_threads(f, host, len, is_pmem); > > break; > > > > case RAM_SAVE_FLAG_XBZRLE: > > -- > > 2.14.1 > > > -- > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
* Haozhong Zhang (haozhong.zhang@intel.com) wrote: > On 02/07/18 11:54 +0000, Dr. David Alan Gilbert wrote: > > * Haozhong Zhang (haozhong.zhang@intel.com) wrote: > > > When loading a compressed page to persistent memory, flush CPU cache > > > after the data is decompressed. Combined with a call to pmem_drain() > > > at the end of memory loading, we can guarantee those compressed pages > > > are persistently loaded to PMEM. > > > > Can you explain why this can use the flush and doesn't need the special > > memset? > > The best approach to ensure the write persistence is to operate pmem > all via libpmem, e.g., pmem_memcpy_nodrain() + pmem_drain(). However, > the write to pmem in this case is performed by uncompress() which is > implemented out of QEMU and libpmem. It may or may not use libpmem, > which is not controlled by QEMU. Therefore, we have to use the less > optimal approach, that is to flush cache for all pmem addresses that > uncompress() may have written, i.e.,/e.g., memcpy() and/or memset() in > uncompress(), and pmem_flush() + pmem_drain() in QEMU. In what way is it less optimal? If that's a legal thing to do, then why not just do a pmem_flush + pmem_drain right at the end of the ram loading and leave all the rest of the code untouched? Dave > Haozhong > > > > > Dave > > > > > Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com> > > > --- > > > include/qemu/pmem.h | 4 ++++ > > > migration/ram.c | 16 +++++++++++----- > > > 2 files changed, 15 insertions(+), 5 deletions(-) > > > > > > diff --git a/include/qemu/pmem.h b/include/qemu/pmem.h > > > index 77ee1fc4eb..20e3f6e71d 100644 > > > --- a/include/qemu/pmem.h > > > +++ b/include/qemu/pmem.h > > > @@ -37,6 +37,10 @@ static inline void *pmem_memset_nodrain(void *pmemdest, int c, size_t len) > > > return memset(pmemdest, c, len); > > > } > > > > > > +static inline void pmem_flush(const void *addr, size_t len) > > > +{ > > > +} > > > + > > > static inline void pmem_drain(void) > > > { > > > } > > > diff --git a/migration/ram.c b/migration/ram.c > > > index 5a79bbff64..924d2b9537 100644 > > > --- a/migration/ram.c > > > +++ b/migration/ram.c > > > @@ -274,6 +274,7 @@ struct DecompressParam { > > > void *des; > > > uint8_t *compbuf; > > > int len; > > > + bool is_pmem; > > > }; > > > typedef struct DecompressParam DecompressParam; > > > > > > @@ -2502,7 +2503,7 @@ static void *do_data_decompress(void *opaque) > > > DecompressParam *param = opaque; > > > unsigned long pagesize; > > > uint8_t *des; > > > - int len; > > > + int len, rc; > > > > > > qemu_mutex_lock(¶m->mutex); > > > while (!param->quit) { > > > @@ -2518,8 +2519,11 @@ static void *do_data_decompress(void *opaque) > > > * not a problem because the dirty page will be retransferred > > > * and uncompress() won't break the data in other pages. > > > */ > > > - uncompress((Bytef *)des, &pagesize, > > > - (const Bytef *)param->compbuf, len); > > > + rc = uncompress((Bytef *)des, &pagesize, > > > + (const Bytef *)param->compbuf, len); > > > + if (rc == Z_OK && param->is_pmem) { > > > + pmem_flush(des, len); > > > + } > > > > > > qemu_mutex_lock(&decomp_done_lock); > > > param->done = true; > > > @@ -2605,7 +2609,8 @@ static void compress_threads_load_cleanup(void) > > > } > > > > > > static void decompress_data_with_multi_threads(QEMUFile *f, > > > - void *host, int len) > > > + void *host, int len, > > > + bool is_pmem) > > > { > > > int idx, thread_count; > > > > > > @@ -2619,6 +2624,7 @@ static void decompress_data_with_multi_threads(QEMUFile *f, > > > qemu_get_buffer(f, decomp_param[idx].compbuf, len); > > > decomp_param[idx].des = host; > > > decomp_param[idx].len = len; > > > + decomp_param[idx].is_pmem = is_pmem; > > > qemu_cond_signal(&decomp_param[idx].cond); > > > qemu_mutex_unlock(&decomp_param[idx].mutex); > > > break; > > > @@ -2964,7 +2970,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) > > > ret = -EINVAL; > > > break; > > > } > > > - decompress_data_with_multi_threads(f, host, len); > > > + decompress_data_with_multi_threads(f, host, len, is_pmem); > > > break; > > > > > > case RAM_SAVE_FLAG_XBZRLE: > > > -- > > > 2.14.1 > > > > > -- > > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
On 02/07/18 13:03 +0000, Dr. David Alan Gilbert wrote: > * Haozhong Zhang (haozhong.zhang@intel.com) wrote: > > On 02/07/18 11:54 +0000, Dr. David Alan Gilbert wrote: > > > * Haozhong Zhang (haozhong.zhang@intel.com) wrote: > > > > When loading a compressed page to persistent memory, flush CPU cache > > > > after the data is decompressed. Combined with a call to pmem_drain() > > > > at the end of memory loading, we can guarantee those compressed pages > > > > are persistently loaded to PMEM. > > > > > > Can you explain why this can use the flush and doesn't need the special > > > memset? > > > > The best approach to ensure the write persistence is to operate pmem > > all via libpmem, e.g., pmem_memcpy_nodrain() + pmem_drain(). However, > > the write to pmem in this case is performed by uncompress() which is > > implemented out of QEMU and libpmem. It may or may not use libpmem, > > which is not controlled by QEMU. Therefore, we have to use the less > > optimal approach, that is to flush cache for all pmem addresses that > > uncompress() may have written, i.e.,/e.g., memcpy() and/or memset() in > > uncompress(), and pmem_flush() + pmem_drain() in QEMU. > > In what way is it less optimal? > If that's a legal thing to do, then why not just do a pmem_flush + > pmem_drain right at the end of the ram loading and leave all the rest of > the code untouched? For example, the implementation pmem_memcpy_nodrain() prefers to use movnt instructions w/o flush to write pmem if those instructions are available, and falls back to memcpy() + flush if movnt are not available, so I suppose the latter is less optimal. Haozhong > > Dave > > > Haozhong > > > > > > > > Dave > > > > > > > Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com> > > > > --- > > > > include/qemu/pmem.h | 4 ++++ > > > > migration/ram.c | 16 +++++++++++----- > > > > 2 files changed, 15 insertions(+), 5 deletions(-) > > > > > > > > diff --git a/include/qemu/pmem.h b/include/qemu/pmem.h > > > > index 77ee1fc4eb..20e3f6e71d 100644 > > > > --- a/include/qemu/pmem.h > > > > +++ b/include/qemu/pmem.h > > > > @@ -37,6 +37,10 @@ static inline void *pmem_memset_nodrain(void *pmemdest, int c, size_t len) > > > > return memset(pmemdest, c, len); > > > > } > > > > > > > > +static inline void pmem_flush(const void *addr, size_t len) > > > > +{ > > > > +} > > > > + > > > > static inline void pmem_drain(void) > > > > { > > > > } > > > > diff --git a/migration/ram.c b/migration/ram.c > > > > index 5a79bbff64..924d2b9537 100644 > > > > --- a/migration/ram.c > > > > +++ b/migration/ram.c > > > > @@ -274,6 +274,7 @@ struct DecompressParam { > > > > void *des; > > > > uint8_t *compbuf; > > > > int len; > > > > + bool is_pmem; > > > > }; > > > > typedef struct DecompressParam DecompressParam; > > > > > > > > @@ -2502,7 +2503,7 @@ static void *do_data_decompress(void *opaque) > > > > DecompressParam *param = opaque; > > > > unsigned long pagesize; > > > > uint8_t *des; > > > > - int len; > > > > + int len, rc; > > > > > > > > qemu_mutex_lock(¶m->mutex); > > > > while (!param->quit) { > > > > @@ -2518,8 +2519,11 @@ static void *do_data_decompress(void *opaque) > > > > * not a problem because the dirty page will be retransferred > > > > * and uncompress() won't break the data in other pages. > > > > */ > > > > - uncompress((Bytef *)des, &pagesize, > > > > - (const Bytef *)param->compbuf, len); > > > > + rc = uncompress((Bytef *)des, &pagesize, > > > > + (const Bytef *)param->compbuf, len); > > > > + if (rc == Z_OK && param->is_pmem) { > > > > + pmem_flush(des, len); > > > > + } > > > > > > > > qemu_mutex_lock(&decomp_done_lock); > > > > param->done = true; > > > > @@ -2605,7 +2609,8 @@ static void compress_threads_load_cleanup(void) > > > > } > > > > > > > > static void decompress_data_with_multi_threads(QEMUFile *f, > > > > - void *host, int len) > > > > + void *host, int len, > > > > + bool is_pmem) > > > > { > > > > int idx, thread_count; > > > > > > > > @@ -2619,6 +2624,7 @@ static void decompress_data_with_multi_threads(QEMUFile *f, > > > > qemu_get_buffer(f, decomp_param[idx].compbuf, len); > > > > decomp_param[idx].des = host; > > > > decomp_param[idx].len = len; > > > > + decomp_param[idx].is_pmem = is_pmem; > > > > qemu_cond_signal(&decomp_param[idx].cond); > > > > qemu_mutex_unlock(&decomp_param[idx].mutex); > > > > break; > > > > @@ -2964,7 +2970,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) > > > > ret = -EINVAL; > > > > break; > > > > } > > > > - decompress_data_with_multi_threads(f, host, len); > > > > + decompress_data_with_multi_threads(f, host, len, is_pmem); > > > > break; > > > > > > > > case RAM_SAVE_FLAG_XBZRLE: > > > > -- > > > > 2.14.1 > > > > > > > -- > > > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK > -- > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK >
* Haozhong Zhang (haozhong.zhang@intel.com) wrote: > On 02/07/18 13:03 +0000, Dr. David Alan Gilbert wrote: > > * Haozhong Zhang (haozhong.zhang@intel.com) wrote: > > > On 02/07/18 11:54 +0000, Dr. David Alan Gilbert wrote: > > > > * Haozhong Zhang (haozhong.zhang@intel.com) wrote: > > > > > When loading a compressed page to persistent memory, flush CPU cache > > > > > after the data is decompressed. Combined with a call to pmem_drain() > > > > > at the end of memory loading, we can guarantee those compressed pages > > > > > are persistently loaded to PMEM. > > > > > > > > Can you explain why this can use the flush and doesn't need the special > > > > memset? > > > > > > The best approach to ensure the write persistence is to operate pmem > > > all via libpmem, e.g., pmem_memcpy_nodrain() + pmem_drain(). However, > > > the write to pmem in this case is performed by uncompress() which is > > > implemented out of QEMU and libpmem. It may or may not use libpmem, > > > which is not controlled by QEMU. Therefore, we have to use the less > > > optimal approach, that is to flush cache for all pmem addresses that > > > uncompress() may have written, i.e.,/e.g., memcpy() and/or memset() in > > > uncompress(), and pmem_flush() + pmem_drain() in QEMU. > > > > In what way is it less optimal? > > If that's a legal thing to do, then why not just do a pmem_flush + > > pmem_drain right at the end of the ram loading and leave all the rest of > > the code untouched? > > For example, the implementation pmem_memcpy_nodrain() prefers to use > movnt instructions w/o flush to write pmem if those instructions are > available, and falls back to memcpy() + flush if movnt are not > available, so I suppose the latter is less optimal. But if you use normal memcpy calls to copy a few GB of RAM in an incoming migrate and then do a single flush at the end, isn't that better? Dave > Haozhong > > > > > Dave > > > > > Haozhong > > > > > > > > > > > Dave > > > > > > > > > Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com> > > > > > --- > > > > > include/qemu/pmem.h | 4 ++++ > > > > > migration/ram.c | 16 +++++++++++----- > > > > > 2 files changed, 15 insertions(+), 5 deletions(-) > > > > > > > > > > diff --git a/include/qemu/pmem.h b/include/qemu/pmem.h > > > > > index 77ee1fc4eb..20e3f6e71d 100644 > > > > > --- a/include/qemu/pmem.h > > > > > +++ b/include/qemu/pmem.h > > > > > @@ -37,6 +37,10 @@ static inline void *pmem_memset_nodrain(void *pmemdest, int c, size_t len) > > > > > return memset(pmemdest, c, len); > > > > > } > > > > > > > > > > +static inline void pmem_flush(const void *addr, size_t len) > > > > > +{ > > > > > +} > > > > > + > > > > > static inline void pmem_drain(void) > > > > > { > > > > > } > > > > > diff --git a/migration/ram.c b/migration/ram.c > > > > > index 5a79bbff64..924d2b9537 100644 > > > > > --- a/migration/ram.c > > > > > +++ b/migration/ram.c > > > > > @@ -274,6 +274,7 @@ struct DecompressParam { > > > > > void *des; > > > > > uint8_t *compbuf; > > > > > int len; > > > > > + bool is_pmem; > > > > > }; > > > > > typedef struct DecompressParam DecompressParam; > > > > > > > > > > @@ -2502,7 +2503,7 @@ static void *do_data_decompress(void *opaque) > > > > > DecompressParam *param = opaque; > > > > > unsigned long pagesize; > > > > > uint8_t *des; > > > > > - int len; > > > > > + int len, rc; > > > > > > > > > > qemu_mutex_lock(¶m->mutex); > > > > > while (!param->quit) { > > > > > @@ -2518,8 +2519,11 @@ static void *do_data_decompress(void *opaque) > > > > > * not a problem because the dirty page will be retransferred > > > > > * and uncompress() won't break the data in other pages. > > > > > */ > > > > > - uncompress((Bytef *)des, &pagesize, > > > > > - (const Bytef *)param->compbuf, len); > > > > > + rc = uncompress((Bytef *)des, &pagesize, > > > > > + (const Bytef *)param->compbuf, len); > > > > > + if (rc == Z_OK && param->is_pmem) { > > > > > + pmem_flush(des, len); > > > > > + } > > > > > > > > > > qemu_mutex_lock(&decomp_done_lock); > > > > > param->done = true; > > > > > @@ -2605,7 +2609,8 @@ static void compress_threads_load_cleanup(void) > > > > > } > > > > > > > > > > static void decompress_data_with_multi_threads(QEMUFile *f, > > > > > - void *host, int len) > > > > > + void *host, int len, > > > > > + bool is_pmem) > > > > > { > > > > > int idx, thread_count; > > > > > > > > > > @@ -2619,6 +2624,7 @@ static void decompress_data_with_multi_threads(QEMUFile *f, > > > > > qemu_get_buffer(f, decomp_param[idx].compbuf, len); > > > > > decomp_param[idx].des = host; > > > > > decomp_param[idx].len = len; > > > > > + decomp_param[idx].is_pmem = is_pmem; > > > > > qemu_cond_signal(&decomp_param[idx].cond); > > > > > qemu_mutex_unlock(&decomp_param[idx].mutex); > > > > > break; > > > > > @@ -2964,7 +2970,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) > > > > > ret = -EINVAL; > > > > > break; > > > > > } > > > > > - decompress_data_with_multi_threads(f, host, len); > > > > > + decompress_data_with_multi_threads(f, host, len, is_pmem); > > > > > break; > > > > > > > > > > case RAM_SAVE_FLAG_XBZRLE: > > > > > -- > > > > > 2.14.1 > > > > > > > > > -- > > > > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK > > -- > > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK > > -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
On Wed, Feb 7, 2018 at 5:24 AM, Dr. David Alan Gilbert <dgilbert@redhat.com> wrote: > * Haozhong Zhang (haozhong.zhang@intel.com) wrote: >> On 02/07/18 13:03 +0000, Dr. David Alan Gilbert wrote: >> > * Haozhong Zhang (haozhong.zhang@intel.com) wrote: >> > > On 02/07/18 11:54 +0000, Dr. David Alan Gilbert wrote: >> > > > * Haozhong Zhang (haozhong.zhang@intel.com) wrote: >> > > > > When loading a compressed page to persistent memory, flush CPU cache >> > > > > after the data is decompressed. Combined with a call to pmem_drain() >> > > > > at the end of memory loading, we can guarantee those compressed pages >> > > > > are persistently loaded to PMEM. >> > > > >> > > > Can you explain why this can use the flush and doesn't need the special >> > > > memset? >> > > >> > > The best approach to ensure the write persistence is to operate pmem >> > > all via libpmem, e.g., pmem_memcpy_nodrain() + pmem_drain(). However, >> > > the write to pmem in this case is performed by uncompress() which is >> > > implemented out of QEMU and libpmem. It may or may not use libpmem, >> > > which is not controlled by QEMU. Therefore, we have to use the less >> > > optimal approach, that is to flush cache for all pmem addresses that >> > > uncompress() may have written, i.e.,/e.g., memcpy() and/or memset() in >> > > uncompress(), and pmem_flush() + pmem_drain() in QEMU. >> > >> > In what way is it less optimal? >> > If that's a legal thing to do, then why not just do a pmem_flush + >> > pmem_drain right at the end of the ram loading and leave all the rest of >> > the code untouched? >> >> For example, the implementation pmem_memcpy_nodrain() prefers to use >> movnt instructions w/o flush to write pmem if those instructions are >> available, and falls back to memcpy() + flush if movnt are not >> available, so I suppose the latter is less optimal. > > But if you use normal memcpy calls to copy a few GB of RAM in an > incoming migrate and then do a single flush at the end, isn't that > better? Not really, because now you've needlessly polluted the cache and are spending CPU looping over the cachelines that could have been bypassed with movnt.
* Dan Williams (dan.j.williams@intel.com) wrote: > On Wed, Feb 7, 2018 at 5:24 AM, Dr. David Alan Gilbert > <dgilbert@redhat.com> wrote: > > * Haozhong Zhang (haozhong.zhang@intel.com) wrote: > >> On 02/07/18 13:03 +0000, Dr. David Alan Gilbert wrote: > >> > * Haozhong Zhang (haozhong.zhang@intel.com) wrote: > >> > > On 02/07/18 11:54 +0000, Dr. David Alan Gilbert wrote: > >> > > > * Haozhong Zhang (haozhong.zhang@intel.com) wrote: > >> > > > > When loading a compressed page to persistent memory, flush CPU cache > >> > > > > after the data is decompressed. Combined with a call to pmem_drain() > >> > > > > at the end of memory loading, we can guarantee those compressed pages > >> > > > > are persistently loaded to PMEM. > >> > > > > >> > > > Can you explain why this can use the flush and doesn't need the special > >> > > > memset? > >> > > > >> > > The best approach to ensure the write persistence is to operate pmem > >> > > all via libpmem, e.g., pmem_memcpy_nodrain() + pmem_drain(). However, > >> > > the write to pmem in this case is performed by uncompress() which is > >> > > implemented out of QEMU and libpmem. It may or may not use libpmem, > >> > > which is not controlled by QEMU. Therefore, we have to use the less > >> > > optimal approach, that is to flush cache for all pmem addresses that > >> > > uncompress() may have written, i.e.,/e.g., memcpy() and/or memset() in > >> > > uncompress(), and pmem_flush() + pmem_drain() in QEMU. > >> > > >> > In what way is it less optimal? > >> > If that's a legal thing to do, then why not just do a pmem_flush + > >> > pmem_drain right at the end of the ram loading and leave all the rest of > >> > the code untouched? > >> > >> For example, the implementation pmem_memcpy_nodrain() prefers to use > >> movnt instructions w/o flush to write pmem if those instructions are > >> available, and falls back to memcpy() + flush if movnt are not > >> available, so I suppose the latter is less optimal. > > > > But if you use normal memcpy calls to copy a few GB of RAM in an > > incoming migrate and then do a single flush at the end, isn't that > > better? > > Not really, because now you've needlessly polluted the cache and are > spending CPU looping over the cachelines that could have been bypassed > with movnt. What's different in the pmem case? Isn't what you've said true in the normal migrate case as well? Dave -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
On Wed, Feb 7, 2018 at 10:08 AM, Dr. David Alan Gilbert <dgilbert@redhat.com> wrote: > * Dan Williams (dan.j.williams@intel.com) wrote: >> On Wed, Feb 7, 2018 at 5:24 AM, Dr. David Alan Gilbert >> <dgilbert@redhat.com> wrote: >> > * Haozhong Zhang (haozhong.zhang@intel.com) wrote: >> >> On 02/07/18 13:03 +0000, Dr. David Alan Gilbert wrote: >> >> > * Haozhong Zhang (haozhong.zhang@intel.com) wrote: >> >> > > On 02/07/18 11:54 +0000, Dr. David Alan Gilbert wrote: >> >> > > > * Haozhong Zhang (haozhong.zhang@intel.com) wrote: >> >> > > > > When loading a compressed page to persistent memory, flush CPU cache >> >> > > > > after the data is decompressed. Combined with a call to pmem_drain() >> >> > > > > at the end of memory loading, we can guarantee those compressed pages >> >> > > > > are persistently loaded to PMEM. >> >> > > > >> >> > > > Can you explain why this can use the flush and doesn't need the special >> >> > > > memset? >> >> > > >> >> > > The best approach to ensure the write persistence is to operate pmem >> >> > > all via libpmem, e.g., pmem_memcpy_nodrain() + pmem_drain(). However, >> >> > > the write to pmem in this case is performed by uncompress() which is >> >> > > implemented out of QEMU and libpmem. It may or may not use libpmem, >> >> > > which is not controlled by QEMU. Therefore, we have to use the less >> >> > > optimal approach, that is to flush cache for all pmem addresses that >> >> > > uncompress() may have written, i.e.,/e.g., memcpy() and/or memset() in >> >> > > uncompress(), and pmem_flush() + pmem_drain() in QEMU. >> >> > >> >> > In what way is it less optimal? >> >> > If that's a legal thing to do, then why not just do a pmem_flush + >> >> > pmem_drain right at the end of the ram loading and leave all the rest of >> >> > the code untouched? >> >> >> >> For example, the implementation pmem_memcpy_nodrain() prefers to use >> >> movnt instructions w/o flush to write pmem if those instructions are >> >> available, and falls back to memcpy() + flush if movnt are not >> >> available, so I suppose the latter is less optimal. >> > >> > But if you use normal memcpy calls to copy a few GB of RAM in an >> > incoming migrate and then do a single flush at the end, isn't that >> > better? >> >> Not really, because now you've needlessly polluted the cache and are >> spending CPU looping over the cachelines that could have been bypassed >> with movnt. > > What's different in the pmem case? Isn't what you've said true in the > normal migrate case as well? > In the normal migrate case the memory is volatile so once the copy is globally visiable you're done. In the pmem case the migration is not complete until the persistent state is synchronized. Note, I'm talking in generalities because I don't know the deeper details of the migrate flow.
* Dan Williams (dan.j.williams@intel.com) wrote: > On Wed, Feb 7, 2018 at 10:08 AM, Dr. David Alan Gilbert > <dgilbert@redhat.com> wrote: > > * Dan Williams (dan.j.williams@intel.com) wrote: > >> On Wed, Feb 7, 2018 at 5:24 AM, Dr. David Alan Gilbert > >> <dgilbert@redhat.com> wrote: > >> > * Haozhong Zhang (haozhong.zhang@intel.com) wrote: > >> >> On 02/07/18 13:03 +0000, Dr. David Alan Gilbert wrote: > >> >> > * Haozhong Zhang (haozhong.zhang@intel.com) wrote: > >> >> > > On 02/07/18 11:54 +0000, Dr. David Alan Gilbert wrote: > >> >> > > > * Haozhong Zhang (haozhong.zhang@intel.com) wrote: > >> >> > > > > When loading a compressed page to persistent memory, flush CPU cache > >> >> > > > > after the data is decompressed. Combined with a call to pmem_drain() > >> >> > > > > at the end of memory loading, we can guarantee those compressed pages > >> >> > > > > are persistently loaded to PMEM. > >> >> > > > > >> >> > > > Can you explain why this can use the flush and doesn't need the special > >> >> > > > memset? > >> >> > > > >> >> > > The best approach to ensure the write persistence is to operate pmem > >> >> > > all via libpmem, e.g., pmem_memcpy_nodrain() + pmem_drain(). However, > >> >> > > the write to pmem in this case is performed by uncompress() which is > >> >> > > implemented out of QEMU and libpmem. It may or may not use libpmem, > >> >> > > which is not controlled by QEMU. Therefore, we have to use the less > >> >> > > optimal approach, that is to flush cache for all pmem addresses that > >> >> > > uncompress() may have written, i.e.,/e.g., memcpy() and/or memset() in > >> >> > > uncompress(), and pmem_flush() + pmem_drain() in QEMU. > >> >> > > >> >> > In what way is it less optimal? > >> >> > If that's a legal thing to do, then why not just do a pmem_flush + > >> >> > pmem_drain right at the end of the ram loading and leave all the rest of > >> >> > the code untouched? > >> >> > >> >> For example, the implementation pmem_memcpy_nodrain() prefers to use > >> >> movnt instructions w/o flush to write pmem if those instructions are > >> >> available, and falls back to memcpy() + flush if movnt are not > >> >> available, so I suppose the latter is less optimal. > >> > > >> > But if you use normal memcpy calls to copy a few GB of RAM in an > >> > incoming migrate and then do a single flush at the end, isn't that > >> > better? > >> > >> Not really, because now you've needlessly polluted the cache and are > >> spending CPU looping over the cachelines that could have been bypassed > >> with movnt. > > > > What's different in the pmem case? Isn't what you've said true in the > > normal migrate case as well? > > > > In the normal migrate case the memory is volatile so once the copy is > globally visiable you're done. In the pmem case the migration is not > complete until the persistent state is synchronized. > > Note, I'm talking in generalities because I don't know the deeper > details of the migrate flow. On the receive side of a migrate, during a normal precopy migrate (without xbzrle) nothing is going to be reading that RAM until the whole RAM load has completed anyway - so we're not benefiting from it being in the caches either. In the pmem case, again since nothing is going to be reading from that RAM until the end anyway, why bother flushing as we go as opposed to at the end? Note, I'm also talking generalities because I don't know the deeper details of the pmem flow. Hmm. Dave -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
On Wed, Feb 7, 2018 at 10:37 AM, Dr. David Alan Gilbert <dgilbert@redhat.com> wrote: > * Dan Williams (dan.j.williams@intel.com) wrote: >> On Wed, Feb 7, 2018 at 10:08 AM, Dr. David Alan Gilbert >> <dgilbert@redhat.com> wrote: >> > * Dan Williams (dan.j.williams@intel.com) wrote: >> >> On Wed, Feb 7, 2018 at 5:24 AM, Dr. David Alan Gilbert >> >> <dgilbert@redhat.com> wrote: >> >> > * Haozhong Zhang (haozhong.zhang@intel.com) wrote: >> >> >> On 02/07/18 13:03 +0000, Dr. David Alan Gilbert wrote: >> >> >> > * Haozhong Zhang (haozhong.zhang@intel.com) wrote: >> >> >> > > On 02/07/18 11:54 +0000, Dr. David Alan Gilbert wrote: >> >> >> > > > * Haozhong Zhang (haozhong.zhang@intel.com) wrote: >> >> >> > > > > When loading a compressed page to persistent memory, flush CPU cache >> >> >> > > > > after the data is decompressed. Combined with a call to pmem_drain() >> >> >> > > > > at the end of memory loading, we can guarantee those compressed pages >> >> >> > > > > are persistently loaded to PMEM. >> >> >> > > > >> >> >> > > > Can you explain why this can use the flush and doesn't need the special >> >> >> > > > memset? >> >> >> > > >> >> >> > > The best approach to ensure the write persistence is to operate pmem >> >> >> > > all via libpmem, e.g., pmem_memcpy_nodrain() + pmem_drain(). However, >> >> >> > > the write to pmem in this case is performed by uncompress() which is >> >> >> > > implemented out of QEMU and libpmem. It may or may not use libpmem, >> >> >> > > which is not controlled by QEMU. Therefore, we have to use the less >> >> >> > > optimal approach, that is to flush cache for all pmem addresses that >> >> >> > > uncompress() may have written, i.e.,/e.g., memcpy() and/or memset() in >> >> >> > > uncompress(), and pmem_flush() + pmem_drain() in QEMU. >> >> >> > >> >> >> > In what way is it less optimal? >> >> >> > If that's a legal thing to do, then why not just do a pmem_flush + >> >> >> > pmem_drain right at the end of the ram loading and leave all the rest of >> >> >> > the code untouched? >> >> >> >> >> >> For example, the implementation pmem_memcpy_nodrain() prefers to use >> >> >> movnt instructions w/o flush to write pmem if those instructions are >> >> >> available, and falls back to memcpy() + flush if movnt are not >> >> >> available, so I suppose the latter is less optimal. >> >> > >> >> > But if you use normal memcpy calls to copy a few GB of RAM in an >> >> > incoming migrate and then do a single flush at the end, isn't that >> >> > better? >> >> >> >> Not really, because now you've needlessly polluted the cache and are >> >> spending CPU looping over the cachelines that could have been bypassed >> >> with movnt. >> > >> > What's different in the pmem case? Isn't what you've said true in the >> > normal migrate case as well? >> > >> >> In the normal migrate case the memory is volatile so once the copy is >> globally visiable you're done. In the pmem case the migration is not >> complete until the persistent state is synchronized. >> >> Note, I'm talking in generalities because I don't know the deeper >> details of the migrate flow. > > On the receive side of a migrate, during a normal precopy migrate > (without xbzrle) nothing is going to be reading that RAM until > the whole RAM load has completed anyway - so we're not benefiting from > it being in the caches either. > > In the pmem case, again since nothing is going to be reading from that > RAM until the end anyway, why bother flushing as we go as opposed to at > the end? Flushing at the end implies doing a large loop flushing the caches at the end of the transfer because the x86 ISA only exposes a line-by-line flush to unprivileged code rather than a full cache flush like what the kernel can do with wbinvd. So, better to flush as we go rather than incur the overhead of the loop at the end. I.e. I'm assuming it is more efficient to do 'movnt' in the first instance and not worry about the flush loop.
diff --git a/include/qemu/pmem.h b/include/qemu/pmem.h index 77ee1fc4eb..20e3f6e71d 100644 --- a/include/qemu/pmem.h +++ b/include/qemu/pmem.h @@ -37,6 +37,10 @@ static inline void *pmem_memset_nodrain(void *pmemdest, int c, size_t len) return memset(pmemdest, c, len); } +static inline void pmem_flush(const void *addr, size_t len) +{ +} + static inline void pmem_drain(void) { } diff --git a/migration/ram.c b/migration/ram.c index 5a79bbff64..924d2b9537 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -274,6 +274,7 @@ struct DecompressParam { void *des; uint8_t *compbuf; int len; + bool is_pmem; }; typedef struct DecompressParam DecompressParam; @@ -2502,7 +2503,7 @@ static void *do_data_decompress(void *opaque) DecompressParam *param = opaque; unsigned long pagesize; uint8_t *des; - int len; + int len, rc; qemu_mutex_lock(¶m->mutex); while (!param->quit) { @@ -2518,8 +2519,11 @@ static void *do_data_decompress(void *opaque) * not a problem because the dirty page will be retransferred * and uncompress() won't break the data in other pages. */ - uncompress((Bytef *)des, &pagesize, - (const Bytef *)param->compbuf, len); + rc = uncompress((Bytef *)des, &pagesize, + (const Bytef *)param->compbuf, len); + if (rc == Z_OK && param->is_pmem) { + pmem_flush(des, len); + } qemu_mutex_lock(&decomp_done_lock); param->done = true; @@ -2605,7 +2609,8 @@ static void compress_threads_load_cleanup(void) } static void decompress_data_with_multi_threads(QEMUFile *f, - void *host, int len) + void *host, int len, + bool is_pmem) { int idx, thread_count; @@ -2619,6 +2624,7 @@ static void decompress_data_with_multi_threads(QEMUFile *f, qemu_get_buffer(f, decomp_param[idx].compbuf, len); decomp_param[idx].des = host; decomp_param[idx].len = len; + decomp_param[idx].is_pmem = is_pmem; qemu_cond_signal(&decomp_param[idx].cond); qemu_mutex_unlock(&decomp_param[idx].mutex); break; @@ -2964,7 +2970,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) ret = -EINVAL; break; } - decompress_data_with_multi_threads(f, host, len); + decompress_data_with_multi_threads(f, host, len, is_pmem); break; case RAM_SAVE_FLAG_XBZRLE:
When loading a compressed page to persistent memory, flush CPU cache after the data is decompressed. Combined with a call to pmem_drain() at the end of memory loading, we can guarantee those compressed pages are persistently loaded to PMEM. Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com> --- include/qemu/pmem.h | 4 ++++ migration/ram.c | 16 +++++++++++----- 2 files changed, 15 insertions(+), 5 deletions(-)