Message ID | deaf1c683fd32c28a72853710b9564dedc7d0060.1313076455.git.udeshpan@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
> diff --git a/buffered_file.c b/buffered_file.c > index b64ada7..5735e18 100644 > --- a/buffered_file.c > +++ b/buffered_file.c > @@ -243,7 +243,9 @@ static void *migrate_vm(void *opaque) > > while (!s->closed) { > if (s->freeze_output) { > + qemu_mutex_unlock_iothread(); > s->wait_for_unfreeze(s); > + qemu_mutex_lock_iothread(); > s->freeze_output = 0; > continue; > } > @@ -255,7 +257,9 @@ static void *migrate_vm(void *opaque) > current_time = qemu_get_clock_ms(rt_clock); > if (!s->closed&& (expire_time> current_time)) { > tv.tv_usec = 1000 * (expire_time - current_time); > + qemu_mutex_unlock_iothread(); > select(0, NULL, NULL, NULL,&tv); > + qemu_mutex_lock_iothread(); > continue; > } > I believe these should be already in patch 1 or anyway in a separate patch. > diff --git a/cpus.c b/cpus.c > index de70e02..6090c44 100644 > --- a/cpus.c > +++ b/cpus.c > @@ -666,6 +666,7 @@ int qemu_init_main_loop(void) > qemu_cond_init(&qemu_work_cond); > qemu_mutex_init(&qemu_fair_mutex); > qemu_mutex_init(&qemu_global_mutex); > + qemu_mutex_init(&ram_list.mutex); > qemu_mutex_lock(&qemu_global_mutex); > > qemu_thread_get_self(&io_thread); These are only executed if CONFIG_IOTHREAD; you can probably move it somewhere in exec.c. > @@ -919,6 +920,17 @@ void qemu_mutex_unlock_iothread(void) > qemu_mutex_unlock(&qemu_global_mutex); > } > > +void qemu_mutex_lock_ramlist(void) > +{ > + qemu_mutex_lock(&ram_list.mutex); > +} > + > +void qemu_mutex_unlock_ramlist(void) > +{ > + qemu_mutex_unlock(&ram_list.mutex); > +} > + > + And these too. 
> static int all_vcpus_paused(void) > { > CPUState *penv = first_cpu; > diff --git a/exec.c b/exec.c > index 0e2ce57..7bfb36f 100644 > --- a/exec.c > +++ b/exec.c > @@ -2972,6 +2972,8 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name, > } > new_block->length = size; > > + qemu_mutex_lock_ramlist(); > + > QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next); > > ram_list.phys_dirty = qemu_realloc(ram_list.phys_dirty, > @@ -2979,6 +2981,8 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name, > memset(ram_list.phys_dirty + (new_block->offset>> TARGET_PAGE_BITS), > 0xff, size>> TARGET_PAGE_BITS); > > + qemu_mutex_unlock_ramlist(); > + > if (kvm_enabled()) > kvm_setup_guest_memory(new_block->host, size); > > @@ -2996,7 +3000,9 @@ void qemu_ram_free_from_ptr(ram_addr_t addr) > > QLIST_FOREACH(block,&ram_list.blocks, next) { > if (addr == block->offset) { > + qemu_mutex_lock_ramlist(); > QLIST_REMOVE(block, next); > + qemu_mutex_unlock_ramlist(); > qemu_free(block); > return; > } > @@ -3009,7 +3015,9 @@ void qemu_ram_free(ram_addr_t addr) > > QLIST_FOREACH(block,&ram_list.blocks, next) { > if (addr == block->offset) { > + qemu_mutex_lock_ramlist(); > QLIST_REMOVE(block, next); > + qemu_mutex_unlock_ramlist(); > if (block->flags& RAM_PREALLOC_MASK) { > ; > } else if (mem_path) { > @@ -3117,8 +3125,10 @@ void *qemu_get_ram_ptr(ram_addr_t addr) > if (addr - block->offset< block->length) { > /* Move this entry to to start of the list. */ > if (block != QLIST_FIRST(&ram_list.blocks)) { > + qemu_mutex_lock_ramlist(); > QLIST_REMOVE(block, next); > QLIST_INSERT_HEAD(&ram_list.blocks, block, next); > + qemu_mutex_unlock_ramlist(); Theoretically qemu_get_ram_ptr should be protected. The problem is not just accessing the ramlist, it is accessing the data underneath it before anyone frees it. 
Luckily we can set aside that problem for now, because qemu_ram_free_from_ptr is only used by device assignment and device assignment makes VMs unmigratable. If we support generic RAM hotplug/hotunplug, we would probably need something RCU-like to protect accesses, since protecting all uses of qemu_get_ram_ptr is not practical. > } > if (xen_mapcache_enabled()) { > /* We need to check if the requested address is in the RAM > diff --git a/qemu-common.h b/qemu-common.h > index abd7a75..b802883 100644 > --- a/qemu-common.h > +++ b/qemu-common.h > @@ -212,6 +212,8 @@ char *qemu_strndup(const char *str, size_t size); > > void qemu_mutex_lock_iothread(void); > void qemu_mutex_unlock_iothread(void); > +void qemu_mutex_lock_ramlist(void); > +void qemu_mutex_unlock_ramlist(void); > > int qemu_open(const char *name, int flags, ...); > ssize_t qemu_write_full(int fd, const void *buf, size_t count) Paolo -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 08/11/2011 06:20 PM, Paolo Bonzini wrote: >> >> + qemu_mutex_lock_ramlist(); >> QLIST_REMOVE(block, next); >> QLIST_INSERT_HEAD(&ram_list.blocks, block, next); >> + qemu_mutex_unlock_ramlist(); > > Theoretically qemu_get_ram_ptr should be protected. The problem is not > just accessing the ramlist, it is accessing the data underneath it > before anyone frees it. Luckily we can set aside that problem for now, > because qemu_ram_free_from_ptr is only used by device assignment and > device assignment makes VMs unmigratable. Hmm, rethinking about it, all the loops in exec.c should be protected from the mutex. That's not too good because qemu_get_ram_ptr is a hot path for TCG. Perhaps you can also avoid the mutex entirely, and just disable the above optimization for most-recently-used-block while migration is running. It's not a complete solution, but it could be good enough until we have RAM hot-plug/hot-unplug. Paolo -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 08/12/2011 02:45 AM, Paolo Bonzini wrote: > On 08/11/2011 06:20 PM, Paolo Bonzini wrote: >>> >>> + qemu_mutex_lock_ramlist(); >>> QLIST_REMOVE(block, next); >>> QLIST_INSERT_HEAD(&ram_list.blocks, block, next); >>> + qemu_mutex_unlock_ramlist(); >> >> Theoretically qemu_get_ram_ptr should be protected. The problem is not >> just accessing the ramlist, it is accessing the data underneath it >> before anyone frees it. Luckily we can set aside that problem for now, >> because qemu_ram_free_from_ptr is only used by device assignment and >> device assignment makes VMs unmigratable. > > Hmm, rethinking about it, all the loops in exec.c should be protected > from the mutex. Other loops in exec.c are just for reading the ram_list members, and the migration thread doesn't modify ram_list. Also, protecting the loops in exec.c would make those functions un-callable from the functions that are already holding the ram_list mutex to protect themselves against memslot removal (migration thread in our case). > That's not too good because qemu_get_ram_ptr is a hot path for TCG. Looks like qemu_get_ram_ptr isn't called from the source side code of guest migration. > Perhaps you can also avoid the mutex entirely, and just disable the > above optimization for most-recently-used-block while migration is > running. It's not a complete solution, but it could be good enough > until we have RAM hot-plug/hot-unplug. > > Paolo -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Aug 12, 2011 at 08:45:50AM +0200, Paolo Bonzini wrote: > On 08/11/2011 06:20 PM, Paolo Bonzini wrote: > >> > >>+ qemu_mutex_lock_ramlist(); > >> QLIST_REMOVE(block, next); > >> QLIST_INSERT_HEAD(&ram_list.blocks, block, next); > >>+ qemu_mutex_unlock_ramlist(); > > > >Theoretically qemu_get_ram_ptr should be protected. The problem is not > >just accessing the ramlist, it is accessing the data underneath it > >before anyone frees it. Luckily we can set aside that problem for now, > >because qemu_ram_free_from_ptr is only used by device assignment and > >device assignment makes VMs unmigratable. > > Hmm, rethinking about it, all the loops in exec.c should be > protected from the mutex. That's not too good because > qemu_get_ram_ptr is a hot path for TCG. Right. > Perhaps you can also avoid > the mutex entirely, and just disable the above optimization for > most-recently-used-block while migration is running. It's not a > complete solution, but it could be good enough until we have RAM > hot-plug/hot-unplug. Actually the previous patchset does not traverse the ramlist without qemu_mutex locked, which is safe versus the most-recently-used-block optimization. So, agreed that the new ramlist lock is not necessary for now. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 08/14/2011 11:45 PM, Umesh Deshpande wrote: >> That's not too good because qemu_get_ram_ptr is a hot path for TCG. > > Looks like qemu_get_ram_ptr isn't called from the source side code of > guest migration. Right, but since you are accessing the list from both migration and non-migration threads, qemu_get_ram_ptr needs to take the ram_list lock. The ram_list and IO thread mutexes together protect the "next" member, and you need to hold them both when modifying the list. See the reply to Marcelo for a proposed solution. Paolo -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/arch_init.c b/arch_init.c index 484b39d..f0ddda6 100644 --- a/arch_init.c +++ b/arch_init.c @@ -298,6 +298,11 @@ int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque) bytes_transferred_last = bytes_transferred; bwidth = qemu_get_clock_ns(rt_clock); + if (stage != 3) { + qemu_mutex_lock_ramlist(); + qemu_mutex_unlock_iothread(); + } + while (!qemu_file_rate_limit(f)) { int bytes_sent; @@ -308,6 +313,11 @@ int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque) } } + if (stage != 3) { + qemu_mutex_lock_iothread(); + qemu_mutex_unlock_ramlist(); + } + bwidth = qemu_get_clock_ns(rt_clock) - bwidth; bwidth = (bytes_transferred - bytes_transferred_last) / bwidth; diff --git a/buffered_file.c b/buffered_file.c index b64ada7..5735e18 100644 --- a/buffered_file.c +++ b/buffered_file.c @@ -243,7 +243,9 @@ static void *migrate_vm(void *opaque) while (!s->closed) { if (s->freeze_output) { + qemu_mutex_unlock_iothread(); s->wait_for_unfreeze(s); + qemu_mutex_lock_iothread(); s->freeze_output = 0; continue; } @@ -255,7 +257,9 @@ static void *migrate_vm(void *opaque) current_time = qemu_get_clock_ms(rt_clock); if (!s->closed && (expire_time > current_time)) { tv.tv_usec = 1000 * (expire_time - current_time); + qemu_mutex_unlock_iothread(); select(0, NULL, NULL, NULL, &tv); + qemu_mutex_lock_iothread(); continue; } diff --git a/cpu-all.h b/cpu-all.h index e839100..6a5dbb3 100644 --- a/cpu-all.h +++ b/cpu-all.h @@ -21,6 +21,7 @@ #include "qemu-common.h" #include "cpu-common.h" +#include "qemu-thread.h" /* some important defines: * @@ -931,6 +932,7 @@ typedef struct RAMBlock { } RAMBlock; typedef struct RAMList { + QemuMutex mutex; uint8_t *phys_dirty; QLIST_HEAD(ram, RAMBlock) blocks; } RAMList; diff --git a/cpus.c b/cpus.c index de70e02..6090c44 100644 --- a/cpus.c +++ b/cpus.c @@ -666,6 +666,7 @@ int qemu_init_main_loop(void) qemu_cond_init(&qemu_work_cond); qemu_mutex_init(&qemu_fair_mutex); qemu_mutex_init(&qemu_global_mutex); + 
qemu_mutex_init(&ram_list.mutex); qemu_mutex_lock(&qemu_global_mutex); qemu_thread_get_self(&io_thread); @@ -919,6 +920,17 @@ void qemu_mutex_unlock_iothread(void) qemu_mutex_unlock(&qemu_global_mutex); } +void qemu_mutex_lock_ramlist(void) +{ + qemu_mutex_lock(&ram_list.mutex); +} + +void qemu_mutex_unlock_ramlist(void) +{ + qemu_mutex_unlock(&ram_list.mutex); +} + + static int all_vcpus_paused(void) { CPUState *penv = first_cpu; diff --git a/exec.c b/exec.c index 0e2ce57..7bfb36f 100644 --- a/exec.c +++ b/exec.c @@ -2972,6 +2972,8 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name, } new_block->length = size; + qemu_mutex_lock_ramlist(); + QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next); ram_list.phys_dirty = qemu_realloc(ram_list.phys_dirty, @@ -2979,6 +2981,8 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name, memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS), 0xff, size >> TARGET_PAGE_BITS); + qemu_mutex_unlock_ramlist(); + if (kvm_enabled()) kvm_setup_guest_memory(new_block->host, size); @@ -2996,7 +3000,9 @@ void qemu_ram_free_from_ptr(ram_addr_t addr) QLIST_FOREACH(block, &ram_list.blocks, next) { if (addr == block->offset) { + qemu_mutex_lock_ramlist(); QLIST_REMOVE(block, next); + qemu_mutex_unlock_ramlist(); qemu_free(block); return; } @@ -3009,7 +3015,9 @@ void qemu_ram_free(ram_addr_t addr) QLIST_FOREACH(block, &ram_list.blocks, next) { if (addr == block->offset) { + qemu_mutex_lock_ramlist(); QLIST_REMOVE(block, next); + qemu_mutex_unlock_ramlist(); if (block->flags & RAM_PREALLOC_MASK) { ; } else if (mem_path) { @@ -3117,8 +3125,10 @@ void *qemu_get_ram_ptr(ram_addr_t addr) if (addr - block->offset < block->length) { /* Move this entry to to start of the list. 
*/ if (block != QLIST_FIRST(&ram_list.blocks)) { + qemu_mutex_lock_ramlist(); QLIST_REMOVE(block, next); QLIST_INSERT_HEAD(&ram_list.blocks, block, next); + qemu_mutex_unlock_ramlist(); } if (xen_mapcache_enabled()) { /* We need to check if the requested address is in the RAM diff --git a/qemu-common.h b/qemu-common.h index abd7a75..b802883 100644 --- a/qemu-common.h +++ b/qemu-common.h @@ -212,6 +212,8 @@ char *qemu_strndup(const char *str, size_t size); void qemu_mutex_lock_iothread(void); void qemu_mutex_unlock_iothread(void); +void qemu_mutex_lock_ramlist(void); +void qemu_mutex_unlock_ramlist(void); int qemu_open(const char *name, int flags, ...); ssize_t qemu_write_full(int fd, const void *buf, size_t count)
The following patch introduces a mutex to protect the migration thread against the removal of memslots during the guest migration iteration. Signed-off-by: Umesh Deshpande <udeshpan@redhat.com> --- arch_init.c | 10 ++++++++++ buffered_file.c | 4 ++++ cpu-all.h | 2 ++ cpus.c | 12 ++++++++++++ exec.c | 10 ++++++++++ qemu-common.h | 2 ++ 6 files changed, 40 insertions(+), 0 deletions(-)