Message ID | 20130312174530.489f793c.yoshikawa_takuya_b1@lab.ntt.co.jp (mailing list archive) |
---|---|
State | New, archived |
On Tue, Mar 12, 2013 at 05:45:30PM +0900, Takuya Yoshikawa wrote:
> When we create or move a memory slot, we need to zap mmio sptes.
> Currently, zap_all() is used for this and this is causing two problems:
>   - extra page faults after zapping mmu pages
>   - long mmu_lock hold time during zapping mmu pages
>
> For the latter, Marcelo reported a disastrous mmu_lock hold time during
> hot-plug, which made the guest unresponsive for a long time.
>
> This patch takes a simple way to fix these problems: do not zap mmu
> pages unless they are marked mmio cached.  On our test box, this took
> only 50us for the 4GB guest and we did not see ms of mmu_lock hold time
> any more.
>
> Note that we still need to do zap_all() for other cases.  So another
> work is also needed: Xiao's work may be the one.
>
> Signed-off-by: Takuya Yoshikawa <yoshikawa_takuya_b1@lab.ntt.co.jp>
> ---
>  arch/x86/include/asm/kvm_host.h |    1 +
>  arch/x86/kvm/mmu.c              |   18 ++++++++++++++++++
>  arch/x86/kvm/x86.c              |    2 +-
>  3 files changed, 20 insertions(+), 1 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index b84310a..028b03f 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -768,6 +768,7 @@ void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
>  				     struct kvm_memory_slot *slot,
>  				     gfn_t gfn_offset, unsigned long mask);
>  void kvm_mmu_zap_all(struct kvm *kvm);
> +void kvm_mmu_zap_mmio_sptes(struct kvm *kvm);
>  unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
>  void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
>
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index de45ec1..c1a9b7b 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -4189,6 +4189,24 @@ restart:
>  	spin_unlock(&kvm->mmu_lock);
>  }
>
> +void kvm_mmu_zap_mmio_sptes(struct kvm *kvm)
> +{
> +	struct kvm_mmu_page *sp, *node;
> +	LIST_HEAD(invalid_list);
> +
> +	spin_lock(&kvm->mmu_lock);
> +restart:
> +	list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) {
> +		if (!sp->mmio_cached)
> +			continue;
> +		if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list))
> +			goto restart;
> +	}
> +
> +	kvm_mmu_commit_zap_page(kvm, &invalid_list);
> +	spin_unlock(&kvm->mmu_lock);
> +}
> +
>  static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
>  {
>  	struct kvm *kvm;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 35b4912..16b6df2 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -6969,7 +6969,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
>  	 * mmio sptes.
>  	 */
>  	if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
I wonder why check for KVM_MR_MOVE here. For KVM_MR_MOVE
kvm_mmu_zap_all() should be called and it is indeed called by the common
code.

> -		kvm_mmu_zap_all(kvm);
> +		kvm_mmu_zap_mmio_sptes(kvm);
>  		kvm_reload_remote_mmus(kvm);
>  	}
>  }
> --
> 1.7.5.4

--
			Gleb.
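For context on the question above: `change` is the memslot-change type passed to the arch commit hook. A short annotated sketch of the cases follows; only KVM_MR_CREATE and KVM_MR_MOVE appear in the quoted hunk, so the other two values (and the comments) are filled in from the generic kvm_mr_change enum of that kernel generation and should be read as assumptions rather than as part of the patch.

/* Annotated sketch only; DELETE and FLAGS_ONLY are assumed from the
 * generic enum of the same era, not quoted in this thread. */
enum kvm_mr_change {
	KVM_MR_CREATE,     /* new gfn range appears: stale mmio sptes must be zapped */
	KVM_MR_DELETE,     /* old slot is flushed per slot by common code */
	KVM_MR_MOVE,       /* gfn range changes: both of the above apply */
	KVM_MR_FLAGS_ONLY, /* no gfn change: nothing for this hook to zap */
};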
On Tue, Mar 12, 2013 at 02:06:22PM +0200, Gleb Natapov wrote:
> On Tue, Mar 12, 2013 at 05:45:30PM +0900, Takuya Yoshikawa wrote:
> > When we create or move a memory slot, we need to zap mmio sptes.
> > Currently, zap_all() is used for this and this is causing two problems:
> >   - extra page faults after zapping mmu pages
> >   - long mmu_lock hold time during zapping mmu pages
[...]
> > @@ -6969,7 +6969,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
> >  	 * mmio sptes.
> >  	 */
> >  	if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
> I wonder why check for KVM_MR_MOVE here. For KVM_MR_MOVE
> kvm_mmu_zap_all() should be called and it is indeed called by the common
> code.

It's per memslot; the common code flushes it with

	kvm_arch_flush_shadow_memslot(kvm, slot);
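To spell out the ordering being referred to: for KVM_MR_DELETE and KVM_MR_MOVE the generic memslot-update path already zaps the old slot's shadow pages via kvm_arch_flush_shadow_memslot(), and only afterwards does the x86 commit hook run. The sketch below shows that sequence only; its own function name and arguments, and anything else not quoted in this thread, are assumptions rather than exact kernel code.

/* Sketch of the generic memslot-update ordering under discussion. */
static int set_memory_region_sketch(struct kvm *kvm,
				    struct kvm_memory_slot *old_slot,
				    enum kvm_mr_change change)
{
	if (change == KVM_MR_DELETE || change == KVM_MR_MOVE)
		/* Common code: per-slot zap of the old gfn range. */
		kvm_arch_flush_shadow_memslot(kvm, old_slot);

	/* ... install the updated memslot array ... */

	/*
	 * kvm_arch_commit_memory_region() runs after that; with this
	 * patch its x86 implementation zaps only mmio-cached shadow
	 * pages for KVM_MR_CREATE/KVM_MR_MOVE instead of zapping all.
	 */

	return 0;
}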
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b84310a..028b03f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -768,6 +768,7 @@ void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
 				     struct kvm_memory_slot *slot,
 				     gfn_t gfn_offset, unsigned long mask);
 void kvm_mmu_zap_all(struct kvm *kvm);
+void kvm_mmu_zap_mmio_sptes(struct kvm *kvm);
 unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
 void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index de45ec1..c1a9b7b 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4189,6 +4189,24 @@ restart:
 	spin_unlock(&kvm->mmu_lock);
 }

+void kvm_mmu_zap_mmio_sptes(struct kvm *kvm)
+{
+	struct kvm_mmu_page *sp, *node;
+	LIST_HEAD(invalid_list);
+
+	spin_lock(&kvm->mmu_lock);
+restart:
+	list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) {
+		if (!sp->mmio_cached)
+			continue;
+		if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list))
+			goto restart;
+	}
+
+	kvm_mmu_commit_zap_page(kvm, &invalid_list);
+	spin_unlock(&kvm->mmu_lock);
+}
+
 static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
 {
 	struct kvm *kvm;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 35b4912..16b6df2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6969,7 +6969,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 	 * mmio sptes.
 	 */
 	if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
-		kvm_mmu_zap_all(kvm);
+		kvm_mmu_zap_mmio_sptes(kvm);
 		kvm_reload_remote_mmus(kvm);
 	}
 }
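A note on the loop shape in kvm_mmu_zap_mmio_sptes(): kvm_mmu_prepare_zap_page() can unlink more pages than the one passed in (unsynced children go together with their parent) and returns how many it queued, so even the saved next pointer of the _safe iterator may point at a page that is already gone; that is why the walk restarts from the head whenever anything was zapped, and why the queued pages are freed in a single kvm_mmu_commit_zap_page() after the walk. Below is a self-contained userspace sketch of the same restart-on-mutation idiom on a plain singly linked list; the names and the "collateral removal" rule are purely illustrative, not kernel code.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct page {
	int id;
	bool mmio_cached;
	struct page *next;
};

/* Unlink and free the victim.  To model collateral zapping (children
 * going away with their parent), the node after the victim is dropped
 * too, so the caller's iterator may be left pointing at freed memory
 * unless it restarts.  Returns the number of nodes removed. */
static int prepare_zap(struct page **head, struct page *victim)
{
	for (struct page **pp = head; *pp; pp = &(*pp)->next) {
		if (*pp == victim) {
			struct page *child = victim->next;

			*pp = child ? child->next : NULL;
			free(victim);
			free(child);            /* free(NULL) is a no-op */
			return child ? 2 : 1;
		}
	}
	return 0;
}

/* Mirrors the shape of kvm_mmu_zap_mmio_sptes(): restart the walk
 * whenever prepare_zap() removed something, because nodes beyond the
 * current cursor may already be gone. */
static void zap_mmio_pages(struct page **head)
{
restart:
	for (struct page *p = *head; p; p = p->next) {
		if (!p->mmio_cached)
			continue;
		if (prepare_zap(head, p))
			goto restart;
	}
}

int main(void)
{
	struct page *head = NULL;

	/* Build a 10-node list; mark every third node as mmio-cached. */
	for (int i = 9; i >= 0; i--) {
		struct page *p = malloc(sizeof(*p));
		p->id = i;
		p->mmio_cached = (i % 3 == 0);
		p->next = head;
		head = p;
	}

	zap_mmio_pages(&head);

	while (head) {
		struct page *next = head->next;
		printf("kept page %d\n", head->id);
		free(head);
		head = next;
	}
	return 0;
}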
When we create or move a memory slot, we need to zap mmio sptes.
Currently, zap_all() is used for this and this is causing two problems:
  - extra page faults after zapping mmu pages
  - long mmu_lock hold time during zapping mmu pages

For the latter, Marcelo reported a disastrous mmu_lock hold time during
hot-plug, which made the guest unresponsive for a long time.

This patch takes a simple way to fix these problems: do not zap mmu
pages unless they are marked mmio cached.  On our test box, this took
only 50us for the 4GB guest and we did not see ms of mmu_lock hold time
any more.

Note that we still need to do zap_all() for other cases.  So another
work is also needed: Xiao's work may be the one.

Signed-off-by: Takuya Yoshikawa <yoshikawa_takuya_b1@lab.ntt.co.jp>
---
 arch/x86/include/asm/kvm_host.h |    1 +
 arch/x86/kvm/mmu.c              |   18 ++++++++++++++++++
 arch/x86/kvm/x86.c              |    2 +-
 3 files changed, 20 insertions(+), 1 deletions(-)
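This patch only consumes the mmio_cached flag; where the flag gets set is not shown in this thread and belongs to an earlier patch in the series. A minimal sketch of how the producer side presumably looks, with the helpers (page_header(), mmu_spte_set(), shadow_mmio_mask, the access masks) taken from arch/x86/kvm/mmu.c of the same period; treat the whole block as an assumption, not as part of this diff.

/* Sketch (not part of this patch): mark the shadow page that holds a
 * cached mmio spte so kvm_mmu_zap_mmio_sptes() can find it later.
 * Helper names are assumed from the mmu.c of the same era. */
static void mark_mmio_spte(u64 *sptep, u64 gfn, unsigned access)
{
	struct kvm_mmu_page *sp = page_header(__pa(sptep));

	access &= ACC_WRITE_MASK | ACC_USER_MASK;

	sp->mmio_cached = true;   /* flag checked by kvm_mmu_zap_mmio_sptes() */
	trace_mark_mmio_spte(sptep, gfn, access);
	mmu_spte_set(sptep, shadow_mmio_mask | access | gfn << PAGE_SHIFT);
}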