
[2/2] KVM: x86: Optimize mmio spte zapping when creating/moving memslot

Message ID 20130312174530.489f793c.yoshikawa_takuya_b1@lab.ntt.co.jp (mailing list archive)
State New, archived

Commit Message

Takuya Yoshikawa March 12, 2013, 8:45 a.m. UTC
When we create or move a memory slot, we need to zap mmio sptes.
Currently, zap_all() is used for this, which causes two problems:
 - extra page faults after zapping mmu pages
 - long mmu_lock hold time while zapping mmu pages

For the latter, Marcelo reported a disastrous mmu_lock hold time during
hot-plug, which made the guest unresponsive for a long time.

This patch takes a simple approach to fix these problems: do not zap mmu
pages unless they are marked mmio cached.  On our test box, this took
only 50us for a 4GB guest, and we no longer saw millisecond-long
mmu_lock hold times.

Note that we still need zap_all() for other cases, so further work is
needed there: Xiao's work may be the one.

Signed-off-by: Takuya Yoshikawa <yoshikawa_takuya_b1@lab.ntt.co.jp>
---
 arch/x86/include/asm/kvm_host.h |    1 +
 arch/x86/kvm/mmu.c              |   18 ++++++++++++++++++
 arch/x86/kvm/x86.c              |    2 +-
 3 files changed, 20 insertions(+), 1 deletions(-)
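
For reference, the mmio_cached flag tested in kvm_mmu_zap_mmio_sptes() below
is set when an mmio spte is cached (presumably introduced by patch 1/2 of
this series).  A minimal sketch of how such marking might look, illustrative
only and not necessarily the exact code from patch 1/2:

static void mark_mmio_spte(u64 *sptep, u64 gfn, unsigned access)
{
	/*
	 * Sketch: mark the shadow page that contains this mmio spte so
	 * that kvm_mmu_zap_mmio_sptes() can skip shadow pages holding
	 * no mmio sptes at all.
	 */
	struct kvm_mmu_page *sp = page_header(__pa(sptep));

	sp->mmio_cached = true;
	trace_mark_mmio_spte(sptep, gfn, access);
	mmu_spte_set(sptep, shadow_mmio_mask | access | gfn << PAGE_SHIFT);
}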

Comments

Gleb Natapov March 12, 2013, 12:06 p.m. UTC | #1
On Tue, Mar 12, 2013 at 05:45:30PM +0900, Takuya Yoshikawa wrote:
> When we create or move a memory slot, we need to zap mmio sptes.
> Currently, zap_all() is used for this, which causes two problems:
>  - extra page faults after zapping mmu pages
>  - long mmu_lock hold time while zapping mmu pages
> 
> For the latter, Marcelo reported a disastrous mmu_lock hold time during
> hot-plug, which made the guest unresponsive for a long time.
> 
> This patch takes a simple approach to fix these problems: do not zap mmu
> pages unless they are marked mmio cached.  On our test box, this took
> only 50us for a 4GB guest, and we no longer saw millisecond-long
> mmu_lock hold times.
> 
> Note that we still need zap_all() for other cases, so further work is
> needed there: Xiao's work may be the one.
> 
> Signed-off-by: Takuya Yoshikawa <yoshikawa_takuya_b1@lab.ntt.co.jp>
> ---
>  arch/x86/include/asm/kvm_host.h |    1 +
>  arch/x86/kvm/mmu.c              |   18 ++++++++++++++++++
>  arch/x86/kvm/x86.c              |    2 +-
>  3 files changed, 20 insertions(+), 1 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index b84310a..028b03f 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -768,6 +768,7 @@ void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
>  				     struct kvm_memory_slot *slot,
>  				     gfn_t gfn_offset, unsigned long mask);
>  void kvm_mmu_zap_all(struct kvm *kvm);
> +void kvm_mmu_zap_mmio_sptes(struct kvm *kvm);
>  unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
>  void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
>  
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index de45ec1..c1a9b7b 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -4189,6 +4189,24 @@ restart:
>  	spin_unlock(&kvm->mmu_lock);
>  }
>  
> +void kvm_mmu_zap_mmio_sptes(struct kvm *kvm)
> +{
> +	struct kvm_mmu_page *sp, *node;
> +	LIST_HEAD(invalid_list);
> +
> +	spin_lock(&kvm->mmu_lock);
> +restart:
> +	list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) {
> +		if (!sp->mmio_cached)
> +			continue;
> +		if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list))
> +			goto restart;
> +	}
> +
> +	kvm_mmu_commit_zap_page(kvm, &invalid_list);
> +	spin_unlock(&kvm->mmu_lock);
> +}
> +
>  static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
>  {
>  	struct kvm *kvm;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 35b4912..16b6df2 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -6969,7 +6969,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
>  	 * mmio sptes.
>  	 */
>  	if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
I wonder why we check for KVM_MR_MOVE here. For KVM_MR_MOVE,
kvm_mmu_zap_all() should be called, and it is indeed called by the common code.

> -		kvm_mmu_zap_all(kvm);
> +		kvm_mmu_zap_mmio_sptes(kvm);
>  		kvm_reload_remote_mmus(kvm);
>  	}
>  }
> -- 
> 1.7.5.4

--
			Gleb.
Marcelo Tosatti March 13, 2013, 1:40 a.m. UTC | #2
On Tue, Mar 12, 2013 at 02:06:22PM +0200, Gleb Natapov wrote:
> On Tue, Mar 12, 2013 at 05:45:30PM +0900, Takuya Yoshikawa wrote:
> > [...]
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 35b4912..16b6df2 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -6969,7 +6969,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
> >  	 * mmio sptes.
> >  	 */
> >  	if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
> I wonder why we check for KVM_MR_MOVE here. For KVM_MR_MOVE,
> kvm_mmu_zap_all() should be called, and it is indeed called by the common code.

It's per memslot; the common code flushes it via:

kvm_arch_flush_shadow_memslot(kvm, slot);
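
For context, on x86 at the time the common-code flush for a moved or deleted
slot fell through to a full zap; roughly (paraphrased, not a verbatim excerpt
of the tree this patch applies to):

void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
	/* zap every shadow page and make vcpus reload their mmu */
	kvm_mmu_zap_all(kvm);
	kvm_reload_remote_mmus(kvm);
}

void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
				   struct kvm_memory_slot *slot)
{
	/* no per-slot zapping on x86 yet, so flush everything */
	kvm_arch_flush_shadow_all(kvm);
}

That is, the hook is a per-slot flush by contract, even though the x86
implementation of the day happened to zap everything.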


Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b84310a..028b03f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -768,6 +768,7 @@  void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
 				     struct kvm_memory_slot *slot,
 				     gfn_t gfn_offset, unsigned long mask);
 void kvm_mmu_zap_all(struct kvm *kvm);
+void kvm_mmu_zap_mmio_sptes(struct kvm *kvm);
 unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
 void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
 
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index de45ec1..c1a9b7b 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4189,6 +4189,24 @@  restart:
 	spin_unlock(&kvm->mmu_lock);
 }
 
+void kvm_mmu_zap_mmio_sptes(struct kvm *kvm)
+{
+	struct kvm_mmu_page *sp, *node;
+	LIST_HEAD(invalid_list);
+
+	spin_lock(&kvm->mmu_lock);
+restart:
+	list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) {
+		if (!sp->mmio_cached)
+			continue;
+		if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list))
+			goto restart;
+	}
+
+	kvm_mmu_commit_zap_page(kvm, &invalid_list);
+	spin_unlock(&kvm->mmu_lock);
+}
+
 static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
 {
 	struct kvm *kvm;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 35b4912..16b6df2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6969,7 +6969,7 @@  void kvm_arch_commit_memory_region(struct kvm *kvm,
 	 * mmio sptes.
 	 */
 	if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
-		kvm_mmu_zap_all(kvm);
+		kvm_mmu_zap_mmio_sptes(kvm);
 		kvm_reload_remote_mmus(kvm);
 	}
 }