
[01/11] KVM: MMU: avoid pte_list_desc run out in kvm_mmu_pte_write

Message ID 4E2EA41A.3080606@cn.fujitsu.com (mailing list archive)
State New, archived

Commit Message

Xiao Guangrong July 26, 2011, 11:25 a.m. UTC
kvm_mmu_pte_write is unsafe because it needs to allocate pte_list_desc
objects when sptes are prefetched. Unfortunately, we cannot know in
advance how many sptes will be prefetched on this path, so we may run
out of free pte_list_desc objects in the cache and trigger the BUG_ON().
In addition, some paths do not fill the cache at all, for example
emulation of the INS instruction, which does not go through a page fault.

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
---
 arch/x86/kvm/mmu.c |   20 +++++++++++++++-----
 1 files changed, 15 insertions(+), 5 deletions(-)
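
The BUG_ON() in question is the one in KVM's MMU memory-cache
allocator, which hands out objects on the assumption that
mmu_topup_memory_caches() already filled the cache. A rough sketch of
that allocator (sketch only; the exact signature in this tree may differ):

	static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
	{
		void *p;

		/*
		 * Assumes the cache was topped up beforehand.  This fires
		 * when a path consumes more objects than were preallocated,
		 * e.g. pte prefetch needing pte_list_desc entries on a path
		 * that never filled the cache.
		 */
		BUG_ON(!mc->nobjs);
		p = mc->objects[--mc->nobjs];
		return p;
	}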

Comments

Avi Kivity July 27, 2011, 9 a.m. UTC | #1
On 07/26/2011 02:25 PM, Xiao Guangrong wrote:
> kvm_mmu_pte_write is unsafe because it needs to allocate pte_list_desc
> objects when sptes are prefetched. Unfortunately, we cannot know in
> advance how many sptes will be prefetched on this path, so we may run
> out of free pte_list_desc objects in the cache and trigger the BUG_ON().
> In addition, some paths do not fill the cache at all, for example
> emulation of the INS instruction, which does not go through a page fault.
>
>
>   void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
>   		       const u8 *new, int bytes,
>   		       bool guest_initiated)
> @@ -3583,6 +3596,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
>   		break;
>   	}
>
> +	mmu_topup_memory_caches(vcpu);

Please add a comment here describing why it's okay to ignore the error 
return.
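Something along these lines would do (only a sketch of the wording):

	/*
	 * No need to care whether the topup succeeded or failed here:
	 * if it failed, pte prefetch is simply skipped because the
	 * pte_list_desc cache has no free objects left.
	 */
	mmu_topup_memory_caches(vcpu);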

>   	spin_lock(&vcpu->kvm->mmu_lock);
>   	if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter)
>   		gentry = 0;
> @@ -3653,7 +3667,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
>   			mmu_page_zap_pte(vcpu->kvm, sp, spte);
>   			if (gentry &&
>   			      !((sp->role.word ^ vcpu->arch.mmu.base_role.word)
> -			      & mask.word))
> +			      & mask.word) && get_free_pte_list_desc_nr(vcpu))
>   				mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);

Wow, this bug was here since 2.6.23.  Good catch.

Please wrap or rename get_free_pte_list_desc_nr() in rmap_can_add(vcpu) 
so it's clearer why we're doing this.
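For example, a thin wrapper around the helper this patch already adds
(just a sketch; the final name and placement are up to you):

	static bool rmap_can_add(struct kvm_vcpu *vcpu)
	{
		struct kvm_mmu_memory_cache *cache;

		cache = &vcpu->arch.mmu_pte_list_desc_cache;
		return mmu_memory_cache_free_object(cache) > 0;
	}

The condition in kvm_mmu_pte_write would then end with
"&& rmap_can_add(vcpu)" instead of calling the counter directly.
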
Xiao Guangrong July 27, 2011, 9:37 a.m. UTC | #2
On 07/27/2011 05:00 PM, Avi Kivity wrote:
> On 07/26/2011 02:25 PM, Xiao Guangrong wrote:
>> kvm_mmu_pte_write is unsafe because it needs to allocate pte_list_desc
>> objects when sptes are prefetched. Unfortunately, we cannot know in
>> advance how many sptes will be prefetched on this path, so we may run
>> out of free pte_list_desc objects in the cache and trigger the BUG_ON().
>> In addition, some paths do not fill the cache at all, for example
>> emulation of the INS instruction, which does not go through a page fault.
>>
>>
>>   void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
>>                  const u8 *new, int bytes,
>>                  bool guest_initiated)
>> @@ -3583,6 +3596,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
>>           break;
>>       }
>>
>> +    mmu_topup_memory_caches(vcpu);
> 
> Please add a comment here describing why it's okay to ignore the error return.
> 

OK

>>       spin_lock(&vcpu->kvm->mmu_lock);
>>       if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter)
>>           gentry = 0;
>> @@ -3653,7 +3667,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
>>               mmu_page_zap_pte(vcpu->kvm, sp, spte);
>>               if (gentry &&
>>               !((sp->role.word ^ vcpu->arch.mmu.base_role.word)
>> -            & mask.word))
>> +            & mask.word) && get_free_pte_list_desc_nr(vcpu))
>>                   mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
> 
> Wow, this bug was here since 2.6.23.  Good catch.
> 
> Please wrap or rename get_free_pte_list_desc_nr() in rmap_can_add(vcpu) so it's clearer why we're doing this.
> 

OK, will do, thanks!


Patch

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 9335e1b..587695b 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -593,6 +593,11 @@  static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
 	return 0;
 }
 
+static int mmu_memory_cache_free_object(struct kvm_mmu_memory_cache *cache)
+{
+	return cache->nobjs;
+}
+
 static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc,
 				  struct kmem_cache *cache)
 {
@@ -3526,6 +3531,14 @@  static void kvm_mmu_access_page(struct kvm_vcpu *vcpu, gfn_t gfn)
 		set_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte);
 }
 
+static int get_free_pte_list_desc_nr(struct kvm_vcpu *vcpu)
+{
+	struct kvm_mmu_memory_cache *cache;
+
+	cache = &vcpu->arch.mmu_pte_list_desc_cache;
+	return mmu_memory_cache_free_object(cache);
+}
+
 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 		       const u8 *new, int bytes,
 		       bool guest_initiated)
@@ -3583,6 +3596,7 @@  void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 		break;
 	}
 
+	mmu_topup_memory_caches(vcpu);
 	spin_lock(&vcpu->kvm->mmu_lock);
 	if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter)
 		gentry = 0;
@@ -3653,7 +3667,7 @@  void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 			mmu_page_zap_pte(vcpu->kvm, sp, spte);
 			if (gentry &&
 			      !((sp->role.word ^ vcpu->arch.mmu.base_role.word)
-			      & mask.word))
+			      & mask.word) && get_free_pte_list_desc_nr(vcpu))
 				mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
 			if (!remote_flush && need_remote_flush(entry, *spte))
 				remote_flush = true;
@@ -3714,10 +3728,6 @@  int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code,
 		goto out;
 	}
 
-	r = mmu_topup_memory_caches(vcpu);
-	if (r)
-		goto out;
-
 	er = x86_emulate_instruction(vcpu, cr2, 0, insn, insn_len);
 
 	switch (er) {