
[v3,10/15] KVM: MMU: allocate shadow pages from slab

Message ID 1382534973-13197-11-git-send-email-xiaoguangrong@linux.vnet.ibm.com (mailing list archive)
State New, archived

Commit Message

Xiao Guangrong Oct. 23, 2013, 1:29 p.m. UTC
Allocate shadow pages from a slab cache instead of the page allocator;
frequent shadow page allocation and freeing can then be served from the
slab cache, which is very useful for the shadow MMU.

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
---
 arch/x86/include/asm/kvm_host.h |  3 ++-
 arch/x86/kvm/mmu.c              | 46 ++++++++++++++++++++++++++++++++++-------
 2 files changed, 41 insertions(+), 8 deletions(-)

Comments

Gleb Natapov Oct. 24, 2013, 9:19 a.m. UTC | #1
On Wed, Oct 23, 2013 at 09:29:28PM +0800, Xiao Guangrong wrote:
> Allocate shadow pages from slab instead of page-allocator, frequent
> shadow page allocation and free can be hit in the slab cache, it is
> very useful for shadow mmu
> 
> Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
> ---
>  arch/x86/include/asm/kvm_host.h |  3 ++-
>  arch/x86/kvm/mmu.c              | 46 ++++++++++++++++++++++++++++++++++-------
>  2 files changed, 41 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 5cbf316..df9ae10 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -389,6 +389,7 @@ struct kvm_vcpu_arch {
>  	struct kvm_mmu *walk_mmu;
>  
>  	struct kvm_mmu_memory_cache mmu_pte_list_desc_cache;
> +	struct kvm_mmu_memory_cache mmu_shadow_page_cache;
>  	struct kvm_mmu_memory_cache mmu_page_cache;
>  	struct kvm_mmu_memory_cache mmu_page_header_cache;
>  
> @@ -946,7 +947,7 @@ static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
>  {
>  	struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
>  
> -	return (struct kvm_mmu_page *)page_private(page);
> +	return (struct kvm_mmu_page *)(page->mapping);
Why?

>  }
>  
>  static inline u16 kvm_read_ldt(void)
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index f3ae74e6..1bcc8c8 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -178,6 +178,7 @@ struct kvm_shadow_walk_iterator {
>  	     __shadow_walk_next(&(_walker), spte))
>  
>  static struct kmem_cache *pte_list_desc_cache;
> +static struct kmem_cache *mmu_shadow_page_cache;
>  static struct kmem_cache *mmu_page_header_cache;
>  static struct percpu_counter kvm_total_used_mmu_pages;
>  
> @@ -746,7 +747,14 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
>  				   GFP_KERNEL);
>  	if (r)
>  		goto out;
> -	r = mmu_topup_memory_cache_page(&vcpu->arch.mmu_page_cache, 8);
> +
> +	r = mmu_topup_memory_cache(&vcpu->arch.mmu_shadow_page_cache,
> +				   mmu_shadow_page_cache, 4,
> +				   GFP_KERNEL);
> +	if (r)
> +		goto out;
> +
> +	r = mmu_topup_memory_cache_page(&vcpu->arch.mmu_page_cache, 4);
>  	if (r)
>  		goto out;
>  	r = mmu_topup_memory_cache(&vcpu->arch.mmu_page_header_cache,
> @@ -760,6 +768,8 @@ static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
>  {
>  	mmu_free_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache,
>  				pte_list_desc_cache);
> +	mmu_free_memory_cache(&vcpu->arch.mmu_shadow_page_cache,
> +				mmu_shadow_page_cache);
>  	mmu_free_memory_cache_page(&vcpu->arch.mmu_page_cache);
>  	mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache,
>  				mmu_page_header_cache);
> @@ -1675,12 +1685,28 @@ static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, int nr)
>  	percpu_counter_add(&kvm_total_used_mmu_pages, nr);
>  }
>  
> +static void set_page_header(struct kvm_mmu_page *sp)
> +{
> +	struct page *page = virt_to_page(sp->spt);
> +
> +	WARN_ON(page->mapping);
> +	page->mapping = (struct address_space *)sp;
> +}
> +
> +static void clear_page_header(struct kvm_mmu_page *sp)
> +{
> +	struct page *page = virt_to_page(sp->spt);
> +
> +	page->mapping = NULL;
> +}
> +
>  static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
>  {
>  	ASSERT(is_empty_shadow_page(sp->spt));
>  	hlist_del(&sp->hash_link);
>  	list_del(&sp->link);
> -	free_page((unsigned long)sp->spt);
> +	clear_page_header(sp);
> +	kmem_cache_free(mmu_shadow_page_cache, sp->spt);
>  	if (!sp->role.direct)
>  		free_page((unsigned long)sp->gfns);
>  	kmem_cache_free(mmu_page_header_cache, sp);
> @@ -1719,10 +1745,10 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
>  	struct kvm_mmu_page *sp;
>  
>  	sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache);
> -	sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache);
> +	sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_shadow_page_cache);
>  	if (!direct)
>  		sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache);
> -	set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
> +	set_page_header(sp);
>  
>  	/*
>  	 * The active_mmu_pages list is the FIFO list, do not move the
> @@ -2046,12 +2072,13 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu,
>  	}
>  }
>  
> -static void init_shadow_page_table(struct kvm_mmu_page *sp)
> +static void init_shadow_page_table(void *p)
>  {
> +	u64 *sptp = (u64 *)p;
>  	int i;
>  
>  	for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
> -		sp->spt[i] = 0ull;
> +		sptp[i] = 0ull;
>  }
>  
>  static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp)
> @@ -2137,7 +2164,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
>  		account_shadowed(vcpu->kvm, gfn);
>  	}
>  	sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen;
> -	init_shadow_page_table(sp);
>  	trace_kvm_mmu_get_page(sp, true);
>  	return sp;
>  }
> @@ -4683,6 +4709,12 @@ int kvm_mmu_module_init(void)
>  	if (!pte_list_desc_cache)
>  		goto nomem;
>  
> +	mmu_shadow_page_cache = kmem_cache_create("mmu_shadow_page_cache",
> +						  PAGE_SIZE, PAGE_SIZE, 0,
> +						  init_shadow_page_table);
> +	if (!mmu_shadow_page_cache)
> +		goto nomem;
> +
>  	mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header",
>  						  sizeof(struct kvm_mmu_page),
>  						  0, 0, NULL);
> -- 
> 1.8.1.4

--
			Gleb.
Xiao Guangrong Oct. 24, 2013, 9:29 a.m. UTC | #2
On 10/24/2013 05:19 PM, Gleb Natapov wrote:

>> @@ -946,7 +947,7 @@ static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
>>  {
>>  	struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
>>  
>> -	return (struct kvm_mmu_page *)page_private(page);
>> +	return (struct kvm_mmu_page *)(page->mapping);
> Why?

That's because page->private is already used by the slab allocator:

	/* Remainder is not double word aligned */
	union {
		unsigned long private;		/* Mapping-private opaque data:
					 	 * usually used for buffer_heads
						 * if PagePrivate set; used for
						 * swp_entry_t if PageSwapCache;
						 * indicates order in the buddy
						 * system if PG_buddy is set.
						 */
#if USE_SPLIT_PTLOCKS
		spinlock_t ptl;
#endif
		struct kmem_cache *slab_cache;	/* SL[AU]B: Pointer to slab */
		struct page *first_page;	/* Compound tail pages */
	};
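
To make the conflict concrete, here is an illustration (not part of the
patch): page_private() stores into the same union that SL[AU]B uses for
its per-page bookkeeping, so keeping the old back-pointer scheme for a
slab-backed sp->spt would clobber the cache's own data:

	struct page *page = virt_to_page(sp->spt);	/* sp->spt now comes from a kmem_cache */

	set_page_private(page, (unsigned long)sp);	/* overwrites page->slab_cache (same union) */
	kmem_cache_free(mmu_shadow_page_cache, sp->spt);	/* the slab may now consult a bogus cache pointer */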

Gleb Natapov Oct. 24, 2013, 9:52 a.m. UTC | #3
On Thu, Oct 24, 2013 at 05:29:44PM +0800, Xiao Guangrong wrote:
> On 10/24/2013 05:19 PM, Gleb Natapov wrote:
> 
> >> @@ -946,7 +947,7 @@ static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
> >>  {
> >>  	struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
> >>  
> >> -	return (struct kvm_mmu_page *)page_private(page);
> >> +	return (struct kvm_mmu_page *)(page->mapping);
> > Why?
> 
> That's because page->private has been used by slab:
>
But does the lockless path actually look at it?
  
> 	/* Remainder is not double word aligned */
> 	union {
> 		unsigned long private;		/* Mapping-private opaque data:
> 					 	 * usually used for buffer_heads
> 						 * if PagePrivate set; used for
> 						 * swp_entry_t if PageSwapCache;
> 						 * indicates order in the buddy
> 						 * system if PG_buddy is set.
> 						 */
> #if USE_SPLIT_PTLOCKS
> 		spinlock_t ptl;
> #endif
> 		struct kmem_cache *slab_cache;	/* SL[AU]B: Pointer to slab */
> 		struct page *first_page;	/* Compound tail pages */
> 	};

--
			Gleb.
Xiao Guangrong Oct. 24, 2013, 10:10 a.m. UTC | #4
On 10/24/2013 05:52 PM, Gleb Natapov wrote:
> On Thu, Oct 24, 2013 at 05:29:44PM +0800, Xiao Guangrong wrote:
>> On 10/24/2013 05:19 PM, Gleb Natapov wrote:
>>
>>>> @@ -946,7 +947,7 @@ static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
>>>>  {
>>>>  	struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
>>>>  
>>>> -	return (struct kvm_mmu_page *)page_private(page);
>>>> +	return (struct kvm_mmu_page *)(page->mapping);
>>> Why?
>>
>> That's because page->private has been used by slab:
>>
> But does lockless path actually looks at it?

The lockless path does not use it; however, it is used by page_header():

static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
{
	struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);

	return (struct kvm_mmu_page *)(page->mapping);
}

which is used in the common code.
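
For instance, mmu.c recovers the shadow page from a raw spte pointer in
many places (rmap_remove(), mark_unsync(), ...); a sketch of that pattern
(the wrapper name is made up):

	static struct kvm_mmu_page *spte_to_sp(u64 *sptep)
	{
		/* must keep working when sp->spt is slab-allocated */
		return page_header(__pa(sptep));
	}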

Gleb Natapov Oct. 24, 2013, 10:39 a.m. UTC | #5
On Thu, Oct 24, 2013 at 06:10:46PM +0800, Xiao Guangrong wrote:
> On 10/24/2013 05:52 PM, Gleb Natapov wrote:
> > On Thu, Oct 24, 2013 at 05:29:44PM +0800, Xiao Guangrong wrote:
> >> On 10/24/2013 05:19 PM, Gleb Natapov wrote:
> >>
> >>>> @@ -946,7 +947,7 @@ static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
> >>>>  {
> >>>>  	struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
> >>>>  
> >>>> -	return (struct kvm_mmu_page *)page_private(page);
> >>>> +	return (struct kvm_mmu_page *)(page->mapping);
> >>> Why?
> >>
> >> That's because page->private has been used by slab:
> >>
> > But does lockless path actually looks at it?
> 
> Lockless path does not use it, however, it is used by kvm_mmu_page():
> 
> static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
> {
> 	struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
> 
> 	return (struct kvm_mmu_page *)(page->mapping);
> }
> 
> which is used in the common code.
Ah, so the pointer is not available even after the object is allocated.
Makes sense, since we allocate an object, not a page, here, but is it
safe to use mapping like that?

--
			Gleb.
Xiao Guangrong Oct. 24, 2013, 11:01 a.m. UTC | #6
On 10/24/2013 06:39 PM, Gleb Natapov wrote:
> On Thu, Oct 24, 2013 at 06:10:46PM +0800, Xiao Guangrong wrote:
>> On 10/24/2013 05:52 PM, Gleb Natapov wrote:
>>> On Thu, Oct 24, 2013 at 05:29:44PM +0800, Xiao Guangrong wrote:
>>>> On 10/24/2013 05:19 PM, Gleb Natapov wrote:
>>>>
>>>>>> @@ -946,7 +947,7 @@ static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
>>>>>>  {
>>>>>>  	struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
>>>>>>  
>>>>>> -	return (struct kvm_mmu_page *)page_private(page);
>>>>>> +	return (struct kvm_mmu_page *)(page->mapping);
>>>>> Why?
>>>>
>>>> That's because page->private has been used by slab:
>>>>
>>> But does lockless path actually looks at it?
>>
>> Lockless path does not use it, however, it is used by kvm_mmu_page():
>>
>> static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
>> {
>> 	struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
>>
>> 	return (struct kvm_mmu_page *)(page->mapping);
>> }
>>
>> which is used in the common code.
> Ah, so the pointer is not available even after object is allocated.
> Make sense since we allocate object, not page here, but is it safe to
> use mapping like that?

The comment says:

	struct address_space *mapping;	/* If low bit clear, points to
					 * inode address_space, or NULL.
					 * If page mapped as anonymous
					 * memory, low bit is set, and
					 * it points to anon_vma object:
					 * see PAGE_MAPPING_ANON below.

It seems mapping is used for an address_space or for anonymous memory; in
our case the page is owned by the slab allocator, so I guess it is ok. And
the WARN_ON() I put in set_page_header() was not triggered on either slab
or slub.
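
And the clear side matters for the same reason: the slab eventually hands
the page back to the buddy allocator, whose free-time sanity checks
complain about a non-NULL page->mapping, so the back-pointer is dropped
before the object goes back to the slab (this is what the patch already
does in kvm_mmu_free_page()):

	clear_page_header(sp);				/* page->mapping = NULL again */
	kmem_cache_free(mmu_shadow_page_cache, sp->spt);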


Gleb Natapov Oct. 24, 2013, 12:32 p.m. UTC | #7
On Thu, Oct 24, 2013 at 07:01:49PM +0800, Xiao Guangrong wrote:
> On 10/24/2013 06:39 PM, Gleb Natapov wrote:
> > On Thu, Oct 24, 2013 at 06:10:46PM +0800, Xiao Guangrong wrote:
> >> On 10/24/2013 05:52 PM, Gleb Natapov wrote:
> >>> On Thu, Oct 24, 2013 at 05:29:44PM +0800, Xiao Guangrong wrote:
> >>>> On 10/24/2013 05:19 PM, Gleb Natapov wrote:
> >>>>
> >>>>>> @@ -946,7 +947,7 @@ static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
> >>>>>>  {
> >>>>>>  	struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
> >>>>>>  
> >>>>>> -	return (struct kvm_mmu_page *)page_private(page);
> >>>>>> +	return (struct kvm_mmu_page *)(page->mapping);
> >>>>> Why?
> >>>>
> >>>> That's because page->private has been used by slab:
> >>>>
> >>> But does lockless path actually looks at it?
> >>
> >> Lockless path does not use it, however, it is used by kvm_mmu_page():
> >>
> >> static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
> >> {
> >> 	struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
> >>
> >> 	return (struct kvm_mmu_page *)(page->mapping);
> >> }
> >>
> >> which is used in the common code.
> > Ah, so the pointer is not available even after object is allocated.
> > Make sense since we allocate object, not page here, but is it safe to
> > use mapping like that?
> 
> The commens says:
> 
> 	struct address_space *mapping;	/* If low bit clear, points to
> 					 * inode address_space, or NULL.
> 					 * If page mapped as anonymous
> 					 * memory, low bit is set, and
> 					 * it points to anon_vma object:
> 					 * see PAGE_MAPPING_ANON below.
> 
> It seems mapping is used for address_space or anonymous memory, in
> our case, the page is used by slab, so I guess it is ok. And the bug
> i put in set_page_header() was not tiggered on both slab and slub.
> 
Yeah, I also think so. I asked Andrea (copied) and he thinks that it is
safe too currently, but things change fast in this area. Andrea?
Another option is to save the slab_cache pointer and restore it before
freeing the object, but it looks ugly.
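
Roughly, that alternative would look like this (sketch only; the extra
sp->slab_cache field is hypothetical and not in the patch):

	static void set_page_header(struct kvm_mmu_page *sp)
	{
		struct page *page = virt_to_page(sp->spt);

		sp->slab_cache = page->slab_cache;	/* hypothetical save slot */
		set_page_private(page, (unsigned long)sp);
	}

	static void clear_page_header(struct kvm_mmu_page *sp)
	{
		struct page *page = virt_to_page(sp->spt);

		page->slab_cache = sp->slab_cache;	/* restore before kmem_cache_free() */
	}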

--
			Gleb.
Xiao Guangrong Oct. 28, 2013, 3:16 a.m. UTC | #8
On 10/24/2013 08:32 PM, Gleb Natapov wrote:
> On Thu, Oct 24, 2013 at 07:01:49PM +0800, Xiao Guangrong wrote:
>> On 10/24/2013 06:39 PM, Gleb Natapov wrote:
>>> On Thu, Oct 24, 2013 at 06:10:46PM +0800, Xiao Guangrong wrote:
>>>> On 10/24/2013 05:52 PM, Gleb Natapov wrote:
>>>>> On Thu, Oct 24, 2013 at 05:29:44PM +0800, Xiao Guangrong wrote:
>>>>>> On 10/24/2013 05:19 PM, Gleb Natapov wrote:
>>>>>>
>>>>>>>> @@ -946,7 +947,7 @@ static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
>>>>>>>>  {
>>>>>>>>  	struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
>>>>>>>>  
>>>>>>>> -	return (struct kvm_mmu_page *)page_private(page);
>>>>>>>> +	return (struct kvm_mmu_page *)(page->mapping);
>>>>>>> Why?
>>>>>>
>>>>>> That's because page->private has been used by slab:
>>>>>>
>>>>> But does lockless path actually looks at it?
>>>>
>>>> Lockless path does not use it, however, it is used by kvm_mmu_page():
>>>>
>>>> static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
>>>> {
>>>> 	struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
>>>>
>>>> 	return (struct kvm_mmu_page *)(page->mapping);
>>>> }
>>>>
>>>> which is used in the common code.
>>> Ah, so the pointer is not available even after object is allocated.
>>> Make sense since we allocate object, not page here, but is it safe to
>>> use mapping like that?
>>
>> The commens says:
>>
>> 	struct address_space *mapping;	/* If low bit clear, points to
>> 					 * inode address_space, or NULL.
>> 					 * If page mapped as anonymous
>> 					 * memory, low bit is set, and
>> 					 * it points to anon_vma object:
>> 					 * see PAGE_MAPPING_ANON below.
>>
>> It seems mapping is used for address_space or anonymous memory, in
>> our case, the page is used by slab, so I guess it is ok. And the bug
>> i put in set_page_header() was not tiggered on both slab and slub.
>>
> Yeah, I also think so. I asked Andrea (copied) and he thinks that it is
> safe too currently, but things changes fast in this area. Andrea?
> Another option is too save slab_cache pointer and reset it before
> freeing the object but it looks ugly.

It's ugly but it isn't too bad. :)

Since KVM is currently used extensively to test and measure the Linux
kernel, and the WARN_ON() in set_page_header() helps us detect the
potential issue, it would be easy to fix the possible bug during the
development cycle if 'mapping' ever starts being used by slab. If that
really happens, maybe we can switch to your way instead.



Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 5cbf316..df9ae10 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -389,6 +389,7 @@  struct kvm_vcpu_arch {
 	struct kvm_mmu *walk_mmu;
 
 	struct kvm_mmu_memory_cache mmu_pte_list_desc_cache;
+	struct kvm_mmu_memory_cache mmu_shadow_page_cache;
 	struct kvm_mmu_memory_cache mmu_page_cache;
 	struct kvm_mmu_memory_cache mmu_page_header_cache;
 
@@ -946,7 +947,7 @@  static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
 {
 	struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
 
-	return (struct kvm_mmu_page *)page_private(page);
+	return (struct kvm_mmu_page *)(page->mapping);
 }
 
 static inline u16 kvm_read_ldt(void)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index f3ae74e6..1bcc8c8 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -178,6 +178,7 @@  struct kvm_shadow_walk_iterator {
 	     __shadow_walk_next(&(_walker), spte))
 
 static struct kmem_cache *pte_list_desc_cache;
+static struct kmem_cache *mmu_shadow_page_cache;
 static struct kmem_cache *mmu_page_header_cache;
 static struct percpu_counter kvm_total_used_mmu_pages;
 
@@ -746,7 +747,14 @@  static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
 				   GFP_KERNEL);
 	if (r)
 		goto out;
-	r = mmu_topup_memory_cache_page(&vcpu->arch.mmu_page_cache, 8);
+
+	r = mmu_topup_memory_cache(&vcpu->arch.mmu_shadow_page_cache,
+				   mmu_shadow_page_cache, 4,
+				   GFP_KERNEL);
+	if (r)
+		goto out;
+
+	r = mmu_topup_memory_cache_page(&vcpu->arch.mmu_page_cache, 4);
 	if (r)
 		goto out;
 	r = mmu_topup_memory_cache(&vcpu->arch.mmu_page_header_cache,
@@ -760,6 +768,8 @@  static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
 {
 	mmu_free_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache,
 				pte_list_desc_cache);
+	mmu_free_memory_cache(&vcpu->arch.mmu_shadow_page_cache,
+				mmu_shadow_page_cache);
 	mmu_free_memory_cache_page(&vcpu->arch.mmu_page_cache);
 	mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache,
 				mmu_page_header_cache);
@@ -1675,12 +1685,28 @@  static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, int nr)
 	percpu_counter_add(&kvm_total_used_mmu_pages, nr);
 }
 
+static void set_page_header(struct kvm_mmu_page *sp)
+{
+	struct page *page = virt_to_page(sp->spt);
+
+	WARN_ON(page->mapping);
+	page->mapping = (struct address_space *)sp;
+}
+
+static void clear_page_header(struct kvm_mmu_page *sp)
+{
+	struct page *page = virt_to_page(sp->spt);
+
+	page->mapping = NULL;
+}
+
 static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
 {
 	ASSERT(is_empty_shadow_page(sp->spt));
 	hlist_del(&sp->hash_link);
 	list_del(&sp->link);
-	free_page((unsigned long)sp->spt);
+	clear_page_header(sp);
+	kmem_cache_free(mmu_shadow_page_cache, sp->spt);
 	if (!sp->role.direct)
 		free_page((unsigned long)sp->gfns);
 	kmem_cache_free(mmu_page_header_cache, sp);
@@ -1719,10 +1745,10 @@  static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
 	struct kvm_mmu_page *sp;
 
 	sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache);
-	sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache);
+	sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_shadow_page_cache);
 	if (!direct)
 		sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache);
-	set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
+	set_page_header(sp);
 
 	/*
 	 * The active_mmu_pages list is the FIFO list, do not move the
@@ -2046,12 +2072,13 @@  static void mmu_sync_children(struct kvm_vcpu *vcpu,
 	}
 }
 
-static void init_shadow_page_table(struct kvm_mmu_page *sp)
+static void init_shadow_page_table(void *p)
 {
+	u64 *sptp = (u64 *)p;
 	int i;
 
 	for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
-		sp->spt[i] = 0ull;
+		sptp[i] = 0ull;
 }
 
 static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp)
@@ -2137,7 +2164,6 @@  static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 		account_shadowed(vcpu->kvm, gfn);
 	}
 	sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen;
-	init_shadow_page_table(sp);
 	trace_kvm_mmu_get_page(sp, true);
 	return sp;
 }
@@ -4683,6 +4709,12 @@  int kvm_mmu_module_init(void)
 	if (!pte_list_desc_cache)
 		goto nomem;
 
+	mmu_shadow_page_cache = kmem_cache_create("mmu_shadow_page_cache",
+						  PAGE_SIZE, PAGE_SIZE, 0,
+						  init_shadow_page_table);
+	if (!mmu_shadow_page_cache)
+		goto nomem;
+
 	mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header",
 						  sizeof(struct kvm_mmu_page),
 						  0, 0, NULL);