[v2,2/4] mm, page_owner: record page owner for each subpage

Message ID: 20190820131828.22684-3-vbabka@suse.cz (mailing list archive)
State: New, archived
Series: debug_pagealloc improvements through page_owner

Commit Message

Vlastimil Babka Aug. 20, 2019, 1:18 p.m. UTC
Currently, page owner info is recorded only for the first page of a high-order
allocation, and copied to the tail pages when such a page is split. With the
plan to keep the previous owner info after the page is freed, it would be
beneficial to record page owner for each subpage upon allocation. This
increases the overhead for high orders, but that should be acceptable for a
debugging option.

The order stored for each subpage is the order of the whole allocation. This
makes it possible to calculate the "head" pfn and to recognize "tail" pages
(quoted because not all high-order allocations are compound pages with true
head and tail pages). When reading the page_owner debugfs file, keep skipping
the "tail" pages so that stats gathered by existing scripts don't get inflated.

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
---
 mm/page_owner.c | 40 ++++++++++++++++++++++++++++------------
 1 file changed, 28 insertions(+), 12 deletions(-)
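
For illustration, a minimal standalone sketch of the head/tail recognition
this patch relies on. Every subpage stores the order of the whole allocation,
so the "head" is the one subpage whose pfn is aligned to the allocation size;
only IS_ALIGNED mirrors a kernel macro here, the other names are illustrative:

#include <stdbool.h>
#include <stdint.h>

/* Mirrors the kernel's IS_ALIGNED(): power-of-two alignment check. */
#define IS_ALIGNED(x, a)	(((x) & ((a) - 1)) == 0)

/*
 * E.g. an order-3 allocation spans pfns 0x1000..0x1007; only 0x1000 is
 * aligned to 8 pages, so only it is treated as the "head" and printed,
 * keeping stats gathered by existing scripts from being inflated.
 */
static bool page_owner_is_head(uint64_t pfn, unsigned int order)
{
	return IS_ALIGNED(pfn, 1ULL << order);
}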

Comments

Kirill A. Shutemov Sept. 24, 2019, 11:31 a.m. UTC | #1
On Tue, Aug 20, 2019 at 03:18:26PM +0200, Vlastimil Babka wrote:
> [...]
> diff --git a/mm/page_owner.c b/mm/page_owner.c
> index addcbb2ae4e4..813fcb70547b 100644
> --- a/mm/page_owner.c
> +++ b/mm/page_owner.c
> @@ -154,18 +154,23 @@ static noinline depot_stack_handle_t save_stack(gfp_t flags)
>  	return handle;
>  }
>  
> -static inline void __set_page_owner_handle(struct page_ext *page_ext,
> -	depot_stack_handle_t handle, unsigned int order, gfp_t gfp_mask)
> +static inline void __set_page_owner_handle(struct page *page,
> +	struct page_ext *page_ext, depot_stack_handle_t handle,
> +	unsigned int order, gfp_t gfp_mask)
>  {
>  	struct page_owner *page_owner;
> +	int i;
>  
> -	page_owner = get_page_owner(page_ext);
> -	page_owner->handle = handle;
> -	page_owner->order = order;
> -	page_owner->gfp_mask = gfp_mask;
> -	page_owner->last_migrate_reason = -1;
> +	for (i = 0; i < (1 << order); i++) {
> +		page_owner = get_page_owner(page_ext);
> +		page_owner->handle = handle;
> +		page_owner->order = order;
> +		page_owner->gfp_mask = gfp_mask;
> +		page_owner->last_migrate_reason = -1;
> +		__set_bit(PAGE_EXT_OWNER, &page_ext->flags);
>  
> -	__set_bit(PAGE_EXT_OWNER, &page_ext->flags);
> +		page_ext = lookup_page_ext(page + i);

Isn't it off-by-one? You are calculating page_ext for the next page,
right? And can't we just do page_ext++ here instead?

> +	}
>  }
>  
> [...]
> 
> @@ -204,8 +209,11 @@ void __split_page_owner(struct page *page, unsigned int order)
>  
>  	page_owner = get_page_owner(page_ext);
>  	page_owner->order = 0;
> -	for (i = 1; i < (1 << order); i++)
> -		__copy_page_owner(page, page + i);
> +	for (i = 1; i < (1 << order); i++) {
> +		page_ext = lookup_page_ext(page + i);

Again, page_ext++?

> +		page_owner = get_page_owner(page_ext);
> +		page_owner->order = 0;
> +	}
>  }
>  
> [...]
Vlastimil Babka Sept. 24, 2019, 3:10 p.m. UTC | #2
On 9/24/19 1:31 PM, Kirill A. Shutemov wrote:
> On Tue, Aug 20, 2019 at 03:18:26PM +0200, Vlastimil Babka wrote:
>> [...]
>> +		page_ext = lookup_page_ext(page + i);
> 
> Isn't it off-by-one? You are calculating page_ext for the next page,
> right?

You're right, thanks!

> And can't we just do page_ext++ here instead?

Unfortunately no, as that implies sizeof(struct page_ext), which only declares
'unsigned long flags;'. The size of the rest of each entry is only determined
at runtime. Perhaps I could add a wrapper named e.g. page_ext_next() that
would use get_entry_size() internally and hide the necessary casts to void *
and back?
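
For reference, a sketch of the corrected loop, assuming the page_ext_next()
wrapper proposed above exists. The posted version computed
lookup_page_ext(page + i) at the bottom of iteration i, i.e. the extension of
the page just processed rather than the next one:

	for (i = 0; i < (1 << order); i++) {
		page_owner = get_page_owner(page_ext);
		page_owner->handle = handle;
		page_owner->order = order;
		page_owner->gfp_mask = gfp_mask;
		page_owner->last_migrate_reason = -1;
		__set_bit(PAGE_EXT_OWNER, &page_ext->flags);

		/* advance to the entry for page + i + 1 by the runtime
		 * entry size, avoiding a full lookup per subpage */
		page_ext = page_ext_next(page_ext);
	}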
Kirill A. Shutemov Sept. 24, 2019, 3:16 p.m. UTC | #3
On Tue, Sep 24, 2019 at 05:10:59PM +0200, Vlastimil Babka wrote:
> On 9/24/19 1:31 PM, Kirill A. Shutemov wrote:
> > On Tue, Aug 20, 2019 at 03:18:26PM +0200, Vlastimil Babka wrote:
> >> [...]
> > 
> > Isn't it off-by-one? You are calculating page_ext for the next page,
> > right?
> 
> You're right, thanks!
> 
> > And can't we just do page_ext++ here instead?
> 
> Unfortunately no, as that implies sizeof(struct page_ext), which only declares
> 'unsigned long flags;'. The size of the rest of each entry is only determined
> at runtime. Perhaps I could add a wrapper named e.g. page_ext_next() that
> would use get_entry_size() internally and hide the necessary casts to void *
> and back?

Yeah, that looks less costly than calling lookup_page_ext() on each
iteration. And it looks like it can be inlined if we make 'extra_mem' visible
(under a different name) outside page_ext.c.
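
A minimal sketch of what such an inlinable helper could look like, assuming
the full per-entry size (struct page_ext plus the 'extra_mem' of all enabled
extensions) is computed at boot and exported under a hypothetical name,
page_ext_size:

	/* hypothetical export from page_ext.c */
	extern unsigned long page_ext_size;

	static inline struct page_ext *page_ext_next(struct page_ext *curr)
	{
		void *next = curr;

		/* entries sit back to back, but their size is only known
		 * at runtime, so curr + 1 would step too short */
		next += page_ext_size;
		return next;
	}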

Patch

diff --git a/mm/page_owner.c b/mm/page_owner.c
index addcbb2ae4e4..813fcb70547b 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -154,18 +154,23 @@ static noinline depot_stack_handle_t save_stack(gfp_t flags)
 	return handle;
 }
 
-static inline void __set_page_owner_handle(struct page_ext *page_ext,
-	depot_stack_handle_t handle, unsigned int order, gfp_t gfp_mask)
+static inline void __set_page_owner_handle(struct page *page,
+	struct page_ext *page_ext, depot_stack_handle_t handle,
+	unsigned int order, gfp_t gfp_mask)
 {
 	struct page_owner *page_owner;
+	int i;
 
-	page_owner = get_page_owner(page_ext);
-	page_owner->handle = handle;
-	page_owner->order = order;
-	page_owner->gfp_mask = gfp_mask;
-	page_owner->last_migrate_reason = -1;
+	for (i = 0; i < (1 << order); i++) {
+		page_owner = get_page_owner(page_ext);
+		page_owner->handle = handle;
+		page_owner->order = order;
+		page_owner->gfp_mask = gfp_mask;
+		page_owner->last_migrate_reason = -1;
+		__set_bit(PAGE_EXT_OWNER, &page_ext->flags);
 
-	__set_bit(PAGE_EXT_OWNER, &page_ext->flags);
+		page_ext = lookup_page_ext(page + i);
+	}
 }
 
 noinline void __set_page_owner(struct page *page, unsigned int order,
@@ -178,7 +183,7 @@ noinline void __set_page_owner(struct page *page, unsigned int order,
 		return;
 
 	handle = save_stack(gfp_mask);
-	__set_page_owner_handle(page_ext, handle, order, gfp_mask);
+	__set_page_owner_handle(page, page_ext, handle, order, gfp_mask);
 }
 
 void __set_page_owner_migrate_reason(struct page *page, int reason)
@@ -204,8 +209,11 @@ void __split_page_owner(struct page *page, unsigned int order)
 
 	page_owner = get_page_owner(page_ext);
 	page_owner->order = 0;
-	for (i = 1; i < (1 << order); i++)
-		__copy_page_owner(page, page + i);
+	for (i = 1; i < (1 << order); i++) {
+		page_ext = lookup_page_ext(page + i);
+		page_owner = get_page_owner(page_ext);
+		page_owner->order = 0;
+	}
 }
 
 void __copy_page_owner(struct page *oldpage, struct page *newpage)
@@ -483,6 +491,13 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 
 		page_owner = get_page_owner(page_ext);
 
+		/*
+		 * Don't print "tail" pages of high-order allocations as that
+		 * would inflate the stats.
+		 */
+		if (!IS_ALIGNED(pfn, 1 << page_owner->order))
+			continue;
+
 		/*
 		 * Access to page_ext->handle isn't synchronous so we should
 		 * be careful to access it.
@@ -562,7 +577,8 @@ static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone)
 				continue;
 
 			/* Found early allocated page */
-			__set_page_owner_handle(page_ext, early_handle, 0, 0);
+			__set_page_owner_handle(page, page_ext, early_handle,
+						0, 0);
 			count++;
 		}
 		cond_resched();