
[RFC,4/4] perf: Use folios for the aux ringbuffer & pagefault path

Message ID 20230821202016.2910321-5-willy@infradead.org (mailing list archive)
State New
Series Convert perf ringbuffer to folios

Commit Message

Matthew Wilcox Aug. 21, 2023, 8:20 p.m. UTC
Instead of allocating a non-compound page and splitting it, allocate
a folio and make its refcount the count of the number of pages in it.
That way, when we free each page in the folio, we'll only actually free
it when the last page in the folio is freed.  Keeping the memory intact
is better for the MM system than allocating it and splitting it.

Now, instead of setting each page->mapping, we only set folio->mapping
which is better for our cacheline usage, as well as helping towards the
goal of eliminating page->mapping.  We remove the setting of page->index;
I do not believe this is needed.  And we return with the folio locked,
which the fault handler should have been doing all along.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 kernel/events/core.c        | 13 +++++++---
 kernel/events/ring_buffer.c | 51 ++++++++++++++++---------------------
 2 files changed, 31 insertions(+), 33 deletions(-)
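
The core of the scheme, condensed from the patch below (error handling and the
surrounding loops are omitted):

	/* Allocate one high-order folio and give it one reference per page. */
	folio = __folio_alloc_node(PERF_AUX_GFP, order, node);
	if (order)
		folio_ref_add(folio, (1 << order) - 1);

	/* Free one page's worth: the memory is returned only on the last put. */
	folio = virt_to_folio(rb->aux_pages[idx]);
	folio->mapping = NULL;
	folio_put(folio);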

Comments

Yin Fengwei Aug. 23, 2023, 7:38 a.m. UTC | #1
On 8/22/23 04:20, Matthew Wilcox (Oracle) wrote:
> Instead of allocating a non-compound page and splitting it, allocate
> a folio and make its refcount the count of the number of pages in it.
> That way, when we free each page in the folio, we'll only actually free
> it when the last page in the folio is freed.  Keeping the memory intact
> is better for the MM system than allocating it and splitting it.
> 
> Now, instead of setting each page->mapping, we only set folio->mapping
> which is better for our cacheline usage, as well as helping towards the
> goal of eliminating page->mapping.  We remove the setting of page->index;
> I do not believe this is needed.  And we return with the folio locked,
> which the fault handler should have been doing all along.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  kernel/events/core.c        | 13 +++++++---
>  kernel/events/ring_buffer.c | 51 ++++++++++++++++---------------------
>  2 files changed, 31 insertions(+), 33 deletions(-)
> 
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 4c72a41f11af..59d4f7c48c8c 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -29,6 +29,7 @@
>  #include <linux/vmalloc.h>
>  #include <linux/hardirq.h>
>  #include <linux/hugetlb.h>
> +#include <linux/pagemap.h>
>  #include <linux/rculist.h>
>  #include <linux/uaccess.h>
>  #include <linux/syscalls.h>
> @@ -6083,6 +6084,7 @@ static vm_fault_t perf_mmap_fault(struct vm_fault *vmf)
>  {
>  	struct perf_event *event = vmf->vma->vm_file->private_data;
>  	struct perf_buffer *rb;
> +	struct folio *folio;
>  	vm_fault_t ret = VM_FAULT_SIGBUS;
>  
>  	if (vmf->flags & FAULT_FLAG_MKWRITE) {
> @@ -6102,12 +6104,15 @@ static vm_fault_t perf_mmap_fault(struct vm_fault *vmf)
>  	vmf->page = perf_mmap_to_page(rb, vmf->pgoff);
>  	if (!vmf->page)
>  		goto unlock;
> +	folio = page_folio(vmf->page);
>  
> -	get_page(vmf->page);
> -	vmf->page->mapping = vmf->vma->vm_file->f_mapping;
> -	vmf->page->index   = vmf->pgoff;
> +	folio_get(folio);
> +	rcu_read_unlock();
> +	folio_lock(folio);
> +	if (!folio->mapping)
> +		folio->mapping = vmf->vma->vm_file->f_mapping;
>  
> -	ret = 0;
> +	return VM_FAULT_LOCKED;
In __do_fault():

        if (unlikely(!(ret & VM_FAULT_LOCKED)))                                 
                lock_page(vmf->page);                                           
        else                                                                    
                VM_BUG_ON_PAGE(!PageLocked(vmf->page), vmf->page); 

As we lock the folio, I'm not sure whether !PageLocked(vmf->page) can be
true here. My understanding is that it can, if vmf->pgoff maps to a tail
page. Did I miss something here?


Regards
Yin, Fengwei

>  unlock:
>  	rcu_read_unlock();
>  
> diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
> index 56939dc3bf33..0a026e5ff4f5 100644
> --- a/kernel/events/ring_buffer.c
> +++ b/kernel/events/ring_buffer.c
> @@ -606,39 +606,28 @@ long perf_output_copy_aux(struct perf_output_handle *aux_handle,
>  
>  #define PERF_AUX_GFP	(GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY)
>  
> -static struct page *rb_alloc_aux_page(int node, int order)
> +static struct folio *rb_alloc_aux_folio(int node, int order)
>  {
> -	struct page *page;
> +	struct folio *folio;
>  
>  	if (order > MAX_ORDER)
>  		order = MAX_ORDER;
>  
>  	do {
> -		page = alloc_pages_node(node, PERF_AUX_GFP, order);
> -	} while (!page && order--);
> -
> -	if (page && order) {
> -		/*
> -		 * Communicate the allocation size to the driver:
> -		 * if we managed to secure a high-order allocation,
> -		 * set its first page's private to this order;
> -		 * !PagePrivate(page) means it's just a normal page.
> -		 */
> -		split_page(page, order);
> -		SetPagePrivate(page);
> -		set_page_private(page, order);
> -	}
> +		folio = __folio_alloc_node(PERF_AUX_GFP, order, node);
> +	} while (!folio && order--);
>  
> -	return page;
> +	if (order)
> +		folio_ref_add(folio, (1 << order) - 1);
> +	return folio;
>  }
>  
>  static void rb_free_aux_page(struct perf_buffer *rb, int idx)
>  {
> -	struct page *page = virt_to_page(rb->aux_pages[idx]);
> +	struct folio *folio = virt_to_folio(rb->aux_pages[idx]);
>  
> -	ClearPagePrivate(page);
> -	page->mapping = NULL;
> -	__free_page(page);
> +	folio->mapping = NULL;
> +	folio_put(folio);
>  }
>  
>  static void __rb_free_aux(struct perf_buffer *rb)
> @@ -672,7 +661,7 @@ int rb_alloc_aux(struct perf_buffer *rb, struct perf_event *event,
>  		 pgoff_t pgoff, int nr_pages, long watermark, int flags)
>  {
>  	bool overwrite = !(flags & RING_BUFFER_WRITABLE);
> -	int node = (event->cpu == -1) ? -1 : cpu_to_node(event->cpu);
> +	int node = (event->cpu == -1) ? numa_mem_id() : cpu_to_node(event->cpu);
>  	int ret = -ENOMEM, max_order;
>  
>  	if (!has_aux(event))
> @@ -707,17 +696,21 @@ int rb_alloc_aux(struct perf_buffer *rb, struct perf_event *event,
>  
>  	rb->free_aux = event->pmu->free_aux;
>  	for (rb->aux_nr_pages = 0; rb->aux_nr_pages < nr_pages;) {
> -		struct page *page;
> -		int last, order;
> +		struct folio *folio;
> +		unsigned int i, nr, order;
> +		void *addr;
>  
>  		order = min(max_order, ilog2(nr_pages - rb->aux_nr_pages));
> -		page = rb_alloc_aux_page(node, order);
> -		if (!page)
> +		folio = rb_alloc_aux_folio(node, order);
> +		if (!folio)
>  			goto out;
> +		addr = folio_address(folio);
> +		nr = folio_nr_pages(folio);
>  
> -		for (last = rb->aux_nr_pages + (1 << page_private(page));
> -		     last > rb->aux_nr_pages; rb->aux_nr_pages++)
> -			rb->aux_pages[rb->aux_nr_pages] = page_address(page++);
> +		for (i = 0; i < nr; i++) {
> +			rb->aux_pages[rb->aux_nr_pages++] = addr;
> +			addr += PAGE_SIZE;
> +		}
>  	}
>  
>  	/*
Matthew Wilcox Aug. 23, 2023, 12:23 p.m. UTC | #2
On Wed, Aug 23, 2023 at 03:38:13PM +0800, Yin Fengwei wrote:
> On 8/22/23 04:20, Matthew Wilcox (Oracle) wrote:
> > -	get_page(vmf->page);
> > -	vmf->page->mapping = vmf->vma->vm_file->f_mapping;
> > -	vmf->page->index   = vmf->pgoff;
> > +	folio_get(folio);
> > +	rcu_read_unlock();
> > +	folio_lock(folio);
> > +	if (!folio->mapping)
> > +		folio->mapping = vmf->vma->vm_file->f_mapping;
> >  
> > -	ret = 0;
> > +	return VM_FAULT_LOCKED;
> In __do_fault():
> 
>         if (unlikely(!(ret & VM_FAULT_LOCKED)))                                 
>                 lock_page(vmf->page);                                           
>         else                                                                    
>                 VM_BUG_ON_PAGE(!PageLocked(vmf->page), vmf->page); 
> 
> As we lock the folio, I'm not sure whether !PageLocked(vmf->page) can be
> true here. My understanding is that it can, if vmf->pgoff maps to a tail
> page. Did I miss something here?

There's only one lock bit per folio; there's no lock bit for individual
pages.  When we check PageLocked() on a tail page, it redirects to the
head page.

__PAGEFLAG(Locked, locked, PF_NO_TAIL)

#define PF_NO_TAIL(page, enforce) ({                                    \
                VM_BUG_ON_PGFLAGS(enforce && PageTail(page), page);     \
                PF_POISONED_CHECK(compound_head(page)); })

#define TESTPAGEFLAG(uname, lname, policy)                              \
static __always_inline int Page##uname(struct page *page)               \
{ return test_bit(PG_##lname, &policy(page, 0)->flags); }

and that expands out to:

static __always_inline int PageLocked(struct page *page)
{ return test_bit(PG_locked, &compound_head(page)->flags); }
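
In other words, once the fault handler has taken folio_lock(folio), the
VM_BUG_ON_PAGE() in __do_fault() tests the same PG_locked bit whether
vmf->page is the head page or a tail page of that folio; purely as an
illustration (not part of the patch), these two checks are equivalent:

	VM_BUG_ON_PAGE(!PageLocked(vmf->page), vmf->page);
	VM_BUG_ON_FOLIO(!folio_test_locked(page_folio(vmf->page)),
			page_folio(vmf->page));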
Yin Fengwei Aug. 23, 2023, 12:45 p.m. UTC | #3
On 8/23/2023 8:23 PM, Matthew Wilcox wrote:
> On Wed, Aug 23, 2023 at 03:38:13PM +0800, Yin Fengwei wrote:
>> On 8/22/23 04:20, Matthew Wilcox (Oracle) wrote:
>>> -	get_page(vmf->page);
>>> -	vmf->page->mapping = vmf->vma->vm_file->f_mapping;
>>> -	vmf->page->index   = vmf->pgoff;
>>> +	folio_get(folio);
>>> +	rcu_read_unlock();
>>> +	folio_lock(folio);
>>> +	if (!folio->mapping)
>>> +		folio->mapping = vmf->vma->vm_file->f_mapping;
>>>  
>>> -	ret = 0;
>>> +	return VM_FAULT_LOCKED;
>> In __do_fault():
>>
>>         if (unlikely(!(ret & VM_FAULT_LOCKED)))                                 
>>                 lock_page(vmf->page);                                           
>>         else                                                                    
>>                 VM_BUG_ON_PAGE(!PageLocked(vmf->page), vmf->page); 
>>
>> As we lock the folio, I'm not sure whether !PageLocked(vmf->page) can be
>> true here. My understanding is that it can, if vmf->pgoff maps to a tail
>> page. Did I miss something here?
> 
> There's only one lock bit per folio; there's no lock bit for individual
> pages.  When we check PageLocked() on a tail page, it redirects to the
> head page.
> 
> __PAGEFLAG(Locked, locked, PF_NO_TAIL)
> 
> #define PF_NO_TAIL(page, enforce) ({                                    \
>                 VM_BUG_ON_PGFLAGS(enforce && PageTail(page), page);     \
>                 PF_POISONED_CHECK(compound_head(page)); })
> 
> #define TESTPAGEFLAG(uname, lname, policy)                              \
> static __always_inline int Page##uname(struct page *page)               \
> { return test_bit(PG_##lname, &policy(page, 0)->flags); }
> 
> and that expands out to:
> 
> static __always_inline int PageLocked(struct page *page)
> { return test_bit(PG_locked, &compound_head(page)->flags); }
> 
Ah. Here is the trick. Thanks a lot for the detailed explanation.


Regards
Yin, Fengwei
Lorenzo Stoakes Aug. 27, 2023, 8:01 a.m. UTC | #4
On Mon, Aug 21, 2023 at 09:20:16PM +0100, Matthew Wilcox (Oracle) wrote:
> Instead of allocating a non-compound page and splitting it, allocate
> a folio and make its refcount the count of the number of pages in it.
> That way, when we free each page in the folio, we'll only actually free
> it when the last page in the folio is freed.  Keeping the memory intact
> is better for the MM system than allocating it and splitting it.
>
> Now, instead of setting each page->mapping, we only set folio->mapping
> which is better for our cacheline usage, as well as helping towards the
> goal of eliminating page->mapping.  We remove the setting of page->index;
> I do not believe this is needed.  And we return with the folio locked,
> which the fault handler should have been doing all along.
>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  kernel/events/core.c        | 13 +++++++---
>  kernel/events/ring_buffer.c | 51 ++++++++++++++++---------------------
>  2 files changed, 31 insertions(+), 33 deletions(-)
>
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 4c72a41f11af..59d4f7c48c8c 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -29,6 +29,7 @@
>  #include <linux/vmalloc.h>
>  #include <linux/hardirq.h>
>  #include <linux/hugetlb.h>
> +#include <linux/pagemap.h>
>  #include <linux/rculist.h>
>  #include <linux/uaccess.h>
>  #include <linux/syscalls.h>
> @@ -6083,6 +6084,7 @@ static vm_fault_t perf_mmap_fault(struct vm_fault *vmf)
>  {
>  	struct perf_event *event = vmf->vma->vm_file->private_data;
>  	struct perf_buffer *rb;
> +	struct folio *folio;
>  	vm_fault_t ret = VM_FAULT_SIGBUS;

Since we're explicitly returning VM_FAULT_LOCKED on success, perhaps worth
simply renaming the unlock label to error and returning VM_FAULT_SIGBUS there?

The FAULT_FLAG_MKWRITE branch can simply return vmf->pgoff == 0 ? 0 :
VM_FAULT_SIGBUS;
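
A rough sketch of that shape (only a sketch of the suggested control flow, not
a tested change; the intermediate checks keep their existing logic and simply
jump to the renamed label):

	if (vmf->flags & FAULT_FLAG_MKWRITE)
		return vmf->pgoff == 0 ? 0 : VM_FAULT_SIGBUS;

	rcu_read_lock();
	rb = rcu_dereference(event->rb);
	if (!rb)
		goto error;

	/* ... any other existing checks fail with "goto error" ... */

	vmf->page = perf_mmap_to_page(rb, vmf->pgoff);
	if (!vmf->page)
		goto error;
	folio = page_folio(vmf->page);

	folio_get(folio);
	rcu_read_unlock();
	folio_lock(folio);
	if (!folio->mapping)
		folio->mapping = vmf->vma->vm_file->f_mapping;

	return VM_FAULT_LOCKED;
error:
	rcu_read_unlock();
	return VM_FAULT_SIGBUS;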

>
>  	if (vmf->flags & FAULT_FLAG_MKWRITE) {
> @@ -6102,12 +6104,15 @@ static vm_fault_t perf_mmap_fault(struct vm_fault *vmf)
>  	vmf->page = perf_mmap_to_page(rb, vmf->pgoff);
>  	if (!vmf->page)
>  		goto unlock;
> +	folio = page_folio(vmf->page);
>
> -	get_page(vmf->page);
> -	vmf->page->mapping = vmf->vma->vm_file->f_mapping;
> -	vmf->page->index   = vmf->pgoff;
> +	folio_get(folio);
> +	rcu_read_unlock();
> +	folio_lock(folio);
> +	if (!folio->mapping)
> +		folio->mapping = vmf->vma->vm_file->f_mapping;
>
> -	ret = 0;
> +	return VM_FAULT_LOCKED;
>  unlock:
>  	rcu_read_unlock();
>
> diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
> index 56939dc3bf33..0a026e5ff4f5 100644
> --- a/kernel/events/ring_buffer.c
> +++ b/kernel/events/ring_buffer.c
> @@ -606,39 +606,28 @@ long perf_output_copy_aux(struct perf_output_handle *aux_handle,
>
>  #define PERF_AUX_GFP	(GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY)
>
> -static struct page *rb_alloc_aux_page(int node, int order)
> +static struct folio *rb_alloc_aux_folio(int node, int order)
>  {
> -	struct page *page;
> +	struct folio *folio;
>
>  	if (order > MAX_ORDER)
>  		order = MAX_ORDER;
>
>  	do {
> -		page = alloc_pages_node(node, PERF_AUX_GFP, order);
> -	} while (!page && order--);
> -
> -	if (page && order) {
> -		/*
> -		 * Communicate the allocation size to the driver:
> -		 * if we managed to secure a high-order allocation,
> -		 * set its first page's private to this order;
> -		 * !PagePrivate(page) means it's just a normal page.
> -		 */
> -		split_page(page, order);
> -		SetPagePrivate(page);
> -		set_page_private(page, order);

I'm guessing this was used in conjunction with the page_private() logic
that existed below and can simply be discarded now?

> -	}
> +		folio = __folio_alloc_node(PERF_AUX_GFP, order, node);
> +	} while (!folio && order--);
>
> -	return page;
> +	if (order)
> +		folio_ref_add(folio, (1 << order) - 1);

Can't order go to -1 if we continue to fail to allocate a folio?

> +	return folio;
>  }
>
>  static void rb_free_aux_page(struct perf_buffer *rb, int idx)
>  {
> -	struct page *page = virt_to_page(rb->aux_pages[idx]);
> +	struct folio *folio = virt_to_folio(rb->aux_pages[idx]);
>
> -	ClearPagePrivate(page);
> -	page->mapping = NULL;
> -	__free_page(page);
> +	folio->mapping = NULL;
> +	folio_put(folio);
>  }
>
>  static void __rb_free_aux(struct perf_buffer *rb)
> @@ -672,7 +661,7 @@ int rb_alloc_aux(struct perf_buffer *rb, struct perf_event *event,
>  		 pgoff_t pgoff, int nr_pages, long watermark, int flags)
>  {
>  	bool overwrite = !(flags & RING_BUFFER_WRITABLE);
> -	int node = (event->cpu == -1) ? -1 : cpu_to_node(event->cpu);
> +	int node = (event->cpu == -1) ? numa_mem_id() : cpu_to_node(event->cpu);
>  	int ret = -ENOMEM, max_order;
>
>  	if (!has_aux(event))
> @@ -707,17 +696,21 @@ int rb_alloc_aux(struct perf_buffer *rb, struct perf_event *event,
>
>  	rb->free_aux = event->pmu->free_aux;
>  	for (rb->aux_nr_pages = 0; rb->aux_nr_pages < nr_pages;) {
> -		struct page *page;
> -		int last, order;
> +		struct folio *folio;
> +		unsigned int i, nr, order;
> +		void *addr;
>
>  		order = min(max_order, ilog2(nr_pages - rb->aux_nr_pages));
> -		page = rb_alloc_aux_page(node, order);
> -		if (!page)
> +		folio = rb_alloc_aux_folio(node, order);
> +		if (!folio)
>  			goto out;
> +		addr = folio_address(folio);
> +		nr = folio_nr_pages(folio);

I was going to raise the unspeakably annoying nit about this function returning
a long, but then that made me wonder why, given that folio->_folio_nr_pages is
an unsigned int, folio_nr_pages() returns a long in the first instance?

>
> -		for (last = rb->aux_nr_pages + (1 << page_private(page));
> -		     last > rb->aux_nr_pages; rb->aux_nr_pages++)
> -			rb->aux_pages[rb->aux_nr_pages] = page_address(page++);
> +		for (i = 0; i < nr; i++) {
> +			rb->aux_pages[rb->aux_nr_pages++] = addr;
> +			addr += PAGE_SIZE;
> +		}
>  	}
>
>  	/*
> --
> 2.40.1
>
Matthew Wilcox Aug. 27, 2023, 11:50 a.m. UTC | #5
On Sun, Aug 27, 2023 at 09:01:04AM +0100, Lorenzo Stoakes wrote:
> On Mon, Aug 21, 2023 at 09:20:16PM +0100, Matthew Wilcox (Oracle) wrote:
> > -	if (page && order) {
> > -		/*
> > -		 * Communicate the allocation size to the driver:
> > -		 * if we managed to secure a high-order allocation,
> > -		 * set its first page's private to this order;
> > -		 * !PagePrivate(page) means it's just a normal page.
> > -		 */
> > -		split_page(page, order);
> > -		SetPagePrivate(page);
> > -		set_page_private(page, order);
> 
> I'm guessing this was used in conjunction with the page_private() logic
> that existed below and can simply be discarded now?

As far as I can tell, yes.

> > -	}
> > +		folio = __folio_alloc_node(PERF_AUX_GFP, order, node);
> > +	} while (!folio && order--);
> >
> > -	return page;
> > +	if (order)
> > +		folio_ref_add(folio, (1 << order) - 1);
> 
> Can't order go to -1 if we continue to fail to allocate a folio?

Hm, yes.
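
One way to avoid both the NULL dereference and the undefined (1 << -1) shift,
as a sketch rather than anything posted in this thread, is to bail out on
allocation failure and derive the extra references from the folio itself:

	do {
		folio = __folio_alloc_node(PERF_AUX_GFP, order, node);
	} while (!folio && order--);

	if (!folio)
		return NULL;
	if (folio_order(folio))
		folio_ref_add(folio, folio_nr_pages(folio) - 1);
	return folio;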

> > @@ -707,17 +696,21 @@ int rb_alloc_aux(struct perf_buffer *rb, struct perf_event *event,
> >
> >  	rb->free_aux = event->pmu->free_aux;
> >  	for (rb->aux_nr_pages = 0; rb->aux_nr_pages < nr_pages;) {
> > -		struct page *page;
> > -		int last, order;
> > +		struct folio *folio;
> > +		unsigned int i, nr, order;
> > +		void *addr;
> >
> >  		order = min(max_order, ilog2(nr_pages - rb->aux_nr_pages));
> > -		page = rb_alloc_aux_page(node, order);
> > -		if (!page)
> > +		folio = rb_alloc_aux_folio(node, order);
> > +		if (!folio)
> >  			goto out;
> > +		addr = folio_address(folio);
> > +		nr = folio_nr_pages(folio);
> 
> I was going to raise the unspeakably annoying nit about this function returning
> a long, but then that made me wonder why, given that folio->_folio_nr_pages is
> an unsigned int, folio_nr_pages() returns a long in the first instance?

Futureproofing the API.  While I don't expect us to be allocating
order-32 folios any time soon, and x86 has excluded p4d-sizes from the
architecture, I don't want to have to go through and audit everywhere
when it turns out we do want to support such a thing on some architecture.

(on x86 PMD - order 9, PUD - order 18, P4D - order 27, so we'd need a
hypothetical P5D level, or a machine with, say 16k pages, which would
go PMD-11, PUD-22, P4D-33, and be managing a folio of size 2^57 bytes)

But supporting nr_pages stored directly in the otherwise wasted space
in the folio seemed like a cheap win, and there was no reason to take
up the extra 32 bits.
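
For reference, folio_nr_pages() at this point looks roughly like this
(paraphrased; the exact definition is in include/linux/mm.h):

	static inline long folio_nr_pages(struct folio *folio)
	{
		if (!folio_test_large(folio))
			return 1;
	#ifdef CONFIG_64BIT
		return folio->_folio_nr_pages;
	#else
		return 1L << folio->_folio_order;
	#endif
	}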

Anyway, here, we know we're not getting back an arbitrary folio that
somebody else allocated, we're allocating for ourselves, and we know we're
allocating something rather smaller than that, so it's fine to calculate
in terms of unsigned int.  If I were writing in rust, I'd use usize,
but since nobody's done the work to make rust types available in C yet,
I haven't done that either.

We actually use usize as a variable name in a couple of hundred places,
so turning it into a generally available type is harder than one might
like.

Patch

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 4c72a41f11af..59d4f7c48c8c 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -29,6 +29,7 @@ 
 #include <linux/vmalloc.h>
 #include <linux/hardirq.h>
 #include <linux/hugetlb.h>
+#include <linux/pagemap.h>
 #include <linux/rculist.h>
 #include <linux/uaccess.h>
 #include <linux/syscalls.h>
@@ -6083,6 +6084,7 @@  static vm_fault_t perf_mmap_fault(struct vm_fault *vmf)
 {
 	struct perf_event *event = vmf->vma->vm_file->private_data;
 	struct perf_buffer *rb;
+	struct folio *folio;
 	vm_fault_t ret = VM_FAULT_SIGBUS;
 
 	if (vmf->flags & FAULT_FLAG_MKWRITE) {
@@ -6102,12 +6104,15 @@  static vm_fault_t perf_mmap_fault(struct vm_fault *vmf)
 	vmf->page = perf_mmap_to_page(rb, vmf->pgoff);
 	if (!vmf->page)
 		goto unlock;
+	folio = page_folio(vmf->page);
 
-	get_page(vmf->page);
-	vmf->page->mapping = vmf->vma->vm_file->f_mapping;
-	vmf->page->index   = vmf->pgoff;
+	folio_get(folio);
+	rcu_read_unlock();
+	folio_lock(folio);
+	if (!folio->mapping)
+		folio->mapping = vmf->vma->vm_file->f_mapping;
 
-	ret = 0;
+	return VM_FAULT_LOCKED;
 unlock:
 	rcu_read_unlock();
 
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 56939dc3bf33..0a026e5ff4f5 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -606,39 +606,28 @@  long perf_output_copy_aux(struct perf_output_handle *aux_handle,
 
 #define PERF_AUX_GFP	(GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY)
 
-static struct page *rb_alloc_aux_page(int node, int order)
+static struct folio *rb_alloc_aux_folio(int node, int order)
 {
-	struct page *page;
+	struct folio *folio;
 
 	if (order > MAX_ORDER)
 		order = MAX_ORDER;
 
 	do {
-		page = alloc_pages_node(node, PERF_AUX_GFP, order);
-	} while (!page && order--);
-
-	if (page && order) {
-		/*
-		 * Communicate the allocation size to the driver:
-		 * if we managed to secure a high-order allocation,
-		 * set its first page's private to this order;
-		 * !PagePrivate(page) means it's just a normal page.
-		 */
-		split_page(page, order);
-		SetPagePrivate(page);
-		set_page_private(page, order);
-	}
+		folio = __folio_alloc_node(PERF_AUX_GFP, order, node);
+	} while (!folio && order--);
 
-	return page;
+	if (order)
+		folio_ref_add(folio, (1 << order) - 1);
+	return folio;
 }
 
 static void rb_free_aux_page(struct perf_buffer *rb, int idx)
 {
-	struct page *page = virt_to_page(rb->aux_pages[idx]);
+	struct folio *folio = virt_to_folio(rb->aux_pages[idx]);
 
-	ClearPagePrivate(page);
-	page->mapping = NULL;
-	__free_page(page);
+	folio->mapping = NULL;
+	folio_put(folio);
 }
 
 static void __rb_free_aux(struct perf_buffer *rb)
@@ -672,7 +661,7 @@  int rb_alloc_aux(struct perf_buffer *rb, struct perf_event *event,
 		 pgoff_t pgoff, int nr_pages, long watermark, int flags)
 {
 	bool overwrite = !(flags & RING_BUFFER_WRITABLE);
-	int node = (event->cpu == -1) ? -1 : cpu_to_node(event->cpu);
+	int node = (event->cpu == -1) ? numa_mem_id() : cpu_to_node(event->cpu);
 	int ret = -ENOMEM, max_order;
 
 	if (!has_aux(event))
@@ -707,17 +696,21 @@  int rb_alloc_aux(struct perf_buffer *rb, struct perf_event *event,
 
 	rb->free_aux = event->pmu->free_aux;
 	for (rb->aux_nr_pages = 0; rb->aux_nr_pages < nr_pages;) {
-		struct page *page;
-		int last, order;
+		struct folio *folio;
+		unsigned int i, nr, order;
+		void *addr;
 
 		order = min(max_order, ilog2(nr_pages - rb->aux_nr_pages));
-		page = rb_alloc_aux_page(node, order);
-		if (!page)
+		folio = rb_alloc_aux_folio(node, order);
+		if (!folio)
 			goto out;
+		addr = folio_address(folio);
+		nr = folio_nr_pages(folio);
 
-		for (last = rb->aux_nr_pages + (1 << page_private(page));
-		     last > rb->aux_nr_pages; rb->aux_nr_pages++)
-			rb->aux_pages[rb->aux_nr_pages] = page_address(page++);
+		for (i = 0; i < nr; i++) {
+			rb->aux_pages[rb->aux_nr_pages++] = addr;
+			addr += PAGE_SIZE;
+		}
 	}
 
 	/*