
[v4,29/39] unwind_user/deferred: Add unwind cache

Message ID 51855c0902486060cd6e1ccc6b22fd092a2e676d.1737511963.git.jpoimboe@kernel.org (mailing list archive)
State New
Series unwind, perf: sframe user space unwinding

Commit Message

Josh Poimboeuf Jan. 22, 2025, 2:31 a.m. UTC
Cache the results of the unwind to ensure the unwind is only performed
once, even when called by multiple tracers.

Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
---
 include/linux/unwind_deferred_types.h |  8 +++++++-
 kernel/unwind/deferred.c              | 26 ++++++++++++++++++++------
 2 files changed, 27 insertions(+), 7 deletions(-)

Comments

Peter Zijlstra Jan. 22, 2025, 1:57 p.m. UTC | #1
On Tue, Jan 21, 2025 at 06:31:21PM -0800, Josh Poimboeuf wrote:
> Cache the results of the unwind to ensure the unwind is only performed
> once, even when called by multiple tracers.
> 
> Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
> ---
>  include/linux/unwind_deferred_types.h |  8 +++++++-
>  kernel/unwind/deferred.c              | 26 ++++++++++++++++++++------
>  2 files changed, 27 insertions(+), 7 deletions(-)
> 
> diff --git a/include/linux/unwind_deferred_types.h b/include/linux/unwind_deferred_types.h
> index 9749824aea09..6f71a06329fb 100644
> --- a/include/linux/unwind_deferred_types.h
> +++ b/include/linux/unwind_deferred_types.h
> @@ -2,8 +2,14 @@
>  #ifndef _LINUX_UNWIND_USER_DEFERRED_TYPES_H
>  #define _LINUX_UNWIND_USER_DEFERRED_TYPES_H
>  
> -struct unwind_task_info {
> +struct unwind_cache {
>  	unsigned long		*entries;
> +	unsigned int		nr_entries;
> +	u64			cookie;
> +};

If you make the return-to-user path clear nr_entries, you don't need a
second cookie field, I think.
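
A minimal sketch of that variant (not part of the patch; the hook name is
made up): drop the cache's cookie field and treat nr_entries == 0 as "no
cached unwind", cleared on the way out to user space.

	struct unwind_cache {
		unsigned long		*entries;
		unsigned int		nr_entries;	/* 0 means the cache is invalid */
	};

	/* Hypothetical call site late in the return-to-user path. */
	static inline void unwind_exit_to_user(void)
	{
		current->unwind_info.cache.nr_entries = 0;
	}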
Josh Poimboeuf Jan. 22, 2025, 10:36 p.m. UTC | #2
On Wed, Jan 22, 2025 at 02:57:00PM +0100, Peter Zijlstra wrote:
> On Tue, Jan 21, 2025 at 06:31:21PM -0800, Josh Poimboeuf wrote:
> > Cache the results of the unwind to ensure the unwind is only performed
> > once, even when called by multiple tracers.
> > 
> > Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
> > ---
> >  include/linux/unwind_deferred_types.h |  8 +++++++-
> >  kernel/unwind/deferred.c              | 26 ++++++++++++++++++++------
> >  2 files changed, 27 insertions(+), 7 deletions(-)
> > 
> > diff --git a/include/linux/unwind_deferred_types.h b/include/linux/unwind_deferred_types.h
> > index 9749824aea09..6f71a06329fb 100644
> > --- a/include/linux/unwind_deferred_types.h
> > +++ b/include/linux/unwind_deferred_types.h
> > @@ -2,8 +2,14 @@
> >  #ifndef _LINUX_UNWIND_USER_DEFERRED_TYPES_H
> >  #define _LINUX_UNWIND_USER_DEFERRED_TYPES_H
> >  
> > -struct unwind_task_info {
> > +struct unwind_cache {
> >  	unsigned long		*entries;
> > +	unsigned int		nr_entries;
> > +	u64			cookie;
> > +};
> 
> If you make the return-to-user path clear nr_entries, you don't need a
> second cookie field, I think.

But if the NMI happens late in the exit-to-user path, with IRQs
disabled, right before nr_entries gets cleared, the cache won't get
used in the task work.

However I think we can clear it on entry-from-user.
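
The entry-from-user variant would be the mirror image; a sketch with an
equally hypothetical hook name:

	/* Hypothetical call site early in the entry-from-user path. */
	static inline void unwind_enter_from_user(void)
	{
		/* Invalidate whatever was cached during the previous entry context. */
		current->unwind_info.cache.nr_entries = 0;
	}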
Peter Zijlstra Jan. 23, 2025, 8:31 a.m. UTC | #3
On Wed, Jan 22, 2025 at 02:36:25PM -0800, Josh Poimboeuf wrote:
> On Wed, Jan 22, 2025 at 02:57:00PM +0100, Peter Zijlstra wrote:
> > On Tue, Jan 21, 2025 at 06:31:21PM -0800, Josh Poimboeuf wrote:
> > > Cache the results of the unwind to ensure the unwind is only performed
> > > once, even when called by multiple tracers.
> > > 
> > > Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
> > > ---
> > >  include/linux/unwind_deferred_types.h |  8 +++++++-
> > >  kernel/unwind/deferred.c              | 26 ++++++++++++++++++++------
> > >  2 files changed, 27 insertions(+), 7 deletions(-)
> > > 
> > > diff --git a/include/linux/unwind_deferred_types.h b/include/linux/unwind_deferred_types.h
> > > index 9749824aea09..6f71a06329fb 100644
> > > --- a/include/linux/unwind_deferred_types.h
> > > +++ b/include/linux/unwind_deferred_types.h
> > > @@ -2,8 +2,14 @@
> > >  #ifndef _LINUX_UNWIND_USER_DEFERRED_TYPES_H
> > >  #define _LINUX_UNWIND_USER_DEFERRED_TYPES_H
> > >  
> > > -struct unwind_task_info {
> > > +struct unwind_cache {
> > >  	unsigned long		*entries;
> > > +	unsigned int		nr_entries;
> > > +	u64			cookie;
> > > +};
> > 
> > If you make the return-to-user path clear nr_entries, you don't need a
> > second cookie field, I think.
> 
> But if the NMI happens late in the exit-to-user path, with IRQs
> disabled, right before nr_entries gets cleared, the cache won't get
> used in the task work.
> 
> However I think we can clear it on entry-from-user.

Return to user runs with interrupts disabled; if an NMI hits that, it
will have to set TIF_NOTIFY_RESUME again and queue the IRQ work thing.
That self-IPI will hit the moment we do IRET (which is what re-enables
interrupts) and we're going back into the kernel.

Anyway, I suppose that is a long way of saying that you should be able
to do this on return to user.

But yes, enter-from-user should work too.
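
For context, a rough sketch of the NMI -> irq_work -> task_work bounce Peter
is describing. The struct and function names here are invented, not the
series' API; only the kernel primitives (irq_work, task_work, TWA_RESUME /
TIF_NOTIFY_RESUME) are real.

	#include <linux/irq_work.h>
	#include <linux/task_work.h>
	#include <linux/sched.h>

	/* Invented request structure, for illustration only. */
	struct deferred_unwind_request {
		struct irq_work		iwork;
		struct callback_head	twork;
	};

	static void deferred_unwind_task_work(struct callback_head *head)
	{
		/*
		 * Task context, right before the return to user space: safe to
		 * fault on user memory, so this is where the actual (or cached)
		 * unwind would happen.
		 */
	}

	static void deferred_unwind_irq_work(struct irq_work *iwork)
	{
		struct deferred_unwind_request *req =
			container_of(iwork, struct deferred_unwind_request, iwork);

		/*
		 * IRQ context: now it is safe to arm task work.  TWA_RESUME sets
		 * TIF_NOTIFY_RESUME; if task_work_add() fails, the task is
		 * exiting and there is nothing left to unwind for.
		 */
		task_work_add(current, &req->twork, TWA_RESUME);
	}

	static void deferred_unwind_request_init(struct deferred_unwind_request *req)
	{
		init_irq_work(&req->iwork, deferred_unwind_irq_work);
		init_task_work(&req->twork, deferred_unwind_task_work);
	}

	/*
	 * The only step that is safe from the NMI itself: queue the irq_work.
	 * The self-IPI is delivered as soon as interrupts are re-enabled, which
	 * on the return-to-user path is the IRET, so the task re-enters the
	 * kernel and the task work runs before it finally reaches user space.
	 */
	static void deferred_unwind_request_from_nmi(struct deferred_unwind_request *req)
	{
		irq_work_queue(&req->iwork);
	}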
Josh Poimboeuf Jan. 23, 2025, 6:45 p.m. UTC | #4
On Thu, Jan 23, 2025 at 09:31:31AM +0100, Peter Zijlstra wrote:
> On Wed, Jan 22, 2025 at 02:36:25PM -0800, Josh Poimboeuf wrote:
> > On Wed, Jan 22, 2025 at 02:57:00PM +0100, Peter Zijlstra wrote:
> > > On Tue, Jan 21, 2025 at 06:31:21PM -0800, Josh Poimboeuf wrote:
> > But if the NMI happens late in the exit-to-user path, with IRQs
> > disabled, right before nr_entries gets cleared, the cache won't get
> > used in the task work.
> > 
> > However I think we can clear it on entry-from-user.
> 
> Return to user runs with interrupts disabled; if an NMI hits that, it
> will have to set TIF_NOTIFY_RESUME again and queue the IRQ work thing.
> That self-IPI will hit the moment we do IRET (which is what re-enables
> interrupts) and we're going back into the kernel.
> 
> Anyway, I suppose that is a long way of saying that you should be able
> to do this on return to user.

Indeed, I knew that but somehow overlooked the fact that the IRQ would
clear the cookie so the cache wouldn't be usable anyway.

Patch

diff --git a/include/linux/unwind_deferred_types.h b/include/linux/unwind_deferred_types.h
index 9749824aea09..6f71a06329fb 100644
--- a/include/linux/unwind_deferred_types.h
+++ b/include/linux/unwind_deferred_types.h
@@ -2,8 +2,14 @@ 
 #ifndef _LINUX_UNWIND_USER_DEFERRED_TYPES_H
 #define _LINUX_UNWIND_USER_DEFERRED_TYPES_H
 
-struct unwind_task_info {
+struct unwind_cache {
 	unsigned long		*entries;
+	unsigned int		nr_entries;
+	u64			cookie;
+};
+
+struct unwind_task_info {
+	struct unwind_cache	cache;
 	u64			cookie;
 };
 
diff --git a/kernel/unwind/deferred.c b/kernel/unwind/deferred.c
index f0dbe4069247..2f38055cce48 100644
--- a/kernel/unwind/deferred.c
+++ b/kernel/unwind/deferred.c
@@ -56,6 +56,7 @@  static void unwind_deferred_task_work(struct callback_head *head)
 {
 	struct unwind_work *work = container_of(head, struct unwind_work, work);
 	struct unwind_task_info *info = &current->unwind_info;
+	struct unwind_cache *cache = &info->cache;
 	struct unwind_stacktrace trace;
 	u64 cookie;
 
@@ -73,17 +74,30 @@  static void unwind_deferred_task_work(struct callback_head *head)
 	if (!current->mm)
 		goto do_callback;
 
-	if (!info->entries) {
-		info->entries = kmalloc(UNWIND_MAX_ENTRIES * sizeof(long),
-					GFP_KERNEL);
-		if (!info->entries)
+	if (!cache->entries) {
+		cache->entries = kmalloc(UNWIND_MAX_ENTRIES * sizeof(long),
+					 GFP_KERNEL);
+		if (!cache->entries)
 			goto do_callback;
 	}
 
-	trace.entries = info->entries;
+	trace.entries = cache->entries;
+
+	if (cookie == cache->cookie) {
+		/*
+		 * The user stack has already been previously unwound in this
+		 * entry context.  Skip the unwind and use the cache.
+		 */
+		trace.nr = cache->nr_entries;
+		goto do_callback;
+	}
+
 	trace.nr = 0;
 	unwind_user(&trace, UNWIND_MAX_ENTRIES);
 
+	cache->cookie = cookie;
+	cache->nr_entries = trace.nr;
+
 do_callback:
 	work->func(work, &trace, cookie);
 	work->pending = 0;
@@ -174,5 +188,5 @@  void unwind_task_free(struct task_struct *task)
 {
 	struct unwind_task_info *info = &task->unwind_info;
 
-	kfree(info->entries);
+	kfree(info->cache.entries);
 }
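
For reference, a sketch of what a tracer-side callback consuming the
(possibly cached) trace could look like. Only the callback signature is
taken from the work->func(work, &trace, cookie) call site in the patch;
record_user_frame() is a made-up stand-in for whatever the tracer does with
each frame.

	static void my_tracer_unwind_cb(struct unwind_work *work,
					struct unwind_stacktrace *trace,
					u64 cookie)
	{
		unsigned int i;

		/*
		 * Whether this is the first callback of the entry context (fresh
		 * unwind) or a later one (served from the cache), the callback
		 * just consumes trace->entries[0 .. trace->nr - 1].
		 */
		for (i = 0; i < trace->nr; i++)
			record_user_frame(cookie, trace->entries[i]);	/* hypothetical consumer */
	}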