diff mbox

[RFC,2/3] arm64: refactor save_stack_trace()

Message ID 20150717090009.720f6bd0@gandalf.local.home (mailing list archive)
State New, archived
Headers show

Commit Message

Steven Rostedt July 17, 2015, 1 p.m. UTC
Here's my latest version of the patch. I also added a fix for the entries
count being off from the real number of entries. That was to stop the loop
on ULONG_MAX in stack_dump_trace[i], otherwise if for some reason
nr_entries is one off and points to ULONG_MAX, and there is a -1 in the
stack, the trace will include it in the count. The output of the stack
tests against both nr_entries and ULONG_MAX and will stop with either
case, making the dump and the count different.

-- Steve

From 1c3697c3c4ce1f237466f76e40c91f66e2030bac Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Thu, 16 Jul 2015 13:24:54 -0400
Subject: [PATCH] tracing: Clean up stack tracing and fix fentry updates

Akashi Takahiro was porting the stack tracer to arm64 and found some
issues with it. One was that it repeats the top function, due to the
stack frame added by the mcount caller and added by itself. This
was added when fentry came in, and before fentry created its own stack
frame. But x86's fentry now creates its own stack frame, and there's
no need to insert the function again.

This also cleans up the code a bit, where it doesn't need to do something
special for fentry, and doesn't include insertion of a duplicate
entry for the called function being traced.

Link: http://lkml.kernel.org/r/55A646EE.6030402@linaro.org

Some-suggestions-by: Jungseok Lee <jungseoklee85@gmail.com>
Some-suggestions-by: Mark Rutland <mark.rutland@arm.com>
Reported-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace_stack.c | 68 ++++++++++++++++------------------------------
 1 file changed, 23 insertions(+), 45 deletions(-)

Comments

Jungseok Lee July 17, 2015, 2:28 p.m. UTC | #1
On Jul 17, 2015, at 10:00 PM, Steven Rostedt wrote:

Hi Steve,

> Here's my latest version of the patch. I also added a fix that made
> entries off from the real number of entries. That was to stop the loop
> on ULONG_MAX in stack_dump_trace[i], otherwise if for some reason
> nr_entries is one off and points to ULONG_MAX, and there is a -1 in the
> stack, the trace will include it in the count. The output of the stack
> tests against both nr_entries and ULONG_MAX and will stop with either
> case, making the dump and the count different.
> 
> -- Steve
> 
> From 1c3697c3c4ce1f237466f76e40c91f66e2030bac Mon Sep 17 00:00:00 2001
> From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
> Date: Thu, 16 Jul 2015 13:24:54 -0400
> Subject: [PATCH] tracing: Clean up stack tracing and fix fentry updates
> 
> Akashi Takahiro was porting the stack tracer to arm64 and found some
> issues with it. One was that it repeats the top function, due to the
> stack frame added by the mcount caller and added by itself. This
> was added when fentry came in, and before fentry created its own stack
> frame. But x86's fentry now creates its own stack frame, and there's
> no need to insert the function again.
> 
> This also cleans up the code a bit, where it doesn't need to do something
> special for fentry, and doesn't include insertion of a duplicate
> entry for the called function being traced.
> 
> Link: http://lkml.kernel.org/r/55A646EE.6030402@linaro.org
> 
> Some-suggestions-by: Jungseok Lee <jungseoklee85@gmail.com>
> Some-suggestions-by: Mark Rutland <mark.rutland@arm.com>
> Reported-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
> Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
> ---
> kernel/trace/trace_stack.c | 68 ++++++++++++++++------------------------------
> 1 file changed, 23 insertions(+), 45 deletions(-)
> 
> diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
> index 3f34496244e9..b746399ab59c 100644
> --- a/kernel/trace/trace_stack.c
> +++ b/kernel/trace/trace_stack.c
> @@ -18,12 +18,6 @@
> 
> #define STACK_TRACE_ENTRIES 500
> 
> -#ifdef CC_USING_FENTRY
> -# define fentry		1
> -#else
> -# define fentry		0
> -#endif
> -
> static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES+1] =
> 	 { [0 ... (STACK_TRACE_ENTRIES)] = ULONG_MAX };
> static unsigned stack_dump_index[STACK_TRACE_ENTRIES];
> @@ -35,7 +29,7 @@ static unsigned stack_dump_index[STACK_TRACE_ENTRIES];
>  */
> static struct stack_trace max_stack_trace = {
> 	.max_entries		= STACK_TRACE_ENTRIES - 1,
> -	.entries		= &stack_dump_trace[1],
> +	.entries		= &stack_dump_trace[0],
> };
> 
> static unsigned long max_stack_size;
> @@ -55,7 +49,7 @@ static inline void print_max_stack(void)
> 
> 	pr_emerg("        Depth    Size   Location    (%d entries)\n"
> 			   "        -----    ----   --------\n",
> -			   max_stack_trace.nr_entries - 1);
> +			   max_stack_trace.nr_entries);
> 
> 	for (i = 0; i < max_stack_trace.nr_entries; i++) {
> 		if (stack_dump_trace[i] == ULONG_MAX)
> @@ -77,7 +71,7 @@ check_stack(unsigned long ip, unsigned long *stack)
> 	unsigned long this_size, flags; unsigned long *p, *top, *start;
> 	static int tracer_frame;
> 	int frame_size = ACCESS_ONCE(tracer_frame);
> -	int i;
> +	int i, x;
> 
> 	this_size = ((unsigned long)stack) & (THREAD_SIZE-1);
> 	this_size = THREAD_SIZE - this_size;
> @@ -105,26 +99,20 @@ check_stack(unsigned long ip, unsigned long *stack)
> 	max_stack_size = this_size;
> 
> 	max_stack_trace.nr_entries = 0;
> -
> -	if (using_ftrace_ops_list_func())
> -		max_stack_trace.skip = 4;
> -	else
> -		max_stack_trace.skip = 3;
> +	max_stack_trace.skip = 3;
> 
> 	save_stack_trace(&max_stack_trace);
> 
> -	/*
> -	 * Add the passed in ip from the function tracer.
> -	 * Searching for this on the stack will skip over
> -	 * most of the overhead from the stack tracer itself.
> -	 */
> -	stack_dump_trace[0] = ip;
> -	max_stack_trace.nr_entries++;
> +	/* Skip over the overhead of the stack tracer itself */
> +	for (i = 0; i < max_stack_trace.nr_entries; i++) {
> +		if (stack_dump_trace[i] == ip)
> +			break;
> +	}
> 
> 	/*
> 	 * Now find where in the stack these are.
> 	 */
> -	i = 0;
> +	x = 0;
> 	start = stack;
> 	top = (unsigned long *)
> 		(((unsigned long)start & ~(THREAD_SIZE-1)) + THREAD_SIZE);
> @@ -139,12 +127,15 @@ check_stack(unsigned long ip, unsigned long *stack)
> 	while (i < max_stack_trace.nr_entries) {
> 		int found = 0;
> 
> -		stack_dump_index[i] = this_size;
> +		stack_dump_index[x] = this_size;
> 		p = start;
> 
> 		for (; p < top && i < max_stack_trace.nr_entries; p++) {
> +			if (stack_dump_trace[i] == ULONG_MAX)
> +				break;
> 			if (*p == stack_dump_trace[i]) {
> -				this_size = stack_dump_index[i++] =
> +				stack_dump_trace[x] = stack_dump_trace[i++];
> +				this_size = stack_dump_index[x++] =
> 					(top - p) * sizeof(unsigned long);
> 				found = 1;
> 				/* Start the search from here */
> @@ -156,7 +147,7 @@ check_stack(unsigned long ip, unsigned long *stack)
> 				 * out what that is, then figure it out
> 				 * now.
> 				 */
> -				if (unlikely(!tracer_frame) && i == 1) {
> +				if (unlikely(!tracer_frame)) {
> 					tracer_frame = (p - stack) *
> 						sizeof(unsigned long);
> 					max_stack_size -= tracer_frame;
> @@ -168,6 +159,10 @@ check_stack(unsigned long ip, unsigned long *stack)
> 			i++;
> 	}
> 
> +	max_stack_trace.nr_entries = x;
> +	for (; x < i; x++)
> +		stack_dump_trace[x] = ULONG_MAX;
> +
> 	if (task_stack_end_corrupted(current)) {
> 		print_max_stack();
> 		BUG();
> @@ -192,24 +187,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip,
> 	if (per_cpu(trace_active, cpu)++ != 0)
> 		goto out;
> 
> -	/*
> -	 * When fentry is used, the traced function does not get
> -	 * its stack frame set up, and we lose the parent.
> -	 * The ip is pretty useless because the function tracer
> -	 * was called before that function set up its stack frame.
> -	 * In this case, we use the parent ip.
> -	 *
> -	 * By adding the return address of either the parent ip
> -	 * or the current ip we can disregard most of the stack usage
> -	 * caused by the stack tracer itself.
> -	 *
> -	 * The function tracer always reports the address of where the
> -	 * mcount call was, but the stack will hold the return address.
> -	 */
> -	if (fentry)
> -		ip = parent_ip;
> -	else
> -		ip += MCOUNT_INSN_SIZE;
> +	ip += MCOUNT_INSN_SIZE;
> 
> 	check_stack(ip, &stack);
> 
> @@ -284,7 +262,7 @@ __next(struct seq_file *m, loff_t *pos)
> {
> 	long n = *pos - 1;
> 
> -	if (n >= max_stack_trace.nr_entries || stack_dump_trace[n] == ULONG_MAX)
> +	if (n > max_stack_trace.nr_entries || stack_dump_trace[n] == ULONG_MAX)
> 		return NULL;
> 
> 	m->private = (void *)n;
> @@ -354,7 +332,7 @@ static int t_show(struct seq_file *m, void *v)
> 		seq_printf(m, "        Depth    Size   Location"
> 			   "    (%d entries)\n"
> 			   "        -----    ----   --------\n",
> -			   max_stack_trace.nr_entries - 1);
> +			   max_stack_trace.nr_entries);
> 
> 		if (!stack_tracer_enabled && !max_stack_size)
> 			print_disabled(m);
> -- 


I have reviewed and tested the kernel including this patch and only [RFC 1/3].

Now, the number of entries and max_stack_size are always okay, but unexpected functions,
such as ftrace_ops_no_ops and ftrace_call, are *sometimes* listed as follows.

$ cat /sys/kernel/debug/tracing/stack_trace 

        Depth    Size   Location    (49 entries)
        -----    ----   --------
  0)     4456      16   arch_counter_read+0xc/0x24
  1)     4440      16   ktime_get+0x44/0xb4
  2)     4424      48   get_drm_timestamp+0x30/0x40
  3)     4376      16   drm_get_last_vbltimestamp+0x94/0xb4
  4)     4360      80   drm_handle_vblank+0x84/0x3c0
  5)     4280     144   mdp5_irq+0x118/0x130
  6)     4136      80   msm_irq+0x2c/0x68
  7)     4056      32   handle_irq_event_percpu+0x60/0x210
  8)     4024      96   handle_irq_event+0x50/0x80
  9)     3928      64   handle_fasteoi_irq+0xb0/0x178
 10)     3864      48   generic_handle_irq+0x38/0x54
 11)     3816      32   __handle_domain_irq+0x68/0xbc
 12)     3784      64   gic_handle_irq+0x38/0x88
 13)     3720     280   el1_irq+0x64/0xd8
 14)     3440     168   ftrace_ops_no_ops+0xb4/0x16c
 15)     3272      64   ftrace_call+0x0/0x4
 16)     3208      16   _raw_spin_lock_irqsave+0x14/0x70
 17)     3192      32   msm_gpio_set+0x44/0xb4
 18)     3160      48   _gpiod_set_raw_value+0x68/0x148
 19)     3112      64   gpiod_set_value+0x40/0x70
 20)     3048      32   gpio_led_set+0x3c/0x94
 21)     3016      48   led_set_brightness+0x50/0xa4
 22)     2968      32   led_trigger_event+0x4c/0x78
 23)     2936      48   mmc_request_done+0x38/0x84
 24)     2888      32   sdhci_tasklet_finish+0xcc/0x12c
 25)     2856      48   tasklet_action+0x64/0x120
 26)     2808      48   __do_softirq+0x114/0x2f0
 27)     2760     128   irq_exit+0x98/0xd8
 28)     2632      32   __handle_domain_irq+0x6c/0xbc
 29)     2600      64   gic_handle_irq+0x38/0x88
 30)     2536     280   el1_irq+0x64/0xd8
 31)     2256     168   ftrace_ops_no_ops+0xb4/0x16c
 32)     2088      64   ftrace_call+0x0/0x4
 33)     2024      16   __schedule+0x1c/0x748
 34)     2008      80   schedule+0x38/0x94
 35)     1928      32   schedule_timeout+0x1a8/0x200
 36)     1896     128   wait_for_common+0xa8/0x150
 37)     1768     128   wait_for_completion+0x24/0x34
 38)     1640      32   mmc_wait_for_req_done+0x3c/0x104
 39)     1608      64   mmc_wait_for_cmd+0x68/0x94
 40)     1544     128   get_card_status.isra.25+0x6c/0x88
 41)     1416     112   card_busy_detect.isra.31+0x7c/0x154
 42)     1304     128   mmc_blk_err_check+0xd0/0x4f8
 43)     1176     192   mmc_start_req+0xe4/0x3a8
 44)      984     160   mmc_blk_issue_rw_rq+0xc4/0x9c0
 45)      824     176   mmc_blk_issue_rq+0x19c/0x450
 46)      648     112   mmc_queue_thread+0x134/0x17c
 47)      536      80   kthread+0xe0/0xf8
 48)      456     456   ret_from_fork+0xc/0x50

$ cat /sys/kernel/debug/tracing/stack_max_size
4456

This issue might be related to arch code, not this change.
IMHO, this patch looks settled now.

Best Regards
Jungseok Lee
Steven Rostedt July 17, 2015, 2:41 p.m. UTC | #2
On Fri, 17 Jul 2015 23:28:13 +0900
Jungseok Lee <jungseoklee85@gmail.com> wrote:


> 
> I have reviewed and tested the kernel including this patch and only [RFC 1/3].

Thanks! Can you repost patch 1 with the changes I recommended, so that
I can get an Acked-by from the arm64 maintainers and pull all the
changes in together. This is fine for a 4.3 release, right? That is, it
doesn't need to go into 4.2-rcs.

> 
> Now, the number of entries and max_stack_size are always okay, but unexpected functions,
> such as ftrace_ops_no_ops and ftrace_call, are *sometimes* listed as follows.
> 
> $ cat /sys/kernel/debug/tracing/stack_trace 
> 
>         Depth    Size   Location    (49 entries)
>         -----    ----   --------
>   0)     4456      16   arch_counter_read+0xc/0x24
>   1)     4440      16   ktime_get+0x44/0xb4
>   2)     4424      48   get_drm_timestamp+0x30/0x40
>   3)     4376      16   drm_get_last_vbltimestamp+0x94/0xb4
>   4)     4360      80   drm_handle_vblank+0x84/0x3c0
>   5)     4280     144   mdp5_irq+0x118/0x130
>   6)     4136      80   msm_irq+0x2c/0x68
>   7)     4056      32   handle_irq_event_percpu+0x60/0x210
>   8)     4024      96   handle_irq_event+0x50/0x80
>   9)     3928      64   handle_fasteoi_irq+0xb0/0x178
>  10)     3864      48   generic_handle_irq+0x38/0x54
>  11)     3816      32   __handle_domain_irq+0x68/0xbc
>  12)     3784      64   gic_handle_irq+0x38/0x88
>  13)     3720     280   el1_irq+0x64/0xd8
>  14)     3440     168   ftrace_ops_no_ops+0xb4/0x16c
>  15)     3272      64   ftrace_call+0x0/0x4
>  16)     3208      16   _raw_spin_lock_irqsave+0x14/0x70
>  17)     3192      32   msm_gpio_set+0x44/0xb4
>  18)     3160      48   _gpiod_set_raw_value+0x68/0x148
>  19)     3112      64   gpiod_set_value+0x40/0x70
>  20)     3048      32   gpio_led_set+0x3c/0x94
>  21)     3016      48   led_set_brightness+0x50/0xa4
>  22)     2968      32   led_trigger_event+0x4c/0x78
>  23)     2936      48   mmc_request_done+0x38/0x84
>  24)     2888      32   sdhci_tasklet_finish+0xcc/0x12c
>  25)     2856      48   tasklet_action+0x64/0x120
>  26)     2808      48   __do_softirq+0x114/0x2f0
>  27)     2760     128   irq_exit+0x98/0xd8
>  28)     2632      32   __handle_domain_irq+0x6c/0xbc
>  29)     2600      64   gic_handle_irq+0x38/0x88
>  30)     2536     280   el1_irq+0x64/0xd8
>  31)     2256     168   ftrace_ops_no_ops+0xb4/0x16c
>  32)     2088      64   ftrace_call+0x0/0x4

Like I stated before, the above looks to be an interrupt coming in
while the tracing was happening. This looks legitimate to me. I'm
guessing that arm64 uses one stack for normal context and interrupt
context, whereas x86 uses a separate stack for interrupt context.

-- Steve


>  33)     2024      16   __schedule+0x1c/0x748
>  34)     2008      80   schedule+0x38/0x94
>  35)     1928      32   schedule_timeout+0x1a8/0x200
>  36)     1896     128   wait_for_common+0xa8/0x150
>  37)     1768     128   wait_for_completion+0x24/0x34
>  38)     1640      32   mmc_wait_for_req_done+0x3c/0x104
>  39)     1608      64   mmc_wait_for_cmd+0x68/0x94
>  40)     1544     128   get_card_status.isra.25+0x6c/0x88
>  41)     1416     112   card_busy_detect.isra.31+0x7c/0x154
>  42)     1304     128   mmc_blk_err_check+0xd0/0x4f8
>  43)     1176     192   mmc_start_req+0xe4/0x3a8
>  44)      984     160   mmc_blk_issue_rw_rq+0xc4/0x9c0
>  45)      824     176   mmc_blk_issue_rq+0x19c/0x450
>  46)      648     112   mmc_queue_thread+0x134/0x17c
>  47)      536      80   kthread+0xe0/0xf8
>  48)      456     456   ret_from_fork+0xc/0x50
> 
> $ cat /sys/kernel/debug/tracing/stack_max_size
> 4456
> 
> This issue might be related to arch code, not this change.
> IMHO, this patch look settled now.
> 
> Best Regards
> Jungseok Lee
Jungseok Lee July 17, 2015, 2:59 p.m. UTC | #3
On Jul 17, 2015, at 11:41 PM, Steven Rostedt wrote:
> On Fri, 17 Jul 2015 23:28:13 +0900
> Jungseok Lee <jungseoklee85@gmail.com> wrote:
> 
> 
>> 
>> I have reviewed and tested the kernel including this patch and only [RFC 1/3].
> 
> Thanks! Can you repost patch 1 with the changes I recommended, so that
> I can get an Acked-by from the arm64 maintainers and pull all the
> changes in together. This is fine for a 4.3 release, right? That is, it
> doesn't need to go into 4.2-rcs.
> 

Welcome!

It's not hard to repost a patch, but I feel like we have to wait for Akashi's response.
Also, it might be needed to consider Mark's comment on arch part.

If they are okay, I will proceed.

>> 
>> Now, the number of entries and max_stack_size are always okay, but unexpected functions,
>> such as ftrace_ops_no_ops and ftrace_call, are *sometimes* listed as follows.
>> 
>> $ cat /sys/kernel/debug/tracing/stack_trace 
>> 
>>        Depth    Size   Location    (49 entries)
>>        -----    ----   --------
>>  0)     4456      16   arch_counter_read+0xc/0x24
>>  1)     4440      16   ktime_get+0x44/0xb4
>>  2)     4424      48   get_drm_timestamp+0x30/0x40
>>  3)     4376      16   drm_get_last_vbltimestamp+0x94/0xb4
>>  4)     4360      80   drm_handle_vblank+0x84/0x3c0
>>  5)     4280     144   mdp5_irq+0x118/0x130
>>  6)     4136      80   msm_irq+0x2c/0x68
>>  7)     4056      32   handle_irq_event_percpu+0x60/0x210
>>  8)     4024      96   handle_irq_event+0x50/0x80
>>  9)     3928      64   handle_fasteoi_irq+0xb0/0x178
>> 10)     3864      48   generic_handle_irq+0x38/0x54
>> 11)     3816      32   __handle_domain_irq+0x68/0xbc
>> 12)     3784      64   gic_handle_irq+0x38/0x88
>> 13)     3720     280   el1_irq+0x64/0xd8
>> 14)     3440     168   ftrace_ops_no_ops+0xb4/0x16c
>> 15)     3272      64   ftrace_call+0x0/0x4
>> 16)     3208      16   _raw_spin_lock_irqsave+0x14/0x70
>> 17)     3192      32   msm_gpio_set+0x44/0xb4
>> 18)     3160      48   _gpiod_set_raw_value+0x68/0x148
>> 19)     3112      64   gpiod_set_value+0x40/0x70
>> 20)     3048      32   gpio_led_set+0x3c/0x94
>> 21)     3016      48   led_set_brightness+0x50/0xa4
>> 22)     2968      32   led_trigger_event+0x4c/0x78
>> 23)     2936      48   mmc_request_done+0x38/0x84
>> 24)     2888      32   sdhci_tasklet_finish+0xcc/0x12c
>> 25)     2856      48   tasklet_action+0x64/0x120
>> 26)     2808      48   __do_softirq+0x114/0x2f0
>> 27)     2760     128   irq_exit+0x98/0xd8
>> 28)     2632      32   __handle_domain_irq+0x6c/0xbc
>> 29)     2600      64   gic_handle_irq+0x38/0x88
>> 30)     2536     280   el1_irq+0x64/0xd8
>> 31)     2256     168   ftrace_ops_no_ops+0xb4/0x16c
>> 32)     2088      64   ftrace_call+0x0/0x4
> 
> Like I stated before, the above looks to be an interrupt coming in
> while the tracing was happening. This looks legitimate to me. I'm
> guessing that arm64 uses one stack for normal context and interrupt
> context, where as x86 uses a separate stack for interrupt context.

Okay. Thanks for a kind explanation.

Best Regards
Jungseok Lee
diff mbox

Patch

diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 3f34496244e9..b746399ab59c 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -18,12 +18,6 @@ 
 
 #define STACK_TRACE_ENTRIES 500
 
-#ifdef CC_USING_FENTRY
-# define fentry		1
-#else
-# define fentry		0
-#endif
-
 static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES+1] =
 	 { [0 ... (STACK_TRACE_ENTRIES)] = ULONG_MAX };
 static unsigned stack_dump_index[STACK_TRACE_ENTRIES];
@@ -35,7 +29,7 @@  static unsigned stack_dump_index[STACK_TRACE_ENTRIES];
  */
 static struct stack_trace max_stack_trace = {
 	.max_entries		= STACK_TRACE_ENTRIES - 1,
-	.entries		= &stack_dump_trace[1],
+	.entries		= &stack_dump_trace[0],
 };
 
 static unsigned long max_stack_size;
@@ -55,7 +49,7 @@  static inline void print_max_stack(void)
 
 	pr_emerg("        Depth    Size   Location    (%d entries)\n"
 			   "        -----    ----   --------\n",
-			   max_stack_trace.nr_entries - 1);
+			   max_stack_trace.nr_entries);
 
 	for (i = 0; i < max_stack_trace.nr_entries; i++) {
 		if (stack_dump_trace[i] == ULONG_MAX)
@@ -77,7 +71,7 @@  check_stack(unsigned long ip, unsigned long *stack)
 	unsigned long this_size, flags; unsigned long *p, *top, *start;
 	static int tracer_frame;
 	int frame_size = ACCESS_ONCE(tracer_frame);
-	int i;
+	int i, x;
 
 	this_size = ((unsigned long)stack) & (THREAD_SIZE-1);
 	this_size = THREAD_SIZE - this_size;
@@ -105,26 +99,20 @@  check_stack(unsigned long ip, unsigned long *stack)
 	max_stack_size = this_size;
 
 	max_stack_trace.nr_entries = 0;
-
-	if (using_ftrace_ops_list_func())
-		max_stack_trace.skip = 4;
-	else
-		max_stack_trace.skip = 3;
+	max_stack_trace.skip = 3;
 
 	save_stack_trace(&max_stack_trace);
 
-	/*
-	 * Add the passed in ip from the function tracer.
-	 * Searching for this on the stack will skip over
-	 * most of the overhead from the stack tracer itself.
-	 */
-	stack_dump_trace[0] = ip;
-	max_stack_trace.nr_entries++;
+	/* Skip over the overhead of the stack tracer itself */
+	for (i = 0; i < max_stack_trace.nr_entries; i++) {
+		if (stack_dump_trace[i] == ip)
+			break;
+	}
 
 	/*
 	 * Now find where in the stack these are.
 	 */
-	i = 0;
+	x = 0;
 	start = stack;
 	top = (unsigned long *)
 		(((unsigned long)start & ~(THREAD_SIZE-1)) + THREAD_SIZE);
@@ -139,12 +127,15 @@  check_stack(unsigned long ip, unsigned long *stack)
 	while (i < max_stack_trace.nr_entries) {
 		int found = 0;
 
-		stack_dump_index[i] = this_size;
+		stack_dump_index[x] = this_size;
 		p = start;
 
 		for (; p < top && i < max_stack_trace.nr_entries; p++) {
+			if (stack_dump_trace[i] == ULONG_MAX)
+				break;
 			if (*p == stack_dump_trace[i]) {
-				this_size = stack_dump_index[i++] =
+				stack_dump_trace[x] = stack_dump_trace[i++];
+				this_size = stack_dump_index[x++] =
 					(top - p) * sizeof(unsigned long);
 				found = 1;
 				/* Start the search from here */
@@ -156,7 +147,7 @@  check_stack(unsigned long ip, unsigned long *stack)
 				 * out what that is, then figure it out
 				 * now.
 				 */
-				if (unlikely(!tracer_frame) && i == 1) {
+				if (unlikely(!tracer_frame)) {
 					tracer_frame = (p - stack) *
 						sizeof(unsigned long);
 					max_stack_size -= tracer_frame;
@@ -168,6 +159,10 @@  check_stack(unsigned long ip, unsigned long *stack)
 			i++;
 	}
 
+	max_stack_trace.nr_entries = x;
+	for (; x < i; x++)
+		stack_dump_trace[x] = ULONG_MAX;
+
 	if (task_stack_end_corrupted(current)) {
 		print_max_stack();
 		BUG();
@@ -192,24 +187,7 @@  stack_trace_call(unsigned long ip, unsigned long parent_ip,
 	if (per_cpu(trace_active, cpu)++ != 0)
 		goto out;
 
-	/*
-	 * When fentry is used, the traced function does not get
-	 * its stack frame set up, and we lose the parent.
-	 * The ip is pretty useless because the function tracer
-	 * was called before that function set up its stack frame.
-	 * In this case, we use the parent ip.
-	 *
-	 * By adding the return address of either the parent ip
-	 * or the current ip we can disregard most of the stack usage
-	 * caused by the stack tracer itself.
-	 *
-	 * The function tracer always reports the address of where the
-	 * mcount call was, but the stack will hold the return address.
-	 */
-	if (fentry)
-		ip = parent_ip;
-	else
-		ip += MCOUNT_INSN_SIZE;
+	ip += MCOUNT_INSN_SIZE;
 
 	check_stack(ip, &stack);
 
@@ -284,7 +262,7 @@  __next(struct seq_file *m, loff_t *pos)
 {
 	long n = *pos - 1;
 
-	if (n >= max_stack_trace.nr_entries || stack_dump_trace[n] == ULONG_MAX)
+	if (n > max_stack_trace.nr_entries || stack_dump_trace[n] == ULONG_MAX)
 		return NULL;
 
 	m->private = (void *)n;
@@ -354,7 +332,7 @@  static int t_show(struct seq_file *m, void *v)
 		seq_printf(m, "        Depth    Size   Location"
 			   "    (%d entries)\n"
 			   "        -----    ----   --------\n",
-			   max_stack_trace.nr_entries - 1);
+			   max_stack_trace.nr_entries);
 
 		if (!stack_tracer_enabled && !max_stack_size)
 			print_disabled(m);