@@ -1160,6 +1160,7 @@ static void cs_etm__add_stack_event(struct cs_etm_queue *etmq,
u8 trace_chan_id = tidq->trace_chan_id;
int insn_len;
u64 from_ip, to_ip;
+ u32 flags;
if (etm->synth_opts.callchain || etm->synth_opts.thread_stack) {
from_ip = cs_etm__last_executed_instr(tidq->prev_packet);
@@ -1168,6 +1169,27 @@ static void cs_etm__add_stack_event(struct cs_etm_queue *etmq,
insn_len = cs_etm__instr_size(etmq, trace_chan_id,
tidq->prev_packet->isa, from_ip);
+ /*
+ * Fix up the exception entry.
+ *
+ * If the packet's start_addr is equal to its end_addr, this
+ * packet was altered from an exception packet to a range packet;
+ * the details are described in cs_etm__exception(). This is
+ * used to handle the case where a branch instruction is not
+ * taken but the branch triggers an exception.
+ *
+ * In this case, fix up 'insn_len' to zero so that the thread
+ * stack's return address matches the exception return address,
+ * and the thread stack can finally be popped properly on return
+ * from the exception.
+ */
+ flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
+ PERF_IP_FLAG_INTERRUPT;
+ if (tidq->prev_packet->flags == flags &&
+ tidq->prev_packet->start_addr ==
+ tidq->prev_packet->end_addr)
+ insn_len = 0;
+
/*
* Create thread stacks by keeping track of calls and returns;
* any call pushes thread stack, return pops the stack, and
In theory, when an exception is taken, the thread stack is pushed with an expected return address (ret_addr): from_ip + insn_len. Later, when the exception returns, the return address (taken from the new packet's to_ip) is compared with the ret_addr stored in the thread stack; if the two values are the same, the thread stack is popped.

When a branch instruction's target address triggers an exception, the thread stack's ret_addr at exception entry is the branch target address plus the instruction length. However, the branch instruction is not taken, so the exception return address is the branch target address itself. As a result, the thread stack's ret_addr cannot match the exception return address, and the thread stack cannot be popped properly.

This patch fixes up the ret_addr at exception entry: when it detects that the exception was triggered by a branch target address, it sets 'insn_len' to zero. This allows the thread stack to be popped properly on return from the exception.

Before:

  # perf script --itrace=g16l64i100

  main 3258 100 instructions:
          ffff800010082c1c el0_sync+0x5c ([kernel.kallsyms])
          ffffad816a14 memcpy+0x4 (/usr/lib/aarch64-linux-gnu/ld-2.28.so)
          ffffad800820 _dl_start_final+0x48 (/usr/lib/aarch64-linux-gnu/ld-2.28.so)
          ffffad800b00 _dl_start+0x200 (/usr/lib/aarch64-linux-gnu/ld-2.28.so)
          ffffad800048 _start+0x8 (/usr/lib/aarch64-linux-gnu/ld-2.28.so)
          ffffad800044 _start+0x4 (/usr/lib/aarch64-linux-gnu/ld-2.28.so)

The issues in the output:

  memcpy+0x4 => The call to memcpy() causes the exception; its return address should be memcpy+0x0.

  _start+0x4 => The thread stack was not popped correctly; this is stale data left over from the previous exception flow.
After:

  # perf script --itrace=g16l64i100

  main 3258 100 instructions:
          ffff800010082c1c el0_sync+0x5c ([kernel.kallsyms])
          ffffad816a10 memcpy+0x0 (/usr/lib/aarch64-linux-gnu/ld-2.28.so)
          ffffad800820 _dl_start_final+0x48 (/usr/lib/aarch64-linux-gnu/ld-2.28.so)
          ffffad800b00 _dl_start+0x200 (/usr/lib/aarch64-linux-gnu/ld-2.28.so)
          ffffad800048 _start+0x8 (/usr/lib/aarch64-linux-gnu/ld-2.28.so)

Signed-off-by: Leo Yan <leo.yan@linaro.org>
---
 tools/perf/util/cs-etm.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)