@@ -53,49 +53,294 @@ user_backtrace(struct frame_tail __user *tail,
}
#ifdef CONFIG_COMPAT
-/*
- * The registers we're interested in are at the end of the variable
- * length saved register structure. The fp points at the end of this
- * structure so the address of this struct is:
- * (struct compat_frame_tail *)(xxx->fp)-1
- *
- * This code has been adapted from the ARM OProfile support.
- */
-struct compat_frame_tail {
- compat_uptr_t fp; /* a (struct compat_frame_tail *) in compat mode */
- u32 sp;
- u32 lr;
-} __attribute__((packed));
-static struct compat_frame_tail __user *
-compat_user_backtrace(struct compat_frame_tail __user *tail,
- struct perf_callchain_entry_ctx *entry)
+static int compat_user_read(void *dest, u32 user_addr, size_t bytes)
{
- struct compat_frame_tail buftail;
+ const u32 __user *addr = compat_ptr(user_addr);
unsigned long err;
- /* Also check accessibility of one struct frame_tail beyond */
- if (!access_ok(tail, sizeof(buftail)))
- return NULL;
+ if (!access_ok(addr, bytes))
+ return -EACCES;
pagefault_disable();
- err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail));
+ err = __copy_from_user_inatomic(dest, addr, bytes);
pagefault_enable();
if (err)
- return NULL;
+ return -EACCES;
+
+ return 0;
+}
- perf_callchain_store(entry, buftail.lr);
+static bool compat_is_ptr_executable(u32 ptr, u32 fp)
+{
+ struct vm_area_struct *vma;
/*
- * Frame pointers should strictly progress back up the stack
- * (towards higher addresses).
+ * Fastpath: if what's pointed to by the frame pointer
+ * is on the same page as the frame pointer (which
+ * must point to the stack) then it's on the stack too.
*/
- if (tail + 1 >= (struct compat_frame_tail __user *)
- compat_ptr(buftail.fp))
- return NULL;
+ if ((ptr & ~0xfff) == (fp & ~0xfff))
+ return false;
- return (struct compat_frame_tail __user *)compat_ptr(buftail.fp) - 1;
+ vma = find_vma(current->active_mm, (unsigned long)compat_ptr(ptr));
+
+ if (!vma)
+ return false;
+
+ return vma->vm_flags & VM_EXEC;
+}
+
+/**
+ * compat_perf_trace_1() - Trace down one stack frame
+ * @fp: Input: the frame pointer to trace at; output: the next frame pointer.
+ * @pc: Input: the program counter at the start; output: the next PC.
+ * @leaf_lr: If this is the first frame, this will have the value from the
+ * CPU register LR (link register). If this isn't the first frame,
+ * this will be 0.
+ *
+ * Return: 0 if no error; else an error code
+ */
+static int compat_perf_trace_1(u32 *fp, u32 *pc, u32 leaf_lr)
+{
+ u32 frame_buf[3];
+ u32 before_fp;
+ u32 at_fp;
+ u32 at_at_fp;
+ int err;
+
+ if (*pc & 1) {
+ /*
+ * Thumb mode.
+ *
+ * There are no flags that make frame pointers on gcc-compiled
+ * thumb code useful. Thus we'll simply assume userspace was
+ * compiled by clang and hope for the best. On clang things
+ * look like this:
+ *
+ * +-----------+ 0x1000
+ * | random |
+ * +-----------+ 0x1004
+ * | old FP | <--- FP
+ * +-----------+ 0x1008
+ * | LR |
+ * +-----------+
+ * | random |
+ * +-----------+
+ *
+ * Note that clang doesn't create frames for leaf functions
+ * which means the _caller_ of the leaf function will be
+ * omitted from the crawl. Clang has a flag
+ * "-mno-omit-leaf-frame-pointer " that's supposed to change
+ * this behavior but (when I tried it) it didn't seem to work.
+ * We also won't know for sure if userspace was built with it
+ * since it presumably adds a bunch of extra overhead.
+ *
+ *
+ * Just for reference, here is what happens when you ask gcc to
+ * compile in thumb mode with frame pointers. This illustrates
+ * why ARM should probably remove the following from AAPCS:
+ * The AAPCS does not specify where, within a function's
+ * stack frame record, the frame chain data structure
+ * resides. This permits implementors the freedom to use
+ * whatever location will result in the most efficient code
+ * needed to establish the frame chain record.
+ *
+ * +-----------+ 0x1000
+ * | random | <--- FP
+ * +-----------+ 0x1004
+ * | ... |
+ * +-----------+ ???
+ * | random |
+ * +-----------+ ???
+ * | old FP |
+ * +-----------+ ???
+ * | LR |
+ * +-----------+
+ * | random |
+ * +-----------+
+ */
+ err = compat_user_read(frame_buf, *fp, 8);
+ if (err)
+ return err;
+
+ *fp = frame_buf[0];
+ *pc = frame_buf[1];
+
+ return 0;
+ }
+
+ /*
+ * ARM mode.
+ *
+ * With ARM code we can support 3 options:
+ * - clang compiled with frame pointers
+ * Looks just like the clang thumb style except that we get the
+ * initial FP from R11 instead of R7.
+ *
+ * NOTE: the R11 vs. R7 bit will likely change in the future.
+ * The 2019Q4 Issue of AAPCS revised the frame pointer to be R11
+ * for BOTH ARM and Thumb, as of 2021Q2 no such production
+ * compiler implements that change. There are rumors that
+ * toolchains may change to match this specification in the future.
+ * If/when that happens, we'll probably need a CONFIG option to
+ * support userspace code generated by the old compilers.
+ *
+ *
+ * - gcc compiled with frame pointers (non-leaf)
+ * +-----------+ 0x1000
+ * | random |
+ * +-----------+ 0x1004
+ * | old FP |
+ * +-----------+ 0x1008
+ * | LR | <--- FP
+ * +-----------+
+ * | random |
+ * +-----------+
+ *
+ * - gcc compiled with frame pointers (leaf). NOTE LR is in registers
+ * +-----------+ 0x1000
+ * | random |
+ * +-----------+ 0x1004
+ * | old FP | <--- FP
+ * +-----------+
+ * | random |
+ * +-----------+
+ *
+ * - APCS (ARM Procedure Call Standard)
+ * +-----------+ 0x1000
+ * | random |
+ * +-----------+ 0x1004
+ * | old FP |
+ * +-----------+ 0x1008
+ * | SP |
+ * +-----------+
+ * | LR |
+ * +-----------+
+ * | PC | <--- FP
+ * +-----------+
+ * | random |
+ * +-----------+
+ *
+ * We will autodetect.
+ */
+
+ /*
+ * We'll start with reading whatever the FP points directly at. That
+ * should be valid memory with all 3 supported layouts.
+ */
+ err = compat_user_read(&at_fp, *fp, 4);
+ if (err)
+ return err;
+
+ /*
+ * First let's try to identify clang or a gcc leaf function. In both
+ * cases the FP points directly at the old FP. Frame pointers always
+ * point to the stack and the stack should never be marked as
+ * executable. This contrasts to APCS / gcc-non-leaf where FP will
+ * either point to a PC near the start of the function or a LR, both
+ * of which must be executable.
+ */
+ if (!compat_is_ptr_executable(at_fp, *fp)) {
+ /*
+ * We need to tell the difference between gcc-leaf and clang.
+ *
+ * The first clue is to look at "leaf_lr". It will only be
+ * non-zero on the first call which is the only one that can
+ * be a leaf. Thus if it's non-zero, we're clang.
+ *
+ * If there's a chance we could be in a leaf, look one frame
+ * up. On gcc that would be a non-leaf FP and would point to
+ * something executable. If it's non-executable then we're
+ * clang. If we have any errors, we'll assume clang.
+ */
+ if (!leaf_lr || compat_user_read(&at_at_fp, at_fp, 4) ||
+ !compat_is_ptr_executable(at_at_fp, at_fp)) {
+ /* It's clang; we already have new FP so get PC */
+ err = compat_user_read(pc, *fp + 4, 4);
+ if (err)
+ return err;
+
+ *fp = at_fp;
+
+ return 0;
+ }
+
+ /* It's gcc leaf */
+ *pc = leaf_lr;
+ *fp = at_fp;
+ return 0;
+ }
+
+ /* We need to look at one word before to decide APCS / gcc-ARM */
+ err = compat_user_read(&before_fp, *fp - 4, 4);
+ if (err)
+ return err;
+
+ if (!compat_is_ptr_executable(before_fp, *fp)) {
+ /* It's gcc-ARM */
+ *fp = before_fp;
+ *pc = at_fp;
+
+ return 0;
+ }
+
+ /* APCS */
+ err = compat_user_read(frame_buf, *fp - 12, 12);
+ if (err)
+ return err;
+
+ *fp = frame_buf[0];
+ *pc = frame_buf[2];
+
+ return 0;
+}
+
+static void compat_perf_callchain_user(struct perf_callchain_entry_ctx *entry,
+ struct pt_regs *regs)
+{
+ u32 old_fp = 0;
+ u32 fp;
+ u32 pc;
+ u32 leaf_lr;
+ int err;
+
+ pc = regs->pc;
+ leaf_lr = regs->regs[14];
+
+ /*
+ * Assuming userspace is compiled with frame pointers then it's in
+ * R11 for ARM code and R7 for thumb code. If it's thumb mode we'll
+ * also set the low bit of the PC to match how the PC indicates thumb
+ * mode when crawling down the stack.
+ */
+ if (compat_thumb_mode(regs)) {
+ fp = regs->regs[7];
+ pc |= BIT(0);
+ } else {
+ fp = regs->compat_fp;
+ }
+
+ /* Trace as long as we have room and we have a 4-byte aligned FP. */
+ while ((entry->nr < entry->max_stack) && fp && !(fp & 0x3)) {
+ err = compat_perf_trace_1(&fp, &pc, leaf_lr);
+
+ /* Bail out on any type of error */
+ if (err)
+ break;
+
+ /*
+ * Frame pointers should strictly progress back up the stack
+ * (towards higher addresses).
+ */
+ if (fp <= old_fp)
+ break;
+ old_fp = fp;
+ leaf_lr = 0;
+
+ perf_callchain_store(entry, pc & ~BIT(0));
+ }
}
#endif /* CONFIG_COMPAT */
@@ -121,13 +366,7 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
} else {
#ifdef CONFIG_COMPAT
/* AARCH32 compat mode */
- struct compat_frame_tail __user *tail;
-
- tail = (struct compat_frame_tail __user *)regs->compat_fp - 1;
-
- while ((entry->nr < entry->max_stack) &&
- tail && !((unsigned long)tail & 0x3))
- tail = compat_user_backtrace(tail, entry);
+ compat_perf_callchain_user(entry, regs);
#endif
}
}