diff mbox

[RFC,2/6] arm64: Kprobes with single stepping support

Message ID 1382008671-4515-3-git-send-email-sandeepa.prabhu@linaro.org (mailing list archive)
State New, archived
Headers show

Commit Message

Sandeepa Prabhu Oct. 17, 2013, 11:17 a.m. UTC
Add support for basic kernel probes(kprobes), jump probes (jprobes)
for ARM64.

Kprobes will utilize software breakpoint and single step debug
exceptions supported on ARM v8.

software breakpoint is placed at the probe address to trap the
kernel execution into kprobe handler.

ARM v8 support single stepping to be enabled while exception return
(ERET) with next PC in exception return address (ELR_EL1).
kprobe handler prepares a executable memory slot for out-of-line
execution with the copy of the original instruction under probe, and
enable single stepping from the instruction slot. With this scheme,
the instruction is executed with the exact same register context
'except PC' that points to instruction slot.

Debug mask(PSTATE.D) is enabled while calling user pre & post handlers
since it can hit breakpoint again(like kprobe placed on printk, with
user handler invoking printk again). This allow kprobe re-entry and
any further re-entry is prevented by not calling handlers (counted as
missed kprobe)

Single stepping from slot has drawback on PC-relative accesses
like branching and symbolic literals access as offset from new PC
may not be ensured to fit in immediate value of opcode,
(usually +/-1MB range). So these instructions needs simulation, so
are not allowed to place probe on those instructions.

Instructions generating exceptions or cpu mode change are rejected,
and not allowed to insert probe for such instructions.

Instructions using Exclusive Monitor are rejected right now.

System instructions are mostly enabled for stepping, except MSR
immediate that update "daif" flags in PSTATE, which are not safe
for probing(rejected)

Signed-off-by: Sandeepa Prabhu <sandeepa.prabhu@linaro.org>
---
 arch/arm64/Kconfig                |   1 +
 arch/arm64/include/asm/kprobes.h  |  59 +++++
 arch/arm64/include/asm/probes.h   |  50 ++++
 arch/arm64/include/asm/ptrace.h   |   1 +
 arch/arm64/kernel/Makefile        |   1 +
 arch/arm64/kernel/kprobes-arm64.c | 211 +++++++++++++++
 arch/arm64/kernel/kprobes-arm64.h |  28 ++
 arch/arm64/kernel/kprobes.c       | 538 ++++++++++++++++++++++++++++++++++++++
 arch/arm64/kernel/kprobes.h       |  30 +++
 arch/arm64/kernel/probes-decode.h | 110 ++++++++
 arch/arm64/kernel/vmlinux.lds.S   |   1 +
 11 files changed, 1030 insertions(+)
 create mode 100644 arch/arm64/include/asm/kprobes.h
 create mode 100644 arch/arm64/include/asm/probes.h
 create mode 100644 arch/arm64/kernel/kprobes-arm64.c
 create mode 100644 arch/arm64/kernel/kprobes-arm64.h
 create mode 100644 arch/arm64/kernel/kprobes.c
 create mode 100644 arch/arm64/kernel/kprobes.h
 create mode 100644 arch/arm64/kernel/probes-decode.h

Comments

Will Deacon Nov. 8, 2013, 4:56 p.m. UTC | #1
Hi Sandeepa,

On Thu, Oct 17, 2013 at 12:17:47PM +0100, Sandeepa Prabhu wrote:
> Add support for basic kernel probes(kprobes), jump probes (jprobes)
> for ARM64.

I think this series will conflict quite heavily with the jump_label series,
since they both introduce some common instruction manipulation code. On the
debug side, there will also be conflicts with the kgdb series, so it might
make sense for us to merge those two first, then you can rebase on a stable
branch from us.

[...]

> diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h
> new file mode 100644
> index 0000000..9b491d0
> --- /dev/null
> +++ b/arch/arm64/include/asm/kprobes.h
> @@ -0,0 +1,59 @@
> +/*
> + * arch/arm64/include/asm/kprobes.h
> + *
> + * Copyright (C) 2013 Linaro Limited
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * General Public License for more details.
> + */
> +
> +#ifndef _ARM_KPROBES_H
> +#define _ARM_KPROBES_H
> +
> +#include <linux/types.h>
> +#include <linux/ptrace.h>
> +#include <linux/percpu.h>
> +
> +#define __ARCH_WANT_KPROBES_INSN_SLOT
> +#define MAX_INSN_SIZE                  2

Why is this 2?

> +#define MAX_STACK_SIZE                 128
> +
> +#define flush_insn_slot(p)             do { } while (0)
> +#define kretprobe_blacklist_size       0
> +
> +#include <asm/probes.h>
> +
> +struct prev_kprobe {
> +       struct kprobe *kp;
> +       unsigned int status;
> +};
> +
> +/* Single step context for kprobe */
> +struct kprobe_step_ctx {
> +#define KPROBES_STEP_NONE      0x0
> +#define KPROBES_STEP_PENDING   0x1

Maybe use an enum to stay consistent with what you did for pc_restore_t?

> diff --git a/arch/arm64/kernel/kprobes-arm64.c b/arch/arm64/kernel/kprobes-arm64.c
> new file mode 100644
> index 0000000..30d1c14
> --- /dev/null
> +++ b/arch/arm64/kernel/kprobes-arm64.c
> @@ -0,0 +1,211 @@
> +/*
> + * arch/arm64/kernel/kprobes-arm64.c
> + *
> + * Copyright (C) 2013 Linaro Limited.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * General Public License for more details.
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/kprobes.h>
> +#include <linux/module.h>
> +#include <asm/kprobes.h>
> +
> +#include "probes-decode.h"
> +#include "kprobes-arm64.h"
> +
> +/* AArch64 instruction decode table for kprobes:
> + * The instruction will fall into one of the 3 groups:
> + *  1. Single stepped out-of-the-line slot.
> + *     -Most instructions fall in this group, those does not
> + *      depend on PC address.
> + *
> + *  2. Should be simulated because of PC-relative/literal access.
> + *     -All branching and PC-relative insrtcutions are simulated
> + *      in C code, making use of saved pt_regs
> + *      Catch: SIMD/NEON register context are not saved while
> + *      entering debug exception, so are rejected for now.
> + *
> + *  3. Cannot be probed(not safe) so are rejected.
> + *     - Exception generation and exception return instructions
> + *     - Exclusive monitor(LDREX/STREX family)
> + *
> + */
> +static const struct aarch64_decode_item aarch64_decode_table[] = {
> +       /*
> +        * Data processing - PC relative(literal) addressing:
> +        * Encoding: xxx1 0000 xxxx xxxx xxxx xxxx xxxx xxxx
> +        */
> +       DECODE_REJECT(0x10000000, 0x1F000000),
> +
> +       /*
> +        * Data processing - Add/Substract Immediate:
> +        * Encoding: xxx1 0001 xxxx xxxx xxxx xxxx xxxx xxxx
> +        */
> +       DECODE_SINGLESTEP(0x11000000, 0x1F000000),
> +
> +       /*
> +        * Data processing
> +        * Encoding:
> +        *      xxx1 0010 0xxx xxxx xxxx xxxx xxxx xxxx (Logical)
> +        *      xxx1 0010 1xxx xxxx xxxx xxxx xxxx xxxx (Move wide)
> +        *      xxx1 0011 0xxx xxxx xxxx xxxx xxxx xxxx (Bitfield)
> +        *      xxx1 0011 1xxx xxxx xxxx xxxx xxxx xxxx (Extract)
> +        */
> +       DECODE_SINGLESTEP(0x12000000, 0x1E000000),
> +
> +       /*
> +        * Data processing - SIMD/FP/AdvSIMD/Crypto-AES/SHA
> +        * Encoding: xxx0 111x xxxx xxxx xxxx xxxx xxxx xxxx
> +        * Encoding: xxx1 111x xxxx xxxx xxxx xxxx xxxx xxxx
> +        */
> +       DECODE_SINGLESTEP(0x0E000000, 0x0E000000),
> +
> +       /*
> +        * Data processing - Register
> +        * Encoding: xxxx 101x xxxx xxxx xxxx xxxx xxxx xxxx
> +        */
> +       DECODE_SINGLESTEP(0x0A000000, 0x0E000000),
> +
> +       /* Branching Instructions
> +        *
> +        * Encoding:
> +        *  x001 01xx xxxx xxxx xxxx xxxx xxxx xxxx (uncondtional Branch)

Unconditional

> +        *  x011 010x xxxx xxxx xxxx xxxx xxxx xxxx (compare & branch)

Compare (capitalisation)

> +        *  x011 011x xxxx xxxx xxxx xxxx xxxx xxxx (Test & Branch)
> +        *  0101 010x xxxx xxxx xxxx xxxx xxxx xxxx (Conditional, immediate)
> +        *  1101 011x xxxx xxxx xxxx xxxx xxxx xxxx (Unconditional,register)
> +        */
> +       DECODE_REJECT(0x14000000, 0x7C000000),
> +       DECODE_REJECT(0x14000000, 0x7C000000),
> +       DECODE_REJECT(0x34000000, 0x7E000000),
> +       DECODE_REJECT(0x36000000, 0x7E000000),
> +       DECODE_REJECT(0x54000000, 0xFE000000),
> +       DECODE_REJECT(0xD6000000, 0xFE000000),
> +
> +       /* System insn:
> +        * Encoding: 1101 0101 00xx xxxx xxxx xxxx xxxx xxxx
> +        *
> +        * Note: MSR immediate (update PSTATE daif) is not safe handling
> +        * within kprobes, rejected.
> +        *
> +        * Don't re-arrange these decode table entries.
> +        */
> +       DECODE_REJECT(0xD500401F, 0xFFF8F01F),
> +       DECODE_SINGLESTEP(0xD5000000, 0xFFC00000),
> +
> +       /* Exception Generation:
> +        * Encoding:  1101 0100 xxxx xxxx xxxx xxxx xxxx xxxx
> +        * Instructions: SVC, HVC, SMC, BRK, HLT, DCPS1, DCPS2, DCPS3
> +        */
> +       DECODE_REJECT(0xD4000000, 0xFF000000),
> +
> +       /*
> +        * Load/Store - Exclusive monitor
> +        * Encoding: xx00 1000 xxxx xxxx xxxx xxxx xxxx xxxx
> +        *
> +        * Reject exlusive monitor'ed instructions

exclusive. Also, omit `monitor'ed' -- it doesn't mean anything.

> +        */
> +       DECODE_REJECT(0x08000000, 0x3F000000),
> +
> +       /*
> +        * Load/Store - PC relative(literal):
> +        * Encoding:  xx01 1x00 xxxx xxxx xxxx xxxx xxxx xxxx
> +        */
> +       DECODE_REJECT(0x18000000, 0x3B000000),
> +
> +       /*
> +        * Load/Store - Register Pair
> +        * Encoding:
> +        *      xx10 1x00 0xxx xxxx xxxx xxxx xxxx xxxx
> +        *      xx10 1x00 1xxx xxxx xxxx xxxx xxxx xxxx
> +        *      xx10 1x01 0xxx xxxx xxxx xxxx xxxx xxxx
> +        *      xx10 1x01 1xxx xxxx xxxx xxxx xxxx xxxx
> +        */
> +       DECODE_SINGLESTEP(0x28000000, 0x3A000000),
> +
> +       /*
> +        * Load/Store - Register
> +        * Encoding:
> +        *      xx11 1x00 xx0x xxxx xxxx 00xx xxxx xxxx (unscaled imm)
> +        *      xx11 1x00 xx0x xxxx xxxx 01xx xxxx xxxx (imm post-indexed)
> +        *      xx11 1x00 xx0x xxxx xxxx 10xx xxxx xxxx (unpriviledged)
> +        *      xx11 1x00 xx0x xxxx xxxx 11xx xxxx xxxx (imm pre-indexed)
> +        *
> +        *      xx11 1x00 xx10 xxxx xxxx xx10 xxxx xxxx (register offset)
> +        *
> +        *      xx11 1x01 xxxx xxxx xxxx xxxx xxxx xxxx (unsigned imm)
> +        */
> +       DECODE_SINGLESTEP(0x38000000, 0x3B200000),
> +       DECODE_SINGLESTEP(0x38200200, 0x38300300),
> +       DECODE_SINGLESTEP(0x39000000, 0x3B000000),
> +
> +       /*
> +        * Load/Store - AdvSIMD
> +        * Encoding:
> +        *  0x00 1100 0x00 0000 xxxx xxxx xxxx xxxx (Multiple-structure)
> +        *  0x00 1100 1x0x xxxx xxxx xxxx xxxx xxxx (Multi-struct post-indexed)
> +        *  0x00 1101 0xx0 0000 xxxx xxxx xxxx xxxx (Single-structure))
> +        *  0x00 1101 1xxx xxxx xxxx xxxx xxxx xxxx (Single-struct post-index)
> +        */
> +       DECODE_SINGLESTEP(0x0C000000, 0xBFBF0000),
> +       DECODE_SINGLESTEP(0x0C800000, 0xBFA00000),
> +       DECODE_SINGLESTEP(0x0D000000, 0xBF9F0000),
> +       DECODE_SINGLESTEP(0x0D800000, 0xBF800000),
> +
> +       /* Unallocated:         xxx0 0xxx xxxx xxxx xxxx xxxx xxxx xxxx */
> +       DECODE_REJECT(0x00000000, 0x18000000),
> +       DECODE_END,
> +};
> +
> +static int __kprobes
> +kprobe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi,
> +                  const struct aarch64_decode_item *tbl)
> +{
> +       unsigned int entry, ret = INSN_REJECTED;
> +
> +       for (entry = 0; !decode_table_end(tbl[entry]); entry++) {
> +               if (decode_table_hit(tbl[entry], insn))
> +                       break;
> +       }
> +
> +       switch (decode_get_type(tbl[entry])) {
> +       case DECODE_TYPE_END:
> +       case DECODE_TYPE_REJECT:
> +       default:
> +               ret = INSN_REJECTED;
> +               break;
> +
> +       case DECODE_TYPE_SINGLESTEP:
> +               ret = INSN_GOOD;
> +               break;
> +
> +       case DECODE_TYPE_SIMULATE:
> +               ret = INSN_REJECTED;
> +               break;
> +
> +       case DECODE_TYPE_TABLE:
> +               /* recurse with next level decode table */
> +               ret = kprobe_decode_insn(insn, asi,
> +                                        decode_sub_table(tbl[entry]));
> +       };
> +       return ret;
> +}
> +
> +/* Return:
> + *   INSN_REJECTED     If instruction is one not allowed to kprobe,
> + *   INSN_GOOD         If instruction is supported and uses instruction slot,
> + *   INSN_GOOD_NO_SLOT If instruction is supported but doesn't use its slot.
> + */
> +enum kprobe_insn __kprobes
> +arm_kprobe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi)
> +{
> +       return kprobe_decode_insn(insn, asi, aarch64_decode_table);
> +}
> diff --git a/arch/arm64/kernel/kprobes-arm64.h b/arch/arm64/kernel/kprobes-arm64.h
> new file mode 100644
> index 0000000..87e7891
> --- /dev/null
> +++ b/arch/arm64/kernel/kprobes-arm64.h
> @@ -0,0 +1,28 @@
> +/*
> + * arch/arm64/kernel/kprobes-arm64.h
> + *
> + * Copyright (C) 2013 Linaro Limited.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * General Public License for more details.
> + */
> +
> +#ifndef _ARM_KERNEL_KPROBES_ARM64_H
> +#define _ARM_KERNEL_KPROBES_ARM64_H
> +
> +enum kprobe_insn {
> +       INSN_REJECTED,
> +       INSN_GOOD_NO_SLOT,
> +       INSN_GOOD,
> +};
> +
> +enum kprobe_insn __kprobes
> +arm_kprobe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi);
> +
> +#endif /* _ARM_KERNEL_KPROBES_ARM64_H */
> diff --git a/arch/arm64/kernel/kprobes.c b/arch/arm64/kernel/kprobes.c
> new file mode 100644
> index 0000000..def10b6
> --- /dev/null
> +++ b/arch/arm64/kernel/kprobes.c
> @@ -0,0 +1,538 @@
> +/*
> + * arch/arm64/kernel/kprobes.c
> + *
> + * Kprobes support for ARM64
> + *
> + * Copyright (C) 2013 Linaro Limited.
> + * Author: Sandeepa Prabhu <sandeepa.prabhu@linaro.org>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * General Public License for more details.
> + *
> + */
> +#include <linux/kernel.h>
> +#include <linux/kprobes.h>
> +#include <linux/module.h>
> +#include <linux/slab.h>
> +#include <linux/stop_machine.h>
> +#include <linux/stringify.h>
> +#include <asm/traps.h>
> +#include <asm/cacheflush.h>
> +#include <asm/debug-monitors.h>
> +#include <asm/system_misc.h>
> +#include <asm/insn.h>
> +
> +#include "kprobes.h"
> +#include "kprobes-arm64.h"
> +
> +#define MIN_STACK_SIZE(addr)   min((unsigned long)MAX_STACK_SIZE,      \
> +       (unsigned long)current_thread_info() + THREAD_START_SP - (addr))

Why is this not defined along with MAX_STACK_SIZE?

> +DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
> +DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
> +
> +static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
> +{
> +       int i;
> +       /* prepare insn slot */
> +       p->ainsn.insn[0] = p->opcode;
> +       /* NOP for superscalar uArch decode */

superscalar uArch?

> +       for (i = 1; i < MAX_INSN_SIZE; i++)
> +               p->ainsn.insn[i] = ARCH64_NOP_OPCODE;

Should be AARCH64 (yes, that's two adjacent 'A's!). Also, I think the
jump_label stuff had helpers to give you a NOP hint.

> +
> +       flush_icache_range((uintptr_t) (p->ainsn.insn),
> +                          (uintptr_t) (p->ainsn.insn) + MAX_INSN_SIZE);
> +}
> +
> +int __kprobes arch_prepare_kprobe(struct kprobe *p)
> +{
> +       kprobe_opcode_t insn;
> +       unsigned long probe_addr = (unsigned long)p->addr;
> +
> +       /* copy instruction */
> +       insn = *p->addr;
> +       p->opcode = insn;
> +
> +       if (in_exception_text(probe_addr))
> +               return -EINVAL;
> +
> +       /* decode instruction */
> +       switch (arm_kprobe_decode_insn(insn, &p->ainsn)) {
> +       case INSN_REJECTED:     /* insn not supported */
> +               return -EINVAL;
> +               break;
> +
> +       case INSN_GOOD_NO_SLOT: /* insn need simulation */
> +               return -EINVAL;
> +               break;
> +
> +       case INSN_GOOD: /* instruction uses slot */
> +               p->ainsn.insn = get_insn_slot();
> +               if (!p->ainsn.insn)
> +                       return -ENOMEM;
> +               break;
> +       };
> +
> +       /* prepare the instruction */
> +       arch_prepare_ss_slot(p);
> +
> +       return 0;
> +}
> +
> +static int __kprobes patch_text(kprobe_opcode_t *addr, u32 opcode)
> +{
> +       void *addrs[1];
> +       u32 insns[1];
> +
> +       addrs[0] = (void *)addr;
> +       insns[0] = (u32)opcode;
> +
> +       return aarch64_insn_patch_text_sync(addrs, insns, 1);
> +}
> +
> +/* arm kprobe: install breakpoint in text */
> +void __kprobes arch_arm_kprobe(struct kprobe *p)
> +{
> +       patch_text(p->addr, BRK64_OPCODE_KPROBES);
> +}
> +
> +/* disarm kprobe: remove breakpoint from text */
> +void __kprobes arch_disarm_kprobe(struct kprobe *p)
> +{
> +       patch_text(p->addr, p->opcode);
> +}
> +
> +void __kprobes arch_remove_kprobe(struct kprobe *p)
> +{
> +       if (p->ainsn.insn) {
> +               free_insn_slot(p->ainsn.insn, 0);
> +               p->ainsn.insn = NULL;
> +       }
> +}
> +
> +static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
> +{
> +       kcb->prev_kprobe.kp = kprobe_running();
> +       kcb->prev_kprobe.status = kcb->kprobe_status;
> +}
> +
> +static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
> +{
> +       __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
> +       kcb->kprobe_status = kcb->prev_kprobe.status;
> +}
> +
> +static void __kprobes set_current_kprobe(struct kprobe *p)
> +{
> +       __get_cpu_var(current_kprobe) = p;
> +}

__get_cpu_var uses were being cleaned up by Christoph recently. Take a look
in -next to see some examples of conversions over to this_cpu_*.

> +static void __kprobes
> +set_ss_context(struct kprobe_ctlblk *kcb, unsigned long addr)
> +{
> +       kcb->ss_ctx.ss_status = KPROBES_STEP_PENDING;
> +       kcb->ss_ctx.match_addr = addr + sizeof(kprobe_opcode_t);
> +}
> +
> +static void __kprobes clear_ss_context(struct kprobe_ctlblk *kcb)
> +{
> +       kcb->ss_ctx.ss_status = KPROBES_STEP_NONE;
> +       kcb->ss_ctx.match_addr = 0;
> +}
> +
> +static void __kprobes
> +nop_singlestep_skip(struct kprobe *p, struct pt_regs *regs)
> +{
> +       /* set return addr to next pc to continue */
> +       instruction_pointer(regs) += sizeof(kprobe_opcode_t);
> +       return;
> +}
> +
> +/* Mask/Unmask PSTATE.D flag
> + *
> + * Unmasking D-flag enables recursing into another debug
> + * exception (breakpoint or single step).
> + *
> + * Upon every exception entry, D-flag is disabled by the
> + * hardware. We shall unmask this flag only after safely
> + * saved the previous context and kprobes state machines.
> + *
> + * kprobes can generate recursing in breakpoint (followed
> + * by single stepping) within user-provided handlers only.
> + *
> + * All other places, keep the D-flag masked.
> + */
> +static void mask_pstate_debug_flag(u32 mask)
> +{
> +       if (mask)
> +               asm volatile("msr daifset, #9\n\t");
> +       else
> +               asm volatile("msr daifclr, #9\n\t");
> +}

NAK. Unmasking debug exceptions from within a debug exception is not safe.
I'd much rather we returned from handling this exception, then took whatever
other pending exception there was.

In fact, how do you avoid a race with hardware breakpoints? E.g., somebody
places a hardware breakpoint on an instruction in the kernel for which
kprobes has patched in a brk. We take the hardware breakpoint, disable the
breakpoint and set up a single step before returning to the brk. The brk
then traps, but we must take care not to disable single-step and/or unmask
debug exceptions, because that will cause the hardware breakpoint code to
re-arm its breakpoint before we've stepped off the brk instruction.

Will
Masami Hiramatsu Nov. 9, 2013, 9:10 a.m. UTC | #2
(2013/11/09 1:56), Will Deacon wrote:
> Hi Sandeepa,
> 
> On Thu, Oct 17, 2013 at 12:17:47PM +0100, Sandeepa Prabhu wrote:
>> Add support for basic kernel probes(kprobes), jump probes (jprobes)
>> for ARM64.
> 
> I think this series will conflict quite heavily with the jump_label series,
> since they both introduce some common instruction manipulation code. On the
> debug side, there will also be conflicts with the kgdb series, so it might
> make sense for us to merge those two first, then you can rebase on a stable
> branch from us.

[...]

> In fact, how do you avoid a race with hardware breakpoints? E.g., somebody
> places a hardware breakpoint on an instruction in the kernel for which
> kprobes has patched in a brk. We take the hardware breakpoint, disable the
> breakpoint and set up a single step before returning to the brk. The brk
> then traps, but we must take care not to disable single-step and/or unmask
> debug exceptions, because that will cause the hardware breakpoint code to
> re-arm its breakpoint before we've stepped off the brk instruction.

Hmm, frankly to say, this kind of race issue is not seriously discussed
on x86 too, since kgdb is still a special tool (not used on the production
system).
I think under such situation kgdb operator must have full control of the
system, and he can (and has to) avoid such kind of race.

Thank you,
Sandeepa Prabhu Nov. 11, 2013, 5:35 a.m. UTC | #3
On 8 November 2013 22:26, Will Deacon <will.deacon@arm.com> wrote:
> Hi Sandeepa,
>
> On Thu, Oct 17, 2013 at 12:17:47PM +0100, Sandeepa Prabhu wrote:
>> Add support for basic kernel probes(kprobes), jump probes (jprobes)
>> for ARM64.
>
> I think this series will conflict quite heavily with the jump_label series,
> since they both introduce some common instruction manipulation code. On the
> debug side, there will also be conflicts with the kgdb series, so it might
> make sense for us to merge those two first, then you can rebase on a stable
> branch from us.
Yes, I understand, also the 3 features (kgdb, kprobes, jump_labels)
share some API dependencies.
I can rebase and re-post kprobes after the other 2 are merged.

The patch  "[PATCH RFC v3 1/5] AArch64: Add single-step and breakpoint
handler hooks"  will merge first along with kgdb?

>
> [...]
>
>> diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h
>> new file mode 100644
>> index 0000000..9b491d0
>> --- /dev/null
>> +++ b/arch/arm64/include/asm/kprobes.h
>> @@ -0,0 +1,59 @@
>> +/*
>> + * arch/arm64/include/asm/kprobes.h
>> + *
>> + * Copyright (C) 2013 Linaro Limited
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License version 2 as
>> + * published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> + * General Public License for more details.
>> + */
>> +
>> +#ifndef _ARM_KPROBES_H
>> +#define _ARM_KPROBES_H
>> +
>> +#include <linux/types.h>
>> +#include <linux/ptrace.h>
>> +#include <linux/percpu.h>
>> +
>> +#define __ARCH_WANT_KPROBES_INSN_SLOT
>> +#define MAX_INSN_SIZE                  2
>
> Why is this 2?
Second entry is to hold NOP instruction, absence of it cause abort
while instruction decode.

>
>> +#define MAX_STACK_SIZE                 128
>> +
>> +#define flush_insn_slot(p)             do { } while (0)
>> +#define kretprobe_blacklist_size       0
>> +
>> +#include <asm/probes.h>
>> +
>> +struct prev_kprobe {
>> +       struct kprobe *kp;
>> +       unsigned int status;
>> +};
>> +
>> +/* Single step context for kprobe */
>> +struct kprobe_step_ctx {
>> +#define KPROBES_STEP_NONE      0x0
>> +#define KPROBES_STEP_PENDING   0x1
>
> Maybe use an enum to stay consistent with what you did for pc_restore_t?
OK, I will change it to enums.
>
>> diff --git a/arch/arm64/kernel/kprobes-arm64.c b/arch/arm64/kernel/kprobes-arm64.c
>> new file mode 100644
>> index 0000000..30d1c14
>> --- /dev/null
>> +++ b/arch/arm64/kernel/kprobes-arm64.c
>> @@ -0,0 +1,211 @@
>> +/*
>> + * arch/arm64/kernel/kprobes-arm64.c
>> + *
>> + * Copyright (C) 2013 Linaro Limited.
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License version 2 as
>> + * published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> + * General Public License for more details.
>> + */
>> +
>> +#include <linux/kernel.h>
>> +#include <linux/kprobes.h>
>> +#include <linux/module.h>
>> +#include <asm/kprobes.h>
>> +
>> +#include "probes-decode.h"
>> +#include "kprobes-arm64.h"
>> +
>> +/* AArch64 instruction decode table for kprobes:
>> + * The instruction will fall into one of the 3 groups:
>> + *  1. Single stepped out-of-the-line slot.
>> + *     -Most instructions fall in this group, those does not
>> + *      depend on PC address.
>> + *
>> + *  2. Should be simulated because of PC-relative/literal access.
>> + *     -All branching and PC-relative insrtcutions are simulated
>> + *      in C code, making use of saved pt_regs
>> + *      Catch: SIMD/NEON register context are not saved while
>> + *      entering debug exception, so are rejected for now.
>> + *
>> + *  3. Cannot be probed(not safe) so are rejected.
>> + *     - Exception generation and exception return instructions
>> + *     - Exclusive monitor(LDREX/STREX family)
>> + *
>> + */
>> +static const struct aarch64_decode_item aarch64_decode_table[] = {
>> +       /*
>> +        * Data processing - PC relative(literal) addressing:
>> +        * Encoding: xxx1 0000 xxxx xxxx xxxx xxxx xxxx xxxx
>> +        */
>> +       DECODE_REJECT(0x10000000, 0x1F000000),
>> +
>> +       /*
>> +        * Data processing - Add/Substract Immediate:
>> +        * Encoding: xxx1 0001 xxxx xxxx xxxx xxxx xxxx xxxx
>> +        */
>> +       DECODE_SINGLESTEP(0x11000000, 0x1F000000),
>> +
>> +       /*
>> +        * Data processing
>> +        * Encoding:
>> +        *      xxx1 0010 0xxx xxxx xxxx xxxx xxxx xxxx (Logical)
>> +        *      xxx1 0010 1xxx xxxx xxxx xxxx xxxx xxxx (Move wide)
>> +        *      xxx1 0011 0xxx xxxx xxxx xxxx xxxx xxxx (Bitfield)
>> +        *      xxx1 0011 1xxx xxxx xxxx xxxx xxxx xxxx (Extract)
>> +        */
>> +       DECODE_SINGLESTEP(0x12000000, 0x1E000000),
>> +
>> +       /*
>> +        * Data processing - SIMD/FP/AdvSIMD/Crypto-AES/SHA
>> +        * Encoding: xxx0 111x xxxx xxxx xxxx xxxx xxxx xxxx
>> +        * Encoding: xxx1 111x xxxx xxxx xxxx xxxx xxxx xxxx
>> +        */
>> +       DECODE_SINGLESTEP(0x0E000000, 0x0E000000),
>> +
>> +       /*
>> +        * Data processing - Register
>> +        * Encoding: xxxx 101x xxxx xxxx xxxx xxxx xxxx xxxx
>> +        */
>> +       DECODE_SINGLESTEP(0x0A000000, 0x0E000000),
>> +
>> +       /* Branching Instructions
>> +        *
>> +        * Encoding:
>> +        *  x001 01xx xxxx xxxx xxxx xxxx xxxx xxxx (uncondtional Branch)
>
> Unconditional
OK, I will make all the descriptions consistent (capitalised).
>
>> +        *  x011 010x xxxx xxxx xxxx xxxx xxxx xxxx (compare & branch)
>
> Compare (capitalisation)
>
>> +        *  x011 011x xxxx xxxx xxxx xxxx xxxx xxxx (Test & Branch)
>> +        *  0101 010x xxxx xxxx xxxx xxxx xxxx xxxx (Conditional, immediate)
>> +        *  1101 011x xxxx xxxx xxxx xxxx xxxx xxxx (Unconditional,register)
>> +        */
>> +       DECODE_REJECT(0x14000000, 0x7C000000),
>> +       DECODE_REJECT(0x14000000, 0x7C000000),
>> +       DECODE_REJECT(0x34000000, 0x7E000000),
>> +       DECODE_REJECT(0x36000000, 0x7E000000),
>> +       DECODE_REJECT(0x54000000, 0xFE000000),
>> +       DECODE_REJECT(0xD6000000, 0xFE000000),
>> +
>> +       /* System insn:
>> +        * Encoding: 1101 0101 00xx xxxx xxxx xxxx xxxx xxxx
>> +        *
>> +        * Note: MSR immediate (update PSTATE daif) is not safe handling
>> +        * within kprobes, rejected.
>> +        *
>> +        * Don't re-arrange these decode table entries.
>> +        */
>> +       DECODE_REJECT(0xD500401F, 0xFFF8F01F),
>> +       DECODE_SINGLESTEP(0xD5000000, 0xFFC00000),
>> +
>> +       /* Exception Generation:
>> +        * Encoding:  1101 0100 xxxx xxxx xxxx xxxx xxxx xxxx
>> +        * Instructions: SVC, HVC, SMC, BRK, HLT, DCPS1, DCPS2, DCPS3
>> +        */
>> +       DECODE_REJECT(0xD4000000, 0xFF000000),
>> +
>> +       /*
>> +        * Load/Store - Exclusive monitor
>> +        * Encoding: xx00 1000 xxxx xxxx xxxx xxxx xxxx xxxx
>> +        *
>> +        * Reject exlusive monitor'ed instructions
>
> exclusive. Also, omit `monitor'ed' -- it doesn't mean anything.
Sure.
>
>> +        */
>> +       DECODE_REJECT(0x08000000, 0x3F000000),
>> +
>> +       /*
>> +        * Load/Store - PC relative(literal):
>> +        * Encoding:  xx01 1x00 xxxx xxxx xxxx xxxx xxxx xxxx
>> +        */
>> +       DECODE_REJECT(0x18000000, 0x3B000000),
>> +
>> +       /*
>> +        * Load/Store - Register Pair
>> +        * Encoding:
>> +        *      xx10 1x00 0xxx xxxx xxxx xxxx xxxx xxxx
>> +        *      xx10 1x00 1xxx xxxx xxxx xxxx xxxx xxxx
>> +        *      xx10 1x01 0xxx xxxx xxxx xxxx xxxx xxxx
>> +        *      xx10 1x01 1xxx xxxx xxxx xxxx xxxx xxxx
>> +        */
>> +       DECODE_SINGLESTEP(0x28000000, 0x3A000000),
>> +
>> +       /*
>> +        * Load/Store - Register
>> +        * Encoding:
>> +        *      xx11 1x00 xx0x xxxx xxxx 00xx xxxx xxxx (unscaled imm)
>> +        *      xx11 1x00 xx0x xxxx xxxx 01xx xxxx xxxx (imm post-indexed)
>> +        *      xx11 1x00 xx0x xxxx xxxx 10xx xxxx xxxx (unpriviledged)
>> +        *      xx11 1x00 xx0x xxxx xxxx 11xx xxxx xxxx (imm pre-indexed)
>> +        *
>> +        *      xx11 1x00 xx10 xxxx xxxx xx10 xxxx xxxx (register offset)
>> +        *
>> +        *      xx11 1x01 xxxx xxxx xxxx xxxx xxxx xxxx (unsigned imm)
>> +        */
>> +       DECODE_SINGLESTEP(0x38000000, 0x3B200000),
>> +       DECODE_SINGLESTEP(0x38200200, 0x38300300),
>> +       DECODE_SINGLESTEP(0x39000000, 0x3B000000),
>> +
>> +       /*
>> +        * Load/Store - AdvSIMD
>> +        * Encoding:
>> +        *  0x00 1100 0x00 0000 xxxx xxxx xxxx xxxx (Multiple-structure)
>> +        *  0x00 1100 1x0x xxxx xxxx xxxx xxxx xxxx (Multi-struct post-indexed)
>> +        *  0x00 1101 0xx0 0000 xxxx xxxx xxxx xxxx (Single-structure))
>> +        *  0x00 1101 1xxx xxxx xxxx xxxx xxxx xxxx (Single-struct post-index)
>> +        */
>> +       DECODE_SINGLESTEP(0x0C000000, 0xBFBF0000),
>> +       DECODE_SINGLESTEP(0x0C800000, 0xBFA00000),
>> +       DECODE_SINGLESTEP(0x0D000000, 0xBF9F0000),
>> +       DECODE_SINGLESTEP(0x0D800000, 0xBF800000),
>> +
>> +       /* Unallocated:         xxx0 0xxx xxxx xxxx xxxx xxxx xxxx xxxx */
>> +       DECODE_REJECT(0x00000000, 0x18000000),
>> +       DECODE_END,
>> +};
>> +
>> +static int __kprobes
>> +kprobe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi,
>> +                  const struct aarch64_decode_item *tbl)
>> +{
>> +       unsigned int entry, ret = INSN_REJECTED;
>> +
>> +       for (entry = 0; !decode_table_end(tbl[entry]); entry++) {
>> +               if (decode_table_hit(tbl[entry], insn))
>> +                       break;
>> +       }
>> +
>> +       switch (decode_get_type(tbl[entry])) {
>> +       case DECODE_TYPE_END:
>> +       case DECODE_TYPE_REJECT:
>> +       default:
>> +               ret = INSN_REJECTED;
>> +               break;
>> +
>> +       case DECODE_TYPE_SINGLESTEP:
>> +               ret = INSN_GOOD;
>> +               break;
>> +
>> +       case DECODE_TYPE_SIMULATE:
>> +               ret = INSN_REJECTED;
>> +               break;
>> +
>> +       case DECODE_TYPE_TABLE:
>> +               /* recurse with next level decode table */
>> +               ret = kprobe_decode_insn(insn, asi,
>> +                                        decode_sub_table(tbl[entry]));
>> +       };
>> +       return ret;
>> +}
>> +
>> +/* Return:
>> + *   INSN_REJECTED     If instruction is one not allowed to kprobe,
>> + *   INSN_GOOD         If instruction is supported and uses instruction slot,
>> + *   INSN_GOOD_NO_SLOT If instruction is supported but doesn't use its slot.
>> + */
>> +enum kprobe_insn __kprobes
>> +arm_kprobe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi)
>> +{
>> +       return kprobe_decode_insn(insn, asi, aarch64_decode_table);
>> +}
>> diff --git a/arch/arm64/kernel/kprobes-arm64.h b/arch/arm64/kernel/kprobes-arm64.h
>> new file mode 100644
>> index 0000000..87e7891
>> --- /dev/null
>> +++ b/arch/arm64/kernel/kprobes-arm64.h
>> @@ -0,0 +1,28 @@
>> +/*
>> + * arch/arm64/kernel/kprobes-arm64.h
>> + *
>> + * Copyright (C) 2013 Linaro Limited.
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License version 2 as
>> + * published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> + * General Public License for more details.
>> + */
>> +
>> +#ifndef _ARM_KERNEL_KPROBES_ARM64_H
>> +#define _ARM_KERNEL_KPROBES_ARM64_H
>> +
>> +enum kprobe_insn {
>> +       INSN_REJECTED,
>> +       INSN_GOOD_NO_SLOT,
>> +       INSN_GOOD,
>> +};
>> +
>> +enum kprobe_insn __kprobes
>> +arm_kprobe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi);
>> +
>> +#endif /* _ARM_KERNEL_KPROBES_ARM64_H */
>> diff --git a/arch/arm64/kernel/kprobes.c b/arch/arm64/kernel/kprobes.c
>> new file mode 100644
>> index 0000000..def10b6
>> --- /dev/null
>> +++ b/arch/arm64/kernel/kprobes.c
>> @@ -0,0 +1,538 @@
>> +/*
>> + * arch/arm64/kernel/kprobes.c
>> + *
>> + * Kprobes support for ARM64
>> + *
>> + * Copyright (C) 2013 Linaro Limited.
>> + * Author: Sandeepa Prabhu <sandeepa.prabhu@linaro.org>
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License version 2 as
>> + * published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> + * General Public License for more details.
>> + *
>> + */
>> +#include <linux/kernel.h>
>> +#include <linux/kprobes.h>
>> +#include <linux/module.h>
>> +#include <linux/slab.h>
>> +#include <linux/stop_machine.h>
>> +#include <linux/stringify.h>
>> +#include <asm/traps.h>
>> +#include <asm/cacheflush.h>
>> +#include <asm/debug-monitors.h>
>> +#include <asm/system_misc.h>
>> +#include <asm/insn.h>
>> +
>> +#include "kprobes.h"
>> +#include "kprobes-arm64.h"
>> +
>> +#define MIN_STACK_SIZE(addr)   min((unsigned long)MAX_STACK_SIZE,      \
>> +       (unsigned long)current_thread_info() + THREAD_START_SP - (addr))
>
> Why is this not defined along with MAX_STACK_SIZE?
Hmm can be move to header file.
>
>> +DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
>> +DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
>> +
>> +static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
>> +{
>> +       int i;
>> +       /* prepare insn slot */
>> +       p->ainsn.insn[0] = p->opcode;
>> +       /* NOP for superscalar uArch decode */
>
> superscalar uArch?
well, the comment needs refining, what we mean is that one NOP should
follow the instruction in memory slot, for the decode stage(not to hit
undefined instruction).
>
>> +       for (i = 1; i < MAX_INSN_SIZE; i++)
>> +               p->ainsn.insn[i] = ARCH64_NOP_OPCODE;
>
> Should be AARCH64 (yes, that's two adjacent 'A's!). Also, I think the
> jump_label stuff had helpers to give you a NOP hint.
will check, all we need here is 'opcode' value to fill-in the memory location.
>
>> +
>> +       flush_icache_range((uintptr_t) (p->ainsn.insn),
>> +                          (uintptr_t) (p->ainsn.insn) + MAX_INSN_SIZE);
>> +}
>> +
>> +int __kprobes arch_prepare_kprobe(struct kprobe *p)
>> +{
>> +       kprobe_opcode_t insn;
>> +       unsigned long probe_addr = (unsigned long)p->addr;
>> +
>> +       /* copy instruction */
>> +       insn = *p->addr;
>> +       p->opcode = insn;
>> +
>> +       if (in_exception_text(probe_addr))
>> +               return -EINVAL;
>> +
>> +       /* decode instruction */
>> +       switch (arm_kprobe_decode_insn(insn, &p->ainsn)) {
>> +       case INSN_REJECTED:     /* insn not supported */
>> +               return -EINVAL;
>> +               break;
>> +
>> +       case INSN_GOOD_NO_SLOT: /* insn need simulation */
>> +               return -EINVAL;
>> +               break;
>> +
>> +       case INSN_GOOD: /* instruction uses slot */
>> +               p->ainsn.insn = get_insn_slot();
>> +               if (!p->ainsn.insn)
>> +                       return -ENOMEM;
>> +               break;
>> +       };
>> +
>> +       /* prepare the instruction */
>> +       arch_prepare_ss_slot(p);
>> +
>> +       return 0;
>> +}
>> +
>> +static int __kprobes patch_text(kprobe_opcode_t *addr, u32 opcode)
>> +{
>> +       void *addrs[1];
>> +       u32 insns[1];
>> +
>> +       addrs[0] = (void *)addr;
>> +       insns[0] = (u32)opcode;
>> +
>> +       return aarch64_insn_patch_text_sync(addrs, insns, 1);
>> +}
>> +
>> +/* arm kprobe: install breakpoint in text */
>> +void __kprobes arch_arm_kprobe(struct kprobe *p)
>> +{
>> +       patch_text(p->addr, BRK64_OPCODE_KPROBES);
>> +}
>> +
>> +/* disarm kprobe: remove breakpoint from text */
>> +void __kprobes arch_disarm_kprobe(struct kprobe *p)
>> +{
>> +       patch_text(p->addr, p->opcode);
>> +}
>> +
>> +void __kprobes arch_remove_kprobe(struct kprobe *p)
>> +{
>> +       if (p->ainsn.insn) {
>> +               free_insn_slot(p->ainsn.insn, 0);
>> +               p->ainsn.insn = NULL;
>> +       }
>> +}
>> +
>> +static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
>> +{
>> +       kcb->prev_kprobe.kp = kprobe_running();
>> +       kcb->prev_kprobe.status = kcb->kprobe_status;
>> +}
>> +
>> +static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
>> +{
>> +       __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
>> +       kcb->kprobe_status = kcb->prev_kprobe.status;
>> +}
>> +
>> +static void __kprobes set_current_kprobe(struct kprobe *p)
>> +{
>> +       __get_cpu_var(current_kprobe) = p;
>> +}
>
> __get_cpu_var uses were being cleaned up by Christoph recently. Take a look
> in -next to see some examples of conversions over to this_cpu_*.
Ok, would check this, can change in next version of the patch.
>
>> +static void __kprobes
>> +set_ss_context(struct kprobe_ctlblk *kcb, unsigned long addr)
>> +{
>> +       kcb->ss_ctx.ss_status = KPROBES_STEP_PENDING;
>> +       kcb->ss_ctx.match_addr = addr + sizeof(kprobe_opcode_t);
>> +}
>> +
>> +static void __kprobes clear_ss_context(struct kprobe_ctlblk *kcb)
>> +{
>> +       kcb->ss_ctx.ss_status = KPROBES_STEP_NONE;
>> +       kcb->ss_ctx.match_addr = 0;
>> +}
>> +
>> +static void __kprobes
>> +nop_singlestep_skip(struct kprobe *p, struct pt_regs *regs)
>> +{
>> +       /* set return addr to next pc to continue */
>> +       instruction_pointer(regs) += sizeof(kprobe_opcode_t);
>> +       return;
>> +}
>> +
>> +/* Mask/Unmask PSTATE.D flag
>> + *
>> + * Unmasking D-flag enables recursing into another debug
>> + * exception (breakpoint or single step).
>> + *
>> + * Upon every exception entry, D-flag is disabled by the
>> + * hardware. We shall unmask this flag only after safely
>> + * saved the previous context and kprobes state machines.
>> + *
>> + * kprobes can generate recursing in breakpoint (followed
>> + * by single stepping) within user-provided handlers only.
>> + *
>> + * All other places, keep the D-flag masked.
>> + */
>> +static void mask_pstate_debug_flag(u32 mask)
>> +{
>> +       if (mask)
>> +               asm volatile("msr daifset, #9\n\t");
>> +       else
>> +               asm volatile("msr daifclr, #9\n\t");
>> +}
>
> NAK. Unmasking debug exceptions from within a debug exception is not safe.
> I'd much rather we returned from handling this exception, then took whatever
> other pending exception there was.
well, kprobes needs recursive breakpoints to be handled, and I am not
sure if this can be achieved other way than unmasking D-flag for a
shorter duration where we can expect re-entry (I would check if this
can be done without re-cursing)
I want to understand why unmasking D-flag is unsafe here, kprobes make
sure that recursion depth is only 2 (i.e. does not generate 3rd
Breakpoint trap) and interrupts are kept masked while recursion/single
stepping. Is it unsafe only if conflict with hardware breakpoint on
same CPU?

>
> In fact, how do you avoid a race with hardware breakpoints? E.g., somebody
> places a hardware breakpoint on an instruction in the kernel for which
> kprobes has patched in a brk. We take the hardware breakpoint, disable the
> breakpoint and set up a single step before returning to the brk. The brk
> then traps, but we must take care not to disable single-step and/or unmask
> debug exceptions, because that will cause the hardware breakpoint code to
> re-arm its breakpoint before we've stepped off the brk instruction.
Not sure if kprobes can work together with hardware breakpoint or kgdb
when multiple breakpoints (h/w and s/w) are placed on same text
address. How arm32 and x86 handle this kind of scenario?
Probably, the person debugging hardware breakpoint or kgdb should have
control over the flow and disable kprobes (sysfs interface available)?

>
> Will
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
Sandeepa Prabhu Nov. 11, 2013, 5:39 a.m. UTC | #4
On 9 November 2013 14:40, Masami Hiramatsu
<masami.hiramatsu.pt@hitachi.com> wrote:
> (2013/11/09 1:56), Will Deacon wrote:
>> Hi Sandeepa,
>>
>> On Thu, Oct 17, 2013 at 12:17:47PM +0100, Sandeepa Prabhu wrote:
>>> Add support for basic kernel probes(kprobes), jump probes (jprobes)
>>> for ARM64.
>>
>> I think this series will conflict quite heavily with the jump_label series,
>> since they both introduce some common instruction manipulation code. On the
>> debug side, there will also be conflicts with the kgdb series, so it might
>> make sense for us to merge those two first, then you can rebase on a stable
>> branch from us.
>
> [...]
>
>> In fact, how do you avoid a race with hardware breakpoints? E.g., somebody
>> places a hardware breakpoint on an instruction in the kernel for which
>> kprobes has patched in a brk. We take the hardware breakpoint, disable the
>> breakpoint and set up a single step before returning to the brk. The brk
>> then traps, but we must take care not to disable single-step and/or unmask
>> debug exceptions, because that will cause the hardware breakpoint code to
>> re-arm its breakpoint before we've stepped off the brk instruction.
>
> Hmm, frankly to say, this kind of race issue is not seriously discussed
> on x86 too, since kgdb is still a special tool (not used on the production
> system).
> I think under such situation kgdb operator must have full control of the
> system, and he can (and has to) avoid such kind of race.
Masami,

Hmm I think in same lines, but not sure if we expect kprobes to be
able to work fool-proof along with kgdb or hw breakpoints ?

Thanks,
Sandeepa
>
> Thank you,
>
> --
> Masami HIRAMATSU
> IT Management Research Dept. Linux Technology Center
> Hitachi, Ltd., Yokohama Research Laboratory
> E-mail: masami.hiramatsu.pt@hitachi.com
>
>
Masami Hiramatsu Nov. 11, 2013, 7:54 a.m. UTC | #5
(2013/11/11 14:39), Sandeepa Prabhu wrote:
> On 9 November 2013 14:40, Masami Hiramatsu
> <masami.hiramatsu.pt@hitachi.com> wrote:
>> (2013/11/09 1:56), Will Deacon wrote:
>>> Hi Sandeepa,
>>>
>>> On Thu, Oct 17, 2013 at 12:17:47PM +0100, Sandeepa Prabhu wrote:
>>>> Add support for basic kernel probes(kprobes), jump probes (jprobes)
>>>> for ARM64.
>>>
>>> I think this series will conflict quite heavily with the jump_label series,
>>> since they both introduce some common instruction manipulation code. On the
>>> debug side, there will also be conflicts with the kgdb series, so it might
>>> make sense for us to merge those two first, then you can rebase on a stable
>>> branch from us.
>>
>> [...]
>>
>>> In fact, how do you avoid a race with hardware breakpoints? E.g., somebody
>>> places a hardware breakpoint on an instruction in the kernel for which
>>> kprobes has patched in a brk. We take the hardware breakpoint, disable the
>>> breakpoint and set up a single step before returning to the brk. The brk
>>> then traps, but we must take care not to disable single-step and/or unmask
>>> debug exceptions, because that will cause the hardware breakpoint code to
>>> re-arm its breakpoint before we've stepped off the brk instruction.
>>
>> Hmm, frankly to say, this kind of race issue is not seriously discussed
>> on x86 too, since kgdb is still a special tool (not used on the production
>> system).
>> I think under such situation kgdb operator must have full control of the
>> system, and he can (and has to) avoid such kind of race.
> Masami,
> 
> Hmm I think in same lines, but not sure if we expect kprobes to be
> able to work fool-proof along with kgdb or hw breakpoints ?

For hw breakpoint, yes, we finally get check each other to safely
use it even if one rejects the other one at some points(address).
Since the hw breakpoint is already open for normal user via perf,
we should do it. But the policy still needs to be discussed.

On the other hand, kgdb is a special case and it should be,
because, IMHO, kgdb is a debugger, which means that the kgdb
has to be able to monitor whole the kernel from outside. So
kprobes should not change its behavior even if used with kgdb.

Thank you,
Masami Hiramatsu Nov. 11, 2013, 10:51 a.m. UTC | #6
(2013/11/11 16:54), Masami Hiramatsu wrote:
>>>> In fact, how do you avoid a race with hardware breakpoints? E.g., somebody
>>>> places a hardware breakpoint on an instruction in the kernel for which
>>>> kprobes has patched in a brk. We take the hardware breakpoint, disable the
>>>> breakpoint and set up a single step before returning to the brk. The brk
>>>> then traps, but we must take care not to disable single-step and/or unmask
>>>> debug exceptions, because that will cause the hardware breakpoint code to
>>>> re-arm its breakpoint before we've stepped off the brk instruction.
>>>
>>> Hmm, frankly to say, this kind of race issue is not seriously discussed
>>> on x86 too, since kgdb is still a special tool (not used on the production
>>> system).
>>> I think under such situation kgdb operator must have full control of the
>>> system, and he can (and has to) avoid such kind of race.
>> Masami,
>>
>> Hmm I think in same lines, but not sure if we expect kprobes to be
>> able to work fool-proof along with kgdb or hw breakpoints ?
> 
> For hw breakpoint, yes, we finally get check each other to safely
> use it even if one rejects the other one at some points(address).
> Since the hw breakpoint is already open for normal user via perf,
> we should do it. But the policy still needs to be discussed.

OK, I've ensured that the hw_breakpoint (from perf) can work
with kprobes (from ftrace) at the same address on x86.
So if arm64 already support hw_breakpoint on perf, kprobes should
work with it.

Thank you,
Will Deacon Nov. 11, 2013, 10:58 a.m. UTC | #7
On Mon, Nov 11, 2013 at 10:51:52AM +0000, Masami Hiramatsu wrote:
> (2013/11/11 16:54), Masami Hiramatsu wrote:
> >>>> In fact, how do you avoid a race with hardware breakpoints? E.g., somebody
> >>>> places a hardware breakpoint on an instruction in the kernel for which
> >>>> kprobes has patched in a brk. We take the hardware breakpoint, disable the
> >>>> breakpoint and set up a single step before returning to the brk. The brk
> >>>> then traps, but we must take care not to disable single-step and/or unmask
> >>>> debug exceptions, because that will cause the hardware breakpoint code to
> >>>> re-arm its breakpoint before we've stepped off the brk instruction.
> >>>
> >>> Hmm, frankly to say, this kind of race issue is not seriously discussed
> >>> on x86 too, since kgdb is still a special tool (not used on the production
> >>> system).
> >>> I think under such situation kgdb operator must have full control of the
> >>> system, and he can (and has to) avoid such kind of race.
> >> Masami,
> >>
> >> Hmm I think in same lines, but not sure if we expect kprobes to be
> >> able to work fool-proof along with kgdb or hw breakpoints ?
> > 
> > For hw breakpoint, yes, we finally get check each other to safely
> > use it even if one rejects the other one at some points(address).
> > Since the hw breakpoint is already open for normal user via perf,
> > we should do it. But the policy still needs to be discussed.
> 
> OK, I've ensured that the hw_breakpoint (from perf) can work
> with kprobes (from ftrace) at the same address on x86.
> So if arm64 already support hw_breakpoint on perf, kprobes should
> work with it.

Single-stepping on x86 is different to the step behaviour on arm64 afaik. On
ARM, we have to manually remove the breakpoint, perform a single-step, then
add the breakpoint again. If we re-enable debug exceptions in the kprobe
handler, the step will complete early and we'll never step off the
breakpoint.

Sandeepa: I think you need to retry Masami's test on the arm64 model, since
I'm fairly sure it won't work as expected without some additional code.

Will
Will Deacon Nov. 11, 2013, 11:21 a.m. UTC | #8
On Mon, Nov 11, 2013 at 05:35:37AM +0000, Sandeepa Prabhu wrote:
> On 8 November 2013 22:26, Will Deacon <will.deacon@arm.com> wrote:
> >> diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h
> >> new file mode 100644
> >> index 0000000..9b491d0
> >> --- /dev/null
> >> +++ b/arch/arm64/include/asm/kprobes.h
> >> @@ -0,0 +1,59 @@
> >> +/*
> >> + * arch/arm64/include/asm/kprobes.h
> >> + *
> >> + * Copyright (C) 2013 Linaro Limited
> >> + *
> >> + * This program is free software; you can redistribute it and/or modify
> >> + * it under the terms of the GNU General Public License version 2 as
> >> + * published by the Free Software Foundation.
> >> + *
> >> + * This program is distributed in the hope that it will be useful,
> >> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> >> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> >> + * General Public License for more details.
> >> + */
> >> +
> >> +#ifndef _ARM_KPROBES_H
> >> +#define _ARM_KPROBES_H
> >> +
> >> +#include <linux/types.h>
> >> +#include <linux/ptrace.h>
> >> +#include <linux/percpu.h>
> >> +
> >> +#define __ARCH_WANT_KPROBES_INSN_SLOT
> >> +#define MAX_INSN_SIZE                  2
> >
> > Why is this 2?
> Second entry is to hold NOP instruction, absence of it cause abort
> while instruction decode.

Hmm, can you elaborate please? I'm not sure why you should get an abort
decoding kernel addresses.

> >> +DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
> >> +DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
> >> +
> >> +static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
> >> +{
> >> +       int i;
> >> +       /* prepare insn slot */
> >> +       p->ainsn.insn[0] = p->opcode;
> >> +       /* NOP for superscalar uArch decode */
> >
> > superscalar uArch?
> well, the comment needs refining, what we mean is that one NOP should
> follow the instruction in memory slot, for the decode stage(not to hit
> undefined instruction).

Is this undef related to your comment above?

> > NAK. Unmasking debug exceptions from within a debug exception is not safe.
> > I'd much rather we returned from handling this exception, then took whatever
> > other pending exception there was.
> well, kprobes needs recursive breakpoints to be handled, and I am not
> sure if this can be achieved other way than unmasking D-flag for a
> shorter duration where we can expect re-entry (I would check if this
> can be done without re-cursing)
> I want to understand why unmasking D-flag is unsafe here, kprobes make
> sure that recursion depth is only 2 (i.e. does not generate 3rd
> Breakpoint trap) and interrupts are kept masked while recursion/single
> stepping. Is it unsafe only if conflict with hardware breakpoint on
> same CPU?

Is this recursion only to support setting kprobes on the kprobe
implementation? The problem is that the rest of the debug infrastructure is
not set up to deal with recursive exceptions, so allowing them can break
state machines maintained by code like hw_breakpoint.

> >
> > In fact, how do you avoid a race with hardware breakpoints? E.g., somebody
> > places a hardware breakpoint on an instruction in the kernel for which
> > kprobes has patched in a brk. We take the hardware breakpoint, disable the
> > breakpoint and set up a single step before returning to the brk. The brk
> > then traps, but we must take care not to disable single-step and/or unmask
> > debug exceptions, because that will cause the hardware breakpoint code to
> > re-arm its breakpoint before we've stepped off the brk instruction.
> Not sure if kprobes can work together with hardware breakpoint or kgdb
> when multiple breakpoints (h/w and s/w) are placed on same text
> address. How arm32 and x86 handle this kind of scenario?

ARM doesn't support kernel hw breakpoints due to various limitations (we
don't have hw single step in ARMv7).

> Probably, the person debugging hardware breakpoint or kgdb should have
> control over the flow and disable kprobes (sysfs interface available)?

That sounds like a bit of a cop-out. I'd rather we understand the situation
and, if necessary, add code so that we can deny use of kprobes if kgdb is
active (or something similar) if we can't get the subsystems to collaborate.

Will
Masami Hiramatsu Nov. 11, 2013, 5:32 p.m. UTC | #9
(2013/11/11 19:58), Will Deacon wrote:
> On Mon, Nov 11, 2013 at 10:51:52AM +0000, Masami Hiramatsu wrote:
>> (2013/11/11 16:54), Masami Hiramatsu wrote:
>>>>>> In fact, how do you avoid a race with hardware breakpoints? E.g., somebody
>>>>>> places a hardware breakpoint on an instruction in the kernel for which
>>>>>> kprobes has patched in a brk. We take the hardware breakpoint, disable the
>>>>>> breakpoint and set up a single step before returning to the brk. The brk
>>>>>> then traps, but we must take care not to disable single-step and/or unmask
>>>>>> debug exceptions, because that will cause the hardware breakpoint code to
>>>>>> re-arm its breakpoint before we've stepped off the brk instruction.
>>>>>
>>>>> Hmm, frankly to say, this kind of race issue is not seriously discussed
>>>>> on x86 too, since kgdb is still a special tool (not used on the production
>>>>> system).
>>>>> I think under such situation kgdb operator must have full control of the
>>>>> system, and he can (and has to) avoid such kind of race.
>>>> Masami,
>>>>
>>>> Hmm I think in same lines, but not sure if we expect kprobes to be
>>>> able to work fool-proof along with kgdb or hw breakpoints ?
>>>
>>> For hw breakpoint, yes, we finally get check each other to safely
>>> use it even if one rejects the other one at some points(address).
>>> Since the hw breakpoint is already open for normal user via perf,
>>> we should do it. But the policy still needs to be discussed.
>>
>> OK, I've ensured that the hw_breakpoint (from perf) can work
>> with kprobes (from ftrace) at the same address on x86.
>> So if arm64 already support hw_breakpoint on perf, kprobes should
>> work with it.
> 
> Single-stepping on x86 is different to the step behaviour on arm64 afaik. On
> ARM, we have to manually remove the breakpoint, perform a single-step, then
> add the breakpoint again. If we re-enable debug exceptions in the kprobe
> handler, the step will complete early and we'll never step off the
> breakpoint.

I'm unsure about arm64's debug feature behavior, what does happen when
it performs a single-step on sw-breakpoint?

> Sandeepa: I think you need to retry Masami's test on the arm64 model, since
> I'm fairly sure it won't work as expected without some additional code.

OK, anyway, for testing same one, we need to port ftrace first. So the next
plan is to make a kprobe module to put a probe (which just printk something)
on a specific function (e.g. vfs_symlink), and run perf record with
hw-breakpoint as below

$ perf record -e "mem:0xXXXXXX:k" ln -s /dev/null /tmp/foo

Note that 0xXXXXXX is the address of vfs_symlink.

After that, you can see the message in dmesg and also check the perf result
with "sudo perf script --dump" (you can find a PERF_RECORD_SAMPLE entry if
it works)

Thank you,
Sandeepa Prabhu Nov. 12, 2013, 6:23 a.m. UTC | #10
>>> OK, I've ensured that the hw_breakpoint (from perf) can work
>>> with kprobes (from ftrace) at the same address on x86.
>>> So if arm64 already support hw_breakpoint on perf, kprobes should
>>> work with it.
>>
>> Single-stepping on x86 is different to the step behaviour on arm64 afaik. On
>> ARM, we have to manually remove the breakpoint, perform a single-step, then
>> add the breakpoint again. If we re-enable debug exceptions in the kprobe
>> handler, the step will complete early and we'll never step off the
>> breakpoint.
>
> I'm unsure about arm64's debug feature behavior, what does happen when
> it performs a single-step on sw-breakpoint?
>
>> Sandeepa: I think you need to retry Masami's test on the arm64 model, since
>> I'm fairly sure it won't work as expected without some additional code.
>
> OK, anyway, for testing same one, we need to port ftrace first. So the next
> plan is to make a kprobe module to put a probe (which just printk something)
> on a specific function (e.g. vfs_symlink), and run perf record with
> hw-breakpoint as below
>
> $ perf record -e "mem:0xXXXXXX:k" ln -s /dev/null /tmp/foo
>
> Note that 0xXXXXXX is the address of vfs_symlink.
>
> After that, you can see the message in dmesg and also check the perf result
> with "sudo perf script --dump" (you can find a PERF_RECORD_SAMPLE entry if
> it works)
Thanks for steps, ARM64 ftrace patches are under review on arm mailing
list, I can contact the (linaro) developer implementing ftrace on
what's supported and then figure-out a way to test this concurrency of
kprobes breakpoint and hardware breakpoint.

Thanks,
Sandeepa
>
> Thank you,
>
> --
> Masami HIRAMATSU
> IT Management Research Dept. Linux Technology Center
> Hitachi, Ltd., Yokohama Research Laboratory
> E-mail: masami.hiramatsu.pt@hitachi.com
>
>
Sandeepa Prabhu Nov. 12, 2013, 6:52 a.m. UTC | #11
On 11 November 2013 16:51, Will Deacon <will.deacon@arm.com> wrote:
> On Mon, Nov 11, 2013 at 05:35:37AM +0000, Sandeepa Prabhu wrote:
>> On 8 November 2013 22:26, Will Deacon <will.deacon@arm.com> wrote:
>> >> diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h
>> >> new file mode 100644
>> >> index 0000000..9b491d0
>> >> --- /dev/null
>> >> +++ b/arch/arm64/include/asm/kprobes.h
>> >> @@ -0,0 +1,59 @@
>> >> +/*
>> >> + * arch/arm64/include/asm/kprobes.h
>> >> + *
>> >> + * Copyright (C) 2013 Linaro Limited
>> >> + *
>> >> + * This program is free software; you can redistribute it and/or modify
>> >> + * it under the terms of the GNU General Public License version 2 as
>> >> + * published by the Free Software Foundation.
>> >> + *
>> >> + * This program is distributed in the hope that it will be useful,
>> >> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> >> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> >> + * General Public License for more details.
>> >> + */
>> >> +
>> >> +#ifndef _ARM_KPROBES_H
>> >> +#define _ARM_KPROBES_H
>> >> +
>> >> +#include <linux/types.h>
>> >> +#include <linux/ptrace.h>
>> >> +#include <linux/percpu.h>
>> >> +
>> >> +#define __ARCH_WANT_KPROBES_INSN_SLOT
>> >> +#define MAX_INSN_SIZE                  2
>> >
>> > Why is this 2?
>> Second entry is to hold NOP instruction, absence of it cause abort
>> while instruction decode.
>
> Hmm, can you elaborate please? I'm not sure why you should get an abort
> decoding kernel addresses.
well, kprobes does not step from kernel address, but it prepares a
allocated memory(executable),  copies the instruction and update the
single step address (ELR) to enable stepping while ERET.
So, don't we need NOP at next location after the instruction because
next instruction will be in decode stage and might throw "undefined
instruction" error?
I have removed the trailing NOP and tested single step and it worked
fine!, so I am not sure if  MAX_INSN_SIZE can be 1, I would check v8
debug spec again.

>
>> >> +DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
>> >> +DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
>> >> +
>> >> +static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
>> >> +{
>> >> +       int i;
>> >> +       /* prepare insn slot */
>> >> +       p->ainsn.insn[0] = p->opcode;
>> >> +       /* NOP for superscalar uArch decode */
>> >
>> > superscalar uArch?
>> well, the comment needs refining, what we mean is that one NOP should
>> follow the instruction in memory slot, for the decode stage(not to hit
>> undefined instruction).
>
> Is this undef related to your comment above?
[Yes,  again, I don't know if trailing NOP is necessary as step is
working without it -decode stage for next location is not throwing
"undefined instruction" error while single stepping previous
instruction]

>
>> > NAK. Unmasking debug exceptions from within a debug exception is not safe.
>> > I'd much rather we returned from handling this exception, then took whatever
>> > other pending exception there was.
>> well, kprobes needs recursive breakpoints to be handled, and I am not
>> sure if this can be achieved other way than unmasking D-flag for a
>> shorter duration where we can expect re-entry (I would check if this
>> can be done without re-cursing)
>> I want to understand why unmasking D-flag is unsafe here, kprobes make
>> sure that recursion depth is only 2 (i.e. does not generate 3rd
>> Breakpoint trap) and interrupts are kept masked while recursion/single
>> stepping. Is it unsafe only if conflict with hardware breakpoint on
>> same CPU?
>
> Is this recursion only to support setting kprobes on the kprobe
> implementation? The problem is that the rest of the debug infrastructure is
> not set up to deal with recursive exceptions, so allowing them can break
> state machines maintained by code like hw_breakpoint.
No, upon one kprobe hit for an address, the subsystem can call the
user-defined handlers (pre- and -post) which can call same function
again. Example, if we place kprobe on "printk" entry, and registered
handler can invoke printk to print more info.
This will make kprobe to trigger again and re-enter, so the kprobe
subsystem need to handle the 2nd instance first, and then return back
to previous execution. D-flag is enabled only the duration when the
pre- and post- handler are called, so they they can recurse and handle
single stepping, after that, D-flag is kept disabled.   I am yet to
test the concurrency with hw_breakpoint, would update once I run these
tests.

>
>> >
>> > In fact, how do you avoid a race with hardware breakpoints? E.g., somebody
>> > places a hardware breakpoint on an instruction in the kernel for which
>> > kprobes has patched in a brk. We take the hardware breakpoint, disable the
>> > breakpoint and set up a single step before returning to the brk. The brk
>> > then traps, but we must take care not to disable single-step and/or unmask
>> > debug exceptions, because that will cause the hardware breakpoint code to
>> > re-arm its breakpoint before we've stepped off the brk instruction.
>> Not sure if kprobes can work together with hardware breakpoint or kgdb
>> when multiple breakpoints (h/w and s/w) are placed on same text
>> address. How arm32 and x86 handle this kind of scenario?
>
> ARM doesn't support kernel hw breakpoints due to various limitations (we
> don't have hw single step in ARMv7).
>
>> Probably, the person debugging hardware breakpoint or kgdb should have
>> control over the flow and disable kprobes (sysfs interface available)?
>
> That sounds like a bit of a cop-out. I'd rather we understand the situation
> and, if necessary, add code so that we can deny use of kprobes if kgdb is
> active (or something similar) if we can't get the subsystems to collaborate.
Hmm, as it seems from x86 (updates from Masami), guessing it is fair
expectation that kprobes can co-exist with hardware breakpoints and
work even when happened to be both placed on same text address.
Though we have not discussed about *policy",  I think as long as this
can be fixed, we should support it.
>
> Will
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
Masami Hiramatsu Nov. 12, 2013, 7:27 a.m. UTC | #12
(2013/11/12 15:23), Sandeepa Prabhu wrote:
>>>> OK, I've ensured that the hw_breakpoint (from perf) can work
>>>> with kprobes (from ftrace) at the same address on x86.
>>>> So if arm64 already support hw_breakpoint on perf, kprobes should
>>>> work with it.
>>>
>>> Single-stepping on x86 is different to the step behaviour on arm64 afaik. On
>>> ARM, we have to manually remove the breakpoint, perform a single-step, then
>>> add the breakpoint again. If we re-enable debug exceptions in the kprobe
>>> handler, the step will complete early and we'll never step off the
>>> breakpoint.
>>
>> I'm unsure about arm64's debug feature behavior, what does happen when
>> it performs a single-step on sw-breakpoint?
>>
>>> Sandeepa: I think you need to retry Masami's test on the arm64 model, since
>>> I'm fairly sure it won't work as expected without some additional code.
>>
>> OK, anyway, for testing same one, we need to port ftrace first. So the next

Sorry for confusion, s/next/fallback is what I meant. Making a kprobe module
can be done without ftrace port.

>> plan is to make a kprobe module to put a probe (which just printk something)
>> on a specific function (e.g. vfs_symlink), and run perf record with
>> hw-breakpoint as below
>>
>> $ perf record -e "mem:0xXXXXXX:k" ln -s /dev/null /tmp/foo
>>
>> Note that 0xXXXXXX is the address of vfs_symlink.
>>
>> After that, you can see the message in dmesg and also check the perf result
>> with "sudo perf script --dump" (you can find a PERF_RECORD_SAMPLE entry if
>> it works)
> Thanks for steps, ARM64 ftrace patches are under review on arm mailing
> list, I can contact the (linaro) developer implementing ftrace on
> what's supported and then figure-out a way to test this concurrency of
> kprobes breakpoint and hardware breakpoint.

Would you mean this? :)
http://www.spinics.net/lists/arm-kernel/msg278477.html

Wow, it seems that this also has some works around instruction
manipulation (and confusable filenames...)

Thank you,
Sandeepa Prabhu Nov. 12, 2013, 8:44 a.m. UTC | #13
On 12 November 2013 12:57, Masami Hiramatsu
<masami.hiramatsu.pt@hitachi.com> wrote:
> (2013/11/12 15:23), Sandeepa Prabhu wrote:
>>>>> OK, I've ensured that the hw_breakpoint (from perf) can work
>>>>> with kprobes (from ftrace) at the same address on x86.
>>>>> So if arm64 already support hw_breakpoint on perf, kprobes should
>>>>> work with it.
>>>>
>>>> Single-stepping on x86 is different to the step behaviour on arm64 afaik. On
>>>> ARM, we have to manually remove the breakpoint, perform a single-step, then
>>>> add the breakpoint again. If we re-enable debug exceptions in the kprobe
>>>> handler, the step will complete early and we'll never step off the
>>>> breakpoint.
>>>
>>> I'm unsure about arm64's debug feature behavior, what does happen when
>>> it performs a single-step on sw-breakpoint?
>>>
>>>> Sandeepa: I think you need to retry Masami's test on the arm64 model, since
>>>> I'm fairly sure it won't work as expected without some additional code.
>>>
>>> OK, anyway, for testing same one, we need to port ftrace first. So the next
>
> Sorry for confusion, s/next/fallback is what I meant. Making a kprobe module
> can be done without ftrace port.
Yes, got it, all my verification until now are done using sample
modules only,  looking out for perf (or some other mechanism: ptrace?)
that uses v8 hw breakpoint.
>
>>> plan is to make a kprobe module to put a probe (which just printk something)
>>> on a specific function (e.g. vfs_symlink), and run perf record with
>>> hw-breakpoint as below
>>>
>>> $ perf record -e "mem:0xXXXXXX:k" ln -s /dev/null /tmp/foo
>>>
>>> Note that 0xXXXXXX is the address of vfs_symlink.
>>>
>>> After that, you can see the message in dmesg and also check the perf result
>>> with "sudo perf script --dump" (you can find a PERF_RECORD_SAMPLE entry if
>>> it works)
>> Thanks for steps, ARM64 ftrace patches are under review on arm mailing
>> list, I can contact the (linaro) developer implementing ftrace on
>> what's supported and then figure-out a way to test this concurrency of
>> kprobes breakpoint and hardware breakpoint.
>
> Would you mean this? :)
> http://www.spinics.net/lists/arm-kernel/msg278477.html
>
> Wow, it seems that this also has some works around instruction
> manipulation (and confusable filenames...)
I referred to: http://lwn.net/Articles/572323/  which is another
implementation and on LAKML

>
> Thank you,
>
> --
> Masami HIRAMATSU
> IT Management Research Dept. Linux Technology Center
> Hitachi, Ltd., Yokohama Research Laboratory
> E-mail: masami.hiramatsu.pt@hitachi.com
>
>
Masami Hiramatsu Nov. 12, 2013, 10:17 a.m. UTC | #14
(2013/11/12 17:44), Sandeepa Prabhu wrote:
> On 12 November 2013 12:57, Masami Hiramatsu
> <masami.hiramatsu.pt@hitachi.com> wrote:
>> (2013/11/12 15:23), Sandeepa Prabhu wrote:
>>>>>> OK, I've ensured that the hw_breakpoint (from perf) can work
>>>>>> with kprobes (from ftrace) at the same address on x86.
>>>>>> So if arm64 already support hw_breakpoint on perf, kprobes should
>>>>>> work with it.
>>>>>
>>>>> Single-stepping on x86 is different to the step behaviour on arm64 afaik. On
>>>>> ARM, we have to manually remove the breakpoint, perform a single-step, then
>>>>> add the breakpoint again. If we re-enable debug exceptions in the kprobe
>>>>> handler, the step will complete early and we'll never step off the
>>>>> breakpoint.
>>>>
>>>> I'm unsure about arm64's debug feature behavior, what does happen when
>>>> it performs a single-step on sw-breakpoint?
>>>>
>>>>> Sandeepa: I think you need to retry Masami's test on the arm64 model, since
>>>>> I'm fairly sure it won't work as expected without some additional code.
>>>>
>>>> OK, anyway, for testing same one, we need to port ftrace first. So the next
>>
>> Sorry for confusion, s/next/fallback is what I meant. Making a kprobe module
>> can be done without ftrace port.
> Yes, got it, all my verification until now are done using sample
> modules only,  looking out for perf (or some other mechanism: ptrace?)
> that uses v8 hw breakpoint.

Yes, kprobe vs. perf and uprobe vs. ptrace :)


>>>> plan is to make a kprobe module to put a probe (which just printk something)
>>>> on a specific function (e.g. vfs_symlink), and run perf record with
>>>> hw-breakpoint as below
>>>>
>>>> $ perf record -e "mem:0xXXXXXX:k" ln -s /dev/null /tmp/foo
>>>>
>>>> Note that 0xXXXXXX is the address of vfs_symlink.
>>>>
>>>> After that, you can see the message in dmesg and also check the perf result
>>>> with "sudo perf script --dump" (you can find a PERF_RECORD_SAMPLE entry if
>>>> it works)
>>> Thanks for steps, ARM64 ftrace patches are under review on arm mailing
>>> list, I can contact the (linaro) developer implementing ftrace on
>>> what's supported and then figure-out a way to test this concurrency of
>>> kprobes breakpoint and hardware breakpoint.
>>
>> Would you mean this? :)
>> http://www.spinics.net/lists/arm-kernel/msg278477.html
>>
>> Wow, it seems that this also has some works around instruction
>> manipulation (and confusable filenames...)
> I referred to: http://lwn.net/Articles/572323/  which is another
> implementation and on LAKML

OK, I'll check that (and looks good at a glance).
By the way, I concern about Linaro guys who looks working a bit far
from the LKML and original feature maintainers. Please contact them,
I'm sure they don't bite your hand :)

BTW, I'm currently trying a general housecleaning of __kprobes
annotations. It may also have impact on your patch.
https://lkml.org/lkml/2013/11/8/187

Thank you,
Sandeepa Prabhu Nov. 12, 2013, 10:55 a.m. UTC | #15
On 12 November 2013 15:47, Masami Hiramatsu
<masami.hiramatsu.pt@hitachi.com> wrote:
> (2013/11/12 17:44), Sandeepa Prabhu wrote:
>> On 12 November 2013 12:57, Masami Hiramatsu
>> <masami.hiramatsu.pt@hitachi.com> wrote:
>>> (2013/11/12 15:23), Sandeepa Prabhu wrote:
>>>>>>> OK, I've ensured that the hw_breakpoint (from perf) can work
>>>>>>> with kprobes (from ftrace) at the same address on x86.
>>>>>>> So if arm64 already support hw_breakpoint on perf, kprobes should
>>>>>>> work with it.
>>>>>>
>>>>>> Single-stepping on x86 is different to the step behaviour on arm64 afaik. On
>>>>>> ARM, we have to manually remove the breakpoint, perform a single-step, then
>>>>>> add the breakpoint again. If we re-enable debug exceptions in the kprobe
>>>>>> handler, the step will complete early and we'll never step off the
>>>>>> breakpoint.
>>>>>
>>>>> I'm unsure about arm64's debug feature behavior, what does happen when
>>>>> it performs a single-step on sw-breakpoint?
>>>>>
>>>>>> Sandeepa: I think you need to retry Masami's test on the arm64 model, since
>>>>>> I'm fairly sure it won't work as expected without some additional code.
>>>>>
>>>>> OK, anyway, for testing same one, we need to port ftrace first. So the next
>>>
>>> Sorry for confusion, s/next/fallback is what I meant. Making a kprobe module
>>> can be done without ftrace port.
>> Yes, got it, all my verification until now are done using sample
>> modules only,  looking out for perf (or some other mechanism: ptrace?)
>> that uses v8 hw breakpoint.
>
> Yes, kprobe vs. perf and uprobe vs. ptrace :)
>
>
>>>>> plan is to make a kprobe module to put a probe (which just printk something)
>>>>> on a specific function (e.g. vfs_symlink), and run perf record with
>>>>> hw-breakpoint as below
>>>>>
>>>>> $ perf record -e "mem:0xXXXXXX:k" ln -s /dev/null /tmp/foo
>>>>>
>>>>> Note that 0xXXXXXX is the address of vfs_symlink.
>>>>>
>>>>> After that, you can see the message in dmesg and also check the perf result
>>>>> with "sudo perf script --dump" (you can find a PERF_RECORD_SAMPLE entry if
>>>>> it works)
>>>> Thanks for steps, ARM64 ftrace patches are under review on arm mailing
>>>> list, I can contact the (linaro) developer implementing ftrace on
>>>> what's supported and then figure-out a way to test this concurrency of
>>>> kprobes breakpoint and hardware breakpoint.
>>>
>>> Would you mean this? :)
>>> http://www.spinics.net/lists/arm-kernel/msg278477.html
>>>
>>> Wow, it seems that this also has some works around instruction
>>> manipulation (and confusable filenames...)
>> I referred to: http://lwn.net/Articles/572323/  which is another
>> implementation and on LAKML
>
> OK, I'll check that (and looks good at a glance).
> By the way, I concern about Linaro guys who looks working a bit far
> from the LKML and original feature maintainers. Please contact them,
> I'm sure they don't bite your hand :)
Hmm sure, will convey to our developers/leads :-)

>
> BTW, I'm currently trying a general housecleaning of __kprobes
> annotations. It may also have impact on your patch.
> https://lkml.org/lkml/2013/11/8/187
Hmm, we can help testing your patchset on arm64 platforms. Also have
many doubts on the changes you are working [blacklisting probes etc]

Basically I had tried placing kprobe on memcpy() and the model hung
(insmod never returned back!). Fast-model I have does not have option
of any debug so no clue what happened!.
memcpy() is low-level call being used internally within kprobes, so
probably we cannot handle probe on that routine, but then how to make
sure all such API are rejected by kprobe sub-system ?

~Sandeepa
>
> Thank you,
>
> --
> Masami HIRAMATSU
> IT Management Research Dept. Linux Technology Center
> Hitachi, Ltd., Yokohama Research Laboratory
> E-mail: masami.hiramatsu.pt@hitachi.com
>
>
Masami Hiramatsu Nov. 12, 2013, 2:11 p.m. UTC | #16
(2013/11/12 19:55), Sandeepa Prabhu wrote:
>>>>> Thanks for steps, ARM64 ftrace patches are under review on arm mailing
>>>>> list, I can contact the (linaro) developer implementing ftrace on
>>>>> what's supported and then figure-out a way to test this concurrency of
>>>>> kprobes breakpoint and hardware breakpoint.
>>>>
>>>> Would you mean this? :)
>>>> http://www.spinics.net/lists/arm-kernel/msg278477.html
>>>>
>>>> Wow, it seems that this also has some works around instruction
>>>> manipulation (and confusable filenames...)
>>> I referred to: http://lwn.net/Articles/572323/  which is another
>>> implementation and on LAKML
>>
>> OK, I'll check that (and looks good at a glance).
>> By the way, I concern about Linaro guys who looks working a bit far
>> from the LKML and original feature maintainers. Please contact them,
>> I'm sure they don't bite your hand :)
> Hmm sure, will convey to our developers/leads :-)

Nice :)

>> BTW, I'm currently trying a general housecleaning of __kprobes
>> annotations. It may also have impact on your patch.
>> https://lkml.org/lkml/2013/11/8/187
> Hmm, we can help testing your patchset on arm64 platforms. Also have
> many doubts on the changes you are working [blacklisting probes etc]
> 
> Basically I had tried placing kprobe on memcpy() and the model hung
> (insmod never returned back!). Fast-model I have does not have option
> of any debug so no clue what happened!.

On x86, I can probe memcpy() safely. It depends on the kprobes (and
breakpoint handling) implementation, and it could be found.

> memcpy() is low-level call being used internally within kprobes, so
> probably we cannot handle probe on that routine, but then how to make
> sure all such API are rejected by kprobe sub-system ?

I see, the blacklist still needs to be maintained. I periodically
run a test for probing each function on my kernel, and if I found
such problem, I added it on the blacklist.
Currently I run the test only on x86, so perhaps, other arch does
not have well tested yet.

Thank you,
Steven Rostedt Nov. 12, 2013, 4:59 p.m. UTC | #17
On Tue, 12 Nov 2013 16:25:26 +0530
Sandeepa Prabhu <sandeepa.prabhu@linaro.org> wrote:


> >
> > BTW, I'm currently trying a general housecleaning of __kprobes
> > annotations. It may also have impact on your patch.
> > https://lkml.org/lkml/2013/11/8/187
> Hmm, we can help testing your patchset on arm64 platforms. Also have
> many doubts on the changes you are working [blacklisting probes etc]
> 
> Basically I had tried placing kprobe on memcpy() and the model hung
> (insmod never returned back!). Fast-model I have does not have option
> of any debug so no clue what happened!.
> memcpy() is low-level call being used internally within kprobes, so
> probably we cannot handle probe on that routine, but then how to make
> sure all such API are rejected by kprobe sub-system ?

Working on ports of ftrace, I found that many of the functions in lib/
are used by several locations that just can't be traced, due to how
low level they are. I just simply blacklisted the entire lib/
directory (See the top of lib/Makefile)

I wonder if there's an easy way to blacklist entire directories from
being used by kprobes too. Or at least do it by a file per file basis.

-- Steve
Sandeepa Prabhu Nov. 13, 2013, 7:08 a.m. UTC | #18
On 13 November 2013 12:25, Sandeepa Prabhu <sandeepa.prabhu@linaro.org> wrote:
>>>> I'm unsure about arm64's debug feature behavior, what does happen when
>>>> it performs a single-step on sw-breakpoint?
>>>>
>>>>> Sandeepa: I think you need to retry Masami's test on the arm64 model, since
>>>>> I'm fairly sure it won't work as expected without some additional code.
>>>>
>>>> OK, anyway, for testing same one, we need to port ftrace first. So the next
>>
>> Sorry for confusion, s/next/fallback is what I meant. Making a kprobe module
>> can be done without ftrace port.
>>
>>>> plan is to make a kprobe module to put a probe (which just printk something)
>>>> on a specific function (e.g. vfs_symlink), and run perf record with
>>>> hw-breakpoint as below
>>>>
>>>> $ perf record -e "mem:0xXXXXXX:k" ln -s /dev/null /tmp/foo
>>>>
>>>> Note that 0xXXXXXX is the address of vfs_symlink.
>>>>
>>>> After that, you can see the message in dmesg and also check the perf result
>>>> with "sudo perf script --dump" (you can find a PERF_RECORD_SAMPLE entry if
>>>> it works)
Hi Will, Masami,

I am not sure of 'perf' right now (my minimal rootfs doesn't have) but
I tried to test hardware breakpoints using sample modules
"samples/hw_breakpoint/" on arm64 upstream branch. This should use
same kernel api as perf I believe.

1.  Placing watchpoint ( attr.bp_type = HW_BREAKPOINT_W |
HW_BREAKPOINT_R) upon vfs_symlink symbol, but seems watch-point is not
triggering at all.
2.  Placing text breakpoint (modified sample module with attr.bp_type
= HW_BREAKPOINT_X) upon vfs_symlink, and run "ln -s /dev/null
/tmp/foo".  This time, breakpoint hit but exception is re-cursing
infinitely!

I have attached the kernel logs for reference. So wanted to check if
hw breakpoint/watch-points are working on the upstream branch? Has it
been tested recently with sample modules  or perf/ptrace?

Thanks,
Sandeepa
Peter Zijlstra Nov. 13, 2013, 1:58 p.m. UTC | #19
On Mon, Nov 11, 2013 at 10:58:12AM +0000, Will Deacon wrote:
> Single-stepping on x86 is different to the step behaviour on arm64 afaik. On
> ARM, we have to manually remove the breakpoint, perform a single-step, then
> add the breakpoint again. If we re-enable debug exceptions in the kprobe
> handler, the step will complete early and we'll never step off the
> breakpoint.

This is about hardware breakpoints right? Which are per-cpu? Otherwise
removing the breakpoint would open up a hole for another thread to slip
through while you're single stepping.
Masami Hiramatsu Nov. 13, 2013, 2:07 p.m. UTC | #20
(2013/11/13 15:55), Sandeepa Prabhu wrote:
>>>> I'm unsure about arm64's debug feature behavior, what does happen when
>>>> it performs a single-step on sw-breakpoint?
>>>>
>>>>> Sandeepa: I think you need to retry Masami's test on the arm64 model, since
>>>>> I'm fairly sure it won't work as expected without some additional code.
>>>>
>>>> OK, anyway, for testing same one, we need to port ftrace first. So the next
>>
>> Sorry for confusion, s/next/fallback is what I meant. Making a kprobe module
>> can be done without ftrace port.
>>
>>>> plan is to make a kprobe module to put a probe (which just printk something)
>>>> on a specific function (e.g. vfs_symlink), and run perf record with
>>>> hw-breakpoint as below
>>>>
>>>> $ perf record -e "mem:0xXXXXXX:k" ln -s /dev/null /tmp/foo
>>>>
>>>> Note that 0xXXXXXX is the address of vfs_symlink.
>>>>
>>>> After that, you can see the message in dmesg and also check the perf result
>>>> with "sudo perf script --dump" (you can find a PERF_RECORD_SAMPLE entry if
>>>> it works)
> Hi Will, Masami,
> 
> I am not sure of 'perf' right now (my minimal rootfs doesn't have) but
> I tried to test hardware breakpoints using sample modules
> "samples/hw_breakpoint/" on arm64 upstream branch. This should use
> same kernel api as perf I believe.
> 
> 1.  Placing watchpoint ( attr.bp_type = HW_BREAKPOINT_W |
> HW_BREAKPOINT_R) upon vfs_symlink symbol, but seems watch-point is not
> triggering at all.
> 2.  Placing text breakpoint (modified sample module with attr.bp_type
> = HW_BREAKPOINT_X) upon vfs_symlink, and run "ln -s /dev/null
> /tmp/foo".  This time, breakpoint hit but exception is re-cursing
> infinitely!

Did you this without kprobes? If so, the hw_breakpoint porting
on arm64 may have a bug.

> I have attached the kernel logs for reference. So wanted to check if
> hw breakpoint/watch-points are working on the upstream branch? Has it
> been tested recently with sample modules  or perf/ptrace?

I've tested on x86 with the latest tip-tree kernel and it worked.

Thank you,
Will Deacon Nov. 13, 2013, 2:20 p.m. UTC | #21
On Wed, Nov 13, 2013 at 01:58:07PM +0000, Peter Zijlstra wrote:
> On Mon, Nov 11, 2013 at 10:58:12AM +0000, Will Deacon wrote:
> > Single-stepping on x86 is different to the step behaviour on arm64 afaik. On
> > ARM, we have to manually remove the breakpoint, perform a single-step, then
> > add the breakpoint again. If we re-enable debug exceptions in the kprobe
> > handler, the step will complete early and we'll never step off the
> > breakpoint.
> 
> This is about hardware breakpoints right? Which are per-cpu? Otherwise
> removing the breakpoint would open up a hole for another thread to slip
> through while you're single stepping.

Correct, but our hardware breakpoints don't have any resume feature, so they
have to be disabled on the relevant CPU, then stepped on that same CPU
before they can be reactivated.

Will
Will Deacon Nov. 13, 2013, 2:31 p.m. UTC | #22
On Wed, Nov 13, 2013 at 06:55:33AM +0000, Sandeepa Prabhu wrote:
> >>> I'm unsure about arm64's debug feature behavior, what does happen when
> >>> it performs a single-step on sw-breakpoint?
> >>>
> >>>> Sandeepa: I think you need to retry Masami's test on the arm64 model, since
> >>>> I'm fairly sure it won't work as expected without some additional code.
> >>>
> >>> OK, anyway, for testing same one, we need to port ftrace first. So the next
> >
> > Sorry for confusion, s/next/fallback is what I meant. Making a kprobe module
> > can be done without ftrace port.
> >
> >>> plan is to make a kprobe module to put a probe (which just printk something)
> >>> on a specific function (e.g. vfs_symlink), and run perf record with
> >>> hw-breakpoint as below
> >>>
> >>> $ perf record -e "mem:0xXXXXXX:k" ln -s /dev/null /tmp/foo
> >>>
> >>> Note that 0xXXXXXX is the address of vfs_symlink.
> >>>
> >>> After that, you can see the message in dmesg and also check the perf result
> >>> with "sudo perf script --dump" (you can find a PERF_RECORD_SAMPLE entry if
> >>> it works)
> Hi Will, Masami,
> 
> I am not sure of 'perf' right now (my minimal rootfs doesn't have) but
> I tried to test hardware breakpoints using sample modules
> "samples/hw_breakpoint/" on arm64 upstream branch. This should use
> same kernel api as perf I believe.
> 
> 1.  Placing watchpoint ( attr.bp_type = HW_BREAKPOINT_W |
> HW_BREAKPOINT_R) upon vfs_symlink symbol, but seems watch-point is not
> triggering at all.

vfs_symlink is a function. Why would you expect to write it?

> 2.  Placing text breakpoint (modified sample module with attr.bp_type
> = HW_BREAKPOINT_X) upon vfs_symlink, and run "ln -s /dev/null
> /tmp/foo".  This time, breakpoint hit but exception is re-cursing
> infinitely!

The problem here is that we expect the overflow handler to deal with the
stepping (like GDB does via ptrace). If you don't register a handler, the
kernel will do the step (like you would get if you used perf stat -e
mem:0xNNNN:x).

Will
Sandeepa Prabhu Nov. 13, 2013, 3:55 p.m. UTC | #23
On 13 November 2013 20:01, Will Deacon <will.deacon@arm.com> wrote:
> On Wed, Nov 13, 2013 at 06:55:33AM +0000, Sandeepa Prabhu wrote:
>> >>> I'm unsure about arm64's debug feature behavior, what does happen when
>> >>> it performs a single-step on sw-breakpoint?
>> >>>
>> >>>> Sandeepa: I think you need to retry Masami's test on the arm64 model, since
>> >>>> I'm fairly sure it won't work as expected without some additional code.
>> >>>
>> >>> OK, anyway, for testing same one, we need to port ftrace first. So the next
>> >
>> > Sorry for confusion, s/next/fallback is what I meant. Making a kprobe module
>> > can be done without ftrace port.
>> >
>> >>> plan is to make a kprobe module to put a probe (which just printk something)
>> >>> on a specific function (e.g. vfs_symlink), and run perf record with
>> >>> hw-breakpoint as below
>> >>>
>> >>> $ perf record -e "mem:0xXXXXXX:k" ln -s /dev/null /tmp/foo
>> >>>
>> >>> Note that 0xXXXXXX is the address of vfs_symlink.
>> >>>
>> >>> After that, you can see the message in dmesg and also check the perf result
>> >>> with "sudo perf script --dump" (you can find a PERF_RECORD_SAMPLE entry if
>> >>> it works)
>> Hi Will, Masami,
>>
>> I am not sure of 'perf' right now (my minimal rootfs doesn't have) but
>> I tried to test hardware breakpoints using sample modules
>> "samples/hw_breakpoint/" on arm64 upstream branch. This should use
>> same kernel api as perf I believe.
>>
>> 1.  Placing watchpoint ( attr.bp_type = HW_BREAKPOINT_W |
>> HW_BREAKPOINT_R) upon vfs_symlink symbol, but seems watch-point is not
>> triggering at all.
>
> vfs_symlink is a function. Why would you expect to write it?
This is generic test module (samples/hw_breakpoint/data_breakpoint.ko)
which places watchpoint for bothe read/write.
Atleast watchpt should have triggered for Read right? I also tried
with othe functions like do_fork, vfs_read etc but no hit.

>
>> 2.  Placing text breakpoint (modified sample module with attr.bp_type
>> = HW_BREAKPOINT_X) upon vfs_symlink, and run "ln -s /dev/null
>> /tmp/foo".  This time, breakpoint hit but exception is re-cursing
>> infinitely!
>
> The problem here is that we expect the overflow handler to deal with the
> stepping (like GDB does via ptrace). If you don't register a handler, the
> kernel will do the step (like you would get if you used perf stat -e
> mem:0xNNNN:x).
[This test was done on upstream branch, without kprobes patches.]
Hmm, then this is expected with test breakpoint right? is this
handling to be done by perf and ptrace?
I did not see arm64 support in linux/tools/perf/,  there are multiple
patches in mailing list though. Are you aware of any version of perf
that work with arm64?

~Sandeepa
>
> Will
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
Masami Hiramatsu Nov. 13, 2013, 4:05 p.m. UTC | #24
(2013/11/13 1:59), Steven Rostedt wrote:
> On Tue, 12 Nov 2013 16:25:26 +0530
> Sandeepa Prabhu <sandeepa.prabhu@linaro.org> wrote:
> 
> 
>>>
>>> BTW, I'm currently trying a general housecleaning of __kprobes
>>> annotations. It may also have impact on your patch.
>>> https://lkml.org/lkml/2013/11/8/187
>> Hmm, we can help testing your patchset on arm64 platforms. Also have
>> many doubts on the changes you are working [blacklisting probes etc]
>>
>> Basically I had tried placing kprobe on memcpy() and the model hung
>> (insmod never returned back!). Fast-model I have does not have option
>> of any debug so no clue what happened!.
>> memcpy() is low-level call being used internally within kprobes, so
>> probably we cannot handle probe on that routine, but then how to make
>> sure all such API are rejected by kprobe sub-system ?
> 
> Working on ports of ftrace, I found that many of the functions in lib/
> are used by several locations that just can't be traced, due to how
> low level they are. I just simply blacklisted the entire lib/
> directory (See the top of lib/Makefile)
> 
> I wonder if there's an easy way to blacklist entire directories from
> being used by kprobes too. Or at least do it by a file per file basis.

Hm, perhaps we can do some magic in post-build script as kallsyms does.
1) make an object file
2) extract symbols from the file
3) put the symbols into data section as a list of strings
4) analyze the list at boot (init) time by using kallsyms
how about this? :)

Thank you,
Will Deacon Nov. 15, 2013, 4:37 p.m. UTC | #25
On Tue, Nov 12, 2013 at 06:52:51AM +0000, Sandeepa Prabhu wrote:
> On 11 November 2013 16:51, Will Deacon <will.deacon@arm.com> wrote:
> > On Mon, Nov 11, 2013 at 05:35:37AM +0000, Sandeepa Prabhu wrote:
> >> On 8 November 2013 22:26, Will Deacon <will.deacon@arm.com> wrote:
> >> >> +#define MAX_INSN_SIZE                  2
> >> >
> >> > Why is this 2?
> >> Second entry is to hold NOP instruction, absence of it cause abort
> >> while instruction decode.
> >
> > Hmm, can you elaborate please? I'm not sure why you should get an abort
> > decoding kernel addresses.
> well, kprobes does not step from kernel address, but it prepares a
> allocated memory(executable),  copies the instruction and update the
> single step address (ELR) to enable stepping while ERET.
> So, don't we need NOP at next location after the instruction because
> next instruction will be in decode stage and might throw "undefined
> instruction" error?

You can't take speculative prefetch aborts like that, so unless you actually
go and *execute* garbage, you don't need that NOP. From the sounds of it, it's
not required, as long as you handle the step exception correctly.

> >> > NAK. Unmasking debug exceptions from within a debug exception is not safe.
> >> > I'd much rather we returned from handling this exception, then took whatever
> >> > other pending exception there was.
> >> well, kprobes needs recursive breakpoints to be handled, and I am not
> >> sure if this can be achieved other way than unmasking D-flag for a
> >> shorter duration where we can expect re-entry (I would check if this
> >> can be done without re-cursing)
> >> I want to understand why unmasking D-flag is unsafe here, kprobes make
> >> sure that recursion depth is only 2 (i.e. does not generate 3rd
> >> Breakpoint trap) and interrupts are kept masked while recursion/single
> >> stepping. Is it unsafe only if conflict with hardware breakpoint on
> >> same CPU?
> >
> > Is this recursion only to support setting kprobes on the kprobe
> > implementation? The problem is that the rest of the debug infrastructure is
> > not set up to deal with recursive exceptions, so allowing them can break
> > state machines maintained by code like hw_breakpoint.
> No, upon one kprobe hit for an address, the subsystem can call the
> user-defined handlers (pre- and -post) which can call same function
> again. Example, if we place kprobe on "printk" entry, and registered
> handler can invoke printk to print more info.

Hang on, I think I'm missing something here. If you run into a recursive
probe, you'll simply hit another BRK instruction, right? That should be
fine, since PSTATE.D doesn't mask software breakpoint exceptions. The
tricky part comes when you try to step over that guy, but you might be ok
if you clear PSTATE.D *only* while you step your single instruction that you
copied out to the buffer.

What do you think? I'd really like you to try testing something like:

  1. Place a hardware breakpoint in the kernel
  2. Place a kprobe on the same address
  3. Place a kprobe somewhere in the pre- hook for the kprobe placed in (2)

then check that (a) we manage to get through that lot without locking up and
(b) each probe/breakpoint is hit exactly once.

> This will make kprobe to trigger again and re-enter, so the kprobe
> subsystem need to handle the 2nd instance first, and then return back
> to previous execution. D-flag is enabled only the duration when the
> pre- and post- handler are called, so they they can recurse and handle
> single stepping, after that, D-flag is kept disabled.   I am yet to
> test the concurrency with hw_breakpoint, would update once I run these
> tests.

If you really want to support this, you need to do more than just clear the
D flag. Not only do you need to deal with hardware breakpoints, but also
things like scheduling... Assuming that the user-defined handlers can block,
then you run the risk of context-switching with the D-flag set, which
introduces a significant black-out period to kernel debugging. There are
also issues like returning to userspace with MDSCR_EL1.SS set because of a
context switch triggered by the pre- handler, resulting in a single-step
exception from userspace.

I reckon what I suggested above might work, but I'd like your input.

Will
Will Deacon Nov. 15, 2013, 4:39 p.m. UTC | #26
On Wed, Nov 13, 2013 at 03:55:42PM +0000, Sandeepa Prabhu wrote:
> On 13 November 2013 20:01, Will Deacon <will.deacon@arm.com> wrote:
> > On Wed, Nov 13, 2013 at 06:55:33AM +0000, Sandeepa Prabhu wrote:
> >> 1.  Placing watchpoint ( attr.bp_type = HW_BREAKPOINT_W |
> >> HW_BREAKPOINT_R) upon vfs_symlink symbol, but seems watch-point is not
> >> triggering at all.
> >
> > vfs_symlink is a function. Why would you expect to write it?
> This is generic test module (samples/hw_breakpoint/data_breakpoint.ko)
> which places watchpoint for bothe read/write.
> Atleast watchpt should have triggered for Read right? I also tried
> with othe functions like do_fork, vfs_read etc but no hit.

You'd need to place something for exec if you want to see anything on the
instruction side. A read by the instruction fetcher does not trigger a read
watchpoint on ARM.

> >> 2.  Placing text breakpoint (modified sample module with attr.bp_type
> >> = HW_BREAKPOINT_X) upon vfs_symlink, and run "ln -s /dev/null
> >> /tmp/foo".  This time, breakpoint hit but exception is re-cursing
> >> infinitely!
> >
> > The problem here is that we expect the overflow handler to deal with the
> > stepping (like GDB does via ptrace). If you don't register a handler, the
> > kernel will do the step (like you would get if you used perf stat -e
> > mem:0xNNNN:x).
> [This test was done on upstream branch, without kprobes patches.]
> Hmm, then this is expected with test breakpoint right? is this
> handling to be done by perf and ptrace?

perf stat doesn't register an overflow handler, so the hw_breakpoint
backend will handle the step. ptrace registers a handler which sends a
SIGTRAP to the debugger (e.g. gdb), which handles the step manually
(probably using a PTRACE_SINGLESTEP request).

> I did not see arm64 support in linux/tools/perf/,  there are multiple
> patches in mailing list though. Are you aware of any version of perf
> that work with arm64?

The perf tool should work fine on arm64 using mainline. Are you seeing
problems?

Will
Sandeepa Prabhu Nov. 18, 2013, 6:43 a.m. UTC | #27
On 15 November 2013 22:07, Will Deacon <will.deacon@arm.com> wrote:

>> well, kprobes does not step from kernel address, but it prepares a
>> allocated memory(executable),  copies the instruction and update the
>> single step address (ELR) to enable stepping while ERET.
>> So, don't we need NOP at next location after the instruction because
>> next instruction will be in decode stage and might throw "undefined
>> instruction" error?
>
> You can't take speculative prefetch aborts like that, so unless you actually
> go and *execute* garbage, you don't need that NOP. From the sounds of it, it's
> not required, as long as you handle the step exception correctly.
Ok, the stepping is for exactly one instruction, after trapping again
stepping is disabled.
so clear that NOP is not necessary. I will remove NOP in next version.

>> > Is this recursion only to support setting kprobes on the kprobe
>> > implementation? The problem is that the rest of the debug infrastructure is
>> > not set up to deal with recursive exceptions, so allowing them can break
>> > state machines maintained by code like hw_breakpoint.
>> No, upon one kprobe hit for an address, the subsystem can call the
>> user-defined handlers (pre- and -post) which can call same function
>> again. Example, if we place kprobe on "printk" entry, and registered
>> handler can invoke printk to print more info.
>
> Hang on, I think I'm missing something here. If you run into a recursive
> probe, you'll simply hit another BRK instruction, right? That should be
> fine, since PSTATE.D doesn't mask software breakpoint exceptions. The
> tricky part comes when you try to step over that guy, but you might be ok
> if you clear PSTATE.D *only* while you step your single instruction that you
> copied out to the buffer.
Yes exactly, D-flag was enabled so that single step can run on BRK
handler code! exactly once. So I was unmasking D-flag before
taking(expecting) a BRK so that saved spsr_el1 in pt_regs have it
stored and restore when return with single stepping!
I agree with your suggestion to unmask D-flag just before ERET for
stepping, makes more sense, is it as simple as updating pt_regs
(regs->pstate)?

>
> What do you think? I'd really like you to try testing something like:
>
>   1. Place a hardware breakpoint in the kernel
>   2. Place a kprobe on the same address
>   3. Place a kprobe somewhere in the pre- hook for the kprobe placed in (2)
>
> then check that (a) we manage to get through that lot without locking up and
> (b) each probe/breakpoint is hit exactly once.
Ok, will do, this can be easily done with kprobing "printk" with pre-
handler invoking printk, and then placing a hw_breakpoint.  (also we
need to check for kprobe-watchpoint concurrency right?)
I would update with results.

>
>> This will make kprobe to trigger again and re-enter, so the kprobe
>> subsystem need to handle the 2nd instance first, and then return back
>> to previous execution. D-flag is enabled only the duration when the
>> pre- and post- handler are called, so they they can recurse and handle
>> single stepping, after that, D-flag is kept disabled.   I am yet to
>> test the concurrency with hw_breakpoint, would update once I run these
>> tests.
>
> If you really want to support this, you need to do more than just clear the
> D flag. Not only do you need to deal with hardware breakpoints, but also
> things like scheduling... Assuming that the user-defined handlers can block,
> then you run the risk of context-switching with the D-flag set, which
> introduces a significant black-out period to kernel debugging. There are
> also issues like returning to userspace with MDSCR_EL1.SS set because of a
> context switch triggered by the pre- handler, resulting in a single-step
> exception from userspace.
kprobe user-defined handlers should not block no blocking calls) and
this is one of the requirements of kprobes, so we cannot manage
context-switching or interrupts until the kprobe trap is completely
handled. So, we basically disable local interrupts throughout
exception handler.

Masami,
kprobe framework cannot check if the  supplied user-handlers can block
right? and blocking call would cause undesired sequences, how to
handle this? (recommendations/policies?)

>
> I reckon what I suggested above might work, but I'd like your input.
Sure, would come back with more clear picture after running all tests
and making the changes as per your suggestion.

Thanks for reviewing this in detail,
-Sandeepa
>
> Will
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
Sandeepa Prabhu Nov. 18, 2013, 6:55 a.m. UTC | #28
>> This is generic test module (samples/hw_breakpoint/data_breakpoint.ko)
>> which places watchpoint for bothe read/write.
>> Atleast watchpt should have triggered for Read right? I also tried
>> with othe functions like do_fork, vfs_read etc but no hit.
>
> You'd need to place something for exec if you want to see anything on the
> instruction side. A read by the instruction fetcher does not trigger a read
> watchpoint on ARM.
Hmm, then watchpoint cannot not be placed on text address (instruction
fetch path) right?
(Sorry I did not check debug spec fully for watchpoint/hw breakpoints,
I should do that)

>
>> >> 2.  Placing text breakpoint (modified sample module with attr.bp_type
>> >> = HW_BREAKPOINT_X) upon vfs_symlink, and run "ln -s /dev/null
>> >> /tmp/foo".  This time, breakpoint hit but exception is re-cursing
>> >> infinitely!
>> >
>> > The problem here is that we expect the overflow handler to deal with the
>> > stepping (like GDB does via ptrace). If you don't register a handler, the
>> > kernel will do the step (like you would get if you used perf stat -e
>> > mem:0xNNNN:x).
>> [This test was done on upstream branch, without kprobes patches.]
>> Hmm, then this is expected with test breakpoint right? is this
>> handling to be done by perf and ptrace?
>
> perf stat doesn't register an overflow handler, so the hw_breakpoint
> backend will handle the step. ptrace registers a handler which sends a
> SIGTRAP to the debugger (e.g. gdb), which handles the step manually
> (probably using a PTRACE_SINGLESTEP request).
>
>> I did not see arm64 support in linux/tools/perf/,  there are multiple
>> patches in mailing list though. Are you aware of any version of perf
>> that work with arm64?
>
> The perf tool should work fine on arm64 using mainline. Are you seeing
> problems?
Hmm basically perf is working and I can run tests now.
>
> Will
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
Sandeepa Prabhu Nov. 18, 2013, 8:51 a.m. UTC | #29
On 18 November 2013 12:25, Sandeepa Prabhu <sandeepa.prabhu@linaro.org> wrote:
>>> This is generic test module (samples/hw_breakpoint/data_breakpoint.ko)
>>> which places watchpoint for bothe read/write.
>>> Atleast watchpt should have triggered for Read right? I also tried
>>> with othe functions like do_fork, vfs_read etc but no hit.
>>
>> You'd need to place something for exec if you want to see anything on the
>> instruction side. A read by the instruction fetcher does not trigger a read
>> watchpoint on ARM.
> Hmm, then watchpoint cannot not be placed on text address (instruction
> fetch path) right?
> (Sorry I did not check debug spec fully for watchpoint/hw breakpoints,
> I should do that)
>
>>
>>> >> 2.  Placing text breakpoint (modified sample module with attr.bp_type
>>> >> = HW_BREAKPOINT_X) upon vfs_symlink, and run "ln -s /dev/null
>>> >> /tmp/foo".  This time, breakpoint hit but exception is re-cursing
>>> >> infinitely!
>>> >
>>> > The problem here is that we expect the overflow handler to deal with the
>>> > stepping (like GDB does via ptrace). If you don't register a handler, the
>>> > kernel will do the step (like you would get if you used perf stat -e
>>> > mem:0xNNNN:x).
>>> [This test was done on upstream branch, without kprobes patches.]
>>> Hmm, then this is expected with test breakpoint right? is this
>>> handling to be done by perf and ptrace?
>>
>> perf stat doesn't register an overflow handler, so the hw_breakpoint
>> backend will handle the step. ptrace registers a handler which sends a
>> SIGTRAP to the debugger (e.g. gdb), which handles the step manually
>> (probably using a PTRACE_SINGLESTEP request).
>>
>>> I did not see arm64 support in linux/tools/perf/,  there are multiple
>>> patches in mailing list though. Are you aware of any version of perf
>>> that work with arm64?
>>
>> The perf tool should work fine on arm64 using mainline. Are you seeing
>> problems?
> Hmm basically perf is working and I can run tests now.
Hi Will,
Okay. I can see what you meant with respect to D-flag unmasking, on
original patchset(v2). :)

a. Placed a kprobe on vfs_read - works fine independently.
b. Placed hw_breakpoint on vfs_read using perf (perf record -e
mem:0xffffffc000134ba4:x -a -- sleep 10) and it works fine
independently.

c. Now, a+b, first placed kprobe on vfs_read and then ran perf record
event on vfs_read (hw breakpoint). Now, seeing that kprobe single step
is never complete/never disabled!, so debug exception would generate
forever! (Continuously printing "Unexpected kernel single-step
exception at EL1")

 kprobe pre_handler: p->addr = 0xffffffc000134ba4
 reenter_dbg: test API invoked
 kprobe post_handler: p->addr = 0xffffffc000134ba4
 fmp/fo^[[5Dreenter_dbg: test API invoked
 kprobe pre_handler: p->addr = 0xffffffc000134ba4
 Unexpected kernel single-step exception at EL1
 Unexpected kernel single-step exception at EL1
....

Once I change the location of D-flag manipulation (your suggestion)
and run enough tests, I would come back with more details and inputs.

Thanks,
Sandeepa

>>
>> Will
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>> Please read the FAQ at  http://www.tux.org/lkml/
diff mbox

Patch

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index da388e4..2e89059 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -27,6 +27,7 @@  config ARM64
 	select HAVE_HW_BREAKPOINT if PERF_EVENTS
 	select HAVE_MEMBLOCK
 	select HAVE_PERF_EVENTS
+	select HAVE_KPROBES if !XIP_KERNEL
 	select IRQ_DOMAIN
 	select MODULES_USE_ELF_RELA
 	select NO_BOOTMEM
diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h
new file mode 100644
index 0000000..9b491d0
--- /dev/null
+++ b/arch/arm64/include/asm/kprobes.h
@@ -0,0 +1,59 @@ 
+/*
+ * arch/arm64/include/asm/kprobes.h
+ *
+ * Copyright (C) 2013 Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#ifndef _ARM_KPROBES_H
+#define _ARM_KPROBES_H
+
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/percpu.h>
+
+#define __ARCH_WANT_KPROBES_INSN_SLOT
+#define MAX_INSN_SIZE			2
+#define MAX_STACK_SIZE			128
+
+#define flush_insn_slot(p)		do { } while (0)
+#define kretprobe_blacklist_size	0
+
+#include <asm/probes.h>
+
+struct prev_kprobe {
+	struct kprobe *kp;
+	unsigned int status;
+};
+
+/* Single step context for kprobe */
+struct kprobe_step_ctx {
+#define KPROBES_STEP_NONE	0x0
+#define KPROBES_STEP_PENDING	0x1
+	unsigned long ss_status;
+	unsigned long match_addr;
+};
+
+/* per-cpu kprobe control block */
+struct kprobe_ctlblk {
+	unsigned int kprobe_status;
+	struct prev_kprobe prev_kprobe;
+	struct kprobe_step_ctx ss_ctx;
+	struct pt_regs jprobe_saved_regs;
+	char jprobes_stack[MAX_STACK_SIZE];
+};
+
+void arch_remove_kprobe(struct kprobe *);
+int kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr);
+int kprobe_exceptions_notify(struct notifier_block *self,
+			     unsigned long val, void *data);
+
+#endif /* _ARM_KPROBES_H */
diff --git a/arch/arm64/include/asm/probes.h b/arch/arm64/include/asm/probes.h
new file mode 100644
index 0000000..c9c7734
--- /dev/null
+++ b/arch/arm64/include/asm/probes.h
@@ -0,0 +1,50 @@ 
+/*
+ * arch/arm64/include/asm/probes.h
+ *
+ * Copyright (C) 2013 Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#ifndef _ARM_PROBES_H
+#define _ARM_PROBES_H
+
+struct kprobe;
+struct arch_specific_insn;
+
+typedef u32 kprobe_opcode_t;
+typedef unsigned long (kprobes_pstate_check_t)(unsigned long);
+typedef unsigned long
+(kprobes_condition_check_t)(struct kprobe *p, struct pt_regs *);
+typedef void
+(kprobes_prepare_t)(struct kprobe *, struct arch_specific_insn *);
+typedef void (kprobes_handler_t) (u32 opcode, long addr, struct pt_regs *);
+
+typedef enum {
+	NO_RESTORE,
+	RESTORE_PC,
+} pc_restore_t;
+
+struct kprobe_pc_restore {
+	pc_restore_t type;
+	unsigned long addr;
+};
+
+/* architecture specific copy of original instruction */
+struct arch_specific_insn {
+	kprobe_opcode_t *insn;
+	kprobes_pstate_check_t *pstate_cc;
+	kprobes_condition_check_t *check_condn;
+	kprobes_prepare_t *prepare;
+	kprobes_handler_t *handler;
+	/* restore address after step xol */
+	struct kprobe_pc_restore restore;
+};
+
+#endif
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index 0dacbbf..89f1727 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -164,6 +164,7 @@  static inline int valid_user_regs(struct user_pt_regs *regs)
 }
 
 #define instruction_pointer(regs)	(regs)->pc
+#define stack_pointer(regs)		((regs)->sp)
 
 #ifdef CONFIG_SMP
 extern unsigned long profile_pc(struct pt_regs *regs);
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index b7db65e..12ef8d2 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -19,6 +19,7 @@  arm64-obj-$(CONFIG_HW_PERF_EVENTS)	+= perf_event.o
 arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o
 arm64-obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
 arm64-obj-$(CONFIG_JUMP_LABEL)		+= jump_label.o
+arm64-obj-$(CONFIG_KPROBES)		+= kprobes.o kprobes-arm64.o
 
 obj-y					+= $(arm64-obj-y) vdso/
 obj-m					+= $(arm64-obj-m)
diff --git a/arch/arm64/kernel/kprobes-arm64.c b/arch/arm64/kernel/kprobes-arm64.c
new file mode 100644
index 0000000..30d1c14
--- /dev/null
+++ b/arch/arm64/kernel/kprobes-arm64.c
@@ -0,0 +1,211 @@ 
+/*
+ * arch/arm64/kernel/kprobes-arm64.c
+ *
+ * Copyright (C) 2013 Linaro Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kprobes.h>
+#include <linux/module.h>
+#include <asm/kprobes.h>
+
+#include "probes-decode.h"
+#include "kprobes-arm64.h"
+
+/* AArch64 instruction decode table for kprobes:
+ * The instruction will fall into one of the 3 groups:
+ *  1. Single stepped out-of-the-line slot.
+ *     -Most instructions fall in this group, those does not
+ *      depend on PC address.
+ *
+ *  2. Should be simulated because of PC-relative/literal access.
+ *     -All branching and PC-relative insrtcutions are simulated
+ *      in C code, making use of saved pt_regs
+ *      Catch: SIMD/NEON register context are not saved while
+ *      entering debug exception, so are rejected for now.
+ *
+ *  3. Cannot be probed(not safe) so are rejected.
+ *     - Exception generation and exception return instructions
+ *     - Exclusive monitor(LDREX/STREX family)
+ *
+ */
+static const struct aarch64_decode_item aarch64_decode_table[] = {
+	/*
+	 * Data processing - PC relative(literal) addressing:
+	 * Encoding: xxx1 0000 xxxx xxxx xxxx xxxx xxxx xxxx
+	 */
+	DECODE_REJECT(0x10000000, 0x1F000000),
+
+	/*
+	 * Data processing - Add/Substract Immediate:
+	 * Encoding: xxx1 0001 xxxx xxxx xxxx xxxx xxxx xxxx
+	 */
+	DECODE_SINGLESTEP(0x11000000, 0x1F000000),
+
+	/*
+	 * Data processing
+	 * Encoding:
+	 *      xxx1 0010 0xxx xxxx xxxx xxxx xxxx xxxx (Logical)
+	 *      xxx1 0010 1xxx xxxx xxxx xxxx xxxx xxxx (Move wide)
+	 *      xxx1 0011 0xxx xxxx xxxx xxxx xxxx xxxx (Bitfield)
+	 *      xxx1 0011 1xxx xxxx xxxx xxxx xxxx xxxx (Extract)
+	 */
+	DECODE_SINGLESTEP(0x12000000, 0x1E000000),
+
+	/*
+	 * Data processing - SIMD/FP/AdvSIMD/Crypto-AES/SHA
+	 * Encoding: xxx0 111x xxxx xxxx xxxx xxxx xxxx xxxx
+	 * Encoding: xxx1 111x xxxx xxxx xxxx xxxx xxxx xxxx
+	 */
+	DECODE_SINGLESTEP(0x0E000000, 0x0E000000),
+
+	/*
+	 * Data processing - Register
+	 * Encoding: xxxx 101x xxxx xxxx xxxx xxxx xxxx xxxx
+	 */
+	DECODE_SINGLESTEP(0x0A000000, 0x0E000000),
+
+	/* Branching Instructions
+	 *
+	 * Encoding:
+	 *  x001 01xx xxxx xxxx xxxx xxxx xxxx xxxx (uncondtional Branch)
+	 *  x011 010x xxxx xxxx xxxx xxxx xxxx xxxx (compare & branch)
+	 *  x011 011x xxxx xxxx xxxx xxxx xxxx xxxx (Test & Branch)
+	 *  0101 010x xxxx xxxx xxxx xxxx xxxx xxxx (Conditional, immediate)
+	 *  1101 011x xxxx xxxx xxxx xxxx xxxx xxxx (Unconditional,register)
+	 */
+	DECODE_REJECT(0x14000000, 0x7C000000),
+	DECODE_REJECT(0x14000000, 0x7C000000),
+	DECODE_REJECT(0x34000000, 0x7E000000),
+	DECODE_REJECT(0x36000000, 0x7E000000),
+	DECODE_REJECT(0x54000000, 0xFE000000),
+	DECODE_REJECT(0xD6000000, 0xFE000000),
+
+	/* System insn:
+	 * Encoding: 1101 0101 00xx xxxx xxxx xxxx xxxx xxxx
+	 *
+	 * Note: MSR immediate (update PSTATE daif) is not safe handling
+	 * within kprobes, rejected.
+	 *
+	 * Don't re-arrange these decode table entries.
+	 */
+	DECODE_REJECT(0xD500401F, 0xFFF8F01F),
+	DECODE_SINGLESTEP(0xD5000000, 0xFFC00000),
+
+	/* Exception Generation:
+	 * Encoding:  1101 0100 xxxx xxxx xxxx xxxx xxxx xxxx
+	 * Instructions: SVC, HVC, SMC, BRK, HLT, DCPS1, DCPS2, DCPS3
+	 */
+	DECODE_REJECT(0xD4000000, 0xFF000000),
+
+	/*
+	 * Load/Store - Exclusive monitor
+	 * Encoding: xx00 1000 xxxx xxxx xxxx xxxx xxxx xxxx
+	 *
+	 * Reject exlusive monitor'ed instructions
+	 */
+	DECODE_REJECT(0x08000000, 0x3F000000),
+
+	/*
+	 * Load/Store - PC relative(literal):
+	 * Encoding:  xx01 1x00 xxxx xxxx xxxx xxxx xxxx xxxx
+	 */
+	DECODE_REJECT(0x18000000, 0x3B000000),
+
+	/*
+	 * Load/Store - Register Pair
+	 * Encoding:
+	 *      xx10 1x00 0xxx xxxx xxxx xxxx xxxx xxxx
+	 *      xx10 1x00 1xxx xxxx xxxx xxxx xxxx xxxx
+	 *      xx10 1x01 0xxx xxxx xxxx xxxx xxxx xxxx
+	 *      xx10 1x01 1xxx xxxx xxxx xxxx xxxx xxxx
+	 */
+	DECODE_SINGLESTEP(0x28000000, 0x3A000000),
+
+	/*
+	 * Load/Store - Register
+	 * Encoding:
+	 *      xx11 1x00 xx0x xxxx xxxx 00xx xxxx xxxx (unscaled imm)
+	 *      xx11 1x00 xx0x xxxx xxxx 01xx xxxx xxxx (imm post-indexed)
+	 *      xx11 1x00 xx0x xxxx xxxx 10xx xxxx xxxx (unpriviledged)
+	 *      xx11 1x00 xx0x xxxx xxxx 11xx xxxx xxxx (imm pre-indexed)
+	 *
+	 *      xx11 1x00 xx10 xxxx xxxx xx10 xxxx xxxx (register offset)
+	 *
+	 *      xx11 1x01 xxxx xxxx xxxx xxxx xxxx xxxx (unsigned imm)
+	 */
+	DECODE_SINGLESTEP(0x38000000, 0x3B200000),
+	DECODE_SINGLESTEP(0x38200200, 0x38300300),
+	DECODE_SINGLESTEP(0x39000000, 0x3B000000),
+
+	/*
+	 * Load/Store - AdvSIMD
+	 * Encoding:
+	 *  0x00 1100 0x00 0000 xxxx xxxx xxxx xxxx (Multiple-structure)
+	 *  0x00 1100 1x0x xxxx xxxx xxxx xxxx xxxx (Multi-struct post-indexed)
+	 *  0x00 1101 0xx0 0000 xxxx xxxx xxxx xxxx (Single-structure))
+	 *  0x00 1101 1xxx xxxx xxxx xxxx xxxx xxxx (Single-struct post-index)
+	 */
+	DECODE_SINGLESTEP(0x0C000000, 0xBFBF0000),
+	DECODE_SINGLESTEP(0x0C800000, 0xBFA00000),
+	DECODE_SINGLESTEP(0x0D000000, 0xBF9F0000),
+	DECODE_SINGLESTEP(0x0D800000, 0xBF800000),
+
+	/* Unallocated:         xxx0 0xxx xxxx xxxx xxxx xxxx xxxx xxxx */
+	DECODE_REJECT(0x00000000, 0x18000000),
+	DECODE_END,
+};
+
+static int __kprobes
+kprobe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi,
+		   const struct aarch64_decode_item *tbl)
+{
+	unsigned int entry, ret = INSN_REJECTED;
+
+	for (entry = 0; !decode_table_end(tbl[entry]); entry++) {
+		if (decode_table_hit(tbl[entry], insn))
+			break;
+	}
+
+	switch (decode_get_type(tbl[entry])) {
+	case DECODE_TYPE_END:
+	case DECODE_TYPE_REJECT:
+	default:
+		ret = INSN_REJECTED;
+		break;
+
+	case DECODE_TYPE_SINGLESTEP:
+		ret = INSN_GOOD;
+		break;
+
+	case DECODE_TYPE_SIMULATE:
+		ret = INSN_REJECTED;
+		break;
+
+	case DECODE_TYPE_TABLE:
+		/* recurse with next level decode table */
+		ret = kprobe_decode_insn(insn, asi,
+					 decode_sub_table(tbl[entry]));
+	};
+	return ret;
+}
+
+/* Return:
+ *   INSN_REJECTED     If instruction is one not allowed to kprobe,
+ *   INSN_GOOD         If instruction is supported and uses instruction slot,
+ *   INSN_GOOD_NO_SLOT If instruction is supported but doesn't use its slot.
+ */
+enum kprobe_insn __kprobes
+arm_kprobe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+{
+	return kprobe_decode_insn(insn, asi, aarch64_decode_table);
+}
diff --git a/arch/arm64/kernel/kprobes-arm64.h b/arch/arm64/kernel/kprobes-arm64.h
new file mode 100644
index 0000000..87e7891
--- /dev/null
+++ b/arch/arm64/kernel/kprobes-arm64.h
@@ -0,0 +1,28 @@ 
+/*
+ * arch/arm64/kernel/kprobes-arm64.h
+ *
+ * Copyright (C) 2013 Linaro Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#ifndef _ARM_KERNEL_KPROBES_ARM64_H
+#define _ARM_KERNEL_KPROBES_ARM64_H
+
+enum kprobe_insn {
+	INSN_REJECTED,
+	INSN_GOOD_NO_SLOT,
+	INSN_GOOD,
+};
+
+enum kprobe_insn __kprobes
+arm_kprobe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi);
+
+#endif /* _ARM_KERNEL_KPROBES_ARM64_H */
diff --git a/arch/arm64/kernel/kprobes.c b/arch/arm64/kernel/kprobes.c
new file mode 100644
index 0000000..def10b6
--- /dev/null
+++ b/arch/arm64/kernel/kprobes.c
@@ -0,0 +1,538 @@ 
+/*
+ * arch/arm64/kernel/kprobes.c
+ *
+ * Kprobes support for ARM64
+ *
+ * Copyright (C) 2013 Linaro Limited.
+ * Author: Sandeepa Prabhu <sandeepa.prabhu@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/kprobes.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/stop_machine.h>
+#include <linux/stringify.h>
+#include <asm/traps.h>
+#include <asm/cacheflush.h>
+#include <asm/debug-monitors.h>
+#include <asm/system_misc.h>
+#include <asm/insn.h>
+
+#include "kprobes.h"
+#include "kprobes-arm64.h"
+
+#define MIN_STACK_SIZE(addr)	min((unsigned long)MAX_STACK_SIZE,	\
+	(unsigned long)current_thread_info() + THREAD_START_SP - (addr))
+
+DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
+DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+
+static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
+{
+	int i;
+	/* prepare insn slot */
+	p->ainsn.insn[0] = p->opcode;
+	/* NOP for superscalar uArch decode */
+	for (i = 1; i < MAX_INSN_SIZE; i++)
+		p->ainsn.insn[i] = ARCH64_NOP_OPCODE;
+
+	flush_icache_range((uintptr_t) (p->ainsn.insn),
+			   (uintptr_t) (p->ainsn.insn) + MAX_INSN_SIZE);
+}
+
+int __kprobes arch_prepare_kprobe(struct kprobe *p)
+{
+	kprobe_opcode_t insn;
+	unsigned long probe_addr = (unsigned long)p->addr;
+
+	/* copy instruction */
+	insn = *p->addr;
+	p->opcode = insn;
+
+	if (in_exception_text(probe_addr))
+		return -EINVAL;
+
+	/* decode instruction */
+	switch (arm_kprobe_decode_insn(insn, &p->ainsn)) {
+	case INSN_REJECTED:	/* insn not supported */
+		return -EINVAL;
+		break;
+
+	case INSN_GOOD_NO_SLOT:	/* insn need simulation */
+		return -EINVAL;
+		break;
+
+	case INSN_GOOD:	/* instruction uses slot */
+		p->ainsn.insn = get_insn_slot();
+		if (!p->ainsn.insn)
+			return -ENOMEM;
+		break;
+	};
+
+	/* prepare the instruction */
+	arch_prepare_ss_slot(p);
+
+	return 0;
+}
+
+static int __kprobes patch_text(kprobe_opcode_t *addr, u32 opcode)
+{
+	void *addrs[1];
+	u32 insns[1];
+
+	addrs[0] = (void *)addr;
+	insns[0] = (u32)opcode;
+
+	return aarch64_insn_patch_text_sync(addrs, insns, 1);
+}
+
+/* arm kprobe: install breakpoint in text */
+void __kprobes arch_arm_kprobe(struct kprobe *p)
+{
+	patch_text(p->addr, BRK64_OPCODE_KPROBES);
+}
+
+/* disarm kprobe: remove breakpoint from text */
+void __kprobes arch_disarm_kprobe(struct kprobe *p)
+{
+	patch_text(p->addr, p->opcode);
+}
+
+void __kprobes arch_remove_kprobe(struct kprobe *p)
+{
+	if (p->ainsn.insn) {
+		free_insn_slot(p->ainsn.insn, 0);
+		p->ainsn.insn = NULL;
+	}
+}
+
+static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+	kcb->prev_kprobe.kp = kprobe_running();
+	kcb->prev_kprobe.status = kcb->kprobe_status;
+}
+
+static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+	__get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
+	kcb->kprobe_status = kcb->prev_kprobe.status;
+}
+
+static void __kprobes set_current_kprobe(struct kprobe *p)
+{
+	__get_cpu_var(current_kprobe) = p;
+}
+
+static void __kprobes
+set_ss_context(struct kprobe_ctlblk *kcb, unsigned long addr)
+{
+	kcb->ss_ctx.ss_status = KPROBES_STEP_PENDING;
+	kcb->ss_ctx.match_addr = addr + sizeof(kprobe_opcode_t);
+}
+
+static void __kprobes clear_ss_context(struct kprobe_ctlblk *kcb)
+{
+	kcb->ss_ctx.ss_status = KPROBES_STEP_NONE;
+	kcb->ss_ctx.match_addr = 0;
+}
+
+static void __kprobes
+nop_singlestep_skip(struct kprobe *p, struct pt_regs *regs)
+{
+	/* set return addr to next pc to continue */
+	instruction_pointer(regs) += sizeof(kprobe_opcode_t);
+	return;
+}
+
+/* Mask/Unmask PSTATE.D flag
+ *
+ * Unmasking D-flag enables recursing into another debug
+ * exception (breakpoint or single step).
+ *
+ * Upon every exception entry, D-flag is disabled by the
+ * hardware. We shall unmask this flag only after safely
+ * saved the previous context and kprobes state machines.
+ *
+ * kprobes can generate recursing in breakpoint (followed
+ * by single stepping) within user-provided handlers only.
+ *
+ * All other places, keep the D-flag masked.
+ */
+static void mask_pstate_debug_flag(u32 mask)
+{
+	if (mask)
+		asm volatile("msr daifset, #9\n\t");
+	else
+		asm volatile("msr daifclr, #9\n\t");
+}
+
+static void __kprobes setup_singlestep(struct kprobe *p,
+				       struct pt_regs *regs,
+				       struct kprobe_ctlblk *kcb, int reenter)
+{
+	unsigned long slot;
+
+	if (reenter) {
+		save_previous_kprobe(kcb);
+		set_current_kprobe(p);
+		kcb->kprobe_status = KPROBE_REENTER;
+	} else {
+		kcb->kprobe_status = KPROBE_HIT_SS;
+	}
+
+	if (p->ainsn.insn) {
+		/* prepare for single stepping */
+		slot = (unsigned long)p->ainsn.insn;
+
+		/*
+		 * Needs restoring of return address after stepping xol.
+		 */
+		p->ainsn.restore.addr = instruction_pointer(regs) +
+				sizeof(kprobe_opcode_t);
+		p->ainsn.restore.type = RESTORE_PC;
+
+		set_ss_context(kcb, slot);	/* mark pending ss */
+		kernel_enable_single_step(regs);
+		instruction_pointer(regs) = slot;
+	} else	{
+		BUG();
+	}
+}
+
+static int __kprobes reenter_kprobe(struct kprobe *p,
+				    struct pt_regs *regs,
+				    struct kprobe_ctlblk *kcb)
+{
+	switch (kcb->kprobe_status) {
+	case KPROBE_HIT_SSDONE:
+	case KPROBE_HIT_ACTIVE:
+		if (!p->ainsn.check_condn || p->ainsn.check_condn(p, regs)) {
+			kprobes_inc_nmissed_count(p);
+			setup_singlestep(p, regs, kcb, 1);
+		} else	{
+			/* condition failed, it's NOP so skip stepping */
+			nop_singlestep_skip(p, regs);
+		}
+		break;
+	case KPROBE_HIT_SS:
+		pr_warn("Unrecoverable kprobe detected at %p.\n", p->addr);
+		dump_kprobe(p);
+		BUG();
+	default:
+		WARN_ON(1);
+		return 0;
+	}
+
+	return 1;
+}
+
+static int __kprobes
+post_kprobe_handler(struct kprobe_ctlblk *kcb, struct pt_regs *regs)
+{
+	struct kprobe *cur = kprobe_running();
+
+	if (!cur)
+		return 0;
+
+	/* return addr restore if non-branching insn */
+	if (cur->ainsn.restore.type == RESTORE_PC) {
+		instruction_pointer(regs) = cur->ainsn.restore.addr;
+		cur->ainsn.restore.addr = 0;
+		cur->ainsn.restore.type = NO_RESTORE;
+	}
+
+	/* restore back original saved kprobe variables and continue */
+	if (kcb->kprobe_status == KPROBE_REENTER) {
+		restore_previous_kprobe(kcb);
+		goto out;
+	} else	{ /* call post handler */
+		kcb->kprobe_status = KPROBE_HIT_SSDONE;
+		if (cur->post_handler)	{
+			/* post_handler can hit breakpoint and single step
+			 * again, so we enable D-flag for recursive exception.
+			 */
+			mask_pstate_debug_flag(0);
+			cur->post_handler(cur, regs, 0);
+			mask_pstate_debug_flag(1);
+		}
+	}
+	reset_current_kprobe();
+out:
+
+	return 1;
+}
+
+int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr)
+{
+	struct kprobe *cur = kprobe_running();
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+	switch (kcb->kprobe_status) {
+	case KPROBE_HIT_SS:
+	case KPROBE_REENTER:
+		/*
+		 * We are here because the instruction being single
+		 * stepped caused a page fault. We reset the current
+		 * kprobe and the ip points back to the probe address
+		 * and allow the page fault handler to continue as a
+		 * normal page fault.
+		 */
+		instruction_pointer(regs) = (unsigned long)cur->addr;
+		if (kcb->kprobe_status == KPROBE_REENTER)
+			restore_previous_kprobe(kcb);
+		else
+			reset_current_kprobe();
+
+		break;
+	case KPROBE_HIT_ACTIVE:
+	case KPROBE_HIT_SSDONE:
+		/*
+		 * We increment the nmissed count for accounting,
+		 * we can also use npre/npostfault count for accounting
+		 * these specific fault cases.
+		 */
+		kprobes_inc_nmissed_count(cur);
+
+		/*
+		 * We come here because instructions in the pre/post
+		 * handler caused the page_fault, this could happen
+		 * if handler tries to access user space by
+		 * copy_from_user(), get_user() etc. Let the
+		 * user-specified handler try to fix it first.
+		 */
+		if (cur->fault_handler && cur->fault_handler(cur, regs, fsr))
+			return 1;
+
+		/*
+		 * In case the user-specified fault handler returned
+		 * zero, try to fix up.
+		 */
+		if (fixup_exception(regs))
+			return 1;
+
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+
+int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
+				       unsigned long val, void *data)
+{
+	return NOTIFY_DONE;
+}
+
+void __kprobes kprobe_handler(struct pt_regs *regs)
+{
+	struct kprobe *p, *cur;
+	struct kprobe_ctlblk *kcb;
+	unsigned long addr = instruction_pointer(regs);
+
+	kcb = get_kprobe_ctlblk();
+	cur = kprobe_running();
+
+	p = get_kprobe((kprobe_opcode_t *) addr);
+
+	if (p) {
+		if (cur) {
+			if (reenter_kprobe(p, regs, kcb))
+				return;
+		} else if (!p->ainsn.check_condn ||
+			   p->ainsn.check_condn(p, regs)) {
+			/* Probe hit and conditional execution check ok. */
+			set_current_kprobe(p);
+			kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+
+			/*
+			 * If we have no pre-handler or it returned 0, we
+			 * continue with normal processing.  If we have a
+			 * pre-handler and it returned non-zero, it prepped
+			 * for calling the break_handler below on re-entry,
+			 * so get out doing nothing more here.
+			 *
+			 * pre_handler can hit a breakpoint and can step thru
+			 * before return, keep PSTATE D-flag enabled until
+			 * pre_handler return back.
+			 */
+			mask_pstate_debug_flag(0);
+			if (!p->pre_handler || !p->pre_handler(p, regs)) {
+				mask_pstate_debug_flag(1);
+				kcb->kprobe_status = KPROBE_HIT_SS;
+				setup_singlestep(p, regs, kcb, 0);
+				return;
+			}
+			mask_pstate_debug_flag(1);
+		} else {
+			/*
+			 * Breakpoint hit but conditional check failed,
+			 * so just skip handling since it is NOP.
+			 */
+			nop_singlestep_skip(p, regs);
+			return;
+		}
+	} else if (*(kprobe_opcode_t *) addr != BRK64_OPCODE_KPROBES) {
+		/*
+		 * The breakpoint instruction was removed right
+		 * after we hit it.  Another cpu has removed
+		 * either a probepoint or a debugger breakpoint
+		 * at this address.  In either case, no further
+		 * handling of this interrupt is appropriate.
+		 * Return back to original instruction, and continue.
+		 */
+		preempt_enable_no_resched();
+		return;
+	} else if (cur) {
+		/* We probably hit a jprobe.  Call its break handler. */
+		if (cur->break_handler && cur->break_handler(cur, regs)) {
+			kcb->kprobe_status = KPROBE_HIT_SS;
+			setup_singlestep(cur, regs, kcb, 0);
+			return;
+		}
+	} else {
+		/* breakpoint is removed, now in a race
+		 * Return back to original instruction & continue.
+		 */
+		preempt_enable_no_resched();
+	}
+	return;
+}
+
+static int __kprobes
+kprobe_ss_hit(struct kprobe_ctlblk *kcb, unsigned long addr)
+{
+	if ((kcb->ss_ctx.ss_status == KPROBES_STEP_PENDING)
+	    && (kcb->ss_ctx.match_addr == addr)) {
+		clear_ss_context(kcb);	/* clear pending ss */
+		return DBG_HOOK_HANDLED;
+	} else {
+		/* not ours, kprobes should ignore it */
+		return DBG_HOOK_ERROR;
+	}
+}
+
+static int __kprobes
+kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr)
+{
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	unsigned long flags;
+	int retval;
+
+	/* check, and return error if this is not our step */
+	retval = kprobe_ss_hit(kcb, instruction_pointer(regs));
+
+	if (retval == DBG_HOOK_HANDLED) {
+		/* single step complete, call post handlers */
+		local_irq_save(flags);
+		kernel_disable_single_step();
+		post_kprobe_handler(kcb, regs);
+		local_irq_restore(flags);
+	}
+	return retval;
+}
+
+static int __kprobes
+kprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr)
+{
+	unsigned long flags;
+	local_irq_save(flags);
+	kprobe_handler(regs);
+	local_irq_restore(flags);
+
+	return DBG_HOOK_HANDLED;
+}
+
+int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	struct jprobe *jp = container_of(p, struct jprobe, kp);
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	long stack_ptr = stack_pointer(regs);
+
+	kcb->jprobe_saved_regs = *regs;
+	memcpy(kcb->jprobes_stack, (void *)stack_ptr,
+	       MIN_STACK_SIZE(stack_ptr));
+
+	instruction_pointer(regs) = (long)jp->entry;
+
+	preempt_disable();
+	return 1;
+}
+
+void __kprobes jprobe_return(void)
+{
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+	/*
+	 * Jprobe handler return by entering break exception,
+	 * encoded same as kprobe, but with following conditions
+	 * -a magic number in x0 to identify from rest of other kprobes.
+	 * -restore stack addr to original saved pt_regs
+	 */
+	asm volatile ("ldr x0, [%0]\n\t"
+		      "mov sp, x0\n\t"
+		      "ldr x0, =" __stringify(JPROBES_MAGIC_NUM) "\n\t"
+		      "BRK %1\n\t"
+		      "NOP\n\t"
+		      :
+		      : "r"(&kcb->jprobe_saved_regs.sp),
+		      "I"(BRK64_ESR_KPROBES)
+		      : "memory");
+}
+
+int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	long stack_addr = kcb->jprobe_saved_regs.sp;
+	long orig_sp = stack_pointer(regs);
+	struct jprobe *jp = container_of(p, struct jprobe, kp);
+
+	if (regs->regs[0] == JPROBES_MAGIC_NUM) {
+		if (orig_sp != stack_addr) {
+			struct pt_regs *saved_regs =
+			    (struct pt_regs *)kcb->jprobe_saved_regs.sp;
+			pr_err("current sp %lx does not match saved sp %lx\n",
+			       orig_sp, stack_addr);
+			pr_err("Saved registers for jprobe %p\n", jp);
+			show_regs(saved_regs);
+			pr_err("Current registers\n");
+			show_regs(regs);
+			BUG();
+		}
+		*regs = kcb->jprobe_saved_regs;
+		memcpy((void *)stack_addr, kcb->jprobes_stack,
+		       MIN_STACK_SIZE(stack_addr));
+		preempt_enable_no_resched();
+		return 1;
+	}
+	return 0;
+}
+
+/* Break Handler hook */
+static struct break_hook kprobes_break_hook = {
+	.esr_mask = BRK64_ESR_MASK,
+	.esr_val = BRK64_ESR_KPROBES,
+	.fn = kprobe_breakpoint_handler,
+};
+
+/* Single Step handler hook */
+static struct step_hook kprobes_step_hook = {
+	.fn = kprobe_single_step_handler,
+};
+
+int __init arch_init_kprobes()
+{
+	register_break_hook(&kprobes_break_hook);
+	register_step_hook(&kprobes_step_hook);
+
+	return 0;
+}
diff --git a/arch/arm64/kernel/kprobes.h b/arch/arm64/kernel/kprobes.h
new file mode 100644
index 0000000..93c54b4
--- /dev/null
+++ b/arch/arm64/kernel/kprobes.h
@@ -0,0 +1,30 @@ 
+/*
+ * arch/arm64/kernel/kprobes.h
+ *
+ * Copyright (C) 2013 Linaro Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#ifndef _ARM_KERNEL_KPROBES_H
+#define _ARM_KERNEL_KPROBES_H
+
+/* BRK opcodes with ESR encoding  */
+#define BRK64_ESR_MASK		0xFFFF
+#define BRK64_ESR_KPROBES	0x0004
+#define BRK64_OPCODE_KPROBES	0xD4200080	/* "brk 0x4" */
+#define ARCH64_NOP_OPCODE	0xD503201F
+
+#define JPROBES_MAGIC_NUM	0xa5a5a5a5a5a5a5a5
+
+/* Move this out to appropriate header file */
+int fixup_exception(struct pt_regs *regs);
+
+#endif /* _ARM_KERNEL_KPROBES_H */
diff --git a/arch/arm64/kernel/probes-decode.h b/arch/arm64/kernel/probes-decode.h
new file mode 100644
index 0000000..3650ab3
--- /dev/null
+++ b/arch/arm64/kernel/probes-decode.h
@@ -0,0 +1,110 @@ 
+/*
+ * arch/arm64/kernel/probes-decode.h
+ *
+ * Copyright (C) 2013 Linaro Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#ifndef _ARM_KERNEL_PROBES_DECODE_H
+#define _ARM_KERNEL_PROBES_DECODE_H
+
+/*
+ * The following definitions and macros are used to build instruction
+ * decoding tables.
+ */
+enum decode_type {
+	DECODE_TYPE_END,
+	DECODE_TYPE_SINGLESTEP,
+	DECODE_TYPE_SIMULATE,
+	DECODE_TYPE_TABLE,
+	DECODE_TYPE_REJECT,
+};
+
+struct aarch64_decode_item;
+
+struct aarch64_decode_header {
+	enum decode_type type;
+	u32 mask;
+	u32 val;
+};
+
+struct aarch64_decode_actions {
+	kprobes_prepare_t *prepare;
+	kprobes_handler_t *handler;
+};
+
+struct aarch64_decode_table {
+	const struct aarch64_decode_item *tbl;
+};
+
+union aarch64_decode_handler {
+	struct aarch64_decode_actions actions;
+	struct aarch64_decode_table table;
+};
+
+struct aarch64_decode_item {
+	struct aarch64_decode_header header;
+	union aarch64_decode_handler decode;
+};
+
+#define decode_get_type(_entry)	 ((_entry).header.type)
+
+#define decode_table_end(_entry)		\
+	((_entry).header.type == DECODE_TYPE_END)
+
+#define decode_table_hit(_entry, insn)		\
+	((insn & (_entry).header.mask) == (_entry).header.val)
+
+#define decode_prepare_fn(_entry)	((_entry).decode.actions.prepare)
+#define decode_handler_fn(_entry)	((_entry).decode.actions.handler)
+#define decode_sub_table(_entry)	((_entry).decode.table.tbl)
+
+#define DECODE_ADD_HEADER(_type, _val, _mask)	\
+	.header = {				\
+		.type = _type,			\
+		.mask = _mask,			\
+		.val = _val,			\
+	},
+
+#define DECODE_ADD_ACTION(_prepare, _handler)	\
+	.decode = {				\
+		.actions = {			\
+			.prepare = _prepare,	\
+			.handler = _handler,	\
+		}				\
+	},
+
+#define DECODE_ADD_TABLE(_table)		\
+	.decode = {				\
+		.table = {.tbl = _table}	\
+	},
+
+#define DECODE_REJECT(_v, _m)					\
+	{ DECODE_ADD_HEADER(DECODE_TYPE_REJECT, _v, _m) }
+
+#define DECODE_SINGLESTEP(_v, _m)				\
+	{ DECODE_ADD_HEADER(DECODE_TYPE_SINGLESTEP, _v, _m) }
+
+#define DECODE_SIMULATE(_v, _m, _p, _h)				\
+	{ DECODE_ADD_HEADER(DECODE_TYPE_SIMULATE, _v, _m)	\
+	  DECODE_ADD_ACTION(_p, _h) }
+
+#define DECODE_TABLE(_v, _m, _table)				\
+	{ DECODE_ADD_HEADER(DECODE_TYPE_TABLE, _v, _m)		\
+	  DECODE_ADD_TABLE(_table) }
+
+#define DECODE_LITERAL(_v, _m, _p, _h)	DECODE_SIMULATE(_v, _m, _p, _h)
+#define DECODE_BRANCH(_v, _m, _p, _h)	DECODE_SIMULATE(_v, _m, _p, _h)
+
+/* should be the last element in decode structure */
+#define DECODE_END	{ .header = {.type = DECODE_TYPE_END, } }
+
+#endif /* _ARM_KERNEL_PROBES_DECODE_H */
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index f8ab9d8..40951b1 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -62,6 +62,7 @@  SECTIONS
 			TEXT_TEXT
 			SCHED_TEXT
 			LOCK_TEXT
+			KPROBES_TEXT
 			HYPERVISOR_TEXT
 			*(.fixup)
 			*(.gnu.warning)