diff mbox series

ftrace_direct (used by bpf trampoline) conflicts with live patch

Message ID 0962AC9B-2FBD-4578-8B2F-A376A6B3B83F@fb.com (mailing list archive)
State RFC
Delegated to: BPF
Headers show
Series ftrace_direct (used by bpf trampoline) conflicts with live patch | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR fail PR summary
netdev/tree_selection success Not a local patch

Commit Message

Song Liu April 1, 2022, 1:11 a.m. UTC
Hi Steven, 

We hit an issue with bpf trampoline and kernel live patch on the 
same function. 

Basically, we have tracing and live patch on the same function. 
If we use kprobe (over ftrace) for tracing, it works fine with 
live patch. However, fentry on the same function does not work 
with live patch (whichever one comes later fails to attach).

After digging into this, I found this is because bpf trampoline
uses register_ftrace_direct, which enables IPMODIFY by default. 
OTOH, it seems that BPF doesn't really need IPMODIFY, as the BPF 
trampoline does a "goto do_fexit" in the JIT for BPF_TRAMP_MODIFY_RETURN.

IIUC, we can let bpf trampoline and live patch work together with
an ipmodify-less version of register_ftrace_direct, like the one 
attached below. 

Does this make sense to you? Did I miss something?

Thanks in advance,
Song

Comments

Steven Rostedt April 1, 2022, 1:48 a.m. UTC | #1
On Fri, 1 Apr 2022 01:11:01 +0000
Song Liu <songliubraving@fb.com> wrote:

> Hi Steven, 
> 
> We hit an issue with bpf trampoline and kernel live patch on the 
> same function. 
> 
> Basically, we have tracing and live patch on the same function. 
> If we use kprobe (over ftrace) for tracing, it works fine with 
> live patch. However, fentry on the same function does not work 
> with live patch (the one comes later fails to attach).
> 
> After digging into this, I found this is because bpf trampoline
> uses register_ftrace_direct, which enables IPMODIFY by default. 
> OTOH, it seems that BPF doesn't really need IPMODIFY. As BPF 
> trampoline does a "goto do_fexit" in jit for BPF_TRAMP_MODIFY_RETURN.
> 
> IIUC, we can let bpf trampoline and live patch work together with
> an ipmodify-less version of register_ftrace_direct, like attached 
> below. 
> 
> Does this make sense to you? Did I miss something?

I thought the BPF trampoline does:

	call bpf_trace_before_function
	call original_function + X86_PATCH_SIZE
	call bpf_trace_after_function

Thus, the bpf direct trampoline calls the unpatched version of the
function, making the live patch useless. Or is this not what it
does?

-- Steve
Steven Rostedt April 1, 2022, 1:52 a.m. UTC | #2
On Thu, 31 Mar 2022 21:48:36 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:

> > Does this make sense to you? Did I miss something?  
> 
> I thought the BPF trampoline does:
> 
> 	call bpf_trace_before_function
> 	call original_function + X86_PATCH_SIZE
> 	call bpf_trace_after_function
> 
> Thus, the bpf direct trampoline calls the unpatched version of the
> function call making the live patch useless. Or is this not what it
> does?

Or perhaps you are only talking about the part of bpf that does not
trace the end of a function?

-- Steve
Song Liu April 1, 2022, 9:49 p.m. UTC | #3
> On Mar 31, 2022, at 6:52 PM, Steven Rostedt <rostedt@goodmis.org> wrote:
> 
> On Thu, 31 Mar 2022 21:48:36 -0400
> Steven Rostedt <rostedt@goodmis.org> wrote:
> 
>>> Does this make sense to you? Did I miss something?  
>> 
>> I thought the BPF trampoline does:
>> 
>> 	call bpf_trace_before_function
>> 	call original_function + X86_PATCH_SIZE
>> 	call bpf_trace_after_function
>> 
>> Thus, the bpf direct trampoline calls the unpatched version of the
>> function call making the live patch useless. Or is this not what it
>> does?
> 
> Or perhaps you are only talking about the part of bpf that does not
> trace the end of a function?

Yeah, we do call original_function + X86_PATCH_SIZE if there are 
fexit or fmod_ret programs. So this alone is not enough to make the 
two work together. :(

Let me see how we can fix it...

Thanks,
Song
diff mbox series

Patch

diff --git i/include/linux/ftrace.h w/include/linux/ftrace.h
index ed8cf433a46a..46c40f0e0368 100644
--- i/include/linux/ftrace.h
+++ w/include/linux/ftrace.h
@@ -326,6 +326,8 @@  struct dyn_ftrace;
 extern int ftrace_direct_func_count;
 int register_ftrace_direct(unsigned long ip, unsigned long addr);
 int unregister_ftrace_direct(unsigned long ip, unsigned long addr);
+int register_ftrace_direct_no_ipmodify(unsigned long ip, unsigned long addr);
+int unregister_ftrace_direct_no_ipmodify(unsigned long ip, unsigned long addr);
 int modify_ftrace_direct(unsigned long ip, unsigned long old_addr, unsigned long new_addr);
 struct ftrace_direct_func *ftrace_find_direct_func(unsigned long addr);
 int ftrace_modify_direct_caller(struct ftrace_func_entry *entry,
diff --git i/kernel/bpf/trampoline.c w/kernel/bpf/trampoline.c
index ada97751ae1b..52ff503692cb 100644
--- i/kernel/bpf/trampoline.c
+++ w/kernel/bpf/trampoline.c
@@ -123,7 +123,7 @@  static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr)
 	int ret;
 
 	if (tr->func.ftrace_managed)
-		ret = unregister_ftrace_direct((long)ip, (long)old_addr);
+		ret = unregister_ftrace_direct_no_ipmodify((long)ip, (long)old_addr);
 	else
 		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, NULL);
 
@@ -159,7 +159,7 @@  static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
 		return -ENOENT;
 
 	if (tr->func.ftrace_managed)
-		ret = register_ftrace_direct((long)ip, (long)new_addr);
+		ret = register_ftrace_direct_no_ipmodify((long)ip, (long)new_addr);
 	else
 		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, new_addr);
 
diff --git i/kernel/trace/ftrace.c w/kernel/trace/ftrace.c
index 4f1d2f5e7263..afb5598c103f 100644
--- i/kernel/trace/ftrace.c
+++ w/kernel/trace/ftrace.c
@@ -2467,6 +2467,20 @@  struct ftrace_ops direct_ops = {
 	 */
 	.trampoline	= FTRACE_REGS_ADDR,
 };
+
+struct ftrace_ops no_ipmodify_direct_ops = {
+	.func		= call_direct_funcs,
+	.flags		= FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS
+			  | FTRACE_OPS_FL_PERMANENT,
+	/*
+	 * By declaring the main trampoline as this trampoline
+	 * it will never have one allocated for it. Allocated
+	 * trampolines should not call direct functions.
+	 * Like direct_ops, these ops should only be called by
+	 * the builtin ftrace_regs_caller trampoline.
+	 */
+	.trampoline	= FTRACE_REGS_ADDR,
+};
 #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
 
 /**
@@ -5126,6 +5140,9 @@  static struct ftrace_direct_func *ftrace_alloc_direct_func(unsigned long addr)
 	return direct;
 }
 
+static int __register_ftrace_direct(unsigned long ip, unsigned long addr,
+				    struct ftrace_ops *ops);
+
 /**
  * register_ftrace_direct - Call a custom trampoline directly
  * @ip: The address of the nop at the beginning of a function
@@ -5144,6 +5161,12 @@  static struct ftrace_direct_func *ftrace_alloc_direct_func(unsigned long addr)
  *  -ENOMEM - There was an allocation failure.
  */
 int register_ftrace_direct(unsigned long ip, unsigned long addr)
+{
+	return __register_ftrace_direct(ip, addr, &direct_ops);
+}
+
+static int __register_ftrace_direct(unsigned long ip, unsigned long addr,
+				    struct ftrace_ops *ops)
 {
 	struct ftrace_direct_func *direct;
 	struct ftrace_func_entry *entry;
@@ -5194,14 +5217,14 @@  int register_ftrace_direct(unsigned long ip, unsigned long addr)
 	if (!entry)
 		goto out_unlock;
 
-	ret = ftrace_set_filter_ip(&direct_ops, ip, 0, 0);
+	ret = ftrace_set_filter_ip(ops, ip, 0, 0);
 	if (ret)
 		remove_hash_entry(direct_functions, entry);
 
-	if (!ret && !(direct_ops.flags & FTRACE_OPS_FL_ENABLED)) {
-		ret = register_ftrace_function(&direct_ops);
+	if (!ret && !(ops->flags & FTRACE_OPS_FL_ENABLED)) {
+		ret = register_ftrace_function(ops);
 		if (ret)
-			ftrace_set_filter_ip(&direct_ops, ip, 1, 0);
+			ftrace_set_filter_ip(ops, ip, 1, 0);
 	}
 
 	if (ret) {
@@ -5230,6 +5253,29 @@  int register_ftrace_direct(unsigned long ip, unsigned long addr)
 }
 EXPORT_SYMBOL_GPL(register_ftrace_direct);
 
+/**
+ * register_ftrace_direct_no_ipmodify - Call a custom trampoline directly.
+ * The custom trampoline should not use IPMODIFY.
+ * @ip: The address of the nop at the beginning of a function
+ * @addr: The address of the trampoline to call at @ip
+ *
+ * This is used to connect a direct call from the nop location (@ip)
+ * at the start of ftrace traced functions. The location that it calls
+ * (@addr) must be able to handle a direct call, and save the parameters
+ * of the function being traced, and restore them (or inject new ones
+ * if needed), before returning.
+ *
+ * Returns:
+ *  0 on success
+ *  -EBUSY - Another direct function is already attached (there can be only one)
+ *  -ENODEV - @ip does not point to a ftrace nop location (or not supported)
+ *  -ENOMEM - There was an allocation failure.
+ */
+int register_ftrace_direct_no_ipmodify(unsigned long ip, unsigned long addr)
+{
+	return __register_ftrace_direct(ip, addr, &no_ipmodify_direct_ops);
+}
+
 static struct ftrace_func_entry *find_direct_entry(unsigned long *ip,
 						   struct dyn_ftrace **recp)
 {
@@ -5257,7 +5303,21 @@  static struct ftrace_func_entry *find_direct_entry(unsigned long *ip,
 	return entry;
 }
 
+static int __unregister_ftrace_direct(unsigned long ip, unsigned long addr,
+				      struct ftrace_ops *ops);
+
 int unregister_ftrace_direct(unsigned long ip, unsigned long addr)
+{
+	return __unregister_ftrace_direct(ip, addr, &direct_ops);
+}
+
+int unregister_ftrace_direct_no_ipmodify(unsigned long ip, unsigned long addr)
+{
+	return __unregister_ftrace_direct(ip, addr, &no_ipmodify_direct_ops);
+}
+
+static int __unregister_ftrace_direct(unsigned long ip, unsigned long addr,
+				      struct ftrace_ops *ops)
 {
 	struct ftrace_direct_func *direct;
 	struct ftrace_func_entry *entry;
@@ -5274,11 +5334,11 @@  int unregister_ftrace_direct(unsigned long ip, unsigned long addr)
 	if (!entry)
 		goto out_unlock;
 
-	hash = direct_ops.func_hash->filter_hash;
+	hash = ops->func_hash->filter_hash;
 	if (hash->count == 1)
-		unregister_ftrace_function(&direct_ops);
+		unregister_ftrace_function(ops);
 
-	ret = ftrace_set_filter_ip(&direct_ops, ip, 1, 0);
+	ret = ftrace_set_filter_ip(ops, ip, 1, 0);
 
 	WARN_ON(ret);