Message ID | 20220718001405.2236811-5-song@kernel.org (mailing list archive)
---|---
State | Superseded
Delegated to: | BPF
Series | ftrace: host klp and bpf trampoline together
On Sun 2022-07-17 17:14:05, Song Liu wrote:
> When tracing a function with IPMODIFY ftrace_ops (livepatch), the bpf
> trampoline must follow the instruction pointer saved on stack. This needs
> extra handling for bpf trampolines with BPF_TRAMP_F_CALL_ORIG flag.
>
> Implement bpf_tramp_ftrace_ops_func and use it for the ftrace_ops used
> by BPF trampoline. This enables tracing functions with livepatch.
>
> This also requires moving bpf trampoline to *_ftrace_direct_multi APIs.
>
> --- a/kernel/bpf/trampoline.c
> +++ b/kernel/bpf/trampoline.c
> @@ -13,6 +13,7 @@
>  #include <linux/static_call.h>
>  #include <linux/bpf_verifier.h>
>  #include <linux/bpf_lsm.h>
> +#include <linux/delay.h>
>
>  /* dummy _ops. The verifier will operate on target program's ops. */
>  const struct bpf_verifier_ops bpf_extension_verifier_ops = {
> @@ -29,6 +30,81 @@ static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];
>  /* serializes access to trampoline_table */
>  static DEFINE_MUTEX(trampoline_mutex);
>
> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
> +static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex);
> +
> +static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, enum ftrace_ops_cmd cmd)
> +{
> +	struct bpf_trampoline *tr = ops->private;
> +	int ret = 0;
> +
> +	if (cmd == FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF) {
> +		/* This is called inside register_ftrace_direct_multi(), so
> +		 * tr->mutex is already locked.
> +		 */
> +		WARN_ON_ONCE(!mutex_is_locked(&tr->mutex));

Again, better is:

	lockdep_assert_held_once(&tr->mutex);

> +
> +		/* Instead of updating the trampoline here, we propagate
> +		 * -EAGAIN to register_ftrace_direct_multi(). Then we can
> +		 * retry register_ftrace_direct_multi() after updating the
> +		 * trampoline.
> +		 */
> +		if ((tr->flags & BPF_TRAMP_F_CALL_ORIG) &&
> +		    !(tr->flags & BPF_TRAMP_F_ORIG_STACK)) {
> +			if (WARN_ON_ONCE(tr->flags & BPF_TRAMP_F_SHARE_IPMODIFY))
> +				return -EBUSY;
> +
> +			tr->flags |= BPF_TRAMP_F_SHARE_IPMODIFY;
> +			return -EAGAIN;
> +		}
> +
> +		return 0;
> +	}
> +
> +	/* The normal locking order is
> +	 *    tr->mutex => direct_mutex (ftrace.c) => ftrace_lock (ftrace.c)
> +	 *
> +	 * The following two commands are called from
> +	 *
> +	 *   prepare_direct_functions_for_ipmodify
> +	 *   cleanup_direct_functions_after_ipmodify
> +	 *
> +	 * In both cases, direct_mutex is already locked. Use
> +	 * mutex_trylock(&tr->mutex) to avoid deadlock in race condition
> +	 * (something else is making changes to this same trampoline).
> +	 */
> +	if (!mutex_trylock(&tr->mutex)) {
> +		/* sleep 1 ms to make sure whatever holding tr->mutex makes
> +		 * some progress.
> +		 */
> +		msleep(1);
> +		return -EAGAIN;
> +	}

Huh, this looks horrible. And I do not get it. The above block prints
a warning when the mutex is not taken. Why is it already taken
when cmd == FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF,
and why does it have to be explicitly taken otherwise?

Would it be possible to call prepare_direct_functions_for_ipmodify()
and cleanup_direct_functions_after_ipmodify() with tr->mutex already
taken, so that the ordering is correct even in this case?

That said, this is the first version where I am in Cc. I am not sure
if it has already been discussed.

> +	switch (cmd) {
> +	case FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER:
> +		tr->flags |= BPF_TRAMP_F_SHARE_IPMODIFY;
> +
> +		if ((tr->flags & BPF_TRAMP_F_CALL_ORIG) &&
> +		    !(tr->flags & BPF_TRAMP_F_ORIG_STACK))
> +			ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */);
> +		break;
> +	case FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER:
> +		tr->flags &= ~BPF_TRAMP_F_SHARE_IPMODIFY;
> +
> +		if (tr->flags & BPF_TRAMP_F_ORIG_STACK)
> +			ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */);
> +		break;
> +	default:
> +		ret = -EINVAL;
> +		break;
> +	};
> +
> +	mutex_unlock(&tr->mutex);
> +	return ret;
> +}
> +#endif
> +
>  bool bpf_prog_has_trampoline(const struct bpf_prog *prog)
>  {
>  	enum bpf_attach_type eatype = prog->expected_attach_type;

Note that I did not do a proper review. I am not much familiar with the
ftrace code. I just wanted to check how much this patchset affects
livepatching and noticed the commented things.

Best Regards,
Petr
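A note on the assertion Petr suggests: mutex_is_locked() only reports
that *some* task currently holds the mutex, while
lockdep_assert_held_once() checks that the *current* task holds it and
compiles away entirely when lockdep is disabled. A minimal sketch of the
two checks side by side (not part of the patch):

	/* weaker: passes if any task holds tr->mutex; always compiled in */
	WARN_ON_ONCE(!mutex_is_locked(&tr->mutex));

	/* stronger: warns once unless the current task holds tr->mutex
	 * (under CONFIG_PROVE_LOCKING); a no-op in non-lockdep builds
	 */
	lockdep_assert_held_once(&tr->mutex);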
> On Jul 18, 2022, at 6:07 AM, Petr Mladek <pmladek@suse.com> wrote:
>
> On Sun 2022-07-17 17:14:05, Song Liu wrote:
>> When tracing a function with IPMODIFY ftrace_ops (livepatch), the bpf
>> trampoline must follow the instruction pointer saved on stack. This needs
>> extra handling for bpf trampolines with BPF_TRAMP_F_CALL_ORIG flag.
>>
>> Implement bpf_tramp_ftrace_ops_func and use it for the ftrace_ops used
>> by BPF trampoline. This enables tracing functions with livepatch.
>>
>> This also requires moving bpf trampoline to *_ftrace_direct_multi APIs.
>>
>> --- a/kernel/bpf/trampoline.c
>> +++ b/kernel/bpf/trampoline.c
>> @@ -13,6 +13,7 @@
>>  #include <linux/static_call.h>
>>  #include <linux/bpf_verifier.h>
>>  #include <linux/bpf_lsm.h>
>> +#include <linux/delay.h>
>>
>>  /* dummy _ops. The verifier will operate on target program's ops. */
>>  const struct bpf_verifier_ops bpf_extension_verifier_ops = {
>> @@ -29,6 +30,81 @@ static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];
>>  /* serializes access to trampoline_table */
>>  static DEFINE_MUTEX(trampoline_mutex);
>>
>> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
>> +static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex);
>> +
>> +static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, enum ftrace_ops_cmd cmd)
>> +{
>> +	struct bpf_trampoline *tr = ops->private;
>> +	int ret = 0;
>> +
>> +	if (cmd == FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF) {
>> +		/* This is called inside register_ftrace_direct_multi(), so
>> +		 * tr->mutex is already locked.
>> +		 */
>> +		WARN_ON_ONCE(!mutex_is_locked(&tr->mutex));
>
> Again, better is:
>
>	lockdep_assert_held_once(&tr->mutex);

Will fix.

>
>> +
>> +		/* Instead of updating the trampoline here, we propagate
>> +		 * -EAGAIN to register_ftrace_direct_multi(). Then we can
>> +		 * retry register_ftrace_direct_multi() after updating the
>> +		 * trampoline.
>> +		 */
>> +		if ((tr->flags & BPF_TRAMP_F_CALL_ORIG) &&
>> +		    !(tr->flags & BPF_TRAMP_F_ORIG_STACK)) {
>> +			if (WARN_ON_ONCE(tr->flags & BPF_TRAMP_F_SHARE_IPMODIFY))
>> +				return -EBUSY;
>> +
>> +			tr->flags |= BPF_TRAMP_F_SHARE_IPMODIFY;
>> +			return -EAGAIN;
>> +		}
>> +
>> +		return 0;
>> +	}
>> +
>> +	/* The normal locking order is
>> +	 *    tr->mutex => direct_mutex (ftrace.c) => ftrace_lock (ftrace.c)
>> +	 *
>> +	 * The following two commands are called from
>> +	 *
>> +	 *   prepare_direct_functions_for_ipmodify
>> +	 *   cleanup_direct_functions_after_ipmodify
>> +	 *
>> +	 * In both cases, direct_mutex is already locked. Use
>> +	 * mutex_trylock(&tr->mutex) to avoid deadlock in race condition
>> +	 * (something else is making changes to this same trampoline).
>> +	 */
>> +	if (!mutex_trylock(&tr->mutex)) {
>> +		/* sleep 1 ms to make sure whatever holding tr->mutex makes
>> +		 * some progress.
>> +		 */
>> +		msleep(1);
>> +		return -EAGAIN;
>> +	}
>
> Huh, this looks horrible. And I do not get it. The above block prints
> a warning when the mutex is not taken. Why is it already taken
> when cmd == FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF,
> and why does it have to be explicitly taken otherwise?

There are two different scenarios:

1. livepatch is applied first, then the bpf trampoline is registered.
   In this case, we call
   ops_func(FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF). _SELF means we
   are making a change to the DIRECT ops (bpf trampoline) itself. In
   this case, tr->mutex is already taken.

2. the bpf trampoline is registered first, then livepatch is applied.
   In this case, ftrace cannot take tr->mutex first. Instead, ftrace
   has to lock direct_mutex, find any conflicting DIRECT ops, and call
   ops_func(FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER). _PEER means
   this is called on behalf of a peer ftrace_ops (livepatch).

>
> Would it be possible to call prepare_direct_functions_for_ipmodify()
> and cleanup_direct_functions_after_ipmodify() with tr->mutex already
> taken, so that the ordering is correct even in this case?
>
> That said, this is the first version where I am in Cc. I am not sure
> if it has already been discussed.
>
>
>> +	switch (cmd) {
>> +	case FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER:
>> +		tr->flags |= BPF_TRAMP_F_SHARE_IPMODIFY;
>> +
>> +		if ((tr->flags & BPF_TRAMP_F_CALL_ORIG) &&
>> +		    !(tr->flags & BPF_TRAMP_F_ORIG_STACK))
>> +			ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */);
>> +		break;
>> +	case FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER:
>> +		tr->flags &= ~BPF_TRAMP_F_SHARE_IPMODIFY;
>> +
>> +		if (tr->flags & BPF_TRAMP_F_ORIG_STACK)
>> +			ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */);
>> +		break;
>> +	default:
>> +		ret = -EINVAL;
>> +		break;
>> +	};
>> +
>> +	mutex_unlock(&tr->mutex);
>> +	return ret;
>> +}
>> +#endif
>> +
>>  bool bpf_prog_has_trampoline(const struct bpf_prog *prog)
>>  {
>>  	enum bpf_attach_type eatype = prog->expected_attach_type;
>
> Note that I did not do a proper review. I am not much familiar with the
> ftrace code. I just wanted to check how much this patchset affects
> livepatching and noticed the commented things.

Before this set, livepatch and bpf trampoline cannot work on the same
function. Whichever is applied later will fail. After this set, the two
will work almost perfectly together.

By "almost" I mean the race condition protected by the mutex_trylock:

	if (!mutex_trylock(&tr->mutex)) {
		/* sleep 1 ms to make sure whatever holding tr->mutex makes
		 * some progress.
		 */
		msleep(1);
		return -EAGAIN;
	}

If livepatch is applied while something is making changes to the bpf
trampoline at the same time, the livepatch code will get -EAGAIN from
register_ftrace_function(). Then we need to retry from the livepatch
side. AFAICT, the kpatch user space already does the retry, so it is
going to work as-is. I am not sure about other user space tools, though.

The msleep(1) was suggested by Steven to avoid a deadlock in the RT
case [1].

Does this make sense?

Thanks,
Song

[1] https://lore.kernel.org/bpf/20220706211858.67f9254d@rorschach.local.home/
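To illustrate the retry contract Song describes: a caller registering an
IPMODIFY ftrace_ops (the livepatch side) has to treat -EAGAIN from
register_ftrace_function() as transient and retry. A minimal sketch of
such a caller-side loop; the helper name and the retry bound are
illustrative assumptions, not part of this patchset:

	/* hypothetical caller-side retry; the bound of 10 is an assumption */
	static int register_ipmodify_ops_retrying(struct ftrace_ops *ops)
	{
		int ret, tries = 10;

		do {
			ret = register_ftrace_function(ops);
			if (ret != -EAGAIN)
				break;
			msleep(1);	/* let the tr->mutex holder make progress */
		} while (--tries);

		return ret;
	}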
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 7496842a4671..f35c59e0b742 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -47,6 +47,7 @@ struct kobject;
 struct mem_cgroup;
 struct module;
 struct bpf_func_state;
+struct ftrace_ops;
 
 extern struct idr btf_idr;
 extern spinlock_t btf_idr_lock;
@@ -756,6 +757,11 @@ struct btf_func_model {
  */
 #define BPF_TRAMP_F_ORIG_STACK		BIT(5)
 
+/* This trampoline is on a function with another ftrace_ops with IPMODIFY,
+ * e.g., a live patch. This flag is set and cleared by ftrace call backs,
+ */
+#define BPF_TRAMP_F_SHARE_IPMODIFY	BIT(6)
+
 /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
  * bytes on x86.
  */
@@ -838,9 +844,11 @@ struct bpf_tramp_image {
 struct bpf_trampoline {
 	/* hlist for trampoline_table */
 	struct hlist_node hlist;
+	struct ftrace_ops *fops;
 	/* serializes access to fields of this trampoline */
 	struct mutex mutex;
 	refcount_t refcnt;
+	u32 flags;
 	u64 key;
 	struct {
 		struct btf_func_model model;
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index fd69812412ca..fa901aef7930 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -13,6 +13,7 @@
 #include <linux/static_call.h>
 #include <linux/bpf_verifier.h>
 #include <linux/bpf_lsm.h>
+#include <linux/delay.h>
 
 /* dummy _ops. The verifier will operate on target program's ops. */
 const struct bpf_verifier_ops bpf_extension_verifier_ops = {
@@ -29,6 +30,81 @@ static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];
 /* serializes access to trampoline_table */
 static DEFINE_MUTEX(trampoline_mutex);
 
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex);
+
+static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, enum ftrace_ops_cmd cmd)
+{
+	struct bpf_trampoline *tr = ops->private;
+	int ret = 0;
+
+	if (cmd == FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF) {
+		/* This is called inside register_ftrace_direct_multi(), so
+		 * tr->mutex is already locked.
+		 */
+		WARN_ON_ONCE(!mutex_is_locked(&tr->mutex));
+
+		/* Instead of updating the trampoline here, we propagate
+		 * -EAGAIN to register_ftrace_direct_multi(). Then we can
+		 * retry register_ftrace_direct_multi() after updating the
+		 * trampoline.
+		 */
+		if ((tr->flags & BPF_TRAMP_F_CALL_ORIG) &&
+		    !(tr->flags & BPF_TRAMP_F_ORIG_STACK)) {
+			if (WARN_ON_ONCE(tr->flags & BPF_TRAMP_F_SHARE_IPMODIFY))
+				return -EBUSY;
+
+			tr->flags |= BPF_TRAMP_F_SHARE_IPMODIFY;
+			return -EAGAIN;
+		}
+
+		return 0;
+	}
+
+	/* The normal locking order is
+	 *    tr->mutex => direct_mutex (ftrace.c) => ftrace_lock (ftrace.c)
+	 *
+	 * The following two commands are called from
+	 *
+	 *   prepare_direct_functions_for_ipmodify
+	 *   cleanup_direct_functions_after_ipmodify
+	 *
+	 * In both cases, direct_mutex is already locked. Use
+	 * mutex_trylock(&tr->mutex) to avoid deadlock in race condition
+	 * (something else is making changes to this same trampoline).
+	 */
+	if (!mutex_trylock(&tr->mutex)) {
+		/* sleep 1 ms to make sure whatever holding tr->mutex makes
+		 * some progress.
+		 */
+		msleep(1);
+		return -EAGAIN;
+	}
+
+	switch (cmd) {
+	case FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER:
+		tr->flags |= BPF_TRAMP_F_SHARE_IPMODIFY;
+
+		if ((tr->flags & BPF_TRAMP_F_CALL_ORIG) &&
+		    !(tr->flags & BPF_TRAMP_F_ORIG_STACK))
+			ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */);
+		break;
+	case FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER:
+		tr->flags &= ~BPF_TRAMP_F_SHARE_IPMODIFY;
+
+		if (tr->flags & BPF_TRAMP_F_ORIG_STACK)
+			ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	};
+
+	mutex_unlock(&tr->mutex);
+	return ret;
+}
+#endif
+
 bool bpf_prog_has_trampoline(const struct bpf_prog *prog)
 {
 	enum bpf_attach_type eatype = prog->expected_attach_type;
@@ -89,6 +165,16 @@ static struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
 	if (!tr)
 		goto out;
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+	tr->fops = kzalloc(sizeof(struct ftrace_ops), GFP_KERNEL);
+	if (!tr->fops) {
+		kfree(tr);
+		tr = NULL;
+		goto out;
+	}
+	tr->fops->private = tr;
+	tr->fops->ops_func = bpf_tramp_ftrace_ops_func;
+#endif
 
 	tr->key = key;
 	INIT_HLIST_NODE(&tr->hlist);
@@ -128,7 +214,7 @@ static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr)
 	int ret;
 
 	if (tr->func.ftrace_managed)
-		ret = unregister_ftrace_direct((long)ip, (long)old_addr);
+		ret = unregister_ftrace_direct_multi(tr->fops, (long)old_addr);
 	else
 		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, NULL);
 
@@ -137,15 +223,20 @@ static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr)
 	return ret;
 }
 
-static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_addr)
+static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_addr,
+			 bool lock_direct_mutex)
 {
 	void *ip = tr->func.addr;
 	int ret;
 
-	if (tr->func.ftrace_managed)
-		ret = modify_ftrace_direct((long)ip, (long)old_addr, (long)new_addr);
-	else
+	if (tr->func.ftrace_managed) {
+		if (lock_direct_mutex)
+			ret = modify_ftrace_direct_multi(tr->fops, (long)new_addr);
+		else
+			ret = modify_ftrace_direct_multi_nolock(tr->fops, (long)new_addr);
+	} else {
 		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, new_addr);
+	}
 	return ret;
 }
 
@@ -163,10 +254,12 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
 	if (bpf_trampoline_module_get(tr))
 		return -ENOENT;
 
-	if (tr->func.ftrace_managed)
-		ret = register_ftrace_direct((long)ip, (long)new_addr);
-	else
+	if (tr->func.ftrace_managed) {
+		ftrace_set_filter_ip(tr->fops, (unsigned long)ip, 0, 0);
+		ret = register_ftrace_direct_multi(tr->fops, (long)new_addr);
+	} else {
 		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, new_addr);
+	}
 
 	if (ret)
 		bpf_trampoline_module_put(tr);
@@ -332,11 +425,11 @@ static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, u32 idx)
 	return ERR_PTR(err);
 }
 
-static int bpf_trampoline_update(struct bpf_trampoline *tr)
+static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex)
 {
 	struct bpf_tramp_image *im;
 	struct bpf_tramp_links *tlinks;
-	u32 flags = BPF_TRAMP_F_RESTORE_REGS;
+	u32 orig_flags = tr->flags;
 	bool ip_arg = false;
 	int err, total;
 
@@ -358,18 +451,31 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
 		goto out;
 	}
 
+	/* clear all bits except SHARE_IPMODIFY */
+	tr->flags &= BPF_TRAMP_F_SHARE_IPMODIFY;
+
 	if (tlinks[BPF_TRAMP_FEXIT].nr_links ||
-	    tlinks[BPF_TRAMP_MODIFY_RETURN].nr_links)
+	    tlinks[BPF_TRAMP_MODIFY_RETURN].nr_links) {
 		/* NOTE: BPF_TRAMP_F_RESTORE_REGS and BPF_TRAMP_F_SKIP_FRAME
 		 * should not be set together.
 		 */
-		flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;
+		tr->flags |= BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;
+	} else {
+		tr->flags |= BPF_TRAMP_F_RESTORE_REGS;
+	}
 
 	if (ip_arg)
-		flags |= BPF_TRAMP_F_IP_ARG;
+		tr->flags |= BPF_TRAMP_F_IP_ARG;
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+again:
+	if ((tr->flags & BPF_TRAMP_F_SHARE_IPMODIFY) &&
+	    (tr->flags & BPF_TRAMP_F_CALL_ORIG))
+		tr->flags |= BPF_TRAMP_F_ORIG_STACK;
+#endif
 
 	err = arch_prepare_bpf_trampoline(im, im->image, im->image + PAGE_SIZE,
-					  &tr->func.model, flags, tlinks,
+					  &tr->func.model, tr->flags, tlinks,
 					  tr->func.addr);
 	if (err < 0)
 		goto out;
@@ -378,17 +484,34 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
 	WARN_ON(!tr->cur_image && tr->selector);
 	if (tr->cur_image)
 		/* progs already running at this address */
-		err = modify_fentry(tr, tr->cur_image->image, im->image);
+		err = modify_fentry(tr, tr->cur_image->image, im->image, lock_direct_mutex);
 	else
 		/* first time registering */
 		err = register_fentry(tr, im->image);
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+	if (err == -EAGAIN) {
+		/* -EAGAIN from bpf_tramp_ftrace_ops_func. Now
+		 * BPF_TRAMP_F_SHARE_IPMODIFY is set, we can generate the
+		 * trampoline again, and retry register.
+		 */
+		/* reset fops->func and fops->trampoline for re-register */
+		tr->fops->func = NULL;
+		tr->fops->trampoline = 0;
+		goto again;
+	}
+#endif
 	if (err)
 		goto out;
+
 	if (tr->cur_image)
 		bpf_tramp_image_put(tr->cur_image);
 	tr->cur_image = im;
 	tr->selector++;
 out:
+	/* If any error happens, restore previous flags */
+	if (err)
+		tr->flags = orig_flags;
 	kfree(tlinks);
 	return err;
 }
@@ -454,7 +577,7 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_tr
 	hlist_add_head(&link->tramp_hlist, &tr->progs_hlist[kind]);
 	tr->progs_cnt[kind]++;
 
-	err = bpf_trampoline_update(tr);
+	err = bpf_trampoline_update(tr, true /* lock_direct_mutex */);
 	if (err) {
 		hlist_del_init(&link->tramp_hlist);
 		tr->progs_cnt[kind]--;
@@ -487,7 +610,7 @@ static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_
 	}
 	hlist_del_init(&link->tramp_hlist);
 	tr->progs_cnt[kind]--;
-	return bpf_trampoline_update(tr);
+	return bpf_trampoline_update(tr, true /* lock_direct_mutex */);
 }
 
 /* bpf_trampoline_unlink_prog() should never fail. */
@@ -715,6 +838,7 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)
 	 * multiple rcu callbacks.
 	 */
 	hlist_del(&tr->hlist);
+	kfree(tr->fops);
 	kfree(tr);
 out:
 	mutex_unlock(&trampoline_mutex);
When tracing a function with IPMODIFY ftrace_ops (livepatch), the bpf
trampoline must follow the instruction pointer saved on stack. This needs
extra handling for bpf trampolines with BPF_TRAMP_F_CALL_ORIG flag.

Implement bpf_tramp_ftrace_ops_func and use it for the ftrace_ops used
by BPF trampoline. This enables tracing functions with livepatch.

This also requires moving bpf trampoline to *_ftrace_direct_multi APIs.

Link: https://lore.kernel.org/all/20220602193706.2607681-2-song@kernel.org/
Signed-off-by: Song Liu <song@kernel.org>
---
 include/linux/bpf.h     |   8 ++
 kernel/bpf/trampoline.c | 158 +++++++++++++++++++++++++++++++++++-----
 2 files changed, 149 insertions(+), 17 deletions(-)
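For readers unfamiliar with the *_ftrace_direct_multi API this patch
moves to, the per-trampoline lifecycle in the diff above boils down to
the following sketch. The image pointer names (first_image, new_image)
are illustrative and error handling is elided:

	/* one ftrace_ops per bpf trampoline, as in bpf_trampoline_lookup() */
	struct ftrace_ops *fops = kzalloc(sizeof(*fops), GFP_KERNEL);

	fops->private = tr;	/* back pointer used by the ops_func callback */
	fops->ops_func = bpf_tramp_ftrace_ops_func;

	/* attach: filter the ops to the traced function, then register */
	ftrace_set_filter_ip(fops, (unsigned long)tr->func.addr, 0, 0);
	register_ftrace_direct_multi(fops, (unsigned long)first_image);

	/* switch the direct call to a regenerated trampoline image */
	modify_ftrace_direct_multi(fops, (unsigned long)new_image);

	/* detach and free */
	unregister_ftrace_direct_multi(fops, (unsigned long)new_image);
	kfree(fops);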