diff mbox series

[v5,bpf-next,2/4] ftrace: Allow IPMODIFY and DIRECT ops on the same function

Message ID 20220720002126.803253-3-song@kernel.org (mailing list archive)
State Accepted
Commit 53cd885bc5c3ea283cc9c00ca6446c778f00bfba
Delegated to: BPF
Headers show
Series ftrace: host klp and bpf trampoline together | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 6979 this patch: 6981
netdev/cc_maintainers warning 1 maintainers not CCed: mingo@redhat.com
netdev/build_clang success Errors and warnings before: 522 this patch: 522
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 5865 this patch: 5867
netdev/checkpatch warning WARNING: line length of 100 exceeds 80 columns WARNING: line length of 81 exceeds 80 columns WARNING: line length of 93 exceeds 80 columns WARNING: line length of 98 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 27 this patch: 27
netdev/source_inline success Was 1 now: 0
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Kernel LATEST on ubuntu-latest with llvm-15
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Kernel LATEST on z15 with gcc
bpf/vmtest-bpf-next-VM_Test-1 success Logs for Kernel LATEST on ubuntu-latest with gcc
bpf/vmtest-bpf-next-PR success PR summary

Commit Message

Song Liu July 20, 2022, 12:21 a.m. UTC
IPMODIFY (livepatch) and DIRECT (bpf trampoline) ops are both important
users of ftrace. It is necessary to allow them work on the same function
at the same time.

First, DIRECT ops no longer specify IPMODIFY flag. Instead, DIRECT flag is
handled together with IPMODIFY flag in __ftrace_hash_update_ipmodify().

Then, a callback function, ops_func, is added to ftrace_ops. This is used
by ftrace core code to understand whether the DIRECT ops can share with an
IPMODIFY ops. To share with IPMODIFY ops, the DIRECT ops need to implement
the callback function and adjust the direct trampoline accordingly.

If DIRECT ops is attached before the IPMODIFY ops, ftrace core code calls
ENABLE_SHARE_IPMODIFY_PEER on the DIRECT ops before registering the
IPMODIFY ops.

If IPMODIFY ops is attached before the DIRECT ops, ftrace core code calls
ENABLE_SHARE_IPMODIFY_SELF in __ftrace_hash_update_ipmodify. Owner of the
DIRECT ops may return 0 if the DIRECT trampoline can share with IPMODIFY,
so error code otherwise. The error code is propagated to
register_ftrace_direct_multi so that onwer of the DIRECT trampoline can
handle it properly.

For more details, please refer to comment before enum ftrace_ops_cmd.

Link: https://lore.kernel.org/all/20220602193706.2607681-2-song@kernel.org/
Link: https://lore.kernel.org/all/20220718055449.3960512-1-song@kernel.org/
Signed-off-by: Song Liu <song@kernel.org>
---
 include/linux/ftrace.h |  38 +++++++
 kernel/trace/ftrace.c  | 242 ++++++++++++++++++++++++++++++++++++-----
 2 files changed, 254 insertions(+), 26 deletions(-)

Comments

Naveen N. Rao Sept. 9, 2022, 11:58 a.m. UTC | #1
Song Liu wrote:
> IPMODIFY (livepatch) and DIRECT (bpf trampoline) ops are both important
> users of ftrace. It is necessary to allow them work on the same function
> at the same time.
> 
> First, DIRECT ops no longer specify IPMODIFY flag. Instead, DIRECT flag is
> handled together with IPMODIFY flag in __ftrace_hash_update_ipmodify().
> 
> Then, a callback function, ops_func, is added to ftrace_ops. This is used
> by ftrace core code to understand whether the DIRECT ops can share with an
> IPMODIFY ops. To share with IPMODIFY ops, the DIRECT ops need to implement
> the callback function and adjust the direct trampoline accordingly.
> 
> If DIRECT ops is attached before the IPMODIFY ops, ftrace core code calls
> ENABLE_SHARE_IPMODIFY_PEER on the DIRECT ops before registering the
> IPMODIFY ops.
> 
> If IPMODIFY ops is attached before the DIRECT ops, ftrace core code calls
> ENABLE_SHARE_IPMODIFY_SELF in __ftrace_hash_update_ipmodify. Owner of the
> DIRECT ops may return 0 if the DIRECT trampoline can share with IPMODIFY,
> so error code otherwise. The error code is propagated to
> register_ftrace_direct_multi so that onwer of the DIRECT trampoline can
> handle it properly.
> 
> For more details, please refer to comment before enum ftrace_ops_cmd.
> 
> Link: https://lore.kernel.org/all/20220602193706.2607681-2-song@kernel.org/
> Link: https://lore.kernel.org/all/20220718055449.3960512-1-song@kernel.org/
> Signed-off-by: Song Liu <song@kernel.org>
> ---
>  include/linux/ftrace.h |  38 +++++++
>  kernel/trace/ftrace.c  | 242 ++++++++++++++++++++++++++++++++++++-----
>  2 files changed, 254 insertions(+), 26 deletions(-)
> 
> diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> index acb35243ce5d..0b61371e287b 100644
> --- a/include/linux/ftrace.h
> +++ b/include/linux/ftrace.h
> @@ -208,6 +208,43 @@ enum {
>  	FTRACE_OPS_FL_DIRECT			= BIT(17),
>  };
>  
> +/*
> + * FTRACE_OPS_CMD_* commands allow the ftrace core logic to request changes
> + * to a ftrace_ops. Note, the requests may fail.
> + *
> + * ENABLE_SHARE_IPMODIFY_SELF - enable a DIRECT ops to work on the same
> + *                              function as an ops with IPMODIFY. Called
> + *                              when the DIRECT ops is being registered.
> + *                              This is called with both direct_mutex and
> + *                              ftrace_lock are locked.
> + *
> + * ENABLE_SHARE_IPMODIFY_PEER - enable a DIRECT ops to work on the same
> + *                              function as an ops with IPMODIFY. Called
> + *                              when the other ops (the one with IPMODIFY)
> + *                              is being registered.
> + *                              This is called with direct_mutex locked.
> + *
> + * DISABLE_SHARE_IPMODIFY_PEER - disable a DIRECT ops to work on the same
> + *                               function as an ops with IPMODIFY. Called
> + *                               when the other ops (the one with IPMODIFY)
> + *                               is being unregistered.
> + *                               This is called with direct_mutex locked.
> + */
> +enum ftrace_ops_cmd {
> +	FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF,
> +	FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER,
> +	FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER,
> +};
> +
> +/*
> + * For most ftrace_ops_cmd,
> + * Returns:
> + *        0 - Success.
> + *        Negative on failure. The return value is dependent on the
> + *        callback.
> + */
> +typedef int (*ftrace_ops_func_t)(struct ftrace_ops *op, enum ftrace_ops_cmd cmd);
> +
>  #ifdef CONFIG_DYNAMIC_FTRACE
>  /* The hash used to know what functions callbacks trace */
>  struct ftrace_ops_hash {
> @@ -250,6 +287,7 @@ struct ftrace_ops {
>  	unsigned long			trampoline;
>  	unsigned long			trampoline_size;
>  	struct list_head		list;
> +	ftrace_ops_func_t		ops_func;
>  #endif
>  };
>  
> diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
> index 5d67dc12231d..bc921a3f7ea8 100644
> --- a/kernel/trace/ftrace.c
> +++ b/kernel/trace/ftrace.c
> @@ -1861,6 +1861,8 @@ static void ftrace_hash_rec_enable_modify(struct ftrace_ops *ops,
>  	ftrace_hash_rec_update_modify(ops, filter_hash, 1);
>  }
>  
> +static bool ops_references_ip(struct ftrace_ops *ops, unsigned long ip);
> +
>  /*
>   * Try to update IPMODIFY flag on each ftrace_rec. Return 0 if it is OK
>   * or no-needed to update, -EBUSY if it detects a conflict of the flag
> @@ -1869,6 +1871,13 @@ static void ftrace_hash_rec_enable_modify(struct ftrace_ops *ops,
>   *  - If the hash is NULL, it hits all recs (if IPMODIFY is set, this is rejected)
>   *  - If the hash is EMPTY_HASH, it hits nothing
>   *  - Anything else hits the recs which match the hash entries.
> + *
> + * DIRECT ops does not have IPMODIFY flag, but we still need to check it
> + * against functions with FTRACE_FL_IPMODIFY. If there is any overlap, call
> + * ops_func(SHARE_IPMODIFY_SELF) to make sure current ops can share with
> + * IPMODIFY. If ops_func(SHARE_IPMODIFY_SELF) returns non-zero, propagate
> + * the return value to the caller and eventually to the owner of the DIRECT
> + * ops.
>   */
>  static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops,
>  					 struct ftrace_hash *old_hash,
> @@ -1877,17 +1886,26 @@ static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops,
>  	struct ftrace_page *pg;
>  	struct dyn_ftrace *rec, *end = NULL;
>  	int in_old, in_new;
> +	bool is_ipmodify, is_direct;
>  
>  	/* Only update if the ops has been registered */
>  	if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
>  		return 0;
>  
> -	if (!(ops->flags & FTRACE_OPS_FL_IPMODIFY))
> +	is_ipmodify = ops->flags & FTRACE_OPS_FL_IPMODIFY;
> +	is_direct = ops->flags & FTRACE_OPS_FL_DIRECT;
> +
> +	/* neither IPMODIFY nor DIRECT, skip */
> +	if (!is_ipmodify && !is_direct)
> +		return 0;
> +
> +	if (WARN_ON_ONCE(is_ipmodify && is_direct))
>  		return 0;
>  
>  	/*
> -	 * Since the IPMODIFY is a very address sensitive action, we do not
> -	 * allow ftrace_ops to set all functions to new hash.
> +	 * Since the IPMODIFY and DIRECT are very address sensitive
> +	 * actions, we do not allow ftrace_ops to set all functions to new
> +	 * hash.
>  	 */
>  	if (!new_hash || !old_hash)
>  		return -EINVAL;
> @@ -1905,12 +1923,32 @@ static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops,
>  			continue;
>  
>  		if (in_new) {
> -			/* New entries must ensure no others are using it */
> -			if (rec->flags & FTRACE_FL_IPMODIFY)
> -				goto rollback;
> -			rec->flags |= FTRACE_FL_IPMODIFY;
> -		} else /* Removed entry */
> +			if (rec->flags & FTRACE_FL_IPMODIFY) {
> +				int ret;
> +
> +				/* Cannot have two ipmodify on same rec */
> +				if (is_ipmodify)
> +					goto rollback;
> +
> +				FTRACE_WARN_ON(rec->flags & FTRACE_FL_DIRECT);
> +
> +				/*
> +				 * Another ops with IPMODIFY is already
> +				 * attached. We are now attaching a direct
> +				 * ops. Run SHARE_IPMODIFY_SELF, to check
> +				 * whether sharing is supported.
> +				 */
> +				if (!ops->ops_func)
> +					return -EBUSY;
> +				ret = ops->ops_func(ops, FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF);
> +				if (ret)
> +					return ret;
> +			} else if (is_ipmodify) {
> +				rec->flags |= FTRACE_FL_IPMODIFY;
> +			}
> +		} else if (is_ipmodify) {
>  			rec->flags &= ~FTRACE_FL_IPMODIFY;
> +		}
>  	} while_for_each_ftrace_rec();
>  
>  	return 0;
> @@ -2454,8 +2492,7 @@ static void call_direct_funcs(unsigned long ip, unsigned long pip,
>  
>  struct ftrace_ops direct_ops = {
>  	.func		= call_direct_funcs,
> -	.flags		= FTRACE_OPS_FL_IPMODIFY
> -			  | FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS
> +	.flags		= FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS
>  			  | FTRACE_OPS_FL_PERMANENT,
>  	/*
>  	 * By declaring the main trampoline as this trampoline
> @@ -3072,14 +3109,14 @@ static inline int ops_traces_mod(struct ftrace_ops *ops)
>  }
>  
>  /*
> - * Check if the current ops references the record.
> + * Check if the current ops references the given ip.
>   *
>   * If the ops traces all functions, then it was already accounted for.
>   * If the ops does not trace the current record function, skip it.
>   * If the ops ignores the function via notrace filter, skip it.
>   */
> -static inline bool
> -ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec)
> +static bool
> +ops_references_ip(struct ftrace_ops *ops, unsigned long ip)
>  {
>  	/* If ops isn't enabled, ignore it */
>  	if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
> @@ -3091,16 +3128,29 @@ ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec)
>  
>  	/* The function must be in the filter */
>  	if (!ftrace_hash_empty(ops->func_hash->filter_hash) &&
> -	    !__ftrace_lookup_ip(ops->func_hash->filter_hash, rec->ip))
> +	    !__ftrace_lookup_ip(ops->func_hash->filter_hash, ip))
>  		return false;
>  
>  	/* If in notrace hash, we ignore it too */
> -	if (ftrace_lookup_ip(ops->func_hash->notrace_hash, rec->ip))
> +	if (ftrace_lookup_ip(ops->func_hash->notrace_hash, ip))
>  		return false;
>  
>  	return true;
>  }
>  
> +/*
> + * Check if the current ops references the record.
> + *
> + * If the ops traces all functions, then it was already accounted for.
> + * If the ops does not trace the current record function, skip it.
> + * If the ops ignores the function via notrace filter, skip it.
> + */
> +static bool
> +ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec)
> +{
> +	return ops_references_ip(ops, rec->ip);
> +}
> +
>  static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs)
>  {
>  	bool init_nop = ftrace_need_init_nop();
> @@ -5215,6 +5265,8 @@ static struct ftrace_direct_func *ftrace_alloc_direct_func(unsigned long addr)
>  	return direct;
>  }
>  
> +static int register_ftrace_function_nolock(struct ftrace_ops *ops);
> +
>  /**
>   * register_ftrace_direct - Call a custom trampoline directly
>   * @ip: The address of the nop at the beginning of a function
> @@ -5286,7 +5338,7 @@ int register_ftrace_direct(unsigned long ip, unsigned long addr)
>  	ret = ftrace_set_filter_ip(&direct_ops, ip, 0, 0);
>  
>  	if (!ret && !(direct_ops.flags & FTRACE_OPS_FL_ENABLED)) {
> -		ret = register_ftrace_function(&direct_ops);
> +		ret = register_ftrace_function_nolock(&direct_ops);
>  		if (ret)
>  			ftrace_set_filter_ip(&direct_ops, ip, 1, 0);
>  	}
> @@ -5545,8 +5597,7 @@ int modify_ftrace_direct(unsigned long ip,
>  }
>  EXPORT_SYMBOL_GPL(modify_ftrace_direct);
>  
> -#define MULTI_FLAGS (FTRACE_OPS_FL_IPMODIFY | FTRACE_OPS_FL_DIRECT | \
> -		     FTRACE_OPS_FL_SAVE_REGS)
> +#define MULTI_FLAGS (FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS)
>  
>  static int check_direct_multi(struct ftrace_ops *ops)
>  {
> @@ -5639,7 +5690,7 @@ int register_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr)
>  	ops->flags = MULTI_FLAGS;
>  	ops->trampoline = FTRACE_REGS_ADDR;
>  
> -	err = register_ftrace_function(ops);
> +	err = register_ftrace_function_nolock(ops);
>  
>   out_remove:
>  	if (err)
> @@ -5709,7 +5760,7 @@ __modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr)
>  	ftrace_ops_init(&tmp_ops);
>  	tmp_ops.func_hash = ops->func_hash;
>  
> -	err = register_ftrace_function(&tmp_ops);
> +	err = register_ftrace_function_nolock(&tmp_ops);
>  	if (err)
>  		return err;
>  
> @@ -8003,6 +8054,143 @@ int ftrace_is_dead(void)
>  	return ftrace_disabled;
>  }
>  
> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
> +/*
> + * When registering ftrace_ops with IPMODIFY, it is necessary to make sure
> + * it doesn't conflict with any direct ftrace_ops. If there is existing
> + * direct ftrace_ops on a kernel function being patched, call
> + * FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER on it to enable sharing.
> + *
> + * @ops:     ftrace_ops being registered.
> + *
> + * Returns:
> + *         0 on success;
> + *         Negative on failure.
> + */
> +static int prepare_direct_functions_for_ipmodify(struct ftrace_ops *ops)
> +{
> +	struct ftrace_func_entry *entry;
> +	struct ftrace_hash *hash;
> +	struct ftrace_ops *op;
> +	int size, i, ret;
> +
> +	lockdep_assert_held_once(&direct_mutex);
> +
> +	if (!(ops->flags & FTRACE_OPS_FL_IPMODIFY))
> +		return 0;
> +
> +	hash = ops->func_hash->filter_hash;
> +	size = 1 << hash->size_bits;
> +	for (i = 0; i < size; i++) {
> +		hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
> +			unsigned long ip = entry->ip;
> +			bool found_op = false;
> +
> +			mutex_lock(&ftrace_lock);
> +			do_for_each_ftrace_op(op, ftrace_ops_list) {
> +				if (!(op->flags & FTRACE_OPS_FL_DIRECT))
> +					continue;
> +				if (ops_references_ip(op, ip)) {
> +					found_op = true;
> +					break;
> +				}
> +			} while_for_each_ftrace_op(op);
> +			mutex_unlock(&ftrace_lock);
> +
> +			if (found_op) {
> +				if (!op->ops_func)
> +					return -EBUSY;
> +
> +				ret = op->ops_func(op, FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER);
> +				if (ret)
> +					return ret;
> +			}
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +/*
> + * Similar to prepare_direct_functions_for_ipmodify, clean up after ops
> + * with IPMODIFY is unregistered. The cleanup is optional for most DIRECT
> + * ops.
> + */
> +static void cleanup_direct_functions_after_ipmodify(struct ftrace_ops *ops)
> +{
> +	struct ftrace_func_entry *entry;
> +	struct ftrace_hash *hash;
> +	struct ftrace_ops *op;
> +	int size, i;
> +
> +	if (!(ops->flags & FTRACE_OPS_FL_IPMODIFY))
> +		return;
> +
> +	mutex_lock(&direct_mutex);
> +
> +	hash = ops->func_hash->filter_hash;
> +	size = 1 << hash->size_bits;
> +	for (i = 0; i < size; i++) {
> +		hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
> +			unsigned long ip = entry->ip;
> +			bool found_op = false;
> +
> +			mutex_lock(&ftrace_lock);
> +			do_for_each_ftrace_op(op, ftrace_ops_list) {
> +				if (!(op->flags & FTRACE_OPS_FL_DIRECT))
> +					continue;
> +				if (ops_references_ip(op, ip)) {
> +					found_op = true;
> +					break;
> +				}
> +			} while_for_each_ftrace_op(op);
> +			mutex_unlock(&ftrace_lock);
> +
> +			/* The cleanup is optional, ignore any errors */
> +			if (found_op && op->ops_func)
> +				op->ops_func(op, FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER);
> +		}
> +	}
> +	mutex_unlock(&direct_mutex);
> +}
> +
> +#define lock_direct_mutex()	mutex_lock(&direct_mutex)
> +#define unlock_direct_mutex()	mutex_unlock(&direct_mutex)
> +
> +#else  /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
> +
> +static int prepare_direct_functions_for_ipmodify(struct ftrace_ops *ops)
> +{
> +	return 0;
> +}
> +
> +static void cleanup_direct_functions_after_ipmodify(struct ftrace_ops *ops)
> +{
> +}
> +
> +#define lock_direct_mutex()	do { } while (0)
> +#define unlock_direct_mutex()	do { } while (0)
> +
> +#endif  /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
> +
> +/*
> + * Similar to register_ftrace_function, except we don't lock direct_mutex.
> + */
> +static int register_ftrace_function_nolock(struct ftrace_ops *ops)
> +{
> +	int ret;
> +
> +	ftrace_ops_init(ops);
> +
> +	mutex_lock(&ftrace_lock);
> +
> +	ret = ftrace_startup(ops, 0);
> +
> +	mutex_unlock(&ftrace_lock);
> +
> +	return ret;
> +}
> +
>  /**
>   * register_ftrace_function - register a function for profiling
>   * @ops:	ops structure that holds the function for profiling.
> @@ -8018,14 +8206,15 @@ int register_ftrace_function(struct ftrace_ops *ops)
>  {
>  	int ret;
>  
> -	ftrace_ops_init(ops);
> -
> -	mutex_lock(&ftrace_lock);
> -
> -	ret = ftrace_startup(ops, 0);
> +	lock_direct_mutex();

Trying to enable ftrace direct on powerpc, this is resulting in a hung 
task when testing samples/ftrace/ftrace-direct-modify.c

Essentially, the sample calls modify_ftrace_direct(), which grabs 
direct_mutex before calling 
ftrace_modify_direct_caller()->register_ftrace_function().


- Naveen


> +	ret = prepare_direct_functions_for_ipmodify(ops);
> +	if (ret < 0)
> +		goto out_unlock;
>  
> -	mutex_unlock(&ftrace_lock);
> +	ret = register_ftrace_function_nolock(ops);
>  
> +out_unlock:
> +	unlock_direct_mutex();
>  	return ret;
>  }
>  EXPORT_SYMBOL_GPL(register_ftrace_function);
> @@ -8044,6 +8233,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops)
>  	ret = ftrace_shutdown(ops, 0);
>  	mutex_unlock(&ftrace_lock);
>  
> +	cleanup_direct_functions_after_ipmodify(ops);
>  	return ret;
>  }
>  EXPORT_SYMBOL_GPL(unregister_ftrace_function);
> -- 
> 2.30.2
> 
> 
>
Song Liu Sept. 9, 2022, 2:05 p.m. UTC | #2
> On Sep 9, 2022, at 4:58 AM, Naveen N. Rao <naveen.n.rao@linux.ibm.com> wrote:
> 
> Song Liu wrote:

[...]

>> +
>> /**
>>  * register_ftrace_function - register a function for profiling
>>  * @ops:	ops structure that holds the function for profiling.
>> @@ -8018,14 +8206,15 @@ int register_ftrace_function(struct ftrace_ops *ops)
>> {
>> 	int ret;
>> -	ftrace_ops_init(ops);
>> -
>> -	mutex_lock(&ftrace_lock);
>> -
>> -	ret = ftrace_startup(ops, 0);
>> +	lock_direct_mutex();
> 
> Trying to enable ftrace direct on powerpc, this is resulting in a hung task when testing samples/ftrace/ftrace-direct-modify.c
> 
> Essentially, the sample calls modify_ftrace_direct(), which grabs direct_mutex before calling ftrace_modify_direct_caller()->register_ftrace_function().
> 

Thanks for the report. Would the following change fix the issue?

Song

diff --git i/kernel/trace/ftrace.c w/kernel/trace/ftrace.c
index bc921a3f7ea8..2f1e6cfa834e 100644
--- i/kernel/trace/ftrace.c
+++ w/kernel/trace/ftrace.c
@@ -5496,7 +5496,7 @@ int __weak ftrace_modify_direct_caller(struct ftrace_func_entry *entry,
        if (ret)
                goto out_lock;

-       ret = register_ftrace_function(&stub_ops);
+       ret = register_ftrace_function_nolock(&stub_ops);
        if (ret) {
                ftrace_set_filter_ip(&stub_ops, ip, 1, 0);
                goto out_lock;

> 
> - Naveen
> 
> 
>> +	ret = prepare_direct_functions_for_ipmodify(ops);
>> +	if (ret < 0)
>> +		goto out_unlock;
>> -	mutex_unlock(&ftrace_lock);
>> +	ret = register_ftrace_function_nolock(ops);
>> +out_unlock:
>> +	unlock_direct_mutex();
>> 	return ret;
>> }
>> EXPORT_SYMBOL_GPL(register_ftrace_function);
>> @@ -8044,6 +8233,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops)
>> 	ret = ftrace_shutdown(ops, 0);
>> 	mutex_unlock(&ftrace_lock);
>> +	cleanup_direct_functions_after_ipmodify(ops);
>> 	return ret;
>> }
>> EXPORT_SYMBOL_GPL(unregister_ftrace_function);
>> -- 
>> 2.30.2
Naveen N. Rao Sept. 9, 2022, 6:13 p.m. UTC | #3
Hi Song,

Song Liu wrote:
> 
> 
>> On Sep 9, 2022, at 4:58 AM, Naveen N. Rao <naveen.n.rao@linux.ibm.com> wrote:
>> 
>> Song Liu wrote:
> 
> [...]
> 
>>> +
>>> /**
>>>  * register_ftrace_function - register a function for profiling
>>>  * @ops:	ops structure that holds the function for profiling.
>>> @@ -8018,14 +8206,15 @@ int register_ftrace_function(struct ftrace_ops *ops)
>>> {
>>> 	int ret;
>>> -	ftrace_ops_init(ops);
>>> -
>>> -	mutex_lock(&ftrace_lock);
>>> -
>>> -	ret = ftrace_startup(ops, 0);
>>> +	lock_direct_mutex();
>> 
>> Trying to enable ftrace direct on powerpc, this is resulting in a hung task when testing samples/ftrace/ftrace-direct-modify.c
>> 
>> Essentially, the sample calls modify_ftrace_direct(), which grabs direct_mutex before calling ftrace_modify_direct_caller()->register_ftrace_function().
>> 
> 
> Thanks for the report. Would the following change fix the issue?
> 
> Song
> 
> diff --git i/kernel/trace/ftrace.c w/kernel/trace/ftrace.c
> index bc921a3f7ea8..2f1e6cfa834e 100644
> --- i/kernel/trace/ftrace.c
> +++ w/kernel/trace/ftrace.c
> @@ -5496,7 +5496,7 @@ int __weak ftrace_modify_direct_caller(struct ftrace_func_entry *entry,
>         if (ret)
>                 goto out_lock;
> 
> -       ret = register_ftrace_function(&stub_ops);
> +       ret = register_ftrace_function_nolock(&stub_ops);
>         if (ret) {
>                 ftrace_set_filter_ip(&stub_ops, ip, 1, 0);
>                 goto out_lock;
> 

That fixes it for me.
Reported-and-Tested-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>


Thanks!
- Naveen
diff mbox series

Patch

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index acb35243ce5d..0b61371e287b 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -208,6 +208,43 @@  enum {
 	FTRACE_OPS_FL_DIRECT			= BIT(17),
 };
 
+/*
+ * FTRACE_OPS_CMD_* commands allow the ftrace core logic to request changes
+ * to a ftrace_ops. Note, the requests may fail.
+ *
+ * ENABLE_SHARE_IPMODIFY_SELF - enable a DIRECT ops to work on the same
+ *                              function as an ops with IPMODIFY. Called
+ *                              when the DIRECT ops is being registered.
+ *                              This is called with both direct_mutex and
+ *                              ftrace_lock are locked.
+ *
+ * ENABLE_SHARE_IPMODIFY_PEER - enable a DIRECT ops to work on the same
+ *                              function as an ops with IPMODIFY. Called
+ *                              when the other ops (the one with IPMODIFY)
+ *                              is being registered.
+ *                              This is called with direct_mutex locked.
+ *
+ * DISABLE_SHARE_IPMODIFY_PEER - disable a DIRECT ops to work on the same
+ *                               function as an ops with IPMODIFY. Called
+ *                               when the other ops (the one with IPMODIFY)
+ *                               is being unregistered.
+ *                               This is called with direct_mutex locked.
+ */
+enum ftrace_ops_cmd {
+	FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF,
+	FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER,
+	FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER,
+};
+
+/*
+ * For most ftrace_ops_cmd,
+ * Returns:
+ *        0 - Success.
+ *        Negative on failure. The return value is dependent on the
+ *        callback.
+ */
+typedef int (*ftrace_ops_func_t)(struct ftrace_ops *op, enum ftrace_ops_cmd cmd);
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 /* The hash used to know what functions callbacks trace */
 struct ftrace_ops_hash {
@@ -250,6 +287,7 @@  struct ftrace_ops {
 	unsigned long			trampoline;
 	unsigned long			trampoline_size;
 	struct list_head		list;
+	ftrace_ops_func_t		ops_func;
 #endif
 };
 
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 5d67dc12231d..bc921a3f7ea8 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1861,6 +1861,8 @@  static void ftrace_hash_rec_enable_modify(struct ftrace_ops *ops,
 	ftrace_hash_rec_update_modify(ops, filter_hash, 1);
 }
 
+static bool ops_references_ip(struct ftrace_ops *ops, unsigned long ip);
+
 /*
  * Try to update IPMODIFY flag on each ftrace_rec. Return 0 if it is OK
  * or no-needed to update, -EBUSY if it detects a conflict of the flag
@@ -1869,6 +1871,13 @@  static void ftrace_hash_rec_enable_modify(struct ftrace_ops *ops,
  *  - If the hash is NULL, it hits all recs (if IPMODIFY is set, this is rejected)
  *  - If the hash is EMPTY_HASH, it hits nothing
  *  - Anything else hits the recs which match the hash entries.
+ *
+ * DIRECT ops does not have IPMODIFY flag, but we still need to check it
+ * against functions with FTRACE_FL_IPMODIFY. If there is any overlap, call
+ * ops_func(SHARE_IPMODIFY_SELF) to make sure current ops can share with
+ * IPMODIFY. If ops_func(SHARE_IPMODIFY_SELF) returns non-zero, propagate
+ * the return value to the caller and eventually to the owner of the DIRECT
+ * ops.
  */
 static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops,
 					 struct ftrace_hash *old_hash,
@@ -1877,17 +1886,26 @@  static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops,
 	struct ftrace_page *pg;
 	struct dyn_ftrace *rec, *end = NULL;
 	int in_old, in_new;
+	bool is_ipmodify, is_direct;
 
 	/* Only update if the ops has been registered */
 	if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
 		return 0;
 
-	if (!(ops->flags & FTRACE_OPS_FL_IPMODIFY))
+	is_ipmodify = ops->flags & FTRACE_OPS_FL_IPMODIFY;
+	is_direct = ops->flags & FTRACE_OPS_FL_DIRECT;
+
+	/* neither IPMODIFY nor DIRECT, skip */
+	if (!is_ipmodify && !is_direct)
+		return 0;
+
+	if (WARN_ON_ONCE(is_ipmodify && is_direct))
 		return 0;
 
 	/*
-	 * Since the IPMODIFY is a very address sensitive action, we do not
-	 * allow ftrace_ops to set all functions to new hash.
+	 * Since the IPMODIFY and DIRECT are very address sensitive
+	 * actions, we do not allow ftrace_ops to set all functions to new
+	 * hash.
 	 */
 	if (!new_hash || !old_hash)
 		return -EINVAL;
@@ -1905,12 +1923,32 @@  static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops,
 			continue;
 
 		if (in_new) {
-			/* New entries must ensure no others are using it */
-			if (rec->flags & FTRACE_FL_IPMODIFY)
-				goto rollback;
-			rec->flags |= FTRACE_FL_IPMODIFY;
-		} else /* Removed entry */
+			if (rec->flags & FTRACE_FL_IPMODIFY) {
+				int ret;
+
+				/* Cannot have two ipmodify on same rec */
+				if (is_ipmodify)
+					goto rollback;
+
+				FTRACE_WARN_ON(rec->flags & FTRACE_FL_DIRECT);
+
+				/*
+				 * Another ops with IPMODIFY is already
+				 * attached. We are now attaching a direct
+				 * ops. Run SHARE_IPMODIFY_SELF, to check
+				 * whether sharing is supported.
+				 */
+				if (!ops->ops_func)
+					return -EBUSY;
+				ret = ops->ops_func(ops, FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF);
+				if (ret)
+					return ret;
+			} else if (is_ipmodify) {
+				rec->flags |= FTRACE_FL_IPMODIFY;
+			}
+		} else if (is_ipmodify) {
 			rec->flags &= ~FTRACE_FL_IPMODIFY;
+		}
 	} while_for_each_ftrace_rec();
 
 	return 0;
@@ -2454,8 +2492,7 @@  static void call_direct_funcs(unsigned long ip, unsigned long pip,
 
 struct ftrace_ops direct_ops = {
 	.func		= call_direct_funcs,
-	.flags		= FTRACE_OPS_FL_IPMODIFY
-			  | FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS
+	.flags		= FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS
 			  | FTRACE_OPS_FL_PERMANENT,
 	/*
 	 * By declaring the main trampoline as this trampoline
@@ -3072,14 +3109,14 @@  static inline int ops_traces_mod(struct ftrace_ops *ops)
 }
 
 /*
- * Check if the current ops references the record.
+ * Check if the current ops references the given ip.
  *
  * If the ops traces all functions, then it was already accounted for.
  * If the ops does not trace the current record function, skip it.
  * If the ops ignores the function via notrace filter, skip it.
  */
-static inline bool
-ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec)
+static bool
+ops_references_ip(struct ftrace_ops *ops, unsigned long ip)
 {
 	/* If ops isn't enabled, ignore it */
 	if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
@@ -3091,16 +3128,29 @@  ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec)
 
 	/* The function must be in the filter */
 	if (!ftrace_hash_empty(ops->func_hash->filter_hash) &&
-	    !__ftrace_lookup_ip(ops->func_hash->filter_hash, rec->ip))
+	    !__ftrace_lookup_ip(ops->func_hash->filter_hash, ip))
 		return false;
 
 	/* If in notrace hash, we ignore it too */
-	if (ftrace_lookup_ip(ops->func_hash->notrace_hash, rec->ip))
+	if (ftrace_lookup_ip(ops->func_hash->notrace_hash, ip))
 		return false;
 
 	return true;
 }
 
+/*
+ * Check if the current ops references the record.
+ *
+ * If the ops traces all functions, then it was already accounted for.
+ * If the ops does not trace the current record function, skip it.
+ * If the ops ignores the function via notrace filter, skip it.
+ */
+static bool
+ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec)
+{
+	return ops_references_ip(ops, rec->ip);
+}
+
 static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs)
 {
 	bool init_nop = ftrace_need_init_nop();
@@ -5215,6 +5265,8 @@  static struct ftrace_direct_func *ftrace_alloc_direct_func(unsigned long addr)
 	return direct;
 }
 
+static int register_ftrace_function_nolock(struct ftrace_ops *ops);
+
 /**
  * register_ftrace_direct - Call a custom trampoline directly
  * @ip: The address of the nop at the beginning of a function
@@ -5286,7 +5338,7 @@  int register_ftrace_direct(unsigned long ip, unsigned long addr)
 	ret = ftrace_set_filter_ip(&direct_ops, ip, 0, 0);
 
 	if (!ret && !(direct_ops.flags & FTRACE_OPS_FL_ENABLED)) {
-		ret = register_ftrace_function(&direct_ops);
+		ret = register_ftrace_function_nolock(&direct_ops);
 		if (ret)
 			ftrace_set_filter_ip(&direct_ops, ip, 1, 0);
 	}
@@ -5545,8 +5597,7 @@  int modify_ftrace_direct(unsigned long ip,
 }
 EXPORT_SYMBOL_GPL(modify_ftrace_direct);
 
-#define MULTI_FLAGS (FTRACE_OPS_FL_IPMODIFY | FTRACE_OPS_FL_DIRECT | \
-		     FTRACE_OPS_FL_SAVE_REGS)
+#define MULTI_FLAGS (FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS)
 
 static int check_direct_multi(struct ftrace_ops *ops)
 {
@@ -5639,7 +5690,7 @@  int register_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr)
 	ops->flags = MULTI_FLAGS;
 	ops->trampoline = FTRACE_REGS_ADDR;
 
-	err = register_ftrace_function(ops);
+	err = register_ftrace_function_nolock(ops);
 
  out_remove:
 	if (err)
@@ -5709,7 +5760,7 @@  __modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr)
 	ftrace_ops_init(&tmp_ops);
 	tmp_ops.func_hash = ops->func_hash;
 
-	err = register_ftrace_function(&tmp_ops);
+	err = register_ftrace_function_nolock(&tmp_ops);
 	if (err)
 		return err;
 
@@ -8003,6 +8054,143 @@  int ftrace_is_dead(void)
 	return ftrace_disabled;
 }
 
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+/*
+ * When registering ftrace_ops with IPMODIFY, it is necessary to make sure
+ * it doesn't conflict with any direct ftrace_ops. If there is existing
+ * direct ftrace_ops on a kernel function being patched, call
+ * FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER on it to enable sharing.
+ *
+ * @ops:     ftrace_ops being registered.
+ *
+ * Returns:
+ *         0 on success;
+ *         Negative on failure.
+ */
+static int prepare_direct_functions_for_ipmodify(struct ftrace_ops *ops)
+{
+	struct ftrace_func_entry *entry;
+	struct ftrace_hash *hash;
+	struct ftrace_ops *op;
+	int size, i, ret;
+
+	lockdep_assert_held_once(&direct_mutex);
+
+	if (!(ops->flags & FTRACE_OPS_FL_IPMODIFY))
+		return 0;
+
+	hash = ops->func_hash->filter_hash;
+	size = 1 << hash->size_bits;
+	for (i = 0; i < size; i++) {
+		hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
+			unsigned long ip = entry->ip;
+			bool found_op = false;
+
+			mutex_lock(&ftrace_lock);
+			do_for_each_ftrace_op(op, ftrace_ops_list) {
+				if (!(op->flags & FTRACE_OPS_FL_DIRECT))
+					continue;
+				if (ops_references_ip(op, ip)) {
+					found_op = true;
+					break;
+				}
+			} while_for_each_ftrace_op(op);
+			mutex_unlock(&ftrace_lock);
+
+			if (found_op) {
+				if (!op->ops_func)
+					return -EBUSY;
+
+				ret = op->ops_func(op, FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER);
+				if (ret)
+					return ret;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Similar to prepare_direct_functions_for_ipmodify, clean up after ops
+ * with IPMODIFY is unregistered. The cleanup is optional for most DIRECT
+ * ops.
+ */
+static void cleanup_direct_functions_after_ipmodify(struct ftrace_ops *ops)
+{
+	struct ftrace_func_entry *entry;
+	struct ftrace_hash *hash;
+	struct ftrace_ops *op;
+	int size, i;
+
+	if (!(ops->flags & FTRACE_OPS_FL_IPMODIFY))
+		return;
+
+	mutex_lock(&direct_mutex);
+
+	hash = ops->func_hash->filter_hash;
+	size = 1 << hash->size_bits;
+	for (i = 0; i < size; i++) {
+		hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
+			unsigned long ip = entry->ip;
+			bool found_op = false;
+
+			mutex_lock(&ftrace_lock);
+			do_for_each_ftrace_op(op, ftrace_ops_list) {
+				if (!(op->flags & FTRACE_OPS_FL_DIRECT))
+					continue;
+				if (ops_references_ip(op, ip)) {
+					found_op = true;
+					break;
+				}
+			} while_for_each_ftrace_op(op);
+			mutex_unlock(&ftrace_lock);
+
+			/* The cleanup is optional, ignore any errors */
+			if (found_op && op->ops_func)
+				op->ops_func(op, FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER);
+		}
+	}
+	mutex_unlock(&direct_mutex);
+}
+
+#define lock_direct_mutex()	mutex_lock(&direct_mutex)
+#define unlock_direct_mutex()	mutex_unlock(&direct_mutex)
+
+#else  /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
+
+static int prepare_direct_functions_for_ipmodify(struct ftrace_ops *ops)
+{
+	return 0;
+}
+
+static void cleanup_direct_functions_after_ipmodify(struct ftrace_ops *ops)
+{
+}
+
+#define lock_direct_mutex()	do { } while (0)
+#define unlock_direct_mutex()	do { } while (0)
+
+#endif  /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
+
+/*
+ * Similar to register_ftrace_function, except we don't lock direct_mutex.
+ */
+static int register_ftrace_function_nolock(struct ftrace_ops *ops)
+{
+	int ret;
+
+	ftrace_ops_init(ops);
+
+	mutex_lock(&ftrace_lock);
+
+	ret = ftrace_startup(ops, 0);
+
+	mutex_unlock(&ftrace_lock);
+
+	return ret;
+}
+
 /**
  * register_ftrace_function - register a function for profiling
  * @ops:	ops structure that holds the function for profiling.
@@ -8018,14 +8206,15 @@  int register_ftrace_function(struct ftrace_ops *ops)
 {
 	int ret;
 
-	ftrace_ops_init(ops);
-
-	mutex_lock(&ftrace_lock);
-
-	ret = ftrace_startup(ops, 0);
+	lock_direct_mutex();
+	ret = prepare_direct_functions_for_ipmodify(ops);
+	if (ret < 0)
+		goto out_unlock;
 
-	mutex_unlock(&ftrace_lock);
+	ret = register_ftrace_function_nolock(ops);
 
+out_unlock:
+	unlock_direct_mutex();
 	return ret;
 }
 EXPORT_SYMBOL_GPL(register_ftrace_function);
@@ -8044,6 +8233,7 @@  int unregister_ftrace_function(struct ftrace_ops *ops)
 	ret = ftrace_shutdown(ops, 0);
 	mutex_unlock(&ftrace_lock);
 
+	cleanup_direct_functions_after_ipmodify(ops);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(unregister_ftrace_function);