diff mbox series

[v4,bpf-next,2/4] ftrace: allow IPMODIFY and DIRECT ops on the same function

Message ID 20220718055449.3960512-3-song@kernel.org (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series ftrace: host klp and bpf trampoline together | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Kernel LATEST on z15 with gcc
bpf/vmtest-bpf-next-VM_Test-1 success Logs for Kernel LATEST on ubuntu-latest with gcc
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Kernel LATEST on ubuntu-latest with llvm-15
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 6979 this patch: 6983
netdev/cc_maintainers warning 1 maintainers not CCed: mingo@redhat.com
netdev/build_clang success Errors and warnings before: 522 this patch: 522
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 5865 this patch: 5869
netdev/checkpatch warning WARNING: line length of 100 exceeds 80 columns WARNING: line length of 81 exceeds 80 columns WARNING: line length of 93 exceeds 80 columns WARNING: line length of 98 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 27 this patch: 27
netdev/source_inline success Was 1 now: 0

Commit Message

Song Liu July 18, 2022, 5:54 a.m. UTC
IPMODIFY (livepatch) and DIRECT (bpf trampoline) ops are both important
users of ftrace. It is necessary to allow them work on the same function
at the same time.

First, DIRECT ops no longer specify IPMODIFY flag. Instead, DIRECT flag is
handled together with IPMODIFY flag in __ftrace_hash_update_ipmodify().

The, a callback function, ops_func, is added to ftrace_ops. This is used
by ftrace core code to understand whether the DIRECT ops can share with an
IPMODIFY ops. To share with IPMODIFY ops, the DIRECT ops need to implement
the callback function and adjust the direct trampoline accordingly.

If DIRECT ops is attached before the IPMODIFY ops, ftrace core code calls
ENABLE_SHARE_IPMODIFY_PEER on the DIRECT ops before registering the
IPMODIFY ops.

If IPMODIFY ops is attached before the DIRECT ops, ftrace core code calls
ENABLE_SHARE_IPMODIFY_SELF in __ftrace_hash_update_ipmodify. Owner of the
DIRECT ops may return 0 if the DIRECT trampoline can share with IPMODIFY,
so error code otherwise. The error code is propagated to
register_ftrace_direct_multi so that onwer of the DIRECT trampoline can
handle it properly.

For more details, please refer to comment before enum ftrace_ops_cmd.

Link: https://lore.kernel.org/all/20220602193706.2607681-2-song@kernel.org/
Signed-off-by: Song Liu <song@kernel.org>
---
 include/linux/ftrace.h |  38 +++++++
 kernel/trace/ftrace.c  | 221 ++++++++++++++++++++++++++++++++++++++---
 2 files changed, 243 insertions(+), 16 deletions(-)

Comments

kernel test robot July 18, 2022, 7:42 a.m. UTC | #1
Hi Song,

I love your patch! Perhaps something to improve:

[auto build test WARNING on bpf-next/master]

url:    https://github.com/intel-lab-lkp/linux/commits/Song-Liu/ftrace-host-klp-and-bpf-trampoline-together/20220718-135652
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
config: x86_64-randconfig-a004 (https://download.01.org/0day-ci/archive/20220718/202207181552.VuKfz9zg-lkp@intel.com/config)
compiler: gcc-11 (Debian 11.3.0-3) 11.3.0
reproduce (this is a W=1 build):
        # https://github.com/intel-lab-lkp/linux/commit/9ef1ec8cb818d8ca70887c8c123f2d579384a6c6
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Song-Liu/ftrace-host-klp-and-bpf-trampoline-together/20220718-135652
        git checkout 9ef1ec8cb818d8ca70887c8c123f2d579384a6c6
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash kernel/trace/

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   kernel/trace/ftrace.c: In function 'register_ftrace_function':
>> kernel/trace/ftrace.c:8197:14: warning: variable 'direct_mutex_locked' set but not used [-Wunused-but-set-variable]
    8197 |         bool direct_mutex_locked = false;
         |              ^~~~~~~~~~~~~~~~~~~


vim +/direct_mutex_locked +8197 kernel/trace/ftrace.c

  8182	
  8183	/**
  8184	 * register_ftrace_function - register a function for profiling
  8185	 * @ops:	ops structure that holds the function for profiling.
  8186	 *
  8187	 * Register a function to be called by all functions in the
  8188	 * kernel.
  8189	 *
  8190	 * Note: @ops->func and all the functions it calls must be labeled
  8191	 *       with "notrace", otherwise it will go into a
  8192	 *       recursive loop.
  8193	 */
  8194	int register_ftrace_function(struct ftrace_ops *ops)
  8195		__releases(&direct_mutex)
  8196	{
> 8197		bool direct_mutex_locked = false;
  8198		int ret;
  8199	
  8200		ftrace_ops_init(ops);
  8201	
  8202		ret = prepare_direct_functions_for_ipmodify(ops);
  8203		if (ret < 0)
  8204			return ret;
  8205		else if (ret == 1)
  8206			direct_mutex_locked = true;
  8207	
  8208		mutex_lock(&ftrace_lock);
  8209	
  8210		ret = ftrace_startup(ops, 0);
  8211	
  8212		mutex_unlock(&ftrace_lock);
  8213
Petr Mladek July 18, 2022, 1:19 p.m. UTC | #2
On Mon 2022-07-18 15:42:25, kernel test robot wrote:
> Hi Song,
> 
> I love your patch! Perhaps something to improve:
> 
> [auto build test WARNING on bpf-next/master]
> 
> url:    https://github.com/intel-lab-lkp/linux/commits/Song-Liu/ftrace-host-klp-and-bpf-trampoline-together/20220718-135652
> base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
> config: x86_64-randconfig-a004 (https://download.01.org/0day-ci/archive/20220718/202207181552.VuKfz9zg-lkp@intel.com/config)
> compiler: gcc-11 (Debian 11.3.0-3) 11.3.0
> reproduce (this is a W=1 build):
>         # https://github.com/intel-lab-lkp/linux/commit/9ef1ec8cb818d8ca70887c8c123f2d579384a6c6
>         git remote add linux-review https://github.com/intel-lab-lkp/linux
>         git fetch --no-tags linux-review Song-Liu/ftrace-host-klp-and-bpf-trampoline-together/20220718-135652
>         git checkout 9ef1ec8cb818d8ca70887c8c123f2d579384a6c6
>         # save the config file
>         mkdir build_dir && cp config build_dir/.config
>         make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash kernel/trace/
> 
> If you fix the issue, kindly add following tag where applicable
> Reported-by: kernel test robot <lkp@intel.com>
> 
> All warnings (new ones prefixed by >>):
> 
>    kernel/trace/ftrace.c: In function 'register_ftrace_function':
> >> kernel/trace/ftrace.c:8197:14: warning: variable 'direct_mutex_locked' set but not used [-Wunused-but-set-variable]
>     8197 |         bool direct_mutex_locked = false;
>          |              ^~~~~~~~~~~~~~~~~~~
> 
> 
> vim +/direct_mutex_locked +8197 kernel/trace/ftrace.c
> 
>   8182	
>   8183	/**
>   8184	 * register_ftrace_function - register a function for profiling
>   8185	 * @ops:	ops structure that holds the function for profiling.
>   8186	 *
>   8187	 * Register a function to be called by all functions in the
>   8188	 * kernel.
>   8189	 *
>   8190	 * Note: @ops->func and all the functions it calls must be labeled
>   8191	 *       with "notrace", otherwise it will go into a
>   8192	 *       recursive loop.
>   8193	 */
>   8194	int register_ftrace_function(struct ftrace_ops *ops)
>   8195		__releases(&direct_mutex)
>   8196	{
> > 8197		bool direct_mutex_locked = false;
>   8198		int ret;
>   8199	
>   8200		ftrace_ops_init(ops);
>   8201	
>   8202		ret = prepare_direct_functions_for_ipmodify(ops);
>   8203		if (ret < 0)
>   8204			return ret;
>   8205		else if (ret == 1)
>   8206			direct_mutex_locked = true;

Honestly, this is another horrible trick. Would it be possible to
call prepare_direct_functions_for_ipmodify() with direct_mutex
already taken?

I mean something like:

	mutex_lock(&direct_mutex);

	ret = prepare_direct_functions_for_ipmodify(ops);
	if (ret)
		goto out:

	mutex_lock(&ftrace_lock);
	ret = ftrace_startup(ops, 0);
	mutex_unlock(&ftrace_lock);

out:
	mutex_unlock(&direct_mutex);
	return ret;


>   8208		mutex_lock(&ftrace_lock);
>   8209	
>   8210		ret = ftrace_startup(ops, 0);
>   8211	
>   8212		mutex_unlock(&ftrace_lock);
>   8213	

Would be possible to handle tr->mutex the same way to avoid
the trylock? I mean to take it in advance before direct_mutex?

Best Regards,
Petr
Song Liu July 18, 2022, 4:59 p.m. UTC | #3
> On Jul 18, 2022, at 6:19 AM, Petr Mladek <pmladek@suse.com> wrote:
> 
> On Mon 2022-07-18 15:42:25, kernel test robot wrote:
>> Hi Song,
>> 
>> I love your patch! Perhaps something to improve:
>> 
>> [auto build test WARNING on bpf-next/master]
>> 
>> url:    https://github.com/intel-lab-lkp/linux/commits/Song-Liu/ftrace-host-klp-and-bpf-trampoline-together/20220718-135652
>> base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
>> config: x86_64-randconfig-a004 (https://download.01.org/0day-ci/archive/20220718/202207181552.VuKfz9zg-lkp@intel.com/config )
>> compiler: gcc-11 (Debian 11.3.0-3) 11.3.0
>> reproduce (this is a W=1 build):
>>        # https://github.com/intel-lab-lkp/linux/commit/9ef1ec8cb818d8ca70887c8c123f2d579384a6c6
>>        git remote add linux-review https://github.com/intel-lab-lkp/linux
>>        git fetch --no-tags linux-review Song-Liu/ftrace-host-klp-and-bpf-trampoline-together/20220718-135652
>>        git checkout 9ef1ec8cb818d8ca70887c8c123f2d579384a6c6
>>        # save the config file
>>        mkdir build_dir && cp config build_dir/.config
>>        make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash kernel/trace/
>> 
>> If you fix the issue, kindly add following tag where applicable
>> Reported-by: kernel test robot <lkp@intel.com>
>> 
>> All warnings (new ones prefixed by >>):
>> 
>>   kernel/trace/ftrace.c: In function 'register_ftrace_function':
>>>> kernel/trace/ftrace.c:8197:14: warning: variable 'direct_mutex_locked' set but not used [-Wunused-but-set-variable]
>>    8197 |         bool direct_mutex_locked = false;
>>         |              ^~~~~~~~~~~~~~~~~~~
>> 
>> 
>> vim +/direct_mutex_locked +8197 kernel/trace/ftrace.c
>> 
>>  8182	
>>  8183	/**
>>  8184	 * register_ftrace_function - register a function for profiling
>>  8185	 * @ops:	ops structure that holds the function for profiling.
>>  8186	 *
>>  8187	 * Register a function to be called by all functions in the
>>  8188	 * kernel.
>>  8189	 *
>>  8190	 * Note: @ops->func and all the functions it calls must be labeled
>>  8191	 *       with "notrace", otherwise it will go into a
>>  8192	 *       recursive loop.
>>  8193	 */
>>  8194	int register_ftrace_function(struct ftrace_ops *ops)
>>  8195		__releases(&direct_mutex)
>>  8196	{
>>> 8197		bool direct_mutex_locked = false;
>>  8198		int ret;
>>  8199	
>>  8200		ftrace_ops_init(ops);
>>  8201	
>>  8202		ret = prepare_direct_functions_for_ipmodify(ops);
>>  8203		if (ret < 0)
>>  8204			return ret;
>>  8205		else if (ret == 1)
>>  8206			direct_mutex_locked = true;
> 
> Honestly, this is another horrible trick. Would it be possible to
> call prepare_direct_functions_for_ipmodify() with direct_mutex
> already taken?
> 
> I mean something like:
> 
> 	mutex_lock(&direct_mutex);
> 
> 	ret = prepare_direct_functions_for_ipmodify(ops);
> 	if (ret)
> 		goto out:
> 
> 	mutex_lock(&ftrace_lock);
> 	ret = ftrace_startup(ops, 0);
> 	mutex_unlock(&ftrace_lock);
> 
> out:
> 	mutex_unlock(&direct_mutex);
> 	return ret;

Yeah, we can actually do something like this. We can also move the
ops->flags & FTRACE_OPS_FL_IPMODIFY check to 
register_ftrace_function(), so we only lock direct_mutex when when
it is necessary. 

> 
> 
>>  8208		mutex_lock(&ftrace_lock);
>>  8209	
>>  8210		ret = ftrace_startup(ops, 0);
>>  8211	
>>  8212		mutex_unlock(&ftrace_lock);
>>  8213	
> 
> Would be possible to handle tr->mutex the same way to avoid
> the trylock? I mean to take it in advance before direct_mutex?

Unfortunately, we cannot do this. ftrace code cannot look up 
bpf trampolines without locking direct_mutex. 

Thanks,
Song
Steven Rostedt July 19, 2022, 6:28 p.m. UTC | #4
On Sun, 17 Jul 2022 22:54:47 -0700
Song Liu <song@kernel.org> wrote:

Again, make the subject:

  ftrace: Allow IPMODIFY and DIRECT ops on the same function


> diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> index acb35243ce5d..306bf08acda6 100644
> --- a/include/linux/ftrace.h
> +++ b/include/linux/ftrace.h
> @@ -208,6 +208,43 @@ enum {
>  	FTRACE_OPS_FL_DIRECT			= BIT(17),
>  };
>  
> +/*
> + * FTRACE_OPS_CMD_* commands allow the ftrace core logic to request changes
> + * to a ftrace_ops. Note, the requests may fail.
> + *
> + * ENABLE_SHARE_IPMODIFY_SELF - enable a DIRECT ops to work on the same
> + *                              function as an ops with IPMODIFY. Called
> + *                              when the DIRECT ops is being registered.
> + *                              This is called with both direct_mutex and
> + *                              ftrace_lock are locked.
> + *
> + * ENABLE_SHARE_IPMODIFY_PEER - enable a DIRECT ops to work on the same
> + *                              function as an ops with IPMODIFY. Called
> + *                              when the other ops (the one with IPMODIFY)
> + *                              is being registered.
> + *                              This is called with direct_mutex locked.
> + *
> + * DISABLE_SHARE_IPMODIFY_PEER - disable a DIRECT ops to work on the same
> + *                               function as an ops with IPMODIFY. Called
> + *                               when the other ops (the one with IPMODIFY)
> + *                               is being unregistered.
> + *                               This is called with direct_mutex locked.
> + */
> +enum ftrace_ops_cmd {
> +	FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF,
> +	FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER,
> +	FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER,
> +};
> +
> +/*
> + * For most ftrace_ops_cmd,
> + * Returns:
> + *        0 - Success.
> + *        -EBUSY - The operation cannot process
> + *        -EAGAIN - The operation cannot process tempoorarily.

Just state:

	Returns:
		0 - Success
		Negative on failure. The return value is dependent
		on the callback.

Let's not bind policy of the callback with ftrace.

> + */
> +typedef int (*ftrace_ops_func_t)(struct ftrace_ops *op, enum ftrace_ops_cmd cmd);
> +
>  #ifdef CONFIG_DYNAMIC_FTRACE
>  /* The hash used to know what functions callbacks trace */
>  struct ftrace_ops_hash {
> @@ -250,6 +287,7 @@ struct ftrace_ops {
>  	unsigned long			trampoline;
>  	unsigned long			trampoline_size;
>  	struct list_head		list;
> +	ftrace_ops_func_t		ops_func;
>  #endif
>  };
>  
> diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
> index 0c15ec997c13..f52efbd13e51 100644
> --- a/kernel/trace/ftrace.c
> +++ b/kernel/trace/ftrace.c
> @@ -1861,6 +1861,8 @@ static void ftrace_hash_rec_enable_modify(struct ftrace_ops *ops,
>  	ftrace_hash_rec_update_modify(ops, filter_hash, 1);
>  }
>  
> +static bool ops_references_ip(struct ftrace_ops *ops, unsigned long ip);
> +
>  /*
>   * Try to update IPMODIFY flag on each ftrace_rec. Return 0 if it is OK
>   * or no-needed to update, -EBUSY if it detects a conflict of the flag
> @@ -1869,6 +1871,13 @@ static void ftrace_hash_rec_enable_modify(struct ftrace_ops *ops,
>   *  - If the hash is NULL, it hits all recs (if IPMODIFY is set, this is rejected)
>   *  - If the hash is EMPTY_HASH, it hits nothing
>   *  - Anything else hits the recs which match the hash entries.
> + *
> + * DIRECT ops does not have IPMODIFY flag, but we still need to check it
> + * against functions with FTRACE_FL_IPMODIFY. If there is any overlap, call
> + * ops_func(SHARE_IPMODIFY_SELF) to make sure current ops can share with
> + * IPMODIFY. If ops_func(SHARE_IPMODIFY_SELF) returns non-zero, propagate
> + * the return value to the caller and eventually to the owner of the DIRECT
> + * ops.
>   */
>  static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops,
>  					 struct ftrace_hash *old_hash,
> @@ -1877,17 +1886,23 @@ static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops,
>  	struct ftrace_page *pg;
>  	struct dyn_ftrace *rec, *end = NULL;
>  	int in_old, in_new;
> +	bool is_ipmodify, is_direct;
>  
>  	/* Only update if the ops has been registered */
>  	if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
>  		return 0;
>  
> -	if (!(ops->flags & FTRACE_OPS_FL_IPMODIFY))
> +	is_ipmodify = ops->flags & FTRACE_OPS_FL_IPMODIFY;
> +	is_direct = ops->flags & FTRACE_OPS_FL_DIRECT;
> +
> +	/* either IPMODIFY nor DIRECT, skip */
> +	if (!is_ipmodify && !is_direct)
>  		return 0;

I wonder if we should also add:

	if (WARN_ON_ONCE(is_ipmodify && is_direct))
		return 0;

As a direct should never have an ipmodify.

>  
>  	/*
> -	 * Since the IPMODIFY is a very address sensitive action, we do not
> -	 * allow ftrace_ops to set all functions to new hash.
> +	 * Since the IPMODIFY and DIRECT are very address sensitive
> +	 * actions, we do not allow ftrace_ops to set all functions to new
> +	 * hash.
>  	 */
>  	if (!new_hash || !old_hash)
>  		return -EINVAL;
> @@ -1905,12 +1920,30 @@ static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops,
>  			continue;
>  
>  		if (in_new) {
> -			/* New entries must ensure no others are using it */
> -			if (rec->flags & FTRACE_FL_IPMODIFY)
> -				goto rollback;
> -			rec->flags |= FTRACE_FL_IPMODIFY;
> -		} else /* Removed entry */
> +			if (rec->flags & FTRACE_FL_IPMODIFY) {
> +				int ret;
> +
> +				/* Cannot have two ipmodify on same rec */
> +				if (is_ipmodify)
> +					goto rollback;
> +

I might add a

				FTRACE_WARN_ON(rec->flags &
				FTRACE_FL_DIRECT);

Just to be safe.

That is, if this is true, we are adding a new direct function to a record
that already has one.

> +				/*
> +				 * Another ops with IPMODIFY is already
> +				 * attached. We are now attaching a direct
> +				 * ops. Run SHARE_IPMODIFY_SELF, to check
> +				 * whether sharing is supported.
> +				 */
> +				if (!ops->ops_func)
> +					return -EBUSY;
> +				ret = ops->ops_func(ops, FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF);
> +				if (ret)
> +					return ret;
> +			} else if (is_ipmodify) {
> +				rec->flags |= FTRACE_FL_IPMODIFY;
> +			}
> +		} else if (is_ipmodify) {
>  			rec->flags &= ~FTRACE_FL_IPMODIFY;
> +		}
>  	} while_for_each_ftrace_rec();
>  
>  	return 0;
> @@ -2455,7 +2488,7 @@ static void call_direct_funcs(unsigned long ip, unsigned long pip,
>  struct ftrace_ops direct_ops = {
>  	.func		= call_direct_funcs,
>  	.flags		= FTRACE_OPS_FL_IPMODIFY
> -			  | FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS
> +			  | FTRACE_OPS_FL_SAVE_REGS
>  			  | FTRACE_OPS_FL_PERMANENT,
>  	/*
>  	 * By declaring the main trampoline as this trampoline
> @@ -3072,14 +3105,14 @@ static inline int ops_traces_mod(struct ftrace_ops *ops)
>  }
>  
>  /*
> - * Check if the current ops references the record.
> + * Check if the current ops references the given ip.
>   *
>   * If the ops traces all functions, then it was already accounted for.
>   * If the ops does not trace the current record function, skip it.
>   * If the ops ignores the function via notrace filter, skip it.
>   */
> -static inline bool
> -ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec)
> +static bool
> +ops_references_ip(struct ftrace_ops *ops, unsigned long ip)
>  {
>  	/* If ops isn't enabled, ignore it */
>  	if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
> @@ -3091,16 +3124,29 @@ ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec)
>  
>  	/* The function must be in the filter */
>  	if (!ftrace_hash_empty(ops->func_hash->filter_hash) &&
> -	    !__ftrace_lookup_ip(ops->func_hash->filter_hash, rec->ip))
> +	    !__ftrace_lookup_ip(ops->func_hash->filter_hash, ip))
>  		return false;
>  
>  	/* If in notrace hash, we ignore it too */
> -	if (ftrace_lookup_ip(ops->func_hash->notrace_hash, rec->ip))
> +	if (ftrace_lookup_ip(ops->func_hash->notrace_hash, ip))
>  		return false;
>  
>  	return true;
>  }
>  
> +/*
> + * Check if the current ops references the record.
> + *
> + * If the ops traces all functions, then it was already accounted for.
> + * If the ops does not trace the current record function, skip it.
> + * If the ops ignores the function via notrace filter, skip it.
> + */
> +static bool
> +ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec)
> +{
> +	return ops_references_ip(ops, rec->ip);
> +}
> +
>  static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs)
>  {
>  	bool init_nop = ftrace_need_init_nop();
> @@ -5545,8 +5591,7 @@ int modify_ftrace_direct(unsigned long ip,
>  }
>  EXPORT_SYMBOL_GPL(modify_ftrace_direct);
>  
> -#define MULTI_FLAGS (FTRACE_OPS_FL_IPMODIFY | FTRACE_OPS_FL_DIRECT | \
> -		     FTRACE_OPS_FL_SAVE_REGS)
> +#define MULTI_FLAGS (FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS)
>  
>  static int check_direct_multi(struct ftrace_ops *ops)
>  {
> @@ -8004,6 +8049,137 @@ int ftrace_is_dead(void)
>  	return ftrace_disabled;
>  }
>  
> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
> +/*
> + * When registering ftrace_ops with IPMODIFY, it is necessary to make sure
> + * it doesn't conflict with any direct ftrace_ops. If there is existing
> + * direct ftrace_ops on a kernel function being patched, call
> + * FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER on it to enable sharing.
> + *
> + * @ops:     ftrace_ops being registered.
> + *
> + * Returns:
> + *         0 - @ops does not have IPMODIFY or @ops itself is DIRECT, no
> + *             change needed;
> + *         1 - @ops has IPMODIFY, hold direct_mutex;

> + *         -EBUSY - currently registered DIRECT ftrace_ops cannot share the
> + *                  same function with IPMODIFY, abort the register.
> + *         -EAGAIN - cannot make changes to currently registered DIRECT
> + *                   ftrace_ops due to rare race conditions. Should retry
> + *                   later. This is needed to avoid potential deadlocks
> + *                   on the DIRECT ftrace_ops side.

Again, these are ops_func() specific and has nothing to do with the logic
in this file. Just state:

 * Returns:
 *         0 - @ops does not have IPMODIFY or @ops itself is DIRECT, no
 *             change needed;
 *         1 - @ops has IPMODIFY, hold direct_mutex;
 *         Negative on error.

And if we move the logic that this does not keep hold of the direct_mutex,
we could just let the callback return any non-zero on error.

> + */
> +static int prepare_direct_functions_for_ipmodify(struct ftrace_ops *ops)
> +	__acquires(&direct_mutex)
> +{
> +	struct ftrace_func_entry *entry;
> +	struct ftrace_hash *hash;
> +	struct ftrace_ops *op;
> +	int size, i, ret;
> +
> +	if (!(ops->flags & FTRACE_OPS_FL_IPMODIFY))
> +		return 0;
> +
> +	mutex_lock(&direct_mutex);
> +
> +	hash = ops->func_hash->filter_hash;
> +	size = 1 << hash->size_bits;
> +	for (i = 0; i < size; i++) {
> +		hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
> +			unsigned long ip = entry->ip;
> +			bool found_op = false;
> +
> +			mutex_lock(&ftrace_lock);
> +			do_for_each_ftrace_op(op, ftrace_ops_list) {
> +				if (!(op->flags & FTRACE_OPS_FL_DIRECT))
> +					continue;
> +				if (ops_references_ip(op, ip)) {
> +					found_op = true;
> +					break;

I think you want a goto here. The macros "do_for_each_ftrace_op() { .. }
while_for_each_ftrace_op()" is a double loop. The break just moves to the
next set of pages and does not break out of the outer loop.

					goto out_loop;

> +				}
> +			} while_for_each_ftrace_op(op);

 out_loop:

> +			mutex_unlock(&ftrace_lock);
> +
> +			if (found_op) {
> +				if (!op->ops_func) {
> +					ret = -EBUSY;
> +					goto err_out;
> +				}
> +				ret = op->ops_func(op, FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER);
> +				if (ret)
> +					goto err_out;
> +			}
> +		}
> +	}
> +
> +	/*
> +	 * Didn't find any overlap with direct ftrace_ops, or the direct
> +	 * function can share with ipmodify. Hold direct_mutex to make sure
> +	 * this doesn't change until we are done.
> +	 */
> +	return 1;
> +
> +err_out:
> +	mutex_unlock(&direct_mutex);
> +	return ret;
> +}
> +
> +/*
> + * Similar to prepare_direct_functions_for_ipmodify, clean up after ops
> + * with IPMODIFY is unregistered. The cleanup is optional for most DIRECT
> + * ops.
> + */
> +static void cleanup_direct_functions_after_ipmodify(struct ftrace_ops *ops)
> +{
> +	struct ftrace_func_entry *entry;
> +	struct ftrace_hash *hash;
> +	struct ftrace_ops *op;
> +	int size, i;
> +
> +	if (!(ops->flags & FTRACE_OPS_FL_IPMODIFY))
> +		return;
> +
> +	mutex_lock(&direct_mutex);
> +
> +	hash = ops->func_hash->filter_hash;
> +	size = 1 << hash->size_bits;
> +	for (i = 0; i < size; i++) {
> +		hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
> +			unsigned long ip = entry->ip;
> +			bool found_op = false;
> +
> +			mutex_lock(&ftrace_lock);
> +			do_for_each_ftrace_op(op, ftrace_ops_list) {
> +				if (!(op->flags & FTRACE_OPS_FL_DIRECT))
> +					continue;
> +				if (ops_references_ip(op, ip)) {
> +					found_op = true;
> +					break;

					goto out_loop;

> +				}
> +			} while_for_each_ftrace_op(op);

 out_loop:

> +			mutex_unlock(&ftrace_lock);
> +
> +			/* The cleanup is optional, iggore any errors */

					"ignore"

> +			if (found_op && op->ops_func)
> +				op->ops_func(op, FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER);
> +		}
> +	}
> +	mutex_unlock(&direct_mutex);
> +}
> +
> +#else  /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
> +
> +static int prepare_direct_functions_for_ipmodify(struct ftrace_ops *ops)
> +{
> +	return 0;
> +}
> +
> +static void cleanup_direct_functions_after_ipmodify(struct ftrace_ops *ops)
> +{
> +}
> +
> +#endif  /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
> +
>  /**
>   * register_ftrace_function - register a function for profiling
>   * @ops:	ops structure that holds the function for profiling.
> @@ -8016,17 +8192,29 @@ int ftrace_is_dead(void)
>   *       recursive loop.
>   */
>  int register_ftrace_function(struct ftrace_ops *ops)
> +	__releases(&direct_mutex)
>  {
> +	bool direct_mutex_locked = false;
>  	int ret;
>  
>  	ftrace_ops_init(ops);
>  

I agree with Petr.

Just grab the direct_mutex_lock here.

	mutex_lock(&direct_mutex);

> +	ret = prepare_direct_functions_for_ipmodify(ops);
> +	if (ret < 0)
> +		return ret;

		goto out_unlock;


> +	else if (ret == 1)
> +		direct_mutex_locked = true;
> +

Nuke the above;

>  	mutex_lock(&ftrace_lock);
>  
>  	ret = ftrace_startup(ops, 0);
>  
>  	mutex_unlock(&ftrace_lock);
>  
> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
> +	if (direct_mutex_locked)
> +		mutex_unlock(&direct_mutex);
> +#endif

Change this to:

 out_unlock:
	mutex_unlock(&direct_mutex);

>  	return ret;
>  }

-- Steve

>  EXPORT_SYMBOL_GPL(register_ftrace_function);
> @@ -8045,6 +8233,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops)
>  	ret = ftrace_shutdown(ops, 0);
>  	mutex_unlock(&ftrace_lock);
>  
> +	cleanup_direct_functions_after_ipmodify(ops);
>  	return ret;
>  }
>  EXPORT_SYMBOL_GPL(unregister_ftrace_function);
Steven Rostedt July 19, 2022, 6:32 p.m. UTC | #5
On Mon, 18 Jul 2022 16:59:51 +0000
Song Liu <songliubraving@fb.com> wrote:

> >> vim +/direct_mutex_locked +8197 kernel/trace/ftrace.c
> >> 
> >>  8182	
> >>  8183	/**
> >>  8184	 * register_ftrace_function - register a function for profiling
> >>  8185	 * @ops:	ops structure that holds the function for profiling.
> >>  8186	 *
> >>  8187	 * Register a function to be called by all functions in the
> >>  8188	 * kernel.
> >>  8189	 *
> >>  8190	 * Note: @ops->func and all the functions it calls must be labeled
> >>  8191	 *       with "notrace", otherwise it will go into a
> >>  8192	 *       recursive loop.
> >>  8193	 */
> >>  8194	int register_ftrace_function(struct ftrace_ops *ops)
> >>  8195		__releases(&direct_mutex)
> >>  8196	{  
> >>> 8197		bool direct_mutex_locked = false;  
> >>  8198		int ret;
> >>  8199	
> >>  8200		ftrace_ops_init(ops);
> >>  8201	
> >>  8202		ret = prepare_direct_functions_for_ipmodify(ops);
> >>  8203		if (ret < 0)
> >>  8204			return ret;
> >>  8205		else if (ret == 1)
> >>  8206			direct_mutex_locked = true;  
> > 
> > Honestly, this is another horrible trick. Would it be possible to
> > call prepare_direct_functions_for_ipmodify() with direct_mutex
> > already taken?

Agreed. I'm not sure why I didn't notice this in the other versions.
Probably was looking too much at the other logic. :-/

> > 
> > I mean something like:
> > 
> > 	mutex_lock(&direct_mutex);
> > 
> > 	ret = prepare_direct_functions_for_ipmodify(ops);
> > 	if (ret)
> > 		goto out:
> > 
> > 	mutex_lock(&ftrace_lock);
> > 	ret = ftrace_startup(ops, 0);
> > 	mutex_unlock(&ftrace_lock);
> > 
> > out:
> > 	mutex_unlock(&direct_mutex);
> > 	return ret;  
> 
> Yeah, we can actually do something like this. We can also move the
> ops->flags & FTRACE_OPS_FL_IPMODIFY check to 
> register_ftrace_function(), so we only lock direct_mutex when when
> it is necessary. 

No need. Just take the direct_mutex, and perhaps add a:

	lockdep_assert_held(&direct_mutex);

in the prepare_direct_functions_for_ipmodify().

This is far from a fast path to do any tricks in trying to optimize it.

-- Steve
Steven Rostedt July 19, 2022, 6:39 p.m. UTC | #6
On Tue, 19 Jul 2022 14:28:56 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:

> > +				/* Cannot have two ipmodify on same rec */
> > +				if (is_ipmodify)
> > +					goto rollback;
> > +  
> 
> I might add a
> 
> 				FTRACE_WARN_ON(rec->flags &
> 				FTRACE_FL_DIRECT);

Bah, my email client line wrapped this. It was suppose to be:

 				FTRACE_WARN_ON(rec->flags & FTRACE_FL_DIRECT);

Just so you don't think I wanted that initial formatting ;-)

-- Steve

> 
> Just to be safe.
> 
> That is, if this is true, we are adding a new direct function to a record
> that already has one.
Song Liu July 19, 2022, 10:28 p.m. UTC | #7
> On Jul 19, 2022, at 11:32 AM, Steven Rostedt <rostedt@goodmis.org> wrote:
> 
> On Mon, 18 Jul 2022 16:59:51 +0000
> Song Liu <songliubraving@fb.com> wrote:
> 
>>>> vim +/direct_mutex_locked +8197 kernel/trace/ftrace.c
>>>> 
>>>> 8182	
>>>> 8183	/**
>>>> 8184	 * register_ftrace_function - register a function for profiling
>>>> 8185	 * @ops:	ops structure that holds the function for profiling.
>>>> 8186	 *
>>>> 8187	 * Register a function to be called by all functions in the
>>>> 8188	 * kernel.
>>>> 8189	 *
>>>> 8190	 * Note: @ops->func and all the functions it calls must be labeled
>>>> 8191	 *       with "notrace", otherwise it will go into a
>>>> 8192	 *       recursive loop.
>>>> 8193	 */
>>>> 8194	int register_ftrace_function(struct ftrace_ops *ops)
>>>> 8195		__releases(&direct_mutex)
>>>> 8196	{  
>>>>> 8197		bool direct_mutex_locked = false;  
>>>> 8198		int ret;
>>>> 8199	
>>>> 8200		ftrace_ops_init(ops);
>>>> 8201	
>>>> 8202		ret = prepare_direct_functions_for_ipmodify(ops);
>>>> 8203		if (ret < 0)
>>>> 8204			return ret;
>>>> 8205		else if (ret == 1)
>>>> 8206			direct_mutex_locked = true;  
>>> 
>>> Honestly, this is another horrible trick. Would it be possible to
>>> call prepare_direct_functions_for_ipmodify() with direct_mutex
>>> already taken?
> 
> Agreed. I'm not sure why I didn't notice this in the other versions.
> Probably was looking too much at the other logic. :-/
> 
>>> 
>>> I mean something like:
>>> 
>>> 	mutex_lock(&direct_mutex);
>>> 
>>> 	ret = prepare_direct_functions_for_ipmodify(ops);
>>> 	if (ret)
>>> 		goto out:
>>> 
>>> 	mutex_lock(&ftrace_lock);
>>> 	ret = ftrace_startup(ops, 0);
>>> 	mutex_unlock(&ftrace_lock);
>>> 
>>> out:
>>> 	mutex_unlock(&direct_mutex);
>>> 	return ret;  
>> 
>> Yeah, we can actually do something like this. We can also move the
>> ops->flags & FTRACE_OPS_FL_IPMODIFY check to 
>> register_ftrace_function(), so we only lock direct_mutex when when
>> it is necessary. 
> 
> No need. Just take the direct_mutex, and perhaps add a:
> 
> 	lockdep_assert_held(&direct_mutex);
> 
> in the prepare_direct_functions_for_ipmodify().
> 
> This is far from a fast path to do any tricks in trying to optimize it.

Got it. I will fix these and send v5. 

Thanks,
Song
Song Liu July 19, 2022, 10:57 p.m. UTC | #8
> On Jul 19, 2022, at 11:28 AM, Steven Rostedt <rostedt@goodmis.org> wrote:
> 
> On Sun, 17 Jul 2022 22:54:47 -0700
> Song Liu <song@kernel.org> wrote:
> 
> Again, make the subject:
> 
>  ftrace: Allow IPMODIFY and DIRECT ops on the same function
> 
Will fix. 

> [...]

>> +
>> +/*
>> + * For most ftrace_ops_cmd,
>> + * Returns:
>> + *        0 - Success.
>> + *        -EBUSY - The operation cannot process
>> + *        -EAGAIN - The operation cannot process tempoorarily.
> 
> Just state:
> 
> 	Returns:
> 		0 - Success
> 		Negative on failure. The return value is dependent
> 		on the callback.
> 
> Let's not bind policy of the callback with ftrace.

Will fix. 

> 
>> + */
>> +typedef int (*ftrace_ops_func_t)(struct ftrace_ops *op, enum ftrace_ops_cmd cmd);
>> +
>> #ifdef CONFIG_DYNAMIC_FTRACE
>> /* The hash used to know what functions callbacks trace */
>> 

[...]

>> 
>> -	if (!(ops->flags & FTRACE_OPS_FL_IPMODIFY))
>> +	is_ipmodify = ops->flags & FTRACE_OPS_FL_IPMODIFY;
>> +	is_direct = ops->flags & FTRACE_OPS_FL_DIRECT;
>> +
>> +	/* either IPMODIFY nor DIRECT, skip */
>> +	if (!is_ipmodify && !is_direct)
>> 		return 0;
> 
> I wonder if we should also add:
> 
> 	if (WARN_ON_ONCE(is_ipmodify && is_direct))
> 		return 0;
> 
> As a direct should never have an ipmodify.

Right, I will also remove IPMODIFY from direct_ops:

@ -2487,8 +2490,7 @@ static void call_direct_funcs(unsigned long ip, unsigned long pip,

 struct ftrace_ops direct_ops = {
        .func           = call_direct_funcs,
-       .flags          = FTRACE_OPS_FL_IPMODIFY
-                         | FTRACE_OPS_FL_SAVE_REGS
+       .flags          = FTRACE_OPS_FL_SAVE_REGS
                          | FTRACE_OPS_FL_PERMANENT,
        /*
         * By declaring the main trampoline as this trampoline


> 
>> 
>> 	/*
>> -	 * Since the IPMODIFY is a very address sensitive action, we do not
>> -	 * allow ftrace_ops to set all functions to new hash.
>> +	 * Since the IPMODIFY and DIRECT are very address sensitive
>> +	 * actions, we do not allow ftrace_ops to set all functions to new
>> +	 * hash.

[...]

> 
> Again, these are ops_func() specific and has nothing to do with the logic
> in this file. Just state:
> 
> * Returns:
> *         0 - @ops does not have IPMODIFY or @ops itself is DIRECT, no
> *             change needed;
> *         1 - @ops has IPMODIFY, hold direct_mutex;
> *         Negative on error.
> 
> And if we move the logic that this does not keep hold of the direct_mutex,
> we could just let the callback return any non-zero on error.
> 
>> + */
>> +static int prepare_direct_functions_for_ipmodify(struct ftrace_ops *ops)
>> +	__acquires(&direct_mutex)
>> +{
>> +	struct ftrace_func_entry *entry;
>> +	struct ftrace_hash *hash;
>> +	struct ftrace_ops *op;
>> +	int size, i, ret;
>> +
>> +	if (!(ops->flags & FTRACE_OPS_FL_IPMODIFY))
>> +		return 0;
>> +
>> +	mutex_lock(&direct_mutex);
>> +
>> +	hash = ops->func_hash->filter_hash;
>> +	size = 1 << hash->size_bits;
>> +	for (i = 0; i < size; i++) {
>> +		hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
>> +			unsigned long ip = entry->ip;
>> +			bool found_op = false;
>> +
>> +			mutex_lock(&ftrace_lock);
>> +			do_for_each_ftrace_op(op, ftrace_ops_list) {
>> +				if (!(op->flags & FTRACE_OPS_FL_DIRECT))
>> +					continue;
>> +				if (ops_references_ip(op, ip)) {
>> +					found_op = true;
>> +					break;
> 
> I think you want a goto here. The macros "do_for_each_ftrace_op() { .. }
> while_for_each_ftrace_op()" is a double loop. The break just moves to the
> next set of pages and does not break out of the outer loop.

Hmmm... really? I didn't see it ...


#define do_for_each_ftrace_op(op, list)                 \
        op = rcu_dereference_raw_check(list);                   \
        do

#define while_for_each_ftrace_op(op)                            \
        while (likely(op = rcu_dereference_raw_check((op)->next)) &&    \
               unlikely((op) != &ftrace_list_end))

Did I miss something...?

> 
> 					goto out_loop;
> 
>> +				}
>> +			} while_for_each_ftrace_op(op);

[...]

> 
>> 	mutex_lock(&ftrace_lock);
>> 
>> 	ret = ftrace_startup(ops, 0);
>> 
>> 	mutex_unlock(&ftrace_lock);
>> 
>> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
>> +	if (direct_mutex_locked)
>> +		mutex_unlock(&direct_mutex);
>> +#endif
> 
> Change this to:
> 
> out_unlock:
> 	mutex_unlock(&direct_mutex);
> 

We still need #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS, as 
direct_mutex is not defined without that config. 

Thanks,
Song

[...]
Steven Rostedt July 19, 2022, 11:04 p.m. UTC | #9
On Tue, 19 Jul 2022 22:57:52 +0000
Song Liu <songliubraving@fb.com> wrote:

> >> +	hash = ops->func_hash->filter_hash;
> >> +	size = 1 << hash->size_bits;
> >> +	for (i = 0; i < size; i++) {
> >> +		hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
> >> +			unsigned long ip = entry->ip;
> >> +			bool found_op = false;
> >> +
> >> +			mutex_lock(&ftrace_lock);
> >> +			do_for_each_ftrace_op(op, ftrace_ops_list) {
> >> +				if (!(op->flags & FTRACE_OPS_FL_DIRECT))
> >> +					continue;
> >> +				if (ops_references_ip(op, ip)) {
> >> +					found_op = true;
> >> +					break;  
> > 
> > I think you want a goto here. The macros "do_for_each_ftrace_op() { .. }
> > while_for_each_ftrace_op()" is a double loop. The break just moves to the
> > next set of pages and does not break out of the outer loop.  
> 
> Hmmm... really? I didn't see it ...
> 
> 
> #define do_for_each_ftrace_op(op, list)                 \
>         op = rcu_dereference_raw_check(list);                   \
>         do
> 
> #define while_for_each_ftrace_op(op)                            \
>         while (likely(op = rcu_dereference_raw_check((op)->next)) &&    \
>                unlikely((op) != &ftrace_list_end))
> 
> Did I miss something...?

Bah, you're right. I was confusing it with do_for_each_ftrace_rec(), which
*is* a double loop.

Never mind ;-)

> 
> > 
> > 					goto out_loop;
> >   
> >> +				}
> >> +			} while_for_each_ftrace_op(op);  
> 
> [...]
> 
> >   
> >> 	mutex_lock(&ftrace_lock);
> >> 
> >> 	ret = ftrace_startup(ops, 0);
> >> 
> >> 	mutex_unlock(&ftrace_lock);
> >> 
> >> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
> >> +	if (direct_mutex_locked)
> >> +		mutex_unlock(&direct_mutex);
> >> +#endif  
> > 
> > Change this to:
> > 
> > out_unlock:
> > 	mutex_unlock(&direct_mutex);
> >   
> 
> We still need #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS, as 
> direct_mutex is not defined without that config. 

Ah, right. I just meant to get rid of the if statement.

To keep the code clean, perhaps we should have:

#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
# define lock_direct_mutex()	mutex_lock(&direct_mutex)
# define unlock_direct_mutex()	mutex_unlock(&direct_mutex)
#else
# define lock_direct_mutex()	do { } while (0)
# define unlock_direct_mutex()	do { } while (0)
#endif

And use that here.

-- Steve
Song Liu July 19, 2022, 11:24 p.m. UTC | #10
> On Jul 19, 2022, at 11:28 AM, Steven Rostedt <rostedt@goodmis.org> wrote:
> 
>> /**
>> * register_ftrace_function - register a function for profiling
>> * @ops:	ops structure that holds the function for profiling.
>> @@ -8016,17 +8192,29 @@ int ftrace_is_dead(void)
>> * recursive loop.
>> */
>> int register_ftrace_function(struct ftrace_ops *ops)
>> +	__releases(&direct_mutex)
>> {
>> +	bool direct_mutex_locked = false;
>> 	int ret;
>> 
>> 	ftrace_ops_init(ops);
>> 
> 
> I agree with Petr.
> 
> Just grab the direct_mutex_lock here.
> 
> 	mutex_lock(&direct_mutex);

Actually, we cannot blindly lock direct_mutex here, as 
register_ftrace_direct() already locks it before calling 
register_ftrace_function(). We still need the if (IPMODIFY)
check. 

Thanks,
Song

> 
>> +	ret = prepare_direct_functions_for_ipmodify(ops);
>> +	if (ret < 0)
>> +		return ret;
>
Steven Rostedt July 19, 2022, 11:27 p.m. UTC | #11
On Tue, 19 Jul 2022 23:24:35 +0000
Song Liu <songliubraving@fb.com> wrote:

> Actually, we cannot blindly lock direct_mutex here, as 
> register_ftrace_direct() already locks it before calling 
> register_ftrace_function(). We still need the if (IPMODIFY)
> check. 

Let's not play games with this then.

Create a register_ftrace_function_nolock()

and use that for register_ftrace_direct().

Otherwise it's going to be a nightmare to keep track of.

-- Steve
diff mbox series

Patch

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index acb35243ce5d..306bf08acda6 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -208,6 +208,43 @@  enum {
 	FTRACE_OPS_FL_DIRECT			= BIT(17),
 };
 
+/*
+ * FTRACE_OPS_CMD_* commands allow the ftrace core logic to request changes
+ * to a ftrace_ops. Note, the requests may fail.
+ *
+ * ENABLE_SHARE_IPMODIFY_SELF - enable a DIRECT ops to work on the same
+ *                              function as an ops with IPMODIFY. Called
+ *                              when the DIRECT ops is being registered.
+ *                              This is called with both direct_mutex and
+ *                              ftrace_lock are locked.
+ *
+ * ENABLE_SHARE_IPMODIFY_PEER - enable a DIRECT ops to work on the same
+ *                              function as an ops with IPMODIFY. Called
+ *                              when the other ops (the one with IPMODIFY)
+ *                              is being registered.
+ *                              This is called with direct_mutex locked.
+ *
+ * DISABLE_SHARE_IPMODIFY_PEER - disable a DIRECT ops to work on the same
+ *                               function as an ops with IPMODIFY. Called
+ *                               when the other ops (the one with IPMODIFY)
+ *                               is being unregistered.
+ *                               This is called with direct_mutex locked.
+ */
+enum ftrace_ops_cmd {
+	FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF,
+	FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER,
+	FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER,
+};
+
+/*
+ * For most ftrace_ops_cmd,
+ * Returns:
+ *        0 - Success.
+ *        -EBUSY - The operation cannot process
+ *        -EAGAIN - The operation cannot process tempoorarily.
+ */
+typedef int (*ftrace_ops_func_t)(struct ftrace_ops *op, enum ftrace_ops_cmd cmd);
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 /* The hash used to know what functions callbacks trace */
 struct ftrace_ops_hash {
@@ -250,6 +287,7 @@  struct ftrace_ops {
 	unsigned long			trampoline;
 	unsigned long			trampoline_size;
 	struct list_head		list;
+	ftrace_ops_func_t		ops_func;
 #endif
 };
 
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 0c15ec997c13..f52efbd13e51 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1861,6 +1861,8 @@  static void ftrace_hash_rec_enable_modify(struct ftrace_ops *ops,
 	ftrace_hash_rec_update_modify(ops, filter_hash, 1);
 }
 
+static bool ops_references_ip(struct ftrace_ops *ops, unsigned long ip);
+
 /*
  * Try to update IPMODIFY flag on each ftrace_rec. Return 0 if it is OK
  * or no-needed to update, -EBUSY if it detects a conflict of the flag
@@ -1869,6 +1871,13 @@  static void ftrace_hash_rec_enable_modify(struct ftrace_ops *ops,
  *  - If the hash is NULL, it hits all recs (if IPMODIFY is set, this is rejected)
  *  - If the hash is EMPTY_HASH, it hits nothing
  *  - Anything else hits the recs which match the hash entries.
+ *
+ * DIRECT ops does not have IPMODIFY flag, but we still need to check it
+ * against functions with FTRACE_FL_IPMODIFY. If there is any overlap, call
+ * ops_func(SHARE_IPMODIFY_SELF) to make sure current ops can share with
+ * IPMODIFY. If ops_func(SHARE_IPMODIFY_SELF) returns non-zero, propagate
+ * the return value to the caller and eventually to the owner of the DIRECT
+ * ops.
  */
 static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops,
 					 struct ftrace_hash *old_hash,
@@ -1877,17 +1886,23 @@  static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops,
 	struct ftrace_page *pg;
 	struct dyn_ftrace *rec, *end = NULL;
 	int in_old, in_new;
+	bool is_ipmodify, is_direct;
 
 	/* Only update if the ops has been registered */
 	if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
 		return 0;
 
-	if (!(ops->flags & FTRACE_OPS_FL_IPMODIFY))
+	is_ipmodify = ops->flags & FTRACE_OPS_FL_IPMODIFY;
+	is_direct = ops->flags & FTRACE_OPS_FL_DIRECT;
+
+	/* either IPMODIFY nor DIRECT, skip */
+	if (!is_ipmodify && !is_direct)
 		return 0;
 
 	/*
-	 * Since the IPMODIFY is a very address sensitive action, we do not
-	 * allow ftrace_ops to set all functions to new hash.
+	 * Since the IPMODIFY and DIRECT are very address sensitive
+	 * actions, we do not allow ftrace_ops to set all functions to new
+	 * hash.
 	 */
 	if (!new_hash || !old_hash)
 		return -EINVAL;
@@ -1905,12 +1920,30 @@  static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops,
 			continue;
 
 		if (in_new) {
-			/* New entries must ensure no others are using it */
-			if (rec->flags & FTRACE_FL_IPMODIFY)
-				goto rollback;
-			rec->flags |= FTRACE_FL_IPMODIFY;
-		} else /* Removed entry */
+			if (rec->flags & FTRACE_FL_IPMODIFY) {
+				int ret;
+
+				/* Cannot have two ipmodify on same rec */
+				if (is_ipmodify)
+					goto rollback;
+
+				/*
+				 * Another ops with IPMODIFY is already
+				 * attached. We are now attaching a direct
+				 * ops. Run SHARE_IPMODIFY_SELF, to check
+				 * whether sharing is supported.
+				 */
+				if (!ops->ops_func)
+					return -EBUSY;
+				ret = ops->ops_func(ops, FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF);
+				if (ret)
+					return ret;
+			} else if (is_ipmodify) {
+				rec->flags |= FTRACE_FL_IPMODIFY;
+			}
+		} else if (is_ipmodify) {
 			rec->flags &= ~FTRACE_FL_IPMODIFY;
+		}
 	} while_for_each_ftrace_rec();
 
 	return 0;
@@ -2455,7 +2488,7 @@  static void call_direct_funcs(unsigned long ip, unsigned long pip,
 struct ftrace_ops direct_ops = {
 	.func		= call_direct_funcs,
 	.flags		= FTRACE_OPS_FL_IPMODIFY
-			  | FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS
+			  | FTRACE_OPS_FL_SAVE_REGS
 			  | FTRACE_OPS_FL_PERMANENT,
 	/*
 	 * By declaring the main trampoline as this trampoline
@@ -3072,14 +3105,14 @@  static inline int ops_traces_mod(struct ftrace_ops *ops)
 }
 
 /*
- * Check if the current ops references the record.
+ * Check if the current ops references the given ip.
  *
  * If the ops traces all functions, then it was already accounted for.
  * If the ops does not trace the current record function, skip it.
  * If the ops ignores the function via notrace filter, skip it.
  */
-static inline bool
-ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec)
+static bool
+ops_references_ip(struct ftrace_ops *ops, unsigned long ip)
 {
 	/* If ops isn't enabled, ignore it */
 	if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
@@ -3091,16 +3124,29 @@  ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec)
 
 	/* The function must be in the filter */
 	if (!ftrace_hash_empty(ops->func_hash->filter_hash) &&
-	    !__ftrace_lookup_ip(ops->func_hash->filter_hash, rec->ip))
+	    !__ftrace_lookup_ip(ops->func_hash->filter_hash, ip))
 		return false;
 
 	/* If in notrace hash, we ignore it too */
-	if (ftrace_lookup_ip(ops->func_hash->notrace_hash, rec->ip))
+	if (ftrace_lookup_ip(ops->func_hash->notrace_hash, ip))
 		return false;
 
 	return true;
 }
 
+/*
+ * Check if the current ops references the record.
+ *
+ * If the ops traces all functions, then it was already accounted for.
+ * If the ops does not trace the current record function, skip it.
+ * If the ops ignores the function via notrace filter, skip it.
+ */
+static bool
+ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec)
+{
+	return ops_references_ip(ops, rec->ip);
+}
+
 static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs)
 {
 	bool init_nop = ftrace_need_init_nop();
@@ -5545,8 +5591,7 @@  int modify_ftrace_direct(unsigned long ip,
 }
 EXPORT_SYMBOL_GPL(modify_ftrace_direct);
 
-#define MULTI_FLAGS (FTRACE_OPS_FL_IPMODIFY | FTRACE_OPS_FL_DIRECT | \
-		     FTRACE_OPS_FL_SAVE_REGS)
+#define MULTI_FLAGS (FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS)
 
 static int check_direct_multi(struct ftrace_ops *ops)
 {
@@ -8004,6 +8049,137 @@  int ftrace_is_dead(void)
 	return ftrace_disabled;
 }
 
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+/*
+ * When registering ftrace_ops with IPMODIFY, it is necessary to make sure
+ * it doesn't conflict with any direct ftrace_ops. If there is existing
+ * direct ftrace_ops on a kernel function being patched, call
+ * FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER on it to enable sharing.
+ *
+ * @ops:     ftrace_ops being registered.
+ *
+ * Returns:
+ *         0 - @ops does not have IPMODIFY or @ops itself is DIRECT, no
+ *             change needed;
+ *         1 - @ops has IPMODIFY, hold direct_mutex;
+ *         -EBUSY - currently registered DIRECT ftrace_ops cannot share the
+ *                  same function with IPMODIFY, abort the register.
+ *         -EAGAIN - cannot make changes to currently registered DIRECT
+ *                   ftrace_ops due to rare race conditions. Should retry
+ *                   later. This is needed to avoid potential deadlocks
+ *                   on the DIRECT ftrace_ops side.
+ */
+static int prepare_direct_functions_for_ipmodify(struct ftrace_ops *ops)
+	__acquires(&direct_mutex)
+{
+	struct ftrace_func_entry *entry;
+	struct ftrace_hash *hash;
+	struct ftrace_ops *op;
+	int size, i, ret;
+
+	if (!(ops->flags & FTRACE_OPS_FL_IPMODIFY))
+		return 0;
+
+	mutex_lock(&direct_mutex);
+
+	hash = ops->func_hash->filter_hash;
+	size = 1 << hash->size_bits;
+	for (i = 0; i < size; i++) {
+		hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
+			unsigned long ip = entry->ip;
+			bool found_op = false;
+
+			mutex_lock(&ftrace_lock);
+			do_for_each_ftrace_op(op, ftrace_ops_list) {
+				if (!(op->flags & FTRACE_OPS_FL_DIRECT))
+					continue;
+				if (ops_references_ip(op, ip)) {
+					found_op = true;
+					break;
+				}
+			} while_for_each_ftrace_op(op);
+			mutex_unlock(&ftrace_lock);
+
+			if (found_op) {
+				if (!op->ops_func) {
+					ret = -EBUSY;
+					goto err_out;
+				}
+				ret = op->ops_func(op, FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER);
+				if (ret)
+					goto err_out;
+			}
+		}
+	}
+
+	/*
+	 * Didn't find any overlap with direct ftrace_ops, or the direct
+	 * function can share with ipmodify. Hold direct_mutex to make sure
+	 * this doesn't change until we are done.
+	 */
+	return 1;
+
+err_out:
+	mutex_unlock(&direct_mutex);
+	return ret;
+}
+
+/*
+ * Similar to prepare_direct_functions_for_ipmodify, clean up after ops
+ * with IPMODIFY is unregistered. The cleanup is optional for most DIRECT
+ * ops.
+ */
+static void cleanup_direct_functions_after_ipmodify(struct ftrace_ops *ops)
+{
+	struct ftrace_func_entry *entry;
+	struct ftrace_hash *hash;
+	struct ftrace_ops *op;
+	int size, i;
+
+	if (!(ops->flags & FTRACE_OPS_FL_IPMODIFY))
+		return;
+
+	mutex_lock(&direct_mutex);
+
+	hash = ops->func_hash->filter_hash;
+	size = 1 << hash->size_bits;
+	for (i = 0; i < size; i++) {
+		hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
+			unsigned long ip = entry->ip;
+			bool found_op = false;
+
+			mutex_lock(&ftrace_lock);
+			do_for_each_ftrace_op(op, ftrace_ops_list) {
+				if (!(op->flags & FTRACE_OPS_FL_DIRECT))
+					continue;
+				if (ops_references_ip(op, ip)) {
+					found_op = true;
+					break;
+				}
+			} while_for_each_ftrace_op(op);
+			mutex_unlock(&ftrace_lock);
+
+			/* The cleanup is optional, iggore any errors */
+			if (found_op && op->ops_func)
+				op->ops_func(op, FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER);
+		}
+	}
+	mutex_unlock(&direct_mutex);
+}
+
+#else  /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
+
+static int prepare_direct_functions_for_ipmodify(struct ftrace_ops *ops)
+{
+	return 0;
+}
+
+static void cleanup_direct_functions_after_ipmodify(struct ftrace_ops *ops)
+{
+}
+
+#endif  /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
+
 /**
  * register_ftrace_function - register a function for profiling
  * @ops:	ops structure that holds the function for profiling.
@@ -8016,17 +8192,29 @@  int ftrace_is_dead(void)
  *       recursive loop.
  */
 int register_ftrace_function(struct ftrace_ops *ops)
+	__releases(&direct_mutex)
 {
+	bool direct_mutex_locked = false;
 	int ret;
 
 	ftrace_ops_init(ops);
 
+	ret = prepare_direct_functions_for_ipmodify(ops);
+	if (ret < 0)
+		return ret;
+	else if (ret == 1)
+		direct_mutex_locked = true;
+
 	mutex_lock(&ftrace_lock);
 
 	ret = ftrace_startup(ops, 0);
 
 	mutex_unlock(&ftrace_lock);
 
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+	if (direct_mutex_locked)
+		mutex_unlock(&direct_mutex);
+#endif
 	return ret;
 }
 EXPORT_SYMBOL_GPL(register_ftrace_function);
@@ -8045,6 +8233,7 @@  int unregister_ftrace_function(struct ftrace_ops *ops)
 	ret = ftrace_shutdown(ops, 0);
 	mutex_unlock(&ftrace_lock);
 
+	cleanup_direct_functions_after_ipmodify(ops);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(unregister_ftrace_function);