
[bpf-next,v6] bpf: Support 64-bit pointers to kfuncs

Message ID 20230405213453.49756-1-iii@linux.ibm.com (mailing list archive)
State Superseded
Delegated to: BPF
Series [bpf-next,v6] bpf: Support 64-bit pointers to kfuncs

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-36 success Logs for test_verifier on x86_64 with llvm-16
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1468 this patch: 1468
netdev/cc_maintainers warning 14 maintainers not CCed: song@kernel.org haoluo@google.com yhs@fb.com borntraeger@linux.ibm.com kuba@kernel.org linux-s390@vger.kernel.org svens@linux.ibm.com john.fastabend@gmail.com kpsingh@kernel.org jolsa@kernel.org hawk@kernel.org netdev@vger.kernel.org martin.lau@linux.dev davem@davemloft.net
netdev/build_clang success Errors and warnings before: 168 this patch: 168
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1462 this patch: 1462
netdev/checkpatch warning CHECK: Please don't use multiple blank lines; WARNING: line length of 87 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-7 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-4 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-5 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-8 success Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for test_maps on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-11 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-13 success Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for test_progs on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-16 success Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-18 success Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for test_progs_no_alu32 on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-21 success Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-23 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for test_progs_no_alu32_parallel on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-25 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-27 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for test_progs_parallel on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-29 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for test_progs_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-31 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-32 success Logs for test_verifier on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-34 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-35 success Logs for test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-33 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for test_progs_no_alu32 on s390x with gcc

Commit Message

Ilya Leoshkevich April 5, 2023, 9:34 p.m. UTC
test_ksyms_module fails to emit a kfunc call targeting a module on
s390x, because the verifier stores the difference between kfunc
address and __bpf_call_base in bpf_insn.imm, which is s32, and modules
are roughly (1 << 42) bytes away from the kernel on s390x.
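
For illustration, the overflow check in add_kfunc_call() (kept for
JITs without far kfunc call support in the diff below) rejects exactly
this case, since a ~1 << 42 delta cannot survive the s32 round-trip:

	/* BPF_CALL_IMM(addr) is the delta addr - __bpf_call_base */
	call_imm = BPF_CALL_IMM(addr);
	/* fails for s390x module kfuncs: |call_imm| is ~1 << 42 */
	if ((unsigned long)(s32)call_imm != call_imm)
		return -EINVAL;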

Fix by keeping BTF id in bpf_insn.imm for BPF_PSEUDO_KFUNC_CALLs,
and storing the absolute address in bpf_kfunc_desc.

Introduce bpf_jit_supports_far_kfunc_call() in order to limit this new
behavior to the s390x JIT; otherwise every other JIT would need to be
modified, which is not desired.

Introduce bpf_get_kfunc_addr() instead of exposing both
find_kfunc_desc() and struct bpf_kfunc_desc.

In addition to sorting kfuncs by imm, also sort them by offset, in
order to handle conflicting imms from different modules. Do this on
all architectures in order to simplify code.
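
For example (with hypothetical values), once imm holds the BTF id, two
kfuncs coming from different module BTFs may end up with the same imm,
and only the fd_array offset breaks the tie:

	/* hypothetical descriptors after verification */
	descs[0] = (struct bpf_kfunc_desc){ .imm = 42, .offset = 1 };
	descs[1] = (struct bpf_kfunc_desc){ .imm = 42, .offset = 2 };
	/* sorted by (imm, offset), a bsearch() keyed on both fields
	 * resolves each call site unambiguously; sorting by imm alone
	 * would be ambiguous
	 */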

Factor out resolving specialized kfuncs (XDP and dynptr) from
fixup_kfunc_call(). This is required because fixup_kfunc_call() uses
find_kfunc_desc(), which returns a const pointer, so the kfunc address
cannot be modified there without stripping const, which is not nice.
It also removes repetition of code like:

	if (bpf_jit_supports_far_kfunc_call())
		desc->addr = func;
	else
		insn->imm = BPF_CALL_IMM(func);

and separates kfunc_desc_tab fixups from kfunc_call fixups.

Suggested-by: Jiri Olsa <olsajiri@gmail.com>
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
---

v5: https://lore.kernel.org/bpf/20230405141407.172357-1-iii@linux.ibm.com/
v5 -> v6: Fix build with !CONFIG_BPF_SYSCALL by moving bpf_get_kfunc_addr()
          declaration outside of the respective ifdef.
          Reported-by: kernel test robot <lkp@intel.com>
          Link: https://lore.kernel.org/oe-kbuild-all/202304060240.OeUgnjzZ-lkp@intel.com/

v4: https://lore.kernel.org/bpf/20230403172833.1552354-1-iii@linux.ibm.com/
v4 -> v5: Fix issues identified by Andrii:
          - Improve bpf_get_kfunc_addr() argument naming.
          - Do overflow check only in !bpf_jit_supports_far_kfunc_call() case.
          - Fix kfunc_desc_cmp_by_imm_off() bug when passing huge values.
          - Update fixup_kfunc_call() comment to reflect the new logic.

v3: https://lore.kernel.org/bpf/20230222223714.80671-1-iii@linux.ibm.com/
v3 -> v4: Use Jiri's proposal and make it work on s390x.

 arch/s390/net/bpf_jit_comp.c |   5 ++
 include/linux/bpf.h          |   3 +
 include/linux/filter.h       |   1 +
 kernel/bpf/core.c            |  11 +++
 kernel/bpf/verifier.c        | 137 +++++++++++++++++++++++++----------
 5 files changed, 117 insertions(+), 40 deletions(-)

Comments

kernel test robot April 6, 2023, 12:25 a.m. UTC | #1
Hi Ilya,

kernel test robot noticed the following build errors:

[auto build test ERROR on bpf-next/master]

url:    https://github.com/intel-lab-lkp/linux/commits/Ilya-Leoshkevich/bpf-Support-64-bit-pointers-to-kfuncs/20230406-053713
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
patch link:    https://lore.kernel.org/r/20230405213453.49756-1-iii%40linux.ibm.com
patch subject: [PATCH bpf-next v6] bpf: Support 64-bit pointers to kfuncs
config: i386-randconfig-c001-20230403 (https://download.01.org/0day-ci/archive/20230406/202304060822.L9VsdUzS-lkp@intel.com/config)
compiler: gcc-11 (Debian 11.3.0-8) 11.3.0
reproduce (this is a W=1 build):
        # https://github.com/intel-lab-lkp/linux/commit/2a9559efd98d24493ac5c889a3ae03dd66b0de26
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Ilya-Leoshkevich/bpf-Support-64-bit-pointers-to-kfuncs/20230406-053713
        git checkout 2a9559efd98d24493ac5c889a3ae03dd66b0de26
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        make W=1 O=build_dir ARCH=i386 olddefconfig
        make W=1 O=build_dir ARCH=i386 SHELL=/bin/bash

If you fix the issue, kindly add the following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202304060822.L9VsdUzS-lkp@intel.com/

All errors (new ones prefixed by >>):

   ld: kernel/bpf/core.o: in function `bpf_jit_get_func_addr':
>> kernel/bpf/core.c:1207: undefined reference to `bpf_get_kfunc_addr'


vim +1207 kernel/bpf/core.c

  1182	
  1183	int bpf_jit_get_func_addr(const struct bpf_prog *prog,
  1184				  const struct bpf_insn *insn, bool extra_pass,
  1185				  u64 *func_addr, bool *func_addr_fixed)
  1186	{
  1187		s16 off = insn->off;
  1188		s32 imm = insn->imm;
  1189		u8 *addr;
  1190		int err;
  1191	
  1192		*func_addr_fixed = insn->src_reg != BPF_PSEUDO_CALL;
  1193		if (!*func_addr_fixed) {
  1194			/* Place-holder address till the last pass has collected
  1195			 * all addresses for JITed subprograms in which case we
  1196			 * can pick them up from prog->aux.
  1197			 */
  1198			if (!extra_pass)
  1199				addr = NULL;
  1200			else if (prog->aux->func &&
  1201				 off >= 0 && off < prog->aux->func_cnt)
  1202				addr = (u8 *)prog->aux->func[off]->bpf_func;
  1203			else
  1204				return -EINVAL;
  1205		} else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
  1206			   bpf_jit_supports_far_kfunc_call()) {
> 1207			err = bpf_get_kfunc_addr(prog, insn->imm, insn->off, &addr);
  1208			if (err)
  1209				return err;
  1210		} else {
  1211			/* Address of a BPF helper call. Since part of the core
  1212			 * kernel, it's always at a fixed location. __bpf_call_base
  1213			 * and the helper with imm relative to it are both in core
  1214			 * kernel.
  1215			 */
  1216			addr = (u8 *)__bpf_call_base + imm;
  1217		}
  1218	
  1219		*func_addr = (unsigned long)addr;
  1220		return 0;
  1221	}
  1222
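
(The failure is a link error in a !CONFIG_BPF_SYSCALL configuration:
core.c is always built, while bpf_get_kfunc_addr() is defined in
verifier.c, which is compiled only with CONFIG_BPF_SYSCALL. One common
pattern for this — an assumption here, not necessarily the fix that was
eventually applied — is a static inline stub next to the declaration:)

	#ifdef CONFIG_BPF_SYSCALL
	int bpf_get_kfunc_addr(const struct bpf_prog *prog, u32 func_id,
			       u16 btf_fd_idx, u8 **func_addr);
	#else
	static inline int bpf_get_kfunc_addr(const struct bpf_prog *prog,
					     u32 func_id, u16 btf_fd_idx,
					     u8 **func_addr)
	{
		return -EOPNOTSUPP; /* hypothetical stub */
	}
	#endif
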
Jiri Olsa April 6, 2023, 9:44 a.m. UTC | #2
On Wed, Apr 05, 2023 at 11:34:53PM +0200, Ilya Leoshkevich wrote:

SNIP

>  
> +int bpf_get_kfunc_addr(const struct bpf_prog *prog, u32 func_id,
> +		       u16 btf_fd_idx, u8 **func_addr)
> +{
> +	const struct bpf_kfunc_desc *desc;
> +
> +	desc = find_kfunc_desc(prog, func_id, btf_fd_idx);
> +	if (!desc)
> +		return -EFAULT;
> +
> +	*func_addr = (u8 *)desc->addr;
> +	return 0;
> +}
> +
>  static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
>  					 s16 offset)
>  {
> @@ -2672,14 +2691,19 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
>  		return -EINVAL;
>  	}
>  
> -	call_imm = BPF_CALL_IMM(addr);
> -	/* Check whether or not the relative offset overflows desc->imm */
> -	if ((unsigned long)(s32)call_imm != call_imm) {
> -		verbose(env, "address of kernel function %s is out of range\n",
> -			func_name);
> -		return -EINVAL;
> +	if (bpf_jit_supports_far_kfunc_call()) {
> +		call_imm = func_id;
> +	} else {
> +		call_imm = BPF_CALL_IMM(addr);

we compute call_imm again in fixup_kfunc_call, seems like we could store
the address and the func_id in desc and have fixup_kfunc_call do the
insn->imm setup


> +		/* Check whether the relative offset overflows desc->imm */
> +		if ((unsigned long)(s32)call_imm != call_imm) {
> +			verbose(env, "address of kernel function %s is out of range\n",
> +				func_name);
> +			return -EINVAL;
> +		}
>  	}
>  
> +

nit, extra line

>  	if (bpf_dev_bound_kfunc_id(func_id)) {
>  		err = bpf_dev_bound_kfunc_check(&env->log, prog_aux);
>  		if (err)
> @@ -2690,6 +2714,7 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
>  	desc->func_id = func_id;
>  	desc->imm = call_imm;
>  	desc->offset = offset;
> +	desc->addr = addr;
>  	err = btf_distill_func_proto(&env->log, desc_btf,
>  				     func_proto, func_name,
>  				     &desc->func_model);
> @@ -2699,19 +2724,19 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
>  	return err;
>  }
>  
> -static int kfunc_desc_cmp_by_imm(const void *a, const void *b)
> +static int kfunc_desc_cmp_by_imm_off(const void *a, const void *b)
>  {
>  	const struct bpf_kfunc_desc *d0 = a;
>  	const struct bpf_kfunc_desc *d1 = b;
>  
> -	if (d0->imm > d1->imm)
> -		return 1;
> -	else if (d0->imm < d1->imm)
> -		return -1;
> +	if (d0->imm != d1->imm)
> +		return d0->imm < d1->imm ? -1 : 1;
> +	if (d0->offset != d1->offset)
> +		return d0->offset < d1->offset ? -1 : 1;
>  	return 0;
>  }
>  

SNIP

> +/* replace a generic kfunc with a specialized version if necessary */
> +static void fixup_kfunc_desc(struct bpf_verifier_env *env,
> +			     struct bpf_kfunc_desc *desc)
> +{
> +	struct bpf_prog *prog = env->prog;
> +	u32 func_id = desc->func_id;
> +	u16 offset = desc->offset;
> +	bool seen_direct_write;
> +	void *xdp_kfunc;
> +	bool is_rdonly;
> +
> +	if (bpf_dev_bound_kfunc_id(func_id)) {
> +		xdp_kfunc = bpf_dev_bound_resolve_kfunc(prog, func_id);
> +		if (xdp_kfunc) {
> +			desc->addr = (unsigned long)xdp_kfunc;
> +			return;
> +		}
> +		/* fallback to default kfunc when not supported by netdev */
> +	}
> +
> +	if (offset)
> +		return;
> +
> +	if (func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
> +		seen_direct_write = env->seen_direct_write;
> +		is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE);
> +
> +		if (is_rdonly)
> +			desc->addr = (unsigned long)bpf_dynptr_from_skb_rdonly;
> +
> +		/* restore env->seen_direct_write to its original value, since
> +		 * may_access_direct_pkt_data mutates it
> +		 */
> +		env->seen_direct_write = seen_direct_write;
> +	}

could we do this directly in add_kfunc_call? 

thanks,
jirka


> +}
> +
> +static void fixup_kfunc_desc_tab(struct bpf_verifier_env *env)
> +{
> +	struct bpf_kfunc_desc_tab *tab = env->prog->aux->kfunc_tab;
> +	u32 i;
> +
> +	if (!tab)
> +		return;
> +
> +	for (i = 0; i < tab->nr_descs; i++)
> +		fixup_kfunc_desc(env, &tab->descs[i]);
> +}
> +
>  static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>  			    struct bpf_insn *insn_buf, int insn_idx, int *cnt)
>  {
>  	const struct bpf_kfunc_desc *desc;
> -	void *xdp_kfunc;
>  
>  	if (!insn->imm) {
>  		verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
> @@ -17355,18 +17429,9 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>  
>  	*cnt = 0;
>  
> -	if (bpf_dev_bound_kfunc_id(insn->imm)) {
> -		xdp_kfunc = bpf_dev_bound_resolve_kfunc(env->prog, insn->imm);
> -		if (xdp_kfunc) {
> -			insn->imm = BPF_CALL_IMM(xdp_kfunc);
> -			return 0;
> -		}
> -
> -		/* fallback to default kfunc when not supported by netdev */
> -	}
> -
> -	/* insn->imm has the btf func_id. Replace it with
> -	 * an address (relative to __bpf_call_base).
> +	/* insn->imm has the btf func_id. Replace it with an offset relative to
> +	 * __bpf_call_base, unless the JIT needs to call functions that are
> +	 * further than 32 bits away (bpf_jit_supports_far_kfunc_call()).
>  	 */
>  	desc = find_kfunc_desc(env->prog, insn->imm, insn->off);
>  	if (!desc) {
> @@ -17375,7 +17440,8 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>  		return -EFAULT;
>  	}
>  
> -	insn->imm = desc->imm;
> +	if (!bpf_jit_supports_far_kfunc_call())
> +		insn->imm = BPF_CALL_IMM(desc->addr);
>  	if (insn->off)
>  		return 0;
>  	if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl]) {
> @@ -17400,17 +17466,6 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>  		   desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
>  		insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
>  		*cnt = 1;
> -	} else if (desc->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
> -		bool seen_direct_write = env->seen_direct_write;
> -		bool is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE);
> -
> -		if (is_rdonly)
> -			insn->imm = BPF_CALL_IMM(bpf_dynptr_from_skb_rdonly);
> -
> -		/* restore env->seen_direct_write to its original value, since
> -		 * may_access_direct_pkt_data mutates it
> -		 */
> -		env->seen_direct_write = seen_direct_write;
>  	}
>  	return 0;
>  }
> @@ -17433,6 +17488,8 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
>  	struct bpf_map *map_ptr;
>  	int i, ret, cnt, delta = 0;
>  
> +	fixup_kfunc_desc_tab(env);
> +
>  	for (i = 0; i < insn_cnt; i++, insn++) {
>  		/* Make divide-by-zero exceptions impossible. */
>  		if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
> @@ -17940,7 +17997,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
>  		}
>  	}
>  
> -	sort_kfunc_descs_by_imm(env->prog);
> +	sort_kfunc_descs_by_imm_off(env->prog);
>  
>  	return 0;
>  }
> -- 
> 2.39.2
>
Ilya Leoshkevich April 6, 2023, 12:31 p.m. UTC | #3
On Thu, 2023-04-06 at 11:44 +0200, Jiri Olsa wrote:
> On Wed, Apr 05, 2023 at 11:34:53PM +0200, Ilya Leoshkevich wrote:
> 
> SNIP
> 
> >  
> > +int bpf_get_kfunc_addr(const struct bpf_prog *prog, u32 func_id,
> > +                      u16 btf_fd_idx, u8 **func_addr)
> > +{
> > +       const struct bpf_kfunc_desc *desc;
> > +
> > +       desc = find_kfunc_desc(prog, func_id, btf_fd_idx);
> > +       if (!desc)
> > +               return -EFAULT;
> > +
> > +       *func_addr = (u8 *)desc->addr;
> > +       return 0;
> > +}
> > +
> >  static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env
> > *env,
> >                                          s16 offset)
> >  {
> > @@ -2672,14 +2691,19 @@ static int add_kfunc_call(struct
> > bpf_verifier_env *env, u32 func_id, s16 offset)
> >                 return -EINVAL;
> >         }
> >  
> > -       call_imm = BPF_CALL_IMM(addr);
> > -       /* Check whether or not the relative offset overflows desc-
> > >imm */
> > -       if ((unsigned long)(s32)call_imm != call_imm) {
> > -               verbose(env, "address of kernel function %s is out
> > of range\n",
> > -                       func_name);
> > -               return -EINVAL;
> > +       if (bpf_jit_supports_far_kfunc_call()) {
> > +               call_imm = func_id;
> > +       } else {
> > +               call_imm = BPF_CALL_IMM(addr);
> 
> we compute call_imm again in fixup_kfunc_call, seems like we could
> store
> the address and the func_id in desc and have fixup_kfunc_call do the
> insn->imm setup

We can drop this diff in fixup_kfunc_call():

-       insn->imm = desc->imm;
+       if (!bpf_jit_supports_far_kfunc_call())
+               insn->imm = BPF_CALL_IMM(desc->addr);

in order to avoid duplicating the imm calculation logic, but I'm not
sure if we want to move the entire desc->imm setup there.

For example, fixup_kfunc_call() considers kfunc_tab const, which is a
nice property that I think is worth keeping.

Another option would be to drop desc->imm, but having it is very
convenient for doing lookups the same way on all architectures. 
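
The lookup I mean is the one in bpf_jit_find_kfunc_model(): the JIT
builds the search key straight from the instruction, regardless of
whether imm holds a relative offset or a BTF id (sketch, condensed
from the diff):

	const struct bpf_kfunc_desc desc = {
		.imm	= insn->imm,
		.offset	= insn->off,
	};
	res = bsearch(&desc, tab->descs, tab->nr_descs,
		      sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm_off);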

> > +               /* Check whether the relative offset overflows
> > desc->imm */
> > +               if ((unsigned long)(s32)call_imm != call_imm) {
> > +                       verbose(env, "address of kernel function %s
> > is out of range\n",
> > +                               func_name);
> > +                       return -EINVAL;
> > +               }
> >         }
> >  
> > +
> 
> nit, extra line

Ouch. Thanks for spotting this.

> 
> >         if (bpf_dev_bound_kfunc_id(func_id)) {
> >                 err = bpf_dev_bound_kfunc_check(&env->log,
> > prog_aux);
> >                 if (err)
> > @@ -2690,6 +2714,7 @@ static int add_kfunc_call(struct
> > bpf_verifier_env *env, u32 func_id, s16 offset)
> >         desc->func_id = func_id;
> >         desc->imm = call_imm;
> >         desc->offset = offset;
> > +       desc->addr = addr;
> >         err = btf_distill_func_proto(&env->log, desc_btf,
> >                                      func_proto, func_name,
> >                                      &desc->func_model);
> > @@ -2699,19 +2724,19 @@ static int add_kfunc_call(struct
> > bpf_verifier_env *env, u32 func_id, s16 offset)
> >         return err;
> >  }
> >  
> > -static int kfunc_desc_cmp_by_imm(const void *a, const void *b)
> > +static int kfunc_desc_cmp_by_imm_off(const void *a, const void *b)
> >  {
> >         const struct bpf_kfunc_desc *d0 = a;
> >         const struct bpf_kfunc_desc *d1 = b;
> >  
> > -       if (d0->imm > d1->imm)
> > -               return 1;
> > -       else if (d0->imm < d1->imm)
> > -               return -1;
> > +       if (d0->imm != d1->imm)
> > +               return d0->imm < d1->imm ? -1 : 1;
> > +       if (d0->offset != d1->offset)
> > +               return d0->offset < d1->offset ? -1 : 1;
> >         return 0;
> >  }
> >  
> 
> SNIP
> 
> > +/* replace a generic kfunc with a specialized version if necessary
> > */
> > +static void fixup_kfunc_desc(struct bpf_verifier_env *env,
> > +                            struct bpf_kfunc_desc *desc)
> > +{
> > +       struct bpf_prog *prog = env->prog;
> > +       u32 func_id = desc->func_id;
> > +       u16 offset = desc->offset;
> > +       bool seen_direct_write;
> > +       void *xdp_kfunc;
> > +       bool is_rdonly;
> > +
> > +       if (bpf_dev_bound_kfunc_id(func_id)) {
> > +               xdp_kfunc = bpf_dev_bound_resolve_kfunc(prog,
> > func_id);
> > +               if (xdp_kfunc) {
> > +                       desc->addr = (unsigned long)xdp_kfunc;
> > +                       return;
> > +               }
> > +               /* fallback to default kfunc when not supported by
> > netdev */
> > +       }
> > +
> > +       if (offset)
> > +               return;
> > +
> > +       if (func_id == special_kfunc_list[KF_bpf_dynptr_from_skb])
> > {
> > +               seen_direct_write = env->seen_direct_write;
> > +               is_rdonly = !may_access_direct_pkt_data(env, NULL,
> > BPF_WRITE);
> > +
> > +               if (is_rdonly)
> > +                       desc->addr = (unsigned
> > long)bpf_dynptr_from_skb_rdonly;
> > +
> > +               /* restore env->seen_direct_write to its original
> > value, since
> > +                * may_access_direct_pkt_data mutates it
> > +                */
> > +               env->seen_direct_write = seen_direct_write;
> > +       }
> 
> could we do this directly in add_kfunc_call?

Initially I thought that it wasn't possible, because
may_access_direct_pkt_data() may depend on data gathered during
verification. But on a second look that's simply not the case, so this
code can indeed be moved to add_kfunc_call().

> 
> thanks,
> jirka

[...]
Jiri Olsa April 6, 2023, 1:06 p.m. UTC | #4
On Thu, Apr 06, 2023 at 02:31:06PM +0200, Ilya Leoshkevich wrote:
> On Thu, 2023-04-06 at 11:44 +0200, Jiri Olsa wrote:
> > On Wed, Apr 05, 2023 at 11:34:53PM +0200, Ilya Leoshkevich wrote:
> > 
> > SNIP
> > 
> > >  
> > > +int bpf_get_kfunc_addr(const struct bpf_prog *prog, u32 func_id,
> > > +                      u16 btf_fd_idx, u8 **func_addr)
> > > +{
> > > +       const struct bpf_kfunc_desc *desc;
> > > +
> > > +       desc = find_kfunc_desc(prog, func_id, btf_fd_idx);
> > > +       if (!desc)
> > > +               return -EFAULT;
> > > +
> > > +       *func_addr = (u8 *)desc->addr;
> > > +       return 0;
> > > +}
> > > +
> > >  static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env
> > > *env,
> > >                                          s16 offset)
> > >  {
> > > @@ -2672,14 +2691,19 @@ static int add_kfunc_call(struct
> > > bpf_verifier_env *env, u32 func_id, s16 offset)
> > >                 return -EINVAL;
> > >         }
> > >  
> > > -       call_imm = BPF_CALL_IMM(addr);
> > > -       /* Check whether or not the relative offset overflows desc-
> > > >imm */
> > > -       if ((unsigned long)(s32)call_imm != call_imm) {
> > > -               verbose(env, "address of kernel function %s is out
> > > of range\n",
> > > -                       func_name);
> > > -               return -EINVAL;
> > > +       if (bpf_jit_supports_far_kfunc_call()) {
> > > +               call_imm = func_id;
> > > +       } else {
> > > +               call_imm = BPF_CALL_IMM(addr);
> > 
> > we compute call_imm again in fixup_kfunc_call, seems like we could
> > store
> > the address and the func_id in desc and have fixup_kfunc_call do the
> > insn->imm setup
> 
> We can drop this diff in fixup_kfunc_call():
> 
> -       insn->imm = desc->imm;
> +       if (!bpf_jit_supports_far_kfunc_call())
> +               insn->imm = BPF_CALL_IMM(desc->addr);
> 
> in order to avoid duplicating the imm calculation logic, but I'm not
> sure if we want to move the entire desc->imm setup there.
> 
> For example, fixup_kfunc_call() considers kfunc_tab const, which is a
> nice property that I think is worth keeping.
> 
> Another option would be to drop desc->imm, but having it is very
> convenient for doing lookups the same way on all architectures. 

ok, I see..  so should we do the following in fixup_kfunc_call:

	if (!bpf_jit_supports_far_kfunc_call())
		insn->imm = desc->imm;

by default there's func_id in insn->imm

jirka

> 
> > > +               /* Check whether the relative offset overflows
> > > desc->imm */
> > > +               if ((unsigned long)(s32)call_imm != call_imm) {
> > > +                       verbose(env, "address of kernel function %s
> > > is out of range\n",
> > > +                               func_name);
> > > +                       return -EINVAL;
> > > +               }
> > >         }
> > >  
> > > +
> > 
> > nit, extra line
> 
> Ouch. Thanks for spotting this.
> 
> > 
> > >         if (bpf_dev_bound_kfunc_id(func_id)) {
> > >                 err = bpf_dev_bound_kfunc_check(&env->log,
> > > prog_aux);
> > >                 if (err)
> > > @@ -2690,6 +2714,7 @@ static int add_kfunc_call(struct
> > > bpf_verifier_env *env, u32 func_id, s16 offset)
> > >         desc->func_id = func_id;
> > >         desc->imm = call_imm;
> > >         desc->offset = offset;
> > > +       desc->addr = addr;
> > >         err = btf_distill_func_proto(&env->log, desc_btf,
> > >                                      func_proto, func_name,
> > >                                      &desc->func_model);
> > > @@ -2699,19 +2724,19 @@ static int add_kfunc_call(struct
> > > bpf_verifier_env *env, u32 func_id, s16 offset)
> > >         return err;
> > >  }
> > >  
> > > -static int kfunc_desc_cmp_by_imm(const void *a, const void *b)
> > > +static int kfunc_desc_cmp_by_imm_off(const void *a, const void *b)
> > >  {
> > >         const struct bpf_kfunc_desc *d0 = a;
> > >         const struct bpf_kfunc_desc *d1 = b;
> > >  
> > > -       if (d0->imm > d1->imm)
> > > -               return 1;
> > > -       else if (d0->imm < d1->imm)
> > > -               return -1;
> > > +       if (d0->imm != d1->imm)
> > > +               return d0->imm < d1->imm ? -1 : 1;
> > > +       if (d0->offset != d1->offset)
> > > +               return d0->offset < d1->offset ? -1 : 1;
> > >         return 0;
> > >  }
> > >  
> > 
> > SNIP
> > 
> > > +/* replace a generic kfunc with a specialized version if necessary
> > > */
> > > +static void fixup_kfunc_desc(struct bpf_verifier_env *env,
> > > +                            struct bpf_kfunc_desc *desc)
> > > +{
> > > +       struct bpf_prog *prog = env->prog;
> > > +       u32 func_id = desc->func_id;
> > > +       u16 offset = desc->offset;
> > > +       bool seen_direct_write;
> > > +       void *xdp_kfunc;
> > > +       bool is_rdonly;
> > > +
> > > +       if (bpf_dev_bound_kfunc_id(func_id)) {
> > > +               xdp_kfunc = bpf_dev_bound_resolve_kfunc(prog,
> > > func_id);
> > > +               if (xdp_kfunc) {
> > > +                       desc->addr = (unsigned long)xdp_kfunc;
> > > +                       return;
> > > +               }
> > > +               /* fallback to default kfunc when not supported by
> > > netdev */
> > > +       }
> > > +
> > > +       if (offset)
> > > +               return;
> > > +
> > > +       if (func_id == special_kfunc_list[KF_bpf_dynptr_from_skb])
> > > {
> > > +               seen_direct_write = env->seen_direct_write;
> > > +               is_rdonly = !may_access_direct_pkt_data(env, NULL,
> > > BPF_WRITE);
> > > +
> > > +               if (is_rdonly)
> > > +                       desc->addr = (unsigned
> > > long)bpf_dynptr_from_skb_rdonly;
> > > +
> > > +               /* restore env->seen_direct_write to its original
> > > value, since
> > > +                * may_access_direct_pkt_data mutates it
> > > +                */
> > > +               env->seen_direct_write = seen_direct_write;
> > > +       }
> > 
> > could we do this directly in add_kfunc_call?
> 
> Initially I thought that it wasn't possible, because
> may_access_direct_pkt_data() may depend on data gathered during
> verification. But on a second look that's simply not the case, so this
> code can indeed be moved to add_kfunc_call().
> 
> > 
> > thanks,
> > jirka
> 
> [...]

Patch

diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index d0846ba818ee..7102e4b674a0 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -2001,6 +2001,11 @@  bool bpf_jit_supports_kfunc_call(void)
 	return true;
 }
 
+bool bpf_jit_supports_far_kfunc_call(void)
+{
+	return true;
+}
+
 int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
 		       void *old_addr, void *new_addr)
 {
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 002a811b6b90..37115c729a0e 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -3035,4 +3035,7 @@  static inline gfp_t bpf_memcg_flags(gfp_t flags)
 	return flags;
 }
 
+int bpf_get_kfunc_addr(const struct bpf_prog *prog, u32 func_id,
+		       u16 btf_fd_idx, u8 **func_addr);
+
 #endif /* _LINUX_BPF_H */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 5364b0c52c1d..bbce89937fde 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -920,6 +920,7 @@  void bpf_jit_compile(struct bpf_prog *prog);
 bool bpf_jit_needs_zext(void);
 bool bpf_jit_supports_subprog_tailcalls(void);
 bool bpf_jit_supports_kfunc_call(void);
+bool bpf_jit_supports_far_kfunc_call(void);
 bool bpf_helper_changes_pkt_data(void *func);
 
 static inline bool bpf_dump_raw_ok(const struct cred *cred)
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index b297e9f60ca1..7a75fdfd707e 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1187,6 +1187,7 @@  int bpf_jit_get_func_addr(const struct bpf_prog *prog,
 	s16 off = insn->off;
 	s32 imm = insn->imm;
 	u8 *addr;
+	int err;
 
 	*func_addr_fixed = insn->src_reg != BPF_PSEUDO_CALL;
 	if (!*func_addr_fixed) {
@@ -1201,6 +1202,11 @@  int bpf_jit_get_func_addr(const struct bpf_prog *prog,
 			addr = (u8 *)prog->aux->func[off]->bpf_func;
 		else
 			return -EINVAL;
+	} else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
+		   bpf_jit_supports_far_kfunc_call()) {
+		err = bpf_get_kfunc_addr(prog, insn->imm, insn->off, &addr);
+		if (err)
+			return err;
 	} else {
 		/* Address of a BPF helper call. Since part of the core
 		 * kernel, it's always at a fixed location. __bpf_call_base
@@ -2732,6 +2738,11 @@  bool __weak bpf_jit_supports_kfunc_call(void)
 	return false;
 }
 
+bool __weak bpf_jit_supports_far_kfunc_call(void)
+{
+	return false;
+}
+
 /* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call
  * skb_copy_bits(), so provide a weak definition of it for NET-less config.
  */
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 56f569811f70..55dd33274c40 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2443,6 +2443,7 @@  struct bpf_kfunc_desc {
 	u32 func_id;
 	s32 imm;
 	u16 offset;
+	unsigned long addr;
 };
 
 struct bpf_kfunc_btf {
@@ -2452,6 +2453,11 @@  struct bpf_kfunc_btf {
 };
 
 struct bpf_kfunc_desc_tab {
+	/* Sorted by func_id (BTF ID) and offset (fd_array offset) during
+	 * verification. JITs do lookups by bpf_insn, where func_id may not be
+	 * available, therefore at the end of verification do_misc_fixups()
+	 * sorts this by imm and offset.
+	 */
 	struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS];
 	u32 nr_descs;
 };
@@ -2492,6 +2498,19 @@  find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
 		       sizeof(tab->descs[0]), kfunc_desc_cmp_by_id_off);
 }
 
+int bpf_get_kfunc_addr(const struct bpf_prog *prog, u32 func_id,
+		       u16 btf_fd_idx, u8 **func_addr)
+{
+	const struct bpf_kfunc_desc *desc;
+
+	desc = find_kfunc_desc(prog, func_id, btf_fd_idx);
+	if (!desc)
+		return -EFAULT;
+
+	*func_addr = (u8 *)desc->addr;
+	return 0;
+}
+
 static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
 					 s16 offset)
 {
@@ -2672,14 +2691,19 @@  static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
 		return -EINVAL;
 	}
 
-	call_imm = BPF_CALL_IMM(addr);
-	/* Check whether or not the relative offset overflows desc->imm */
-	if ((unsigned long)(s32)call_imm != call_imm) {
-		verbose(env, "address of kernel function %s is out of range\n",
-			func_name);
-		return -EINVAL;
+	if (bpf_jit_supports_far_kfunc_call()) {
+		call_imm = func_id;
+	} else {
+		call_imm = BPF_CALL_IMM(addr);
+		/* Check whether the relative offset overflows desc->imm */
+		if ((unsigned long)(s32)call_imm != call_imm) {
+			verbose(env, "address of kernel function %s is out of range\n",
+				func_name);
+			return -EINVAL;
+		}
 	}
 
+
 	if (bpf_dev_bound_kfunc_id(func_id)) {
 		err = bpf_dev_bound_kfunc_check(&env->log, prog_aux);
 		if (err)
@@ -2690,6 +2714,7 @@  static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
 	desc->func_id = func_id;
 	desc->imm = call_imm;
 	desc->offset = offset;
+	desc->addr = addr;
 	err = btf_distill_func_proto(&env->log, desc_btf,
 				     func_proto, func_name,
 				     &desc->func_model);
@@ -2699,19 +2724,19 @@  static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
 	return err;
 }
 
-static int kfunc_desc_cmp_by_imm(const void *a, const void *b)
+static int kfunc_desc_cmp_by_imm_off(const void *a, const void *b)
 {
 	const struct bpf_kfunc_desc *d0 = a;
 	const struct bpf_kfunc_desc *d1 = b;
 
-	if (d0->imm > d1->imm)
-		return 1;
-	else if (d0->imm < d1->imm)
-		return -1;
+	if (d0->imm != d1->imm)
+		return d0->imm < d1->imm ? -1 : 1;
+	if (d0->offset != d1->offset)
+		return d0->offset < d1->offset ? -1 : 1;
 	return 0;
 }
 
-static void sort_kfunc_descs_by_imm(struct bpf_prog *prog)
+static void sort_kfunc_descs_by_imm_off(struct bpf_prog *prog)
 {
 	struct bpf_kfunc_desc_tab *tab;
 
@@ -2720,7 +2745,7 @@  static void sort_kfunc_descs_by_imm(struct bpf_prog *prog)
 		return;
 
 	sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
-	     kfunc_desc_cmp_by_imm, NULL);
+	     kfunc_desc_cmp_by_imm_off, NULL);
 }
 
 bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog)
@@ -2734,13 +2759,14 @@  bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
 {
 	const struct bpf_kfunc_desc desc = {
 		.imm = insn->imm,
+		.offset = insn->off,
 	};
 	const struct bpf_kfunc_desc *res;
 	struct bpf_kfunc_desc_tab *tab;
 
 	tab = prog->aux->kfunc_tab;
 	res = bsearch(&desc, tab->descs, tab->nr_descs,
-		      sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm);
+		      sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm_off);
 
 	return res ? &res->func_model : NULL;
 }
@@ -17342,11 +17368,59 @@  static int fixup_call_args(struct bpf_verifier_env *env)
 	return err;
 }
 
+/* replace a generic kfunc with a specialized version if necessary */
+static void fixup_kfunc_desc(struct bpf_verifier_env *env,
+			     struct bpf_kfunc_desc *desc)
+{
+	struct bpf_prog *prog = env->prog;
+	u32 func_id = desc->func_id;
+	u16 offset = desc->offset;
+	bool seen_direct_write;
+	void *xdp_kfunc;
+	bool is_rdonly;
+
+	if (bpf_dev_bound_kfunc_id(func_id)) {
+		xdp_kfunc = bpf_dev_bound_resolve_kfunc(prog, func_id);
+		if (xdp_kfunc) {
+			desc->addr = (unsigned long)xdp_kfunc;
+			return;
+		}
+		/* fallback to default kfunc when not supported by netdev */
+	}
+
+	if (offset)
+		return;
+
+	if (func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
+		seen_direct_write = env->seen_direct_write;
+		is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE);
+
+		if (is_rdonly)
+			desc->addr = (unsigned long)bpf_dynptr_from_skb_rdonly;
+
+		/* restore env->seen_direct_write to its original value, since
+		 * may_access_direct_pkt_data mutates it
+		 */
+		env->seen_direct_write = seen_direct_write;
+	}
+}
+
+static void fixup_kfunc_desc_tab(struct bpf_verifier_env *env)
+{
+	struct bpf_kfunc_desc_tab *tab = env->prog->aux->kfunc_tab;
+	u32 i;
+
+	if (!tab)
+		return;
+
+	for (i = 0; i < tab->nr_descs; i++)
+		fixup_kfunc_desc(env, &tab->descs[i]);
+}
+
 static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 			    struct bpf_insn *insn_buf, int insn_idx, int *cnt)
 {
 	const struct bpf_kfunc_desc *desc;
-	void *xdp_kfunc;
 
 	if (!insn->imm) {
 		verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
@@ -17355,18 +17429,9 @@  static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 
 	*cnt = 0;
 
-	if (bpf_dev_bound_kfunc_id(insn->imm)) {
-		xdp_kfunc = bpf_dev_bound_resolve_kfunc(env->prog, insn->imm);
-		if (xdp_kfunc) {
-			insn->imm = BPF_CALL_IMM(xdp_kfunc);
-			return 0;
-		}
-
-		/* fallback to default kfunc when not supported by netdev */
-	}
-
-	/* insn->imm has the btf func_id. Replace it with
-	 * an address (relative to __bpf_call_base).
+	/* insn->imm has the btf func_id. Replace it with an offset relative to
+	 * __bpf_call_base, unless the JIT needs to call functions that are
+	 * further than 32 bits away (bpf_jit_supports_far_kfunc_call()).
 	 */
 	desc = find_kfunc_desc(env->prog, insn->imm, insn->off);
 	if (!desc) {
@@ -17375,7 +17440,8 @@  static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 		return -EFAULT;
 	}
 
-	insn->imm = desc->imm;
+	if (!bpf_jit_supports_far_kfunc_call())
+		insn->imm = BPF_CALL_IMM(desc->addr);
 	if (insn->off)
 		return 0;
 	if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl]) {
@@ -17400,17 +17466,6 @@  static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 		   desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
 		insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
 		*cnt = 1;
-	} else if (desc->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
-		bool seen_direct_write = env->seen_direct_write;
-		bool is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE);
-
-		if (is_rdonly)
-			insn->imm = BPF_CALL_IMM(bpf_dynptr_from_skb_rdonly);
-
-		/* restore env->seen_direct_write to its original value, since
-		 * may_access_direct_pkt_data mutates it
-		 */
-		env->seen_direct_write = seen_direct_write;
 	}
 	return 0;
 }
@@ -17433,6 +17488,8 @@  static int do_misc_fixups(struct bpf_verifier_env *env)
 	struct bpf_map *map_ptr;
 	int i, ret, cnt, delta = 0;
 
+	fixup_kfunc_desc_tab(env);
+
 	for (i = 0; i < insn_cnt; i++, insn++) {
 		/* Make divide-by-zero exceptions impossible. */
 		if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
@@ -17940,7 +17997,7 @@  static int do_misc_fixups(struct bpf_verifier_env *env)
 		}
 	}
 
-	sort_kfunc_descs_by_imm(env->prog);
+	sort_kfunc_descs_by_imm_off(env->prog);
 
 	return 0;
 }
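
For completeness, a sketch of how a far-call-capable JIT would consume
the result (the bpf_jit_get_func_addr() call is the real interface
shown above; the emit helpers are hypothetical placeholders for
architecture-specific code):

	u64 func_addr;
	bool func_addr_fixed;
	int err;

	err = bpf_jit_get_func_addr(prog, insn, extra_pass,
				    &func_addr, &func_addr_fixed);
	if (err)
		return err;
	/* load the full 64-bit address and call indirectly, instead of
	 * emitting a +/-2G pc-relative call
	 */
	emit_load_imm64(jit, REG_W1, func_addr);	/* hypothetical */
	emit_indirect_call(jit, REG_W1);		/* hypothetical */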