Message ID | 20240221-hid-bpf-sleepable-v3-5-1fb378ca6301@kernel.org (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | sleepable bpf_timer (was: allow HID-BPF to do device IOs) | expand |
On Wed, 2024-02-21 at 17:25 +0100, Benjamin Tissoires wrote: [...] > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c > index f81c799b2c80..2b11687063ff 100644 > --- a/kernel/bpf/verifier.c > +++ b/kernel/bpf/verifier.c > @@ -5444,6 +5444,26 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno, > return -EACCES; > } > break; > + case BPF_TIMER: > + /* FIXME: kptr does the above, should we use the same? */ I don't think so. Basically, this allows double-word reads/writes at the timer address, which probably should not be allowed. ACCESS_DIRECT is passed to check_map_access() from check_mem_access(), and I don't see any place where a check_mem_access() call would be triggered for a pointer parameter of a kfunc (unless it is accompanied by a size parameter). I tried the following simple program and it verifies fine: struct elem { struct bpf_timer t; }; struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(max_entries, 2); __type(key, int); __type(value, struct elem); } array SEC(".maps"); int bpf_timer_set_sleepable_cb (struct bpf_timer *timer, int (callback_fn)(void *map, int *key, struct bpf_timer *timer)) __ksym __weak; static int cb_sleepable(void *map, int *key, struct bpf_timer *timer) { return 0; } SEC("fentry/bpf_fentry_test5") int BPF_PROG2(test_sleepable, int, a) { struct bpf_timer *arr_timer; int array_key = ARRAY; arr_timer = bpf_map_lookup_elem(&array, &array_key); if (!arr_timer) return 0; bpf_timer_init(arr_timer, &array, CLOCK_MONOTONIC); bpf_timer_set_sleepable_cb(arr_timer, cb_sleepable); bpf_timer_start(arr_timer, 0, 0); return 0; } (In general, it would be easier to review if there were some test cases to play with.) 
> + if (src != ACCESS_DIRECT) { > + verbose(env, "bpf_timer cannot be accessed indirectly by helper\n"); > + return -EACCES; > + } > + if (!tnum_is_const(reg->var_off)) { > + verbose(env, "bpf_timer access cannot have variable offset\n"); > + return -EACCES; > + } > + if (p != off + reg->var_off.value) { > + verbose(env, "bpf_timer access misaligned expected=%u off=%llu\n", > + p, off + reg->var_off.value); > + return -EACCES; > + } > + if (size != bpf_size_to_bytes(BPF_DW)) { > + verbose(env, "bpf_timer access size must be BPF_DW\n"); > + return -EACCES; > + } > + break; > default: > verbose(env, "%s cannot be accessed directly by load/store\n", > btf_field_type_name(field->type)); [...]
On Fri, 2024-02-23 at 02:22 +0200, Eduard Zingerman wrote: [...] > > + case BPF_TIMER: > > + /* FIXME: kptr does the above, should we use the same? */ [...] > I tried the following simple program and it verifies fine: Sorry, I meant that I tried it with the above check removed.
On Wed, 2024-02-21 at 17:25 +0100, Benjamin Tissoires wrote: [...] > @@ -11973,6 +12006,9 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ > if (ret) > return ret; > break; > + case KF_ARG_PTR_TO_TIMER: > + /* FIXME: should we do anything here? */ > + break; I think that here it is necessary to enforce that R1 is PTR_TO_MAP_VALUE and that it points to the timer field of the map value. As is, the following program leads to an in-kernel page fault when printing the verifier log: --- 8< ---------------------------- struct elem { struct bpf_timer t; }; struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(max_entries, 2); __type(key, int); __type(value, struct elem); } array SEC(".maps"); int bpf_timer_set_sleepable_cb (struct bpf_timer *timer, int (callback_fn)(void *map, int *key, struct bpf_timer *timer)) __ksym __weak; static int cb_sleepable(void *map, int *key, struct bpf_timer *timer) { return 0; } SEC("fentry/bpf_fentry_test5") int BPF_PROG2(test_sleepable, int, a) { struct bpf_timer *arr_timer; int array_key = 1; arr_timer = bpf_map_lookup_elem(&array, &array_key); if (!arr_timer) return 0; bpf_timer_init(arr_timer, &array, CLOCK_MONOTONIC); bpf_timer_set_sleepable_cb((void *)&arr_timer, // note incorrect pointer type! cb_sleepable); bpf_timer_start(arr_timer, 0, 0); return 0; } ---------------------------- >8 --- I get the page fault when running: $ ./veristat -l7 -vvv -f test_sleepable timer.bpf.o [ 21.014886] BUG: kernel NULL pointer dereference, address: 0000000000000060 ... [ 21.015780] RIP: 0010:print_reg_state (kernel/bpf/log.c:715) And here is a relevant fragment of print_reg_state(): 713 if (type_is_map_ptr(t)) { 714 if (reg->map_ptr->name[0]) 715 verbose_a("map=%s", reg->map_ptr->name); 716 verbose_a("ks=%d,vs=%d", 717 reg->map_ptr->key_size, 718 reg->map_ptr->value_size); 719 } The error is caused by reg->map_ptr being NULL. 
The code in check_kfunc_args() allows anything in R1, including registers whose type is not a pointer to a map and whose reg->map_ptr is NULL. Later, when check_kfunc_call() runs, it calls push_callback_call(): 12152 err = push_callback_call(env, insn, insn_idx, meta.subprogno, 12153 set_timer_callback_state); This in turn calls set_timer_callback_state(), which sets bogus state for R{1,2,3}: 9683 static int set_timer_callback_state(...) 9684 { 9685 struct bpf_map *map_ptr = caller->regs[BPF_REG_1].map_ptr; 9687 9688 /* bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn); 9689 * callback_fn(struct bpf_map *map, void *key, void *value); 9690 */ 9691 callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP; 9692 __mark_reg_known_zero(&callee->regs[BPF_REG_1]); 9693 callee->regs[BPF_REG_1].map_ptr = map_ptr; ^^^^^^^^^ This is NULL!
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f81c799b2c80..2b11687063ff 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5444,6 +5444,26 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno, return -EACCES; } break; + case BPF_TIMER: + /* FIXME: kptr does the above, should we use the same? */ + if (src != ACCESS_DIRECT) { + verbose(env, "bpf_timer cannot be accessed indirectly by helper\n"); + return -EACCES; + } + if (!tnum_is_const(reg->var_off)) { + verbose(env, "bpf_timer access cannot have variable offset\n"); + return -EACCES; + } + if (p != off + reg->var_off.value) { + verbose(env, "bpf_timer access misaligned expected=%u off=%llu\n", + p, off + reg->var_off.value); + return -EACCES; + } + if (size != bpf_size_to_bytes(BPF_DW)) { + verbose(env, "bpf_timer access size must be BPF_DW\n"); + return -EACCES; + } + break; default: verbose(env, "%s cannot be accessed directly by load/store\n", btf_field_type_name(field->type)); @@ -10789,6 +10809,7 @@ enum { KF_ARG_LIST_NODE_ID, KF_ARG_RB_ROOT_ID, KF_ARG_RB_NODE_ID, + KF_ARG_TIMER_ID, }; BTF_ID_LIST(kf_arg_btf_ids) @@ -10797,6 +10818,7 @@ BTF_ID(struct, bpf_list_head) BTF_ID(struct, bpf_list_node) BTF_ID(struct, bpf_rb_root) BTF_ID(struct, bpf_rb_node) +BTF_ID(struct, bpf_timer_kern) static bool __is_kfunc_ptr_arg_type(const struct btf *btf, const struct btf_param *arg, int type) @@ -10840,6 +10862,12 @@ static bool is_kfunc_arg_rbtree_node(const struct btf *btf, const struct btf_par return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_NODE_ID); } +static bool is_kfunc_arg_timer(const struct btf *btf, const struct btf_param *arg) +{ + bool ret = __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_TIMER_ID); + return ret; +} + static bool is_kfunc_arg_callback(struct bpf_verifier_env *env, const struct btf *btf, const struct btf_param *arg) { @@ -10908,6 +10936,7 @@ enum kfunc_ptr_arg_type { KF_ARG_PTR_TO_RB_NODE, KF_ARG_PTR_TO_NULL, KF_ARG_PTR_TO_CONST_STR, + 
KF_ARG_PTR_TO_TIMER, }; enum special_kfunc_type { @@ -11061,6 +11090,9 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, if (is_kfunc_arg_const_str(meta->btf, &args[argno])) return KF_ARG_PTR_TO_CONST_STR; + if (is_kfunc_arg_timer(meta->btf, &args[argno])) + return KF_ARG_PTR_TO_TIMER; + if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) { if (!btf_type_is_struct(ref_t)) { verbose(env, "kernel function %s args#%d pointer type %s %s is not supported\n", @@ -11693,6 +11725,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ case KF_ARG_PTR_TO_CALLBACK: case KF_ARG_PTR_TO_REFCOUNTED_KPTR: case KF_ARG_PTR_TO_CONST_STR: + case KF_ARG_PTR_TO_TIMER: /* Trusted by default */ break; default: @@ -11973,6 +12006,9 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ if (ret) return ret; break; + case KF_ARG_PTR_TO_TIMER: + /* FIXME: should we do anything here? */ + break; } }
We need to extend the bpf_timer API, but the way forward relies on kfuncs. So make bpf_timer known to kfuncs from the verifier's point of view. Signed-off-by: Benjamin Tissoires <bentiss@kernel.org> --- new in v3 (split from v2 02/10) --- kernel/bpf/verifier.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+)