Message ID | 20231030192810.382942-8-thinker.li@gmail.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | BPF |
Headers | show |
Series | Registrating struct_ops types from modules | expand |
On 10/30/23 12:28 PM, thinker.li@gmail.com wrote: > From: Kui-Feng Lee <thinker.li@gmail.com> > > Replace the static list of struct_ops types with per-btf struct_ops_tab to > enable dynamic registration. > > Both bpf_dummy_ops and bpf_tcp_ca now utilize the registration function > instead of being listed in bpf_struct_ops_types.h. > > Cc: netdev@vger.kernel.org > Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com> > --- > include/linux/bpf.h | 36 ++++++-- > include/linux/btf.h | 5 +- > kernel/bpf/bpf_struct_ops.c | 140 +++++++++--------------------- > kernel/bpf/bpf_struct_ops_types.h | 12 --- > kernel/bpf/btf.c | 41 ++++++++- > net/bpf/bpf_dummy_struct_ops.c | 14 ++- > net/ipv4/bpf_tcp_ca.c | 16 +++- > 7 files changed, 140 insertions(+), 124 deletions(-) > delete mode 100644 kernel/bpf/bpf_struct_ops_types.h > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h > index c993df3cf699..9d7105ff06db 100644 > --- a/include/linux/bpf.h > +++ b/include/linux/bpf.h > @@ -1644,7 +1644,6 @@ struct bpf_struct_ops_desc { > #if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL) > #define BPF_MODULE_OWNER ((void *)((0xeB9FUL << 2) + POISON_POINTER_DELTA)) > const struct bpf_struct_ops_desc *bpf_struct_ops_find(struct btf *btf, u32 type_id); > -void bpf_struct_ops_init(struct btf *btf, struct bpf_verifier_log *log); > bool bpf_struct_ops_get(const void *kdata); > void bpf_struct_ops_put(const void *kdata); > int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key, > @@ -1690,10 +1689,6 @@ static inline const struct bpf_struct_ops_desc *bpf_struct_ops_find(struct btf * > { > return NULL; > } > -static inline void bpf_struct_ops_init(struct btf *btf, > - struct bpf_verifier_log *log) > -{ > -} > static inline bool bpf_try_module_get(const void *data, struct module *owner) > { > return try_module_get(owner); > @@ -3232,4 +3227,35 @@ static inline bool bpf_is_subprog(const struct bpf_prog *prog) > return prog->aux->func_idx != 0; > } > > +int register_bpf_struct_ops(struct bpf_struct_ops *st_ops); > + > +enum bpf_struct_ops_state { > + BPF_STRUCT_OPS_STATE_INIT, > + BPF_STRUCT_OPS_STATE_INUSE, > + BPF_STRUCT_OPS_STATE_TOBEFREE, > + BPF_STRUCT_OPS_STATE_READY, > +}; > + > +struct bpf_struct_ops_common_value { > + refcount_t refcnt; > + enum bpf_struct_ops_state state; > +}; > + > +/* bpf_struct_ops_##_name (e.g. bpf_struct_ops_tcp_congestion_ops) is > + * the map's value exposed to the userspace and its btf-type-id is > + * stored at the map->btf_vmlinux_value_type_id. > + * > + */ > +#define DEFINE_STRUCT_OPS_VALUE_TYPE(_name) \ > +extern struct bpf_struct_ops bpf_##_name; \ > + \ > +struct bpf_struct_ops_##_name { \ > + struct bpf_struct_ops_common_value common; \ > + struct _name data ____cacheline_aligned_in_smp; \ > +} > + > +extern int bpf_struct_ops_init(struct bpf_struct_ops_desc *st_ops_desc, > + struct btf *btf, > + struct bpf_verifier_log *log); > + > #endif /* _LINUX_BPF_H */ > diff --git a/include/linux/btf.h b/include/linux/btf.h > index a8813605f2f6..954536431e0b 100644 > --- a/include/linux/btf.h > +++ b/include/linux/btf.h > @@ -12,6 +12,8 @@ > #include <uapi/linux/bpf.h> > > #define BTF_TYPE_EMIT(type) ((void)(type *)0) > +#define BTF_STRUCT_OPS_TYPE_EMIT(type) {((void)(struct type *)0); \ ((void)(struct type *)0); is new. Why is it needed? > + ((void)(struct bpf_struct_ops_##type *)0); } > #define BTF_TYPE_EMIT_ENUM(enum_val) ((void)enum_val) > > /* These need to be macros, as the expressions are used in assembler input */ > @@ -201,6 +203,7 @@ u32 btf_obj_id(const struct btf *btf); > bool btf_is_kernel(const struct btf *btf); > bool btf_is_module(const struct btf *btf); > struct module *btf_try_get_module(const struct btf *btf); > +struct btf *btf_get_module_btf(const struct module *module); > u32 btf_nr_types(const struct btf *btf); > bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, > const struct btf_member *m, > @@ -575,8 +578,6 @@ static inline bool btf_type_is_struct_ptr(struct btf *btf, const struct btf_type > struct bpf_struct_ops; > struct bpf_struct_ops_desc; > > -struct bpf_struct_ops_desc * > -btf_add_struct_ops(struct btf *btf, struct bpf_struct_ops *st_ops); > const struct bpf_struct_ops_desc * > btf_get_struct_ops(struct btf *btf, u32 *ret_cnt); > > diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c > index db2bbba50e38..f3ec72be9c63 100644 > --- a/kernel/bpf/bpf_struct_ops.c > +++ b/kernel/bpf/bpf_struct_ops.c > @@ -13,21 +13,8 @@ > #include <linux/btf_ids.h> > #include <linux/rcupdate_wait.h> > > -enum bpf_struct_ops_state { > - BPF_STRUCT_OPS_STATE_INIT, > - BPF_STRUCT_OPS_STATE_INUSE, > - BPF_STRUCT_OPS_STATE_TOBEFREE, > - BPF_STRUCT_OPS_STATE_READY, > -}; > - > -struct bpf_struct_ops_common_value { > - refcount_t refcnt; > - enum bpf_struct_ops_state state; > -}; > -#define BPF_STRUCT_OPS_COMMON_VALUE struct bpf_struct_ops_common_value common > - > struct bpf_struct_ops_value { > - BPF_STRUCT_OPS_COMMON_VALUE; > + struct bpf_struct_ops_common_value common; This cleanup is good. It should have been done together in patch 5 instead when refcnt and state were grouped into a new 'struct bpf_struct_ops_common_value'. > char data[] ____cacheline_aligned_in_smp; > }; > > @@ -72,35 +59,6 @@ static DEFINE_MUTEX(update_mutex); > #define VALUE_PREFIX "bpf_struct_ops_" > #define VALUE_PREFIX_LEN (sizeof(VALUE_PREFIX) - 1) > > -/* bpf_struct_ops_##_name (e.g. bpf_struct_ops_tcp_congestion_ops) is > - * the map's value exposed to the userspace and its btf-type-id is > - * stored at the map->btf_vmlinux_value_type_id. > - * > - */ > -#define BPF_STRUCT_OPS_TYPE(_name) \ > -extern struct bpf_struct_ops bpf_##_name; \ > - \ > -struct bpf_struct_ops_##_name { \ > - BPF_STRUCT_OPS_COMMON_VALUE; \ > - struct _name data ____cacheline_aligned_in_smp; \ > -}; > -#include "bpf_struct_ops_types.h" > -#undef BPF_STRUCT_OPS_TYPE > - > -enum { > -#define BPF_STRUCT_OPS_TYPE(_name) BPF_STRUCT_OPS_TYPE_##_name, > -#include "bpf_struct_ops_types.h" > -#undef BPF_STRUCT_OPS_TYPE > - __NR_BPF_STRUCT_OPS_TYPE, > -}; > - > -static struct bpf_struct_ops_desc bpf_struct_ops[] = { > -#define BPF_STRUCT_OPS_TYPE(_name) \ > - [BPF_STRUCT_OPS_TYPE_##_name] = { .st_ops = &bpf_##_name }, > -#include "bpf_struct_ops_types.h" > -#undef BPF_STRUCT_OPS_TYPE > -}; > - > const struct bpf_verifier_ops bpf_struct_ops_verifier_ops = { > }; > > @@ -110,13 +68,22 @@ const struct bpf_prog_ops bpf_struct_ops_prog_ops = { > #endif > }; > > -static const struct btf_type *module_type; > -static const struct btf_type *common_value_type; > +BTF_ID_LIST(st_ops_ids) > +BTF_ID(struct, module) > +BTF_ID(struct, bpf_struct_ops_common_value) This should have been done in a separated patch immediately after patch 1. The patch 7 has unrelated changes/cleanups like this and the above BPF_STRUCT_OPS_COMMON_VALUE which could have been done earlier as preparation patches instead of packing them together with the main change here: "switch to dynamic registration". The commit message for the BTF_ID_LIST changes could be like: "A preparation to completely retire the bpf_struct_ops_init() function in the latter patch...". > + > +enum { > + idx_module_id, > + idx_st_ops_common_value_id, nit. upper case to stay consistent with other similar usages. > +}; > + [ ... ] > +int register_bpf_struct_ops(struct bpf_struct_ops *st_ops) > +{ > + struct bpf_struct_ops_desc *desc; > + struct bpf_verifier_log *log; > + struct btf *btf; > + int err = 0; > + > + if (st_ops == NULL) NULL check is not needed. caller will never do that. If it really wanted to try, other values would have similar effect. > + return -EINVAL; > + > + btf = btf_get_module_btf(st_ops->owner); > + if (!btf) > + return -EINVAL; > + > + log = kzalloc(sizeof(*log), GFP_KERNEL | __GFP_NOWARN); > + if (!log) { > + err = -ENOMEM; > + goto errout; > + } > + > + log->level = BPF_LOG_KERNEL; > + > + desc = btf_add_struct_ops(btf, st_ops); > + if (IS_ERR(desc)) { > + err = PTR_ERR(desc); > + goto errout; > + } > + > + err = bpf_struct_ops_init(desc, btf, log); When bpf_struct_ops_init() returns err, desc is in btf_struct_ops_tab but it is in an uninitialized state. May be do the bpf_struct_ops_init() in btf_add_struct_ops() and only increment struct_ops_tab->cnt when everything is correct. > + > +errout: > + kfree(log); > + btf_put(btf); > + > + return err; > +} > +EXPORT_SYMBOL_GPL(register_bpf_struct_ops);
On 10/30/23 23:36, Martin KaFai Lau wrote: > On 10/30/23 12:28 PM, thinker.li@gmail.com wrote: >> From: Kui-Feng Lee <thinker.li@gmail.com> >> >> Replace the static list of struct_ops types with per-btf >> struct_ops_tab to >> enable dynamic registration. >> >> Both bpf_dummy_ops and bpf_tcp_ca now utilize the registration function >> instead of being listed in bpf_struct_ops_types.h. >> >> Cc: netdev@vger.kernel.org >> Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com> >> --- >> include/linux/bpf.h | 36 ++++++-- >> include/linux/btf.h | 5 +- >> kernel/bpf/bpf_struct_ops.c | 140 +++++++++--------------------- >> kernel/bpf/bpf_struct_ops_types.h | 12 --- >> kernel/bpf/btf.c | 41 ++++++++- >> net/bpf/bpf_dummy_struct_ops.c | 14 ++- >> net/ipv4/bpf_tcp_ca.c | 16 +++- >> 7 files changed, 140 insertions(+), 124 deletions(-) >> delete mode 100644 kernel/bpf/bpf_struct_ops_types.h >> >> diff --git a/include/linux/bpf.h b/include/linux/bpf.h >> index c993df3cf699..9d7105ff06db 100644 >> --- a/include/linux/bpf.h >> +++ b/include/linux/bpf.h >> @@ -1644,7 +1644,6 @@ struct bpf_struct_ops_desc { >> #if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL) >> #define BPF_MODULE_OWNER ((void *)((0xeB9FUL << 2) + >> POISON_POINTER_DELTA)) >> const struct bpf_struct_ops_desc *bpf_struct_ops_find(struct btf >> *btf, u32 type_id); >> -void bpf_struct_ops_init(struct btf *btf, struct bpf_verifier_log *log); >> bool bpf_struct_ops_get(const void *kdata); >> void bpf_struct_ops_put(const void *kdata); >> int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key, >> @@ -1690,10 +1689,6 @@ static inline const struct bpf_struct_ops_desc >> *bpf_struct_ops_find(struct btf * >> { >> return NULL; >> } >> -static inline void bpf_struct_ops_init(struct btf *btf, >> - struct bpf_verifier_log *log) >> -{ >> -} >> static inline bool bpf_try_module_get(const void *data, struct >> module *owner) >> { >> return try_module_get(owner); >> @@ -3232,4 +3227,35 @@ static inline bool bpf_is_subprog(const struct >> bpf_prog *prog) >> return prog->aux->func_idx != 0; >> } >> +int register_bpf_struct_ops(struct bpf_struct_ops *st_ops); >> + >> +enum bpf_struct_ops_state { >> + BPF_STRUCT_OPS_STATE_INIT, >> + BPF_STRUCT_OPS_STATE_INUSE, >> + BPF_STRUCT_OPS_STATE_TOBEFREE, >> + BPF_STRUCT_OPS_STATE_READY, >> +}; >> + >> +struct bpf_struct_ops_common_value { >> + refcount_t refcnt; >> + enum bpf_struct_ops_state state; >> +}; >> + >> +/* bpf_struct_ops_##_name (e.g. bpf_struct_ops_tcp_congestion_ops) is >> + * the map's value exposed to the userspace and its btf-type-id is >> + * stored at the map->btf_vmlinux_value_type_id. >> + * >> + */ >> +#define DEFINE_STRUCT_OPS_VALUE_TYPE(_name) \ >> +extern struct bpf_struct_ops bpf_##_name; \ >> + \ >> +struct bpf_struct_ops_##_name { \ >> + struct bpf_struct_ops_common_value common; \ >> + struct _name data ____cacheline_aligned_in_smp; \ >> +} >> + >> +extern int bpf_struct_ops_init(struct bpf_struct_ops_desc *st_ops_desc, >> + struct btf *btf, >> + struct bpf_verifier_log *log); >> + >> #endif /* _LINUX_BPF_H */ >> diff --git a/include/linux/btf.h b/include/linux/btf.h >> index a8813605f2f6..954536431e0b 100644 >> --- a/include/linux/btf.h >> +++ b/include/linux/btf.h >> @@ -12,6 +12,8 @@ >> #include <uapi/linux/bpf.h> >> #define BTF_TYPE_EMIT(type) ((void)(type *)0) >> +#define BTF_STRUCT_OPS_TYPE_EMIT(type) {((void)(struct type *)0); \ > > ((void)(struct type *)0); is new. Why is it needed? This is a trick of BTF to force compiler generate type info for the given type. Without trick, compiler may skip these types if these type are not used at all in the module. For example, modules usually don't use value types of struct_ops directly. > >> + ((void)(struct bpf_struct_ops_##type *)0); } >> #define BTF_TYPE_EMIT_ENUM(enum_val) ((void)enum_val) >> /* These need to be macros, as the expressions are used in assembler >> input */ >> @@ -201,6 +203,7 @@ u32 btf_obj_id(const struct btf *btf); >> bool btf_is_kernel(const struct btf *btf); >> bool btf_is_module(const struct btf *btf); >> struct module *btf_try_get_module(const struct btf *btf); >> +struct btf *btf_get_module_btf(const struct module *module); >> u32 btf_nr_types(const struct btf *btf); >> bool btf_member_is_reg_int(const struct btf *btf, const struct >> btf_type *s, >> const struct btf_member *m, >> @@ -575,8 +578,6 @@ static inline bool btf_type_is_struct_ptr(struct >> btf *btf, const struct btf_type >> struct bpf_struct_ops; >> struct bpf_struct_ops_desc; >> -struct bpf_struct_ops_desc * >> -btf_add_struct_ops(struct btf *btf, struct bpf_struct_ops *st_ops); >> const struct bpf_struct_ops_desc * >> btf_get_struct_ops(struct btf *btf, u32 *ret_cnt); >> diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c >> index db2bbba50e38..f3ec72be9c63 100644 >> --- a/kernel/bpf/bpf_struct_ops.c >> +++ b/kernel/bpf/bpf_struct_ops.c >> @@ -13,21 +13,8 @@ >> #include <linux/btf_ids.h> >> #include <linux/rcupdate_wait.h> >> -enum bpf_struct_ops_state { >> - BPF_STRUCT_OPS_STATE_INIT, >> - BPF_STRUCT_OPS_STATE_INUSE, >> - BPF_STRUCT_OPS_STATE_TOBEFREE, >> - BPF_STRUCT_OPS_STATE_READY, >> -}; >> - >> -struct bpf_struct_ops_common_value { >> - refcount_t refcnt; >> - enum bpf_struct_ops_state state; >> -}; >> -#define BPF_STRUCT_OPS_COMMON_VALUE struct >> bpf_struct_ops_common_value common >> - >> struct bpf_struct_ops_value { >> - BPF_STRUCT_OPS_COMMON_VALUE; >> + struct bpf_struct_ops_common_value common; > > This cleanup is good. It should have been done together in patch 5 > instead when refcnt and state were grouped into a new 'struct > bpf_struct_ops_common_value'. Got it! > >> char data[] ____cacheline_aligned_in_smp; >> }; >> @@ -72,35 +59,6 @@ static DEFINE_MUTEX(update_mutex); >> #define VALUE_PREFIX "bpf_struct_ops_" >> #define VALUE_PREFIX_LEN (sizeof(VALUE_PREFIX) - 1) >> -/* bpf_struct_ops_##_name (e.g. bpf_struct_ops_tcp_congestion_ops) is >> - * the map's value exposed to the userspace and its btf-type-id is >> - * stored at the map->btf_vmlinux_value_type_id. >> - * >> - */ >> -#define BPF_STRUCT_OPS_TYPE(_name) \ >> -extern struct bpf_struct_ops bpf_##_name; \ >> - \ >> -struct bpf_struct_ops_##_name { \ >> - BPF_STRUCT_OPS_COMMON_VALUE; \ >> - struct _name data ____cacheline_aligned_in_smp; \ >> -}; >> -#include "bpf_struct_ops_types.h" >> -#undef BPF_STRUCT_OPS_TYPE >> - >> -enum { >> -#define BPF_STRUCT_OPS_TYPE(_name) BPF_STRUCT_OPS_TYPE_##_name, >> -#include "bpf_struct_ops_types.h" >> -#undef BPF_STRUCT_OPS_TYPE >> - __NR_BPF_STRUCT_OPS_TYPE, >> -}; >> - >> -static struct bpf_struct_ops_desc bpf_struct_ops[] = { >> -#define BPF_STRUCT_OPS_TYPE(_name) \ >> - [BPF_STRUCT_OPS_TYPE_##_name] = { .st_ops = &bpf_##_name }, >> -#include "bpf_struct_ops_types.h" >> -#undef BPF_STRUCT_OPS_TYPE >> -}; >> - >> const struct bpf_verifier_ops bpf_struct_ops_verifier_ops = { >> }; >> @@ -110,13 +68,22 @@ const struct bpf_prog_ops bpf_struct_ops_prog_ops >> = { >> #endif >> }; >> -static const struct btf_type *module_type; >> -static const struct btf_type *common_value_type; >> +BTF_ID_LIST(st_ops_ids) >> +BTF_ID(struct, module) >> +BTF_ID(struct, bpf_struct_ops_common_value) > > This should have been done in a separated patch immediately after patch > 1. The patch 7 has unrelated changes/cleanups like this and the above > BPF_STRUCT_OPS_COMMON_VALUE which could have been done earlier as > preparation patches instead of packing them together with the main > change here: "switch to dynamic registration". The commit message for > the BTF_ID_LIST changes could be like: "A preparation to completely > retire the bpf_struct_ops_init() function in the latter patch...". > Got it! >> + >> +enum { >> + idx_module_id, >> + idx_st_ops_common_value_id, > > nit. upper case to stay consistent with other similar usages. > >> +}; >> + > > [ ... ] > >> +int register_bpf_struct_ops(struct bpf_struct_ops *st_ops) >> +{ >> + struct bpf_struct_ops_desc *desc; >> + struct bpf_verifier_log *log; >> + struct btf *btf; >> + int err = 0; >> + >> + if (st_ops == NULL) > > NULL check is not needed. caller will never do that. If it really wanted > to try, other values would have similar effect. Ok! > >> + return -EINVAL; >> + >> + btf = btf_get_module_btf(st_ops->owner); >> + if (!btf) >> + return -EINVAL; >> + >> + log = kzalloc(sizeof(*log), GFP_KERNEL | __GFP_NOWARN); >> + if (!log) { >> + err = -ENOMEM; >> + goto errout; >> + } >> + >> + log->level = BPF_LOG_KERNEL; >> + >> + desc = btf_add_struct_ops(btf, st_ops); >> + if (IS_ERR(desc)) { >> + err = PTR_ERR(desc); >> + goto errout; >> + } >> + >> + err = bpf_struct_ops_init(desc, btf, log); > > When bpf_struct_ops_init() returns err, desc is in btf_struct_ops_tab > but it is in an uninitialized state. May be do the bpf_struct_ops_init() > in btf_add_struct_ops() and only increment struct_ops_tab->cnt when > everything is correct. agree > >> + >> +errout: >> + kfree(log); >> + btf_put(btf); >> + >> + return err; >> +} >> +EXPORT_SYMBOL_GPL(register_bpf_struct_ops); > >
On 10/31/23 4:34 PM, Kui-Feng Lee wrote: >>> diff --git a/include/linux/btf.h b/include/linux/btf.h >>> index a8813605f2f6..954536431e0b 100644 >>> --- a/include/linux/btf.h >>> +++ b/include/linux/btf.h >>> @@ -12,6 +12,8 @@ >>> #include <uapi/linux/bpf.h> >>> #define BTF_TYPE_EMIT(type) ((void)(type *)0) >>> +#define BTF_STRUCT_OPS_TYPE_EMIT(type) {((void)(struct type *)0); \ >> >> ((void)(struct type *)0); is new. Why is it needed? > > This is a trick of BTF to force compiler generate type info for > the given type. Without trick, compiler may skip these types if these > type are not used at all in the module. For example, modules usually > don't use value types of struct_ops directly. It is not the value type and value type emit is understood. It is the struct_ops type itself and it is new addition in this patchset afaict. The value type emit is in the next line which was cut out from the context here.
On 10/31/23 17:02, Martin KaFai Lau wrote: > On 10/31/23 4:34 PM, Kui-Feng Lee wrote: >>>> diff --git a/include/linux/btf.h b/include/linux/btf.h >>>> index a8813605f2f6..954536431e0b 100644 >>>> --- a/include/linux/btf.h >>>> +++ b/include/linux/btf.h >>>> @@ -12,6 +12,8 @@ >>>> #include <uapi/linux/bpf.h> >>>> #define BTF_TYPE_EMIT(type) ((void)(type *)0) >>>> +#define BTF_STRUCT_OPS_TYPE_EMIT(type) {((void)(struct type *)0); \ >>> >>> ((void)(struct type *)0); is new. Why is it needed? >> >> This is a trick of BTF to force compiler generate type info for >> the given type. Without trick, compiler may skip these types if these >> type are not used at all in the module. For example, modules usually >> don't use value types of struct_ops directly. > It is not the value type and value type emit is understood. It is the > struct_ops type itself and it is new addition in this patchset afaict. > The value type emit is in the next line which was cut out from the > context here. > I mean both of them are required. In the case of a dummy implementation, struct_ops type itself properly never being used, only being declared by the module. Without this line, the module developer will fail to load a struct_ops map of the dummy type. This line is added to avoid this awful situation.
On 10/31/23 17:02, Martin KaFai Lau wrote: > On 10/31/23 4:34 PM, Kui-Feng Lee wrote: >>>> diff --git a/include/linux/btf.h b/include/linux/btf.h >>>> index a8813605f2f6..954536431e0b 100644 >>>> --- a/include/linux/btf.h >>>> +++ b/include/linux/btf.h >>>> @@ -12,6 +12,8 @@ >>>> #include <uapi/linux/bpf.h> >>>> #define BTF_TYPE_EMIT(type) ((void)(type *)0) >>>> +#define BTF_STRUCT_OPS_TYPE_EMIT(type) {((void)(struct type *)0); \ >>> >>> ((void)(struct type *)0); is new. Why is it needed? >> >> This is a trick of BTF to force compiler generate type info for >> the given type. Without trick, compiler may skip these types if these >> type are not used at all in the module. For example, modules usually >> don't use value types of struct_ops directly. > It is not the value type and value type emit is understood. It is the > struct_ops type itself and it is new addition in this patchset afaict. > The value type emit is in the next line which was cut out from the > context here. > I mean both of them are required. In the case of a dummy implementation, struct_ops type itself properly never being used, only being declared by the module. Without this line, the module developer will fail to load a struct_ops map of the dummy type. This line is added to avoid this awful situation.
On 10/31/23 5:19 PM, Kui-Feng Lee wrote: > > > On 10/31/23 17:02, Martin KaFai Lau wrote: >> On 10/31/23 4:34 PM, Kui-Feng Lee wrote: >>>>> diff --git a/include/linux/btf.h b/include/linux/btf.h >>>>> index a8813605f2f6..954536431e0b 100644 >>>>> --- a/include/linux/btf.h >>>>> +++ b/include/linux/btf.h >>>>> @@ -12,6 +12,8 @@ >>>>> #include <uapi/linux/bpf.h> >>>>> #define BTF_TYPE_EMIT(type) ((void)(type *)0) >>>>> +#define BTF_STRUCT_OPS_TYPE_EMIT(type) {((void)(struct type *)0); \ >>>> >>>> ((void)(struct type *)0); is new. Why is it needed? >>> >>> This is a trick of BTF to force compiler generate type info for >>> the given type. Without trick, compiler may skip these types if these >>> type are not used at all in the module. For example, modules usually >>> don't use value types of struct_ops directly. >> It is not the value type and value type emit is understood. It is the >> struct_ops type itself and it is new addition in this patchset afaict. The >> value type emit is in the next line which was cut out from the context here. >> > I mean both of them are required. > In the case of a dummy implementation, struct_ops type itself properly never > being used, only being declared by the module. Without this line, Other than bpf_dummy_ops, after reg(), the struct_ops->func() must be used somewhere in the kernel or module. Like tcp must be using the tcp_congestion_ops after reg(). bpf_dummy_ops is very special and probably should be moved out to bpf_testmod somehow but this is for later. Even bpf_dummy_ops does not have an issue now. Why it is needed after the kmod support change? or it is a preemptive addition to be future proof only? Addition is fine if it is required to work. I am trying to understand why this new addition is needed after the kmod support change. The reason why this is needed after the kmod support change is not obvious from looking at the code. The commit message didn't mention why and what broke after this kmod change. If someone wants to clean it up a few months later, we will need to figure out why it was added in the first place. > the module developer will fail to load a struct_ops map of the dummy > type. This line is added to avoid this awful situation. >
On 11/1/23 17:17, Martin KaFai Lau wrote: > On 10/31/23 5:19 PM, Kui-Feng Lee wrote: >> >> >> On 10/31/23 17:02, Martin KaFai Lau wrote: >>> On 10/31/23 4:34 PM, Kui-Feng Lee wrote: >>>>>> diff --git a/include/linux/btf.h b/include/linux/btf.h >>>>>> index a8813605f2f6..954536431e0b 100644 >>>>>> --- a/include/linux/btf.h >>>>>> +++ b/include/linux/btf.h >>>>>> @@ -12,6 +12,8 @@ >>>>>> #include <uapi/linux/bpf.h> >>>>>> #define BTF_TYPE_EMIT(type) ((void)(type *)0) >>>>>> +#define BTF_STRUCT_OPS_TYPE_EMIT(type) {((void)(struct type >>>>>> *)0); \ >>>>> >>>>> ((void)(struct type *)0); is new. Why is it needed? >>>> >>>> This is a trick of BTF to force compiler generate type info for >>>> the given type. Without trick, compiler may skip these types if these >>>> type are not used at all in the module. For example, modules usually >>>> don't use value types of struct_ops directly. >>> It is not the value type and value type emit is understood. It is the >>> struct_ops type itself and it is new addition in this patchset >>> afaict. The value type emit is in the next line which was cut out >>> from the context here. >>> >> I mean both of them are required. >> In the case of a dummy implementation, struct_ops type itself properly >> never being used, only being declared by the module. Without this line, > > Other than bpf_dummy_ops, after reg(), the struct_ops->func() must be > used somewhere in the kernel or module. Like tcp must be using the > tcp_congestion_ops after reg(). bpf_dummy_ops is very special and > probably should be moved out to bpf_testmod somehow but this is for > later. Even bpf_dummy_ops does not have an issue now. Why it is needed > after the kmod support change? > > or it is a preemptive addition to be future proof only? > > Addition is fine if it is required to work. I am trying to understand > why this new addition is needed after the kmod support change. The > reason why this is needed after the kmod support change is not obvious > from looking at the code. The commit message didn't mention why and what > broke after this kmod change. If someone wants to clean it up a few > months later, we will need to figure out why it was added in the first > place. It is a future proof. What do you think if I add a comment in the code? > > >> the module developer will fail to load a struct_ops map of the dummy >> type. This line is added to avoid this awful situation. >> >
On 11/1/23 5:59 PM, Kui-Feng Lee wrote: > > > On 11/1/23 17:17, Martin KaFai Lau wrote: >> On 10/31/23 5:19 PM, Kui-Feng Lee wrote: >>> >>> >>> On 10/31/23 17:02, Martin KaFai Lau wrote: >>>> On 10/31/23 4:34 PM, Kui-Feng Lee wrote: >>>>>>> diff --git a/include/linux/btf.h b/include/linux/btf.h >>>>>>> index a8813605f2f6..954536431e0b 100644 >>>>>>> --- a/include/linux/btf.h >>>>>>> +++ b/include/linux/btf.h >>>>>>> @@ -12,6 +12,8 @@ >>>>>>> #include <uapi/linux/bpf.h> >>>>>>> #define BTF_TYPE_EMIT(type) ((void)(type *)0) >>>>>>> +#define BTF_STRUCT_OPS_TYPE_EMIT(type) {((void)(struct type *)0); \ >>>>>> >>>>>> ((void)(struct type *)0); is new. Why is it needed? >>>>> >>>>> This is a trick of BTF to force compiler generate type info for >>>>> the given type. Without trick, compiler may skip these types if these >>>>> type are not used at all in the module. For example, modules usually >>>>> don't use value types of struct_ops directly. >>>> It is not the value type and value type emit is understood. It is the >>>> struct_ops type itself and it is new addition in this patchset afaict. The >>>> value type emit is in the next line which was cut out from the context here. >>>> >>> I mean both of them are required. >>> In the case of a dummy implementation, struct_ops type itself properly never >>> being used, only being declared by the module. Without this line, >> >> Other than bpf_dummy_ops, after reg(), the struct_ops->func() must be used >> somewhere in the kernel or module. Like tcp must be using the >> tcp_congestion_ops after reg(). bpf_dummy_ops is very special and probably >> should be moved out to bpf_testmod somehow but this is for later. Even >> bpf_dummy_ops does not have an issue now. Why it is needed after the kmod >> support change? >> >> or it is a preemptive addition to be future proof only? >> >> Addition is fine if it is required to work. I am trying to understand why this >> new addition is needed after the kmod support change. The reason why this is >> needed after the kmod support change is not obvious from looking at the code. >> The commit message didn't mention why and what broke after this kmod change. >> If someone wants to clean it up a few months later, we will need to figure out >> why it was added in the first place. > > > It is a future proof. > What do you think if I add a comment in the code? If it is not required to work, I prefer not adding it to avoid confusion and avoid future cleanup temptation. Even the artificial bpf_dummy_ops does not need it, so not enough reason to introduce this code redundancy. Switch topic. While we are on a new macro topic, I think a new macro will be useful to emit the value type and register_bpf_struct_ops together. wdyt? > >> >> >>> the module developer will fail to load a struct_ops map of the dummy >>> type. This line is added to avoid this awful situation. >>> >>
On 11/1/23 18:32, Martin KaFai Lau wrote: > On 11/1/23 5:59 PM, Kui-Feng Lee wrote: >> >> >> On 11/1/23 17:17, Martin KaFai Lau wrote: >>> On 10/31/23 5:19 PM, Kui-Feng Lee wrote: >>>> >>>> >>>> On 10/31/23 17:02, Martin KaFai Lau wrote: >>>>> On 10/31/23 4:34 PM, Kui-Feng Lee wrote: >>>>>>>> diff --git a/include/linux/btf.h b/include/linux/btf.h >>>>>>>> index a8813605f2f6..954536431e0b 100644 >>>>>>>> --- a/include/linux/btf.h >>>>>>>> +++ b/include/linux/btf.h >>>>>>>> @@ -12,6 +12,8 @@ >>>>>>>> #include <uapi/linux/bpf.h> >>>>>>>> #define BTF_TYPE_EMIT(type) ((void)(type *)0) >>>>>>>> +#define BTF_STRUCT_OPS_TYPE_EMIT(type) {((void)(struct type >>>>>>>> *)0); \ >>>>>>> >>>>>>> ((void)(struct type *)0); is new. Why is it needed? >>>>>> >>>>>> This is a trick of BTF to force compiler generate type info for >>>>>> the given type. Without trick, compiler may skip these types if these >>>>>> type are not used at all in the module. For example, modules usually >>>>>> don't use value types of struct_ops directly. >>>>> It is not the value type and value type emit is understood. It is >>>>> the struct_ops type itself and it is new addition in this patchset >>>>> afaict. The value type emit is in the next line which was cut out >>>>> from the context here. >>>>> >>>> I mean both of them are required. >>>> In the case of a dummy implementation, struct_ops type itself >>>> properly never being used, only being declared by the module. >>>> Without this line, >>> >>> Other than bpf_dummy_ops, after reg(), the struct_ops->func() must be >>> used somewhere in the kernel or module. Like tcp must be using the >>> tcp_congestion_ops after reg(). bpf_dummy_ops is very special and >>> probably should be moved out to bpf_testmod somehow but this is for >>> later. Even bpf_dummy_ops does not have an issue now. Why it is >>> needed after the kmod support change? >>> >>> or it is a preemptive addition to be future proof only? >>> >>> Addition is fine if it is required to work. I am trying to understand >>> why this new addition is needed after the kmod support change. The >>> reason why this is needed after the kmod support change is not >>> obvious from looking at the code. The commit message didn't mention >>> why and what broke after this kmod change. If someone wants to clean >>> it up a few months later, we will need to figure out why it was added >>> in the first place. >> >> >> It is a future proof. >> What do you think if I add a comment in the code? > > If it is not required to work, I prefer not adding it to avoid confusion > and avoid future cleanup temptation. Even the artificial bpf_dummy_ops > does not need it, so not enough reason to introduce this code redundancy. Got it! > > Switch topic. > While we are on a new macro topic, I think a new macro will be useful to > emit the value type and register_bpf_struct_ops together. wdyt? Like this? #define REGISTER_STRUCT_OPS(st_type, st_ops) { \ BTF_STRUCT_OPS_TYPE_EMIT(st_type); \ register_bpf_struct_ops(st_ops); } while(0) static int bpf_testmod_init(void) { .... REGISTER_STRUCT_OPS(bpf_testmod_ops, &bpf_bpf_testmod_ops); .... } or you like something more aggressive #define REGISTER_STRUCT_OPS(st_type) { \ BTF_STRUCT_OPS_TYPE_EMIT(st_type); \ register_bpf_struct_ops(&bpf_##st_type); } while(0) > >> >>> >>> >>>> the module developer will fail to load a struct_ops map of the dummy >>>> type. This line is added to avoid this awful situation. >>>> >>> >
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c993df3cf699..9d7105ff06db 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1644,7 +1644,6 @@ struct bpf_struct_ops_desc { #if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL) #define BPF_MODULE_OWNER ((void *)((0xeB9FUL << 2) + POISON_POINTER_DELTA)) const struct bpf_struct_ops_desc *bpf_struct_ops_find(struct btf *btf, u32 type_id); -void bpf_struct_ops_init(struct btf *btf, struct bpf_verifier_log *log); bool bpf_struct_ops_get(const void *kdata); void bpf_struct_ops_put(const void *kdata); int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key, @@ -1690,10 +1689,6 @@ static inline const struct bpf_struct_ops_desc *bpf_struct_ops_find(struct btf * { return NULL; } -static inline void bpf_struct_ops_init(struct btf *btf, - struct bpf_verifier_log *log) -{ -} static inline bool bpf_try_module_get(const void *data, struct module *owner) { return try_module_get(owner); @@ -3232,4 +3227,35 @@ static inline bool bpf_is_subprog(const struct bpf_prog *prog) return prog->aux->func_idx != 0; } +int register_bpf_struct_ops(struct bpf_struct_ops *st_ops); + +enum bpf_struct_ops_state { + BPF_STRUCT_OPS_STATE_INIT, + BPF_STRUCT_OPS_STATE_INUSE, + BPF_STRUCT_OPS_STATE_TOBEFREE, + BPF_STRUCT_OPS_STATE_READY, +}; + +struct bpf_struct_ops_common_value { + refcount_t refcnt; + enum bpf_struct_ops_state state; +}; + +/* bpf_struct_ops_##_name (e.g. bpf_struct_ops_tcp_congestion_ops) is + * the map's value exposed to the userspace and its btf-type-id is + * stored at the map->btf_vmlinux_value_type_id. + * + */ +#define DEFINE_STRUCT_OPS_VALUE_TYPE(_name) \ +extern struct bpf_struct_ops bpf_##_name; \ + \ +struct bpf_struct_ops_##_name { \ + struct bpf_struct_ops_common_value common; \ + struct _name data ____cacheline_aligned_in_smp; \ +} + +extern int bpf_struct_ops_init(struct bpf_struct_ops_desc *st_ops_desc, + struct btf *btf, + struct bpf_verifier_log *log); + #endif /* _LINUX_BPF_H */ diff --git a/include/linux/btf.h b/include/linux/btf.h index a8813605f2f6..954536431e0b 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -12,6 +12,8 @@ #include <uapi/linux/bpf.h> #define BTF_TYPE_EMIT(type) ((void)(type *)0) +#define BTF_STRUCT_OPS_TYPE_EMIT(type) {((void)(struct type *)0); \ + ((void)(struct bpf_struct_ops_##type *)0); } #define BTF_TYPE_EMIT_ENUM(enum_val) ((void)enum_val) /* These need to be macros, as the expressions are used in assembler input */ @@ -201,6 +203,7 @@ u32 btf_obj_id(const struct btf *btf); bool btf_is_kernel(const struct btf *btf); bool btf_is_module(const struct btf *btf); struct module *btf_try_get_module(const struct btf *btf); +struct btf *btf_get_module_btf(const struct module *module); u32 btf_nr_types(const struct btf *btf); bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, const struct btf_member *m, @@ -575,8 +578,6 @@ static inline bool btf_type_is_struct_ptr(struct btf *btf, const struct btf_type struct bpf_struct_ops; struct bpf_struct_ops_desc; -struct bpf_struct_ops_desc * -btf_add_struct_ops(struct btf *btf, struct bpf_struct_ops *st_ops); const struct bpf_struct_ops_desc * btf_get_struct_ops(struct btf *btf, u32 *ret_cnt); diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index db2bbba50e38..f3ec72be9c63 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -13,21 +13,8 @@ #include <linux/btf_ids.h> #include <linux/rcupdate_wait.h> -enum bpf_struct_ops_state { - BPF_STRUCT_OPS_STATE_INIT, - BPF_STRUCT_OPS_STATE_INUSE, - BPF_STRUCT_OPS_STATE_TOBEFREE, - BPF_STRUCT_OPS_STATE_READY, -}; - -struct bpf_struct_ops_common_value { - refcount_t refcnt; - enum bpf_struct_ops_state state; -}; -#define BPF_STRUCT_OPS_COMMON_VALUE struct bpf_struct_ops_common_value common - struct bpf_struct_ops_value { - BPF_STRUCT_OPS_COMMON_VALUE; + struct bpf_struct_ops_common_value common; char data[] ____cacheline_aligned_in_smp; }; @@ -72,35 +59,6 @@ static DEFINE_MUTEX(update_mutex); #define VALUE_PREFIX "bpf_struct_ops_" #define VALUE_PREFIX_LEN (sizeof(VALUE_PREFIX) - 1) -/* bpf_struct_ops_##_name (e.g. bpf_struct_ops_tcp_congestion_ops) is - * the map's value exposed to the userspace and its btf-type-id is - * stored at the map->btf_vmlinux_value_type_id. - * - */ -#define BPF_STRUCT_OPS_TYPE(_name) \ -extern struct bpf_struct_ops bpf_##_name; \ - \ -struct bpf_struct_ops_##_name { \ - BPF_STRUCT_OPS_COMMON_VALUE; \ - struct _name data ____cacheline_aligned_in_smp; \ -}; -#include "bpf_struct_ops_types.h" -#undef BPF_STRUCT_OPS_TYPE - -enum { -#define BPF_STRUCT_OPS_TYPE(_name) BPF_STRUCT_OPS_TYPE_##_name, -#include "bpf_struct_ops_types.h" -#undef BPF_STRUCT_OPS_TYPE - __NR_BPF_STRUCT_OPS_TYPE, -}; - -static struct bpf_struct_ops_desc bpf_struct_ops[] = { -#define BPF_STRUCT_OPS_TYPE(_name) \ - [BPF_STRUCT_OPS_TYPE_##_name] = { .st_ops = &bpf_##_name }, -#include "bpf_struct_ops_types.h" -#undef BPF_STRUCT_OPS_TYPE -}; - const struct bpf_verifier_ops bpf_struct_ops_verifier_ops = { }; @@ -110,13 +68,22 @@ const struct bpf_prog_ops bpf_struct_ops_prog_ops = { #endif }; -static const struct btf_type *module_type; -static const struct btf_type *common_value_type; +BTF_ID_LIST(st_ops_ids) +BTF_ID(struct, module) +BTF_ID(struct, bpf_struct_ops_common_value) + +enum { + idx_module_id, + idx_st_ops_common_value_id, +}; + +extern struct btf *btf_vmlinux; static bool is_valid_value_type(struct btf *btf, s32 value_id, const struct btf_type *type, const char *value_name) { + const struct btf_type *common_value_type; const struct btf_member *member; const struct btf_type *vt, *mt; @@ -128,6 +95,8 @@ static bool is_valid_value_type(struct btf *btf, s32 value_id, } member = btf_type_member(vt); mt = btf_type_by_id(btf, member->type); + common_value_type = btf_type_by_id(btf_vmlinux, + st_ops_ids[idx_st_ops_common_value_id]); if (mt != common_value_type) { pr_warn("The first member of %s should be bpf_struct_ops_common_value\n", value_name); @@ -144,9 +113,9 @@ static bool is_valid_value_type(struct btf *btf, s32 value_id, return true; } -static void bpf_struct_ops_init_one(struct bpf_struct_ops_desc *st_ops_desc, - struct btf *btf, - struct bpf_verifier_log *log) +int bpf_struct_ops_init(struct bpf_struct_ops_desc *st_ops_desc, + struct btf *btf, + struct bpf_verifier_log *log) { struct bpf_struct_ops *st_ops = st_ops_desc->st_ops; const struct btf_member *member; @@ -160,7 +129,7 @@ static void bpf_struct_ops_init_one(struct bpf_struct_ops_desc *st_ops_desc, sizeof(value_name)) { pr_warn("struct_ops name %s is too long\n", st_ops->name); - return; + return -EINVAL; } sprintf(value_name, "%s%s", VALUE_PREFIX, st_ops->name); @@ -169,13 +138,13 @@ static void bpf_struct_ops_init_one(struct bpf_struct_ops_desc *st_ops_desc, if (type_id < 0) { pr_warn("Cannot find struct %s in btf_vmlinux\n", st_ops->name); - return; + return -EINVAL; } t = btf_type_by_id(btf, type_id); if (btf_type_vlen(t) > BPF_STRUCT_OPS_MAX_NR_MEMBERS) { pr_warn("Cannot support #%u members in struct %s\n", btf_type_vlen(t), st_ops->name); - return; + return -EINVAL; } value_id = btf_find_by_name_kind(btf, value_name, @@ -183,10 +152,10 @@ static void bpf_struct_ops_init_one(struct bpf_struct_ops_desc *st_ops_desc, if (value_id < 0) { pr_warn("Cannot find struct %s in btf_vmlinux\n", value_name); - return; + return -EINVAL; } if (!is_valid_value_type(btf, value_id, t, value_name)) - return; + return -EINVAL; for_each_member(i, t, member) { const struct btf_type *func_proto; @@ -195,13 +164,13 @@ static void bpf_struct_ops_init_one(struct bpf_struct_ops_desc *st_ops_desc, if (!*mname) { pr_warn("anon member in struct %s is not supported\n", st_ops->name); - break; + return -EOPNOTSUPP; } if (__btf_member_bitfield_size(t, member)) { pr_warn("bit field member %s in struct %s is not supported\n", mname, st_ops->name); - break; + return -EOPNOTSUPP; } func_proto = btf_type_resolve_func_ptr(btf, @@ -213,7 +182,7 @@ static void bpf_struct_ops_init_one(struct bpf_struct_ops_desc *st_ops_desc, &st_ops->func_models[i])) { pr_warn("Error in parsing func ptr %s in struct %s\n", mname, st_ops->name); - break; + return -EINVAL; } } @@ -221,6 +190,7 @@ static void bpf_struct_ops_init_one(struct bpf_struct_ops_desc *st_ops_desc, if (st_ops->init(btf)) { pr_warn("Error in init bpf_struct_ops %s\n", st_ops->name); + return -EINVAL; } else { st_ops_desc->btf = btf; st_ops_desc->type_id = type_id; @@ -230,53 +200,24 @@ static void bpf_struct_ops_init_one(struct bpf_struct_ops_desc *st_ops_desc, value_id); } } -} - -void bpf_struct_ops_init(struct btf *btf, struct bpf_verifier_log *log) -{ - struct bpf_struct_ops_desc *st_ops_desc; - s32 module_id, common_value_id; - u32 i; - - /* Ensure BTF type is emitted for "struct bpf_struct_ops_##_name" */ -#define BPF_STRUCT_OPS_TYPE(_name) BTF_TYPE_EMIT(struct bpf_struct_ops_##_name); -#include "bpf_struct_ops_types.h" -#undef BPF_STRUCT_OPS_TYPE - - module_id = btf_find_by_name_kind(btf, "module", BTF_KIND_STRUCT); - if (module_id < 0) { - pr_warn("Cannot find struct module in btf_vmlinux\n"); - return; - } - module_type = btf_type_by_id(btf, module_id); - common_value_id = btf_find_by_name_kind(btf, - "bpf_struct_ops_common_value", - BTF_KIND_STRUCT); - if (common_value_id < 0) { - pr_warn("Cannot find struct common_value in btf_vmlinux\n"); - return; - } - common_value_type = btf_type_by_id(btf, common_value_id); - for (i = 0; i < ARRAY_SIZE(bpf_struct_ops); i++) { - st_ops_desc = &bpf_struct_ops[i]; - bpf_struct_ops_init_one(st_ops_desc, btf, log); - } + return 0; } -extern struct btf *btf_vmlinux; - static const struct bpf_struct_ops_desc * bpf_struct_ops_find_value(struct btf *btf, u32 value_id) { + const struct bpf_struct_ops_desc *st_ops_list; unsigned int i; + u32 cnt = 0; - if (!value_id || !btf_vmlinux) + if (!value_id) return NULL; - for (i = 0; i < ARRAY_SIZE(bpf_struct_ops); i++) { - if (bpf_struct_ops[i].value_id == value_id) - return &bpf_struct_ops[i]; + st_ops_list = btf_get_struct_ops(btf, &cnt); + for (i = 0; i < cnt; i++) { + if (st_ops_list[i].value_id == value_id) + return &st_ops_list[i]; } return NULL; @@ -285,14 +226,17 @@ bpf_struct_ops_find_value(struct btf *btf, u32 value_id) const struct bpf_struct_ops_desc * bpf_struct_ops_find(struct btf *btf, u32 type_id) { + const struct bpf_struct_ops_desc *st_ops_list; unsigned int i; + u32 cnt; - if (!type_id || !btf_vmlinux) + if (!type_id) return NULL; - for (i = 0; i < ARRAY_SIZE(bpf_struct_ops); i++) { - if (bpf_struct_ops[i].type_id == type_id) - return &bpf_struct_ops[i]; + st_ops_list = btf_get_struct_ops(btf, &cnt); + for (i = 0; i < cnt; i++) { + if (st_ops_list[i].type_id == type_id) + return &st_ops_list[i]; } return NULL; @@ -429,6 +373,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, const struct bpf_struct_ops_desc *st_ops_desc = st_map->st_ops_desc; const struct bpf_struct_ops *st_ops = st_ops_desc->st_ops; struct bpf_struct_ops_value *uvalue, *kvalue; + const struct btf_type *module_type; const struct btf_member *member; const struct btf_type *t = st_ops_desc->type; struct bpf_tramp_links *tlinks; @@ -485,6 +430,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, image = st_map->image; image_end = st_map->image + PAGE_SIZE; + module_type = btf_type_by_id(btf_vmlinux, st_ops_ids[idx_module_id]); for_each_member(i, t, member) { const struct btf_type *mtype, *ptype; struct bpf_prog *prog; diff --git a/kernel/bpf/bpf_struct_ops_types.h b/kernel/bpf/bpf_struct_ops_types.h deleted file mode 100644 index 5678a9ddf817..000000000000 --- a/kernel/bpf/bpf_struct_ops_types.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* internal file - do not include directly */ - -#ifdef CONFIG_BPF_JIT -#ifdef CONFIG_NET -BPF_STRUCT_OPS_TYPE(bpf_dummy_ops) -#endif -#ifdef CONFIG_INET -#include <net/tcp.h> -BPF_STRUCT_OPS_TYPE(tcp_congestion_ops) -#endif -#endif diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 076bd61d88b1..57d2114927e4 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -5790,8 +5790,6 @@ struct btf *btf_parse_vmlinux(void) /* btf_parse_vmlinux() runs under bpf_verifier_lock */ bpf_ctx_convert.t = btf_type_by_id(btf, bpf_ctx_convert_btf_id[0]); - bpf_struct_ops_init(btf, log); - refcount_set(&btf->refcnt, 1); err = btf_alloc_id(btf); @@ -7532,7 +7530,7 @@ struct module *btf_try_get_module(const struct btf *btf) /* Returns struct btf corresponding to the struct module. * This function can return NULL or ERR_PTR. */ -static struct btf *btf_get_module_btf(const struct module *module) +struct btf *btf_get_module_btf(const struct module *module) { #ifdef CONFIG_DEBUG_INFO_BTF_MODULES struct btf_module *btf_mod, *tmp; @@ -8673,3 +8671,40 @@ const struct bpf_struct_ops_desc *btf_get_struct_ops(struct btf *btf, u32 *ret_c return (const struct bpf_struct_ops_desc *)btf->struct_ops_tab->ops; } +int register_bpf_struct_ops(struct bpf_struct_ops *st_ops) +{ + struct bpf_struct_ops_desc *desc; + struct bpf_verifier_log *log; + struct btf *btf; + int err = 0; + + if (st_ops == NULL) + return -EINVAL; + + btf = btf_get_module_btf(st_ops->owner); + if (!btf) + return -EINVAL; + + log = kzalloc(sizeof(*log), GFP_KERNEL | __GFP_NOWARN); + if (!log) { + err = -ENOMEM; + goto errout; + } + + log->level = BPF_LOG_KERNEL; + + desc = btf_add_struct_ops(btf, st_ops); + if (IS_ERR(desc)) { + err = PTR_ERR(desc); + goto errout; + } + + err = bpf_struct_ops_init(desc, btf, log); + +errout: + kfree(log); + btf_put(btf); + + return err; +} +EXPORT_SYMBOL_GPL(register_bpf_struct_ops); diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c index ffa224053a6c..148a5851c4fa 100644 --- a/net/bpf/bpf_dummy_struct_ops.c +++ b/net/bpf/bpf_dummy_struct_ops.c @@ -7,7 +7,7 @@ #include <linux/bpf.h> #include <linux/btf.h> -extern struct bpf_struct_ops bpf_bpf_dummy_ops; +static struct bpf_struct_ops bpf_bpf_dummy_ops; /* A common type for test_N with return value in bpf_dummy_ops */ typedef int (*dummy_ops_test_ret_fn)(struct bpf_dummy_ops_state *state, ...); @@ -223,11 +223,13 @@ static int bpf_dummy_reg(void *kdata) return -EOPNOTSUPP; } +DEFINE_STRUCT_OPS_VALUE_TYPE(bpf_dummy_ops); + static void bpf_dummy_unreg(void *kdata) { } -struct bpf_struct_ops bpf_bpf_dummy_ops = { +static struct bpf_struct_ops bpf_bpf_dummy_ops = { .verifier_ops = &bpf_dummy_verifier_ops, .init = bpf_dummy_init, .check_member = bpf_dummy_ops_check_member, @@ -235,4 +237,12 @@ struct bpf_struct_ops bpf_bpf_dummy_ops = { .reg = bpf_dummy_reg, .unreg = bpf_dummy_unreg, .name = "bpf_dummy_ops", + .owner = THIS_MODULE, }; + +static int __init bpf_dummy_struct_ops_init(void) +{ + BTF_STRUCT_OPS_TYPE_EMIT(bpf_dummy_ops); + return register_bpf_struct_ops(&bpf_bpf_dummy_ops); +} +late_initcall(bpf_dummy_struct_ops_init); diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 3c8b76578a2a..b36a19274e5b 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -12,7 +12,7 @@ #include <net/bpf_sk_storage.h> /* "extern" is to avoid sparse warning. It is only used in bpf_struct_ops.c. */ -extern struct bpf_struct_ops bpf_tcp_congestion_ops; +static struct bpf_struct_ops bpf_tcp_congestion_ops; static u32 unsupported_ops[] = { offsetof(struct tcp_congestion_ops, get_info), @@ -277,7 +277,9 @@ static int bpf_tcp_ca_validate(void *kdata) return tcp_validate_congestion_control(kdata); } -struct bpf_struct_ops bpf_tcp_congestion_ops = { +DEFINE_STRUCT_OPS_VALUE_TYPE(tcp_congestion_ops); + +static struct bpf_struct_ops bpf_tcp_congestion_ops = { .verifier_ops = &bpf_tcp_ca_verifier_ops, .reg = bpf_tcp_ca_reg, .unreg = bpf_tcp_ca_unreg, @@ -287,10 +289,18 @@ struct bpf_struct_ops bpf_tcp_congestion_ops = { .init = bpf_tcp_ca_init, .validate = bpf_tcp_ca_validate, .name = "tcp_congestion_ops", + .owner = THIS_MODULE, }; static int __init bpf_tcp_ca_kfunc_init(void) { - return register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &bpf_tcp_ca_kfunc_set); + int ret; + + BTF_STRUCT_OPS_TYPE_EMIT(tcp_congestion_ops); + + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &bpf_tcp_ca_kfunc_set); + ret = ret ?: register_bpf_struct_ops(&bpf_tcp_congestion_ops); + + return ret; } late_initcall(bpf_tcp_ca_kfunc_init);