Message ID | 20210420193740.124285-3-memxor@gmail.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | BPF |
Headers | show |
Series | Add TC-BPF API | expand |
Context | Check | Description |
---|---|---|
netdev/cover_letter | success | Link |
netdev/fixes_present | success | Link |
netdev/patch_count | success | Link |
netdev/tree_selection | success | Clearly marked for bpf-next |
netdev/subject_prefix | success | Link |
netdev/cc_maintainers | success | CCed 10 of 10 maintainers |
netdev/source_inline | success | Was 0 now: 0 |
netdev/verify_signedoff | success | Link |
netdev/module_param | success | Was 0 now: 0 |
netdev/build_32bit | success | Errors and warnings before: 0 this patch: 0 |
netdev/kdoc | success | Errors and warnings before: 0 this patch: 0 |
netdev/verify_fixes | success | Link |
netdev/checkpatch | fail | ERROR: code indent should use tabs where possible ERROR: space prohibited before that ':' (ctx:WxV) |
netdev/build_allmodconfig_warn | success | Errors and warnings before: 0 this patch: 0 |
netdev/header_inline | success | Link |
On 4/20/21 12:37 PM, Kumar Kartikeya Dwivedi wrote: > This adds functions that wrap the netlink API used for adding, > manipulating, and removing traffic control filters. These functions > operate directly on the loaded prog's fd, and return a handle to the > filter using an out parameter named id. > > The basic featureset is covered to allow for attaching and removal of > filters. Some additional features like TCA_BPF_POLICE and TCA_RATE for > the API have been omitted. These can added on top later by extending the "later" => "layer"? > bpf_tc_opts struct. > > Support for binding actions directly to a classifier by passing them in > during filter creation has also been omitted for now. These actions have > an auto clean up property because their lifetime is bound to the filter > they are attached to. This can be added later, but was omitted for now > as direct action mode is a better alternative to it, which is enabled by > default. > > An API summary: > > bpf_tc_attach may be used to attach, and replace SCHED_CLS bpf > classifier. The protocol is always set as ETH_P_ALL. The replace option > in bpf_tc_opts is used to control replacement behavior. Attachment > fails if filter with existing attributes already exists. > > bpf_tc_detach may be used to detach existing SCHED_CLS filter. The > bpf_tc_attach_id object filled in during attach must be passed in to the > detach functions for them to remove the filter and its attached > classififer correctly. > > bpf_tc_get_info is a helper that can be used to obtain attributes > for the filter and classififer. > > Examples: > > struct bpf_tc_attach_id id = {}; > struct bpf_object *obj; > struct bpf_program *p; > int fd, r; > > obj = bpf_object_open("foo.o"); > if (IS_ERR_OR_NULL(obj)) > return PTR_ERR(obj); > > p = bpf_object__find_program_by_title(obj, "classifier"); Please use bpf_object__find_program_by_name() API. bpf_object__find_program_by_title() is not recommended as now libbpf supports multiple programs within the same section. > if (IS_ERR_OR_NULL(p)) > return PTR_ERR(p); > > if (bpf_object__load(obj) < 0) > return -1; > > fd = bpf_program__fd(p); > > r = bpf_tc_attach(fd, if_nametoindex("lo"), > BPF_TC_CLSACT_INGRESS, > NULL, &id); > if (r < 0) > return r; > > ... which is roughly equivalent to: > # tc qdisc add dev lo clsact > # tc filter add dev lo ingress bpf obj foo.o sec classifier da > > ... as direct action mode is always enabled. > > To replace an existing filter: > > DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .handle = id.handle, > .priority = id.priority, .replace = true); > r = bpf_tc_attach(fd, if_nametoindex("lo"), > BPF_TC_CLSACT_INGRESS, > &opts, &id); > if (r < 0) > return r; > > To obtain info of a particular filter, the example above can be extended > as follows: > > struct bpf_tc_info info = {}; > > r = bpf_tc_get_info(if_nametoindex("lo"), > BPF_TC_CLSACT_INGRESS, > &id, &info); > if (r < 0) > return r; > > ... where id corresponds to the bpf_tc_attach_id filled in during an > attach operation. > > Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> > Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> > --- > tools/lib/bpf/libbpf.h | 44 ++++++ > tools/lib/bpf/libbpf.map | 3 + > tools/lib/bpf/netlink.c | 319 ++++++++++++++++++++++++++++++++++++++- > 3 files changed, 360 insertions(+), 6 deletions(-) > > diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h > index bec4e6a6e31d..b4ed6a41ea70 100644 > --- a/tools/lib/bpf/libbpf.h > +++ b/tools/lib/bpf/libbpf.h > @@ -16,6 +16,8 @@ > #include <stdbool.h> > #include <sys/types.h> // for size_t > #include <linux/bpf.h> > +#include <linux/pkt_sched.h> > +#include <linux/tc_act/tc_bpf.h> > > #include "libbpf_common.h" > > @@ -775,6 +777,48 @@ LIBBPF_API int bpf_linker__add_file(struct bpf_linker *linker, const char *filen > LIBBPF_API int bpf_linker__finalize(struct bpf_linker *linker); > LIBBPF_API void bpf_linker__free(struct bpf_linker *linker); > > +/* Convenience macros for the clsact attach hooks */ > +#define BPF_TC_CLSACT_INGRESS TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS) > +#define BPF_TC_CLSACT_EGRESS TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_EGRESS) > + > +struct bpf_tc_opts { > + size_t sz; > + __u32 handle; > + __u32 class_id; > + __u16 priority; > + bool replace; > + size_t :0; Did you see any error without "size_t :0"? > +}; > + > +#define bpf_tc_opts__last_field replace > + > +/* Acts as a handle for an attached filter */ > +struct bpf_tc_attach_id { > + __u32 handle; > + __u16 priority; > +}; > + [...]
On Wed, Apr 21, 2021 at 12:28:01PM IST, Yonghong Song wrote: > > > On 4/20/21 12:37 PM, Kumar Kartikeya Dwivedi wrote: > > This adds functions that wrap the netlink API used for adding, > > manipulating, and removing traffic control filters. These functions > > operate directly on the loaded prog's fd, and return a handle to the > > filter using an out parameter named id. > > > > The basic featureset is covered to allow for attaching and removal of > > filters. Some additional features like TCA_BPF_POLICE and TCA_RATE for > > the API have been omitted. These can added on top later by extending the > > "later" => "layer"? > No, I meant that other options can be added on top of this series by someone later. I'll reword it. > > bpf_tc_opts struct. > > > > Support for binding actions directly to a classifier by passing them in > > during filter creation has also been omitted for now. These actions have > > an auto clean up property because their lifetime is bound to the filter > > they are attached to. This can be added later, but was omitted for now > > as direct action mode is a better alternative to it, which is enabled by > > default. > > > > An API summary: > > > > bpf_tc_attach may be used to attach, and replace SCHED_CLS bpf > > classifier. The protocol is always set as ETH_P_ALL. The replace option > > in bpf_tc_opts is used to control replacement behavior. Attachment > > fails if filter with existing attributes already exists. > > > > bpf_tc_detach may be used to detach existing SCHED_CLS filter. The > > bpf_tc_attach_id object filled in during attach must be passed in to the > > detach functions for them to remove the filter and its attached > > classififer correctly. > > > > bpf_tc_get_info is a helper that can be used to obtain attributes > > for the filter and classififer. > > > > Examples: > > > > struct bpf_tc_attach_id id = {}; > > struct bpf_object *obj; > > struct bpf_program *p; > > int fd, r; > > > > obj = bpf_object_open("foo.o"); > > if (IS_ERR_OR_NULL(obj)) > > return PTR_ERR(obj); > > > > p = bpf_object__find_program_by_title(obj, "classifier"); > > Please use bpf_object__find_program_by_name() API. > bpf_object__find_program_by_title() is not recommended as now > libbpf supports multiple programs within the same section. Thanks, will do. > > > if (IS_ERR_OR_NULL(p)) > > return PTR_ERR(p); > > > > if (bpf_object__load(obj) < 0) > > return -1; > > > > fd = bpf_program__fd(p); > > > > r = bpf_tc_attach(fd, if_nametoindex("lo"), > > BPF_TC_CLSACT_INGRESS, > > NULL, &id); > > if (r < 0) > > return r; > > > > ... which is roughly equivalent to: > > # tc qdisc add dev lo clsact > > # tc filter add dev lo ingress bpf obj foo.o sec classifier da > > > > ... as direct action mode is always enabled. > > > > To replace an existing filter: > > > > DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .handle = id.handle, > > .priority = id.priority, .replace = true); > > r = bpf_tc_attach(fd, if_nametoindex("lo"), > > BPF_TC_CLSACT_INGRESS, > > &opts, &id); > > if (r < 0) > > return r; > > > > To obtain info of a particular filter, the example above can be extended > > as follows: > > > > struct bpf_tc_info info = {}; > > > > r = bpf_tc_get_info(if_nametoindex("lo"), > > BPF_TC_CLSACT_INGRESS, > > &id, &info); > > if (r < 0) > > return r; > > > > ... where id corresponds to the bpf_tc_attach_id filled in during an > > attach operation. > > > > Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> > > Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> > > --- > > tools/lib/bpf/libbpf.h | 44 ++++++ > > tools/lib/bpf/libbpf.map | 3 + > > tools/lib/bpf/netlink.c | 319 ++++++++++++++++++++++++++++++++++++++- > > 3 files changed, 360 insertions(+), 6 deletions(-) > > > > diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h > > index bec4e6a6e31d..b4ed6a41ea70 100644 > > --- a/tools/lib/bpf/libbpf.h > > +++ b/tools/lib/bpf/libbpf.h > > @@ -16,6 +16,8 @@ > > #include <stdbool.h> > > #include <sys/types.h> // for size_t > > #include <linux/bpf.h> > > +#include <linux/pkt_sched.h> > > +#include <linux/tc_act/tc_bpf.h> > > #include "libbpf_common.h" > > @@ -775,6 +777,48 @@ LIBBPF_API int bpf_linker__add_file(struct bpf_linker *linker, const char *filen > > LIBBPF_API int bpf_linker__finalize(struct bpf_linker *linker); > > LIBBPF_API void bpf_linker__free(struct bpf_linker *linker); > > +/* Convenience macros for the clsact attach hooks */ > > +#define BPF_TC_CLSACT_INGRESS TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS) > > +#define BPF_TC_CLSACT_EGRESS TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_EGRESS) > > + > > +struct bpf_tc_opts { > > + size_t sz; > > + __u32 handle; > > + __u32 class_id; > > + __u16 priority; > > + bool replace; > > + size_t :0; > > Did you see any error without "size_t :0"? > Not really, I just added this considering this recent change: dde7b3f5f2f4 ("libbpf: Add explicit padding to bpf_xdp_set_link_opts") > > +}; > > + > > +#define bpf_tc_opts__last_field replace > > + > > +/* Acts as a handle for an attached filter */ > > +struct bpf_tc_attach_id { > > + __u32 handle; > > + __u16 priority; > > +}; > > + > [...] -- Kartikeya
On Tue, Apr 20, 2021 at 12:37 PM Kumar Kartikeya Dwivedi <memxor@gmail.com> wrote: > > This adds functions that wrap the netlink API used for adding, > manipulating, and removing traffic control filters. These functions > operate directly on the loaded prog's fd, and return a handle to the > filter using an out parameter named id. > > The basic featureset is covered to allow for attaching and removal of > filters. Some additional features like TCA_BPF_POLICE and TCA_RATE for > the API have been omitted. These can added on top later by extending the > bpf_tc_opts struct. > > Support for binding actions directly to a classifier by passing them in > during filter creation has also been omitted for now. These actions have > an auto clean up property because their lifetime is bound to the filter > they are attached to. This can be added later, but was omitted for now > as direct action mode is a better alternative to it, which is enabled by > default. > > An API summary: > > bpf_tc_attach may be used to attach, and replace SCHED_CLS bpf > classifier. The protocol is always set as ETH_P_ALL. The replace option > in bpf_tc_opts is used to control replacement behavior. Attachment > fails if filter with existing attributes already exists. > > bpf_tc_detach may be used to detach existing SCHED_CLS filter. The > bpf_tc_attach_id object filled in during attach must be passed in to the > detach functions for them to remove the filter and its attached > classififer correctly. > > bpf_tc_get_info is a helper that can be used to obtain attributes > for the filter and classififer. > > Examples: > > struct bpf_tc_attach_id id = {}; > struct bpf_object *obj; > struct bpf_program *p; > int fd, r; > > obj = bpf_object_open("foo.o"); > if (IS_ERR_OR_NULL(obj)) > return PTR_ERR(obj); > > p = bpf_object__find_program_by_title(obj, "classifier"); > if (IS_ERR_OR_NULL(p)) > return PTR_ERR(p); > > if (bpf_object__load(obj) < 0) > return -1; > > fd = bpf_program__fd(p); > > r = bpf_tc_attach(fd, if_nametoindex("lo"), > BPF_TC_CLSACT_INGRESS, > NULL, &id); > if (r < 0) > return r; > > ... which is roughly equivalent to: > # tc qdisc add dev lo clsact > # tc filter add dev lo ingress bpf obj foo.o sec classifier da > > ... as direct action mode is always enabled. > > To replace an existing filter: > > DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .handle = id.handle, > .priority = id.priority, .replace = true); > r = bpf_tc_attach(fd, if_nametoindex("lo"), > BPF_TC_CLSACT_INGRESS, > &opts, &id); > if (r < 0) > return r; > > To obtain info of a particular filter, the example above can be extended > as follows: > > struct bpf_tc_info info = {}; > > r = bpf_tc_get_info(if_nametoindex("lo"), > BPF_TC_CLSACT_INGRESS, > &id, &info); > if (r < 0) > return r; > > ... where id corresponds to the bpf_tc_attach_id filled in during an > attach operation. > > Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> > Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> > --- > tools/lib/bpf/libbpf.h | 44 ++++++ > tools/lib/bpf/libbpf.map | 3 + > tools/lib/bpf/netlink.c | 319 ++++++++++++++++++++++++++++++++++++++- > 3 files changed, 360 insertions(+), 6 deletions(-) > > diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h > index bec4e6a6e31d..b4ed6a41ea70 100644 > --- a/tools/lib/bpf/libbpf.h > +++ b/tools/lib/bpf/libbpf.h > @@ -16,6 +16,8 @@ > #include <stdbool.h> > #include <sys/types.h> // for size_t > #include <linux/bpf.h> > +#include <linux/pkt_sched.h> > +#include <linux/tc_act/tc_bpf.h> apart from those unused macros below, are these needed in public API header? > > #include "libbpf_common.h" > > @@ -775,6 +777,48 @@ LIBBPF_API int bpf_linker__add_file(struct bpf_linker *linker, const char *filen > LIBBPF_API int bpf_linker__finalize(struct bpf_linker *linker); > LIBBPF_API void bpf_linker__free(struct bpf_linker *linker); > > +/* Convenience macros for the clsact attach hooks */ > +#define BPF_TC_CLSACT_INGRESS TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS) > +#define BPF_TC_CLSACT_EGRESS TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_EGRESS) these seem to be used only internally, why exposing them in public API? > + > +struct bpf_tc_opts { > + size_t sz; > + __u32 handle; > + __u32 class_id; > + __u16 priority; > + bool replace; > + size_t :0; > +}; > + > +#define bpf_tc_opts__last_field replace > + > +/* Acts as a handle for an attached filter */ > +struct bpf_tc_attach_id { > + __u32 handle; > + __u16 priority; > +}; what are the chances that we'll need to grow this id struct? If that happens, how do we do that in a backward/forward compatible manner? if handle/prio are the only two ever necessary, we can actually use bpf_tc_opts to return them back to user (we do that with bpf_test_run_opts API). And then adjust detach/get_info methods to let pass those values. The whole idea of a struct for id just screams "compatibility problems down the road" at me. Does anyone else has any other opinion on this? > + > +struct bpf_tc_info { > + struct bpf_tc_attach_id id; > + __u16 protocol; > + __u32 chain_index; > + __u32 prog_id; > + __u8 tag[BPF_TAG_SIZE]; > + __u32 class_id; > + __u32 bpf_flags; > + __u32 bpf_flags_gen; > +}; > + > +/* id is out parameter that will be written to, it must not be NULL */ > +LIBBPF_API int bpf_tc_attach(int fd, __u32 ifindex, __u32 parent_id, so parent_id is INGRESS|EGRESS, right? Is that an obvious name for this parameter? I had to look at the code to understand what's expected. Is it possible that it will be anything other than INGRESS or EGRESS? If not `bool ingress` might be an option. Or perhaps enum bpf_tc_direction { BPF_TC_INGRESS, BPF_TC_EGRESS } is better still. > + const struct bpf_tc_opts *opts, > + struct bpf_tc_attach_id *id); > +LIBBPF_API int bpf_tc_detach(__u32 ifindex, __u32 parent_id, > + const struct bpf_tc_attach_id *id); > +LIBBPF_API int bpf_tc_get_info(__u32 ifindex, __u32 parent_id, bpf_tc_query() to be more in line with attach/detach single-word verbs? > + const struct bpf_tc_attach_id *id, > + struct bpf_tc_info *info); > + > #ifdef __cplusplus > } /* extern "C" */ > #endif > diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map > index b9b29baf1df8..686444fbb838 100644 > --- a/tools/lib/bpf/libbpf.map > +++ b/tools/lib/bpf/libbpf.map > @@ -361,4 +361,7 @@ LIBBPF_0.4.0 { > bpf_linker__new; > bpf_map__inner_map; > bpf_object__set_kversion; > + bpf_tc_attach; > + bpf_tc_detach; > + bpf_tc_get_info; > } LIBBPF_0.3.0; [...]
Andrii Nakryiko <andrii.nakryiko@gmail.com> writes: > On Tue, Apr 20, 2021 at 12:37 PM Kumar Kartikeya Dwivedi > <memxor@gmail.com> wrote: >> >> This adds functions that wrap the netlink API used for adding, >> manipulating, and removing traffic control filters. These functions >> operate directly on the loaded prog's fd, and return a handle to the >> filter using an out parameter named id. >> >> The basic featureset is covered to allow for attaching and removal of >> filters. Some additional features like TCA_BPF_POLICE and TCA_RATE for >> the API have been omitted. These can added on top later by extending the >> bpf_tc_opts struct. >> >> Support for binding actions directly to a classifier by passing them in >> during filter creation has also been omitted for now. These actions have >> an auto clean up property because their lifetime is bound to the filter >> they are attached to. This can be added later, but was omitted for now >> as direct action mode is a better alternative to it, which is enabled by >> default. >> >> An API summary: >> >> bpf_tc_attach may be used to attach, and replace SCHED_CLS bpf >> classifier. The protocol is always set as ETH_P_ALL. The replace option >> in bpf_tc_opts is used to control replacement behavior. Attachment >> fails if filter with existing attributes already exists. >> >> bpf_tc_detach may be used to detach existing SCHED_CLS filter. The >> bpf_tc_attach_id object filled in during attach must be passed in to the >> detach functions for them to remove the filter and its attached >> classififer correctly. >> >> bpf_tc_get_info is a helper that can be used to obtain attributes >> for the filter and classififer. >> >> Examples: >> >> struct bpf_tc_attach_id id = {}; >> struct bpf_object *obj; >> struct bpf_program *p; >> int fd, r; >> >> obj = bpf_object_open("foo.o"); >> if (IS_ERR_OR_NULL(obj)) >> return PTR_ERR(obj); >> >> p = bpf_object__find_program_by_title(obj, "classifier"); >> if (IS_ERR_OR_NULL(p)) >> return PTR_ERR(p); >> >> if (bpf_object__load(obj) < 0) >> return -1; >> >> fd = bpf_program__fd(p); >> >> r = bpf_tc_attach(fd, if_nametoindex("lo"), >> BPF_TC_CLSACT_INGRESS, >> NULL, &id); >> if (r < 0) >> return r; >> >> ... which is roughly equivalent to: >> # tc qdisc add dev lo clsact >> # tc filter add dev lo ingress bpf obj foo.o sec classifier da >> >> ... as direct action mode is always enabled. >> >> To replace an existing filter: >> >> DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .handle = id.handle, >> .priority = id.priority, .replace = true); >> r = bpf_tc_attach(fd, if_nametoindex("lo"), >> BPF_TC_CLSACT_INGRESS, >> &opts, &id); >> if (r < 0) >> return r; >> >> To obtain info of a particular filter, the example above can be extended >> as follows: >> >> struct bpf_tc_info info = {}; >> >> r = bpf_tc_get_info(if_nametoindex("lo"), >> BPF_TC_CLSACT_INGRESS, >> &id, &info); >> if (r < 0) >> return r; >> >> ... where id corresponds to the bpf_tc_attach_id filled in during an >> attach operation. >> >> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> >> Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> >> --- >> tools/lib/bpf/libbpf.h | 44 ++++++ >> tools/lib/bpf/libbpf.map | 3 + >> tools/lib/bpf/netlink.c | 319 ++++++++++++++++++++++++++++++++++++++- >> 3 files changed, 360 insertions(+), 6 deletions(-) >> >> diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h >> index bec4e6a6e31d..b4ed6a41ea70 100644 >> --- a/tools/lib/bpf/libbpf.h >> +++ b/tools/lib/bpf/libbpf.h >> @@ -16,6 +16,8 @@ >> #include <stdbool.h> >> #include <sys/types.h> // for size_t >> #include <linux/bpf.h> >> +#include <linux/pkt_sched.h> >> +#include <linux/tc_act/tc_bpf.h> > > apart from those unused macros below, are these needed in public API header? > >> >> #include "libbpf_common.h" >> >> @@ -775,6 +777,48 @@ LIBBPF_API int bpf_linker__add_file(struct bpf_linker *linker, const char *filen >> LIBBPF_API int bpf_linker__finalize(struct bpf_linker *linker); >> LIBBPF_API void bpf_linker__free(struct bpf_linker *linker); >> >> +/* Convenience macros for the clsact attach hooks */ >> +#define BPF_TC_CLSACT_INGRESS TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS) >> +#define BPF_TC_CLSACT_EGRESS TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_EGRESS) > > these seem to be used only internally, why exposing them in public > API? No they're "aliases" for when you want to attach the filter directly to the interface (and thus install the clsact qdisc as the root). You can also use the filter with an existing qdisc (most commonly HTB), in which case you need to specify the qdisc handle as the root. We have a few examples of this use case: https://github.com/xdp-project/bpf-examples/tree/master/traffic-pacing-edt and https://github.com/xdp-project/xdp-cpumap-tc >> +struct bpf_tc_opts { >> + size_t sz; >> + __u32 handle; >> + __u32 class_id; >> + __u16 priority; >> + bool replace; >> + size_t :0; >> +}; >> + >> +#define bpf_tc_opts__last_field replace >> + >> +/* Acts as a handle for an attached filter */ >> +struct bpf_tc_attach_id { >> + __u32 handle; >> + __u16 priority; >> +}; > > what are the chances that we'll need to grow this id struct? If that > happens, how do we do that in a backward/forward compatible manner? > > if handle/prio are the only two ever necessary, we can actually use > bpf_tc_opts to return them back to user (we do that with > bpf_test_run_opts API). And then adjust detach/get_info methods to let > pass those values. > > The whole idea of a struct for id just screams "compatibility problems > down the road" at me. Does anyone else has any other opinion on this? Well, *if* we ever want to extend them (e.g., to support other values of the protocol field, if that ever becomes necessary), we'll probably also want to make it possible to pass the same identifiers as options, so just reusing the opts struct definitely makes sense! >> +struct bpf_tc_info { >> + struct bpf_tc_attach_id id; >> + __u16 protocol; >> + __u32 chain_index; >> + __u32 prog_id; >> + __u8 tag[BPF_TAG_SIZE]; >> + __u32 class_id; >> + __u32 bpf_flags; >> + __u32 bpf_flags_gen; >> +}; >> + >> +/* id is out parameter that will be written to, it must not be NULL */ >> +LIBBPF_API int bpf_tc_attach(int fd, __u32 ifindex, __u32 parent_id, > > so parent_id is INGRESS|EGRESS, right? Is that an obvious name for > this parameter? I had to look at the code to understand what's > expected. Is it possible that it will be anything other than INGRESS > or EGRESS? If not `bool ingress` might be an option. Or perhaps enum > bpf_tc_direction { BPF_TC_INGRESS, BPF_TC_EGRESS } is better still. See above; the parent is the attach point, and you use the defines from above if you just want to attach to the interface. But maybe documenting this in a comment above the function signature would be good (along with a bit of terminology from the TC world for those coming from there) :) >> + const struct bpf_tc_opts *opts, >> + struct bpf_tc_attach_id *id); >> +LIBBPF_API int bpf_tc_detach(__u32 ifindex, __u32 parent_id, >> + const struct bpf_tc_attach_id *id); >> +LIBBPF_API int bpf_tc_get_info(__u32 ifindex, __u32 parent_id, > > bpf_tc_query() to be more in line with attach/detach single-word > verbs? OK by me! -Toke
On 4/20/21 9:37 PM, Kumar Kartikeya Dwivedi wrote: [...] > diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h > index bec4e6a6e31d..b4ed6a41ea70 100644 > --- a/tools/lib/bpf/libbpf.h > +++ b/tools/lib/bpf/libbpf.h > @@ -16,6 +16,8 @@ > #include <stdbool.h> > #include <sys/types.h> // for size_t > #include <linux/bpf.h> > +#include <linux/pkt_sched.h> > +#include <linux/tc_act/tc_bpf.h> > > #include "libbpf_common.h" > > @@ -775,6 +777,48 @@ LIBBPF_API int bpf_linker__add_file(struct bpf_linker *linker, const char *filen > LIBBPF_API int bpf_linker__finalize(struct bpf_linker *linker); > LIBBPF_API void bpf_linker__free(struct bpf_linker *linker); > > +/* Convenience macros for the clsact attach hooks */ > +#define BPF_TC_CLSACT_INGRESS TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS) > +#define BPF_TC_CLSACT_EGRESS TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_EGRESS) I would abstract those away into an enum, plus avoid having to pull in linux/pkt_sched.h and linux/tc_act/tc_bpf.h from main libbpf.h header. Just add a enum { BPF_TC_DIR_INGRESS, BPF_TC_DIR_EGRESS, } and then the concrete tc bits (TC_H_MAKE()) can be translated internally. > +struct bpf_tc_opts { > + size_t sz; Is this set anywhere? > + __u32 handle; > + __u32 class_id; I'd remove class_id from here as well given in direct-action a BPF prog can set it if needed. > + __u16 priority; > + bool replace; > + size_t :0; What's the rationale for this padding? > +}; > + > +#define bpf_tc_opts__last_field replace > + > +/* Acts as a handle for an attached filter */ > +struct bpf_tc_attach_id { nit: maybe bpf_tc_ctx > + __u32 handle; > + __u16 priority; > +}; > + > +struct bpf_tc_info { > + struct bpf_tc_attach_id id; > + __u16 protocol; > + __u32 chain_index; > + __u32 prog_id; > + __u8 tag[BPF_TAG_SIZE]; > + __u32 class_id; > + __u32 bpf_flags; > + __u32 bpf_flags_gen; Given we do not yet have any setters e.g. for offload, etc, the one thing I'd see useful and crucial initially is prog_id. The protocol, chain_index, and I would also include tag should be dropped. Similarly class_id given my earlier statement, and flags I would extend once this lib API would support offloading progs. > +}; > + > +/* id is out parameter that will be written to, it must not be NULL */ > +LIBBPF_API int bpf_tc_attach(int fd, __u32 ifindex, __u32 parent_id, > + const struct bpf_tc_opts *opts, > + struct bpf_tc_attach_id *id); > +LIBBPF_API int bpf_tc_detach(__u32 ifindex, __u32 parent_id, > + const struct bpf_tc_attach_id *id); > +LIBBPF_API int bpf_tc_get_info(__u32 ifindex, __u32 parent_id, > + const struct bpf_tc_attach_id *id, > + struct bpf_tc_info *info); As per above, for parent_id I'd replace with dir enum. > + > #ifdef __cplusplus > } /* extern "C" */ > #endif
On Thu, Apr 22, 2021 at 04:29:28AM IST, Daniel Borkmann wrote: > On 4/20/21 9:37 PM, Kumar Kartikeya Dwivedi wrote: > [...] > > diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h > > index bec4e6a6e31d..b4ed6a41ea70 100644 > > --- a/tools/lib/bpf/libbpf.h > > +++ b/tools/lib/bpf/libbpf.h > > @@ -16,6 +16,8 @@ > > #include <stdbool.h> > > #include <sys/types.h> // for size_t > > #include <linux/bpf.h> > > +#include <linux/pkt_sched.h> > > +#include <linux/tc_act/tc_bpf.h> > > #include "libbpf_common.h" > > @@ -775,6 +777,48 @@ LIBBPF_API int bpf_linker__add_file(struct bpf_linker *linker, const char *filen > > LIBBPF_API int bpf_linker__finalize(struct bpf_linker *linker); > > LIBBPF_API void bpf_linker__free(struct bpf_linker *linker); > > +/* Convenience macros for the clsact attach hooks */ > > +#define BPF_TC_CLSACT_INGRESS TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS) > > +#define BPF_TC_CLSACT_EGRESS TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_EGRESS) > > I would abstract those away into an enum, plus avoid having to pull in > linux/pkt_sched.h and linux/tc_act/tc_bpf.h from main libbpf.h header. > > Just add a enum { BPF_TC_DIR_INGRESS, BPF_TC_DIR_EGRESS, } and then the > concrete tc bits (TC_H_MAKE()) can be translated internally. > Ok, will do. > > +struct bpf_tc_opts { > > + size_t sz; > > Is this set anywhere? > This is needed by the OPTS_* infrastructure. > > + __u32 handle; > > + __u32 class_id; > > I'd remove class_id from here as well given in direct-action a BPF prog can > set it if needed. > Ok, makes sense. > > + __u16 priority; > > + bool replace; > > + size_t :0; > > What's the rationale for this padding? > dde7b3f5f2f4 ("libbpf: Add explicit padding to bpf_xdp_set_link_opts") > > +}; > > + > > +#define bpf_tc_opts__last_field replace > > + > > +/* Acts as a handle for an attached filter */ > > +struct bpf_tc_attach_id { > > nit: maybe bpf_tc_ctx > Noted. > > + __u32 handle; > > + __u16 priority; > > +}; > > + > > +struct bpf_tc_info { > > + struct bpf_tc_attach_id id; > > + __u16 protocol; > > + __u32 chain_index; > > + __u32 prog_id; > > + __u8 tag[BPF_TAG_SIZE]; > > + __u32 class_id; > > + __u32 bpf_flags; > > + __u32 bpf_flags_gen; > > Given we do not yet have any setters e.g. for offload, etc, the one thing > I'd see useful and crucial initially is prog_id. > > The protocol, chain_index, and I would also include tag should be dropped. A future user of this API needs to know the tag, so I would like to keep that. The rest we can drop, and probably document the default values explicitly. > Similarly class_id given my earlier statement, and flags I would extend once > this lib API would support offloading progs. > > > +}; > > + > > +/* id is out parameter that will be written to, it must not be NULL */ > > +LIBBPF_API int bpf_tc_attach(int fd, __u32 ifindex, __u32 parent_id, > > + const struct bpf_tc_opts *opts, > > + struct bpf_tc_attach_id *id); > > +LIBBPF_API int bpf_tc_detach(__u32 ifindex, __u32 parent_id, > > + const struct bpf_tc_attach_id *id); > > +LIBBPF_API int bpf_tc_get_info(__u32 ifindex, __u32 parent_id, > > + const struct bpf_tc_attach_id *id, > > + struct bpf_tc_info *info); > > As per above, for parent_id I'd replace with dir enum. > > > + > > #ifdef __cplusplus > > } /* extern "C" */ > > #endif -- Kartikeya
On 4/21/21 9:48 PM, Toke Høiland-Jørgensen wrote: > Andrii Nakryiko <andrii.nakryiko@gmail.com> writes: >> On Tue, Apr 20, 2021 at 12:37 PM Kumar Kartikeya Dwivedi >> <memxor@gmail.com> wrote: [...] >>> --- >>> tools/lib/bpf/libbpf.h | 44 ++++++ >>> tools/lib/bpf/libbpf.map | 3 + >>> tools/lib/bpf/netlink.c | 319 ++++++++++++++++++++++++++++++++++++++- >>> 3 files changed, 360 insertions(+), 6 deletions(-) >>> >>> diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h >>> index bec4e6a6e31d..b4ed6a41ea70 100644 >>> --- a/tools/lib/bpf/libbpf.h >>> +++ b/tools/lib/bpf/libbpf.h >>> @@ -16,6 +16,8 @@ >>> #include <stdbool.h> >>> #include <sys/types.h> // for size_t >>> #include <linux/bpf.h> >>> +#include <linux/pkt_sched.h> >>> +#include <linux/tc_act/tc_bpf.h> >> >> apart from those unused macros below, are these needed in public API header? >> >>> #include "libbpf_common.h" >>> >>> @@ -775,6 +777,48 @@ LIBBPF_API int bpf_linker__add_file(struct bpf_linker *linker, const char *filen >>> LIBBPF_API int bpf_linker__finalize(struct bpf_linker *linker); >>> LIBBPF_API void bpf_linker__free(struct bpf_linker *linker); >>> >>> +/* Convenience macros for the clsact attach hooks */ >>> +#define BPF_TC_CLSACT_INGRESS TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS) >>> +#define BPF_TC_CLSACT_EGRESS TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_EGRESS) >> >> these seem to be used only internally, why exposing them in public >> API? > > No they're "aliases" for when you want to attach the filter directly to > the interface (and thus install the clsact qdisc as the root). You can > also use the filter with an existing qdisc (most commonly HTB), in which > case you need to specify the qdisc handle as the root. We have a few > examples of this use case: > > https://github.com/xdp-project/bpf-examples/tree/master/traffic-pacing-edt > and > https://github.com/xdp-project/xdp-cpumap-tc I'm a bit puzzled, could you elaborate on your use case on why you wouldn't use the tc egress hook for those especially given it's guaranteed to run outside of root qdisc lock? Some pointers as well: - http://vger.kernel.org/lpc-bpf2018.html#session-1 - https://netdevconf.info/0x14/session.html?talk-replacing-HTB-with-EDT-and-BPF - https://cilium.io/blog/2020/11/10/cilium-19#bwmanager Thanks, Daniel
On 4/22/21 1:08 AM, Kumar Kartikeya Dwivedi wrote: > On Thu, Apr 22, 2021 at 04:29:28AM IST, Daniel Borkmann wrote: >> On 4/20/21 9:37 PM, Kumar Kartikeya Dwivedi wrote: >> [...] >>> diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h >>> index bec4e6a6e31d..b4ed6a41ea70 100644 >>> --- a/tools/lib/bpf/libbpf.h >>> +++ b/tools/lib/bpf/libbpf.h >>> @@ -16,6 +16,8 @@ >>> #include <stdbool.h> >>> #include <sys/types.h> // for size_t >>> #include <linux/bpf.h> >>> +#include <linux/pkt_sched.h> >>> +#include <linux/tc_act/tc_bpf.h> >>> #include "libbpf_common.h" >>> @@ -775,6 +777,48 @@ LIBBPF_API int bpf_linker__add_file(struct bpf_linker *linker, const char *filen >>> LIBBPF_API int bpf_linker__finalize(struct bpf_linker *linker); >>> LIBBPF_API void bpf_linker__free(struct bpf_linker *linker); >>> +/* Convenience macros for the clsact attach hooks */ >>> +#define BPF_TC_CLSACT_INGRESS TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS) >>> +#define BPF_TC_CLSACT_EGRESS TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_EGRESS) >> >> I would abstract those away into an enum, plus avoid having to pull in >> linux/pkt_sched.h and linux/tc_act/tc_bpf.h from main libbpf.h header. >> >> Just add a enum { BPF_TC_DIR_INGRESS, BPF_TC_DIR_EGRESS, } and then the >> concrete tc bits (TC_H_MAKE()) can be translated internally. > > Ok, will do. > >>> +struct bpf_tc_opts { >>> + size_t sz; >> >> Is this set anywhere? > > This is needed by the OPTS_* infrastructure. > >>> + __u32 handle; >>> + __u32 class_id; >> >> I'd remove class_id from here as well given in direct-action a BPF prog can >> set it if needed. > > Ok, makes sense. > >>> + __u16 priority; >>> + bool replace; >>> + size_t :0; >> >> What's the rationale for this padding? > > dde7b3f5f2f4 ("libbpf: Add explicit padding to bpf_xdp_set_link_opts") Hm, fair enough. >>> +}; >>> + >>> +#define bpf_tc_opts__last_field replace >>> + >>> +/* Acts as a handle for an attached filter */ >>> +struct bpf_tc_attach_id { >> >> nit: maybe bpf_tc_ctx >> > > Noted. > >>> + __u32 handle; >>> + __u16 priority; >>> +}; >>> + >>> +struct bpf_tc_info { >>> + struct bpf_tc_attach_id id; >>> + __u16 protocol; >>> + __u32 chain_index; >>> + __u32 prog_id; >>> + __u8 tag[BPF_TAG_SIZE]; >>> + __u32 class_id; >>> + __u32 bpf_flags; >>> + __u32 bpf_flags_gen; >> >> Given we do not yet have any setters e.g. for offload, etc, the one thing >> I'd see useful and crucial initially is prog_id. >> >> The protocol, chain_index, and I would also include tag should be dropped. > > A future user of this API needs to know the tag, so I would like to keep that. > The rest we can drop, and probably document the default values explicitly. Couldn't this be added along with the future patch for the [future] user? The tag should be the tag of the prog itself, so if you have prog_id, you could also retrieve the same tag from the prog. The tag was basically from the early days where we didn't have bpf_prog_get_info_by_fd(). What does that future user need to do different here? Thanks, Daniel
On Thu, Apr 22, 2021 at 04:51:55AM IST, Daniel Borkmann wrote: > On 4/22/21 1:08 AM, Kumar Kartikeya Dwivedi wrote: > > On Thu, Apr 22, 2021 at 04:29:28AM IST, Daniel Borkmann wrote: > > > On 4/20/21 9:37 PM, Kumar Kartikeya Dwivedi wrote: > > > [...] > > > > diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h > > > > index bec4e6a6e31d..b4ed6a41ea70 100644 > > > > --- a/tools/lib/bpf/libbpf.h > > > > +++ b/tools/lib/bpf/libbpf.h > > > > @@ -16,6 +16,8 @@ > > > > #include <stdbool.h> > > > > #include <sys/types.h> // for size_t > > > > #include <linux/bpf.h> > > > > +#include <linux/pkt_sched.h> > > > > +#include <linux/tc_act/tc_bpf.h> > > > > #include "libbpf_common.h" > > > > @@ -775,6 +777,48 @@ LIBBPF_API int bpf_linker__add_file(struct bpf_linker *linker, const char *filen > > > > LIBBPF_API int bpf_linker__finalize(struct bpf_linker *linker); > > > > LIBBPF_API void bpf_linker__free(struct bpf_linker *linker); > > > > +/* Convenience macros for the clsact attach hooks */ > > > > +#define BPF_TC_CLSACT_INGRESS TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS) > > > > +#define BPF_TC_CLSACT_EGRESS TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_EGRESS) > > > > > > I would abstract those away into an enum, plus avoid having to pull in > > > linux/pkt_sched.h and linux/tc_act/tc_bpf.h from main libbpf.h header. > > > > > > Just add a enum { BPF_TC_DIR_INGRESS, BPF_TC_DIR_EGRESS, } and then the > > > concrete tc bits (TC_H_MAKE()) can be translated internally. > > > > Ok, will do. > > > > > > +struct bpf_tc_opts { > > > > + size_t sz; > > > > > > Is this set anywhere? > > > > This is needed by the OPTS_* infrastructure. > > > > > > + __u32 handle; > > > > + __u32 class_id; > > > > > > I'd remove class_id from here as well given in direct-action a BPF prog can > > > set it if needed. > > > > Ok, makes sense. > > > > > > + __u16 priority; > > > > + bool replace; > > > > + size_t :0; > > > > > > What's the rationale for this padding? > > > > dde7b3f5f2f4 ("libbpf: Add explicit padding to bpf_xdp_set_link_opts") > > Hm, fair enough. > > > > > +}; > > > > + > > > > +#define bpf_tc_opts__last_field replace > > > > + > > > > +/* Acts as a handle for an attached filter */ > > > > +struct bpf_tc_attach_id { > > > > > > nit: maybe bpf_tc_ctx > > > > > > > Noted. > > > > > > + __u32 handle; > > > > + __u16 priority; > > > > +}; > > > > + > > > > +struct bpf_tc_info { > > > > + struct bpf_tc_attach_id id; > > > > + __u16 protocol; > > > > + __u32 chain_index; > > > > + __u32 prog_id; > > > > + __u8 tag[BPF_TAG_SIZE]; > > > > + __u32 class_id; > > > > + __u32 bpf_flags; > > > > + __u32 bpf_flags_gen; > > > > > > Given we do not yet have any setters e.g. for offload, etc, the one thing > > > I'd see useful and crucial initially is prog_id. > > > > > > The protocol, chain_index, and I would also include tag should be dropped. > > > > A future user of this API needs to know the tag, so I would like to keep that. > > The rest we can drop, and probably document the default values explicitly. > > Couldn't this be added along with the future patch for the [future] user? > True. > The tag should be the tag of the prog itself, so if you have prog_id, you > could also retrieve the same tag from the prog. The tag was basically from > the early days where we didn't have bpf_prog_get_info_by_fd(). > > What does that future user need to do different here? > From Shaun Crampton: "My particular use case is to load a program, link it with its maps and then check if its tag matches the existing program on the interface (and if so, abort the update)" Also CC'd, they would be able to elaborate better, and whether or not dropping it is ok. > Thanks, > Daniel -- Kartikeya
On 4/22/21 1:30 AM, Kumar Kartikeya Dwivedi wrote: > On Thu, Apr 22, 2021 at 04:51:55AM IST, Daniel Borkmann wrote: >> On 4/22/21 1:08 AM, Kumar Kartikeya Dwivedi wrote: >>> On Thu, Apr 22, 2021 at 04:29:28AM IST, Daniel Borkmann wrote: >>>> On 4/20/21 9:37 PM, Kumar Kartikeya Dwivedi wrote: >>>> [...] >>>>> diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h >>>>> index bec4e6a6e31d..b4ed6a41ea70 100644 >>>>> --- a/tools/lib/bpf/libbpf.h >>>>> +++ b/tools/lib/bpf/libbpf.h >>>>> @@ -16,6 +16,8 @@ >>>>> #include <stdbool.h> >>>>> #include <sys/types.h> // for size_t >>>>> #include <linux/bpf.h> >>>>> +#include <linux/pkt_sched.h> >>>>> +#include <linux/tc_act/tc_bpf.h> >>>>> #include "libbpf_common.h" >>>>> @@ -775,6 +777,48 @@ LIBBPF_API int bpf_linker__add_file(struct bpf_linker *linker, const char *filen >>>>> LIBBPF_API int bpf_linker__finalize(struct bpf_linker *linker); >>>>> LIBBPF_API void bpf_linker__free(struct bpf_linker *linker); >>>>> +/* Convenience macros for the clsact attach hooks */ >>>>> +#define BPF_TC_CLSACT_INGRESS TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS) >>>>> +#define BPF_TC_CLSACT_EGRESS TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_EGRESS) >>>> >>>> I would abstract those away into an enum, plus avoid having to pull in >>>> linux/pkt_sched.h and linux/tc_act/tc_bpf.h from main libbpf.h header. >>>> >>>> Just add a enum { BPF_TC_DIR_INGRESS, BPF_TC_DIR_EGRESS, } and then the >>>> concrete tc bits (TC_H_MAKE()) can be translated internally. >>> >>> Ok, will do. >>> >>>>> +struct bpf_tc_opts { >>>>> + size_t sz; >>>> >>>> Is this set anywhere? >>> >>> This is needed by the OPTS_* infrastructure. >>> >>>>> + __u32 handle; >>>>> + __u32 class_id; >>>> >>>> I'd remove class_id from here as well given in direct-action a BPF prog can >>>> set it if needed. >>> >>> Ok, makes sense. >>> >>>>> + __u16 priority; >>>>> + bool replace; >>>>> + size_t :0; >>>> >>>> What's the rationale for this padding? >>> >>> dde7b3f5f2f4 ("libbpf: Add explicit padding to bpf_xdp_set_link_opts") >> >> Hm, fair enough. >> >>>>> +}; >>>>> + >>>>> +#define bpf_tc_opts__last_field replace >>>>> + >>>>> +/* Acts as a handle for an attached filter */ >>>>> +struct bpf_tc_attach_id { >>>> >>>> nit: maybe bpf_tc_ctx >>> >>> Noted. >>> >>>>> + __u32 handle; >>>>> + __u16 priority; >>>>> +}; >>>>> + >>>>> +struct bpf_tc_info { >>>>> + struct bpf_tc_attach_id id; >>>>> + __u16 protocol; >>>>> + __u32 chain_index; >>>>> + __u32 prog_id; >>>>> + __u8 tag[BPF_TAG_SIZE]; >>>>> + __u32 class_id; >>>>> + __u32 bpf_flags; >>>>> + __u32 bpf_flags_gen; >>>> >>>> Given we do not yet have any setters e.g. for offload, etc, the one thing >>>> I'd see useful and crucial initially is prog_id. >>>> >>>> The protocol, chain_index, and I would also include tag should be dropped. >>> >>> A future user of this API needs to know the tag, so I would like to keep that. >>> The rest we can drop, and probably document the default values explicitly. >> >> Couldn't this be added along with the future patch for the [future] user? > > True. > >> The tag should be the tag of the prog itself, so if you have prog_id, you >> could also retrieve the same tag from the prog. The tag was basically from >> the early days where we didn't have bpf_prog_get_info_by_fd(). >> >> What does that future user need to do different here? > > From Shaun Crampton: > "My particular use case is to load a program, link it with its maps and then > check if its tag matches the existing program on the interface (and if so, abort > the update)" > > Also CC'd, they would be able to elaborate better, and whether or not dropping > it is ok. Nope, just get it from the prog itself.
On 4/21/21 10:06 AM, Kumar Kartikeya Dwivedi wrote: > On Wed, Apr 21, 2021 at 12:28:01PM IST, Yonghong Song wrote: >> >> >> On 4/20/21 12:37 PM, Kumar Kartikeya Dwivedi wrote: >>> This adds functions that wrap the netlink API used for adding, >>> manipulating, and removing traffic control filters. These functions >>> operate directly on the loaded prog's fd, and return a handle to the >>> filter using an out parameter named id. >>> >>> The basic featureset is covered to allow for attaching and removal of >>> filters. Some additional features like TCA_BPF_POLICE and TCA_RATE for >>> the API have been omitted. These can added on top later by extending the >> >> "later" => "layer"? >> > > No, I meant that other options can be added on top of this series by someone > later. I'll reword it. > [...] >>> >>> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> >>> Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> >>> --- >>> tools/lib/bpf/libbpf.h | 44 ++++++ >>> tools/lib/bpf/libbpf.map | 3 + >>> tools/lib/bpf/netlink.c | 319 ++++++++++++++++++++++++++++++++++++++- >>> 3 files changed, 360 insertions(+), 6 deletions(-) >>> >>> diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h >>> index bec4e6a6e31d..b4ed6a41ea70 100644 >>> --- a/tools/lib/bpf/libbpf.h >>> +++ b/tools/lib/bpf/libbpf.h >>> @@ -16,6 +16,8 @@ >>> #include <stdbool.h> >>> #include <sys/types.h> // for size_t >>> #include <linux/bpf.h> >>> +#include <linux/pkt_sched.h> >>> +#include <linux/tc_act/tc_bpf.h> >>> #include "libbpf_common.h" >>> @@ -775,6 +777,48 @@ LIBBPF_API int bpf_linker__add_file(struct bpf_linker *linker, const char *filen >>> LIBBPF_API int bpf_linker__finalize(struct bpf_linker *linker); >>> LIBBPF_API void bpf_linker__free(struct bpf_linker *linker); >>> +/* Convenience macros for the clsact attach hooks */ >>> +#define BPF_TC_CLSACT_INGRESS TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS) >>> +#define BPF_TC_CLSACT_EGRESS TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_EGRESS) >>> + >>> +struct bpf_tc_opts { >>> + size_t sz; >>> + __u32 handle; >>> + __u32 class_id; >>> + __u16 priority; >>> + bool replace; >>> + size_t :0; >> >> Did you see any error without "size_t :0"? >> > > Not really, I just added this considering this recent change: > dde7b3f5f2f4 ("libbpf: Add explicit padding to bpf_xdp_set_link_opts") Thanks. That is fine with me then. > >>> +}; >>> + >>> +#define bpf_tc_opts__last_field replace >>> + >>> +/* Acts as a handle for an attached filter */ >>> +struct bpf_tc_attach_id { >>> + __u32 handle; >>> + __u16 priority; >>> +}; >>> + >> [...] > > -- > Kartikeya >
On Wed, Apr 21, 2021 at 3:59 PM Daniel Borkmann <daniel@iogearbox.net> wrote: > > On 4/20/21 9:37 PM, Kumar Kartikeya Dwivedi wrote: > [...] > > diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h > > index bec4e6a6e31d..b4ed6a41ea70 100644 > > --- a/tools/lib/bpf/libbpf.h > > +++ b/tools/lib/bpf/libbpf.h > > @@ -16,6 +16,8 @@ > > #include <stdbool.h> > > #include <sys/types.h> // for size_t > > #include <linux/bpf.h> > > +#include <linux/pkt_sched.h> > > +#include <linux/tc_act/tc_bpf.h> > > > > #include "libbpf_common.h" > > > > @@ -775,6 +777,48 @@ LIBBPF_API int bpf_linker__add_file(struct bpf_linker *linker, const char *filen > > LIBBPF_API int bpf_linker__finalize(struct bpf_linker *linker); > > LIBBPF_API void bpf_linker__free(struct bpf_linker *linker); > > > > +/* Convenience macros for the clsact attach hooks */ > > +#define BPF_TC_CLSACT_INGRESS TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS) > > +#define BPF_TC_CLSACT_EGRESS TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_EGRESS) > > I would abstract those away into an enum, plus avoid having to pull in > linux/pkt_sched.h and linux/tc_act/tc_bpf.h from main libbpf.h header. > > Just add a enum { BPF_TC_DIR_INGRESS, BPF_TC_DIR_EGRESS, } and then the > concrete tc bits (TC_H_MAKE()) can be translated internally. > > > +struct bpf_tc_opts { > > + size_t sz; > > Is this set anywhere? > > > + __u32 handle; > > + __u32 class_id; > > I'd remove class_id from here as well given in direct-action a BPF prog can > set it if needed. > > > + __u16 priority; > > + bool replace; > > + size_t :0; > > What's the rationale for this padding? > > > +}; > > + > > +#define bpf_tc_opts__last_field replace > > + > > +/* Acts as a handle for an attached filter */ > > +struct bpf_tc_attach_id { > > nit: maybe bpf_tc_ctx ok, so wait. It seems like apart from INGRESS|EGRESS enum and ifindex, everything else is optional and/or has some sane defaults, right? So this bpf_tc_attach_id or bpf_tc_ctx seems a bit artificial construct and it will cause problems for extending this. So if my understanding is correct, I'd get rid of it completely. As I said previously, opts allow returning parameters back, so if user didn't specify handle and priority and kernel picks values on user's behalf, we can return them in the same opts fields. For detach, again, ifindex and INGRESS|EGRESS is sufficient, but if user want to provide more detailed parameters, we should do that through extensible opts. That way we can keep growing this easily, plus simple cases will remain simple. Similarly bpf_tc_info below, there is no need to have struct bpf_tc_attach_id id; field, just have handle and priority right there. And bpf_tc_info should use OPTS framework for extensibility (though opts name doesn't fit it very well, but it is still nice for extensibility and for doing optional input/output params). Does this make sense? Am I missing something crucial here? The general rule with any new structs added to libbpf APIs is to either be 100% (ok, 99.99%) sure that they will never be changed, or do forward/backward compatible OPTS. Any other thing is pain and calls for symbol versioning, which we are trying really hard to avoid. > > > + __u32 handle; > > + __u16 priority; > > +}; > > + > > +struct bpf_tc_info { > > + struct bpf_tc_attach_id id; > > + __u16 protocol; > > + __u32 chain_index; > > + __u32 prog_id; > > + __u8 tag[BPF_TAG_SIZE]; > > + __u32 class_id; > > + __u32 bpf_flags; > > + __u32 bpf_flags_gen; > > Given we do not yet have any setters e.g. for offload, etc, the one thing > I'd see useful and crucial initially is prog_id. > > The protocol, chain_index, and I would also include tag should be dropped. > Similarly class_id given my earlier statement, and flags I would extend once > this lib API would support offloading progs. > > > +}; > > + > > +/* id is out parameter that will be written to, it must not be NULL */ > > +LIBBPF_API int bpf_tc_attach(int fd, __u32 ifindex, __u32 parent_id, > > + const struct bpf_tc_opts *opts, > > + struct bpf_tc_attach_id *id); > > +LIBBPF_API int bpf_tc_detach(__u32 ifindex, __u32 parent_id, > > + const struct bpf_tc_attach_id *id); > > +LIBBPF_API int bpf_tc_get_info(__u32 ifindex, __u32 parent_id, > > + const struct bpf_tc_attach_id *id, > > + struct bpf_tc_info *info); > > As per above, for parent_id I'd replace with dir enum. > > > + > > #ifdef __cplusplus > > } /* extern "C" */ > > #endif
Daniel Borkmann <daniel@iogearbox.net> writes: > On 4/21/21 9:48 PM, Toke Høiland-Jørgensen wrote: >> Andrii Nakryiko <andrii.nakryiko@gmail.com> writes: >>> On Tue, Apr 20, 2021 at 12:37 PM Kumar Kartikeya Dwivedi >>> <memxor@gmail.com> wrote: > [...] >>>> --- >>>> tools/lib/bpf/libbpf.h | 44 ++++++ >>>> tools/lib/bpf/libbpf.map | 3 + >>>> tools/lib/bpf/netlink.c | 319 ++++++++++++++++++++++++++++++++++++++- >>>> 3 files changed, 360 insertions(+), 6 deletions(-) >>>> >>>> diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h >>>> index bec4e6a6e31d..b4ed6a41ea70 100644 >>>> --- a/tools/lib/bpf/libbpf.h >>>> +++ b/tools/lib/bpf/libbpf.h >>>> @@ -16,6 +16,8 @@ >>>> #include <stdbool.h> >>>> #include <sys/types.h> // for size_t >>>> #include <linux/bpf.h> >>>> +#include <linux/pkt_sched.h> >>>> +#include <linux/tc_act/tc_bpf.h> >>> >>> apart from those unused macros below, are these needed in public API header? >>> >>>> #include "libbpf_common.h" >>>> >>>> @@ -775,6 +777,48 @@ LIBBPF_API int bpf_linker__add_file(struct bpf_linker *linker, const char *filen >>>> LIBBPF_API int bpf_linker__finalize(struct bpf_linker *linker); >>>> LIBBPF_API void bpf_linker__free(struct bpf_linker *linker); >>>> >>>> +/* Convenience macros for the clsact attach hooks */ >>>> +#define BPF_TC_CLSACT_INGRESS TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS) >>>> +#define BPF_TC_CLSACT_EGRESS TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_EGRESS) >>> >>> these seem to be used only internally, why exposing them in public >>> API? >> >> No they're "aliases" for when you want to attach the filter directly to >> the interface (and thus install the clsact qdisc as the root). You can >> also use the filter with an existing qdisc (most commonly HTB), in which >> case you need to specify the qdisc handle as the root. We have a few >> examples of this use case: >> >> https://github.com/xdp-project/bpf-examples/tree/master/traffic-pacing-edt >> and >> https://github.com/xdp-project/xdp-cpumap-tc > > I'm a bit puzzled, could you elaborate on your use case on why you wouldn't > use the tc egress hook for those especially given it's guaranteed to run > outside of root qdisc lock? Jesper can correct me if I'm wrong, but I think the first one of the links above is basically his implementation of just that EDT-based shaper. And it works reasonably well, except you don't get the nice per-flow scheduling and sparse flow prioritisation like in FQ-CoDel unless you implement that yourself in BPF when you set the timestamps (and that is by no means trivial to implement). So if you want to use any of the features of the existing qdiscs (I have also been suggesting to people that they use tc_bpf if they want to customise sch_cake's notion of flows or shaping tiers), you need to be able to attach the filter to an existing qdisc. Sure, this means you're still stuck behind the qdisc lock, but for some applications that is fine (not everything is a data centre, some devices don't have that many CPUs anyway; and as the second example above shows, you can get around the qdisc lock by some clever use of partitioning of flows using cpumap). So what this boils down to is, we should keep the 'parent' parameter not just as an egress/ingress enum, but also as a field the user can fill out. I'm fine with moving the latter into the opts struct, though, so maybe the function parameter can be an enum like: enum bpf_tc_attach_point { BPF_TC_CLSACT_INGRESS, BPF_TC_CLSACT_EGRESS, BPF_TC_QDISC_PARENT }; where if you set the last one you have to fill in the parent in opts? -Toke
> Nope, just get it from the prog itself.
Looks like the API returns the prog ID, so from that we can look up the prog
and then get its tag? If so that meets our needs. Alternatively, if
the API allows
for atomic replacement of a BPF program with another, that'd also work for us.
The use case is that our daemon is restarted and it doesn't know what BPF
program was previously loaded. We want to make sure we end up with the
latest version loaded, either by doing a seamless replace with the
latest version
or by checking if the loaded version matches the latest.
On 4/22/21 11:47 AM, Shaun Crampton wrote: >> Nope, just get it from the prog itself. > > Looks like the API returns the prog ID, so from that we can look up the prog > and then get its tag? If so that meets our needs. Alternatively, if > the API allows > for atomic replacement of a BPF program with another, that'd also work for us. Both is the case: from prog ID you can already retrieve that same tag, and progs can be atomically replaced with the current API code. Exposing the tag in here otherwise feels just odd/wrong from a design PoV, explain that to a user of this API on /why/ such field is in the tc API when it already can be retrieved via bpf_prog_get_info_by_fd(), as in, what is so special on this field in the tc API here (aside from legacy reasons when there was no mentioned helper [which we don't need to support given it dates way too far back]) ... I cannot. ;-) Hence lets drop it from there. Thanks, Daniel
On 4/22/21 11:08 AM, Toke Høiland-Jørgensen wrote: > Daniel Borkmann <daniel@iogearbox.net> writes: >> On 4/21/21 9:48 PM, Toke Høiland-Jørgensen wrote: >>> Andrii Nakryiko <andrii.nakryiko@gmail.com> writes: >>>> On Tue, Apr 20, 2021 at 12:37 PM Kumar Kartikeya Dwivedi >>>> <memxor@gmail.com> wrote: >> [...] >>>>> --- >>>>> tools/lib/bpf/libbpf.h | 44 ++++++ >>>>> tools/lib/bpf/libbpf.map | 3 + >>>>> tools/lib/bpf/netlink.c | 319 ++++++++++++++++++++++++++++++++++++++- >>>>> 3 files changed, 360 insertions(+), 6 deletions(-) >>>>> >>>>> diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h >>>>> index bec4e6a6e31d..b4ed6a41ea70 100644 >>>>> --- a/tools/lib/bpf/libbpf.h >>>>> +++ b/tools/lib/bpf/libbpf.h >>>>> @@ -16,6 +16,8 @@ >>>>> #include <stdbool.h> >>>>> #include <sys/types.h> // for size_t >>>>> #include <linux/bpf.h> >>>>> +#include <linux/pkt_sched.h> >>>>> +#include <linux/tc_act/tc_bpf.h> >>>> >>>> apart from those unused macros below, are these needed in public API header? >>>> >>>>> #include "libbpf_common.h" >>>>> >>>>> @@ -775,6 +777,48 @@ LIBBPF_API int bpf_linker__add_file(struct bpf_linker *linker, const char *filen >>>>> LIBBPF_API int bpf_linker__finalize(struct bpf_linker *linker); >>>>> LIBBPF_API void bpf_linker__free(struct bpf_linker *linker); >>>>> >>>>> +/* Convenience macros for the clsact attach hooks */ >>>>> +#define BPF_TC_CLSACT_INGRESS TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS) >>>>> +#define BPF_TC_CLSACT_EGRESS TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_EGRESS) >>>> >>>> these seem to be used only internally, why exposing them in public >>>> API? >>> >>> No they're "aliases" for when you want to attach the filter directly to >>> the interface (and thus install the clsact qdisc as the root). You can >>> also use the filter with an existing qdisc (most commonly HTB), in which >>> case you need to specify the qdisc handle as the root. We have a few >>> examples of this use case: >>> >>> https://github.com/xdp-project/bpf-examples/tree/master/traffic-pacing-edt >>> and >>> https://github.com/xdp-project/xdp-cpumap-tc >> >> I'm a bit puzzled, could you elaborate on your use case on why you wouldn't >> use the tc egress hook for those especially given it's guaranteed to run >> outside of root qdisc lock? > > Jesper can correct me if I'm wrong, but I think the first one of the > links above is basically his implementation of just that EDT-based > shaper. And it works reasonably well, except you don't get the nice > per-flow scheduling and sparse flow prioritisation like in FQ-CoDel > unless you implement that yourself in BPF when you set the timestamps > (and that is by no means trivial to implement). > > So if you want to use any of the features of the existing qdiscs (I have > also been suggesting to people that they use tc_bpf if they want to > customise sch_cake's notion of flows or shaping tiers), you need to be > able to attach the filter to an existing qdisc. Sure, this means you're > still stuck behind the qdisc lock, but for some applications that is > fine (not everything is a data centre, some devices don't have that many > CPUs anyway; and as the second example above shows, you can get around > the qdisc lock by some clever use of partitioning of flows using > cpumap). > > So what this boils down to is, we should keep the 'parent' parameter not > just as an egress/ingress enum, but also as a field the user can fill > out. I'm fine with moving the latter into the opts struct, though, so > maybe the function parameter can be an enum like: > > enum bpf_tc_attach_point { > BPF_TC_CLSACT_INGRESS, > BPF_TC_CLSACT_EGRESS, > BPF_TC_QDISC_PARENT > }; > > where if you set the last one you have to fill in the parent in opts? Fair enough, I still think this is a bit backwards and should be discouraged given the constraints, but if you have an actual need for it ... I'd rather simplify API naming, the fact that it's clsact is an implementation detail and shouldn't matter to a user, like: enum bpf_tc_attach_point { BPF_TC_INGRESS, BPF_TC_EGRESS, BPF_TC_CUSTOM_PARENT, }; For BPF_TC_INGRESS and BPF_TC_EGRESS, I would enforce opts parent parameter to be /zero/ from the API. Thanks, Daniel
Daniel Borkmann <daniel@iogearbox.net> writes: > On 4/22/21 11:08 AM, Toke Høiland-Jørgensen wrote: >> Daniel Borkmann <daniel@iogearbox.net> writes: >>> On 4/21/21 9:48 PM, Toke Høiland-Jørgensen wrote: >>>> Andrii Nakryiko <andrii.nakryiko@gmail.com> writes: >>>>> On Tue, Apr 20, 2021 at 12:37 PM Kumar Kartikeya Dwivedi >>>>> <memxor@gmail.com> wrote: >>> [...] >>>>>> --- >>>>>> tools/lib/bpf/libbpf.h | 44 ++++++ >>>>>> tools/lib/bpf/libbpf.map | 3 + >>>>>> tools/lib/bpf/netlink.c | 319 ++++++++++++++++++++++++++++++++++++++- >>>>>> 3 files changed, 360 insertions(+), 6 deletions(-) >>>>>> >>>>>> diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h >>>>>> index bec4e6a6e31d..b4ed6a41ea70 100644 >>>>>> --- a/tools/lib/bpf/libbpf.h >>>>>> +++ b/tools/lib/bpf/libbpf.h >>>>>> @@ -16,6 +16,8 @@ >>>>>> #include <stdbool.h> >>>>>> #include <sys/types.h> // for size_t >>>>>> #include <linux/bpf.h> >>>>>> +#include <linux/pkt_sched.h> >>>>>> +#include <linux/tc_act/tc_bpf.h> >>>>> >>>>> apart from those unused macros below, are these needed in public API header? >>>>> >>>>>> #include "libbpf_common.h" >>>>>> >>>>>> @@ -775,6 +777,48 @@ LIBBPF_API int bpf_linker__add_file(struct bpf_linker *linker, const char *filen >>>>>> LIBBPF_API int bpf_linker__finalize(struct bpf_linker *linker); >>>>>> LIBBPF_API void bpf_linker__free(struct bpf_linker *linker); >>>>>> >>>>>> +/* Convenience macros for the clsact attach hooks */ >>>>>> +#define BPF_TC_CLSACT_INGRESS TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS) >>>>>> +#define BPF_TC_CLSACT_EGRESS TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_EGRESS) >>>>> >>>>> these seem to be used only internally, why exposing them in public >>>>> API? >>>> >>>> No they're "aliases" for when you want to attach the filter directly to >>>> the interface (and thus install the clsact qdisc as the root). You can >>>> also use the filter with an existing qdisc (most commonly HTB), in which >>>> case you need to specify the qdisc handle as the root. We have a few >>>> examples of this use case: >>>> >>>> https://github.com/xdp-project/bpf-examples/tree/master/traffic-pacing-edt >>>> and >>>> https://github.com/xdp-project/xdp-cpumap-tc >>> >>> I'm a bit puzzled, could you elaborate on your use case on why you wouldn't >>> use the tc egress hook for those especially given it's guaranteed to run >>> outside of root qdisc lock? >> >> Jesper can correct me if I'm wrong, but I think the first one of the >> links above is basically his implementation of just that EDT-based >> shaper. And it works reasonably well, except you don't get the nice >> per-flow scheduling and sparse flow prioritisation like in FQ-CoDel >> unless you implement that yourself in BPF when you set the timestamps >> (and that is by no means trivial to implement). >> >> So if you want to use any of the features of the existing qdiscs (I have >> also been suggesting to people that they use tc_bpf if they want to >> customise sch_cake's notion of flows or shaping tiers), you need to be >> able to attach the filter to an existing qdisc. Sure, this means you're >> still stuck behind the qdisc lock, but for some applications that is >> fine (not everything is a data centre, some devices don't have that many >> CPUs anyway; and as the second example above shows, you can get around >> the qdisc lock by some clever use of partitioning of flows using >> cpumap). >> >> So what this boils down to is, we should keep the 'parent' parameter not >> just as an egress/ingress enum, but also as a field the user can fill >> out. I'm fine with moving the latter into the opts struct, though, so >> maybe the function parameter can be an enum like: >> >> enum bpf_tc_attach_point { >> BPF_TC_CLSACT_INGRESS, >> BPF_TC_CLSACT_EGRESS, >> BPF_TC_QDISC_PARENT >> }; >> >> where if you set the last one you have to fill in the parent in opts? > > Fair enough, I still think this is a bit backwards and should be discouraged > given the constraints, but if you have an actual need for it ... I'd rather > simplify API naming, the fact that it's clsact is an implementation detail > and shouldn't matter to a user, like: > > enum bpf_tc_attach_point { > BPF_TC_INGRESS, > BPF_TC_EGRESS, > BPF_TC_CUSTOM_PARENT, > }; > > For BPF_TC_INGRESS and BPF_TC_EGRESS, I would enforce opts parent parameter > to be /zero/ from the API. OK, SGTM :) -Toke
On 4/22/21 5:43 AM, Andrii Nakryiko wrote: > On Wed, Apr 21, 2021 at 3:59 PM Daniel Borkmann <daniel@iogearbox.net> wrote: >> On 4/20/21 9:37 PM, Kumar Kartikeya Dwivedi wrote: >> [...] >>> diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h >>> index bec4e6a6e31d..b4ed6a41ea70 100644 >>> --- a/tools/lib/bpf/libbpf.h >>> +++ b/tools/lib/bpf/libbpf.h >>> @@ -16,6 +16,8 @@ >>> #include <stdbool.h> >>> #include <sys/types.h> // for size_t >>> #include <linux/bpf.h> >>> +#include <linux/pkt_sched.h> >>> +#include <linux/tc_act/tc_bpf.h> >>> >>> #include "libbpf_common.h" >>> >>> @@ -775,6 +777,48 @@ LIBBPF_API int bpf_linker__add_file(struct bpf_linker *linker, const char *filen >>> LIBBPF_API int bpf_linker__finalize(struct bpf_linker *linker); >>> LIBBPF_API void bpf_linker__free(struct bpf_linker *linker); >>> >>> +/* Convenience macros for the clsact attach hooks */ >>> +#define BPF_TC_CLSACT_INGRESS TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS) >>> +#define BPF_TC_CLSACT_EGRESS TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_EGRESS) >> >> I would abstract those away into an enum, plus avoid having to pull in >> linux/pkt_sched.h and linux/tc_act/tc_bpf.h from main libbpf.h header. >> >> Just add a enum { BPF_TC_DIR_INGRESS, BPF_TC_DIR_EGRESS, } and then the >> concrete tc bits (TC_H_MAKE()) can be translated internally. >> >>> +struct bpf_tc_opts { >>> + size_t sz; >> >> Is this set anywhere? >> >>> + __u32 handle; >>> + __u32 class_id; >> >> I'd remove class_id from here as well given in direct-action a BPF prog can >> set it if needed. >> >>> + __u16 priority; >>> + bool replace; >>> + size_t :0; >> >> What's the rationale for this padding? >> >>> +}; >>> + >>> +#define bpf_tc_opts__last_field replace >>> + >>> +/* Acts as a handle for an attached filter */ >>> +struct bpf_tc_attach_id { >> >> nit: maybe bpf_tc_ctx > > ok, so wait. It seems like apart from INGRESS|EGRESS enum and ifindex, > everything else is optional and/or has some sane defaults, right? So > this bpf_tc_attach_id or bpf_tc_ctx seems a bit artificial construct > and it will cause problems for extending this. > > So if my understanding is correct, I'd get rid of it completely. As I > said previously, opts allow returning parameters back, so if user > didn't specify handle and priority and kernel picks values on user's > behalf, we can return them in the same opts fields. > > For detach, again, ifindex and INGRESS|EGRESS is sufficient, but if > user want to provide more detailed parameters, we should do that > through extensible opts. That way we can keep growing this easily, > plus simple cases will remain simple. > > Similarly bpf_tc_info below, there is no need to have struct > bpf_tc_attach_id id; field, just have handle and priority right there. > And bpf_tc_info should use OPTS framework for extensibility (though > opts name doesn't fit it very well, but it is still nice for > extensibility and for doing optional input/output params). > > Does this make sense? Am I missing something crucial here? I would probably keep the handle + priority in there; maybe if both are 0, we could fix it to some default value internally, but without those it might be a bit hard if people want to build a 'pipeline' of cls_bpf progs if they need/want to. Potentially, one could fixate the handle itself, and then allow to specify different priorities for it such that when a BPF prog returns a TC_ACT_UNSPEC, it will exec the next one inside that cls_bpf instance, every other TC_ACT_* opcode will terminate the processing. Technically, only priority would really be needed (unless you combine multiple different classifiers from tc side on the ingress/egress hook which is not great to begin with, tbh). > The general rule with any new structs added to libbpf APIs is to > either be 100% (ok, 99.99%) sure that they will never be changed, or > do forward/backward compatible OPTS. Any other thing is pain and calls > for symbol versioning, which we are trying really hard to avoid. > >>> + __u32 handle; >>> + __u16 priority; >>> +}; >>> + >>> +struct bpf_tc_info { >>> + struct bpf_tc_attach_id id; >>> + __u16 protocol; >>> + __u32 chain_index; >>> + __u32 prog_id; >>> + __u8 tag[BPF_TAG_SIZE]; >>> + __u32 class_id; >>> + __u32 bpf_flags; >>> + __u32 bpf_flags_gen; >> >> Given we do not yet have any setters e.g. for offload, etc, the one thing >> I'd see useful and crucial initially is prog_id. >> >> The protocol, chain_index, and I would also include tag should be dropped. >> Similarly class_id given my earlier statement, and flags I would extend once >> this lib API would support offloading progs. >> >>> +}; >>> + >>> +/* id is out parameter that will be written to, it must not be NULL */ >>> +LIBBPF_API int bpf_tc_attach(int fd, __u32 ifindex, __u32 parent_id, >>> + const struct bpf_tc_opts *opts, >>> + struct bpf_tc_attach_id *id); >>> +LIBBPF_API int bpf_tc_detach(__u32 ifindex, __u32 parent_id, >>> + const struct bpf_tc_attach_id *id); >>> +LIBBPF_API int bpf_tc_get_info(__u32 ifindex, __u32 parent_id, >>> + const struct bpf_tc_attach_id *id, >>> + struct bpf_tc_info *info); >> >> As per above, for parent_id I'd replace with dir enum. >> >>> + >>> #ifdef __cplusplus >>> } /* extern "C" */ >>> #endif
On Thu, Apr 22, 2021 at 8:35 AM Daniel Borkmann <daniel@iogearbox.net> wrote: > > On 4/22/21 5:43 AM, Andrii Nakryiko wrote: > > On Wed, Apr 21, 2021 at 3:59 PM Daniel Borkmann <daniel@iogearbox.net> wrote: > >> On 4/20/21 9:37 PM, Kumar Kartikeya Dwivedi wrote: > >> [...] > >>> diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h > >>> index bec4e6a6e31d..b4ed6a41ea70 100644 > >>> --- a/tools/lib/bpf/libbpf.h > >>> +++ b/tools/lib/bpf/libbpf.h > >>> @@ -16,6 +16,8 @@ > >>> #include <stdbool.h> > >>> #include <sys/types.h> // for size_t > >>> #include <linux/bpf.h> > >>> +#include <linux/pkt_sched.h> > >>> +#include <linux/tc_act/tc_bpf.h> > >>> > >>> #include "libbpf_common.h" > >>> > >>> @@ -775,6 +777,48 @@ LIBBPF_API int bpf_linker__add_file(struct bpf_linker *linker, const char *filen > >>> LIBBPF_API int bpf_linker__finalize(struct bpf_linker *linker); > >>> LIBBPF_API void bpf_linker__free(struct bpf_linker *linker); > >>> > >>> +/* Convenience macros for the clsact attach hooks */ > >>> +#define BPF_TC_CLSACT_INGRESS TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS) > >>> +#define BPF_TC_CLSACT_EGRESS TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_EGRESS) > >> > >> I would abstract those away into an enum, plus avoid having to pull in > >> linux/pkt_sched.h and linux/tc_act/tc_bpf.h from main libbpf.h header. > >> > >> Just add a enum { BPF_TC_DIR_INGRESS, BPF_TC_DIR_EGRESS, } and then the > >> concrete tc bits (TC_H_MAKE()) can be translated internally. > >> > >>> +struct bpf_tc_opts { > >>> + size_t sz; > >> > >> Is this set anywhere? > >> > >>> + __u32 handle; > >>> + __u32 class_id; > >> > >> I'd remove class_id from here as well given in direct-action a BPF prog can > >> set it if needed. > >> > >>> + __u16 priority; > >>> + bool replace; > >>> + size_t :0; > >> > >> What's the rationale for this padding? > >> > >>> +}; > >>> + > >>> +#define bpf_tc_opts__last_field replace > >>> + > >>> +/* Acts as a handle for an attached filter */ > >>> +struct bpf_tc_attach_id { > >> > >> nit: maybe bpf_tc_ctx > > > > ok, so wait. It seems like apart from INGRESS|EGRESS enum and ifindex, > > everything else is optional and/or has some sane defaults, right? So > > this bpf_tc_attach_id or bpf_tc_ctx seems a bit artificial construct > > and it will cause problems for extending this. > > > > So if my understanding is correct, I'd get rid of it completely. As I > > said previously, opts allow returning parameters back, so if user > > didn't specify handle and priority and kernel picks values on user's > > behalf, we can return them in the same opts fields. > > > > For detach, again, ifindex and INGRESS|EGRESS is sufficient, but if > > user want to provide more detailed parameters, we should do that > > through extensible opts. That way we can keep growing this easily, > > plus simple cases will remain simple. > > > > Similarly bpf_tc_info below, there is no need to have struct > > bpf_tc_attach_id id; field, just have handle and priority right there. > > And bpf_tc_info should use OPTS framework for extensibility (though > > opts name doesn't fit it very well, but it is still nice for > > extensibility and for doing optional input/output params). > > > > Does this make sense? Am I missing something crucial here? > > I would probably keep the handle + priority in there; maybe if both are 0, > we could fix it to some default value internally, but without those it might > be a bit hard if people want to build a 'pipeline' of cls_bpf progs if they > need/want to. Oh, I'm not proposing to drop support for specifying handle and prio. I'm just saying having a fixed UAPI struct bpf_tc_attach_id as an "ID" is problematic from API stability point of view. So instead of pretending we know what "ID" will always be like, pass any extra non-default fields in OPTS struct. And if those are not specified by user (either opts is NULL or handle/prio is 0), use sane defaults, as you are proposing. > > Potentially, one could fixate the handle itself, and then allow to specify > different priorities for it such that when a BPF prog returns a TC_ACT_UNSPEC, > it will exec the next one inside that cls_bpf instance, every other TC_ACT_* > opcode will terminate the processing. Technically, only priority would really > be needed (unless you combine multiple different classifiers from tc side on > the ingress/egress hook which is not great to begin with, tbh). > > > The general rule with any new structs added to libbpf APIs is to > > either be 100% (ok, 99.99%) sure that they will never be changed, or > > do forward/backward compatible OPTS. Any other thing is pain and calls > > for symbol versioning, which we are trying really hard to avoid. > > > >>> + __u32 handle; > >>> + __u16 priority; > >>> +}; > >>> + > >>> +struct bpf_tc_info { > >>> + struct bpf_tc_attach_id id; > >>> + __u16 protocol; > >>> + __u32 chain_index; > >>> + __u32 prog_id; > >>> + __u8 tag[BPF_TAG_SIZE]; > >>> + __u32 class_id; > >>> + __u32 bpf_flags; > >>> + __u32 bpf_flags_gen; > >> > >> Given we do not yet have any setters e.g. for offload, etc, the one thing > >> I'd see useful and crucial initially is prog_id. > >> > >> The protocol, chain_index, and I would also include tag should be dropped. > >> Similarly class_id given my earlier statement, and flags I would extend once > >> this lib API would support offloading progs. > >> > >>> +}; > >>> + > >>> +/* id is out parameter that will be written to, it must not be NULL */ > >>> +LIBBPF_API int bpf_tc_attach(int fd, __u32 ifindex, __u32 parent_id, > >>> + const struct bpf_tc_opts *opts, > >>> + struct bpf_tc_attach_id *id); > >>> +LIBBPF_API int bpf_tc_detach(__u32 ifindex, __u32 parent_id, > >>> + const struct bpf_tc_attach_id *id); > >>> +LIBBPF_API int bpf_tc_get_info(__u32 ifindex, __u32 parent_id, > >>> + const struct bpf_tc_attach_id *id, > >>> + struct bpf_tc_info *info); > >> > >> As per above, for parent_id I'd replace with dir enum. > >> > >>> + > >>> #ifdef __cplusplus > >>> } /* extern "C" */ > >>> #endif >
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index bec4e6a6e31d..b4ed6a41ea70 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -16,6 +16,8 @@ #include <stdbool.h> #include <sys/types.h> // for size_t #include <linux/bpf.h> +#include <linux/pkt_sched.h> +#include <linux/tc_act/tc_bpf.h> #include "libbpf_common.h" @@ -775,6 +777,48 @@ LIBBPF_API int bpf_linker__add_file(struct bpf_linker *linker, const char *filen LIBBPF_API int bpf_linker__finalize(struct bpf_linker *linker); LIBBPF_API void bpf_linker__free(struct bpf_linker *linker); +/* Convenience macros for the clsact attach hooks */ +#define BPF_TC_CLSACT_INGRESS TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS) +#define BPF_TC_CLSACT_EGRESS TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_EGRESS) + +struct bpf_tc_opts { + size_t sz; + __u32 handle; + __u32 class_id; + __u16 priority; + bool replace; + size_t :0; +}; + +#define bpf_tc_opts__last_field replace + +/* Acts as a handle for an attached filter */ +struct bpf_tc_attach_id { + __u32 handle; + __u16 priority; +}; + +struct bpf_tc_info { + struct bpf_tc_attach_id id; + __u16 protocol; + __u32 chain_index; + __u32 prog_id; + __u8 tag[BPF_TAG_SIZE]; + __u32 class_id; + __u32 bpf_flags; + __u32 bpf_flags_gen; +}; + +/* id is out parameter that will be written to, it must not be NULL */ +LIBBPF_API int bpf_tc_attach(int fd, __u32 ifindex, __u32 parent_id, + const struct bpf_tc_opts *opts, + struct bpf_tc_attach_id *id); +LIBBPF_API int bpf_tc_detach(__u32 ifindex, __u32 parent_id, + const struct bpf_tc_attach_id *id); +LIBBPF_API int bpf_tc_get_info(__u32 ifindex, __u32 parent_id, + const struct bpf_tc_attach_id *id, + struct bpf_tc_info *info); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index b9b29baf1df8..686444fbb838 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -361,4 +361,7 @@ LIBBPF_0.4.0 { bpf_linker__new; bpf_map__inner_map; bpf_object__set_kversion; + bpf_tc_attach; + bpf_tc_detach; + bpf_tc_get_info; } LIBBPF_0.3.0; diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index c79e30484e81..71109dcea9e4 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -4,7 +4,10 @@ #include <stdlib.h> #include <memory.h> #include <unistd.h> +#include <inttypes.h> +#include <arpa/inet.h> #include <linux/bpf.h> +#include <linux/if_ether.h> #include <linux/rtnetlink.h> #include <sys/socket.h> #include <errno.h> @@ -344,6 +347,20 @@ int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags) return ret; } +static int bpf_nl_get_ext(struct nlmsghdr *nh, int sock, unsigned int nl_pid, + __dump_nlmsg_t dump_link_nlmsg_p, + libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie) +{ + int seq = time(NULL); + + nh->nlmsg_seq = seq; + if (send(sock, nh, nh->nlmsg_len, 0) < 0) + return -errno; + + return bpf_netlink_recv(sock, nl_pid, seq, dump_link_nlmsg_p, + dump_link_nlmsg, cookie); +} + int libbpf_nl_get_link(int sock, unsigned int nl_pid, libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie) { @@ -356,12 +373,302 @@ int libbpf_nl_get_link(int sock, unsigned int nl_pid, .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, .ifm.ifi_family = AF_PACKET, }; - int seq = time(NULL); - req.nlh.nlmsg_seq = seq; - if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0) - return -errno; + return bpf_nl_get_ext(&req.nlh, sock, nl_pid, __dump_link_nlmsg, + dump_link_nlmsg, cookie); +} - return bpf_netlink_recv(sock, nl_pid, seq, __dump_link_nlmsg, - dump_link_nlmsg, cookie); +static int tc_setup_clsact_excl(int sock, __u32 nl_pid, __u32 ifindex) +{ + int seq = 0, ret = 0; + struct { + struct nlmsghdr nh; + struct tcmsg t; + char buf[256]; + } req; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); + req.nh.nlmsg_flags = + NLM_F_REQUEST | NLM_F_CREATE | NLM_F_ACK | NLM_F_EXCL; + req.nh.nlmsg_type = RTM_NEWQDISC; + req.nh.nlmsg_pid = 0; + req.nh.nlmsg_seq = ++seq; + req.t.tcm_family = AF_UNSPEC; + req.t.tcm_ifindex = ifindex; + req.t.tcm_parent = TC_H_CLSACT; + req.t.tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0); + + ret = nlattr_add(&req.nh, sizeof(req), TCA_KIND, "clsact", + sizeof("clsact")); + if (ret < 0) + return ret; + + ret = send(sock, &req.nh, req.nh.nlmsg_len, 0); + if (ret < 0) + return ret; + + return bpf_netlink_recv(sock, nl_pid, seq, NULL, NULL, NULL); +} + +static int tc_bpf_add_fd_and_name(struct nlmsghdr *nh, size_t maxsz, int fd) +{ + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + char name[256] = {}; + int len, ret; + + ret = bpf_obj_get_info_by_fd(fd, &info, &info_len); + if (ret < 0) + return ret; + + ret = nlattr_add(nh, maxsz, TCA_BPF_FD, &fd, sizeof(fd)); + if (ret < 0) + return ret; + + len = snprintf(name, sizeof(name), "%s:[%" PRIu32 "]", info.name, + info.id); + if (len < 0 || len >= sizeof(name)) + return len < 0 ? -EINVAL : -ENAMETOOLONG; + + return nlattr_add(nh, maxsz, TCA_BPF_NAME, name, len + 1); +} + +static int cls_get_info(struct nlmsghdr *nh, libbpf_dump_nlmsg_t fn, + void *cookie); + +static int tc_cls_bpf_modify(int fd, int cmd, unsigned int flags, __u32 ifindex, + __u32 parent_id, const struct bpf_tc_opts *opts, + __dump_nlmsg_t fn, struct bpf_tc_attach_id *id) +{ + __u32 nl_pid = 0, protocol, priority; + struct bpf_tc_info info = {}; + unsigned int bpf_flags = 0; + int sock, seq = 0, ret; + struct nlattr *nla; + struct { + struct nlmsghdr nh; + struct tcmsg t; + char buf[256]; + } req; + + sock = libbpf_netlink_open(&nl_pid); + if (sock < 0) + return sock; + + if ((parent_id == BPF_TC_CLSACT_INGRESS || + parent_id == BPF_TC_CLSACT_EGRESS) && + flags & NLM_F_CREATE) { + ret = tc_setup_clsact_excl(sock, nl_pid, ifindex); + /* Attachment can still fail if ingress qdisc is installed, and + * we're trying attach on egress as parent. Ignore in that case + * as well. + */ + if (ret < 0 && ret != -EEXIST) + goto end; + } + + priority = OPTS_GET(opts, priority, 0); + protocol = ETH_P_ALL; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags; + req.nh.nlmsg_type = cmd; + req.nh.nlmsg_pid = 0; + req.nh.nlmsg_seq = ++seq; + req.t.tcm_family = AF_UNSPEC; + req.t.tcm_handle = OPTS_GET(opts, handle, 0); + req.t.tcm_parent = parent_id; + req.t.tcm_ifindex = ifindex; + req.t.tcm_info = TC_H_MAKE(priority << 16, htons(protocol)); + + ret = nlattr_add(&req.nh, sizeof(req), TCA_KIND, "bpf", sizeof("bpf")); + if (ret < 0) + goto end; + + nla = nlattr_begin_nested(&req.nh, sizeof(req), TCA_OPTIONS); + if (!nla) { + ret = -EMSGSIZE; + goto end; + } + + if (OPTS_GET(opts, class_id, TC_H_UNSPEC)) { + ret = nlattr_add(&req.nh, sizeof(req), TCA_BPF_CLASSID, + &opts->class_id, sizeof(opts->class_id)); + if (ret < 0) + goto end; + } + + if (cmd != RTM_DELTFILTER) { + ret = tc_bpf_add_fd_and_name(&req.nh, sizeof(req), fd); + if (ret < 0) + goto end; + + /* direct action is always set */ + bpf_flags |= TCA_BPF_FLAG_ACT_DIRECT; + ret = nlattr_add(&req.nh, sizeof(req), TCA_BPF_FLAGS, + &bpf_flags, sizeof(bpf_flags)); + if (ret < 0) + goto end; + } + + nlattr_end_nested(&req.nh, nla); + + ret = send(sock, &req.nh, req.nh.nlmsg_len, 0); + if (ret < 0) + goto end; + + ret = bpf_netlink_recv(sock, nl_pid, seq, fn, NULL, &info); + + if (fn) { + *id = info.id; + ret = ret < 0 ? ret : 0; + } + +end: + close(sock); + return ret; +} + +int bpf_tc_attach(int fd, __u32 ifindex, __u32 parent_id, + const struct bpf_tc_opts *opts, + struct bpf_tc_attach_id *id) +{ + if (fd < 0 || !OPTS_VALID(opts, bpf_tc_opts) || !id) + return -EINVAL; + + return tc_cls_bpf_modify(fd, RTM_NEWTFILTER, + NLM_F_ECHO | NLM_F_CREATE | + (OPTS_GET(opts, replace, false) ? + NLM_F_REPLACE : NLM_F_EXCL), + ifindex, parent_id, opts, cls_get_info, id); +} + +int bpf_tc_detach(__u32 ifindex, __u32 parent_id, + const struct bpf_tc_attach_id *id) +{ + DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, 0); + + if (!id) + return -EINVAL; + + opts.handle = id->handle; + opts.priority = id->priority; + + return tc_cls_bpf_modify(-1, RTM_DELTFILTER, 0, ifindex, parent_id, + &opts, NULL, NULL); +} + +static int __cls_get_info(void *cookie, void *msg, struct nlattr **tb) +{ + struct nlattr *tbb[TCA_BPF_MAX + 1]; + struct bpf_tc_info *info = cookie; + struct tcmsg *t = msg; + + if (!tb[TCA_OPTIONS]) + return 0; + + libbpf_nla_parse_nested(tbb, TCA_BPF_MAX, tb[TCA_OPTIONS], NULL); + if (!tbb[TCA_BPF_ID]) + return 0; + + info->protocol = ntohs(TC_H_MIN(t->tcm_info)); + info->id.priority = TC_H_MAJ(t->tcm_info) >> 16; + info->id.handle = t->tcm_handle; + + if (tb[TCA_CHAIN]) + info->chain_index = libbpf_nla_getattr_u32(tb[TCA_CHAIN]); + else + info->chain_index = 0; + + if (tbb[TCA_BPF_FLAGS]) + info->bpf_flags = libbpf_nla_getattr_u32(tbb[TCA_BPF_FLAGS]); + + if (tbb[TCA_BPF_FLAGS_GEN]) + info->bpf_flags_gen = + libbpf_nla_getattr_u32(tbb[TCA_BPF_FLAGS_GEN]); + + if (tbb[TCA_BPF_ID]) + info->prog_id = libbpf_nla_getattr_u32(tbb[TCA_BPF_ID]); + + if (tbb[TCA_BPF_TAG]) + memcpy(info->tag, libbpf_nla_getattr_str(tbb[TCA_BPF_TAG]), + sizeof(info->tag)); + + if (tbb[TCA_BPF_CLASSID]) + info->class_id = libbpf_nla_getattr_u32(tbb[TCA_BPF_CLASSID]); + + return 1; +} + +static int cls_get_info(struct nlmsghdr *nh, libbpf_dump_nlmsg_t fn, + void *cookie) +{ + struct tcmsg *t = NLMSG_DATA(nh); + struct nlattr *tb[TCA_MAX + 1]; + + libbpf_nla_parse(tb, TCA_MAX, + (struct nlattr *)((char *)t + NLMSG_ALIGN(sizeof(*t))), + NLMSG_PAYLOAD(nh, sizeof(*t)), NULL); + if (!tb[TCA_KIND]) + return -EINVAL; + + return __cls_get_info(cookie, t, tb); +} + +static int tc_cls_get_info(__u32 ifindex, __u32 parent_id, + const struct bpf_tc_attach_id *id, + struct bpf_tc_info *info) +{ + __u32 nl_pid = 0, protocol; + __u32 priority; + int sock, ret; + struct { + struct nlmsghdr nh; + struct tcmsg t; + char buf[256]; + } req = { + .nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)), + .nh.nlmsg_type = RTM_GETTFILTER, + .nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP, + .t.tcm_family = AF_UNSPEC, + }; + + priority = id->priority; + protocol = ETH_P_ALL; + + req.t.tcm_parent = parent_id; + req.t.tcm_ifindex = ifindex; + req.t.tcm_handle = id->handle; + req.t.tcm_info = TC_H_MAKE(priority << 16, htons(protocol)); + + sock = libbpf_netlink_open(&nl_pid); + if (sock < 0) + return sock; + + ret = nlattr_add(&req.nh, sizeof(req), TCA_KIND, "bpf", sizeof("bpf")); + if (ret < 0) + goto end; + + req.nh.nlmsg_seq = time(NULL); + + ret = bpf_nl_get_ext(&req.nh, sock, nl_pid, cls_get_info, NULL, info); + if (ret < 0) + goto end; + /* 1 denotes a match */ + ret = ret == 1 ? 0 : -ESRCH; +end: + close(sock); + return ret; +} + +int bpf_tc_get_info(__u32 ifindex, __u32 parent_id, + const struct bpf_tc_attach_id *id, + struct bpf_tc_info *info) +{ + if (!id || !info) + return -EINVAL; + + return tc_cls_get_info(ifindex, parent_id, id, info); }