diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c b/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c
--- a/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c
@@ -2,9 +2,11 @@
#include <linux/rtnetlink.h>
#include <test_progs.h>
+#include "netlink_helpers.h"
#include "network_helpers.h"
#include "bpf_qdisc_fifo.skel.h"
#include "bpf_qdisc_fq.skel.h"
+#include "bpf_qdisc_prio.skel.h"
struct crndstate {
u32 last;
@@ -65,7 +67,7 @@ static void *server(void *arg)
return NULL;
}
-static void do_test(char *qdisc)
+static void do_test(char *qdisc, int (*setup)(void))
{
DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = LO_IFINDEX,
.attach_point = BPF_TC_QDISC,
@@ -87,6 +89,12 @@ static void do_test(char *qdisc)
if (!ASSERT_OK(err, "attach qdisc"))
return;
+ if (setup) {
+ err = setup();
+ if (!ASSERT_OK(err, "setup qdisc"))
+ return;
+ }
+
lfd = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0);
if (!ASSERT_NEQ(lfd, -1, "socket")) {
bpf_tc_hook_destroy(&hook);
@@ -156,7 +164,7 @@ static void test_fifo(void)
return;
}
- do_test("bpf_fifo");
+ do_test("bpf_fifo", NULL);
bpf_link__destroy(link);
bpf_qdisc_fifo__destroy(fifo_skel);
@@ -177,7 +185,7 @@ static void test_fq(void)
return;
}
- do_test("bpf_fq");
+ do_test("bpf_fq", NULL);
bpf_link__destroy(link);
bpf_qdisc_fq__destroy(fq_skel);
@@ -198,12 +206,46 @@ static void test_netem(void)
return;
}
- do_test("bpf_netem");
+ do_test("bpf_netem", NULL);
bpf_link__destroy(link);
bpf_qdisc_netem__destroy(netem_skel);
}
+static int setup_prio_bands(void)
+{
+ char cmd[128];
+ int i;
+
+ for (i = 1; i <= 16; i++) {
+ snprintf(cmd, sizeof(cmd), "tc qdisc add dev lo parent 800:%x handle %x0: fq", i, i);
+ if (!ASSERT_OK(system(cmd), cmd))
+ return -1;
+ }
+ return 0;
+}
+
+static void test_prio_qdisc(void)
+{
+ struct bpf_qdisc_prio *prio_skel;
+ struct bpf_link *link;
+
+ prio_skel = bpf_qdisc_prio__open_and_load();
+ if (!ASSERT_OK_PTR(prio_skel, "bpf_qdisc_prio__open_and_load"))
+ return;
+
+ link = bpf_map__attach_struct_ops(prio_skel->maps.prio);
+ if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) {
+ bpf_qdisc_prio__destroy(prio_skel);
+ return;
+ }
+
+ do_test("bpf_prio", &setup_prio_bands);
+
+ bpf_link__destroy(link);
+ bpf_qdisc_prio__destroy(prio_skel);
+}
+
void test_bpf_qdisc(void)
{
if (test__start_subtest("fifo"))
@@ -212,4 +254,6 @@ void test_bpf_qdisc(void)
test_fq();
if (test__start_subtest("netem"))
test_netem();
+ if (test__start_subtest("prio"))
+ test_prio_qdisc();
}
diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_prio.c b/tools/testing/selftests/bpf/progs/bpf_qdisc_prio.c
new file mode 100644
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_prio.c
@@ -0,0 +1,112 @@
+#include <vmlinux.h>
+#include "bpf_experimental.h"
+#include "bpf_qdisc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+
+private(B) struct bpf_spin_lock direct_queue_lock;
+private(B) struct bpf_list_head direct_queue __contains_kptr(sk_buff, bpf_list);
+
+unsigned int q_limit = 1000;
+unsigned int q_qlen = 0;
+
+SEC("struct_ops/bpf_prio_enqueue")
+int BPF_PROG(bpf_prio_enqueue, struct sk_buff *skb, struct Qdisc *sch,
+ struct bpf_sk_buff_ptr *to_free)
+{
+ u32 classid = sch->handle | ((skb->priority & TC_PRIO_MAX) + 1);
+
+ if (bpf_qdisc_find_class(sch, classid))
+ return bpf_qdisc_enqueue(skb, sch, classid, to_free);
+
+ if (q_qlen >= q_limit) {
+ bpf_qdisc_skb_drop(skb, to_free);
+ return NET_XMIT_DROP;
+ }
+ q_qlen++;
+
+ bpf_spin_lock(&direct_queue_lock);
+ bpf_list_excl_push_back(&direct_queue, &skb->bpf_list);
+ bpf_spin_unlock(&direct_queue_lock);
+
+ return NET_XMIT_SUCCESS;
+}
+
+SEC("struct_ops/bpf_prio_dequeue")
+struct sk_buff *BPF_PROG(bpf_prio_dequeue, struct Qdisc *sch)
+{
+ struct bpf_list_excl_node *node;
+ struct sk_buff *skb;
+ u32 i, classid;
+
+ bpf_spin_lock(&direct_queue_lock);
+ node = bpf_list_excl_pop_front(&direct_queue);
+ bpf_spin_unlock(&direct_queue_lock);
+ if (!node) {
+ for (i = 1; i <= TC_PRIO_MAX + 1; i++) {
+ classid = sch->handle | i;
+ skb = bpf_qdisc_dequeue(sch, classid);
+ if (skb)
+ return skb;
+ }
+ return NULL;
+ }
+
+ skb = container_of(node, struct sk_buff, bpf_list);
+ bpf_skb_set_dev(skb, sch);
+ q_qlen--;
+
+ return skb;
+}
+
+SEC("struct_ops/bpf_prio_init")
+int BPF_PROG(bpf_prio_init, struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
+{
+ int i, err;
+
+ for (i = 1; i <= TC_PRIO_MAX + 1; i++) {
+ err = bpf_qdisc_create_child(sch, i, extack);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int reset_direct_queue(u32 index, void *ctx)
+{
+ struct bpf_list_excl_node *node;
+ struct sk_buff *skb;
+
+ bpf_spin_lock(&direct_queue_lock);
+ node = bpf_list_excl_pop_front(&direct_queue);
+ bpf_spin_unlock(&direct_queue_lock);
+
+ if (!node) {
+ return 1;
+ }
+
+ skb = container_of(node, struct sk_buff, bpf_list);
+ bpf_skb_release(skb);
+ return 0;
+}
+
+SEC("struct_ops/bpf_prio_reset")
+void BPF_PROG(bpf_prio_reset, struct Qdisc *sch)
+{
+ bpf_loop(q_qlen, reset_direct_queue, NULL, 0);
+ q_qlen = 0;
+}
+
+SEC(".struct_ops")
+struct Qdisc_ops prio = {
+ .enqueue = (void *)bpf_prio_enqueue,
+ .dequeue = (void *)bpf_prio_dequeue,
+ .init = (void *)bpf_prio_init,
+ .reset = (void *)bpf_prio_reset,
+ .id = "bpf_prio",
+};
+
This test implements a classful qdisc using bpf. The prio qdisc, like its
native counterpart, has 16 bands. An skb is classified into a band based on
its priority. During dequeue, the band with the lowest priority value is
tried first. The bpf prio qdisc populates the classes with pfifo qdiscs
during initialization, and the test later changes them to fq qdiscs. A
direct queue backed by a bpf list is provided so that traffic keeps flowing
even if the qdiscs in all bands are removed.

Signed-off-by: Amery Hung <amery.hung@bytedance.com>
---
 .../selftests/bpf/prog_tests/bpf_qdisc.c      |  52 +++++++-
 .../selftests/bpf/progs/bpf_qdisc_prio.c      | 112 ++++++++++++++++++
 2 files changed, 160 insertions(+), 4 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_qdisc_prio.c
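As a rough sketch of how individual bands could be exercised from userspace
(not something this patch adds): setting SO_PRIORITY on the test socket sets
sk->sk_priority, which the stack copies into skb->priority for locally
generated packets, and the enqueue program above derives the band from
skb->priority & TC_PRIO_MAX. The helper below, open_socket_with_band(), is
hypothetical and only illustrates that mapping.

/* Hypothetical helper, not part of the patch: open a TCP socket whose
 * traffic lands in a chosen band of the bpf_prio qdisc. skb->priority is
 * derived from sk->sk_priority, so SO_PRIORITY selects the band that the
 * enqueue program computes from skb->priority & TC_PRIO_MAX.
 */
#include <stdio.h>
#include <sys/socket.h>

static int open_socket_with_band(int band)
{
	int prio = band;	/* expected range: 0..TC_PRIO_MAX (15) */
	int fd;

	fd = socket(AF_INET6, SOCK_STREAM, 0);
	if (fd < 0) {
		perror("socket");
		return -1;
	}

	if (setsockopt(fd, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio)) < 0) {
		perror("setsockopt(SO_PRIORITY)");
		/* Socket is still usable; it just keeps the default priority. */
	}

	return fd;
}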