@@ -1864,6 +1864,8 @@ int array_map_alloc_check(union bpf_attr *attr);
int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr);
+int bpf_prog_test_run_dequeue(struct bpf_prog *prog, const union bpf_attr *kattr,
+ union bpf_attr __user *uattr);
int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr);
int bpf_prog_test_run_tracing(struct bpf_prog *prog,
@@ -2107,6 +2109,13 @@ static inline int bpf_prog_test_run_xdp(struct bpf_prog *prog,
return -ENOTSUPP;
}
+static inline int bpf_prog_test_run_dequeue(struct bpf_prog *prog,
+ const union bpf_attr *kattr,
+ union bpf_attr __user *uattr)
+{
+ return -ENOTSUPP;
+}
+
static inline int bpf_prog_test_run_skb(struct bpf_prog *prog,
const union bpf_attr *kattr,
union bpf_attr __user *uattr)
@@ -10,6 +10,8 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_ACT, tc_cls_act,
struct __sk_buff, struct sk_buff)
BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp,
struct xdp_md, struct xdp_buff)
+BPF_PROG_TYPE(BPF_PROG_TYPE_DEQUEUE, dequeue,
+ struct dequeue_ctx, struct dequeue_data)
#ifdef CONFIG_CGROUP_BPF
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb,
struct __sk_buff, struct sk_buff)
@@ -85,6 +85,10 @@ struct xdp_buff {
u32 flags; /* supported values defined in xdp_buff_flags */
};
+struct dequeue_data {
+ struct xdp_txq_info *txq;
+};
+
static __always_inline bool xdp_buff_has_frags(struct xdp_buff *xdp)
{
return !!(xdp->flags & XDP_FLAGS_HAS_FRAGS);
@@ -954,6 +954,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_LSM,
BPF_PROG_TYPE_SK_LOOKUP,
BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
+ BPF_PROG_TYPE_DEQUEUE,
};
enum bpf_attach_type {
@@ -5961,6 +5962,10 @@ struct xdp_md {
__u32 egress_ifindex; /* txq->dev->ifindex */
};
+struct dequeue_ctx {
+ __u32 egress_ifindex;
+};
+
/* DEVMAP map-value layout
*
* The struct data-layout of map-value is a configuration interface.
@@ -2370,6 +2370,7 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
default:
return -EINVAL;
}
+ case BPF_PROG_TYPE_DEQUEUE:
case BPF_PROG_TYPE_SYSCALL:
case BPF_PROG_TYPE_EXT:
if (expected_attach_type)
@@ -1390,6 +1390,39 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
return ret;
}
+int bpf_prog_test_run_dequeue(struct bpf_prog *prog, const union bpf_attr *kattr,
+			      union bpf_attr __user *uattr)
+{
+	struct xdp_txq_info txq = { .dev = current->nsproxy->net_ns->loopback_dev };
+	u32 repeat = kattr->test.repeat, duration, size;
+	struct dequeue_data ctx = { .txq = &txq }; /* ctx as seen by the prog */
+	struct xdp_buff xdp = {};
+	struct xdp_frame *pkt;
+	int ret; /* no initializer needed: assigned before first read */
+	u64 retval;
+
+	if (prog->expected_attach_type)
+		return -EINVAL;
+
+	/* Dequeue progs take no input packet/ctx; reject unsupported attrs. */
+	if (kattr->test.data_in || kattr->test.data_size_in || kattr->test.flags ||
+	    kattr->test.ctx_in || kattr->test.ctx_out || repeat > 1)
+		return -EINVAL;
+
+	ret = bpf_test_run(prog, &ctx, repeat, &retval, &duration, false);
+	if (ret)
+		return ret;
+	if (!retval) /* prog had nothing to transmit */
+		return bpf_test_finish(kattr, uattr, NULL, NULL, 0, retval, duration);
+
+	pkt = (void *)(unsigned long)retval; /* retval is an xdp_frame pointer */
+	xdp_convert_frame_to_buff(pkt, &xdp);
+	size = xdp.data_end - xdp.data_meta; /* copy metadata + payload to user */
+	/* We set retval == 1 if pkt != NULL, otherwise 0 */
+	ret = bpf_test_finish(kattr, uattr, xdp.data_meta, NULL, size, !!retval, duration);
+	xdp_return_frame(pkt);
+	return ret;
+}
+
static int verify_user_bpf_flow_keys(struct bpf_flow_keys *ctx)
{
/* make sure the fields we don't use are zeroed */
@@ -8062,6 +8062,12 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
}
}
+static const struct bpf_func_proto *
+dequeue_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+	return bpf_base_func_proto(func_id); /* no dequeue-specific helpers yet; base set only */
+}
+
const struct bpf_func_proto bpf_sock_map_update_proto __weak;
const struct bpf_func_proto bpf_sock_hash_update_proto __weak;
@@ -8776,6 +8782,20 @@ void bpf_warn_invalid_xdp_action(struct net_device *dev, struct bpf_prog *prog,
}
EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
+static bool dequeue_is_valid_access(int off, int size,
+				    enum bpf_access_type type,
+				    const struct bpf_prog *prog,
+				    struct bpf_insn_access_aux *info)
+{
+	if (type == BPF_WRITE || size != sizeof(__u32)) /* read-only ctx; all fields are __u32 */
+		return false;
+	switch (off) {
+	case offsetof(struct dequeue_ctx, egress_ifindex):
+		return true;
+	}
+	return false;
+}
+
static bool sock_addr_is_valid_access(int off, int size,
enum bpf_access_type type,
const struct bpf_prog *prog,
@@ -9835,6 +9855,28 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
return insn - insn_buf;
}
+static u32 dequeue_convert_ctx_access(enum bpf_access_type type,
+				      const struct bpf_insn *si,
+				      struct bpf_insn *insn_buf,
+				      struct bpf_prog *prog, u32 *target_size)
+{
+	struct bpf_insn *insn = insn_buf; /* rewrite ctx loads into real struct loads */
+
+	switch (si->off) {
+	case offsetof(struct dequeue_ctx, egress_ifindex): /* = txq->dev->ifindex */
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct dequeue_data, txq),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct dequeue_data, txq)); /* dst = ctx->txq */
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_txq_info, dev),
+				      si->dst_reg, si->dst_reg,
+				      offsetof(struct xdp_txq_info, dev)); /* dst = dst->dev */
+		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+				      offsetof(struct net_device, ifindex)); /* dst = dst->ifindex */
+		break;
+	}
+	return insn - insn_buf; /* number of insns emitted for this access */
+}
+
/* SOCK_ADDR_LOAD_NESTED_FIELD() loads Nested Field S.F.NF where S is type of
* context Structure, F is Field in context structure that contains a pointer
* to Nested Structure of type NS that has the field NF.
@@ -10687,6 +10729,17 @@ const struct bpf_prog_ops xdp_prog_ops = {
.test_run = bpf_prog_test_run_xdp,
};
+const struct bpf_verifier_ops dequeue_verifier_ops = { /* verifier hooks for BPF_PROG_TYPE_DEQUEUE */
+	.get_func_proto = dequeue_func_proto,
+	.is_valid_access = dequeue_is_valid_access,
+	.convert_ctx_access = dequeue_convert_ctx_access,
+	.gen_prologue = bpf_noop_prologue, /* no ctx rewrite needed at entry */
+};
+
+const struct bpf_prog_ops dequeue_prog_ops = {
+	.test_run = bpf_prog_test_run_dequeue, /* BPF_PROG_TEST_RUN support */
+};
+
const struct bpf_verifier_ops cg_skb_verifier_ops = {
.get_func_proto = cg_skb_func_proto,
.is_valid_access = cg_skb_is_valid_access,
@@ -954,6 +954,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_LSM,
BPF_PROG_TYPE_SK_LOOKUP,
BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
+ BPF_PROG_TYPE_DEQUEUE,
};
enum bpf_attach_type {
@@ -5961,6 +5962,10 @@ struct xdp_md {
__u32 egress_ifindex; /* txq->dev->ifindex */
};
+struct dequeue_ctx {
+ __u32 egress_ifindex;
+};
+
/* DEVMAP map-value layout
*
* The struct data-layout of map-value is a configuration interface.
Add a new BPF_PROG_TYPE_DEQUEUE, which will be executed by a new device hook to retrieve queued packets for transmission. The API of the dequeue program is simple: it takes a context object containing as its sole member the ifindex of the device it is being executed on. The program can return a pointer to a packet, or NULL to indicate it has nothing to transmit at this time. Packet pointers are obtained by dequeueing them from a PIFO map (using a helper added in a subsequent commit). This commit adds dequeue program type and the ability to run it using the bpf_prog_run() syscall (returning the dequeued packet to userspace); a subsequent commit introduces the network stack hook to attach and execute dequeue programs. Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com> --- include/linux/bpf.h | 9 ++++++ include/linux/bpf_types.h | 2 ++ include/net/xdp.h | 4 +++ include/uapi/linux/bpf.h | 5 ++++ kernel/bpf/syscall.c | 1 + net/bpf/test_run.c | 33 +++++++++++++++++++++ net/core/filter.c | 53 ++++++++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 5 ++++ 8 files changed, 112 insertions(+)