@@ -457,11 +457,14 @@ enum bpf_type_flag {
/* Size is known at compile time. */
MEM_FIXED_SIZE = BIT(10 + BPF_BASE_TYPE_BITS),
+ /* DYNPTR points to sk_buff */
+ DYNPTR_TYPE_SKB = BIT(11 + BPF_BASE_TYPE_BITS),
+
__BPF_TYPE_FLAG_MAX,
__BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1,
};
-#define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF)
+#define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF | DYNPTR_TYPE_SKB)
/* Max number of base types. */
#define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS)
@@ -963,6 +966,35 @@ static __always_inline __nocfi unsigned int bpf_dispatcher_nop_func(
return bpf_func(ctx, insnsi);
}
+/* the implementation of the opaque uapi struct bpf_dynptr */
+struct bpf_dynptr_kern {
+ /* Backing object of the dynptr. For local and ringbuf dynptrs the
+ * helpers access memory at data + offset; for skb-type dynptrs this
+ * holds the struct sk_buff pointer, not the packet bytes themselves.
+ */
+ void *data;
+ /* Size represents the number of usable bytes of dynptr data.
+ * If for example the offset is at 4 for a local dynptr whose data is
+ * of type u64, the number of usable bytes is 4.
+ *
+ * The upper 8 bits are reserved. It is as follows:
+ * Bits 0 - 23 = size
+ * Bits 24 - 30 = dynptr type
+ * Bit 31 = whether dynptr is read-only
+ */
+ u32 size;
+ /* Base offset of the dynptr; bpf_dynptr_read(), bpf_dynptr_write() and
+ * bpf_dynptr_data() add it to the offset supplied by the caller.
+ */
+ u32 offset;
+} __aligned(8);
+
+/* Kind of object a dynptr refers to. The value is stored in the type bits
+ * of bpf_dynptr_kern.size and selects how the dynptr helpers access data.
+ */
+enum bpf_dynptr_type {
+ /* No valid type; also what arg_to_dynptr_type() returns when no
+ * DYNPTR_TYPE_* flag matches.
+ */
+ BPF_DYNPTR_TYPE_INVALID,
+ /* Points to memory that is local to the bpf program */
+ BPF_DYNPTR_TYPE_LOCAL,
+ /* Underlying data is a ringbuf record */
+ BPF_DYNPTR_TYPE_RINGBUF,
+ /* Underlying data is a sk_buff */
+ BPF_DYNPTR_TYPE_SKB,
+};
+
+int bpf_dynptr_check_size(u32 size);
+u32 bpf_dynptr_get_size(struct bpf_dynptr_kern *ptr);
+
#ifdef CONFIG_BPF_JIT
int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr);
int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr);
@@ -2073,6 +2105,11 @@ static inline bool has_current_bpf_ctx(void)
}
void notrace bpf_prog_inc_misses_counter(struct bpf_prog *prog);
+
+void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
+ enum bpf_dynptr_type type, u32 offset, u32 size);
+void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr);
+void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr);
#else /* !CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get(u32 ufd)
{
@@ -2299,6 +2336,19 @@ static inline bool has_current_bpf_ctx(void)
static inline void bpf_prog_inc_misses_counter(struct bpf_prog *prog)
{
}
+
+/* No-op stubs for builds without CONFIG_BPF_SYSCALL, so that callers of the
+ * dynptr setup helpers still compile; none of them touches @ptr.
+ */
+static inline void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
+ enum bpf_dynptr_type type, u32 offset, u32 size)
+{
+}
+
+static inline void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr)
+{
+}
+
+static inline void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr)
+{
+}
#endif /* CONFIG_BPF_SYSCALL */
void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
@@ -2661,36 +2711,6 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
u32 **bin_buf, u32 num_args);
void bpf_bprintf_cleanup(void);
-/* the implementation of the opaque uapi struct bpf_dynptr */
-struct bpf_dynptr_kern {
- void *data;
- /* Size represents the number of usable bytes of dynptr data.
- * If for example the offset is at 4 for a local dynptr whose data is
- * of type u64, the number of usable bytes is 4.
- *
- * The upper 8 bits are reserved. It is as follows:
- * Bits 0 - 23 = size
- * Bits 24 - 30 = dynptr type
- * Bit 31 = whether dynptr is read-only
- */
- u32 size;
- u32 offset;
-} __aligned(8);
-
-enum bpf_dynptr_type {
- BPF_DYNPTR_TYPE_INVALID,
- /* Points to memory that is local to the bpf program */
- BPF_DYNPTR_TYPE_LOCAL,
- /* Underlying data is a kernel-produced ringbuf record */
- BPF_DYNPTR_TYPE_RINGBUF,
-};
-
-void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
- enum bpf_dynptr_type type, u32 offset, u32 size);
-void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr);
-int bpf_dynptr_check_size(u32 size);
-u32 bpf_dynptr_get_size(struct bpf_dynptr_kern *ptr);
-
#ifdef CONFIG_BPF_LSM
void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype);
void bpf_cgroup_atype_put(int cgroup_atype);
@@ -1542,4 +1542,22 @@ static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifind
return XDP_REDIRECT;
}
+#ifdef CONFIG_NET
+int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len);
+int __bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from,
+ u32 len, u64 flags);
+#else /* CONFIG_NET */
+/* Without CONFIG_NET there is no skb byte access; both stubs report
+ * -EOPNOTSUPP so that callers such as the dynptr helpers still link.
+ */
+static inline int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset,
+ void *to, u32 len)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int __bpf_skb_store_bytes(struct sk_buff *skb, u32 offset,
+ const void *from, u32 len, u64 flags)
+{
+ return -EOPNOTSUPP;
+}
+#endif /* CONFIG_NET */
+
#endif /* __LINUX_FILTER_H__ */
@@ -5296,22 +5296,45 @@ union bpf_attr {
* Description
* Write *len* bytes from *src* into *dst*, starting from *offset*
* into *dst*.
- * *flags* is currently unused.
+ *
+ * *flags* must be 0 except for skb-type dynptrs.
+ *
+ * For skb-type dynptrs:
+ * * All data slices of the dynptr are automatically
+ * invalidated after **bpf_dynptr_write**\ (). If you wish to
+ * avoid this, please perform the write using direct data slices
+ * instead.
+ *
+ * * For *flags*, please see the flags accepted by
+ * **bpf_skb_store_bytes**\ ().
* Return
* 0 on success, -E2BIG if *offset* + *len* exceeds the length
* of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst*
- * is a read-only dynptr or if *flags* is not 0.
+ * is a read-only dynptr or if *flags* is not correct. For skb-type dynptrs,
+ * other errors correspond to errors returned by **bpf_skb_store_bytes**\ ().
*
* void *bpf_dynptr_data(struct bpf_dynptr *ptr, u32 offset, u32 len)
* Description
* Get a pointer to the underlying dynptr data.
*
* *len* must be a statically known value. The returned data slice
- * is invalidated whenever the dynptr is invalidated.
+ * is invalidated whenever the dynptr is invalidated. Please note
+ * that if the dynptr is read-only, then the returned data slice will
+ * be read-only.
+ *
+ * For skb-type dynptrs:
+ * * If *offset* + *len* extends into the skb's paged buffers,
+ * the user should manually pull the skb with **bpf_skb_pull_data**\ ()
+ * and try again.
+ *
+ * * The data slice is automatically invalidated anytime
+ * **bpf_dynptr_write**\ () or a helper call that changes
+ * the underlying packet buffer (eg **bpf_skb_pull_data**\ ())
+ * is called.
* Return
- * Pointer to the underlying dynptr data, NULL if the dynptr is
- * read-only, if the dynptr is invalid, or if the offset and length
- * is out of bounds.
+ * Pointer to the underlying dynptr data, NULL if the dynptr is invalid,
+ * or if the offset and length is out of bounds or in a paged buffer for
+ * skb-type dynptrs.
*
* s64 bpf_tcp_raw_gen_syncookie_ipv4(struct iphdr *iph, struct tcphdr *th, u32 th_len)
* Description
@@ -5435,6 +5458,19 @@ union bpf_attr {
* **-E2BIG** if user-space has tried to publish a sample which is
* larger than the size of the ring buffer, or which cannot fit
* within a struct bpf_dynptr.
+ *
+ * long bpf_dynptr_from_skb(struct sk_buff *skb, u64 flags, struct bpf_dynptr *ptr)
+ * Description
+ * Get a dynptr to the data in *skb*. *skb* must be the BPF program
+ * context. Depending on program type, the dynptr may be read-only.
+ *
+ * Calls that change the *skb*'s underlying packet buffer
+ * (eg **bpf_skb_pull_data**\ ()) do not invalidate the dynptr, but
+ * they do invalidate any data slices associated with the dynptr.
+ *
+ * *flags* is currently unused, it must be 0 for now.
+ * Return
+ * 0 on success or -EINVAL if flags is not 0.
*/
#define ___BPF_FUNC_MAPPER(FN, ctx...) \
FN(unspec, 0, ##ctx) \
@@ -5647,6 +5683,7 @@ union bpf_attr {
FN(tcp_raw_check_syncookie_ipv6, 207, ##ctx) \
FN(ktime_get_tai_ns, 208, ##ctx) \
FN(user_ringbuf_drain, 209, ##ctx) \
+ FN(dynptr_from_skb, 210, ##ctx) \
/* */
/* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't
@@ -1403,11 +1403,21 @@ static bool bpf_dynptr_is_rdonly(struct bpf_dynptr_kern *ptr)
return ptr->size & DYNPTR_RDONLY_BIT;
}
+/* Mark @ptr read-only by setting DYNPTR_RDONLY_BIT in its size word.
+ * bpf_dynptr_write() rejects read-only dynptrs with -EINVAL.
+ */
+void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr)
+{
+ ptr->size |= DYNPTR_RDONLY_BIT;
+}
+
static void bpf_dynptr_set_type(struct bpf_dynptr_kern *ptr, enum bpf_dynptr_type type)
{
ptr->size |= type << DYNPTR_TYPE_SHIFT;
}
+/* Extract the dynptr type from the size word: clear the read-only bit, then
+ * shift the remaining upper bits down by DYNPTR_TYPE_SHIFT.
+ */
+static enum bpf_dynptr_type bpf_dynptr_get_type(const struct bpf_dynptr_kern *ptr)
+{
+ return (ptr->size & ~(DYNPTR_RDONLY_BIT)) >> DYNPTR_TYPE_SHIFT;
+}
+
u32 bpf_dynptr_get_size(struct bpf_dynptr_kern *ptr)
{
return ptr->size & DYNPTR_SIZE_MASK;
@@ -1480,6 +1490,7 @@ static const struct bpf_func_proto bpf_dynptr_from_mem_proto = {
BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, struct bpf_dynptr_kern *, src,
u32, offset, u64, flags)
{
+ enum bpf_dynptr_type type;
int err;
if (!src->data || flags)
@@ -1489,9 +1500,19 @@ BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, struct bpf_dynptr_kern *, src
if (err)
return err;
- memcpy(dst, src->data + src->offset + offset, len);
+ type = bpf_dynptr_get_type(src);
- return 0;
+ switch (type) {
+ case BPF_DYNPTR_TYPE_LOCAL:
+ case BPF_DYNPTR_TYPE_RINGBUF:
+ memcpy(dst, src->data + src->offset + offset, len);
+ return 0;
+ case BPF_DYNPTR_TYPE_SKB:
+ return __bpf_skb_load_bytes(src->data, src->offset + offset, dst, len);
+ default:
+ WARN_ONCE(true, "bpf_dynptr_read: unknown dynptr type %d\n", type);
+ return -EFAULT;
+ }
}
static const struct bpf_func_proto bpf_dynptr_read_proto = {
@@ -1508,18 +1529,32 @@ static const struct bpf_func_proto bpf_dynptr_read_proto = {
BPF_CALL_5(bpf_dynptr_write, struct bpf_dynptr_kern *, dst, u32, offset, void *, src,
u32, len, u64, flags)
{
+ enum bpf_dynptr_type type;
int err;
- if (!dst->data || flags || bpf_dynptr_is_rdonly(dst))
+ if (!dst->data || bpf_dynptr_is_rdonly(dst))
return -EINVAL;
err = bpf_dynptr_check_off_len(dst, offset, len);
if (err)
return err;
- memcpy(dst->data + dst->offset + offset, src, len);
+ type = bpf_dynptr_get_type(dst);
- return 0;
+ switch (type) {
+ case BPF_DYNPTR_TYPE_LOCAL:
+ case BPF_DYNPTR_TYPE_RINGBUF:
+ if (flags)
+ return -EINVAL;
+ memcpy(dst->data + dst->offset + offset, src, len);
+ return 0;
+ case BPF_DYNPTR_TYPE_SKB:
+ return __bpf_skb_store_bytes(dst->data, dst->offset + offset, src, len,
+ flags);
+ default:
+ WARN_ONCE(true, "bpf_dynptr_write: unknown dynptr type %d\n", type);
+ return -EFAULT;
+ }
}
static const struct bpf_func_proto bpf_dynptr_write_proto = {
@@ -1535,6 +1570,8 @@ static const struct bpf_func_proto bpf_dynptr_write_proto = {
BPF_CALL_3(bpf_dynptr_data, struct bpf_dynptr_kern *, ptr, u32, offset, u32, len)
{
+ enum bpf_dynptr_type type;
+ void *data;
int err;
if (!ptr->data)
@@ -1544,10 +1581,36 @@ BPF_CALL_3(bpf_dynptr_data, struct bpf_dynptr_kern *, ptr, u32, offset, u32, len
if (err)
return 0;
- if (bpf_dynptr_is_rdonly(ptr))
- return 0;
+ type = bpf_dynptr_get_type(ptr);
+
+ switch (type) {
+ case BPF_DYNPTR_TYPE_LOCAL:
+ case BPF_DYNPTR_TYPE_RINGBUF:
+ if (bpf_dynptr_is_rdonly(ptr))
+ return 0;
+
+ data = ptr->data;
+ break;
+ case BPF_DYNPTR_TYPE_SKB:
+ {
+ struct sk_buff *skb = ptr->data;
+
+ /* if the data is paged, the caller needs to pull it first */
+ if (ptr->offset + offset + len > skb_headlen(skb))
+ return 0;
- return (unsigned long)(ptr->data + ptr->offset + offset);
+ /* Depending on the prog type, the data slice will be either
+ * read-writable or read-only. The verifier will enforce that
+ * any writes to read-only data slices are rejected
+ */
+ data = skb->data;
+ break;
+ }
+ default:
+ WARN_ONCE(true, "bpf_dynptr_data: unknown dynptr type %d\n", type);
+ return 0;
+ }
+ return (unsigned long)(data + ptr->offset + offset);
}
static const struct bpf_func_proto bpf_dynptr_data_proto = {
@@ -687,6 +687,8 @@ static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type)
return BPF_DYNPTR_TYPE_LOCAL;
case DYNPTR_TYPE_RINGBUF:
return BPF_DYNPTR_TYPE_RINGBUF;
+ case DYNPTR_TYPE_SKB:
+ return BPF_DYNPTR_TYPE_SKB;
default:
return BPF_DYNPTR_TYPE_INVALID;
}
@@ -1422,6 +1424,12 @@ static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
reg->type == PTR_TO_PACKET_END;
}
+/* True if @reg is a PTR_TO_MEM register tagged with DYNPTR_TYPE_SKB, i.e. a
+ * data slice obtained from bpf_dynptr_data() on an skb-type dynptr. Such
+ * slices point into packet data.
+ */
+static bool reg_is_dynptr_slice_pkt(const struct bpf_reg_state *reg)
+{
+ return base_type(reg->type) == PTR_TO_MEM &&
+ reg->type & DYNPTR_TYPE_SKB;
+}
+
/* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
enum bpf_reg_type which)
@@ -5881,12 +5889,29 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
return __check_ptr_off_reg(env, reg, regno, fixed_off_ok);
}
-static u32 stack_slot_get_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
+/* Return the register that carries the first dynptr argument of helper @fn,
+ * or NULL if none of its argument types is a dynptr. Argument i of the
+ * helper is looked up in @regs at index BPF_REG_1 + i.
+ */
+static struct bpf_reg_state *get_dynptr_arg_reg(const struct bpf_func_proto *fn,
+						struct bpf_reg_state *regs)
+{
+	int i;
+
+	for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++)
+		if (arg_type_is_dynptr(fn->arg_type[i]))
+			return &regs[BPF_REG_1 + i];
+
+	return NULL;
+}
+
+static enum bpf_dynptr_type stack_slot_get_dynptr_info(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg,
+ int *ref_obj_id)
{
struct bpf_func_state *state = func(env, reg);
int spi = get_spi(reg->off);
- return state->stack[spi].spilled_ptr.id;
+ if (ref_obj_id)
+ *ref_obj_id = state->stack[spi].spilled_ptr.id;
+
+ return state->stack[spi].spilled_ptr.dynptr.type;
}
static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
@@ -6123,6 +6148,9 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
case DYNPTR_TYPE_RINGBUF:
err_extra = "ringbuf";
break;
+ case DYNPTR_TYPE_SKB:
+ err_extra = "skb ";
+ break;
default:
err_extra = "<unknown>";
break;
@@ -6565,6 +6593,9 @@ static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
* are now invalid, so turn them into unknown SCALAR_VALUE.
+ *
+ * This also applies to dynptr slices belonging to skb dynptrs,
+ * since these slices point to packet data.
*/
static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
{
@@ -6572,7 +6603,7 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
struct bpf_reg_state *reg;
bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
- if (reg_is_pkt_pointer_any(reg))
+ if (reg_is_pkt_pointer_any(reg) || reg_is_dynptr_slice_pkt(reg))
__mark_reg_unknown(env, reg);
}));
}
@@ -7233,6 +7264,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
int *insn_idx_p)
{
enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
+ enum bpf_dynptr_type dynptr_type = BPF_DYNPTR_TYPE_INVALID;
const struct bpf_func_proto *fn = NULL;
enum bpf_return_type ret_type;
enum bpf_type_flag ret_flag;
@@ -7406,28 +7438,44 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
}
break;
case BPF_FUNC_dynptr_data:
- for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
- if (arg_type_is_dynptr(fn->arg_type[i])) {
- struct bpf_reg_state *reg = &regs[BPF_REG_1 + i];
+ {
+ struct bpf_reg_state *reg;
- if (meta.ref_obj_id) {
- verbose(env, "verifier internal error: meta.ref_obj_id already set\n");
- return -EFAULT;
- }
+ reg = get_dynptr_arg_reg(fn, regs);
+ if (!reg) {
+ verbose(env, "verifier internal error: no dynptr in bpf_dynptr_data()\n");
+ return -EFAULT;
+ }
- if (base_type(reg->type) != PTR_TO_DYNPTR)
- /* Find the id of the dynptr we're
- * tracking the reference of
- */
- meta.ref_obj_id = stack_slot_get_id(env, reg);
- break;
- }
+ if (base_type(reg->type) == PTR_TO_DYNPTR)
+ break;
+
+ if (meta.ref_obj_id) {
+ verbose(env, "verifier internal error: meta.ref_obj_id already set\n");
+ return -EFAULT;
}
- if (i == MAX_BPF_FUNC_REG_ARGS) {
- verbose(env, "verifier internal error: no dynptr in bpf_dynptr_data()\n");
+
+ dynptr_type = stack_slot_get_dynptr_info(env, reg, &meta.ref_obj_id);
+ break;
+ }
+ case BPF_FUNC_dynptr_write:
+ {
+ struct bpf_reg_state *reg;
+
+ reg = get_dynptr_arg_reg(fn, regs);
+ if (!reg) {
+ verbose(env, "verifier internal error: no dynptr in bpf_dynptr_write()\n");
return -EFAULT;
}
+
+ /* bpf_dynptr_write() for skb-type dynptrs may pull the skb, so we must
+ * invalidate all data slices associated with it
+ */
+ if (stack_slot_get_dynptr_info(env, reg, NULL) == BPF_DYNPTR_TYPE_SKB)
+ changes_data = true;
+
break;
+ }
case BPF_FUNC_user_ringbuf_drain:
err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
set_user_ringbuf_callback_state);
@@ -7494,6 +7542,28 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
regs[BPF_REG_0].mem_size = meta.mem_size;
+ if (func_id == BPF_FUNC_dynptr_data &&
+ dynptr_type == BPF_DYNPTR_TYPE_SKB) {
+ bool seen_direct_write = env->seen_direct_write;
+
+ regs[BPF_REG_0].type |= DYNPTR_TYPE_SKB;
+ if (!may_access_direct_pkt_data(env, NULL, BPF_WRITE))
+ regs[BPF_REG_0].type |= MEM_RDONLY;
+ else
+ /*
+ * Calling may_access_direct_pkt_data() will set
+ * env->seen_direct_write to true if the skb is
+ * writable. As an optimization, we can ignore
+ * setting env->seen_direct_write.
+ *
+ * env->seen_direct_write is used by skb
+ * programs to determine whether the skb's page
+ * buffers should be cloned. Since data slice
+ * writes would only be to the head, we can skip
+ * this.
+ */
+ env->seen_direct_write = seen_direct_write;
+ }
break;
case RET_PTR_TO_MEM_OR_BTF_ID:
{
@@ -1683,8 +1683,8 @@ static inline void bpf_pull_mac_rcsum(struct sk_buff *skb)
skb_postpull_rcsum(skb, skb_mac_header(skb), skb->mac_len);
}
-BPF_CALL_5(bpf_skb_store_bytes, struct sk_buff *, skb, u32, offset,
- const void *, from, u32, len, u64, flags)
+int __bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from,
+ u32 len, u64 flags)
{
void *ptr;
@@ -1709,6 +1709,12 @@ BPF_CALL_5(bpf_skb_store_bytes, struct sk_buff *, skb, u32, offset,
return 0;
}
+/* BPF helper entry point. The implementation lives in
+ * __bpf_skb_store_bytes() so that bpf_dynptr_write() can call it directly
+ * for skb-type dynptrs.
+ */
+BPF_CALL_5(bpf_skb_store_bytes, struct sk_buff *, skb, u32, offset,
+ const void *, from, u32, len, u64, flags)
+{
+ return __bpf_skb_store_bytes(skb, offset, from, len, flags);
+}
+
static const struct bpf_func_proto bpf_skb_store_bytes_proto = {
.func = bpf_skb_store_bytes,
.gpl_only = false,
@@ -1720,8 +1726,7 @@ static const struct bpf_func_proto bpf_skb_store_bytes_proto = {
.arg5_type = ARG_ANYTHING,
};
-BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset,
- void *, to, u32, len)
+int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len)
{
void *ptr;
@@ -1740,6 +1745,12 @@ BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset,
return -EFAULT;
}
+/* BPF helper entry point. The implementation lives in
+ * __bpf_skb_load_bytes() so that bpf_dynptr_read() can call it directly
+ * for skb-type dynptrs.
+ */
+BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset,
+ void *, to, u32, len)
+{
+ return __bpf_skb_load_bytes(skb, offset, to, len);
+}
+
static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
.func = bpf_skb_load_bytes,
.gpl_only = false,
@@ -1851,6 +1862,52 @@ static const struct bpf_func_proto bpf_skb_pull_data_proto = {
.arg2_type = ARG_ANYTHING,
};
+/* bpf_dynptr_from_skb() variant that leaves the dynptr writable; it is the
+ * proto returned by tc_cls_act_func_proto() and sk_skb_func_proto().
+ *
+ * Initializes @ptr as an skb-type dynptr whose data pointer is @skb itself,
+ * with offset 0 and size skb->len. A non-zero @flags nulls @ptr and returns
+ * -EINVAL; otherwise returns 0.
+ */
+BPF_CALL_3(bpf_dynptr_from_skb_rdwr, struct sk_buff *, skb, u64, flags,
+ struct bpf_dynptr_kern *, ptr)
+{
+ if (flags) {
+ bpf_dynptr_set_null(ptr);
+ return -EINVAL;
+ }
+
+ bpf_dynptr_init(ptr, skb, BPF_DYNPTR_TYPE_SKB, 0, skb->len);
+
+ return 0;
+}
+
+/* arg1 is the program context (the skb), arg2 the flags word, arg3 an
+ * uninitialized dynptr that the helper fills in as skb-type.
+ */
+static const struct bpf_func_proto bpf_dynptr_from_skb_rdwr_proto = {
+ .func = bpf_dynptr_from_skb_rdwr,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_SKB | MEM_UNINIT,
+};
+
+/* bpf_dynptr_from_skb() variant that produces a read-only dynptr; it is the
+ * proto returned by sk_filter_func_proto() and lwt_out_func_proto().
+ *
+ * Same initialization as the read-write variant (data = @skb, offset 0,
+ * size skb->len), followed by bpf_dynptr_set_rdonly(). A non-zero @flags
+ * nulls @ptr and returns -EINVAL; otherwise returns 0.
+ */
+BPF_CALL_3(bpf_dynptr_from_skb_rdonly, struct sk_buff *, skb, u64, flags,
+ struct bpf_dynptr_kern *, ptr)
+{
+ if (flags) {
+ bpf_dynptr_set_null(ptr);
+ return -EINVAL;
+ }
+
+ bpf_dynptr_init(ptr, skb, BPF_DYNPTR_TYPE_SKB, 0, skb->len);
+
+ /* must come after init: the read-only flag lives in ptr->size */
+ bpf_dynptr_set_rdonly(ptr);
+
+ return 0;
+}
+
+/* arg1 is the program context (the skb), arg2 the flags word, arg3 an
+ * uninitialized dynptr that the helper fills in as skb-type.
+ */
+static const struct bpf_func_proto bpf_dynptr_from_skb_rdonly_proto = {
+ .func = bpf_dynptr_from_skb_rdonly,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_SKB | MEM_UNINIT,
+};
+
BPF_CALL_1(bpf_sk_fullsock, struct sock *, sk)
{
return sk_fullsock(sk) ? (unsigned long)sk : (unsigned long)NULL;
@@ -7734,6 +7791,8 @@ sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_socket_uid_proto;
case BPF_FUNC_perf_event_output:
return &bpf_skb_event_output_proto;
+ case BPF_FUNC_dynptr_from_skb:
+ return &bpf_dynptr_from_skb_rdonly_proto;
default:
return bpf_sk_base_func_proto(func_id);
}
@@ -7921,6 +7980,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_tcp_raw_check_syncookie_ipv6_proto;
#endif
#endif
+ case BPF_FUNC_dynptr_from_skb:
+ return &bpf_dynptr_from_skb_rdwr_proto;
default:
return bpf_sk_base_func_proto(func_id);
}
@@ -8120,6 +8181,8 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_skc_lookup_tcp:
return &bpf_skc_lookup_tcp_proto;
#endif
+ case BPF_FUNC_dynptr_from_skb:
+ return &bpf_dynptr_from_skb_rdwr_proto;
default:
return bpf_sk_base_func_proto(func_id);
}
@@ -8158,6 +8221,8 @@ lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_smp_processor_id_proto;
case BPF_FUNC_skb_under_cgroup:
return &bpf_skb_under_cgroup_proto;
+ case BPF_FUNC_dynptr_from_skb:
+ return &bpf_dynptr_from_skb_rdonly_proto;
default:
return bpf_sk_base_func_proto(func_id);
}
@@ -5296,22 +5296,45 @@ union bpf_attr {
* Description
* Write *len* bytes from *src* into *dst*, starting from *offset*
* into *dst*.
- * *flags* is currently unused.
+ *
+ * *flags* must be 0 except for skb-type dynptrs.
+ *
+ * For skb-type dynptrs:
+ * * All data slices of the dynptr are automatically
+ * invalidated after **bpf_dynptr_write**\ (). If you wish to
+ * avoid this, please perform the write using direct data slices
+ * instead.
+ *
+ * * For *flags*, please see the flags accepted by
+ * **bpf_skb_store_bytes**\ ().
* Return
* 0 on success, -E2BIG if *offset* + *len* exceeds the length
* of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst*
- * is a read-only dynptr or if *flags* is not 0.
+ * is a read-only dynptr or if *flags* is not correct. For skb-type dynptrs,
+ * other errors correspond to errors returned by **bpf_skb_store_bytes**\ ().
*
* void *bpf_dynptr_data(struct bpf_dynptr *ptr, u32 offset, u32 len)
* Description
* Get a pointer to the underlying dynptr data.
*
* *len* must be a statically known value. The returned data slice
- * is invalidated whenever the dynptr is invalidated.
+ * is invalidated whenever the dynptr is invalidated. Please note
+ * that if the dynptr is read-only, then the returned data slice will
+ * be read-only.
+ *
+ * For skb-type dynptrs:
+ * * If *offset* + *len* extends into the skb's paged buffers,
+ * the user should manually pull the skb with **bpf_skb_pull_data**\ ()
+ * and try again.
+ *
+ * * The data slice is automatically invalidated anytime
+ * **bpf_dynptr_write**\ () or a helper call that changes
+ * the underlying packet buffer (eg **bpf_skb_pull_data**\ ())
+ * is called.
* Return
- * Pointer to the underlying dynptr data, NULL if the dynptr is
- * read-only, if the dynptr is invalid, or if the offset and length
- * is out of bounds.
+ * Pointer to the underlying dynptr data, NULL if the dynptr is invalid,
+ * or if the offset and length is out of bounds or in a paged buffer for
+ * skb-type dynptrs.
*
* s64 bpf_tcp_raw_gen_syncookie_ipv4(struct iphdr *iph, struct tcphdr *th, u32 th_len)
* Description
@@ -5435,6 +5458,19 @@ union bpf_attr {
* **-E2BIG** if user-space has tried to publish a sample which is
* larger than the size of the ring buffer, or which cannot fit
* within a struct bpf_dynptr.
+ *
+ * long bpf_dynptr_from_skb(struct sk_buff *skb, u64 flags, struct bpf_dynptr *ptr)
+ * Description
+ * Get a dynptr to the data in *skb*. *skb* must be the BPF program
+ * context. Depending on program type, the dynptr may be read-only.
+ *
+ * Calls that change the *skb*'s underlying packet buffer
+ * (eg **bpf_skb_pull_data**\ ()) do not invalidate the dynptr, but
+ * they do invalidate any data slices associated with the dynptr.
+ *
+ * *flags* is currently unused, it must be 0 for now.
+ * Return
+ * 0 on success or -EINVAL if flags is not 0.
*/
#define ___BPF_FUNC_MAPPER(FN, ctx...) \
FN(unspec, 0, ##ctx) \
@@ -5647,6 +5683,7 @@ union bpf_attr {
FN(tcp_raw_check_syncookie_ipv6, 207, ##ctx) \
FN(ktime_get_tai_ns, 208, ##ctx) \
FN(user_ringbuf_drain, 209, ##ctx) \
+ FN(dynptr_from_skb, 210, ##ctx) \
/* */
/* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't
Add skb dynptrs, which are dynptrs whose underlying pointer points to a skb. The dynptr acts on skb data. skb dynptrs have two main benefits. One is that they allow operations on sizes that are not statically known at compile-time (eg variable-sized accesses). Another is that parsing the packet data through dynptrs (instead of through direct access of skb->data and skb->data_end) can be more ergonomic and less brittle (eg does not need manual if checking for being within bounds of data_end).

For bpf prog types that don't support writes on skb data, the dynptr is read-only (bpf_dynptr_write() will return an error and bpf_dynptr_data() will return a data slice that is read-only where any writes to it will be rejected by the verifier).

For reads and writes through the bpf_dynptr_read() and bpf_dynptr_write() interfaces, reading and writing from/to data in the head as well as from/to non-linear paged buffers is supported. For data slices (through the bpf_dynptr_data() interface), if the data is in a paged buffer, the user must first call bpf_skb_pull_data() to pull the data into the linear portion.

Any bpf_dynptr_write() automatically invalidates any prior data slices to the skb dynptr. This is because a bpf_dynptr_write() may be writing to data in a paged buffer, so it will need to pull the buffer first into the head. The reason it needs to be pulled instead of writing directly to the paged buffers is because they may be cloned (only the head of the skb is by default uncloned). As such, any bpf_dynptr_write() will automatically have its prior data slices invalidated, even if the write is to data in the skb head (the verifier has no way of differentiating whether the write is to the head or paged buffers during program load time).

Please note as well that any other helper calls that change the underlying packet buffer (eg bpf_skb_pull_data()) invalidate any data slices of the skb dynptr as well. The stack trace for this is check_helper_call() -> clear_all_pkt_pointers() -> __mark_reg_unknown().

For examples of how skb dynptrs can be used, please see the attached selftests.

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
---
 include/linux/bpf.h | 82 +++++++++++++++----------
 include/linux/filter.h | 18 ++++++
 include/uapi/linux/bpf.h | 49 +++++++++++++--
 kernel/bpf/helpers.c | 79 +++++++++++++++++++++---
 kernel/bpf/verifier.c | 108 +++++++++++++++++++++++++++------
 net/core/filter.c | 73 ++++++++++++++++++++--
 tools/include/uapi/linux/bpf.h | 49 +++++++++++++--
 7 files changed, 384 insertions(+), 74 deletions(-)