
[v18,bpf-next,20/23] net: xdp: introduce bpf_xdp_pointer utility routine

Message ID ce5ad30af8f9b4d2b8128e7488818449a5c0d833.1637013639.git.lorenzo@kernel.org (mailing list archive)
State Changes Requested
Delegated to: BPF
Series: mvneta: introduce XDP multi-buffer support

Checks

Context Check Description
bpf/vmtest-bpf-next-PR fail PR summary
netdev/tree_selection success Clearly marked for bpf-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count fail Series longer than 15 patches (and no cover letter)
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 12481 this patch: 12481
netdev/cc_maintainers warning 7 maintainers not CCed: kafai@fb.com joe@cilium.io andrii@kernel.org yhs@fb.com songliubraving@fb.com hawk@kernel.org kpsingh@kernel.org
netdev/build_clang success Errors and warnings before: 2106 this patch: 2106
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 11647 this patch: 11647
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns WARNING: line length of 83 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next fail VM_Test

Commit Message

Lorenzo Bianconi Nov. 15, 2021, 10:33 p.m. UTC
Similar to skb_header_pointer, introduce the bpf_xdp_pointer utility routine
to return a pointer to a given position in the xdp_buff if the requested
area (offset + len) is contained in a contiguous memory area; otherwise the
requested area is copied into a bounce buffer provided by the caller.
Similar to their tc counterparts, introduce the following two xdp helpers:
- bpf_xdp_load_bytes
- bpf_xdp_store_bytes
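
For illustration only (not part of the patch): a minimal sketch of how an XDP
program could use the two new helpers, assuming libbpf's helper definitions
have been regenerated from the updated UAPI header. The program name and the
MAC-swap logic are made up for the example.

// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("xdp")
int xdp_swap_macs(struct xdp_md *ctx)
{
	__u8 eth[12]; /* dst MAC (6 bytes) + src MAC (6 bytes) */
	__u8 tmp[6];

	/* Read both MAC addresses out of the frame; this works whether the
	 * bytes sit in the linear area or in a paged fragment.
	 */
	if (bpf_xdp_load_bytes(ctx, 0, eth, sizeof(eth)))
		return XDP_PASS;

	/* Swap destination and source MAC. */
	__builtin_memcpy(tmp, eth, 6);
	__builtin_memcpy(eth, eth + 6, 6);
	__builtin_memcpy(eth + 6, tmp, 6);

	/* Write the swapped addresses back into the frame. */
	if (bpf_xdp_store_bytes(ctx, 0, eth, sizeof(eth)))
		return XDP_PASS;

	return XDP_TX;
}

char _license[] SEC("license") = "GPL";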

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
 include/uapi/linux/bpf.h       |  18 +++++
 net/core/filter.c              | 129 +++++++++++++++++++++++++++++++++
 tools/include/uapi/linux/bpf.h |  18 +++++
 3 files changed, 165 insertions(+)

Comments

Jakub Kicinski Nov. 16, 2021, 3:13 p.m. UTC | #1
On Mon, 15 Nov 2021 23:33:14 +0100 Lorenzo Bianconi wrote:
> +	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
> +	u32 headsize = xdp->data_end - xdp->data;
> +	u32 count = 0, frame_offset = headsize;
> +	int i;
> +
> +	if (offset < headsize) {
> +		int size = min_t(int, headsize - offset, len);
> +		void *src = flush ? buf : xdp->data + offset;
> +		void *dst = flush ? xdp->data + offset : buf;
> +
> +		memcpy(dst, src, size);
> +		count = size;
> +		offset = 0;
> +	}

is this missing
	else
		offset -= headsize;
?

I'm struggling to understand this. Say
	headsize = 400
	frag[0].size = 200

	offset = 500
	len = 50

we enter the loop having missed the previous if...

> +	for (i = 0; i < sinfo->nr_frags; i++) {
> +		skb_frag_t *frag = &sinfo->frags[i];
> +		u32 frag_size = skb_frag_size(frag);
> +
> +		if (count >= len)
> +			break;
> +
> +		if (offset < frame_offset + frag_size) {

		500 < 400 + 200 => true

> +			int size = min_t(int, frag_size - offset, len - count);

			size = min(200 - 500, 50 - 0)
			size = -300 ??

> +			void *addr = skb_frag_address(frag);
> +			void *src = flush ? buf + count : addr + offset;
> +			void *dst = flush ? addr + offset : buf + count;
> +
> +			memcpy(dst, src, size);
> +			count += size;
> +			offset = 0;
> +		}
> +		frame_offset += frag_size;
Lorenzo Bianconi Nov. 17, 2021, 12:12 a.m. UTC | #2
> On Mon, 15 Nov 2021 23:33:14 +0100 Lorenzo Bianconi wrote:
> > +	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
> > +	u32 headsize = xdp->data_end - xdp->data;
> > +	u32 count = 0, frame_offset = headsize;
> > +	int i;
> > +
> > +	if (offset < headsize) {
> > +		int size = min_t(int, headsize - offset, len);
> > +		void *src = flush ? buf : xdp->data + offset;
> > +		void *dst = flush ? xdp->data + offset : buf;
> > +
> > +		memcpy(dst, src, size);
> > +		count = size;
> > +		offset = 0;
> > +	}
> 
> is this missing
> 	else
> 		offset -= headsize;
> ?
> 
> I'm struggling to understand this. Say
> 	headsize = 400
> 	frag[0].size = 200
> 
> 	offset = 500
> 	len = 50
> 
> we enter the loop having missed the previous if...
> 
> > +	for (i = 0; i < sinfo->nr_frags; i++) {
> > +		skb_frag_t *frag = &sinfo->frags[i];
> > +		u32 frag_size = skb_frag_size(frag);
> > +
> > +		if (count >= len)
> > +			break;
> > +
> > +		if (offset < frame_offset + frag_size) {
> 
> 		500 < 400 + 200 => true
> 
> > +			int size = min_t(int, frag_size - offset, len - count);
> 
> 			size = min(200 - 500, 50 - 0)
> 			size = -300 ??

ack, you are right. Sorry for the issue.
I did not trigger the problem with xdp-mb self-tests since we will not run
bpf_xdp_copy_buf() in this specific case, but just the memcpy()
(but what you reported is a bug and must be fixed). I will add more
self-tests.
Moreover, reviewing the code I guess we can just update bpf_xdp_copy() for our case.
Something like:

static void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off,
			     void *buf, unsigned long len, bool flush)
{
	unsigned long ptr_len, ptr_off = 0;
	skb_frag_t *next_frag, *end_frag;
	struct skb_shared_info *sinfo;
	void *src, *dst;
	u8 *ptr_buf;

	if (likely(xdp->data_end - xdp->data >= off + len)) {
		src = flush ? buf : xdp->data + off;
		dst = flush ? xdp->data + off : buf;
		memcpy(dst, src, len);
		return;
	}

	sinfo = xdp_get_shared_info_from_buff(xdp);
	end_frag = &sinfo->frags[sinfo->nr_frags];
	next_frag = &sinfo->frags[0];

	ptr_len = xdp->data_end - xdp->data;
	ptr_buf = xdp->data;

	while (true) {
		if (off < ptr_off + ptr_len) {
			unsigned long copy_off = off - ptr_off;
			unsigned long copy_len = min(len, ptr_len - copy_off);

			src = flush ? buf : ptr_buf + copy_off;
			dst = flush ? ptr_buf + copy_off : buf;
			memcpy(dst, src, copy_len);

			off += copy_len;
			len -= copy_len;
			buf += copy_len;
		}

		if (!len || next_frag == end_frag)
			break;

		ptr_off += ptr_len;
		ptr_buf = skb_frag_address(next_frag);
		ptr_len = skb_frag_size(next_frag);
		next_frag++;
	}
}

...

static unsigned long bpf_xdp_copy(void *dst, const void *ctx,
				  unsigned long off, unsigned long len)
{
	struct xdp_buff *xdp = (struct xdp_buff *)ctx;

	bpf_xdp_copy_buf(xdp, off, dst, len, false);
	return 0;
}
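
For reference, tracing the rewritten loop through the earlier example
(headsize = 400, frag[0] size = 200, offset = 500, len = 50) as a sanity
check:

	/* fast path: 400 >= 500 + 50 is false, so fall through to the loop
	 *
	 * iteration 1: ptr_off = 0, ptr_len = 400 (linear area)
	 *   off (500) < 0 + 400 is false -> nothing copied
	 *   len != 0 and frags are left: ptr_off = 400, ptr_buf = frag[0],
	 *   ptr_len = 200
	 *
	 * iteration 2: ptr_off = 400, ptr_len = 200 (frag[0])
	 *   off (500) < 400 + 200 is true
	 *   copy_off = 500 - 400 = 100, copy_len = min(50, 200 - 100) = 50
	 *   copy 50 bytes at frag[0] + 100, len drops to 0 -> break
	 */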

What do you think?

Regards,
Lorenzo


> 
> > +			void *addr = skb_frag_address(frag);
> > +			void *src = flush ? buf + count : addr + offset;
> > +			void *dst = flush ? addr + offset : buf + count;
> > +
> > +			memcpy(dst, src, size);
> > +			count += size;
> > +			offset = 0;
> > +		}
> > +		frame_offset += frag_size;
Jakub Kicinski Nov. 17, 2021, 12:31 a.m. UTC | #3
On Wed, 17 Nov 2021 01:12:41 +0100 Lorenzo Bianconi wrote:
> ack, you are right. Sorry for the issue.
> I did not trigger the problem with xdp-mb self-tests since we will not run
> bpf_xdp_copy_buf() in this specific case, but just the memcpy()
> (but what you reported is a bug and must be fixed). I will add more
> self-tests.
> Moreover, reviewing the code I guess we can just update bpf_xdp_copy() for our case.
> Something like:

Seems reasonable.  We could probably play some tricks with double
pointers to avoid the ternary operator being re-evaluated for each
chunk. But even if it's faster it is probably not worth the ugliness
of the code.
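
For reference, one shape the double-pointer variant could take (an untested
sketch on top of the rewritten bpf_xdp_copy_buf() above, with the linear fast
path dropped for brevity):

static void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off,
			     void *buf, unsigned long len, bool flush)
{
	unsigned long ptr_len, ptr_off = 0;
	skb_frag_t *next_frag, *end_frag;
	struct skb_shared_info *sinfo;
	void *src = NULL, *dst = NULL;
	/* Pick the copy direction once instead of per chunk:
	 * flush means buf -> frame, otherwise frame -> buf.
	 */
	void **frame_side = flush ? &dst : &src;
	void **buf_side = flush ? &src : &dst;
	u8 *ptr_buf;

	sinfo = xdp_get_shared_info_from_buff(xdp);
	end_frag = &sinfo->frags[sinfo->nr_frags];
	next_frag = &sinfo->frags[0];

	ptr_len = xdp->data_end - xdp->data;
	ptr_buf = xdp->data;

	while (true) {
		if (off < ptr_off + ptr_len) {
			unsigned long copy_off = off - ptr_off;
			unsigned long copy_len = min(len, ptr_len - copy_off);

			/* No per-chunk ternary: just fill the two slots. */
			*frame_side = ptr_buf + copy_off;
			*buf_side = buf;
			memcpy(dst, src, copy_len);

			off += copy_len;
			len -= copy_len;
			buf += copy_len;
		}

		if (!len || next_frag == end_frag)
			break;

		ptr_off += ptr_len;
		ptr_buf = skb_frag_address(next_frag);
		ptr_len = skb_frag_size(next_frag);
		next_frag++;
	}
}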
Lorenzo Bianconi Nov. 17, 2021, 11:12 a.m. UTC | #4
> On Wed, 17 Nov 2021 01:12:41 +0100 Lorenzo Bianconi wrote:
> > ack, you are right. Sorry for the issue.
> > I did not trigger the problem with xdp-mb self-tests since we will not run
> > bpf_xdp_copy_buf() in this specific case, but just the memcpy()
> > (but what you reported is a bug and must be fixed). I will add more
> > self-tests.
> > Moreover, reviewing the code I guess we can just update bpf_xdp_copy() for our case.
> > Something like:
> 
> Seems reasonable.  We could probably play some tricks with double
> pointers to avoid the ternary operator being re-evaluated for each
> chunk. But even if it's faster it is probably not worth the ugliness
> of the code.

ack, moreover I guess the slowest operation here is the memcpy().
I added a new self-test to cover the case where buf is across frag0
and frag1.
I will wait for some more feedback and then I will post a new version.
Thanks.

Regards,
Lorenzo
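
For context, a rough idea of what such a cross-fragment check can look like
on the BPF program side, reusing the includes and license from the usage
sketch shown earlier. This is only an illustration, not the selftest added
in the series: the 0xaa fill pattern and the assumption that user space
builds a frame whose linear area is exactly 3000 bytes are made up.

SEC("xdp")
int xdp_check_cross_frag(struct xdp_md *ctx)
{
	__u8 buf[16];
	int i;

	/* Start 8 bytes before the (assumed) end of the linear area so the
	 * 16-byte read has to span the linear data and frag[0].
	 */
	if (bpf_xdp_load_bytes(ctx, 3000 - 8, buf, sizeof(buf)))
		return XDP_DROP;

	for (i = 0; i < 16; i++)
		if (buf[i] != 0xaa)	/* pattern written by user space */
			return XDP_DROP;

	return XDP_PASS;
}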

Patch

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 556621a3f7b4..c20e9534fab5 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4968,6 +4968,22 @@  union bpf_attr {
  *		Get the total size of a given xdp buff (linear and paged area)
  *	Return
  *		The total size of a given xdp buffer.
+ *
+ * long bpf_xdp_load_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len)
+ *	Description
+ *		This helper is provided as an easy way to load data from a
+ *		xdp buffer. It can be used to load *len* bytes from *offset* from
+ *		the frame associated to *xdp_md*, into the buffer pointed by
+ *		*buf*.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * long bpf_xdp_store_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len)
+ *	Description
+ *		Store *len* bytes from buffer *buf* into the frame
+ *		associated to *xdp_md*, at *offset*.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5152,6 +5168,8 @@  union bpf_attr {
 	FN(kallsyms_lookup_name),	\
 	FN(find_vma),			\
 	FN(xdp_get_buff_len),		\
+	FN(xdp_load_bytes),		\
+	FN(xdp_store_bytes),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/net/core/filter.c b/net/core/filter.c
index 975393300535..21a2c47809b5 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3840,6 +3840,131 @@  static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };
 
+static void bpf_xdp_copy_buf(struct xdp_buff *xdp, u32 offset,
+			     u32 len, void *buf, bool flush)
+{
+	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+	u32 headsize = xdp->data_end - xdp->data;
+	u32 count = 0, frame_offset = headsize;
+	int i;
+
+	if (offset < headsize) {
+		int size = min_t(int, headsize - offset, len);
+		void *src = flush ? buf : xdp->data + offset;
+		void *dst = flush ? xdp->data + offset : buf;
+
+		memcpy(dst, src, size);
+		count = size;
+		offset = 0;
+	}
+
+	for (i = 0; i < sinfo->nr_frags; i++) {
+		skb_frag_t *frag = &sinfo->frags[i];
+		u32 frag_size = skb_frag_size(frag);
+
+		if (count >= len)
+			break;
+
+		if (offset < frame_offset + frag_size) {
+			int size = min_t(int, frag_size - offset, len - count);
+			void *addr = skb_frag_address(frag);
+			void *src = flush ? buf + count : addr + offset;
+			void *dst = flush ? addr + offset : buf + count;
+
+			memcpy(dst, src, size);
+			count += size;
+			offset = 0;
+		}
+		frame_offset += frag_size;
+	}
+}
+
+static void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset,
+			     u32 len, void *buf)
+{
+	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+	u32 size = xdp->data_end - xdp->data;
+	void *addr = xdp->data;
+	int i;
+
+	if (unlikely(offset > 0xffff || len > 0xffff))
+		return ERR_PTR(-EFAULT);
+
+	if (offset + len > xdp_get_buff_len(xdp))
+		return ERR_PTR(-EINVAL);
+
+	if (offset < size) /* linear area */
+		goto out;
+
+	offset -= size;
+	for (i = 0; i < sinfo->nr_frags; i++) { /* paged area */
+		u32 frag_size = skb_frag_size(&sinfo->frags[i]);
+
+		if  (offset < frag_size) {
+			addr = skb_frag_address(&sinfo->frags[i]);
+			size = frag_size;
+			break;
+		}
+		offset -= frag_size;
+	}
+out:
+	return offset + len < size ? addr + offset : NULL;
+}
+
+BPF_CALL_4(bpf_xdp_load_bytes, struct xdp_buff *, xdp, u32, offset,
+	   void *, buf, u32, len)
+{
+	void *ptr;
+
+	ptr = bpf_xdp_pointer(xdp, offset, len, buf);
+	if (IS_ERR(ptr))
+		return PTR_ERR(ptr);
+
+	if (!ptr)
+		bpf_xdp_copy_buf(xdp, offset, len, buf, false);
+	else
+		memcpy(buf, ptr, len);
+
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_xdp_load_bytes_proto = {
+	.func		= bpf_xdp_load_bytes,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg4_type	= ARG_CONST_SIZE,
+};
+
+BPF_CALL_4(bpf_xdp_store_bytes, struct xdp_buff *, xdp, u32, offset,
+	   void *, buf, u32, len)
+{
+	void *ptr;
+
+	ptr = bpf_xdp_pointer(xdp, offset, len, NULL);
+	if (IS_ERR(ptr))
+		return PTR_ERR(ptr);
+
+	if (!ptr)
+		bpf_xdp_copy_buf(xdp, offset, len, buf, true);
+	else
+		memcpy(ptr, buf, len);
+
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_xdp_store_bytes_proto = {
+	.func		= bpf_xdp_store_bytes,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg4_type	= ARG_CONST_SIZE,
+};
+
 static int bpf_xdp_mb_increase_tail(struct xdp_buff *xdp, int offset)
 {
 	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
@@ -7600,6 +7725,10 @@  xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_xdp_adjust_tail_proto;
 	case BPF_FUNC_xdp_get_buff_len:
 		return &bpf_xdp_get_buff_len_proto;
+	case BPF_FUNC_xdp_load_bytes:
+		return &bpf_xdp_load_bytes_proto;
+	case BPF_FUNC_xdp_store_bytes:
+		return &bpf_xdp_store_bytes_proto;
 	case BPF_FUNC_fib_lookup:
 		return &bpf_xdp_fib_lookup_proto;
 	case BPF_FUNC_check_mtu:
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 556621a3f7b4..c20e9534fab5 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -4968,6 +4968,22 @@  union bpf_attr {
  *		Get the total size of a given xdp buff (linear and paged area)
  *	Return
  *		The total size of a given xdp buffer.
+ *
+ * long bpf_xdp_load_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len)
+ *	Description
+ *		This helper is provided as an easy way to load data from a
+ *		xdp buffer. It can be used to load *len* bytes from *offset* from
+ *		the frame associated to *xdp_md*, into the buffer pointed by
+ *		*buf*.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * long bpf_xdp_store_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len)
+ *	Description
+ *		Store *len* bytes from buffer *buf* into the frame
+ *		associated to *xdp_md*, at *offset*.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5152,6 +5168,8 @@  union bpf_attr {
 	FN(kallsyms_lookup_name),	\
 	FN(find_vma),			\
 	FN(xdp_get_buff_len),		\
+	FN(xdp_load_bytes),		\
+	FN(xdp_store_bytes),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper