diff mbox series

[RFC,bpf-next,16/20] trait: Support sk_buffs

Message ID 20250305-afabre-traits-010-rfc2-v1-16-d0ecfb869797@cloudflare.com (mailing list archive)
State RFC
Delegated to: BPF
Headers show
Series traits: Per packet metadata KV store | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / GCC BPF
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for aarch64-gcc / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-12 success Logs for aarch64-gcc / veristat-meta
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / GCC BPF
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-18 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for s390x-gcc / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-20 success Logs for s390x-gcc / veristat-meta
bpf/vmtest-bpf-next-VM_Test-21 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-22 fail Logs for x86_64-gcc / GCC BPF / GCC BPF
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 fail Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 fail Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-gcc / veristat-kernel / x86_64-gcc veristat_kernel
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-gcc / veristat-meta / x86_64-gcc veristat_meta
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-37 fail Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-38 fail Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-17 / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-17 / veristat-meta
bpf/vmtest-bpf-next-VM_Test-42 fail Logs for x86_64-llvm-18 / GCC BPF / GCC BPF
bpf/vmtest-bpf-next-VM_Test-43 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-44 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-45 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-46 fail Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-47 fail Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-48 fail Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-49 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-50 success Logs for x86_64-llvm-18 / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-51 success Logs for x86_64-llvm-18 / veristat-meta
bpf/vmtest-bpf-next-VM_Test-8 fail Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 fail Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-16 fail Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 fail Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-33 fail Logs for x86_64-llvm-17 / GCC BPF / GCC BPF
netdev/series_format fail Series longer than 15 patches
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 189 this patch: 72
netdev/build_tools success Errors and warnings before: 26 (+0) this patch: 26 (+0)
netdev/cc_maintainers warning 7 maintainers not CCed: daniel@iogearbox.net horms@kernel.org edumazet@google.com john.fastabend@gmail.com kuba@kernel.org pabeni@redhat.com ast@kernel.org
netdev/build_clang success Errors and warnings before: 82 this patch: 82
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 4980 this patch: 4980
netdev/checkpatch warning WARNING: line length of 83 exceeds 80 columns WARNING: line length of 91 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 106 this patch: 106
netdev/source_inline success Was 0 now: 0

Commit Message

Arthur Fabre March 5, 2025, 2:32 p.m. UTC
From: Arthur Fabre <afabre@cloudflare.com>

Hide the space used by traits from skb_headroom(): that space isn't
actually usable.

Preserve the trait store in pskb_expand_head() by copying it ahead of
the new headroom. The struct xdp_frame at the start of the headroom
isn't needed anymore, so we can overwrite it with traits, and introduce
a new flag to indicate traits are stored at the start of the headroom.

Cloned skbs share the same packet data and headroom as the original skb,
so changes to traits in one would be reflected in the other.
Is that ok?
Are there cases where we would want a clone to have different traits?
For now, prevent clones from using traits.

Signed-off-by: Arthur Fabre <afabre@cloudflare.com>
---
 include/linux/skbuff.h | 25 +++++++++++++++++++++++--
 net/core/skbuff.c      | 25 +++++++++++++++++++++++--
 2 files changed, 46 insertions(+), 4 deletions(-)

Comments

Lorenzo Bianconi March 10, 2025, 11:45 a.m. UTC | #1
> From: Arthur Fabre <afabre@cloudflare.com>
> 
> Hide the space used by traits from skb_headroom(): that space isn't
> actually usable.
> 
> Preserve the trait store in pskb_expand_head() by copying it ahead of
> the new headroom. The struct xdp_frame at the start of the headroom
> isn't needed anymore, so we can overwrite it with traits, and introduce
> a new flag to indicate traits are stored at the start of the headroom.
> 
> Cloned skbs share the same packet data and headroom as the original skb,
> so changes to traits in one would be reflected in the other.
> Is that ok?
> Are there cases where we would want a clone to have different traits?
> For now, prevent clones from using traits.
> 
> Signed-off-by: Arthur Fabre <afabre@cloudflare.com>
> ---
>  include/linux/skbuff.h | 25 +++++++++++++++++++++++--
>  net/core/skbuff.c      | 25 +++++++++++++++++++++++--
>  2 files changed, 46 insertions(+), 4 deletions(-)
> 
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index d7dfee152ebd26ce87a230222e94076aca793adc..886537508789202339c925b5613574de67b7e43c 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -39,6 +39,7 @@
>  #include <net/net_debug.h>
>  #include <net/dropreason-core.h>
>  #include <net/netmem.h>
> +#include <net/trait.h>
>  
>  /**
>   * DOC: skb checksums
> @@ -729,6 +730,8 @@ enum skb_traits_type {
>  	SKB_TRAITS_NONE,
>  	/* Trait store in headroom, offset by sizeof(struct xdp_frame) */
>  	SKB_TRAITS_AFTER_XDP,
> +	/* Trait store at start of headroom */
> +	SKB_TRAITS_AT_HEAD,
>  };
>  
>  /**
> @@ -1029,7 +1032,7 @@ struct sk_buff {
>  	__u8			csum_not_inet:1;
>  #endif
>  	__u8			unreadable:1;
> -	__u8			traits_type:1;	/* See enum skb_traits_type */
> +	__u8			traits_type:2;	/* See enum skb_traits_type */
>  #if defined(CONFIG_NET_SCHED) || defined(CONFIG_NET_XGRESS)
>  	__u16			tc_index;	/* traffic control index */
>  #endif
> @@ -2836,6 +2839,18 @@ static inline void *pskb_pull(struct sk_buff *skb, unsigned int len)
>  
>  void skb_condense(struct sk_buff *skb);
>  
> +static inline void *skb_traits(const struct sk_buff *skb)
> +{
> +	switch (skb->traits_type) {
> +	case SKB_TRAITS_AFTER_XDP:
> +		return skb->head + _XDP_FRAME_SIZE;
> +	case SKB_TRAITS_AT_HEAD:
> +		return skb->head;
> +	default:
> +		return NULL;
> +	}
> +}
> +
>  /**
>   *	skb_headroom - bytes at buffer head
>   *	@skb: buffer to check
> @@ -2844,7 +2859,13 @@ void skb_condense(struct sk_buff *skb);
>   */
>  static inline unsigned int skb_headroom(const struct sk_buff *skb)
>  {
> -	return skb->data - skb->head;
> +	int trait_size = 0;
> +	void *traits = skb_traits(skb);
> +
> +	if (traits)
> +		trait_size = traits_size(traits);
> +
> +	return skb->data - skb->head - trait_size;

I am not fully aware of all possible use-cases, but do we really need to
store hw medata traits (e.g. hw rx checksum or hw rx hash) in the skb
headroom when we convert the xdp_frame/xdp_buff in the skb? All of these
fields already have dedicated fields in the skb struct. Moreover, we need
to set them in order to have a real performance improvements when we execute
XDP_PASS. Something like:

https://lore.kernel.org/bpf/01ce17910fdd7b693c23132663fa884d5ec7f440.1726935917.git.lorenzo@kernel.org/

Regards,
Lorenzo

>  }
>  
>  /**
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index 7b03b64fdcb276f68ce881d1d8da8e4c6b897efc..83f58517738e8ff12990c28b09336ed44f4be32a 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -1515,6 +1515,19 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
>  	atomic_inc(&(skb_shinfo(skb)->dataref));
>  	skb->cloned = 1;
>  
> +	/* traits would end up shared with the clone,
> +	 * and edits would be reflected there.
> +	 *
> +	 * Is that ok? What if the original skb and the clone take different paths?
> +	 * Does that even happen?
> +	 *
> +	 * If that's not ok, we could copy the traits and store them in an extension header
> +	 * for clones.
> +	 *
> +	 * For now, pretend the clone doesn't have any traits.
> +	 */
> +	skb->traits_type = SKB_TRAITS_NONE;
> +
>  	return n;
>  #undef C
>  }
> @@ -2170,7 +2183,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
>  	unsigned int osize = skb_end_offset(skb);
>  	unsigned int size = osize + nhead + ntail;
>  	long off;
> -	u8 *data;
> +	u8 *data, *head;
>  	int i;
>  
>  	BUG_ON(nhead < 0);
> @@ -2187,10 +2200,18 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
>  		goto nodata;
>  	size = SKB_WITH_OVERHEAD(size);
>  
> +	head = skb->head;
> +	if (skb->traits_type != SKB_TRAITS_NONE) {
> +		head = skb_traits(skb) + traits_size(skb_traits(skb));
> +		/* struct xdp_frame isn't needed in the headroom, drop it */
> +		memcpy(data, skb_traits(skb), traits_size(skb_traits(skb)));
> +		skb->traits_type = SKB_TRAITS_AT_HEAD;
> +	}
> +
>  	/* Copy only real data... and, alas, header. This should be
>  	 * optimized for the cases when header is void.
>  	 */
> -	memcpy(data + nhead, skb->head, skb_tail_pointer(skb) - skb->head);
> +	memcpy(data + nhead, head, skb_tail_pointer(skb) - head);
>  
>  	memcpy((struct skb_shared_info *)(data + size),
>  	       skb_shinfo(skb),
> 
> -- 
> 2.43.0
> 
>
diff mbox series

Patch

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index d7dfee152ebd26ce87a230222e94076aca793adc..886537508789202339c925b5613574de67b7e43c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -39,6 +39,7 @@ 
 #include <net/net_debug.h>
 #include <net/dropreason-core.h>
 #include <net/netmem.h>
+#include <net/trait.h>
 
 /**
  * DOC: skb checksums
@@ -729,6 +730,8 @@  enum skb_traits_type {
 	SKB_TRAITS_NONE,
 	/* Trait store in headroom, offset by sizeof(struct xdp_frame) */
 	SKB_TRAITS_AFTER_XDP,
+	/* Trait store at start of headroom */
+	SKB_TRAITS_AT_HEAD,
 };
 
 /**
@@ -1029,7 +1032,7 @@  struct sk_buff {
 	__u8			csum_not_inet:1;
 #endif
 	__u8			unreadable:1;
-	__u8			traits_type:1;	/* See enum skb_traits_type */
+	__u8			traits_type:2;	/* See enum skb_traits_type */
 #if defined(CONFIG_NET_SCHED) || defined(CONFIG_NET_XGRESS)
 	__u16			tc_index;	/* traffic control index */
 #endif
@@ -2836,6 +2839,18 @@  static inline void *pskb_pull(struct sk_buff *skb, unsigned int len)
 
 void skb_condense(struct sk_buff *skb);
 
+static inline void *skb_traits(const struct sk_buff *skb)
+{
+	switch (skb->traits_type) {
+	case SKB_TRAITS_AFTER_XDP:
+		return skb->head + _XDP_FRAME_SIZE;
+	case SKB_TRAITS_AT_HEAD:
+		return skb->head;
+	default:
+		return NULL;
+	}
+}
+
 /**
  *	skb_headroom - bytes at buffer head
  *	@skb: buffer to check
@@ -2844,7 +2859,13 @@  void skb_condense(struct sk_buff *skb);
  */
 static inline unsigned int skb_headroom(const struct sk_buff *skb)
 {
-	return skb->data - skb->head;
+	int trait_size = 0;
+	void *traits = skb_traits(skb);
+
+	if (traits)
+		trait_size = traits_size(traits);
+
+	return skb->data - skb->head - trait_size;
 }
 
 /**
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7b03b64fdcb276f68ce881d1d8da8e4c6b897efc..83f58517738e8ff12990c28b09336ed44f4be32a 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1515,6 +1515,19 @@  static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
 	atomic_inc(&(skb_shinfo(skb)->dataref));
 	skb->cloned = 1;
 
+	/* traits would end up shared with the clone,
+	 * and edits would be reflected there.
+	 *
+	 * Is that ok? What if the original skb and the clone take different paths?
+	 * Does that even happen?
+	 *
+	 * If that's not ok, we could copy the traits and store them in an extension header
+	 * for clones.
+	 *
+	 * For now, pretend the clone doesn't have any traits.
+	 */
+	skb->traits_type = SKB_TRAITS_NONE;
+
 	return n;
 #undef C
 }
@@ -2170,7 +2183,7 @@  int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 	unsigned int osize = skb_end_offset(skb);
 	unsigned int size = osize + nhead + ntail;
 	long off;
-	u8 *data;
+	u8 *data, *head;
 	int i;
 
 	BUG_ON(nhead < 0);
@@ -2187,10 +2200,18 @@  int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 		goto nodata;
 	size = SKB_WITH_OVERHEAD(size);
 
+	head = skb->head;
+	if (skb->traits_type != SKB_TRAITS_NONE) {
+		head = skb_traits(skb) + traits_size(skb_traits(skb));
+		/* struct xdp_frame isn't needed in the headroom, drop it */
+		memcpy(data, skb_traits(skb), traits_size(skb_traits(skb)));
+		skb->traits_type = SKB_TRAITS_AT_HEAD;
+	}
+
 	/* Copy only real data... and, alas, header. This should be
 	 * optimized for the cases when header is void.
 	 */
-	memcpy(data + nhead, skb->head, skb_tail_pointer(skb) - skb->head);
+	memcpy(data + nhead, head, skb_tail_pointer(skb) - head);
 
 	memcpy((struct skb_shared_info *)(data + size),
 	       skb_shinfo(skb),