diff mbox series

[bpf-next,v2,2/6] net: tun: enable transfer of XDP metadata to skb

Message ID 20250217172308.3291739-3-marcus.wichelmann@hetzner-cloud.de (mailing list archive)
State New
Delegated to: BPF
Headers show
Series XDP metadata support for tun driver | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 12 of 12 maintainers
netdev/build_clang success Errors and warnings before: 1 this patch: 1
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 68 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / GCC BPF
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / GCC BPF
bpf/vmtest-bpf-next-VM_Test-11 success Logs for aarch64-gcc / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-12 success Logs for aarch64-gcc / veristat-meta
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-18 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for s390x-gcc / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-20 success Logs for s390x-gcc / veristat-meta
bpf/vmtest-bpf-next-VM_Test-21 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-17 / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-17 / veristat-meta
bpf/vmtest-bpf-next-VM_Test-43 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-44 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-50 success Logs for x86_64-llvm-18 / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-51 success Logs for x86_64-llvm-18 / veristat-meta
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-gcc / veristat-kernel / x86_64-gcc veristat_kernel
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-gcc / veristat-meta / x86_64-gcc veristat_meta
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / GCC BPF / GCC BPF
bpf/vmtest-bpf-next-VM_Test-45 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-49 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / GCC BPF / GCC BPF
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / GCC BPF / GCC BPF
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-46 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-47 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-48 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18

Commit Message

Marcus Wichelmann Feb. 17, 2025, 5:23 p.m. UTC
When the XDP metadata area was used, it is expected that the same
metadata can also be accessed from TC, as can be read in the description
of the bpf_xdp_adjust_meta helper function. In the tun driver, this was
not yet implemented.

To make this work, the skb that is being built on XDP_PASS should know
of the current size of the metadata area. This is ensured by adding
calls to skb_metadata_set. For the tun_xdp_one code path, an additional
check is necessary to handle the case where the externally initialized
xdp_buff has no metadata support (xdp->data_meta == xdp->data + 1).

More information about this feature can be found in the commit message
of commit de8f3a83b0a0 ("bpf: add meta pointer for direct access").

Signed-off-by: Marcus Wichelmann <marcus.wichelmann@hetzner-cloud.de>
---
 drivers/net/tun.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

Comments

Willem de Bruijn Feb. 18, 2025, 1:45 a.m. UTC | #1
Marcus Wichelmann wrote:
> When the XDP metadata area was used, it is expected that the same
> metadata can also be accessed from TC, as can be read in the description
> of the bpf_xdp_adjust_meta helper function. In the tun driver, this was
> not yet implemented.
> 
> To make this work, the skb that is being built on XDP_PASS should know
> of the current size of the metadata area. This is ensured by adding
> calls to skb_metadata_set. For the tun_xdp_one code path, an additional
> check is necessary to handle the case where the externally initialized
> xdp_buff has no metadata support (xdp->data_meta == xdp->data + 1).
> 
> More information about this feature can be found in the commit message
> of commit de8f3a83b0a0 ("bpf: add meta pointer for direct access").
> 
> Signed-off-by: Marcus Wichelmann <marcus.wichelmann@hetzner-cloud.de>
> ---
>  drivers/net/tun.c | 21 ++++++++++++++++++---
>  1 file changed, 18 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index c95ab9c46bd2..3dde6cd29a84 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -1604,7 +1604,8 @@ static bool tun_can_build_skb(struct tun_struct *tun, struct tun_file *tfile,
>  
>  static struct sk_buff *__tun_build_skb(struct tun_file *tfile,
>  				       struct page_frag *alloc_frag, char *buf,
> -				       int buflen, int len, int pad)
> +				       int buflen, int len, int pad,
> +				       int metasize)
>  {
>  	struct sk_buff *skb = build_skb(buf, buflen);
>  
> @@ -1613,6 +1614,8 @@ static struct sk_buff *__tun_build_skb(struct tun_file *tfile,
>  
>  	skb_reserve(skb, pad);
>  	skb_put(skb, len);
> +	if (metasize)
> +		skb_metadata_set(skb, metasize);
>  	skb_set_owner_w(skb, tfile->socket.sk);
>  
>  	get_page(alloc_frag->page);
> @@ -1672,6 +1675,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
>  	char *buf;
>  	size_t copied;
>  	int pad = TUN_RX_PAD;
> +	int metasize = 0;
>  	int err = 0;
>  
>  	rcu_read_lock();
> @@ -1699,7 +1703,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
>  	if (hdr->gso_type || !xdp_prog) {
>  		*skb_xdp = 1;
>  		return __tun_build_skb(tfile, alloc_frag, buf, buflen, len,
> -				       pad);
> +				       pad, metasize);
>  	}
>  
>  	*skb_xdp = 0;
> @@ -1734,12 +1738,18 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
>  
>  		pad = xdp.data - xdp.data_hard_start;
>  		len = xdp.data_end - xdp.data;
> +
> +		/* It is known that the xdp_buff was prepared with metadata
> +		 * support, so no additional check is necessary.
> +		 */
> +		metasize = xdp.data - xdp.data_meta;
>  	}
>  	bpf_net_ctx_clear(bpf_net_ctx);
>  	rcu_read_unlock();
>  	local_bh_enable();
>  
> -	return __tun_build_skb(tfile, alloc_frag, buf, buflen, len, pad);
> +	return __tun_build_skb(tfile, alloc_frag, buf, buflen, len, pad,
> +			       metasize);
>  
>  out:
>  	bpf_net_ctx_clear(bpf_net_ctx);
> @@ -2456,6 +2466,7 @@ static int tun_xdp_one(struct tun_struct *tun,
>  	struct sk_buff_head *queue;
>  	u32 rxhash = 0, act;
>  	int buflen = hdr->buflen;
> +	int metasize = 0;
>  	int ret = 0;
>  	bool skb_xdp = false;
>  	struct page *page;
> @@ -2510,6 +2521,10 @@ static int tun_xdp_one(struct tun_struct *tun,
>  	skb_reserve(skb, xdp->data - xdp->data_hard_start);
>  	skb_put(skb, xdp->data_end - xdp->data);
>  
> +	metasize = max(xdp->data - xdp->data_meta, 0);

Is it ever possible for xdp->data_meta to be greater than xdp->data?

This is pointer arithmetic, which is a bit complex wrt type. The result
is likely ptrdiff_t, which is signed. But may want to use
max_t(int, ...) to make this more explicit.

> +	if (metasize)
> +		skb_metadata_set(skb, metasize);
> +

Technically not needed as skb_metadata_clear is just
skb_metadata_set(skb, 0). But fine to test and elide.

>  	if (virtio_net_hdr_to_skb(skb, gso, tun_is_little_endian(tun))) {
>  		atomic_long_inc(&tun->rx_frame_errors);
>  		kfree_skb(skb);
> -- 
> 2.43.0
>
Willem de Bruijn Feb. 18, 2025, 1:47 a.m. UTC | #2
Marcus Wichelmann wrote:
> When the XDP metadata area was used, it is expected that the same
> metadata can also be accessed from TC, as can be read in the description
> of the bpf_xdp_adjust_meta helper function. In the tun driver, this was
> not yet implemented.
> 
> To make this work, the skb that is being built on XDP_PASS should know
> of the current size of the metadata area. This is ensured by adding
> calls to skb_metadata_set. For the tun_xdp_one code path, an additional
> check is necessary to handle the case where the externally initialized
> xdp_buff has no metadata support (xdp->data_meta == xdp->data + 1).
> 
> More information about this feature can be found in the commit message
> of commit de8f3a83b0a0 ("bpf: add meta pointer for direct access").
> 
> Signed-off-by: Marcus Wichelmann <marcus.wichelmann@hetzner-cloud.de>
> ---
>  drivers/net/tun.c | 21 ++++++++++++++++++---
>  1 file changed, 18 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index c95ab9c46bd2..3dde6cd29a84 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -1604,7 +1604,8 @@ static bool tun_can_build_skb(struct tun_struct *tun, struct tun_file *tfile,
>  
>  static struct sk_buff *__tun_build_skb(struct tun_file *tfile,
>  				       struct page_frag *alloc_frag, char *buf,
> -				       int buflen, int len, int pad)
> +				       int buflen, int len, int pad,
> +				       int metasize)
>  {
>  	struct sk_buff *skb = build_skb(buf, buflen);
>  
> @@ -1613,6 +1614,8 @@ static struct sk_buff *__tun_build_skb(struct tun_file *tfile,
>  
>  	skb_reserve(skb, pad);
>  	skb_put(skb, len);
> +	if (metasize)
> +		skb_metadata_set(skb, metasize);
>  	skb_set_owner_w(skb, tfile->socket.sk);
>  
>  	get_page(alloc_frag->page);
> @@ -1672,6 +1675,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
>  	char *buf;
>  	size_t copied;
>  	int pad = TUN_RX_PAD;
> +	int metasize = 0;
>  	int err = 0;
>  
>  	rcu_read_lock();
> @@ -1699,7 +1703,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
>  	if (hdr->gso_type || !xdp_prog) {
>  		*skb_xdp = 1;
>  		return __tun_build_skb(tfile, alloc_frag, buf, buflen, len,
> -				       pad);
> +				       pad, metasize);
>  	}
>  
>  	*skb_xdp = 0;
> @@ -1734,12 +1738,18 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
>  
>  		pad = xdp.data - xdp.data_hard_start;
>  		len = xdp.data_end - xdp.data;
> +
> +		/* It is known that the xdp_buff was prepared with metadata
> +		 * support, so no additional check is necessary.
> +		 */
> +		metasize = xdp.data - xdp.data_meta;
>  	}
>  	bpf_net_ctx_clear(bpf_net_ctx);
>  	rcu_read_unlock();
>  	local_bh_enable();
>  
> -	return __tun_build_skb(tfile, alloc_frag, buf, buflen, len, pad);
> +	return __tun_build_skb(tfile, alloc_frag, buf, buflen, len, pad,
> +			       metasize);
>  
>  out:
>  	bpf_net_ctx_clear(bpf_net_ctx);
> @@ -2456,6 +2466,7 @@ static int tun_xdp_one(struct tun_struct *tun,
>  	struct sk_buff_head *queue;
>  	u32 rxhash = 0, act;
>  	int buflen = hdr->buflen;
> +	int metasize = 0;
>  	int ret = 0;
>  	bool skb_xdp = false;
>  	struct page *page;
> @@ -2510,6 +2521,10 @@ static int tun_xdp_one(struct tun_struct *tun,
>  	skb_reserve(skb, xdp->data - xdp->data_hard_start);
>  	skb_put(skb, xdp->data_end - xdp->data);
>  
> +	metasize = max(xdp->data - xdp->data_meta, 0);

Can xdp->data_meta ever be greater than xdp->data?

This is pointer comparison, which is tricky wrt type. It likely is
ptrdiff_t and thus signed. But may want to use max_t(long int, ..) to
make this explicit.

> +	if (metasize)
> +		skb_metadata_set(skb, metasize);
> +

Not strictly needed. As skb_metadata_clear is just
skb_metadata_set(skb, 0). But also not wrong, so fine to keep.

>  	if (virtio_net_hdr_to_skb(skb, gso, tun_is_little_endian(tun))) {
>  		atomic_long_inc(&tun->rx_frame_errors);
>  		kfree_skb(skb);
> -- 
> 2.43.0
>
Marcus Wichelmann Feb. 19, 2025, 2:47 p.m. UTC | #3
Am 18.02.25 um 02:47 schrieb Willem de Bruijn:
> Marcus Wichelmann wrote:
>> [...]
>> +	metasize = max(xdp->data - xdp->data_meta, 0);
> 
> Can xdp->data_meta ever be greater than xdp->data?

When an xdp_buff has no metadata support, then this is marked by setting
xdp->data_meta to xdp->data + 1. See xdp_prepare_buff or
xdp_set_data_meta_invalid.

In the case of tun_xdp_one, the xdp_buff is externally created by another
driver and passed to the tun driver using sendmsg and TUN_MSG_PTR. For
now, the vhost_net driver is the only driver doing that, and
xdp->data_meta is set to xdp->data there, marking support for metadata.

So knowing that vhost_net is currently the only driver passing xdp_buffs
to tun_sendmsg, the check is not strictly necessary. But other drivers
may use this API as well in the future. That's why I'd like to not make
the assumption that other drivers always create the xdp_buffs with
metadata support, when they pass them to tun_sendmsg.

Or am I just too careful about this? What do you think?

> This is pointer comparison, which is tricky wrt type. It likely is
> ptrdiff_t and thus signed. But may want to use max_t(long int, ..) to
> make this explicit.

Ah, I see, good point.

So like that?

	metasize = max_t(long int, xdp->data - xdp->data_meta, 0);
	if (metasize)
		skb_metadata_set(skb, metasize);

Alternatively, there is also xdp_data_meta_unsupported(xdp_buff) which
could be used to make this check very explicit, but I don't see it being
used in network drivers elsewhere. Not sure why.

>> +	if (metasize)
>> +		skb_metadata_set(skb, metasize);
>> +
> 
> Not strictly needed. As skb_metadata_clear is just
> skb_metadata_set(skb, 0). But also not wrong, so fine to keep.

Oh, haven't seen that.
I'm following a common pattern here that I've seen in many other network
drivers (grep for "skb_metadata_set"):

	unsigned int metasize = xdp->data - xdp->data_meta;
	[...]
	if (metasize)
		skb_metadata_set(skb, metasize);

Marcus
Willem de Bruijn Feb. 19, 2025, 3:06 p.m. UTC | #4
Marcus Wichelmann wrote:
> Am 18.02.25 um 02:47 schrieb Willem de Bruijn:
> > Marcus Wichelmann wrote:
> >> [...]
> >> +	metasize = max(xdp->data - xdp->data_meta, 0);
> > 
> > Can xdp->data_meta ever be greater than xdp->data?
> 
> When an xdp_buff has no metadata support, then this is marked by setting
> xdp->data_meta to xdp->data + 1. See xdp_prepare_buff or
> xdp_set_data_meta_invalid.
> 
> In the case of tun_xdp_one, the xdp_buff is externally created by another
> driver and passed to the tun driver using sendmsg and TUN_MSG_PTR. For
> now, the vhost_net driver is the only driver doing that, and
> xdp->data_meta is set to xdp->data there, marking support for metadata.
> 
> So knowing that vhost_net is currently the only driver passing xdp_buffs
> to tun_sendmsg, the check is not strictly necessary. But other drivers
> may use this API as well in the future. That's why I'd like to not make
> the assumption that other drivers always create the xdp_buffs with
> metadata support, when they pass them to tun_sendmsg.
> 
> > Or am I just too careful about this? What do you think?

I agree.
 
> > This is pointer comparison, which is tricky wrt type. It likely is
> > ptrdiff_t and thus signed. But may want to use max_t(long int, ..) to
> > make this explicit.
> 
> Ah, I see, good point.
> 
> So like that?
> 
> 	metasize = max_t(long int, xdp->data - xdp->data_meta, 0);
> 	if (metasize)
> 		skb_metadata_set(skb, metasize);

Or just this? Also ensures the test uses signed int.

    int metasize;

    ...


    metasize = xdp->data - xdp->data_meta;
    if (metasize > 0)
            skb_metadata_set(skb, metasize);


> Alternatively, there is also xdp_data_meta_unsupported(xdp_buff) which
> could be used to make this check very explicit, but I don't see it being
> used in network drivers elsewhere. Not sure why.
> 
> >> +	if (metasize)
> >> +		skb_metadata_set(skb, metasize);
> >> +
> > 
> > Not strictly needed. As skb_metadata_clear is just
> > skb_metadata_set(skb, 0). But also not wrong, so fine to keep.
> 
> Oh, haven't seen that.
> I'm following a common pattern here that I've seen in many other network
> drivers (grep for "skb_metadata_set"):
> 
> 	unsigned int metasize = xdp->data - xdp->data_meta;
> 	[...]
> 	if (metasize)
> 		skb_metadata_set(skb, metasize);

Thanks for that context. Sounds good.
Marcus Wichelmann Feb. 19, 2025, 3:15 p.m. UTC | #5
Am 19.02.25 um 16:06 schrieb Willem de Bruijn:
> Marcus Wichelmann wrote:
>> Am 18.02.25 um 02:47 schrieb Willem de Bruijn:
>>> [...]
>>> This is pointer comparison, which is tricky wrt type. It likely is
>>> ptrdiff_t and thus signed. But may want to use max_t(long int, ..) to
>>> make this explicit.
>>
>> Ah, I see, good point.
>>
>> So like that?
>>
>> 	metasize = max_t(long int, xdp->data - xdp->data_meta, 0);
>> 	if (metasize)
>> 		skb_metadata_set(skb, metasize);
> 
> Or just this? Also ensures the test uses signed int.
> 
>      int metasize;
> 
>      ...
> 
> 
>      metasize = xdp->data - xdp->data_meta;
>      if (metasize > 0)
>              skb_metadata_set(skb, metasize);
> 

Well, yeah, just keep it simple I guess. ;) Will do that.

I'll send a V3 patch series with the change.

Thanks!

Marcus
diff mbox series

Patch

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index c95ab9c46bd2..3dde6cd29a84 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1604,7 +1604,8 @@  static bool tun_can_build_skb(struct tun_struct *tun, struct tun_file *tfile,
 
 static struct sk_buff *__tun_build_skb(struct tun_file *tfile,
 				       struct page_frag *alloc_frag, char *buf,
-				       int buflen, int len, int pad)
+				       int buflen, int len, int pad,
+				       int metasize)
 {
 	struct sk_buff *skb = build_skb(buf, buflen);
 
@@ -1613,6 +1614,8 @@  static struct sk_buff *__tun_build_skb(struct tun_file *tfile,
 
 	skb_reserve(skb, pad);
 	skb_put(skb, len);
+	if (metasize)
+		skb_metadata_set(skb, metasize);
 	skb_set_owner_w(skb, tfile->socket.sk);
 
 	get_page(alloc_frag->page);
@@ -1672,6 +1675,7 @@  static struct sk_buff *tun_build_skb(struct tun_struct *tun,
 	char *buf;
 	size_t copied;
 	int pad = TUN_RX_PAD;
+	int metasize = 0;
 	int err = 0;
 
 	rcu_read_lock();
@@ -1699,7 +1703,7 @@  static struct sk_buff *tun_build_skb(struct tun_struct *tun,
 	if (hdr->gso_type || !xdp_prog) {
 		*skb_xdp = 1;
 		return __tun_build_skb(tfile, alloc_frag, buf, buflen, len,
-				       pad);
+				       pad, metasize);
 	}
 
 	*skb_xdp = 0;
@@ -1734,12 +1738,18 @@  static struct sk_buff *tun_build_skb(struct tun_struct *tun,
 
 		pad = xdp.data - xdp.data_hard_start;
 		len = xdp.data_end - xdp.data;
+
+		/* It is known that the xdp_buff was prepared with metadata
+		 * support, so no additional check is necessary.
+		 */
+		metasize = xdp.data - xdp.data_meta;
 	}
 	bpf_net_ctx_clear(bpf_net_ctx);
 	rcu_read_unlock();
 	local_bh_enable();
 
-	return __tun_build_skb(tfile, alloc_frag, buf, buflen, len, pad);
+	return __tun_build_skb(tfile, alloc_frag, buf, buflen, len, pad,
+			       metasize);
 
 out:
 	bpf_net_ctx_clear(bpf_net_ctx);
@@ -2456,6 +2466,7 @@  static int tun_xdp_one(struct tun_struct *tun,
 	struct sk_buff_head *queue;
 	u32 rxhash = 0, act;
 	int buflen = hdr->buflen;
+	int metasize = 0;
 	int ret = 0;
 	bool skb_xdp = false;
 	struct page *page;
@@ -2510,6 +2521,10 @@  static int tun_xdp_one(struct tun_struct *tun,
 	skb_reserve(skb, xdp->data - xdp->data_hard_start);
 	skb_put(skb, xdp->data_end - xdp->data);
 
+	metasize = max(xdp->data - xdp->data_meta, 0);
+	if (metasize)
+		skb_metadata_set(skb, metasize);
+
 	if (virtio_net_hdr_to_skb(skb, gso, tun_is_little_endian(tun))) {
 		atomic_long_inc(&tun->rx_frame_errors);
 		kfree_skb(skb);