diff mbox series

[1/1] net: tun: add XDP metadata support

Message ID 20250130171614.1657224-2-marcus.wichelmann@hetzner-cloud.de (mailing list archive)
State Deferred
Delegated to: Netdev Maintainers
Headers show
Series XDP metadata support for tun driver | expand

Checks

Context Check Description
netdev/series_format warning Target tree name not specified in the subject
netdev/tree_selection success Guessed tree name to be net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 12 of 12 maintainers
netdev/build_clang success Errors and warnings before: 1 this patch: 1
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 82 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2025-01-30--18-00 (tests: 885)

Commit Message

Marcus Wichelmann Jan. 30, 2025, 5:16 p.m. UTC
Enable the support for bpf_xdp_adjust_meta for XDP buffers initialized
by the tun driver. This is useful to pass metadata from an XDP program
that's attached to a tap device to following XDP/TC programs.

When used together with vhost_net, the batched XDP buffers were already
initialized with metadata support by the vhost_net driver, but the
metadata was not yet passed to the skb on XDP_PASS. So this also adds
the required skb_metadata_set calls.

Signed-off-by: Marcus Wichelmann <marcus.wichelmann@hetzner-cloud.de>
---
 drivers/net/tun.c | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

Comments

Stanislav Fomichev Jan. 30, 2025, 11:16 p.m. UTC | #1
On 01/30, Marcus Wichelmann wrote:
> Enable the support for bpf_xdp_adjust_meta for XDP buffers initialized
> by the tun driver. This is useful to pass metadata from an XDP program
> that's attached to a tap device to following XDP/TC programs.
> 
> When used together with vhost_net, the batched XDP buffers were already
> initialized with metadata support by the vhost_net driver, but the
> metadata was not yet passed to the skb on XDP_PASS. So this also adds
> the required skb_metadata_set calls.

Can you expand more on what kind of metadata is present with vhost_net
and who fills it in? Is it virtio header stuff? I wonder how you
want to consume it..

Can you also add a selftest to use this new functionality?

> Signed-off-by: Marcus Wichelmann <marcus.wichelmann@hetzner-cloud.de>
> ---
>  drivers/net/tun.c | 23 ++++++++++++++++++-----
>  1 file changed, 18 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index e816aaba8..d3cfea40a 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -1600,7 +1600,8 @@ static bool tun_can_build_skb(struct tun_struct *tun, struct tun_file *tfile,
>  
>  static struct sk_buff *__tun_build_skb(struct tun_file *tfile,
>  				       struct page_frag *alloc_frag, char *buf,
> -				       int buflen, int len, int pad)
> +				       int buflen, int len, int pad,
> +				       int metasize)
>  {
>  	struct sk_buff *skb = build_skb(buf, buflen);
>  
> @@ -1609,6 +1610,8 @@ static struct sk_buff *__tun_build_skb(struct tun_file *tfile,
>  
>  	skb_reserve(skb, pad);
>  	skb_put(skb, len);
> +	if (metasize)
> +		skb_metadata_set(skb, metasize);
>  	skb_set_owner_w(skb, tfile->socket.sk);
>  
>  	get_page(alloc_frag->page);
> @@ -1668,6 +1671,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
>  	char *buf;
>  	size_t copied;
>  	int pad = TUN_RX_PAD;
> +	int metasize = 0;
>  	int err = 0;
>  
>  	rcu_read_lock();
> @@ -1695,7 +1699,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
>  	if (hdr->gso_type || !xdp_prog) {
>  		*skb_xdp = 1;
>  		return __tun_build_skb(tfile, alloc_frag, buf, buflen, len,
> -				       pad);
> +				       pad, metasize);
>  	}
>  
>  	*skb_xdp = 0;
> @@ -1709,7 +1713,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
>  		u32 act;
>  
>  		xdp_init_buff(&xdp, buflen, &tfile->xdp_rxq);
> -		xdp_prepare_buff(&xdp, buf, pad, len, false);
> +		xdp_prepare_buff(&xdp, buf, pad, len, true);
>  
>  		act = bpf_prog_run_xdp(xdp_prog, &xdp);
>  		if (act == XDP_REDIRECT || act == XDP_TX) {
> @@ -1730,12 +1734,16 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
>  
>  		pad = xdp.data - xdp.data_hard_start;
>  		len = xdp.data_end - xdp.data;
> +
> +		metasize = xdp.data - xdp.data_meta;

[..]

> +		metasize = metasize > 0 ? metasize : 0;

Why is this part needed?
Marcus Wichelmann Jan. 31, 2025, 2:25 p.m. UTC | #2
Am 31.01.25 um 00:16 schrieb Stanislav Fomichev:
> On 01/30, Marcus Wichelmann wrote:
>> Enable the support for bpf_xdp_adjust_meta for XDP buffers initialized
>> by the tun driver. This is useful to pass metadata from an XDP program
>> that's attached to a tap device to following XDP/TC programs.
>>
>> When used together with vhost_net, the batched XDP buffers were already
>> initialized with metadata support by the vhost_net driver, but the
>> metadata was not yet passed to the skb on XDP_PASS. So this also adds
>> the required skb_metadata_set calls.
> 
> Can you expand more on what kind of metadata is present with vhost_net
> and who fills it in? Is it virtio header stuff? I wonder how you
> want to consume it..

Hm, my commit message may have been a bit misleading.

I'm talking about regular support for the bpf_xdp_adjust_meta helper
function. This allows to reserve a metadata area that is useful to pass
any information from one XDP program to another one, for example when
using tail-calls.

Whether it can be used in an XDP program depends on how the xdp_buff
was initialized. Most net drivers initialize the xdp_buff in a way, that
allows bpf_xdp_adjust_meta to be used. In case of the tun driver, this is
not always the case.

There are two code paths in the tun driver that lead to a bpf_prog_run_xdp:

1. tun_build_skb, which is called by tun_get_user and is used when writing
    packets from userspace into the tap device. In this case, the xdp_buff
    created in tun_build_skb has no support for bpf_xdp_adjust_meta and calls
    of that helper function result in ENOTSUPP.

2. tun_xdp_one, which is called by tun_sendmsg which again is called by other
    drivers (e.g. vhost_net). When the TUN_MSG_PTR mode is used, another driver
    may pass a batch of xdp_buffs to the tun driver. In this case, that other
    driver is the one initializing the xdp_buff already. And in the case of
    vhost_net,  it already initializes the buffer with metadata support (see
    xdp_prepare_buff call).
    See 043d222f93ab ("tuntap: accept an array of XDP buffs through sendmsg()")
    for details.

This patch enables metadata support for the first code path.

It also adds the missing skb_metadata_set calls for both code paths. This is
important when the attached XDP program returns with XDP_PASS, because then
the code continues with creating an skb and that skb should be aware of the
metadata area's size. At a later stage, a TC program, for example, can then
access the metadata again using __sk_buff->data_meta.

So I'm doing not much new here but am rather enabling a feature that is
supported by other drivers already.

> Can you also add a selftest to use this new functionality?

Of course, I'll see what I can do.

>> Signed-off-by: Marcus Wichelmann <marcus.wichelmann@hetzner-cloud.de>
>> ---
>>   drivers/net/tun.c | 23 ++++++++++++++++++-----
>>   1 file changed, 18 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
>> index e816aaba8..d3cfea40a 100644
>> --- a/drivers/net/tun.c
>> +++ b/drivers/net/tun.c
>> @@ -1600,7 +1600,8 @@ static bool tun_can_build_skb(struct tun_struct *tun, struct tun_file *tfile,
>>   
>>   static struct sk_buff *__tun_build_skb(struct tun_file *tfile,
>>   				       struct page_frag *alloc_frag, char *buf,
>> -				       int buflen, int len, int pad)
>> +				       int buflen, int len, int pad,
>> +				       int metasize)
>>   {
>>   	struct sk_buff *skb = build_skb(buf, buflen);
>>   
>> @@ -1609,6 +1610,8 @@ static struct sk_buff *__tun_build_skb(struct tun_file *tfile,
>>   
>>   	skb_reserve(skb, pad);
>>   	skb_put(skb, len);
>> +	if (metasize)
>> +		skb_metadata_set(skb, metasize);
>>   	skb_set_owner_w(skb, tfile->socket.sk);
>>   
>>   	get_page(alloc_frag->page);
>> @@ -1668,6 +1671,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
>>   	char *buf;
>>   	size_t copied;
>>   	int pad = TUN_RX_PAD;
>> +	int metasize = 0;
>>   	int err = 0;
>>   
>>   	rcu_read_lock();
>> @@ -1695,7 +1699,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
>>   	if (hdr->gso_type || !xdp_prog) {
>>   		*skb_xdp = 1;
>>   		return __tun_build_skb(tfile, alloc_frag, buf, buflen, len,
>> -				       pad);
>> +				       pad, metasize);
>>   	}
>>   
>>   	*skb_xdp = 0;
>> @@ -1709,7 +1713,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
>>   		u32 act;
>>   
>>   		xdp_init_buff(&xdp, buflen, &tfile->xdp_rxq);
>> -		xdp_prepare_buff(&xdp, buf, pad, len, false);
>> +		xdp_prepare_buff(&xdp, buf, pad, len, true);
>>   
>>   		act = bpf_prog_run_xdp(xdp_prog, &xdp);
>>   		if (act == XDP_REDIRECT || act == XDP_TX) {
>> @@ -1730,12 +1734,16 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
>>   
>>   		pad = xdp.data - xdp.data_hard_start;
>>   		len = xdp.data_end - xdp.data;
>> +
>> +		metasize = xdp.data - xdp.data_meta;
> 
> [..]
> 
>> +		metasize = metasize > 0 ? metasize : 0;
> 
> Why is this part needed?

When an xdp_buff was initialized withouth metadata support (meta_valid
argument of xdp_prepare_buff is false), then data_meta == data + 1.
So this check makes sure that metadata was supported for the given xdp_buff
and metasize is not -1 (data - data_meta).

But you have a good point here: Because we have control over the
initialization of xdp_buff in the tun_build_skb function (first code path),
we know, that metadata is always supported for that buffer and metasize
is never < 0. So this check is redundant and I'll remove it.

But in the tun_xdp_one function (second code path), I'd prefer to keep that
check, as the xdp_buff is externally passed to tun_sendmsg and the tun driver
should probably not make assumptions about the metadata support of buffers
created by other drivers (e.g. vhost_net).

Thank you for taking a look, I hope things are more clear now.

Marcus
Stanislav Fomichev Feb. 1, 2025, 3:39 a.m. UTC | #3
On 01/31, Marcus Wichelmann wrote:
> Am 31.01.25 um 00:16 schrieb Stanislav Fomichev:
> > On 01/30, Marcus Wichelmann wrote:
> > > Enable the support for bpf_xdp_adjust_meta for XDP buffers initialized
> > > by the tun driver. This is useful to pass metadata from an XDP program
> > > that's attached to a tap device to following XDP/TC programs.
> > > 
> > > When used together with vhost_net, the batched XDP buffers were already
> > > initialized with metadata support by the vhost_net driver, but the
> > > metadata was not yet passed to the skb on XDP_PASS. So this also adds
> > > the required skb_metadata_set calls.
> > 
> > Can you expand more on what kind of metadata is present with vhost_net
> > and who fills it in? Is it virtio header stuff? I wonder how you
> > want to consume it..
> 
> Hm, my commit message may have been a bit misleading.
> 
> I'm talking about regular support for the bpf_xdp_adjust_meta helper
> function. This allows to reserve a metadata area that is useful to pass
> any information from one XDP program to another one, for example when
> using tail-calls.
> 
> Whether it can be used in an XDP program depends on how the xdp_buff
> was initialized. Most net drivers initialize the xdp_buff in a way, that
> allows bpf_xdp_adjust_meta to be used. In case of the tun driver, this is
> not always the case.
> 
> There are two code paths in the tun driver that lead to a bpf_prog_run_xdp:
> 
> 1. tun_build_skb, which is called by tun_get_user and is used when writing
>    packets from userspace into the tap device. In this case, the xdp_buff
>    created in tun_build_skb has no support for bpf_xdp_adjust_meta and calls
>    of that helper function result in ENOTSUPP.
> 
> 2. tun_xdp_one, which is called by tun_sendmsg which again is called by other
>    drivers (e.g. vhost_net). When the TUN_MSG_PTR mode is used, another driver
>    may pass a batch of xdp_buffs to the tun driver. In this case, that other
>    driver is the one initializing the xdp_buff already. And in the case of
>    vhost_net,  it already initializes the buffer with metadata support (see
>    xdp_prepare_buff call).
>    See 043d222f93ab ("tuntap: accept an array of XDP buffs through sendmsg()")
>    for details.
> 
> This patch enables metadata support for the first code path.
> 
> It also adds the missing skb_metadata_set calls for both code paths. This is
> important when the attached XDP program returns with XDP_PASS, because then
> the code continues with creating an skb and that skb should be aware of the
> metadata area's size. At a later stage, a TC program, for example, can then
> access the metadata again using __sk_buff->data_meta.
> 
> So I'm doing not much new here but am rather enabling a feature that is
> supported by other drivers already.
> 
> > Can you also add a selftest to use this new functionality?
> 
> Of course, I'll see what I can do.
> 
> > > Signed-off-by: Marcus Wichelmann <marcus.wichelmann@hetzner-cloud.de>
> > > ---
> > >   drivers/net/tun.c | 23 ++++++++++++++++++-----
> > >   1 file changed, 18 insertions(+), 5 deletions(-)
> > > 
> > > diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> > > index e816aaba8..d3cfea40a 100644
> > > --- a/drivers/net/tun.c
> > > +++ b/drivers/net/tun.c
> > > @@ -1600,7 +1600,8 @@ static bool tun_can_build_skb(struct tun_struct *tun, struct tun_file *tfile,
> > >   static struct sk_buff *__tun_build_skb(struct tun_file *tfile,
> > >   				       struct page_frag *alloc_frag, char *buf,
> > > -				       int buflen, int len, int pad)
> > > +				       int buflen, int len, int pad,
> > > +				       int metasize)
> > >   {
> > >   	struct sk_buff *skb = build_skb(buf, buflen);
> > > @@ -1609,6 +1610,8 @@ static struct sk_buff *__tun_build_skb(struct tun_file *tfile,
> > >   	skb_reserve(skb, pad);
> > >   	skb_put(skb, len);
> > > +	if (metasize)
> > > +		skb_metadata_set(skb, metasize);
> > >   	skb_set_owner_w(skb, tfile->socket.sk);
> > >   	get_page(alloc_frag->page);
> > > @@ -1668,6 +1671,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
> > >   	char *buf;
> > >   	size_t copied;
> > >   	int pad = TUN_RX_PAD;
> > > +	int metasize = 0;
> > >   	int err = 0;
> > >   	rcu_read_lock();
> > > @@ -1695,7 +1699,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
> > >   	if (hdr->gso_type || !xdp_prog) {
> > >   		*skb_xdp = 1;
> > >   		return __tun_build_skb(tfile, alloc_frag, buf, buflen, len,
> > > -				       pad);
> > > +				       pad, metasize);
> > >   	}
> > >   	*skb_xdp = 0;
> > > @@ -1709,7 +1713,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
> > >   		u32 act;
> > >   		xdp_init_buff(&xdp, buflen, &tfile->xdp_rxq);
> > > -		xdp_prepare_buff(&xdp, buf, pad, len, false);
> > > +		xdp_prepare_buff(&xdp, buf, pad, len, true);
> > >   		act = bpf_prog_run_xdp(xdp_prog, &xdp);
> > >   		if (act == XDP_REDIRECT || act == XDP_TX) {
> > > @@ -1730,12 +1734,16 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
> > >   		pad = xdp.data - xdp.data_hard_start;
> > >   		len = xdp.data_end - xdp.data;
> > > +
> > > +		metasize = xdp.data - xdp.data_meta;
> > 
> > [..]
> > 
> > > +		metasize = metasize > 0 ? metasize : 0;
> > 
> > Why is this part needed?
> 
> When an xdp_buff was initialized withouth metadata support (meta_valid
> argument of xdp_prepare_buff is false), then data_meta == data + 1.
> So this check makes sure that metadata was supported for the given xdp_buff
> and metasize is not -1 (data - data_meta).
> 
> But you have a good point here: Because we have control over the
> initialization of xdp_buff in the tun_build_skb function (first code path),
> we know, that metadata is always supported for that buffer and metasize
> is never < 0. So this check is redundant and I'll remove it.
> 
> But in the tun_xdp_one function (second code path), I'd prefer to keep that
> check, as the xdp_buff is externally passed to tun_sendmsg and the tun driver
> should probably not make assumptions about the metadata support of buffers
> created by other drivers (e.g. vhost_net).
> 
> Thank you for taking a look, I hope things are more clear now.

Thanks for the explanations, makes sense. Please follow up with a
selftest (in tools/testing/selftests/bpf) and take a look at [0] to route it
to the proper bpf-next subtree (unless netdev folks prefer to take it via
net-next).

0: https://www.kernel.org/doc/html/latest/bpf/bpf_devel_QA.html
Willem de Bruijn Feb. 3, 2025, 1:32 a.m. UTC | #4
Marcus Wichelmann wrote:
> Am 31.01.25 um 00:16 schrieb Stanislav Fomichev:
> > On 01/30, Marcus Wichelmann wrote:
> >> Enable the support for bpf_xdp_adjust_meta for XDP buffers initialized
> >> by the tun driver. This is useful to pass metadata from an XDP program
> >> that's attached to a tap device to following XDP/TC programs.
> >>
> >> When used together with vhost_net, the batched XDP buffers were already
> >> initialized with metadata support by the vhost_net driver, but the
> >> metadata was not yet passed to the skb on XDP_PASS. So this also adds
> >> the required skb_metadata_set calls.
> > 
> > Can you expand more on what kind of metadata is present with vhost_net
> > and who fills it in? Is it virtio header stuff? I wonder how you
> > want to consume it..
> 
> Hm, my commit message may have been a bit misleading.
> 
> I'm talking about regular support for the bpf_xdp_adjust_meta helper
> function. This allows to reserve a metadata area that is useful to pass
> any information from one XDP program to another one, for example when
> using tail-calls.
> 
> Whether it can be used in an XDP program depends on how the xdp_buff
> was initialized. Most net drivers initialize the xdp_buff in a way, that
> allows bpf_xdp_adjust_meta to be used. In case of the tun driver, this is
> not always the case.
> 
> There are two code paths in the tun driver that lead to a bpf_prog_run_xdp:
> 
> 1. tun_build_skb, which is called by tun_get_user and is used when writing
>     packets from userspace into the tap device. In this case, the xdp_buff
>     created in tun_build_skb has no support for bpf_xdp_adjust_meta and calls
>     of that helper function result in ENOTSUPP.
> 
> 2. tun_xdp_one, which is called by tun_sendmsg which again is called by other
>     drivers (e.g. vhost_net). When the TUN_MSG_PTR mode is used, another driver
>     may pass a batch of xdp_buffs to the tun driver. In this case, that other
>     driver is the one initializing the xdp_buff already. And in the case of
>     vhost_net,  it already initializes the buffer with metadata support (see
>     xdp_prepare_buff call).
>     See 043d222f93ab ("tuntap: accept an array of XDP buffs through sendmsg()")
>     for details.
> 
> This patch enables metadata support for the first code path.
> 
> It also adds the missing skb_metadata_set calls for both code paths.

Thanks for the clarification.

Sounds like this may be better two patches. And some of the
explanation in this thread wrapped into the commit messages.

> This is
> important when the attached XDP program returns with XDP_PASS, because then
> the code continues with creating an skb and that skb should be aware of the
> metadata area's size. At a later stage, a TC program, for example, can then
> access the metadata again using __sk_buff->data_meta.
> 
> So I'm doing not much new here but am rather enabling a feature that is
> supported by other drivers already.
> 
> > Can you also add a selftest to use this new functionality?
> 
> Of course, I'll see what I can do.
> 
> >> Signed-off-by: Marcus Wichelmann <marcus.wichelmann@hetzner-cloud.de>
> >> ---
> >>   drivers/net/tun.c | 23 ++++++++++++++++++-----
> >>   1 file changed, 18 insertions(+), 5 deletions(-)
> >>
> >> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> >> index e816aaba8..d3cfea40a 100644
> >> --- a/drivers/net/tun.c
> >> +++ b/drivers/net/tun.c
> >> @@ -1600,7 +1600,8 @@ static bool tun_can_build_skb(struct tun_struct *tun, struct tun_file *tfile,
> >>   
> >>   static struct sk_buff *__tun_build_skb(struct tun_file *tfile,
> >>   				       struct page_frag *alloc_frag, char *buf,
> >> -				       int buflen, int len, int pad)
> >> +				       int buflen, int len, int pad,
> >> +				       int metasize)
> >>   {
> >>   	struct sk_buff *skb = build_skb(buf, buflen);
> >>   
> >> @@ -1609,6 +1610,8 @@ static struct sk_buff *__tun_build_skb(struct tun_file *tfile,
> >>   
> >>   	skb_reserve(skb, pad);
> >>   	skb_put(skb, len);
> >> +	if (metasize)
> >> +		skb_metadata_set(skb, metasize);
> >>   	skb_set_owner_w(skb, tfile->socket.sk);
> >>   
> >>   	get_page(alloc_frag->page);
> >> @@ -1668,6 +1671,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
> >>   	char *buf;
> >>   	size_t copied;
> >>   	int pad = TUN_RX_PAD;
> >> +	int metasize = 0;
> >>   	int err = 0;
> >>   
> >>   	rcu_read_lock();
> >> @@ -1695,7 +1699,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
> >>   	if (hdr->gso_type || !xdp_prog) {
> >>   		*skb_xdp = 1;
> >>   		return __tun_build_skb(tfile, alloc_frag, buf, buflen, len,
> >> -				       pad);
> >> +				       pad, metasize);
> >>   	}
> >>   
> >>   	*skb_xdp = 0;
> >> @@ -1709,7 +1713,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
> >>   		u32 act;
> >>   
> >>   		xdp_init_buff(&xdp, buflen, &tfile->xdp_rxq);
> >> -		xdp_prepare_buff(&xdp, buf, pad, len, false);
> >> +		xdp_prepare_buff(&xdp, buf, pad, len, true);
> >>   
> >>   		act = bpf_prog_run_xdp(xdp_prog, &xdp);
> >>   		if (act == XDP_REDIRECT || act == XDP_TX) {
> >> @@ -1730,12 +1734,16 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
> >>   
> >>   		pad = xdp.data - xdp.data_hard_start;
> >>   		len = xdp.data_end - xdp.data;
> >> +
> >> +		metasize = xdp.data - xdp.data_meta;
> > 
> > [..]
> > 
> >> +		metasize = metasize > 0 ? metasize : 0;
> > 
> > Why is this part needed?
> 
> When an xdp_buff was initialized withouth metadata support (meta_valid
> argument of xdp_prepare_buff is false), then data_meta == data + 1.
> So this check makes sure that metadata was supported for the given xdp_buff
> and metasize is not -1 (data - data_meta).
> 
> But you have a good point here: Because we have control over the
> initialization of xdp_buff in the tun_build_skb function (first code path),
> we know, that metadata is always supported for that buffer and metasize
> is never < 0. So this check is redundant and I'll remove it.
> 
> But in the tun_xdp_one function (second code path), I'd prefer to keep that
> check, as the xdp_buff is externally passed to tun_sendmsg and the tun driver
> should probably not make assumptions about the metadata support of buffers
> created by other drivers (e.g. vhost_net).
> 
> Thank you for taking a look, I hope things are more clear now.

Please use min()
Willem de Bruijn Feb. 3, 2025, 1:39 a.m. UTC | #5
> > >> +		metasize = metasize > 0 ? metasize : 0;
> > > 
> > > Why is this part needed?
> > 
> > When an xdp_buff was initialized withouth metadata support (meta_valid
> > argument of xdp_prepare_buff is false), then data_meta == data + 1.
> > So this check makes sure that metadata was supported for the given xdp_buff
> > and metasize is not -1 (data - data_meta).
> > 
> > But you have a good point here: Because we have control over the
> > initialization of xdp_buff in the tun_build_skb function (first code path),
> > we know, that metadata is always supported for that buffer and metasize
> > is never < 0. So this check is redundant and I'll remove it.
> > 
> > But in the tun_xdp_one function (second code path), I'd prefer to keep that
> > check, as the xdp_buff is externally passed to tun_sendmsg and the tun driver
> > should probably not make assumptions about the metadata support of buffers
> > created by other drivers (e.g. vhost_net).
> > 
> > Thank you for taking a look, I hope things are more clear now.
> 
> Please use min()
> 

Err.. max.
diff mbox series

Patch

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index e816aaba8..d3cfea40a 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1600,7 +1600,8 @@  static bool tun_can_build_skb(struct tun_struct *tun, struct tun_file *tfile,
 
 static struct sk_buff *__tun_build_skb(struct tun_file *tfile,
 				       struct page_frag *alloc_frag, char *buf,
-				       int buflen, int len, int pad)
+				       int buflen, int len, int pad,
+				       int metasize)
 {
 	struct sk_buff *skb = build_skb(buf, buflen);
 
@@ -1609,6 +1610,8 @@  static struct sk_buff *__tun_build_skb(struct tun_file *tfile,
 
 	skb_reserve(skb, pad);
 	skb_put(skb, len);
+	if (metasize)
+		skb_metadata_set(skb, metasize);
 	skb_set_owner_w(skb, tfile->socket.sk);
 
 	get_page(alloc_frag->page);
@@ -1668,6 +1671,7 @@  static struct sk_buff *tun_build_skb(struct tun_struct *tun,
 	char *buf;
 	size_t copied;
 	int pad = TUN_RX_PAD;
+	int metasize = 0;
 	int err = 0;
 
 	rcu_read_lock();
@@ -1695,7 +1699,7 @@  static struct sk_buff *tun_build_skb(struct tun_struct *tun,
 	if (hdr->gso_type || !xdp_prog) {
 		*skb_xdp = 1;
 		return __tun_build_skb(tfile, alloc_frag, buf, buflen, len,
-				       pad);
+				       pad, metasize);
 	}
 
 	*skb_xdp = 0;
@@ -1709,7 +1713,7 @@  static struct sk_buff *tun_build_skb(struct tun_struct *tun,
 		u32 act;
 
 		xdp_init_buff(&xdp, buflen, &tfile->xdp_rxq);
-		xdp_prepare_buff(&xdp, buf, pad, len, false);
+		xdp_prepare_buff(&xdp, buf, pad, len, true);
 
 		act = bpf_prog_run_xdp(xdp_prog, &xdp);
 		if (act == XDP_REDIRECT || act == XDP_TX) {
@@ -1730,12 +1734,16 @@  static struct sk_buff *tun_build_skb(struct tun_struct *tun,
 
 		pad = xdp.data - xdp.data_hard_start;
 		len = xdp.data_end - xdp.data;
+
+		metasize = xdp.data - xdp.data_meta;
+		metasize = metasize > 0 ? metasize : 0;
 	}
 	bpf_net_ctx_clear(bpf_net_ctx);
 	rcu_read_unlock();
 	local_bh_enable();
 
-	return __tun_build_skb(tfile, alloc_frag, buf, buflen, len, pad);
+	return __tun_build_skb(tfile, alloc_frag, buf, buflen, len, pad,
+			       metasize);
 
 out:
 	bpf_net_ctx_clear(bpf_net_ctx);
@@ -2452,6 +2460,7 @@  static int tun_xdp_one(struct tun_struct *tun,
 	struct sk_buff_head *queue;
 	u32 rxhash = 0, act;
 	int buflen = hdr->buflen;
+	int metasize = 0;
 	int ret = 0;
 	bool skb_xdp = false;
 	struct page *page;
@@ -2467,7 +2476,6 @@  static int tun_xdp_one(struct tun_struct *tun,
 		}
 
 		xdp_init_buff(xdp, buflen, &tfile->xdp_rxq);
-		xdp_set_data_meta_invalid(xdp);
 
 		act = bpf_prog_run_xdp(xdp_prog, xdp);
 		ret = tun_xdp_act(tun, xdp_prog, xdp, act);
@@ -2507,6 +2515,11 @@  static int tun_xdp_one(struct tun_struct *tun,
 	skb_reserve(skb, xdp->data - xdp->data_hard_start);
 	skb_put(skb, xdp->data_end - xdp->data);
 
+	metasize = xdp->data - xdp->data_meta;
+	metasize = metasize > 0 ? metasize : 0;
+	if (metasize)
+		skb_metadata_set(skb, metasize);
+
 	if (virtio_net_hdr_to_skb(skb, gso, tun_is_little_endian(tun))) {
 		atomic_long_inc(&tun->rx_frame_errors);
 		kfree_skb(skb);