diff mbox series

netdev: Use flexible array for trailing private bytes

Message ID 20240229213018.work.556-kees@kernel.org (mailing list archive)
State Superseded
Headers show
Series netdev: Use flexible array for trailing private bytes | expand

Commit Message

Kees Cook Feb. 29, 2024, 9:30 p.m. UTC
Introduce a new struct net_device_priv that contains struct net_device
but also accounts for the commonly trailing bytes through the "size" and
"data" members. As many dummy struct net_device instances exist still,
it is non-trivial to put this flexible array inside struct net_device
itself. But we can add a sanity check in netdev_priv() to catch any
attempts to access the private data of a dummy struct.

Adjust allocation logic to use the new full structure.

Signed-off-by: Kees Cook <keescook@chromium.org>
---
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Cc: netdev@vger.kernel.org
Cc: linux-hardening@vger.kernel.org
---
 include/linux/netdevice.h | 21 ++++++++++++++++++---
 net/core/dev.c            | 12 ++++--------
 2 files changed, 22 insertions(+), 11 deletions(-)

Comments

Gustavo A. R. Silva Feb. 29, 2024, 10:15 p.m. UTC | #1
On 2/29/24 15:30, Kees Cook wrote:
> Introduce a new struct net_device_priv that contains struct net_device
> but also accounts for the commonly trailing bytes through the "size" and
> "data" members. As many dummy struct net_device instances exist still,
> it is non-trivial to but this flexible array inside struct net_device
> itself. But we can add a sanity check in netdev_priv() to catch any
> attempts to access the private data of a dummy struct.
> 
> Adjust allocation logic to use the new full structure.
> 
> Signed-off-by: Kees Cook <keescook@chromium.org>

Reviewed-by: Gustavo A. R. Silva <gustavoars@kernel.org>
[for the flex `struct net_device_priv`, `struct_size()`, `__counted_by()`,
and the use of `container_of()` to retrieve a pointer to the flex struct
and return pointer to flex-array member `data` in `netdev_priv()`]

Thanks
--
Gustavo

> ---
> Cc: Jakub Kicinski <kuba@kernel.org>
> Cc: "David S. Miller" <davem@davemloft.net>
> Cc: Eric Dumazet <edumazet@google.com>
> Cc: Paolo Abeni <pabeni@redhat.com>
> Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
> Cc: "Gustavo A. R. Silva" <gustavoars@kernel.org>
> Cc: netdev@vger.kernel.org
> Cc: linux-hardening@vger.kernel.org
> ---
>   include/linux/netdevice.h | 21 ++++++++++++++++++---
>   net/core/dev.c            | 12 ++++--------
>   2 files changed, 22 insertions(+), 11 deletions(-)
> 
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 118c40258d07..b476809d0bae 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -1815,6 +1815,8 @@ enum netdev_stat_type {
>   	NETDEV_PCPU_STAT_DSTATS, /* struct pcpu_dstats */
>   };
>   
> +#define	NETDEV_ALIGN		32
> +
>   /**
>    *	struct net_device - The DEVICE structure.
>    *
> @@ -2476,6 +2478,14 @@ struct net_device {
>   	struct hlist_head	page_pools;
>   #endif
>   };
> +
> +struct net_device_priv {
> +	struct net_device	dev;
> +	u32			size;
> +	u8			data[] __counted_by(size)
> +				       __aligned(NETDEV_ALIGN);
> +};
> +
>   #define to_net_dev(d) container_of(d, struct net_device, dev)
>   
>   /*
> @@ -2496,8 +2506,6 @@ static inline bool netif_elide_gro(const struct net_device *dev)
>   	return false;
>   }
>   
> -#define	NETDEV_ALIGN		32
> -
>   static inline
>   int netdev_get_prio_tc_map(const struct net_device *dev, u32 prio)
>   {
> @@ -2665,7 +2673,14 @@ void dev_net_set(struct net_device *dev, struct net *net)
>    */
>   static inline void *netdev_priv(const struct net_device *dev)
>   {
> -	return (char *)dev + ALIGN(sizeof(struct net_device), NETDEV_ALIGN);
> +	struct net_device_priv *priv;
> +
> +	/* Dummy struct net_device have no trailing data. */
> +	if (WARN_ON_ONCE(dev->reg_state == NETREG_DUMMY))
> +		return NULL;
> +
> +	priv = container_of(dev, struct net_device_priv, dev);
> +	return (u8 *)priv->data;
>   }
>   
>   /* Set the sysfs physical device reference for the network logical device
> diff --git a/net/core/dev.c b/net/core/dev.c
> index cb2dab0feee0..0fcaf6ae8486 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -10800,7 +10800,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
>   {
>   	struct net_device *dev;
>   	unsigned int alloc_size;
> -	struct net_device *p;
> +	struct net_device_priv *p;
>   
>   	BUG_ON(strlen(name) >= sizeof(dev->name));
>   
> @@ -10814,20 +10814,16 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
>   		return NULL;
>   	}
>   
> -	alloc_size = sizeof(struct net_device);
> -	if (sizeof_priv) {
> -		/* ensure 32-byte alignment of private area */
> -		alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
> -		alloc_size += sizeof_priv;
> -	}
> +	alloc_size = struct_size(p, data, sizeof_priv);
>   	/* ensure 32-byte alignment of whole construct */
>   	alloc_size += NETDEV_ALIGN - 1;
>   
>   	p = kvzalloc(alloc_size, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
>   	if (!p)
>   		return NULL;
> +	p->size = sizeof_priv;
>   
> -	dev = PTR_ALIGN(p, NETDEV_ALIGN);
> +	dev = &PTR_ALIGN(p, NETDEV_ALIGN)->dev;
>   	dev->padded = (char *)dev - (char *)p;
>   
>   	ref_tracker_dir_init(&dev->refcnt_tracker, 128, name);
Jakub Kicinski March 1, 2024, 6:59 a.m. UTC | #2
On Thu, 29 Feb 2024 13:30:22 -0800 Kees Cook wrote:
> Introduce a new struct net_device_priv that contains struct net_device
> but also accounts for the commonly trailing bytes through the "size" and
> "data" members.

I'm a bit unclear on the benefit. Perhaps I'm unaccustomed to "safe C".

> As many dummy struct net_device instances exist still,
> it is non-trivial to but this flexible array inside struct net_device

put

Non-trivial, meaning what's the challenge?
We also do somewhat silly things with netdev lifetime, because we can't
assume netdev gets freed by netdev_free(). Cleaning up the "embedders"
would be beneficial for multiple reasons.

> itself. But we can add a sanity check in netdev_priv() to catch any
> attempts to access the private data of a dummy struct.
> 
> Adjust allocation logic to use the new full structure.
> 
> Signed-off-by: Kees Cook <keescook@chromium.org>

> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 118c40258d07..b476809d0bae 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -1815,6 +1815,8 @@ enum netdev_stat_type {
>  	NETDEV_PCPU_STAT_DSTATS, /* struct pcpu_dstats */
>  };
>  
> +#define	NETDEV_ALIGN		32

Unless someone knows what this is for it should go.
Align priv to cacheline size.

>  /**
>   *	struct net_device - The DEVICE structure.
>   *

> @@ -2665,7 +2673,14 @@ void dev_net_set(struct net_device *dev, struct net *net)
>   */
>  static inline void *netdev_priv(const struct net_device *dev)
>  {
> -	return (char *)dev + ALIGN(sizeof(struct net_device), NETDEV_ALIGN);
> +	struct net_device_priv *priv;
> +
> +	/* Dummy struct net_device have no trailing data. */
> +	if (WARN_ON_ONCE(dev->reg_state == NETREG_DUMMY))
> +		return NULL;

This is a static inline with roughly 11,000 call sites, according to 
a quick grep. Aren't WARN_ONCE() in static inlines creating a "once"
object in every compilation unit where they get used?

> +	priv = container_of(dev, struct net_device_priv, dev);
> +	return (u8 *)priv->data;
>  }
Eric Dumazet March 1, 2024, 8:03 a.m. UTC | #3
On Fri, Mar 1, 2024 at 7:59 AM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Thu, 29 Feb 2024 13:30:22 -0800 Kees Cook wrote:

> > diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> > index 118c40258d07..b476809d0bae 100644
> > --- a/include/linux/netdevice.h
> > +++ b/include/linux/netdevice.h
> > @@ -1815,6 +1815,8 @@ enum netdev_stat_type {
> >       NETDEV_PCPU_STAT_DSTATS, /* struct pcpu_dstats */
> >  };
> >
> > +#define      NETDEV_ALIGN            32
>
> Unless someone knows what this is for it should go.
> Align priv to cacheline size.

+2

#define NETDEV_ALIGN    L1_CACHE_BYTES

or a general replacement of NETDEV_ALIGN....
Greg Kroah-Hartman March 1, 2024, 11:41 a.m. UTC | #4
On Thu, Feb 29, 2024 at 10:59:10PM -0800, Jakub Kicinski wrote:
> On Thu, 29 Feb 2024 13:30:22 -0800 Kees Cook wrote:
> > Introduce a new struct net_device_priv that contains struct net_device
> > but also accounts for the commonly trailing bytes through the "size" and
> > "data" members.
> 
> I'm a bit unclear on the benefit. Perhaps I'm unaccustomed to "safe C".
> 
> > As many dummy struct net_device instances exist still,
> > it is non-trivial to but this flexible array inside struct net_device
> 
> put
> 
> Non-trivial, meaning what's the challenge?
> We also do somewhat silly things with netdev lifetime, because we can't
> assume netdev gets freed by netdev_free(). Cleaning up the "embedders"
> would be beneficial for multiple reasons.
> 
> > itself. But we can add a sanity check in netdev_priv() to catch any
> > attempts to access the private data of a dummy struct.
> > 
> > Adjust allocation logic to use the new full structure.
> > 
> > Signed-off-by: Kees Cook <keescook@chromium.org>
> 
> > diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> > index 118c40258d07..b476809d0bae 100644
> > --- a/include/linux/netdevice.h
> > +++ b/include/linux/netdevice.h
> > @@ -1815,6 +1815,8 @@ enum netdev_stat_type {
> >  	NETDEV_PCPU_STAT_DSTATS, /* struct pcpu_dstats */
> >  };
> >  
> > +#define	NETDEV_ALIGN		32
> 
> Unless someone knows what this is for it should go.
> Align priv to cacheline size.
> 
> >  /**
> >   *	struct net_device - The DEVICE structure.
> >   *
> 
> > @@ -2665,7 +2673,14 @@ void dev_net_set(struct net_device *dev, struct net *net)
> >   */
> >  static inline void *netdev_priv(const struct net_device *dev)
> >  {
> > -	return (char *)dev + ALIGN(sizeof(struct net_device), NETDEV_ALIGN);
> > +	struct net_device_priv *priv;
> > +
> > +	/* Dummy struct net_device have no trailing data. */
> > +	if (WARN_ON_ONCE(dev->reg_state == NETREG_DUMMY))
> > +		return NULL;
> 
> This is a static inline with roughly 11,000 call sites, according to 
> a quick grep. Aren't WARN_ONCE() in static inlines creating a "once"
> object in every compilation unit where they get used?

It also, if this ever trips, will reboot the box for those that run
with panic-on-warn set. Is that something that you all really want?

thanks,

greg k-h
Alexander Lobakin March 1, 2024, 12:58 p.m. UTC | #5
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 1 Mar 2024 09:03:55 +0100

> On Fri, Mar 1, 2024 at 7:59 AM Jakub Kicinski <kuba@kernel.org> wrote:
>>
>> On Thu, 29 Feb 2024 13:30:22 -0800 Kees Cook wrote:

Re WARN_ONCE() in netdev_priv(): netdev_priv() is VERY hot, I'm not sure
we want to add checks there. Maybe under CONFIG_DEBUG_NET?

> 
>>> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
>>> index 118c40258d07..b476809d0bae 100644
>>> --- a/include/linux/netdevice.h
>>> +++ b/include/linux/netdevice.h
>>> @@ -1815,6 +1815,8 @@ enum netdev_stat_type {
>>>       NETDEV_PCPU_STAT_DSTATS, /* struct pcpu_dstats */
>>>  };
>>>
>>> +#define      NETDEV_ALIGN            32
>>
>> Unless someone knows what this is for it should go.
>> Align priv to cacheline size.
> 
> +2
> 

Maybe

> #define NETDEV_ALIGN    L1_CACHE_BYTES

#define NETDEV_ALIGN	max(SMP_CACHE_BYTES, 32)

?

(or even max(1 << INTERNODE_CACHE_SHIFT, 32))

> 
> or a general replacement of NETDEV_ALIGN....
> 
> 

+ I'd align both struct net_device AND its private space to
%NETDEV_ALIGN and remove this weird PTR_ALIGN. {k,v}malloc ensures
natural alignment of allocations for at least a couple years already
(IOW if struct net_device is aligned to 64, {k,v}malloc will *always*
return a 64-byte aligned address).

Thanks,
Olek
Eric Dumazet March 1, 2024, 1:25 p.m. UTC | #6
On Fri, Mar 1, 2024 at 1:59 PM Alexander Lobakin
<aleksander.lobakin@intel.com> wrote:
>
> From: Eric Dumazet <edumazet@google.com>
> Date: Fri, 1 Mar 2024 09:03:55 +0100
>
> > On Fri, Mar 1, 2024 at 7:59 AM Jakub Kicinski <kuba@kernel.org> wrote:
> >>
> >> On Thu, 29 Feb 2024 13:30:22 -0800 Kees Cook wrote:
>
> Re WARN_ONCE() in netdev_priv(): netdev_priv() is VERY hot, I'm not sure
> we want to add checks there. Maybe under CONFIG_DEBUG_NET?
>
> >
> >>> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> >>> index 118c40258d07..b476809d0bae 100644
> >>> --- a/include/linux/netdevice.h
> >>> +++ b/include/linux/netdevice.h
> >>> @@ -1815,6 +1815,8 @@ enum netdev_stat_type {
> >>>       NETDEV_PCPU_STAT_DSTATS, /* struct pcpu_dstats */
> >>>  };
> >>>
> >>> +#define      NETDEV_ALIGN            32
> >>
> >> Unless someone knows what this is for it should go.
> >> Align priv to cacheline size.
> >
> > +2
> >
>
> Maybe
>
> > #define NETDEV_ALIGN    L1_CACHE_BYTES
>
> #define NETDEV_ALIGN    max(SMP_CACHE_BYTES, 32)

Why would we care if some arches have a very small SMP_CACHE_BYTES ?
Bet it !

IMO nothing in networking mandates this minimal 32 byte alignment.

>
> ?
>
> (or even max(1 << INTERNODE_CACHE_SHIFT, 32))

I do not think so.

INTERNODE_CACHE_SHIFT is a bit extreme on allyesconfig on x86 :/
(with CONFIG_X86_VSMP=y)


>
> >
> > or a general replacement of NETDEV_ALIGN....
> >
> >
>
> + I'd align both struct net_device AND its private space to
> %NETDEV_ALIGN and remove this weird PTR_ALIGN. {k,v}malloc ensures
> natural alignment of allocations for at least a couple years already
> (IOW if struct net_device is aligned to 64, {k,v}malloc will *always*
> return a 64-byte aligned address).

I think that with SLAB or SLOB in the past with some DEBUG options
there was no such guarantee.

But this is probably no longer the case, and heavy DEBUG options these
days (KASAN, KFENCE...)
do not expect fast networking anyway.
Alexander Lobakin March 1, 2024, 2:30 p.m. UTC | #7
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 1 Mar 2024 14:25:37 +0100

> On Fri, Mar 1, 2024 at 1:59 PM Alexander Lobakin
> <aleksander.lobakin@intel.com> wrote:
>>
>> From: Eric Dumazet <edumazet@google.com>
>> Date: Fri, 1 Mar 2024 09:03:55 +0100
>>
>>> On Fri, Mar 1, 2024 at 7:59 AM Jakub Kicinski <kuba@kernel.org> wrote:
>>>>
>>>> On Thu, 29 Feb 2024 13:30:22 -0800 Kees Cook wrote:
>>
>> Re WARN_ONCE() in netdev_priv(): netdev_priv() is VERY hot, I'm not sure
>> we want to add checks there. Maybe under CONFIG_DEBUG_NET?
>>
>>>
>>>>> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
>>>>> index 118c40258d07..b476809d0bae 100644
>>>>> --- a/include/linux/netdevice.h
>>>>> +++ b/include/linux/netdevice.h
>>>>> @@ -1815,6 +1815,8 @@ enum netdev_stat_type {
>>>>>       NETDEV_PCPU_STAT_DSTATS, /* struct pcpu_dstats */
>>>>>  };
>>>>>
>>>>> +#define      NETDEV_ALIGN            32
>>>>
>>>> Unless someone knows what this is for it should go.
>>>> Align priv to cacheline size.
>>>
>>> +2
>>>
>>
>> Maybe
>>
>>> #define NETDEV_ALIGN    L1_CACHE_BYTES
>>
>> #define NETDEV_ALIGN    max(SMP_CACHE_BYTES, 32)
> 
> Why would we care if some arches have a very small SMP_CACHE_BYTES ?

Oh sorry, I thought %SMP_CACHE_BYTES is 1 when !SMP.
We can then just add ____cacheline_aligned to both struct net_device and
its ::priv flex array and that's it.

I like the idea of declaring priv explicitly rather than doing size +
ptr magic. But maybe we could just add this flex array to struct
net_device and avoid introducing a new structure.

> Bet it !
> 
> IMO nothing in networking mandates this minimal 32 byte alignment.
> 
>>
>> ?
>>
>> (or even max(1 << INTERNODE_CACHE_SHIFT, 32))
> 
> I do not think so.
> 
> INTERNODE_CACHE_SHIFT is a bit extreme on allyesconfig on x86 :/
> (with CONFIG_X86_VSMP=y)
> 
> 
>>
>>>
>>> or a general replacement of NETDEV_ALIGN....
>>>
>>>
>>
>> + I'd align both struct net_device AND its private space to
>> %NETDEV_ALIGN and remove this weird PTR_ALIGN. {k,v}malloc ensures
>> natural alignment of allocations for at least a couple years already
>> (IOW if struct net_device is aligned to 64, {k,v}malloc will *always*
>> return a 64-byte aligned address).
> 
> I think that with SLAB or SLOB in the past with some DEBUG options
> there was no such guarantee.
> 
> But this is probably no longer the case, and heavy DEBUG options these
> days (KASAN, KFENCE...)
> do not expect fast networking anyway.

Thanks,
Olek
Jakub Kicinski March 1, 2024, 5:35 p.m. UTC | #8
On Fri, 1 Mar 2024 15:30:03 +0100 Alexander Lobakin wrote:
> I like the idea of declaring priv explicitly rather than doing size +
> ptr magic. But maybe we could just add this flex array to struct
> net_device and avoid introducing a new structure.

100% I should have linked to the thread that led to Kees's work.
Adding directly to net_device would be way better but there's
a handful of drivers which embed the struct.
If we can switch them to dynamic allocation, that'd be great.
And, as you may be alluding to, it removes the need for the WARN_ON()
entirely as well.
Alexander Lobakin March 4, 2024, 2:32 p.m. UTC | #9
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 1 Mar 2024 09:35:17 -0800

> On Fri, 1 Mar 2024 15:30:03 +0100 Alexander Lobakin wrote:
>> I like the idea of declaring priv explicitly rather than doing size +
>> ptr magic. But maybe we could just add this flex array to struct
>> net_device and avoid introducing a new structure.
> 
> 100% I should have linked to the thread that led to Kees's work.
> Adding directly to net_device would be way better but there's
> a handful of drivers which embed the struct.

I think it's okay to embed a struct with flex array at the end as long
as it's not used? Or the compiler will say that the flex array is not at
the end of the structure?

> If we can switch them to dynamic allocation, that'd be great.

It's mega weird to embed &net_device rather than do alloc_*dev() >_<

> And, as you may be alluding to, it removes the need for the WARN_ON()
> entirely as well.

Thanks,
Olek
Jakub Kicinski March 4, 2024, 3:24 p.m. UTC | #10
On Mon, 4 Mar 2024 15:32:51 +0100 Alexander Lobakin wrote:
> > 100% I should have linked to the thread that led to Kees's work.
> > Adding directly to net_device would be way better but there's
> > a handful of drivers which embed the struct.  
> 
> I think it's okay to embed a struct with flex array at the end as long
> as it's not used? Or the compiler will say that the flex array is not at
> the end of the structure?

Technically, yes. Practically it ties the lifetime of a refcounted
object to something semi-related with different lifetime rules :(
Breno Leitao March 6, 2024, 1:16 p.m. UTC | #11
On Thu, Feb 29, 2024 at 10:59:10PM -0800, Jakub Kicinski wrote:
> On Thu, 29 Feb 2024 13:30:22 -0800 Kees Cook wrote:
> > Introduce a new struct net_device_priv that contains struct net_device
> > but also accounts for the commonly trailing bytes through the "size" and
> > "data" members.
> 
> I'm a bit unclear on the benefit. Perhaps I'm unaccustomed to "safe C".
> 
> > As many dummy struct net_device instances exist still,
> > it is non-trivial to but this flexible array inside struct net_device
> 
> put
> 
> Non-trivial, meaning what's the challenge?
> We also do somewhat silly things with netdev lifetime, because we can't
> assume netdev gets freed by netdev_free(). Cleaning up the "embedders"
> would be beneficial for multiple reasons.

I've been looking at some of these embedders as reported by Kees[1], and
most of them are for dummy interfaces. I.e., they are basically used to
schedule NAPI polls.

From that list[1], most of the drivers match:

	# git grep init_dummy_netdev

That said, do you think it is still worth cleaning up embedders for
dummy net_devices?

[1] https://lore.kernel.org/all/202402281554.C1CEEF744@keescook/
Jakub Kicinski March 6, 2024, 3:06 p.m. UTC | #12
On Wed, 6 Mar 2024 05:16:16 -0800 Breno Leitao wrote:
> I've been looking at some of these embedders as reported by Kees[1], and
> most of them are for dummy interfaces. I.e, they are basically used for
> schedule NAPI poll.
> 
> From that list[1], most of the driver matches with:
> 
> 	# git grep init_dummy_netdev
> 
> That said, do you think it is still worth cleaning up embedders for
> dummy net_devices?
> 
> [1] https://lore.kernel.org/all/202402281554.C1CEEF744@keescook/

Yes, I think so.
Kees, did you plan to send a v2? Otherwise I can put the cleanup on our
"public ToDo" list :)
Kees Cook March 6, 2024, 11:42 p.m. UTC | #13
On Wed, Mar 06, 2024 at 07:06:58AM -0800, Jakub Kicinski wrote:
> On Wed, 6 Mar 2024 05:16:16 -0800 Breno Leitao wrote:
> > I've been looking at some of these embedders as reported by Kees[1], and
> > most of them are for dummy interfaces. I.e, they are basically used for
> > schedule NAPI poll.
> > 
> > From that list[1], most of the driver matches with:
> > 
> > 	# git grep init_dummy_netdev
> > 
> > That said, do you think it is still worth cleaning up embedders for
> > dummy net_devices?
> > 
> > [1] https://lore.kernel.org/all/202402281554.C1CEEF744@keescook/
> 
> Yes, I think so.
> Kees, did you plan to send a v2? Otherwise I can put the cleanup on our
> "public ToDo" list :)

I found the requested collateral changes that popped out of v1 to be
rather a bit much for me to tackle right now, so I think adding to the
TODO list is probably best. :)

-Kees
diff mbox series

Patch

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 118c40258d07..b476809d0bae 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1815,6 +1815,8 @@  enum netdev_stat_type {
 	NETDEV_PCPU_STAT_DSTATS, /* struct pcpu_dstats */
 };
 
+#define	NETDEV_ALIGN		32
+
 /**
  *	struct net_device - The DEVICE structure.
  *
@@ -2476,6 +2478,14 @@  struct net_device {
 	struct hlist_head	page_pools;
 #endif
 };
+
+struct net_device_priv {
+	struct net_device	dev;
+	u32			size;
+	u8			data[] __counted_by(size)
+				       __aligned(NETDEV_ALIGN);
+};
+
 #define to_net_dev(d) container_of(d, struct net_device, dev)
 
 /*
@@ -2496,8 +2506,6 @@  static inline bool netif_elide_gro(const struct net_device *dev)
 	return false;
 }
 
-#define	NETDEV_ALIGN		32
-
 static inline
 int netdev_get_prio_tc_map(const struct net_device *dev, u32 prio)
 {
@@ -2665,7 +2673,14 @@  void dev_net_set(struct net_device *dev, struct net *net)
  */
 static inline void *netdev_priv(const struct net_device *dev)
 {
-	return (char *)dev + ALIGN(sizeof(struct net_device), NETDEV_ALIGN);
+	struct net_device_priv *priv;
+
+	/* Dummy struct net_device have no trailing data. */
+	if (WARN_ON_ONCE(dev->reg_state == NETREG_DUMMY))
+		return NULL;
+
+	priv = container_of(dev, struct net_device_priv, dev);
+	return (u8 *)priv->data;
 }
 
 /* Set the sysfs physical device reference for the network logical device
diff --git a/net/core/dev.c b/net/core/dev.c
index cb2dab0feee0..0fcaf6ae8486 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -10800,7 +10800,7 @@  struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 {
 	struct net_device *dev;
 	unsigned int alloc_size;
-	struct net_device *p;
+	struct net_device_priv *p;
 
 	BUG_ON(strlen(name) >= sizeof(dev->name));
 
@@ -10814,20 +10814,16 @@  struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 		return NULL;
 	}
 
-	alloc_size = sizeof(struct net_device);
-	if (sizeof_priv) {
-		/* ensure 32-byte alignment of private area */
-		alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
-		alloc_size += sizeof_priv;
-	}
+	alloc_size = struct_size(p, data, sizeof_priv);
 	/* ensure 32-byte alignment of whole construct */
 	alloc_size += NETDEV_ALIGN - 1;
 
 	p = kvzalloc(alloc_size, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
 	if (!p)
 		return NULL;
+	p->size = sizeof_priv;
 
-	dev = PTR_ALIGN(p, NETDEV_ALIGN);
+	dev = &PTR_ALIGN(p, NETDEV_ALIGN)->dev;
 	dev->padded = (char *)dev - (char *)p;
 
 	ref_tracker_dir_init(&dev->refcnt_tracker, 128, name);