diff mbox series

[net-next] netlink: add igmp join/leave notifications

Message ID 20241110081953.121682-1-yuyanghuang@google.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series [net-next] netlink: add igmp join/leave notifications | expand

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 18 this patch: 18
netdev/build_tools success Errors and warnings before: 0 (+0) this patch: 0 (+0)
netdev/cc_maintainers success CCed 6 of 6 maintainers
netdev/build_clang success Errors and warnings before: 46 this patch: 46
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 3527 this patch: 3527
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 100 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 1 this patch: 1
netdev/source_inline fail Was 0 now: 1
netdev/contest success net-next-2024-11-11--21-00 (tests: 787)

Commit Message

Yuyang Huang Nov. 10, 2024, 8:19 a.m. UTC
This change introduces netlink notifications for multicast address
changes, enabling components like the Android Packet Filter to implement
IGMP offload solutions.

The following features are included:
* Addition and deletion of multicast addresses are reported using
  RTM_NEWMULTICAST and RTM_DELMULTICAST messages with AF_INET.
* A new notification group, RTNLGRP_IPV4_MCADDR, is introduced for
  receiving these events.

This enhancement allows user-space components to efficiently track
multicast group memberships and program hardware offload filters
accordingly.

Cc: Maciej Żenczykowski <maze@google.com>
Cc: Lorenzo Colitti <lorenzo@google.com>
Co-developed-by: Patrick Ruddy <pruddy@vyatta.att-mail.com>
Signed-off-by: Patrick Ruddy <pruddy@vyatta.att-mail.com>
Link: https://lore.kernel.org/r/20180906091056.21109-1-pruddy@vyatta.att-mail.com
Signed-off-by: Yuyang Huang <yuyanghuang@google.com>
---
 include/uapi/linux/rtnetlink.h |  6 ++++
 net/ipv4/igmp.c                | 58 ++++++++++++++++++++++++++++++++++
 2 files changed, 64 insertions(+)

Comments

Hangbin Liu Nov. 12, 2024, 9:54 a.m. UTC | #1
Hi Yuyang,

Would you mind also update iproute2 for testing?
On Sun, Nov 10, 2024 at 05:19:53PM +0900, Yuyang Huang wrote:
> This change introduces netlink notifications for multicast address
> changes, enabling components like the Android Packet Filter to implement
> IGMP offload solutions.
> 
> The following features are included:
> * Addition and deletion of multicast addresses are reported using
>   RTM_NEWMULTICAST and RTM_DELMULTICAST messages with AF_INET.
> * A new notification group, RTNLGRP_IPV4_MCADDR, is introduced for
>   receiving these events.
> 
> This enhancement allows user-space components to efficiently track
> multicast group memberships and program hardware offload filters
> accordingly.
> 
> Cc: Maciej Żenczykowski <maze@google.com>
> Cc: Lorenzo Colitti <lorenzo@google.com>
> Co-developed-by: Patrick Ruddy <pruddy@vyatta.att-mail.com>
> Signed-off-by: Patrick Ruddy <pruddy@vyatta.att-mail.com>
> Link: https://lore.kernel.org/r/20180906091056.21109-1-pruddy@vyatta.att-mail.com
> Signed-off-by: Yuyang Huang <yuyanghuang@google.com>
> ---
>  include/uapi/linux/rtnetlink.h |  6 ++++
>  net/ipv4/igmp.c                | 58 ++++++++++++++++++++++++++++++++++
>  2 files changed, 64 insertions(+)
> 
> diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
> index 3b687d20c9ed..354a923f129d 100644
> --- a/include/uapi/linux/rtnetlink.h
> +++ b/include/uapi/linux/rtnetlink.h
> @@ -93,6 +93,10 @@ enum {
>  	RTM_NEWPREFIX	= 52,
>  #define RTM_NEWPREFIX	RTM_NEWPREFIX
>  
> +	RTM_NEWMULTICAST,
> +#define RTM_NEWMULTICAST RTM_NEWMULTICAST
> +	RTM_DELMULTICAST,
> +#define RTM_DELMULTICAST RTM_DELMULTICAST
>  	RTM_GETMULTICAST = 58,
>  #define RTM_GETMULTICAST RTM_GETMULTICAST
>  
> @@ -774,6 +778,8 @@ enum rtnetlink_groups {
>  #define RTNLGRP_TUNNEL		RTNLGRP_TUNNEL
>  	RTNLGRP_STATS,
>  #define RTNLGRP_STATS		RTNLGRP_STATS
> +	RTNLGRP_IPV4_MCADDR,
> +#define RTNLGRP_IPV4_MCADDR	RTNLGRP_IPV4_MCADDR
>  	__RTNLGRP_MAX
>  };
>  #define RTNLGRP_MAX	(__RTNLGRP_MAX - 1)
> diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
> index 9bf09de6a2e7..34575f5392a8 100644
> --- a/net/ipv4/igmp.c
> +++ b/net/ipv4/igmp.c
> @@ -88,6 +88,7 @@
>  #include <linux/byteorder/generic.h>
>  
>  #include <net/net_namespace.h>
> +#include <net/netlink.h>
>  #include <net/arp.h>
>  #include <net/ip.h>
>  #include <net/protocol.h>
> @@ -1430,6 +1431,60 @@ static void ip_mc_hash_remove(struct in_device *in_dev,
>  	*mc_hash = im->next_hash;
>  }
>  
> +static int inet_fill_ifmcaddr(struct sk_buff *skb, struct net_device *dev,
> +			      __be32 addr, int event)
> +{
> +	struct nlmsghdr *nlh;
> +	struct ifaddrmsg *ifm;
> +
> +	nlh = nlmsg_put(skb, 0, 0, event, sizeof(struct ifaddrmsg), 0);
> +	if (!nlh)
> +		return -EMSGSIZE;
> +
> +	ifm = nlmsg_data(nlh);
> +	ifm->ifa_family = AF_INET;
> +	ifm->ifa_prefixlen = 32;
> +	ifm->ifa_flags = IFA_F_PERMANENT;
> +	ifm->ifa_scope = RT_SCOPE_LINK;
> +	ifm->ifa_index = dev->ifindex;
> +
> +	if (nla_put_in_addr(skb, IFA_MULTICAST, addr) < 0) {
> +		nlmsg_cancel(skb, nlh);
> +		return -EMSGSIZE;
> +	}
> +
> +	nlmsg_end(skb, nlh);
> +	return 0;
> +}
> +
> +static inline int inet_ifmcaddr_msgsize(void)
> +{
> +	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
> +			+ nla_total_size(sizeof(__be32));
> +}
> +
> +static void inet_ifmcaddr_notify(struct net_device *dev, __be32 addr, int event)
> +{
> +	struct net *net = dev_net(dev);
> +	struct sk_buff *skb;
> +	int err = -ENOBUFS;
> +
> +	skb = nlmsg_new(inet_ifmcaddr_msgsize(), GFP_ATOMIC);
> +	if (!skb)
> +		goto error;
> +
> +	err = inet_fill_ifmcaddr(skb, dev, addr, event);
> +	if (err < 0) {
> +		WARN_ON(err == -EMSGSIZE);
> +		kfree_skb(skb);
> +		goto error;
> +	}
> +
> +	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MCADDR, NULL, GFP_ATOMIC);
> +	return;
> +error:
> +	rtnl_set_sk_err(net, RTNLGRP_IPV4_MCADDR, err);
> +}
>  
>  /*
>   *	A socket has joined a multicast group on device dev.
> @@ -1476,6 +1531,7 @@ static void ____ip_mc_inc_group(struct in_device *in_dev, __be32 addr,
>  	igmpv3_del_delrec(in_dev, im);
>  #endif
>  	igmp_group_added(im);
> +	inet_ifmcaddr_notify(in_dev->dev, addr, RTM_NEWMULTICAST);
>  	if (!in_dev->dead)
>  		ip_rt_multicast_event(in_dev);
>  out:
> @@ -1689,6 +1745,8 @@ void __ip_mc_dec_group(struct in_device *in_dev, __be32 addr, gfp_t gfp)
>  				*ip = i->next_rcu;
>  				in_dev->mc_count--;
>  				__igmp_group_dropped(i, gfp);
> +				inet_ifmcaddr_notify(in_dev->dev, addr,
> +						     RTM_DELMULTICAST);
>  				ip_mc_clear_src(i);
>  
>  				if (!in_dev->dead)
> -- 
> 2.47.0.277.g8800431eea-goog
>
Yuyang Huang Nov. 12, 2024, 10:10 a.m. UTC | #2
> Would you mind also update iproute2 for testing?

Sure, besides updating the ip monitor command, are there any other
places that need to be updated?

Thanks,
Yuyang

On Tue, Nov 12, 2024 at 6:54 PM Hangbin Liu <liuhangbin@gmail.com> wrote:
>
>
> Hi Yuyang,
>
> Would you mind also update iproute2 for testing?
> On Sun, Nov 10, 2024 at 05:19:53PM +0900, Yuyang Huang wrote:
> > This change introduces netlink notifications for multicast address
> > changes, enabling components like the Android Packet Filter to implement
> > IGMP offload solutions.
> >
> > The following features are included:
> > * Addition and deletion of multicast addresses are reported using
> >   RTM_NEWMULTICAST and RTM_DELMULTICAST messages with AF_INET.
> > * A new notification group, RTNLGRP_IPV4_MCADDR, is introduced for
> >   receiving these events.
> >
> > This enhancement allows user-space components to efficiently track
> > multicast group memberships and program hardware offload filters
> > accordingly.
> >
> > Cc: Maciej Żenczykowski <maze@google.com>
> > Cc: Lorenzo Colitti <lorenzo@google.com>
> > Co-developed-by: Patrick Ruddy <pruddy@vyatta.att-mail.com>
> > Signed-off-by: Patrick Ruddy <pruddy@vyatta.att-mail.com>
> > Link: https://lore.kernel.org/r/20180906091056.21109-1-pruddy@vyatta.att-mail.com
> > Signed-off-by: Yuyang Huang <yuyanghuang@google.com>
> > ---
> >  include/uapi/linux/rtnetlink.h |  6 ++++
> >  net/ipv4/igmp.c                | 58 ++++++++++++++++++++++++++++++++++
> >  2 files changed, 64 insertions(+)
> >
> > diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
> > index 3b687d20c9ed..354a923f129d 100644
> > --- a/include/uapi/linux/rtnetlink.h
> > +++ b/include/uapi/linux/rtnetlink.h
> > @@ -93,6 +93,10 @@ enum {
> >       RTM_NEWPREFIX   = 52,
> >  #define RTM_NEWPREFIX        RTM_NEWPREFIX
> >
> > +     RTM_NEWMULTICAST,
> > +#define RTM_NEWMULTICAST RTM_NEWMULTICAST
> > +     RTM_DELMULTICAST,
> > +#define RTM_DELMULTICAST RTM_DELMULTICAST
> >       RTM_GETMULTICAST = 58,
> >  #define RTM_GETMULTICAST RTM_GETMULTICAST
> >
> > @@ -774,6 +778,8 @@ enum rtnetlink_groups {
> >  #define RTNLGRP_TUNNEL               RTNLGRP_TUNNEL
> >       RTNLGRP_STATS,
> >  #define RTNLGRP_STATS                RTNLGRP_STATS
> > +     RTNLGRP_IPV4_MCADDR,
> > +#define RTNLGRP_IPV4_MCADDR  RTNLGRP_IPV4_MCADDR
> >       __RTNLGRP_MAX
> >  };
> >  #define RTNLGRP_MAX  (__RTNLGRP_MAX - 1)
> > diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
> > index 9bf09de6a2e7..34575f5392a8 100644
> > --- a/net/ipv4/igmp.c
> > +++ b/net/ipv4/igmp.c
> > @@ -88,6 +88,7 @@
> >  #include <linux/byteorder/generic.h>
> >
> >  #include <net/net_namespace.h>
> > +#include <net/netlink.h>
> >  #include <net/arp.h>
> >  #include <net/ip.h>
> >  #include <net/protocol.h>
> > @@ -1430,6 +1431,60 @@ static void ip_mc_hash_remove(struct in_device *in_dev,
> >       *mc_hash = im->next_hash;
> >  }
> >
> > +static int inet_fill_ifmcaddr(struct sk_buff *skb, struct net_device *dev,
> > +                           __be32 addr, int event)
> > +{
> > +     struct nlmsghdr *nlh;
> > +     struct ifaddrmsg *ifm;
> > +
> > +     nlh = nlmsg_put(skb, 0, 0, event, sizeof(struct ifaddrmsg), 0);
> > +     if (!nlh)
> > +             return -EMSGSIZE;
> > +
> > +     ifm = nlmsg_data(nlh);
> > +     ifm->ifa_family = AF_INET;
> > +     ifm->ifa_prefixlen = 32;
> > +     ifm->ifa_flags = IFA_F_PERMANENT;
> > +     ifm->ifa_scope = RT_SCOPE_LINK;
> > +     ifm->ifa_index = dev->ifindex;
> > +
> > +     if (nla_put_in_addr(skb, IFA_MULTICAST, addr) < 0) {
> > +             nlmsg_cancel(skb, nlh);
> > +             return -EMSGSIZE;
> > +     }
> > +
> > +     nlmsg_end(skb, nlh);
> > +     return 0;
> > +}
> > +
> > +static inline int inet_ifmcaddr_msgsize(void)
> > +{
> > +     return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
> > +                     + nla_total_size(sizeof(__be32));
> > +}
> > +
> > +static void inet_ifmcaddr_notify(struct net_device *dev, __be32 addr, int event)
> > +{
> > +     struct net *net = dev_net(dev);
> > +     struct sk_buff *skb;
> > +     int err = -ENOBUFS;
> > +
> > +     skb = nlmsg_new(inet_ifmcaddr_msgsize(), GFP_ATOMIC);
> > +     if (!skb)
> > +             goto error;
> > +
> > +     err = inet_fill_ifmcaddr(skb, dev, addr, event);
> > +     if (err < 0) {
> > +             WARN_ON(err == -EMSGSIZE);
> > +             kfree_skb(skb);
> > +             goto error;
> > +     }
> > +
> > +     rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MCADDR, NULL, GFP_ATOMIC);
> > +     return;
> > +error:
> > +     rtnl_set_sk_err(net, RTNLGRP_IPV4_MCADDR, err);
> > +}
> >
> >  /*
> >   *   A socket has joined a multicast group on device dev.
> > @@ -1476,6 +1531,7 @@ static void ____ip_mc_inc_group(struct in_device *in_dev, __be32 addr,
> >       igmpv3_del_delrec(in_dev, im);
> >  #endif
> >       igmp_group_added(im);
> > +     inet_ifmcaddr_notify(in_dev->dev, addr, RTM_NEWMULTICAST);
> >       if (!in_dev->dead)
> >               ip_rt_multicast_event(in_dev);
> >  out:
> > @@ -1689,6 +1745,8 @@ void __ip_mc_dec_group(struct in_device *in_dev, __be32 addr, gfp_t gfp)
> >                               *ip = i->next_rcu;
> >                               in_dev->mc_count--;
> >                               __igmp_group_dropped(i, gfp);
> > +                             inet_ifmcaddr_notify(in_dev->dev, addr,
> > +                                                  RTM_DELMULTICAST);
> >                               ip_mc_clear_src(i);
> >
> >                               if (!in_dev->dead)
> > --
> > 2.47.0.277.g8800431eea-goog
> >
Hangbin Liu Nov. 12, 2024, 11:34 a.m. UTC | #3
On Tue, Nov 12, 2024 at 07:10:07PM +0900, Yuyang Huang wrote:
> > Would you mind also update iproute2 for testing?
> 
> Sure, besides updating the ip monitor command, are there any other
> places that need to be updated?

Usually a selftest need to be added for new features. But I don't if this one
needed since the iproute2 hasn't updated.

Thanks
Hangbin
Nicolas Dichtel Nov. 12, 2024, 1:45 p.m. UTC | #4
Le 10/11/2024 à 09:19, Yuyang Huang a écrit :
> This change introduces netlink notifications for multicast address
> changes, enabling components like the Android Packet Filter to implement
> IGMP offload solutions.
> 
> The following features are included:
> * Addition and deletion of multicast addresses are reported using
>   RTM_NEWMULTICAST and RTM_DELMULTICAST messages with AF_INET.
> * A new notification group, RTNLGRP_IPV4_MCADDR, is introduced for
>   receiving these events.
> 
> This enhancement allows user-space components to efficiently track
> multicast group memberships and program hardware offload filters
> accordingly.
> 
> Cc: Maciej Żenczykowski <maze@google.com>
> Cc: Lorenzo Colitti <lorenzo@google.com>
> Co-developed-by: Patrick Ruddy <pruddy@vyatta.att-mail.com>
> Signed-off-by: Patrick Ruddy <pruddy@vyatta.att-mail.com>
> Link: https://lore.kernel.org/r/20180906091056.21109-1-pruddy@vyatta.att-mail.com
> Signed-off-by: Yuyang Huang <yuyanghuang@google.com>
> ---
>  include/uapi/linux/rtnetlink.h |  6 ++++
>  net/ipv4/igmp.c                | 58 ++++++++++++++++++++++++++++++++++
>  2 files changed, 64 insertions(+)
> 
> diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
> index 3b687d20c9ed..354a923f129d 100644
> --- a/include/uapi/linux/rtnetlink.h
> +++ b/include/uapi/linux/rtnetlink.h
> @@ -93,6 +93,10 @@ enum {
>  	RTM_NEWPREFIX	= 52,
>  #define RTM_NEWPREFIX	RTM_NEWPREFIX
>  
> +	RTM_NEWMULTICAST,
> +#define RTM_NEWMULTICAST RTM_NEWMULTICAST
> +	RTM_DELMULTICAST,
> +#define RTM_DELMULTICAST RTM_DELMULTICAST
These names are misleading, they are very generic, but in fact, specialized to igmp.

Are there plans to add more notifications later?
What about ipv6?

>  	RTM_GETMULTICAST = 58,>  #define RTM_GETMULTICAST RTM_GETMULTICAST
The RTM_GETMULTICAST works only for with IPv6 and the NEW/DEL will work for IPv4
only.

Regards,
Nicolas

>  
> @@ -774,6 +778,8 @@ enum rtnetlink_groups {
>  #define RTNLGRP_TUNNEL		RTNLGRP_TUNNEL
>  	RTNLGRP_STATS,
>  #define RTNLGRP_STATS		RTNLGRP_STATS
> +	RTNLGRP_IPV4_MCADDR,
> +#define RTNLGRP_IPV4_MCADDR	RTNLGRP_IPV4_MCADDR
>  	__RTNLGRP_MAX
>  };
>  #define RTNLGRP_MAX	(__RTNLGRP_MAX - 1)
Yuyang Huang Nov. 12, 2024, 2:06 p.m. UTC | #5
> These names are misleading, they are very generic, but in fact, specialized to igmp.
> Are there plans to add more notifications later?
> What about ipv6?

I plan to add MLD support as well. The
RTM_NEWMULTICAST/RTM_DELMULTICAST can be used for monitoring both IPv4
and IPv6 multicast addresses. As it was brought up now, I will send a
v2 patch later to include the MLD change.

>The RTM_GETMULTICAST works only for with IPv6 and the NEW/DEL will work for IPv4

I plan to make igmp.c support RTM_GETMULTICAST as well, maybe in a
different patch series. In this case, it can be used to dump both IPv4
and IPv6 multicast addresses. After adding RTM_GETMULTICAST support
for IPv4, I'll migrate 'ip maddr' to use netlink instead of parsing
procfs to dump IPv4/v6 multicast addresses.

Thanks,
Yuyang

On Tue, Nov 12, 2024 at 10:45 PM Nicolas Dichtel
<nicolas.dichtel@6wind.com> wrote:
>
> Le 10/11/2024 à 09:19, Yuyang Huang a écrit :
> > This change introduces netlink notifications for multicast address
> > changes, enabling components like the Android Packet Filter to implement
> > IGMP offload solutions.
> >
> > The following features are included:
> > * Addition and deletion of multicast addresses are reported using
> >   RTM_NEWMULTICAST and RTM_DELMULTICAST messages with AF_INET.
> > * A new notification group, RTNLGRP_IPV4_MCADDR, is introduced for
> >   receiving these events.
> >
> > This enhancement allows user-space components to efficiently track
> > multicast group memberships and program hardware offload filters
> > accordingly.
> >
> > Cc: Maciej Żenczykowski <maze@google.com>
> > Cc: Lorenzo Colitti <lorenzo@google.com>
> > Co-developed-by: Patrick Ruddy <pruddy@vyatta.att-mail.com>
> > Signed-off-by: Patrick Ruddy <pruddy@vyatta.att-mail.com>
> > Link: https://lore.kernel.org/r/20180906091056.21109-1-pruddy@vyatta.att-mail.com
> > Signed-off-by: Yuyang Huang <yuyanghuang@google.com>
> > ---
> >  include/uapi/linux/rtnetlink.h |  6 ++++
> >  net/ipv4/igmp.c                | 58 ++++++++++++++++++++++++++++++++++
> >  2 files changed, 64 insertions(+)
> >
> > diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
> > index 3b687d20c9ed..354a923f129d 100644
> > --- a/include/uapi/linux/rtnetlink.h
> > +++ b/include/uapi/linux/rtnetlink.h
> > @@ -93,6 +93,10 @@ enum {
> >       RTM_NEWPREFIX   = 52,
> >  #define RTM_NEWPREFIX        RTM_NEWPREFIX
> >
> > +     RTM_NEWMULTICAST,
> > +#define RTM_NEWMULTICAST RTM_NEWMULTICAST
> > +     RTM_DELMULTICAST,
> > +#define RTM_DELMULTICAST RTM_DELMULTICAST
> These names are misleading, they are very generic, but in fact, specialized to igmp.
>
> Are there plans to add more notifications later?
> What about ipv6?
>
> >       RTM_GETMULTICAST = 58,>  #define RTM_GETMULTICAST RTM_GETMULTICAST
> The RTM_GETMULTICAST works only for with IPv6 and the NEW/DEL will work for IPv4
> only.
>
> Regards,
> Nicolas
>
> >
> > @@ -774,6 +778,8 @@ enum rtnetlink_groups {
> >  #define RTNLGRP_TUNNEL               RTNLGRP_TUNNEL
> >       RTNLGRP_STATS,
> >  #define RTNLGRP_STATS                RTNLGRP_STATS
> > +     RTNLGRP_IPV4_MCADDR,
> > +#define RTNLGRP_IPV4_MCADDR  RTNLGRP_IPV4_MCADDR
> >       __RTNLGRP_MAX
> >  };
> >  #define RTNLGRP_MAX  (__RTNLGRP_MAX - 1)
Andrew Lunn Nov. 12, 2024, 7:34 p.m. UTC | #6
> > This enhancement allows user-space components to efficiently track
> > multicast group memberships and program hardware offload filters
> > accordingly.

Sorry, i missed the original posting.

Please could you say more about programming hardware offload filters?
How is this done?

	Andrew
Yuyang Huang Nov. 13, 2024, 12:56 a.m. UTC | #7
> Please could you say more about programming hardware offload filters?
> How is this done?

Sure,  please let me explain a little bit further on how Android
Packet Filter (APF) works here.

The Android Packet Filter (APF) has two parts:
* APF Interpreter: Runs on the Wi-Fi chipset and executes APF
programs(bytecodes) to decide whether to accept, drop, or reply to
incoming packets.
* APF program generator: Resides in the Android Framework within the
Network Stack process. It creates and updates APF programs based on
network conditions and device state, allowing for dynamic adjustments
to the filtering rules.
APF program generator is part of the Network Stack module, which can
be updated with monthly mainline releases.

Feel free to let me know if the above explanation is still unclear.

I will include a more detailed explanation in the commit message in patch v2.

Thanks,
Yuyang

On Wed, Nov 13, 2024 at 4:34 AM Andrew Lunn <andrew@lunn.ch> wrote:
>
> > > This enhancement allows user-space components to efficiently track
> > > multicast group memberships and program hardware offload filters
> > > accordingly.
>
> Sorry, i missed the original posting.
>
> Please could you say more about programming hardware offload filters?
> How is this done?
>
>         Andrew
Andrew Lunn Nov. 13, 2024, 1:17 a.m. UTC | #8
On Wed, Nov 13, 2024 at 09:56:17AM +0900, Yuyang Huang wrote:
> > Please could you say more about programming hardware offload filters?
> > How is this done?
> 
> Sure,  please let me explain a little bit further on how Android
> Packet Filter (APF) works here.
> 
> The Android Packet Filter (APF) has two parts:
> * APF Interpreter: Runs on the Wi-Fi chipset and executes APF
> programs(bytecodes) to decide whether to accept, drop, or reply to
> incoming packets.

O.K. WiFi is not my area. But i'm more interested in uAPIs, and
ensuring you are not adding APIs which promote kernel bypass.

Is the API to pass this bytestream to the wifi chipset in mainline?
And how does APF differ from BPF? Or P4? Why would we want APF when
mainline has BPF?

Do the new netlink message make sense without APF? Can i write a user
space IGMP snooping implementation and then call bridge mdb
add/del/replace? Assuming i can, why are the WiFi card not using that
API, same a switches do?

	Andrew
Yuyang Huang Nov. 13, 2024, 4:26 a.m. UTC | #9
>O.K. WiFi is not my area. But i'm more interested in uAPIs, and
> ensuring you are not adding APIs which promote kernel bypass.

WiFi chipset vendors must implement the Android WiFi HAL to install
and read the APF program from WiFi firmware. The Android System Server
will talk to vendor HAL service using the WiFi HAL. The datapath is:
Network Stack process -> Android System Server -> vendor HAL service
-> WiFi driver -> WiFi firmware. The Android WiFi HAL is specific to
Android. The vendor HAL service, WiFi driver and WiFi firmware are all
vendor proprietary software.  In other words, those API are not in
mainline yet.

Feel free to referring to the following page for more details:
* APF public doc:
https://source.android.com/docs/core/connect/android-packet-filter
* APF interpreter:
https://cs.android.com/android/platform/superproject/main/+/main:hardware/google/apf/v6/
* APF program generator:
https://cs.android.com/android/platform/superproject/main/+/main:packages/modules/NetworkStack/src/android/net/apf/ApfFilter.java
* APF WiFi HAL:
https://cs.android.com/android/platform/superproject/main/+/main:hardware/interfaces/wifi/aidl/aidl_api/android.hardware.wifi/2/android/hardware/wifi/IWifiStaIface.aidl;l=50?q=installApfPacketFilter

>Is the API to pass this bytestream to the wifi chipset in mainline?

Currently, these APIs are only in Android HAL, which is not in
mainline. We are working on adding proper APF APIs into ethtool.
Please some previous discussion here:
https://lore.kernel.org/netdev/20240813223325.3522113-1-maze@google.com/T/

>And how does APF differ from BPF? Or P4? Why would we want APF when
>mainline has BPF?

Android has utilized APF on Pixel devices since approximately 2016
(potentially as early as Pixel 3). All new devices launching with
Android 14/U+ are required to implement APFv4 on their WiFi client/STA
interface, enforced through Android VTS. APFv6 introduces support for
reply sending (e.g., ARP offload). This version is planned to be a
requirement for new devices launching with Android 16/W (or its
eventual successor), extending to wired interfaces as well.

APF is distinct from eBPF due to the latter's complexity and larger
memory footprint.  Many low-end WiFi chipsets lack sufficient RAM in
firmware to accommodate eBPF. In contrast, the latest APFv6
interpreter requires only ~5kB of compiled code and ~2kB of bytecode
(4kB recommended). Even this minimal size presents integration
challenges for some WiFi chipset vendors.

In sum, eBPF bytecode is very space inefficient compared to APF.

We have collaborated with major WiFi chipset vendors (Broadcom,
Synaptics, Qualcomm, Mediatek etc.) across various device categories
(phones, watches, tablets, TV etc.). To my knowledge, none have
integrated eBPF or P4 into their chipsets. Instead, they have either
implemented vendor-specific offload solutions or adopted APF. One of
APF's goals is to defrag the hardware offloading within the Android
ecosystem.

Moving the APF program generator into kernel space would eliminate the
need for generating the program in userspace. Ideally, future Linux
APIs would directly interact with drivers to manage offloads and APF
loading. The kernel could even be responsible for building the APF
bytecode, resolving race conditions caused by fetching kernel state
for bytecode generation.  A BPF program could potentially build the
APF bytecode during suspend.

However, this is a long-term goal requiring significant design and
discussion with the upstream Linux community. In the short term, APF
program generation will remain in user space.

>Do the new netlink message make sense without APF? Can i write a user
>space IGMP snooping implementation and then call bridge mdb
>add/del/replace?

The RTM_NEWMULTICAST and RTM_DELMULTICAST events introduced in this
patch enable user space implementation of IGMP/MLD offloading and
IPv4/IPv6 multicast filtering. I have limited knowledge on how to
implement IGMP snooping correctly so I don't know if they are
sufficient.

These two events have broader applications beyond APF. Any user space
implementation of IGMP/MLD offloading requires kernel events to signal
when a multicast address is added or removed. In fact, in the original
attempt of this patch from "Patrick Ruddy", the commit message also
mentioned about "having userspace applications to program multicast
MAC filters in hardware". Which makes me believe APF won't be the only
potential use case. Please check the following thread for more
details:  https://lore.kernel.org/r/20180906091056.21109-1-pruddy@vyatta.att-mail.com

>Assuming i can, why are the WiFi card not using that API, same a switches do?

I'm unsure if I understand your suggestion to use
RTM_NEWMDB/RTM_DELMDB/RTNLGRP_MDB. Currently, the kernel doesn't send
these events outside the bridge context.
Previous patches attempted to reuse these events, but reviewers raised
concerns about potential confusion for existing users.  IMHO, I agree
with the reviewer that re-using the RTM_NEWMDB/RTM_DELMDB/RTNLGRP_MDB
might be confusing and might affect the current applications that use
those events. Those events seem to make more sense from a bridge
perspective. Our use cases are targeting the end device which is not a
bridge or router.

It might also make sense to consider whether to accept the proposed
APIs from an API completeness perspective. The current netlink API for
multicast addresses seems incomplete. While RTM_GETMULTICAST exists,
it only supports IPv6, not IPv4. This limitation forces tools like 'ip
maddr' to rely on parsing procfs instead of using netlink.
Additionally, 'ip monitor' cannot track multicast address additions or
removals. I feel it would make sense to have full netlink based
dumping and event notification support for both IPv4/IPv6 multicast
addresses as well.

Thanks,
Yuyang



On Wed, Nov 13, 2024 at 10:17 AM Andrew Lunn <andrew@lunn.ch> wrote:
>
> On Wed, Nov 13, 2024 at 09:56:17AM +0900, Yuyang Huang wrote:
> > > Please could you say more about programming hardware offload filters?
> > > How is this done?
> >
> > Sure,  please let me explain a little bit further on how Android
> > Packet Filter (APF) works here.
> >
> > The Android Packet Filter (APF) has two parts:
> > * APF Interpreter: Runs on the Wi-Fi chipset and executes APF
> > programs(bytecodes) to decide whether to accept, drop, or reply to
> > incoming packets.
>
> O.K. WiFi is not my area. But i'm more interested in uAPIs, and
> ensuring you are not adding APIs which promote kernel bypass.
>
> Is the API to pass this bytestream to the wifi chipset in mainline?
> And how does APF differ from BPF? Or P4? Why would we want APF when
> mainline has BPF?
>
> Do the new netlink message make sense without APF? Can i write a user
> space IGMP snooping implementation and then call bridge mdb
> add/del/replace? Assuming i can, why are the WiFi card not using that
> API, same a switches do?
>
>         Andrew
Yuyang Huang Nov. 14, 2024, 5:58 a.m. UTC | #10
> So that probably leads to a NACK for these patches. If APF is your
> target, and to me it seems unlikely APF will get accepted into
> mainline, there is no need for these netlink changes. Hence a NACK.

> And this is probably your way in. Forget about APF when talking to
> mainline.

Thanks for the advice. I'll remove the APF context from the commit
message, as these patches should benefit mainline regardless of their
APF use case.

> Show these patches are useful in general. Explain the use
> case of IGMP/MLD control in user space using existing APIs to control
> the network stack and hardware.

> Yes, this is going in correct direction to get these patches
> merged. Focus on this. Solve these problems with ip monitor etc.  Once
> merged you can then use it with the out of mainline APF.

I plan to include MLD modifications and rewrite the commit message as
follows in patch v2.  I will also send the ip monitor patches in
parallel.

Please let me know if you have any further suggestions.

```
netlink: add IGMP/MLD join/leave notifications

This change introduces netlink notifications for multicast address
changes. The following features are included:
* Addition and deletion of multicast addresses are reported using
  RTM_NEWMULTICAST and RTM_DELMULTICAST messages with AF_INET and
  AF_INET6.
* Two new notification groups: RTNLGRP_IPV4_MCADDR and
  RTNLGRP_IPV6_MCADDR are introduced for receiving these events.

This change allows user space applications (e.g., ip monitor) to
efficiently track multicast group memberships by listening for netlink
events. Previously, applications relied on inefficient polling of
procfs, introducing delays. With netlink notifications, applications
receive realtime updates on multicast group membership changes,
enabling more precise metrics collection and system monitoring.

This change also empowers user space applications to manage multicast
filters and IGMP/MLD offload rules using the same netlink notification
mechanism. This allows applications to dynamically adjust rules and
configurations via generic netlink communication with the Wi-Fi driver,
offering greater flexibility and updatability compared to implementing
all logic within the driver itself. This is a key consideration for some
commercial devices.
```

Thanks,
Yuyang

On Thu, Nov 14, 2024 at 6:06 AM Andrew Lunn <andrew@lunn.ch> wrote:
>
> On Wed, Nov 13, 2024 at 01:26:29PM +0900, Yuyang Huang wrote:
> > >O.K. WiFi is not my area. But i'm more interested in uAPIs, and
> > > ensuring you are not adding APIs which promote kernel bypass.
>
> [Off list]
>
> >
> > WiFi chipset vendors must implement the Android WiFi HAL to install
> > and read the APF program from WiFi firmware. The Android System Server
> > will talk to vendor HAL service using the WiFi HAL. The datapath is:
> > Network Stack process -> Android System Server -> vendor HAL service
> > -> WiFi driver -> WiFi firmware. The Android WiFi HAL is specific to
> > Android. The vendor HAL service, WiFi driver and WiFi firmware are all
> > vendor proprietary software.  In other words, those API are not in
> > mainline yet.
>
> So that probably leads to a NACK for these patches. If APF is your
> target, and to me it seems unlikely APF will get accepted into
> mainline, there is no need for these netlink changes. Hence a NACK.
>
> > >Do the new netlink message make sense without APF? Can i write a user
> > >space IGMP snooping implementation and then call bridge mdb
> > >add/del/replace?
> >
> > The RTM_NEWMULTICAST and RTM_DELMULTICAST events introduced in this
> > patch enable user space implementation of IGMP/MLD offloading and
> > IPv4/IPv6 multicast filtering. I have limited knowledge on how to
> > implement IGMP snooping correctly so I don't know if they are
> > sufficient.
> >
> > These two events have broader applications beyond APF.
>
> And this is probably your way in. Forget about APF when talking to
> mainline. Show these patches are useful in general. Explain the use
> case of IGMP/MLD control in user space using existing APIs to control
> the network stack and hardware.
>
> > It might also make sense to consider whether to accept the proposed
> > APIs from an API completeness perspective. The current netlink API for
> > multicast addresses seems incomplete. While RTM_GETMULTICAST exists,
> > it only supports IPv6, not IPv4. This limitation forces tools like 'ip
> > maddr' to rely on parsing procfs instead of using netlink.
> > Additionally, 'ip monitor' cannot track multicast address additions or
> > removals. I feel it would make sense to have full netlink based
> > dumping and event notification support for both IPv4/IPv6 multicast
> > addresses as well.
>
> Yes, this is going in correct direction to get these patches
> merged. Focus on this. Solve these problems with ip monitor etc.  Once
> merged you can then use it with the out of mainline APF.
>
>         Andrew
Andrew Lunn Nov. 14, 2024, 1:25 p.m. UTC | #11
> This change also empowers user space applications to manage multicast
> filters and IGMP/MLD offload rules using the same netlink notification
> mechanism. This allows applications to dynamically adjust rules and
> configurations via generic netlink communication with the Wi-Fi driver,
> offering greater flexibility and updatability compared to implementing
> all logic within the driver itself. This is a key consideration for some
> commercial devices.

You are still focused or your narrow use case. Why is this only for
WiFi? Why cannot i use it for other systems using IGMP? Switches? VPN
servers? etc. We want a generic solution for all use cases, not just
some obscure niche.

And Linux does not care about commercial devices. And you would never
implement this in the driver itself, that would immediately gets
NACKed because it is not generic, but the concept should be generic.

	Andrew
Yuyang Huang Nov. 14, 2024, 2:19 p.m. UTC | #12
>You are still focused or your narrow use case. Why is this only for
>WiFi? Why cannot i use it for other systems using IGMP? Switches? VPN
>servers? etc. We want a generic solution for all use cases, not just
>some obscure niche.

>And Linux does not care about commercial devices. And you would never
>implement this in the driver itself, that would immediately gets
>NACKed because it is not generic, but the concept should be generic.

Thanks for the further feedback! I've revised the commit message to
discuss more generic use cases without mentioning specific drivers or
devices. I also removed the discussion about commercial devices.
Please take another look and let me know if it needs further
adjustments.


```
netlink: add IGMP/MLD join/leave notifications

This change introduces netlink notifications for multicast address
changes. The following features are included:
* Addition and deletion of multicast addresses are reported using
  RTM_NEWMULTICAST and RTM_DELMULTICAST messages with AF_INET and
  AF_INET6.
* Two new notification groups: RTNLGRP_IPV4_MCADDR and
  RTNLGRP_IPV6_MCADDR are introduced for receiving these events.

This change allows user space applications (e.g., ip monitor) to
efficiently track multicast group memberships by listening for netlink
events. Previously, applications relied on inefficient polling of
procfs, introducing delays. With netlink notifications, applications
receive realtime updates on multicast group membership changes,
enabling more precise metrics collection and system monitoring.

This change also unlocks the potential for implementing a wide range
of sophisticated multicast related features in user space by allowing
applications to combine kernel provided multicast address information
with user space data and communicate decisions back to the kernel for
more fine grained control. This mechanism can be used for various
purposes, including multicast filtering, IGMP/MLD offload, and
IGMP/MLD snooping.
```

Thanks,
Yuyang

On Thu, Nov 14, 2024 at 10:26 PM Andrew Lunn <andrew@lunn.ch> wrote:
>
> > This change also empowers user space applications to manage multicast
> > filters and IGMP/MLD offload rules using the same netlink notification
> > mechanism. This allows applications to dynamically adjust rules and
> > configurations via generic netlink communication with the Wi-Fi driver,
> > offering greater flexibility and updatability compared to implementing
> > all logic within the driver itself. This is a key consideration for some
> > commercial devices.
>
> You are still focused or your narrow use case. Why is this only for
> WiFi? Why cannot i use it for other systems using IGMP? Switches? VPN
> servers? etc. We want a generic solution for all use cases, not just
> some obscure niche.
>
> And Linux does not care about commercial devices. And you would never
> implement this in the driver itself, that would immediately gets
> NACKed because it is not generic, but the concept should be generic.
>
>         Andrew
diff mbox series

Patch

diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 3b687d20c9ed..354a923f129d 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -93,6 +93,10 @@  enum {
 	RTM_NEWPREFIX	= 52,
 #define RTM_NEWPREFIX	RTM_NEWPREFIX
 
+	RTM_NEWMULTICAST,
+#define RTM_NEWMULTICAST RTM_NEWMULTICAST
+	RTM_DELMULTICAST,
+#define RTM_DELMULTICAST RTM_DELMULTICAST
 	RTM_GETMULTICAST = 58,
 #define RTM_GETMULTICAST RTM_GETMULTICAST
 
@@ -774,6 +778,8 @@  enum rtnetlink_groups {
 #define RTNLGRP_TUNNEL		RTNLGRP_TUNNEL
 	RTNLGRP_STATS,
 #define RTNLGRP_STATS		RTNLGRP_STATS
+	RTNLGRP_IPV4_MCADDR,
+#define RTNLGRP_IPV4_MCADDR	RTNLGRP_IPV4_MCADDR
 	__RTNLGRP_MAX
 };
 #define RTNLGRP_MAX	(__RTNLGRP_MAX - 1)
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 9bf09de6a2e7..34575f5392a8 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -88,6 +88,7 @@ 
 #include <linux/byteorder/generic.h>
 
 #include <net/net_namespace.h>
+#include <net/netlink.h>
 #include <net/arp.h>
 #include <net/ip.h>
 #include <net/protocol.h>
@@ -1430,6 +1431,60 @@  static void ip_mc_hash_remove(struct in_device *in_dev,
 	*mc_hash = im->next_hash;
 }
 
+static int inet_fill_ifmcaddr(struct sk_buff *skb, struct net_device *dev,
+			      __be32 addr, int event)
+{
+	struct nlmsghdr *nlh;
+	struct ifaddrmsg *ifm;
+
+	nlh = nlmsg_put(skb, 0, 0, event, sizeof(struct ifaddrmsg), 0);
+	if (!nlh)
+		return -EMSGSIZE;
+
+	ifm = nlmsg_data(nlh);
+	ifm->ifa_family = AF_INET;
+	ifm->ifa_prefixlen = 32;
+	ifm->ifa_flags = IFA_F_PERMANENT;
+	ifm->ifa_scope = RT_SCOPE_LINK;
+	ifm->ifa_index = dev->ifindex;
+
+	if (nla_put_in_addr(skb, IFA_MULTICAST, addr) < 0) {
+		nlmsg_cancel(skb, nlh);
+		return -EMSGSIZE;
+	}
+
+	nlmsg_end(skb, nlh);
+	return 0;
+}
+
+static inline int inet_ifmcaddr_msgsize(void)
+{
+	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
+			+ nla_total_size(sizeof(__be32));
+}
+
+static void inet_ifmcaddr_notify(struct net_device *dev, __be32 addr, int event)
+{
+	struct net *net = dev_net(dev);
+	struct sk_buff *skb;
+	int err = -ENOBUFS;
+
+	skb = nlmsg_new(inet_ifmcaddr_msgsize(), GFP_ATOMIC);
+	if (!skb)
+		goto error;
+
+	err = inet_fill_ifmcaddr(skb, dev, addr, event);
+	if (err < 0) {
+		WARN_ON(err == -EMSGSIZE);
+		kfree_skb(skb);
+		goto error;
+	}
+
+	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MCADDR, NULL, GFP_ATOMIC);
+	return;
+error:
+	rtnl_set_sk_err(net, RTNLGRP_IPV4_MCADDR, err);
+}
 
 /*
  *	A socket has joined a multicast group on device dev.
@@ -1476,6 +1531,7 @@  static void ____ip_mc_inc_group(struct in_device *in_dev, __be32 addr,
 	igmpv3_del_delrec(in_dev, im);
 #endif
 	igmp_group_added(im);
+	inet_ifmcaddr_notify(in_dev->dev, addr, RTM_NEWMULTICAST);
 	if (!in_dev->dead)
 		ip_rt_multicast_event(in_dev);
 out:
@@ -1689,6 +1745,8 @@  void __ip_mc_dec_group(struct in_device *in_dev, __be32 addr, gfp_t gfp)
 				*ip = i->next_rcu;
 				in_dev->mc_count--;
 				__igmp_group_dropped(i, gfp);
+				inet_ifmcaddr_notify(in_dev->dev, addr,
+						     RTM_DELMULTICAST);
 				ip_mc_clear_src(i);
 
 				if (!in_dev->dead)