Message ID | 20241110081953.121682-1-yuyanghuang@google.com (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | [net-next] netlink: add igmp join/leave notifications | expand |
Hi Yuyang, Would you mind also update iproute2 for testing? On Sun, Nov 10, 2024 at 05:19:53PM +0900, Yuyang Huang wrote: > This change introduces netlink notifications for multicast address > changes, enabling components like the Android Packet Filter to implement > IGMP offload solutions. > > The following features are included: > * Addition and deletion of multicast addresses are reported using > RTM_NEWMULTICAST and RTM_DELMULTICAST messages with AF_INET. > * A new notification group, RTNLGRP_IPV4_MCADDR, is introduced for > receiving these events. > > This enhancement allows user-space components to efficiently track > multicast group memberships and program hardware offload filters > accordingly. > > Cc: Maciej Żenczykowski <maze@google.com> > Cc: Lorenzo Colitti <lorenzo@google.com> > Co-developed-by: Patrick Ruddy <pruddy@vyatta.att-mail.com> > Signed-off-by: Patrick Ruddy <pruddy@vyatta.att-mail.com> > Link: https://lore.kernel.org/r/20180906091056.21109-1-pruddy@vyatta.att-mail.com > Signed-off-by: Yuyang Huang <yuyanghuang@google.com> > --- > include/uapi/linux/rtnetlink.h | 6 ++++ > net/ipv4/igmp.c | 58 ++++++++++++++++++++++++++++++++++ > 2 files changed, 64 insertions(+) > > diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h > index 3b687d20c9ed..354a923f129d 100644 > --- a/include/uapi/linux/rtnetlink.h > +++ b/include/uapi/linux/rtnetlink.h > @@ -93,6 +93,10 @@ enum { > RTM_NEWPREFIX = 52, > #define RTM_NEWPREFIX RTM_NEWPREFIX > > + RTM_NEWMULTICAST, > +#define RTM_NEWMULTICAST RTM_NEWMULTICAST > + RTM_DELMULTICAST, > +#define RTM_DELMULTICAST RTM_DELMULTICAST > RTM_GETMULTICAST = 58, > #define RTM_GETMULTICAST RTM_GETMULTICAST > > @@ -774,6 +778,8 @@ enum rtnetlink_groups { > #define RTNLGRP_TUNNEL RTNLGRP_TUNNEL > RTNLGRP_STATS, > #define RTNLGRP_STATS RTNLGRP_STATS > + RTNLGRP_IPV4_MCADDR, > +#define RTNLGRP_IPV4_MCADDR RTNLGRP_IPV4_MCADDR > __RTNLGRP_MAX > }; > #define RTNLGRP_MAX (__RTNLGRP_MAX - 1) > diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c > index 9bf09de6a2e7..34575f5392a8 100644 > --- a/net/ipv4/igmp.c > +++ b/net/ipv4/igmp.c > @@ -88,6 +88,7 @@ > #include <linux/byteorder/generic.h> > > #include <net/net_namespace.h> > +#include <net/netlink.h> > #include <net/arp.h> > #include <net/ip.h> > #include <net/protocol.h> > @@ -1430,6 +1431,60 @@ static void ip_mc_hash_remove(struct in_device *in_dev, > *mc_hash = im->next_hash; > } > > +static int inet_fill_ifmcaddr(struct sk_buff *skb, struct net_device *dev, > + __be32 addr, int event) > +{ > + struct nlmsghdr *nlh; > + struct ifaddrmsg *ifm; > + > + nlh = nlmsg_put(skb, 0, 0, event, sizeof(struct ifaddrmsg), 0); > + if (!nlh) > + return -EMSGSIZE; > + > + ifm = nlmsg_data(nlh); > + ifm->ifa_family = AF_INET; > + ifm->ifa_prefixlen = 32; > + ifm->ifa_flags = IFA_F_PERMANENT; > + ifm->ifa_scope = RT_SCOPE_LINK; > + ifm->ifa_index = dev->ifindex; > + > + if (nla_put_in_addr(skb, IFA_MULTICAST, addr) < 0) { > + nlmsg_cancel(skb, nlh); > + return -EMSGSIZE; > + } > + > + nlmsg_end(skb, nlh); > + return 0; > +} > + > +static inline int inet_ifmcaddr_msgsize(void) > +{ > + return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) > + + nla_total_size(sizeof(__be32)); > +} > + > +static void inet_ifmcaddr_notify(struct net_device *dev, __be32 addr, int event) > +{ > + struct net *net = dev_net(dev); > + struct sk_buff *skb; > + int err = -ENOBUFS; > + > + skb = nlmsg_new(inet_ifmcaddr_msgsize(), GFP_ATOMIC); > + if (!skb) > + goto error; > + > + err = inet_fill_ifmcaddr(skb, dev, addr, event); > + if (err < 0) { > + WARN_ON(err == -EMSGSIZE); > + kfree_skb(skb); > + goto error; > + } > + > + rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MCADDR, NULL, GFP_ATOMIC); > + return; > +error: > + rtnl_set_sk_err(net, RTNLGRP_IPV4_MCADDR, err); > +} > > /* > * A socket has joined a multicast group on device dev. > @@ -1476,6 +1531,7 @@ static void ____ip_mc_inc_group(struct in_device *in_dev, __be32 addr, > igmpv3_del_delrec(in_dev, im); > #endif > igmp_group_added(im); > + inet_ifmcaddr_notify(in_dev->dev, addr, RTM_NEWMULTICAST); > if (!in_dev->dead) > ip_rt_multicast_event(in_dev); > out: > @@ -1689,6 +1745,8 @@ void __ip_mc_dec_group(struct in_device *in_dev, __be32 addr, gfp_t gfp) > *ip = i->next_rcu; > in_dev->mc_count--; > __igmp_group_dropped(i, gfp); > + inet_ifmcaddr_notify(in_dev->dev, addr, > + RTM_DELMULTICAST); > ip_mc_clear_src(i); > > if (!in_dev->dead) > -- > 2.47.0.277.g8800431eea-goog >
> Would you mind also update iproute2 for testing? Sure, besides updating the ip monitor command, are there any other places that need to be updated? Thanks, Yuyang On Tue, Nov 12, 2024 at 6:54 PM Hangbin Liu <liuhangbin@gmail.com> wrote: > > > Hi Yuyang, > > Would you mind also update iproute2 for testing? > On Sun, Nov 10, 2024 at 05:19:53PM +0900, Yuyang Huang wrote: > > This change introduces netlink notifications for multicast address > > changes, enabling components like the Android Packet Filter to implement > > IGMP offload solutions. > > > > The following features are included: > > * Addition and deletion of multicast addresses are reported using > > RTM_NEWMULTICAST and RTM_DELMULTICAST messages with AF_INET. > > * A new notification group, RTNLGRP_IPV4_MCADDR, is introduced for > > receiving these events. > > > > This enhancement allows user-space components to efficiently track > > multicast group memberships and program hardware offload filters > > accordingly. > > > > Cc: Maciej Żenczykowski <maze@google.com> > > Cc: Lorenzo Colitti <lorenzo@google.com> > > Co-developed-by: Patrick Ruddy <pruddy@vyatta.att-mail.com> > > Signed-off-by: Patrick Ruddy <pruddy@vyatta.att-mail.com> > > Link: https://lore.kernel.org/r/20180906091056.21109-1-pruddy@vyatta.att-mail.com > > Signed-off-by: Yuyang Huang <yuyanghuang@google.com> > > --- > > include/uapi/linux/rtnetlink.h | 6 ++++ > > net/ipv4/igmp.c | 58 ++++++++++++++++++++++++++++++++++ > > 2 files changed, 64 insertions(+) > > > > diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h > > index 3b687d20c9ed..354a923f129d 100644 > > --- a/include/uapi/linux/rtnetlink.h > > +++ b/include/uapi/linux/rtnetlink.h > > @@ -93,6 +93,10 @@ enum { > > RTM_NEWPREFIX = 52, > > #define RTM_NEWPREFIX RTM_NEWPREFIX > > > > + RTM_NEWMULTICAST, > > +#define RTM_NEWMULTICAST RTM_NEWMULTICAST > > + RTM_DELMULTICAST, > > +#define RTM_DELMULTICAST RTM_DELMULTICAST > > RTM_GETMULTICAST = 58, > > #define RTM_GETMULTICAST RTM_GETMULTICAST > > > > @@ -774,6 +778,8 @@ enum rtnetlink_groups { > > #define RTNLGRP_TUNNEL RTNLGRP_TUNNEL > > RTNLGRP_STATS, > > #define RTNLGRP_STATS RTNLGRP_STATS > > + RTNLGRP_IPV4_MCADDR, > > +#define RTNLGRP_IPV4_MCADDR RTNLGRP_IPV4_MCADDR > > __RTNLGRP_MAX > > }; > > #define RTNLGRP_MAX (__RTNLGRP_MAX - 1) > > diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c > > index 9bf09de6a2e7..34575f5392a8 100644 > > --- a/net/ipv4/igmp.c > > +++ b/net/ipv4/igmp.c > > @@ -88,6 +88,7 @@ > > #include <linux/byteorder/generic.h> > > > > #include <net/net_namespace.h> > > +#include <net/netlink.h> > > #include <net/arp.h> > > #include <net/ip.h> > > #include <net/protocol.h> > > @@ -1430,6 +1431,60 @@ static void ip_mc_hash_remove(struct in_device *in_dev, > > *mc_hash = im->next_hash; > > } > > > > +static int inet_fill_ifmcaddr(struct sk_buff *skb, struct net_device *dev, > > + __be32 addr, int event) > > +{ > > + struct nlmsghdr *nlh; > > + struct ifaddrmsg *ifm; > > + > > + nlh = nlmsg_put(skb, 0, 0, event, sizeof(struct ifaddrmsg), 0); > > + if (!nlh) > > + return -EMSGSIZE; > > + > > + ifm = nlmsg_data(nlh); > > + ifm->ifa_family = AF_INET; > > + ifm->ifa_prefixlen = 32; > > + ifm->ifa_flags = IFA_F_PERMANENT; > > + ifm->ifa_scope = RT_SCOPE_LINK; > > + ifm->ifa_index = dev->ifindex; > > + > > + if (nla_put_in_addr(skb, IFA_MULTICAST, addr) < 0) { > > + nlmsg_cancel(skb, nlh); > > + return -EMSGSIZE; > > + } > > + > > + nlmsg_end(skb, nlh); > > + return 0; > > +} > > + > > +static inline int inet_ifmcaddr_msgsize(void) > > +{ > > + return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) > > + + nla_total_size(sizeof(__be32)); > > +} > > + > > +static void inet_ifmcaddr_notify(struct net_device *dev, __be32 addr, int event) > > +{ > > + struct net *net = dev_net(dev); > > + struct sk_buff *skb; > > + int err = -ENOBUFS; > > + > > + skb = nlmsg_new(inet_ifmcaddr_msgsize(), GFP_ATOMIC); > > + if (!skb) > > + goto error; > > + > > + err = inet_fill_ifmcaddr(skb, dev, addr, event); > > + if (err < 0) { > > + WARN_ON(err == -EMSGSIZE); > > + kfree_skb(skb); > > + goto error; > > + } > > + > > + rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MCADDR, NULL, GFP_ATOMIC); > > + return; > > +error: > > + rtnl_set_sk_err(net, RTNLGRP_IPV4_MCADDR, err); > > +} > > > > /* > > * A socket has joined a multicast group on device dev. > > @@ -1476,6 +1531,7 @@ static void ____ip_mc_inc_group(struct in_device *in_dev, __be32 addr, > > igmpv3_del_delrec(in_dev, im); > > #endif > > igmp_group_added(im); > > + inet_ifmcaddr_notify(in_dev->dev, addr, RTM_NEWMULTICAST); > > if (!in_dev->dead) > > ip_rt_multicast_event(in_dev); > > out: > > @@ -1689,6 +1745,8 @@ void __ip_mc_dec_group(struct in_device *in_dev, __be32 addr, gfp_t gfp) > > *ip = i->next_rcu; > > in_dev->mc_count--; > > __igmp_group_dropped(i, gfp); > > + inet_ifmcaddr_notify(in_dev->dev, addr, > > + RTM_DELMULTICAST); > > ip_mc_clear_src(i); > > > > if (!in_dev->dead) > > -- > > 2.47.0.277.g8800431eea-goog > >
On Tue, Nov 12, 2024 at 07:10:07PM +0900, Yuyang Huang wrote: > > Would you mind also update iproute2 for testing? > > Sure, besides updating the ip monitor command, are there any other > places that need to be updated? Usually a selftest need to be added for new features. But I don't if this one needed since the iproute2 hasn't updated. Thanks Hangbin
Le 10/11/2024 à 09:19, Yuyang Huang a écrit : > This change introduces netlink notifications for multicast address > changes, enabling components like the Android Packet Filter to implement > IGMP offload solutions. > > The following features are included: > * Addition and deletion of multicast addresses are reported using > RTM_NEWMULTICAST and RTM_DELMULTICAST messages with AF_INET. > * A new notification group, RTNLGRP_IPV4_MCADDR, is introduced for > receiving these events. > > This enhancement allows user-space components to efficiently track > multicast group memberships and program hardware offload filters > accordingly. > > Cc: Maciej Żenczykowski <maze@google.com> > Cc: Lorenzo Colitti <lorenzo@google.com> > Co-developed-by: Patrick Ruddy <pruddy@vyatta.att-mail.com> > Signed-off-by: Patrick Ruddy <pruddy@vyatta.att-mail.com> > Link: https://lore.kernel.org/r/20180906091056.21109-1-pruddy@vyatta.att-mail.com > Signed-off-by: Yuyang Huang <yuyanghuang@google.com> > --- > include/uapi/linux/rtnetlink.h | 6 ++++ > net/ipv4/igmp.c | 58 ++++++++++++++++++++++++++++++++++ > 2 files changed, 64 insertions(+) > > diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h > index 3b687d20c9ed..354a923f129d 100644 > --- a/include/uapi/linux/rtnetlink.h > +++ b/include/uapi/linux/rtnetlink.h > @@ -93,6 +93,10 @@ enum { > RTM_NEWPREFIX = 52, > #define RTM_NEWPREFIX RTM_NEWPREFIX > > + RTM_NEWMULTICAST, > +#define RTM_NEWMULTICAST RTM_NEWMULTICAST > + RTM_DELMULTICAST, > +#define RTM_DELMULTICAST RTM_DELMULTICAST These names are misleading, they are very generic, but in fact, specialized to igmp. Are there plans to add more notifications later? What about ipv6? > RTM_GETMULTICAST = 58,> #define RTM_GETMULTICAST RTM_GETMULTICAST The RTM_GETMULTICAST works only for with IPv6 and the NEW/DEL will work for IPv4 only. Regards, Nicolas > > @@ -774,6 +778,8 @@ enum rtnetlink_groups { > #define RTNLGRP_TUNNEL RTNLGRP_TUNNEL > RTNLGRP_STATS, > #define RTNLGRP_STATS RTNLGRP_STATS > + RTNLGRP_IPV4_MCADDR, > +#define RTNLGRP_IPV4_MCADDR RTNLGRP_IPV4_MCADDR > __RTNLGRP_MAX > }; > #define RTNLGRP_MAX (__RTNLGRP_MAX - 1)
> These names are misleading, they are very generic, but in fact, specialized to igmp. > Are there plans to add more notifications later? > What about ipv6? I plan to add MLD support as well. The RTM_NEWMULTICAST/RTM_DELMULTICAST can be used for monitoring both IPv4 and IPv6 multicast addresses. As it was brought up now, I will send a v2 patch later to include the MLD change. >The RTM_GETMULTICAST works only for with IPv6 and the NEW/DEL will work for IPv4 I plan to make igmp.c support RTM_GETMULTICAST as well, maybe in a different patch series. In this case, it can be used to dump both IPv4 and IPv6 multicast addresses. After adding RTM_GETMULTICAST support for IPv4, I'll migrate 'ip maddr' to use netlink instead of parsing procfs to dump IPv4/v6 multicast addresses. Thanks, Yuyang On Tue, Nov 12, 2024 at 10:45 PM Nicolas Dichtel <nicolas.dichtel@6wind.com> wrote: > > Le 10/11/2024 à 09:19, Yuyang Huang a écrit : > > This change introduces netlink notifications for multicast address > > changes, enabling components like the Android Packet Filter to implement > > IGMP offload solutions. > > > > The following features are included: > > * Addition and deletion of multicast addresses are reported using > > RTM_NEWMULTICAST and RTM_DELMULTICAST messages with AF_INET. > > * A new notification group, RTNLGRP_IPV4_MCADDR, is introduced for > > receiving these events. > > > > This enhancement allows user-space components to efficiently track > > multicast group memberships and program hardware offload filters > > accordingly. > > > > Cc: Maciej Żenczykowski <maze@google.com> > > Cc: Lorenzo Colitti <lorenzo@google.com> > > Co-developed-by: Patrick Ruddy <pruddy@vyatta.att-mail.com> > > Signed-off-by: Patrick Ruddy <pruddy@vyatta.att-mail.com> > > Link: https://lore.kernel.org/r/20180906091056.21109-1-pruddy@vyatta.att-mail.com > > Signed-off-by: Yuyang Huang <yuyanghuang@google.com> > > --- > > include/uapi/linux/rtnetlink.h | 6 ++++ > > net/ipv4/igmp.c | 58 ++++++++++++++++++++++++++++++++++ > > 2 files changed, 64 insertions(+) > > > > diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h > > index 3b687d20c9ed..354a923f129d 100644 > > --- a/include/uapi/linux/rtnetlink.h > > +++ b/include/uapi/linux/rtnetlink.h > > @@ -93,6 +93,10 @@ enum { > > RTM_NEWPREFIX = 52, > > #define RTM_NEWPREFIX RTM_NEWPREFIX > > > > + RTM_NEWMULTICAST, > > +#define RTM_NEWMULTICAST RTM_NEWMULTICAST > > + RTM_DELMULTICAST, > > +#define RTM_DELMULTICAST RTM_DELMULTICAST > These names are misleading, they are very generic, but in fact, specialized to igmp. > > Are there plans to add more notifications later? > What about ipv6? > > > RTM_GETMULTICAST = 58,> #define RTM_GETMULTICAST RTM_GETMULTICAST > The RTM_GETMULTICAST works only for with IPv6 and the NEW/DEL will work for IPv4 > only. > > Regards, > Nicolas > > > > > @@ -774,6 +778,8 @@ enum rtnetlink_groups { > > #define RTNLGRP_TUNNEL RTNLGRP_TUNNEL > > RTNLGRP_STATS, > > #define RTNLGRP_STATS RTNLGRP_STATS > > + RTNLGRP_IPV4_MCADDR, > > +#define RTNLGRP_IPV4_MCADDR RTNLGRP_IPV4_MCADDR > > __RTNLGRP_MAX > > }; > > #define RTNLGRP_MAX (__RTNLGRP_MAX - 1)
> > This enhancement allows user-space components to efficiently track > > multicast group memberships and program hardware offload filters > > accordingly. Sorry, i missed the original posting. Please could you say more about programming hardware offload filters? How is this done? Andrew
> Please could you say more about programming hardware offload filters? > How is this done? Sure, please let me explain a little bit further on how Android Packet Filter (APF) works here. The Android Packet Filter (APF) has two parts: * APF Interpreter: Runs on the Wi-Fi chipset and executes APF programs(bytecodes) to decide whether to accept, drop, or reply to incoming packets. * APF program generator: Resides in the Android Framework within the Network Stack process. It creates and updates APF programs based on network conditions and device state, allowing for dynamic adjustments to the filtering rules. APF program generator is part of the Network Stack module, which can be updated with monthly mainline releases. Feel free to let me know if the above explanation is still unclear. I will include a more detailed explanation in the commit message in patch v2. Thanks, Yuyang On Wed, Nov 13, 2024 at 4:34 AM Andrew Lunn <andrew@lunn.ch> wrote: > > > > This enhancement allows user-space components to efficiently track > > > multicast group memberships and program hardware offload filters > > > accordingly. > > Sorry, i missed the original posting. > > Please could you say more about programming hardware offload filters? > How is this done? > > Andrew
On Wed, Nov 13, 2024 at 09:56:17AM +0900, Yuyang Huang wrote: > > Please could you say more about programming hardware offload filters? > > How is this done? > > Sure, please let me explain a little bit further on how Android > Packet Filter (APF) works here. > > The Android Packet Filter (APF) has two parts: > * APF Interpreter: Runs on the Wi-Fi chipset and executes APF > programs(bytecodes) to decide whether to accept, drop, or reply to > incoming packets. O.K. WiFi is not my area. But i'm more interested in uAPIs, and ensuring you are not adding APIs which promote kernel bypass. Is the API to pass this bytestream to the wifi chipset in mainline? And how does APF differ from BPF? Or P4? Why would we want APF when mainline has BPF? Do the new netlink message make sense without APF? Can i write a user space IGMP snooping implementation and then call bridge mdb add/del/replace? Assuming i can, why are the WiFi card not using that API, same a switches do? Andrew
>O.K. WiFi is not my area. But i'm more interested in uAPIs, and > ensuring you are not adding APIs which promote kernel bypass. WiFi chipset vendors must implement the Android WiFi HAL to install and read the APF program from WiFi firmware. The Android System Server will talk to vendor HAL service using the WiFi HAL. The datapath is: Network Stack process -> Android System Server -> vendor HAL service -> WiFi driver -> WiFi firmware. The Android WiFi HAL is specific to Android. The vendor HAL service, WiFi driver and WiFi firmware are all vendor proprietary software. In other words, those API are not in mainline yet. Feel free to referring to the following page for more details: * APF public doc: https://source.android.com/docs/core/connect/android-packet-filter * APF interpreter: https://cs.android.com/android/platform/superproject/main/+/main:hardware/google/apf/v6/ * APF program generator: https://cs.android.com/android/platform/superproject/main/+/main:packages/modules/NetworkStack/src/android/net/apf/ApfFilter.java * APF WiFi HAL: https://cs.android.com/android/platform/superproject/main/+/main:hardware/interfaces/wifi/aidl/aidl_api/android.hardware.wifi/2/android/hardware/wifi/IWifiStaIface.aidl;l=50?q=installApfPacketFilter >Is the API to pass this bytestream to the wifi chipset in mainline? Currently, these APIs are only in Android HAL, which is not in mainline. We are working on adding proper APF APIs into ethtool. Please some previous discussion here: https://lore.kernel.org/netdev/20240813223325.3522113-1-maze@google.com/T/ >And how does APF differ from BPF? Or P4? Why would we want APF when >mainline has BPF? Android has utilized APF on Pixel devices since approximately 2016 (potentially as early as Pixel 3). All new devices launching with Android 14/U+ are required to implement APFv4 on their WiFi client/STA interface, enforced through Android VTS. APFv6 introduces support for reply sending (e.g., ARP offload). This version is planned to be a requirement for new devices launching with Android 16/W (or its eventual successor), extending to wired interfaces as well. APF is distinct from eBPF due to the latter's complexity and larger memory footprint. Many low-end WiFi chipsets lack sufficient RAM in firmware to accommodate eBPF. In contrast, the latest APFv6 interpreter requires only ~5kB of compiled code and ~2kB of bytecode (4kB recommended). Even this minimal size presents integration challenges for some WiFi chipset vendors. In sum, eBPF bytecode is very space inefficient compared to APF. We have collaborated with major WiFi chipset vendors (Broadcom, Synaptics, Qualcomm, Mediatek etc.) across various device categories (phones, watches, tablets, TV etc.). To my knowledge, none have integrated eBPF or P4 into their chipsets. Instead, they have either implemented vendor-specific offload solutions or adopted APF. One of APF's goals is to defrag the hardware offloading within the Android ecosystem. Moving the APF program generator into kernel space would eliminate the need for generating the program in userspace. Ideally, future Linux APIs would directly interact with drivers to manage offloads and APF loading. The kernel could even be responsible for building the APF bytecode, resolving race conditions caused by fetching kernel state for bytecode generation. A BPF program could potentially build the APF bytecode during suspend. However, this is a long-term goal requiring significant design and discussion with the upstream Linux community. In the short term, APF program generation will remain in user space. >Do the new netlink message make sense without APF? Can i write a user >space IGMP snooping implementation and then call bridge mdb >add/del/replace? The RTM_NEWMULTICAST and RTM_DELMULTICAST events introduced in this patch enable user space implementation of IGMP/MLD offloading and IPv4/IPv6 multicast filtering. I have limited knowledge on how to implement IGMP snooping correctly so I don't know if they are sufficient. These two events have broader applications beyond APF. Any user space implementation of IGMP/MLD offloading requires kernel events to signal when a multicast address is added or removed. In fact, in the original attempt of this patch from "Patrick Ruddy", the commit message also mentioned about "having userspace applications to program multicast MAC filters in hardware". Which makes me believe APF won't be the only potential use case. Please check the following thread for more details: https://lore.kernel.org/r/20180906091056.21109-1-pruddy@vyatta.att-mail.com >Assuming i can, why are the WiFi card not using that API, same a switches do? I'm unsure if I understand your suggestion to use RTM_NEWMDB/RTM_DELMDB/RTNLGRP_MDB. Currently, the kernel doesn't send these events outside the bridge context. Previous patches attempted to reuse these events, but reviewers raised concerns about potential confusion for existing users. IMHO, I agree with the reviewer that re-using the RTM_NEWMDB/RTM_DELMDB/RTNLGRP_MDB might be confusing and might affect the current applications that use those events. Those events seem to make more sense from a bridge perspective. Our use cases are targeting the end device which is not a bridge or router. It might also make sense to consider whether to accept the proposed APIs from an API completeness perspective. The current netlink API for multicast addresses seems incomplete. While RTM_GETMULTICAST exists, it only supports IPv6, not IPv4. This limitation forces tools like 'ip maddr' to rely on parsing procfs instead of using netlink. Additionally, 'ip monitor' cannot track multicast address additions or removals. I feel it would make sense to have full netlink based dumping and event notification support for both IPv4/IPv6 multicast addresses as well. Thanks, Yuyang On Wed, Nov 13, 2024 at 10:17 AM Andrew Lunn <andrew@lunn.ch> wrote: > > On Wed, Nov 13, 2024 at 09:56:17AM +0900, Yuyang Huang wrote: > > > Please could you say more about programming hardware offload filters? > > > How is this done? > > > > Sure, please let me explain a little bit further on how Android > > Packet Filter (APF) works here. > > > > The Android Packet Filter (APF) has two parts: > > * APF Interpreter: Runs on the Wi-Fi chipset and executes APF > > programs(bytecodes) to decide whether to accept, drop, or reply to > > incoming packets. > > O.K. WiFi is not my area. But i'm more interested in uAPIs, and > ensuring you are not adding APIs which promote kernel bypass. > > Is the API to pass this bytestream to the wifi chipset in mainline? > And how does APF differ from BPF? Or P4? Why would we want APF when > mainline has BPF? > > Do the new netlink message make sense without APF? Can i write a user > space IGMP snooping implementation and then call bridge mdb > add/del/replace? Assuming i can, why are the WiFi card not using that > API, same a switches do? > > Andrew
> So that probably leads to a NACK for these patches. If APF is your > target, and to me it seems unlikely APF will get accepted into > mainline, there is no need for these netlink changes. Hence a NACK. > And this is probably your way in. Forget about APF when talking to > mainline. Thanks for the advice. I'll remove the APF context from the commit message, as these patches should benefit mainline regardless of their APF use case. > Show these patches are useful in general. Explain the use > case of IGMP/MLD control in user space using existing APIs to control > the network stack and hardware. > Yes, this is going in correct direction to get these patches > merged. Focus on this. Solve these problems with ip monitor etc. Once > merged you can then use it with the out of mainline APF. I plan to include MLD modifications and rewrite the commit message as follows in patch v2. I will also send the ip monitor patches in parallel. Please let me know if you have any further suggestions. ``` netlink: add IGMP/MLD join/leave notifications This change introduces netlink notifications for multicast address changes. The following features are included: * Addition and deletion of multicast addresses are reported using RTM_NEWMULTICAST and RTM_DELMULTICAST messages with AF_INET and AF_INET6. * Two new notification groups: RTNLGRP_IPV4_MCADDR and RTNLGRP_IPV6_MCADDR are introduced for receiving these events. This change allows user space applications (e.g., ip monitor) to efficiently track multicast group memberships by listening for netlink events. Previously, applications relied on inefficient polling of procfs, introducing delays. With netlink notifications, applications receive realtime updates on multicast group membership changes, enabling more precise metrics collection and system monitoring. This change also empowers user space applications to manage multicast filters and IGMP/MLD offload rules using the same netlink notification mechanism. This allows applications to dynamically adjust rules and configurations via generic netlink communication with the Wi-Fi driver, offering greater flexibility and updatability compared to implementing all logic within the driver itself. This is a key consideration for some commercial devices. ``` Thanks, Yuyang On Thu, Nov 14, 2024 at 6:06 AM Andrew Lunn <andrew@lunn.ch> wrote: > > On Wed, Nov 13, 2024 at 01:26:29PM +0900, Yuyang Huang wrote: > > >O.K. WiFi is not my area. But i'm more interested in uAPIs, and > > > ensuring you are not adding APIs which promote kernel bypass. > > [Off list] > > > > > WiFi chipset vendors must implement the Android WiFi HAL to install > > and read the APF program from WiFi firmware. The Android System Server > > will talk to vendor HAL service using the WiFi HAL. The datapath is: > > Network Stack process -> Android System Server -> vendor HAL service > > -> WiFi driver -> WiFi firmware. The Android WiFi HAL is specific to > > Android. The vendor HAL service, WiFi driver and WiFi firmware are all > > vendor proprietary software. In other words, those API are not in > > mainline yet. > > So that probably leads to a NACK for these patches. If APF is your > target, and to me it seems unlikely APF will get accepted into > mainline, there is no need for these netlink changes. Hence a NACK. > > > >Do the new netlink message make sense without APF? Can i write a user > > >space IGMP snooping implementation and then call bridge mdb > > >add/del/replace? > > > > The RTM_NEWMULTICAST and RTM_DELMULTICAST events introduced in this > > patch enable user space implementation of IGMP/MLD offloading and > > IPv4/IPv6 multicast filtering. I have limited knowledge on how to > > implement IGMP snooping correctly so I don't know if they are > > sufficient. > > > > These two events have broader applications beyond APF. > > And this is probably your way in. Forget about APF when talking to > mainline. Show these patches are useful in general. Explain the use > case of IGMP/MLD control in user space using existing APIs to control > the network stack and hardware. > > > It might also make sense to consider whether to accept the proposed > > APIs from an API completeness perspective. The current netlink API for > > multicast addresses seems incomplete. While RTM_GETMULTICAST exists, > > it only supports IPv6, not IPv4. This limitation forces tools like 'ip > > maddr' to rely on parsing procfs instead of using netlink. > > Additionally, 'ip monitor' cannot track multicast address additions or > > removals. I feel it would make sense to have full netlink based > > dumping and event notification support for both IPv4/IPv6 multicast > > addresses as well. > > Yes, this is going in correct direction to get these patches > merged. Focus on this. Solve these problems with ip monitor etc. Once > merged you can then use it with the out of mainline APF. > > Andrew
> This change also empowers user space applications to manage multicast > filters and IGMP/MLD offload rules using the same netlink notification > mechanism. This allows applications to dynamically adjust rules and > configurations via generic netlink communication with the Wi-Fi driver, > offering greater flexibility and updatability compared to implementing > all logic within the driver itself. This is a key consideration for some > commercial devices. You are still focused or your narrow use case. Why is this only for WiFi? Why cannot i use it for other systems using IGMP? Switches? VPN servers? etc. We want a generic solution for all use cases, not just some obscure niche. And Linux does not care about commercial devices. And you would never implement this in the driver itself, that would immediately gets NACKed because it is not generic, but the concept should be generic. Andrew
>You are still focused or your narrow use case. Why is this only for >WiFi? Why cannot i use it for other systems using IGMP? Switches? VPN >servers? etc. We want a generic solution for all use cases, not just >some obscure niche. >And Linux does not care about commercial devices. And you would never >implement this in the driver itself, that would immediately gets >NACKed because it is not generic, but the concept should be generic. Thanks for the further feedback! I've revised the commit message to discuss more generic use cases without mentioning specific drivers or devices. I also removed the discussion about commercial devices. Please take another look and let me know if it needs further adjustments. ``` netlink: add IGMP/MLD join/leave notifications This change introduces netlink notifications for multicast address changes. The following features are included: * Addition and deletion of multicast addresses are reported using RTM_NEWMULTICAST and RTM_DELMULTICAST messages with AF_INET and AF_INET6. * Two new notification groups: RTNLGRP_IPV4_MCADDR and RTNLGRP_IPV6_MCADDR are introduced for receiving these events. This change allows user space applications (e.g., ip monitor) to efficiently track multicast group memberships by listening for netlink events. Previously, applications relied on inefficient polling of procfs, introducing delays. With netlink notifications, applications receive realtime updates on multicast group membership changes, enabling more precise metrics collection and system monitoring. This change also unlocks the potential for implementing a wide range of sophisticated multicast related features in user space by allowing applications to combine kernel provided multicast address information with user space data and communicate decisions back to the kernel for more fine grained control. This mechanism can be used for various purposes, including multicast filtering, IGMP/MLD offload, and IGMP/MLD snooping. ``` Thanks, Yuyang On Thu, Nov 14, 2024 at 10:26 PM Andrew Lunn <andrew@lunn.ch> wrote: > > > This change also empowers user space applications to manage multicast > > filters and IGMP/MLD offload rules using the same netlink notification > > mechanism. This allows applications to dynamically adjust rules and > > configurations via generic netlink communication with the Wi-Fi driver, > > offering greater flexibility and updatability compared to implementing > > all logic within the driver itself. This is a key consideration for some > > commercial devices. > > You are still focused or your narrow use case. Why is this only for > WiFi? Why cannot i use it for other systems using IGMP? Switches? VPN > servers? etc. We want a generic solution for all use cases, not just > some obscure niche. > > And Linux does not care about commercial devices. And you would never > implement this in the driver itself, that would immediately gets > NACKed because it is not generic, but the concept should be generic. > > Andrew
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 3b687d20c9ed..354a923f129d 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -93,6 +93,10 @@ enum { RTM_NEWPREFIX = 52, #define RTM_NEWPREFIX RTM_NEWPREFIX + RTM_NEWMULTICAST, +#define RTM_NEWMULTICAST RTM_NEWMULTICAST + RTM_DELMULTICAST, +#define RTM_DELMULTICAST RTM_DELMULTICAST RTM_GETMULTICAST = 58, #define RTM_GETMULTICAST RTM_GETMULTICAST @@ -774,6 +778,8 @@ enum rtnetlink_groups { #define RTNLGRP_TUNNEL RTNLGRP_TUNNEL RTNLGRP_STATS, #define RTNLGRP_STATS RTNLGRP_STATS + RTNLGRP_IPV4_MCADDR, +#define RTNLGRP_IPV4_MCADDR RTNLGRP_IPV4_MCADDR __RTNLGRP_MAX }; #define RTNLGRP_MAX (__RTNLGRP_MAX - 1) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 9bf09de6a2e7..34575f5392a8 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -88,6 +88,7 @@ #include <linux/byteorder/generic.h> #include <net/net_namespace.h> +#include <net/netlink.h> #include <net/arp.h> #include <net/ip.h> #include <net/protocol.h> @@ -1430,6 +1431,60 @@ static void ip_mc_hash_remove(struct in_device *in_dev, *mc_hash = im->next_hash; } +static int inet_fill_ifmcaddr(struct sk_buff *skb, struct net_device *dev, + __be32 addr, int event) +{ + struct nlmsghdr *nlh; + struct ifaddrmsg *ifm; + + nlh = nlmsg_put(skb, 0, 0, event, sizeof(struct ifaddrmsg), 0); + if (!nlh) + return -EMSGSIZE; + + ifm = nlmsg_data(nlh); + ifm->ifa_family = AF_INET; + ifm->ifa_prefixlen = 32; + ifm->ifa_flags = IFA_F_PERMANENT; + ifm->ifa_scope = RT_SCOPE_LINK; + ifm->ifa_index = dev->ifindex; + + if (nla_put_in_addr(skb, IFA_MULTICAST, addr) < 0) { + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; + } + + nlmsg_end(skb, nlh); + return 0; +} + +static inline int inet_ifmcaddr_msgsize(void) +{ + return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + + nla_total_size(sizeof(__be32)); +} + +static void inet_ifmcaddr_notify(struct net_device *dev, __be32 addr, int event) +{ + struct net *net = dev_net(dev); + struct sk_buff *skb; + int err = -ENOBUFS; + + skb = nlmsg_new(inet_ifmcaddr_msgsize(), GFP_ATOMIC); + if (!skb) + goto error; + + err = inet_fill_ifmcaddr(skb, dev, addr, event); + if (err < 0) { + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto error; + } + + rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MCADDR, NULL, GFP_ATOMIC); + return; +error: + rtnl_set_sk_err(net, RTNLGRP_IPV4_MCADDR, err); +} /* * A socket has joined a multicast group on device dev. @@ -1476,6 +1531,7 @@ static void ____ip_mc_inc_group(struct in_device *in_dev, __be32 addr, igmpv3_del_delrec(in_dev, im); #endif igmp_group_added(im); + inet_ifmcaddr_notify(in_dev->dev, addr, RTM_NEWMULTICAST); if (!in_dev->dead) ip_rt_multicast_event(in_dev); out: @@ -1689,6 +1745,8 @@ void __ip_mc_dec_group(struct in_device *in_dev, __be32 addr, gfp_t gfp) *ip = i->next_rcu; in_dev->mc_count--; __igmp_group_dropped(i, gfp); + inet_ifmcaddr_notify(in_dev->dev, addr, + RTM_DELMULTICAST); ip_mc_clear_src(i); if (!in_dev->dead)