Message ID | 20241204134752.2691102-1-yuyanghuang@google.com (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | [net-next,v4] netlink: add IGMP/MLD join/leave notifications | expand |
Le 04/12/2024 à 14:47, Yuyang Huang a écrit : > This change introduces netlink notifications for multicast address > changes. The following features are included: > * Addition and deletion of multicast addresses are reported using > RTM_NEWMULTICAST and RTM_DELMULTICAST messages with AF_INET and > AF_INET6. > * Two new notification groups: RTNLGRP_IPV4_MCADDR and > RTNLGRP_IPV6_MCADDR are introduced for receiving these events. > > This change allows user space applications (e.g., ip monitor) to > efficiently track multicast group memberships by listening for netlink > events. Previously, applications relied on inefficient polling of > procfs, introducing delays. With netlink notifications, applications > receive realtime updates on multicast group membership changes, > enabling more precise metrics collection and system monitoring. > > This change also unlocks the potential for implementing a wide range > of sophisticated multicast related features in user space by allowing > applications to combine kernel provided multicast address information > with user space data and communicate decisions back to the kernel for > more fine grained control. This mechanism can be used for various > purposes, including multicast filtering, IGMP/MLD offload, and > IGMP/MLD snooping. > > Cc: Maciej Żenczykowski <maze@google.com> > Cc: Lorenzo Colitti <lorenzo@google.com> > Co-developed-by: Patrick Ruddy <pruddy@vyatta.att-mail.com> > Signed-off-by: Patrick Ruddy <pruddy@vyatta.att-mail.com> > Link: https://lore.kernel.org/r/20180906091056.21109-1-pruddy@vyatta.att-mail.com > Signed-off-by: Yuyang Huang <yuyanghuang@google.com> A minor comment below and then: Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com> > --- > > Changelog since v3: > - Remove unused variable 'scope' declaration. > - Align RTM_NEWMULTICAST and RTM_GETMULTICAST enum definitions with > existing code style. > > Changelog since v2: > - Use RT_SCOPE_UNIVERSE for both IGMP and MLD notification messages for > consistency. > > Changelog since v1: > - Implement MLD join/leave notifications. > - Revise the comment message to make it generic. > - Fix netdev/source_inline error. > - Reorder local variables according to "reverse xmas tree” style. > > include/uapi/linux/rtnetlink.h | 10 +++++- > net/ipv4/igmp.c | 53 +++++++++++++++++++++++++++++++ > net/ipv6/mcast.c | 57 ++++++++++++++++++++++++++++++++++ > 3 files changed, 119 insertions(+), 1 deletion(-) > > diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h > index db7254d52d93..eccc0e7dcb7d 100644 > --- a/include/uapi/linux/rtnetlink.h > +++ b/include/uapi/linux/rtnetlink.h > @@ -93,7 +93,11 @@ enum { > RTM_NEWPREFIX = 52, > #define RTM_NEWPREFIX RTM_NEWPREFIX > > - RTM_GETMULTICAST = 58, > + RTM_NEWMULTICAST = 56, > +#define RTM_NEWMULTICAST RTM_NEWMULTICAST > + RTM_DELMULTICAST, > +#define RTM_DELMULTICAST RTM_DELMULTICAST > + RTM_GETMULTICAST, > #define RTM_GETMULTICAST RTM_GETMULTICAST > > RTM_GETANYCAST = 62, > @@ -774,6 +778,10 @@ enum rtnetlink_groups { > #define RTNLGRP_TUNNEL RTNLGRP_TUNNEL > RTNLGRP_STATS, > #define RTNLGRP_STATS RTNLGRP_STATS > + RTNLGRP_IPV4_MCADDR, > +#define RTNLGRP_IPV4_MCADDR RTNLGRP_IPV4_MCADDR > + RTNLGRP_IPV6_MCADDR, > +#define RTNLGRP_IPV6_MCADDR RTNLGRP_IPV6_MCADDR > __RTNLGRP_MAX > }; > #define RTNLGRP_MAX (__RTNLGRP_MAX - 1) > diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c > index 6a238398acc9..8d6ee19864c6 100644 > --- a/net/ipv4/igmp.c > +++ b/net/ipv4/igmp.c > @@ -88,6 +88,7 @@ > #include <linux/byteorder/generic.h> > > #include <net/net_namespace.h> > +#include <net/netlink.h> > #include <net/arp.h> > #include <net/ip.h> > #include <net/protocol.h> > @@ -1430,6 +1431,55 @@ static void ip_mc_hash_remove(struct in_device *in_dev, > *mc_hash = im->next_hash; > } > > +static int inet_fill_ifmcaddr(struct sk_buff *skb, struct net_device *dev, > + __be32 addr, int event) > +{ > + struct ifaddrmsg *ifm; > + struct nlmsghdr *nlh; > + > + nlh = nlmsg_put(skb, 0, 0, event, sizeof(struct ifaddrmsg), 0); > + if (!nlh) > + return -EMSGSIZE; > + > + ifm = nlmsg_data(nlh); > + ifm->ifa_family = AF_INET; > + ifm->ifa_prefixlen = 32; > + ifm->ifa_flags = IFA_F_PERMANENT; > + ifm->ifa_scope = RT_SCOPE_UNIVERSE; > + ifm->ifa_index = dev->ifindex; > + > + if (nla_put_in_addr(skb, IFA_MULTICAST, addr) < 0) { > + nlmsg_cancel(skb, nlh); > + return -EMSGSIZE; > + } > + > + nlmsg_end(skb, nlh); > + return 0; > +} > + > +static void inet_ifmcaddr_notify(struct net_device *dev, __be32 addr, int event) > +{ > + struct net *net = dev_net(dev); > + struct sk_buff *skb; > + int err = -ENOBUFS; > + > + skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct ifaddrmsg)) > + + nla_total_size(sizeof(__be32)), GFP_ATOMIC); > + if (!skb) > + goto error; > + > + err = inet_fill_ifmcaddr(skb, dev, addr, event); > + if (err < 0) { > + WARN_ON(err == -EMSGSIZE); Maybe WARN_ON_ONCE() is enough? Regards, Nicolas
>Maybe WARN_ON_ONCE() is enough? Thank you very much for the prompt review feedback. Will update in patch v5. Thanks, Yuyang On Wed, Dec 4, 2024 at 10:54 PM Nicolas Dichtel <nicolas.dichtel@6wind.com> wrote: > > Le 04/12/2024 à 14:47, Yuyang Huang a écrit : > > This change introduces netlink notifications for multicast address > > changes. The following features are included: > > * Addition and deletion of multicast addresses are reported using > > RTM_NEWMULTICAST and RTM_DELMULTICAST messages with AF_INET and > > AF_INET6. > > * Two new notification groups: RTNLGRP_IPV4_MCADDR and > > RTNLGRP_IPV6_MCADDR are introduced for receiving these events. > > > > This change allows user space applications (e.g., ip monitor) to > > efficiently track multicast group memberships by listening for netlink > > events. Previously, applications relied on inefficient polling of > > procfs, introducing delays. With netlink notifications, applications > > receive realtime updates on multicast group membership changes, > > enabling more precise metrics collection and system monitoring. > > > > This change also unlocks the potential for implementing a wide range > > of sophisticated multicast related features in user space by allowing > > applications to combine kernel provided multicast address information > > with user space data and communicate decisions back to the kernel for > > more fine grained control. This mechanism can be used for various > > purposes, including multicast filtering, IGMP/MLD offload, and > > IGMP/MLD snooping. > > > > Cc: Maciej Żenczykowski <maze@google.com> > > Cc: Lorenzo Colitti <lorenzo@google.com> > > Co-developed-by: Patrick Ruddy <pruddy@vyatta.att-mail.com> > > Signed-off-by: Patrick Ruddy <pruddy@vyatta.att-mail.com> > > Link: https://lore.kernel.org/r/20180906091056.21109-1-pruddy@vyatta.att-mail.com > > Signed-off-by: Yuyang Huang <yuyanghuang@google.com> > > A minor comment below and then: > Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com> > > > --- > > > > Changelog since v3: > > - Remove unused variable 'scope' declaration. > > - Align RTM_NEWMULTICAST and RTM_GETMULTICAST enum definitions with > > existing code style. > > > > Changelog since v2: > > - Use RT_SCOPE_UNIVERSE for both IGMP and MLD notification messages for > > consistency. > > > > Changelog since v1: > > - Implement MLD join/leave notifications. > > - Revise the comment message to make it generic. > > - Fix netdev/source_inline error. > > - Reorder local variables according to "reverse xmas tree” style. > > > > include/uapi/linux/rtnetlink.h | 10 +++++- > > net/ipv4/igmp.c | 53 +++++++++++++++++++++++++++++++ > > net/ipv6/mcast.c | 57 ++++++++++++++++++++++++++++++++++ > > 3 files changed, 119 insertions(+), 1 deletion(-) > > > > diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h > > index db7254d52d93..eccc0e7dcb7d 100644 > > --- a/include/uapi/linux/rtnetlink.h > > +++ b/include/uapi/linux/rtnetlink.h > > @@ -93,7 +93,11 @@ enum { > > RTM_NEWPREFIX = 52, > > #define RTM_NEWPREFIX RTM_NEWPREFIX > > > > - RTM_GETMULTICAST = 58, > > + RTM_NEWMULTICAST = 56, > > +#define RTM_NEWMULTICAST RTM_NEWMULTICAST > > + RTM_DELMULTICAST, > > +#define RTM_DELMULTICAST RTM_DELMULTICAST > > + RTM_GETMULTICAST, > > #define RTM_GETMULTICAST RTM_GETMULTICAST > > > > RTM_GETANYCAST = 62, > > @@ -774,6 +778,10 @@ enum rtnetlink_groups { > > #define RTNLGRP_TUNNEL RTNLGRP_TUNNEL > > RTNLGRP_STATS, > > #define RTNLGRP_STATS RTNLGRP_STATS > > + RTNLGRP_IPV4_MCADDR, > > +#define RTNLGRP_IPV4_MCADDR RTNLGRP_IPV4_MCADDR > > + RTNLGRP_IPV6_MCADDR, > > +#define RTNLGRP_IPV6_MCADDR RTNLGRP_IPV6_MCADDR > > __RTNLGRP_MAX > > }; > > #define RTNLGRP_MAX (__RTNLGRP_MAX - 1) > > diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c > > index 6a238398acc9..8d6ee19864c6 100644 > > --- a/net/ipv4/igmp.c > > +++ b/net/ipv4/igmp.c > > @@ -88,6 +88,7 @@ > > #include <linux/byteorder/generic.h> > > > > #include <net/net_namespace.h> > > +#include <net/netlink.h> > > #include <net/arp.h> > > #include <net/ip.h> > > #include <net/protocol.h> > > @@ -1430,6 +1431,55 @@ static void ip_mc_hash_remove(struct in_device *in_dev, > > *mc_hash = im->next_hash; > > } > > > > +static int inet_fill_ifmcaddr(struct sk_buff *skb, struct net_device *dev, > > + __be32 addr, int event) > > +{ > > + struct ifaddrmsg *ifm; > > + struct nlmsghdr *nlh; > > + > > + nlh = nlmsg_put(skb, 0, 0, event, sizeof(struct ifaddrmsg), 0); > > + if (!nlh) > > + return -EMSGSIZE; > > + > > + ifm = nlmsg_data(nlh); > > + ifm->ifa_family = AF_INET; > > + ifm->ifa_prefixlen = 32; > > + ifm->ifa_flags = IFA_F_PERMANENT; > > + ifm->ifa_scope = RT_SCOPE_UNIVERSE; > > + ifm->ifa_index = dev->ifindex; > > + > > + if (nla_put_in_addr(skb, IFA_MULTICAST, addr) < 0) { > > + nlmsg_cancel(skb, nlh); > > + return -EMSGSIZE; > > + } > > + > > + nlmsg_end(skb, nlh); > > + return 0; > > +} > > + > > +static void inet_ifmcaddr_notify(struct net_device *dev, __be32 addr, int event) > > +{ > > + struct net *net = dev_net(dev); > > + struct sk_buff *skb; > > + int err = -ENOBUFS; > > + > > + skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct ifaddrmsg)) > > + + nla_total_size(sizeof(__be32)), GFP_ATOMIC); > > + if (!skb) > > + goto error; > > + > > + err = inet_fill_ifmcaddr(skb, dev, addr, event); > > + if (err < 0) { > > + WARN_ON(err == -EMSGSIZE); > Maybe WARN_ON_ONCE() is enough? > > > Regards, > Nicolas
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index db7254d52d93..eccc0e7dcb7d 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -93,7 +93,11 @@ enum { RTM_NEWPREFIX = 52, #define RTM_NEWPREFIX RTM_NEWPREFIX - RTM_GETMULTICAST = 58, + RTM_NEWMULTICAST = 56, +#define RTM_NEWMULTICAST RTM_NEWMULTICAST + RTM_DELMULTICAST, +#define RTM_DELMULTICAST RTM_DELMULTICAST + RTM_GETMULTICAST, #define RTM_GETMULTICAST RTM_GETMULTICAST RTM_GETANYCAST = 62, @@ -774,6 +778,10 @@ enum rtnetlink_groups { #define RTNLGRP_TUNNEL RTNLGRP_TUNNEL RTNLGRP_STATS, #define RTNLGRP_STATS RTNLGRP_STATS + RTNLGRP_IPV4_MCADDR, +#define RTNLGRP_IPV4_MCADDR RTNLGRP_IPV4_MCADDR + RTNLGRP_IPV6_MCADDR, +#define RTNLGRP_IPV6_MCADDR RTNLGRP_IPV6_MCADDR __RTNLGRP_MAX }; #define RTNLGRP_MAX (__RTNLGRP_MAX - 1) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 6a238398acc9..8d6ee19864c6 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -88,6 +88,7 @@ #include <linux/byteorder/generic.h> #include <net/net_namespace.h> +#include <net/netlink.h> #include <net/arp.h> #include <net/ip.h> #include <net/protocol.h> @@ -1430,6 +1431,55 @@ static void ip_mc_hash_remove(struct in_device *in_dev, *mc_hash = im->next_hash; } +static int inet_fill_ifmcaddr(struct sk_buff *skb, struct net_device *dev, + __be32 addr, int event) +{ + struct ifaddrmsg *ifm; + struct nlmsghdr *nlh; + + nlh = nlmsg_put(skb, 0, 0, event, sizeof(struct ifaddrmsg), 0); + if (!nlh) + return -EMSGSIZE; + + ifm = nlmsg_data(nlh); + ifm->ifa_family = AF_INET; + ifm->ifa_prefixlen = 32; + ifm->ifa_flags = IFA_F_PERMANENT; + ifm->ifa_scope = RT_SCOPE_UNIVERSE; + ifm->ifa_index = dev->ifindex; + + if (nla_put_in_addr(skb, IFA_MULTICAST, addr) < 0) { + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; + } + + nlmsg_end(skb, nlh); + return 0; +} + +static void inet_ifmcaddr_notify(struct net_device *dev, __be32 addr, int event) +{ + struct net *net = dev_net(dev); + struct sk_buff *skb; + int err = -ENOBUFS; + + skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + + nla_total_size(sizeof(__be32)), GFP_ATOMIC); + if (!skb) + goto error; + + err = inet_fill_ifmcaddr(skb, dev, addr, event); + if (err < 0) { + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto error; + } + + rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MCADDR, NULL, GFP_ATOMIC); + return; +error: + rtnl_set_sk_err(net, RTNLGRP_IPV4_MCADDR, err); +} /* * A socket has joined a multicast group on device dev. @@ -1492,6 +1542,7 @@ static void ____ip_mc_inc_group(struct in_device *in_dev, __be32 addr, igmpv3_del_delrec(in_dev, im); #endif igmp_group_added(im); + inet_ifmcaddr_notify(in_dev->dev, addr, RTM_NEWMULTICAST); if (!in_dev->dead) ip_rt_multicast_event(in_dev); out: @@ -1705,6 +1756,8 @@ void __ip_mc_dec_group(struct in_device *in_dev, __be32 addr, gfp_t gfp) *ip = i->next_rcu; in_dev->mc_count--; __igmp_group_dropped(i, gfp); + inet_ifmcaddr_notify(in_dev->dev, addr, + RTM_DELMULTICAST); ip_mc_clear_src(i); if (!in_dev->dead) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index b244dbf61d5f..41a486d422d0 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -33,8 +33,10 @@ #include <linux/in.h> #include <linux/in6.h> #include <linux/netdevice.h> +#include <linux/if_addr.h> #include <linux/if_arp.h> #include <linux/route.h> +#include <linux/rtnetlink.h> #include <linux/init.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> @@ -47,6 +49,7 @@ #include <linux/netfilter_ipv6.h> #include <net/net_namespace.h> +#include <net/netlink.h> #include <net/sock.h> #include <net/snmp.h> @@ -901,6 +904,57 @@ static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev, return mc; } +static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct net_device *dev, + const struct in6_addr *addr, int event) +{ + struct ifaddrmsg *ifm; + struct nlmsghdr *nlh; + + nlh = nlmsg_put(skb, 0, 0, event, sizeof(struct ifaddrmsg), 0); + if (!nlh) + return -EMSGSIZE; + + ifm = nlmsg_data(nlh); + ifm->ifa_family = AF_INET6; + ifm->ifa_prefixlen = 128; + ifm->ifa_flags = IFA_F_PERMANENT; + ifm->ifa_scope = RT_SCOPE_UNIVERSE; + ifm->ifa_index = dev->ifindex; + + if (nla_put_in6_addr(skb, IFA_MULTICAST, addr) < 0) { + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; + } + + nlmsg_end(skb, nlh); + return 0; +} + +static void inet6_ifmcaddr_notify(struct net_device *dev, + const struct in6_addr *addr, int event) +{ + struct net *net = dev_net(dev); + struct sk_buff *skb; + int err = -ENOBUFS; + + skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + + nla_total_size(16), GFP_ATOMIC); + if (!skb) + goto error; + + err = inet6_fill_ifmcaddr(skb, dev, addr, event); + if (err < 0) { + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto error; + } + + rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MCADDR, NULL, GFP_ATOMIC); + return; +error: + rtnl_set_sk_err(net, RTNLGRP_IPV6_MCADDR, err); +} + /* * device multicast group inc (add if not found) */ @@ -948,6 +1002,7 @@ static int __ipv6_dev_mc_inc(struct net_device *dev, mld_del_delrec(idev, mc); igmp6_group_added(mc); + inet6_ifmcaddr_notify(dev, addr, RTM_NEWMULTICAST); mutex_unlock(&idev->mc_lock); ma_put(mc); return 0; @@ -977,6 +1032,8 @@ int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr) *map = ma->next; igmp6_group_dropped(ma); + inet6_ifmcaddr_notify(idev->dev, addr, + RTM_DELMULTICAST); ip6_mc_clear_src(ma); mutex_unlock(&idev->mc_lock);