@@ -550,6 +550,26 @@ lacp_rate
The default is slow.
+mac_filter
+
+ Tells the bonding device to drop received frames whose source MAC
+ address matches an entry in a filter table. The filter table is
+ populated with the source addresses of frames the bond transmits.
+ This is similar in concept to the MAC learning table implemented
+ in the bridge code.
+
+ This filtering is only enabled for the balance-xor bonding mode.
+
+ off or 0
+ Turns the feature off
+
+ number
+ A number greater than zero turns the feature on and is used
+ as the exponent N in a 2^N calculation that sets the maximum
+ size of the hash table, and therefore the maximum number of
+ MAC addresses that can be tracked (see the example below).
+
+ The default is off.
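+
+ For example, setting mac_filter to 10 sizes the hash table for
+ at most 2^10 = 1024 MAC addresses.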
+
max_bonds
Specifies the number of bonding devices to create for this
@@ -5,7 +5,7 @@
obj-$(CONFIG_BONDING) += bonding.o
-bonding-objs := bond_main.o bond_3ad.o bond_alb.o bond_sysfs.o bond_sysfs_slave.o bond_debugfs.o bond_netlink.o bond_options.o
+bonding-objs := bond_main.o bond_3ad.o bond_alb.o bond_sysfs.o bond_sysfs_slave.o bond_debugfs.o bond_netlink.o bond_options.o bond_mac_filter.o
proc-$(CONFIG_PROC_FS) += bond_procfs.o
bonding-objs += $(proc-y)
new file mode 100644
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Filter received frames based on MAC addresses "behind" the bond.
+ */
+
+#include "bonding_priv.h"
+
+static const struct rhashtable_params bond_rht_params = {
+ .head_offset = offsetof(struct bond_mac_cache_entry, rhnode),
+ .key_offset = offsetof(struct bond_mac_cache_entry, key),
+ .key_len = sizeof(struct mac_addr),
+ .automatic_shrinking = true,
+};
+
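+/* Entries go stale five seconds after the transmit that last refreshed them. */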
+static unsigned long hold_time(void)
+{
+ return msecs_to_jiffies(5000);
+}
+
+static bool has_expired(struct bond_mac_cache_entry *entry)
+{
+ return time_before(entry->expired, jiffies);
+}
+
+static bool has_expired_sync(struct bond_mac_cache_entry *entry)
+{
+ bool ret;
+
+ spin_lock(&entry->lock);
+ ret = has_expired(entry);
+ spin_unlock(&entry->lock);
+
+ return ret;
+}
+
+static void mac_delete_rcu(struct callback_head *head)
+{
+ kmem_cache_free(bond_mac_cache,
+ container_of(head, struct bond_mac_cache_entry, rcu));
+}
+
+static int mac_remove(struct bonding *bond,
+ struct bond_mac_cache_entry *entry)
+{
+ set_bit(BOND_MAC_DEAD, &entry->flags);
+ return rhashtable_remove_fast(bond->mac_filter_tbl,
+ &entry->rhnode,
+ bond->mac_filter_tbl->p);
+}
+
+void bond_mac_hash_release_entries(struct work_struct *work)
+{
+ struct bonding *bond = container_of(work, struct bonding,
+ mac_work.work);
+ struct bond_mac_cache_entry *entry;
+ struct rhashtable_iter iter;
+
+ rhashtable_walk_enter(bond->mac_filter_tbl, &iter);
+ rhashtable_walk_start(&iter);
+ while ((entry = rhashtable_walk_next(&iter)) != NULL) {
+ if (IS_ERR(entry))
+ continue;
+
+ spin_lock_bh(&entry->lock);
+ if (!has_expired(entry)) {
+ spin_unlock_bh(&entry->lock);
+ continue;
+ }
+
+ mac_remove(bond, entry);
+ spin_unlock_bh(&entry->lock);
+ call_rcu(&entry->rcu, mac_delete_rcu);
+ }
+ rhashtable_walk_stop(&iter);
+ rhashtable_walk_exit(&iter);
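+ /* Rescan for expired entries every five minutes. */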
+ queue_delayed_work(bond->wq, &bond->mac_work,
+ msecs_to_jiffies(5 * 60 * 1000));
+}
+
+int bond_mac_hash_init(struct bonding *bond)
+{
+ int ret;
+
+ bond->mac_filter_tbl = kzalloc(sizeof(*bond->mac_filter_tbl),
+ GFP_KERNEL);
+ if (!bond->mac_filter_tbl)
+ return -ENOMEM;
+
+ ret = rhashtable_init(bond->mac_filter_tbl, &bond_rht_params);
+ if (ret) {
+ kfree(bond->mac_filter_tbl);
+ bond->mac_filter_tbl = NULL;
+ return ret;
+ }
+
+ /* Cap the table at 2^mac_filter entries. */
+ bond->mac_filter_tbl->p.max_size = 1 << bond->params.mac_filter;
+ netdev_dbg(bond->dev, "mac_filter hash table size: %u\n",
+ bond->mac_filter_tbl->p.max_size);
+ return 0;
+}
+
+static void bond_mac_free_entry(void *entry, void *ctx)
+{
+ kmem_cache_free((struct kmem_cache *)ctx, entry);
+}
+
+void bond_mac_hash_destroy(struct bonding *bond)
+{
+ if (bond->mac_filter_tbl) {
+ rhashtable_free_and_destroy(bond->mac_filter_tbl,
+ bond_mac_free_entry,
+ bond_mac_cache);
+ kfree(bond->mac_filter_tbl);
+ bond->mac_filter_tbl = NULL;
+ }
+}
+
+static void mac_update(struct bond_mac_cache_entry *entry)
+{
+ entry->expired = jiffies + hold_time();
+}
+
+static int mac_create(struct bonding *bond, const u8 *addr)
+{
+ struct bond_mac_cache_entry *entry;
+ int ret;
+
+ entry = kmem_cache_zalloc(bond_mac_cache, GFP_ATOMIC);
+ if (!entry)
+ return -ENOMEM;
+
+ spin_lock_init(&entry->lock);
+ memcpy(&entry->key, addr, sizeof(entry->key));
+ mac_update(entry);
+ ret = rhashtable_lookup_insert_fast(bond->mac_filter_tbl,
+ &entry->rhnode,
+ bond->mac_filter_tbl->p);
+ if (ret) {
+ kmem_cache_free(bond_mac_cache, entry);
+ if (ret == -EEXIST)
+ return 0;
+ netdev_dbg(bond->dev, "Failed to insert mac entry %d\n", ret);
+ }
+ return ret;
+}
+
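+/* Caller must hold rcu_read_lock(); rhashtable_lookup() depends on it. */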
+static struct bond_mac_cache_entry *mac_find(struct bonding *bond,
+ const u8 *addr)
+{
+ struct mac_addr key;
+
+ memcpy(&key, addr, sizeof(key));
+ return rhashtable_lookup(bond->mac_filter_tbl, &key,
+ bond->mac_filter_tbl->p);
+}
+
+int bond_mac_insert(struct bonding *bond, const u8 *addr)
+{
+ struct bond_mac_cache_entry *entry;
+ int ret = 0;
+
+ if (!is_valid_ether_addr(addr))
+ return -EINVAL;
+
+ entry = mac_find(bond, addr);
+ if (entry) {
+ /* Use the _bh variants: the xmit path may run in process context
+ * while the RX path contends for the same lock from softirq.
+ */
+ spin_lock_bh(&entry->lock);
+ if (!test_bit(BOND_MAC_DEAD, &entry->flags)) {
+ mac_update(entry);
+ spin_unlock_bh(&entry->lock);
+ goto out;
+ }
+ spin_unlock_bh(&entry->lock);
+ }
+
+ ret = mac_create(bond, addr);
+
+out:
+ return ret;
+}
+
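+/* Installed as bond->recv_probe; returning RX_HANDLER_CONSUMED tells the
+ * caller to drop the frame.
+ */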
+int bond_mac_filter_recv(const struct sk_buff *skb, struct bonding *bond,
+ struct slave *slave)
+{
+ struct bond_mac_cache_entry *entry;
+ const struct ethhdr *mac_hdr;
+ int ret = RX_HANDLER_ANOTHER;
+
+ mac_hdr = (struct ethhdr *)skb_mac_header(skb);
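+ /* Accept multicast/broadcast only on the active slave to avoid
+ * duplicate delivery.
+ */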
+ if (is_multicast_ether_addr(mac_hdr->h_dest) &&
+ slave != rcu_dereference(bond->curr_active_slave)) {
+ ret = RX_HANDLER_CONSUMED;
+ goto out;
+ }
+
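+ /* Drop frames whose source MAC was learned from the bond's own
+ * transmissions (reflected frames).
+ */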
+ entry = mac_find(bond, mac_hdr->h_source);
+ if (entry && !has_expired_sync(entry))
+ ret = RX_HANDLER_CONSUMED;
+
+out:
+ return ret;
+}
new file mode 100644
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Filter received frames based on MAC addresses "behind" the bond.
+ */
+
+#ifndef _BOND_MAC_FILTER_H
+#define _BOND_MAC_FILTER_H
+#include <net/bonding.h>
+#include <linux/spinlock.h>
+#include <linux/rhashtable.h>
+
+enum {
+ BOND_MAC_DEAD,
+};
+
+struct bond_mac_cache_entry {
+ struct rhash_head rhnode;
+ struct mac_addr key;
+
+ spinlock_t lock; /* protects flags and expired */
+ unsigned long flags;
+ unsigned long expired; /* jiffies after which the entry is stale */
+ struct rcu_head rcu;
+};
+
+extern struct kmem_cache *bond_mac_cache;
+
+void bond_mac_hash_release_entries(struct work_struct *work);
+int bond_mac_hash_init(struct bonding *bond);
+void bond_mac_hash_destroy(struct bonding *bond);
+
+int bond_mac_insert(struct bonding *bond, const u8 *addr);
+int bond_mac_filter_recv(const struct sk_buff *skb,
+ struct bonding *bond,
+ struct slave *slave);
+
+#endif
@@ -207,6 +207,7 @@ MODULE_PARM_DESC(lp_interval, "The number of seconds between instances where "
atomic_t netpoll_block_tx = ATOMIC_INIT(0);
#endif
+struct kmem_cache *bond_mac_cache __read_mostly;
unsigned int bond_net_id __read_mostly;
static const struct flow_dissector_key flow_keys_bonding_keys[] = {
@@ -4151,6 +4152,7 @@ void bond_work_init_all(struct bonding *bond)
INIT_DELAYED_WORK(&bond->arp_work, bond_arp_monitor);
INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler);
INIT_DELAYED_WORK(&bond->slave_arr_work, bond_slave_arr_handler);
+ INIT_DELAYED_WORK(&bond->mac_work, bond_mac_hash_release_entries);
}
static void bond_work_cancel_all(struct bonding *bond)
@@ -4161,6 +4163,7 @@ static void bond_work_cancel_all(struct bonding *bond)
cancel_delayed_work_sync(&bond->ad_work);
cancel_delayed_work_sync(&bond->mcast_work);
cancel_delayed_work_sync(&bond->slave_arr_work);
+ cancel_delayed_work_sync(&bond->mac_work);
}
static int bond_open(struct net_device *bond_dev)
@@ -4208,6 +4211,15 @@ static int bond_open(struct net_device *bond_dev)
bond_3ad_initiate_agg_selection(bond, 1);
}
+ if (BOND_MODE(bond) == BOND_MODE_XOR && bond->params.mac_filter) {
+ int ret = bond_mac_hash_init(bond);
+
+ if (ret)
+ return ret;
+ bond->recv_probe = bond_mac_filter_recv;
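+ /* Kick off the periodic expiry scan immediately. */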
+ queue_delayed_work(bond->wq, &bond->mac_work, 0);
+ }
+
if (bond_mode_can_use_xmit_hash(bond))
bond_update_slave_arr(bond, NULL);
@@ -4223,6 +4235,7 @@ static int bond_close(struct net_device *bond_dev)
if (bond_is_lb(bond))
bond_alb_deinitialize(bond);
bond->recv_probe = NULL;
+ bond_mac_hash_destroy(bond);
return 0;
}
@@ -5077,6 +5090,13 @@ static struct slave *bond_xmit_3ad_xor_slave_get(struct bonding *bond,
if (unlikely(!count))
return NULL;
+ if (BOND_MODE(bond) == BOND_MODE_XOR && bond->params.mac_filter) {
+ const struct ethhdr *mac_hdr;
+
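+ /* Learn the source MAC of every transmitted frame into the
+ * filter table.
+ */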
+ mac_hdr = (struct ethhdr *)skb_mac_header(skb);
+ if (bond_mac_insert(bond, mac_hdr->h_source))
+ return NULL;
+ }
slave = slaves->arr[hash % count];
return slave;
}
@@ -6158,6 +6178,7 @@ static int bond_check_params(struct bond_params *params)
params->downdelay = downdelay;
params->peer_notif_delay = 0;
params->use_carrier = use_carrier;
+ params->mac_filter = 0;
params->lacp_active = 1;
params->lacp_fast = lacp_fast;
params->primary[0] = 0;
@@ -6350,6 +6371,14 @@ static int __init bonding_init(void)
goto err;
}
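+ /* Slab cache for MAC filter table entries, shared by all bonds. */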
+ bond_mac_cache = kmem_cache_create("bond_mac_cache",
+ sizeof(struct bond_mac_cache_entry),
+ 0, SLAB_HWCACHE_ALIGN, NULL);
+ if (!bond_mac_cache) {
+ res = -ENOMEM;
+ goto err;
+ }
+
skb_flow_dissector_init(&flow_keys_bonding,
flow_keys_bonding_keys,
ARRAY_SIZE(flow_keys_bonding_keys));
@@ -6379,6 +6408,7 @@ static void __exit bonding_exit(void)
/* Make sure we don't have an imbalance on our netpoll blocking */
WARN_ON(atomic_read(&netpoll_block_tx));
#endif
+ kmem_cache_destroy(bond_mac_cache);
}
module_init(bonding_init);
@@ -117,6 +117,7 @@ static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = {
[IFLA_BOND_PEER_NOTIF_DELAY] = { .type = NLA_U32 },
[IFLA_BOND_MISSED_MAX] = { .type = NLA_U8 },
[IFLA_BOND_NS_IP6_TARGET] = { .type = NLA_NESTED },
+ [IFLA_BOND_MAC_FILTER] = { .type = NLA_U8 },
};
static const struct nla_policy bond_slave_policy[IFLA_BOND_SLAVE_MAX + 1] = {
@@ -196,6 +197,15 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[],
if (err)
return err;
}
+ if (data[IFLA_BOND_MAC_FILTER]) {
+ u8 mac_filter = nla_get_u8(data[IFLA_BOND_MAC_FILTER]);
+
+ bond_opt_initval(&newval, mac_filter);
+ err = __bond_opt_set(bond, BOND_OPT_MAC_FILTER, &newval,
+ data[IFLA_BOND_MAC_FILTER], extack);
+ if (err)
+ return err;
+ }
if (data[IFLA_BOND_ACTIVE_SLAVE]) {
int ifindex = nla_get_u32(data[IFLA_BOND_ACTIVE_SLAVE]);
struct net_device *slave_dev;
@@ -610,6 +620,7 @@ static size_t bond_get_size(const struct net_device *bond_dev)
/* IFLA_BOND_NS_IP6_TARGET */
nla_total_size(sizeof(struct nlattr)) +
nla_total_size(sizeof(struct in6_addr)) * BOND_MAX_NS_TARGETS +
+ nla_total_size(sizeof(u8)) + /* IFLA_BOND_MAC_FILTER */
0;
}
@@ -768,6 +779,9 @@ static int bond_fill_info(struct sk_buff *skb,
if (nla_put_u8(skb, IFLA_BOND_MISSED_MAX,
bond->params.missed_max))
goto nla_put_failure;
+ if (nla_put_u8(skb, IFLA_BOND_MAC_FILTER,
+ bond->params.mac_filter))
+ goto nla_put_failure;
if (BOND_MODE(bond) == BOND_MODE_8023AD) {
struct ad_info info;
@@ -15,6 +15,7 @@
#include <linux/sched/signal.h>
#include <net/bonding.h>
+#include "bonding_priv.h"
static int bond_option_active_slave_set(struct bonding *bond,
const struct bond_opt_value *newval);
@@ -84,7 +85,8 @@ static int bond_option_ad_user_port_key_set(struct bonding *bond,
const struct bond_opt_value *newval);
static int bond_option_missed_max_set(struct bonding *bond,
const struct bond_opt_value *newval);
-
+static int bond_option_mac_filter_set(struct bonding *bond,
+ const struct bond_opt_value *newval);
static const struct bond_opt_value bond_mode_tbl[] = {
{ "balance-rr", BOND_MODE_ROUNDROBIN, BOND_VALFLAG_DEFAULT},
@@ -226,6 +228,12 @@ static const struct bond_opt_value bond_missed_max_tbl[] = {
{ NULL, -1, 0},
};
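+/* The value is the exponent N of the 2^N hash table size limit;
+ * 18 caps the table at 256K entries.
+ */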
+static const struct bond_opt_value bond_mac_filter_tbl[] = {
+ { "off", 0, BOND_VALFLAG_MIN | BOND_VALFLAG_DEFAULT},
+ { "maxval", 18, BOND_VALFLAG_MAX},
+ { NULL, -1, 0}
+};
+
static const struct bond_option bond_opts[BOND_OPT_LAST] = {
[BOND_OPT_MODE] = {
.id = BOND_OPT_MODE,
@@ -490,7 +498,16 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = {
.desc = "Delay between each peer notification on failover event, in milliseconds",
.values = bond_intmax_tbl,
.set = bond_option_peer_notif_delay_set
- }
+ },
+ [BOND_OPT_MAC_FILTER] = {
+ .id = BOND_OPT_MAC_FILTER,
+ .name = "mac_filter",
+ .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_XOR)),
+ .desc = "filter received frames based on MAC addresses that have transmitted from the bond, number of MAC addresses to track",
+ .flags = BOND_OPTFLAG_IFDOWN,
+ .values = bond_mac_filter_tbl,
+ .set = bond_option_mac_filter_set
+ },
};
/* Searches for an option by name */
@@ -855,25 +872,38 @@ static bool bond_set_tls_features(struct bonding *bond)
return true;
}
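+/* Disable ARP monitoring and fall back to MII monitoring at the
+ * default interval.
+ */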
+static void disable_arp_enable_mii(struct bonding *bond, const char *feature,
+ const char *suffix)
+{
+ if (bond->params.arp_interval) {
+ netdev_dbg(bond->dev, "%s%s is incompatible with arp monitoring, start mii monitoring\n",
+ feature, suffix);
+ /* disable arp monitoring */
+ bond->params.arp_interval = 0;
+ }
+
+ if (!bond->params.miimon) {
+ /* set miimon to default value */
+ bond->params.miimon = BOND_DEFAULT_MIIMON;
+ netdev_dbg(bond->dev, "Setting MII monitoring interval to %d\n",
+ bond->params.miimon);
+ }
+}
+
static int bond_option_mode_set(struct bonding *bond,
const struct bond_opt_value *newval)
{
- if (!bond_mode_uses_arp(newval->value)) {
- if (bond->params.arp_interval) {
- netdev_dbg(bond->dev, "%s mode is incompatible with arp monitoring, start mii monitoring\n",
- newval->string);
- /* disable arp monitoring */
- bond->params.arp_interval = 0;
- }
-
- if (!bond->params.miimon) {
- /* set miimon to default value */
- bond->params.miimon = BOND_DEFAULT_MIIMON;
- netdev_dbg(bond->dev, "Setting MII monitoring interval to %d\n",
- bond->params.miimon);
- }
+ if (bond->params.mac_filter && newval->value != BOND_MODE_XOR) {
+ netdev_dbg(bond->dev, "%s mode is incompatible with mac filtering, disabling\n",
+ newval->string);
+ bond->params.mac_filter = 0;
}
+ if (!bond_mode_uses_arp(newval->value))
+ disable_arp_enable_mii(bond, newval->string, " mode");
+ else if (bond->params.mac_filter && bond->params.arp_interval)
+ disable_arp_enable_mii(bond, "MAC filtering", "");
+
if (newval->value == BOND_MODE_ALB)
bond->params.tlb_dynamic_lb = 1;
@@ -1061,6 +1091,17 @@ static int bond_option_use_carrier_set(struct bonding *bond,
return 0;
}
+static int bond_option_mac_filter_set(struct bonding *bond,
+ const struct bond_opt_value *newval)
+{
+ if (newval->value && bond->params.arp_interval)
+ disable_arp_enable_mii(bond, "MAC filtering", "");
+
+ netdev_dbg(bond->dev, "Setting mac_filter to %llu\n", newval->value);
+ bond->params.mac_filter = newval->value;
+ return 0;
+}
+
/* There are two tricky bits here. First, if ARP monitoring is activated, then
* we must disable MII monitoring. Second, if the ARP timer isn't running,
* we must start it.
@@ -1068,6 +1109,14 @@ static int bond_option_use_carrier_set(struct bonding *bond,
static int bond_option_arp_interval_set(struct bonding *bond,
const struct bond_opt_value *newval)
{
+ if (newval->value && bond->params.mac_filter) {
+ if (bond->dev->flags & IFF_UP)
+ return -EBUSY;
+
+ netdev_dbg(bond->dev, "MAC filtering cannot be used with ARP monitoring. Disabling MAC filtering\n");
+ bond->params.mac_filter = 0;
+ }
+
netdev_dbg(bond->dev, "Setting ARP monitoring interval to %llu\n",
newval->value);
bond->params.arp_interval = newval->value;
@@ -15,6 +15,7 @@
#ifndef _BONDING_PRIV_H
#define _BONDING_PRIV_H
#include <generated/utsrelease.h>
+#include "bond_mac_filter.h"
#define DRV_NAME "bonding"
#define DRV_DESCRIPTION "Ethernet Channel Bonding Driver"
@@ -68,6 +68,7 @@ enum {
BOND_OPT_MISSED_MAX,
BOND_OPT_NS_TARGETS,
BOND_OPT_PRIO,
+ BOND_OPT_MAC_FILTER,
BOND_OPT_LAST
};
@@ -125,6 +125,7 @@ struct bond_params {
int miimon;
u8 num_peer_notif;
u8 missed_max;
+ u8 mac_filter;
int arp_interval;
int arp_validate;
int arp_all_targets;
@@ -251,6 +252,7 @@ struct bonding {
struct delayed_work alb_work;
struct delayed_work ad_work;
struct delayed_work mcast_work;
+ struct delayed_work mac_work;
struct delayed_work slave_arr_work;
#ifdef CONFIG_DEBUG_FS
/* debugging support via debugfs */
@@ -263,6 +265,7 @@ struct bonding {
spinlock_t ipsec_lock;
#endif /* CONFIG_XFRM_OFFLOAD */
struct bpf_prog *xdp_prog;
+ struct rhashtable *mac_filter_tbl;
};
#define bond_slave_get_rcu(dev) \
@@ -936,6 +936,7 @@ enum {
IFLA_BOND_AD_LACP_ACTIVE,
IFLA_BOND_MISSED_MAX,
IFLA_BOND_NS_IP6_TARGET,
+ IFLA_BOND_MAC_FILTER,
__IFLA_BOND_MAX,
};