[RFC,2/2] net: bridge: add a software fast-path implementation

Message ID	20220210142401.4912-2-nbd@nbd.name (mailing list archive)
State	RFC
Delegated to:	Netdev Maintainers
Headers	show Return-Path: <netdev-owner@kernel.org> From: Felix Fietkau <nbd@nbd.name> To: netdev@vger.kernel.org Subject: [RFC 2/2] net: bridge: add a software fast-path implementation Date: Thu, 10 Feb 2022 15:24:01 +0100 Message-Id: <20220210142401.4912-2-nbd@nbd.name> In-Reply-To: <20220210142401.4912-1-nbd@nbd.name> References: <20220210142401.4912-1-nbd@nbd.name> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Precedence: bulk
Series	[RFC,1/2] net: bridge: add knob for filtering rx/tx BPDU packets on a port \| expand [RFC,1/2] net: bridge: add knob for filtering rx/tx BPDU packets on a port [RFC,2/2] net: bridge: add a software fast-path implementation

Context	Check	Description
netdev/fixes_present	success	Fixes tag not required for -next series
netdev/subject_prefix	warning	Target tree name not specified in the subject
netdev/cover_letter	success	Single patches do not need cover letters
netdev/patch_count	success	Link
netdev/header_inline	success	No static functions without inline keyword in header files
netdev/build_32bit	fail	Errors and warnings before: 4868 this patch: 4481
netdev/cc_maintainers	fail	5 maintainers not CCed: roopa@nvidia.com kuba@kernel.org nikolay@nvidia.com davem@davemloft.net bridge@lists.linux-foundation.org
netdev/build_clang	fail	Errors and warnings before: 826 this patch: 816
netdev/module_param	success	Was 0 now: 0
netdev/verify_signedoff	success	Signed-off-by tag matches author and committer
netdev/verify_fixes	success	No Fixes tag
netdev/build_allmodconfig_warn	fail	Errors and warnings before: 5025 this patch: 4637
netdev/checkpatch	fail	CHECK: Blank lines aren't necessary before a close brace '}' CHECK: Please don't use multiple blank lines CHECK: Please use a blank line after function/struct/union/enum declarations ERROR: code indent should use tabs where possible WARNING: Missing or malformed SPDX-License-Identifier tag in line 1 WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? WARNING: line length of 83 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 89 exceeds 80 columns
netdev/kdoc	success	Errors and warnings before: 3 this patch: 3
netdev/source_inline	success	Was 0 now: 0
netdev/tree_selection	success	Guessing tree name failed - patch did not apply

diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 18d3b264b754..944630df0ec3 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -59,6 +59,7 @@ struct br_ip_list { #define BR_MRP_LOST_IN_CONT BIT(19) #define BR_TX_FWD_OFFLOAD BIT(20) #define BR_BPDU_FILTER BIT(21) +#define BR_OFFLOAD BIT(22) #define BR_DEFAULT_AGEING_TIME (300 * HZ) diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 4c847c2d6afa..a7349414a27f 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -482,6 +482,8 @@ enum { IFLA_BR_VLAN_STATS_PER_PORT, IFLA_BR_MULTI_BOOLOPT, IFLA_BR_MCAST_QUERIER_STATE, + IFLA_BR_OFFLOAD_CACHE_SIZE, + IFLA_BR_OFFLOAD_CACHE_RESERVED, __IFLA_BR_MAX, }; @@ -538,6 +540,7 @@ enum { IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT, IFLA_BRPORT_MCAST_EHT_HOSTS_CNT, IFLA_BRPORT_BPDU_FILTER, + IFLA_BRPORT_OFFLOAD, __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig index 3c8ded7d3e84..3f93da1f66da 100644 --- a/net/bridge/Kconfig +++ b/net/bridge/Kconfig @@ -34,6 +34,16 @@ config BRIDGE If unsure, say N. +config BRIDGE_OFFLOAD + bool "Offloading support" + depends on BRIDGE + help + If you say Y here, you can turn on a per-port offload flag, which + will cache src/destination mac address flows between ports and handle + them faster. + + If unsure, say N. + config BRIDGE_IGMP_SNOOPING bool "IGMP/MLD snooping" depends on BRIDGE diff --git a/net/bridge/Makefile b/net/bridge/Makefile index 7fb9a021873b..166f76b5f258 100644 --- a/net/bridge/Makefile +++ b/net/bridge/Makefile @@ -11,6 +11,7 @@ bridge-y := br.o br_device.o br_fdb.o br_forward.o br_if.o br_input.o \ br_netlink_tunnel.o br_arp_nd_proxy.o bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o +bridge-$(CONFIG_BRIDGE_OFFLOAD) += br_offload.o bridge-$(subst m,y,$(CONFIG_BRIDGE_NETFILTER)) += br_nf_core.o diff --git a/net/bridge/br.c b/net/bridge/br.c index 1fac72cc617f..bd46e5e20b30 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -18,6 +18,7 @@ #include <net/switchdev.h> #include "br_private.h" +#include "br_private_offload.h" /* * Handle changes in state of network devices enslaved to a bridge. @@ -381,6 +382,10 @@ static int __init br_init(void) if (err) goto err_out; + err = br_offload_init(); + if (err) + goto err_out0; + err = register_pernet_subsys(&br_net_ops); if (err) goto err_out1; @@ -430,6 +435,8 @@ static int __init br_init(void) err_out2: unregister_pernet_subsys(&br_net_ops); err_out1: + br_offload_fini(); +err_out0: br_fdb_fini(); err_out: stp_proto_unregister(&br_stp_proto); @@ -452,6 +459,7 @@ static void __exit br_deinit(void) #if IS_ENABLED(CONFIG_ATM_LANE) br_fdb_test_addr_hook = NULL; #endif + br_offload_fini(); br_fdb_fini(); } diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 8d6bab244c4a..10c4e4039c7b 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -524,6 +524,10 @@ void br_dev_setup(struct net_device *dev) br->bridge_hello_time = br->hello_time = 2 * HZ; br->bridge_forward_delay = br->forward_delay = 15 * HZ; br->bridge_ageing_time = br->ageing_time = BR_DEFAULT_AGEING_TIME; +#ifdef CONFIG_BRIDGE_OFFLOAD + br->offload_cache_size = 128; + br->offload_cache_reserved = 8; +#endif dev->max_mtu = ETH_MAX_MTU; br_netfilter_rtable_init(br); diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 6ccda68bd473..49abfc13a323 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -23,6 +23,7 @@ #include <net/switchdev.h> #include <trace/events/bridge.h> #include "br_private.h" +#include "br_private_offload.h" static const struct rhashtable_params br_fdb_rht_params = { .head_offset = offsetof(struct net_bridge_fdb_entry, rhnode), @@ -185,6 +186,8 @@ static void fdb_notify(struct net_bridge *br, struct sk_buff *skb; int err = -ENOBUFS; + br_offload_fdb_update(fdb); + if (swdev_notify) br_switchdev_fdb_notify(br, fdb, type); @@ -393,6 +396,10 @@ static struct net_bridge_fdb_entry *fdb_create(struct net_bridge *br, fdb->key.vlan_id = vid; fdb->flags = flags; fdb->updated = fdb->used = jiffies; +#ifdef CONFIG_BRIDGE_OFFLOAD + INIT_HLIST_HEAD(&fdb->offload_in); + INIT_HLIST_HEAD(&fdb->offload_out); +#endif err = rhashtable_lookup_insert_fast(&br->fdb_hash_tbl, &fdb->rhnode, br_fdb_rht_params); if (err) { @@ -527,8 +534,10 @@ void br_fdb_cleanup(struct work_struct *work) */ rcu_read_lock(); hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) { - unsigned long this_timer = f->updated + delay; + unsigned long this_timer; + br_offload_fdb_refresh_time(br, f); + this_timer = f->updated + delay; if (test_bit(BR_FDB_STATIC, &f->flags) || test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &f->flags)) { if (test_bit(BR_FDB_NOTIFY, &f->flags)) { @@ -651,8 +660,11 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf, if (num >= maxnum) break; - if (has_expired(br, f)) - continue; + if (has_expired(br, f)) { + if (!br_offload_fdb_refresh_time(br, f) || + has_expired(br, f)) + continue; + } /* ignore pseudo entry for local MAC address */ if (!f->dst) @@ -797,6 +809,7 @@ int br_fdb_dump(struct sk_buff *skb, if (!filter_dev && f->dst) goto skip; + br_offload_fdb_refresh_time(br, f); err = fdb_fill_info(skb, br, f, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, @@ -831,6 +844,7 @@ int br_fdb_get(struct sk_buff *skb, goto errout; } + br_offload_fdb_refresh_time(br, f); err = fdb_fill_info(skb, br, f, portid, seq, RTM_NEWNEIGH, 0); errout: diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 9fe5c888f27d..6d9025106d9d 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -16,6 +16,7 @@ #include <linux/if_vlan.h> #include <linux/netfilter_bridge.h> #include "br_private.h" +#include "br_private_offload.h" /* Don't forward packets to originating port or forwarding disabled */ static inline int should_deliver(const struct net_bridge_port *p, @@ -32,6 +33,8 @@ static inline int should_deliver(const struct net_bridge_port *p, int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) { + br_offload_output(skb); + skb_push(skb, ETH_HLEN); if (!is_skb_forwardable(skb->dev, skb)) goto drop; diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 55f47cadb114..c68c7f6cc429 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -25,6 +25,7 @@ #include <net/net_namespace.h> #include "br_private.h" +#include "br_private_offload.h" /* * Determine initial path cost based on speed. @@ -772,6 +773,9 @@ void br_port_flags_change(struct net_bridge_port *p, unsigned long mask) if (mask & BR_NEIGH_SUPPRESS) br_recalculate_neigh_suppress_enabled(br); + + if (mask & BR_OFFLOAD) + br_offload_port_state(p); } bool br_port_flag_is_set(const struct net_device *dev, unsigned long flag) diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index d8263c4849c1..b606ca06ff2d 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -22,6 +22,7 @@ #include <linux/rculist.h> #include "br_private.h" #include "br_private_tunnel.h" +#include "br_private_offload.h" static int br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb) @@ -164,6 +165,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb dst->used = now; br_forward(dst->dst, skb, local_rcv, false); } else { + br_offload_skb_disable(skb); if (!mcast_hit) br_flood(br, skb, pkt_type, local_rcv, false); else @@ -293,6 +295,9 @@ static rx_handler_result_t br_handle_frame(struct sk_buff **pskb) memset(skb->cb, 0, sizeof(struct br_input_skb_cb)); + if (br_offload_input(p, skb)) + return RX_HANDLER_CONSUMED; + p = br_port_get_rcu(skb->dev); if (p->flags & BR_VLAN_TUNNEL) br_handle_ingress_vlan_tunnel(skb, p, nbp_vlan_group_rcu(p)); diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 11215c55adc2..994aca4b633a 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -19,6 +19,7 @@ #include "br_private_cfm.h" #include "br_private_tunnel.h" #include "br_private_mcast_eht.h" +#include "br_private_offload.h" static int __get_num_vlan_infos(struct net_bridge_vlan_group *vg, u32 filter_mask) @@ -185,6 +186,7 @@ static inline size_t br_port_info_size(void) + nla_total_size(1) /* IFLA_BRPORT_NEIGH_SUPPRESS */ + nla_total_size(1) /* IFLA_BRPORT_ISOLATED */ + nla_total_size(1) /* IFLA_BRPORT_BPDU_FILTER */ + + nla_total_size(1) /* IFLA_BRPORT_OFFLOAD */ + nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_ROOT_ID */ + nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_BRIDGE_ID */ + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_DESIGNATED_PORT */ @@ -271,7 +273,8 @@ static int br_port_fill_attrs(struct sk_buff *skb, nla_put_u8(skb, IFLA_BRPORT_MRP_IN_OPEN, !!(p->flags & BR_MRP_LOST_IN_CONT)) || nla_put_u8(skb, IFLA_BRPORT_ISOLATED, !!(p->flags & BR_ISOLATED)) || - nla_put_u8(skb, IFLA_BRPORT_BPDU_FILTER, !!(p->flags & BR_BPDU_FILTER))) + nla_put_u8(skb, IFLA_BRPORT_BPDU_FILTER, !!(p->flags & BR_BPDU_FILTER)) || + nla_put_u8(skb, IFLA_BRPORT_OFFLOAD, !!(p->flags & BR_OFFLOAD))) return -EMSGSIZE; timerval = br_timer_value(&p->message_age_timer); @@ -832,6 +835,7 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_BACKUP_PORT] = { .type = NLA_U32 }, [IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT] = { .type = NLA_U32 }, [IFLA_BRPORT_BPDU_FILTER] = { .type = NLA_U8 }, + [IFLA_BRPORT_OFFLOAD] = { .type = NLA_U8 }, }; /* Change the state of the port and notify spanning tree */ @@ -897,6 +901,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[], br_set_port_flag(p, tb, IFLA_BRPORT_NEIGH_SUPPRESS, BR_NEIGH_SUPPRESS); br_set_port_flag(p, tb, IFLA_BRPORT_ISOLATED, BR_ISOLATED); br_set_port_flag(p, tb, IFLA_BRPORT_BPDU_FILTER, BR_BPDU_FILTER); + br_set_port_flag(p, tb, IFLA_BRPORT_OFFLOAD, BR_OFFLOAD); changed_mask = old_flags ^ p->flags; @@ -1165,6 +1170,8 @@ static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = { [IFLA_BR_VLAN_STATS_PER_PORT] = { .type = NLA_U8 }, [IFLA_BR_MULTI_BOOLOPT] = NLA_POLICY_EXACT_LEN(sizeof(struct br_boolopt_multi)), + [IFLA_BR_OFFLOAD_CACHE_SIZE] = { .type = NLA_U32 }, + [IFLA_BR_OFFLOAD_CACHE_RESERVED] = { .type = NLA_U32 }, }; static int br_changelink(struct net_device *brdev, struct nlattr *tb[], @@ -1424,6 +1431,19 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], br_opt_toggle(br, BROPT_NF_CALL_ARPTABLES, !!val); } #endif +#ifdef CONFIG_BRIDGE_OFFLOAD + if (data[IFLA_BR_OFFLOAD_CACHE_SIZE]) { + u32 val = nla_get_u32(data[IFLA_BR_OFFLOAD_CACHE_SIZE]); + + br_offload_set_cache_size(br, val); + } + + if (data[IFLA_BR_OFFLOAD_CACHE_RESERVED]) { + u32 val = nla_get_u32(data[IFLA_BR_OFFLOAD_CACHE_RESERVED]); + + br_offload_set_cache_reserved(br, val); + } +#endif if (data[IFLA_BR_MULTI_BOOLOPT]) { struct br_boolopt_multi *bm; @@ -1512,6 +1532,10 @@ static size_t br_get_size(const struct net_device *brdev) nla_total_size(sizeof(u8)) + /* IFLA_BR_NF_CALL_IPTABLES */ nla_total_size(sizeof(u8)) + /* IFLA_BR_NF_CALL_IP6TABLES */ nla_total_size(sizeof(u8)) + /* IFLA_BR_NF_CALL_ARPTABLES */ +#endif +#ifdef CONFIG_BRIDGE_OFFLOAD + nla_total_size(sizeof(u32)) + /* IFLA_BR_OFFLOAD_CACHE_SIZE */ + nla_total_size(sizeof(u32)) + /* IFLA_BR_OFFLOAD_CACHE_RESERVED */ #endif nla_total_size(sizeof(struct br_boolopt_multi)) + /* IFLA_BR_MULTI_BOOLOPT */ 0; @@ -1636,6 +1660,11 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) br_opt_get(br, BROPT_NF_CALL_ARPTABLES) ? 1 : 0)) return -EMSGSIZE; #endif +#ifdef CONFIG_BRIDGE_OFFLOAD + if (nla_put_u32(skb, IFLA_BR_OFFLOAD_CACHE_SIZE, br->offload_cache_size) || + nla_put_u32(skb, IFLA_BR_OFFLOAD_CACHE_RESERVED, br->offload_cache_reserved)) + return -EMSGSIZE; +#endif return 0; } diff --git a/net/bridge/br_offload.c b/net/bridge/br_offload.c new file mode 100644 index 000000000000..8cb9266e6cf9 --- /dev/null +++ b/net/bridge/br_offload.c @@ -0,0 +1,466 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/kernel.h> +#include <linux/workqueue.h> +#include "br_private.h" +#include "br_private_offload.h" + +static DEFINE_SPINLOCK(offload_lock); + +struct bridge_flow_key { + u8 dest[ETH_ALEN]; + u8 src[ETH_ALEN]; +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + u16 vlan_tag; + bool vlan_present; +#endif +}; + +struct bridge_flow { + struct net_bridge_port *port; + struct rhash_head node; + struct bridge_flow_key key; +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + bool vlan_out_present; + u16 vlan_out; +#endif + + unsigned long used; + struct net_bridge_fdb_entry *fdb_in, *fdb_out; + struct hlist_node fdb_list_in, fdb_list_out; + + struct rcu_head rcu; +}; + +static const struct rhashtable_params flow_params = { + .automatic_shrinking = true, + .head_offset = offsetof(struct bridge_flow, node), + .key_len = sizeof(struct bridge_flow_key), + .key_offset = offsetof(struct bridge_flow, key), +}; + +static struct kmem_cache *offload_cache __read_mostly; + +static void +flow_rcu_free(struct rcu_head *head) +{ + struct bridge_flow *flow; + + flow = container_of(head, struct bridge_flow, rcu); + kmem_cache_free(offload_cache, flow); +} + +static void +__br_offload_flow_free(struct bridge_flow *flow) +{ + flow->used = 0; + hlist_del(&flow->fdb_list_in); + hlist_del(&flow->fdb_list_out); + + call_rcu(&flow->rcu, flow_rcu_free); +} + +static void +br_offload_flow_free(struct bridge_flow *flow) +{ + if (rhashtable_remove_fast(&flow->port->offload.rht, &flow->node, + flow_params) != 0) + return; + + __br_offload_flow_free(flow); +} + +static bool +br_offload_flow_fdb_refresh_time(struct bridge_flow *flow, + struct net_bridge_fdb_entry *fdb) +{ + if (!time_after(flow->used, fdb->updated)) + return false; + + fdb->updated = flow->used; + + return true; +} + + +static void +br_offload_flow_refresh_time(struct bridge_flow *flow) +{ + br_offload_flow_fdb_refresh_time(flow, flow->fdb_in); + br_offload_flow_fdb_refresh_time(flow, flow->fdb_out); +} + +static void +br_offload_destroy_cb(void *ptr, void *arg) +{ + struct bridge_flow *flow = ptr; + + br_offload_flow_refresh_time(flow); + __br_offload_flow_free(flow); +} + +static bool +br_offload_need_gc(struct net_bridge_port *p) +{ + return (atomic_read(&p->offload.rht.nelems) + + p->br->offload_cache_reserved) >= p->br->offload_cache_size; +} + +static void +br_offload_gc_work(struct work_struct *work) +{ + struct rhashtable_iter hti; + struct net_bridge_port *p; + struct bridge_flow *gc_flow = NULL; + struct bridge_flow *flow; + unsigned long gc_used; + + p = container_of(work, struct net_bridge_port, offload.gc_work); + + if (!br_offload_need_gc(p)) + return; + + rhashtable_walk_enter(&p->offload.rht, &hti); + rhashtable_walk_start(&hti); + while ((flow = rhashtable_walk_next(&hti)) != NULL) { + unsigned long used; + + if (IS_ERR(flow)) + continue; + + used = READ_ONCE(flow->used); + if (!used) + continue; + + if (gc_flow && !time_before(used, gc_used)) + continue; + + gc_flow = flow; + gc_used = used; + } + rhashtable_walk_stop(&hti); + rhashtable_walk_exit(&hti); + + if (!gc_flow) + return; + + spin_lock_bh(&offload_lock); + if (br_offload_need_gc(p) && gc_flow && + gc_flow->used == gc_used) + br_offload_flow_free(gc_flow); + if (p->offload.enabled && br_offload_need_gc(p)) + queue_work(system_long_wq, work); + spin_unlock_bh(&offload_lock); + +} + +void br_offload_port_state(struct net_bridge_port *p) +{ + struct net_bridge_port_offload *o = &p->offload; + bool enabled = true; + bool flush = false; + + if (p->state != BR_STATE_FORWARDING || + !(p->flags & BR_OFFLOAD)) + enabled = false; + + spin_lock_bh(&offload_lock); + if (o->enabled == enabled) + goto out; + + if (enabled) { + if (!o->gc_work.func) + INIT_WORK(&o->gc_work, br_offload_gc_work); + rhashtable_init(&o->rht, &flow_params); + } else { + flush = true; + rhashtable_free_and_destroy(&o->rht, br_offload_destroy_cb, o); + } + + o->enabled = enabled; + +out: + spin_unlock_bh(&offload_lock); + + if (flush) + flush_work(&o->gc_work); +} + +void br_offload_fdb_update(const struct net_bridge_fdb_entry *fdb) +{ + struct bridge_flow *f; + struct hlist_node *tmp; + + spin_lock_bh(&offload_lock); + + hlist_for_each_entry_safe(f, tmp, &fdb->offload_in, fdb_list_in) { + br_offload_flow_refresh_time(f); + br_offload_flow_free(f); + } + hlist_for_each_entry_safe(f, tmp, &fdb->offload_out, fdb_list_out) { + br_offload_flow_refresh_time(f); + br_offload_flow_free(f); + } + + spin_unlock_bh(&offload_lock); +} + +bool br_offload_fdb_refresh_time(struct net_bridge *br, + struct net_bridge_fdb_entry *fdb) +{ + unsigned long timeout = jiffies - br->ageing_time; + struct bridge_flow *f; + struct hlist_node *tmp; + bool ret = false; + + spin_lock_bh(&offload_lock); + + hlist_for_each_entry_safe(f, tmp, &fdb->offload_in, fdb_list_in) { + if (br_offload_flow_fdb_refresh_time(f, fdb)) + ret = true; + if (time_before(f->used, timeout)) + br_offload_flow_free(f); + } + + hlist_for_each_entry_safe(f, tmp, &fdb->offload_out, fdb_list_out) { + if (br_offload_flow_fdb_refresh_time(f, fdb)) + ret = true; + if (time_before(f->used, timeout)) + br_offload_flow_free(f); + } + + spin_unlock_bh(&offload_lock); + + return ret; +} + +static void +br_offload_prepare_key(struct net_bridge_port *p, struct bridge_flow_key *key, + struct sk_buff *skb) +{ + memset(key, 0, sizeof(*key)); + memcpy(key, eth_hdr(skb), 2 * ETH_ALEN); +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + if (!br_opt_get(p->br, BROPT_VLAN_ENABLED)) + return; + + if (!skb_vlan_tag_present(skb) || skb->vlan_proto != p->br->vlan_proto) + return; + + key->vlan_present = true; + key->vlan_tag = skb_vlan_tag_get_id(skb); +#endif +} + +void br_offload_output(struct sk_buff *skb) +{ + struct net_bridge_port_offload *o; + struct br_input_skb_cb *cb = (struct br_input_skb_cb *)skb->cb; + struct net_bridge_port *p, *inp; + struct net_device *dev; + struct net_bridge_fdb_entry *fdb_in, *fdb_out; + struct net_bridge_vlan_group *vg; + struct bridge_flow_key key; + struct bridge_flow *flow; + u16 vlan; + + if (!cb->offload) + return; + + rcu_read_lock(); + + p = br_port_get_rcu(skb->dev); + if (!p) + goto out; + + o = &p->offload; + if (!o->enabled) + goto out; + + if (atomic_read(&p->offload.rht.nelems) >= p->br->offload_cache_size) + goto out; + + dev = dev_get_by_index_rcu(dev_net(p->br->dev), cb->input_ifindex); + if (!dev) + goto out; + + inp = br_port_get_rcu(dev); + if (!p) + goto out; + + vg = nbp_vlan_group_rcu(inp); + vlan = cb->input_vlan_present ? cb->input_vlan_tag : br_get_pvid(vg); + fdb_in = br_fdb_find_rcu(p->br, eth_hdr(skb)->h_source, vlan); + if (!fdb_in) + goto out; + + vg = nbp_vlan_group_rcu(p); + vlan = skb_vlan_tag_present(skb) ? skb_vlan_tag_get_id(skb) : br_get_pvid(vg); + fdb_out = br_fdb_find_rcu(p->br, eth_hdr(skb)->h_dest, vlan); + if (!fdb_out) + goto out; + + br_offload_prepare_key(p, &key, skb); +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + key.vlan_present = cb->input_vlan_present; + key.vlan_tag = cb->input_vlan_tag; +#endif + + flow = kmem_cache_alloc(offload_cache, GFP_ATOMIC); + flow->port = fdb_in->dst; + memcpy(&flow->key, &key, sizeof(key)); + +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + flow->vlan_out_present = skb_vlan_tag_present(skb); + flow->vlan_out = skb_vlan_tag_get(skb); +#endif + + flow->fdb_in = fdb_in; + flow->fdb_out = fdb_out; + flow->used = jiffies; + + spin_lock_bh(&offload_lock); + if (!o->enabled || + atomic_read(&p->offload.rht.nelems) >= p->br->offload_cache_size || + rhashtable_insert_fast(&flow->port->offload.rht, &flow->node, flow_params)) { + kmem_cache_free(offload_cache, flow); + goto out_unlock; + } + + hlist_add_head(&flow->fdb_list_in, &fdb_in->offload_in); + hlist_add_head(&flow->fdb_list_out, &fdb_out->offload_out); + + if (br_offload_need_gc(p)) + queue_work(system_long_wq, &p->offload.gc_work); + +out_unlock: + spin_unlock_bh(&offload_lock); + +out: + rcu_read_unlock(); +} + +bool br_offload_input(struct net_bridge_port *p, struct sk_buff *skb) +{ + struct net_bridge_port_offload *o = &p->offload; + struct br_input_skb_cb *cb = (struct br_input_skb_cb *)skb->cb; + struct bridge_flow_key key; + struct net_bridge_port *dst; + struct bridge_flow *flow; + unsigned long now = jiffies; + bool ret = false; + + if (skb->len < sizeof(key)) + return false; + + if (!o->enabled) + return false; + + if (is_multicast_ether_addr(eth_hdr(skb)->h_dest)) + return false; + + br_offload_prepare_key(p, &key, skb); + + rcu_read_lock(); + flow = rhashtable_lookup(&o->rht, &key, flow_params); + if (!flow) { + cb->offload = 1; +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + cb->input_vlan_present = key.vlan_present != 0; + cb->input_vlan_tag = key.vlan_tag; + cb->input_ifindex = p->dev->ifindex; +#endif + goto out; + } + + if (flow->fdb_in->dst != p) + goto out; + + dst = flow->fdb_out->dst; + if (!dst) + goto out; + + ret = true; +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + if (!flow->vlan_out_present && key.vlan_present) { + __vlan_hwaccel_clear_tag(skb); + } else if (flow->vlan_out_present) { + if (skb_vlan_tag_present(skb) && + skb->vlan_proto != p->br->vlan_proto) { + /* Protocol-mismatch, empty out vlan_tci for new tag */ + skb_push(skb, ETH_HLEN); + skb = vlan_insert_tag_set_proto(skb, skb->vlan_proto, + skb_vlan_tag_get(skb)); + if (unlikely(!skb)) + goto out; + + skb_pull(skb, ETH_HLEN); + skb_reset_mac_len(skb); + } + + __vlan_hwaccel_put_tag(skb, p->br->vlan_proto, + flow->vlan_out); + } +#endif + + skb->dev = dst->dev; + skb_push(skb, ETH_HLEN); + + if (skb_warn_if_lro(skb) || !is_skb_forwardable(skb->dev, skb)) { + kfree_skb(skb); + goto out; + } + + if (flow->used != now) + flow->used = now; + skb_forward_csum(skb); + dev_queue_xmit(skb); + +out: + rcu_read_unlock(); + return ret; +} + +static void +br_offload_check_gc(struct net_bridge *br) +{ + struct net_bridge_port *p; + + spin_lock_bh(&br->lock); + list_for_each_entry(p, &br->port_list, list) + if (br_offload_need_gc(p)) + queue_work(system_long_wq, &p->offload.gc_work); + spin_unlock_bh(&br->lock); +} + + +int br_offload_set_cache_size(struct net_bridge *br, unsigned long val) +{ + br->offload_cache_size = val; + br_offload_check_gc(br); + + return 0; +} + +int br_offload_set_cache_reserved(struct net_bridge *br, unsigned long val) +{ + br->offload_cache_reserved = val; + br_offload_check_gc(br); + + return 0; +} + +int __init br_offload_init(void) +{ + offload_cache = kmem_cache_create("bridge_offload_cache", + sizeof(struct bridge_flow), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!offload_cache) + return -ENOMEM; + + return 0; +} + +void br_offload_fini(void) +{ + kmem_cache_destroy(offload_cache); +} diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 2661dda1a92b..40021fe4b8c8 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -268,7 +268,15 @@ struct net_bridge_fdb_entry { unsigned long updated ____cacheline_aligned_in_smp; unsigned long used; - struct rcu_head rcu; + union { +#ifdef CONFIG_BRIDGE_OFFLOAD + struct { + struct hlist_head offload_in; + struct hlist_head offload_out; + }; +#endif + struct rcu_head rcu; + }; }; #define MDB_PG_FLAGS_PERMANENT BIT(0) @@ -343,6 +351,12 @@ struct net_bridge_mdb_entry { struct rcu_head rcu; }; +struct net_bridge_port_offload { + struct rhashtable rht; + struct work_struct gc_work; + bool enabled; +}; + struct net_bridge_port { struct net_bridge *br; struct net_device *dev; @@ -404,6 +418,9 @@ struct net_bridge_port { u16 backup_redirected_cnt; struct bridge_stp_xstats stp_xstats; +#ifdef CONFIG_BRIDGE_OFFLOAD + struct net_bridge_port_offload offload; +#endif }; #define kobj_to_brport(obj) container_of(obj, struct net_bridge_port, kobj) @@ -555,6 +572,11 @@ struct br_input_skb_cb { u8 br_netfilter_broute:1; #endif +#ifdef CONFIG_BRIDGE_OFFLOAD + u32 offload_cache_size; + u32 offload_cache_reserved; +#endif + #ifdef CONFIG_NET_SWITCHDEV /* Set if TX data plane offloading is used towards at least one * hardware domain. @@ -580,6 +602,12 @@ struct br_input_skb_cb { #else # define BR_INPUT_SKB_CB_MROUTERS_ONLY(__skb) (0) #endif +#ifdef CONFIG_BRIDGE_OFFLOAD + u8 offload:1; + u8 input_vlan_present:1; + u16 input_vlan_tag; + int input_ifindex; +#endif #define br_printk(level, br, format, args...) \ printk(level "%s: " format, (br)->dev->name, ##args) diff --git a/net/bridge/br_private_offload.h b/net/bridge/br_private_offload.h new file mode 100644 index 000000000000..f66edd0539ab --- /dev/null +++ b/net/bridge/br_private_offload.h @@ -0,0 +1,53 @@ +#ifndef __BR_OFFLOAD_H +#define __BR_OFFLOAD_H + +#ifdef CONFIG_BRIDGE_OFFLOAD +bool br_offload_input(struct net_bridge_port *p, struct sk_buff *skb); +void br_offload_output(struct sk_buff *skb); +void br_offload_port_state(struct net_bridge_port *p); +void br_offload_fdb_update(const struct net_bridge_fdb_entry *fdb); +bool br_offload_fdb_refresh_time(struct net_bridge *br, + struct net_bridge_fdb_entry *fdb); +int br_offload_init(void); +void br_offload_fini(void); +int br_offload_set_cache_size(struct net_bridge *br, unsigned long val); +int br_offload_set_cache_reserved(struct net_bridge *br, unsigned long val); +#else +static inline bool br_offload_input(struct net_bridge_port *p, struct sk_buff *skb) +{ + return false; +} +static inline void br_offload_output(struct sk_buff *skb) +{ +} +static inline void br_offload_port_state(struct net_bridge_port *p) +{ +} +static inline void br_offload_fdb_update(const struct net_bridge_fdb_entry *fdb) +{ +} +static inline bool br_offload_fdb_refresh_time(struct net_bridge *br, + struct net_bridge_fdb_entry *fdb) +{ + return false; +} +static inline int br_offload_init(void) +{ + return 0; +} +static inline void br_offload_fini(void) +{ +} +#endif + +static inline void br_offload_skb_disable(struct sk_buff *skb) +{ +#ifdef CONFIG_BRIDGE_OFFLOAD + struct br_input_skb_cb *cb = (struct br_input_skb_cb *)skb->cb; + + if (cb->offload) + cb->offload = 0; +#endif +} + +#endif diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index 1d80f34a139c..b57788b53d24 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -12,6 +12,7 @@ #include "br_private.h" #include "br_private_stp.h" +#include "br_private_offload.h" /* since time values in bpdu are in jiffies and then scaled (1/256) * before sending, make sure that is at least one STP tick. @@ -52,6 +53,8 @@ void br_set_state(struct net_bridge_port *p, unsigned int state) (unsigned int) p->port_no, p->dev->name, br_port_state_names[p->state]); + br_offload_port_state(p); + if (p->br->stp_enabled == BR_KERNEL_STP) { switch (p->state) { case BR_STATE_BLOCKING: diff --git a/net/bridge/br_vlan_tunnel.c b/net/bridge/br_vlan_tunnel.c index 6399a8a69d07..ffc65dc4eea8 100644 --- a/net/bridge/br_vlan_tunnel.c +++ b/net/bridge/br_vlan_tunnel.c @@ -15,6 +15,7 @@ #include "br_private.h" #include "br_private_tunnel.h" +#include "br_private_offload.h" static inline int br_vlan_tunid_cmp(struct rhashtable_compare_arg *arg, const void *ptr) @@ -180,6 +181,7 @@ void br_handle_ingress_vlan_tunnel(struct sk_buff *skb, skb_dst_drop(skb); __vlan_hwaccel_put_tag(skb, p->br->vlan_proto, vlan->vid); + br_offload_skb_disable(skb); } int br_handle_egress_vlan_tunnel(struct sk_buff *skb, @@ -201,6 +203,7 @@ int br_handle_egress_vlan_tunnel(struct sk_buff *skb, if (err) return err; + br_offload_skb_disable(skb); tunnel_dst = rcu_dereference(vlan->tinfo.tunnel_dst); if (tunnel_dst && dst_hold_safe(&tunnel_dst->dst)) skb_dst_set(skb, &tunnel_dst->dst); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 00328f0dd22b..da8d3b72a77e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -55,7 +55,7 @@ #include <net/net_namespace.h> #define RTNL_MAX_TYPE 50 -#define RTNL_SLAVE_MAX_TYPE 40 +#define RTNL_SLAVE_MAX_TYPE 41 struct rtnl_link { rtnl_doit_func doit;

[RFC,2/2] net: bridge: add a software fast-path implementation

Checks

Commit Message

Comments

Patch