diff mbox series

[net,v2,3/3] selftests: forwarding: Add test for load-balancing between multiple servers

Message ID 20230825090830.18635-4-sriram.yagnaraman@est.tech (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series Avoid TCP resets when using ECMP for load-balancing between multiple servers. | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net
netdev/fixes_present success Fixes tag present in non-next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 9 this patch: 9
netdev/cc_maintainers warning 2 maintainers not CCed: razor@blackwall.org liuhangbin@gmail.com
netdev/build_clang success Errors and warnings before: 9 this patch: 9
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success net selftest script(s) already in Makefile
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 9 this patch: 9
netdev/checkpatch warning WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? WARNING: line length of 81 exceeds 80 columns WARNING: line length of 82 exceeds 80 columns WARNING: line length of 83 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns WARNING: line length of 92 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Sriram Yagnaraman Aug. 25, 2023, 9:08 a.m. UTC
Create a topology with a host, and a router. The host (veth0) is in the
default namespace, and a network namespace is created for the router,
the peer veth1 is moved to the router netns. A dummy interface is added
inside the router netns, to simulate a network that has two neighbors.
An ECMP route to a virtual IP (vip) with the two neighbors as the next
hop is added.

The test uses perf stat to count the number of fib:fib_table_lookup
tracepoint hits for IPv4 and the number of fib6:fib6_table_lookup for
IPv6. The measured count is checked to be within 15% for the number of
packets received on veth1 in the router.

See diagram in the test for more information.

Suggested-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
---
 .../testing/selftests/net/forwarding/Makefile |   1 +
 tools/testing/selftests/net/forwarding/lib.sh |   5 +
 .../net/forwarding/router_multipath_vip.sh    | 255 ++++++++++++++++++
 3 files changed, 261 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/router_multipath_vip.sh

Comments

Ido Schimmel Aug. 27, 2023, 5:18 p.m. UTC | #1
On Fri, Aug 25, 2023 at 11:08:30AM +0200, Sriram Yagnaraman wrote:
> Create a topology with a host, and a router. The host (veth0) is in the
> default namespace, and a network namespace is created for the router,
> the peer veth1 is moved to the router netns. A dummy interface is added
> inside the router netns, to simulate a network that has two neighbors.
> An ECMP route to a virtual IP (vip) with the two neighbors as the next
> hop is added.
> 
> The test uses perf stat to count the number of fib:fib_table_lookup
> tracepoint hits for IPv4 and the number of fib6:fib6_table_lookup for
> IPv6. The measured count is checked to be within 15% for the number of
> packets received on veth1 in the router.
> 
> See diagram in the test for more information.
> 
> Suggested-by: Ido Schimmel <idosch@nvidia.com>
> Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
> ---
>  .../testing/selftests/net/forwarding/Makefile |   1 +
>  tools/testing/selftests/net/forwarding/lib.sh |   5 +
>  .../net/forwarding/router_multipath_vip.sh    | 255 ++++++++++++++++++

I believe this test better fits in fib_tests.sh. Here's a diff that uses
the previously mentioned method [1]. I verified that it fails without
the fixes and passes with them:

Before:

IPv4 multipath list receive tests
    TEST: Multipath route hit ratio (.15)                               [FAIL]
    TEST: Single path route hit ratio (.15)                             [ OK ]

IPv6 multipath list receive tests
    TEST: Multipath route hit ratio (.15)                               [FAIL]
    TEST: Single path route hit ratio (.14)                             [ OK ]

After:

IPv4 multipath list receive tests
    TEST: Multipath route hit ratio (.99)                               [ OK ]
    TEST: Single path route hit ratio (.15)                             [ OK ]

IPv6 multipath list receive tests
    TEST: Multipath route hit ratio (.99)                               [ OK ]
    TEST: Single path route hit ratio (.14)                             [ OK ]

[1]
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 35d89dfa6f11..5ffbfdad943c 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -9,7 +9,7 @@ ret=0
 ksft_skip=4
 
 # all tests in this script. Can be overridden with -t option
-TESTS="unregister down carrier nexthop suppress ipv6_notify ipv4_notify ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr ipv4_mangle ipv6_mangle ipv4_bcast_neigh"
+TESTS="unregister down carrier nexthop suppress ipv6_notify ipv4_notify ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr ipv4_mangle ipv6_mangle ipv4_bcast_neigh ipv4_mpath_list ipv6_mpath_list"
 
 VERBOSE=0
 PAUSE_ON_FAIL=no
@@ -2138,6 +2138,143 @@ ipv4_bcast_neigh_test()
 	cleanup
 }
 
+mpath_dep_check()
+{
+	if [ ! -x "$(command -v mausezahn)" ]; then
+		echo "mausezahn command not found. Skipping test"
+		return 1
+	fi
+
+	if [ ! -x "$(command -v jq)" ]; then
+		echo "jq command not found. Skipping test"
+		return 1
+	fi
+
+	if [ ! -x "$(command -v bc)" ]; then
+		echo "bc command not found. Skipping test"
+		return 1
+	fi
+
+	if [ ! -x "$(command -v perf)" ]; then
+		echo "perf command not found. Skipping test"
+		return 1
+	fi
+
+	perf list fib:* | grep -q fib_table_lookup
+	if [ $? -ne 0 ]; then
+		echo "IPv4 FIB tracepoint not found. Skipping test"
+		return 1
+	fi
+
+	perf list fib6:* | grep -q fib6_table_lookup
+	if [ $? -ne 0 ]; then
+		echo "IPv6 FIB tracepoint not found. Skipping test"
+		return 1
+	fi
+
+	return 0
+}
+
+list_rcv_eval()
+{
+	local name=$1; shift
+	local file=$1; shift
+	local exp=$1; shift
+
+
+	local count=$(tail -n 1 $file | jq '.["counter-value"] | tonumber | floor')
+	local ratio=$(echo "scale=2; $count / 65536" | bc -l)
+	local res=$(echo "$ratio $exp" | bc)
+	[[ $res -eq 1 ]]
+	log_test $? 0 "$name route hit ratio ($ratio)"
+}
+
+ipv4_mpath_list_test()
+{
+	echo
+	echo "IPv4 multipath list receive tests"
+
+	mpath_dep_check || return 1
+
+	route_setup
+
+	set -e
+	run_cmd "ip netns exec ns1 ethtool -K veth1 tcp-segmentation-offload off"
+
+	run_cmd "ip netns exec ns2 bash -c \"echo 20000 > /sys/class/net/veth2/gro_flush_timeout\""
+	run_cmd "ip netns exec ns2 bash -c \"echo 1 > /sys/class/net/veth2/napi_defer_hard_irqs\""
+	run_cmd "ip netns exec ns2 ethtool -K veth2 generic-receive-offload on"
+	run_cmd "ip -n ns2 link add name nh1 up type dummy"
+	run_cmd "ip -n ns2 link add name nh2 up type dummy"
+	run_cmd "ip -n ns2 address add 172.16.201.1/24 dev nh1"
+	run_cmd "ip -n ns2 address add 172.16.202.1/24 dev nh2"
+	run_cmd "ip -n ns2 neigh add 172.16.201.2 lladdr 00:11:22:33:44:55 nud perm dev nh1"
+	run_cmd "ip -n ns2 neigh add 172.16.202.2 lladdr 00:aa:bb:cc:dd:ee nud perm dev nh2"
+	run_cmd "ip -n ns2 route add 203.0.113.0/24 nexthop via 172.16.201.2 nexthop via 172.16.202.2"
+	run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.fib_multipath_hash_policy=1"
+	set +e
+
+	local dmac=$(ip -n ns2 -j link show dev veth2 | jq -r '.[]["address"]')
+	local tmp_file=$(mktemp)
+
+	# Packets forwarded in a list using a multipath route must not reuse a
+	# cached result so that a flow always hits the same nexthop. In other
+	# words, the FIB lookup tracepoint needs to be triggered for every
+	# packet.
+	run_cmd "perf stat -e fib:fib_table_lookup --filter 'err == 0' -j -o $tmp_file -- ip netns exec ns1 mausezahn veth1 -a own -b $dmac -A 172.16.101.1 -B 203.0.113.1 -t udp 'sp=12345,dp=0-65535' -q"
+	list_rcv_eval "Multipath" $tmp_file ">= 0.95"
+
+	# The same is not true for a single path route.
+	run_cmd "ip -n ns2 route replace 203.0.113.0/24 nexthop via 172.16.201.2"
+	run_cmd "perf stat -e fib:fib_table_lookup --filter 'err == 0' -j -o $tmp_file -- ip netns exec ns1 mausezahn veth1 -a own -b $dmac -A 172.16.101.1 -B 203.0.113.1 -t udp 'sp=12345,dp=0-65535' -q"
+	list_rcv_eval "Single path" $tmp_file "< 0.95"
+
+	rm $tmp_file
+	route_cleanup
+}
+
+ipv6_mpath_list_test()
+{
+	echo
+	echo "IPv6 multipath list receive tests"
+
+	mpath_dep_check || return 1
+
+	route_setup
+
+	set -e
+	run_cmd "ip netns exec ns1 ethtool -K veth1 tcp-segmentation-offload off"
+
+	run_cmd "ip netns exec ns2 bash -c \"echo 20000 > /sys/class/net/veth2/gro_flush_timeout\""
+	run_cmd "ip netns exec ns2 bash -c \"echo 1 > /sys/class/net/veth2/napi_defer_hard_irqs\""
+	run_cmd "ip netns exec ns2 ethtool -K veth2 generic-receive-offload on"
+	run_cmd "ip -n ns2 link add name nh1 up type dummy"
+	run_cmd "ip -n ns2 link add name nh2 up type dummy"
+	run_cmd "ip -n ns2 -6 address add 2001:db8:201::1/64 dev nh1"
+	run_cmd "ip -n ns2 -6 address add 2001:db8:202::1/64 dev nh2"
+	run_cmd "ip -n ns2 -6 neigh add 2001:db8:201::2 lladdr 00:11:22:33:44:55 nud perm dev nh1"
+	run_cmd "ip -n ns2 -6 neigh add 2001:db8:202::2 lladdr 00:aa:bb:cc:dd:ee nud perm dev nh2"
+	run_cmd "ip -n ns2 -6 route add 2001:db8:301::/64 nexthop via 2001:db8:201::2 nexthop via 2001:db8:202::2"
+	run_cmd "ip netns exec ns2 sysctl -qw net.ipv6.fib_multipath_hash_policy=1"
+	set +e
+
+	local dmac=$(ip -n ns2 -j link show dev veth2 | jq -r '.[]["address"]')
+	local tmp_file=$(mktemp)
+
+	# Packets forwarded in a list using a multipath route must not reuse a
+	# cached result so that a flow always hits the same nexthop. In other
+	# words, the FIB lookup tracepoint needs to be triggered for every
+	# packet.
+	run_cmd "perf stat -e fib6:fib6_table_lookup --filter 'err == 0' -j -o $tmp_file -- ip netns exec ns1 mausezahn -6 veth1 -a own -b $dmac -A 2001:db8:101::1 -B 2001:db8:301::1 -t udp 'sp=12345,dp=0-65535' -q"
+	list_rcv_eval "Multipath" $tmp_file ">= 0.95"
+
+	# The same is not true for a single path route.
+	run_cmd "ip -n ns2 route replace 2001:db8:301::/64 nexthop via 2001:db8:201::2"
+	run_cmd "perf stat -e fib6:fib6_table_lookup --filter 'err == 0' -j -o $tmp_file -- ip netns exec ns1 mausezahn -6 veth1 -a own -b $dmac -A 2001:db8:101::1 -B 2001:db8:301::1 -t udp 'sp=12345,dp=0-65535' -q"
+	list_rcv_eval "Single path" $tmp_file "< 0.95"
+
+	rm $tmp_file
+	route_cleanup
+}
+
 ################################################################################
 # usage
 
@@ -2217,6 +2354,8 @@ do
 	ipv4_mangle)			ipv4_mangle_test;;
 	ipv6_mangle)			ipv6_mangle_test;;
 	ipv4_bcast_neigh)		ipv4_bcast_neigh_test;;
+	ipv4_mpath_list)		ipv4_mpath_list_test;;
+	ipv6_mpath_list)		ipv6_mpath_list_test;;
 
 	help) echo "Test names: $TESTS"; exit 0;;
 	esac
Sriram Yagnaraman Aug. 28, 2023, 8:01 a.m. UTC | #2
Hi Ido,

> -----Original Message-----
> From: Ido Schimmel <idosch@idosch.org>
> Sent: Sunday, 27 August 2023 19:18
> To: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
> Cc: netdev@vger.kernel.org; linux-kselftest@vger.kernel.org; David S . Miller
> <davem@davemloft.net>; Eric Dumazet <edumazet@google.com>; Jakub
> Kicinski <kuba@kernel.org>; Paolo Abeni <pabeni@redhat.com>; David Ahern
> <dsahern@kernel.org>; Ido Schimmel <idosch@nvidia.com>; Shuah Khan
> <shuah@kernel.org>; Petr Machata <petrm@nvidia.com>
> Subject: Re: [PATCH net v2 3/3] selftests: forwarding: Add test for load-
> balancing between multiple servers
> 
> On Fri, Aug 25, 2023 at 11:08:30AM +0200, Sriram Yagnaraman wrote:
> > Create a topology with a host, and a router. The host (veth0) is in
> > the default namespace, and a network namespace is created for the
> > router, the peer veth1 is moved to the router netns. A dummy interface
> > is added inside the router netns, to simulate a network that has two
> neighbors.
> > An ECMP route to a virtual IP (vip) with the two neighbors as the next
> > hop is added.
> >
> > The test uses perf stat to count the number of fib:fib_table_lookup
> > tracepoint hits for IPv4 and the number of fib6:fib6_table_lookup for
> > IPv6. The measured count is checked to be within 15% for the number of
> > packets received on veth1 in the router.
> >
> > See diagram in the test for more information.
> >
> > Suggested-by: Ido Schimmel <idosch@nvidia.com>
> > Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
> > ---
> >  .../testing/selftests/net/forwarding/Makefile |   1 +
> >  tools/testing/selftests/net/forwarding/lib.sh |   5 +
> >  .../net/forwarding/router_multipath_vip.sh    | 255 ++++++++++++++++++
> 
> I believe this test better fits in fib_tests.sh. Here's a diff that uses the previously
> mentioned method [1]. I verified that it fails without the fixes and passes with
> them:
> 
> Before:
> 
> IPv4 multipath list receive tests
>     TEST: Multipath route hit ratio (.15)                               [FAIL]
>     TEST: Single path route hit ratio (.15)                             [ OK ]
> 
> IPv6 multipath list receive tests
>     TEST: Multipath route hit ratio (.15)                               [FAIL]
>     TEST: Single path route hit ratio (.14)                             [ OK ]
> 
> After:
> 
> IPv4 multipath list receive tests
>     TEST: Multipath route hit ratio (.99)                               [ OK ]
>     TEST: Single path route hit ratio (.15)                             [ OK ]
> 
> IPv6 multipath list receive tests
>     TEST: Multipath route hit ratio (.99)                               [ OK ]
>     TEST: Single path route hit ratio (.14)                             [ OK ]
> 

Thanks a lot, it works for me as well. 
I will post v3 with this version of the test then.
diff mbox series

Patch

diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index 770efbe24f0d..bf4e5745fd5c 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -70,6 +70,7 @@  TEST_PROGS = bridge_igmp.sh \
 	router_mpath_nh.sh \
 	router_multicast.sh \
 	router_multipath.sh \
+	router_multipath_vip.sh \
 	router_nh.sh \
 	router.sh \
 	router_vid_1.sh \
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index f69015bf2dea..75a7b138c399 100755
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -787,6 +787,11 @@  link_stats_tx_packets_get()
 	link_stats_get $1 tx packets
 }
 
+link_stats_rx_packets_get()
+{
+	link_stats_get $1 rx packets
+}
+
 link_stats_rx_errors_get()
 {
 	link_stats_get $1 rx errors
diff --git a/tools/testing/selftests/net/forwarding/router_multipath_vip.sh b/tools/testing/selftests/net/forwarding/router_multipath_vip.sh
new file mode 100755
index 000000000000..9aefebd8085c
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_multipath_vip.sh
@@ -0,0 +1,255 @@ 
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +--------------------+
+# | H1                 |
+# |                    |
+# |              $h1 + |
+# |     192.0.2.2/28 | |
+# | 2001:db8:1::2/64 | |
+# |                  | |
+# +------------------|-+
+#                    |
+# +------------------|-----------------------+  +----------------------+
+# | SW               |                       |  |                      |
+# | (netns ns-r1)    |                       |  | + $neigh1            |
+# |             $rp1 +                       |  |   192.0.2.18/28      |
+# |     192.0.2.1/28                         |  |   2001:db8:2::2/64   |
+# | 2001:db8:1::1/64     + vip               |  |                      |
+# |                        198.51.100.0/24   |  +----------------------+
+# |                        2001:db8:3::/64   |  |                      |
+# |                                          |  | + $neigh2            |
+# |                                          |  |   192.0.2.19/28      |
+# |                                          |  |   2001:db8:2::3/64   |
+# |                                          |  |                      |
+# +------------------------------------------+  +----------------------+
+
+ALL_TESTS="multipath_test"
+NUM_NETIFS=2
+source lib.sh
+
+ns_create()
+{
+	local ns="$1"
+
+	ip netns add $ns
+
+	in_ns $ns ip link set dev lo up
+	in_ns $ns forwarding_enable
+}
+
+ns_destroy()
+{
+	local ns="$1"
+
+	ip netns del $ns &> /dev/null
+}
+
+h1_create()
+{
+	ip link set dev $h1 up
+	ip address add 192.0.2.2/28 dev $h1
+	ip address add 2001:db8:1::2/64 dev $h1
+
+	ethtool -K $h1 tcp-segmentation-offload off
+}
+
+h1_destroy()
+{
+	ethtool -K $h1 tcp-segmentation-offload on
+
+	ip address del 2001:db8:1::2/64 dev $h1
+	ip address del 192.0.2.2/28 dev $h1
+	ip link set dev $h1 down
+}
+
+router_create()
+{
+	ns="ns-r1"
+	dummy="dum1"
+
+	echo 20000 > /sys/class/net/$rp1/gro_flush_timeout
+	echo 1 > /sys/class/net/$rp1/napi_defer_hard_irqs
+	ethtool -K $rp1 generic-receive-offload on
+
+	ns_create $ns
+	ip link set dev $rp1 netns $ns
+
+	ip -n $ns link set dev $rp1 up
+	ip -n $ns address add dev $rp1 192.0.2.1/28
+
+	ip -n $ns link add name $dummy up type dummy
+	ip -n $ns address add 192.0.2.17/28 dev $dummy
+	ip -n $ns address add 2001:db8:2::1/64 dev $dummy
+
+	ip -n $ns neigh add 192.0.2.18 lladdr 00:11:22:33:44:55 nud perm dev $dummy
+	ip -n $ns neigh add 192.0.2.19 lladdr 00:aa:bb:cc:dd:ee nud perm dev $dummy
+	ip -n $ns neigh add 2001:db8:2::2 lladdr 00:11:22:33:44:55 nud perm dev $dummy
+	ip -n $ns neigh add 2001:db8:2::3 lladdr 00:aa:bb:cc:dd:ee nud perm dev $dummy
+
+	ip -n $ns route add 198.51.100.0/24 \
+		nexthop via 192.0.2.18 \
+		nexthop via 192.0.2.19
+
+	ip -n $ns route add 2001:db8:3::/64 \
+		nexthop via 2001:db8:2::2 \
+		nexthop via 2001:db8:2::3
+}
+
+router_destroy()
+{
+	ns="ns-r1"
+	dummy="dum1"
+
+	ip -n $ns link del dev $dummy
+
+	ip -n $ns address del dev $rp1 192.0.2.1/28
+	ip -n $ns link set dev $rp1 down
+	ip -n $ns link set dev $rp1 netns 1
+
+	ns_destroy $ns
+
+	echo 0 > /sys/class/net/$rp1/gro_flush_timeout
+	echo 0 > /sys/class/net/$rp1/napi_defer_hard_irqs
+	ethtool -K $rp1 generic-receive-offload off
+}
+
+perf_stat()
+{
+	local tracepoint="$1"
+	local cmd="$2"
+	local out="$3"
+
+	perf stat -o $out -j $tracepoint -a &
+	perf_stat_pid=$!
+
+	$cmd
+
+	kill -SIGINT $perf_stat_pid && wait $perf_stat_pid
+}
+
+perf_evaluate()
+{
+	local desc="$1"
+	local expected=$2
+	local perf_output="$3"
+
+	RET=0
+	measured=$(tail -n 1 $perf_output | jq '.["counter-value"] | tonumber | floor')
+
+	diff=$(echo $expected - $measured | bc -l)
+	diff=${diff#-}
+
+	test "$(echo "$diff / $expected > 0.15" | bc -l)" -eq 0
+	check_err $? "Too large discrepancy between expected and measured fib lookup counts"
+	log_test "$desc"
+	log_info "Expected count $expected Measured count $measured"
+}
+
+run_test_and_check_tpv4()
+{
+	local src_ip="$1"
+	local dst_ip="$2"
+	local t0_rp1 t1_rp1
+	local expected
+
+	# Transmit multiple flows from h1 to vip and check that fib lookup
+	# tracepoint is hit for each packet.
+	in_ns ns-r1 sysctl_set net.ipv4.fib_multipath_hash_policy 1
+
+	t0_rp1=$(in_ns ns-r1 link_stats_rx_packets_get $rp1)
+
+	smac=$(mac_get $h1)
+	dmac=$(in_ns ns-r1 mac_get $rp1)
+	cmd="$MZ $h1 -q -p 64 -a $smac -b $dmac -A $src_ip -B $dst_ip
+		-t udp 'sp=1024,dp=1024-65535'"
+
+	perf_output=$(mktemp)
+	perf_stat "-e fib:fib_table_lookup" "$cmd" "$perf_output"
+
+	t1_rp1=$(in_ns ns-r1 link_stats_rx_packets_get $rp1)
+	expected="$(echo "$t1_rp1 - $t0_rp1" | bc -l)"
+
+	perf_evaluate "IPv4 multipath: fib_table_lookup" $expected "$perf_output"
+	rm $perf_output
+
+	in_ns ns-r1 sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+run_test_and_check_tpv6()
+{
+	local src_ip="$1"
+	local dst_ip="$2"
+	local t0_rp1 t1_rp1
+	local expected
+
+	# Transmit multiple flows from h1 to vip and check that fib lookup
+	# tracepoint is hit for each packet.
+	in_ns ns-r1 sysctl_set net.ipv4.fib_multipath_hash_policy 1
+
+	t0_rp1=$(in_ns ns-r1 link_stats_rx_packets_get $rp1)
+
+	smac=$(mac_get $h1)
+	dmac=$(in_ns ns-r1 mac_get $rp1)
+	cmd="$MZ $h1 -6 -q -p 64 -a $smac -b $dmac -A $src_ip -B $dst_ip
+		-t udp 'sp=1024,dp=1024-65535'"
+
+	perf_output=$(mktemp)
+	perf_stat "-e fib6:fib6_table_lookup" "$cmd" "$perf_output"
+
+	# fib6_table_lookup is called twice for each packet, for
+	# RT6_TABLE_LOCAL and RT6_TABLE_MAIN
+	t1_rp1=$(in_ns ns-r1 link_stats_rx_packets_get $rp1)
+	expected="$(echo "2*($t1_rp1 - $t0_rp1)" | bc -l)"
+
+	perf_evaluate "IPv6 multipath: fib6_table_lookup" $expected "$perf_output"
+	rm $perf_output
+
+	in_ns ns-r1 sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+multipath_test()
+{
+	log_info "Running multipath tests"
+	run_test_and_check_tpv4 "192.0.2.2" "198.51.100.1"
+	run_test_and_check_tpv6 "2001:db8:1::2" "2001:db8:3::1"
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rp1=${NETIFS[p2]}
+
+
+	h1_create
+	router_create
+}
+
+setup_wait()
+{
+	h1=${NETIFS[p1]}
+	rp1=${NETIFS[p2]}
+
+	setup_wait_dev $h1
+	in_ns ns-r1 setup_wait_dev $rp1
+
+	# Make sure links are ready.
+	sleep $WAIT_TIME
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	router_destroy
+	h1_destroy
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS