diff mbox series

[bpf,v2,1/2] selftests/bpf: Add test for l3 use of bpf_redirect_peer

Message ID 20210517101128.641827-2-joamaki@gmail.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series bpf: Fix l3 to l2 use of bpf_skb_change_head | expand

Checks

Context Check Description
netdev/apply fail Patch does not apply to bpf
netdev/tree_selection success Clearly marked for bpf

Commit Message

Jussi Maki May 17, 2021, 10:11 a.m. UTC
Add a failing test to try and use bpf_skb_change_head in combination
with bpf_redirect_peer to redirect a packet from a L3 device to veth.

The test uses a BPF program that adds L2 headers to the packet coming
from a L3 device and then calls bpf_redirect_peer to redirect the packet
to a veth device. The test fails as skb->mac_len is not set properly and
thus the ethernet headers are not properly skb_pull'd in cls_bpf_classify,
causing tcp_v4_rcv to point the TCP header into middle of the IP header.

Signed-off-by: Jussi Maki <joamaki@gmail.com>
---
 .../selftests/bpf/prog_tests/tc_redirect.c    | 238 +++++++++++++++++-
 .../selftests/bpf/progs/test_tc_peer.c        |  26 ++
 2 files changed, 251 insertions(+), 13 deletions(-)
diff mbox series

Patch

diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index 95ef9fcd31d8..aa844f282e8a 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -11,16 +11,18 @@ 
  */
 
 #define _GNU_SOURCE
-#include <fcntl.h>
+
+#include "test_progs.h"
+
 #include <linux/limits.h>
 #include <linux/sysctl.h>
+#include <linux/if_tun.h>
+#include <linux/if.h>
 #include <sched.h>
 #include <stdbool.h>
 #include <stdio.h>
 #include <sys/stat.h>
-#include <sys/types.h>
 
-#include "test_progs.h"
 #include "network_helpers.h"
 #include "test_tc_neigh_fib.skel.h"
 #include "test_tc_neigh.skel.h"
@@ -32,16 +34,23 @@ 
 
 #define IP4_SRC "172.16.1.100"
 #define IP4_DST "172.16.2.100"
+#define IP4_TUN_SRC "172.17.1.100"
+#define IP4_TUN_FWD "172.17.1.200"
 #define IP4_PORT 9004
 
-#define IP6_SRC "::1:dead:beef:cafe"
-#define IP6_DST "::2:dead:beef:cafe"
+#define IP6_SRC "0::1:dead:beef:cafe"
+#define IP6_DST "0::2:dead:beef:cafe"
+#define IP6_TUN_SRC "1::1:dead:beef:cafe"
+#define IP6_TUN_FWD "1::2:dead:beef:cafe"
 #define IP6_PORT 9006
 
 #define IP4_SLL "169.254.0.1"
 #define IP4_DLL "169.254.0.2"
 #define IP4_NET "169.254.0.0"
 
+#define MAC_DST_FWD "00:11:22:33:44:55"
+#define MAC_DST "00:22:33:44:55:66"
+
 #define IFADDR_STR_LEN 18
 #define PING_ARGS "-c 3 -w 10 -q"
 
@@ -92,7 +101,8 @@  static int modify_proc(const char *path, const char *newval)
 
 	strncpy(mod->path, path, PATH_MAX);
 
-	if (!fread(mod->oldval, 1, MAX_PROC_VALUE_LEN, f)) {
+	mod->oldlen = fread(mod->oldval, 1, MAX_PROC_VALUE_LEN, f);
+	if (mod->oldlen < 0) {
 		log_err("reading from %s failed", path);
 		goto fail;
 	}
@@ -238,14 +248,15 @@  static int get_ifindex(const char *name)
 static int netns_setup_links_and_routes(struct netns_setup_result *result)
 {
 	char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {};
-	char veth_dst_fwd_addr[IFADDR_STR_LEN+1] = {};
 
 	SYS("ip link add veth_src type veth peer name veth_src_fwd");
 	SYS("ip link add veth_dst type veth peer name veth_dst_fwd");
+
+	SYS("ip link set veth_dst_fwd address " MAC_DST_FWD);
+	SYS("ip link set veth_dst address " MAC_DST);
+
 	if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr))
 		goto fail;
-	if (get_ifaddr("veth_dst_fwd", veth_dst_fwd_addr))
-		goto fail;
 
 	result->ifindex_veth_src_fwd = get_ifindex("veth_src_fwd");
 	if (result->ifindex_veth_src_fwd < 0)
@@ -306,10 +317,8 @@  static int netns_setup_links_and_routes(struct netns_setup_result *result)
 	SYS("ip route add " IP4_NET "/16 dev veth_dst scope global");
 	SYS("ip route add " IP6_SRC "/128 dev veth_dst scope global");
 
-	SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr %s",
-	    veth_dst_fwd_addr);
-	SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr %s",
-	    veth_dst_fwd_addr);
+	SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+	SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD);
 
 	setns_root();
 	return 0;
@@ -560,6 +569,206 @@  static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
 	setns_root();
 }
 
+
+static int tun_open(char *name)
+{
+	struct ifreq ifr;
+	int fd, err;
+
+	fd = open("/dev/net/tun", O_RDWR);
+	if (fd < 0)
+		return -1;
+
+	memset(&ifr, 0, sizeof(ifr));
+
+	ifr.ifr_flags = IFF_TUN | IFF_NO_PI;
+	if (*name)
+		strncpy(ifr.ifr_name, name, IFNAMSIZ);
+
+	err = ioctl(fd, TUNSETIFF, (void *) &ifr);
+	if (!ASSERT_OK(err, "ioctl TUNSETIFF"))
+		goto fail;
+
+	SYS("ip link set dev %s up", name);
+
+	return fd;
+fail:
+	close(fd);
+	return -1;
+}
+
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+enum {
+	SRC_TO_TARGET = 0,
+	TARGET_TO_SRC = 1,
+};
+
+static int tun_relay_loop(int src_fd, int target_fd)
+{
+	fd_set rfds, wfds;
+
+	FD_ZERO(&rfds);
+	FD_ZERO(&wfds);
+
+	for (;;) {
+		char buf[1500];
+		int direction, nread, nwrite;
+
+		FD_SET(src_fd, &rfds);
+		FD_SET(target_fd, &rfds);
+
+		if (select(1 + MAX(src_fd, target_fd), &rfds, NULL, NULL, NULL) < 0) {
+			fprintf(stderr, "select failed: %s\n", strerror(errno));
+			return 1;
+		}
+
+		direction = FD_ISSET(src_fd, &rfds) ? SRC_TO_TARGET : TARGET_TO_SRC;
+
+		nread = read(direction == SRC_TO_TARGET ? src_fd : target_fd, buf, sizeof(buf));
+		if (nread < 0) {
+			fprintf(stderr, "read failed: %s\n", strerror(errno));
+			return 1;
+		}
+
+		nwrite = write(direction == SRC_TO_TARGET ? target_fd : src_fd, buf, nread);
+		if (nwrite != nread) {
+			fprintf(stderr, "write failed: %s\n", strerror(errno));
+			return 1;
+		}
+	}
+}
+
+static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
+{
+	struct test_tc_peer *skel;
+	int err, tunnel_pid = -1;
+	int src_fd, target_fd;
+
+	skel = test_tc_peer__open();
+	if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
+		return;
+
+	skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
+	skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
+
+	err = test_tc_peer__load(skel);
+	if (!ASSERT_OK(err, "test_tc_peer__load")) {
+		test_tc_peer__destroy(skel);
+		return;
+	}
+
+	err = bpf_program__pin(skel->progs.tc_src_l3, SRC_PROG_PIN_FILE);
+	if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE)) {
+		test_tc_peer__destroy(skel);
+		return;
+	}
+
+	/* Start a L3 TUN/TAP tunnel between the src and dst namespaces.
+	 * This test is using TUN/TAP instead of e.g. IPIP or GRE tunnel as those
+	 * expose the L2 headers encapsulating the IP packet to BPF and hence
+	 * don't have skb in suitable state for this test. Alternative to TUN/TAP
+	 * would be e.g. Wireguard which would appear as a pure L3 device to BPF,
+	 * but that requires much more complicated setup.
+	 */
+	if (!ASSERT_OK(setns_by_name(NS_SRC), "setns " NS_SRC))
+		goto fail;
+
+	src_fd = tun_open("tun_src");
+	if (!ASSERT_GE(src_fd, 0, "alloc tun_src"))
+		goto fail;
+
+	if (!ASSERT_OK(setns_by_name(NS_FWD), "setns " NS_FWD))
+		goto fail;
+
+	target_fd = tun_open("tun_fwd");
+	if (!ASSERT_GE(target_fd, 0, "alloc tun_fwd"))
+		goto fail;
+
+	tunnel_pid = fork();
+	if (!ASSERT_GE(tunnel_pid, 0, "fork tun_relay_loop"))
+		goto fail;
+
+	if (tunnel_pid == 0)
+		exit(tun_relay_loop(src_fd, target_fd));
+
+	setns_root();
+
+	/* Load "tc_src_l3" to the tun_fwd interface to redirect packets */
+	if (!ASSERT_OK(setns_by_name(NS_FWD), "setns " NS_FWD))
+		goto fail;
+
+	SYS("tc qdisc add dev tun_fwd clsact");
+	SYS("tc filter add dev tun_fwd ingress bpf da object-pinned "
+	    SRC_PROG_PIN_FILE);
+
+	/* Setup route and neigh tables */
+	SYS("ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24");
+	SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24");
+
+	SYS("ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad");
+	SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad");
+
+	SYS("ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src scope global");
+	SYS("ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD
+	    " dev tun_src scope global");
+	SYS("ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global");
+	SYS("ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global");
+	SYS("ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD
+	    " dev tun_src scope global");
+	SYS("ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global");
+	SYS("ip -netns " NS_DST " route add " IP6_TUN_FWD "/128 dev veth_dst scope global");
+
+	SYS("ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+	SYS("ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+	SYS("ip -netns " NS_DST " neigh add " IP6_TUN_FWD " dev veth_dst lladdr " MAC_DST_FWD);
+
+	/* Enable forwarding back to wards src, but not the other way in order to require the
+	 * BPF redirection.
+	 */
+	err = modify_proc("/proc/sys/net/ipv4/ip_forward", "1");
+	if (!ASSERT_OK(err, "set ipv4.ip_forward"))
+		goto fail;
+
+	err = modify_proc("/proc/sys/net/ipv4/conf/veth_src_fwd/forwarding", "0");
+	if (!ASSERT_OK(err, "set veth_src_fwd.forwarding"))
+		goto fail;
+
+	err = modify_proc("/proc/sys/net/ipv4/conf/tun_fwd/forwarding", "0");
+	if (!ASSERT_OK(err, "set veth_src_fwd.forwarding"))
+		goto fail;
+
+	err = modify_proc("/proc/sys/net/ipv6/conf/all/forwarding", "1");
+	if (!ASSERT_OK(err, "set ipv6.forwarding"))
+		goto fail;
+
+	err = modify_proc("/proc/sys/net/ipv6/conf/veth_src_fwd/forwarding", "0");
+	if (!ASSERT_OK(err, "set ipv6.forwarding"))
+		goto fail;
+
+	err = modify_proc("/proc/sys/net/ipv6/conf/tun_fwd/forwarding", "0");
+	if (!ASSERT_OK(err, "set ipv6.forwarding"))
+		goto fail;
+
+	setns_root();
+
+	test_connectivity();
+
+fail:
+	setns_by_name(NS_FWD);
+	restore_proc();
+	setns_root();
+	if (tunnel_pid > 0) {
+		kill(tunnel_pid, SIGTERM);
+		waitpid(tunnel_pid, NULL, 0);
+	}
+	if (src_fd >= 0)
+		close(src_fd);
+	if (target_fd >= 0)
+		close(target_fd);
+	bpf_program__unpin(skel->progs.tc_src_l3, SRC_PROG_PIN_FILE);
+	test_tc_peer__destroy(skel);
+}
+
 void test_tc_redirect(void)
 {
 	struct netns_setup_result setup_result;
@@ -577,6 +786,9 @@  void test_tc_redirect(void)
 	if (test__start_subtest("tc_redirect_peer"))
 		test_tc_redirect_peer(&setup_result);
 
+	if (test__start_subtest("tc_redirect_peer_l3"))
+		test_tc_redirect_peer_l3(&setup_result);
+
 	if (test__start_subtest("tc_redirect_neigh"))
 		test_tc_redirect_neigh(&setup_result);
 
diff --git a/tools/testing/selftests/bpf/progs/test_tc_peer.c b/tools/testing/selftests/bpf/progs/test_tc_peer.c
index 72c72950c3bb..aea7bec5a1ab 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_peer.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_peer.c
@@ -5,12 +5,18 @@ 
 #include <linux/bpf.h>
 #include <linux/stddef.h>
 #include <linux/pkt_cls.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
 
 #include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
 
 static volatile const __u32 IFINDEX_SRC;
 static volatile const __u32 IFINDEX_DST;
 
+static const __u8 src_mac[] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55};
+static const __u8 dst_mac[] = {0x00, 0x22, 0x33, 0x44, 0x55, 0x66};
+
 SEC("classifier/chk_egress")
 int tc_chk(struct __sk_buff *skb)
 {
@@ -29,4 +35,24 @@  int tc_src(struct __sk_buff *skb)
 	return bpf_redirect_peer(IFINDEX_DST, 0);
 }
 
+SEC("classifier/src_ingress_l3")
+int tc_src_l3(struct __sk_buff *skb)
+{
+	__u16 proto = skb->protocol;
+
+	if (bpf_skb_change_head(skb, ETH_HLEN, 0) != 0)
+		return TC_ACT_SHOT;
+
+	if (bpf_skb_store_bytes(skb, 0, &src_mac, ETH_ALEN, 0) != 0)
+		return TC_ACT_SHOT;
+
+	if (bpf_skb_store_bytes(skb, ETH_ALEN, &dst_mac, ETH_ALEN, 0) != 0)
+		return TC_ACT_SHOT;
+
+	if (bpf_skb_store_bytes(skb, ETH_ALEN + ETH_ALEN, &proto, sizeof(__u16), 0) != 0)
+		return TC_ACT_SHOT;
+
+	return bpf_redirect_peer(IFINDEX_DST, 0);
+}
+
 char __license[] SEC("license") = "GPL";