diff mbox series

[bpf-next,v4,1/6] selftests/bpf: Add traffic monitor functions.

Message ID 20240731193140.758210-2-thinker.li@gmail.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series monitor network traffic for flaky test cases | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 42 this patch: 42
netdev/build_tools success Errors and warnings before: 10 this patch: 10
netdev/cc_maintainers warning 10 maintainers not CCed: kpsingh@kernel.org shuah@kernel.org haoluo@google.com daniel@iogearbox.net john.fastabend@gmail.com jolsa@kernel.org linux-kselftest@vger.kernel.org yonghong.song@linux.dev mykolal@fb.com eddyz87@gmail.com
netdev/build_clang success Errors and warnings before: 44 this patch: 44
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 49 this patch: 49
netdev/checkpatch warning WARNING: else is not generally useful after a break or return WARNING: line length of 82 exceeds 80 columns WARNING: line length of 91 exceeds 80 columns WARNING: line length of 93 exceeds 80 columns WARNING: line length of 94 exceeds 80 columns WARNING: line length of 95 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-13 fail Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-PR fail PR summary
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18

Commit Message

Kui-Feng Lee July 31, 2024, 7:31 p.m. UTC
Add functions that capture packets and print log in the background. They
are supposed to be used for debugging flaky network test cases. A monitored
test case should call traffic_monitor_start() to start a thread to capture
packets in the background for a given namespace and call
traffic_monitor_stop() to stop capturing. (Or, option '-m' implemented by
the later patches.)

    IPv4 TCP packet: 127.0.0.1:48165 -> 127.0.0.1:36707, len 68, ifindex 1, SYN
    IPv4 TCP packet: 127.0.0.1:36707 -> 127.0.0.1:48165, len 60, ifindex 1, SYN, ACK
    IPv4 TCP packet: 127.0.0.1:48165 -> 127.0.0.1:36707, len 60, ifindex 1, ACK
    IPv4 TCP packet: 127.0.0.1:36707 -> 127.0.0.1:48165, len 52, ifindex 1, ACK
    IPv4 TCP packet: 127.0.0.1:48165 -> 127.0.0.1:36707, len 52, ifindex 1, FIN, ACK
    IPv4 TCP packet: 127.0.0.1:36707 -> 127.0.0.1:48165, len 52, ifindex 1, RST, ACK
    Packet file: packets-2172-86-select_reuseport:sockhash-test.log
    #280/87 select_reuseport/sockhash IPv4/TCP LOOPBACK test_detach_bpf:OK

The above is the output of an example. It shows the packets of a connection
and the name of the file that contains captured packets in the directory
/tmp/tmon_pcap. The file can be loaded by tcpdump or wireshark.

This feature only works if TRAFFIC_MONITOR variable has been passed to
build BPF selftests. For example,

  make TRAFFIC_MONITOR=1 -C tools/testing/selftests/bpf

This command will build BPF selftests with this feature enabled.

Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
---
 tools/testing/selftests/bpf/Makefile     |   5 +
 tools/testing/selftests/bpf/test_progs.c | 432 +++++++++++++++++++++++
 tools/testing/selftests/bpf/test_progs.h |  16 +
 3 files changed, 453 insertions(+)

Comments

Stanislav Fomichev July 31, 2024, 9:07 p.m. UTC | #1
On 07/31, Kui-Feng Lee wrote:
> Add functions that capture packets and print log in the background. They
> are supposed to be used for debugging flaky network test cases. A monitored
> test case should call traffic_monitor_start() to start a thread to capture
> packets in the background for a given namespace and call
> traffic_monitor_stop() to stop capturing. (Or, option '-m' implemented by
> the later patches.)
> 
>     IPv4 TCP packet: 127.0.0.1:48165 -> 127.0.0.1:36707, len 68, ifindex 1, SYN
>     IPv4 TCP packet: 127.0.0.1:36707 -> 127.0.0.1:48165, len 60, ifindex 1, SYN, ACK
>     IPv4 TCP packet: 127.0.0.1:48165 -> 127.0.0.1:36707, len 60, ifindex 1, ACK
>     IPv4 TCP packet: 127.0.0.1:36707 -> 127.0.0.1:48165, len 52, ifindex 1, ACK
>     IPv4 TCP packet: 127.0.0.1:48165 -> 127.0.0.1:36707, len 52, ifindex 1, FIN, ACK
>     IPv4 TCP packet: 127.0.0.1:36707 -> 127.0.0.1:48165, len 52, ifindex 1, RST, ACK
>     Packet file: packets-2172-86-select_reuseport:sockhash-test.log
>     #280/87 select_reuseport/sockhash IPv4/TCP LOOPBACK test_detach_bpf:OK
> 
> The above is the output of an example. It shows the packets of a connection
> and the name of the file that contains captured packets in the directory
> /tmp/tmon_pcap. The file can be loaded by tcpdump or wireshark.
> 
> This feature only works if TRAFFIC_MONITOR variable has been passed to
> build BPF selftests. For example,
> 
>   make TRAFFIC_MONITOR=1 -C tools/testing/selftests/bpf
> 
> This command will build BPF selftests with this feature enabled.
> 
> Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
> ---
>  tools/testing/selftests/bpf/Makefile     |   5 +
>  tools/testing/selftests/bpf/test_progs.c | 432 +++++++++++++++++++++++
>  tools/testing/selftests/bpf/test_progs.h |  16 +
>  3 files changed, 453 insertions(+)
> 
> diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
> index 774c6270e377..0a3108311be7 100644
> --- a/tools/testing/selftests/bpf/Makefile
> +++ b/tools/testing/selftests/bpf/Makefile
> @@ -41,6 +41,11 @@ CFLAGS += -g $(OPT_FLAGS) -rdynamic					\
>  LDFLAGS += $(SAN_LDFLAGS)
>  LDLIBS += $(LIBELF_LIBS) -lz -lrt -lpthread
>  
> +ifneq ($(TRAFFIC_MONITOR),)
> +LDLIBS += -lpcap
> +CFLAGS += -DTRAFFIC_MONITOR=1
> +endif

Optionally: can make this more automagical with the following:

LDLIBS += $(shell pkg-config --libs 2>/dev/null)
CFLAGS += $(shell pkg-config --cflags 2>/dev/null)
CFLAGS += $(shell pkg-config --exists libpcap 2>/dev/null && echo "-DTRAFFIC_MONITOR=1")
Martin KaFai Lau Aug. 2, 2024, 3:29 a.m. UTC | #2
On 7/31/24 12:31 PM, Kui-Feng Lee wrote:
> Add functions that capture packets and print log in the background. They
> are supposed to be used for debugging flaky network test cases. A monitored
> test case should call traffic_monitor_start() to start a thread to capture
> packets in the background for a given namespace and call
> traffic_monitor_stop() to stop capturing. (Or, option '-m' implemented by
> the later patches.)
> 
>      IPv4 TCP packet: 127.0.0.1:48165 -> 127.0.0.1:36707, len 68, ifindex 1, SYN
>      IPv4 TCP packet: 127.0.0.1:36707 -> 127.0.0.1:48165, len 60, ifindex 1, SYN, ACK
>      IPv4 TCP packet: 127.0.0.1:48165 -> 127.0.0.1:36707, len 60, ifindex 1, ACK
>      IPv4 TCP packet: 127.0.0.1:36707 -> 127.0.0.1:48165, len 52, ifindex 1, ACK
>      IPv4 TCP packet: 127.0.0.1:48165 -> 127.0.0.1:36707, len 52, ifindex 1, FIN, ACK
>      IPv4 TCP packet: 127.0.0.1:36707 -> 127.0.0.1:48165, len 52, ifindex 1, RST, ACK

nit. Instead of ifindex, it should be ifname now.

>      Packet file: packets-2172-86-select_reuseport:sockhash-test.log
>      #280/87 select_reuseport/sockhash IPv4/TCP LOOPBACK test_detach_bpf:OK
> 
> The above is the output of an example. It shows the packets of a connection
> and the name of the file that contains captured packets in the directory
> /tmp/tmon_pcap. The file can be loaded by tcpdump or wireshark.
> 
> This feature only works if TRAFFIC_MONITOR variable has been passed to
> build BPF selftests. For example,
> 
>    make TRAFFIC_MONITOR=1 -C tools/testing/selftests/bpf
> 
> This command will build BPF selftests with this feature enabled.
> 
> Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
> ---
>   tools/testing/selftests/bpf/Makefile     |   5 +
>   tools/testing/selftests/bpf/test_progs.c | 432 +++++++++++++++++++++++

In the cover letter, it mentioned the traffic monitoring implementation is moved 
from the network_helpers.c to test_progs.c.

Can you share more about the reason?

Is it because the traffic monitor now depends on the test_progs's test name, 
should_tmon...etc ? Can the test name and should_tmon be exported for the 
network_helpers to use?

What other compilation issues did it hit if the traffic monitor codes stay in 
the network_helpers.c? Some individual binaries (with main()) like 
test_tcp_check_syncookie_user that links to network_helpers.o but not to 
test_progs.o?

> +#ifdef TRAFFIC_MONITOR
> +struct tmonitor_ctx {
> +	pcap_t *pcap;
> +	pcap_dumper_t *dumper;
> +	pthread_t thread;
> +	int wake_fd_r;
> +	int wake_fd_w;
> +
> +	bool done;
> +	char pkt_fname[PATH_MAX];
> +	int pcap_fd;
> +};
> +
> +/* Is this packet captured with a Ethernet protocol type? */
> +static bool is_ethernet(const u_char *packet)
> +{
> +	u16 arphdr_type;
> +
> +	memcpy(&arphdr_type, packet + 8, 2);
> +	arphdr_type = ntohs(arphdr_type);
> +
> +	/* Except the following cases, the protocol type contains the
> +	 * Ethernet protocol type for the packet.
> +	 *
> +	 * https://www.tcpdump.org/linktypes/LINKTYPE_LINUX_SLL2.html
> +	 */
> +	switch (arphdr_type) {
> +	case 770: /* ARPHRD_FRAD */
> +	case 778: /* ARPHDR_IPGRE */
> +	case 803: /* ARPHRD_IEEE80211_RADIOTAP */
> +		return false;
> +	}
> +	return true;
> +}
> +
> +/* Show the information of the transport layer in the packet */
> +static void show_transport(const u_char *packet, u16 len, u32 ifindex,
> +			   const char *src_addr, const char *dst_addr,
> +			   u16 proto, bool ipv6)
> +{
> +	struct udphdr *udp;
> +	struct tcphdr *tcp;
> +	u16 src_port, dst_port;
> +	const char *transport_str;
> +	char *ifname, _ifname[IF_NAMESIZE];
> +
> +	ifname = if_indextoname(ifindex, _ifname);
> +	if (!ifname) {
> +		snprintf(_ifname, sizeof(_ifname), "unknown(%d)", ifindex);
> +		ifname = _ifname;
> +	}
> +
> +	if (proto == IPPROTO_UDP) {
> +		udp = (struct udphdr *)packet;
> +		src_port = ntohs(udp->source);
> +		dst_port = ntohs(udp->dest);
> +		transport_str = "UDP";
> +	} else if (proto == IPPROTO_TCP) {
> +		tcp = (struct tcphdr *)packet;
> +		src_port = ntohs(tcp->source);
> +		dst_port = ntohs(tcp->dest);
> +		transport_str = "TCP"
> +;
> +	} else if (proto == IPPROTO_ICMP) {
> +		printf("IPv4 ICMP packet: %s -> %s, len %d, type %d, code %d, ifname %s\n",
> +		       src_addr, dst_addr, len, packet[0], packet[1], ifname);
> +		return;
> +	} else if (proto == IPPROTO_ICMPV6) {
> +		printf("IPv6 ICMPv6 packet: %s -> %s, len %d, type %d, code %d, ifname %s\n",
> +		       src_addr, dst_addr, len, packet[0], packet[1], ifname);
> +		return;
> +	} else {
> +		printf("%s (proto %d): %s -> %s, ifname %s\n",
> +		       ipv6 ? "IPv6" : "IPv4", proto, src_addr, dst_addr, ifname);
> +		return;
> +	}
> +
> +	/* TCP */
> +
> +	if (ipv6)
> +		printf("IPv6 %s packet: [%s]:%d -> [%s]:%d, len %d, ifname %s",
> +		       transport_str, src_addr, src_port,
> +		       dst_addr, dst_port, len, ifname);
> +	else
> +		printf("IPv4 %s packet: %s:%d -> %s:%d, len %d, ifname %s",
> +		       transport_str, src_addr, src_port,
> +		       dst_addr, dst_port, len, ifname);
> +
> +	if (proto == IPPROTO_TCP) {
> +		if (tcp->fin)
> +			printf(", FIN");
> +		if (tcp->syn)
> +			printf(", SYN");
> +		if (tcp->rst)
> +			printf(", RST");
> +		if (tcp->ack)
> +			printf(", ACK");
> +	}
> +
> +	printf("\n");

The TCP packet output is done by multiple printf. There is a good chance that 
one TCP packet is interleaved with the other printf(s), considering the traffic 
monitoring is done in its own thread. I am seeing this in the tc_redirect test.

Does it help to do multiple snprintf (for the tcp flags?) first and then calls 
one printf?

> +}
> +
> +static void show_ipv6_packet(const u_char *packet, u32 ifindex)
> +{
> +	struct ipv6hdr *pkt = (struct ipv6hdr *)packet;
> +	struct in6_addr src;
> +	struct in6_addr dst;
> +	char src_str[INET6_ADDRSTRLEN], dst_str[INET6_ADDRSTRLEN];
> +	u_char proto;
> +
> +	memcpy(&src, &pkt->saddr, sizeof(src));
> +	memcpy(&dst, &pkt->daddr, sizeof(dst));
> +	inet_ntop(AF_INET6, &src, src_str, sizeof(src_str));
> +	inet_ntop(AF_INET6, &dst, dst_str, sizeof(dst_str));
> +	proto = pkt->nexthdr;
> +	show_transport(packet + sizeof(struct ipv6hdr),
> +		       ntohs(pkt->payload_len),
> +		       ifindex, src_str, dst_str, proto, true);
> +}
> +
> +static void show_ipv4_packet(const u_char *packet, u32 ifindex)
> +{
> +	struct iphdr *pkt = (struct iphdr *)packet;
> +	struct in_addr src;
> +	struct in_addr dst;
> +	u_char proto;
> +	char src_str[INET_ADDRSTRLEN], dst_str[INET_ADDRSTRLEN];
> +
> +	memcpy(&src, &pkt->saddr, sizeof(src));
> +	memcpy(&dst, &pkt->daddr, sizeof(dst));
> +	inet_ntop(AF_INET, &src, src_str, sizeof(src_str));
> +	inet_ntop(AF_INET, &dst, dst_str, sizeof(dst_str));
> +	proto = pkt->protocol;
> +	show_transport(packet + sizeof(struct iphdr),
> +		       ntohs(pkt->tot_len),
> +		       ifindex, src_str, dst_str, proto, false);
> +}
> +
> +static void *traffic_monitor_thread(void *arg)
> +{
> +	char *ifname, _ifname[IF_NAMESIZE];
> +	const u_char *packet, *payload;
> +	struct tmonitor_ctx *ctx = arg;
> +	struct pcap_pkthdr header;
> +	pcap_t *pcap = ctx->pcap;
> +	pcap_dumper_t *dumper = ctx->dumper;
> +	int fd = ctx->pcap_fd;
> +	int wake_fd = ctx->wake_fd_r;
> +	u16 proto;
> +	u32 ifindex;
> +	fd_set fds;
> +	int nfds, r;
> +
> +	nfds = (fd > wake_fd ? fd : wake_fd) + 1;
> +	FD_ZERO(&fds);
> +
> +	while (!ctx->done) {
> +		FD_SET(fd, &fds);
> +		FD_SET(wake_fd, &fds);
> +		r = select(nfds, &fds, NULL, NULL, NULL);
> +		if (!r)
> +			continue;
> +		if (r < 0) {
> +			if (errno == EINTR)
> +				continue;
> +			log_err("Fail to select on pcap fd and wake fd: %s", strerror(errno));
> +			break;
> +		}
> +
> +		packet = pcap_next(pcap, &header);
> +		if (!packet)
> +			continue;
> +
> +		/* According to the man page of pcap_dump(), first argument
> +		 * is the pcap_dumper_t pointer even it's argument type is
> +		 * u_char *.
> +		 */
> +		pcap_dump((u_char *)dumper, &header, packet);

The captured file has the "In", "Out", and "M" (Multicast) when it is read by 
tcpdump. Is it easy to printf that in show_ipv[46]_packet() also? Just want to 
ensure we didn't miss it if it is easy.

> +
> +		/* Not sure what other types of packets look like. Here, we
> +		 * parse only Ethernet and compatible packets.
> +		 */
> +		if (!is_ethernet(packet)) {
> +			printf("Packet captured\n");
> +			continue;
> +		}
> +
> +		/* Skip SLL2 header
> +		 * https://www.tcpdump.org/linktypes/LINKTYPE_LINUX_SLL2.html
> +		 *
> +		 * Although the document doesn't mention that, the payload
> +		 * doesn't include the Ethernet header. The payload starts
> +		 * from the first byte of the network layer header.
> +		 */
> +		payload = packet + 20;
> +
> +		memcpy(&proto, packet, 2);
> +		proto = ntohs(proto);
> +		memcpy(&ifindex, packet + 4, 4);
> +		ifindex = ntohl(ifindex);
> +
> +		if (proto == ETH_P_IPV6) {
> +			show_ipv6_packet(payload, ifindex);
> +		} else if (proto == ETH_P_IP) {
> +			show_ipv4_packet(payload, ifindex);
> +		} else {
> +			ifname = if_indextoname(ifindex, _ifname);
> +			if (!ifname) {
> +				snprintf(_ifname, sizeof(_ifname), "unknown(%d)", ifindex);
> +				ifname = _ifname;
> +			}
> +
> +			printf("Unknown network protocol type %x, ifname %s\n", proto, ifname);
> +		}
> +	}
> +
> +	return NULL;
> +}
Martin KaFai Lau Aug. 2, 2024, 3:43 a.m. UTC | #3
On 7/31/24 12:31 PM, Kui-Feng Lee wrote:
> diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
> index cb9d6d46826b..5d4e61fa26a1 100644
> --- a/tools/testing/selftests/bpf/test_progs.h
> +++ b/tools/testing/selftests/bpf/test_progs.h
> @@ -473,4 +473,20 @@ extern void test_loader_fini(struct test_loader *tester);
>   	test_loader_fini(&tester);					       \
>   })
>   
> +struct tmonitor_ctx;
> +
> +#ifdef TRAFFIC_MONITOR
> +struct tmonitor_ctx *traffic_monitor_start(const char *netns);
> +void traffic_monitor_stop(struct tmonitor_ctx *ctx);
> +#else
> +static inline struct tmonitor_ctx *traffic_monitor_start(const char *netns)
> +{
> +	return (struct tmonitor_ctx *)-1;

hmm... from peeking patch 3, only NULL is checked.

While at it, if there is no libpcap during make, is the "-m" option available or 
the test_progs will error out if "-m" is used?

> +}
> +
> +static inline void traffic_monitor_stop(struct tmonitor_ctx *ctx)
> +{
> +}
> +#endif
> +
>   #endif /* __TEST_PROGS_H */
Kui-Feng Lee Aug. 2, 2024, 3:54 a.m. UTC | #4
On 7/31/24 14:07, Stanislav Fomichev wrote:
> On 07/31, Kui-Feng Lee wrote:
>> Add functions that capture packets and print log in the background. They
>> are supposed to be used for debugging flaky network test cases. A monitored
>> test case should call traffic_monitor_start() to start a thread to capture
>> packets in the background for a given namespace and call
>> traffic_monitor_stop() to stop capturing. (Or, option '-m' implemented by
>> the later patches.)
>>
>>      IPv4 TCP packet: 127.0.0.1:48165 -> 127.0.0.1:36707, len 68, ifindex 1, SYN
>>      IPv4 TCP packet: 127.0.0.1:36707 -> 127.0.0.1:48165, len 60, ifindex 1, SYN, ACK
>>      IPv4 TCP packet: 127.0.0.1:48165 -> 127.0.0.1:36707, len 60, ifindex 1, ACK
>>      IPv4 TCP packet: 127.0.0.1:36707 -> 127.0.0.1:48165, len 52, ifindex 1, ACK
>>      IPv4 TCP packet: 127.0.0.1:48165 -> 127.0.0.1:36707, len 52, ifindex 1, FIN, ACK
>>      IPv4 TCP packet: 127.0.0.1:36707 -> 127.0.0.1:48165, len 52, ifindex 1, RST, ACK
>>      Packet file: packets-2172-86-select_reuseport:sockhash-test.log
>>      #280/87 select_reuseport/sockhash IPv4/TCP LOOPBACK test_detach_bpf:OK
>>
>> The above is the output of an example. It shows the packets of a connection
>> and the name of the file that contains captured packets in the directory
>> /tmp/tmon_pcap. The file can be loaded by tcpdump or wireshark.
>>
>> This feature only works if TRAFFIC_MONITOR variable has been passed to
>> build BPF selftests. For example,
>>
>>    make TRAFFIC_MONITOR=1 -C tools/testing/selftests/bpf
>>
>> This command will build BPF selftests with this feature enabled.
>>
>> Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
>> ---
>>   tools/testing/selftests/bpf/Makefile     |   5 +
>>   tools/testing/selftests/bpf/test_progs.c | 432 +++++++++++++++++++++++
>>   tools/testing/selftests/bpf/test_progs.h |  16 +
>>   3 files changed, 453 insertions(+)
>>
>> diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
>> index 774c6270e377..0a3108311be7 100644
>> --- a/tools/testing/selftests/bpf/Makefile
>> +++ b/tools/testing/selftests/bpf/Makefile
>> @@ -41,6 +41,11 @@ CFLAGS += -g $(OPT_FLAGS) -rdynamic					\
>>   LDFLAGS += $(SAN_LDFLAGS)
>>   LDLIBS += $(LIBELF_LIBS) -lz -lrt -lpthread
>>   
>> +ifneq ($(TRAFFIC_MONITOR),)
>> +LDLIBS += -lpcap
>> +CFLAGS += -DTRAFFIC_MONITOR=1
>> +endif
> 
> Optionally: can make this more automagical with the following:
> 
> LDLIBS += $(shell pkg-config --libs 2>/dev/null)
> CFLAGS += $(shell pkg-config --cflags 2>/dev/null)
> CFLAGS += $(shell pkg-config --exists libpcap 2>/dev/null && echo "-DTRAFFIC_MONITOR=1")

Sure! I will try it!
Kui-Feng Lee Aug. 2, 2024, 4:31 a.m. UTC | #5
On 8/1/24 20:29, Martin KaFai Lau wrote:
> On 7/31/24 12:31 PM, Kui-Feng Lee wrote:
>> Add functions that capture packets and print log in the background. They
>> are supposed to be used for debugging flaky network test cases. A 
>> monitored
>> test case should call traffic_monitor_start() to start a thread to 
>> capture
>> packets in the background for a given namespace and call
>> traffic_monitor_stop() to stop capturing. (Or, option '-m' implemented by
>> the later patches.)
>>
>>      IPv4 TCP packet: 127.0.0.1:48165 -> 127.0.0.1:36707, len 68, 
>> ifindex 1, SYN
>>      IPv4 TCP packet: 127.0.0.1:36707 -> 127.0.0.1:48165, len 60, 
>> ifindex 1, SYN, ACK
>>      IPv4 TCP packet: 127.0.0.1:48165 -> 127.0.0.1:36707, len 60, 
>> ifindex 1, ACK
>>      IPv4 TCP packet: 127.0.0.1:36707 -> 127.0.0.1:48165, len 52, 
>> ifindex 1, ACK
>>      IPv4 TCP packet: 127.0.0.1:48165 -> 127.0.0.1:36707, len 52, 
>> ifindex 1, FIN, ACK
>>      IPv4 TCP packet: 127.0.0.1:36707 -> 127.0.0.1:48165, len 52, 
>> ifindex 1, RST, ACK
> 
> nit. Instead of ifindex, it should be ifname now.

Sure! I will update it.

> 
>>      Packet file: packets-2172-86-select_reuseport:sockhash-test.log
>>      #280/87 select_reuseport/sockhash IPv4/TCP LOOPBACK 
>> test_detach_bpf:OK
>>
>> The above is the output of an example. It shows the packets of a 
>> connection
>> and the name of the file that contains captured packets in the directory
>> /tmp/tmon_pcap. The file can be loaded by tcpdump or wireshark.
>>
>> This feature only works if TRAFFIC_MONITOR variable has been passed to
>> build BPF selftests. For example,
>>
>>    make TRAFFIC_MONITOR=1 -C tools/testing/selftests/bpf
>>
>> This command will build BPF selftests with this feature enabled.
>>
>> Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
>> ---
>>   tools/testing/selftests/bpf/Makefile     |   5 +
>>   tools/testing/selftests/bpf/test_progs.c | 432 +++++++++++++++++++++++
> 
> In the cover letter, it mentioned the traffic monitoring implementation 
> is moved from the network_helpers.c to test_progs.c.
> 
> Can you share more about the reason?

network_helpers.c has been used by several test programs.
However, they don't have env that we found in test_progs.c.
That means we could not access env directly. Instead, the caller
have to pass the test name and subtest name to the function.
Leter, we also need to check if a test name matches the patterns. It is
inconvient for users. So, I move these functions to test_progs.c to make
user's life eaiser.


> 
> Is it because the traffic monitor now depends on the test_progs's test 
> name, should_tmon...etc ? Can the test name and should_tmon be exported 
> for the network_helpers to use?

Yes! And in later patches, we also introduce a list of patterns.
> 
> What other compilation issues did it hit if the traffic monitor codes 
> stay in the network_helpers.c? Some individual binaries (with main()) 
> like test_tcp_check_syncookie_user that links to network_helpers.o but 
> not to test_progs.o?

Yes, they are problems as well. These binary also need to link to
libpcap even they don't use it although this is not an important issue.


> 
>> +#ifdef TRAFFIC_MONITOR
>> +struct tmonitor_ctx {
>> +    pcap_t *pcap;
>> +    pcap_dumper_t *dumper;
>> +    pthread_t thread;
>> +    int wake_fd_r;
>> +    int wake_fd_w;
>> +
>> +    bool done;
>> +    char pkt_fname[PATH_MAX];
>> +    int pcap_fd;
>> +};
>> +
>> +/* Is this packet captured with a Ethernet protocol type? */
>> +static bool is_ethernet(const u_char *packet)
>> +{
>> +    u16 arphdr_type;
>> +
>> +    memcpy(&arphdr_type, packet + 8, 2);
>> +    arphdr_type = ntohs(arphdr_type);
>> +
>> +    /* Except the following cases, the protocol type contains the
>> +     * Ethernet protocol type for the packet.
>> +     *
>> +     * https://www.tcpdump.org/linktypes/LINKTYPE_LINUX_SLL2.html
>> +     */
>> +    switch (arphdr_type) {
>> +    case 770: /* ARPHRD_FRAD */
>> +    case 778: /* ARPHDR_IPGRE */
>> +    case 803: /* ARPHRD_IEEE80211_RADIOTAP */
>> +        return false;
>> +    }
>> +    return true;
>> +}
>> +
>> +/* Show the information of the transport layer in the packet */
>> +static void show_transport(const u_char *packet, u16 len, u32 ifindex,
>> +               const char *src_addr, const char *dst_addr,
>> +               u16 proto, bool ipv6)
>> +{
>> +    struct udphdr *udp;
>> +    struct tcphdr *tcp;
>> +    u16 src_port, dst_port;
>> +    const char *transport_str;
>> +    char *ifname, _ifname[IF_NAMESIZE];
>> +
>> +    ifname = if_indextoname(ifindex, _ifname);
>> +    if (!ifname) {
>> +        snprintf(_ifname, sizeof(_ifname), "unknown(%d)", ifindex);
>> +        ifname = _ifname;
>> +    }
>> +
>> +    if (proto == IPPROTO_UDP) {
>> +        udp = (struct udphdr *)packet;
>> +        src_port = ntohs(udp->source);
>> +        dst_port = ntohs(udp->dest);
>> +        transport_str = "UDP";
>> +    } else if (proto == IPPROTO_TCP) {
>> +        tcp = (struct tcphdr *)packet;
>> +        src_port = ntohs(tcp->source);
>> +        dst_port = ntohs(tcp->dest);
>> +        transport_str = "TCP"
>> +;
>> +    } else if (proto == IPPROTO_ICMP) {
>> +        printf("IPv4 ICMP packet: %s -> %s, len %d, type %d, code %d, 
>> ifname %s\n",
>> +               src_addr, dst_addr, len, packet[0], packet[1], ifname);
>> +        return;
>> +    } else if (proto == IPPROTO_ICMPV6) {
>> +        printf("IPv6 ICMPv6 packet: %s -> %s, len %d, type %d, code 
>> %d, ifname %s\n",
>> +               src_addr, dst_addr, len, packet[0], packet[1], ifname);
>> +        return;
>> +    } else {
>> +        printf("%s (proto %d): %s -> %s, ifname %s\n",
>> +               ipv6 ? "IPv6" : "IPv4", proto, src_addr, dst_addr, 
>> ifname);
>> +        return;
>> +    }
>> +
>> +    /* TCP */
>> +
>> +    if (ipv6)
>> +        printf("IPv6 %s packet: [%s]:%d -> [%s]:%d, len %d, ifname %s",
>> +               transport_str, src_addr, src_port,
>> +               dst_addr, dst_port, len, ifname);
>> +    else
>> +        printf("IPv4 %s packet: %s:%d -> %s:%d, len %d, ifname %s",
>> +               transport_str, src_addr, src_port,
>> +               dst_addr, dst_port, len, ifname);
>> +
>> +    if (proto == IPPROTO_TCP) {
>> +        if (tcp->fin)
>> +            printf(", FIN");
>> +        if (tcp->syn)
>> +            printf(", SYN");
>> +        if (tcp->rst)
>> +            printf(", RST");
>> +        if (tcp->ack)
>> +            printf(", ACK");
>> +    }
>> +
>> +    printf("\n");
> 
> The TCP packet output is done by multiple printf. There is a good chance 
> that one TCP packet is interleaved with the other printf(s), considering 
> the traffic monitoring is done in its own thread. I am seeing this in 
> the tc_redirect test.
> 
> Does it help to do multiple snprintf (for the tcp flags?) first and then 
> calls one printf?

Sure, it would help. Or, perform flockfile() and funlockfile().

> 
>> +}
>> +
>> +static void show_ipv6_packet(const u_char *packet, u32 ifindex)
>> +{
>> +    struct ipv6hdr *pkt = (struct ipv6hdr *)packet;
>> +    struct in6_addr src;
>> +    struct in6_addr dst;
>> +    char src_str[INET6_ADDRSTRLEN], dst_str[INET6_ADDRSTRLEN];
>> +    u_char proto;
>> +
>> +    memcpy(&src, &pkt->saddr, sizeof(src));
>> +    memcpy(&dst, &pkt->daddr, sizeof(dst));
>> +    inet_ntop(AF_INET6, &src, src_str, sizeof(src_str));
>> +    inet_ntop(AF_INET6, &dst, dst_str, sizeof(dst_str));
>> +    proto = pkt->nexthdr;
>> +    show_transport(packet + sizeof(struct ipv6hdr),
>> +               ntohs(pkt->payload_len),
>> +               ifindex, src_str, dst_str, proto, true);
>> +}
>> +
>> +static void show_ipv4_packet(const u_char *packet, u32 ifindex)
>> +{
>> +    struct iphdr *pkt = (struct iphdr *)packet;
>> +    struct in_addr src;
>> +    struct in_addr dst;
>> +    u_char proto;
>> +    char src_str[INET_ADDRSTRLEN], dst_str[INET_ADDRSTRLEN];
>> +
>> +    memcpy(&src, &pkt->saddr, sizeof(src));
>> +    memcpy(&dst, &pkt->daddr, sizeof(dst));
>> +    inet_ntop(AF_INET, &src, src_str, sizeof(src_str));
>> +    inet_ntop(AF_INET, &dst, dst_str, sizeof(dst_str));
>> +    proto = pkt->protocol;
>> +    show_transport(packet + sizeof(struct iphdr),
>> +               ntohs(pkt->tot_len),
>> +               ifindex, src_str, dst_str, proto, false);
>> +}
>> +
>> +static void *traffic_monitor_thread(void *arg)
>> +{
>> +    char *ifname, _ifname[IF_NAMESIZE];
>> +    const u_char *packet, *payload;
>> +    struct tmonitor_ctx *ctx = arg;
>> +    struct pcap_pkthdr header;
>> +    pcap_t *pcap = ctx->pcap;
>> +    pcap_dumper_t *dumper = ctx->dumper;
>> +    int fd = ctx->pcap_fd;
>> +    int wake_fd = ctx->wake_fd_r;
>> +    u16 proto;
>> +    u32 ifindex;
>> +    fd_set fds;
>> +    int nfds, r;
>> +
>> +    nfds = (fd > wake_fd ? fd : wake_fd) + 1;
>> +    FD_ZERO(&fds);
>> +
>> +    while (!ctx->done) {
>> +        FD_SET(fd, &fds);
>> +        FD_SET(wake_fd, &fds);
>> +        r = select(nfds, &fds, NULL, NULL, NULL);
>> +        if (!r)
>> +            continue;
>> +        if (r < 0) {
>> +            if (errno == EINTR)
>> +                continue;
>> +            log_err("Fail to select on pcap fd and wake fd: %s", 
>> strerror(errno));
>> +            break;
>> +        }
>> +
>> +        packet = pcap_next(pcap, &header);
>> +        if (!packet)
>> +            continue;
>> +
>> +        /* According to the man page of pcap_dump(), first argument
>> +         * is the pcap_dumper_t pointer even it's argument type is
>> +         * u_char *.
>> +         */
>> +        pcap_dump((u_char *)dumper, &header, packet);
> 
> The captured file has the "In", "Out", and "M" (Multicast) when it is 
> read by tcpdump. Is it easy to printf that in show_ipv[46]_packet() 
> also? Just want to ensure we didn't miss it if it is easy.

No problem! IIRC, it is part of the SSL2 header.

> 
>> +
>> +        /* Not sure what other types of packets look like. Here, we
>> +         * parse only Ethernet and compatible packets.
>> +         */
>> +        if (!is_ethernet(packet)) {
>> +            printf("Packet captured\n");
>> +            continue;
>> +        }
>> +
>> +        /* Skip SLL2 header
>> +         * https://www.tcpdump.org/linktypes/LINKTYPE_LINUX_SLL2.html
>> +         *
>> +         * Although the document doesn't mention that, the payload
>> +         * doesn't include the Ethernet header. The payload starts
>> +         * from the first byte of the network layer header.
>> +         */
>> +        payload = packet + 20;
>> +
>> +        memcpy(&proto, packet, 2);
>> +        proto = ntohs(proto);
>> +        memcpy(&ifindex, packet + 4, 4);
>> +        ifindex = ntohl(ifindex);
>> +
>> +        if (proto == ETH_P_IPV6) {
>> +            show_ipv6_packet(payload, ifindex);
>> +        } else if (proto == ETH_P_IP) {
>> +            show_ipv4_packet(payload, ifindex);
>> +        } else {
>> +            ifname = if_indextoname(ifindex, _ifname);
>> +            if (!ifname) {
>> +                snprintf(_ifname, sizeof(_ifname), "unknown(%d)", 
>> ifindex);
>> +                ifname = _ifname;
>> +            }
>> +
>> +            printf("Unknown network protocol type %x, ifname %s\n", 
>> proto, ifname);
>> +        }
>> +    }
>> +
>> +    return NULL;
>> +}
> 
>
Kui-Feng Lee Aug. 2, 2024, 4:35 a.m. UTC | #6
On 8/1/24 20:43, Martin KaFai Lau wrote:
> On 7/31/24 12:31 PM, Kui-Feng Lee wrote:
>> diff --git a/tools/testing/selftests/bpf/test_progs.h 
>> b/tools/testing/selftests/bpf/test_progs.h
>> index cb9d6d46826b..5d4e61fa26a1 100644
>> --- a/tools/testing/selftests/bpf/test_progs.h
>> +++ b/tools/testing/selftests/bpf/test_progs.h
>> @@ -473,4 +473,20 @@ extern void test_loader_fini(struct test_loader 
>> *tester);
>>       test_loader_fini(&tester);                           \
>>   })
>> +struct tmonitor_ctx;
>> +
>> +#ifdef TRAFFIC_MONITOR
>> +struct tmonitor_ctx *traffic_monitor_start(const char *netns);
>> +void traffic_monitor_stop(struct tmonitor_ctx *ctx);
>> +#else
>> +static inline struct tmonitor_ctx *traffic_monitor_start(const char 
>> *netns)
>> +{
>> +    return (struct tmonitor_ctx *)-1;
> 
> hmm... from peeking patch 3, only NULL is checked.
> 
> While at it, if there is no libpcap during make, is the "-m" option 
> available or the test_progs will error out if "-m" is used?

"-m" is still available so CI can always pass "-m" without consider
the configuration of the binary. But, it would be good idea to
print a warning message for this situation.

> 
>> +}
>> +
>> +static inline void traffic_monitor_stop(struct tmonitor_ctx *ctx)
>> +{
>> +}
>> +#endif
>> +
>>   #endif /* __TEST_PROGS_H */
>
Martin KaFai Lau Aug. 2, 2024, 6:58 p.m. UTC | #7
On 8/1/24 9:31 PM, Kui-Feng Lee wrote:
> 
> 
> On 8/1/24 20:29, Martin KaFai Lau wrote:
>> On 7/31/24 12:31 PM, Kui-Feng Lee wrote:
>>> Add functions that capture packets and print log in the background. They
>>> are supposed to be used for debugging flaky network test cases. A monitored
>>> test case should call traffic_monitor_start() to start a thread to capture
>>> packets in the background for a given namespace and call
>>> traffic_monitor_stop() to stop capturing. (Or, option '-m' implemented by
>>> the later patches.)
>>>
>>>      IPv4 TCP packet: 127.0.0.1:48165 -> 127.0.0.1:36707, len 68, ifindex 1, SYN
>>>      IPv4 TCP packet: 127.0.0.1:36707 -> 127.0.0.1:48165, len 60, ifindex 1, 
>>> SYN, ACK
>>>      IPv4 TCP packet: 127.0.0.1:48165 -> 127.0.0.1:36707, len 60, ifindex 1, ACK
>>>      IPv4 TCP packet: 127.0.0.1:36707 -> 127.0.0.1:48165, len 52, ifindex 1, ACK
>>>      IPv4 TCP packet: 127.0.0.1:48165 -> 127.0.0.1:36707, len 52, ifindex 1, 
>>> FIN, ACK
>>>      IPv4 TCP packet: 127.0.0.1:36707 -> 127.0.0.1:48165, len 52, ifindex 1, 
>>> RST, ACK
>>
>> nit. Instead of ifindex, it should be ifname now.
> 
> Sure! I will update it.
> 
>>
>>>      Packet file: packets-2172-86-select_reuseport:sockhash-test.log
>>>      #280/87 select_reuseport/sockhash IPv4/TCP LOOPBACK test_detach_bpf:OK
>>>
>>> The above is the output of an example. It shows the packets of a connection
>>> and the name of the file that contains captured packets in the directory
>>> /tmp/tmon_pcap. The file can be loaded by tcpdump or wireshark.
>>>
>>> This feature only works if TRAFFIC_MONITOR variable has been passed to
>>> build BPF selftests. For example,
>>>
>>>    make TRAFFIC_MONITOR=1 -C tools/testing/selftests/bpf
>>>
>>> This command will build BPF selftests with this feature enabled.
>>>
>>> Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
>>> ---
>>>   tools/testing/selftests/bpf/Makefile     |   5 +
>>>   tools/testing/selftests/bpf/test_progs.c | 432 +++++++++++++++++++++++
>>
>> In the cover letter, it mentioned the traffic monitoring implementation is 
>> moved from the network_helpers.c to test_progs.c.
>>
>> Can you share more about the reason?
> 
> network_helpers.c has been used by several test programs.
> However, they don't have env that we found in test_progs.c.
> That means we could not access env directly. Instead, the caller
> have to pass the test name and subtest name to the function.
> Leter, we also need to check if a test name matches the patterns. It is
> inconvient for users. So, I move these functions to test_progs.c to make
> user's life eaiser.
> 
> 
>>
>> Is it because the traffic monitor now depends on the test_progs's test name, 
>> should_tmon...etc ? Can the test name and should_tmon be exported for the 
>> network_helpers to use?
> 
> Yes! And in later patches, we also introduce a list of patterns.

The list of patterns matching is summarized in "should_tmon" which can be 
exported through a function?

or I have missed another criteria when deciding tmon should be enabled for a test?

>>
>> What other compilation issues did it hit if the traffic monitor codes stay in 
>> the network_helpers.c? Some individual binaries (with main()) like 
>> test_tcp_check_syncookie_user that links to network_helpers.o but not to 
>> test_progs.o?
> 
> Yes, they are problems as well. These binary also need to link to
> libpcap even they don't use it although this is not an important issue.

I don't think linking the non test_progs binaries to libpcap or not is important.

I am positive there are ways out of it without adding the networking codes to 
the test_progs.c. It sounds like an unnecessary nit now but I believe it is 
useful going forward when making changes and extension to the traffic 
monitoring. May be brainstorm a little to see if there is an way out.

One way could be putting them in a new traffic_monitor.c such that the non 
test_progs binaries won't link to it. and exports the test name and shmod_tmon 
in test_progs.h (e.g. through function).

Another way (better and my preference if it works out) is to ask the 
traffic_monitor_start() to take the the pcap file name args and makeup a 
reasonable default if no filename is given.  Not that I am promoting non 
test_progs tests, traffic_monitor_start() can then be reused by others for legit 
reason. The test_progs's tests usually should not use traffic_monitor_start() 
directly and they should stay with the netns_{new, free}. I think only netns_new 
needs the env to figure out the should_tmon and the pcap filename. May be 
netns_new() can stay in test_progs.c, or rename it to test__netns_new().

wdyt?
Kui-Feng Lee Aug. 2, 2024, 8:37 p.m. UTC | #8
On 8/2/24 11:58, Martin KaFai Lau wrote:
> On 8/1/24 9:31 PM, Kui-Feng Lee wrote:
>>
>>
>> On 8/1/24 20:29, Martin KaFai Lau wrote:
>>> On 7/31/24 12:31 PM, Kui-Feng Lee wrote:
>>>> Add functions that capture packets and print log in the background. 
>>>> They
>>>> are supposed to be used for debugging flaky network test cases. A 
>>>> monitored
>>>> test case should call traffic_monitor_start() to start a thread to 
>>>> capture
>>>> packets in the background for a given namespace and call
>>>> traffic_monitor_stop() to stop capturing. (Or, option '-m' 
>>>> implemented by
>>>> the later patches.)
>>>>
>>>>      IPv4 TCP packet: 127.0.0.1:48165 -> 127.0.0.1:36707, len 68, 
>>>> ifindex 1, SYN
>>>>      IPv4 TCP packet: 127.0.0.1:36707 -> 127.0.0.1:48165, len 60, 
>>>> ifindex 1, SYN, ACK
>>>>      IPv4 TCP packet: 127.0.0.1:48165 -> 127.0.0.1:36707, len 60, 
>>>> ifindex 1, ACK
>>>>      IPv4 TCP packet: 127.0.0.1:36707 -> 127.0.0.1:48165, len 52, 
>>>> ifindex 1, ACK
>>>>      IPv4 TCP packet: 127.0.0.1:48165 -> 127.0.0.1:36707, len 52, 
>>>> ifindex 1, FIN, ACK
>>>>      IPv4 TCP packet: 127.0.0.1:36707 -> 127.0.0.1:48165, len 52, 
>>>> ifindex 1, RST, ACK
>>>
>>> nit. Instead of ifindex, it should be ifname now.
>>
>> Sure! I will update it.
>>
>>>
>>>>      Packet file: packets-2172-86-select_reuseport:sockhash-test.log
>>>>      #280/87 select_reuseport/sockhash IPv4/TCP LOOPBACK 
>>>> test_detach_bpf:OK
>>>>
>>>> The above is the output of an example. It shows the packets of a 
>>>> connection
>>>> and the name of the file that contains captured packets in the 
>>>> directory
>>>> /tmp/tmon_pcap. The file can be loaded by tcpdump or wireshark.
>>>>
>>>> This feature only works if TRAFFIC_MONITOR variable has been passed to
>>>> build BPF selftests. For example,
>>>>
>>>>    make TRAFFIC_MONITOR=1 -C tools/testing/selftests/bpf
>>>>
>>>> This command will build BPF selftests with this feature enabled.
>>>>
>>>> Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
>>>> ---
>>>>   tools/testing/selftests/bpf/Makefile     |   5 +
>>>>   tools/testing/selftests/bpf/test_progs.c | 432 
>>>> +++++++++++++++++++++++
>>>
>>> In the cover letter, it mentioned the traffic monitoring 
>>> implementation is moved from the network_helpers.c to test_progs.c.
>>>
>>> Can you share more about the reason?
>>
>> network_helpers.c has been used by several test programs.
>> However, they don't have env that we found in test_progs.c.
>> That means we could not access env directly. Instead, the caller
>> have to pass the test name and subtest name to the function.
>> Leter, we also need to check if a test name matches the patterns. It is
>> inconvient for users. So, I move these functions to test_progs.c to make
>> user's life eaiser.
>>
>>
>>>
>>> Is it because the traffic monitor now depends on the test_progs's 
>>> test name, should_tmon...etc ? Can the test name and should_tmon be 
>>> exported for the network_helpers to use?
>>
>> Yes! And in later patches, we also introduce a list of patterns.
> 
> The list of patterns matching is summarized in "should_tmon" which can 
> be exported through a function?

Yes! Even with a functio, it still depends on test_progs.c.

> 
> or I have missed another criteria when deciding tmon should be enabled 
> for a test?
> 
>>>
>>> What other compilation issues did it hit if the traffic monitor codes 
>>> stay in the network_helpers.c? Some individual binaries (with main()) 
>>> like test_tcp_check_syncookie_user that links to network_helpers.o 
>>> but not to test_progs.o?
>>
>> Yes, they are problems as well. These binary also need to link to
>> libpcap even they don't use it although this is not an important issue.
> 
> I don't think linking the non test_progs binaries to libpcap or not is 
> important.
> 
> I am positive there are ways out of it without adding the networking 
> codes to the test_progs.c. It sounds like an unnecessary nit now but I 
> believe it is useful going forward when making changes and extension to 
> the traffic monitoring. May be brainstorm a little to see if there is an 
> way out.
> 
> One way could be putting them in a new traffic_monitor.c such that the 
> non test_progs binaries won't link to it. and exports the test name and 
> shmod_tmon in test_progs.h (e.g. through function).
> 
> Another way (better and my preference if it works out) is to ask the 
> traffic_monitor_start() to take the the pcap file name args and makeup a 
> reasonable default if no filename is given.  Not that I am promoting non 
> test_progs tests, traffic_monitor_start() can then be reused by others 
> for legit reason. The test_progs's tests usually should not use 
> traffic_monitor_start() directly and they should stay with the 
> netns_{new, free}. I think only netns_new needs the env to figure out 
> the should_tmon and the pcap filename. May be netns_new() can stay in 
> test_progs.c, or rename it to test__netns_new().
> 
> wdyt?

How about put two ideas together?
Have traffic_monitor.c and macros in test_progs.h to collect
data from env, and pass the data to netns_new() in traffic_monitor.c.

For example,

#define test__netns_new(ns) netns_new(ns, env.test->should_tmon || \
             (env.subtest_state && env.subtest_state->should_tmon), \
             env.test->test_name,                                   \
             env.subtest_state ? env.subtest_state->name: NULL)
Martin KaFai Lau Aug. 2, 2024, 9:12 p.m. UTC | #9
On 8/2/24 1:37 PM, Kui-Feng Lee wrote:
>> One way could be putting them in a new traffic_monitor.c such that the non 
>> test_progs binaries won't link to it. and exports the test name and shmod_tmon 
>> in test_progs.h (e.g. through function).
>>
>> Another way (better and my preference if it works out) is to ask the 
>> traffic_monitor_start() to take the the pcap file name args and makeup a 
>> reasonable default if no filename is given.  Not that I am promoting non 
>> test_progs tests, traffic_monitor_start() can then be reused by others for 
>> legit reason. The test_progs's tests usually should not use 
>> traffic_monitor_start() directly and they should stay with the netns_{new, 
>> free}. I think only netns_new needs the env to figure out the should_tmon and 
>> the pcap filename. May be netns_new() can stay in test_progs.c, or rename it 
>> to test__netns_new().
>>
>> wdyt?
> 
> How about put two ideas together?
> Have traffic_monitor.c and macros in test_progs.h to collect
> data from env, and pass the data to netns_new() in traffic_monitor.c.
> 
> For example,
> 
> #define test__netns_new(ns) netns_new(ns, env.test->should_tmon || \
>              (env.subtest_state && env.subtest_state->should_tmon), \
>              env.test->test_name,                                   \
>              env.subtest_state ? env.subtest_state->name: NULL)
> 

The macro looks ok. I am not sure if it is easier as a macro in .h or just a 
func in test_progs.c. A quick look is the struct of env.test is not defined in 
.h. Just a thought.

If we have this macro/func, a quick thought is there is not much upside to 
create a new traffic_monitor.c instead of putting everything in 
network_helpers.c. I am fine either way. The other non test_progs binaries just 
need to link another traffic_monitor.o in the future if it wants to do 
traffic_monitor_start().
Kui-Feng Lee Aug. 6, 2024, 10:07 p.m. UTC | #10
On 8/1/24 21:35, Kui-Feng Lee wrote:
> 
> 
> On 8/1/24 20:43, Martin KaFai Lau wrote:
>> On 7/31/24 12:31 PM, Kui-Feng Lee wrote:
>>> diff --git a/tools/testing/selftests/bpf/test_progs.h 
>>> b/tools/testing/selftests/bpf/test_progs.h
>>> index cb9d6d46826b..5d4e61fa26a1 100644
>>> --- a/tools/testing/selftests/bpf/test_progs.h
>>> +++ b/tools/testing/selftests/bpf/test_progs.h
>>> @@ -473,4 +473,20 @@ extern void test_loader_fini(struct test_loader 
>>> *tester);
>>>       test_loader_fini(&tester);                           \
>>>   })
>>> +struct tmonitor_ctx;
>>> +
>>> +#ifdef TRAFFIC_MONITOR
>>> +struct tmonitor_ctx *traffic_monitor_start(const char *netns);
>>> +void traffic_monitor_stop(struct tmonitor_ctx *ctx);
>>> +#else
>>> +static inline struct tmonitor_ctx *traffic_monitor_start(const char 
>>> *netns)
>>> +{
>>> +    return (struct tmonitor_ctx *)-1;
>>
>> hmm... from peeking patch 3, only NULL is checked.

When traffic monitor is disable, these two functions are noop.
Returning -1 (not NULL) is convenient for the callers. They don't need
to tell if the error caused by a real error or by the disabled
feature.

>>
>> While at it, if there is no libpcap during make, is the "-m" option 
>> available or the test_progs will error out if "-m" is used?
> 
> "-m" is still available so CI can always pass "-m" without consider
> the configuration of the binary. But, it would be good idea to
> print a warning message for this situation.
> 
>>
>>> +}
>>> +
>>> +static inline void traffic_monitor_stop(struct tmonitor_ctx *ctx)
>>> +{
>>> +}
>>> +#endif
>>> +
>>>   #endif /* __TEST_PROGS_H */
>>
Martin KaFai Lau Aug. 6, 2024, 10:22 p.m. UTC | #11
On 8/6/24 3:07 PM, Kui-Feng Lee wrote:
> 
> 
> On 8/1/24 21:35, Kui-Feng Lee wrote:
>>
>>
>> On 8/1/24 20:43, Martin KaFai Lau wrote:
>>> On 7/31/24 12:31 PM, Kui-Feng Lee wrote:
>>>> diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/ 
>>>> selftests/bpf/test_progs.h
>>>> index cb9d6d46826b..5d4e61fa26a1 100644
>>>> --- a/tools/testing/selftests/bpf/test_progs.h
>>>> +++ b/tools/testing/selftests/bpf/test_progs.h
>>>> @@ -473,4 +473,20 @@ extern void test_loader_fini(struct test_loader *tester);
>>>>       test_loader_fini(&tester);                           \
>>>>   })
>>>> +struct tmonitor_ctx;
>>>> +
>>>> +#ifdef TRAFFIC_MONITOR
>>>> +struct tmonitor_ctx *traffic_monitor_start(const char *netns);
>>>> +void traffic_monitor_stop(struct tmonitor_ctx *ctx);
>>>> +#else
>>>> +static inline struct tmonitor_ctx *traffic_monitor_start(const char *netns)
>>>> +{
>>>> +    return (struct tmonitor_ctx *)-1;
>>>
>>> hmm... from peeking patch 3, only NULL is checked.
> 
> When traffic monitor is disable, these two functions are noop.
> Returning -1 (not NULL) is convenient for the callers. They don't need
> to tell if the error caused by a real error or by the disabled
> feature.

I pasted the code from patch 3 here only to ensure I understand the above 
explanation correctly:

+		netns_obj->tmon = traffic_monitor_start(name);
+		if (!netns_obj->tmon)
		    ^^^^^^^^^^^^^^^^

+			goto fail;

Does it mean the traffic_monitor_start() above will never be called if 
TRAFFIC_MONITOR macro is not defined such that traffic_monitor_start() returning 
-1 but testing for NULL here does not matter?
Kui-Feng Lee Aug. 6, 2024, 11:18 p.m. UTC | #12
On 8/6/24 15:22, Martin KaFai Lau wrote:
> On 8/6/24 3:07 PM, Kui-Feng Lee wrote:
>>
>>
>> On 8/1/24 21:35, Kui-Feng Lee wrote:
>>>
>>>
>>> On 8/1/24 20:43, Martin KaFai Lau wrote:
>>>> On 7/31/24 12:31 PM, Kui-Feng Lee wrote:
>>>>> diff --git a/tools/testing/selftests/bpf/test_progs.h 
>>>>> b/tools/testing/ selftests/bpf/test_progs.h
>>>>> index cb9d6d46826b..5d4e61fa26a1 100644
>>>>> --- a/tools/testing/selftests/bpf/test_progs.h
>>>>> +++ b/tools/testing/selftests/bpf/test_progs.h
>>>>> @@ -473,4 +473,20 @@ extern void test_loader_fini(struct 
>>>>> test_loader *tester);
>>>>>       test_loader_fini(&tester);                           \
>>>>>   })
>>>>> +struct tmonitor_ctx;
>>>>> +
>>>>> +#ifdef TRAFFIC_MONITOR
>>>>> +struct tmonitor_ctx *traffic_monitor_start(const char *netns);
>>>>> +void traffic_monitor_stop(struct tmonitor_ctx *ctx);
>>>>> +#else
>>>>> +static inline struct tmonitor_ctx *traffic_monitor_start(const 
>>>>> char *netns)
>>>>> +{
>>>>> +    return (struct tmonitor_ctx *)-1;
>>>>
>>>> hmm... from peeking patch 3, only NULL is checked.
>>
>> When traffic monitor is disable, these two functions are noop.
>> Returning -1 (not NULL) is convenient for the callers. They don't need
>> to tell if the error caused by a real error or by the disabled
>> feature.
> 
> I pasted the code from patch 3 here only to ensure I understand the 
> above explanation correctly:
> 
> +        netns_obj->tmon = traffic_monitor_start(name);
> +        if (!netns_obj->tmon)
>              ^^^^^^^^^^^^^^^^
> 
> +            goto fail;
> 
> Does it mean the traffic_monitor_start() above will never be called if 
> TRAFFIC_MONITOR macro is not defined such that traffic_monitor_start() 
> returning -1 but testing for NULL here does not matter?

Correct!
Martin KaFai Lau Aug. 6, 2024, 11:41 p.m. UTC | #13
On 8/6/24 4:18 PM, Kui-Feng Lee wrote:
> 
> 
> On 8/6/24 15:22, Martin KaFai Lau wrote:
>> On 8/6/24 3:07 PM, Kui-Feng Lee wrote:
>>>
>>>
>>> On 8/1/24 21:35, Kui-Feng Lee wrote:
>>>>
>>>>
>>>> On 8/1/24 20:43, Martin KaFai Lau wrote:
>>>>> On 7/31/24 12:31 PM, Kui-Feng Lee wrote:
>>>>>> diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/ 
>>>>>> selftests/bpf/test_progs.h
>>>>>> index cb9d6d46826b..5d4e61fa26a1 100644
>>>>>> --- a/tools/testing/selftests/bpf/test_progs.h
>>>>>> +++ b/tools/testing/selftests/bpf/test_progs.h
>>>>>> @@ -473,4 +473,20 @@ extern void test_loader_fini(struct test_loader 
>>>>>> *tester);
>>>>>>       test_loader_fini(&tester);                           \
>>>>>>   })
>>>>>> +struct tmonitor_ctx;
>>>>>> +
>>>>>> +#ifdef TRAFFIC_MONITOR
>>>>>> +struct tmonitor_ctx *traffic_monitor_start(const char *netns);
>>>>>> +void traffic_monitor_stop(struct tmonitor_ctx *ctx);
>>>>>> +#else
>>>>>> +static inline struct tmonitor_ctx *traffic_monitor_start(const char *netns)
>>>>>> +{
>>>>>> +    return (struct tmonitor_ctx *)-1;
>>>>>
>>>>> hmm... from peeking patch 3, only NULL is checked.
>>>
>>> When traffic monitor is disable, these two functions are noop.
>>> Returning -1 (not NULL) is convenient for the callers. They don't need
>>> to tell if the error caused by a real error or by the disabled
>>> feature.
>>
>> I pasted the code from patch 3 here only to ensure I understand the above 
>> explanation correctly:
>>
>> +        netns_obj->tmon = traffic_monitor_start(name);
>> +        if (!netns_obj->tmon)
>>              ^^^^^^^^^^^^^^^^
>>
>> +            goto fail;
>>
>> Does it mean the traffic_monitor_start() above will never be called if 
>> TRAFFIC_MONITOR macro is not defined such that traffic_monitor_start() 
>> returning -1 but testing for NULL here does not matter?
> 
> Correct!

Got it. Then I missed some understanding. Can you explain why the above 
traffic_monitor_start() will never be called?
Kui-Feng Lee Aug. 7, 2024, 12:17 a.m. UTC | #14
On 8/6/24 16:41, Martin KaFai Lau wrote:
> On 8/6/24 4:18 PM, Kui-Feng Lee wrote:
>>
>>
>> On 8/6/24 15:22, Martin KaFai Lau wrote:
>>> On 8/6/24 3:07 PM, Kui-Feng Lee wrote:
>>>>
>>>>
>>>> On 8/1/24 21:35, Kui-Feng Lee wrote:
>>>>>
>>>>>
>>>>> On 8/1/24 20:43, Martin KaFai Lau wrote:
>>>>>> On 7/31/24 12:31 PM, Kui-Feng Lee wrote:
>>>>>>> diff --git a/tools/testing/selftests/bpf/test_progs.h 
>>>>>>> b/tools/testing/ selftests/bpf/test_progs.h
>>>>>>> index cb9d6d46826b..5d4e61fa26a1 100644
>>>>>>> --- a/tools/testing/selftests/bpf/test_progs.h
>>>>>>> +++ b/tools/testing/selftests/bpf/test_progs.h
>>>>>>> @@ -473,4 +473,20 @@ extern void test_loader_fini(struct 
>>>>>>> test_loader *tester);
>>>>>>>       test_loader_fini(&tester);                           \
>>>>>>>   })
>>>>>>> +struct tmonitor_ctx;
>>>>>>> +
>>>>>>> +#ifdef TRAFFIC_MONITOR
>>>>>>> +struct tmonitor_ctx *traffic_monitor_start(const char *netns);
>>>>>>> +void traffic_monitor_stop(struct tmonitor_ctx *ctx);
>>>>>>> +#else
>>>>>>> +static inline struct tmonitor_ctx *traffic_monitor_start(const 
>>>>>>> char *netns)
>>>>>>> +{
>>>>>>> +    return (struct tmonitor_ctx *)-1;
>>>>>>
>>>>>> hmm... from peeking patch 3, only NULL is checked.
>>>>
>>>> When traffic monitor is disable, these two functions are noop.
>>>> Returning -1 (not NULL) is convenient for the callers. They don't need
>>>> to tell if the error caused by a real error or by the disabled
>>>> feature.
>>>
>>> I pasted the code from patch 3 here only to ensure I understand the 
>>> above explanation correctly:
>>>
>>> +        netns_obj->tmon = traffic_monitor_start(name);
>>> +        if (!netns_obj->tmon)
>>>              ^^^^^^^^^^^^^^^^
>>>
>>> +            goto fail;
>>>
>>> Does it mean the traffic_monitor_start() above will never be called 
>>> if TRAFFIC_MONITOR macro is not defined such that 
>>> traffic_monitor_start() returning -1 but testing for NULL here does 
>>> not matter?
>>
>> Correct!
> 
> Got it. Then I missed some understanding. Can you explain why the above 
> traffic_monitor_start() will never be called?
> 

Sorry! Forget my previous word.

In the case that TRAFFIC_MONITOR is not defined,
traffic_monitor_start(name) always returns -1.
So, "!netns_obj->tmon" is always false. "goto fail" is never executed.
That means the test will keep going just like that traffic monitor is
enabled and started correctly.
diff mbox series

Patch

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 774c6270e377..0a3108311be7 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -41,6 +41,11 @@  CFLAGS += -g $(OPT_FLAGS) -rdynamic					\
 LDFLAGS += $(SAN_LDFLAGS)
 LDLIBS += $(LIBELF_LIBS) -lz -lrt -lpthread
 
+ifneq ($(TRAFFIC_MONITOR),)
+LDLIBS += -lpcap
+CFLAGS += -DTRAFFIC_MONITOR=1
+endif
+
 # The following tests perform type punning and they may break strict
 # aliasing rules, which are exploited by both GCC and clang by default
 # while optimizing.  This can lead to broken programs.
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 60fafa2f1ed7..ab54d8420603 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -14,10 +14,25 @@ 
 #include <netinet/in.h>
 #include <sys/select.h>
 #include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
 #include <sys/un.h>
 #include <bpf/btf.h>
 #include "json_writer.h"
 
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <netinet/tcp.h>
+#include <net/if.h>
+#include "network_helpers.h"
+
+#ifdef TRAFFIC_MONITOR
+/* Prevent pcap.h from including pcap/bpf.h and causing conflicts */
+#define PCAP_DONT_INCLUDE_PCAP_BPF_H 1
+#include <pcap/pcap.h>
+#include <pcap/dlt.h>
+#endif
+
 #ifdef __GLIBC__
 #include <execinfo.h> /* backtrace */
 #endif
@@ -416,6 +431,423 @@  static void restore_netns(void)
 	}
 }
 
+#ifdef TRAFFIC_MONITOR
+struct tmonitor_ctx {
+	pcap_t *pcap;
+	pcap_dumper_t *dumper;
+	pthread_t thread;
+	int wake_fd_r;
+	int wake_fd_w;
+
+	bool done;
+	char pkt_fname[PATH_MAX];
+	int pcap_fd;
+};
+
+/* Is this packet captured with a Ethernet protocol type? */
+static bool is_ethernet(const u_char *packet)
+{
+	u16 arphdr_type;
+
+	memcpy(&arphdr_type, packet + 8, 2);
+	arphdr_type = ntohs(arphdr_type);
+
+	/* Except the following cases, the protocol type contains the
+	 * Ethernet protocol type for the packet.
+	 *
+	 * https://www.tcpdump.org/linktypes/LINKTYPE_LINUX_SLL2.html
+	 */
+	switch (arphdr_type) {
+	case 770: /* ARPHRD_FRAD */
+	case 778: /* ARPHDR_IPGRE */
+	case 803: /* ARPHRD_IEEE80211_RADIOTAP */
+		return false;
+	}
+	return true;
+}
+
+/* Show the information of the transport layer in the packet */
+static void show_transport(const u_char *packet, u16 len, u32 ifindex,
+			   const char *src_addr, const char *dst_addr,
+			   u16 proto, bool ipv6)
+{
+	struct udphdr *udp;
+	struct tcphdr *tcp;
+	u16 src_port, dst_port;
+	const char *transport_str;
+	char *ifname, _ifname[IF_NAMESIZE];
+
+	ifname = if_indextoname(ifindex, _ifname);
+	if (!ifname) {
+		snprintf(_ifname, sizeof(_ifname), "unknown(%d)", ifindex);
+		ifname = _ifname;
+	}
+
+	if (proto == IPPROTO_UDP) {
+		udp = (struct udphdr *)packet;
+		src_port = ntohs(udp->source);
+		dst_port = ntohs(udp->dest);
+		transport_str = "UDP";
+	} else if (proto == IPPROTO_TCP) {
+		tcp = (struct tcphdr *)packet;
+		src_port = ntohs(tcp->source);
+		dst_port = ntohs(tcp->dest);
+		transport_str = "TCP"
+;
+	} else if (proto == IPPROTO_ICMP) {
+		printf("IPv4 ICMP packet: %s -> %s, len %d, type %d, code %d, ifname %s\n",
+		       src_addr, dst_addr, len, packet[0], packet[1], ifname);
+		return;
+	} else if (proto == IPPROTO_ICMPV6) {
+		printf("IPv6 ICMPv6 packet: %s -> %s, len %d, type %d, code %d, ifname %s\n",
+		       src_addr, dst_addr, len, packet[0], packet[1], ifname);
+		return;
+	} else {
+		printf("%s (proto %d): %s -> %s, ifname %s\n",
+		       ipv6 ? "IPv6" : "IPv4", proto, src_addr, dst_addr, ifname);
+		return;
+	}
+
+	/* TCP */
+
+	if (ipv6)
+		printf("IPv6 %s packet: [%s]:%d -> [%s]:%d, len %d, ifname %s",
+		       transport_str, src_addr, src_port,
+		       dst_addr, dst_port, len, ifname);
+	else
+		printf("IPv4 %s packet: %s:%d -> %s:%d, len %d, ifname %s",
+		       transport_str, src_addr, src_port,
+		       dst_addr, dst_port, len, ifname);
+
+	if (proto == IPPROTO_TCP) {
+		if (tcp->fin)
+			printf(", FIN");
+		if (tcp->syn)
+			printf(", SYN");
+		if (tcp->rst)
+			printf(", RST");
+		if (tcp->ack)
+			printf(", ACK");
+	}
+
+	printf("\n");
+}
+
+static void show_ipv6_packet(const u_char *packet, u32 ifindex)
+{
+	struct ipv6hdr *pkt = (struct ipv6hdr *)packet;
+	struct in6_addr src;
+	struct in6_addr dst;
+	char src_str[INET6_ADDRSTRLEN], dst_str[INET6_ADDRSTRLEN];
+	u_char proto;
+
+	memcpy(&src, &pkt->saddr, sizeof(src));
+	memcpy(&dst, &pkt->daddr, sizeof(dst));
+	inet_ntop(AF_INET6, &src, src_str, sizeof(src_str));
+	inet_ntop(AF_INET6, &dst, dst_str, sizeof(dst_str));
+	proto = pkt->nexthdr;
+	show_transport(packet + sizeof(struct ipv6hdr),
+		       ntohs(pkt->payload_len),
+		       ifindex, src_str, dst_str, proto, true);
+}
+
+static void show_ipv4_packet(const u_char *packet, u32 ifindex)
+{
+	struct iphdr *pkt = (struct iphdr *)packet;
+	struct in_addr src;
+	struct in_addr dst;
+	u_char proto;
+	char src_str[INET_ADDRSTRLEN], dst_str[INET_ADDRSTRLEN];
+
+	memcpy(&src, &pkt->saddr, sizeof(src));
+	memcpy(&dst, &pkt->daddr, sizeof(dst));
+	inet_ntop(AF_INET, &src, src_str, sizeof(src_str));
+	inet_ntop(AF_INET, &dst, dst_str, sizeof(dst_str));
+	proto = pkt->protocol;
+	show_transport(packet + sizeof(struct iphdr),
+		       ntohs(pkt->tot_len),
+		       ifindex, src_str, dst_str, proto, false);
+}
+
+static void *traffic_monitor_thread(void *arg)
+{
+	char *ifname, _ifname[IF_NAMESIZE];
+	const u_char *packet, *payload;
+	struct tmonitor_ctx *ctx = arg;
+	struct pcap_pkthdr header;
+	pcap_t *pcap = ctx->pcap;
+	pcap_dumper_t *dumper = ctx->dumper;
+	int fd = ctx->pcap_fd;
+	int wake_fd = ctx->wake_fd_r;
+	u16 proto;
+	u32 ifindex;
+	fd_set fds;
+	int nfds, r;
+
+	nfds = (fd > wake_fd ? fd : wake_fd) + 1;
+	FD_ZERO(&fds);
+
+	while (!ctx->done) {
+		FD_SET(fd, &fds);
+		FD_SET(wake_fd, &fds);
+		r = select(nfds, &fds, NULL, NULL, NULL);
+		if (!r)
+			continue;
+		if (r < 0) {
+			if (errno == EINTR)
+				continue;
+			log_err("Fail to select on pcap fd and wake fd: %s", strerror(errno));
+			break;
+		}
+
+		packet = pcap_next(pcap, &header);
+		if (!packet)
+			continue;
+
+		/* According to the man page of pcap_dump(), first argument
+		 * is the pcap_dumper_t pointer even it's argument type is
+		 * u_char *.
+		 */
+		pcap_dump((u_char *)dumper, &header, packet);
+
+		/* Not sure what other types of packets look like. Here, we
+		 * parse only Ethernet and compatible packets.
+		 */
+		if (!is_ethernet(packet)) {
+			printf("Packet captured\n");
+			continue;
+		}
+
+		/* Skip SLL2 header
+		 * https://www.tcpdump.org/linktypes/LINKTYPE_LINUX_SLL2.html
+		 *
+		 * Although the document doesn't mention that, the payload
+		 * doesn't include the Ethernet header. The payload starts
+		 * from the first byte of the network layer header.
+		 */
+		payload = packet + 20;
+
+		memcpy(&proto, packet, 2);
+		proto = ntohs(proto);
+		memcpy(&ifindex, packet + 4, 4);
+		ifindex = ntohl(ifindex);
+
+		if (proto == ETH_P_IPV6) {
+			show_ipv6_packet(payload, ifindex);
+		} else if (proto == ETH_P_IP) {
+			show_ipv4_packet(payload, ifindex);
+		} else {
+			ifname = if_indextoname(ifindex, _ifname);
+			if (!ifname) {
+				snprintf(_ifname, sizeof(_ifname), "unknown(%d)", ifindex);
+				ifname = _ifname;
+			}
+
+			printf("Unknown network protocol type %x, ifname %s\n", proto, ifname);
+		}
+	}
+
+	return NULL;
+}
+
+/* Prepare the pcap handle to capture packets.
+ *
+ * This pcap is non-blocking and immediate mode is enabled to receive
+ * captured packets as soon as possible.  The snaplen is set to 1024 bytes
+ * to limit the size of captured content. The format of the link-layer
+ * header is set to DLT_LINUX_SLL2 to enable handling various link-layer
+ * technologies.
+ */
+static pcap_t *traffic_monitor_prepare_pcap(void)
+{
+	char errbuf[PCAP_ERRBUF_SIZE];
+	pcap_t *pcap;
+	int r;
+
+	/* Listen on all NICs in the namespace */
+	pcap = pcap_create("any", errbuf);
+	if (!pcap) {
+		log_err("Failed to open pcap: %s", errbuf);
+		return NULL;
+	}
+	/* Limit the size of the packet (first N bytes) */
+	r = pcap_set_snaplen(pcap, 1024);
+	if (r) {
+		log_err("Failed to set snaplen: %s", pcap_geterr(pcap));
+		goto error;
+	}
+	/* To receive packets as fast as possible */
+	r = pcap_set_immediate_mode(pcap, 1);
+	if (r) {
+		log_err("Failed to set immediate mode: %s", pcap_geterr(pcap));
+		goto error;
+	}
+	r = pcap_setnonblock(pcap, 1, errbuf);
+	if (r) {
+		log_err("Failed to set nonblock: %s", errbuf);
+		goto error;
+	}
+	r = pcap_activate(pcap);
+	if (r) {
+		log_err("Failed to activate pcap: %s", pcap_geterr(pcap));
+		goto error;
+	}
+	/* Determine the format of the link-layer header */
+	r = pcap_set_datalink(pcap, DLT_LINUX_SLL2);
+	if (r) {
+		log_err("Failed to set datalink: %s", pcap_geterr(pcap));
+		goto error;
+	}
+
+	return pcap;
+error:
+	pcap_close(pcap);
+	return NULL;
+}
+
+static void encode_test_name(char *buf, size_t len)
+{
+	struct prog_test_def *test = env.test;
+	struct subtest_state *subtest_state = env.subtest_state;
+	char *p;
+
+	if (subtest_state)
+		snprintf(buf, len, "%s:%s", test->test_name, subtest_state->name);
+	else
+		snprintf(buf, len, "%s", test->test_name);
+	while ((p = strchr(buf, '/')))
+		*p = '_';
+	while ((p = strchr(buf, ' ')))
+		*p = '_';
+}
+
+#define PCAP_DIR "/tmp/tmon_pcap"
+
+/* Start to monitor the network traffic in the given network namespace.
+ *
+ * netns: the name of the network namespace to monitor. If NULL, the
+ * current network namespace is monitored.
+ *
+ * This function will start a thread to capture packets going through NICs
+ * in the give network namespace.
+ */
+struct tmonitor_ctx *traffic_monitor_start(const char *netns)
+{
+	struct tmonitor_ctx *ctx = NULL;
+	struct nstoken *nstoken = NULL;
+	int pipefd[2] = {-1, -1};
+	static int tmon_seq;
+	char test_name[64];
+	int r;
+
+	if (netns) {
+		nstoken = open_netns(netns);
+		if (!nstoken)
+			return NULL;
+	}
+	ctx = malloc(sizeof(*ctx));
+	if (!ctx) {
+		log_err("Failed to malloc ctx");
+		goto fail_ctx;
+	}
+	memset(ctx, 0, sizeof(*ctx));
+
+	encode_test_name(test_name, sizeof(test_name));
+	snprintf(ctx->pkt_fname, sizeof(ctx->pkt_fname),
+		 PCAP_DIR "/packets-%d-%d-%s-%s.log", getpid(), tmon_seq++,
+		 test_name, netns ? netns : "unknown");
+
+	r = mkdir(PCAP_DIR, 0755);
+	if (r && errno != EEXIST) {
+		log_err("Failed to create " PCAP_DIR);
+		goto fail_pcap;
+	}
+
+	ctx->pcap = traffic_monitor_prepare_pcap();
+	if (!ctx->pcap)
+		goto fail_pcap;
+	ctx->pcap_fd = pcap_get_selectable_fd(ctx->pcap);
+	if (ctx->pcap_fd < 0) {
+		log_err("Failed to get pcap fd");
+		goto fail_dumper;
+	}
+
+	/* Create a packet file */
+	ctx->dumper = pcap_dump_open(ctx->pcap, ctx->pkt_fname);
+	if (!ctx->dumper) {
+		log_err("Failed to open pcap dump: %s", ctx->pkt_fname);
+		goto fail_dumper;
+	}
+
+	/* Create a pipe to wake up the monitor thread */
+	r = pipe(pipefd);
+	if (r) {
+		log_err("Failed to create pipe: %s", strerror(errno));
+		goto fail;
+	}
+	ctx->wake_fd_r = pipefd[0];
+	ctx->wake_fd_w = pipefd[1];
+
+	r = pthread_create(&ctx->thread, NULL, traffic_monitor_thread, ctx);
+	if (r) {
+		log_err("Failed to create thread: %s", strerror(r));
+		goto fail;
+	}
+
+	close_netns(nstoken);
+
+	return ctx;
+
+fail:
+	close(pipefd[0]);
+	close(pipefd[1]);
+
+	pcap_dump_close(ctx->dumper);
+	unlink(ctx->pkt_fname);
+
+fail_dumper:
+	pcap_close(ctx->pcap);
+
+fail_pcap:
+	free(ctx);
+
+fail_ctx:
+	close_netns(nstoken);
+
+	return NULL;
+}
+
+static void traffic_monitor_release(struct tmonitor_ctx *ctx)
+{
+	pcap_close(ctx->pcap);
+	pcap_dump_close(ctx->dumper);
+
+	close(ctx->wake_fd_r);
+	close(ctx->wake_fd_w);
+
+	free(ctx);
+}
+
+/* Stop the network traffic monitor.
+ *
+ * ctx: the context returned by traffic_monitor_start()
+ */
+void traffic_monitor_stop(struct tmonitor_ctx *ctx)
+{
+	if (!ctx)
+		return;
+
+	/* Stop the monitor thread */
+	ctx->done = true;
+	write(ctx->wake_fd_w, "x", 1);
+	pthread_join(ctx->thread, NULL);
+
+	printf("Packet file: %s\n", strrchr(ctx->pkt_fname, '/') + 1);
+
+	traffic_monitor_release(ctx);
+}
+#endif /* TRAFFIC_MONITOR */
+
 void test__end_subtest(void)
 {
 	struct prog_test_def *test = env.test;
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index cb9d6d46826b..5d4e61fa26a1 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -473,4 +473,20 @@  extern void test_loader_fini(struct test_loader *tester);
 	test_loader_fini(&tester);					       \
 })
 
+struct tmonitor_ctx;
+
+#ifdef TRAFFIC_MONITOR
+struct tmonitor_ctx *traffic_monitor_start(const char *netns);
+void traffic_monitor_stop(struct tmonitor_ctx *ctx);
+#else
+static inline struct tmonitor_ctx *traffic_monitor_start(const char *netns)
+{
+	return (struct tmonitor_ctx *)-1;
+}
+
+static inline void traffic_monitor_stop(struct tmonitor_ctx *ctx)
+{
+}
+#endif
+
 #endif /* __TEST_PROGS_H */