diff mbox series

[net] selftests: net: avoid waiting for server in amt.sh forever when it fails.

Message ID 20240508040643.229383-1-ap420073@gmail.com (mailing list archive)
State New
Headers show
Series [net] selftests: net: avoid waiting for server in amt.sh forever when it fails. | expand

Commit Message

Taehee Yoo May 8, 2024, 4:06 a.m. UTC
In the forwarding testcase, it opens a server and a client with the nc.
If the server receives the correct message from the client, it prints OK.
The server prints FAIL if it receives the wrong message from the client.

But if the server can't receive any message, it will not close, so
amt.sh waits forever.
There are several reasons.
1. crash of smcrouted.
2. Send a message from the client to the server before the server is up.

To avoid this problem, the server waits only for 10 seconds.
The client sends messages for 10 seconds.
If the server is successfully closed, it kills the client.

Fixes: c08e8baea78e ("selftests: add amt interface selftest script")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
---
 tools/testing/selftests/net/amt.sh | 63 +++++++++++++++++++-----------
 1 file changed, 40 insertions(+), 23 deletions(-)

Comments

Simon Horman May 9, 2024, 8:38 a.m. UTC | #1
On Wed, May 08, 2024 at 04:06:43AM +0000, Taehee Yoo wrote:
> In the forwarding testcase, it opens a server and a client with the nc.
> The server receives the correct message from NC, it prints OK.
> The server prints FAIL if it receives the wrong message from the client.
> 
> But If the server can't receive any message, it will not close so
> the amt.sh waits forever.
> There are several reasons.
> 1. crash of smcrouted.
> 2. Send a message from the client to the server before the server is up.
> 
> To avoid this problem, the server waits only for 10 seconds.
> The client sends messages for 10 seconds.
> If the server is successfully closed, it kills the client.
> 
> Fixes: c08e8baea78e ("selftests: add amt interface selftest script")
> Signed-off-by: Taehee Yoo <ap420073@gmail.com>
> ---
>  tools/testing/selftests/net/amt.sh | 63 +++++++++++++++++++-----------
>  1 file changed, 40 insertions(+), 23 deletions(-)
> 
> diff --git a/tools/testing/selftests/net/amt.sh b/tools/testing/selftests/net/amt.sh
> index 75528788cb95..16641d3dccce 100755
> --- a/tools/testing/selftests/net/amt.sh
> +++ b/tools/testing/selftests/net/amt.sh
> @@ -77,6 +77,7 @@ readonly LISTENER=$(mktemp -u listener-XXXXXXXX)
>  readonly GATEWAY=$(mktemp -u gateway-XXXXXXXX)
>  readonly RELAY=$(mktemp -u relay-XXXXXXXX)
>  readonly SOURCE=$(mktemp -u source-XXXXXXXX)
> +readonly RESULT=$(mktemp -p /tmp amt-XXXXXXXX)
>  ERR=4
>  err=0
>  
> @@ -85,6 +86,10 @@ exit_cleanup()
>  	for ns in "$@"; do
>  		ip netns delete "${ns}" 2>/dev/null || true
>  	done
> +	rm $RESULT
> +	smcpid=$(< $SMCROUTEDIR/amt.pid)
> +	kill $smcpid
> +	rm -rf $SMCROUTEDIR

Hi Taehee Yoo,

I think this cleanup may be executed before SMCROUTEDIR exists.

For consistency with other temp files, perhaps
it is best to move the creation of SMCROUTEDIR up
to where RESULT is instantiated above.

And perhaps the pid handling can be made conditional on the
existence of $SMCROUTEDIR/amt.pid

	if [ -f "$SMCROUTEDIR/amt.pid" ]; then
		...
	fi

>  
>  	exit $ERR
>  }
> @@ -167,7 +172,9 @@ setup_iptables()
>  
>  setup_mcast_routing()
>  {
> -	ip netns exec "${RELAY}" smcrouted
> +	SMCROUTEDIR="$(mktemp -d)"
> +
> +	ip netns exec "${RELAY}" smcrouted -P $SMCROUTEDIR/amt.pid
>  	ip netns exec "${RELAY}" smcroutectl a relay_src \
>  		172.17.0.2 239.0.0.1 amtr
>  	ip netns exec "${RELAY}" smcroutectl a relay_src \
> @@ -210,40 +217,52 @@ check_features()
>  
>  test_ipv4_forward()
>  {
> -	RESULT4=$(ip netns exec "${LISTENER}" nc -w 1 -l -u 239.0.0.1 4000)
> +	echo "" > $RESULT
> +	bash -c "$(ip netns exec "${LISTENER}" \
> +		timeout 10s nc -w 1 -l -u 239.0.0.1 4000 > $RESULT)"

Hi,

It's unclear to me what the purpose of the bash -c "$(...)" construction is
here. Can the same be achieved using simply:

	ip netns exec "${LISTENER}" \
		timeout 10s nc -w 1 -l -u 239.0.0.1 4000 > $RESULT

Also, not strictly related to this patch, it seems a little odd here, and
elsewhere, to call bash in a /bin/sh script.

> +	RESULT4=$(< $RESULT)
>  	if [ "$RESULT4" == "172.17.0.2" ]; then
>  		printf "TEST: %-60s  [ OK ]\n" "IPv4 amt multicast forwarding"
> -		exit 0
>  	else
>  		printf "TEST: %-60s  [FAIL]\n" "IPv4 amt multicast forwarding"
> -		exit 1
>  	fi
> +
>  }

...

>  send_mcast4()
>  {
>  	sleep 2
> -	ip netns exec "${SOURCE}" bash -c \
> -		'echo 172.17.0.2 | nc -w 1 -u 239.0.0.1 4000' &
> +	for n in {0..10}; do
> +		ip netns exec "${SOURCE}" bash -c \
> +			'echo 172.17.0.2 | nc -w 1 -u 239.0.0.1 4000'
> +		sleep 1
> +	done
> +
>  }
>  
>  send_mcast6()
>  {
>  	sleep 2
> -	ip netns exec "${SOURCE}" bash -c \
> -		'echo 2001:db8:3::2 | nc -w 1 -u ff0e::5:6 6000' &
> +	for n in {0..10}; do
> +		ip netns exec "${SOURCE}" bash -c \
> +			'echo 2001:db8:3::2 | nc -w 1 -u ff0e::5:6 6000'
> +		sleep 1
> +	done
> +
>  }
>  
>  check_features

...
Paolo Abeni May 9, 2024, 9:36 a.m. UTC | #2
On Wed, 2024-05-08 at 04:06 +0000, Taehee Yoo wrote:

> @@ -210,40 +217,52 @@ check_features()
>  
>  test_ipv4_forward()
>  {
> -	RESULT4=$(ip netns exec "${LISTENER}" nc -w 1 -l -u 239.0.0.1 4000)
> +	echo "" > $RESULT
> +	bash -c "$(ip netns exec "${LISTENER}" \
> +		timeout 10s nc -w 1 -l -u 239.0.0.1 4000 > $RESULT)"
> +	RESULT4=$(< $RESULT)

if you instead do:

	RESULT4=$(timeout 10s ip netns exec \
		  "${LISTENER}" nc -w 1 -l -u 239.0.0.1 4000)

You can avoid the additional tmp file (RESULT)

>  	if [ "$RESULT4" == "172.17.0.2" ]; then
>  		printf "TEST: %-60s  [ OK ]\n" "IPv4 amt multicast forwarding"
> -		exit 0
>  	else
>  		printf "TEST: %-60s  [FAIL]\n" "IPv4 amt multicast forwarding"
> -		exit 1
>  	fi
> +
>  }

[...]

> @@ -259,19 +278,17 @@ setup_iptables
>  setup_mcast_routing
>  test_remote_ip
>  test_ipv4_forward &
> -pid=$!
> -send_mcast4
> -wait $pid || err=$?
> -if [ $err -eq 1 ]; then
> -	ERR=1
> -fi
> +spid=$!
> +send_mcast4 &
> +cpid=$!
> +wait $spid

It looks like you don't capture anymore the return code from
test_ipv4_forward, why?

That will fool the test runner infra into thinking that this test is always
successful.

Paolo
Jakub Kicinski May 9, 2024, 4:22 p.m. UTC | #3
On Wed,  8 May 2024 04:06:43 +0000 Taehee Yoo wrote:
> In the forwarding testcase, it opens a server and a client with the nc.
> The server receives the correct message from NC, it prints OK.
> The server prints FAIL if it receives the wrong message from the client.
> 
> But If the server can't receive any message, it will not close so
> the amt.sh waits forever.
> There are several reasons.
> 1. crash of smcrouted.
> 2. Send a message from the client to the server before the server is up.
> 
> To avoid this problem, the server waits only for 10 seconds.
> The client sends messages for 10 seconds.
> If the server is successfully closed, it kills the client.

Since this didn't fix the problem of smcroute crashing I had to take 
a closer look myself.

I filed https://github.com/troglobit/smcroute/issues/207 for smcroute

And sent:
https://lore.kernel.org/all/20240509161919.3939966-1-kuba@kernel.org/
and
https://lore.kernel.org/all/20240509161952.3940476-1-kuba@kernel.org/

Please don't use netcat in tests in the future. There are two
incompatible implementations which always cause hard to repro
issues.
Taehee Yoo May 10, 2024, 4:57 a.m. UTC | #4
On Thu, May 9, 2024 at 5:38 PM Simon Horman <horms@kernel.org> wrote:
>

Hi Simon,
Thanks a lot for the review!

> On Wed, May 08, 2024 at 04:06:43AM +0000, Taehee Yoo wrote:
> > In the forwarding testcase, it opens a server and a client with the nc.
> > The server receives the correct message from NC, it prints OK.
> > The server prints FAIL if it receives the wrong message from the client.
> >
> > But If the server can't receive any message, it will not close so
> > the amt.sh waits forever.
> > There are several reasons.
> > 1. crash of smcrouted.
> > 2. Send a message from the client to the server before the server is up.
> >
> > To avoid this problem, the server waits only for 10 seconds.
> > The client sends messages for 10 seconds.
> > If the server is successfully closed, it kills the client.
> >
> > Fixes: c08e8baea78e ("selftests: add amt interface selftest script")
> > Signed-off-by: Taehee Yoo <ap420073@gmail.com>
> > ---
> >  tools/testing/selftests/net/amt.sh | 63 +++++++++++++++++++-----------
> >  1 file changed, 40 insertions(+), 23 deletions(-)
> >
> > diff --git a/tools/testing/selftests/net/amt.sh b/tools/testing/selftests/net/amt.sh
> > index 75528788cb95..16641d3dccce 100755
> > --- a/tools/testing/selftests/net/amt.sh
> > +++ b/tools/testing/selftests/net/amt.sh
> > @@ -77,6 +77,7 @@ readonly LISTENER=$(mktemp -u listener-XXXXXXXX)
> >  readonly GATEWAY=$(mktemp -u gateway-XXXXXXXX)
> >  readonly RELAY=$(mktemp -u relay-XXXXXXXX)
> >  readonly SOURCE=$(mktemp -u source-XXXXXXXX)
> > +readonly RESULT=$(mktemp -p /tmp amt-XXXXXXXX)
> >  ERR=4
> >  err=0
> >
> > @@ -85,6 +86,10 @@ exit_cleanup()
> >       for ns in "$@"; do
> >               ip netns delete "${ns}" 2>/dev/null || true
> >       done
> > +     rm $RESULT
> > +     smcpid=$(< $SMCROUTEDIR/amt.pid)
> > +     kill $smcpid
> > +     rm -rf $SMCROUTEDIR
>
> Hi Taehee Yoo,
>
> I think this cleanup may be executed before SMCROUTEDIR exists.
>
> For consistency with other temp files, perhaps
> it is best to move the creation of SMCROUTEDIR up
> to where RESULT is instantiated above.
>
> And perhaps the pid handling can be made conditional on the
> existence of $SMCROUTEDIR/amt.pid
>
>         if [ -f "$SMCROUTEDIR/amt.pid" ]; then
>                 ...
>         fi
>

Thanks!
I will check for the pid file before killing smcrouted.

> >
> >       exit $ERR
> >  }
> > @@ -167,7 +172,9 @@ setup_iptables()
> >
> >  setup_mcast_routing()
> >  {
> > -     ip netns exec "${RELAY}" smcrouted
> > +     SMCROUTEDIR="$(mktemp -d)"
> > +
> > +     ip netns exec "${RELAY}" smcrouted -P $SMCROUTEDIR/amt.pid
> >       ip netns exec "${RELAY}" smcroutectl a relay_src \
> >               172.17.0.2 239.0.0.1 amtr
> >       ip netns exec "${RELAY}" smcroutectl a relay_src \
> > @@ -210,40 +217,52 @@ check_features()
> >
> >  test_ipv4_forward()
> >  {
> > -     RESULT4=$(ip netns exec "${LISTENER}" nc -w 1 -l -u 239.0.0.1 4000)
> > +     echo "" > $RESULT
> > +     bash -c "$(ip netns exec "${LISTENER}" \
> > +             timeout 10s nc -w 1 -l -u 239.0.0.1 4000 > $RESULT)"
>
> Hi,
>
> It's unclear to me what the purpose of the bash -c "$(...)" construction is
> here. Can the same be achieved using simply:
>
>         ip netns exec "${LISTENER}" \
>                 timeout 10s nc -w 1 -l -u 239.0.0.1 4000 > $RESULT
>

The purpose of using bash -c was to keep the main bash program from
exiting when the timeout expires, because of the 'set -e' option.
But Jakub avoided that problem by adding '|| true' in the recent patch.


> Also, not strictly related to this patch, it seems a little odd here, and
> elsewhere, to call bash in a /bin/sh script.
>

Oh Thanks,
Shebang should be bash, not sh.
I will fix it.


> > +     RESULT4=$(< $RESULT)
> >       if [ "$RESULT4" == "172.17.0.2" ]; then
> >               printf "TEST: %-60s  [ OK ]\n" "IPv4 amt multicast forwarding"
> > -             exit 0
> >       else
> >               printf "TEST: %-60s  [FAIL]\n" "IPv4 amt multicast forwarding"
> > -             exit 1
> >       fi
> > +
> >  }
>
> ...
>
> >  send_mcast4()
> >  {
> >       sleep 2
> > -     ip netns exec "${SOURCE}" bash -c \
> > -             'echo 172.17.0.2 | nc -w 1 -u 239.0.0.1 4000' &
> > +     for n in {0..10}; do
> > +             ip netns exec "${SOURCE}" bash -c \
> > +                     'echo 172.17.0.2 | nc -w 1 -u 239.0.0.1 4000'
> > +             sleep 1
> > +     done
> > +
> >  }
> >
> >  send_mcast6()
> >  {
> >       sleep 2
> > -     ip netns exec "${SOURCE}" bash -c \
> > -             'echo 2001:db8:3::2 | nc -w 1 -u ff0e::5:6 6000' &
> > +     for n in {0..10}; do
> > +             ip netns exec "${SOURCE}" bash -c \
> > +                     'echo 2001:db8:3::2 | nc -w 1 -u ff0e::5:6 6000'
> > +             sleep 1
> > +     done
> > +
> >  }
> >
> >  check_features
>
> ...
>
> --
> pw-bot: under-review

Thanks a lot!
Taehee Yoo
Taehee Yoo May 10, 2024, 5:05 a.m. UTC | #5
On Thu, May 9, 2024 at 6:36 PM Paolo Abeni <pabeni@redhat.com> wrote:
>

Hi Paolo,
Thank you for the review!

> On Wed, 2024-05-08 at 04:06 +0000, Taehee Yoo wrote:
>
> > @@ -210,40 +217,52 @@ check_features()
> >
> > test_ipv4_forward()
> > {
> > - RESULT4=$(ip netns exec "${LISTENER}" nc -w 1 -l -u 239.0.0.1 4000)
> > + echo "" > $RESULT
> > + bash -c "$(ip netns exec "${LISTENER}" \
> > + timeout 10s nc -w 1 -l -u 239.0.0.1 4000 > $RESULT)"
> > + RESULT4=$(< $RESULT)
>
> if you instead do:
>
> RESULT4=$(timeout 10s ip netns exec \
> "${LISTENER}" nc -w 1 -l -u 239.0.0.1 4000)
>
> You can avoid the additional tmp file (RESULT)
>

Thanks,
In the recent patch from Jakub, a variable is used instead of a file.

> > if [ "$RESULT4" == "172.17.0.2" ]; then
> > printf "TEST: %-60s [ OK ]\n" "IPv4 amt multicast forwarding"
> > - exit 0
> > else
> > printf "TEST: %-60s [FAIL]\n" "IPv4 amt multicast forwarding"
> > - exit 1
> > fi
> > +
> > }
>
> [...]
>
> > @@ -259,19 +278,17 @@ setup_iptables
> > setup_mcast_routing
> > test_remote_ip
> > test_ipv4_forward &
> > -pid=$!
> > -send_mcast4
> > -wait $pid || err=$?
> > -if [ $err -eq 1 ]; then
> > - ERR=1
> > -fi
> > +spid=$!
> > +send_mcast4 &
> > +cpid=$!
> > +wait $spid
>
> It looks like you don't capture anymore the return code from
> test_ipv4_forward, why?
>
> That will fool the test runner infra into thinking that this test is always
> successful.
>

You're right,
Sorry, I didn't consider it.
It should not be changed.


> Paolo
>

Thanks a lot!
Taehee Yoo
Taehee Yoo May 10, 2024, 5:17 a.m. UTC | #6
On Fri, May 10, 2024 at 1:23 AM Jakub Kicinski <kuba@kernel.org> wrote:
>

Hi Jakub,
Thanks a lot for the review and looking into the bug.

> On Wed,  8 May 2024 04:06:43 +0000 Taehee Yoo wrote:
> > In the forwarding testcase, it opens a server and a client with the nc.
> > The server receives the correct message from NC, it prints OK.
> > The server prints FAIL if it receives the wrong message from the client.
> >
> > But If the server can't receive any message, it will not close so
> > the amt.sh waits forever.
> > There are several reasons.
> > 1. crash of smcrouted.
> > 2. Send a message from the client to the server before the server is up.
> >
> > To avoid this problem, the server waits only for 10 seconds.
> > The client sends messages for 10 seconds.
> > If the server is successfully closed, it kills the client.
>
> Since this didn't fix the problem of smcroute crashing I had to take
> a closer look myself.
>
> I filed https://github.com/troglobit/smcroute/issues/207 for smcroute
>

Thank you so much for looking into this bug!
I will test it.

> And sent:
> https://lore.kernel.org/all/20240509161919.3939966-1-kuba@kernel.org/
> and
> https://lore.kernel.org/all/20240509161952.3940476-1-kuba@kernel.org/
>
> Please don't use netcat in tests in the future. There are two
> incompatible implementations which always cause hard to repro
> issues.

Okay, I will not use netcat in the future.

Thanks a lot!
Taehee Yoo
diff mbox series

Patch

diff --git a/tools/testing/selftests/net/amt.sh b/tools/testing/selftests/net/amt.sh
index 75528788cb95..16641d3dccce 100755
--- a/tools/testing/selftests/net/amt.sh
+++ b/tools/testing/selftests/net/amt.sh
@@ -77,6 +77,7 @@  readonly LISTENER=$(mktemp -u listener-XXXXXXXX)
 readonly GATEWAY=$(mktemp -u gateway-XXXXXXXX)
 readonly RELAY=$(mktemp -u relay-XXXXXXXX)
 readonly SOURCE=$(mktemp -u source-XXXXXXXX)
+readonly RESULT=$(mktemp -p /tmp amt-XXXXXXXX)
 ERR=4
 err=0
 
@@ -85,6 +86,10 @@  exit_cleanup()
 	for ns in "$@"; do
 		ip netns delete "${ns}" 2>/dev/null || true
 	done
+	rm $RESULT
+	smcpid=$(< $SMCROUTEDIR/amt.pid)
+	kill $smcpid
+	rm -rf $SMCROUTEDIR
 
 	exit $ERR
 }
@@ -167,7 +172,9 @@  setup_iptables()
 
 setup_mcast_routing()
 {
-	ip netns exec "${RELAY}" smcrouted
+	SMCROUTEDIR="$(mktemp -d)"
+
+	ip netns exec "${RELAY}" smcrouted -P $SMCROUTEDIR/amt.pid
 	ip netns exec "${RELAY}" smcroutectl a relay_src \
 		172.17.0.2 239.0.0.1 amtr
 	ip netns exec "${RELAY}" smcroutectl a relay_src \
@@ -210,40 +217,52 @@  check_features()
 
 test_ipv4_forward()
 {
-	RESULT4=$(ip netns exec "${LISTENER}" nc -w 1 -l -u 239.0.0.1 4000)
+	echo "" > $RESULT
+	bash -c "$(ip netns exec "${LISTENER}" \
+		timeout 10s nc -w 1 -l -u 239.0.0.1 4000 > $RESULT)"
+	RESULT4=$(< $RESULT)
 	if [ "$RESULT4" == "172.17.0.2" ]; then
 		printf "TEST: %-60s  [ OK ]\n" "IPv4 amt multicast forwarding"
-		exit 0
 	else
 		printf "TEST: %-60s  [FAIL]\n" "IPv4 amt multicast forwarding"
-		exit 1
 	fi
+
 }
 
 test_ipv6_forward()
 {
-	RESULT6=$(ip netns exec "${LISTENER}" nc -w 1 -l -u ff0e::5:6 6000)
+	echo "" > $RESULT
+	bash -c "$(ip netns exec "${LISTENER}" \
+		timeout 10s nc -w 1 -l -u ff0e::5:6 6000 > $RESULT)"
+	RESULT6=$(< $RESULT)
 	if [ "$RESULT6" == "2001:db8:3::2" ]; then
 		printf "TEST: %-60s  [ OK ]\n" "IPv6 amt multicast forwarding"
-		exit 0
 	else
 		printf "TEST: %-60s  [FAIL]\n" "IPv6 amt multicast forwarding"
-		exit 1
 	fi
+
 }
 
 send_mcast4()
 {
 	sleep 2
-	ip netns exec "${SOURCE}" bash -c \
-		'echo 172.17.0.2 | nc -w 1 -u 239.0.0.1 4000' &
+	for n in {0..10}; do
+		ip netns exec "${SOURCE}" bash -c \
+			'echo 172.17.0.2 | nc -w 1 -u 239.0.0.1 4000'
+		sleep 1
+	done
+
 }
 
 send_mcast6()
 {
 	sleep 2
-	ip netns exec "${SOURCE}" bash -c \
-		'echo 2001:db8:3::2 | nc -w 1 -u ff0e::5:6 6000' &
+	for n in {0..10}; do
+		ip netns exec "${SOURCE}" bash -c \
+			'echo 2001:db8:3::2 | nc -w 1 -u ff0e::5:6 6000'
+		sleep 1
+	done
+
 }
 
 check_features
@@ -259,19 +278,17 @@  setup_iptables
 setup_mcast_routing
 test_remote_ip
 test_ipv4_forward &
-pid=$!
-send_mcast4
-wait $pid || err=$?
-if [ $err -eq 1 ]; then
-	ERR=1
-fi
+spid=$!
+send_mcast4 &
+cpid=$!
+wait $spid
+kill $cpid
 test_ipv6_forward &
-pid=$!
-send_mcast6
-wait $pid || err=$?
-if [ $err -eq 1 ]; then
-	ERR=1
-fi
+spid=$!
+send_mcast6 &
+cpid=$!
+wait $spid
+kill $cpid
 send_mcast_torture4
 printf "TEST: %-60s  [ OK ]\n" "IPv4 amt traffic forwarding torture"
 send_mcast_torture6