diff mbox series

trace-cmd: ptp-timesync: Turn off Nagle for TCP sockets

Message ID 20240614132020.1797502-1-avidanborisov@gmail.com (mailing list archive)
State Accepted
Commit ddcd2b117755721aad16bed100a6c4ae808a2d9a
Headers show
Series trace-cmd: ptp-timesync: Turn off Nagle for TCP sockets | expand

Commit Message

avidanborisov@gmail.com June 14, 2024, 1:20 p.m. UTC
From: Avidan Borisov <avidanborisov@gmail.com>

Currently, to establish a tracing session using PTP as the timestamp sync
protocol, trace-cmd sends a total of PTP_SYNC_LOOP*NR_CPUS small messages
serially, one after another. Due to the effects of the Nagle algorithm [1],
each message is effectively delayed by ~200ms, resulting in
PTP_SYNC_LOOP(340)*200ms = ~68 seconds to process a single traced core.

Fix this by setting TCP_NODELAY on any TCP socket created by trace-cmd.

Example on a 32-core machine (host and agent on the same machine for
simplicity):

before:
$ sudo trace-cmd agent -N localhost -p 12345 -D
$ sudo time -p trace-cmd record -p nop -A 127.0.0.1:12345 --name \
    localhost -p nop -- sleep 0
Negotiated ptp time sync protocol with guest localhost
CPU31 data recorded at offset=0x245000
    0 bytes in size (0 uncompressed)
real 1968.48
user 19.15
sys 73.58

after:
$ sudo trace-cmd agent -N localhost -p 12345 -D
$ sudo time -p trace-cmd record -p nop -A 127.0.0.1:12345 --name \
    localhost -p nop -- sleep 0
Negotiated ptp time sync protocol with guest localhost
CPU31 data recorded at offset=0x245000
    0 bytes in size (0 uncompressed)
real 16.52
user 1.33
sys 1.52

This patch reduced the tracing time from 32 minutes to 16 seconds.

[1]: https://brooker.co.za/blog/2024/05/09/nagle.html

Signed-off-by: Avidan Borisov <avidanborisov@gmail.com>
---
 tracecmd/include/trace-local.h | 10 ++++++++++
 tracecmd/trace-listen.c        |  2 ++
 tracecmd/trace-record.c        |  2 ++
 3 files changed, 14 insertions(+)

Comments

Steven Rostedt July 17, 2024, 7:09 p.m. UTC | #1
On Fri, 14 Jun 2024 13:20:20 +0000
avidanborisov@gmail.com wrote:

> From: Avidan Borisov <avidanborisov@gmail.com>
> 
> Currently, to establish a tracing session using PTP as a timestamp sync
> protocol, trace-cmd sends overall PTP_SYNC_LOOP*NR_CPUS small messages
> serially one after another. Due to the effects of the Nagle algorithm [1],
> this will in effect send a message every ~200ms, resulting in
> PTP_SYNC_LOOP(340)*200ms = ~68 seconds to process a single traced core.
> 
> Fix this by setting TCP_NODELAY on any TCP socket created by trace-cmd.
> 
> Example on a 32-core machine (host and agent on the same machine for
> simplicity):
> 
> before:
> $ sudo trace-cmd agent -N localhost -p 12345 -D
> $ sudo time -p trace-cmd record -p nop -A 127.0.0.1:12345 --name
> localhost -p nop -- sleep 0
> Negotiated ptp time sync protocol with guest localhost
> CPU31 data recorded at offset=0x245000
>     0 bytes in size (0 uncompressed)
> real 1968.48
> user 19.15
> sys 73.58
> 
> after:
> $ sudo trace-cmd agent -N localhost -p 12345 -D
> $ sudo time -p trace-cmd record -p nop -A 127.0.0.1:12345 --name
> localhost -p nop -- sleep 0
> Negotiated ptp time sync protocol with guest localhost
> CPU31 data recorded at offset=0x245000
>     0 bytes in size (0 uncompressed)
> real 16.52
> user 1.33
> sys 1.52
> 
> This patch reduced the tracing time from 32 minutes to 16 seconds.

Ouch!

Thanks, I'll go ahead and apply this.

-- Steve


> 
> [1]: https://brooker.co.za/blog/2024/05/09/nagle.html
> 
> Signed-off-by: Avidan Borisov <avidanborisov@gmail.com>
> ---
>  tracecmd/include/trace-local.h | 10 ++++++++++
>  tracecmd/trace-listen.c        |  2 ++
>  tracecmd/trace-record.c        |  2 ++
>  3 files changed, 14 insertions(+)
> 
> diff --git a/tracecmd/include/trace-local.h b/tracecmd/include/trace-local.h
> index 55934f98..1515fbbe 100644
> --- a/tracecmd/include/trace-local.h
> +++ b/tracecmd/include/trace-local.h
> @@ -11,6 +11,8 @@
>  #include <ctype.h>	/* for isdigit() */
>  #include <errno.h>
>  #include <limits.h>
> +#include <netinet/tcp.h>
> +#include <netinet/in.h>
>  
>  #include "trace-cmd-private.h"
>  #include "event-utils.h"
> @@ -470,4 +472,12 @@ void make_pid_name(char *buf, const char *pidfile_basename);
>  void remove_pid_file(const char *pidfile_basename);
>  void make_pid_file(const char *pidfile_basename);
>  
> +static inline void set_tcp_no_delay(int sockfd, int socktype)
> +{
> +	int flag = 1;
> +
> +	if (socktype == SOCK_STREAM)
> +		setsockopt(sockfd, IPPROTO_TCP, TCP_NODELAY, &flag, sizeof(flag));
> +}
> +
>  #endif /* __TRACE_LOCAL_H */
> diff --git a/tracecmd/trace-listen.c b/tracecmd/trace-listen.c
> index 5894a92d..308c1d9d 100644
> --- a/tracecmd/trace-listen.c
> +++ b/tracecmd/trace-listen.c
> @@ -268,6 +268,7 @@ int trace_net_make(int port, enum port_type type)
>  		if (sd < 0)
>  			continue;
>  
> +		set_tcp_no_delay(sd, rp->ai_socktype);
>  		if (bind(sd, rp->ai_addr, rp->ai_addrlen) == 0)
>  			break;
>  
> @@ -1048,6 +1049,7 @@ static int get_network(char *port)
>  		if (sfd < 0)
>  			continue;
>  
> +		set_tcp_no_delay(sfd, rp->ai_socktype);
>  		if (bind(sfd, rp->ai_addr, rp->ai_addrlen) == 0)
>  			break;
>  
> diff --git a/tracecmd/trace-record.c b/tracecmd/trace-record.c
> index 91cc90d4..7d03f9d7 100644
> --- a/tracecmd/trace-record.c
> +++ b/tracecmd/trace-record.c
> @@ -3415,6 +3415,8 @@ static int connect_addr(struct addrinfo *results)
>  			     rp->ai_protocol);
>  		if (sfd == -1)
>  			continue;
> +
> +		set_tcp_no_delay(sfd, rp->ai_socktype);
>  		if (connect(sfd, rp->ai_addr, rp->ai_addrlen) != -1)
>  			break;
>  		close(sfd);
diff mbox series

Patch

diff --git a/tracecmd/include/trace-local.h b/tracecmd/include/trace-local.h
index 55934f98..1515fbbe 100644
--- a/tracecmd/include/trace-local.h
+++ b/tracecmd/include/trace-local.h
@@ -11,6 +11,8 @@ 
 #include <ctype.h>	/* for isdigit() */
 #include <errno.h>
 #include <limits.h>
+#include <netinet/tcp.h>
+#include <netinet/in.h>
 
 #include "trace-cmd-private.h"
 #include "event-utils.h"
@@ -470,4 +472,12 @@  void make_pid_name(char *buf, const char *pidfile_basename);
 void remove_pid_file(const char *pidfile_basename);
 void make_pid_file(const char *pidfile_basename);
 
+static inline void set_tcp_no_delay(int sockfd, int socktype)
+{
+	int flag = 1;
+
+	if (socktype == SOCK_STREAM)
+		setsockopt(sockfd, IPPROTO_TCP, TCP_NODELAY, &flag, sizeof(flag));
+}
+
 #endif /* __TRACE_LOCAL_H */
diff --git a/tracecmd/trace-listen.c b/tracecmd/trace-listen.c
index 5894a92d..308c1d9d 100644
--- a/tracecmd/trace-listen.c
+++ b/tracecmd/trace-listen.c
@@ -268,6 +268,7 @@  int trace_net_make(int port, enum port_type type)
 		if (sd < 0)
 			continue;
 
+		set_tcp_no_delay(sd, rp->ai_socktype);
 		if (bind(sd, rp->ai_addr, rp->ai_addrlen) == 0)
 			break;
 
@@ -1048,6 +1049,7 @@  static int get_network(char *port)
 		if (sfd < 0)
 			continue;
 
+		set_tcp_no_delay(sfd, rp->ai_socktype);
 		if (bind(sfd, rp->ai_addr, rp->ai_addrlen) == 0)
 			break;
 
diff --git a/tracecmd/trace-record.c b/tracecmd/trace-record.c
index 91cc90d4..7d03f9d7 100644
--- a/tracecmd/trace-record.c
+++ b/tracecmd/trace-record.c
@@ -3415,6 +3415,8 @@  static int connect_addr(struct addrinfo *results)
 			     rp->ai_protocol);
 		if (sfd == -1)
 			continue;
+
+		set_tcp_no_delay(sfd, rp->ai_socktype);
 		if (connect(sfd, rp->ai_addr, rp->ai_addrlen) != -1)
 			break;
 		close(sfd);