Message ID | 20190315083754.27532-10-tstoyanov@vmware.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | trace-cmd: Timetamps sync between host and guest machines, relying on vsock events. | expand |
On Fri, Mar 15, 2019 at 10:38 AM Tzvetomir Stoyanov <tstoyanov@vmware.com> wrote: > > This is a POC patch, implementing an algorithm for syncing timestamps between > host and guest machines, using vsock trace events to catch the host / guest time. > > Signed-off-by: Tzvetomir Stoyanov <tstoyanov@vmware.com> > --- > include/trace-cmd/trace-cmd.h | 21 +- > tracecmd/Makefile | 1 + > tracecmd/include/trace-local.h | 33 +- > tracecmd/trace-agent.c | 10 +- > tracecmd/trace-msg.c | 209 ++++++++- > tracecmd/trace-record.c | 81 +++- > tracecmd/trace-timesync.c | 824 +++++++++++++++++++++++++++++++++ > 7 files changed, 1144 insertions(+), 35 deletions(-) > create mode 100644 tracecmd/trace-timesync.c > > diff --git a/include/trace-cmd/trace-cmd.h b/include/trace-cmd/trace-cmd.h > index 5552396..905f3ec 100644 > --- a/include/trace-cmd/trace-cmd.h > +++ b/include/trace-cmd/trace-cmd.h > @@ -341,16 +341,29 @@ bool tracecmd_msg_done(struct tracecmd_msg_handle *msg_handle); > void tracecmd_msg_set_done(struct tracecmd_msg_handle *msg_handle); > > int tracecmd_msg_send_trace_req(struct tracecmd_msg_handle *msg_handle, > - int argc, char **argv, bool use_fifos); > + int argc, char **argv, bool use_fifos, > + bool do_tsync); > int tracecmd_msg_recv_trace_req(struct tracecmd_msg_handle *msg_handle, > - int *argc, char ***argv, bool *use_fifos); > + int *argc, char ***argv, bool *use_fifos, > + bool *do_tsync); > > int tracecmd_msg_send_trace_resp(struct tracecmd_msg_handle *msg_handle, > int nr_cpus, int page_size, > - unsigned int *ports, bool use_fifos); > + unsigned int *ports, bool use_fifos, > + bool do_tsync); > int tracecmd_msg_recv_trace_resp(struct tracecmd_msg_handle *msg_handle, > int *nr_cpus, int *page_size, > - unsigned int **ports, bool *use_fifos); > + unsigned int **ports, bool *use_fifos, > + bool *do_tsync); > + > +struct tracecmd_clock_sync; > + > +int tracecmd_msg_rcv_time_sync(struct tracecmd_msg_handle *msg_handle, > + struct tracecmd_clock_sync *clock_sync, > + long long *offset, long long *timestamp); > +int tracecmd_msg_snd_time_sync(struct tracecmd_msg_handle *msg_handle, > + struct tracecmd_clock_sync *clock_sync, > + long long *offset, long long *timestamp); > > /* --- Plugin handling --- */ > extern struct tep_plugin_option trace_ftrace_options[]; > diff --git a/tracecmd/Makefile b/tracecmd/Makefile > index d3e3080..8a73bf7 100644 > --- a/tracecmd/Makefile > +++ b/tracecmd/Makefile > @@ -32,6 +32,7 @@ TRACE_CMD_OBJS += trace-list.o > TRACE_CMD_OBJS += trace-output.o > TRACE_CMD_OBJS += trace-usage.o > TRACE_CMD_OBJS += trace-msg.o > +TRACE_CMD_OBJS += trace-timesync.o > > ifeq ($(VSOCK_DEFINED), 1) > TRACE_CMD_OBJS += trace-agent.o > diff --git a/tracecmd/include/trace-local.h b/tracecmd/include/trace-local.h > index 62f5e47..7688b02 100644 > --- a/tracecmd/include/trace-local.h > +++ b/tracecmd/include/trace-local.h > @@ -26,6 +26,7 @@ extern int quiet; > typedef unsigned long long u64; > > struct buffer_instance; > +struct tracecmd_clock_sync; > > /* for local shared information with trace-cmd executable */ > > @@ -101,7 +102,7 @@ void trace_usage(int argc, char **argv); > > int trace_record_agent(struct tracecmd_msg_handle *msg_handle, > int cpus, int *fds, > - int argc, char **argv, bool use_fifos); > + int argc, char **argv, bool use_fifos, bool do_tsync); > > struct hook_list; > > @@ -214,6 +215,12 @@ struct buffer_instance { > int port; > int *fds; > bool use_fifos; > + bool do_tsync; > + > + struct tracecmd_clock_sync *clock_sync; > + int time_sync_count; > + long long *time_sync_ts; > + long long *time_sync_offsets; > }; > > extern struct buffer_instance top_instance; > @@ -235,6 +242,30 @@ void update_first_instance(struct buffer_instance *instance, int topt); > void show_instance_file(struct buffer_instance *instance, const char *name); > > int count_cpus(void); > + > +struct tracecmd_time_sync_event { > + int id; > + int cpu; > + int pid; > + unsigned long long ts; > +}; > + > +int tracecmd_clock_get_peer(struct tracecmd_clock_sync *clock_context, > + unsigned int *remote_cid, unsigned int *remote_port); > +bool tracecmd_time_sync_check(void); > +void tracecmd_clock_context_free(struct buffer_instance *instance); > +int tracecmd_clock_find_event(struct tracecmd_clock_sync *clock, int cpu, > + struct tracecmd_time_sync_event *event); > +void tracecmd_clock_synch_enable(struct tracecmd_clock_sync *clock_context); > +void tracecmd_clock_synch_disable(struct tracecmd_clock_sync *clock_context); > +void tracecmd_clock_synch_calc_reset(struct tracecmd_clock_sync *clock_context); > +void tracecmd_clock_synch_calc_probe(struct tracecmd_clock_sync *clock_context, > + long long ts_local, long long ts_remote); > +int tracecmd_clock_synch_calc(struct tracecmd_clock_sync *clock_context, > + long long *offset_ret, long long *time_ret); > +void sync_time_with_host_v3(struct buffer_instance *instance); > +void sync_time_with_guest_v3(struct buffer_instance *instance); > + > void write_tracing_on(struct buffer_instance *instance, int on); > char *get_instance_dir(struct buffer_instance *instance); > int write_instance_file(struct buffer_instance *instance, > diff --git a/tracecmd/trace-agent.c b/tracecmd/trace-agent.c > index 7389f72..3d305dc 100644 > --- a/tracecmd/trace-agent.c > +++ b/tracecmd/trace-agent.c > @@ -116,6 +116,7 @@ static void agent_handle(int sd, int nr_cpus, int page_size) > char **argv = NULL; > int argc = 0; > bool use_fifos; > + bool do_tsync; > int *fds; > int ret; > > @@ -128,7 +129,8 @@ static void agent_handle(int sd, int nr_cpus, int page_size) > if (!msg_handle) > die("Failed to allocate message handle"); > > - ret = tracecmd_msg_recv_trace_req(msg_handle, &argc, &argv, &use_fifos); > + ret = tracecmd_msg_recv_trace_req(msg_handle, &argc, &argv, > + &use_fifos, &do_tsync); > if (ret < 0) > die("Failed to receive trace request"); > > @@ -137,13 +139,15 @@ static void agent_handle(int sd, int nr_cpus, int page_size) > > if (!use_fifos) > make_vsocks(nr_cpus, fds, ports); > + if (do_tsync) > + do_tsync = tracecmd_time_sync_check(); > > ret = tracecmd_msg_send_trace_resp(msg_handle, nr_cpus, page_size, > - ports, use_fifos); > + ports, use_fifos, do_tsync); > if (ret < 0) > die("Failed to send trace response"); > > - trace_record_agent(msg_handle, nr_cpus, fds, argc, argv, use_fifos); > + trace_record_agent(msg_handle, nr_cpus, fds, argc, argv, use_fifos, do_tsync); > > free(argv[0]); > free(argv); > diff --git a/tracecmd/trace-msg.c b/tracecmd/trace-msg.c > index 065a01e..786e3a9 100644 > --- a/tracecmd/trace-msg.c > +++ b/tracecmd/trace-msg.c > @@ -18,6 +18,7 @@ > #include <stdarg.h> > #include <string.h> > #include <unistd.h> > +#include <time.h> Unused header. > #include <arpa/inet.h> > #include <sys/types.h> > #include <linux/types.h> > @@ -26,8 +27,12 @@ > #include "trace-local.h" > #include "trace-msg.h" > > +typedef __u16 u16; > +typedef __s16 s16; > typedef __u32 u32; > typedef __be32 be32; > +typedef __u64 u64; > +typedef __s64 s64; > > static inline void dprint(const char *fmt, ...) > { > @@ -50,6 +55,9 @@ static inline void dprint(const char *fmt, ...) > > unsigned int page_size; > > +/* Try a few times to get an accurate time sync */ > +#define TSYNC_TRIES 300 > + > struct tracecmd_msg_opt { > be32 size; > be32 opt_cmd; > @@ -76,6 +84,20 @@ struct tracecmd_msg_trace_resp { > be32 page_size; > } __attribute__((packed)); > > +struct tracecmd_msg_tsync_stop { > + long long offset; > + long long timestamp; > +} __attribute__((packed)); > + > +struct tracecmd_msg_tsync_req { > + u16 cpu; > +} __attribute__((packed)); > + > +struct tracecmd_msg_tsync_resp { > + u64 time; > +} __attribute__((packed)); > + > + > struct tracecmd_msg_header { > be32 size; > be32 cmd; > @@ -83,14 +105,19 @@ struct tracecmd_msg_header { > } __attribute__((packed)); > > #define MSG_MAP \ > - C(CLOSE, 0, 0), \ > - C(TINIT, 1, sizeof(struct tracecmd_msg_tinit)), \ > - C(RINIT, 2, sizeof(struct tracecmd_msg_rinit)), \ > - C(SEND_DATA, 3, 0), \ > - C(FIN_DATA, 4, 0), \ > - C(NOT_SUPP, 5, 0), \ > - C(TRACE_REQ, 6, sizeof(struct tracecmd_msg_trace_req)), \ > - C(TRACE_RESP, 7, sizeof(struct tracecmd_msg_trace_resp)), > + C(CLOSE, 0, 0), \ > + C(TINIT, 1, sizeof(struct tracecmd_msg_tinit)), \ > + C(RINIT, 2, sizeof(struct tracecmd_msg_rinit)), \ > + C(SEND_DATA, 3, 0), \ > + C(FIN_DATA, 4, 0), \ > + C(NOT_SUPP, 5, 0), \ > + C(TRACE_REQ, 6, sizeof(struct tracecmd_msg_trace_req)), \ > + C(TRACE_RESP, 7, sizeof(struct tracecmd_msg_trace_resp)),\ > + C(TSYNC_START, 8, 0), \ > + C(TSYNC_STOP, 9, sizeof(struct tracecmd_msg_tsync_stop)),\ > + C(TSYNC_PROBE, 11, 0), \ > + C(TSYNC_REQ, 11, sizeof(struct tracecmd_msg_tsync_req)), \ > + C(TSYNC_RESP, 12, sizeof(struct tracecmd_msg_tsync_resp)), > > #undef C > #define C(a,b,c) MSG_##a = b > @@ -120,10 +147,13 @@ static const char *cmd_to_name(int cmd) > struct tracecmd_msg { > struct tracecmd_msg_header hdr; > union { > - struct tracecmd_msg_tinit tinit; > - struct tracecmd_msg_rinit rinit; > - struct tracecmd_msg_trace_req trace_req; > - struct tracecmd_msg_trace_resp trace_resp; > + struct tracecmd_msg_tinit tinit; > + struct tracecmd_msg_rinit rinit; > + struct tracecmd_msg_trace_req trace_req; > + struct tracecmd_msg_trace_resp trace_resp; > + struct tracecmd_msg_tsync_stop ts_stop; > + struct tracecmd_msg_tsync_req ts_req; > + struct tracecmd_msg_tsync_resp ts_resp; > }; > union { > struct tracecmd_msg_opt *opt; > @@ -161,6 +191,7 @@ static int msg_write(int fd, struct tracecmd_msg *msg) > > enum msg_trace_flags { > MSG_TRACE_USE_FIFOS = 1 << 0, > + MSG_TRACE_DO_TSYNC = 1 << 1, > }; > > enum msg_opt_command { > @@ -744,7 +775,8 @@ error: > return ret; > } > > -static int make_trace_req(struct tracecmd_msg *msg, int argc, char **argv, bool use_fifos) > +static int make_trace_req(struct tracecmd_msg *msg, int argc, char **argv, > + bool use_fifos, bool do_tsync) > { > size_t args_size = 0; > char *p; > @@ -754,7 +786,12 @@ static int make_trace_req(struct tracecmd_msg *msg, int argc, char **argv, bool > args_size += strlen(argv[i]) + 1; > > msg->hdr.size = htonl(ntohl(msg->hdr.size) + args_size); > - msg->trace_req.flags = use_fifos ? htonl(MSG_TRACE_USE_FIFOS) : htonl(0); > + msg->trace_req.flags = 0; > + if (use_fifos) > + msg->trace_req.flags |= MSG_TRACE_USE_FIFOS; > + if (do_tsync) > + msg->trace_req.flags |= MSG_TRACE_DO_TSYNC; > + msg->trace_req.flags = htonl(msg->trace_req.flags); > msg->trace_req.argc = htonl(argc); > msg->buf = calloc(args_size, 1); > if (!msg->buf) > @@ -768,13 +805,14 @@ static int make_trace_req(struct tracecmd_msg *msg, int argc, char **argv, bool > } > > int tracecmd_msg_send_trace_req(struct tracecmd_msg_handle *msg_handle, > - int argc, char **argv, bool use_fifos) > + int argc, char **argv, bool use_fifos, > + bool do_tsync) > { > struct tracecmd_msg msg; > int ret; > > tracecmd_msg_init(MSG_TRACE_REQ, &msg); > - ret = make_trace_req(&msg, argc, argv, use_fifos); > + ret = make_trace_req(&msg, argc, argv, use_fifos, do_tsync); > if (ret < 0) > return ret; > > @@ -787,7 +825,8 @@ int tracecmd_msg_send_trace_req(struct tracecmd_msg_handle *msg_handle, > * free(argv); > */ > int tracecmd_msg_recv_trace_req(struct tracecmd_msg_handle *msg_handle, > - int *argc, char ***argv, bool *use_fifos) > + int *argc, char ***argv, bool *use_fifos, > + bool *do_tsync) > { > struct tracecmd_msg msg; > char *p, *buf_end, **args; > @@ -840,6 +879,7 @@ int tracecmd_msg_recv_trace_req(struct tracecmd_msg_handle *msg_handle, > *argc = nr_args; > *argv = args; > *use_fifos = ntohl(msg.trace_req.flags) & MSG_TRACE_USE_FIFOS; > + *do_tsync = ntohl(msg.trace_req.flags) & MSG_TRACE_DO_TSYNC; > > /* > * On success we're passing msg.buf to the caller through argv[0] so we > @@ -859,14 +899,136 @@ out: > return ret; > } > > +int tracecmd_msg_rcv_time_sync(struct tracecmd_msg_handle *msg_handle, > + struct tracecmd_clock_sync *clock_context, > + long long *offset, long long *timestamp) > +{ > + struct tracecmd_time_sync_event event; > + unsigned int remote_cid = 0; > + struct tracecmd_msg msg; > + int cpu_pid, ret; > + > + if (clock_context == NULL || msg_handle == NULL) > + return 0; > + > + if (offset) > + *offset = 0; > + > + ret = tracecmd_msg_recv(msg_handle->fd, &msg); > + if (ret < 0 || ntohl(msg.hdr.cmd) == MSG_TSYNC_STOP) > + return 0; > + if (ntohl(msg.hdr.cmd) != MSG_TSYNC_START) { > + handle_unexpected_msg(msg_handle, &msg); > + return 0; > + } > + > + tracecmd_clock_get_peer(clock_context, &remote_cid, NULL); > + tracecmd_msg_init(MSG_TSYNC_START, &msg); > + tracecmd_msg_send(msg_handle->fd, &msg); > + tracecmd_clock_synch_enable(clock_context); > + > + do { > + memset(&event, 0, sizeof(event)); > + ret = tracecmd_msg_recv(msg_handle->fd, &msg); > + if (ret < 0 || ntohl(msg.hdr.cmd) == MSG_TSYNC_STOP) > + break; > + if (ntohl(msg.hdr.cmd) != MSG_TSYNC_PROBE) { > + handle_unexpected_msg(msg_handle, &msg); > + break; > + } > + ret = tracecmd_msg_recv(msg_handle->fd, &msg); > + if (ret < 0 || ntohl(msg.hdr.cmd) == MSG_TSYNC_STOP) > + break; > + if (ntohl(msg.hdr.cmd) != MSG_TSYNC_REQ) { > + handle_unexpected_msg(msg_handle, &msg); > + break; > + } > + /* Get kvm event related to the corresponding VCPU context */ > + cpu_pid = get_guest_vcpu_pid(remote_cid, ntohs(msg.ts_req.cpu)); > + tracecmd_clock_find_event(clock_context, cpu_pid, &event); > + tracecmd_msg_init(MSG_TSYNC_RESP, &msg); > + msg.ts_resp.time = htonll(event.ts); > + tracecmd_msg_send(msg_handle->fd, &msg); > + } while (true); > + > + tracecmd_clock_synch_disable(clock_context); > + > + if (ret >= 0 && ntohl(msg.hdr.cmd) == MSG_TSYNC_STOP) { > + if (offset) > + *offset = ntohll(msg.ts_stop.offset); > + if (timestamp) > + *timestamp = ntohll(msg.ts_stop.timestamp); > + } > + > + msg_free(&msg); > + return 0; > +} > + > +int tracecmd_msg_snd_time_sync(struct tracecmd_msg_handle *msg_handle, > + struct tracecmd_clock_sync *clock_context, > + long long *offset, long long *timestamp) > +{ > + struct tracecmd_time_sync_event event; > + int sync_loop = TSYNC_TRIES; > + struct tracecmd_msg msg; > + int ret; > + > + if (clock_context == NULL || msg_handle == NULL) > + return 0; > + > + tracecmd_msg_init(MSG_TSYNC_START, &msg); > + tracecmd_msg_send(msg_handle->fd, &msg); > + ret = tracecmd_msg_recv(msg_handle->fd, &msg); > + if (ret < 0 || ntohl(msg.hdr.cmd) != MSG_TSYNC_START) > + return 0; > + tracecmd_clock_synch_calc_reset(clock_context); > + tracecmd_clock_synch_enable(clock_context); > + > + do { > + tracecmd_msg_init(MSG_TSYNC_PROBE, &msg); > + tracecmd_msg_send(msg_handle->fd, &msg); > + /* Get the ts and CPU of the sent event */ > + ret = tracecmd_clock_find_event(clock_context, -1, &event); > + tracecmd_msg_init(MSG_TSYNC_REQ, &msg); > + msg.ts_req.cpu = htons(event.cpu); > + tracecmd_msg_send(msg_handle->fd, &msg); > + memset(&msg, 0, sizeof(msg)); > + ret = tracecmd_msg_recv(msg_handle->fd, &msg); > + if (ret < 0) > + break; > + if (ntohl(msg.hdr.cmd) != MSG_TSYNC_RESP) { > + handle_unexpected_msg(msg_handle, &msg); > + break; > + } > + tracecmd_clock_synch_calc_probe(clock_context, > + event.ts, > + htonll(msg.ts_resp.time)); > + } while (--sync_loop); > + > + tracecmd_clock_synch_disable(clock_context); > + tracecmd_clock_synch_calc(clock_context, offset, timestamp); > + tracecmd_msg_init(MSG_TSYNC_STOP, &msg); > + msg.ts_stop.offset = htonll(*offset); > + msg.ts_stop.timestamp = htonll(*timestamp); > + tracecmd_msg_send(msg_handle->fd, &msg); > + > + msg_free(&msg); > + return 0; > +} > + > static int make_trace_resp(struct tracecmd_msg *msg, int page_size, int nr_cpus, > - unsigned int *ports, bool use_fifos) > + unsigned int *ports, bool use_fifos, bool do_tsync) > { > int ports_size = nr_cpus * sizeof(*msg->port_array); > int i; > > msg->hdr.size = htonl(ntohl(msg->hdr.size) + ports_size); > - msg->trace_resp.flags = use_fifos ? htonl(MSG_TRACE_USE_FIFOS) : htonl(0); > + msg->trace_resp.flags = 0; > + if (use_fifos) > + msg->trace_resp.flags |= MSG_TRACE_USE_FIFOS; > + if (do_tsync) > + msg->trace_resp.flags |= MSG_TRACE_DO_TSYNC; > + msg->trace_resp.flags = htonl(msg->trace_resp.flags); > msg->trace_resp.cpus = htonl(nr_cpus); > msg->trace_resp.page_size = htonl(page_size); > > @@ -882,13 +1044,14 @@ static int make_trace_resp(struct tracecmd_msg *msg, int page_size, int nr_cpus, > > int tracecmd_msg_send_trace_resp(struct tracecmd_msg_handle *msg_handle, > int nr_cpus, int page_size, > - unsigned int *ports, bool use_fifos) > + unsigned int *ports, bool use_fifos, > + bool do_tsync) > { > struct tracecmd_msg msg; > int ret; > > tracecmd_msg_init(MSG_TRACE_RESP, &msg); > - ret = make_trace_resp(&msg, page_size, nr_cpus, ports, use_fifos); > + ret = make_trace_resp(&msg, page_size, nr_cpus, ports, use_fifos, do_tsync); > if (ret < 0) > return ret; > > @@ -897,7 +1060,8 @@ int tracecmd_msg_send_trace_resp(struct tracecmd_msg_handle *msg_handle, > > int tracecmd_msg_recv_trace_resp(struct tracecmd_msg_handle *msg_handle, > int *nr_cpus, int *page_size, > - unsigned int **ports, bool *use_fifos) > + unsigned int **ports, bool *use_fifos, > + bool *do_tsync) > { > struct tracecmd_msg msg; > ssize_t buf_len; > @@ -920,6 +1084,7 @@ int tracecmd_msg_recv_trace_resp(struct tracecmd_msg_handle *msg_handle, > } > > *use_fifos = ntohl(msg.trace_resp.flags) & MSG_TRACE_USE_FIFOS; > + *do_tsync = ntohl(msg.trace_resp.flags) & MSG_TRACE_DO_TSYNC; > *nr_cpus = ntohl(msg.trace_resp.cpus); > *page_size = ntohl(msg.trace_resp.page_size); > *ports = calloc(*nr_cpus, sizeof(**ports)); > diff --git a/tracecmd/trace-record.c b/tracecmd/trace-record.c > index b4ea40f..c923093 100644 > --- a/tracecmd/trace-record.c > +++ b/tracecmd/trace-record.c > @@ -210,6 +210,8 @@ struct common_record_context { > char *date2ts; > char *max_graph_depth; > int data_flags; > + int time_shift_count; > + struct tracecmd_option *time_shift_option; > > int record_all; > int total_disable; > @@ -649,11 +651,20 @@ static void tell_guests_to_stop(void) > for_all_instances(instance) { > if (is_guest(instance)) { > tracecmd_msg_send_close_msg(instance->msg_handle); > - tracecmd_msg_handle_close(instance->msg_handle); > } > } > } > > +static void close_guests_handle(void) > +{ > + struct buffer_instance *instance; > + > + for_all_instances(instance) { > + if (is_guest(instance)) > + tracecmd_msg_handle_close(instance->msg_handle); > + } > +} > + > static void stop_threads(enum trace_type type) > { > struct timeval tv = { 0, 0 }; > @@ -3355,6 +3366,7 @@ static void connect_to_agent(struct buffer_instance *instance) > unsigned int *ports; > int i, *fds = NULL; > bool use_fifos = false; > + bool do_tsync; > > if (!no_fifos) { > nr_fifos = open_guest_fifos(instance->name, &fds); > @@ -3366,17 +3378,19 @@ static void connect_to_agent(struct buffer_instance *instance) > die("Failed to connect to vsocket @%d:%d", > instance->cid, instance->port); > > + do_tsync = tracecmd_time_sync_check(); > + > msg_handle = tracecmd_msg_handle_alloc(sd, 0); > if (!msg_handle) > die("Failed to allocate message handle"); > > ret = tracecmd_msg_send_trace_req(msg_handle, instance->argc, > - instance->argv, use_fifos); > + instance->argv, use_fifos, do_tsync); > if (ret < 0) > die("Failed to send trace request"); > > ret = tracecmd_msg_recv_trace_resp(msg_handle, &nr_cpus, &page_size, > - &ports, &use_fifos); > + &ports, &use_fifos, &do_tsync); > if (ret < 0) > die("Failed to receive trace response"); Do we want to warn users that there won't be synchronization if do_tsync is false here? > > @@ -3397,10 +3411,13 @@ static void connect_to_agent(struct buffer_instance *instance) > } > > instance->use_fifos = use_fifos; > + instance->do_tsync = do_tsync; > instance->cpu_count = nr_cpus; > > /* the msg_handle now points to the guest fd */ > instance->msg_handle = msg_handle; > + > + sync_time_with_guest_v3(instance); > } > > static void setup_guest(struct buffer_instance *instance) > @@ -3425,10 +3442,13 @@ static void setup_guest(struct buffer_instance *instance) > close(fd); > } > > -static void setup_agent(struct buffer_instance *instance, struct common_record_context *ctx) > +static void setup_agent(struct buffer_instance *instance, > + struct common_record_context *ctx) > { > struct tracecmd_output *network_handle; > > + sync_time_with_host_v3(instance); > + > network_handle = tracecmd_create_init_fd_msg(instance->msg_handle, > listed_events); > add_options(network_handle, ctx); > @@ -5603,6 +5623,42 @@ static bool has_local_instances(void) > return false; > } > > +//#define TSYNC_DEBUG > +static void write_guest_time_shift(struct buffer_instance *instance) > +{ > + struct tracecmd_output *handle; > + struct iovec vector[3]; > + char *file; > + int fd; > + > + if (!instance->time_sync_count) > + return; > + > + file = get_guest_file(output_file, instance->name); > + fd = open(file, O_RDWR); > + if (fd < 0) > + die("error opening %s", file); > + put_temp_file(file); > + handle = tracecmd_get_output_handle_fd(fd); > + vector[0].iov_len = sizeof(instance->time_sync_count); > + vector[0].iov_base = &instance->time_sync_count; > + vector[1].iov_len = sizeof(long long) * instance->time_sync_count; > + vector[1].iov_base = instance->time_sync_ts; > + vector[2].iov_len = sizeof(long long) * instance->time_sync_count; > + vector[2].iov_base = instance->time_sync_offsets; > + tracecmd_add_option_v(handle, TRACECMD_OPTION_TIME_SHIFT, vector, 3); > + tracecmd_append_options(handle); > + tracecmd_output_close(handle); > +#ifdef TSYNC_DEBUG > + if (instance->time_sync_count > 1) > + printf("\n\rDetected %lld ns ts offset drift in %lld ns for guest %s\n\r", > + instance->time_sync_offsets[instance->time_sync_count-1] - > + instance->time_sync_offsets[0], > + instance->time_sync_ts[instance->time_sync_count-1]- > + instance->time_sync_ts[0], instance->name); > +#endif > +} > + > /* > * This function contains common code for the following commands: > * record, start, stream, profile. > @@ -5722,6 +5778,20 @@ static void record_trace(int argc, char **argv, > if (!latency) > wait_threads(); > > + if (ctx->instance && is_agent(ctx->instance)) { > + sync_time_with_host_v3(ctx->instance); > + tracecmd_clock_context_free(ctx->instance); > + } else { > + for_all_instances(instance) { > + if (is_guest(instance)) { > + sync_time_with_guest_v3(instance); > + write_guest_time_shift(instance); > + tracecmd_clock_context_free(instance); > + } > + } > + } > + close_guests_handle(); > + > if (IS_RECORD(ctx)) { > record_data(ctx); > delete_thread_data(); > @@ -5860,7 +5930,7 @@ void trace_record(int argc, char **argv) > int trace_record_agent(struct tracecmd_msg_handle *msg_handle, > int cpus, int *fds, > int argc, char **argv, > - bool use_fifos) > + bool use_fifos, bool do_tsync) > { > struct common_record_context ctx; > char **argv_plus; > @@ -5887,6 +5957,7 @@ int trace_record_agent(struct tracecmd_msg_handle *msg_handle, > > ctx.instance->fds = fds; > ctx.instance->use_fifos = use_fifos; > + ctx.instance->do_tsync = do_tsync; > ctx.instance->flags |= BUFFER_FL_AGENT; > ctx.instance->msg_handle = msg_handle; > msg_handle->version = V3_PROTOCOL; > diff --git a/tracecmd/trace-timesync.c b/tracecmd/trace-timesync.c > new file mode 100644 > index 0000000..3f648c9 > --- /dev/null > +++ b/tracecmd/trace-timesync.c > @@ -0,0 +1,824 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * Copyright (C) 2019, VMware, Tzvetomir Stoyanov <tstoyanov@vmware.com> > + * > + */ > + > +#include <fcntl.h> > +#include <stdlib.h> > +#include <unistd.h> > +#include <arpa/inet.h> > +#include <linux/vm_sockets.h> > +#include "trace-local.h" > + > +//#define TSYNC_DEBUG > + > +static int clock_sync_x86_host_init(struct tracecmd_clock_sync *clock_context); > +static int clock_sync_x86_host_free(struct tracecmd_clock_sync *clock_context); > +static int clock_sync_x86_host_find_events(struct tracecmd_clock_sync *clock, > + int cpu, struct tracecmd_time_sync_event *event); > +static int clock_sync_x86_guest_init(struct tracecmd_clock_sync *clock_context); > +static int clock_sync_x86_guest_free(struct tracecmd_clock_sync *clock_context); > +static int clock_sync_x86_guest_find_events(struct tracecmd_clock_sync *clock, > + int pid, > + struct tracecmd_time_sync_event *event); > + > +struct tracecmd_event_descr { > + char *system; > + char *name; > +}; > + > +struct tracecmd_ftrace_param { > + char *file; > + char *set; > + char *reset; > +}; > + > +enum clock_sync_context { > + CLOCK_KVM_X86_VSOCK_HOST, > + CLOCK_KVM_X86_VSOCK_GUEST, > + CLOCK_CONTEXT_MAX, > +}; > + > +struct tracecmd_clock_sync { > + enum clock_sync_context clock_context_id; > + struct tracecmd_ftrace_param *ftrace_params; > + struct tracecmd_time_sync_event *events; > + int events_count; > + struct tep_handle *tep; > + struct buffer_instance *vinst; > + > + int probes_count; > + int bad_probes; > + int probes_size; > + long long *times; > + long long *offsets; > + long long offset_av; > + long long offset_min; > + long long offset_max; > + int debug_fd; > + > + unsigned int local_cid; > + unsigned int local_port; > + unsigned int remote_cid; > + unsigned int remote_port; > +}; > + > +struct { > + const char *systems[3]; > + struct tracecmd_ftrace_param ftrace_params[5]; > + struct tracecmd_event_descr events[3]; > + int (*clock_sync_init)(struct tracecmd_clock_sync *clock_context); > + int (*clock_sync_free)(struct tracecmd_clock_sync *clock_context); > + int (*clock_sync_find_events)(struct tracecmd_clock_sync *clock_context, > + int pid, > + struct tracecmd_time_sync_event *event); > + int (*clock_sync_load)(struct tracecmd_clock_sync *clock_context); > + int (*clock_sync_unload)(struct tracecmd_clock_sync *clock_context); > +} static clock_sync[CLOCK_CONTEXT_MAX] = { > + { /* CLOCK_KVM_X86_VSOCK_HOST */ > + .systems = {"vsock", "ftrace", NULL}, > + .ftrace_params = { > + {"set_ftrace_filter", "vmx_read_l1_tsc_offset\nsvm_read_l1_tsc_offset", "\0"}, > + {"current_tracer", "function", "nop"}, > + {"events/vsock/virtio_transport_recv_pkt/enable", "1", "0"}, > + {"events/vsock/virtio_transport_recv_pkt/filter", NULL, "\0"}, > + {NULL, NULL, NULL} }, > + .events = { > + {.system = "ftrace", .name = "function"}, > + {.system = "vsock", .name = "virtio_transport_recv_pkt"}, > + {.system = NULL, .name = NULL} }, > + clock_sync_x86_host_init, > + clock_sync_x86_host_free, > + clock_sync_x86_host_find_events, > + }, > + > + { /* CLOCK_KVM_X86_VSOCK_GUEST */ > + .systems = { "vsock", "ftrace", NULL}, > + .ftrace_params = { > + {"set_ftrace_filter", "vp_notify", "\0"}, > + {"current_tracer", "function", "nop"}, > + {"events/vsock/virtio_transport_alloc_pkt/enable", "1", "0"}, > + {"events/vsock/virtio_transport_alloc_pkt/filter", NULL, "\0"}, > + {NULL, NULL, NULL}, > + }, > + .events = { > + {.system = "vsock", .name = "virtio_transport_alloc_pkt"}, > + {.system = "ftrace", .name = "function"}, > + {.system = NULL, .name = NULL} > + }, > + clock_sync_x86_guest_init, > + clock_sync_x86_guest_free, > + clock_sync_x86_guest_find_events, > + } > +}; > + > +static int clock_sync_x86_host_init(struct tracecmd_clock_sync *clock_context) > +{ > + char vsock_filter[255]; > + > + snprintf(vsock_filter, 255, > + "src_cid==%u && src_port==%u && dst_cid==%u && dst_port==%u && len!=0", > + clock_context->remote_cid, clock_context->remote_port, > + clock_context->local_cid, clock_context->local_port); > + > + clock_context->ftrace_params[3].set = strdup(vsock_filter); > + return 1; > +} > + > +static int clock_sync_x86_host_free(struct tracecmd_clock_sync *clock_context) > +{ > + free(clock_context->ftrace_params[3].set); > + clock_context->ftrace_params[3].set = NULL; > + return 1; > +} > + > +static int clock_sync_x86_guest_init(struct tracecmd_clock_sync *clock_context) > +{ > + char vsock_filter[255]; > + > + snprintf(vsock_filter, 255, > + "src_cid==%u && src_port==%u && dst_cid==%u && dst_port==%u && len!=0", > + clock_context->local_cid, clock_context->local_port, > + clock_context->remote_cid, clock_context->remote_port); > + > + clock_context->ftrace_params[3].set = strdup(vsock_filter); > + return 1; > +} > + > +static int clock_sync_x86_guest_free(struct tracecmd_clock_sync *clock_context) > +{ > + free(clock_context->ftrace_params[3].set); > + clock_context->ftrace_params[3].set = NULL; > + return 1; > +} > + > +static int > +get_events_in_page(struct tep_handle *tep, void *page, > + int size, int cpu, struct tracecmd_time_sync_event **events, > + int *events_count, int *events_size) > +{ > + struct tracecmd_time_sync_event *events_array = NULL; > + struct tep_record *last_record = NULL; > + struct tep_event *event = NULL; > + struct tep_record *record; > + int id, cnt = 0; > + > + if (size <= 0) > + return 0; > + > + if (*events == NULL) { > + *events = malloc(10*sizeof(struct tracecmd_time_sync_event)); > + *events_size = 10; > + *events_count = 0; > + } > + > + while (true) { > + event = NULL; > + record = tracecmd_read_page_record(tep, page, size, > + last_record); > + if (!record) > + break; > + free_record(last_record); > + id = tep_data_type(tep, record); > + event = tep_data_event_from_type(tep, id); > + if (event) { > + if (*events_count >= *events_size) { > + events_array = realloc(*events, > + ((3*(*events_size))/2)* > + sizeof(struct tracecmd_time_sync_event)); > + if (events_array) { > + *events = events_array; > + (*events_size) = (((*events_size)*3)/2); > + } > + } > + > + if (*events_count < *events_size) { > + (*events)[*events_count].ts = record->ts; > + (*events)[*events_count].cpu = cpu; > + (*events)[*events_count].id = id; > + (*events)[*events_count].pid = tep_data_pid(tep, record); > + (*events_count)++; > + cnt++; > + } > + } > + last_record = record; > + } > + free_record(last_record); > + > + return cnt; > +} > + > +static int sync_events_cmp(const void *a, const void *b) > +{ > + const struct tracecmd_time_sync_event *ea = (const struct tracecmd_time_sync_event *)a; > + const struct tracecmd_time_sync_event *eb = (const struct tracecmd_time_sync_event *)b; > + > + if (ea->ts > eb->ts) > + return 1; > + if (ea->ts < eb->ts) > + return -1; > + return 0; > +} > + > +static int find_sync_events(struct tep_handle *pevent, > + struct tracecmd_time_sync_event *recorded, > + int rsize, struct tracecmd_time_sync_event *events) > +{ > + int i = 0, j = 0; > + > + while (i < rsize) { > + if (!events[j].ts && events[j].id == recorded[i].id && > + (events[j].pid < 0 || events[j].pid == recorded[i].pid)) { > + events[j].cpu = recorded[i].cpu; > + events[j].ts = recorded[i].ts; > + j++; > + } else if (j > 0 && events[j-1].id == recorded[i].id && > + (events[j-1].pid < 0 || events[j-1].pid == recorded[i].pid)) { > + events[j-1].cpu = recorded[i].cpu; > + events[j-1].ts = recorded[i].ts; > + } > + i++; > + } > + return j; > +} > + > +//#define TSYNC_RBUFFER_DEBUG > +static int find_raw_events(struct tep_handle *tep, > + struct buffer_instance *instance, > + struct tracecmd_time_sync_event *events) > +{ > + struct tracecmd_time_sync_event *events_array = NULL; > + int events_count = 0; > + int events_size = 0; > + struct dirent *dent; > + unsigned int p_size; > + int ts = 0; > + void *page; > + char *path; > + char *file; > + DIR *dir; > + int cpu; > + int len; > + int fd; > + int r; > + > + p_size = getpagesize(); > +#ifdef TSYNC_RBUFFER_DEBUG > + file = get_instance_file(instance, "trace"); > + if (!file) > + return ts; > + { > + char *buf = NULL; > + FILE *fp; > + size_t n; > + int r; > + > + printf("Events:\n\r"); > + fp = fopen(file, "r"); > + while ((r = getline(&buf, &n, fp)) >= 0) { > + > + if (buf[0] != '#') > + printf("%s", buf); > + free(buf); > + buf = NULL; > + } > + fclose(fp); > + } > + tracecmd_put_tracing_file(file); > +#endif /* TSYNC_RBUFFER_DEBUG */ > + path = get_instance_file(instance, "per_cpu"); > + if (!path) > + return ts; > + > + dir = opendir(path); > + if (!dir) > + goto out; > + > + len = strlen(path); > + file = malloc(len + strlen("trace_pipe_raw") + 32); > + page = malloc(p_size); > + if (!file || !page) > + die("Failed to allocate time_stamp info"); > + > + while ((dent = readdir(dir))) { > + > + const char *name = dent->d_name; > + > + if (strncmp(name, "cpu", 3) != 0) > + continue; > + cpu = atoi(&name[3]); > + sprintf(file, "%s/%s/trace_pipe_raw", path, name); > + fd = open(file, O_RDONLY | O_NONBLOCK); > + if (fd < 0) > + continue; > + do { > + r = read(fd, page, p_size); > + if (r > 0) { > + get_events_in_page(tep, page, r, cpu, > + &events_array, &events_count, > + &events_size); > + } > + } while (r > 0); > + close(fd); > + } We don't need to walk tracefs here since we already know how many cpus we have at this point and can be open the trace_pipe_raw files directly. > + qsort(events_array, events_count, sizeof(*events_array), sync_events_cmp); > + r = find_sync_events(tep, events_array, events_count, events); > +#ifdef TSYNC_RBUFFER_DEBUG > + len = 0; > + while (events[len].id) { > + printf("Found %d @ cpu %d: %lld pid %d\n\r", > + events[len].id, events[len].cpu, > + events[len].ts, events[len].pid); > + len++; > + } > +#endif > + > + free(events_array); > + free(file); > + free(page); > + closedir(dir); > + > + out: > + tracecmd_put_tracing_file(path); > + return r; > +} > + > +static int clock_sync_x86_host_find_events(struct tracecmd_clock_sync *clock, > + int pid, > + struct tracecmd_time_sync_event *event) > +{ > + int ret; > + > + clock->events[0].pid = pid; > + ret = find_raw_events(clock->tep, clock->vinst, clock->events); > + event->ts = clock->events[0].ts; > + event->cpu = clock->events[0].cpu; > + return ret; > + > +} > + > +static int clock_sync_x86_guest_find_events(struct tracecmd_clock_sync *clock, > + int pid, > + struct tracecmd_time_sync_event *event) > +{ > + int ret; > + > + ret = find_raw_events(clock->tep, clock->vinst, clock->events); > + if (ret != clock->events_count) > + return 0; > + event->ts = clock->events[1].ts; > + event->cpu = clock->events[0].cpu; > + return 1; > + > +} > + > +static void tracecmd_clock_sync_reset(struct tracecmd_clock_sync *clock_context) > +{ > + int i = 0; > + > + while (clock_context->events[i].id) { > + clock_context->events[i].cpu = 0; > + clock_context->events[i].ts = 0; > + clock_context->events[i].pid = -1; pid 0 is the scheduler so it should be a sensible value for unset pids. > + i++; > + } > +} > + > +int tracecmd_clock_find_event(struct tracecmd_clock_sync *clock, int pid, > + struct tracecmd_time_sync_event *event) > +{ > + int ret = 0; > + int id; > + > + if (clock == NULL || > + clock->clock_context_id >= CLOCK_CONTEXT_MAX) > + return 0; > + > + id = clock->clock_context_id; > + if (clock_sync[id].clock_sync_find_events) > + ret = clock_sync[id].clock_sync_find_events(clock, pid, event); > + > + tracecmd_clock_sync_reset(clock); > + return ret; > +} > + > +static void clock_context_copy(struct tracecmd_clock_sync *clock_context, > + struct tracecmd_ftrace_param *params, > + struct tracecmd_event_descr *events) > +{ > + int i; > + > + i = 0; > + while (params[i].file) > + i++; > + clock_context->ftrace_params = calloc(i+1, sizeof(struct tracecmd_ftrace_param)); > + i = 0; > + while (params[i].file) { > + clock_context->ftrace_params[i].file = strdup(params[i].file); > + if (params[i].set) > + clock_context->ftrace_params[i].set = strdup(params[i].set); > + if (params[i].reset) > + clock_context->ftrace_params[i].reset = strdup(params[i].reset); > + i++; > + } > + > + i = 0; > + while (events[i].name) > + i++; > + clock_context->events = calloc(i+1, sizeof(struct tracecmd_time_sync_event)); > + clock_context->events_count = i; > +} > + > +void trace_instance_reset(struct buffer_instance *vinst) > +{ > + write_instance_file(vinst, "trace", "\0", NULL); > +} > + > +static struct buffer_instance * > +clock_synch_create_instance(const char *clock, unsigned int cid) > +{ > + struct buffer_instance *vinst; > + char inst_name[256]; > + > + snprintf(inst_name, 256, "clock_synch-%d", cid); > + > + vinst = create_instance(strdup(inst_name)); > + tracecmd_init_instance(vinst); > + vinst->cpu_count = tracecmd_local_cpu_count(); > + tracecmd_make_instance(vinst); > + trace_instance_reset(vinst); > + if (clock) > + vinst->clock = strdup(clock); > + tracecmd_set_clock(vinst); > + return vinst; > +} > + > +static struct tep_handle *clock_synch_get_tep(struct buffer_instance *instance, > + const char * const *systems) > +{ > + struct tep_handle *tep = NULL; > + char *path; > + > + path = get_instance_dir(instance); > + tep = tracecmd_local_events_system(path, systems); > + tracecmd_put_tracing_file(path); > + > + tep_set_file_bigendian(tep, tracecmd_host_bigendian()); > + tep_set_host_bigendian(tep, tracecmd_host_bigendian()); > + > + return tep; > +} > + > +static void get_vsocket_params(int fd, unsigned int *lcid, unsigned int *lport, > + unsigned int *rcid, unsigned int *rport) > +{ > + struct sockaddr_vm addr; > + socklen_t addr_len = sizeof(addr); > + > + if (lcid || lport) { > + memset(&addr, 0, sizeof(addr)); > + if (getsockname(fd, (struct sockaddr *)&addr, &addr_len)) > + return; > + if (addr.svm_family != AF_VSOCK) > + return; > + if (lport) > + *lport = addr.svm_port; > + if (lcid) > + *lcid = addr.svm_cid; > + } > + > + if (rcid || rport) { > + memset(&addr, 0, sizeof(addr)); > + addr_len = sizeof(addr); > + if (getpeername(fd, (struct sockaddr *)&addr, &addr_len)) > + return; > + if (addr.svm_family != AF_VSOCK) > + return; > + > + if (rport) > + *rport = addr.svm_port; > + if (rcid) > + *rcid = addr.svm_cid; > + } > +} > + > +struct tracecmd_clock_sync* > +tracecmd_clock_context_new(struct tracecmd_msg_handle *msg_handle, > + const char *clock_str, > + enum clock_sync_context id) > +{ > + struct tracecmd_clock_sync *clock_context; > + struct tep_event *event; > + unsigned int i = 0; > + > + switch (id) { > +#ifdef VSOCK > + case CLOCK_KVM_X86_VSOCK_HOST: > + case CLOCK_KVM_X86_VSOCK_GUEST: > + break; > +#endif > + default: /* not supported clock sync context */ > + return NULL; > + } > + > + if (id >= CLOCK_CONTEXT_MAX || NULL == msg_handle) > + return NULL; > + clock_context = calloc(1, sizeof(struct tracecmd_clock_sync)); > + if (!clock_context) > + return NULL; > + clock_context->clock_context_id = id; > + clock_context_copy(clock_context, > + clock_sync[id].ftrace_params, clock_sync[id].events); > + > + get_vsocket_params(msg_handle->fd, > + &(clock_context->local_cid), > + &(clock_context->local_port), > + &(clock_context->remote_cid), > + &(clock_context->remote_port)); > + > + if (clock_sync[id].clock_sync_init) > + clock_sync[id].clock_sync_init(clock_context); > + > + clock_context->vinst = clock_synch_create_instance(clock_str, clock_context->remote_cid); > + clock_context->tep = clock_synch_get_tep(clock_context->vinst, > + clock_sync[id].systems); > + i = 0; > + while (clock_sync[id].events[i].name) { > + event = tep_find_event_by_name(clock_context->tep, > + clock_sync[id].events[i].system, > + clock_sync[id].events[i].name); > + if (!event) > + break; > + clock_context->events[i].id = event->id; > + i++; > + } > +#ifdef TSYNC_DEBUG > + clock_context->debug_fd = -1; > +#endif > + > + return clock_context; > + > +} > + > +void tracecmd_clock_context_free(struct buffer_instance *instance) > +{ > + int i; > + > + if (instance->clock_sync == NULL || > + instance->clock_sync->clock_context_id >= CLOCK_CONTEXT_MAX) > + return; > + if (clock_sync[instance->clock_sync->clock_context_id].clock_sync_free) > + clock_sync[instance->clock_sync->clock_context_id].clock_sync_free(instance->clock_sync); > + > + i = 0; > + while (instance->clock_sync->ftrace_params[i].file) { > + free(instance->clock_sync->ftrace_params[i].file); > + free(instance->clock_sync->ftrace_params[i].set); > + free(instance->clock_sync->ftrace_params[i].reset); > + i++; > + } > + free(instance->clock_sync->ftrace_params); > + free(instance->clock_sync->events); > + tracecmd_remove_instance(instance->clock_sync->vinst); > + /* todo: clean up the instance */ > + tep_free(instance->clock_sync->tep); > + > + free(instance->clock_sync->offsets); > + free(instance->clock_sync->times); > +#ifdef TSYNC_DEBUG > + if (instance->clock_sync->debug_fd >= 0) { > + close (instance->clock_sync->debug_fd); > + instance->clock_sync->debug_fd = -1; > + } > +#endif > + free(instance->clock_sync); > + instance->clock_sync = NULL; > +} > + > +bool tracecmd_time_sync_check(void) > +{ > +#ifdef VSOCK > + return true; > +#endif > + return false; > +} > + > +void sync_time_with_host_v3(struct buffer_instance *instance) > +{ > + long long timestamp = 0; > + long long offset = 0; > + > + if (!instance->do_tsync) > + return; > + > + if (instance->clock_sync == NULL) > + instance->clock_sync = tracecmd_clock_context_new(instance->msg_handle, > + instance->clock, CLOCK_KVM_X86_VSOCK_GUEST); > + > + tracecmd_msg_snd_time_sync(instance->msg_handle, instance->clock_sync, > + &offset, ×tamp); > +} > + > +void sync_time_with_guest_v3(struct buffer_instance *instance) > +{ > + long long timestamp = 0; > + long long offset = 0; > + long long *sync_array_ts; > + long long *sync_array_offs; > + > + if (!instance->do_tsync) > + return; > + > + if (instance->clock_sync == NULL) > + instance->clock_sync = tracecmd_clock_context_new(instance->msg_handle, > + top_instance.clock, CLOCK_KVM_X86_VSOCK_HOST); > + > + tracecmd_msg_rcv_time_sync(instance->msg_handle, > + instance->clock_sync, &offset, ×tamp); > + > + sync_array_ts = realloc(instance->time_sync_ts, > + (instance->time_sync_count+1)*sizeof(long long)); > + sync_array_offs = realloc(instance->time_sync_offsets, > + (instance->time_sync_count+1)*sizeof(long long)); > + > + if (sync_array_ts && sync_array_offs) { > + sync_array_ts[instance->time_sync_count] = timestamp; > + sync_array_offs[instance->time_sync_count] = offset; > + instance->time_sync_count++; > + instance->time_sync_ts = sync_array_ts; > + instance->time_sync_offsets = sync_array_offs; > + > + } else { > + free(sync_array_ts); > + free(sync_array_offs); > + } > +} > + > +static void set_clock_synch_events(struct buffer_instance *instance, > + struct tracecmd_ftrace_param *params, > + bool enable) > +{ > + int i = 0; > + > + if (!enable) > + write_tracing_on(instance, 0); > + > + while (params[i].file) { > + if (enable && params[i].set) { > + write_instance_file(instance, params[i].file, > + params[i].set, NULL); > + } > + if (!enable && params[i].reset) > + write_instance_file(instance, params[i].file, > + params[i].reset, NULL); > + i++; > + } > + > + if (enable) > + write_tracing_on(instance, 1); > +} > + > +int tracecmd_clock_get_peer(struct tracecmd_clock_sync *clock_context, > + unsigned int *remote_cid, unsigned int *remote_port) > +{ > + if (!clock_context) > + return 0; > + if (remote_cid) > + *remote_cid = clock_context->remote_cid; > + if (remote_port) > + *remote_cid = clock_context->remote_port; > + return 1; > +} > + > +void tracecmd_clock_synch_enable(struct tracecmd_clock_sync *clock_context) > +{ > + set_clock_synch_events(clock_context->vinst, > + clock_context->ftrace_params, true); > +} > + > +void tracecmd_clock_synch_disable(struct tracecmd_clock_sync *clock_context) > +{ > + set_clock_synch_events(clock_context->vinst, > + clock_context->ftrace_params, false); > +} > + > +int tracecmd_clock_synch_calc(struct tracecmd_clock_sync *clock_context, > + long long *offset_ret, long long *time_ret) > +{ > + int i, j = 0; > + long long av, tresch, offset = 0, time = 0; > + > + if (!clock_context || !clock_context->probes_count) > + return 0; > + av = clock_context->offset_av / clock_context->probes_count; > + tresch = (long long)((clock_context->offset_max - clock_context->offset_min)/10); > + > + for (i = 0; i < clock_context->probes_count; i++) { > + /* filter the offsets with deviation up to 10% */ > + if (llabs(clock_context->offsets[i] - av) < tresch) { > + offset += clock_context->offsets[i]; > + j++; > + } > + } > + if (j) > + offset /= (long long)j; > + > + tresch = 0; > + for (i = 0; i < clock_context->probes_count; i++) { > + if ((!tresch || tresch > llabs(offset-clock_context->offsets[i]))) { > + tresch = llabs(offset-clock_context->offsets[i]); > + time = clock_context->times[i]; > + } > + } > + if (offset_ret) > + *offset_ret = offset; > + if (time_ret) > + *time_ret = time; > +#ifdef TSYNC_DEBUG > + printf("\n calculated offset: %lld, %d/%d probes\n\r", > + *offset_ret, clock_context->probes_count, > + clock_context->probes_count + clock_context->bad_probes); > +#endif > + return 1; > +} > + > +void tracecmd_clock_synch_calc_reset(struct tracecmd_clock_sync *clock_context) > +{ > + if (!clock_context) > + return; > + > + clock_context->probes_count = 0; > + clock_context->bad_probes = 0; > + clock_context->offset_av = 0; > + clock_context->offset_min = 0; > + clock_context->offset_max = 0; > +#ifdef TSYNC_DEBUG > + if (clock_context->debug_fd >= 0) { > + close(clock_context->debug_fd); > + clock_context->debug_fd = -1; > + } > +#endif > + > +} > + > +void tracecmd_clock_synch_calc_probe(struct tracecmd_clock_sync *clock_context, > + long long ts_local, long long ts_remote) > +{ > + int count; > +#ifdef TSYNC_DEBUG > + char buff[256]; > +#endif > + > + if (!clock_context || !ts_local || !ts_remote) > + return; > + if (!ts_local || !ts_remote) { > + clock_context->bad_probes++; > + return; > + } > + > + if (!clock_context->offsets && !clock_context->times) { > + clock_context->offsets = calloc(10, sizeof(long long)); > + clock_context->times = calloc(10, sizeof(long long)); > + clock_context->probes_size = 10; > + } > + > + if (clock_context->probes_size == clock_context->probes_count) { > + clock_context->probes_size = (3*clock_context->probes_size)/2; > + clock_context->offsets = realloc(clock_context->offsets, > + clock_context->probes_size * > + sizeof(long long)); > + clock_context->times = realloc(clock_context->times, > + clock_context->probes_size* > + sizeof(long long)); > + } > + > + if (!clock_context->offsets || !clock_context->times) { > + clock_context->probes_size = 0; > + tracecmd_clock_synch_calc_reset(clock_context); > + return; > + } > +#ifdef TSYNC_DEBUG > + if (clock_context->debug_fd < 0) { > + sprintf(buff, "s-cid%d.txt", clock_context->remote_cid); > + clock_context->debug_fd = open(buff, O_CREAT|O_WRONLY|O_TRUNC, 0644); > + } > +#endif > + count = clock_context->probes_count; > + clock_context->probes_count++; > + clock_context->offsets[count] = ts_remote - ts_local; > + clock_context->times[count] = ts_local; > + clock_context->offset_av += clock_context->offsets[count]; > + > + if (!clock_context->offset_min || > + clock_context->offset_min > llabs(clock_context->offsets[count])) > + clock_context->offset_min = llabs(clock_context->offsets[count]); > + if (!clock_context->offset_max || > + clock_context->offset_max < llabs(clock_context->offsets[count])) > + clock_context->offset_max = llabs(clock_context->offsets[count]); > +#ifdef TSYNC_DEBUG > + sprintf(buff, "%lld %lld\n", ts_local, ts_remote); > + write(clock_context->debug_fd, buff, strlen(buff)); > +#endif > + > +} > -- > 2.20.1 >
diff --git a/include/trace-cmd/trace-cmd.h b/include/trace-cmd/trace-cmd.h index 5552396..905f3ec 100644 --- a/include/trace-cmd/trace-cmd.h +++ b/include/trace-cmd/trace-cmd.h @@ -341,16 +341,29 @@ bool tracecmd_msg_done(struct tracecmd_msg_handle *msg_handle); void tracecmd_msg_set_done(struct tracecmd_msg_handle *msg_handle); int tracecmd_msg_send_trace_req(struct tracecmd_msg_handle *msg_handle, - int argc, char **argv, bool use_fifos); + int argc, char **argv, bool use_fifos, + bool do_tsync); int tracecmd_msg_recv_trace_req(struct tracecmd_msg_handle *msg_handle, - int *argc, char ***argv, bool *use_fifos); + int *argc, char ***argv, bool *use_fifos, + bool *do_tsync); int tracecmd_msg_send_trace_resp(struct tracecmd_msg_handle *msg_handle, int nr_cpus, int page_size, - unsigned int *ports, bool use_fifos); + unsigned int *ports, bool use_fifos, + bool do_tsync); int tracecmd_msg_recv_trace_resp(struct tracecmd_msg_handle *msg_handle, int *nr_cpus, int *page_size, - unsigned int **ports, bool *use_fifos); + unsigned int **ports, bool *use_fifos, + bool *do_tsync); + +struct tracecmd_clock_sync; + +int tracecmd_msg_rcv_time_sync(struct tracecmd_msg_handle *msg_handle, + struct tracecmd_clock_sync *clock_sync, + long long *offset, long long *timestamp); +int tracecmd_msg_snd_time_sync(struct tracecmd_msg_handle *msg_handle, + struct tracecmd_clock_sync *clock_sync, + long long *offset, long long *timestamp); /* --- Plugin handling --- */ extern struct tep_plugin_option trace_ftrace_options[]; diff --git a/tracecmd/Makefile b/tracecmd/Makefile index d3e3080..8a73bf7 100644 --- a/tracecmd/Makefile +++ b/tracecmd/Makefile @@ -32,6 +32,7 @@ TRACE_CMD_OBJS += trace-list.o TRACE_CMD_OBJS += trace-output.o TRACE_CMD_OBJS += trace-usage.o TRACE_CMD_OBJS += trace-msg.o +TRACE_CMD_OBJS += trace-timesync.o ifeq ($(VSOCK_DEFINED), 1) TRACE_CMD_OBJS += trace-agent.o diff --git a/tracecmd/include/trace-local.h b/tracecmd/include/trace-local.h index 62f5e47..7688b02 100644 --- a/tracecmd/include/trace-local.h +++ b/tracecmd/include/trace-local.h @@ -26,6 +26,7 @@ extern int quiet; typedef unsigned long long u64; struct buffer_instance; +struct tracecmd_clock_sync; /* for local shared information with trace-cmd executable */ @@ -101,7 +102,7 @@ void trace_usage(int argc, char **argv); int trace_record_agent(struct tracecmd_msg_handle *msg_handle, int cpus, int *fds, - int argc, char **argv, bool use_fifos); + int argc, char **argv, bool use_fifos, bool do_tsync); struct hook_list; @@ -214,6 +215,12 @@ struct buffer_instance { int port; int *fds; bool use_fifos; + bool do_tsync; + + struct tracecmd_clock_sync *clock_sync; + int time_sync_count; + long long *time_sync_ts; + long long *time_sync_offsets; }; extern struct buffer_instance top_instance; @@ -235,6 +242,30 @@ void update_first_instance(struct buffer_instance *instance, int topt); void show_instance_file(struct buffer_instance *instance, const char *name); int count_cpus(void); + +struct tracecmd_time_sync_event { + int id; + int cpu; + int pid; + unsigned long long ts; +}; + +int tracecmd_clock_get_peer(struct tracecmd_clock_sync *clock_context, + unsigned int *remote_cid, unsigned int *remote_port); +bool tracecmd_time_sync_check(void); +void tracecmd_clock_context_free(struct buffer_instance *instance); +int tracecmd_clock_find_event(struct tracecmd_clock_sync *clock, int cpu, + struct tracecmd_time_sync_event *event); +void tracecmd_clock_synch_enable(struct tracecmd_clock_sync *clock_context); +void tracecmd_clock_synch_disable(struct tracecmd_clock_sync *clock_context); +void tracecmd_clock_synch_calc_reset(struct tracecmd_clock_sync *clock_context); +void tracecmd_clock_synch_calc_probe(struct tracecmd_clock_sync *clock_context, + long long ts_local, long long ts_remote); +int tracecmd_clock_synch_calc(struct tracecmd_clock_sync *clock_context, + long long *offset_ret, long long *time_ret); +void sync_time_with_host_v3(struct buffer_instance *instance); +void sync_time_with_guest_v3(struct buffer_instance *instance); + void write_tracing_on(struct buffer_instance *instance, int on); char *get_instance_dir(struct buffer_instance *instance); int write_instance_file(struct buffer_instance *instance, diff --git a/tracecmd/trace-agent.c b/tracecmd/trace-agent.c index 7389f72..3d305dc 100644 --- a/tracecmd/trace-agent.c +++ b/tracecmd/trace-agent.c @@ -116,6 +116,7 @@ static void agent_handle(int sd, int nr_cpus, int page_size) char **argv = NULL; int argc = 0; bool use_fifos; + bool do_tsync; int *fds; int ret; @@ -128,7 +129,8 @@ static void agent_handle(int sd, int nr_cpus, int page_size) if (!msg_handle) die("Failed to allocate message handle"); - ret = tracecmd_msg_recv_trace_req(msg_handle, &argc, &argv, &use_fifos); + ret = tracecmd_msg_recv_trace_req(msg_handle, &argc, &argv, + &use_fifos, &do_tsync); if (ret < 0) die("Failed to receive trace request"); @@ -137,13 +139,15 @@ static void agent_handle(int sd, int nr_cpus, int page_size) if (!use_fifos) make_vsocks(nr_cpus, fds, ports); + if (do_tsync) + do_tsync = tracecmd_time_sync_check(); ret = tracecmd_msg_send_trace_resp(msg_handle, nr_cpus, page_size, - ports, use_fifos); + ports, use_fifos, do_tsync); if (ret < 0) die("Failed to send trace response"); - trace_record_agent(msg_handle, nr_cpus, fds, argc, argv, use_fifos); + trace_record_agent(msg_handle, nr_cpus, fds, argc, argv, use_fifos, do_tsync); free(argv[0]); free(argv); diff --git a/tracecmd/trace-msg.c b/tracecmd/trace-msg.c index 065a01e..786e3a9 100644 --- a/tracecmd/trace-msg.c +++ b/tracecmd/trace-msg.c @@ -18,6 +18,7 @@ #include <stdarg.h> #include <string.h> #include <unistd.h> +#include <time.h> #include <arpa/inet.h> #include <sys/types.h> #include <linux/types.h> @@ -26,8 +27,12 @@ #include "trace-local.h" #include "trace-msg.h" +typedef __u16 u16; +typedef __s16 s16; typedef __u32 u32; typedef __be32 be32; +typedef __u64 u64; +typedef __s64 s64; static inline void dprint(const char *fmt, ...) { @@ -50,6 +55,9 @@ static inline void dprint(const char *fmt, ...) unsigned int page_size; +/* Try a few times to get an accurate time sync */ +#define TSYNC_TRIES 300 + struct tracecmd_msg_opt { be32 size; be32 opt_cmd; @@ -76,6 +84,20 @@ struct tracecmd_msg_trace_resp { be32 page_size; } __attribute__((packed)); +struct tracecmd_msg_tsync_stop { + long long offset; + long long timestamp; +} __attribute__((packed)); + +struct tracecmd_msg_tsync_req { + u16 cpu; +} __attribute__((packed)); + +struct tracecmd_msg_tsync_resp { + u64 time; +} __attribute__((packed)); + + struct tracecmd_msg_header { be32 size; be32 cmd; @@ -83,14 +105,19 @@ struct tracecmd_msg_header { } __attribute__((packed)); #define MSG_MAP \ - C(CLOSE, 0, 0), \ - C(TINIT, 1, sizeof(struct tracecmd_msg_tinit)), \ - C(RINIT, 2, sizeof(struct tracecmd_msg_rinit)), \ - C(SEND_DATA, 3, 0), \ - C(FIN_DATA, 4, 0), \ - C(NOT_SUPP, 5, 0), \ - C(TRACE_REQ, 6, sizeof(struct tracecmd_msg_trace_req)), \ - C(TRACE_RESP, 7, sizeof(struct tracecmd_msg_trace_resp)), + C(CLOSE, 0, 0), \ + C(TINIT, 1, sizeof(struct tracecmd_msg_tinit)), \ + C(RINIT, 2, sizeof(struct tracecmd_msg_rinit)), \ + C(SEND_DATA, 3, 0), \ + C(FIN_DATA, 4, 0), \ + C(NOT_SUPP, 5, 0), \ + C(TRACE_REQ, 6, sizeof(struct tracecmd_msg_trace_req)), \ + C(TRACE_RESP, 7, sizeof(struct tracecmd_msg_trace_resp)),\ + C(TSYNC_START, 8, 0), \ + C(TSYNC_STOP, 9, sizeof(struct tracecmd_msg_tsync_stop)),\ + C(TSYNC_PROBE, 11, 0), \ + C(TSYNC_REQ, 11, sizeof(struct tracecmd_msg_tsync_req)), \ + C(TSYNC_RESP, 12, sizeof(struct tracecmd_msg_tsync_resp)), #undef C #define C(a,b,c) MSG_##a = b @@ -120,10 +147,13 @@ static const char *cmd_to_name(int cmd) struct tracecmd_msg { struct tracecmd_msg_header hdr; union { - struct tracecmd_msg_tinit tinit; - struct tracecmd_msg_rinit rinit; - struct tracecmd_msg_trace_req trace_req; - struct tracecmd_msg_trace_resp trace_resp; + struct tracecmd_msg_tinit tinit; + struct tracecmd_msg_rinit rinit; + struct tracecmd_msg_trace_req trace_req; + struct tracecmd_msg_trace_resp trace_resp; + struct tracecmd_msg_tsync_stop ts_stop; + struct tracecmd_msg_tsync_req ts_req; + struct tracecmd_msg_tsync_resp ts_resp; }; union { struct tracecmd_msg_opt *opt; @@ -161,6 +191,7 @@ static int msg_write(int fd, struct tracecmd_msg *msg) enum msg_trace_flags { MSG_TRACE_USE_FIFOS = 1 << 0, + MSG_TRACE_DO_TSYNC = 1 << 1, }; enum msg_opt_command { @@ -744,7 +775,8 @@ error: return ret; } -static int make_trace_req(struct tracecmd_msg *msg, int argc, char **argv, bool use_fifos) +static int make_trace_req(struct tracecmd_msg *msg, int argc, char **argv, + bool use_fifos, bool do_tsync) { size_t args_size = 0; char *p; @@ -754,7 +786,12 @@ static int make_trace_req(struct tracecmd_msg *msg, int argc, char **argv, bool args_size += strlen(argv[i]) + 1; msg->hdr.size = htonl(ntohl(msg->hdr.size) + args_size); - msg->trace_req.flags = use_fifos ? htonl(MSG_TRACE_USE_FIFOS) : htonl(0); + msg->trace_req.flags = 0; + if (use_fifos) + msg->trace_req.flags |= MSG_TRACE_USE_FIFOS; + if (do_tsync) + msg->trace_req.flags |= MSG_TRACE_DO_TSYNC; + msg->trace_req.flags = htonl(msg->trace_req.flags); msg->trace_req.argc = htonl(argc); msg->buf = calloc(args_size, 1); if (!msg->buf) @@ -768,13 +805,14 @@ static int make_trace_req(struct tracecmd_msg *msg, int argc, char **argv, bool } int tracecmd_msg_send_trace_req(struct tracecmd_msg_handle *msg_handle, - int argc, char **argv, bool use_fifos) + int argc, char **argv, bool use_fifos, + bool do_tsync) { struct tracecmd_msg msg; int ret; tracecmd_msg_init(MSG_TRACE_REQ, &msg); - ret = make_trace_req(&msg, argc, argv, use_fifos); + ret = make_trace_req(&msg, argc, argv, use_fifos, do_tsync); if (ret < 0) return ret; @@ -787,7 +825,8 @@ int tracecmd_msg_send_trace_req(struct tracecmd_msg_handle *msg_handle, * free(argv); */ int tracecmd_msg_recv_trace_req(struct tracecmd_msg_handle *msg_handle, - int *argc, char ***argv, bool *use_fifos) + int *argc, char ***argv, bool *use_fifos, + bool *do_tsync) { struct tracecmd_msg msg; char *p, *buf_end, **args; @@ -840,6 +879,7 @@ int tracecmd_msg_recv_trace_req(struct tracecmd_msg_handle *msg_handle, *argc = nr_args; *argv = args; *use_fifos = ntohl(msg.trace_req.flags) & MSG_TRACE_USE_FIFOS; + *do_tsync = ntohl(msg.trace_req.flags) & MSG_TRACE_DO_TSYNC; /* * On success we're passing msg.buf to the caller through argv[0] so we @@ -859,14 +899,136 @@ out: return ret; } +int tracecmd_msg_rcv_time_sync(struct tracecmd_msg_handle *msg_handle, + struct tracecmd_clock_sync *clock_context, + long long *offset, long long *timestamp) +{ + struct tracecmd_time_sync_event event; + unsigned int remote_cid = 0; + struct tracecmd_msg msg; + int cpu_pid, ret; + + if (clock_context == NULL || msg_handle == NULL) + return 0; + + if (offset) + *offset = 0; + + ret = tracecmd_msg_recv(msg_handle->fd, &msg); + if (ret < 0 || ntohl(msg.hdr.cmd) == MSG_TSYNC_STOP) + return 0; + if (ntohl(msg.hdr.cmd) != MSG_TSYNC_START) { + handle_unexpected_msg(msg_handle, &msg); + return 0; + } + + tracecmd_clock_get_peer(clock_context, &remote_cid, NULL); + tracecmd_msg_init(MSG_TSYNC_START, &msg); + tracecmd_msg_send(msg_handle->fd, &msg); + tracecmd_clock_synch_enable(clock_context); + + do { + memset(&event, 0, sizeof(event)); + ret = tracecmd_msg_recv(msg_handle->fd, &msg); + if (ret < 0 || ntohl(msg.hdr.cmd) == MSG_TSYNC_STOP) + break; + if (ntohl(msg.hdr.cmd) != MSG_TSYNC_PROBE) { + handle_unexpected_msg(msg_handle, &msg); + break; + } + ret = tracecmd_msg_recv(msg_handle->fd, &msg); + if (ret < 0 || ntohl(msg.hdr.cmd) == MSG_TSYNC_STOP) + break; + if (ntohl(msg.hdr.cmd) != MSG_TSYNC_REQ) { + handle_unexpected_msg(msg_handle, &msg); + break; + } + /* Get kvm event related to the corresponding VCPU context */ + cpu_pid = get_guest_vcpu_pid(remote_cid, ntohs(msg.ts_req.cpu)); + tracecmd_clock_find_event(clock_context, cpu_pid, &event); + tracecmd_msg_init(MSG_TSYNC_RESP, &msg); + msg.ts_resp.time = htonll(event.ts); + tracecmd_msg_send(msg_handle->fd, &msg); + } while (true); + + tracecmd_clock_synch_disable(clock_context); + + if (ret >= 0 && ntohl(msg.hdr.cmd) == MSG_TSYNC_STOP) { + if (offset) + *offset = ntohll(msg.ts_stop.offset); + if (timestamp) + *timestamp = ntohll(msg.ts_stop.timestamp); + } + + msg_free(&msg); + return 0; +} + +int tracecmd_msg_snd_time_sync(struct tracecmd_msg_handle *msg_handle, + struct tracecmd_clock_sync *clock_context, + long long *offset, long long *timestamp) +{ + struct tracecmd_time_sync_event event; + int sync_loop = TSYNC_TRIES; + struct tracecmd_msg msg; + int ret; + + if (clock_context == NULL || msg_handle == NULL) + return 0; + + tracecmd_msg_init(MSG_TSYNC_START, &msg); + tracecmd_msg_send(msg_handle->fd, &msg); + ret = tracecmd_msg_recv(msg_handle->fd, &msg); + if (ret < 0 || ntohl(msg.hdr.cmd) != MSG_TSYNC_START) + return 0; + tracecmd_clock_synch_calc_reset(clock_context); + tracecmd_clock_synch_enable(clock_context); + + do { + tracecmd_msg_init(MSG_TSYNC_PROBE, &msg); + tracecmd_msg_send(msg_handle->fd, &msg); + /* Get the ts and CPU of the sent event */ + ret = tracecmd_clock_find_event(clock_context, -1, &event); + tracecmd_msg_init(MSG_TSYNC_REQ, &msg); + msg.ts_req.cpu = htons(event.cpu); + tracecmd_msg_send(msg_handle->fd, &msg); + memset(&msg, 0, sizeof(msg)); + ret = tracecmd_msg_recv(msg_handle->fd, &msg); + if (ret < 0) + break; + if (ntohl(msg.hdr.cmd) != MSG_TSYNC_RESP) { + handle_unexpected_msg(msg_handle, &msg); + break; + } + tracecmd_clock_synch_calc_probe(clock_context, + event.ts, + htonll(msg.ts_resp.time)); + } while (--sync_loop); + + tracecmd_clock_synch_disable(clock_context); + tracecmd_clock_synch_calc(clock_context, offset, timestamp); + tracecmd_msg_init(MSG_TSYNC_STOP, &msg); + msg.ts_stop.offset = htonll(*offset); + msg.ts_stop.timestamp = htonll(*timestamp); + tracecmd_msg_send(msg_handle->fd, &msg); + + msg_free(&msg); + return 0; +} + static int make_trace_resp(struct tracecmd_msg *msg, int page_size, int nr_cpus, - unsigned int *ports, bool use_fifos) + unsigned int *ports, bool use_fifos, bool do_tsync) { int ports_size = nr_cpus * sizeof(*msg->port_array); int i; msg->hdr.size = htonl(ntohl(msg->hdr.size) + ports_size); - msg->trace_resp.flags = use_fifos ? htonl(MSG_TRACE_USE_FIFOS) : htonl(0); + msg->trace_resp.flags = 0; + if (use_fifos) + msg->trace_resp.flags |= MSG_TRACE_USE_FIFOS; + if (do_tsync) + msg->trace_resp.flags |= MSG_TRACE_DO_TSYNC; + msg->trace_resp.flags = htonl(msg->trace_resp.flags); msg->trace_resp.cpus = htonl(nr_cpus); msg->trace_resp.page_size = htonl(page_size); @@ -882,13 +1044,14 @@ static int make_trace_resp(struct tracecmd_msg *msg, int page_size, int nr_cpus, int tracecmd_msg_send_trace_resp(struct tracecmd_msg_handle *msg_handle, int nr_cpus, int page_size, - unsigned int *ports, bool use_fifos) + unsigned int *ports, bool use_fifos, + bool do_tsync) { struct tracecmd_msg msg; int ret; tracecmd_msg_init(MSG_TRACE_RESP, &msg); - ret = make_trace_resp(&msg, page_size, nr_cpus, ports, use_fifos); + ret = make_trace_resp(&msg, page_size, nr_cpus, ports, use_fifos, do_tsync); if (ret < 0) return ret; @@ -897,7 +1060,8 @@ int tracecmd_msg_send_trace_resp(struct tracecmd_msg_handle *msg_handle, int tracecmd_msg_recv_trace_resp(struct tracecmd_msg_handle *msg_handle, int *nr_cpus, int *page_size, - unsigned int **ports, bool *use_fifos) + unsigned int **ports, bool *use_fifos, + bool *do_tsync) { struct tracecmd_msg msg; ssize_t buf_len; @@ -920,6 +1084,7 @@ int tracecmd_msg_recv_trace_resp(struct tracecmd_msg_handle *msg_handle, } *use_fifos = ntohl(msg.trace_resp.flags) & MSG_TRACE_USE_FIFOS; + *do_tsync = ntohl(msg.trace_resp.flags) & MSG_TRACE_DO_TSYNC; *nr_cpus = ntohl(msg.trace_resp.cpus); *page_size = ntohl(msg.trace_resp.page_size); *ports = calloc(*nr_cpus, sizeof(**ports)); diff --git a/tracecmd/trace-record.c b/tracecmd/trace-record.c index b4ea40f..c923093 100644 --- a/tracecmd/trace-record.c +++ b/tracecmd/trace-record.c @@ -210,6 +210,8 @@ struct common_record_context { char *date2ts; char *max_graph_depth; int data_flags; + int time_shift_count; + struct tracecmd_option *time_shift_option; int record_all; int total_disable; @@ -649,11 +651,20 @@ static void tell_guests_to_stop(void) for_all_instances(instance) { if (is_guest(instance)) { tracecmd_msg_send_close_msg(instance->msg_handle); - tracecmd_msg_handle_close(instance->msg_handle); } } } +static void close_guests_handle(void) +{ + struct buffer_instance *instance; + + for_all_instances(instance) { + if (is_guest(instance)) + tracecmd_msg_handle_close(instance->msg_handle); + } +} + static void stop_threads(enum trace_type type) { struct timeval tv = { 0, 0 }; @@ -3355,6 +3366,7 @@ static void connect_to_agent(struct buffer_instance *instance) unsigned int *ports; int i, *fds = NULL; bool use_fifos = false; + bool do_tsync; if (!no_fifos) { nr_fifos = open_guest_fifos(instance->name, &fds); @@ -3366,17 +3378,19 @@ static void connect_to_agent(struct buffer_instance *instance) die("Failed to connect to vsocket @%d:%d", instance->cid, instance->port); + do_tsync = tracecmd_time_sync_check(); + msg_handle = tracecmd_msg_handle_alloc(sd, 0); if (!msg_handle) die("Failed to allocate message handle"); ret = tracecmd_msg_send_trace_req(msg_handle, instance->argc, - instance->argv, use_fifos); + instance->argv, use_fifos, do_tsync); if (ret < 0) die("Failed to send trace request"); ret = tracecmd_msg_recv_trace_resp(msg_handle, &nr_cpus, &page_size, - &ports, &use_fifos); + &ports, &use_fifos, &do_tsync); if (ret < 0) die("Failed to receive trace response"); @@ -3397,10 +3411,13 @@ static void connect_to_agent(struct buffer_instance *instance) } instance->use_fifos = use_fifos; + instance->do_tsync = do_tsync; instance->cpu_count = nr_cpus; /* the msg_handle now points to the guest fd */ instance->msg_handle = msg_handle; + + sync_time_with_guest_v3(instance); } static void setup_guest(struct buffer_instance *instance) @@ -3425,10 +3442,13 @@ static void setup_guest(struct buffer_instance *instance) close(fd); } -static void setup_agent(struct buffer_instance *instance, struct common_record_context *ctx) +static void setup_agent(struct buffer_instance *instance, + struct common_record_context *ctx) { struct tracecmd_output *network_handle; + sync_time_with_host_v3(instance); + network_handle = tracecmd_create_init_fd_msg(instance->msg_handle, listed_events); add_options(network_handle, ctx); @@ -5603,6 +5623,42 @@ static bool has_local_instances(void) return false; } +//#define TSYNC_DEBUG +static void write_guest_time_shift(struct buffer_instance *instance) +{ + struct tracecmd_output *handle; + struct iovec vector[3]; + char *file; + int fd; + + if (!instance->time_sync_count) + return; + + file = get_guest_file(output_file, instance->name); + fd = open(file, O_RDWR); + if (fd < 0) + die("error opening %s", file); + put_temp_file(file); + handle = tracecmd_get_output_handle_fd(fd); + vector[0].iov_len = sizeof(instance->time_sync_count); + vector[0].iov_base = &instance->time_sync_count; + vector[1].iov_len = sizeof(long long) * instance->time_sync_count; + vector[1].iov_base = instance->time_sync_ts; + vector[2].iov_len = sizeof(long long) * instance->time_sync_count; + vector[2].iov_base = instance->time_sync_offsets; + tracecmd_add_option_v(handle, TRACECMD_OPTION_TIME_SHIFT, vector, 3); + tracecmd_append_options(handle); + tracecmd_output_close(handle); +#ifdef TSYNC_DEBUG + if (instance->time_sync_count > 1) + printf("\n\rDetected %lld ns ts offset drift in %lld ns for guest %s\n\r", + instance->time_sync_offsets[instance->time_sync_count-1] - + instance->time_sync_offsets[0], + instance->time_sync_ts[instance->time_sync_count-1]- + instance->time_sync_ts[0], instance->name); +#endif +} + /* * This function contains common code for the following commands: * record, start, stream, profile. @@ -5722,6 +5778,20 @@ static void record_trace(int argc, char **argv, if (!latency) wait_threads(); + if (ctx->instance && is_agent(ctx->instance)) { + sync_time_with_host_v3(ctx->instance); + tracecmd_clock_context_free(ctx->instance); + } else { + for_all_instances(instance) { + if (is_guest(instance)) { + sync_time_with_guest_v3(instance); + write_guest_time_shift(instance); + tracecmd_clock_context_free(instance); + } + } + } + close_guests_handle(); + if (IS_RECORD(ctx)) { record_data(ctx); delete_thread_data(); @@ -5860,7 +5930,7 @@ void trace_record(int argc, char **argv) int trace_record_agent(struct tracecmd_msg_handle *msg_handle, int cpus, int *fds, int argc, char **argv, - bool use_fifos) + bool use_fifos, bool do_tsync) { struct common_record_context ctx; char **argv_plus; @@ -5887,6 +5957,7 @@ int trace_record_agent(struct tracecmd_msg_handle *msg_handle, ctx.instance->fds = fds; ctx.instance->use_fifos = use_fifos; + ctx.instance->do_tsync = do_tsync; ctx.instance->flags |= BUFFER_FL_AGENT; ctx.instance->msg_handle = msg_handle; msg_handle->version = V3_PROTOCOL; diff --git a/tracecmd/trace-timesync.c b/tracecmd/trace-timesync.c new file mode 100644 index 0000000..3f648c9 --- /dev/null +++ b/tracecmd/trace-timesync.c @@ -0,0 +1,824 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019, VMware, Tzvetomir Stoyanov <tstoyanov@vmware.com> + * + */ + +#include <fcntl.h> +#include <stdlib.h> +#include <unistd.h> +#include <arpa/inet.h> +#include <linux/vm_sockets.h> +#include "trace-local.h" + +//#define TSYNC_DEBUG + +static int clock_sync_x86_host_init(struct tracecmd_clock_sync *clock_context); +static int clock_sync_x86_host_free(struct tracecmd_clock_sync *clock_context); +static int clock_sync_x86_host_find_events(struct tracecmd_clock_sync *clock, + int cpu, struct tracecmd_time_sync_event *event); +static int clock_sync_x86_guest_init(struct tracecmd_clock_sync *clock_context); +static int clock_sync_x86_guest_free(struct tracecmd_clock_sync *clock_context); +static int clock_sync_x86_guest_find_events(struct tracecmd_clock_sync *clock, + int pid, + struct tracecmd_time_sync_event *event); + +struct tracecmd_event_descr { + char *system; + char *name; +}; + +struct tracecmd_ftrace_param { + char *file; + char *set; + char *reset; +}; + +enum clock_sync_context { + CLOCK_KVM_X86_VSOCK_HOST, + CLOCK_KVM_X86_VSOCK_GUEST, + CLOCK_CONTEXT_MAX, +}; + +struct tracecmd_clock_sync { + enum clock_sync_context clock_context_id; + struct tracecmd_ftrace_param *ftrace_params; + struct tracecmd_time_sync_event *events; + int events_count; + struct tep_handle *tep; + struct buffer_instance *vinst; + + int probes_count; + int bad_probes; + int probes_size; + long long *times; + long long *offsets; + long long offset_av; + long long offset_min; + long long offset_max; + int debug_fd; + + unsigned int local_cid; + unsigned int local_port; + unsigned int remote_cid; + unsigned int remote_port; +}; + +struct { + const char *systems[3]; + struct tracecmd_ftrace_param ftrace_params[5]; + struct tracecmd_event_descr events[3]; + int (*clock_sync_init)(struct tracecmd_clock_sync *clock_context); + int (*clock_sync_free)(struct tracecmd_clock_sync *clock_context); + int (*clock_sync_find_events)(struct tracecmd_clock_sync *clock_context, + int pid, + struct tracecmd_time_sync_event *event); + int (*clock_sync_load)(struct tracecmd_clock_sync *clock_context); + int (*clock_sync_unload)(struct tracecmd_clock_sync *clock_context); +} static clock_sync[CLOCK_CONTEXT_MAX] = { + { /* CLOCK_KVM_X86_VSOCK_HOST */ + .systems = {"vsock", "ftrace", NULL}, + .ftrace_params = { + {"set_ftrace_filter", "vmx_read_l1_tsc_offset\nsvm_read_l1_tsc_offset", "\0"}, + {"current_tracer", "function", "nop"}, + {"events/vsock/virtio_transport_recv_pkt/enable", "1", "0"}, + {"events/vsock/virtio_transport_recv_pkt/filter", NULL, "\0"}, + {NULL, NULL, NULL} }, + .events = { + {.system = "ftrace", .name = "function"}, + {.system = "vsock", .name = "virtio_transport_recv_pkt"}, + {.system = NULL, .name = NULL} }, + clock_sync_x86_host_init, + clock_sync_x86_host_free, + clock_sync_x86_host_find_events, + }, + + { /* CLOCK_KVM_X86_VSOCK_GUEST */ + .systems = { "vsock", "ftrace", NULL}, + .ftrace_params = { + {"set_ftrace_filter", "vp_notify", "\0"}, + {"current_tracer", "function", "nop"}, + {"events/vsock/virtio_transport_alloc_pkt/enable", "1", "0"}, + {"events/vsock/virtio_transport_alloc_pkt/filter", NULL, "\0"}, + {NULL, NULL, NULL}, + }, + .events = { + {.system = "vsock", .name = "virtio_transport_alloc_pkt"}, + {.system = "ftrace", .name = "function"}, + {.system = NULL, .name = NULL} + }, + clock_sync_x86_guest_init, + clock_sync_x86_guest_free, + clock_sync_x86_guest_find_events, + } +}; + +static int clock_sync_x86_host_init(struct tracecmd_clock_sync *clock_context) +{ + char vsock_filter[255]; + + snprintf(vsock_filter, 255, + "src_cid==%u && src_port==%u && dst_cid==%u && dst_port==%u && len!=0", + clock_context->remote_cid, clock_context->remote_port, + clock_context->local_cid, clock_context->local_port); + + clock_context->ftrace_params[3].set = strdup(vsock_filter); + return 1; +} + +static int clock_sync_x86_host_free(struct tracecmd_clock_sync *clock_context) +{ + free(clock_context->ftrace_params[3].set); + clock_context->ftrace_params[3].set = NULL; + return 1; +} + +static int clock_sync_x86_guest_init(struct tracecmd_clock_sync *clock_context) +{ + char vsock_filter[255]; + + snprintf(vsock_filter, 255, + "src_cid==%u && src_port==%u && dst_cid==%u && dst_port==%u && len!=0", + clock_context->local_cid, clock_context->local_port, + clock_context->remote_cid, clock_context->remote_port); + + clock_context->ftrace_params[3].set = strdup(vsock_filter); + return 1; +} + +static int clock_sync_x86_guest_free(struct tracecmd_clock_sync *clock_context) +{ + free(clock_context->ftrace_params[3].set); + clock_context->ftrace_params[3].set = NULL; + return 1; +} + +static int +get_events_in_page(struct tep_handle *tep, void *page, + int size, int cpu, struct tracecmd_time_sync_event **events, + int *events_count, int *events_size) +{ + struct tracecmd_time_sync_event *events_array = NULL; + struct tep_record *last_record = NULL; + struct tep_event *event = NULL; + struct tep_record *record; + int id, cnt = 0; + + if (size <= 0) + return 0; + + if (*events == NULL) { + *events = malloc(10*sizeof(struct tracecmd_time_sync_event)); + *events_size = 10; + *events_count = 0; + } + + while (true) { + event = NULL; + record = tracecmd_read_page_record(tep, page, size, + last_record); + if (!record) + break; + free_record(last_record); + id = tep_data_type(tep, record); + event = tep_data_event_from_type(tep, id); + if (event) { + if (*events_count >= *events_size) { + events_array = realloc(*events, + ((3*(*events_size))/2)* + sizeof(struct tracecmd_time_sync_event)); + if (events_array) { + *events = events_array; + (*events_size) = (((*events_size)*3)/2); + } + } + + if (*events_count < *events_size) { + (*events)[*events_count].ts = record->ts; + (*events)[*events_count].cpu = cpu; + (*events)[*events_count].id = id; + (*events)[*events_count].pid = tep_data_pid(tep, record); + (*events_count)++; + cnt++; + } + } + last_record = record; + } + free_record(last_record); + + return cnt; +} + +static int sync_events_cmp(const void *a, const void *b) +{ + const struct tracecmd_time_sync_event *ea = (const struct tracecmd_time_sync_event *)a; + const struct tracecmd_time_sync_event *eb = (const struct tracecmd_time_sync_event *)b; + + if (ea->ts > eb->ts) + return 1; + if (ea->ts < eb->ts) + return -1; + return 0; +} + +static int find_sync_events(struct tep_handle *pevent, + struct tracecmd_time_sync_event *recorded, + int rsize, struct tracecmd_time_sync_event *events) +{ + int i = 0, j = 0; + + while (i < rsize) { + if (!events[j].ts && events[j].id == recorded[i].id && + (events[j].pid < 0 || events[j].pid == recorded[i].pid)) { + events[j].cpu = recorded[i].cpu; + events[j].ts = recorded[i].ts; + j++; + } else if (j > 0 && events[j-1].id == recorded[i].id && + (events[j-1].pid < 0 || events[j-1].pid == recorded[i].pid)) { + events[j-1].cpu = recorded[i].cpu; + events[j-1].ts = recorded[i].ts; + } + i++; + } + return j; +} + +//#define TSYNC_RBUFFER_DEBUG +static int find_raw_events(struct tep_handle *tep, + struct buffer_instance *instance, + struct tracecmd_time_sync_event *events) +{ + struct tracecmd_time_sync_event *events_array = NULL; + int events_count = 0; + int events_size = 0; + struct dirent *dent; + unsigned int p_size; + int ts = 0; + void *page; + char *path; + char *file; + DIR *dir; + int cpu; + int len; + int fd; + int r; + + p_size = getpagesize(); +#ifdef TSYNC_RBUFFER_DEBUG + file = get_instance_file(instance, "trace"); + if (!file) + return ts; + { + char *buf = NULL; + FILE *fp; + size_t n; + int r; + + printf("Events:\n\r"); + fp = fopen(file, "r"); + while ((r = getline(&buf, &n, fp)) >= 0) { + + if (buf[0] != '#') + printf("%s", buf); + free(buf); + buf = NULL; + } + fclose(fp); + } + tracecmd_put_tracing_file(file); +#endif /* TSYNC_RBUFFER_DEBUG */ + path = get_instance_file(instance, "per_cpu"); + if (!path) + return ts; + + dir = opendir(path); + if (!dir) + goto out; + + len = strlen(path); + file = malloc(len + strlen("trace_pipe_raw") + 32); + page = malloc(p_size); + if (!file || !page) + die("Failed to allocate time_stamp info"); + + while ((dent = readdir(dir))) { + + const char *name = dent->d_name; + + if (strncmp(name, "cpu", 3) != 0) + continue; + cpu = atoi(&name[3]); + sprintf(file, "%s/%s/trace_pipe_raw", path, name); + fd = open(file, O_RDONLY | O_NONBLOCK); + if (fd < 0) + continue; + do { + r = read(fd, page, p_size); + if (r > 0) { + get_events_in_page(tep, page, r, cpu, + &events_array, &events_count, + &events_size); + } + } while (r > 0); + close(fd); + } + qsort(events_array, events_count, sizeof(*events_array), sync_events_cmp); + r = find_sync_events(tep, events_array, events_count, events); +#ifdef TSYNC_RBUFFER_DEBUG + len = 0; + while (events[len].id) { + printf("Found %d @ cpu %d: %lld pid %d\n\r", + events[len].id, events[len].cpu, + events[len].ts, events[len].pid); + len++; + } +#endif + + free(events_array); + free(file); + free(page); + closedir(dir); + + out: + tracecmd_put_tracing_file(path); + return r; +} + +static int clock_sync_x86_host_find_events(struct tracecmd_clock_sync *clock, + int pid, + struct tracecmd_time_sync_event *event) +{ + int ret; + + clock->events[0].pid = pid; + ret = find_raw_events(clock->tep, clock->vinst, clock->events); + event->ts = clock->events[0].ts; + event->cpu = clock->events[0].cpu; + return ret; + +} + +static int clock_sync_x86_guest_find_events(struct tracecmd_clock_sync *clock, + int pid, + struct tracecmd_time_sync_event *event) +{ + int ret; + + ret = find_raw_events(clock->tep, clock->vinst, clock->events); + if (ret != clock->events_count) + return 0; + event->ts = clock->events[1].ts; + event->cpu = clock->events[0].cpu; + return 1; + +} + +static void tracecmd_clock_sync_reset(struct tracecmd_clock_sync *clock_context) +{ + int i = 0; + + while (clock_context->events[i].id) { + clock_context->events[i].cpu = 0; + clock_context->events[i].ts = 0; + clock_context->events[i].pid = -1; + i++; + } +} + +int tracecmd_clock_find_event(struct tracecmd_clock_sync *clock, int pid, + struct tracecmd_time_sync_event *event) +{ + int ret = 0; + int id; + + if (clock == NULL || + clock->clock_context_id >= CLOCK_CONTEXT_MAX) + return 0; + + id = clock->clock_context_id; + if (clock_sync[id].clock_sync_find_events) + ret = clock_sync[id].clock_sync_find_events(clock, pid, event); + + tracecmd_clock_sync_reset(clock); + return ret; +} + +static void clock_context_copy(struct tracecmd_clock_sync *clock_context, + struct tracecmd_ftrace_param *params, + struct tracecmd_event_descr *events) +{ + int i; + + i = 0; + while (params[i].file) + i++; + clock_context->ftrace_params = calloc(i+1, sizeof(struct tracecmd_ftrace_param)); + i = 0; + while (params[i].file) { + clock_context->ftrace_params[i].file = strdup(params[i].file); + if (params[i].set) + clock_context->ftrace_params[i].set = strdup(params[i].set); + if (params[i].reset) + clock_context->ftrace_params[i].reset = strdup(params[i].reset); + i++; + } + + i = 0; + while (events[i].name) + i++; + clock_context->events = calloc(i+1, sizeof(struct tracecmd_time_sync_event)); + clock_context->events_count = i; +} + +void trace_instance_reset(struct buffer_instance *vinst) +{ + write_instance_file(vinst, "trace", "\0", NULL); +} + +static struct buffer_instance * +clock_synch_create_instance(const char *clock, unsigned int cid) +{ + struct buffer_instance *vinst; + char inst_name[256]; + + snprintf(inst_name, 256, "clock_synch-%d", cid); + + vinst = create_instance(strdup(inst_name)); + tracecmd_init_instance(vinst); + vinst->cpu_count = tracecmd_local_cpu_count(); + tracecmd_make_instance(vinst); + trace_instance_reset(vinst); + if (clock) + vinst->clock = strdup(clock); + tracecmd_set_clock(vinst); + return vinst; +} + +static struct tep_handle *clock_synch_get_tep(struct buffer_instance *instance, + const char * const *systems) +{ + struct tep_handle *tep = NULL; + char *path; + + path = get_instance_dir(instance); + tep = tracecmd_local_events_system(path, systems); + tracecmd_put_tracing_file(path); + + tep_set_file_bigendian(tep, tracecmd_host_bigendian()); + tep_set_host_bigendian(tep, tracecmd_host_bigendian()); + + return tep; +} + +static void get_vsocket_params(int fd, unsigned int *lcid, unsigned int *lport, + unsigned int *rcid, unsigned int *rport) +{ + struct sockaddr_vm addr; + socklen_t addr_len = sizeof(addr); + + if (lcid || lport) { + memset(&addr, 0, sizeof(addr)); + if (getsockname(fd, (struct sockaddr *)&addr, &addr_len)) + return; + if (addr.svm_family != AF_VSOCK) + return; + if (lport) + *lport = addr.svm_port; + if (lcid) + *lcid = addr.svm_cid; + } + + if (rcid || rport) { + memset(&addr, 0, sizeof(addr)); + addr_len = sizeof(addr); + if (getpeername(fd, (struct sockaddr *)&addr, &addr_len)) + return; + if (addr.svm_family != AF_VSOCK) + return; + + if (rport) + *rport = addr.svm_port; + if (rcid) + *rcid = addr.svm_cid; + } +} + +struct tracecmd_clock_sync* +tracecmd_clock_context_new(struct tracecmd_msg_handle *msg_handle, + const char *clock_str, + enum clock_sync_context id) +{ + struct tracecmd_clock_sync *clock_context; + struct tep_event *event; + unsigned int i = 0; + + switch (id) { +#ifdef VSOCK + case CLOCK_KVM_X86_VSOCK_HOST: + case CLOCK_KVM_X86_VSOCK_GUEST: + break; +#endif + default: /* not supported clock sync context */ + return NULL; + } + + if (id >= CLOCK_CONTEXT_MAX || NULL == msg_handle) + return NULL; + clock_context = calloc(1, sizeof(struct tracecmd_clock_sync)); + if (!clock_context) + return NULL; + clock_context->clock_context_id = id; + clock_context_copy(clock_context, + clock_sync[id].ftrace_params, clock_sync[id].events); + + get_vsocket_params(msg_handle->fd, + &(clock_context->local_cid), + &(clock_context->local_port), + &(clock_context->remote_cid), + &(clock_context->remote_port)); + + if (clock_sync[id].clock_sync_init) + clock_sync[id].clock_sync_init(clock_context); + + clock_context->vinst = clock_synch_create_instance(clock_str, clock_context->remote_cid); + clock_context->tep = clock_synch_get_tep(clock_context->vinst, + clock_sync[id].systems); + i = 0; + while (clock_sync[id].events[i].name) { + event = tep_find_event_by_name(clock_context->tep, + clock_sync[id].events[i].system, + clock_sync[id].events[i].name); + if (!event) + break; + clock_context->events[i].id = event->id; + i++; + } +#ifdef TSYNC_DEBUG + clock_context->debug_fd = -1; +#endif + + return clock_context; + +} + +void tracecmd_clock_context_free(struct buffer_instance *instance) +{ + int i; + + if (instance->clock_sync == NULL || + instance->clock_sync->clock_context_id >= CLOCK_CONTEXT_MAX) + return; + if (clock_sync[instance->clock_sync->clock_context_id].clock_sync_free) + clock_sync[instance->clock_sync->clock_context_id].clock_sync_free(instance->clock_sync); + + i = 0; + while (instance->clock_sync->ftrace_params[i].file) { + free(instance->clock_sync->ftrace_params[i].file); + free(instance->clock_sync->ftrace_params[i].set); + free(instance->clock_sync->ftrace_params[i].reset); + i++; + } + free(instance->clock_sync->ftrace_params); + free(instance->clock_sync->events); + tracecmd_remove_instance(instance->clock_sync->vinst); + /* todo: clean up the instance */ + tep_free(instance->clock_sync->tep); + + free(instance->clock_sync->offsets); + free(instance->clock_sync->times); +#ifdef TSYNC_DEBUG + if (instance->clock_sync->debug_fd >= 0) { + close (instance->clock_sync->debug_fd); + instance->clock_sync->debug_fd = -1; + } +#endif + free(instance->clock_sync); + instance->clock_sync = NULL; +} + +bool tracecmd_time_sync_check(void) +{ +#ifdef VSOCK + return true; +#endif + return false; +} + +void sync_time_with_host_v3(struct buffer_instance *instance) +{ + long long timestamp = 0; + long long offset = 0; + + if (!instance->do_tsync) + return; + + if (instance->clock_sync == NULL) + instance->clock_sync = tracecmd_clock_context_new(instance->msg_handle, + instance->clock, CLOCK_KVM_X86_VSOCK_GUEST); + + tracecmd_msg_snd_time_sync(instance->msg_handle, instance->clock_sync, + &offset, ×tamp); +} + +void sync_time_with_guest_v3(struct buffer_instance *instance) +{ + long long timestamp = 0; + long long offset = 0; + long long *sync_array_ts; + long long *sync_array_offs; + + if (!instance->do_tsync) + return; + + if (instance->clock_sync == NULL) + instance->clock_sync = tracecmd_clock_context_new(instance->msg_handle, + top_instance.clock, CLOCK_KVM_X86_VSOCK_HOST); + + tracecmd_msg_rcv_time_sync(instance->msg_handle, + instance->clock_sync, &offset, ×tamp); + + sync_array_ts = realloc(instance->time_sync_ts, + (instance->time_sync_count+1)*sizeof(long long)); + sync_array_offs = realloc(instance->time_sync_offsets, + (instance->time_sync_count+1)*sizeof(long long)); + + if (sync_array_ts && sync_array_offs) { + sync_array_ts[instance->time_sync_count] = timestamp; + sync_array_offs[instance->time_sync_count] = offset; + instance->time_sync_count++; + instance->time_sync_ts = sync_array_ts; + instance->time_sync_offsets = sync_array_offs; + + } else { + free(sync_array_ts); + free(sync_array_offs); + } +} + +static void set_clock_synch_events(struct buffer_instance *instance, + struct tracecmd_ftrace_param *params, + bool enable) +{ + int i = 0; + + if (!enable) + write_tracing_on(instance, 0); + + while (params[i].file) { + if (enable && params[i].set) { + write_instance_file(instance, params[i].file, + params[i].set, NULL); + } + if (!enable && params[i].reset) + write_instance_file(instance, params[i].file, + params[i].reset, NULL); + i++; + } + + if (enable) + write_tracing_on(instance, 1); +} + +int tracecmd_clock_get_peer(struct tracecmd_clock_sync *clock_context, + unsigned int *remote_cid, unsigned int *remote_port) +{ + if (!clock_context) + return 0; + if (remote_cid) + *remote_cid = clock_context->remote_cid; + if (remote_port) + *remote_cid = clock_context->remote_port; + return 1; +} + +void tracecmd_clock_synch_enable(struct tracecmd_clock_sync *clock_context) +{ + set_clock_synch_events(clock_context->vinst, + clock_context->ftrace_params, true); +} + +void tracecmd_clock_synch_disable(struct tracecmd_clock_sync *clock_context) +{ + set_clock_synch_events(clock_context->vinst, + clock_context->ftrace_params, false); +} + +int tracecmd_clock_synch_calc(struct tracecmd_clock_sync *clock_context, + long long *offset_ret, long long *time_ret) +{ + int i, j = 0; + long long av, tresch, offset = 0, time = 0; + + if (!clock_context || !clock_context->probes_count) + return 0; + av = clock_context->offset_av / clock_context->probes_count; + tresch = (long long)((clock_context->offset_max - clock_context->offset_min)/10); + + for (i = 0; i < clock_context->probes_count; i++) { + /* filter the offsets with deviation up to 10% */ + if (llabs(clock_context->offsets[i] - av) < tresch) { + offset += clock_context->offsets[i]; + j++; + } + } + if (j) + offset /= (long long)j; + + tresch = 0; + for (i = 0; i < clock_context->probes_count; i++) { + if ((!tresch || tresch > llabs(offset-clock_context->offsets[i]))) { + tresch = llabs(offset-clock_context->offsets[i]); + time = clock_context->times[i]; + } + } + if (offset_ret) + *offset_ret = offset; + if (time_ret) + *time_ret = time; +#ifdef TSYNC_DEBUG + printf("\n calculated offset: %lld, %d/%d probes\n\r", + *offset_ret, clock_context->probes_count, + clock_context->probes_count + clock_context->bad_probes); +#endif + return 1; +} + +void tracecmd_clock_synch_calc_reset(struct tracecmd_clock_sync *clock_context) +{ + if (!clock_context) + return; + + clock_context->probes_count = 0; + clock_context->bad_probes = 0; + clock_context->offset_av = 0; + clock_context->offset_min = 0; + clock_context->offset_max = 0; +#ifdef TSYNC_DEBUG + if (clock_context->debug_fd >= 0) { + close(clock_context->debug_fd); + clock_context->debug_fd = -1; + } +#endif + +} + +void tracecmd_clock_synch_calc_probe(struct tracecmd_clock_sync *clock_context, + long long ts_local, long long ts_remote) +{ + int count; +#ifdef TSYNC_DEBUG + char buff[256]; +#endif + + if (!clock_context || !ts_local || !ts_remote) + return; + if (!ts_local || !ts_remote) { + clock_context->bad_probes++; + return; + } + + if (!clock_context->offsets && !clock_context->times) { + clock_context->offsets = calloc(10, sizeof(long long)); + clock_context->times = calloc(10, sizeof(long long)); + clock_context->probes_size = 10; + } + + if (clock_context->probes_size == clock_context->probes_count) { + clock_context->probes_size = (3*clock_context->probes_size)/2; + clock_context->offsets = realloc(clock_context->offsets, + clock_context->probes_size * + sizeof(long long)); + clock_context->times = realloc(clock_context->times, + clock_context->probes_size* + sizeof(long long)); + } + + if (!clock_context->offsets || !clock_context->times) { + clock_context->probes_size = 0; + tracecmd_clock_synch_calc_reset(clock_context); + return; + } +#ifdef TSYNC_DEBUG + if (clock_context->debug_fd < 0) { + sprintf(buff, "s-cid%d.txt", clock_context->remote_cid); + clock_context->debug_fd = open(buff, O_CREAT|O_WRONLY|O_TRUNC, 0644); + } +#endif + count = clock_context->probes_count; + clock_context->probes_count++; + clock_context->offsets[count] = ts_remote - ts_local; + clock_context->times[count] = ts_local; + clock_context->offset_av += clock_context->offsets[count]; + + if (!clock_context->offset_min || + clock_context->offset_min > llabs(clock_context->offsets[count])) + clock_context->offset_min = llabs(clock_context->offsets[count]); + if (!clock_context->offset_max || + clock_context->offset_max < llabs(clock_context->offsets[count])) + clock_context->offset_max = llabs(clock_context->offsets[count]); +#ifdef TSYNC_DEBUG + sprintf(buff, "%lld %lld\n", ts_local, ts_remote); + write(clock_context->debug_fd, buff, strlen(buff)); +#endif + +}
This is a POC patch, implementing an algorithm for syncing timestamps between host and guest machines, using vsock trace events to catch the host / guest time. Signed-off-by: Tzvetomir Stoyanov <tstoyanov@vmware.com> --- include/trace-cmd/trace-cmd.h | 21 +- tracecmd/Makefile | 1 + tracecmd/include/trace-local.h | 33 +- tracecmd/trace-agent.c | 10 +- tracecmd/trace-msg.c | 209 ++++++++- tracecmd/trace-record.c | 81 +++- tracecmd/trace-timesync.c | 824 +++++++++++++++++++++++++++++++++ 7 files changed, 1144 insertions(+), 35 deletions(-) create mode 100644 tracecmd/trace-timesync.c