@@ -454,8 +454,8 @@ tracecmd_tsync_with_guest(unsigned long long trace_id, int loop_interval,
int guest_cpus, const char *proto_name, const char *clock);
int tracecmd_tsync_with_guest_stop(struct tracecmd_time_sync *tsync);
int tracecmd_tsync_get_offsets(struct tracecmd_time_sync *tsync, int cpu,
- int *count, long long **ts,
- long long **offsets, long long **scalings);
+ int *count, long long **ts,
+ long long **offsets, long long **scalings, long long **frac);
int tracecmd_tsync_get_session_params(struct tracecmd_time_sync *tsync,
char **selected_proto,
unsigned int *tsync_port);
@@ -27,14 +27,15 @@ struct tracecmd_time_sync {
struct clock_sync_offsets {
/* Arrays with calculated time offsets at given time */
int sync_size; /* Allocated size of sync_ts,
- * sync_offsets and sync_scalings
+ * sync_offsets, sync_scalings and sync_frac
*/
int sync_count; /* Number of elements in sync_ts,
- * sync_offsets and sync_scalings
+ * sync_offsets, sync_scalings and sync_frac
*/
long long *sync_ts;
long long *sync_offsets;
long long *sync_scalings;
+ long long *sync_frac;
};
struct clock_sync_context {
@@ -60,7 +61,7 @@ int tracecmd_tsync_proto_register(const char *proto_name, int accuracy, int role
int (*init)(struct tracecmd_time_sync *),
int (*free)(struct tracecmd_time_sync *),
int (*calc)(struct tracecmd_time_sync *,
- long long *, long long *,
+ long long *, long long *, long long*,
long long *, unsigned int));
int tracecmd_tsync_proto_unregister(char *proto_name);
@@ -19,6 +19,7 @@
#define KVM_DEBUG_FS "/sys/kernel/debug/kvm"
#define KVM_DEBUG_OFFSET_FILE "tsc-offset"
#define KVM_DEBUG_SCALING_FILE "tsc-scaling-ratio"
+#define KVM_DEBUG_FRACTION_FILE "tsc-scaling-ratio-frac-bits"
#define KVM_DEBUG_VCPU_DIR "vcpu"
/* default KVM scaling values, taken from the Linux kernel */
@@ -37,6 +38,7 @@ struct kvm_clock_sync {
int vcpu_count;
char **vcpu_offsets;
char **vcpu_scalings;
+ char **vcpu_frac;
int marker_fd;
struct tep_handle *tep;
int raw_id;
@@ -47,6 +49,7 @@ struct kvm_clock_offset_msg {
s64 ts;
s64 offset;
s64 scaling;
+ s64 frac;
};
static int read_ll_from_file(char *file, long long *res)
@@ -72,21 +75,27 @@ static int read_ll_from_file(char *file, long long *res)
static bool kvm_scaling_check_vm_cpu(char *vname, char *cpu)
{
- long long scaling;
+ long long scaling, frac;
+ bool has_scaling = false;
+ bool has_frac = false;
char *path;
int ret;
if (asprintf(&path, "%s/%s/%s", vname, cpu, KVM_DEBUG_SCALING_FILE) < 0)
- return true;
+ return false;
ret = read_ll_from_file(path, &scaling);
free(path);
+ if (!ret)
+ has_scaling = true;
+
+ if (asprintf(&path, "%s/%s/%s", vname, cpu, KVM_DEBUG_FRACTION_FILE) < 0)
+ return false;
+ ret = read_ll_from_file(path, &frac);
+ free(path);
+ if (!ret)
+ has_frac = true;
- /*
- * If there is a scaling, different from the default -
- * return false, not supported.
- */
- if (!ret &&
- scaling != KVM_SCALING_AMD_DEFAULT && scaling != KVM_SCALING_INTEL_DEFAULT)
+ if (has_scaling != has_frac)
return false;
return true;
@@ -173,6 +182,11 @@ static int kvm_open_vcpu_dir(struct kvm_clock_sync *kvm, int cpu, char *dir_str)
dir_str, entry->d_name);
kvm->vcpu_scalings[cpu] = strdup(path);
}
+ if (!strcmp(entry->d_name, KVM_DEBUG_FRACTION_FILE)) {
+ snprintf(path, sizeof(path), "%s/%s",
+ dir_str, entry->d_name);
+ kvm->vcpu_frac[cpu] = strdup(path);
+ }
}
}
if (!kvm->vcpu_offsets[cpu])
@@ -187,6 +201,8 @@ error:
kvm->vcpu_offsets[cpu] = NULL;
free(kvm->vcpu_scalings[cpu]);
kvm->vcpu_scalings[cpu] = NULL;
+ free(kvm->vcpu_frac[cpu]);
+ kvm->vcpu_frac[cpu] = NULL;
return -1;
}
@@ -252,7 +268,8 @@ static int kvm_clock_sync_init_host(struct tracecmd_time_sync *tsync,
kvm->vcpu_count = tsync->vcpu_count;
kvm->vcpu_offsets = calloc(kvm->vcpu_count, sizeof(char *));
kvm->vcpu_scalings = calloc(kvm->vcpu_count, sizeof(char *));
- if (!kvm->vcpu_offsets || !kvm->vcpu_scalings)
+ kvm->vcpu_frac = calloc(kvm->vcpu_count, sizeof(char *));
+ if (!kvm->vcpu_offsets || !kvm->vcpu_scalings || !kvm->vcpu_frac)
goto error;
if (kvm_open_debug_files(kvm, tsync->guest_pid) < 0)
goto error;
@@ -261,6 +278,7 @@ static int kvm_clock_sync_init_host(struct tracecmd_time_sync *tsync,
error:
free(kvm->vcpu_offsets);
free(kvm->vcpu_scalings);
+ free(kvm->vcpu_frac);
return -1;
}
@@ -351,6 +369,8 @@ static int kvm_clock_sync_free(struct tracecmd_time_sync *tsync)
kvm->vcpu_offsets[i] = NULL;
free(kvm->vcpu_scalings[i]);
kvm->vcpu_scalings[i] = NULL;
+ free(kvm->vcpu_frac[i]);
+ kvm->vcpu_frac[i] = NULL;
}
if (kvm->tep)
tep_free(kvm->tep);
@@ -362,7 +382,7 @@ static int kvm_clock_sync_free(struct tracecmd_time_sync *tsync)
}
static int kvm_clock_host(struct tracecmd_time_sync *tsync,
- long long *offset, long long *scaling,
+ long long *offset, long long *scaling, long long *frac,
long long *timestamp, unsigned int cpu)
{
char sync_proto[TRACECMD_TSYNC_PNAME_LENGTH];
@@ -372,6 +392,7 @@ static int kvm_clock_host(struct tracecmd_time_sync *tsync,
long long kvm_scaling = 1;
unsigned int sync_msg;
long long kvm_offset;
+ long long kvm_frac = 0;
unsigned int size;
char *msg;
int ret;
@@ -386,12 +407,16 @@ static int kvm_clock_host(struct tracecmd_time_sync *tsync,
ret = read_ll_from_file(kvm->vcpu_offsets[cpu], &kvm_offset);
if (ret < 0)
return -1;
+
if (kvm->vcpu_scalings && kvm->vcpu_scalings[cpu]) {
read_ll_from_file(kvm->vcpu_scalings[cpu], &kvm_scaling);
if (kvm_scaling == KVM_SCALING_AMD_DEFAULT ||
kvm_scaling == KVM_SCALING_INTEL_DEFAULT)
kvm_scaling = 1;
}
+
+ if (kvm->vcpu_frac && kvm->vcpu_frac[cpu])
+ ret = read_ll_from_file(kvm->vcpu_frac[cpu], &kvm_frac);
msg = (char *)&packet;
size = sizeof(packet);
ret = tracecmd_msg_recv_time_sync(tsync->msg_handle,
@@ -403,6 +428,7 @@ static int kvm_clock_host(struct tracecmd_time_sync *tsync,
packet.offset = -kvm_offset;
packet.scaling = kvm_scaling;
+ packet.frac = kvm_frac;
ret = tracecmd_msg_send_time_sync(tsync->msg_handle, KVM_NAME,
KVM_SYNC_PKT_RESPONSE, sizeof(packet),
(char *)&packet);
@@ -411,6 +437,7 @@ static int kvm_clock_host(struct tracecmd_time_sync *tsync,
*scaling = packet.scaling;
*offset = packet.offset;
+ *frac = kvm_frac;
*timestamp = packet.ts;
return 0;
@@ -442,10 +469,10 @@ static int kvm_marker_find(struct tep_event *event, struct tep_record *record,
return 0;
}
-
static int kvm_clock_guest(struct tracecmd_time_sync *tsync,
long long *offset,
long long *scaling,
+ long long *frac,
long long *timestamp)
{
char sync_proto[TRACECMD_TSYNC_PNAME_LENGTH];
@@ -486,12 +513,13 @@ static int kvm_clock_guest(struct tracecmd_time_sync *tsync,
*scaling = packet.scaling;
*offset = packet.offset;
+ *frac = packet.frac;
*timestamp = packet.ts;
return 0;
}
static int kvm_clock_sync_calc(struct tracecmd_time_sync *tsync,
- long long *offset, long long *scaling,
+ long long *offset, long long *scaling, long long *frac,
long long *timestamp, unsigned int cpu)
{
struct clock_sync_context *clock_context;
@@ -503,9 +531,9 @@ static int kvm_clock_sync_calc(struct tracecmd_time_sync *tsync,
clock_context = (struct clock_sync_context *)tsync->context;
if (clock_context->is_guest)
- ret = kvm_clock_guest(tsync, offset, scaling, timestamp);
+ ret = kvm_clock_guest(tsync, offset, scaling, frac, timestamp);
else
- ret = kvm_clock_host(tsync, offset, scaling, timestamp, cpu);
+ ret = kvm_clock_host(tsync, offset, scaling, frac, timestamp, cpu);
return ret;
}
@@ -663,7 +663,7 @@ static int ptp_clock_server(struct tracecmd_time_sync *tsync,
}
static int ptp_clock_sync_calc(struct tracecmd_time_sync *tsync,
- long long *offset, long long *scaling,
+ long long *offset, long long *scaling, long long *frac,
long long *timestamp, unsigned int cpu)
{
struct clock_sync_context *clock_context;
@@ -689,6 +689,8 @@ static int ptp_clock_sync_calc(struct tracecmd_time_sync *tsync,
if (scaling)
*scaling = 1;
+ if (frac)
+ *frac = 0;
if (clock_context->is_server)
ret = ptp_clock_server(tsync, offset, timestamp);
else
@@ -36,7 +36,7 @@ struct tsync_proto {
int (*clock_sync_init)(struct tracecmd_time_sync *clock_context);
int (*clock_sync_free)(struct tracecmd_time_sync *clock_context);
int (*clock_sync_calc)(struct tracecmd_time_sync *clock_context,
- long long *offset, long long *scaling,
+ long long *offset, long long *scaling, long long *frac,
long long *timestamp, unsigned int cpu);
};
@@ -76,7 +76,7 @@ int tracecmd_tsync_proto_register(const char *proto_name, int accuracy, int role
int (*init)(struct tracecmd_time_sync *),
int (*free)(struct tracecmd_time_sync *),
int (*calc)(struct tracecmd_time_sync *,
- long long *, long long *,
+ long long *, long long *, long long *,
long long *, unsigned int))
{
struct tsync_proto *proto = NULL;
@@ -137,12 +137,13 @@ bool __hidden tsync_proto_is_supported(const char *proto_name)
* @ts: Array of size @count containing timestamps of callculated offsets
* @offsets: array of size @count, containing offsets for each timestamp
* @scalings: array of size @count, containing scaling ratios for each timestamp
+ * @frac: array of size @count, containing fraction bits for each timestamp
*
* Retuns -1 in case of an error, or 0 otherwise
*/
int tracecmd_tsync_get_offsets(struct tracecmd_time_sync *tsync, int cpu,
int *count, long long **ts,
- long long **offsets, long long **scalings)
+ long long **offsets, long long **scalings, long long **frac)
{
struct clock_sync_context *tsync_context;
@@ -159,6 +160,8 @@ int tracecmd_tsync_get_offsets(struct tracecmd_time_sync *tsync, int cpu,
*offsets = tsync_context->offsets[cpu].sync_offsets;
if (scalings)
*scalings = tsync_context->offsets[cpu].sync_scalings;
+ if (frac)
+ *frac = tsync_context->offsets[cpu].sync_frac;
return 0;
}
@@ -564,9 +567,11 @@ void tracecmd_tsync_free(struct tracecmd_time_sync *tsync)
free(tsync_context->offsets[i].sync_ts);
free(tsync_context->offsets[i].sync_offsets);
free(tsync_context->offsets[i].sync_scalings);
+ free(tsync_context->offsets[i].sync_frac);
tsync_context->offsets[i].sync_ts = NULL;
tsync_context->offsets[i].sync_offsets = NULL;
tsync_context->offsets[i].sync_scalings = NULL;
+ tsync_context->offsets[i].sync_frac = NULL;
tsync_context->offsets[i].sync_count = 0;
tsync_context->offsets[i].sync_size = 0;
}
@@ -643,10 +648,11 @@ static int tsync_send(struct tracecmd_time_sync *tsync,
long long timestamp = 0;
long long scaling = 0;
long long offset = 0;
+ long long frac = 0;
int ret;
old_set = pin_to_cpu(cpu);
- ret = proto->clock_sync_calc(tsync, &offset, &scaling, ×tamp, cpu);
+ ret = proto->clock_sync_calc(tsync, &offset, &scaling, &frac, ×tamp, cpu);
if (old_set)
restore_pin_to_cpu(old_set);
@@ -685,10 +691,11 @@ static void tsync_with_host(struct tracecmd_time_sync *tsync)
}
static int record_sync_sample(struct clock_sync_offsets *offsets, int array_step,
- long long offset, long long scaling, long long ts)
+ long long offset, long long scaling, long long frac, long long ts)
{
long long *sync_scalings = NULL;
long long *sync_offsets = NULL;
+ long long *sync_frac = NULL;
long long *sync_ts = NULL;
if (offsets->sync_count >= offsets->sync_size) {
@@ -698,22 +705,27 @@ static int record_sync_sample(struct clock_sync_offsets *offsets, int array_step
(offsets->sync_size + array_step) * sizeof(long long));
sync_scalings = realloc(offsets->sync_scalings,
(offsets->sync_size + array_step) * sizeof(long long));
+ sync_frac = realloc(offsets->sync_frac,
+ (offsets->sync_size + array_step) * sizeof(long long));
- if (!sync_ts || !sync_offsets || !sync_scalings) {
+ if (!sync_ts || !sync_offsets || !sync_scalings || !sync_frac) {
free(sync_ts);
free(sync_offsets);
free(sync_scalings);
+ free(sync_frac);
return -1;
}
offsets->sync_size += array_step;
offsets->sync_ts = sync_ts;
offsets->sync_offsets = sync_offsets;
offsets->sync_scalings = sync_scalings;
+ offsets->sync_frac = sync_frac;
}
offsets->sync_ts[offsets->sync_count] = ts;
offsets->sync_offsets[offsets->sync_count] = offset;
offsets->sync_scalings[offsets->sync_count] = scaling;
+ offsets->sync_frac[offsets->sync_count] = frac;
offsets->sync_count++;
return 0;
@@ -726,9 +738,10 @@ static int tsync_get_sample(struct tracecmd_time_sync *tsync, unsigned int cpu,
long long timestamp = 0;
long long scaling = 0;
long long offset = 0;
+ long long frac = 0;
int ret;
- ret = proto->clock_sync_calc(tsync, &offset, &scaling, ×tamp, cpu);
+ ret = proto->clock_sync_calc(tsync, &offset, &scaling, &frac, ×tamp, cpu);
if (ret) {
tracecmd_warning("Failed to synchronize timestamps with guest");
return -1;
@@ -739,7 +752,7 @@ static int tsync_get_sample(struct tracecmd_time_sync *tsync, unsigned int cpu,
if (!clock || cpu >= clock->cpu_count || !clock->offsets)
return -1;
return record_sync_sample(&clock->offsets[cpu], array_step,
- offset, scaling, timestamp);
+ offset, scaling, frac, timestamp);
}
#define TIMER_SEC_NANO 1000000000LL
@@ -928,6 +941,7 @@ int tracecmd_write_guest_time_shift(struct tracecmd_output *handle,
unsigned int flags;
long long *scalings = NULL;
long long *offsets = NULL;
+ long long *frac = NULL;
long long *ts = NULL;
int vcount;
int count;
@@ -936,7 +950,7 @@ int tracecmd_write_guest_time_shift(struct tracecmd_output *handle,
if (!tsync->vcpu_count)
return -1;
- vcount = 3 + (4 * tsync->vcpu_count);
+ vcount = 3 + (5 * tsync->vcpu_count);
vector = calloc(vcount, sizeof(struct iovec));
if (!vector)
return -1;
@@ -955,7 +969,7 @@ int tracecmd_write_guest_time_shift(struct tracecmd_output *handle,
if (j >= vcount)
break;
ret = tracecmd_tsync_get_offsets(tsync, i, &count,
- &ts, &offsets, &scalings);
+ &ts, &offsets, &scalings, NULL);
if (ret < 0 || !count || !ts || !offsets || !scalings)
break;
vector[j].iov_len = 4;
@@ -971,6 +985,27 @@ int tracecmd_write_guest_time_shift(struct tracecmd_output *handle,
ret = -1;
goto out;
}
+ /*
+ * Writing fraction bits into the option is implemented in a separate loop for
+ * backward compatibility. In the trace-cmd 2.9 release, this option has only offset
+ * and scaling. That legacy code must work with the new extended option.
+ *
+ */
+ for (i = 0; i < tsync->vcpu_count; i++) {
+ if (j >= vcount)
+ break;
+ ret = tracecmd_tsync_get_offsets(tsync, i, NULL,
+ NULL, NULL, NULL, &frac);
+ if (ret < 0)
+ break;
+ vector[j].iov_len = 8 * count;
+ vector[j++].iov_base = frac;
+ }
+ if (i < tsync->vcpu_count) {
+ ret = -1;
+ goto out;
+ }
+
tracecmd_add_option_v(handle, TRACECMD_OPTION_TIME_SHIFT, vector, vcount);
#ifdef TSYNC_DEBUG
if (count > 1)
The formula, used in the Linux kernel for guest TSC clock is: guest_tsc = tsc-offset + (host_tsc * tsc-scaling-ratio) >> tsc-scaling-ratio-frac-bits The KVM hypervisor exports these parameters in the debugfs, if clock scaling is supported: /sys/kernel/debug/kvm: tsc-offset tsc-scaling-ratio tsc-scaling-ratio-frac-bits Current trace-cmd implementation does not take into account fraction bits, when calculating guest clock offset in case of a clock scaling. The TRACECMD_OPTION_TIME_SHIFT option is extended with per-cpu array with fraction bits. KVM and PTP algorithms are extended to get fraction: - in PTP, there are fixed scaling=1 and fraction=0; - KVM gets these from KVM debugfs, if available. The formula from the kernel is implemented in trace-cmd library, when calculating timestamps of the guest events. This change is backward compatible with trace-cmd 2.9, where the TRACECMD_OPTION_TIME_SHIFT option is introduced. Signed-off-by: Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com> --- .../include/private/trace-cmd-private.h | 4 +- lib/trace-cmd/include/trace-tsync-local.h | 7 ++- lib/trace-cmd/trace-timesync-kvm.c | 56 ++++++++++++++----- lib/trace-cmd/trace-timesync-ptp.c | 4 +- lib/trace-cmd/trace-timesync.c | 55 ++++++++++++++---- 5 files changed, 96 insertions(+), 30 deletions(-)