@@ -3,7 +3,7 @@ libtracefs(3)
NAME
----
-tracefs_find_cid_pid, tracefs_instance_find_cid_pid -
+tracefs_find_cid_pid, tracefs_instance_find_cid_pid, tracefs_time_conversion -
helper functions to handle tracing guests
SYNOPSIS
@@ -14,6 +14,7 @@ SYNOPSIS
char pass:[*]*tracefs_find_cid_pid*(int _cid_);
char pass:[*]*tracefs_instance_find_cid_pid*(struct tracefs_instance pass:[*]_instance_, int _cid_);
+int *tracefs_time_conversion*(int _cpu_, int pass:[*]_shift_, int pass:[*]_multi_, long long pass:[*]_offset_);
--
DESCRIPTION
@@ -27,6 +28,11 @@ The *tracefs_instance_find_cid_pid*() is the same as *tracefs_find_cid_pid*() bu
the instance to use to perform the tracing in. If NULL it will use the top level
buffer to perform the tracing.
+The *tracefs_time_conversion*() will return the values used by the kernel to convert
+the raw time stamp counter into nanoseconds for the given _cpu_. Pointers for _shift_, _multi_
+and _offset_ can be NULL to be ignored, otherwise they are set with the shift, multiplier
+and offset respectively.
+
RETURN VALUE
------------
Both *tracefs_find_cid_pid*() and *tracefs_instance_find_cid_pid*() will return the
@@ -76,8 +82,67 @@ static int find_cids(void)
return 0;
}
+/* Conversion values read for one CPU by tracefs_time_conversion() */
+struct time_info {
+ int shift; /* shift value for this CPU */
+ int multi; /* multiplier value for this CPU */
+};
+
+/*
+ * Print the shift and multiplier each CPU uses to convert raw time stamps,
+ * collapsed into a single entry when every CPU that could be read reports
+ * the same values. Exits if the values cannot be read for even the first CPU.
+ */
+static void show_time_conversion(void)
+{
+	struct time_info *tinfo;
+	int cpus, cpu;
+
+	cpus = sysconf(_SC_NPROCESSORS_CONF);
+	/* sysconf() reports failure as -1; never hand that to calloc() */
+	tinfo = cpus > 0 ? calloc(cpus, sizeof(*tinfo)) : NULL;
+	if (!tinfo)
+		exit(-1);
+
+	for (cpu = 0; cpu < cpus; cpu++) {
+		if (tracefs_time_conversion(cpu, &tinfo[cpu].shift,
+					    &tinfo[cpu].multi, NULL))
+			break;
+	}
+	if (cpu != cpus) {
+		if (!cpu) {
+			perror("tracefs_time_conversion");
+			exit(-1);
+		}
+		printf("Only read %d of %d CPUs\n", cpu, cpus);
+		/* Only entries [0, cpu) were filled in; the read for 'cpu' failed */
+		cpus = cpu;
+	}
+
+	/* Check if all the shift and mult values are the same */
+	for (cpu = 1; cpu < cpus; cpu++) {
+		if (tinfo[cpu - 1].shift != tinfo[cpu].shift ||
+		    tinfo[cpu - 1].multi != tinfo[cpu].multi)
+			break;
+	}
+
+	if (cpu == cpus) {
+		printf("All cpus have:\n");
+		printf(" shift: %d\n", tinfo[0].shift);
+		printf(" multi: %d\n", tinfo[0].multi);
+		printf("\n");
+	} else {
+		for (cpu = 0; cpu < cpus; cpu++) {
+			printf("CPU: %d\n", cpu);
+			printf(" shift: %d\n", tinfo[cpu].shift);
+			printf(" multi: %d\n", tinfo[cpu].multi);
+			printf("\n");
+		}
+	}
+	free(tinfo);
+}
+
int main(int argc, char *argv[])
{
+ /* Report the time conversion values first, then run find_cids() */
+ show_time_conversion();
find_cids();
exit(0);
}
@@ -79,13 +79,19 @@ else
VSOCK_DEFINED := 0
endif
+# Probe whether the compiler can find <linux/perf_event.h>.
+# Defining NO_PERF skips the probe and disables the perf based helpers.
+ifndef NO_PERF
+PERF_DEFINED := $(shell if (echo "$(pound)include <linux/perf_event.h>" | $(CC) -E - >/dev/null 2>&1) ; then echo 1; else echo 0 ; fi)
+else
+PERF_DEFINED := 0
+endif
+
etcdir ?= /etc
etcdir_SQ = '$(subst ','\'',$(etcdir))'
export man_dir man_dir_SQ html_install html_install_SQ INSTALL
export img_install img_install_SQ
export DESTDIR DESTDIR_SQ
-export VSOCK_DEFINED
+export VSOCK_DEFINED PERF_DEFINED
pound := \#
@@ -644,4 +644,7 @@ int tracefs_cpu_pipe(struct tracefs_cpu *tcpu, int wfd, bool nonblock);
int tracefs_instance_find_cid_pid(struct tracefs_instance *instance, int cid);
int tracefs_find_cid_pid(int cid);
+/* More guest helpers: read the kernel's raw time stamp conversion values for a CPU */
+int tracefs_time_conversion(int cpu, int *shift, int *mult, long long *offset);
+
#endif /* _TRACE_FS_H */
@@ -18,6 +18,9 @@ OBJS += tracefs-record.o
ifeq ($(VSOCK_DEFINED), 1)
OBJS += tracefs-vsock.o
endif
+# Build the perf based helpers only when PERF_DEFINED is 1
+ifeq ($(PERF_DEFINED), 1)
+OBJS += tracefs-perf.o
+endif
# Order matters for the the three below
OBJS += sqlhist-lex.o
new file mode 100644
@@ -0,0 +1,93 @@
+#include <asm/unistd.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <signal.h>
+#include <linux/perf_event.h>
+
+#include <tracefs.h>
+
+/*
+ * Fill @pe with the attributes of the perf event that perf_read_maps()
+ * opens: a disabled software event, sampled by frequency, user space only.
+ */
+static void perf_init_pe(struct perf_event_attr *pe)
+{
+	memset(pe, 0, sizeof(*pe));
+	pe->type = PERF_TYPE_SOFTWARE;
+	pe->sample_type = PERF_SAMPLE_CPU;
+	pe->size = sizeof(*pe);
+	/*
+	 * The config must come from the namespace selected by pe->type.
+	 * PERF_COUNT_SW_CPU_CLOCK has the same numeric value (0) as
+	 * PERF_COUNT_HW_CPU_CYCLES, so the event that gets opened is unchanged.
+	 */
+	pe->config = PERF_COUNT_SW_CPU_CLOCK;
+	pe->disabled = 1;
+	pe->exclude_kernel = 1;
+	pe->freq = 1;
+	pe->sample_freq = 1000;
+	pe->inherit = 1;
+	pe->mmap = 1;
+	pe->comm = 1;
+	pe->task = 1;
+	pe->precise_ip = 1;
+	pe->sample_id_all = 1;
+	pe->read_format = PERF_FORMAT_ID |
+			  PERF_FORMAT_TOTAL_TIME_ENABLED |
+			  PERF_FORMAT_TOTAL_TIME_RUNNING;
+}
+
+/* Invoke the perf_event_open system call through syscall() and return its result */
+static long perf_event_open(struct perf_event_attr *event, pid_t pid,
+ int cpu, int group_fd, unsigned long flags)
+{
+ return syscall(__NR_perf_event_open, event, pid, cpu, group_fd, flags);
+}
+
+/* Size of the perf mapping: nine pages; presumably 1 metadata page + 8 data pages (TODO confirm) */
+#define MAP_SIZE (9 * getpagesize())
+
+/*
+ * Map MAP_SIZE bytes of the perf event behind @fd as a shared, readable and
+ * writable region. Returns the mapping, or NULL if mmap() fails.
+ */
+static struct perf_event_mmap_page *perf_mmap(int fd)
+{
+	void *area;
+
+	/* associate a buffer with the file */
+	area = mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+
+	return area == MAP_FAILED ? NULL : area;
+}
+
+/*
+ * Open a perf event on @cpu for the calling process, map it, and copy the
+ * time_shift, time_mult and time_offset fields of the mapped page into
+ * whichever of @shift, @mult and @offset are non-NULL.
+ *
+ * Returns 0 on success (including when all three pointers are NULL) and
+ * -1 if the event cannot be opened or mapped.
+ */
+static int perf_read_maps(int cpu, int *shift, int *mult, long long *offset)
+{
+	struct perf_event_attr perf_attr;
+	struct perf_event_mmap_page *mpage;
+	int ret = -1;
+	int fd;
+
+	/* We succeed if there's nothing to do! */
+	if (!shift && !mult && !offset)
+		return 0;
+
+	perf_init_pe(&perf_attr);
+	fd = perf_event_open(&perf_attr, getpid(), cpu, -1, 0);
+	if (fd < 0)
+		return -1;
+
+	mpage = perf_mmap(fd);
+	if (mpage) {
+		if (shift)
+			*shift = mpage->time_shift;
+		if (mult)
+			*mult = mpage->time_mult;
+		if (offset)
+			*offset = mpage->time_offset;
+		munmap(mpage, MAP_SIZE);
+		ret = 0;
+	}
+
+	/* The event is only needed while reading the page; release it on every path */
+	close(fd);
+	return ret;
+}
+
+/**
+ * tracefs_time_conversion - Find how the kernel converts the raw counters
+ * @cpu: The CPU to check for
+ * @shift: If non-NULL it will be set to the shift value
+ * @mult: If non-NULL it will be set to the multiplier value
+ * @offset: If non-NULL it will be set to the offset
+ *
+ * Returns the result of perf_read_maps(): 0 on success (also when all
+ * three pointers are NULL), or -1 if the perf event could not be opened
+ * or mapped.
+ */
+int tracefs_time_conversion(int cpu, int *shift, int *mult, long long *offset)
+{
+ return perf_read_maps(cpu, shift, mult, offset);
+}