@@ -5,6 +5,7 @@ bin_PROGRAMS = \
intel_bios_dumper \
intel_bios_reader \
intel_error_decode \
+ intel_gpu_mon \
intel_gpu_top \
intel_gpu_time \
intel_gtt \
new file mode 100644
@@ -0,0 +1,582 @@
+/*
+ * Copyright © 2007 Intel Corporation
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ * Eugeni Dodonov <eugeni.dodonov@intel.com>
+ *
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <err.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/wait.h>
+#include <sys/time.h>
+#include "intel_gpu_tools.h"
+#include "instdone.h"
+
+/* MMIO offsets used by gen6_force_wake_get()/gen6_force_wake_put():
+ * FORCEWAKE is the register that is written, FORCEWAKE_ACK the one whose
+ * bit 0 is polled. */
+#define FORCEWAKE 0xA18C
+#define FORCEWAKE_ACK 0x130090
+
+/* Default number of samples taken per reporting interval (see -s). */
+#define SAMPLES_PER_SEC 10000
+/* Number of samples that make up one percent of a default interval. */
+#define SAMPLES_TO_PERCENT_RATIO (SAMPLES_PER_SEC / 100)
+
+/* Capacity of the top_bits[] array. */
+#define MAX_NUM_TOP_BITS 100
+
+/* Devices for which main() reads the statistics counter registers. */
+#define HAS_STATS_REGS(devid) IS_965(devid)
+
+/* Default battery name substituted into the /proc/acpi/battery path. */
+#define BATTERY "BAT1"
+
+/* One entry per instdone bit: @bit points at the bit description and @count
+ * is incremented by update_idle_bit() each time the bit is sampled as 0. */
+struct top_bit {
+ struct instdone_bit *bit;
+ int count;
+} top_bits[MAX_NUM_TOP_BITS];
+
+/* Most recent register samples, consumed by update_idle_bit(). */
+static uint32_t instdone, instdone1;
+/* PCI device id of the GPU, assigned once in main(). */
+static uint32_t devid;
+
+/* Indices into stats_regs[], stats_reg_names[], stats[] and last_stats[].
+ * STATS_COUNT is the number of counters and must stay last. */
+enum stats_counts {
+ IA_VERTICES,
+ IA_PRIMITIVES,
+ VS_INVOCATION,
+ GS_INVOCATION,
+ GS_PRIMITIVES,
+ CL_INVOCATION,
+ CL_PRIMITIVES,
+ PS_INVOCATION,
+ PS_DEPTH,
+ STATS_COUNT
+};
+
+/* Register offset of each statistics counter, indexed by enum stats_counts.
+ * main() reads a 32-bit low half at this offset and a high half at
+ * offset + 4. */
+const uint32_t stats_regs[STATS_COUNT] = {
+ IA_VERTICES_COUNT_QW,
+ IA_PRIMITIVES_COUNT_QW,
+ VS_INVOCATION_COUNT_QW,
+ GS_INVOCATION_COUNT_QW,
+ GS_PRIMITIVES_COUNT_QW,
+ CL_INVOCATION_COUNT_QW,
+ CL_PRIMITIVES_COUNT_QW,
+ PS_INVOCATION_COUNT_QW,
+ PS_DEPTH_COUNT_QW,
+};
+
+/* Column header printed for each statistics counter, in the same index
+ * order as stats_regs[]. */
+const char *stats_reg_names[STATS_COUNT] = {
+ "vert f",
+ "prim f",
+ "VS inv",
+ "GS inv",
+ "GS prim",
+ "CL inv",
+ "CL prim",
+ "PS inv",
+ "PS dpt",
+};
+
+/* Counter values read at the end of the current reporting interval. */
+uint64_t stats[STATS_COUNT];
+/* Counter values from the previous read; main() prints stats - last_stats. */
+uint64_t last_stats[STATS_COUNT];
+
+/* Samples taken per reporting interval; overridden by the -s option. */
+static int samples_per_sec = SAMPLES_PER_SEC;
+/* samples_per_sec / 100; divisor applied to the idle count in ring_print(). */
+static int samples_to_percent_ratio = (SAMPLES_PER_SEC / 100);
+
+/* Aggregate CPU counters parsed from the first line of /proc/stat by
+ * get_cpu_stat(); @total is the sum of the other four fields. */
+struct cpudata
+{
+ unsigned long user, system, idle, nice, total;
+};
+
+/* Per-process CPU/memory record. NOTE(review): not referenced anywhere in
+ * the code visible here — confirm whether it is still needed. */
+struct proc_cpudata
+{
+ unsigned long userproc, systemproc, vmem, rmem;
+ char pname[255];
+};
+
+/* Battery reading filled in by get_power_stat(): @rate is the number parsed
+ * from the "present rate" line, or -1 when the battery file cannot be
+ * opened. */
+struct powerdata
+{
+ long rate;
+};
+
+/*
+ * Return the current wall-clock time in microseconds.
+ *
+ * The arithmetic and the return value use unsigned long long so that the
+ * seconds-to-microseconds multiplication cannot overflow on targets where
+ * long is 32 bits wide. Callers in this file already store the result in
+ * unsigned long long variables, so the wider return type is transparent.
+ */
+static unsigned long long
+gettime(void)
+{
+	struct timeval t;
+
+	gettimeofday(&t, NULL);
+	return (unsigned long long)t.tv_sec * 1000000ULL +
+	       (unsigned long long)t.tv_usec;
+}
+
+/*
+ * Fill @cpu with the aggregate CPU counters taken from the first line of
+ * /proc/stat (label followed by user, nice, system and idle values) and
+ * store their sum in cpu->total.
+ *
+ * If the file cannot be opened, read or parsed, every field is set to 0
+ * rather than dereferencing a NULL stream or leaving @cpu uninitialized.
+ */
+static void
+get_cpu_stat(struct cpudata *cpu)
+{
+	FILE *file;
+	char line[255];
+	int parsed = 0;
+
+	cpu->user = cpu->nice = cpu->system = cpu->idle = cpu->total = 0;
+
+	file = fopen("/proc/stat", "r");
+	if (!file)
+		return;
+
+	if (fgets(line, sizeof(line), file)) {
+		/* %*s skips the leading label without needing a scratch buffer
+		 * that an over-long token could overflow. */
+		parsed = sscanf(line, "%*s %lu %lu %lu %lu",
+				&cpu->user, &cpu->nice, &cpu->system, &cpu->idle);
+	}
+	fclose(file);
+
+	if (parsed != 4) {
+		/* A partial match may have written some fields; discard them. */
+		cpu->user = cpu->nice = cpu->system = cpu->idle = 0;
+		return;
+	}
+
+	cpu->total = cpu->user + cpu->system + cpu->idle + cpu->nice;
+}
+
+/*
+ * Build the path "/proc/acpi/battery/<battery>/state".
+ *
+ * @battery: battery directory name (for example "BAT1"); may be empty.
+ *
+ * Returns a newly allocated string that the caller must free(). The buffer
+ * is sized from the actual path length, so any name length (including an
+ * empty name) is safe. On allocation failure the program reports the error
+ * and exits, matching how main() handles other fatal errors.
+ */
+static char*
+get_battery_file(char *battery)
+{
+	size_t len = strlen("/proc/acpi/battery/") + strlen(battery) +
+		     strlen("/state") + 1;
+	char *path = malloc(len);
+
+	if (!path) {
+		perror("malloc");
+		exit(1);
+	}
+
+	snprintf(path, len, "/proc/acpi/battery/%s/state", battery);
+	return path;
+}
+
+/*
+ * Read the battery "present rate" value into power->rate.
+ *
+ * @power:        receives the parsed rate, or -1 when it is unavailable.
+ * @battery_file: path of the battery state file to read.
+ *
+ * power->rate is set to -1 when the file cannot be opened or contains no
+ * parsable "present rate:" line, so the caller never sees an uninitialized
+ * value.
+ */
+static void
+get_power_stat(struct powerdata *power, const char *battery_file)
+{
+	FILE *file;
+	char line[255];
+
+	/* Reported whenever no usable rate is found. */
+	power->rate = -1;
+
+	if (!battery_file)
+		return;
+
+	file = fopen(battery_file, "r");
+	if (!file)
+		return;
+
+	/* Sample /proc/acpi/battery/BAT?/state file format:
+	 * present: yes
+	 * capacity state: ok
+	 * charging state: discharging
+	 * present rate: 1746 mA
+	 * remaining capacity: 5301 mAh
+	 * present voltage: 11788 mV
+	 *
+	 * The line is located by its label rather than by position, and the
+	 * conversion is %ld to match the long it is stored in.
+	 */
+	while (fgets(line, sizeof(line), file)) {
+		long rate;
+
+		if (sscanf(line, "present rate: %ld", &rate) == 1) {
+			power->rate = rate;
+			break;
+		}
+	}
+
+	fclose(file);
+}
+
+/*
+ * Account one sample for @top_bit: pick the saved register value that the
+ * bit belongs to (instdone1 for INST_DONE_1, instdone otherwise) and bump
+ * the counter when the bit is clear in that value.
+ */
+static void
+update_idle_bit(struct top_bit *top_bit)
+{
+	const uint32_t sample =
+		(top_bit->bit->reg == INST_DONE_1) ? instdone1 : instdone;
+
+	if (sample & top_bit->bit->bit)
+		return;
+
+	top_bit->count++;
+}
+
+/* State kept for one command ring while it is being sampled. */
+struct ring {
+ const char *name; /* label printed by ring_print_header() */
+ uint32_t mmio; /* offset added to every register offset in ring_read() */
+ int head, tail, size; /* last sampled head/tail; size is set by ring_init() */
+ uint64_t full; /* sum over samples of (tail - head), wrapped by size */
+ int idle; /* number of samples in which head == tail */
+};
+
+/*
+ * On gen6 devices, request force-wake before register reads: poll
+ * FORCEWAKE_ACK bit 0 until it is clear, write 1 to FORCEWAKE, then poll
+ * until the bit is set. Each poll gives up after 50 attempts spaced 10us
+ * apart. Does nothing on other devices.
+ */
+static void gen6_force_wake_get(void)
+{
+	int tries;
+
+	if (IS_GEN6(devid)) {
+		/* This will probably have undesirable side-effects upon the system. */
+		for (tries = 0; tries < 50 && (INREG(FORCEWAKE_ACK) & 1) != 0; tries++)
+			usleep(10);
+
+		OUTREG(FORCEWAKE, 1);
+
+		for (tries = 0; tries < 50 && (INREG(FORCEWAKE_ACK) & 1) == 0; tries++)
+			usleep(10);
+	}
+}
+
+/*
+ * Counterpart of gen6_force_wake_get(): on gen6 devices write 0 to
+ * FORCEWAKE; no-op elsewhere.
+ */
+static void gen6_force_wake_put(void)
+{
+	if (IS_GEN6(devid)) {
+		OUTREG(FORCEWAKE, 0);
+	}
+}
+
+/* Read the register located @reg past the ring's mmio offset. */
+static uint32_t ring_read(struct ring *ring, uint32_t reg)
+{
+	const uint32_t offset = ring->mmio + reg;
+
+	return INREG(offset);
+}
+
+/*
+ * Compute ring->size from the RING_LEN register: the RING_NR_PAGES field
+ * shifted down by 12, plus one, times 4096. The register is read with
+ * force-wake held.
+ */
+static void ring_init(struct ring *ring)
+{
+	uint32_t len_reg;
+
+	gen6_force_wake_get();
+	len_reg = ring_read(ring, RING_LEN);
+	gen6_force_wake_put();
+
+	ring->size = (((len_reg & RING_NR_PAGES) >> 12) + 1) * 4096;
+}
+
+/* Clear the per-interval accumulators of @ring before a new sampling run. */
+static void ring_reset(struct ring *ring)
+{
+	ring->full = 0;
+	ring->idle = 0;
+}
+
+/*
+ * Take one sample of @ring: read head and tail with force-wake held, count
+ * the sample as idle when they are equal, and add the distance from head to
+ * tail (wrapped by the ring size) to ring->full. Rings whose size is 0 are
+ * skipped.
+ */
+static void ring_sample(struct ring *ring)
+{
+	int used;
+
+	if (ring->size == 0)
+		return;
+
+	gen6_force_wake_get();
+	ring->head = ring_read(ring, RING_HEAD) & HEAD_ADDR;
+	ring->tail = ring_read(ring, RING_TAIL) & TAIL_ADDR;
+	gen6_force_wake_put();
+
+	if (ring->head == ring->tail)
+		ring->idle++;
+
+	used = ring->tail - ring->head;
+	if (used < 0)
+		used += ring->size;
+
+	ring->full += used;
+}
+
+/* Write the column titles for the two values emitted by cpu_print(). */
+static void
+cpu_print_header(FILE *out)
+{
+	fprintf(out, "user%%\tsys%%\t");
+}
+
+/*
+ * Write the column title for the value emitted by power_print().
+ * NOTE(review): power_print() outputs powerdata.rate, which get_power_stat()
+ * parses from the "present rate" line, so the "volts" label looks
+ * inconsistent with the data — confirm the intended quantity.
+ */
+static void
+power_print_header(FILE *out)
+{
+	fprintf(out, "volts\t");
+}
+
+/*
+ * Print user and system CPU load, as percentages of all ticks elapsed
+ * between @oldcpu and @cpu, to @out as two tab-terminated "%.2f" fields.
+ *
+ * When no ticks elapsed between the two snapshots both loads are printed as
+ * 0.00 instead of dividing by zero and emitting nan/inf.
+ */
+static void cpu_print(FILE *out, struct cpudata *cpu, struct cpudata *oldcpu)
+{
+	unsigned long total = cpu->total - oldcpu->total;
+	float uload = 0.0f, sload = 0.0f;
+
+	if (total != 0) {
+		uload = (float) ((cpu->user - oldcpu->user) * 100) / total;
+		sload = (float) ((cpu->system - oldcpu->system) * 100) / total;
+	}
+
+	fprintf(out, "%.2f\t%.2f\t", uload, sload);
+}
+
+/* Write power->rate to @out as a decimal number followed by a tab. */
+static void
+power_print(FILE *out, struct powerdata *power)
+{
+	const long rate = power->rate;
+
+	fprintf(out, "%ld\t", rate);
+}
+
+/* Write the two column titles for @ring: "<name>%" and "ops". */
+static void
+ring_print_header(FILE *out, struct ring *ring)
+{
+	const char *label = ring->name;
+
+	fprintf(out, "%s%%\tops\t", label);
+}
+/*
+ * Write the two per-interval values for @ring: 100 minus the idle sample
+ * count divided by samples_to_percent_ratio, and the accumulated fill
+ * divided by samples_per_sec. Both are printed as -1 for a ring whose size
+ * is 0.
+ */
+static void
+ring_print(FILE *out, struct ring *ring)
+{
+	int busy = -1;
+	int fill = -1;
+
+	if (ring->size) {
+		busy = 100 - ring->idle / samples_to_percent_ratio;
+		fill = (int)(ring->full / samples_per_sec);
+	}
+
+	fprintf(out, "%3d\t%d\t", busy, fill);
+}
+
+/*
+ * Print the command-line help to stdout.
+ *
+ * @appname: program name shown in the usage line (argv[0]).
+ *
+ * The text lists every option accepted by main(), including -h, and names
+ * stdout as the default output stream.
+ */
+static void
+usage(const char *appname)
+{
+	printf("intel_gpu_mon - intel gpu and system monitor\n"
+	       "\n"
+	       "usage: %s [parameters] [command to profile]\n"
+	       "\n"
+	       "The following parameters apply:\n"
+	       "[-s <samples>] samples per second (default %d)\n"
+	       "[-o <file>] output to file (default to stdout)\n"
+	       "[-b <battery>] battery device to read from (default %s)\n"
+	       "[-h] show this help and exit\n"
+	       "\n"
+	       "[command to profile] if specified, the following parameters will be considered\n"
+	       " the command line of application to be profiled.\n",
+	       appname,
+	       SAMPLES_PER_SEC,
+	       BATTERY
+	       );
+}
+
+/*
+ * Read every statistics counter listed in stats_regs[] into
+ * @dst[0..STATS_COUNT-1].
+ *
+ * Each counter is assembled from two 32-bit reads (low half at the register
+ * offset, high half at offset + 4). The high half is read before and after
+ * the low half and the sequence is retried until both high reads agree, so
+ * a carry between the halves cannot produce a torn value.
+ */
+static void
+read_stats_regs(uint64_t *dst)
+{
+	int i;
+
+	for (i = 0; i < STATS_COUNT; i++) {
+		uint32_t stats_high, stats_low, stats_high_2;
+
+		do {
+			stats_high = INREG(stats_regs[i] + 4);
+			stats_low = INREG(stats_regs[i]);
+			stats_high_2 = INREG(stats_regs[i] + 4);
+		} while (stats_high != stats_high_2);
+
+		dst[i] = (uint64_t)stats_high << 32 | stats_low;
+	}
+}
+
+/*
+ * Entry point.
+ *
+ * Parses the options, optionally forks a child that runs the command to
+ * profile, then repeatedly samples the GPU for up to one second and prints
+ * one tab-separated row: elapsed time, CPU load, battery rate, per-ring
+ * usage and, on devices with statistics registers, the counter deltas.
+ * The loop ends when the child terminates; without a command it runs until
+ * the process is killed.
+ *
+ * Returns 0 on normal termination; exits with status 1 on option, fopen,
+ * fork or waitpid errors.
+ */
+int main(int argc, char **argv)
+{
+	struct pci_device *pci_dev;
+	struct ring render_ring = {
+		.name = "render",
+		.mmio = 0x2030,
+	}, bsd_ring = {
+		.name = "bits",
+		.mmio = 0x4030,
+	}, bsd6_ring = {
+		.name = "bits6",
+		.mmio = 0x12030,
+	}, blt_ring = {
+		.name = "blit",
+		.mmio = 0x22030,
+	};
+	int i, ch, num_bits;
+	FILE *output = stdout;
+	struct cpudata oldcpu, cpu;
+	struct powerdata power;
+	int print_headers = 1;
+	pid_t child_pid = -1;
+	double elapsed_time = 0;
+	char *battery = BATTERY, *battery_file = NULL;
+
+	/* What is thy bidding, user? */
+	while ((ch = getopt(argc, argv, "s:o:b:h")) != -1) {
+		switch (ch) {
+		case 'b':
+			/* optarg points into argv, which outlives every use of
+			 * the name, so no copy is needed. */
+			battery = optarg;
+			break;
+		case 's':
+			samples_per_sec = atoi(optarg);
+			if (samples_per_sec < 100) {
+				fprintf(stderr, "Error: samples per second must be >= 100\n");
+				exit(1);
+			}
+			samples_to_percent_ratio = (samples_per_sec / 100);
+			break;
+		case 'o':
+			output = fopen(optarg, "w");
+			if (!output) {
+				perror("fopen");
+				exit(1);
+			}
+			break;
+		case 'h':
+			usage(argv[0]);
+			exit(0);
+			break;
+		default:
+			fprintf(stderr, "Invalid flag %c!\n", (char)optopt);
+			usage(argv[0]);
+			exit(1);
+			break;
+		}
+	}
+	argc -= optind;
+	argv += optind;
+
+	/* Do we have a command to run? */
+	if (argc > 0) {
+		fprintf(output, "# Profiling: %s\n", argv[0]);
+		/* Flush before forking so the child does not inherit (and later
+		 * re-emit) buffered output. */
+		fflush(output);
+		child_pid = fork();
+		if (child_pid < 0) {
+			perror("fork");
+			exit(1);
+		} else if (child_pid == 0) {
+			int res;
+
+			/* NOTE(review): only argv[0] is handed to the shell, so
+			 * a command with arguments must be passed as one quoted
+			 * string; res is the raw system() return value, not a
+			 * decoded exit code. */
+			res = system(argv[0]);
+			if (res < 0)
+				perror("running command");
+			fflush(output);
+			fprintf(output, "# %s exited with status %d\n", argv[0], res);
+			fflush(output);
+			exit(0);
+		}
+	}
+
+	pci_dev = intel_get_pci_device();
+	devid = pci_dev->device_id;
+	intel_get_mmio(pci_dev);
+	init_instdone_definitions(devid);
+
+	/* Never index past the end of top_bits[]. */
+	num_bits = num_instdone_bits;
+	if (num_bits > MAX_NUM_TOP_BITS)
+		num_bits = MAX_NUM_TOP_BITS;
+
+	for (i = 0; i < num_bits; i++) {
+		top_bits[i].bit = &instdone_bits[i];
+		top_bits[i].count = 0;
+	}
+
+	/* Rings that are not initialised keep size 0 and are skipped by
+	 * ring_sample(). */
+	ring_init(&render_ring);
+	if (IS_GEN4(devid) || IS_GEN5(devid))
+		ring_init(&bsd_ring);
+	if (IS_GEN6(devid)) {
+		ring_init(&bsd6_ring);
+		ring_init(&blt_ring);
+	}
+
+	/* Calculate initial stats */
+	/* Battery */
+	battery_file = get_battery_file(battery);
+	/* CPU */
+	get_cpu_stat(&oldcpu);
+	/* GPU */
+	if (HAS_STATS_REGS(devid))
+		read_stats_regs(last_stats);
+
+	for (;;) {
+		int j;
+		unsigned long long t1, ti, tf, t2;
+		unsigned long long def_sleep = 1000000 / samples_per_sec;
+		int child_stat = 0;
+
+		t1 = gettime();
+
+		ring_reset(&render_ring);
+		ring_reset(&bsd_ring);
+		ring_reset(&bsd6_ring);
+		ring_reset(&blt_ring);
+
+		for (i = 0; i < samples_per_sec; i++) {
+			long long interval;
+
+			ti = gettime();
+			if (IS_965(devid)) {
+				instdone = INREG(INST_DONE_I965);
+				instdone1 = INREG(INST_DONE_1);
+			} else
+				instdone = INREG(INST_DONE);
+
+			for (j = 0; j < num_bits; j++)
+				update_idle_bit(&top_bits[j]);
+
+			ring_sample(&render_ring);
+			ring_sample(&bsd_ring);
+			ring_sample(&bsd6_ring);
+			ring_sample(&blt_ring);
+
+			tf = gettime();
+			if (tf - t1 >= 1000000) {
+				/* We are out of sync, bail out */
+				break;
+			}
+			/* Sleep for whatever is left of this sample's time slot. */
+			interval = def_sleep - (tf - ti);
+			if (interval > 0)
+				usleep(interval);
+		}
+
+		get_cpu_stat(&cpu);
+
+		/* Power */
+		get_power_stat(&power, battery_file);
+
+		if (HAS_STATS_REGS(devid))
+			read_stats_regs(stats);
+
+		/* Print headers for columns at first run */
+		if (print_headers) {
+			fprintf(output, "# time\t");
+			cpu_print_header(output);
+			power_print_header(output);
+			ring_print_header(output, &render_ring);
+			ring_print_header(output, &bsd_ring);
+			ring_print_header(output, &bsd6_ring);
+			ring_print_header(output, &blt_ring);
+			/* One title per counter, independent of the instdone bit
+			 * counts, so the header always matches the data rows. */
+			if (HAS_STATS_REGS(devid)) {
+				for (i = 0; i < STATS_COUNT; i++)
+					fprintf(output, "%s\t", stats_reg_names[i]);
+			}
+			fprintf(output, "\n");
+			print_headers = 0;
+		}
+
+		t2 = gettime();
+		elapsed_time += (t2 - t1) / 1000000.0;
+		fprintf(output, "%.2f\t", elapsed_time);
+		cpu_print(output, &cpu, &oldcpu);
+		power_print(output, &power);
+		ring_print(output, &render_ring);
+		ring_print(output, &bsd_ring);
+		ring_print(output, &bsd6_ring);
+		ring_print(output, &blt_ring);
+
+		if (HAS_STATS_REGS(devid)) {
+			for (i = 0; i < STATS_COUNT; i++) {
+				/* Cast so the conversion matches the argument on
+				 * every ABI; uint64_t is not always unsigned long. */
+				fprintf(output, "%llu\t",
+					(unsigned long long)(stats[i] - last_stats[i]));
+				last_stats[i] = stats[i];
+			}
+		}
+		oldcpu = cpu;
+
+		fprintf(output, "\n");
+		fflush(output);
+
+		/* Check if child has gone */
+		if (child_pid > 0) {
+			pid_t res = waitpid(child_pid, &child_stat, WNOHANG);
+
+			if (res == -1) {
+				perror("waitpid");
+				exit(1);
+			}
+			/* Stop on any termination, including death by signal;
+			 * otherwise the next waitpid() would fail with ECHILD. */
+			if (res != 0 &&
+			    (WIFEXITED(child_stat) || WIFSIGNALED(child_stat)))
+				break;
+		}
+	}
+
+	/* Free memory */
+	free(battery_file);
+
+	fclose(output);
+
+	return 0;
+}