Message ID | 1505299928-13809-3-git-send-email-sagar.a.kamble@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Wed, Sep 13, 2017 at 04:22:01PM +0530, Sagar Arun Kamble wrote: > This tests different performance metrics being streamed by i915 driver. > This feature in i915 also referred as Driver Assisted Performance > Capture (DAPC) provides userspace an ability to sample the OA reports > at execbuf boundaries and associate other metadata like CTX ID, PID, TAG > with each sample. Further, ability to capture engine timestamps and MMIO > reads is also provided. > > v2: Defining the enums for OA_SOURCE and PERF_PROP locally till the > libdrm changes are merged. > > Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > Signed-off-by: Sagar Arun Kamble <sagar.a.kamble@intel.com> Reviewed-by: Ewelina Musial <ewelina.musial@intel.com> > --- > tests/Makefile.sources | 1 + > tests/intel_perf_dapc.c | 811 ++++++++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 812 insertions(+) > create mode 100644 tests/intel_perf_dapc.c > > diff --git a/tests/Makefile.sources b/tests/Makefile.sources > index 6c19509..24bd099 100644 > --- a/tests/Makefile.sources > +++ b/tests/Makefile.sources > @@ -170,6 +170,7 @@ TESTS_progs = \ > gen7_forcewake_mt \ > gvt_basic \ > intel_perf \ > + intel_perf_dapc \ > kms_3d \ > kms_addfb_basic \ > kms_atomic \ > diff --git a/tests/intel_perf_dapc.c b/tests/intel_perf_dapc.c > new file mode 100644 > index 0000000..92b4dee > --- /dev/null > +++ b/tests/intel_perf_dapc.c > @@ -0,0 +1,811 @@ > +/* > + * Copyright © 2017 Intel Corporation > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this 
permission notice (including the next > + * paragraph) shall be included in all copies or substantial portions of the > + * Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS > + * IN THE SOFTWARE. > + * > + */ > +#include <fcntl.h> > + > +#include "igt.h" > +#include "drm.h" > + > +IGT_TEST_DESCRIPTION("Test the i915 command stream based perf metrics streaming interface"); > + > +/* Temporarily copy i915-perf uapi here to avoid a dependency on libdrm's > + * i915_drm.h copy being updated with the i915-perf interface before this > + * test can land in i-g-t. 
> + * > + * TODO: remove this once the interface lands in libdrm > + */ > +#ifndef DRM_I915_PERF_OPEN > +#define DRM_I915_PERF_OPEN 0x36 > +#define DRM_IOCTL_I915_PERF_OPEN DRM_IOW(DRM_COMMAND_BASE + \ > + DRM_I915_PERF_OPEN, \ > + struct drm_i915_perf_open_param) > + > +enum drm_i915_oa_format { > + I915_OA_FORMAT_A13 = 1, /* HSW only */ > + I915_OA_FORMAT_A29, /* HSW only */ > + I915_OA_FORMAT_A13_B8_C8, /* HSW only */ > + I915_OA_FORMAT_B4_C8, /* HSW only */ > + I915_OA_FORMAT_A45_B8_C8, /* HSW only */ > + I915_OA_FORMAT_B4_C8_A16, /* HSW only */ > + I915_OA_FORMAT_C4_B8, /* HSW+ */ > + > + /* Gen8+ */ > + I915_OA_FORMAT_A12, > + I915_OA_FORMAT_A12_B8_C8, > + I915_OA_FORMAT_A32u40_A4u32_B8_C8, > + > + I915_OA_FORMAT_MAX /* non-ABI */ > +}; > + > +enum drm_i915_perf_sample_oa_source { > + I915_PERF_SAMPLE_OA_SOURCE_OABUFFER, > + I915_PERF_SAMPLE_OA_SOURCE_CS, > + I915_PERF_SAMPLE_OA_SOURCE_MAX /* non-ABI */ > +}; > + > +#define I915_PERF_MMIO_NUM_MAX 8 > +struct drm_i915_perf_mmio_list { > + __u32 num_mmio; > + __u32 mmio_list[I915_PERF_MMIO_NUM_MAX]; > +}; > + > +enum drm_i915_perf_property_id { > + DRM_I915_PERF_PROP_CTX_HANDLE = 1, > + DRM_I915_PERF_PROP_SAMPLE_OA, > + DRM_I915_PERF_PROP_OA_METRICS_SET, > + DRM_I915_PERF_PROP_OA_FORMAT, > + DRM_I915_PERF_PROP_OA_EXPONENT, > + DRM_I915_PERF_PROP_SAMPLE_OA_SOURCE, > + DRM_I915_PERF_PROP_ENGINE, > + DRM_I915_PERF_PROP_SAMPLE_CTX_ID, > + DRM_I915_PERF_PROP_SAMPLE_PID, > + DRM_I915_PERF_PROP_SAMPLE_TAG, > + DRM_I915_PERF_PROP_SAMPLE_TS, > + DRM_I915_PERF_PROP_SAMPLE_MMIO, > + DRM_I915_PERF_PROP_MAX /* non-ABI */ > +}; > + > +struct drm_i915_perf_open_param { > + __u32 flags; > +#define I915_PERF_FLAG_FD_CLOEXEC (1<<0) > +#define I915_PERF_FLAG_FD_NONBLOCK (1<<1) > +#define I915_PERF_FLAG_DISABLED (1<<2) > + > + __u32 num_properties; > + __u64 properties_ptr; > +}; > + > +#define I915_PERF_IOCTL_ENABLE _IO('i', 0x0) > +#define I915_PERF_IOCTL_DISABLE _IO('i', 0x1) > + > +struct drm_i915_perf_record_header { > + 
__u32 type; > + __u16 pad; > + __u16 size; > +}; > + > +enum drm_i915_perf_record_type { > + DRM_I915_PERF_RECORD_SAMPLE = 1, > + DRM_I915_PERF_RECORD_OA_REPORT_LOST = 2, > + DRM_I915_PERF_RECORD_OA_BUFFER_LOST = 3, > + DRM_I915_PERF_RECORD_MAX /* non-ABI */ > +}; > +#endif /* !DRM_I915_PERF_OPEN */ > + > +/* There is no ifdef we can use for those formats :( */ > +enum { > + local_I915_OA_FORMAT_A12 = I915_OA_FORMAT_C4_B8 + 1, > + local_I915_OA_FORMAT_A12_B8_C8 = I915_OA_FORMAT_C4_B8 + 2, > + local_I915_OA_FORMAT_A32u40_A4u32_B8_C8 = I915_OA_FORMAT_C4_B8 + 3, > +}; > + > +#define local_I915_OA_FORMAT_MAX (local_I915_OA_FORMAT_A32u40_A4u32_B8_C8 + 1) > + > +enum { > + local_I915_PERF_SAMPLE_OA_SOURCE_OABUFFER, > + local_I915_PERF_SAMPLE_OA_SOURCE_CS, > + local_I915_PERF_SAMPLE_OA_SOURCE_MAX /* non-ABI */ > +}; > + > +enum { > + local_DRM_I915_PERF_PROP_SAMPLE_OA_SOURCE = DRM_I915_PERF_PROP_OA_EXPONENT + 1, > + local_DRM_I915_PERF_PROP_ENGINE = DRM_I915_PERF_PROP_OA_EXPONENT + 2, > + local_DRM_I915_PERF_PROP_SAMPLE_CTX_ID = DRM_I915_PERF_PROP_OA_EXPONENT + 3, > + local_DRM_I915_PERF_PROP_SAMPLE_PID = DRM_I915_PERF_PROP_OA_EXPONENT + 4, > + local_DRM_I915_PERF_PROP_SAMPLE_TAG = DRM_I915_PERF_PROP_OA_EXPONENT + 5, > + local_DRM_I915_PERF_PROP_SAMPLE_TS = DRM_I915_PERF_PROP_OA_EXPONENT + 6, > + local_DRM_I915_PERF_PROP_SAMPLE_MMIO = DRM_I915_PERF_PROP_OA_EXPONENT + 7, > + local_DRM_I915_PERF_PROP_MAX /* non-ABI */ > +}; > + > +static struct { > + const char *name; > + size_t size; > + int a40_high_off; /* bytes */ > + int a40_low_off; > + int n_a40; > + int a_off; > + int n_a; > + int first_a; > + int b_off; > + int n_b; > + int c_off; > + int n_c; > + int min_gen; > + int max_gen; > +} oa_formats[local_I915_OA_FORMAT_MAX] = { > + [I915_OA_FORMAT_A13] = { /* HSW only */ > + "A13", .size = 64, > + .a_off = 12, .n_a = 13, > + .max_gen = 7 }, > + [I915_OA_FORMAT_A29] = { /* HSW only */ > + "A29", .size = 128, > + .a_off = 12, .n_a = 29, > + .max_gen = 7 }, > + 
[I915_OA_FORMAT_A13_B8_C8] = { /* HSW only */ > + "A13_B8_C8", .size = 128, > + .a_off = 12, .n_a = 13, > + .b_off = 64, .n_b = 8, > + .c_off = 96, .n_c = 8, > + .max_gen = 7 }, > + [I915_OA_FORMAT_A45_B8_C8] = { /* HSW only */ > + "A45_B8_C8", .size = 256, > + .a_off = 12, .n_a = 45, > + .b_off = 192, .n_b = 8, > + .c_off = 224, .n_c = 8, > + .max_gen = 7 }, > + [I915_OA_FORMAT_B4_C8] = { /* HSW only */ > + "B4_C8", .size = 64, > + .b_off = 16, .n_b = 4, > + .c_off = 32, .n_c = 8, > + .max_gen = 7 }, > + [I915_OA_FORMAT_B4_C8_A16] = { /* HSW only */ > + "B4_C8_A16", .size = 128, > + .b_off = 16, .n_b = 4, > + .c_off = 32, .n_c = 8, > + .a_off = 60, .n_a = 16, .first_a = 29, > + .max_gen = 7 }, > + [I915_OA_FORMAT_C4_B8] = { /* HSW+ (header differs from HSW-Gen8+) */ > + "C4_B8", .size = 64, > + .c_off = 16, .n_c = 4, > + .b_off = 28, .n_b = 8 }, > + > + /* Gen8+ */ > + > + [local_I915_OA_FORMAT_A12] = { > + "A12", .size = 64, > + .a_off = 12, .n_a = 12, .first_a = 7, > + .min_gen = 8 }, > + [local_I915_OA_FORMAT_A12_B8_C8] = { > + "A12_B8_C8", .size = 128, > + .a_off = 12, .n_a = 12, > + .b_off = 64, .n_b = 8, > + .c_off = 96, .n_c = 8, .first_a = 7, > + .min_gen = 8 }, > + [local_I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { > + "A32u40_A4u32_B8_C8", .size = 256, > + .a40_high_off = 160, .a40_low_off = 16, .n_a40 = 32, > + .a_off = 144, .n_a = 4, .first_a = 32, > + .b_off = 192, .n_b = 8, > + .c_off = 224, .n_c = 8, > + .min_gen = 8 }, > + [I915_OA_FORMAT_C4_B8] = { > + "C4_B8", .size = 64, > + .c_off = 16, .n_c = 4, > + .b_off = 32, .n_b = 8, > + .min_gen = 8 }, > +}; > + > +static int drm_fd = -1; > +static uint32_t devid; > +static int card = -1; > + > +static uint64_t test_metric_set_id = UINT64_MAX; > + > +static uint64_t timestamp_frequency = 12500000; Please avoid magic numbers, we probably should define those somewhere. 
> +static enum drm_i915_oa_format test_oa_format; > +static uint64_t oa_exp_1_millisec; > + > +static igt_render_copyfunc_t render_copy = NULL; > + > +static uint64_t > +timebase_scale(uint32_t u32_delta) > +{ > + return ((uint64_t)u32_delta * NSEC_PER_SEC) / timestamp_frequency; > +} > + > +/* Returns: the largest OA exponent that will still result in a sampling period > + * less than or equal to the given @period. > + */ > +static int > +max_oa_exponent_for_period_lte(uint64_t period) > +{ > + /* NB: timebase_scale() takes a uint32_t and an exponent of 30 > + * would already represent a period of ~3 minutes so there's > + * really no need to consider higher exponents. > + */ > + for (int i = 0; i < 30; i++) { > + uint64_t oa_period = timebase_scale(2 << i); > + > + if (oa_period > period) > + return max(0, i - 1); > + } > + > + igt_assert(!"reached"); > + return -1; > +} > + > +static bool > +try_read_u64_file(const char *file, uint64_t *val) > +{ > + char buf[32]; > + int fd, n; > + > + fd = open(file, O_RDONLY); > + if (fd < 0) > + return false; > + > + while ((n = read(fd, buf, sizeof(buf) - 1)) < 0 && errno == EINTR) > + ; > + igt_assert(n >= 0); > + > + close(fd); > + > + buf[n] = '\0'; > + *val = strtoull(buf, NULL, 0); > + > + return true; > +} > + > +static void > +write_u64_file(const char *file, uint64_t val) > +{ > + char buf[32]; > + int fd, len, ret; > + > + fd = open(file, O_WRONLY); > + igt_assert(fd >= 0); > + > + len = snprintf(buf, sizeof(buf), "%"PRIu64, val); > + igt_assert(len > 0); > + > + while ((ret = write(fd, buf, len)) < 0 && errno == EINTR) > + ; > + igt_assert_eq(ret, len); > + > + close(fd); > +} > + Write/read helpers are used only once so maybe those functions are redundant? Or if we want those helpers we could move them to libs. 
> +static bool > +init_sys_info(void) > +{ > + const char *test_set_name = NULL; > + const char *test_set_uuid = NULL; > + char buf[256]; > + > + igt_assert_neq(card, -1); > + igt_assert_neq(devid, 0); > + > + timestamp_frequency = 12500000; The same here - magic number > + > + if (IS_HASWELL(devid)) { > + /* We don't have a TestOa metric set for Haswell so use > + * RenderBasic > + */ > + test_set_name = "RenderBasic"; > + test_set_uuid = "403d8832-1a27-4aa6-a64e-f5389ce7b212"; > + test_oa_format = I915_OA_FORMAT_A45_B8_C8; > + } else { > + test_set_name = "TestOa"; > + test_oa_format = local_I915_OA_FORMAT_A32u40_A4u32_B8_C8; > + > + if (IS_BROADWELL(devid)) { > + test_set_uuid = "d6de6f55-e526-4f79-a6a6-d7315c09044e"; > + } else if (IS_CHERRYVIEW(devid)) { > + test_set_uuid = "4a534b07-cba3-414d-8d60-874830e883aa"; > + } else if (IS_SKYLAKE(devid)) { > + switch (intel_gt(devid)) { > + case 1: > + test_set_uuid = > + "1651949f-0ac0-4cb1-a06f-dafd74a407d1"; > + break; > + case 2: > + test_set_uuid = > + "2b985803-d3c9-4629-8a4f-634bfecba0e8"; > + break; > + case 3: > + test_set_uuid = > + "882fa433-1f4a-4a67-a962-c741888fe5f5"; > + break; > + default: > + igt_debug("unsupported Skylake GT size\n"); > + return false; > + } > + timestamp_frequency = 12000000; And here :) > + } else if (IS_BROXTON(devid)) { > + test_set_uuid = "5ee72f5c-092f-421e-8b70-225f7c3e9612"; > + timestamp_frequency = 19200000; > + } else if (IS_KABYLAKE(devid)) { > + switch (intel_gt(devid)) { > + case 1: > + test_set_uuid = > + "baa3c7e4-52b6-4b85-801e-465a94b746dd"; > + break; > + case 2: > + test_set_uuid = > + "f1792f32-6db2-4b50-b4b2-557128f1688d"; > + break; > + default: > + igt_debug("unsupported Kabylake GT size\n"); > + return false; > + } > + timestamp_frequency = 12000000; > + } else if (IS_GEMINILAKE(devid)) { > + test_set_uuid = "dd3fd789-e783-4204-8cd0-b671bbccb0cf"; > + timestamp_frequency = 19200000; > + } else { > + igt_debug("unsupported GT\n"); > + return false; > + } > + } 
> + > + igt_debug("%s metric set UUID = %s\n", > + test_set_name, > + test_set_uuid); > + > + oa_exp_1_millisec = max_oa_exponent_for_period_lte(1000000); > + > + snprintf(buf, sizeof(buf), > + "/sys/class/drm/card%d/metrics/%s/id", > + card, > + test_set_uuid); > + > + return try_read_u64_file(buf, &test_metric_set_id); > +} > + > +static int > +__perf_open(int fd, struct drm_i915_perf_open_param *param) > +{ > + int ret = igt_ioctl(fd, DRM_IOCTL_I915_PERF_OPEN, param); > + > + igt_assert(ret >= 0); > + errno = 0; > + > + return ret; > +} > + > +static void > +test_cs_oa_stream_create(void) > +{ > + igt_fork(child, 1) { > + uint64_t properties[] = { > + /* Include OA reports in samples */ > + DRM_I915_PERF_PROP_SAMPLE_OA, true, > + > + /* OA unit configuration */ > + DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id, > + DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format, > + DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec, > + > + /* CS parameters */ > + local_DRM_I915_PERF_PROP_SAMPLE_CTX_ID, true, > + local_DRM_I915_PERF_PROP_ENGINE, I915_EXEC_BSD, > + }; > + struct drm_i915_perf_open_param param = { > + .flags = I915_PERF_FLAG_FD_CLOEXEC | > + I915_PERF_FLAG_FD_NONBLOCK, > + .num_properties = sizeof(properties) / 16, > + .properties_ptr = to_user_pointer(properties), > + }; > + int stream_fd; > + > + do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param, EINVAL); > + > + /* Send Render Engine as PROP_ENGINE */ > + properties[ARRAY_SIZE(properties)-1] = I915_EXEC_RENDER; > + > + stream_fd = __perf_open(drm_fd, &param); > + close(stream_fd); > + } > + > + igt_waitchildren(); > +} > + > +static void > +scratch_buf_init(drm_intel_bufmgr *bufmgr, > + struct igt_buf *buf, > + int width, int height, > + uint32_t color) > +{ > + size_t stride = width * 4; > + size_t size = stride * height; > + drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, "", size, 4096); > + int ret; > + > + ret = drm_intel_bo_map(bo, true /* writable */); > + igt_assert_eq(ret, 0); > + > + for (int i = 0; i 
< width * height; i++) > + ((uint32_t *)bo->virtual)[i] = color; > + > + drm_intel_bo_unmap(bo); > + > + buf->bo = bo; > + buf->stride = stride; > + buf->tiling = I915_TILING_NONE; > + buf->size = size; > +} > + > +/* > + * Given a set of CS properties including DRM_I915_PERF_PROP_SAMPLE_OA > + * this function returns the offset in the sample where OA report will > + * be located. > + */ > +static size_t > +get_oa_report_offset(uint64_t *properties, int prop_size) > +{ > + size_t offset = 0; > + int i = 0; > + > + do { > + switch (properties[i]) { > + case local_DRM_I915_PERF_PROP_SAMPLE_OA_SOURCE: > + if (properties[i+1]) { > + offset += 8; > + i += 2; > + } > + break; > + case local_DRM_I915_PERF_PROP_SAMPLE_CTX_ID: > + if (properties[i+1]) { > + offset += 8; > + i += 2; > + } > + break; > + case local_DRM_I915_PERF_PROP_SAMPLE_PID: > + if (properties[i+1]) { > + offset += 8; > + i += 2; > + } > + break; > + case local_DRM_I915_PERF_PROP_SAMPLE_TAG: > + if (properties[i+1]) { > + offset += 8; > + i += 2; > + } > + break; > + case local_DRM_I915_PERF_PROP_SAMPLE_TS: > + if (properties[i+1]) { > + offset += 8; > + i += 2; > + } > + break; Why are you doing the same thing for each case separately? 
> + case DRM_I915_PERF_PROP_CTX_HANDLE: > + case DRM_I915_PERF_PROP_SAMPLE_OA: > + case DRM_I915_PERF_PROP_OA_METRICS_SET: > + case DRM_I915_PERF_PROP_OA_FORMAT: > + case DRM_I915_PERF_PROP_OA_EXPONENT: > + case local_DRM_I915_PERF_PROP_ENGINE: > + i += 2; > + break; > + } > + > + if (properties[i] == local_DRM_I915_PERF_PROP_SAMPLE_MMIO) { > + uint32_t num_mmio = *((uint32_t *)properties[i+1]); > + > + offset += (num_mmio * 4); > + i += 2; > + } > + } while (i < prop_size); > + > + return offset; > +} > + > +static size_t > +get_perf_report_size(uint64_t *properties, int prop_size, int format_id) > +{ > + size_t format_size = oa_formats[format_id].size; > + size_t sample_size = 0; > + > + sample_size += get_oa_report_offset(properties, prop_size); > + sample_size += format_size; > + > + return sample_size; > +} > + > +static bool > +read_perf_reports(int stream_fd, > + uint8_t *perf_reports, > + int num_reports, > + size_t report_size, > + bool retry_on_loss) > +{ > + size_t sample_size = (sizeof(struct drm_i915_perf_record_header) + > + report_size); > + const struct drm_i915_perf_record_header *header; > + uint8_t *base_perf_reports = perf_reports; > + int i = 0; > + > + /* Note: we allocate a large buffer so that each read() iteration > + * should scrape *all* pending records. > + * > + * The largest buffer the OA unit supports is 16MB and the smallest > + * perf report format is 64bytes + 8bytes allowing up to 233016 > + * reports to be buffered. > + * > + * Being sure we are fetching all buffered reports allows us to > + * potentially throw away / skip all reports whenever we see > + * a _REPORT_LOST notification as a way of being sure are > + * measurements aren't skewed by a lost report. > + * > + * Note: that is is useful for some tests but also not something > + * applications would be expected to resort to. 
Lost reports are > + * somewhat unpredictable but typically don't pose a problem - except > + * to indicate that the OA unit may be over taxed if lots of reports > + * are being lost. > + */ > + int buf_size = 233016 * > + (72 + sizeof(struct drm_i915_perf_record_header)); > + uint8_t *buf = malloc(buf_size); > + > + igt_assert(buf); > + > + do { > + ssize_t len; > + > + while ((len = read(stream_fd, buf, buf_size)) < 0 && > + errno == EINTR) > + ; > + > + igt_assert(len > 0); > + > + for (size_t offset = 0; offset < len; offset += header->size) { > + const uint8_t *report; > + size_t sample_offset = 0; > + > + header = (void *)(buf + offset); > + > + igt_assert_eq(header->pad, 0); /* Reserved */ > + > + /* Currently the only test that should ever expect to > + * see a _BUFFER_LOST error is the buffer_fill test, > + * otherwise something bad has probably happened... > + */ > + igt_assert_neq(header->type, > + DRM_I915_PERF_RECORD_OA_BUFFER_LOST); > + > + /* At high sampling frequencies the OA HW might not be > + * able to cope with all write requests and will notify > + * us that a report was lost. We restart our read of > + * two sequential reports due to the timeline blip this > + * implies > + */ > + if (header->type == > + DRM_I915_PERF_RECORD_OA_REPORT_LOST) { > + igt_debug("read restart: OA trigger collision " > + "/ report lost\n"); > + if (!retry_on_loss) { > + igt_debug("Freeing memory\n"); > + free(buf); > + return false; > + } > + i = 0; > + perf_reports = base_perf_reports; > + > + /* XXX: break, because we don't know where > + * within the series of already read reports > + * there could be a blip from the lost report. > + */ > + break; > + } > + > + /* Currently the only other record type expected is a > + * _SAMPLE. Notably this test will need updating if > + * i915-perf is extended in the future with additional > + * record types. 
> + */ > + igt_assert_eq(header->type, > + DRM_I915_PERF_RECORD_SAMPLE); > + > + igt_assert_eq(header->size, sample_size); > + > + sample_offset = offset + > + sizeof(struct drm_i915_perf_record_header); > + report = (const uint8_t *)(buf + sample_offset); > + > + memcpy(perf_reports, report, report_size); > + perf_reports += report_size; > + i++; > + if (i == num_reports) > + break; > + } > + } while (i < num_reports); > + > + free(buf); > + return true; > +} > + > +static void > +perf_stream_capture_workload_samples(struct drm_i915_perf_open_param *param, > + uint8_t *perf_reports, > + int num_reports, int report_size) > +{ > + drm_intel_bufmgr *bufmgr; > + drm_intel_context *context0; > + struct intel_batchbuffer *batch; > + struct igt_buf src, dst; > + int width = 800; > + int height = 600; > + uint32_t ctx_id = 0xffffffff; /* invalid id */ > + int stream_fd; > + int ret; > + bool valid_data = false; > + > +retry: > + bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096); > + drm_intel_bufmgr_gem_enable_reuse(bufmgr); > + > + scratch_buf_init(bufmgr, &src, width, height, 0xff0000ff); > + scratch_buf_init(bufmgr, &dst, width, height, 0x00ff00ff); > + > + batch = intel_batchbuffer_alloc(bufmgr, devid); > + > + context0 = drm_intel_gem_context_create(bufmgr); > + igt_assert(context0); > + > + ret = drm_intel_gem_context_get_id(context0, &ctx_id); > + igt_assert_eq(ret, 0); > + igt_assert_neq(ctx_id, 0xffffffff); > + > + igt_debug("opening i915-perf stream\n"); > + stream_fd = __perf_open(drm_fd, param); > + > + render_copy(batch, > + context0, > + &src, 0, 0, width, height, > + &dst, 0, 0); > + > + intel_batchbuffer_flush_with_context(batch, context0); > + > + drm_intel_bo_unreference(src.bo); > + drm_intel_bo_unreference(dst.bo); > + > + intel_batchbuffer_free(batch); > + drm_intel_gem_context_destroy(context0); > + drm_intel_bufmgr_destroy(bufmgr); > + > + valid_data = read_perf_reports(stream_fd, perf_reports, > + num_reports, report_size, > + false); > + if 
(!valid_data) { > + close(stream_fd); > + goto retry; > + } > +} > + > +struct oa_source_sample { > + uint64_t source; > + uint64_t ctx_id; > + uint8_t oa_report[]; > +}; > + > +#define SOURCE(i) (i == 0) ? "OABUFFER" : "CS" > + > +static void > +verify_source(uint8_t *perf_reports, int num_reports, size_t report_size) > +{ > + struct oa_source_sample *sample; > + uint32_t *oa_report; > + > + for (int i = 0; i < num_reports; i++) { > + size_t offset = i * report_size; > + > + sample = (struct oa_source_sample *) (perf_reports + offset); > + oa_report = (uint32_t *) sample->oa_report; > + > + igt_debug("read report: source= %s, reason = %x, " > + "timestamp = %x\n", > + SOURCE(sample->source), oa_report[0], oa_report[1]); > + > + igt_assert((sample->source == > + local_I915_PERF_SAMPLE_OA_SOURCE_OABUFFER) || > + (sample->source == > + local_I915_PERF_SAMPLE_OA_SOURCE_CS)); > + > + if (sample->source == local_I915_PERF_SAMPLE_OA_SOURCE_CS) > + igt_assert(!oa_report[0]); > + > + /* Don't expect zero for timestamps */ > + igt_assert_neq(oa_report[1], 0); > + } > +} > + > +static void > +test_oa_source(void) > +{ > + uint64_t properties[] = { > + /* Include OA reports in samples */ > + DRM_I915_PERF_PROP_SAMPLE_OA, true, > + > + /* OA unit configuration */ > + DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id, > + DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format, > + DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec, > + > + /* CS parameters */ > + local_DRM_I915_PERF_PROP_ENGINE, I915_EXEC_RENDER, > + local_DRM_I915_PERF_PROP_SAMPLE_OA_SOURCE, true, > + local_DRM_I915_PERF_PROP_SAMPLE_CTX_ID, true, > + }; > + struct drm_i915_perf_open_param param = { > + .flags = I915_PERF_FLAG_FD_CLOEXEC, > + .num_properties = sizeof(properties) / 16, > + .properties_ptr = to_user_pointer(properties), > + }; > + > + /* should be default, but just to be sure... 
*/ > + write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", 1); > + > + igt_fork(child, 1) { > + int prop_size = ARRAY_SIZE(properties); > + int num_reports = 10; > + int report_size = get_perf_report_size(properties, prop_size, > + test_oa_format); > + int total_size = num_reports * report_size; > + uint8_t *perf_reports = malloc(total_size); > + > + igt_assert(perf_reports); > + > + perf_stream_capture_workload_samples(&param, perf_reports, > + num_reports, report_size); > + verify_source(perf_reports, num_reports, report_size); > + free(perf_reports); > + } > + > + igt_waitchildren(); > +} > + > +igt_main > +{ > + igt_skip_on_simulation(); > + > + igt_fixture { > + drm_fd = drm_open_driver_render(DRIVER_INTEL); > + devid = intel_get_drm_devid(drm_fd); > + card = drm_get_card(); > + > + igt_require(init_sys_info()); > + > + render_copy = igt_get_render_copyfunc(devid); > + igt_require_f(render_copy, "no render-copy function\n"); > + } > + > + igt_subtest("cs-oa-stream-create") > + test_cs_oa_stream_create(); > + > + igt_subtest("oa-source") > + test_oa_source(); > + > + igt_fixture { > + close(drm_fd); > + } > +} > -- > 1.9.1 > -- Cheers, Ewelina > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
diff --git a/tests/Makefile.sources b/tests/Makefile.sources index 6c19509..24bd099 100644 --- a/tests/Makefile.sources +++ b/tests/Makefile.sources @@ -170,6 +170,7 @@ TESTS_progs = \ gen7_forcewake_mt \ gvt_basic \ intel_perf \ + intel_perf_dapc \ kms_3d \ kms_addfb_basic \ kms_atomic \ diff --git a/tests/intel_perf_dapc.c b/tests/intel_perf_dapc.c new file mode 100644 index 0000000..92b4dee --- /dev/null +++ b/tests/intel_perf_dapc.c @@ -0,0 +1,811 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ +#include <fcntl.h> + +#include "igt.h" +#include "drm.h" + +IGT_TEST_DESCRIPTION("Test the i915 command stream based perf metrics streaming interface"); + +/* Temporarily copy i915-perf uapi here to avoid a dependency on libdrm's + * i915_drm.h copy being updated with the i915-perf interface before this + * test can land in i-g-t. 
+ * + * TODO: remove this once the interface lands in libdrm + */ +#ifndef DRM_I915_PERF_OPEN +#define DRM_I915_PERF_OPEN 0x36 +#define DRM_IOCTL_I915_PERF_OPEN DRM_IOW(DRM_COMMAND_BASE + \ + DRM_I915_PERF_OPEN, \ + struct drm_i915_perf_open_param) + +enum drm_i915_oa_format { + I915_OA_FORMAT_A13 = 1, /* HSW only */ + I915_OA_FORMAT_A29, /* HSW only */ + I915_OA_FORMAT_A13_B8_C8, /* HSW only */ + I915_OA_FORMAT_B4_C8, /* HSW only */ + I915_OA_FORMAT_A45_B8_C8, /* HSW only */ + I915_OA_FORMAT_B4_C8_A16, /* HSW only */ + I915_OA_FORMAT_C4_B8, /* HSW+ */ + + /* Gen8+ */ + I915_OA_FORMAT_A12, + I915_OA_FORMAT_A12_B8_C8, + I915_OA_FORMAT_A32u40_A4u32_B8_C8, + + I915_OA_FORMAT_MAX /* non-ABI */ +}; + +enum drm_i915_perf_sample_oa_source { + I915_PERF_SAMPLE_OA_SOURCE_OABUFFER, + I915_PERF_SAMPLE_OA_SOURCE_CS, + I915_PERF_SAMPLE_OA_SOURCE_MAX /* non-ABI */ +}; + +#define I915_PERF_MMIO_NUM_MAX 8 +struct drm_i915_perf_mmio_list { + __u32 num_mmio; + __u32 mmio_list[I915_PERF_MMIO_NUM_MAX]; +}; + +enum drm_i915_perf_property_id { + DRM_I915_PERF_PROP_CTX_HANDLE = 1, + DRM_I915_PERF_PROP_SAMPLE_OA, + DRM_I915_PERF_PROP_OA_METRICS_SET, + DRM_I915_PERF_PROP_OA_FORMAT, + DRM_I915_PERF_PROP_OA_EXPONENT, + DRM_I915_PERF_PROP_SAMPLE_OA_SOURCE, + DRM_I915_PERF_PROP_ENGINE, + DRM_I915_PERF_PROP_SAMPLE_CTX_ID, + DRM_I915_PERF_PROP_SAMPLE_PID, + DRM_I915_PERF_PROP_SAMPLE_TAG, + DRM_I915_PERF_PROP_SAMPLE_TS, + DRM_I915_PERF_PROP_SAMPLE_MMIO, + DRM_I915_PERF_PROP_MAX /* non-ABI */ +}; + +struct drm_i915_perf_open_param { + __u32 flags; +#define I915_PERF_FLAG_FD_CLOEXEC (1<<0) +#define I915_PERF_FLAG_FD_NONBLOCK (1<<1) +#define I915_PERF_FLAG_DISABLED (1<<2) + + __u32 num_properties; + __u64 properties_ptr; +}; + +#define I915_PERF_IOCTL_ENABLE _IO('i', 0x0) +#define I915_PERF_IOCTL_DISABLE _IO('i', 0x1) + +struct drm_i915_perf_record_header { + __u32 type; + __u16 pad; + __u16 size; +}; + +enum drm_i915_perf_record_type { + DRM_I915_PERF_RECORD_SAMPLE = 1, + 
DRM_I915_PERF_RECORD_OA_REPORT_LOST = 2, + DRM_I915_PERF_RECORD_OA_BUFFER_LOST = 3, + DRM_I915_PERF_RECORD_MAX /* non-ABI */ +}; +#endif /* !DRM_I915_PERF_OPEN */ + +/* There is no ifdef we can use for those formats :( */ +enum { + local_I915_OA_FORMAT_A12 = I915_OA_FORMAT_C4_B8 + 1, + local_I915_OA_FORMAT_A12_B8_C8 = I915_OA_FORMAT_C4_B8 + 2, + local_I915_OA_FORMAT_A32u40_A4u32_B8_C8 = I915_OA_FORMAT_C4_B8 + 3, +}; + +#define local_I915_OA_FORMAT_MAX (local_I915_OA_FORMAT_A32u40_A4u32_B8_C8 + 1) + +enum { + local_I915_PERF_SAMPLE_OA_SOURCE_OABUFFER, + local_I915_PERF_SAMPLE_OA_SOURCE_CS, + local_I915_PERF_SAMPLE_OA_SOURCE_MAX /* non-ABI */ +}; + +enum { + local_DRM_I915_PERF_PROP_SAMPLE_OA_SOURCE = DRM_I915_PERF_PROP_OA_EXPONENT + 1, + local_DRM_I915_PERF_PROP_ENGINE = DRM_I915_PERF_PROP_OA_EXPONENT + 2, + local_DRM_I915_PERF_PROP_SAMPLE_CTX_ID = DRM_I915_PERF_PROP_OA_EXPONENT + 3, + local_DRM_I915_PERF_PROP_SAMPLE_PID = DRM_I915_PERF_PROP_OA_EXPONENT + 4, + local_DRM_I915_PERF_PROP_SAMPLE_TAG = DRM_I915_PERF_PROP_OA_EXPONENT + 5, + local_DRM_I915_PERF_PROP_SAMPLE_TS = DRM_I915_PERF_PROP_OA_EXPONENT + 6, + local_DRM_I915_PERF_PROP_SAMPLE_MMIO = DRM_I915_PERF_PROP_OA_EXPONENT + 7, + local_DRM_I915_PERF_PROP_MAX /* non-ABI */ +}; + +static struct { + const char *name; + size_t size; + int a40_high_off; /* bytes */ + int a40_low_off; + int n_a40; + int a_off; + int n_a; + int first_a; + int b_off; + int n_b; + int c_off; + int n_c; + int min_gen; + int max_gen; +} oa_formats[local_I915_OA_FORMAT_MAX] = { + [I915_OA_FORMAT_A13] = { /* HSW only */ + "A13", .size = 64, + .a_off = 12, .n_a = 13, + .max_gen = 7 }, + [I915_OA_FORMAT_A29] = { /* HSW only */ + "A29", .size = 128, + .a_off = 12, .n_a = 29, + .max_gen = 7 }, + [I915_OA_FORMAT_A13_B8_C8] = { /* HSW only */ + "A13_B8_C8", .size = 128, + .a_off = 12, .n_a = 13, + .b_off = 64, .n_b = 8, + .c_off = 96, .n_c = 8, + .max_gen = 7 }, + [I915_OA_FORMAT_A45_B8_C8] = { /* HSW only */ + "A45_B8_C8", .size = 256, + .a_off = 
12, .n_a = 45, + .b_off = 192, .n_b = 8, + .c_off = 224, .n_c = 8, + .max_gen = 7 }, + [I915_OA_FORMAT_B4_C8] = { /* HSW only */ + "B4_C8", .size = 64, + .b_off = 16, .n_b = 4, + .c_off = 32, .n_c = 8, + .max_gen = 7 }, + [I915_OA_FORMAT_B4_C8_A16] = { /* HSW only */ + "B4_C8_A16", .size = 128, + .b_off = 16, .n_b = 4, + .c_off = 32, .n_c = 8, + .a_off = 60, .n_a = 16, .first_a = 29, + .max_gen = 7 }, + [I915_OA_FORMAT_C4_B8] = { /* HSW+ (header differs from HSW-Gen8+) */ + "C4_B8", .size = 64, + .c_off = 16, .n_c = 4, + .b_off = 28, .n_b = 8 }, + + /* Gen8+ */ + + [local_I915_OA_FORMAT_A12] = { + "A12", .size = 64, + .a_off = 12, .n_a = 12, .first_a = 7, + .min_gen = 8 }, + [local_I915_OA_FORMAT_A12_B8_C8] = { + "A12_B8_C8", .size = 128, + .a_off = 12, .n_a = 12, + .b_off = 64, .n_b = 8, + .c_off = 96, .n_c = 8, .first_a = 7, + .min_gen = 8 }, + [local_I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { + "A32u40_A4u32_B8_C8", .size = 256, + .a40_high_off = 160, .a40_low_off = 16, .n_a40 = 32, + .a_off = 144, .n_a = 4, .first_a = 32, + .b_off = 192, .n_b = 8, + .c_off = 224, .n_c = 8, + .min_gen = 8 }, + [I915_OA_FORMAT_C4_B8] = { + "C4_B8", .size = 64, + .c_off = 16, .n_c = 4, + .b_off = 32, .n_b = 8, + .min_gen = 8 }, +}; + +static int drm_fd = -1; +static uint32_t devid; +static int card = -1; + +static uint64_t test_metric_set_id = UINT64_MAX; + +static uint64_t timestamp_frequency = 12500000; +static enum drm_i915_oa_format test_oa_format; +static uint64_t oa_exp_1_millisec; + +static igt_render_copyfunc_t render_copy = NULL; + +static uint64_t +timebase_scale(uint32_t u32_delta) +{ + return ((uint64_t)u32_delta * NSEC_PER_SEC) / timestamp_frequency; +} + +/* Returns: the largest OA exponent that will still result in a sampling period + * less than or equal to the given @period. 
+ */ +static int +max_oa_exponent_for_period_lte(uint64_t period) +{ + /* NB: timebase_scale() takes a uint32_t and an exponent of 30 + * would already represent a period of ~3 minutes so there's + * really no need to consider higher exponents. + */ + for (int i = 0; i < 30; i++) { + uint64_t oa_period = timebase_scale(2 << i); + + if (oa_period > period) + return max(0, i - 1); + } + + igt_assert(!"reached"); + return -1; +} + +static bool +try_read_u64_file(const char *file, uint64_t *val) +{ + char buf[32]; + int fd, n; + + fd = open(file, O_RDONLY); + if (fd < 0) + return false; + + while ((n = read(fd, buf, sizeof(buf) - 1)) < 0 && errno == EINTR) + ; + igt_assert(n >= 0); + + close(fd); + + buf[n] = '\0'; + *val = strtoull(buf, NULL, 0); + + return true; +} + +static void +write_u64_file(const char *file, uint64_t val) +{ + char buf[32]; + int fd, len, ret; + + fd = open(file, O_WRONLY); + igt_assert(fd >= 0); + + len = snprintf(buf, sizeof(buf), "%"PRIu64, val); + igt_assert(len > 0); + + while ((ret = write(fd, buf, len)) < 0 && errno == EINTR) + ; + igt_assert_eq(ret, len); + + close(fd); +} + +static bool +init_sys_info(void) +{ + const char *test_set_name = NULL; + const char *test_set_uuid = NULL; + char buf[256]; + + igt_assert_neq(card, -1); + igt_assert_neq(devid, 0); + + timestamp_frequency = 12500000; + + if (IS_HASWELL(devid)) { + /* We don't have a TestOa metric set for Haswell so use + * RenderBasic + */ + test_set_name = "RenderBasic"; + test_set_uuid = "403d8832-1a27-4aa6-a64e-f5389ce7b212"; + test_oa_format = I915_OA_FORMAT_A45_B8_C8; + } else { + test_set_name = "TestOa"; + test_oa_format = local_I915_OA_FORMAT_A32u40_A4u32_B8_C8; + + if (IS_BROADWELL(devid)) { + test_set_uuid = "d6de6f55-e526-4f79-a6a6-d7315c09044e"; + } else if (IS_CHERRYVIEW(devid)) { + test_set_uuid = "4a534b07-cba3-414d-8d60-874830e883aa"; + } else if (IS_SKYLAKE(devid)) { + switch (intel_gt(devid)) { + case 1: + test_set_uuid = + "1651949f-0ac0-4cb1-a06f-dafd74a407d1"; 
+ break; + case 2: + test_set_uuid = + "2b985803-d3c9-4629-8a4f-634bfecba0e8"; + break; + case 3: + test_set_uuid = + "882fa433-1f4a-4a67-a962-c741888fe5f5"; + break; + default: + igt_debug("unsupported Skylake GT size\n"); + return false; + } + timestamp_frequency = 12000000; + } else if (IS_BROXTON(devid)) { + test_set_uuid = "5ee72f5c-092f-421e-8b70-225f7c3e9612"; + timestamp_frequency = 19200000; + } else if (IS_KABYLAKE(devid)) { + switch (intel_gt(devid)) { + case 1: + test_set_uuid = + "baa3c7e4-52b6-4b85-801e-465a94b746dd"; + break; + case 2: + test_set_uuid = + "f1792f32-6db2-4b50-b4b2-557128f1688d"; + break; + default: + igt_debug("unsupported Kabylake GT size\n"); + return false; + } + timestamp_frequency = 12000000; + } else if (IS_GEMINILAKE(devid)) { + test_set_uuid = "dd3fd789-e783-4204-8cd0-b671bbccb0cf"; + timestamp_frequency = 19200000; + } else { + igt_debug("unsupported GT\n"); + return false; + } + } + + igt_debug("%s metric set UUID = %s\n", + test_set_name, + test_set_uuid); + + oa_exp_1_millisec = max_oa_exponent_for_period_lte(1000000); + + snprintf(buf, sizeof(buf), + "/sys/class/drm/card%d/metrics/%s/id", + card, + test_set_uuid); + + return try_read_u64_file(buf, &test_metric_set_id); +} + +static int +__perf_open(int fd, struct drm_i915_perf_open_param *param) +{ + int ret = igt_ioctl(fd, DRM_IOCTL_I915_PERF_OPEN, param); + + igt_assert(ret >= 0); + errno = 0; + + return ret; +} + +static void +test_cs_oa_stream_create(void) +{ + igt_fork(child, 1) { + uint64_t properties[] = { + /* Include OA reports in samples */ + DRM_I915_PERF_PROP_SAMPLE_OA, true, + + /* OA unit configuration */ + DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id, + DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format, + DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec, + + /* CS parameters */ + local_DRM_I915_PERF_PROP_SAMPLE_CTX_ID, true, + local_DRM_I915_PERF_PROP_ENGINE, I915_EXEC_BSD, + }; + struct drm_i915_perf_open_param param = { + .flags = 
I915_PERF_FLAG_FD_CLOEXEC | + I915_PERF_FLAG_FD_NONBLOCK, + .num_properties = sizeof(properties) / 16, + .properties_ptr = to_user_pointer(properties), + }; + int stream_fd; + + do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_OPEN, ¶m, EINVAL); + + /* Send Render Engine as PROP_ENGINE */ + properties[ARRAY_SIZE(properties)-1] = I915_EXEC_RENDER; + + stream_fd = __perf_open(drm_fd, ¶m); + close(stream_fd); + } + + igt_waitchildren(); +} + +static void +scratch_buf_init(drm_intel_bufmgr *bufmgr, + struct igt_buf *buf, + int width, int height, + uint32_t color) +{ + size_t stride = width * 4; + size_t size = stride * height; + drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, "", size, 4096); + int ret; + + ret = drm_intel_bo_map(bo, true /* writable */); + igt_assert_eq(ret, 0); + + for (int i = 0; i < width * height; i++) + ((uint32_t *)bo->virtual)[i] = color; + + drm_intel_bo_unmap(bo); + + buf->bo = bo; + buf->stride = stride; + buf->tiling = I915_TILING_NONE; + buf->size = size; +} + +/* + * Given a set of CS properties including DRM_I915_PERF_PROP_SAMPLE_OA + * this function returns the offset in the sample where OA report will + * be located. 
+ */ +static size_t +get_oa_report_offset(uint64_t *properties, int prop_size) +{ + size_t offset = 0; + int i = 0; + + do { + switch (properties[i]) { + case local_DRM_I915_PERF_PROP_SAMPLE_OA_SOURCE: + if (properties[i+1]) { + offset += 8; + i += 2; + } + break; + case local_DRM_I915_PERF_PROP_SAMPLE_CTX_ID: + if (properties[i+1]) { + offset += 8; + i += 2; + } + break; + case local_DRM_I915_PERF_PROP_SAMPLE_PID: + if (properties[i+1]) { + offset += 8; + i += 2; + } + break; + case local_DRM_I915_PERF_PROP_SAMPLE_TAG: + if (properties[i+1]) { + offset += 8; + i += 2; + } + break; + case local_DRM_I915_PERF_PROP_SAMPLE_TS: + if (properties[i+1]) { + offset += 8; + i += 2; + } + break; + case DRM_I915_PERF_PROP_CTX_HANDLE: + case DRM_I915_PERF_PROP_SAMPLE_OA: + case DRM_I915_PERF_PROP_OA_METRICS_SET: + case DRM_I915_PERF_PROP_OA_FORMAT: + case DRM_I915_PERF_PROP_OA_EXPONENT: + case local_DRM_I915_PERF_PROP_ENGINE: + i += 2; + break; + } + + if (properties[i] == local_DRM_I915_PERF_PROP_SAMPLE_MMIO) { + uint32_t num_mmio = *((uint32_t *)properties[i+1]); + + offset += (num_mmio * 4); + i += 2; + } + } while (i < prop_size); + + return offset; +} + +static size_t +get_perf_report_size(uint64_t *properties, int prop_size, int format_id) +{ + size_t format_size = oa_formats[format_id].size; + size_t sample_size = 0; + + sample_size += get_oa_report_offset(properties, prop_size); + sample_size += format_size; + + return sample_size; +} + +static bool +read_perf_reports(int stream_fd, + uint8_t *perf_reports, + int num_reports, + size_t report_size, + bool retry_on_loss) +{ + size_t sample_size = (sizeof(struct drm_i915_perf_record_header) + + report_size); + const struct drm_i915_perf_record_header *header; + uint8_t *base_perf_reports = perf_reports; + int i = 0; + + /* Note: we allocate a large buffer so that each read() iteration + * should scrape *all* pending records. 
+ * + * The largest buffer the OA unit supports is 16MB and the smallest + * perf report format is 64bytes + 8bytes allowing up to 233016 + * reports to be buffered. + * + * Being sure we are fetching all buffered reports allows us to + * potentially throw away / skip all reports whenever we see + * a _REPORT_LOST notification as a way of being sure are + * measurements aren't skewed by a lost report. + * + * Note: that is is useful for some tests but also not something + * applications would be expected to resort to. Lost reports are + * somewhat unpredictable but typically don't pose a problem - except + * to indicate that the OA unit may be over taxed if lots of reports + * are being lost. + */ + int buf_size = 233016 * + (72 + sizeof(struct drm_i915_perf_record_header)); + uint8_t *buf = malloc(buf_size); + + igt_assert(buf); + + do { + ssize_t len; + + while ((len = read(stream_fd, buf, buf_size)) < 0 && + errno == EINTR) + ; + + igt_assert(len > 0); + + for (size_t offset = 0; offset < len; offset += header->size) { + const uint8_t *report; + size_t sample_offset = 0; + + header = (void *)(buf + offset); + + igt_assert_eq(header->pad, 0); /* Reserved */ + + /* Currently the only test that should ever expect to + * see a _BUFFER_LOST error is the buffer_fill test, + * otherwise something bad has probably happened... + */ + igt_assert_neq(header->type, + DRM_I915_PERF_RECORD_OA_BUFFER_LOST); + + /* At high sampling frequencies the OA HW might not be + * able to cope with all write requests and will notify + * us that a report was lost. 
We restart our read of + * two sequential reports due to the timeline blip this + * implies + */ + if (header->type == + DRM_I915_PERF_RECORD_OA_REPORT_LOST) { + igt_debug("read restart: OA trigger collision " + "/ report lost\n"); + if (!retry_on_loss) { + igt_debug("Freeing memory\n"); + free(buf); + return false; + } + i = 0; + perf_reports = base_perf_reports; + + /* XXX: break, because we don't know where + * within the series of already read reports + * there could be a blip from the lost report. + */ + break; + } + + /* Currently the only other record type expected is a + * _SAMPLE. Notably this test will need updating if + * i915-perf is extended in the future with additional + * record types. + */ + igt_assert_eq(header->type, + DRM_I915_PERF_RECORD_SAMPLE); + + igt_assert_eq(header->size, sample_size); + + sample_offset = offset + + sizeof(struct drm_i915_perf_record_header); + report = (const uint8_t *)(buf + sample_offset); + + memcpy(perf_reports, report, report_size); + perf_reports += report_size; + i++; + if (i == num_reports) + break; + } + } while (i < num_reports); + + free(buf); + return true; +} + +static void +perf_stream_capture_workload_samples(struct drm_i915_perf_open_param *param, + uint8_t *perf_reports, + int num_reports, int report_size) +{ + drm_intel_bufmgr *bufmgr; + drm_intel_context *context0; + struct intel_batchbuffer *batch; + struct igt_buf src, dst; + int width = 800; + int height = 600; + uint32_t ctx_id = 0xffffffff; /* invalid id */ + int stream_fd; + int ret; + bool valid_data = false; + +retry: + bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096); + drm_intel_bufmgr_gem_enable_reuse(bufmgr); + + scratch_buf_init(bufmgr, &src, width, height, 0xff0000ff); + scratch_buf_init(bufmgr, &dst, width, height, 0x00ff00ff); + + batch = intel_batchbuffer_alloc(bufmgr, devid); + + context0 = drm_intel_gem_context_create(bufmgr); + igt_assert(context0); + + ret = drm_intel_gem_context_get_id(context0, &ctx_id); + igt_assert_eq(ret, 0); 
+ igt_assert_neq(ctx_id, 0xffffffff); + + igt_debug("opening i915-perf stream\n"); + stream_fd = __perf_open(drm_fd, param); + + render_copy(batch, + context0, + &src, 0, 0, width, height, + &dst, 0, 0); + + intel_batchbuffer_flush_with_context(batch, context0); + + drm_intel_bo_unreference(src.bo); + drm_intel_bo_unreference(dst.bo); + + intel_batchbuffer_free(batch); + drm_intel_gem_context_destroy(context0); + drm_intel_bufmgr_destroy(bufmgr); + + valid_data = read_perf_reports(stream_fd, perf_reports, + num_reports, report_size, + false); + if (!valid_data) { + close(stream_fd); + goto retry; + } +} + +struct oa_source_sample { + uint64_t source; + uint64_t ctx_id; + uint8_t oa_report[]; +}; + +#define SOURCE(i) (i == 0) ? "OABUFFER" : "CS" + +static void +verify_source(uint8_t *perf_reports, int num_reports, size_t report_size) +{ + struct oa_source_sample *sample; + uint32_t *oa_report; + + for (int i = 0; i < num_reports; i++) { + size_t offset = i * report_size; + + sample = (struct oa_source_sample *) (perf_reports + offset); + oa_report = (uint32_t *) sample->oa_report; + + igt_debug("read report: source= %s, reason = %x, " + "timestamp = %x\n", + SOURCE(sample->source), oa_report[0], oa_report[1]); + + igt_assert((sample->source == + local_I915_PERF_SAMPLE_OA_SOURCE_OABUFFER) || + (sample->source == + local_I915_PERF_SAMPLE_OA_SOURCE_CS)); + + if (sample->source == local_I915_PERF_SAMPLE_OA_SOURCE_CS) + igt_assert(!oa_report[0]); + + /* Don't expect zero for timestamps */ + igt_assert_neq(oa_report[1], 0); + } +} + +static void +test_oa_source(void) +{ + uint64_t properties[] = { + /* Include OA reports in samples */ + DRM_I915_PERF_PROP_SAMPLE_OA, true, + + /* OA unit configuration */ + DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id, + DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format, + DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec, + + /* CS parameters */ + local_DRM_I915_PERF_PROP_ENGINE, I915_EXEC_RENDER, + 
local_DRM_I915_PERF_PROP_SAMPLE_OA_SOURCE, true, + local_DRM_I915_PERF_PROP_SAMPLE_CTX_ID, true, + }; + struct drm_i915_perf_open_param param = { + .flags = I915_PERF_FLAG_FD_CLOEXEC, + .num_properties = sizeof(properties) / 16, + .properties_ptr = to_user_pointer(properties), + }; + + /* should be default, but just to be sure... */ + write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", 1); + + igt_fork(child, 1) { + int prop_size = ARRAY_SIZE(properties); + int num_reports = 10; + int report_size = get_perf_report_size(properties, prop_size, + test_oa_format); + int total_size = num_reports * report_size; + uint8_t *perf_reports = malloc(total_size); + + igt_assert(perf_reports); + + perf_stream_capture_workload_samples(¶m, perf_reports, + num_reports, report_size); + verify_source(perf_reports, num_reports, report_size); + free(perf_reports); + } + + igt_waitchildren(); +} + +igt_main +{ + igt_skip_on_simulation(); + + igt_fixture { + drm_fd = drm_open_driver_render(DRIVER_INTEL); + devid = intel_get_drm_devid(drm_fd); + card = drm_get_card(); + + igt_require(init_sys_info()); + + render_copy = igt_get_render_copyfunc(devid); + igt_require_f(render_copy, "no render-copy function\n"); + } + + igt_subtest("cs-oa-stream-create") + test_cs_oa_stream_create(); + + igt_subtest("oa-source") + test_oa_source(); + + igt_fixture { + close(drm_fd); + } +}
This tests the different performance metrics streamed by the i915 driver. This i915 feature, also referred to as Driver Assisted Performance Capture (DAPC), gives userspace the ability to sample OA reports at execbuf boundaries and to associate other metadata such as CTX ID, PID and TAG with each sample. The ability to capture engine timestamps and MMIO reads is also provided. v2: Define the enums for OA_SOURCE and PERF_PROP locally until the libdrm changes are merged. Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Signed-off-by: Sagar Arun Kamble <sagar.a.kamble@intel.com> --- tests/Makefile.sources | 1 + tests/intel_perf_dapc.c | 811 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 812 insertions(+) create mode 100644 tests/intel_perf_dapc.c