[RFC] benchmarks/gem_slice_shutdown: microbenchmark for slice shutdown delays

Message ID	1493737698-5352-1-git-send-email-oscar.mateo@intel.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <intel-gfx-bounces@lists.freedesktop.org> From: Oscar Mateo <oscar.mateo@intel.com> To: intel-gfx@lists.freedesktop.org Date: Tue, 2 May 2017 15:08:18 +0000 Message-Id: <1493737698-5352-1-git-send-email-oscar.mateo@intel.com> MIME-Version: 1.0 Subject: [Intel-gfx] [RFC] benchmarks/gem_slice_shutdown: microbenchmark for slice shutdown delays Precedence: list Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64 Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>

diff --git a/benchmarks/Makefile.sources b/benchmarks/Makefile.sources
index 3a94115..591b5ae 100644
--- a/benchmarks/Makefile.sources
+++ b/benchmarks/Makefile.sources
@@ -13,6 +13,7 @@ benchmarks_prog_list = \
 	gem_mmap \
 	gem_prw \
 	gem_set_domain \
+	gem_slice_shutdown \
 	gem_syslatency \
 	gem_wsim \
 	kms_vblank \
diff --git a/benchmarks/gem_slice_shutdown.c b/benchmarks/gem_slice_shutdown.c
new file mode 100644
index 0000000..dcb17c1
--- /dev/null
+++ b/benchmarks/gem_slice_shutdown.c
@@ -0,0 +1,397 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Oscar Mateo <oscar.mateo@intel.com>
+ *
+ */
+
+/*
+ * This tool measures the time it takes to change the number of enabled
+ * slices: it rewrites the slice count in GEN8_R_PWR_CLK_STATE from a batch
+ * and times a render copy submitted right after the change.
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <time.h>
+#include <assert.h>
+
+#include "drm.h"
+#include "ioctl_wrappers.h"
+#include "drmtest.h"
+#include "intel_io.h"
+#include "igt_stats.h"
+#include "intel_chipset.h"
+#include "intel_bufmgr.h"
+
+/* Bit fields of GEN8_R_PWR_CLK_STATE that this tool decodes and programs. */
+#define GEN8_R_PWR_CLK_STATE		(0x20C8)
+/* 1u: left-shifting a signed 1 into the sign bit is undefined behaviour. */
+#define GEN8_RPCS_ENABLE		(1u << 31)
+#define GEN8_RPCS_S_CNT_ENABLE		(1 << 18)
+#define GEN8_RPCS_S_CNT_SHIFT		15
+#define GEN8_RPCS_S_CNT_MASK		(0x7 << GEN8_RPCS_S_CNT_SHIFT)
+#define GEN8_RPCS_SS_CNT_ENABLE		(1 << 11)
+#define GEN8_RPCS_SS_CNT_SHIFT		8
+#define GEN8_RPCS_SS_CNT_MASK		(0x7 << GEN8_RPCS_SS_CNT_SHIFT)
+#define GEN8_RPCS_EU_MAX_SHIFT		4
+#define GEN8_RPCS_EU_MAX_MASK		(0xf << GEN8_RPCS_EU_MAX_SHIFT)
+#define GEN8_RPCS_EU_MIN_SHIFT		0
+#define GEN8_RPCS_EU_MIN_MASK		(0xf << GEN8_RPCS_EU_MIN_SHIFT)
+
+/* Return "yes" or "no" for a boolean, for use in printed output. */
+static const char *yesno(bool x)
+{
+	return x ? "yes" : "no";
+}
+
+/*
+ * Decode a GEN8_R_PWR_CLK_STATE value and print the enable bits, the slice
+ * and subslice counts and the EU min/max fields to stdout.
+ */
+static void print_rpcs_config(uint32_t rpcs_config)
+{
+	bool rpcs_enable, s_enable, ss_enable;
+	unsigned int s_count, ss_count, eu_max, eu_min;
+
+	rpcs_enable = rpcs_config & GEN8_RPCS_ENABLE;
+	s_enable = rpcs_config & GEN8_RPCS_S_CNT_ENABLE;
+	ss_enable = rpcs_config & GEN8_RPCS_SS_CNT_ENABLE;
+	s_count = (rpcs_config & GEN8_RPCS_S_CNT_MASK) >> GEN8_RPCS_S_CNT_SHIFT;
+	ss_count = (rpcs_config & GEN8_RPCS_SS_CNT_MASK) >> GEN8_RPCS_SS_CNT_SHIFT;
+	eu_max = (rpcs_config & GEN8_RPCS_EU_MAX_MASK) >> GEN8_RPCS_EU_MAX_SHIFT;
+	eu_min = (rpcs_config & GEN8_RPCS_EU_MIN_MASK) >> GEN8_RPCS_EU_MIN_SHIFT;
+	printf("RPCS enabled: %s\n", yesno(rpcs_enable));
+	printf("Slice count enabled: %s, count: %u\n", yesno(s_enable), s_count);
+	printf("Subslice count enabled: %s, count: %u\n", yesno(ss_enable), ss_count);
+	printf("EU max: %u, min: %u\n", eu_max, eu_min);
+}
+
+/*
+ * Allocate an untiled buffer object of @size bytes and describe it in @buf
+ * with a 4096 byte stride. Exits the program if the allocation fails.
+ */
+static void init_buffer(drm_intel_bufmgr *bufmgr,
+			struct igt_buf *buf,
+			uint32_t size)
+{
+	/* Callers hand in uninitialised stack storage: clear every field. */
+	memset(buf, 0, sizeof(*buf));
+
+	buf->bo = drm_intel_bo_alloc(bufmgr, "", size, 4096);
+	if (!buf->bo) {
+		fprintf(stderr, "Failed to allocate %" PRIu32 " bytes\n", size);
+		exit(-1);
+	}
+
+	buf->size = size;
+	buf->tiling = I915_TILING_NONE;
+	buf->stride = 4096;
+}
+
+/* Return the time between @start and @end in microseconds. */
+static double elapsed(const struct timespec *start, const struct timespec *end)
+{
+	return 1e6*(end->tv_sec - start->tv_sec) + 1e-3*(end->tv_nsec - start->tv_nsec);
+}
+
+#define MI_STORE_REGISTER_MEM_64_BIT_ADDR	((0x24 << 23) | 2)
+
+/*
+ * Read GEN8_R_PWR_CLK_STATE from @context: a batch stores the register into
+ * a small buffer object, which is then mapped and read by the CPU.
+ * Exits the program if that buffer cannot be allocated or mapped.
+ */
+static uint32_t do_read_pwrclk_state(drm_intel_bufmgr *bufmgr,
+				     struct intel_batchbuffer *batch,
+				     drm_intel_context *context)
+{
+	uint32_t rpcs_config;
+	uint32_t *data;
+	drm_intel_bo *dst_bo;
+
+	dst_bo = drm_intel_bo_alloc(bufmgr, "dst", 4, 4096);
+	if (!dst_bo) {
+		fprintf(stderr, "Failed to allocate the readback buffer\n");
+		exit(-1);
+	}
+
+	BEGIN_BATCH(3, 1);
+	OUT_BATCH(MI_STORE_REGISTER_MEM_64_BIT_ADDR);
+	OUT_BATCH(GEN8_R_PWR_CLK_STATE);
+	OUT_RELOC(dst_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+	ADVANCE_BATCH();
+
+	intel_batchbuffer_flush_with_context(batch, context);
+
+	/* The CPU only reads the result back, so no write access is needed. */
+	if (drm_intel_bo_map(dst_bo, 0)) {
+		fprintf(stderr, "Failed to map the readback buffer\n");
+		exit(-1);
+	}
+
+	data = dst_bo->virtual;
+	rpcs_config = *data;
+
+	drm_intel_bo_unmap(dst_bo);
+
+	drm_intel_bo_unreference(dst_bo);
+
+	return rpcs_config;
+}
+
+#define LOCAL_MI_LOAD_REGISTER_IMM	(0x22 << 23)
+
+#define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2))
+#define PIPE_CONTROL_CS_STALL				(1<<20)
+#define PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH	(1<<12)
+#define PIPE_CONTROL_FLUSH_ENABLE			(1<<7)
+#define PIPE_CONTROL_DC_FLUSH_ENABLE			(1<<5)
+#define PIPE_CONTROL_DEPTH_CACHE_FLUSH		(1<<0)
+
+/*
+ * Append to @batch a register load that writes @rpcs_config into
+ * GEN8_R_PWR_CLK_STATE, followed by a PIPE_CONTROL with flush and CS stall
+ * bits set. No flush of the batch is issued by this function, and @context
+ * is not used. Exits the program if the scratch buffer cannot be allocated.
+ */
+static void emit_config_slice_count(drm_intel_bufmgr *bufmgr,
+				    struct intel_batchbuffer *batch,
+				    drm_intel_context *context,
+				    uint32_t rpcs_config)
+{
+	drm_intel_bo *dst_bo;
+
+	dst_bo = drm_intel_bo_alloc(bufmgr, "scratch", 4, 4096);
+	if (!dst_bo) {
+		fprintf(stderr, "Failed to allocate the scratch buffer\n");
+		exit(-1);
+	}
+
+	BEGIN_BATCH(9, 1);
+	OUT_BATCH(LOCAL_MI_LOAD_REGISTER_IMM | 1);
+	OUT_BATCH(GEN8_R_PWR_CLK_STATE);
+	OUT_BATCH(rpcs_config);
+	OUT_BATCH(GFX_OP_PIPE_CONTROL(6));
+	OUT_BATCH(PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+		  PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+		  PIPE_CONTROL_DC_FLUSH_ENABLE |
+		  PIPE_CONTROL_FLUSH_ENABLE |
+		  PIPE_CONTROL_CS_STALL);
+	OUT_RELOC(dst_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	ADVANCE_BATCH();
+
+	/*
+	 * NOTE(review): presumably the relocation above keeps the bo alive
+	 * until the batch has executed - confirm against libdrm.
+	 */
+	drm_intel_bo_unreference(dst_bo);
+}
+
+/*
+ * Run one workload with slice count @c: put @c into the slice count field of
+ * @rpcs_config, emit the register write, call @rendercopy from @src to @dst
+ * in @context and then call drm_intel_bo_wait_rendering() on @dst.
+ */
+static void do_test(igt_render_copyfunc_t rendercopy,
+		    drm_intel_bufmgr *bufmgr,
+		    drm_intel_context *context,
+		    struct intel_batchbuffer *batch,
+		    uint32_t rpcs_config,
+		    struct igt_buf *src,
+		    struct igt_buf *dst,
+		    unsigned int c)
+{
+	rpcs_config &= ~GEN8_RPCS_S_CNT_MASK;
+	rpcs_config |= (c << GEN8_RPCS_S_CNT_SHIFT);
+
+	emit_config_slice_count(bufmgr, batch, context, rpcs_config);
+	rendercopy(batch, context, src, 0, 0, 0, 0, dst, 0, 0);
+	drm_intel_bo_wait_rendering(dst->bo);
+}
+
+/*
+ * Time transitions between slice counts @c1 and @c2 and print the averages.
+ *
+ * Every iteration runs the workload twice per direction: the first run
+ * follows a change of slice count ("real"), the second repeats the same
+ * count ("baseline"). The printed value is the difference between the two
+ * means, in microseconds.
+ */
+static void do_measures(igt_render_copyfunc_t rendercopy,
+			drm_intel_bufmgr *bufmgr,
+			drm_intel_context *context,
+			struct intel_batchbuffer *batch,
+			uint32_t rpcs_config,
+			struct igt_buf *src,
+			struct igt_buf *dst,
+			unsigned int c1, unsigned int c2)
+{
+	unsigned int c = c1;
+	struct igt_mean real[2], baseline[2];
+	struct timespec start, end;
+	int i, j;
+
+	for (i = 0; i < 2; i++) {
+		igt_mean_init(&real[i]);
+		igt_mean_init(&baseline[i]);
+	}
+
+	/* Initial config and warm up */
+	do_test(rendercopy, bufmgr, context, batch, rpcs_config, src, dst, c);
+
+#define LOOPS 1000
+	for (j = 0; j < LOOPS; j++) {
+		/* i == 0 measures c1 -> c2, i == 1 measures c2 -> c1. */
+		c = c2;
+		for (i = 0; i < 2; i++) {
+			clock_gettime(CLOCK_MONOTONIC, &start);
+			do_test(rendercopy, bufmgr, context, batch,
+				rpcs_config, src, dst, c);
+			clock_gettime(CLOCK_MONOTONIC, &end);
+
+			igt_mean_add(&real[i], elapsed(&start, &end));
+
+			clock_gettime(CLOCK_MONOTONIC, &start);
+			do_test(rendercopy, bufmgr, context, batch,
+				rpcs_config, src, dst, c);
+			clock_gettime(CLOCK_MONOTONIC, &end);
+
+			igt_mean_add(&baseline[i], elapsed(&start, &end));
+
+			c = c1;
+		}
+	}
+
+	printf("Slice poweron time (%u -> %u): %7.3f us\n", c1, c2,
+	       igt_mean_get(&real[0]) - igt_mean_get(&baseline[0]));
+	printf("Slice shutdown time (%u -> %u): %7.3f us\n", c2, c1,
+	       igt_mean_get(&real[1]) - igt_mean_get(&baseline[1]));
+
+	printf("\n");
+}
+
+/*
+ * Read the current GEN8_R_PWR_CLK_STATE configuration, print it, and measure
+ * the transitions between every pair of slice counts from 1 up to the count
+ * read from the register. Exits the program if a required resource cannot
+ * be set up or if slice count control is not already enabled.
+ */
+static void slice_shutdown(int fd, int devid)
+{
+	igt_render_copyfunc_t rendercopy;
+	drm_intel_bufmgr *bufmgr;
+	drm_intel_context *context;
+	struct intel_batchbuffer *batch;
+	struct igt_buf src, dst;
+	uint32_t rpcs_config;
+	bool rpcs_enable, s_enable;
+	unsigned int s_count;
+	unsigned int i, j;
+
+	rendercopy = igt_get_render_copyfunc(devid);
+	if (!rendercopy) {
+		fprintf(stderr, "No render copy function for this device\n");
+		exit(-1);
+	}
+
+	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+	if (!bufmgr) {
+		fprintf(stderr, "Failed to initialise the buffer manager\n");
+		exit(-1);
+	}
+
+	context = drm_intel_gem_context_create(bufmgr);
+	if (!context) {
+		fprintf(stderr, "Failed to create a GEM context\n");
+		exit(-1);
+	}
+
+	batch = intel_batchbuffer_alloc(bufmgr, devid);
+
+	init_buffer(bufmgr, &src, 10*4096*4096);
+	init_buffer(bufmgr, &dst, 10*4096*4096);
+
+	rpcs_config = do_read_pwrclk_state(bufmgr, batch, context);
+	print_rpcs_config(rpcs_config);
+
+	rpcs_enable = rpcs_config & GEN8_RPCS_ENABLE;
+	s_enable = rpcs_config & GEN8_RPCS_S_CNT_ENABLE;
+
+	if (!rpcs_enable || !s_enable) {
+		fprintf(stderr, "We need slice count to be already enabled\n");
+		exit(-1);
+	}
+
+	s_count = (rpcs_config & GEN8_RPCS_S_CNT_MASK) >> GEN8_RPCS_S_CNT_SHIFT;
+
+	for (i = 1; i <= s_count; i++)
+		for (j = i + 1; j <= s_count; j++)
+			do_measures(rendercopy, bufmgr, context, batch,
+				    rpcs_config, &src, &dst, i, j);
+
+	/* Drop the workload buffers before tearing down the buffer manager. */
+	drm_intel_bo_unreference(src.bo);
+	drm_intel_bo_unreference(dst.bo);
+
+	intel_batchbuffer_free(batch);
+	drm_intel_gem_context_destroy(context);
+	drm_intel_bufmgr_destroy(bufmgr);
+}
+
+/*
+ * Open the Intel DRM device, check that it is gen8 or newer and run the
+ * slice shutdown measurements.
+ */
+int main(int argc, char **argv)
+{
+	int fd, devid;
+
+	fd = drm_open_driver(DRIVER_INTEL);
+
+	devid = intel_get_drm_devid(fd);
+	if (intel_gen(devid) < 8) {
+		fprintf(stderr, "gen8+ required, yours is gen%u\n",
+			intel_gen(devid));
+		exit(-1);
+	}
+
+	slice_shutdown(fd, devid);
+
+	close(fd);
+
+	return 0;
+}

[RFC] benchmarks/gem_slice_shutdown: microbenchmark for slice shutdown delays

Commit Message

Patch