From patchwork Mon Apr 6 13:55:12 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Lionel Landwerlin X-Patchwork-Id: 11475565 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 1076A14DD for ; Mon, 6 Apr 2020 13:55:25 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id EDAA121D79 for ; Mon, 6 Apr 2020 13:55:24 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org EDAA121D79 Authentication-Results: mail.kernel.org; dmarc=fail (p=none dis=none) header.from=intel.com Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 845206E3C7; Mon, 6 Apr 2020 13:55:24 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from mga09.intel.com (mga09.intel.com [134.134.136.24]) by gabe.freedesktop.org (Postfix) with ESMTPS id 849F46E3C7 for ; Mon, 6 Apr 2020 13:55:23 +0000 (UTC) IronPort-SDR: 2Kufvqtj+F1B2MHFrRBi2TNZ0K18P9MEbnTeyE2ouZf5VJpuk565moGJRymAMJhZtqzGVqJOV5 yDjH5DW5FmWw== X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga005.fm.intel.com ([10.253.24.32]) by orsmga102.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 06 Apr 2020 06:55:22 -0700 IronPort-SDR: kp9rasYx1eFkp7XnKt84pC+zGb/yViLmDdfEPA9G1P+XZ7OwTcj+MzB648/mZbIG79xkD6Z5wn RyWSf7Or4R7g== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.72,351,1580803200"; d="scan'208";a="450838978" Received: from unknown (HELO delly.ger.corp.intel.com) ([10.252.48.224]) by fmsmga005.fm.intel.com with ESMTP; 06 Apr 2020 06:55:21 -0700 From: Lionel Landwerlin To: intel-gfx@lists.freedesktop.org Date: Mon, 6 Apr 2020 16:55:12 +0300 Message-Id: <20200406135514.569205-1-lionel.g.landwerlin@intel.com> X-Mailer: git-send-email 2.26.0 MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 1/3] drm/i915/perf: break OA config buffer object in 2 X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" We want to enable performance monitoring on multiple contexts to cover the Iris use case of using 2 GEM contexts (3D & compute). So start by breaking the OA configuration BO which contains global & per context register writes. NOA muxes & OA configurations are global, while FLEXEU register configurations are per context. v2: Use an offset into the same VMA (Chris) Signed-off-by: Lionel Landwerlin --- drivers/gpu/drm/i915/i915_perf.c | 176 ++++++++++++++++++++----------- 1 file changed, 116 insertions(+), 60 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 2f78b147bb2d..e7bbb09e84a1 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -372,6 +372,7 @@ struct i915_oa_config_bo { struct i915_oa_config *oa_config; struct i915_vma *vma; + uint32_t per_context_offset; }; static struct ctl_table_header *sysctl_header; @@ -1826,37 +1827,43 @@ static struct i915_oa_config_bo * alloc_oa_config_buffer(struct i915_perf_stream *stream, struct i915_oa_config *oa_config) { - struct drm_i915_gem_object *obj; struct i915_oa_config_bo *oa_bo; + struct drm_i915_gem_object *obj; size_t config_length = 0; - u32 *cs; + u32 *cs_start, *cs; int err; oa_bo = kzalloc(sizeof(*oa_bo), GFP_KERNEL); if (!oa_bo) return ERR_PTR(-ENOMEM); + /* + * Global configuration requires a jump into the NOA wait BO for it to + * apply. + */ config_length += num_lri_dwords(oa_config->mux_regs_len); config_length += num_lri_dwords(oa_config->b_counter_regs_len); - config_length += num_lri_dwords(oa_config->flex_regs_len); config_length += 3; /* MI_BATCH_BUFFER_START */ + + config_length += num_lri_dwords(oa_config->flex_regs_len); + config_length += 1 /* MI_BATCH_BUFFER_END */; + config_length = ALIGN(sizeof(u32) * config_length, I915_GTT_PAGE_SIZE); - obj = i915_gem_object_create_shmem(stream->perf->i915, config_length); + obj = i915_gem_object_create_shmem(stream->perf->i915, + config_length); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto err_free; } - cs = i915_gem_object_pin_map(obj, I915_MAP_WB); - if (IS_ERR(cs)) { - err = PTR_ERR(cs); - goto err_oa_bo; + cs_start = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(cs_start)) { + err = PTR_ERR(cs_start); + goto err_bo; } - cs = write_cs_mi_lri(cs, - oa_config->mux_regs, - oa_config->mux_regs_len); + cs = cs_start; cs = write_cs_mi_lri(cs, oa_config->b_counter_regs, oa_config->b_counter_regs_len); @@ -1871,6 +1878,14 @@ alloc_oa_config_buffer(struct i915_perf_stream *stream, *cs++ = i915_ggtt_offset(stream->noa_wait); *cs++ = 0; + oa_bo->per_context_offset = 4 * (cs - cs_start); + + cs = write_cs_mi_lri(cs, + oa_config->mux_regs, + oa_config->mux_regs_len); + + *cs++ = MI_BATCH_BUFFER_END; + i915_gem_object_flush_map(obj); i915_gem_object_unpin_map(obj); @@ -1879,7 +1894,7 @@ alloc_oa_config_buffer(struct i915_perf_stream *stream, NULL); if (IS_ERR(oa_bo->vma)) { err = PTR_ERR(oa_bo->vma); - goto err_oa_bo; + goto err_bo; } oa_bo->oa_config = i915_oa_config_get(oa_config); @@ -1887,15 +1902,15 @@ alloc_oa_config_buffer(struct i915_perf_stream *stream, return oa_bo; -err_oa_bo: +err_bo: i915_gem_object_put(obj); err_free: kfree(oa_bo); return ERR_PTR(err); } -static struct i915_vma * -get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config) +static struct i915_oa_config_bo * +get_oa_bo(struct i915_perf_stream *stream, struct i915_oa_config *oa_config) { struct i915_oa_config_bo *oa_bo; @@ -1908,34 +1923,31 @@ get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config) memcmp(oa_bo->oa_config->uuid, oa_config->uuid, sizeof(oa_config->uuid)) == 0) - goto out; + return oa_bo; } - oa_bo = alloc_oa_config_buffer(stream, oa_config); - if (IS_ERR(oa_bo)) - return ERR_CAST(oa_bo); - -out: - return i915_vma_get(oa_bo->vma); + return alloc_oa_config_buffer(stream, oa_config); } static int emit_oa_config(struct i915_perf_stream *stream, struct i915_oa_config *oa_config, struct intel_context *ce, - struct i915_active *active) + struct i915_active *active, + bool global) { + struct i915_oa_config_bo *oa_bo; struct i915_request *rq; - struct i915_vma *vma; + u64 vma_offset; int err; - vma = get_oa_vma(stream, oa_config); - if (IS_ERR(vma)) - return PTR_ERR(vma); + oa_bo = get_oa_bo(stream, oa_config); + if (IS_ERR(oa_bo)) + return PTR_ERR(oa_bo); - err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); + err = i915_vma_pin(oa_bo->vma, 0, 0, PIN_GLOBAL | PIN_HIGH); if (err) - goto err_vma_put; + return err; intel_engine_pm_get(ce->engine); rq = i915_request_create(ce); @@ -1957,16 +1969,19 @@ emit_oa_config(struct i915_perf_stream *stream, goto err_add_request; } - i915_vma_lock(vma); - err = i915_request_await_object(rq, vma->obj, 0); + i915_vma_lock(oa_bo->vma); + err = i915_request_await_object(rq, oa_bo->vma->obj, 0); if (!err) - err = i915_vma_move_to_active(vma, rq, 0); - i915_vma_unlock(vma); + err = i915_vma_move_to_active(oa_bo->vma, rq, 0); + i915_vma_unlock(oa_bo->vma); if (err) goto err_add_request; - err = rq->engine->emit_bb_start(rq, - vma->node.start, 0, + vma_offset = oa_bo->vma->node.start; + if (!global) + vma_offset += oa_bo->per_context_offset; + + err = rq->engine->emit_bb_start(rq, vma_offset, 0, I915_DISPATCH_SECURE); if (err) goto err_add_request; @@ -1974,9 +1989,7 @@ emit_oa_config(struct i915_perf_stream *stream, err_add_request: i915_request_add(rq); err_vma_unpin: - i915_vma_unpin(vma); -err_vma_put: - i915_vma_put(vma); + i915_vma_unpin(oa_bo->vma); return err; } @@ -1990,6 +2003,7 @@ hsw_enable_metric_set(struct i915_perf_stream *stream, struct i915_active *active) { struct intel_uncore *uncore = stream->uncore; + int err; /* * PRM: @@ -2006,9 +2020,17 @@ hsw_enable_metric_set(struct i915_perf_stream *stream, intel_uncore_rmw(uncore, GEN6_UCGCTL1, 0, GEN6_CSUNIT_CLOCK_GATE_DISABLE); - return emit_oa_config(stream, - stream->oa_config, oa_context(stream), - active); + err = emit_oa_config(stream, stream->oa_config, + oa_context(stream), + active, + false /* global */); + if (err) + return err; + + return emit_oa_config(stream, stream->oa_config, + oa_context(stream), + active, + true /* global */); } static void hsw_disable_metric_set(struct i915_perf_stream *stream) @@ -2419,7 +2441,7 @@ gen8_enable_metric_set(struct i915_perf_stream *stream, { struct intel_uncore *uncore = stream->uncore; struct i915_oa_config *oa_config = stream->oa_config; - int ret; + int err; /* * We disable slice/unslice clock ratio change reports on SKL since @@ -2455,13 +2477,21 @@ gen8_enable_metric_set(struct i915_perf_stream *stream, * to make sure all slices/subslices are ON before writing to NOA * registers. */ - ret = lrc_configure_all_contexts(stream, oa_config, active); - if (ret) - return ret; + err = lrc_configure_all_contexts(stream, oa_config, active); + if (err) + return err; - return emit_oa_config(stream, - stream->oa_config, oa_context(stream), - active); + err = emit_oa_config(stream, oa_config, + oa_context(stream), + active, + false /* global */); + if (err) + return err; + + return emit_oa_config(stream, stream->oa_config, + oa_context(stream), + active, + true /* global */); } static u32 oag_report_ctx_switches(const struct i915_perf_stream *stream) @@ -2507,9 +2537,9 @@ gen12_enable_metric_set(struct i915_perf_stream *stream, return ret; /* - * For Gen12, performance counters are context - * saved/restored. Only enable it for the context that - * requested this. + * For Gen12, performance counters are also context saved/restored on + * another set of performance registers. Configure the unit dealing + * with those. */ if (stream->ctx) { ret = gen12_configure_oar_context(stream, active); @@ -2517,9 +2547,17 @@ gen12_enable_metric_set(struct i915_perf_stream *stream, return ret; } - return emit_oa_config(stream, - stream->oa_config, oa_context(stream), - active); + ret = emit_oa_config(stream, oa_config, + oa_context(stream), + active, + false /* global */); + if (ret) + return ret; + + return emit_oa_config(stream, stream->oa_config, + oa_context(stream), + active, + true /* global */); } static void gen8_disable_metric_set(struct i915_perf_stream *stream) @@ -3174,6 +3212,7 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream, unsigned long metrics_set) { struct i915_oa_config *config; + struct i915_active *active = NULL; long ret = stream->oa_config->id; config = i915_perf_get_oa_config(stream->perf, metrics_set); @@ -3181,7 +3220,11 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream, return -EINVAL; if (config != stream->oa_config) { - int err; + active = i915_active_create(); + if (!active) { + ret = -ENOMEM; + goto err_config; + } /* * If OA is bound to a specific context, emit the @@ -3192,13 +3235,26 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream, * When set globally, we use a low priority kernel context, * so it will effectively take effect when idle. */ - err = emit_oa_config(stream, config, oa_context(stream), NULL); - if (!err) - config = xchg(&stream->oa_config, config); - else - ret = err; + ret = emit_oa_config(stream, config, + oa_context(stream), + active, + false /* global */); + if (ret) + goto err_active; + + ret = emit_oa_config(stream, config, + oa_context(stream), + active, + true /* global */); + if (ret) + goto err_active; + + config = xchg(&stream->oa_config, config); } +err_active: + i915_active_put(active); +err_config: i915_oa_config_put(config); return ret;