--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1205,6 +1205,13 @@ struct i915_perf_stream {
*/
intel_wakeref_t wakeref;
+ /**
+ * @initial_config_rq: First request run at the opening of the i915
+ * perf stream to configure the HW. Should be NULL after the perf
+ * stream has been opened successfully.
+ */
+ struct i915_request *initial_config_rq;
+
/**
* @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*`
* properties given when opening a stream, representing the contents
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -392,6 +392,19 @@ void i915_oa_config_release(struct kref *ref)
kfree(oa_config);
}
+static void i915_oa_config_dispose_buffers(struct drm_i915_private *i915)
+{
+ struct i915_oa_config *oa_config, *next;
+
+ mutex_lock(&i915->perf.metrics_lock);
+ list_for_each_entry_safe(oa_config, next, &i915->perf.metrics_buffers, vma_link) {
+ list_del(&oa_config->vma_link);
+ i915_gem_object_put(oa_config->obj);
+ oa_config->obj = NULL;
+ }
+ mutex_unlock(&i915->perf.metrics_lock);
+}
+
static u32 *write_cs_mi_lri(u32 *cs, const struct i915_oa_reg *reg_data, u32 n_regs)
{
u32 i;
@@ -1449,6 +1462,14 @@ static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
}
}
+static void free_noa_wait(struct drm_i915_private *i915)
+{
+ mutex_lock(&i915->drm.struct_mutex);
+ i915_vma_unpin_and_release(&i915->perf.oa.noa_wait,
+ I915_VMA_RELEASE_MAP);
+ mutex_unlock(&i915->drm.struct_mutex);
+}
+
static void
free_oa_buffer(struct drm_i915_private *i915)
{
@@ -1468,16 +1489,17 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
BUG_ON(stream != dev_priv->perf.oa.exclusive_stream);
+ dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
+
/*
* Unset exclusive_stream first, it will be checked while disabling
* the metric set on gen8+.
*/
mutex_lock(&dev_priv->drm.struct_mutex);
dev_priv->perf.oa.exclusive_stream = NULL;
- dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
- i915_vma_unpin_and_release(&dev_priv->perf.oa.noa_wait, 0);
mutex_unlock(&dev_priv->drm.struct_mutex);
+ free_noa_wait(dev_priv);
free_oa_buffer(dev_priv);
intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
@@ -1710,6 +1732,10 @@ static int alloc_noa_wait(struct drm_i915_private *i915)
return PTR_ERR(bo);
}
+ ret = i915_mutex_lock_interruptible(&i915->drm);
+ if (ret)
+ goto err_unref;
+
/*
* We pin in GGTT because we jump into this buffer now because
* multiple OA config BOs will have a jump to this address and it
@@ -1717,10 +1743,13 @@ static int alloc_noa_wait(struct drm_i915_private *i915)
*/
vma = i915_gem_object_ggtt_pin(bo, NULL, 0, 4096, 0);
if (IS_ERR(vma)) {
+ mutex_unlock(&i915->drm.struct_mutex);
ret = PTR_ERR(vma);
goto err_unref;
}
+ mutex_unlock(&i915->drm.struct_mutex);
+
batch = cs = i915_gem_object_pin_map(bo, I915_MAP_WB);
if (IS_ERR(batch)) {
ret = PTR_ERR(batch);
@@ -1854,7 +1883,11 @@ static int alloc_noa_wait(struct drm_i915_private *i915)
return 0;
err_unpin:
- __i915_vma_unpin(vma);
+ mutex_lock(&i915->drm.struct_mutex);
+ i915_vma_unpin_and_release(&i915->perf.oa.noa_wait, 0);
+ mutex_unlock(&i915->drm.struct_mutex);
+
+ return ret;
err_unref:
i915_gem_object_put(bo);
@@ -1862,23 +1895,55 @@ static int alloc_noa_wait(struct drm_i915_private *i915)
return ret;
}
-static void config_oa_regs(struct drm_i915_private *dev_priv,
- const struct i915_oa_reg *regs,
- u32 n_regs)
+static int emit_oa_config(struct drm_i915_private *i915,
+ struct i915_perf_stream *stream)
{
- u32 i;
+ struct i915_oa_config *oa_config = stream->oa_config;
+ struct i915_request *rq = stream->initial_config_rq;
+ struct i915_vma *vma;
+ u32 *cs;
+ int err;
- for (i = 0; i < n_regs; i++) {
- const struct i915_oa_reg *reg = regs + i;
+ vma = i915_vma_instance(oa_config->obj, &i915->ggtt.vm, NULL);
+ if (unlikely(IS_ERR(vma)))
+ return PTR_ERR(vma);
+
+ err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
+ if (err)
+ return err;
- I915_WRITE(reg->addr, reg->value);
+ err = i915_vma_move_to_active(vma, rq, 0);
+ if (err) {
+ i915_vma_unpin(vma);
+ return err;
}
+
+ cs = intel_ring_begin(rq, INTEL_GEN(i915) >= 8 ? 4 : 2);
+ if (IS_ERR(cs)) {
+ i915_vma_unpin(vma);
+ return PTR_ERR(cs);
+ }
+
+ if (INTEL_GEN(i915) >= 8) {
+ *cs++ = MI_BATCH_BUFFER_START_GEN8;
+ *cs++ = lower_32_bits(vma->node.start);
+ *cs++ = upper_32_bits(vma->node.start);
+ *cs++ = MI_NOOP;
+ } else {
+ *cs++ = MI_BATCH_BUFFER_START;
+ *cs++ = vma->node.start;
+ }
+
+ intel_ring_advance(rq, cs);
+
+ i915_vma_unpin(vma);
+
+ return 0;
}
static int hsw_enable_metric_set(struct i915_perf_stream *stream)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
- const struct i915_oa_config *oa_config = stream->oa_config;
/* PRM:
*
@@ -1894,35 +1959,7 @@ static int hsw_enable_metric_set(struct i915_perf_stream *stream)
I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) |
GEN6_CSUNIT_CLOCK_GATE_DISABLE));
- config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len);
-
- /* It apparently takes a fairly long time for a new MUX
- * configuration to be be applied after these register writes.
- * This delay duration was derived empirically based on the
- * render_basic config but hopefully it covers the maximum
- * configuration latency.
- *
- * As a fallback, the checks in _append_oa_reports() to skip
- * invalid OA reports do also seem to work to discard reports
- * generated before this config has completed - albeit not
- * silently.
- *
- * Unfortunately this is essentially a magic number, since we
- * don't currently know of a reliable mechanism for predicting
- * how long the MUX config will take to apply and besides
- * seeing invalid reports we don't know of a reliable way to
- * explicitly check that the MUX config has landed.
- *
- * It's even possible we've miss characterized the underlying
- * problem - it just seems like the simplest explanation why
- * a delay at this location would mitigate any invalid reports.
- */
- usleep_range(15000, 20000);
-
- config_oa_regs(dev_priv, oa_config->b_counter_regs,
- oa_config->b_counter_regs_len);
-
- return 0;
+ return emit_oa_config(dev_priv, stream);
}
static void hsw_disable_metric_set(struct drm_i915_private *dev_priv)
@@ -2027,10 +2064,18 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
{
unsigned int map_type = i915_coherent_map_type(dev_priv);
struct i915_gem_context *ctx;
- struct i915_request *rq;
int ret;
- lockdep_assert_held(&dev_priv->drm.struct_mutex);
+ /* When called without a configuration, we're tearing down the i915
+ * perf stream. Don't be interruptible in that case.
+ */
+ if (oa_config) {
+ ret = i915_mutex_lock_interruptible(&dev_priv->drm);
+ if (ret)
+ return ret;
+ } else {
+ mutex_lock(&dev_priv->drm.struct_mutex);
+ }
/*
* The OA register config is setup through the context image. This image
@@ -2049,7 +2094,7 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
I915_WAIT_LOCKED,
MAX_SCHEDULE_TIMEOUT);
if (ret)
- return ret;
+ goto unlock;
/* Update all contexts now that we've stalled the submission. */
list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
@@ -2072,7 +2117,8 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
map_type);
if (IS_ERR(regs)) {
i915_gem_context_unlock_engines(ctx);
- return PTR_ERR(regs);
+ ret = PTR_ERR(regs);
+ goto unlock;
}
ce->state->obj->mm.dirty = true;
@@ -2086,16 +2132,14 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
}
/*
- * Apply the configuration by doing one context restore of the edited
- * context image.
+ * The above configuration will be applied when the batch
+ * emitted by emit_oa_config() executes.
*/
- rq = i915_request_create(dev_priv->engine[RCS0]->kernel_context);
- if (IS_ERR(rq))
- return PTR_ERR(rq);
- i915_request_add(rq);
+unlock:
+ mutex_unlock(&dev_priv->drm.struct_mutex);
- return 0;
+ return ret;
}
static int gen8_enable_metric_set(struct i915_perf_stream *stream)
@@ -2142,35 +2186,7 @@ static int gen8_enable_metric_set(struct i915_perf_stream *stream)
if (ret)
return ret;
- config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len);
-
- /* It apparently takes a fairly long time for a new MUX
- * configuration to be be applied after these register writes.
- * This delay duration was derived empirically based on the
- * render_basic config but hopefully it covers the maximum
- * configuration latency.
- *
- * As a fallback, the checks in _append_oa_reports() to skip
- * invalid OA reports do also seem to work to discard reports
- * generated before this config has completed - albeit not
- * silently.
- *
- * Unfortunately this is essentially a magic number, since we
- * don't currently know of a reliable mechanism for predicting
- * how long the MUX config will take to apply and besides
- * seeing invalid reports we don't know of a reliable way to
- * explicitly check that the MUX config has landed.
- *
- * It's even possible we've miss characterized the underlying
- * problem - it just seems like the simplest explanation why
- * a delay at this location would mitigate any invalid reports.
- */
- usleep_range(15000, 20000);
-
- config_oa_regs(dev_priv, oa_config->b_counter_regs,
- oa_config->b_counter_regs_len);
-
- return 0;
+ return emit_oa_config(dev_priv, stream);
}
static void gen8_disable_metric_set(struct drm_i915_private *dev_priv)
@@ -2341,7 +2357,9 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
struct perf_open_properties *props)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
+ struct drm_i915_gem_object *obj;
int format_size;
+ long timeout;
int ret;
/* If the sysfs metrics/ directory wasn't registered for some
@@ -2425,13 +2443,6 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
}
}
- ret = i915_perf_get_oa_config(dev_priv, props->metrics_set,
- &stream->oa_config, NULL);
- if (ret) {
- DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set);
- goto err_config;
- }
-
ret = alloc_noa_wait(dev_priv);
if (ret) {
DRM_DEBUG("Unable to allocate NOA wait batch buffer\n");
@@ -2457,47 +2468,90 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
if (ret)
goto err_oa_buf_alloc;
+ ret = i915_perf_get_oa_config(dev_priv, props->metrics_set,
+ &stream->oa_config, &obj);
+ if (ret) {
+ DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set);
+ goto err_config;
+ }
+
+ /*
+ * We just need the buffer to be created, but not our own reference on
+ * it as the oa_config already has one.
+ */
+ i915_gem_object_put(obj);
+
+ stream->initial_config_rq =
+ i915_request_create(dev_priv->engine[RCS0]->kernel_context);
+ if (IS_ERR(stream->initial_config_rq)) {
+ ret = PTR_ERR(stream->initial_config_rq);
+ goto err_initial_config;
+ }
+
+ stream->ops = &i915_oa_stream_ops;
+
ret = i915_mutex_lock_interruptible(&dev_priv->drm);
if (ret)
goto err_lock;
- stream->ops = &i915_oa_stream_ops;
+ ret = i915_active_request_set(&dev_priv->engine[RCS0]->last_oa_config,
+ stream->initial_config_rq);
+ if (ret) {
+ mutex_unlock(&dev_priv->drm.struct_mutex);
+ goto err_lock;
+ }
+
dev_priv->perf.oa.exclusive_stream = stream;
+ mutex_unlock(&dev_priv->drm.struct_mutex);
+
ret = dev_priv->perf.oa.ops.enable_metric_set(stream);
if (ret) {
DRM_DEBUG("Unable to enable metric set\n");
goto err_enable;
}
- DRM_DEBUG("opening stream oa config uuid=%s\n", stream->oa_config->uuid);
+ i915_request_get(stream->initial_config_rq);
- mutex_unlock(&dev_priv->drm.struct_mutex);
+ i915_request_add(stream->initial_config_rq);
+
+ timeout = i915_request_wait(stream->initial_config_rq,
+ I915_WAIT_INTERRUPTIBLE,
+ MAX_SCHEDULE_TIMEOUT);
+ i915_request_put(stream->initial_config_rq);
+ stream->initial_config_rq = NULL;
+
+ ret = timeout < 0 ? timeout : 0;
+ if (ret)
+ goto err_enable;
+
+ DRM_DEBUG("opening stream oa config uuid=%s\n", stream->oa_config->uuid);
return 0;
err_enable:
+ mutex_lock(&dev_priv->drm.struct_mutex);
dev_priv->perf.oa.exclusive_stream = NULL;
- dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
mutex_unlock(&dev_priv->drm.struct_mutex);
+ dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
err_lock:
- free_oa_buffer(dev_priv);
+ if (stream->initial_config_rq)
+	i915_request_add(stream->initial_config_rq);
-err_oa_buf_alloc:
+err_initial_config:
i915_oa_config_put(stream->oa_config);
+ i915_oa_config_dispose_buffers(dev_priv);
+
+err_config:
+ free_oa_buffer(dev_priv);
+err_oa_buf_alloc:
intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref);
- mutex_lock(&dev_priv->drm.struct_mutex);
- i915_vma_unpin_and_release(&dev_priv->perf.oa.noa_wait, 0);
- mutex_unlock(&dev_priv->drm.struct_mutex);
+ free_noa_wait(dev_priv);
err_noa_wait_alloc:
- i915_oa_config_put(stream->oa_config);
-
-err_config:
if (stream->ctx)
oa_put_render_ctx_id(stream);
@@ -2859,20 +2913,13 @@ static int i915_perf_release(struct inode *inode, struct file *file)
{
struct i915_perf_stream *stream = file->private_data;
struct drm_i915_private *dev_priv = stream->dev_priv;
- struct i915_oa_config *oa_config, *next;
mutex_lock(&dev_priv->perf.lock);
i915_perf_destroy_locked(stream);
/* Dispose of all oa config batch buffers. */
- mutex_lock(&dev_priv->perf.metrics_lock);
- list_for_each_entry_safe(oa_config, next, &dev_priv->perf.metrics_buffers, vma_link) {
- list_del(&oa_config->vma_link);
- i915_gem_object_put(oa_config->obj);
- oa_config->obj = NULL;
- }
- mutex_unlock(&dev_priv->perf.metrics_lock);
+ i915_oa_config_dispose_buffers(dev_priv);
mutex_unlock(&dev_priv->perf.lock);
We haven't run into issues with programming the global OA/NOA registers configuration from CPU so far, but HW engineers actually recommend doing this from the command streamer. Since we have a command buffer prepared for the execbuffer side of things, we can reuse that approach here too. This also allows us to significantly reduce the amount of time we hold the main lock. v2: Drop the global lock as much as possible Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> --- drivers/gpu/drm/i915/i915_drv.h | 7 + drivers/gpu/drm/i915/i915_perf.c | 261 ++++++++++++++++++------------- 2 files changed, 161 insertions(+), 107 deletions(-)