[v3,6/7] drm/i915/perf: allow holding preemption on filtered ctx

Message ID	20190604131140.12647-7-lionel.g.landwerlin@intel.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <intel-gfx-bounces@lists.freedesktop.org> From: Lionel Landwerlin <lionel.g.landwerlin@intel.com> To: intel-gfx@lists.freedesktop.org Date: Tue, 4 Jun 2019 16:11:39 +0300 Message-Id: <20190604131140.12647-7-lionel.g.landwerlin@intel.com> In-Reply-To: <20190604131140.12647-1-lionel.g.landwerlin@intel.com> References: <20190604131140.12647-1-lionel.g.landwerlin@intel.com> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH v3 6/7] drm/i915/perf: allow holding preemption on filtered ctx Precedence: list Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64 Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>
Series	drm/i915: Vulkan performance query support \| expand [v3,0/7] drm/i915: Vulkan performance query support [v3,1/7] drm/i915/perf: introduce a versioning of the i915-perf uapi [v3,2/7] drm/i915/perf: allow for CS OA configs to be created lazily [v3,3/7] drm/i915: introduce a mechanism to extend execbuf2 [v3,4/7] drm/i915: add syncobj timeline support [v3,5/7] drm/i915: add a new perf configuration execbuf parameter [v3,6/7] drm/i915/perf: allow holding preemption on filtered ctx [v3,7/7] drm/i915: add support for perf configuration queries

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 4a785999a9c5..59ec1c0a8879 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -2100,6 +2100,14 @@ static int eb_oa_config(struct i915_execbuffer *eb) if (!eb->oa_config) return 0; + /* + * If the perf stream was opened with hold preemption, flag the + * request properly so that the priority of the request is bumped once + * it reaches the execlist ports. + */ + if (eb->i915->perf.oa.exclusive_stream->hold_preemption) + eb->request->has_perf = true; + /* * If the config hasn't changed, skip reconfiguring the HW (this is * subject to a delay we want to avoid has much as possible). diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index ed19f4e53d31..4046f045408b 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -683,6 +683,12 @@ static void port_assign(struct execlist_port *port, struct i915_request *rq) if (port_isset(port)) i915_request_put(port_request(port)); + if (rq->has_perf) { + rq->sched.attr.priority = + (I915_PRIORITY_MASK & rq->sched.attr.priority) | + I915_USER_PRIORITY(I915_PRIORITY_PERF); + } + port_set(port, port_pack(i915_request_get(rq), port_count(port))); } diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 75029d1a3802..984b76a09cfa 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -476,7 +476,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data, value = INTEL_INFO(dev_priv)->has_coherent_ggtt; break; case I915_PARAM_PERF_REVISION: - value = 1; + value = 2; break; case I915_PARAM_HAS_EXEC_PERF_CONFIG: /* Obviously requires perf support. */ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index aefdae856e77..57f915751d76 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1396,6 +1396,14 @@ struct i915_perf_stream { */ bool enabled; + /** + * @hold_preemption: Whether preemption is put on hold for command + * submissions done on the @ctx. This is useful for some drivers that + * cannot easily post process the OA buffer context to subtract delta + * of performance counters not associated with @ctx. + */ + bool hold_preemption; + /** * @ops: The callbacks providing the implementation of this specific * type of configured stream. diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 82a282f668c0..f8897e3cc862 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -343,6 +343,8 @@ static const struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = { * struct perf_open_properties - for validated properties given to open a stream * @sample_flags: `DRM_I915_PERF_PROP_SAMPLE_*` properties are tracked as flags * @single_context: Whether a single or all gpu contexts should be monitored + * @hold_preemption: Whether the preemption is disabled for the filtered + * context * @ctx_handle: A gem ctx handle for use with @single_context * @metrics_set: An ID for an OA unit metric set advertised via sysfs * @oa_format: An OA unit HW report format @@ -357,6 +359,7 @@ struct perf_open_properties { u32 sample_flags; u64 single_context:1; + u64 hold_preemption:1; u64 ctx_handle; /* OA sampling state */ @@ -2170,6 +2173,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, stream->sample_flags |= SAMPLE_OA_REPORT; stream->sample_size += format_size; + stream->hold_preemption = props->hold_preemption; + dev_priv->perf.oa.oa_buffer.format_size = format_size; if (WARN_ON(dev_priv->perf.oa.oa_buffer.format_size == 0)) return -EINVAL; @@ -2695,6 +2700,15 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, } } + if (props->hold_preemption) { + if (!props->single_context) { + DRM_DEBUG("preemption disable with no context\n"); + ret = -EINVAL; + goto err; + } + privileged_op = true; + } + /* * On Haswell the OA unit supports clock gating off for a specific * context and in this mode there's no visibility of metrics for the @@ -2709,8 +2723,9 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, * MI_REPORT_PERF_COUNT commands and so consider it a privileged op to * enable the OA unit by default. */ - if (IS_HASWELL(dev_priv) && specific_ctx) + if (IS_HASWELL(dev_priv) && specific_ctx && !props->hold_preemption) { privileged_op = false; + } /* Similar to perf's kernel.perf_paranoid_cpu sysctl option * we check a dev.i915.perf_stream_paranoid sysctl option @@ -2719,7 +2734,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, */ if (privileged_op && i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) { - DRM_DEBUG("Insufficient privileges to open system-wide i915 perf stream\n"); + DRM_DEBUG("Insufficient privileges to open i915 perf stream\n"); ret = -EACCES; goto err_ctx; } @@ -2911,6 +2926,9 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, props->oa_periodic = true; props->oa_period_exponent = value; break; + case DRM_I915_PERF_PROP_HOLD_PREEMPTION: + props->hold_preemption = value != 0 ? 1 : 0; + break; case DRM_I915_PERF_PROP_MAX: MISSING_CASE(id); return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h index 49709de69875..fc1c1fe7ce58 100644 --- a/drivers/gpu/drm/i915/i915_priolist_types.h +++ b/drivers/gpu/drm/i915/i915_priolist_types.h @@ -17,6 +17,13 @@ enum { I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY, I915_PRIORITY_MAX = I915_CONTEXT_MAX_USER_PRIORITY + 1, + /* Requests containing performance queries must not be preempted by + * another context. They get scheduled with their default priority and + * once they reach the execlist ports we bump them to + * I915_PRIORITY_PERF so that they stick to the HW until they finish. + */ + I915_PRIORITY_PERF = I915_CONTEXT_MAX_USER_PRIORITY + 2, + I915_PRIORITY_INVALID = INT_MIN }; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index da1e6984a8cc..48dbe988db23 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -725,6 +725,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) rq->batch = NULL; rq->capture_list = NULL; rq->waitboost = false; + rq->has_perf = false; rq->execution_mask = ALL_ENGINES; INIT_LIST_HEAD(&rq->active_list); diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index c9f7d07991c8..ab09b5bb89f7 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -216,6 +216,7 @@ struct i915_request { unsigned long emitted_jiffies; bool waitboost; + bool has_perf; /** engine->request_list entry for this request */ struct list_head link; diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index a4f98ccef0fe..cba3f50dc1fe 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -717,6 +717,12 @@ static void port_assign(struct execlist_port *port, struct i915_request *rq) { GEM_BUG_ON(port_isset(port)); + if (rq->has_perf) { + rq->sched.attr.priority = + (I915_PRIORITY_MASK & rq->sched.attr.priority) | + I915_USER_PRIORITY(I915_PRIORITY_PERF); + } + port_set(port, i915_request_get(rq)); } diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 7f770183ee31..8ffdbc0e4616 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -2001,6 +2001,16 @@ enum drm_i915_perf_property_id { */ DRM_I915_PERF_PROP_OA_EXPONENT, + /** + * Specifying this property is only valid when specify a context to + * filter with DRM_I915_PERF_PROP_CTX_HANDLE. Specifying this property + * will hold preemption of the particular context we want to gather + * performance data about. + * + * This property is available in perf revision 2. + */ + DRM_I915_PERF_PROP_HOLD_PREEMPTION, + DRM_I915_PERF_PROP_MAX /* non-ABI */ };

[v3,6/7] drm/i915/perf: allow holding preemption on filtered ctx

Commit Message

Comments

Patch