Message ID | 20190521140855.3957-5-lionel.g.landwerlin@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | drm/i915: Vulkan performance query support | expand |
Quoting Lionel Landwerlin (2019-05-21 15:08:54) > + if (eb->oa_config && > + eb->oa_config != eb->i915->perf.oa.exclusive_stream->oa_config) { But the oa_config is not ordered with respect to requests, and the registers changed here are not context saved and so may be changed by a third party before execution. The first party must presumably dropped the perf_fd before then, so maybe you don't care? Hmm, doesn't even take a third party as the perf_fd owner may change their mind for different contexts and be surprised when the wrong set is used. > + struct i915_vma *oa_vma; > + > + oa_vma = i915_vma_instance(eb->oa_bo, > + &eb->engine->i915->ggtt.vm, NULL); > + if (unlikely(IS_ERR(oa_vma))) { > + err = PTR_ERR(oa_vma); > + return err; > + } > + > + err = i915_vma_pin(oa_vma, 0, 0, PIN_GLOBAL); > + if (err) > + return err; > + > + err = eb->engine->emit_bb_start(eb->request, > + oa_vma->node.start, > + 0, I915_DISPATCH_SECURE); > + if (err) { > + i915_vma_unpin(oa_vma); > + return err; > + } > + > + err = i915_vma_move_to_active(oa_vma, eb->request, 0); Move to active first, so that the vma is not in use if the move fails. > + if (err) { > + i915_vma_unpin(oa_vma); > + return err; > + } > + > + > + i915_vma_unpin(oa_vma); > + > + > + swap(eb->oa_config, eb->i915->perf.oa.exclusive_stream->oa_config); > + } > + > err = eb->engine->emit_bb_start(eb->request, > eb->batch->node.start + > eb->batch_start_offset, > @@ -2341,6 +2410,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, > eb.buffer_count = args->buffer_count; > eb.batch_start_offset = args->batch_start_offset; > eb.batch_len = args->batch_len; > + eb.oa_config = NULL; > > eb.batch_flags = 0; > if (args->flags & I915_EXEC_SECURE) { > @@ -2385,17 +2455,29 @@ i915_gem_do_execbuffer(struct drm_device *dev, > */ > intel_gt_pm_get(eb.i915); > > - err = i915_mutex_lock_interruptible(dev); > - if (err) > - goto err_rpm; > - > err = eb_select_engine(&eb, file, args); Lost the lock. 
> if (unlikely(err)) > - goto err_unlock; > + goto err_rpm; > + > + if (args->flags & I915_EXEC_PERF_CONFIG) { > + if (!intel_engine_has_oa(eb.engine)) { > + err = -ENODEV; > + goto err_engine; > + } > + > + err = get_execbuf_oa_config(eb.i915, args->DR1, args->DR4, > + &eb.oa_config, &eb.oa_bo); > + if (err) > + goto err_engine; > + } > + > + err = i915_mutex_lock_interruptible(dev); > + if (err) > + goto err_oa; > > err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */ > if (unlikely(err)) > - goto err_engine; > + goto err_unlock; > > err = eb_relocate(&eb); > if (err) { > @@ -2541,10 +2623,15 @@ i915_gem_do_execbuffer(struct drm_device *dev, > err_vma: > if (eb.exec) > eb_release_vmas(&eb); > -err_engine: > - eb_unpin_context(&eb); > err_unlock: > mutex_unlock(&dev->struct_mutex); > +err_oa: > + if (eb.oa_config) { > + i915_gem_object_put(eb.oa_bo); > + i915_oa_config_put(eb.oa_config); > + } > +err_engine: > + eb_unpin_context(&eb); Lost the lock. > err_rpm: > intel_gt_pm_put(eb.i915); > i915_gem_context_put(eb.gem_context);
On 21/05/2019 18:07, Chris Wilson wrote: > Quoting Lionel Landwerlin (2019-05-21 15:08:54) >> + if (eb->oa_config && >> + eb->oa_config != eb->i915->perf.oa.exclusive_stream->oa_config) { > But the oa_config is not ordered with respect to requests, and the > registers changed here are not context saved and so may be changed by a > third party before execution. The first party must presumably dropped > the perf_fd before then, so maybe you don't care? Hmm, doesn't even take > a third party as the perf_fd owner may change their mind for different > contexts and be surprised when the wrong set is used. The OA config batch should be ordered with regard to the MI_BBS we're doing just below right? It's written before in the ring buffer. That essentially all we need so that as the perf queries run in the batch supplied by the application runs with the configuration needed. If the application shares the perf FD and someone else runs another configuration, it's the application fault. It needs to hold the perf FD for as long as it's doing perf queries and not allow anybody else to interact with the OA configuration. This mechanism is unfortunately what we have resolve to because we don't have per context performance counters. The alternative is post processing the OA buffer (which we do in GL) from the CPU which is not really compatible with Vulkan queries. -Lionel > >> + struct i915_vma *oa_vma; >> + >> + oa_vma = i915_vma_instance(eb->oa_bo, >> + &eb->engine->i915->ggtt.vm, NULL); >> + if (unlikely(IS_ERR(oa_vma))) { >> + err = PTR_ERR(oa_vma); >> + return err; >> + } >> + >> + err = i915_vma_pin(oa_vma, 0, 0, PIN_GLOBAL); >> + if (err) >> + return err; >> + >> + err = eb->engine->emit_bb_start(eb->request, >> + oa_vma->node.start, >> + 0, I915_DISPATCH_SECURE); >> + if (err) { >> + i915_vma_unpin(oa_vma); >> + return err; >> + } >> + >> + err = i915_vma_move_to_active(oa_vma, eb->request, 0); > Move to active first, so that the vma is not in use if the move fails. 
> >> + if (err) { >> + i915_vma_unpin(oa_vma); >> + return err; >> + } >> + >> + >> + i915_vma_unpin(oa_vma); >> + >> + >> + swap(eb->oa_config, eb->i915->perf.oa.exclusive_stream->oa_config); >> + } >> + >> err = eb->engine->emit_bb_start(eb->request, >> eb->batch->node.start + >> eb->batch_start_offset, >> @@ -2341,6 +2410,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, >> eb.buffer_count = args->buffer_count; >> eb.batch_start_offset = args->batch_start_offset; >> eb.batch_len = args->batch_len; >> + eb.oa_config = NULL; >> >> eb.batch_flags = 0; >> if (args->flags & I915_EXEC_SECURE) { >> @@ -2385,17 +2455,29 @@ i915_gem_do_execbuffer(struct drm_device *dev, >> */ >> intel_gt_pm_get(eb.i915); >> >> - err = i915_mutex_lock_interruptible(dev); >> - if (err) >> - goto err_rpm; >> - >> err = eb_select_engine(&eb, file, args); > Lost the lock. Whoops... I'll split the engine_has_oa() check away. Thanks, -Lionel > >> if (unlikely(err)) >> - goto err_unlock; >> + goto err_rpm; >> + >> + if (args->flags & I915_EXEC_PERF_CONFIG) { >> + if (!intel_engine_has_oa(eb.engine)) { >> + err = -ENODEV; >> + goto err_engine; >> + } >> + >> + err = get_execbuf_oa_config(eb.i915, args->DR1, args->DR4, >> + &eb.oa_config, &eb.oa_bo); >> + if (err) >> + goto err_engine; >> + } >> + >> + err = i915_mutex_lock_interruptible(dev); >> + if (err) >> + goto err_oa; >> >> err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */ >> if (unlikely(err)) >> - goto err_engine; >> + goto err_unlock; >> >> err = eb_relocate(&eb); >> if (err) { >> @@ -2541,10 +2623,15 @@ i915_gem_do_execbuffer(struct drm_device *dev, >> err_vma: >> if (eb.exec) >> eb_release_vmas(&eb); >> -err_engine: >> - eb_unpin_context(&eb); >> err_unlock: >> mutex_unlock(&dev->struct_mutex); >> +err_oa: >> + if (eb.oa_config) { >> + i915_gem_object_put(eb.oa_bo); >> + i915_oa_config_put(eb.oa_config); >> + } >> +err_engine: >> + eb_unpin_context(&eb); > Lost the lock. 
> >> err_rpm: >> intel_gt_pm_put(eb.i915); >> i915_gem_context_put(eb.gem_context);
Quoting Lionel Landwerlin (2019-05-21 18:19:50) > On 21/05/2019 18:07, Chris Wilson wrote: > > Quoting Lionel Landwerlin (2019-05-21 15:08:54) > >> + if (eb->oa_config && > >> + eb->oa_config != eb->i915->perf.oa.exclusive_stream->oa_config) { > > But the oa_config is not ordered with respect to requests, and the > > registers changed here are not context saved and so may be changed by a > > third party before execution. The first party must presumably dropped > > the perf_fd before then, so maybe you don't care? Hmm, doesn't even take > > a third party as the perf_fd owner may change their mind for different > > contexts and be surprised when the wrong set is used. > > > The OA config batch should be ordered with regard to the MI_BBS we're > doing just below right? But you only emit if the oa_config has changed. Ergo, it may have changed between submission and execution. > It's written before in the ring buffer. > > > That essentially all we need so that as the perf queries run in the > batch supplied by the application runs with the configuration needed. > > If the application shares the perf FD and someone else runs another > configuration, it's the application fault. > > It needs to hold the perf FD for as long as it's doing perf queries and > not allow anybody else to interact with the OA configuration. If one app is trying to use different configs on different contexts (which seems reasonable if it is trying to sample different stats?) then it can be caught out. The order of execution is not the same as the order of submission (unless we enforce it by e.g. defining the perf.oa_config as a barrier). Another way would be to unconditionally emit the BB_START for the oa_vma, and instead do the early exit with a MI_CONDITIONAL_BB_END by comparing against a value stashed in the engine hwsp. You could do a predicated BB_START instead, but that looks to be more of a nuisance. -Chris
On 21/05/2019 18:48, Chris Wilson wrote: > Quoting Lionel Landwerlin (2019-05-21 18:19:50) >> On 21/05/2019 18:07, Chris Wilson wrote: >>> Quoting Lionel Landwerlin (2019-05-21 15:08:54) >>>> + if (eb->oa_config && >>>> + eb->oa_config != eb->i915->perf.oa.exclusive_stream->oa_config) { >>> But the oa_config is not ordered with respect to requests, and the >>> registers changed here are not context saved and so may be changed by a >>> third party before execution. The first party must presumably dropped >>> the perf_fd before then, so maybe you don't care? Hmm, doesn't even take >>> a third party as the perf_fd owner may change their mind for different >>> contexts and be surprised when the wrong set is used. >> >> The OA config batch should be ordered with regard to the MI_BBS we're >> doing just below right? > But you only emit if the oa_config has changed. Ergo, it may have > changed between submission and execution. > >> It's written before in the ring buffer. >> >> >> That essentially all we need so that as the perf queries run in the >> batch supplied by the application runs with the configuration needed. >> >> If the application shares the perf FD and someone else runs another >> configuration, it's the application fault. >> >> It needs to hold the perf FD for as long as it's doing perf queries and >> not allow anybody else to interact with the OA configuration. > If one app is trying to use different configs on different contexts > (which seems reasonable if it is trying to sample different stats?) then > it can be caught out. The order of execution is not the same as the > order of submission (unless we enforce it by e.g. defining the > perf.oa_config as a barrier). Thanks, I think I see the problem. It's pretty much the same as the sseu reconfiguration. 
Looking at the code it seems that the barrier is gone for sseu and I'm afraid that sounds like what's needed here :( -Lionel > > > Another way would be to unconditionally emit the BB_START for the > oa_vma, and instead do the early exit with a MI_CONDITIONAL_BB_END by > comparing against a value stashed in the engine hwsp. You could do a > predicated BB_START instead, but that looks to be more of a nuisance. > -Chris >
On 21/05/2019 18:48, Chris Wilson wrote: > Quoting Lionel Landwerlin (2019-05-21 18:19:50) >> On 21/05/2019 18:07, Chris Wilson wrote: >>> Quoting Lionel Landwerlin (2019-05-21 15:08:54) >>>> + if (eb->oa_config && >>>> + eb->oa_config != eb->i915->perf.oa.exclusive_stream->oa_config) { >>> But the oa_config is not ordered with respect to requests, and the >>> registers changed here are not context saved and so may be changed by a >>> third party before execution. The first party must presumably dropped >>> the perf_fd before then, so maybe you don't care? Hmm, doesn't even take >>> a third party as the perf_fd owner may change their mind for different >>> contexts and be surprised when the wrong set is used. >> >> The OA config batch should be ordered with regard to the MI_BBS we're >> doing just below right? > But you only emit if the oa_config has changed. Ergo, it may have > changed between submission and execution. > >> It's written before in the ring buffer. >> >> >> That essentially all we need so that as the perf queries run in the >> batch supplied by the application runs with the configuration needed. >> >> If the application shares the perf FD and someone else runs another >> configuration, it's the application fault. >> >> It needs to hold the perf FD for as long as it's doing perf queries and >> not allow anybody else to interact with the OA configuration. > If one app is trying to use different configs on different contexts > (which seems reasonable if it is trying to sample different stats?) then > it can be caught out. The order of execution is not the same as the > order of submission (unless we enforce it by e.g. defining the > perf.oa_config as a barrier). Thinking about this a bit more, the use case here was always that the app is the single user of the OA unit. In this scenario, the app is doing queries and should not share the configuration of the OA HW with anybody else. 
So all the sampling should be ordered with regard to the context's timeline. -Lionel > > > Another way would be to unconditionally emit the BB_START for the > oa_vma, and instead do the early exit with a MI_CONDITIONAL_BB_END by > comparing against a value stashed in the engine hwsp. You could do a > predicated BB_START instead, but that looks to be more of a nuisance. > -Chris >
Quoting Lionel Landwerlin (2019-05-22 10:19:46) > On 21/05/2019 18:48, Chris Wilson wrote: > > Quoting Lionel Landwerlin (2019-05-21 18:19:50) > >> On 21/05/2019 18:07, Chris Wilson wrote: > >>> Quoting Lionel Landwerlin (2019-05-21 15:08:54) > >>>> + if (eb->oa_config && > >>>> + eb->oa_config != eb->i915->perf.oa.exclusive_stream->oa_config) { > >>> But the oa_config is not ordered with respect to requests, and the > >>> registers changed here are not context saved and so may be changed by a > >>> third party before execution. The first party must presumably dropped > >>> the perf_fd before then, so maybe you don't care? Hmm, doesn't even take > >>> a third party as the perf_fd owner may change their mind for different > >>> contexts and be surprised when the wrong set is used. > >> > >> The OA config batch should be ordered with regard to the MI_BBS we're > >> doing just below right? > > But you only emit if the oa_config has changed. Ergo, it may have > > changed between submission and execution. > > > >> It's written before in the ring buffer. > >> > >> > >> That essentially all we need so that as the perf queries run in the > >> batch supplied by the application runs with the configuration needed. > >> > >> If the application shares the perf FD and someone else runs another > >> configuration, it's the application fault. > >> > >> It needs to hold the perf FD for as long as it's doing perf queries and > >> not allow anybody else to interact with the OA configuration. > > If one app is trying to use different configs on different contexts > > (which seems reasonable if it is trying to sample different stats?) then > > it can be caught out. The order of execution is not the same as the > > order of submission (unless we enforce it by e.g. defining the > > perf.oa_config as a barrier). > > > Thinking about this a bit more, the use case here was always that the > app is the single user of the OA unit. 
> > In this scenario, the app is doing queries and should not share the > configuration of the OA HW with anybody else. What about with itself? And does that excuse the kernel carrying a TOCTOU bug? -Chris
On 22/05/2019 10:25, Chris Wilson wrote: > Quoting Lionel Landwerlin (2019-05-22 10:19:46) >> On 21/05/2019 18:48, Chris Wilson wrote: >>> Quoting Lionel Landwerlin (2019-05-21 18:19:50) >>>> On 21/05/2019 18:07, Chris Wilson wrote: >>>>> Quoting Lionel Landwerlin (2019-05-21 15:08:54) >>>>>> + if (eb->oa_config && >>>>>> + eb->oa_config != eb->i915->perf.oa.exclusive_stream->oa_config) { >>>>> But the oa_config is not ordered with respect to requests, and the >>>>> registers changed here are not context saved and so may be changed by a >>>>> third party before execution. The first party must presumably dropped >>>>> the perf_fd before then, so maybe you don't care? Hmm, doesn't even take >>>>> a third party as the perf_fd owner may change their mind for different >>>>> contexts and be surprised when the wrong set is used. >>>> The OA config batch should be ordered with regard to the MI_BBS we're >>>> doing just below right? >>> But you only emit if the oa_config has changed. Ergo, it may have >>> changed between submission and execution. >>> >>>> It's written before in the ring buffer. >>>> >>>> >>>> That essentially all we need so that as the perf queries run in the >>>> batch supplied by the application runs with the configuration needed. >>>> >>>> If the application shares the perf FD and someone else runs another >>>> configuration, it's the application fault. >>>> >>>> It needs to hold the perf FD for as long as it's doing perf queries and >>>> not allow anybody else to interact with the OA configuration. >>> If one app is trying to use different configs on different contexts >>> (which seems reasonable if it is trying to sample different stats?) then >>> it can be caught out. The order of execution is not the same as the >>> order of submission (unless we enforce it by e.g. defining the >>> perf.oa_config as a barrier). >> >> Thinking about this a bit more, the use case here was always that the >> app is the single user of the OA unit. 
>> >> In this scenario, the app is doing queries and should not share the >> configuration of the OA HW with anybody else. > What about with itself? And does that excuse the kernel carrying a > TOCTOU bug? > -Chris > You mean with something like Iris that uses 2 contexts? I'm assuming things are properly synchronized. There is also another problem with the 2 contexts which is that we only allow filtering a single ID at the moment... Sorry, I'm not familiar with the TOCTOU bug :( -Lionel
Quoting Lionel Landwerlin (2019-05-21 15:08:54) > @@ -2048,6 +2081,42 @@ static int eb_submit(struct i915_execbuffer *eb) > return err; > } if (eb->oa_config) { err = i915_active_request_set(&eb->i915->perf.oa.oa_config_active, eb->request); if (err) return err; } with the addition of struct i915_active_request oa_config_active; to i915->perf.oa, and i915_active_init; That will ensure that the oa_config can't be changed before execution (and the ordering restriction is essentially a no-op if only one context has a specified oa_config). > + if (eb->oa_config && > + eb->oa_config != eb->i915->perf.oa.exclusive_stream->oa_config) { Fwiw, I would move these to eb_oa_config(). if (eb->oa_config) { err = eb_oa_config(eb); if (err) return err; } How does eb_oa_config mix with the global gen8_configure_all_contexts()? -Chris
On 28/05/2019 11:52, Chris Wilson wrote: > Quoting Lionel Landwerlin (2019-05-21 15:08:54) >> @@ -2048,6 +2081,42 @@ static int eb_submit(struct i915_execbuffer *eb) >> return err; >> } > if (eb->oa_config) { > err = i915_active_request_set(&eb->i915->perf.oa.oa_config_active, > eb->request); > if (err) > return err; > } > > with the addition of > struct i915_active_request oa_config_active; > to i915->perf.oa, and i915_active_init; That will ensure that the > oa_config can't be changed before execution (and the ordering restriction > is essentially a no-op if only one context has a specified oa_config). > >> + if (eb->oa_config && >> + eb->oa_config != eb->i915->perf.oa.exclusive_stream->oa_config) { > Fwiw, I would move these to eb_oa_config(). > > if (eb->oa_config) { > err = eb_oa_config(eb); > if (err) > return err; > } > > How does eb_oa_config mix with the global gen8_configure_all_contexts()? Excellent point, I should document this. HW configurations have roughly 3 parts : - NOA configuration, network configuration to source specific data from anywhere (global, non power saved/restored) - Boolean counters, filters signals brought by NOA (global, can't remember whether saved/restored) - Flex counters, filters on events happening within the EUs (per context, saved/restored) First two will affect all running contexts (because global), last one will only be applied to the context that triggered the execbuf. That should be fine because of the other requirement we need (don't preempt the context running a performance query). -Lionel > -Chris >
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index e381c1c73902..766fbbede430 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -445,6 +445,7 @@ struct intel_engine_cs { #define I915_ENGINE_HAS_PREEMPTION BIT(2) #define I915_ENGINE_HAS_SEMAPHORES BIT(3) #define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4) +#define I915_ENGINE_HAS_OA BIT(5) unsigned int flags; /* @@ -534,6 +535,12 @@ intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine) return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET; } +static inline bool +intel_engine_has_oa(const struct intel_engine_cs *engine) +{ + return engine->flags & I915_ENGINE_HAS_OA; +} + #define instdone_slice_mask(dev_priv__) \ (IS_GEN(dev_priv__, 7) ? \ 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 2ad95977f7a8..cad6fca4ba0f 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -2395,6 +2395,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) engine->init_context = gen8_init_rcs_context; engine->emit_flush = gen8_emit_flush_render; engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; + engine->flags |= I915_ENGINE_HAS_OA; } return 0; diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c index f0d60affdba3..dc85a3e474b9 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -2210,8 +2210,10 @@ static void setup_rcs(struct intel_engine_cs *engine) engine->irq_enable_mask = I915_USER_INTERRUPT; } - if (IS_HASWELL(i915)) + if (IS_HASWELL(i915)) { engine->emit_bb_start = hsw_emit_bb_start; + engine->flags |= I915_ENGINE_HAS_OA; + } engine->resume = rcs_resume; } diff --git a/drivers/gpu/drm/i915/i915_drv.c 
b/drivers/gpu/drm/i915/i915_drv.c index 5871e0cfbab0..6d9a15642342 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -472,6 +472,10 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data, case I915_PARAM_PERF_REVISION: value = 2; break; + case I915_PARAM_HAS_EXEC_PERF_CONFIG: + /* Obviously requires perf support. */ + value = dev_priv->perf.initialized; + break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index abd564bfa03b..25860d99ffc6 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3154,6 +3154,7 @@ int i915_perf_get_oa_config(struct drm_i915_private *i915, int metrics_set, struct i915_oa_config **out_config, struct drm_i915_gem_object **out_obj); +void i915_oa_config_put(struct i915_oa_config *oa_config); /* i915_gem_evict.c */ int __must_check i915_gem_evict_something(struct i915_address_space *vm, diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 361c232dde83..3794c6ce71e3 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -288,6 +288,9 @@ struct i915_execbuffer { */ int lut_size; struct hlist_head *buckets; /** ht for relocation handles */ + + struct i915_oa_config *oa_config; /** HW configuration for OA, NULL is not needed. 
*/ + struct drm_i915_gem_object *oa_bo; }; #define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags]) @@ -1183,6 +1186,33 @@ static void clflush_write32(u32 *addr, u32 value, unsigned int flushes) *addr = value; } +static int +get_execbuf_oa_config(struct drm_i915_private *dev_priv, + int perf_fd, u32 oa_config_id, + struct i915_oa_config **out_oa_config, + struct drm_i915_gem_object **out_oa_obj) +{ + struct file *perf_file; + int ret; + + if (!dev_priv->perf.oa.exclusive_stream) + return -EINVAL; + + perf_file = fget(perf_fd); + if (!perf_file) + return -EINVAL; + + if (perf_file->private_data != dev_priv->perf.oa.exclusive_stream) + return -EINVAL; + + fput(perf_file); + + ret = i915_perf_get_oa_config(dev_priv, oa_config_id, + out_oa_config, out_oa_obj); + + return ret; +} + static int __reloc_gpu_alloc(struct i915_execbuffer *eb, struct i915_vma *vma, unsigned int len) @@ -1937,12 +1967,15 @@ static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) return false; } - if (exec->DR4 == 0xffffffff) { - DRM_DEBUG("UXA submitting garbage DR4, fixing up\n"); - exec->DR4 = 0; + /* We reuse DR1 & DR4 fields for passing the perf config detail. 
*/ + if (!(exec->flags & I915_EXEC_PERF_CONFIG)) { + if (exec->DR4 == 0xffffffff) { + DRM_DEBUG("UXA submitting garbage DR4, fixing up\n"); + exec->DR4 = 0; + } + if (exec->DR1 || exec->DR4) + return false; } - if (exec->DR1 || exec->DR4) - return false; if ((exec->batch_start_offset | exec->batch_len) & 0x7) return false; @@ -2048,6 +2081,42 @@ static int eb_submit(struct i915_execbuffer *eb) return err; } + if (eb->oa_config && + eb->oa_config != eb->i915->perf.oa.exclusive_stream->oa_config) { + struct i915_vma *oa_vma; + + oa_vma = i915_vma_instance(eb->oa_bo, + &eb->engine->i915->ggtt.vm, NULL); + if (unlikely(IS_ERR(oa_vma))) { + err = PTR_ERR(oa_vma); + return err; + } + + err = i915_vma_pin(oa_vma, 0, 0, PIN_GLOBAL); + if (err) + return err; + + err = eb->engine->emit_bb_start(eb->request, + oa_vma->node.start, + 0, I915_DISPATCH_SECURE); + if (err) { + i915_vma_unpin(oa_vma); + return err; + } + + err = i915_vma_move_to_active(oa_vma, eb->request, 0); + if (err) { + i915_vma_unpin(oa_vma); + return err; + } + + + i915_vma_unpin(oa_vma); + + + swap(eb->oa_config, eb->i915->perf.oa.exclusive_stream->oa_config); + } + err = eb->engine->emit_bb_start(eb->request, eb->batch->node.start + eb->batch_start_offset, @@ -2341,6 +2410,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, eb.buffer_count = args->buffer_count; eb.batch_start_offset = args->batch_start_offset; eb.batch_len = args->batch_len; + eb.oa_config = NULL; eb.batch_flags = 0; if (args->flags & I915_EXEC_SECURE) { @@ -2385,17 +2455,29 @@ i915_gem_do_execbuffer(struct drm_device *dev, */ intel_gt_pm_get(eb.i915); - err = i915_mutex_lock_interruptible(dev); - if (err) - goto err_rpm; - err = eb_select_engine(&eb, file, args); if (unlikely(err)) - goto err_unlock; + goto err_rpm; + + if (args->flags & I915_EXEC_PERF_CONFIG) { + if (!intel_engine_has_oa(eb.engine)) { + err = -ENODEV; + goto err_engine; + } + + err = get_execbuf_oa_config(eb.i915, args->DR1, args->DR4, + &eb.oa_config, &eb.oa_bo); + if 
(err) + goto err_engine; + } + + err = i915_mutex_lock_interruptible(dev); + if (err) + goto err_oa; err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */ if (unlikely(err)) - goto err_engine; + goto err_unlock; err = eb_relocate(&eb); if (err) { @@ -2541,10 +2623,15 @@ i915_gem_do_execbuffer(struct drm_device *dev, err_vma: if (eb.exec) eb_release_vmas(&eb); -err_engine: - eb_unpin_context(&eb); err_unlock: mutex_unlock(&dev->struct_mutex); +err_oa: + if (eb.oa_config) { + i915_gem_object_put(eb.oa_bo); + i915_oa_config_put(eb.oa_config); + } +err_engine: + eb_unpin_context(&eb); err_rpm: intel_gt_pm_put(eb.i915); i915_gem_context_put(eb.gem_context); diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 7e0ebd4bc8f2..7b861f12f161 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -365,7 +365,7 @@ struct perf_open_properties { int oa_period_exponent; }; -static void put_oa_config(struct i915_oa_config *oa_config) +void i915_oa_config_put(struct i915_oa_config *oa_config) { if (!atomic_dec_and_test(&oa_config->ref_count)) return; @@ -515,7 +515,7 @@ int i915_perf_get_oa_config(struct drm_i915_private *i915, err_buf_alloc: if (out_config) { - put_oa_config(oa_config); + i915_oa_config_put(oa_config); *out_config = NULL; } unlock: @@ -1496,7 +1496,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream) if (stream->ctx) oa_put_render_ctx_id(stream); - put_oa_config(stream->oa_config); + i915_oa_config_put(stream->oa_config); if (dev_priv->perf.oa.spurious_report_rs.missed) { DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n", @@ -2264,7 +2264,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, free_oa_buffer(dev_priv); err_oa_buf_alloc: - put_oa_config(stream->oa_config); + i915_oa_config_put(stream->oa_config); intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); intel_runtime_pm_put(dev_priv, stream->wakeref); @@ 
-3441,7 +3441,7 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, sysfs_err: mutex_unlock(&dev_priv->perf.metrics_lock); reg_err: - put_oa_config(oa_config); + i915_oa_config_put(oa_config); DRM_DEBUG("Failed to add new OA config\n"); return err; } @@ -3495,7 +3495,7 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id); - put_oa_config(oa_config); + i915_oa_config_put(oa_config); config_err: mutex_unlock(&dev_priv->perf.metrics_lock); @@ -3657,7 +3657,7 @@ static int destroy_config(int id, void *p, void *data) { struct i915_oa_config *oa_config = p; - put_oa_config(oa_config); + i915_oa_config_put(oa_config); return 0; } diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 5601dc688295..e57fb5f249da 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -604,6 +604,16 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_PERF_REVISION 53 +/* + * Request an OA performance configuration change before running the commands + * given in an execbuf. + * + * Performance configuration ID is given in the DR4 field of + * drm_i915_gem_execbuffer2 and the file descriptor of the i915 perf stream is + * given in DR1. Execbuffer will fail if any of these parameter is invalid. + */ +#define I915_PARAM_HAS_EXEC_PERF_CONFIG 54 + /* Must be kept compact -- no holes and well documented */ typedef struct drm_i915_getparam { @@ -1126,7 +1136,15 @@ struct drm_i915_gem_execbuffer2 { */ #define I915_EXEC_FENCE_ARRAY (1<<19) -#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_ARRAY<<1)) +/* Request that perf monitoring hardware be reprogrammed before executing the + * commands from the batch in the execbuf. The DR1 & DR4 fields of the execbuf + * must respectively contain the file descriptor of the perf monitoring device + * and the configuration to program. 
+ */ +#define I915_EXEC_PERF_CONFIG (1<<20) + + +#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_PERF_CONFIG<<1)) #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define i915_execbuffer2_set_context_id(eb2, context) \
We want the ability to dispatch a set of command buffers to the hardware, each with a different OA configuration. To achieve this, we reuse a couple of fields from the execbuf2 struct (I CAN HAZ execbuf3?) to notify what OA configuration should be used for a batch buffer. This requires the process making the execbuf with this flag to also own the perf fd at the time of execbuf. v2: Add an emit_oa_config() vfunc in the intel_engine_cs (Chris) Move oa_config vma to active (Chris) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> --- drivers/gpu/drm/i915/gt/intel_engine_types.h | 7 ++ drivers/gpu/drm/i915/gt/intel_lrc.c | 1 + drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 4 +- drivers/gpu/drm/i915/i915_drv.c | 4 + drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 113 ++++++++++++++++--- drivers/gpu/drm/i915/i915_perf.c | 14 +-- include/uapi/drm/i915_drm.h | 20 +++- 8 files changed, 142 insertions(+), 22 deletions(-)