@@ -454,6 +454,7 @@ struct intel_engine_cs {
#define I915_ENGINE_HAS_SEMAPHORES BIT(3)
#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4)
#define I915_ENGINE_IS_VIRTUAL BIT(5)
+#define I915_ENGINE_HAS_OA BIT(6)
unsigned int flags;
/*
@@ -549,6 +550,12 @@ intel_engine_is_virtual(const struct intel_engine_cs *engine)
return engine->flags & I915_ENGINE_IS_VIRTUAL;
}
+static inline bool
+intel_engine_has_oa(const struct intel_engine_cs *engine)
+{
+ return engine->flags & I915_ENGINE_HAS_OA;
+}
+
#define instdone_slice_mask(dev_priv__) \
(IS_GEN(dev_priv__, 7) ? \
1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask)
@@ -2732,6 +2732,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
engine->init_context = gen8_init_rcs_context;
engine->emit_flush = gen8_emit_flush_render;
engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
+ engine->flags |= I915_ENGINE_HAS_OA;
}
return 0;
@@ -2210,8 +2210,10 @@ static void setup_rcs(struct intel_engine_cs *engine)
engine->irq_enable_mask = I915_USER_INTERRUPT;
}
- if (IS_HASWELL(i915))
+ if (IS_HASWELL(i915)) {
engine->emit_bb_start = hsw_emit_bb_start;
+ engine->flags |= I915_ENGINE_HAS_OA;
+ }
engine->resume = rcs_resume;
}
@@ -473,6 +473,10 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
case I915_PARAM_PERF_REVISION:
value = 2;
break;
+ case I915_PARAM_HAS_EXEC_PERF_CONFIG:
+ /* Obviously requires perf support. */
+ value = dev_priv->perf.initialized;
+ break;
default:
DRM_DEBUG("Unknown parameter %d\n", param->param);
return -EINVAL;
@@ -3154,6 +3154,7 @@ int i915_perf_get_oa_config(struct drm_i915_private *i915,
int metrics_set,
struct i915_oa_config **out_config,
struct drm_i915_gem_object **out_obj);
+void i915_oa_config_put(struct i915_oa_config *oa_config);
/* i915_gem_evict.c */
int __must_check i915_gem_evict_something(struct i915_address_space *vm,
@@ -288,6 +288,9 @@ struct i915_execbuffer {
*/
int lut_size;
struct hlist_head *buckets; /** ht for relocation handles */
+
+ struct i915_oa_config *oa_config; /** HW configuration for OA, NULL if not needed. */
+ struct drm_i915_gem_object *oa_bo;
};
#define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])
@@ -1183,6 +1186,33 @@ static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
*addr = value;
}
+static int
+get_execbuf_oa_config(struct drm_i915_private *dev_priv,
+ int perf_fd, u32 oa_config_id,
+ struct i915_oa_config **out_oa_config,
+ struct drm_i915_gem_object **out_oa_obj)
+{
+ struct file *perf_file;
+ int ret;
+
+ if (!dev_priv->perf.oa.exclusive_stream)
+ return -EINVAL;
+
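+ /*
+ * Verify that the file descriptor given by userspace is the one
+ * backing the currently open exclusive OA stream, i.e. the process
+ * doing the execbuf also owns the perf stream.
+ */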
+ perf_file = fget(perf_fd);
+ if (!perf_file)
+ return -EINVAL;
+
+ if (perf_file->private_data != dev_priv->perf.oa.exclusive_stream) {
+ fput(perf_file);
+ return -EINVAL;
+ }
+
+ fput(perf_file);
+
+ ret = i915_perf_get_oa_config(dev_priv, oa_config_id,
+ out_oa_config, out_oa_obj);
+
+ return ret;
+}
+
static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
struct i915_vma *vma,
unsigned int len)
@@ -1937,12 +1967,15 @@ static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
return false;
}
- if (exec->DR4 == 0xffffffff) {
- DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
- exec->DR4 = 0;
+ /* We reuse the DR1 & DR4 fields for passing the perf config details. */
+ if (!(exec->flags & I915_EXEC_PERF_CONFIG)) {
+ if (exec->DR4 == 0xffffffff) {
+ DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
+ exec->DR4 = 0;
+ }
+ if (exec->DR1 || exec->DR4)
+ return false;
}
- if (exec->DR1 || exec->DR4)
- return false;
if ((exec->batch_start_offset | exec->batch_len) & 0x7)
return false;
@@ -2048,6 +2081,40 @@ static int eb_submit(struct i915_execbuffer *eb)
return err;
}
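+ /*
+ * If a new OA config was requested for this execbuf and it differs
+ * from the one currently programmed on the exclusive stream, emit it
+ * from the GGTT ahead of the user batch.
+ */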
+ if (eb->oa_config &&
+ eb->oa_config != eb->i915->perf.oa.exclusive_stream->oa_config) {
+ struct i915_vma *oa_vma;
+
+ oa_vma = i915_vma_instance(eb->oa_bo,
+ &eb->engine->i915->ggtt.vm, NULL);
+ if (unlikely(IS_ERR(oa_vma))) {
+ err = PTR_ERR(oa_vma);
+ return err;
+ }
+
+ err = i915_vma_pin(oa_vma, 0, 0, PIN_GLOBAL);
+ if (err)
+ return err;
+
+ err = i915_vma_move_to_active(oa_vma, eb->request, 0);
+ if (err) {
+ i915_vma_unpin(oa_vma);
+ return err;
+ }
+
+ err = eb->engine->emit_bb_start(eb->request,
+ oa_vma->node.start,
+ 0, I915_DISPATCH_SECURE);
+ if (err) {
+ i915_vma_unpin(oa_vma);
+ return err;
+ }
+
+ i915_vma_unpin(oa_vma);
+
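+ /*
+ * Hand the new config over to the stream. The config it replaces is
+ * now referenced by eb->oa_config and will be released by the
+ * execbuffer cleanup path.
+ */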
+ swap(eb->oa_config, eb->i915->perf.oa.exclusive_stream->oa_config);
+ }
+
err = eb->engine->emit_bb_start(eb->request,
eb->batch->node.start +
eb->batch_start_offset,
@@ -2345,6 +2412,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
eb.buffer_count = args->buffer_count;
eb.batch_start_offset = args->batch_start_offset;
eb.batch_len = args->batch_len;
+ eb.oa_config = NULL;
eb.batch_flags = 0;
if (args->flags & I915_EXEC_SECURE) {
@@ -2410,9 +2478,21 @@ i915_gem_do_execbuffer(struct drm_device *dev,
if (unlikely(err))
goto err_unlock;
+ if (args->flags & I915_EXEC_PERF_CONFIG) {
+ if (!intel_engine_has_oa(eb.engine)) {
+ err = -ENODEV;
+ goto err_engine;
+ }
+
+ err = get_execbuf_oa_config(eb.i915, args->DR1, args->DR4,
+ &eb.oa_config, &eb.oa_bo);
+ if (err)
+ goto err_engine;
+ }
+
err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */
if (unlikely(err))
- goto err_engine;
+ goto err_oa;
err = eb_relocate(&eb);
if (err) {
@@ -2565,6 +2645,11 @@ i915_gem_do_execbuffer(struct drm_device *dev,
err_vma:
if (eb.exec)
eb_release_vmas(&eb);
+err_oa:
+ if (eb.oa_config) {
+ i915_gem_object_put(eb.oa_bo);
+ i915_oa_config_put(eb.oa_config);
+ }
err_engine:
eb_unpin_context(&eb);
err_unlock:
@@ -367,7 +367,7 @@ struct perf_open_properties {
int oa_period_exponent;
};
-static void put_oa_config(struct i915_oa_config *oa_config)
+void i915_oa_config_put(struct i915_oa_config *oa_config)
{
if (!atomic_dec_and_test(&oa_config->ref_count))
return;
@@ -501,7 +501,7 @@ int i915_perf_get_oa_config(struct drm_i915_private *i915,
err_buf_alloc:
if (out_config) {
- put_oa_config(oa_config);
+ i915_oa_config_put(oa_config);
*out_config = NULL;
}
unlock:
@@ -1482,7 +1482,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
if (stream->ctx)
oa_put_render_ctx_id(stream);
- put_oa_config(stream->oa_config);
+ i915_oa_config_put(stream->oa_config);
if (dev_priv->perf.oa.spurious_report_rs.missed) {
DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
@@ -2250,7 +2250,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
free_oa_buffer(dev_priv);
err_oa_buf_alloc:
- put_oa_config(stream->oa_config);
+ i915_oa_config_put(stream->oa_config);
intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
intel_runtime_pm_put(dev_priv, stream->wakeref);
@@ -3427,7 +3427,7 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
sysfs_err:
mutex_unlock(&dev_priv->perf.metrics_lock);
reg_err:
- put_oa_config(oa_config);
+ i915_oa_config_put(oa_config);
DRM_DEBUG("Failed to add new OA config\n");
return err;
}
@@ -3481,7 +3481,7 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id);
- put_oa_config(oa_config);
+ i915_oa_config_put(oa_config);
config_err:
mutex_unlock(&dev_priv->perf.metrics_lock);
@@ -3643,7 +3643,7 @@ static int destroy_config(int id, void *p, void *data)
{
struct i915_oa_config *oa_config = p;
- put_oa_config(oa_config);
+ i915_oa_config_put(oa_config);
return 0;
}
@@ -617,6 +617,16 @@ typedef struct drm_i915_irq_wait {
*/
#define I915_PARAM_PERF_REVISION 54
+/*
+ * Query whether the kernel supports requesting an OA performance
+ * configuration change before running the commands given in an execbuf
+ * (see I915_EXEC_PERF_CONFIG).
+ *
+ * The performance configuration ID is given in the DR4 field of
+ * drm_i915_gem_execbuffer2 and the file descriptor of the i915 perf stream
+ * is given in DR1. Execbuffer will fail if either of these parameters is
+ * invalid.
+ */
+#define I915_PARAM_HAS_EXEC_PERF_CONFIG 55
+
/* Must be kept compact -- no holes and well documented */
typedef struct drm_i915_getparam {
@@ -1148,7 +1158,14 @@ struct drm_i915_gem_execbuffer2 {
*/
#define I915_EXEC_FENCE_SUBMIT (1 << 20)
-#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SUBMIT << 1))
+/* Request that perf monitoring hardware be reprogrammed before executing the
+ * commands from the batch in the execbuf. The DR1 & DR4 fields of the execbuf
+ * must respectively contain the file descriptor of the perf monitoring device
+ * and the configuration to program.
+ */
+#define I915_EXEC_PERF_CONFIG (1 << 21)
+
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_PERF_CONFIG << 1))
#define I915_EXEC_CONTEXT_ID_MASK (0xffffffff)
#define i915_execbuffer2_set_context_id(eb2, context) \
We want the ability to dispatch a set of command buffer to the hardware, each with a different OA configuration. To achieve this, we reuse a couple of fields from the execbuf2 struct (I CAN HAZ execbuf3?) to notify what OA configuration should be used for a batch buffer. This requires the process making the execbuf with this flag to also own the perf fd at the time of execbuf. v2: Add a emit_oa_config() vfunc in the intel_engine_cs (Chris) Move oa_config vma to active (Chris) v3: Don't drop the lock for engine lookup (Chris) Move OA config vma to active before writing the ringbuffer (Chris) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> --- drivers/gpu/drm/i915/gt/intel_engine_types.h | 7 ++ drivers/gpu/drm/i915/gt/intel_lrc.c | 1 + drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 4 +- drivers/gpu/drm/i915/i915_drv.c | 4 + drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 97 ++++++++++++++++++-- drivers/gpu/drm/i915/i915_perf.c | 14 +-- include/uapi/drm/i915_drm.h | 19 +++- 8 files changed, 132 insertions(+), 15 deletions(-)