@@ -283,7 +283,12 @@ struct i915_execbuffer {
struct {
u64 flags; /** Available extensions parameters */
struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
+ struct drm_i915_gem_execbuffer_ext_perf perf_config;
} extensions;
+
+ struct i915_oa_config *oa_config; /** HW configuration for OA, NULL if not needed. */
+ struct drm_i915_gem_object *oa_bo;
+ struct i915_vma *oa_vma;
};
#define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])
@@ -1210,6 +1215,21 @@ static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
return err;
}
+
+static int
+get_execbuf_oa_config(struct i915_execbuffer *eb)
+{
+ eb->oa_config = NULL;
+ eb->oa_vma = NULL;
+ eb->oa_bo = NULL;
+
+ if ((eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) == 0)
+ return 0;
+
+ return i915_perf_get_oa_config(eb->i915, eb->extensions.perf_config.oa_config,
+ &eb->oa_config, &eb->oa_bo);
+}
+
static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
struct i915_vma *vma,
unsigned int len)
@@ -2072,6 +2092,47 @@ add_to_client(struct i915_request *rq, struct drm_file *file)
list_add_tail(&rq->client_link, &rq->file_priv->mm.request_list);
}
+static int eb_oa_config(struct i915_execbuffer *eb)
+{
+ int ret;
+
+ if (!eb->oa_config)
+ return 0;
+
+ ret = i915_mutex_lock_interruptible(&eb->i915->drm);
+ if (ret)
+ return ret;
+
+ ret = i915_active_request_set(&eb->engine->last_oa_config,
+ eb->request);
+ if (ret)
+ goto unlock;
+
+ /*
+ * If the config hasn't changed, skip reconfiguring the HW (this is
+ * subject to a delay we want to avoid as much as possible).
+ */
+ if (eb->oa_config == eb->i915->perf.oa.exclusive_stream->oa_config)
+ goto unlock;
+
+ ret = i915_vma_move_to_active(eb->oa_vma, eb->request, 0);
+ if (ret)
+ goto unlock;
+
+ ret = eb->engine->emit_bb_start(eb->request,
+ eb->oa_vma->node.start,
+ 0, I915_DISPATCH_SECURE);
+ if (ret)
+ goto unlock;
+
+ swap(eb->oa_config, eb->i915->perf.oa.exclusive_stream->oa_config);
+
+unlock:
+ mutex_unlock(&eb->i915->drm.struct_mutex);
+
+ return ret;
+}
+
static int eb_submit(struct i915_execbuffer *eb)
{
int err;
@@ -2098,6 +2159,10 @@ static int eb_submit(struct i915_execbuffer *eb)
return err;
}
+ err = eb_oa_config(eb);
+ if (err)
+ return err;
+
err = eb->engine->emit_bb_start(eb->request,
eb->batch->node.start +
eb->batch_start_offset,
@@ -2537,8 +2602,25 @@ static int parse_timeline_fences(struct i915_user_extension __user *ext, void *d
return 0;
}
+static int parse_perf_config(struct i915_user_extension __user *ext, void *data)
+{
+ struct i915_execbuffer *eb = data;
+
+ if (eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF))
+ return -EINVAL;
+
+ if (copy_from_user(&eb->extensions.perf_config, ext,
+ sizeof(eb->extensions.perf_config)))
+ return -EFAULT;
+
+ eb->extensions.flags |= BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF);
+
+ return 0;
+}
+
static const i915_user_extension_fn execbuf_extensions[] = {
[DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,
+ [DRM_I915_GEM_EXECBUFFER_EXT_PERF] = parse_perf_config,
};
static int
@@ -2643,9 +2725,13 @@ i915_gem_do_execbuffer(struct drm_device *dev,
}
}
+ err = get_execbuf_oa_config(&eb);
+ if (err)
+ goto err_oa_config;
+
err = eb_create(&eb);
if (err)
- goto err_out_fence;
+ goto err_oa_config;
GEM_BUG_ON(!eb.lut_size);
@@ -2670,6 +2756,27 @@ i915_gem_do_execbuffer(struct drm_device *dev,
if (unlikely(err))
goto err_unlock;
+ if (eb.extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) {
+ struct file *perf_file;
+
+ if (!intel_engine_has_oa(eb.engine)) {
+ err = -ENODEV;
+ goto err_engine;
+ }
+
+ perf_file = fget(eb.extensions.perf_config.perf_fd);
+ if (!perf_file) {
+ err = -EINVAL;
+ goto err_engine;
+ }
+
+ if (perf_file->private_data != eb.i915->perf.oa.exclusive_stream)
+ err = -EINVAL;
+
+ fput(perf_file);
+
+ if (unlikely(err))
+ goto err_engine;
+ }
+
err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */
if (unlikely(err))
goto err_engine;
@@ -2790,6 +2897,20 @@ i915_gem_do_execbuffer(struct drm_device *dev,
}
}
+ if (eb.extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) {
+ eb.oa_vma = i915_vma_instance(eb.oa_bo,
+ &eb.engine->i915->ggtt.vm, NULL);
+ if (unlikely(IS_ERR(eb.oa_vma))) {
+ err = PTR_ERR(eb.oa_vma);
+ eb.oa_vma = NULL;
+ goto err_request;
+ }
+
+ err = i915_vma_pin(eb.oa_vma, 0, 0, PIN_GLOBAL);
+ if (err) {
+ /* Clear the pointer so the error path doesn't unpin an unpinned vma. */
+ eb.oa_vma = NULL;
+ goto err_request;
+ }
+ }
+
/*
* Whilst this request exists, batch_obj will be on the
* active_list, and so will hold the active reference. Only when this
@@ -2834,7 +2955,13 @@ i915_gem_do_execbuffer(struct drm_device *dev,
i915_gem_context_put(eb.gem_context);
err_destroy:
eb_destroy(&eb);
-err_out_fence:
+err_oa_config:
+ /* Drop the vma pin before releasing its backing object. */
+ if (eb.oa_vma)
+ i915_vma_unpin(eb.oa_vma);
+ if (eb.oa_config) {
+ i915_gem_object_put(eb.oa_bo);
+ i915_oa_config_put(eb.oa_config);
+ }
if (out_fence_fd != -1)
put_unused_fd(out_fence_fd);
err_exec_fence:
@@ -864,6 +864,8 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
engine->set_default_submission(engine);
+ INIT_ACTIVE_REQUEST(&engine->last_oa_config);
+
return 0;
err_unpin:
@@ -363,6 +363,8 @@ struct intel_engine_cs {
struct i915_wa_list wa_list;
struct i915_wa_list whitelist;
+ struct i915_active_request last_oa_config;
+
u32 irq_keep_mask; /* always keep these interrupts */
u32 irq_enable_mask; /* bitmask to enable ring interrupt */
void (*irq_enable)(struct intel_engine_cs *engine);
@@ -446,6 +448,7 @@ struct intel_engine_cs {
#define I915_ENGINE_HAS_SEMAPHORES BIT(3)
#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4)
#define I915_ENGINE_IS_VIRTUAL BIT(5)
+#define I915_ENGINE_HAS_OA BIT(6)
unsigned int flags;
/*
@@ -541,6 +544,12 @@ intel_engine_is_virtual(const struct intel_engine_cs *engine)
return engine->flags & I915_ENGINE_IS_VIRTUAL;
}
+static inline bool
+intel_engine_has_oa(const struct intel_engine_cs *engine)
+{
+ return engine->flags & I915_ENGINE_HAS_OA;
+}
+
#define instdone_slice_mask(dev_priv__) \
(IS_GEN(dev_priv__, 7) ? \
1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask)
@@ -2794,6 +2794,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
engine->init_context = gen8_init_rcs_context;
engine->emit_flush = gen8_emit_flush_render;
engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
+ engine->flags |= I915_ENGINE_HAS_OA;
}
return 0;
@@ -2205,8 +2205,10 @@ static void setup_rcs(struct intel_engine_cs *engine)
engine->irq_enable_mask = I915_USER_INTERRUPT;
}
- if (IS_HASWELL(i915))
+ if (IS_HASWELL(i915)) {
engine->emit_bb_start = hsw_emit_bb_start;
+ engine->flags |= I915_ENGINE_HAS_OA;
+ }
engine->resume = rcs_resume;
}
@@ -487,6 +487,10 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
case I915_PARAM_PERF_REVISION:
value = i915_perf_ioctl_version();
break;
+ case I915_PARAM_HAS_EXEC_PERF_CONFIG:
+ /* Obviously requires perf support. */
+ value = dev_priv->perf.initialized;
+ break;
default:
DRM_DEBUG("Unknown parameter %d\n", param->param);
return -EINVAL;
@@ -1116,7 +1116,7 @@ struct i915_oa_config {
struct list_head vma_link;
- atomic_t ref_count;
+ struct kref ref;
};
struct i915_perf_stream;
@@ -2614,6 +2614,12 @@ int i915_perf_get_oa_config(struct drm_i915_private *i915,
int metrics_set,
struct i915_oa_config **out_config,
struct drm_i915_gem_object **out_obj);
+void i915_oa_config_release(struct kref *ref);
+
+static inline void i915_oa_config_put(struct i915_oa_config *oa_config)
+{
+ kref_put(&oa_config->ref, i915_oa_config_release);
+}
/* i915_gem_evict.c */
int __must_check i915_gem_evict_something(struct i915_address_space *vm,
@@ -367,10 +367,9 @@ struct perf_open_properties {
int oa_period_exponent;
};
-static void put_oa_config(struct i915_oa_config *oa_config)
+void i915_oa_config_release(struct kref *ref)
{
- if (!atomic_dec_and_test(&oa_config->ref_count))
- return;
+ struct i915_oa_config *oa_config = container_of(ref, typeof(*oa_config), ref);
if (oa_config->obj) {
struct drm_i915_private *i915 = oa_config->i915;
@@ -488,7 +487,7 @@ int i915_perf_get_oa_config(struct drm_i915_private *i915,
}
if (out_config) {
- atomic_inc(&oa_config->ref_count);
+ kref_get(&oa_config->ref);
*out_config = oa_config;
}
@@ -510,7 +509,7 @@ int i915_perf_get_oa_config(struct drm_i915_private *i915,
mutex_unlock(&i915->perf.metrics_lock);
if (ret && out_config) {
- put_oa_config(oa_config);
+ i915_oa_config_put(oa_config);
*out_config = NULL;
}
@@ -1484,7 +1483,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
if (stream->ctx)
oa_put_render_ctx_id(stream);
- put_oa_config(stream->oa_config);
+ i915_oa_config_put(stream->oa_config);
if (dev_priv->perf.oa.spurious_report_rs.missed) {
DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
@@ -2480,7 +2479,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
free_oa_buffer(dev_priv);
err_oa_buf_alloc:
- put_oa_config(stream->oa_config);
+ i915_oa_config_put(stream->oa_config);
intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref);
@@ -2490,7 +2489,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
mutex_unlock(&dev_priv->drm.struct_mutex);
err_noa_wait_alloc:
- put_oa_config(stream->oa_config);
+ i915_oa_config_put(stream->oa_config);
err_config:
if (stream->ctx)
@@ -3305,7 +3304,7 @@ void i915_perf_register(struct drm_i915_private *dev_priv)
goto sysfs_error;
dev_priv->perf.oa.test_config.i915 = dev_priv;
- atomic_set(&dev_priv->perf.oa.test_config.ref_count, 1);
+ kref_init(&dev_priv->perf.oa.test_config.ref);
goto exit;
@@ -3562,7 +3561,7 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
}
oa_config->i915 = dev_priv;
- atomic_set(&oa_config->ref_count, 1);
+ kref_init(&oa_config->ref);
if (!uuid_is_valid(args->uuid)) {
DRM_DEBUG("Invalid uuid format for OA config\n");
@@ -3661,7 +3660,7 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
sysfs_err:
mutex_unlock(&dev_priv->perf.metrics_lock);
reg_err:
- put_oa_config(oa_config);
+ i915_oa_config_put(oa_config);
DRM_DEBUG("Failed to add new OA config\n");
return err;
}
@@ -3717,7 +3716,7 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id);
- put_oa_config(oa_config);
+ i915_oa_config_put(oa_config);
return 0;
@@ -3887,7 +3886,7 @@ static int destroy_config(int id, void *p, void *data)
{
struct i915_oa_config *oa_config = p;
- put_oa_config(oa_config);
+ i915_oa_config_put(oa_config);
return 0;
}
@@ -624,6 +624,16 @@ typedef struct drm_i915_irq_wait {
*/
#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55
+/*
+ * Request an i915/perf performance configuration change before running the
+ * commands given in an execbuf.
+ *
+ * Performance configuration ID and the file descriptor of the i915 perf
+ * stream are given through drm_i915_gem_execbuffer_ext_perf. See
+ * I915_EXEC_EXT.
+ */
+#define I915_PARAM_HAS_EXEC_PERF_CONFIG 56
+
/* Must be kept compact -- no holes and well documented */
typedef struct drm_i915_getparam {
@@ -1026,6 +1036,12 @@ enum drm_i915_gem_execbuffer_ext {
*/
DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES = 0,
+ /**
+ * This identifier is associated with
+ * drm_i915_gem_execbuffer_ext_perf.
+ */
+ DRM_I915_GEM_EXECBUFFER_EXT_PERF,
+
DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */
};
@@ -1056,6 +1072,27 @@ struct drm_i915_gem_execbuffer_ext_timeline_fences {
__u64 values_ptr;
};
+struct drm_i915_gem_execbuffer_ext_perf {
+ struct i915_user_extension base;
+
+ /**
+ * Performance file descriptor returned by DRM_IOCTL_I915_PERF_OPEN.
+ * This is used to verify that the process issuing this execbuf also
+ * owns the perf stream at the time of the call.
+ */
+ __s32 perf_fd;
+
+ /**
+ * Unused for now. Must be cleared to zero.
+ */
+ __u32 pad;
+
+ /**
+ * OA configuration ID to switch to before executing the commands
+ * associated with the execbuf.
+ */
+ __u64 oa_config;
+};
+
struct drm_i915_gem_execbuffer2 {
/**
* List of gem_exec_object2 structs
We want the ability to dispatch a set of command buffers to the
hardware, each with a different OA configuration. To achieve this, we
reuse a couple of fields from the execbuf2 struct (I CAN HAZ execbuf3?)
to specify which OA configuration should be used for a batch buffer.
This requires the process making the execbuf with this flag to also own
the perf fd at the time of execbuf.

v2: Add an emit_oa_config() vfunc in the intel_engine_cs (Chris)
    Move oa_config vma to active (Chris)

v3: Don't drop the lock for engine lookup (Chris)
    Move OA config vma to active before writing the ringbuffer (Chris)

v4: Reuse i915_user_extension_fn
    Serialize requests with OA config updates

v5: Check that the chained extension is only present once (Chris)
    Unpin oa_vma in main path (Chris)

v6: Use BIT_ULL (Chris)

v7: Hold drm.struct_mutex when serializing the request with OA config (Chris)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c   | 131 +++++++++++++++++-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c    |   2 +
 drivers/gpu/drm/i915/gt/intel_engine_types.h |   9 ++
 drivers/gpu/drm/i915/gt/intel_lrc.c          |   1 +
 drivers/gpu/drm/i915/gt/intel_ringbuffer.c   |   4 +-
 drivers/gpu/drm/i915/i915_drv.c              |   4 +
 drivers/gpu/drm/i915/i915_drv.h              |   8 +-
 drivers/gpu/drm/i915/i915_perf.c             |  25 ++--
 include/uapi/drm/i915_drm.h                  |  37 +++++
 9 files changed, 204 insertions(+), 17 deletions(-)
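For completeness, a minimal userspace sketch of how this extension is
meant to be used (not part of the patch). I915_EXEC_EXT and the reuse
of cliprects_ptr as the extension-chain pointer come from earlier
patches in this series and are assumed here, as are drm_fd, perf_fd,
the OA config ID and the object list:

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int execbuf_with_oa_config(int drm_fd, int perf_fd,
				  uint64_t oa_config_id,
				  struct drm_i915_gem_exec_object2 *objects,
				  uint32_t object_count)
{
	struct drm_i915_gem_execbuffer_ext_perf perf_ext;
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_getparam gp;
	int has_exec_perf = 0;

	/* Kernel support is advertised through getparam. */
	gp.param = I915_PARAM_HAS_EXEC_PERF_CONFIG;
	gp.value = &has_exec_perf;
	if (ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp) || !has_exec_perf)
		return -1;

	memset(&perf_ext, 0, sizeof(perf_ext));	/* also clears base.next_extension and pad */
	perf_ext.base.name = DRM_I915_GEM_EXECBUFFER_EXT_PERF;
	perf_ext.perf_fd = perf_fd;		/* stream from DRM_IOCTL_I915_PERF_OPEN */
	perf_ext.oa_config = oa_config_id;	/* metrics set to switch to */

	memset(&execbuf, 0, sizeof(execbuf));	/* rsvd1 == 0: default context */
	execbuf.buffers_ptr = (uintptr_t)objects;
	execbuf.buffer_count = object_count;
	execbuf.flags = I915_EXEC_RENDER | I915_EXEC_EXT;
	/* With I915_EXEC_EXT set, cliprects_ptr carries the extension chain. */
	execbuf.cliprects_ptr = (uintptr_t)&perf_ext;

	return ioctl(drm_fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
}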