Message ID | 20190627080045.8814-8-lionel.g.landwerlin@intel.com (mailing list archive) |
---|---|
State | New, archived |
Series | drm/i915: Vulkan performance query support |
Quoting Lionel Landwerlin (2019-06-27 09:00:42)
> +static int
> +get_execbuf_oa_config(struct drm_i915_private *dev_priv,
> +		      s32 perf_fd, u64 oa_config_id,
> +		      struct i915_oa_config **out_oa_config,
> +		      struct drm_i915_gem_object **out_oa_obj)
> +{
> +	struct file *perf_file;
> +	int ret;
> +
> +	if (!dev_priv->perf.oa.exclusive_stream)
> +		return -EINVAL;
> +
> +	perf_file = fget(perf_fd);
> +	if (!perf_file)
> +		return -EINVAL;
> +
> +	if (perf_file->private_data != dev_priv->perf.oa.exclusive_stream)
> +		return -EINVAL;

Leaked the file.

> +	fput(perf_file);

and we never use perf_file again? Its only use is as a permission check
on the ioctl? Just checking in case perf_fd is dup()ed by the user after
we check it.
-Chris
Quoting Lionel Landwerlin (2019-06-27 09:00:42) > We want the ability to dispatch a set of command buffer to the > hardware, each with a different OA configuration. To achieve this, we > reuse a couple of fields from the execbuf2 struct (I CAN HAZ > execbuf3?) to notify what OA configuration should be used for a batch > buffer. This requires the process making the execbuf with this flag to > also own the perf fd at the time of execbuf. > > v2: Add a emit_oa_config() vfunc in the intel_engine_cs (Chris) > Move oa_config vma to active (Chris) > > v3: Don't drop the lock for engine lookup (Chris) > Move OA config vma to active before writing the ringbuffer (Chris) > > v4: Reuse i915_user_extension_fn > Serialize requests with OA config updates > > Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > --- > .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 130 +++++++++++++++++- > drivers/gpu/drm/i915/gt/intel_engine_cs.c | 2 + > drivers/gpu/drm/i915/gt/intel_engine_types.h | 9 ++ > drivers/gpu/drm/i915/gt/intel_lrc.c | 1 + > drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 4 +- > drivers/gpu/drm/i915/i915_drv.c | 4 + > drivers/gpu/drm/i915/i915_drv.h | 10 +- > drivers/gpu/drm/i915/i915_perf.c | 26 ++-- > include/uapi/drm/i915_drm.h | 37 +++++ > 9 files changed, 208 insertions(+), 15 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > index 476fade6fcab..18ae3e8a4e02 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > @@ -283,7 +283,11 @@ struct i915_execbuffer { > struct { > u64 flags; /** Available extensions parameters */ > struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences; > + struct drm_i915_gem_execbuffer_ext_perf perf_config; > } extensions; > + > + struct i915_oa_config *oa_config; /** HW configuration for OA, NULL is not needed. */ > + struct drm_i915_gem_object *oa_bo; > }; > > #define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags]) > @@ -1210,6 +1214,34 @@ static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma) > return err; > } > > + > +static int > +get_execbuf_oa_config(struct drm_i915_private *dev_priv, > + s32 perf_fd, u64 oa_config_id, > + struct i915_oa_config **out_oa_config, > + struct drm_i915_gem_object **out_oa_obj) > +{ > + struct file *perf_file; > + int ret; > + > + if (!dev_priv->perf.oa.exclusive_stream) > + return -EINVAL; > + > + perf_file = fget(perf_fd); > + if (!perf_file) > + return -EINVAL; > + > + if (perf_file->private_data != dev_priv->perf.oa.exclusive_stream) > + return -EINVAL; > + > + fput(perf_file); > + > + ret = i915_perf_get_oa_config(dev_priv, oa_config_id, > + out_oa_config, out_oa_obj); > + > + return ret; > +} > + > static int __reloc_gpu_alloc(struct i915_execbuffer *eb, > struct i915_vma *vma, > unsigned int len) > @@ -2072,6 +2104,64 @@ add_to_client(struct i915_request *rq, struct drm_file *file) > list_add_tail(&rq->client_link, &rq->file_priv->mm.request_list); > } > > +static int eb_oa_config(struct i915_execbuffer *eb) > +{ > + struct i915_vma *oa_vma; > + int err; > + > + if (!eb->oa_config) > + return 0; > + > + err = i915_active_request_set(&eb->engine->last_oa_config, > + eb->request); > + if (err) > + return err; > + > + /* > + * If the perf stream was opened with hold preemption, flag the > + * request properly so that the priority of the request is bumped once > + * it reaches the execlist ports. 
> + */ > + if (eb->i915->perf.oa.exclusive_stream->hold_preemption) > + eb->request->has_perf = true; Leave this chunk for the next battle^W patch. > + /* > + * If the config hasn't changed, skip reconfiguring the HW (this is > + * subject to a delay we want to avoid has much as possible). > + */ > + if (eb->oa_config == eb->i915->perf.oa.exclusive_stream->oa_config) > + return 0; > + > + oa_vma = i915_vma_instance(eb->oa_bo, > + &eb->engine->i915->ggtt.vm, NULL); > + if (unlikely(IS_ERR(oa_vma))) > + return PTR_ERR(oa_vma); > + > + err = i915_vma_pin(oa_vma, 0, 0, PIN_GLOBAL); > + if (err) > + return err; Ugh. We should not be pinned after creating the request. Might not matter -- it's just reservation under its own lock that must not be crossed with the timeline lock currently held here. > + > + err = i915_vma_move_to_active(oa_vma, eb->request, 0); You can unpin immediately after move_to_active (as the active is treated by being pinned). I'm thinking this will be replaced with just i915_active_ref() as you only care about the vma. > + if (err) { > + i915_vma_unpin(oa_vma); > + return err; > + } > + > + err = eb->engine->emit_bb_start(eb->request, > + oa_vma->node.start, > + 0, I915_DISPATCH_SECURE); > + if (err) { > + i915_vma_unpin(oa_vma); > + return err; > + } > + > + i915_vma_unpin(oa_vma); > + > + swap(eb->oa_config, eb->i915->perf.oa.exclusive_stream->oa_config); > + > + return 0; > +} > + > static int eb_submit(struct i915_execbuffer *eb) > { > int err; > @@ -2098,6 +2188,10 @@ static int eb_submit(struct i915_execbuffer *eb) > return err; > } > > + err = eb_oa_config(eb); > + if (err) > + return err; > + > err = eb->engine->emit_bb_start(eb->request, > eb->batch->node.start + > eb->batch_start_offset, > @@ -2520,8 +2614,22 @@ static int parse_timeline_fences(struct i915_user_extension __user *ext, void *d > return 0; > } > > +static int parse_perf_config(struct i915_user_extension __user *ext, void *data) > +{ > + struct i915_execbuffer *eb = data; > + > + if (copy_from_user(&eb->extensions.perf_config, ext, > + sizeof(eb->extensions.perf_config))) > + return -EFAULT; > + > + eb->extensions.flags |= BIT(DRM_I915_GEM_EXECBUFFER_EXT_PERF); > + > + return 0; > +} > + > static const i915_user_extension_fn execbuf_extensions[] = { > [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences, > + [DRM_I915_GEM_EXECBUFFER_EXT_PERF] = parse_perf_config, > }; > > static int > @@ -2573,6 +2681,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, > eb.buffer_count = args->buffer_count; > eb.batch_start_offset = args->batch_start_offset; > eb.batch_len = args->batch_len; > + eb.oa_config = NULL; > > eb.batch_flags = 0; > if (args->flags & I915_EXEC_SECURE) { > @@ -2651,9 +2760,23 @@ i915_gem_do_execbuffer(struct drm_device *dev, > if (unlikely(err)) > goto err_unlock; > > + if (eb.extensions.flags & BIT(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) { > + if (!intel_engine_has_oa(eb.engine)) { > + err = -ENODEV; > + goto err_engine; > + } > + > + err = get_execbuf_oa_config(eb.i915, > + eb.extensions.perf_config.perf_fd, > + eb.extensions.perf_config.oa_config, > + &eb.oa_config, &eb.oa_bo); > + if (err) > + goto err_engine; Why is this under the struct_mutex? 
> + } > + > err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */ > if (unlikely(err)) > - goto err_engine; > + goto err_oa; > > err = eb_relocate(&eb); > if (err) { > @@ -2806,6 +2929,11 @@ i915_gem_do_execbuffer(struct drm_device *dev, > err_vma: > if (eb.exec) > eb_release_vmas(&eb); > +err_oa: > + if (eb.oa_config) { > + i915_gem_object_put(eb.oa_bo); > + i915_oa_config_put(eb.oa_config); > + } > err_engine: > eb_unpin_context(&eb); > err_unlock: > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c > index 4961f74fd902..ba4d2c1abd1a 100644 > --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c > +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c > @@ -859,6 +859,8 @@ int intel_engine_init_common(struct intel_engine_cs *engine) > > engine->set_default_submission(engine); > > + INIT_ACTIVE_REQUEST(&engine->last_oa_config); > + > return 0; > > err_unpin: > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h > index 7e056114344e..39da40937e7f 100644 > --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h > +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h > @@ -363,6 +363,8 @@ struct intel_engine_cs { > struct i915_wa_list wa_list; > struct i915_wa_list whitelist; > > + struct i915_active_request last_oa_config; > + > u32 irq_keep_mask; /* always keep these interrupts */ > u32 irq_enable_mask; /* bitmask to enable ring interrupt */ > void (*irq_enable)(struct intel_engine_cs *engine); > @@ -446,6 +448,7 @@ struct intel_engine_cs { > #define I915_ENGINE_HAS_SEMAPHORES BIT(3) > #define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4) > #define I915_ENGINE_IS_VIRTUAL BIT(5) > +#define I915_ENGINE_HAS_OA BIT(6) > unsigned int flags; > > /* > @@ -541,6 +544,12 @@ intel_engine_is_virtual(const struct intel_engine_cs *engine) > return engine->flags & I915_ENGINE_IS_VIRTUAL; > } > > +static inline bool > +intel_engine_has_oa(const struct intel_engine_cs *engine) > +{ > + return engine->flags & I915_ENGINE_HAS_OA; > +} > + > #define instdone_slice_mask(dev_priv__) \ > (IS_GEN(dev_priv__, 7) ? 
\ > 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask) > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c > index 28685ba91a2c..a6ea468986d1 100644 > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c > @@ -2763,6 +2763,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) > engine->init_context = gen8_init_rcs_context; > engine->emit_flush = gen8_emit_flush_render; > engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; > + engine->flags |= I915_ENGINE_HAS_OA; > } > > return 0; > diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c > index f094406dcc56..a1aee092d0a3 100644 > --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c > +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c > @@ -2192,8 +2192,10 @@ static void setup_rcs(struct intel_engine_cs *engine) > engine->irq_enable_mask = I915_USER_INTERRUPT; > } > > - if (IS_HASWELL(i915)) > + if (IS_HASWELL(i915)) { > engine->emit_bb_start = hsw_emit_bb_start; > + engine->flags |= I915_ENGINE_HAS_OA; > + } > > engine->resume = rcs_resume; > } > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c > index 48f9009a6318..ef54cf1c54a5 100644 > --- a/drivers/gpu/drm/i915/i915_drv.c > +++ b/drivers/gpu/drm/i915/i915_drv.c > @@ -486,6 +486,10 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data, > case I915_PARAM_PERF_REVISION: > value = 1; > break; > + case I915_PARAM_HAS_EXEC_PERF_CONFIG: > + /* Obviously requires perf support. */ > + value = dev_priv->perf.initialized; > + break; > default: > DRM_DEBUG("Unknown parameter %d\n", param->param); > return -EINVAL; > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 0323c8182a67..51b103493623 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1096,6 +1096,8 @@ struct i915_oa_reg { > }; > > struct i915_oa_config { > + struct drm_i915_private *i915; > + > char uuid[UUID_STRING_LEN + 1]; > int id; > > @@ -1114,7 +1116,7 @@ struct i915_oa_config { > > struct list_head vma_link; > > - atomic_t ref_count; > + struct kref ref; > }; > > struct i915_perf_stream; > @@ -2612,6 +2614,12 @@ int i915_perf_get_oa_config(struct drm_i915_private *i915, > int metrics_set, > struct i915_oa_config **out_config, > struct drm_i915_gem_object **out_obj); > +void i915_oa_config_release(struct kref *ref); > + > +static inline void i915_oa_config_put(struct i915_oa_config *oa_config) > +{ > + kref_put(&oa_config->ref, i915_oa_config_release); > +} > > /* i915_gem_evict.c */ > int __must_check i915_gem_evict_something(struct i915_address_space *vm, > diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c > index b2f5ba87921c..34d74fa64c74 100644 > --- a/drivers/gpu/drm/i915/i915_perf.c > +++ b/drivers/gpu/drm/i915/i915_perf.c > @@ -366,10 +366,11 @@ struct perf_open_properties { > int oa_period_exponent; > }; > > -static void put_oa_config(struct i915_oa_config *oa_config) > +void i915_oa_config_release(struct kref *ref) > { > - if (!atomic_dec_and_test(&oa_config->ref_count)) > - return; > + struct i915_oa_config *oa_config = container_of(ref, typeof(*oa_config), ref); > + > + lockdep_assert_held(&oa_config->i915->perf.metrics_lock); That's not true for the eb paths. 
> > if (oa_config->obj) { > list_del(&oa_config->vma_link); > @@ -485,7 +486,7 @@ int i915_perf_get_oa_config(struct drm_i915_private *i915, > } > > if (out_config) { > - atomic_inc(&oa_config->ref_count); > + kref_get(&oa_config->ref); > *out_config = oa_config; > } > > @@ -507,7 +508,7 @@ int i915_perf_get_oa_config(struct drm_i915_private *i915, > > err_buf_alloc: > if (out_config) { > - put_oa_config(oa_config); > + i915_oa_config_put(oa_config); > *out_config = NULL; > } > unlock: > @@ -1483,7 +1484,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream) > if (stream->ctx) > oa_put_render_ctx_id(stream); > > - put_oa_config(stream->oa_config); > + i915_oa_config_put(stream->oa_config); > > if (dev_priv->perf.oa.spurious_report_rs.missed) { > DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n", > @@ -2430,7 +2431,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, > free_oa_buffer(dev_priv); > > err_oa_buf_alloc: > - put_oa_config(stream->oa_config); > + i915_oa_config_put(stream->oa_config); > > intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); > intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref); > @@ -3246,7 +3247,7 @@ void i915_perf_register(struct drm_i915_private *dev_priv) > if (ret) > goto sysfs_error; > > - atomic_set(&dev_priv->perf.oa.test_config.ref_count, 1); > + dev_priv->perf.oa.test_config.i915 = dev_priv; > > goto exit; > > @@ -3502,7 +3503,8 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, > return -ENOMEM; > } > > - atomic_set(&oa_config->ref_count, 1); > + oa_config->i915 = dev_priv; > + kref_init(&oa_config->ref); > > if (!uuid_is_valid(args->uuid)) { > DRM_DEBUG("Invalid uuid format for OA config\n"); > @@ -3601,7 +3603,7 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, > sysfs_err: > mutex_unlock(&dev_priv->perf.metrics_lock); > reg_err: > - put_oa_config(oa_config); > + i915_oa_config_put(oa_config); > DRM_DEBUG("Failed to add new OA config\n"); > return err; > } > @@ -3655,7 +3657,7 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, > > DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id); > > - put_oa_config(oa_config); > + i915_oa_config_put(oa_config); > > config_err: > mutex_unlock(&dev_priv->perf.metrics_lock); > @@ -3824,7 +3826,7 @@ static int destroy_config(int id, void *p, void *data) > { > struct i915_oa_config *oa_config = p; > > - put_oa_config(oa_config); > + i915_oa_config_put(oa_config); > > return 0; > } > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > index b7fe1915e34d..5938a73963fe 100644 > --- a/include/uapi/drm/i915_drm.h > +++ b/include/uapi/drm/i915_drm.h > @@ -623,6 +623,16 @@ typedef struct drm_i915_irq_wait { > */ > #define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55 > > +/* > + * Request an i915/perf performance configuration change before running the > + * commands given in an execbuf. > + * > + * Performance configuration ID and the file descriptor of the i915 perf > + * stream are given through drm_i915_gem_execbuffer_ext_perf. See > + * I915_EXEC_EXT. > + */ > +#define I915_PARAM_HAS_EXEC_PERF_CONFIG 56 > + > /* Must be kept compact -- no holes and well documented */ > > typedef struct drm_i915_getparam { > @@ -1026,6 +1036,12 @@ enum drm_i915_gem_execbuffer_ext { > */ > DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES, > > + /** > + * This identifier is associated with > + * drm_i915_gem_execbuffer_perf_ext. 
> + */ > + DRM_I915_GEM_EXECBUFFER_EXT_PERF, > + > DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */ > }; > > @@ -1054,6 +1070,27 @@ struct drm_i915_gem_execbuffer_ext_timeline_fences { > __u64 values_ptr; > }; > > +struct drm_i915_gem_execbuffer_ext_perf { > + struct i915_user_extension base; > + > + /** > + * Performance file descriptor returned by DRM_IOCTL_I915_PERF_OPEN. > + * This is used to identify that the application > + */ > + __s32 perf_fd; > + > + /** > + * Unused for now. Must be cleared to zero. > + */ > + __u32 pad; > + > + /** > + * OA configuration ID to switch to before executing the commands > + * associated to the execbuf. > + */ > + __u64 oa_config; > +}; > + > struct drm_i915_gem_execbuffer2 { > /** > * List of gem_exec_object2 structs > -- > 2.21.0.392.gf8f6787159e > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
On 27/06/2019 12:19, Chris Wilson wrote:
> Quoting Lionel Landwerlin (2019-06-27 09:00:42)
>> +static int
>> +get_execbuf_oa_config(struct drm_i915_private *dev_priv,
>> +		       s32 perf_fd, u64 oa_config_id,
>> +		       struct i915_oa_config **out_oa_config,
>> +		       struct drm_i915_gem_object **out_oa_obj)
>> +{
>> +	struct file *perf_file;
>> +	int ret;
>> +
>> +	if (!dev_priv->perf.oa.exclusive_stream)
>> +		return -EINVAL;
>> +
>> +	perf_file = fget(perf_fd);
>> +	if (!perf_file)
>> +		return -EINVAL;
>> +
>> +	if (perf_file->private_data != dev_priv->perf.oa.exclusive_stream)
>> +		return -EINVAL;
> Leaked the file.

Thanks!

>
>> +	fput(perf_file);
> and we never use perf_file again? Its only use is as a permission check
> on the ioctl? Just checking in case perf_fd is dup()ed by the user after
> we check it.
> -Chris
>

Correct, if it's dup()ed then the application enters undefined behavior
territory. Much like not synchronizing buffer access across engines ;)

-Lionel
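One possible shape of the fix for the leak discussed above — a sketch against the helper as posted in this revision, not necessarily what a later revision does — keeps the fd purely as a permission check and drops the file reference on every exit path:

static int
get_execbuf_oa_config(struct drm_i915_private *dev_priv,
		      s32 perf_fd, u64 oa_config_id,
		      struct i915_oa_config **out_oa_config,
		      struct drm_i915_gem_object **out_oa_obj)
{
	struct file *perf_file;
	int ret = -EINVAL;

	if (!dev_priv->perf.oa.exclusive_stream)
		return -EINVAL;

	perf_file = fget(perf_fd);
	if (!perf_file)
		return -EINVAL;

	/* The fd is only used as a permission check against the open stream. */
	if (perf_file->private_data == dev_priv->perf.oa.exclusive_stream)
		ret = i915_perf_get_oa_config(dev_priv, oa_config_id,
					      out_oa_config, out_oa_obj);

	/* Drop the file reference on all paths, fixing the leak. */
	fput(perf_file);

	return ret;
}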
On 27/06/2019 12:45, Chris Wilson wrote: > Quoting Lionel Landwerlin (2019-06-27 09:00:42) >> We want the ability to dispatch a set of command buffer to the >> hardware, each with a different OA configuration. To achieve this, we >> reuse a couple of fields from the execbuf2 struct (I CAN HAZ >> execbuf3?) to notify what OA configuration should be used for a batch >> buffer. This requires the process making the execbuf with this flag to >> also own the perf fd at the time of execbuf. >> >> v2: Add a emit_oa_config() vfunc in the intel_engine_cs (Chris) >> Move oa_config vma to active (Chris) >> >> v3: Don't drop the lock for engine lookup (Chris) >> Move OA config vma to active before writing the ringbuffer (Chris) >> >> v4: Reuse i915_user_extension_fn >> Serialize requests with OA config updates >> >> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> >> --- >> .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 130 +++++++++++++++++- >> drivers/gpu/drm/i915/gt/intel_engine_cs.c | 2 + >> drivers/gpu/drm/i915/gt/intel_engine_types.h | 9 ++ >> drivers/gpu/drm/i915/gt/intel_lrc.c | 1 + >> drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 4 +- >> drivers/gpu/drm/i915/i915_drv.c | 4 + >> drivers/gpu/drm/i915/i915_drv.h | 10 +- >> drivers/gpu/drm/i915/i915_perf.c | 26 ++-- >> include/uapi/drm/i915_drm.h | 37 +++++ >> 9 files changed, 208 insertions(+), 15 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c >> index 476fade6fcab..18ae3e8a4e02 100644 >> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c >> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c >> @@ -283,7 +283,11 @@ struct i915_execbuffer { >> struct { >> u64 flags; /** Available extensions parameters */ >> struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences; >> + struct drm_i915_gem_execbuffer_ext_perf perf_config; >> } extensions; >> + >> + struct i915_oa_config *oa_config; /** HW configuration for OA, NULL is not needed. 
*/ >> + struct drm_i915_gem_object *oa_bo; >> }; >> >> #define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags]) >> @@ -1210,6 +1214,34 @@ static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma) >> return err; >> } >> >> + >> +static int >> +get_execbuf_oa_config(struct drm_i915_private *dev_priv, >> + s32 perf_fd, u64 oa_config_id, >> + struct i915_oa_config **out_oa_config, >> + struct drm_i915_gem_object **out_oa_obj) >> +{ >> + struct file *perf_file; >> + int ret; >> + >> + if (!dev_priv->perf.oa.exclusive_stream) >> + return -EINVAL; >> + >> + perf_file = fget(perf_fd); >> + if (!perf_file) >> + return -EINVAL; >> + >> + if (perf_file->private_data != dev_priv->perf.oa.exclusive_stream) >> + return -EINVAL; >> + >> + fput(perf_file); >> + >> + ret = i915_perf_get_oa_config(dev_priv, oa_config_id, >> + out_oa_config, out_oa_obj); >> + >> + return ret; >> +} >> + >> static int __reloc_gpu_alloc(struct i915_execbuffer *eb, >> struct i915_vma *vma, >> unsigned int len) >> @@ -2072,6 +2104,64 @@ add_to_client(struct i915_request *rq, struct drm_file *file) >> list_add_tail(&rq->client_link, &rq->file_priv->mm.request_list); >> } >> >> +static int eb_oa_config(struct i915_execbuffer *eb) >> +{ >> + struct i915_vma *oa_vma; >> + int err; >> + >> + if (!eb->oa_config) >> + return 0; >> + >> + err = i915_active_request_set(&eb->engine->last_oa_config, >> + eb->request); >> + if (err) >> + return err; >> + >> + /* >> + * If the perf stream was opened with hold preemption, flag the >> + * request properly so that the priority of the request is bumped once >> + * it reaches the execlist ports. >> + */ >> + if (eb->i915->perf.oa.exclusive_stream->hold_preemption) >> + eb->request->has_perf = true; > Leave this chunk for the next battle^W patch. Oops. > >> + /* >> + * If the config hasn't changed, skip reconfiguring the HW (this is >> + * subject to a delay we want to avoid has much as possible). >> + */ >> + if (eb->oa_config == eb->i915->perf.oa.exclusive_stream->oa_config) >> + return 0; >> + >> + oa_vma = i915_vma_instance(eb->oa_bo, >> + &eb->engine->i915->ggtt.vm, NULL); >> + if (unlikely(IS_ERR(oa_vma))) >> + return PTR_ERR(oa_vma); >> + >> + err = i915_vma_pin(oa_vma, 0, 0, PIN_GLOBAL); >> + if (err) >> + return err; > Ugh. We should not be pinned after creating the request. Might not > matter -- it's just reservation under its own lock that must not be > crossed with the timeline lock currently held here. Should I move this into get_execbuf_oa_config() ? > >> + >> + err = i915_vma_move_to_active(oa_vma, eb->request, 0); > You can unpin immediately after move_to_active (as the active is treated > by being pinned). > > I'm thinking this will be replaced with just i915_active_ref() as you > only care about the vma. 
> >> + if (err) { >> + i915_vma_unpin(oa_vma); >> + return err; >> + } >> + >> + err = eb->engine->emit_bb_start(eb->request, >> + oa_vma->node.start, >> + 0, I915_DISPATCH_SECURE); >> + if (err) { >> + i915_vma_unpin(oa_vma); >> + return err; >> + } >> + >> + i915_vma_unpin(oa_vma); >> + >> + swap(eb->oa_config, eb->i915->perf.oa.exclusive_stream->oa_config); >> + >> + return 0; >> +} >> + >> static int eb_submit(struct i915_execbuffer *eb) >> { >> int err; >> @@ -2098,6 +2188,10 @@ static int eb_submit(struct i915_execbuffer *eb) >> return err; >> } >> >> + err = eb_oa_config(eb); >> + if (err) >> + return err; >> + >> err = eb->engine->emit_bb_start(eb->request, >> eb->batch->node.start + >> eb->batch_start_offset, >> @@ -2520,8 +2614,22 @@ static int parse_timeline_fences(struct i915_user_extension __user *ext, void *d >> return 0; >> } >> >> +static int parse_perf_config(struct i915_user_extension __user *ext, void *data) >> +{ >> + struct i915_execbuffer *eb = data; >> + >> + if (copy_from_user(&eb->extensions.perf_config, ext, >> + sizeof(eb->extensions.perf_config))) >> + return -EFAULT; >> + >> + eb->extensions.flags |= BIT(DRM_I915_GEM_EXECBUFFER_EXT_PERF); >> + >> + return 0; >> +} >> + >> static const i915_user_extension_fn execbuf_extensions[] = { >> [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences, >> + [DRM_I915_GEM_EXECBUFFER_EXT_PERF] = parse_perf_config, >> }; >> >> static int >> @@ -2573,6 +2681,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, >> eb.buffer_count = args->buffer_count; >> eb.batch_start_offset = args->batch_start_offset; >> eb.batch_len = args->batch_len; >> + eb.oa_config = NULL; >> >> eb.batch_flags = 0; >> if (args->flags & I915_EXEC_SECURE) { >> @@ -2651,9 +2760,23 @@ i915_gem_do_execbuffer(struct drm_device *dev, >> if (unlikely(err)) >> goto err_unlock; >> >> + if (eb.extensions.flags & BIT(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) { >> + if (!intel_engine_has_oa(eb.engine)) { >> + err = -ENODEV; >> + goto err_engine; >> + } >> + >> + err = get_execbuf_oa_config(eb.i915, >> + eb.extensions.perf_config.perf_fd, >> + eb.extensions.perf_config.oa_config, >> + &eb.oa_config, &eb.oa_bo); >> + if (err) >> + goto err_engine; > Why is this under the struct_mutex? Access to dev_priv->perf.oa.exclusive_stream must be under struct_mutex. Also because we verify that the engine actually has OA support. I could split the getting the configuration part away. 
> >> + } >> + >> err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */ >> if (unlikely(err)) >> - goto err_engine; >> + goto err_oa; >> >> err = eb_relocate(&eb); >> if (err) { >> @@ -2806,6 +2929,11 @@ i915_gem_do_execbuffer(struct drm_device *dev, >> err_vma: >> if (eb.exec) >> eb_release_vmas(&eb); >> +err_oa: >> + if (eb.oa_config) { >> + i915_gem_object_put(eb.oa_bo); >> + i915_oa_config_put(eb.oa_config); >> + } >> err_engine: >> eb_unpin_context(&eb); >> err_unlock: >> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c >> index 4961f74fd902..ba4d2c1abd1a 100644 >> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c >> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c >> @@ -859,6 +859,8 @@ int intel_engine_init_common(struct intel_engine_cs *engine) >> >> engine->set_default_submission(engine); >> >> + INIT_ACTIVE_REQUEST(&engine->last_oa_config); >> + >> return 0; >> >> err_unpin: >> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h >> index 7e056114344e..39da40937e7f 100644 >> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h >> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h >> @@ -363,6 +363,8 @@ struct intel_engine_cs { >> struct i915_wa_list wa_list; >> struct i915_wa_list whitelist; >> >> + struct i915_active_request last_oa_config; >> + >> u32 irq_keep_mask; /* always keep these interrupts */ >> u32 irq_enable_mask; /* bitmask to enable ring interrupt */ >> void (*irq_enable)(struct intel_engine_cs *engine); >> @@ -446,6 +448,7 @@ struct intel_engine_cs { >> #define I915_ENGINE_HAS_SEMAPHORES BIT(3) >> #define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4) >> #define I915_ENGINE_IS_VIRTUAL BIT(5) >> +#define I915_ENGINE_HAS_OA BIT(6) >> unsigned int flags; >> >> /* >> @@ -541,6 +544,12 @@ intel_engine_is_virtual(const struct intel_engine_cs *engine) >> return engine->flags & I915_ENGINE_IS_VIRTUAL; >> } >> >> +static inline bool >> +intel_engine_has_oa(const struct intel_engine_cs *engine) >> +{ >> + return engine->flags & I915_ENGINE_HAS_OA; >> +} >> + >> #define instdone_slice_mask(dev_priv__) \ >> (IS_GEN(dev_priv__, 7) ? 
\ >> 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask) >> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c >> index 28685ba91a2c..a6ea468986d1 100644 >> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c >> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c >> @@ -2763,6 +2763,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) >> engine->init_context = gen8_init_rcs_context; >> engine->emit_flush = gen8_emit_flush_render; >> engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; >> + engine->flags |= I915_ENGINE_HAS_OA; >> } >> >> return 0; >> diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c >> index f094406dcc56..a1aee092d0a3 100644 >> --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c >> +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c >> @@ -2192,8 +2192,10 @@ static void setup_rcs(struct intel_engine_cs *engine) >> engine->irq_enable_mask = I915_USER_INTERRUPT; >> } >> >> - if (IS_HASWELL(i915)) >> + if (IS_HASWELL(i915)) { >> engine->emit_bb_start = hsw_emit_bb_start; >> + engine->flags |= I915_ENGINE_HAS_OA; >> + } >> >> engine->resume = rcs_resume; >> } >> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c >> index 48f9009a6318..ef54cf1c54a5 100644 >> --- a/drivers/gpu/drm/i915/i915_drv.c >> +++ b/drivers/gpu/drm/i915/i915_drv.c >> @@ -486,6 +486,10 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data, >> case I915_PARAM_PERF_REVISION: >> value = 1; >> break; >> + case I915_PARAM_HAS_EXEC_PERF_CONFIG: >> + /* Obviously requires perf support. */ >> + value = dev_priv->perf.initialized; >> + break; >> default: >> DRM_DEBUG("Unknown parameter %d\n", param->param); >> return -EINVAL; >> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h >> index 0323c8182a67..51b103493623 100644 >> --- a/drivers/gpu/drm/i915/i915_drv.h >> +++ b/drivers/gpu/drm/i915/i915_drv.h >> @@ -1096,6 +1096,8 @@ struct i915_oa_reg { >> }; >> >> struct i915_oa_config { >> + struct drm_i915_private *i915; >> + >> char uuid[UUID_STRING_LEN + 1]; >> int id; >> >> @@ -1114,7 +1116,7 @@ struct i915_oa_config { >> >> struct list_head vma_link; >> >> - atomic_t ref_count; >> + struct kref ref; >> }; >> >> struct i915_perf_stream; >> @@ -2612,6 +2614,12 @@ int i915_perf_get_oa_config(struct drm_i915_private *i915, >> int metrics_set, >> struct i915_oa_config **out_config, >> struct drm_i915_gem_object **out_obj); >> +void i915_oa_config_release(struct kref *ref); >> + >> +static inline void i915_oa_config_put(struct i915_oa_config *oa_config) >> +{ >> + kref_put(&oa_config->ref, i915_oa_config_release); >> +} >> >> /* i915_gem_evict.c */ >> int __must_check i915_gem_evict_something(struct i915_address_space *vm, >> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c >> index b2f5ba87921c..34d74fa64c74 100644 >> --- a/drivers/gpu/drm/i915/i915_perf.c >> +++ b/drivers/gpu/drm/i915/i915_perf.c >> @@ -366,10 +366,11 @@ struct perf_open_properties { >> int oa_period_exponent; >> }; >> >> -static void put_oa_config(struct i915_oa_config *oa_config) >> +void i915_oa_config_release(struct kref *ref) >> { >> - if (!atomic_dec_and_test(&oa_config->ref_count)) >> - return; >> + struct i915_oa_config *oa_config = container_of(ref, typeof(*oa_config), ref); >> + >> + lockdep_assert_held(&oa_config->i915->perf.metrics_lock); > That's not true for the eb paths. Actually that's not true anywhere... 
> >> >> if (oa_config->obj) { >> list_del(&oa_config->vma_link); >> @@ -485,7 +486,7 @@ int i915_perf_get_oa_config(struct drm_i915_private *i915, >> } >> >> if (out_config) { >> - atomic_inc(&oa_config->ref_count); >> + kref_get(&oa_config->ref); >> *out_config = oa_config; >> } >> >> @@ -507,7 +508,7 @@ int i915_perf_get_oa_config(struct drm_i915_private *i915, >> >> err_buf_alloc: >> if (out_config) { >> - put_oa_config(oa_config); >> + i915_oa_config_put(oa_config); >> *out_config = NULL; >> } >> unlock: >> @@ -1483,7 +1484,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream) >> if (stream->ctx) >> oa_put_render_ctx_id(stream); >> >> - put_oa_config(stream->oa_config); >> + i915_oa_config_put(stream->oa_config); >> >> if (dev_priv->perf.oa.spurious_report_rs.missed) { >> DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n", >> @@ -2430,7 +2431,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, >> free_oa_buffer(dev_priv); >> >> err_oa_buf_alloc: >> - put_oa_config(stream->oa_config); >> + i915_oa_config_put(stream->oa_config); >> >> intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); >> intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref); >> @@ -3246,7 +3247,7 @@ void i915_perf_register(struct drm_i915_private *dev_priv) >> if (ret) >> goto sysfs_error; >> >> - atomic_set(&dev_priv->perf.oa.test_config.ref_count, 1); >> + dev_priv->perf.oa.test_config.i915 = dev_priv; >> >> goto exit; >> >> @@ -3502,7 +3503,8 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, >> return -ENOMEM; >> } >> >> - atomic_set(&oa_config->ref_count, 1); >> + oa_config->i915 = dev_priv; >> + kref_init(&oa_config->ref); >> >> if (!uuid_is_valid(args->uuid)) { >> DRM_DEBUG("Invalid uuid format for OA config\n"); >> @@ -3601,7 +3603,7 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, >> sysfs_err: >> mutex_unlock(&dev_priv->perf.metrics_lock); >> reg_err: >> - put_oa_config(oa_config); >> + i915_oa_config_put(oa_config); >> DRM_DEBUG("Failed to add new OA config\n"); >> return err; >> } >> @@ -3655,7 +3657,7 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, >> >> DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id); >> >> - put_oa_config(oa_config); >> + i915_oa_config_put(oa_config); >> >> config_err: >> mutex_unlock(&dev_priv->perf.metrics_lock); >> @@ -3824,7 +3826,7 @@ static int destroy_config(int id, void *p, void *data) >> { >> struct i915_oa_config *oa_config = p; >> >> - put_oa_config(oa_config); >> + i915_oa_config_put(oa_config); >> >> return 0; >> } >> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h >> index b7fe1915e34d..5938a73963fe 100644 >> --- a/include/uapi/drm/i915_drm.h >> +++ b/include/uapi/drm/i915_drm.h >> @@ -623,6 +623,16 @@ typedef struct drm_i915_irq_wait { >> */ >> #define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55 >> >> +/* >> + * Request an i915/perf performance configuration change before running the >> + * commands given in an execbuf. >> + * >> + * Performance configuration ID and the file descriptor of the i915 perf >> + * stream are given through drm_i915_gem_execbuffer_ext_perf. See >> + * I915_EXEC_EXT. 
>> + */ >> +#define I915_PARAM_HAS_EXEC_PERF_CONFIG 56 >> + >> /* Must be kept compact -- no holes and well documented */ >> >> typedef struct drm_i915_getparam { >> @@ -1026,6 +1036,12 @@ enum drm_i915_gem_execbuffer_ext { >> */ >> DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES, >> >> + /** >> + * This identifier is associated with >> + * drm_i915_gem_execbuffer_perf_ext. >> + */ >> + DRM_I915_GEM_EXECBUFFER_EXT_PERF, >> + >> DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */ >> }; >> >> @@ -1054,6 +1070,27 @@ struct drm_i915_gem_execbuffer_ext_timeline_fences { >> __u64 values_ptr; >> }; >> >> +struct drm_i915_gem_execbuffer_ext_perf { >> + struct i915_user_extension base; >> + >> + /** >> + * Performance file descriptor returned by DRM_IOCTL_I915_PERF_OPEN. >> + * This is used to identify that the application >> + */ >> + __s32 perf_fd; >> + >> + /** >> + * Unused for now. Must be cleared to zero. >> + */ >> + __u32 pad; >> + >> + /** >> + * OA configuration ID to switch to before executing the commands >> + * associated to the execbuf. >> + */ >> + __u64 oa_config; >> +}; >> + >> struct drm_i915_gem_execbuffer2 { >> /** >> * List of gem_exec_object2 structs >> -- >> 2.21.0.392.gf8f6787159e >> >> _______________________________________________ >> Intel-gfx mailing list >> Intel-gfx@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
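On the lockdep_assert_held() point above (the assertion is not currently true on any path), one possible direction would be to take metrics_lock on the final reference drop. A sketch only, assuming the kernel's kref_put_mutex() helper, whose release callback runs with the mutex held and is expected to drop it:

static inline void i915_oa_config_put(struct i915_oa_config *oa_config)
{
	/* On the final reference, i915_oa_config_release() is called with
	 * perf.metrics_lock held, so its lockdep assertion becomes valid. */
	kref_put_mutex(&oa_config->ref, i915_oa_config_release,
		       &oa_config->i915->perf.metrics_lock);
}

void i915_oa_config_release(struct kref *ref)
{
	struct i915_oa_config *oa_config =
		container_of(ref, typeof(*oa_config), ref);

	lockdep_assert_held(&oa_config->i915->perf.metrics_lock);

	/* ... teardown of the config (registers, vma link, idr entry) ... */

	/* kref_put_mutex() leaves the mutex held for us; release it here. */
	mutex_unlock(&oa_config->i915->perf.metrics_lock);
}

Callers that already hold metrics_lock (the add/remove config ioctls in this patch) would need a plain kref_put() variant to avoid deadlocking, so this is only one possible direction, not a drop-in change.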
Quoting Lionel Landwerlin (2019-06-27 13:32:13)
> On 27/06/2019 12:45, Chris Wilson wrote:
> > Quoting Lionel Landwerlin (2019-06-27 09:00:42)
> >> +       /*
> >> +        * If the config hasn't changed, skip reconfiguring the HW (this is
> >> +        * subject to a delay we want to avoid has much as possible).
> >> +        */
> >> +       if (eb->oa_config == eb->i915->perf.oa.exclusive_stream->oa_config)
> >> +               return 0;
> >> +
> >> +       oa_vma = i915_vma_instance(eb->oa_bo,
> >> +                                  &eb->engine->i915->ggtt.vm, NULL);
> >> +       if (unlikely(IS_ERR(oa_vma)))
> >> +               return PTR_ERR(oa_vma);
> >> +
> >> +       err = i915_vma_pin(oa_vma, 0, 0, PIN_GLOBAL);
> >> +       if (err)
> >> +               return err;
> > Ugh. We should not be pinned after creating the request. Might not
> > matter -- it's just reservation under its own lock that must not be
> > crossed with the timeline lock currently held here.
>
> Should I move this into get_execbuf_oa_config() ?

That would save me fretting about the lock nesting.

> >> @@ -2651,9 +2760,23 @@ i915_gem_do_execbuffer(struct drm_device *dev,
> >>                 if (unlikely(err))
> >>                         goto err_unlock;
> >>
> >> +       if (eb.extensions.flags & BIT(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) {
> >> +               if (!intel_engine_has_oa(eb.engine)) {
> >> +                       err = -ENODEV;
> >> +                       goto err_engine;
> >> +               }
> >> +
> >> +               err = get_execbuf_oa_config(eb.i915,
> >> +                                           eb.extensions.perf_config.perf_fd,
> >> +                                           eb.extensions.perf_config.oa_config,
> >> +                                           &eb.oa_config, &eb.oa_bo);
> >> +               if (err)
> >> +                       goto err_engine;
> > Why is this under the struct_mutex?
>
> Access to dev_priv->perf.oa.exclusive_stream must be under struct_mutex.
>
> Also because we verify that the engine actually has OA support.
>
> I could split the getting the configuration part away.

I'm about 10^W 50^W certainly less than a 100 patches away from killing
struct_mutex for execbuf...
-Chris
On 27/06/2019 15:53, Chris Wilson wrote:
> Quoting Lionel Landwerlin (2019-06-27 13:32:13)
>> On 27/06/2019 12:45, Chris Wilson wrote:
>>> Quoting Lionel Landwerlin (2019-06-27 09:00:42)
>>>> +       /*
>>>> +        * If the config hasn't changed, skip reconfiguring the HW (this is
>>>> +        * subject to a delay we want to avoid has much as possible).
>>>> +        */
>>>> +       if (eb->oa_config == eb->i915->perf.oa.exclusive_stream->oa_config)
>>>> +               return 0;
>>>> +
>>>> +       oa_vma = i915_vma_instance(eb->oa_bo,
>>>> +                                  &eb->engine->i915->ggtt.vm, NULL);
>>>> +       if (unlikely(IS_ERR(oa_vma)))
>>>> +               return PTR_ERR(oa_vma);
>>>> +
>>>> +       err = i915_vma_pin(oa_vma, 0, 0, PIN_GLOBAL);
>>>> +       if (err)
>>>> +               return err;
>>> Ugh. We should not be pinned after creating the request. Might not
>>> matter -- it's just reservation under its own lock that must not be
>>> crossed with the timeline lock currently held here.
>>
>> Should I move this into get_execbuf_oa_config() ?
> That would save me fretting about the lock nesting.
>
>>>> @@ -2651,9 +2760,23 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>>>>                 if (unlikely(err))
>>>>                         goto err_unlock;
>>>>
>>>> +       if (eb.extensions.flags & BIT(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) {
>>>> +               if (!intel_engine_has_oa(eb.engine)) {
>>>> +                       err = -ENODEV;
>>>> +                       goto err_engine;
>>>> +               }
>>>> +
>>>> +               err = get_execbuf_oa_config(eb.i915,
>>>> +                                           eb.extensions.perf_config.perf_fd,
>>>> +                                           eb.extensions.perf_config.oa_config,
>>>> +                                           &eb.oa_config, &eb.oa_bo);
>>>> +               if (err)
>>>> +                       goto err_engine;
>>> Why is this under the struct_mutex?
>>
>> Access to dev_priv->perf.oa.exclusive_stream must be under struct_mutex.
>>
>> Also because we verify that the engine actually has OA support.
>>
>> I could split the getting the configuration part away.
> I'm about 10^W 50^W certainly less than a 100 patches away from killing
> struct_mutex for execbuf...
> -Chris
>

I'm sorry. Dealing with all this OA stuff is underwhelming.

I think an engine lock would be enough if that's not too bad for you.

-Lionel
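Putting together the direction agreed above — resolve the config and pin its vma before any request exists, so nothing is reserved under the timeline lock — the helper could grow roughly into the following. This is a sketch only; the eb->oa_vma field is a hypothetical addition to struct i915_execbuffer, not something in the posted patch:

static int
get_execbuf_oa_config(struct i915_execbuffer *eb,
		      s32 perf_fd, u64 oa_config_id)
{
	struct drm_i915_private *i915 = eb->i915;
	struct file *perf_file;
	struct i915_vma *vma;
	int err = -EINVAL;

	if (!i915->perf.oa.exclusive_stream)
		return -EINVAL;

	perf_file = fget(perf_fd);
	if (!perf_file)
		return -EINVAL;

	if (perf_file->private_data == i915->perf.oa.exclusive_stream)
		err = i915_perf_get_oa_config(i915, oa_config_id,
					      &eb->oa_config, &eb->oa_bo);
	fput(perf_file);
	if (err)
		return err;

	/* Pin the OA config batch now, before any request is created, so
	 * no reservation happens under the timeline lock in eb_oa_config(). */
	vma = i915_vma_instance(eb->oa_bo, &i915->ggtt.vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
	if (err)
		return err;

	eb->oa_vma = vma; /* hypothetical field; unpinned after submission */
	return 0;
}

eb_oa_config() would then only need i915_vma_move_to_active() (or an i915_active_ref() on the vma, as suggested earlier in the thread), the emit_bb_start() for the config batch, and an immediate i915_vma_unpin() of eb->oa_vma once the vma has been moved to active.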
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 476fade6fcab..18ae3e8a4e02 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -283,7 +283,11 @@ struct i915_execbuffer { struct { u64 flags; /** Available extensions parameters */ struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences; + struct drm_i915_gem_execbuffer_ext_perf perf_config; } extensions; + + struct i915_oa_config *oa_config; /** HW configuration for OA, NULL is not needed. */ + struct drm_i915_gem_object *oa_bo; }; #define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags]) @@ -1210,6 +1214,34 @@ static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma) return err; } + +static int +get_execbuf_oa_config(struct drm_i915_private *dev_priv, + s32 perf_fd, u64 oa_config_id, + struct i915_oa_config **out_oa_config, + struct drm_i915_gem_object **out_oa_obj) +{ + struct file *perf_file; + int ret; + + if (!dev_priv->perf.oa.exclusive_stream) + return -EINVAL; + + perf_file = fget(perf_fd); + if (!perf_file) + return -EINVAL; + + if (perf_file->private_data != dev_priv->perf.oa.exclusive_stream) + return -EINVAL; + + fput(perf_file); + + ret = i915_perf_get_oa_config(dev_priv, oa_config_id, + out_oa_config, out_oa_obj); + + return ret; +} + static int __reloc_gpu_alloc(struct i915_execbuffer *eb, struct i915_vma *vma, unsigned int len) @@ -2072,6 +2104,64 @@ add_to_client(struct i915_request *rq, struct drm_file *file) list_add_tail(&rq->client_link, &rq->file_priv->mm.request_list); } +static int eb_oa_config(struct i915_execbuffer *eb) +{ + struct i915_vma *oa_vma; + int err; + + if (!eb->oa_config) + return 0; + + err = i915_active_request_set(&eb->engine->last_oa_config, + eb->request); + if (err) + return err; + + /* + * If the perf stream was opened with hold preemption, flag the + * request properly so that the priority of the request is bumped once + * it reaches the execlist ports. + */ + if (eb->i915->perf.oa.exclusive_stream->hold_preemption) + eb->request->has_perf = true; + + /* + * If the config hasn't changed, skip reconfiguring the HW (this is + * subject to a delay we want to avoid has much as possible). 
+ */ + if (eb->oa_config == eb->i915->perf.oa.exclusive_stream->oa_config) + return 0; + + oa_vma = i915_vma_instance(eb->oa_bo, + &eb->engine->i915->ggtt.vm, NULL); + if (unlikely(IS_ERR(oa_vma))) + return PTR_ERR(oa_vma); + + err = i915_vma_pin(oa_vma, 0, 0, PIN_GLOBAL); + if (err) + return err; + + err = i915_vma_move_to_active(oa_vma, eb->request, 0); + if (err) { + i915_vma_unpin(oa_vma); + return err; + } + + err = eb->engine->emit_bb_start(eb->request, + oa_vma->node.start, + 0, I915_DISPATCH_SECURE); + if (err) { + i915_vma_unpin(oa_vma); + return err; + } + + i915_vma_unpin(oa_vma); + + swap(eb->oa_config, eb->i915->perf.oa.exclusive_stream->oa_config); + + return 0; +} + static int eb_submit(struct i915_execbuffer *eb) { int err; @@ -2098,6 +2188,10 @@ static int eb_submit(struct i915_execbuffer *eb) return err; } + err = eb_oa_config(eb); + if (err) + return err; + err = eb->engine->emit_bb_start(eb->request, eb->batch->node.start + eb->batch_start_offset, @@ -2520,8 +2614,22 @@ static int parse_timeline_fences(struct i915_user_extension __user *ext, void *d return 0; } +static int parse_perf_config(struct i915_user_extension __user *ext, void *data) +{ + struct i915_execbuffer *eb = data; + + if (copy_from_user(&eb->extensions.perf_config, ext, + sizeof(eb->extensions.perf_config))) + return -EFAULT; + + eb->extensions.flags |= BIT(DRM_I915_GEM_EXECBUFFER_EXT_PERF); + + return 0; +} + static const i915_user_extension_fn execbuf_extensions[] = { [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences, + [DRM_I915_GEM_EXECBUFFER_EXT_PERF] = parse_perf_config, }; static int @@ -2573,6 +2681,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, eb.buffer_count = args->buffer_count; eb.batch_start_offset = args->batch_start_offset; eb.batch_len = args->batch_len; + eb.oa_config = NULL; eb.batch_flags = 0; if (args->flags & I915_EXEC_SECURE) { @@ -2651,9 +2760,23 @@ i915_gem_do_execbuffer(struct drm_device *dev, if (unlikely(err)) goto err_unlock; + if (eb.extensions.flags & BIT(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) { + if (!intel_engine_has_oa(eb.engine)) { + err = -ENODEV; + goto err_engine; + } + + err = get_execbuf_oa_config(eb.i915, + eb.extensions.perf_config.perf_fd, + eb.extensions.perf_config.oa_config, + &eb.oa_config, &eb.oa_bo); + if (err) + goto err_engine; + } + err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */ if (unlikely(err)) - goto err_engine; + goto err_oa; err = eb_relocate(&eb); if (err) { @@ -2806,6 +2929,11 @@ i915_gem_do_execbuffer(struct drm_device *dev, err_vma: if (eb.exec) eb_release_vmas(&eb); +err_oa: + if (eb.oa_config) { + i915_gem_object_put(eb.oa_bo); + i915_oa_config_put(eb.oa_config); + } err_engine: eb_unpin_context(&eb); err_unlock: diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 4961f74fd902..ba4d2c1abd1a 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -859,6 +859,8 @@ int intel_engine_init_common(struct intel_engine_cs *engine) engine->set_default_submission(engine); + INIT_ACTIVE_REQUEST(&engine->last_oa_config); + return 0; err_unpin: diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 7e056114344e..39da40937e7f 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -363,6 +363,8 @@ struct intel_engine_cs { struct i915_wa_list wa_list; struct i915_wa_list whitelist; + struct 
i915_active_request last_oa_config; + u32 irq_keep_mask; /* always keep these interrupts */ u32 irq_enable_mask; /* bitmask to enable ring interrupt */ void (*irq_enable)(struct intel_engine_cs *engine); @@ -446,6 +448,7 @@ struct intel_engine_cs { #define I915_ENGINE_HAS_SEMAPHORES BIT(3) #define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4) #define I915_ENGINE_IS_VIRTUAL BIT(5) +#define I915_ENGINE_HAS_OA BIT(6) unsigned int flags; /* @@ -541,6 +544,12 @@ intel_engine_is_virtual(const struct intel_engine_cs *engine) return engine->flags & I915_ENGINE_IS_VIRTUAL; } +static inline bool +intel_engine_has_oa(const struct intel_engine_cs *engine) +{ + return engine->flags & I915_ENGINE_HAS_OA; +} + #define instdone_slice_mask(dev_priv__) \ (IS_GEN(dev_priv__, 7) ? \ 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 28685ba91a2c..a6ea468986d1 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -2763,6 +2763,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) engine->init_context = gen8_init_rcs_context; engine->emit_flush = gen8_emit_flush_render; engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; + engine->flags |= I915_ENGINE_HAS_OA; } return 0; diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c index f094406dcc56..a1aee092d0a3 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -2192,8 +2192,10 @@ static void setup_rcs(struct intel_engine_cs *engine) engine->irq_enable_mask = I915_USER_INTERRUPT; } - if (IS_HASWELL(i915)) + if (IS_HASWELL(i915)) { engine->emit_bb_start = hsw_emit_bb_start; + engine->flags |= I915_ENGINE_HAS_OA; + } engine->resume = rcs_resume; } diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 48f9009a6318..ef54cf1c54a5 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -486,6 +486,10 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data, case I915_PARAM_PERF_REVISION: value = 1; break; + case I915_PARAM_HAS_EXEC_PERF_CONFIG: + /* Obviously requires perf support. 
*/ + value = dev_priv->perf.initialized; + break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0323c8182a67..51b103493623 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1096,6 +1096,8 @@ struct i915_oa_reg { }; struct i915_oa_config { + struct drm_i915_private *i915; + char uuid[UUID_STRING_LEN + 1]; int id; @@ -1114,7 +1116,7 @@ struct i915_oa_config { struct list_head vma_link; - atomic_t ref_count; + struct kref ref; }; struct i915_perf_stream; @@ -2612,6 +2614,12 @@ int i915_perf_get_oa_config(struct drm_i915_private *i915, int metrics_set, struct i915_oa_config **out_config, struct drm_i915_gem_object **out_obj); +void i915_oa_config_release(struct kref *ref); + +static inline void i915_oa_config_put(struct i915_oa_config *oa_config) +{ + kref_put(&oa_config->ref, i915_oa_config_release); +} /* i915_gem_evict.c */ int __must_check i915_gem_evict_something(struct i915_address_space *vm, diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index b2f5ba87921c..34d74fa64c74 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -366,10 +366,11 @@ struct perf_open_properties { int oa_period_exponent; }; -static void put_oa_config(struct i915_oa_config *oa_config) +void i915_oa_config_release(struct kref *ref) { - if (!atomic_dec_and_test(&oa_config->ref_count)) - return; + struct i915_oa_config *oa_config = container_of(ref, typeof(*oa_config), ref); + + lockdep_assert_held(&oa_config->i915->perf.metrics_lock); if (oa_config->obj) { list_del(&oa_config->vma_link); @@ -485,7 +486,7 @@ int i915_perf_get_oa_config(struct drm_i915_private *i915, } if (out_config) { - atomic_inc(&oa_config->ref_count); + kref_get(&oa_config->ref); *out_config = oa_config; } @@ -507,7 +508,7 @@ int i915_perf_get_oa_config(struct drm_i915_private *i915, err_buf_alloc: if (out_config) { - put_oa_config(oa_config); + i915_oa_config_put(oa_config); *out_config = NULL; } unlock: @@ -1483,7 +1484,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream) if (stream->ctx) oa_put_render_ctx_id(stream); - put_oa_config(stream->oa_config); + i915_oa_config_put(stream->oa_config); if (dev_priv->perf.oa.spurious_report_rs.missed) { DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n", @@ -2430,7 +2431,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, free_oa_buffer(dev_priv); err_oa_buf_alloc: - put_oa_config(stream->oa_config); + i915_oa_config_put(stream->oa_config); intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref); @@ -3246,7 +3247,7 @@ void i915_perf_register(struct drm_i915_private *dev_priv) if (ret) goto sysfs_error; - atomic_set(&dev_priv->perf.oa.test_config.ref_count, 1); + dev_priv->perf.oa.test_config.i915 = dev_priv; goto exit; @@ -3502,7 +3503,8 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, return -ENOMEM; } - atomic_set(&oa_config->ref_count, 1); + oa_config->i915 = dev_priv; + kref_init(&oa_config->ref); if (!uuid_is_valid(args->uuid)) { DRM_DEBUG("Invalid uuid format for OA config\n"); @@ -3601,7 +3603,7 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, sysfs_err: mutex_unlock(&dev_priv->perf.metrics_lock); reg_err: - put_oa_config(oa_config); + i915_oa_config_put(oa_config); DRM_DEBUG("Failed to add new OA config\n"); return 
err; } @@ -3655,7 +3657,7 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id); - put_oa_config(oa_config); + i915_oa_config_put(oa_config); config_err: mutex_unlock(&dev_priv->perf.metrics_lock); @@ -3824,7 +3826,7 @@ static int destroy_config(int id, void *p, void *data) { struct i915_oa_config *oa_config = p; - put_oa_config(oa_config); + i915_oa_config_put(oa_config); return 0; } diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index b7fe1915e34d..5938a73963fe 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -623,6 +623,16 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55 +/* + * Request an i915/perf performance configuration change before running the + * commands given in an execbuf. + * + * Performance configuration ID and the file descriptor of the i915 perf + * stream are given through drm_i915_gem_execbuffer_ext_perf. See + * I915_EXEC_EXT. + */ +#define I915_PARAM_HAS_EXEC_PERF_CONFIG 56 + /* Must be kept compact -- no holes and well documented */ typedef struct drm_i915_getparam { @@ -1026,6 +1036,12 @@ enum drm_i915_gem_execbuffer_ext { */ DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES, + /** + * This identifier is associated with + * drm_i915_gem_execbuffer_perf_ext. + */ + DRM_I915_GEM_EXECBUFFER_EXT_PERF, + DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */ }; @@ -1054,6 +1070,27 @@ struct drm_i915_gem_execbuffer_ext_timeline_fences { __u64 values_ptr; }; +struct drm_i915_gem_execbuffer_ext_perf { + struct i915_user_extension base; + + /** + * Performance file descriptor returned by DRM_IOCTL_I915_PERF_OPEN. + * This is used to identify that the application + */ + __s32 perf_fd; + + /** + * Unused for now. Must be cleared to zero. + */ + __u32 pad; + + /** + * OA configuration ID to switch to before executing the commands + * associated to the execbuf. + */ + __u64 oa_config; +}; + struct drm_i915_gem_execbuffer2 { /** * List of gem_exec_object2 structs
We want the ability to dispatch a set of command buffers to the
hardware, each with a different OA configuration. To achieve this, we
reuse a couple of fields from the execbuf2 struct (I CAN HAZ execbuf3?)
to notify what OA configuration should be used for a batch buffer. This
requires the process making the execbuf with this flag to also own the
perf fd at the time of execbuf.

v2: Add an emit_oa_config() vfunc in the intel_engine_cs (Chris)
    Move oa_config vma to active (Chris)

v3: Don't drop the lock for engine lookup (Chris)
    Move OA config vma to active before writing the ringbuffer (Chris)

v4: Reuse i915_user_extension_fn
    Serialize requests with OA config updates

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c   | 130 +++++++++++++++++-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c    |   2 +
 drivers/gpu/drm/i915/gt/intel_engine_types.h |   9 ++
 drivers/gpu/drm/i915/gt/intel_lrc.c          |   1 +
 drivers/gpu/drm/i915/gt/intel_ringbuffer.c   |   4 +-
 drivers/gpu/drm/i915/i915_drv.c              |   4 +
 drivers/gpu/drm/i915/i915_drv.h              |  10 +-
 drivers/gpu/drm/i915/i915_perf.c             |  26 ++-
 include/uapi/drm/i915_drm.h                  |  37 +++++
 9 files changed, 208 insertions(+), 15 deletions(-)
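For illustration, a userspace caller of the new extension might look like the sketch below. Only struct drm_i915_gem_execbuffer_ext_perf and DRM_I915_GEM_EXECBUFFER_EXT_PERF come from this patch; the I915_EXEC_EXT flag is defined elsewhere in the series, and passing the extension chain through the reused cliprects_ptr field is an assumption about how that flag is wired up, not something shown here:

#include <stdint.h>
#include <xf86drm.h>
#include <drm/i915_drm.h>

/* Sketch: submit one batch and ask the kernel to switch to OA config
 * `config_id` (as registered via DRM_IOCTL_I915_PERF_ADD_CONFIG) first.
 * `perf_fd` must be the fd of the currently open i915-perf stream. */
static int execbuf_with_oa_config(int drm_fd, int perf_fd, uint64_t config_id,
				  struct drm_i915_gem_exec_object2 *objects,
				  uint32_t num_objects)
{
	struct drm_i915_gem_execbuffer_ext_perf perf_ext = {
		.base.name = DRM_I915_GEM_EXECBUFFER_EXT_PERF,
		.base.next_extension = 0, /* end of the extension chain */
		.perf_fd = perf_fd,
		.oa_config = config_id,
	};
	struct drm_i915_gem_execbuffer2 execbuf = {
		.buffers_ptr = (uintptr_t)objects,
		.buffer_count = num_objects,
		.flags = I915_EXEC_RENDER | I915_EXEC_EXT,
		/* Assumption: extensions are chained via the reused
		 * cliprects_ptr when I915_EXEC_EXT is set. */
		.cliprects_ptr = (uintptr_t)&perf_ext,
	};

	return drmIoctl(drm_fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
}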