diff mbox series

[v5,07/10] drm/i915: add a new perf configuration execbuf parameter

Message ID 20190627080045.8814-8-lionel.g.landwerlin@intel.com (mailing list archive)
State New, archived
Headers show
Series drm/i915: Vulkan performance query support | expand

Commit Message

Lionel Landwerlin June 27, 2019, 8 a.m. UTC
We want the ability to dispatch a set of command buffers to the
hardware, each with a different OA configuration. To achieve this, we
reuse a couple of fields from the execbuf2 struct (I CAN HAZ
execbuf3?) to notify what OA configuration should be used for a batch
buffer. This requires the process making the execbuf with this flag to
also own the perf fd at the time of execbuf.

v2: Add a emit_oa_config() vfunc in the intel_engine_cs (Chris)
    Move oa_config vma to active (Chris)

v3: Don't drop the lock for engine lookup (Chris)
    Move OA config vma to active before writing the ringbuffer (Chris)

v4: Reuse i915_user_extension_fn
    Serialize requests with OA config updates

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 130 +++++++++++++++++-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |   2 +
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |   9 ++
 drivers/gpu/drm/i915/gt/intel_lrc.c           |   1 +
 drivers/gpu/drm/i915/gt/intel_ringbuffer.c    |   4 +-
 drivers/gpu/drm/i915/i915_drv.c               |   4 +
 drivers/gpu/drm/i915/i915_drv.h               |  10 +-
 drivers/gpu/drm/i915/i915_perf.c              |  26 ++--
 include/uapi/drm/i915_drm.h                   |  37 +++++
 9 files changed, 208 insertions(+), 15 deletions(-)

Comments

Chris Wilson June 27, 2019, 9:19 a.m. UTC | #1
Quoting Lionel Landwerlin (2019-06-27 09:00:42)
> +static int
> +get_execbuf_oa_config(struct drm_i915_private *dev_priv,
> +                     s32 perf_fd, u64 oa_config_id,
> +                     struct i915_oa_config **out_oa_config,
> +                     struct drm_i915_gem_object **out_oa_obj)
> +{
> +       struct file *perf_file;
> +       int ret;
> +
> +       if (!dev_priv->perf.oa.exclusive_stream)
> +               return -EINVAL;
> +
> +       perf_file = fget(perf_fd);
> +       if (!perf_file)
> +               return -EINVAL;
> +
> +       if (perf_file->private_data != dev_priv->perf.oa.exclusive_stream)
> +               return -EINVAL;

Leaked the file.

> +       fput(perf_file);

and we never use perf_file again? It's only use is as a permission check
on the ioctl? Just checking in case perf_fd is dup()ed by the user after
we check it.
-Chris
Chris Wilson June 27, 2019, 9:45 a.m. UTC | #2
Quoting Lionel Landwerlin (2019-06-27 09:00:42)
> We want the ability to dispatch a set of command buffers to the
> hardware, each with a different OA configuration. To achieve this, we
> reuse a couple of fields from the execbuf2 struct (I CAN HAZ
> execbuf3?) to notify what OA configuration should be used for a batch
> buffer. This requires the process making the execbuf with this flag to
> also own the perf fd at the time of execbuf.
> 
> v2: Add a emit_oa_config() vfunc in the intel_engine_cs (Chris)
>     Move oa_config vma to active (Chris)
> 
> v3: Don't drop the lock for engine lookup (Chris)
>     Move OA config vma to active before writing the ringbuffer (Chris)
> 
> v4: Reuse i915_user_extension_fn
>     Serialize requests with OA config updates
> 
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> ---
>  .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 130 +++++++++++++++++-
>  drivers/gpu/drm/i915/gt/intel_engine_cs.c     |   2 +
>  drivers/gpu/drm/i915/gt/intel_engine_types.h  |   9 ++
>  drivers/gpu/drm/i915/gt/intel_lrc.c           |   1 +
>  drivers/gpu/drm/i915/gt/intel_ringbuffer.c    |   4 +-
>  drivers/gpu/drm/i915/i915_drv.c               |   4 +
>  drivers/gpu/drm/i915/i915_drv.h               |  10 +-
>  drivers/gpu/drm/i915/i915_perf.c              |  26 ++--
>  include/uapi/drm/i915_drm.h                   |  37 +++++
>  9 files changed, 208 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index 476fade6fcab..18ae3e8a4e02 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -283,7 +283,11 @@ struct i915_execbuffer {
>         struct {
>                 u64 flags; /** Available extensions parameters */
>                 struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
> +               struct drm_i915_gem_execbuffer_ext_perf perf_config;
>         } extensions;
> +
> +       struct i915_oa_config *oa_config; /** HW configuration for OA, NULL if not needed. */
> +       struct drm_i915_gem_object *oa_bo;
>  };
>  
>  #define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])
> @@ -1210,6 +1214,34 @@ static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
>         return err;
>  }
>  
> +
> +static int
> +get_execbuf_oa_config(struct drm_i915_private *dev_priv,
> +                     s32 perf_fd, u64 oa_config_id,
> +                     struct i915_oa_config **out_oa_config,
> +                     struct drm_i915_gem_object **out_oa_obj)
> +{
> +       struct file *perf_file;
> +       int ret;
> +
> +       if (!dev_priv->perf.oa.exclusive_stream)
> +               return -EINVAL;
> +
> +       perf_file = fget(perf_fd);
> +       if (!perf_file)
> +               return -EINVAL;
> +
> +       if (perf_file->private_data != dev_priv->perf.oa.exclusive_stream)
> +               return -EINVAL;
> +
> +       fput(perf_file);
> +
> +       ret = i915_perf_get_oa_config(dev_priv, oa_config_id,
> +                                     out_oa_config, out_oa_obj);
> +
> +       return ret;
> +}
> +
>  static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
>                              struct i915_vma *vma,
>                              unsigned int len)
> @@ -2072,6 +2104,64 @@ add_to_client(struct i915_request *rq, struct drm_file *file)
>         list_add_tail(&rq->client_link, &rq->file_priv->mm.request_list);
>  }
>  
> +static int eb_oa_config(struct i915_execbuffer *eb)
> +{
> +       struct i915_vma *oa_vma;
> +       int err;
> +
> +       if (!eb->oa_config)
> +               return 0;
> +
> +       err = i915_active_request_set(&eb->engine->last_oa_config,
> +                                     eb->request);
> +       if (err)
> +               return err;
> +
> +       /*
> +        * If the perf stream was opened with hold preemption, flag the
> +        * request properly so that the priority of the request is bumped once
> +        * it reaches the execlist ports.
> +        */
> +       if (eb->i915->perf.oa.exclusive_stream->hold_preemption)
> +               eb->request->has_perf = true;

Leave this chunk for the next battle^W patch.

> +       /*
> +        * If the config hasn't changed, skip reconfiguring the HW (this is
> +        * subject to a delay we want to avoid as much as possible).
> +        */
> +       if (eb->oa_config == eb->i915->perf.oa.exclusive_stream->oa_config)
> +               return 0;
> +
> +       oa_vma = i915_vma_instance(eb->oa_bo,
> +                                  &eb->engine->i915->ggtt.vm, NULL);
> +       if (unlikely(IS_ERR(oa_vma)))
> +               return PTR_ERR(oa_vma);
> +
> +       err = i915_vma_pin(oa_vma, 0, 0, PIN_GLOBAL);
> +       if (err)
> +               return err;

Ugh. We should not be pinned after creating the request. Might not
matter -- it's just reservation under its own lock that must not be
crossed with the timeline lock currently held here.

> +
> +       err = i915_vma_move_to_active(oa_vma, eb->request, 0);

You can unpin immediately after move_to_active (as the active is treated
by being pinned).

I'm thinking this will be replaced with just i915_active_ref() as you
only care about the vma.

> +       if (err) {
> +               i915_vma_unpin(oa_vma);
> +               return err;
> +       }
> +
> +       err = eb->engine->emit_bb_start(eb->request,
> +                                       oa_vma->node.start,
> +                                       0, I915_DISPATCH_SECURE);
> +       if (err) {
> +               i915_vma_unpin(oa_vma);
> +               return err;
> +       }
> +
> +       i915_vma_unpin(oa_vma);
> +
> +       swap(eb->oa_config, eb->i915->perf.oa.exclusive_stream->oa_config);
> +
> +       return 0;
> +}
> +
>  static int eb_submit(struct i915_execbuffer *eb)
>  {
>         int err;
> @@ -2098,6 +2188,10 @@ static int eb_submit(struct i915_execbuffer *eb)
>                         return err;
>         }
>  
> +       err = eb_oa_config(eb);
> +       if (err)
> +               return err;
> +
>         err = eb->engine->emit_bb_start(eb->request,
>                                         eb->batch->node.start +
>                                         eb->batch_start_offset,
> @@ -2520,8 +2614,22 @@ static int parse_timeline_fences(struct i915_user_extension __user *ext, void *d
>         return 0;
>  }
>  
> +static int parse_perf_config(struct i915_user_extension __user *ext, void *data)
> +{
> +       struct i915_execbuffer *eb = data;
> +
> +       if (copy_from_user(&eb->extensions.perf_config, ext,
> +                          sizeof(eb->extensions.perf_config)))
> +               return -EFAULT;
> +
> +       eb->extensions.flags |= BIT(DRM_I915_GEM_EXECBUFFER_EXT_PERF);
> +
> +       return 0;
> +}
> +
>  static const i915_user_extension_fn execbuf_extensions[] = {
>          [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,
> +        [DRM_I915_GEM_EXECBUFFER_EXT_PERF] = parse_perf_config,
>  };
>  
>  static int
> @@ -2573,6 +2681,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>         eb.buffer_count = args->buffer_count;
>         eb.batch_start_offset = args->batch_start_offset;
>         eb.batch_len = args->batch_len;
> +       eb.oa_config = NULL;
>  
>         eb.batch_flags = 0;
>         if (args->flags & I915_EXEC_SECURE) {
> @@ -2651,9 +2760,23 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>         if (unlikely(err))
>                 goto err_unlock;
>  
> +       if (eb.extensions.flags & BIT(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) {
> +               if (!intel_engine_has_oa(eb.engine)) {
> +                       err = -ENODEV;
> +                       goto err_engine;
> +               }
> +
> +               err = get_execbuf_oa_config(eb.i915,
> +                                           eb.extensions.perf_config.perf_fd,
> +                                           eb.extensions.perf_config.oa_config,
> +                                           &eb.oa_config, &eb.oa_bo);
> +               if (err)
> +                       goto err_engine;

Why is this under the struct_mutex?

> +       }
> +
>         err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */
>         if (unlikely(err))
> -               goto err_engine;
> +               goto err_oa;
>  
>         err = eb_relocate(&eb);
>         if (err) {
> @@ -2806,6 +2929,11 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>  err_vma:
>         if (eb.exec)
>                 eb_release_vmas(&eb);
> +err_oa:
> +       if (eb.oa_config) {
> +               i915_gem_object_put(eb.oa_bo);
> +               i915_oa_config_put(eb.oa_config);
> +       }
>  err_engine:
>         eb_unpin_context(&eb);
>  err_unlock:
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 4961f74fd902..ba4d2c1abd1a 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -859,6 +859,8 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
>  
>         engine->set_default_submission(engine);
>  
> +       INIT_ACTIVE_REQUEST(&engine->last_oa_config);
> +
>         return 0;
>  
>  err_unpin:
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 7e056114344e..39da40937e7f 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -363,6 +363,8 @@ struct intel_engine_cs {
>         struct i915_wa_list wa_list;
>         struct i915_wa_list whitelist;
>  
> +       struct i915_active_request last_oa_config;
> +
>         u32             irq_keep_mask; /* always keep these interrupts */
>         u32             irq_enable_mask; /* bitmask to enable ring interrupt */
>         void            (*irq_enable)(struct intel_engine_cs *engine);
> @@ -446,6 +448,7 @@ struct intel_engine_cs {
>  #define I915_ENGINE_HAS_SEMAPHORES   BIT(3)
>  #define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4)
>  #define I915_ENGINE_IS_VIRTUAL       BIT(5)
> +#define I915_ENGINE_HAS_OA           BIT(6)
>         unsigned int flags;
>  
>         /*
> @@ -541,6 +544,12 @@ intel_engine_is_virtual(const struct intel_engine_cs *engine)
>         return engine->flags & I915_ENGINE_IS_VIRTUAL;
>  }
>  
> +static inline bool
> +intel_engine_has_oa(const struct intel_engine_cs *engine)
> +{
> +       return engine->flags & I915_ENGINE_HAS_OA;
> +}
> +
>  #define instdone_slice_mask(dev_priv__) \
>         (IS_GEN(dev_priv__, 7) ? \
>          1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask)
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 28685ba91a2c..a6ea468986d1 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -2763,6 +2763,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
>                 engine->init_context = gen8_init_rcs_context;
>                 engine->emit_flush = gen8_emit_flush_render;
>                 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
> +               engine->flags |= I915_ENGINE_HAS_OA;
>         }
>  
>         return 0;
> diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> index f094406dcc56..a1aee092d0a3 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> @@ -2192,8 +2192,10 @@ static void setup_rcs(struct intel_engine_cs *engine)
>                 engine->irq_enable_mask = I915_USER_INTERRUPT;
>         }
>  
> -       if (IS_HASWELL(i915))
> +       if (IS_HASWELL(i915)) {
>                 engine->emit_bb_start = hsw_emit_bb_start;
> +               engine->flags |= I915_ENGINE_HAS_OA;
> +       }
>  
>         engine->resume = rcs_resume;
>  }
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 48f9009a6318..ef54cf1c54a5 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -486,6 +486,10 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
>         case I915_PARAM_PERF_REVISION:
>                 value = 1;
>                 break;
> +       case I915_PARAM_HAS_EXEC_PERF_CONFIG:
> +               /* Obviously requires perf support. */
> +               value = dev_priv->perf.initialized;
> +               break;
>         default:
>                 DRM_DEBUG("Unknown parameter %d\n", param->param);
>                 return -EINVAL;
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 0323c8182a67..51b103493623 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1096,6 +1096,8 @@ struct i915_oa_reg {
>  };
>  
>  struct i915_oa_config {
> +       struct drm_i915_private *i915;
> +
>         char uuid[UUID_STRING_LEN + 1];
>         int id;
>  
> @@ -1114,7 +1116,7 @@ struct i915_oa_config {
>  
>         struct list_head vma_link;
>  
> -       atomic_t ref_count;
> +       struct kref ref;
>  };
>  
>  struct i915_perf_stream;
> @@ -2612,6 +2614,12 @@ int i915_perf_get_oa_config(struct drm_i915_private *i915,
>                             int metrics_set,
>                             struct i915_oa_config **out_config,
>                             struct drm_i915_gem_object **out_obj);
> +void i915_oa_config_release(struct kref *ref);
> +
> +static inline void i915_oa_config_put(struct i915_oa_config *oa_config)
> +{
> +       kref_put(&oa_config->ref, i915_oa_config_release);
> +}
>  
>  /* i915_gem_evict.c */
>  int __must_check i915_gem_evict_something(struct i915_address_space *vm,
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index b2f5ba87921c..34d74fa64c74 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -366,10 +366,11 @@ struct perf_open_properties {
>         int oa_period_exponent;
>  };
>  
> -static void put_oa_config(struct i915_oa_config *oa_config)
> +void i915_oa_config_release(struct kref *ref)
>  {
> -       if (!atomic_dec_and_test(&oa_config->ref_count))
> -               return;
> +       struct i915_oa_config *oa_config = container_of(ref, typeof(*oa_config), ref);
> +
> +       lockdep_assert_held(&oa_config->i915->perf.metrics_lock);

That's not true for the eb paths.

>  
>         if (oa_config->obj) {
>                 list_del(&oa_config->vma_link);
> @@ -485,7 +486,7 @@ int i915_perf_get_oa_config(struct drm_i915_private *i915,
>         }
>  
>         if (out_config) {
> -               atomic_inc(&oa_config->ref_count);
> +               kref_get(&oa_config->ref);
>                 *out_config = oa_config;
>         }
>  
> @@ -507,7 +508,7 @@ int i915_perf_get_oa_config(struct drm_i915_private *i915,
>  
>  err_buf_alloc:
>         if (out_config) {
> -               put_oa_config(oa_config);
> +               i915_oa_config_put(oa_config);
>                 *out_config = NULL;
>         }
>  unlock:
> @@ -1483,7 +1484,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
>         if (stream->ctx)
>                 oa_put_render_ctx_id(stream);
>  
> -       put_oa_config(stream->oa_config);
> +       i915_oa_config_put(stream->oa_config);
>  
>         if (dev_priv->perf.oa.spurious_report_rs.missed) {
>                 DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
> @@ -2430,7 +2431,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>         free_oa_buffer(dev_priv);
>  
>  err_oa_buf_alloc:
> -       put_oa_config(stream->oa_config);
> +       i915_oa_config_put(stream->oa_config);
>  
>         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
>         intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref);
> @@ -3246,7 +3247,7 @@ void i915_perf_register(struct drm_i915_private *dev_priv)
>         if (ret)
>                 goto sysfs_error;
>  
> -       atomic_set(&dev_priv->perf.oa.test_config.ref_count, 1);
> +       dev_priv->perf.oa.test_config.i915 = dev_priv;
>  
>         goto exit;
>  
> @@ -3502,7 +3503,8 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
>                 return -ENOMEM;
>         }
>  
> -       atomic_set(&oa_config->ref_count, 1);
> +       oa_config->i915 = dev_priv;
> +       kref_init(&oa_config->ref);
>  
>         if (!uuid_is_valid(args->uuid)) {
>                 DRM_DEBUG("Invalid uuid format for OA config\n");
> @@ -3601,7 +3603,7 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
>  sysfs_err:
>         mutex_unlock(&dev_priv->perf.metrics_lock);
>  reg_err:
> -       put_oa_config(oa_config);
> +       i915_oa_config_put(oa_config);
>         DRM_DEBUG("Failed to add new OA config\n");
>         return err;
>  }
> @@ -3655,7 +3657,7 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
>  
>         DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id);
>  
> -       put_oa_config(oa_config);
> +       i915_oa_config_put(oa_config);
>  
>  config_err:
>         mutex_unlock(&dev_priv->perf.metrics_lock);
> @@ -3824,7 +3826,7 @@ static int destroy_config(int id, void *p, void *data)
>  {
>         struct i915_oa_config *oa_config = p;
>  
> -       put_oa_config(oa_config);
> +       i915_oa_config_put(oa_config);
>  
>         return 0;
>  }
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index b7fe1915e34d..5938a73963fe 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -623,6 +623,16 @@ typedef struct drm_i915_irq_wait {
>   */
>  #define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55
>  
> +/*
> + * Request an i915/perf performance configuration change before running the
> + * commands given in an execbuf.
> + *
> + * Performance configuration ID and the file descriptor of the i915 perf
> + * stream are given through drm_i915_gem_execbuffer_ext_perf. See
> + * I915_EXEC_EXT.
> + */
> +#define I915_PARAM_HAS_EXEC_PERF_CONFIG 56
> +
>  /* Must be kept compact -- no holes and well documented */
>  
>  typedef struct drm_i915_getparam {
> @@ -1026,6 +1036,12 @@ enum drm_i915_gem_execbuffer_ext {
>          */
>         DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES,
>  
> +       /**
> +        * This identifier is associated with
> +        * drm_i915_gem_execbuffer_perf_ext.
> +        */
> +       DRM_I915_GEM_EXECBUFFER_EXT_PERF,
> +
>         DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */
>  };
>  
> @@ -1054,6 +1070,27 @@ struct drm_i915_gem_execbuffer_ext_timeline_fences {
>         __u64 values_ptr;
>  };
>  
> +struct drm_i915_gem_execbuffer_ext_perf {
> +       struct i915_user_extension base;
> +
> +       /**
> +        * Performance file descriptor returned by DRM_IOCTL_I915_PERF_OPEN.
> +        * This is used to verify that the application issuing the execbuf
> +        * owns the perf stream at the time of execbuf.
> +       __s32 perf_fd;
> +
> +       /**
> +        * Unused for now. Must be cleared to zero.
> +        */
> +       __u32 pad;
> +
> +       /**
> +        * OA configuration ID to switch to before executing the commands
> +        * associated to the execbuf.
> +        */
> +       __u64 oa_config;
> +};
> +
>  struct drm_i915_gem_execbuffer2 {
>         /**
>          * List of gem_exec_object2 structs
> -- 
> 2.21.0.392.gf8f6787159e
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Lionel Landwerlin June 27, 2019, 11:18 a.m. UTC | #3
On 27/06/2019 12:19, Chris Wilson wrote:
> Quoting Lionel Landwerlin (2019-06-27 09:00:42)
>> +static int
>> +get_execbuf_oa_config(struct drm_i915_private *dev_priv,
>> +                     s32 perf_fd, u64 oa_config_id,
>> +                     struct i915_oa_config **out_oa_config,
>> +                     struct drm_i915_gem_object **out_oa_obj)
>> +{
>> +       struct file *perf_file;
>> +       int ret;
>> +
>> +       if (!dev_priv->perf.oa.exclusive_stream)
>> +               return -EINVAL;
>> +
>> +       perf_file = fget(perf_fd);
>> +       if (!perf_file)
>> +               return -EINVAL;
>> +
>> +       if (perf_file->private_data != dev_priv->perf.oa.exclusive_stream)
>> +               return -EINVAL;
> Leaked the file.


Thanks!


>
>> +       fput(perf_file);
> and we never use perf_file again? It's only use is as a permission check
> on the ioctl? Just checking in case perf_fd is dup()ed by the user after
> we check it.
> -Chris
>
Correct, if it's dup()ed then the application enters undefined behavior 
territory.

Much like not synchronizing buffer access across engines ;)


-Lionel
Lionel Landwerlin June 27, 2019, 12:32 p.m. UTC | #4
On 27/06/2019 12:45, Chris Wilson wrote:
> Quoting Lionel Landwerlin (2019-06-27 09:00:42)
>> We want the ability to dispatch a set of command buffers to the
>> hardware, each with a different OA configuration. To achieve this, we
>> reuse a couple of fields from the execbuf2 struct (I CAN HAZ
>> execbuf3?) to notify what OA configuration should be used for a batch
>> buffer. This requires the process making the execbuf with this flag to
>> also own the perf fd at the time of execbuf.
>>
>> v2: Add a emit_oa_config() vfunc in the intel_engine_cs (Chris)
>>      Move oa_config vma to active (Chris)
>>
>> v3: Don't drop the lock for engine lookup (Chris)
>>      Move OA config vma to active before writing the ringbuffer (Chris)
>>
>> v4: Reuse i915_user_extension_fn
>>      Serialize requests with OA config updates
>>
>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>> ---
>>   .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 130 +++++++++++++++++-
>>   drivers/gpu/drm/i915/gt/intel_engine_cs.c     |   2 +
>>   drivers/gpu/drm/i915/gt/intel_engine_types.h  |   9 ++
>>   drivers/gpu/drm/i915/gt/intel_lrc.c           |   1 +
>>   drivers/gpu/drm/i915/gt/intel_ringbuffer.c    |   4 +-
>>   drivers/gpu/drm/i915/i915_drv.c               |   4 +
>>   drivers/gpu/drm/i915/i915_drv.h               |  10 +-
>>   drivers/gpu/drm/i915/i915_perf.c              |  26 ++--
>>   include/uapi/drm/i915_drm.h                   |  37 +++++
>>   9 files changed, 208 insertions(+), 15 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
>> index 476fade6fcab..18ae3e8a4e02 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
>> @@ -283,7 +283,11 @@ struct i915_execbuffer {
>>          struct {
>>                  u64 flags; /** Available extensions parameters */
>>                  struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
>> +               struct drm_i915_gem_execbuffer_ext_perf perf_config;
>>          } extensions;
>> +
>> +       struct i915_oa_config *oa_config; /** HW configuration for OA, NULL if not needed. */
>> +       struct drm_i915_gem_object *oa_bo;
>>   };
>>   
>>   #define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])
>> @@ -1210,6 +1214,34 @@ static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
>>          return err;
>>   }
>>   
>> +
>> +static int
>> +get_execbuf_oa_config(struct drm_i915_private *dev_priv,
>> +                     s32 perf_fd, u64 oa_config_id,
>> +                     struct i915_oa_config **out_oa_config,
>> +                     struct drm_i915_gem_object **out_oa_obj)
>> +{
>> +       struct file *perf_file;
>> +       int ret;
>> +
>> +       if (!dev_priv->perf.oa.exclusive_stream)
>> +               return -EINVAL;
>> +
>> +       perf_file = fget(perf_fd);
>> +       if (!perf_file)
>> +               return -EINVAL;
>> +
>> +       if (perf_file->private_data != dev_priv->perf.oa.exclusive_stream)
>> +               return -EINVAL;
>> +
>> +       fput(perf_file);
>> +
>> +       ret = i915_perf_get_oa_config(dev_priv, oa_config_id,
>> +                                     out_oa_config, out_oa_obj);
>> +
>> +       return ret;
>> +}
>> +
>>   static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
>>                               struct i915_vma *vma,
>>                               unsigned int len)
>> @@ -2072,6 +2104,64 @@ add_to_client(struct i915_request *rq, struct drm_file *file)
>>          list_add_tail(&rq->client_link, &rq->file_priv->mm.request_list);
>>   }
>>   
>> +static int eb_oa_config(struct i915_execbuffer *eb)
>> +{
>> +       struct i915_vma *oa_vma;
>> +       int err;
>> +
>> +       if (!eb->oa_config)
>> +               return 0;
>> +
>> +       err = i915_active_request_set(&eb->engine->last_oa_config,
>> +                                     eb->request);
>> +       if (err)
>> +               return err;
>> +
>> +       /*
>> +        * If the perf stream was opened with hold preemption, flag the
>> +        * request properly so that the priority of the request is bumped once
>> +        * it reaches the execlist ports.
>> +        */
>> +       if (eb->i915->perf.oa.exclusive_stream->hold_preemption)
>> +               eb->request->has_perf = true;
> Leave this chunk for the next battle^W patch.


Oops.


>
>> +       /*
>> +        * If the config hasn't changed, skip reconfiguring the HW (this is
>> +        * subject to a delay we want to avoid as much as possible).
>> +        */
>> +       if (eb->oa_config == eb->i915->perf.oa.exclusive_stream->oa_config)
>> +               return 0;
>> +
>> +       oa_vma = i915_vma_instance(eb->oa_bo,
>> +                                  &eb->engine->i915->ggtt.vm, NULL);
>> +       if (unlikely(IS_ERR(oa_vma)))
>> +               return PTR_ERR(oa_vma);
>> +
>> +       err = i915_vma_pin(oa_vma, 0, 0, PIN_GLOBAL);
>> +       if (err)
>> +               return err;
> Ugh. We should not be pinned after creating the request. Might not
> matter -- it's just reservation under its own lock that must not be
> crossed with the timeline lock currently held here.


Should I move this into get_execbuf_oa_config() ?

>
>> +
>> +       err = i915_vma_move_to_active(oa_vma, eb->request, 0);
> You can unpin immediately after move_to_active (as the active is treated
> by being pinned).
>
> I'm thinking this will be replaced with just i915_active_ref() as you
> only care about the vma.
>
>> +       if (err) {
>> +               i915_vma_unpin(oa_vma);
>> +               return err;
>> +       }
>> +
>> +       err = eb->engine->emit_bb_start(eb->request,
>> +                                       oa_vma->node.start,
>> +                                       0, I915_DISPATCH_SECURE);
>> +       if (err) {
>> +               i915_vma_unpin(oa_vma);
>> +               return err;
>> +       }
>> +
>> +       i915_vma_unpin(oa_vma);
>> +
>> +       swap(eb->oa_config, eb->i915->perf.oa.exclusive_stream->oa_config);
>> +
>> +       return 0;
>> +}
>> +
>>   static int eb_submit(struct i915_execbuffer *eb)
>>   {
>>          int err;
>> @@ -2098,6 +2188,10 @@ static int eb_submit(struct i915_execbuffer *eb)
>>                          return err;
>>          }
>>   
>> +       err = eb_oa_config(eb);
>> +       if (err)
>> +               return err;
>> +
>>          err = eb->engine->emit_bb_start(eb->request,
>>                                          eb->batch->node.start +
>>                                          eb->batch_start_offset,
>> @@ -2520,8 +2614,22 @@ static int parse_timeline_fences(struct i915_user_extension __user *ext, void *d
>>          return 0;
>>   }
>>   
>> +static int parse_perf_config(struct i915_user_extension __user *ext, void *data)
>> +{
>> +       struct i915_execbuffer *eb = data;
>> +
>> +       if (copy_from_user(&eb->extensions.perf_config, ext,
>> +                          sizeof(eb->extensions.perf_config)))
>> +               return -EFAULT;
>> +
>> +       eb->extensions.flags |= BIT(DRM_I915_GEM_EXECBUFFER_EXT_PERF);
>> +
>> +       return 0;
>> +}
>> +
>>   static const i915_user_extension_fn execbuf_extensions[] = {
>>           [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,
>> +        [DRM_I915_GEM_EXECBUFFER_EXT_PERF] = parse_perf_config,
>>   };
>>   
>>   static int
>> @@ -2573,6 +2681,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>>          eb.buffer_count = args->buffer_count;
>>          eb.batch_start_offset = args->batch_start_offset;
>>          eb.batch_len = args->batch_len;
>> +       eb.oa_config = NULL;
>>   
>>          eb.batch_flags = 0;
>>          if (args->flags & I915_EXEC_SECURE) {
>> @@ -2651,9 +2760,23 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>>          if (unlikely(err))
>>                  goto err_unlock;
>>   
>> +       if (eb.extensions.flags & BIT(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) {
>> +               if (!intel_engine_has_oa(eb.engine)) {
>> +                       err = -ENODEV;
>> +                       goto err_engine;
>> +               }
>> +
>> +               err = get_execbuf_oa_config(eb.i915,
>> +                                           eb.extensions.perf_config.perf_fd,
>> +                                           eb.extensions.perf_config.oa_config,
>> +                                           &eb.oa_config, &eb.oa_bo);
>> +               if (err)
>> +                       goto err_engine;
> Why is this under the struct_mutex?


Access to dev_priv->perf.oa.exclusive_stream must be under struct_mutex.

Also because we verify that the engine actually has OA support.

I could split the getting the configuration part away.


>
>> +       }
>> +
>>          err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */
>>          if (unlikely(err))
>> -               goto err_engine;
>> +               goto err_oa;
>>   
>>          err = eb_relocate(&eb);
>>          if (err) {
>> @@ -2806,6 +2929,11 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>>   err_vma:
>>          if (eb.exec)
>>                  eb_release_vmas(&eb);
>> +err_oa:
>> +       if (eb.oa_config) {
>> +               i915_gem_object_put(eb.oa_bo);
>> +               i915_oa_config_put(eb.oa_config);
>> +       }
>>   err_engine:
>>          eb_unpin_context(&eb);
>>   err_unlock:
>> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
>> index 4961f74fd902..ba4d2c1abd1a 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
>> @@ -859,6 +859,8 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
>>   
>>          engine->set_default_submission(engine);
>>   
>> +       INIT_ACTIVE_REQUEST(&engine->last_oa_config);
>> +
>>          return 0;
>>   
>>   err_unpin:
>> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
>> index 7e056114344e..39da40937e7f 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
>> @@ -363,6 +363,8 @@ struct intel_engine_cs {
>>          struct i915_wa_list wa_list;
>>          struct i915_wa_list whitelist;
>>   
>> +       struct i915_active_request last_oa_config;
>> +
>>          u32             irq_keep_mask; /* always keep these interrupts */
>>          u32             irq_enable_mask; /* bitmask to enable ring interrupt */
>>          void            (*irq_enable)(struct intel_engine_cs *engine);
>> @@ -446,6 +448,7 @@ struct intel_engine_cs {
>>   #define I915_ENGINE_HAS_SEMAPHORES   BIT(3)
>>   #define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4)
>>   #define I915_ENGINE_IS_VIRTUAL       BIT(5)
>> +#define I915_ENGINE_HAS_OA           BIT(6)
>>          unsigned int flags;
>>   
>>          /*
>> @@ -541,6 +544,12 @@ intel_engine_is_virtual(const struct intel_engine_cs *engine)
>>          return engine->flags & I915_ENGINE_IS_VIRTUAL;
>>   }
>>   
>> +static inline bool
>> +intel_engine_has_oa(const struct intel_engine_cs *engine)
>> +{
>> +       return engine->flags & I915_ENGINE_HAS_OA;
>> +}
>> +
>>   #define instdone_slice_mask(dev_priv__) \
>>          (IS_GEN(dev_priv__, 7) ? \
>>           1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask)
>> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
>> index 28685ba91a2c..a6ea468986d1 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
>> @@ -2763,6 +2763,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
>>                  engine->init_context = gen8_init_rcs_context;
>>                  engine->emit_flush = gen8_emit_flush_render;
>>                  engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
>> +               engine->flags |= I915_ENGINE_HAS_OA;
>>          }
>>   
>>          return 0;
>> diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
>> index f094406dcc56..a1aee092d0a3 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
>> @@ -2192,8 +2192,10 @@ static void setup_rcs(struct intel_engine_cs *engine)
>>                  engine->irq_enable_mask = I915_USER_INTERRUPT;
>>          }
>>   
>> -       if (IS_HASWELL(i915))
>> +       if (IS_HASWELL(i915)) {
>>                  engine->emit_bb_start = hsw_emit_bb_start;
>> +               engine->flags |= I915_ENGINE_HAS_OA;
>> +       }
>>   
>>          engine->resume = rcs_resume;
>>   }
>> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
>> index 48f9009a6318..ef54cf1c54a5 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.c
>> +++ b/drivers/gpu/drm/i915/i915_drv.c
>> @@ -486,6 +486,10 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
>>          case I915_PARAM_PERF_REVISION:
>>                  value = 1;
>>                  break;
>> +       case I915_PARAM_HAS_EXEC_PERF_CONFIG:
>> +               /* Obviously requires perf support. */
>> +               value = dev_priv->perf.initialized;
>> +               break;
>>          default:
>>                  DRM_DEBUG("Unknown parameter %d\n", param->param);
>>                  return -EINVAL;
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index 0323c8182a67..51b103493623 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -1096,6 +1096,8 @@ struct i915_oa_reg {
>>   };
>>   
>>   struct i915_oa_config {
>> +       struct drm_i915_private *i915;
>> +
>>          char uuid[UUID_STRING_LEN + 1];
>>          int id;
>>   
>> @@ -1114,7 +1116,7 @@ struct i915_oa_config {
>>   
>>          struct list_head vma_link;
>>   
>> -       atomic_t ref_count;
>> +       struct kref ref;
>>   };
>>   
>>   struct i915_perf_stream;
>> @@ -2612,6 +2614,12 @@ int i915_perf_get_oa_config(struct drm_i915_private *i915,
>>                              int metrics_set,
>>                              struct i915_oa_config **out_config,
>>                              struct drm_i915_gem_object **out_obj);
>> +void i915_oa_config_release(struct kref *ref);
>> +
>> +static inline void i915_oa_config_put(struct i915_oa_config *oa_config)
>> +{
>> +       kref_put(&oa_config->ref, i915_oa_config_release);
>> +}
>>   
>>   /* i915_gem_evict.c */
>>   int __must_check i915_gem_evict_something(struct i915_address_space *vm,
>> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
>> index b2f5ba87921c..34d74fa64c74 100644
>> --- a/drivers/gpu/drm/i915/i915_perf.c
>> +++ b/drivers/gpu/drm/i915/i915_perf.c
>> @@ -366,10 +366,11 @@ struct perf_open_properties {
>>          int oa_period_exponent;
>>   };
>>   
>> -static void put_oa_config(struct i915_oa_config *oa_config)
>> +void i915_oa_config_release(struct kref *ref)
>>   {
>> -       if (!atomic_dec_and_test(&oa_config->ref_count))
>> -               return;
>> +       struct i915_oa_config *oa_config = container_of(ref, typeof(*oa_config), ref);
>> +
>> +       lockdep_assert_held(&oa_config->i915->perf.metrics_lock);
> That's not true for the eb paths.


Actually that's not true anywhere...


>
>>   
>>          if (oa_config->obj) {
>>                  list_del(&oa_config->vma_link);
>> @@ -485,7 +486,7 @@ int i915_perf_get_oa_config(struct drm_i915_private *i915,
>>          }
>>   
>>          if (out_config) {
>> -               atomic_inc(&oa_config->ref_count);
>> +               kref_get(&oa_config->ref);
>>                  *out_config = oa_config;
>>          }
>>   
>> @@ -507,7 +508,7 @@ int i915_perf_get_oa_config(struct drm_i915_private *i915,
>>   
>>   err_buf_alloc:
>>          if (out_config) {
>> -               put_oa_config(oa_config);
>> +               i915_oa_config_put(oa_config);
>>                  *out_config = NULL;
>>          }
>>   unlock:
>> @@ -1483,7 +1484,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
>>          if (stream->ctx)
>>                  oa_put_render_ctx_id(stream);
>>   
>> -       put_oa_config(stream->oa_config);
>> +       i915_oa_config_put(stream->oa_config);
>>   
>>          if (dev_priv->perf.oa.spurious_report_rs.missed) {
>>                  DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
>> @@ -2430,7 +2431,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>>          free_oa_buffer(dev_priv);
>>   
>>   err_oa_buf_alloc:
>> -       put_oa_config(stream->oa_config);
>> +       i915_oa_config_put(stream->oa_config);
>>   
>>          intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
>>          intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref);
>> @@ -3246,7 +3247,7 @@ void i915_perf_register(struct drm_i915_private *dev_priv)
>>          if (ret)
>>                  goto sysfs_error;
>>   
>> -       atomic_set(&dev_priv->perf.oa.test_config.ref_count, 1);
>> +       dev_priv->perf.oa.test_config.i915 = dev_priv;
>>   
>>          goto exit;
>>   
>> @@ -3502,7 +3503,8 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
>>                  return -ENOMEM;
>>          }
>>   
>> -       atomic_set(&oa_config->ref_count, 1);
>> +       oa_config->i915 = dev_priv;
>> +       kref_init(&oa_config->ref);
>>   
>>          if (!uuid_is_valid(args->uuid)) {
>>                  DRM_DEBUG("Invalid uuid format for OA config\n");
>> @@ -3601,7 +3603,7 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
>>   sysfs_err:
>>          mutex_unlock(&dev_priv->perf.metrics_lock);
>>   reg_err:
>> -       put_oa_config(oa_config);
>> +       i915_oa_config_put(oa_config);
>>          DRM_DEBUG("Failed to add new OA config\n");
>>          return err;
>>   }
>> @@ -3655,7 +3657,7 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
>>   
>>          DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id);
>>   
>> -       put_oa_config(oa_config);
>> +       i915_oa_config_put(oa_config);
>>   
>>   config_err:
>>          mutex_unlock(&dev_priv->perf.metrics_lock);
>> @@ -3824,7 +3826,7 @@ static int destroy_config(int id, void *p, void *data)
>>   {
>>          struct i915_oa_config *oa_config = p;
>>   
>> -       put_oa_config(oa_config);
>> +       i915_oa_config_put(oa_config);
>>   
>>          return 0;
>>   }
>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>> index b7fe1915e34d..5938a73963fe 100644
>> --- a/include/uapi/drm/i915_drm.h
>> +++ b/include/uapi/drm/i915_drm.h
>> @@ -623,6 +623,16 @@ typedef struct drm_i915_irq_wait {
>>    */
>>   #define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55
>>   
>> +/*
>> + * Request an i915/perf performance configuration change before running the
>> + * commands given in an execbuf.
>> + *
>> + * Performance configuration ID and the file descriptor of the i915 perf
>> + * stream are given through drm_i915_gem_execbuffer_ext_perf. See
>> + * I915_EXEC_EXT.
>> + */
>> +#define I915_PARAM_HAS_EXEC_PERF_CONFIG 56
>> +
>>   /* Must be kept compact -- no holes and well documented */
>>   
>>   typedef struct drm_i915_getparam {
>> @@ -1026,6 +1036,12 @@ enum drm_i915_gem_execbuffer_ext {
>>           */
>>          DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES,
>>   
>> +       /**
>> +        * This identifier is associated with
>> +        * drm_i915_gem_execbuffer_perf_ext.
>> +        */
>> +       DRM_I915_GEM_EXECBUFFER_EXT_PERF,
>> +
>>          DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */
>>   };
>>   
>> @@ -1054,6 +1070,27 @@ struct drm_i915_gem_execbuffer_ext_timeline_fences {
>>          __u64 values_ptr;
>>   };
>>   
>> +struct drm_i915_gem_execbuffer_ext_perf {
>> +       struct i915_user_extension base;
>> +
>> +       /**
>> +        * Performance file descriptor returned by DRM_IOCTL_I915_PERF_OPEN.
>> +        * This is used to identify that the application
>> +        */
>> +       __s32 perf_fd;
>> +
>> +       /**
>> +        * Unused for now. Must be cleared to zero.
>> +        */
>> +       __u32 pad;
>> +
>> +       /**
>> +        * OA configuration ID to switch to before executing the commands
>> +        * associated to the execbuf.
>> +        */
>> +       __u64 oa_config;
>> +};
>> +
>>   struct drm_i915_gem_execbuffer2 {
>>          /**
>>           * List of gem_exec_object2 structs
>> -- 
>> 2.21.0.392.gf8f6787159e
>>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Chris Wilson June 27, 2019, 12:53 p.m. UTC | #5
Quoting Lionel Landwerlin (2019-06-27 13:32:13)
> On 27/06/2019 12:45, Chris Wilson wrote:
> > Quoting Lionel Landwerlin (2019-06-27 09:00:42)
> >> +       /*
> >> +        * If the config hasn't changed, skip reconfiguring the HW (this is
> >> +        * subject to a delay we want to avoid has much as possible).
> >> +        */
> >> +       if (eb->oa_config == eb->i915->perf.oa.exclusive_stream->oa_config)
> >> +               return 0;
> >> +
> >> +       oa_vma = i915_vma_instance(eb->oa_bo,
> >> +                                  &eb->engine->i915->ggtt.vm, NULL);
> >> +       if (unlikely(IS_ERR(oa_vma)))
> >> +               return PTR_ERR(oa_vma);
> >> +
> >> +       err = i915_vma_pin(oa_vma, 0, 0, PIN_GLOBAL);
> >> +       if (err)
> >> +               return err;
> > Ugh. We should not be pinned after creating the request. Might not
> > matter -- it's just reservation under its own lock that must not be
> > crossed with the timeline lock currently held here.
> 
> 
> Should I move this into get_execbuf_oa_config() ?

That would be save me fretting about the lock nesting.

> >> @@ -2651,9 +2760,23 @@ i915_gem_do_execbuffer(struct drm_device *dev,
> >>          if (unlikely(err))
> >>                  goto err_unlock;
> >>   
> >> +       if (eb.extensions.flags & BIT(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) {
> >> +               if (!intel_engine_has_oa(eb.engine)) {
> >> +                       err = -ENODEV;
> >> +                       goto err_engine;
> >> +               }
> >> +
> >> +               err = get_execbuf_oa_config(eb.i915,
> >> +                                           eb.extensions.perf_config.perf_fd,
> >> +                                           eb.extensions.perf_config.oa_config,
> >> +                                           &eb.oa_config, &eb.oa_bo);
> >> +               if (err)
> >> +                       goto err_engine;
> > Why is this under the struct_mutex?
> 
> 
> Access to dev_priv->perf.oa.exclusive_stream must be under struct_mutex.
> 
> Also because we verify that the engine actually has OA support.
> 
> I could split the getting the configuration part away.

I'm about 10^W 50^W certainly less than a 100 patches away from killing
struct_mutex for execbuf...
-Chris
Lionel Landwerlin June 27, 2019, 2:09 p.m. UTC | #6
On 27/06/2019 15:53, Chris Wilson wrote:
> Quoting Lionel Landwerlin (2019-06-27 13:32:13)
>> On 27/06/2019 12:45, Chris Wilson wrote:
>>> Quoting Lionel Landwerlin (2019-06-27 09:00:42)
>>>> +       /*
>>>> +        * If the config hasn't changed, skip reconfiguring the HW (this is
>>>> +        * subject to a delay we want to avoid has much as possible).
>>>> +        */
>>>> +       if (eb->oa_config == eb->i915->perf.oa.exclusive_stream->oa_config)
>>>> +               return 0;
>>>> +
>>>> +       oa_vma = i915_vma_instance(eb->oa_bo,
>>>> +                                  &eb->engine->i915->ggtt.vm, NULL);
>>>> +       if (unlikely(IS_ERR(oa_vma)))
>>>> +               return PTR_ERR(oa_vma);
>>>> +
>>>> +       err = i915_vma_pin(oa_vma, 0, 0, PIN_GLOBAL);
>>>> +       if (err)
>>>> +               return err;
>>> Ugh. We should not be pinned after creating the request. Might not
>>> matter -- it's just reservation under its own lock that must not be
>>> crossed with the timeline lock currently held here.
>>
>> Should I move this into get_execbuf_oa_config() ?
> That would be save me fretting about the lock nesting.
>
>>>> @@ -2651,9 +2760,23 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>>>>           if (unlikely(err))
>>>>                   goto err_unlock;
>>>>    
>>>> +       if (eb.extensions.flags & BIT(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) {
>>>> +               if (!intel_engine_has_oa(eb.engine)) {
>>>> +                       err = -ENODEV;
>>>> +                       goto err_engine;
>>>> +               }
>>>> +
>>>> +               err = get_execbuf_oa_config(eb.i915,
>>>> +                                           eb.extensions.perf_config.perf_fd,
>>>> +                                           eb.extensions.perf_config.oa_config,
>>>> +                                           &eb.oa_config, &eb.oa_bo);
>>>> +               if (err)
>>>> +                       goto err_engine;
>>> Why is this under the struct_mutex?
>>
>> Access to dev_priv->perf.oa.exclusive_stream must be under struct_mutex.
>>
>> Also because we verify that the engine actually has OA support.
>>
>> I could split the getting the configuration part away.
> I'm about 10^W 50^W certainly less than a 100 patches away from killing
> struct_mutex for execbuf...
> -Chris
>
I'm sorry. Dealing with all this OA stuff is underwhelming.

I think an engine lock would be enough if that's not too bad for you.


-Lionel
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 476fade6fcab..18ae3e8a4e02 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -283,7 +283,11 @@  struct i915_execbuffer {
 	struct {
 		u64 flags; /** Available extensions parameters */
 		struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
+		struct drm_i915_gem_execbuffer_ext_perf perf_config;
 	} extensions;
+
+	struct i915_oa_config *oa_config; /** HW configuration for OA, NULL is not needed. */
+	struct drm_i915_gem_object *oa_bo;
 };
 
 #define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])
@@ -1210,6 +1214,34 @@  static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
 	return err;
 }
 
+
+static int
+get_execbuf_oa_config(struct drm_i915_private *dev_priv,
+		      s32 perf_fd, u64 oa_config_id,
+		      struct i915_oa_config **out_oa_config,
+		      struct drm_i915_gem_object **out_oa_obj)
+{
+	struct file *perf_file;
+	int ret;
+
+	if (!dev_priv->perf.oa.exclusive_stream)
+		return -EINVAL;
+
+	perf_file = fget(perf_fd);
+	if (!perf_file)
+		return -EINVAL;
+
+	/* Drop the file ref on both paths (it was leaked on mismatch). */
+	if (perf_file->private_data != dev_priv->perf.oa.exclusive_stream)
+		ret = -EINVAL;
+	else
+		ret = i915_perf_get_oa_config(dev_priv, oa_config_id,
+					      out_oa_config, out_oa_obj);
+	fput(perf_file);
+
+	return ret;
+}
+
 static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 			     struct i915_vma *vma,
 			     unsigned int len)
@@ -2072,6 +2104,64 @@  add_to_client(struct i915_request *rq, struct drm_file *file)
 	list_add_tail(&rq->client_link, &rq->file_priv->mm.request_list);
 }
 
+static int eb_oa_config(struct i915_execbuffer *eb)
+{
+	struct i915_vma *oa_vma;
+	int err;
+
+	if (!eb->oa_config)
+		return 0;
+
+	err = i915_active_request_set(&eb->engine->last_oa_config,
+				      eb->request);
+	if (err)
+		return err;
+
+	/*
+	 * If the perf stream was opened with hold preemption, flag the
+	 * request properly so that the priority of the request is bumped once
+	 * it reaches the execlist ports.
+	 */
+	if (eb->i915->perf.oa.exclusive_stream->hold_preemption)
+		eb->request->has_perf = true;
+
+	/*
+	 * If the config hasn't changed, skip reconfiguring the HW (this is
+	 * subject to a delay we want to avoid as much as possible).
+	 */
+	if (eb->oa_config == eb->i915->perf.oa.exclusive_stream->oa_config)
+		return 0;
+
+	oa_vma = i915_vma_instance(eb->oa_bo,
+				   &eb->engine->i915->ggtt.vm, NULL);
+	if (unlikely(IS_ERR(oa_vma)))
+		return PTR_ERR(oa_vma);
+
+	err = i915_vma_pin(oa_vma, 0, 0, PIN_GLOBAL);
+	if (err)
+		return err;
+
+	err = i915_vma_move_to_active(oa_vma, eb->request, 0);
+	if (err) {
+		i915_vma_unpin(oa_vma);
+		return err;
+	}
+
+	err = eb->engine->emit_bb_start(eb->request,
+					oa_vma->node.start,
+					0, I915_DISPATCH_SECURE);
+	if (err) {
+		i915_vma_unpin(oa_vma);
+		return err;
+	}
+
+	i915_vma_unpin(oa_vma);
+
+	swap(eb->oa_config, eb->i915->perf.oa.exclusive_stream->oa_config);
+
+	return 0;
+}
+
 static int eb_submit(struct i915_execbuffer *eb)
 {
 	int err;
@@ -2098,6 +2188,10 @@  static int eb_submit(struct i915_execbuffer *eb)
 			return err;
 	}
 
+	err = eb_oa_config(eb);
+	if (err)
+		return err;
+
 	err = eb->engine->emit_bb_start(eb->request,
 					eb->batch->node.start +
 					eb->batch_start_offset,
@@ -2520,8 +2614,22 @@  static int parse_timeline_fences(struct i915_user_extension __user *ext, void *d
 	return 0;
 }
 
+static int parse_perf_config(struct i915_user_extension __user *ext, void *data)
+{
+	struct i915_execbuffer *eb = data;
+
+	if (copy_from_user(&eb->extensions.perf_config, ext,
+			   sizeof(eb->extensions.perf_config)))
+		return -EFAULT;
+
+	eb->extensions.flags |= BIT(DRM_I915_GEM_EXECBUFFER_EXT_PERF);
+
+	return 0;
+}
+
 static const i915_user_extension_fn execbuf_extensions[] = {
         [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,
+        [DRM_I915_GEM_EXECBUFFER_EXT_PERF] = parse_perf_config,
 };
 
 static int
@@ -2573,6 +2681,7 @@  i915_gem_do_execbuffer(struct drm_device *dev,
 	eb.buffer_count = args->buffer_count;
 	eb.batch_start_offset = args->batch_start_offset;
 	eb.batch_len = args->batch_len;
+	eb.oa_config = NULL;
 
 	eb.batch_flags = 0;
 	if (args->flags & I915_EXEC_SECURE) {
@@ -2651,9 +2760,23 @@  i915_gem_do_execbuffer(struct drm_device *dev,
 	if (unlikely(err))
 		goto err_unlock;
 
+	if (eb.extensions.flags & BIT(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) {
+		if (!intel_engine_has_oa(eb.engine)) {
+			err = -ENODEV;
+			goto err_engine;
+		}
+
+		err = get_execbuf_oa_config(eb.i915,
+					    eb.extensions.perf_config.perf_fd,
+					    eb.extensions.perf_config.oa_config,
+					    &eb.oa_config, &eb.oa_bo);
+		if (err)
+			goto err_engine;
+	}
+
 	err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */
 	if (unlikely(err))
-		goto err_engine;
+		goto err_oa;
 
 	err = eb_relocate(&eb);
 	if (err) {
@@ -2806,6 +2929,11 @@  i915_gem_do_execbuffer(struct drm_device *dev,
 err_vma:
 	if (eb.exec)
 		eb_release_vmas(&eb);
+err_oa:
+	if (eb.oa_config) {
+		i915_gem_object_put(eb.oa_bo);
+		i915_oa_config_put(eb.oa_config);
+	}
 err_engine:
 	eb_unpin_context(&eb);
 err_unlock:
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 4961f74fd902..ba4d2c1abd1a 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -859,6 +859,8 @@  int intel_engine_init_common(struct intel_engine_cs *engine)
 
 	engine->set_default_submission(engine);
 
+	INIT_ACTIVE_REQUEST(&engine->last_oa_config);
+
 	return 0;
 
 err_unpin:
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 7e056114344e..39da40937e7f 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -363,6 +363,8 @@  struct intel_engine_cs {
 	struct i915_wa_list wa_list;
 	struct i915_wa_list whitelist;
 
+	struct i915_active_request last_oa_config;
+
 	u32             irq_keep_mask; /* always keep these interrupts */
 	u32		irq_enable_mask; /* bitmask to enable ring interrupt */
 	void		(*irq_enable)(struct intel_engine_cs *engine);
@@ -446,6 +448,7 @@  struct intel_engine_cs {
 #define I915_ENGINE_HAS_SEMAPHORES   BIT(3)
 #define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4)
 #define I915_ENGINE_IS_VIRTUAL       BIT(5)
+#define I915_ENGINE_HAS_OA           BIT(6)
 	unsigned int flags;
 
 	/*
@@ -541,6 +544,12 @@  intel_engine_is_virtual(const struct intel_engine_cs *engine)
 	return engine->flags & I915_ENGINE_IS_VIRTUAL;
 }
 
+static inline bool
+intel_engine_has_oa(const struct intel_engine_cs *engine)
+{
+	return engine->flags & I915_ENGINE_HAS_OA;
+}
+
 #define instdone_slice_mask(dev_priv__) \
 	(IS_GEN(dev_priv__, 7) ? \
 	 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 28685ba91a2c..a6ea468986d1 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2763,6 +2763,7 @@  int intel_execlists_submission_setup(struct intel_engine_cs *engine)
 		engine->init_context = gen8_init_rcs_context;
 		engine->emit_flush = gen8_emit_flush_render;
 		engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
+		engine->flags |= I915_ENGINE_HAS_OA;
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index f094406dcc56..a1aee092d0a3 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -2192,8 +2192,10 @@  static void setup_rcs(struct intel_engine_cs *engine)
 		engine->irq_enable_mask = I915_USER_INTERRUPT;
 	}
 
-	if (IS_HASWELL(i915))
+	if (IS_HASWELL(i915)) {
 		engine->emit_bb_start = hsw_emit_bb_start;
+		engine->flags |= I915_ENGINE_HAS_OA;
+	}
 
 	engine->resume = rcs_resume;
 }
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 48f9009a6318..ef54cf1c54a5 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -486,6 +486,10 @@  static int i915_getparam_ioctl(struct drm_device *dev, void *data,
 	case I915_PARAM_PERF_REVISION:
 		value = 1;
 		break;
+	case I915_PARAM_HAS_EXEC_PERF_CONFIG:
+		/* Obviously requires perf support. */
+		value = dev_priv->perf.initialized;
+		break;
 	default:
 		DRM_DEBUG("Unknown parameter %d\n", param->param);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0323c8182a67..51b103493623 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1096,6 +1096,8 @@  struct i915_oa_reg {
 };
 
 struct i915_oa_config {
+	struct drm_i915_private *i915;
+
 	char uuid[UUID_STRING_LEN + 1];
 	int id;
 
@@ -1114,7 +1116,7 @@  struct i915_oa_config {
 
 	struct list_head vma_link;
 
-	atomic_t ref_count;
+	struct kref ref;
 };
 
 struct i915_perf_stream;
@@ -2612,6 +2614,12 @@  int i915_perf_get_oa_config(struct drm_i915_private *i915,
 			    int metrics_set,
 			    struct i915_oa_config **out_config,
 			    struct drm_i915_gem_object **out_obj);
+void i915_oa_config_release(struct kref *ref);
+
+static inline void i915_oa_config_put(struct i915_oa_config *oa_config)
+{
+	kref_put(&oa_config->ref, i915_oa_config_release);
+}
 
 /* i915_gem_evict.c */
 int __must_check i915_gem_evict_something(struct i915_address_space *vm,
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index b2f5ba87921c..34d74fa64c74 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -366,10 +366,11 @@  struct perf_open_properties {
 	int oa_period_exponent;
 };
 
-static void put_oa_config(struct i915_oa_config *oa_config)
+void i915_oa_config_release(struct kref *ref)
 {
-	if (!atomic_dec_and_test(&oa_config->ref_count))
-		return;
+	struct i915_oa_config *oa_config = container_of(ref, typeof(*oa_config), ref);
+
+	/* NOTE: metrics_lock is not held on all release paths (e.g. execbuf), so it cannot be asserted here. */
 
 	if (oa_config->obj) {
 		list_del(&oa_config->vma_link);
@@ -485,7 +486,7 @@  int i915_perf_get_oa_config(struct drm_i915_private *i915,
 	}
 
 	if (out_config) {
-		atomic_inc(&oa_config->ref_count);
+		kref_get(&oa_config->ref);
 		*out_config = oa_config;
 	}
 
@@ -507,7 +508,7 @@  int i915_perf_get_oa_config(struct drm_i915_private *i915,
 
 err_buf_alloc:
 	if (out_config) {
-		put_oa_config(oa_config);
+		i915_oa_config_put(oa_config);
 		*out_config = NULL;
 	}
 unlock:
@@ -1483,7 +1484,7 @@  static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
 	if (stream->ctx)
 		oa_put_render_ctx_id(stream);
 
-	put_oa_config(stream->oa_config);
+	i915_oa_config_put(stream->oa_config);
 
 	if (dev_priv->perf.oa.spurious_report_rs.missed) {
 		DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
@@ -2430,7 +2431,7 @@  static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	free_oa_buffer(dev_priv);
 
 err_oa_buf_alloc:
-	put_oa_config(stream->oa_config);
+	i915_oa_config_put(stream->oa_config);
 
 	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
 	intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref);
@@ -3246,7 +3247,7 @@  void i915_perf_register(struct drm_i915_private *dev_priv)
 	if (ret)
 		goto sysfs_error;
 
-	atomic_set(&dev_priv->perf.oa.test_config.ref_count, 1);
+	dev_priv->perf.oa.test_config.i915 = dev_priv;
 
 	goto exit;
 
@@ -3502,7 +3503,8 @@  int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
 		return -ENOMEM;
 	}
 
-	atomic_set(&oa_config->ref_count, 1);
+	oa_config->i915 = dev_priv;
+	kref_init(&oa_config->ref);
 
 	if (!uuid_is_valid(args->uuid)) {
 		DRM_DEBUG("Invalid uuid format for OA config\n");
@@ -3601,7 +3603,7 @@  int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
 sysfs_err:
 	mutex_unlock(&dev_priv->perf.metrics_lock);
 reg_err:
-	put_oa_config(oa_config);
+	i915_oa_config_put(oa_config);
 	DRM_DEBUG("Failed to add new OA config\n");
 	return err;
 }
@@ -3655,7 +3657,7 @@  int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
 
 	DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id);
 
-	put_oa_config(oa_config);
+	i915_oa_config_put(oa_config);
 
 config_err:
 	mutex_unlock(&dev_priv->perf.metrics_lock);
@@ -3824,7 +3826,7 @@  static int destroy_config(int id, void *p, void *data)
 {
 	struct i915_oa_config *oa_config = p;
 
-	put_oa_config(oa_config);
+	i915_oa_config_put(oa_config);
 
 	return 0;
 }
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index b7fe1915e34d..5938a73963fe 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -623,6 +623,16 @@  typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55
 
+/*
+ * Request an i915/perf performance configuration change before running the
+ * commands given in an execbuf.
+ *
+ * Performance configuration ID and the file descriptor of the i915 perf
+ * stream are given through drm_i915_gem_execbuffer_ext_perf. See
+ * I915_EXEC_EXT.
+ */
+#define I915_PARAM_HAS_EXEC_PERF_CONFIG 56
+
 /* Must be kept compact -- no holes and well documented */
 
 typedef struct drm_i915_getparam {
@@ -1026,6 +1036,12 @@  enum drm_i915_gem_execbuffer_ext {
 	 */
 	DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES,
 
+	/**
+	 * This identifier is associated with
+	 * drm_i915_gem_execbuffer_ext_perf.
+	 */
+	DRM_I915_GEM_EXECBUFFER_EXT_PERF,
+
 	DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */
 };
 
@@ -1054,6 +1070,27 @@  struct drm_i915_gem_execbuffer_ext_timeline_fences {
 	__u64 values_ptr;
 };
 
+struct drm_i915_gem_execbuffer_ext_perf {
+	struct i915_user_extension base;
+
+	/**
+	 * Performance file descriptor returned by DRM_IOCTL_I915_PERF_OPEN.
+	 * This is used to verify that the application issuing the execbuf
+	 * owns the i915 perf stream.
+	 */
+	__s32 perf_fd;
+
+	/**
+	 * Unused for now. Must be cleared to zero.
+	 */
+	__u32 pad;
+
+	/**
+	 * OA configuration ID to switch to before executing the commands
+	 * associated with the execbuf.
+	 */
+	__u64 oa_config;
+};
+
 struct drm_i915_gem_execbuffer2 {
 	/**
 	 * List of gem_exec_object2 structs