Message ID | 20181008151822.10519-3-lionel.g.landwerlin@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | drm/i915: serialized performance queries | expand |
Quoting Lionel Landwerlin (2018-10-08 16:18:21) > Here we introduce a mechanism by which the execbuf part of the i915 > driver will be able to request that a batch buffer containing the > programming for a particular OA config be created. > > We'll execute these OA configuration buffers right before executing a > set of userspace commands so that a particular user batchbuffer be > executed with a given OA configuration. > > This mechanism essentially allows the userspace driver to go through > several OA configuration without having to open/close the i915/perf > stream. > > Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > --- > drivers/gpu/drm/i915/i915_drv.h | 22 ++- > drivers/gpu/drm/i915/i915_perf.c | 195 ++++++++++++++++++---- > drivers/gpu/drm/i915/intel_gpu_commands.h | 1 + > 3 files changed, 187 insertions(+), 31 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 2264b30ce51a..a35715cd7608 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1378,6 +1378,10 @@ struct i915_oa_config { > struct attribute *attrs[2]; > struct device_attribute sysfs_metric_id; > > + struct i915_vma *vma; > + > + struct list_head vma_link; > + > atomic_t ref_count; > }; > > @@ -1979,11 +1983,21 @@ struct drm_i915_private { > struct mutex metrics_lock; > > /* > - * List of dynamic configurations, you need to hold > - * dev_priv->perf.metrics_lock to access it. > + * List of dynamic configurations (struct i915_oa_config), you > + * need to hold dev_priv->perf.metrics_lock to access it. > */ > struct idr metrics_idr; > > + /* > + * List of dynamic configurations (struct i915_oa_config) > + * which have an allocated buffer in GGTT for reconfiguration, > + * you need to hold dev_priv->perf.metrics_lock to access it. > + * Elements are added to the list lazilly on execbuf (when a > + * particular configuration is requested). The list is freed > + * upon closing the perf stream. 
> + */ > + struct list_head metrics_buffers; > + > /* > * Lock associated with anything below within this structure > * except exclusive_stream. > @@ -3315,6 +3329,10 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, > void i915_oa_init_reg_state(struct intel_engine_cs *engine, > struct i915_gem_context *ctx, > uint32_t *reg_state); > +int i915_perf_get_oa_config(struct drm_i915_private *i915, > + int metrics_set, > + struct i915_oa_config **out_config, > + struct i915_vma **out_vma); > > /* i915_gem_evict.c */ > int __must_check i915_gem_evict_something(struct i915_address_space *vm, > diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c > index e2a96b6844fe..39c5b44862d4 100644 > --- a/drivers/gpu/drm/i915/i915_perf.c > +++ b/drivers/gpu/drm/i915/i915_perf.c > @@ -364,9 +364,16 @@ struct perf_open_properties { > int oa_period_exponent; > }; > > -static void free_oa_config(struct drm_i915_private *dev_priv, > - struct i915_oa_config *oa_config) > +static void put_oa_config(struct i915_oa_config *oa_config) > { > + if (!atomic_dec_and_test(&oa_config->ref_count)) > + return; > + > + if (oa_config->vma) { > + list_del(&oa_config->vma_link); > + i915_vma_put(oa_config->vma); > + } > + > if (!PTR_ERR(oa_config->flex_regs)) > kfree(oa_config->flex_regs); > if (!PTR_ERR(oa_config->b_counter_regs)) > @@ -376,38 +383,152 @@ static void free_oa_config(struct drm_i915_private *dev_priv, > kfree(oa_config); > } > > -static void put_oa_config(struct drm_i915_private *dev_priv, > - struct i915_oa_config *oa_config) > +static u32 *write_cs_mi_lri(u32 *cs, const struct i915_oa_reg *reg_data, u32 n_regs) > { > - if (!atomic_dec_and_test(&oa_config->ref_count)) > - return; > + u32 i; > > - free_oa_config(dev_priv, oa_config); > + for (i = 0; i < n_regs; i++) { > + if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) { > + u32 n_lri = min(n_regs - i, > + (u32) MI_LOAD_REGISTER_IMM_MAX_REGS); > + > + *cs++ = MI_LOAD_REGISTER_IMM(n_lri); > 
+ } > + *cs++ = i915_mmio_reg_offset(reg_data[i].addr); > + *cs++ = reg_data[i].value; > + } > + > + return cs; > } > > -static int get_oa_config(struct drm_i915_private *dev_priv, > - int metrics_set, > - struct i915_oa_config **out_config) > +static int alloc_oa_config_buffer(struct drm_i915_private *i915, > + struct i915_oa_config *oa_config) > { > + struct drm_i915_gem_object *bo; > + size_t config_length = 0; > int ret; > + u32 *cs; > > - if (metrics_set == 1) { > - *out_config = &dev_priv->perf.oa.test_config; > - atomic_inc(&dev_priv->perf.oa.test_config.ref_count); > - return 0; > + if (oa_config->mux_regs_len > 0) { > + config_length += DIV_ROUND_UP(oa_config->mux_regs_len, > + MI_LOAD_REGISTER_IMM_MAX_REGS) * 4; > + config_length += oa_config->mux_regs_len * 8; > } > + if (oa_config->b_counter_regs_len > 0) { > + config_length += DIV_ROUND_UP(oa_config->b_counter_regs_len, > + MI_LOAD_REGISTER_IMM_MAX_REGS) * 4; > + config_length += oa_config->b_counter_regs_len * 8; > + } > + if (oa_config->flex_regs_len > 0) { > + config_length += DIV_ROUND_UP(oa_config->flex_regs_len, > + MI_LOAD_REGISTER_IMM_MAX_REGS) * 4; > + config_length += oa_config->flex_regs_len * 8; > + } > + config_length += 4; /* MI_BATCH_BUFFER_END */ > + config_length = ALIGN(config_length, I915_GTT_PAGE_SIZE); > > - ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock); > + ret = i915_mutex_lock_interruptible(&i915->drm); > if (ret) > return ret; > > - *out_config = idr_find(&dev_priv->perf.metrics_idr, metrics_set); > - if (!*out_config) > - ret = -EINVAL; > - else > - atomic_inc(&(*out_config)->ref_count); > + bo = i915_gem_object_create(i915, config_length); > + if (IS_ERR(bo)) { > + ret = PTR_ERR(bo); > + goto unlock; > + } > + > + ret = i915_gem_object_set_cache_level(bo, I915_CACHE_LLC); Don't enable snoop on a batchbuffer. 
> + if (ret) > + goto err_unref; > > - mutex_unlock(&dev_priv->perf.metrics_lock); > + oa_config->vma = i915_gem_object_ggtt_pin(bo, NULL, 0, config_length, 0); Why have you pinned it? > + if (IS_ERR(oa_config->vma)) { > + ret = PTR_ERR(oa_config->vma); > + oa_config->vma = NULL; > + goto err_unref; > + } > + > + cs = i915_gem_object_pin_map(bo, I915_MAP_WB); > + if (IS_ERR(cs)) { > + ret = PTR_ERR(cs); > + goto err_unpin; > + } > + > + memset(cs, 0, config_length); Already zero. Or use create_internal to avoid shmemfs overhead. And since you write all bytes, you can just ignore it. > + cs = write_cs_mi_lri(cs, oa_config->mux_regs, oa_config->mux_regs_len); > + cs = write_cs_mi_lri(cs, oa_config->b_counter_regs, oa_config->b_counter_regs_len); > + cs = write_cs_mi_lri(cs, oa_config->flex_regs, oa_config->flex_regs_len); > + > + *cs++ = MI_BATCH_BUFFER_END; > + > + i915_gem_object_unpin_map(bo); > + > + goto unlock; > + > +err_unpin: > + __i915_vma_unpin(oa_config->vma); > + > +err_unref: > + oa_config->vma = NULL; > + i915_gem_object_put(bo); > + > +unlock: > + mutex_unlock(&i915->drm.struct_mutex); > + return ret; > +} > + > +int i915_perf_get_oa_config(struct drm_i915_private *i915, > + int metrics_set, > + struct i915_oa_config **out_config, > + struct i915_vma **out_vma) > +{ > + int ret = 0; > + struct i915_oa_config *oa_config; > + > + if (!i915->perf.initialized) > + return -ENODEV; > + > + ret = mutex_lock_interruptible(&i915->perf.metrics_lock); > + if (ret) > + return ret; > + > + if (metrics_set == 1) { > + oa_config = &i915->perf.oa.test_config; > + } else { > + oa_config = idr_find(&i915->perf.metrics_idr, metrics_set); > + if (!oa_config) { > + ret = -EINVAL; > + goto unlock; > + } > + } > + > + if (out_config) { > + atomic_inc(&oa_config->ref_count); > + *out_config = oa_config; > + } > + > + if (out_vma) { > + if (oa_config->vma) { > + *out_vma = i915_vma_get(oa_config->vma); > + } else { > + ret = alloc_oa_config_buffer(i915, oa_config); > + if 
(ret) { > + goto err_buf_alloc; > + } else { > + list_add(&oa_config->vma_link, > + &i915->perf.metrics_buffers); > + *out_vma = i915_vma_get(oa_config->vma); > + } > + } Where is out_vma used so we can check if the lifetime tracking is ok as so far you are releasing it before we know it is idle. -Chris
On 08/10/2018 16:34, Chris Wilson wrote: > Quoting Lionel Landwerlin (2018-10-08 16:18:21) >> Here we introduce a mechanism by which the execbuf part of the i915 >> driver will be able to request that a batch buffer containing the >> programming for a particular OA config be created. >> >> We'll execute these OA configuration buffers right before executing a >> set of userspace commands so that a particular user batchbuffer be >> executed with a given OA configuration. >> >> This mechanism essentially allows the userspace driver to go through >> several OA configuration without having to open/close the i915/perf >> stream. >> >> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> >> --- >> drivers/gpu/drm/i915/i915_drv.h | 22 ++- >> drivers/gpu/drm/i915/i915_perf.c | 195 ++++++++++++++++++---- >> drivers/gpu/drm/i915/intel_gpu_commands.h | 1 + >> 3 files changed, 187 insertions(+), 31 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h >> index 2264b30ce51a..a35715cd7608 100644 >> --- a/drivers/gpu/drm/i915/i915_drv.h >> +++ b/drivers/gpu/drm/i915/i915_drv.h >> @@ -1378,6 +1378,10 @@ struct i915_oa_config { >> struct attribute *attrs[2]; >> struct device_attribute sysfs_metric_id; >> >> + struct i915_vma *vma; >> + >> + struct list_head vma_link; >> + >> atomic_t ref_count; >> }; >> >> @@ -1979,11 +1983,21 @@ struct drm_i915_private { >> struct mutex metrics_lock; >> >> /* >> - * List of dynamic configurations, you need to hold >> - * dev_priv->perf.metrics_lock to access it. >> + * List of dynamic configurations (struct i915_oa_config), you >> + * need to hold dev_priv->perf.metrics_lock to access it. >> */ >> struct idr metrics_idr; >> >> + /* >> + * List of dynamic configurations (struct i915_oa_config) >> + * which have an allocated buffer in GGTT for reconfiguration, >> + * you need to hold dev_priv->perf.metrics_lock to access it. 
>> + * Elements are added to the list lazilly on execbuf (when a >> + * particular configuration is requested). The list is freed >> + * upon closing the perf stream. >> + */ >> + struct list_head metrics_buffers; >> + >> /* >> * Lock associated with anything below within this structure >> * except exclusive_stream. >> @@ -3315,6 +3329,10 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, >> void i915_oa_init_reg_state(struct intel_engine_cs *engine, >> struct i915_gem_context *ctx, >> uint32_t *reg_state); >> +int i915_perf_get_oa_config(struct drm_i915_private *i915, >> + int metrics_set, >> + struct i915_oa_config **out_config, >> + struct i915_vma **out_vma); >> >> /* i915_gem_evict.c */ >> int __must_check i915_gem_evict_something(struct i915_address_space *vm, >> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c >> index e2a96b6844fe..39c5b44862d4 100644 >> --- a/drivers/gpu/drm/i915/i915_perf.c >> +++ b/drivers/gpu/drm/i915/i915_perf.c >> @@ -364,9 +364,16 @@ struct perf_open_properties { >> int oa_period_exponent; >> }; >> >> -static void free_oa_config(struct drm_i915_private *dev_priv, >> - struct i915_oa_config *oa_config) >> +static void put_oa_config(struct i915_oa_config *oa_config) >> { >> + if (!atomic_dec_and_test(&oa_config->ref_count)) >> + return; >> + >> + if (oa_config->vma) { >> + list_del(&oa_config->vma_link); >> + i915_vma_put(oa_config->vma); >> + } >> + >> if (!PTR_ERR(oa_config->flex_regs)) >> kfree(oa_config->flex_regs); >> if (!PTR_ERR(oa_config->b_counter_regs)) >> @@ -376,38 +383,152 @@ static void free_oa_config(struct drm_i915_private *dev_priv, >> kfree(oa_config); >> } >> >> -static void put_oa_config(struct drm_i915_private *dev_priv, >> - struct i915_oa_config *oa_config) >> +static u32 *write_cs_mi_lri(u32 *cs, const struct i915_oa_reg *reg_data, u32 n_regs) >> { >> - if (!atomic_dec_and_test(&oa_config->ref_count)) >> - return; >> + u32 i; >> >> - free_oa_config(dev_priv, 
oa_config); >> + for (i = 0; i < n_regs; i++) { >> + if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) { >> + u32 n_lri = min(n_regs - i, >> + (u32) MI_LOAD_REGISTER_IMM_MAX_REGS); >> + >> + *cs++ = MI_LOAD_REGISTER_IMM(n_lri); >> + } >> + *cs++ = i915_mmio_reg_offset(reg_data[i].addr); >> + *cs++ = reg_data[i].value; >> + } >> + >> + return cs; >> } >> >> -static int get_oa_config(struct drm_i915_private *dev_priv, >> - int metrics_set, >> - struct i915_oa_config **out_config) >> +static int alloc_oa_config_buffer(struct drm_i915_private *i915, >> + struct i915_oa_config *oa_config) >> { >> + struct drm_i915_gem_object *bo; >> + size_t config_length = 0; >> int ret; >> + u32 *cs; >> >> - if (metrics_set == 1) { >> - *out_config = &dev_priv->perf.oa.test_config; >> - atomic_inc(&dev_priv->perf.oa.test_config.ref_count); >> - return 0; >> + if (oa_config->mux_regs_len > 0) { >> + config_length += DIV_ROUND_UP(oa_config->mux_regs_len, >> + MI_LOAD_REGISTER_IMM_MAX_REGS) * 4; >> + config_length += oa_config->mux_regs_len * 8; >> } >> + if (oa_config->b_counter_regs_len > 0) { >> + config_length += DIV_ROUND_UP(oa_config->b_counter_regs_len, >> + MI_LOAD_REGISTER_IMM_MAX_REGS) * 4; >> + config_length += oa_config->b_counter_regs_len * 8; >> + } >> + if (oa_config->flex_regs_len > 0) { >> + config_length += DIV_ROUND_UP(oa_config->flex_regs_len, >> + MI_LOAD_REGISTER_IMM_MAX_REGS) * 4; >> + config_length += oa_config->flex_regs_len * 8; >> + } >> + config_length += 4; /* MI_BATCH_BUFFER_END */ >> + config_length = ALIGN(config_length, I915_GTT_PAGE_SIZE); >> >> - ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock); >> + ret = i915_mutex_lock_interruptible(&i915->drm); >> if (ret) >> return ret; >> >> - *out_config = idr_find(&dev_priv->perf.metrics_idr, metrics_set); >> - if (!*out_config) >> - ret = -EINVAL; >> - else >> - atomic_inc(&(*out_config)->ref_count); >> + bo = i915_gem_object_create(i915, config_length); >> + if (IS_ERR(bo)) { >> + ret = PTR_ERR(bo); >> 
+ goto unlock; >> + } >> + >> + ret = i915_gem_object_set_cache_level(bo, I915_CACHE_LLC); > Don't enable snoop on a batchbuffer. Oh right, dropping. > >> + if (ret) >> + goto err_unref; >> >> - mutex_unlock(&dev_priv->perf.metrics_lock); >> + oa_config->vma = i915_gem_object_ggtt_pin(bo, NULL, 0, config_length, 0); > Why have you pinned it? Duh, I guess I can just pin it at execbuf time! Thanks! > >> + if (IS_ERR(oa_config->vma)) { >> + ret = PTR_ERR(oa_config->vma); >> + oa_config->vma = NULL; >> + goto err_unref; >> + } >> + >> + cs = i915_gem_object_pin_map(bo, I915_MAP_WB); >> + if (IS_ERR(cs)) { >> + ret = PTR_ERR(cs); >> + goto err_unpin; >> + } >> + >> + memset(cs, 0, config_length); > Already zero. Or use create_internal to avoid shmemfs overhead. And > since you write all bytes, you can just ignore it. Cool, will drop. > >> + cs = write_cs_mi_lri(cs, oa_config->mux_regs, oa_config->mux_regs_len); >> + cs = write_cs_mi_lri(cs, oa_config->b_counter_regs, oa_config->b_counter_regs_len); >> + cs = write_cs_mi_lri(cs, oa_config->flex_regs, oa_config->flex_regs_len); >> + >> + *cs++ = MI_BATCH_BUFFER_END; >> + >> + i915_gem_object_unpin_map(bo); >> + >> + goto unlock; >> + >> +err_unpin: >> + __i915_vma_unpin(oa_config->vma); >> + >> +err_unref: >> + oa_config->vma = NULL; >> + i915_gem_object_put(bo); >> + >> +unlock: >> + mutex_unlock(&i915->drm.struct_mutex); >> + return ret; >> +} >> + >> +int i915_perf_get_oa_config(struct drm_i915_private *i915, >> + int metrics_set, >> + struct i915_oa_config **out_config, >> + struct i915_vma **out_vma) >> +{ >> + int ret = 0; >> + struct i915_oa_config *oa_config; >> + >> + if (!i915->perf.initialized) >> + return -ENODEV; >> + >> + ret = mutex_lock_interruptible(&i915->perf.metrics_lock); >> + if (ret) >> + return ret; >> + >> + if (metrics_set == 1) { >> + oa_config = &i915->perf.oa.test_config; >> + } else { >> + oa_config = idr_find(&i915->perf.metrics_idr, metrics_set); >> + if (!oa_config) { >> + ret = -EINVAL; 
>> + goto unlock; >> + } >> + } >> + >> + if (out_config) { >> + atomic_inc(&oa_config->ref_count); >> + *out_config = oa_config; >> + } >> + >> + if (out_vma) { >> + if (oa_config->vma) { >> + *out_vma = i915_vma_get(oa_config->vma); >> + } else { >> + ret = alloc_oa_config_buffer(i915, oa_config); >> + if (ret) { >> + goto err_buf_alloc; >> + } else { >> + list_add(&oa_config->vma_link, >> + &i915->perf.metrics_buffers); >> + *out_vma = i915_vma_get(oa_config->vma); >> + } >> + } > Where is out_vma used so we can check if the lifetime tracking is ok as > so far you are releasing it before we know it is idle. It's part of patch 3. > -Chris >
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 2264b30ce51a..a35715cd7608 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1378,6 +1378,10 @@ struct i915_oa_config { struct attribute *attrs[2]; struct device_attribute sysfs_metric_id; + struct i915_vma *vma; + + struct list_head vma_link; + atomic_t ref_count; }; @@ -1979,11 +1983,21 @@ struct drm_i915_private { struct mutex metrics_lock; /* - * List of dynamic configurations, you need to hold - * dev_priv->perf.metrics_lock to access it. + * List of dynamic configurations (struct i915_oa_config), you + * need to hold dev_priv->perf.metrics_lock to access it. */ struct idr metrics_idr; + /* + * List of dynamic configurations (struct i915_oa_config) + * which have an allocated buffer in GGTT for reconfiguration, + * you need to hold dev_priv->perf.metrics_lock to access it. + * Elements are added to the list lazilly on execbuf (when a + * particular configuration is requested). The list is freed + * upon closing the perf stream. + */ + struct list_head metrics_buffers; + /* * Lock associated with anything below within this structure * except exclusive_stream. 
@@ -3315,6 +3329,10 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, void i915_oa_init_reg_state(struct intel_engine_cs *engine, struct i915_gem_context *ctx, uint32_t *reg_state); +int i915_perf_get_oa_config(struct drm_i915_private *i915, + int metrics_set, + struct i915_oa_config **out_config, + struct i915_vma **out_vma); /* i915_gem_evict.c */ int __must_check i915_gem_evict_something(struct i915_address_space *vm, diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index e2a96b6844fe..39c5b44862d4 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -364,9 +364,16 @@ struct perf_open_properties { int oa_period_exponent; }; -static void free_oa_config(struct drm_i915_private *dev_priv, - struct i915_oa_config *oa_config) +static void put_oa_config(struct i915_oa_config *oa_config) { + if (!atomic_dec_and_test(&oa_config->ref_count)) + return; + + if (oa_config->vma) { + list_del(&oa_config->vma_link); + i915_vma_put(oa_config->vma); + } + if (!PTR_ERR(oa_config->flex_regs)) kfree(oa_config->flex_regs); if (!PTR_ERR(oa_config->b_counter_regs)) @@ -376,38 +383,152 @@ static void free_oa_config(struct drm_i915_private *dev_priv, kfree(oa_config); } -static void put_oa_config(struct drm_i915_private *dev_priv, - struct i915_oa_config *oa_config) +static u32 *write_cs_mi_lri(u32 *cs, const struct i915_oa_reg *reg_data, u32 n_regs) { - if (!atomic_dec_and_test(&oa_config->ref_count)) - return; + u32 i; - free_oa_config(dev_priv, oa_config); + for (i = 0; i < n_regs; i++) { + if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) { + u32 n_lri = min(n_regs - i, + (u32) MI_LOAD_REGISTER_IMM_MAX_REGS); + + *cs++ = MI_LOAD_REGISTER_IMM(n_lri); + } + *cs++ = i915_mmio_reg_offset(reg_data[i].addr); + *cs++ = reg_data[i].value; + } + + return cs; } -static int get_oa_config(struct drm_i915_private *dev_priv, - int metrics_set, - struct i915_oa_config **out_config) +static int 
alloc_oa_config_buffer(struct drm_i915_private *i915, + struct i915_oa_config *oa_config) { + struct drm_i915_gem_object *bo; + size_t config_length = 0; int ret; + u32 *cs; - if (metrics_set == 1) { - *out_config = &dev_priv->perf.oa.test_config; - atomic_inc(&dev_priv->perf.oa.test_config.ref_count); - return 0; + if (oa_config->mux_regs_len > 0) { + config_length += DIV_ROUND_UP(oa_config->mux_regs_len, + MI_LOAD_REGISTER_IMM_MAX_REGS) * 4; + config_length += oa_config->mux_regs_len * 8; } + if (oa_config->b_counter_regs_len > 0) { + config_length += DIV_ROUND_UP(oa_config->b_counter_regs_len, + MI_LOAD_REGISTER_IMM_MAX_REGS) * 4; + config_length += oa_config->b_counter_regs_len * 8; + } + if (oa_config->flex_regs_len > 0) { + config_length += DIV_ROUND_UP(oa_config->flex_regs_len, + MI_LOAD_REGISTER_IMM_MAX_REGS) * 4; + config_length += oa_config->flex_regs_len * 8; + } + config_length += 4; /* MI_BATCH_BUFFER_END */ + config_length = ALIGN(config_length, I915_GTT_PAGE_SIZE); - ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock); + ret = i915_mutex_lock_interruptible(&i915->drm); if (ret) return ret; - *out_config = idr_find(&dev_priv->perf.metrics_idr, metrics_set); - if (!*out_config) - ret = -EINVAL; - else - atomic_inc(&(*out_config)->ref_count); + bo = i915_gem_object_create(i915, config_length); + if (IS_ERR(bo)) { + ret = PTR_ERR(bo); + goto unlock; + } + + ret = i915_gem_object_set_cache_level(bo, I915_CACHE_LLC); + if (ret) + goto err_unref; - mutex_unlock(&dev_priv->perf.metrics_lock); + oa_config->vma = i915_gem_object_ggtt_pin(bo, NULL, 0, config_length, 0); + if (IS_ERR(oa_config->vma)) { + ret = PTR_ERR(oa_config->vma); + oa_config->vma = NULL; + goto err_unref; + } + + cs = i915_gem_object_pin_map(bo, I915_MAP_WB); + if (IS_ERR(cs)) { + ret = PTR_ERR(cs); + goto err_unpin; + } + + memset(cs, 0, config_length); + + cs = write_cs_mi_lri(cs, oa_config->mux_regs, oa_config->mux_regs_len); + cs = write_cs_mi_lri(cs, oa_config->b_counter_regs, 
oa_config->b_counter_regs_len); + cs = write_cs_mi_lri(cs, oa_config->flex_regs, oa_config->flex_regs_len); + + *cs++ = MI_BATCH_BUFFER_END; + + i915_gem_object_unpin_map(bo); + + goto unlock; + +err_unpin: + __i915_vma_unpin(oa_config->vma); + +err_unref: + oa_config->vma = NULL; + i915_gem_object_put(bo); + +unlock: + mutex_unlock(&i915->drm.struct_mutex); + return ret; +} + +int i915_perf_get_oa_config(struct drm_i915_private *i915, + int metrics_set, + struct i915_oa_config **out_config, + struct i915_vma **out_vma) +{ + int ret = 0; + struct i915_oa_config *oa_config; + + if (!i915->perf.initialized) + return -ENODEV; + + ret = mutex_lock_interruptible(&i915->perf.metrics_lock); + if (ret) + return ret; + + if (metrics_set == 1) { + oa_config = &i915->perf.oa.test_config; + } else { + oa_config = idr_find(&i915->perf.metrics_idr, metrics_set); + if (!oa_config) { + ret = -EINVAL; + goto unlock; + } + } + + if (out_config) { + atomic_inc(&oa_config->ref_count); + *out_config = oa_config; + } + + if (out_vma) { + if (oa_config->vma) { + *out_vma = i915_vma_get(oa_config->vma); + } else { + ret = alloc_oa_config_buffer(i915, oa_config); + if (ret) { + goto err_buf_alloc; + } else { + list_add(&oa_config->vma_link, + &i915->perf.metrics_buffers); + *out_vma = i915_vma_get(oa_config->vma); + } + } + } + + goto unlock; + +err_buf_alloc: + put_oa_config(oa_config); +unlock: + mutex_unlock(&i915->perf.metrics_lock); return ret; } @@ -1377,7 +1498,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream) if (stream->ctx) oa_put_render_ctx_id(stream); - put_oa_config(dev_priv, stream->oa_config); + put_oa_config(stream->oa_config); if (dev_priv->perf.oa.spurious_report_rs.missed) { DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n", @@ -2070,7 +2191,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, } } - ret = get_oa_config(dev_priv, props->metrics_set, &stream->oa_config); + ret = 
i915_perf_get_oa_config(dev_priv, props->metrics_set, + &stream->oa_config, NULL); if (ret) { DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set); goto err_config; @@ -2115,6 +2237,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, stream->ops = &i915_oa_stream_ops; + DRM_DEBUG("opening stream oa config uuid=%s\n", stream->oa_config->uuid); + dev_priv->perf.oa.exclusive_stream = stream; mutex_unlock(&dev_priv->drm.struct_mutex); @@ -2129,7 +2253,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, free_oa_buffer(dev_priv); err_oa_buf_alloc: - put_oa_config(dev_priv, stream->oa_config); + put_oa_config(stream->oa_config); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); intel_runtime_pm_put(dev_priv); @@ -2496,9 +2620,21 @@ static int i915_perf_release(struct inode *inode, struct file *file) { struct i915_perf_stream *stream = file->private_data; struct drm_i915_private *dev_priv = stream->dev_priv; + struct i915_oa_config *oa_config, *next; mutex_lock(&dev_priv->perf.lock); + i915_perf_destroy_locked(stream); + + /* Dispose of all oa config batch buffers. 
*/ + mutex_lock(&dev_priv->perf.metrics_lock); + list_for_each_entry_safe(oa_config, next, &dev_priv->perf.metrics_buffers, vma_link) { + list_del(&oa_config->vma_link); + i915_vma_put(oa_config->vma); + oa_config->vma = NULL; + } + mutex_unlock(&dev_priv->perf.metrics_lock); + mutex_unlock(&dev_priv->perf.lock); return 0; @@ -3294,7 +3430,7 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, sysfs_err: mutex_unlock(&dev_priv->perf.metrics_lock); reg_err: - put_oa_config(dev_priv, oa_config); + put_oa_config(oa_config); DRM_DEBUG("Failed to add new OA config\n"); return err; } @@ -3348,7 +3484,7 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id); - put_oa_config(dev_priv, oa_config); + put_oa_config(oa_config); config_err: mutex_unlock(&dev_priv->perf.metrics_lock); @@ -3492,6 +3628,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv) init_waitqueue_head(&dev_priv->perf.oa.poll_wq); INIT_LIST_HEAD(&dev_priv->perf.streams); + INIT_LIST_HEAD(&dev_priv->perf.metrics_buffers); + mutex_init(&dev_priv->perf.lock); spin_lock_init(&dev_priv->perf.oa.oa_buffer.ptr_lock); @@ -3508,10 +3646,9 @@ void i915_perf_init(struct drm_i915_private *dev_priv) static int destroy_config(int id, void *p, void *data) { - struct drm_i915_private *dev_priv = data; struct i915_oa_config *oa_config = p; - put_oa_config(dev_priv, oa_config); + put_oa_config(oa_config); return 0; } @@ -3525,7 +3662,7 @@ void i915_perf_fini(struct drm_i915_private *dev_priv) if (!dev_priv->perf.initialized) return; - idr_for_each(&dev_priv->perf.metrics_idr, destroy_config, dev_priv); + idr_for_each(&dev_priv->perf.metrics_idr, destroy_config, NULL); idr_destroy(&dev_priv->perf.metrics_idr); unregister_sysctl_table(dev_priv->perf.sysctl_header); diff --git a/drivers/gpu/drm/i915/intel_gpu_commands.h b/drivers/gpu/drm/i915/intel_gpu_commands.h index 105e2a9e874a..9fb9f3a0cb60 100644 --- 
a/drivers/gpu/drm/i915/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/intel_gpu_commands.h @@ -122,6 +122,7 @@ */ #define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1) #define MI_LRI_FORCE_POSTED (1<<12) +#define MI_LOAD_REGISTER_IMM_MAX_REGS (126) #define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1) #define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2) #define MI_SRM_LRM_GLOBAL_GTT (1<<22)
Here we introduce a mechanism by which the execbuf part of the i915 driver will be able to request that a batch buffer containing the programming for a particular OA config be created. We'll execute these OA configuration buffers right before executing a set of userspace commands so that a particular user batchbuffer be executed with a given OA configuration. This mechanism essentially allows the userspace driver to go through several OA configurations without having to open/close the i915/perf stream. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> --- drivers/gpu/drm/i915/i915_drv.h | 22 ++- drivers/gpu/drm/i915/i915_perf.c | 195 ++++++++++++++++++---- drivers/gpu/drm/i915/intel_gpu_commands.h | 1 + 3 files changed, 187 insertions(+), 31 deletions(-)