@@ -717,7 +717,6 @@ static void i915_driver_register(struct drm_i915_private *dev_priv)
struct drm_device *dev = &dev_priv->drm;
i915_gem_driver_register(dev_priv);
- i915_pmu_register(dev_priv);
intel_vgpu_register(dev_priv);
@@ -731,11 +730,12 @@ static void i915_driver_register(struct drm_i915_private *dev_priv)
i915_debugfs_register(dev_priv);
i915_setup_sysfs(dev_priv);
+ intel_gt_driver_register(to_gt(dev_priv));
+
/* Depends on sysfs having been initialized */
+ i915_pmu_register(dev_priv);
i915_perf_register(dev_priv);
- intel_gt_driver_register(to_gt(dev_priv));
-
intel_display_driver_register(dev_priv);
intel_power_domains_enable(dev_priv);
@@ -762,11 +762,12 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv)
intel_display_driver_unregister(dev_priv);
- intel_gt_driver_unregister(to_gt(dev_priv));
-
i915_perf_unregister(dev_priv);
+ /* GT should be available until PMU is gone */
i915_pmu_unregister(dev_priv);
+ intel_gt_driver_unregister(to_gt(dev_priv));
+
i915_teardown_sysfs(dev_priv);
drm_dev_unplug(&dev_priv->drm);
@@ -670,21 +670,27 @@ static void i915_pmu_enable(struct perf_event *event)
if (is_engine_event(event)) {
u8 sample = engine_event_sample(event);
struct intel_engine_cs *engine;
-
- engine = intel_engine_lookup_user(i915,
- engine_event_class(event),
- engine_event_instance(event));
-
- BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.enable_count) !=
- I915_ENGINE_SAMPLE_COUNT);
- BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.sample) !=
- I915_ENGINE_SAMPLE_COUNT);
- GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
- GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
- GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
-
- engine->pmu.enable |= BIT(sample);
- engine->pmu.enable_count[sample]++;
+ u8 class = engine_event_class(event);
+ u8 instance = engine_event_instance(event);
+
+ engine = intel_engine_lookup_user(i915, class, instance);
+ if (!drm_WARN_ONCE(&i915->drm,
+ !engine,
+ "Invalid engine event: { class:%d, inst:%d }\n",
+ class, instance)) {
+ BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.enable_count) !=
+ I915_ENGINE_SAMPLE_COUNT);
+ BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.sample) !=
+ I915_ENGINE_SAMPLE_COUNT);
+ GEM_BUG_ON(sample >=
+ ARRAY_SIZE(engine->pmu.enable_count));
+ GEM_BUG_ON(sample >=
+ ARRAY_SIZE(engine->pmu.sample));
+ GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
+
+ engine->pmu.enable |= BIT(sample);
+ engine->pmu.enable_count[sample]++;
+ }
}
spin_unlock_irqrestore(&pmu->lock, flags);
@@ -714,21 +720,25 @@ static void i915_pmu_disable(struct perf_event *event)
if (is_engine_event(event)) {
u8 sample = engine_event_sample(event);
struct intel_engine_cs *engine;
-
- engine = intel_engine_lookup_user(i915,
- engine_event_class(event),
- engine_event_instance(event));
-
- GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
- GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
- GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);
-
- /*
- * Decrement the reference count and clear the enabled
- * bitmask when the last listener on an event goes away.
- */
- if (--engine->pmu.enable_count[sample] == 0)
- engine->pmu.enable &= ~BIT(sample);
+ u8 class = engine_event_class(event);
+ u8 instance = engine_event_instance(event);
+
+ engine = intel_engine_lookup_user(i915, class, instance);
+ if (!drm_WARN_ONCE(&i915->drm,
+ !engine,
+ "Invalid engine event: { class:%d, inst:%d }\n",
+ class, instance)) {
+ GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
+ GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
+ GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);
+
+ /*
+ * Decrement the reference count and clear the enabled
+ * bitmask when the last listener on an event goes away.
+ */
+ if (--engine->pmu.enable_count[sample] == 0)
+ engine->pmu.enable &= ~BIT(sample);
+ }
}
GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
In the driver teardown, we are unregistering the gt prior to unregistering the PMU. This means there is a small window of time in which the application can request metrics from the PMU, some of which are calling into the uapi engines list, while the engines are not available. In this case we can see null pointer dereferences. Fix this ordering in both the driver load and unload sequences. Additionally add a check for engine presence to prevent this NPD in the event this ordering is accidentally reversed. Print a debug message indicating when they aren't available. v1: Actually address the driver load/unload ordering issue v2: Use drm_WARN_ONCE instead of a debug print Signed-off-by: Stuart Summers <stuart.summers@intel.com> --- drivers/gpu/drm/i915/i915_driver.c | 11 ++--- drivers/gpu/drm/i915/i915_pmu.c | 70 +++++++++++++++++------------- 2 files changed, 46 insertions(+), 35 deletions(-)