diff mbox series

[3/8] drm/i915/pmu: Fix crash due to use-after-free

Message ID 20241011225430.1219345-4-lucas.demarchi@intel.com (mailing list archive)
State New, archived
Headers show
Series Fix i915 pmu on bind/unbind | expand

Commit Message

Lucas De Marchi Oct. 11, 2024, 10:54 p.m. UTC
When an i915 PMU counter is enabled and the driver is then unbound, the
PMU will be unregistered via perf_pmu_unregister(), however the event
will still be alive. i915 currently tries to deal with this situation
by:

	a) Marking the pmu as "closed" and shortcut the calls from perf
	b) Taking a reference from i915, that is put back when the event
	   is destroyed.
	c) Setting event_init to NULL to avoid any further event

(a) is ugly, but may be left as is since it protects not trying to
access the HW that is now gone. Unless a pmu driver can call
perf_pmu_unregister() and not receive any more calls, it's a necessary
ugliness.

(b) doesn't really work: when the event is destroyed and the i915 ref is
put it may free the i915 object, that contains the pmu, not only the
event. After event->destroy() callback, perf still expects the pmu
object to be alive.

Instead of pigging back on the event->destroy() to take and put the
device reference, implement the new get()/put() on the pmu object for
that purpose.

(c) is only done to have a flag to avoid some function entrypoints when
pmu is unregistered.

Cc: stable@vger.kernel.org # 5.11+
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
---
 drivers/gpu/drm/i915/i915_pmu.c | 36 ++++++++++++++++++++-------------
 1 file changed, 22 insertions(+), 14 deletions(-)

Comments

Matt Roper Oct. 19, 2024, 12:05 a.m. UTC | #1
On Fri, Oct 11, 2024 at 03:54:25PM -0700, Lucas De Marchi wrote:
> When an i915 PMU counter is enabled and the driver is then unbound, the
> PMU will be unregistered via perf_pmu_unregister(), however the event
> will still be alive. i915 currently tries to deal with this situation
> by:
> 
> 	a) Marking the pmu as "closed" and shortcut the calls from perf
> 	b) Taking a reference from i915, that is put back when the event
> 	   is destroyed.
> 	c) Setting event_init to NULL to avoid any further event
> 
> (a) is ugly, but may be left as is since it protects not trying to
> access the HW that is now gone. Unless a pmu driver can call
> perf_pmu_unregister() and not receive any more calls, it's a necessary
> ugliness.
> 
> (b) doesn't really work: when the event is destroyed and the i915 ref is
> put it may free the i915 object, that contains the pmu, not only the
> event. After event->destroy() callback, perf still expects the pmu
> object to be alive.
> 
> Instead of pigging back on the event->destroy() to take and put the
> device reference, implement the new get()/put() on the pmu object for
> that purpose.
> 
> (c) is only done to have a flag to avoid some function entrypoints when
> pmu is unregistered.
> 
> Cc: stable@vger.kernel.org # 5.11+
> Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>

> ---
>  drivers/gpu/drm/i915/i915_pmu.c | 36 ++++++++++++++++++++-------------
>  1 file changed, 22 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index 4d05d98f51b8e..dc9f753369170 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -515,15 +515,6 @@ static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
>  	return HRTIMER_RESTART;
>  }
>  
> -static void i915_pmu_event_destroy(struct perf_event *event)
> -{
> -	struct i915_pmu *pmu = event_to_pmu(event);
> -	struct drm_i915_private *i915 = pmu_to_i915(pmu);
> -
> -	drm_WARN_ON(&i915->drm, event->parent);
> -
> -	drm_dev_put(&i915->drm);
> -}
>  
>  static int
>  engine_event_status(struct intel_engine_cs *engine,
> @@ -629,11 +620,6 @@ static int i915_pmu_event_init(struct perf_event *event)
>  	if (ret)
>  		return ret;
>  
> -	if (!event->parent) {
> -		drm_dev_get(&i915->drm);
> -		event->destroy = i915_pmu_event_destroy;
> -	}
> -
>  	return 0;
>  }
>  
> @@ -872,6 +858,24 @@ static int i915_pmu_event_event_idx(struct perf_event *event)
>  	return 0;
>  }
>  
> +static struct pmu *i915_pmu_get(struct pmu *base)
> +{
> +	struct i915_pmu *pmu = container_of(base, struct i915_pmu, base);
> +	struct drm_i915_private *i915 = pmu_to_i915(pmu);
> +
> +	drm_dev_get(&i915->drm);
> +
> +	return base;
> +}
> +
> +static void i915_pmu_put(struct pmu *base)
> +{
> +	struct i915_pmu *pmu = container_of(base, struct i915_pmu, base);
> +	struct drm_i915_private *i915 = pmu_to_i915(pmu);
> +
> +	drm_dev_put(&i915->drm);
> +}
> +
>  struct i915_str_attribute {
>  	struct device_attribute attr;
>  	const char *str;
> @@ -1154,6 +1158,8 @@ static void free_pmu(struct drm_device *dev, void *res)
>  	struct i915_pmu *pmu = res;
>  	struct drm_i915_private *i915 = pmu_to_i915(pmu);
>  
> +	perf_pmu_free(&pmu->base);
> +
>  	free_event_attributes(pmu);
>  	kfree(pmu->base.attr_groups);
>  	if (IS_DGFX(i915))
> @@ -1299,6 +1305,8 @@ void i915_pmu_register(struct drm_i915_private *i915)
>  	pmu->base.stop		= i915_pmu_event_stop;
>  	pmu->base.read		= i915_pmu_event_read;
>  	pmu->base.event_idx	= i915_pmu_event_event_idx;
> +	pmu->base.get		= i915_pmu_get;
> +	pmu->base.put		= i915_pmu_put;
>  
>  	ret = perf_pmu_register(&pmu->base, pmu->name, -1);
>  	if (ret)
> -- 
> 2.47.0
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 4d05d98f51b8e..dc9f753369170 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -515,15 +515,6 @@  static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
 	return HRTIMER_RESTART;
 }
 
-static void i915_pmu_event_destroy(struct perf_event *event)
-{
-	struct i915_pmu *pmu = event_to_pmu(event);
-	struct drm_i915_private *i915 = pmu_to_i915(pmu);
-
-	drm_WARN_ON(&i915->drm, event->parent);
-
-	drm_dev_put(&i915->drm);
-}
 
 static int
 engine_event_status(struct intel_engine_cs *engine,
@@ -629,11 +620,6 @@  static int i915_pmu_event_init(struct perf_event *event)
 	if (ret)
 		return ret;
 
-	if (!event->parent) {
-		drm_dev_get(&i915->drm);
-		event->destroy = i915_pmu_event_destroy;
-	}
-
 	return 0;
 }
 
@@ -872,6 +858,24 @@  static int i915_pmu_event_event_idx(struct perf_event *event)
 	return 0;
 }
 
+static struct pmu *i915_pmu_get(struct pmu *base)
+{
+	struct i915_pmu *pmu = container_of(base, struct i915_pmu, base);
+	struct drm_i915_private *i915 = pmu_to_i915(pmu);
+
+	drm_dev_get(&i915->drm);
+
+	return base;
+}
+
+static void i915_pmu_put(struct pmu *base)
+{
+	struct i915_pmu *pmu = container_of(base, struct i915_pmu, base);
+	struct drm_i915_private *i915 = pmu_to_i915(pmu);
+
+	drm_dev_put(&i915->drm);
+}
+
 struct i915_str_attribute {
 	struct device_attribute attr;
 	const char *str;
@@ -1154,6 +1158,8 @@  static void free_pmu(struct drm_device *dev, void *res)
 	struct i915_pmu *pmu = res;
 	struct drm_i915_private *i915 = pmu_to_i915(pmu);
 
+	perf_pmu_free(&pmu->base);
+
 	free_event_attributes(pmu);
 	kfree(pmu->base.attr_groups);
 	if (IS_DGFX(i915))
@@ -1299,6 +1305,8 @@  void i915_pmu_register(struct drm_i915_private *i915)
 	pmu->base.stop		= i915_pmu_event_stop;
 	pmu->base.read		= i915_pmu_event_read;
 	pmu->base.event_idx	= i915_pmu_event_event_idx;
+	pmu->base.get		= i915_pmu_get;
+	pmu->base.put		= i915_pmu_put;
 
 	ret = perf_pmu_register(&pmu->base, pmu->name, -1);
 	if (ret)