diff mbox series

[v2,4/9] drm/i915/perf: Group engines into respective OA groups

Message ID 20230217005850.2511422-5-umesh.nerlige.ramappa@intel.com (mailing list archive)
State New, archived
Headers show
Series Add OAM support for MTL | expand

Commit Message

Umesh Nerlige Ramappa Feb. 17, 2023, 12:58 a.m. UTC
Now that we may have multiple OA units in a single GT as well as on
separate GTs, create an engine group that maps to a single OA unit.

v2: (Jani)
- Drop warning on ENOMEM
- Reorder patch in the series

Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_types.h |   4 +
 drivers/gpu/drm/i915/gt/intel_sseu.c         |   3 +-
 drivers/gpu/drm/i915/i915_perf.c             | 124 +++++++++++++++++--
 drivers/gpu/drm/i915/i915_perf_types.h       |  51 +++++++-
 4 files changed, 169 insertions(+), 13 deletions(-)

Comments

Dixit, Ashutosh Feb. 22, 2023, 9:52 p.m. UTC | #1
On Thu, 16 Feb 2023 16:58:45 -0800, Umesh Nerlige Ramappa wrote:
>

Hi Umesh,

> Now that we may have multiple OA units in a single GT as well as on
> separate GTs, create an engine group that maps to a single OA unit.
>
> v2: (Jani)
> - Drop warning on ENOMEM
> - Reorder patch in the series
>
> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_engine_types.h |   4 +
>  drivers/gpu/drm/i915/gt/intel_sseu.c         |   3 +-
>  drivers/gpu/drm/i915/i915_perf.c             | 124 +++++++++++++++++--
>  drivers/gpu/drm/i915/i915_perf_types.h       |  51 +++++++-
>  4 files changed, 169 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 4fd54fb8810f..8a8b0dce241b 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -53,6 +53,8 @@ struct intel_gt;
>  struct intel_ring;
>  struct intel_uncore;
>  struct intel_breadcrumbs;
> +struct intel_engine_cs;
> +struct i915_perf_group;
>
>  typedef u32 intel_engine_mask_t;
>  #define ALL_ENGINES ((intel_engine_mask_t)~0ul)
> @@ -603,6 +605,8 @@ struct intel_engine_cs {
>	} props, defaults;
>
>	I915_SELFTEST_DECLARE(struct fault_attr reset_timeout);
> +
> +	struct i915_perf_group *oa_group;

I think 'struct i915_oa_unit' is a better name (since it suggests a HW
entity), but since if we change we'll need to change everywhere so leave as
is with a comment to the effect that:

	1 OA unit <-> 1 OA buffer <-> 1 perf group

>  };
>
>  static inline bool
> diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
> index 6c6198a257ac..1141f875f5bd 100644
> --- a/drivers/gpu/drm/i915/gt/intel_sseu.c
> +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
> @@ -6,6 +6,7 @@
>  #include <linux/string_helpers.h>
>
>  #include "i915_drv.h"
> +#include "i915_perf_types.h"
>  #include "intel_engine_regs.h"
>  #include "intel_gt_regs.h"
>  #include "intel_sseu.h"
> @@ -677,7 +678,7 @@ u32 intel_sseu_make_rpcs(struct intel_gt *gt,
>	 * If i915/perf is active, we want a stable powergating configuration
>	 * on the system. Use the configuration pinned by i915/perf.
>	 */
> -	if (gt->perf.exclusive_stream)
> +	if (gt->perf.group && gt->perf.group[PERF_GROUP_OAG].exclusive_stream)

I haven't looked into what this function does, hopefully ok to do this only
for OAG?

>		req_sseu = &gt->perf.sseu;
>
>	slices = hweight8(req_sseu->slice_mask);
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 1229f65534e2..37c4cc44d68c 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -1584,8 +1584,9 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
>  {
>	struct i915_perf *perf = stream->perf;
>	struct intel_gt *gt = stream->engine->gt;
> +	struct i915_perf_group *g = stream->engine->oa_group;
>
> -	if (WARN_ON(stream != gt->perf.exclusive_stream))
> +	if (WARN_ON(stream != g->exclusive_stream))
>		return;
>
>	/*
> @@ -1594,7 +1595,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
>	 *
>	 * See i915_oa_init_reg_state() and lrc_configure_all_contexts()
>	 */
> -	WRITE_ONCE(gt->perf.exclusive_stream, NULL);
> +	WRITE_ONCE(g->exclusive_stream, NULL);
>	perf->ops.disable_metric_set(stream);
>
>	free_oa_buffer(stream);
> @@ -3192,6 +3193,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>  {
>	struct drm_i915_private *i915 = stream->perf->i915;
>	struct i915_perf *perf = stream->perf;
> +	struct i915_perf_group *g;
>	struct intel_gt *gt;
>	int ret;
>
> @@ -3202,6 +3204,12 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>	}
>	gt = props->engine->gt;
>
> +	g = props->engine->oa_group;
> +	if (!g) {
> +		DRM_DEBUG("Perf group invalid\n");
> +		return -EINVAL;
> +	}

This check should be moved to the engine_supports_oa check in
read_properties_unlocked in "drm/i915/perf: Add engine class instance
parameters to perf". It basically duplicates that check I think.

Or rather, I think the engine_supports_oa check should now be rewritten as
follows:

static bool engine_supports_oa(const struct intel_engine_cs *engine)
{
	return engine->oa_group;
}

Since there are many more instances of engine_supports_oa calls.

If we do this in read_properties_unlocked we don't need the above check
here.

> +
>	/*
>	 * If the sysfs metrics/ directory wasn't registered for some
>	 * reason then don't let userspace try their luck with config
> @@ -3231,7 +3239,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>	 * counter reports and marshal to the appropriate client
>	 * we currently only allow exclusive access
>	 */
> -	if (gt->perf.exclusive_stream) {
> +	if (g->exclusive_stream) {
>		drm_dbg(&stream->perf->i915->drm,
>			"OA unit already in use\n");
>		return -EBUSY;
> @@ -3326,7 +3334,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>	stream->ops = &i915_oa_stream_ops;
>
>	stream->engine->gt->perf.sseu = props->sseu;
> -	WRITE_ONCE(gt->perf.exclusive_stream, stream);
> +	WRITE_ONCE(g->exclusive_stream, stream);
>
>	ret = i915_perf_stream_enable_sync(stream);
>	if (ret) {
> @@ -3349,7 +3357,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>	return 0;
>
>  err_enable:
> -	WRITE_ONCE(gt->perf.exclusive_stream, NULL);
> +	WRITE_ONCE(g->exclusive_stream, NULL);
>	perf->ops.disable_metric_set(stream);
>
>	free_oa_buffer(stream);
> @@ -3378,12 +3386,13 @@ void i915_oa_init_reg_state(const struct intel_context *ce,
>			    const struct intel_engine_cs *engine)
>  {
>	struct i915_perf_stream *stream;
> +	struct i915_perf_group *g = engine->oa_group;
>
> -	if (!engine_supports_oa(engine))
> +	if (!g)
>		return;
>
>	/* perf.exclusive_stream serialised by lrc_configure_all_contexts() */
> -	stream = READ_ONCE(engine->gt->perf.exclusive_stream);
> +	stream = READ_ONCE(g->exclusive_stream);
>	if (stream && GRAPHICS_VER(stream->perf->i915) < 12)
>		gen8_update_reg_state_unlocked(ce, stream);
>  }
> @@ -4753,6 +4762,95 @@ static struct ctl_table oa_table[] = {
>	{}
>  };
>
> +static u32 __num_perf_groups_per_gt(struct intel_gt *gt)
> +{
> +	enum intel_platform platform = INTEL_INFO(gt->i915)->platform;
> +
> +	switch (platform) {
> +	default:
> +		return 1;
> +	}

I think this function should just 'return 1': this series does not
introduce any value other than 1, so there is no need for the switch
statement.

> +}
> +
> +static u32 __oa_engine_group(struct intel_engine_cs *engine)
> +{
> +	if (!engine_supports_oa(engine))
> +		return PERF_GROUP_INVALID;
> +
> +	switch (engine->class) {
> +	case RENDER_CLASS:
> +		return PERF_GROUP_OAG;
> +
> +	default:
> +		return PERF_GROUP_INVALID;
> +	}
> +}
> +
> +static void oa_init_groups(struct intel_gt *gt)
> +{
> +	int i, num_groups = gt->perf.num_perf_groups;
> +	struct i915_perf *perf = &gt->i915->perf;
> +
> +	for (i = 0; i < num_groups; i++) {
> +		struct i915_perf_group *g = &gt->perf.group[i];
> +
> +		/* Fused off engines can result in a group with num_engines == 0 */
> +		if (g->num_engines == 0)
> +			continue;
> +
> +		/* Set oa_unit_ids now to ensure ids remain contiguous. */
> +		g->oa_unit_id = perf->oa_unit_ids++;
> +
> +		g->gt = gt;
> +	}
> +}
> +
> +static int oa_init_gt(struct intel_gt *gt)
> +{
> +	u32 num_groups = __num_perf_groups_per_gt(gt);
> +	struct intel_engine_cs *engine;
> +	struct i915_perf_group *g;
> +	intel_engine_mask_t tmp;
> +
> +	g = kcalloc(num_groups, sizeof(*g), GFP_KERNEL);
> +	if (!g)
> +		return -ENOMEM;
> +
> +	for_each_engine_masked(engine, gt, ALL_ENGINES, tmp) {
> +		u32 index;
> +
> +		index = __oa_engine_group(engine);
> +		if (index < num_groups) {
> +			g[index].engine_mask |= BIT(engine->id);
> +			g[index].num_engines++;
> +			engine->oa_group = &g[index];
> +		} else {
> +			engine->oa_group = NULL;
> +		}

We can avoid the else by initializing engine->oa_group to NULL at the start
of the for_each_engine_masked loop.

> +	}
> +
> +	gt->perf.num_perf_groups = num_groups;
> +	gt->perf.group = g;
> +
> +	oa_init_groups(gt);
> +
> +	return 0;
> +}
> +
> +static int oa_init_engine_groups(struct i915_perf *perf)
> +{
> +	struct intel_gt *gt;
> +	int i, ret;
> +
> +	for_each_gt(gt, perf->i915, i) {
> +		ret = oa_init_gt(gt);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	return 0;
> +}
> +
>  static void oa_init_supported_formats(struct i915_perf *perf)
>  {
>	struct drm_i915_private *i915 = perf->i915;
> @@ -4919,7 +5017,7 @@ void i915_perf_init(struct drm_i915_private *i915)
>
>	if (perf->ops.enable_metric_set) {
>		struct intel_gt *gt;
> -		int i;
> +		int i, ret;
>
>		for_each_gt(gt, i915, i)
>			mutex_init(&gt->perf.lock);
> @@ -4958,6 +5056,11 @@ void i915_perf_init(struct drm_i915_private *i915)
>
>		perf->i915 = i915;
>
> +		ret = oa_init_engine_groups(perf);
> +		if (ret)
> +			drm_err(&i915->drm,
> +				"OA initialization failed %d\n", ret);
> +
>		oa_init_supported_formats(perf);
>	}
>  }
> @@ -4986,10 +5089,15 @@ void i915_perf_sysctl_unregister(void)
>  void i915_perf_fini(struct drm_i915_private *i915)
>  {
>	struct i915_perf *perf = &i915->perf;
> +	struct intel_gt *gt;
> +	int i;
>
>	if (!perf->i915)
>		return;
>
> +	for_each_gt(gt, perf->i915, i)
> +		kfree(gt->perf.group);
> +
>	idr_for_each(&perf->metrics_idr, destroy_config, perf);
>	idr_destroy(&perf->metrics_idr);
>
> diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h
> index e36f046fe2b6..ce99551ad0fd 100644
> --- a/drivers/gpu/drm/i915/i915_perf_types.h
> +++ b/drivers/gpu/drm/i915/i915_perf_types.h
> @@ -17,6 +17,7 @@
>  #include <linux/wait.h>
>  #include <uapi/drm/i915_drm.h>
>
> +#include "gt/intel_engine_types.h"
>  #include "gt/intel_sseu.h"
>  #include "i915_reg_defs.h"
>  #include "intel_wakeref.h"
> @@ -30,6 +31,13 @@ struct i915_vma;
>  struct intel_context;
>  struct intel_engine_cs;
>

For the 'assigned but not used' comments below, I am basing this on this
patch and the last OAM patch, 9/9.

> +enum {
> +	PERF_GROUP_OAG = 0,
> +
> +	PERF_GROUP_MAX,
> +	PERF_GROUP_INVALID = U32_MAX,
> +};
> +
>  struct i915_oa_format {
>	u32 format;
>	int size;
> @@ -390,6 +398,35 @@ struct i915_oa_ops {
>	u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream);
>  };
>
> +struct i915_perf_group {
> +	/*
> +	 * @type: Identifier for the OA unit.
> +	 */
> +	u32 oa_unit_id;

Assigned but not used, should be removed and introduced when needed.

> +
> +	/*
> +	 * @gt: gt that this group belongs to
> +	 */
> +	struct intel_gt *gt;

Not used either, suggest removing.

> +
> +	/*
> +	 * @exclusive_stream: The stream currently using the OA unit. This is
> +	 * sometimes accessed outside a syscall associated to its file
> +	 * descriptor.
> +	 */
> +	struct i915_perf_stream *exclusive_stream;
> +
> +	/*
> +	 * @num_engines: The number of engines using this OA buffer.

s/OA buffer/OA unit/

> +	 */
> +	u32 num_engines;
> +
> +	/*
> +	 * @engine_mask: A mask of engines using a single OA buffer.

s/OA buffer/OA unit/

> +	 */
> +	intel_engine_mask_t engine_mask;

Assigned but not used, should be removed and introduced when needed.

> +};
> +
>  struct i915_perf_gt {
>	/*
>	 * Lock associated with anything below within this structure.
> @@ -402,12 +439,15 @@ struct i915_perf_gt {
>	 */
>	struct intel_sseu sseu;
>
> +	/**
> +	 * @num_perf_groups: number of perf groups per gt.
> +	 */
> +	u32 num_perf_groups;

This is 1 in this series, so you could argue it is not needed, but I think we
know of some future platforms where there might be > 1, so we can leave it
in.

> +
>	/*
> -	 * @exclusive_stream: The stream currently using the OA unit. This is
> -	 * sometimes accessed outside a syscall associated to its file
> -	 * descriptor.
> +	 * @group: list of OA groups - one for each OA buffer.
>	 */
> -	struct i915_perf_stream *exclusive_stream;
> +	struct i915_perf_group *group;
>  };
>
>  struct i915_perf {
> @@ -461,6 +501,9 @@ struct i915_perf {
>	unsigned long format_mask[FORMAT_MASK_SIZE];
>
>	atomic64_t noa_programming_delay;
> +
> +	/* oa unit ids */
> +	u32 oa_unit_ids;

Assigned but not used, should be removed, because oa_unit_id is
unused. Also if we need to do this later maybe idr is a better approach?

Also, if we remove the above members as suggested, oa_init_groups will
probably need to move to the last patch 9.

>  };
>
>  #endif /* _I915_PERF_TYPES_H_ */
> --
> 2.36.1
>

Thanks.
--
Ashutosh
Umesh Nerlige Ramappa Feb. 24, 2023, 5:30 p.m. UTC | #2
On Wed, Feb 22, 2023 at 01:52:23PM -0800, Dixit, Ashutosh wrote:
>On Thu, 16 Feb 2023 16:58:45 -0800, Umesh Nerlige Ramappa wrote:
>>
>
>Hi Umesh,
>
>> Now that we may have multiple OA units in a single GT as well as on
>> separate GTs, create an engine group that maps to a single OA unit.
>>
>> v2: (Jani)
>> - Drop warning on ENOMEM
>> - Reorder patch in the series
>>
>> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
>> ---
>>  drivers/gpu/drm/i915/gt/intel_engine_types.h |   4 +
>>  drivers/gpu/drm/i915/gt/intel_sseu.c         |   3 +-
>>  drivers/gpu/drm/i915/i915_perf.c             | 124 +++++++++++++++++--
>>  drivers/gpu/drm/i915/i915_perf_types.h       |  51 +++++++-
>>  4 files changed, 169 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
>> index 4fd54fb8810f..8a8b0dce241b 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
>> @@ -53,6 +53,8 @@ struct intel_gt;
>>  struct intel_ring;
>>  struct intel_uncore;
>>  struct intel_breadcrumbs;
>> +struct intel_engine_cs;
>> +struct i915_perf_group;
>>
>>  typedef u32 intel_engine_mask_t;
>>  #define ALL_ENGINES ((intel_engine_mask_t)~0ul)
>> @@ -603,6 +605,8 @@ struct intel_engine_cs {
>>	} props, defaults;
>>
>>	I915_SELFTEST_DECLARE(struct fault_attr reset_timeout);
>> +
>> +	struct i915_perf_group *oa_group;
>
>I think 'struct i915_oa_unit' is a better name (since it suggests a HW
>entity), but since if we change we'll need to change everywhere so leave as
>is with a comment to the effect that:
>
>	1 OA unit <-> 1 OA buffer <-> 1 perf group
>
>>  };
>>
>>  static inline bool
>> diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
>> index 6c6198a257ac..1141f875f5bd 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_sseu.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
>> @@ -6,6 +6,7 @@
>>  #include <linux/string_helpers.h>
>>
>>  #include "i915_drv.h"
>> +#include "i915_perf_types.h"
>>  #include "intel_engine_regs.h"
>>  #include "intel_gt_regs.h"
>>  #include "intel_sseu.h"
>> @@ -677,7 +678,7 @@ u32 intel_sseu_make_rpcs(struct intel_gt *gt,
>>	 * If i915/perf is active, we want a stable powergating configuration
>>	 * on the system. Use the configuration pinned by i915/perf.
>>	 */
>> -	if (gt->perf.exclusive_stream)
>> +	if (gt->perf.group && gt->perf.group[PERF_GROUP_OAG].exclusive_stream)
>
>I haven't looked into what this function does, hopefully ok to do this only
>for OAG?

This function builds the value that should be programmed into
PWR_CLK_STATE register which exists only for render.

Will add remaining comments

Thanks,
Umesh
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 4fd54fb8810f..8a8b0dce241b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -53,6 +53,8 @@  struct intel_gt;
 struct intel_ring;
 struct intel_uncore;
 struct intel_breadcrumbs;
+struct intel_engine_cs;
+struct i915_perf_group;
 
 typedef u32 intel_engine_mask_t;
 #define ALL_ENGINES ((intel_engine_mask_t)~0ul)
@@ -603,6 +605,8 @@  struct intel_engine_cs {
 	} props, defaults;
 
 	I915_SELFTEST_DECLARE(struct fault_attr reset_timeout);
+
+	struct i915_perf_group *oa_group;
 };
 
 static inline bool
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
index 6c6198a257ac..1141f875f5bd 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -6,6 +6,7 @@ 
 #include <linux/string_helpers.h>
 
 #include "i915_drv.h"
+#include "i915_perf_types.h"
 #include "intel_engine_regs.h"
 #include "intel_gt_regs.h"
 #include "intel_sseu.h"
@@ -677,7 +678,7 @@  u32 intel_sseu_make_rpcs(struct intel_gt *gt,
 	 * If i915/perf is active, we want a stable powergating configuration
 	 * on the system. Use the configuration pinned by i915/perf.
 	 */
-	if (gt->perf.exclusive_stream)
+	if (gt->perf.group && gt->perf.group[PERF_GROUP_OAG].exclusive_stream)
 		req_sseu = &gt->perf.sseu;
 
 	slices = hweight8(req_sseu->slice_mask);
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 1229f65534e2..37c4cc44d68c 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1584,8 +1584,9 @@  static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
 {
 	struct i915_perf *perf = stream->perf;
 	struct intel_gt *gt = stream->engine->gt;
+	struct i915_perf_group *g = stream->engine->oa_group;
 
-	if (WARN_ON(stream != gt->perf.exclusive_stream))
+	if (WARN_ON(stream != g->exclusive_stream))
 		return;
 
 	/*
@@ -1594,7 +1595,7 @@  static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
 	 *
 	 * See i915_oa_init_reg_state() and lrc_configure_all_contexts()
 	 */
-	WRITE_ONCE(gt->perf.exclusive_stream, NULL);
+	WRITE_ONCE(g->exclusive_stream, NULL);
 	perf->ops.disable_metric_set(stream);
 
 	free_oa_buffer(stream);
@@ -3192,6 +3193,7 @@  static int i915_oa_stream_init(struct i915_perf_stream *stream,
 {
 	struct drm_i915_private *i915 = stream->perf->i915;
 	struct i915_perf *perf = stream->perf;
+	struct i915_perf_group *g;
 	struct intel_gt *gt;
 	int ret;
 
@@ -3202,6 +3204,12 @@  static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	}
 	gt = props->engine->gt;
 
+	g = props->engine->oa_group;
+	if (!g) {
+		DRM_DEBUG("Perf group invalid\n");
+		return -EINVAL;
+	}
+
 	/*
 	 * If the sysfs metrics/ directory wasn't registered for some
 	 * reason then don't let userspace try their luck with config
@@ -3231,7 +3239,7 @@  static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	 * counter reports and marshal to the appropriate client
 	 * we currently only allow exclusive access
 	 */
-	if (gt->perf.exclusive_stream) {
+	if (g->exclusive_stream) {
 		drm_dbg(&stream->perf->i915->drm,
 			"OA unit already in use\n");
 		return -EBUSY;
@@ -3326,7 +3334,7 @@  static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	stream->ops = &i915_oa_stream_ops;
 
 	stream->engine->gt->perf.sseu = props->sseu;
-	WRITE_ONCE(gt->perf.exclusive_stream, stream);
+	WRITE_ONCE(g->exclusive_stream, stream);
 
 	ret = i915_perf_stream_enable_sync(stream);
 	if (ret) {
@@ -3349,7 +3357,7 @@  static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	return 0;
 
 err_enable:
-	WRITE_ONCE(gt->perf.exclusive_stream, NULL);
+	WRITE_ONCE(g->exclusive_stream, NULL);
 	perf->ops.disable_metric_set(stream);
 
 	free_oa_buffer(stream);
@@ -3378,12 +3386,13 @@  void i915_oa_init_reg_state(const struct intel_context *ce,
 			    const struct intel_engine_cs *engine)
 {
 	struct i915_perf_stream *stream;
+	struct i915_perf_group *g = engine->oa_group;
 
-	if (!engine_supports_oa(engine))
+	if (!g)
 		return;
 
 	/* perf.exclusive_stream serialised by lrc_configure_all_contexts() */
-	stream = READ_ONCE(engine->gt->perf.exclusive_stream);
+	stream = READ_ONCE(g->exclusive_stream);
 	if (stream && GRAPHICS_VER(stream->perf->i915) < 12)
 		gen8_update_reg_state_unlocked(ce, stream);
 }
@@ -4753,6 +4762,95 @@  static struct ctl_table oa_table[] = {
 	{}
 };
 
+static u32 __num_perf_groups_per_gt(struct intel_gt *gt)
+{
+	enum intel_platform platform = INTEL_INFO(gt->i915)->platform;
+
+	switch (platform) {
+	default:
+		return 1;
+	}
+}
+
+static u32 __oa_engine_group(struct intel_engine_cs *engine)
+{
+	if (!engine_supports_oa(engine))
+		return PERF_GROUP_INVALID;
+
+	switch (engine->class) {
+	case RENDER_CLASS:
+		return PERF_GROUP_OAG;
+
+	default:
+		return PERF_GROUP_INVALID;
+	}
+}
+
+static void oa_init_groups(struct intel_gt *gt)
+{
+	int i, num_groups = gt->perf.num_perf_groups;
+	struct i915_perf *perf = &gt->i915->perf;
+
+	for (i = 0; i < num_groups; i++) {
+		struct i915_perf_group *g = &gt->perf.group[i];
+
+		/* Fused off engines can result in a group with num_engines == 0 */
+		if (g->num_engines == 0)
+			continue;
+
+		/* Set oa_unit_ids now to ensure ids remain contiguous. */
+		g->oa_unit_id = perf->oa_unit_ids++;
+
+		g->gt = gt;
+	}
+}
+
+static int oa_init_gt(struct intel_gt *gt)
+{
+	u32 num_groups = __num_perf_groups_per_gt(gt);
+	struct intel_engine_cs *engine;
+	struct i915_perf_group *g;
+	intel_engine_mask_t tmp;
+
+	g = kcalloc(num_groups, sizeof(*g), GFP_KERNEL);
+	if (!g)
+		return -ENOMEM;
+
+	for_each_engine_masked(engine, gt, ALL_ENGINES, tmp) {
+		u32 index;
+
+		index = __oa_engine_group(engine);
+		if (index < num_groups) {
+			g[index].engine_mask |= BIT(engine->id);
+			g[index].num_engines++;
+			engine->oa_group = &g[index];
+		} else {
+			engine->oa_group = NULL;
+		}
+	}
+
+	gt->perf.num_perf_groups = num_groups;
+	gt->perf.group = g;
+
+	oa_init_groups(gt);
+
+	return 0;
+}
+
+static int oa_init_engine_groups(struct i915_perf *perf)
+{
+	struct intel_gt *gt;
+	int i, ret;
+
+	for_each_gt(gt, perf->i915, i) {
+		ret = oa_init_gt(gt);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 static void oa_init_supported_formats(struct i915_perf *perf)
 {
 	struct drm_i915_private *i915 = perf->i915;
@@ -4919,7 +5017,7 @@  void i915_perf_init(struct drm_i915_private *i915)
 
 	if (perf->ops.enable_metric_set) {
 		struct intel_gt *gt;
-		int i;
+		int i, ret;
 
 		for_each_gt(gt, i915, i)
 			mutex_init(&gt->perf.lock);
@@ -4958,6 +5056,11 @@  void i915_perf_init(struct drm_i915_private *i915)
 
 		perf->i915 = i915;
 
+		ret = oa_init_engine_groups(perf);
+		if (ret)
+			drm_err(&i915->drm,
+				"OA initialization failed %d\n", ret);
+
 		oa_init_supported_formats(perf);
 	}
 }
@@ -4986,10 +5089,15 @@  void i915_perf_sysctl_unregister(void)
 void i915_perf_fini(struct drm_i915_private *i915)
 {
 	struct i915_perf *perf = &i915->perf;
+	struct intel_gt *gt;
+	int i;
 
 	if (!perf->i915)
 		return;
 
+	for_each_gt(gt, perf->i915, i)
+		kfree(gt->perf.group);
+
 	idr_for_each(&perf->metrics_idr, destroy_config, perf);
 	idr_destroy(&perf->metrics_idr);
 
diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h
index e36f046fe2b6..ce99551ad0fd 100644
--- a/drivers/gpu/drm/i915/i915_perf_types.h
+++ b/drivers/gpu/drm/i915/i915_perf_types.h
@@ -17,6 +17,7 @@ 
 #include <linux/wait.h>
 #include <uapi/drm/i915_drm.h>
 
+#include "gt/intel_engine_types.h"
 #include "gt/intel_sseu.h"
 #include "i915_reg_defs.h"
 #include "intel_wakeref.h"
@@ -30,6 +31,13 @@  struct i915_vma;
 struct intel_context;
 struct intel_engine_cs;
 
+enum {
+	PERF_GROUP_OAG = 0,
+
+	PERF_GROUP_MAX,
+	PERF_GROUP_INVALID = U32_MAX,
+};
+
 struct i915_oa_format {
 	u32 format;
 	int size;
@@ -390,6 +398,35 @@  struct i915_oa_ops {
 	u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream);
 };
 
+struct i915_perf_group {
+	/*
+	 * @type: Identifier for the OA unit.
+	 */
+	u32 oa_unit_id;
+
+	/*
+	 * @gt: gt that this group belongs to
+	 */
+	struct intel_gt *gt;
+
+	/*
+	 * @exclusive_stream: The stream currently using the OA unit. This is
+	 * sometimes accessed outside a syscall associated to its file
+	 * descriptor.
+	 */
+	struct i915_perf_stream *exclusive_stream;
+
+	/*
+	 * @num_engines: The number of engines using this OA buffer.
+	 */
+	u32 num_engines;
+
+	/*
+	 * @engine_mask: A mask of engines using a single OA buffer.
+	 */
+	intel_engine_mask_t engine_mask;
+};
+
 struct i915_perf_gt {
 	/*
 	 * Lock associated with anything below within this structure.
@@ -402,12 +439,15 @@  struct i915_perf_gt {
 	 */
 	struct intel_sseu sseu;
 
+	/**
+	 * @num_perf_groups: number of perf groups per gt.
+	 */
+	u32 num_perf_groups;
+
 	/*
-	 * @exclusive_stream: The stream currently using the OA unit. This is
-	 * sometimes accessed outside a syscall associated to its file
-	 * descriptor.
+	 * @group: list of OA groups - one for each OA buffer.
 	 */
-	struct i915_perf_stream *exclusive_stream;
+	struct i915_perf_group *group;
 };
 
 struct i915_perf {
@@ -461,6 +501,9 @@  struct i915_perf {
 	unsigned long format_mask[FORMAT_MASK_SIZE];
 
 	atomic64_t noa_programming_delay;
+
+	/* oa unit ids */
+	u32 oa_unit_ids;
 };
 
 #endif /* _I915_PERF_TYPES_H_ */