
[5/9] drm/i915/perf: Group engines into respective OA groups

Message ID 20230215005419.2100887-6-umesh.nerlige.ramappa@intel.com (mailing list archive)
State New, archived
Series Add OAM support for MTL

Commit Message

Umesh Nerlige Ramappa Feb. 15, 2023, 12:54 a.m. UTC
Now that we may have multiple OA units in a single GT as well as on
separate GTs, create an engine group that maps to a single OA unit.

Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_types.h |   4 +
 drivers/gpu/drm/i915/gt/intel_sseu.c         |   3 +-
 drivers/gpu/drm/i915/i915_perf.c             | 126 +++++++++++++++++--
 drivers/gpu/drm/i915/i915_perf_types.h       |  51 +++++++-
 4 files changed, 171 insertions(+), 13 deletions(-)

Comments

Jani Nikula Feb. 16, 2023, 10:51 a.m. UTC | #1
On Tue, 14 Feb 2023, Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> wrote:
> Now that we may have multiple OA units in a single GT as well as on
> separate GTs, create an engine group that maps to a single OA unit.
>
> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_engine_types.h |   4 +
>  drivers/gpu/drm/i915/gt/intel_sseu.c         |   3 +-
>  drivers/gpu/drm/i915/i915_perf.c             | 126 +++++++++++++++++--
>  drivers/gpu/drm/i915/i915_perf_types.h       |  51 +++++++-
>  4 files changed, 171 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 4fd54fb8810f..8a8b0dce241b 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -53,6 +53,8 @@ struct intel_gt;
>  struct intel_ring;
>  struct intel_uncore;
>  struct intel_breadcrumbs;
> +struct intel_engine_cs;
> +struct i915_perf_group;
>  
>  typedef u32 intel_engine_mask_t;
>  #define ALL_ENGINES ((intel_engine_mask_t)~0ul)
> @@ -603,6 +605,8 @@ struct intel_engine_cs {
>  	} props, defaults;
>  
>  	I915_SELFTEST_DECLARE(struct fault_attr reset_timeout);
> +
> +	struct i915_perf_group *oa_group;
>  };
>  
>  static inline bool
> diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
> index 6c6198a257ac..1141f875f5bd 100644
> --- a/drivers/gpu/drm/i915/gt/intel_sseu.c
> +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
> @@ -6,6 +6,7 @@
>  #include <linux/string_helpers.h>
>  
>  #include "i915_drv.h"
> +#include "i915_perf_types.h"
>  #include "intel_engine_regs.h"
>  #include "intel_gt_regs.h"
>  #include "intel_sseu.h"
> @@ -677,7 +678,7 @@ u32 intel_sseu_make_rpcs(struct intel_gt *gt,
>  	 * If i915/perf is active, we want a stable powergating configuration
>  	 * on the system. Use the configuration pinned by i915/perf.
>  	 */
> -	if (gt->perf.exclusive_stream)
> +	if (gt->perf.group && gt->perf.group[PERF_GROUP_OAG].exclusive_stream)
>  		req_sseu = &gt->perf.sseu;
>  
>  	slices = hweight8(req_sseu->slice_mask);
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index e134523576f8..fda779b2c16f 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -1587,8 +1587,9 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
>  {
>  	struct i915_perf *perf = stream->perf;
>  	struct intel_gt *gt = stream->engine->gt;
> +	struct i915_perf_group *g = stream->engine->oa_group;
>  
> -	if (WARN_ON(stream != gt->perf.exclusive_stream))
> +	if (WARN_ON(stream != g->exclusive_stream))
>  		return;
>  
>  	/*
> @@ -1597,7 +1598,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
>  	 *
>  	 * See i915_oa_init_reg_state() and lrc_configure_all_contexts()
>  	 */
> -	WRITE_ONCE(gt->perf.exclusive_stream, NULL);
> +	WRITE_ONCE(g->exclusive_stream, NULL);
>  	perf->ops.disable_metric_set(stream);
>  
>  	free_oa_buffer(stream);
> @@ -3195,6 +3196,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>  {
>  	struct drm_i915_private *i915 = stream->perf->i915;
>  	struct i915_perf *perf = stream->perf;
> +	struct i915_perf_group *g;
>  	struct intel_gt *gt;
>  	int ret;
>  
> @@ -3205,6 +3207,12 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>  	}
>  	gt = props->engine->gt;
>  
> +	g = props->engine->oa_group;
> +	if (!g) {
> +		DRM_DEBUG("Perf group invalid\n");
> +		return -EINVAL;
> +	}
> +
>  	/*
>  	 * If the sysfs metrics/ directory wasn't registered for some
>  	 * reason then don't let userspace try their luck with config
> @@ -3234,7 +3242,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>  	 * counter reports and marshal to the appropriate client
>  	 * we currently only allow exclusive access
>  	 */
> -	if (gt->perf.exclusive_stream) {
> +	if (g->exclusive_stream) {
>  		drm_dbg(&stream->perf->i915->drm,
>  			"OA unit already in use\n");
>  		return -EBUSY;
> @@ -3329,7 +3337,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>  	stream->ops = &i915_oa_stream_ops;
>  
>  	stream->engine->gt->perf.sseu = props->sseu;
> -	WRITE_ONCE(gt->perf.exclusive_stream, stream);
> +	WRITE_ONCE(g->exclusive_stream, stream);
>  
>  	ret = i915_perf_stream_enable_sync(stream);
>  	if (ret) {
> @@ -3352,7 +3360,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>  	return 0;
>  
>  err_enable:
> -	WRITE_ONCE(gt->perf.exclusive_stream, NULL);
> +	WRITE_ONCE(g->exclusive_stream, NULL);
>  	perf->ops.disable_metric_set(stream);
>  
>  	free_oa_buffer(stream);
> @@ -3381,12 +3389,13 @@ void i915_oa_init_reg_state(const struct intel_context *ce,
>  			    const struct intel_engine_cs *engine)
>  {
>  	struct i915_perf_stream *stream;
> +	struct i915_perf_group *g = engine->oa_group;
>  
> -	if (!engine_supports_oa(engine))
> +	if (!g)
>  		return;
>  
>  	/* perf.exclusive_stream serialised by lrc_configure_all_contexts() */
> -	stream = READ_ONCE(engine->gt->perf.exclusive_stream);
> +	stream = READ_ONCE(g->exclusive_stream);
>  	if (stream && GRAPHICS_VER(stream->perf->i915) < 12)
>  		gen8_update_reg_state_unlocked(ce, stream);
>  }
> @@ -4755,6 +4764,95 @@ static struct ctl_table oa_table[] = {
>  	{}
>  };
>  
> +static u32 __num_perf_groups_per_gt(struct intel_gt *gt)
> +{
> +	enum intel_platform platform = INTEL_INFO(gt->i915)->platform;
> +
> +	switch (platform) {
> +	default:
> +		return 1;
> +	}
> +}
> +
> +static u32 __oa_engine_group(struct intel_engine_cs *engine)
> +{
> +	if (!engine_supports_oa(engine))
> +		return PERF_GROUP_INVALID;
> +
> +	switch (engine->class) {
> +	case RENDER_CLASS:
> +		return PERF_GROUP_OAG;
> +
> +	default:
> +		return PERF_GROUP_INVALID;
> +	}
> +}
> +
> +static void oa_init_groups(struct intel_gt *gt)
> +{
> +	int i, num_groups = gt->perf.num_perf_groups;
> +	struct i915_perf *perf = &gt->i915->perf;
> +
> +	for (i = 0; i < num_groups; i++) {
> +		struct i915_perf_group *g = &gt->perf.group[i];
> +
> +		/* Fused off engines can result in a group with num_engines == 0 */
> +		if (g->num_engines == 0)
> +			continue;
> +
> +		/* Set oa_unit_ids now to ensure ids remain contiguous. */
> +		g->oa_unit_id = perf->oa_unit_ids++;
> +
> +		g->gt = gt;
> +	}
> +}
> +
> +static int oa_init_gt(struct intel_gt *gt)
> +{
> +	u32 num_groups = __num_perf_groups_per_gt(gt);
> +	struct intel_engine_cs *engine;
> +	struct i915_perf_group *g;
> +	intel_engine_mask_t tmp;
> +
> +	g = kcalloc(num_groups, sizeof(*g), GFP_KERNEL);
> +	if (drm_WARN_ON(&gt->i915->drm, !g))
> +		return -ENOMEM;

No warnings or messages on -ENOMEM is standard policy.
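
A minimal sketch of the plain form being suggested, reusing the names from the
hunk above (the allocator already logs allocation failures unless __GFP_NOWARN
is passed, so no extra WARN or message is needed):

	/* Allocation failure: just propagate -ENOMEM, no warning. */
	g = kcalloc(num_groups, sizeof(*g), GFP_KERNEL);
	if (!g)
		return -ENOMEM;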

> +
> +	for_each_engine_masked(engine, gt, ALL_ENGINES, tmp) {
> +		u32 index;
> +
> +		index = __oa_engine_group(engine);
> +		if (index < num_groups) {
> +			g[index].engine_mask |= BIT(engine->id);
> +			g[index].num_engines++;
> +			engine->oa_group = &g[index];
> +		} else {
> +			engine->oa_group = NULL;
> +		}
> +	}
> +
> +	gt->perf.num_perf_groups = num_groups;
> +	gt->perf.group = g;
> +
> +	oa_init_groups(gt);
> +
> +	return 0;
> +}
> +
> +static int oa_init_engine_groups(struct i915_perf *perf)
> +{
> +	struct intel_gt *gt;
> +	int i, ret;
> +
> +	for_each_gt(gt, perf->i915, i) {
> +		ret = oa_init_gt(gt);
> +		if (ret)
> +			return ret;

If this fails in the middle, you'll leave things in half-initialized
state when returning, and expect the caller to clean it up. But that's a
surprising interface design. If i915_perf_init() returns an error, it's
*not* customary to have to call the corresponding cleanup function. On
the contrary, you only call it on success. Any init failures need to be
cleaned up internally before returning.
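
A rough sketch of the kind of internal unwinding described here, reusing the
names from this patch (hypothetical only; a complete version would also have
to clear the engine->oa_group pointers set up by oa_init_gt()):

static int oa_init_engine_groups(struct i915_perf *perf)
{
	struct intel_gt *gt;
	int i, ret;

	for_each_gt(gt, perf->i915, i) {
		ret = oa_init_gt(gt);
		if (ret)
			goto err;
	}

	return 0;

err:
	/* Free whatever has been allocated so far; kfree(NULL) is a no-op. */
	for_each_gt(gt, perf->i915, i) {
		kfree(gt->perf.group);
		gt->perf.group = NULL;
		gt->perf.num_perf_groups = 0;
	}

	return ret;
}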

> +	}
> +
> +	return 0;
> +}
> +
>  static void oa_init_supported_formats(struct i915_perf *perf)
>  {
>  	struct drm_i915_private *i915 = perf->i915;
> @@ -4921,7 +5019,7 @@ int i915_perf_init(struct drm_i915_private *i915)
>  
>  	if (perf->ops.enable_metric_set) {
>  		struct intel_gt *gt;
> -		int i;
> +		int i, ret;
>  
>  		for_each_gt(gt, i915, i)
>  			mutex_init(&gt->perf.lock);
> @@ -4960,6 +5058,13 @@ int i915_perf_init(struct drm_i915_private *i915)
>  
>  		perf->i915 = i915;
>  
> +		ret = oa_init_engine_groups(perf);
> +		if (ret) {
> +			drm_err(&i915->drm,
> +				"OA initialization failed %d\n", ret);
> +			return ret;
> +		}
> +
>  		oa_init_supported_formats(perf);
>  	}
>  
> @@ -4990,10 +5095,15 @@ void i915_perf_sysctl_unregister(void)
>  void i915_perf_fini(struct drm_i915_private *i915)
>  {
>  	struct i915_perf *perf = &i915->perf;
> +	struct intel_gt *gt;
> +	int i;
>  
>  	if (!perf->i915)
>  		return;
>  
> +	for_each_gt(gt, perf->i915, i)
> +		kfree(gt->perf.group);
> +
>  	idr_for_each(&perf->metrics_idr, destroy_config, perf);
>  	idr_destroy(&perf->metrics_idr);
>  
> diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h
> index e36f046fe2b6..ce99551ad0fd 100644
> --- a/drivers/gpu/drm/i915/i915_perf_types.h
> +++ b/drivers/gpu/drm/i915/i915_perf_types.h
> @@ -17,6 +17,7 @@
>  #include <linux/wait.h>
>  #include <uapi/drm/i915_drm.h>
>  
> +#include "gt/intel_engine_types.h"
>  #include "gt/intel_sseu.h"
>  #include "i915_reg_defs.h"
>  #include "intel_wakeref.h"
> @@ -30,6 +31,13 @@ struct i915_vma;
>  struct intel_context;
>  struct intel_engine_cs;
>  
> +enum {
> +	PERF_GROUP_OAG = 0,
> +
> +	PERF_GROUP_MAX,
> +	PERF_GROUP_INVALID = U32_MAX,
> +};
> +
>  struct i915_oa_format {
>  	u32 format;
>  	int size;
> @@ -390,6 +398,35 @@ struct i915_oa_ops {
>  	u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream);
>  };
>  
> +struct i915_perf_group {
> +	/*
> +	 * @oa_unit_id: Identifier for the OA unit.
> +	 */
> +	u32 oa_unit_id;
> +
> +	/*
> +	 * @gt: gt that this group belongs to
> +	 */
> +	struct intel_gt *gt;
> +
> +	/*
> +	 * @exclusive_stream: The stream currently using the OA unit. This is
> +	 * sometimes accessed outside a syscall associated to its file
> +	 * descriptor.
> +	 */
> +	struct i915_perf_stream *exclusive_stream;
> +
> +	/*
> +	 * @num_engines: The number of engines using this OA buffer.
> +	 */
> +	u32 num_engines;
> +
> +	/*
> +	 * @engine_mask: A mask of engines using a single OA buffer.
> +	 */
> +	intel_engine_mask_t engine_mask;
> +};
> +
>  struct i915_perf_gt {
>  	/*
>  	 * Lock associated with anything below within this structure.
> @@ -402,12 +439,15 @@ struct i915_perf_gt {
>  	 */
>  	struct intel_sseu sseu;
>  
> +	/**
> +	 * @num_perf_groups: number of perf groups per gt.
> +	 */
> +	u32 num_perf_groups;
> +
>  	/*
> -	 * @exclusive_stream: The stream currently using the OA unit. This is
> -	 * sometimes accessed outside a syscall associated to its file
> -	 * descriptor.
> +	 * @group: list of OA groups - one for each OA buffer.
>  	 */
> -	struct i915_perf_stream *exclusive_stream;
> +	struct i915_perf_group *group;
>  };
>  
>  struct i915_perf {
> @@ -461,6 +501,9 @@ struct i915_perf {
>  	unsigned long format_mask[FORMAT_MASK_SIZE];
>  
>  	atomic64_t noa_programming_delay;
> +
> +	/* oa unit ids */
> +	u32 oa_unit_ids;
>  };
>  
>  #endif /* _I915_PERF_TYPES_H_ */
Dixit, Ashutosh Feb. 16, 2023, 5:55 p.m. UTC | #2
On Thu, 16 Feb 2023 02:51:34 -0800, Jani Nikula wrote:
>
> > +static int oa_init_gt(struct intel_gt *gt)
> > +{
> > +	u32 num_groups = __num_perf_groups_per_gt(gt);
> > +	struct intel_engine_cs *engine;
> > +	struct i915_perf_group *g;
> > +	intel_engine_mask_t tmp;
> > +
> > +	g = kcalloc(num_groups, sizeof(*g), GFP_KERNEL);
> > +	if (drm_WARN_ON(&gt->i915->drm, !g))
> > +		return -ENOMEM;
>
> No warnings or messages on -ENOMEM is standard policy.

Hmm I think this is the only error for which this code is failing the
probe. So if we are not going to fail the probe, we should at least allow a
WARN_ON? Exception proves the rule?
Jani Nikula Feb. 16, 2023, 6:10 p.m. UTC | #3
On Thu, 16 Feb 2023, "Dixit, Ashutosh" <ashutosh.dixit@intel.com> wrote:
> On Thu, 16 Feb 2023 02:51:34 -0800, Jani Nikula wrote:
>>
>> > +static int oa_init_gt(struct intel_gt *gt)
>> > +{
>> > +	u32 num_groups = __num_perf_groups_per_gt(gt);
>> > +	struct intel_engine_cs *engine;
>> > +	struct i915_perf_group *g;
>> > +	intel_engine_mask_t tmp;
>> > +
>> > +	g = kcalloc(num_groups, sizeof(*g), GFP_KERNEL);
>> > +	if (drm_WARN_ON(&gt->i915->drm, !g))
>> > +		return -ENOMEM;
>>
>> No warnings or messages on -ENOMEM is standard policy.
>
> Hmm I think this is the only error for which this code is failing the
> probe. So if we are not going to fail the probe, we should at least allow a
> WARN_ON? Exception proves the rule?

A whole lot of other things are going to go bonkers on -ENOMEM, and
getting that warn isn't going to help anyone...

Maybe we do need to fail probe on this after all, but it just seemed
pointless at the time it was introduced a few patches earlier.

BR,
Jani.
Umesh Nerlige Ramappa Feb. 16, 2023, 8:55 p.m. UTC | #4
On Thu, Feb 16, 2023 at 08:10:29PM +0200, Jani Nikula wrote:
>On Thu, 16 Feb 2023, "Dixit, Ashutosh" <ashutosh.dixit@intel.com> wrote:
>> On Thu, 16 Feb 2023 02:51:34 -0800, Jani Nikula wrote:
>>>
>>> > +static int oa_init_gt(struct intel_gt *gt)
>>> > +{
>>> > +	u32 num_groups = __num_perf_groups_per_gt(gt);
>>> > +	struct intel_engine_cs *engine;
>>> > +	struct i915_perf_group *g;
>>> > +	intel_engine_mask_t tmp;
>>> > +
>>> > +	g = kcalloc(num_groups, sizeof(*g), GFP_KERNEL);
>>> > +	if (drm_WARN_ON(&gt->i915->drm, !g))
>>> > +		return -ENOMEM;
>>>
>>> No warnings or messages on -ENOMEM is standard policy.
>>
>> Hmm I think this is the only error for which this code is failing the
>> probe. So if we are not going to fail the probe, we should at least allow a
>> WARN_ON? Exception proves the rule?
>
>A whole lot of other things are going to go bonkers on -ENOMEM, and
>getting that warn isn't going to help anyone...

Should I just add a debug message here instead of the WARN_ON?

>
>Maybe we do need to fail probe on this after all, but it just seemed
>pointless at the time it was introduced a few patches earlier.

Sorry about that, I will fix the order of patches.

Umesh
Umesh Nerlige Ramappa Feb. 16, 2023, 11:44 p.m. UTC | #5
On Thu, Feb 16, 2023 at 12:51:34PM +0200, Jani Nikula wrote:
>On Tue, 14 Feb 2023, Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> wrote:
>> [snip]
>> +static int oa_init_gt(struct intel_gt *gt)
>> +{
>> +	u32 num_groups = __num_perf_groups_per_gt(gt);
>> +	struct intel_engine_cs *engine;
>> +	struct i915_perf_group *g;
>> +	intel_engine_mask_t tmp;
>> +
>> +	g = kcalloc(num_groups, sizeof(*g), GFP_KERNEL);
>> +	if (drm_WARN_ON(&gt->i915->drm, !g))
>> +		return -ENOMEM;
>
>No warnings or messages on -ENOMEM is standard policy.

Will remove the WARN_ON.

>
>> [snip]
>> +static int oa_init_engine_groups(struct i915_perf *perf)
>> +{
>> +	struct intel_gt *gt;
>> +	int i, ret;
>> +
>> +	for_each_gt(gt, perf->i915, i) {
>> +		ret = oa_init_gt(gt);
>> +		if (ret)
>> +			return ret;
>
>If this fails in the middle, you'll leave things in half-initialized
>state when returning, and expect the caller to clean it up. But that's a
>surprising interface design. If i915_perf_init() returns an error, it's
>*not* customary to have to call the corresponding cleanup function. On
>the contrary, you only call it on success. Any init failures need to be
>cleaned up internally before returning.

ENOMEM is the only error I am expecting here, so no other cleanup is 
needed if this fails.

Thanks,
Umesh
Umesh Nerlige Ramappa Feb. 16, 2023, 11:58 p.m. UTC | #6
On Thu, Feb 16, 2023 at 12:55:59PM -0800, Umesh Nerlige Ramappa wrote:
>On Thu, Feb 16, 2023 at 08:10:29PM +0200, Jani Nikula wrote:
>>On Thu, 16 Feb 2023, "Dixit, Ashutosh" <ashutosh.dixit@intel.com> wrote:
>>>On Thu, 16 Feb 2023 02:51:34 -0800, Jani Nikula wrote:
>>>>
>>>>> +static int oa_init_gt(struct intel_gt *gt)
>>>>> +{
>>>>> +	u32 num_groups = __num_perf_groups_per_gt(gt);
>>>>> +	struct intel_engine_cs *engine;
>>>>> +	struct i915_perf_group *g;
>>>>> +	intel_engine_mask_t tmp;
>>>>> +
>>>>> +	g = kcalloc(num_groups, sizeof(*g), GFP_KERNEL);
>>>>> +	if (drm_WARN_ON(&gt->i915->drm, !g))
>>>>> +		return -ENOMEM;
>>>>
>>>>No warnings or messages on -ENOMEM is standard policy.
>>>
>>>Hmm I think this is the only error for which this code is failing the
>>>probe. So if we are not going to fail the probe, we should at least allow a
>>>WARN_ON? Exception proves the rule?
>>
>>A whole lot of other things are going to go bonkers on -ENOMEM, and
>>getting that warn isn't going to help anyone...
>
>Should I just add a debug message here instead of warn_ON?

Never mind, you already mentioned no warn/message on -ENOMEM.

Regards,
Umesh

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 4fd54fb8810f..8a8b0dce241b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -53,6 +53,8 @@  struct intel_gt;
 struct intel_ring;
 struct intel_uncore;
 struct intel_breadcrumbs;
+struct intel_engine_cs;
+struct i915_perf_group;
 
 typedef u32 intel_engine_mask_t;
 #define ALL_ENGINES ((intel_engine_mask_t)~0ul)
@@ -603,6 +605,8 @@  struct intel_engine_cs {
 	} props, defaults;
 
 	I915_SELFTEST_DECLARE(struct fault_attr reset_timeout);
+
+	struct i915_perf_group *oa_group;
 };
 
 static inline bool
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
index 6c6198a257ac..1141f875f5bd 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -6,6 +6,7 @@ 
 #include <linux/string_helpers.h>
 
 #include "i915_drv.h"
+#include "i915_perf_types.h"
 #include "intel_engine_regs.h"
 #include "intel_gt_regs.h"
 #include "intel_sseu.h"
@@ -677,7 +678,7 @@  u32 intel_sseu_make_rpcs(struct intel_gt *gt,
 	 * If i915/perf is active, we want a stable powergating configuration
 	 * on the system. Use the configuration pinned by i915/perf.
 	 */
-	if (gt->perf.exclusive_stream)
+	if (gt->perf.group && gt->perf.group[PERF_GROUP_OAG].exclusive_stream)
 		req_sseu = &gt->perf.sseu;
 
 	slices = hweight8(req_sseu->slice_mask);
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index e134523576f8..fda779b2c16f 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1587,8 +1587,9 @@  static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
 {
 	struct i915_perf *perf = stream->perf;
 	struct intel_gt *gt = stream->engine->gt;
+	struct i915_perf_group *g = stream->engine->oa_group;
 
-	if (WARN_ON(stream != gt->perf.exclusive_stream))
+	if (WARN_ON(stream != g->exclusive_stream))
 		return;
 
 	/*
@@ -1597,7 +1598,7 @@  static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
 	 *
 	 * See i915_oa_init_reg_state() and lrc_configure_all_contexts()
 	 */
-	WRITE_ONCE(gt->perf.exclusive_stream, NULL);
+	WRITE_ONCE(g->exclusive_stream, NULL);
 	perf->ops.disable_metric_set(stream);
 
 	free_oa_buffer(stream);
@@ -3195,6 +3196,7 @@  static int i915_oa_stream_init(struct i915_perf_stream *stream,
 {
 	struct drm_i915_private *i915 = stream->perf->i915;
 	struct i915_perf *perf = stream->perf;
+	struct i915_perf_group *g;
 	struct intel_gt *gt;
 	int ret;
 
@@ -3205,6 +3207,12 @@  static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	}
 	gt = props->engine->gt;
 
+	g = props->engine->oa_group;
+	if (!g) {
+		DRM_DEBUG("Perf group invalid\n");
+		return -EINVAL;
+	}
+
 	/*
 	 * If the sysfs metrics/ directory wasn't registered for some
 	 * reason then don't let userspace try their luck with config
@@ -3234,7 +3242,7 @@  static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	 * counter reports and marshal to the appropriate client
 	 * we currently only allow exclusive access
 	 */
-	if (gt->perf.exclusive_stream) {
+	if (g->exclusive_stream) {
 		drm_dbg(&stream->perf->i915->drm,
 			"OA unit already in use\n");
 		return -EBUSY;
@@ -3329,7 +3337,7 @@  static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	stream->ops = &i915_oa_stream_ops;
 
 	stream->engine->gt->perf.sseu = props->sseu;
-	WRITE_ONCE(gt->perf.exclusive_stream, stream);
+	WRITE_ONCE(g->exclusive_stream, stream);
 
 	ret = i915_perf_stream_enable_sync(stream);
 	if (ret) {
@@ -3352,7 +3360,7 @@  static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	return 0;
 
 err_enable:
-	WRITE_ONCE(gt->perf.exclusive_stream, NULL);
+	WRITE_ONCE(g->exclusive_stream, NULL);
 	perf->ops.disable_metric_set(stream);
 
 	free_oa_buffer(stream);
@@ -3381,12 +3389,13 @@  void i915_oa_init_reg_state(const struct intel_context *ce,
 			    const struct intel_engine_cs *engine)
 {
 	struct i915_perf_stream *stream;
+	struct i915_perf_group *g = engine->oa_group;
 
-	if (!engine_supports_oa(engine))
+	if (!g)
 		return;
 
 	/* perf.exclusive_stream serialised by lrc_configure_all_contexts() */
-	stream = READ_ONCE(engine->gt->perf.exclusive_stream);
+	stream = READ_ONCE(g->exclusive_stream);
 	if (stream && GRAPHICS_VER(stream->perf->i915) < 12)
 		gen8_update_reg_state_unlocked(ce, stream);
 }
@@ -4755,6 +4764,95 @@  static struct ctl_table oa_table[] = {
 	{}
 };
 
+static u32 __num_perf_groups_per_gt(struct intel_gt *gt)
+{
+	enum intel_platform platform = INTEL_INFO(gt->i915)->platform;
+
+	switch (platform) {
+	default:
+		return 1;
+	}
+}
+
+static u32 __oa_engine_group(struct intel_engine_cs *engine)
+{
+	if (!engine_supports_oa(engine))
+		return PERF_GROUP_INVALID;
+
+	switch (engine->class) {
+	case RENDER_CLASS:
+		return PERF_GROUP_OAG;
+
+	default:
+		return PERF_GROUP_INVALID;
+	}
+}
+
+static void oa_init_groups(struct intel_gt *gt)
+{
+	int i, num_groups = gt->perf.num_perf_groups;
+	struct i915_perf *perf = &gt->i915->perf;
+
+	for (i = 0; i < num_groups; i++) {
+		struct i915_perf_group *g = &gt->perf.group[i];
+
+		/* Fused off engines can result in a group with num_engines == 0 */
+		if (g->num_engines == 0)
+			continue;
+
+		/* Set oa_unit_ids now to ensure ids remain contiguous. */
+		g->oa_unit_id = perf->oa_unit_ids++;
+
+		g->gt = gt;
+	}
+}
+
+static int oa_init_gt(struct intel_gt *gt)
+{
+	u32 num_groups = __num_perf_groups_per_gt(gt);
+	struct intel_engine_cs *engine;
+	struct i915_perf_group *g;
+	intel_engine_mask_t tmp;
+
+	g = kcalloc(num_groups, sizeof(*g), GFP_KERNEL);
+	if (drm_WARN_ON(&gt->i915->drm, !g))
+		return -ENOMEM;
+
+	for_each_engine_masked(engine, gt, ALL_ENGINES, tmp) {
+		u32 index;
+
+		index = __oa_engine_group(engine);
+		if (index < num_groups) {
+			g[index].engine_mask |= BIT(engine->id);
+			g[index].num_engines++;
+			engine->oa_group = &g[index];
+		} else {
+			engine->oa_group = NULL;
+		}
+	}
+
+	gt->perf.num_perf_groups = num_groups;
+	gt->perf.group = g;
+
+	oa_init_groups(gt);
+
+	return 0;
+}
+
+static int oa_init_engine_groups(struct i915_perf *perf)
+{
+	struct intel_gt *gt;
+	int i, ret;
+
+	for_each_gt(gt, perf->i915, i) {
+		ret = oa_init_gt(gt);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 static void oa_init_supported_formats(struct i915_perf *perf)
 {
 	struct drm_i915_private *i915 = perf->i915;
@@ -4921,7 +5019,7 @@  int i915_perf_init(struct drm_i915_private *i915)
 
 	if (perf->ops.enable_metric_set) {
 		struct intel_gt *gt;
-		int i;
+		int i, ret;
 
 		for_each_gt(gt, i915, i)
 			mutex_init(&gt->perf.lock);
@@ -4960,6 +5058,13 @@  int i915_perf_init(struct drm_i915_private *i915)
 
 		perf->i915 = i915;
 
+		ret = oa_init_engine_groups(perf);
+		if (ret) {
+			drm_err(&i915->drm,
+				"OA initialization failed %d\n", ret);
+			return ret;
+		}
+
 		oa_init_supported_formats(perf);
 	}
 
@@ -4990,10 +5095,15 @@  void i915_perf_sysctl_unregister(void)
 void i915_perf_fini(struct drm_i915_private *i915)
 {
 	struct i915_perf *perf = &i915->perf;
+	struct intel_gt *gt;
+	int i;
 
 	if (!perf->i915)
 		return;
 
+	for_each_gt(gt, perf->i915, i)
+		kfree(gt->perf.group);
+
 	idr_for_each(&perf->metrics_idr, destroy_config, perf);
 	idr_destroy(&perf->metrics_idr);
 
diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h
index e36f046fe2b6..ce99551ad0fd 100644
--- a/drivers/gpu/drm/i915/i915_perf_types.h
+++ b/drivers/gpu/drm/i915/i915_perf_types.h
@@ -17,6 +17,7 @@ 
 #include <linux/wait.h>
 #include <uapi/drm/i915_drm.h>
 
+#include "gt/intel_engine_types.h"
 #include "gt/intel_sseu.h"
 #include "i915_reg_defs.h"
 #include "intel_wakeref.h"
@@ -30,6 +31,13 @@  struct i915_vma;
 struct intel_context;
 struct intel_engine_cs;
 
+enum {
+	PERF_GROUP_OAG = 0,
+
+	PERF_GROUP_MAX,
+	PERF_GROUP_INVALID = U32_MAX,
+};
+
 struct i915_oa_format {
 	u32 format;
 	int size;
@@ -390,6 +398,35 @@  struct i915_oa_ops {
 	u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream);
 };
 
+struct i915_perf_group {
+	/*
+	 * @oa_unit_id: Identifier for the OA unit.
+	 */
+	u32 oa_unit_id;
+
+	/*
+	 * @gt: gt that this group belongs to
+	 */
+	struct intel_gt *gt;
+
+	/*
+	 * @exclusive_stream: The stream currently using the OA unit. This is
+	 * sometimes accessed outside a syscall associated to its file
+	 * descriptor.
+	 */
+	struct i915_perf_stream *exclusive_stream;
+
+	/*
+	 * @num_engines: The number of engines using this OA buffer.
+	 */
+	u32 num_engines;
+
+	/*
+	 * @engine_mask: A mask of engines using a single OA buffer.
+	 */
+	intel_engine_mask_t engine_mask;
+};
+
 struct i915_perf_gt {
 	/*
 	 * Lock associated with anything below within this structure.
@@ -402,12 +439,15 @@  struct i915_perf_gt {
 	 */
 	struct intel_sseu sseu;
 
+	/**
+	 * @num_perf_groups: number of perf groups per gt.
+	 */
+	u32 num_perf_groups;
+
 	/*
-	 * @exclusive_stream: The stream currently using the OA unit. This is
-	 * sometimes accessed outside a syscall associated to its file
-	 * descriptor.
+	 * @group: list of OA groups - one for each OA buffer.
 	 */
-	struct i915_perf_stream *exclusive_stream;
+	struct i915_perf_group *group;
 };
 
 struct i915_perf {
@@ -461,6 +501,9 @@  struct i915_perf {
 	unsigned long format_mask[FORMAT_MASK_SIZE];
 
 	atomic64_t noa_programming_delay;
+
+	/* oa unit ids */
+	u32 oa_unit_ids;
 };
 
 #endif /* _I915_PERF_TYPES_H_ */