diff mbox

[RFC] tests/pm_sseu: Add subtest to verify UMD can configure render powerclock state

Message ID 1493737707-5478-1-git-send-email-oscar.mateo@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

oscar.mateo@intel.com May 2, 2017, 3:08 p.m. UTC
Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
---
 tests/pm_sseu.c | 105 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 105 insertions(+)

Comments

Chris Wilson May 3, 2017, 8:59 a.m. UTC | #1
On Tue, May 02, 2017 at 03:08:27PM +0000, Oscar Mateo wrote:
> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
> ---
>  tests/pm_sseu.c | 105 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 105 insertions(+)
> 
> diff --git a/tests/pm_sseu.c b/tests/pm_sseu.c
> index 7d4b33c..1fb36c5 100644
> --- a/tests/pm_sseu.c
> +++ b/tests/pm_sseu.c
> @@ -352,6 +352,108 @@ full_enable(void)
>  	check_full_enable(&stat);
>  }
>  
> +#define GEN8_R_PWR_CLK_STATE	(0x20C8)
> +#define   GEN8_RPCS_ENABLE		(1 << 31)
> +
> +#define MI_STORE_REGISTER_MEM_64_BIT_ADDR	((0x24 << 23) | 2)
> +
> +static uint32_t read_pwrclk_state(drm_intel_bufmgr *bufmgr,
> +				  struct intel_batchbuffer *batch,
> +				  drm_intel_context *context)
> +{
> +	uint32_t rpcs_config;
> +	uint32_t *data;
> +	drm_intel_bo *dst_bo;
> +
> +	dst_bo = drm_intel_bo_alloc(bufmgr, "dst", 4, 4096);
> +
> +	BEGIN_BATCH(3, 1);
> +	OUT_BATCH(MI_STORE_REGISTER_MEM_64_BIT_ADDR);
> +	OUT_BATCH(GEN8_R_PWR_CLK_STATE);
> +	OUT_RELOC(dst_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
> +	ADVANCE_BATCH();
> +
> +	intel_batchbuffer_flush_with_context(batch, context);
> +
> +	drm_intel_bo_map(dst_bo, 1);
> +
> +	data = dst_bo->virtual;
> +	rpcs_config = *data;
> +
> +	drm_intel_bo_unmap(dst_bo);
> +
> +	drm_intel_bo_unreference(dst_bo);
> +
> +	return rpcs_config;
> +}
> +
> +#define LOCAL_MI_LOAD_REGISTER_IMM	(0x22 << 23)
> +
> +#define GFX_OP_PIPE_CONTROL(len)	((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2))
> +#define   PIPE_CONTROL_CS_STALL				(1<<20)
> +#define   PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH	(1<<12)
> +#define   PIPE_CONTROL_FLUSH_ENABLE			(1<<7)
> +#define   PIPE_CONTROL_DC_FLUSH_ENABLE			(1<<5)
> +#define   PIPE_CONTROL_DEPTH_CACHE_FLUSH		(1<<0)
> +
> +static void write_pwrclk_state(drm_intel_bufmgr *bufmgr,
> +			       struct intel_batchbuffer *batch,
> +			       drm_intel_context *context,
> +			       uint32_t rpcs_config)
> +{
> +	drm_intel_bo *dst_bo;
> +
> +	dst_bo = drm_intel_bo_alloc(bufmgr, "scratch", 4, 4096);
> +
> +	BEGIN_BATCH(9, 1);
> +	OUT_BATCH(LOCAL_MI_LOAD_REGISTER_IMM | 1);
> +	OUT_BATCH(GEN8_R_PWR_CLK_STATE);
> +	OUT_BATCH(rpcs_config);
> +	OUT_BATCH(GFX_OP_PIPE_CONTROL(6));
> +	OUT_BATCH(PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
> +		  PIPE_CONTROL_DEPTH_CACHE_FLUSH |
> +		  PIPE_CONTROL_DC_FLUSH_ENABLE |
> +		  PIPE_CONTROL_FLUSH_ENABLE |
> +		  PIPE_CONTROL_CS_STALL);
> +	OUT_RELOC(dst_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
> +	OUT_BATCH(0);
> +	OUT_BATCH(0);
> +	OUT_BATCH(0);
> +	ADVANCE_BATCH();
> +
> +	intel_batchbuffer_flush_with_context(batch, context);
> +
> +	drm_intel_bo_unreference(dst_bo);
> +}
> +
> +/* Makes sure userspace can configure GEN8_R_PWR_CLK_STATE (e.g. is whitelisted) */
> +static void
> +pwrclk_state(void)
> +{
> +	drm_intel_context *context;
> +	uint32_t rpcs_config;
> +	bool rpcs_enabled;
> +
> +	/*
> +	 * Gen8 BDW is the first case in which usermode can configure their
> +	 * own render power gating
> +	*/
> +	igt_require(gem.gen >= 8);

Pop quiz: what result does this give on kernels that do not support RPCS
self-adjustment?

So we need some method to query the modifiable set of registers. Several
options: an lrc version getparam, a query via context getparam for is
this register modifiable, or for userspace to try and fail.

For something like this where the implementation is best effort, trial
and error is not a bad plan. It just means that this test cannot discern
a failure (or we say that all previous kernels were mistaken...)

Question on the whitelist themselves, I presume there is a maximum? How
close are we to running out? (i.e. is this the best use of a precious
whitelist slot?) Can you put that discussion into the changelog +
comment for wa_ring_whitelist_reg.

How well are whitelists supported in gvt? I presume it is transparent.
-Chris
oscar.mateo@intel.com May 3, 2017, 9:43 a.m. UTC | #2
On 05/03/2017 08:59 AM, Chris Wilson wrote:
> On Tue, May 02, 2017 at 03:08:27PM +0000, Oscar Mateo wrote:
>> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
>> ---
>>   tests/pm_sseu.c | 105 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>   1 file changed, 105 insertions(+)
>>
>> diff --git a/tests/pm_sseu.c b/tests/pm_sseu.c
>> index 7d4b33c..1fb36c5 100644
>> --- a/tests/pm_sseu.c
>> +++ b/tests/pm_sseu.c
>> @@ -352,6 +352,108 @@ full_enable(void)
>>   	check_full_enable(&stat);
>>   }
>>   
>> +#define GEN8_R_PWR_CLK_STATE	(0x20C8)
>> +#define   GEN8_RPCS_ENABLE		(1 << 31)
>> +
>> +#define MI_STORE_REGISTER_MEM_64_BIT_ADDR	((0x24 << 23) | 2)
>> +
>> +static uint32_t read_pwrclk_state(drm_intel_bufmgr *bufmgr,
>> +				  struct intel_batchbuffer *batch,
>> +				  drm_intel_context *context)
>> +{
>> +	uint32_t rpcs_config;
>> +	uint32_t *data;
>> +	drm_intel_bo *dst_bo;
>> +
>> +	dst_bo = drm_intel_bo_alloc(bufmgr, "dst", 4, 4096);
>> +
>> +	BEGIN_BATCH(3, 1);
>> +	OUT_BATCH(MI_STORE_REGISTER_MEM_64_BIT_ADDR);
>> +	OUT_BATCH(GEN8_R_PWR_CLK_STATE);
>> +	OUT_RELOC(dst_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
>> +	ADVANCE_BATCH();
>> +
>> +	intel_batchbuffer_flush_with_context(batch, context);
>> +
>> +	drm_intel_bo_map(dst_bo, 1);
>> +
>> +	data = dst_bo->virtual;
>> +	rpcs_config = *data;
>> +
>> +	drm_intel_bo_unmap(dst_bo);
>> +
>> +	drm_intel_bo_unreference(dst_bo);
>> +
>> +	return rpcs_config;
>> +}
>> +
>> +#define LOCAL_MI_LOAD_REGISTER_IMM	(0x22 << 23)
>> +
>> +#define GFX_OP_PIPE_CONTROL(len)	((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2))
>> +#define   PIPE_CONTROL_CS_STALL				(1<<20)
>> +#define   PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH	(1<<12)
>> +#define   PIPE_CONTROL_FLUSH_ENABLE			(1<<7)
>> +#define   PIPE_CONTROL_DC_FLUSH_ENABLE			(1<<5)
>> +#define   PIPE_CONTROL_DEPTH_CACHE_FLUSH		(1<<0)
>> +
>> +static void write_pwrclk_state(drm_intel_bufmgr *bufmgr,
>> +			       struct intel_batchbuffer *batch,
>> +			       drm_intel_context *context,
>> +			       uint32_t rpcs_config)
>> +{
>> +	drm_intel_bo *dst_bo;
>> +
>> +	dst_bo = drm_intel_bo_alloc(bufmgr, "scratch", 4, 4096);
>> +
>> +	BEGIN_BATCH(9, 1);
>> +	OUT_BATCH(LOCAL_MI_LOAD_REGISTER_IMM | 1);
>> +	OUT_BATCH(GEN8_R_PWR_CLK_STATE);
>> +	OUT_BATCH(rpcs_config);
>> +	OUT_BATCH(GFX_OP_PIPE_CONTROL(6));
>> +	OUT_BATCH(PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
>> +		  PIPE_CONTROL_DEPTH_CACHE_FLUSH |
>> +		  PIPE_CONTROL_DC_FLUSH_ENABLE |
>> +		  PIPE_CONTROL_FLUSH_ENABLE |
>> +		  PIPE_CONTROL_CS_STALL);
>> +	OUT_RELOC(dst_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
>> +	OUT_BATCH(0);
>> +	OUT_BATCH(0);
>> +	OUT_BATCH(0);
>> +	ADVANCE_BATCH();
>> +
>> +	intel_batchbuffer_flush_with_context(batch, context);
>> +
>> +	drm_intel_bo_unreference(dst_bo);
>> +}
>> +
>> +/* Makes sure userspace can configure GEN8_R_PWR_CLK_STATE (e.g. is whitelisted) */
>> +static void
>> +pwrclk_state(void)
>> +{
>> +	drm_intel_context *context;
>> +	uint32_t rpcs_config;
>> +	bool rpcs_enabled;
>> +
>> +	/*
>> +	 * Gen8 BDW is the first case in which usermode can configure their
>> +	 * own render power gating
>> +	*/
>> +	igt_require(gem.gen >= 8);
> Pop quiz: what result does this give on kernels that do not support RPCS
> self-adjustment?
>

The hardware transforms the LRI on the privileged register into a NOOP, 
so we can detect failures to change the value. On kernels that do not 
support RPCS self-adjustment, we hit the assert:

igt_assert_neq(rpcs_enabled, !!(rpcs_config & GEN8_RPCS_ENABLE));

> So we need some method to query the modifiable set of registers. Several
> options: an lrc version getparam, a query via context getparam for is
> this register modifiable, or for userspace to try and fail.
>
> For something like this where the implementation is best effort, trial
> and error is not a bad plan. It just means that this test cannot discern
> a failure (or we say that all previous kernels were mistaken...)
>
> Question on the whitelist themselves, I presume there is a maximum? How
> close are we to running out? (i.e. is this the best use of a precious
> whitelist slot?) Can you put that discussion into the changelog +
> comment for wa_ring_whitelist_reg.
>

We have a maximum of 12 slots per engine (RING_MAX_NONPRIV_SLOTS). Worst 
path at the moment seems to be BXT A1 where we currently use 5 slots in 
the render engine. Next are SKL and KBL (all steppings) with 4.

> How well are whitelists supported in gvt? I presume it is transparent.
> -Chris
>

I imagine it is? I don't really know.
Chris Wilson May 3, 2017, 4:53 p.m. UTC | #3
On Wed, May 03, 2017 at 09:43:08AM +0000, Oscar Mateo wrote:
> 
> 
> 
> On 05/03/2017 08:59 AM, Chris Wilson wrote:
> >On Tue, May 02, 2017 at 03:08:27PM +0000, Oscar Mateo wrote:
> >>Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
> >>Cc: Chris Wilson <chris@chris-wilson.co.uk>
> >>Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
> >>---
> >>  tests/pm_sseu.c | 105 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >>  1 file changed, 105 insertions(+)
> >>
> >>diff --git a/tests/pm_sseu.c b/tests/pm_sseu.c
> >>index 7d4b33c..1fb36c5 100644
> >>--- a/tests/pm_sseu.c
> >>+++ b/tests/pm_sseu.c
> >>@@ -352,6 +352,108 @@ full_enable(void)
> >>  	check_full_enable(&stat);
> >>  }
> >>+#define GEN8_R_PWR_CLK_STATE	(0x20C8)
> >>+#define   GEN8_RPCS_ENABLE		(1 << 31)
> >>+
> >>+#define MI_STORE_REGISTER_MEM_64_BIT_ADDR	((0x24 << 23) | 2)
> >>+
> >>+static uint32_t read_pwrclk_state(drm_intel_bufmgr *bufmgr,
> >>+				  struct intel_batchbuffer *batch,
> >>+				  drm_intel_context *context)
> >>+{
> >>+	uint32_t rpcs_config;
> >>+	uint32_t *data;
> >>+	drm_intel_bo *dst_bo;
> >>+
> >>+	dst_bo = drm_intel_bo_alloc(bufmgr, "dst", 4, 4096);
> >>+
> >>+	BEGIN_BATCH(3, 1);
> >>+	OUT_BATCH(MI_STORE_REGISTER_MEM_64_BIT_ADDR);
> >>+	OUT_BATCH(GEN8_R_PWR_CLK_STATE);
> >>+	OUT_RELOC(dst_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
> >>+	ADVANCE_BATCH();
> >>+
> >>+	intel_batchbuffer_flush_with_context(batch, context);
> >>+
> >>+	drm_intel_bo_map(dst_bo, 1);
> >>+
> >>+	data = dst_bo->virtual;
> >>+	rpcs_config = *data;
> >>+
> >>+	drm_intel_bo_unmap(dst_bo);
> >>+
> >>+	drm_intel_bo_unreference(dst_bo);
> >>+
> >>+	return rpcs_config;
> >>+}
> >>+
> >>+#define LOCAL_MI_LOAD_REGISTER_IMM	(0x22 << 23)
> >>+
> >>+#define GFX_OP_PIPE_CONTROL(len)	((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2))
> >>+#define   PIPE_CONTROL_CS_STALL				(1<<20)
> >>+#define   PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH	(1<<12)
> >>+#define   PIPE_CONTROL_FLUSH_ENABLE			(1<<7)
> >>+#define   PIPE_CONTROL_DC_FLUSH_ENABLE			(1<<5)
> >>+#define   PIPE_CONTROL_DEPTH_CACHE_FLUSH		(1<<0)
> >>+
> >>+static void write_pwrclk_state(drm_intel_bufmgr *bufmgr,
> >>+			       struct intel_batchbuffer *batch,
> >>+			       drm_intel_context *context,
> >>+			       uint32_t rpcs_config)
> >>+{
> >>+	drm_intel_bo *dst_bo;
> >>+
> >>+	dst_bo = drm_intel_bo_alloc(bufmgr, "scratch", 4, 4096);
> >>+
> >>+	BEGIN_BATCH(9, 1);
> >>+	OUT_BATCH(LOCAL_MI_LOAD_REGISTER_IMM | 1);
> >>+	OUT_BATCH(GEN8_R_PWR_CLK_STATE);
> >>+	OUT_BATCH(rpcs_config);
> >>+	OUT_BATCH(GFX_OP_PIPE_CONTROL(6));
> >>+	OUT_BATCH(PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
> >>+		  PIPE_CONTROL_DEPTH_CACHE_FLUSH |
> >>+		  PIPE_CONTROL_DC_FLUSH_ENABLE |
> >>+		  PIPE_CONTROL_FLUSH_ENABLE |
> >>+		  PIPE_CONTROL_CS_STALL);
> >>+	OUT_RELOC(dst_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
> >>+	OUT_BATCH(0);
> >>+	OUT_BATCH(0);
> >>+	OUT_BATCH(0);
> >>+	ADVANCE_BATCH();
> >>+
> >>+	intel_batchbuffer_flush_with_context(batch, context);
> >>+
> >>+	drm_intel_bo_unreference(dst_bo);
> >>+}
> >>+
> >>+/* Makes sure userspace can configure GEN8_R_PWR_CLK_STATE (e.g. is whitelisted) */
> >>+static void
> >>+pwrclk_state(void)
> >>+{
> >>+	drm_intel_context *context;
> >>+	uint32_t rpcs_config;
> >>+	bool rpcs_enabled;
> >>+
> >>+	/*
> >>+	 * Gen8 BDW is the first case in which usermode can configure their
> >>+	 * own render power gating
> >>+	*/
> >>+	igt_require(gem.gen >= 8);
> >Pop quiz: what result does this give on kernels that do not support RPCS
> >self-adjustment?
> >
> 
> The hardware transforms the LRI on the privileged register into a
> NOOP, so we can detect failures to change the value. On kernels that
> do not support RPCS self-adjustment, we hit the assert:
> 
> igt_assert_neq(rpcs_enabled, !!(rpcs_config & GEN8_RPCS_ENABLE));

Exactly. That causes it to FAIL on old kernels, whereas you want to
detect that the kernel doesn't support the feature and report SKIP.
The same question that userspace will ask itself before trying to use a
feature. In this case, a valid response may well be "it doesn't matter
if LRI writes to RPCS fail" but that means we can't differentiate
between FAIL/SKIP here. (Which may be acceptable, just think carefully
and include comments about when a FAIL is valid.)

> >So we need some method to query the modifiable set of registers. Several
> >options: an lrc version getparam, a query via context getparam for is
> >this register modifiable, or for userspace to try and fail.
> >
> >For something like this where the implementation is best effort, trial
> >and error is not a bad plan. It just means that this test cannot discern
> >a failure (or we say that all previous kernels were mistaken...)
> >
> >Question on the whitelist themselves, I presume there is a maximum? How
> >close are we to running out? (i.e. is this the best use of a precious
> >whitelist slot?) Can you put that discussion into the changelog +
> >comment for wa_ring_whitelist_reg.
> >
> 
> We have a maximum of 12 slots per engine (RING_MAX_NONPRIV_SLOTS).
> Worst path at the moment seems to be BXT A1 where we currently use 5
> slots in the render engine. Next are SKL and KBL (all steppings)
> with 4.

And I presume that with more flexible hw with more powergating controls,
those will be automatically user accessible and not require more
precious NONPRIV slots? (Where that power gating can be flexible enough
to grant access to the user, ofc.)
-Chris
oscar.mateo@intel.com May 3, 2017, 5:04 p.m. UTC | #4
On 05/03/2017 04:53 PM, Chris Wilson wrote:
> On Wed, May 03, 2017 at 09:43:08AM +0000, Oscar Mateo wrote:
>>
>>
>> On 05/03/2017 08:59 AM, Chris Wilson wrote:
>>> On Tue, May 02, 2017 at 03:08:27PM +0000, Oscar Mateo wrote:
>>>> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
>>>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>>>> Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
>>>> ---
>>>>   tests/pm_sseu.c | 105 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>   1 file changed, 105 insertions(+)
>>>>
>>>> diff --git a/tests/pm_sseu.c b/tests/pm_sseu.c
>>>> index 7d4b33c..1fb36c5 100644
>>>> --- a/tests/pm_sseu.c
>>>> +++ b/tests/pm_sseu.c
>>>> @@ -352,6 +352,108 @@ full_enable(void)
>>>>   	check_full_enable(&stat);
>>>>   }
>>>> +#define GEN8_R_PWR_CLK_STATE	(0x20C8)
>>>> +#define   GEN8_RPCS_ENABLE		(1 << 31)
>>>> +
>>>> +#define MI_STORE_REGISTER_MEM_64_BIT_ADDR	((0x24 << 23) | 2)
>>>> +
>>>> +static uint32_t read_pwrclk_state(drm_intel_bufmgr *bufmgr,
>>>> +				  struct intel_batchbuffer *batch,
>>>> +				  drm_intel_context *context)
>>>> +{
>>>> +	uint32_t rpcs_config;
>>>> +	uint32_t *data;
>>>> +	drm_intel_bo *dst_bo;
>>>> +
>>>> +	dst_bo = drm_intel_bo_alloc(bufmgr, "dst", 4, 4096);
>>>> +
>>>> +	BEGIN_BATCH(3, 1);
>>>> +	OUT_BATCH(MI_STORE_REGISTER_MEM_64_BIT_ADDR);
>>>> +	OUT_BATCH(GEN8_R_PWR_CLK_STATE);
>>>> +	OUT_RELOC(dst_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
>>>> +	ADVANCE_BATCH();
>>>> +
>>>> +	intel_batchbuffer_flush_with_context(batch, context);
>>>> +
>>>> +	drm_intel_bo_map(dst_bo, 1);
>>>> +
>>>> +	data = dst_bo->virtual;
>>>> +	rpcs_config = *data;
>>>> +
>>>> +	drm_intel_bo_unmap(dst_bo);
>>>> +
>>>> +	drm_intel_bo_unreference(dst_bo);
>>>> +
>>>> +	return rpcs_config;
>>>> +}
>>>> +
>>>> +#define LOCAL_MI_LOAD_REGISTER_IMM	(0x22 << 23)
>>>> +
>>>> +#define GFX_OP_PIPE_CONTROL(len)	((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2))
>>>> +#define   PIPE_CONTROL_CS_STALL				(1<<20)
>>>> +#define   PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH	(1<<12)
>>>> +#define   PIPE_CONTROL_FLUSH_ENABLE			(1<<7)
>>>> +#define   PIPE_CONTROL_DC_FLUSH_ENABLE			(1<<5)
>>>> +#define   PIPE_CONTROL_DEPTH_CACHE_FLUSH		(1<<0)
>>>> +
>>>> +static void write_pwrclk_state(drm_intel_bufmgr *bufmgr,
>>>> +			       struct intel_batchbuffer *batch,
>>>> +			       drm_intel_context *context,
>>>> +			       uint32_t rpcs_config)
>>>> +{
>>>> +	drm_intel_bo *dst_bo;
>>>> +
>>>> +	dst_bo = drm_intel_bo_alloc(bufmgr, "scratch", 4, 4096);
>>>> +
>>>> +	BEGIN_BATCH(9, 1);
>>>> +	OUT_BATCH(LOCAL_MI_LOAD_REGISTER_IMM | 1);
>>>> +	OUT_BATCH(GEN8_R_PWR_CLK_STATE);
>>>> +	OUT_BATCH(rpcs_config);
>>>> +	OUT_BATCH(GFX_OP_PIPE_CONTROL(6));
>>>> +	OUT_BATCH(PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
>>>> +		  PIPE_CONTROL_DEPTH_CACHE_FLUSH |
>>>> +		  PIPE_CONTROL_DC_FLUSH_ENABLE |
>>>> +		  PIPE_CONTROL_FLUSH_ENABLE |
>>>> +		  PIPE_CONTROL_CS_STALL);
>>>> +	OUT_RELOC(dst_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
>>>> +	OUT_BATCH(0);
>>>> +	OUT_BATCH(0);
>>>> +	OUT_BATCH(0);
>>>> +	ADVANCE_BATCH();
>>>> +
>>>> +	intel_batchbuffer_flush_with_context(batch, context);
>>>> +
>>>> +	drm_intel_bo_unreference(dst_bo);
>>>> +}
>>>> +
>>>> +/* Makes sure userspace can configure GEN8_R_PWR_CLK_STATE (e.g. is whitelisted) */
>>>> +static void
>>>> +pwrclk_state(void)
>>>> +{
>>>> +	drm_intel_context *context;
>>>> +	uint32_t rpcs_config;
>>>> +	bool rpcs_enabled;
>>>> +
>>>> +	/*
>>>> +	 * Gen8 BDW is the first case in which usermode can configure their
>>>> +	 * own render power gating
>>>> +	*/
>>>> +	igt_require(gem.gen >= 8);
>>> Pop quiz: what result does this give on kernels that do not support RPCS
>>> self-adjustment?
>>>
>> The hardware transforms the LRI on the privileged register into a
>> NOOP, so we can detect failures to change the value. On kernels that
>> do not support RPCS self-adjustment, we hit the assert:
>>
>> igt_assert_neq(rpcs_enabled, !!(rpcs_config & GEN8_RPCS_ENABLE));
> Exactly. That causes it to FAIL on old kernels, whereas you want to
> detect that the kernel doesn't support the feature and report SKIP.
> The same question that userspace will ask itself before trying to use a
> feature. In this case, a valid response may well be "it doesn't matter
> if LRI writes to RPCS fail" but that means we can't differentiate
> between FAIL/SKIP here. (Which may be acceptable, just think carefully
> and include comments about when a FAIL is valid.)

Ah, I see what you mean now. I'll add a comment to that effect.

>>> So we need some method to query the modifiable set of registers. Several
>>> options: an lrc version getparam, a query via context getparam for is
>>> this register modifiable, or for userspace to try and fail.
>>>
>>> For something like this where the implementation is best effort, trial
>>> and error is not a bad plan. It just means that this test cannot discern
>>> a failure (or we say that all previous kernels were mistaken...)
>>>
>>> Question on the whitelist themselves, I presume there is a maximum? How
>>> close are we to running out? (i.e. is this the best use of a precious
>>> whitelist slot?) Can you put that discussion into the changelog +
>>> comment for wa_ring_whitelist_reg.
>>>
>> We have a maximum of 12 slots per engine (RING_MAX_NONPRIV_SLOTS).
>> Worst path at the moment seems to be BXT A1 where we currently use 5
>> slots in the render engine. Next are SKL and KBL (all steppings)
>> with 4.
> And I presume that with more flexible hw with more powergating controls,
> those will be automatically user accessible and not require more
> precious NONPRIV slots? (Where that power gating can be flexible enough
> to grant access to the user, ofc.)
> -Chris
>

One can only hope :)
diff mbox

Patch

diff --git a/tests/pm_sseu.c b/tests/pm_sseu.c
index 7d4b33c..1fb36c5 100644
--- a/tests/pm_sseu.c
+++ b/tests/pm_sseu.c
@@ -352,6 +352,108 @@  full_enable(void)
 	check_full_enable(&stat);
 }
 
+#define GEN8_R_PWR_CLK_STATE	(0x20C8)
+#define   GEN8_RPCS_ENABLE		(1 << 31)
+
+#define MI_STORE_REGISTER_MEM_64_BIT_ADDR	((0x24 << 23) | 2)
+
+static uint32_t read_pwrclk_state(drm_intel_bufmgr *bufmgr,
+				  struct intel_batchbuffer *batch,
+				  drm_intel_context *context)
+{
+	uint32_t rpcs_config;
+	uint32_t *data;
+	drm_intel_bo *dst_bo;
+
+	dst_bo = drm_intel_bo_alloc(bufmgr, "dst", 4, 4096);
+
+	BEGIN_BATCH(3, 1);
+	OUT_BATCH(MI_STORE_REGISTER_MEM_64_BIT_ADDR);
+	OUT_BATCH(GEN8_R_PWR_CLK_STATE);
+	OUT_RELOC(dst_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+	ADVANCE_BATCH();
+
+	intel_batchbuffer_flush_with_context(batch, context);
+
+	drm_intel_bo_map(dst_bo, 1);
+
+	data = dst_bo->virtual;
+	rpcs_config = *data;
+
+	drm_intel_bo_unmap(dst_bo);
+
+	drm_intel_bo_unreference(dst_bo);
+
+	return rpcs_config;
+}
+
+#define LOCAL_MI_LOAD_REGISTER_IMM	(0x22 << 23)
+
+#define GFX_OP_PIPE_CONTROL(len)	((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2))
+#define   PIPE_CONTROL_CS_STALL				(1<<20)
+#define   PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH	(1<<12)
+#define   PIPE_CONTROL_FLUSH_ENABLE			(1<<7)
+#define   PIPE_CONTROL_DC_FLUSH_ENABLE			(1<<5)
+#define   PIPE_CONTROL_DEPTH_CACHE_FLUSH		(1<<0)
+
+static void write_pwrclk_state(drm_intel_bufmgr *bufmgr,
+			       struct intel_batchbuffer *batch,
+			       drm_intel_context *context,
+			       uint32_t rpcs_config)
+{
+	drm_intel_bo *dst_bo;
+
+	dst_bo = drm_intel_bo_alloc(bufmgr, "scratch", 4, 4096);
+
+	BEGIN_BATCH(9, 1);
+	OUT_BATCH(LOCAL_MI_LOAD_REGISTER_IMM | 1);
+	OUT_BATCH(GEN8_R_PWR_CLK_STATE);
+	OUT_BATCH(rpcs_config);
+	OUT_BATCH(GFX_OP_PIPE_CONTROL(6));
+	OUT_BATCH(PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+		  PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+		  PIPE_CONTROL_DC_FLUSH_ENABLE |
+		  PIPE_CONTROL_FLUSH_ENABLE |
+		  PIPE_CONTROL_CS_STALL);
+	OUT_RELOC(dst_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	ADVANCE_BATCH();
+
+	intel_batchbuffer_flush_with_context(batch, context);
+
+	drm_intel_bo_unreference(dst_bo);
+}
+
+/* Makes sure userspace can configure GEN8_R_PWR_CLK_STATE (e.g. is whitelisted) */
+static void
+pwrclk_state(void)
+{
+	drm_intel_context *context;
+	uint32_t rpcs_config;
+	bool rpcs_enabled;
+
+	/*
+	 * Gen8 BDW is the first case in which usermode can configure their
+	 * own render power gating
+	*/
+	igt_require(gem.gen >= 8);
+
+	context = drm_intel_gem_context_create(gem.bufmgr);
+
+	rpcs_config = read_pwrclk_state(gem.bufmgr, gem.batch, context);
+	rpcs_enabled = rpcs_config & GEN8_RPCS_ENABLE;
+
+	rpcs_config ^= GEN8_RPCS_ENABLE;
+	write_pwrclk_state(gem.bufmgr, gem.batch, context, rpcs_config);
+
+	rpcs_config = read_pwrclk_state(gem.bufmgr, gem.batch, context);
+	igt_assert_neq(rpcs_enabled, !!(rpcs_config & GEN8_RPCS_ENABLE));
+
+	drm_intel_gem_context_destroy(context);
+}
+
 static void
 exit_handler(int sig)
 {
@@ -370,4 +472,7 @@  igt_main
 
 	igt_subtest("full-enable")
 		full_enable();
+
+	igt_subtest("pwrclk-state")
+		pwrclk_state();
 }