diff mbox series

drm/i915/selftests: keep same cache settings as timeline

Message ID 20230315180800.2632766-1-fei.yang@intel.com (mailing list archive)
State New, archived
Headers show
Series drm/i915/selftests: keep same cache settings as timeline | expand

Commit Message

Yang, Fei March 15, 2023, 6:08 p.m. UTC
From: Fei Yang <fei.yang@intel.com>

On MTL, objects allocated through i915_gem_object_create_internal() are
mapped as uncached in GPU by default because HAS_LLC is false. However
in the live_hwsp_read selftest these watcher objects are mapped as WB
on CPU side. The conseqence is that the updates done by the GPU are not
immediately visible to CPU, thus the selftest is randomly failing due to
the stale data in CPU cache. Solution can be either setting WC for CPU +
UC for GPU, or WB for CPU + 1-way coherent WB for GPU.
To keep the consistency, let's simply inherit the same cache settings
from the timeline, which is WB for CPU + 1-way coherent WB for GPU,
because this test is supposed to emulate the behavior of the timeline
anyway.

Signed-off-by: Fei Yang <fei.yang@intel.com>
---
 drivers/gpu/drm/i915/gt/selftest_timeline.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

Comments

Matt Roper March 17, 2023, 12:21 a.m. UTC | #1
On Wed, Mar 15, 2023 at 11:08:00AM -0700, fei.yang@intel.com wrote:
> From: Fei Yang <fei.yang@intel.com>
> 
> On MTL, objects allocated through i915_gem_object_create_internal() are
> mapped as uncached in GPU by default because HAS_LLC is false. However
> in the live_hwsp_read selftest these watcher objects are mapped as WB
> on CPU side. The conseqence is that the updates done by the GPU are not
> immediately visible to CPU, thus the selftest is randomly failing due to
> the stale data in CPU cache. Solution can be either setting WC for CPU +
> UC for GPU, or WB for CPU + 1-way coherent WB for GPU.
> To keep the consistency, let's simply inherit the same cache settings
> from the timeline, which is WB for CPU + 1-way coherent WB for GPU,
> because this test is supposed to emulate the behavior of the timeline
> anyway.
> 
> Signed-off-by: Fei Yang <fei.yang@intel.com>

It looks like there might be an indentation mistake on the second line
of the i915_gem_object_pin_map_unlocked() call, but we can fix that up
while applying; no need to re-send.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>

Is there an FDO issue # for the random failures thar were being seen?
If so, we should add a Closes: or References: tag here.


Matt

> ---
>  drivers/gpu/drm/i915/gt/selftest_timeline.c | 14 +++++++++++---
>  1 file changed, 11 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
> index 522d0190509c..631aaed9bc3d 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
> @@ -825,7 +825,8 @@ static bool cmp_gte(u32 a, u32 b)
>  	return a >= b;
>  }
>  
> -static int setup_watcher(struct hwsp_watcher *w, struct intel_gt *gt)
> +static int setup_watcher(struct hwsp_watcher *w, struct intel_gt *gt,
> +			 struct intel_timeline *tl)
>  {
>  	struct drm_i915_gem_object *obj;
>  	struct i915_vma *vma;
> @@ -834,7 +835,10 @@ static int setup_watcher(struct hwsp_watcher *w, struct intel_gt *gt)
>  	if (IS_ERR(obj))
>  		return PTR_ERR(obj);
>  
> -	w->map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
> +	/* keep the same cache settings as timeline */
> +	i915_gem_object_set_cache_coherency(obj, tl->hwsp_ggtt->obj->cache_level);
> +	w->map = i915_gem_object_pin_map_unlocked(obj,
> +			page_unmask_bits(tl->hwsp_ggtt->obj->mm.mapping));
>  	if (IS_ERR(w->map)) {
>  		i915_gem_object_put(obj);
>  		return PTR_ERR(w->map);
> @@ -1004,8 +1008,10 @@ static int live_hwsp_read(void *arg)
>  	if (!tl->has_initial_breadcrumb)
>  		goto out_free;
>  
> +	selftest_tl_pin(tl);
> +
>  	for (i = 0; i < ARRAY_SIZE(watcher); i++) {
> -		err = setup_watcher(&watcher[i], gt);
> +		err = setup_watcher(&watcher[i], gt, tl);
>  		if (err)
>  			goto out;
>  	}
> @@ -1160,6 +1166,8 @@ static int live_hwsp_read(void *arg)
>  	for (i = 0; i < ARRAY_SIZE(watcher); i++)
>  		cleanup_watcher(&watcher[i]);
>  
> +	intel_timeline_unpin(tl);
> +
>  	if (igt_flush_test(gt->i915))
>  		err = -EIO;
>  
> -- 
> 2.25.1
>
Yang, Fei March 17, 2023, 3:43 a.m. UTC | #2
>> From: Fei Yang <fei.yang@intel.com>
>>
>> On MTL, objects allocated through i915_gem_object_create_internal() are
>> mapped as uncached in GPU by default because HAS_LLC is false. However
>> in the live_hwsp_read selftest these watcher objects are mapped as WB
>> on CPU side. The conseqence is that the updates done by the GPU are not
>> immediately visible to CPU, thus the selftest is randomly failing due to
>> the stale data in CPU cache. Solution can be either setting WC for CPU +
>> UC for GPU, or WB for CPU + 1-way coherent WB for GPU.
>> To keep the consistency, let's simply inherit the same cache settings
>> from the timeline, which is WB for CPU + 1-way coherent WB for GPU,
>> because this test is supposed to emulate the behavior of the timeline
>> anyway.
>>
>> Signed-off-by: Fei Yang <fei.yang@intel.com>
>
> It looks like there might be an indentation mistake on the second line
> of the i915_gem_object_pin_map_unlocked() call, but we can fix that up
> while applying; no need to re-send.
>
> Reviewed-by: Matt Roper <matthew.d.roper@intel.com>

Thanks for reviewing.

> Is there an FDO issue # for the random failures thar were being seen?
> If so, we should add a Closes: or References: tag here.

I'm not aware of a FDO filed for this failure. That might be because the
issue is reproduced on MTL which might not be widely available to the
community yet.

> Matt
>> ---
>>  drivers/gpu/drm/i915/gt/selftest_timeline.c | 14 +++++++++++---
>>  1 file changed, 11 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
>> index 522d0190509c..631aaed9bc3d 100644
>> --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
>> +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
>> @@ -825,7 +825,8 @@ static bool cmp_gte(u32 a, u32 b)
>>        return a >= b;
>>  }
>>
>> -static int setup_watcher(struct hwsp_watcher *w, struct intel_gt *gt)
>> +static int setup_watcher(struct hwsp_watcher *w, struct intel_gt *gt,
>> +                      struct intel_timeline *tl)
>>  {
>>        struct drm_i915_gem_object *obj;
>>        struct i915_vma *vma;
>> @@ -834,7 +835,10 @@ static int setup_watcher(struct hwsp_watcher *w, struct intel_gt *gt)
>>        if (IS_ERR(obj))
>>                return PTR_ERR(obj);
>>
>> -     w->map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
>> +     /* keep the same cache settings as timeline */
>> +     i915_gem_object_set_cache_coherency(obj, tl->hwsp_ggtt->obj->cache_level);
>> +     w->map = i915_gem_object_pin_map_unlocked(obj,
>> +                     page_unmask_bits(tl->hwsp_ggtt->obj->mm.mapping));
>>        if (IS_ERR(w->map)) {
>>                i915_gem_object_put(obj);
>>                return PTR_ERR(w->map);
>> @@ -1004,8 +1008,10 @@ static int live_hwsp_read(void *arg)
>>        if (!tl->has_initial_breadcrumb)
>>                goto out_free;
>>
>> +     selftest_tl_pin(tl);
>> +
>>        for (i = 0; i < ARRAY_SIZE(watcher); i++) {
>> -             err = setup_watcher(&watcher[i], gt);
>> +             err = setup_watcher(&watcher[i], gt, tl);
>>                if (err)
>>                        goto out;
>>        }
>> @@ -1160,6 +1166,8 @@ static int live_hwsp_read(void *arg)
>>        for (i = 0; i < ARRAY_SIZE(watcher); i++)
>>                cleanup_watcher(&watcher[i]);
>>
>> +     intel_timeline_unpin(tl);
>> +
>>        if (igt_flush_test(gt->i915))
>>                err = -EIO;
>>
>> --
>> 2.25.1
Matt Roper March 17, 2023, 4:38 p.m. UTC | #3
On Thu, Mar 16, 2023 at 08:43:46PM -0700, Yang, Fei wrote:
> >> From: Fei Yang <fei.yang@intel.com>
> >>
> >> On MTL, objects allocated through i915_gem_object_create_internal() are
> >> mapped as uncached in GPU by default because HAS_LLC is false. However
> >> in the live_hwsp_read selftest these watcher objects are mapped as WB
> >> on CPU side. The conseqence is that the updates done by the GPU are not
> >> immediately visible to CPU, thus the selftest is randomly failing due to
> >> the stale data in CPU cache. Solution can be either setting WC for CPU +
> >> UC for GPU, or WB for CPU + 1-way coherent WB for GPU.
> >> To keep the consistency, let's simply inherit the same cache settings
> >> from the timeline, which is WB for CPU + 1-way coherent WB for GPU,
> >> because this test is supposed to emulate the behavior of the timeline
> >> anyway.
> >>
> >> Signed-off-by: Fei Yang <fei.yang@intel.com>
> >
> > It looks like there might be an indentation mistake on the second line
> > of the i915_gem_object_pin_map_unlocked() call, but we can fix that up
> > while applying; no need to re-send.
> >
> > Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
> 
> Thanks for reviewing.
> 
> > Is there an FDO issue # for the random failures thar were being seen?
> > If so, we should add a Closes: or References: tag here.
> 
> I'm not aware of a FDO filed for this failure. That might be because the
> issue is reproduced on MTL which might not be widely available to the
> community yet.

Yeah, I was thinking CI would have filed some, but I just remembered we
don't have public CI setup yet for MTL, so no automated bugs are coming
in yet.

Applied to drm-intel-gt-next.  Thanks for the patch.


Matt

> 
> > Matt
> >> ---
> >>  drivers/gpu/drm/i915/gt/selftest_timeline.c | 14 +++++++++++---
> >>  1 file changed, 11 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
> >> index 522d0190509c..631aaed9bc3d 100644
> >> --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
> >> +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
> >> @@ -825,7 +825,8 @@ static bool cmp_gte(u32 a, u32 b)
> >>        return a >= b;
> >>  }
> >>
> >> -static int setup_watcher(struct hwsp_watcher *w, struct intel_gt *gt)
> >> +static int setup_watcher(struct hwsp_watcher *w, struct intel_gt *gt,
> >> +                      struct intel_timeline *tl)
> >>  {
> >>        struct drm_i915_gem_object *obj;
> >>        struct i915_vma *vma;
> >> @@ -834,7 +835,10 @@ static int setup_watcher(struct hwsp_watcher *w, struct intel_gt *gt)
> >>        if (IS_ERR(obj))
> >>                return PTR_ERR(obj);
> >>
> >> -     w->map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
> >> +     /* keep the same cache settings as timeline */
> >> +     i915_gem_object_set_cache_coherency(obj, tl->hwsp_ggtt->obj->cache_level);
> >> +     w->map = i915_gem_object_pin_map_unlocked(obj,
> >> +                     page_unmask_bits(tl->hwsp_ggtt->obj->mm.mapping));
> >>        if (IS_ERR(w->map)) {
> >>                i915_gem_object_put(obj);
> >>                return PTR_ERR(w->map);
> >> @@ -1004,8 +1008,10 @@ static int live_hwsp_read(void *arg)
> >>        if (!tl->has_initial_breadcrumb)
> >>                goto out_free;
> >>
> >> +     selftest_tl_pin(tl);
> >> +
> >>        for (i = 0; i < ARRAY_SIZE(watcher); i++) {
> >> -             err = setup_watcher(&watcher[i], gt);
> >> +             err = setup_watcher(&watcher[i], gt, tl);
> >>                if (err)
> >>                        goto out;
> >>        }
> >> @@ -1160,6 +1166,8 @@ static int live_hwsp_read(void *arg)
> >>        for (i = 0; i < ARRAY_SIZE(watcher); i++)
> >>                cleanup_watcher(&watcher[i]);
> >>
> >> +     intel_timeline_unpin(tl);
> >> +
> >>        if (igt_flush_test(gt->i915))
> >>                err = -EIO;
> >>
> >> --
> >> 2.25.1
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
index 522d0190509c..631aaed9bc3d 100644
--- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
@@ -825,7 +825,8 @@  static bool cmp_gte(u32 a, u32 b)
 	return a >= b;
 }
 
-static int setup_watcher(struct hwsp_watcher *w, struct intel_gt *gt)
+static int setup_watcher(struct hwsp_watcher *w, struct intel_gt *gt,
+			 struct intel_timeline *tl)
 {
 	struct drm_i915_gem_object *obj;
 	struct i915_vma *vma;
@@ -834,7 +835,10 @@  static int setup_watcher(struct hwsp_watcher *w, struct intel_gt *gt)
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
-	w->map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
+	/* keep the same cache settings as timeline */
+	i915_gem_object_set_cache_coherency(obj, tl->hwsp_ggtt->obj->cache_level);
+	w->map = i915_gem_object_pin_map_unlocked(obj,
+			page_unmask_bits(tl->hwsp_ggtt->obj->mm.mapping));
 	if (IS_ERR(w->map)) {
 		i915_gem_object_put(obj);
 		return PTR_ERR(w->map);
@@ -1004,8 +1008,10 @@  static int live_hwsp_read(void *arg)
 	if (!tl->has_initial_breadcrumb)
 		goto out_free;
 
+	selftest_tl_pin(tl);
+
 	for (i = 0; i < ARRAY_SIZE(watcher); i++) {
-		err = setup_watcher(&watcher[i], gt);
+		err = setup_watcher(&watcher[i], gt, tl);
 		if (err)
 			goto out;
 	}
@@ -1160,6 +1166,8 @@  static int live_hwsp_read(void *arg)
 	for (i = 0; i < ARRAY_SIZE(watcher); i++)
 		cleanup_watcher(&watcher[i]);
 
+	intel_timeline_unpin(tl);
+
 	if (igt_flush_test(gt->i915))
 		err = -EIO;