diff mbox

drm/i915: Re-enable preallocated top level PDPs support

Message ID 1484661972-9366-2-git-send-email-zhi.a.wang@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Wang, Zhi A Jan. 17, 2017, 2:06 p.m. UTC
After PPGTT page table is able to be shrinken, the preallocated PDPs and
PDE pages can be freed even they are preallocated under 3-level PPGTT
mode. This patch re-enables preallocated top level PDPs and PDE pages
like before.

Cc: Michał Winiarski <michal.winiarski@intel.com>
Cc: Michel Thierry <michel.thierry@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
Cc: Zhiyuan Lv <zhiyuan.lv@intel.com>
Signed-off-by: Zhi Wang <zhi.a.wang@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 10 +++++++++-
 drivers/gpu/drm/i915/i915_gem_gtt.h |  1 +
 drivers/gpu/drm/i915/intel_lrc.c    |  2 +-
 3 files changed, 11 insertions(+), 2 deletions(-)

Comments

Michał Winiarski Jan. 19, 2017, 11:52 a.m. UTC | #1
On Tue, Jan 17, 2017 at 10:06:12PM +0800, Zhi Wang wrote:
> After PPGTT page table is able to be shrinken, the preallocated PDPs and
> PDE pages can be freed even they are preallocated under 3-level PPGTT
> mode. This patch re-enables preallocated top level PDPs and PDE pages
> like before.
> 
> Cc: Michał Winiarski <michal.winiarski@intel.com>
> Cc: Michel Thierry <michel.thierry@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
> Cc: Zhiyuan Lv <zhiyuan.lv@intel.com>
> Signed-off-by: Zhi Wang <zhi.a.wang@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 10 +++++++++-
>  drivers/gpu/drm/i915/i915_gem_gtt.h |  1 +
>  drivers/gpu/drm/i915/intel_lrc.c    |  2 +-
>  3 files changed, 11 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 8aca11f..f0e1992 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -793,12 +793,18 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
>  	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>  	struct i915_page_directory *pd;
>  	uint64_t pdpe;
> +	bool pd_is_empty;
>  
>  	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
>  		if (WARN_ON(!pdp->page_directory[pdpe]))
>  			break;
>  
> -		if (gen8_ppgtt_clear_pd(vm, pd, start, length)) {
> +		pd_is_empty = gen8_ppgtt_clear_pd(vm, pd, start, length);

Why the extra pd_is_empty variable?
Just adding if (preallocate) continue; here is more readable imho.

We should also assert that we're not in 4-level paging mode when shrinking is
skipped.

With those changes:

Reviewed-by: Michał Winiarski <michal.winiarski@intel.com>

-Michał

> +		/* Do not free pd pages if pdps are preallocated. */
> +		if (ppgtt->preallocate_top_level_pdps)
> +			continue;
> +
> +		if (pd_is_empty) {
>  			__clear_bit(pdpe, pdp->used_pdpes);
>  			gen8_setup_pdpe(ppgtt, pdp, vm->scratch_pd, pdpe);
>  			free_pd(vm->i915, pd);
> @@ -1545,6 +1551,8 @@ static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
>  
>  	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
>  
> +	ppgtt->preallocate_top_level_pdps = true;
> +
>  	return ret;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index 34a4fd5..f325cb8 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -349,6 +349,7 @@ struct i915_hw_ppgtt {
>  	struct kref ref;
>  	struct drm_mm_node node;
>  	unsigned long pd_dirty_rings;
> +	bool preallocate_top_level_pdps;
>  	union {
>  		struct i915_pml4 pml4;		/* GEN8+ & 48b PPGTT */
>  		struct i915_page_directory_pointer pdp;	/* GEN8+ */
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index db714dc..766a91a 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1488,7 +1488,7 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
>  	if (req->ctx->ppgtt &&
>  	    (intel_engine_flag(req->engine) & req->ctx->ppgtt->pd_dirty_rings)) {
>  		if (!USES_FULL_48BIT_PPGTT(req->i915) &&
> -		    !intel_vgpu_active(req->i915)) {
> +		    !req->ctx->ppgtt->preallocate_top_level_pdps) {
>  			ret = intel_logical_ring_emit_pdps(req);
>  			if (ret)
>  				return ret;
> -- 
> 1.9.1
>
Chris Wilson Jan. 19, 2017, 12:09 p.m. UTC | #2
On Thu, Jan 19, 2017 at 12:52:39PM +0100, Michał Winiarski wrote:
> On Tue, Jan 17, 2017 at 10:06:12PM +0800, Zhi Wang wrote:
> > After PPGTT page table is able to be shrinken, the preallocated PDPs and
> > PDE pages can be freed even they are preallocated under 3-level PPGTT
> > mode. This patch re-enables preallocated top level PDPs and PDE pages
> > like before.
> > 
> > Cc: Michał Winiarski <michal.winiarski@intel.com>
> > Cc: Michel Thierry <michel.thierry@intel.com>
> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > Cc: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
> > Cc: Zhiyuan Lv <zhiyuan.lv@intel.com>
> > Signed-off-by: Zhi Wang <zhi.a.wang@intel.com>
> > ---
> >  drivers/gpu/drm/i915/i915_gem_gtt.c | 10 +++++++++-
> >  drivers/gpu/drm/i915/i915_gem_gtt.h |  1 +
> >  drivers/gpu/drm/i915/intel_lrc.c    |  2 +-
> >  3 files changed, 11 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > index 8aca11f..f0e1992 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > @@ -793,12 +793,18 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
> >  	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
> >  	struct i915_page_directory *pd;
> >  	uint64_t pdpe;
> > +	bool pd_is_empty;
> >  
> >  	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
> >  		if (WARN_ON(!pdp->page_directory[pdpe]))
> >  			break;
> >  
> > -		if (gen8_ppgtt_clear_pd(vm, pd, start, length)) {
> > +		pd_is_empty = gen8_ppgtt_clear_pd(vm, pd, start, length);
> 
> Why the extra pd_is_empty variable?
> Just adding if (preallocate) continue; here is more readable imho.
> 
> We should also assert that we're not in 4-level paging mode when shrinking is
> skipped.

I've restructured this so that we only shrink from clear_range_4lvl.
Having written the tests to exercise your code and put it to use.

Michał can you look at the ppgtt selftests can see if you can think of
any other way we might be able to exercise the alloc/insert/clear?
-Chris
Wang, Zhi A Jan. 20, 2017, 9:11 a.m. UTC | #3
Hi Guys:
     Please don't merge this patch at this time as I found another problem about alias PPGTT. 

It seems the alias PPGTT on GEN8+ is also broken under native i915. The first allocate_va_range() in alias PPGTT initialization path will allocate all the page tables and the next clear_range() will shrink the allocated page table. It's OK at this time, but when doing VMA binding, alias_ppgtt_bind_vma() will directly insert PTE entries without calling a allocate_va_range() to create the page table first.  This causes an OOPS on my machine.

It looks like we have two options:

1. Let the alias PPGTT page table also be shrinkable.
2. Follow the previous approach, don't shrink the alias PPGTT page table(Probably add some hack in clear_range() path.

-----Original Message-----
From: Winiarski, Michal 

Sent: Thursday, January 19, 2017 7:53 PM
To: Wang, Zhi A <zhi.a.wang@intel.com>
Cc: intel-gfx@lists.freedesktop.org; Thierry, Michel <michel.thierry@intel.com>; Joonas Lahtinen <joonas.lahtinen@linux.intel.com>; Chris Wilson <chris@chris-wilson.co.uk>; Zhenyu Wang <zhenyuw@linux.intel.com>; Lv, Zhiyuan <zhiyuan.lv@intel.com>
Subject: Re: [PATCH] drm/i915: Re-enable preallocated top level PDPs support

On Tue, Jan 17, 2017 at 10:06:12PM +0800, Zhi Wang wrote:
> After PPGTT page table is able to be shrinken, the preallocated PDPs 

> and PDE pages can be freed even they are preallocated under 3-level 

> PPGTT mode. This patch re-enables preallocated top level PDPs and PDE 

> pages like before.

> 

> Cc: Michał Winiarski <michal.winiarski@intel.com>

> Cc: Michel Thierry <michel.thierry@intel.com>

> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

> Cc: Chris Wilson <chris@chris-wilson.co.uk>

> Cc: Zhenyu Wang <zhenyuw@linux.intel.com>

> Cc: Zhiyuan Lv <zhiyuan.lv@intel.com>

> Signed-off-by: Zhi Wang <zhi.a.wang@intel.com>

> ---

>  drivers/gpu/drm/i915/i915_gem_gtt.c | 10 +++++++++-  

> drivers/gpu/drm/i915/i915_gem_gtt.h |  1 +

>  drivers/gpu/drm/i915/intel_lrc.c    |  2 +-

>  3 files changed, 11 insertions(+), 2 deletions(-)

> 

> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 

> b/drivers/gpu/drm/i915/i915_gem_gtt.c

> index 8aca11f..f0e1992 100644

> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c

> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c

> @@ -793,12 +793,18 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,

>  	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);

>  	struct i915_page_directory *pd;

>  	uint64_t pdpe;

> +	bool pd_is_empty;

>  

>  	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {

>  		if (WARN_ON(!pdp->page_directory[pdpe]))

>  			break;

>  

> -		if (gen8_ppgtt_clear_pd(vm, pd, start, length)) {

> +		pd_is_empty = gen8_ppgtt_clear_pd(vm, pd, start, length);


Why the extra pd_is_empty variable?
Just adding if (preallocate) continue; here is more readable imho.

We should also assert that we're not in 4-level paging mode when shrinking is skipped.

With those changes:

Reviewed-by: Michał Winiarski <michal.winiarski@intel.com>


-Michał

> +		/* Do not free pd pages if pdps are preallocated. */

> +		if (ppgtt->preallocate_top_level_pdps)

> +			continue;

> +

> +		if (pd_is_empty) {

>  			__clear_bit(pdpe, pdp->used_pdpes);

>  			gen8_setup_pdpe(ppgtt, pdp, vm->scratch_pd, pdpe);

>  			free_pd(vm->i915, pd);

> @@ -1545,6 +1551,8 @@ static int 

> gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)

>  

>  	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);

>  

> +	ppgtt->preallocate_top_level_pdps = true;

> +

>  	return ret;

>  }

>  

> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h 

> b/drivers/gpu/drm/i915/i915_gem_gtt.h

> index 34a4fd5..f325cb8 100644

> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h

> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h

> @@ -349,6 +349,7 @@ struct i915_hw_ppgtt {

>  	struct kref ref;

>  	struct drm_mm_node node;

>  	unsigned long pd_dirty_rings;

> +	bool preallocate_top_level_pdps;

>  	union {

>  		struct i915_pml4 pml4;		/* GEN8+ & 48b PPGTT */

>  		struct i915_page_directory_pointer pdp;	/* GEN8+ */

> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 

> b/drivers/gpu/drm/i915/intel_lrc.c

> index db714dc..766a91a 100644

> --- a/drivers/gpu/drm/i915/intel_lrc.c

> +++ b/drivers/gpu/drm/i915/intel_lrc.c

> @@ -1488,7 +1488,7 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req,

>  	if (req->ctx->ppgtt &&

>  	    (intel_engine_flag(req->engine) & req->ctx->ppgtt->pd_dirty_rings)) {

>  		if (!USES_FULL_48BIT_PPGTT(req->i915) &&

> -		    !intel_vgpu_active(req->i915)) {

> +		    !req->ctx->ppgtt->preallocate_top_level_pdps) {

>  			ret = intel_logical_ring_emit_pdps(req);

>  			if (ret)

>  				return ret;

> --

> 1.9.1

>
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 8aca11f..f0e1992 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -793,12 +793,18 @@  static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 	struct i915_page_directory *pd;
 	uint64_t pdpe;
+	bool pd_is_empty;
 
 	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
 		if (WARN_ON(!pdp->page_directory[pdpe]))
 			break;
 
-		if (gen8_ppgtt_clear_pd(vm, pd, start, length)) {
+		pd_is_empty = gen8_ppgtt_clear_pd(vm, pd, start, length);
+		/* Do not free pd pages if pdps are preallocated. */
+		if (ppgtt->preallocate_top_level_pdps)
+			continue;
+
+		if (pd_is_empty) {
 			__clear_bit(pdpe, pdp->used_pdpes);
 			gen8_setup_pdpe(ppgtt, pdp, vm->scratch_pd, pdpe);
 			free_pd(vm->i915, pd);
@@ -1545,6 +1551,8 @@  static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
 
 	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
 
+	ppgtt->preallocate_top_level_pdps = true;
+
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 34a4fd5..f325cb8 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -349,6 +349,7 @@  struct i915_hw_ppgtt {
 	struct kref ref;
 	struct drm_mm_node node;
 	unsigned long pd_dirty_rings;
+	bool preallocate_top_level_pdps;
 	union {
 		struct i915_pml4 pml4;		/* GEN8+ & 48b PPGTT */
 		struct i915_page_directory_pointer pdp;	/* GEN8+ */
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index db714dc..766a91a 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1488,7 +1488,7 @@  static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
 	if (req->ctx->ppgtt &&
 	    (intel_engine_flag(req->engine) & req->ctx->ppgtt->pd_dirty_rings)) {
 		if (!USES_FULL_48BIT_PPGTT(req->i915) &&
-		    !intel_vgpu_active(req->i915)) {
+		    !req->ctx->ppgtt->preallocate_top_level_pdps) {
 			ret = intel_logical_ring_emit_pdps(req);
 			if (ret)
 				return ret;