Message ID | 1438250783-23118-1-git-send-email-michel.thierry@intel.com (mailing list archive) |
---|---|
State | New, archived |
Reviewed the patch & it looks fine.

Reviewed-by: "Akash Goel <akash.goel@intel.com>"

On 7/30/2015 3:36 PM, Michel Thierry wrote:
> In 64b (48bit canonical) PPGTT addressing, the PDP0 register contains
> the base address to PML4, while the other PDP registers are ignored.
>
> In LRC, the addressing mode must be specified in every context
> descriptor, and the base address to PML4 is stored in the reg state.
>
> v2: PML4 update in legacy context switch is left for historic reasons,
> the preferred mode of operation is with lrc context based submission.
> v3: s/gen8_map_page_directory/gen8_setup_page_directory and
> s/gen8_map_page_directory_pointer/gen8_setup_page_directory_pointer.
> Also, clflush will be needed for bxt. (Akash)
> v4: Squashed lrc-specific code and use a macro to set PML4 register.
> v5: Rebase after Mika's ppgtt cleanup / scratch merge patch series.
> PDP update in bb_start is only for legacy 32b mode.
> v6: Rebase after final merged version of Mika's ppgtt/scratch
> patches.
> v7: There is no need to update the pml4 register value in
> execlists_update_context. (Akash)
> v8: Move pd and pdp setup functions to a previous patch, they do not
> belong here. (Akash)
> v9: Check USES_FULL_48BIT_PPGTT instead of GEN8_CTX_ADDRESSING_MODE in
> gen8_emit_bb_start to check if emit pdps is needed. (Akash)
>
> Cc: Akash Goel <akash.goel@intel.com>
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
> ---
> drivers/gpu/drm/i915/i915_gem_gtt.c | 17 +++++++----
> drivers/gpu/drm/i915/i915_reg.h | 1 +
> drivers/gpu/drm/i915/intel_lrc.c | 60 ++++++++++++++++++++++++++-----------
> 3 files changed, 55 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index c498eaa..ae2e082 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -656,8 +656,8 @@ static int gen8_write_pdp(struct drm_i915_gem_request *req,
> return 0;
> }
>
> -static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
> - struct drm_i915_gem_request *req)
> +static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt,
> + struct drm_i915_gem_request *req)
> {
> int i, ret;
>
> @@ -672,6 +672,12 @@ static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
> return 0;
> }
>
> +static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt,
> + struct drm_i915_gem_request *req)
> +{
> + return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4));
> +}
> +
> static void gen8_ppgtt_clear_pte_range(struct i915_address_space *vm,
> struct i915_page_directory_pointer *pdp,
> uint64_t start,
> @@ -1321,14 +1327,13 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
> ppgtt->base.unbind_vma = ppgtt_unbind_vma;
> ppgtt->base.bind_vma = ppgtt_bind_vma;
>
> - ppgtt->switch_mm = gen8_mm_switch;
> -
> if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
> ret = setup_px(ppgtt->base.dev, &ppgtt->pml4);
> if (ret)
> goto free_scratch;
>
> ppgtt->base.total = 1ULL << 48;
> + ppgtt->switch_mm = gen8_48b_mm_switch;
> } else {
> ret = __pdp_init(false, &ppgtt->pdp);
> if (ret)
> @@ -1343,6 +1348,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
> */
> ppgtt->base.total = to_i915(ppgtt->base.dev)->gtt.base.total;
>
> + ppgtt->switch_mm = gen8_legacy_mm_switch;
> trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base,
> 0, 0,
> GEN8_PML4E_SHIFT);
> @@ -1540,8 +1546,9 @@ static void gen8_ppgtt_enable(struct drm_device *dev)
> int j;
>
> for_each_ring(ring, dev_priv, j) {
> + u32 four_level = USES_FULL_48BIT_PPGTT(dev) ? GEN8_GFX_PPGTT_48B : 0;
> I915_WRITE(RING_MODE_GEN7(ring),
> - _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
> + _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level));
> }
> }
>
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 3a77678..5bd1b6a 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -1670,6 +1670,7 @@ enum skl_disp_power_wells {
> #define GFX_REPLAY_MODE (1<<11)
> #define GFX_PSMI_GRANULARITY (1<<10)
> #define GFX_PPGTT_ENABLE (1<<9)
> +#define GEN8_GFX_PPGTT_48B (1<<7)
>
> #define VLV_DISPLAY_BASE 0x180000
> #define VLV_MIPI_BASE VLV_DISPLAY_BASE
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 99bba8e..4c40614 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -196,13 +196,21 @@
> reg_state[CTX_PDP ## n ## _LDW+1] = lower_32_bits(_addr); \
> }
>
> +#define ASSIGN_CTX_PML4(ppgtt, reg_state) { \
> + reg_state[CTX_PDP0_UDW + 1] = upper_32_bits(px_dma(&ppgtt->pml4)); \
> + reg_state[CTX_PDP0_LDW + 1] = lower_32_bits(px_dma(&ppgtt->pml4)); \
> +}
> +
> enum {
> ADVANCED_CONTEXT = 0,
> - LEGACY_CONTEXT,
> + LEGACY_32B_CONTEXT,
> ADVANCED_AD_CONTEXT,
> LEGACY_64B_CONTEXT
> };
> -#define GEN8_CTX_MODE_SHIFT 3
> +#define GEN8_CTX_ADDRESSING_MODE_SHIFT 3
> +#define GEN8_CTX_ADDRESSING_MODE(dev) (USES_FULL_48BIT_PPGTT(dev) ?\
> + LEGACY_64B_CONTEXT :\
> + LEGACY_32B_CONTEXT)
> enum {
> FAULT_AND_HANG = 0,
> FAULT_AND_HALT, /* Debug only */
> @@ -273,7 +281,7 @@ static uint64_t execlists_ctx_descriptor(struct drm_i915_gem_request *rq)
> WARN_ON(lrca & 0xFFFFFFFF00000FFFULL);
>
> desc = GEN8_CTX_VALID;
> - desc |= LEGACY_CONTEXT << GEN8_CTX_MODE_SHIFT;
> + desc |= GEN8_CTX_ADDRESSING_MODE(dev) << GEN8_CTX_ADDRESSING_MODE_SHIFT;
> if (IS_GEN8(ctx_obj->base.dev))
> desc |= GEN8_CTX_L3LLC_COHERENT;
> desc |= GEN8_CTX_PRIVILEGE;
> @@ -348,10 +356,12 @@ static int execlists_update_context(struct drm_i915_gem_request *rq)
> reg_state[CTX_RING_TAIL+1] = rq->tail;
> reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(rb_obj);
>
> - /* True PPGTT with dynamic page allocation: update PDP registers and
> - * point the unallocated PDPs to the scratch page
> - */
> - if (ppgtt) {
> + if (ppgtt && !USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
> + /* True 32b PPGTT with dynamic page allocation: update PDP
> + * registers and point the unallocated PDPs to scratch page.
> + * PML4 is allocated during ppgtt init, so this is not needed
> + * in 48-bit mode.
> + */
> ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
> ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
> ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
> @@ -1512,12 +1522,15 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
> * Ideally, we should set Force PD Restore in ctx descriptor,
> * but we can't. Force Restore would be a second option, but
> * it is unsafe in case of lite-restore (because the ctx is
> - * not idle). */
> + * not idle). PML4 is allocated during ppgtt init so this is
> + * not needed in 48-bit.*/
> if (req->ctx->ppgtt &&
> (intel_ring_flag(req->ring) & req->ctx->ppgtt->pd_dirty_rings)) {
> - ret = intel_logical_ring_emit_pdps(req);
> - if (ret)
> - return ret;
> + if (!USES_FULL_48BIT_PPGTT(req->i915)) {
> + ret = intel_logical_ring_emit_pdps(req);
> + if (ret)
> + return ret;
> + }
>
> req->ctx->ppgtt->pd_dirty_rings &= ~intel_ring_flag(req->ring);
> }
> @@ -2198,13 +2211,24 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
> reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0);
> reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0);
>
> - /* With dynamic page allocation, PDPs may not be allocated at this point,
> - * Point the unallocated PDPs to the scratch page
> - */
> - ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
> - ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
> - ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
> - ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
> + if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
> + /* 64b PPGTT (48bit canonical)
> + * PDP0_DESCRIPTOR contains the base address to PML4 and
> + * other PDP Descriptors are ignored.
> + */
> + ASSIGN_CTX_PML4(ppgtt, reg_state);
> + } else {
> + /* 32b PPGTT
> + * PDP*_DESCRIPTOR contains the base address of space supported.
> + * With dynamic page allocation, PDPs may not be allocated at
> + * this point. Point the unallocated PDPs to the scratch page
> + */
> + ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
> + ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
> + ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
> + ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
> + }
> +
> if (ring->id == RCS) {
> reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
> reg_state[CTX_R_PWR_CLK_STATE] = GEN8_R_PWR_CLK_STATE;
>
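As an aside on the addressing-mode change above: the snippet below is a standalone sketch, not part of the patch, showing how the execlists context descriptor ends up encoding LEGACY_32B_CONTEXT vs LEGACY_64B_CONTEXT. The enum values and GEN8_CTX_ADDRESSING_MODE_SHIFT are taken from the patch; treating GEN8_CTX_VALID as bit 0 and using a plain bool in place of USES_FULL_48BIT_PPGTT(dev) are assumptions made only for this example.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Context-descriptor addressing modes, as named in the patch. */
enum {
	ADVANCED_CONTEXT = 0,
	LEGACY_32B_CONTEXT,
	ADVANCED_AD_CONTEXT,
	LEGACY_64B_CONTEXT
};

#define GEN8_CTX_VALID			(1 << 0) /* assumed bit position, illustration only */
#define GEN8_CTX_ADDRESSING_MODE_SHIFT	3        /* from the patch */

/* uses_full_48bit stands in for USES_FULL_48BIT_PPGTT(dev). */
static uint64_t ctx_descriptor_mode_bits(bool uses_full_48bit)
{
	int mode = uses_full_48bit ? LEGACY_64B_CONTEXT : LEGACY_32B_CONTEXT;

	/* Valid bit plus the addressing-mode field at bits [4:3]. */
	return GEN8_CTX_VALID | ((uint64_t)mode << GEN8_CTX_ADDRESSING_MODE_SHIFT);
}

int main(void)
{
	printf("32b PPGTT descriptor bits: 0x%llx\n",
	       (unsigned long long)ctx_descriptor_mode_bits(false)); /* 0x9 */
	printf("48b PPGTT descriptor bits: 0x%llx\n",
	       (unsigned long long)ctx_descriptor_mode_bits(true));  /* 0x19 */
	return 0;
}

With LEGACY_64B_CONTEXT advertised, the hardware takes PDP0 as the PML4 base and ignores PDP1-3, which is why populate_lr_context() only programs CTX_PDP0_UDW/LDW (via ASSIGN_CTX_PML4) in 48-bit mode.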