Message ID | 1435243213-22308-10-git-send-email-david.s.gordon@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 06/25/2015 07:40 AM, Dave Gordon wrote: > GuC submission is basically execlist submission, but with the GuC > handling the actual writes to the ELSP and the resulting context > switch interrupts. So to prepare a context for submission via the > GuC, we need some of the same functions used in execlist mode. > This commit exposes two such functions, changing their names to > better describe what they do (they're related to logical ring > contexts rather than to execlists per se). > > v2: > Replaces previous "drm/i915: Move execlists defines from .c to .h" > > Issue: VIZ-4884 > Signed-off-by: Dave Gordon <david.s.gordon@intel.com> > --- > drivers/gpu/drm/i915/intel_lrc.c | 27 +++++++++++++-------------- > drivers/gpu/drm/i915/intel_lrc.h | 5 +++++ > 2 files changed, 18 insertions(+), 14 deletions(-) > > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > index e5f4040..a77b22d 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -264,8 +264,8 @@ u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj) > return lrca >> 12; > } > > -static uint64_t execlists_ctx_descriptor(struct intel_engine_cs *ring, > - struct drm_i915_gem_object *ctx_obj) > +uint64_t intel_lr_context_descriptor(struct intel_engine_cs *ring, > + struct drm_i915_gem_object *ctx_obj) > { > struct drm_device *dev = ring->dev; > uint64_t desc; > @@ -306,13 +306,13 @@ static void execlists_elsp_write(struct intel_engine_cs *ring, > > /* XXX: You must always write both descriptors in the order below. */ > if (ctx_obj1) > - temp = execlists_ctx_descriptor(ring, ctx_obj1); > + temp = intel_lr_context_descriptor(ring, ctx_obj1); > else > temp = 0; > desc[1] = (u32)(temp >> 32); > desc[0] = (u32)temp; > > - temp = execlists_ctx_descriptor(ring, ctx_obj0); > + temp = intel_lr_context_descriptor(ring, ctx_obj0); > desc[3] = (u32)(temp >> 32); > desc[2] = (u32)temp; > > @@ -331,10 +331,10 @@ static void execlists_elsp_write(struct intel_engine_cs *ring, > spin_unlock(&dev_priv->uncore.lock); > } > > -static int execlists_update_context(struct drm_i915_gem_object *ctx_obj, > - struct drm_i915_gem_object *ring_obj, > - struct i915_hw_ppgtt *ppgtt, > - u32 tail) > +/* Update the ringbuffer pointer and tail offset in a saved context image */ > +void intel_lr_context_update(struct drm_i915_gem_object *ctx_obj, > + struct drm_i915_gem_object *ring_obj, > + u32 tail) > { > struct page *page; > uint32_t *reg_state; > @@ -342,12 +342,11 @@ static int execlists_update_context(struct drm_i915_gem_object *ctx_obj, > page = i915_gem_object_get_page(ctx_obj, 1); > reg_state = kmap_atomic(page); > > - reg_state[CTX_RING_TAIL+1] = tail; > reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj); > + if (tail != ~0u) > + reg_state[CTX_RING_TAIL+1] = tail; I actually regret my original copycat of this function for guc. Because updating ring tail is moved to guc, there is no need to call this for each submission. Maybe we should split this call into two parts. The updating of ring buffer base is moved to where ring buffer is newly mapped. And the updating of tail is kept here for execlist submission only. Alex
On Thu, Jun 25, 2015 at 01:57:16PM -0700, Yu Dai wrote: > > On 06/25/2015 07:40 AM, Dave Gordon wrote: > >GuC submission is basically execlist submission, but with the GuC > >handling the actual writes to the ELSP and the resulting context > >switch interrupts. So to prepare a context for submission via the > >GuC, we need some of the same functions used in execlist mode. > >This commit exposes two such functions, changing their names to > >better describe what they do (they're related to logical ring > >contexts rather than to execlists per se). > > > >v2: > > Replaces previous "drm/i915: Move execlists defines from .c to .h" > > > >Issue: VIZ-4884 > >Signed-off-by: Dave Gordon <david.s.gordon@intel.com> > >--- > > drivers/gpu/drm/i915/intel_lrc.c | 27 +++++++++++++-------------- > > drivers/gpu/drm/i915/intel_lrc.h | 5 +++++ > > 2 files changed, 18 insertions(+), 14 deletions(-) > > > >diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > >index e5f4040..a77b22d 100644 > >--- a/drivers/gpu/drm/i915/intel_lrc.c > >+++ b/drivers/gpu/drm/i915/intel_lrc.c > >@@ -264,8 +264,8 @@ u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj) > > return lrca >> 12; > > } > >-static uint64_t execlists_ctx_descriptor(struct intel_engine_cs *ring, > >- struct drm_i915_gem_object *ctx_obj) > >+uint64_t intel_lr_context_descriptor(struct intel_engine_cs *ring, > >+ struct drm_i915_gem_object *ctx_obj) > > { > > struct drm_device *dev = ring->dev; > > uint64_t desc; > >@@ -306,13 +306,13 @@ static void execlists_elsp_write(struct intel_engine_cs *ring, > > /* XXX: You must always write both descriptors in the order below. */ > > if (ctx_obj1) > >- temp = execlists_ctx_descriptor(ring, ctx_obj1); > >+ temp = intel_lr_context_descriptor(ring, ctx_obj1); > > else > > temp = 0; > > desc[1] = (u32)(temp >> 32); > > desc[0] = (u32)temp; > >- temp = execlists_ctx_descriptor(ring, ctx_obj0); > >+ temp = intel_lr_context_descriptor(ring, ctx_obj0); > > desc[3] = (u32)(temp >> 32); > > desc[2] = (u32)temp; > >@@ -331,10 +331,10 @@ static void execlists_elsp_write(struct intel_engine_cs *ring, > > spin_unlock(&dev_priv->uncore.lock); > > } > >-static int execlists_update_context(struct drm_i915_gem_object *ctx_obj, > >- struct drm_i915_gem_object *ring_obj, > >- struct i915_hw_ppgtt *ppgtt, > >- u32 tail) > >+/* Update the ringbuffer pointer and tail offset in a saved context image */ > >+void intel_lr_context_update(struct drm_i915_gem_object *ctx_obj, > >+ struct drm_i915_gem_object *ring_obj, > >+ u32 tail) > > { > > struct page *page; > > uint32_t *reg_state; > >@@ -342,12 +342,11 @@ static int execlists_update_context(struct drm_i915_gem_object *ctx_obj, > > page = i915_gem_object_get_page(ctx_obj, 1); > > reg_state = kmap_atomic(page); > >- reg_state[CTX_RING_TAIL+1] = tail; > > reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj); > >+ if (tail != ~0u) > >+ reg_state[CTX_RING_TAIL+1] = tail; > I actually regret my original copycat of this function for guc. > Because updating ring tail is moved to guc, there is no need to call > this for each submission. Maybe we should split this call into two > parts. The updating of ring buffer base is moved to where ring > buffer is newly mapped. And the updating of tail is kept here for > execlist submission only. If you would be so kind to review patches that do just that, it would make intel_lrc a much nicer place to work, and execlists much faster. -Chris
On 26/06/15 08:31, Chris Wilson wrote: > On Thu, Jun 25, 2015 at 01:57:16PM -0700, Yu Dai wrote: >> >> On 06/25/2015 07:40 AM, Dave Gordon wrote: >>> GuC submission is basically execlist submission, but with the GuC >>> handling the actual writes to the ELSP and the resulting context >>> switch interrupts. So to prepare a context for submission via the >>> GuC, we need some of the same functions used in execlist mode. >>> This commit exposes two such functions, changing their names to >>> better describe what they do (they're related to logical ring >>> contexts rather than to execlists per se). >>> >>> v2: >>> Replaces previous "drm/i915: Move execlists defines from .c to .h" >>> >>> Issue: VIZ-4884 >>> Signed-off-by: Dave Gordon <david.s.gordon@intel.com> >>> --- >>> drivers/gpu/drm/i915/intel_lrc.c | 27 +++++++++++++-------------- >>> drivers/gpu/drm/i915/intel_lrc.h | 5 +++++ >>> 2 files changed, 18 insertions(+), 14 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c >>> index e5f4040..a77b22d 100644 >>> --- a/drivers/gpu/drm/i915/intel_lrc.c >>> +++ b/drivers/gpu/drm/i915/intel_lrc.c >>> @@ -264,8 +264,8 @@ u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj) >>> return lrca >> 12; >>> } >>> -static uint64_t execlists_ctx_descriptor(struct intel_engine_cs *ring, >>> - struct drm_i915_gem_object *ctx_obj) >>> +uint64_t intel_lr_context_descriptor(struct intel_engine_cs *ring, >>> + struct drm_i915_gem_object *ctx_obj) >>> { >>> struct drm_device *dev = ring->dev; >>> uint64_t desc; >>> @@ -306,13 +306,13 @@ static void execlists_elsp_write(struct intel_engine_cs *ring, >>> /* XXX: You must always write both descriptors in the order below. */ >>> if (ctx_obj1) >>> - temp = execlists_ctx_descriptor(ring, ctx_obj1); >>> + temp = intel_lr_context_descriptor(ring, ctx_obj1); >>> else >>> temp = 0; >>> desc[1] = (u32)(temp >> 32); >>> desc[0] = (u32)temp; >>> - temp = execlists_ctx_descriptor(ring, ctx_obj0); >>> + temp = intel_lr_context_descriptor(ring, ctx_obj0); >>> desc[3] = (u32)(temp >> 32); >>> desc[2] = (u32)temp; >>> @@ -331,10 +331,10 @@ static void execlists_elsp_write(struct intel_engine_cs *ring, >>> spin_unlock(&dev_priv->uncore.lock); >>> } >>> -static int execlists_update_context(struct drm_i915_gem_object *ctx_obj, >>> - struct drm_i915_gem_object *ring_obj, >>> - struct i915_hw_ppgtt *ppgtt, >>> - u32 tail) >>> +/* Update the ringbuffer pointer and tail offset in a saved context image */ >>> +void intel_lr_context_update(struct drm_i915_gem_object *ctx_obj, >>> + struct drm_i915_gem_object *ring_obj, >>> + u32 tail) >>> { >>> struct page *page; >>> uint32_t *reg_state; >>> @@ -342,12 +342,11 @@ static int execlists_update_context(struct drm_i915_gem_object *ctx_obj, >>> page = i915_gem_object_get_page(ctx_obj, 1); >>> reg_state = kmap_atomic(page); >>> - reg_state[CTX_RING_TAIL+1] = tail; >>> reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj); >>> + if (tail != ~0u) >>> + reg_state[CTX_RING_TAIL+1] = tail; >> >> I actually regret my original copycat of this function for guc. >> Because updating ring tail is moved to guc, there is no need to call >> this for each submission. Maybe we should split this call into two >> parts. The updating of ring buffer base is moved to where ring >> buffer is newly mapped. And the updating of tail is kept here for >> execlist submission only. > > If you would be so kind to review patches that do just that, it would > make intel_lrc a much nicer place to work, and execlists much faster. > -Chris I've deleted Alex's coy of this function in the GuC code; that's why the execlists/LRC version is exposed so that it can be shared with the GuC path, and the update of "tail" here is conditional, so the GuC call doesn't use that part. I agree it would be nicer still to update the ringbuffer base address only at the point when it's pinned to the GGTT, rather than on each batch submission. As you say, that would allow us to remove this call entirely from the GuC path. Another improvement would be to kmap the context whenever it's active, the way we already do for the ringbuffer, so we could get rid of the kmap_atomic calls here. Since contexts and ringbuffers are one-to-one in execlist/GuC modes, it should be simple to keep them mapped-and-pinned in paralell. Further, perhaps we could allocate them in a single contiguous structure to further reduce overhead; 4 pages of ringbuffer, one page as a GuC shared channel, one page for the (PP)HWSP, some number of pages for the context image? All GGTT-pinned and kmapped together whenever they've got any work in-flight; all unpinned and unmapped either during retirement of the last-active-request in that context or lazily some time later. I expect Chris has already implemented some of these ideas :) .Dave.
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index e5f4040..a77b22d 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -264,8 +264,8 @@ u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj) return lrca >> 12; } -static uint64_t execlists_ctx_descriptor(struct intel_engine_cs *ring, - struct drm_i915_gem_object *ctx_obj) +uint64_t intel_lr_context_descriptor(struct intel_engine_cs *ring, + struct drm_i915_gem_object *ctx_obj) { struct drm_device *dev = ring->dev; uint64_t desc; @@ -306,13 +306,13 @@ static void execlists_elsp_write(struct intel_engine_cs *ring, /* XXX: You must always write both descriptors in the order below. */ if (ctx_obj1) - temp = execlists_ctx_descriptor(ring, ctx_obj1); + temp = intel_lr_context_descriptor(ring, ctx_obj1); else temp = 0; desc[1] = (u32)(temp >> 32); desc[0] = (u32)temp; - temp = execlists_ctx_descriptor(ring, ctx_obj0); + temp = intel_lr_context_descriptor(ring, ctx_obj0); desc[3] = (u32)(temp >> 32); desc[2] = (u32)temp; @@ -331,10 +331,10 @@ static void execlists_elsp_write(struct intel_engine_cs *ring, spin_unlock(&dev_priv->uncore.lock); } -static int execlists_update_context(struct drm_i915_gem_object *ctx_obj, - struct drm_i915_gem_object *ring_obj, - struct i915_hw_ppgtt *ppgtt, - u32 tail) +/* Update the ringbuffer pointer and tail offset in a saved context image */ +void intel_lr_context_update(struct drm_i915_gem_object *ctx_obj, + struct drm_i915_gem_object *ring_obj, + u32 tail) { struct page *page; uint32_t *reg_state; @@ -342,12 +342,11 @@ static int execlists_update_context(struct drm_i915_gem_object *ctx_obj, page = i915_gem_object_get_page(ctx_obj, 1); reg_state = kmap_atomic(page); - reg_state[CTX_RING_TAIL+1] = tail; reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj); + if (tail != ~0u) + reg_state[CTX_RING_TAIL+1] = tail; kunmap_atomic(reg_state); - - return 0; } static void execlists_submit_contexts(struct intel_engine_cs *ring, @@ -363,7 +362,7 @@ static void execlists_submit_contexts(struct intel_engine_cs *ring, WARN_ON(!i915_gem_obj_is_pinned(ctx_obj0)); WARN_ON(!i915_gem_obj_is_pinned(ringbuf0->obj)); - execlists_update_context(ctx_obj0, ringbuf0->obj, to0->ppgtt, tail0); + intel_lr_context_update(ctx_obj0, ringbuf0->obj, tail0); if (to1) { ringbuf1 = to1->engine[ring->id].ringbuf; @@ -372,7 +371,7 @@ static void execlists_submit_contexts(struct intel_engine_cs *ring, WARN_ON(!i915_gem_obj_is_pinned(ctx_obj1)); WARN_ON(!i915_gem_obj_is_pinned(ringbuf1->obj)); - execlists_update_context(ctx_obj1, ringbuf1->obj, to1->ppgtt, tail1); + intel_lr_context_update(ctx_obj1, ringbuf1->obj, tail1); } execlists_elsp_write(ring, ctx_obj0, ctx_obj1); @@ -2029,7 +2028,7 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o reg_state[CTX_RING_TAIL+1] = 0; reg_state[CTX_RING_BUFFER_START] = RING_START(ring->mmio_base); /* Ring buffer start address is not known until the buffer is pinned. - * It is written to the context image in execlists_update_context() + * It is written to the context image in intel_lr_context_update() */ reg_state[CTX_RING_BUFFER_CONTROL] = RING_CTL(ring->mmio_base); reg_state[CTX_RING_BUFFER_CONTROL+1] = diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index f59940a..b3659a1 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -73,6 +73,11 @@ void intel_lr_context_unpin(struct intel_engine_cs *ring, struct intel_context *ctx); void intel_lr_context_reset(struct drm_device *dev, struct intel_context *ctx); +void intel_lr_context_update(struct drm_i915_gem_object *ctx_obj, + struct drm_i915_gem_object *ring_obj, + u32 tail); +uint64_t intel_lr_context_descriptor(struct intel_engine_cs *ring, + struct drm_i915_gem_object *ctx_obj); /* Execlists */ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists);
GuC submission is basically execlist submission, but with the GuC handling the actual writes to the ELSP and the resulting context switch interrupts. So to prepare a context for submission via the GuC, we need some of the same functions used in execlist mode. This commit exposes two such functions, changing their names to better describe what they do (they're related to logical ring contexts rather than to execlists per se). v2: Replaces previous "drm/i915: Move execlists defines from .c to .h" Issue: VIZ-4884 Signed-off-by: Dave Gordon <david.s.gordon@intel.com> --- drivers/gpu/drm/i915/intel_lrc.c | 27 +++++++++++++-------------- drivers/gpu/drm/i915/intel_lrc.h | 5 +++++ 2 files changed, 18 insertions(+), 14 deletions(-)