Message ID | 20170918092536.12287-1-michal.winiarski@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 18/09/17 02:25, Michał Winiarski wrote: > We can just operate on the wq_tail directly (in the process descriptor). > This allows us to remove the duplicated tail from the client. While I'm > here let's also remove the constants kept in the client and document our > locking requirements. This causes a small change in one of GuC debugfs > files. We're no longer reporting constant values (which I don't think > is a problem), but we're also no longer reporting the tail (does anyone > care?). > > v2: Update tail after wqi contents. (Chris) > v3: Really update tail after wqi contents. > > Cc: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> > Cc: Michal Wajdeczko <michal.wajdeczko@intel.com> > Cc: Oscar Mateo <oscar.mateo@intel.com> > Signed-off-by: Michał Winiarski <michal.winiarski@intel.com> > Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> > --- > drivers/gpu/drm/i915/i915_debugfs.c | 2 -- > drivers/gpu/drm/i915/i915_guc_submission.c | 37 +++++++++++++----------------- > drivers/gpu/drm/i915/intel_uc.h | 4 ---- > 3 files changed, 16 insertions(+), 27 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c > index 46ac6091772e..2518bdf95eef 100644 > --- a/drivers/gpu/drm/i915/i915_debugfs.c > +++ b/drivers/gpu/drm/i915/i915_debugfs.c > @@ -2447,8 +2447,6 @@ static void i915_guc_client_info(struct seq_file *m, > client->priority, client->stage_id, client->proc_desc_offset); > seq_printf(m, "\tDoorbell id %d, offset: 0x%lx\n", > client->doorbell_id, client->doorbell_offset); > - seq_printf(m, "\tWQ size %d, offset: 0x%x, tail %d\n", > - client->wq_size, client->wq_offset, client->wq_tail); > > for_each_engine(engine, dev_priv, id) { > u64 submissions = client->submissions[id]; > diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c > index 16ce570a0b74..18fefc11b8d7 100644 > --- a/drivers/gpu/drm/i915/i915_guc_submission.c > +++ b/drivers/gpu/drm/i915/i915_guc_submission.c > @@ -305,7 +305,7 @@ static void guc_proc_desc_init(struct intel_guc *guc, > desc->db_base_addr = 0; > > desc->stage_id = client->stage_id; > - desc->wq_size_bytes = client->wq_size; > + desc->wq_size_bytes = GUC_WQ_SIZE; > desc->wq_status = WQ_STATUS_ACTIVE; > desc->priority = client->priority; > } > @@ -390,8 +390,8 @@ static void guc_stage_desc_init(struct intel_guc *guc, > desc->db_trigger_cpu = (uintptr_t)__get_doorbell(client); > desc->db_trigger_uk = gfx_addr + client->doorbell_offset; > desc->process_desc = gfx_addr + client->proc_desc_offset; > - desc->wq_addr = gfx_addr + client->wq_offset; > - desc->wq_size = client->wq_size; > + desc->wq_addr = gfx_addr + GUC_DB_SIZE; > + desc->wq_size = GUC_WQ_SIZE; > > desc->desc_private = (uintptr_t)client; > } > @@ -416,14 +416,12 @@ static void guc_wq_item_append(struct i915_guc_client *client, > struct i915_gem_context *ctx = rq->ctx; > struct guc_process_desc *desc = __get_process_desc(client); > struct guc_wq_item *wqi; > - u32 freespace, tail, wq_off; > + u32 ring_tail, wq_off; > > - /* Free space is guaranteed */ > - freespace = CIRC_SPACE(client->wq_tail, desc->head, client->wq_size); > - GEM_BUG_ON(freespace < wqi_size); > + lockdep_assert_held(&client->wq_lock); > > - tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64); > - GEM_BUG_ON(tail > WQ_RING_TAIL_MAX); > + ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64); > + GEM_BUG_ON(ring_tail > WQ_RING_TAIL_MAX); > > /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we > * should not have the case where structure wqi is across page, neither > @@ -434,11 +432,11 @@ static void guc_wq_item_append(struct i915_guc_client *client, > */ > BUILD_BUG_ON(wqi_size != 16); > > - /* postincrement WQ tail for next time */ > - wq_off = client->wq_tail; > + /* Free space is guaranteed. */ > + wq_off = READ_ONCE(desc->tail); > + GEM_BUG_ON(CIRC_SPACE(wq_off, READ_ONCE(desc->head), > + GUC_WQ_SIZE) < wqi_size); > GEM_BUG_ON(wq_off & (wqi_size - 1)); > - client->wq_tail += wqi_size; > - client->wq_tail &= client->wq_size - 1; > > /* WQ starts from the page after doorbell / process_desc */ > wqi = client->vaddr + wq_off + GUC_DB_SIZE; > @@ -451,8 +449,11 @@ static void guc_wq_item_append(struct i915_guc_client *client, > > wqi->context_desc = lower_32_bits(intel_lr_context_descriptor(ctx, engine)); > > - wqi->submit_element_info = tail << WQ_RING_TAIL_SHIFT; > + wqi->submit_element_info = ring_tail << WQ_RING_TAIL_SHIFT; > wqi->fence_id = rq->global_seqno; > + > + /* Postincrement WQ tail for next time. */ Apologies for the late comment, just a nitpick: by updating desc->tail here we're making the update visible to GuC, we're not doing something for next SW iteration. I believe the comment was used to refer to the SW tracker (client->wq_tail) that is now gone so we could reword/drop it. Daniele > + WRITE_ONCE(desc->tail, (wq_off + wqi_size) & (GUC_WQ_SIZE - 1)); > } > > static void guc_reset_wq(struct i915_guc_client *client) > @@ -461,18 +462,14 @@ static void guc_reset_wq(struct i915_guc_client *client) > > desc->head = 0; > desc->tail = 0; > - > - client->wq_tail = 0; > } > > static void guc_ring_doorbell(struct i915_guc_client *client) > { > - struct guc_process_desc *desc = __get_process_desc(client); > struct guc_doorbell_info *db; > u32 cookie; > > - /* Update the tail so it is visible to GuC */ > - desc->tail = client->wq_tail; > + lockdep_assert_held(&client->wq_lock); > > /* pointer of current doorbell cacheline */ > db = __get_doorbell(client); > @@ -813,8 +810,6 @@ guc_client_alloc(struct drm_i915_private *dev_priv, > client->engines = engines; > client->priority = priority; > client->doorbell_id = GUC_DOORBELL_INVALID; > - client->wq_offset = GUC_DB_SIZE; > - client->wq_size = GUC_WQ_SIZE; > spin_lock_init(&client->wq_lock); > > ret = ida_simple_get(&guc->stage_ids, 0, GUC_MAX_STAGE_DESCRIPTORS, > diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h > index f505dcccd613..7703c9ad6511 100644 > --- a/drivers/gpu/drm/i915/intel_uc.h > +++ b/drivers/gpu/drm/i915/intel_uc.h > @@ -68,10 +68,6 @@ struct i915_guc_client { > unsigned long doorbell_offset; > > spinlock_t wq_lock; > - uint32_t wq_offset; > - uint32_t wq_size; > - uint32_t wq_tail; > - > /* Per-engine counts of GuC submissions */ > uint64_t submissions[I915_NUM_ENGINES]; > }; >
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 46ac6091772e..2518bdf95eef 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2447,8 +2447,6 @@ static void i915_guc_client_info(struct seq_file *m, client->priority, client->stage_id, client->proc_desc_offset); seq_printf(m, "\tDoorbell id %d, offset: 0x%lx\n", client->doorbell_id, client->doorbell_offset); - seq_printf(m, "\tWQ size %d, offset: 0x%x, tail %d\n", - client->wq_size, client->wq_offset, client->wq_tail); for_each_engine(engine, dev_priv, id) { u64 submissions = client->submissions[id]; diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 16ce570a0b74..18fefc11b8d7 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -305,7 +305,7 @@ static void guc_proc_desc_init(struct intel_guc *guc, desc->db_base_addr = 0; desc->stage_id = client->stage_id; - desc->wq_size_bytes = client->wq_size; + desc->wq_size_bytes = GUC_WQ_SIZE; desc->wq_status = WQ_STATUS_ACTIVE; desc->priority = client->priority; } @@ -390,8 +390,8 @@ static void guc_stage_desc_init(struct intel_guc *guc, desc->db_trigger_cpu = (uintptr_t)__get_doorbell(client); desc->db_trigger_uk = gfx_addr + client->doorbell_offset; desc->process_desc = gfx_addr + client->proc_desc_offset; - desc->wq_addr = gfx_addr + client->wq_offset; - desc->wq_size = client->wq_size; + desc->wq_addr = gfx_addr + GUC_DB_SIZE; + desc->wq_size = GUC_WQ_SIZE; desc->desc_private = (uintptr_t)client; } @@ -416,14 +416,12 @@ static void guc_wq_item_append(struct i915_guc_client *client, struct i915_gem_context *ctx = rq->ctx; struct guc_process_desc *desc = __get_process_desc(client); struct guc_wq_item *wqi; - u32 freespace, tail, wq_off; + u32 ring_tail, wq_off; - /* Free space is guaranteed */ - freespace = CIRC_SPACE(client->wq_tail, desc->head, client->wq_size); - GEM_BUG_ON(freespace < wqi_size); + lockdep_assert_held(&client->wq_lock); - tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64); - GEM_BUG_ON(tail > WQ_RING_TAIL_MAX); + ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64); + GEM_BUG_ON(ring_tail > WQ_RING_TAIL_MAX); /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we * should not have the case where structure wqi is across page, neither @@ -434,11 +432,11 @@ static void guc_wq_item_append(struct i915_guc_client *client, */ BUILD_BUG_ON(wqi_size != 16); - /* postincrement WQ tail for next time */ - wq_off = client->wq_tail; + /* Free space is guaranteed. */ + wq_off = READ_ONCE(desc->tail); + GEM_BUG_ON(CIRC_SPACE(wq_off, READ_ONCE(desc->head), + GUC_WQ_SIZE) < wqi_size); GEM_BUG_ON(wq_off & (wqi_size - 1)); - client->wq_tail += wqi_size; - client->wq_tail &= client->wq_size - 1; /* WQ starts from the page after doorbell / process_desc */ wqi = client->vaddr + wq_off + GUC_DB_SIZE; @@ -451,8 +449,11 @@ static void guc_wq_item_append(struct i915_guc_client *client, wqi->context_desc = lower_32_bits(intel_lr_context_descriptor(ctx, engine)); - wqi->submit_element_info = tail << WQ_RING_TAIL_SHIFT; + wqi->submit_element_info = ring_tail << WQ_RING_TAIL_SHIFT; wqi->fence_id = rq->global_seqno; + + /* Postincrement WQ tail for next time. */ + WRITE_ONCE(desc->tail, (wq_off + wqi_size) & (GUC_WQ_SIZE - 1)); } static void guc_reset_wq(struct i915_guc_client *client) @@ -461,18 +462,14 @@ static void guc_reset_wq(struct i915_guc_client *client) desc->head = 0; desc->tail = 0; - - client->wq_tail = 0; } static void guc_ring_doorbell(struct i915_guc_client *client) { - struct guc_process_desc *desc = __get_process_desc(client); struct guc_doorbell_info *db; u32 cookie; - /* Update the tail so it is visible to GuC */ - desc->tail = client->wq_tail; + lockdep_assert_held(&client->wq_lock); /* pointer of current doorbell cacheline */ db = __get_doorbell(client); @@ -813,8 +810,6 @@ guc_client_alloc(struct drm_i915_private *dev_priv, client->engines = engines; client->priority = priority; client->doorbell_id = GUC_DOORBELL_INVALID; - client->wq_offset = GUC_DB_SIZE; - client->wq_size = GUC_WQ_SIZE; spin_lock_init(&client->wq_lock); ret = ida_simple_get(&guc->stage_ids, 0, GUC_MAX_STAGE_DESCRIPTORS, diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index f505dcccd613..7703c9ad6511 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -68,10 +68,6 @@ struct i915_guc_client { unsigned long doorbell_offset; spinlock_t wq_lock; - uint32_t wq_offset; - uint32_t wq_size; - uint32_t wq_tail; - /* Per-engine counts of GuC submissions */ uint64_t submissions[I915_NUM_ENGINES]; };