Message ID | 1462563095-13233-5-git-send-email-david.s.gordon@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 06/05/16 20:31, Dave Gordon wrote: > Mostly little optimisations and future-proofing against code breakage. > For instance, if the driver is correctly following the submission > protocol, the "out of space" condition is impossible, so the previous > runtime WARN_ON() is promoted to a GEM_BUG_ON() for a more dramatic > effect in development and less impact in end-user systems. > > Similarly we can make alignment checking more stringent and replace > other WARN_ON() conditions that don't relate to the runtime hardware > state with either BUILD_BUG_ON() for compile-time-detectable issues, or > GEM_BUG_ON() for logical "can't happen" errors. > > With those changes, we can convert it to void, as suggested by Chris > Wilson, and update the calling code appropriately. > > v3: > Note that we're now putting the request seqno in the "fence_id" field > of each GuC-work-item, in case it turns up somewhere useful (Tvrtko). > > Signed-off-by: Dave Gordon <david.s.gordon@intel.com> > --- > drivers/gpu/drm/i915/i915_guc_submission.c | 71 +++++++++++++++--------------- > drivers/gpu/drm/i915/intel_guc.h | 2 +- > drivers/gpu/drm/i915/intel_guc_fwif.h | 3 +- > 3 files changed, 38 insertions(+), 38 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c > index 21d3603..79897b0 100644 > --- a/drivers/gpu/drm/i915/i915_guc_submission.c > +++ b/drivers/gpu/drm/i915/i915_guc_submission.c > @@ -487,23 +487,28 @@ int i915_guc_wq_check_space(struct drm_i915_gem_request *request) > return -EAGAIN; > } > > -static int guc_add_workqueue_item(struct i915_guc_client *gc, > - struct drm_i915_gem_request *rq) > +static void guc_add_workqueue_item(struct i915_guc_client *gc, > + struct drm_i915_gem_request *rq) > { > + /* wqi_len is in DWords, and does not include the one-word header */ > + const size_t wqi_size = sizeof(struct guc_wq_item); > + const u32 wqi_len = wqi_size/sizeof(u32) - 1; > struct guc_process_desc *desc; > struct guc_wq_item *wqi; > void *base; > - u32 tail, wq_len, wq_off, space; > + u32 freespace, tail, wq_off, wq_page; > > desc = gc->client_base + gc->proc_desc_offset; > - space = CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size); > - if (WARN_ON(space < sizeof(struct guc_wq_item))) > - return -ENOSPC; /* shouldn't happen */ > > - /* postincrement WQ tail for next time */ > - wq_off = gc->wq_tail; > - gc->wq_tail += sizeof(struct guc_wq_item); > - gc->wq_tail &= gc->wq_size - 1; > + /* Free space is guaranteed, see i915_guc_wq_check_space() above */ > + freespace = CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size); > + GEM_BUG_ON(freespace < wqi_size); > + > + /* The GuC firmware wants the tail index in QWords, not bytes */ > + tail = rq->tail; > + GEM_BUG_ON(tail & 7); > + tail >>= 3; > + GEM_BUG_ON(tail > WQ_RING_TAIL_MAX); > > /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we > * should not have the case where structure wqi is across page, neither > @@ -512,19 +517,23 @@ static int guc_add_workqueue_item(struct i915_guc_client *gc, > * XXX: if not the case, we need save data to a temp wqi and copy it to > * workqueue buffer dw by dw. > */ > - WARN_ON(sizeof(struct guc_wq_item) != 16); > - WARN_ON(wq_off & 3); > + BUILD_BUG_ON(wqi_size != 16); > + > + /* postincrement WQ tail for next time */ > + wq_off = gc->wq_tail; > + gc->wq_tail += wqi_size; > + gc->wq_tail &= gc->wq_size - 1; > + GEM_BUG_ON(wq_off & (wqi_size - 1)); > > - /* wq starts from the page after doorbell / process_desc */ > - base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, > - (wq_off + GUC_DB_SIZE) >> PAGE_SHIFT)); > + /* WQ starts from the page after doorbell / process_desc */ > + wq_page = (wq_off + GUC_DB_SIZE) >> PAGE_SHIFT; > wq_off &= PAGE_SIZE - 1; > + base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, wq_page)); > wqi = (struct guc_wq_item *)((char *)base + wq_off); > > - /* len does not include the header */ > - wq_len = sizeof(struct guc_wq_item) / sizeof(u32) - 1; > + /* Now fill in the 4-word work queue item */ > wqi->header = WQ_TYPE_INORDER | > - (wq_len << WQ_LEN_SHIFT) | > + (wqi_len << WQ_LEN_SHIFT) | > (rq->engine->guc_id << WQ_TARGET_SHIFT) | > WQ_NO_WCFLUSH_WAIT; > > @@ -532,14 +541,10 @@ static int guc_add_workqueue_item(struct i915_guc_client *gc, > wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx, > rq->engine); > > - /* The GuC firmware wants the tail index in QWords, not bytes */ > - tail = rq->ringbuf->tail >> 3; > wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT; > - wqi->fence_id = 0; /*XXX: what fence to be here */ > + wqi->fence_id = rq->seqno; > > kunmap_atomic(base); > - > - return 0; > } > > /** > @@ -566,26 +571,20 @@ int i915_guc_submit(struct drm_i915_gem_request *rq) > unsigned int engine_id = rq->engine->guc_id; > struct intel_guc *guc = &rq->i915->guc; > struct i915_guc_client *client = guc->execbuf_client; > - int q_ret, b_ret; > + int b_ret; > > - q_ret = guc_add_workqueue_item(client, rq); > - if (q_ret == 0) > - b_ret = guc_ring_doorbell(client); > + guc_add_workqueue_item(client, rq); > + b_ret = guc_ring_doorbell(client); > > client->submissions[engine_id] += 1; > - if (q_ret) { > - client->q_fail += 1; > - client->retcode = q_ret; > - } else if (b_ret) { > + client->retcode = b_ret; > + if (b_ret) > client->b_fail += 1; > - client->retcode = q_ret = b_ret; > - } else { > - client->retcode = 0; > - } > + > guc->submissions[engine_id] += 1; > guc->last_seqno[engine_id] = rq->seqno; > > - return q_ret; > + return b_ret; > } > > /* > diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h > index 436f2d6..10e1d5e 100644 > --- a/drivers/gpu/drm/i915/intel_guc.h > +++ b/drivers/gpu/drm/i915/intel_guc.h > @@ -74,7 +74,7 @@ struct i915_guc_client { > /* GuC submission statistics & status */ > uint64_t submissions[GUC_MAX_ENGINES_NUM]; > uint32_t no_wq_space; /* Space pre-check failed */ > - uint32_t q_fail; /* Failed to queue (MBZ) */ > + uint32_t q_fail; /* No longer used */ > uint32_t b_fail; /* Doorbell failure (MBZ) */ > int retcode; /* Result of last guc_submit() */ > }; > diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h > index 2de57ff..944786d 100644 > --- a/drivers/gpu/drm/i915/intel_guc_fwif.h > +++ b/drivers/gpu/drm/i915/intel_guc_fwif.h > @@ -71,7 +71,8 @@ > #define WQ_WORKLOAD_TOUCH (2 << WQ_WORKLOAD_SHIFT) > > #define WQ_RING_TAIL_SHIFT 20 > -#define WQ_RING_TAIL_MASK (0x7FF << WQ_RING_TAIL_SHIFT) > +#define WQ_RING_TAIL_MAX 0x7FF /* 2^11 QWords */ > +#define WQ_RING_TAIL_MASK (WQ_RING_TAIL_MAX << WQ_RING_TAIL_SHIFT) > > #define GUC_DOORBELL_ENABLED 1 > #define GUC_DOORBELL_DISABLED 0 > Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Regards, Tvrtko
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 21d3603..79897b0 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -487,23 +487,28 @@ int i915_guc_wq_check_space(struct drm_i915_gem_request *request) return -EAGAIN; } -static int guc_add_workqueue_item(struct i915_guc_client *gc, - struct drm_i915_gem_request *rq) +static void guc_add_workqueue_item(struct i915_guc_client *gc, + struct drm_i915_gem_request *rq) { + /* wqi_len is in DWords, and does not include the one-word header */ + const size_t wqi_size = sizeof(struct guc_wq_item); + const u32 wqi_len = wqi_size/sizeof(u32) - 1; struct guc_process_desc *desc; struct guc_wq_item *wqi; void *base; - u32 tail, wq_len, wq_off, space; + u32 freespace, tail, wq_off, wq_page; desc = gc->client_base + gc->proc_desc_offset; - space = CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size); - if (WARN_ON(space < sizeof(struct guc_wq_item))) - return -ENOSPC; /* shouldn't happen */ - /* postincrement WQ tail for next time */ - wq_off = gc->wq_tail; - gc->wq_tail += sizeof(struct guc_wq_item); - gc->wq_tail &= gc->wq_size - 1; + /* Free space is guaranteed, see i915_guc_wq_check_space() above */ + freespace = CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size); + GEM_BUG_ON(freespace < wqi_size); + + /* The GuC firmware wants the tail index in QWords, not bytes */ + tail = rq->tail; + GEM_BUG_ON(tail & 7); + tail >>= 3; + GEM_BUG_ON(tail > WQ_RING_TAIL_MAX); /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we * should not have the case where structure wqi is across page, neither @@ -512,19 +517,23 @@ static int guc_add_workqueue_item(struct i915_guc_client *gc, * XXX: if not the case, we need save data to a temp wqi and copy it to * workqueue buffer dw by dw. */ - WARN_ON(sizeof(struct guc_wq_item) != 16); - WARN_ON(wq_off & 3); + BUILD_BUG_ON(wqi_size != 16); + + /* postincrement WQ tail for next time */ + wq_off = gc->wq_tail; + gc->wq_tail += wqi_size; + gc->wq_tail &= gc->wq_size - 1; + GEM_BUG_ON(wq_off & (wqi_size - 1)); - /* wq starts from the page after doorbell / process_desc */ - base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, - (wq_off + GUC_DB_SIZE) >> PAGE_SHIFT)); + /* WQ starts from the page after doorbell / process_desc */ + wq_page = (wq_off + GUC_DB_SIZE) >> PAGE_SHIFT; wq_off &= PAGE_SIZE - 1; + base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, wq_page)); wqi = (struct guc_wq_item *)((char *)base + wq_off); - /* len does not include the header */ - wq_len = sizeof(struct guc_wq_item) / sizeof(u32) - 1; + /* Now fill in the 4-word work queue item */ wqi->header = WQ_TYPE_INORDER | - (wq_len << WQ_LEN_SHIFT) | + (wqi_len << WQ_LEN_SHIFT) | (rq->engine->guc_id << WQ_TARGET_SHIFT) | WQ_NO_WCFLUSH_WAIT; @@ -532,14 +541,10 @@ static int guc_add_workqueue_item(struct i915_guc_client *gc, wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx, rq->engine); - /* The GuC firmware wants the tail index in QWords, not bytes */ - tail = rq->ringbuf->tail >> 3; wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT; - wqi->fence_id = 0; /*XXX: what fence to be here */ + wqi->fence_id = rq->seqno; kunmap_atomic(base); - - return 0; } /** @@ -566,26 +571,20 @@ int i915_guc_submit(struct drm_i915_gem_request *rq) unsigned int engine_id = rq->engine->guc_id; struct intel_guc *guc = &rq->i915->guc; struct i915_guc_client *client = guc->execbuf_client; - int q_ret, b_ret; + int b_ret; - q_ret = guc_add_workqueue_item(client, rq); - if (q_ret == 0) - b_ret = guc_ring_doorbell(client); + guc_add_workqueue_item(client, rq); + b_ret = guc_ring_doorbell(client); client->submissions[engine_id] += 1; - if (q_ret) { - client->q_fail += 1; - client->retcode = q_ret; - } else if (b_ret) { + client->retcode = b_ret; + if (b_ret) client->b_fail += 1; - client->retcode = q_ret = b_ret; - } else { - client->retcode = 0; - } + guc->submissions[engine_id] += 1; guc->last_seqno[engine_id] = rq->seqno; - return q_ret; + return b_ret; } /* diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index 436f2d6..10e1d5e 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -74,7 +74,7 @@ struct i915_guc_client { /* GuC submission statistics & status */ uint64_t submissions[GUC_MAX_ENGINES_NUM]; uint32_t no_wq_space; /* Space pre-check failed */ - uint32_t q_fail; /* Failed to queue (MBZ) */ + uint32_t q_fail; /* No longer used */ uint32_t b_fail; /* Doorbell failure (MBZ) */ int retcode; /* Result of last guc_submit() */ }; diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h index 2de57ff..944786d 100644 --- a/drivers/gpu/drm/i915/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/intel_guc_fwif.h @@ -71,7 +71,8 @@ #define WQ_WORKLOAD_TOUCH (2 << WQ_WORKLOAD_SHIFT) #define WQ_RING_TAIL_SHIFT 20 -#define WQ_RING_TAIL_MASK (0x7FF << WQ_RING_TAIL_SHIFT) +#define WQ_RING_TAIL_MAX 0x7FF /* 2^11 QWords */ +#define WQ_RING_TAIL_MASK (WQ_RING_TAIL_MAX << WQ_RING_TAIL_SHIFT) #define GUC_DOORBELL_ENABLED 1 #define GUC_DOORBELL_DISABLED 0
Mostly little optimisations and future-proofing against code breakage. For instance, if the driver is correctly following the submission protocol, the "out of space" condition is impossible, so the previous runtime WARN_ON() is promoted to a GEM_BUG_ON() for a more dramatic effect in development and less impact in end-user systems. Similarly we can make alignment checking more stringent and replace other WARN_ON() conditions that don't relate to the runtime hardware state with either BUILD_BUG_ON() for compile-time-detectable issues, or GEM_BUG_ON() for logical "can't happen" errors. With those changes, we can convert it to void, as suggested by Chris Wilson, and update the calling code appropriately. v3: Note that we're now putting the request seqno in the "fence_id" field of each GuC-work-item, in case it turns up somewhere useful (Tvrtko). Signed-off-by: Dave Gordon <david.s.gordon@intel.com> --- drivers/gpu/drm/i915/i915_guc_submission.c | 71 +++++++++++++++--------------- drivers/gpu/drm/i915/intel_guc.h | 2 +- drivers/gpu/drm/i915/intel_guc_fwif.h | 3 +- 3 files changed, 38 insertions(+), 38 deletions(-)