Message ID | 1436466554-24806-10-git-send-email-david.s.gordon@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Thu, Jul 09, 2015 at 07:29:10PM +0100, Dave Gordon wrote: > A GuC client has its own doorbell and workqueue. It maintains the > doorbell cache line, process description object and work queue item. > > A default guc_client is created for the i915 driver to use for > normal-priority in-order submission. > > Note that the created client is not yet ready for use; doorbell > allocation will fail as we haven't yet linked the GuC's context > descriptor to the default contexts for each ring (see later patch). > > v2: > Defer adding structure members until needed [Chris Wilson] > Rationalise type declarations [Chris Wilson] > > v4: > Rebased > > Issue: VIZ-4884 > Signed-off-by: Alex Dai <yu.dai@intel.com> > Signed-off-by: Dave Gordon <david.s.gordon@intel.com> [TOR:] I had some non-critical questions below. Reviewed-by: Tom O'Rourke <Tom.O'Rourke@intel.com> > --- > drivers/gpu/drm/i915/i915_guc_submission.c | 649 +++++++++++++++++++++++++++++ > drivers/gpu/drm/i915/intel_guc.h | 42 ++ > drivers/gpu/drm/i915/intel_guc_loader.c | 12 + > 3 files changed, 703 insertions(+) > > diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c > index e9d46d6..25d8807 100644 > --- a/drivers/gpu/drm/i915/i915_guc_submission.c > +++ b/drivers/gpu/drm/i915/i915_guc_submission.c > @@ -27,6 +27,512 @@ > #include "intel_guc.h" > > /** > + * DOC: GuC Client > + * > + * i915_guc_client: > + * We use the term client to avoid confusion with contexts. A i915_guc_client is > + * equivalent to GuC object guc_context_desc. This context descriptor is > + * allocated from a pool of 1024 entries. Kernel driver will allocate doorbell > + * and workqueue for it. Also the process descriptor (guc_process_desc), which > + * is mapped to client space. So the client can write Work Item then ring the > + * doorbell. > + * > + * To simplify the implementation, we allocate one gem object that contains all > + * pages for doorbell, process descriptor and workqueue. 
> + * > + * The Scratch registers: > + * There are 16 MMIO-based registers start from 0xC180. The kernel driver writes > + * a value to the action register (SOFT_SCRATCH_0) along with any data. It then > + * triggers an interrupt on the GuC via another register write (0xC4C8). > + * Firmware writes a success/fail code back to the action register after > + * processes the request. The kernel driver polls waiting for this update and > + * then proceeds. > + * See host2guc_action() > + * > + * Doorbells: > + * Doorbells are interrupts to uKernel. A doorbell is a single cache line (QW) > + * mapped into process space. > + * > + * Work Items: > + * There are several types of work items that the host may place into a > + * workqueue, each with its own requirements and limitations. Currently only > + * WQ_TYPE_INORDER is needed to support legacy submission via GuC, which > + * represents in-order queue. The kernel driver packs ring tail pointer and an > + * ELSP context descriptor dword into Work Item. > + * See guc_add_workqueue_item() > + * > + */ > + > +/* > + * Read GuC command/status register (SOFT_SCRATCH_0) > + * Return true if it contains a response rather than a command > + */ > +static inline bool host2guc_action_response(struct drm_i915_private *dev_priv, > + u32 *status) > +{ > + u32 val = I915_READ(SOFT_SCRATCH(0)); > + *status = val; > + return GUC2HOST_IS_RESPONSE(val); > +} > + > +static int host2guc_action(struct intel_guc *guc, u32 *data, u32 len) > +{ > + struct drm_i915_private *dev_priv = guc_to_i915(guc); > + u32 status; > + int i; > + int ret; > + > + if (WARN_ON(len < 1 || len > 15)) > + return -EINVAL; > + [TOR:] Would it be good for host2guc_action to take a forcewake? There are several writes and polling reads for completion. Taking a forcewake could avoid surplus forcewakes for each register access. 
> + spin_lock(&dev_priv->guc.host2guc_lock); > + > + dev_priv->guc.action_count += 1; > + dev_priv->guc.action_cmd = data[0]; > + > + for (i = 0; i < len; i++) > + I915_WRITE(SOFT_SCRATCH(i), data[i]); > + > + POSTING_READ(SOFT_SCRATCH(i - 1)); > + > + I915_WRITE(HOST2GUC_INTERRUPT, HOST2GUC_TRIGGER); > + > + ret = wait_for_atomic(host2guc_action_response(dev_priv, &status), 10); [TOR:] Why 10? > + if (status != GUC2HOST_STATUS_SUCCESS) { > + /* either GuC doesn't respond, which is a TIMEOUT, > + * or a failure code is returned. */ > + if (ret != -ETIMEDOUT) > + ret = -EIO; > + > + DRM_ERROR("GUC: host2guc action 0x%X failed. ret=%d " > + "status=0x%08X response=0x%08X\n", > + data[0], ret, status, > + I915_READ(SOFT_SCRATCH(15))); > + > + dev_priv->guc.action_fail += 1; > + dev_priv->guc.action_err = ret; > + } > + dev_priv->guc.action_status = status; > + > + spin_unlock(&dev_priv->guc.host2guc_lock); > + > + return ret; > +} > + > +/* > + * Tell the GuC to allocate or deallocate a specific doorbell > + */ > + > +static int host2guc_allocate_doorbell(struct intel_guc *guc, > + struct i915_guc_client *client) > +{ > + u32 data[2]; > + > + data[0] = HOST2GUC_ACTION_ALLOCATE_DOORBELL; > + data[1] = client->ctx_index; > + > + return host2guc_action(guc, data, 2); > +} > + > +static int host2guc_release_doorbell(struct intel_guc *guc, > + struct i915_guc_client *client) > +{ > + u32 data[2]; > + > + data[0] = HOST2GUC_ACTION_DEALLOCATE_DOORBELL; > + data[1] = client->ctx_index; > + > + return host2guc_action(guc, data, 2); > +} > + > +/* > + * Initialise, update, or clear doorbell data shared with the GuC > + * > + * These functions modify shared data and so need access to the mapped > + * client object which contains the page being used for the doorbell > + */ > + > +static void guc_init_doorbell(struct intel_guc *guc, > + struct i915_guc_client *client) > +{ > + struct guc_doorbell_info *doorbell; > + void *base; > + > + base = 
kmap_atomic(i915_gem_object_get_page(client->client_obj, 0)); > + doorbell = base + client->doorbell_offset; > + > + doorbell->db_status = 1; > + doorbell->cookie = 0; > + > + kunmap_atomic(base); > +} > + > +static int guc_ring_doorbell(struct i915_guc_client *gc) > +{ > + struct guc_process_desc *desc; > + union guc_doorbell_qw db_cmp, db_exc, db_ret; > + union guc_doorbell_qw *db; > + void *base; > + int attempt = 2, ret = -EAGAIN; > + > + base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, 0)); > + desc = base + gc->proc_desc_offset; > + > + /* Update the tail so it is visible to GuC */ > + desc->tail = gc->wq_tail; > + > + /* current cookie */ > + db_cmp.db_status = GUC_DOORBELL_ENABLED; > + db_cmp.cookie = gc->cookie; > + > + /* cookie to be updated */ > + db_exc.db_status = GUC_DOORBELL_ENABLED; > + db_exc.cookie = gc->cookie + 1; > + if (db_exc.cookie == 0) > + db_exc.cookie = 1; > + > + /* pointer of current doorbell cacheline */ > + db = base + gc->doorbell_offset; > + > + while (attempt--) { > + /* lets ring the doorbell */ > + db_ret.value_qw = atomic64_cmpxchg((atomic64_t *)db, > + db_cmp.value_qw, db_exc.value_qw); > + > + /* if the exchange was successfully executed */ > + if (db_ret.value_qw == db_cmp.value_qw) { > + /* db was successfully rung */ > + gc->cookie = db_exc.cookie; > + ret = 0; > + break; > + } > + > + /* XXX: doorbell was lost and need to acquire it again */ > + if (db_ret.db_status == GUC_DOORBELL_DISABLED) > + break; > + > + DRM_ERROR("Cookie mismatch. 
Expected %d, returned %d\n", > + db_cmp.cookie, db_ret.cookie); > + > + /* update the cookie to newly read cookie from GuC */ > + db_cmp.cookie = db_ret.cookie; > + db_exc.cookie = db_ret.cookie + 1; > + if (db_exc.cookie == 0) > + db_exc.cookie = 1; > + } > + > + kunmap_atomic(base); > + return ret; > +} > + > +static void guc_disable_doorbell(struct intel_guc *guc, > + struct i915_guc_client *client) > +{ > + struct drm_i915_private *dev_priv = guc_to_i915(guc); > + struct guc_doorbell_info *doorbell; > + void *base; > + int drbreg = GEN8_DRBREGL(client->doorbell_id); > + int value; > + > + base = kmap_atomic(i915_gem_object_get_page(client->client_obj, 0)); > + doorbell = base + client->doorbell_offset; > + > + doorbell->db_status = 0; > + > + kunmap_atomic(base); > + > + I915_WRITE(drbreg, I915_READ(drbreg) & ~GEN8_DRB_VALID); > + > + value = I915_READ(drbreg); > + WARN_ON((value & GEN8_DRB_VALID) != 0); > + > + I915_WRITE(GEN8_DRBREGU(client->doorbell_id), 0); > + I915_WRITE(drbreg, 0); > + > + /* XXX: wait for any interrupts */ > + /* XXX: wait for workqueue to drain */ > +} > + > +/* > + * Select, assign and relase doorbell cachelines > + * > + * These functions track which doorbell cachelines are in use. > + * The data they manipulate is protected by the host2guc lock. > + */ > + > +static uint32_t select_doorbell_cacheline(struct intel_guc *guc) > +{ > + const uint32_t cacheline_size = boot_cpu_data.x86_clflush_size; > + uint32_t offset; > + > + spin_lock(&guc->host2guc_lock); > + > + /* Doorbell uses a single cache line within a page */ > + offset = guc->db_cacheline & PAGE_MASK; > + > + /* Moving to next cache line to reduce contention */ > + guc->db_cacheline += cacheline_size; > + > + spin_unlock(&guc->host2guc_lock); > + > + return offset; > +} > + > +static uint16_t assign_doorbell(struct intel_guc *guc, uint32_t priority) > +{ > + /* The bitmap is split into two halves - high and normal priority. 
*/ > + const uint16_t half = GUC_MAX_DOORBELLS / 2; > + const uint16_t start = (priority <= GUC_CTX_PRIORITY_HIGH) ? half : 0; > + const uint16_t end = start + half; > + uint16_t id; > + > + spin_lock(&guc->host2guc_lock); > + id = find_next_zero_bit(guc->doorbell_bitmap, end, start); > + if (id == end) > + id = GUC_INVALID_DOORBELL_ID; > + else > + bitmap_set(guc->doorbell_bitmap, id, 1); > + spin_unlock(&guc->host2guc_lock); > + > + return id; > +} > + > +static void release_doorbell(struct intel_guc *guc, uint16_t id) > +{ > + spin_lock(&guc->host2guc_lock); > + bitmap_clear(guc->doorbell_bitmap, id, 1); > + spin_unlock(&guc->host2guc_lock); > +} > + > +/* > + * Initialise the process descriptor shared with the GuC firmware. > + */ > +static void guc_init_proc_desc(struct intel_guc *guc, > + struct i915_guc_client *client) > +{ > + struct guc_process_desc *desc; > + void *base; > + > + base = kmap_atomic(i915_gem_object_get_page(client->client_obj, 0)); > + desc = base + client->proc_desc_offset; > + > + memset(desc, 0, sizeof(*desc)); > + > + /* > + * XXX: pDoorbell and WQVBaseAddress are pointers in process address > + * space for ring3 clients (set them as in mmap_ioctl) or kernel > + * space for kernel clients (map on demand instead? May make debug > + * easier to have it mapped). > + */ > + desc->wq_base_addr = 0; > + desc->db_base_addr = 0; > + > + desc->context_id = client->ctx_index; > + desc->wq_size_bytes = client->wq_size; > + desc->wq_status = WQ_STATUS_ACTIVE; > + desc->priority = client->priority; > + > + kunmap_atomic(base); > +} > + > +/* > + * Initialise/clear the context descriptor shared with the GuC firmware. > + * > + * This descriptor tells the GuC where (in GGTT space) to find the important > + * data structures relating to this client (doorbell, process descriptor, > + * write queue, etc). 
> + */ > + > +static void guc_init_ctx_desc(struct intel_guc *guc, > + struct i915_guc_client *client) > +{ > + struct guc_context_desc desc; > + struct sg_table *sg; > + > + memset(&desc, 0, sizeof(desc)); > + > + desc.attribute = GUC_CTX_DESC_ATTR_ACTIVE | GUC_CTX_DESC_ATTR_KERNEL; > + desc.context_id = client->ctx_index; > + desc.priority = client->priority; > + desc.engines_used = (1 << RCS) | (1 << VCS) | (1 << BCS) | > + (1 << VECS) | (1 << VCS2); /* all engines */ > + desc.db_id = client->doorbell_id; > + > + /* > + * The CPU address is only needed at certain points, so kmap_atomic on > + * demand instead of storing it in the ctx descriptor. > + * XXX: May make debug easier to have it mapped > + */ > + desc.db_trigger_cpu = 0; > + desc.db_trigger_uk = client->doorbell_offset + > + i915_gem_obj_ggtt_offset(client->client_obj); > + desc.db_trigger_phy = client->doorbell_offset + > + sg_dma_address(client->client_obj->pages->sgl); > + > + desc.process_desc = client->proc_desc_offset + > + i915_gem_obj_ggtt_offset(client->client_obj); > + > + desc.wq_addr = client->wq_offset + > + i915_gem_obj_ggtt_offset(client->client_obj); > + > + desc.wq_size = client->wq_size; > + > + /* > + * XXX: Take LRCs from an existing intel_context if this is not an > + * IsKMDCreatedContext client > + */ > + desc.desc_private = (uintptr_t)client; > + > + /* Pool context is pinned already */ > + sg = guc->ctx_pool_obj->pages; > + sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc), > + sizeof(desc) * client->ctx_index); > +} > + > +static void guc_fini_ctx_desc(struct intel_guc *guc, > + struct i915_guc_client *client) > +{ > + struct guc_context_desc desc; > + struct sg_table *sg; > + > + memset(&desc, 0, sizeof(desc)); > + > + sg = guc->ctx_pool_obj->pages; > + sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc), > + sizeof(desc) * client->ctx_index); > +} > + > +/* Get valid workqueue item and return it back to offset */ > +static int 
guc_get_workqueue_space(struct i915_guc_client *gc, u32 *offset) > +{ > + struct guc_process_desc *desc; > + void *base; > + u32 size = sizeof(struct guc_wq_item); > + int ret = 0, timeout_counter = 200; > + unsigned long flags; > + > + base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, 0)); > + desc = base + gc->proc_desc_offset; > + > + while (timeout_counter-- > 0) { > + spin_lock_irqsave(&gc->wq_lock, flags); > + > + ret = wait_for_atomic(CIRC_SPACE(gc->wq_tail, desc->head, > + gc->wq_size) >= size, 1); > + > + if (!ret) { > + *offset = gc->wq_tail; > + > + /* advance the tail for next workqueue item */ > + gc->wq_tail += size; > + gc->wq_tail &= gc->wq_size - 1; > + > + /* this will break the loop */ > + timeout_counter = 0; > + } > + > + spin_unlock_irqrestore(&gc->wq_lock, flags); > + }; > + > + kunmap_atomic(base); > + > + return ret; > +} > + > +static int guc_add_workqueue_item(struct i915_guc_client *gc, > + struct drm_i915_gem_request *rq) > +{ > + enum intel_ring_id ring_id = rq->ring->id; > + struct guc_wq_item *wqi; > + void *base; > + u32 tail, wq_len, wq_off = 0; > + int ret; > + > + ret = guc_get_workqueue_space(gc, &wq_off); > + if (ret) > + return ret; > + > + /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we > + * should not have the case where structure wqi is across page, neither > + * wrapped to the beginning. This simplifies the implementation below. > + * > + * XXX: if not the case, we need save data to a temp wqi and copy it to > + * workqueue buffer dw by dw. 
> + */ > + WARN_ON(sizeof(struct guc_wq_item) != 16); > + WARN_ON(wq_off & 3); > + > + /* wq starts from the page after doorbell / process_desc */ > + base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, > + (wq_off + GUC_DB_SIZE) >> PAGE_SHIFT)); > + wq_off &= PAGE_SIZE - 1; > + wqi = (struct guc_wq_item *)((char *)base + wq_off); > + > + /* len does not include the header */ > + wq_len = sizeof(struct guc_wq_item) / sizeof(u32) - 1; > + wqi->header = WQ_TYPE_INORDER | > + (wq_len << WQ_LEN_SHIFT) | > + (ring_id << WQ_TARGET_SHIFT) | > + WQ_NO_WCFLUSH_WAIT; > + > + /* The GuC wants only the low-order word of the context descriptor */ > + wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx, rq->ring); > + > + /* The GuC firmware wants the tail index in QWords, not bytes */ > + tail = rq->ringbuf->tail >> 3; > + wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT; > + wqi->fence_id = 0; /*XXX: what fence to be here */ > + > + kunmap_atomic(base); > + > + return 0; > +} > + > +/** > + * i915_guc_submit() - Submit commands through GuC > + * @client: the guc client where commands will go through > + * @ctx: LRC where commands come from > + * @ring: HW engine that will excute the commands > + * > + * Return: 0 if succeed > + */ > +int i915_guc_submit(struct i915_guc_client *client, > + struct drm_i915_gem_request *rq) > +{ > + unsigned long flags; > + int q_ret, b_ret; > + > + /* Need this because of the deferred pin ctx and ring */ > + /* Shall we move this right after ring is pinned? 
*/ > + intel_lr_context_update(rq); > + > + q_ret = guc_add_workqueue_item(client, rq); > + if (q_ret == 0) > + b_ret = guc_ring_doorbell(client); > + > + spin_lock_irqsave(&client->wq_lock, flags); > + client->submissions += 1; > + if (q_ret) { > + client->q_fail += 1; > + client->retcode = q_ret; > + } else if (b_ret) { > + client->b_fail += 1; > + client->retcode = q_ret = b_ret; > + } else { > + client->retcode = 0; > + } > + spin_unlock_irqrestore(&client->wq_lock, flags); > + > + return q_ret; > +} > + > +/* > + * Everything below here is concerned with setup & teardown, and is > + * therefore not part of the somewhat time-critical batch-submission > + * path of i915_guc_submit() above. > + */ > + > +/** > * gem_allocate_guc_obj() - Allocate gem object for GuC usage > * @dev: drm device > * @size: size of object > @@ -75,6 +581,121 @@ static void gem_release_guc_obj(struct drm_i915_gem_object *obj) > drm_gem_object_unreference(&obj->base); > } > > +static void guc_client_free(struct drm_device *dev, > + struct i915_guc_client *client) > +{ > + struct drm_i915_private *dev_priv = dev->dev_private; > + struct intel_guc *guc = &dev_priv->guc; > + > + if (!client) > + return; > + > + if (client->doorbell_id != GUC_INVALID_DOORBELL_ID) { > + /* > + * First disable the doorbell, then tell the GuC we've > + * finished with it, finally deallocate it in our bitmap > + */ > + guc_disable_doorbell(guc, client); > + host2guc_release_doorbell(guc, client); > + release_doorbell(guc, client->doorbell_id); > + } > + > + /* > + * XXX: wait for any outstanding submissions before freeing memory. 
> + * Be sure to drop any locks > + */ > + > + gem_release_guc_obj(client->client_obj); > + > + if (client->ctx_index != GUC_INVALID_CTX_ID) { > + guc_fini_ctx_desc(guc, client); > + ida_simple_remove(&guc->ctx_ids, client->ctx_index); > + } > + > + kfree(client); > +} > + > +/** > + * guc_client_alloc() - Allocate an i915_guc_client > + * @dev: drm device > + * @priority: four levels priority _CRITICAL, _HIGH, _NORMAL and _LOW > + * The kernel client to replace ExecList submission is created with > + * NORMAL priority. Priority of a client for scheduler can be HIGH, > + * while a preemption context can use CRITICAL. > + * > + * Return: An i915_guc_client object if success. > + */ > +static struct i915_guc_client *guc_client_alloc(struct drm_device *dev, > + uint32_t priority) > +{ > + struct i915_guc_client *client; > + struct drm_i915_private *dev_priv = dev->dev_private; > + struct intel_guc *guc = &dev_priv->guc; > + struct drm_i915_gem_object *obj; > + > + client = kzalloc(sizeof(*client), GFP_KERNEL); > + if (!client) > + return NULL; > + > + client->doorbell_id = GUC_INVALID_DOORBELL_ID; > + client->priority = priority; > + > + client->ctx_index = (uint32_t)ida_simple_get(&guc->ctx_ids, 0, > + GUC_MAX_GPU_CONTEXTS, GFP_KERNEL); > + if (client->ctx_index >= GUC_MAX_GPU_CONTEXTS) { > + client->ctx_index = GUC_INVALID_CTX_ID; > + goto err; > + } > + > + /* The first page is doorbell/proc_desc. Two followed pages are wq. */ > + obj = gem_allocate_guc_obj(dev, GUC_DB_SIZE + GUC_WQ_SIZE); > + if (!obj) > + goto err; > + > + client->client_obj = obj; > + client->wq_offset = GUC_DB_SIZE; > + client->wq_size = GUC_WQ_SIZE; > + spin_lock_init(&client->wq_lock); > + > + client->doorbell_offset = select_doorbell_cacheline(guc); > + > + /* > + * Since the doorbell only requires a single cacheline, we can save > + * space by putting the application process descriptor in the same > + * page. Use the half of the page that doesn't include the doorbell. 
> + */ > + if (client->doorbell_offset >= (GUC_DB_SIZE / 2)) > + client->proc_desc_offset = 0; > + else > + client->proc_desc_offset = (GUC_DB_SIZE / 2); > + > + client->doorbell_id = assign_doorbell(guc, client->priority); > + if (client->doorbell_id == GUC_INVALID_DOORBELL_ID) > + /* XXX: evict a doorbell instead */ > + goto err; > + > + guc_init_proc_desc(guc, client); > + guc_init_ctx_desc(guc, client); > + guc_init_doorbell(guc, client); > + > + /* Invalidate GuC TLB to let GuC take the latest updates to GTT. */ > + I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE); > + > + /* XXX: Any cache flushes needed? General domain mgmt calls? */ > + > + if (host2guc_allocate_doorbell(guc, client)) > + goto err; > + > + DRM_DEBUG_DRIVER("new priority %u client %p: ctx_index %u db_id %u\n", > + priority, client, client->ctx_index, client->doorbell_id); > + > + return client; > + > +err: > + guc_client_free(dev, client); > + return NULL; > +} > + > static void guc_create_log(struct intel_guc *guc) > { > struct drm_i915_private *dev_priv = guc_to_i915(guc); > @@ -138,6 +759,8 @@ int i915_guc_submission_init(struct drm_device *dev) > if (!guc->ctx_pool_obj) > return -ENOMEM; > > + spin_lock_init(&dev_priv->guc.host2guc_lock); > + > ida_init(&guc->ctx_ids); > > guc_create_log(guc); > @@ -145,6 +768,32 @@ int i915_guc_submission_init(struct drm_device *dev) > return 0; > } > > +int i915_guc_submission_enable(struct drm_device *dev) > +{ > + struct drm_i915_private *dev_priv = dev->dev_private; > + struct intel_guc *guc = &dev_priv->guc; > + struct i915_guc_client *client; > + > + /* client for execbuf submission */ > + client = guc_client_alloc(dev, GUC_CTX_PRIORITY_NORMAL); > + if (!client) { > + DRM_ERROR("Failed to create execbuf guc_client\n"); > + return -ENOMEM; > + } > + > + guc->execbuf_client = client; > + return 0; > +} > + > +void i915_guc_submission_disable(struct drm_device *dev) > +{ > + struct drm_i915_private *dev_priv = dev->dev_private; > + struct intel_guc *guc = 
&dev_priv->guc; > + > + guc_client_free(dev, guc->execbuf_client); > + guc->execbuf_client = NULL; > +} > + > void i915_guc_submission_fini(struct drm_device *dev) > { > struct drm_i915_private *dev_priv = dev->dev_private; > diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h > index 5b51b05..d249326 100644 > --- a/drivers/gpu/drm/i915/intel_guc.h > +++ b/drivers/gpu/drm/i915/intel_guc.h > @@ -27,6 +27,30 @@ > #include "intel_guc_fwif.h" > #include "i915_guc_reg.h" > > +struct i915_guc_client { > + struct drm_i915_gem_object *client_obj; > + uint32_t priority; > + uint32_t ctx_index; > + > + uint32_t proc_desc_offset; > + uint32_t doorbell_offset; > + uint32_t cookie; > + uint16_t doorbell_id; > + uint16_t padding; /* Maintain alignment */ > + > + uint32_t wq_offset; > + uint32_t wq_size; > + > + spinlock_t wq_lock; /* Protects all data below */ > + uint32_t wq_tail; > + > + /* GuC submission statistics & status */ > + uint64_t submissions; > + uint32_t q_fail; > + uint32_t b_fail; > + int retcode; > +}; > + > enum intel_guc_fw_status { > GUC_FIRMWARE_FAIL = -1, > GUC_FIRMWARE_NONE = 0, > @@ -60,6 +84,20 @@ struct intel_guc { > > struct drm_i915_gem_object *ctx_pool_obj; > struct ida ctx_ids; > + > + struct i915_guc_client *execbuf_client; > + > + spinlock_t host2guc_lock; /* Protects all data below */ > + > + DECLARE_BITMAP(doorbell_bitmap, GUC_MAX_DOORBELLS); > + int db_cacheline; > + > + /* Action status & statistics */ > + uint64_t action_count; /* Total commands issued */ > + uint32_t action_cmd; /* Last command word */ > + uint32_t action_status; /* Last return status */ > + uint32_t action_fail; /* Total number of failures */ > + int32_t action_err; /* Last error code */ > }; > > /* intel_guc_loader.c */ > @@ -70,6 +108,10 @@ extern const char *intel_guc_fw_status_repr(enum intel_guc_fw_status status); > > /* i915_guc_submission.c */ > int i915_guc_submission_init(struct drm_device *dev); > +int i915_guc_submission_enable(struct 
drm_device *dev); > +int i915_guc_submit(struct i915_guc_client *client, > + struct drm_i915_gem_request *rq); > +void i915_guc_submission_disable(struct drm_device *dev); > void i915_guc_submission_fini(struct drm_device *dev); > > #endif > diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c > index e5d7136..25ba29f 100644 > --- a/drivers/gpu/drm/i915/intel_guc_loader.c > +++ b/drivers/gpu/drm/i915/intel_guc_loader.c > @@ -427,6 +427,8 @@ int intel_guc_ucode_load(struct drm_device *dev) > intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status), > intel_guc_fw_status_repr(guc_fw->guc_fw_load_status)); > > + i915_guc_submission_disable(dev); > + > if (guc_fw->guc_fw_fetch_status == GUC_FIRMWARE_NONE) > return 0; > > @@ -479,12 +481,20 @@ int intel_guc_ucode_load(struct drm_device *dev) > intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status), > intel_guc_fw_status_repr(guc_fw->guc_fw_load_status)); > > + if (i915.enable_guc_submission) { > + err = i915_guc_submission_enable(dev); > + if (err) > + goto fail; > + } > + > return 0; > > fail: > if (guc_fw->guc_fw_load_status == GUC_FIRMWARE_PENDING) > guc_fw->guc_fw_load_status = GUC_FIRMWARE_FAIL; > > + i915_guc_submission_disable(dev); > + > DRM_ERROR("Failed to initialize GuC, error %d\n", err); > > return err; > @@ -547,6 +557,8 @@ void intel_guc_ucode_fini(struct drm_device *dev) > struct drm_i915_private *dev_priv = dev->dev_private; > struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw; > > + i915_guc_submission_fini(dev); > + > if (guc_fw->guc_fw_obj) > drm_gem_object_unreference(&guc_fw->guc_fw_obj->base); > guc_fw->guc_fw_obj = NULL; > -- > 1.9.1 >
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index e9d46d6..25d8807 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -27,6 +27,512 @@ #include "intel_guc.h" /** + * DOC: GuC Client + * + * i915_guc_client: + * We use the term client to avoid confusion with contexts. A i915_guc_client is + * equivalent to GuC object guc_context_desc. This context descriptor is + * allocated from a pool of 1024 entries. Kernel driver will allocate doorbell + * and workqueue for it. Also the process descriptor (guc_process_desc), which + * is mapped to client space. So the client can write Work Item then ring the + * doorbell. + * + * To simplify the implementation, we allocate one gem object that contains all + * pages for doorbell, process descriptor and workqueue. + * + * The Scratch registers: + * There are 16 MMIO-based registers start from 0xC180. The kernel driver writes + * a value to the action register (SOFT_SCRATCH_0) along with any data. It then + * triggers an interrupt on the GuC via another register write (0xC4C8). + * Firmware writes a success/fail code back to the action register after + * processes the request. The kernel driver polls waiting for this update and + * then proceeds. + * See host2guc_action() + * + * Doorbells: + * Doorbells are interrupts to uKernel. A doorbell is a single cache line (QW) + * mapped into process space. + * + * Work Items: + * There are several types of work items that the host may place into a + * workqueue, each with its own requirements and limitations. Currently only + * WQ_TYPE_INORDER is needed to support legacy submission via GuC, which + * represents in-order queue. The kernel driver packs ring tail pointer and an + * ELSP context descriptor dword into Work Item. 
+ * See guc_add_workqueue_item() + * + */ + +/* + * Read GuC command/status register (SOFT_SCRATCH_0) + * Return true if it contains a response rather than a command + */ +static inline bool host2guc_action_response(struct drm_i915_private *dev_priv, + u32 *status) +{ + u32 val = I915_READ(SOFT_SCRATCH(0)); + *status = val; + return GUC2HOST_IS_RESPONSE(val); +} + +static int host2guc_action(struct intel_guc *guc, u32 *data, u32 len) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + u32 status; + int i; + int ret; + + if (WARN_ON(len < 1 || len > 15)) + return -EINVAL; + + spin_lock(&dev_priv->guc.host2guc_lock); + + dev_priv->guc.action_count += 1; + dev_priv->guc.action_cmd = data[0]; + + for (i = 0; i < len; i++) + I915_WRITE(SOFT_SCRATCH(i), data[i]); + + POSTING_READ(SOFT_SCRATCH(i - 1)); + + I915_WRITE(HOST2GUC_INTERRUPT, HOST2GUC_TRIGGER); + + ret = wait_for_atomic(host2guc_action_response(dev_priv, &status), 10); + if (status != GUC2HOST_STATUS_SUCCESS) { + /* either GuC doesn't respond, which is a TIMEOUT, + * or a failure code is returned. */ + if (ret != -ETIMEDOUT) + ret = -EIO; + + DRM_ERROR("GUC: host2guc action 0x%X failed. 
ret=%d " + "status=0x%08X response=0x%08X\n", + data[0], ret, status, + I915_READ(SOFT_SCRATCH(15))); + + dev_priv->guc.action_fail += 1; + dev_priv->guc.action_err = ret; + } + dev_priv->guc.action_status = status; + + spin_unlock(&dev_priv->guc.host2guc_lock); + + return ret; +} + +/* + * Tell the GuC to allocate or deallocate a specific doorbell + */ + +static int host2guc_allocate_doorbell(struct intel_guc *guc, + struct i915_guc_client *client) +{ + u32 data[2]; + + data[0] = HOST2GUC_ACTION_ALLOCATE_DOORBELL; + data[1] = client->ctx_index; + + return host2guc_action(guc, data, 2); +} + +static int host2guc_release_doorbell(struct intel_guc *guc, + struct i915_guc_client *client) +{ + u32 data[2]; + + data[0] = HOST2GUC_ACTION_DEALLOCATE_DOORBELL; + data[1] = client->ctx_index; + + return host2guc_action(guc, data, 2); +} + +/* + * Initialise, update, or clear doorbell data shared with the GuC + * + * These functions modify shared data and so need access to the mapped + * client object which contains the page being used for the doorbell + */ + +static void guc_init_doorbell(struct intel_guc *guc, + struct i915_guc_client *client) +{ + struct guc_doorbell_info *doorbell; + void *base; + + base = kmap_atomic(i915_gem_object_get_page(client->client_obj, 0)); + doorbell = base + client->doorbell_offset; + + doorbell->db_status = 1; + doorbell->cookie = 0; + + kunmap_atomic(base); +} + +static int guc_ring_doorbell(struct i915_guc_client *gc) +{ + struct guc_process_desc *desc; + union guc_doorbell_qw db_cmp, db_exc, db_ret; + union guc_doorbell_qw *db; + void *base; + int attempt = 2, ret = -EAGAIN; + + base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, 0)); + desc = base + gc->proc_desc_offset; + + /* Update the tail so it is visible to GuC */ + desc->tail = gc->wq_tail; + + /* current cookie */ + db_cmp.db_status = GUC_DOORBELL_ENABLED; + db_cmp.cookie = gc->cookie; + + /* cookie to be updated */ + db_exc.db_status = GUC_DOORBELL_ENABLED; + 
db_exc.cookie = gc->cookie + 1; + if (db_exc.cookie == 0) + db_exc.cookie = 1; + + /* pointer of current doorbell cacheline */ + db = base + gc->doorbell_offset; + + while (attempt--) { + /* lets ring the doorbell */ + db_ret.value_qw = atomic64_cmpxchg((atomic64_t *)db, + db_cmp.value_qw, db_exc.value_qw); + + /* if the exchange was successfully executed */ + if (db_ret.value_qw == db_cmp.value_qw) { + /* db was successfully rung */ + gc->cookie = db_exc.cookie; + ret = 0; + break; + } + + /* XXX: doorbell was lost and need to acquire it again */ + if (db_ret.db_status == GUC_DOORBELL_DISABLED) + break; + + DRM_ERROR("Cookie mismatch. Expected %d, returned %d\n", + db_cmp.cookie, db_ret.cookie); + + /* update the cookie to newly read cookie from GuC */ + db_cmp.cookie = db_ret.cookie; + db_exc.cookie = db_ret.cookie + 1; + if (db_exc.cookie == 0) + db_exc.cookie = 1; + } + + kunmap_atomic(base); + return ret; +} + +static void guc_disable_doorbell(struct intel_guc *guc, + struct i915_guc_client *client) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct guc_doorbell_info *doorbell; + void *base; + int drbreg = GEN8_DRBREGL(client->doorbell_id); + int value; + + base = kmap_atomic(i915_gem_object_get_page(client->client_obj, 0)); + doorbell = base + client->doorbell_offset; + + doorbell->db_status = 0; + + kunmap_atomic(base); + + I915_WRITE(drbreg, I915_READ(drbreg) & ~GEN8_DRB_VALID); + + value = I915_READ(drbreg); + WARN_ON((value & GEN8_DRB_VALID) != 0); + + I915_WRITE(GEN8_DRBREGU(client->doorbell_id), 0); + I915_WRITE(drbreg, 0); + + /* XXX: wait for any interrupts */ + /* XXX: wait for workqueue to drain */ +} + +/* + * Select, assign and relase doorbell cachelines + * + * These functions track which doorbell cachelines are in use. + * The data they manipulate is protected by the host2guc lock. 
+ */ + +static uint32_t select_doorbell_cacheline(struct intel_guc *guc) +{ + const uint32_t cacheline_size = boot_cpu_data.x86_clflush_size; + uint32_t offset; + + spin_lock(&guc->host2guc_lock); + + /* Doorbell uses a single cache line within a page */ + offset = guc->db_cacheline & PAGE_MASK; + + /* Moving to next cache line to reduce contention */ + guc->db_cacheline += cacheline_size; + + spin_unlock(&guc->host2guc_lock); + + return offset; +} + +static uint16_t assign_doorbell(struct intel_guc *guc, uint32_t priority) +{ + /* The bitmap is split into two halves - high and normal priority. */ + const uint16_t half = GUC_MAX_DOORBELLS / 2; + const uint16_t start = (priority <= GUC_CTX_PRIORITY_HIGH) ? half : 0; + const uint16_t end = start + half; + uint16_t id; + + spin_lock(&guc->host2guc_lock); + id = find_next_zero_bit(guc->doorbell_bitmap, end, start); + if (id == end) + id = GUC_INVALID_DOORBELL_ID; + else + bitmap_set(guc->doorbell_bitmap, id, 1); + spin_unlock(&guc->host2guc_lock); + + return id; +} + +static void release_doorbell(struct intel_guc *guc, uint16_t id) +{ + spin_lock(&guc->host2guc_lock); + bitmap_clear(guc->doorbell_bitmap, id, 1); + spin_unlock(&guc->host2guc_lock); +} + +/* + * Initialise the process descriptor shared with the GuC firmware. + */ +static void guc_init_proc_desc(struct intel_guc *guc, + struct i915_guc_client *client) +{ + struct guc_process_desc *desc; + void *base; + + base = kmap_atomic(i915_gem_object_get_page(client->client_obj, 0)); + desc = base + client->proc_desc_offset; + + memset(desc, 0, sizeof(*desc)); + + /* + * XXX: pDoorbell and WQVBaseAddress are pointers in process address + * space for ring3 clients (set them as in mmap_ioctl) or kernel + * space for kernel clients (map on demand instead? May make debug + * easier to have it mapped). 
+ */ + desc->wq_base_addr = 0; + desc->db_base_addr = 0; + + desc->context_id = client->ctx_index; + desc->wq_size_bytes = client->wq_size; + desc->wq_status = WQ_STATUS_ACTIVE; + desc->priority = client->priority; + + kunmap_atomic(base); +} + +/* + * Initialise/clear the context descriptor shared with the GuC firmware. + * + * This descriptor tells the GuC where (in GGTT space) to find the important + * data structures relating to this client (doorbell, process descriptor, + * write queue, etc). + */ + +static void guc_init_ctx_desc(struct intel_guc *guc, + struct i915_guc_client *client) +{ + struct guc_context_desc desc; + struct sg_table *sg; + + memset(&desc, 0, sizeof(desc)); + + desc.attribute = GUC_CTX_DESC_ATTR_ACTIVE | GUC_CTX_DESC_ATTR_KERNEL; + desc.context_id = client->ctx_index; + desc.priority = client->priority; + desc.engines_used = (1 << RCS) | (1 << VCS) | (1 << BCS) | + (1 << VECS) | (1 << VCS2); /* all engines */ + desc.db_id = client->doorbell_id; + + /* + * The CPU address is only needed at certain points, so kmap_atomic on + * demand instead of storing it in the ctx descriptor. 
+ * XXX: May make debug easier to have it mapped + */ + desc.db_trigger_cpu = 0; + desc.db_trigger_uk = client->doorbell_offset + + i915_gem_obj_ggtt_offset(client->client_obj); + desc.db_trigger_phy = client->doorbell_offset + + sg_dma_address(client->client_obj->pages->sgl); + + desc.process_desc = client->proc_desc_offset + + i915_gem_obj_ggtt_offset(client->client_obj); + + desc.wq_addr = client->wq_offset + + i915_gem_obj_ggtt_offset(client->client_obj); + + desc.wq_size = client->wq_size; + + /* + * XXX: Take LRCs from an existing intel_context if this is not an + * IsKMDCreatedContext client + */ + desc.desc_private = (uintptr_t)client; + + /* Pool context is pinned already */ + sg = guc->ctx_pool_obj->pages; + sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc), + sizeof(desc) * client->ctx_index); +} + +static void guc_fini_ctx_desc(struct intel_guc *guc, + struct i915_guc_client *client) +{ + struct guc_context_desc desc; + struct sg_table *sg; + + memset(&desc, 0, sizeof(desc)); + + sg = guc->ctx_pool_obj->pages; + sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc), + sizeof(desc) * client->ctx_index); +} + +/* Get valid workqueue item and return it back to offset */ +static int guc_get_workqueue_space(struct i915_guc_client *gc, u32 *offset) +{ + struct guc_process_desc *desc; + void *base; + u32 size = sizeof(struct guc_wq_item); + int ret = 0, timeout_counter = 200; + unsigned long flags; + + base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, 0)); + desc = base + gc->proc_desc_offset; + + while (timeout_counter-- > 0) { + spin_lock_irqsave(&gc->wq_lock, flags); + + ret = wait_for_atomic(CIRC_SPACE(gc->wq_tail, desc->head, + gc->wq_size) >= size, 1); + + if (!ret) { + *offset = gc->wq_tail; + + /* advance the tail for next workqueue item */ + gc->wq_tail += size; + gc->wq_tail &= gc->wq_size - 1; + + /* this will break the loop */ + timeout_counter = 0; + } + + spin_unlock_irqrestore(&gc->wq_lock, flags); + }; + + 
kunmap_atomic(base); + + return ret; +} + +static int guc_add_workqueue_item(struct i915_guc_client *gc, + struct drm_i915_gem_request *rq) +{ + enum intel_ring_id ring_id = rq->ring->id; + struct guc_wq_item *wqi; + void *base; + u32 tail, wq_len, wq_off = 0; + int ret; + + ret = guc_get_workqueue_space(gc, &wq_off); + if (ret) + return ret; + + /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we + * should not have the case where structure wqi is across page, neither + * wrapped to the beginning. This simplifies the implementation below. + * + * XXX: if not the case, we need save data to a temp wqi and copy it to + * workqueue buffer dw by dw. + */ + WARN_ON(sizeof(struct guc_wq_item) != 16); + WARN_ON(wq_off & 3); + + /* wq starts from the page after doorbell / process_desc */ + base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, + (wq_off + GUC_DB_SIZE) >> PAGE_SHIFT)); + wq_off &= PAGE_SIZE - 1; + wqi = (struct guc_wq_item *)((char *)base + wq_off); + + /* len does not include the header */ + wq_len = sizeof(struct guc_wq_item) / sizeof(u32) - 1; + wqi->header = WQ_TYPE_INORDER | + (wq_len << WQ_LEN_SHIFT) | + (ring_id << WQ_TARGET_SHIFT) | + WQ_NO_WCFLUSH_WAIT; + + /* The GuC wants only the low-order word of the context descriptor */ + wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx, rq->ring); + + /* The GuC firmware wants the tail index in QWords, not bytes */ + tail = rq->ringbuf->tail >> 3; + wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT; + wqi->fence_id = 0; /*XXX: what fence to be here */ + + kunmap_atomic(base); + + return 0; +} + +/** + * i915_guc_submit() - Submit commands through GuC + * @client: the guc client where commands will go through + * @ctx: LRC where commands come from + * @ring: HW engine that will execute the commands + * + * Return: 0 if succeed + */ +int i915_guc_submit(struct i915_guc_client *client, + struct drm_i915_gem_request *rq) +{ + unsigned long flags; + int q_ret, b_ret; + + /* Need this 
because of the deferred pin ctx and ring */ + /* Shall we move this right after ring is pinned? */ + intel_lr_context_update(rq); + + q_ret = guc_add_workqueue_item(client, rq); + if (q_ret == 0) + b_ret = guc_ring_doorbell(client); + + spin_lock_irqsave(&client->wq_lock, flags); + client->submissions += 1; + if (q_ret) { + client->q_fail += 1; + client->retcode = q_ret; + } else if (b_ret) { + client->b_fail += 1; + client->retcode = q_ret = b_ret; + } else { + client->retcode = 0; + } + spin_unlock_irqrestore(&client->wq_lock, flags); + + return q_ret; +} + +/* + * Everything below here is concerned with setup & teardown, and is + * therefore not part of the somewhat time-critical batch-submission + * path of i915_guc_submit() above. + */ + +/** * gem_allocate_guc_obj() - Allocate gem object for GuC usage * @dev: drm device * @size: size of object @@ -75,6 +581,121 @@ static void gem_release_guc_obj(struct drm_i915_gem_object *obj) drm_gem_object_unreference(&obj->base); } +static void guc_client_free(struct drm_device *dev, + struct i915_guc_client *client) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_guc *guc = &dev_priv->guc; + + if (!client) + return; + + if (client->doorbell_id != GUC_INVALID_DOORBELL_ID) { + /* + * First disable the doorbell, then tell the GuC we've + * finished with it, finally deallocate it in our bitmap + */ + guc_disable_doorbell(guc, client); + host2guc_release_doorbell(guc, client); + release_doorbell(guc, client->doorbell_id); + } + + /* + * XXX: wait for any outstanding submissions before freeing memory. 
+ * Be sure to drop any locks + */ + + gem_release_guc_obj(client->client_obj); + + if (client->ctx_index != GUC_INVALID_CTX_ID) { + guc_fini_ctx_desc(guc, client); + ida_simple_remove(&guc->ctx_ids, client->ctx_index); + } + + kfree(client); +} + +/** + * guc_client_alloc() - Allocate an i915_guc_client + * @dev: drm device + * @priority: four levels priority _CRITICAL, _HIGH, _NORMAL and _LOW + * The kernel client to replace ExecList submission is created with + * NORMAL priority. Priority of a client for scheduler can be HIGH, + * while a preemption context can use CRITICAL. + * + * Return: An i915_guc_client object if success. + */ +static struct i915_guc_client *guc_client_alloc(struct drm_device *dev, + uint32_t priority) +{ + struct i915_guc_client *client; + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_guc *guc = &dev_priv->guc; + struct drm_i915_gem_object *obj; + + client = kzalloc(sizeof(*client), GFP_KERNEL); + if (!client) + return NULL; + + client->doorbell_id = GUC_INVALID_DOORBELL_ID; + client->priority = priority; + + client->ctx_index = (uint32_t)ida_simple_get(&guc->ctx_ids, 0, + GUC_MAX_GPU_CONTEXTS, GFP_KERNEL); + if (client->ctx_index >= GUC_MAX_GPU_CONTEXTS) { + client->ctx_index = GUC_INVALID_CTX_ID; + goto err; + } + + /* The first page is doorbell/proc_desc. Two followed pages are wq. */ + obj = gem_allocate_guc_obj(dev, GUC_DB_SIZE + GUC_WQ_SIZE); + if (!obj) + goto err; + + client->client_obj = obj; + client->wq_offset = GUC_DB_SIZE; + client->wq_size = GUC_WQ_SIZE; + spin_lock_init(&client->wq_lock); + + client->doorbell_offset = select_doorbell_cacheline(guc); + + /* + * Since the doorbell only requires a single cacheline, we can save + * space by putting the application process descriptor in the same + * page. Use the half of the page that doesn't include the doorbell. 
+ */ + if (client->doorbell_offset >= (GUC_DB_SIZE / 2)) + client->proc_desc_offset = 0; + else + client->proc_desc_offset = (GUC_DB_SIZE / 2); + + client->doorbell_id = assign_doorbell(guc, client->priority); + if (client->doorbell_id == GUC_INVALID_DOORBELL_ID) + /* XXX: evict a doorbell instead */ + goto err; + + guc_init_proc_desc(guc, client); + guc_init_ctx_desc(guc, client); + guc_init_doorbell(guc, client); + + /* Invalidate GuC TLB to let GuC take the latest updates to GTT. */ + I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE); + + /* XXX: Any cache flushes needed? General domain mgmt calls? */ + + if (host2guc_allocate_doorbell(guc, client)) + goto err; + + DRM_DEBUG_DRIVER("new priority %u client %p: ctx_index %u db_id %u\n", + priority, client, client->ctx_index, client->doorbell_id); + + return client; + +err: + guc_client_free(dev, client); + return NULL; +} + static void guc_create_log(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); @@ -138,6 +759,8 @@ int i915_guc_submission_init(struct drm_device *dev) if (!guc->ctx_pool_obj) return -ENOMEM; + spin_lock_init(&dev_priv->guc.host2guc_lock); + ida_init(&guc->ctx_ids); guc_create_log(guc); @@ -145,6 +768,32 @@ int i915_guc_submission_init(struct drm_device *dev) return 0; } +int i915_guc_submission_enable(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_guc *guc = &dev_priv->guc; + struct i915_guc_client *client; + + /* client for execbuf submission */ + client = guc_client_alloc(dev, GUC_CTX_PRIORITY_NORMAL); + if (!client) { + DRM_ERROR("Failed to create execbuf guc_client\n"); + return -ENOMEM; + } + + guc->execbuf_client = client; + return 0; +} + +void i915_guc_submission_disable(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_guc *guc = &dev_priv->guc; + + guc_client_free(dev, guc->execbuf_client); + guc->execbuf_client = NULL; +} + void i915_guc_submission_fini(struct 
drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index 5b51b05..d249326 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -27,6 +27,30 @@ #include "intel_guc_fwif.h" #include "i915_guc_reg.h" +struct i915_guc_client { + struct drm_i915_gem_object *client_obj; + uint32_t priority; + uint32_t ctx_index; + + uint32_t proc_desc_offset; + uint32_t doorbell_offset; + uint32_t cookie; + uint16_t doorbell_id; + uint16_t padding; /* Maintain alignment */ + + uint32_t wq_offset; + uint32_t wq_size; + + spinlock_t wq_lock; /* Protects all data below */ + uint32_t wq_tail; + + /* GuC submission statistics & status */ + uint64_t submissions; + uint32_t q_fail; + uint32_t b_fail; + int retcode; +}; + enum intel_guc_fw_status { GUC_FIRMWARE_FAIL = -1, GUC_FIRMWARE_NONE = 0, @@ -60,6 +84,20 @@ struct intel_guc { struct drm_i915_gem_object *ctx_pool_obj; struct ida ctx_ids; + + struct i915_guc_client *execbuf_client; + + spinlock_t host2guc_lock; /* Protects all data below */ + + DECLARE_BITMAP(doorbell_bitmap, GUC_MAX_DOORBELLS); + int db_cacheline; + + /* Action status & statistics */ + uint64_t action_count; /* Total commands issued */ + uint32_t action_cmd; /* Last command word */ + uint32_t action_status; /* Last return status */ + uint32_t action_fail; /* Total number of failures */ + int32_t action_err; /* Last error code */ }; /* intel_guc_loader.c */ @@ -70,6 +108,10 @@ extern const char *intel_guc_fw_status_repr(enum intel_guc_fw_status status); /* i915_guc_submission.c */ int i915_guc_submission_init(struct drm_device *dev); +int i915_guc_submission_enable(struct drm_device *dev); +int i915_guc_submit(struct i915_guc_client *client, + struct drm_i915_gem_request *rq); +void i915_guc_submission_disable(struct drm_device *dev); void i915_guc_submission_fini(struct drm_device *dev); #endif diff --git 
a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index e5d7136..25ba29f 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -427,6 +427,8 @@ int intel_guc_ucode_load(struct drm_device *dev) intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status), intel_guc_fw_status_repr(guc_fw->guc_fw_load_status)); + i915_guc_submission_disable(dev); + if (guc_fw->guc_fw_fetch_status == GUC_FIRMWARE_NONE) return 0; @@ -479,12 +481,20 @@ int intel_guc_ucode_load(struct drm_device *dev) intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status), intel_guc_fw_status_repr(guc_fw->guc_fw_load_status)); + if (i915.enable_guc_submission) { + err = i915_guc_submission_enable(dev); + if (err) + goto fail; + } + return 0; fail: if (guc_fw->guc_fw_load_status == GUC_FIRMWARE_PENDING) guc_fw->guc_fw_load_status = GUC_FIRMWARE_FAIL; + i915_guc_submission_disable(dev); + DRM_ERROR("Failed to initialize GuC, error %d\n", err); return err; @@ -547,6 +557,8 @@ void intel_guc_ucode_fini(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw; + i915_guc_submission_fini(dev); + if (guc_fw->guc_fw_obj) drm_gem_object_unreference(&guc_fw->guc_fw_obj->base); guc_fw->guc_fw_obj = NULL;