@@ -2450,8 +2450,6 @@ static void i915_guc_client_info(struct seq_file *m,
seq_printf(m, "\tWQ size %d, offset: 0x%x, tail %d\n",
client->wq_size, client->wq_offset, client->wq_tail);
- seq_printf(m, "\tWork queue full: %u\n", client->no_wq_space);
-
for_each_engine(engine, dev_priv, id) {
u64 submissions = client->submissions[id];
tot += submissions;
@@ -406,63 +406,6 @@ static void guc_stage_desc_fini(struct intel_guc *guc,
memset(desc, 0, sizeof(*desc));
}
-/**
- * i915_guc_wq_reserve() - reserve space in the GuC's workqueue
- * @request: request associated with the commands
- *
- * Return: 0 if space is available
- * -EAGAIN if space is not currently available
- *
- * This function must be called (and must return 0) before a request
- * is submitted to the GuC via i915_guc_submit() below. Once a result
- * of 0 has been returned, it must be balanced by a corresponding
- * call to submit().
- *
- * Reservation allows the caller to determine in advance that space
- * will be available for the next submission before committing resources
- * to it, and helps avoid late failures with complicated recovery paths.
- */
-int i915_guc_wq_reserve(struct drm_i915_gem_request *request)
-{
- const size_t wqi_size = sizeof(struct guc_wq_item);
- struct i915_guc_client *client = request->i915->guc.execbuf_client;
- struct guc_process_desc *desc = __get_process_desc(client);
- u32 freespace;
- int ret;
-
- spin_lock_irq(&client->wq_lock);
- freespace = CIRC_SPACE(client->wq_tail, desc->head, client->wq_size);
- freespace -= client->wq_rsvd;
- if (likely(freespace >= wqi_size)) {
- client->wq_rsvd += wqi_size;
- ret = 0;
- } else {
- client->no_wq_space++;
- ret = -EAGAIN;
- }
- spin_unlock_irq(&client->wq_lock);
-
- return ret;
-}
-
-static void guc_client_update_wq_rsvd(struct i915_guc_client *client, int size)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&client->wq_lock, flags);
- client->wq_rsvd += size;
- spin_unlock_irqrestore(&client->wq_lock, flags);
-}
-
-void i915_guc_wq_unreserve(struct drm_i915_gem_request *request)
-{
- const int wqi_size = sizeof(struct guc_wq_item);
- struct i915_guc_client *client = request->i915->guc.execbuf_client;
-
- GEM_BUG_ON(READ_ONCE(client->wq_rsvd) < wqi_size);
- guc_client_update_wq_rsvd(client, -wqi_size);
-}
-
/* Construct a Work Item and append it to the GuC's Work Queue */
static void guc_wq_item_append(struct i915_guc_client *client,
struct drm_i915_gem_request *rq)
@@ -475,7 +418,7 @@ static void guc_wq_item_append(struct i915_guc_client *client,
struct guc_wq_item *wqi;
u32 freespace, tail, wq_off;
- /* Free space is guaranteed, see i915_guc_wq_reserve() above */
+	/* Free space is guaranteed, see i915_guc_submission_enable() */
freespace = CIRC_SPACE(client->wq_tail, desc->head, client->wq_size);
GEM_BUG_ON(freespace < wqi_size);
@@ -491,14 +434,12 @@ static void guc_wq_item_append(struct i915_guc_client *client,
* workqueue buffer dw by dw.
*/
BUILD_BUG_ON(wqi_size != 16);
- GEM_BUG_ON(client->wq_rsvd < wqi_size);
/* postincrement WQ tail for next time */
wq_off = client->wq_tail;
GEM_BUG_ON(wq_off & (wqi_size - 1));
client->wq_tail += wqi_size;
client->wq_tail &= client->wq_size - 1;
- client->wq_rsvd -= wqi_size;
/* WQ starts from the page after doorbell / process_desc */
wqi = client->vaddr + wq_off + GUC_DB_SIZE;
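
For reference, the free-space check and the post-increment of wq_tail above rely on the work queue size being a power of two. A minimal standalone sketch of the same arithmetic, using a flattened equivalent of the CIRC_SPACE() definition from include/linux/circ_buf.h and a hypothetical 4 KiB queue (the kernel code reads the head index from the GuC-owned process descriptor rather than a local variable):

#include <assert.h>
#include <stdint.h>

/* Equivalent to CIRC_SPACE() from include/linux/circ_buf.h */
#define CIRC_SPACE(head, tail, size) (((tail) - ((head) + 1)) & ((size) - 1))

int main(void)
{
	const uint32_t wqi_size = 16;	/* sizeof(struct guc_wq_item) */
	const uint32_t wq_size = 4096;	/* hypothetical; must be a power of two */
	uint32_t wq_tail = 0;		/* driver-owned write index */
	uint32_t head = 0;		/* GuC-owned read index (desc->head) */

	/* Same check as guc_wq_item_append(): room for one 16-byte item? */
	assert(CIRC_SPACE(wq_tail, head, wq_size) >= wqi_size);

	/* Post-increment the tail, wrapping with the power-of-two mask. */
	wq_tail += wqi_size;
	wq_tail &= wq_size - 1;
	return 0;
}
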
@@ -580,47 +521,43 @@ static int guc_ring_doorbell(struct i915_guc_client *client)
}
/**
- * __i915_guc_submit() - Submit commands through GuC
+ * i915_guc_submit() - Submit commands through GuC
- * @rq: request associated with the commands
+ * @engine: engine associated with the commands
*
- * The caller must have already called i915_guc_wq_reserve() above with
- * a result of 0 (success), guaranteeing that there is space in the work
- * queue for the new request, so enqueuing the item cannot fail.
- *
- * Bad Things Will Happen if the caller violates this protocol e.g. calls
- * submit() when _reserve() says there's no space, or calls _submit()
- * a different number of times from (successful) calls to _reserve().
- *
* The only error here arises if the doorbell hardware isn't functioning
- * as expected, which really shouln't happen.
+ * as expected, which really shouldn't happen.
*/
-static void __i915_guc_submit(struct drm_i915_gem_request *rq)
+static void i915_guc_submit(struct intel_engine_cs *engine)
{
- struct drm_i915_private *dev_priv = rq->i915;
- struct intel_engine_cs *engine = rq->engine;
- unsigned int engine_id = engine->id;
- struct intel_guc *guc = &rq->i915->guc;
+ struct drm_i915_private *dev_priv = engine->i915;
+ struct intel_guc *guc = &dev_priv->guc;
struct i915_guc_client *client = guc->execbuf_client;
+ struct execlist_port *port = engine->execlist_port;
+ unsigned int engine_id = engine->id;
+ unsigned int n;
unsigned long flags;
- /* WA to flush out the pending GMADR writes to ring buffer. */
- if (i915_vma_is_map_and_fenceable(rq->ring->vma))
- POSTING_READ_FW(GUC_STATUS);
+ for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) {
+ struct drm_i915_gem_request *rq;
+ unsigned int count;
- spin_lock_irqsave(&client->wq_lock, flags);
+ rq = port_unpack(&port[n], &count);
+ if (rq && count == 0) {
+ port_set(&port[n], port_pack(rq, ++count));
- guc_wq_item_append(client, rq);
- WARN_ON(guc_ring_doorbell(client));
+ if (i915_vma_is_map_and_fenceable(rq->ring->vma))
+ POSTING_READ_FW(GUC_STATUS);
- client->submissions[engine_id] += 1;
+ spin_lock_irqsave(&client->wq_lock, flags);
- spin_unlock_irqrestore(&client->wq_lock, flags);
-}
+ guc_wq_item_append(client, rq);
+ WARN_ON(guc_ring_doorbell(client));
-static void i915_guc_submit(struct drm_i915_gem_request *rq)
-{
- __i915_gem_request_submit(rq);
- __i915_guc_submit(rq);
+ client->submissions[engine_id] += 1;
+
+ spin_unlock_irqrestore(&client->wq_lock, flags);
+ }
+ }
}
static void nested_enable_signaling(struct drm_i915_gem_request *rq)
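
The loop in i915_guc_submit() above only appends a work item for a port whose packed count is still zero, then bumps the count, so a later pass over the ports (for instance a replay after reset) will not queue the same request twice. A minimal sketch of how such a count can be stored in the low alignment bits of the request pointer; the helper names and the 2-bit width here are illustrative assumptions, the driver's own port_pack()/port_unpack() macros are the authoritative definitions:

#include <assert.h>
#include <stdint.h>

#define COUNT_BITS 2
#define COUNT_MASK ((1u << COUNT_BITS) - 1)

struct request;	/* opaque; only the pointer value matters here */

/* Hypothetical pack: the pointer must be at least 4-byte aligned. */
static inline struct request *pack(struct request *rq, unsigned int count)
{
	assert(((uintptr_t)rq & COUNT_MASK) == 0);
	assert(count <= COUNT_MASK);
	return (struct request *)((uintptr_t)rq | count);
}

/* Hypothetical unpack: returns the bare pointer, count via *count. */
static inline struct request *unpack(struct request *packed, unsigned int *count)
{
	*count = (uintptr_t)packed & COUNT_MASK;
	return (struct request *)((uintptr_t)packed & ~(uintptr_t)COUNT_MASK);
}
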
@@ -654,16 +591,19 @@ static void port_assign(struct execlist_port *port,
if (port_isset(port))
i915_gem_request_put(port_request(port));
- port_set(port, i915_gem_request_get(rq));
+ port_set(port, port_pack(i915_gem_request_get(rq), port_count(port)));
nested_enable_signaling(rq);
}
-static bool i915_guc_dequeue(struct intel_engine_cs *engine)
+static void i915_guc_dequeue(struct intel_engine_cs *engine)
{
struct execlist_port *port = engine->execlist_port;
- struct drm_i915_gem_request *last = port_request(port);
- struct rb_node *rb;
+ struct drm_i915_gem_request *last = NULL;
bool submit = false;
+ struct rb_node *rb;
+
+ if (port_isset(port))
+ port++;
spin_lock_irq(&engine->timeline->lock);
rb = engine->execlist_first;
@@ -688,7 +628,7 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine)
INIT_LIST_HEAD(&rq->priotree.link);
rq->priotree.priority = INT_MAX;
- i915_guc_submit(rq);
+ __i915_gem_request_submit(rq);
trace_i915_gem_request_in(rq, port_index(port, engine));
last = rq;
submit = true;
@@ -702,11 +642,11 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine)
}
done:
engine->execlist_first = rb;
- if (submit)
+ if (submit) {
port_assign(port, last);
+ i915_guc_submit(engine);
+ }
spin_unlock_irq(&engine->timeline->lock);
-
- return submit;
}
static void i915_guc_irq_handler(unsigned long data)
@@ -714,24 +654,20 @@ static void i915_guc_irq_handler(unsigned long data)
struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
struct execlist_port *port = engine->execlist_port;
struct drm_i915_gem_request *rq;
- bool submit;
- do {
- rq = port_request(&port[0]);
- while (rq && i915_gem_request_completed(rq)) {
- trace_i915_gem_request_out(rq);
- i915_gem_request_put(rq);
+ rq = port_request(&port[0]);
+ while (rq && i915_gem_request_completed(rq)) {
+ trace_i915_gem_request_out(rq);
+ i915_gem_request_put(rq);
- port[0] = port[1];
- memset(&port[1], 0, sizeof(port[1]));
+ port[0] = port[1];
+ memset(&port[1], 0, sizeof(port[1]));
- rq = port_request(&port[0]);
- }
+ rq = port_request(&port[0]);
+ }
- submit = false;
- if (!port_count(&port[1]))
- submit = i915_guc_dequeue(engine);
- } while (submit);
+ if (!port_isset(&port[1]))
+ i915_guc_dequeue(engine);
}
/*
@@ -1220,6 +1156,19 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
enum intel_engine_id id;
int err;
+ /*
+ * We're using GuC work items for submitting work through GuC. Since
+ * we're coalescing multiple requests from a single context into a
+ * single work item prior to assigning it to execlist_port, we can
+ * never have more work items than the total number of ports (for all
+	 * engines). The GuC firmware controls the HEAD of the work queue,
+	 * and it is guaranteed to remove each work item from the queue
+	 * before the corresponding request is completed.
+ */
+ BUILD_BUG_ON(ARRAY_SIZE(engine->execlist_port) *
+ sizeof(struct guc_wq_item) *
+ I915_NUM_ENGINES > GUC_WQ_SIZE);
+
if (!client) {
client = guc_client_alloc(dev_priv,
INTEL_INFO(dev_priv)->ring_mask,
@@ -1247,9 +1196,6 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
guc_interrupts_capture(dev_priv);
for_each_engine(engine, dev_priv, id) {
- const int wqi_size = sizeof(struct guc_wq_item);
- struct drm_i915_gem_request *rq;
-
/* The tasklet was initialised by execlists, and may be in
* a state of flux (across a reset) and so we just want to
* take over the callback without changing any other state
@@ -1257,14 +1203,7 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
*/
engine->irq_tasklet.func = i915_guc_irq_handler;
clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
-
- /* Replay the current set of previously submitted requests */
- spin_lock_irq(&engine->timeline->lock);
- list_for_each_entry(rq, &engine->timeline->requests, link) {
- guc_client_update_wq_rsvd(client, wqi_size);
- __i915_guc_submit(rq);
- }
- spin_unlock_irq(&engine->timeline->lock);
+ i915_guc_submit(engine);
}
return 0;
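
To put the new BUILD_BUG_ON in concrete terms: with the two execlist ports per engine used above, the 16-byte work items asserted earlier in this patch (BUILD_BUG_ON(wqi_size != 16)), and, for example, I915_NUM_ENGINES = 5, the worst case is 2 * 16 * 5 = 160 bytes of work items outstanding at once, a small fraction of GUC_WQ_SIZE. The engine count is quoted only for illustration; the assertion itself uses I915_NUM_ENGINES and GUC_WQ_SIZE from the driver headers.
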
@@ -914,27 +914,14 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request)
*/
request->reserved_space += EXECLISTS_REQUEST_SIZE;
- if (i915.enable_guc_submission) {
- /*
- * Check that the GuC has space for the request before
- * going any further, as the i915_add_request() call
- * later on mustn't fail ...
- */
- ret = i915_guc_wq_reserve(request);
- if (ret)
- goto err;
- }
-
cs = intel_ring_begin(request, 0);
- if (IS_ERR(cs)) {
- ret = PTR_ERR(cs);
- goto err_unreserve;
- }
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
if (!ce->initialised) {
ret = engine->init_context(request);
if (ret)
- goto err_unreserve;
+ return ret;
ce->initialised = true;
}
@@ -948,12 +935,6 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request)
request->reserved_space -= EXECLISTS_REQUEST_SIZE;
return 0;
-
-err_unreserve:
- if (i915.enable_guc_submission)
- i915_guc_wq_unreserve(request);
-err:
- return ret;
}
/*
@@ -52,13 +52,6 @@ struct drm_i915_gem_request;
* GuC). The subsequent pages of the client object constitute the work
* queue (a circular array of work items), again described in the process
* descriptor. Work queue pages are mapped momentarily as required.
- *
- * We also keep a few statistics on failures. Ideally, these should all
- * be zero!
- * no_wq_space: times that the submission pre-check found no space was
- * available in the work queue (note, the queue is shared,
- * not per-engine). It is OK for this to be nonzero, but
- * it should not be huge!
*/
struct i915_guc_client {
struct i915_vma *vma;
@@ -79,8 +72,6 @@ struct i915_guc_client {
uint32_t wq_offset;
uint32_t wq_size;
uint32_t wq_tail;
- uint32_t wq_rsvd;
- uint32_t no_wq_space;
/* Per-engine counts of GuC submissions */
uint64_t submissions[I915_NUM_ENGINES];
@@ -246,8 +237,6 @@ u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv);
/* i915_guc_submission.c */
int i915_guc_submission_init(struct drm_i915_private *dev_priv);
int i915_guc_submission_enable(struct drm_i915_private *dev_priv);
-int i915_guc_wq_reserve(struct drm_i915_gem_request *rq);
-void i915_guc_wq_unreserve(struct drm_i915_gem_request *request);
void i915_guc_submission_disable(struct drm_i915_private *dev_priv);
void i915_guc_submission_fini(struct drm_i915_private *dev_priv);
struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size);