@@ -1653,10 +1653,18 @@ struct i915_oa_reg {
u32 value;
};
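+/*
+ * Event lifecycle: INIT when no event is active (and again once
+ * teardown completes), STARTED while reports are forwarded,
+ * STOP_IN_PROGRESS while a stop in multiple-ctx mode waits for
+ * in-flight commands, STOPPED once a single-ctx event is stopped.
+ */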
+enum i915_oa_event_state {
+ I915_OA_EVENT_INIT,
+ I915_OA_EVENT_STARTED,
+ I915_OA_EVENT_STOP_IN_PROGRESS,
+ I915_OA_EVENT_STOPPED,
+};
+
struct i915_oa_rcs_node {
struct list_head head;
struct drm_i915_gem_request *req;
u32 offset;
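+ /* skip forwarding this snapshot (event stopped before completion) */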
+ bool discard;
u32 ctx_id;
};
@@ -1934,7 +1942,7 @@ struct drm_i915_private {
struct perf_event *exclusive_event;
struct intel_context *specific_ctx;
- bool event_active;
+ enum i915_oa_event_state event_state;
bool periodic;
bool multiple_ctx_mode;
@@ -1964,6 +1972,7 @@ struct drm_i915_private {
} oa_rcs_buffer;
struct list_head node_list;
struct work_struct work_timer;
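+ /* deferred teardown of an event in multiple-ctx mode */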
+ struct work_struct work_event_destroy;
} oa_pmu;
#endif
@@ -83,7 +83,7 @@ static u32 forward_oa_snapshots(struct drm_i915_private *dev_priv,
/* We currently only allow exclusive access to the counters
* so only have one event to forward to... */
- if (dev_priv->oa_pmu.event_active)
+ if (dev_priv->oa_pmu.event_state == I915_OA_EVENT_STARTED)
forward_one_oa_snapshot_to_event(dev_priv, snapshot,
exclusive_event);
}
@@ -128,6 +128,9 @@ static void flush_oa_snapshots(struct drm_i915_private *dev_priv,
u32 head;
u32 tail;
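+ /* Once the event is fully stopped, don't forward any more reports */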
+ if (dev_priv->oa_pmu.event_state == I915_OA_EVENT_STOPPED)
+ return;
+
/* Can either flush via hrtimer callback or pmu methods/fops */
if (skip_if_flushing) {
@@ -204,6 +207,36 @@ int i915_oa_rcs_wait_gpu(struct drm_i915_private *dev_priv)
return 0;
}
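+/*
+ * Release the request references held by pending snapshot nodes and
+ * free the nodes. Called from the event-destroy worker once the GPU
+ * has completed the in-flight commands.
+ */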
+void i915_oa_rcs_release_request_ref(struct drm_i915_private *dev_priv)
+{
+ struct i915_oa_rcs_node *entry, *next;
+ struct drm_i915_gem_request *req;
+ unsigned long lock_flags;
+ int ret;
+
+ list_for_each_entry_safe(entry, next, &dev_priv->oa_pmu.node_list, head) {
+ req = entry->req;
+ if (req) {
+ ret = i915_mutex_lock_interruptible(dev_priv->dev);
+ if (ret)
+ break;
+ i915_gem_request_assign(&entry->req, NULL);
+ mutex_unlock(&dev_priv->dev->struct_mutex);
+ }
+
+ /*
+ * This fn won't run concurrently with the forward-snapshots work
+ * fn; these are the only two places where list entries are
+ * deleted, so the full loop shouldn't need protection.
+ */
+ spin_lock_irqsave(&dev_priv->oa_pmu.lock, lock_flags);
+ list_del(&entry->head);
+ spin_unlock_irqrestore(&dev_priv->oa_pmu.lock, lock_flags);
+ kfree(entry);
+ }
+}
+
static void forward_one_oa_rcs_sample(struct drm_i915_private *dev_priv,
struct i915_oa_rcs_node *node)
{
@@ -247,11 +280,19 @@ void forward_oa_rcs_snapshots_work(struct work_struct *__work)
unsigned long lock_flags;
int ret;
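+ /* Nothing to forward unless an event is currently started */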
+ spin_lock_irqsave(&dev_priv->oa_pmu.lock, lock_flags);
+ if (dev_priv->oa_pmu.event_state != I915_OA_EVENT_STARTED) {
+ spin_unlock_irqrestore(&dev_priv->oa_pmu.lock, lock_flags);
+ return;
+ }
+ spin_unlock_irqrestore(&dev_priv->oa_pmu.lock, lock_flags);
+
list_for_each_entry_safe(entry, next, &dev_priv->oa_pmu.node_list, head) {
req = entry->req;
if (req && i915_gem_request_completed(req, true)) {
- forward_one_oa_rcs_sample(dev_priv, entry);
+ if (!entry->discard)
+ forward_one_oa_rcs_sample(dev_priv, entry);
ret = i915_mutex_lock_interruptible(dev_priv->dev);
if (ret)
break;
@@ -317,16 +358,101 @@ static void i915_oa_event_destroy(struct perf_event *event)
WARN_ON(event->parent);
- /* Stop updating oacontrol via _oa_context_pin_[un]notify()... */
+ if (dev_priv->oa_pmu.multiple_ctx_mode) {
+ /* Stop updating oacontrol via _oa_context_pin_[un]notify() */
+ spin_lock_irqsave(&dev_priv->oa_pmu.lock, lock_flags);
+ dev_priv->oa_pmu.specific_ctx = NULL;
+ spin_unlock_irqrestore(&dev_priv->oa_pmu.lock, lock_flags);
+
+ cancel_work_sync(&dev_priv->oa_pmu.work_timer);
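+ /* The remaining teardown (waiting for in-flight commands,
+ * releasing request refs, destroying the buffers) is deferred
+ * to a worker, since it may need to wait on the GPU.
+ */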
+ schedule_work(&dev_priv->oa_pmu.work_event_destroy);
+
+ BUG_ON(dev_priv->oa_pmu.exclusive_event != event);
+ dev_priv->oa_pmu.exclusive_event = NULL;
+
+ /* We can dereference our local copy of the rcs buffer here, since
+ * an active reference on the buffer is taken while inserting
+ * commands, so the buffer will be freed only after the GPU is
+ * done with it.
+ */
+ oa_rcs_buffer_destroy(dev_priv);
+ } else {
+ /* Stop updating oacontrol via _oa_context_pin_[un]notify() */
+ spin_lock_irqsave(&dev_priv->oa_pmu.lock, lock_flags);
+ dev_priv->oa_pmu.specific_ctx = NULL;
+ spin_unlock_irqrestore(&dev_priv->oa_pmu.lock, lock_flags);
+
+ /* Don't let the compiler start resetting OA, PM and clock
+ * gating state before we've stopped update_oacontrol()
+ */
+ barrier();
+
+ BUG_ON(dev_priv->oa_pmu.exclusive_event != event);
+ dev_priv->oa_pmu.exclusive_event = NULL;
+
+ oa_buffer_destroy(dev_priv);
+
+ I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) &
+ ~GEN6_CSUNIT_CLOCK_GATE_DISABLE));
+ I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) |
+ GEN7_DOP_CLOCK_GATE_ENABLE));
+
+ I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) &
+ ~GT_NOA_ENABLE));
+
+ intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+ intel_runtime_pm_put(dev_priv);
+ dev_priv->oa_pmu.event_state = I915_OA_EVENT_INIT;
+ }
+}
+
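+/*
+ * Deferred teardown for multiple-ctx mode: wait for the in-flight OA
+ * commands to complete on the GPU, release the request references,
+ * then disable the OA unit and free the periodic OA buffer.
+ */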
+void i915_oa_rcs_event_destroy_work(struct work_struct *__work)
+{
+ struct drm_i915_private *dev_priv =
+ container_of(__work, typeof(*dev_priv),
+ oa_pmu.work_event_destroy);
+ unsigned long lock_flags;
+ int ret;
+
+ /* Stop updating oacontrol via _oa_context_pin_[un]notify()
+ * TODO: Is this required here?
+ */
spin_lock_irqsave(&dev_priv->oa_pmu.lock, lock_flags);
dev_priv->oa_pmu.specific_ctx = NULL;
spin_unlock_irqrestore(&dev_priv->oa_pmu.lock, lock_flags);
- /* Don't let the compiler start resetting OA, PM and clock gating
- * state before we've stopped update_oacontrol()
+ /* Don't let the compiler start resetting OA, PM and clock
+ * gating state before we've stopped update_oacontrol()
*/
barrier();
+ ret = i915_oa_rcs_wait_gpu(dev_priv);
+ if (ret)
+ goto out;
+
+ i915_oa_rcs_release_request_ref(dev_priv);
+
+out:
+ /* Disable OA unit */
+ I915_WRITE(GEN7_OACONTROL, 0);
+
+ /* The periodic OA buffer has to be destroyed here, since this
+ * can be done only after the OA unit is disabled. There is no
+ * active reference tracking mechanism for the periodic OA buffer,
+ * so we can only dereference it in the worker after we've disabled
+ * the OA unit (which we can do once we're sure the in-flight GPU
+ * commands have completed).
+ */
+ /* TODO: Once we have callbacks in place on completion of a
+ * request (i.e. when the retire-notification patches land), we can
+ * take an active reference on the LRI request (submitted for
+ * disabling OA) during event stop/destroy, and perform these
+ * actions in the callback instead of the work fn.
+ */
+
+ oa_buffer_destroy(dev_priv);
+
+ spin_lock_irqsave(&dev_priv->oa_pmu.lock, lock_flags);
+
I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) &
~GEN6_CSUNIT_CLOCK_GATE_DISABLE));
I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) |
@@ -335,16 +461,10 @@ static void i915_oa_event_destroy(struct perf_event *event)
I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) &
~GT_NOA_ENABLE));
- if (dev_priv->oa_pmu.multiple_ctx_mode)
- oa_rcs_buffer_destroy(dev_priv);
-
- oa_buffer_destroy(dev_priv);
-
- BUG_ON(dev_priv->oa_pmu.exclusive_event != event);
- dev_priv->oa_pmu.exclusive_event = NULL;
-
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
intel_runtime_pm_put(dev_priv);
+ dev_priv->oa_pmu.event_state = I915_OA_EVENT_INIT;
+ spin_unlock_irqrestore(&dev_priv->oa_pmu.lock, lock_flags);
}
static int alloc_obj(struct drm_i915_private *dev_priv,
@@ -625,7 +745,8 @@ static int i915_oa_event_init(struct perf_event *event)
* counter snapshots and marshal to the appropriate client -
* we currently only allow exclusive access */
spin_lock_irqsave(&dev_priv->oa_pmu.lock, lock_flags);
- if (dev_priv->oa_pmu.oa_buffer.obj) {
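+ /* Also reject if a previous event's deferred teardown hasn't
+ * finished yet (event_state only returns to INIT at the end of
+ * the destroy path).
+ */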
+ if (dev_priv->oa_pmu.oa_buffer.obj ||
+ dev_priv->oa_pmu.event_state != I915_OA_EVENT_INIT) {
spin_unlock_irqrestore(&dev_priv->oa_pmu.lock, lock_flags);
return -EBUSY;
}
@@ -790,7 +911,8 @@ static void update_oacontrol(struct drm_i915_private *dev_priv)
{
BUG_ON(!spin_is_locked(&dev_priv->oa_pmu.lock));
- if (dev_priv->oa_pmu.event_active) {
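+ /* OA stays enabled while a stop is in progress so that the
+ * in-flight RCS snapshots still produce valid reports.
+ */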
+ if ((dev_priv->oa_pmu.event_state == I915_OA_EVENT_STARTED) ||
+ (dev_priv->oa_pmu.event_state == I915_OA_EVENT_STOP_IN_PROGRESS)) {
unsigned long ctx_id = 0;
bool pinning_ok = false;
@@ -900,7 +1022,7 @@ static void i915_oa_event_start(struct perf_event *event, int flags)
spin_lock_irqsave(&dev_priv->oa_pmu.lock, lock_flags);
- dev_priv->oa_pmu.event_active = true;
+ dev_priv->oa_pmu.event_state = I915_OA_EVENT_STARTED;
update_oacontrol(dev_priv);
/* Reset the head ptr to ensure we don't forward reports relating
@@ -927,14 +1049,6 @@ static void i915_oa_event_stop(struct perf_event *event, int flags)
container_of(event->pmu, typeof(*dev_priv), oa_pmu.pmu);
unsigned long lock_flags;
- spin_lock_irqsave(&dev_priv->oa_pmu.lock, lock_flags);
-
- dev_priv->oa_pmu.event_active = false;
- update_oacontrol(dev_priv);
-
- mmiowb();
- spin_unlock_irqrestore(&dev_priv->oa_pmu.lock, lock_flags);
-
if (event->attr.sample_period) {
hrtimer_cancel(&dev_priv->oa_pmu.timer);
if (dev_priv->oa_pmu.multiple_ctx_mode)
@@ -942,6 +1056,23 @@ static void i915_oa_event_stop(struct perf_event *event, int flags)
flush_oa_snapshots(dev_priv, false, U64_MAX);
}
+ if (dev_priv->oa_pmu.multiple_ctx_mode) {
+ struct i915_oa_rcs_node *entry;
+
+ spin_lock_irqsave(&dev_priv->oa_pmu.lock, lock_flags);
+
+ dev_priv->oa_pmu.event_state = I915_OA_EVENT_STOP_IN_PROGRESS;
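+ /* Pending snapshots will still complete on the GPU, but must
+ * not be forwarded to userspace; the destroy worker releases
+ * and frees them.
+ */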
+ list_for_each_entry(entry, &dev_priv->oa_pmu.node_list, head)
+ entry->discard = true;
+
+ spin_unlock_irqrestore(&dev_priv->oa_pmu.lock, lock_flags);
+ } else {
+ spin_lock_irqsave(&dev_priv->oa_pmu.lock, lock_flags);
+ dev_priv->oa_pmu.event_state = I915_OA_EVENT_STOPPED;
+ update_oacontrol(dev_priv);
+ mmiowb();
+ spin_unlock_irqrestore(&dev_priv->oa_pmu.lock, lock_flags);
+ }
event->hw.state = PERF_HES_STOPPED;
}
@@ -1075,10 +1206,13 @@ void i915_oa_pmu_register(struct drm_device *dev)
i915->oa_pmu.timer.function = hrtimer_sample;
INIT_WORK(&i915->oa_pmu.work_timer, forward_oa_rcs_snapshots_work);
+ INIT_WORK(&i915->oa_pmu.work_event_destroy,
+ i915_oa_rcs_event_destroy_work);
spin_lock_init(&i915->oa_pmu.lock);
i915->oa_pmu.pmu.capabilities = PERF_PMU_CAP_IS_DEVICE;
+ i915->oa_pmu.event_state = I915_OA_EVENT_INIT;
/* Effectively disallow opening an event with a specific pid
* since we aren't interested in processes running on the cpu...
@@ -1105,8 +1239,10 @@ void i915_oa_pmu_unregister(struct drm_device *dev)
if (i915->oa_pmu.pmu.event_init == NULL)
return;
- if (i915->oa_pmu.multiple_ctx_mode)
+ if (i915->oa_pmu.multiple_ctx_mode) {
cancel_work_sync(&i915->oa_pmu.work_timer);
+ cancel_work_sync(&i915->oa_pmu.work_event_destroy);
+ }
unregister_sysctl_table(i915->oa_pmu.sysctl_header);