Message ID | 1468158084-22028-18-git-send-email-akash.goel@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Sun, Jul 10, 2016 at 07:11:24PM +0530, akash.goel@intel.com wrote: > @@ -1707,8 +1692,8 @@ static void gen9_guc_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir) > I915_READ(SOFT_SCRATCH(15)) & ~msg); > > /* Handle flush interrupt event in bottom half */ > - queue_work(dev_priv->guc.log.wq, > - &dev_priv->guc.events_work); > + smp_store_mb(dev_priv->guc.log.flush_signal, 1); > + wake_up_process(dev_priv->guc.log.flush_task); > } > +void guc_cancel_log_flush_work_sync(struct drm_i915_private *dev_priv) > +{ > + spin_lock_irq(&dev_priv->irq_lock); > + dev_priv->guc.log.flush_signal = false; > + spin_unlock_irq(&dev_priv->irq_lock); > + > + if (dev_priv->guc.log.flush_task) > + wait_for_completion(&dev_priv->guc.log.flush_completion); > +} > + > +static int guc_log_flush_worker(void *arg) > +{ > + struct drm_i915_private *dev_priv = arg; > + struct intel_guc *guc = &dev_priv->guc; > + > + /* Install ourselves with high priority to reduce signalling latency */ > + struct sched_param param = { .sched_priority = 1 }; > + sched_setscheduler_nocheck(current, SCHED_FIFO, ¶m); > + > + do { > + set_current_state(TASK_INTERRUPTIBLE); > + > + spin_lock_irq(&dev_priv->irq_lock); > + if (guc->log.flush_signal) { > + guc->log.flush_signal = false; > + reinit_completion(&guc->log.flush_completion); > + spin_unlock_irq(&dev_priv->irq_lock); > + i915_guc_capture_logs(&dev_priv->drm); > + complete_all(&guc->log.flush_completion); > + } else { > + spin_unlock_irq(&dev_priv->irq_lock); > + if (kthread_should_stop()) > + break; > + > + schedule(); > + } > + } while (1); > + __set_current_state(TASK_RUNNING); > + > + return 0; This looks decidely fishy. 
irq handler: smp_store_mb(log.signal, 1); wake_up_process(log.tsk); worker: do { set_current_state(TASK_INT); while (cmpxchg(&log.signal, 1, 0)) { reinit_completion(log.complete); i915_guc_capture_logs(); } complete_all(log.complete); if (kthread_should_stop()) break; schedule(); } while(1); __set_current_state(TASK_RUNNING); flush: smp_store_mb(log.signal, 0); wait_for_completion(log.complete); In the end, just the silly locking and placement of complete_all() is dangerous. reinit_completion() lacks the barrier to be used like this really, at any rate, racy with the irq handler, so use sparingly or when you control the irq handler. (Also not sure if log.signal = 0 is sane, or the current callsites really require the flush.) -Chris
On 7/21/2016 1:04 AM, Chris Wilson wrote: > On Sun, Jul 10, 2016 at 07:11:24PM +0530, akash.goel@intel.com wrote: >> @@ -1707,8 +1692,8 @@ static void gen9_guc_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir) >> I915_READ(SOFT_SCRATCH(15)) & ~msg); >> >> /* Handle flush interrupt event in bottom half */ >> - queue_work(dev_priv->guc.log.wq, >> - &dev_priv->guc.events_work); >> + smp_store_mb(dev_priv->guc.log.flush_signal, 1); >> + wake_up_process(dev_priv->guc.log.flush_task); >> } > >> +void guc_cancel_log_flush_work_sync(struct drm_i915_private *dev_priv) >> +{ >> + spin_lock_irq(&dev_priv->irq_lock); >> + dev_priv->guc.log.flush_signal = false; >> + spin_unlock_irq(&dev_priv->irq_lock); >> + >> + if (dev_priv->guc.log.flush_task) >> + wait_for_completion(&dev_priv->guc.log.flush_completion); >> +} >> + >> +static int guc_log_flush_worker(void *arg) >> +{ >> + struct drm_i915_private *dev_priv = arg; >> + struct intel_guc *guc = &dev_priv->guc; >> + >> + /* Install ourselves with high priority to reduce signalling latency */ >> + struct sched_param param = { .sched_priority = 1 }; >> + sched_setscheduler_nocheck(current, SCHED_FIFO, ¶m); >> + >> + do { >> + set_current_state(TASK_INTERRUPTIBLE); >> + >> + spin_lock_irq(&dev_priv->irq_lock); >> + if (guc->log.flush_signal) { >> + guc->log.flush_signal = false; >> + reinit_completion(&guc->log.flush_completion); >> + spin_unlock_irq(&dev_priv->irq_lock); >> + i915_guc_capture_logs(&dev_priv->drm); >> + complete_all(&guc->log.flush_completion); >> + } else { >> + spin_unlock_irq(&dev_priv->irq_lock); >> + if (kthread_should_stop()) >> + break; >> + >> + schedule(); >> + } >> + } while (1); >> + __set_current_state(TASK_RUNNING); >> + >> + return 0; > > This looks decidely fishy. > Sorry for that. 
> irq handler: > smp_store_mb(log.signal, 1); > wake_up_process(log.tsk); > > worker: > do { > set_current_state(TASK_INT); > > while (cmpxchg(&log.signal, 1, 0)) { > reinit_completion(log.complete); > i915_guc_capture_logs(); > } > > complete_all(log.complete); > if (kthread_should_stop()) > break; > > schedule(); > } while(1); > __set_current_state(TASK_RUNNING); > > flush: > smp_store_mb(log.signal, 0); > wait_for_completion(log.complete); > > > In the end, just the silly locking and placement of complete_all() is > dangerous. reinit_completion() lacks the barrier to be used like this > really, at any rate, racy with the irq handler, so use sparingly or when > you control the irq handler. Sorry I forgot to add a comment that guc_cancel_log_flush_work_sync() should be invoked only after ensuring that there will be no more flush interrupts, which will happen either by explicitly disabling the interrupt or disabling the logging and that's what is done at the 2 call sites. Since had covered reinit_completion() under the irq_lock, thought an explicit barrier is not needed. spin_lock_irq(&dev_priv->irq_lock); if (guc->log.flush_signal) { guc->log.flush_signal = false; reinit_completion(&guc->log.flush_completion); spin_unlock_irq(&dev_priv->irq_lock); i915_guc_capture_logs(&dev_priv->drm); complete_all(&guc->log.flush_completion); The placement of complete_all isn't right for the case, where a guc_cancel_log_flush_work_sync() is called but there was no prior flush interrupt received. > (Also not sure if log.signal = 0 is sane, Did log.signal = 0 for fast cancellation. Will remove that. A smp_wmb() after reinit_completion(&flush_completion) would be fine ? > or the current callsites really require the flush.) Sync against a ongoing/pending flush is being done for the 2 forceful flush cases, which will be effective only if the pending flush is completed, so forceful flush should be serialized with a pending flush. Best regards Akash > -Chris >
On Thu, Jul 21, 2016 at 09:11:42AM +0530, Goel, Akash wrote: > > > On 7/21/2016 1:04 AM, Chris Wilson wrote: > >In the end, just the silly locking and placement of complete_all() is > >dangerous. reinit_completion() lacks the barrier to be used like this > >really, at any rate, racy with the irq handler, so use sparingly or when > >you control the irq handler. > Sorry I forgot to add a comment that > guc_cancel_log_flush_work_sync() should be invoked only after > ensuring that there will be no more flush interrupts, which will > happen either by explicitly disabling the interrupt or disabling the > logging and that's what is done at the 2 call sites. > > Since had covered reinit_completion() under the irq_lock, thought an > explicit barrier is not needed. You hadn't controlled everything via the irq_lock, and nor should you. > > spin_lock_irq(&dev_priv->irq_lock); > if (guc->log.flush_signal) { > guc->log.flush_signal = false; > reinit_completion(&guc->log.flush_completion); > spin_unlock_irq(&dev_priv->irq_lock); > i915_guc_capture_logs(&dev_priv->drm); > complete_all(&guc->log.flush_completion); > > The placement of complete_all isn't right for the case, where > a guc_cancel_log_flush_work_sync() is called but there was no prior > flush interrupt received. Exactly. > >(Also not sure if log.signal = 0 is sane, > Did log.signal = 0 for fast cancellation. Will remove that. > > A smp_wmb() after reinit_completion(&flush_completion) would be fine ? Don't worry, the race can only be controlled by controlling the irq. In the end, I think something more like while (signal) ... complete_all(); schedule(); reinit_completion(); is the simplest. > >or the current callsites really require the flush.) > > Sync against a ongoing/pending flush is being done for the 2 > forceful flush cases, which will be effective only if the pending > flush is completed, so forceful flush should be serialized with a > pending flush. Or you just signal=true, wakeup task, wait_timeout. 
Otherwise you haven't really serialized anything without disabling the interrupt. -Chris
On 7/21/2016 11:13 AM, Chris Wilson wrote: > On Thu, Jul 21, 2016 at 09:11:42AM +0530, Goel, Akash wrote: >> >> >> On 7/21/2016 1:04 AM, Chris Wilson wrote: >>> In the end, just the silly locking and placement of complete_all() is >>> dangerous. reinit_completion() lacks the barrier to be used like this >>> really, at any rate, racy with the irq handler, so use sparingly or when >>> you control the irq handler. >> Sorry I forgot to add a comment that >> guc_cancel_log_flush_work_sync() should be invoked only after >> ensuring that there will be no more flush interrupts, which will >> happen either by explicitly disabling the interrupt or disabling the >> logging and that's what is done at the 2 call sites. >> >> Since had covered reinit_completion() under the irq_lock, thought an >> explicit barrier is not needed. > > You hadn't controlled everything via the irq_lock, and nor should you. > >> >> spin_lock_irq(&dev_priv->irq_lock); >> if (guc->log.flush_signal) { >> guc->log.flush_signal = false; >> reinit_completion(&guc->log.flush_completion); >> spin_unlock_irq(&dev_priv->irq_lock); >> i915_guc_capture_logs(&dev_priv->drm); >> complete_all(&guc->log.flush_completion); >> >> The placement of complete_all isn't right for the case, where >> a guc_cancel_log_flush_work_sync() is called but there was no prior >> flush interrupt received. > > Exactly. > >>> (Also not sure if log.signal = 0 is sane, >> Did log.signal = 0 for fast cancellation. Will remove that. >> >> A smp_wmb() after reinit_completion(&flush_completion) would be fine ? > > Don't worry, the race can only be controlled by controlling the irq. > In the end, I think something more like > > while (signal) ... > > complete_all(); > schedule(); > reinit_completion(); > > is the simplest. Thanks much, so will have the task body like this. 
do { set_current_state(TASK_INT); while (cmpxchg(&log.signal, 1, 0)) { i915_guc_capture_logs(); }; complete_all(log.complete); if (kthread_should_stop()) break; schedule(); reinit_completion(); } while(1); > >>> or the current callsites really require the flush.) >> >> Sync against a ongoing/pending flush is being done for the 2 >> forceful flush cases, which will be effective only if the pending >> flush is completed, so forceful flush should be serialized with a >> pending flush. > > Or you just signal=true, wakeup task, wait_timeout. Otherwise you > haven't really serialized anything without disabling the interrupt. Agree without disabling the interrupt, serialization cannot be provided, For the sync can use, { WARN_ON(guc->interrupts_enabled); wait_for_completion_interruptible_timeout( guc->log.complete, 5 /* in jiffies*/); } Best regards Akash > -Chris >
On 21/07/16 07:18, Goel, Akash wrote: > On 7/21/2016 11:13 AM, Chris Wilson wrote: >> On Thu, Jul 21, 2016 at 09:11:42AM +0530, Goel, Akash wrote: >>> >>> >>> On 7/21/2016 1:04 AM, Chris Wilson wrote: >>>> In the end, just the silly locking and placement of complete_all() is >>>> dangerous. reinit_completion() lacks the barrier to be used like this >>>> really, at any rate, racy with the irq handler, so use sparingly or >>>> when >>>> you control the irq handler. >>> Sorry I forgot to add a comment that >>> guc_cancel_log_flush_work_sync() should be invoked only after >>> ensuring that there will be no more flush interrupts, which will >>> happen either by explicitly disabling the interrupt or disabling the >>> logging and that's what is done at the 2 call sites. >>> >>> Since had covered reinit_completion() under the irq_lock, thought an >>> explicit barrier is not needed. >> >> You hadn't controlled everything via the irq_lock, and nor should you. >> >>> >>> spin_lock_irq(&dev_priv->irq_lock); >>> if (guc->log.flush_signal) { >>> guc->log.flush_signal = false; >>> reinit_completion(&guc->log.flush_completion); >>> spin_unlock_irq(&dev_priv->irq_lock); >>> i915_guc_capture_logs(&dev_priv->drm); >>> complete_all(&guc->log.flush_completion); >>> >>> The placement of complete_all isn't right for the case, where >>> a guc_cancel_log_flush_work_sync() is called but there was no prior >>> flush interrupt received. >> >> Exactly. >> >>>> (Also not sure if log.signal = 0 is sane, >>> Did log.signal = 0 for fast cancellation. Will remove that. >>> >>> A smp_wmb() after reinit_completion(&flush_completion) would be fine ? >> >> Don't worry, the race can only be controlled by controlling the irq. > > >> In the end, I think something more like >> >> while (signal) ... >> >> complete_all(); >> schedule(); >> reinit_completion(); >> >> is the simplest. > Thanks much, so will have the task body like this. 
> do { > set_current_state(TASK_INT); > while (cmpxchg(&log.signal, 1, 0)) { > i915_guc_capture_logs(); > }; > complete_all(log.complete); > if (kthread_should_stop()) > break; > schedule(); > reinit_completion(); > } while(1); > >> >>>> or the current callsites really require the flush.) >>> >>> Sync against a ongoing/pending flush is being done for the 2 >>> forceful flush cases, which will be effective only if the pending >>> flush is completed, so forceful flush should be serialized with a >>> pending flush. >> >> Or you just signal=true, wakeup task, wait_timeout. Otherwise you >> haven't really serialized anything without disabling the interrupt. > Agree without disabling the interrupt, serialization cannot be provided, > > For the sync can use, > { > WARN_ON(guc->interrupts_enabled); > wait_for_completion_interruptible_timeout( > guc->log.complete, 5 /* in jiffies*/); > } We don't even know for sure if the thread (especially rt priority) is absolutely required. Worker solution would be so much simpler if good enough. Because in the meantime it was discovered that relayfs has a silly polling behaviour where it delays waking up readers by a jiffy and that copies can be done much faster with SSE instructions. And that we are increasing the GuC buffer anyway to improve space utilization. So I say again, I think you need to evaluate all that and determine if a complicated thread solution is really needed. In parallel question is also if relayfs needs to be improved wrt this latency issue, or if it can't, maybe it is the wrong facility for this use case. Regards, Tvrtko
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 43c9900..ed00192 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -791,20 +791,8 @@ static int i915_workqueues_init(struct drm_i915_private *dev_priv) if (dev_priv->hotplug.dp_wq == NULL) goto out_free_wq; - if (HAS_GUC_SCHED(dev_priv)) { - /* Need a dedicated wq to process log buffer flush interrupts - * from GuC without much delay so as to avoid any loss of logs. - */ - dev_priv->guc.log.wq = - alloc_ordered_workqueue("i915-guc_log", 0); - if (dev_priv->guc.log.wq == NULL) - goto out_free_hotplug_dp_wq; - } - return 0; -out_free_hotplug_dp_wq: - destroy_workqueue(dev_priv->hotplug.dp_wq); out_free_wq: destroy_workqueue(dev_priv->wq); out_err: @@ -815,7 +803,6 @@ out_err: static void i915_workqueues_cleanup(struct drm_i915_private *dev_priv) { - destroy_workqueue(dev_priv->guc.log.wq); destroy_workqueue(dev_priv->hotplug.dp_wq); destroy_workqueue(dev_priv->wq); } diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 067eb52..0093240 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -21,6 +21,7 @@ * IN THE SOFTWARE. 
* */ +#include <linux/kthread.h> #include <linux/firmware.h> #include <linux/circ_buf.h> #include <linux/debugfs.h> @@ -1096,6 +1097,48 @@ static int guc_create_log_relay_file(struct intel_guc *guc) return 0; } +void guc_cancel_log_flush_work_sync(struct drm_i915_private *dev_priv) +{ + spin_lock_irq(&dev_priv->irq_lock); + dev_priv->guc.log.flush_signal = false; + spin_unlock_irq(&dev_priv->irq_lock); + + if (dev_priv->guc.log.flush_task) + wait_for_completion(&dev_priv->guc.log.flush_completion); +} + +static int guc_log_flush_worker(void *arg) +{ + struct drm_i915_private *dev_priv = arg; + struct intel_guc *guc = &dev_priv->guc; + + /* Install ourselves with high priority to reduce signalling latency */ + struct sched_param param = { .sched_priority = 1 }; + sched_setscheduler_nocheck(current, SCHED_FIFO, &param); + + do { + set_current_state(TASK_INTERRUPTIBLE); + + spin_lock_irq(&dev_priv->irq_lock); + if (guc->log.flush_signal) { + guc->log.flush_signal = false; + reinit_completion(&guc->log.flush_completion); + spin_unlock_irq(&dev_priv->irq_lock); + i915_guc_capture_logs(&dev_priv->drm); + complete_all(&guc->log.flush_completion); + } else { + spin_unlock_irq(&dev_priv->irq_lock); + if (kthread_should_stop()) + break; + + schedule(); + } + } while (1); + __set_current_state(TASK_RUNNING); + + return 0; +} + static void guc_log_cleanup(struct drm_i915_private *dev_priv) { struct intel_guc *guc = &dev_priv->guc; @@ -1108,6 +1151,11 @@ static void guc_log_cleanup(struct drm_i915_private *dev_priv) /* First disable the flush interrupt */ gen9_disable_guc_interrupts(dev_priv); + if (guc->log.flush_task) + kthread_stop(guc->log.flush_task); + + guc->log.flush_task = NULL; + guc_remove_log_relay_file(guc); guc->log.relay_chan = NULL; @@ -1120,6 +1168,7 @@ static int guc_create_log_extras(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct task_struct *tsk; void *vaddr;
int ret; @@ -1144,6 +1193,23 @@ static int guc_create_log_extras(struct intel_guc *guc) guc->log.buf_addr = vaddr; } + if (!guc->log.flush_task) { + init_completion(&guc->log.flush_completion); + /* Spawn a thread to provide a fast bottom-half for handling log + * buffer flush interrupts. + */ + tsk = kthread_run(guc_log_flush_worker, dev_priv, + "i915/guc_log_flushd"); + if (IS_ERR(tsk)) { + ret = PTR_ERR(tsk); + DRM_ERROR("creation of log flush task failed %d\n", ret); + guc_log_cleanup(dev_priv); + return ret; + } + + guc->log.flush_task = tsk; + } + return 0; } @@ -1206,8 +1272,9 @@ static int guc_log_late_setup(struct drm_device *dev) if (WARN_ON(guc->log.relay_chan)) return -EINVAL; - /* If log_level was set as -1 at boot time, then vmalloc mapping would - * not have been created for the log buffer, so create one now. + /* If log_level was set as -1 at boot time, then vmalloc mapping and + * flush daemon would not have been created for the log buffer, so + * create one now. */ ret = guc_create_log_extras(guc); if (ret) @@ -1474,6 +1541,9 @@ void i915_guc_capture_logs_on_reset(struct drm_device *dev) /* First disable the interrupts, will be renabled after reset */ gen9_disable_guc_interrupts(dev_priv); + /* Wait for the ongoing flush, if any, to complete */ + guc_cancel_log_flush_work_sync(dev_priv); + /* Ask GuC to update the log buffer state */ host2guc_force_logbuffer_flush(&dev_priv->guc); @@ -1544,7 +1614,7 @@ int i915_guc_log_control(struct drm_device *dev, uint64_t control_val) * buffer state and send the flush interrupt so that Host can * collect the left over logs also. 
*/ - flush_work(&dev_priv->guc.events_work); + guc_cancel_log_flush_work_sync(dev_priv); host2guc_force_logbuffer_flush(&dev_priv->guc); } diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index c3fb67e..0b6991e 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -435,13 +435,14 @@ void gen9_disable_guc_interrupts(struct drm_i915_private *dev_priv) { spin_lock_irq(&dev_priv->irq_lock); dev_priv->guc.interrupts_enabled = false; + /* Speed up cancellation after disabling interrupts */ + dev_priv->guc.log.flush_signal = false; gen6_disable_pm_irq(dev_priv, dev_priv->pm_guc_events); spin_unlock_irq(&dev_priv->irq_lock); synchronize_irq(dev_priv->drm.irq); - cancel_work_sync(&dev_priv->guc.events_work); gen9_reset_guc_interrupts(dev_priv); } @@ -1208,22 +1209,6 @@ static void gen6_pm_rps_work(struct work_struct *work) mutex_unlock(&dev_priv->rps.hw_lock); } -static void gen9_guc2host_events_work(struct work_struct *work) -{ - struct drm_i915_private *dev_priv = - container_of(work, struct drm_i915_private, guc.events_work); - - spin_lock_irq(&dev_priv->irq_lock); - /* Speed up work cancellation during disabling guc interrupts. */ - if (!dev_priv->guc.interrupts_enabled) { - spin_unlock_irq(&dev_priv->irq_lock); - return; - } - spin_unlock_irq(&dev_priv->irq_lock); - - i915_guc_capture_logs(&dev_priv->drm); -} - /** * ivybridge_parity_work - Workqueue called when a parity error interrupt * occurred. 
@@ -1707,8 +1692,8 @@ static void gen9_guc_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir) I915_READ(SOFT_SCRATCH(15)) & ~msg); /* Handle flush interrupt event in bottom half */ - queue_work(dev_priv->guc.log.wq, - &dev_priv->guc.events_work); + smp_store_mb(dev_priv->guc.log.flush_signal, 1); + wake_up_process(dev_priv->guc.log.flush_task); } } dev_priv->guc.log.flush_interrupt_count++; @@ -4629,7 +4614,6 @@ void intel_irq_init(struct drm_i915_private *dev_priv) INIT_WORK(&dev_priv->rps.work, gen6_pm_rps_work); INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work); - INIT_WORK(&dev_priv->guc.events_work, gen9_guc2host_events_work); if (HAS_GUC_UCODE(dev)) dev_priv->pm_guc_events = GEN9_GUC_TO_HOST_INT_EVENT; diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index e911a32..70c2241 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -125,9 +125,11 @@ struct intel_guc_fw { struct intel_guc_log { uint32_t flags; struct drm_i915_gem_object *obj; - struct workqueue_struct *wq; void *buf_addr; struct rchan *relay_chan; + struct task_struct *flush_task; + struct completion flush_completion; + bool flush_signal; /* logging related stats */ u32 flush_interrupt_count; @@ -141,7 +143,6 @@ struct intel_guc { struct intel_guc_log log; /* GuC2Host interrupt related state */ - struct work_struct events_work; bool interrupts_enabled; struct drm_i915_gem_object *ads_obj;