diff mbox series

drm/i915/guc: Use lockless list for destroyed contexts

Message ID 20211222232907.12735-1-matthew.brost@intel.com (mailing list archive)
State New, archived
Headers show
Series drm/i915/guc: Use lockless list for destroyed contexts | expand

Commit Message

Matthew Brost Dec. 22, 2021, 11:29 p.m. UTC
Use a lockless list structure for destroyed contexts to avoid hammering
on global submission spin lock.

Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_context.c       |  2 -
 drivers/gpu/drm/i915/gt/intel_context_types.h |  3 +-
 drivers/gpu/drm/i915/gt/uc/intel_guc.h        |  3 +-
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 44 +++++--------------
 4 files changed, 16 insertions(+), 36 deletions(-)

Comments

John Harrison Dec. 23, 2021, 12:48 a.m. UTC | #1
On 12/22/2021 15:29, Matthew Brost wrote:
> Use a lockless list structure for destroyed contexts to avoid hammering
> on global submission spin lock.
I thought the guidance was that lockless anything without an explanation 
longer than War And Peace comes with an automatic termination penalty?

Also, I thought the simple suggestion was to just move the entire list 
sideways under the existing lock and then loop through the local list 
safely without requiring locks because it is now local only.

John.


>
> Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_context.c       |  2 -
>   drivers/gpu/drm/i915/gt/intel_context_types.h |  3 +-
>   drivers/gpu/drm/i915/gt/uc/intel_guc.h        |  3 +-
>   .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 44 +++++--------------
>   4 files changed, 16 insertions(+), 36 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
> index 5d0ec7c49b6a..4aacb4b0418d 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.c
> +++ b/drivers/gpu/drm/i915/gt/intel_context.c
> @@ -403,8 +403,6 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
>   	ce->guc_id.id = GUC_INVALID_LRC_ID;
>   	INIT_LIST_HEAD(&ce->guc_id.link);
>   
> -	INIT_LIST_HEAD(&ce->destroyed_link);
> -
>   	INIT_LIST_HEAD(&ce->parallel.child_list);
>   
>   	/*
> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
> index 30cd81ad8911..4532d43ec9c0 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> @@ -9,6 +9,7 @@
>   #include <linux/average.h>
>   #include <linux/kref.h>
>   #include <linux/list.h>
> +#include <linux/llist.h>
>   #include <linux/mutex.h>
>   #include <linux/types.h>
>   
> @@ -224,7 +225,7 @@ struct intel_context {
>   	 * list when context is pending to be destroyed (deregistered with the
>   	 * GuC), protected by guc->submission_state.lock
>   	 */
> -	struct list_head destroyed_link;
> +	struct llist_node destroyed_link;
>   
>   	/** @parallel: sub-structure for parallel submission members */
>   	struct {
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> index f9240d4baa69..705085058411 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> @@ -8,6 +8,7 @@
>   
>   #include <linux/xarray.h>
>   #include <linux/delay.h>
> +#include <linux/llist.h>
>   
>   #include "intel_uncore.h"
>   #include "intel_guc_fw.h"
> @@ -112,7 +113,7 @@ struct intel_guc {
>   		 * @destroyed_contexts: list of contexts waiting to be destroyed
>   		 * (deregistered with the GuC)
>   		 */
> -		struct list_head destroyed_contexts;
> +		struct llist_head destroyed_contexts;
>   		/**
>   		 * @destroyed_worker: worker to deregister contexts, need as we
>   		 * need to take a GT PM reference and can't from destroy
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index 0a03a30e4c6d..6f7643edc139 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -1771,7 +1771,7 @@ int intel_guc_submission_init(struct intel_guc *guc)
>   	spin_lock_init(&guc->submission_state.lock);
>   	INIT_LIST_HEAD(&guc->submission_state.guc_id_list);
>   	ida_init(&guc->submission_state.guc_ids);
> -	INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts);
> +	init_llist_head(&guc->submission_state.destroyed_contexts);
>   	INIT_WORK(&guc->submission_state.destroyed_worker,
>   		  destroyed_worker_func);
>   
> @@ -2696,26 +2696,18 @@ static void __guc_context_destroy(struct intel_context *ce)
>   	}
>   }
>   
> +#define take_destroyed_contexts(guc) \
> +	llist_del_all(&guc->submission_state.destroyed_contexts)
> +
>   static void guc_flush_destroyed_contexts(struct intel_guc *guc)
>   {
> -	struct intel_context *ce;
> -	unsigned long flags;
> +	struct intel_context *ce, *cn;
>   
>   	GEM_BUG_ON(!submission_disabled(guc) &&
>   		   guc_submission_initialized(guc));
>   
> -	while (!list_empty(&guc->submission_state.destroyed_contexts)) {
> -		spin_lock_irqsave(&guc->submission_state.lock, flags);
> -		ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts,
> -					      struct intel_context,
> -					      destroyed_link);
> -		if (ce)
> -			list_del_init(&ce->destroyed_link);
> -		spin_unlock_irqrestore(&guc->submission_state.lock, flags);
> -
> -		if (!ce)
> -			break;
> -
> +	llist_for_each_entry_safe(ce, cn, take_destroyed_contexts(guc),
> +				 destroyed_link) {
>   		release_guc_id(guc, ce);
>   		__guc_context_destroy(ce);
>   	}
> @@ -2723,23 +2715,11 @@ static void guc_flush_destroyed_contexts(struct intel_guc *guc)
>   
>   static void deregister_destroyed_contexts(struct intel_guc *guc)
>   {
> -	struct intel_context *ce;
> -	unsigned long flags;
> -
> -	while (!list_empty(&guc->submission_state.destroyed_contexts)) {
> -		spin_lock_irqsave(&guc->submission_state.lock, flags);
> -		ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts,
> -					      struct intel_context,
> -					      destroyed_link);
> -		if (ce)
> -			list_del_init(&ce->destroyed_link);
> -		spin_unlock_irqrestore(&guc->submission_state.lock, flags);
> -
> -		if (!ce)
> -			break;
> +	struct intel_context *ce, *cn;
>   
> +	llist_for_each_entry_safe(ce, cn, take_destroyed_contexts(guc),
> +				 destroyed_link)
>   		guc_lrc_desc_unpin(ce);
> -	}
>   }
>   
>   static void destroyed_worker_func(struct work_struct *w)
> @@ -2771,8 +2751,8 @@ static void guc_context_destroy(struct kref *kref)
>   	if (likely(!destroy)) {
>   		if (!list_empty(&ce->guc_id.link))
>   			list_del_init(&ce->guc_id.link);
> -		list_add_tail(&ce->destroyed_link,
> -			      &guc->submission_state.destroyed_contexts);
> +		llist_add(&ce->destroyed_link,
> +			  &guc->submission_state.destroyed_contexts);
>   	} else {
>   		__release_guc_id(guc, ce);
>   	}
Matthew Brost Dec. 23, 2021, 12:53 a.m. UTC | #2
On Wed, Dec 22, 2021 at 04:48:36PM -0800, John Harrison wrote:
> On 12/22/2021 15:29, Matthew Brost wrote:
> > Use a lockless list structure for destroyed contexts to avoid hammering
> > on global submission spin lock.
> I thought the guidance was that lockless anything without an explanation
> longer than War And Peace comes with an automatic termination penalty?
> 

I was thinking that was for custom lockless algorithms not using core
uAPIs. If this is really concern I could protect the llist_del_all by a
lock but the doc explicitly says how I'm using this uAPI is safe without
a lock. 

> Also, I thought the simple suggestion was to just move the entire list
> sideways under the existing lock and then loop through the local list safely
> without requiring locks because it is now local only.
> 

That's basically what this uAPI does in a few simple calls rather than
our own algorithm to move to a new list.

Matt

> John.
> 
> 
> > 
> > Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> > ---
> >   drivers/gpu/drm/i915/gt/intel_context.c       |  2 -
> >   drivers/gpu/drm/i915/gt/intel_context_types.h |  3 +-
> >   drivers/gpu/drm/i915/gt/uc/intel_guc.h        |  3 +-
> >   .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 44 +++++--------------
> >   4 files changed, 16 insertions(+), 36 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
> > index 5d0ec7c49b6a..4aacb4b0418d 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_context.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_context.c
> > @@ -403,8 +403,6 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
> >   	ce->guc_id.id = GUC_INVALID_LRC_ID;
> >   	INIT_LIST_HEAD(&ce->guc_id.link);
> > -	INIT_LIST_HEAD(&ce->destroyed_link);
> > -
> >   	INIT_LIST_HEAD(&ce->parallel.child_list);
> >   	/*
> > diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
> > index 30cd81ad8911..4532d43ec9c0 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> > @@ -9,6 +9,7 @@
> >   #include <linux/average.h>
> >   #include <linux/kref.h>
> >   #include <linux/list.h>
> > +#include <linux/llist.h>
> >   #include <linux/mutex.h>
> >   #include <linux/types.h>
> > @@ -224,7 +225,7 @@ struct intel_context {
> >   	 * list when context is pending to be destroyed (deregistered with the
> >   	 * GuC), protected by guc->submission_state.lock
> >   	 */
> > -	struct list_head destroyed_link;
> > +	struct llist_node destroyed_link;
> >   	/** @parallel: sub-structure for parallel submission members */
> >   	struct {
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> > index f9240d4baa69..705085058411 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> > @@ -8,6 +8,7 @@
> >   #include <linux/xarray.h>
> >   #include <linux/delay.h>
> > +#include <linux/llist.h>
> >   #include "intel_uncore.h"
> >   #include "intel_guc_fw.h"
> > @@ -112,7 +113,7 @@ struct intel_guc {
> >   		 * @destroyed_contexts: list of contexts waiting to be destroyed
> >   		 * (deregistered with the GuC)
> >   		 */
> > -		struct list_head destroyed_contexts;
> > +		struct llist_head destroyed_contexts;
> >   		/**
> >   		 * @destroyed_worker: worker to deregister contexts, need as we
> >   		 * need to take a GT PM reference and can't from destroy
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > index 0a03a30e4c6d..6f7643edc139 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > @@ -1771,7 +1771,7 @@ int intel_guc_submission_init(struct intel_guc *guc)
> >   	spin_lock_init(&guc->submission_state.lock);
> >   	INIT_LIST_HEAD(&guc->submission_state.guc_id_list);
> >   	ida_init(&guc->submission_state.guc_ids);
> > -	INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts);
> > +	init_llist_head(&guc->submission_state.destroyed_contexts);
> >   	INIT_WORK(&guc->submission_state.destroyed_worker,
> >   		  destroyed_worker_func);
> > @@ -2696,26 +2696,18 @@ static void __guc_context_destroy(struct intel_context *ce)
> >   	}
> >   }
> > +#define take_destroyed_contexts(guc) \
> > +	llist_del_all(&guc->submission_state.destroyed_contexts)
> > +
> >   static void guc_flush_destroyed_contexts(struct intel_guc *guc)
> >   {
> > -	struct intel_context *ce;
> > -	unsigned long flags;
> > +	struct intel_context *ce, *cn;
> >   	GEM_BUG_ON(!submission_disabled(guc) &&
> >   		   guc_submission_initialized(guc));
> > -	while (!list_empty(&guc->submission_state.destroyed_contexts)) {
> > -		spin_lock_irqsave(&guc->submission_state.lock, flags);
> > -		ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts,
> > -					      struct intel_context,
> > -					      destroyed_link);
> > -		if (ce)
> > -			list_del_init(&ce->destroyed_link);
> > -		spin_unlock_irqrestore(&guc->submission_state.lock, flags);
> > -
> > -		if (!ce)
> > -			break;
> > -
> > +	llist_for_each_entry_safe(ce, cn, take_destroyed_contexts(guc),
> > +				 destroyed_link) {
> >   		release_guc_id(guc, ce);
> >   		__guc_context_destroy(ce);
> >   	}
> > @@ -2723,23 +2715,11 @@ static void guc_flush_destroyed_contexts(struct intel_guc *guc)
> >   static void deregister_destroyed_contexts(struct intel_guc *guc)
> >   {
> > -	struct intel_context *ce;
> > -	unsigned long flags;
> > -
> > -	while (!list_empty(&guc->submission_state.destroyed_contexts)) {
> > -		spin_lock_irqsave(&guc->submission_state.lock, flags);
> > -		ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts,
> > -					      struct intel_context,
> > -					      destroyed_link);
> > -		if (ce)
> > -			list_del_init(&ce->destroyed_link);
> > -		spin_unlock_irqrestore(&guc->submission_state.lock, flags);
> > -
> > -		if (!ce)
> > -			break;
> > +	struct intel_context *ce, *cn;
> > +	llist_for_each_entry_safe(ce, cn, take_destroyed_contexts(guc),
> > +				 destroyed_link)
> >   		guc_lrc_desc_unpin(ce);
> > -	}
> >   }
> >   static void destroyed_worker_func(struct work_struct *w)
> > @@ -2771,8 +2751,8 @@ static void guc_context_destroy(struct kref *kref)
> >   	if (likely(!destroy)) {
> >   		if (!list_empty(&ce->guc_id.link))
> >   			list_del_init(&ce->guc_id.link);
> > -		list_add_tail(&ce->destroyed_link,
> > -			      &guc->submission_state.destroyed_contexts);
> > +		llist_add(&ce->destroyed_link,
> > +			  &guc->submission_state.destroyed_contexts);
> >   	} else {
> >   		__release_guc_id(guc, ce);
> >   	}
>
Tvrtko Ursulin Dec. 23, 2021, 10:01 a.m. UTC | #3
On 22/12/2021 23:29, Matthew Brost wrote:
> Use a lockless list structure for destroyed contexts to avoid hammering
> on global submission spin lock.

Thanks for looking into it quickly!

On the topic of "lockless" yes I agree the llist in principle is not a 
concern. That part looks fine to me. On the actual "integration" (how it 
slots in) with the GuC code I leave one comment below.

> Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_context.c       |  2 -
>   drivers/gpu/drm/i915/gt/intel_context_types.h |  3 +-
>   drivers/gpu/drm/i915/gt/uc/intel_guc.h        |  3 +-
>   .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 44 +++++--------------
>   4 files changed, 16 insertions(+), 36 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
> index 5d0ec7c49b6a..4aacb4b0418d 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.c
> +++ b/drivers/gpu/drm/i915/gt/intel_context.c
> @@ -403,8 +403,6 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
>   	ce->guc_id.id = GUC_INVALID_LRC_ID;
>   	INIT_LIST_HEAD(&ce->guc_id.link);
>   
> -	INIT_LIST_HEAD(&ce->destroyed_link);
> -
>   	INIT_LIST_HEAD(&ce->parallel.child_list);
>   
>   	/*
> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
> index 30cd81ad8911..4532d43ec9c0 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> @@ -9,6 +9,7 @@
>   #include <linux/average.h>
>   #include <linux/kref.h>
>   #include <linux/list.h>
> +#include <linux/llist.h>
>   #include <linux/mutex.h>
>   #include <linux/types.h>
>   
> @@ -224,7 +225,7 @@ struct intel_context {
>   	 * list when context is pending to be destroyed (deregistered with the
>   	 * GuC), protected by guc->submission_state.lock
>   	 */
> -	struct list_head destroyed_link;
> +	struct llist_node destroyed_link;
>   
>   	/** @parallel: sub-structure for parallel submission members */
>   	struct {
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> index f9240d4baa69..705085058411 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> @@ -8,6 +8,7 @@
>   
>   #include <linux/xarray.h>
>   #include <linux/delay.h>
> +#include <linux/llist.h>
>   
>   #include "intel_uncore.h"
>   #include "intel_guc_fw.h"
> @@ -112,7 +113,7 @@ struct intel_guc {
>   		 * @destroyed_contexts: list of contexts waiting to be destroyed
>   		 * (deregistered with the GuC)
>   		 */
> -		struct list_head destroyed_contexts;
> +		struct llist_head destroyed_contexts;
>   		/**
>   		 * @destroyed_worker: worker to deregister contexts, need as we
>   		 * need to take a GT PM reference and can't from destroy
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index 0a03a30e4c6d..6f7643edc139 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -1771,7 +1771,7 @@ int intel_guc_submission_init(struct intel_guc *guc)
>   	spin_lock_init(&guc->submission_state.lock);
>   	INIT_LIST_HEAD(&guc->submission_state.guc_id_list);
>   	ida_init(&guc->submission_state.guc_ids);
> -	INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts);
> +	init_llist_head(&guc->submission_state.destroyed_contexts);
>   	INIT_WORK(&guc->submission_state.destroyed_worker,
>   		  destroyed_worker_func);
>   
> @@ -2696,26 +2696,18 @@ static void __guc_context_destroy(struct intel_context *ce)
>   	}
>   }
>   
> +#define take_destroyed_contexts(guc) \
> +	llist_del_all(&guc->submission_state.destroyed_contexts)
> +
>   static void guc_flush_destroyed_contexts(struct intel_guc *guc)
>   {
> -	struct intel_context *ce;
> -	unsigned long flags;
> +	struct intel_context *ce, *cn;
>   
>   	GEM_BUG_ON(!submission_disabled(guc) &&
>   		   guc_submission_initialized(guc));
>   
> -	while (!list_empty(&guc->submission_state.destroyed_contexts)) {
> -		spin_lock_irqsave(&guc->submission_state.lock, flags);
> -		ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts,
> -					      struct intel_context,
> -					      destroyed_link);
> -		if (ce)
> -			list_del_init(&ce->destroyed_link);
> -		spin_unlock_irqrestore(&guc->submission_state.lock, flags);
> -
> -		if (!ce)
> -			break;
> -
> +	llist_for_each_entry_safe(ce, cn, take_destroyed_contexts(guc),
> +				 destroyed_link) {
>   		release_guc_id(guc, ce);
>   		__guc_context_destroy(ce);
>   	}
> @@ -2723,23 +2715,11 @@ static void guc_flush_destroyed_contexts(struct intel_guc *guc)
>   
>   static void deregister_destroyed_contexts(struct intel_guc *guc)
>   {
> -	struct intel_context *ce;
> -	unsigned long flags;
> -
> -	while (!list_empty(&guc->submission_state.destroyed_contexts)) {
> -		spin_lock_irqsave(&guc->submission_state.lock, flags);
> -		ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts,
> -					      struct intel_context,
> -					      destroyed_link);
> -		if (ce)
> -			list_del_init(&ce->destroyed_link);
> -		spin_unlock_irqrestore(&guc->submission_state.lock, flags);
> -
> -		if (!ce)
> -			break;
> +	struct intel_context *ce, *cn;
>   
> +	llist_for_each_entry_safe(ce, cn, take_destroyed_contexts(guc),
> +				 destroyed_link)
>   		guc_lrc_desc_unpin(ce);
> -	}
>   }
>   
>   static void destroyed_worker_func(struct work_struct *w)
> @@ -2771,8 +2751,8 @@ static void guc_context_destroy(struct kref *kref)
>   	if (likely(!destroy)) {
>   		if (!list_empty(&ce->guc_id.link))
>   			list_del_init(&ce->guc_id.link);
> -		list_add_tail(&ce->destroyed_link,
> -			      &guc->submission_state.destroyed_contexts);
> +		llist_add(&ce->destroyed_link,
> +			  &guc->submission_state.destroyed_contexts);

So here presumably submission lock is still needed for unlinking the 
from guc_id list.

Mechanical flows of the patch looks good to me, but I leave to you and 
John to decide on llist vs keeping the existing doubly linked list. I 
mean agreeing what fits better with the existing locking and data 
structure design.

Regards,

Tvrtko

>   	} else {
>   		__release_guc_id(guc, ce);
>   	}
>
Matthew Brost Dec. 23, 2021, 6:05 p.m. UTC | #4
On Thu, Dec 23, 2021 at 10:01:42AM +0000, Tvrtko Ursulin wrote:
> 
> On 22/12/2021 23:29, Matthew Brost wrote:
> > Use a lockless list structure for destroyed contexts to avoid hammering
> > on global submission spin lock.
> 
> Thanks for looking into it quickly!
> 
> On the topic of "lockless" yes I agree the llist in principle is not a
> concern. That part looks fine to me. On the actual "integration" (how it
> slots in) with the GuC code I leave one comment below.
> 

Agree.

> > Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> > ---
> >   drivers/gpu/drm/i915/gt/intel_context.c       |  2 -
> >   drivers/gpu/drm/i915/gt/intel_context_types.h |  3 +-
> >   drivers/gpu/drm/i915/gt/uc/intel_guc.h        |  3 +-
> >   .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 44 +++++--------------
> >   4 files changed, 16 insertions(+), 36 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
> > index 5d0ec7c49b6a..4aacb4b0418d 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_context.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_context.c
> > @@ -403,8 +403,6 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
> >   	ce->guc_id.id = GUC_INVALID_LRC_ID;
> >   	INIT_LIST_HEAD(&ce->guc_id.link);
> > -	INIT_LIST_HEAD(&ce->destroyed_link);
> > -
> >   	INIT_LIST_HEAD(&ce->parallel.child_list);
> >   	/*
> > diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
> > index 30cd81ad8911..4532d43ec9c0 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> > @@ -9,6 +9,7 @@
> >   #include <linux/average.h>
> >   #include <linux/kref.h>
> >   #include <linux/list.h>
> > +#include <linux/llist.h>
> >   #include <linux/mutex.h>
> >   #include <linux/types.h>
> > @@ -224,7 +225,7 @@ struct intel_context {
> >   	 * list when context is pending to be destroyed (deregistered with the
> >   	 * GuC), protected by guc->submission_state.lock
> >   	 */
> > -	struct list_head destroyed_link;
> > +	struct llist_node destroyed_link;
> >   	/** @parallel: sub-structure for parallel submission members */
> >   	struct {
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> > index f9240d4baa69..705085058411 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> > @@ -8,6 +8,7 @@
> >   #include <linux/xarray.h>
> >   #include <linux/delay.h>
> > +#include <linux/llist.h>
> >   #include "intel_uncore.h"
> >   #include "intel_guc_fw.h"
> > @@ -112,7 +113,7 @@ struct intel_guc {
> >   		 * @destroyed_contexts: list of contexts waiting to be destroyed
> >   		 * (deregistered with the GuC)
> >   		 */
> > -		struct list_head destroyed_contexts;
> > +		struct llist_head destroyed_contexts;
> >   		/**
> >   		 * @destroyed_worker: worker to deregister contexts, need as we
> >   		 * need to take a GT PM reference and can't from destroy
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > index 0a03a30e4c6d..6f7643edc139 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > @@ -1771,7 +1771,7 @@ int intel_guc_submission_init(struct intel_guc *guc)
> >   	spin_lock_init(&guc->submission_state.lock);
> >   	INIT_LIST_HEAD(&guc->submission_state.guc_id_list);
> >   	ida_init(&guc->submission_state.guc_ids);
> > -	INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts);
> > +	init_llist_head(&guc->submission_state.destroyed_contexts);
> >   	INIT_WORK(&guc->submission_state.destroyed_worker,
> >   		  destroyed_worker_func);
> > @@ -2696,26 +2696,18 @@ static void __guc_context_destroy(struct intel_context *ce)
> >   	}
> >   }
> > +#define take_destroyed_contexts(guc) \
> > +	llist_del_all(&guc->submission_state.destroyed_contexts)
> > +
> >   static void guc_flush_destroyed_contexts(struct intel_guc *guc)
> >   {
> > -	struct intel_context *ce;
> > -	unsigned long flags;
> > +	struct intel_context *ce, *cn;
> >   	GEM_BUG_ON(!submission_disabled(guc) &&
> >   		   guc_submission_initialized(guc));
> > -	while (!list_empty(&guc->submission_state.destroyed_contexts)) {
> > -		spin_lock_irqsave(&guc->submission_state.lock, flags);
> > -		ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts,
> > -					      struct intel_context,
> > -					      destroyed_link);
> > -		if (ce)
> > -			list_del_init(&ce->destroyed_link);
> > -		spin_unlock_irqrestore(&guc->submission_state.lock, flags);
> > -
> > -		if (!ce)
> > -			break;
> > -
> > +	llist_for_each_entry_safe(ce, cn, take_destroyed_contexts(guc),
> > +				 destroyed_link) {
> >   		release_guc_id(guc, ce);
> >   		__guc_context_destroy(ce);
> >   	}
> > @@ -2723,23 +2715,11 @@ static void guc_flush_destroyed_contexts(struct intel_guc *guc)
> >   static void deregister_destroyed_contexts(struct intel_guc *guc)
> >   {
> > -	struct intel_context *ce;
> > -	unsigned long flags;
> > -
> > -	while (!list_empty(&guc->submission_state.destroyed_contexts)) {
> > -		spin_lock_irqsave(&guc->submission_state.lock, flags);
> > -		ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts,
> > -					      struct intel_context,
> > -					      destroyed_link);
> > -		if (ce)
> > -			list_del_init(&ce->destroyed_link);
> > -		spin_unlock_irqrestore(&guc->submission_state.lock, flags);
> > -
> > -		if (!ce)
> > -			break;
> > +	struct intel_context *ce, *cn;
> > +	llist_for_each_entry_safe(ce, cn, take_destroyed_contexts(guc),
> > +				 destroyed_link)
> >   		guc_lrc_desc_unpin(ce);
> > -	}
> >   }
> >   static void destroyed_worker_func(struct work_struct *w)
> > @@ -2771,8 +2751,8 @@ static void guc_context_destroy(struct kref *kref)
> >   	if (likely(!destroy)) {
> >   		if (!list_empty(&ce->guc_id.link))
> >   			list_del_init(&ce->guc_id.link);
> > -		list_add_tail(&ce->destroyed_link,
> > -			      &guc->submission_state.destroyed_contexts);
> > +		llist_add(&ce->destroyed_link,
> > +			  &guc->submission_state.destroyed_contexts);
> 
> So here presumably submission lock is still needed for unlinking the from
> guc_id list.
> 

Yes, the submission lock also protects the guc_id list.

According to the llist DoC the llist_add could be outside a lock
(multiple producers can safely call llist_add simultaneously but it is
also harmless to call within a lock.

> Mechanical flows of the patch looks good to me, but I leave to you and John
> to decide on llist vs keeping the existing doubly linked list. I mean
> agreeing what fits better with the existing locking and data structure
> design.
> 

Sounds good.

Matt

> Regards,
> 
> Tvrtko
> 
> >   	} else {
> >   		__release_guc_id(guc, ce);
> >   	}
> >
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 5d0ec7c49b6a..4aacb4b0418d 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -403,8 +403,6 @@  intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
 	ce->guc_id.id = GUC_INVALID_LRC_ID;
 	INIT_LIST_HEAD(&ce->guc_id.link);
 
-	INIT_LIST_HEAD(&ce->destroyed_link);
-
 	INIT_LIST_HEAD(&ce->parallel.child_list);
 
 	/*
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 30cd81ad8911..4532d43ec9c0 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -9,6 +9,7 @@ 
 #include <linux/average.h>
 #include <linux/kref.h>
 #include <linux/list.h>
+#include <linux/llist.h>
 #include <linux/mutex.h>
 #include <linux/types.h>
 
@@ -224,7 +225,7 @@  struct intel_context {
 	 * list when context is pending to be destroyed (deregistered with the
 	 * GuC), protected by guc->submission_state.lock
 	 */
-	struct list_head destroyed_link;
+	struct llist_node destroyed_link;
 
 	/** @parallel: sub-structure for parallel submission members */
 	struct {
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index f9240d4baa69..705085058411 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -8,6 +8,7 @@ 
 
 #include <linux/xarray.h>
 #include <linux/delay.h>
+#include <linux/llist.h>
 
 #include "intel_uncore.h"
 #include "intel_guc_fw.h"
@@ -112,7 +113,7 @@  struct intel_guc {
 		 * @destroyed_contexts: list of contexts waiting to be destroyed
 		 * (deregistered with the GuC)
 		 */
-		struct list_head destroyed_contexts;
+		struct llist_head destroyed_contexts;
 		/**
 		 * @destroyed_worker: worker to deregister contexts, need as we
 		 * need to take a GT PM reference and can't from destroy
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 0a03a30e4c6d..6f7643edc139 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1771,7 +1771,7 @@  int intel_guc_submission_init(struct intel_guc *guc)
 	spin_lock_init(&guc->submission_state.lock);
 	INIT_LIST_HEAD(&guc->submission_state.guc_id_list);
 	ida_init(&guc->submission_state.guc_ids);
-	INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts);
+	init_llist_head(&guc->submission_state.destroyed_contexts);
 	INIT_WORK(&guc->submission_state.destroyed_worker,
 		  destroyed_worker_func);
 
@@ -2696,26 +2696,18 @@  static void __guc_context_destroy(struct intel_context *ce)
 	}
 }
 
+#define take_destroyed_contexts(guc) \
+	llist_del_all(&guc->submission_state.destroyed_contexts)
+
 static void guc_flush_destroyed_contexts(struct intel_guc *guc)
 {
-	struct intel_context *ce;
-	unsigned long flags;
+	struct intel_context *ce, *cn;
 
 	GEM_BUG_ON(!submission_disabled(guc) &&
 		   guc_submission_initialized(guc));
 
-	while (!list_empty(&guc->submission_state.destroyed_contexts)) {
-		spin_lock_irqsave(&guc->submission_state.lock, flags);
-		ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts,
-					      struct intel_context,
-					      destroyed_link);
-		if (ce)
-			list_del_init(&ce->destroyed_link);
-		spin_unlock_irqrestore(&guc->submission_state.lock, flags);
-
-		if (!ce)
-			break;
-
+	llist_for_each_entry_safe(ce, cn, take_destroyed_contexts(guc),
+				 destroyed_link) {
 		release_guc_id(guc, ce);
 		__guc_context_destroy(ce);
 	}
@@ -2723,23 +2715,11 @@  static void guc_flush_destroyed_contexts(struct intel_guc *guc)
 
 static void deregister_destroyed_contexts(struct intel_guc *guc)
 {
-	struct intel_context *ce;
-	unsigned long flags;
-
-	while (!list_empty(&guc->submission_state.destroyed_contexts)) {
-		spin_lock_irqsave(&guc->submission_state.lock, flags);
-		ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts,
-					      struct intel_context,
-					      destroyed_link);
-		if (ce)
-			list_del_init(&ce->destroyed_link);
-		spin_unlock_irqrestore(&guc->submission_state.lock, flags);
-
-		if (!ce)
-			break;
+	struct intel_context *ce, *cn;
 
+	llist_for_each_entry_safe(ce, cn, take_destroyed_contexts(guc),
+				 destroyed_link)
 		guc_lrc_desc_unpin(ce);
-	}
 }
 
 static void destroyed_worker_func(struct work_struct *w)
@@ -2771,8 +2751,8 @@  static void guc_context_destroy(struct kref *kref)
 	if (likely(!destroy)) {
 		if (!list_empty(&ce->guc_id.link))
 			list_del_init(&ce->guc_id.link);
-		list_add_tail(&ce->destroyed_link,
-			      &guc->submission_state.destroyed_contexts);
+		llist_add(&ce->destroyed_link,
+			  &guc->submission_state.destroyed_contexts);
 	} else {
 		__release_guc_id(guc, ce);
 	}