Message ID | 20241014104637.83209-5-tursulin@igalia.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Small DRM scheduler improvements | expand |
On Mon, 2024-10-14 at 11:46 +0100, Tvrtko Ursulin wrote: > From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com> > > Christian suggested to rename the lock and improve the documentation Let's move it to Annotators: Suggested-by: Christian König <christian.koenig@amd.com> (Otherwise some time in the future a Christian Kaiser might start working on the scheduler or steal the praise ^^) > of > what it protects. So without Christian's name here I'd phrase it as: "When writing to a drm_sched_entity's run-queue, writers are protected through the lock drm_sched_entity.rq_lock. This naming, however, frequently collides with the separate internal lock of struct drm_sched_rq, resulting in uses like this: spin_lock(&entity->rq_lock); spin_lock(&entity->rq->lock); Rename drm_sched_entity.rq_lock to improve readability. While at it, re-order that struct's members to make it more obvious what the lock protects." > And to also re-order the structure members so all > protected by the lock are together in a block. > > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com> > Cc: Christian König <christian.koenig@amd.com> > Cc: Alex Deucher <alexander.deucher@amd.com> > Cc: Luben Tuikov <ltuikov89@gmail.com> > Cc: Matthew Brost <matthew.brost@intel.com> > Cc: Philipp Stanner <pstanner@redhat.com> > Reviewed-by: Christian König <christian.koenig@amd.com> > --- > drivers/gpu/drm/scheduler/sched_entity.c | 28 ++++++++++++---------- > -- > drivers/gpu/drm/scheduler/sched_main.c | 2 +- > include/drm/gpu_scheduler.h | 15 +++++++------ > 3 files changed, 23 insertions(+), 22 deletions(-) > > diff --git a/drivers/gpu/drm/scheduler/sched_entity.c > b/drivers/gpu/drm/scheduler/sched_entity.c > index b72cba292839..c013c2b49aa5 100644 > --- a/drivers/gpu/drm/scheduler/sched_entity.c > +++ b/drivers/gpu/drm/scheduler/sched_entity.c > @@ -105,7 +105,7 @@ int drm_sched_entity_init(struct drm_sched_entity > *entity, > /* We start in an idle state. 
*/ > complete_all(&entity->entity_idle); > > - spin_lock_init(&entity->rq_lock); > + spin_lock_init(&entity->lock); > spsc_queue_init(&entity->job_queue); > > atomic_set(&entity->fence_seq, 0); > @@ -133,10 +133,10 @@ void drm_sched_entity_modify_sched(struct > drm_sched_entity *entity, > { > WARN_ON(!num_sched_list || !sched_list); > > - spin_lock(&entity->rq_lock); > + spin_lock(&entity->lock); > entity->sched_list = sched_list; > entity->num_sched_list = num_sched_list; > - spin_unlock(&entity->rq_lock); > + spin_unlock(&entity->lock); > } > EXPORT_SYMBOL(drm_sched_entity_modify_sched); > > @@ -244,10 +244,10 @@ static void drm_sched_entity_kill(struct > drm_sched_entity *entity) > if (!entity->rq) > return; > > - spin_lock(&entity->rq_lock); > + spin_lock(&entity->lock); > entity->stopped = true; > drm_sched_rq_remove_entity(entity->rq, entity); > - spin_unlock(&entity->rq_lock); > + spin_unlock(&entity->lock); > > /* Make sure this entity is not used by the scheduler at the > moment */ > wait_for_completion(&entity->entity_idle); > @@ -396,9 +396,9 @@ static void drm_sched_entity_wakeup(struct > dma_fence *f, > void drm_sched_entity_set_priority(struct drm_sched_entity *entity, > enum drm_sched_priority priority) > { > - spin_lock(&entity->rq_lock); > + spin_lock(&entity->lock); > entity->priority = priority; > - spin_unlock(&entity->rq_lock); > + spin_unlock(&entity->lock); > } > EXPORT_SYMBOL(drm_sched_entity_set_priority); > > @@ -515,10 +515,10 @@ struct drm_sched_job > *drm_sched_entity_pop_job(struct drm_sched_entity *entity) > > next = to_drm_sched_job(spsc_queue_peek(&entity- > >job_queue)); > if (next) { > - spin_lock(&entity->rq_lock); > + spin_lock(&entity->lock); > drm_sched_rq_update_fifo_locked(entity, > next- > >submit_ts); > - spin_unlock(&entity->rq_lock); > + spin_unlock(&entity->lock); > } > } > > @@ -559,14 +559,14 @@ void drm_sched_entity_select_rq(struct > drm_sched_entity *entity) > if (fence && !dma_fence_is_signaled(fence)) > return; > 
> - spin_lock(&entity->rq_lock); > + spin_lock(&entity->lock); > sched = drm_sched_pick_best(entity->sched_list, entity- > >num_sched_list); > rq = sched ? sched->sched_rq[entity->priority] : NULL; > if (rq != entity->rq) { > drm_sched_rq_remove_entity(entity->rq, entity); > entity->rq = rq; > } > - spin_unlock(&entity->rq_lock); > + spin_unlock(&entity->lock); > > if (entity->num_sched_list == 1) > entity->sched_list = NULL; > @@ -605,9 +605,9 @@ void drm_sched_entity_push_job(struct > drm_sched_job *sched_job) > struct drm_sched_rq *rq; > > /* Add the entity to the run queue */ > - spin_lock(&entity->rq_lock); > + spin_lock(&entity->lock); > if (entity->stopped) { > - spin_unlock(&entity->rq_lock); > + spin_unlock(&entity->lock); > > DRM_ERROR("Trying to push to a killed > entity\n"); > return; > @@ -621,7 +621,7 @@ void drm_sched_entity_push_job(struct > drm_sched_job *sched_job) > if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) > drm_sched_rq_update_fifo_locked(entity, > submit_ts); > > - spin_unlock(&entity->rq_lock); > + spin_unlock(&entity->lock); > > drm_sched_wakeup(sched); > } > diff --git a/drivers/gpu/drm/scheduler/sched_main.c > b/drivers/gpu/drm/scheduler/sched_main.c > index 07ee386b8e4b..2670bf9f34b2 100644 > --- a/drivers/gpu/drm/scheduler/sched_main.c > +++ b/drivers/gpu/drm/scheduler/sched_main.c > @@ -176,7 +176,7 @@ void drm_sched_rq_update_fifo_locked(struct > drm_sched_entity *entity, ktime_t ts > * for entity from within concurrent > drm_sched_entity_select_rq and the > * other to update the rb tree structure. 
> */ > - lockdep_assert_held(&entity->rq_lock); > + lockdep_assert_held(&entity->lock); > > spin_lock(&entity->rq->lock); > > diff --git a/include/drm/gpu_scheduler.h > b/include/drm/gpu_scheduler.h > index b6d095074c19..683fff8939e4 100644 > --- a/include/drm/gpu_scheduler.h > +++ b/include/drm/gpu_scheduler.h > @@ -96,6 +96,14 @@ struct drm_sched_entity { > */ > struct list_head list; > Uh, btw, while reviewing, I just saw that we still have that FIXME further up: /** * @rq: * * Runqueue on which this entity is currently scheduled. * * FIXME: Locking is very unclear for this. Writers are protected by * @rq_lock, but readers are generally lockless and seem to just race * with not even a READ_ONCE. */ struct drm_sched_rq *rq; At the very least, rq_lock should be renamed here, too. AFAICS the series doesn't solve the FIXME, so we keep it, agreed? > + /** > + * @lock: > + * > + * Lock protecting the run-queue (@rq) to which this entity > belongs, > + * @priority and the list of schedulers (@sched_list, > @num_sched_list). > + */ > + spinlock_t lock; > + > /** > * @rq: > * > @@ -140,13 +148,6 @@ struct drm_sched_entity { > */ I think this comment here above also uses the term "rq_lock". While you're fixing it, maybe also do a quick grep for "rq_lock" in case I overlooked it somewhere else. I stopped drinking coffee today, so... Thx, P. > enum drm_sched_priority priority; > > - /** > - * @rq_lock: > - * > - * Lock to modify the runqueue to which this entity belongs. > - */ > - spinlock_t rq_lock; > - > /** > * @job_queue: the list of jobs of this entity. > */
On 15/10/2024 12:56, Philipp Stanner wrote: > On Mon, 2024-10-14 at 11:46 +0100, Tvrtko Ursulin wrote: >> From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com> >> >> Christian suggested to rename the lock and improve the documentation > > Let's move it to Annotators: > Suggested-by: Christian König <christian.koenig@amd.com> Ack. > (Otherwise some time in the future a Christian Kaiser might start > working on the scheduler or steal the praise ^^) > >> of >> what it protects. > > So without Christian's name here I'd phrase it as: > "When writing to a drm_sched_entity's run-queue, writers are protected > through the lock drm_sched_entity.rq_lock. This naming, however, > frequently collides with the separate internal lock of struct > drm_sched_rq, resulting in uses like this: > > spin_lock(&entity->rq_lock); > spin_lock(&entity->rq->lock); > > Rename drm_sched_entity.rq_lock to improve readability. While at it, > re-order that struct's members to make it more obvious what the lock > protects." Will copy&paste - thanks for typing it out. >> And to also re-order the structure members so all >> protected by the lock are together in a block. 
> > >> >> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com> >> Cc: Christian König <christian.koenig@amd.com> >> Cc: Alex Deucher <alexander.deucher@amd.com> >> Cc: Luben Tuikov <ltuikov89@gmail.com> >> Cc: Matthew Brost <matthew.brost@intel.com> >> Cc: Philipp Stanner <pstanner@redhat.com> >> Reviewed-by: Christian König <christian.koenig@amd.com> >> --- >> drivers/gpu/drm/scheduler/sched_entity.c | 28 ++++++++++++---------- >> -- >> drivers/gpu/drm/scheduler/sched_main.c | 2 +- >> include/drm/gpu_scheduler.h | 15 +++++++------ >> 3 files changed, 23 insertions(+), 22 deletions(-) >> >> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c >> b/drivers/gpu/drm/scheduler/sched_entity.c >> index b72cba292839..c013c2b49aa5 100644 >> --- a/drivers/gpu/drm/scheduler/sched_entity.c >> +++ b/drivers/gpu/drm/scheduler/sched_entity.c >> @@ -105,7 +105,7 @@ int drm_sched_entity_init(struct drm_sched_entity >> *entity, >> /* We start in an idle state. */ >> complete_all(&entity->entity_idle); >> >> - spin_lock_init(&entity->rq_lock); >> + spin_lock_init(&entity->lock); >> spsc_queue_init(&entity->job_queue); >> >> atomic_set(&entity->fence_seq, 0); >> @@ -133,10 +133,10 @@ void drm_sched_entity_modify_sched(struct >> drm_sched_entity *entity, >> { >> WARN_ON(!num_sched_list || !sched_list); >> >> - spin_lock(&entity->rq_lock); >> + spin_lock(&entity->lock); >> entity->sched_list = sched_list; >> entity->num_sched_list = num_sched_list; >> - spin_unlock(&entity->rq_lock); >> + spin_unlock(&entity->lock); >> } >> EXPORT_SYMBOL(drm_sched_entity_modify_sched); >> >> @@ -244,10 +244,10 @@ static void drm_sched_entity_kill(struct >> drm_sched_entity *entity) >> if (!entity->rq) >> return; >> >> - spin_lock(&entity->rq_lock); >> + spin_lock(&entity->lock); >> entity->stopped = true; >> drm_sched_rq_remove_entity(entity->rq, entity); >> - spin_unlock(&entity->rq_lock); >> + spin_unlock(&entity->lock); >> >> /* Make sure this entity is not used by the scheduler at the >> 
moment */ >> wait_for_completion(&entity->entity_idle); >> @@ -396,9 +396,9 @@ static void drm_sched_entity_wakeup(struct >> dma_fence *f, >> void drm_sched_entity_set_priority(struct drm_sched_entity *entity, >> enum drm_sched_priority priority) >> { >> - spin_lock(&entity->rq_lock); >> + spin_lock(&entity->lock); >> entity->priority = priority; >> - spin_unlock(&entity->rq_lock); >> + spin_unlock(&entity->lock); >> } >> EXPORT_SYMBOL(drm_sched_entity_set_priority); >> >> @@ -515,10 +515,10 @@ struct drm_sched_job >> *drm_sched_entity_pop_job(struct drm_sched_entity *entity) >> >> next = to_drm_sched_job(spsc_queue_peek(&entity- >>> job_queue)); >> if (next) { >> - spin_lock(&entity->rq_lock); >> + spin_lock(&entity->lock); >> drm_sched_rq_update_fifo_locked(entity, >> next- >>> submit_ts); >> - spin_unlock(&entity->rq_lock); >> + spin_unlock(&entity->lock); >> } >> } >> >> @@ -559,14 +559,14 @@ void drm_sched_entity_select_rq(struct >> drm_sched_entity *entity) >> if (fence && !dma_fence_is_signaled(fence)) >> return; >> >> - spin_lock(&entity->rq_lock); >> + spin_lock(&entity->lock); >> sched = drm_sched_pick_best(entity->sched_list, entity- >>> num_sched_list); >> rq = sched ? 
sched->sched_rq[entity->priority] : NULL; >> if (rq != entity->rq) { >> drm_sched_rq_remove_entity(entity->rq, entity); >> entity->rq = rq; >> } >> - spin_unlock(&entity->rq_lock); >> + spin_unlock(&entity->lock); >> >> if (entity->num_sched_list == 1) >> entity->sched_list = NULL; >> @@ -605,9 +605,9 @@ void drm_sched_entity_push_job(struct >> drm_sched_job *sched_job) >> struct drm_sched_rq *rq; >> >> /* Add the entity to the run queue */ >> - spin_lock(&entity->rq_lock); >> + spin_lock(&entity->lock); >> if (entity->stopped) { >> - spin_unlock(&entity->rq_lock); >> + spin_unlock(&entity->lock); >> >> DRM_ERROR("Trying to push to a killed >> entity\n"); >> return; >> @@ -621,7 +621,7 @@ void drm_sched_entity_push_job(struct >> drm_sched_job *sched_job) >> if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) >> drm_sched_rq_update_fifo_locked(entity, >> submit_ts); >> >> - spin_unlock(&entity->rq_lock); >> + spin_unlock(&entity->lock); >> >> drm_sched_wakeup(sched); >> } >> diff --git a/drivers/gpu/drm/scheduler/sched_main.c >> b/drivers/gpu/drm/scheduler/sched_main.c >> index 07ee386b8e4b..2670bf9f34b2 100644 >> --- a/drivers/gpu/drm/scheduler/sched_main.c >> +++ b/drivers/gpu/drm/scheduler/sched_main.c >> @@ -176,7 +176,7 @@ void drm_sched_rq_update_fifo_locked(struct >> drm_sched_entity *entity, ktime_t ts >> * for entity from within concurrent >> drm_sched_entity_select_rq and the >> * other to update the rb tree structure. 
>> */ >> - lockdep_assert_held(&entity->rq_lock); >> + lockdep_assert_held(&entity->lock); >> >> spin_lock(&entity->rq->lock); >> >> diff --git a/include/drm/gpu_scheduler.h >> b/include/drm/gpu_scheduler.h >> index b6d095074c19..683fff8939e4 100644 >> --- a/include/drm/gpu_scheduler.h >> +++ b/include/drm/gpu_scheduler.h >> @@ -96,6 +96,14 @@ struct drm_sched_entity { >> */ >> struct list_head list; >> > > Uh, btw, while reviewing, I just saw that we still have that FIXME > further up: > > /** > * @rq: > * > * Runqueue on which this entity is currently scheduled. > * > * FIXME: Locking is very unclear for this. Writers are protected by > * @rq_lock, but readers are generally lockless and seem to just race > * with not even a READ_ONCE. > */ > struct drm_sched_rq *rq; > > At the very least, rq_lock should be renamed here, too. AFAICS the Good catch! > series doesn't solve the FIXME, so we keep it, agreed? Yep. Regards, Tvrtko >> + /** >> + * @lock: >> + * >> + * Lock protecting the run-queue (@rq) to which this entity >> belongs, >> + * @priority and the list of schedulers (@sched_list, >> @num_sched_list). >> + */ >> + spinlock_t lock; >> + >> /** >> * @rq: >> * >> @@ -140,13 +148,6 @@ struct drm_sched_entity { >> */ > > I think this comment here above also uses the term "rq_lock". While > you're fixing it, maybe also do a quick grep for "rq_lock" in case I > overlooked it somewhere else. I stopped drinking coffee today, so... > > > Thx, > P. > >> enum drm_sched_priority priority; >> >> - /** >> - * @rq_lock: >> - * >> - * Lock to modify the runqueue to which this entity belongs. >> - */ >> - spinlock_t rq_lock; >> - >> /** >> * @job_queue: the list of jobs of this entity. >> */ >
diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index b72cba292839..c013c2b49aa5 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -105,7 +105,7 @@ int drm_sched_entity_init(struct drm_sched_entity *entity, /* We start in an idle state. */ complete_all(&entity->entity_idle); - spin_lock_init(&entity->rq_lock); + spin_lock_init(&entity->lock); spsc_queue_init(&entity->job_queue); atomic_set(&entity->fence_seq, 0); @@ -133,10 +133,10 @@ void drm_sched_entity_modify_sched(struct drm_sched_entity *entity, { WARN_ON(!num_sched_list || !sched_list); - spin_lock(&entity->rq_lock); + spin_lock(&entity->lock); entity->sched_list = sched_list; entity->num_sched_list = num_sched_list; - spin_unlock(&entity->rq_lock); + spin_unlock(&entity->lock); } EXPORT_SYMBOL(drm_sched_entity_modify_sched); @@ -244,10 +244,10 @@ static void drm_sched_entity_kill(struct drm_sched_entity *entity) if (!entity->rq) return; - spin_lock(&entity->rq_lock); + spin_lock(&entity->lock); entity->stopped = true; drm_sched_rq_remove_entity(entity->rq, entity); - spin_unlock(&entity->rq_lock); + spin_unlock(&entity->lock); /* Make sure this entity is not used by the scheduler at the moment */ wait_for_completion(&entity->entity_idle); @@ -396,9 +396,9 @@ static void drm_sched_entity_wakeup(struct dma_fence *f, void drm_sched_entity_set_priority(struct drm_sched_entity *entity, enum drm_sched_priority priority) { - spin_lock(&entity->rq_lock); + spin_lock(&entity->lock); entity->priority = priority; - spin_unlock(&entity->rq_lock); + spin_unlock(&entity->lock); } EXPORT_SYMBOL(drm_sched_entity_set_priority); @@ -515,10 +515,10 @@ struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity) next = to_drm_sched_job(spsc_queue_peek(&entity->job_queue)); if (next) { - spin_lock(&entity->rq_lock); + spin_lock(&entity->lock); drm_sched_rq_update_fifo_locked(entity, next->submit_ts); - 
spin_unlock(&entity->rq_lock); + spin_unlock(&entity->lock); } } @@ -559,14 +559,14 @@ void drm_sched_entity_select_rq(struct drm_sched_entity *entity) if (fence && !dma_fence_is_signaled(fence)) return; - spin_lock(&entity->rq_lock); + spin_lock(&entity->lock); sched = drm_sched_pick_best(entity->sched_list, entity->num_sched_list); rq = sched ? sched->sched_rq[entity->priority] : NULL; if (rq != entity->rq) { drm_sched_rq_remove_entity(entity->rq, entity); entity->rq = rq; } - spin_unlock(&entity->rq_lock); + spin_unlock(&entity->lock); if (entity->num_sched_list == 1) entity->sched_list = NULL; @@ -605,9 +605,9 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job) struct drm_sched_rq *rq; /* Add the entity to the run queue */ - spin_lock(&entity->rq_lock); + spin_lock(&entity->lock); if (entity->stopped) { - spin_unlock(&entity->rq_lock); + spin_unlock(&entity->lock); DRM_ERROR("Trying to push to a killed entity\n"); return; @@ -621,7 +621,7 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job) if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) drm_sched_rq_update_fifo_locked(entity, submit_ts); - spin_unlock(&entity->rq_lock); + spin_unlock(&entity->lock); drm_sched_wakeup(sched); } diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 07ee386b8e4b..2670bf9f34b2 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -176,7 +176,7 @@ void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, ktime_t ts * for entity from within concurrent drm_sched_entity_select_rq and the * other to update the rb tree structure. 
*/ - lockdep_assert_held(&entity->rq_lock); + lockdep_assert_held(&entity->lock); spin_lock(&entity->rq->lock); diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index b6d095074c19..683fff8939e4 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -96,6 +96,14 @@ struct drm_sched_entity { */ struct list_head list; + /** + * @lock: + * + * Lock protecting the run-queue (@rq) to which this entity belongs, + * @priority and the list of schedulers (@sched_list, @num_sched_list). + */ + spinlock_t lock; + /** * @rq: * @@ -140,13 +148,6 @@ struct drm_sched_entity { */ enum drm_sched_priority priority; - /** - * @rq_lock: - * - * Lock to modify the runqueue to which this entity belongs. - */ - spinlock_t rq_lock; - /** * @job_queue: the list of jobs of this entity. */