Message ID | 20230404002211.3611376-5-matthew.brost@intel.com
---|---
State | New, archived
Series | Xe DRM scheduler and long running workload plans
On 2023-04-03 20:22, Matthew Brost wrote:
> Add generic schedule message interface which sends messages to backend
> from the drm_gpu_scheduler main submission thread. The idea is some of
> these messages modify some state in drm_sched_entity which is also
> modified during submission. By scheduling these messages and submission
> in the same thread their is not race changing states in
> drm_sched_entity.

"... there is no race when changing ..." or better yet,
"... we eliminate races due to drm_sched_entity state changes."

>
> This interface will be used in XE, new Intel GPU driver, to cleanup,

"Xe"?

Regards,
Luben

> suspend, resume, and change scheduling properties of a drm_sched_entity.
>
> The interface is designed to be generic and extendable with only the
> backend understanding the messages.
>
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> ---
>  drivers/gpu/drm/scheduler/sched_main.c | 58 +++++++++++++++++++++++++-
>  include/drm/gpu_scheduler.h            | 29 ++++++++++++-
>  2 files changed, 84 insertions(+), 3 deletions(-)
>
[snip]
On Thu, May 04, 2023 at 01:28:52AM -0400, Luben Tuikov wrote:
> On 2023-04-03 20:22, Matthew Brost wrote:
> > Add generic schedule message interface which sends messages to backend
> > from the drm_gpu_scheduler main submission thread. The idea is some of
> > these messages modify some state in drm_sched_entity which is also
> > modified during submission. By scheduling these messages and submission
> > in the same thread their is not race changing states in
> > drm_sched_entity.
>
> "... there is no race when changing ..." or better yet,
> "... we eliminate races due to drm_sched_entity state changes."
>
> >
> > This interface will be used in XE, new Intel GPU driver, to cleanup,
>
> "Xe"?
>

Will fix both.

Matt

> Regards,
> Luben
>
[snip]
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 2795021efe7b..9dc3378e9c5e 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -1055,6 +1055,54 @@ drm_sched_pick_best(struct drm_gpu_scheduler **sched_list,
 }
 EXPORT_SYMBOL(drm_sched_pick_best);
 
+/**
+ * drm_sched_add_msg - add scheduler message
+ *
+ * @sched: scheduler instance
+ * @msg: message to be added
+ *
+ * Can and will pass an jobs waiting on dependencies or in a runnable queue.
+ * Messages processing will stop if schedule run wq is stopped and resume when
+ * run wq is started.
+ */
+void drm_sched_add_msg(struct drm_gpu_scheduler *sched,
+		       struct drm_sched_msg *msg)
+{
+	spin_lock(&sched->job_list_lock);
+	list_add_tail(&msg->link, &sched->msgs);
+	spin_unlock(&sched->job_list_lock);
+
+	/*
+	 * Same as above in drm_sched_run_wq_queue, try to kick worker if
+	 * paused, harmless if this races
+	 */
+	if (!sched->pause_run_wq)
+		queue_work(sched->run_wq, &sched->work_run);
+}
+EXPORT_SYMBOL(drm_sched_add_msg);
+
+/**
+ * drm_sched_get_msg - get scheduler message
+ *
+ * @sched: scheduler instance
+ *
+ * Returns NULL or message
+ */
+static struct drm_sched_msg *
+drm_sched_get_msg(struct drm_gpu_scheduler *sched)
+{
+	struct drm_sched_msg *msg;
+
+	spin_lock(&sched->job_list_lock);
+	msg = list_first_entry_or_null(&sched->msgs,
+				       struct drm_sched_msg, link);
+	if (msg)
+		list_del(&msg->link);
+	spin_unlock(&sched->job_list_lock);
+
+	return msg;
+}
+
 /**
  * drm_sched_main - main scheduler thread
  *
@@ -1068,6 +1116,7 @@ static void drm_sched_main(struct work_struct *w)
 
 	while (!READ_ONCE(sched->pause_run_wq)) {
 		struct drm_sched_entity *entity;
+		struct drm_sched_msg *msg;
 		struct drm_sched_fence *s_fence;
 		struct drm_sched_job *sched_job;
 		struct dma_fence *fence;
@@ -1075,12 +1124,16 @@ static void drm_sched_main(struct work_struct *w)
 
 		cleanup_job = drm_sched_get_cleanup_job(sched);
 		entity = drm_sched_select_entity(sched);
+		msg = drm_sched_get_msg(sched);
 
 		if (cleanup_job)
 			sched->ops->free_job(cleanup_job);
 
+		if (msg)
+			sched->ops->process_msg(msg);
+
 		if (!entity) {
-			if (!cleanup_job)
+			if (!cleanup_job && !msg)
 				break;
 			continue;
 		}
@@ -1089,7 +1142,7 @@ static void drm_sched_main(struct work_struct *w)
 
 		if (!sched_job) {
 			complete_all(&entity->entity_idle);
-			if (!cleanup_job)
+			if (!cleanup_job && !msg)
 				break;
 			continue;
 		}
@@ -1181,6 +1234,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
 
 	init_waitqueue_head(&sched->job_scheduled);
 	INIT_LIST_HEAD(&sched->pending_list);
+	INIT_LIST_HEAD(&sched->msgs);
 	spin_lock_init(&sched->job_list_lock);
 	atomic_set(&sched->hw_rq_count, 0);
 	INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 3e421f5a710c..18172ae63ab7 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -398,6 +398,23 @@ enum drm_gpu_sched_stat {
 	DRM_GPU_SCHED_STAT_ENODEV,
 };
 
+/**
+ * struct drm_sched_msg - an in-band (relative to GPU scheduler run queue)
+ * message
+ *
+ * Generic enough for backend defined messages, backend can expand if needed.
+ */
+struct drm_sched_msg {
+	/** @link: list link into the gpu scheduler list of messages */
+	struct list_head link;
+	/**
+	 * @private_data: opaque pointer to message private data (backend defined)
+	 */
+	void *private_data;
+	/** @opcode: opcode of message (backend defined) */
+	unsigned int opcode;
+};
+
 /**
  * struct drm_sched_backend_ops - Define the backend operations
  *	called by the scheduler
@@ -475,6 +492,12 @@ struct drm_sched_backend_ops {
 	 * and it's time to clean it up.
 	 */
 	void (*free_job)(struct drm_sched_job *sched_job);
+
+	/**
+	 * @process_msg: Process a message. Allowed to block, it is this
+	 * function's responsibility to free message if dynamically allocated.
+	 */
+	void (*process_msg)(struct drm_sched_msg *msg);
 };
 
 /**
@@ -486,6 +509,7 @@ struct drm_sched_backend_ops {
  * @timeout: the time after which a job is removed from the scheduler.
  * @name: name of the ring for which this scheduler is being used.
  * @sched_rq: priority wise array of run queues.
+ * @msgs: list of messages to be processed in @work_run
  * @job_scheduled: once @drm_sched_entity_do_release is called the scheduler
  *                 waits on this wait queue until all the scheduled jobs are
  *                 finished.
@@ -493,7 +517,7 @@ struct drm_sched_backend_ops {
  * @job_id_count: used to assign unique id to the each job.
  * @run_wq: workqueue used to queue @work_run
  * @timeout_wq: workqueue used to queue @work_tdr
- * @work_run: schedules jobs and cleans up entities
+ * @work_run: schedules jobs, cleans up jobs, and processes messages
  * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the
  *            timeout interval is over.
  * @pending_list: the list of jobs which are currently in the job queue.
@@ -517,6 +541,7 @@ struct drm_gpu_scheduler {
 	long			timeout;
 	const char		*name;
 	struct drm_sched_rq	sched_rq[DRM_SCHED_PRIORITY_COUNT];
+	struct list_head	msgs;
 	wait_queue_head_t	job_scheduled;
 	atomic_t		hw_rq_count;
 	atomic64_t		job_id_count;
@@ -570,6 +595,8 @@ void drm_sched_entity_modify_sched(struct drm_sched_entity *entity,
 
 void drm_sched_job_cleanup(struct drm_sched_job *job);
 void drm_sched_wakeup(struct drm_gpu_scheduler *sched);
+void drm_sched_add_msg(struct drm_gpu_scheduler *sched,
+		       struct drm_sched_msg *msg);
 void drm_sched_run_wq_stop(struct drm_gpu_scheduler *sched);
 void drm_sched_run_wq_start(struct drm_gpu_scheduler *sched);
 void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad);
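[Editorial note: the kernel-doc on struct drm_sched_msg above says the struct
is "generic enough for backend defined messages, backend can expand if
needed". As an illustrative sketch only -- nothing below is part of the patch;
the my_sched_msg type, the MY_MSG_* opcode, and my_process_msg are all
hypothetical -- a backend could embed the generic message in a driver-defined
one and recover it in its process_msg hook with container_of():]

#include <linux/container_of.h>
#include <linux/slab.h>
#include <drm/gpu_scheduler.h>

/* Hypothetical driver-defined message wrapping the generic one. */
struct my_sched_msg {
	struct drm_sched_msg base;	/* generic part, queued on sched->msgs */
	int new_priority;		/* driver-defined payload */
};

/* Hypothetical opcode; the patch leaves opcodes entirely backend defined. */
enum { MY_MSG_SET_PRIORITY };

static void my_process_msg(struct drm_sched_msg *msg)
{
	struct my_sched_msg *m = container_of(msg, struct my_sched_msg, base);

	switch (msg->opcode) {
	case MY_MSG_SET_PRIORITY:
		/*
		 * Runs in drm_sched_main(), serialized with job submission,
		 * so drm_sched_entity state can be changed here without
		 * racing the submission path.
		 */
		break;
	}

	/* Per the @process_msg kernel-doc, free dynamically allocated messages. */
	kfree(m);
}

[The hook would be wired up through the driver's struct drm_sched_backend_ops,
i.e. .process_msg = my_process_msg.]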
Add generic schedule message interface which sends messages to backend
from the drm_gpu_scheduler main submission thread. The idea is some of
these messages modify some state in drm_sched_entity which is also
modified during submission. By scheduling these messages and submission
in the same thread their is not race changing states in
drm_sched_entity.

This interface will be used in XE, new Intel GPU driver, to cleanup,
suspend, resume, and change scheduling properties of a drm_sched_entity.

The interface is designed to be generic and extendable with only the
backend understanding the messages.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/scheduler/sched_main.c | 58 +++++++++++++++++++++++++-
 include/drm/gpu_scheduler.h            | 29 ++++++++++++-
 2 files changed, 84 insertions(+), 3 deletions(-)
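[Editorial note: to round out the picture, a minimal send-side sketch under
the same caveats -- MY_MSG_SUSPEND and the choice to carry the entity in
private_data are hypothetical, not from the patch. The caller allocates a
message, fills in the backend-defined fields, and queues it with
drm_sched_add_msg(); drm_sched_main() later hands it to the backend's
process_msg hook:]

#include <linux/errno.h>
#include <linux/slab.h>
#include <drm/gpu_scheduler.h>

enum { MY_MSG_SUSPEND };	/* hypothetical backend-defined opcode */

static int my_entity_suspend(struct drm_gpu_scheduler *sched,
			     struct drm_sched_entity *entity)
{
	struct drm_sched_msg *msg;

	msg = kzalloc(sizeof(*msg), GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	msg->opcode = MY_MSG_SUSPEND;
	msg->private_data = entity;

	/*
	 * Queued under job_list_lock and processed in drm_sched_main().
	 * Note the drm_sched_add_msg() kernel-doc: a message can pass jobs
	 * still waiting on dependencies or sitting in a runnable queue.
	 */
	drm_sched_add_msg(sched, msg);
	return 0;
}

[Per the drm_sched_add_msg() kernel-doc, message processing also stops while
the run workqueue is stopped and resumes when it is started, so ordering
against drm_sched_run_wq_stop()/drm_sched_run_wq_start() is the backend's
responsibility.]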