Message ID | 20170912151155.4603-13-christian.gmeiner@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Am Dienstag, den 12.09.2017, 17:11 +0200 schrieb Christian Gmeiner: > With 'sync points' we can sample the reqeustes perform signals > before and/or after the submited command buffer. > > Changes v2 -> v3: > - fixed indentation and init nr_events to 1 > > Signed-off-by: Christian Gmeiner <christian.gmeiner@gmail.com> > --- > drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 102 +++++++++++++++++++++++++++------- > drivers/gpu/drm/etnaviv/etnaviv_gpu.h | 1 + > 2 files changed, 83 insertions(+), 20 deletions(-) > > diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c > index b65d7b9d247a..842b6642dcd6 100644 > --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c > +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c > @@ -1324,12 +1324,48 @@ void etnaviv_gpu_pm_put(struct etnaviv_gpu *gpu) > pm_runtime_put_autosuspend(gpu->dev); > } > > +static void sync_point_perfmon_sample(struct etnaviv_gpu *gpu, > + struct etnaviv_event *event, unsigned int flags) > +{ > + const struct etnaviv_cmdbuf *cmdbuf = event->cmdbuf; > + unsigned int i; > + > + for (i = 0; i < cmdbuf->nr_pmrs; i++) { > + const struct etnaviv_perfmon_request *pmr = cmdbuf->pmrs + i; > + > + if (pmr->flags == flags) > + etnaviv_perfmon_process(gpu, pmr); > + } > +} > + > +static void sync_point_perfmon_sample_pre(struct etnaviv_gpu *gpu, > + struct etnaviv_event *event) > +{ > + sync_point_perfmon_sample(gpu, event, ETNA_PM_PROCESS_PRE); > +} > + > +static void sync_point_perfmon_sample_post(struct etnaviv_gpu *gpu, > + struct etnaviv_event *event) > +{ > + const struct etnaviv_cmdbuf *cmdbuf = event->cmdbuf; > + unsigned int i; > + > + sync_point_perfmon_sample(gpu, event, ETNA_PM_PROCESS_POST); > + > + for (i = 0; i < cmdbuf->nr_pmrs; i++) { > + const struct etnaviv_perfmon_request *pmr = cmdbuf->pmrs + i; > + > + *pmr->bo_vma = pmr->sequence; > + } > +} > + > + > /* add bo's to gpu's ring, and kick gpu: */ > int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, > struct etnaviv_gem_submit *submit, struct etnaviv_cmdbuf *cmdbuf) > { > struct dma_fence *fence; > - unsigned int event, i; > + unsigned int i, nr_events = 1, event[3]; > int ret; > > ret = etnaviv_gpu_pm_get_sync(gpu); > @@ -1345,9 +1381,19 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, > * > */ > > - ret = event_alloc(gpu, 1, &event); > + /* > + * if there are performance monitor requests we need to have > + * - a sync point to re-configure gpu and process ETNA_PM_PROCESS_PRE > + * requests. > + * - a sync point to re-configure gpu, process ETNA_PM_PROCESS_POST requests > + * and update the sequence number for userspace. > + */ > + if (cmdbuf->nr_pmrs) > + nr_events = 3; > + > + ret = event_alloc(gpu, nr_events, event); > if (ret) { > - DRM_ERROR("no free event\n"); > + DRM_ERROR("no free events\n"); > goto out_pm_put; > } > > @@ -1355,12 +1401,14 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, > > fence = etnaviv_gpu_fence_alloc(gpu); > if (!fence) { > - event_free(gpu, event); > + for (i = 0; i < nr_events; i++) > + event_free(gpu, event[i]); > + > ret = -ENOMEM; > goto out_unlock; > } > > - gpu->event[event].fence = fence; > + gpu->event[event[0]].fence = fence; > submit->fence = dma_fence_get(fence); > gpu->active_fence = submit->fence->seqno; > > @@ -1370,7 +1418,19 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, > gpu->lastctx = cmdbuf->ctx; > } > > - etnaviv_buffer_queue(gpu, event, cmdbuf); > + if (cmdbuf->nr_pmrs) { > + gpu->event[event[1]].sync_point = &sync_point_perfmon_sample_pre; > + gpu->event[event[1]].cmdbuf = cmdbuf; > + etnaviv_sync_point_queue(gpu, event[1]); > + } > + > + etnaviv_buffer_queue(gpu, event[0], cmdbuf); > + > + if (cmdbuf->nr_pmrs) { > + gpu->event[event[2]].sync_point = &sync_point_perfmon_sample_post; > + gpu->event[event[2]].cmdbuf = cmdbuf; > + etnaviv_sync_point_queue(gpu, event[2]); > + } > > cmdbuf->fence = fence; > list_add_tail(&cmdbuf->node, &gpu->active_cmd_list); > @@ -1475,20 +1535,22 @@ static irqreturn_t irq_handler(int irq, void *data) > } > > fence = gpu->event[event].fence; > - gpu->event[event].fence = NULL; > - dma_fence_signal(fence); > - > - /* > - * Events can be processed out of order. Eg, > - * - allocate and queue event 0 > - * - allocate event 1 > - * - event 0 completes, we process it > - * - allocate and queue event 0 > - * - event 1 and event 0 complete > - * we can end up processing event 0 first, then 1. > - */ > - if (fence_after(fence->seqno, gpu->completed_fence)) > - gpu->completed_fence = fence->seqno; if (!fence) continue; > + if (fence) { > + gpu->event[event].fence = NULL; > + dma_fence_signal(fence); > + > + /* > + * Events can be processed out of order. Eg, > + * - allocate and queue event 0 > + * - allocate event 1 > + * - event 0 completes, we process it > + * - allocate and queue event 0 > + * - event 1 and event 0 complete > + * we can end up processing event 0 first, then 1. > + */ > + if (fence_after(fence->seqno, gpu->completed_fence)) > + gpu->completed_fence = fence->seqno; > + } > > event_free(gpu, event); Together with the change above, this event_free needs to move into pmrs_worker for the sync point events. Otherwise the event may be reallocated (and cleared) before the pmrs_worker is run, which still needs the event to know which function to execute. > } > diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h > index 7d5f785b0f08..c75a7b5c397c 100644 > --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h > +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h > @@ -89,6 +89,7 @@ struct etnaviv_chip_identity { > > struct etnaviv_event { > struct dma_fence *fence; > + struct etnaviv_cmdbuf *cmdbuf; > > void (*sync_point)(struct etnaviv_gpu *gpu, struct etnaviv_event *event); > };
Hi Lucas 2017-09-13 13:03 GMT+02:00 Lucas Stach <l.stach@pengutronix.de>: > Am Dienstag, den 12.09.2017, 17:11 +0200 schrieb Christian Gmeiner: >> With 'sync points' we can sample the reqeustes perform signals >> before and/or after the submited command buffer. >> >> Changes v2 -> v3: >> - fixed indentation and init nr_events to 1 >> >> Signed-off-by: Christian Gmeiner <christian.gmeiner@gmail.com> >> --- >> drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 102 +++++++++++++++++++++++++++------- >> drivers/gpu/drm/etnaviv/etnaviv_gpu.h | 1 + >> 2 files changed, 83 insertions(+), 20 deletions(-) >> >> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c >> index b65d7b9d247a..842b6642dcd6 100644 >> --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c >> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c >> @@ -1324,12 +1324,48 @@ void etnaviv_gpu_pm_put(struct etnaviv_gpu *gpu) >> pm_runtime_put_autosuspend(gpu->dev); >> } >> >> +static void sync_point_perfmon_sample(struct etnaviv_gpu *gpu, >> + struct etnaviv_event *event, unsigned int flags) >> +{ >> + const struct etnaviv_cmdbuf *cmdbuf = event->cmdbuf; >> + unsigned int i; >> + >> + for (i = 0; i < cmdbuf->nr_pmrs; i++) { >> + const struct etnaviv_perfmon_request *pmr = cmdbuf->pmrs + i; >> + >> + if (pmr->flags == flags) >> + etnaviv_perfmon_process(gpu, pmr); >> + } >> +} >> + >> +static void sync_point_perfmon_sample_pre(struct etnaviv_gpu *gpu, >> + struct etnaviv_event *event) >> +{ >> + sync_point_perfmon_sample(gpu, event, ETNA_PM_PROCESS_PRE); >> +} >> + >> +static void sync_point_perfmon_sample_post(struct etnaviv_gpu *gpu, >> + struct etnaviv_event *event) >> +{ >> + const struct etnaviv_cmdbuf *cmdbuf = event->cmdbuf; >> + unsigned int i; >> + >> + sync_point_perfmon_sample(gpu, event, ETNA_PM_PROCESS_POST); >> + >> + for (i = 0; i < cmdbuf->nr_pmrs; i++) { >> + const struct etnaviv_perfmon_request *pmr = cmdbuf->pmrs + i; >> + >> + *pmr->bo_vma = pmr->sequence; >> + } >> +} >> + >> + >> /* add bo's to gpu's ring, and kick gpu: */ >> int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, >> struct etnaviv_gem_submit *submit, struct etnaviv_cmdbuf *cmdbuf) >> { >> struct dma_fence *fence; >> - unsigned int event, i; >> + unsigned int i, nr_events = 1, event[3]; >> int ret; >> >> ret = etnaviv_gpu_pm_get_sync(gpu); >> @@ -1345,9 +1381,19 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, >> * >> */ >> >> - ret = event_alloc(gpu, 1, &event); >> + /* >> + * if there are performance monitor requests we need to have >> + * - a sync point to re-configure gpu and process ETNA_PM_PROCESS_PRE >> + * requests. >> + * - a sync point to re-configure gpu, process ETNA_PM_PROCESS_POST requests >> + * and update the sequence number for userspace. >> + */ >> + if (cmdbuf->nr_pmrs) >> + nr_events = 3; >> + >> + ret = event_alloc(gpu, nr_events, event); >> if (ret) { >> - DRM_ERROR("no free event\n"); >> + DRM_ERROR("no free events\n"); >> goto out_pm_put; >> } >> >> @@ -1355,12 +1401,14 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, >> >> fence = etnaviv_gpu_fence_alloc(gpu); >> if (!fence) { >> - event_free(gpu, event); >> + for (i = 0; i < nr_events; i++) >> + event_free(gpu, event[i]); >> + >> ret = -ENOMEM; >> goto out_unlock; >> } >> >> - gpu->event[event].fence = fence; >> + gpu->event[event[0]].fence = fence; >> submit->fence = dma_fence_get(fence); >> gpu->active_fence = submit->fence->seqno; >> >> @@ -1370,7 +1418,19 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, >> gpu->lastctx = cmdbuf->ctx; >> } >> >> - etnaviv_buffer_queue(gpu, event, cmdbuf); >> + if (cmdbuf->nr_pmrs) { >> + gpu->event[event[1]].sync_point = &sync_point_perfmon_sample_pre; >> + gpu->event[event[1]].cmdbuf = cmdbuf; >> + etnaviv_sync_point_queue(gpu, event[1]); >> + } >> + >> + etnaviv_buffer_queue(gpu, event[0], cmdbuf); >> + >> + if (cmdbuf->nr_pmrs) { >> + gpu->event[event[2]].sync_point = &sync_point_perfmon_sample_post; >> + gpu->event[event[2]].cmdbuf = cmdbuf; >> + etnaviv_sync_point_queue(gpu, event[2]); >> + } >> >> cmdbuf->fence = fence; >> list_add_tail(&cmdbuf->node, &gpu->active_cmd_list); >> @@ -1475,20 +1535,22 @@ static irqreturn_t irq_handler(int irq, void *data) >> } >> >> fence = gpu->event[event].fence; >> - gpu->event[event].fence = NULL; >> - dma_fence_signal(fence); >> - >> - /* >> - * Events can be processed out of order. Eg, >> - * - allocate and queue event 0 >> - * - allocate event 1 >> - * - event 0 completes, we process it >> - * - allocate and queue event 0 >> - * - event 1 and event 0 complete >> - * we can end up processing event 0 first, then 1. >> - */ >> - if (fence_after(fence->seqno, gpu->completed_fence)) >> - gpu->completed_fence = fence->seqno; > > if (!fence) > continue; > >> + if (fence) { >> + gpu->event[event].fence = NULL; >> + dma_fence_signal(fence); >> + >> + /* >> + * Events can be processed out of order. Eg, >> + * - allocate and queue event 0 >> + * - allocate event 1 >> + * - event 0 completes, we process it >> + * - allocate and queue event 0 >> + * - event 1 and event 0 complete >> + * we can end up processing event 0 first, then 1. >> + */ >> + if (fence_after(fence->seqno, gpu->completed_fence)) >> + gpu->completed_fence = fence->seqno; >> + } >> >> event_free(gpu, event); > > Together with the change above, this event_free needs to move into > pmrs_worker for the sync point events. Otherwise the event may be > reallocated (and cleared) before the pmrs_worker is run, which still > needs the event to know which function to execute. > makes sense - will be fixed in next version. >> } >> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h >> index 7d5f785b0f08..c75a7b5c397c 100644 >> --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h >> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h >> @@ -89,6 +89,7 @@ struct etnaviv_chip_identity { >> >> struct etnaviv_event { >> struct dma_fence *fence; >> + struct etnaviv_cmdbuf *cmdbuf; >> >> void (*sync_point)(struct etnaviv_gpu *gpu, struct etnaviv_event *event); >> }; > > greets -- Christian Gmeiner, MSc https://christian-gmeiner.info
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index b65d7b9d247a..842b6642dcd6 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -1324,12 +1324,48 @@ void etnaviv_gpu_pm_put(struct etnaviv_gpu *gpu) pm_runtime_put_autosuspend(gpu->dev); } +static void sync_point_perfmon_sample(struct etnaviv_gpu *gpu, + struct etnaviv_event *event, unsigned int flags) +{ + const struct etnaviv_cmdbuf *cmdbuf = event->cmdbuf; + unsigned int i; + + for (i = 0; i < cmdbuf->nr_pmrs; i++) { + const struct etnaviv_perfmon_request *pmr = cmdbuf->pmrs + i; + + if (pmr->flags == flags) + etnaviv_perfmon_process(gpu, pmr); + } +} + +static void sync_point_perfmon_sample_pre(struct etnaviv_gpu *gpu, + struct etnaviv_event *event) +{ + sync_point_perfmon_sample(gpu, event, ETNA_PM_PROCESS_PRE); +} + +static void sync_point_perfmon_sample_post(struct etnaviv_gpu *gpu, + struct etnaviv_event *event) +{ + const struct etnaviv_cmdbuf *cmdbuf = event->cmdbuf; + unsigned int i; + + sync_point_perfmon_sample(gpu, event, ETNA_PM_PROCESS_POST); + + for (i = 0; i < cmdbuf->nr_pmrs; i++) { + const struct etnaviv_perfmon_request *pmr = cmdbuf->pmrs + i; + + *pmr->bo_vma = pmr->sequence; + } +} + + /* add bo's to gpu's ring, and kick gpu: */ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, struct etnaviv_gem_submit *submit, struct etnaviv_cmdbuf *cmdbuf) { struct dma_fence *fence; - unsigned int event, i; + unsigned int i, nr_events = 1, event[3]; int ret; ret = etnaviv_gpu_pm_get_sync(gpu); @@ -1345,9 +1381,19 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, * */ - ret = event_alloc(gpu, 1, &event); + /* + * if there are performance monitor requests we need to have + * - a sync point to re-configure gpu and process ETNA_PM_PROCESS_PRE + * requests. + * - a sync point to re-configure gpu, process ETNA_PM_PROCESS_POST requests + * and update the sequence number for userspace. + */ + if (cmdbuf->nr_pmrs) + nr_events = 3; + + ret = event_alloc(gpu, nr_events, event); if (ret) { - DRM_ERROR("no free event\n"); + DRM_ERROR("no free events\n"); goto out_pm_put; } @@ -1355,12 +1401,14 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, fence = etnaviv_gpu_fence_alloc(gpu); if (!fence) { - event_free(gpu, event); + for (i = 0; i < nr_events; i++) + event_free(gpu, event[i]); + ret = -ENOMEM; goto out_unlock; } - gpu->event[event].fence = fence; + gpu->event[event[0]].fence = fence; submit->fence = dma_fence_get(fence); gpu->active_fence = submit->fence->seqno; @@ -1370,7 +1418,19 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, gpu->lastctx = cmdbuf->ctx; } - etnaviv_buffer_queue(gpu, event, cmdbuf); + if (cmdbuf->nr_pmrs) { + gpu->event[event[1]].sync_point = &sync_point_perfmon_sample_pre; + gpu->event[event[1]].cmdbuf = cmdbuf; + etnaviv_sync_point_queue(gpu, event[1]); + } + + etnaviv_buffer_queue(gpu, event[0], cmdbuf); + + if (cmdbuf->nr_pmrs) { + gpu->event[event[2]].sync_point = &sync_point_perfmon_sample_post; + gpu->event[event[2]].cmdbuf = cmdbuf; + etnaviv_sync_point_queue(gpu, event[2]); + } cmdbuf->fence = fence; list_add_tail(&cmdbuf->node, &gpu->active_cmd_list); @@ -1475,20 +1535,22 @@ static irqreturn_t irq_handler(int irq, void *data) } fence = gpu->event[event].fence; - gpu->event[event].fence = NULL; - dma_fence_signal(fence); - - /* - * Events can be processed out of order. Eg, - * - allocate and queue event 0 - * - allocate event 1 - * - event 0 completes, we process it - * - allocate and queue event 0 - * - event 1 and event 0 complete - * we can end up processing event 0 first, then 1. - */ - if (fence_after(fence->seqno, gpu->completed_fence)) - gpu->completed_fence = fence->seqno; + if (fence) { + gpu->event[event].fence = NULL; + dma_fence_signal(fence); + + /* + * Events can be processed out of order. Eg, + * - allocate and queue event 0 + * - allocate event 1 + * - event 0 completes, we process it + * - allocate and queue event 0 + * - event 1 and event 0 complete + * we can end up processing event 0 first, then 1. + */ + if (fence_after(fence->seqno, gpu->completed_fence)) + gpu->completed_fence = fence->seqno; + } event_free(gpu, event); } diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h index 7d5f785b0f08..c75a7b5c397c 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h @@ -89,6 +89,7 @@ struct etnaviv_chip_identity { struct etnaviv_event { struct dma_fence *fence; + struct etnaviv_cmdbuf *cmdbuf; void (*sync_point)(struct etnaviv_gpu *gpu, struct etnaviv_event *event); };
With 'sync points' we can sample the reqeustes perform signals before and/or after the submited command buffer. Changes v2 -> v3: - fixed indentation and init nr_events to 1 Signed-off-by: Christian Gmeiner <christian.gmeiner@gmail.com> --- drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 102 +++++++++++++++++++++++++++------- drivers/gpu/drm/etnaviv/etnaviv_gpu.h | 1 + 2 files changed, 83 insertions(+), 20 deletions(-)