
[V4,12/23] drm/etnaviv: use 'sync points' for performance monitor requests

Message ID 20170912151155.4603-13-christian.gmeiner@gmail.com (mailing list archive)
State New, archived

Commit Message

Christian Gmeiner Sept. 12, 2017, 3:11 p.m. UTC
With 'sync points' we can sample the requested perfmon signals
before and/or after the submitted command buffer.

Changes v2 -> v3:
- fixed indentation and init nr_events to 1

Signed-off-by: Christian Gmeiner <christian.gmeiner@gmail.com>
---
 drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 102 +++++++++++++++++++++++++++-------
 drivers/gpu/drm/etnaviv/etnaviv_gpu.h |   1 +
 2 files changed, 83 insertions(+), 20 deletions(-)

Comments

Lucas Stach Sept. 13, 2017, 11:03 a.m. UTC | #1
Am Dienstag, den 12.09.2017, 17:11 +0200 schrieb Christian Gmeiner:
> With 'sync points' we can sample the requested perfmon signals
> before and/or after the submitted command buffer.
> 
> Changes v2 -> v3:
> - fixed indentation and init nr_events to 1
> 
> Signed-off-by: Christian Gmeiner <christian.gmeiner@gmail.com>
> ---
>  drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 102 +++++++++++++++++++++++++++-------
>  drivers/gpu/drm/etnaviv/etnaviv_gpu.h |   1 +
>  2 files changed, 83 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
> index b65d7b9d247a..842b6642dcd6 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
> @@ -1324,12 +1324,48 @@ void etnaviv_gpu_pm_put(struct etnaviv_gpu *gpu)
>  	pm_runtime_put_autosuspend(gpu->dev);
>  }
>  
> +static void sync_point_perfmon_sample(struct etnaviv_gpu *gpu,
> +	struct etnaviv_event *event, unsigned int flags)
> +{
> +	const struct etnaviv_cmdbuf *cmdbuf = event->cmdbuf;
> +	unsigned int i;
> +
> +	for (i = 0; i < cmdbuf->nr_pmrs; i++) {
> +		const struct etnaviv_perfmon_request *pmr = cmdbuf->pmrs + i;
> +
> +		if (pmr->flags == flags)
> +			etnaviv_perfmon_process(gpu, pmr);
> +	}
> +}
> +
> +static void sync_point_perfmon_sample_pre(struct etnaviv_gpu *gpu,
> +	struct etnaviv_event *event)
> +{
> +	sync_point_perfmon_sample(gpu, event, ETNA_PM_PROCESS_PRE);
> +}
> +
> +static void sync_point_perfmon_sample_post(struct etnaviv_gpu *gpu,
> +	struct etnaviv_event *event)
> +{
> +	const struct etnaviv_cmdbuf *cmdbuf = event->cmdbuf;
> +	unsigned int i;
> +
> +	sync_point_perfmon_sample(gpu, event, ETNA_PM_PROCESS_POST);
> +
> +	for (i = 0; i < cmdbuf->nr_pmrs; i++) {
> +		const struct etnaviv_perfmon_request *pmr = cmdbuf->pmrs + i;
> +
> +		*pmr->bo_vma = pmr->sequence;
> +	}
> +}
> +
> +
>  /* add bo's to gpu's ring, and kick gpu: */
>  int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
>  	struct etnaviv_gem_submit *submit, struct etnaviv_cmdbuf *cmdbuf)
>  {
>  	struct dma_fence *fence;
> -	unsigned int event, i;
> +	unsigned int i, nr_events = 1, event[3];
>  	int ret;
>  
>  	ret = etnaviv_gpu_pm_get_sync(gpu);
> @@ -1345,9 +1381,19 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
>  	 *
>  	 */
>  
> -	ret = event_alloc(gpu, 1, &event);
> +	/*
> +	 * if there are performance monitor requests we need to have
> +	 * - a sync point to re-configure gpu and process ETNA_PM_PROCESS_PRE
> +	 *   requests.
> +	 * - a sync point to re-configure gpu, process ETNA_PM_PROCESS_POST requests
> +	 *   and update the sequence number for userspace.
> +	 */
> +	if (cmdbuf->nr_pmrs)
> +		nr_events = 3;
> +
> +	ret = event_alloc(gpu, nr_events, event);
>  	if (ret) {
> -		DRM_ERROR("no free event\n");
> +		DRM_ERROR("no free events\n");
>  		goto out_pm_put;
>  	}
>  
> @@ -1355,12 +1401,14 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
>  
>  	fence = etnaviv_gpu_fence_alloc(gpu);
>  	if (!fence) {
> -		event_free(gpu, event);
> +		for (i = 0; i < nr_events; i++)
> +			event_free(gpu, event[i]);
> +
>  		ret = -ENOMEM;
>  		goto out_unlock;
>  	}
>  
> -	gpu->event[event].fence = fence;
> +	gpu->event[event[0]].fence = fence;
>  	submit->fence = dma_fence_get(fence);
>  	gpu->active_fence = submit->fence->seqno;
>  
> @@ -1370,7 +1418,19 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
>  		gpu->lastctx = cmdbuf->ctx;
>  	}
>  
> -	etnaviv_buffer_queue(gpu, event, cmdbuf);
> +	if (cmdbuf->nr_pmrs) {
> +		gpu->event[event[1]].sync_point = &sync_point_perfmon_sample_pre;
> +		gpu->event[event[1]].cmdbuf = cmdbuf;
> +		etnaviv_sync_point_queue(gpu, event[1]);
> +	}
> +
> +	etnaviv_buffer_queue(gpu, event[0], cmdbuf);
> +
> +	if (cmdbuf->nr_pmrs) {
> +		gpu->event[event[2]].sync_point = &sync_point_perfmon_sample_post;
> +		gpu->event[event[2]].cmdbuf = cmdbuf;
> +		etnaviv_sync_point_queue(gpu, event[2]);
> +	}
>  
>  	cmdbuf->fence = fence;
>  	list_add_tail(&cmdbuf->node, &gpu->active_cmd_list);
> @@ -1475,20 +1535,22 @@ static irqreturn_t irq_handler(int irq, void *data)
>  			}
>  
>  			fence = gpu->event[event].fence;
> -			gpu->event[event].fence = NULL;
> -			dma_fence_signal(fence);
> -
> -			/*
> -			 * Events can be processed out of order.  Eg,
> -			 * - allocate and queue event 0
> -			 * - allocate event 1
> -			 * - event 0 completes, we process it
> -			 * - allocate and queue event 0
> -			 * - event 1 and event 0 complete
> -			 * we can end up processing event 0 first, then 1.
> -			 */
> -			if (fence_after(fence->seqno, gpu->completed_fence))
> -				gpu->completed_fence = fence->seqno;

if (!fence)
	continue;
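
A sketch of the resulting loop body with the early continue applied
(assuming the sync point work from the earlier patch in this series is
queued before this fence handling; only a sketch, not the final code):

	fence = gpu->event[event].fence;
	if (!fence)
		continue;	/* sync point event, handled by the worker */

	gpu->event[event].fence = NULL;
	dma_fence_signal(fence);

	/* out-of-order completion handling stays as it is */
	if (fence_after(fence->seqno, gpu->completed_fence))
		gpu->completed_fence = fence->seqno;

	event_free(gpu, event);

Note that a sync point event then never reaches event_free() here.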

> +			if (fence) {
> +				gpu->event[event].fence = NULL;
> +				dma_fence_signal(fence);
> +
> +				/*
> +				 * Events can be processed out of order.  Eg,
> +				 * - allocate and queue event 0
> +				 * - allocate event 1
> +				 * - event 0 completes, we process it
> +				 * - allocate and queue event 0
> +				 * - event 1 and event 0 complete
> +				 * we can end up processing event 0 first, then 1.
> +				 */
> +				if (fence_after(fence->seqno, gpu->completed_fence))
> +					gpu->completed_fence = fence->seqno;
> +			}
>  
>  			event_free(gpu, event);

Together with the change above, this event_free needs to move into
pmrs_worker for the sync point events. Otherwise the event may be
reallocated (and cleared) before the pmrs_worker is run, which still
needs the event to know which function to execute.
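
Something along these lines, perhaps (a rough sketch only; the worker
and field names are assumptions based on the earlier sync point patch
in this series and may differ in the final version):

	static void sync_point_worker(struct work_struct *work)
	{
		struct etnaviv_gpu *gpu = container_of(work,
				struct etnaviv_gpu, sync_point_work);
		struct etnaviv_event *event =
				&gpu->event[gpu->sync_point_event];

		event->sync_point(gpu, event);

		/* Free the event only after ->sync_point/->cmdbuf have
		 * been consumed, so event_alloc() from a later submit
		 * cannot clear them while this work is still pending. */
		event_free(gpu, gpu->sync_point_event);
	}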

>  		}
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
> index 7d5f785b0f08..c75a7b5c397c 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
> @@ -89,6 +89,7 @@ struct etnaviv_chip_identity {
>  
>  struct etnaviv_event {
>  	struct dma_fence *fence;
> +	struct etnaviv_cmdbuf *cmdbuf;
>  
>  	void (*sync_point)(struct etnaviv_gpu *gpu, struct etnaviv_event *event);
>  };
Christian Gmeiner Sept. 13, 2017, 2:35 p.m. UTC | #2
Hi Lucas


2017-09-13 13:03 GMT+02:00 Lucas Stach <l.stach@pengutronix.de>:
> Am Dienstag, den 12.09.2017, 17:11 +0200 schrieb Christian Gmeiner:
>> With 'sync points' we can sample the requested perfmon signals
>> before and/or after the submitted command buffer.
>>
>> Changes v2 -> v3:
>> - fixed indentation and init nr_events to 1
>>
>> Signed-off-by: Christian Gmeiner <christian.gmeiner@gmail.com>
>> ---
>>  drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 102 +++++++++++++++++++++++++++-------
>>  drivers/gpu/drm/etnaviv/etnaviv_gpu.h |   1 +
>>  2 files changed, 83 insertions(+), 20 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
>> index b65d7b9d247a..842b6642dcd6 100644
>> --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
>> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
>> @@ -1324,12 +1324,48 @@ void etnaviv_gpu_pm_put(struct etnaviv_gpu *gpu)
>>       pm_runtime_put_autosuspend(gpu->dev);
>>  }
>>
>> +static void sync_point_perfmon_sample(struct etnaviv_gpu *gpu,
>> +     struct etnaviv_event *event, unsigned int flags)
>> +{
>> +     const struct etnaviv_cmdbuf *cmdbuf = event->cmdbuf;
>> +     unsigned int i;
>> +
>> +     for (i = 0; i < cmdbuf->nr_pmrs; i++) {
>> +             const struct etnaviv_perfmon_request *pmr = cmdbuf->pmrs + i;
>> +
>> +             if (pmr->flags == flags)
>> +                     etnaviv_perfmon_process(gpu, pmr);
>> +     }
>> +}
>> +
>> +static void sync_point_perfmon_sample_pre(struct etnaviv_gpu *gpu,
>> +     struct etnaviv_event *event)
>> +{
>> +     sync_point_perfmon_sample(gpu, event, ETNA_PM_PROCESS_PRE);
>> +}
>> +
>> +static void sync_point_perfmon_sample_post(struct etnaviv_gpu *gpu,
>> +     struct etnaviv_event *event)
>> +{
>> +     const struct etnaviv_cmdbuf *cmdbuf = event->cmdbuf;
>> +     unsigned int i;
>> +
>> +     sync_point_perfmon_sample(gpu, event, ETNA_PM_PROCESS_POST);
>> +
>> +     for (i = 0; i < cmdbuf->nr_pmrs; i++) {
>> +             const struct etnaviv_perfmon_request *pmr = cmdbuf->pmrs + i;
>> +
>> +             *pmr->bo_vma = pmr->sequence;
>> +     }
>> +}
>> +
>> +
>>  /* add bo's to gpu's ring, and kick gpu: */
>>  int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
>>       struct etnaviv_gem_submit *submit, struct etnaviv_cmdbuf *cmdbuf)
>>  {
>>       struct dma_fence *fence;
>> -     unsigned int event, i;
>> +     unsigned int i, nr_events = 1, event[3];
>>       int ret;
>>
>>       ret = etnaviv_gpu_pm_get_sync(gpu);
>> @@ -1345,9 +1381,19 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
>>        *
>>        */
>>
>> -     ret = event_alloc(gpu, 1, &event);
>> +     /*
>> +      * if there are performance monitor requests we need to have
>> +      * - a sync point to re-configure gpu and process ETNA_PM_PROCESS_PRE
>> +      *   requests.
>> +      * - a sync point to re-configure gpu, process ETNA_PM_PROCESS_POST requests
>> +      *   and update the sequence number for userspace.
>> +      */
>> +     if (cmdbuf->nr_pmrs)
>> +             nr_events = 3;
>> +
>> +     ret = event_alloc(gpu, nr_events, event);
>>       if (ret) {
>> -             DRM_ERROR("no free event\n");
>> +             DRM_ERROR("no free events\n");
>>               goto out_pm_put;
>>       }
>>
>> @@ -1355,12 +1401,14 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
>>
>>       fence = etnaviv_gpu_fence_alloc(gpu);
>>       if (!fence) {
>> -             event_free(gpu, event);
>> +             for (i = 0; i < nr_events; i++)
>> +                     event_free(gpu, event[i]);
>> +
>>               ret = -ENOMEM;
>>               goto out_unlock;
>>       }
>>
>> -     gpu->event[event].fence = fence;
>> +     gpu->event[event[0]].fence = fence;
>>       submit->fence = dma_fence_get(fence);
>>       gpu->active_fence = submit->fence->seqno;
>>
>> @@ -1370,7 +1418,19 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
>>               gpu->lastctx = cmdbuf->ctx;
>>       }
>>
>> -     etnaviv_buffer_queue(gpu, event, cmdbuf);
>> +     if (cmdbuf->nr_pmrs) {
>> +             gpu->event[event[1]].sync_point = &sync_point_perfmon_sample_pre;
>> +             gpu->event[event[1]].cmdbuf = cmdbuf;
>> +             etnaviv_sync_point_queue(gpu, event[1]);
>> +     }
>> +
>> +     etnaviv_buffer_queue(gpu, event[0], cmdbuf);
>> +
>> +     if (cmdbuf->nr_pmrs) {
>> +             gpu->event[event[2]].sync_point = &sync_point_perfmon_sample_post;
>> +             gpu->event[event[2]].cmdbuf = cmdbuf;
>> +             etnaviv_sync_point_queue(gpu, event[2]);
>> +     }
>>
>>       cmdbuf->fence = fence;
>>       list_add_tail(&cmdbuf->node, &gpu->active_cmd_list);
>> @@ -1475,20 +1535,22 @@ static irqreturn_t irq_handler(int irq, void *data)
>>                       }
>>
>>                       fence = gpu->event[event].fence;
>> -                     gpu->event[event].fence = NULL;
>> -                     dma_fence_signal(fence);
>> -
>> -                     /*
>> -                      * Events can be processed out of order.  Eg,
>> -                      * - allocate and queue event 0
>> -                      * - allocate event 1
>> -                      * - event 0 completes, we process it
>> -                      * - allocate and queue event 0
>> -                      * - event 1 and event 0 complete
>> -                      * we can end up processing event 0 first, then 1.
>> -                      */
>> -                     if (fence_after(fence->seqno, gpu->completed_fence))
>> -                             gpu->completed_fence = fence->seqno;
>
> if (!fence)
>         continue;
>
>> +                     if (fence) {
>> +                             gpu->event[event].fence = NULL;
>> +                             dma_fence_signal(fence);
>> +
>> +                             /*
>> +                              * Events can be processed out of order.  Eg,
>> +                              * - allocate and queue event 0
>> +                              * - allocate event 1
>> +                              * - event 0 completes, we process it
>> +                              * - allocate and queue event 0
>> +                              * - event 1 and event 0 complete
>> +                              * we can end up processing event 0 first, then 1.
>> +                              */
>> +                             if (fence_after(fence->seqno, gpu->completed_fence))
>> +                                     gpu->completed_fence = fence->seqno;
>> +                     }
>>
>>                       event_free(gpu, event);
>
> Together with the change above, this event_free needs to move into
> pmrs_worker for the sync point events. Otherwise the event may be
> reallocated (and cleared) before the pmrs_worker is run, which still
> needs the event to know which function to execute.
>

makes sense - will be fixed in next version.

>>               }
>> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
>> index 7d5f785b0f08..c75a7b5c397c 100644
>> --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
>> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
>> @@ -89,6 +89,7 @@ struct etnaviv_chip_identity {
>>
>>  struct etnaviv_event {
>>       struct dma_fence *fence;
>> +     struct etnaviv_cmdbuf *cmdbuf;
>>
>>       void (*sync_point)(struct etnaviv_gpu *gpu, struct etnaviv_event *event);
>>  };
>
>

greets
--
Christian Gmeiner, MSc

https://christian-gmeiner.info

Patch

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index b65d7b9d247a..842b6642dcd6 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -1324,12 +1324,48 @@  void etnaviv_gpu_pm_put(struct etnaviv_gpu *gpu)
 	pm_runtime_put_autosuspend(gpu->dev);
 }
 
+static void sync_point_perfmon_sample(struct etnaviv_gpu *gpu,
+	struct etnaviv_event *event, unsigned int flags)
+{
+	const struct etnaviv_cmdbuf *cmdbuf = event->cmdbuf;
+	unsigned int i;
+
+	for (i = 0; i < cmdbuf->nr_pmrs; i++) {
+		const struct etnaviv_perfmon_request *pmr = cmdbuf->pmrs + i;
+
+		if (pmr->flags == flags)
+			etnaviv_perfmon_process(gpu, pmr);
+	}
+}
+
+static void sync_point_perfmon_sample_pre(struct etnaviv_gpu *gpu,
+	struct etnaviv_event *event)
+{
+	sync_point_perfmon_sample(gpu, event, ETNA_PM_PROCESS_PRE);
+}
+
+static void sync_point_perfmon_sample_post(struct etnaviv_gpu *gpu,
+	struct etnaviv_event *event)
+{
+	const struct etnaviv_cmdbuf *cmdbuf = event->cmdbuf;
+	unsigned int i;
+
+	sync_point_perfmon_sample(gpu, event, ETNA_PM_PROCESS_POST);
+
+	for (i = 0; i < cmdbuf->nr_pmrs; i++) {
+		const struct etnaviv_perfmon_request *pmr = cmdbuf->pmrs + i;
+
+		*pmr->bo_vma = pmr->sequence;
+	}
+}
+
+
 /* add bo's to gpu's ring, and kick gpu: */
 int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
 	struct etnaviv_gem_submit *submit, struct etnaviv_cmdbuf *cmdbuf)
 {
 	struct dma_fence *fence;
-	unsigned int event, i;
+	unsigned int i, nr_events = 1, event[3];
 	int ret;
 
 	ret = etnaviv_gpu_pm_get_sync(gpu);
@@ -1345,9 +1381,19 @@  int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
 	 *
 	 */
 
-	ret = event_alloc(gpu, 1, &event);
+	/*
+	 * if there are performance monitor requests we need to have
+	 * - a sync point to re-configure gpu and process ETNA_PM_PROCESS_PRE
+	 *   requests.
+	 * - a sync point to re-configure gpu, process ETNA_PM_PROCESS_POST requests
+	 *   and update the sequence number for userspace.
+	 */
+	if (cmdbuf->nr_pmrs)
+		nr_events = 3;
+
+	ret = event_alloc(gpu, nr_events, event);
 	if (ret) {
-		DRM_ERROR("no free event\n");
+		DRM_ERROR("no free events\n");
 		goto out_pm_put;
 	}
 
@@ -1355,12 +1401,14 @@  int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
 
 	fence = etnaviv_gpu_fence_alloc(gpu);
 	if (!fence) {
-		event_free(gpu, event);
+		for (i = 0; i < nr_events; i++)
+			event_free(gpu, event[i]);
+
 		ret = -ENOMEM;
 		goto out_unlock;
 	}
 
-	gpu->event[event].fence = fence;
+	gpu->event[event[0]].fence = fence;
 	submit->fence = dma_fence_get(fence);
 	gpu->active_fence = submit->fence->seqno;
 
@@ -1370,7 +1418,19 @@  int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
 		gpu->lastctx = cmdbuf->ctx;
 	}
 
-	etnaviv_buffer_queue(gpu, event, cmdbuf);
+	if (cmdbuf->nr_pmrs) {
+		gpu->event[event[1]].sync_point = &sync_point_perfmon_sample_pre;
+		gpu->event[event[1]].cmdbuf = cmdbuf;
+		etnaviv_sync_point_queue(gpu, event[1]);
+	}
+
+	etnaviv_buffer_queue(gpu, event[0], cmdbuf);
+
+	if (cmdbuf->nr_pmrs) {
+		gpu->event[event[2]].sync_point = &sync_point_perfmon_sample_post;
+		gpu->event[event[2]].cmdbuf = cmdbuf;
+		etnaviv_sync_point_queue(gpu, event[2]);
+	}
 
 	cmdbuf->fence = fence;
 	list_add_tail(&cmdbuf->node, &gpu->active_cmd_list);
@@ -1475,20 +1535,22 @@  static irqreturn_t irq_handler(int irq, void *data)
 			}
 
 			fence = gpu->event[event].fence;
-			gpu->event[event].fence = NULL;
-			dma_fence_signal(fence);
-
-			/*
-			 * Events can be processed out of order.  Eg,
-			 * - allocate and queue event 0
-			 * - allocate event 1
-			 * - event 0 completes, we process it
-			 * - allocate and queue event 0
-			 * - event 1 and event 0 complete
-			 * we can end up processing event 0 first, then 1.
-			 */
-			if (fence_after(fence->seqno, gpu->completed_fence))
-				gpu->completed_fence = fence->seqno;
+			if (fence) {
+				gpu->event[event].fence = NULL;
+				dma_fence_signal(fence);
+
+				/*
+				 * Events can be processed out of order.  Eg,
+				 * - allocate and queue event 0
+				 * - allocate event 1
+				 * - event 0 completes, we process it
+				 * - allocate and queue event 0
+				 * - event 1 and event 0 complete
+				 * we can end up processing event 0 first, then 1.
+				 */
+				if (fence_after(fence->seqno, gpu->completed_fence))
+					gpu->completed_fence = fence->seqno;
+			}
 
 			event_free(gpu, event);
 		}
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
index 7d5f785b0f08..c75a7b5c397c 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
@@ -89,6 +89,7 @@  struct etnaviv_chip_identity {
 
 struct etnaviv_event {
 	struct dma_fence *fence;
+	struct etnaviv_cmdbuf *cmdbuf;
 
 	void (*sync_point)(struct etnaviv_gpu *gpu, struct etnaviv_event *event);
 };