
[V2,10/23] drm/etnaviv: add 'sync point' support

Message ID 20170722095323.9964-11-christian.gmeiner@gmail.com (mailing list archive)
State New, archived

Commit Message

Christian Gmeiner July 22, 2017, 9:53 a.m. UTC
In order to support performance counters in a sane way we need to provide
a method to sync the GPU with the CPU. The GPU can process multiple command
buffers/events per irq. With the help of a 'sync point' we can trigger an event
and stop the GPU/FE immediately. When the CPU is done with its processing it
simply needs to restart the FE and the GPU will process the command stream.

Changes from v1 -> v2:
- process sync point with a work item to keep irq as fast as possible

Signed-off-by: Christian Gmeiner <christian.gmeiner@gmail.com>
---
 drivers/gpu/drm/etnaviv/etnaviv_buffer.c | 36 ++++++++++++++++++++++++++++++++
 drivers/gpu/drm/etnaviv/etnaviv_drv.h    |  1 +
 drivers/gpu/drm/etnaviv/etnaviv_gpu.c    | 25 ++++++++++++++++++++++
 drivers/gpu/drm/etnaviv/etnaviv_gpu.h    |  6 ++++++
 4 files changed, 68 insertions(+)
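
As an illustration of how such a sync point would be consumed, a rough sketch
only: the handler and caller names below are invented for this sketch, the
real perfmon wiring comes with later patches in this series; only
gpu->event[].sync_point and etnaviv_sync_point_queue() are from this patch.

/*
 * Illustrative sketch, not part of this patch. sample_counters() and
 * queue_perfmon_sync_point() are invented names.
 */
static void sample_counters(struct etnaviv_gpu *gpu,
			    struct etnaviv_event *event)
{
	/* CPU-side work performed while the FE is stopped,
	 * e.g. reading performance counter registers. */
}

static void queue_perfmon_sync_point(struct etnaviv_gpu *gpu,
				     unsigned int event)
{
	/* Install the CPU-side handler for this event ... */
	gpu->event[event].sync_point = sample_counters;

	/*
	 * ... and emit EVENT + END + WAIT/LINK into the ring buffer.
	 * When the EVENT irq fires, irq_handler() schedules pmrs_work,
	 * which runs the handler and restarts the FE at the WAIT.
	 */
	etnaviv_sync_point_queue(gpu, event);
}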

Comments

Lucas Stach Aug. 8, 2017, 10:34 a.m. UTC | #1
Am Samstag, den 22.07.2017, 11:53 +0200 schrieb Christian Gmeiner:
> In order to support performance counters in a sane way we need to provide
> a method to sync the GPU with the CPU. The GPU can process multiple command
> buffers/events per irq. With the help of a 'sync point' we can trigger an event
> and stop the GPU/FE immediately. When the CPU is done with its processing it
> simply needs to restart the FE and the GPU will process the command stream.
> 
> Changes from v1 -> v2:
> - process sync point with a work item to keep irq as fast as possible
> 
> Signed-off-by: Christian Gmeiner <christian.gmeiner@gmail.com>
> ---
>  drivers/gpu/drm/etnaviv/etnaviv_buffer.c | 36 ++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/etnaviv/etnaviv_drv.h    |  1 +
>  drivers/gpu/drm/etnaviv/etnaviv_gpu.c    | 25 ++++++++++++++++++++++
>  drivers/gpu/drm/etnaviv/etnaviv_gpu.h    |  6 ++++++
>  4 files changed, 68 insertions(+)
> 
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
> index ed9588f36bc9..9e7098e3207f 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
> @@ -250,6 +250,42 @@ void etnaviv_buffer_end(struct etnaviv_gpu *gpu)
>  	}
>  }
>  
> +/* Append a 'sync point' to the ring buffer. */
> +void etnaviv_sync_point_queue(struct etnaviv_gpu *gpu, unsigned int event)
> +{
> +	struct etnaviv_cmdbuf *buffer = gpu->buffer;
> +	unsigned int waitlink_offset = buffer->user_size - 16;
> +	u32 dwords, target;
> +
> +	/*
> +	 * We need at most 4 dwords in the return target:
> +	 * 1 event + 1 end + 1 wait + 1 link.
> +	 */
> +	dwords = 4;
> +	target = etnaviv_buffer_reserve(gpu, buffer, dwords);
> +
> +	/* Signal sync point event */
> +	CMD_LOAD_STATE(buffer, VIVS_GL_EVENT, VIVS_GL_EVENT_EVENT_ID(event) |
> +		       VIVS_GL_EVENT_FROM_PE);
> +
> +	/* Stop the FE to 'pause' the GPU */
> +	CMD_END(buffer);
> +
> +	/* Append waitlink */
> +	CMD_WAIT(buffer);
> +	CMD_LINK(buffer, 2, etnaviv_cmdbuf_get_va(buffer) +
> +			    buffer->user_size - 4);
> +
> +	/*
> +	 * Kick off the 'sync point' command by replacing the previous
> +	 * WAIT with a link to the address in the ring buffer.
> +	 */
> +	etnaviv_buffer_replace_wait(buffer, waitlink_offset,
> +				    VIV_FE_LINK_HEADER_OP_LINK |
> +				    VIV_FE_LINK_HEADER_PREFETCH(dwords),
> +				    target);
> +}
> +
>  /* Append a command buffer to the ring buffer. */
>  void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event,
>  	struct etnaviv_cmdbuf *cmdbuf)
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.h b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
> index 058389f93b69..f6cdd694ca51 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_drv.h
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
> @@ -101,6 +101,7 @@ int etnaviv_gem_new_userptr(struct drm_device *dev, struct drm_file *file,
>  u16 etnaviv_buffer_init(struct etnaviv_gpu *gpu);
>  u16 etnaviv_buffer_config_mmuv2(struct etnaviv_gpu *gpu, u32 mtlb_addr, u32 safe_addr);
>  void etnaviv_buffer_end(struct etnaviv_gpu *gpu);
> +void etnaviv_sync_point_queue(struct etnaviv_gpu *gpu, unsigned int event);
>  void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event,
>  	struct etnaviv_cmdbuf *cmdbuf);
>  void etnaviv_validate_init(void);
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
> index ce6c869e214f..bc1b96b4c7e9 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
> @@ -25,6 +25,7 @@
>  #include "etnaviv_gpu.h"
>  #include "etnaviv_gem.h"
>  #include "etnaviv_mmu.h"
> +#include "etnaviv_perfmon.h"
>  #include "common.xml.h"
>  #include "state.xml.h"
>  #include "state_hi.xml.h"
> @@ -1353,6 +1354,7 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
>  	}
>  
>  	gpu->event[event].fence = fence;
> +	gpu->event[event].sync_point = NULL;
>  	submit->fence = dma_fence_get(fence);
>  	gpu->active_fence = submit->fence->seqno;
>  
> @@ -1398,6 +1400,23 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
>  	return ret;
>  }
>  
> +static void etnaviv_process_sync_point(struct etnaviv_gpu *gpu,
> +	struct etnaviv_event *event)
> +{
> +	u32 addr = gpu_read(gpu, VIVS_FE_DMA_ADDRESS);
> +
> +	event->sync_point(gpu, event);
> +	etnaviv_gpu_start_fe(gpu, addr + 2, 2);
> +}
> +
> +static void pmrs_worker(struct work_struct *work)
> +{
> +	struct etnaviv_gpu *gpu = container_of(work, struct etnaviv_gpu,
> +					       pmrs_work);
> +
> +	etnaviv_process_sync_point(gpu, &gpu->event[gpu->pmrs_event]);
> +}
> +
>  /*
>   * Init/Cleanup:
>   */
> @@ -1444,6 +1463,11 @@ static irqreturn_t irq_handler(int irq, void *data)
>  
>  			dev_dbg(gpu->dev, "event %u\n", event);
>  
> +			if (gpu->event[event].sync_point) {
> +				gpu->pmrs_event = event;
> +				etnaviv_queue_work(gpu->drm, &gpu->pmrs_work);

If the handler is delayed we might handle multiple events per
invocation, in which case the events might not be in order. E.g. the FE
stop event might be event 30, while the FE start event might be event 0.
In that case you would execute the FE start before the FE stop has been
queued -> not good. You need to make sure that your PMRS events are
processed in the correct order.

> +			}
> +
>  			fence = gpu->event[event].fence;
>  			gpu->event[event].fence = NULL;
>  			dma_fence_signal(fence);
> @@ -1647,6 +1671,7 @@ static int etnaviv_gpu_bind(struct device *dev, struct device *master,
>  
>  	INIT_LIST_HEAD(&gpu->active_cmd_list);
>  	INIT_WORK(&gpu->retire_work, retire_worker);
> +	INIT_WORK(&gpu->pmrs_work, pmrs_worker);
>  	INIT_WORK(&gpu->recover_work, recover_worker);
>  	init_waitqueue_head(&gpu->fence_event);
>  
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
> index 70e6590aacdf..7d5f785b0f08 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
> @@ -89,6 +89,8 @@ struct etnaviv_chip_identity {
>  
>  struct etnaviv_event {
>  	struct dma_fence *fence;
> +
> +	void (*sync_point)(struct etnaviv_gpu *gpu, struct etnaviv_event *event);
>  };
>  
>  struct etnaviv_cmdbuf_suballoc;
> @@ -135,6 +137,10 @@ struct etnaviv_gpu {
>  	/* worker for handling active-list retiring: */
>  	struct work_struct retire_work;
>  
> +	/* worker for handling performance monitor requests: */
> +	struct work_struct pmrs_work;
> +	int pmrs_event;
> +
>  	void __iomem *mmio;
>  	int irq;
>
Christian Gmeiner Aug. 22, 2017, 9:58 a.m. UTC | #2
Hi Lucas

2017-08-08 12:34 GMT+02:00 Lucas Stach <l.stach@pengutronix.de>:
> Am Samstag, den 22.07.2017, 11:53 +0200 schrieb Christian Gmeiner:
>> In order to support performance counters in a sane way we need to provide
>> a method to sync the GPU with the CPU. The GPU can process multiple command
>> buffers/events per irq. With the help of a 'sync point' we can trigger an event
>> and stop the GPU/FE immediately. When the CPU is done with its processing it
>> simply needs to restart the FE and the GPU will process the command stream.
>>
>> Changes from v1 -> v2:
>> - process sync point with a work item to keep irq as fast as possible
>>
>> Signed-off-by: Christian Gmeiner <christian.gmeiner@gmail.com>
>> ---
>>  drivers/gpu/drm/etnaviv/etnaviv_buffer.c | 36 ++++++++++++++++++++++++++++++++
>>  drivers/gpu/drm/etnaviv/etnaviv_drv.h    |  1 +
>>  drivers/gpu/drm/etnaviv/etnaviv_gpu.c    | 25 ++++++++++++++++++++++
>>  drivers/gpu/drm/etnaviv/etnaviv_gpu.h    |  6 ++++++
>>  4 files changed, 68 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
>> index ed9588f36bc9..9e7098e3207f 100644
>> --- a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
>> +++ b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
>> @@ -250,6 +250,42 @@ void etnaviv_buffer_end(struct etnaviv_gpu *gpu)
>>       }
>>  }
>>
>> +/* Append a 'sync point' to the ring buffer. */
>> +void etnaviv_sync_point_queue(struct etnaviv_gpu *gpu, unsigned int event)
>> +{
>> +     struct etnaviv_cmdbuf *buffer = gpu->buffer;
>> +     unsigned int waitlink_offset = buffer->user_size - 16;
>> +     u32 dwords, target;
>> +
>> +     /*
>> +      * We need at most 4 dwords in the return target:
>> +      * 1 event + 1 end + 1 wait + 1 link.
>> +      */
>> +     dwords = 4;
>> +     target = etnaviv_buffer_reserve(gpu, buffer, dwords);
>> +
>> +     /* Signal sync point event */
>> +     CMD_LOAD_STATE(buffer, VIVS_GL_EVENT, VIVS_GL_EVENT_EVENT_ID(event) |
>> +                    VIVS_GL_EVENT_FROM_PE);
>> +
>> +     /* Stop the FE to 'pause' the GPU */
>> +     CMD_END(buffer);
>> +
>> +     /* Append waitlink */
>> +     CMD_WAIT(buffer);
>> +     CMD_LINK(buffer, 2, etnaviv_cmdbuf_get_va(buffer) +
>> +                         buffer->user_size - 4);
>> +
>> +     /*
>> +      * Kick off the 'sync point' command by replacing the previous
>> +      * WAIT with a link to the address in the ring buffer.
>> +      */
>> +     etnaviv_buffer_replace_wait(buffer, waitlink_offset,
>> +                                 VIV_FE_LINK_HEADER_OP_LINK |
>> +                                 VIV_FE_LINK_HEADER_PREFETCH(dwords),
>> +                                 target);
>> +}
>> +
>>  /* Append a command buffer to the ring buffer. */
>>  void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event,
>>       struct etnaviv_cmdbuf *cmdbuf)
>> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.h b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
>> index 058389f93b69..f6cdd694ca51 100644
>> --- a/drivers/gpu/drm/etnaviv/etnaviv_drv.h
>> +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
>> @@ -101,6 +101,7 @@ int etnaviv_gem_new_userptr(struct drm_device *dev, struct drm_file *file,
>>  u16 etnaviv_buffer_init(struct etnaviv_gpu *gpu);
>>  u16 etnaviv_buffer_config_mmuv2(struct etnaviv_gpu *gpu, u32 mtlb_addr, u32 safe_addr);
>>  void etnaviv_buffer_end(struct etnaviv_gpu *gpu);
>> +void etnaviv_sync_point_queue(struct etnaviv_gpu *gpu, unsigned int event);
>>  void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event,
>>       struct etnaviv_cmdbuf *cmdbuf);
>>  void etnaviv_validate_init(void);
>> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
>> index ce6c869e214f..bc1b96b4c7e9 100644
>> --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
>> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
>> @@ -25,6 +25,7 @@
>>  #include "etnaviv_gpu.h"
>>  #include "etnaviv_gem.h"
>>  #include "etnaviv_mmu.h"
>> +#include "etnaviv_perfmon.h"
>>  #include "common.xml.h"
>>  #include "state.xml.h"
>>  #include "state_hi.xml.h"
>> @@ -1353,6 +1354,7 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
>>       }
>>
>>       gpu->event[event].fence = fence;
>> +     gpu->event[event].sync_point = NULL;
>>       submit->fence = dma_fence_get(fence);
>>       gpu->active_fence = submit->fence->seqno;
>>
>> @@ -1398,6 +1400,23 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
>>       return ret;
>>  }
>>
>> +static void etnaviv_process_sync_point(struct etnaviv_gpu *gpu,
>> +     struct etnaviv_event *event)
>> +{
>> +     u32 addr = gpu_read(gpu, VIVS_FE_DMA_ADDRESS);
>> +
>> +     event->sync_point(gpu, event);
>> +     etnaviv_gpu_start_fe(gpu, addr + 2, 2);
>> +}
>> +
>> +static void pmrs_worker(struct work_struct *work)
>> +{
>> +     struct etnaviv_gpu *gpu = container_of(work, struct etnaviv_gpu,
>> +                                            pmrs_work);
>> +
>> +     etnaviv_process_sync_point(gpu, &gpu->event[gpu->pmrs_event]);
>> +}
>> +
>>  /*
>>   * Init/Cleanup:
>>   */
>> @@ -1444,6 +1463,11 @@ static irqreturn_t irq_handler(int irq, void *data)
>>
>>                       dev_dbg(gpu->dev, "event %u\n", event);
>>
>> +                     if (gpu->event[event].sync_point) {
>> +                             gpu->pmrs_event = event;
>> +                             etnaviv_queue_work(gpu->drm, &gpu->pmrs_work);
>
> If the handler is delayed we might handle multiple events per
> invocation, in which case the events might not be in order. E.g. the FE
> stop event might be event 30, while the FE start event might be event 0.
> In that case you would execute the FE start before the FE stop has been
> queued -> not good. You need to make sure that your PMRS events are
> processed in the correct order.
>

I thought about this problem for some time and I do not fully get your
point - sorry.

First, there is no FE start event. I am using 'sync' points for the pre and
post perfmon request (pmrs) sampling points.
A 'sync' point is represented like this in the cmd stream:

| EVENT | FE STOP | WAIT | LINK back to WAIT |
(see etnaviv_sync_point_queue(..) in etnaviv_buffer.c)

The restart of the FE happens in pmrs_worker(..), triggered by EVENT, and
the FE resumes processing the cmd stream at "| WAIT | LINK back to WAIT |".
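
To annotate that a bit more, my rough sketch of the ring buffer tail after
etnaviv_sync_point_queue(..):

/*
 *   EVENT id, FROM_PE   -- raises the irq; irq_handler() schedules pmrs_work
 *   END                 -- FE stops here, the GPU is 'paused'
 *   WAIT                -- pmrs_worker() restarts the FE at this address ...
 *   LINK -> WAIT        -- ... where it spins until the next submit links in
 */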

Maybe you can point me in the right direction.

>> +                     }
>> +
>>                       fence = gpu->event[event].fence;
>>                       gpu->event[event].fence = NULL;
>>                       dma_fence_signal(fence);
>> @@ -1647,6 +1671,7 @@ static int etnaviv_gpu_bind(struct device *dev, struct device *master,
>>
>>       INIT_LIST_HEAD(&gpu->active_cmd_list);
>>       INIT_WORK(&gpu->retire_work, retire_worker);
>> +     INIT_WORK(&gpu->pmrs_work, pmrs_worker);
>>       INIT_WORK(&gpu->recover_work, recover_worker);
>>       init_waitqueue_head(&gpu->fence_event);
>>
>> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
>> index 70e6590aacdf..7d5f785b0f08 100644
>> --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
>> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
>> @@ -89,6 +89,8 @@ struct etnaviv_chip_identity {
>>
>>  struct etnaviv_event {
>>       struct dma_fence *fence;
>> +
>> +     void (*sync_point)(struct etnaviv_gpu *gpu, struct etnaviv_event *event);
>>  };
>>
>>  struct etnaviv_cmdbuf_suballoc;
>> @@ -135,6 +137,10 @@ struct etnaviv_gpu {
>>       /* worker for handling active-list retiring: */
>>       struct work_struct retire_work;
>>
>> +     /* worker for handling performance monitor requests: */
>> +     struct work_struct pmrs_work;
>> +     int pmrs_event;
>> +
>>       void __iomem *mmio;
>>       int irq;
>>
>
>

greets
--
Christian Gmeiner, MSc

https://christian-gmeiner.info
Lucas Stach Aug. 22, 2017, 10:17 a.m. UTC | #3
Am Dienstag, den 22.08.2017, 11:58 +0200 schrieb Christian Gmeiner:
[...]
> >> @@ -1444,6 +1463,11 @@ static irqreturn_t irq_handler(int irq, void *data)
> >>
> >>                       dev_dbg(gpu->dev, "event %u\n", event);
> >>
> >> +                     if (gpu->event[event].sync_point) {
> >> +                             gpu->pmrs_event = event;
> >> +                             etnaviv_queue_work(gpu->drm, &gpu->pmrs_work);
> >
> > If the handler is delayed we might handle multiple events per
> > invocation, in which case the events might not be in order. E.g. the FE
> > stop event might be event 30, while the FE start event might be event 0.
> > In that case you would execute the FE start before the FE stop has been
> > queued -> not good. You need to make sure that your PMRS events are
> > processed in the correct order.
> >
> 
> I thought about this problem for some time and I do not fully get your
> point - sorry.
> 
> First there is no FE start event. I am using 'sync' points for pre and
> post pmrs points.

You are right. I was just about to type up a lengthy explanation of what
I meant, but while thinking it through I realized that my assumptions
were invalid. As both the PRE and POST events stop the FE, the GPU can
never get ahead of the event workers.

Please scratch my earlier comments.

Regards,
Lucas

Patch

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
index ed9588f36bc9..9e7098e3207f 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
@@ -250,6 +250,42 @@  void etnaviv_buffer_end(struct etnaviv_gpu *gpu)
 	}
 }
 
+/* Append a 'sync point' to the ring buffer. */
+void etnaviv_sync_point_queue(struct etnaviv_gpu *gpu, unsigned int event)
+{
+	struct etnaviv_cmdbuf *buffer = gpu->buffer;
+	unsigned int waitlink_offset = buffer->user_size - 16;
+	u32 dwords, target;
+
+	/*
+	 * We need at most 4 dwords in the return target:
+	 * 1 event + 1 end + 1 wait + 1 link.
+	 */
+	dwords = 4;
+	target = etnaviv_buffer_reserve(gpu, buffer, dwords);
+
+	/* Signal sync point event */
+	CMD_LOAD_STATE(buffer, VIVS_GL_EVENT, VIVS_GL_EVENT_EVENT_ID(event) |
+		       VIVS_GL_EVENT_FROM_PE);
+
+	/* Stop the FE to 'pause' the GPU */
+	CMD_END(buffer);
+
+	/* Append waitlink */
+	CMD_WAIT(buffer);
+	CMD_LINK(buffer, 2, etnaviv_cmdbuf_get_va(buffer) +
+			    buffer->user_size - 4);
+
+	/*
+	 * Kick off the 'sync point' command by replacing the previous
+	 * WAIT with a link to the address in the ring buffer.
+	 */
+	etnaviv_buffer_replace_wait(buffer, waitlink_offset,
+				    VIV_FE_LINK_HEADER_OP_LINK |
+				    VIV_FE_LINK_HEADER_PREFETCH(dwords),
+				    target);
+}
+
 /* Append a command buffer to the ring buffer. */
 void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event,
 	struct etnaviv_cmdbuf *cmdbuf)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.h b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
index 058389f93b69..f6cdd694ca51 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
@@ -101,6 +101,7 @@  int etnaviv_gem_new_userptr(struct drm_device *dev, struct drm_file *file,
 u16 etnaviv_buffer_init(struct etnaviv_gpu *gpu);
 u16 etnaviv_buffer_config_mmuv2(struct etnaviv_gpu *gpu, u32 mtlb_addr, u32 safe_addr);
 void etnaviv_buffer_end(struct etnaviv_gpu *gpu);
+void etnaviv_sync_point_queue(struct etnaviv_gpu *gpu, unsigned int event);
 void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event,
 	struct etnaviv_cmdbuf *cmdbuf);
 void etnaviv_validate_init(void);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index ce6c869e214f..bc1b96b4c7e9 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -25,6 +25,7 @@ 
 #include "etnaviv_gpu.h"
 #include "etnaviv_gem.h"
 #include "etnaviv_mmu.h"
+#include "etnaviv_perfmon.h"
 #include "common.xml.h"
 #include "state.xml.h"
 #include "state_hi.xml.h"
@@ -1353,6 +1354,7 @@  int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
 	}
 
 	gpu->event[event].fence = fence;
+	gpu->event[event].sync_point = NULL;
 	submit->fence = dma_fence_get(fence);
 	gpu->active_fence = submit->fence->seqno;
 
@@ -1398,6 +1400,23 @@  int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
 	return ret;
 }
 
+static void etnaviv_process_sync_point(struct etnaviv_gpu *gpu,
+	struct etnaviv_event *event)
+{
+	u32 addr = gpu_read(gpu, VIVS_FE_DMA_ADDRESS);
+
+	event->sync_point(gpu, event);
+	etnaviv_gpu_start_fe(gpu, addr + 2, 2);
+}
+
+static void pmrs_worker(struct work_struct *work)
+{
+	struct etnaviv_gpu *gpu = container_of(work, struct etnaviv_gpu,
+					       pmrs_work);
+
+	etnaviv_process_sync_point(gpu, &gpu->event[gpu->pmrs_event]);
+}
+
 /*
  * Init/Cleanup:
  */
@@ -1444,6 +1463,11 @@  static irqreturn_t irq_handler(int irq, void *data)
 
 			dev_dbg(gpu->dev, "event %u\n", event);
 
+			if (gpu->event[event].sync_point) {
+				gpu->pmrs_event = event;
+				etnaviv_queue_work(gpu->drm, &gpu->pmrs_work);
+			}
+
 			fence = gpu->event[event].fence;
 			gpu->event[event].fence = NULL;
 			dma_fence_signal(fence);
@@ -1647,6 +1671,7 @@  static int etnaviv_gpu_bind(struct device *dev, struct device *master,
 
 	INIT_LIST_HEAD(&gpu->active_cmd_list);
 	INIT_WORK(&gpu->retire_work, retire_worker);
+	INIT_WORK(&gpu->pmrs_work, pmrs_worker);
 	INIT_WORK(&gpu->recover_work, recover_worker);
 	init_waitqueue_head(&gpu->fence_event);
 
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
index 70e6590aacdf..7d5f785b0f08 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
@@ -89,6 +89,8 @@  struct etnaviv_chip_identity {
 
 struct etnaviv_event {
 	struct dma_fence *fence;
+
+	void (*sync_point)(struct etnaviv_gpu *gpu, struct etnaviv_event *event);
 };
 
 struct etnaviv_cmdbuf_suballoc;
@@ -135,6 +137,10 @@  struct etnaviv_gpu {
 	/* worker for handling active-list retiring: */
 	struct work_struct retire_work;
 
+	/* worker for handling performance monitor requests: */
+	struct work_struct pmrs_work;
+	int pmrs_event;
+
 	void __iomem *mmio;
 	int irq;