diff mbox

[7/7] vulkan: Add VK_GOOGLE_display_timing extension (x11 and display backends)

Message ID 20180210044516.16944-8-keithp@keithp.com (mailing list archive)
State New, archived
Headers show

Commit Message

Keith Packard Feb. 10, 2018, 4:45 a.m. UTC
This adds support for the VK_GOOGLE_display_timing extension, which
provides two things:

 1) Detailed information about when frames are displayed, including
    slack time between GPU execution and display frame.

 2) Absolute time control over swapchain queue processing. This allows
    the application to request frames be displayed at specific
    absolute times, using the same timebase as that provided in vblank
    events.

Support for this extension has been implemented for the x11 and
display backends; adding support to other backends should be
reasonably straightforward for one familiar with those systems and
should not require any additional device-specific code.

Signed-off-by: Keith Packard <keithp@keithp.com>
---
 src/amd/vulkan/radv_extensions.py   |   1 +
 src/amd/vulkan/radv_wsi.c           |  32 +++++
 src/intel/vulkan/anv_extensions.py  |   1 +
 src/intel/vulkan/anv_wsi.c          |  29 ++++
 src/vulkan/wsi/wsi_common.c         | 254 +++++++++++++++++++++++++++++++++++-
 src/vulkan/wsi/wsi_common.h         |  24 ++++
 src/vulkan/wsi/wsi_common_display.c | 143 +++++++++++++++++++-
 src/vulkan/wsi/wsi_common_private.h |  35 +++++
 src/vulkan/wsi/wsi_common_x11.c     |  69 +++++++++-
 9 files changed, 577 insertions(+), 11 deletions(-)
diff mbox

Patch

diff --git a/src/amd/vulkan/radv_extensions.py b/src/amd/vulkan/radv_extensions.py
index fed198df412..f38d6903a80 100644
--- a/src/amd/vulkan/radv_extensions.py
+++ b/src/amd/vulkan/radv_extensions.py
@@ -96,6 +96,7 @@  EXTENSIONS = [
     Extension('VK_AMD_rasterization_order',               1, 'device->rad_info.chip_class >= VI && device->rad_info.max_se >= 2'),
     Extension('VK_AMD_shader_info',                       1, True),
     Extension('VK_MESA_query_timestamp',                  1, True),
+    Extension('VK_GOOGLE_display_timing',                 1, True),
 ]
 
 class VkVersion:
diff --git a/src/amd/vulkan/radv_wsi.c b/src/amd/vulkan/radv_wsi.c
index 2d2a30ebbb1..48ff7107232 100644
--- a/src/amd/vulkan/radv_wsi.c
+++ b/src/amd/vulkan/radv_wsi.c
@@ -247,3 +247,35 @@  VkResult radv_QueuePresentKHR(
 					queue->queue_family_index,
 					pPresentInfo);
 }
+
+/* VK_GOOGLE_display_timing entry points: thin wrappers around the common WSI code */
+VkResult
+radv_GetRefreshCycleDurationGOOGLE(
+	VkDevice                      _device,
+	VkSwapchainKHR                swapchain,
+	VkRefreshCycleDurationGOOGLE  *pDisplayTimingProperties)
+{
+	RADV_FROM_HANDLE(radv_device, device, _device);
+	struct radv_physical_device *pdevice = device->physical_device;
+	/* Forward to the shared WSI implementation and return its VkResult unchanged. */
+	return wsi_common_get_refresh_cycle_duration(&pdevice->wsi_device,
+						     _device,
+						     swapchain,
+						     pDisplayTimingProperties);
+}
+
+VkResult
+radv_GetPastPresentationTimingGOOGLE(VkDevice                            _device,
+				     VkSwapchainKHR                      swapchain,
+				     uint32_t                            *pPresentationTimingCount,
+				     VkPastPresentationTimingGOOGLE      *pPresentationTimings)
+{
+	RADV_FROM_HANDLE(radv_device, device, _device);
+	struct radv_physical_device *pdevice = device->physical_device;
+	/* Forward count/array unchanged to the shared WSI implementation and return its VkResult. */
+	return wsi_common_get_past_presentation_timing(&pdevice->wsi_device,
+						       _device,
+						       swapchain,
+						       pPresentationTimingCount,
+						       pPresentationTimings);
+}
diff --git a/src/intel/vulkan/anv_extensions.py b/src/intel/vulkan/anv_extensions.py
index dd4d5a1f970..c45ef9ad09f 100644
--- a/src/intel/vulkan/anv_extensions.py
+++ b/src/intel/vulkan/anv_extensions.py
@@ -92,6 +92,7 @@  EXTENSIONS = [
     Extension('VK_EXT_display_surface_counter',           1, 'VK_USE_PLATFORM_DISPLAY_KHR'),
     Extension('VK_EXT_display_control',                   1, 'VK_USE_PLATFORM_DISPLAY_KHR'),
     Extension('VK_MESA_query_timestamp',                  1, True),
+    Extension('VK_GOOGLE_display_timing',                 1, True),
 ]
 
 class VkVersion:
diff --git a/src/intel/vulkan/anv_wsi.c b/src/intel/vulkan/anv_wsi.c
index 52362adfb71..7801a989e0d 100644
--- a/src/intel/vulkan/anv_wsi.c
+++ b/src/intel/vulkan/anv_wsi.c
@@ -239,3 +239,32 @@  VkResult anv_QueuePresentKHR(
                                    _queue, 0,
                                    pPresentInfo);
 }
+
+/* VK_GOOGLE_display_timing entry points: thin wrappers around the common WSI code */
+VkResult
+anv_GetRefreshCycleDurationGOOGLE(VkDevice                      _device,
+                                  VkSwapchainKHR                swapchain,
+                                  VkRefreshCycleDurationGOOGLE  *pDisplayTimingProperties)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   /* Forward to the shared WSI code using the wsi_device stored on the instance's physical device. */
+   return wsi_common_get_refresh_cycle_duration(&device->instance->physicalDevice.wsi_device,
+                                                _device,
+                                                swapchain,
+                                                pDisplayTimingProperties);
+}
+
+VkResult
+anv_GetPastPresentationTimingGOOGLE(VkDevice                            _device,
+                                    VkSwapchainKHR                      swapchain,
+                                    uint32_t                            *pPresentationTimingCount,
+                                    VkPastPresentationTimingGOOGLE      *pPresentationTimings)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   /* Forward count/array unchanged to the shared WSI code and return its VkResult. */
+   return wsi_common_get_past_presentation_timing(&device->instance->physicalDevice.wsi_device,
+                                                  _device,
+                                                  swapchain,
+                                                  pPresentationTimingCount,
+                                                  pPresentationTimings);
+}
diff --git a/src/vulkan/wsi/wsi_common.c b/src/vulkan/wsi/wsi_common.c
index 02abc9ef7fb..c36e4ccdcbe 100644
--- a/src/vulkan/wsi/wsi_common.c
+++ b/src/vulkan/wsi/wsi_common.c
@@ -40,11 +40,16 @@  wsi_device_init(struct wsi_device *wsi,
    PFN_vk##func func = (PFN_vk##func)proc_addr(pdevice, "vk" #func)
    WSI_GET_CB(GetPhysicalDeviceMemoryProperties);
    WSI_GET_CB(GetPhysicalDeviceQueueFamilyProperties);
+   WSI_GET_CB(GetPhysicalDeviceProperties);
 #undef WSI_GET_CB
 
    GetPhysicalDeviceMemoryProperties(pdevice, &wsi->memory_props);
    GetPhysicalDeviceQueueFamilyProperties(pdevice, &wsi->queue_family_count, NULL);
 
+   VkPhysicalDeviceProperties properties;
+   GetPhysicalDeviceProperties(pdevice, &properties);
+   wsi->timestamp_period = properties.limits.timestampPeriod;
+
 #define WSI_GET_CB(func) \
    wsi->func = (PFN_vk##func)proc_addr(pdevice, "vk" #func)
    WSI_GET_CB(AllocateMemory);
@@ -53,14 +58,18 @@  wsi_device_init(struct wsi_device *wsi,
    WSI_GET_CB(BindImageMemory);
    WSI_GET_CB(BeginCommandBuffer);
    WSI_GET_CB(CmdCopyImageToBuffer);
+   WSI_GET_CB(CmdResetQueryPool);
+   WSI_GET_CB(CmdWriteTimestamp);
    WSI_GET_CB(CreateBuffer);
    WSI_GET_CB(CreateCommandPool);
    WSI_GET_CB(CreateFence);
    WSI_GET_CB(CreateImage);
+   WSI_GET_CB(CreateQueryPool);
    WSI_GET_CB(DestroyBuffer);
    WSI_GET_CB(DestroyCommandPool);
    WSI_GET_CB(DestroyFence);
    WSI_GET_CB(DestroyImage);
+   WSI_GET_CB(DestroyQueryPool);
    WSI_GET_CB(EndCommandBuffer);
    WSI_GET_CB(FreeMemory);
    WSI_GET_CB(FreeCommandBuffers);
@@ -68,9 +77,13 @@  wsi_device_init(struct wsi_device *wsi,
    WSI_GET_CB(GetImageMemoryRequirements);
    WSI_GET_CB(GetImageSubresourceLayout);
    WSI_GET_CB(GetMemoryFdKHR);
+   WSI_GET_CB(GetPhysicalDeviceProperties);
    WSI_GET_CB(GetPhysicalDeviceFormatProperties);
+   WSI_GET_CB(GetPhysicalDeviceQueueFamilyProperties);
+   WSI_GET_CB(GetQueryPoolResults);
    WSI_GET_CB(ResetFences);
    WSI_GET_CB(QueueSubmit);
+   WSI_GET_CB(QueryCurrentTimestampMESA);
    WSI_GET_CB(WaitForFences);
 #undef WSI_GET_CB
 
@@ -136,6 +149,8 @@  wsi_swapchain_init(const struct wsi_device *wsi,
    chain->device = device;
    chain->alloc = *pAllocator;
    chain->use_prime_blit = false;
+   chain->timing_insert = 0;
+   chain->timing_count = 0;
 
    chain->cmd_pools =
       vk_zalloc(pAllocator, sizeof(VkCommandPool) * wsi->queue_family_count, 8,
@@ -209,6 +224,60 @@  align_u32(uint32_t v, uint32_t a)
    return (v + a - 1) & ~(a - 1);
 }
 
+static VkResult
+wsi_image_init_timestamp(const struct wsi_swapchain *chain,
+                         struct wsi_image *image)
+{
+   const struct wsi_device *wsi = chain->wsi;
+   VkResult result;
+   /* Give the image a one-entry timestamp query pool (image->query_pool) and a pre-recorded
+    * command buffer (image->timestamp_buffer) that resets that query and writes a timestamp. */
+   result = wsi->CreateQueryPool(chain->device,
+                                 &(const VkQueryPoolCreateInfo) {
+                                    .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
+                                       .queryType = VK_QUERY_TYPE_TIMESTAMP,
+                                       .queryCount = 1,
+                                       },
+                                 NULL,
+                                 &image->query_pool);
+
+   if (result != VK_SUCCESS)
+      goto fail;
+   /* The buffer always comes from cmd_pools[0]. NOTE(review): if this allocation fails the query pool created above is not destroyed here - confirm the caller releases it. */
+   result = wsi->AllocateCommandBuffers(chain->device,
+                                        &(const VkCommandBufferAllocateInfo) {
+                                           .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
+                                              .pNext = NULL,
+                                              .commandPool = chain->cmd_pools[0],
+                                              .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
+                                              .commandBufferCount = 1,
+                                              },
+                                        &image->timestamp_buffer);
+   if (result != VK_SUCCESS)
+      goto fail;
+   /* Record the buffer once, up front. The results of BeginCommandBuffer/EndCommandBuffer are not checked. */
+   wsi->BeginCommandBuffer(image->timestamp_buffer,
+                           &(VkCommandBufferBeginInfo) {
+                              .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+                                 .flags = 0
+                                 });
+
+   wsi->CmdResetQueryPool(image->timestamp_buffer,
+                          image->query_pool,
+                          0, 1);
+
+   wsi->CmdWriteTimestamp(image->timestamp_buffer,
+                          VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                          image->query_pool,
+                          0);
+
+   wsi->EndCommandBuffer(image->timestamp_buffer);
+
+   return VK_SUCCESS;
+fail:
+   return result; /* first failing VkResult; nothing is cleaned up in this function */
+}
+
 VkResult
 wsi_create_native_image(const struct wsi_swapchain *chain,
                         const VkSwapchainCreateInfoKHR *pCreateInfo,
@@ -303,6 +372,11 @@  wsi_create_native_image(const struct wsi_swapchain *chain,
    };
    int fd;
    result = wsi->GetMemoryFdKHR(chain->device, &memory_get_fd_info, &fd);
+   if (result != VK_SUCCESS)
+      goto fail;
+
+   result = wsi_image_init_timestamp(chain, image);
+
    if (result != VK_SUCCESS)
       goto fail;
 
@@ -497,6 +571,11 @@  wsi_create_prime_image(const struct wsi_swapchain *chain,
          goto fail;
    }
 
+   result = wsi_image_init_timestamp(chain, image);
+
+   if (result != VK_SUCCESS)
+      goto fail;
+
    const VkMemoryGetFdInfoKHR linear_memory_get_fd_info = {
       .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
       .pNext = NULL,
@@ -695,6 +774,87 @@  wsi_common_acquire_next_image(const struct wsi_device *wsi,
                                         semaphore, pImageIndex);
 }
 
+static struct wsi_timing *
+wsi_get_timing(struct wsi_swapchain *chain, uint32_t i)
+{
+   uint32_t j = WSI_TIMING_HISTORY + chain->timing_insert - chain->timing_count + i;
+   /* i counts from the oldest stored record (i == 0). j is its ring slot; one subtraction is enough to wrap when i < timing_count. */
+   if (j >= WSI_TIMING_HISTORY)
+      j -= WSI_TIMING_HISTORY;
+   return &chain->timing[j];
+}
+
+static struct wsi_timing *
+wsi_next_timing(struct wsi_swapchain *chain, int image_index)
+{
+   uint32_t j;
+   struct wsi_timing *timing;
+   /* Claim the ring slot at timing_insert and return it zeroed; image_index is not used. */
+   j = chain->timing_insert;
+   ++chain->timing_insert;
+   if (chain->timing_insert >= WSI_TIMING_HISTORY)
+      chain->timing_insert = 0; /* wrap the insert position */
+   if (chain->timing_count < WSI_TIMING_HISTORY)
+      ++chain->timing_count; /* saturates at capacity; after that the oldest slot is reused */
+   timing = &chain->timing[j];
+   memset(timing, '\0', sizeof (*timing));
+   return timing;
+}
+
+void
+wsi_mark_timing(struct wsi_swapchain *swapchain,
+                struct wsi_image *image,
+                uint64_t ust,
+                uint64_t msc)
+{
+   const struct wsi_device *wsi = swapchain->wsi;
+   struct wsi_timing *timing = image->timing;
+   VkResult result;
+   /* Complete the timing record attached to image using the display time (ust) and frame count (msc) from the backend; no record means nothing to do. */
+   if (!timing)
+      return;
+   /* Remember the latest frame; wsi_common_queue_present derives target_msc from these two values. */
+   swapchain->frame_msc = msc;
+   swapchain->frame_ust = ust;
+
+   uint64_t     render_timestamp;
+   /* Read back the 64-bit timestamp from the image's query pool, waiting until it is available. */
+   result = wsi->GetQueryPoolResults(swapchain->device, image->query_pool,
+                                     0, 1, sizeof(render_timestamp), &render_timestamp,
+                                     sizeof (uint64_t),
+                                     VK_QUERY_RESULT_64_BIT|VK_QUERY_RESULT_WAIT_BIT);
+
+   if (result != VK_SUCCESS)
+      return;
+   /* Sample the GPU clock and then the swapchain's CPU clock so the GPU timestamp can be mapped onto the CPU timebase. */
+   uint64_t     current_gpu_timestamp;
+   result = wsi->QueryCurrentTimestampMESA(swapchain->device, &current_gpu_timestamp);
+   if (result != VK_SUCCESS)
+      return;
+   uint64_t     current_time = swapchain->get_current_time();
+   /* NOTE(review): the hook is called without the NULL check that wsi_common_get_refresh_cycle_duration performs, and its result is ignored. */
+   VkRefreshCycleDurationGOOGLE display_timings;
+   swapchain->get_refresh_cycle_duration(swapchain, &display_timings);
+
+   uint64_t     refresh_duration = display_timings.refreshDuration;
+
+   /* When did drawing complete (in nsec) */
+   /* GPU ticks elapsed since the render timestamp, scaled by timestamp_period (a float), subtracted from the current time. */
+   uint64_t     render_time = current_time - (current_gpu_timestamp - render_timestamp) * wsi->timestamp_period;
+   /* Clamp so that ust - render_time below cannot wrap. */
+   if (render_time > ust)
+      render_time = ust;
+   /* Whole refresh cycles between render completion and display. NOTE(review): divides by refresh_duration with no zero check. */
+   uint64_t     render_frames = (ust - render_time) / refresh_duration;
+   /* Step back from ust by those whole cycles: the first refresh at or after render_time. */
+   uint64_t     earliest_time = ust - render_frames * refresh_duration;
+
+   timing->timing.actualPresentTime = ust;
+   timing->timing.earliestPresentTime = earliest_time;
+   timing->timing.presentMargin = earliest_time - render_time;
+   timing->complete = true; /* makes the record visible to wsi_common_get_past_presentation_timing */
+}
+
 VkResult
 wsi_common_queue_present(const struct wsi_device *wsi,
                          VkDevice device,
@@ -706,10 +866,13 @@  wsi_common_queue_present(const struct wsi_device *wsi,
 
    const VkPresentRegionsKHR *regions =
       vk_find_struct_const(pPresentInfo->pNext, PRESENT_REGIONS_KHR);
+   const VkPresentTimesInfoGOOGLE *present_times_info =
+      vk_find_struct_const(pPresentInfo->pNext, PRESENT_TIMES_INFO_GOOGLE);
 
    for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) {
       WSI_FROM_HANDLE(wsi_swapchain, swapchain, pPresentInfo->pSwapchains[i]);
       VkResult result;
+      struct wsi_timing *timing = NULL;
 
       if (swapchain->fences[0] == VK_NULL_HANDLE) {
          const VkFenceCreateInfo fence_info = {
@@ -726,9 +889,12 @@  wsi_common_queue_present(const struct wsi_device *wsi,
          wsi->ResetFences(device, 1, &swapchain->fences[0]);
       }
 
+      VkCommandBuffer submit_buffers[2];
       VkSubmitInfo submit_info = {
          .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
          .pNext = NULL,
+         .pCommandBuffers = submit_buffers,
+         .commandBufferCount = 0
       };
 
       VkPipelineStageFlags *stage_flags = NULL;
@@ -761,9 +927,44 @@  wsi_common_queue_present(const struct wsi_device *wsi,
           */
          struct wsi_image *image =
             swapchain->get_wsi_image(swapchain, pPresentInfo->pImageIndices[i]);
-         submit_info.commandBufferCount = 1;
-         submit_info.pCommandBuffers =
-            &image->prime.blit_cmd_buffers[queue_family_index];
+         submit_buffers[submit_info.commandBufferCount++] = 
+            image->prime.blit_cmd_buffers[queue_family_index];
+      }
+
+      /* Set up GOOGLE_display_timing bits */
+      if (present_times_info &&
+          present_times_info->pTimes != NULL &&
+          i < present_times_info->swapchainCount)
+      {
+         const VkPresentTimeGOOGLE *present_time = &present_times_info->pTimes[i];
+
+         struct wsi_image *image = swapchain->get_wsi_image(swapchain, pPresentInfo->pImageIndices[i]);
+
+         timing = wsi_next_timing(swapchain, pPresentInfo->pImageIndices[i]);
+         timing->timing.presentID = present_time->presentID;
+         timing->timing.desiredPresentTime = present_time->desiredPresentTime;
+         timing->target_msc = 0;
+         image->timing = timing;
+
+         if (present_time->desiredPresentTime != 0)
+         {
+            int64_t delta_nsec = (int64_t) (present_time->desiredPresentTime - swapchain->frame_ust);
+
+            /* Set the target msc only if it's no more than two seconds from
+             * now, and not stale
+             */
+            if (0 <= delta_nsec && delta_nsec <= 2000000000ul) {
+               VkRefreshCycleDurationGOOGLE refresh_timing;
+
+               swapchain->get_refresh_cycle_duration(swapchain, &refresh_timing);
+
+               int64_t refresh = (int64_t) refresh_timing.refreshDuration;
+               int64_t frames = (delta_nsec + refresh/2) / refresh;
+               timing->target_msc = swapchain->frame_msc + frames;
+            }
+         }
+
+         submit_buffers[submit_info.commandBufferCount++] = image->timestamp_buffer;
       }
 
       result = wsi->QueueSubmit(queue, 1, &submit_info, swapchain->fences[0]);
@@ -801,3 +1002,50 @@  wsi_common_queue_present(const struct wsi_device *wsi,
 
    return final_result;
 }
+
+VkResult
+wsi_common_get_refresh_cycle_duration(const struct wsi_device *wsi,
+                                      VkDevice device_h,
+                                      VkSwapchainKHR _swapchain,
+                                      VkRefreshCycleDurationGOOGLE *pDisplayTimingProperties)
+{
+   WSI_FROM_HANDLE(wsi_swapchain, swapchain, _swapchain);
+   /* Dispatch to the swapchain's backend hook; a swapchain without the hook reports the extension as absent. wsi and device_h are unused. */
+   if (!swapchain->get_refresh_cycle_duration)
+      return VK_ERROR_EXTENSION_NOT_PRESENT;
+   return swapchain->get_refresh_cycle_duration(swapchain, pDisplayTimingProperties);
+}
+
+
+VkResult
+wsi_common_get_past_presentation_timing(const struct wsi_device *wsi,
+                                        VkDevice device_h,
+                                        VkSwapchainKHR _swapchain,
+                                        uint32_t *count,
+                                        VkPastPresentationTimingGOOGLE *timings)
+{
+   WSI_FROM_HANDLE(wsi_swapchain, swapchain, _swapchain);
+   uint32_t timing_count_requested = *count;
+   uint32_t timing_count_available = 0;
+   uint32_t t;
+   /* Return timing records for completed presents that have not been handed out yet.
+    * On entry *count is the capacity of timings (ignored when timings is NULL); wsi and
+    * device_h are unused. History is walked oldest-first; copied entries become consumed. */
+   for (t = 0; t < swapchain->timing_count; t++) {
+      struct wsi_timing *timing = wsi_get_timing(swapchain, t);
+      if (timing->complete && !timing->consumed) {
+         if (timings && timing_count_available < timing_count_requested) {
+            timings[timing_count_available] = timing->timing;
+            timing->consumed = true;
+         }
+         timing_count_available++;
+      }
+   }
+   /* With an output array, *count must report the entries actually written, not the total. */
+   if (timings != NULL && timing_count_available > timing_count_requested) {
+      *count = timing_count_requested;
+      return VK_INCOMPLETE;
+   }
+   *count = timing_count_available;
+   return VK_SUCCESS;
+}
diff --git a/src/vulkan/wsi/wsi_common.h b/src/vulkan/wsi/wsi_common.h
index e504f4120ad..47786810b66 100644
--- a/src/vulkan/wsi/wsi_common.h
+++ b/src/vulkan/wsi/wsi_common.h
@@ -64,6 +64,7 @@  struct wsi_interface;
 struct wsi_device {
    VkPhysicalDeviceMemoryProperties memory_props;
    uint32_t queue_family_count;
+   float timestamp_period;
 
 #define WSI_CB(cb) PFN_vk##cb cb
    WSI_CB(AllocateMemory);
@@ -72,14 +73,18 @@  struct wsi_device {
    WSI_CB(BindImageMemory);
    WSI_CB(BeginCommandBuffer);
    WSI_CB(CmdCopyImageToBuffer);
+   WSI_CB(CmdResetQueryPool);
+   WSI_CB(CmdWriteTimestamp);
    WSI_CB(CreateBuffer);
    WSI_CB(CreateCommandPool);
    WSI_CB(CreateFence);
    WSI_CB(CreateImage);
+   WSI_CB(CreateQueryPool);
    WSI_CB(DestroyBuffer);
    WSI_CB(DestroyCommandPool);
    WSI_CB(DestroyFence);
    WSI_CB(DestroyImage);
+   WSI_CB(DestroyQueryPool);
    WSI_CB(EndCommandBuffer);
    WSI_CB(FreeMemory);
    WSI_CB(FreeCommandBuffers);
@@ -87,9 +92,13 @@  struct wsi_device {
    WSI_CB(GetImageMemoryRequirements);
    WSI_CB(GetImageSubresourceLayout);
    WSI_CB(GetMemoryFdKHR);
+   WSI_CB(GetPhysicalDeviceProperties);
    WSI_CB(GetPhysicalDeviceFormatProperties);
+   WSI_CB(GetPhysicalDeviceQueueFamilyProperties);
+   WSI_CB(GetQueryPoolResults);
    WSI_CB(ResetFences);
    WSI_CB(QueueSubmit);
+   WSI_CB(QueryCurrentTimestampMESA);
    WSI_CB(WaitForFences);
 #undef WSI_CB
 
@@ -201,4 +210,19 @@  wsi_common_queue_present(const struct wsi_device *wsi,
                          int queue_family_index,
                          const VkPresentInfoKHR *pPresentInfo);
 
+/* VK_GOOGLE_display_timing */
+VkResult
+wsi_common_get_refresh_cycle_duration(const struct wsi_device *wsi,
+                                      VkDevice device_h,
+                                      VkSwapchainKHR swapchain,
+                                      VkRefreshCycleDurationGOOGLE *pDisplayTimingProperties);
+
+
+VkResult
+wsi_common_get_past_presentation_timing(const struct wsi_device *wsi,
+                                        VkDevice device_h,
+                                        VkSwapchainKHR swapchain,
+                                        uint32_t *pPresentationTimingCount,
+                                        VkPastPresentationTimingGOOGLE *pPresentationTimings);
+
 #endif
diff --git a/src/vulkan/wsi/wsi_common_display.c b/src/vulkan/wsi/wsi_common_display.c
index c3608f13e54..60bc6e8be00 100644
--- a/src/vulkan/wsi/wsi_common_display.c
+++ b/src/vulkan/wsi/wsi_common_display.c
@@ -75,6 +75,8 @@  typedef struct wsi_display_connector {
    char                         *name;
    bool                         connected;
    bool                         active;
+   uint64_t                     last_frame;
+   uint64_t                     last_nsec;
    wsi_display_mode             *current_mode;
    drmModeModeInfo              current_drm_mode;
    uint32_t                     dpms_property;
@@ -104,6 +106,7 @@  struct wsi_display {
 enum wsi_image_state {
    wsi_image_idle,
    wsi_image_drawing,
+   wsi_image_waiting,
    wsi_image_queued,
    wsi_image_flipping,
    wsi_image_displaying
@@ -113,6 +116,7 @@  struct wsi_display_image {
    struct wsi_image             base;
    struct wsi_display_swapchain *chain;
    enum wsi_image_state         state;
+   struct wsi_display_fence     *fence;
    uint32_t                     fb_id;
    uint64_t                     flip_sequence;
 };
@@ -122,6 +126,7 @@  struct wsi_display_swapchain {
    struct wsi_display           *wsi;
    VkIcdSurfaceDisplay          *surface;
    uint64_t                     flip_sequence;
+   const VkAllocationCallbacks  *allocator;
    struct wsi_display_image     images[0];
 };
 
@@ -130,6 +135,7 @@  struct wsi_display_fence {
    bool                         event_received;
    bool                         destroyed;
    uint64_t                     sequence;
+   struct wsi_display_image     *image;
 };
 
 static uint64_t fence_sequence;
@@ -799,6 +805,7 @@  wsi_display_image_init(VkDevice                         device_h,
 
    image->chain = chain;
    image->state = wsi_image_idle;
+   image->fence = NULL;
    image->fb_id = 0;
 
    /* XXX extract depth and bpp from image somehow */
@@ -866,6 +873,11 @@  wsi_display_idle_old_displaying(struct wsi_display_image *active_image)
 static VkResult
 _wsi_display_queue_next(struct wsi_swapchain     *drv_chain);
 
+static uint64_t widen_32_to_64(uint32_t narrow, uint64_t near)
+{
+	return near + (int32_t) (narrow - near); /* signed 32-bit distance from near, added back: the 64-bit value closest to near whose low 32 bits equal narrow */
+}
+
 static void
 wsi_display_page_flip_handler2(int              fd,
                                unsigned int     frame,
@@ -875,11 +887,25 @@  wsi_display_page_flip_handler2(int              fd,
                                void             *data)
 {
    struct wsi_display_image     *image = data;
+   struct wsi_display_swapchain *chain = image->chain;
+   VkIcdSurfaceDisplay          *surface = chain->surface;
+   wsi_display_mode             *display_mode = wsi_display_mode_from_handle(surface->displayMode);
+   wsi_display_connector        *connector = display_mode->connector;
+   uint64_t                     frame64 = widen_32_to_64(frame, connector->last_frame);
+   uint64_t                     nsec = (uint64_t) sec * 1000000000 + (uint64_t) usec * 1000;
+
+   /* Don't let time go backwards because this function has lower resolution than ktime */
+   if (nsec < connector->last_nsec)
+      nsec = connector->last_nsec;
 
-   wsi_display_debug("image %ld displayed at %d\n", image - &(image->chain->images[0]), frame);
+   wsi_display_debug("image %ld displayed at %ld\n", image - &(image->chain->images[0]), frame64);
    image->state = wsi_image_displaying;
+   connector->last_frame = frame64;
+   connector->last_nsec = nsec;
+   wsi_mark_timing(&image->chain->base, &image->base,
+                   nsec, frame64);
    wsi_display_idle_old_displaying(image);
-   (void) _wsi_display_queue_next(&(image->chain->base));
+   (void) _wsi_display_queue_next(&(chain->base));
 }
 
 static void wsi_display_page_flip_handler(int fd, unsigned int frame,
@@ -1228,6 +1254,7 @@  wsi_display_fence_alloc(VkDevice                        device,
    fence->event_received = false;
    fence->destroyed = false;
    fence->sequence = ++fence_sequence;
+   fence->image = NULL;
    return fence;
 }
 
@@ -1323,6 +1350,12 @@  _wsi_display_queue_next(struct wsi_swapchain     *drv_chain)
       if (!image)
          return VK_SUCCESS;
 
+      if (image->fence) {
+         image->fence->image = NULL;
+         wsi_display_fence_destroy(&image->fence->base);
+         image->fence = NULL;
+      }
+
       if (connector->active) {
          ret = drmModePageFlip(wsi->master_fd, connector->crtc_id, image->fb_id,
                                DRM_MODE_PAGE_FLIP_EVENT, image);
@@ -1385,16 +1418,83 @@  wsi_display_queue_present(struct wsi_swapchain          *drv_chain,
 
    pthread_mutex_lock(&wsi->wait_mutex);
 
+   if (image->base.timing && image->base.timing->target_msc != 0) {
+      VkIcdSurfaceDisplay          *surface = chain->surface;
+      wsi_display_mode             *display_mode = wsi_display_mode_from_handle(surface->displayMode);
+      wsi_display_connector        *connector = display_mode->connector;
+
+      wsi_display_debug("delta frame %ld\n", image->base.timing->target_msc - connector->last_frame);
+      if (image->base.timing->target_msc > connector->last_frame) {
+         uint64_t frame_queued;
+         VkDisplayKHR display = wsi_display_connector_to_handle(connector);
+
+         wsi_display_debug_code(uint64_t current_frame, current_nsec;
+                                drmCrtcGetSequence(wsi->master_fd, connector->crtc_id, &current_frame, &current_nsec);
+                                wsi_display_debug("from current: %ld\n", image->base.timing->target_msc - current_frame));
+
+         image->fence = wsi_display_fence_alloc(chain->base.device, chain->base.wsi, display, &chain->base.alloc);
+
+         if (!image->fence) {
+            result = VK_ERROR_OUT_OF_HOST_MEMORY;
+            goto bail_unlock;
+         }
+
+         result = wsi_register_vblank_event(image->fence,
+                                            chain->base.wsi,
+                                            display,
+                                            0,
+                                            image->base.timing->target_msc - 1,
+                                            &frame_queued);
+
+         if (result != VK_SUCCESS)
+            goto bail_unlock;
+
+         /* Check and make sure we are queued for the right frame, otherwise just
+          * go queue an image
+          */
+         if (frame_queued <= image->base.timing->target_msc - 1) {
+            image->state = wsi_image_waiting;
+
+            /*
+             * Don't set the image member until we're going to wait for the
+             * event to arrive before flipping to the image. That way, if the
+             * register_vblank_event call happens to process the event, it
+             * won't actually do anything
+             */
+            image->fence->image = image;
+            wsi_display_start_wait_thread(wsi);
+            result = VK_SUCCESS;
+            goto bail_unlock;
+         }
+
+      }
+   }
+
+
    image->flip_sequence = ++chain->flip_sequence;
    image->state = wsi_image_queued;
 
    result = _wsi_display_queue_next(drv_chain);
 
+bail_unlock:
    pthread_mutex_unlock(&wsi->wait_mutex);
 
    return result;
 }
 
+static VkResult
+wsi_display_get_refresh_cycle_duration(struct wsi_swapchain *drv_chain,
+                                       VkRefreshCycleDurationGOOGLE *duration)
+{
+   struct wsi_display_swapchain *chain = (struct wsi_display_swapchain *) drv_chain;
+   VkIcdSurfaceDisplay          *surface = chain->surface;
+   wsi_display_mode             *display_mode = wsi_display_mode_from_handle(surface->displayMode);
+   double                       refresh = wsi_display_mode_refresh(display_mode);
+   /* Refresh period as 1e9 / refresh, rounded to nearest; assumes wsi_display_mode_refresh returns Hz so the result is nanoseconds - TODO confirm. */
+   duration->refreshDuration = (uint64_t) (floor (1.0/refresh * 1e9 + 0.5));
+   return VK_SUCCESS;
+}
+
 static VkResult
 wsi_display_surface_create_swapchain(VkIcdSurfaceBase                   *icd_surface,
                                      VkDevice                           device,
@@ -1426,10 +1526,13 @@  wsi_display_surface_create_swapchain(VkIcdSurfaceBase                   *icd_sur
    chain->base.get_wsi_image = wsi_display_get_wsi_image;
    chain->base.acquire_next_image = wsi_display_acquire_next_image;
    chain->base.queue_present = wsi_display_queue_present;
+   chain->base.get_refresh_cycle_duration = wsi_display_get_refresh_cycle_duration;
+   chain->base.get_current_time = wsi_get_current_monotonic;
    chain->base.present_mode = create_info->presentMode;
    chain->base.image_count = num_images;
 
    chain->wsi = wsi;
+   chain->allocator = allocator;
 
    chain->surface = (VkIcdSurfaceDisplay *) icd_surface;
 
@@ -2130,6 +2233,7 @@  wsi_get_swapchain_counter(VkDevice                      device,
    struct wsi_display_swapchain *swapchain = (struct wsi_display_swapchain *) wsi_swapchain_from_handle(_swapchain);
    struct wsi_display_connector *connector = wsi_display_mode_from_handle(swapchain->surface->displayMode)->connector;
    int ret;
+   uint64_t nsec;
 
    if (wsi->master_fd < 0)
       return VK_ERROR_INITIALIZATION_FAILED;
@@ -2139,9 +2243,13 @@  wsi_get_swapchain_counter(VkDevice                      device,
       return VK_SUCCESS;
    }
 
-   ret = drmCrtcGetSequence(wsi->master_fd, connector->crtc_id, value, NULL);
+   ret = drmCrtcGetSequence(wsi->master_fd, connector->crtc_id, value, &nsec);
    if (ret)
       *value = 0;
+   else {
+      connector->last_frame = *value;
+      connector->last_nsec = nsec;
+   }
 
    return VK_SUCCESS;
 }
@@ -2150,19 +2258,44 @@  static void wsi_display_vblank_handler(int fd, unsigned int frame,
                                        unsigned int sec, unsigned int usec, void *data)
 {
    struct wsi_display_fence     *fence = data;
-
-   wsi_display_debug("%9lu fence %lu received %d\n", pthread_self(), fence->sequence, frame);
+   struct wsi_display_connector *connector = wsi_display_connector_from_handle(fence->base.display);
+   uint64_t frame64 = widen_32_to_64(frame, connector->last_frame);
+   uint64_t nsec = (uint64_t) sec * 1000000000 + (uint64_t) usec * 1000;
+   struct wsi_display_image *image = fence->image;
+
+   /* Don't let time go backwards: the sec/usec timestamp passed to this handler has lower resolution than ktime */
+   if (nsec < connector->last_nsec)
+      nsec = connector->last_nsec;
+
+   wsi_display_debug("%9lu fence %lu received %lu nsec %lu\n", pthread_self(), fence->sequence, frame64, nsec);
+   connector->last_nsec = nsec;
+   connector->last_frame = frame64;
    fence->event_received = true;
    wsi_display_fence_check_free(fence);
+   if (image) {
+      image->flip_sequence = ++image->chain->flip_sequence;
+      image->state = wsi_image_queued;
+      (void) _wsi_display_queue_next(&image->chain->base);
+   }
 }
 
 static void wsi_display_sequence_handler(int fd, uint64_t frame,
                                          uint64_t ns, uint64_t user_data)
 {
    struct wsi_display_fence     *fence = (struct wsi_display_fence *) (uintptr_t) user_data;
+   struct wsi_display_connector *connector = wsi_display_connector_from_handle(fence->base.display);
+   struct wsi_display_image *image = fence->image;
 
    wsi_display_debug("%9lu fence %lu received %lu\n", pthread_self(), fence->sequence, frame);
+   connector->last_nsec = ns;
+   connector->last_frame = frame;
    fence->event_received = true;
    wsi_display_fence_check_free(fence);
+   if (image) {
+      image->flip_sequence = ++image->chain->flip_sequence;
+      image->state = wsi_image_queued;
+      (void) _wsi_display_queue_next(&image->chain->base);
+   }
 }
 
+/* VK_GOOGLE_display_timing */
diff --git a/src/vulkan/wsi/wsi_common_private.h b/src/vulkan/wsi/wsi_common_private.h
index 0d902846238..6f9d7323085 100644
--- a/src/vulkan/wsi/wsi_common_private.h
+++ b/src/vulkan/wsi/wsi_common_private.h
@@ -25,6 +25,13 @@ 
 
 #include "wsi_common.h"
 
+struct wsi_timing {
+   bool complete;
+   bool consumed;
+   uint64_t target_msc;
+   VkPastPresentationTimingGOOGLE timing;
+};
+
 struct wsi_image {
    VkImage image;
    VkDeviceMemory memory;
@@ -39,8 +46,16 @@  struct wsi_image {
    uint32_t offset;
    uint32_t row_pitch;
    int fd;
+
+   VkQueryPool query_pool;
+
+   VkCommandBuffer timestamp_buffer;
+
+   struct wsi_timing *timing;
 };
 
+#define WSI_TIMING_HISTORY      16
+
 struct wsi_swapchain {
    const struct wsi_device *wsi;
 
@@ -52,6 +67,16 @@  struct wsi_swapchain {
 
    bool use_prime_blit;
 
+   uint32_t timing_insert;
+   uint32_t timing_count;
+
+   struct wsi_timing timing[WSI_TIMING_HISTORY];
+
+   uint64_t frame_msc;
+   uint64_t frame_ust;
+
+   float timestamp_period;
+
    /* Command pools, one per queue family */
    VkCommandPool *cmd_pools;
 
@@ -65,6 +90,10 @@  struct wsi_swapchain {
    VkResult (*queue_present)(struct wsi_swapchain *swap_chain,
                              uint32_t image_index,
                              const VkPresentRegionKHR *damage);
+   VkResult (*get_refresh_cycle_duration)(struct wsi_swapchain *swap_chain,
+                                          VkRefreshCycleDurationGOOGLE *pDisplayTimingProperties);
+
+   uint64_t (*get_current_time)(void);
 };
 
 VkResult
@@ -91,6 +120,12 @@  wsi_destroy_image(const struct wsi_swapchain *chain,
                   struct wsi_image *image);
 
 
+void
+wsi_mark_timing(struct wsi_swapchain *swapchain,
+                struct wsi_image *image,
+                uint64_t ust,
+                uint64_t msc);
+
 struct wsi_interface {
    VkResult (*get_support)(VkIcdSurfaceBase *surface,
                            struct wsi_device *wsi_device,
diff --git a/src/vulkan/wsi/wsi_common_x11.c b/src/vulkan/wsi/wsi_common_x11.c
index 714523678d4..e0d9135da53 100644
--- a/src/vulkan/wsi/wsi_common_x11.c
+++ b/src/vulkan/wsi/wsi_common_x11.c
@@ -641,6 +641,7 @@  struct x11_image {
    bool                                      busy;
    struct xshmfence *                        shm_fence;
    uint32_t                                  sync_fence;
+   uint32_t                                  serial;
 };
 
 struct x11_swapchain {
@@ -657,6 +658,8 @@  struct x11_swapchain {
    uint64_t                                     send_sbc;
    uint64_t                                     last_present_msc;
    uint32_t                                     stamp;
+   uint64_t                                     last_present_nsec;
+   uint64_t                                     refresh_period;
 
    bool                                         threaded;
    VkResult                                     status;
@@ -706,8 +709,36 @@  x11_handle_dri3_present_event(struct x11_swapchain *chain,
 
    case XCB_PRESENT_EVENT_COMPLETE_NOTIFY: {
       xcb_present_complete_notify_event_t *complete = (void *) event;
-      if (complete->kind == XCB_PRESENT_COMPLETE_KIND_PIXMAP)
+      if (complete->kind == XCB_PRESENT_COMPLETE_KIND_PIXMAP) {
+         uint64_t       frames = complete->msc - chain->last_present_msc;
+         uint64_t       present_nsec = complete->ust * 1000;
+
+         /*
+          * Well, this is about as good as we can do -- measure the refresh
+          * period instead of asking for the current mode and using that. Turns
+          * out, for eDP panels, this works better anyway, as they use the
+          * built-in fixed mode for everything.
+          */
+         if (0 < frames && frames < 10 && present_nsec > chain->last_present_nsec) {
+
+            uint64_t refresh_period = (present_nsec - chain->last_present_nsec + frames / 2) / frames;
+
+            if (chain->refresh_period)
+               refresh_period = (3 * chain->refresh_period + refresh_period) >> 2;
+
+            chain->refresh_period = refresh_period;
+         }
+
          chain->last_present_msc = complete->msc;
+         chain->last_present_nsec = present_nsec;
+         for (unsigned i = 0; i < chain->base.image_count; i++) {
+            if (chain->images[i].serial == complete->serial) {
+               wsi_mark_timing(&chain->base, &chain->images[i].base,
+                               present_nsec, complete->msc);
+               break;
+            }
+         }
+      }
       break;
    }
 
@@ -823,7 +854,7 @@  x11_acquire_next_image_from_queue(struct x11_swapchain *chain,
 
 static VkResult
 x11_present_to_x11(struct x11_swapchain *chain, uint32_t image_index,
-                   uint32_t target_msc)
+                   uint64_t target_msc)
 {
    struct x11_image *image = &chain->images[image_index];
 
@@ -840,11 +871,12 @@  x11_present_to_x11(struct x11_swapchain *chain, uint32_t image_index,
    xshmfence_reset(image->shm_fence);
 
    ++chain->send_sbc;
+   image->serial = (uint32_t) chain->send_sbc;
    xcb_void_cookie_t cookie =
       xcb_present_pixmap(chain->conn,
                          chain->window,
                          image->pixmap,
-                         (uint32_t) chain->send_sbc,
+                         image->serial,
                          0,                                    /* valid */
                          0,                                    /* update */
                          0,                                    /* x_off */
@@ -894,6 +926,26 @@  x11_queue_present(struct wsi_swapchain *anv_chain,
    }
 }
 
+static uint64_t
+x11_refresh_duration(struct x11_swapchain *chain)
+{
+   /* Pick roughly 60Hz (59.98Hz) if we don't know what it actually is yet */
+   if (!chain->refresh_period)
+      return (uint64_t) (1e9 / 59.98 + 0.5);
+
+   return chain->refresh_period;
+}
+
+static VkResult
+x11_get_refresh(struct wsi_swapchain *wsi_chain,
+                VkRefreshCycleDurationGOOGLE *timings)
+{
+   struct x11_swapchain *chain = (struct x11_swapchain *)wsi_chain;
+
+   timings->refreshDuration = x11_refresh_duration(chain);
+   return VK_SUCCESS;
+}
+
 static void *
 x11_manage_fifo_queues(void *state)
 {
@@ -910,6 +962,7 @@  x11_manage_fifo_queues(void *state)
        * other than the currently presented one.
        */
       uint32_t image_index;
+      struct x11_image *image;
       result = wsi_queue_pull(&chain->present_queue, &image_index, INT64_MAX);
       if (result != VK_SUCCESS) {
          goto fail;
@@ -918,6 +971,13 @@  x11_manage_fifo_queues(void *state)
       }
 
       uint64_t target_msc = chain->last_present_msc + 1;
+
+      image = &chain->images[image_index];
+
+      struct wsi_timing *timing = image->base.timing;
+      if (timing && timing->target_msc != 0 && timing->target_msc > target_msc)
+         target_msc = timing->target_msc;
+
       result = x11_present_to_x11(chain, image_index, target_msc);
       if (result != VK_SUCCESS)
          goto fail;
@@ -1098,7 +1158,9 @@  x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
    chain->base.get_wsi_image = x11_get_wsi_image;
    chain->base.acquire_next_image = x11_acquire_next_image;
    chain->base.queue_present = x11_queue_present;
+   chain->base.get_current_time = wsi_get_current_time;
    chain->base.present_mode = pCreateInfo->presentMode;
+   chain->base.get_refresh_cycle_duration = x11_get_refresh;
    chain->base.image_count = num_images;
    chain->conn = conn;
    chain->window = window;
@@ -1106,6 +1168,7 @@  x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
    chain->extent = pCreateInfo->imageExtent;
    chain->send_sbc = 0;
    chain->last_present_msc = 0;
+   chain->last_present_nsec = 0;
    chain->threaded = false;
    chain->status = VK_SUCCESS;