@@ -508,6 +508,7 @@ struct radeon_bo {
struct drm_gem_object gem_base;
struct ttm_bo_kmap_obj dma_buf_vmap;
+ bool prime_imported;
pid_t pid;
struct radeon_mn *mn;
@@ -550,6 +550,34 @@ static int radeon_crtc_page_flip(struct drm_crtc *crtc,
DRM_ERROR("failed to reserve new rbo buffer before flip\n");
goto cleanup;
}
+
+ /*
+ * Repin into GTT in case of imported prime dmabuf,
+ * then unpin again. Restores source dmabuf location
+ * to GTT, where the actual dmabuf backing store gets
+ * updated by the exporting render offload gpu at swap.
+ */
+ if (new_rbo->prime_imported) {
+ DRM_DEBUG_PRIME("Flip to prime imported dmabuf %p\n", new_rbo);
+
+ r = radeon_bo_pin(new_rbo, RADEON_GEM_DOMAIN_GTT, NULL);
+ if (unlikely(r != 0)) {
+ DRM_ERROR("failed to gtt pin buffer %p before flip\n",
+ new_rbo);
+ }
+ else {
+ r = radeon_bo_unpin(new_rbo);
+ }
+
+ if (unlikely(r != 0)) {
+ radeon_bo_unreserve(new_rbo);
+ r = -EINVAL;
+ DRM_ERROR("failed to gtt unpin buffer %p before flip\n",
+ new_rbo);
+ goto cleanup;
+ }
+ }
+
/* Only 27 bit offset for legacy CRTC */
r = radeon_bo_pin_restricted(new_rbo, RADEON_GEM_DOMAIN_VRAM,
ASIC_IS_AVIVO(rdev) ? 0 : 1 << 27, &base);
@@ -69,6 +69,7 @@ struct drm_gem_object *radeon_gem_prime_import_sg_table(struct drm_device *dev,
ww_mutex_lock(&resv->lock, NULL);
ret = radeon_bo_create(rdev, attach->dmabuf->size, PAGE_SIZE, false,
RADEON_GEM_DOMAIN_GTT, 0, sg, resv, &bo);
+ bo->prime_imported = true;
ww_mutex_unlock(&resv->lock);
if (ret)
return ERR_PTR(ret);
@@ -256,6 +256,7 @@ static int radeon_move_blit(struct ttm_buffer_object *bo,
struct ttm_mem_reg *old_mem)
{
struct radeon_device *rdev;
+ struct radeon_bo *rbo;
uint64_t old_start, new_start;
struct radeon_fence *fence;
unsigned num_pages;
@@ -296,6 +297,19 @@ static int radeon_move_blit(struct ttm_buffer_object *bo,
BUILD_BUG_ON((PAGE_SIZE % RADEON_GPU_PAGE_SIZE) != 0);
num_pages = new_mem->num_pages * (PAGE_SIZE / RADEON_GPU_PAGE_SIZE);
+
+ /*
+ * Prime imported dmabuf, previously used as scanout buffer in a page
+ * flip? If so, skip actual data move back from VRAM into GTT, as this
+ * would only copy back stale image data.
+ */
+ rbo = container_of(bo, struct radeon_bo, tbo);
+ if (rbo->prime_imported && old_mem->mem_type == TTM_PL_VRAM &&
+ new_mem->mem_type == TTM_PL_TT) {
+ DRM_DEBUG_PRIME("Skip for dmabuf back-move %p.\n", rbo);
+ num_pages = 0;
+ }
+
fence = radeon_copy(rdev, old_start, new_start, num_pages, bo->resv);
if (IS_ERR(fence))
return PTR_ERR(fence);
Scanout bo's which are dmabuf-backed in RAM and imported via prime will not update their content with new rendering from the render offload GPU once they've been flipped onto the scanout once. The reason is that at preparation of the first flip they get pinned into VRAM, then unpinned at some later point, but they stay in the VRAM memory domain, so updates to the system-RAM dmabuf object by the exporting render offload GPU don't lead to updates of the content in VRAM — it becomes stale. For prime-imported dmabufs we solve this by first pinning the bo into GTT, which resets the bo's domain back to GTT, then unpinning again, so the follow-up pinning into VRAM will actually upload an up-to-date display buffer from the dmabuf's GTT backing store. During the pinning into GTT, we skip the actual data move from VRAM to GTT to avoid a needless bo copy of stale image data. Signed-off-by: Mario Kleiner <mario.kleiner.de@gmail.com> --- drivers/gpu/drm/radeon/radeon.h | 1 + drivers/gpu/drm/radeon/radeon_display.c | 28 ++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/radeon_prime.c | 1 + drivers/gpu/drm/radeon/radeon_ttm.c | 14 ++++++++++++++ 4 files changed, 44 insertions(+)