Message ID | 20240130193652.374270-3-juhapekka.heikkila@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Enable ccs compressed framebuffers on Xe2 | expand |
On Tue, Jan 30, 2024 at 09:36:50PM +0200, Juha-Pekka Heikkila wrote: > Add BO bind time pat index member to xe_bo structure and store > pat index from xe_vma to xe_bo. > > Signed-off-by: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com> > --- > drivers/gpu/drm/xe/xe_bo_types.h | 12 ++++++++++++ > drivers/gpu/drm/xe/xe_pt.c | 22 ++++++++++++++++++---- > 2 files changed, 30 insertions(+), 4 deletions(-) > > diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h > index 14ef13b7b421..6d599f1e846b 100644 > --- a/drivers/gpu/drm/xe/xe_bo_types.h > +++ b/drivers/gpu/drm/xe/xe_bo_types.h > @@ -91,6 +91,18 @@ struct xe_bo { > > /** @vram_userfault_link: Link into @mem_access.vram_userfault.list */ > struct list_head vram_userfault_link; > + > + /** > + * @pat_index: The pat index requested when bind this BO > + */ > + u16 pat_index; > + > + /** > + * @has_sealed_pat_index: The pat index is sealed because this BO is > + * pinned as framebuffer. This is to prevent flipping compression > + * on/off from framebuffers while in use. 
> + */ > + bool has_sealed_pat_index; > }; > > #define intel_bo_to_drm_bo(bo) (&(bo)->ttm.base) > diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c > index de1030a47588..c72cb75d993c 100644 > --- a/drivers/gpu/drm/xe/xe_pt.c > +++ b/drivers/gpu/drm/xe/xe_pt.c > @@ -1208,10 +1208,11 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue > struct dma_fence *fence; > struct invalidation_fence *ifence = NULL; > struct xe_range_fence *rfence; > + struct xe_bo *bo = xe_vma_bo(vma); > int err; > > bind_pt_update.locked = false; > - xe_bo_assert_held(xe_vma_bo(vma)); > + xe_bo_assert_held(bo); > xe_vm_assert_held(vm); > > vm_dbg(&xe_vma_vm(vma)->xe->drm, > @@ -1252,8 +1253,21 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue > return ERR_PTR(-ENOMEM); > } > > + /* > + * On Xe2 BO which was pinned as framebuffer before with different > + * PAT index cannot be bound with different PAT index. This is > + * to prevent switching CCS on/off from framebuffers on the fly > + * with Xe2. > + */ I haven't been following all the discussion here, but why is switching it on/off a problem? On Xe2 can't we just always turn on decompression (assuming they were 4-tile)? Even if a content producer puts data into the buffer using a non-compression PAT index, my understanding is that the FlatCCS metadata for that part of the buffer still gets updated appropriately (to 0000 or whatever the code is for "uncompressed block"). If the decompression bit in PLANE_CTL basically translates to "pay attention to FlatCCS" vs "ignore FlatCCS" it shouldn't matter whether the data is truly compressed or not, right? Since the FlatCCS area that corresponds to a buffer is still correct even when non-compressed PAT is used (I think), is there a reason to turn off decompression for 4-tile? Am I overlooking something? 
Matt > + if (bo) { > + if (bo->has_sealed_pat_index && bo->pat_index != vma->pat_index) > + return ERR_PTR(-EINVAL); > + > + bo->pat_index = vma->pat_index; > + } > + > fence = xe_migrate_update_pgtables(tile->migrate, > - vm, xe_vma_bo(vma), q, > + vm, bo, q, > entries, num_entries, > syncs, num_syncs, > &bind_pt_update.base); > @@ -1287,8 +1301,8 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue > DMA_RESV_USAGE_KERNEL : > DMA_RESV_USAGE_BOOKKEEP); > > - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) > - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, > + if (!xe_vma_has_no_bo(vma) && !bo->vm) > + dma_resv_add_fence(bo->ttm.base.resv, fence, > DMA_RESV_USAGE_BOOKKEEP); > xe_pt_commit_bind(vma, entries, num_entries, rebind, > bind_pt_update.locked ? &deferred : NULL); > -- > 2.25.1 >
On 31.1.2024 20.56, Matt Roper wrote: > On Tue, Jan 30, 2024 at 09:36:50PM +0200, Juha-Pekka Heikkila wrote: >> Add BO bind time pat index member to xe_bo structure and store >> pat index from xe_vma to xe_bo. >> >> Signed-off-by: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com> >> --- >> drivers/gpu/drm/xe/xe_bo_types.h | 12 ++++++++++++ >> drivers/gpu/drm/xe/xe_pt.c | 22 ++++++++++++++++++---- >> 2 files changed, 30 insertions(+), 4 deletions(-) >> >> diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h >> index 14ef13b7b421..6d599f1e846b 100644 >> --- a/drivers/gpu/drm/xe/xe_bo_types.h >> +++ b/drivers/gpu/drm/xe/xe_bo_types.h >> @@ -91,6 +91,18 @@ struct xe_bo { >> >> /** @vram_userfault_link: Link into @mem_access.vram_userfault.list */ >> struct list_head vram_userfault_link; >> + >> + /** >> + * @pat_index: The pat index requested when bind this BO >> + */ >> + u16 pat_index; >> + >> + /** >> + * @has_sealed_pat_index: The pat index is sealed because this BO is >> + * pinned as framebuffer. This is to prevent flipping compression >> + * on/off from framebuffers while in use. 
>> + */ >> + bool has_sealed_pat_index; >> }; >> >> #define intel_bo_to_drm_bo(bo) (&(bo)->ttm.base) >> diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c >> index de1030a47588..c72cb75d993c 100644 >> --- a/drivers/gpu/drm/xe/xe_pt.c >> +++ b/drivers/gpu/drm/xe/xe_pt.c >> @@ -1208,10 +1208,11 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue >> struct dma_fence *fence; >> struct invalidation_fence *ifence = NULL; >> struct xe_range_fence *rfence; >> + struct xe_bo *bo = xe_vma_bo(vma); >> int err; >> >> bind_pt_update.locked = false; >> - xe_bo_assert_held(xe_vma_bo(vma)); >> + xe_bo_assert_held(bo); >> xe_vm_assert_held(vm); >> >> vm_dbg(&xe_vma_vm(vma)->xe->drm, >> @@ -1252,8 +1253,21 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue >> return ERR_PTR(-ENOMEM); >> } >> >> + /* >> + * On Xe2 BO which was pinned as framebuffer before with different >> + * PAT index cannot be bound with different PAT index. This is >> + * to prevent switching CCS on/off from framebuffers on the fly >> + * with Xe2. >> + */ > > I haven't been following all the discussion here, but why is switching > it on/off a problem? On Xe2 can't we just always turn on decompression > (assuming they were 4-tile)? > > Even if a content producer puts data into the buffer using a > non-compression PAT index, my understanding is that the FlatCCS metadata > for that part of the buffer still gets updated appropriately (to 0000 or > whatever the code is for "uncompressed block"). If the decompression > bit in PLANE_CTL basically translates to "pay attention to FlatCCS" vs > "ignore FlatCCS" it shouldn't matter whether the data is truly > compressed or not, right? Since the FlatCCS area that corresponds to a > buffer is still correct even when non-compressed PAT is used (I think), > is there a reason to turn off decompression for 4-tile? > > Am I overlooking something? 
Hi Matt, you got it correct for the case of tile4; in patch 4/4 of this set I turn decompression on unconditionally for tile4 for display when on xe2. Problems come when we're not on tile4 but on linear/x-tile, for which the display engine doesn't support decompression. These PAT indexes for BOs are set by user space, so I am not allowed to change them and can only deny changing the pat index of a BO if it was already accepted as a good configuration for display. The has_sealed_pat_index flag is set when the framebuffer is pinned. Decompression for linear and x-tile is marked as not supported, and I have a sas document for xe2 compression which says that sw must disable compression for linear/x-tile. /Juha-Pekka > >> + if (bo) { >> + if (bo->has_sealed_pat_index && bo->pat_index != vma->pat_index) >> + return ERR_PTR(-EINVAL); >> + >> + bo->pat_index = vma->pat_index; >> + } >> + >> fence = xe_migrate_update_pgtables(tile->migrate, >> - vm, xe_vma_bo(vma), q, >> + vm, bo, q, >> entries, num_entries, >> syncs, num_syncs, >> &bind_pt_update.base); >> @@ -1287,8 +1301,8 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue >> DMA_RESV_USAGE_KERNEL : >> DMA_RESV_USAGE_BOOKKEEP); >> >> - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) >> - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, >> + if (!xe_vma_has_no_bo(vma) && !bo->vm) >> + dma_resv_add_fence(bo->ttm.base.resv, fence, >> DMA_RESV_USAGE_BOOKKEEP); >> xe_pt_commit_bind(vma, entries, num_entries, rebind, >> bind_pt_update.locked ? &deferred : NULL); >> -- >> 2.25.1 >> >
On 1.2.2024 16.17, Juha-Pekka Heikkila wrote: > On 31.1.2024 20.56, Matt Roper wrote: >> On Tue, Jan 30, 2024 at 09:36:50PM +0200, Juha-Pekka Heikkila wrote: >>> Add BO bind time pat index member to xe_bo structure and store >>> pat index from xe_vma to xe_bo. >>> >>> Signed-off-by: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com> >>> --- >>> drivers/gpu/drm/xe/xe_bo_types.h | 12 ++++++++++++ >>> drivers/gpu/drm/xe/xe_pt.c | 22 ++++++++++++++++++---- >>> 2 files changed, 30 insertions(+), 4 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/xe/xe_bo_types.h >>> b/drivers/gpu/drm/xe/xe_bo_types.h >>> index 14ef13b7b421..6d599f1e846b 100644 >>> --- a/drivers/gpu/drm/xe/xe_bo_types.h >>> +++ b/drivers/gpu/drm/xe/xe_bo_types.h >>> @@ -91,6 +91,18 @@ struct xe_bo { >>> /** @vram_userfault_link: Link into >>> @mem_access.vram_userfault.list */ >>> struct list_head vram_userfault_link; >>> + >>> + /** >>> + * @pat_index: The pat index requested when bind this BO >>> + */ >>> + u16 pat_index; >>> + >>> + /** >>> + * @has_sealed_pat_index: The pat index is sealed because this >>> BO is >>> + * pinned as framebuffer. This is to prevent flipping compression >>> + * on/off from framebuffers while in use. 
>>> + */ >>> + bool has_sealed_pat_index; >>> }; >>> #define intel_bo_to_drm_bo(bo) (&(bo)->ttm.base) >>> diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c >>> index de1030a47588..c72cb75d993c 100644 >>> --- a/drivers/gpu/drm/xe/xe_pt.c >>> +++ b/drivers/gpu/drm/xe/xe_pt.c >>> @@ -1208,10 +1208,11 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct >>> xe_vma *vma, struct xe_exec_queue >>> struct dma_fence *fence; >>> struct invalidation_fence *ifence = NULL; >>> struct xe_range_fence *rfence; >>> + struct xe_bo *bo = xe_vma_bo(vma); >>> int err; >>> bind_pt_update.locked = false; >>> - xe_bo_assert_held(xe_vma_bo(vma)); >>> + xe_bo_assert_held(bo); >>> xe_vm_assert_held(vm); >>> vm_dbg(&xe_vma_vm(vma)->xe->drm, >>> @@ -1252,8 +1253,21 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct >>> xe_vma *vma, struct xe_exec_queue >>> return ERR_PTR(-ENOMEM); >>> } >>> + /* >>> + * On Xe2 BO which was pinned as framebuffer before with different >>> + * PAT index cannot be bound with different PAT index. This is >>> + * to prevent switching CCS on/off from framebuffers on the fly >>> + * with Xe2. >>> + */ >> >> I haven't been following all the discussion here, but why is switching >> it on/off a problem? On Xe2 can't we just always turn on decompression >> (assuming they were 4-tile)? >> >> Even if a content producer puts data into the buffer using a >> non-compression PAT index, my understanding is that the FlatCCS metadata >> for that part of the buffer still gets updated appropriately (to 0000 or >> whatever the code is for "uncompressed block"). If the decompression >> bit in PLANE_CTL basically translates to "pay attention to FlatCCS" vs >> "ignore FlatCCS" it shouldn't matter whether the data is truly >> compressed or not, right? Since the FlatCCS area that corresponds to a >> buffer is still correct even when non-compressed PAT is used (I think), >> is there a reason to turn off decompression for 4-tile? >> >> Am I overlooking something? 
> > Hi Matt, > > you got it correct for the case of tile4, on patch 4/4 of this set I put > decompression on unconditionally for tile4 for display when on xe2. > > Problems come when we're not on tile4 but linear/x-tile where display > engine doesn't support decompression for these. These PAT indexes for > BOs are set by user space so I will not be allowed to change it and can > only deny changing pat index for BO if it was already accepted as good > configuration for display. That has_sealed_pat_index is set when > framebuffer is pinned. > > Decompression for linear and x-tile is marked as not supported and I > have sas document for xe2 compression where is said sw must disable > compression for linear/x-tile. > I just talked with Ville and we agreed that we could simply drop these checks. By default everything will be decompressed, and user space will need to explicitly enable compression. Linear and x-tile will misrender if they're compressed, and with patch 4/4 from this set tile4 will work in all cases. >> >>> + if (bo) { >>> + if (bo->has_sealed_pat_index && bo->pat_index != >>> vma->pat_index) >>> + return ERR_PTR(-EINVAL); >>> + >>> + bo->pat_index = vma->pat_index; >>> + } >>> + >>> fence = xe_migrate_update_pgtables(tile->migrate, >>> - vm, xe_vma_bo(vma), q, >>> + vm, bo, q, >>> entries, num_entries, >>> syncs, num_syncs, >>> &bind_pt_update.base); >>> @@ -1287,8 +1301,8 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct >>> xe_vma *vma, struct xe_exec_queue >>> DMA_RESV_USAGE_KERNEL : >>> DMA_RESV_USAGE_BOOKKEEP); >>> - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) >>> - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, >>> + if (!xe_vma_has_no_bo(vma) && !bo->vm) >>> + dma_resv_add_fence(bo->ttm.base.resv, fence, >>> DMA_RESV_USAGE_BOOKKEEP); >>> xe_pt_commit_bind(vma, entries, num_entries, rebind, >>> bind_pt_update.locked ? &deferred : NULL); >>> -- >>> 2.25.1 >>> >> >
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index 14ef13b7b421..6d599f1e846b 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -91,6 +91,18 @@ struct xe_bo { /** @vram_userfault_link: Link into @mem_access.vram_userfault.list */ struct list_head vram_userfault_link; + + /** + * @pat_index: The pat index requested when bind this BO + */ + u16 pat_index; + + /** + * @has_sealed_pat_index: The pat index is sealed because this BO is + * pinned as framebuffer. This is to prevent flipping compression + * on/off from framebuffers while in use. + */ + bool has_sealed_pat_index; }; #define intel_bo_to_drm_bo(bo) (&(bo)->ttm.base) diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index de1030a47588..c72cb75d993c 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1208,10 +1208,11 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue struct dma_fence *fence; struct invalidation_fence *ifence = NULL; struct xe_range_fence *rfence; + struct xe_bo *bo = xe_vma_bo(vma); int err; bind_pt_update.locked = false; - xe_bo_assert_held(xe_vma_bo(vma)); + xe_bo_assert_held(bo); xe_vm_assert_held(vm); vm_dbg(&xe_vma_vm(vma)->xe->drm, @@ -1252,8 +1253,21 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue return ERR_PTR(-ENOMEM); } + /* + * On Xe2 BO which was pinned as framebuffer before with different + * PAT index cannot be bound with different PAT index. This is + * to prevent switching CCS on/off from framebuffers on the fly + * with Xe2. 
+ */ + if (bo) { + if (bo->has_sealed_pat_index && bo->pat_index != vma->pat_index) + return ERR_PTR(-EINVAL); + + bo->pat_index = vma->pat_index; + } + fence = xe_migrate_update_pgtables(tile->migrate, - vm, xe_vma_bo(vma), q, + vm, bo, q, entries, num_entries, syncs, num_syncs, &bind_pt_update.base); @@ -1287,8 +1301,8 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue DMA_RESV_USAGE_KERNEL : DMA_RESV_USAGE_BOOKKEEP); - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, + if (!xe_vma_has_no_bo(vma) && !bo->vm) + dma_resv_add_fence(bo->ttm.base.resv, fence, DMA_RESV_USAGE_BOOKKEEP); xe_pt_commit_bind(vma, entries, num_entries, rebind, bind_pt_update.locked ? &deferred : NULL);
Add a BO bind-time pat index member to the xe_bo structure and store the pat index from xe_vma in xe_bo. Signed-off-by: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com> --- drivers/gpu/drm/xe/xe_bo_types.h | 12 ++++++++++++ drivers/gpu/drm/xe/xe_pt.c | 22 ++++++++++++++++++---- 2 files changed, 30 insertions(+), 4 deletions(-)