Message ID | 20240716152104.377039-2-Arunpravin.PaneerSelvam@amd.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v5,1/2] drm/buddy: Add start address support to trim function | expand |
AMDGPU_GEM_CREATE_GFX12_DCC is set on 90% of all memory allocations, and almost all of them are not displayable. Shouldn't we use a different way to indicate that we need a non-power-of-two alignment, such as looking at the alignment field directly? Marek On Tue, Jul 16, 2024, 11:45 Arunpravin Paneer Selvam < Arunpravin.PaneerSelvam@amd.com> wrote: > Add address alignment support to the DCC VRAM buffers. > > v2: > - adjust size based on the max_texture_channel_caches values > only for GFX12 DCC buffers. > - used AMDGPU_GEM_CREATE_GFX12_DCC flag to apply change only > for DCC buffers. > - roundup non power of two DCC buffer adjusted size to nearest > power of two number as the buddy allocator does not support non > power of two alignments. This applies only to the contiguous > DCC buffers. > > v3:(Alex) > - rewrite the max texture channel caches comparison code in an > algorithmic way to determine the alignment size. > > v4:(Alex) > - Move the logic from amdgpu_vram_mgr_dcc_alignment() to gmc_v12_0.c > and add a new gmc func callback for dcc alignment. If the callback > is non-NULL, call it to get the alignment, otherwise, use the default. > > v5:(Alex) > - Set the Alignment to a default value if the callback doesn't exist. > - Add the callback to amdgpu_gmc_funcs. > > v6: > - Fix checkpatch error reported by Intel CI. > > Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com> > Acked-by: Alex Deucher <alexander.deucher@amd.com> > Acked-by: Christian König <christian.koenig@amd.com> > Reviewed-by: Frank Min <Frank.Min@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 6 ++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 36 ++++++++++++++++++-- > drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 15 ++++++++ > 3 files changed, 55 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h > index febca3130497..654d0548a3f8 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h > @@ -156,6 +156,8 @@ struct amdgpu_gmc_funcs { > uint64_t addr, uint64_t *flags); > /* get the amount of memory used by the vbios for pre-OS console */ > unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev); > + /* get the DCC buffer alignment */ > + u64 (*get_dcc_alignment)(struct amdgpu_device *adev); > > enum amdgpu_memory_partition (*query_mem_partition_mode)( > struct amdgpu_device *adev); > @@ -363,6 +365,10 @@ struct amdgpu_gmc { > (adev)->gmc.gmc_funcs->override_vm_pte_flags \ > ((adev), (vm), (addr), (pte_flags)) > #define amdgpu_gmc_get_vbios_fb_size(adev) > (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev)) > +#define amdgpu_gmc_get_dcc_alignment(_adev) ({ \ > + typeof(_adev) (adev) = (_adev); \ > + ((adev)->gmc.gmc_funcs->get_dcc_alignment((adev))); \ > +}) > > /** > * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through > the BAR > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c > index f91cc149d06c..aa9dca12371c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c > @@ -512,6 +512,16 @@ static int amdgpu_vram_mgr_new(struct > ttm_resource_manager *man, > vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; > > remaining_size = (u64)vres->base.size; > + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && > + bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) { > + u64 adjust_size; > + > + if (adev->gmc.gmc_funcs->get_dcc_alignment) { > + adjust_size = amdgpu_gmc_get_dcc_alignment(adev); > + remaining_size = roundup_pow_of_two(remaining_size > + adjust_size); > + vres->flags |= DRM_BUDDY_TRIM_DISABLE; > + } > + } > > mutex_lock(&mgr->lock); > while (remaining_size) { > @@ -521,8 +531,12 @@ static int amdgpu_vram_mgr_new(struct > ttm_resource_manager *man, > min_block_size = mgr->default_page_size; > > size = remaining_size; > - if ((size >= (u64)pages_per_block << PAGE_SHIFT) && > - !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1))) > + > + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && > + bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) > + min_block_size = size; > + else if ((size >= (u64)pages_per_block << PAGE_SHIFT) && > + !(size & (((u64)pages_per_block << PAGE_SHIFT) - > 1))) > min_block_size = (u64)pages_per_block << > PAGE_SHIFT; > > BUG_ON(min_block_size < mm->chunk_size); > @@ -553,6 +567,24 @@ static int amdgpu_vram_mgr_new(struct > ttm_resource_manager *man, > } > mutex_unlock(&mgr->lock); > > + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && > + bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) { > + struct drm_buddy_block *dcc_block; > + u64 dcc_start, alignment; > + > + dcc_block = amdgpu_vram_mgr_first_block(&vres->blocks); > + dcc_start = amdgpu_vram_mgr_block_start(dcc_block); > + > + if (adev->gmc.gmc_funcs->get_dcc_alignment) { > + alignment = amdgpu_gmc_get_dcc_alignment(adev); > + /* Adjust the start address for DCC buffers only */ > + dcc_start = roundup(dcc_start, alignment); > + drm_buddy_block_trim(mm, &dcc_start, > + (u64)vres->base.size, > + &vres->blocks); > + } > + } > + > vres->base.start = 0; > size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks), > vres->base.size); > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c > b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c > index fd3ac483760e..4259edcdec8a 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c > @@ -542,6 +542,20 @@ static unsigned gmc_v12_0_get_vbios_fb_size(struct > amdgpu_device *adev) > return 0; > } > > +static u64 gmc_v12_0_get_dcc_alignment(struct amdgpu_device *adev) > +{ > + u64 max_tex_channel_caches, alignment; > + > + max_tex_channel_caches = > adev->gfx.config.max_texture_channel_caches; > + if (is_power_of_2(max_tex_channel_caches)) > + alignment = (max_tex_channel_caches / SZ_4) * > max_tex_channel_caches; > + else > + alignment = roundup_pow_of_two(max_tex_channel_caches) * > + max_tex_channel_caches; > + > + return (u64)alignment * SZ_1K; > +} > + > static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = { > .flush_gpu_tlb = gmc_v12_0_flush_gpu_tlb, > .flush_gpu_tlb_pasid = gmc_v12_0_flush_gpu_tlb_pasid, > @@ -551,6 +565,7 @@ static const struct amdgpu_gmc_funcs > gmc_v12_0_gmc_funcs = { > .get_vm_pde = gmc_v12_0_get_vm_pde, > .get_vm_pte = gmc_v12_0_get_vm_pte, > .get_vbios_fb_size = gmc_v12_0_get_vbios_fb_size, > + .get_dcc_alignment = gmc_v12_0_get_dcc_alignment, > }; > > static void gmc_v12_0_set_gmc_funcs(struct amdgpu_device *adev) > -- > 2.25.1 > >
Well that approach was discussed before and seemed to be to complicated. But I totally agree that the AMDGPU_GEM_CREATE_GFX12_DCC flag is a bad idea. This isn't anything userspace should need to specify in the first place. All we need is a hardware workaround which kicks in all the time while pinning BOs for this specific hw generation and texture channel configuration. Please remove the AMDGPU_GEM_CREATE_GFX12_DCC flag again if possible or specify why it is actually necessary? Regards, Christian. Am 17.07.24 um 05:44 schrieb Marek Olšák: > AMDGPU_GEM_CREATE_GFX12_DCC is set on 90% of all memory allocations, > and almost all of them are not displayable. Shouldn't we use a > different way to indicate that we need a non-power-of-two alignment, > such as looking at the alignment field directly? > > Marek > > On Tue, Jul 16, 2024, 11:45 Arunpravin Paneer Selvam > <Arunpravin.PaneerSelvam@amd.com> wrote: > > Add address alignment support to the DCC VRAM buffers. > > v2: > - adjust size based on the max_texture_channel_caches values > only for GFX12 DCC buffers. > - used AMDGPU_GEM_CREATE_GFX12_DCC flag to apply change only > for DCC buffers. > - roundup non power of two DCC buffer adjusted size to nearest > power of two number as the buddy allocator does not support non > power of two alignments. This applies only to the contiguous > DCC buffers. > > v3:(Alex) > - rewrite the max texture channel caches comparison code in an > algorithmic way to determine the alignment size. > > v4:(Alex) > - Move the logic from amdgpu_vram_mgr_dcc_alignment() to gmc_v12_0.c > and add a new gmc func callback for dcc alignment. If the callback > is non-NULL, call it to get the alignment, otherwise, use the > default. > > v5:(Alex) > - Set the Alignment to a default value if the callback doesn't > exist. > - Add the callback to amdgpu_gmc_funcs. > > v6: > - Fix checkpatch error reported by Intel CI. > > Signed-off-by: Arunpravin Paneer Selvam > <Arunpravin.PaneerSelvam@amd.com> > Acked-by: Alex Deucher <alexander.deucher@amd.com> > Acked-by: Christian König <christian.koenig@amd.com> > Reviewed-by: Frank Min <Frank.Min@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 6 ++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 36 > ++++++++++++++++++-- > drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 15 ++++++++ > 3 files changed, 55 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h > index febca3130497..654d0548a3f8 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h > @@ -156,6 +156,8 @@ struct amdgpu_gmc_funcs { > uint64_t addr, uint64_t *flags); > /* get the amount of memory used by the vbios for pre-OS > console */ > unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev); > + /* get the DCC buffer alignment */ > + u64 (*get_dcc_alignment)(struct amdgpu_device *adev); > > enum amdgpu_memory_partition (*query_mem_partition_mode)( > struct amdgpu_device *adev); > @@ -363,6 +365,10 @@ struct amdgpu_gmc { > (adev)->gmc.gmc_funcs->override_vm_pte_flags \ > ((adev), (vm), (addr), (pte_flags)) > #define amdgpu_gmc_get_vbios_fb_size(adev) > (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev)) > +#define amdgpu_gmc_get_dcc_alignment(_adev) ({ \ > + typeof(_adev) (adev) = (_adev); \ > + ((adev)->gmc.gmc_funcs->get_dcc_alignment((adev))); \ > +}) > > /** > * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible > through the BAR > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c > index f91cc149d06c..aa9dca12371c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c > @@ -512,6 +512,16 @@ static int amdgpu_vram_mgr_new(struct > ttm_resource_manager *man, > vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; > > remaining_size = (u64)vres->base.size; > + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && > + bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) { > + u64 adjust_size; > + > + if (adev->gmc.gmc_funcs->get_dcc_alignment) { > + adjust_size = > amdgpu_gmc_get_dcc_alignment(adev); > + remaining_size = > roundup_pow_of_two(remaining_size + adjust_size); > + vres->flags |= DRM_BUDDY_TRIM_DISABLE; > + } > + } > > mutex_lock(&mgr->lock); > while (remaining_size) { > @@ -521,8 +531,12 @@ static int amdgpu_vram_mgr_new(struct > ttm_resource_manager *man, > min_block_size = mgr->default_page_size; > > size = remaining_size; > - if ((size >= (u64)pages_per_block << PAGE_SHIFT) && > - !(size & (((u64)pages_per_block << PAGE_SHIFT) > - 1))) > + > + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && > + bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) > + min_block_size = size; > + else if ((size >= (u64)pages_per_block << > PAGE_SHIFT) && > + !(size & (((u64)pages_per_block << > PAGE_SHIFT) - 1))) > min_block_size = (u64)pages_per_block << > PAGE_SHIFT; > > BUG_ON(min_block_size < mm->chunk_size); > @@ -553,6 +567,24 @@ static int amdgpu_vram_mgr_new(struct > ttm_resource_manager *man, > } > mutex_unlock(&mgr->lock); > > + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && > + bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) { > + struct drm_buddy_block *dcc_block; > + u64 dcc_start, alignment; > + > + dcc_block = > amdgpu_vram_mgr_first_block(&vres->blocks); > + dcc_start = amdgpu_vram_mgr_block_start(dcc_block); > + > + if (adev->gmc.gmc_funcs->get_dcc_alignment) { > + alignment = > amdgpu_gmc_get_dcc_alignment(adev); > + /* Adjust the start address for DCC > buffers only */ > + dcc_start = roundup(dcc_start, alignment); > + drm_buddy_block_trim(mm, &dcc_start, > + (u64)vres->base.size, > + &vres->blocks); > + } > + } > + > vres->base.start = 0; > size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks), > vres->base.size); > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c > b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c > index fd3ac483760e..4259edcdec8a 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c > @@ -542,6 +542,20 @@ static unsigned > gmc_v12_0_get_vbios_fb_size(struct amdgpu_device *adev) > return 0; > } > > +static u64 gmc_v12_0_get_dcc_alignment(struct amdgpu_device *adev) > +{ > + u64 max_tex_channel_caches, alignment; > + > + max_tex_channel_caches = > adev->gfx.config.max_texture_channel_caches; > + if (is_power_of_2(max_tex_channel_caches)) > + alignment = (max_tex_channel_caches / SZ_4) * > max_tex_channel_caches; > + else > + alignment = > roundup_pow_of_two(max_tex_channel_caches) * > + max_tex_channel_caches; > + > + return (u64)alignment * SZ_1K; > +} > + > static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = { > .flush_gpu_tlb = gmc_v12_0_flush_gpu_tlb, > .flush_gpu_tlb_pasid = gmc_v12_0_flush_gpu_tlb_pasid, > @@ -551,6 +565,7 @@ static const struct amdgpu_gmc_funcs > gmc_v12_0_gmc_funcs = { > .get_vm_pde = gmc_v12_0_get_vm_pde, > .get_vm_pte = gmc_v12_0_get_vm_pte, > .get_vbios_fb_size = gmc_v12_0_get_vbios_fb_size, > + .get_dcc_alignment = gmc_v12_0_get_dcc_alignment, > }; > > static void gmc_v12_0_set_gmc_funcs(struct amdgpu_device *adev) > -- > 2.25.1 >
Hi Christian, Can we use the below combination flags to kick in hardware workaround while pinning BO's for this specific hw generation. if (place->flags & TTM_PL_FLAG_CONTIGUOUS) && (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 0) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 1))) { } Regards, Arun. On 7/17/2024 2:38 PM, Christian König wrote: > Well that approach was discussed before and seemed to be to complicated. > > But I totally agree that the AMDGPU_GEM_CREATE_GFX12_DCC flag is a bad > idea. This isn't anything userspace should need to specify in the > first place. > > All we need is a hardware workaround which kicks in all the time while > pinning BOs for this specific hw generation and texture channel > configuration. > > Please remove the AMDGPU_GEM_CREATE_GFX12_DCC flag again if possible > or specify why it is actually necessary? > > Regards, > Christian. > > Am 17.07.24 um 05:44 schrieb Marek Olšák: >> AMDGPU_GEM_CREATE_GFX12_DCC is set on 90% of all memory allocations, >> and almost all of them are not displayable. Shouldn't we use a >> different way to indicate that we need a non-power-of-two alignment, >> such as looking at the alignment field directly? >> >> Marek >> >> On Tue, Jul 16, 2024, 11:45 Arunpravin Paneer Selvam >> <Arunpravin.PaneerSelvam@amd.com> wrote: >> >> Add address alignment support to the DCC VRAM buffers. >> >> v2: >> - adjust size based on the max_texture_channel_caches values >> only for GFX12 DCC buffers. >> - used AMDGPU_GEM_CREATE_GFX12_DCC flag to apply change only >> for DCC buffers. >> - roundup non power of two DCC buffer adjusted size to nearest >> power of two number as the buddy allocator does not support non >> power of two alignments. This applies only to the contiguous >> DCC buffers. >> >> v3:(Alex) >> - rewrite the max texture channel caches comparison code in an >> algorithmic way to determine the alignment size. >> >> v4:(Alex) >> - Move the logic from amdgpu_vram_mgr_dcc_alignment() to >> gmc_v12_0.c >> and add a new gmc func callback for dcc alignment. If the >> callback >> is non-NULL, call it to get the alignment, otherwise, use the >> default. >> >> v5:(Alex) >> - Set the Alignment to a default value if the callback doesn't >> exist. >> - Add the callback to amdgpu_gmc_funcs. >> >> v6: >> - Fix checkpatch error reported by Intel CI. >> >> Signed-off-by: Arunpravin Paneer Selvam >> <Arunpravin.PaneerSelvam@amd.com> >> Acked-by: Alex Deucher <alexander.deucher@amd.com> >> Acked-by: Christian König <christian.koenig@amd.com> >> Reviewed-by: Frank Min <Frank.Min@amd.com> >> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 6 ++++ >> drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 36 >> ++++++++++++++++++-- >> drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 15 ++++++++ >> 3 files changed, 55 insertions(+), 2 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h >> index febca3130497..654d0548a3f8 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h >> @@ -156,6 +156,8 @@ struct amdgpu_gmc_funcs { >> uint64_t addr, uint64_t >> *flags); >> /* get the amount of memory used by the vbios for pre-OS >> console */ >> unsigned int (*get_vbios_fb_size)(struct amdgpu_device >> *adev); >> + /* get the DCC buffer alignment */ >> + u64 (*get_dcc_alignment)(struct amdgpu_device *adev); >> >> enum amdgpu_memory_partition (*query_mem_partition_mode)( >> struct amdgpu_device *adev); >> @@ -363,6 +365,10 @@ struct amdgpu_gmc { >> (adev)->gmc.gmc_funcs->override_vm_pte_flags \ >> ((adev), (vm), (addr), (pte_flags)) >> #define amdgpu_gmc_get_vbios_fb_size(adev) >> (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev)) >> +#define amdgpu_gmc_get_dcc_alignment(_adev) ({ \ >> + typeof(_adev) (adev) = (_adev); \ >> + ((adev)->gmc.gmc_funcs->get_dcc_alignment((adev))); \ >> +}) >> >> /** >> * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible >> through the BAR >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c >> index f91cc149d06c..aa9dca12371c 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c >> @@ -512,6 +512,16 @@ static int amdgpu_vram_mgr_new(struct >> ttm_resource_manager *man, >> vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; >> >> remaining_size = (u64)vres->base.size; >> + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && >> + bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) { >> + u64 adjust_size; >> + >> + if (adev->gmc.gmc_funcs->get_dcc_alignment) { >> + adjust_size = >> amdgpu_gmc_get_dcc_alignment(adev); >> + remaining_size = >> roundup_pow_of_two(remaining_size + adjust_size); >> + vres->flags |= DRM_BUDDY_TRIM_DISABLE; >> + } >> + } >> >> mutex_lock(&mgr->lock); >> while (remaining_size) { >> @@ -521,8 +531,12 @@ static int amdgpu_vram_mgr_new(struct >> ttm_resource_manager *man, >> min_block_size = mgr->default_page_size; >> >> size = remaining_size; >> - if ((size >= (u64)pages_per_block << PAGE_SHIFT) && >> - !(size & (((u64)pages_per_block << >> PAGE_SHIFT) - 1))) >> + >> + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && >> + bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) >> + min_block_size = size; >> + else if ((size >= (u64)pages_per_block << >> PAGE_SHIFT) && >> + !(size & (((u64)pages_per_block << >> PAGE_SHIFT) - 1))) >> min_block_size = (u64)pages_per_block << >> PAGE_SHIFT; >> >> BUG_ON(min_block_size < mm->chunk_size); >> @@ -553,6 +567,24 @@ static int amdgpu_vram_mgr_new(struct >> ttm_resource_manager *man, >> } >> mutex_unlock(&mgr->lock); >> >> + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && >> + bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) { >> + struct drm_buddy_block *dcc_block; >> + u64 dcc_start, alignment; >> + >> + dcc_block = >> amdgpu_vram_mgr_first_block(&vres->blocks); >> + dcc_start = amdgpu_vram_mgr_block_start(dcc_block); >> + >> + if (adev->gmc.gmc_funcs->get_dcc_alignment) { >> + alignment = >> amdgpu_gmc_get_dcc_alignment(adev); >> + /* Adjust the start address for DCC >> buffers only */ >> + dcc_start = roundup(dcc_start, alignment); >> + drm_buddy_block_trim(mm, &dcc_start, >> + (u64)vres->base.size, >> + &vres->blocks); >> + } >> + } >> + >> vres->base.start = 0; >> size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks), >> vres->base.size); >> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c >> b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c >> index fd3ac483760e..4259edcdec8a 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c >> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c >> @@ -542,6 +542,20 @@ static unsigned >> gmc_v12_0_get_vbios_fb_size(struct amdgpu_device *adev) >> return 0; >> } >> >> +static u64 gmc_v12_0_get_dcc_alignment(struct amdgpu_device *adev) >> +{ >> + u64 max_tex_channel_caches, alignment; >> + >> + max_tex_channel_caches = >> adev->gfx.config.max_texture_channel_caches; >> + if (is_power_of_2(max_tex_channel_caches)) >> + alignment = (max_tex_channel_caches / SZ_4) * >> max_tex_channel_caches; >> + else >> + alignment = >> roundup_pow_of_two(max_tex_channel_caches) * >> + max_tex_channel_caches; >> + >> + return (u64)alignment * SZ_1K; >> +} >> + >> static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = { >> .flush_gpu_tlb = gmc_v12_0_flush_gpu_tlb, >> .flush_gpu_tlb_pasid = gmc_v12_0_flush_gpu_tlb_pasid, >> @@ -551,6 +565,7 @@ static const struct amdgpu_gmc_funcs >> gmc_v12_0_gmc_funcs = { >> .get_vm_pde = gmc_v12_0_get_vm_pde, >> .get_vm_pte = gmc_v12_0_get_vm_pte, >> .get_vbios_fb_size = gmc_v12_0_get_vbios_fb_size, >> + .get_dcc_alignment = gmc_v12_0_get_dcc_alignment, >> }; >> >> static void gmc_v12_0_set_gmc_funcs(struct amdgpu_device *adev) >> -- >> 2.25.1 >> >
As far as I know, yes. Regards, Christian. Am 17.07.24 um 16:38 schrieb Paneer Selvam, Arunpravin: > Hi Christian, > > Can we use the below combination flags to kick in hardware workaround > while pinning BO's for this specific hw generation. > > if (place->flags & TTM_PL_FLAG_CONTIGUOUS) && > (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 0) || > amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 1))) { > } > > Regards, > Arun. > > On 7/17/2024 2:38 PM, Christian König wrote: >> Well that approach was discussed before and seemed to be to complicated. >> >> But I totally agree that the AMDGPU_GEM_CREATE_GFX12_DCC flag is a >> bad idea. This isn't anything userspace should need to specify in the >> first place. >> >> All we need is a hardware workaround which kicks in all the time >> while pinning BOs for this specific hw generation and texture channel >> configuration. >> >> Please remove the AMDGPU_GEM_CREATE_GFX12_DCC flag again if possible >> or specify why it is actually necessary? >> >> Regards, >> Christian. >> >> Am 17.07.24 um 05:44 schrieb Marek Olšák: >>> AMDGPU_GEM_CREATE_GFX12_DCC is set on 90% of all memory allocations, >>> and almost all of them are not displayable. Shouldn't we use a >>> different way to indicate that we need a non-power-of-two alignment, >>> such as looking at the alignment field directly? >>> >>> Marek >>> >>> On Tue, Jul 16, 2024, 11:45 Arunpravin Paneer Selvam >>> <Arunpravin.PaneerSelvam@amd.com> wrote: >>> >>> Add address alignment support to the DCC VRAM buffers. >>> >>> v2: >>> - adjust size based on the max_texture_channel_caches values >>> only for GFX12 DCC buffers. >>> - used AMDGPU_GEM_CREATE_GFX12_DCC flag to apply change only >>> for DCC buffers. >>> - roundup non power of two DCC buffer adjusted size to nearest >>> power of two number as the buddy allocator does not support non >>> power of two alignments. This applies only to the contiguous >>> DCC buffers. >>> >>> v3:(Alex) >>> - rewrite the max texture channel caches comparison code in an >>> algorithmic way to determine the alignment size. >>> >>> v4:(Alex) >>> - Move the logic from amdgpu_vram_mgr_dcc_alignment() to >>> gmc_v12_0.c >>> and add a new gmc func callback for dcc alignment. If the >>> callback >>> is non-NULL, call it to get the alignment, otherwise, use >>> the default. >>> >>> v5:(Alex) >>> - Set the Alignment to a default value if the callback doesn't >>> exist. >>> - Add the callback to amdgpu_gmc_funcs. >>> >>> v6: >>> - Fix checkpatch error reported by Intel CI. >>> >>> Signed-off-by: Arunpravin Paneer Selvam >>> <Arunpravin.PaneerSelvam@amd.com> >>> Acked-by: Alex Deucher <alexander.deucher@amd.com> >>> Acked-by: Christian König <christian.koenig@amd.com> >>> Reviewed-by: Frank Min <Frank.Min@amd.com> >>> --- >>> drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 6 ++++ >>> drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 36 >>> ++++++++++++++++++-- >>> drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 15 ++++++++ >>> 3 files changed, 55 insertions(+), 2 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h >>> index febca3130497..654d0548a3f8 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h >>> @@ -156,6 +156,8 @@ struct amdgpu_gmc_funcs { >>> uint64_t addr, uint64_t >>> *flags); >>> /* get the amount of memory used by the vbios for pre-OS >>> console */ >>> unsigned int (*get_vbios_fb_size)(struct amdgpu_device >>> *adev); >>> + /* get the DCC buffer alignment */ >>> + u64 (*get_dcc_alignment)(struct amdgpu_device *adev); >>> >>> enum amdgpu_memory_partition (*query_mem_partition_mode)( >>> struct amdgpu_device *adev); >>> @@ -363,6 +365,10 @@ struct amdgpu_gmc { >>> (adev)->gmc.gmc_funcs->override_vm_pte_flags \ >>> ((adev), (vm), (addr), (pte_flags)) >>> #define amdgpu_gmc_get_vbios_fb_size(adev) >>> (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev)) >>> +#define amdgpu_gmc_get_dcc_alignment(_adev) ({ \ >>> + typeof(_adev) (adev) = (_adev); \ >>> + ((adev)->gmc.gmc_funcs->get_dcc_alignment((adev))); \ >>> +}) >>> >>> /** >>> * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible >>> through the BAR >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c >>> index f91cc149d06c..aa9dca12371c 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c >>> @@ -512,6 +512,16 @@ static int amdgpu_vram_mgr_new(struct >>> ttm_resource_manager *man, >>> vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; >>> >>> remaining_size = (u64)vres->base.size; >>> + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && >>> + bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) { >>> + u64 adjust_size; >>> + >>> + if (adev->gmc.gmc_funcs->get_dcc_alignment) { >>> + adjust_size = >>> amdgpu_gmc_get_dcc_alignment(adev); >>> + remaining_size = >>> roundup_pow_of_two(remaining_size + adjust_size); >>> + vres->flags |= DRM_BUDDY_TRIM_DISABLE; >>> + } >>> + } >>> >>> mutex_lock(&mgr->lock); >>> while (remaining_size) { >>> @@ -521,8 +531,12 @@ static int amdgpu_vram_mgr_new(struct >>> ttm_resource_manager *man, >>> min_block_size = mgr->default_page_size; >>> >>> size = remaining_size; >>> - if ((size >= (u64)pages_per_block << PAGE_SHIFT) && >>> - !(size & (((u64)pages_per_block << >>> PAGE_SHIFT) - 1))) >>> + >>> + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && >>> + bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) >>> + min_block_size = size; >>> + else if ((size >= (u64)pages_per_block << >>> PAGE_SHIFT) && >>> + !(size & (((u64)pages_per_block << >>> PAGE_SHIFT) - 1))) >>> min_block_size = (u64)pages_per_block << >>> PAGE_SHIFT; >>> >>> BUG_ON(min_block_size < mm->chunk_size); >>> @@ -553,6 +567,24 @@ static int amdgpu_vram_mgr_new(struct >>> ttm_resource_manager *man, >>> } >>> mutex_unlock(&mgr->lock); >>> >>> + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && >>> + bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) { >>> + struct drm_buddy_block *dcc_block; >>> + u64 dcc_start, alignment; >>> + >>> + dcc_block = >>> amdgpu_vram_mgr_first_block(&vres->blocks); >>> + dcc_start = amdgpu_vram_mgr_block_start(dcc_block); >>> + >>> + if (adev->gmc.gmc_funcs->get_dcc_alignment) { >>> + alignment = >>> amdgpu_gmc_get_dcc_alignment(adev); >>> + /* Adjust the start address for DCC >>> buffers only */ >>> + dcc_start = roundup(dcc_start, alignment); >>> + drm_buddy_block_trim(mm, &dcc_start, >>> + (u64)vres->base.size, >>> + &vres->blocks); >>> + } >>> + } >>> + >>> vres->base.start = 0; >>> size = max_t(u64, >>> amdgpu_vram_mgr_blocks_size(&vres->blocks), >>> vres->base.size); >>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c >>> b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c >>> index fd3ac483760e..4259edcdec8a 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c >>> @@ -542,6 +542,20 @@ static unsigned >>> gmc_v12_0_get_vbios_fb_size(struct amdgpu_device *adev) >>> return 0; >>> } >>> >>> +static u64 gmc_v12_0_get_dcc_alignment(struct amdgpu_device *adev) >>> +{ >>> + u64 max_tex_channel_caches, alignment; >>> + >>> + max_tex_channel_caches = >>> adev->gfx.config.max_texture_channel_caches; >>> + if (is_power_of_2(max_tex_channel_caches)) >>> + alignment = (max_tex_channel_caches / SZ_4) * >>> max_tex_channel_caches; >>> + else >>> + alignment = >>> roundup_pow_of_two(max_tex_channel_caches) * >>> + max_tex_channel_caches; >>> + >>> + return (u64)alignment * SZ_1K; >>> +} >>> + >>> static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = { >>> .flush_gpu_tlb = gmc_v12_0_flush_gpu_tlb, >>> .flush_gpu_tlb_pasid = gmc_v12_0_flush_gpu_tlb_pasid, >>> @@ -551,6 +565,7 @@ static const struct amdgpu_gmc_funcs >>> gmc_v12_0_gmc_funcs = { >>> .get_vm_pde = gmc_v12_0_get_vm_pde, >>> .get_vm_pte = gmc_v12_0_get_vm_pte, >>> .get_vbios_fb_size = gmc_v12_0_get_vbios_fb_size, >>> + .get_dcc_alignment = gmc_v12_0_get_dcc_alignment, >>> }; >>> >>> static void gmc_v12_0_set_gmc_funcs(struct amdgpu_device *adev) >>> -- >>> 2.25.1 >>> >> >
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index febca3130497..654d0548a3f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -156,6 +156,8 @@ struct amdgpu_gmc_funcs { uint64_t addr, uint64_t *flags); /* get the amount of memory used by the vbios for pre-OS console */ unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev); + /* get the DCC buffer alignment */ + u64 (*get_dcc_alignment)(struct amdgpu_device *adev); enum amdgpu_memory_partition (*query_mem_partition_mode)( struct amdgpu_device *adev); @@ -363,6 +365,10 @@ struct amdgpu_gmc { (adev)->gmc.gmc_funcs->override_vm_pte_flags \ ((adev), (vm), (addr), (pte_flags)) #define amdgpu_gmc_get_vbios_fb_size(adev) (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev)) +#define amdgpu_gmc_get_dcc_alignment(_adev) ({ \ + typeof(_adev) (adev) = (_adev); \ + ((adev)->gmc.gmc_funcs->get_dcc_alignment((adev))); \ +}) /** * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the BAR diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index f91cc149d06c..aa9dca12371c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -512,6 +512,16 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; remaining_size = (u64)vres->base.size; + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && + bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) { + u64 adjust_size; + + if (adev->gmc.gmc_funcs->get_dcc_alignment) { + adjust_size = amdgpu_gmc_get_dcc_alignment(adev); + remaining_size = roundup_pow_of_two(remaining_size + adjust_size); + vres->flags |= DRM_BUDDY_TRIM_DISABLE; + } + } mutex_lock(&mgr->lock); while (remaining_size) { @@ -521,8 +531,12 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, min_block_size = mgr->default_page_size; size = remaining_size; - if ((size >= (u64)pages_per_block << PAGE_SHIFT) && - !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1))) + + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && + bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) + min_block_size = size; + else if ((size >= (u64)pages_per_block << PAGE_SHIFT) && + !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1))) min_block_size = (u64)pages_per_block << PAGE_SHIFT; BUG_ON(min_block_size < mm->chunk_size); @@ -553,6 +567,24 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, } mutex_unlock(&mgr->lock); + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && + bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) { + struct drm_buddy_block *dcc_block; + u64 dcc_start, alignment; + + dcc_block = amdgpu_vram_mgr_first_block(&vres->blocks); + dcc_start = amdgpu_vram_mgr_block_start(dcc_block); + + if (adev->gmc.gmc_funcs->get_dcc_alignment) { + alignment = amdgpu_gmc_get_dcc_alignment(adev); + /* Adjust the start address for DCC buffers only */ + dcc_start = roundup(dcc_start, alignment); + drm_buddy_block_trim(mm, &dcc_start, + (u64)vres->base.size, + &vres->blocks); + } + } + vres->base.start = 0; size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks), vres->base.size); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c index fd3ac483760e..4259edcdec8a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -542,6 +542,20 @@ static unsigned gmc_v12_0_get_vbios_fb_size(struct amdgpu_device *adev) return 0; } +static u64 gmc_v12_0_get_dcc_alignment(struct amdgpu_device *adev) +{ + u64 max_tex_channel_caches, alignment; + + max_tex_channel_caches = adev->gfx.config.max_texture_channel_caches; + if (is_power_of_2(max_tex_channel_caches)) + alignment = (max_tex_channel_caches / SZ_4) * max_tex_channel_caches; + else + alignment = roundup_pow_of_two(max_tex_channel_caches) * + max_tex_channel_caches; + + return (u64)alignment * SZ_1K; +} + static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = { .flush_gpu_tlb = gmc_v12_0_flush_gpu_tlb, .flush_gpu_tlb_pasid = gmc_v12_0_flush_gpu_tlb_pasid, @@ -551,6 +565,7 @@ static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = { .get_vm_pde = gmc_v12_0_get_vm_pde, .get_vm_pte = gmc_v12_0_get_vm_pte, .get_vbios_fb_size = gmc_v12_0_get_vbios_fb_size, + .get_dcc_alignment = gmc_v12_0_get_dcc_alignment, }; static void gmc_v12_0_set_gmc_funcs(struct amdgpu_device *adev)