Message ID | 20230404200650.11043-3-thomas.hellstrom@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | drm/ttm: Small fixes / cleanups in prep for shrinking | expand |
On Tue, Apr 04, 2023 at 10:06:49PM +0200, Thomas Hellström wrote: > When swapping out, we will split multi-order pages both in order to > move them to the swap-cache and to be able to return memory to the > swap cache as soon as possible on a page-by-page basis. > Reduce the page max order to the system PMD size, as we can then be nicer > to the system and avoid splitting gigantic pages. > > Looking forward to when we might be able to swap out PMD size folios > without splitting, this will also be a benefit. > > v2: > - Include all orders up to the PMD size (Christian König) > v3: > - Avoid compilation errors for architectures with special PFN_SHIFTs > > Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> > Reviewed-by: Christian König <christian.koenig@amd.com> Apparently this fails on ppc build testing. Please supply build fix asap (or I guess we need to revert). I'm kinda not clear why this only showed up when I merged the drm-misc-next pr into drm-next ... -Daniel > --- > drivers/gpu/drm/ttm/ttm_pool.c | 30 +++++++++++++++++++----------- > 1 file changed, 19 insertions(+), 11 deletions(-) > > diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c > index dfce896c4bae..18c342a919a2 100644 > --- a/drivers/gpu/drm/ttm/ttm_pool.c > +++ b/drivers/gpu/drm/ttm/ttm_pool.c > @@ -47,6 +47,11 @@ > > #include "ttm_module.h" > > +#define TTM_MAX_ORDER (PMD_SHIFT - PAGE_SHIFT) > +#define __TTM_DIM_ORDER (TTM_MAX_ORDER + 1) > +/* Some architectures have a weird PMD_SHIFT */ > +#define TTM_DIM_ORDER (__TTM_DIM_ORDER <= MAX_ORDER ? __TTM_DIM_ORDER : MAX_ORDER) > + > /** > * struct ttm_pool_dma - Helper object for coherent DMA mappings > * > @@ -65,11 +70,11 @@ module_param(page_pool_size, ulong, 0644); > > static atomic_long_t allocated_pages; > > -static struct ttm_pool_type global_write_combined[MAX_ORDER]; > -static struct ttm_pool_type global_uncached[MAX_ORDER]; > +static struct ttm_pool_type global_write_combined[TTM_DIM_ORDER]; > +static struct ttm_pool_type global_uncached[TTM_DIM_ORDER]; > > -static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER]; > -static struct ttm_pool_type global_dma32_uncached[MAX_ORDER]; > +static struct ttm_pool_type global_dma32_write_combined[TTM_DIM_ORDER]; > +static struct ttm_pool_type global_dma32_uncached[TTM_DIM_ORDER]; > > static spinlock_t shrinker_lock; > static struct list_head shrinker_list; > @@ -444,7 +449,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt, > else > gfp_flags |= GFP_HIGHUSER; > > - for (order = min_t(unsigned int, MAX_ORDER - 1, __fls(num_pages)); > + for (order = min_t(unsigned int, TTM_MAX_ORDER, __fls(num_pages)); > num_pages; > order = min_t(unsigned int, order, __fls(num_pages))) { > struct ttm_pool_type *pt; > @@ -563,7 +568,7 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev, > > if (use_dma_alloc) { > for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) > - for (j = 0; j < MAX_ORDER; ++j) > + for (j = 0; j < TTM_DIM_ORDER; ++j) > ttm_pool_type_init(&pool->caching[i].orders[j], > pool, i, j); > } > @@ -583,7 +588,7 @@ void ttm_pool_fini(struct ttm_pool *pool) > > if (pool->use_dma_alloc) { > for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) > - for (j = 0; j < MAX_ORDER; ++j) > + for (j = 0; j < TTM_DIM_ORDER; ++j) > ttm_pool_type_fini(&pool->caching[i].orders[j]); > } > > @@ -637,7 +642,7 @@ static void ttm_pool_debugfs_header(struct seq_file *m) > unsigned int i; > > seq_puts(m, "\t "); > - for (i = 0; i < MAX_ORDER; ++i) > + for (i = 0; i < TTM_DIM_ORDER; ++i) > seq_printf(m, " ---%2u---", i); > seq_puts(m, "\n"); > } > @@ -648,7 +653,7 @@ static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt, > { > unsigned int i; > > - for (i = 0; i < MAX_ORDER; ++i) > + for (i = 0; i < TTM_DIM_ORDER; ++i) > seq_printf(m, " %8u", ttm_pool_type_count(&pt[i])); > seq_puts(m, "\n"); > } > @@ -751,13 +756,16 @@ int ttm_pool_mgr_init(unsigned long num_pages) > { > unsigned int i; > > + BUILD_BUG_ON(TTM_DIM_ORDER > MAX_ORDER); > + BUILD_BUG_ON(TTM_DIM_ORDER < 1); > + > if (!page_pool_size) > page_pool_size = num_pages; > > spin_lock_init(&shrinker_lock); > INIT_LIST_HEAD(&shrinker_list); > > - for (i = 0; i < MAX_ORDER; ++i) { > + for (i = 0; i < TTM_DIM_ORDER; ++i) { > ttm_pool_type_init(&global_write_combined[i], NULL, > ttm_write_combined, i); > ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i); > @@ -790,7 +798,7 @@ void ttm_pool_mgr_fini(void) > { > unsigned int i; > > - for (i = 0; i < MAX_ORDER; ++i) { > + for (i = 0; i < TTM_DIM_ORDER; ++i) { > ttm_pool_type_fini(&global_write_combined[i]); > ttm_pool_type_fini(&global_uncached[i]); > > -- > 2.39.2 >
Am 11.04.23 um 11:51 schrieb Daniel Vetter: > On Tue, Apr 04, 2023 at 10:06:49PM +0200, Thomas Hellström wrote: >> When swapping out, we will split multi-order pages both in order to >> move them to the swap-cache and to be able to return memory to the >> swap cache as soon as possible on a page-by-page basis. >> Reduce the page max order to the system PMD size, as we can then be nicer >> to the system and avoid splitting gigantic pages. >> >> Looking forward to when we might be able to swap out PMD size folios >> without splitting, this will also be a benefit. >> >> v2: >> - Include all orders up to the PMD size (Christian König) >> v3: >> - Avoid compilation errors for architectures with special PFN_SHIFTs >> >> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> >> Reviewed-by: Christian König <christian.koenig@amd.com> > Apparently this fails on ppc build testing. Please supply build fix asap > (or I guess we need to revert). I'm kinda not clear why this only showed > up when I merged the drm-misc-next pr into drm-next ... I'm really wondering this as well. It looks like PMD_SHIFT isn't a constant on this particular platform. But from what I can find in the upstream 6.2 kernel PMD_SHIFT always seems to be a constant. So how exactly can that here break? Christian. > -Daniel > >> --- >> drivers/gpu/drm/ttm/ttm_pool.c | 30 +++++++++++++++++++----------- >> 1 file changed, 19 insertions(+), 11 deletions(-) >> >> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c >> index dfce896c4bae..18c342a919a2 100644 >> --- a/drivers/gpu/drm/ttm/ttm_pool.c >> +++ b/drivers/gpu/drm/ttm/ttm_pool.c >> @@ -47,6 +47,11 @@ >> >> #include "ttm_module.h" >> >> +#define TTM_MAX_ORDER (PMD_SHIFT - PAGE_SHIFT) >> +#define __TTM_DIM_ORDER (TTM_MAX_ORDER + 1) >> +/* Some architectures have a weird PMD_SHIFT */ >> +#define TTM_DIM_ORDER (__TTM_DIM_ORDER <= MAX_ORDER ? __TTM_DIM_ORDER : MAX_ORDER) >> + >> /** >> * struct ttm_pool_dma - Helper object for coherent DMA mappings >> * >> @@ -65,11 +70,11 @@ module_param(page_pool_size, ulong, 0644); >> >> static atomic_long_t allocated_pages; >> >> -static struct ttm_pool_type global_write_combined[MAX_ORDER]; >> -static struct ttm_pool_type global_uncached[MAX_ORDER]; >> +static struct ttm_pool_type global_write_combined[TTM_DIM_ORDER]; >> +static struct ttm_pool_type global_uncached[TTM_DIM_ORDER]; >> >> -static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER]; >> -static struct ttm_pool_type global_dma32_uncached[MAX_ORDER]; >> +static struct ttm_pool_type global_dma32_write_combined[TTM_DIM_ORDER]; >> +static struct ttm_pool_type global_dma32_uncached[TTM_DIM_ORDER]; >> >> static spinlock_t shrinker_lock; >> static struct list_head shrinker_list; >> @@ -444,7 +449,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt, >> else >> gfp_flags |= GFP_HIGHUSER; >> >> - for (order = min_t(unsigned int, MAX_ORDER - 1, __fls(num_pages)); >> + for (order = min_t(unsigned int, TTM_MAX_ORDER, __fls(num_pages)); >> num_pages; >> order = min_t(unsigned int, order, __fls(num_pages))) { >> struct ttm_pool_type *pt; >> @@ -563,7 +568,7 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev, >> >> if (use_dma_alloc) { >> for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) >> - for (j = 0; j < MAX_ORDER; ++j) >> + for (j = 0; j < TTM_DIM_ORDER; ++j) >> ttm_pool_type_init(&pool->caching[i].orders[j], >> pool, i, j); >> } >> @@ -583,7 +588,7 @@ void ttm_pool_fini(struct ttm_pool *pool) >> >> if (pool->use_dma_alloc) { >> for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) >> - for (j = 0; j < MAX_ORDER; ++j) >> + for (j = 0; j < TTM_DIM_ORDER; ++j) >> ttm_pool_type_fini(&pool->caching[i].orders[j]); >> } >> >> @@ -637,7 +642,7 @@ static void ttm_pool_debugfs_header(struct seq_file *m) >> unsigned int i; >> >> seq_puts(m, "\t "); >> - for (i = 0; i < MAX_ORDER; ++i) >> + for (i = 0; i < TTM_DIM_ORDER; ++i) >> seq_printf(m, " ---%2u---", i); >> seq_puts(m, "\n"); >> } >> @@ -648,7 +653,7 @@ static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt, >> { >> unsigned int i; >> >> - for (i = 0; i < MAX_ORDER; ++i) >> + for (i = 0; i < TTM_DIM_ORDER; ++i) >> seq_printf(m, " %8u", ttm_pool_type_count(&pt[i])); >> seq_puts(m, "\n"); >> } >> @@ -751,13 +756,16 @@ int ttm_pool_mgr_init(unsigned long num_pages) >> { >> unsigned int i; >> >> + BUILD_BUG_ON(TTM_DIM_ORDER > MAX_ORDER); >> + BUILD_BUG_ON(TTM_DIM_ORDER < 1); >> + >> if (!page_pool_size) >> page_pool_size = num_pages; >> >> spin_lock_init(&shrinker_lock); >> INIT_LIST_HEAD(&shrinker_list); >> >> - for (i = 0; i < MAX_ORDER; ++i) { >> + for (i = 0; i < TTM_DIM_ORDER; ++i) { >> ttm_pool_type_init(&global_write_combined[i], NULL, >> ttm_write_combined, i); >> ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i); >> @@ -790,7 +798,7 @@ void ttm_pool_mgr_fini(void) >> { >> unsigned int i; >> >> - for (i = 0; i < MAX_ORDER; ++i) { >> + for (i = 0; i < TTM_DIM_ORDER; ++i) { >> ttm_pool_type_fini(&global_write_combined[i]); >> ttm_pool_type_fini(&global_uncached[i]); >> >> -- >> 2.39.2 >>
On Tue, Apr 11, 2023 at 02:11:18PM +0200, Christian König wrote: > Am 11.04.23 um 11:51 schrieb Daniel Vetter: > > On Tue, Apr 04, 2023 at 10:06:49PM +0200, Thomas Hellström wrote: > > > When swapping out, we will split multi-order pages both in order to > > > move them to the swap-cache and to be able to return memory to the > > > swap cache as soon as possible on a page-by-page basis. > > > Reduce the page max order to the system PMD size, as we can then be nicer > > > to the system and avoid splitting gigantic pages. > > > > > > Looking forward to when we might be able to swap out PMD size folios > > > without splitting, this will also be a benefit. > > > > > > v2: > > > - Include all orders up to the PMD size (Christian König) > > > v3: > > > - Avoid compilation errors for architectures with special PFN_SHIFTs > > > > > > Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> > > > Reviewed-by: Christian König <christian.koenig@amd.com> > > Apparently this fails on ppc build testing. Please supply build fix asap > > (or I guess we need to revert). I'm kinda not clear why this only showed > > up when I merged the drm-misc-next pr into drm-next ... > > I'm really wondering this as well. It looks like PMD_SHIFT isn't a constant > on this particular platform. > > But from what I can find in the upstream 6.2 kernel PMD_SHIFT always seems > to be a constant. > > So how exactly can that here break? There's some in-flight patches to rework MAX_ORDER and other things in linux-next, maybe it's recent? If you check out linux-next then you need to reapply the patch (since sfr reverted it). -Daniel > > Christian. > > > -Daniel > > > > > --- > > > drivers/gpu/drm/ttm/ttm_pool.c | 30 +++++++++++++++++++----------- > > > 1 file changed, 19 insertions(+), 11 deletions(-) > > > > > > diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c > > > index dfce896c4bae..18c342a919a2 100644 > > > --- a/drivers/gpu/drm/ttm/ttm_pool.c > > > +++ b/drivers/gpu/drm/ttm/ttm_pool.c > > > @@ -47,6 +47,11 @@ > > > #include "ttm_module.h" > > > +#define TTM_MAX_ORDER (PMD_SHIFT - PAGE_SHIFT) > > > +#define __TTM_DIM_ORDER (TTM_MAX_ORDER + 1) > > > +/* Some architectures have a weird PMD_SHIFT */ > > > +#define TTM_DIM_ORDER (__TTM_DIM_ORDER <= MAX_ORDER ? __TTM_DIM_ORDER : MAX_ORDER) > > > + > > > /** > > > * struct ttm_pool_dma - Helper object for coherent DMA mappings > > > * > > > @@ -65,11 +70,11 @@ module_param(page_pool_size, ulong, 0644); > > > static atomic_long_t allocated_pages; > > > -static struct ttm_pool_type global_write_combined[MAX_ORDER]; > > > -static struct ttm_pool_type global_uncached[MAX_ORDER]; > > > +static struct ttm_pool_type global_write_combined[TTM_DIM_ORDER]; > > > +static struct ttm_pool_type global_uncached[TTM_DIM_ORDER]; > > > -static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER]; > > > -static struct ttm_pool_type global_dma32_uncached[MAX_ORDER]; > > > +static struct ttm_pool_type global_dma32_write_combined[TTM_DIM_ORDER]; > > > +static struct ttm_pool_type global_dma32_uncached[TTM_DIM_ORDER]; > > > static spinlock_t shrinker_lock; > > > static struct list_head shrinker_list; > > > @@ -444,7 +449,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt, > > > else > > > gfp_flags |= GFP_HIGHUSER; > > > - for (order = min_t(unsigned int, MAX_ORDER - 1, __fls(num_pages)); > > > + for (order = min_t(unsigned int, TTM_MAX_ORDER, __fls(num_pages)); > > > num_pages; > > > order = min_t(unsigned int, order, __fls(num_pages))) { > > > struct ttm_pool_type *pt; > > > @@ -563,7 +568,7 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev, > > > if (use_dma_alloc) { > > > for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) > > > - for (j = 0; j < MAX_ORDER; ++j) > > > + for (j = 0; j < TTM_DIM_ORDER; ++j) > > > ttm_pool_type_init(&pool->caching[i].orders[j], > > > pool, i, j); > > > } > > > @@ -583,7 +588,7 @@ void ttm_pool_fini(struct ttm_pool *pool) > > > if (pool->use_dma_alloc) { > > > for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) > > > - for (j = 0; j < MAX_ORDER; ++j) > > > + for (j = 0; j < TTM_DIM_ORDER; ++j) > > > ttm_pool_type_fini(&pool->caching[i].orders[j]); > > > } > > > @@ -637,7 +642,7 @@ static void ttm_pool_debugfs_header(struct seq_file *m) > > > unsigned int i; > > > seq_puts(m, "\t "); > > > - for (i = 0; i < MAX_ORDER; ++i) > > > + for (i = 0; i < TTM_DIM_ORDER; ++i) > > > seq_printf(m, " ---%2u---", i); > > > seq_puts(m, "\n"); > > > } > > > @@ -648,7 +653,7 @@ static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt, > > > { > > > unsigned int i; > > > - for (i = 0; i < MAX_ORDER; ++i) > > > + for (i = 0; i < TTM_DIM_ORDER; ++i) > > > seq_printf(m, " %8u", ttm_pool_type_count(&pt[i])); > > > seq_puts(m, "\n"); > > > } > > > @@ -751,13 +756,16 @@ int ttm_pool_mgr_init(unsigned long num_pages) > > > { > > > unsigned int i; > > > + BUILD_BUG_ON(TTM_DIM_ORDER > MAX_ORDER); > > > + BUILD_BUG_ON(TTM_DIM_ORDER < 1); > > > + > > > if (!page_pool_size) > > > page_pool_size = num_pages; > > > spin_lock_init(&shrinker_lock); > > > INIT_LIST_HEAD(&shrinker_list); > > > - for (i = 0; i < MAX_ORDER; ++i) { > > > + for (i = 0; i < TTM_DIM_ORDER; ++i) { > > > ttm_pool_type_init(&global_write_combined[i], NULL, > > > ttm_write_combined, i); > > > ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i); > > > @@ -790,7 +798,7 @@ void ttm_pool_mgr_fini(void) > > > { > > > unsigned int i; > > > - for (i = 0; i < MAX_ORDER; ++i) { > > > + for (i = 0; i < TTM_DIM_ORDER; ++i) { > > > ttm_pool_type_fini(&global_write_combined[i]); > > > ttm_pool_type_fini(&global_uncached[i]); > > > -- > > > 2.39.2 > > > >
On Tue, 11 Apr 2023 at 15:45, Daniel Vetter <daniel@ffwll.ch> wrote: > > On Tue, Apr 11, 2023 at 02:11:18PM +0200, Christian König wrote: > > Am 11.04.23 um 11:51 schrieb Daniel Vetter: > > > On Tue, Apr 04, 2023 at 10:06:49PM +0200, Thomas Hellström wrote: > > > > When swapping out, we will split multi-order pages both in order to > > > > move them to the swap-cache and to be able to return memory to the > > > > swap cache as soon as possible on a page-by-page basis. > > > > Reduce the page max order to the system PMD size, as we can then be nicer > > > > to the system and avoid splitting gigantic pages. > > > > > > > > Looking forward to when we might be able to swap out PMD size folios > > > > without splitting, this will also be a benefit. > > > > > > > > v2: > > > > - Include all orders up to the PMD size (Christian König) > > > > v3: > > > > - Avoid compilation errors for architectures with special PFN_SHIFTs > > > > > > > > Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> > > > > Reviewed-by: Christian König <christian.koenig@amd.com> > > > Apparently this fails on ppc build testing. Please supply build fix asap > > > (or I guess we need to revert). I'm kinda not clear why this only showed > > > up when I merged the drm-misc-next pr into drm-next ... > > > > I'm really wondering this as well. It looks like PMD_SHIFT isn't a constant > > on this particular platform. > > > > But from what I can find in the upstream 6.2 kernel PMD_SHIFT always seems > > to be a constant. > > > > So how exactly can that here break? > > There's some in-flight patches to rework MAX_ORDER and other things in > linux-next, maybe it's recent? If you check out linux-next then you need > to reapply the patch (since sfr reverted it). So I looked and on ppc64 PMD_SHIFT is defined in terms of PTE_INDEX_SIZE, which is defined (for book3s) in terms of the variable __pte_index_size. This is in 6.3 already and seems pretty old. So revert? Or fixup patch to make this work on ppc? > > > > --- > > > > drivers/gpu/drm/ttm/ttm_pool.c | 30 +++++++++++++++++++----------- > > > > 1 file changed, 19 insertions(+), 11 deletions(-) > > > > > > > > diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c > > > > index dfce896c4bae..18c342a919a2 100644 > > > > --- a/drivers/gpu/drm/ttm/ttm_pool.c > > > > +++ b/drivers/gpu/drm/ttm/ttm_pool.c > > > > @@ -47,6 +47,11 @@ > > > > #include "ttm_module.h" > > > > +#define TTM_MAX_ORDER (PMD_SHIFT - PAGE_SHIFT) > > > > +#define __TTM_DIM_ORDER (TTM_MAX_ORDER + 1) > > > > +/* Some architectures have a weird PMD_SHIFT */ > > > > +#define TTM_DIM_ORDER (__TTM_DIM_ORDER <= MAX_ORDER ? __TTM_DIM_ORDER : MAX_ORDER) > > > > + > > > > /** > > > > * struct ttm_pool_dma - Helper object for coherent DMA mappings > > > > * > > > > @@ -65,11 +70,11 @@ module_param(page_pool_size, ulong, 0644); > > > > static atomic_long_t allocated_pages; > > > > -static struct ttm_pool_type global_write_combined[MAX_ORDER]; > > > > -static struct ttm_pool_type global_uncached[MAX_ORDER]; > > > > +static struct ttm_pool_type global_write_combined[TTM_DIM_ORDER]; > > > > +static struct ttm_pool_type global_uncached[TTM_DIM_ORDER]; > > > > -static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER]; > > > > -static struct ttm_pool_type global_dma32_uncached[MAX_ORDER]; > > > > +static struct ttm_pool_type global_dma32_write_combined[TTM_DIM_ORDER]; > > > > +static struct ttm_pool_type global_dma32_uncached[TTM_DIM_ORDER]; > > > > static spinlock_t shrinker_lock; > > > > static struct list_head shrinker_list; > > > > @@ -444,7 +449,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt, > > > > else > > > > gfp_flags |= GFP_HIGHUSER; > > > > - for (order = min_t(unsigned int, MAX_ORDER - 1, __fls(num_pages)); > > > > + for (order = min_t(unsigned int, TTM_MAX_ORDER, __fls(num_pages)); > > > > num_pages; > > > > order = min_t(unsigned int, order, __fls(num_pages))) { > > > > struct ttm_pool_type *pt; > > > > @@ -563,7 +568,7 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev, > > > > if (use_dma_alloc) { > > > > for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) > > > > - for (j = 0; j < MAX_ORDER; ++j) > > > > + for (j = 0; j < TTM_DIM_ORDER; ++j) > > > > ttm_pool_type_init(&pool->caching[i].orders[j], > > > > pool, i, j); > > > > } > > > > @@ -583,7 +588,7 @@ void ttm_pool_fini(struct ttm_pool *pool) > > > > if (pool->use_dma_alloc) { > > > > for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) > > > > - for (j = 0; j < MAX_ORDER; ++j) > > > > + for (j = 0; j < TTM_DIM_ORDER; ++j) > > > > ttm_pool_type_fini(&pool->caching[i].orders[j]); > > > > } > > > > @@ -637,7 +642,7 @@ static void ttm_pool_debugfs_header(struct seq_file *m) > > > > unsigned int i; > > > > seq_puts(m, "\t "); > > > > - for (i = 0; i < MAX_ORDER; ++i) > > > > + for (i = 0; i < TTM_DIM_ORDER; ++i) > > > > seq_printf(m, " ---%2u---", i); > > > > seq_puts(m, "\n"); > > > > } > > > > @@ -648,7 +653,7 @@ static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt, > > > > { > > > > unsigned int i; > > > > - for (i = 0; i < MAX_ORDER; ++i) > > > > + for (i = 0; i < TTM_DIM_ORDER; ++i) > > > > seq_printf(m, " %8u", ttm_pool_type_count(&pt[i])); > > > > seq_puts(m, "\n"); > > > > } > > > > @@ -751,13 +756,16 @@ int ttm_pool_mgr_init(unsigned long num_pages) > > > > { > > > > unsigned int i; > > > > + BUILD_BUG_ON(TTM_DIM_ORDER > MAX_ORDER); > > > > + BUILD_BUG_ON(TTM_DIM_ORDER < 1); > > > > + > > > > if (!page_pool_size) > > > > page_pool_size = num_pages; > > > > spin_lock_init(&shrinker_lock); > > > > INIT_LIST_HEAD(&shrinker_list); > > > > - for (i = 0; i < MAX_ORDER; ++i) { > > > > + for (i = 0; i < TTM_DIM_ORDER; ++i) { > > > > ttm_pool_type_init(&global_write_combined[i], NULL, > > > > ttm_write_combined, i); > > > > ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i); > > > > @@ -790,7 +798,7 @@ void ttm_pool_mgr_fini(void) > > > > { > > > > unsigned int i; > > > > - for (i = 0; i < MAX_ORDER; ++i) { > > > > + for (i = 0; i < TTM_DIM_ORDER; ++i) { > > > > ttm_pool_type_fini(&global_write_combined[i]); > > > > ttm_pool_type_fini(&global_uncached[i]); > > > > -- > > > > 2.39.2 > > > > > > > > -- > Daniel Vetter > Software Engineer, Intel Corporation > http://blog.ffwll.ch
Am 12.04.23 um 11:08 schrieb Daniel Vetter: > On Tue, 11 Apr 2023 at 15:45, Daniel Vetter <daniel@ffwll.ch> wrote: >> On Tue, Apr 11, 2023 at 02:11:18PM +0200, Christian König wrote: >>> Am 11.04.23 um 11:51 schrieb Daniel Vetter: >>>> On Tue, Apr 04, 2023 at 10:06:49PM +0200, Thomas Hellström wrote: >>>>> When swapping out, we will split multi-order pages both in order to >>>>> move them to the swap-cache and to be able to return memory to the >>>>> swap cache as soon as possible on a page-by-page basis. >>>>> Reduce the page max order to the system PMD size, as we can then be nicer >>>>> to the system and avoid splitting gigantic pages. >>>>> >>>>> Looking forward to when we might be able to swap out PMD size folios >>>>> without splitting, this will also be a benefit. >>>>> >>>>> v2: >>>>> - Include all orders up to the PMD size (Christian König) >>>>> v3: >>>>> - Avoid compilation errors for architectures with special PFN_SHIFTs >>>>> >>>>> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> >>>>> Reviewed-by: Christian König <christian.koenig@amd.com> >>>> Apparently this fails on ppc build testing. Please supply build fix asap >>>> (or I guess we need to revert). I'm kinda not clear why this only showed >>>> up when I merged the drm-misc-next pr into drm-next ... >>> I'm really wondering this as well. It looks like PMD_SHIFT isn't a constant >>> on this particular platform. >>> >>> But from what I can find in the upstream 6.2 kernel PMD_SHIFT always seems >>> to be a constant. >>> >>> So how exactly can that here break? >> There's some in-flight patches to rework MAX_ORDER and other things in >> linux-next, maybe it's recent? If you check out linux-next then you need >> to reapply the patch (since sfr reverted it). > So I looked and on ppc64 PMD_SHIFT is defined in terms of > PTE_INDEX_SIZE, which is defined (for book3s) in terms of the variable > __pte_index_size. This is in 6.3 already and seems pretty old. Ah! I missed that one, thanks. > So revert? Or fixup patch to make this work on ppc? I think for now just revert or change it so that we check if PMD_SHIFT is a constant. Thomas do you have any quick solution? Christian. > > >>>>> --- >>>>> drivers/gpu/drm/ttm/ttm_pool.c | 30 +++++++++++++++++++----------- >>>>> 1 file changed, 19 insertions(+), 11 deletions(-) >>>>> >>>>> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c >>>>> index dfce896c4bae..18c342a919a2 100644 >>>>> --- a/drivers/gpu/drm/ttm/ttm_pool.c >>>>> +++ b/drivers/gpu/drm/ttm/ttm_pool.c >>>>> @@ -47,6 +47,11 @@ >>>>> #include "ttm_module.h" >>>>> +#define TTM_MAX_ORDER (PMD_SHIFT - PAGE_SHIFT) >>>>> +#define __TTM_DIM_ORDER (TTM_MAX_ORDER + 1) >>>>> +/* Some architectures have a weird PMD_SHIFT */ >>>>> +#define TTM_DIM_ORDER (__TTM_DIM_ORDER <= MAX_ORDER ? __TTM_DIM_ORDER : MAX_ORDER) >>>>> + >>>>> /** >>>>> * struct ttm_pool_dma - Helper object for coherent DMA mappings >>>>> * >>>>> @@ -65,11 +70,11 @@ module_param(page_pool_size, ulong, 0644); >>>>> static atomic_long_t allocated_pages; >>>>> -static struct ttm_pool_type global_write_combined[MAX_ORDER]; >>>>> -static struct ttm_pool_type global_uncached[MAX_ORDER]; >>>>> +static struct ttm_pool_type global_write_combined[TTM_DIM_ORDER]; >>>>> +static struct ttm_pool_type global_uncached[TTM_DIM_ORDER]; >>>>> -static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER]; >>>>> -static struct ttm_pool_type global_dma32_uncached[MAX_ORDER]; >>>>> +static struct ttm_pool_type global_dma32_write_combined[TTM_DIM_ORDER]; >>>>> +static struct ttm_pool_type global_dma32_uncached[TTM_DIM_ORDER]; >>>>> static spinlock_t shrinker_lock; >>>>> static struct list_head shrinker_list; >>>>> @@ -444,7 +449,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt, >>>>> else >>>>> gfp_flags |= GFP_HIGHUSER; >>>>> - for (order = min_t(unsigned int, MAX_ORDER - 1, __fls(num_pages)); >>>>> + for (order = min_t(unsigned int, TTM_MAX_ORDER, __fls(num_pages)); >>>>> num_pages; >>>>> order = min_t(unsigned int, order, __fls(num_pages))) { >>>>> struct ttm_pool_type *pt; >>>>> @@ -563,7 +568,7 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev, >>>>> if (use_dma_alloc) { >>>>> for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) >>>>> - for (j = 0; j < MAX_ORDER; ++j) >>>>> + for (j = 0; j < TTM_DIM_ORDER; ++j) >>>>> ttm_pool_type_init(&pool->caching[i].orders[j], >>>>> pool, i, j); >>>>> } >>>>> @@ -583,7 +588,7 @@ void ttm_pool_fini(struct ttm_pool *pool) >>>>> if (pool->use_dma_alloc) { >>>>> for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) >>>>> - for (j = 0; j < MAX_ORDER; ++j) >>>>> + for (j = 0; j < TTM_DIM_ORDER; ++j) >>>>> ttm_pool_type_fini(&pool->caching[i].orders[j]); >>>>> } >>>>> @@ -637,7 +642,7 @@ static void ttm_pool_debugfs_header(struct seq_file *m) >>>>> unsigned int i; >>>>> seq_puts(m, "\t "); >>>>> - for (i = 0; i < MAX_ORDER; ++i) >>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i) >>>>> seq_printf(m, " ---%2u---", i); >>>>> seq_puts(m, "\n"); >>>>> } >>>>> @@ -648,7 +653,7 @@ static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt, >>>>> { >>>>> unsigned int i; >>>>> - for (i = 0; i < MAX_ORDER; ++i) >>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i) >>>>> seq_printf(m, " %8u", ttm_pool_type_count(&pt[i])); >>>>> seq_puts(m, "\n"); >>>>> } >>>>> @@ -751,13 +756,16 @@ int ttm_pool_mgr_init(unsigned long num_pages) >>>>> { >>>>> unsigned int i; >>>>> + BUILD_BUG_ON(TTM_DIM_ORDER > MAX_ORDER); >>>>> + BUILD_BUG_ON(TTM_DIM_ORDER < 1); >>>>> + >>>>> if (!page_pool_size) >>>>> page_pool_size = num_pages; >>>>> spin_lock_init(&shrinker_lock); >>>>> INIT_LIST_HEAD(&shrinker_list); >>>>> - for (i = 0; i < MAX_ORDER; ++i) { >>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i) { >>>>> ttm_pool_type_init(&global_write_combined[i], NULL, >>>>> ttm_write_combined, i); >>>>> ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i); >>>>> @@ -790,7 +798,7 @@ void ttm_pool_mgr_fini(void) >>>>> { >>>>> unsigned int i; >>>>> - for (i = 0; i < MAX_ORDER; ++i) { >>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i) { >>>>> ttm_pool_type_fini(&global_write_combined[i]); >>>>> ttm_pool_type_fini(&global_uncached[i]); >>>>> -- >>>>> 2.39.2 >>>>> >> -- >> Daniel Vetter >> Software Engineer, Intel Corporation >> http://blog.ffwll.ch > >
On Wed, 12 Apr 2023 at 16:18, Christian König <christian.koenig@amd.com> wrote: > > Am 12.04.23 um 11:08 schrieb Daniel Vetter: > > On Tue, 11 Apr 2023 at 15:45, Daniel Vetter <daniel@ffwll.ch> wrote: > >> On Tue, Apr 11, 2023 at 02:11:18PM +0200, Christian König wrote: > >>> Am 11.04.23 um 11:51 schrieb Daniel Vetter: > >>>> On Tue, Apr 04, 2023 at 10:06:49PM +0200, Thomas Hellström wrote: > >>>>> When swapping out, we will split multi-order pages both in order to > >>>>> move them to the swap-cache and to be able to return memory to the > >>>>> swap cache as soon as possible on a page-by-page basis. > >>>>> Reduce the page max order to the system PMD size, as we can then be nicer > >>>>> to the system and avoid splitting gigantic pages. > >>>>> > >>>>> Looking forward to when we might be able to swap out PMD size folios > >>>>> without splitting, this will also be a benefit. > >>>>> > >>>>> v2: > >>>>> - Include all orders up to the PMD size (Christian König) > >>>>> v3: > >>>>> - Avoid compilation errors for architectures with special PFN_SHIFTs > >>>>> > >>>>> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> > >>>>> Reviewed-by: Christian König <christian.koenig@amd.com> > >>>> Apparently this fails on ppc build testing. Please supply build fix asap > >>>> (or I guess we need to revert). I'm kinda not clear why this only showed > >>>> up when I merged the drm-misc-next pr into drm-next ... > >>> I'm really wondering this as well. It looks like PMD_SHIFT isn't a constant > >>> on this particular platform. > >>> > >>> But from what I can find in the upstream 6.2 kernel PMD_SHIFT always seems > >>> to be a constant. > >>> > >>> So how exactly can that here break? > >> There's some in-flight patches to rework MAX_ORDER and other things in > >> linux-next, maybe it's recent? If you check out linux-next then you need > >> to reapply the patch (since sfr reverted it). > > So I looked and on ppc64 PMD_SHIFT is defined in terms of > > PTE_INDEX_SIZE, which is defined (for book3s) in terms of the variable > > __pte_index_size. This is in 6.3 already and seems pretty old. > > Ah! I missed that one, thanks. > > > So revert? Or fixup patch to make this work on ppc? > > I think for now just revert or change it so that we check if PMD_SHIFT > is a constant. > > Thomas do you have any quick solution? I guess Thomas is on vacations. Can you pls do the revert and push it to drm-misc-next-fixes so this won't get lost? Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> preemptively for that. Normally I think we could wait a bit more but it's really close to merge window PR and I don't like handing too many open things to Dave when he's back :-) -Daniel > > Christian. > > > > > > >>>>> --- > >>>>> drivers/gpu/drm/ttm/ttm_pool.c | 30 +++++++++++++++++++----------- > >>>>> 1 file changed, 19 insertions(+), 11 deletions(-) > >>>>> > >>>>> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c > >>>>> index dfce896c4bae..18c342a919a2 100644 > >>>>> --- a/drivers/gpu/drm/ttm/ttm_pool.c > >>>>> +++ b/drivers/gpu/drm/ttm/ttm_pool.c > >>>>> @@ -47,6 +47,11 @@ > >>>>> #include "ttm_module.h" > >>>>> +#define TTM_MAX_ORDER (PMD_SHIFT - PAGE_SHIFT) > >>>>> +#define __TTM_DIM_ORDER (TTM_MAX_ORDER + 1) > >>>>> +/* Some architectures have a weird PMD_SHIFT */ > >>>>> +#define TTM_DIM_ORDER (__TTM_DIM_ORDER <= MAX_ORDER ? __TTM_DIM_ORDER : MAX_ORDER) > >>>>> + > >>>>> /** > >>>>> * struct ttm_pool_dma - Helper object for coherent DMA mappings > >>>>> * > >>>>> @@ -65,11 +70,11 @@ module_param(page_pool_size, ulong, 0644); > >>>>> static atomic_long_t allocated_pages; > >>>>> -static struct ttm_pool_type global_write_combined[MAX_ORDER]; > >>>>> -static struct ttm_pool_type global_uncached[MAX_ORDER]; > >>>>> +static struct ttm_pool_type global_write_combined[TTM_DIM_ORDER]; > >>>>> +static struct ttm_pool_type global_uncached[TTM_DIM_ORDER]; > >>>>> -static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER]; > >>>>> -static struct ttm_pool_type global_dma32_uncached[MAX_ORDER]; > >>>>> +static struct ttm_pool_type global_dma32_write_combined[TTM_DIM_ORDER]; > >>>>> +static struct ttm_pool_type global_dma32_uncached[TTM_DIM_ORDER]; > >>>>> static spinlock_t shrinker_lock; > >>>>> static struct list_head shrinker_list; > >>>>> @@ -444,7 +449,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt, > >>>>> else > >>>>> gfp_flags |= GFP_HIGHUSER; > >>>>> - for (order = min_t(unsigned int, MAX_ORDER - 1, __fls(num_pages)); > >>>>> + for (order = min_t(unsigned int, TTM_MAX_ORDER, __fls(num_pages)); > >>>>> num_pages; > >>>>> order = min_t(unsigned int, order, __fls(num_pages))) { > >>>>> struct ttm_pool_type *pt; > >>>>> @@ -563,7 +568,7 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev, > >>>>> if (use_dma_alloc) { > >>>>> for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) > >>>>> - for (j = 0; j < MAX_ORDER; ++j) > >>>>> + for (j = 0; j < TTM_DIM_ORDER; ++j) > >>>>> ttm_pool_type_init(&pool->caching[i].orders[j], > >>>>> pool, i, j); > >>>>> } > >>>>> @@ -583,7 +588,7 @@ void ttm_pool_fini(struct ttm_pool *pool) > >>>>> if (pool->use_dma_alloc) { > >>>>> for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) > >>>>> - for (j = 0; j < MAX_ORDER; ++j) > >>>>> + for (j = 0; j < TTM_DIM_ORDER; ++j) > >>>>> ttm_pool_type_fini(&pool->caching[i].orders[j]); > >>>>> } > >>>>> @@ -637,7 +642,7 @@ static void ttm_pool_debugfs_header(struct seq_file *m) > >>>>> unsigned int i; > >>>>> seq_puts(m, "\t "); > >>>>> - for (i = 0; i < MAX_ORDER; ++i) > >>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i) > >>>>> seq_printf(m, " ---%2u---", i); > >>>>> seq_puts(m, "\n"); > >>>>> } > >>>>> @@ -648,7 +653,7 @@ static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt, > >>>>> { > >>>>> unsigned int i; > >>>>> - for (i = 0; i < MAX_ORDER; ++i) > >>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i) > >>>>> seq_printf(m, " %8u", ttm_pool_type_count(&pt[i])); > >>>>> seq_puts(m, "\n"); > >>>>> } > >>>>> @@ -751,13 +756,16 @@ int ttm_pool_mgr_init(unsigned long num_pages) > >>>>> { > >>>>> unsigned int i; > >>>>> + BUILD_BUG_ON(TTM_DIM_ORDER > MAX_ORDER); > >>>>> + BUILD_BUG_ON(TTM_DIM_ORDER < 1); > >>>>> + > >>>>> if (!page_pool_size) > >>>>> page_pool_size = num_pages; > >>>>> spin_lock_init(&shrinker_lock); > >>>>> INIT_LIST_HEAD(&shrinker_list); > >>>>> - for (i = 0; i < MAX_ORDER; ++i) { > >>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i) { > >>>>> ttm_pool_type_init(&global_write_combined[i], NULL, > >>>>> ttm_write_combined, i); > >>>>> ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i); > >>>>> @@ -790,7 +798,7 @@ void ttm_pool_mgr_fini(void) > >>>>> { > >>>>> unsigned int i; > >>>>> - for (i = 0; i < MAX_ORDER; ++i) { > >>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i) { > >>>>> ttm_pool_type_fini(&global_write_combined[i]); > >>>>> ttm_pool_type_fini(&global_uncached[i]); > >>>>> -- > >>>>> 2.39.2 > >>>>> > >> -- > >> Daniel Vetter > >> Software Engineer, Intel Corporation > >> http://blog.ffwll.ch > > > > >
Am 13.04.23 um 10:48 schrieb Daniel Vetter: > On Wed, 12 Apr 2023 at 16:18, Christian König <christian.koenig@amd.com> wrote: >> Am 12.04.23 um 11:08 schrieb Daniel Vetter: >>> On Tue, 11 Apr 2023 at 15:45, Daniel Vetter <daniel@ffwll.ch> wrote: >>>> On Tue, Apr 11, 2023 at 02:11:18PM +0200, Christian König wrote: >>>>> Am 11.04.23 um 11:51 schrieb Daniel Vetter: >>>>>> On Tue, Apr 04, 2023 at 10:06:49PM +0200, Thomas Hellström wrote: >>>>>>> When swapping out, we will split multi-order pages both in order to >>>>>>> move them to the swap-cache and to be able to return memory to the >>>>>>> swap cache as soon as possible on a page-by-page basis. >>>>>>> Reduce the page max order to the system PMD size, as we can then be nicer >>>>>>> to the system and avoid splitting gigantic pages. >>>>>>> >>>>>>> Looking forward to when we might be able to swap out PMD size folios >>>>>>> without splitting, this will also be a benefit. >>>>>>> >>>>>>> v2: >>>>>>> - Include all orders up to the PMD size (Christian König) >>>>>>> v3: >>>>>>> - Avoid compilation errors for architectures with special PFN_SHIFTs >>>>>>> >>>>>>> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> >>>>>>> Reviewed-by: Christian König <christian.koenig@amd.com> >>>>>> Apparently this fails on ppc build testing. Please supply build fix asap >>>>>> (or I guess we need to revert). I'm kinda not clear why this only showed >>>>>> up when I merged the drm-misc-next pr into drm-next ... >>>>> I'm really wondering this as well. It looks like PMD_SHIFT isn't a constant >>>>> on this particular platform. >>>>> >>>>> But from what I can find in the upstream 6.2 kernel PMD_SHIFT always seems >>>>> to be a constant. >>>>> >>>>> So how exactly can that here break? >>>> There's some in-flight patches to rework MAX_ORDER and other things in >>>> linux-next, maybe it's recent? If you check out linux-next then you need >>>> to reapply the patch (since sfr reverted it). >>> So I looked and on ppc64 PMD_SHIFT is defined in terms of >>> PTE_INDEX_SIZE, which is defined (for book3s) in terms of the variable >>> __pte_index_size. This is in 6.3 already and seems pretty old. >> Ah! I missed that one, thanks. >> >>> So revert? Or fixup patch to make this work on ppc? >> I think for now just revert or change it so that we check if PMD_SHIFT >> is a constant. >> >> Thomas do you have any quick solution? > I guess Thomas is on vacations. Can you pls do the revert and push it > to drm-misc-next-fixes so this won't get lost? The offending patch hasn't showed up in drm-misc-next-fixes nor drm-misc-fixes yet. Looks like the branches are lacking behind. I can revert it on drm-misc-next, but I', not 100% sure that will then get picked up in time. Christian. > > Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> > > preemptively for that. Normally I think we could wait a bit more but > it's really close to merge window PR and I don't like handing too many > open things to Dave when he's back :-) > -Daniel > >> Christian. >> >>> >>>>>>> --- >>>>>>> drivers/gpu/drm/ttm/ttm_pool.c | 30 +++++++++++++++++++----------- >>>>>>> 1 file changed, 19 insertions(+), 11 deletions(-) >>>>>>> >>>>>>> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c >>>>>>> index dfce896c4bae..18c342a919a2 100644 >>>>>>> --- a/drivers/gpu/drm/ttm/ttm_pool.c >>>>>>> +++ b/drivers/gpu/drm/ttm/ttm_pool.c >>>>>>> @@ -47,6 +47,11 @@ >>>>>>> #include "ttm_module.h" >>>>>>> +#define TTM_MAX_ORDER (PMD_SHIFT - PAGE_SHIFT) >>>>>>> +#define __TTM_DIM_ORDER (TTM_MAX_ORDER + 1) >>>>>>> +/* Some architectures have a weird PMD_SHIFT */ >>>>>>> +#define TTM_DIM_ORDER (__TTM_DIM_ORDER <= MAX_ORDER ? __TTM_DIM_ORDER : MAX_ORDER) >>>>>>> + >>>>>>> /** >>>>>>> * struct ttm_pool_dma - Helper object for coherent DMA mappings >>>>>>> * >>>>>>> @@ -65,11 +70,11 @@ module_param(page_pool_size, ulong, 0644); >>>>>>> static atomic_long_t allocated_pages; >>>>>>> -static struct ttm_pool_type global_write_combined[MAX_ORDER]; >>>>>>> -static struct ttm_pool_type global_uncached[MAX_ORDER]; >>>>>>> +static struct ttm_pool_type global_write_combined[TTM_DIM_ORDER]; >>>>>>> +static struct ttm_pool_type global_uncached[TTM_DIM_ORDER]; >>>>>>> -static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER]; >>>>>>> -static struct ttm_pool_type global_dma32_uncached[MAX_ORDER]; >>>>>>> +static struct ttm_pool_type global_dma32_write_combined[TTM_DIM_ORDER]; >>>>>>> +static struct ttm_pool_type global_dma32_uncached[TTM_DIM_ORDER]; >>>>>>> static spinlock_t shrinker_lock; >>>>>>> static struct list_head shrinker_list; >>>>>>> @@ -444,7 +449,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt, >>>>>>> else >>>>>>> gfp_flags |= GFP_HIGHUSER; >>>>>>> - for (order = min_t(unsigned int, MAX_ORDER - 1, __fls(num_pages)); >>>>>>> + for (order = min_t(unsigned int, TTM_MAX_ORDER, __fls(num_pages)); >>>>>>> num_pages; >>>>>>> order = min_t(unsigned int, order, __fls(num_pages))) { >>>>>>> struct ttm_pool_type *pt; >>>>>>> @@ -563,7 +568,7 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev, >>>>>>> if (use_dma_alloc) { >>>>>>> for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) >>>>>>> - for (j = 0; j < MAX_ORDER; ++j) >>>>>>> + for (j = 0; j < TTM_DIM_ORDER; ++j) >>>>>>> ttm_pool_type_init(&pool->caching[i].orders[j], >>>>>>> pool, i, j); >>>>>>> } >>>>>>> @@ -583,7 +588,7 @@ void ttm_pool_fini(struct ttm_pool *pool) >>>>>>> if (pool->use_dma_alloc) { >>>>>>> for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) >>>>>>> - for (j = 0; j < MAX_ORDER; ++j) >>>>>>> + for (j = 0; j < TTM_DIM_ORDER; ++j) >>>>>>> ttm_pool_type_fini(&pool->caching[i].orders[j]); >>>>>>> } >>>>>>> @@ -637,7 +642,7 @@ static void ttm_pool_debugfs_header(struct seq_file *m) >>>>>>> unsigned int i; >>>>>>> seq_puts(m, "\t "); >>>>>>> - for (i = 0; i < MAX_ORDER; ++i) >>>>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i) >>>>>>> seq_printf(m, " ---%2u---", i); >>>>>>> seq_puts(m, "\n"); >>>>>>> } >>>>>>> @@ -648,7 +653,7 @@ static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt, >>>>>>> { >>>>>>> unsigned int i; >>>>>>> - for (i = 0; i < MAX_ORDER; ++i) >>>>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i) >>>>>>> seq_printf(m, " %8u", ttm_pool_type_count(&pt[i])); >>>>>>> seq_puts(m, "\n"); >>>>>>> } >>>>>>> @@ -751,13 +756,16 @@ int ttm_pool_mgr_init(unsigned long num_pages) >>>>>>> { >>>>>>> unsigned int i; >>>>>>> + BUILD_BUG_ON(TTM_DIM_ORDER > MAX_ORDER); >>>>>>> + BUILD_BUG_ON(TTM_DIM_ORDER < 1); >>>>>>> + >>>>>>> if (!page_pool_size) >>>>>>> page_pool_size = num_pages; >>>>>>> spin_lock_init(&shrinker_lock); >>>>>>> INIT_LIST_HEAD(&shrinker_list); >>>>>>> - for (i = 0; i < MAX_ORDER; ++i) { >>>>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i) { >>>>>>> ttm_pool_type_init(&global_write_combined[i], NULL, >>>>>>> ttm_write_combined, i); >>>>>>> ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i); >>>>>>> @@ -790,7 +798,7 @@ void ttm_pool_mgr_fini(void) >>>>>>> { >>>>>>> unsigned int i; >>>>>>> - for (i = 0; i < MAX_ORDER; ++i) { >>>>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i) { >>>>>>> ttm_pool_type_fini(&global_write_combined[i]); >>>>>>> ttm_pool_type_fini(&global_uncached[i]); >>>>>>> -- >>>>>>> 2.39.2 >>>>>>> >>>> -- >>>> Daniel Vetter >>>> Software Engineer, Intel Corporation >>>> http://blog.ffwll.ch >>> >
On Thu, 13 Apr 2023 at 11:46, Christian König <christian.koenig@amd.com> wrote: > > Am 13.04.23 um 10:48 schrieb Daniel Vetter: > > On Wed, 12 Apr 2023 at 16:18, Christian König <christian.koenig@amd.com> wrote: > >> Am 12.04.23 um 11:08 schrieb Daniel Vetter: > >>> On Tue, 11 Apr 2023 at 15:45, Daniel Vetter <daniel@ffwll.ch> wrote: > >>>> On Tue, Apr 11, 2023 at 02:11:18PM +0200, Christian König wrote: > >>>>> Am 11.04.23 um 11:51 schrieb Daniel Vetter: > >>>>>> On Tue, Apr 04, 2023 at 10:06:49PM +0200, Thomas Hellström wrote: > >>>>>>> When swapping out, we will split multi-order pages both in order to > >>>>>>> move them to the swap-cache and to be able to return memory to the > >>>>>>> swap cache as soon as possible on a page-by-page basis. > >>>>>>> Reduce the page max order to the system PMD size, as we can then be nicer > >>>>>>> to the system and avoid splitting gigantic pages. > >>>>>>> > >>>>>>> Looking forward to when we might be able to swap out PMD size folios > >>>>>>> without splitting, this will also be a benefit. > >>>>>>> > >>>>>>> v2: > >>>>>>> - Include all orders up to the PMD size (Christian König) > >>>>>>> v3: > >>>>>>> - Avoid compilation errors for architectures with special PFN_SHIFTs > >>>>>>> > >>>>>>> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> > >>>>>>> Reviewed-by: Christian König <christian.koenig@amd.com> > >>>>>> Apparently this fails on ppc build testing. Please supply build fix asap > >>>>>> (or I guess we need to revert). I'm kinda not clear why this only showed > >>>>>> up when I merged the drm-misc-next pr into drm-next ... > >>>>> I'm really wondering this as well. It looks like PMD_SHIFT isn't a constant > >>>>> on this particular platform. > >>>>> > >>>>> But from what I can find in the upstream 6.2 kernel PMD_SHIFT always seems > >>>>> to be a constant. > >>>>> > >>>>> So how exactly can that here break? > >>>> There's some in-flight patches to rework MAX_ORDER and other things in > >>>> linux-next, maybe it's recent? If you check out linux-next then you need > >>>> to reapply the patch (since sfr reverted it). > >>> So I looked and on ppc64 PMD_SHIFT is defined in terms of > >>> PTE_INDEX_SIZE, which is defined (for book3s) in terms of the variable > >>> __pte_index_size. This is in 6.3 already and seems pretty old. > >> Ah! I missed that one, thanks. > >> > >>> So revert? Or fixup patch to make this work on ppc? > >> I think for now just revert or change it so that we check if PMD_SHIFT > >> is a constant. > >> > >> Thomas do you have any quick solution? > > I guess Thomas is on vacations. Can you pls do the revert and push it > > to drm-misc-next-fixes so this won't get lost? > > The offending patch hasn't showed up in drm-misc-next-fixes nor > drm-misc-fixes yet. Looks like the branches are lacking behind. > > I can revert it on drm-misc-next, but I', not 100% sure that will then > get picked up in time. It's there now, Maarten forwarded drm-misc-next-fixes this morning. That's why I pinged here again, trees are ready to land the revert :-) -Daniel > > Christian. > > > > > Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> > > > > preemptively for that. Normally I think we could wait a bit more but > > it's really close to merge window PR and I don't like handing too many > > open things to Dave when he's back :-) > > -Daniel > > > >> Christian. > >> > >>> > >>>>>>> --- > >>>>>>> drivers/gpu/drm/ttm/ttm_pool.c | 30 +++++++++++++++++++----------- > >>>>>>> 1 file changed, 19 insertions(+), 11 deletions(-) > >>>>>>> > >>>>>>> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c > >>>>>>> index dfce896c4bae..18c342a919a2 100644 > >>>>>>> --- a/drivers/gpu/drm/ttm/ttm_pool.c > >>>>>>> +++ b/drivers/gpu/drm/ttm/ttm_pool.c > >>>>>>> @@ -47,6 +47,11 @@ > >>>>>>> #include "ttm_module.h" > >>>>>>> +#define TTM_MAX_ORDER (PMD_SHIFT - PAGE_SHIFT) > >>>>>>> +#define __TTM_DIM_ORDER (TTM_MAX_ORDER + 1) > >>>>>>> +/* Some architectures have a weird PMD_SHIFT */ > >>>>>>> +#define TTM_DIM_ORDER (__TTM_DIM_ORDER <= MAX_ORDER ? __TTM_DIM_ORDER : MAX_ORDER) > >>>>>>> + > >>>>>>> /** > >>>>>>> * struct ttm_pool_dma - Helper object for coherent DMA mappings > >>>>>>> * > >>>>>>> @@ -65,11 +70,11 @@ module_param(page_pool_size, ulong, 0644); > >>>>>>> static atomic_long_t allocated_pages; > >>>>>>> -static struct ttm_pool_type global_write_combined[MAX_ORDER]; > >>>>>>> -static struct ttm_pool_type global_uncached[MAX_ORDER]; > >>>>>>> +static struct ttm_pool_type global_write_combined[TTM_DIM_ORDER]; > >>>>>>> +static struct ttm_pool_type global_uncached[TTM_DIM_ORDER]; > >>>>>>> -static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER]; > >>>>>>> -static struct ttm_pool_type global_dma32_uncached[MAX_ORDER]; > >>>>>>> +static struct ttm_pool_type global_dma32_write_combined[TTM_DIM_ORDER]; > >>>>>>> +static struct ttm_pool_type global_dma32_uncached[TTM_DIM_ORDER]; > >>>>>>> static spinlock_t shrinker_lock; > >>>>>>> static struct list_head shrinker_list; > >>>>>>> @@ -444,7 +449,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt, > >>>>>>> else > >>>>>>> gfp_flags |= GFP_HIGHUSER; > >>>>>>> - for (order = min_t(unsigned int, MAX_ORDER - 1, __fls(num_pages)); > >>>>>>> + for (order = min_t(unsigned int, TTM_MAX_ORDER, __fls(num_pages)); > >>>>>>> num_pages; > >>>>>>> order = min_t(unsigned int, order, __fls(num_pages))) { > >>>>>>> struct ttm_pool_type *pt; > >>>>>>> @@ -563,7 +568,7 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev, > >>>>>>> if (use_dma_alloc) { > >>>>>>> for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) > >>>>>>> - for (j = 0; j < MAX_ORDER; ++j) > >>>>>>> + for (j = 0; j < TTM_DIM_ORDER; ++j) > >>>>>>> ttm_pool_type_init(&pool->caching[i].orders[j], > >>>>>>> pool, i, j); > >>>>>>> } > >>>>>>> @@ -583,7 +588,7 @@ void ttm_pool_fini(struct ttm_pool *pool) > >>>>>>> if (pool->use_dma_alloc) { > >>>>>>> for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) > >>>>>>> - for (j = 0; j < MAX_ORDER; ++j) > >>>>>>> + for (j = 0; j < TTM_DIM_ORDER; ++j) > >>>>>>> ttm_pool_type_fini(&pool->caching[i].orders[j]); > >>>>>>> } > >>>>>>> @@ -637,7 +642,7 @@ static void ttm_pool_debugfs_header(struct seq_file *m) > >>>>>>> unsigned int i; > >>>>>>> seq_puts(m, "\t "); > >>>>>>> - for (i = 0; i < MAX_ORDER; ++i) > >>>>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i) > >>>>>>> seq_printf(m, " ---%2u---", i); > >>>>>>> seq_puts(m, "\n"); > >>>>>>> } > >>>>>>> @@ -648,7 +653,7 @@ static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt, > >>>>>>> { > >>>>>>> unsigned int i; > >>>>>>> - for (i = 0; i < MAX_ORDER; ++i) > >>>>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i) > >>>>>>> seq_printf(m, " %8u", ttm_pool_type_count(&pt[i])); > >>>>>>> seq_puts(m, "\n"); > >>>>>>> } > >>>>>>> @@ -751,13 +756,16 @@ int ttm_pool_mgr_init(unsigned long num_pages) > >>>>>>> { > >>>>>>> unsigned int i; > >>>>>>> + BUILD_BUG_ON(TTM_DIM_ORDER > MAX_ORDER); > >>>>>>> + BUILD_BUG_ON(TTM_DIM_ORDER < 1); > >>>>>>> + > >>>>>>> if (!page_pool_size) > >>>>>>> page_pool_size = num_pages; > >>>>>>> spin_lock_init(&shrinker_lock); > >>>>>>> INIT_LIST_HEAD(&shrinker_list); > >>>>>>> - for (i = 0; i < MAX_ORDER; ++i) { > >>>>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i) { > >>>>>>> ttm_pool_type_init(&global_write_combined[i], NULL, > >>>>>>> ttm_write_combined, i); > >>>>>>> ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i); > >>>>>>> @@ -790,7 +798,7 @@ void ttm_pool_mgr_fini(void) > >>>>>>> { > >>>>>>> unsigned int i; > >>>>>>> - for (i = 0; i < MAX_ORDER; ++i) { > >>>>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i) { > >>>>>>> ttm_pool_type_fini(&global_write_combined[i]); > >>>>>>> ttm_pool_type_fini(&global_uncached[i]); > >>>>>>> -- > >>>>>>> 2.39.2 > >>>>>>> > >>>> -- > >>>> Daniel Vetter > >>>> Software Engineer, Intel Corporation > >>>> http://blog.ffwll.ch > >>> > > >
Am 13.04.23 um 15:13 schrieb Daniel Vetter: > On Thu, 13 Apr 2023 at 11:46, Christian König <christian.koenig@amd.com> wrote: >> Am 13.04.23 um 10:48 schrieb Daniel Vetter: >>> On Wed, 12 Apr 2023 at 16:18, Christian König <christian.koenig@amd.com> wrote: >>>> Am 12.04.23 um 11:08 schrieb Daniel Vetter: >>>>> On Tue, 11 Apr 2023 at 15:45, Daniel Vetter <daniel@ffwll.ch> wrote: >>>>>> On Tue, Apr 11, 2023 at 02:11:18PM +0200, Christian König wrote: >>>>>>> Am 11.04.23 um 11:51 schrieb Daniel Vetter: >>>>>>>> On Tue, Apr 04, 2023 at 10:06:49PM +0200, Thomas Hellström wrote: >>>>>>>>> When swapping out, we will split multi-order pages both in order to >>>>>>>>> move them to the swap-cache and to be able to return memory to the >>>>>>>>> swap cache as soon as possible on a page-by-page basis. >>>>>>>>> Reduce the page max order to the system PMD size, as we can then be nicer >>>>>>>>> to the system and avoid splitting gigantic pages. >>>>>>>>> >>>>>>>>> Looking forward to when we might be able to swap out PMD size folios >>>>>>>>> without splitting, this will also be a benefit. >>>>>>>>> >>>>>>>>> v2: >>>>>>>>> - Include all orders up to the PMD size (Christian König) >>>>>>>>> v3: >>>>>>>>> - Avoid compilation errors for architectures with special PFN_SHIFTs >>>>>>>>> >>>>>>>>> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> >>>>>>>>> Reviewed-by: Christian König <christian.koenig@amd.com> >>>>>>>> Apparently this fails on ppc build testing. Please supply build fix asap >>>>>>>> (or I guess we need to revert). I'm kinda not clear why this only showed >>>>>>>> up when I merged the drm-misc-next pr into drm-next ... >>>>>>> I'm really wondering this as well. It looks like PMD_SHIFT isn't a constant >>>>>>> on this particular platform. >>>>>>> >>>>>>> But from what I can find in the upstream 6.2 kernel PMD_SHIFT always seems >>>>>>> to be a constant. >>>>>>> >>>>>>> So how exactly can that here break? >>>>>> There's some in-flight patches to rework MAX_ORDER and other things in >>>>>> linux-next, maybe it's recent? If you check out linux-next then you need >>>>>> to reapply the patch (since sfr reverted it). >>>>> So I looked and on ppc64 PMD_SHIFT is defined in terms of >>>>> PTE_INDEX_SIZE, which is defined (for book3s) in terms of the variable >>>>> __pte_index_size. This is in 6.3 already and seems pretty old. >>>> Ah! I missed that one, thanks. >>>> >>>>> So revert? Or fixup patch to make this work on ppc? >>>> I think for now just revert or change it so that we check if PMD_SHIFT >>>> is a constant. >>>> >>>> Thomas do you have any quick solution? >>> I guess Thomas is on vacations. Can you pls do the revert and push it >>> to drm-misc-next-fixes so this won't get lost? >> The offending patch hasn't showed up in drm-misc-next-fixes nor >> drm-misc-fixes yet. Looks like the branches are lacking behind. >> >> I can revert it on drm-misc-next, but I', not 100% sure that will then >> get picked up in time. > It's there now, Maarten forwarded drm-misc-next-fixes this morning. > That's why I pinged here again, trees are ready to land the revert :-) Just pushed it. Christian. > -Daniel > >> Christian. >> >>> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> >>> >>> preemptively for that. Normally I think we could wait a bit more but >>> it's really close to merge window PR and I don't like handing too many >>> open things to Dave when he's back :-) >>> -Daniel >>> >>>> Christian. >>>> >>>>>>>>> --- >>>>>>>>> drivers/gpu/drm/ttm/ttm_pool.c | 30 +++++++++++++++++++----------- >>>>>>>>> 1 file changed, 19 insertions(+), 11 deletions(-) >>>>>>>>> >>>>>>>>> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c >>>>>>>>> index dfce896c4bae..18c342a919a2 100644 >>>>>>>>> --- a/drivers/gpu/drm/ttm/ttm_pool.c >>>>>>>>> +++ b/drivers/gpu/drm/ttm/ttm_pool.c >>>>>>>>> @@ -47,6 +47,11 @@ >>>>>>>>> #include "ttm_module.h" >>>>>>>>> +#define TTM_MAX_ORDER (PMD_SHIFT - PAGE_SHIFT) >>>>>>>>> +#define __TTM_DIM_ORDER (TTM_MAX_ORDER + 1) >>>>>>>>> +/* Some architectures have a weird PMD_SHIFT */ >>>>>>>>> +#define TTM_DIM_ORDER (__TTM_DIM_ORDER <= MAX_ORDER ? __TTM_DIM_ORDER : MAX_ORDER) >>>>>>>>> + >>>>>>>>> /** >>>>>>>>> * struct ttm_pool_dma - Helper object for coherent DMA mappings >>>>>>>>> * >>>>>>>>> @@ -65,11 +70,11 @@ module_param(page_pool_size, ulong, 0644); >>>>>>>>> static atomic_long_t allocated_pages; >>>>>>>>> -static struct ttm_pool_type global_write_combined[MAX_ORDER]; >>>>>>>>> -static struct ttm_pool_type global_uncached[MAX_ORDER]; >>>>>>>>> +static struct ttm_pool_type global_write_combined[TTM_DIM_ORDER]; >>>>>>>>> +static struct ttm_pool_type global_uncached[TTM_DIM_ORDER]; >>>>>>>>> -static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER]; >>>>>>>>> -static struct ttm_pool_type global_dma32_uncached[MAX_ORDER]; >>>>>>>>> +static struct ttm_pool_type global_dma32_write_combined[TTM_DIM_ORDER]; >>>>>>>>> +static struct ttm_pool_type global_dma32_uncached[TTM_DIM_ORDER]; >>>>>>>>> static spinlock_t shrinker_lock; >>>>>>>>> static struct list_head shrinker_list; >>>>>>>>> @@ -444,7 +449,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt, >>>>>>>>> else >>>>>>>>> gfp_flags |= GFP_HIGHUSER; >>>>>>>>> - for (order = min_t(unsigned int, MAX_ORDER - 1, __fls(num_pages)); >>>>>>>>> + for (order = min_t(unsigned int, TTM_MAX_ORDER, __fls(num_pages)); >>>>>>>>> num_pages; >>>>>>>>> order = min_t(unsigned int, order, __fls(num_pages))) { >>>>>>>>> struct ttm_pool_type *pt; >>>>>>>>> @@ -563,7 +568,7 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev, >>>>>>>>> if (use_dma_alloc) { >>>>>>>>> for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) >>>>>>>>> - for (j = 0; j < MAX_ORDER; ++j) >>>>>>>>> + for (j = 0; j < TTM_DIM_ORDER; ++j) >>>>>>>>> ttm_pool_type_init(&pool->caching[i].orders[j], >>>>>>>>> pool, i, j); >>>>>>>>> } >>>>>>>>> @@ -583,7 +588,7 @@ void ttm_pool_fini(struct ttm_pool *pool) >>>>>>>>> if (pool->use_dma_alloc) { >>>>>>>>> for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) >>>>>>>>> - for (j = 0; j < MAX_ORDER; ++j) >>>>>>>>> + for (j = 0; j < TTM_DIM_ORDER; ++j) >>>>>>>>> ttm_pool_type_fini(&pool->caching[i].orders[j]); >>>>>>>>> } >>>>>>>>> @@ -637,7 +642,7 @@ static void ttm_pool_debugfs_header(struct seq_file *m) >>>>>>>>> unsigned int i; >>>>>>>>> seq_puts(m, "\t "); >>>>>>>>> - for (i = 0; i < MAX_ORDER; ++i) >>>>>>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i) >>>>>>>>> seq_printf(m, " ---%2u---", i); >>>>>>>>> seq_puts(m, "\n"); >>>>>>>>> } >>>>>>>>> @@ -648,7 +653,7 @@ static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt, >>>>>>>>> { >>>>>>>>> unsigned int i; >>>>>>>>> - for (i = 0; i < MAX_ORDER; ++i) >>>>>>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i) >>>>>>>>> seq_printf(m, " %8u", ttm_pool_type_count(&pt[i])); >>>>>>>>> seq_puts(m, "\n"); >>>>>>>>> } >>>>>>>>> @@ -751,13 +756,16 @@ int ttm_pool_mgr_init(unsigned long num_pages) >>>>>>>>> { >>>>>>>>> unsigned int i; >>>>>>>>> + BUILD_BUG_ON(TTM_DIM_ORDER > MAX_ORDER); >>>>>>>>> + BUILD_BUG_ON(TTM_DIM_ORDER < 1); >>>>>>>>> + >>>>>>>>> if (!page_pool_size) >>>>>>>>> page_pool_size = num_pages; >>>>>>>>> spin_lock_init(&shrinker_lock); >>>>>>>>> INIT_LIST_HEAD(&shrinker_list); >>>>>>>>> - for (i = 0; i < MAX_ORDER; ++i) { >>>>>>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i) { >>>>>>>>> ttm_pool_type_init(&global_write_combined[i], NULL, >>>>>>>>> ttm_write_combined, i); >>>>>>>>> ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i); >>>>>>>>> @@ -790,7 +798,7 @@ void ttm_pool_mgr_fini(void) >>>>>>>>> { >>>>>>>>> unsigned int i; >>>>>>>>> - for (i = 0; i < MAX_ORDER; ++i) { >>>>>>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i) { >>>>>>>>> ttm_pool_type_fini(&global_write_combined[i]); >>>>>>>>> ttm_pool_type_fini(&global_uncached[i]); >>>>>>>>> -- >>>>>>>>> 2.39.2 >>>>>>>>> >>>>>> -- >>>>>> Daniel Vetter >>>>>> Software Engineer, Intel Corporation >>>>>> http://blog.ffwll.ch >
Hi On 4/14/23 12:11, Christian König wrote: > Am 13.04.23 um 15:13 schrieb Daniel Vetter: >> On Thu, 13 Apr 2023 at 11:46, Christian König >> <christian.koenig@amd.com> wrote: >>> Am 13.04.23 um 10:48 schrieb Daniel Vetter: >>>> On Wed, 12 Apr 2023 at 16:18, Christian König >>>> <christian.koenig@amd.com> wrote: >>>>> Am 12.04.23 um 11:08 schrieb Daniel Vetter: >>>>>> On Tue, 11 Apr 2023 at 15:45, Daniel Vetter <daniel@ffwll.ch> wrote: >>>>>>> On Tue, Apr 11, 2023 at 02:11:18PM +0200, Christian König wrote: >>>>>>>> Am 11.04.23 um 11:51 schrieb Daniel Vetter: >>>>>>>>> On Tue, Apr 04, 2023 at 10:06:49PM +0200, Thomas Hellström wrote: >>>>>>>>>> When swapping out, we will split multi-order pages both in >>>>>>>>>> order to >>>>>>>>>> move them to the swap-cache and to be able to return memory >>>>>>>>>> to the >>>>>>>>>> swap cache as soon as possible on a page-by-page basis. >>>>>>>>>> Reduce the page max order to the system PMD size, as we can >>>>>>>>>> then be nicer >>>>>>>>>> to the system and avoid splitting gigantic pages. >>>>>>>>>> >>>>>>>>>> Looking forward to when we might be able to swap out PMD size >>>>>>>>>> folios >>>>>>>>>> without splitting, this will also be a benefit. >>>>>>>>>> >>>>>>>>>> v2: >>>>>>>>>> - Include all orders up to the PMD size (Christian König) >>>>>>>>>> v3: >>>>>>>>>> - Avoid compilation errors for architectures with special >>>>>>>>>> PFN_SHIFTs >>>>>>>>>> >>>>>>>>>> Signed-off-by: Thomas Hellström >>>>>>>>>> <thomas.hellstrom@linux.intel.com> >>>>>>>>>> Reviewed-by: Christian König <christian.koenig@amd.com> >>>>>>>>> Apparently this fails on ppc build testing. Please supply >>>>>>>>> build fix asap >>>>>>>>> (or I guess we need to revert). I'm kinda not clear why this >>>>>>>>> only showed >>>>>>>>> up when I merged the drm-misc-next pr into drm-next ... >>>>>>>> I'm really wondering this as well. It looks like PMD_SHIFT >>>>>>>> isn't a constant >>>>>>>> on this particular platform. >>>>>>>> >>>>>>>> But from what I can find in the upstream 6.2 kernel PMD_SHIFT >>>>>>>> always seems >>>>>>>> to be a constant. >>>>>>>> >>>>>>>> So how exactly can that here break? >>>>>>> There's some in-flight patches to rework MAX_ORDER and other >>>>>>> things in >>>>>>> linux-next, maybe it's recent? If you check out linux-next then >>>>>>> you need >>>>>>> to reapply the patch (since sfr reverted it). >>>>>> So I looked and on ppc64 PMD_SHIFT is defined in terms of >>>>>> PTE_INDEX_SIZE, which is defined (for book3s) in terms of the >>>>>> variable >>>>>> __pte_index_size. This is in 6.3 already and seems pretty old. >>>>> Ah! I missed that one, thanks. >>>>> >>>>>> So revert? Or fixup patch to make this work on ppc? >>>>> I think for now just revert or change it so that we check if >>>>> PMD_SHIFT >>>>> is a constant. >>>>> >>>>> Thomas do you have any quick solution? >>>> I guess Thomas is on vacations. Can you pls do the revert and push it >>>> to drm-misc-next-fixes so this won't get lost? >>> The offending patch hasn't showed up in drm-misc-next-fixes nor >>> drm-misc-fixes yet. Looks like the branches are lacking behind. >>> >>> I can revert it on drm-misc-next, but I', not 100% sure that will then >>> get picked up in time. >> It's there now, Maarten forwarded drm-misc-next-fixes this morning. >> That's why I pinged here again, trees are ready to land the revert :-) > > Just pushed it. > > Christian. Thanks for fixing this. (I was on vacation). I got a "BUILD SUCCESS" for this series based on drm-misc-next so I didn't think anything weird would show up. Thanks, Thomas > >> -Daniel >> >>> Christian. >>> >>>> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> >>>> >>>> preemptively for that. Normally I think we could wait a bit more but >>>> it's really close to merge window PR and I don't like handing too many >>>> open things to Dave when he's back :-) >>>> -Daniel >>>> >>>>> Christian. >>>>> >>>>>>>>>> --- >>>>>>>>>> drivers/gpu/drm/ttm/ttm_pool.c | 30 >>>>>>>>>> +++++++++++++++++++----------- >>>>>>>>>> 1 file changed, 19 insertions(+), 11 deletions(-) >>>>>>>>>> >>>>>>>>>> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c >>>>>>>>>> b/drivers/gpu/drm/ttm/ttm_pool.c >>>>>>>>>> index dfce896c4bae..18c342a919a2 100644 >>>>>>>>>> --- a/drivers/gpu/drm/ttm/ttm_pool.c >>>>>>>>>> +++ b/drivers/gpu/drm/ttm/ttm_pool.c >>>>>>>>>> @@ -47,6 +47,11 @@ >>>>>>>>>> #include "ttm_module.h" >>>>>>>>>> +#define TTM_MAX_ORDER (PMD_SHIFT - PAGE_SHIFT) >>>>>>>>>> +#define __TTM_DIM_ORDER (TTM_MAX_ORDER + 1) >>>>>>>>>> +/* Some architectures have a weird PMD_SHIFT */ >>>>>>>>>> +#define TTM_DIM_ORDER (__TTM_DIM_ORDER <= MAX_ORDER ? >>>>>>>>>> __TTM_DIM_ORDER : MAX_ORDER) >>>>>>>>>> + >>>>>>>>>> /** >>>>>>>>>> * struct ttm_pool_dma - Helper object for coherent DMA >>>>>>>>>> mappings >>>>>>>>>> * >>>>>>>>>> @@ -65,11 +70,11 @@ module_param(page_pool_size, ulong, 0644); >>>>>>>>>> static atomic_long_t allocated_pages; >>>>>>>>>> -static struct ttm_pool_type global_write_combined[MAX_ORDER]; >>>>>>>>>> -static struct ttm_pool_type global_uncached[MAX_ORDER]; >>>>>>>>>> +static struct ttm_pool_type >>>>>>>>>> global_write_combined[TTM_DIM_ORDER]; >>>>>>>>>> +static struct ttm_pool_type global_uncached[TTM_DIM_ORDER]; >>>>>>>>>> -static struct ttm_pool_type >>>>>>>>>> global_dma32_write_combined[MAX_ORDER]; >>>>>>>>>> -static struct ttm_pool_type global_dma32_uncached[MAX_ORDER]; >>>>>>>>>> +static struct ttm_pool_type >>>>>>>>>> global_dma32_write_combined[TTM_DIM_ORDER]; >>>>>>>>>> +static struct ttm_pool_type >>>>>>>>>> global_dma32_uncached[TTM_DIM_ORDER]; >>>>>>>>>> static spinlock_t shrinker_lock; >>>>>>>>>> static struct list_head shrinker_list; >>>>>>>>>> @@ -444,7 +449,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, >>>>>>>>>> struct ttm_tt *tt, >>>>>>>>>> else >>>>>>>>>> gfp_flags |= GFP_HIGHUSER; >>>>>>>>>> - for (order = min_t(unsigned int, MAX_ORDER - 1, >>>>>>>>>> __fls(num_pages)); >>>>>>>>>> + for (order = min_t(unsigned int, TTM_MAX_ORDER, >>>>>>>>>> __fls(num_pages)); >>>>>>>>>> num_pages; >>>>>>>>>> order = min_t(unsigned int, order, >>>>>>>>>> __fls(num_pages))) { >>>>>>>>>> struct ttm_pool_type *pt; >>>>>>>>>> @@ -563,7 +568,7 @@ void ttm_pool_init(struct ttm_pool *pool, >>>>>>>>>> struct device *dev, >>>>>>>>>> if (use_dma_alloc) { >>>>>>>>>> for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) >>>>>>>>>> - for (j = 0; j < MAX_ORDER; ++j) >>>>>>>>>> + for (j = 0; j < TTM_DIM_ORDER; ++j) >>>>>>>>>> ttm_pool_type_init(&pool->caching[i].orders[j], >>>>>>>>>> pool, >>>>>>>>>> i, j); >>>>>>>>>> } >>>>>>>>>> @@ -583,7 +588,7 @@ void ttm_pool_fini(struct ttm_pool *pool) >>>>>>>>>> if (pool->use_dma_alloc) { >>>>>>>>>> for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) >>>>>>>>>> - for (j = 0; j < MAX_ORDER; ++j) >>>>>>>>>> + for (j = 0; j < TTM_DIM_ORDER; ++j) >>>>>>>>>> ttm_pool_type_fini(&pool->caching[i].orders[j]); >>>>>>>>>> } >>>>>>>>>> @@ -637,7 +642,7 @@ static void >>>>>>>>>> ttm_pool_debugfs_header(struct seq_file *m) >>>>>>>>>> unsigned int i; >>>>>>>>>> seq_puts(m, "\t "); >>>>>>>>>> - for (i = 0; i < MAX_ORDER; ++i) >>>>>>>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i) >>>>>>>>>> seq_printf(m, " ---%2u---", i); >>>>>>>>>> seq_puts(m, "\n"); >>>>>>>>>> } >>>>>>>>>> @@ -648,7 +653,7 @@ static void >>>>>>>>>> ttm_pool_debugfs_orders(struct ttm_pool_type *pt, >>>>>>>>>> { >>>>>>>>>> unsigned int i; >>>>>>>>>> - for (i = 0; i < MAX_ORDER; ++i) >>>>>>>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i) >>>>>>>>>> seq_printf(m, " %8u", >>>>>>>>>> ttm_pool_type_count(&pt[i])); >>>>>>>>>> seq_puts(m, "\n"); >>>>>>>>>> } >>>>>>>>>> @@ -751,13 +756,16 @@ int ttm_pool_mgr_init(unsigned long >>>>>>>>>> num_pages) >>>>>>>>>> { >>>>>>>>>> unsigned int i; >>>>>>>>>> + BUILD_BUG_ON(TTM_DIM_ORDER > MAX_ORDER); >>>>>>>>>> + BUILD_BUG_ON(TTM_DIM_ORDER < 1); >>>>>>>>>> + >>>>>>>>>> if (!page_pool_size) >>>>>>>>>> page_pool_size = num_pages; >>>>>>>>>> spin_lock_init(&shrinker_lock); >>>>>>>>>> INIT_LIST_HEAD(&shrinker_list); >>>>>>>>>> - for (i = 0; i < MAX_ORDER; ++i) { >>>>>>>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i) { >>>>>>>>>> ttm_pool_type_init(&global_write_combined[i], NULL, >>>>>>>>>> ttm_write_combined, i); >>>>>>>>>> ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i); >>>>>>>>>> @@ -790,7 +798,7 @@ void ttm_pool_mgr_fini(void) >>>>>>>>>> { >>>>>>>>>> unsigned int i; >>>>>>>>>> - for (i = 0; i < MAX_ORDER; ++i) { >>>>>>>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i) { >>>>>>>>>> ttm_pool_type_fini(&global_write_combined[i]); >>>>>>>>>> ttm_pool_type_fini(&global_uncached[i]); >>>>>>>>>> -- >>>>>>>>>> 2.39.2 >>>>>>>>>> >>>>>>> -- >>>>>>> Daniel Vetter >>>>>>> Software Engineer, Intel Corporation >>>>>>> http://blog.ffwll.ch >> >
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index dfce896c4bae..18c342a919a2 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -47,6 +47,11 @@ #include "ttm_module.h" +#define TTM_MAX_ORDER (PMD_SHIFT - PAGE_SHIFT) +#define __TTM_DIM_ORDER (TTM_MAX_ORDER + 1) +/* Some architectures have a weird PMD_SHIFT */ +#define TTM_DIM_ORDER (__TTM_DIM_ORDER <= MAX_ORDER ? __TTM_DIM_ORDER : MAX_ORDER) + /** * struct ttm_pool_dma - Helper object for coherent DMA mappings * @@ -65,11 +70,11 @@ module_param(page_pool_size, ulong, 0644); static atomic_long_t allocated_pages; -static struct ttm_pool_type global_write_combined[MAX_ORDER]; -static struct ttm_pool_type global_uncached[MAX_ORDER]; +static struct ttm_pool_type global_write_combined[TTM_DIM_ORDER]; +static struct ttm_pool_type global_uncached[TTM_DIM_ORDER]; -static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER]; -static struct ttm_pool_type global_dma32_uncached[MAX_ORDER]; +static struct ttm_pool_type global_dma32_write_combined[TTM_DIM_ORDER]; +static struct ttm_pool_type global_dma32_uncached[TTM_DIM_ORDER]; static spinlock_t shrinker_lock; static struct list_head shrinker_list; @@ -444,7 +449,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt, else gfp_flags |= GFP_HIGHUSER; - for (order = min_t(unsigned int, MAX_ORDER - 1, __fls(num_pages)); + for (order = min_t(unsigned int, TTM_MAX_ORDER, __fls(num_pages)); num_pages; order = min_t(unsigned int, order, __fls(num_pages))) { struct ttm_pool_type *pt; @@ -563,7 +568,7 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev, if (use_dma_alloc) { for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) - for (j = 0; j < MAX_ORDER; ++j) + for (j = 0; j < TTM_DIM_ORDER; ++j) ttm_pool_type_init(&pool->caching[i].orders[j], pool, i, j); } @@ -583,7 +588,7 @@ void ttm_pool_fini(struct ttm_pool *pool) if (pool->use_dma_alloc) { for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) - for (j = 0; j < MAX_ORDER; ++j) + for (j = 0; j < TTM_DIM_ORDER; ++j) ttm_pool_type_fini(&pool->caching[i].orders[j]); } @@ -637,7 +642,7 @@ static void ttm_pool_debugfs_header(struct seq_file *m) unsigned int i; seq_puts(m, "\t "); - for (i = 0; i < MAX_ORDER; ++i) + for (i = 0; i < TTM_DIM_ORDER; ++i) seq_printf(m, " ---%2u---", i); seq_puts(m, "\n"); } @@ -648,7 +653,7 @@ static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt, { unsigned int i; - for (i = 0; i < MAX_ORDER; ++i) + for (i = 0; i < TTM_DIM_ORDER; ++i) seq_printf(m, " %8u", ttm_pool_type_count(&pt[i])); seq_puts(m, "\n"); } @@ -751,13 +756,16 @@ int ttm_pool_mgr_init(unsigned long num_pages) { unsigned int i; + BUILD_BUG_ON(TTM_DIM_ORDER > MAX_ORDER); + BUILD_BUG_ON(TTM_DIM_ORDER < 1); + if (!page_pool_size) page_pool_size = num_pages; spin_lock_init(&shrinker_lock); INIT_LIST_HEAD(&shrinker_list); - for (i = 0; i < MAX_ORDER; ++i) { + for (i = 0; i < TTM_DIM_ORDER; ++i) { ttm_pool_type_init(&global_write_combined[i], NULL, ttm_write_combined, i); ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i); @@ -790,7 +798,7 @@ void ttm_pool_mgr_fini(void) { unsigned int i; - for (i = 0; i < MAX_ORDER; ++i) { + for (i = 0; i < TTM_DIM_ORDER; ++i) { ttm_pool_type_fini(&global_write_combined[i]); ttm_pool_type_fini(&global_uncached[i]);