| Field      | Value                                                                   |
|------------|-------------------------------------------------------------------------|
| Message ID | 20161222083641.2691-31-chris@chris-wilson.co.uk (mailing list archive)  |
| State      | New, archived                                                           |
On Thu, Dec 22, 2016 at 08:36:33AM +0000, Chris Wilson wrote:
> Compute the minimal required hole during scan and only evict those nodes
> that overlap. This enables us to reduce the number of nodes we need to
> evict to the bare minimum.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Again, for next time around pls cc: driver maintainers too.
-Daniel

> ---
>  drivers/gpu/drm/drm_mm.c                | 60 +++++++++++++++++++++++++++------
>  drivers/gpu/drm/etnaviv/etnaviv_mmu.c   |  2 +-
>  drivers/gpu/drm/i915/i915_gem_evict.c   |  3 +-
>  drivers/gpu/drm/selftests/test-drm_mm.c | 10 +++---
>  include/drm/drm_mm.h                    | 22 ++++++------
>  5 files changed, 71 insertions(+), 26 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
> index 1b5613bcb35e..189ab84c5a59 100644
> --- a/drivers/gpu/drm/drm_mm.c
> +++ b/drivers/gpu/drm/drm_mm.c
> @@ -718,10 +718,10 @@ EXPORT_SYMBOL(drm_mm_replace_node);
>   * @color: opaque tag value to use for the allocation
>   * @start: start of the allowed range for the allocation
>   * @end: end of the allowed range for the allocation
> + * @flags: flags to specify how the allocation will be performed afterwards
>   *
>   * This simply sets up the scanning routines with the parameters for the desired
> - * hole. Note that there's no need to specify allocation flags, since they only
> - * change the place a node is allocated from within a suitable hole.
> + * hole.
>   *
>   * Warning:
>   * As long as the scan list is non-empty, no other operations than
> @@ -733,7 +733,8 @@ void drm_mm_scan_init_with_range(struct drm_mm_scan *scan,
>  				 u64 alignment,
>  				 unsigned long color,
>  				 u64 start,
> -				 u64 end)
> +				 u64 end,
> +				 unsigned int flags)
>  {
>  	DRM_MM_BUG_ON(start >= end);
>  	DRM_MM_BUG_ON(!size || size > end - start);
> @@ -744,6 +745,7 @@ void drm_mm_scan_init_with_range(struct drm_mm_scan *scan,
>  	scan->color = color;
>  	scan->alignment = alignment;
>  	scan->size = size;
> +	scan->flags = flags;
>
>  	DRM_MM_BUG_ON(end <= start);
>  	scan->range_start = start;
> @@ -778,7 +780,7 @@ bool drm_mm_scan_add_block(struct drm_mm_scan *scan,
>  	DRM_MM_BUG_ON(node->mm != mm);
>  	DRM_MM_BUG_ON(!node->allocated);
>  	DRM_MM_BUG_ON(node->scanned_block);
> -	node->scanned_block = 1;
> +	node->scanned_block = true;
>  	mm->scan_active++;
>
>  	hole = list_prev_entry(node, node_list);
> @@ -800,15 +802,53 @@ bool drm_mm_scan_add_block(struct drm_mm_scan *scan,
>
>  	adj_start = max(col_start, scan->range_start);
>  	adj_end = min(col_end, scan->range_end);
> +	if (adj_end <= adj_start || adj_end - adj_start < scan->size)
> +		return false;
> +
> +	if (scan->flags == DRM_MM_CREATE_TOP)
> +		adj_start = adj_end - scan->size;
> +
> +	if (scan->alignment) {
> +		u64 rem;
> +
> +		div64_u64_rem(adj_start, scan->alignment, &rem);
> +		if (rem) {
> +			adj_start -= rem;
> +			if (scan->flags != DRM_MM_CREATE_TOP)
> +				adj_start += scan->alignment;
> +			if (adj_start < max(col_start, scan->range_start) ||
> +			    min(col_end, scan->range_end) - adj_start < scan->size)
> +				return false;
> +
> +			if (adj_end <= adj_start ||
> +			    adj_end - adj_start < scan->size)
> +				return false;
> +		}
> +	}
>
> -	if (check_free_hole(adj_start, adj_end,
> -			    scan->size, scan->alignment)) {
> +	if (mm->color_adjust) {
> +		/* If allocations need adjusting due to neighbouring colours,
> +		 * we do not have enough information to decide if we need
> +		 * to evict nodes on either side of [adj_start, adj_end].
> +		 * What almost works is
> +		 *	hit_start = adj_start + (hole_start - col_start);
> +		 *	hit_end = adj_start + scan->size + (hole_end - col_end);
> +		 * but because the decision is only made on the final hole,
> +		 * we may underestimate the required adjustments for an
> +		 * interior allocation.
> +		 */
>  		scan->hit_start = hole_start;
>  		scan->hit_end = hole_end;
> -		return true;
> +	} else {
> +		scan->hit_start = adj_start;
> +		scan->hit_end = adj_start + scan->size;
>  	}
>
> -	return false;
> +	DRM_MM_BUG_ON(scan->hit_start >= scan->hit_end);
> +	DRM_MM_BUG_ON(scan->hit_start < hole_start);
> +	DRM_MM_BUG_ON(scan->hit_end > hole_end);
> +
> +	return true;
>  }
>  EXPORT_SYMBOL(drm_mm_scan_add_block);
>
> @@ -836,7 +876,7 @@ bool drm_mm_scan_remove_block(struct drm_mm_scan *scan,
>
>  	DRM_MM_BUG_ON(node->mm != scan->mm);
>  	DRM_MM_BUG_ON(!node->scanned_block);
> -	node->scanned_block = 0;
> +	node->scanned_block = false;
>
>  	DRM_MM_BUG_ON(!node->mm->scan_active);
>  	node->mm->scan_active--;
> @@ -846,7 +886,7 @@ bool drm_mm_scan_remove_block(struct drm_mm_scan *scan,
>  	prev_node->hole_follows = node->scanned_preceeds_hole;
>  	list_add(&node->node_list, &prev_node->node_list);
>
> -	return (drm_mm_hole_node_end(node) > scan->hit_start &&
> +	return (node->start + node->size > scan->hit_start &&
>  		node->start < scan->hit_end);
>  }
>  EXPORT_SYMBOL(drm_mm_scan_remove_block);
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
> index fe1e886dcabb..2dae3169ce48 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
> @@ -135,7 +135,7 @@ static int etnaviv_iommu_find_iova(struct etnaviv_iommu *mmu,
>  	}
>
>  	/* Try to retire some entries */
> -	drm_mm_scan_init(&scan, &mmu->mm, size, 0, 0);
> +	drm_mm_scan_init(&scan, &mmu->mm, size, 0, 0, 0);
>
>  	found = 0;
>  	INIT_LIST_HEAD(&list);
> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
> index 77ded288534b..2741498cdf2b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> @@ -128,7 +128,8 @@ i915_gem_evict_something(struct i915_address_space *vm,
>  	 */
>  	drm_mm_scan_init_with_range(&scan, &vm->mm,
>  				    min_size, alignment, cache_level,
> -				    start, end);
> +				    start, end,
> +				    flags & PIN_HIGH ? DRM_MM_CREATE_TOP : 0);
>
>  	/* Retire before we search the active list. Although we have
>  	 * reasonable accuracy in our retirement lists, we may have
> diff --git a/drivers/gpu/drm/selftests/test-drm_mm.c b/drivers/gpu/drm/selftests/test-drm_mm.c
> index 997f2bc93b9b..1bbfc24342c5 100644
> --- a/drivers/gpu/drm/selftests/test-drm_mm.c
> +++ b/drivers/gpu/drm/selftests/test-drm_mm.c
> @@ -1199,7 +1199,7 @@ static bool evict_nothing(struct drm_mm *mm,
>  	struct drm_mm_node *node;
>  	unsigned int n;
>
> -	drm_mm_scan_init(&scan, mm, 1, 0, 0);
> +	drm_mm_scan_init(&scan, mm, 1, 0, 0, 0);
>  	for (n = 0; n < total_size; n++) {
>  		e = &nodes[n];
>  		list_add(&e->link, &evict_list);
> @@ -1246,7 +1246,7 @@ static bool evict_everything(struct drm_mm *mm,
>  	unsigned int n;
>  	int err;
>
> -	drm_mm_scan_init(&scan, mm, total_size, 0, 0);
> +	drm_mm_scan_init(&scan, mm, total_size, 0, 0, 0);
>  	for (n = 0; n < total_size; n++) {
>  		e = &nodes[n];
>  		list_add(&e->link, &evict_list);
> @@ -1296,7 +1296,8 @@ static int evict_something(struct drm_mm *mm,
>
>  	drm_mm_scan_init_with_range(&scan, mm,
>  				    size, alignment, 0,
> -				    range_start, range_end);
> +				    range_start, range_end,
> +				    mode->create_flags);
>  	if (!evict_nodes(&scan,
>  			 nodes, order, count,
>  			 &evict_list))
> @@ -1874,7 +1875,8 @@ static int evict_color(struct drm_mm *mm,
>
>  	drm_mm_scan_init_with_range(&scan, mm,
>  				    size, alignment, color,
> -				    range_start, range_end);
> +				    range_start, range_end,
> +				    mode->create_flags);
>  	if (!evict_nodes(&scan,
>  			 nodes, order, count,
>  			 &evict_list))
> diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
> index bae0f10da8e3..606336fc229a 100644
> --- a/include/drm/drm_mm.h
> +++ b/include/drm/drm_mm.h
> @@ -120,6 +120,7 @@ struct drm_mm_scan {
>  	struct drm_mm_node *prev_scanned_node;
>
>  	unsigned long color;
> +	unsigned int flags;
>  };
>
>  /**
> @@ -388,11 +389,9 @@ __drm_mm_interval_first(const struct drm_mm *mm, u64 start, u64 last);
>
>  void drm_mm_scan_init_with_range(struct drm_mm_scan *scan,
>  				 struct drm_mm *mm,
> -				 u64 size,
> -				 u64 alignment,
> -				 unsigned long color,
> -				 u64 start,
> -				 u64 end);
> +				 u64 size, u64 alignment, unsigned long color,
> +				 u64 start, u64 end,
> +				 unsigned int flags);
>
>  /**
>   * drm_mm_scan_init - initialize lru scanning
> @@ -401,10 +400,10 @@ void drm_mm_scan_init_with_range(struct drm_mm_scan *scan,
>   * @size: size of the allocation
>   * @alignment: alignment of the allocation
>   * @color: opaque tag value to use for the allocation
> + * @flags: flags to specify how the allocation will be performed afterwards
>   *
>   * This simply sets up the scanning routines with the parameters for the desired
> - * hole. Note that there's no need to specify allocation flags, since they only
> - * change the place a node is allocated from within a suitable hole.
> + * hole.
>   *
>   * Warning:
>   * As long as the scan list is non-empty, no other operations than
> @@ -414,10 +413,13 @@ static inline void drm_mm_scan_init(struct drm_mm_scan *scan,
>  				    struct drm_mm *mm,
>  				    u64 size,
>  				    u64 alignment,
> -				    unsigned long color)
> +				    unsigned long color,
> +				    unsigned int flags)
>  {
> -	drm_mm_scan_init_with_range(scan, mm, size, alignment, color,
> -				    0, U64_MAX);
> +	drm_mm_scan_init_with_range(scan, mm,
> +				    size, alignment, color,
> +				    0, U64_MAX,
> +				    flags);
>  }
>
>  bool drm_mm_scan_add_block(struct drm_mm_scan *scan,
> --
> 2.11.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
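For readers following the API change rather than the thread: the call sequence a driver uses around these scan helpers is sketched below. This is an illustrative example only, not code from the patch - the evict_entry structure, the LRU list and the helper name are invented here - but the drm_mm_scan_* calls follow the signatures introduced above, including the new flags argument.

	#include <linux/list.h>
	#include <drm/drm_mm.h>

	/* Hypothetical driver-side bookkeeping for this example. */
	struct evict_entry {
		struct drm_mm_node node;
		struct list_head lru;		/* driver-wide LRU list */
		struct list_head scan_link;	/* scratch list for the scan */
	};

	static int evict_for_hole(struct drm_mm *mm, struct list_head *lru,
				  u64 size, u64 alignment, unsigned long color,
				  u64 start, u64 end, unsigned int flags)
	{
		struct drm_mm_scan scan;
		struct evict_entry *e, *next;
		LIST_HEAD(scan_list);
		bool found = false;

		/* flags may include DRM_MM_CREATE_TOP to prefer a hole at the top. */
		drm_mm_scan_init_with_range(&scan, mm, size, alignment, color,
					    start, end, flags);

		/* Feed nodes to the scan in eviction order until a hole appears. */
		list_for_each_entry(e, lru, lru) {
			list_add(&e->scan_link, &scan_list);
			if (drm_mm_scan_add_block(&scan, &e->node)) {
				found = true;
				break;
			}
		}

		/* Every block added must be removed again, in reverse order of
		 * addition (list_add() prepends, so walking scan_list forwards
		 * does that).  After this patch, drm_mm_scan_remove_block()
		 * returns true only for nodes overlapping the chosen hole,
		 * i.e. the minimal eviction set.
		 */
		list_for_each_entry_safe(e, next, &scan_list, scan_link)
			if (!drm_mm_scan_remove_block(&scan, &e->node) || !found)
				list_del(&e->scan_link);

		if (!found)
			return -ENOSPC;

		/* Unbind and remove whatever is left on scan_list. */
		list_for_each_entry_safe(e, next, &scan_list, scan_link) {
			list_del(&e->scan_link);
			drm_mm_remove_node(&e->node);
		}

		return 0;
	}

With DRM_MM_CREATE_TOP the scan places the hole at the top of the range, which is what the PIN_HIGH handling added to i915_gem_evict_something() in the patch relies on.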
On Wed, Dec 28, 2016 at 02:01:29PM +0100, Daniel Vetter wrote:
> On Thu, Dec 22, 2016 at 08:36:33AM +0000, Chris Wilson wrote:
> > Compute the minimal required hole during scan and only evict those nodes
> > that overlap. This enables us to reduce the number of nodes we need to
> > evict to the bare minimum.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>
> Again, for next time around pls cc: driver maintainers too.

etnaviv wants a cyclic allocator, or at least it looks like it, so it
doesn't fit the api very well. (I can't tell if it is a misunderstanding
of drm_mm, or a design choice to try and implement a different allocation
strategy - strange fits happen on wraparound, as we then see a bad
combination of the cyclic and drm_mm's last-hole strategy.)

It actually wants something like

	if (size > mmu->domain->geometry.aperture_end - mmu->last_iova)
		mmu->last_iova = mmu->domain->geometry.aperture_start;

	drm_mm_for_each_node_in_range_safe(node, nn, &mmu->mm,
					   mmu->last_iova,
					   mmu->last_iova + size) {
		etnaviv_iommu_remove_mapping(mmu, m);
		m->mmu = NULL;
		mmu->need_flush = true;
	}

	node->start = mmu->last_iova;
	node->size = size;
	drm_mm_reserve_node(&mmu->mm, node);
	mmu->last_iova += size;

which would serve its needs more concisely. The push to make drm_mm_node
scale to large random patterns in i915 adversely affects such simple
users - we could make a very concise drm_mm_cyclic. Though that would
still have the danger of being a single consumer API.
-Chris
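If the drm_mm_cyclic idea above were ever split out, the insertion side might look like the following sketch. This is purely hypothetical - drm_mm_cyclic and drm_mm_cyclic_insert are not existing drm_mm APIs - and, as in the snippet above, retiring whatever currently occupies the wrapped-around range is left to the caller before the node is reserved.

	#include <drm/drm_mm.h>

	/* Hypothetical wrapper, not an existing drm_mm API. */
	struct drm_mm_cyclic {
		struct drm_mm mm;
		u64 start;	/* aperture start */
		u64 end;	/* aperture end */
		u64 last;	/* next insertion point */
	};

	static int drm_mm_cyclic_insert(struct drm_mm_cyclic *cm,
					struct drm_mm_node *node, u64 size)
	{
		int ret;

		/* Wrap back to the start when the remaining tail is too small. */
		if (size > cm->end - cm->last)
			cm->last = cm->start;

		/* The caller must already have retired any mappings overlapping
		 * [cm->last, cm->last + size), e.g. with the loop in the mail
		 * above; otherwise the reservation below fails with -ENOSPC.
		 */
		node->start = cm->last;
		node->size = size;
		ret = drm_mm_reserve_node(&cm->mm, node);
		if (ret)
			return ret;

		cm->last += size;
		return 0;
	}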