Message ID | 20221128063419.101586-1-xinhui.pan@amd.com (mailing list archive)
---|---
State | New, archived
Series | [v3] drm: Optimise for continuous memory allocation
Hi Xinhui,

On 11/28/2022 12:04 PM, xinhui pan wrote:
> Currently drm-buddy does not have full knowledge of continuous memory.
>
> Lets consider scenario below.
> order 1:    L        R
> order 0:  LL  LR   RL  RR
> for order 1 allocation, it can offer L or R or LR+RL.
>
> For now, we only implement L or R case for continuous memory allocation.
> So this patch aims to implement the LR+RL case.
>
> Signed-off-by: xinhui pan <xinhui.pan@amd.com>
> ---
> change from v2:
> search continuous block in nearby root if needed
>
> change from v1:
> implement top-down continuous allocation
> ---
>  drivers/gpu/drm/drm_buddy.c | 78 +++++++++++++++++++++++++++++++++----
>  1 file changed, 71 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
> index 11bb59399471..ff58eb3136d2 100644
> --- a/drivers/gpu/drm/drm_buddy.c
> +++ b/drivers/gpu/drm/drm_buddy.c
> @@ -386,6 +386,58 @@ alloc_range_bias(struct drm_buddy *mm,
>  	return ERR_PTR(err);
>  }
>
> +static struct drm_buddy_block *
> +find_continuous_blocks(struct drm_buddy *mm,
> +		       int order,
> +		       unsigned long flags,
> +		       struct drm_buddy_block **rn)
> +{
> +	struct list_head *head = &mm->free_list[order];
> +	struct drm_buddy_block *node, *parent, *free_node, *max_node = NULL;

NIT: We usually name the variable as *block or ***_block for drm buddy and
we have *node or ***_node for drm mm manager.

> +	int i;
> +
> +	list_for_each_entry(free_node, head, link) {
> +		if (max_node) {
> +			if (!(flags & DRM_BUDDY_TOPDOWN_ALLOCATION))
> +				break;
> +
> +			if (drm_buddy_block_offset(free_node) <
> +			    drm_buddy_block_offset(max_node))
> +				continue;
> +		}
> +
> +		parent = free_node;
> +		do {
> +			node = parent;
> +			parent = parent->parent;
> +		} while (parent && parent->right == node);
> +
> +		if (!parent) {
> +			for (i = 0; i < mm->n_roots - 1; i++)
> +				if (mm->roots[i] == node)
> +					break;
> +			if (i == mm->n_roots - 1)
> +				continue;
> +			node = mm->roots[i + 1];
> +		} else {
> +			node = parent->right;
> +		}
> +
> +		while (drm_buddy_block_is_split(node))
> +			node = node->left;
> +
> +		if (drm_buddy_block_is_free(node) &&
> +		    drm_buddy_block_order(node) == order) {
> +			*rn = node;
> +			max_node = free_node;
> +			BUG_ON(drm_buddy_block_offset(node) !=
> +			       drm_buddy_block_offset(max_node) +
> +			       drm_buddy_block_size(mm, max_node));
> +		}
> +	}
> +	return max_node;
> +}
> +
>  static struct drm_buddy_block *
>  get_maxblock(struct list_head *head)
>  {
> @@ -637,7 +689,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>  			   struct list_head *blocks,
>  			   unsigned long flags)
>  {
> -	struct drm_buddy_block *block = NULL;
> +	struct drm_buddy_block *block = NULL, *rblock = NULL;
>  	unsigned int min_order, order;
>  	unsigned long pages;
>  	LIST_HEAD(allocated);
> @@ -689,17 +741,29 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>  			break;
>
>  		if (order-- == min_order) {
> +			if (!(flags & DRM_BUDDY_RANGE_ALLOCATION) &&
> +			    min_order != 0 && pages == BIT(order + 1)) {
> +				block = find_continuous_blocks(mm,
> +							       order,
> +							       flags,
> +							       &rblock);
> +				if (block)
> +					break;
> +			}
>  			err = -ENOSPC;
>  			goto err_free;
>  		}
>  	} while (1);
>
> -	mark_allocated(block);
> -	mm->avail -= drm_buddy_block_size(mm, block);
> -	kmemleak_update_trace(block);
> -	list_add_tail(&block->link, &allocated);
> -
> -	pages -= BIT(order);
> +	do {
> +		mark_allocated(block);
> +		mm->avail -= drm_buddy_block_size(mm, block);
> +		kmemleak_update_trace(block);
> +		list_add_tail(&block->link, &allocated);
> +		pages -= BIT(order);
> +		block = rblock;
> +		rblock = NULL;
> +	} while (block);

I think with this approach, if we are lucky enough we may get contiguous
blocks in one order level down in RL combination from the freelist?

Regards,
Arun

>
>  		if (!pages)
>  			break;
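The heart of find_continuous_blocks() is the walk that, starting from a free
block, locates the block that begins exactly where it ends: climb the tree
while the current node is a right child, step across to that ancestor's right
child, then descend along the left edge. A minimal standalone sketch of that
walk, written against a hypothetical toy_block structure rather than the
kernel's struct drm_buddy_block (the names below are illustrative only), could
look like this:

#include <stdbool.h>
#include <stddef.h>

/* Hypothetical, simplified stand-in for a buddy-tree node. */
struct toy_block {
	struct toy_block *parent;
	struct toy_block *left;
	struct toy_block *right;
	bool split;	/* true if this block has been split into two buddies */
};

/*
 * Return the block whose offset immediately follows @blk within the same
 * tree, or NULL if @blk is the right-most block of its root.  The caller
 * still has to check that the result is free and of the wanted order,
 * just as the patch does after this walk.
 */
static struct toy_block *toy_right_neighbour(struct toy_block *blk)
{
	struct toy_block *node = blk;
	struct toy_block *parent = blk->parent;

	/* Climb while we are a right child: the neighbour lives in the
	 * right subtree of the first ancestor reached from its left side. */
	while (parent && parent->right == node) {
		node = parent;
		parent = parent->parent;
	}
	if (!parent)
		return NULL;	/* right-most block of this root */

	/* Step across to the sibling subtree on the right ... */
	node = parent->right;

	/* ... and descend along its left edge to the first unsplit block,
	 * which starts right where @blk ends. */
	while (node->split)
		node = node->left;

	return node;
}

In the patch itself the NULL case is additionally handled by moving to the
next entry in mm->roots[], relying on neighbouring roots covering adjacent
offsets, which the BUG_ON then asserts.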
[AMD Official Use Only - General]

Hi Arun,

Thanks for your reply. Comments are inline.