[1/1] drm/mm: optimize rb_hole_addr rbtree search in high addr mode

Message ID 20200428160423.4402-1-nirmoy.das@amd.com (mailing list archive)
State New, archived
Series [1/1] drm/mm: optimize rb_hole_addr rbtree search in high addr mode

Commit Message

Nirmoy Das April 28, 2020, 4:04 p.m. UTC
Userspace can severely fragment the rb_hole_addr rbtree by manipulating the
alignment while allocating buffers, e.g. by repeatedly allocating 4 KiB
buffers with 8 KiB alignment as in the test code below. A fragmented
rb_hole_addr rbtree results in large delays when allocating a buffer object
for a userspace application. Finding a suitable hole takes a long time
because, if the first attempt fails, we look for neighbouring nodes using
rb_prev(), and traversing the rbtree with rb_prev() can take a very long
time when the tree is fragmented.

This patch improves searches in a fragmented rb_hole_addr rbtree by storing
an extra field, max_hole_size, in drm_mm_node. Each drm_mm_node now stores
the maximum hole size of its subtree in drm_mm_node->max_hole_size. Using
drm_mm_node->max_hole_size, a complete subtree can be skipped if it is
unable to serve a request, which reduces the number of rb_prev() calls.
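
To illustrate the idea outside the kernel, here is a minimal user-space sketch
of the augmentation; struct hole_node, update_max_hole() and subtree_may_fit()
are made-up names for this example only, the actual implementation is
update_addr_hole_node()/next_hole_high_addr() in the patch below:

#include <stdbool.h>
#include <stdint.h>

struct hole_node {
	struct hole_node *left, *right;
	uint64_t hole_size;	/* size of the hole that follows this node    */
	uint64_t max_hole_size;	/* largest hole_size anywhere in this subtree */
};

static uint64_t max3(uint64_t a, uint64_t b, uint64_t c)
{
	uint64_t m = a > b ? a : b;

	return m > c ? m : c;
}

/* Re-establish the invariant for @n after its children have changed. */
static void update_max_hole(struct hole_node *n)
{
	n->max_hole_size = max3(n->hole_size,
				n->left ? n->left->max_hole_size : 0,
				n->right ? n->right->max_hole_size : 0);
}

/* A subtree whose cached maximum is too small can never serve the request. */
static bool subtree_may_fit(const struct hole_node *subtree, uint64_t size)
{
	return subtree && subtree->max_hole_size >= size;
}

The point is that a single comparison against max_hole_size rules out an
entire subtree, instead of visiting each of its nodes with rb_prev().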

Note: The implementation details of the rb_hole_addr rbtree updates after
node removal and insertion are borrowed from block/bfq-wf2q.c, which tries
to achieve a similar goal.

With this patch applied, 1 million bo allocations on amdgpu took ~8 sec,
while without the patch the test code took 28 sec for only 50k bo
allocations.

partial test code:
/* headers assumed for a standalone build; not part of the original snippet */
#include <fcntl.h>
#include <stdint.h>

#include <amdgpu.h>
#include <amdgpu_drm.h>

int test_fragmentation(void)
{
#define MAX_ALLOC 1000000

	int i = 0;
	uint32_t  minor_version;
	uint32_t  major_version;

	struct amdgpu_bo_alloc_request request = {};
	/* static: one million handles (~8 MB) would overflow a default stack */
	static amdgpu_bo_handle vram_handle[MAX_ALLOC];
	amdgpu_device_handle device_handle;

	/* 4 KiB buffers with 8 KiB alignment leave a hole behind each allocation */
	request.alloc_size = 4096;
	request.phys_alignment = 8192;
	request.preferred_heap = AMDGPU_GEM_DOMAIN_VRAM;

	int fd = open("/dev/dri/card0", O_RDWR | O_CLOEXEC);
	amdgpu_device_initialize(fd, &major_version, &minor_version, &device_handle);

	for (i = 0; i < MAX_ALLOC; i++)
		amdgpu_bo_alloc(device_handle, &request, &vram_handle[i]);

	for (i = 0; i < MAX_ALLOC; i++)
		amdgpu_bo_free(vram_handle[i]);

	return 0;
}
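
The snippet above is only the core of the test: actually running it also needs
a main() that calls test_fragmentation() and, presumably, linking against
libdrm_amdgpu (e.g. something like gcc test.c $(pkg-config --cflags --libs
libdrm_amdgpu)); the exact build command is an assumption and will vary by
setup.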

Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
---
 drivers/gpu/drm/drm_mm.c | 140 +++++++++++++++++++++++++++++++++++++--
 include/drm/drm_mm.h     |   1 +
 2 files changed, 137 insertions(+), 4 deletions(-)

--
2.26.1

Comments

kernel test robot April 29, 2020, 11:02 p.m. UTC | #1
Hi Nirmoy,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on drm-intel/for-linux-next]
[also build test WARNING on drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next drm-tip/drm-tip linus/master v5.7-rc3 next-20200429]
[cannot apply to drm/drm-next]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:    https://github.com/0day-ci/linux/commits/Nirmoy-Das/drm-mm-optimize-rb_hole_addr-rbtree-search-in-high-addr-mode/20200429-154950
base:   git://anongit.freedesktop.org/drm-intel for-linux-next
reproduce:
        # apt-get install sparse
        # sparse version: v0.6.1-191-gc51a0382-dirty
        make ARCH=x86_64 allmodconfig
        make C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__'

If you fix the issue, kindly add following tag as appropriate
Reported-by: kbuild test robot <lkp@intel.com>


sparse warnings: (new ones prefixed by >>)

>> drivers/gpu/drm/drm_mm.c:455:24: sparse: sparse: Using plain integer as NULL pointer

vim +455 drivers/gpu/drm/drm_mm.c

   431	
   432	/**
   433	 * next_hole_high_addr - returns next hole for a DRM_MM_INSERT_HIGH mode request
   434	 * @entry: previously selected drm_mm_node
   435	 * @size: size of the a hole needed for the request
   436	 *
   437	 * This function will verify whether left subtree of @entry has hole big enough
   438	 * to fit the requtested size. If so, it will return previous node of @entry or
   439	 * else it will return parent node of @entry
   440	 *
   441	 * It will also skip the complete left subtree if max_hole_size of that subtree
   442	 * is same as the max_hole_size of the @entry.
   443	 *
   444	 * Returns:
   445	 * previous node of @entry if left subtree of @entry can serve the request or
   446	 * else return parent of @entry
   447	 */
   448	static struct drm_mm_node *
   449	next_hole_high_addr(struct drm_mm_node *entry, u64 size)
   450	{
   451		struct rb_node *rb_node, *left_rb_node, *parent_rb_node;
   452		struct drm_mm_node *left_node;
   453	
   454		if (!entry)
 > 455			return false;
   456	
   457		rb_node = &entry->rb_hole_addr;
   458		if (rb_node->rb_left) {
   459			left_rb_node = rb_node->rb_left;
   460			parent_rb_node = rb_parent(rb_node);
   461			left_node = rb_entry(left_rb_node,
   462					     struct drm_mm_node, rb_hole_addr);
   463			if ((left_node->max_hole_size < size ||
   464			     entry->size == entry->max_hole_size) &&
   465			    parent_rb_node && parent_rb_node->rb_left != rb_node)
   466				return rb_hole_addr_to_node(parent_rb_node);
   467		}
   468	
   469		return rb_hole_addr_to_node(rb_prev(rb_node));
   470	}
   471	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

Patch

diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 8981abe8b7c9..5076d3761f51 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -212,6 +212,84 @@  static void drm_mm_interval_tree_add_node(struct drm_mm_node *hole_node,
 				   &drm_mm_interval_tree_augment);
 }

+static void update_addr_hole_node_max(struct drm_mm_node *entity,
+				      struct rb_node *node)
+{
+	struct drm_mm_node *child;
+
+	if (node) {
+		child = rb_entry(node, struct drm_mm_node, rb_hole_addr);
+		if (child->max_hole_size > entity->max_hole_size)
+			entity->max_hole_size = child->max_hole_size;
+	}
+}
+
+static void update_addr_hole_node(struct rb_node *node)
+{
+	struct drm_mm_node *entity = rb_entry(node, struct drm_mm_node,
+					      rb_hole_addr);
+
+	entity->max_hole_size = entity->hole_size;
+	update_addr_hole_node_max(entity, node->rb_right);
+	update_addr_hole_node_max(entity, node->rb_left);
+}
+
+/**
+ * update_addr_hole_rbtree - update max_hole_size fields up to the root.
+ * @node: the starting node.
+ *
+ * This function updates the max_hole_size field of @node and of all the
+ * nodes on the path to the root.
+ */
+static void update_addr_hole_rbtree(struct rb_node *node)
+{
+	struct rb_node *parent;
+
+	while (node) {
+		update_addr_hole_node(node);
+
+		parent = rb_parent(node);
+		if (!parent)
+			return;
+
+		if (node == parent->rb_left && parent->rb_right)
+			update_addr_hole_node(parent->rb_right);
+		else if (parent->rb_left)
+			update_addr_hole_node(parent->rb_left);
+		node = parent;
+	}
+}
+
+/**
+ * find_deepest - find the deepest node that an extraction can modify.
+ * @node: the node being removed.
+ *
+ * This function returns the deepest node that can be moved by rbtree's
+ * rebalance operation after removal of @node. If @node is the last node
+ * in the tree then return NULL.
+ */
+static struct rb_node *find_deepest(struct rb_node *node)
+{
+	struct rb_node *deepest = NULL;
+
+	if (!node->rb_left && !node->rb_right)
+		deepest = rb_parent(node);
+	else if (!node->rb_right)
+		deepest = node->rb_left;
+	else if (!node->rb_left)
+		deepest = node->rb_right;
+	else {
+		deepest = rb_next(node);
+		if (deepest->rb_right)
+			deepest = deepest->rb_right;
+		else if (rb_parent(deepest) != node)
+			deepest = rb_parent(deepest);
+
+	}
+
+	return deepest;
+}
+
 #define RB_INSERT(root, member, expr) do { \
 	struct rb_node **link = &root.rb_node, *rb = NULL; \
 	u64 x = expr(node); \
@@ -258,26 +336,39 @@  static void insert_hole_size(struct rb_root_cached *root,
 static void add_hole(struct drm_mm_node *node)
 {
 	struct drm_mm *mm = node->mm;
+	struct rb_node *rb = &node->rb_hole_addr;

 	node->hole_size =
 		__drm_mm_hole_node_end(node) - __drm_mm_hole_node_start(node);
+	node->max_hole_size = node->hole_size;
 	DRM_MM_BUG_ON(!drm_mm_hole_follows(node));

 	insert_hole_size(&mm->holes_size, node);
 	RB_INSERT(mm->holes_addr, rb_hole_addr, HOLE_ADDR);
-
 	list_add(&node->hole_stack, &mm->hole_stack);
+
+	if (rb->rb_left)
+		rb = rb->rb_left;
+	else if (rb->rb_right)
+		rb = rb->rb_right;
+
+	update_addr_hole_rbtree(rb);
 }

 static void rm_hole(struct drm_mm_node *node)
 {
+	struct rb_node *rb_deepest;
 	DRM_MM_BUG_ON(!drm_mm_hole_follows(node));

 	list_del(&node->hole_stack);
 	rb_erase_cached(&node->rb_hole_size, &node->mm->holes_size);
+
+	rb_deepest = find_deepest(&node->rb_hole_addr);
 	rb_erase(&node->rb_hole_addr, &node->mm->holes_addr);
-	node->hole_size = 0;
+	update_addr_hole_rbtree(rb_deepest);

+	node->hole_size = 0;
+	node->max_hole_size = 0;
 	DRM_MM_BUG_ON(drm_mm_hole_follows(node));
 }

@@ -338,6 +429,46 @@  static struct drm_mm_node *find_hole(struct drm_mm *mm, u64 addr)
 	return node;
 }

+/**
+ * next_hole_high_addr - returns next hole for a DRM_MM_INSERT_HIGH mode request
+ * @entry: previously selected drm_mm_node
+ * @size: size of the hole needed for the request
+ *
+ * This function will verify whether left subtree of @entry has hole big enough
+ * to fit the requested size. If so, it will return previous node of @entry or
+ * else it will return parent node of @entry
+ *
+ * It will also skip the complete left subtree if max_hole_size of that subtree
+ * is the same as the max_hole_size of @entry.
+ *
+ * Returns:
+ * previous node of @entry if left subtree of @entry can serve the request or
+ * else return parent of @entry
+ */
+static struct drm_mm_node *
+next_hole_high_addr(struct drm_mm_node *entry, u64 size)
+{
+	struct rb_node *rb_node, *left_rb_node, *parent_rb_node;
+	struct drm_mm_node *left_node;
+
+	if (!entry)
+		return false;
+
+	rb_node = &entry->rb_hole_addr;
+	if (rb_node->rb_left) {
+		left_rb_node = rb_node->rb_left;
+		parent_rb_node = rb_parent(rb_node);
+		left_node = rb_entry(left_rb_node,
+				     struct drm_mm_node, rb_hole_addr);
+		if ((left_node->max_hole_size < size ||
+		     entry->size == entry->max_hole_size) &&
+		    parent_rb_node && parent_rb_node->rb_left != rb_node)
+			return rb_hole_addr_to_node(parent_rb_node);
+	}
+
+	return rb_hole_addr_to_node(rb_prev(rb_node));
+}
+
 static struct drm_mm_node *
 first_hole(struct drm_mm *mm,
 	   u64 start, u64 end, u64 size,
@@ -364,6 +495,7 @@  first_hole(struct drm_mm *mm,
 static struct drm_mm_node *
 next_hole(struct drm_mm *mm,
 	  struct drm_mm_node *node,
+	  u64 size,
 	  enum drm_mm_insert_mode mode)
 {
 	switch (mode) {
@@ -375,7 +507,7 @@  next_hole(struct drm_mm *mm,
 		return rb_hole_addr_to_node(rb_next(&node->rb_hole_addr));

 	case DRM_MM_INSERT_HIGH:
-		return rb_hole_addr_to_node(rb_prev(&node->rb_hole_addr));
+		return next_hole_high_addr(node, size);

 	case DRM_MM_INSERT_EVICT:
 		node = list_next_entry(node, hole_stack);
@@ -489,7 +621,7 @@  int drm_mm_insert_node_in_range(struct drm_mm * const mm,
 	remainder_mask = is_power_of_2(alignment) ? alignment - 1 : 0;
 	for (hole = first_hole(mm, range_start, range_end, size, mode);
 	     hole;
-	     hole = once ? NULL : next_hole(mm, hole, mode)) {
+	     hole = once ? NULL : next_hole(mm, hole, size, mode)) {
 		u64 hole_start = __drm_mm_hole_node_start(hole);
 		u64 hole_end = hole_start + hole->hole_size;
 		u64 adj_start, adj_end;
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index ee8b0e80ca90..0ee555ff2d9e 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -168,6 +168,7 @@  struct drm_mm_node {
 	struct rb_node rb_hole_addr;
 	u64 __subtree_last;
 	u64 hole_size;
+	u64 max_hole_size;
 	unsigned long flags;
 #define DRM_MM_NODE_ALLOCATED_BIT	0
 #define DRM_MM_NODE_SCANNED_BIT		1