diff mbox

[2/2] drm/radeon: buffer memory placement work thread WIP

Message ID 1354203342-3961-3-git-send-email-j.glisse@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Jerome Glisse Nov. 29, 2012, 3:35 p.m. UTC
From: Jerome Glisse <jglisse@redhat.com>

Use a delayed work thread to move buffers out of VRAM if they haven't
been used for some period of time. This makes room for buffers that
are actively used.

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
---
 drivers/gpu/drm/radeon/radeon.h        |  13 ++
 drivers/gpu/drm/radeon/radeon_cs.c     |   2 +-
 drivers/gpu/drm/radeon/radeon_device.c |   8 ++
 drivers/gpu/drm/radeon/radeon_object.c | 241 ++++++++++++++++++++++++++++++++-
 drivers/gpu/drm/radeon/radeon_object.h |   3 +-
 5 files changed, 262 insertions(+), 5 deletions(-)
diff mbox

Patch

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 0a2664c..a2e92da 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -102,6 +102,8 @@  extern int radeon_lockup_timeout;
  */
 #define RADEON_MAX_USEC_TIMEOUT			100000	/* 100 ms */
 #define RADEON_FENCE_JIFFIES_TIMEOUT		(HZ / 2)
+#define RADEON_PLACEMENT_WORK_MS		500
+#define RADEON_PLACEMENT_MAX_EVICTION		8
 /* RADEON_IB_POOL_SIZE must be a power of 2 */
 #define RADEON_IB_POOL_SIZE			16
 #define RADEON_DEBUGFS_MAX_COMPONENTS		32
@@ -311,6 +313,10 @@  struct radeon_bo_va {
 struct radeon_bo {
 	/* Protected by gem.mutex */
 	struct list_head		list;
+	/* Protected by rdev->placement_mutex */
+	struct list_head		plist;
+	struct list_head		*head;
+	unsigned long			last_use_jiffies;
 	/* Protected by tbo.reserved */
 	u32				placements[3];
 	u32				busy_placements[3];
@@ -1523,6 +1529,13 @@  struct radeon_device {
 	struct drm_device		*ddev;
 	struct pci_dev			*pdev;
 	struct rw_semaphore		exclusive_lock;
+	struct mutex			placement_mutex;
+	struct list_head		wvram_in_list;
+	struct list_head		rvram_in_list;
+	struct list_head		wvram_out_list;
+	struct list_head		rvram_out_list;
+	struct delayed_work		placement_work;
+	unsigned long			vram_in_size;
 	/* ASIC */
 	union radeon_asic_config	config;
 	enum radeon_family		family;
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 41672cc..e9e90bc 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -88,7 +88,7 @@  static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 		} else
 			p->relocs[i].handle = 0;
 	}
-	return radeon_bo_list_validate(&p->validated);
+	return radeon_bo_list_validate(p->rdev, &p->validated);
 }
 
 static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index e2f5f88..0c4c874 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1001,6 +1001,14 @@  int radeon_device_init(struct radeon_device *rdev,
 	init_rwsem(&rdev->pm.mclk_lock);
 	init_rwsem(&rdev->exclusive_lock);
 	init_waitqueue_head(&rdev->irq.vblank_queue);
+
+	mutex_init(&rdev->placement_mutex);
+	INIT_LIST_HEAD(&rdev->wvram_in_list);
+	INIT_LIST_HEAD(&rdev->rvram_in_list);
+	INIT_LIST_HEAD(&rdev->wvram_out_list);
+	INIT_LIST_HEAD(&rdev->rvram_out_list);
+	INIT_DELAYED_WORK(&rdev->placement_work, radeon_placement_work_handler);
+
 	r = radeon_gem_init(rdev);
 	if (r)
 		return r;
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index e25ae20..f2bcc5f 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -64,6 +64,10 @@  static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo)
 	mutex_lock(&bo->rdev->gem.mutex);
 	list_del_init(&bo->list);
 	mutex_unlock(&bo->rdev->gem.mutex);
+	mutex_lock(&bo->rdev->placement_mutex);
+	list_del_init(&bo->plist);
+	bo->head = NULL;
+	mutex_unlock(&bo->rdev->placement_mutex);
 	radeon_bo_clear_surface_reg(bo);
 	radeon_bo_clear_va(bo);
 	drm_gem_object_release(&bo->gem_base);
@@ -153,6 +157,8 @@  int radeon_bo_create(struct radeon_device *rdev,
 	bo->surface_reg = -1;
 	INIT_LIST_HEAD(&bo->list);
 	INIT_LIST_HEAD(&bo->va);
+	INIT_LIST_HEAD(&bo->plist);
+	bo->head = NULL;
 	radeon_ttm_placement_from_domain(bo, domain);
 	/* Kernel allocation are uninterruptible */
 	down_read(&rdev->pm.mclk_lock);
@@ -263,8 +269,14 @@  int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset,
 		if (gpu_addr != NULL)
 			*gpu_addr = radeon_bo_gpu_offset(bo);
 	}
-	if (unlikely(r != 0))
+	if (unlikely(r != 0)) {
 		dev_err(bo->rdev->dev, "%p pin failed\n", bo);
+	} else {
+		mutex_lock(&bo->rdev->placement_mutex);
+		list_del_init(&bo->plist);
+		bo->head = NULL;
+		mutex_unlock(&bo->rdev->placement_mutex);
+	}
 	return r;
 }
 
@@ -353,11 +365,200 @@  void radeon_bo_list_add_object(struct radeon_bo_list *lobj,
 	}
 }
 
-int radeon_bo_list_validate(struct list_head *head)
+/**
+ * list_is_first - test whether @list is the first entry of a list
+ * @list: the entry to test
+ * @head: the head of the list
+ *
+ * Returns non-zero when the node linked in front of @list is @head itself.
+ */
+static inline int list_is_first(const struct list_head *list,
+				const struct list_head *head)
+{
+	const struct list_head *before = list->prev;
+
+	return before == head;
+}
+
+/**
+ * list_exchange - make two list entries trade places
+ * @list1: first entry
+ * @list2: second entry
+ *
+ * Each entry ends up linked where the other one was.  The only caller in
+ * this file passes an entry together with its immediate predecessor.
+ */
+static inline void list_exchange(struct list_head *list1,
+				struct list_head *list2)
+{
+	struct list_head *saved_next, *saved_prev;
+
+	/* Step 1: trade the forward links and repair the successors. */
+	saved_next = list1->next;
+	list1->next = list2->next;
+	list1->next->prev = list1;
+	list2->next = saved_next;
+	saved_next->prev = list2;
+
+	/*
+	 * Step 2: trade the backward links.  list1->prev has to be read
+	 * here, after step 1: for adjacent entries step 1 has already
+	 * rewritten it, and sampling it any earlier would leave an entry
+	 * pointing at itself.
+	 */
+	saved_prev = list1->prev;
+	list1->prev = list2->prev;
+	list1->prev->next = list1;
+	list2->prev = saved_prev;
+	saved_prev->next = list2;
+}
+
+/*
+ * Milliseconds between the last recorded use of @rbo and @now.  A stamp that
+ * lies ahead of @now is treated as a wrap around and reset to 0 first.
+ * Caller must hold rdev->placement_mutex.
+ */
+static unsigned long radeon_placement_idle_ms_locked(struct radeon_bo *rbo,
+						     unsigned long now)
+{
+	if (time_after(rbo->last_use_jiffies, now)) {
+		/* wrap around */
+		rbo->last_use_jiffies = 0;
+	}
+	return jiffies_to_msecs(now - rbo->last_use_jiffies);
+}
+
+/*
+ * Walk @list and collect into @moveout every buffer that has been idle for
+ * more than @min_idle_ms and that can be reserved without waiting, stopping
+ * once RADEON_PLACEMENT_MAX_EVICTION buffers are held.  Returns the updated
+ * number of entries in @moveout.  Caller must hold rdev->placement_mutex.
+ */
+static unsigned radeon_placement_collect_locked(struct list_head *list,
+						unsigned long now,
+						unsigned long min_idle_ms,
+						struct radeon_bo **moveout,
+						unsigned count)
+{
+	struct radeon_bo *rbo;
+
+	list_for_each_entry(rbo, list, plist) {
+		if (count >= RADEON_PLACEMENT_MAX_EVICTION)
+			break;
+		if (radeon_placement_idle_ms_locked(rbo, now) <= min_idle_ms)
+			continue;
+		/*
+		 * Never sleep on a reservation while holding placement_mutex:
+		 * radeon_bo_list_validate() reserves its buffers first and
+		 * takes placement_mutex afterwards, so a blocking reserve here
+		 * could deadlock against it.  A busy buffer is simply skipped.
+		 */
+		if (!ttm_bo_reserve(&rbo->tbo, false, true, false, 0))
+			moveout[count++] = rbo;
+	}
+	return count;
+}
+
+/**
+ * radeon_placement_work_handler - evict idle buffers from VRAM
+ * @work: the work_struct embedded in rdev->placement_work
+ *
+ * Picks the first buffer waiting to move into VRAM (write list first, then
+ * read list).  Buffers resident in VRAM that have been idle at least 50ms
+ * longer than that candidate are reserved (read list first, then write list,
+ * at most RADEON_PLACEMENT_MAX_EVICTION in total) and, once the placement
+ * mutex is dropped, moved to GTT unless they are pinned.  Does nothing but
+ * log when no buffer is waiting to move in.
+ */
+void radeon_placement_work_handler(struct work_struct *work)
+{
+	struct radeon_device *rdev;
+	struct radeon_bo *movein = NULL;
+	struct radeon_bo *moveout[RADEON_PLACEMENT_MAX_EVICTION];
+	unsigned ceviction = 0, i;
+	unsigned long cjiffies, min_idle_ms, size = 0;
+	int r;
+
+	rdev = container_of(work, struct radeon_device, placement_work.work);
+	mutex_lock(&rdev->placement_mutex);
+	/*
+	 * Sample the clock only once the mutex is held: every update of
+	 * last_use_jiffies in this file happens under it, so no buffer
+	 * touched while we waited for the lock can look newer than "now"
+	 * and be mistaken for a wrapped (stale) timestamp.
+	 */
+	cjiffies = jiffies;
+	if (!list_empty(&rdev->wvram_in_list)) {
+		movein = list_first_entry(&rdev->wvram_in_list, struct radeon_bo, plist);
+	} else if (!list_empty(&rdev->rvram_in_list)) {
+		movein = list_first_entry(&rdev->rvram_in_list, struct radeon_bo, plist);
+	}
+	if (movein == NULL) {
+		/* nothing is waiting to move in so do nothing */
+		goto out;
+	}
+	/*
+	 * Only evict buffers that haven't been used for at least 50ms
+	 * longer than the one waiting to move in.
+	 */
+	min_idle_ms = radeon_placement_idle_ms_locked(movein, cjiffies) + 50;
+	/* try to evict read buffers first, then written ones */
+	ceviction = radeon_placement_collect_locked(&rdev->rvram_out_list,
+						    cjiffies, min_idle_ms,
+						    moveout, ceviction);
+	ceviction = radeon_placement_collect_locked(&rdev->wvram_out_list,
+						    cjiffies, min_idle_ms,
+						    moveout, ceviction);
+out:
+	mutex_unlock(&rdev->placement_mutex);
+	/* the collected buffers are still reserved; move them and release */
+	for (i = 0; i < ceviction; i++) {
+		if (!moveout[i]->pin_count) {
+			radeon_ttm_placement_from_domain(moveout[i], RADEON_GEM_DOMAIN_GTT);
+			r = ttm_bo_validate(&moveout[i]->tbo, &moveout[i]->placement,
+					true, true, true);
+			if (!r) {
+				size += moveout[i]->tbo.mem.num_pages << PAGE_SHIFT;
+			}
+		}
+		radeon_bo_unreserve(moveout[i]);
+	}
+	/*
+	 * NOTE(review): vram_in_size is read and reset here without holding
+	 * placement_mutex, and radeon_bo_move_notify() also updates it
+	 * outside the lock, so the counter is only approximate.
+	 */
+	DRM_INFO("vram out (%8luMB %8luKB) vram in (%8luMB %8luKB)\n",
+		 size >> 20, size >> 10,
+		 rdev->vram_in_size >> 20, rdev->vram_in_size >> 10);
+	rdev->vram_in_size = 0;
+}
+
+/*
+ * Record that @rbo is wanted in VRAM by queueing it on the matching "move in"
+ * list: the write list when @wdomain includes VRAM, otherwise the read list
+ * when @rdomain does.  A buffer that is not yet on that list is appended to
+ * its tail; one that already is may swap places with its predecessor when
+ * the predecessor has been idle for longer.  Nothing happens when neither
+ * domain includes VRAM.  Caller must hold rdev->placement_mutex.
+ */
+static void radeon_bo_placement_promote_locked(struct radeon_bo *rbo, unsigned wdomain, unsigned rdomain)
+{
+	struct radeon_device *rdev = rbo->rdev;
+	struct list_head *in_list;
+	struct radeon_bo *prev_bo;
+	unsigned long now, idle_ms, prev_idle_ms;
+
+	now = jiffies;
+
+	/* a VRAM write request takes precedence over a VRAM read request */
+	if (wdomain & RADEON_GEM_DOMAIN_VRAM)
+		in_list = &rdev->wvram_in_list;
+	else if (rdomain & RADEON_GEM_DOMAIN_VRAM)
+		in_list = &rdev->rvram_in_list;
+	else
+		return;
+
+	/* a stamp ahead of "now" is treated as a wrap around */
+	if (time_after(rbo->last_use_jiffies, now))
+		rbo->last_use_jiffies = 0;
+	idle_ms = jiffies_to_msecs(now - rbo->last_use_jiffies);
+
+	if (list_empty(&rbo->plist) || rbo->head != in_list) {
+		/* not queued on the wanted list yet: append it */
+		list_del_init(&rbo->plist);
+		list_add_tail(&rbo->plist, in_list);
+		rbo->head = in_list;
+	} else if (!list_is_first(&rbo->plist, in_list)) {
+		/* move up one slot when the predecessor is staler than us */
+		prev_bo = list_entry(rbo->plist.prev, struct radeon_bo, plist);
+		if (time_after(prev_bo->last_use_jiffies, now))
+			prev_bo->last_use_jiffies = 0;
+		prev_idle_ms = jiffies_to_msecs(now - prev_bo->last_use_jiffies);
+		if (prev_idle_ms > idle_ms)
+			list_exchange(&rbo->plist, &prev_bo->plist);
+	}
+	rbo->last_use_jiffies = now;
+}
+
+/*
+ * Refresh the bookkeeping of a buffer that is being used: it goes to the
+ * tail of the list it is tracked on, or onto the tail of rvram_out_list when
+ * it is not tracked on any list yet, and its last-use stamp is set to the
+ * current jiffies.  Caller must hold rdev->placement_mutex.
+ */
+static void radeon_bo_placement_update_locked(struct radeon_bo *rbo)
+{
+	struct list_head *fallback;
+
+	if (rbo->head == NULL) {
+		/* untracked buffer: start it on the read "out" list */
+		fallback = &rbo->rdev->rvram_out_list;
+		list_del_init(&rbo->plist);
+		list_add_tail(&rbo->plist, fallback);
+		rbo->head = fallback;
+	} else {
+		list_move_tail(&rbo->plist, rbo->head);
+	}
+	rbo->last_use_jiffies = jiffies;
+}
+
+int radeon_bo_list_validate(struct radeon_device *rdev, struct list_head *head)
 {
 	struct radeon_bo_list *lobj;
 	struct radeon_bo *bo;
-	u32 domain;
 	int r;
 
 	r = ttm_eu_reserve_buffers(head);
@@ -367,6 +568,15 @@  int radeon_bo_list_validate(struct list_head *head)
 	list_for_each_entry(lobj, head, tv.head) {
 		bo = lobj->bo;
 		if (!bo->pin_count) {
+			if (bo->tbo.mem.mem_type != TTM_PL_VRAM) {
+				mutex_lock(&rdev->placement_mutex);
+				radeon_bo_placement_promote_locked(bo, lobj->wdomain, lobj->rdomain);
+				mutex_unlock(&rdev->placement_mutex);
+			} else {
+				mutex_lock(&rdev->placement_mutex);
+				radeon_bo_placement_update_locked(bo);
+				mutex_unlock(&rdev->placement_mutex);
+			}
 			r = ttm_bo_validate(&bo->tbo, &bo->placement,
 						true, false, false);
 			if (unlikely(r)) {
@@ -376,6 +586,8 @@  int radeon_bo_list_validate(struct list_head *head)
 		lobj->gpu_offset = radeon_bo_gpu_offset(bo);
 		lobj->tiling_flags = bo->tiling_flags;
 	}
+
+	schedule_delayed_work(&rdev->placement_work, msecs_to_jiffies(RADEON_PLACEMENT_WORK_MS));
 	return 0;
 }
 
@@ -558,11 +770,34 @@  void radeon_bo_move_notify(struct ttm_buffer_object *bo,
 			   struct ttm_mem_reg *mem)
 {
 	struct radeon_bo *rbo;
+	struct radeon_device *rdev;
+
 	if (!radeon_ttm_bo_is_radeon_bo(bo))
 		return;
 	rbo = container_of(bo, struct radeon_bo, tbo);
 	radeon_bo_check_tiling(rbo, 0, 1);
 	radeon_vm_bo_invalidate(rbo->rdev, rbo);
+	if (mem && mem->mem_type == TTM_PL_VRAM && !(mem->placement & TTM_PL_FLAG_NO_EVICT)) {
+		rdev = rbo->rdev;
+		mutex_lock(&rdev->placement_mutex);
+		if (rbo->head == &rdev->wvram_in_list) {
+			list_del_init(&rbo->plist);
+			list_add_tail(&rbo->plist, &rdev->wvram_out_list);
+			rbo->head = &rdev->wvram_out_list;
+		} else {
+			list_del_init(&rbo->plist);
+			list_add_tail(&rbo->plist, &rdev->rvram_out_list);
+			rbo->head = &rdev->rvram_out_list;
+		}
+		mutex_unlock(&rdev->placement_mutex);
+		rdev->vram_in_size += rbo->tbo.mem.num_pages << PAGE_SHIFT;
+	} else {
+		rdev = rbo->rdev;
+		mutex_lock(&rdev->placement_mutex);
+		list_del_init(&rbo->plist);
+		rbo->head = NULL;
+		mutex_unlock(&rdev->placement_mutex);
+	}
 }
 
 int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
index 93cd491..7babfc9 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -128,7 +128,7 @@  extern int radeon_bo_init(struct radeon_device *rdev);
 extern void radeon_bo_fini(struct radeon_device *rdev);
 extern void radeon_bo_list_add_object(struct radeon_bo_list *lobj,
 				struct list_head *head);
-extern int radeon_bo_list_validate(struct list_head *head);
+extern int radeon_bo_list_validate(struct radeon_device *rdev, struct list_head *head);
 extern int radeon_bo_fbdev_mmap(struct radeon_bo *bo,
 				struct vm_area_struct *vma);
 extern int radeon_bo_set_tiling_flags(struct radeon_bo *bo,
@@ -141,6 +141,7 @@  extern void radeon_bo_move_notify(struct ttm_buffer_object *bo,
 					struct ttm_mem_reg *mem);
 extern int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
 extern int radeon_bo_get_surface_reg(struct radeon_bo *bo);
+extern void radeon_placement_work_handler(struct work_struct *work);
 
 /*
  * sub allocation