@@ -102,6 +102,8 @@ extern int radeon_lockup_timeout;
*/
#define RADEON_MAX_USEC_TIMEOUT 100000 /* 100 ms */
#define RADEON_FENCE_JIFFIES_TIMEOUT (HZ / 2)
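+/* Delay before the placement work runs after a command submission, and the
+ * maximum number of buffers it will try to evict from vram in a single run.
+ */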
+#define RADEON_PLACEMENT_WORK_MS 500
+#define RADEON_PLACEMENT_MAX_EVICTION 8
/* RADEON_IB_POOL_SIZE must be a power of 2 */
#define RADEON_IB_POOL_SIZE 16
#define RADEON_DEBUGFS_MAX_COMPONENTS 32
@@ -311,6 +313,10 @@ struct radeon_bo_va {
struct radeon_bo {
/* Protected by gem.mutex */
struct list_head list;
+ /* Protected by rdev->placement_mutex */
+ struct list_head plist; /* entry on one of the rdev placement lists */
+ struct list_head *head; /* placement list this bo is on, NULL if none */
+ unsigned long last_use_jiffies; /* jiffies of the last validation of this bo */
/* Protected by tbo.reserved */
u32 placements[3];
u32 busy_placements[3];
@@ -1523,6 +1529,13 @@ struct radeon_device {
struct drm_device *ddev;
struct pci_dev *pdev;
struct rw_semaphore exclusive_lock;
+ struct mutex placement_mutex; /* protects the placement lists below */
+ struct list_head wvram_in_list; /* bos waiting to move into vram for write */
+ struct list_head rvram_in_list; /* bos waiting to move into vram for read */
+ struct list_head wvram_out_list; /* write bos in vram, eviction candidates */
+ struct list_head rvram_out_list; /* read bos in vram, eviction candidates */
+ struct delayed_work placement_work; /* background vram eviction work */
+ unsigned long vram_in_size; /* bytes moved into vram since last work run */
/* ASIC */
union radeon_asic_config config;
enum radeon_family family;
@@ -88,7 +88,7 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
} else
p->relocs[i].handle = 0;
}
- return radeon_bo_list_validate(&p->validated);
+ return radeon_bo_list_validate(p->rdev, &p->validated);
}
static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
@@ -1001,6 +1001,14 @@ int radeon_device_init(struct radeon_device *rdev,
init_rwsem(&rdev->pm.mclk_lock);
init_rwsem(&rdev->exclusive_lock);
init_waitqueue_head(&rdev->irq.vblank_queue);
+
+ mutex_init(&rdev->placement_mutex);
+ INIT_LIST_HEAD(&rdev->wvram_in_list);
+ INIT_LIST_HEAD(&rdev->rvram_in_list);
+ INIT_LIST_HEAD(&rdev->wvram_out_list);
+ INIT_LIST_HEAD(&rdev->rvram_out_list);
+ INIT_DELAYED_WORK(&rdev->placement_work, radeon_placement_work_handler);
+
r = radeon_gem_init(rdev);
if (r)
return r;
@@ -64,6 +64,10 @@ static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo)
mutex_lock(&bo->rdev->gem.mutex);
list_del_init(&bo->list);
mutex_unlock(&bo->rdev->gem.mutex);
+ mutex_lock(&bo->rdev->placement_mutex);
+ list_del_init(&bo->plist);
+ bo->head = NULL;
+ mutex_unlock(&bo->rdev->placement_mutex);
radeon_bo_clear_surface_reg(bo);
radeon_bo_clear_va(bo);
drm_gem_object_release(&bo->gem_base);
@@ -153,6 +157,8 @@ int radeon_bo_create(struct radeon_device *rdev,
bo->surface_reg = -1;
INIT_LIST_HEAD(&bo->list);
INIT_LIST_HEAD(&bo->va);
+ INIT_LIST_HEAD(&bo->plist);
+ bo->head = NULL;
radeon_ttm_placement_from_domain(bo, domain);
/* Kernel allocation are uninterruptible */
down_read(&rdev->pm.mclk_lock);
@@ -263,8 +269,14 @@ int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset,
if (gpu_addr != NULL)
*gpu_addr = radeon_bo_gpu_offset(bo);
}
- if (unlikely(r != 0))
+ if (unlikely(r != 0)) {
dev_err(bo->rdev->dev, "%p pin failed\n", bo);
+ } else {
+ mutex_lock(&bo->rdev->placement_mutex);
+ list_del_init(&bo->plist);
+ bo->head = NULL;
+ mutex_unlock(&bo->rdev->placement_mutex);
+ }
return r;
}
@@ -353,11 +365,200 @@ void radeon_bo_list_add_object(struct radeon_bo_list *lobj,
}
}
-int radeon_bo_list_validate(struct list_head *head)
+static inline int list_is_first(const struct list_head *list,
+ const struct list_head *head)
+{
+ return list->prev == head;
+}
+
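+/* Swap the positions of two list entries; this also works when the two
+ * entries are adjacent, which is how it is used below to move a bo one
+ * step up its placement list.
+ */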
+static inline void list_exchange(struct list_head *list1,
+ struct list_head *list2)
+{
+ struct list_head *tmp;
+
+ tmp = list1->next;
+ list1->next = list2->next;
+ list1->next->prev = list1;
+ list2->next = tmp;
+ list2->next->prev = list2;
+
+ tmp = list1->prev;
+ list1->prev = list2->prev;
+ list1->prev->next = list1;
+ list2->prev = tmp;
+ list2->prev->next = list2;
+}
+
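+/* Delayed work handler: make room in vram for pending move-ins.  Pick a
+ * buffer waiting to move into vram (write list first), then move up to
+ * RADEON_PLACEMENT_MAX_EVICTION buffers that already sit in vram and have
+ * been idle at least 50ms longer than it out to GTT (read buffers first).
+ */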
+void radeon_placement_work_handler(struct work_struct *work)
+{
+ struct radeon_device *rdev;
+ struct radeon_bo *rbo, *movein = NULL;
+ struct radeon_bo *moveout[RADEON_PLACEMENT_MAX_EVICTION];
+ unsigned ceviction = 0;
+ unsigned long cjiffies = jiffies, size = 0;
+ unsigned long elapsed_ms, eelapsed_ms;
+ int r, i;
+
+ rdev = container_of(work, struct radeon_device, placement_work.work);
+ mutex_lock(&rdev->placement_mutex);
+ if (!list_empty(&rdev->wvram_in_list)) {
+ movein = list_first_entry(&rdev->wvram_in_list, struct radeon_bo, plist);
+ }
+ if (movein == NULL && !list_empty(&rdev->rvram_in_list)) {
+ movein = list_first_entry(&rdev->rvram_in_list, struct radeon_bo, plist);
+ }
+ if (movein == NULL) {
+ /* nothing is waiting to move in so do nothing */
+ goto out;
+ }
+ if (time_after(movein->last_use_jiffies, cjiffies)) {
+ /* wrap around */
+ movein->last_use_jiffies = 0;
+ }
+ elapsed_ms = jiffies_to_msecs(cjiffies - movein->last_use_jiffies);
+ /* try to evict read buffer first */
+ list_for_each_entry(rbo, &rdev->rvram_out_list, plist) {
+ if (time_after(rbo->last_use_jiffies, cjiffies)) {
+ /* wrap around */
+ rbo->last_use_jiffies = 0;
+ }
+ eelapsed_ms = jiffies_to_msecs(cjiffies - rbo->last_use_jiffies);
+ if (eelapsed_ms > (elapsed_ms + 50)) {
+ /* hasn't been used for at least 50ms longer than the
+ * buffer waiting to move in
+ */
+ r = radeon_bo_reserve(rbo, false);
+ if (!r) {
+ moveout[ceviction++] = rbo;
+ }
+ }
+ if (ceviction >= RADEON_PLACEMENT_MAX_EVICTION) {
+ goto out;
+ }
+ }
+ if (ceviction >= RADEON_PLACEMENT_MAX_EVICTION) {
+ goto out;
+ }
+ list_for_each_entry(rbo, &rdev->wvram_out_list, plist) {
+ if (time_after(rbo->last_use_jiffies, cjiffies)) {
+ /* wrap around */
+ rbo->last_use_jiffies = 0;
+ }
+ eelapsed_ms = jiffies_to_msecs(cjiffies - rbo->last_use_jiffies);
+ if (eelapsed_ms > (elapsed_ms + 50)) {
+ /* hasn't been used for at least 50ms longer than the
+ * buffer waiting to move in
+ */
+ r = radeon_bo_reserve(rbo, false);
+ if (!r) {
+ moveout[ceviction++] = rbo;
+ }
+ }
+ if (ceviction >= RADEON_PLACEMENT_MAX_EVICTION) {
+ goto out;
+ }
+ }
+out:
+ mutex_unlock(&rdev->placement_mutex);
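+ /* move the selected buffers to GTT outside the placement mutex and
+ * drop the reservations taken above
+ */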
+ for (i = 0; i < ceviction; i++) {
+ if (!moveout[i]->pin_count) {
+ radeon_ttm_placement_from_domain(moveout[i], RADEON_GEM_DOMAIN_GTT);
+ r = ttm_bo_validate(&moveout[i]->tbo, &moveout[i]->placement,
+ true, true, true);
+ if (!r) {
+ size += moveout[i]->tbo.mem.num_pages << PAGE_SHIFT;
+ }
+ }
+ radeon_bo_unreserve(moveout[i]);
+ }
+ DRM_INFO("vram out (%8ldMB %8ldKB) vram in (%8ldMB %8ldKB)\n", size >> 20, size >> 10, rdev->vram_in_size >> 20, rdev->vram_in_size >> 10);
+ rdev->vram_in_size = 0;
+}
+
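+/* Called with placement_mutex held.  Queue @rbo on the write or read vram
+ * move-in list depending on the requested domains and bubble it up the list
+ * so that more recently used buffers sit closer to the head.
+ */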
+static void radeon_bo_placement_promote_locked(struct radeon_bo *rbo, unsigned wdomain, unsigned rdomain)
+{
+ struct radeon_device *rdev = rbo->rdev;
+ unsigned long cjiffies, elapsed_ms;
+
+ cjiffies = jiffies;
+ if (wdomain & RADEON_GEM_DOMAIN_VRAM) {
+ if (time_after(rbo->last_use_jiffies, cjiffies)) {
+ /* wrap around */
+ rbo->last_use_jiffies = 0;
+ }
+ elapsed_ms = jiffies_to_msecs(cjiffies - rbo->last_use_jiffies);
+
+ if (list_empty(&rbo->plist) || rbo->head != &rdev->wvram_in_list) {
+ list_del_init(&rbo->plist);
+ list_add_tail(&rbo->plist, &rdev->wvram_in_list);
+ rbo->head = &rdev->wvram_in_list;
+ } else {
+ if (!list_is_first(&rbo->plist, &rdev->wvram_in_list)) {
+ struct radeon_bo *pbo;
+ unsigned long pelapsed_ms;
+
+ /* move up the list */
+ pbo = list_entry(rbo->plist.prev, struct radeon_bo, plist);
+ if (time_after(pbo->last_use_jiffies, cjiffies)) {
+ /* wrap around */
+ pbo->last_use_jiffies = 0;
+ }
+ pelapsed_ms = jiffies_to_msecs(cjiffies - pbo->last_use_jiffies);
+ if (pelapsed_ms > elapsed_ms) {
+ list_exchange(&rbo->plist, &pbo->plist);
+ }
+ }
+ }
+ rbo->last_use_jiffies = cjiffies;
+ } else if (rdomain & RADEON_GEM_DOMAIN_VRAM) {
+ if (time_after(rbo->last_use_jiffies, cjiffies)) {
+ /* wrap around */
+ rbo->last_use_jiffies = 0;
+ }
+ elapsed_ms = jiffies_to_msecs(cjiffies - rbo->last_use_jiffies);
+
+ if (list_empty(&rbo->plist) || rbo->head != &rdev->rvram_in_list) {
+ list_del_init(&rbo->plist);
+ list_add_tail(&rbo->plist, &rdev->rvram_in_list);
+ rbo->head = &rdev->rvram_in_list;
+ } else {
+ if (!list_is_first(&rbo->plist, &rdev->rvram_in_list)) {
+ struct radeon_bo *pbo;
+ unsigned long pelapsed_ms;
+
+ /* move up the list */
+ pbo = list_entry(rbo->plist.prev, struct radeon_bo, plist);
+ if (time_after(pbo->last_use_jiffies, cjiffies)) {
+ /* wrap around */
+ pbo->last_use_jiffies = 0;
+ }
+ pelapsed_ms = jiffies_to_msecs(cjiffies - pbo->last_use_jiffies);
+ if (pelapsed_ms > elapsed_ms) {
+ list_exchange(&rbo->plist, &pbo->plist);
+ }
+ }
+ }
+ rbo->last_use_jiffies = cjiffies;
+ }
+}
+
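+/* Called with placement_mutex held.  Refresh the list position and the
+ * timestamp of a bo that is already placed in vram; a bo not yet on any
+ * list is tracked on the read-domain out list.
+ */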
+static void radeon_bo_placement_update_locked(struct radeon_bo *rbo)
+{
+ if (rbo->head) {
+ list_move_tail(&rbo->plist, rbo->head);
+ } else {
+ list_del_init(&rbo->plist);
+ list_add_tail(&rbo->plist, &rbo->rdev->rvram_out_list);
+ rbo->head = &rbo->rdev->rvram_out_list;
+ }
+ rbo->last_use_jiffies = jiffies;
+}
+
+int radeon_bo_list_validate(struct radeon_device *rdev, struct list_head *head)
{
struct radeon_bo_list *lobj;
struct radeon_bo *bo;
- u32 domain;
int r;
r = ttm_eu_reserve_buffers(head);
@@ -367,6 +568,15 @@ int radeon_bo_list_validate(struct list_head *head)
list_for_each_entry(lobj, head, tv.head) {
bo = lobj->bo;
if (!bo->pin_count) {
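+ /* track this bo for the placement work: promote it if it still
+ * has to move into vram, otherwise refresh its position on the
+ * eviction lists
+ */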
+ mutex_lock(&rdev->placement_mutex);
+ if (bo->tbo.mem.mem_type != TTM_PL_VRAM)
+ radeon_bo_placement_promote_locked(bo, lobj->wdomain, lobj->rdomain);
+ else
+ radeon_bo_placement_update_locked(bo);
+ mutex_unlock(&rdev->placement_mutex);
r = ttm_bo_validate(&bo->tbo, &bo->placement,
true, false, false);
if (unlikely(r)) {
@@ -376,6 +586,8 @@ int radeon_bo_list_validate(struct list_head *head)
lobj->gpu_offset = radeon_bo_gpu_offset(bo);
lobj->tiling_flags = bo->tiling_flags;
}
+
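+ /* kick the placement work so idle vram buffers eventually make room
+ * for the buffers validated above
+ */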
+ schedule_delayed_work(&rdev->placement_work, msecs_to_jiffies(RADEON_PLACEMENT_WORK_MS));
return 0;
}
@@ -558,11 +770,34 @@ void radeon_bo_move_notify(struct ttm_buffer_object *bo,
struct ttm_mem_reg *mem)
{
struct radeon_bo *rbo;
+ struct radeon_device *rdev;
+
if (!radeon_ttm_bo_is_radeon_bo(bo))
return;
rbo = container_of(bo, struct radeon_bo, tbo);
radeon_bo_check_tiling(rbo, 0, 1);
radeon_vm_bo_invalidate(rbo->rdev, rbo);
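+ /* keep the placement lists in sync with the new placement: a bo that
+ * just landed in vram becomes an eviction candidate, anything else
+ * (moved out of vram or pinned) is dropped from the lists
+ */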
+ if (mem && mem->mem_type == TTM_PL_VRAM &&
+ !(mem->placement & TTM_PL_FLAG_NO_EVICT)) {
+ rdev = rbo->rdev;
+ mutex_lock(&rdev->placement_mutex);
+ if (rbo->head == &rdev->wvram_in_list) {
+ list_del_init(&rbo->plist);
+ list_add_tail(&rbo->plist, &rdev->wvram_out_list);
+ rbo->head = &rdev->wvram_out_list;
+ } else {
+ list_del_init(&rbo->plist);
+ list_add_tail(&rbo->plist, &rdev->rvram_out_list);
+ rbo->head = &rdev->rvram_out_list;
+ }
+ mutex_unlock(&rdev->placement_mutex);
+ rdev->vram_in_size += rbo->tbo.mem.num_pages << PAGE_SHIFT;
+ } else {
+ rdev = rbo->rdev;
+ mutex_lock(&rdev->placement_mutex);
+ list_del_init(&rbo->plist);
+ rbo->head = NULL;
+ mutex_unlock(&rdev->placement_mutex);
+ }
}
int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
@@ -128,7 +128,7 @@ extern int radeon_bo_init(struct radeon_device *rdev);
extern void radeon_bo_fini(struct radeon_device *rdev);
extern void radeon_bo_list_add_object(struct radeon_bo_list *lobj,
struct list_head *head);
-extern int radeon_bo_list_validate(struct list_head *head);
+extern int radeon_bo_list_validate(struct radeon_device *rdev, struct list_head *head);
extern int radeon_bo_fbdev_mmap(struct radeon_bo *bo,
struct vm_area_struct *vma);
extern int radeon_bo_set_tiling_flags(struct radeon_bo *bo,
@@ -141,6 +141,7 @@ extern void radeon_bo_move_notify(struct ttm_buffer_object *bo,
struct ttm_mem_reg *mem);
extern int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
extern int radeon_bo_get_surface_reg(struct radeon_bo *bo);
+extern void radeon_placement_work_handler(struct work_struct *work);
/*
* sub allocation