diff mbox

[2/6] drm/radeon: track memory statistics about VRAM and GTT usage and buffer moves

Message ID 1393255246-8296-3-git-send-email-maraeo@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Marek Olšák Feb. 24, 2014, 3:20 p.m. UTC
From: Marek Olšák <marek.olsak@amd.com>

The statistics are:
- VRAM usage in bytes
- GTT usage in bytes
- number of bytes moved by TTM

The last one is actually a counter, so you need to sample it before and after
command submission and take the difference.

This is useful for finding performance bottlenecks. Userspace queries are
also added.

Signed-off-by: Marek Olšák <marek.olsak@amd.com>
---
 drivers/gpu/drm/radeon/radeon.h        |  5 +++++
 drivers/gpu/drm/radeon/radeon_device.c |  1 +
 drivers/gpu/drm/radeon/radeon_kms.c    | 15 ++++++++++++++
 drivers/gpu/drm/radeon/radeon_object.c | 38 +++++++++++++++++++++++++++++++++-
 drivers/gpu/drm/radeon/radeon_object.h |  2 +-
 drivers/gpu/drm/radeon/radeon_ttm.c    | 10 ++++++++-
 include/uapi/drm/radeon_drm.h          |  3 +++
 7 files changed, 71 insertions(+), 3 deletions(-)

Comments

Christian König Feb. 24, 2014, 4:20 p.m. UTC | #1
Am 24.02.2014 16:20, schrieb Marek Olšák:
> From: Marek Olšák <marek.olsak@amd.com>
>
> The statistics are:
> - VRAM usage in bytes
> - GTT usage in bytes
> - number of bytes moved by TTM
>
> The last one is actually a counter, so you need to sample it before and after
> command submission and take the difference.
>
> This is useful for finding performance bottlenecks. Userspace queries are
> also added.
>
> Signed-off-by: Marek Olšák <marek.olsak@amd.com>
> ---
>   drivers/gpu/drm/radeon/radeon.h        |  5 +++++
>   drivers/gpu/drm/radeon/radeon_device.c |  1 +
>   drivers/gpu/drm/radeon/radeon_kms.c    | 15 ++++++++++++++
>   drivers/gpu/drm/radeon/radeon_object.c | 38 +++++++++++++++++++++++++++++++++-
>   drivers/gpu/drm/radeon/radeon_object.h |  2 +-
>   drivers/gpu/drm/radeon/radeon_ttm.c    | 10 ++++++++-
>   include/uapi/drm/radeon_drm.h          |  3 +++
>   7 files changed, 71 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
> index 3f10782..d37a57a 100644
> --- a/drivers/gpu/drm/radeon/radeon.h
> +++ b/drivers/gpu/drm/radeon/radeon.h
> @@ -2307,6 +2307,11 @@ struct radeon_device {
>   	/* virtual memory */
>   	struct radeon_vm_manager	vm_manager;
>   	struct mutex			gpu_clock_mutex;
> +	/* memory stats */
> +	struct mutex			memory_stats_mutex;
> +	uint64_t			vram_usage;
> +	uint64_t			gtt_usage;
> +	uint64_t			num_bytes_moved;

As far as I can see, you could make those three values atomic64_t instead
and avoid the mutex.

>   	/* ACPI interface */
>   	struct radeon_atif		atif;
>   	struct radeon_atcs		atcs;
> diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
> index b012cbb..6564af7 100644
> --- a/drivers/gpu/drm/radeon/radeon_device.c
> +++ b/drivers/gpu/drm/radeon/radeon_device.c
> @@ -1184,6 +1184,7 @@ int radeon_device_init(struct radeon_device *rdev,
>   	mutex_init(&rdev->gem.mutex);
>   	mutex_init(&rdev->pm.mutex);
>   	mutex_init(&rdev->gpu_clock_mutex);
> +	mutex_init(&rdev->memory_stats_mutex);
>   	mutex_init(&rdev->srbm_mutex);
>   	init_rwsem(&rdev->pm.mclk_lock);
>   	init_rwsem(&rdev->exclusive_lock);
> diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
> index 0b631eb..ddc8c74 100644
> --- a/drivers/gpu/drm/radeon/radeon_kms.c
> +++ b/drivers/gpu/drm/radeon/radeon_kms.c
> @@ -486,6 +486,21 @@ static int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file
>   	case RADEON_INFO_VCE_FB_VERSION:
>   		*value = rdev->vce.fb_version;
>   		break;
> +	case RADEON_INFO_NUM_BYTES_MOVED:
> +		value = (uint32_t*)&value64;
> +		value_size = sizeof(uint64_t);
> +		value64 = rdev->num_bytes_moved;
> +		break;
> +	case RADEON_INFO_VRAM_USAGE:
> +		value = (uint32_t*)&value64;
> +		value_size = sizeof(uint64_t);
> +		value64 = rdev->vram_usage;
> +		break;
> +	case RADEON_INFO_GTT_USAGE:
> +		value = (uint32_t*)&value64;
> +		value_size = sizeof(uint64_t);
> +		value64 = rdev->gtt_usage;
> +		break;
>   	default:
>   		DRM_DEBUG_KMS("Invalid request %d\n", info->request);
>   		return -EINVAL;
> diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
> index dd12bb4..d676ee2 100644
> --- a/drivers/gpu/drm/radeon/radeon_object.c
> +++ b/drivers/gpu/drm/radeon/radeon_object.c
> @@ -56,11 +56,38 @@ static void radeon_bo_clear_va(struct radeon_bo *bo)
>   	}
>   }
>   
> +static void radeon_update_memory_usage(struct radeon_bo *bo,
> +				       unsigned mem_type, int sign)
> +{
> +	struct radeon_device *rdev = bo->rdev;
> +	u64 size = (u64)bo->tbo.num_pages << PAGE_SHIFT;
> +
> +	mutex_lock(&rdev->memory_stats_mutex);
> +	switch (mem_type) {
> +	case TTM_PL_TT:
> +		if (sign > 0)
> +			rdev->gtt_usage += size;
> +		else
> +			rdev->gtt_usage -= size;
> +		break;
> +	case TTM_PL_VRAM:
> +		if (sign > 0)
> +			rdev->vram_usage += size;
> +		else
> +			rdev->vram_usage -= size;
> +		break;
> +	}
> +	mutex_unlock(&rdev->memory_stats_mutex);
> +}
> +
>   static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo)
>   {
>   	struct radeon_bo *bo;
>   
>   	bo = container_of(tbo, struct radeon_bo, tbo);
> +
> +	radeon_update_memory_usage(bo, bo->tbo.mem.mem_type, -1);
> +
>   	mutex_lock(&bo->rdev->gem.mutex);
>   	list_del_init(&bo->list);
>   	mutex_unlock(&bo->rdev->gem.mutex);
> @@ -567,14 +594,23 @@ int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
>   }
>   
>   void radeon_bo_move_notify(struct ttm_buffer_object *bo,
> -			   struct ttm_mem_reg *mem)
> +			   struct ttm_mem_reg *new_mem)
>   {
>   	struct radeon_bo *rbo;
> +
>   	if (!radeon_ttm_bo_is_radeon_bo(bo))
>   		return;
> +
>   	rbo = container_of(bo, struct radeon_bo, tbo);
>   	radeon_bo_check_tiling(rbo, 0, 1);
>   	radeon_vm_bo_invalidate(rbo->rdev, rbo);
> +
> +	/* update statistics */
> +	if (!new_mem)
> +		return;
> +
> +	radeon_update_memory_usage(rbo, bo->mem.mem_type, -1);
> +	radeon_update_memory_usage(rbo, new_mem->mem_type, 1);
>   }
>   
>   int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
> diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
> index 209b111..a9a8c11 100644
> --- a/drivers/gpu/drm/radeon/radeon_object.h
> +++ b/drivers/gpu/drm/radeon/radeon_object.h
> @@ -151,7 +151,7 @@ extern void radeon_bo_get_tiling_flags(struct radeon_bo *bo,
>   extern int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
>   				bool force_drop);
>   extern void radeon_bo_move_notify(struct ttm_buffer_object *bo,
> -					struct ttm_mem_reg *mem);
> +				  struct ttm_mem_reg *new_mem);
>   extern int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
>   extern int radeon_bo_get_surface_reg(struct radeon_bo *bo);
>   
> diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
> index 77f5b0c..7e2e833 100644
> --- a/drivers/gpu/drm/radeon/radeon_ttm.c
> +++ b/drivers/gpu/drm/radeon/radeon_ttm.c
> @@ -406,8 +406,16 @@ static int radeon_bo_move(struct ttm_buffer_object *bo,
>   	if (r) {
>   memcpy:
>   		r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
> +		if (r) {
> +			return r;
> +		}
>   	}
> -	return r;
> +
> +	/* update statistics */
> +	mutex_lock(&rdev->memory_stats_mutex);
> +	rdev->num_bytes_moved += (u64)bo->num_pages << PAGE_SHIFT;
> +	mutex_unlock(&rdev->memory_stats_mutex);
> +	return 0;
>   }
>   
>   static int radeon_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
> diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h
> index cb5c93a..aefa2f6 100644
> --- a/include/uapi/drm/radeon_drm.h
> +++ b/include/uapi/drm/radeon_drm.h
> @@ -1004,6 +1004,9 @@ struct drm_radeon_cs {
>   #define RADEON_INFO_VCE_FW_VERSION	0x1b
>   /* version of VCE feedback */
>   #define RADEON_INFO_VCE_FB_VERSION	0x1c
> +#define RADEON_INFO_NUM_BYTES_MOVED	0x1d
> +#define RADEON_INFO_VRAM_USAGE		0x1e
> +#define RADEON_INFO_GTT_USAGE		0x1f
>   
>   
>   struct drm_radeon_info {
Marek Olšák Feb. 26, 2014, 5:56 p.m. UTC | #2
On Mon, Feb 24, 2014 at 5:20 PM, Christian König
<deathsimple@vodafone.de> wrote:
> Am 24.02.2014 16:20, schrieb Marek Olšák:
>
>> From: Marek Olšák <marek.olsak@amd.com>
>>
>> The statistics are:
>> - VRAM usage in bytes
>> - GTT usage in bytes
>> - number of bytes moved by TTM
>>
>> The last one is actually a counter, so you need to sample it before and
>> after
>> command submission and take the difference.
>>
>> This is useful for finding performance bottlenecks. Userspace queries are
>> also added.
>>
>> Signed-off-by: Marek Olšák <marek.olsak@amd.com>
>> ---
>>   drivers/gpu/drm/radeon/radeon.h        |  5 +++++
>>   drivers/gpu/drm/radeon/radeon_device.c |  1 +
>>   drivers/gpu/drm/radeon/radeon_kms.c    | 15 ++++++++++++++
>>   drivers/gpu/drm/radeon/radeon_object.c | 38
>> +++++++++++++++++++++++++++++++++-
>>   drivers/gpu/drm/radeon/radeon_object.h |  2 +-
>>   drivers/gpu/drm/radeon/radeon_ttm.c    | 10 ++++++++-
>>   include/uapi/drm/radeon_drm.h          |  3 +++
>>   7 files changed, 71 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/radeon/radeon.h
>> b/drivers/gpu/drm/radeon/radeon.h
>> index 3f10782..d37a57a 100644
>> --- a/drivers/gpu/drm/radeon/radeon.h
>> +++ b/drivers/gpu/drm/radeon/radeon.h
>> @@ -2307,6 +2307,11 @@ struct radeon_device {
>>         /* virtual memory */
>>         struct radeon_vm_manager        vm_manager;
>>         struct mutex                    gpu_clock_mutex;
>> +       /* memory stats */
>> +       struct mutex                    memory_stats_mutex;
>> +       uint64_t                        vram_usage;
>> +       uint64_t                        gtt_usage;
>> +       uint64_t                        num_bytes_moved;
>
>
> As far as I can see, you could make those three values atomic64_t instead and
> avoid the mutex.

I'm afraid I cannot use atomic64_t. It doesn't work on x86 32-bit.
This seems to be a no-op:

u64 size = (u64)bo->num_pages << PAGE_SHIFT;
atomic64_add(size, &rdev->num_bytes_moved);

Marek
Christian König Feb. 26, 2014, 6:26 p.m. UTC | #3
Am 26.02.2014 18:56, schrieb Marek Olšák:
> On Mon, Feb 24, 2014 at 5:20 PM, Christian König
> <deathsimple@vodafone.de> wrote:
>> Am 24.02.2014 16:20, schrieb Marek Olšák:
>>
>>> From: Marek Olšák <marek.olsak@amd.com>
>>>
>>> The statistics are:
>>> - VRAM usage in bytes
>>> - GTT usage in bytes
>>> - number of bytes moved by TTM
>>>
>>> The last one is actually a counter, so you need to sample it before and
>>> after
>>> command submission and take the difference.
>>>
>>> This is useful for finding performance bottlenecks. Userspace queries are
>>> also added.
>>>
>>> Signed-off-by: Marek Olšák <marek.olsak@amd.com>
>>> ---
>>>    drivers/gpu/drm/radeon/radeon.h        |  5 +++++
>>>    drivers/gpu/drm/radeon/radeon_device.c |  1 +
>>>    drivers/gpu/drm/radeon/radeon_kms.c    | 15 ++++++++++++++
>>>    drivers/gpu/drm/radeon/radeon_object.c | 38
>>> +++++++++++++++++++++++++++++++++-
>>>    drivers/gpu/drm/radeon/radeon_object.h |  2 +-
>>>    drivers/gpu/drm/radeon/radeon_ttm.c    | 10 ++++++++-
>>>    include/uapi/drm/radeon_drm.h          |  3 +++
>>>    7 files changed, 71 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/radeon/radeon.h
>>> b/drivers/gpu/drm/radeon/radeon.h
>>> index 3f10782..d37a57a 100644
>>> --- a/drivers/gpu/drm/radeon/radeon.h
>>> +++ b/drivers/gpu/drm/radeon/radeon.h
>>> @@ -2307,6 +2307,11 @@ struct radeon_device {
>>>          /* virtual memory */
>>>          struct radeon_vm_manager        vm_manager;
>>>          struct mutex                    gpu_clock_mutex;
>>> +       /* memory stats */
>>> +       struct mutex                    memory_stats_mutex;
>>> +       uint64_t                        vram_usage;
>>> +       uint64_t                        gtt_usage;
>>> +       uint64_t                        num_bytes_moved;
>>
>> As far as I can see, you could make those three values atomic64_t instead and
>> avoid the mutex.
> I'm afraid I cannot use atomic64_t. It doesn't work on x86 32-bit.
> This seems to be a no-op:
>
> u64 size = (u64)bo->num_pages << PAGE_SHIFT;
> atomic64_add(size, &rdev->num_bytes_moved);

Are you sure about this? I haven't tested x86 32-bit in a long time, but
we use atomic64 all over the place and it usually works perfectly.

Christian.

> Marek
Marek Olšák Feb. 27, 2014, 12:39 a.m. UTC | #4
Dammit. I renamed the RADEON_INFO definitions for the new queries to
0x1d, 0x1e, 0x1f in the kernel tree, but I forgot to update the Mesa code,
which still used 0x1c, 0x1d, 0x1e. Sorry.

Marek

On Wed, Feb 26, 2014 at 7:26 PM, Christian König
<deathsimple@vodafone.de> wrote:
> Am 26.02.2014 18:56, schrieb Marek Olšák:
>
>> On Mon, Feb 24, 2014 at 5:20 PM, Christian König
>> <deathsimple@vodafone.de> wrote:
>>>
>>> Am 24.02.2014 16:20, schrieb Marek Olšák:
>>>
>>>> From: Marek Olšák <marek.olsak@amd.com>
>>>>
>>>> The statistics are:
>>>> - VRAM usage in bytes
>>>> - GTT usage in bytes
>>>> - number of bytes moved by TTM
>>>>
>>>> The last one is actually a counter, so you need to sample it before and
>>>> after
>>>> command submission and take the difference.
>>>>
>>>> This is useful for finding performance bottlenecks. Userspace queries
>>>> are
>>>> also added.
>>>>
>>>> Signed-off-by: Marek Olšák <marek.olsak@amd.com>
>>>> ---
>>>>    drivers/gpu/drm/radeon/radeon.h        |  5 +++++
>>>>    drivers/gpu/drm/radeon/radeon_device.c |  1 +
>>>>    drivers/gpu/drm/radeon/radeon_kms.c    | 15 ++++++++++++++
>>>>    drivers/gpu/drm/radeon/radeon_object.c | 38
>>>> +++++++++++++++++++++++++++++++++-
>>>>    drivers/gpu/drm/radeon/radeon_object.h |  2 +-
>>>>    drivers/gpu/drm/radeon/radeon_ttm.c    | 10 ++++++++-
>>>>    include/uapi/drm/radeon_drm.h          |  3 +++
>>>>    7 files changed, 71 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/radeon/radeon.h
>>>> b/drivers/gpu/drm/radeon/radeon.h
>>>> index 3f10782..d37a57a 100644
>>>> --- a/drivers/gpu/drm/radeon/radeon.h
>>>> +++ b/drivers/gpu/drm/radeon/radeon.h
>>>> @@ -2307,6 +2307,11 @@ struct radeon_device {
>>>>          /* virtual memory */
>>>>          struct radeon_vm_manager        vm_manager;
>>>>          struct mutex                    gpu_clock_mutex;
>>>> +       /* memory stats */
>>>> +       struct mutex                    memory_stats_mutex;
>>>> +       uint64_t                        vram_usage;
>>>> +       uint64_t                        gtt_usage;
>>>> +       uint64_t                        num_bytes_moved;
>>>
>>>
>>> As far as I can see, you could make those three values atomic64_t instead
>>> and
>>> avoid the mutex.
>>
>> I'm afraid I cannot use atomic64_t. It doesn't work on x86 32-bit.
>> This seems to be a no-op:
>>
>> u64 size = (u64)bo->num_pages << PAGE_SHIFT;
>> atomic64_add(size, &rdev->num_bytes_moved);
>
>
> Are you sure about this? Haven't tested x86 32-bit in a long time, but we
> use atomic64 all around the place and they usually work perfectly.
>
> Christian.
>
>> Marek
>
>
diff mbox

Patch

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 3f10782..d37a57a 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -2307,6 +2307,11 @@  struct radeon_device {
 	/* virtual memory */
 	struct radeon_vm_manager	vm_manager;
 	struct mutex			gpu_clock_mutex;
+	/* memory stats */
+	struct mutex			memory_stats_mutex;
+	uint64_t			vram_usage;
+	uint64_t			gtt_usage;
+	uint64_t			num_bytes_moved;
 	/* ACPI interface */
 	struct radeon_atif		atif;
 	struct radeon_atcs		atcs;
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index b012cbb..6564af7 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1184,6 +1184,7 @@  int radeon_device_init(struct radeon_device *rdev,
 	mutex_init(&rdev->gem.mutex);
 	mutex_init(&rdev->pm.mutex);
 	mutex_init(&rdev->gpu_clock_mutex);
+	mutex_init(&rdev->memory_stats_mutex);
 	mutex_init(&rdev->srbm_mutex);
 	init_rwsem(&rdev->pm.mclk_lock);
 	init_rwsem(&rdev->exclusive_lock);
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index 0b631eb..ddc8c74 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -486,6 +486,21 @@  static int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 	case RADEON_INFO_VCE_FB_VERSION:
 		*value = rdev->vce.fb_version;
 		break;
+	case RADEON_INFO_NUM_BYTES_MOVED:
+		value = (uint32_t*)&value64;
+		value_size = sizeof(uint64_t);
+		value64 = rdev->num_bytes_moved;
+		break;
+	case RADEON_INFO_VRAM_USAGE:
+		value = (uint32_t*)&value64;
+		value_size = sizeof(uint64_t);
+		value64 = rdev->vram_usage;
+		break;
+	case RADEON_INFO_GTT_USAGE:
+		value = (uint32_t*)&value64;
+		value_size = sizeof(uint64_t);
+		value64 = rdev->gtt_usage;
+		break;
 	default:
 		DRM_DEBUG_KMS("Invalid request %d\n", info->request);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index dd12bb4..d676ee2 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -56,11 +56,38 @@  static void radeon_bo_clear_va(struct radeon_bo *bo)
 	}
 }
 
+static void radeon_update_memory_usage(struct radeon_bo *bo,
+				       unsigned mem_type, int sign)
+{
+	struct radeon_device *rdev = bo->rdev;
+	u64 size = (u64)bo->tbo.num_pages << PAGE_SHIFT;
+
+	mutex_lock(&rdev->memory_stats_mutex);
+	switch (mem_type) {
+	case TTM_PL_TT:
+		if (sign > 0)
+			rdev->gtt_usage += size;
+		else
+			rdev->gtt_usage -= size;
+		break;
+	case TTM_PL_VRAM:
+		if (sign > 0)
+			rdev->vram_usage += size;
+		else
+			rdev->vram_usage -= size;
+		break;
+	}
+	mutex_unlock(&rdev->memory_stats_mutex);
+}
+
 static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo)
 {
 	struct radeon_bo *bo;
 
 	bo = container_of(tbo, struct radeon_bo, tbo);
+
+	radeon_update_memory_usage(bo, bo->tbo.mem.mem_type, -1);
+
 	mutex_lock(&bo->rdev->gem.mutex);
 	list_del_init(&bo->list);
 	mutex_unlock(&bo->rdev->gem.mutex);
@@ -567,14 +594,23 @@  int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
 }
 
 void radeon_bo_move_notify(struct ttm_buffer_object *bo,
-			   struct ttm_mem_reg *mem)
+			   struct ttm_mem_reg *new_mem)
 {
 	struct radeon_bo *rbo;
+
 	if (!radeon_ttm_bo_is_radeon_bo(bo))
 		return;
+
 	rbo = container_of(bo, struct radeon_bo, tbo);
 	radeon_bo_check_tiling(rbo, 0, 1);
 	radeon_vm_bo_invalidate(rbo->rdev, rbo);
+
+	/* update statistics */
+	if (!new_mem)
+		return;
+
+	radeon_update_memory_usage(rbo, bo->mem.mem_type, -1);
+	radeon_update_memory_usage(rbo, new_mem->mem_type, 1);
 }
 
 int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
index 209b111..a9a8c11 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -151,7 +151,7 @@  extern void radeon_bo_get_tiling_flags(struct radeon_bo *bo,
 extern int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
 				bool force_drop);
 extern void radeon_bo_move_notify(struct ttm_buffer_object *bo,
-					struct ttm_mem_reg *mem);
+				  struct ttm_mem_reg *new_mem);
 extern int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
 extern int radeon_bo_get_surface_reg(struct radeon_bo *bo);
 
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 77f5b0c..7e2e833 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -406,8 +406,16 @@  static int radeon_bo_move(struct ttm_buffer_object *bo,
 	if (r) {
 memcpy:
 		r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
+		if (r) {
+			return r;
+		}
 	}
-	return r;
+
+	/* update statistics */
+	mutex_lock(&rdev->memory_stats_mutex);
+	rdev->num_bytes_moved += (u64)bo->num_pages << PAGE_SHIFT;
+	mutex_unlock(&rdev->memory_stats_mutex);
+	return 0;
 }
 
 static int radeon_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h
index cb5c93a..aefa2f6 100644
--- a/include/uapi/drm/radeon_drm.h
+++ b/include/uapi/drm/radeon_drm.h
@@ -1004,6 +1004,9 @@  struct drm_radeon_cs {
 #define RADEON_INFO_VCE_FW_VERSION	0x1b
 /* version of VCE feedback */
 #define RADEON_INFO_VCE_FB_VERSION	0x1c
+#define RADEON_INFO_NUM_BYTES_MOVED	0x1d
+#define RADEON_INFO_VRAM_USAGE		0x1e
+#define RADEON_INFO_GTT_USAGE		0x1f
 
 
 struct drm_radeon_info {