Message ID | 1393255246-8296-3-git-send-email-maraeo@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Am 24.02.2014 16:20, schrieb Marek Olšák: > From: Marek Olšák <marek.olsak@amd.com> > > The statistics are: > - VRAM usage in bytes > - GTT usage in bytes > - number of bytes moved by TTM > > The last one is actually a counter, so you need to sample it before and after > command submission and take the difference. > > This is useful for finding performance bottlenecks. Userspace queries are > also added. > > Signed-off-by: Marek Olšák <marek.olsak@amd.com> > --- > drivers/gpu/drm/radeon/radeon.h | 5 +++++ > drivers/gpu/drm/radeon/radeon_device.c | 1 + > drivers/gpu/drm/radeon/radeon_kms.c | 15 ++++++++++++++ > drivers/gpu/drm/radeon/radeon_object.c | 38 +++++++++++++++++++++++++++++++++- > drivers/gpu/drm/radeon/radeon_object.h | 2 +- > drivers/gpu/drm/radeon/radeon_ttm.c | 10 ++++++++- > include/uapi/drm/radeon_drm.h | 3 +++ > 7 files changed, 71 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h > index 3f10782..d37a57a 100644 > --- a/drivers/gpu/drm/radeon/radeon.h > +++ b/drivers/gpu/drm/radeon/radeon.h > @@ -2307,6 +2307,11 @@ struct radeon_device { > /* virtual memory */ > struct radeon_vm_manager vm_manager; > struct mutex gpu_clock_mutex; > + /* memory stats */ > + struct mutex memory_stats_mutex; > + uint64_t vram_usage; > + uint64_t gtt_usage; > + uint64_t num_bytes_moved; As far as I can see you could make those three values atomic64_t instead and avoid the mutex. 
> /* ACPI interface */ > struct radeon_atif atif; > struct radeon_atcs atcs; > diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c > index b012cbb..6564af7 100644 > --- a/drivers/gpu/drm/radeon/radeon_device.c > +++ b/drivers/gpu/drm/radeon/radeon_device.c > @@ -1184,6 +1184,7 @@ int radeon_device_init(struct radeon_device *rdev, > mutex_init(&rdev->gem.mutex); > mutex_init(&rdev->pm.mutex); > mutex_init(&rdev->gpu_clock_mutex); > + mutex_init(&rdev->memory_stats_mutex); > mutex_init(&rdev->srbm_mutex); > init_rwsem(&rdev->pm.mclk_lock); > init_rwsem(&rdev->exclusive_lock); > diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c > index 0b631eb..ddc8c74 100644 > --- a/drivers/gpu/drm/radeon/radeon_kms.c > +++ b/drivers/gpu/drm/radeon/radeon_kms.c > @@ -486,6 +486,21 @@ static int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file > case RADEON_INFO_VCE_FB_VERSION: > *value = rdev->vce.fb_version; > break; > + case RADEON_INFO_NUM_BYTES_MOVED: > + value = (uint32_t*)&value64; > + value_size = sizeof(uint64_t); > + value64 = rdev->num_bytes_moved; > + break; > + case RADEON_INFO_VRAM_USAGE: > + value = (uint32_t*)&value64; > + value_size = sizeof(uint64_t); > + value64 = rdev->vram_usage; > + break; > + case RADEON_INFO_GTT_USAGE: > + value = (uint32_t*)&value64; > + value_size = sizeof(uint64_t); > + value64 = rdev->gtt_usage; > + break; > default: > DRM_DEBUG_KMS("Invalid request %d\n", info->request); > return -EINVAL; > diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c > index dd12bb4..d676ee2 100644 > --- a/drivers/gpu/drm/radeon/radeon_object.c > +++ b/drivers/gpu/drm/radeon/radeon_object.c > @@ -56,11 +56,38 @@ static void radeon_bo_clear_va(struct radeon_bo *bo) > } > } > > +static void radeon_update_memory_usage(struct radeon_bo *bo, > + unsigned mem_type, int sign) > +{ > + struct radeon_device *rdev = bo->rdev; > + u64 
size = (u64)bo->tbo.num_pages << PAGE_SHIFT; > + > + mutex_lock(&rdev->memory_stats_mutex); > + switch (mem_type) { > + case TTM_PL_TT: > + if (sign > 0) > + rdev->gtt_usage += size; > + else > + rdev->gtt_usage -= size; > + break; > + case TTM_PL_VRAM: > + if (sign > 0) > + rdev->vram_usage += size; > + else > + rdev->vram_usage -= size; > + break; > + } > + mutex_unlock(&rdev->memory_stats_mutex); > +} > + > static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo) > { > struct radeon_bo *bo; > > bo = container_of(tbo, struct radeon_bo, tbo); > + > + radeon_update_memory_usage(bo, bo->tbo.mem.mem_type, -1); > + > mutex_lock(&bo->rdev->gem.mutex); > list_del_init(&bo->list); > mutex_unlock(&bo->rdev->gem.mutex); > @@ -567,14 +594,23 @@ int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved, > } > > void radeon_bo_move_notify(struct ttm_buffer_object *bo, > - struct ttm_mem_reg *mem) > + struct ttm_mem_reg *new_mem) > { > struct radeon_bo *rbo; > + > if (!radeon_ttm_bo_is_radeon_bo(bo)) > return; > + > rbo = container_of(bo, struct radeon_bo, tbo); > radeon_bo_check_tiling(rbo, 0, 1); > radeon_vm_bo_invalidate(rbo->rdev, rbo); > + > + /* update statistics */ > + if (!new_mem) > + return; > + > + radeon_update_memory_usage(rbo, bo->mem.mem_type, -1); > + radeon_update_memory_usage(rbo, new_mem->mem_type, 1); > } > > int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo) > diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h > index 209b111..a9a8c11 100644 > --- a/drivers/gpu/drm/radeon/radeon_object.h > +++ b/drivers/gpu/drm/radeon/radeon_object.h > @@ -151,7 +151,7 @@ extern void radeon_bo_get_tiling_flags(struct radeon_bo *bo, > extern int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved, > bool force_drop); > extern void radeon_bo_move_notify(struct ttm_buffer_object *bo, > - struct ttm_mem_reg *mem); > + struct ttm_mem_reg *new_mem); > extern int radeon_bo_fault_reserve_notify(struct 
ttm_buffer_object *bo); > extern int radeon_bo_get_surface_reg(struct radeon_bo *bo); > > diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c > index 77f5b0c..7e2e833 100644 > --- a/drivers/gpu/drm/radeon/radeon_ttm.c > +++ b/drivers/gpu/drm/radeon/radeon_ttm.c > @@ -406,8 +406,16 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, > if (r) { > memcpy: > r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem); > + if (r) { > + return r; > + } > } > - return r; > + > + /* update statistics */ > + mutex_lock(&rdev->memory_stats_mutex); > + rdev->num_bytes_moved += (u64)bo->num_pages << PAGE_SHIFT; > + mutex_unlock(&rdev->memory_stats_mutex); > + return 0; > } > > static int radeon_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem) > diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h > index cb5c93a..aefa2f6 100644 > --- a/include/uapi/drm/radeon_drm.h > +++ b/include/uapi/drm/radeon_drm.h > @@ -1004,6 +1004,9 @@ struct drm_radeon_cs { > #define RADEON_INFO_VCE_FW_VERSION 0x1b > /* version of VCE feedback */ > #define RADEON_INFO_VCE_FB_VERSION 0x1c > +#define RADEON_INFO_NUM_BYTES_MOVED 0x1d > +#define RADEON_INFO_VRAM_USAGE 0x1e > +#define RADEON_INFO_GTT_USAGE 0x1f > > > struct drm_radeon_info {
On Mon, Feb 24, 2014 at 5:20 PM, Christian König <deathsimple@vodafone.de> wrote: > Am 24.02.2014 16:20, schrieb Marek Olšák: > >> From: Marek Olšák <marek.olsak@amd.com> >> >> The statistics are: >> - VRAM usage in bytes >> - GTT usage in bytes >> - number of bytes moved by TTM >> >> The last one is actually a counter, so you need to sample it before and >> after >> command submission and take the difference. >> >> This is useful for finding performance bottlenecks. Userspace queries are >> also added. >> >> Signed-off-by: Marek Olšák <marek.olsak@amd.com> >> --- >> drivers/gpu/drm/radeon/radeon.h | 5 +++++ >> drivers/gpu/drm/radeon/radeon_device.c | 1 + >> drivers/gpu/drm/radeon/radeon_kms.c | 15 ++++++++++++++ >> drivers/gpu/drm/radeon/radeon_object.c | 38 >> +++++++++++++++++++++++++++++++++- >> drivers/gpu/drm/radeon/radeon_object.h | 2 +- >> drivers/gpu/drm/radeon/radeon_ttm.c | 10 ++++++++- >> include/uapi/drm/radeon_drm.h | 3 +++ >> 7 files changed, 71 insertions(+), 3 deletions(-) >> >> diff --git a/drivers/gpu/drm/radeon/radeon.h >> b/drivers/gpu/drm/radeon/radeon.h >> index 3f10782..d37a57a 100644 >> --- a/drivers/gpu/drm/radeon/radeon.h >> +++ b/drivers/gpu/drm/radeon/radeon.h >> @@ -2307,6 +2307,11 @@ struct radeon_device { >> /* virtual memory */ >> struct radeon_vm_manager vm_manager; >> struct mutex gpu_clock_mutex; >> + /* memory stats */ >> + struct mutex memory_stats_mutex; >> + uint64_t vram_usage; >> + uint64_t gtt_usage; >> + uint64_t num_bytes_moved; > > > As far as I can see you could make those tree values atomic64_t instead and > avoid the mutex. I'm afraid I cannot use atomic64_t. It doesn't work on x86 32-bit. This seems to be a no-op: u64 size = (u64)bo->num_pages << PAGE_SHIFT; atomic64_add(size, &rdev->num_bytes_moved); Marek
Am 26.02.2014 18:56, schrieb Marek Olšák: > On Mon, Feb 24, 2014 at 5:20 PM, Christian König > <deathsimple@vodafone.de> wrote: >> Am 24.02.2014 16:20, schrieb Marek Olšák: >> >>> From: Marek Olšák <marek.olsak@amd.com> >>> >>> The statistics are: >>> - VRAM usage in bytes >>> - GTT usage in bytes >>> - number of bytes moved by TTM >>> >>> The last one is actually a counter, so you need to sample it before and >>> after >>> command submission and take the difference. >>> >>> This is useful for finding performance bottlenecks. Userspace queries are >>> also added. >>> >>> Signed-off-by: Marek Olšák <marek.olsak@amd.com> >>> --- >>> drivers/gpu/drm/radeon/radeon.h | 5 +++++ >>> drivers/gpu/drm/radeon/radeon_device.c | 1 + >>> drivers/gpu/drm/radeon/radeon_kms.c | 15 ++++++++++++++ >>> drivers/gpu/drm/radeon/radeon_object.c | 38 >>> +++++++++++++++++++++++++++++++++- >>> drivers/gpu/drm/radeon/radeon_object.h | 2 +- >>> drivers/gpu/drm/radeon/radeon_ttm.c | 10 ++++++++- >>> include/uapi/drm/radeon_drm.h | 3 +++ >>> 7 files changed, 71 insertions(+), 3 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/radeon/radeon.h >>> b/drivers/gpu/drm/radeon/radeon.h >>> index 3f10782..d37a57a 100644 >>> --- a/drivers/gpu/drm/radeon/radeon.h >>> +++ b/drivers/gpu/drm/radeon/radeon.h >>> @@ -2307,6 +2307,11 @@ struct radeon_device { >>> /* virtual memory */ >>> struct radeon_vm_manager vm_manager; >>> struct mutex gpu_clock_mutex; >>> + /* memory stats */ >>> + struct mutex memory_stats_mutex; >>> + uint64_t vram_usage; >>> + uint64_t gtt_usage; >>> + uint64_t num_bytes_moved; >> >> As far as I can see you could make those tree values atomic64_t instead and >> avoid the mutex. > I'm afraid I cannot use atomic64_t. It doesn't work on x86 32-bit. > This seems to be a no-op: > > u64 size = (u64)bo->num_pages << PAGE_SHIFT; > atomic64_add(size, &rdev->num_bytes_moved); Are you sure about this? 
I haven't tested x86 32-bit in a long time, but we use atomic64 operations all over the place, and they usually work perfectly. Christian. > Marek
Dammit. I renumbered the RADEON_INFO definitions for the new queries to 0x1d, 0x1e, 0x1f in the kernel tree, but I forgot to update the Mesa code, which still used 0x1c, 0x1d, 0x1e. Sorry. Marek On Wed, Feb 26, 2014 at 7:26 PM, Christian König <deathsimple@vodafone.de> wrote: > Am 26.02.2014 18:56, schrieb Marek Olšák: > >> On Mon, Feb 24, 2014 at 5:20 PM, Christian König >> <deathsimple@vodafone.de> wrote: >>> >>> Am 24.02.2014 16:20, schrieb Marek Olšák: >>> >>>> From: Marek Olšák <marek.olsak@amd.com> >>>> >>>> The statistics are: >>>> - VRAM usage in bytes >>>> - GTT usage in bytes >>>> - number of bytes moved by TTM >>>> >>>> The last one is actually a counter, so you need to sample it before and >>>> after >>>> command submission and take the difference. >>>> >>>> This is useful for finding performance bottlenecks. Userspace queries >>>> are >>>> also added. >>>> >>>> Signed-off-by: Marek Olšák <marek.olsak@amd.com> >>>> --- >>>> drivers/gpu/drm/radeon/radeon.h | 5 +++++ >>>> drivers/gpu/drm/radeon/radeon_device.c | 1 + >>>> drivers/gpu/drm/radeon/radeon_kms.c | 15 ++++++++++++++ >>>> drivers/gpu/drm/radeon/radeon_object.c | 38 >>>> +++++++++++++++++++++++++++++++++- >>>> drivers/gpu/drm/radeon/radeon_object.h | 2 +- >>>> drivers/gpu/drm/radeon/radeon_ttm.c | 10 ++++++++- >>>> include/uapi/drm/radeon_drm.h | 3 +++ >>>> 7 files changed, 71 insertions(+), 3 deletions(-) >>>> >>>> diff --git a/drivers/gpu/drm/radeon/radeon.h >>>> b/drivers/gpu/drm/radeon/radeon.h >>>> index 3f10782..d37a57a 100644 >>>> --- a/drivers/gpu/drm/radeon/radeon.h >>>> +++ b/drivers/gpu/drm/radeon/radeon.h >>>> @@ -2307,6 +2307,11 @@ struct radeon_device { >>>> /* virtual memory */ >>>> struct radeon_vm_manager vm_manager; >>>> struct mutex gpu_clock_mutex; >>>> + /* memory stats */ >>>> + struct mutex memory_stats_mutex; >>>> + uint64_t vram_usage; >>>> + uint64_t gtt_usage; >>>> + uint64_t num_bytes_moved; >>> >>> >>> As far as I can see you could make those tree values atomic64_t instead >>> and >>> avoid 
the mutex. >> >> I'm afraid I cannot use atomic64_t. It doesn't work on x86 32-bit. >> This seems to be a no-op: >> >> u64 size = (u64)bo->num_pages << PAGE_SHIFT; >> atomic64_add(size, &rdev->num_bytes_moved); > > > Are you sure about this? Haven't tested x86 32-bit in a long time, but we > use atomic64 all around the place and they usually work perfectly. > > Christian. > >> Marek > >
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 3f10782..d37a57a 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -2307,6 +2307,11 @@ struct radeon_device { /* virtual memory */ struct radeon_vm_manager vm_manager; struct mutex gpu_clock_mutex; + /* memory stats */ + struct mutex memory_stats_mutex; + uint64_t vram_usage; + uint64_t gtt_usage; + uint64_t num_bytes_moved; /* ACPI interface */ struct radeon_atif atif; struct radeon_atcs atcs; diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index b012cbb..6564af7 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1184,6 +1184,7 @@ int radeon_device_init(struct radeon_device *rdev, mutex_init(&rdev->gem.mutex); mutex_init(&rdev->pm.mutex); mutex_init(&rdev->gpu_clock_mutex); + mutex_init(&rdev->memory_stats_mutex); mutex_init(&rdev->srbm_mutex); init_rwsem(&rdev->pm.mclk_lock); init_rwsem(&rdev->exclusive_lock); diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 0b631eb..ddc8c74 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -486,6 +486,21 @@ static int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file case RADEON_INFO_VCE_FB_VERSION: *value = rdev->vce.fb_version; break; + case RADEON_INFO_NUM_BYTES_MOVED: + value = (uint32_t*)&value64; + value_size = sizeof(uint64_t); + value64 = rdev->num_bytes_moved; + break; + case RADEON_INFO_VRAM_USAGE: + value = (uint32_t*)&value64; + value_size = sizeof(uint64_t); + value64 = rdev->vram_usage; + break; + case RADEON_INFO_GTT_USAGE: + value = (uint32_t*)&value64; + value_size = sizeof(uint64_t); + value64 = rdev->gtt_usage; + break; default: DRM_DEBUG_KMS("Invalid request %d\n", info->request); return -EINVAL; diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c 
index dd12bb4..d676ee2 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -56,11 +56,38 @@ static void radeon_bo_clear_va(struct radeon_bo *bo) } } +static void radeon_update_memory_usage(struct radeon_bo *bo, + unsigned mem_type, int sign) +{ + struct radeon_device *rdev = bo->rdev; + u64 size = (u64)bo->tbo.num_pages << PAGE_SHIFT; + + mutex_lock(&rdev->memory_stats_mutex); + switch (mem_type) { + case TTM_PL_TT: + if (sign > 0) + rdev->gtt_usage += size; + else + rdev->gtt_usage -= size; + break; + case TTM_PL_VRAM: + if (sign > 0) + rdev->vram_usage += size; + else + rdev->vram_usage -= size; + break; + } + mutex_unlock(&rdev->memory_stats_mutex); +} + static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo) { struct radeon_bo *bo; bo = container_of(tbo, struct radeon_bo, tbo); + + radeon_update_memory_usage(bo, bo->tbo.mem.mem_type, -1); + mutex_lock(&bo->rdev->gem.mutex); list_del_init(&bo->list); mutex_unlock(&bo->rdev->gem.mutex); @@ -567,14 +594,23 @@ int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved, } void radeon_bo_move_notify(struct ttm_buffer_object *bo, - struct ttm_mem_reg *mem) + struct ttm_mem_reg *new_mem) { struct radeon_bo *rbo; + if (!radeon_ttm_bo_is_radeon_bo(bo)) return; + rbo = container_of(bo, struct radeon_bo, tbo); radeon_bo_check_tiling(rbo, 0, 1); radeon_vm_bo_invalidate(rbo->rdev, rbo); + + /* update statistics */ + if (!new_mem) + return; + + radeon_update_memory_usage(rbo, bo->mem.mem_type, -1); + radeon_update_memory_usage(rbo, new_mem->mem_type, 1); } int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo) diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h index 209b111..a9a8c11 100644 --- a/drivers/gpu/drm/radeon/radeon_object.h +++ b/drivers/gpu/drm/radeon/radeon_object.h @@ -151,7 +151,7 @@ extern void radeon_bo_get_tiling_flags(struct radeon_bo *bo, extern int radeon_bo_check_tiling(struct radeon_bo *bo, 
bool has_moved, bool force_drop); extern void radeon_bo_move_notify(struct ttm_buffer_object *bo, - struct ttm_mem_reg *mem); + struct ttm_mem_reg *new_mem); extern int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo); extern int radeon_bo_get_surface_reg(struct radeon_bo *bo); diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 77f5b0c..7e2e833 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -406,8 +406,16 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, if (r) { memcpy: r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem); + if (r) { + return r; + } } - return r; + + /* update statistics */ + mutex_lock(&rdev->memory_stats_mutex); + rdev->num_bytes_moved += (u64)bo->num_pages << PAGE_SHIFT; + mutex_unlock(&rdev->memory_stats_mutex); + return 0; } static int radeon_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem) diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h index cb5c93a..aefa2f6 100644 --- a/include/uapi/drm/radeon_drm.h +++ b/include/uapi/drm/radeon_drm.h @@ -1004,6 +1004,9 @@ struct drm_radeon_cs { #define RADEON_INFO_VCE_FW_VERSION 0x1b /* version of VCE feedback */ #define RADEON_INFO_VCE_FB_VERSION 0x1c +#define RADEON_INFO_NUM_BYTES_MOVED 0x1d +#define RADEON_INFO_VRAM_USAGE 0x1e +#define RADEON_INFO_GTT_USAGE 0x1f struct drm_radeon_info {