Am 24.02.2014 16:20, schrieb Marek Olšák:
From: Marek Olšák marek.olsak@amd.com
The statistics are:
- VRAM usage in bytes
- GTT usage in bytes
- number of bytes moved by TTM
The last one is actually a counter, so you need to sample it before and after command submission and take the difference.
This is useful for finding performance bottlenecks. Userspace queries are also added.
Signed-off-by: Marek Olšák marek.olsak@amd.com
drivers/gpu/drm/radeon/radeon.h | 5 +++++ drivers/gpu/drm/radeon/radeon_device.c | 1 + drivers/gpu/drm/radeon/radeon_kms.c | 15 ++++++++++++++ drivers/gpu/drm/radeon/radeon_object.c | 38 +++++++++++++++++++++++++++++++++- drivers/gpu/drm/radeon/radeon_object.h | 2 +- drivers/gpu/drm/radeon/radeon_ttm.c | 10 ++++++++- include/uapi/drm/radeon_drm.h | 3 +++ 7 files changed, 71 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 3f10782..d37a57a 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -2307,6 +2307,11 @@ struct radeon_device { /* virtual memory */ struct radeon_vm_manager vm_manager; struct mutex gpu_clock_mutex;
- /* memory stats */
- struct mutex memory_stats_mutex;
- uint64_t vram_usage;
- uint64_t gtt_usage;
- uint64_t num_bytes_moved;
As far as I can see, you could make those three values atomic64_t instead and avoid the mutex.
/* ACPI interface */ struct radeon_atif atif; struct radeon_atcs atcs; diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index b012cbb..6564af7 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1184,6 +1184,7 @@ int radeon_device_init(struct radeon_device *rdev, mutex_init(&rdev->gem.mutex); mutex_init(&rdev->pm.mutex); mutex_init(&rdev->gpu_clock_mutex);
- mutex_init(&rdev->memory_stats_mutex); mutex_init(&rdev->srbm_mutex); init_rwsem(&rdev->pm.mclk_lock); init_rwsem(&rdev->exclusive_lock);
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 0b631eb..ddc8c74 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -486,6 +486,21 @@ static int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file case RADEON_INFO_VCE_FB_VERSION: *value = rdev->vce.fb_version; break;
- case RADEON_INFO_NUM_BYTES_MOVED:
value = (uint32_t*)&value64;
value_size = sizeof(uint64_t);
value64 = rdev->num_bytes_moved;
break;
- case RADEON_INFO_VRAM_USAGE:
value = (uint32_t*)&value64;
value_size = sizeof(uint64_t);
value64 = rdev->vram_usage;
break;
- case RADEON_INFO_GTT_USAGE:
value = (uint32_t*)&value64;
value_size = sizeof(uint64_t);
value64 = rdev->gtt_usage;
default: DRM_DEBUG_KMS("Invalid request %d\n", info->request); return -EINVAL;break;
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index dd12bb4..d676ee2 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -56,11 +56,38 @@ static void radeon_bo_clear_va(struct radeon_bo *bo) } }
+static void radeon_update_memory_usage(struct radeon_bo *bo,
unsigned mem_type, int sign)
+{
- struct radeon_device *rdev = bo->rdev;
- u64 size = (u64)bo->tbo.num_pages << PAGE_SHIFT;
- mutex_lock(&rdev->memory_stats_mutex);
- switch (mem_type) {
- case TTM_PL_TT:
if (sign > 0)
rdev->gtt_usage += size;
else
rdev->gtt_usage -= size;
break;
- case TTM_PL_VRAM:
if (sign > 0)
rdev->vram_usage += size;
else
rdev->vram_usage -= size;
break;
- }
- mutex_unlock(&rdev->memory_stats_mutex);
+}
static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo) { struct radeon_bo *bo;
bo = container_of(tbo, struct radeon_bo, tbo);
radeon_update_memory_usage(bo, bo->tbo.mem.mem_type, -1);
mutex_lock(&bo->rdev->gem.mutex); list_del_init(&bo->list); mutex_unlock(&bo->rdev->gem.mutex);
@@ -567,14 +594,23 @@ int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved, }
void radeon_bo_move_notify(struct ttm_buffer_object *bo,
struct ttm_mem_reg *mem)
struct ttm_mem_reg *new_mem)
{ struct radeon_bo *rbo;
if (!radeon_ttm_bo_is_radeon_bo(bo)) return;
rbo = container_of(bo, struct radeon_bo, tbo); radeon_bo_check_tiling(rbo, 0, 1); radeon_vm_bo_invalidate(rbo->rdev, rbo);
/* update statistics */
if (!new_mem)
return;
radeon_update_memory_usage(rbo, bo->mem.mem_type, -1);
radeon_update_memory_usage(rbo, new_mem->mem_type, 1); }
int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h index 209b111..a9a8c11 100644 --- a/drivers/gpu/drm/radeon/radeon_object.h +++ b/drivers/gpu/drm/radeon/radeon_object.h @@ -151,7 +151,7 @@ extern void radeon_bo_get_tiling_flags(struct radeon_bo *bo, extern int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved, bool force_drop); extern void radeon_bo_move_notify(struct ttm_buffer_object *bo,
struct ttm_mem_reg *mem);
extern int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo); extern int radeon_bo_get_surface_reg(struct radeon_bo *bo);struct ttm_mem_reg *new_mem);
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 77f5b0c..7e2e833 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -406,8 +406,16 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, if (r) { memcpy: r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
if (r) {
return r;
}}
- return r;
/* update statistics */
mutex_lock(&rdev->memory_stats_mutex);
rdev->num_bytes_moved += (u64)bo->num_pages << PAGE_SHIFT;
mutex_unlock(&rdev->memory_stats_mutex);
return 0; }
static int radeon_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h index cb5c93a..aefa2f6 100644 --- a/include/uapi/drm/radeon_drm.h +++ b/include/uapi/drm/radeon_drm.h @@ -1004,6 +1004,9 @@ struct drm_radeon_cs { #define RADEON_INFO_VCE_FW_VERSION 0x1b /* version of VCE feedback */ #define RADEON_INFO_VCE_FB_VERSION 0x1c +#define RADEON_INFO_NUM_BYTES_MOVED 0x1d +#define RADEON_INFO_VRAM_USAGE 0x1e +#define RADEON_INFO_GTT_USAGE 0x1f
struct drm_radeon_info {