From: Christian König christian.koenig@amd.com
This allows us to have small BOs on the LRU before big ones.
v2: fix off-by-one and list corruption bugs
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Alex Deucher alexander.deucher@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 11 ++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 61 +++++++++++++++++++++++++++++++-- 2 files changed, 70 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index c4a21c6..7b90323 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -391,6 +391,14 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring); /* * TTM. */ + +#define AMDGPU_TTM_LRU_SIZE 20 + +struct amdgpu_mman_lru { + struct list_head *lru[TTM_NUM_MEM_TYPES]; + struct list_head *swap_lru; +}; + struct amdgpu_mman { struct ttm_bo_global_ref bo_global_ref; struct drm_global_reference mem_global_ref; @@ -408,6 +416,9 @@ struct amdgpu_mman { struct amdgpu_ring *buffer_funcs_ring; /* Scheduler entity for buffer moves */ struct amd_sched_entity entity; + + /* custom LRU management */ + struct amdgpu_mman_lru log2_size[AMDGPU_TTM_LRU_SIZE]; };
int amdgpu_copy_buffer(struct amdgpu_ring *ring, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index fefaa9b..27f3f47 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -910,6 +910,52 @@ uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, return flags; }
+static void amdgpu_ttm_lru_removal(struct ttm_buffer_object *tbo) +{ + struct amdgpu_device *adev = amdgpu_get_adev(tbo->bdev); + unsigned i, j; + + for (i = 0; i < AMDGPU_TTM_LRU_SIZE; ++i) { + struct amdgpu_mman_lru *lru = &adev->mman.log2_size[i]; + + for (j = 0; j < TTM_NUM_MEM_TYPES; ++j) + if (&tbo->lru == lru->lru[j]) + lru->lru[j] = tbo->lru.prev; + + if (&tbo->swap == lru->swap_lru) + lru->swap_lru = tbo->swap.prev; + } +} + +static struct amdgpu_mman_lru *amdgpu_ttm_lru(struct ttm_buffer_object *tbo) +{ + struct amdgpu_device *adev = amdgpu_get_adev(tbo->bdev); + unsigned log2_size = min(ilog2(tbo->num_pages), + AMDGPU_TTM_LRU_SIZE - 1); + + return &adev->mman.log2_size[log2_size]; +} + +static struct list_head *amdgpu_ttm_lru_tail(struct ttm_buffer_object *tbo) +{ + struct amdgpu_mman_lru *lru = amdgpu_ttm_lru(tbo); + struct list_head *res = lru->lru[tbo->mem.mem_type]; + + lru->lru[tbo->mem.mem_type] = &tbo->lru; + + return res; +} + +static struct list_head *amdgpu_ttm_swap_lru_tail(struct ttm_buffer_object *tbo) +{ + struct amdgpu_mman_lru *lru = amdgpu_ttm_lru(tbo); + struct list_head *res = lru->swap_lru; + + lru->swap_lru = &tbo->swap; + + return res; +} + static struct ttm_bo_driver amdgpu_bo_driver = { .ttm_tt_create = &amdgpu_ttm_tt_create, .ttm_tt_populate = &amdgpu_ttm_tt_populate, @@ -923,12 +969,14 @@ static struct ttm_bo_driver amdgpu_bo_driver = { .fault_reserve_notify = &amdgpu_bo_fault_reserve_notify, .io_mem_reserve = &amdgpu_ttm_io_mem_reserve, .io_mem_free = &amdgpu_ttm_io_mem_free, - .lru_tail = &ttm_bo_default_lru_tail, - .swap_lru_tail = &ttm_bo_default_swap_lru_tail, + .lru_removal = &amdgpu_ttm_lru_removal, + .lru_tail = &amdgpu_ttm_lru_tail, + .swap_lru_tail = &amdgpu_ttm_swap_lru_tail, };
int amdgpu_ttm_init(struct amdgpu_device *adev) { + unsigned i, j; int r;
r = amdgpu_ttm_global_init(adev); @@ -946,6 +994,15 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) DRM_ERROR("failed initializing buffer object driver(%d).\n", r); return r; } + + for (i = 0; i < AMDGPU_TTM_LRU_SIZE; ++i) { + struct amdgpu_mman_lru *lru = &adev->mman.log2_size[i]; + + for (j = 0; j < TTM_NUM_MEM_TYPES; ++j) + lru->lru[j] = &adev->mman.bdev.man[j].lru; + lru->swap_lru = &adev->mman.bdev.glob->swap_lru; + } + adev->mman.initialized = true; r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM, adev->mc.real_vram_size >> PAGE_SHIFT);
From: Christian König christian.koenig@amd.com
Not needed any more.
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Alex Deucher alexander.deucher@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 23 ----------------------- 1 file changed, 23 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 9392e50..00cf74a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -24,7 +24,6 @@ * Authors: * Jerome Glisse glisse@freedesktop.org */ -#include <linux/list_sort.h> #include <linux/pagemap.h> #include <drm/drmP.h> #include <drm/amdgpu_drm.h> @@ -527,16 +526,6 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) return 0; }
-static int cmp_size_smaller_first(void *priv, struct list_head *a, - struct list_head *b) -{ - struct amdgpu_bo_list_entry *la = list_entry(a, struct amdgpu_bo_list_entry, tv.head); - struct amdgpu_bo_list_entry *lb = list_entry(b, struct amdgpu_bo_list_entry, tv.head); - - /* Sort A before B if A is smaller. */ - return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages; -} - /** * cs_parser_fini() - clean parser states * @parser: parser structure holding parsing context. @@ -553,18 +542,6 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo if (!error) { amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm);
- /* Sort the buffer list from the smallest to largest buffer, - * which affects the order of buffers in the LRU list. - * This assures that the smallest buffers are added first - * to the LRU list, so they are likely to be later evicted - * first, instead of large buffers whose eviction is more - * expensive. - * - * This slightly lowers the number of bytes moved by TTM - * per frame under memory pressure. - */ - list_sort(NULL, &parser->validated, cmp_size_smaller_first); - ttm_eu_fence_buffer_objects(&parser->ticket, &parser->validated, parser->fence);
On Fri, Apr 15, 2016 at 11:19 AM, Christian König deathsimple@vodafone.de wrote:
From: Christian König christian.koenig@amd.com
Not needed any more.
Applied the series.
Alex
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Alex Deucher alexander.deucher@amd.com
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 23 ----------------------- 1 file changed, 23 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 9392e50..00cf74a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -24,7 +24,6 @@
- Authors:
- Jerome Glisse glisse@freedesktop.org
*/ -#include <linux/list_sort.h> #include <linux/pagemap.h> #include <drm/drmP.h> #include <drm/amdgpu_drm.h> @@ -527,16 +526,6 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) return 0; }
-static int cmp_size_smaller_first(void *priv, struct list_head *a,
struct list_head *b)
-{
struct amdgpu_bo_list_entry *la = list_entry(a, struct amdgpu_bo_list_entry, tv.head);
struct amdgpu_bo_list_entry *lb = list_entry(b, struct amdgpu_bo_list_entry, tv.head);
/* Sort A before B if A is smaller. */
return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
-}
/**
- cs_parser_fini() - clean parser states
- @parser: parser structure holding parsing context.
@@ -553,18 +542,6 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo if (!error) { amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm);
/* Sort the buffer list from the smallest to largest buffer,
* which affects the order of buffers in the LRU list.
* This assures that the smallest buffers are added first
* to the LRU list, so they are likely to be later evicted
* first, instead of large buffers whose eviction is more
* expensive.
*
* This slightly lowers the number of bytes moved by TTM
* per frame under memory pressure.
*/
list_sort(NULL, &parser->validated, cmp_size_smaller_first);
ttm_eu_fence_buffer_objects(&parser->ticket, &parser->validated, parser->fence);
-- 2.5.0
dri-devel@lists.freedesktop.org