Hi everyone,
this set of patches moves waiting for TTM BOs to become idle before moving them into the driver callbacks and the CPU copy fallback function.
Then the amdgpu driver is patched to completely remove this premove idle wait.
Doing this improves swapping buffers back in on command submission. Improving evictions (swapping things out) is the next step.
Please review and/or comment, Christian.
From: Christian König christian.koenig@amd.com
ttm_tt_destroy should be the only one unbinding the object.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/ttm/ttm_bo.c | 2 -- drivers/gpu/drm/ttm/ttm_bo_util.c | 3 --- drivers/gpu/drm/ttm/ttm_tt.c | 17 +++++------------ include/drm/ttm/ttm_bo_driver.h | 9 --------- 4 files changed, 5 insertions(+), 26 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 39386f5..4216b31 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -397,7 +397,6 @@ moved: out_err: new_man = &bdev->man[bo->mem.mem_type]; if ((new_man->flags & TTM_MEMTYPE_FLAG_FIXED) && bo->ttm) { - ttm_tt_unbind(bo->ttm); ttm_tt_destroy(bo->ttm); bo->ttm = NULL; } @@ -419,7 +418,6 @@ static void ttm_bo_cleanup_memtype_use(struct ttm_buffer_object *bo) bo->bdev->driver->move_notify(bo, NULL);
if (bo->ttm) { - ttm_tt_unbind(bo->ttm); ttm_tt_destroy(bo->ttm); bo->ttm = NULL; } diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index d983155..4194b7e 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -53,7 +53,6 @@ int ttm_bo_move_ttm(struct ttm_buffer_object *bo, int ret;
if (old_mem->mem_type != TTM_PL_SYSTEM) { - ttm_tt_unbind(ttm); ttm_bo_free_old_node(bo); ttm_flag_masked(&old_mem->placement, TTM_PL_FLAG_SYSTEM, TTM_PL_MASK_MEM); @@ -402,7 +401,6 @@ out2: new_mem->mm_node = NULL;
if ((man->flags & TTM_MEMTYPE_FLAG_FIXED) && (ttm != NULL)) { - ttm_tt_unbind(ttm); ttm_tt_destroy(ttm); bo->ttm = NULL; } @@ -651,7 +649,6 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
if ((man->flags & TTM_MEMTYPE_FLAG_FIXED) && (bo->ttm != NULL)) { - ttm_tt_unbind(bo->ttm); ttm_tt_destroy(bo->ttm); bo->ttm = NULL; } diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 077ae9b..79f6323 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -166,11 +166,15 @@ EXPORT_SYMBOL(ttm_tt_set_placement_caching);
void ttm_tt_destroy(struct ttm_tt *ttm) { + int ret; + if (unlikely(ttm == NULL)) return;
if (ttm->state == tt_bound) { - ttm_tt_unbind(ttm); + ret = ttm->func->unbind(ttm); + BUG_ON(ret); + ttm->state = tt_unbound; }
if (ttm->state == tt_unbound) @@ -251,17 +255,6 @@ void ttm_dma_tt_fini(struct ttm_dma_tt *ttm_dma) } EXPORT_SYMBOL(ttm_dma_tt_fini);
-void ttm_tt_unbind(struct ttm_tt *ttm) -{ - int ret; - - if (ttm->state == tt_bound) { - ret = ttm->func->unbind(ttm); - BUG_ON(ret); - ttm->state = tt_unbound; - } -} - int ttm_tt_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem) { int ret = 0; diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index 513f7f9..da6ee17 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -623,15 +623,6 @@ extern int ttm_tt_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem); extern void ttm_tt_destroy(struct ttm_tt *ttm);
/** - * ttm_ttm_unbind: - * - * @ttm: The struct ttm_tt. - * - * Unbind a struct ttm_tt. - */ -extern void ttm_tt_unbind(struct ttm_tt *ttm); - -/** * ttm_tt_swapin: * * @ttm: The struct ttm_tt.
From: Christian König christian.koenig@amd.com
The function is a no-op with a NULL pointer.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/ttm/ttm_bo.c | 11 ++++------- drivers/gpu/drm/ttm/ttm_bo_util.c | 5 ++--- drivers/gpu/drm/ttm/ttm_tt.c | 2 +- 3 files changed, 7 insertions(+), 11 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 4216b31..43a2955 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -147,8 +147,7 @@ static void ttm_bo_release_list(struct kref *list_kref) BUG_ON(!list_empty(&bo->lru)); BUG_ON(!list_empty(&bo->ddestroy));
- if (bo->ttm) - ttm_tt_destroy(bo->ttm); + ttm_tt_destroy(bo->ttm); atomic_dec(&bo->glob->bo_count); if (bo->resv == &bo->ttm_resv) reservation_object_fini(&bo->ttm_resv); @@ -396,7 +395,7 @@ moved:
out_err: new_man = &bdev->man[bo->mem.mem_type]; - if ((new_man->flags & TTM_MEMTYPE_FLAG_FIXED) && bo->ttm) { + if (new_man->flags & TTM_MEMTYPE_FLAG_FIXED) { ttm_tt_destroy(bo->ttm); bo->ttm = NULL; } @@ -417,10 +416,8 @@ static void ttm_bo_cleanup_memtype_use(struct ttm_buffer_object *bo) if (bo->bdev->driver->move_notify) bo->bdev->driver->move_notify(bo, NULL);
- if (bo->ttm) { - ttm_tt_destroy(bo->ttm); - bo->ttm = NULL; - } + ttm_tt_destroy(bo->ttm); + bo->ttm = NULL; ttm_bo_mem_put(bo, &bo->mem);
ww_mutex_unlock (&bo->resv->lock); diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 4194b7e..0f4bcb0 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -400,7 +400,7 @@ out2: *old_mem = *new_mem; new_mem->mm_node = NULL;
- if ((man->flags & TTM_MEMTYPE_FLAG_FIXED) && (ttm != NULL)) { + if (man->flags & TTM_MEMTYPE_FLAG_FIXED) { ttm_tt_destroy(ttm); bo->ttm = NULL; } @@ -647,8 +647,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo, if (ret) return ret;
- if ((man->flags & TTM_MEMTYPE_FLAG_FIXED) && - (bo->ttm != NULL)) { + if (man->flags & TTM_MEMTYPE_FLAG_FIXED) { ttm_tt_destroy(bo->ttm); bo->ttm = NULL; } diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 79f6323..d28d4333 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -168,7 +168,7 @@ void ttm_tt_destroy(struct ttm_tt *ttm) { int ret;
- if (unlikely(ttm == NULL)) + if (ttm == NULL) return;
if (ttm->state == tt_bound) {
From: Christian König christian.koenig@amd.com
It's pointless to only call the default implementation.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/ast/ast_ttm.c | 13 +------------ drivers/gpu/drm/bochs/bochs_mm.c | 11 +---------- drivers/gpu/drm/cirrus/cirrus_ttm.c | 13 +------------ drivers/gpu/drm/mgag200/mgag200_ttm.c | 13 +------------ 4 files changed, 4 insertions(+), 46 deletions(-)
diff --git a/drivers/gpu/drm/ast/ast_ttm.c b/drivers/gpu/drm/ast/ast_ttm.c index 59f2f93..b29a412 100644 --- a/drivers/gpu/drm/ast/ast_ttm.c +++ b/drivers/gpu/drm/ast/ast_ttm.c @@ -186,17 +186,6 @@ static void ast_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg * { }
-static int ast_bo_move(struct ttm_buffer_object *bo, - bool evict, bool interruptible, - bool no_wait_gpu, - struct ttm_mem_reg *new_mem) -{ - int r; - r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem); - return r; -} - - static void ast_ttm_backend_destroy(struct ttm_tt *tt) { ttm_tt_fini(tt); @@ -241,7 +230,7 @@ struct ttm_bo_driver ast_bo_driver = { .ttm_tt_unpopulate = ast_ttm_tt_unpopulate, .init_mem_type = ast_bo_init_mem_type, .evict_flags = ast_bo_evict_flags, - .move = ast_bo_move, + .move = NULL, .verify_access = ast_bo_verify_access, .io_mem_reserve = &ast_ttm_io_mem_reserve, .io_mem_free = &ast_ttm_io_mem_free, diff --git a/drivers/gpu/drm/bochs/bochs_mm.c b/drivers/gpu/drm/bochs/bochs_mm.c index 24a30f6..03c1fbf 100644 --- a/drivers/gpu/drm/bochs/bochs_mm.c +++ b/drivers/gpu/drm/bochs/bochs_mm.c @@ -165,15 +165,6 @@ static void bochs_ttm_io_mem_free(struct ttm_bo_device *bdev, { }
-static int bochs_bo_move(struct ttm_buffer_object *bo, - bool evict, bool interruptible, - bool no_wait_gpu, - struct ttm_mem_reg *new_mem) -{ - return ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem); -} - - static void bochs_ttm_backend_destroy(struct ttm_tt *tt) { ttm_tt_fini(tt); @@ -208,7 +199,7 @@ struct ttm_bo_driver bochs_bo_driver = { .ttm_tt_unpopulate = ttm_pool_unpopulate, .init_mem_type = bochs_bo_init_mem_type, .evict_flags = bochs_bo_evict_flags, - .move = bochs_bo_move, + .move = NULL, .verify_access = bochs_bo_verify_access, .io_mem_reserve = &bochs_ttm_io_mem_reserve, .io_mem_free = &bochs_ttm_io_mem_free, diff --git a/drivers/gpu/drm/cirrus/cirrus_ttm.c b/drivers/gpu/drm/cirrus/cirrus_ttm.c index 6768b7b..1cc9ee6 100644 --- a/drivers/gpu/drm/cirrus/cirrus_ttm.c +++ b/drivers/gpu/drm/cirrus/cirrus_ttm.c @@ -186,17 +186,6 @@ static void cirrus_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_re { }
-static int cirrus_bo_move(struct ttm_buffer_object *bo, - bool evict, bool interruptible, - bool no_wait_gpu, - struct ttm_mem_reg *new_mem) -{ - int r; - r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem); - return r; -} - - static void cirrus_ttm_backend_destroy(struct ttm_tt *tt) { ttm_tt_fini(tt); @@ -241,7 +230,7 @@ struct ttm_bo_driver cirrus_bo_driver = { .ttm_tt_unpopulate = cirrus_ttm_tt_unpopulate, .init_mem_type = cirrus_bo_init_mem_type, .evict_flags = cirrus_bo_evict_flags, - .move = cirrus_bo_move, + .move = NULL, .verify_access = cirrus_bo_verify_access, .io_mem_reserve = &cirrus_ttm_io_mem_reserve, .io_mem_free = &cirrus_ttm_io_mem_free, diff --git a/drivers/gpu/drm/mgag200/mgag200_ttm.c b/drivers/gpu/drm/mgag200/mgag200_ttm.c index 9d5083d..68268e5 100644 --- a/drivers/gpu/drm/mgag200/mgag200_ttm.c +++ b/drivers/gpu/drm/mgag200/mgag200_ttm.c @@ -186,17 +186,6 @@ static void mgag200_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_r { }
-static int mgag200_bo_move(struct ttm_buffer_object *bo, - bool evict, bool interruptible, - bool no_wait_gpu, - struct ttm_mem_reg *new_mem) -{ - int r; - r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem); - return r; -} - - static void mgag200_ttm_backend_destroy(struct ttm_tt *tt) { ttm_tt_fini(tt); @@ -241,7 +230,7 @@ struct ttm_bo_driver mgag200_bo_driver = { .ttm_tt_unpopulate = mgag200_ttm_tt_unpopulate, .init_mem_type = mgag200_bo_init_mem_type, .evict_flags = mgag200_bo_evict_flags, - .move = mgag200_bo_move, + .move = NULL, .verify_access = mgag200_bo_verify_access, .io_mem_reserve = &mgag200_ttm_io_mem_reserve, .io_mem_free = &mgag200_ttm_io_mem_free,
From: Christian König christian.koenig@amd.com
Wait for idle before moving the BO in all drivers implementing an accelerated move function.
This should keep the current behavior when removing the pre move wait.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 4 ++++ drivers/gpu/drm/nouveau/nouveau_bo.c | 4 ++++ drivers/gpu/drm/qxl/qxl_ttm.c | 7 +++++++ drivers/gpu/drm/radeon/radeon_ttm.c | 4 ++++ drivers/gpu/drm/virtio/virtgpu_ttm.c | 6 ++++++ 5 files changed, 25 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 3b9053a..0a6a632 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -390,6 +390,10 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, struct ttm_mem_reg *old_mem = &bo->mem; int r;
+ r = ttm_bo_wait(bo, interruptible, no_wait_gpu); + if (r) + return r; + /* Can't move a pinned BO */ abo = container_of(bo, struct amdgpu_bo, tbo); if (WARN_ON_ONCE(abo->pin_count > 0)) diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 74a8a2c..e06de30 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -1289,6 +1289,10 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr, struct nouveau_drm_tile *new_tile = NULL; int ret = 0;
+ ret = ttm_bo_wait(bo, intr, no_wait_gpu); + if (ret) + return ret; + if (nvbo->pin_refcnt) NV_WARN(drm, "Moving pinned object %p!\n", nvbo);
diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c index 0738d74..89e8be9 100644 --- a/drivers/gpu/drm/qxl/qxl_ttm.c +++ b/drivers/gpu/drm/qxl/qxl_ttm.c @@ -350,6 +350,13 @@ static int qxl_bo_move(struct ttm_buffer_object *bo, struct ttm_mem_reg *new_mem) { struct ttm_mem_reg *old_mem = &bo->mem; + int ret; + + ret = ttm_bo_wait(bo, interruptible, no_wait_gpu); + if (ret) + return ret; + + if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) { qxl_move_null(bo, new_mem); return 0; diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 590b037..1cc4870 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -403,6 +403,10 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, struct ttm_mem_reg *old_mem = &bo->mem; int r;
+ r = ttm_bo_wait(bo, interruptible, no_wait_gpu); + if (r) + return r; + /* Can't move a pinned BO */ rbo = container_of(bo, struct radeon_bo, tbo); if (WARN_ON_ONCE(rbo->pin_count > 0)) diff --git a/drivers/gpu/drm/virtio/virtgpu_ttm.c b/drivers/gpu/drm/virtio/virtgpu_ttm.c index a058081..80482ac 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ttm.c +++ b/drivers/gpu/drm/virtio/virtgpu_ttm.c @@ -375,6 +375,12 @@ static int virtio_gpu_bo_move(struct ttm_buffer_object *bo, bool no_wait_gpu, struct ttm_mem_reg *new_mem) { + int ret; + + ret = ttm_bo_wait(bo, interruptible, no_wait_gpu); + if (ret) + return ret; + virtio_gpu_move_null(bo, new_mem); return 0; }
From: Christian König christian.koenig@amd.com
When we want to pipeline accelerated moves we need to wait in the fallback path.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 3 ++- drivers/gpu/drm/nouveau/nouveau_bo.c | 2 +- drivers/gpu/drm/qxl/qxl_ttm.c | 3 ++- drivers/gpu/drm/radeon/radeon_ttm.c | 3 ++- drivers/gpu/drm/ttm/ttm_bo.c | 3 ++- drivers/gpu/drm/ttm/ttm_bo_util.c | 7 ++++++- include/drm/ttm/ttm_bo_driver.h | 4 +++- 7 files changed, 18 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 0a6a632..9b244c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -433,7 +433,8 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo,
if (r) { memcpy: - r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem); + r = ttm_bo_move_memcpy(bo, evict, interruptible, + no_wait_gpu, new_mem); if (r) { return r; } diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index e06de30..b61660b 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -1328,7 +1328,7 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr, /* Fallback to software copy. */ ret = ttm_bo_wait(bo, intr, no_wait_gpu); if (ret == 0) - ret = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem); + ret = ttm_bo_move_memcpy(bo, evict, intr, no_wait_gpu, new_mem);
out: if (drm->device.info.family < NV_DEVICE_INFO_V0_TESLA) { diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c index 89e8be9..d50c967 100644 --- a/drivers/gpu/drm/qxl/qxl_ttm.c +++ b/drivers/gpu/drm/qxl/qxl_ttm.c @@ -361,7 +361,8 @@ static int qxl_bo_move(struct ttm_buffer_object *bo, qxl_move_null(bo, new_mem); return 0; } - return ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem); + return ttm_bo_move_memcpy(bo, evict, interruptible, + no_wait_gpu, new_mem); }
static void qxl_bo_move_notify(struct ttm_buffer_object *bo, diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 1cc4870..20ca22d 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -445,7 +445,8 @@ static int radeon_bo_move(struct ttm_buffer_object *bo,
if (r) { memcpy: - r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem); + r = ttm_bo_move_memcpy(bo, evict, interruptible, + no_wait_gpu, new_mem); if (r) { return r; } diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 43a2955..041fb3b 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -359,7 +359,8 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo, ret = bdev->driver->move(bo, evict, interruptible, no_wait_gpu, mem); else - ret = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, mem); + ret = ttm_bo_move_memcpy(bo, evict, interruptible, + no_wait_gpu, mem);
if (ret) { if (bdev->driver->move_notify) { diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 0f4bcb0..434f239 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -320,7 +320,8 @@ static int ttm_copy_ttm_io_page(struct ttm_tt *ttm, void *dst, }
int ttm_bo_move_memcpy(struct ttm_buffer_object *bo, - bool evict, bool no_wait_gpu, + bool evict, bool interruptible, + bool no_wait_gpu, struct ttm_mem_reg *new_mem) { struct ttm_bo_device *bdev = bo->bdev; @@ -336,6 +337,10 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo, unsigned long add = 0; int dir;
+ ret = ttm_bo_wait(bo, interruptible, no_wait_gpu); + if (ret) + return ret; + ret = ttm_mem_reg_ioremap(bdev, old_mem, &old_iomap); if (ret) return ret; diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index da6ee17..0d1d9d7 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -970,6 +970,7 @@ extern int ttm_bo_move_ttm(struct ttm_buffer_object *bo, * * @bo: A pointer to a struct ttm_buffer_object. * @evict: 1: This is an eviction. Don't try to pipeline. + * @interruptible: Sleep interruptible if waiting. * @no_wait_gpu: Return immediately if the GPU is busy. * @new_mem: struct ttm_mem_reg indicating where to move. * @@ -984,7 +985,8 @@ extern int ttm_bo_move_ttm(struct ttm_buffer_object *bo, */
extern int ttm_bo_move_memcpy(struct ttm_buffer_object *bo, - bool evict, bool no_wait_gpu, + bool evict, bool interruptible, + bool no_wait_gpu, struct ttm_mem_reg *new_mem);
/**
From: Christian König christian.koenig@amd.com
That is unnecessary now.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/ttm/ttm_bo.c | 14 -------------- 1 file changed, 14 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 041fb3b..cc94df0 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -996,20 +996,6 @@ static int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
lockdep_assert_held(&bo->resv->lock.base);
- /* - * Don't wait for the BO on initial allocation. This is important when - * the BO has an imported reservation object. - */ - if (bo->mem.mem_type != TTM_PL_SYSTEM || bo->ttm != NULL) { - /* - * FIXME: It's possible to pipeline buffer moves. - * Have the driver move function wait for idle when necessary, - * instead of doing it here. - */ - ret = ttm_bo_wait(bo, interruptible, no_wait_gpu); - if (ret) - return ret; - } mem.num_pages = bo->num_pages; mem.size = mem.num_pages << PAGE_SHIFT; mem.page_alignment = bo->mem.page_alignment;
From: Christian König christian.koenig@amd.com
That is unnecessary now.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/ttm/ttm_bo.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index cc94df0..86dd6950 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -684,15 +684,6 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible, struct ttm_placement placement; int ret = 0;
- ret = ttm_bo_wait(bo, interruptible, no_wait_gpu); - - if (unlikely(ret != 0)) { - if (ret != -ERESTARTSYS) { - pr_err("Failed to expire sync object before buffer eviction\n"); - } - goto out; - } - lockdep_assert_held(&bo->resv->lock.base);
evict_mem = bo->mem; @@ -716,7 +707,7 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible,
ret = ttm_bo_handle_move_mem(bo, &evict_mem, true, interruptible, no_wait_gpu); - if (ret) { + if (unlikely(ret)) { if (ret != -ERESTARTSYS) pr_err("Buffer eviction failed\n"); ttm_bo_mem_put(bo, &evict_mem);
From: Christian König christian.koenig@amd.com
Final part to avoid pre move waits.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/ttm/ttm_bo.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 86dd6950..c3c615c 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1655,14 +1655,9 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink) ttm_bo_list_ref_sub(bo, put_count, true);
/** - * Wait for GPU, then move to system cached. + * Move to system cached */
- ret = ttm_bo_wait(bo, false, false); - - if (unlikely(ret != 0)) - goto out; - if ((bo->mem.placement & swap_placement) != swap_placement) { struct ttm_mem_reg evict_mem;
@@ -1677,6 +1672,14 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink) goto out; }
+ /** + * Make sure BO is idle. + */ + + ret = ttm_bo_wait(bo, false, false); + if (unlikely(ret != 0)) + goto out; + ttm_bo_unmap_virtual(bo);
/**
From: Christian König christian.koenig@amd.com
Otherwise we could update the VM page tables while the move is only scheduled.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 62a4c12..e9b44b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -780,6 +780,7 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev, * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table * * @adev: amdgpu_device pointer + * @exclusive: fence we need to sync to * @src: address where to copy page table entries from * @pages_addr: DMA addresses to use for mapping * @vm: requested vm @@ -793,6 +794,7 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev, * Returns 0 for success, -EINVAL for failure. */ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, + struct fence *exclusive, uint64_t src, dma_addr_t *pages_addr, struct amdgpu_vm *vm, @@ -853,6 +855,10 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
vm_update_params.ib = &job->ibs[0];
+ r = amdgpu_sync_fence(adev, &job->sync, exclusive); + if (r) + goto error_free; + r = amdgpu_sync_resv(adev, &job->sync, vm->page_directory->tbo.resv, owner); if (r) @@ -889,6 +895,7 @@ error_free: * amdgpu_vm_bo_split_mapping - split a mapping into smaller chunks * * @adev: amdgpu_device pointer + * @exclusive: fence we need to sync to * @gtt_flags: flags as they are used for GTT * @pages_addr: DMA addresses to use for mapping * @vm: requested vm @@ -902,6 +909,7 @@ error_free: * Returns 0 for success, -EINVAL for failure. */ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, + struct fence *exclusive, uint32_t gtt_flags, dma_addr_t *pages_addr, struct amdgpu_vm *vm, @@ -932,7 +940,8 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, addr += mapping->offset;
if (!pages_addr || src) - return amdgpu_vm_bo_update_mapping(adev, src, pages_addr, vm, + return amdgpu_vm_bo_update_mapping(adev, exclusive, + src, pages_addr, vm, start, mapping->it.last, flags, addr, fence);
@@ -940,7 +949,8 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, uint64_t last;
last = min((uint64_t)mapping->it.last, start + max_size - 1); - r = amdgpu_vm_bo_update_mapping(adev, src, pages_addr, vm, + r = amdgpu_vm_bo_update_mapping(adev, exclusive, + src, pages_addr, vm, start, last, flags, addr, fence); if (r) @@ -973,6 +983,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping *mapping; dma_addr_t *pages_addr = NULL; uint32_t gtt_flags, flags; + struct fence *exclusive; uint64_t addr; int r;
@@ -994,8 +1005,11 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, default: break; } + + exclusive = reservation_object_get_excl(bo_va->bo->tbo.resv); } else { addr = 0; + exclusive = NULL; }
flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem); @@ -1007,7 +1021,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, spin_unlock(&vm->status_lock);
list_for_each_entry(mapping, &bo_va->invalids, list) { - r = amdgpu_vm_bo_split_mapping(adev, gtt_flags, pages_addr, vm, + r = amdgpu_vm_bo_split_mapping(adev, exclusive, + gtt_flags, pages_addr, vm, mapping, flags, addr, &bo_va->last_pt_update); if (r) @@ -1054,7 +1069,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping, list); list_del(&mapping->list);
- r = amdgpu_vm_bo_split_mapping(adev, 0, NULL, vm, mapping, + r = amdgpu_vm_bo_split_mapping(adev, NULL, 0, NULL, vm, mapping, 0, 0, NULL); kfree(mapping); if (r)
From: Christian König christian.koenig@amd.com
Not needed any more.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 4 ---- 1 file changed, 4 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 9b244c5..232f123 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -390,10 +390,6 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, struct ttm_mem_reg *old_mem = &bo->mem; int r;
- r = ttm_bo_wait(bo, interruptible, no_wait_gpu); - if (r) - return r; - /* Can't move a pinned BO */ abo = container_of(bo, struct amdgpu_bo, tbo); if (WARN_ON_ONCE(abo->pin_count > 0))
On Mon, Jun 6, 2016 at 4:17 AM, Christian König deathsimple@vodafone.de wrote:
Hi everyone,
this set of patches moves waiting for TTM BOs to become idle before moving them into the driver callbacks and the CPU copy fallback function.
Then the amdgpu driver is patched to completely remove this premove idle wait.
Doing this improves swapping buffers back in on command submission. Improving evictions (swapping things out) is the next step.
Please review and/or comment,
For the series: Reviewed-by: Alex Deucher alexander.deucher@amd.com
Christian.
dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
dri-devel@lists.freedesktop.org