Am 22.06.21 um 18:23 schrieb Andrey Grodzovsky:
Problem: Under memory pressure when GTT domain is almost full multihop assert will come up when trying to evict LRU BO from VRAM to SYSTEM.
Fix: Don't assert on multihop error in evict code but rather do a retry as we do in ttm_bo_move_buffer
Signed-off-by: Andrey Grodzovsky andrey.grodzovsky@amd.com
Reviewed-by: Christian König christian.koenig@amd.com
But I think you need to move this patch earlier in the series or otherwise you break amdgpu eviction in between.
Christian.
drivers/gpu/drm/ttm/ttm_bo.c | 63 +++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 29 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 45145d02aed2..5a2dc712c632 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -485,6 +485,31 @@ void ttm_bo_unlock_delayed_workqueue(struct ttm_device *bdev, int resched) } EXPORT_SYMBOL(ttm_bo_unlock_delayed_workqueue);
+static int ttm_bo_bounce_temp_buffer(struct ttm_buffer_object *bo,
struct ttm_resource **mem,
struct ttm_operation_ctx *ctx,
struct ttm_place *hop)
+{
- struct ttm_placement hop_placement;
- struct ttm_resource *hop_mem;
- int ret;
- hop_placement.num_placement = hop_placement.num_busy_placement = 1;
- hop_placement.placement = hop_placement.busy_placement = hop;
- /* find space in the bounce domain */
- ret = ttm_bo_mem_space(bo, &hop_placement, &hop_mem, ctx);
- if (ret)
return ret;
- /* move to the bounce domain */
- ret = ttm_bo_handle_move_mem(bo, hop_mem, false, ctx, NULL);
- if (ret) {
ttm_resource_free(bo, &hop_mem);
return ret;
- }
- return 0;
+}
- static int ttm_bo_evict(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx) {
@@ -524,12 +549,17 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, goto out; }
+bounce: ret = ttm_bo_handle_move_mem(bo, evict_mem, true, ctx, &hop);
- if (unlikely(ret)) {
WARN(ret == -EMULTIHOP, "Unexpected multihop in eviction - likely driver bug\n");
if (ret != -ERESTARTSYS)
- if (ret == -EMULTIHOP) {
ret = ttm_bo_bounce_temp_buffer(bo, &evict_mem, ctx, &hop);
if (ret) { pr_err("Buffer eviction failed\n");
ttm_resource_free(bo, &evict_mem);
ttm_resource_free(bo, &evict_mem);
goto out;
}
/* try and move to final place now. */
} out: return ret;goto bounce;
@@ -844,31 +874,6 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo, } EXPORT_SYMBOL(ttm_bo_mem_space);
-static int ttm_bo_bounce_temp_buffer(struct ttm_buffer_object *bo,
struct ttm_resource **mem,
struct ttm_operation_ctx *ctx,
struct ttm_place *hop)
-{
- struct ttm_placement hop_placement;
- struct ttm_resource *hop_mem;
- int ret;
- hop_placement.num_placement = hop_placement.num_busy_placement = 1;
- hop_placement.placement = hop_placement.busy_placement = hop;
- /* find space in the bounce domain */
- ret = ttm_bo_mem_space(bo, &hop_placement, &hop_mem, ctx);
- if (ret)
return ret;
- /* move to the bounce domain */
- ret = ttm_bo_handle_move_mem(bo, hop_mem, false, ctx, NULL);
- if (ret) {
ttm_resource_free(bo, &hop_mem);
return ret;
- }
- return 0;
-}
- static int ttm_bo_move_buffer(struct ttm_buffer_object *bo, struct ttm_placement *placement, struct ttm_operation_ctx *ctx)