Am 30.04.19 um 09:01 schrieb Chunming Zhou:
A heavy GPU job can occupy memory for a long time, which causes other users to fail to get memory.
This basically picks up Christian's idea:
- Reserve the BO in DC using a ww_mutex ticket (trivial).
- If we then run into this EBUSY condition in TTM, check if the BO we need memory for (or rather the ww_mutex of its reservation object) has a ticket assigned.
- If we have a ticket we grab a reference to the first BO on the LRU, drop the LRU lock and try to grab the reservation lock with the ticket.
- If getting the reservation lock with the ticket succeeded we check if the BO is still the first one on the LRU in question (the BO could have moved).
- If the BO is still the first one on the LRU in question we try to evict it as we would evict any other BO.
- If any of the "If's" above fail we just back off and return -EBUSY.
v2: fix some minor checks. v3: address Christian's comments on v2. v4: fix some missing pieces. v5: handle first_bo unlock and bo_get/put.
Change-Id: I21423fb922f885465f13833c41df1e134364a8e7 Signed-off-by: Chunming Zhou david1.zhou@amd.com
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 7 +- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 22 +++-- drivers/gpu/drm/ttm/ttm_bo.c | 81 +++++++++++++++++-- 3 files changed, 99 insertions(+), 11 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index affde72b44db..523773e85284 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -811,7 +811,12 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, u64 min_offset, u64 max_offset) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- struct ttm_operation_ctx ctx = { false, false };
struct ttm_operation_ctx ctx = {
.interruptible = false,
.no_wait_gpu = false,
.resv = bo->tbo.resv,
.flags = 0
}; int r, i;
if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index a5cacf846e1b..cc3677c4a4c2 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4101,6 +4101,9 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane, struct amdgpu_device *adev; struct amdgpu_bo *rbo; struct dm_plane_state *dm_plane_state_new, *dm_plane_state_old;
- struct list_head list, duplicates;
- struct ttm_validate_buffer tv;
- struct ww_acquire_ctx ticket; uint64_t tiling_flags; uint32_t domain; int r;
@@ -4117,9 +4120,18 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane, obj = new_state->fb->obj[0]; rbo = gem_to_amdgpu_bo(obj); adev = amdgpu_ttm_adev(rbo->tbo.bdev);
- r = amdgpu_bo_reserve(rbo, false);
- if (unlikely(r != 0))
INIT_LIST_HEAD(&list);
INIT_LIST_HEAD(&duplicates);
tv.bo = &rbo->tbo;
tv.num_shared = 1;
list_add(&tv.head, &list);
r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
if (r) {
dev_err(adev->dev, "fail to reserve bo (%d)\n", r);
return r;
}
if (plane->type != DRM_PLANE_TYPE_CURSOR) domain = amdgpu_display_supported_domains(adev);
@@ -4130,21 +4142,21 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane, if (unlikely(r != 0)) { if (r != -ERESTARTSYS) DRM_ERROR("Failed to pin framebuffer with error %d\n", r);
amdgpu_bo_unreserve(rbo);
ttm_eu_backoff_reservation(&ticket, &list);
return r; }
r = amdgpu_ttm_alloc_gart(&rbo->tbo); if (unlikely(r != 0)) { amdgpu_bo_unpin(rbo);
amdgpu_bo_unreserve(rbo);
ttm_eu_backoff_reservation(&ticket, &list);
DRM_ERROR("%p bind failed\n", rbo); return r; }
amdgpu_bo_get_tiling_flags(rbo, &tiling_flags);
- amdgpu_bo_unreserve(rbo);
- ttm_eu_backoff_reservation(&ticket, &list);
Well I can only repeat myself, please put that into a separate patch!
afb->address = amdgpu_bo_gpu_offset(rbo);
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 8502b3ed2d88..2c4963e105d9 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -766,11 +766,13 @@ EXPORT_SYMBOL(ttm_bo_eviction_valuable);
- b. Otherwise, trylock it.
*/ static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
struct ttm_operation_ctx *ctx, bool *locked)
struct ttm_operation_ctx *ctx, bool *locked, bool *busy)
{ bool ret = false;
*locked = false;
if (busy)
*busy = false;
if (bo->resv == ctx->resv) { reservation_object_assert_held(bo->resv); if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT
@@ -779,6 +781,8 @@ static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo, } else { *locked = reservation_object_trylock(bo->resv); ret = *locked;
if (!ret && busy)
*busy = true;
}
return ret;
@@ -791,7 +795,7 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev, { struct ttm_bo_global *glob = bdev->glob; struct ttm_mem_type_manager *man = &bdev->man[mem_type];
- struct ttm_buffer_object *bo = NULL;
- struct ttm_buffer_object *bo = NULL, *first_bo = NULL; bool locked = false; unsigned i; int ret;
@@ -799,8 +803,15 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev, spin_lock(&glob->lru_lock); for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { list_for_each_entry(bo, &man->lru[i], lru) {
if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked))
bool busy = false;
if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
&busy)) {
if (!first_bo && busy) {
ttm_bo_get(bo);
first_bo = bo;
} continue;
} if (place && !bdev->driver->eviction_valuable(bo, place)) {
@@ -808,6 +819,7 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev, reservation_object_unlock(bo->resv); continue; }
}break;
@@ -820,7 +832,65 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
if (!bo) { spin_unlock(&glob->lru_lock);
return -EBUSY;
/* check if another user has occupied memory for too long */
if (!first_bo || !ctx || !ctx->resv || !ctx->resv->lock.ctx) {
if (first_bo)
ttm_bo_put(first_bo);
return -EBUSY;
}
if (ctx->interruptible)
ret = ww_mutex_lock_interruptible(&first_bo->resv->lock,
ctx->resv->lock.ctx);
else
ret = ww_mutex_lock(&first_bo->resv->lock, ctx->resv->lock.ctx);
if (ret) {
ttm_bo_put(first_bo);
return ret;
}
spin_lock(&glob->lru_lock);
for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
/* The resv lock of the previously busy BO is now held by us
 * (acquired above), so allow it to be evicted as well.
 */
struct ttm_operation_ctx busy_ctx = {
.interruptible = ctx->interruptible,
.no_wait_gpu = ctx->no_wait_gpu,
.resv = first_bo->resv,
.flags = TTM_OPT_FLAG_ALLOW_RES_EVICT
};
list_for_each_entry(bo, &man->lru[i], lru) {
if (!ttm_bo_evict_swapout_allowable(bo,
&busy_ctx,
&locked,
NULL))
continue;
if (place && !bdev->driver->eviction_valuable(bo,
place)) {
if (locked)
reservation_object_unlock(bo->resv);
continue;
}
break;
}
/* If the inner loop terminated early, we have our candidate */
if (&bo->lru != &man->lru[i])
break;
bo = NULL;
}
So we are now searching for any BO again which matches the criteria? Good idea.
Can we factor those two loops out into a separate function? I mean they are mostly identical now to the one above, aren't they?
if (bo && (bo->resv == first_bo->resv))
locked = true;
else if (bo)
ww_mutex_unlock(&first_bo->resv->lock);
ttm_bo_put(first_bo);
first_bo = NULL;
if (!bo) {
spin_unlock(&glob->lru_lock);
return -EBUSY;
}
- } else {
if (first_bo)
ttm_bo_put(first_bo);
This must come a bit later, since we can't call ttm_bo_put() while holding the spinlock.
Regards, Christian.
}
kref_get(&bo->list_kref); @@ -1784,7 +1854,8 @@ int ttm_bo_swapout(struct ttm_bo_global *glob, struct ttm_operation_ctx *ctx) spin_lock(&glob->lru_lock); for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { list_for_each_entry(bo, &glob->swap_lru[i], swap) {
if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked)) {
if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
NULL)) { ret = 0; break; }