This series of patches cleans up TTM's delayed delete handling.
The core of the new handling is that we now only have a single reference counter instead of two and use kref_get_unless_zero() to grab BOs from the LRU during eviction.
This reduces the overhead of LRU moves and allows us to properly individualize the BO's reservation object during deletion, so that fences for work like clearing memory, unmapping page tables etc. can still be added to it.
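For illustration, the lookup pattern this enables looks roughly as follows. This is a sketch only; the helper lru_first_ref() is made up for this mail, while ttm_bo_get_unless_zero(), the lru_lock and the per-manager LRU lists are the real TTM objects touched by the patches below:

/* Illustrative sketch: taking a reference on a BO found on an LRU list.
 * This works because the release path removes the BO from the LRU under
 * the same lru_lock before the memory can go away, so a failed
 * kref_get_unless_zero() simply means "already being destroyed".
 */
static struct ttm_buffer_object *
lru_first_ref(struct ttm_mem_type_manager *man, unsigned prio)
{
	struct ttm_buffer_object *bo;

	spin_lock(&ttm_bo_glob.lru_lock);
	list_for_each_entry(bo, &man->lru[prio], lru) {
		if (!ttm_bo_get_unless_zero(bo))
			continue;	/* zombie, skip it */
		spin_unlock(&ttm_bo_glob.lru_lock);
		return bo;
	}
	spin_unlock(&ttm_bo_glob.lru_lock);
	return NULL;
}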
Please review and comment, Christian.
Ghost BOs need to stick with the resv object only when the origin is imported.
This is low-hanging fruit to avoid OOM situations during eviction.
Signed-off-by: Christian König christian.koenig@amd.com
---
 drivers/gpu/drm/ttm/ttm_bo_util.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 2b0e5a088da0..86d152472f38 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -511,7 +511,7 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
 	kref_init(&fbo->base.kref);
 	fbo->base.destroy = &ttm_transfered_destroy;
 	fbo->base.acc_size = 0;
-	if (bo->base.resv == &bo->base._resv)
+	if (bo->type != ttm_bo_type_sg)
 		fbo->base.base.resv = &fbo->base.base._resv;
 
 	dma_resv_init(&fbo->base.base._resv);
The function is always called with deleted BOs.
While at it, clean up the indentation as well.
Signed-off-by: Christian König christian.koenig@amd.com
---
 drivers/gpu/drm/ttm/ttm_bo.c | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 1fbc36f05d89..c1104c8857b7 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -522,14 +522,9 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
 			       bool interruptible, bool no_wait_gpu,
 			       bool unlock_resv)
 {
-	struct dma_resv *resv;
+	struct dma_resv *resv = &bo->base._resv;
 	int ret;
 
-	if (unlikely(list_empty(&bo->ddestroy)))
-		resv = bo->base.resv;
-	else
-		resv = &bo->base._resv;
-
 	if (dma_resv_test_signaled_rcu(resv, true))
 		ret = 0;
 	else
@@ -542,9 +537,8 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
 		dma_resv_unlock(bo->base.resv);
 		spin_unlock(&ttm_bo_glob.lru_lock);
 
-		lret = dma_resv_wait_timeout_rcu(resv, true,
-						 interruptible,
-						 30 * HZ);
+		lret = dma_resv_wait_timeout_rcu(resv, true, interruptible,
+						 30 * HZ);
 
 		if (lret < 0)
 			return lret;
This allows calling the function without the reservation lock held.
Signed-off-by: Christian König christian.koenig@amd.com
---
 drivers/gpu/drm/ttm/ttm_bo.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index c1104c8857b7..e12fc2c2d165 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -429,22 +429,24 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
 
 static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo)
 {
+	struct dma_resv *resv = &bo->base._resv;
 	struct dma_resv_list *fobj;
 	struct dma_fence *fence;
 	int i;
 
-	fobj = dma_resv_get_list(&bo->base._resv);
-	fence = dma_resv_get_excl(&bo->base._resv);
+	rcu_read_lock();
+	fobj = rcu_dereference(resv->fence);
+	fence = rcu_dereference(resv->fence_excl);
 	if (fence && !fence->ops->signaled)
 		dma_fence_enable_sw_signaling(fence);
 
 	for (i = 0; fobj && i < fobj->shared_count; ++i) {
-		fence = rcu_dereference_protected(fobj->shared[i],
-						  dma_resv_held(bo->base.resv));
+		fence = rcu_dereference(fobj->shared[i]);
 
 		if (!fence->ops->signaled)
 			dma_fence_enable_sw_signaling(fence);
 	}
+	rcu_read_unlock();
 }
 
 static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
This patch reworks the whole delayed deletion of BOs which aren't idle.
Instead of having two reference counters in the BO structure, we resurrect the BO when we find that a deleted BO is not idle yet.
This has many advantages, especially that we don't need to increment/decrement the BO's reference counter any more when it moves on the LRUs.
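Condensed, the resulting ttm_bo_release() flow is roughly the following. This is a sketch assembled from the hunks below; NO_EVICT handling, the mem_io/vma teardown, the accounting and the individualize_resv error path are left out:

/* Condensed sketch of the reworked release path. */
static void release_sketch(struct ttm_buffer_object *bo)
{
	struct ttm_bo_device *bdev = bo->bdev;

	if (!dma_resv_test_signaled_rcu(bo->base.resv, true)) {
		/* Not idle: resurrect the BO as a "zombie" and let the
		 * delayed delete worker (or eviction) finish it later. */
		ttm_bo_flush_all_fences(bo);
		bo->deleted = true;

		spin_lock(&ttm_bo_glob.lru_lock);
		kref_init(&bo->kref);		/* back from 0 to 1 */
		list_add_tail(&bo->ddestroy, &bdev->ddestroy);
		spin_unlock(&ttm_bo_glob.lru_lock);

		schedule_delayed_work(&bdev->wq,
				      ((HZ / 100) < 1) ? 1 : HZ / 100);
		return;
	}

	/* Idle: tear everything down immediately. */
	spin_lock(&ttm_bo_glob.lru_lock);
	ttm_bo_del_from_lru(bo);
	list_del(&bo->ddestroy);
	spin_unlock(&ttm_bo_glob.lru_lock);

	ttm_bo_cleanup_memtype_use(bo);
	bo->destroy(bo);
}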
Signed-off-by: Christian König christian.koenig@amd.com
---
 drivers/gpu/drm/ttm/ttm_bo.c      | 217 +++++++++++++-----------------
 drivers/gpu/drm/ttm/ttm_bo_util.c |   1 -
 include/drm/ttm/ttm_bo_api.h      |  11 +-
 3 files changed, 97 insertions(+), 132 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index e12fc2c2d165..d0624685f5d2 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -145,26 +145,6 @@ static inline uint32_t ttm_bo_type_flags(unsigned type) return 1 << (type); }
-static void ttm_bo_release_list(struct kref *list_kref) -{ - struct ttm_buffer_object *bo = - container_of(list_kref, struct ttm_buffer_object, list_kref); - size_t acc_size = bo->acc_size; - - BUG_ON(kref_read(&bo->list_kref)); - BUG_ON(kref_read(&bo->kref)); - BUG_ON(bo->mem.mm_node != NULL); - BUG_ON(!list_empty(&bo->lru)); - BUG_ON(!list_empty(&bo->ddestroy)); - ttm_tt_destroy(bo->ttm); - atomic_dec(&ttm_bo_glob.bo_count); - dma_fence_put(bo->moving); - if (!ttm_bo_uses_embedded_gem_object(bo)) - dma_resv_fini(&bo->base._resv); - bo->destroy(bo); - ttm_mem_global_free(&ttm_mem_glob, acc_size); -} - static void ttm_bo_add_mem_to_lru(struct ttm_buffer_object *bo, struct ttm_mem_reg *mem) { @@ -181,21 +161,14 @@ static void ttm_bo_add_mem_to_lru(struct ttm_buffer_object *bo,
man = &bdev->man[mem->mem_type]; list_add_tail(&bo->lru, &man->lru[bo->priority]); - kref_get(&bo->list_kref);
if (!(man->flags & TTM_MEMTYPE_FLAG_FIXED) && bo->ttm && !(bo->ttm->page_flags & (TTM_PAGE_FLAG_SG | TTM_PAGE_FLAG_SWAPPED))) { list_add_tail(&bo->swap, &ttm_bo_glob.swap_lru[bo->priority]); - kref_get(&bo->list_kref); } }
-static void ttm_bo_ref_bug(struct kref *list_kref) -{ - BUG(); -} - static void ttm_bo_del_from_lru(struct ttm_buffer_object *bo) { struct ttm_bo_device *bdev = bo->bdev; @@ -203,12 +176,10 @@ static void ttm_bo_del_from_lru(struct ttm_buffer_object *bo)
if (!list_empty(&bo->swap)) { list_del_init(&bo->swap); - kref_put(&bo->list_kref, ttm_bo_ref_bug); notify = true; } if (!list_empty(&bo->lru)) { list_del_init(&bo->lru); - kref_put(&bo->list_kref, ttm_bo_ref_bug); notify = true; }
@@ -421,8 +392,7 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo) BUG_ON(!dma_resv_trylock(&bo->base._resv));
r = dma_resv_copy_fences(&bo->base._resv, bo->base.resv); - if (r) - dma_resv_unlock(&bo->base._resv); + dma_resv_unlock(&bo->base._resv);
return r; } @@ -449,68 +419,10 @@ static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo) rcu_read_unlock(); }
-static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo) -{ - struct ttm_bo_device *bdev = bo->bdev; - int ret; - - ret = ttm_bo_individualize_resv(bo); - if (ret) { - /* Last resort, if we fail to allocate memory for the - * fences block for the BO to become idle - */ - dma_resv_wait_timeout_rcu(bo->base.resv, true, false, - 30 * HZ); - spin_lock(&ttm_bo_glob.lru_lock); - goto error; - } - - spin_lock(&ttm_bo_glob.lru_lock); - ret = dma_resv_trylock(bo->base.resv) ? 0 : -EBUSY; - if (!ret) { - if (dma_resv_test_signaled_rcu(&bo->base._resv, true)) { - ttm_bo_del_from_lru(bo); - spin_unlock(&ttm_bo_glob.lru_lock); - if (bo->base.resv != &bo->base._resv) - dma_resv_unlock(&bo->base._resv); - - ttm_bo_cleanup_memtype_use(bo); - dma_resv_unlock(bo->base.resv); - return; - } - - ttm_bo_flush_all_fences(bo); - - /* - * Make NO_EVICT bos immediately available to - * shrinkers, now that they are queued for - * destruction. - */ - if (bo->mem.placement & TTM_PL_FLAG_NO_EVICT) { - bo->mem.placement &= ~TTM_PL_FLAG_NO_EVICT; - ttm_bo_move_to_lru_tail(bo, NULL); - } - - dma_resv_unlock(bo->base.resv); - } - if (bo->base.resv != &bo->base._resv) { - ttm_bo_flush_all_fences(bo); - dma_resv_unlock(&bo->base._resv); - } - -error: - kref_get(&bo->list_kref); - list_add_tail(&bo->ddestroy, &bdev->ddestroy); - spin_unlock(&ttm_bo_glob.lru_lock); - - schedule_delayed_work(&bdev->wq, - ((HZ / 100) < 1) ? 1 : HZ / 100); -} - /** * function ttm_bo_cleanup_refs - * If bo idle, remove from delayed- and lru lists, and unref. - * If not idle, do nothing. + * If bo idle, remove from lru lists, and unref. + * If not idle, block if possible. * * Must be called with lru_lock and reservation held, this function * will drop the lru lock and optionally the reservation lock before returning. @@ -572,14 +484,14 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
ttm_bo_del_from_lru(bo); list_del_init(&bo->ddestroy); - kref_put(&bo->list_kref, ttm_bo_ref_bug); - spin_unlock(&ttm_bo_glob.lru_lock); ttm_bo_cleanup_memtype_use(bo);
if (unlock_resv) dma_resv_unlock(bo->base.resv);
+ ttm_bo_put(bo); + return 0; }
@@ -601,8 +513,9 @@ static bool ttm_bo_delayed_delete(struct ttm_bo_device *bdev, bool remove_all)
bo = list_first_entry(&bdev->ddestroy, struct ttm_buffer_object, ddestroy); - kref_get(&bo->list_kref); list_move_tail(&bo->ddestroy, &removed); + if (!ttm_bo_get_unless_zero(bo)) + continue;
if (remove_all || bo->base.resv != &bo->base._resv) { spin_unlock(&glob->lru_lock); @@ -617,7 +530,7 @@ static bool ttm_bo_delayed_delete(struct ttm_bo_device *bdev, bool remove_all) spin_unlock(&glob->lru_lock); }
- kref_put(&bo->list_kref, ttm_bo_release_list); + ttm_bo_put(bo); spin_lock(&glob->lru_lock); } list_splice_tail(&removed, &bdev->ddestroy); @@ -643,16 +556,68 @@ static void ttm_bo_release(struct kref *kref) container_of(kref, struct ttm_buffer_object, kref); struct ttm_bo_device *bdev = bo->bdev; struct ttm_mem_type_manager *man = &bdev->man[bo->mem.mem_type]; + size_t acc_size = bo->acc_size; + int ret;
- if (bo->bdev->driver->release_notify) - bo->bdev->driver->release_notify(bo); + if (!bo->deleted) { + if (bo->bdev->driver->release_notify) + bo->bdev->driver->release_notify(bo);
- drm_vma_offset_remove(bdev->vma_manager, &bo->base.vma_node); - ttm_mem_io_lock(man, false); - ttm_mem_io_free_vm(bo); - ttm_mem_io_unlock(man); - ttm_bo_cleanup_refs_or_queue(bo); - kref_put(&bo->list_kref, ttm_bo_release_list); + drm_vma_offset_remove(bdev->vma_manager, &bo->base.vma_node); + ttm_mem_io_lock(man, false); + ttm_mem_io_free_vm(bo); + ttm_mem_io_unlock(man); + + ret = ttm_bo_individualize_resv(bo); + if (ret) { + /* Last resort, if we fail to allocate memory for the + * fences block for the BO to become idle + */ + dma_resv_wait_timeout_rcu(bo->base.resv, true, false, + 30 * HZ); + } + } + + if (!dma_resv_test_signaled_rcu(bo->base.resv, true)) { + /* The BO is not idle, resurrect it for delayed destroy */ + ttm_bo_flush_all_fences(bo); + bo->deleted = true; + + /* + * Make NO_EVICT bos immediately available to + * shrinkers, now that they are queued for + * destruction. + */ + if (bo->mem.placement & TTM_PL_FLAG_NO_EVICT) { + bo->mem.placement &= ~TTM_PL_FLAG_NO_EVICT; + ttm_bo_move_to_lru_tail(bo, NULL); + } + + spin_lock(&ttm_bo_glob.lru_lock); + kref_init(&bo->kref); + list_add_tail(&bo->ddestroy, &bdev->ddestroy); + spin_unlock(&ttm_bo_glob.lru_lock); + + schedule_delayed_work(&bdev->wq, + ((HZ / 100) < 1) ? 1 : HZ / 100); + return; + } + + spin_lock(&ttm_bo_glob.lru_lock); + ttm_bo_del_from_lru(bo); + list_del(&bo->ddestroy); + spin_unlock(&ttm_bo_glob.lru_lock); + + ttm_bo_cleanup_memtype_use(bo); + + BUG_ON(bo->mem.mm_node != NULL); + ttm_tt_destroy(bo->ttm); + atomic_dec(&ttm_bo_glob.bo_count); + dma_fence_put(bo->moving); + if (!ttm_bo_uses_embedded_gem_object(bo)) + dma_resv_fini(&bo->base._resv); + bo->destroy(bo); + ttm_mem_global_free(&ttm_mem_glob, acc_size); }
void ttm_bo_put(struct ttm_buffer_object *bo) @@ -755,8 +720,7 @@ static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
if (bo->base.resv == ctx->resv) { dma_resv_assert_held(bo->base.resv); - if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT - || !list_empty(&bo->ddestroy)) + if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT || bo->deleted) ret = true; *locked = false; if (busy) @@ -837,6 +801,11 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev, dma_resv_unlock(bo->base.resv); continue; } + if (!ttm_bo_get_unless_zero(bo)) { + if (locked) + dma_resv_unlock(bo->base.resv); + continue; + } break; }
@@ -848,21 +817,19 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev, }
if (!bo) { - if (busy_bo) - kref_get(&busy_bo->list_kref); + if (busy_bo && !ttm_bo_get_unless_zero(busy_bo)) + busy_bo = NULL; spin_unlock(&ttm_bo_glob.lru_lock); ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket); if (busy_bo) - kref_put(&busy_bo->list_kref, ttm_bo_release_list); + ttm_bo_put(busy_bo); return ret; }
- kref_get(&bo->list_kref); - - if (!list_empty(&bo->ddestroy)) { + if (bo->deleted) { ret = ttm_bo_cleanup_refs(bo, ctx->interruptible, ctx->no_wait_gpu, locked); - kref_put(&bo->list_kref, ttm_bo_release_list); + ttm_bo_put(bo); return ret; }
@@ -872,7 +839,7 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev, if (locked) ttm_bo_unreserve(bo);
- kref_put(&bo->list_kref, ttm_bo_release_list); + ttm_bo_put(bo); return ret; }
@@ -1284,7 +1251,6 @@ int ttm_bo_init_reserved(struct ttm_bo_device *bdev, bo->destroy = destroy ? destroy : ttm_bo_default_destroy;
kref_init(&bo->kref); - kref_init(&bo->list_kref); INIT_LIST_HEAD(&bo->lru); INIT_LIST_HEAD(&bo->ddestroy); INIT_LIST_HEAD(&bo->swap); @@ -1804,11 +1770,18 @@ int ttm_bo_swapout(struct ttm_bo_global *glob, struct ttm_operation_ctx *ctx) spin_lock(&glob->lru_lock); for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { list_for_each_entry(bo, &glob->swap_lru[i], swap) { - if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked, - NULL)) { - ret = 0; - break; + if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked, + NULL)) + continue; + + if (!ttm_bo_get_unless_zero(bo)) { + if (locked) + dma_resv_unlock(bo->base.resv); + continue; } + + ret = 0; + break; } if (!ret) break; @@ -1819,11 +1792,9 @@ int ttm_bo_swapout(struct ttm_bo_global *glob, struct ttm_operation_ctx *ctx) return ret; }
- kref_get(&bo->list_kref); - - if (!list_empty(&bo->ddestroy)) { + if (bo->deleted) { ret = ttm_bo_cleanup_refs(bo, false, false, locked); - kref_put(&bo->list_kref, ttm_bo_release_list); + ttm_bo_put(bo); return ret; }
@@ -1877,7 +1848,7 @@ int ttm_bo_swapout(struct ttm_bo_global *glob, struct ttm_operation_ctx *ctx) */ if (locked) dma_resv_unlock(bo->base.resv); - kref_put(&bo->list_kref, ttm_bo_release_list); + ttm_bo_put(bo); return ret; } EXPORT_SYMBOL(ttm_bo_swapout); diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 86d152472f38..c8e359ded1df 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -507,7 +507,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, fbo->base.moving = NULL; drm_vma_node_reset(&fbo->base.base.vma_node);
- kref_init(&fbo->base.list_kref); kref_init(&fbo->base.kref); fbo->base.destroy = &ttm_transfered_destroy; fbo->base.acc_size = 0; diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 66ca49db9633..b9bc1b00142e 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -135,18 +135,14 @@ struct ttm_tt; * @num_pages: Actual number of pages. * @acc_size: Accounted size for this object. * @kref: Reference count of this buffer object. When this refcount reaches - * zero, the object is put on the delayed delete list. - * @list_kref: List reference count of this buffer object. This member is - * used to avoid destruction while the buffer object is still on a list. - * Lru lists may keep one refcount, the delayed delete list, and kref != 0 - * keeps one refcount. When this refcount reaches zero, - * the object is destroyed. + * zero, the object is destroyed or put on the delayed delete list. * @mem: structure describing current placement. * @persistent_swap_storage: Usually the swap storage is deleted for buffers * pinned in physical memory. If this behaviour is not desired, this member * holds a pointer to a persistent shmem object. * @ttm: TTM structure holding system pages. * @evicted: Whether the object was evicted without user-space knowing. + * @deleted: True if the object is only a zombie and already deleted. * @lru: List head for the lru list. * @ddestroy: List head for the delayed destroy list. * @swap: List head for swap LRU list. @@ -183,9 +179,7 @@ struct ttm_buffer_object { /** * Members not needing protection. */ - struct kref kref; - struct kref list_kref;
/** * Members protected by the bo::resv::reserved lock. @@ -195,6 +189,7 @@ struct ttm_buffer_object { struct file *persistent_swap_storage; struct ttm_tt *ttm; bool evicted; + bool deleted;
/** * Members protected by the bdev::lru_lock.
comments inline. [xh]
On Feb 10, 2020, at 23:09, Christian König ckoenig.leichtzumerken@gmail.com wrote:
[SNIP]
/*
* Make NO_EVICT bos immediately available to
* shrinkers, now that they are queued for
* destruction.
*/
if (bo->mem.placement & TTM_PL_FLAG_NO_EVICT) {
bo->mem.placement &= ~TTM_PL_FLAG_NO_EVICT;
ttm_bo_move_to_lru_tail(bo, NULL);
[xh] this should be under lru lock.
[SNIP]
- BUG_ON(bo->mem.mm_node != NULL);
- ttm_tt_destroy(bo->ttm);
[xh] already destroy it in ttm_bo_cleanup_memtype_use.
[SNIP]
if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
NULL))
continue;
if (!ttm_bo_get_unless_zero(bo)) {
if (locked)
dma_resv_unlock(bo->base.resv);
continue; }
[xh] As you get the bo, when do you put it?
You only put one bo just before return, but you get the bos in the for loop. Am I missing something?
thanks xinhui
comments inline
On Feb 11, 2020, at 13:26, Pan, Xinhui Xinhui.Pan@amd.com wrote:
comments inline. [xh]
On Feb 10, 2020, at 23:09, Christian König ckoenig.leichtzumerken@gmail.com wrote:
[SNIP]
if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
NULL))
continue;
if (!ttm_bo_get_unless_zero(bo)) {
if (locked)
dma_resv_unlock(bo->base.resv);
continue; }
[xh] As you get the bo, when do you put it?
You only put one bo just before return, but you get the bos in the for loop. Am I missing something?
thanks xinhui
[xh] Never mind. I missed the break;
thanks xinhui
[SNIP]
/*
* Make NO_EVICT bos immediately available to
* shrinkers, now that they are queued for
* destruction.
*/
if (bo->mem.placement & TTM_PL_FLAG_NO_EVICT) {
bo->mem.placement &= ~TTM_PL_FLAG_NO_EVICT;
ttm_bo_move_to_lru_tail(bo, NULL);
[xh] this should be under lru lock.
Ah, yes good point.
- BUG_ON(bo->mem.mm_node != NULL);
- ttm_tt_destroy(bo->ttm);
[xh] already destroy it in ttm_bo_cleanup_memtype_use.
Fixed as well.
Going to send that out with those two fixed in a minute.
Thanks, Christian.
When non-imported BOs are resurrected for delayed delete, we replace the dma_resv object to allow for easy reclaiming of the resources.
v2: move that to ttm_bo_individualize_resv
Signed-off-by: Christian König christian.koenig@amd.com
---
 drivers/gpu/drm/ttm/ttm_bo.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index d0624685f5d2..4d161038de98 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -393,6 +393,14 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
 
 	r = dma_resv_copy_fences(&bo->base._resv, bo->base.resv);
 	dma_resv_unlock(&bo->base._resv);
+	if (r)
+		return r;
+
+	if (bo->type != ttm_bo_type_sg) {
+		spin_lock(&ttm_bo_glob.lru_lock);
+		bo->base.resv = &bo->base._resv;
+		spin_unlock(&ttm_bo_glob.lru_lock);
+	}
 
 	return r;
 }
@@ -720,7 +728,7 @@ static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
 
 	if (bo->base.resv == ctx->resv) {
 		dma_resv_assert_held(bo->base.resv);
-		if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT || bo->deleted)
+		if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT)
 			ret = true;
 		*locked = false;
 		if (busy)
On Mon, Feb 10, 2020 at 04:09:06PM +0100, Christian König wrote:
When non-imported BOs are resurrected for delayed delete we replace the dma_resv object to allow for easy reclaiming of the resources.
v2: move that to ttm_bo_individualize_resv
Signed-off-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/ttm/ttm_bo.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-)
[SNIP]
- if (bo->type != ttm_bo_type_sg) {
spin_lock(&ttm_bo_glob.lru_lock);
bo->base.resv = &bo->base._resv;
Having the dma_resv pointer be protected by the lru_lock for ttm internal stuff, but invariant everywhere else, is really confusing. Not sure that's a great idea; I've just chased some ttm code around freaking out about that.

-Daniel
On Feb 11, 2020, at 22:14, Daniel Vetter daniel@ffwll.ch wrote:
On Mon, Feb 10, 2020 at 04:09:06PM +0100, Christian König wrote:
[SNIP]
- if (bo->type != ttm_bo_type_sg) {
spin_lock(&ttm_bo_glob.lru_lock);
bo->base.resv = &bo->base._resv;
Having the dma_resv pointer be protected by the lru_lock for ttm internal stuff, but invariant everywhere else is really confusing. Not sure that's a great idea, I've just chased some ttm code around freaking out about that. -Daniel

I think this is reader vs. writer. To avoid any internal functions using the old resv, using an existing spin lock is acceptable. Maybe RCU would be better? That would need a lot of effort. Anyway, ttm sucks. We have done a lot of work on it to make it run better on modern systems.
On 11.02.20 at 16:02, Pan, Xinhui wrote:
On Feb 11, 2020, at 22:14, Daniel Vetter daniel@ffwll.ch wrote:
On Mon, Feb 10, 2020 at 04:09:06PM +0100, Christian König wrote:
[SNIP]
- if (bo->type != ttm_bo_type_sg) {
spin_lock(&ttm_bo_glob.lru_lock);
bo->base.resv = &bo->base._resv;
Having the dma_resv pointer be protected by the lru_lock for ttm internal stuff, but invariant everywhere else is really confusing. Not sure that's a great idea, I've just chased some ttm code around freaking out about that. -Daniel

I think this is reader vs. writer. To avoid any internal functions using the old resv, using an existing spin lock is acceptable. Maybe RCU would be better? That would need a lot of effort. Anyway, ttm sucks. We have done a lot of work on it to make it run better on modern systems.
Yeah, that summarizes my recent presentation about TTM pretty much :)
Double checked that and the only reason we have the lock is that in ttm_mem_evict_first() we trylock first and then grab a reference.
So we should probably rework that code as well and then we can also drop that lock here, but that should come later.
Christian.
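For reference, the relevant part of the LRU scan in ttm_mem_evict_first() after this series looks roughly like this (condensed from patch 4 above; the busy-BO bookkeeping is left out):

	/* Condensed: the reservation is trylocked inside
	 * ttm_bo_evict_swapout_allowable(), only then is the
	 * reference taken. */
	list_for_each_entry(bo, &man->lru[i], lru) {
		bool busy;

		if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked, &busy))
			continue;

		if (!ttm_bo_get_unless_zero(bo)) {
			/* refcount already zero: BO is on its way out */
			if (locked)
				dma_resv_unlock(bo->base.resv);
			continue;
		}
		break;
	}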
On Tue, Feb 11, 2020 at 4:23 PM Christian König ckoenig.leichtzumerken@gmail.com wrote:
On 11.02.20 at 16:02, Pan, Xinhui wrote:
On Feb 11, 2020, at 22:14, Daniel Vetter daniel@ffwll.ch wrote:
On Mon, Feb 10, 2020 at 04:09:06PM +0100, Christian König wrote:
[SNIP]
Double checked that and the only reason we have the lock is that in ttm_mem_evict_first() we trylock first and then grab a reference.
Well, even with the refcount stuff going on I think you're missing a pile of barriers if you drop the spinlock. Refcounts are otherwise unordered atomics (and the kref_get_unless_zero trick always needs something else that guarantees that there's at least a weak reference, i.e. something which causes a full synchronization somehow with the release/free code for that thing). So it would minimally need RCU, or, like you do here, the lru list spinlock.
So we should probably rework that code as well and then we can also drop that lock here, but that should come later.
_If_ the trylock in evict is the only one then I agree this should be safe. But definitely needs a comment explaining what exactly is going on. Imo at least :-)
Cheers, Daniel
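A minimal, generic sketch of that pattern (the names here are made up, not TTM's; the assumption is exactly what is described above, namely that the release side unlinks under the same lock before freeing):

struct pool_obj {
	struct kref ref;
	struct list_head node;
};

static LIST_HEAD(pool_list);
static DEFINE_SPINLOCK(pool_lock);

static struct pool_obj *pool_lookup(void)
{
	struct pool_obj *obj;

	/* The lock keeps the object linked while we attempt the reference;
	 * a zero refcount means the release side is already freeing it. */
	spin_lock(&pool_lock);
	list_for_each_entry(obj, &pool_list, node) {
		if (kref_get_unless_zero(&obj->ref)) {
			spin_unlock(&pool_lock);
			return obj;
		}
	}
	spin_unlock(&pool_lock);
	return NULL;
}

/* Called via kref_put(&obj->ref, pool_obj_release). */
static void pool_obj_release(struct kref *ref)
{
	struct pool_obj *obj = container_of(ref, struct pool_obj, ref);

	/* Unlink under the same lock before freeing; this is the ordering
	 * guarantee the lookup above relies on. */
	spin_lock(&pool_lock);
	list_del(&obj->node);
	spin_unlock(&pool_lock);
	kfree(obj);
}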
On 11.02.20 at 15:14, Daniel Vetter wrote:
On Mon, Feb 10, 2020 at 04:09:06PM +0100, Christian König wrote:
[SNIP]
- if (bo->type != ttm_bo_type_sg) {
spin_lock(&ttm_bo_glob.lru_lock);
bo->base.resv = &bo->base._resv;
Having the dma_resv pointer be protected by the lru_lock for ttm internal stuff, but invariant everywhere else is really confusing. Not sure that's a great idea, I've just chased some ttm code around freaking out about that.
The key point is that the reference counter is zero at this moment.
Taking the LRU spinlock was just a precaution and might actually not even be necessary here.
I will double check, Christian.
This allows release_notify to add and remove fences from deleted objects.
Signed-off-by: Christian König christian.koenig@amd.com
---
 drivers/gpu/drm/ttm/ttm_bo.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 4d161038de98..42177ccd7035 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -568,14 +568,6 @@ static void ttm_bo_release(struct kref *kref)
 	int ret;
 
 	if (!bo->deleted) {
-		if (bo->bdev->driver->release_notify)
-			bo->bdev->driver->release_notify(bo);
-
-		drm_vma_offset_remove(bdev->vma_manager, &bo->base.vma_node);
-		ttm_mem_io_lock(man, false);
-		ttm_mem_io_free_vm(bo);
-		ttm_mem_io_unlock(man);
-
 		ret = ttm_bo_individualize_resv(bo);
 		if (ret) {
 			/* Last resort, if we fail to allocate memory for the
@@ -584,6 +576,14 @@ static void ttm_bo_release(struct kref *kref)
 			dma_resv_wait_timeout_rcu(bo->base.resv, true, false,
 						  30 * HZ);
 		}
+
+		if (bo->bdev->driver->release_notify)
+			bo->bdev->driver->release_notify(bo);
+
+		drm_vma_offset_remove(bdev->vma_manager, &bo->base.vma_node);
+		ttm_mem_io_lock(man, false);
+		ttm_mem_io_free_vm(bo);
+		ttm_mem_io_unlock(man);
 	}
 
 	if (!dma_resv_test_signaled_rcu(bo->base.resv, true)) {
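To illustrate what this enables, a driver's release_notify() could now attach cleanup work to the zombie BO roughly like this. This is a purely hypothetical sketch: queue_clear_memory() is a made-up helper, and no real driver implementation is shown here.

/* Hypothetical driver callback, illustrating the intent only. */
static void example_release_notify(struct ttm_buffer_object *bo)
{
	struct dma_fence *fence;

	/* After individualization bo->base.resv points at the BO's own
	 * _resv object, so fences added here only delay the destruction
	 * of this one BO. */
	fence = queue_clear_memory(bo);		/* made-up helper */
	if (IS_ERR_OR_NULL(fence))
		return;

	if (dma_resv_lock(bo->base.resv, NULL))
		return;		/* real code would handle this properly */
	dma_resv_add_excl_fence(bo->base.resv, fence);
	dma_resv_unlock(bo->base.resv);
	dma_fence_put(fence);
}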
[AMD Official Use Only - Internal Distribution Only]
For patch 1/2/3/5/6:

Reviewed-by: xinhui pan xinhui.pan@amd.com

________________________________
From: Christian König ckoenig.leichtzumerken@gmail.com
Sent: Monday, February 10, 2020 11:09:01 PM
To: Pan, Xinhui Xinhui.Pan@amd.com; amd-gfx@lists.freedesktop.org; dri-devel@lists.freedesktop.org
Subject: Cleanup TTMs delayed delete handling
[SNIP]
On Tue, Feb 11, 2020 at 11:06:53AM +0000, Pan, Xinhui wrote:
[AMD Official Use Only - Internal Distribution Only]
Uh might want to fix your email setup. -Daniel