Move the iteration of the global lru into the new function ttm_global_swapout() and use that instead in drivers.
v2: consistently return int
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/ttm/ttm_bo.c | 57 ++++++++--------------------- drivers/gpu/drm/ttm/ttm_device.c | 29 +++++++++++++++ drivers/gpu/drm/ttm/ttm_tt.c | 2 +- drivers/gpu/drm/vmwgfx/ttm_memory.c | 3 +- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 2 +- include/drm/ttm/ttm_bo_api.h | 3 +- include/drm/ttm/ttm_device.h | 2 + 7 files changed, 53 insertions(+), 45 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 3c23e863a3f0..66e00b404ec3 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1193,56 +1193,35 @@ int ttm_bo_wait(struct ttm_buffer_object *bo, } EXPORT_SYMBOL(ttm_bo_wait);
-/* - * A buffer object shrink method that tries to swap out the first - * buffer object on the bo_global::swap_lru list. - */ -int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags) +int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, + gfp_t gfp_flags) { struct ttm_global *glob = &ttm_glob; - struct ttm_buffer_object *bo; - int ret = -EBUSY; bool locked; - unsigned i; - - spin_lock(&glob->lru_lock); - for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { - list_for_each_entry(bo, &glob->swap_lru[i], swap) { - if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked, - NULL)) - continue; - - if (!ttm_bo_get_unless_zero(bo)) { - if (locked) - dma_resv_unlock(bo->base.resv); - continue; - } + int ret;
- ret = 0; - break; - } - if (!ret) - break; - } + if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked, NULL)) + return -EBUSY;
- if (ret) { - spin_unlock(&glob->lru_lock); - return ret; + if (!ttm_bo_get_unless_zero(bo)) { + if (locked) + dma_resv_unlock(bo->base.resv); + return -EBUSY; }
if (bo->deleted) { - ret = ttm_bo_cleanup_refs(bo, false, false, locked); + ttm_bo_cleanup_refs(bo, false, false, locked); ttm_bo_put(bo); - return ret; + return 0; }
ttm_bo_del_from_lru(bo); + /* TODO: Cleanup the locking */ spin_unlock(&glob->lru_lock);
- /** + /* * Move to system cached */ - if (bo->mem.mem_type != TTM_PL_SYSTEM) { struct ttm_operation_ctx ctx = { false, false }; struct ttm_resource evict_mem; @@ -1262,29 +1241,26 @@ int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags) } }
- /** + /* * Make sure BO is idle. */ - ret = ttm_bo_wait(bo, false, false); if (unlikely(ret != 0)) goto out;
ttm_bo_unmap_virtual(bo);
- /** + /* * Swap out. Buffer will be swapped in again as soon as * anyone tries to access a ttm page. */ - if (bo->bdev->funcs->swap_notify) bo->bdev->funcs->swap_notify(bo);
ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags); out:
- /** - * + /* * Unreserve without putting on LRU to avoid swapping out an * already swapped buffer. */ @@ -1293,7 +1269,6 @@ int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags) ttm_bo_put(bo); return ret; } -EXPORT_SYMBOL(ttm_bo_swapout);
void ttm_bo_tt_destroy(struct ttm_buffer_object *bo) { diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index 95e1b7b1f2e6..b1424189fdfb 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -102,6 +102,35 @@ static int ttm_global_init(void) return ret; }
+/** + * A buffer object shrink method that tries to swap out the first + * buffer object on the global::swap_lru list. + */ +int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags) +{ + struct ttm_global *glob = &ttm_glob; + struct ttm_buffer_object *bo; + unsigned i; + int ret; + + spin_lock(&glob->lru_lock); + for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { + list_for_each_entry(bo, &glob->swap_lru[i], swap) { + uint32_t num_pages = bo->ttm->num_pages; + + ret = ttm_bo_swapout(bo, ctx, gfp_flags); + /* ttm_bo_swapout has dropped the lru_lock */ + if (!ret) + return num_pages; + if (ret != -EBUSY) + return ret; + } + } + spin_unlock(&glob->lru_lock); + return 0; +} +EXPORT_SYMBOL(ttm_global_swapout); + static void ttm_init_sysman(struct ttm_device *bdev) { struct ttm_resource_manager *man = &bdev->sysman; diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 2f0833c98d2c..95b5cff25f4c 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -369,7 +369,7 @@ static unsigned long ttm_tt_shrinker_scan(struct shrinker *shrink, }; int ret;
- ret = ttm_bo_swapout(&ctx, GFP_NOFS); + ret = ttm_global_swapout(&ctx, GFP_NOFS); return ret < 0 ? SHRINK_EMPTY : ret; }
diff --git a/drivers/gpu/drm/vmwgfx/ttm_memory.c b/drivers/gpu/drm/vmwgfx/ttm_memory.c index e972af07d029..104b95a8c7a2 100644 --- a/drivers/gpu/drm/vmwgfx/ttm_memory.c +++ b/drivers/gpu/drm/vmwgfx/ttm_memory.c @@ -38,6 +38,7 @@
#include <drm/drm_device.h> #include <drm/drm_file.h> +#include <drm/ttm/ttm_device.h>
#include "ttm_memory.h"
@@ -277,7 +278,7 @@ static void ttm_shrink(struct ttm_mem_global *glob, bool from_wq,
while (ttm_zones_above_swap_target(glob, from_wq, extra)) { spin_unlock(&glob->lock); - ret = ttm_bo_swapout(ctx, GFP_KERNEL); + ret = ttm_global_swapout(ctx, GFP_KERNEL); spin_lock(&glob->lock); if (unlikely(ret < 0)) break; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 6910111099c8..b991422e156c 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -1371,7 +1371,7 @@ static int vmw_pm_freeze(struct device *kdev) vmw_execbuf_release_pinned_bo(dev_priv); vmw_resource_evict_all(dev_priv); vmw_release_device_early(dev_priv); - while (ttm_bo_swapout(&ctx, GFP_KERNEL) > 0); + while (ttm_global_swapout(&ctx, GFP_KERNEL) > 0); if (dev_priv->enable_fb) vmw_fifo_resource_dec(dev_priv); if (atomic_read(&dev_priv->num_fifo_resources) != 0) { diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 4fb523dfab32..5044ac330858 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -560,7 +560,8 @@ ssize_t ttm_bo_io(struct ttm_device *bdev, struct file *filp, const char __user *wbuf, char __user *rbuf, size_t count, loff_t *f_pos, bool write);
-int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags); +int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, + gfp_t gfp_flags);
/** * ttm_bo_uses_embedded_gem_object - check if the given bo uses the diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h index 035bbc044a3b..6a0b267d4fe6 100644 --- a/include/drm/ttm/ttm_device.h +++ b/include/drm/ttm/ttm_device.h @@ -297,6 +297,8 @@ struct ttm_device { struct delayed_work wq; };
+long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags); + static inline struct ttm_resource_manager * ttm_manager_type(struct ttm_device *bdev, int mem_type) {
Instead evict round robin from each devices SYSTEM and TT domain.
v2: reorder num_pages access reported by Dan's script v3: fix rebase fallout, num_pages should be 32bit
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/ttm/ttm_bo.c | 29 -------------- drivers/gpu/drm/ttm/ttm_bo_util.c | 1 - drivers/gpu/drm/ttm/ttm_device.c | 60 +++++++++++++++++++++-------- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 2 +- include/drm/ttm/ttm_bo_api.h | 1 - include/drm/ttm/ttm_bo_driver.h | 1 - include/drm/ttm/ttm_device.h | 7 +--- 7 files changed, 48 insertions(+), 53 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 66e00b404ec3..3673157527ff 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -73,7 +73,6 @@ static void ttm_bo_del_from_lru(struct ttm_buffer_object *bo) { struct ttm_device *bdev = bo->bdev;
- list_del_init(&bo->swap); list_del_init(&bo->lru);
if (bdev->funcs->del_from_lru_notify) @@ -105,16 +104,6 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
man = ttm_manager_type(bdev, mem->mem_type); list_move_tail(&bo->lru, &man->lru[bo->priority]); - if (man->use_tt && bo->ttm && - !(bo->ttm->page_flags & (TTM_PAGE_FLAG_SG | - TTM_PAGE_FLAG_SWAPPED))) { - struct list_head *swap; - - swap = &ttm_glob.swap_lru[bo->priority]; - list_move_tail(&bo->swap, swap); - } else { - list_del_init(&bo->swap); - }
if (bdev->funcs->del_from_lru_notify) bdev->funcs->del_from_lru_notify(bo); @@ -129,9 +118,6 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo, ttm_bo_bulk_move_set_pos(&bulk->vram[bo->priority], bo); break; } - if (bo->ttm && !(bo->ttm->page_flags & - (TTM_PAGE_FLAG_SG | TTM_PAGE_FLAG_SWAPPED))) - ttm_bo_bulk_move_set_pos(&bulk->swap[bo->priority], bo); } } EXPORT_SYMBOL(ttm_bo_move_to_lru_tail); @@ -169,20 +155,6 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk) list_bulk_move_tail(&man->lru[i], &pos->first->lru, &pos->last->lru); } - - for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { - struct ttm_lru_bulk_move_pos *pos = &bulk->swap[i]; - struct list_head *lru; - - if (!pos->first) - continue; - - dma_resv_assert_held(pos->first->base.resv); - dma_resv_assert_held(pos->last->base.resv); - - lru = &ttm_glob.swap_lru[i]; - list_bulk_move_tail(lru, &pos->first->swap, &pos->last->swap); - } } EXPORT_SYMBOL(ttm_bo_bulk_move_lru_tail);
@@ -1065,7 +1037,6 @@ int ttm_bo_init_reserved(struct ttm_device *bdev, kref_init(&bo->kref); INIT_LIST_HEAD(&bo->lru); INIT_LIST_HEAD(&bo->ddestroy); - INIT_LIST_HEAD(&bo->swap); bo->bdev = bdev; bo->type = type; bo->mem.mem_type = TTM_PL_SYSTEM; diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 031e5819fec4..a2a17c84ceb3 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -303,7 +303,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, atomic_inc(&ttm_glob.bo_count); INIT_LIST_HEAD(&fbo->base.ddestroy); INIT_LIST_HEAD(&fbo->base.lru); - INIT_LIST_HEAD(&fbo->base.swap); fbo->base.moving = NULL; drm_vma_node_reset(&fbo->base.base.vma_node);
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index b1424189fdfb..2096a0fd9c35 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -67,7 +67,6 @@ static int ttm_global_init(void) unsigned long num_pages; struct sysinfo si; int ret = 0; - unsigned i;
mutex_lock(&ttm_global_mutex); if (++ttm_glob_use_count > 1) @@ -90,8 +89,6 @@ static int ttm_global_init(void) goto out; }
- for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) - INIT_LIST_HEAD(&glob->swap_lru[i]); INIT_LIST_HEAD(&glob->device_list); atomic_set(&glob->bo_count, 0);
@@ -109,27 +106,60 @@ static int ttm_global_init(void) int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags) { struct ttm_global *glob = &ttm_glob; + struct ttm_device *bdev; + int ret = -EBUSY; + + mutex_lock(&ttm_global_mutex); + list_for_each_entry(bdev, &glob->device_list, device_list) { + ret = ttm_device_swapout(bdev, ctx, gfp_flags); + if (ret > 0) { + list_move_tail(&bdev->device_list, &glob->device_list); + break; + } + } + mutex_unlock(&ttm_global_mutex); + return ret; +} +EXPORT_SYMBOL(ttm_global_swapout); + +long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, + gfp_t gfp_flags) +{ + struct ttm_global *glob = &ttm_glob; + struct ttm_resource_manager *man; struct ttm_buffer_object *bo; - unsigned i; + unsigned i, j; int ret;
spin_lock(&glob->lru_lock); - for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { - list_for_each_entry(bo, &glob->swap_lru[i], swap) { - uint32_t num_pages = bo->ttm->num_pages; - - ret = ttm_bo_swapout(bo, ctx, gfp_flags); - /* ttm_bo_swapout has dropped the lru_lock */ - if (!ret) - return num_pages; - if (ret != -EBUSY) - return ret; + for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) { + man = ttm_manager_type(bdev, i); + if (!man || !man->use_tt) + continue; + + for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) { + list_for_each_entry(bo, &man->lru[j], lru) { + uint32_t num_pages; + + if (!bo->ttm || + bo->ttm->page_flags & TTM_PAGE_FLAG_SG || + bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) + continue; + + num_pages = bo->ttm->num_pages; + ret = ttm_bo_swapout(bo, ctx, gfp_flags); + /* ttm_bo_swapout has dropped the lru_lock */ + if (!ret) + return num_pages; + if (ret != -EBUSY) + return ret; + } } } spin_unlock(&glob->lru_lock); return 0; } -EXPORT_SYMBOL(ttm_global_swapout); +EXPORT_SYMBOL(ttm_device_swapout);
static void ttm_init_sysman(struct ttm_device *bdev) { diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index b991422e156c..4e41d8221f06 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -1371,7 +1371,7 @@ static int vmw_pm_freeze(struct device *kdev) vmw_execbuf_release_pinned_bo(dev_priv); vmw_resource_evict_all(dev_priv); vmw_release_device_early(dev_priv); - while (ttm_global_swapout(&ctx, GFP_KERNEL) > 0); + while (ttm_device_swapout(&dev_priv->bdev, &ctx, GFP_KERNEL) > 0); if (dev_priv->enable_fb) vmw_fifo_resource_dec(dev_priv); if (atomic_read(&dev_priv->num_fifo_resources) != 0) { diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 5044ac330858..3587f660e8f4 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -144,7 +144,6 @@ struct ttm_buffer_object {
struct list_head lru; struct list_head ddestroy; - struct list_head swap;
/** * Members protected by a bo reservation. diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index 8959c0075cfd..d007feef7676 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -69,7 +69,6 @@ struct ttm_lru_bulk_move_pos { struct ttm_lru_bulk_move { struct ttm_lru_bulk_move_pos tt[TTM_MAX_BO_PRIORITY]; struct ttm_lru_bulk_move_pos vram[TTM_MAX_BO_PRIORITY]; - struct ttm_lru_bulk_move_pos swap[TTM_MAX_BO_PRIORITY]; };
/* diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h index 6a0b267d4fe6..cda6efb4c34b 100644 --- a/include/drm/ttm/ttm_device.h +++ b/include/drm/ttm/ttm_device.h @@ -63,11 +63,6 @@ extern struct ttm_global { */ struct list_head device_list;
- /** - * Protected by the lru_lock. - */ - struct list_head swap_lru[TTM_MAX_BO_PRIORITY]; - /** * Internal protection. */ @@ -298,6 +293,8 @@ struct ttm_device { };
long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags); +long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, + gfp_t gfp_flags);
static inline struct ttm_resource_manager * ttm_manager_type(struct ttm_device *bdev, int mem_type)
On Thu, Mar 18, 2021 at 08:47:18PM +0800, Christian König wrote:
Instead evict round robin from each devices SYSTEM and TT domain.
v2: reorder num_pages access reported by Dan's script v3: fix rebase fallout, num_pages should be 32bit
Signed-off-by: Christian König christian.koenig@amd.com
Reviewed-by: Huang Rui ray.huang@amd.com
drivers/gpu/drm/ttm/ttm_bo.c | 29 -------------- drivers/gpu/drm/ttm/ttm_bo_util.c | 1 - drivers/gpu/drm/ttm/ttm_device.c | 60 +++++++++++++++++++++-------- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 2 +- include/drm/ttm/ttm_bo_api.h | 1 - include/drm/ttm/ttm_bo_driver.h | 1 - include/drm/ttm/ttm_device.h | 7 +--- 7 files changed, 48 insertions(+), 53 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 66e00b404ec3..3673157527ff 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -73,7 +73,6 @@ static void ttm_bo_del_from_lru(struct ttm_buffer_object *bo) { struct ttm_device *bdev = bo->bdev;
list_del_init(&bo->swap); list_del_init(&bo->lru);
if (bdev->funcs->del_from_lru_notify)
@@ -105,16 +104,6 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
man = ttm_manager_type(bdev, mem->mem_type); list_move_tail(&bo->lru, &man->lru[bo->priority]);
if (man->use_tt && bo->ttm &&
!(bo->ttm->page_flags & (TTM_PAGE_FLAG_SG |
TTM_PAGE_FLAG_SWAPPED))) {
struct list_head *swap;
swap = &ttm_glob.swap_lru[bo->priority];
list_move_tail(&bo->swap, swap);
} else {
list_del_init(&bo->swap);
}
if (bdev->funcs->del_from_lru_notify) bdev->funcs->del_from_lru_notify(bo);
@@ -129,9 +118,6 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo, ttm_bo_bulk_move_set_pos(&bulk->vram[bo->priority], bo); break; }
if (bo->ttm && !(bo->ttm->page_flags &
(TTM_PAGE_FLAG_SG | TTM_PAGE_FLAG_SWAPPED)))
}ttm_bo_bulk_move_set_pos(&bulk->swap[bo->priority], bo);
} EXPORT_SYMBOL(ttm_bo_move_to_lru_tail); @@ -169,20 +155,6 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk) list_bulk_move_tail(&man->lru[i], &pos->first->lru, &pos->last->lru); }
- for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
struct ttm_lru_bulk_move_pos *pos = &bulk->swap[i];
struct list_head *lru;
if (!pos->first)
continue;
dma_resv_assert_held(pos->first->base.resv);
dma_resv_assert_held(pos->last->base.resv);
lru = &ttm_glob.swap_lru[i];
list_bulk_move_tail(lru, &pos->first->swap, &pos->last->swap);
- }
} EXPORT_SYMBOL(ttm_bo_bulk_move_lru_tail);
@@ -1065,7 +1037,6 @@ int ttm_bo_init_reserved(struct ttm_device *bdev, kref_init(&bo->kref); INIT_LIST_HEAD(&bo->lru); INIT_LIST_HEAD(&bo->ddestroy);
- INIT_LIST_HEAD(&bo->swap); bo->bdev = bdev; bo->type = type; bo->mem.mem_type = TTM_PL_SYSTEM;
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 031e5819fec4..a2a17c84ceb3 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -303,7 +303,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, atomic_inc(&ttm_glob.bo_count); INIT_LIST_HEAD(&fbo->base.ddestroy); INIT_LIST_HEAD(&fbo->base.lru);
- INIT_LIST_HEAD(&fbo->base.swap); fbo->base.moving = NULL; drm_vma_node_reset(&fbo->base.base.vma_node);
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index b1424189fdfb..2096a0fd9c35 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -67,7 +67,6 @@ static int ttm_global_init(void) unsigned long num_pages; struct sysinfo si; int ret = 0;
unsigned i;
mutex_lock(&ttm_global_mutex); if (++ttm_glob_use_count > 1)
@@ -90,8 +89,6 @@ static int ttm_global_init(void) goto out; }
- for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
INIT_LIST_HEAD(&glob->device_list); atomic_set(&glob->bo_count, 0);INIT_LIST_HEAD(&glob->swap_lru[i]);
@@ -109,27 +106,60 @@ static int ttm_global_init(void) int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags) { struct ttm_global *glob = &ttm_glob;
- struct ttm_device *bdev;
- int ret = -EBUSY;
- mutex_lock(&ttm_global_mutex);
- list_for_each_entry(bdev, &glob->device_list, device_list) {
ret = ttm_device_swapout(bdev, ctx, gfp_flags);
if (ret > 0) {
list_move_tail(&bdev->device_list, &glob->device_list);
break;
}
- }
- mutex_unlock(&ttm_global_mutex);
- return ret;
+} +EXPORT_SYMBOL(ttm_global_swapout);
+long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
gfp_t gfp_flags)
+{
- struct ttm_global *glob = &ttm_glob;
- struct ttm_resource_manager *man; struct ttm_buffer_object *bo;
- unsigned i;
unsigned i, j; int ret;
spin_lock(&glob->lru_lock);
- for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
list_for_each_entry(bo, &glob->swap_lru[i], swap) {
uint32_t num_pages = bo->ttm->num_pages;
ret = ttm_bo_swapout(bo, ctx, gfp_flags);
/* ttm_bo_swapout has dropped the lru_lock */
if (!ret)
return num_pages;
if (ret != -EBUSY)
return ret;
- for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
man = ttm_manager_type(bdev, i);
if (!man || !man->use_tt)
continue;
for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
list_for_each_entry(bo, &man->lru[j], lru) {
uint32_t num_pages;
if (!bo->ttm ||
bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)
continue;
num_pages = bo->ttm->num_pages;
ret = ttm_bo_swapout(bo, ctx, gfp_flags);
/* ttm_bo_swapout has dropped the lru_lock */
if (!ret)
return num_pages;
if (ret != -EBUSY)
return ret;
} } spin_unlock(&glob->lru_lock); return 0;}
} -EXPORT_SYMBOL(ttm_global_swapout); +EXPORT_SYMBOL(ttm_device_swapout);
static void ttm_init_sysman(struct ttm_device *bdev) { diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index b991422e156c..4e41d8221f06 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -1371,7 +1371,7 @@ static int vmw_pm_freeze(struct device *kdev) vmw_execbuf_release_pinned_bo(dev_priv); vmw_resource_evict_all(dev_priv); vmw_release_device_early(dev_priv);
- while (ttm_global_swapout(&ctx, GFP_KERNEL) > 0);
- while (ttm_device_swapout(&dev_priv->bdev, &ctx, GFP_KERNEL) > 0); if (dev_priv->enable_fb) vmw_fifo_resource_dec(dev_priv); if (atomic_read(&dev_priv->num_fifo_resources) != 0) {
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 5044ac330858..3587f660e8f4 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -144,7 +144,6 @@ struct ttm_buffer_object {
struct list_head lru; struct list_head ddestroy;
struct list_head swap;
/**
- Members protected by a bo reservation.
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index 8959c0075cfd..d007feef7676 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -69,7 +69,6 @@ struct ttm_lru_bulk_move_pos { struct ttm_lru_bulk_move { struct ttm_lru_bulk_move_pos tt[TTM_MAX_BO_PRIORITY]; struct ttm_lru_bulk_move_pos vram[TTM_MAX_BO_PRIORITY];
- struct ttm_lru_bulk_move_pos swap[TTM_MAX_BO_PRIORITY];
};
/* diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h index 6a0b267d4fe6..cda6efb4c34b 100644 --- a/include/drm/ttm/ttm_device.h +++ b/include/drm/ttm/ttm_device.h @@ -63,11 +63,6 @@ extern struct ttm_global { */ struct list_head device_list;
- /**
* Protected by the lru_lock.
*/
- struct list_head swap_lru[TTM_MAX_BO_PRIORITY];
- /**
*/
- Internal protection.
@@ -298,6 +293,8 @@ struct ttm_device { };
long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags); +long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
gfp_t gfp_flags);
static inline struct ttm_resource_manager * ttm_manager_type(struct ttm_device *bdev, int mem_type) -- 2.25.1
Instead of having a global lock.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 ++--- drivers/gpu/drm/qxl/qxl_release.c | 5 +-- drivers/gpu/drm/ttm/ttm_bo.c | 49 ++++++++++++-------------- drivers/gpu/drm/ttm/ttm_device.c | 12 +++---- drivers/gpu/drm/ttm/ttm_execbuf_util.c | 8 ++--- drivers/gpu/drm/ttm/ttm_resource.c | 9 +++-- include/drm/ttm/ttm_bo_driver.h | 4 +-- include/drm/ttm/ttm_device.h | 4 +-- 8 files changed, 43 insertions(+), 56 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 9d19078246c8..ae18c0e32347 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -638,15 +638,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, struct amdgpu_vm_bo_base *bo_base;
if (vm->bulk_moveable) { - spin_lock(&ttm_glob.lru_lock); + spin_lock(&adev->mman.bdev.lru_lock); ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move); - spin_unlock(&ttm_glob.lru_lock); + spin_unlock(&adev->mman.bdev.lru_lock); return; }
memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
- spin_lock(&ttm_glob.lru_lock); + spin_lock(&adev->mman.bdev.lru_lock); list_for_each_entry(bo_base, &vm->idle, vm_status) { struct amdgpu_bo *bo = bo_base->bo;
@@ -660,7 +660,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, &bo->shadow->tbo.mem, &vm->lru_bulk_move); } - spin_unlock(&ttm_glob.lru_lock); + spin_unlock(&adev->mman.bdev.lru_lock);
vm->bulk_moveable = true; } diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c index f5845c96d414..b19f2f00b215 100644 --- a/drivers/gpu/drm/qxl/qxl_release.c +++ b/drivers/gpu/drm/qxl/qxl_release.c @@ -426,16 +426,13 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release) release->id | 0xf0000000, release->base.seqno); trace_dma_fence_emit(&release->base);
- spin_lock(&ttm_glob.lru_lock); - list_for_each_entry(entry, &release->bos, head) { bo = entry->bo;
dma_resv_add_shared_fence(bo->base.resv, &release->base); - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL); + ttm_bo_move_to_lru_tail_unlocked(bo); dma_resv_unlock(bo->base.resv); } - spin_unlock(&ttm_glob.lru_lock); ww_acquire_fini(&release->ticket); }
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 3673157527ff..2d2ac532987e 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -243,9 +243,9 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo) * reference it any more. The only tricky case is the trylock on * the resv object while holding the lru_lock. */ - spin_lock(&ttm_glob.lru_lock); + spin_lock(&bo->bdev->lru_lock); bo->base.resv = &bo->base._resv; - spin_unlock(&ttm_glob.lru_lock); + spin_unlock(&bo->bdev->lru_lock); }
return r; @@ -304,7 +304,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
if (unlock_resv) dma_resv_unlock(bo->base.resv); - spin_unlock(&ttm_glob.lru_lock); + spin_unlock(&bo->bdev->lru_lock);
lret = dma_resv_wait_timeout_rcu(resv, true, interruptible, 30 * HZ); @@ -314,7 +314,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, else if (lret == 0) return -EBUSY;
- spin_lock(&ttm_glob.lru_lock); + spin_lock(&bo->bdev->lru_lock); if (unlock_resv && !dma_resv_trylock(bo->base.resv)) { /* * We raced, and lost, someone else holds the reservation now, @@ -324,7 +324,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, * delayed destruction would succeed, so just return success * here. */ - spin_unlock(&ttm_glob.lru_lock); + spin_unlock(&bo->bdev->lru_lock); return 0; } ret = 0; @@ -333,13 +333,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, if (ret || unlikely(list_empty(&bo->ddestroy))) { if (unlock_resv) dma_resv_unlock(bo->base.resv); - spin_unlock(&ttm_glob.lru_lock); + spin_unlock(&bo->bdev->lru_lock); return ret; }
ttm_bo_del_from_lru(bo); list_del_init(&bo->ddestroy); - spin_unlock(&ttm_glob.lru_lock); + spin_unlock(&bo->bdev->lru_lock); ttm_bo_cleanup_memtype_use(bo);
if (unlock_resv) @@ -356,13 +356,12 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, */ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all) { - struct ttm_global *glob = &ttm_glob; struct list_head removed; bool empty;
INIT_LIST_HEAD(&removed);
- spin_lock(&glob->lru_lock); + spin_lock(&bdev->lru_lock); while (!list_empty(&bdev->ddestroy)) { struct ttm_buffer_object *bo;
@@ -373,24 +372,24 @@ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all) continue;
if (remove_all || bo->base.resv != &bo->base._resv) { - spin_unlock(&glob->lru_lock); + spin_unlock(&bdev->lru_lock); dma_resv_lock(bo->base.resv, NULL);
- spin_lock(&glob->lru_lock); + spin_lock(&bdev->lru_lock); ttm_bo_cleanup_refs(bo, false, !remove_all, true);
} else if (dma_resv_trylock(bo->base.resv)) { ttm_bo_cleanup_refs(bo, false, !remove_all, true); } else { - spin_unlock(&glob->lru_lock); + spin_unlock(&bdev->lru_lock); }
ttm_bo_put(bo); - spin_lock(&glob->lru_lock); + spin_lock(&bdev->lru_lock); } list_splice_tail(&removed, &bdev->ddestroy); empty = list_empty(&bdev->ddestroy); - spin_unlock(&glob->lru_lock); + spin_unlock(&bdev->lru_lock);
return empty; } @@ -425,7 +424,7 @@ static void ttm_bo_release(struct kref *kref) ttm_bo_flush_all_fences(bo); bo->deleted = true;
- spin_lock(&ttm_glob.lru_lock); + spin_lock(&bo->bdev->lru_lock);
/* * Make pinned bos immediately available to @@ -442,17 +441,17 @@ static void ttm_bo_release(struct kref *kref)
kref_init(&bo->kref); list_add_tail(&bo->ddestroy, &bdev->ddestroy); - spin_unlock(&ttm_glob.lru_lock); + spin_unlock(&bo->bdev->lru_lock);
schedule_delayed_work(&bdev->wq, ((HZ / 100) < 1) ? 1 : HZ / 100); return; }
- spin_lock(&ttm_glob.lru_lock); + spin_lock(&bo->bdev->lru_lock); ttm_bo_del_from_lru(bo); list_del(&bo->ddestroy); - spin_unlock(&ttm_glob.lru_lock); + spin_unlock(&bo->bdev->lru_lock);
ttm_bo_cleanup_memtype_use(bo); dma_resv_unlock(bo->base.resv); @@ -626,7 +625,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev, unsigned i; int ret;
- spin_lock(&ttm_glob.lru_lock); + spin_lock(&bo->bdev->lru_lock); for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { list_for_each_entry(bo, &man->lru[i], lru) { bool busy; @@ -663,7 +662,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev, if (!bo) { if (busy_bo && !ttm_bo_get_unless_zero(busy_bo)) busy_bo = NULL; - spin_unlock(&ttm_glob.lru_lock); + spin_unlock(&bo->bdev->lru_lock); ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket); if (busy_bo) ttm_bo_put(busy_bo); @@ -677,7 +676,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev, return ret; }
- spin_unlock(&ttm_glob.lru_lock); + spin_unlock(&bo->bdev->lru_lock);
ret = ttm_bo_evict(bo, ctx); if (locked) @@ -777,10 +776,9 @@ static int ttm_bo_mem_placement(struct ttm_buffer_object *bo, mem->mem_type = place->mem_type; mem->placement = place->flags;
- spin_lock(&ttm_glob.lru_lock); + spin_lock(&bo->bdev->lru_lock); ttm_bo_move_to_lru_tail(bo, mem, NULL); - spin_unlock(&ttm_glob.lru_lock); - + spin_unlock(&bo->bdev->lru_lock); return 0; }
@@ -1167,7 +1165,6 @@ EXPORT_SYMBOL(ttm_bo_wait); int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, gfp_t gfp_flags) { - struct ttm_global *glob = &ttm_glob; bool locked; int ret;
@@ -1188,7 +1185,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
ttm_bo_del_from_lru(bo); /* TODO: Cleanup the locking */ - spin_unlock(&glob->lru_lock); + spin_unlock(&bo->bdev->lru_lock);
/* * Move to system cached diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index 2096a0fd9c35..85f6975d9872 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -81,7 +81,6 @@ static int ttm_global_init(void) ttm_pool_mgr_init(num_pages * 50 / 100); ttm_tt_mgr_init();
- spin_lock_init(&glob->lru_lock); glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);
if (unlikely(glob->dummy_read_page == NULL)) { @@ -125,13 +124,12 @@ EXPORT_SYMBOL(ttm_global_swapout); long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, gfp_t gfp_flags) { - struct ttm_global *glob = &ttm_glob; struct ttm_resource_manager *man; struct ttm_buffer_object *bo; unsigned i, j; int ret;
- spin_lock(&glob->lru_lock); + spin_lock(&bdev->lru_lock); for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) { man = ttm_manager_type(bdev, i); if (!man || !man->use_tt) @@ -156,7 +154,7 @@ long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, } } } - spin_unlock(&glob->lru_lock); + spin_unlock(&bdev->lru_lock); return 0; } EXPORT_SYMBOL(ttm_device_swapout); @@ -223,6 +221,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
bdev->vma_manager = vma_manager; INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue); + spin_lock_init(&bdev->lru_lock); INIT_LIST_HEAD(&bdev->ddestroy); bdev->dev_mapping = mapping; mutex_lock(&ttm_global_mutex); @@ -235,7 +234,6 @@ EXPORT_SYMBOL(ttm_device_init);
void ttm_device_fini(struct ttm_device *bdev) { - struct ttm_global *glob = &ttm_glob; struct ttm_resource_manager *man; unsigned i;
@@ -252,11 +250,11 @@ void ttm_device_fini(struct ttm_device *bdev) if (ttm_bo_delayed_delete(bdev, true)) pr_debug("Delayed destroy list was clean\n");
- spin_lock(&glob->lru_lock); + spin_lock(&bdev->lru_lock); for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) if (list_empty(&man->lru[0])) pr_debug("Swap list %d was clean\n", i); - spin_unlock(&glob->lru_lock); + spin_unlock(&bdev->lru_lock);
ttm_pool_fini(&bdev->pool); ttm_global_release(); diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c index 690ab97d52b7..071c48d672c6 100644 --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c @@ -51,14 +51,12 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket, if (list_empty(list)) return;
- spin_lock(&ttm_glob.lru_lock); list_for_each_entry(entry, list, head) { struct ttm_buffer_object *bo = entry->bo;
- ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL); + ttm_bo_move_to_lru_tail_unlocked(bo); dma_resv_unlock(bo->base.resv); } - spin_unlock(&ttm_glob.lru_lock);
if (ticket) ww_acquire_fini(ticket); @@ -154,7 +152,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket, if (list_empty(list)) return;
- spin_lock(&ttm_glob.lru_lock); list_for_each_entry(entry, list, head) { struct ttm_buffer_object *bo = entry->bo;
@@ -162,10 +159,9 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket, dma_resv_add_shared_fence(bo->base.resv, fence); else dma_resv_add_excl_fence(bo->base.resv, fence); - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL); + ttm_bo_move_to_lru_tail_unlocked(bo); dma_resv_unlock(bo->base.resv); } - spin_unlock(&ttm_glob.lru_lock); if (ticket) ww_acquire_fini(ticket); } diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c index ed1672a9f332..04f2eef653ab 100644 --- a/drivers/gpu/drm/ttm/ttm_resource.c +++ b/drivers/gpu/drm/ttm/ttm_resource.c @@ -91,7 +91,6 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev, .no_wait_gpu = false, .force_alloc = true }; - struct ttm_global *glob = &ttm_glob; struct dma_fence *fence; int ret; unsigned i; @@ -100,18 +99,18 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev, * Can't use standard list traversal since we're unlocking. */
- spin_lock(&glob->lru_lock); + spin_lock(&bdev->lru_lock); for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { while (!list_empty(&man->lru[i])) { - spin_unlock(&glob->lru_lock); + spin_unlock(&bdev->lru_lock); ret = ttm_mem_evict_first(bdev, man, NULL, &ctx, NULL); if (ret) return ret; - spin_lock(&glob->lru_lock); + spin_lock(&bdev->lru_lock); } } - spin_unlock(&glob->lru_lock); + spin_unlock(&bdev->lru_lock);
spin_lock(&man->move_lock); fence = dma_fence_get(man->move); diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index d007feef7676..dbccac957f8f 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -180,9 +180,9 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo, static inline void ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo) { - spin_lock(&ttm_glob.lru_lock); + spin_lock(&bo->bdev->lru_lock); ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL); - spin_unlock(&ttm_glob.lru_lock); + spin_unlock(&bo->bdev->lru_lock); }
static inline void ttm_bo_assign_mem(struct ttm_buffer_object *bo, diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h index cda6efb4c34b..bae56d29e8ff 100644 --- a/include/drm/ttm/ttm_device.h +++ b/include/drm/ttm/ttm_device.h @@ -56,7 +56,6 @@ extern struct ttm_global { */
struct page *dummy_read_page; - spinlock_t lru_lock;
/** * Protected by ttm_global_mutex. @@ -277,8 +276,9 @@ struct ttm_device { struct ttm_pool pool;
/* - * Protected by the global:lru lock. + * Protection for the per manager LRU and ddestroy lists. */ + spinlock_t lru_lock; struct list_head ddestroy;
/*
On Thu, Mar 18, 2021 at 08:47:19PM +0800, Christian König wrote:
Instead of having a global lock.
Signed-off-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 ++--- drivers/gpu/drm/qxl/qxl_release.c | 5 +-- drivers/gpu/drm/ttm/ttm_bo.c | 49 ++++++++++++-------------- drivers/gpu/drm/ttm/ttm_device.c | 12 +++---- drivers/gpu/drm/ttm/ttm_execbuf_util.c | 8 ++--- drivers/gpu/drm/ttm/ttm_resource.c | 9 +++-- include/drm/ttm/ttm_bo_driver.h | 4 +-- include/drm/ttm/ttm_device.h | 4 +-- 8 files changed, 43 insertions(+), 56 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 9d19078246c8..ae18c0e32347 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -638,15 +638,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, struct amdgpu_vm_bo_base *bo_base;
if (vm->bulk_moveable) {
spin_lock(&ttm_glob.lru_lock);
spin_lock(&adev->mman.bdev.lru_lock);
Could you please explain why do you want to instead of the global lock?
Thanks, Ray
ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
spin_unlock(&ttm_glob.lru_lock);
spin_unlock(&adev->mman.bdev.lru_lock);
return; }
memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
- spin_lock(&ttm_glob.lru_lock);
- spin_lock(&adev->mman.bdev.lru_lock); list_for_each_entry(bo_base, &vm->idle, vm_status) { struct amdgpu_bo *bo = bo_base->bo;
@@ -660,7 +660,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, &bo->shadow->tbo.mem, &vm->lru_bulk_move); }
- spin_unlock(&ttm_glob.lru_lock);
spin_unlock(&adev->mman.bdev.lru_lock);
vm->bulk_moveable = true;
} diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c index f5845c96d414..b19f2f00b215 100644 --- a/drivers/gpu/drm/qxl/qxl_release.c +++ b/drivers/gpu/drm/qxl/qxl_release.c @@ -426,16 +426,13 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release) release->id | 0xf0000000, release->base.seqno); trace_dma_fence_emit(&release->base);
spin_lock(&ttm_glob.lru_lock);
list_for_each_entry(entry, &release->bos, head) { bo = entry->bo;
dma_resv_add_shared_fence(bo->base.resv, &release->base);
ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
dma_resv_unlock(bo->base.resv); }ttm_bo_move_to_lru_tail_unlocked(bo);
- spin_unlock(&ttm_glob.lru_lock); ww_acquire_fini(&release->ticket);
}
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 3673157527ff..2d2ac532987e 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -243,9 +243,9 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo) * reference it any more. The only tricky case is the trylock on * the resv object while holding the lru_lock. */
spin_lock(&ttm_glob.lru_lock);
bo->base.resv = &bo->base._resv;spin_lock(&bo->bdev->lru_lock);
spin_unlock(&ttm_glob.lru_lock);
spin_unlock(&bo->bdev->lru_lock);
}
return r;
@@ -304,7 +304,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
if (unlock_resv) dma_resv_unlock(bo->base.resv);
spin_unlock(&ttm_glob.lru_lock);
spin_unlock(&bo->bdev->lru_lock);
lret = dma_resv_wait_timeout_rcu(resv, true, interruptible, 30 * HZ);
@@ -314,7 +314,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, else if (lret == 0) return -EBUSY;
spin_lock(&ttm_glob.lru_lock);
if (unlock_resv && !dma_resv_trylock(bo->base.resv)) { /* * We raced, and lost, someone else holds the reservation now,spin_lock(&bo->bdev->lru_lock);
@@ -324,7 +324,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, * delayed destruction would succeed, so just return success * here. */
spin_unlock(&ttm_glob.lru_lock);
} ret = 0;spin_unlock(&bo->bdev->lru_lock); return 0;
@@ -333,13 +333,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, if (ret || unlikely(list_empty(&bo->ddestroy))) { if (unlock_resv) dma_resv_unlock(bo->base.resv);
spin_unlock(&ttm_glob.lru_lock);
spin_unlock(&bo->bdev->lru_lock);
return ret; }
ttm_bo_del_from_lru(bo); list_del_init(&bo->ddestroy);
- spin_unlock(&ttm_glob.lru_lock);
spin_unlock(&bo->bdev->lru_lock); ttm_bo_cleanup_memtype_use(bo);
if (unlock_resv)
@@ -356,13 +356,12 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, */ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all) {
struct ttm_global *glob = &ttm_glob; struct list_head removed; bool empty;
INIT_LIST_HEAD(&removed);
spin_lock(&glob->lru_lock);
- spin_lock(&bdev->lru_lock); while (!list_empty(&bdev->ddestroy)) { struct ttm_buffer_object *bo;
@@ -373,24 +372,24 @@ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all) continue;
if (remove_all || bo->base.resv != &bo->base._resv) {
spin_unlock(&glob->lru_lock);
spin_unlock(&bdev->lru_lock); dma_resv_lock(bo->base.resv, NULL);
spin_lock(&glob->lru_lock);
spin_lock(&bdev->lru_lock); ttm_bo_cleanup_refs(bo, false, !remove_all, true);
} else if (dma_resv_trylock(bo->base.resv)) { ttm_bo_cleanup_refs(bo, false, !remove_all, true); } else {
spin_unlock(&glob->lru_lock);
spin_unlock(&bdev->lru_lock);
}
ttm_bo_put(bo);
spin_lock(&glob->lru_lock);
} list_splice_tail(&removed, &bdev->ddestroy); empty = list_empty(&bdev->ddestroy);spin_lock(&bdev->lru_lock);
- spin_unlock(&glob->lru_lock);
spin_unlock(&bdev->lru_lock);
return empty;
} @@ -425,7 +424,7 @@ static void ttm_bo_release(struct kref *kref) ttm_bo_flush_all_fences(bo); bo->deleted = true;
spin_lock(&ttm_glob.lru_lock);
spin_lock(&bo->bdev->lru_lock);
/*
- Make pinned bos immediately available to
@@ -442,17 +441,17 @@ static void ttm_bo_release(struct kref *kref)
kref_init(&bo->kref); list_add_tail(&bo->ddestroy, &bdev->ddestroy);
spin_unlock(&ttm_glob.lru_lock);
spin_unlock(&bo->bdev->lru_lock);
schedule_delayed_work(&bdev->wq, ((HZ / 100) < 1) ? 1 : HZ / 100); return; }
- spin_lock(&ttm_glob.lru_lock);
- spin_lock(&bo->bdev->lru_lock); ttm_bo_del_from_lru(bo); list_del(&bo->ddestroy);
- spin_unlock(&ttm_glob.lru_lock);
spin_unlock(&bo->bdev->lru_lock);
ttm_bo_cleanup_memtype_use(bo); dma_resv_unlock(bo->base.resv);
@@ -626,7 +625,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev, unsigned i; int ret;
- spin_lock(&ttm_glob.lru_lock);
- spin_lock(&bo->bdev->lru_lock); for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { list_for_each_entry(bo, &man->lru[i], lru) { bool busy;
@@ -663,7 +662,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev, if (!bo) { if (busy_bo && !ttm_bo_get_unless_zero(busy_bo)) busy_bo = NULL;
spin_unlock(&ttm_glob.lru_lock);
ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket); if (busy_bo) ttm_bo_put(busy_bo);spin_unlock(&bo->bdev->lru_lock);
@@ -677,7 +676,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev, return ret; }
- spin_unlock(&ttm_glob.lru_lock);
spin_unlock(&bo->bdev->lru_lock);
ret = ttm_bo_evict(bo, ctx); if (locked)
@@ -777,10 +776,9 @@ static int ttm_bo_mem_placement(struct ttm_buffer_object *bo, mem->mem_type = place->mem_type; mem->placement = place->flags;
- spin_lock(&ttm_glob.lru_lock);
- spin_lock(&bo->bdev->lru_lock); ttm_bo_move_to_lru_tail(bo, mem, NULL);
- spin_unlock(&ttm_glob.lru_lock);
- spin_unlock(&bo->bdev->lru_lock); return 0;
}
@@ -1167,7 +1165,6 @@ EXPORT_SYMBOL(ttm_bo_wait); int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, gfp_t gfp_flags) {
- struct ttm_global *glob = &ttm_glob; bool locked; int ret;
@@ -1188,7 +1185,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
ttm_bo_del_from_lru(bo); /* TODO: Cleanup the locking */
- spin_unlock(&glob->lru_lock);
spin_unlock(&bo->bdev->lru_lock);
/*
- Move to system cached
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index 2096a0fd9c35..85f6975d9872 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -81,7 +81,6 @@ static int ttm_global_init(void) ttm_pool_mgr_init(num_pages * 50 / 100); ttm_tt_mgr_init();
spin_lock_init(&glob->lru_lock); glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);
if (unlikely(glob->dummy_read_page == NULL)) {
@@ -125,13 +124,12 @@ EXPORT_SYMBOL(ttm_global_swapout); long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, gfp_t gfp_flags) {
struct ttm_global *glob = &ttm_glob; struct ttm_resource_manager *man; struct ttm_buffer_object *bo; unsigned i, j; int ret;
spin_lock(&glob->lru_lock);
- spin_lock(&bdev->lru_lock); for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) { man = ttm_manager_type(bdev, i); if (!man || !man->use_tt)
@@ -156,7 +154,7 @@ long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, } } }
- spin_unlock(&glob->lru_lock);
- spin_unlock(&bdev->lru_lock); return 0;
} EXPORT_SYMBOL(ttm_device_swapout); @@ -223,6 +221,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
bdev->vma_manager = vma_manager; INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue);
- spin_lock_init(&bdev->lru_lock); INIT_LIST_HEAD(&bdev->ddestroy); bdev->dev_mapping = mapping; mutex_lock(&ttm_global_mutex);
@@ -235,7 +234,6 @@ EXPORT_SYMBOL(ttm_device_init);
void ttm_device_fini(struct ttm_device *bdev) {
- struct ttm_global *glob = &ttm_glob; struct ttm_resource_manager *man; unsigned i;
@@ -252,11 +250,11 @@ void ttm_device_fini(struct ttm_device *bdev) if (ttm_bo_delayed_delete(bdev, true)) pr_debug("Delayed destroy list was clean\n");
- spin_lock(&glob->lru_lock);
- spin_lock(&bdev->lru_lock); for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) if (list_empty(&man->lru[0])) pr_debug("Swap list %d was clean\n", i);
- spin_unlock(&glob->lru_lock);
spin_unlock(&bdev->lru_lock);
ttm_pool_fini(&bdev->pool); ttm_global_release();
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c index 690ab97d52b7..071c48d672c6 100644 --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c @@ -51,14 +51,12 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket, if (list_empty(list)) return;
spin_lock(&ttm_glob.lru_lock); list_for_each_entry(entry, list, head) { struct ttm_buffer_object *bo = entry->bo;
ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
dma_resv_unlock(bo->base.resv); }ttm_bo_move_to_lru_tail_unlocked(bo);
spin_unlock(&ttm_glob.lru_lock);
if (ticket) ww_acquire_fini(ticket);
@@ -154,7 +152,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket, if (list_empty(list)) return;
- spin_lock(&ttm_glob.lru_lock); list_for_each_entry(entry, list, head) { struct ttm_buffer_object *bo = entry->bo;
@@ -162,10 +159,9 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket, dma_resv_add_shared_fence(bo->base.resv, fence); else dma_resv_add_excl_fence(bo->base.resv, fence);
ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
dma_resv_unlock(bo->base.resv); }ttm_bo_move_to_lru_tail_unlocked(bo);
- spin_unlock(&ttm_glob.lru_lock); if (ticket) ww_acquire_fini(ticket);
} diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c index ed1672a9f332..04f2eef653ab 100644 --- a/drivers/gpu/drm/ttm/ttm_resource.c +++ b/drivers/gpu/drm/ttm/ttm_resource.c @@ -91,7 +91,6 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev, .no_wait_gpu = false, .force_alloc = true };
- struct ttm_global *glob = &ttm_glob; struct dma_fence *fence; int ret; unsigned i;
@@ -100,18 +99,18 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev, * Can't use standard list traversal since we're unlocking. */
- spin_lock(&glob->lru_lock);
- spin_lock(&bdev->lru_lock); for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { while (!list_empty(&man->lru[i])) {
spin_unlock(&glob->lru_lock);
spin_unlock(&bdev->lru_lock); ret = ttm_mem_evict_first(bdev, man, NULL, &ctx, NULL); if (ret) return ret;
spin_lock(&glob->lru_lock);
} }spin_lock(&bdev->lru_lock);
- spin_unlock(&glob->lru_lock);
spin_unlock(&bdev->lru_lock);
spin_lock(&man->move_lock); fence = dma_fence_get(man->move);
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index d007feef7676..dbccac957f8f 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -180,9 +180,9 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo, static inline void ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo) {
- spin_lock(&ttm_glob.lru_lock);
- spin_lock(&bo->bdev->lru_lock); ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
- spin_unlock(&ttm_glob.lru_lock);
- spin_unlock(&bo->bdev->lru_lock);
}
static inline void ttm_bo_assign_mem(struct ttm_buffer_object *bo, diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h index cda6efb4c34b..bae56d29e8ff 100644 --- a/include/drm/ttm/ttm_device.h +++ b/include/drm/ttm/ttm_device.h @@ -56,7 +56,6 @@ extern struct ttm_global { */
struct page *dummy_read_page;
spinlock_t lru_lock;
/**
- Protected by ttm_global_mutex.
@@ -277,8 +276,9 @@ struct ttm_device { struct ttm_pool pool;
/*
* Protected by the global:lru lock.
* Protection for the per manager LRU and ddestroy lists.
*/
spinlock_t lru_lock; struct list_head ddestroy;
/*
-- 2.25.1
Am 19.03.21 um 05:32 schrieb Huang Rui:
On Thu, Mar 18, 2021 at 08:47:19PM +0800, Christian König wrote:
Instead of having a global lock.
Signed-off-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 ++--- drivers/gpu/drm/qxl/qxl_release.c | 5 +-- drivers/gpu/drm/ttm/ttm_bo.c | 49 ++++++++++++-------------- drivers/gpu/drm/ttm/ttm_device.c | 12 +++---- drivers/gpu/drm/ttm/ttm_execbuf_util.c | 8 ++--- drivers/gpu/drm/ttm/ttm_resource.c | 9 +++-- include/drm/ttm/ttm_bo_driver.h | 4 +-- include/drm/ttm/ttm_device.h | 4 +-- 8 files changed, 43 insertions(+), 56 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 9d19078246c8..ae18c0e32347 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -638,15 +638,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, struct amdgpu_vm_bo_base *bo_base;
if (vm->bulk_moveable) {
spin_lock(&ttm_glob.lru_lock);
spin_lock(&adev->mman.bdev.lru_lock);
Could you please explain why do you want to instead of the global lock?
Potentially less contention.
But you are right mentioning this in the commit message is a good idea.
Regards, Christian.
Thanks, Ray
ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
spin_unlock(&ttm_glob.lru_lock);
spin_unlock(&adev->mman.bdev.lru_lock);
return; }
memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
- spin_lock(&ttm_glob.lru_lock);
- spin_lock(&adev->mman.bdev.lru_lock); list_for_each_entry(bo_base, &vm->idle, vm_status) { struct amdgpu_bo *bo = bo_base->bo;
@@ -660,7 +660,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, &bo->shadow->tbo.mem, &vm->lru_bulk_move); }
- spin_unlock(&ttm_glob.lru_lock);
spin_unlock(&adev->mman.bdev.lru_lock);
vm->bulk_moveable = true; }
diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c index f5845c96d414..b19f2f00b215 100644 --- a/drivers/gpu/drm/qxl/qxl_release.c +++ b/drivers/gpu/drm/qxl/qxl_release.c @@ -426,16 +426,13 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release) release->id | 0xf0000000, release->base.seqno); trace_dma_fence_emit(&release->base);
spin_lock(&ttm_glob.lru_lock);
list_for_each_entry(entry, &release->bos, head) { bo = entry->bo;
dma_resv_add_shared_fence(bo->base.resv, &release->base);
ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
dma_resv_unlock(bo->base.resv); }ttm_bo_move_to_lru_tail_unlocked(bo);
- spin_unlock(&ttm_glob.lru_lock); ww_acquire_fini(&release->ticket); }
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 3673157527ff..2d2ac532987e 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -243,9 +243,9 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo) * reference it any more. The only tricky case is the trylock on * the resv object while holding the lru_lock. */
spin_lock(&ttm_glob.lru_lock);
bo->base.resv = &bo->base._resv;spin_lock(&bo->bdev->lru_lock);
spin_unlock(&ttm_glob.lru_lock);
spin_unlock(&bo->bdev->lru_lock);
}
return r;
@@ -304,7 +304,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
if (unlock_resv) dma_resv_unlock(bo->base.resv);
spin_unlock(&ttm_glob.lru_lock);
spin_unlock(&bo->bdev->lru_lock);
lret = dma_resv_wait_timeout_rcu(resv, true, interruptible, 30 * HZ);
@@ -314,7 +314,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, else if (lret == 0) return -EBUSY;
spin_lock(&ttm_glob.lru_lock);
if (unlock_resv && !dma_resv_trylock(bo->base.resv)) { /* * We raced, and lost, someone else holds the reservation now,spin_lock(&bo->bdev->lru_lock);
@@ -324,7 +324,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, * delayed destruction would succeed, so just return success * here. */
spin_unlock(&ttm_glob.lru_lock);
} ret = 0;spin_unlock(&bo->bdev->lru_lock); return 0;
@@ -333,13 +333,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, if (ret || unlikely(list_empty(&bo->ddestroy))) { if (unlock_resv) dma_resv_unlock(bo->base.resv);
spin_unlock(&ttm_glob.lru_lock);
spin_unlock(&bo->bdev->lru_lock);
return ret; }
ttm_bo_del_from_lru(bo); list_del_init(&bo->ddestroy);
- spin_unlock(&ttm_glob.lru_lock);
spin_unlock(&bo->bdev->lru_lock); ttm_bo_cleanup_memtype_use(bo);
if (unlock_resv)
@@ -356,13 +356,12 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, */ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all) {
struct ttm_global *glob = &ttm_glob; struct list_head removed; bool empty;
INIT_LIST_HEAD(&removed);
spin_lock(&glob->lru_lock);
- spin_lock(&bdev->lru_lock); while (!list_empty(&bdev->ddestroy)) { struct ttm_buffer_object *bo;
@@ -373,24 +372,24 @@ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all) continue;
if (remove_all || bo->base.resv != &bo->base._resv) {
spin_unlock(&glob->lru_lock);
spin_unlock(&bdev->lru_lock); dma_resv_lock(bo->base.resv, NULL);
spin_lock(&glob->lru_lock);
spin_lock(&bdev->lru_lock); ttm_bo_cleanup_refs(bo, false, !remove_all, true);
} else if (dma_resv_trylock(bo->base.resv)) { ttm_bo_cleanup_refs(bo, false, !remove_all, true); } else {
spin_unlock(&glob->lru_lock);
spin_unlock(&bdev->lru_lock);
}
ttm_bo_put(bo);
spin_lock(&glob->lru_lock);
} list_splice_tail(&removed, &bdev->ddestroy); empty = list_empty(&bdev->ddestroy);spin_lock(&bdev->lru_lock);
- spin_unlock(&glob->lru_lock);
spin_unlock(&bdev->lru_lock);
return empty; }
@@ -425,7 +424,7 @@ static void ttm_bo_release(struct kref *kref) ttm_bo_flush_all_fences(bo); bo->deleted = true;
spin_lock(&ttm_glob.lru_lock);
spin_lock(&bo->bdev->lru_lock);
/*
- Make pinned bos immediately available to
@@ -442,17 +441,17 @@ static void ttm_bo_release(struct kref *kref)
kref_init(&bo->kref); list_add_tail(&bo->ddestroy, &bdev->ddestroy);
spin_unlock(&ttm_glob.lru_lock);
spin_unlock(&bo->bdev->lru_lock);
schedule_delayed_work(&bdev->wq, ((HZ / 100) < 1) ? 1 : HZ / 100); return; }
- spin_lock(&ttm_glob.lru_lock);
- spin_lock(&bo->bdev->lru_lock); ttm_bo_del_from_lru(bo); list_del(&bo->ddestroy);
- spin_unlock(&ttm_glob.lru_lock);
spin_unlock(&bo->bdev->lru_lock);
ttm_bo_cleanup_memtype_use(bo); dma_resv_unlock(bo->base.resv);
@@ -626,7 +625,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev, unsigned i; int ret;
- spin_lock(&ttm_glob.lru_lock);
- spin_lock(&bo->bdev->lru_lock); for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { list_for_each_entry(bo, &man->lru[i], lru) { bool busy;
@@ -663,7 +662,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev, if (!bo) { if (busy_bo && !ttm_bo_get_unless_zero(busy_bo)) busy_bo = NULL;
spin_unlock(&ttm_glob.lru_lock);
ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket); if (busy_bo) ttm_bo_put(busy_bo);spin_unlock(&bo->bdev->lru_lock);
@@ -677,7 +676,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev, return ret; }
- spin_unlock(&ttm_glob.lru_lock);
spin_unlock(&bo->bdev->lru_lock);
ret = ttm_bo_evict(bo, ctx); if (locked)
@@ -777,10 +776,9 @@ static int ttm_bo_mem_placement(struct ttm_buffer_object *bo, mem->mem_type = place->mem_type; mem->placement = place->flags;
- spin_lock(&ttm_glob.lru_lock);
- spin_lock(&bo->bdev->lru_lock); ttm_bo_move_to_lru_tail(bo, mem, NULL);
- spin_unlock(&ttm_glob.lru_lock);
- spin_unlock(&bo->bdev->lru_lock); return 0; }
@@ -1167,7 +1165,6 @@ EXPORT_SYMBOL(ttm_bo_wait); int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, gfp_t gfp_flags) {
- struct ttm_global *glob = &ttm_glob; bool locked; int ret;
@@ -1188,7 +1185,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
ttm_bo_del_from_lru(bo); /* TODO: Cleanup the locking */
- spin_unlock(&glob->lru_lock);
spin_unlock(&bo->bdev->lru_lock);
/*
- Move to system cached
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index 2096a0fd9c35..85f6975d9872 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -81,7 +81,6 @@ static int ttm_global_init(void) ttm_pool_mgr_init(num_pages * 50 / 100); ttm_tt_mgr_init();
spin_lock_init(&glob->lru_lock); glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);
if (unlikely(glob->dummy_read_page == NULL)) {
@@ -125,13 +124,12 @@ EXPORT_SYMBOL(ttm_global_swapout); long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, gfp_t gfp_flags) {
struct ttm_global *glob = &ttm_glob; struct ttm_resource_manager *man; struct ttm_buffer_object *bo; unsigned i, j; int ret;
spin_lock(&glob->lru_lock);
- spin_lock(&bdev->lru_lock); for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) { man = ttm_manager_type(bdev, i); if (!man || !man->use_tt)
@@ -156,7 +154,7 @@ long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, } } }
- spin_unlock(&glob->lru_lock);
- spin_unlock(&bdev->lru_lock); return 0; } EXPORT_SYMBOL(ttm_device_swapout);
@@ -223,6 +221,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
bdev->vma_manager = vma_manager; INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue);
- spin_lock_init(&bdev->lru_lock); INIT_LIST_HEAD(&bdev->ddestroy); bdev->dev_mapping = mapping; mutex_lock(&ttm_global_mutex);
@@ -235,7 +234,6 @@ EXPORT_SYMBOL(ttm_device_init);
void ttm_device_fini(struct ttm_device *bdev) {
- struct ttm_global *glob = &ttm_glob; struct ttm_resource_manager *man; unsigned i;
@@ -252,11 +250,11 @@ void ttm_device_fini(struct ttm_device *bdev) if (ttm_bo_delayed_delete(bdev, true)) pr_debug("Delayed destroy list was clean\n");
- spin_lock(&glob->lru_lock);
- spin_lock(&bdev->lru_lock); for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) if (list_empty(&man->lru[0])) pr_debug("Swap list %d was clean\n", i);
- spin_unlock(&glob->lru_lock);
spin_unlock(&bdev->lru_lock);
ttm_pool_fini(&bdev->pool); ttm_global_release();
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c index 690ab97d52b7..071c48d672c6 100644 --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c @@ -51,14 +51,12 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket, if (list_empty(list)) return;
spin_lock(&ttm_glob.lru_lock); list_for_each_entry(entry, list, head) { struct ttm_buffer_object *bo = entry->bo;
ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
dma_resv_unlock(bo->base.resv); }ttm_bo_move_to_lru_tail_unlocked(bo);
spin_unlock(&ttm_glob.lru_lock);
if (ticket) ww_acquire_fini(ticket);
@@ -154,7 +152,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket, if (list_empty(list)) return;
- spin_lock(&ttm_glob.lru_lock); list_for_each_entry(entry, list, head) { struct ttm_buffer_object *bo = entry->bo;
@@ -162,10 +159,9 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket, dma_resv_add_shared_fence(bo->base.resv, fence); else dma_resv_add_excl_fence(bo->base.resv, fence);
ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
dma_resv_unlock(bo->base.resv); }ttm_bo_move_to_lru_tail_unlocked(bo);
- spin_unlock(&ttm_glob.lru_lock); if (ticket) ww_acquire_fini(ticket); }
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c index ed1672a9f332..04f2eef653ab 100644 --- a/drivers/gpu/drm/ttm/ttm_resource.c +++ b/drivers/gpu/drm/ttm/ttm_resource.c @@ -91,7 +91,6 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev, .no_wait_gpu = false, .force_alloc = true };
- struct ttm_global *glob = &ttm_glob; struct dma_fence *fence; int ret; unsigned i;
@@ -100,18 +99,18 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev, * Can't use standard list traversal since we're unlocking. */
- spin_lock(&glob->lru_lock);
- spin_lock(&bdev->lru_lock); for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { while (!list_empty(&man->lru[i])) {
spin_unlock(&glob->lru_lock);
spin_unlock(&bdev->lru_lock); ret = ttm_mem_evict_first(bdev, man, NULL, &ctx, NULL); if (ret) return ret;
spin_lock(&glob->lru_lock);
} }spin_lock(&bdev->lru_lock);
- spin_unlock(&glob->lru_lock);
spin_unlock(&bdev->lru_lock);
spin_lock(&man->move_lock); fence = dma_fence_get(man->move);
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index d007feef7676..dbccac957f8f 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -180,9 +180,9 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo, static inline void ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo) {
- spin_lock(&ttm_glob.lru_lock);
- spin_lock(&bo->bdev->lru_lock); ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
- spin_unlock(&ttm_glob.lru_lock);
spin_unlock(&bo->bdev->lru_lock); }
static inline void ttm_bo_assign_mem(struct ttm_buffer_object *bo,
diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h index cda6efb4c34b..bae56d29e8ff 100644 --- a/include/drm/ttm/ttm_device.h +++ b/include/drm/ttm/ttm_device.h @@ -56,7 +56,6 @@ extern struct ttm_global { */
struct page *dummy_read_page;
spinlock_t lru_lock;
/**
- Protected by ttm_global_mutex.
@@ -277,8 +276,9 @@ struct ttm_device { struct ttm_pool pool;
/*
* Protected by the global:lru lock.
* Protection for the per manager LRU and ddestroy lists.
*/
spinlock_t lru_lock; struct list_head ddestroy;
/*
-- 2.25.1
On Fri, Mar 19, 2021 at 08:10:21PM +0800, Christian König wrote:
Am 19.03.21 um 05:32 schrieb Huang Rui:
On Thu, Mar 18, 2021 at 08:47:19PM +0800, Christian König wrote:
Instead of having a global lock.
Signed-off-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 ++--- drivers/gpu/drm/qxl/qxl_release.c | 5 +-- drivers/gpu/drm/ttm/ttm_bo.c | 49 ++++++++++++-------------- drivers/gpu/drm/ttm/ttm_device.c | 12 +++---- drivers/gpu/drm/ttm/ttm_execbuf_util.c | 8 ++--- drivers/gpu/drm/ttm/ttm_resource.c | 9 +++-- include/drm/ttm/ttm_bo_driver.h | 4 +-- include/drm/ttm/ttm_device.h | 4 +-- 8 files changed, 43 insertions(+), 56 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 9d19078246c8..ae18c0e32347 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -638,15 +638,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, struct amdgpu_vm_bo_base *bo_base;
if (vm->bulk_moveable) {
spin_lock(&ttm_glob.lru_lock);
spin_lock(&adev->mman.bdev.lru_lock);
Could you please explain why do you want to instead of the global lock?
Potentially less contention.
But you are right mentioning this in the commit message is a good idea.
Thanks.
Patch is Acked-by: Huang Rui ray.huang@amd.com
Regards, Christian.
Thanks, Ray
ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
spin_unlock(&ttm_glob.lru_lock);
spin_unlock(&adev->mman.bdev.lru_lock);
return; }
memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
- spin_lock(&ttm_glob.lru_lock);
- spin_lock(&adev->mman.bdev.lru_lock); list_for_each_entry(bo_base, &vm->idle, vm_status) { struct amdgpu_bo *bo = bo_base->bo;
@@ -660,7 +660,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, &bo->shadow->tbo.mem, &vm->lru_bulk_move); }
- spin_unlock(&ttm_glob.lru_lock);
spin_unlock(&adev->mman.bdev.lru_lock);
vm->bulk_moveable = true; }
diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c index f5845c96d414..b19f2f00b215 100644 --- a/drivers/gpu/drm/qxl/qxl_release.c +++ b/drivers/gpu/drm/qxl/qxl_release.c @@ -426,16 +426,13 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release) release->id | 0xf0000000, release->base.seqno); trace_dma_fence_emit(&release->base);
spin_lock(&ttm_glob.lru_lock);
list_for_each_entry(entry, &release->bos, head) { bo = entry->bo;
dma_resv_add_shared_fence(bo->base.resv, &release->base);
ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
dma_resv_unlock(bo->base.resv); }ttm_bo_move_to_lru_tail_unlocked(bo);
- spin_unlock(&ttm_glob.lru_lock); ww_acquire_fini(&release->ticket); }
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 3673157527ff..2d2ac532987e 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -243,9 +243,9 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo) * reference it any more. The only tricky case is the trylock on * the resv object while holding the lru_lock. */
spin_lock(&ttm_glob.lru_lock);
bo->base.resv = &bo->base._resv;spin_lock(&bo->bdev->lru_lock);
spin_unlock(&ttm_glob.lru_lock);
spin_unlock(&bo->bdev->lru_lock);
}
return r;
@@ -304,7 +304,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
if (unlock_resv) dma_resv_unlock(bo->base.resv);
spin_unlock(&ttm_glob.lru_lock);
spin_unlock(&bo->bdev->lru_lock);
lret = dma_resv_wait_timeout_rcu(resv, true, interruptible, 30 * HZ);
@@ -314,7 +314,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, else if (lret == 0) return -EBUSY;
spin_lock(&ttm_glob.lru_lock);
if (unlock_resv && !dma_resv_trylock(bo->base.resv)) { /* * We raced, and lost, someone else holds the reservation now,spin_lock(&bo->bdev->lru_lock);
@@ -324,7 +324,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, * delayed destruction would succeed, so just return success * here. */
spin_unlock(&ttm_glob.lru_lock);
} ret = 0;spin_unlock(&bo->bdev->lru_lock); return 0;
@@ -333,13 +333,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, if (ret || unlikely(list_empty(&bo->ddestroy))) { if (unlock_resv) dma_resv_unlock(bo->base.resv);
spin_unlock(&ttm_glob.lru_lock);
spin_unlock(&bo->bdev->lru_lock);
return ret; }
ttm_bo_del_from_lru(bo); list_del_init(&bo->ddestroy);
- spin_unlock(&ttm_glob.lru_lock);
spin_unlock(&bo->bdev->lru_lock); ttm_bo_cleanup_memtype_use(bo);
if (unlock_resv)
@@ -356,13 +356,12 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, */ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all) {
struct ttm_global *glob = &ttm_glob; struct list_head removed; bool empty;
INIT_LIST_HEAD(&removed);
spin_lock(&glob->lru_lock);
- spin_lock(&bdev->lru_lock); while (!list_empty(&bdev->ddestroy)) { struct ttm_buffer_object *bo;
@@ -373,24 +372,24 @@ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all) continue;
if (remove_all || bo->base.resv != &bo->base._resv) {
spin_unlock(&glob->lru_lock);
spin_unlock(&bdev->lru_lock); dma_resv_lock(bo->base.resv, NULL);
spin_lock(&glob->lru_lock);
spin_lock(&bdev->lru_lock); ttm_bo_cleanup_refs(bo, false, !remove_all, true);
} else if (dma_resv_trylock(bo->base.resv)) { ttm_bo_cleanup_refs(bo, false, !remove_all, true); } else {
spin_unlock(&glob->lru_lock);
spin_unlock(&bdev->lru_lock);
}
ttm_bo_put(bo);
spin_lock(&glob->lru_lock);
} list_splice_tail(&removed, &bdev->ddestroy); empty = list_empty(&bdev->ddestroy);spin_lock(&bdev->lru_lock);
- spin_unlock(&glob->lru_lock);
spin_unlock(&bdev->lru_lock);
return empty; }
@@ -425,7 +424,7 @@ static void ttm_bo_release(struct kref *kref) ttm_bo_flush_all_fences(bo); bo->deleted = true;
spin_lock(&ttm_glob.lru_lock);
spin_lock(&bo->bdev->lru_lock);
/*
- Make pinned bos immediately available to
@@ -442,17 +441,17 @@ static void ttm_bo_release(struct kref *kref)
kref_init(&bo->kref); list_add_tail(&bo->ddestroy, &bdev->ddestroy);
spin_unlock(&ttm_glob.lru_lock);
spin_unlock(&bo->bdev->lru_lock);
schedule_delayed_work(&bdev->wq, ((HZ / 100) < 1) ? 1 : HZ / 100); return; }
- spin_lock(&ttm_glob.lru_lock);
- spin_lock(&bo->bdev->lru_lock); ttm_bo_del_from_lru(bo); list_del(&bo->ddestroy);
- spin_unlock(&ttm_glob.lru_lock);
spin_unlock(&bo->bdev->lru_lock);
ttm_bo_cleanup_memtype_use(bo); dma_resv_unlock(bo->base.resv);
@@ -626,7 +625,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev, unsigned i; int ret;
- spin_lock(&ttm_glob.lru_lock);
- spin_lock(&bo->bdev->lru_lock); for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { list_for_each_entry(bo, &man->lru[i], lru) { bool busy;
@@ -663,7 +662,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev, if (!bo) { if (busy_bo && !ttm_bo_get_unless_zero(busy_bo)) busy_bo = NULL;
spin_unlock(&ttm_glob.lru_lock);
ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket); if (busy_bo) ttm_bo_put(busy_bo);spin_unlock(&bo->bdev->lru_lock);
@@ -677,7 +676,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev, return ret; }
- spin_unlock(&ttm_glob.lru_lock);
spin_unlock(&bo->bdev->lru_lock);
ret = ttm_bo_evict(bo, ctx); if (locked)
@@ -777,10 +776,9 @@ static int ttm_bo_mem_placement(struct ttm_buffer_object *bo, mem->mem_type = place->mem_type; mem->placement = place->flags;
- spin_lock(&ttm_glob.lru_lock);
- spin_lock(&bo->bdev->lru_lock); ttm_bo_move_to_lru_tail(bo, mem, NULL);
- spin_unlock(&ttm_glob.lru_lock);
- spin_unlock(&bo->bdev->lru_lock); return 0; }
@@ -1167,7 +1165,6 @@ EXPORT_SYMBOL(ttm_bo_wait); int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, gfp_t gfp_flags) {
- struct ttm_global *glob = &ttm_glob; bool locked; int ret;
@@ -1188,7 +1185,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
ttm_bo_del_from_lru(bo); /* TODO: Cleanup the locking */
- spin_unlock(&glob->lru_lock);
spin_unlock(&bo->bdev->lru_lock);
/*
- Move to system cached
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index 2096a0fd9c35..85f6975d9872 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -81,7 +81,6 @@ static int ttm_global_init(void) ttm_pool_mgr_init(num_pages * 50 / 100); ttm_tt_mgr_init();
spin_lock_init(&glob->lru_lock); glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);
if (unlikely(glob->dummy_read_page == NULL)) {
@@ -125,13 +124,12 @@ EXPORT_SYMBOL(ttm_global_swapout); long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, gfp_t gfp_flags) {
struct ttm_global *glob = &ttm_glob; struct ttm_resource_manager *man; struct ttm_buffer_object *bo; unsigned i, j; int ret;
spin_lock(&glob->lru_lock);
- spin_lock(&bdev->lru_lock); for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) { man = ttm_manager_type(bdev, i); if (!man || !man->use_tt)
@@ -156,7 +154,7 @@ long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, } } }
- spin_unlock(&glob->lru_lock);
- spin_unlock(&bdev->lru_lock); return 0; } EXPORT_SYMBOL(ttm_device_swapout);
@@ -223,6 +221,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
bdev->vma_manager = vma_manager; INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue);
- spin_lock_init(&bdev->lru_lock); INIT_LIST_HEAD(&bdev->ddestroy); bdev->dev_mapping = mapping; mutex_lock(&ttm_global_mutex);
@@ -235,7 +234,6 @@ EXPORT_SYMBOL(ttm_device_init);
void ttm_device_fini(struct ttm_device *bdev) {
- struct ttm_global *glob = &ttm_glob; struct ttm_resource_manager *man; unsigned i;
@@ -252,11 +250,11 @@ void ttm_device_fini(struct ttm_device *bdev) if (ttm_bo_delayed_delete(bdev, true)) pr_debug("Delayed destroy list was clean\n");
- spin_lock(&glob->lru_lock);
- spin_lock(&bdev->lru_lock); for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) if (list_empty(&man->lru[0])) pr_debug("Swap list %d was clean\n", i);
- spin_unlock(&glob->lru_lock);
spin_unlock(&bdev->lru_lock);
ttm_pool_fini(&bdev->pool); ttm_global_release();
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c index 690ab97d52b7..071c48d672c6 100644 --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c @@ -51,14 +51,12 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket, if (list_empty(list)) return;
spin_lock(&ttm_glob.lru_lock); list_for_each_entry(entry, list, head) { struct ttm_buffer_object *bo = entry->bo;
ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
dma_resv_unlock(bo->base.resv); }ttm_bo_move_to_lru_tail_unlocked(bo);
spin_unlock(&ttm_glob.lru_lock);
if (ticket) ww_acquire_fini(ticket);
@@ -154,7 +152,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket, if (list_empty(list)) return;
- spin_lock(&ttm_glob.lru_lock); list_for_each_entry(entry, list, head) { struct ttm_buffer_object *bo = entry->bo;
@@ -162,10 +159,9 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket, dma_resv_add_shared_fence(bo->base.resv, fence); else dma_resv_add_excl_fence(bo->base.resv, fence);
ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
dma_resv_unlock(bo->base.resv); }ttm_bo_move_to_lru_tail_unlocked(bo);
- spin_unlock(&ttm_glob.lru_lock); if (ticket) ww_acquire_fini(ticket); }
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c index ed1672a9f332..04f2eef653ab 100644 --- a/drivers/gpu/drm/ttm/ttm_resource.c +++ b/drivers/gpu/drm/ttm/ttm_resource.c @@ -91,7 +91,6 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev, .no_wait_gpu = false, .force_alloc = true };
- struct ttm_global *glob = &ttm_glob; struct dma_fence *fence; int ret; unsigned i;
@@ -100,18 +99,18 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev, * Can't use standard list traversal since we're unlocking. */
- spin_lock(&glob->lru_lock);
- spin_lock(&bdev->lru_lock); for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { while (!list_empty(&man->lru[i])) {
spin_unlock(&glob->lru_lock);
spin_unlock(&bdev->lru_lock); ret = ttm_mem_evict_first(bdev, man, NULL, &ctx, NULL); if (ret) return ret;
spin_lock(&glob->lru_lock);
} }spin_lock(&bdev->lru_lock);
- spin_unlock(&glob->lru_lock);
spin_unlock(&bdev->lru_lock);
spin_lock(&man->move_lock); fence = dma_fence_get(man->move);
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index d007feef7676..dbccac957f8f 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -180,9 +180,9 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo, static inline void ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo) {
- spin_lock(&ttm_glob.lru_lock);
- spin_lock(&bo->bdev->lru_lock); ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
- spin_unlock(&ttm_glob.lru_lock);
spin_unlock(&bo->bdev->lru_lock); }
static inline void ttm_bo_assign_mem(struct ttm_buffer_object *bo,
diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h index cda6efb4c34b..bae56d29e8ff 100644 --- a/include/drm/ttm/ttm_device.h +++ b/include/drm/ttm/ttm_device.h @@ -56,7 +56,6 @@ extern struct ttm_global { */
struct page *dummy_read_page;
spinlock_t lru_lock;
/**
- Protected by ttm_global_mutex.
@@ -277,8 +276,9 @@ struct ttm_device { struct ttm_pool pool;
/*
* Protected by the global:lru lock.
* Protection for the per manager LRU and ddestroy lists.
*/
spinlock_t lru_lock; struct list_head ddestroy;
/*
-- 2.25.1
Hi Christian,
On 3/18/21 1:47 PM, Christian König wrote: <snip>
/**
- ttm_bo_uses_embedded_gem_object - check if the given bo uses the
diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h index 035bbc044a3b..6a0b267d4fe6 100644 --- a/include/drm/ttm/ttm_device.h +++ b/include/drm/ttm/ttm_device.h @@ -297,6 +297,8 @@ struct ttm_device { struct delayed_work wq; };
+long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
There is a typo here, long -> int.
Thanks,
Nirmoy
- static inline struct ttm_resource_manager * ttm_manager_type(struct ttm_device *bdev, int mem_type) {
Am 18.03.21 um 15:43 schrieb Nirmoy:
Hi Christian,
On 3/18/21 1:47 PM, Christian König wrote:
<snip> > /** > * ttm_bo_uses_embedded_gem_object - check if the given bo uses the > diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h > index 035bbc044a3b..6a0b267d4fe6 100644 > --- a/include/drm/ttm/ttm_device.h > +++ b/include/drm/ttm/ttm_device.h > @@ -297,6 +297,8 @@ struct ttm_device { > struct delayed_work wq; > }; > +long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t > gfp_flags);
There is a typo here, long -> int.
Ah, yes. I missed to add the change to the header file.
Thanks for pointing that out, Christian.
Thanks,
Nirmoy
static inline struct ttm_resource_manager * ttm_manager_type(struct ttm_device *bdev, int mem_type) {
On 3/18/21 4:26 PM, Christian König wrote:
Am 18.03.21 um 15:43 schrieb Nirmoy:
Hi Christian,
On 3/18/21 1:47 PM, Christian König wrote:
<snip> > /** > * ttm_bo_uses_embedded_gem_object - check if the given bo uses the > diff --git a/include/drm/ttm/ttm_device.h > b/include/drm/ttm/ttm_device.h > index 035bbc044a3b..6a0b267d4fe6 100644 > --- a/include/drm/ttm/ttm_device.h > +++ b/include/drm/ttm/ttm_device.h > @@ -297,6 +297,8 @@ struct ttm_device { > struct delayed_work wq; > }; > +long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t > gfp_flags);
There is a typo here, long -> int.
Ah, yes. I missed to add the change to the header file.
I have tested the patch series on qxl and vmwgfx.
With that typo fixed, the series is Tested-by: Nirmoy Das nirmoy.das@amd.com
Thanks for pointing that out, Christian.
Thanks,
Nirmoy
static inline struct ttm_resource_manager * ttm_manager_type(struct ttm_device *bdev, int mem_type) {
Hi "Christian,
I love your patch! Yet something to improve:
[auto build test ERROR on drm-tip/drm-tip] [also build test ERROR on next-20210318] [cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next linus/master drm/drm-next v5.12-rc3] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapou... base: git://anongit.freedesktop.org/drm/drm-tip drm-tip config: i386-randconfig-m021-20210318 (attached as .config) compiler: gcc-9 (Debian 9.3.0-22) 9.3.0 reproduce (this is a W=1 build): # https://github.com/0day-ci/linux/commit/a454d56ea061b53d24a62a700743e4508dd6... git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Christian-K-nig/drm-ttm-move-swapout-logic-around-v2/20210318-204848 git checkout a454d56ea061b53d24a62a700743e4508dd6c9b1 # save the attached .config to linux build tree make W=1 ARCH=i386
If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot lkp@intel.com
All errors (new ones prefixed by >>):
drivers/gpu/drm/ttm/ttm_device.c:109:5: error: conflicting types for 'ttm_global_swapout'
109 | int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags) | ^~~~~~~~~~~~~~~~~~ In file included from drivers/gpu/drm/ttm/ttm_device.c:32: include/drm/ttm/ttm_device.h:300:6: note: previous declaration of 'ttm_global_swapout' was here 300 | long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags); | ^~~~~~~~~~~~~~~~~~ In file included from include/linux/linkage.h:7, from include/linux/kernel.h:7, from include/asm-generic/bug.h:20, from arch/x86/include/asm/bug.h:93, from include/linux/bug.h:5, from include/linux/mmdebug.h:5, from include/linux/mm.h:9, from drivers/gpu/drm/ttm/ttm_device.c:30: drivers/gpu/drm/ttm/ttm_device.c:132:15: error: conflicting types for 'ttm_global_swapout' 132 | EXPORT_SYMBOL(ttm_global_swapout); | ^~~~~~~~~~~~~~~~~~ include/linux/export.h:98:21: note: in definition of macro '___EXPORT_SYMBOL' 98 | extern typeof(sym) sym; \ | ^~~ include/linux/export.h:155:34: note: in expansion of macro '__EXPORT_SYMBOL' 155 | #define _EXPORT_SYMBOL(sym, sec) __EXPORT_SYMBOL(sym, sec, "") | ^~~~~~~~~~~~~~~ include/linux/export.h:158:29: note: in expansion of macro '_EXPORT_SYMBOL' 158 | #define EXPORT_SYMBOL(sym) _EXPORT_SYMBOL(sym, "") | ^~~~~~~~~~~~~~ drivers/gpu/drm/ttm/ttm_device.c:132:1: note: in expansion of macro 'EXPORT_SYMBOL' 132 | EXPORT_SYMBOL(ttm_global_swapout); | ^~~~~~~~~~~~~ In file included from drivers/gpu/drm/ttm/ttm_device.c:32: include/drm/ttm/ttm_device.h:300:6: note: previous declaration of 'ttm_global_swapout' was here 300 | long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags); | ^~~~~~~~~~~~~~~~~~
vim +/ttm_global_swapout +109 drivers/gpu/drm/ttm/ttm_device.c
104 105 /** 106 * A buffer object shrink method that tries to swap out the first 107 * buffer object on the global::swap_lru list. 108 */
109 int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
110 { 111 struct ttm_global *glob = &ttm_glob; 112 struct ttm_buffer_object *bo; 113 unsigned i; 114 int ret; 115 116 spin_lock(&glob->lru_lock); 117 for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { 118 list_for_each_entry(bo, &glob->swap_lru[i], swap) { 119 uint32_t num_pages = bo->ttm->num_pages; 120 121 ret = ttm_bo_swapout(bo, ctx, gfp_flags); 122 /* ttm_bo_swapout has dropped the lru_lock */ 123 if (!ret) 124 return num_pages; 125 if (ret != -EBUSY) 126 return ret; 127 } 128 } 129 spin_unlock(&glob->lru_lock); 130 return 0; 131 } 132 EXPORT_SYMBOL(ttm_global_swapout); 133
--- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Hi "Christian,
I love your patch! Yet something to improve:
[auto build test ERROR on drm-tip/drm-tip] [also build test ERROR on next-20210318] [cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next linus/master v5.12-rc3] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapou... base: git://anongit.freedesktop.org/drm/drm-tip drm-tip config: x86_64-randconfig-a005-20210318 (attached as .config) compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project 6db3ab2903f42712f44000afb5aa467efbd25f35) reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # install x86_64 cross compiling tool for clang build # apt-get install binutils-x86-64-linux-gnu # https://github.com/0day-ci/linux/commit/a454d56ea061b53d24a62a700743e4508dd6... git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Christian-K-nig/drm-ttm-move-swapout-logic-around-v2/20210318-204848 git checkout a454d56ea061b53d24a62a700743e4508dd6c9b1 # save the attached .config to linux build tree COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=x86_64
If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot lkp@intel.com
All errors (new ones prefixed by >>):
drivers/gpu/drm/ttm/ttm_device.c:109:5: error: conflicting types for 'ttm_global_swapout'
int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags) ^ include/drm/ttm/ttm_device.h:300:6: note: previous declaration is here long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags); ^ 1 error generated.
vim +/ttm_global_swapout +109 drivers/gpu/drm/ttm/ttm_device.c
104 105 /** 106 * A buffer object shrink method that tries to swap out the first 107 * buffer object on the global::swap_lru list. 108 */
109 int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
110 { 111 struct ttm_global *glob = &ttm_glob; 112 struct ttm_buffer_object *bo; 113 unsigned i; 114 int ret; 115 116 spin_lock(&glob->lru_lock); 117 for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { 118 list_for_each_entry(bo, &glob->swap_lru[i], swap) { 119 uint32_t num_pages = bo->ttm->num_pages; 120 121 ret = ttm_bo_swapout(bo, ctx, gfp_flags); 122 /* ttm_bo_swapout has dropped the lru_lock */ 123 if (!ret) 124 return num_pages; 125 if (ret != -EBUSY) 126 return ret; 127 } 128 } 129 spin_unlock(&glob->lru_lock); 130 return 0; 131 } 132 EXPORT_SYMBOL(ttm_global_swapout); 133
--- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
General question for the audience: Is there any way to silence the build bot here?
This is a well known false positive.
Regards, Christian.
Am 18.03.21 um 19:13 schrieb kernel test robot:
Hi "Christian,
I love your patch! Yet something to improve:
[auto build test ERROR on drm-tip/drm-tip] [also build test ERROR on next-20210318] [cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next linus/master v5.12-rc3] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapou... base: git://anongit.freedesktop.org/drm/drm-tip drm-tip config: x86_64-randconfig-a005-20210318 (attached as .config) compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project 6db3ab2903f42712f44000afb5aa467efbd25f35) reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # install x86_64 cross compiling tool for clang build # apt-get install binutils-x86-64-linux-gnu # https://github.com/0day-ci/linux/commit/a454d56ea061b53d24a62a700743e4508dd6... git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Christian-K-nig/drm-ttm-move-swapout-logic-around-v2/20210318-204848 git checkout a454d56ea061b53d24a62a700743e4508dd6c9b1 # save the attached .config to linux build tree COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=x86_64
If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot lkp@intel.com
All errors (new ones prefixed by >>):
drivers/gpu/drm/ttm/ttm_device.c:109:5: error: conflicting types for 'ttm_global_swapout'
int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags) ^ include/drm/ttm/ttm_device.h:300:6: note: previous declaration is here long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags); ^ 1 error generated.
vim +/ttm_global_swapout +109 drivers/gpu/drm/ttm/ttm_device.c
104 105 /** 106 * A buffer object shrink method that tries to swap out the first 107 * buffer object on the global::swap_lru list. 108 */
109 int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
110 { 111 struct ttm_global *glob = &ttm_glob; 112 struct ttm_buffer_object *bo; 113 unsigned i; 114 int ret; 115 116 spin_lock(&glob->lru_lock); 117 for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { 118 list_for_each_entry(bo, &glob->swap_lru[i], swap) { 119 uint32_t num_pages = bo->ttm->num_pages; 120 121 ret = ttm_bo_swapout(bo, ctx, gfp_flags); 122 /* ttm_bo_swapout has dropped the lru_lock */ 123 if (!ret) 124 return num_pages; 125 if (ret != -EBUSY) 126 return ret; 127 } 128 } 129 spin_unlock(&glob->lru_lock); 130 return 0; 131 } 132 EXPORT_SYMBOL(ttm_global_swapout); 133
0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
On Thu, Mar 18, 2021 at 08:47:17PM +0800, Christian K�nig wrote:
Move the iteration of the global lru into the new function ttm_global_swapout() and use that instead in drivers.
v2: consistently return int
Signed-off-by: Christian K?nig christian.koenig@amd.com
Reviewed-by: Huang Rui ray.huang@amd.com
drivers/gpu/drm/ttm/ttm_bo.c | 57 ++++++++--------------------- drivers/gpu/drm/ttm/ttm_device.c | 29 +++++++++++++++ drivers/gpu/drm/ttm/ttm_tt.c | 2 +- drivers/gpu/drm/vmwgfx/ttm_memory.c | 3 +- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 2 +- include/drm/ttm/ttm_bo_api.h | 3 +- include/drm/ttm/ttm_device.h | 2 + 7 files changed, 53 insertions(+), 45 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 3c23e863a3f0..66e00b404ec3 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1193,56 +1193,35 @@ int ttm_bo_wait(struct ttm_buffer_object *bo, } EXPORT_SYMBOL(ttm_bo_wait);
-/*
- A buffer object shrink method that tries to swap out the first
- buffer object on the bo_global::swap_lru list.
- */
-int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags) +int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
gfp_t gfp_flags)
{ struct ttm_global *glob = &ttm_glob;
- struct ttm_buffer_object *bo;
- int ret = -EBUSY; bool locked;
- unsigned i;
- spin_lock(&glob->lru_lock);
- for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
list_for_each_entry(bo, &glob->swap_lru[i], swap) {
if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
NULL))
continue;
if (!ttm_bo_get_unless_zero(bo)) {
if (locked)
dma_resv_unlock(bo->base.resv);
continue;
}
- int ret;
ret = 0;
break;
}
if (!ret)
break;
- }
- if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked, NULL))
return -EBUSY;
- if (ret) {
spin_unlock(&glob->lru_lock);
return ret;
if (!ttm_bo_get_unless_zero(bo)) {
if (locked)
dma_resv_unlock(bo->base.resv);
return -EBUSY;
}
if (bo->deleted) {
ret = ttm_bo_cleanup_refs(bo, false, false, locked);
ttm_bo_put(bo);ttm_bo_cleanup_refs(bo, false, false, locked);
return ret;
return 0;
}
ttm_bo_del_from_lru(bo);
/* TODO: Cleanup the locking */ spin_unlock(&glob->lru_lock);
- /**
- /*
*/
- Move to system cached
- if (bo->mem.mem_type != TTM_PL_SYSTEM) { struct ttm_operation_ctx ctx = { false, false }; struct ttm_resource evict_mem;
@@ -1262,29 +1241,26 @@ int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags) } }
- /**
- /*
*/
- Make sure BO is idle.
ret = ttm_bo_wait(bo, false, false); if (unlikely(ret != 0)) goto out;
ttm_bo_unmap_virtual(bo);
/**
- /*
*/
- Swap out. Buffer will be swapped in again as soon as
- anyone tries to access a ttm page.
if (bo->bdev->funcs->swap_notify) bo->bdev->funcs->swap_notify(bo);
ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
out:
- /**
*
- /*
*/
- Unreserve without putting on LRU to avoid swapping out an
- already swapped buffer.
@@ -1293,7 +1269,6 @@ int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags) ttm_bo_put(bo); return ret; } -EXPORT_SYMBOL(ttm_bo_swapout);
void ttm_bo_tt_destroy(struct ttm_buffer_object *bo) { diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index 95e1b7b1f2e6..b1424189fdfb 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -102,6 +102,35 @@ static int ttm_global_init(void) return ret; }
+/**
- A buffer object shrink method that tries to swap out the first
- buffer object on the global::swap_lru list.
- */
+int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags) +{
- struct ttm_global *glob = &ttm_glob;
- struct ttm_buffer_object *bo;
- unsigned i;
- int ret;
- spin_lock(&glob->lru_lock);
- for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
list_for_each_entry(bo, &glob->swap_lru[i], swap) {
uint32_t num_pages = bo->ttm->num_pages;
ret = ttm_bo_swapout(bo, ctx, gfp_flags);
/* ttm_bo_swapout has dropped the lru_lock */
if (!ret)
return num_pages;
if (ret != -EBUSY)
return ret;
}
- }
- spin_unlock(&glob->lru_lock);
- return 0;
+} +EXPORT_SYMBOL(ttm_global_swapout);
static void ttm_init_sysman(struct ttm_device *bdev) { struct ttm_resource_manager *man = &bdev->sysman; diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 2f0833c98d2c..95b5cff25f4c 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -369,7 +369,7 @@ static unsigned long ttm_tt_shrinker_scan(struct shrinker *shrink, }; int ret;
- ret = ttm_bo_swapout(&ctx, GFP_NOFS);
- ret = ttm_global_swapout(&ctx, GFP_NOFS); return ret < 0 ? SHRINK_EMPTY : ret;
}
diff --git a/drivers/gpu/drm/vmwgfx/ttm_memory.c b/drivers/gpu/drm/vmwgfx/ttm_memory.c index e972af07d029..104b95a8c7a2 100644 --- a/drivers/gpu/drm/vmwgfx/ttm_memory.c +++ b/drivers/gpu/drm/vmwgfx/ttm_memory.c @@ -38,6 +38,7 @@
#include <drm/drm_device.h> #include <drm/drm_file.h> +#include <drm/ttm/ttm_device.h>
#include "ttm_memory.h"
@@ -277,7 +278,7 @@ static void ttm_shrink(struct ttm_mem_global *glob, bool from_wq,
while (ttm_zones_above_swap_target(glob, from_wq, extra)) { spin_unlock(&glob->lock);
ret = ttm_bo_swapout(ctx, GFP_KERNEL);
spin_lock(&glob->lock); if (unlikely(ret < 0)) break;ret = ttm_global_swapout(ctx, GFP_KERNEL);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 6910111099c8..b991422e156c 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -1371,7 +1371,7 @@ static int vmw_pm_freeze(struct device *kdev) vmw_execbuf_release_pinned_bo(dev_priv); vmw_resource_evict_all(dev_priv); vmw_release_device_early(dev_priv);
- while (ttm_bo_swapout(&ctx, GFP_KERNEL) > 0);
- while (ttm_global_swapout(&ctx, GFP_KERNEL) > 0); if (dev_priv->enable_fb) vmw_fifo_resource_dec(dev_priv); if (atomic_read(&dev_priv->num_fifo_resources) != 0) {
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 4fb523dfab32..5044ac330858 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -560,7 +560,8 @@ ssize_t ttm_bo_io(struct ttm_device *bdev, struct file *filp, const char __user *wbuf, char __user *rbuf, size_t count, loff_t *f_pos, bool write);
-int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags); +int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
gfp_t gfp_flags);
/**
- ttm_bo_uses_embedded_gem_object - check if the given bo uses the
diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h index 035bbc044a3b..6a0b267d4fe6 100644 --- a/include/drm/ttm/ttm_device.h +++ b/include/drm/ttm/ttm_device.h @@ -297,6 +297,8 @@ struct ttm_device { struct delayed_work wq; };
+long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
static inline struct ttm_resource_manager * ttm_manager_type(struct ttm_device *bdev, int mem_type) { -- 2.25.1
dri-devel mailing list dri-devel@lists.freedesktop.org https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.free...
On Thu, 18 Mar 2021 at 12:47, Christian König ckoenig.leichtzumerken@gmail.com wrote:
Move the iteration of the global lru into the new function ttm_global_swapout() and use that instead in drivers.
v2: consistently return int
Signed-off-by: Christian König christian.koenig@amd.com
For the series, Reviewed-by: Matthew Auld matthew.auld@intel.com
dri-devel@lists.freedesktop.org