Move the iteration of the global LRU into the new function ttm_global_swapout() and use that in the drivers instead.
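The driver-facing call becomes global rather than per-BO; a sketch of the new usage, with the return convention taken from the hunks below (positive page count on progress, 0 when nothing is swappable, negative errno on error):

	/* Sketch: drivers now ask TTM to swap out from the global LRU
	 * instead of calling ttm_bo_swapout(ctx, gfp) themselves.
	 */
	struct ttm_operation_ctx ctx = { .interruptible = false,
					 .no_wait_gpu = false };
	long freed;

	freed = ttm_global_swapout(&ctx, GFP_KERNEL);
	if (freed < 0)
		pr_warn("TTM swapout failed: %ld\n", freed);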
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c        | 57 ++++++++--------------------
 drivers/gpu/drm/ttm/ttm_device.c    | 29 +++++++++++++++
 drivers/gpu/drm/ttm/ttm_tt.c        |  2 +-
 drivers/gpu/drm/vmwgfx/ttm_memory.c |  3 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c |  2 +-
 include/drm/ttm/ttm_bo_api.h        |  3 +-
 include/drm/ttm/ttm_device.h        |  2 +
 7 files changed, 53 insertions(+), 45 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index a08dec7281fc..56d2e38af273 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1186,56 +1186,35 @@ int ttm_bo_wait(struct ttm_buffer_object *bo, } EXPORT_SYMBOL(ttm_bo_wait);
-/* - * A buffer object shrink method that tries to swap out the first - * buffer object on the bo_global::swap_lru list. - */ -int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags) +int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, + gfp_t gfp_flags) { struct ttm_global *glob = &ttm_glob; - struct ttm_buffer_object *bo; - int ret = -EBUSY; bool locked; - unsigned i; - - spin_lock(&glob->lru_lock); - for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { - list_for_each_entry(bo, &glob->swap_lru[i], swap) { - if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked, - NULL)) - continue; - - if (!ttm_bo_get_unless_zero(bo)) { - if (locked) - dma_resv_unlock(bo->base.resv); - continue; - } + int ret;
- ret = 0; - break; - } - if (!ret) - break; - } + if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked, NULL)) + return -EBUSY;
- if (ret) { - spin_unlock(&glob->lru_lock); - return ret; + if (!ttm_bo_get_unless_zero(bo)) { + if (locked) + dma_resv_unlock(bo->base.resv); + return -EBUSY; }
if (bo->deleted) { - ret = ttm_bo_cleanup_refs(bo, false, false, locked); + ttm_bo_cleanup_refs(bo, false, false, locked); ttm_bo_put(bo); - return ret; + return 0; }
ttm_bo_del_from_lru(bo); + /* TODO: Cleanup the locking */ spin_unlock(&glob->lru_lock);
- /** + /* * Move to system cached */ - if (bo->mem.mem_type != TTM_PL_SYSTEM) { struct ttm_operation_ctx ctx = { false, false }; struct ttm_resource evict_mem; @@ -1255,29 +1234,26 @@ int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags) } }
- /** + /* * Make sure BO is idle. */ - ret = ttm_bo_wait(bo, false, false); if (unlikely(ret != 0)) goto out;
ttm_bo_unmap_virtual(bo);
- /** + /* * Swap out. Buffer will be swapped in again as soon as * anyone tries to access a ttm page. */ - if (bo->bdev->funcs->swap_notify) bo->bdev->funcs->swap_notify(bo);
ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags); out:
- /** - * + /* * Unreserve without putting on LRU to avoid swapping out an * already swapped buffer. */ @@ -1286,7 +1262,6 @@ int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags) ttm_bo_put(bo); return ret; } -EXPORT_SYMBOL(ttm_bo_swapout);
void ttm_bo_tt_destroy(struct ttm_buffer_object *bo) { diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index 95e1b7b1f2e6..dfc2a7e4e490 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -102,6 +102,35 @@ static int ttm_global_init(void) return ret; }
+/**
+ * A buffer object shrink method that tries to swap out the first
+ * buffer object on the global::swap_lru list.
+ */
+long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
+{
+	struct ttm_global *glob = &ttm_glob;
+	struct ttm_buffer_object *bo;
+	unsigned i;
+	int ret;
+
+	spin_lock(&glob->lru_lock);
+	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
+		list_for_each_entry(bo, &glob->swap_lru[i], swap) {
+			uint32_t num_pages = bo->ttm->num_pages;
+
+			ret = ttm_bo_swapout(bo, ctx, gfp_flags);
+			/* ttm_bo_swapout has dropped the lru_lock */
+			if (!ret)
+				return num_pages;
+			if (ret != -EBUSY)
+				return ret;
+		}
+	}
+	spin_unlock(&glob->lru_lock);
+	return 0;
+}
+EXPORT_SYMBOL(ttm_global_swapout);
+
 static void ttm_init_sysman(struct ttm_device *bdev)
 {
 	struct ttm_resource_manager *man = &bdev->sysman;
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 2f0833c98d2c..95b5cff25f4c 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -369,7 +369,7 @@ static unsigned long ttm_tt_shrinker_scan(struct shrinker *shrink,
 	};
 	int ret;
- ret = ttm_bo_swapout(&ctx, GFP_NOFS); + ret = ttm_global_swapout(&ctx, GFP_NOFS); return ret < 0 ? SHRINK_EMPTY : ret; }
diff --git a/drivers/gpu/drm/vmwgfx/ttm_memory.c b/drivers/gpu/drm/vmwgfx/ttm_memory.c index e972af07d029..104b95a8c7a2 100644 --- a/drivers/gpu/drm/vmwgfx/ttm_memory.c +++ b/drivers/gpu/drm/vmwgfx/ttm_memory.c @@ -38,6 +38,7 @@
#include <drm/drm_device.h> #include <drm/drm_file.h> +#include <drm/ttm/ttm_device.h>
#include "ttm_memory.h"
@@ -277,7 +278,7 @@ static void ttm_shrink(struct ttm_mem_global *glob, bool from_wq,
while (ttm_zones_above_swap_target(glob, from_wq, extra)) { spin_unlock(&glob->lock); - ret = ttm_bo_swapout(ctx, GFP_KERNEL); + ret = ttm_global_swapout(ctx, GFP_KERNEL); spin_lock(&glob->lock); if (unlikely(ret < 0)) break; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 6910111099c8..b991422e156c 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -1371,7 +1371,7 @@ static int vmw_pm_freeze(struct device *kdev) vmw_execbuf_release_pinned_bo(dev_priv); vmw_resource_evict_all(dev_priv); vmw_release_device_early(dev_priv); - while (ttm_bo_swapout(&ctx, GFP_KERNEL) > 0); + while (ttm_global_swapout(&ctx, GFP_KERNEL) > 0); if (dev_priv->enable_fb) vmw_fifo_resource_dec(dev_priv); if (atomic_read(&dev_priv->num_fifo_resources) != 0) { diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 4fb523dfab32..5044ac330858 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -560,7 +560,8 @@ ssize_t ttm_bo_io(struct ttm_device *bdev, struct file *filp, const char __user *wbuf, char __user *rbuf, size_t count, loff_t *f_pos, bool write);
-int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags); +int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, + gfp_t gfp_flags);
/** * ttm_bo_uses_embedded_gem_object - check if the given bo uses the diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h index 035bbc044a3b..6a0b267d4fe6 100644 --- a/include/drm/ttm/ttm_device.h +++ b/include/drm/ttm/ttm_device.h @@ -297,6 +297,8 @@ struct ttm_device { struct delayed_work wq; };
+long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags); + static inline struct ttm_resource_manager * ttm_manager_type(struct ttm_device *bdev, int mem_type) {
Instead, evict round-robin from each device's SYSTEM and TT domains.
v2: reorder num_pages access reported by Dan's script
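The device-level round-robin is implemented by rotating the global device list; reassembled from the ttm_device.c hunk below for readability (note the int ret vs. long return that the review later flags):

/* Reassembled from the hunk below: the first device that makes
 * progress is moved to the tail of the device list, so the next
 * ttm_global_swapout() call starts with a different device and the
 * swapout pressure rotates across devices.
 */
long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
{
	struct ttm_global *glob = &ttm_glob;
	struct ttm_device *bdev;
	int ret = -EBUSY;

	mutex_lock(&ttm_global_mutex);
	list_for_each_entry(bdev, &glob->device_list, device_list) {
		ret = ttm_device_swapout(bdev, ctx, gfp_flags);
		if (ret > 0) {
			list_move_tail(&bdev->device_list,
				       &glob->device_list);
			break;
		}
	}
	mutex_unlock(&ttm_global_mutex);
	return ret;
}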
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c        | 33 ++--------------
 drivers/gpu/drm/ttm/ttm_bo_util.c   |  1 -
 drivers/gpu/drm/ttm/ttm_device.c    | 60 +++++++++++++++++++++--------
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c |  2 +-
 include/drm/ttm/ttm_bo_api.h        |  1 -
 include/drm/ttm/ttm_bo_driver.h     |  1 -
 include/drm/ttm/ttm_device.h        |  7 +---
 7 files changed, 52 insertions(+), 53 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 56d2e38af273..a1be88be357b 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -73,7 +73,6 @@ static void ttm_bo_del_from_lru(struct ttm_buffer_object *bo) { struct ttm_device *bdev = bo->bdev;
- list_del_init(&bo->swap); list_del_init(&bo->lru);
if (bdev->funcs->del_from_lru_notify) @@ -104,16 +103,6 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
man = ttm_manager_type(bdev, mem->mem_type); list_move_tail(&bo->lru, &man->lru[bo->priority]); - if (man->use_tt && bo->ttm && - !(bo->ttm->page_flags & (TTM_PAGE_FLAG_SG | - TTM_PAGE_FLAG_SWAPPED))) { - struct list_head *swap; - - swap = &ttm_glob.swap_lru[bo->priority]; - list_move_tail(&bo->swap, swap); - } else { - list_del_init(&bo->swap); - }
if (bdev->funcs->del_from_lru_notify) bdev->funcs->del_from_lru_notify(bo); @@ -128,9 +117,6 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo, ttm_bo_bulk_move_set_pos(&bulk->vram[bo->priority], bo); break; } - if (bo->ttm && !(bo->ttm->page_flags & - (TTM_PAGE_FLAG_SG | TTM_PAGE_FLAG_SWAPPED))) - ttm_bo_bulk_move_set_pos(&bulk->swap[bo->priority], bo); } } EXPORT_SYMBOL(ttm_bo_move_to_lru_tail); @@ -168,20 +154,6 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk) list_bulk_move_tail(&man->lru[i], &pos->first->lru, &pos->last->lru); } - - for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { - struct ttm_lru_bulk_move_pos *pos = &bulk->swap[i]; - struct list_head *lru; - - if (!pos->first) - continue; - - dma_resv_assert_held(pos->first->base.resv); - dma_resv_assert_held(pos->last->base.resv); - - lru = &ttm_glob.swap_lru[i]; - list_bulk_move_tail(lru, &pos->first->swap, &pos->last->swap); - } } EXPORT_SYMBOL(ttm_bo_bulk_move_lru_tail);
@@ -1058,7 +1030,6 @@ int ttm_bo_init_reserved(struct ttm_device *bdev, kref_init(&bo->kref); INIT_LIST_HEAD(&bo->lru); INIT_LIST_HEAD(&bo->ddestroy); - INIT_LIST_HEAD(&bo->swap); bo->bdev = bdev; bo->type = type; bo->mem.mem_type = TTM_PL_SYSTEM; @@ -1193,6 +1164,10 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, bool locked; int ret;
+	if (!bo->ttm || bo->ttm->page_flags & (TTM_PAGE_FLAG_SG |
+					       TTM_PAGE_FLAG_SWAPPED))
+		return false;
+
 	if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked, NULL))
 		return -EBUSY;
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 031e5819fec4..a2a17c84ceb3 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -303,7 +303,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, atomic_inc(&ttm_glob.bo_count); INIT_LIST_HEAD(&fbo->base.ddestroy); INIT_LIST_HEAD(&fbo->base.lru); - INIT_LIST_HEAD(&fbo->base.swap); fbo->base.moving = NULL; drm_vma_node_reset(&fbo->base.base.vma_node);
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index dfc2a7e4e490..2c280fb1e992 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -67,7 +67,6 @@ static int ttm_global_init(void) unsigned long num_pages; struct sysinfo si; int ret = 0; - unsigned i;
mutex_lock(&ttm_global_mutex); if (++ttm_glob_use_count > 1) @@ -90,8 +89,6 @@ static int ttm_global_init(void) goto out; }
- for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) - INIT_LIST_HEAD(&glob->swap_lru[i]); INIT_LIST_HEAD(&glob->device_list); atomic_set(&glob->bo_count, 0);
@@ -109,27 +106,60 @@ static int ttm_global_init(void) long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags) { struct ttm_global *glob = &ttm_glob; + struct ttm_device *bdev; + int ret = -EBUSY; + + mutex_lock(&ttm_global_mutex); + list_for_each_entry(bdev, &glob->device_list, device_list) { + ret = ttm_device_swapout(bdev, ctx, gfp_flags); + if (ret > 0) { + list_move_tail(&bdev->device_list, &glob->device_list); + break; + } + } + mutex_unlock(&ttm_global_mutex); + return ret; +} +EXPORT_SYMBOL(ttm_global_swapout); + +long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, + gfp_t gfp_flags) +{ + struct ttm_global *glob = &ttm_glob; + struct ttm_resource_manager *man; struct ttm_buffer_object *bo; - unsigned i; + unsigned i, j; int ret;
spin_lock(&glob->lru_lock); - for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { - list_for_each_entry(bo, &glob->swap_lru[i], swap) { - uint32_t num_pages = bo->ttm->num_pages; - - ret = ttm_bo_swapout(bo, ctx, gfp_flags); - /* ttm_bo_swapout has dropped the lru_lock */ - if (!ret) - return num_pages; - if (ret != -EBUSY) - return ret; + for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) { + man = ttm_manager_type(bdev, i); + if (!man || !man->use_tt) + continue; + + for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) { + list_for_each_entry(bo, &man->lru[j], lru) { + long num_pages; + + if (!bo->ttm || + bo->ttm->page_flags & TTM_PAGE_FLAG_SG || + bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) + continue; + + num_pages = bo->ttm->num_pages; + ret = ttm_bo_swapout(bo, ctx, gfp_flags); + /* ttm_bo_swapout has dropped the lru_lock */ + if (!ret) + return num_pages; + if (ret != -EBUSY) + return ret; + } } } spin_unlock(&glob->lru_lock); return 0; } -EXPORT_SYMBOL(ttm_global_swapout); +EXPORT_SYMBOL(ttm_device_swapout);
static void ttm_init_sysman(struct ttm_device *bdev) { diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index b991422e156c..0e82b0662d9e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -1371,7 +1371,7 @@ static int vmw_pm_freeze(struct device *kdev) vmw_execbuf_release_pinned_bo(dev_priv); vmw_resource_evict_all(dev_priv); vmw_release_device_early(dev_priv); - while (ttm_global_swapout(&ctx, GFP_KERNEL) > 0); + while (ttm_device_swapout(&dev_priv->bdev, &ctx, GFP_KERNEL) == 0); if (dev_priv->enable_fb) vmw_fifo_resource_dec(dev_priv); if (atomic_read(&dev_priv->num_fifo_resources) != 0) { diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 5044ac330858..3587f660e8f4 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -144,7 +144,6 @@ struct ttm_buffer_object {
struct list_head lru; struct list_head ddestroy; - struct list_head swap;
/** * Members protected by a bo reservation. diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index 8959c0075cfd..d007feef7676 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -69,7 +69,6 @@ struct ttm_lru_bulk_move_pos { struct ttm_lru_bulk_move { struct ttm_lru_bulk_move_pos tt[TTM_MAX_BO_PRIORITY]; struct ttm_lru_bulk_move_pos vram[TTM_MAX_BO_PRIORITY]; - struct ttm_lru_bulk_move_pos swap[TTM_MAX_BO_PRIORITY]; };
/* diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h index 6a0b267d4fe6..cda6efb4c34b 100644 --- a/include/drm/ttm/ttm_device.h +++ b/include/drm/ttm/ttm_device.h @@ -63,11 +63,6 @@ extern struct ttm_global { */ struct list_head device_list;
- /** - * Protected by the lru_lock. - */ - struct list_head swap_lru[TTM_MAX_BO_PRIORITY]; - /** * Internal protection. */ @@ -298,6 +293,8 @@ struct ttm_device { };
long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags); +long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, + gfp_t gfp_flags);
static inline struct ttm_resource_manager * ttm_manager_type(struct ttm_device *bdev, int mem_type)
Hi "Christian,
I love your patch! Perhaps something to improve:
[auto build test WARNING on drm-tip/drm-tip]
[cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next linus/master drm/drm-next v5.12-rc3 next-20210315]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url:    https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapou...
base:   git://anongit.freedesktop.org/drm/drm-tip drm-tip
config: i386-randconfig-s002-20210315 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce:
        # apt-get install sparse
        # sparse version: v0.6.3-277-gc089cd2d-dirty
        # https://github.com/0day-ci/linux/commit/70ae63f3a85b9791dfcf38034c304aedda12...
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551
        git checkout 70ae63f3a85b9791dfcf38034c304aedda122e7b
        # save the attached .config to linux build tree
        make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=i386
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
"sparse warnings: (new ones prefixed by >>)" drivers/gpu/drm/ttm/ttm_device.c:42:1: sparse: sparse: symbol 'ttm_global_mutex' was not declared. Should it be static? drivers/gpu/drm/ttm/ttm_device.c:43:10: sparse: sparse: symbol 'ttm_glob_use_count' was not declared. Should it be static?
drivers/gpu/drm/ttm/ttm_device.c:125:6: sparse: sparse: context imbalance in 'ttm_device_swapout' - wrong count at exit
vim +/ttm_device_swapout +125 drivers/gpu/drm/ttm/ttm_device.c
   124	
   125	long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
   126				gfp_t gfp_flags)
   127	{
   128		struct ttm_global *glob = &ttm_glob;
   129		struct ttm_resource_manager *man;
   130		struct ttm_buffer_object *bo;
   131		unsigned i, j;
   132		int ret;
   133	
   134		spin_lock(&glob->lru_lock);
   135		for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
   136			man = ttm_manager_type(bdev, i);
   137			if (!man || !man->use_tt)
   138				continue;
   139	
   140			for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
   141				list_for_each_entry(bo, &man->lru[j], lru) {
   142					long num_pages;
   143	
   144					if (!bo->ttm ||
   145					    bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
   146					    bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)
   147						continue;
   148	
   149					num_pages = bo->ttm->num_pages;
   150					ret = ttm_bo_swapout(bo, ctx, gfp_flags);
   151					/* ttm_bo_swapout has dropped the lru_lock */
   152					if (!ret)
   153						return num_pages;
   154					if (ret != -EBUSY)
   155						return ret;
   156				}
   157			}
   158		}
   159		spin_unlock(&glob->lru_lock);
   160		return 0;
   161	}
   162	EXPORT_SYMBOL(ttm_device_swapout);
   163	
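The context imbalance is inherent to the calling convention: ttm_bo_swapout() drops lru_lock on success, so the returns at lines 153 and 155 exit with the lock already released, while the fall-through path unlocks explicitly. Sparse cannot model a lock that is conditionally released by a callee; a minimal standalone illustration of the pattern (invented names, not TTM code):

/* Standalone illustration (invented names): sparse counts lock
 * acquire/release per function, so a callee that conditionally drops
 * the caller's lock makes the counts disagree between exit paths.
 */
static DEFINE_SPINLOCK(demo_lock);

static int helper(bool success)
{
	if (success) {
		spin_unlock(&demo_lock);	/* drops the caller's lock */
		return 0;
	}
	return -EBUSY;				/* lock still held */
}

static long scan(void)
{
	spin_lock(&demo_lock);
	if (!helper(true))
		return 1;	/* exits with the lock already dropped */
	spin_unlock(&demo_lock);
	return 0;		/* exits after an explicit unlock */
}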
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
On Mon, 15 Mar 2021 at 16:04, Christian König <ckoenig.leichtzumerken@gmail.com> wrote:
Instead, evict round-robin from each device's SYSTEM and TT domains.
v2: reorder num_pages access reported by Dan's script
Signed-off-by: Christian König <christian.koenig@amd.com>
 drivers/gpu/drm/ttm/ttm_bo.c        | 33 ++--------------
 drivers/gpu/drm/ttm/ttm_bo_util.c   |  1 -
 drivers/gpu/drm/ttm/ttm_device.c    | 60 +++++++++++++++++++++--------
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c |  2 +-
 include/drm/ttm/ttm_bo_api.h        |  1 -
 include/drm/ttm/ttm_bo_driver.h     |  1 -
 include/drm/ttm/ttm_device.h        |  7 +---
 7 files changed, 52 insertions(+), 53 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 56d2e38af273..a1be88be357b 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -73,7 +73,6 @@ static void ttm_bo_del_from_lru(struct ttm_buffer_object *bo) { struct ttm_device *bdev = bo->bdev;
list_del_init(&bo->swap); list_del_init(&bo->lru); if (bdev->funcs->del_from_lru_notify)
@@ -104,16 +103,6 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
man = ttm_manager_type(bdev, mem->mem_type); list_move_tail(&bo->lru, &man->lru[bo->priority]);
if (man->use_tt && bo->ttm &&
!(bo->ttm->page_flags & (TTM_PAGE_FLAG_SG |
TTM_PAGE_FLAG_SWAPPED))) {
struct list_head *swap;
swap = &ttm_glob.swap_lru[bo->priority];
list_move_tail(&bo->swap, swap);
} else {
list_del_init(&bo->swap);
} if (bdev->funcs->del_from_lru_notify) bdev->funcs->del_from_lru_notify(bo);
@@ -128,9 +117,6 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo, ttm_bo_bulk_move_set_pos(&bulk->vram[bo->priority], bo); break; }
if (bo->ttm && !(bo->ttm->page_flags &
(TTM_PAGE_FLAG_SG | TTM_PAGE_FLAG_SWAPPED)))
ttm_bo_bulk_move_set_pos(&bulk->swap[bo->priority], bo); }
} EXPORT_SYMBOL(ttm_bo_move_to_lru_tail); @@ -168,20 +154,6 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk) list_bulk_move_tail(&man->lru[i], &pos->first->lru, &pos->last->lru); }
for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
struct ttm_lru_bulk_move_pos *pos = &bulk->swap[i];
struct list_head *lru;
if (!pos->first)
continue;
dma_resv_assert_held(pos->first->base.resv);
dma_resv_assert_held(pos->last->base.resv);
lru = &ttm_glob.swap_lru[i];
list_bulk_move_tail(lru, &pos->first->swap, &pos->last->swap);
}
} EXPORT_SYMBOL(ttm_bo_bulk_move_lru_tail);
@@ -1058,7 +1030,6 @@ int ttm_bo_init_reserved(struct ttm_device *bdev, kref_init(&bo->kref); INIT_LIST_HEAD(&bo->lru); INIT_LIST_HEAD(&bo->ddestroy);
INIT_LIST_HEAD(&bo->swap); bo->bdev = bdev; bo->type = type; bo->mem.mem_type = TTM_PL_SYSTEM;
@@ -1193,6 +1164,10 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, bool locked; int ret;
+	if (!bo->ttm || bo->ttm->page_flags & (TTM_PAGE_FLAG_SG |
+					       TTM_PAGE_FLAG_SWAPPED))
+		return false;
Should this be "return 0;"?
Seems inconsistent to return zero here and not drop the lru lock? Or maybe turn this into a programmer error, since the current caller already checks for the above?
if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked, NULL)) return -EBUSY;
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 031e5819fec4..a2a17c84ceb3 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -303,7 +303,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, atomic_inc(&ttm_glob.bo_count); INIT_LIST_HEAD(&fbo->base.ddestroy); INIT_LIST_HEAD(&fbo->base.lru);
INIT_LIST_HEAD(&fbo->base.swap); fbo->base.moving = NULL; drm_vma_node_reset(&fbo->base.base.vma_node);
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index dfc2a7e4e490..2c280fb1e992 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -67,7 +67,6 @@ static int ttm_global_init(void) unsigned long num_pages; struct sysinfo si; int ret = 0;
unsigned i; mutex_lock(&ttm_global_mutex); if (++ttm_glob_use_count > 1)
@@ -90,8 +89,6 @@ static int ttm_global_init(void) goto out; }
for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
INIT_LIST_HEAD(&glob->swap_lru[i]); INIT_LIST_HEAD(&glob->device_list); atomic_set(&glob->bo_count, 0);
@@ -109,27 +106,60 @@ static int ttm_global_init(void) long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags) { struct ttm_global *glob = &ttm_glob;
struct ttm_device *bdev;
int ret = -EBUSY;
mutex_lock(&ttm_global_mutex);
list_for_each_entry(bdev, &glob->device_list, device_list) {
ret = ttm_device_swapout(bdev, ctx, gfp_flags);
Mixing int and long for num_pages.
Does ttm enforce a maximum page count somewhere for object sizes? Something like INT_MAX, since it doesn't look like ttm is consistently using the same type (unsigned long?) when representing the number of pages for an object?
if (ret > 0) {
list_move_tail(&bdev->device_list, &glob->device_list);
break;
}
}
mutex_unlock(&ttm_global_mutex);
return ret;
+} +EXPORT_SYMBOL(ttm_global_swapout);
+long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
gfp_t gfp_flags)
+{
struct ttm_global *glob = &ttm_glob;
struct ttm_resource_manager *man; struct ttm_buffer_object *bo;
unsigned i;
unsigned i, j; int ret; spin_lock(&glob->lru_lock);
for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
list_for_each_entry(bo, &glob->swap_lru[i], swap) {
uint32_t num_pages = bo->ttm->num_pages;
ret = ttm_bo_swapout(bo, ctx, gfp_flags);
/* ttm_bo_swapout has dropped the lru_lock */
if (!ret)
return num_pages;
if (ret != -EBUSY)
return ret;
for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
man = ttm_manager_type(bdev, i);
if (!man || !man->use_tt)
continue;
for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
list_for_each_entry(bo, &man->lru[j], lru) {
long num_pages;
if (!bo->ttm ||
bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)
continue;
num_pages = bo->ttm->num_pages;
ret = ttm_bo_swapout(bo, ctx, gfp_flags);
/* ttm_bo_swapout has dropped the lru_lock */
if (!ret)
return num_pages;
if (ret != -EBUSY)
return ret;
} } } spin_unlock(&glob->lru_lock); return 0;
} -EXPORT_SYMBOL(ttm_global_swapout); +EXPORT_SYMBOL(ttm_device_swapout);
static void ttm_init_sysman(struct ttm_device *bdev) { diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index b991422e156c..0e82b0662d9e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -1371,7 +1371,7 @@ static int vmw_pm_freeze(struct device *kdev) vmw_execbuf_release_pinned_bo(dev_priv); vmw_resource_evict_all(dev_priv); vmw_release_device_early(dev_priv);
-	while (ttm_global_swapout(&ctx, GFP_KERNEL) > 0);
+	while (ttm_device_swapout(&dev_priv->bdev, &ctx, GFP_KERNEL) == 0);
Is this the intended behaviour? ttm_device_swapout() still just returns num_pages if it swapped something out. I assume this wants to keep swapping stuff out, until it can't anymore. Or am I missing something?
if (dev_priv->enable_fb) vmw_fifo_resource_dec(dev_priv); if (atomic_read(&dev_priv->num_fifo_resources) != 0) {
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 5044ac330858..3587f660e8f4 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -144,7 +144,6 @@ struct ttm_buffer_object {
struct list_head lru; struct list_head ddestroy;
struct list_head swap; /** * Members protected by a bo reservation.
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index 8959c0075cfd..d007feef7676 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -69,7 +69,6 @@ struct ttm_lru_bulk_move_pos { struct ttm_lru_bulk_move { struct ttm_lru_bulk_move_pos tt[TTM_MAX_BO_PRIORITY]; struct ttm_lru_bulk_move_pos vram[TTM_MAX_BO_PRIORITY];
struct ttm_lru_bulk_move_pos swap[TTM_MAX_BO_PRIORITY];
};
/* diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h index 6a0b267d4fe6..cda6efb4c34b 100644 --- a/include/drm/ttm/ttm_device.h +++ b/include/drm/ttm/ttm_device.h @@ -63,11 +63,6 @@ extern struct ttm_global { */ struct list_head device_list;
/**
* Protected by the lru_lock.
*/
struct list_head swap_lru[TTM_MAX_BO_PRIORITY];
/** * Internal protection. */
@@ -298,6 +293,8 @@ struct ttm_device { };
long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags); +long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
gfp_t gfp_flags);
static inline struct ttm_resource_manager *
ttm_manager_type(struct ttm_device *bdev, int mem_type)
--
2.25.1
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
On 15.03.21 at 19:54, Matthew Auld wrote:
On Mon, 15 Mar 2021 at 16:04, Christian König <ckoenig.leichtzumerken@gmail.com> wrote:
[SNIP] @@ -1193,6 +1164,10 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, bool locked; int ret;
+	if (!bo->ttm || bo->ttm->page_flags & (TTM_PAGE_FLAG_SG |
+					       TTM_PAGE_FLAG_SWAPPED))
+		return false;
return 0; ?
Seems inconsistent to return zero here and not drop the lru lock? Or maybe turn this into a programmer error, since the current caller already checks for the above?
Thanks, that is just an artifact from rebasing and should be removed.
[SNIP]
@@ -109,27 +106,60 @@ static int ttm_global_init(void) long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags) { struct ttm_global *glob = &ttm_glob;
struct ttm_device *bdev;
int ret = -EBUSY;
mutex_lock(&ttm_global_mutex);
list_for_each_entry(bdev, &glob->device_list, device_list) {
ret = ttm_device_swapout(bdev, ctx, gfp_flags);
Mixing int and long for num_pages.
Does ttm enforce a maximum page count somewhere for object sizes?
We should use 32-bit values for the number of pages in TTM; even signed values allow for BOs as large as 8TB.
And I really hope that we can get rid of the BO approach in general before we ever come close to that limit.
Something like INT_MAX, since it doesn't look like ttm is consistently using the same type(unsigned long?) when representing the number of pages for an object?
I should probably add a check for that in the tt code, yes.
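Such a guard in the tt code might look roughly like this (hypothetical sketch, not part of this series; the helper name and its placement near ttm_tt_init() are assumptions):

/* Hypothetical guard (not in this series): refuse BOs whose page
 * count would overflow the 32-bit counters used in TTM.  INT_MAX
 * pages at 4K each is the ~8TB bound mentioned above.
 */
static int ttm_tt_check_size(struct ttm_buffer_object *bo)
{
	unsigned long num_pages = bo->base.size >> PAGE_SHIFT;

	if (num_pages > INT_MAX)
		return -EINVAL;
	return 0;
}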
[SNIP] static void ttm_init_sysman(struct ttm_device *bdev) { diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index b991422e156c..0e82b0662d9e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -1371,7 +1371,7 @@ static int vmw_pm_freeze(struct device *kdev) vmw_execbuf_release_pinned_bo(dev_priv); vmw_resource_evict_all(dev_priv); vmw_release_device_early(dev_priv);
-	while (ttm_global_swapout(&ctx, GFP_KERNEL) > 0);
+	while (ttm_device_swapout(&dev_priv->bdev, &ctx, GFP_KERNEL) == 0);
Is this the intended behaviour? ttm_device_swapout() still just returns num_pages if it swapped something out. I assume this wants to keep swapping stuff out, until it can't anymore. Or am I missing something?
Indeed, that's a mix-up. Thanks for pointing that out.
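For reference, the loop presumably wants to keep going only while forward progress is reported (sketch of the fix):

/* Keep swapping while ttm_device_swapout() reports progress: a
 * positive return is the number of pages just swapped out; zero
 * (nothing left) or a negative error must terminate the loop.
 */
while (ttm_device_swapout(&dev_priv->bdev, &ctx, GFP_KERNEL) > 0)
	;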
Christian.
Make the lru_lock per device instead of having a global lock.
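In driver code the conversion is mechanical; extracted from the amdgpu_vm.c hunk below for readability:

/* Every ttm_glob.lru_lock access becomes a dereference through the
 * owning device - here amdgpu's adev->mman.bdev - so two devices no
 * longer contend on a single global lock for LRU updates.
 */
spin_lock(&adev->mman.bdev.lru_lock);	/* was: &ttm_glob.lru_lock */
ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
spin_unlock(&adev->mman.bdev.lru_lock);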
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c |  8 ++---
 drivers/gpu/drm/qxl/qxl_release.c      |  5 +--
 drivers/gpu/drm/ttm/ttm_bo.c           | 49 ++++++++++++--------------
 drivers/gpu/drm/ttm/ttm_device.c       | 12 +++----
 drivers/gpu/drm/ttm/ttm_execbuf_util.c |  8 ++---
 drivers/gpu/drm/ttm/ttm_resource.c     |  9 +++--
 include/drm/ttm/ttm_bo_driver.h        |  4 +--
 include/drm/ttm/ttm_device.h           |  4 +--
 8 files changed, 43 insertions(+), 56 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 9d19078246c8..ae18c0e32347 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -638,15 +638,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, struct amdgpu_vm_bo_base *bo_base;
if (vm->bulk_moveable) { - spin_lock(&ttm_glob.lru_lock); + spin_lock(&adev->mman.bdev.lru_lock); ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move); - spin_unlock(&ttm_glob.lru_lock); + spin_unlock(&adev->mman.bdev.lru_lock); return; }
memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
- spin_lock(&ttm_glob.lru_lock); + spin_lock(&adev->mman.bdev.lru_lock); list_for_each_entry(bo_base, &vm->idle, vm_status) { struct amdgpu_bo *bo = bo_base->bo;
@@ -660,7 +660,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, &bo->shadow->tbo.mem, &vm->lru_bulk_move); } - spin_unlock(&ttm_glob.lru_lock); + spin_unlock(&adev->mman.bdev.lru_lock);
vm->bulk_moveable = true; } diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c index f5845c96d414..b19f2f00b215 100644 --- a/drivers/gpu/drm/qxl/qxl_release.c +++ b/drivers/gpu/drm/qxl/qxl_release.c @@ -426,16 +426,13 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release) release->id | 0xf0000000, release->base.seqno); trace_dma_fence_emit(&release->base);
- spin_lock(&ttm_glob.lru_lock); - list_for_each_entry(entry, &release->bos, head) { bo = entry->bo;
dma_resv_add_shared_fence(bo->base.resv, &release->base); - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL); + ttm_bo_move_to_lru_tail_unlocked(bo); dma_resv_unlock(bo->base.resv); } - spin_unlock(&ttm_glob.lru_lock); ww_acquire_fini(&release->ticket); }
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index a1be88be357b..a8103c8718a3 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -242,9 +242,9 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo) * reference it any more. The only tricky case is the trylock on * the resv object while holding the lru_lock. */ - spin_lock(&ttm_glob.lru_lock); + spin_lock(&bo->bdev->lru_lock); bo->base.resv = &bo->base._resv; - spin_unlock(&ttm_glob.lru_lock); + spin_unlock(&bo->bdev->lru_lock); }
return r; @@ -303,7 +303,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
if (unlock_resv) dma_resv_unlock(bo->base.resv); - spin_unlock(&ttm_glob.lru_lock); + spin_unlock(&bo->bdev->lru_lock);
lret = dma_resv_wait_timeout_rcu(resv, true, interruptible, 30 * HZ); @@ -313,7 +313,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, else if (lret == 0) return -EBUSY;
- spin_lock(&ttm_glob.lru_lock); + spin_lock(&bo->bdev->lru_lock); if (unlock_resv && !dma_resv_trylock(bo->base.resv)) { /* * We raced, and lost, someone else holds the reservation now, @@ -323,7 +323,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, * delayed destruction would succeed, so just return success * here. */ - spin_unlock(&ttm_glob.lru_lock); + spin_unlock(&bo->bdev->lru_lock); return 0; } ret = 0; @@ -332,13 +332,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, if (ret || unlikely(list_empty(&bo->ddestroy))) { if (unlock_resv) dma_resv_unlock(bo->base.resv); - spin_unlock(&ttm_glob.lru_lock); + spin_unlock(&bo->bdev->lru_lock); return ret; }
ttm_bo_del_from_lru(bo); list_del_init(&bo->ddestroy); - spin_unlock(&ttm_glob.lru_lock); + spin_unlock(&bo->bdev->lru_lock); ttm_bo_cleanup_memtype_use(bo);
if (unlock_resv) @@ -355,13 +355,12 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, */ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all) { - struct ttm_global *glob = &ttm_glob; struct list_head removed; bool empty;
INIT_LIST_HEAD(&removed);
- spin_lock(&glob->lru_lock); + spin_lock(&bdev->lru_lock); while (!list_empty(&bdev->ddestroy)) { struct ttm_buffer_object *bo;
@@ -372,24 +371,24 @@ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all) continue;
if (remove_all || bo->base.resv != &bo->base._resv) { - spin_unlock(&glob->lru_lock); + spin_unlock(&bdev->lru_lock); dma_resv_lock(bo->base.resv, NULL);
- spin_lock(&glob->lru_lock); + spin_lock(&bdev->lru_lock); ttm_bo_cleanup_refs(bo, false, !remove_all, true);
} else if (dma_resv_trylock(bo->base.resv)) { ttm_bo_cleanup_refs(bo, false, !remove_all, true); } else { - spin_unlock(&glob->lru_lock); + spin_unlock(&bdev->lru_lock); }
ttm_bo_put(bo); - spin_lock(&glob->lru_lock); + spin_lock(&bdev->lru_lock); } list_splice_tail(&removed, &bdev->ddestroy); empty = list_empty(&bdev->ddestroy); - spin_unlock(&glob->lru_lock); + spin_unlock(&bdev->lru_lock);
return empty; } @@ -424,7 +423,7 @@ static void ttm_bo_release(struct kref *kref) ttm_bo_flush_all_fences(bo); bo->deleted = true;
- spin_lock(&ttm_glob.lru_lock); + spin_lock(&bo->bdev->lru_lock);
/* * Make pinned bos immediately available to @@ -438,17 +437,17 @@ static void ttm_bo_release(struct kref *kref)
kref_init(&bo->kref); list_add_tail(&bo->ddestroy, &bdev->ddestroy); - spin_unlock(&ttm_glob.lru_lock); + spin_unlock(&bo->bdev->lru_lock);
schedule_delayed_work(&bdev->wq, ((HZ / 100) < 1) ? 1 : HZ / 100); return; }
- spin_lock(&ttm_glob.lru_lock); + spin_lock(&bo->bdev->lru_lock); ttm_bo_del_from_lru(bo); list_del(&bo->ddestroy); - spin_unlock(&ttm_glob.lru_lock); + spin_unlock(&bo->bdev->lru_lock);
ttm_bo_cleanup_memtype_use(bo); dma_resv_unlock(bo->base.resv); @@ -622,7 +621,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev, unsigned i; int ret;
- spin_lock(&ttm_glob.lru_lock); + spin_lock(&bo->bdev->lru_lock); for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { list_for_each_entry(bo, &man->lru[i], lru) { bool busy; @@ -659,7 +658,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev, if (!bo) { if (busy_bo && !ttm_bo_get_unless_zero(busy_bo)) busy_bo = NULL; - spin_unlock(&ttm_glob.lru_lock); + spin_unlock(&bo->bdev->lru_lock); ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket); if (busy_bo) ttm_bo_put(busy_bo); @@ -673,7 +672,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev, return ret; }
- spin_unlock(&ttm_glob.lru_lock); + spin_unlock(&bo->bdev->lru_lock);
ret = ttm_bo_evict(bo, ctx); if (locked) @@ -773,10 +772,9 @@ static int ttm_bo_mem_placement(struct ttm_buffer_object *bo, mem->mem_type = place->mem_type; mem->placement = place->flags;
- spin_lock(&ttm_glob.lru_lock); + spin_lock(&bo->bdev->lru_lock); ttm_bo_move_to_lru_tail(bo, mem, NULL); - spin_unlock(&ttm_glob.lru_lock); - + spin_unlock(&bo->bdev->lru_lock); return 0; }
@@ -1160,7 +1158,6 @@ EXPORT_SYMBOL(ttm_bo_wait); int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, gfp_t gfp_flags) { - struct ttm_global *glob = &ttm_glob; bool locked; int ret;
@@ -1185,7 +1182,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
ttm_bo_del_from_lru(bo); /* TODO: Cleanup the locking */ - spin_unlock(&glob->lru_lock); + spin_unlock(&bo->bdev->lru_lock);
/* * Move to system cached diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index 2c280fb1e992..924d892109e8 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -81,7 +81,6 @@ static int ttm_global_init(void) ttm_pool_mgr_init(num_pages * 50 / 100); ttm_tt_mgr_init();
- spin_lock_init(&glob->lru_lock); glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);
if (unlikely(glob->dummy_read_page == NULL)) { @@ -125,13 +124,12 @@ EXPORT_SYMBOL(ttm_global_swapout); long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, gfp_t gfp_flags) { - struct ttm_global *glob = &ttm_glob; struct ttm_resource_manager *man; struct ttm_buffer_object *bo; unsigned i, j; int ret;
- spin_lock(&glob->lru_lock); + spin_lock(&bdev->lru_lock); for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) { man = ttm_manager_type(bdev, i); if (!man || !man->use_tt) @@ -156,7 +154,7 @@ long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, } } } - spin_unlock(&glob->lru_lock); + spin_unlock(&bdev->lru_lock); return 0; } EXPORT_SYMBOL(ttm_device_swapout); @@ -223,6 +221,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
bdev->vma_manager = vma_manager; INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue); + spin_lock_init(&bdev->lru_lock); INIT_LIST_HEAD(&bdev->ddestroy); bdev->dev_mapping = mapping; mutex_lock(&ttm_global_mutex); @@ -235,7 +234,6 @@ EXPORT_SYMBOL(ttm_device_init);
void ttm_device_fini(struct ttm_device *bdev) { - struct ttm_global *glob = &ttm_glob; struct ttm_resource_manager *man; unsigned i;
@@ -252,11 +250,11 @@ void ttm_device_fini(struct ttm_device *bdev) if (ttm_bo_delayed_delete(bdev, true)) pr_debug("Delayed destroy list was clean\n");
- spin_lock(&glob->lru_lock); + spin_lock(&bdev->lru_lock); for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) if (list_empty(&man->lru[0])) pr_debug("Swap list %d was clean\n", i); - spin_unlock(&glob->lru_lock); + spin_unlock(&bdev->lru_lock);
ttm_pool_fini(&bdev->pool); ttm_global_release(); diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c index 690ab97d52b7..071c48d672c6 100644 --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c @@ -51,14 +51,12 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket, if (list_empty(list)) return;
- spin_lock(&ttm_glob.lru_lock); list_for_each_entry(entry, list, head) { struct ttm_buffer_object *bo = entry->bo;
- ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL); + ttm_bo_move_to_lru_tail_unlocked(bo); dma_resv_unlock(bo->base.resv); } - spin_unlock(&ttm_glob.lru_lock);
if (ticket) ww_acquire_fini(ticket); @@ -154,7 +152,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket, if (list_empty(list)) return;
- spin_lock(&ttm_glob.lru_lock); list_for_each_entry(entry, list, head) { struct ttm_buffer_object *bo = entry->bo;
@@ -162,10 +159,9 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket, dma_resv_add_shared_fence(bo->base.resv, fence); else dma_resv_add_excl_fence(bo->base.resv, fence); - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL); + ttm_bo_move_to_lru_tail_unlocked(bo); dma_resv_unlock(bo->base.resv); } - spin_unlock(&ttm_glob.lru_lock); if (ticket) ww_acquire_fini(ticket); } diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c index ed1672a9f332..04f2eef653ab 100644 --- a/drivers/gpu/drm/ttm/ttm_resource.c +++ b/drivers/gpu/drm/ttm/ttm_resource.c @@ -91,7 +91,6 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev, .no_wait_gpu = false, .force_alloc = true }; - struct ttm_global *glob = &ttm_glob; struct dma_fence *fence; int ret; unsigned i; @@ -100,18 +99,18 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev, * Can't use standard list traversal since we're unlocking. */
- spin_lock(&glob->lru_lock); + spin_lock(&bdev->lru_lock); for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { while (!list_empty(&man->lru[i])) { - spin_unlock(&glob->lru_lock); + spin_unlock(&bdev->lru_lock); ret = ttm_mem_evict_first(bdev, man, NULL, &ctx, NULL); if (ret) return ret; - spin_lock(&glob->lru_lock); + spin_lock(&bdev->lru_lock); } } - spin_unlock(&glob->lru_lock); + spin_unlock(&bdev->lru_lock);
spin_lock(&man->move_lock); fence = dma_fence_get(man->move); diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index d007feef7676..dbccac957f8f 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -180,9 +180,9 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo, static inline void ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo) { - spin_lock(&ttm_glob.lru_lock); + spin_lock(&bo->bdev->lru_lock); ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL); - spin_unlock(&ttm_glob.lru_lock); + spin_unlock(&bo->bdev->lru_lock); }
static inline void ttm_bo_assign_mem(struct ttm_buffer_object *bo, diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h index cda6efb4c34b..bae56d29e8ff 100644 --- a/include/drm/ttm/ttm_device.h +++ b/include/drm/ttm/ttm_device.h @@ -56,7 +56,6 @@ extern struct ttm_global { */
struct page *dummy_read_page; - spinlock_t lru_lock;
/** * Protected by ttm_global_mutex. @@ -277,8 +276,9 @@ struct ttm_device { struct ttm_pool pool;
/* - * Protected by the global:lru lock. + * Protection for the per manager LRU and ddestroy lists. */ + spinlock_t lru_lock; struct list_head ddestroy;
/*
Hi "Christian,
I love your patch! Perhaps something to improve:
[auto build test WARNING on drm-tip/drm-tip]
[cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next linus/master drm/drm-next v5.12-rc3 next-20210315]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url:    https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapou...
base:   git://anongit.freedesktop.org/drm/drm-tip drm-tip
config: x86_64-randconfig-m001-20210315 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
smatch warnings:
   drivers/gpu/drm/ttm/ttm_device.c:158 ttm_device_swapout() warn: inconsistent returns '&bdev->lru_lock'.
   drivers/gpu/drm/ttm/ttm_bo.c:665 ttm_mem_evict_first() error: we previously assumed 'bo' could be null (see line 662)
vim +158 drivers/gpu/drm/ttm/ttm_device.c
70ae63f3a85b97 Christian König 2021-03-15  123  
70ae63f3a85b97 Christian König 2021-03-15  124  long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
70ae63f3a85b97 Christian König 2021-03-15  125  			gfp_t gfp_flags)
70ae63f3a85b97 Christian König 2021-03-15  126  {
70ae63f3a85b97 Christian König 2021-03-15  127  	struct ttm_resource_manager *man;
824dca26fe3958 Christian König 2021-03-15  128  	struct ttm_buffer_object *bo;
70ae63f3a85b97 Christian König 2021-03-15  129  	unsigned i, j;
824dca26fe3958 Christian König 2021-03-15  130  	int ret;
824dca26fe3958 Christian König 2021-03-15  131  
1ed8d8fc515b90 Christian König 2021-03-15  132  	spin_lock(&bdev->lru_lock);
70ae63f3a85b97 Christian König 2021-03-15  133  	for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
70ae63f3a85b97 Christian König 2021-03-15  134  		man = ttm_manager_type(bdev, i);
70ae63f3a85b97 Christian König 2021-03-15  135  		if (!man || !man->use_tt)
70ae63f3a85b97 Christian König 2021-03-15  136  			continue;
70ae63f3a85b97 Christian König 2021-03-15  137  
70ae63f3a85b97 Christian König 2021-03-15  138  		for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
70ae63f3a85b97 Christian König 2021-03-15  139  			list_for_each_entry(bo, &man->lru[j], lru) {
70ae63f3a85b97 Christian König 2021-03-15  140  				long num_pages;
824dca26fe3958 Christian König 2021-03-15  141  
70ae63f3a85b97 Christian König 2021-03-15  142  				if (!bo->ttm ||
70ae63f3a85b97 Christian König 2021-03-15  143  				    bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
70ae63f3a85b97 Christian König 2021-03-15  144  				    bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)
70ae63f3a85b97 Christian König 2021-03-15  145  					continue;
70ae63f3a85b97 Christian König 2021-03-15  146  
70ae63f3a85b97 Christian König 2021-03-15  147  				num_pages = bo->ttm->num_pages;
824dca26fe3958 Christian König 2021-03-15  148  				ret = ttm_bo_swapout(bo, ctx, gfp_flags);
824dca26fe3958 Christian König 2021-03-15  149  				/* ttm_bo_swapout has dropped the lru_lock */
824dca26fe3958 Christian König 2021-03-15  150  				if (!ret)
824dca26fe3958 Christian König 2021-03-15  151  					return num_pages;
824dca26fe3958 Christian König 2021-03-15  152  				if (ret != -EBUSY)
824dca26fe3958 Christian König 2021-03-15  153  					return ret;
824dca26fe3958 Christian König 2021-03-15  154  			}
824dca26fe3958 Christian König 2021-03-15  155  		}
70ae63f3a85b97 Christian König 2021-03-15  156  	}
1ed8d8fc515b90 Christian König 2021-03-15  157  	spin_unlock(&bdev->lru_lock);
824dca26fe3958 Christian König 2021-03-15 @158  	return 0;
824dca26fe3958 Christian König 2021-03-15  159  }
70ae63f3a85b97 Christian König 2021-03-15  160  EXPORT_SYMBOL(ttm_device_swapout);
824dca26fe3958 Christian König 2021-03-15  161  
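The second smatch complaint is against ttm_mem_evict_first(), where the conversion takes the lock through bo->bdev even though bo is the victim pointer and starts out NULL. Since the device is already a function parameter there, the likely shape of the fix is (a sketch, not the committed change):

/* Sketch: in ttm_mem_evict_first() the device is the 'bdev'
 * parameter, so the per-device lock is taken through it - never
 * through 'bo', which stays NULL when no victim was found.
 */
spin_lock(&bdev->lru_lock);
/* ... LRU scan may leave bo == NULL if only busy BOs were seen ... */
if (!bo) {
	if (busy_bo && !ttm_bo_get_unless_zero(busy_bo))
		busy_bo = NULL;
	spin_unlock(&bdev->lru_lock);	/* not bo->bdev: bo may be NULL */
	ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket);
	if (busy_bo)
		ttm_bo_put(busy_bo);
	return ret;
}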
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
On Mon, Mar 15, 2021 at 05:04:22PM +0100, Christian König wrote:
Make the lru_lock per device instead of having a global lock.
Signed-off-by: Christian König <christian.koenig@amd.com>
I guess a per-zone LRU lock is a lot more work, since then we need to handle ordering and ABBA deadlocks? s/zone/mem region/ I think is the proper ttm lingo. -Daniel
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c |  8 ++---
 drivers/gpu/drm/qxl/qxl_release.c      |  5 +--
 drivers/gpu/drm/ttm/ttm_bo.c           | 49 ++++++++++++--------------
 drivers/gpu/drm/ttm/ttm_device.c       | 12 +++----
 drivers/gpu/drm/ttm/ttm_execbuf_util.c |  8 ++---
 drivers/gpu/drm/ttm/ttm_resource.c     |  9 +++--
 include/drm/ttm/ttm_bo_driver.h        |  4 +--
 include/drm/ttm/ttm_device.h           |  4 +--
 8 files changed, 43 insertions(+), 56 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 9d19078246c8..ae18c0e32347 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -638,15 +638,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, struct amdgpu_vm_bo_base *bo_base;
if (vm->bulk_moveable) {
spin_lock(&ttm_glob.lru_lock);
ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);spin_lock(&adev->mman.bdev.lru_lock);
spin_unlock(&ttm_glob.lru_lock);
spin_unlock(&adev->mman.bdev.lru_lock);
return; }
memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
- spin_lock(&ttm_glob.lru_lock);
- spin_lock(&adev->mman.bdev.lru_lock); list_for_each_entry(bo_base, &vm->idle, vm_status) { struct amdgpu_bo *bo = bo_base->bo;
@@ -660,7 +660,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, &bo->shadow->tbo.mem, &vm->lru_bulk_move); }
- spin_unlock(&ttm_glob.lru_lock);
spin_unlock(&adev->mman.bdev.lru_lock);
vm->bulk_moveable = true;
} diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c index f5845c96d414..b19f2f00b215 100644 --- a/drivers/gpu/drm/qxl/qxl_release.c +++ b/drivers/gpu/drm/qxl/qxl_release.c @@ -426,16 +426,13 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release) release->id | 0xf0000000, release->base.seqno); trace_dma_fence_emit(&release->base);
spin_lock(&ttm_glob.lru_lock);
list_for_each_entry(entry, &release->bos, head) { bo = entry->bo;
dma_resv_add_shared_fence(bo->base.resv, &release->base);
ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
dma_resv_unlock(bo->base.resv); }ttm_bo_move_to_lru_tail_unlocked(bo);
- spin_unlock(&ttm_glob.lru_lock); ww_acquire_fini(&release->ticket);
}
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index a1be88be357b..a8103c8718a3 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -242,9 +242,9 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo) * reference it any more. The only tricky case is the trylock on * the resv object while holding the lru_lock. */
spin_lock(&ttm_glob.lru_lock);
bo->base.resv = &bo->base._resv;spin_lock(&bo->bdev->lru_lock);
spin_unlock(&ttm_glob.lru_lock);
spin_unlock(&bo->bdev->lru_lock);
}
return r;
@@ -303,7 +303,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
if (unlock_resv) dma_resv_unlock(bo->base.resv);
spin_unlock(&ttm_glob.lru_lock);
spin_unlock(&bo->bdev->lru_lock);
lret = dma_resv_wait_timeout_rcu(resv, true, interruptible, 30 * HZ);
@@ -313,7 +313,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, else if (lret == 0) return -EBUSY;
spin_lock(&ttm_glob.lru_lock);
if (unlock_resv && !dma_resv_trylock(bo->base.resv)) { /* * We raced, and lost, someone else holds the reservation now,spin_lock(&bo->bdev->lru_lock);
@@ -323,7 +323,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, * delayed destruction would succeed, so just return success * here. */
spin_unlock(&ttm_glob.lru_lock);
} ret = 0;spin_unlock(&bo->bdev->lru_lock); return 0;
@@ -332,13 +332,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, if (ret || unlikely(list_empty(&bo->ddestroy))) { if (unlock_resv) dma_resv_unlock(bo->base.resv);
spin_unlock(&ttm_glob.lru_lock);
spin_unlock(&bo->bdev->lru_lock);
return ret; }
ttm_bo_del_from_lru(bo); list_del_init(&bo->ddestroy);
- spin_unlock(&ttm_glob.lru_lock);
spin_unlock(&bo->bdev->lru_lock); ttm_bo_cleanup_memtype_use(bo);
if (unlock_resv)
@@ -355,13 +355,12 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, */ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all) {
struct ttm_global *glob = &ttm_glob; struct list_head removed; bool empty;
INIT_LIST_HEAD(&removed);
spin_lock(&glob->lru_lock);
- spin_lock(&bdev->lru_lock); while (!list_empty(&bdev->ddestroy)) { struct ttm_buffer_object *bo;
@@ -372,24 +371,24 @@ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all) continue;
if (remove_all || bo->base.resv != &bo->base._resv) {
spin_unlock(&glob->lru_lock);
spin_unlock(&bdev->lru_lock); dma_resv_lock(bo->base.resv, NULL);
spin_lock(&glob->lru_lock);
spin_lock(&bdev->lru_lock); ttm_bo_cleanup_refs(bo, false, !remove_all, true);
} else if (dma_resv_trylock(bo->base.resv)) { ttm_bo_cleanup_refs(bo, false, !remove_all, true); } else {
spin_unlock(&glob->lru_lock);
spin_unlock(&bdev->lru_lock);
}
ttm_bo_put(bo);
spin_lock(&glob->lru_lock);
} list_splice_tail(&removed, &bdev->ddestroy); empty = list_empty(&bdev->ddestroy);spin_lock(&bdev->lru_lock);
- spin_unlock(&glob->lru_lock);
spin_unlock(&bdev->lru_lock);
return empty;
} @@ -424,7 +423,7 @@ static void ttm_bo_release(struct kref *kref) ttm_bo_flush_all_fences(bo); bo->deleted = true;
spin_lock(&ttm_glob.lru_lock);
spin_lock(&bo->bdev->lru_lock);
/*
- Make pinned bos immediately available to
@@ -438,17 +437,17 @@ static void ttm_bo_release(struct kref *kref)
kref_init(&bo->kref); list_add_tail(&bo->ddestroy, &bdev->ddestroy);
spin_unlock(&ttm_glob.lru_lock);
spin_unlock(&bo->bdev->lru_lock);
schedule_delayed_work(&bdev->wq, ((HZ / 100) < 1) ? 1 : HZ / 100); return; }
- spin_lock(&ttm_glob.lru_lock);
- spin_lock(&bo->bdev->lru_lock); ttm_bo_del_from_lru(bo); list_del(&bo->ddestroy);
- spin_unlock(&ttm_glob.lru_lock);
spin_unlock(&bo->bdev->lru_lock);
ttm_bo_cleanup_memtype_use(bo); dma_resv_unlock(bo->base.resv);
@@ -622,7 +621,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev, unsigned i; int ret;
- spin_lock(&ttm_glob.lru_lock);
- spin_lock(&bo->bdev->lru_lock); for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { list_for_each_entry(bo, &man->lru[i], lru) { bool busy;
@@ -659,7 +658,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev, if (!bo) { if (busy_bo && !ttm_bo_get_unless_zero(busy_bo)) busy_bo = NULL;
spin_unlock(&ttm_glob.lru_lock);
ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket); if (busy_bo) ttm_bo_put(busy_bo);spin_unlock(&bo->bdev->lru_lock);
@@ -673,7 +672,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev, return ret; }
- spin_unlock(&ttm_glob.lru_lock);
spin_unlock(&bo->bdev->lru_lock);
ret = ttm_bo_evict(bo, ctx); if (locked)
@@ -773,10 +772,9 @@ static int ttm_bo_mem_placement(struct ttm_buffer_object *bo, mem->mem_type = place->mem_type; mem->placement = place->flags;
- spin_lock(&ttm_glob.lru_lock);
- spin_lock(&bo->bdev->lru_lock); ttm_bo_move_to_lru_tail(bo, mem, NULL);
- spin_unlock(&ttm_glob.lru_lock);
- spin_unlock(&bo->bdev->lru_lock); return 0;
}
@@ -1160,7 +1158,6 @@ EXPORT_SYMBOL(ttm_bo_wait); int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, gfp_t gfp_flags) {
- struct ttm_global *glob = &ttm_glob; bool locked; int ret;
@@ -1185,7 +1182,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
ttm_bo_del_from_lru(bo); /* TODO: Cleanup the locking */
- spin_unlock(&glob->lru_lock);
spin_unlock(&bo->bdev->lru_lock);
/*
- Move to system cached
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index 2c280fb1e992..924d892109e8 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -81,7 +81,6 @@ static int ttm_global_init(void)
 	ttm_pool_mgr_init(num_pages * 50 / 100);
 	ttm_tt_mgr_init();
 
-	spin_lock_init(&glob->lru_lock);
 	glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);
 
 	if (unlikely(glob->dummy_read_page == NULL)) {
@@ -125,13 +124,12 @@ EXPORT_SYMBOL(ttm_global_swapout);
 long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
 			gfp_t gfp_flags)
 {
-	struct ttm_global *glob = &ttm_glob;
 	struct ttm_resource_manager *man;
 	struct ttm_buffer_object *bo;
 	unsigned i, j;
 	int ret;
 
-	spin_lock(&glob->lru_lock);
+	spin_lock(&bdev->lru_lock);
 	for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
 		man = ttm_manager_type(bdev, i);
 		if (!man || !man->use_tt)
@@ -156,7 +154,7 @@ long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
 			}
 		}
 	}
-	spin_unlock(&glob->lru_lock);
+	spin_unlock(&bdev->lru_lock);
 	return 0;
 }
 EXPORT_SYMBOL(ttm_device_swapout);
@@ -223,6 +221,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
 
 	bdev->vma_manager = vma_manager;
 	INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue);
+	spin_lock_init(&bdev->lru_lock);
 	INIT_LIST_HEAD(&bdev->ddestroy);
 	bdev->dev_mapping = mapping;
 	mutex_lock(&ttm_global_mutex);
@@ -235,7 +234,6 @@ EXPORT_SYMBOL(ttm_device_init);
 
 void ttm_device_fini(struct ttm_device *bdev)
 {
-	struct ttm_global *glob = &ttm_glob;
 	struct ttm_resource_manager *man;
 	unsigned i;
 
@@ -252,11 +250,11 @@ void ttm_device_fini(struct ttm_device *bdev)
 	if (ttm_bo_delayed_delete(bdev, true))
 		pr_debug("Delayed destroy list was clean\n");
 
-	spin_lock(&glob->lru_lock);
+	spin_lock(&bdev->lru_lock);
 	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
 		if (list_empty(&man->lru[0]))
 			pr_debug("Swap list %d was clean\n", i);
-	spin_unlock(&glob->lru_lock);
+	spin_unlock(&bdev->lru_lock);
 
 	ttm_pool_fini(&bdev->pool);
 	ttm_global_release();
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
index 690ab97d52b7..071c48d672c6 100644
--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
@@ -51,14 +51,12 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket,
 	if (list_empty(list))
 		return;
 
-	spin_lock(&ttm_glob.lru_lock);
 	list_for_each_entry(entry, list, head) {
 		struct ttm_buffer_object *bo = entry->bo;
 
-		ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
+		ttm_bo_move_to_lru_tail_unlocked(bo);
 		dma_resv_unlock(bo->base.resv);
 	}
-	spin_unlock(&ttm_glob.lru_lock);
 
 	if (ticket)
 		ww_acquire_fini(ticket);
@@ -154,7 +152,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
 	if (list_empty(list))
 		return;
 
-	spin_lock(&ttm_glob.lru_lock);
 	list_for_each_entry(entry, list, head) {
 		struct ttm_buffer_object *bo = entry->bo;
 
@@ -162,10 +159,9 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
 			dma_resv_add_shared_fence(bo->base.resv, fence);
 		else
 			dma_resv_add_excl_fence(bo->base.resv, fence);
-		ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
+		ttm_bo_move_to_lru_tail_unlocked(bo);
 		dma_resv_unlock(bo->base.resv);
 	}
-	spin_unlock(&ttm_glob.lru_lock);
 
 	if (ticket)
 		ww_acquire_fini(ticket);
 }
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
index ed1672a9f332..04f2eef653ab 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -91,7 +91,6 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
 		.no_wait_gpu = false,
 		.force_alloc = true
 	};
-	struct ttm_global *glob = &ttm_glob;
 	struct dma_fence *fence;
 	int ret;
 	unsigned i;
@@ -100,18 +99,18 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
 	 * Can't use standard list traversal since we're unlocking.
 	 */
 
-	spin_lock(&glob->lru_lock);
+	spin_lock(&bdev->lru_lock);
 	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
 		while (!list_empty(&man->lru[i])) {
-			spin_unlock(&glob->lru_lock);
+			spin_unlock(&bdev->lru_lock);
 			ret = ttm_mem_evict_first(bdev, man, NULL, &ctx,
 						  NULL);
 			if (ret)
 				return ret;
-			spin_lock(&glob->lru_lock);
+			spin_lock(&bdev->lru_lock);
 		}
 	}
-	spin_unlock(&glob->lru_lock);
+	spin_unlock(&bdev->lru_lock);
 
 	spin_lock(&man->move_lock);
 	fence = dma_fence_get(man->move);
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index d007feef7676..dbccac957f8f 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -180,9 +180,9 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo,
 static inline void
 ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo)
 {
-	spin_lock(&ttm_glob.lru_lock);
+	spin_lock(&bo->bdev->lru_lock);
 	ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
-	spin_unlock(&ttm_glob.lru_lock);
+	spin_unlock(&bo->bdev->lru_lock);
 }
 
 static inline void ttm_bo_assign_mem(struct ttm_buffer_object *bo,
diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
index cda6efb4c34b..bae56d29e8ff 100644
--- a/include/drm/ttm/ttm_device.h
+++ b/include/drm/ttm/ttm_device.h
@@ -56,7 +56,6 @@ extern struct ttm_global {
 	 */
 	struct page *dummy_read_page;
-	spinlock_t lru_lock;
 
 	/**
 	 * Protected by ttm_global_mutex.
 	 */
@@ -277,8 +276,9 @@ struct ttm_device {
 	struct ttm_pool pool;
 
 	/*
-	 * Protected by the global:lru lock.
+	 * Protection for the per manager LRU and ddestroy lists.
 	 */
+	spinlock_t lru_lock;
 	struct list_head ddestroy;
 
 	/*
--
2.25.1
On 16.03.21 10:35, Daniel Vetter wrote:
On Mon, Mar 15, 2021 at 05:04:22PM +0100, Christian König wrote:
Instead of having a global lock.
Signed-off-by: Christian König christian.koenig@amd.com
I guess per zone lru lock is a lot more work since then we need to handle ordering and ABBA deadlocks? s/zone/mem region/ I think is the proper ttm lingo.
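For illustration, the ordering hazard in question, sketched with two hypothetical per manager LRU locks (not TTM code; names are made up):

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(vram_lru_lock);
static DEFINE_SPINLOCK(gtt_lru_lock);

static void evict_vram_to_gtt(void)
{
	spin_lock(&vram_lru_lock);	/* A */
	spin_lock(&gtt_lru_lock);	/* then B */
	/* ... move a BO from the VRAM LRU to the GTT LRU ... */
	spin_unlock(&gtt_lru_lock);
	spin_unlock(&vram_lru_lock);
}

static void promote_gtt_to_vram(void)
{
	spin_lock(&gtt_lru_lock);	/* B */
	spin_lock(&vram_lru_lock);	/* then A: ABBA deadlock against
					 * evict_vram_to_gtt() */
	spin_unlock(&vram_lru_lock);
	spin_unlock(&gtt_lru_lock);
}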
Making the LRU per resource manager is the long term goal, yes.
My key idea so far is that we make bo->mem a pointer and then move the LRU handling into the resource object instead of the BO.
The resource object then just references the BO, so that we can figure out which BO to evict or which fence to wait for to free up a resource.
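A minimal sketch of that layout; all names here are hypothetical, not the actual TTM structures:

#include <linux/list.h>
#include <linux/types.h>

struct ttm_buffer_object;

/* Sketch: the LRU link lives in the resource, not in the BO. */
struct resource_sketch {
	struct list_head lru;		/* per manager LRU link */
	struct ttm_buffer_object *bo;	/* which BO to evict, or whose
					 * fence to wait for */
	u32 mem_type;
};

/* bo->mem then becomes a pointer instead of an embedded struct. */
struct buffer_object_sketch {
	struct resource_sketch *mem;
};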
Regards, Christian.
-Daniel
On Tue, Mar 16, 2021 at 1:03 PM Christian König ckoenig.leichtzumerken@gmail.com wrote:
On 16.03.21 10:35, Daniel Vetter wrote:
On Mon, Mar 15, 2021 at 05:04:22PM +0100, Christian König wrote:
Instead of having a global lock.
Signed-off-by: Christian König christian.koenig@amd.com
I guess per zone lru lock is a lot more work since then we need to handle ordering and ABBA deadlocks? s/zone/mem region/ I think is the proper ttm lingo.
Making the LRU per resource manager is the long term goal, yes.
My key idea so far is that we make bo->mem a pointer and then move the LRU handling into the resource object instead of the BO.
The resource object then just references the BO, so that we can figure out which BO to evict or which fence to wait for to free up a resource.
Hm yeah that could work out fairly nicely. Both from locking but also refcounting pov. And maybe we could then use entirely free-standing mem objects instead of ghost objects? Since that's a part of ttm I don't grok and it always looks a bit like a hack to me. So for these ghost mem objects you'd only need the lru + dma_fence_wait (can grab a fence ref under the lru and then drop lru lock for that) for eviction, no dma_resv_lock. -Daniel
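A rough sketch of that eviction flow, assuming free-standing resource objects with a hypothetical moving fence member (none of these names are existing TTM API):

#include <linux/dma-fence.h>
#include <linux/list.h>
#include <linux/spinlock.h>

struct free_standing_res {
	struct list_head lru;
	struct dma_fence *moving;	/* fence of the last move; hypothetical */
};

static int evict_free_standing(spinlock_t *lru_lock,
			       struct free_standing_res *res)
{
	struct dma_fence *fence;
	int ret = 0;

	spin_lock(lru_lock);
	/* Grab a fence reference under the LRU lock ... */
	fence = dma_fence_get(res->moving);
	list_del_init(&res->lru);
	spin_unlock(lru_lock);

	/* ... and wait outside of it; no dma_resv_lock involved. */
	if (fence) {
		ret = dma_fence_wait(fence, true);
		dma_fence_put(fence);
	}
	return ret;
}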
On 16.03.21 13:05, Daniel Vetter wrote:
On Tue, Mar 16, 2021 at 1:03 PM Christian König ckoenig.leichtzumerken@gmail.com wrote:
On 16.03.21 10:35, Daniel Vetter wrote:
On Mon, Mar 15, 2021 at 05:04:22PM +0100, Christian König wrote:
Instead of having a global lock.
Signed-off-by: Christian König christian.koenig@amd.com
I guess per zone lru lock is a lot more work since then we need to handle ordering and ABBA deadlocks? s/zone/mem region/ I think is the proper ttm lingo.
Making the LRU per resource manager is the long term goal, yes.
My key idea so far is that we make bo->mem a pointer and then move the LRU handling into the resource object instead of the BO.
The resource object then just references the BO, so that we can figure out which BO to evict or which fence to wait for to free up a resource.
Hm yeah that could work out fairly nicely. Both from locking but also refcounting pov. And maybe we could then use entirely free-standing mem objects instead of ghost objects? Since that's a part of ttm I don't grok and it always looks a bit like a hack to me. So for these ghost mem objects you'd only need the lru + dma_fence_wait (can grab a fence ref under the lru and then drop lru lock for that) for eviction, no dma_resv_lock.
Exactly that's the background here, yes.
Those ghost objects are more than just a bit of a hack, and they result in tons of checks in the driver to figure out whether a BO is really a BO or a ghost.
Moving all that handling into the resource objects not only allows us to remove that, but also makes things like delayed delete work out pretty nicely.
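The kind of driver-side check that goes away looks roughly like this (a sketch only; drivers such as amdgpu compare the destroy callback to tell their own BOs from TTM's ghost objects):

#include <drm/ttm/ttm_bo_api.h>

/* Hypothetical driver destroy callback. */
static void driver_bo_destroy(struct ttm_buffer_object *bo)
{
	/* driver specific cleanup would go here */
}

/* With ghost objects, every callback first has to check whether the
 * BO it was handed is really one of the driver's own or a ghost TTM
 * created behind its back for a pipelined move. */
static bool driver_bo_is_ours(struct ttm_buffer_object *bo)
{
	return bo->destroy == driver_bo_destroy;
}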
Christian.
Hi "Christian,
I love your patch! Perhaps something to improve:
[auto build test WARNING on drm-tip/drm-tip]
[cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next linus/master drm/drm-next v5.12-rc3 next-20210315]
[If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch]
url:    https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapou...
base:   git://anongit.freedesktop.org/drm/drm-tip drm-tip
config: i386-allyesconfig (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce (this is a W=1 build):
        # https://github.com/0day-ci/linux/commit/824dca26fe395899b41d9790944ddea345f7...
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551
        git checkout 824dca26fe395899b41d9790944ddea345f7a6fd
        # save the attached .config to linux build tree
        make W=1 ARCH=i386
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot lkp@intel.com
All warnings (new ones prefixed by >>):
   drivers/gpu/drm/ttm/ttm_device.c:42: warning: Function parameter or member 'ttm_global_mutex' not described in 'DEFINE_MUTEX'
   drivers/gpu/drm/ttm/ttm_device.c:42: warning: expecting prototype for ttm_global_mutex(). Prototype was for DEFINE_MUTEX() instead
   drivers/gpu/drm/ttm/ttm_device.c:110: warning: Function parameter or member 'ctx' not described in 'ttm_global_swapout'
   drivers/gpu/drm/ttm/ttm_device.c:110: warning: Function parameter or member 'gfp_flags' not described in 'ttm_global_swapout'
>> drivers/gpu/drm/ttm/ttm_device.c:110: warning: expecting prototype for A buffer object shrink method that tries to swap out the first(). Prototype was for ttm_global_swapout() instead
vim +110 drivers/gpu/drm/ttm/ttm_device.c
   104	
   105	/**
   106	 * A buffer object shrink method that tries to swap out the first
   107	 * buffer object on the global::swap_lru list.
   108	 */
   109	long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
 > 110	{
   111		struct ttm_global *glob = &ttm_glob;
   112		struct ttm_buffer_object *bo;
   113		unsigned i;
   114		int ret;
   115	
   116		spin_lock(&glob->lru_lock);
   117		for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
   118			list_for_each_entry(bo, &glob->swap_lru[i], swap) {
   119				uint32_t num_pages = bo->ttm->num_pages;
   120	
   121				ret = ttm_bo_swapout(bo, ctx, gfp_flags);
   122				/* ttm_bo_swapout has dropped the lru_lock */
   123				if (!ret)
   124					return num_pages;
   125				if (ret != -EBUSY)
   126					return ret;
   127			}
   128		}
   129		spin_unlock(&glob->lru_lock);
   130		return 0;
   131	}
   132	EXPORT_SYMBOL(ttm_global_swapout);
   133	
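One possible way to address the ttm_global_swapout() warnings (a suggestion, not part of the posted series) is to turn the comment into proper kernel-doc and document both parameters and the return value:

/**
 * ttm_global_swapout() - Try to swap out a buffer object
 * @ctx: operation context for the swapout
 * @gfp_flags: flags to allocate the swap space with
 *
 * Tries to swap out the first buffer object on the global swap LRU
 * that can be locked.
 *
 * Return: number of pages swapped out, 0 if nothing was found, or a
 * negative error code.
 */
long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);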
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Hi "Christian,
I love your patch! Perhaps something to improve:
[auto build test WARNING on drm-tip/drm-tip]
[also build test WARNING on next-20210319]
[cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next linus/master drm/drm-next v5.12-rc3]
[If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch]
url:    https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapou...
base:   git://anongit.freedesktop.org/drm/drm-tip drm-tip
config: x86_64-randconfig-a005-20210318 (attached as .config)
compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project fcc1ce00931751ac02498986feb37744e9ace8de)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install x86_64 cross compiling tool for clang build
        # apt-get install binutils-x86-64-linux-gnu
        # https://github.com/0day-ci/linux/commit/824dca26fe395899b41d9790944ddea345f7...
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551
        git checkout 824dca26fe395899b41d9790944ddea345f7a6fd
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=x86_64
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot lkp@intel.com
All warnings (new ones prefixed by >>):
   drivers/gpu/drm/ttm/ttm_device.c:42: warning: Function parameter or member 'ttm_global_mutex' not described in 'DEFINE_MUTEX'
   drivers/gpu/drm/ttm/ttm_device.c:42: warning: expecting prototype for ttm_global_mutex(). Prototype was for DEFINE_MUTEX() instead
   drivers/gpu/drm/ttm/ttm_device.c:110: warning: Function parameter or member 'ctx' not described in 'ttm_global_swapout'
   drivers/gpu/drm/ttm/ttm_device.c:110: warning: Function parameter or member 'gfp_flags' not described in 'ttm_global_swapout'
>> drivers/gpu/drm/ttm/ttm_device.c:110: warning: expecting prototype for A buffer object shrink method that tries to swap out the first(). Prototype was for ttm_global_swapout() instead
vim +110 drivers/gpu/drm/ttm/ttm_device.c
   104	
   105	/**
   106	 * A buffer object shrink method that tries to swap out the first
   107	 * buffer object on the global::swap_lru list.
   108	 */
   109	long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
 > 110	{
   111		struct ttm_global *glob = &ttm_glob;
   112		struct ttm_buffer_object *bo;
   113		unsigned i;
   114		int ret;
   115	
   116		spin_lock(&glob->lru_lock);
   117		for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
   118			list_for_each_entry(bo, &glob->swap_lru[i], swap) {
   119				uint32_t num_pages = bo->ttm->num_pages;
   120	
   121				ret = ttm_bo_swapout(bo, ctx, gfp_flags);
   122				/* ttm_bo_swapout has dropped the lru_lock */
   123				if (!ret)
   124					return num_pages;
   125				if (ret != -EBUSY)
   126					return ret;
   127			}
   128		}
   129		spin_unlock(&glob->lru_lock);
   130		return 0;
   131	}
   132	EXPORT_SYMBOL(ttm_global_swapout);
   133	
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org