During eviction we do want to trigger the OOM killer.
Only while doing new allocations we should try to avoid that and return -ENOMEM to the application.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 3 --- drivers/gpu/drm/ttm/ttm_pool.c | 2 +- drivers/gpu/drm/ttm/ttm_tt.c | 7 ------- include/drm/ttm/ttm_bo_api.h | 2 ++ include/drm/ttm/ttm_bo_driver.h | 3 --- 6 files changed, 5 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 1aa516429c80..52041f48e1c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -516,6 +516,8 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, struct ttm_operation_ctx ctx = { .interruptible = (bp->type != ttm_bo_type_kernel), .no_wait_gpu = bp->no_wait_gpu, + /* We opt to avoid OOM on system pages allocations */ + .retry_mayfail = true, .resv = bp->resv, .flags = bp->type != ttm_bo_type_kernel ? TTM_OPT_FLAG_ALLOW_RES_EVICT : 0 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index bd6e6641c3fc..c01c060e4ac5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1914,9 +1914,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) } adev->mman.initialized = true;
- /* We opt to avoid OOM on system pages allocations */ - adev->mman.bdev.no_retry = true; - /* Initialize VRAM pool with all of VRAM divided into pages */ r = amdgpu_vram_mgr_init(adev); if (r) { diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 1e50deefb5d5..7100fd0e9e88 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -367,7 +367,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt, if (tt->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC) gfp_flags |= __GFP_ZERO;
- if (tt->page_flags & TTM_PAGE_FLAG_NO_RETRY) + if (ctx->retry_mayfail) gfp_flags |= __GFP_RETRY_MAYFAIL;
if (pool->use_dma32) diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 8861a74ac335..cfd633c7e764 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -51,9 +51,6 @@ int ttm_tt_create(struct ttm_buffer_object *bo, bool zero_alloc) if (bo->ttm) return 0;
- if (bdev->no_retry) - page_flags |= TTM_PAGE_FLAG_NO_RETRY; - switch (bo->type) { case ttm_bo_type_device: if (zero_alloc) @@ -211,8 +208,6 @@ int ttm_tt_swapin(struct ttm_tt *ttm)
swap_space = swap_storage->f_mapping; gfp_mask = mapping_gfp_mask(swap_space); - if (ttm->page_flags & TTM_PAGE_FLAG_NO_RETRY) - gfp_mask |= __GFP_RETRY_MAYFAIL;
for (i = 0; i < ttm->num_pages; ++i) { from_page = shmem_read_mapping_page_gfp(swap_space, i, @@ -260,8 +255,6 @@ int ttm_tt_swapout(struct ttm_bo_device *bdev, struct ttm_tt *ttm)
swap_space = swap_storage->f_mapping; gfp_mask = mapping_gfp_mask(swap_space); - if (ttm->page_flags & TTM_PAGE_FLAG_NO_RETRY) - gfp_mask |= __GFP_RETRY_MAYFAIL;
for (i = 0; i < ttm->num_pages; ++i) { from_page = ttm->pages[i]; diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 37102e45e496..4c7c2d574db6 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -195,6 +195,7 @@ struct ttm_bo_kmap_obj { * * @interruptible: Sleep interruptible if sleeping. * @no_wait_gpu: Return immediately if the GPU is busy. + * @retry_mayfail: Set the __GFP_RETRY_MAYFAIL when allocation pages. * @resv: Reservation object to allow reserved evictions with. * @flags: Including the following flags * @@ -204,6 +205,7 @@ struct ttm_bo_kmap_obj { struct ttm_operation_ctx { bool interruptible; bool no_wait_gpu; + bool retry_mayfail; struct dma_resv *resv; uint64_t bytes_moved; uint32_t flags; diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index e9f683fa72dc..da8208f43378 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -276,7 +276,6 @@ extern struct ttm_bo_global { * @dev_mapping: A pointer to the struct address_space representing the * device address space. * @wq: Work queue structure for the delayed delete workqueue. - * @no_retry: Don't retry allocation if it fails * */
@@ -314,8 +313,6 @@ struct ttm_bo_device { */
struct delayed_work wq; - - bool no_retry; };
static inline struct ttm_resource_manager *ttm_manager_type(struct ttm_bo_device *bdev,
The ttm_operation_ctx structure has a mixture of flags and bools. Drop the flags and replace them with bools as well.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 5 ++--- drivers/gpu/drm/ttm/ttm_bo.c | 2 +- drivers/gpu/drm/ttm/ttm_bo_vm.c | 3 +-- drivers/gpu/drm/ttm/ttm_memory.c | 3 ++- drivers/gpu/drm/ttm/ttm_resource.c | 2 +- include/drm/ttm/ttm_bo_api.h | 10 ++++------ 6 files changed, 11 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 52041f48e1c9..c302a2c7982d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -518,9 +518,8 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, .no_wait_gpu = bp->no_wait_gpu, /* We opt to avoid OOM on system pages allocations */ .retry_mayfail = true, - .resv = bp->resv, - .flags = bp->type != ttm_bo_type_kernel ? - TTM_OPT_FLAG_ALLOW_RES_EVICT : 0 + .allow_res_evict = bp->type != ttm_bo_type_kernel, + .resv = bp->resv }; struct amdgpu_bo *bo; unsigned long page_align, size = bp->size; diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index c63b7ea1cd5d..e2a124b3affb 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -637,7 +637,7 @@ static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
if (bo->base.resv == ctx->resv) { dma_resv_assert_held(bo->base.resv); - if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT) + if (ctx->allow_res_evict) ret = true; *locked = false; if (busy) diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index eeaca5d1efe3..4cf9628f38ac 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -315,8 +315,7 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, struct ttm_operation_ctx ctx = { .interruptible = false, .no_wait_gpu = false, - .flags = TTM_OPT_FLAG_FORCE_ALLOC - + .fource_alloc = true };
ttm = bo->ttm; diff --git a/drivers/gpu/drm/ttm/ttm_memory.c b/drivers/gpu/drm/ttm/ttm_memory.c index f9a90bfaa3c1..5ed1fc8f2ace 100644 --- a/drivers/gpu/drm/ttm/ttm_memory.c +++ b/drivers/gpu/drm/ttm/ttm_memory.c @@ -542,7 +542,8 @@ ttm_check_under_lowerlimit(struct ttm_mem_global *glob, { int64_t available;
- if (ctx->flags & TTM_OPT_FLAG_FORCE_ALLOC) + /* We allow over commit during suspend */ + if (ctx->force_alloc) return false;
available = get_nr_swap_pages() + si_mem_available(); diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c index 4ebc043e2867..29cf905d97b7 100644 --- a/drivers/gpu/drm/ttm/ttm_resource.c +++ b/drivers/gpu/drm/ttm/ttm_resource.c @@ -89,7 +89,7 @@ int ttm_resource_manager_evict_all(struct ttm_bo_device *bdev, struct ttm_operation_ctx ctx = { .interruptible = false, .no_wait_gpu = false, - .flags = TTM_OPT_FLAG_FORCE_ALLOC + .fource_alloc = true }; struct ttm_bo_global *glob = &ttm_bo_glob; struct dma_fence *fence; diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 4c7c2d574db6..6315133c69e5 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -196,6 +196,8 @@ struct ttm_bo_kmap_obj { * @interruptible: Sleep interruptible if sleeping. * @no_wait_gpu: Return immediately if the GPU is busy. * @retry_mayfail: Set the __GFP_RETRY_MAYFAIL when allocation pages. + * @allow_res_evict: Allow eviction of reserved BOs. + * @force_alloc: Fource allocation when serving page faults. * @resv: Reservation object to allow reserved evictions with. * @flags: Including the following flags * @@ -206,16 +208,12 @@ struct ttm_operation_ctx { bool interruptible; bool no_wait_gpu; bool retry_mayfail; + bool allow_res_evict; + bool force_alloc; struct dma_resv *resv; uint64_t bytes_moved; - uint32_t flags; };
-/* Allow eviction of reserved BOs */ -#define TTM_OPT_FLAG_ALLOW_RES_EVICT 0x1 -/* when serving page fault or suspend, allow alloc anyway */ -#define TTM_OPT_FLAG_FORCE_ALLOC 0x2 - /** * ttm_bo_get - reference a struct ttm_buffer_object *
On Mon, Nov 02, 2020 at 01:58:08PM +0100, Christian König wrote:
The ttm_operation_ctx structure has a mixture of flags and bools. Drop the flags and replace them with bools as well.
Signed-off-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 5 ++--- drivers/gpu/drm/ttm/ttm_bo.c | 2 +- drivers/gpu/drm/ttm/ttm_bo_vm.c | 3 +-- drivers/gpu/drm/ttm/ttm_memory.c | 3 ++- drivers/gpu/drm/ttm/ttm_resource.c | 2 +- include/drm/ttm/ttm_bo_api.h | 10 ++++------ 6 files changed, 11 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 52041f48e1c9..c302a2c7982d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -518,9 +518,8 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, .no_wait_gpu = bp->no_wait_gpu, /* We opt to avoid OOM on system pages allocations */ .retry_mayfail = true,
.resv = bp->resv,
.flags = bp->type != ttm_bo_type_kernel ?
TTM_OPT_FLAG_ALLOW_RES_EVICT : 0
.allow_res_evict = bp->type != ttm_bo_type_kernel,
}; struct amdgpu_bo *bo; unsigned long page_align, size = bp->size;.resv = bp->resv
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index c63b7ea1cd5d..e2a124b3affb 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -637,7 +637,7 @@ static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
if (bo->base.resv == ctx->resv) { dma_resv_assert_held(bo->base.resv);
if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT)
*locked = false; if (busy)if (ctx->allow_res_evict) ret = true;
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index eeaca5d1efe3..4cf9628f38ac 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -315,8 +315,7 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, struct ttm_operation_ctx ctx = { .interruptible = false, .no_wait_gpu = false,
.flags = TTM_OPT_FLAG_FORCE_ALLOC
.fource_alloc = true
};
ttm = bo->ttm;
diff --git a/drivers/gpu/drm/ttm/ttm_memory.c b/drivers/gpu/drm/ttm/ttm_memory.c index f9a90bfaa3c1..5ed1fc8f2ace 100644 --- a/drivers/gpu/drm/ttm/ttm_memory.c +++ b/drivers/gpu/drm/ttm/ttm_memory.c @@ -542,7 +542,8 @@ ttm_check_under_lowerlimit(struct ttm_mem_global *glob, { int64_t available;
- if (ctx->flags & TTM_OPT_FLAG_FORCE_ALLOC)
/* We allow over commit during suspend */
if (ctx->force_alloc) return false;
available = get_nr_swap_pages() + si_mem_available();
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c index 4ebc043e2867..29cf905d97b7 100644 --- a/drivers/gpu/drm/ttm/ttm_resource.c +++ b/drivers/gpu/drm/ttm/ttm_resource.c @@ -89,7 +89,7 @@ int ttm_resource_manager_evict_all(struct ttm_bo_device *bdev, struct ttm_operation_ctx ctx = { .interruptible = false, .no_wait_gpu = false,
.flags = TTM_OPT_FLAG_FORCE_ALLOC
}; struct ttm_bo_global *glob = &ttm_bo_glob; struct dma_fence *fence;.fource_alloc = true
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 4c7c2d574db6..6315133c69e5 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -196,6 +196,8 @@ struct ttm_bo_kmap_obj {
- @interruptible: Sleep interruptible if sleeping.
- @no_wait_gpu: Return immediately if the GPU is busy.
- @retry_mayfail: Set the __GFP_RETRY_MAYFAIL when allocation pages.
- @allow_res_evict: Allow eviction of reserved BOs.
- @force_alloc: Fource allocation when serving page faults.
s/Fource/Force/ and I think this would be an excellent application of the inline kerneldoc style for structs, so that you can spend a few more words on what exactly these do, and when they're supposed to be used.
I know originally we said we'll do kerneldoc last, but with all the stuff going on and details being discussed I don't think that makes much sense. We'll have forgotten it all again :-)
- @resv: Reservation object to allow reserved evictions with.
- @flags: Including the following flags
Forgot to remove this one here.
With the nits addressed:
Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch
@@ -206,16 +208,12 @@ struct ttm_operation_ctx { bool interruptible; bool no_wait_gpu; bool retry_mayfail;
- bool allow_res_evict;
- bool force_alloc; struct dma_resv *resv; uint64_t bytes_moved;
- uint32_t flags;
};
-/* Allow eviction of reserved BOs */ -#define TTM_OPT_FLAG_ALLOW_RES_EVICT 0x1 -/* when serving page fault or suspend, allow alloc anyway */ -#define TTM_OPT_FLAG_FORCE_ALLOC 0x2
/**
- ttm_bo_get - reference a struct ttm_buffer_object
-- 2.25.1
dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
Hi "Christian,
I love your patch! Yet something to improve:
[auto build test ERROR on drm-tip/drm-tip] [also build test ERROR on next-20201102] [cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next linus/master drm/drm-next v5.10-rc2] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-rework-no_r... base: git://anongit.freedesktop.org/drm/drm-tip drm-tip config: x86_64-randconfig-a004-20201102 (attached as .config) compiler: clang version 12.0.0 (https://github.com/llvm/llvm-project cc91554ebb66e8c9a4b8c67ca2f1343eaac10cf6) reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # install x86_64 cross compiling tool for clang build # apt-get install binutils-x86-64-linux-gnu # https://github.com/0day-ci/linux/commit/1010fe871b783d6385714dbfde4e57ed966b... git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Christian-K-nig/drm-ttm-rework-no_retry-handling/20201102-205950 git checkout 1010fe871b783d6385714dbfde4e57ed966b6749 # save the attached .config to linux build tree COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=x86_64
If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot lkp@intel.com
All errors (new ones prefixed by >>):
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c:408:4: error: field designator 'flags' does not refer to any field in type 'struct ttm_operation_ctx'
.flags = 0 ^ 1 error generated. --
drivers/gpu/drm/ttm/ttm_bo_vm.c:318:5: error: field designator 'fource_alloc' does not refer to any field in type 'struct ttm_operation_ctx'; did you mean 'force_alloc'?
.fource_alloc = true ^~~~~~~~~~~~ force_alloc include/drm/ttm/ttm_bo_api.h:212:7: note: 'force_alloc' declared here bool force_alloc; ^ 1 error generated. --
drivers/gpu/drm/ttm/ttm_resource.c:92:4: error: field designator 'fource_alloc' does not refer to any field in type 'struct ttm_operation_ctx'; did you mean 'force_alloc'?
.fource_alloc = true ^~~~~~~~~~~~ force_alloc include/drm/ttm/ttm_bo_api.h:212:7: note: 'force_alloc' declared here bool force_alloc; ^ 1 error generated.
vim +408 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
d38ceaf99ed015f Alex Deucher 2015-04-20 399 14fd833efa3f136 Chunming Zhou 2016-08-04 400 static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, 14fd833efa3f136 Chunming Zhou 2016-08-04 401 struct amdgpu_bo *bo) d38ceaf99ed015f Alex Deucher 2015-04-20 402 { a7d64de659946e8 Christian König 2016-09-15 403 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); 9251859a9adb8e5 Roger He 2017-12-08 404 struct ttm_operation_ctx ctx = { 9251859a9adb8e5 Roger He 2017-12-08 405 .interruptible = true, 9251859a9adb8e5 Roger He 2017-12-08 406 .no_wait_gpu = false, 5a5011a72489545 Gerd Hoffmann 2019-08-05 407 .resv = bo->tbo.base.resv, d330fca11500beb Roger He 2018-02-06 @408 .flags = 0 9251859a9adb8e5 Roger He 2017-12-08 409 }; 36409d122cb84fa Christian König 2015-12-21 410 uint32_t domain; 14fd833efa3f136 Chunming Zhou 2016-08-04 411 int r; 2f568dbd6b944c2 Christian König 2016-02-23 412 4671078eb8e390b Christian König 2020-09-21 413 if (bo->tbo.pin_count) 14fd833efa3f136 Chunming Zhou 2016-08-04 414 return 0; 36409d122cb84fa Christian König 2015-12-21 415 95844d20ae024b5 Marek Olšák 2016-08-17 416 /* Don't move this buffer if we have depleted our allowance 95844d20ae024b5 Marek Olšák 2016-08-17 417 * to move it. Don't move anything if the threshold is zero. d38ceaf99ed015f Alex Deucher 2015-04-20 418 */ 4993ba02635f69e Christian König 2019-05-06 419 if (p->bytes_moved < p->bytes_moved_threshold && 4993ba02635f69e Christian König 2019-05-06 420 (!bo->tbo.base.dma_buf || 4993ba02635f69e Christian König 2019-05-06 421 list_empty(&bo->tbo.base.dma_buf->attachments))) { c8c5e569c5b0c9a Andrey Grodzovsky 2018-06-12 422 if (!amdgpu_gmc_vram_full_visible(&adev->gmc) && 00f06b246a3056b John Brooks 2017-06-27 423 (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) { 00f06b246a3056b John Brooks 2017-06-27 424 /* And don't move a CPU_ACCESS_REQUIRED BO to limited 00f06b246a3056b John Brooks 2017-06-27 425 * visible VRAM if we've depleted our allowance to do 00f06b246a3056b John Brooks 2017-06-27 426 * that. 00f06b246a3056b John Brooks 2017-06-27 427 */ 00f06b246a3056b John Brooks 2017-06-27 428 if (p->bytes_moved_vis < p->bytes_moved_vis_threshold) 6d7d9c5aa212d06 Kent Russell 2017-08-08 429 domain = bo->preferred_domains; 36409d122cb84fa Christian König 2015-12-21 430 else 1ea863fd736eed8 Christian König 2015-12-18 431 domain = bo->allowed_domains; 00f06b246a3056b John Brooks 2017-06-27 432 } else { 6d7d9c5aa212d06 Kent Russell 2017-08-08 433 domain = bo->preferred_domains; 00f06b246a3056b John Brooks 2017-06-27 434 } 00f06b246a3056b John Brooks 2017-06-27 435 } else { 00f06b246a3056b John Brooks 2017-06-27 436 domain = bo->allowed_domains; 00f06b246a3056b John Brooks 2017-06-27 437 } d38ceaf99ed015f Alex Deucher 2015-04-20 438 d38ceaf99ed015f Alex Deucher 2015-04-20 439 retry: c704ab18e0a26a5 Christian König 2018-07-16 440 amdgpu_bo_placement_from_domain(bo, domain); 19be5570107108f Christian König 2017-04-12 441 r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); 6af046d26f34278 Christian König 2017-04-27 442 6af046d26f34278 Christian König 2017-04-27 443 p->bytes_moved += ctx.bytes_moved; c8c5e569c5b0c9a Andrey Grodzovsky 2018-06-12 444 if (!amdgpu_gmc_vram_full_visible(&adev->gmc) && 5422a28fe86f9f7 Christian König 2018-04-05 445 amdgpu_bo_in_cpu_visible_vram(bo)) 6af046d26f34278 Christian König 2017-04-27 446 p->bytes_moved_vis += ctx.bytes_moved; d38ceaf99ed015f Alex Deucher 2015-04-20 447 1abdc3d73dd9dc2 Christian König 2016-08-31 448 if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) { 1ea863fd736eed8 Christian König 2015-12-18 449 domain = bo->allowed_domains; d38ceaf99ed015f Alex Deucher 2015-04-20 450 goto retry; d38ceaf99ed015f Alex Deucher 2015-04-20 451 } 14fd833efa3f136 Chunming Zhou 2016-08-04 452 14fd833efa3f136 Chunming Zhou 2016-08-04 453 return r; 14fd833efa3f136 Chunming Zhou 2016-08-04 454 } 14fd833efa3f136 Chunming Zhou 2016-08-04 455
--- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
On Mon, Nov 02, 2020 at 01:58:07PM +0100, Christian König wrote:
During eviction we do want to trigger the OOM killer.
Only while doing new allocations we should try to avoid that and return -ENOMEM to the application.
Signed-off-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 3 --- drivers/gpu/drm/ttm/ttm_pool.c | 2 +- drivers/gpu/drm/ttm/ttm_tt.c | 7 ------- include/drm/ttm/ttm_bo_api.h | 2 ++ include/drm/ttm/ttm_bo_driver.h | 3 --- 6 files changed, 5 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 1aa516429c80..52041f48e1c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -516,6 +516,8 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, struct ttm_operation_ctx ctx = { .interruptible = (bp->type != ttm_bo_type_kernel), .no_wait_gpu = bp->no_wait_gpu,
/* We opt to avoid OOM on system pages allocations */
.retry_mayfail = true,
Hm this is a bit confusingly named imo, but it comes from the gfp flags. So maybe call it gfp_retry_mayfail, to make it clear where it's from and that it's only for system allocations. Personally I'd have called this limited_retry, but I think better to stay consistent.
Also I think we should __GFP_NOWARN for this, to avoid splats in dmesg since we expect to handle this.
With the bikesheds addressed somehow.
Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch
.resv = bp->resv, .flags = bp->type != ttm_bo_type_kernel ? TTM_OPT_FLAG_ALLOW_RES_EVICT : 0
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index bd6e6641c3fc..c01c060e4ac5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1914,9 +1914,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) } adev->mman.initialized = true;
- /* We opt to avoid OOM on system pages allocations */
- adev->mman.bdev.no_retry = true;
- /* Initialize VRAM pool with all of VRAM divided into pages */ r = amdgpu_vram_mgr_init(adev); if (r) {
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 1e50deefb5d5..7100fd0e9e88 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -367,7 +367,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt, if (tt->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC) gfp_flags |= __GFP_ZERO;
- if (tt->page_flags & TTM_PAGE_FLAG_NO_RETRY)
if (ctx->retry_mayfail) gfp_flags |= __GFP_RETRY_MAYFAIL;
if (pool->use_dma32)
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 8861a74ac335..cfd633c7e764 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -51,9 +51,6 @@ int ttm_tt_create(struct ttm_buffer_object *bo, bool zero_alloc) if (bo->ttm) return 0;
- if (bdev->no_retry)
page_flags |= TTM_PAGE_FLAG_NO_RETRY;
- switch (bo->type) { case ttm_bo_type_device: if (zero_alloc)
@@ -211,8 +208,6 @@ int ttm_tt_swapin(struct ttm_tt *ttm)
swap_space = swap_storage->f_mapping; gfp_mask = mapping_gfp_mask(swap_space);
if (ttm->page_flags & TTM_PAGE_FLAG_NO_RETRY)
gfp_mask |= __GFP_RETRY_MAYFAIL;
for (i = 0; i < ttm->num_pages; ++i) { from_page = shmem_read_mapping_page_gfp(swap_space, i,
@@ -260,8 +255,6 @@ int ttm_tt_swapout(struct ttm_bo_device *bdev, struct ttm_tt *ttm)
swap_space = swap_storage->f_mapping; gfp_mask = mapping_gfp_mask(swap_space);
if (ttm->page_flags & TTM_PAGE_FLAG_NO_RETRY)
gfp_mask |= __GFP_RETRY_MAYFAIL;
for (i = 0; i < ttm->num_pages; ++i) { from_page = ttm->pages[i];
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 37102e45e496..4c7c2d574db6 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -195,6 +195,7 @@ struct ttm_bo_kmap_obj {
- @interruptible: Sleep interruptible if sleeping.
- @no_wait_gpu: Return immediately if the GPU is busy.
- @retry_mayfail: Set the __GFP_RETRY_MAYFAIL when allocation pages.
- @resv: Reservation object to allow reserved evictions with.
- @flags: Including the following flags
@@ -204,6 +205,7 @@ struct ttm_bo_kmap_obj { struct ttm_operation_ctx { bool interruptible; bool no_wait_gpu;
- bool retry_mayfail; struct dma_resv *resv; uint64_t bytes_moved; uint32_t flags;
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index e9f683fa72dc..da8208f43378 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -276,7 +276,6 @@ extern struct ttm_bo_global {
- @dev_mapping: A pointer to the struct address_space representing the
- device address space.
- @wq: Work queue structure for the delayed delete workqueue.
*/
- @no_retry: Don't retry allocation if it fails
@@ -314,8 +313,6 @@ struct ttm_bo_device { */
struct delayed_work wq;
- bool no_retry;
};
static inline struct ttm_resource_manager *ttm_manager_type(struct ttm_bo_device *bdev,
2.25.1
dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
dri-devel@lists.freedesktop.org