From: Nicolai Hähnle <nicolai.haehnle@amd.com>
Allow callers to opt out of calling ttm_bo_validate immediately during initialization. This gives callers more flexibility in how the reservation object is locked, which is needed to fix a locking bug in amdgpu (destroying a locked mutex).
v2: callers of ttm_bo_init_top should use ttm_bo_unref for error handling
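For illustration, the calling pattern this enables looks roughly like the following sketch (not part of the patch; the real user is the amdgpu patch later in this series, which also takes care of the reservation locking around the validate call):

        r = ttm_bo_init_top(bdev, bo, size, type, page_alignment,
                            persistent_swap_storage, acc_size, sg, resv,
                            destroy);
        if (r) {
                /* per v2: the caller frees the BO via ttm_bo_unref() on error */
                ttm_bo_unref(&bo);
                return r;
        }

        /*
         * The BO is initialized but not yet validated; the caller decides
         * when and under which locking ttm_bo_validate() is called.
         */
        r = ttm_bo_validate(bo, placement, interruptible, false);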
Signed-off-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c | 89 ++++++++++++++++++++++++++++----------------
 include/drm/ttm/ttm_bo_api.h | 44 ++++++++++++++++++++++
 2 files changed, 100 insertions(+), 33 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index c438b04..390b763 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1119,44 +1119,23 @@ int ttm_bo_validate(struct ttm_buffer_object *bo,
 }
 EXPORT_SYMBOL(ttm_bo_validate);
-int ttm_bo_init(struct ttm_bo_device *bdev,
-               struct ttm_buffer_object *bo,
-               unsigned long size,
-               enum ttm_bo_type type,
-               struct ttm_placement *placement,
-               uint32_t page_alignment,
-               bool interruptible,
-               struct file *persistent_swap_storage,
-               size_t acc_size,
-               struct sg_table *sg,
-               struct reservation_object *resv,
-               void (*destroy) (struct ttm_buffer_object *))
+int ttm_bo_init_top(struct ttm_bo_device *bdev,
+                   struct ttm_buffer_object *bo,
+                   unsigned long size,
+                   enum ttm_bo_type type,
+                   uint32_t page_alignment,
+                   struct file *persistent_swap_storage,
+                   size_t acc_size,
+                   struct sg_table *sg,
+                   struct reservation_object *resv,
+                   void (*destroy) (struct ttm_buffer_object *))
 {
        int ret = 0;
        unsigned long num_pages;
        struct ttm_mem_global *mem_glob = bdev->glob->mem_glob;
-       bool locked;
-
-       ret = ttm_mem_global_alloc(mem_glob, acc_size, false, false);
-       if (ret) {
-               pr_err("Out of kernel memory\n");
-               if (destroy)
-                       (*destroy)(bo);
-               else
-                       kfree(bo);
-               return -ENOMEM;
-       }
        num_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
-       if (num_pages == 0) {
-               pr_err("Illegal buffer object size\n");
-               if (destroy)
-                       (*destroy)(bo);
-               else
-                       kfree(bo);
-               ttm_mem_global_free(mem_glob, acc_size);
-               return -EINVAL;
-       }
+       bo->destroy = destroy;
        kref_init(&bo->kref);
@@ -1181,7 +1160,7 @@ int ttm_bo_init(struct ttm_bo_device *bdev,
        bo->moving = NULL;
        bo->mem.placement = (TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED);
        bo->persistent_swap_storage = persistent_swap_storage;
-       bo->acc_size = acc_size;
+       bo->acc_size = 0;
        bo->sg = sg;
        if (resv) {
                bo->resv = resv;
@@ -1195,6 +1174,22 @@ int ttm_bo_init(struct ttm_bo_device *bdev,
        bo->priority = 0;
        /*
+        * Error checking comes after initialization, so that the regular
+        * buffer object cleanup code can be used safely by the caller.
+        */
+       if (num_pages == 0) {
+               pr_err("Illegal buffer object size\n");
+               return -EINVAL;
+       }
+
+       ret = ttm_mem_global_alloc(mem_glob, acc_size, false, false);
+       if (ret) {
+               pr_err("Out of kernel memory\n");
+               return -ENOMEM;
+       }
+       bo->acc_size = acc_size;
+
+       /*
         * For ttm_bo_type_device buffers, allocate
         * address space from the device.
         */
@@ -1203,6 +1198,34 @@ int ttm_bo_init(struct ttm_bo_device *bdev,
                ret = drm_vma_offset_add(&bdev->vma_manager, &bo->vma_node,
                                         bo->mem.num_pages);
+       return ret;
+}
+EXPORT_SYMBOL(ttm_bo_init_top);
+
+int ttm_bo_init(struct ttm_bo_device *bdev,
+               struct ttm_buffer_object *bo,
+               unsigned long size,
+               enum ttm_bo_type type,
+               struct ttm_placement *placement,
+               uint32_t page_alignment,
+               bool interruptible,
+               struct file *persistent_swap_storage,
+               size_t acc_size,
+               struct sg_table *sg,
+               struct reservation_object *resv,
+               void (*destroy) (struct ttm_buffer_object *))
+{
+       bool locked;
+       int ret;
+
+       ret = ttm_bo_init_top(bdev, bo, size, type, page_alignment,
+                             persistent_swap_storage, acc_size, sg, resv,
+                             destroy);
+       if (ret) {
+               ttm_bo_unref(&bo);
+               return ret;
+       }
+
        /* passed reservation objects should already be locked,
         * since otherwise lockdep will be angered in radeon.
         */
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index f195899..6e8eaee 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -453,6 +453,50 @@ size_t ttm_bo_dma_acc_size(struct ttm_bo_device *bdev, unsigned struct_size);
 /**
+ * ttm_bo_init_top
+ *
+ * @bdev: Pointer to a ttm_bo_device struct.
+ * @bo: Pointer to a ttm_buffer_object to be initialized.
+ * @size: Requested size of buffer object.
+ * @type: Requested type of buffer object.
+ * @flags: Initial placement flags.
+ * @page_alignment: Data alignment in pages.
+ * @persistent_swap_storage: Usually the swap storage is deleted for buffers
+ * pinned in physical memory. If this behaviour is not desired, this member
+ * holds a pointer to a persistent shmem object. Typically, this would
+ * point to the shmem object backing a GEM object if TTM is used to back a
+ * GEM user interface.
+ * @acc_size: Accounted size for this object.
+ * @resv: Pointer to a reservation_object, or NULL to let ttm allocate one.
+ * @destroy: Destroy function. Use NULL for kfree().
+ *
+ * This function initializes a pre-allocated struct ttm_buffer_object.
+ * As this object may be part of a larger structure, this function,
+ * together with the @destroy function,
+ * enables driver-specific objects derived from a ttm_buffer_object.
+ *
+ * Unlike ttm_bo_init, @bo is not validated, and when an error is returned,
+ * the caller is responsible for freeing @bo (using ttm_bo_unref).
+ *
+ * On successful return, the object kref and list_kref are set to 1.
+ *
+ * Returns
+ * -ENOMEM: Out of memory.
+ * -EINVAL: Invalid buffer size.
+ */
+
+extern int ttm_bo_init_top(struct ttm_bo_device *bdev,
+                          struct ttm_buffer_object *bo,
+                          unsigned long size,
+                          enum ttm_bo_type type,
+                          uint32_t page_alignment,
+                          struct file *persistent_swap_storage,
+                          size_t acc_size,
+                          struct sg_table *sg,
+                          struct reservation_object *resv,
+                          void (*destroy) (struct ttm_buffer_object *));
+
+/**
  * ttm_bo_init
  *
  * @bdev: Pointer to a ttm_bo_device struct.
From: Nicolai Hähnle <nicolai.haehnle@amd.com>
As the comment says: callers of ttm_bo_init cannot rely on having the only reference to the BO when the function returns successfully.
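To spell out the failure mode this guards against, here is an illustrative sketch (not from the patch) of an assumption that is not safe:

        r = ttm_bo_init(bdev, bo, size, type, placement, page_alignment,
                        true, NULL, acc_size, NULL, NULL, destroy);
        if (r)
                return r;       /* ttm_bo_init already cleaned up @bo */

        /*
         * WRONG assumption: "we hold the only reference, so we can tear the
         * object down by hand".  The BO has already been validated and put
         * on the LRU inside ttm_bo_init, so another task may hold its own
         * reference by now; always drop ours through ttm_bo_unref() and let
         * the final unref invoke @destroy.
         */
        ttm_bo_unref(&bo);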
Signed-off-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 include/drm/ttm/ttm_bo_api.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 6e8eaee..5add713 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -520,7 +520,11 @@ extern int ttm_bo_init_top(struct ttm_bo_device *bdev,
  * As this object may be part of a larger structure, this function,
  * together with the @destroy function,
  * enables driver-specific objects derived from a ttm_buffer_object.
- * On successful return, the object kref and list_kref are set to 1.
+ *
+ * On successful return, the caller owns an object kref to @bo. The kref and
+ * list_kref are usually set to 1, but note that in some situations, other
+ * tasks may already be holding references to @bo as well.
+ *
  * If a failure occurs, the function will call the @destroy function, or
  * kfree() if @destroy is NULL. Thus, after a failure, dereferencing @bo is
  * illegal and will likely cause memory corruption.
From: Nicolai Hähnle <nicolai.haehnle@amd.com>
Open-code the initial ttm_bo_validate call, so that we can properly unlock the reservation lock when it fails. Also, properly destruct the reservation object when the first part of TTM BO initialization fails.
Actual deadlocks caused by the missing unlock should have been fixed by "drm/ttm: never add BO that failed to validate to the LRU list", superseding the flawed fix in commit 38fc4856ad98 ("drm/amdgpu: fix a potential deadlock in amdgpu_bo_create_restricted()").
This change fixes remaining recursive locking errors that can be seen with lock debugging enabled, and avoids the error of freeing a locked mutex.
v2: use ttm_bo_unref for error handling, and rebase on latest changes
Fixes: 12a852219583 ("drm/amdgpu: improve AMDGPU_GEM_CREATE_VRAM_CLEARED handling (v2)")
Signed-off-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index c2e57f7..15944ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -404,15 +404,31 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
 	}
        initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
-       r = ttm_bo_init(&adev->mman.bdev, &bo->tbo, size, type,
-                       &bo->placement, page_align, !kernel, NULL,
-                       acc_size, sg, resv ? resv : &bo->tbo.ttm_resv,
-                       &amdgpu_ttm_bo_destroy);
+       r = ttm_bo_init_top(&adev->mman.bdev, &bo->tbo, size, type,
+                           page_align, NULL,
+                           acc_size, sg, resv ? resv : &bo->tbo.ttm_resv,
+                           &amdgpu_ttm_bo_destroy);
+
+       if (likely(r == 0))
+               r = ttm_bo_validate(&bo->tbo, &bo->placement, !kernel, false);
+
+       if (unlikely(r != 0)) {
+               struct ttm_buffer_object *tbo = &bo->tbo;
+
+               if (!resv)
+                       ww_mutex_unlock(&bo->tbo.ttm_resv.lock);
+               ttm_bo_unref(&tbo);
+               return r;
+       }
+
        amdgpu_cs_report_moved_bytes(adev,
                atomic64_read(&adev->num_bytes_moved) - initial_bytes_moved);

-       if (unlikely(r != 0))
-               return r;
+       if (!(bo->tbo.mem.placement & TTM_PL_FLAG_NO_EVICT)) {
+               spin_lock(&bo->tbo.glob->lru_lock);
+               ttm_bo_add_to_lru(&bo->tbo);
+               spin_unlock(&bo->tbo.glob->lru_lock);
+       }

        bo->tbo.priority = ilog2(bo->tbo.num_pages);
        if (kernel)
On 16/02/17 04:10 AM, Nicolai Hähnle wrote:
From: Nicolai Hähnle <nicolai.haehnle@amd.com>
Open-code the initial ttm_bo_validate call, so that we can properly unlock the reservation lock when it fails. Also, properly destruct the reservation object when the first part of TTM BO initialization fails.
Actual deadlocks caused by the missing unlock should have been fixed by "drm/ttm: never add BO that failed to validate to the LRU list", superseding the flawed fix in commit 38fc4856ad98 ("drm/amdgpu: fix a potential deadlock in amdgpu_bo_create_restricted()").
This change fixes remaining recursive locking errors that can be seen with lock debugging enabled, and avoids the error of freeing a locked mutex.
v2: use ttm_bo_unref for error handling, and rebase on latest changes
Fixes: 12a852219583 ("drm/amdgpu: improve AMDGPU_GEM_CREATE_VRAM_CLEARED handling (v2)")
Signed-off-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index c2e57f7..15944ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -404,15 +404,31 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
 	}
        initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
-       r = ttm_bo_init(&adev->mman.bdev, &bo->tbo, size, type,
-                       &bo->placement, page_align, !kernel, NULL,
-                       acc_size, sg, resv ? resv : &bo->tbo.ttm_resv,
-                       &amdgpu_ttm_bo_destroy);
+       r = ttm_bo_init_top(&adev->mman.bdev, &bo->tbo, size, type,
+                           page_align, NULL,
+                           acc_size, sg, resv ? resv : &bo->tbo.ttm_resv,
+                           &amdgpu_ttm_bo_destroy);
+
+       if (likely(r == 0))
+               r = ttm_bo_validate(&bo->tbo, &bo->placement, !kernel, false);
+
+       if (unlikely(r != 0)) {
+               struct ttm_buffer_object *tbo = &bo->tbo;
+
+               if (!resv)
+                       ww_mutex_unlock(&bo->tbo.ttm_resv.lock);
+               ttm_bo_unref(&tbo);
+               return r;
+       }
I think this would be clearer as
        if (unlikely(r != 0)) {
                struct ttm_buffer_object *tbo = &bo->tbo;

                if (!resv)
                        ww_mutex_unlock(&bo->tbo.ttm_resv.lock);
                ttm_bo_unref(&tbo);
                return r;
        }

        r = ttm_bo_validate(&bo->tbo, &bo->placement, !kernel, false);
[...]
        amdgpu_cs_report_moved_bytes(adev,
                atomic64_read(&adev->num_bytes_moved) - initial_bytes_moved);

-       if (unlikely(r != 0))
-               return r;
+       if (!(bo->tbo.mem.placement & TTM_PL_FLAG_NO_EVICT)) {
+               spin_lock(&bo->tbo.glob->lru_lock);
+               ttm_bo_add_to_lru(&bo->tbo);
+               spin_unlock(&bo->tbo.glob->lru_lock);
+       }
It's a bit ugly to open-code this logic in driver code. Maybe add another TTM helper which calls ttm_bo_validate and ttm_bo_add_to_lru?
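Something like the below, perhaps? (Hypothetical helper; the name ttm_bo_validate_lru and its exact shape are made up here purely to illustrate the idea.)

int ttm_bo_validate_lru(struct ttm_buffer_object *bo,
                        struct ttm_placement *placement,
                        bool interruptible, bool no_wait_gpu)
{
        int ret;

        ret = ttm_bo_validate(bo, placement, interruptible, no_wait_gpu);
        if (ret)
                return ret;

        /* the LRU handling that drivers currently have to open-code */
        if (!(bo->mem.placement & TTM_PL_FLAG_NO_EVICT)) {
                spin_lock(&bo->glob->lru_lock);
                ttm_bo_add_to_lru(bo);
                spin_unlock(&bo->glob->lru_lock);
        }

        return 0;
}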
On 16.02.2017 02:00, Michel Dänzer wrote:
[...]
It's a bit ugly to open-code this logic in driver code. Maybe add another TTM helper which calls ttm_bo_validate and ttm_bo_add_to_lru?
On further reflection, maybe a better approach would be a function ttm_bo_init_reserved, which returns with the reservation lock held on success. amdgpu_bo_create_restricted could then simply call ttm_bo_unreserve instead of ww_mutex_unlock, and ttm_bo_unreserve already takes care of the add-to-LRU.
AFAICT, it doesn't make much sense to add the buffer to the LRU that early anyway, if amdgpu_fill_buffer is used.
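As a rough sketch of what I mean (ttm_bo_init_reserved does not exist yet, so its name and signature below are made up; error handling as in v2 of the patch):

        r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type,
                                 page_align, NULL, acc_size, sg,
                                 resv ? resv : &bo->tbo.ttm_resv,
                                 &amdgpu_ttm_bo_destroy);
        if (likely(r == 0))
                r = ttm_bo_validate(&bo->tbo, &bo->placement, !kernel, false);

        if (unlikely(r != 0)) {
                struct ttm_buffer_object *tbo = &bo->tbo;

                /* still reserved on failure, so drop the lock before the unref */
                if (!resv)
                        ww_mutex_unlock(&bo->tbo.ttm_resv.lock);
                ttm_bo_unref(&tbo);
                return r;
        }

        /*
         * Later, e.g. once amdgpu_fill_buffer has run: ttm_bo_unreserve()
         * drops the reservation and takes care of adding the BO to the LRU.
         */
        if (!resv)
                ttm_bo_unreserve(&bo->tbo);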
Cheers, Nicolai