Hi,
This patch looks mostly good, although I think ttm_bo_cleanup_refs becomes overly complicated. Could the following do instead, or am I missing something?
static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
                               bool interruptible,
                               bool no_wait_reserve,
                               bool no_wait_gpu)
{
        struct ttm_bo_device *bdev = bo->bdev;
        struct ttm_bo_global *glob = bo->glob;
        int put_count;
        int ret = 0;

        /*
         * First, reserve while making sure we're still on the
         * ddestroy list.
         */
retry_reserve:
        spin_lock(&glob->lru_lock);

        if (unlikely(list_empty(&bo->ddestroy))) {
                spin_unlock(&glob->lru_lock);
                return 0;
        }

        ret = ttm_bo_reserve_locked(bo, false, true, false, 0);

        if (unlikely(ret == -EBUSY)) {
                spin_unlock(&glob->lru_lock);
                if (likely(!no_wait_reserve))
                        ret = ttm_bo_wait_unreserved(bo, interruptible);
                if (unlikely(ret != 0))
                        return ret;

                goto retry_reserve;
        }

        BUG_ON(ret != 0);

        /* Then wait for the GPU, with the reservation held. */
        spin_lock(&bdev->fence_lock);
        ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
        spin_unlock(&bdev->fence_lock);

        if (unlikely(ret != 0)) {
                atomic_set(&bo->reserved, 0);
                wake_up_all(&bo->event_queue);
                spin_unlock(&glob->lru_lock);
                return ret;
        }

        put_count = ttm_bo_del_from_lru(bo);
        list_del_init(&bo->ddestroy);
        ++put_count;

        spin_unlock(&glob->lru_lock);
        ttm_bo_cleanup_memtype_use(bo);

        atomic_set(&bo->reserved, 0);
        wake_up_all(&bo->event_queue);
        ttm_bo_list_ref_sub(bo, put_count, true);

        return 0;
}
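For context, here is a rough sketch of how the ddestroy walker would drive a cleanup function like this. It is not part of the patch: the function name below is made up, and the loop is simplified from the existing ttm_bo_delayed_delete() (the remove_all handling and work rescheduling are left out).

/* Rough illustrative sketch only; not intended as a real replacement. */
static void ttm_bo_delayed_delete_sketch(struct ttm_bo_device *bdev)
{
        struct ttm_bo_global *glob = bdev->glob;
        struct ttm_buffer_object *bo;

        spin_lock(&glob->lru_lock);
        while (!list_empty(&bdev->ddestroy)) {
                bo = list_first_entry(&bdev->ddestroy,
                                      struct ttm_buffer_object, ddestroy);
                kref_get(&bo->list_kref);       /* keep bo alive across the call */
                spin_unlock(&glob->lru_lock);

                /* May sleep and drops/retakes locks internally. */
                if (ttm_bo_cleanup_refs(bo, false, false, false)) {
                        kref_put(&bo->list_kref, ttm_bo_release_list);
                        return;
                }

                kref_put(&bo->list_kref, ttm_bo_release_list);
                spin_lock(&glob->lru_lock);
        }
        spin_unlock(&glob->lru_lock);
}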
On 11/12/2012 03:00 PM, Maarten Lankhorst wrote:
I changed the hierarchy to make fence_lock the innermost lock, instead of the outermost one. This simplifies things slightly, and will hopefully make it easier to make fence_lock global at some point, should that be needed.

To make things clearer, I changed the order around in ttm_bo_cleanup_refs and ttm_bo_cleanup_refs_or_queue.

A reservation is taken first, then the fence lock is taken and a wait is attempted.
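Schematically, the destroy path now nests the locks as follows (illustrative sketch only, not code from the patch; the helper name is made up for the example):

/* Illustrative sketch of the new nesting order. */
static int ttm_bo_lock_order_sketch(struct ttm_buffer_object *bo)
{
        struct ttm_bo_device *bdev = bo->bdev;
        struct ttm_bo_global *glob = bo->glob;
        int ret;

        spin_lock(&glob->lru_lock);             /* 1: lru_lock, outermost */
        ret = ttm_bo_reserve_locked(bo, false, true, false, 0); /* 2: reservation (trylock) */
        if (ret) {
                spin_unlock(&glob->lru_lock);
                return ret;
        }

        spin_lock(&bdev->fence_lock);           /* 3: fence_lock is now the innermost lock */
        ret = ttm_bo_wait(bo, false, false, true);      /* wait attempted last */
        spin_unlock(&bdev->fence_lock);

        atomic_set(&bo->reserved, 0);           /* drop the reservation again */
        wake_up_all(&bo->event_queue);
        spin_unlock(&glob->lru_lock);
        return ret;
}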
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
v2:
- fix conflict with upstream race fix, simplifies ttm_bo_cleanup_refs
v3:
- change removal of fence_lock to making it an inner lock instead
 drivers/gpu/drm/ttm/ttm_bo.c           | 95 ++++++++++++++++------------------
 drivers/gpu/drm/ttm/ttm_execbuf_util.c |  4 +-
 2 files changed, 48 insertions(+), 51 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index a3383a7..70285ff 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -478,28 +478,26 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
 {
         struct ttm_bo_device *bdev = bo->bdev;
         struct ttm_bo_global *glob = bo->glob;
-        struct ttm_bo_driver *driver;
-        struct ttm_bo_driver *driver = bdev->driver;
         void *sync_obj = NULL;
         int put_count;
         int ret;
-        spin_lock(&bdev->fence_lock);
-        (void) ttm_bo_wait(bo, false, false, true);
-        if (!bo->sync_obj) {
         spin_lock(&glob->lru_lock);
         /**
          * Lock inversion between bo:reserve and bdev::fence_lock here,
          * but that's OK, since we're only trylocking.
          */
         ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
-        spin_lock(&glob->lru_lock);
-        ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
-        if (!ret) {
         spin_lock(&bdev->fence_lock);
         ret = ttm_bo_wait(bo, false, false, true);
         if (unlikely(ret == -EBUSY))
         if (unlikely(ret == -EBUSY)) {
                 sync_obj = driver->sync_obj_ref(bo->sync_obj);
                 spin_unlock(&bdev->fence_lock);
                 atomic_set(&bo->reserved, 0);
                 wake_up_all(&bo->event_queue);
                 goto queue;
         }
         spin_unlock(&bdev->fence_lock);
         put_count = ttm_bo_del_from_lru(bo);
         atomic_set(&bo->reserved, 0);
@@ -509,18 +507,11 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
                 ttm_bo_list_ref_sub(bo, put_count, true);
                 return;
         } else {
                 spin_lock(&glob->lru_lock);
         }
 queue:
         driver = bdev->driver;
         if (bo->sync_obj)
                 sync_obj = driver->sync_obj_ref(bo->sync_obj);
         kref_get(&bo->list_kref);
         list_add_tail(&bo->ddestroy, &bdev->ddestroy);
         spin_unlock(&glob->lru_lock);
-        spin_unlock(&bdev->fence_lock);
         if (sync_obj) {
                 driver->sync_obj_flush(sync_obj);
@@ -546,54 +537,60 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
                                bool no_wait_gpu)
 {
         struct ttm_bo_device *bdev = bo->bdev;
         struct ttm_bo_driver *driver = bdev->driver;
         struct ttm_bo_global *glob = bo->glob;
         int put_count;
         int ret = 0;
         void *sync_obj;
 retry:
-        spin_lock(&bdev->fence_lock);
-        ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
-        spin_unlock(&bdev->fence_lock);
-        spin_lock(&glob->lru_lock);
-        if (unlikely(ret != 0))
                 return ret;
-        ret = ttm_bo_reserve_locked(bo, interruptible,
                                     no_wait_reserve, false, 0);
-retry_reserve:
-        spin_lock(&glob->lru_lock);
         if (unlikely(ret)) {
                 spin_unlock(&glob->lru_lock);
                 return ret;
         }
         if (unlikely(list_empty(&bo->ddestroy))) {
                 atomic_set(&bo->reserved, 0);
                 wake_up_all(&bo->event_queue);
                 spin_unlock(&glob->lru_lock);
                 return 0;
         }
-        ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
-        if (unlikely(ret == -EBUSY)) {
                 spin_unlock(&glob->lru_lock);
                 if (likely(!no_wait_reserve))
                         ret = ttm_bo_wait_unreserved(bo, interruptible);
                 if (unlikely(ret != 0))
-        spin_lock(&bdev->fence_lock);
-        ret = ttm_bo_wait(bo, false, false, true);
-        if (ret) {
                 if (no_wait_gpu) {
                         spin_unlock(&bdev->fence_lock);
                         atomic_set(&bo->reserved, 0);
                         wake_up_all(&bo->event_queue);
                         spin_unlock(&glob->lru_lock);
                         return ret;
                 }
                 goto retry_reserve;
-        }
-        BUG_ON(ret != 0);
-        /**
          * We can re-check for sync object without taking
          * the bo::lock since setting the sync object requires
          * also bo::reserved. A busy object at this point may
          * be caused by another thread recently starting an accelerated
          * eviction.
          */
         /**
          * Take a reference to the fence and unreserve, if the wait
          * was succesful and no new sync_obj was attached,
          * ttm_bo_wait in retry will return ret = 0, and end the loop.
          */
-        if (unlikely(bo->sync_obj)) {
                 sync_obj = driver->sync_obj_ref(&bo->sync_obj);
                 spin_unlock(&bdev->fence_lock);
                 atomic_set(&bo->reserved, 0);
                 wake_up_all(&bo->event_queue);
                 spin_unlock(&glob->lru_lock);
                 ret = driver->sync_obj_wait(bo->sync_obj, false, interruptible);
                 driver->sync_obj_unref(&sync_obj);
                 if (ret)
                         return ret;
                 goto retry;
         }
         spin_unlock(&bdev->fence_lock);
         put_count = ttm_bo_del_from_lru(bo);
         list_del_init(&bo->ddestroy);
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
index 1986d00..cd9e452 100644
--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
@@ -213,8 +213,8 @@ void ttm_eu_fence_buffer_objects(struct list_head *list, void *sync_obj)
         driver = bdev->driver;
         glob = bo->glob;
-        spin_lock(&bdev->fence_lock);
         spin_lock(&glob->lru_lock);
+        spin_lock(&bdev->fence_lock);
         list_for_each_entry(entry, list, head) {
                 bo = entry->bo;
@@ -223,8 +223,8 @@ void ttm_eu_fence_buffer_objects(struct list_head *list, void *sync_obj)
                 ttm_bo_unreserve_locked(bo);
                 entry->reserved = false;
         }
-        spin_unlock(&glob->lru_lock);
         spin_unlock(&bdev->fence_lock);
+        spin_unlock(&glob->lru_lock);
         list_for_each_entry(entry, list, head) {
                 if (entry->old_sync_obj)