On 11/28/2012 12:25 PM, Maarten Lankhorst wrote:
By removing the unlocking of lru and retaking it immediately, a race is removed where the bo is taken off the swap list or the lru list between the unlock and relock. As such, the cleanup_refs code can be simplified: it will attempt a non-blocking ttm_bo_wait, and if that fails it will drop the locks and perform a blocking wait, or return an error if no_wait_gpu was set.
The need for looping is also eliminated, since swapout and evict_mem_first will always follow the destruction path, so no new fence is allowed to be attached. As far as I can see this may already have been the case, but the unlocking / relocking required a complicated loop to deal with re-reservation.
The downside is that ttm_bo_cleanup_memtype_use is no longer called with reservation held, so drivers must be aware that move_notify with a null parameter doesn't require a reservation.
Why can't we unreserve *after* ttm_bo_cleanup_memtype_use? That's not immediately clear from this patch.
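I.e., roughly the following ordering (just a sketch to make the question concrete, mirroring what the removed hunk below used to do at the end of ttm_bo_cleanup_memtype_use, and assuming the caller could keep the reservation across the teardown):

	/* tear down the memtype while still holding the reservation */
	ttm_bo_cleanup_memtype_use(bo);

	/* only then unreserve and wake up waiters */
	atomic_set(&bo->reserved, 0);
	smp_mb__after_atomic_dec();
	wake_up_all(&bo->event_queue);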
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
 drivers/gpu/drm/ttm/ttm_bo.c | 112 +++++++++++++++++++++++--------------------
 1 file changed, 60 insertions(+), 52 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 202fc20..02b275b 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -486,14 +486,6 @@ static void ttm_bo_cleanup_memtype_use(struct ttm_buffer_object *bo)
 		bo->ttm = NULL;
 	}
 	ttm_bo_mem_put(bo, &bo->mem);
-
-	atomic_set(&bo->reserved, 0);
-
-	/*
-	 * Make processes trying to reserve really pick it up.
-	 */
-	smp_mb__after_atomic_dec();
-	wake_up_all(&bo->event_queue);
 }
 
 static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
@@ -515,6 +507,9 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
 		put_count = ttm_bo_del_from_lru(bo);
 
 		spin_unlock(&glob->lru_lock);
+		atomic_set(&bo->reserved, 0);
+		wake_up_all(&bo->event_queue);
+
I think (although I'm not 100% sure) that if we use atomic_set() to unreserve, and it's not followed by a spin_unlock(), we need to insert a memory barrier, like is done above in the removed code, otherwise memory operations protected by reserve may be reordered until after the reservation is released.
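Concretely, something like this in the new unreserve path (a sketch only, just reusing the barrier and comment from the removed code above):

		spin_unlock(&glob->lru_lock);
		atomic_set(&bo->reserved, 0);
		/*
		 * Make processes trying to reserve really pick it up.
		 */
		smp_mb__after_atomic_dec();
		wake_up_all(&bo->event_queue);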
 		ttm_bo_cleanup_memtype_use(bo);
 
 		ttm_bo_list_ref_sub(bo, put_count, true);
@@ -543,68 +538,72 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
 }
 
 /**
- * function ttm_bo_cleanup_refs
+ * function ttm_bo_cleanup_refs_and_unlock
  * If bo idle, remove from delayed- and lru lists, and unref.
  * If not idle, do nothing.
  *
+ * Must be called with lru_lock and reservation held, this function
+ * will drop both before returning.
+ *
  * @interruptible         Any sleeps should occur interruptibly.
- * @no_wait_reserve       Never wait for reserve. Return -EBUSY instead.
  * @no_wait_gpu           Never wait for gpu. Return -EBUSY instead.
  */
 
-static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
-			       bool interruptible,
-			       bool no_wait_reserve,
-			       bool no_wait_gpu)
+static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo,
+					  bool interruptible,
+					  bool no_wait_gpu)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
+	struct ttm_bo_driver *driver = bdev->driver;
 	struct ttm_bo_global *glob = bo->glob;
 	int put_count;
 	int ret = 0;
 
-retry:
 	spin_lock(&bdev->fence_lock);
-	ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
-	spin_unlock(&bdev->fence_lock);
+	ret = ttm_bo_wait(bo, false, false, true);
 
-	if (unlikely(ret != 0))
+	if (ret && no_wait_gpu) {
+		spin_unlock(&bdev->fence_lock);
+		atomic_set(&bo->reserved, 0);
+		wake_up_all(&bo->event_queue);
+		spin_unlock(&glob->lru_lock);
 		return ret;
+	} else if (ret) {
+		void *sync_obj;
 
-retry_reserve:
-	spin_lock(&glob->lru_lock);
-
-	if (unlikely(list_empty(&bo->ddestroy))) {
+		/**
+		 * Take a reference to the fence and unreserve,
+		 * at this point the buffer should be dead, so
+		 * no new sync objects can be attached.
+		 */
+		sync_obj = driver->sync_obj_ref(&bo->sync_obj);
+		spin_unlock(&bdev->fence_lock);
+
+		atomic_set(&bo->reserved, 0);
+		wake_up_all(&bo->event_queue);
 		spin_unlock(&glob->lru_lock);
-		return 0;
-	}
-
-	ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
-
-	if (unlikely(ret == -EBUSY)) {
-		spin_unlock(&glob->lru_lock);
-		if (likely(!no_wait_reserve))
-			ret = ttm_bo_wait_unreserved(bo, interruptible);
-		if (unlikely(ret != 0))
+
+		ret = driver->sync_obj_wait(sync_obj, false, interruptible);
+		driver->sync_obj_unref(&sync_obj);
+		if (ret)
 			return ret;
 
-		goto retry_reserve;
-	}
-
-	BUG_ON(ret != 0);
+		/* remove sync_obj with ttm_bo_wait */
+		spin_lock(&bdev->fence_lock);
+		ret = ttm_bo_wait(bo, false, false, true);
+		spin_unlock(&bdev->fence_lock);
 
-	/**
-	 * We can re-check for sync object without taking
-	 * the bo::lock since setting the sync object requires
-	 * also bo::reserved. A busy object at this point may
-	 * be caused by another thread recently starting an accelerated
-	 * eviction.
-	 */
+		WARN_ON(ret);
 
-	if (unlikely(bo->sync_obj)) {
+		spin_lock(&glob->lru_lock);
+	} else {
+		spin_unlock(&bdev->fence_lock);
 		atomic_set(&bo->reserved, 0);
 		wake_up_all(&bo->event_queue);
+	}
+
+	if (unlikely(list_empty(&bo->ddestroy))) {
 		spin_unlock(&glob->lru_lock);
-		goto retry;
+		return 0;
 	}
 
 	put_count = ttm_bo_del_from_lru(bo);
@@ -647,9 +646,13 @@ static int ttm_bo_delayed_delete(struct ttm_bo_device *bdev, bool remove_all)
 			kref_get(&nentry->list_kref);
 		}
 
-		spin_unlock(&glob->lru_lock);
-		ret = ttm_bo_cleanup_refs(entry, false, !remove_all,
-					  !remove_all);
+		ret = ttm_bo_reserve_locked(entry, false, !remove_all, false, 0);
+		if (!ret)
+			ret = ttm_bo_cleanup_refs_and_unlock(entry, false,
+							     !remove_all);
+		else
+			spin_unlock(&glob->lru_lock);
+
 		kref_put(&entry->list_kref, ttm_bo_release_list);
 		entry = nentry;
@@ -803,9 +806,13 @@ retry:
 	kref_get(&bo->list_kref);
 
 	if (!list_empty(&bo->ddestroy)) {
-		spin_unlock(&glob->lru_lock);
-		ret = ttm_bo_cleanup_refs(bo, interruptible,
-					  no_wait_reserve, no_wait_gpu);
+		ret = ttm_bo_reserve_locked(bo, interruptible, no_wait_reserve, false, 0);
+		if (!ret)
+			ret = ttm_bo_cleanup_refs_and_unlock(bo, interruptible,
+							     no_wait_gpu);
+		else
+			spin_unlock(&glob->lru_lock);
+
 		kref_put(&bo->list_kref, ttm_bo_release_list);
 
 		return ret;
@@ -1799,8 +1806,9 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink)
 	kref_get(&bo->list_kref);
 
 	if (!list_empty(&bo->ddestroy)) {
-		spin_unlock(&glob->lru_lock);
-		(void) ttm_bo_cleanup_refs(bo, false, false, false);
+		ttm_bo_reserve_locked(bo, false, false, false, 0);
+		ttm_bo_cleanup_refs_and_unlock(bo, false, false);
+
 		kref_put(&bo->list_kref, ttm_bo_release_list);
 		spin_lock(&glob->lru_lock);
 		continue;