These patches attempt to optimize the delayed destroy handling. The first patch removes the need for the newly introduced mem_put_locked callback. The second patch optimizes the delayed buffer destruction somewhat, but has not yet seen extensive testing.
Release the lru spinlock early.
Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c         |   30 +++++-------------------------
 drivers/gpu/drm/ttm/ttm_bo_manager.c |   10 ----------
 include/drm/ttm/ttm_bo_driver.h      |    2 --
 3 files changed, 5 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index fd8344d..7497b79 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -439,36 +439,25 @@ out_err:
 }
 
 /**
- * Call bo::reserved and with the lru lock held.
+ * Call bo::reserved.
  * Will release GPU memory type usage on destruction.
- * This is the place to put in driver specific hooks.
- * Will release the bo::reserved lock and the
- * lru lock on exit.
+ * This is the place to put in driver specific hooks to release
+ * driver private resources.
+ * Will release the bo::reserved lock.
  */
 
 static void ttm_bo_cleanup_memtype_use(struct ttm_buffer_object *bo)
 {
-        struct ttm_bo_global *glob = bo->glob;
-
         if (bo->ttm) {
-
-                /**
-                 * Release the lru_lock, since we don't want to have
-                 * an atomic requirement on ttm_tt[unbind|destroy].
-                 */
-
-                spin_unlock(&glob->lru_lock);
                 ttm_tt_unbind(bo->ttm);
                 ttm_tt_destroy(bo->ttm);
                 bo->ttm = NULL;
-                spin_lock(&glob->lru_lock);
         }
 
-        ttm_bo_mem_put_locked(bo, &bo->mem);
+        ttm_bo_mem_put(bo, &bo->mem);
 
         atomic_set(&bo->reserved, 0);
         wake_up_all(&bo->event_queue);
-        spin_unlock(&glob->lru_lock);
 }
 
@@ -788,15 +777,6 @@ void ttm_bo_mem_put(struct ttm_buffer_object *bo, struct ttm_mem_reg *mem)
 }
 EXPORT_SYMBOL(ttm_bo_mem_put);
 
-void ttm_bo_mem_put_locked(struct ttm_buffer_object *bo, struct ttm_mem_reg *mem)
-{
-        struct ttm_mem_type_manager *man = &bo->bdev->man[mem->mem_type];
-
-        if (mem->mm_node)
-                (*man->func->put_node_locked)(man, mem);
-}
-EXPORT_SYMBOL(ttm_bo_mem_put_locked);
-
 /**
  * Repeatedly evict memory from the LRU for @mem_type until we create enough
  * space, or we've evicted everything and there isn't enough space.
diff --git a/drivers/gpu/drm/ttm/ttm_bo_manager.c b/drivers/gpu/drm/ttm/ttm_bo_manager.c
index 35c97b2..7410c19 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_manager.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_manager.c
@@ -90,15 +90,6 @@ static void ttm_bo_man_put_node(struct ttm_mem_type_manager *man,
         }
 }
 
-static void ttm_bo_man_put_node_locked(struct ttm_mem_type_manager *man,
-                                       struct ttm_mem_reg *mem)
-{
-        if (mem->mm_node) {
-                drm_mm_put_block(mem->mm_node);
-                mem->mm_node = NULL;
-        }
-}
-
 static int ttm_bo_man_init(struct ttm_mem_type_manager *man,
                            unsigned long p_size)
 {
@@ -152,7 +143,6 @@ const struct ttm_mem_type_manager_func ttm_bo_manager_func = {
         ttm_bo_man_takedown,
         ttm_bo_man_get_node,
         ttm_bo_man_put_node,
-        ttm_bo_man_put_node_locked,
         ttm_bo_man_debug
 };
 EXPORT_SYMBOL(ttm_bo_manager_func);
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index d0ff529..d01b4dd 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -214,8 +214,6 @@ struct ttm_mem_type_manager_func {
                          struct ttm_mem_reg *mem);
         void (*put_node)(struct ttm_mem_type_manager *man,
                          struct ttm_mem_reg *mem);
-        void (*put_node_locked)(struct ttm_mem_type_manager *man,
-                                struct ttm_mem_reg *mem);
         void (*debug)(struct ttm_mem_type_manager *man, const char *prefix);
 };
This commit replaces the ttm_bo_cleanup_refs function with two new functions: one for the case where the bo is not yet on the delayed-destroy list, and one for the case where the bo was on the delayed-destroy list, at least at the time of the call. This makes it possible to optimize the two cases somewhat.

It also makes it possible to destroy buffers directly when they are on the delayed-delete list and are about to be evicted or swapped out. Previously such buffers were only evicted or swapped, and their destruction was left to the delayed buffer destruction thread.
Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c |  171 ++++++++++++++++++++++++++----------
 1 files changed, 107 insertions(+), 64 deletions(-)
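To make the control flow of the split easier to follow before reading the diff, here is a rough, self-contained userspace model of the two paths. It is a sketch only: the struct fields, helper names, and decision logic below are simplifications invented for illustration, not the kernel code, which also has to juggle bo::lock, the lru lock, and reserve trylocking.

#include <stdbool.h>
#include <stdio.h>

/* Toy model of the two cleanup paths described in the commit message. */
struct bo {
        bool idle;              /* GPU finished with the buffer? */
        bool reservable;        /* would a reserve trylock succeed? */
        bool on_ddestroy;       /* already on the delayed-destroy list? */
};

static void destroy_now(struct bo *bo)
{
        /* stands in for ttm_bo_cleanup_memtype_use(): unbind ttm, free node */
        printf("bo %p destroyed\n", (void *)bo);
}

/* First-time path (from release): destroy immediately if possible, else queue. */
static void cleanup_refs_or_queue(struct bo *bo)
{
        if (bo->idle && bo->reservable) {
                destroy_now(bo);
        } else {
                bo->on_ddestroy = true; /* the delayed worker retries later */
                printf("bo %p queued for delayed destroy\n", (void *)bo);
        }
}

/* Retry path (delayed worker, eviction, swapout): bo is already queued. */
static int cleanup_refs(struct bo *bo, bool no_wait_gpu)
{
        if (!bo->idle && no_wait_gpu)
                return -16;     /* stands in for -EBUSY */
        destroy_now(bo);
        bo->on_ddestroy = false;
        return 0;
}

int main(void)
{
        struct bo bo = { .idle = false, .reservable = true };
        cleanup_refs_or_queue(&bo);     /* busy: gets queued */
        bo.idle = true;
        return cleanup_refs(&bo, true); /* idle now: destroyed, returns 0 */
}

The point of the split is visible even in this toy: the retry path can now be invoked directly from eviction and swapout, instead of deferring all destruction to the worker.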
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 7497b79..2936629 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -460,99 +460,122 @@ static void ttm_bo_cleanup_memtype_use(struct ttm_buffer_object *bo)
         wake_up_all(&bo->event_queue);
 }
 
-
-/**
- * If bo idle, remove from delayed- and lru lists, and unref.
- * If not idle, and already on delayed list, do nothing.
- * If not idle, and not on delayed list, put on delayed list,
- * up the list_kref and schedule a delayed list check.
- */
-
-static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, bool remove_all)
+static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
 {
         struct ttm_bo_device *bdev = bo->bdev;
         struct ttm_bo_global *glob = bo->glob;
-        struct ttm_bo_driver *driver = bdev->driver;
+        struct ttm_bo_driver *driver;
+        void *sync_obj;
+        void *sync_obj_arg;
+        int put_count;
         int ret;
 
         spin_lock(&bo->lock);
-retry:
-        (void) ttm_bo_wait(bo, false, false, !remove_all);
-
+        (void) ttm_bo_wait(bo, false, false, true);
         if (!bo->sync_obj) {
-                int put_count;
-
-                spin_unlock(&bo->lock);
 
                 spin_lock(&glob->lru_lock);
-                ret = ttm_bo_reserve_locked(bo, false, !remove_all, false, 0);
 
                 /**
-                 * Someone else has the object reserved. Bail and retry.
+                 * Lock inversion between bo::reserve and bo::lock here,
+                 * but that's OK, since we're only trylocking.
                  */
 
-                if (unlikely(ret == -EBUSY)) {
-                        spin_unlock(&glob->lru_lock);
-                        spin_lock(&bo->lock);
-                        goto requeue;
-                }
-
-                /**
-                 * We can re-check for sync object without taking
-                 * the bo::lock since setting the sync object requires
-                 * also bo::reserved. A busy object at this point may
-                 * be caused by another thread starting an accelerated
-                 * eviction.
-                 */
+                ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
 
-                if (unlikely(bo->sync_obj)) {
-                        atomic_set(&bo->reserved, 0);
-                        wake_up_all(&bo->event_queue);
-                        spin_unlock(&glob->lru_lock);
-                        spin_lock(&bo->lock);
-                        if (remove_all)
-                                goto retry;
-                        else
-                                goto requeue;
-                }
+                if (unlikely(ret == -EBUSY))
+                        goto queue;
 
+                spin_unlock(&bo->lock);
                 put_count = ttm_bo_del_from_lru(bo);
 
-                if (!list_empty(&bo->ddestroy)) {
-                        list_del_init(&bo->ddestroy);
-                        ++put_count;
-                }
+                spin_unlock(&glob->lru_lock);
                 ttm_bo_cleanup_memtype_use(bo);
 
                 while (put_count--)
                         kref_put(&bo->list_kref, ttm_bo_ref_bug);
 
-                return 0;
+                return;
+        } else {
+                spin_lock(&glob->lru_lock);
         }
-requeue:
+queue:
+        sync_obj = bo->sync_obj;
+        sync_obj_arg = bo->sync_obj_arg;
+        driver = bdev->driver;
+
+        kref_get(&bo->list_kref);
+        list_add_tail(&bo->ddestroy, &bdev->ddestroy);
+        spin_unlock(&glob->lru_lock);
+        spin_unlock(&bo->lock);
+
+        if (sync_obj)
+                driver->sync_obj_flush(sync_obj, sync_obj_arg);
+        schedule_delayed_work(&bdev->wq,
+                              ((HZ / 100) < 1) ? 1 : HZ / 100);
+}
+
+/**
+ * function ttm_bo_cleanup_refs
+ * If bo idle, remove from delayed- and lru lists, and unref.
+ * If not idle, do nothing.
+ *
+ * @interruptible         Any sleeps should occur interruptibly.
+ * @no_wait_reserve       Never wait for reserve. Return -EBUSY instead.
+ * @no_wait_gpu           Never wait for gpu. Return -EBUSY instead.
+ */
+
+static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
+                               bool interruptible,
+                               bool no_wait_reserve,
+                               bool no_wait_gpu)
+{
+        struct ttm_bo_global *glob = bo->glob;
+        int put_count;
+        int ret = 0;
+
+retry:
+        spin_lock(&bo->lock);
+        ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
+        spin_unlock(&bo->lock);
+
+        if (unlikely(ret != 0))
+                return ret;
+
         spin_lock(&glob->lru_lock);
-        if (list_empty(&bo->ddestroy)) {
-                void *sync_obj = bo->sync_obj;
-                void *sync_obj_arg = bo->sync_obj_arg;
+        ret = ttm_bo_reserve_locked(bo, interruptible,
+                                    no_wait_reserve, false, 0);
 
-                kref_get(&bo->list_kref);
-                list_add_tail(&bo->ddestroy, &bdev->ddestroy);
+        if (unlikely(ret != 0) || list_empty(&bo->ddestroy)) {
                 spin_unlock(&glob->lru_lock);
-                spin_unlock(&bo->lock);
+                return ret;
+        }
 
-                if (sync_obj)
-                        driver->sync_obj_flush(sync_obj, sync_obj_arg);
-                schedule_delayed_work(&bdev->wq,
-                                      ((HZ / 100) < 1) ? 1 : HZ / 100);
-                ret = 0;
+        /**
+         * We can re-check for sync object without taking
+         * the bo::lock since setting the sync object requires
+         * also bo::reserved. A busy object at this point may
+         * be caused by another thread recently starting an accelerated
+         * eviction.
+         */
 
-        } else {
+        if (unlikely(bo->sync_obj)) {
+                atomic_set(&bo->reserved, 0);
+                wake_up_all(&bo->event_queue);
                 spin_unlock(&glob->lru_lock);
-                spin_unlock(&bo->lock);
-                ret = -EBUSY;
+                goto retry;
         }
 
-        return ret;
+        put_count = ttm_bo_del_from_lru(bo);
+        list_del_init(&bo->ddestroy);
+        ++put_count;
+
+        ttm_bo_cleanup_memtype_use(bo);
+
+        while (put_count--)
+                kref_put(&bo->list_kref, ttm_bo_ref_bug);
+
+        return 0;
 }
 
 /**
@@ -584,7 +607,8 @@ static int ttm_bo_delayed_delete(struct ttm_bo_device *bdev, bool remove_all)
                 }
 
                 spin_unlock(&glob->lru_lock);
-                ret = ttm_bo_cleanup_refs(entry, remove_all);
+                ret = ttm_bo_cleanup_refs(entry, false, !remove_all,
+                                          !remove_all);
                 kref_put(&entry->list_kref, ttm_bo_release_list);
                 entry = nentry;
 
@@ -627,7 +651,7 @@ static void ttm_bo_release(struct kref *kref)
                 bo->vm_node = NULL;
         }
         write_unlock(&bdev->vm_lock);
-        ttm_bo_cleanup_refs(bo, false);
+        ttm_bo_cleanup_refs_or_queue(bo);
         kref_put(&bo->list_kref, ttm_bo_release_list);
         write_lock(&bdev->vm_lock);
 }
@@ -735,6 +759,18 @@ retry:
         bo = list_first_entry(&man->lru, struct ttm_buffer_object, lru);
         kref_get(&bo->list_kref);
 
+        if (!list_empty(&bo->ddestroy)) {
+                spin_unlock(&glob->lru_lock);
+                ret = ttm_bo_cleanup_refs(bo, interruptible,
+                                          no_wait_reserve, no_wait_gpu);
+                kref_put(&bo->list_kref, ttm_bo_release_list);
+
+                if (likely(ret == 0 || ret == -ERESTARTSYS))
+                        return ret;
+
+                goto retry;
+        }
+
         ret = ttm_bo_reserve_locked(bo, false, no_wait_reserve, false, 0);
 
         if (unlikely(ret == -EBUSY)) {
@@ -1758,6 +1794,13 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink)
                         struct ttm_buffer_object, swap);
         kref_get(&bo->list_kref);
 
+        if (!list_empty(&bo->ddestroy)) {
+                spin_unlock(&glob->lru_lock);
+                (void) ttm_bo_cleanup_refs(bo, false, false, false);
+                kref_put(&bo->list_kref, ttm_bo_release_list);
+                continue;
+        }
+
         /**
          * Reserve buffer. Since we unlock while sleeping, we need
          * to re-check that nobody removed us from the swap-list while
On Thu, Oct 14, 2010 at 3:18 AM, Thomas Hellstrom <thellstrom@vmware.com> wrote:
This commit replaces the ttm_bo_cleanup_refs function with two new functions: one for the case where the bo is not yet on the delayed-destroy list, and one for the case where the bo was on the delayed-destroy list, at least at the time of the call. This makes it possible to optimize the two cases somewhat.
I tried booting this today, but on radeon, starting X hit a recursive spin lock on the lru lock.
(annotated below)
+ * @interruptible         Any sleeps should occur interruptibly.
+ * @no_wait_reserve       Never wait for reserve. Return -EBUSY instead.
+ * @no_wait_gpu           Never wait for gpu. Return -EBUSY instead.
+ */
+
+static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
+                               bool interruptible,
+                               bool no_wait_reserve,
+                               bool no_wait_gpu)
+{
+        struct ttm_bo_global *glob = bo->glob;
+        int put_count;
+        int ret = 0;
+
+retry:
+        spin_lock(&bo->lock);
+        ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
+        spin_unlock(&bo->lock);
+
+        if (unlikely(ret != 0))
+                return ret;
+
         spin_lock(&glob->lru_lock);
^^ take LRU lock

-        if (list_empty(&bo->ddestroy)) {
-                void *sync_obj = bo->sync_obj;
-                void *sync_obj_arg = bo->sync_obj_arg;
+        ret = ttm_bo_reserve_locked(bo, interruptible,
+                                    no_wait_reserve, false, 0);
 
-                kref_get(&bo->list_kref);
-                list_add_tail(&bo->ddestroy, &bdev->ddestroy);
+        if (unlikely(ret != 0) || list_empty(&bo->ddestroy)) {
                 spin_unlock(&glob->lru_lock);
^^ drop in error path.

-                spin_unlock(&bo->lock);
+                return ret;
+        }
 
-                if (sync_obj)
-                        driver->sync_obj_flush(sync_obj, sync_obj_arg);
-                schedule_delayed_work(&bdev->wq,
-                                      ((HZ / 100) < 1) ? 1 : HZ / 100);
-                ret = 0;
+        /**
+         * We can re-check for sync object without taking
+         * the bo::lock since setting the sync object requires
+         * also bo::reserved. A busy object at this point may
+         * be caused by another thread recently starting an accelerated
+         * eviction.
+         */
 
-        } else {
+        if (unlikely(bo->sync_obj)) {
+                atomic_set(&bo->reserved, 0);
+                wake_up_all(&bo->event_queue);
                 spin_unlock(&glob->lru_lock);
^^ drop for retry path.

-                spin_unlock(&bo->lock);
-                ret = -EBUSY;
+                goto retry;
         }
 
-        return ret;
+        put_count = ttm_bo_del_from_lru(bo);
+        list_del_init(&bo->ddestroy);
+        ++put_count;
+
+        ttm_bo_cleanup_memtype_use(bo);
^^ get into cleanup_memtype_use, which calls ttm_bo_mem_put, which tries to take the lru lock inside ttm_bo_man_put_node
Dave.
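The failure mode annotated above is a plain self-deadlock: the lru lock is a non-recursive spinlock, so re-taking it from the same thread spins forever. Below is a minimal, runnable userspace model of that call chain; the function names are only stand-ins for the TTM functions mentioned in the annotation, and the program hangs by design (build with -lpthread).

#include <pthread.h>
#include <stdio.h>

static pthread_spinlock_t lru_lock;

/* models ttm_bo_man_put_node(), which now takes the lru lock itself */
static void man_put_node(void)
{
        pthread_spin_lock(&lru_lock);   /* second acquisition: spins forever */
        printf("node freed\n");
        pthread_spin_unlock(&lru_lock);
}

/* models ttm_bo_cleanup_memtype_use() -> ttm_bo_mem_put() */
static void cleanup_memtype_use(void)
{
        man_put_node();
}

int main(void)
{
        pthread_spin_init(&lru_lock, PTHREAD_PROCESS_PRIVATE);
        pthread_spin_lock(&lru_lock);   /* caller still holds the lru lock */
        cleanup_memtype_use();          /* deadlocks here, as in the report */
        pthread_spin_unlock(&lru_lock); /* never reached */
        return 0;
}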
On 10/19/2010 05:57 AM, Dave Airlie wrote:
On Thu, Oct 14, 2010 at 3:18 AM, Thomas Hellstrom <thellstrom@vmware.com> wrote:
This commit replaces the ttm_bo_cleanup_refs function with two new functions: one for the case where the bo is not yet on the delayed-destroy list, and one for the case where the bo was on the delayed-destroy list, at least at the time of the call. This makes it possible to optimize the two cases somewhat.
I tried booting this today, but on radeon, starting X hit a recursive spin lock on the lru lock.
(annotated below)
New patches sent.
/Thomas
On Thu, Oct 14, 2010 at 3:17 AM, Thomas Hellstrom <thellstrom@vmware.com> wrote:
Release the lru spinlock early.
Actually I think it's this patch that introduces the recursion issue, since we still seem to hold the lock on entry into cleanup_memtype_use and never drop it.
Dave.
Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c         |   30 +++++-------------------------
 drivers/gpu/drm/ttm/ttm_bo_manager.c |   10 ----------
 include/drm/ttm/ttm_bo_driver.h      |    2 --
 3 files changed, 5 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index fd8344d..7497b79 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -439,36 +439,25 @@ out_err:
 }
 
 /**
- * Call bo::reserved and with the lru lock held.
+ * Call bo::reserved.
  * Will release GPU memory type usage on destruction.
- * This is the place to put in driver specific hooks.
- * Will release the bo::reserved lock and the
- * lru lock on exit.
+ * This is the place to put in driver specific hooks to release
+ * driver private resources.
+ * Will release the bo::reserved lock.
  */
 
 static void ttm_bo_cleanup_memtype_use(struct ttm_buffer_object *bo)
 {
-        struct ttm_bo_global *glob = bo->glob;
-
         if (bo->ttm) {
-
-                /**
-                 * Release the lru_lock, since we don't want to have
-                 * an atomic requirement on ttm_tt[unbind|destroy].
-                 */
-
-                spin_unlock(&glob->lru_lock);
                 ttm_tt_unbind(bo->ttm);
                 ttm_tt_destroy(bo->ttm);
                 bo->ttm = NULL;
-                spin_lock(&glob->lru_lock);
         }
 
-        ttm_bo_mem_put_locked(bo, &bo->mem);
+        ttm_bo_mem_put(bo, &bo->mem);
 
         atomic_set(&bo->reserved, 0);
         wake_up_all(&bo->event_queue);
-        spin_unlock(&glob->lru_lock);
 }
 
@@ -788,15 +777,6 @@ void ttm_bo_mem_put(struct ttm_buffer_object *bo, struct ttm_mem_reg *mem)
 }
 EXPORT_SYMBOL(ttm_bo_mem_put);
 
-void ttm_bo_mem_put_locked(struct ttm_buffer_object *bo, struct ttm_mem_reg *mem)
-{
-        struct ttm_mem_type_manager *man = &bo->bdev->man[mem->mem_type];
-
-        if (mem->mm_node)
-                (*man->func->put_node_locked)(man, mem);
-}
-EXPORT_SYMBOL(ttm_bo_mem_put_locked);
-
 /**
  * Repeatedly evict memory from the LRU for @mem_type until we create enough
  * space, or we've evicted everything and there isn't enough space.
diff --git a/drivers/gpu/drm/ttm/ttm_bo_manager.c b/drivers/gpu/drm/ttm/ttm_bo_manager.c
index 35c97b2..7410c19 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_manager.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_manager.c
@@ -90,15 +90,6 @@ static void ttm_bo_man_put_node(struct ttm_mem_type_manager *man,
         }
 }
 
-static void ttm_bo_man_put_node_locked(struct ttm_mem_type_manager *man,
-                                       struct ttm_mem_reg *mem)
-{
-        if (mem->mm_node) {
-                drm_mm_put_block(mem->mm_node);
-                mem->mm_node = NULL;
-        }
-}
-
 static int ttm_bo_man_init(struct ttm_mem_type_manager *man,
                            unsigned long p_size)
 {
@@ -152,7 +143,6 @@ const struct ttm_mem_type_manager_func ttm_bo_manager_func = {
         ttm_bo_man_takedown,
         ttm_bo_man_get_node,
         ttm_bo_man_put_node,
-        ttm_bo_man_put_node_locked,
         ttm_bo_man_debug
 };
 EXPORT_SYMBOL(ttm_bo_manager_func);
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index d0ff529..d01b4dd 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -214,8 +214,6 @@ struct ttm_mem_type_manager_func {
                          struct ttm_mem_reg *mem);
         void (*put_node)(struct ttm_mem_type_manager *man,
                          struct ttm_mem_reg *mem);
-        void (*put_node_locked)(struct ttm_mem_type_manager *man,
-                                struct ttm_mem_reg *mem);
         void (*debug)(struct ttm_mem_type_manager *man, const char *prefix);
 };
--
1.6.2.5