This patch series improves the performance and correctness of the reserving / fencing sequence.
Avoid the repeated ttm_bo_unreserve() spinlock round-trips by calling ttm_eu_backoff_reservation_locked() once under the lru spinlock.
Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
---
 drivers/gpu/drm/ttm/ttm_execbuf_util.c |   15 ++++++++-------
 1 files changed, 8 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
index efb42a8..3f0d23c 100644
--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
@@ -99,15 +99,16 @@ static int ttm_eu_wait_unreserved_locked(struct list_head *list,
 void ttm_eu_backoff_reservation(struct list_head *list)
 {
         struct ttm_validate_buffer *entry;
+        struct ttm_bo_global *glob;
 
-        list_for_each_entry(entry, list, head) {
-                struct ttm_buffer_object *bo = entry->bo;
-                if (!entry->reserved)
-                        continue;
+        if (list_empty(list))
+                return;
 
-                entry->reserved = false;
-                ttm_bo_unreserve(bo);
-        }
+        entry = list_first_entry(list, struct ttm_validate_buffer, head);
+        glob = entry->bo->glob;
+        spin_lock(&glob->lru_lock);
+        ttm_eu_backoff_reservation_locked(list);
+        spin_unlock(&glob->lru_lock);
 }
 EXPORT_SYMBOL(ttm_eu_backoff_reservation);
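For reference, ttm_eu_backoff_reservation_locked() is the pre-existing static helper in ttm_execbuf_util.c that the rewritten function now calls. Reconstructed from the surrounding code, it looks roughly like this (a sketch for orientation, not part of the diff):

static void ttm_eu_backoff_reservation_locked(struct list_head *list)
{
        struct ttm_validate_buffer *entry;

        list_for_each_entry(entry, list, head) {
                struct ttm_buffer_object *bo = entry->bo;
                if (!entry->reserved)
                        continue;

                /* Put the bo back on the lru list it was taken off of
                 * when it was reserved; requires lru_lock held. */
                if (entry->removed) {
                        ttm_bo_add_to_lru(bo);
                        entry->removed = false;
                }
                entry->reserved = false;
                atomic_set(&bo->reserved, 0);
                wake_up_all(&bo->event_queue);
        }
}

The win is that the whole list is now backed off under a single lru_lock acquisition, instead of one lock/unlock pair per buffer inside ttm_bo_unreserve().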
Add an aid for the driver to detect deadlocks on multi-bo reservations. Update the documentation.
Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c    |   15 ++++++++++++---
 include/drm/ttm/ttm_bo_driver.h |   25 ++++++++++++++++++++++---
 2 files changed, 34 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 9ef893d..5d87508 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -223,9 +223,18 @@ int ttm_bo_reserve_locked(struct ttm_buffer_object *bo,
         /**
          * Deadlock avoidance for multi-bo reserving.
          */
-        if (use_sequence && bo->seq_valid &&
-            (sequence - bo->val_seq < (1 << 31))) {
-                return -EAGAIN;
+        if (use_sequence && bo->seq_valid) {
+                /**
+                 * We've already reserved this one.
+                 */
+                if (unlikely(sequence == bo->val_seq))
+                        return -EDEADLK;
+                /**
+                 * Already reserved by a thread that will not back
+                 * off for us. We need to back off.
+                 */
+                if (unlikely(sequence - bo->val_seq < (1 << 31)))
+                        return -EAGAIN;
         }
 
         if (no_wait)
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 95068e6..1e25a40 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -859,6 +859,9 @@ extern void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo);
 * try again. (only if use_sequence == 1).
 * -ERESTARTSYS: A wait for the buffer to become unreserved was interrupted by
 * a signal. Release all buffer reservations and return to user-space.
+ * -EBUSY: The function needed to sleep, but @no_wait was true
+ * -EDEADLK: Bo already reserved using @sequence. This error code will only
+ * be returned if @use_sequence is set to true.
 */
 extern int ttm_bo_reserve(struct ttm_buffer_object *bo,
                           bool interruptible,
@@ -868,11 +871,27 @@ extern int ttm_bo_reserve(struct ttm_buffer_object *bo,
 /**
 * ttm_bo_reserve_locked:
 *
- * Similar to ttm_bo_reserve, but must be called with the glob::lru_lock
- * spinlock held, and will not remove reserved buffers from the lru lists.
+ * @bo: A pointer to a struct ttm_buffer_object.
+ * @interruptible: Sleep interruptible if waiting.
+ * @no_wait: Don't sleep while trying to reserve, rather return -EBUSY.
+ * @use_sequence: If @bo is already reserved, Only sleep waiting for
+ * it to become unreserved if @sequence < (@bo)->sequence.
+ *
+ * Must be called with struct ttm_bo_global::lru_lock held,
+ * and will not remove reserved buffers from the lru lists.
 * The function may release the LRU spinlock if it needs to sleep.
+ * Otherwise identical to ttm_bo_reserve.
+ *
+ * Returns:
+ * -EAGAIN: The reservation may cause a deadlock.
+ * Release all buffer reservations, wait for @bo to become unreserved and
+ * try again. (only if use_sequence == 1).
+ * -ERESTARTSYS: A wait for the buffer to become unreserved was interrupted by
+ * a signal. Release all buffer reservations and return to user-space.
+ * -EBUSY: The function needed to sleep, but @no_wait was true
+ * -EDEADLK: Bo already reserved using @sequence. This error code will only
+ * be returned if @use_sequence is set to true.
 */
-
 extern int ttm_bo_reserve_locked(struct ttm_buffer_object *bo,
                                  bool interruptible,
                                  bool no_wait, bool use_sequence,
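To show how a driver might consume the new error code, here is a hedged sketch of a multi-bo reservation loop (the function name and the policy chosen on -EDEADLK are illustrative assumptions; ttm_eu_reserve_buffers() contains the real version of this logic):

static int my_reserve_list(struct list_head *list, uint32_t val_seq)
{
        struct ttm_validate_buffer *entry;
        int ret;

retry:
        list_for_each_entry(entry, list, head) {
                ret = ttm_bo_reserve(entry->bo, true, false, true, val_seq);
                switch (ret) {
                case 0:
                        entry->reserved = true;
                        break;
                case -EDEADLK:
                        /* Already reserved by us with @val_seq: the same
                         * bo occurs twice on this list. */
                        ttm_eu_backoff_reservation(list);
                        return -EINVAL;
                case -EAGAIN:
                        /* Reserved by a thread that will not back off
                         * for us: back off, wait, then retry. */
                        ttm_eu_backoff_reservation(list);
                        ret = ttm_bo_wait_unreserved(entry->bo, true);
                        if (unlikely(ret != 0))
                                return ret;
                        goto retry;
                default:
                        ttm_eu_backoff_reservation(list);
                        return ret;
                }
        }
        return 0;
}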
The bo lock is used only to protect the bo sync object members, and since it is a per-bo lock, fencing a buffer list will see a lot of locks and unlocks. Replace it with a per-device lock that protects the sync object members on *all* bos. Reading and setting these members will always be very quick, so the risk of heavy lock contention is microscopic. Note that waiting for sync objects will always take place outside of this lock.
The bo device fence lock will eventually be replaced with a seqlock / RCU mechanism so we can determine that a bo is idle under an RCU read lock / read seqlock.

However, this change will allow us to batch fencing and unreserving of buffers with a minimal amount of locking.
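As a rough illustration of the seqlock direction mentioned above, an idle check under a read seqlock could look like the sketch below. Neither the seqlock_t member nor the helper exists in this series; both are assumptions:

#include <linux/seqlock.h>

static bool my_bo_is_idle(struct ttm_buffer_object *bo,
                          seqlock_t *fence_seqlock)
{
        unsigned seq;
        bool idle;

        /* Retry the read section if a writer updated the sync object
         * members while we were looking at them. */
        do {
                seq = read_seqbegin(fence_seqlock);
                idle = (bo->sync_obj == NULL);
        } while (read_seqretry(fence_seqlock, seq));

        return idle;
}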
Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
---
 drivers/gpu/drm/nouveau/nouveau_gem.c  |   12 +++---
 drivers/gpu/drm/radeon/radeon_object.c |    4 +-
 drivers/gpu/drm/radeon/radeon_object.h |    4 +-
 drivers/gpu/drm/ttm/ttm_bo.c           |   55 ++++++++++++++++---------------
 drivers/gpu/drm/ttm/ttm_bo_util.c      |    7 ++--
 drivers/gpu/drm/ttm/ttm_bo_vm.c        |    6 ++--
 drivers/gpu/drm/ttm/ttm_execbuf_util.c |    7 ++--
 include/drm/ttm/ttm_bo_api.h           |    6 +--
 include/drm/ttm/ttm_bo_driver.h        |    3 ++
 9 files changed, 53 insertions(+), 51 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index 5c4c929..c7f117b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -228,10 +228,10 @@ validate_fini_list(struct list_head *list, struct nouveau_fence *fence)
                 if (likely(fence)) {
                         struct nouveau_fence *prev_fence;
 
-                        spin_lock(&nvbo->bo.lock);
+                        spin_lock(&nvbo->bo.bdev->fence_lock);
                         prev_fence = nvbo->bo.sync_obj;
                         nvbo->bo.sync_obj = nouveau_fence_ref(fence);
-                        spin_unlock(&nvbo->bo.lock);
+                        spin_unlock(&nvbo->bo.bdev->fence_lock);
                         nouveau_fence_unref((void *)&prev_fence);
                 }
 
@@ -551,9 +551,9 @@ nouveau_gem_pushbuf_reloc_apply(struct drm_device *dev,
                         data |= r->vor;
                 }
 
-                spin_lock(&nvbo->bo.lock);
+                spin_lock(&nvbo->bo.bdev->fence_lock);
                 ret = ttm_bo_wait(&nvbo->bo, false, false, false);
-                spin_unlock(&nvbo->bo.lock);
+                spin_unlock(&nvbo->bo.bdev->fence_lock);
                 if (ret) {
                         NV_ERROR(dev, "reloc wait_idle failed: %d\n", ret);
                         break;
@@ -785,9 +785,9 @@ nouveau_gem_ioctl_cpu_prep(struct drm_device *dev, void *data,
         }
 
         if (req->flags & NOUVEAU_GEM_CPU_PREP_NOBLOCK) {
-                spin_lock(&nvbo->bo.lock);
+                spin_lock(&nvbo->bo.bdev->fence_lock);
                 ret = ttm_bo_wait(&nvbo->bo, false, false, no_wait);
-                spin_unlock(&nvbo->bo.lock);
+                spin_unlock(&nvbo->bo.bdev->fence_lock);
         } else {
                 ret = ttm_bo_synccpu_write_grab(&nvbo->bo, no_wait);
                 if (ret == 0)
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 8eb1834..8af5ae4 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -368,11 +368,11 @@ void radeon_bo_list_fence(struct list_head *head, void *fence)
 
         list_for_each_entry(lobj, head, list) {
                 bo = lobj->bo;
-                spin_lock(&bo->tbo.lock);
+                spin_lock(&bo->tbo.bdev->fence_lock);
                 old_fence = (struct radeon_fence *)bo->tbo.sync_obj;
                 bo->tbo.sync_obj = radeon_fence_ref(fence);
                 bo->tbo.sync_obj_arg = NULL;
-                spin_unlock(&bo->tbo.lock);
+                spin_unlock(&bo->tbo.bdev->fence_lock);
                 if (old_fence) {
                         radeon_fence_unref(&old_fence);
                 }
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
index 3481bc7..7885d07 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -126,12 +126,12 @@ static inline int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type,
         r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0);
         if (unlikely(r != 0))
                 return r;
-        spin_lock(&bo->tbo.lock);
+        spin_lock(&bo->tbo.bdev->fence_lock);
         if (mem_type)
                 *mem_type = bo->tbo.mem.mem_type;
         if (bo->tbo.sync_obj)
                 r = ttm_bo_wait(&bo->tbo, true, true, no_wait);
-        spin_unlock(&bo->tbo.lock);
+        spin_unlock(&bo->tbo.bdev->fence_lock);
         ttm_bo_unreserve(&bo->tbo);
         return r;
 }
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 5d87508..d93c73b 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -427,11 +427,9 @@ moved:
         }
 
         if (bo->mem.mm_node) {
-                spin_lock(&bo->lock);
                 bo->offset = (bo->mem.start << PAGE_SHIFT) +
                     bdev->man[bo->mem.mem_type].gpu_offset;
                 bo->cur_placement = bo->mem.placement;
-                spin_unlock(&bo->lock);
         } else
                 bo->offset = 0;
 
@@ -485,14 +483,14 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
         int put_count;
         int ret;
 
-        spin_lock(&bo->lock);
+        spin_lock(&bdev->fence_lock);
         (void) ttm_bo_wait(bo, false, false, true);
         if (!bo->sync_obj) {
 
                 spin_lock(&glob->lru_lock);
 
                 /**
-                 * Lock inversion between bo::reserve and bo::lock here,
+                 * Lock inversion between bo:reserve and bdev::fence_lock here,
                  * but that's OK, since we're only trylocking.
                  */
 
@@ -501,7 +499,7 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
                 if (unlikely(ret == -EBUSY))
                         goto queue;
 
-                spin_unlock(&bo->lock);
+                spin_unlock(&bdev->fence_lock);
                 put_count = ttm_bo_del_from_lru(bo);
 
                 spin_unlock(&glob->lru_lock);
@@ -522,7 +520,7 @@ queue:
         kref_get(&bo->list_kref);
         list_add_tail(&bo->ddestroy, &bdev->ddestroy);
         spin_unlock(&glob->lru_lock);
-        spin_unlock(&bo->lock);
+        spin_unlock(&bdev->fence_lock);
 
         if (sync_obj) {
                 driver->sync_obj_flush(sync_obj, sync_obj_arg);
@@ -547,14 +545,15 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
                                bool no_wait_reserve,
                                bool no_wait_gpu)
 {
+        struct ttm_bo_device *bdev = bo->bdev;
         struct ttm_bo_global *glob = bo->glob;
         int put_count;
         int ret = 0;
 
 retry:
-        spin_lock(&bo->lock);
+        spin_lock(&bdev->fence_lock);
         ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
-        spin_unlock(&bo->lock);
+        spin_unlock(&bdev->fence_lock);
 
         if (unlikely(ret != 0))
                 return ret;
@@ -707,9 +706,9 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible,
         struct ttm_placement placement;
         int ret = 0;
 
-        spin_lock(&bo->lock);
+        spin_lock(&bdev->fence_lock);
         ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
-        spin_unlock(&bo->lock);
+        spin_unlock(&bdev->fence_lock);
 
         if (unlikely(ret != 0)) {
                 if (ret != -ERESTARTSYS) {
@@ -1044,6 +1043,7 @@ int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
 {
         int ret = 0;
         struct ttm_mem_reg mem;
+        struct ttm_bo_device *bdev = bo->bdev;
 
         BUG_ON(!atomic_read(&bo->reserved));
 
@@ -1052,9 +1052,9 @@ int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
          * Have the driver move function wait for idle when necessary,
          * instead of doing it here.
          */
-        spin_lock(&bo->lock);
+        spin_lock(&bdev->fence_lock);
         ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
-        spin_unlock(&bo->lock);
+        spin_unlock(&bdev->fence_lock);
         if (ret)
                 return ret;
         mem.num_pages = bo->num_pages;
@@ -1171,7 +1171,6 @@ int ttm_bo_init(struct ttm_bo_device *bdev,
         }
         bo->destroy = destroy;
 
-        spin_lock_init(&bo->lock);
         kref_init(&bo->kref);
         kref_init(&bo->list_kref);
         atomic_set(&bo->cpu_writers, 0);
@@ -1535,7 +1534,7 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev,
         bdev->dev_mapping = NULL;
         bdev->glob = glob;
         bdev->need_dma32 = need_dma32;
-
+        spin_lock_init(&bdev->fence_lock);
         mutex_lock(&glob->device_list_mutex);
         list_add_tail(&bdev->device_list, &glob->device_list);
         mutex_unlock(&glob->device_list_mutex);
@@ -1659,6 +1658,7 @@ int ttm_bo_wait(struct ttm_buffer_object *bo,
                 bool lazy, bool interruptible, bool no_wait)
 {
         struct ttm_bo_driver *driver = bo->bdev->driver;
+        struct ttm_bo_device *bdev = bo->bdev;
         void *sync_obj;
         void *sync_obj_arg;
         int ret = 0;
@@ -1672,9 +1672,9 @@ int ttm_bo_wait(struct ttm_buffer_object *bo,
                         void *tmp_obj = bo->sync_obj;
                         bo->sync_obj = NULL;
                         clear_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags);
-                        spin_unlock(&bo->lock);
+                        spin_unlock(&bdev->fence_lock);
                         driver->sync_obj_unref(&tmp_obj);
-                        spin_lock(&bo->lock);
+                        spin_lock(&bdev->fence_lock);
                         continue;
                 }
 
@@ -1683,29 +1683,29 @@ int ttm_bo_wait(struct ttm_buffer_object *bo,
 
                 sync_obj = driver->sync_obj_ref(bo->sync_obj);
                 sync_obj_arg = bo->sync_obj_arg;
-                spin_unlock(&bo->lock);
+                spin_unlock(&bdev->fence_lock);
                 ret = driver->sync_obj_wait(sync_obj, sync_obj_arg,
                                             lazy, interruptible);
                 if (unlikely(ret != 0)) {
                         driver->sync_obj_unref(&sync_obj);
-                        spin_lock(&bo->lock);
+                        spin_lock(&bdev->fence_lock);
                         return ret;
                 }
-                spin_lock(&bo->lock);
+                spin_lock(&bdev->fence_lock);
                 if (likely(bo->sync_obj == sync_obj &&
                            bo->sync_obj_arg == sync_obj_arg)) {
                         void *tmp_obj = bo->sync_obj;
                         bo->sync_obj = NULL;
                         clear_bit(TTM_BO_PRIV_FLAG_MOVING,
                                   &bo->priv_flags);
-                        spin_unlock(&bo->lock);
+                        spin_unlock(&bdev->fence_lock);
                         driver->sync_obj_unref(&sync_obj);
                         driver->sync_obj_unref(&tmp_obj);
-                        spin_lock(&bo->lock);
+                        spin_lock(&bdev->fence_lock);
                 } else {
-                        spin_unlock(&bo->lock);
+                        spin_unlock(&bdev->fence_lock);
                         driver->sync_obj_unref(&sync_obj);
-                        spin_lock(&bo->lock);
+                        spin_lock(&bdev->fence_lock);
                 }
         }
         return 0;
@@ -1714,6 +1714,7 @@ EXPORT_SYMBOL(ttm_bo_wait);
 
 int ttm_bo_synccpu_write_grab(struct ttm_buffer_object *bo, bool no_wait)
 {
+        struct ttm_bo_device *bdev = bo->bdev;
         int ret = 0;
 
         /*
@@ -1723,9 +1724,9 @@ int ttm_bo_synccpu_write_grab(struct ttm_buffer_object *bo, bool no_wait)
         ret = ttm_bo_reserve(bo, true, no_wait, false, 0);
         if (unlikely(ret != 0))
                 return ret;
-        spin_lock(&bo->lock);
+        spin_lock(&bdev->fence_lock);
         ret = ttm_bo_wait(bo, false, true, no_wait);
-        spin_unlock(&bo->lock);
+        spin_unlock(&bdev->fence_lock);
         if (likely(ret == 0))
                 atomic_inc(&bo->cpu_writers);
         ttm_bo_unreserve(bo);
@@ -1797,9 +1798,9 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink)
          * Wait for GPU, then move to system cached.
          */
 
-        spin_lock(&bo->lock);
+        spin_lock(&bo->bdev->fence_lock);
         ret = ttm_bo_wait(bo, false, false, false);
-        spin_unlock(&bo->lock);
+        spin_unlock(&bo->bdev->fence_lock);
 
         if (unlikely(ret != 0))
                 goto out;
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 3106d5b..4b75133 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -337,7 +337,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
          * TODO: Explicit member copy would probably be better here.
          */
 
-        spin_lock_init(&fbo->lock);
         init_waitqueue_head(&fbo->event_queue);
         INIT_LIST_HEAD(&fbo->ddestroy);
         INIT_LIST_HEAD(&fbo->lru);
@@ -520,7 +519,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
         struct ttm_buffer_object *ghost_obj;
         void *tmp_obj = NULL;
 
-        spin_lock(&bo->lock);
+        spin_lock(&bdev->fence_lock);
         if (bo->sync_obj) {
                 tmp_obj = bo->sync_obj;
                 bo->sync_obj = NULL;
@@ -529,7 +528,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
         bo->sync_obj_arg = sync_obj_arg;
         if (evict) {
                 ret = ttm_bo_wait(bo, false, false, false);
-                spin_unlock(&bo->lock);
+                spin_unlock(&bdev->fence_lock);
                 if (tmp_obj)
                         driver->sync_obj_unref(&tmp_obj);
                 if (ret)
@@ -552,7 +551,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
          */
 
         set_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags);
-        spin_unlock(&bo->lock);
+        spin_unlock(&bdev->fence_lock);
         if (tmp_obj)
                 driver->sync_obj_unref(&tmp_obj);
 
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index fe6cb77..8dd446c 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -118,17 +118,17 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
          * move.
          */
 
-        spin_lock(&bo->lock);
+        spin_lock(&bdev->fence_lock);
         if (test_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags)) {
                 ret = ttm_bo_wait(bo, false, true, false);
-                spin_unlock(&bo->lock);
+                spin_unlock(&bdev->fence_lock);
                 if (unlikely(ret != 0)) {
                         retval = (ret != -ERESTARTSYS) ?
                             VM_FAULT_SIGBUS : VM_FAULT_NOPAGE;
                         goto out_unlock;
                 }
         } else
-                spin_unlock(&bo->lock);
+                spin_unlock(&bdev->fence_lock);
 
         ret = ttm_mem_io_reserve(bdev, &bo->mem);
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
index 3f0d23c..a3ae712 100644
--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
@@ -198,14 +198,15 @@ void ttm_eu_fence_buffer_objects(struct list_head *list, void *sync_obj)
 
         list_for_each_entry(entry, list, head) {
                 struct ttm_buffer_object *bo = entry->bo;
-                struct ttm_bo_driver *driver = bo->bdev->driver;
+                struct ttm_bo_device *bdev = bo->bdev;
+                struct ttm_bo_driver *driver = bdev->driver;
                 void *old_sync_obj;
 
-                spin_lock(&bo->lock);
+                spin_lock(&bdev->fence_lock);
                 old_sync_obj = bo->sync_obj;
                 bo->sync_obj = driver->sync_obj_ref(sync_obj);
                 bo->sync_obj_arg = entry->new_sync_obj_arg;
-                spin_unlock(&bo->lock);
+                spin_unlock(&bdev->fence_lock);
                 ttm_bo_unreserve(bo);
                 entry->reserved = false;
                 if (old_sync_obj)
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index b0fc9c1..edacd48 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -154,7 +154,6 @@ struct ttm_tt;
 * keeps one refcount. When this refcount reaches zero,
 * the object is destroyed.
 * @event_queue: Queue for processes waiting on buffer object status change.
- * @lock: spinlock protecting mostly synchronization members.
 * @mem: structure describing current placement.
 * @persistant_swap_storage: Usually the swap storage is deleted for buffers
 * pinned in physical memory. If this behaviour is not desired, this member
@@ -213,7 +212,6 @@ struct ttm_buffer_object {
         struct kref kref;
         struct kref list_kref;
         wait_queue_head_t event_queue;
-        spinlock_t lock;
 
         /**
          * Members protected by the bo::reserved lock.
@@ -248,10 +246,10 @@ struct ttm_buffer_object {
         atomic_t reserved;
 
         /**
-         * Members protected by the bo::lock
+         * Members protected by struct buffer_object_device::fence_lock
          * In addition, setting sync_obj to anything else
          * than NULL requires bo::reserved to be held. This allows for
-         * checking NULL while reserved but not holding bo::lock.
+         * checking NULL while reserved but not holding the mentioned lock.
          */
 
         void *sync_obj_arg;
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 1e25a40..ca8131e 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -510,6 +510,8 @@ struct ttm_bo_global {
 *
 * @driver: Pointer to a struct ttm_bo_driver struct setup by the driver.
 * @man: An array of mem_type_managers.
+ * @fence_lock: Protects the synchronizing members on *all* bos belonging
+ * to this device.
 * @addr_space_mm: Range manager for the device address space.
 * lru_lock: Spinlock that protects the buffer+device lru lists and
 * ddestroy lists.
@@ -531,6 +533,7 @@ struct ttm_bo_device {
         struct ttm_bo_driver *driver;
         rwlock_t vm_lock;
         struct ttm_mem_type_manager man[TTM_NUM_MEM_TYPES];
+        spinlock_t fence_lock;
         /*
          * Protected by the vm lock.
          */
Drastically reduce the number of spin lock / unlock operations by performing the unreserve and fence operations under the global locks.
Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c           |   11 +++++++--
 drivers/gpu/drm/ttm/ttm_execbuf_util.c |   36 ++++++++++++++++++++++---------
 include/drm/ttm/ttm_bo_driver.h        |   10 ++++++++
 include/drm/ttm/ttm_execbuf_util.h     |    2 +
 4 files changed, 45 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index d93c73b..551a5d3 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -299,14 +299,19 @@ int ttm_bo_reserve(struct ttm_buffer_object *bo,
         return ret;
 }
 
+void ttm_bo_unreserve_locked(struct ttm_buffer_object *bo)
+{
+        ttm_bo_add_to_lru(bo);
+        atomic_set(&bo->reserved, 0);
+        wake_up_all(&bo->event_queue);
+}
+
 void ttm_bo_unreserve(struct ttm_buffer_object *bo)
 {
         struct ttm_bo_global *glob = bo->glob;
 
         spin_lock(&glob->lru_lock);
-        ttm_bo_add_to_lru(bo);
-        atomic_set(&bo->reserved, 0);
-        wake_up_all(&bo->event_queue);
+        ttm_bo_unreserve_locked(bo);
         spin_unlock(&glob->lru_lock);
 }
 EXPORT_SYMBOL(ttm_bo_unreserve);
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
index a3ae712..7646fb5 100644
--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
@@ -195,22 +195,36 @@ EXPORT_SYMBOL(ttm_eu_reserve_buffers);
 void ttm_eu_fence_buffer_objects(struct list_head *list, void *sync_obj)
 {
         struct ttm_validate_buffer *entry;
+        struct ttm_buffer_object *bo;
+        struct ttm_bo_global *glob;
+        struct ttm_bo_device *bdev;
+        struct ttm_bo_driver *driver;
 
-        list_for_each_entry(entry, list, head) {
-                struct ttm_buffer_object *bo = entry->bo;
-                struct ttm_bo_device *bdev = bo->bdev;
-                struct ttm_bo_driver *driver = bdev->driver;
-                void *old_sync_obj;
+        if (list_empty(list))
+                return;
+
+        bo = list_first_entry(list, struct ttm_validate_buffer, head)->bo;
+        bdev = bo->bdev;
+        driver = bdev->driver;
+        glob = bo->glob;
 
-                spin_lock(&bdev->fence_lock);
-                old_sync_obj = bo->sync_obj;
+        spin_lock(&bdev->fence_lock);
+        spin_lock(&glob->lru_lock);
+
+        list_for_each_entry(entry, list, head) {
+                bo = entry->bo;
+                entry->old_sync_obj = bo->sync_obj;
                 bo->sync_obj = driver->sync_obj_ref(sync_obj);
                 bo->sync_obj_arg = entry->new_sync_obj_arg;
-                spin_unlock(&bdev->fence_lock);
-                ttm_bo_unreserve(bo);
+                ttm_bo_unreserve_locked(bo);
                 entry->reserved = false;
-                if (old_sync_obj)
-                        driver->sync_obj_unref(&old_sync_obj);
+        }
+        spin_unlock(&glob->lru_lock);
+        spin_unlock(&bdev->fence_lock);
+
+        list_for_each_entry(entry, list, head) {
+                if (entry->old_sync_obj)
+                        driver->sync_obj_unref(&entry->old_sync_obj);
         }
 }
 EXPORT_SYMBOL(ttm_eu_fence_buffer_objects);
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index ca8131e..cfb9ca4 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -910,6 +910,16 @@ extern int ttm_bo_reserve_locked(struct ttm_buffer_object *bo,
 extern void ttm_bo_unreserve(struct ttm_buffer_object *bo);
 
 /**
+ * ttm_bo_unreserve_locked
+ *
+ * @bo: A pointer to a struct ttm_buffer_object.
+ *
+ * Unreserve a previous reservation of @bo.
+ * Needs to be called with struct ttm_bo_global::lru_lock held.
+ */
+extern void ttm_bo_unreserve_locked(struct ttm_buffer_object *bo);
+
+/**
 * ttm_bo_wait_unreserved
 *
 * @bo: A pointer to a struct ttm_buffer_object.
diff --git a/include/drm/ttm/ttm_execbuf_util.h b/include/drm/ttm/ttm_execbuf_util.h
index fd09b84..535ab00 100644
--- a/include/drm/ttm/ttm_execbuf_util.h
+++ b/include/drm/ttm/ttm_execbuf_util.h
@@ -44,6 +44,7 @@
 * @reserved: Indicates whether @bo has been reserved for validation.
 * @removed: Indicates whether @bo has been removed from lru lists.
 * @put_count: Number of outstanding references on bo::list_kref.
+ * @old_sync_obj: Pointer to a sync object about to be unreferenced
 */
 
 struct ttm_validate_buffer {
@@ -53,6 +54,7 @@ struct ttm_validate_buffer {
         bool reserved;
         bool removed;
         int put_count;
+        void *old_sync_obj;
 };
 
 /**
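A detail worth noting in the new ttm_eu_fence_buffer_objects(): the old sync objects are stashed in entry->old_sync_obj and unreferenced in a second loop only after both fence_lock and lru_lock have been dropped, presumably because driver sync_obj_unref callbacks may take their own locks or sleep, which would not be safe under a spinlock.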
Rather than having the driver supply the validation sequence, leave that responsibility to TTM. This saves some confusion and a function argument.
Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c            |    1 +
 drivers/gpu/drm/ttm/ttm_execbuf_util.c  |    5 ++++-
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h     |    1 -
 drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c |    3 +--
 include/drm/ttm/ttm_bo_driver.h         |    2 ++
 include/drm/ttm/ttm_execbuf_util.h      |    3 +--
 6 files changed, 9 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 551a5d3..25e4c2a 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1539,6 +1539,7 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev,
         bdev->dev_mapping = NULL;
         bdev->glob = glob;
         bdev->need_dma32 = need_dma32;
+        bdev->val_seq = 0;
         spin_lock_init(&bdev->fence_lock);
         mutex_lock(&glob->device_list_mutex);
         list_add_tail(&bdev->device_list, &glob->device_list);
         mutex_unlock(&glob->device_list_mutex);
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
index 7646fb5..9cb6aa2 100644
--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
@@ -124,11 +124,12 @@ EXPORT_SYMBOL(ttm_eu_backoff_reservation);
 * buffers in different orders.
 */
 
-int ttm_eu_reserve_buffers(struct list_head *list, uint32_t val_seq)
+int ttm_eu_reserve_buffers(struct list_head *list)
 {
         struct ttm_bo_global *glob;
         struct ttm_validate_buffer *entry;
         int ret;
+        uint32_t val_seq;
 
         if (list_empty(list))
                 return 0;
@@ -138,6 +139,8 @@ int ttm_eu_reserve_buffers(struct list_head *list)
 
 retry:
         spin_lock(&glob->lru_lock);
+        val_seq = entry->bo->bdev->val_seq++;
+
         list_for_each_entry(entry, list, head) {
                 struct ttm_buffer_object *bo = entry->bo;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index e7a58d0..10fc01f 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -264,7 +264,6 @@ struct vmw_private {
          */
 
         struct vmw_sw_context ctx;
-        uint32_t val_seq;
         struct mutex cmdbuf_mutex;
 
         /**
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index 76954e3..41b95ed 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -653,8 +653,7 @@ int vmw_execbuf_ioctl(struct drm_device *dev, void *data,
         ret = vmw_cmd_check_all(dev_priv, sw_context, cmd, arg->command_size);
         if (unlikely(ret != 0))
                 goto out_err;
-        ret = ttm_eu_reserve_buffers(&sw_context->validate_nodes,
-                                     dev_priv->val_seq++);
+        ret = ttm_eu_reserve_buffers(&sw_context->validate_nodes);
         if (unlikely(ret != 0))
                 goto out_err;
 
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index cfb9ca4..e3b2e24 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -515,6 +515,7 @@ struct ttm_bo_global {
 * @addr_space_mm: Range manager for the device address space.
 * lru_lock: Spinlock that protects the buffer+device lru lists and
 * ddestroy lists.
+ * @val_seq: Current validation sequence.
 * @nice_mode: Try nicely to wait for buffer idle when cleaning a manager.
 * If a GPU lockup has been detected, this is forced to 0.
 * @dev_mapping: A pointer to the struct address_space representing the
@@ -544,6 +545,7 @@ struct ttm_bo_device {
          * Protected by the global:lru lock.
          */
         struct list_head ddestroy;
+        uint32_t val_seq;
 
         /*
          * Protected by load / firstopen / lastclose /unload sync.
diff --git a/include/drm/ttm/ttm_execbuf_util.h b/include/drm/ttm/ttm_execbuf_util.h
index 535ab00..26cc7f9 100644
--- a/include/drm/ttm/ttm_execbuf_util.h
+++ b/include/drm/ttm/ttm_execbuf_util.h
@@ -72,7 +72,6 @@ extern void ttm_eu_backoff_reservation(struct list_head *list);
 * function ttm_eu_reserve_buffers
 *
 * @list: thread private list of ttm_validate_buffer structs.
- * @val_seq: A unique sequence number.
 *
 * Tries to reserve bos pointed to by the list entries for validation.
 * If the function returns 0, all buffers are marked as "unfenced",
@@ -94,7 +93,7 @@ extern void ttm_eu_backoff_reservation(struct list_head *list);
 * has failed.
 */
 
-extern int ttm_eu_reserve_buffers(struct list_head *list, uint32_t val_seq);
+extern int ttm_eu_reserve_buffers(struct list_head *list);
 
 /**
 * function ttm_eu_fence_buffer_objects.
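With the series applied, the driver-side execbuf flow reduces to the sketch below (hedged: my_driver_submit and my_fence are illustrative names, and placement validation / command submission are elided):

static int my_driver_submit(struct list_head *val_list, void *my_fence)
{
        int ret;

        /* Reserve every bo on the list; TTM now generates the
         * validation sequence internally. */
        ret = ttm_eu_reserve_buffers(val_list);
        if (unlikely(ret != 0))
                return ret;

        /* ... validate placements and submit the command stream ... */

        /* Fence and unreserve all bos in one pass under the global
         * fence_lock and lru_lock. */
        ttm_eu_fence_buffer_objects(val_list, my_fence);
        return 0;
}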
On Wed, Nov 17, 2010 at 7:28 AM, Thomas Hellstrom <thellstrom@vmware.com> wrote:
> This patch series improves the performance and correctness of the reserving / fencing sequence.
Reviewed-by: Jerome Glisse <jglisse@redhat.com>