Hi, Maarten,
As you know, I have been having my doubts about this change. To me it seems insane to be forced to read the fence pointer under a reserve lock, because when you take the reserve lock another process may already hold it, and there is a substantial chance that that process is also waiting for idle while holding the reserve lock.
In essence, just to read the fence pointer there is a large chance you will end up waiting for idle before you can access it. That's why it's protected by a separate spinlock in the first place.
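To spell out the pattern I'm worried about, a sketch (stubbed types and a made-up accessor, not actual driver code):

#include <stdbool.h>
#include <stdint.h>

struct ttm_buffer_object;       /* opaque stub for the real TTM type */

extern int ttm_bo_reserve(struct ttm_buffer_object *bo, bool interruptible,
                          bool no_wait, bool use_sequence, uint32_t sequence);
extern void ttm_bo_unreserve(struct ttm_buffer_object *bo);
/* hypothetical accessor standing in for a direct bo->sync_obj read */
extern void *ttm_bo_get_sync_obj(struct ttm_buffer_object *bo);

static void *read_fence_pointer(struct ttm_buffer_object *bo)
{
        void *sync_obj;

        /*
         * ttm_bo_reserve() can sleep: if another process holds the
         * reservation and is itself waiting for idle, we block here for
         * as long as the GPU takes -- just to load one pointer. With
         * fence_lock the read was bounded by a short spinlock section.
         */
        ttm_bo_reserve(bo, false, false, false, 0);
        sync_obj = ttm_bo_get_sync_obj(bo);
        ttm_bo_unreserve(bo);
        return sync_obj;
}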
So even if this change might not affect current drivers much, it's a change to the TTM API that leads to an IMHO very poor design.
/Thomas
On 10/12/2012 05:23 PM, Maarten Lankhorst wrote:
With the nouveau calls fixed, there were only two places left that used fence_lock without holding a reservation; those are fixed in this patch:
ttm_bo_cleanup_refs_or_queue is fixed by simply doing things the other way around: trylock the reservation first, then probe the fence with a no_wait ttm_bo_wait, instead of waiting under fence_lock before trylocking the reservation.
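In rough pseudo-code (a sketch of the new ordering only; the real hunk is in the patch below):

        /* old: wait for idle under fence_lock, then trylock the reservation */
        /* new: trylock the reservation first, then probe the fence under it */
        spin_lock(&glob->lru_lock);
        ret = ttm_bo_reserve_locked(bo, false, true, false, 0); /* trylock */
        if (!ret) {
                ret = ttm_bo_wait(bo, false, false, true);      /* no_wait probe */
                /* -EBUSY here means still busy: queue for delayed destroy */
        }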
ttm_bo_cleanup_refs is fixed by taking the reservation first, then grabbing a reference to the fence object, and backing off from the reservation if waiting has to be performed.
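The resulting back-off loop looks roughly like this (distilled from the hunk below; error paths trimmed):

retry:
        spin_lock(&glob->lru_lock);
        ret = ttm_bo_reserve_locked(bo, interruptible, no_wait_reserve, false, 0);
        ...
        ret = ttm_bo_wait(bo, false, false, true);      /* no_wait probe */
        if (ret == -EBUSY && !no_wait_gpu) {
                /* take the fence reference while still reserved ... */
                sync_obj = driver->sync_obj_ref(bo->sync_obj);

                /* ... then back off from the reservation before sleeping */
                atomic_set(&bo->reserved, 0);
                wake_up_all(&bo->event_queue);
                spin_unlock(&glob->lru_lock);

                ret = driver->sync_obj_wait(sync_obj, false, interruptible);
                driver->sync_obj_unref(&sync_obj);
                if (!ret)
                        goto retry;     /* the wait in retry returns 0 if no new fence was attached */
        }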
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
---
This patch should be applied only after all the previous patches I sent are applied, since it depends on the sync_obj_arg removal (kinda; it probably fails to apply otherwise), uses ttm_bo_is_reserved, and depends on the nouveau wait behavior being fixed.
 drivers/gpu/drm/nouveau/nouveau_bo.c   |   4 -
 drivers/gpu/drm/nouveau/nouveau_gem.c  |  15 +---
 drivers/gpu/drm/radeon/radeon_object.c |   2 -
 drivers/gpu/drm/ttm/ttm_bo.c           | 124 ++++++++++++-------------------
 drivers/gpu/drm/ttm/ttm_bo_util.c      |   3 -
 drivers/gpu/drm/ttm/ttm_bo_vm.c        |   5 -
 drivers/gpu/drm/ttm/ttm_execbuf_util.c |   2 -
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c    |   3 -
 include/drm/ttm/ttm_bo_api.h           |  12 +--
 include/drm/ttm/ttm_bo_driver.h        |   3 -
 10 files changed, 52 insertions(+), 121 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index cee7448..9749c45 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -1449,10 +1449,8 @@ nouveau_bo_fence(struct nouveau_bo *nvbo, struct nouveau_fence *fence)
 	if (likely(fence))
 		nouveau_fence_ref(fence);
 
-	spin_lock(&nvbo->bo.bdev->fence_lock);
 	old_fence = nvbo->bo.sync_obj;
 	nvbo->bo.sync_obj = fence;
-	spin_unlock(&nvbo->bo.bdev->fence_lock);
 
 	nouveau_fence_unref(&old_fence);
 }
@@ -1552,9 +1550,7 @@ nouveau_bo_vma_del(struct nouveau_bo *nvbo, struct nouveau_vma *vma)
 	if (vma->node) {
 		if (nvbo->bo.mem.mem_type != TTM_PL_SYSTEM) {
 			ttm_bo_reserve(&nvbo->bo, false, false, false, 0);
-			spin_lock(&nvbo->bo.bdev->fence_lock);
 			ttm_bo_wait(&nvbo->bo, false, false, false);
-			spin_unlock(&nvbo->bo.bdev->fence_lock);
 			ttm_bo_unreserve(&nvbo->bo);
 			nouveau_vm_unmap(vma);
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index 6d8391d..eaa700a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -391,18 +391,11 @@ retry:
 static int
 validate_sync(struct nouveau_channel *chan, struct nouveau_bo *nvbo)
 {
-	struct nouveau_fence *fence = NULL;
+	struct nouveau_fence *fence = nvbo->bo.sync_obj;
 	int ret = 0;
 
-	spin_lock(&nvbo->bo.bdev->fence_lock);
-	if (nvbo->bo.sync_obj)
-		fence = nouveau_fence_ref(nvbo->bo.sync_obj);
-	spin_unlock(&nvbo->bo.bdev->fence_lock);
-
-	if (fence) {
+	if (fence)
 		ret = nouveau_fence_sync(fence, chan);
-		nouveau_fence_unref(&fence);
-	}
 
 	return ret;
 }
@@ -614,9 +607,7 @@ nouveau_gem_pushbuf_reloc_apply(struct drm_device *dev,
 			data |= r->vor;
 		}
 
-		spin_lock(&nvbo->bo.bdev->fence_lock);
 		ret = ttm_bo_wait(&nvbo->bo, false, false, false);
-		spin_unlock(&nvbo->bo.bdev->fence_lock);
 		if (ret) {
 			NV_ERROR(drm, "reloc wait_idle failed: %d\n", ret);
 			break;
@@ -848,11 +839,9 @@ nouveau_gem_ioctl_cpu_prep(struct drm_device *dev, void *data,
 
 	ret = ttm_bo_reserve(&nvbo->bo, true, false, false, 0);
 	if (!ret) {
-		spin_lock(&nvbo->bo.bdev->fence_lock);
 		ret = ttm_bo_wait(&nvbo->bo, true, true, true);
 		if (!no_wait && ret)
 			fence = nouveau_fence_ref(nvbo->bo.sync_obj);
-		spin_unlock(&nvbo->bo.bdev->fence_lock);
 
 		ttm_bo_unreserve(&nvbo->bo);
 	}
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 808c444..df430e4 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -613,12 +613,10 @@ int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type, bool no_wait)
 	r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0);
 	if (unlikely(r != 0))
 		return r;
-	spin_lock(&bo->tbo.bdev->fence_lock);
 	if (mem_type)
 		*mem_type = bo->tbo.mem.mem_type;
 	if (bo->tbo.sync_obj)
 		r = ttm_bo_wait(&bo->tbo, true, true, no_wait);
-	spin_unlock(&bo->tbo.bdev->fence_lock);
 	ttm_bo_unreserve(&bo->tbo);
 	return r;
 }
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index f6d7026..69fc29b 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -498,28 +498,23 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
 	struct ttm_bo_global *glob = bo->glob;
-	struct ttm_bo_driver *driver;
+	struct ttm_bo_driver *driver = bdev->driver;
 	void *sync_obj = NULL;
 	int put_count;
 	int ret;
 
-	spin_lock(&bdev->fence_lock);
-	(void) ttm_bo_wait(bo, false, false, true);
-	if (!bo->sync_obj) {
-
-		spin_lock(&glob->lru_lock);
-
-		/**
-		 * Lock inversion between bo:reserve and bdev::fence_lock here,
-		 * but that's OK, since we're only trylocking.
-		 */
-
-		ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
+	spin_lock(&glob->lru_lock);
+	ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
 
-		if (unlikely(ret == -EBUSY))
+	if (!ret) {
+		ret = ttm_bo_wait(bo, false, false, true);
+		if (unlikely(ret == -EBUSY)) {
+			sync_obj = driver->sync_obj_ref(bo->sync_obj);
+			atomic_set(&bo->reserved, 0);
+			wake_up_all(&bo->event_queue);
 			goto queue;
+		}
 
-		spin_unlock(&bdev->fence_lock);
 		put_count = ttm_bo_del_from_lru(bo);
 
 		spin_unlock(&glob->lru_lock);
@@ -528,18 +523,11 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
 		ttm_bo_list_ref_sub(bo, put_count, true);
 
 		return;
-	} else {
-		spin_lock(&glob->lru_lock);
 	}
 queue:
-	driver = bdev->driver;
-	if (bo->sync_obj)
-		sync_obj = driver->sync_obj_ref(bo->sync_obj);
-
 	kref_get(&bo->list_kref);
 	list_add_tail(&bo->ddestroy, &bdev->ddestroy);
 	spin_unlock(&glob->lru_lock);
-	spin_unlock(&bdev->fence_lock);
 
 	if (sync_obj) {
 		driver->sync_obj_flush(sync_obj);
@@ -565,25 +553,15 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
 			       bool no_wait_gpu)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
+	struct ttm_bo_driver *driver = bdev->driver;
 	struct ttm_bo_global *glob = bo->glob;
 	int put_count;
 	int ret = 0;
+	void *sync_obj;
 
 retry:
-	spin_lock(&bdev->fence_lock);
-	ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
-	spin_unlock(&bdev->fence_lock);
-
-	if (unlikely(ret != 0))
-		return ret;
-
 	spin_lock(&glob->lru_lock);
 
-	if (unlikely(list_empty(&bo->ddestroy))) {
-		spin_unlock(&glob->lru_lock);
-		return 0;
-	}
-
 	ret = ttm_bo_reserve_locked(bo, interruptible,
 				    no_wait_reserve, false, 0);
@@ -592,18 +570,37 @@ retry:
 		return ret;
 	}
 
-	/**
-	 * We can re-check for sync object without taking
-	 * the bo::lock since setting the sync object requires
-	 * also bo::reserved. A busy object at this point may
-	 * be caused by another thread recently starting an accelerated
-	 * eviction.
-	 */
+	if (unlikely(list_empty(&bo->ddestroy))) {
+		atomic_set(&bo->reserved, 0);
+		wake_up_all(&bo->event_queue);
+		spin_unlock(&glob->lru_lock);
+		return 0;
+	}
 
-	if (unlikely(bo->sync_obj)) {
+	ret = ttm_bo_wait(bo, false, false, true);
+	if (ret) {
+		if (no_wait_gpu) {
+			atomic_set(&bo->reserved, 0);
+			wake_up_all(&bo->event_queue);
+			spin_unlock(&glob->lru_lock);
+			return ret;
+		}
+
+		/**
+		 * Take a reference to the fence and unreserve; if the wait
+		 * was successful and no new sync_obj was attached,
+		 * ttm_bo_wait in retry will return ret = 0, and end the loop.
+		 */
+
+		sync_obj = driver->sync_obj_ref(bo->sync_obj);
 		atomic_set(&bo->reserved, 0);
 		wake_up_all(&bo->event_queue);
 		spin_unlock(&glob->lru_lock);
+
+		ret = driver->sync_obj_wait(sync_obj, false, interruptible);
+		driver->sync_obj_unref(&sync_obj);
+		if (ret)
+			return ret;
 		goto retry;
 	}
@@ -735,9 +732,7 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible,
 	struct ttm_placement placement;
 	int ret = 0;
 
-	spin_lock(&bdev->fence_lock);
 	ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
-	spin_unlock(&bdev->fence_lock);
 
 	if (unlikely(ret != 0)) {
 		if (ret != -ERESTARTSYS) {
@@ -1054,7 +1049,6 @@ int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
 {
 	int ret = 0;
 	struct ttm_mem_reg mem;
-	struct ttm_bo_device *bdev = bo->bdev;
 
 	BUG_ON(!ttm_bo_is_reserved(bo));
 
@@ -1063,9 +1057,7 @@ int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
 	 * Have the driver move function wait for idle when necessary,
 	 * instead of doing it here.
 	 */
-	spin_lock(&bdev->fence_lock);
 	ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
-	spin_unlock(&bdev->fence_lock);
 	if (ret)
 		return ret;
 	mem.num_pages = bo->num_pages;
@@ -1554,7 +1546,6 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev,
 	bdev->glob = glob;
 	bdev->need_dma32 = need_dma32;
 	bdev->val_seq = 0;
-	spin_lock_init(&bdev->fence_lock);
 	mutex_lock(&glob->device_list_mutex);
 	list_add_tail(&bdev->device_list, &glob->device_list);
 	mutex_unlock(&glob->device_list_mutex);
@@ -1690,52 +1681,33 @@ int ttm_bo_wait(struct ttm_buffer_object *bo,
 		bool lazy, bool interruptible, bool no_wait)
 {
 	struct ttm_bo_driver *driver = bo->bdev->driver;
-	struct ttm_bo_device *bdev = bo->bdev;
-	void *sync_obj;
 	int ret = 0;
 
-	if (likely(bo->sync_obj == NULL))
-		return 0;
+	WARN_ON_ONCE(!ttm_bo_is_reserved(bo));
 
-	while (bo->sync_obj) {
+	if (bo->sync_obj) {
 		if (driver->sync_obj_signaled(bo->sync_obj)) {
-			void *tmp_obj = bo->sync_obj;
-			bo->sync_obj = NULL;
+			driver->sync_obj_unref(&bo->sync_obj);
 			clear_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags);
-			spin_unlock(&bdev->fence_lock);
-			driver->sync_obj_unref(&tmp_obj);
-			spin_lock(&bdev->fence_lock);
-			continue;
+			return 0;
 		}
 
 		if (no_wait)
 			return -EBUSY;
 
-		sync_obj = driver->sync_obj_ref(bo->sync_obj);
-		spin_unlock(&bdev->fence_lock);
-		ret = driver->sync_obj_wait(sync_obj,
+		ret = driver->sync_obj_wait(bo->sync_obj,
 					    lazy, interruptible);
-		if (unlikely(ret != 0)) {
-			driver->sync_obj_unref(&sync_obj);
-			spin_lock(&bdev->fence_lock);
+		if (unlikely(ret != 0))
 			return ret;
-		}
-		spin_lock(&bdev->fence_lock);
-		if (likely(bo->sync_obj == sync_obj)) {
-			void *tmp_obj = bo->sync_obj;
-			bo->sync_obj = NULL;
-			clear_bit(TTM_BO_PRIV_FLAG_MOVING,
-				  &bo->priv_flags);
-			spin_unlock(&bdev->fence_lock);
-			driver->sync_obj_unref(&sync_obj);
-			driver->sync_obj_unref(&tmp_obj);
-			spin_lock(&bdev->fence_lock);
-		} else {
-			spin_unlock(&bdev->fence_lock);
-			driver->sync_obj_unref(&sync_obj);
-			spin_lock(&bdev->fence_lock);
-		}
+
+		driver->sync_obj_unref(&bo->sync_obj);
+		clear_bit(TTM_BO_PRIV_FLAG_MOVING,
+			  &bo->priv_flags);
 	}
 	return 0;
 }
@@ -1799,9 +1771,7 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink)
 	 * Wait for GPU, then move to system cached.
 	 */
 
-	spin_lock(&bo->bdev->fence_lock);
 	ret = ttm_bo_wait(bo, false, false, false);
-	spin_unlock(&bo->bdev->fence_lock);
 
 	if (unlikely(ret != 0))
 		goto out;
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index d80d1e8..84d6e01 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -622,7 +622,6 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
 	struct ttm_buffer_object *ghost_obj;
 	void *tmp_obj = NULL;
 
-	spin_lock(&bdev->fence_lock);
 	if (bo->sync_obj) {
 		tmp_obj = bo->sync_obj;
 		bo->sync_obj = NULL;
@@ -630,7 +629,6 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
 	bo->sync_obj = driver->sync_obj_ref(sync_obj);
 	if (evict) {
 		ret = ttm_bo_wait(bo, false, false, false);
-		spin_unlock(&bdev->fence_lock);
 		if (tmp_obj)
 			driver->sync_obj_unref(&tmp_obj);
 		if (ret)
@@ -653,7 +651,6 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
 		 */
 		set_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags);
-		spin_unlock(&bdev->fence_lock);
 		if (tmp_obj)
 			driver->sync_obj_unref(&tmp_obj);
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index a877813..55718c2 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -122,17 +122,14 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	 * move.
 	 */
 
-	spin_lock(&bdev->fence_lock);
 	if (test_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags)) {
 		ret = ttm_bo_wait(bo, false, true, false);
-		spin_unlock(&bdev->fence_lock);
 		if (unlikely(ret != 0)) {
 			retval = (ret != -ERESTARTSYS) ?
 			    VM_FAULT_SIGBUS : VM_FAULT_NOPAGE;
 			goto out_unlock;
 		}
-	} else
-		spin_unlock(&bdev->fence_lock);
+	}
 
 	ret = ttm_mem_io_lock(man, true);
 	if (unlikely(ret != 0)) {
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
index 721886e..51b5e97 100644
--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
@@ -207,7 +207,6 @@ void ttm_eu_fence_buffer_objects(struct list_head *list, void *sync_obj)
 	driver = bdev->driver;
 	glob = bo->glob;
 
-	spin_lock(&bdev->fence_lock);
 	spin_lock(&glob->lru_lock);
 
 	list_for_each_entry(entry, list, head) {
@@ -218,7 +217,6 @@ void ttm_eu_fence_buffer_objects(struct list_head *list, void *sync_obj)
 		entry->reserved = false;
 	}
 	spin_unlock(&glob->lru_lock);
-	spin_unlock(&bdev->fence_lock);
 
 	list_for_each_entry(entry, list, head) {
 		if (entry->old_sync_obj)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index ed3c1e7..e038c9a 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -251,13 +251,10 @@ static void vmw_dummy_query_bo_prepare(struct vmw_private *dev_priv)
 	volatile SVGA3dQueryResult *result;
 	bool dummy;
 	int ret;
-	struct ttm_bo_device *bdev = &dev_priv->bdev;
 	struct ttm_buffer_object *bo = dev_priv->dummy_query_bo;
 
 	ttm_bo_reserve(bo, false, false, false, 0);
-	spin_lock(&bdev->fence_lock);
 	ret = ttm_bo_wait(bo, false, false, false);
-	spin_unlock(&bdev->fence_lock);
 	if (unlikely(ret != 0))
 		(void) vmw_fallback_wait(dev_priv, false, true, 0, false,
 					 10*HZ);
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 816d9b1..6c69224 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -222,6 +222,8 @@ struct ttm_buffer_object {
 	struct file *persistent_swap_storage;
 	struct ttm_tt *ttm;
 	bool evicted;
+	void *sync_obj;
+	unsigned long priv_flags;
 
 	/**
 	 * Members protected by the bdev::lru_lock.
@@ -242,16 +244,6 @@ struct ttm_buffer_object {
 	atomic_t reserved;
 
 	/**
-	 * Members protected by struct buffer_object_device::fence_lock
-	 * In addition, setting sync_obj to anything else
-	 * than NULL requires bo::reserved to be held. This allows for
-	 * checking NULL while reserved but not holding the mentioned lock.
-	 */
-
-	void *sync_obj;
-	unsigned long priv_flags;
-
-	/**
 	 * Members protected by the bdev::vm_lock
 	 */
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 0c8c3b5..aac101b 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -515,8 +515,6 @@ struct ttm_bo_global {
  * @driver: Pointer to a struct ttm_bo_driver struct setup by the driver.
  * @man: An array of mem_type_managers.
- * @fence_lock: Protects the synchronizing members on *all* bos belonging
- * to this device.
  * @addr_space_mm: Range manager for the device address space.
  * lru_lock: Spinlock that protects the buffer+device lru lists and
  * ddestroy lists.
@@ -539,7 +537,6 @@ struct ttm_bo_device {
 	struct ttm_bo_driver *driver;
 	rwlock_t vm_lock;
 	struct ttm_mem_type_manager man[TTM_NUM_MEM_TYPES];
-	spinlock_t fence_lock;
 
 	/*
 	 * Protected by the vm lock.
 	 */