From: Christian König <christian.koenig@amd.com>
Stop requiring that the src reservation object is locked for this operation.
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/dma-buf/reservation.c | 56 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 42 insertions(+), 14 deletions(-)
diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c
index dec3a81..b44d9d7 100644
--- a/drivers/dma-buf/reservation.c
+++ b/drivers/dma-buf/reservation.c
@@ -266,8 +266,7 @@ EXPORT_SYMBOL(reservation_object_add_excl_fence);
  * @dst: the destination reservation object
  * @src: the source reservation object
  *
- * Copy all fences from src to dst. Both src->lock as well as dst-lock must be
- * held.
+ * Copy all fences from src to dst. dst-lock must be held.
  */
 int reservation_object_copy_fences(struct reservation_object *dst,
                                    struct reservation_object *src)
@@ -277,33 +276,62 @@ int reservation_object_copy_fences(struct reservation_object *dst,
         size_t size;
         unsigned i;
 
-        src_list = reservation_object_get_list(src);
+        rcu_read_lock();
+        src_list = rcu_dereference(src->fence);
 
+retry:
         if (src_list) {
-                size = offsetof(typeof(*src_list),
-                                shared[src_list->shared_count]);
+                unsigned shared_count = src_list->shared_count;
+
+                size = offsetof(typeof(*src_list), shared[shared_count]);
+                rcu_read_unlock();
+
                 dst_list = kmalloc(size, GFP_KERNEL);
                 if (!dst_list)
                         return -ENOMEM;
 
-                dst_list->shared_count = src_list->shared_count;
-                dst_list->shared_max = src_list->shared_count;
-                for (i = 0; i < src_list->shared_count; ++i)
-                        dst_list->shared[i] =
-                                dma_fence_get(src_list->shared[i]);
+                rcu_read_lock();
+                src_list = rcu_dereference(src->fence);
+                if (!src_list || src_list->shared_count > shared_count) {
+                        kfree(dst_list);
+                        goto retry;
+                }
+
+                dst_list->shared_count = 0;
+                dst_list->shared_max = shared_count;
+                for (i = 0; i < src_list->shared_count; ++i) {
+                        struct dma_fence *fence;
+
+                        fence = rcu_dereference(src_list->shared[i]);
+                        if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+                                     &fence->flags))
+                                continue;
+
+                        if (!dma_fence_get_rcu(fence)) {
+                                kfree(dst_list);
+                                src_list = rcu_dereference(src->fence);
+                                goto retry;
+                        }
+
+                        if (dma_fence_is_signaled(fence)) {
+                                dma_fence_put(fence);
+                                continue;
+                        }
+
+                        dst_list->shared[dst_list->shared_count++] = fence;
+                }
         } else {
                 dst_list = NULL;
         }
 
+        new = dma_fence_get_rcu_safe(&src->fence_excl);
+        rcu_read_unlock();
+
         kfree(dst->staged);
         dst->staged = NULL;
 
         src_list = reservation_object_get_list(dst);
-        old = reservation_object_get_excl(dst);
-        new = reservation_object_get_excl(src);
-
-        dma_fence_get(new);
 
         preempt_disable();
         write_seqcount_begin(&dst->seq);
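The read side above follows the usual RCU snapshot pattern: sample the list size under rcu_read_lock(), drop the lock to allocate (GFP_KERNEL may sleep), then re-acquire and re-validate before copying. Condensed into a self-contained sketch (function name ours, slightly simplified against the patch; unlike the patch it also drops already-collected references on restart):

#include <linux/dma-fence.h>
#include <linux/err.h>
#include <linux/rcupdate.h>
#include <linux/reservation.h>
#include <linux/slab.h>

/* Sketch only: snapshot src's shared fence list without holding src's lock. */
static struct reservation_object_list *
snapshot_shared_fences(struct reservation_object *src)
{
        struct reservation_object_list *src_list, *dst_list;
        unsigned int shared_count, i;

        rcu_read_lock();
        src_list = rcu_dereference(src->fence);

retry:
        if (!src_list) {
                rcu_read_unlock();
                return NULL;                    /* nothing to copy */
        }

        shared_count = src_list->shared_count;
        rcu_read_unlock();                      /* GFP_KERNEL may sleep */

        dst_list = kmalloc(offsetof(typeof(*dst_list), shared[shared_count]),
                           GFP_KERNEL);
        if (!dst_list)
                return ERR_PTR(-ENOMEM);

        rcu_read_lock();
        src_list = rcu_dereference(src->fence);
        if (!src_list || src_list->shared_count > shared_count) {
                kfree(dst_list);                /* list changed, size stale */
                goto retry;
        }

        dst_list->shared_count = 0;
        dst_list->shared_max = shared_count;
        for (i = 0; i < src_list->shared_count; ++i) {
                struct dma_fence *fence = rcu_dereference(src_list->shared[i]);

                /* cheap flag check before taking a reference */
                if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
                        continue;

                if (!dma_fence_get_rcu(fence)) {
                        /* fence freed under us: drop what we collected
                         * and restart from the current list
                         */
                        while (dst_list->shared_count--)
                                dma_fence_put(dst_list->shared[dst_list->shared_count]);
                        kfree(dst_list);
                        src_list = rcu_dereference(src->fence);
                        goto retry;
                }

                /* full check, safe now that we hold a reference */
                if (dma_fence_is_signaled(fence)) {
                        dma_fence_put(fence);
                        continue;
                }

                dst_list->shared[dst_list->shared_count++] = fence;
        }
        rcu_read_unlock();

        return dst_list;
}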
From: Christian König <christian.koenig@amd.com>
With shared reservation objects, __ttm_bo_reserve() can easily fail even on destroyed BOs. This prevents correct handling when we need to individualize the reservation object.
Fix this by individualizing the object before even trying to reserve it.
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 180ce62..bee77d3 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -440,28 +440,29 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
         struct ttm_bo_global *glob = bo->glob;
         int ret;
 
+        ret = ttm_bo_individualize_resv(bo);
+        if (ret) {
+                /* Last resort, if we fail to allocate memory for the
+                 * fences block for the BO to become idle
+                 */
+                reservation_object_wait_timeout_rcu(bo->resv, true, false,
+                                                    30 * HZ);
+                spin_lock(&glob->lru_lock);
+                goto error;
+        }
+
         spin_lock(&glob->lru_lock);
         ret = __ttm_bo_reserve(bo, false, true, NULL);
-
         if (!ret) {
-                if (!ttm_bo_wait(bo, false, true)) {
+                if (reservation_object_test_signaled_rcu(&bo->ttm_resv, true)) {
                         ttm_bo_del_from_lru(bo);
                         spin_unlock(&glob->lru_lock);
+                        if (bo->resv != &bo->ttm_resv)
+                                reservation_object_unlock(&bo->ttm_resv);
                         ttm_bo_cleanup_memtype_use(bo);
-
                         return;
                 }
 
-                ret = ttm_bo_individualize_resv(bo);
-                if (ret) {
-                        /* Last resort, if we fail to allocate memory for the
-                         * fences block for the BO to become idle and free it.
-                         */
-                        spin_unlock(&glob->lru_lock);
-                        ttm_bo_wait(bo, true, true);
-                        ttm_bo_cleanup_memtype_use(bo);
-                        return;
-                }
-
                 ttm_bo_flush_all_fences(bo);
 
                 /*
@@ -474,11 +475,12 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
                         ttm_bo_add_to_lru(bo);
                 }
 
-                if (bo->resv != &bo->ttm_resv)
-                        reservation_object_unlock(&bo->ttm_resv);
                 __ttm_bo_unreserve(bo);
         }
+        if (bo->resv != &bo->ttm_resv)
+                reservation_object_unlock(&bo->ttm_resv);
 
+error:
         kref_get(&bo->list_kref);
         list_add_tail(&bo->ddestroy, &bdev->ddestroy);
         spin_unlock(&glob->lru_lock);
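For context, ttm_bo_individualize_resv() detaches a BO whose reservation object is shared with others by snapshotting the current fences into the BO's embedded ttm_resv; that snapshot is exactly what the reservation_object_copy_fences() patch at the top of this thread makes possible without locking src. A rough sketch of the idea (hypothetical, simplified helper; the real one also keeps bo->ttm_resv locked, which is why the cleanup path above unlocks it again):

#include <linux/reservation.h>
#include <drm/ttm/ttm_bo_api.h>

/* Sketch only, locking omitted: give the BO a private reservation
 * object so the delayed-destroy path no longer depends on the shared
 * one. The real helper is ttm_bo_individualize_resv() in ttm_bo.c.
 */
static int individualize_resv_sketch(struct ttm_buffer_object *bo)
{
        if (bo->resv == &bo->ttm_resv)
                return 0;       /* already has a private object */

        /* src (bo->resv) is shared and may be updated concurrently,
         * so it is not locked here; this relies on the RCU-safe copy.
         */
        return reservation_object_copy_fences(&bo->ttm_resv, bo->resv);
}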
Acked-by: Chunming Zhou <david1.zhou@amd.com>
On 2017-09-05 03:02, Christian König wrote:
From: Christian König <christian.koenig@amd.com>
With shared reservation objects, __ttm_bo_reserve() can easily fail even on destroyed BOs. This prevents correct handling when we need to individualize the reservation object.
Fix this by individualizing the object before even trying to reserve it.
Signed-off-by: Christian König <christian.koenig@amd.com>
[SNIP]
Ping? David, can you take a look?
Alex is on vacation and that is a rather important bug fix.
Thanks, Christian.
On 04.09.2017 at 21:02, Christian König wrote:
From: Christian König <christian.koenig@amd.com>
Stop requiring that the src reservation object is locked for this operation.
Signed-off-by: Christian König <christian.koenig@amd.com>
[SNIP]
On 2017-09-07 15:13, Christian König wrote:
Ping? David, can you take a look?
Alex is on vacation and that is a rather important bug fix.
Thanks, Christian.
On 04.09.2017 at 21:02, Christian König wrote:
From: Christian König <christian.koenig@amd.com>
Stop requiring that the src reservation object is locked for this operation.
Signed-off-by: Christian König <christian.koenig@amd.com>
[SNIP]
+                        fence = rcu_dereference(src_list->shared[i]);
+                        if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+                                     &fence->flags))
Seems this is duplicated with the dma_fence_is_signaled() below, can it be removed?
And I'm not sure about the locking, but it looks good, so Acked-by: Chunming Zhou <david1.zhou@amd.com>
+                                continue;
+
+                        if (!dma_fence_get_rcu(fence)) {
+                                kfree(dst_list);
+                                src_list = rcu_dereference(src->fence);
+                                goto retry;
+                        }
+
+                        if (dma_fence_is_signaled(fence)) {
+                                dma_fence_put(fence);
+                                continue;
+                        }
+
+                        dst_list->shared[dst_list->shared_count++] = fence;
+                }
[SNIP]
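The two checks Chunming asks about are close but not redundant: test_bit() only reads the already-set flag and is safe under rcu_read_lock() before any reference is taken, while dma_fence_is_signaled() may additionally invoke the fence's ->signaled() callback, so it only runs once dma_fence_get_rcu() has succeeded. A sketch of the distinction (helper name invented for illustration, not kernel code):

#include <linux/dma-fence.h>

static struct dma_fence *try_snapshot_fence(struct dma_fence *fence)
{
        /* 1) Plain flag read: cheap, and safe under rcu_read_lock() even
         * before we own a reference. Filters fences already known to be
         * signaled without the get/put round trip.
         */
        if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
                return NULL;

        if (!dma_fence_get_rcu(fence))
                return NULL;    /* the real loop restarts here instead */

        /* 2) Full check: may call the fence's ->signaled() callback and
         * thus catch fences that signaled without the flag being set yet.
         * Only done once we hold a proper reference.
         */
        if (dma_fence_is_signaled(fence)) {
                dma_fence_put(fence);
                return NULL;
        }

        return fence;           /* caller owns a reference now */
}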
On Thu, Sep 07, 2017 at 09:13:32AM +0200, Christian König wrote:
Ping? David, can you take a look?
Alex is on vacation and that is a rather important bug fix.
Works better when you cc Gustavo/Maarten/Sumits/Chris, I think, for anything needing dma-buf review. -Daniel
Thanks, Christian.
On 04.09.2017 at 21:02, Christian König wrote:
From: Christian König <christian.koenig@amd.com>
Stop requiring that the src reservation object is locked for this operation.
Signed-off-by: Christian König <christian.koenig@amd.com>
[SNIP]
On 04-09-17 at 21:02, Christian König wrote:
From: Christian König <christian.koenig@amd.com>
Stop requiring that the src reservation object is locked for this operation.
Signed-off-by: Christian König <christian.koenig@amd.com>
[SNIP]
Could this be implemented using reservation_object_get_fences_rcu? You're essentially duplicating its functionality.
Cheers, Maarten
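For reference, the helper Maarten points at has this contract in the 4.13-era drivers/dma-buf/reservation.c (the prototype matches the kerneldoc quoted later in the thread; the caller below it is ours, a purely illustrative sketch of snapshot/use/release):

#include <linux/dma-fence.h>
#include <linux/reservation.h>
#include <linux/slab.h>

/* Existing helper: snapshots obj's exclusive fence plus a krealloc'd
 * *array* of shared fences, without taking obj's reservation lock.
 */
int reservation_object_get_fences_rcu(struct reservation_object *obj,
                                      struct dma_fence **pfence_excl,
                                      unsigned *pshared_count,
                                      struct dma_fence ***pshared);

/* Hypothetical usage sketch. */
static void snapshot_example(struct reservation_object *obj)
{
        struct dma_fence *excl, **shared;
        unsigned int count, i;

        if (reservation_object_get_fences_rcu(obj, &excl, &count, &shared))
                return;         /* -ENOMEM */

        /* ... inspect the snapshot ... */

        for (i = 0; i < count; ++i)
                dma_fence_put(shared[i]);
        kfree(shared);
        dma_fence_put(excl);    /* dma_fence_put() accepts NULL */
}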
On 10.09.2017 at 09:30, Maarten Lankhorst wrote:
On 04-09-17 at 21:02, Christian König wrote:
From: Christian König <christian.koenig@amd.com>
Stop requiring that the src reservation object is locked for this operation.
Signed-off-by: Christian König <christian.koenig@amd.com>
[SNIP]
Could this be implemented using reservation_object_get_fences_rcu? You're essentially duplicating its functionality.
I've considered this as well, but reservation_object_get_fences_rcu() returns an array and here we need a reservation_object_list.
Regards, Christian.
Cheers, Maarten
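(For readers following along: the mismatch is the return shape. The RCU helper hands back a bare krealloc'd fence array, while dst->fence must point at a reservation_object_list, which in this era of include/linux/reservation.h looks roughly like this, slightly abridged; the "junk in front" Maarten mentions below is the RCU head and the two counters.)

struct reservation_object_list {
        struct rcu_head rcu;            /* so a replaced list can be kfree_rcu'd */
        u32 shared_count;               /* fences currently stored */
        u32 shared_max;                 /* slots allocated */
        struct dma_fence __rcu *shared[];
};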
On 11-09-17 at 14:53, Christian König wrote:
On 10.09.2017 at 09:30, Maarten Lankhorst wrote:
On 04-09-17 at 21:02, Christian König wrote:
From: Christian König <christian.koenig@amd.com>
Stop requiring that the src reservation object is locked for this operation.
Signed-off-by: Christian König <christian.koenig@amd.com>
[SNIP]
Could this be implemented using reservation_object_get_fences_rcu? You're essentially duplicating its functionality.
I've considered this as well, but reservation_object_get_fences_rcu() returns an array and here we need a reservation_object_list.
Doesn't seem too hard; below is the result of fiddling with the allocation function. reservation_object_list is just a list of fences with some junk in front.
Here you go, but only tested with the compiler. O:)
-----8<-----
 drivers/dma-buf/reservation.c | 192 ++++++++++++++++++++++++------------------
 1 file changed, 109 insertions(+), 83 deletions(-)

diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c
index dec3a815455d..72ee91f34132 100644
--- a/drivers/dma-buf/reservation.c
+++ b/drivers/dma-buf/reservation.c
@@ -261,87 +261,43 @@ void reservation_object_add_excl_fence(struct reservation_object *obj,
 }
 EXPORT_SYMBOL(reservation_object_add_excl_fence);
 
-/**
- * reservation_object_copy_fences - Copy all fences from src to dst.
- * @dst: the destination reservation object
- * @src: the source reservation object
- *
- * Copy all fences from src to dst. Both src->lock as well as dst-lock must be
- * held.
- */
-int reservation_object_copy_fences(struct reservation_object *dst,
-                                   struct reservation_object *src)
+static bool realloc_shared(void **ptr, struct dma_fence ***pshared, unsigned shared_max, bool alloc_list, bool unlocked)
 {
-        struct reservation_object_list *src_list, *dst_list;
-        struct dma_fence *old, *new;
-        size_t size;
-        unsigned i;
-
-        src_list = reservation_object_get_list(src);
-
-        if (src_list) {
-                size = offsetof(typeof(*src_list),
-                                shared[src_list->shared_count]);
-                dst_list = kmalloc(size, GFP_KERNEL);
-                if (!dst_list)
-                        return -ENOMEM;
-
-                dst_list->shared_count = src_list->shared_count;
-                dst_list->shared_max = src_list->shared_count;
-                for (i = 0; i < src_list->shared_count; ++i)
-                        dst_list->shared[i] =
-                                dma_fence_get(src_list->shared[i]);
-        } else {
-                dst_list = NULL;
-        }
-
-        kfree(dst->staged);
-        dst->staged = NULL;
+        size_t sz;
+        void *newptr;
 
-        src_list = reservation_object_get_list(dst);
+        if (alloc_list)
+                sz = offsetof(struct reservation_object_list, shared[shared_max]);
+        else
+                sz = shared_max * sizeof(struct dma_fence *);
 
-        old = reservation_object_get_excl(dst);
-        new = reservation_object_get_excl(src);
+        newptr = krealloc(*ptr, sz, unlocked ? GFP_KERNEL : GFP_NOWAIT | __GFP_NOWARN);
+        if (!newptr)
+                return false;
 
-        dma_fence_get(new);
+        *ptr = newptr;
+        if (alloc_list) {
+                struct reservation_object_list *fobj = newptr;
 
-        preempt_disable();
-        write_seqcount_begin(&dst->seq);
-        /* write_seqcount_begin provides the necessary memory barrier */
-        RCU_INIT_POINTER(dst->fence_excl, new);
-        RCU_INIT_POINTER(dst->fence, dst_list);
-        write_seqcount_end(&dst->seq);
-        preempt_enable();
-
-        if (src_list)
-                kfree_rcu(src_list, rcu);
-        dma_fence_put(old);
+                fobj->shared_max = shared_max;
+                *pshared = fobj->shared;
+        } else
+                *pshared = newptr;
 
-        return 0;
+        return true;
 }
-EXPORT_SYMBOL(reservation_object_copy_fences);
 
-/**
- * reservation_object_get_fences_rcu - Get an object's shared and exclusive
- * fences without update side lock held
- * @obj: the reservation object
- * @pfence_excl: the returned exclusive fence (or NULL)
- * @pshared_count: the number of shared fences returned
- * @pshared: the array of shared fence ptrs returned (array is krealloc'd to
- * the required size, and must be freed by caller)
- *
- * RETURNS
- * Zero or -errno
- */
-int reservation_object_get_fences_rcu(struct reservation_object *obj,
+static int __reservation_object_get_fences_rcu(struct reservation_object *obj,
                                       struct dma_fence **pfence_excl,
                                       unsigned *pshared_count,
-                                      struct dma_fence ***pshared)
+                                      struct dma_fence ***pshared,
+                                      struct reservation_object_list **list_shared)
 {
         struct dma_fence **shared = NULL;
         struct dma_fence *fence_excl;
         unsigned int shared_count;
         int ret = 1;
+        void *ptr = NULL;
 
         do {
                 struct reservation_object_list *fobj;
@@ -359,23 +315,16 @@ int reservation_object_get_fences_rcu(struct reservation_object *obj,
 
                 fobj = rcu_dereference(obj->fence);
                 if (fobj) {
-                        struct dma_fence **nshared;
-                        size_t sz = sizeof(*shared) * fobj->shared_max;
-
-                        nshared = krealloc(shared, sz,
-                                           GFP_NOWAIT | __GFP_NOWARN);
-                        if (!nshared) {
+                        if (!realloc_shared(&ptr, &shared, fobj->shared_max, list_shared, false)) {
                                 rcu_read_unlock();
-                                nshared = krealloc(shared, sz, GFP_KERNEL);
-                                if (nshared) {
-                                        shared = nshared;
-                                        continue;
+                                if (!realloc_shared(&ptr, &shared, fobj->shared_max, list_shared, true)) {
+                                        ret = -ENOMEM;
+                                        break;
                                 }
-
-                                ret = -ENOMEM;
-                                break;
+                                /* need to acquire rcu lock again and retry */
+                                continue;
                         }
-                        shared = nshared;
                         shared_count = fobj->shared_count;
 
                         for (i = 0; i < shared_count; ++i) {
@@ -398,16 +347,93 @@ int reservation_object_get_fences_rcu(struct reservation_object *obj,
         } while (ret);
 
         if (!shared_count) {
-                kfree(shared);
+                kfree(ptr);
+                ptr = NULL;
                 shared = NULL;
         }
 
-        *pshared_count = shared_count;
-        *pshared = shared;
-        *pfence_excl = fence_excl;
+        if (!list_shared) {
+                *pshared_count = shared_count;
+                *pshared = shared;
+        } else {
+                *list_shared = ptr;
+                if (ptr)
+                        (*list_shared)->shared_count = shared_count;
+        }
 
+        *pfence_excl = fence_excl;
         return ret;
 }
+
+/**
+ * reservation_object_copy_fences - Copy all fences from src to dst.
+ * @dst: the destination reservation object
+ * @src: the source reservation object
+ *
+ * Copy all fences from src to dst. dst-lock must be held.
+ */
+int reservation_object_copy_fences(struct reservation_object *dst,
+                                   struct reservation_object *src)
+{
+        struct reservation_object_list *old_list, *dst_list;
+        struct dma_fence *excl, *old;
+        unsigned i;
+        int ret;
+
+        ret = __reservation_object_get_fences_rcu(src, &excl, NULL,
+                                                  NULL, &dst_list);
+        if (ret)
+                return ret;
+
+        kfree(dst->staged);
+        dst->staged = NULL;
+
+        old = rcu_dereference_protected(dst->fence_excl,
+                                        reservation_object_held(dst));
+
+        old_list = rcu_dereference_protected(dst->fence,
+                                             reservation_object_held(dst));
+
+        preempt_disable();
+        write_seqcount_begin(&dst->seq);
+        /* write_seqcount_begin provides the necessary memory barrier */
+        RCU_INIT_POINTER(dst->fence_excl, excl);
+        RCU_INIT_POINTER(dst->fence, dst_list);
+        write_seqcount_end(&dst->seq);
+        preempt_enable();
+
+        if (old_list) {
+                for (i = 0; i < old_list->shared_count; i++)
+                        dma_fence_put(old_list->shared[i]);
+
+                kfree_rcu(old_list, rcu);
+        }
+        dma_fence_put(old);
+
+        return 0;
+}
+EXPORT_SYMBOL(reservation_object_copy_fences);
+
+/**
+ * reservation_object_get_fences_rcu - Get an object's shared and exclusive
+ * fences without update side lock held
+ * @obj: the reservation object
+ * @pfence_excl: the returned exclusive fence (or NULL)
+ * @pshared_count: the number of shared fences returned
+ * @pshared: the array of shared fence ptrs returned (array is krealloc'd to
+ * the required size, and must be freed by caller)
+ *
+ * RETURNS
+ * Zero or -errno
+ */
+int reservation_object_get_fences_rcu(struct reservation_object *obj,
+                                      struct dma_fence **pfence_excl,
+                                      unsigned *pshared_count,
+                                      struct dma_fence ***pshared)
+{
+        return __reservation_object_get_fences_rcu(obj, pfence_excl, pshared_count, pshared, NULL);
+}
 EXPORT_SYMBOL_GPL(reservation_object_get_fences_rcu);
 
 /**
On 11.09.2017 at 15:56, Maarten Lankhorst wrote:
On 11-09-17 at 14:53, Christian König wrote:
On 10.09.2017 at 09:30, Maarten Lankhorst wrote:
On 04-09-17 at 21:02, Christian König wrote:
From: Christian König <christian.koenig@amd.com>
Stop requiring that the src reservation object is locked for this operation.
Signed-off-by: Christian König <christian.koenig@amd.com>
[SNIP]
Could this be implemented using reservation_object_get_fences_rcu? You're essentially duplicating its functionality.
I've considered this as well, but reservation_object_get_fences_rcu() returns an array and here we need a reservation_object_list.
Doesn't seem too hard; below is the result of fiddling with the allocation function. reservation_object_list is just a list of fences with some junk in front.
Here you go, but only tested with the compiler. O:)
[SNIP]
To be honest that looks rather ugly to me for not much gain.
In addition to that, we lose the optimization I've stolen from the wait function.
Regards, Christian.
[SNIP]
On 11-09-17 at 16:45, Christian König wrote:
On 11.09.2017 at 15:56, Maarten Lankhorst wrote:
On 11-09-17 at 14:53, Christian König wrote:
On 10.09.2017 at 09:30, Maarten Lankhorst wrote:
On 04-09-17 at 21:02, Christian König wrote:
From: Christian König <christian.koenig@amd.com>
Stop requiring that the src reservation object is locked for this operation.
Signed-off-by: Christian König <christian.koenig@amd.com>
[SNIP]
Could this be implemented using reservation_object_get_fences_rcu? You're essentially duplicating its functionality.
I've considered this as well, but reservation_object_get_fences_rcu() returns an array and here we need a reservation_object_list.
Doesn't seem too hard; below is the result of fiddling with the allocation function. reservation_object_list is just a list of fences with some junk in front.
Here you go, but only tested with the compiler. O:)
[SNIP]
To be honest that looks rather ugly to me for not much gain.
In addition to that, we lose the optimization I've stolen from the wait function.
Right now your version does exactly the same as reservation_object_get_fences_rcu, but with a reservation_object_list instead of a fence array.
Can't you change reservation_object_get_fences_rcu to return a reservation_object_list? All callsites look like they could be easily converted.
So the new patch series would be:
1. Make reservation_object_get_fences_rcu return a reservation_object_list
2. Add optimization
3. Convert reservation_object_copy_fences to use rcu.
Cheers, Maarten
On 11.09.2017 at 17:13, Maarten Lankhorst wrote:
On 11-09-17 at 16:45, Christian König wrote:
On 11.09.2017 at 15:56, Maarten Lankhorst wrote:
On 11-09-17 at 14:53, Christian König wrote:
On 10.09.2017 at 09:30, Maarten Lankhorst wrote: [SNIP]
To be honest that looks rather ugly to me for not much gain.
In addition to that, we lose the optimization I've stolen from the wait function.
Right now your version does exactly the same as reservation_object_get_fences_rcu, but with a reservation_object_list instead of a fence array.
Well then please take a closer look again:
        for (i = 0; i < src_list->shared_count; ++i) {
                struct dma_fence *fence;

                fence = rcu_dereference(src_list->shared[i]);
                if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
                             &fence->flags))
                        continue;

                if (!dma_fence_get_rcu(fence)) {
                        kfree(dst_list);
                        src_list = rcu_dereference(src->fence);
                        goto retry;
                }

                if (dma_fence_is_signaled(fence)) {
                        dma_fence_put(fence);
                        continue;
                }

                dst_list->shared[dst_list->shared_count++] = fence;
        }
We only take fences into the new reservation list when they aren't already signaled.
This can't be added to reservation_object_get_fences_rcu() because that would break VM handling on radeon and amdgpu.
Regards, Christian.
On 11.09.2017 at 17:22, Christian König wrote:
On 11.09.2017 at 17:13, Maarten Lankhorst wrote:
On 11-09-17 at 16:45, Christian König wrote:
On 11.09.2017 at 15:56, Maarten Lankhorst wrote:
On 11-09-17 at 14:53, Christian König wrote:
On 10.09.2017 at 09:30, Maarten Lankhorst wrote: [SNIP]
To be honest that looks rather ugly to me for not much gain.
In addition to that, we lose the optimization I've stolen from the wait function.
Right now your version does exactly the same as reservation_object_get_fences_rcu, but with a reservation_object_list instead of a fence array.
Well then please take a closer look again:
[SNIP]
We only take fences into the new reservation list when they aren't already signaled.
This can't be added to reservation_object_get_fences_rcu() because that would break VM handling on radeon and amdgpu.
What we could do is add a function that returns all fences (including the exclusive one) as a reservation_object_list and use that in both the wait and the copy function.
Regards, Christian.
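A possible shape for that helper, purely as a sketch of the idea (hypothetical name, never merged in this form; a lockless variant would additionally need the RCU retry dance from the original patch, so this sketch assumes obj's lock is held):

#include <linux/dma-fence.h>
#include <linux/reservation.h>
#include <linux/slab.h>

static struct reservation_object_list *
get_all_fences_sketch(struct reservation_object *obj)
{
        struct reservation_object_list *shared, *list;
        struct dma_fence *excl, *f;
        unsigned int i, count;

        shared = reservation_object_get_list(obj);
        excl = reservation_object_get_excl(obj);

        count = (shared ? shared->shared_count : 0) + (excl ? 1 : 0);
        list = kmalloc(offsetof(typeof(*list), shared[count]), GFP_KERNEL);
        if (!list)
                return NULL;

        list->shared_count = 0;
        list->shared_max = count;

        /* fold the exclusive fence into the same list ... */
        if (excl)
                list->shared[list->shared_count++] = dma_fence_get(excl);

        /* ... followed by all shared fences */
        for (i = 0; shared && i < shared->shared_count; ++i) {
                f = rcu_dereference_protected(shared->shared[i],
                                              reservation_object_held(obj));
                list->shared[list->shared_count++] = dma_fence_get(f);
        }

        return list;
}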
On 11-09-17 at 17:24, Christian König wrote:
On 11.09.2017 at 17:22, Christian König wrote:
On 11.09.2017 at 17:13, Maarten Lankhorst wrote:
On 11-09-17 at 16:45, Christian König wrote:
On 11.09.2017 at 15:56, Maarten Lankhorst wrote:
On 11-09-17 at 14:53, Christian König wrote:
On 10.09.2017 at 09:30, Maarten Lankhorst wrote: [SNIP]
To be honest that looks rather ugly to me for not much gain.
In addition to that, we lose the optimization I've stolen from the wait function.
Right now your version does exactly the same as reservation_object_get_fences_rcu, but with a reservation_object_list instead of a fence array.
Well then please take a closer look again:
[SNIP]
We only take fences into the new reservation list when they aren't already signaled.
This can't be added to reservation_object_get_fences_rcu() because that would break VM handling on radeon and amdgpu.
What we could do is adding a function to return all fences (including the exclusive one) as reservation_object_list() and use that in both the wait as well as the copy function.
Yeah, but I don't see the problem with the VM (guessing amdgpu_vm_prt_fini). Why would it break if I pruned the signaled fences from the copied list?
On 11.09.2017 at 17:29, Maarten Lankhorst wrote:
On 11-09-17 at 17:24, Christian König wrote:
On 11.09.2017 at 17:22, Christian König wrote:
On 11.09.2017 at 17:13, Maarten Lankhorst wrote:
On 11-09-17 at 16:45, Christian König wrote:
On 11.09.2017 at 15:56, Maarten Lankhorst wrote:
On 11-09-17 at 14:53, Christian König wrote: > On 10.09.2017 at 09:30, Maarten Lankhorst wrote: > [SNIP]
To be honest that looks rather ugly to me for not much gain.
In addition to that, we lose the optimization I've stolen from the wait function.
Right now your version does exactly the same as reservation_object_get_fences_rcu, but with a reservation_object_list instead of a fence array.
Well then please take a closer look again:
[SNIP]
We only take fences into the new reservation list when they aren't already signaled.
This can't be added to reservation_object_get_fences_rcu() because that would break VM handling on radeon and amdgpu.
What we could do is adding a function to return all fences (including the exclusive one) as reservation_object_list() and use that in both the wait as well as the copy function.
Yeah, but I don't see the problem with the VM (guessing amdgpu_vm_prt_fini). Why would it break if I pruned the signaled fences from the copied list?
It's not the PRT stuff that would break, but the VM flush handling. Rather long story, but basically we need the already signaled fences as well to correctly update the hardware state.
We could of course handle all that in a single function which gets its behavior as parameters, but to be honest I think just having two copies of code that look almost the same is cleaner.
Regards, Christian.