This reverts 67c97fb79a7f ("dma-buf: add reservation_object_fences helper") dd7a7d1ff2f1 ("drm/i915: use new reservation_object_fences helper") 0e1d8083bddb ("dma-buf: further relax reservation_object_add_shared_fence") 5d344f58da76 ("dma-buf: nuke reservation_object seq number")
The scenario that defeats simply grabbing a set of shared/exclusive fences and using them blissfully under RCU is that any of those fences may be reallocated by a SLAB_TYPESAFE_BY_RCU fence slab cache. In this scenario, while keeping the rcu_read_lock we need to establish that no fence was changed in the dma_resv after a read (or full) memory barrier.
Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Cc: Chris Wilson chris@chris-wilson.co.uk Cc: Daniel Vetter daniel.vetter@ffwll.ch --- drivers/dma-buf/dma-buf.c | 31 ++++- drivers/dma-buf/dma-resv.c | 109 ++++++++++++----- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 7 +- drivers/gpu/drm/i915/gem/i915_gem_busy.c | 24 ++-- include/linux/dma-resv.h | 113 ++++++++---------- 5 files changed, 175 insertions(+), 109 deletions(-)
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index b3400d6524ab..433d91d710e4 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -199,7 +199,7 @@ static __poll_t dma_buf_poll(struct file *file, poll_table *poll) struct dma_resv_list *fobj; struct dma_fence *fence_excl; __poll_t events; - unsigned shared_count; + unsigned shared_count, seq;
dmabuf = file->private_data; if (!dmabuf || !dmabuf->resv) @@ -213,8 +213,21 @@ static __poll_t dma_buf_poll(struct file *file, poll_table *poll) if (!events) return 0;
+retry: + seq = read_seqcount_begin(&resv->seq); rcu_read_lock(); - dma_resv_fences(resv, &fence_excl, &fobj, &shared_count); + + fobj = rcu_dereference(resv->fence); + if (fobj) + shared_count = fobj->shared_count; + else + shared_count = 0; + fence_excl = rcu_dereference(resv->fence_excl); + if (read_seqcount_retry(&resv->seq, seq)) { + rcu_read_unlock(); + goto retry; + } + if (fence_excl && (!(events & EPOLLOUT) || shared_count == 0)) { struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_excl; __poll_t pevents = EPOLLIN; @@ -1144,6 +1157,7 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused) struct dma_resv *robj; struct dma_resv_list *fobj; struct dma_fence *fence; + unsigned seq; int count = 0, attach_count, shared_count, i; size_t size = 0;
@@ -1174,9 +1188,16 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused) buf_obj->name ?: "");
robj = buf_obj->resv; - rcu_read_lock(); - dma_resv_fences(robj, &fence, &fobj, &shared_count); - rcu_read_unlock(); + while (true) { + seq = read_seqcount_begin(&robj->seq); + rcu_read_lock(); + fobj = rcu_dereference(robj->fence); + shared_count = fobj ? fobj->shared_count : 0; + fence = rcu_dereference(robj->fence_excl); + if (!read_seqcount_retry(&robj->seq, seq)) + break; + rcu_read_unlock(); + }
if (fence) seq_printf(s, "\tExclusive fence: %s %s %ssignalled\n", diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index f5142683c851..42a8f3f11681 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -49,6 +49,12 @@ DEFINE_WD_CLASS(reservation_ww_class); EXPORT_SYMBOL(reservation_ww_class);
+struct lock_class_key reservation_seqcount_class; +EXPORT_SYMBOL(reservation_seqcount_class); + +const char reservation_seqcount_string[] = "reservation_seqcount"; +EXPORT_SYMBOL(reservation_seqcount_string); + /** * dma_resv_list_alloc - allocate fence list * @shared_max: number of fences we need space for @@ -96,6 +102,9 @@ static void dma_resv_list_free(struct dma_resv_list *list) void dma_resv_init(struct dma_resv *obj) { ww_mutex_init(&obj->lock, &reservation_ww_class); + + __seqcount_init(&obj->seq, reservation_seqcount_string, + &reservation_seqcount_class); RCU_INIT_POINTER(obj->fence, NULL); RCU_INIT_POINTER(obj->fence_excl, NULL); } @@ -225,6 +234,9 @@ void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence) fobj = dma_resv_get_list(obj); count = fobj->shared_count;
+ preempt_disable(); + write_seqcount_begin(&obj->seq); + for (i = 0; i < count; ++i) {
old = rcu_dereference_protected(fobj->shared[i], @@ -242,6 +254,9 @@ void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence) RCU_INIT_POINTER(fobj->shared[i], fence); /* pointer update must be visible before we extend the shared_count */ smp_store_mb(fobj->shared_count, count); + + write_seqcount_end(&obj->seq); + preempt_enable(); dma_fence_put(old); } EXPORT_SYMBOL(dma_resv_add_shared_fence); @@ -269,10 +284,12 @@ void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence) dma_fence_get(fence);
preempt_disable(); - rcu_assign_pointer(obj->fence_excl, fence); - /* pointer update must be visible before we modify the shared_count */ + write_seqcount_begin(&obj->seq); + /* write_seqcount_begin provides the necessary memory barrier */ + RCU_INIT_POINTER(obj->fence_excl, fence); if (old) - smp_store_mb(old->shared_count, 0); + old->shared_count = 0; + write_seqcount_end(&obj->seq); preempt_enable();
/* inplace update, no shared fences */ @@ -295,15 +312,17 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src) { struct dma_resv_list *src_list, *dst_list; struct dma_fence *old, *new; - unsigned int i, shared_count; + unsigned i;
dma_resv_assert_held(dst);
rcu_read_lock(); + src_list = rcu_dereference(src->fence);
retry: - dma_resv_fences(src, &new, &src_list, &shared_count); - if (shared_count) { + if (src_list) { + unsigned shared_count = src_list->shared_count; + rcu_read_unlock();
dst_list = dma_resv_list_alloc(shared_count); @@ -311,14 +330,14 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src) return -ENOMEM;
rcu_read_lock(); - dma_resv_fences(src, &new, &src_list, &shared_count); - if (!src_list || shared_count > dst_list->shared_max) { + src_list = rcu_dereference(src->fence); + if (!src_list || src_list->shared_count > shared_count) { kfree(dst_list); goto retry; }
dst_list->shared_count = 0; - for (i = 0; i < shared_count; ++i) { + for (i = 0; i < src_list->shared_count; ++i) { struct dma_fence *fence;
fence = rcu_dereference(src_list->shared[i]); @@ -328,6 +347,7 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src)
if (!dma_fence_get_rcu(fence)) { dma_resv_list_free(dst_list); + src_list = rcu_dereference(src->fence); goto retry; }
@@ -342,18 +362,18 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src) dst_list = NULL; }
- if (new && !dma_fence_get_rcu(new)) { - dma_resv_list_free(dst_list); - goto retry; - } + new = dma_fence_get_rcu_safe(&src->fence_excl); rcu_read_unlock();
src_list = dma_resv_get_list(dst); old = dma_resv_get_excl(dst);
preempt_disable(); - rcu_assign_pointer(dst->fence_excl, new); - rcu_assign_pointer(dst->fence, dst_list); + write_seqcount_begin(&dst->seq); + /* write_seqcount_begin provides the necessary memory barrier */ + RCU_INIT_POINTER(dst->fence_excl, new); + RCU_INIT_POINTER(dst->fence, dst_list); + write_seqcount_end(&dst->seq); preempt_enable();
dma_resv_list_free(src_list); @@ -388,18 +408,19 @@ int dma_resv_get_fences_rcu(struct dma_resv *obj,
do { struct dma_resv_list *fobj; - unsigned int i; + unsigned int i, seq; size_t sz = 0;
- i = 0; + shared_count = i = 0;
rcu_read_lock(); - dma_resv_fences(obj, &fence_excl, &fobj, - &shared_count); + seq = read_seqcount_begin(&obj->seq);
+ fence_excl = rcu_dereference(obj->fence_excl); if (fence_excl && !dma_fence_get_rcu(fence_excl)) goto unlock;
+ fobj = rcu_dereference(obj->fence); if (fobj) sz += sizeof(*shared) * fobj->shared_max;
@@ -427,6 +448,7 @@ int dma_resv_get_fences_rcu(struct dma_resv *obj, break; } shared = nshared; + shared_count = fobj ? fobj->shared_count : 0; for (i = 0; i < shared_count; ++i) { shared[i] = rcu_dereference(fobj->shared[i]); if (!dma_fence_get_rcu(shared[i])) @@ -434,7 +456,7 @@ int dma_resv_get_fences_rcu(struct dma_resv *obj, } }
- if (i != shared_count) { + if (i != shared_count || read_seqcount_retry(&obj->seq, seq)) { while (i--) dma_fence_put(shared[i]); dma_fence_put(fence_excl); @@ -478,17 +500,18 @@ long dma_resv_wait_timeout_rcu(struct dma_resv *obj, bool wait_all, bool intr, unsigned long timeout) { - struct dma_resv_list *fobj; struct dma_fence *fence; - unsigned shared_count; + unsigned seq, shared_count; long ret = timeout ? timeout : 1; int i;
retry: + shared_count = 0; + seq = read_seqcount_begin(&obj->seq); rcu_read_lock(); i = -1;
- dma_resv_fences(obj, &fence, &fobj, &shared_count); + fence = rcu_dereference(obj->fence_excl); if (fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { if (!dma_fence_get_rcu(fence)) goto unlock_retry; @@ -503,6 +526,11 @@ long dma_resv_wait_timeout_rcu(struct dma_resv *obj, }
if (wait_all) { + struct dma_resv_list *fobj = rcu_dereference(obj->fence); + + if (fobj) + shared_count = fobj->shared_count; + for (i = 0; !fence && i < shared_count; ++i) { struct dma_fence *lfence = rcu_dereference(fobj->shared[i]);
@@ -525,6 +553,11 @@ long dma_resv_wait_timeout_rcu(struct dma_resv *obj,
rcu_read_unlock(); if (fence) { + if (read_seqcount_retry(&obj->seq, seq)) { + dma_fence_put(fence); + goto retry; + } + ret = dma_fence_wait_timeout(fence, intr, ret); dma_fence_put(fence); if (ret > 0 && wait_all && (i + 1 < shared_count)) @@ -567,19 +600,23 @@ static inline int dma_resv_test_signaled_single(struct dma_fence *passed_fence) */ bool dma_resv_test_signaled_rcu(struct dma_resv *obj, bool test_all) { - struct dma_resv_list *fobj; - struct dma_fence *fence_excl; - unsigned shared_count; + unsigned seq, shared_count; int ret;
rcu_read_lock(); retry: ret = true; + shared_count = 0; + seq = read_seqcount_begin(&obj->seq);
- dma_resv_fences(obj, &fence_excl, &fobj, &shared_count); if (test_all) { unsigned i;
+ struct dma_resv_list *fobj = rcu_dereference(obj->fence); + + if (fobj) + shared_count = fobj->shared_count; + for (i = 0; i < shared_count; ++i) { struct dma_fence *fence = rcu_dereference(fobj->shared[i]);
@@ -589,14 +626,24 @@ bool dma_resv_test_signaled_rcu(struct dma_resv *obj, bool test_all) else if (!ret) break; } - }
- if (!shared_count && fence_excl) { - ret = dma_resv_test_signaled_single(fence_excl); - if (ret < 0) + if (read_seqcount_retry(&obj->seq, seq)) goto retry; }
+ if (!shared_count) { + struct dma_fence *fence_excl = rcu_dereference(obj->fence_excl); + + if (fence_excl) { + ret = dma_resv_test_signaled_single(fence_excl); + if (ret < 0) + goto retry; + + if (read_seqcount_retry(&obj->seq, seq)) + goto retry; + } + } + rcu_read_unlock(); return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index bc4ec6b20a87..76e3516484e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -251,7 +251,12 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, new->shared_max = old->shared_max; new->shared_count = k;
- rcu_assign_pointer(resv->fence, new); + /* Install the new fence list, seqcount provides the barriers */ + preempt_disable(); + write_seqcount_begin(&resv->seq); + RCU_INIT_POINTER(resv->fence, new); + write_seqcount_end(&resv->seq); + preempt_enable();
/* Drop the references to the removed fences or move them to ef_list */ for (i = j, k = 0; i < old->shared_count; ++i) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c index a2aff1d8290e..3d4f5775a4ba 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c @@ -83,8 +83,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, struct drm_i915_gem_busy *args = data; struct drm_i915_gem_object *obj; struct dma_resv_list *list; - unsigned int i, shared_count; - struct dma_fence *excl; + unsigned int seq; int err;
err = -ENOENT; @@ -110,18 +109,29 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, * to report the overall busyness. This is what the wait-ioctl does. * */ - dma_resv_fences(obj->base.resv, &excl, &list, &shared_count); +retry: + seq = raw_read_seqcount(&obj->base.resv->seq);
/* Translate the exclusive fence to the READ *and* WRITE engine */ - args->busy = busy_check_writer(excl); + args->busy = + busy_check_writer(rcu_dereference(obj->base.resv->fence_excl));
/* Translate shared fences to READ set of engines */ - for (i = 0; i < shared_count; ++i) { - struct dma_fence *fence = rcu_dereference(list->shared[i]); + list = rcu_dereference(obj->base.resv->fence); + if (list) { + unsigned int shared_count = list->shared_count, i;
- args->busy |= busy_check_reader(fence); + for (i = 0; i < shared_count; ++i) { + struct dma_fence *fence = + rcu_dereference(list->shared[i]); + + args->busy |= busy_check_reader(fence); + } }
+ if (args->busy && read_seqcount_retry(&obj->base.resv->seq, seq)) + goto retry; + err = 0; out: rcu_read_unlock(); diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index 38f2802afabb..ee50d10f052b 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -46,6 +46,8 @@ #include <linux/rcupdate.h>
extern struct ww_class reservation_ww_class; +extern struct lock_class_key reservation_seqcount_class; +extern const char reservation_seqcount_string[];
/** * struct dma_resv_list - a list of shared fences @@ -69,6 +71,7 @@ struct dma_resv_list { */ struct dma_resv { struct ww_mutex lock; + seqcount_t seq;
struct dma_fence __rcu *fence_excl; struct dma_resv_list __rcu *fence; @@ -77,24 +80,6 @@ struct dma_resv { #define dma_resv_held(obj) lockdep_is_held(&(obj)->lock.base) #define dma_resv_assert_held(obj) lockdep_assert_held(&(obj)->lock.base)
-/** - * dma_resv_get_excl - get the reservation object's - * exclusive fence, with update-side lock held - * @obj: the reservation object - * - * Returns the exclusive fence (if any). Does NOT take a - * reference. Writers must hold obj->lock, readers may only - * hold a RCU read side lock. - * - * RETURNS - * The exclusive fence or NULL - */ -static inline struct dma_fence *dma_resv_get_excl(struct dma_resv *obj) -{ - return rcu_dereference_protected(obj->fence_excl, - dma_resv_held(obj)); -} - /** * dma_resv_get_list - get the reservation object's * shared fence list, with update-side lock held @@ -109,53 +94,6 @@ static inline struct dma_resv_list *dma_resv_get_list(struct dma_resv *obj) dma_resv_held(obj)); }
-/** - * dma_resv_fences - read consistent fence pointers - * @obj: reservation object where we get the fences from - * @excl: pointer for the exclusive fence - * @list: pointer for the shared fence list - * - * Make sure we have a consisten exclusive fence and shared fence list. - * Must be called with rcu read side lock held. - */ -static inline void dma_resv_fences(struct dma_resv *obj, - struct dma_fence **excl, - struct dma_resv_list **list, - u32 *shared_count) -{ - do { - *excl = rcu_dereference(obj->fence_excl); - *list = rcu_dereference(obj->fence); - *shared_count = *list ? (*list)->shared_count : 0; - smp_rmb(); /* See dma_resv_add_excl_fence */ - } while (rcu_access_pointer(obj->fence_excl) != *excl); -} - -/** - * dma_resv_get_excl_rcu - get the reservation object's - * exclusive fence, without lock held. - * @obj: the reservation object - * - * If there is an exclusive fence, this atomically increments it's - * reference count and returns it. - * - * RETURNS - * The exclusive fence or NULL if none - */ -static inline struct dma_fence *dma_resv_get_excl_rcu(struct dma_resv *obj) -{ - struct dma_fence *fence; - - if (!rcu_access_pointer(obj->fence_excl)) - return NULL; - - rcu_read_lock(); - fence = dma_fence_get_rcu_safe(&obj->fence_excl); - rcu_read_unlock(); - - return fence; -} - /** * dma_resv_lock - lock the reservation object * @obj: the reservation object @@ -290,6 +228,51 @@ static inline void dma_resv_unlock(struct dma_resv *obj) ww_mutex_unlock(&obj->lock); }
+/** + * dma_resv_get_excl - get the reservation object's + * exclusive fence, with update-side lock held + * @obj: the reservation object + * + * Returns the exclusive fence (if any). Does NOT take a + * reference. Writers must hold obj->lock, readers may only + * hold a RCU read side lock. + * + * RETURNS + * The exclusive fence or NULL + */ +static inline struct dma_fence * +dma_resv_get_excl(struct dma_resv *obj) +{ + return rcu_dereference_protected(obj->fence_excl, + dma_resv_held(obj)); +} + +/** + * dma_resv_get_excl_rcu - get the reservation object's + * exclusive fence, without lock held. + * @obj: the reservation object + * + * If there is an exclusive fence, this atomically increments it's + * reference count and returns it. + * + * RETURNS + * The exclusive fence or NULL if none + */ +static inline struct dma_fence * +dma_resv_get_excl_rcu(struct dma_resv *obj) +{ + struct dma_fence *fence; + + if (!rcu_access_pointer(obj->fence_excl)) + return NULL; + + rcu_read_lock(); + fence = dma_fence_get_rcu_safe(&obj->fence_excl); + rcu_read_unlock(); + + return fence; +} + void dma_resv_init(struct dma_resv *obj); void dma_resv_fini(struct dma_resv *obj); int dma_resv_reserve_shared(struct dma_resv *obj, unsigned int num_fences);
Quoting Chris Wilson (2019-08-14 19:24:01)
This reverts 67c97fb79a7f ("dma-buf: add reservation_object_fences helper") dd7a7d1ff2f1 ("drm/i915: use new reservation_object_fences helper") 0e1d8083bddb ("dma-buf: further relax reservation_object_add_shared_fence") 5d344f58da76 ("dma-buf: nuke reservation_object seq number")
The scenario that defeats simply grabbing a set of shared/exclusive fences and using them blissfully under RCU is that any of those fences may be reallocated by a SLAB_TYPESAFE_BY_RCU fence slab cache. In this scenario, while keeping the rcu_read_lock we need to establish that no fence was changed in the dma_resv after a read (or full) memory barrier.
Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Cc: Chris Wilson chris@chris-wilson.co.uk
I said I needed to go lie down, that proves it.
Cc: Christian König christian.koenig@amd.com
Cc: Daniel Vetter daniel.vetter@ffwll.ch
drivers/dma-buf/dma-buf.c | 31 ++++- drivers/dma-buf/dma-resv.c | 109 ++++++++++++----- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 7 +- drivers/gpu/drm/i915/gem/i915_gem_busy.c | 24 ++-- include/linux/dma-resv.h | 113 ++++++++---------- 5 files changed, 175 insertions(+), 109 deletions(-)
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index b3400d6524ab..433d91d710e4 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -199,7 +199,7 @@ static __poll_t dma_buf_poll(struct file *file, poll_table *poll) struct dma_resv_list *fobj; struct dma_fence *fence_excl; __poll_t events;
unsigned shared_count;
unsigned shared_count, seq; dmabuf = file->private_data; if (!dmabuf || !dmabuf->resv)
@@ -213,8 +213,21 @@ static __poll_t dma_buf_poll(struct file *file, poll_table *poll) if (!events) return 0;
+retry:
seq = read_seqcount_begin(&resv->seq); rcu_read_lock();
dma_resv_fences(resv, &fence_excl, &fobj, &shared_count);
fobj = rcu_dereference(resv->fence);
if (fobj)
shared_count = fobj->shared_count;
else
shared_count = 0;
fence_excl = rcu_dereference(resv->fence_excl);
if (read_seqcount_retry(&resv->seq, seq)) {
rcu_read_unlock();
goto retry;
}
if (fence_excl && (!(events & EPOLLOUT) || shared_count == 0)) { struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_excl; __poll_t pevents = EPOLLIN;
@@ -1144,6 +1157,7 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused) struct dma_resv *robj; struct dma_resv_list *fobj; struct dma_fence *fence;
unsigned seq; int count = 0, attach_count, shared_count, i; size_t size = 0;
@@ -1174,9 +1188,16 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused) buf_obj->name ?: "");
robj = buf_obj->resv;
rcu_read_lock();
dma_resv_fences(robj, &fence, &fobj, &shared_count);
rcu_read_unlock();
while (true) {
seq = read_seqcount_begin(&robj->seq);
rcu_read_lock();
fobj = rcu_dereference(robj->fence);
shared_count = fobj ? fobj->shared_count : 0;
fence = rcu_dereference(robj->fence_excl);
if (!read_seqcount_retry(&robj->seq, seq))
break;
rcu_read_unlock();
} if (fence) seq_printf(s, "\tExclusive fence: %s %s %ssignalled\n",
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index f5142683c851..42a8f3f11681 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -49,6 +49,12 @@ DEFINE_WD_CLASS(reservation_ww_class); EXPORT_SYMBOL(reservation_ww_class);
+struct lock_class_key reservation_seqcount_class; +EXPORT_SYMBOL(reservation_seqcount_class);
+const char reservation_seqcount_string[] = "reservation_seqcount"; +EXPORT_SYMBOL(reservation_seqcount_string);
/**
- dma_resv_list_alloc - allocate fence list
- @shared_max: number of fences we need space for
@@ -96,6 +102,9 @@ static void dma_resv_list_free(struct dma_resv_list *list) void dma_resv_init(struct dma_resv *obj) { ww_mutex_init(&obj->lock, &reservation_ww_class);
__seqcount_init(&obj->seq, reservation_seqcount_string,
&reservation_seqcount_class); RCU_INIT_POINTER(obj->fence, NULL); RCU_INIT_POINTER(obj->fence_excl, NULL);
} @@ -225,6 +234,9 @@ void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence) fobj = dma_resv_get_list(obj); count = fobj->shared_count;
preempt_disable();
write_seqcount_begin(&obj->seq);
for (i = 0; i < count; ++i) { old = rcu_dereference_protected(fobj->shared[i],
@@ -242,6 +254,9 @@ void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence) RCU_INIT_POINTER(fobj->shared[i], fence); /* pointer update must be visible before we extend the shared_count */ smp_store_mb(fobj->shared_count, count);
write_seqcount_end(&obj->seq);
preempt_enable(); dma_fence_put(old);
} EXPORT_SYMBOL(dma_resv_add_shared_fence); @@ -269,10 +284,12 @@ void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence) dma_fence_get(fence);
preempt_disable();
rcu_assign_pointer(obj->fence_excl, fence);
/* pointer update must be visible before we modify the shared_count */
write_seqcount_begin(&obj->seq);
/* write_seqcount_begin provides the necessary memory barrier */
RCU_INIT_POINTER(obj->fence_excl, fence); if (old)
smp_store_mb(old->shared_count, 0);
old->shared_count = 0;
write_seqcount_end(&obj->seq); preempt_enable(); /* inplace update, no shared fences */
@@ -295,15 +312,17 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src) { struct dma_resv_list *src_list, *dst_list; struct dma_fence *old, *new;
unsigned int i, shared_count;
unsigned i; dma_resv_assert_held(dst); rcu_read_lock();
src_list = rcu_dereference(src->fence);
retry:
dma_resv_fences(src, &new, &src_list, &shared_count);
if (shared_count) {
if (src_list) {
unsigned shared_count = src_list->shared_count;
rcu_read_unlock(); dst_list = dma_resv_list_alloc(shared_count);
@@ -311,14 +330,14 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src) return -ENOMEM;
rcu_read_lock();
dma_resv_fences(src, &new, &src_list, &shared_count);
if (!src_list || shared_count > dst_list->shared_max) {
src_list = rcu_dereference(src->fence);
if (!src_list || src_list->shared_count > shared_count) { kfree(dst_list); goto retry; } dst_list->shared_count = 0;
for (i = 0; i < shared_count; ++i) {
for (i = 0; i < src_list->shared_count; ++i) { struct dma_fence *fence; fence = rcu_dereference(src_list->shared[i]);
@@ -328,6 +347,7 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src)
if (!dma_fence_get_rcu(fence)) { dma_resv_list_free(dst_list);
src_list = rcu_dereference(src->fence); goto retry; }
@@ -342,18 +362,18 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src) dst_list = NULL; }
if (new && !dma_fence_get_rcu(new)) {
dma_resv_list_free(dst_list);
goto retry;
}
new = dma_fence_get_rcu_safe(&src->fence_excl); rcu_read_unlock(); src_list = dma_resv_get_list(dst); old = dma_resv_get_excl(dst); preempt_disable();
rcu_assign_pointer(dst->fence_excl, new);
rcu_assign_pointer(dst->fence, dst_list);
write_seqcount_begin(&dst->seq);
/* write_seqcount_begin provides the necessary memory barrier */
RCU_INIT_POINTER(dst->fence_excl, new);
RCU_INIT_POINTER(dst->fence, dst_list);
write_seqcount_end(&dst->seq); preempt_enable(); dma_resv_list_free(src_list);
@@ -388,18 +408,19 @@ int dma_resv_get_fences_rcu(struct dma_resv *obj,
do { struct dma_resv_list *fobj;
unsigned int i;
unsigned int i, seq; size_t sz = 0;
i = 0;
shared_count = i = 0; rcu_read_lock();
dma_resv_fences(obj, &fence_excl, &fobj,
&shared_count);
seq = read_seqcount_begin(&obj->seq);
fence_excl = rcu_dereference(obj->fence_excl); if (fence_excl && !dma_fence_get_rcu(fence_excl)) goto unlock;
fobj = rcu_dereference(obj->fence); if (fobj) sz += sizeof(*shared) * fobj->shared_max;
@@ -427,6 +448,7 @@ int dma_resv_get_fences_rcu(struct dma_resv *obj, break; } shared = nshared;
shared_count = fobj ? fobj->shared_count : 0; for (i = 0; i < shared_count; ++i) { shared[i] = rcu_dereference(fobj->shared[i]); if (!dma_fence_get_rcu(shared[i]))
@@ -434,7 +456,7 @@ int dma_resv_get_fences_rcu(struct dma_resv *obj, } }
if (i != shared_count) {
if (i != shared_count || read_seqcount_retry(&obj->seq, seq)) { while (i--) dma_fence_put(shared[i]); dma_fence_put(fence_excl);
@@ -478,17 +500,18 @@ long dma_resv_wait_timeout_rcu(struct dma_resv *obj, bool wait_all, bool intr, unsigned long timeout) {
struct dma_resv_list *fobj; struct dma_fence *fence;
unsigned shared_count;
unsigned seq, shared_count; long ret = timeout ? timeout : 1; int i;
retry:
shared_count = 0;
seq = read_seqcount_begin(&obj->seq); rcu_read_lock(); i = -1;
dma_resv_fences(obj, &fence, &fobj, &shared_count);
fence = rcu_dereference(obj->fence_excl); if (fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { if (!dma_fence_get_rcu(fence)) goto unlock_retry;
@@ -503,6 +526,11 @@ long dma_resv_wait_timeout_rcu(struct dma_resv *obj, }
if (wait_all) {
struct dma_resv_list *fobj = rcu_dereference(obj->fence);
if (fobj)
shared_count = fobj->shared_count;
for (i = 0; !fence && i < shared_count; ++i) { struct dma_fence *lfence = rcu_dereference(fobj->shared[i]);
@@ -525,6 +553,11 @@ long dma_resv_wait_timeout_rcu(struct dma_resv *obj,
rcu_read_unlock(); if (fence) {
if (read_seqcount_retry(&obj->seq, seq)) {
dma_fence_put(fence);
goto retry;
}
ret = dma_fence_wait_timeout(fence, intr, ret); dma_fence_put(fence); if (ret > 0 && wait_all && (i + 1 < shared_count))
@@ -567,19 +600,23 @@ static inline int dma_resv_test_signaled_single(struct dma_fence *passed_fence) */ bool dma_resv_test_signaled_rcu(struct dma_resv *obj, bool test_all) {
struct dma_resv_list *fobj;
struct dma_fence *fence_excl;
unsigned shared_count;
unsigned seq, shared_count; int ret; rcu_read_lock();
retry: ret = true;
shared_count = 0;
seq = read_seqcount_begin(&obj->seq);
dma_resv_fences(obj, &fence_excl, &fobj, &shared_count); if (test_all) { unsigned i;
struct dma_resv_list *fobj = rcu_dereference(obj->fence);
if (fobj)
shared_count = fobj->shared_count;
for (i = 0; i < shared_count; ++i) { struct dma_fence *fence = rcu_dereference(fobj->shared[i]);
@@ -589,14 +626,24 @@ bool dma_resv_test_signaled_rcu(struct dma_resv *obj, bool test_all) else if (!ret) break; }
}
if (!shared_count && fence_excl) {
ret = dma_resv_test_signaled_single(fence_excl);
if (ret < 0)
if (read_seqcount_retry(&obj->seq, seq)) goto retry; }
if (!shared_count) {
struct dma_fence *fence_excl = rcu_dereference(obj->fence_excl);
if (fence_excl) {
ret = dma_resv_test_signaled_single(fence_excl);
if (ret < 0)
goto retry;
if (read_seqcount_retry(&obj->seq, seq))
goto retry;
}
}
rcu_read_unlock(); return ret;
} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index bc4ec6b20a87..76e3516484e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -251,7 +251,12 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, new->shared_max = old->shared_max; new->shared_count = k;
rcu_assign_pointer(resv->fence, new);
/* Install the new fence list, seqcount provides the barriers */
preempt_disable();
write_seqcount_begin(&resv->seq);
RCU_INIT_POINTER(resv->fence, new);
write_seqcount_end(&resv->seq);
preempt_enable(); /* Drop the references to the removed fences or move them to ef_list */ for (i = j, k = 0; i < old->shared_count; ++i) {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c index a2aff1d8290e..3d4f5775a4ba 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c @@ -83,8 +83,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, struct drm_i915_gem_busy *args = data; struct drm_i915_gem_object *obj; struct dma_resv_list *list;
unsigned int i, shared_count;
struct dma_fence *excl;
unsigned int seq; int err; err = -ENOENT;
@@ -110,18 +109,29 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, * to report the overall busyness. This is what the wait-ioctl does. * */
dma_resv_fences(obj->base.resv, &excl, &list, &shared_count);
+retry:
seq = raw_read_seqcount(&obj->base.resv->seq); /* Translate the exclusive fence to the READ *and* WRITE engine */
args->busy = busy_check_writer(excl);
args->busy =
busy_check_writer(rcu_dereference(obj->base.resv->fence_excl)); /* Translate shared fences to READ set of engines */
for (i = 0; i < shared_count; ++i) {
struct dma_fence *fence = rcu_dereference(list->shared[i]);
list = rcu_dereference(obj->base.resv->fence);
if (list) {
unsigned int shared_count = list->shared_count, i;
args->busy |= busy_check_reader(fence);
for (i = 0; i < shared_count; ++i) {
struct dma_fence *fence =
rcu_dereference(list->shared[i]);
args->busy |= busy_check_reader(fence);
} }
if (args->busy && read_seqcount_retry(&obj->base.resv->seq, seq))
goto retry;
err = 0;
out: rcu_read_unlock(); diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index 38f2802afabb..ee50d10f052b 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -46,6 +46,8 @@ #include <linux/rcupdate.h>
extern struct ww_class reservation_ww_class; +extern struct lock_class_key reservation_seqcount_class; +extern const char reservation_seqcount_string[];
/**
- struct dma_resv_list - a list of shared fences
@@ -69,6 +71,7 @@ struct dma_resv_list { */ struct dma_resv { struct ww_mutex lock;
seqcount_t seq; struct dma_fence __rcu *fence_excl; struct dma_resv_list __rcu *fence;
@@ -77,24 +80,6 @@ struct dma_resv { #define dma_resv_held(obj) lockdep_is_held(&(obj)->lock.base) #define dma_resv_assert_held(obj) lockdep_assert_held(&(obj)->lock.base)
-/**
- dma_resv_get_excl - get the reservation object's
- exclusive fence, with update-side lock held
- @obj: the reservation object
- Returns the exclusive fence (if any). Does NOT take a
- reference. Writers must hold obj->lock, readers may only
- hold a RCU read side lock.
- RETURNS
- The exclusive fence or NULL
- */
-static inline struct dma_fence *dma_resv_get_excl(struct dma_resv *obj) -{
return rcu_dereference_protected(obj->fence_excl,
dma_resv_held(obj));
-}
/**
- dma_resv_get_list - get the reservation object's
- shared fence list, with update-side lock held
@@ -109,53 +94,6 @@ static inline struct dma_resv_list *dma_resv_get_list(struct dma_resv *obj) dma_resv_held(obj)); }
-/**
- dma_resv_fences - read consistent fence pointers
- @obj: reservation object where we get the fences from
- @excl: pointer for the exclusive fence
- @list: pointer for the shared fence list
- Make sure we have a consisten exclusive fence and shared fence list.
- Must be called with rcu read side lock held.
- */
-static inline void dma_resv_fences(struct dma_resv *obj,
struct dma_fence **excl,
struct dma_resv_list **list,
u32 *shared_count)
-{
do {
*excl = rcu_dereference(obj->fence_excl);
*list = rcu_dereference(obj->fence);
*shared_count = *list ? (*list)->shared_count : 0;
smp_rmb(); /* See dma_resv_add_excl_fence */
} while (rcu_access_pointer(obj->fence_excl) != *excl);
-}
-/**
- dma_resv_get_excl_rcu - get the reservation object's
- exclusive fence, without lock held.
- @obj: the reservation object
- If there is an exclusive fence, this atomically increments it's
- reference count and returns it.
- RETURNS
- The exclusive fence or NULL if none
- */
-static inline struct dma_fence *dma_resv_get_excl_rcu(struct dma_resv *obj) -{
struct dma_fence *fence;
if (!rcu_access_pointer(obj->fence_excl))
return NULL;
rcu_read_lock();
fence = dma_fence_get_rcu_safe(&obj->fence_excl);
rcu_read_unlock();
return fence;
-}
/**
- dma_resv_lock - lock the reservation object
- @obj: the reservation object
@@ -290,6 +228,51 @@ static inline void dma_resv_unlock(struct dma_resv *obj) ww_mutex_unlock(&obj->lock); }
+/**
- dma_resv_get_excl - get the reservation object's
- exclusive fence, with update-side lock held
- @obj: the reservation object
- Returns the exclusive fence (if any). Does NOT take a
- reference. Writers must hold obj->lock, readers may only
- hold a RCU read side lock.
- RETURNS
- The exclusive fence or NULL
- */
+static inline struct dma_fence * +dma_resv_get_excl(struct dma_resv *obj) +{
return rcu_dereference_protected(obj->fence_excl,
dma_resv_held(obj));
+}
+/**
- dma_resv_get_excl_rcu - get the reservation object's
- exclusive fence, without lock held.
- @obj: the reservation object
- If there is an exclusive fence, this atomically increments it's
- reference count and returns it.
- RETURNS
- The exclusive fence or NULL if none
- */
+static inline struct dma_fence * +dma_resv_get_excl_rcu(struct dma_resv *obj) +{
struct dma_fence *fence;
if (!rcu_access_pointer(obj->fence_excl))
return NULL;
rcu_read_lock();
fence = dma_fence_get_rcu_safe(&obj->fence_excl);
rcu_read_unlock();
return fence;
+}
void dma_resv_init(struct dma_resv *obj); void dma_resv_fini(struct dma_resv *obj); int dma_resv_reserve_shared(struct dma_resv *obj, unsigned int num_fences); -- 2.23.0.rc1
On Wed, Aug 14, 2019 at 07:26:44PM +0100, Chris Wilson wrote:
Quoting Chris Wilson (2019-08-14 19:24:01)
This reverts 67c97fb79a7f ("dma-buf: add reservation_object_fences helper") dd7a7d1ff2f1 ("drm/i915: use new reservation_object_fences helper") 0e1d8083bddb ("dma-buf: further relax reservation_object_add_shared_fence") 5d344f58da76 ("dma-buf: nuke reservation_object seq number")
Oh I didn't realize they landed already.
The scenario that defeats simply grabbing a set of shared/exclusive fences and using them blissfully under RCU is that any of those fences may be reallocated by a SLAB_TYPESAFE_BY_RCU fence slab cache. In this scenario, while keeping the rcu_read_lock we need to establish that no fence was changed in the dma_resv after a read (or full) memory barrier.
So if I'm reading correctly what Chris is saying here I guess my half comment in that other thread pointed at a real oversight. Since I still haven't checked and it's too late for real review not more for now.
Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Cc: Chris Wilson chris@chris-wilson.co.uk
I said I needed to go lie down, that proves it.
Yeah I guess we need to wait for Christian to wake up an have a working brain. -Daniel
Cc: Christian König christian.koenig@amd.com
Cc: Daniel Vetter daniel.vetter@ffwll.ch
drivers/dma-buf/dma-buf.c | 31 ++++- drivers/dma-buf/dma-resv.c | 109 ++++++++++++----- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 7 +- drivers/gpu/drm/i915/gem/i915_gem_busy.c | 24 ++-- include/linux/dma-resv.h | 113 ++++++++---------- 5 files changed, 175 insertions(+), 109 deletions(-)
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index b3400d6524ab..433d91d710e4 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -199,7 +199,7 @@ static __poll_t dma_buf_poll(struct file *file, poll_table *poll) struct dma_resv_list *fobj; struct dma_fence *fence_excl; __poll_t events;
unsigned shared_count;
unsigned shared_count, seq; dmabuf = file->private_data; if (!dmabuf || !dmabuf->resv)
@@ -213,8 +213,21 @@ static __poll_t dma_buf_poll(struct file *file, poll_table *poll) if (!events) return 0;
+retry:
seq = read_seqcount_begin(&resv->seq); rcu_read_lock();
dma_resv_fences(resv, &fence_excl, &fobj, &shared_count);
fobj = rcu_dereference(resv->fence);
if (fobj)
shared_count = fobj->shared_count;
else
shared_count = 0;
fence_excl = rcu_dereference(resv->fence_excl);
if (read_seqcount_retry(&resv->seq, seq)) {
rcu_read_unlock();
goto retry;
}
if (fence_excl && (!(events & EPOLLOUT) || shared_count == 0)) { struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_excl; __poll_t pevents = EPOLLIN;
@@ -1144,6 +1157,7 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused) struct dma_resv *robj; struct dma_resv_list *fobj; struct dma_fence *fence;
unsigned seq; int count = 0, attach_count, shared_count, i; size_t size = 0;
@@ -1174,9 +1188,16 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused) buf_obj->name ?: "");
robj = buf_obj->resv;
rcu_read_lock();
dma_resv_fences(robj, &fence, &fobj, &shared_count);
rcu_read_unlock();
while (true) {
seq = read_seqcount_begin(&robj->seq);
rcu_read_lock();
fobj = rcu_dereference(robj->fence);
shared_count = fobj ? fobj->shared_count : 0;
fence = rcu_dereference(robj->fence_excl);
if (!read_seqcount_retry(&robj->seq, seq))
break;
rcu_read_unlock();
} if (fence) seq_printf(s, "\tExclusive fence: %s %s %ssignalled\n",
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index f5142683c851..42a8f3f11681 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -49,6 +49,12 @@ DEFINE_WD_CLASS(reservation_ww_class); EXPORT_SYMBOL(reservation_ww_class);
+struct lock_class_key reservation_seqcount_class; +EXPORT_SYMBOL(reservation_seqcount_class);
+const char reservation_seqcount_string[] = "reservation_seqcount"; +EXPORT_SYMBOL(reservation_seqcount_string);
/**
- dma_resv_list_alloc - allocate fence list
- @shared_max: number of fences we need space for
@@ -96,6 +102,9 @@ static void dma_resv_list_free(struct dma_resv_list *list) void dma_resv_init(struct dma_resv *obj) { ww_mutex_init(&obj->lock, &reservation_ww_class);
__seqcount_init(&obj->seq, reservation_seqcount_string,
&reservation_seqcount_class); RCU_INIT_POINTER(obj->fence, NULL); RCU_INIT_POINTER(obj->fence_excl, NULL);
} @@ -225,6 +234,9 @@ void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence) fobj = dma_resv_get_list(obj); count = fobj->shared_count;
preempt_disable();
write_seqcount_begin(&obj->seq);
for (i = 0; i < count; ++i) { old = rcu_dereference_protected(fobj->shared[i],
@@ -242,6 +254,9 @@ void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence) RCU_INIT_POINTER(fobj->shared[i], fence); /* pointer update must be visible before we extend the shared_count */ smp_store_mb(fobj->shared_count, count);
write_seqcount_end(&obj->seq);
preempt_enable(); dma_fence_put(old);
} EXPORT_SYMBOL(dma_resv_add_shared_fence); @@ -269,10 +284,12 @@ void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence) dma_fence_get(fence);
preempt_disable();
rcu_assign_pointer(obj->fence_excl, fence);
/* pointer update must be visible before we modify the shared_count */
write_seqcount_begin(&obj->seq);
/* write_seqcount_begin provides the necessary memory barrier */
RCU_INIT_POINTER(obj->fence_excl, fence); if (old)
smp_store_mb(old->shared_count, 0);
old->shared_count = 0;
write_seqcount_end(&obj->seq); preempt_enable(); /* inplace update, no shared fences */
@@ -295,15 +312,17 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src) { struct dma_resv_list *src_list, *dst_list; struct dma_fence *old, *new;
unsigned int i, shared_count;
unsigned i; dma_resv_assert_held(dst); rcu_read_lock();
src_list = rcu_dereference(src->fence);
retry:
dma_resv_fences(src, &new, &src_list, &shared_count);
if (shared_count) {
if (src_list) {
unsigned shared_count = src_list->shared_count;
rcu_read_unlock(); dst_list = dma_resv_list_alloc(shared_count);
@@ -311,14 +330,14 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src) return -ENOMEM;
rcu_read_lock();
dma_resv_fences(src, &new, &src_list, &shared_count);
if (!src_list || shared_count > dst_list->shared_max) {
src_list = rcu_dereference(src->fence);
if (!src_list || src_list->shared_count > shared_count) { kfree(dst_list); goto retry; } dst_list->shared_count = 0;
for (i = 0; i < shared_count; ++i) {
for (i = 0; i < src_list->shared_count; ++i) { struct dma_fence *fence; fence = rcu_dereference(src_list->shared[i]);
@@ -328,6 +347,7 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src)
if (!dma_fence_get_rcu(fence)) { dma_resv_list_free(dst_list);
src_list = rcu_dereference(src->fence); goto retry; }
@@ -342,18 +362,18 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src) dst_list = NULL; }
if (new && !dma_fence_get_rcu(new)) {
dma_resv_list_free(dst_list);
goto retry;
}
new = dma_fence_get_rcu_safe(&src->fence_excl); rcu_read_unlock(); src_list = dma_resv_get_list(dst); old = dma_resv_get_excl(dst); preempt_disable();
rcu_assign_pointer(dst->fence_excl, new);
rcu_assign_pointer(dst->fence, dst_list);
write_seqcount_begin(&dst->seq);
/* write_seqcount_begin provides the necessary memory barrier */
RCU_INIT_POINTER(dst->fence_excl, new);
RCU_INIT_POINTER(dst->fence, dst_list);
write_seqcount_end(&dst->seq); preempt_enable(); dma_resv_list_free(src_list);
@@ -388,18 +408,19 @@ int dma_resv_get_fences_rcu(struct dma_resv *obj,
do { struct dma_resv_list *fobj;
unsigned int i;
unsigned int i, seq; size_t sz = 0;
i = 0;
shared_count = i = 0; rcu_read_lock();
dma_resv_fences(obj, &fence_excl, &fobj,
&shared_count);
seq = read_seqcount_begin(&obj->seq);
fence_excl = rcu_dereference(obj->fence_excl); if (fence_excl && !dma_fence_get_rcu(fence_excl)) goto unlock;
fobj = rcu_dereference(obj->fence); if (fobj) sz += sizeof(*shared) * fobj->shared_max;
@@ -427,6 +448,7 @@ int dma_resv_get_fences_rcu(struct dma_resv *obj, break; } shared = nshared;
shared_count = fobj ? fobj->shared_count : 0; for (i = 0; i < shared_count; ++i) { shared[i] = rcu_dereference(fobj->shared[i]); if (!dma_fence_get_rcu(shared[i]))
@@ -434,7 +456,7 @@ int dma_resv_get_fences_rcu(struct dma_resv *obj, } }
if (i != shared_count) {
if (i != shared_count || read_seqcount_retry(&obj->seq, seq)) { while (i--) dma_fence_put(shared[i]); dma_fence_put(fence_excl);
@@ -478,17 +500,18 @@ long dma_resv_wait_timeout_rcu(struct dma_resv *obj, bool wait_all, bool intr, unsigned long timeout) {
struct dma_resv_list *fobj; struct dma_fence *fence;
unsigned shared_count;
unsigned seq, shared_count; long ret = timeout ? timeout : 1; int i;
retry:
shared_count = 0;
seq = read_seqcount_begin(&obj->seq); rcu_read_lock(); i = -1;
dma_resv_fences(obj, &fence, &fobj, &shared_count);
fence = rcu_dereference(obj->fence_excl); if (fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { if (!dma_fence_get_rcu(fence)) goto unlock_retry;
@@ -503,6 +526,11 @@ long dma_resv_wait_timeout_rcu(struct dma_resv *obj, }
if (wait_all) {
struct dma_resv_list *fobj = rcu_dereference(obj->fence);
if (fobj)
shared_count = fobj->shared_count;
for (i = 0; !fence && i < shared_count; ++i) { struct dma_fence *lfence = rcu_dereference(fobj->shared[i]);
@@ -525,6 +553,11 @@ long dma_resv_wait_timeout_rcu(struct dma_resv *obj,
rcu_read_unlock(); if (fence) {
if (read_seqcount_retry(&obj->seq, seq)) {
dma_fence_put(fence);
goto retry;
}
ret = dma_fence_wait_timeout(fence, intr, ret); dma_fence_put(fence); if (ret > 0 && wait_all && (i + 1 < shared_count))
@@ -567,19 +600,23 @@ static inline int dma_resv_test_signaled_single(struct dma_fence *passed_fence) */ bool dma_resv_test_signaled_rcu(struct dma_resv *obj, bool test_all) {
struct dma_resv_list *fobj;
struct dma_fence *fence_excl;
unsigned shared_count;
unsigned seq, shared_count; int ret; rcu_read_lock();
retry: ret = true;
shared_count = 0;
seq = read_seqcount_begin(&obj->seq);
dma_resv_fences(obj, &fence_excl, &fobj, &shared_count); if (test_all) { unsigned i;
struct dma_resv_list *fobj = rcu_dereference(obj->fence);
if (fobj)
shared_count = fobj->shared_count;
for (i = 0; i < shared_count; ++i) { struct dma_fence *fence = rcu_dereference(fobj->shared[i]);
@@ -589,14 +626,24 @@ bool dma_resv_test_signaled_rcu(struct dma_resv *obj, bool test_all) else if (!ret) break; }
}
if (!shared_count && fence_excl) {
ret = dma_resv_test_signaled_single(fence_excl);
if (ret < 0)
if (read_seqcount_retry(&obj->seq, seq)) goto retry; }
if (!shared_count) {
struct dma_fence *fence_excl = rcu_dereference(obj->fence_excl);
if (fence_excl) {
ret = dma_resv_test_signaled_single(fence_excl);
if (ret < 0)
goto retry;
if (read_seqcount_retry(&obj->seq, seq))
goto retry;
}
}
rcu_read_unlock(); return ret;
} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index bc4ec6b20a87..76e3516484e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -251,7 +251,12 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, new->shared_max = old->shared_max; new->shared_count = k;
rcu_assign_pointer(resv->fence, new);
/* Install the new fence list, seqcount provides the barriers */
preempt_disable();
write_seqcount_begin(&resv->seq);
RCU_INIT_POINTER(resv->fence, new);
write_seqcount_end(&resv->seq);
preempt_enable(); /* Drop the references to the removed fences or move them to ef_list */ for (i = j, k = 0; i < old->shared_count; ++i) {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c index a2aff1d8290e..3d4f5775a4ba 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c @@ -83,8 +83,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, struct drm_i915_gem_busy *args = data; struct drm_i915_gem_object *obj; struct dma_resv_list *list;
unsigned int i, shared_count;
struct dma_fence *excl;
unsigned int seq; int err; err = -ENOENT;
@@ -110,18 +109,29 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, * to report the overall busyness. This is what the wait-ioctl does. * */
dma_resv_fences(obj->base.resv, &excl, &list, &shared_count);
+retry:
seq = raw_read_seqcount(&obj->base.resv->seq); /* Translate the exclusive fence to the READ *and* WRITE engine */
args->busy = busy_check_writer(excl);
args->busy =
busy_check_writer(rcu_dereference(obj->base.resv->fence_excl)); /* Translate shared fences to READ set of engines */
for (i = 0; i < shared_count; ++i) {
struct dma_fence *fence = rcu_dereference(list->shared[i]);
list = rcu_dereference(obj->base.resv->fence);
if (list) {
unsigned int shared_count = list->shared_count, i;
args->busy |= busy_check_reader(fence);
for (i = 0; i < shared_count; ++i) {
struct dma_fence *fence =
rcu_dereference(list->shared[i]);
args->busy |= busy_check_reader(fence);
} }
if (args->busy && read_seqcount_retry(&obj->base.resv->seq, seq))
goto retry;
err = 0;
out: rcu_read_unlock(); diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index 38f2802afabb..ee50d10f052b 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -46,6 +46,8 @@ #include <linux/rcupdate.h>
extern struct ww_class reservation_ww_class; +extern struct lock_class_key reservation_seqcount_class; +extern const char reservation_seqcount_string[];
/**
- struct dma_resv_list - a list of shared fences
@@ -69,6 +71,7 @@ struct dma_resv_list { */ struct dma_resv { struct ww_mutex lock;
seqcount_t seq; struct dma_fence __rcu *fence_excl; struct dma_resv_list __rcu *fence;
@@ -77,24 +80,6 @@ struct dma_resv { #define dma_resv_held(obj) lockdep_is_held(&(obj)->lock.base) #define dma_resv_assert_held(obj) lockdep_assert_held(&(obj)->lock.base)
-/**
- dma_resv_get_excl - get the reservation object's
- exclusive fence, with update-side lock held
- @obj: the reservation object
- Returns the exclusive fence (if any). Does NOT take a
- reference. Writers must hold obj->lock, readers may only
- hold a RCU read side lock.
- RETURNS
- The exclusive fence or NULL
- */
-static inline struct dma_fence *dma_resv_get_excl(struct dma_resv *obj) -{
return rcu_dereference_protected(obj->fence_excl,
dma_resv_held(obj));
-}
/**
- dma_resv_get_list - get the reservation object's
- shared fence list, with update-side lock held
@@ -109,53 +94,6 @@ static inline struct dma_resv_list *dma_resv_get_list(struct dma_resv *obj) dma_resv_held(obj)); }
-/**
- dma_resv_fences - read consistent fence pointers
- @obj: reservation object where we get the fences from
- @excl: pointer for the exclusive fence
- @list: pointer for the shared fence list
- Make sure we have a consisten exclusive fence and shared fence list.
- Must be called with rcu read side lock held.
- */
-static inline void dma_resv_fences(struct dma_resv *obj,
struct dma_fence **excl,
struct dma_resv_list **list,
u32 *shared_count)
-{
do {
*excl = rcu_dereference(obj->fence_excl);
*list = rcu_dereference(obj->fence);
*shared_count = *list ? (*list)->shared_count : 0;
smp_rmb(); /* See dma_resv_add_excl_fence */
} while (rcu_access_pointer(obj->fence_excl) != *excl);
-}
-/**
- dma_resv_get_excl_rcu - get the reservation object's
- exclusive fence, without lock held.
- @obj: the reservation object
- If there is an exclusive fence, this atomically increments it's
- reference count and returns it.
- RETURNS
- The exclusive fence or NULL if none
- */
-static inline struct dma_fence *dma_resv_get_excl_rcu(struct dma_resv *obj) -{
struct dma_fence *fence;
if (!rcu_access_pointer(obj->fence_excl))
return NULL;
rcu_read_lock();
fence = dma_fence_get_rcu_safe(&obj->fence_excl);
rcu_read_unlock();
return fence;
-}
/**
- dma_resv_lock - lock the reservation object
- @obj: the reservation object
@@ -290,6 +228,51 @@ static inline void dma_resv_unlock(struct dma_resv *obj) ww_mutex_unlock(&obj->lock); }
+/**
- dma_resv_get_excl - get the reservation object's
- exclusive fence, with update-side lock held
- @obj: the reservation object
- Returns the exclusive fence (if any). Does NOT take a
- reference. Writers must hold obj->lock, readers may only
- hold a RCU read side lock.
- RETURNS
- The exclusive fence or NULL
- */
+static inline struct dma_fence * +dma_resv_get_excl(struct dma_resv *obj) +{
return rcu_dereference_protected(obj->fence_excl,
dma_resv_held(obj));
+}
+/**
- dma_resv_get_excl_rcu - get the reservation object's
- exclusive fence, without lock held.
- @obj: the reservation object
- If there is an exclusive fence, this atomically increments it's
- reference count and returns it.
- RETURNS
- The exclusive fence or NULL if none
- */
+static inline struct dma_fence * +dma_resv_get_excl_rcu(struct dma_resv *obj) +{
struct dma_fence *fence;
if (!rcu_access_pointer(obj->fence_excl))
return NULL;
rcu_read_lock();
fence = dma_fence_get_rcu_safe(&obj->fence_excl);
rcu_read_unlock();
return fence;
+}
void dma_resv_init(struct dma_resv *obj); void dma_resv_fini(struct dma_resv *obj); int dma_resv_reserve_shared(struct dma_resv *obj, unsigned int num_fences); -- 2.23.0.rc1
Am 14.08.19 um 22:07 schrieb Daniel Vetter:
On Wed, Aug 14, 2019 at 07:26:44PM +0100, Chris Wilson wrote:
Quoting Chris Wilson (2019-08-14 19:24:01)
This reverts 67c97fb79a7f ("dma-buf: add reservation_object_fences helper") dd7a7d1ff2f1 ("drm/i915: use new reservation_object_fences helper") 0e1d8083bddb ("dma-buf: further relax reservation_object_add_shared_fence") 5d344f58da76 ("dma-buf: nuke reservation_object seq number")
Oh I didn't realize they landed already.
The scenario that defeats simply grabbing a set of shared/exclusive fences and using them blissfully under RCU is that any of those fences may be reallocated by a SLAB_TYPESAFE_BY_RCU fence slab cache. In this scenario, while keeping the rcu_read_lock we need to establish that no fence was changed in the dma_resv after a read (or full) memory barrier.
So if I'm reading correctly what Chris is saying here I guess my half comment in that other thread pointed at a real oversight. Since I still haven't checked and it's too late for real review not more for now.
Yeah, the root of the problem is that I didn't realized that fences could be reused while in the RCU grace period.
Need to go a step back and try to come up with a complete new approach for synchronization.
Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Cc: Chris Wilson chris@chris-wilson.co.uk
I said I needed to go lie down, that proves it.
Yeah I guess we need to wait for Christian to wake up an have a working brain.
Well in that case you will need to wait for a few more years for my kids to enter school age :)
Cheers, Christian.
-Daniel
Am 14.08.19 um 20:26 schrieb Chris Wilson:
Quoting Chris Wilson (2019-08-14 19:24:01)
This reverts 67c97fb79a7f ("dma-buf: add reservation_object_fences helper") dd7a7d1ff2f1 ("drm/i915: use new reservation_object_fences helper") 0e1d8083bddb ("dma-buf: further relax reservation_object_add_shared_fence") 5d344f58da76 ("dma-buf: nuke reservation_object seq number")
The scenario that defeats simply grabbing a set of shared/exclusive fences and using them blissfully under RCU is that any of those fences may be reallocated by a SLAB_TYPESAFE_BY_RCU fence slab cache. In this scenario, while keeping the rcu_read_lock we need to establish that no fence was changed in the dma_resv after a read (or full) memory barrier.
Signed-off-by: Chris Wilson chris@chris-wilson.co.uk
Acked-by: Christian König christian.koenig@amd.com
Cc: Chris Wilson chris@chris-wilson.co.uk
I said I needed to go lie down, that proves it.
Cc: Christian König christian.koenig@amd.com
Cc: Daniel Vetter daniel.vetter@ffwll.ch
drivers/dma-buf/dma-buf.c | 31 ++++- drivers/dma-buf/dma-resv.c | 109 ++++++++++++----- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 7 +- drivers/gpu/drm/i915/gem/i915_gem_busy.c | 24 ++-- include/linux/dma-resv.h | 113 ++++++++---------- 5 files changed, 175 insertions(+), 109 deletions(-)
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index b3400d6524ab..433d91d710e4 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -199,7 +199,7 @@ static __poll_t dma_buf_poll(struct file *file, poll_table *poll) struct dma_resv_list *fobj; struct dma_fence *fence_excl; __poll_t events;
unsigned shared_count;
unsigned shared_count, seq; dmabuf = file->private_data; if (!dmabuf || !dmabuf->resv)
@@ -213,8 +213,21 @@ static __poll_t dma_buf_poll(struct file *file, poll_table *poll) if (!events) return 0;
+retry:
seq = read_seqcount_begin(&resv->seq); rcu_read_lock();
dma_resv_fences(resv, &fence_excl, &fobj, &shared_count);
fobj = rcu_dereference(resv->fence);
if (fobj)
shared_count = fobj->shared_count;
else
shared_count = 0;
fence_excl = rcu_dereference(resv->fence_excl);
if (read_seqcount_retry(&resv->seq, seq)) {
rcu_read_unlock();
goto retry;
}
if (fence_excl && (!(events & EPOLLOUT) || shared_count == 0)) { struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_excl; __poll_t pevents = EPOLLIN;
@@ -1144,6 +1157,7 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused) struct dma_resv *robj; struct dma_resv_list *fobj; struct dma_fence *fence;
unsigned seq; int count = 0, attach_count, shared_count, i; size_t size = 0;
@@ -1174,9 +1188,16 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused) buf_obj->name ?: "");
robj = buf_obj->resv;
rcu_read_lock();
dma_resv_fences(robj, &fence, &fobj, &shared_count);
rcu_read_unlock();
while (true) {
seq = read_seqcount_begin(&robj->seq);
rcu_read_lock();
fobj = rcu_dereference(robj->fence);
shared_count = fobj ? fobj->shared_count : 0;
fence = rcu_dereference(robj->fence_excl);
if (!read_seqcount_retry(&robj->seq, seq))
break;
rcu_read_unlock();
} if (fence) seq_printf(s, "\tExclusive fence: %s %s %ssignalled\n",
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index f5142683c851..42a8f3f11681 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -49,6 +49,12 @@ DEFINE_WD_CLASS(reservation_ww_class); EXPORT_SYMBOL(reservation_ww_class);
+struct lock_class_key reservation_seqcount_class; +EXPORT_SYMBOL(reservation_seqcount_class);
+const char reservation_seqcount_string[] = "reservation_seqcount"; +EXPORT_SYMBOL(reservation_seqcount_string);
- /**
- dma_resv_list_alloc - allocate fence list
- @shared_max: number of fences we need space for
@@ -96,6 +102,9 @@ static void dma_resv_list_free(struct dma_resv_list *list) void dma_resv_init(struct dma_resv *obj) { ww_mutex_init(&obj->lock, &reservation_ww_class);
__seqcount_init(&obj->seq, reservation_seqcount_string,
}&reservation_seqcount_class); RCU_INIT_POINTER(obj->fence, NULL); RCU_INIT_POINTER(obj->fence_excl, NULL);
@@ -225,6 +234,9 @@ void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence) fobj = dma_resv_get_list(obj); count = fobj->shared_count;
preempt_disable();
write_seqcount_begin(&obj->seq);
for (i = 0; i < count; ++i) { old = rcu_dereference_protected(fobj->shared[i],
@@ -242,6 +254,9 @@ void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence) RCU_INIT_POINTER(fobj->shared[i], fence); /* pointer update must be visible before we extend the shared_count */ smp_store_mb(fobj->shared_count, count);
write_seqcount_end(&obj->seq);
} EXPORT_SYMBOL(dma_resv_add_shared_fence);preempt_enable(); dma_fence_put(old);
@@ -269,10 +284,12 @@ void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence) dma_fence_get(fence);
preempt_disable();
rcu_assign_pointer(obj->fence_excl, fence);
/* pointer update must be visible before we modify the shared_count */
write_seqcount_begin(&obj->seq);
/* write_seqcount_begin provides the necessary memory barrier */
RCU_INIT_POINTER(obj->fence_excl, fence); if (old)
smp_store_mb(old->shared_count, 0);
old->shared_count = 0;
write_seqcount_end(&obj->seq); preempt_enable(); /* inplace update, no shared fences */
@@ -295,15 +312,17 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src) { struct dma_resv_list *src_list, *dst_list; struct dma_fence *old, *new;
unsigned int i, shared_count;
unsigned i; dma_resv_assert_held(dst); rcu_read_lock();
src_list = rcu_dereference(src->fence);
retry:
dma_resv_fences(src, &new, &src_list, &shared_count);
if (shared_count) {
if (src_list) {
unsigned shared_count = src_list->shared_count;
rcu_read_unlock(); dst_list = dma_resv_list_alloc(shared_count);
@@ -311,14 +330,14 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src) return -ENOMEM;
rcu_read_lock();
dma_resv_fences(src, &new, &src_list, &shared_count);
if (!src_list || shared_count > dst_list->shared_max) {
src_list = rcu_dereference(src->fence);
if (!src_list || src_list->shared_count > shared_count) { kfree(dst_list); goto retry; } dst_list->shared_count = 0;
for (i = 0; i < shared_count; ++i) {
for (i = 0; i < src_list->shared_count; ++i) { struct dma_fence *fence; fence = rcu_dereference(src_list->shared[i]);
@@ -328,6 +347,7 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src)
if (!dma_fence_get_rcu(fence)) { dma_resv_list_free(dst_list);
src_list = rcu_dereference(src->fence); goto retry; }
@@ -342,18 +362,18 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src) dst_list = NULL; }
if (new && !dma_fence_get_rcu(new)) {
dma_resv_list_free(dst_list);
goto retry;
}
new = dma_fence_get_rcu_safe(&src->fence_excl); rcu_read_unlock(); src_list = dma_resv_get_list(dst); old = dma_resv_get_excl(dst); preempt_disable();
rcu_assign_pointer(dst->fence_excl, new);
rcu_assign_pointer(dst->fence, dst_list);
write_seqcount_begin(&dst->seq);
/* write_seqcount_begin provides the necessary memory barrier */
RCU_INIT_POINTER(dst->fence_excl, new);
RCU_INIT_POINTER(dst->fence, dst_list);
write_seqcount_end(&dst->seq); preempt_enable(); dma_resv_list_free(src_list);
@@ -388,18 +408,19 @@ int dma_resv_get_fences_rcu(struct dma_resv *obj,
do { struct dma_resv_list *fobj;
unsigned int i;
unsigned int i, seq; size_t sz = 0;
i = 0;
shared_count = i = 0; rcu_read_lock();
dma_resv_fences(obj, &fence_excl, &fobj,
&shared_count);
seq = read_seqcount_begin(&obj->seq);
fence_excl = rcu_dereference(obj->fence_excl); if (fence_excl && !dma_fence_get_rcu(fence_excl)) goto unlock;
fobj = rcu_dereference(obj->fence); if (fobj) sz += sizeof(*shared) * fobj->shared_max;
@@ -427,6 +448,7 @@ int dma_resv_get_fences_rcu(struct dma_resv *obj, break; } shared = nshared;
shared_count = fobj ? fobj->shared_count : 0; for (i = 0; i < shared_count; ++i) { shared[i] = rcu_dereference(fobj->shared[i]); if (!dma_fence_get_rcu(shared[i]))
@@ -434,7 +456,7 @@ int dma_resv_get_fences_rcu(struct dma_resv *obj, } }
if (i != shared_count) {
if (i != shared_count || read_seqcount_retry(&obj->seq, seq)) { while (i--) dma_fence_put(shared[i]); dma_fence_put(fence_excl);
@@ -478,17 +500,18 @@ long dma_resv_wait_timeout_rcu(struct dma_resv *obj, bool wait_all, bool intr, unsigned long timeout) {
struct dma_resv_list *fobj; struct dma_fence *fence;
unsigned shared_count;
unsigned seq, shared_count; long ret = timeout ? timeout : 1; int i;
retry:
shared_count = 0;
seq = read_seqcount_begin(&obj->seq); rcu_read_lock(); i = -1;
dma_resv_fences(obj, &fence, &fobj, &shared_count);
fence = rcu_dereference(obj->fence_excl); if (fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { if (!dma_fence_get_rcu(fence)) goto unlock_retry;
@@ -503,6 +526,11 @@ long dma_resv_wait_timeout_rcu(struct dma_resv *obj, }
if (wait_all) {
struct dma_resv_list *fobj = rcu_dereference(obj->fence);
if (fobj)
shared_count = fobj->shared_count;
for (i = 0; !fence && i < shared_count; ++i) { struct dma_fence *lfence = rcu_dereference(fobj->shared[i]);
@@ -525,6 +553,11 @@ long dma_resv_wait_timeout_rcu(struct dma_resv *obj,
rcu_read_unlock(); if (fence) {
if (read_seqcount_retry(&obj->seq, seq)) {
dma_fence_put(fence);
goto retry;
}
ret = dma_fence_wait_timeout(fence, intr, ret); dma_fence_put(fence); if (ret > 0 && wait_all && (i + 1 < shared_count))
@@ -567,19 +600,23 @@ static inline int dma_resv_test_signaled_single(struct dma_fence *passed_fence) */ bool dma_resv_test_signaled_rcu(struct dma_resv *obj, bool test_all) {
struct dma_resv_list *fobj;
struct dma_fence *fence_excl;
unsigned shared_count;
unsigned seq, shared_count; int ret; rcu_read_lock();
retry: ret = true;
shared_count = 0;
seq = read_seqcount_begin(&obj->seq);
dma_resv_fences(obj, &fence_excl, &fobj, &shared_count); if (test_all) { unsigned i;
struct dma_resv_list *fobj = rcu_dereference(obj->fence);
if (fobj)
shared_count = fobj->shared_count;
for (i = 0; i < shared_count; ++i) { struct dma_fence *fence = rcu_dereference(fobj->shared[i]);
@@ -589,14 +626,24 @@ bool dma_resv_test_signaled_rcu(struct dma_resv *obj, bool test_all) else if (!ret) break; }
}
if (!shared_count && fence_excl) {
ret = dma_resv_test_signaled_single(fence_excl);
if (ret < 0)
if (read_seqcount_retry(&obj->seq, seq)) goto retry; }
if (!shared_count) {
struct dma_fence *fence_excl = rcu_dereference(obj->fence_excl);
if (fence_excl) {
ret = dma_resv_test_signaled_single(fence_excl);
if (ret < 0)
goto retry;
if (read_seqcount_retry(&obj->seq, seq))
goto retry;
}
}
rcu_read_unlock(); return ret;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index bc4ec6b20a87..76e3516484e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -251,7 +251,12 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, new->shared_max = old->shared_max; new->shared_count = k;
rcu_assign_pointer(resv->fence, new);
/* Install the new fence list, seqcount provides the barriers */
preempt_disable();
write_seqcount_begin(&resv->seq);
RCU_INIT_POINTER(resv->fence, new);
write_seqcount_end(&resv->seq);
preempt_enable(); /* Drop the references to the removed fences or move them to ef_list */ for (i = j, k = 0; i < old->shared_count; ++i) {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c index a2aff1d8290e..3d4f5775a4ba 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c @@ -83,8 +83,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, struct drm_i915_gem_busy *args = data; struct drm_i915_gem_object *obj; struct dma_resv_list *list;
unsigned int i, shared_count;
struct dma_fence *excl;
unsigned int seq; int err; err = -ENOENT;
@@ -110,18 +109,29 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, * to report the overall busyness. This is what the wait-ioctl does. * */
dma_resv_fences(obj->base.resv, &excl, &list, &shared_count);
+retry:
seq = raw_read_seqcount(&obj->base.resv->seq); /* Translate the exclusive fence to the READ *and* WRITE engine */
args->busy = busy_check_writer(excl);
args->busy =
busy_check_writer(rcu_dereference(obj->base.resv->fence_excl)); /* Translate shared fences to READ set of engines */
for (i = 0; i < shared_count; ++i) {
struct dma_fence *fence = rcu_dereference(list->shared[i]);
list = rcu_dereference(obj->base.resv->fence);
if (list) {
unsigned int shared_count = list->shared_count, i;
args->busy |= busy_check_reader(fence);
for (i = 0; i < shared_count; ++i) {
struct dma_fence *fence =
rcu_dereference(list->shared[i]);
args->busy |= busy_check_reader(fence);
} }
if (args->busy && read_seqcount_retry(&obj->base.resv->seq, seq))
goto retry;
err = 0;
out: rcu_read_unlock();
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index 38f2802afabb..ee50d10f052b 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -46,6 +46,8 @@ #include <linux/rcupdate.h>
extern struct ww_class reservation_ww_class; +extern struct lock_class_key reservation_seqcount_class; +extern const char reservation_seqcount_string[];
/**
- struct dma_resv_list - a list of shared fences
@@ -69,6 +71,7 @@ struct dma_resv_list { */ struct dma_resv { struct ww_mutex lock;
seqcount_t seq; struct dma_fence __rcu *fence_excl; struct dma_resv_list __rcu *fence;
@@ -77,24 +80,6 @@ struct dma_resv { #define dma_resv_held(obj) lockdep_is_held(&(obj)->lock.base) #define dma_resv_assert_held(obj) lockdep_assert_held(&(obj)->lock.base)
-/**
- dma_resv_get_excl - get the reservation object's
- exclusive fence, with update-side lock held
- @obj: the reservation object
- Returns the exclusive fence (if any). Does NOT take a
- reference. Writers must hold obj->lock, readers may only
- hold a RCU read side lock.
- RETURNS
- The exclusive fence or NULL
- */
-static inline struct dma_fence *dma_resv_get_excl(struct dma_resv *obj) -{
return rcu_dereference_protected(obj->fence_excl,
dma_resv_held(obj));
-}
- /**
- dma_resv_get_list - get the reservation object's
- shared fence list, with update-side lock held
@@ -109,53 +94,6 @@ static inline struct dma_resv_list *dma_resv_get_list(struct dma_resv *obj) dma_resv_held(obj)); }
-/**
- dma_resv_fences - read consistent fence pointers
- @obj: reservation object where we get the fences from
- @excl: pointer for the exclusive fence
- @list: pointer for the shared fence list
- Make sure we have a consisten exclusive fence and shared fence list.
- Must be called with rcu read side lock held.
- */
-static inline void dma_resv_fences(struct dma_resv *obj,
struct dma_fence **excl,
struct dma_resv_list **list,
u32 *shared_count)
-{
do {
*excl = rcu_dereference(obj->fence_excl);
*list = rcu_dereference(obj->fence);
*shared_count = *list ? (*list)->shared_count : 0;
smp_rmb(); /* See dma_resv_add_excl_fence */
} while (rcu_access_pointer(obj->fence_excl) != *excl);
-}
-/**
- dma_resv_get_excl_rcu - get the reservation object's
- exclusive fence, without lock held.
- @obj: the reservation object
- If there is an exclusive fence, this atomically increments it's
- reference count and returns it.
- RETURNS
- The exclusive fence or NULL if none
- */
-static inline struct dma_fence *dma_resv_get_excl_rcu(struct dma_resv *obj) -{
struct dma_fence *fence;
if (!rcu_access_pointer(obj->fence_excl))
return NULL;
rcu_read_lock();
fence = dma_fence_get_rcu_safe(&obj->fence_excl);
rcu_read_unlock();
return fence;
-}
- /**
- dma_resv_lock - lock the reservation object
- @obj: the reservation object
@@ -290,6 +228,51 @@ static inline void dma_resv_unlock(struct dma_resv *obj) ww_mutex_unlock(&obj->lock); }
+/**
- dma_resv_get_excl - get the reservation object's
- exclusive fence, with update-side lock held
- @obj: the reservation object
- Returns the exclusive fence (if any). Does NOT take a
- reference. Writers must hold obj->lock, readers may only
- hold a RCU read side lock.
- RETURNS
- The exclusive fence or NULL
- */
+static inline struct dma_fence * +dma_resv_get_excl(struct dma_resv *obj) +{
return rcu_dereference_protected(obj->fence_excl,
dma_resv_held(obj));
+}
+/**
- dma_resv_get_excl_rcu - get the reservation object's
- exclusive fence, without lock held.
- @obj: the reservation object
- If there is an exclusive fence, this atomically increments it's
- reference count and returns it.
- RETURNS
- The exclusive fence or NULL if none
- */
+static inline struct dma_fence * +dma_resv_get_excl_rcu(struct dma_resv *obj) +{
struct dma_fence *fence;
if (!rcu_access_pointer(obj->fence_excl))
return NULL;
rcu_read_lock();
fence = dma_fence_get_rcu_safe(&obj->fence_excl);
rcu_read_unlock();
return fence;
+}
- void dma_resv_init(struct dma_resv *obj); void dma_resv_fini(struct dma_resv *obj); int dma_resv_reserve_shared(struct dma_resv *obj, unsigned int num_fences);
-- 2.23.0.rc1
dri-devel@lists.freedesktop.org