From: Gustavo Padovan gustavo.padovan@collabora.com
We are going to use timeline_fence_signaled() in a internal function in the next commit.
Cc: Chris Wilson chris@chris-wilson.co.uk Signed-off-by: Gustavo Padovan gustavo.padovan@collabora.com --- drivers/dma-buf/sw_sync.c | 138 +++++++++++++++++++++++----------------------- 1 file changed, 69 insertions(+), 69 deletions(-)
diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c index af1bc84..ef0cc08 100644 --- a/drivers/dma-buf/sw_sync.c +++ b/drivers/dma-buf/sw_sync.c @@ -125,6 +125,75 @@ static void sync_timeline_put(struct sync_timeline *obj) kref_put(&obj->kref, sync_timeline_free); }
+static const char *timeline_fence_get_driver_name(struct dma_fence *fence) +{ + return "sw_sync"; +} + +static const char *timeline_fence_get_timeline_name(struct dma_fence *fence) +{ + struct sync_timeline *parent = dma_fence_parent(fence); + + return parent->name; +} + +static void timeline_fence_release(struct dma_fence *fence) +{ + struct sync_pt *pt = dma_fence_to_sync_pt(fence); + struct sync_timeline *parent = dma_fence_parent(fence); + + if (!list_empty(&pt->link)) { + unsigned long flags; + + spin_lock_irqsave(fence->lock, flags); + if (!list_empty(&pt->link)) { + list_del(&pt->link); + rb_erase(&pt->node, &parent->pt_tree); + } + spin_unlock_irqrestore(fence->lock, flags); + } + + sync_timeline_put(parent); + dma_fence_free(fence); +} + +static bool timeline_fence_signaled(struct dma_fence *fence) +{ + struct sync_timeline *parent = dma_fence_parent(fence); + + return !__dma_fence_is_later(fence->seqno, parent->value); +} + +static bool timeline_fence_enable_signaling(struct dma_fence *fence) +{ + return true; +} + +static void timeline_fence_value_str(struct dma_fence *fence, + char *str, int size) +{ + snprintf(str, size, "%d", fence->seqno); +} + +static void timeline_fence_timeline_value_str(struct dma_fence *fence, + char *str, int size) +{ + struct sync_timeline *parent = dma_fence_parent(fence); + + snprintf(str, size, "%d", parent->value); +} + +static const struct dma_fence_ops timeline_fence_ops = { + .get_driver_name = timeline_fence_get_driver_name, + .get_timeline_name = timeline_fence_get_timeline_name, + .enable_signaling = timeline_fence_enable_signaling, + .signaled = timeline_fence_signaled, + .wait = dma_fence_default_wait, + .release = timeline_fence_release, + .fence_value_str = timeline_fence_value_str, + .timeline_value_str = timeline_fence_timeline_value_str, +}; + /** * sync_timeline_signal() - signal a status change on a sync_timeline * @obj: sync_timeline to signal @@ -216,75 +285,6 @@ static struct sync_pt *sync_pt_create(struct sync_timeline *obj, return pt; }
-static const char *timeline_fence_get_driver_name(struct dma_fence *fence) -{ - return "sw_sync"; -} - -static const char *timeline_fence_get_timeline_name(struct dma_fence *fence) -{ - struct sync_timeline *parent = dma_fence_parent(fence); - - return parent->name; -} - -static void timeline_fence_release(struct dma_fence *fence) -{ - struct sync_pt *pt = dma_fence_to_sync_pt(fence); - struct sync_timeline *parent = dma_fence_parent(fence); - - if (!list_empty(&pt->link)) { - unsigned long flags; - - spin_lock_irqsave(fence->lock, flags); - if (!list_empty(&pt->link)) { - list_del(&pt->link); - rb_erase(&pt->node, &parent->pt_tree); - } - spin_unlock_irqrestore(fence->lock, flags); - } - - sync_timeline_put(parent); - dma_fence_free(fence); -} - -static bool timeline_fence_signaled(struct dma_fence *fence) -{ - struct sync_timeline *parent = dma_fence_parent(fence); - - return !__dma_fence_is_later(fence->seqno, parent->value); -} - -static bool timeline_fence_enable_signaling(struct dma_fence *fence) -{ - return true; -} - -static void timeline_fence_value_str(struct dma_fence *fence, - char *str, int size) -{ - snprintf(str, size, "%d", fence->seqno); -} - -static void timeline_fence_timeline_value_str(struct dma_fence *fence, - char *str, int size) -{ - struct sync_timeline *parent = dma_fence_parent(fence); - - snprintf(str, size, "%d", parent->value); -} - -static const struct dma_fence_ops timeline_fence_ops = { - .get_driver_name = timeline_fence_get_driver_name, - .get_timeline_name = timeline_fence_get_timeline_name, - .enable_signaling = timeline_fence_enable_signaling, - .signaled = timeline_fence_signaled, - .wait = dma_fence_default_wait, - .release = timeline_fence_release, - .fence_value_str = timeline_fence_value_str, - .timeline_value_str = timeline_fence_timeline_value_str, -}; - /* * *WARNING* *
From: Gustavo Padovan gustavo.padovan@collabora.com
If userspace already dropped its own reference by closing the sw_sync fence fd we might end up in a deadlock where dma_fence_is_signaled_locked() will trigger the release of the fence and thus try to hold the lock to remove the fence from the list.
dma_fence_is_signaled_locked() tries to release/free the fence and hold the lock in the process.
We fix that by changing the order operation and clean up the list and rb-tree first.
v2: Drop the fence get/put dance and manipulate the list first (Chris Wilson)
Cc: Chris Wilson chris@chris-wilson.co.uk Signed-off-by: Gustavo Padovan gustavo.padovan@collabora.com --- drivers/dma-buf/sw_sync.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c index ef0cc08..38cc738 100644 --- a/drivers/dma-buf/sw_sync.c +++ b/drivers/dma-buf/sw_sync.c @@ -213,11 +213,21 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc) obj->value += inc;
list_for_each_entry_safe(pt, next, &obj->pt_list, link) { - if (!dma_fence_is_signaled_locked(&pt->base)) + if (!timeline_fence_signaled(&pt->base)) break;
list_del_init(&pt->link); rb_erase(&pt->node, &obj->pt_tree); + + /* + * A signal callback may release the last reference to this + * fence, causing it to be freed. That operation has to be + * last to avoid a use after free inside this loop, and must + * be after we remove the fence from the timeline in order to + * prevent deadlocking on timeline->lock inside + * timeline_fence_release(). + */ + dma_fence_signal_locked(&pt->base); }
spin_unlock_irq(&obj->lock);
Quoting Gustavo Padovan (2017-07-29 16:22:16)
From: Gustavo Padovan gustavo.padovan@collabora.com
If userspace already dropped its own reference by closing the sw_sync fence fd we might end up in a deadlock where dma_fence_is_signaled_locked() will trigger the release of the fence and thus try to hold the lock to remove the fence from the list.
dma_fence_is_signaled_locked() tries to release/free the fence and hold the lock in the process.
We fix that by changing the order operation and clean up the list and rb-tree first.
v2: Drop the fence get/put dance and manipulate the list first (Chris Wilson)
Cc: Chris Wilson chris@chris-wilson.co.uk Signed-off-by: Gustavo Padovan gustavo.padovan@collabora.com
Reviewed-by: Chris Wilson chris@chris-wilson.co.uk -Chris
2017-07-30 Chris Wilson chris@chris-wilson.co.uk:
Quoting Gustavo Padovan (2017-07-29 16:22:16)
From: Gustavo Padovan gustavo.padovan@collabora.com
If userspace already dropped its own reference by closing the sw_sync fence fd we might end up in a deadlock where dma_fence_is_signaled_locked() will trigger the release of the fence and thus try to hold the lock to remove the fence from the list.
dma_fence_is_signaled_locked() tries to release/free the fence and hold the lock in the process.
We fix that by changing the order operation and clean up the list and rb-tree first.
v2: Drop the fence get/put dance and manipulate the list first (Chris Wilson)
Cc: Chris Wilson chris@chris-wilson.co.uk Signed-off-by: Gustavo Padovan gustavo.padovan@collabora.com
Reviewed-by: Chris Wilson chris@chris-wilson.co.uk
Thanks for reviewing. Pushed to drm-misc-next.
Gustavo
From: Gustavo Padovan gustavo.padovan@collabora.com
We found this bug in the sw_sync so adding a test case to prevent it to happen in the future.
Cc: Shuah Khan shuahkh@osg.samsung.com Cc: linux-kselftest@vger.kernel.org Signed-off-by: Gustavo Padovan gustavo.padovan@collabora.com
--- To be applied after the TAP13 convertion patches. --- tools/testing/selftests/sync/sync_fence.c | 23 +++++++++++++++++++++++ tools/testing/selftests/sync/sync_test.c | 1 + tools/testing/selftests/sync/synctest.h | 1 + 3 files changed, 25 insertions(+)
diff --git a/tools/testing/selftests/sync/sync_fence.c b/tools/testing/selftests/sync/sync_fence.c index 13f1752..70cfa61 100644 --- a/tools/testing/selftests/sync/sync_fence.c +++ b/tools/testing/selftests/sync/sync_fence.c @@ -29,6 +29,29 @@ #include "sw_sync.h" #include "synctest.h"
+int test_close_fence_fd_before_inc(void) +{ + int fence, valid, ret; + int timeline = sw_sync_timeline_create(); + + valid = sw_sync_timeline_is_valid(timeline); + ASSERT(valid, "Failure allocating timeline\n"); + + fence = sw_sync_fence_create(timeline, "allocFence", 1); + valid = sw_sync_fence_is_valid(fence); + ASSERT(valid, "Failure allocating fence\n"); + + sw_sync_fence_destroy(fence); + + /* Advance timeline from 0 -> 1 */ + ret = sw_sync_timeline_inc(timeline, 1); + ASSERT(ret == 0, "Failure advancing timeline\n"); + + sw_sync_timeline_destroy(timeline); + + return 0; +} + int test_fence_one_timeline_wait(void) { int fence, valid, ret; diff --git a/tools/testing/selftests/sync/sync_test.c b/tools/testing/selftests/sync/sync_test.c index 7f79382..2f73ace 100644 --- a/tools/testing/selftests/sync/sync_test.c +++ b/tools/testing/selftests/sync/sync_test.c @@ -95,6 +95,7 @@ int main(void) RUN_TEST(test_alloc_fence); RUN_TEST(test_alloc_fence_negative);
+ RUN_TEST(test_close_fence_fd_before_inc); RUN_TEST(test_fence_one_timeline_wait); RUN_TEST(test_fence_one_timeline_merge); RUN_TEST(test_fence_merge_same_fence); diff --git a/tools/testing/selftests/sync/synctest.h b/tools/testing/selftests/sync/synctest.h index 90a8e53..86a9532 100644 --- a/tools/testing/selftests/sync/synctest.h +++ b/tools/testing/selftests/sync/synctest.h @@ -46,6 +46,7 @@ int test_alloc_fence(void); int test_alloc_fence_negative(void);
/* Fence tests with one timeline */ +int test_close_fence_fd_before_inc(void); int test_fence_one_timeline_wait(void); int test_fence_one_timeline_merge(void);
Quoting Gustavo Padovan (2017-07-29 16:22:17)
From: Gustavo Padovan gustavo.padovan@collabora.com
We found this bug in the sw_sync so adding a test case to prevent it to happen in the future.
Cc: Shuah Khan shuahkh@osg.samsung.com Cc: linux-kselftest@vger.kernel.org Signed-off-by: Gustavo Padovan gustavo.padovan@collabora.com
To be applied after the TAP13 convertion patches.
tools/testing/selftests/sync/sync_fence.c | 23 +++++++++++++++++++++++ tools/testing/selftests/sync/sync_test.c | 1 + tools/testing/selftests/sync/synctest.h | 1 + 3 files changed, 25 insertions(+)
diff --git a/tools/testing/selftests/sync/sync_fence.c b/tools/testing/selftests/sync/sync_fence.c index 13f1752..70cfa61 100644 --- a/tools/testing/selftests/sync/sync_fence.c +++ b/tools/testing/selftests/sync/sync_fence.c @@ -29,6 +29,29 @@ #include "sw_sync.h" #include "synctest.h"
+int test_close_fence_fd_before_inc(void) +{
int fence, valid, ret;
int timeline = sw_sync_timeline_create();
valid = sw_sync_timeline_is_valid(timeline);
ASSERT(valid, "Failure allocating timeline\n");
fence = sw_sync_fence_create(timeline, "allocFence", 1);
valid = sw_sync_fence_is_valid(fence);
ASSERT(valid, "Failure allocating fence\n");
/* * We want the destroy + inc to run within the same RCU grace period so * that the zombie fence still exists on the timeline. */
sw_sync_fence_destroy(fence);
I think this doesn't exercise the bug you found as we should be entering the timeline_inc loop with fence.refcount==0 rather than the refcount going to zero within the loop.
To achieve that we need to find a callback that does unreference a dma-fence and chain those together so that it frees a sw_sync from the same timeline.
Still add the comment about what you are intending to hit here and Reviewed-by: Chris Wilson chris@chris-wilson.co.uk -Chris
2017-07-30 Chris Wilson chris@chris-wilson.co.uk:
Quoting Gustavo Padovan (2017-07-29 16:22:17)
From: Gustavo Padovan gustavo.padovan@collabora.com
We found this bug in the sw_sync so adding a test case to prevent it to happen in the future.
Cc: Shuah Khan shuahkh@osg.samsung.com Cc: linux-kselftest@vger.kernel.org Signed-off-by: Gustavo Padovan gustavo.padovan@collabora.com
To be applied after the TAP13 convertion patches.
tools/testing/selftests/sync/sync_fence.c | 23 +++++++++++++++++++++++ tools/testing/selftests/sync/sync_test.c | 1 + tools/testing/selftests/sync/synctest.h | 1 + 3 files changed, 25 insertions(+)
diff --git a/tools/testing/selftests/sync/sync_fence.c b/tools/testing/selftests/sync/sync_fence.c index 13f1752..70cfa61 100644 --- a/tools/testing/selftests/sync/sync_fence.c +++ b/tools/testing/selftests/sync/sync_fence.c @@ -29,6 +29,29 @@ #include "sw_sync.h" #include "synctest.h"
+int test_close_fence_fd_before_inc(void) +{
int fence, valid, ret;
int timeline = sw_sync_timeline_create();
valid = sw_sync_timeline_is_valid(timeline);
ASSERT(valid, "Failure allocating timeline\n");
fence = sw_sync_fence_create(timeline, "allocFence", 1);
valid = sw_sync_fence_is_valid(fence);
ASSERT(valid, "Failure allocating fence\n");
/*
- We want the destroy + inc to run within the same RCU grace period so
- that the zombie fence still exists on the timeline.
*/
sw_sync_fence_destroy(fence);
I think this doesn't exercise the bug you found as we should be entering the timeline_inc loop with fence.refcount==0 rather than the refcount going to zero within the loop.
To achieve that we need to find a callback that does unreference a dma-fence and chain those together so that it frees a sw_sync from the same timeline.
Indeed. Without the internal callback this test is a bit useless. We could test this under drm atomic tests on IGT. Particulary, I hit it playing with tests for v4l2 fences.
Gustavo
On 07/31/2017 01:43 PM, Gustavo Padovan wrote:
2017-07-30 Chris Wilson chris@chris-wilson.co.uk:
Quoting Gustavo Padovan (2017-07-29 16:22:17)
From: Gustavo Padovan gustavo.padovan@collabora.com
We found this bug in the sw_sync so adding a test case to prevent it to happen in the future.
Cc: Shuah Khan shuahkh@osg.samsung.com Cc: linux-kselftest@vger.kernel.org Signed-off-by: Gustavo Padovan gustavo.padovan@collabora.com
To be applied after the TAP13 convertion patches.
tools/testing/selftests/sync/sync_fence.c | 23 +++++++++++++++++++++++ tools/testing/selftests/sync/sync_test.c | 1 + tools/testing/selftests/sync/synctest.h | 1 + 3 files changed, 25 insertions(+)
diff --git a/tools/testing/selftests/sync/sync_fence.c b/tools/testing/selftests/sync/sync_fence.c index 13f1752..70cfa61 100644 --- a/tools/testing/selftests/sync/sync_fence.c +++ b/tools/testing/selftests/sync/sync_fence.c @@ -29,6 +29,29 @@ #include "sw_sync.h" #include "synctest.h"
+int test_close_fence_fd_before_inc(void) +{
int fence, valid, ret;
int timeline = sw_sync_timeline_create();
valid = sw_sync_timeline_is_valid(timeline);
ASSERT(valid, "Failure allocating timeline\n");
fence = sw_sync_fence_create(timeline, "allocFence", 1);
valid = sw_sync_fence_is_valid(fence);
ASSERT(valid, "Failure allocating fence\n");
/*
- We want the destroy + inc to run within the same RCU grace period so
- that the zombie fence still exists on the timeline.
*/
sw_sync_fence_destroy(fence);
I think this doesn't exercise the bug you found as we should be entering the timeline_inc loop with fence.refcount==0 rather than the refcount going to zero within the loop.
To achieve that we need to find a callback that does unreference a dma-fence and chain those together so that it frees a sw_sync from the same timeline.
Indeed. Without the internal callback this test is a bit useless. We could test this under drm atomic tests on IGT. Particulary, I hit it playing with tests for v4l2 fences.
Hi Gustavo,
Would you like this pulled into 4.14-rc1? Sounds like this test is for a feature that doesn't exist yet?
thanks, -- Shuah
Hi Shuah,
On Wed, 2017-08-02 at 13:45 -0600, Shuah Khan wrote:
On 07/31/2017 01:43 PM, Gustavo Padovan wrote:
2017-07-30 Chris Wilson chris@chris-wilson.co.uk:
Quoting Gustavo Padovan (2017-07-29 16:22:17)
From: Gustavo Padovan gustavo.padovan@collabora.com
We found this bug in the sw_sync so adding a test case to prevent it to happen in the future.
Cc: Shuah Khan shuahkh@osg.samsung.com Cc: linux-kselftest@vger.kernel.org Signed-off-by: Gustavo Padovan gustavo.padovan@collabora.com
To be applied after the TAP13 convertion patches.
tools/testing/selftests/sync/sync_fence.c | 23 +++++++++++++++++++++++ tools/testing/selftests/sync/sync_test.c | 1 + tools/testing/selftests/sync/synctest.h | 1 + 3 files changed, 25 insertions(+)
diff --git a/tools/testing/selftests/sync/sync_fence.c b/tools/testing/selftests/sync/sync_fence.c index 13f1752..70cfa61 100644 --- a/tools/testing/selftests/sync/sync_fence.c +++ b/tools/testing/selftests/sync/sync_fence.c @@ -29,6 +29,29 @@ #include "sw_sync.h" #include "synctest.h" +int test_close_fence_fd_before_inc(void) +{ + int fence, valid, ret; + int timeline = sw_sync_timeline_create();
+ valid = sw_sync_timeline_is_valid(timeline); + ASSERT(valid, "Failure allocating timeline\n");
+ fence = sw_sync_fence_create(timeline, "allocFence", 1); + valid = sw_sync_fence_is_valid(fence); + ASSERT(valid, "Failure allocating fence\n");
/* * We want the destroy + inc to run within the same RCU grace period so * that the zombie fence still exists on the timeline. */
+ sw_sync_fence_destroy(fence);
I think this doesn't exercise the bug you found as we should be entering the timeline_inc loop with fence.refcount==0 rather than the refcount going to zero within the loop.
To achieve that we need to find a callback that does unreference a dma-fence and chain those together so that it frees a sw_sync from the same timeline.
Indeed. Without the internal callback this test is a bit useless. We could test this under drm atomic tests on IGT. Particulary, I hit it playing with tests for v4l2 fences.
Hi Gustavo,
Would you like this pulled into 4.14-rc1? Sounds like this test is for a feature that doesn't exist yet?
It is for the current code, but I need to find a better way to trigger what I want. I'll rethink this test and resend.
Gustavo
Quoting Gustavo Padovan (2017-07-29 16:22:15)
From: Gustavo Padovan gustavo.padovan@collabora.com
We are going to use timeline_fence_signaled() in a internal function in the next commit.
Cc: Chris Wilson chris@chris-wilson.co.uk Signed-off-by: Gustavo Padovan gustavo.padovan@collabora.com
Purely mechanical, Reviewed-by: Chris Wilson chris@chris-wilson.co.uk -Chris
2017-07-30 Chris Wilson chris@chris-wilson.co.uk:
Quoting Gustavo Padovan (2017-07-29 16:22:15)
From: Gustavo Padovan gustavo.padovan@collabora.com
We are going to use timeline_fence_signaled() in a internal function in the next commit.
Cc: Chris Wilson chris@chris-wilson.co.uk Signed-off-by: Gustavo Padovan gustavo.padovan@collabora.com
Purely mechanical, Reviewed-by: Chris Wilson chris@chris-wilson.co.uk
Thanks for reviewing. Pushed to drm-misc-next.
Gustavo
dri-devel@lists.freedesktop.org